def test_create_session_with_existing(self, mock_datetime, mock_client, mock_query):
    """create_session closes any open session before persisting the new one."""
    mock_datetime.now.return_value = fixed_now

    # An already-open session that create_session is expected to close.
    open_session = ScrapeSession.new(
        key=datastore.key.Key("session", "existing", project=0),
        start=fixed_now,
        scrape_type=constants.ScrapeType.BACKGROUND,
        region="us_ny",
        phase=scrape_phase.ScrapePhase.START,
    )

    fresh_key = datastore.key.Key("session", "new", project=0)
    fresh_session = ScrapeSession.new(
        key=fresh_key,
        start=fixed_now,
        scrape_type=constants.ScrapeType.BACKGROUND,
        region="us_wy",
        phase=scrape_phase.ScrapePhase.START,
    )

    client = mock_client.return_value
    client.key.return_value = fresh_key
    wire_sessions_to_query(mock_client, mock_query, [open_session])

    sessions.create_session(ScrapeKey("us_wy", constants.ScrapeType.BACKGROUND))

    # The pre-existing session is closed with the fixed timestamp...
    open_session.end = fixed_now
    client.put.assert_any_call(open_session.to_entity())
    # ...and the brand-new session is persisted as well; nothing else is put.
    client.put.assert_any_call(fresh_session.to_entity())
    assert client.put.call_count == 2
def test_add_item_happy_path(self, mock_client, mock_query):
    """Adding a docket item updates only the matching open session."""
    session_key = datastore.key.Key('session', 'current', project=0)
    session_kwargs = {
        'region': 'us_va',
        'scrape_type': constants.ScrapeType.SNAPSHOT,
        'phase': scrape_phase.ScrapePhase.START,
        'start': fix_dt(datetime(2014, 8, 31)),
    }
    matching_session = ScrapeSession.new(session_key, **session_kwargs)
    unrelated_session = ScrapeSession.new(
        datastore.key.Key('session', 'prior', project=0),
        region='us_ny',
        scrape_type=constants.ScrapeType.SNAPSHOT,
        start=fix_dt(datetime(2014, 8, 17)),
        phase=scrape_phase.ScrapePhase.SCRAPE,
    )
    wire_sessions_to_query(
        mock_client, mock_query, [matching_session, unrelated_session])

    assert sessions.add_docket_item_to_current_session(
        "alpha", ScrapeKey("us_va", constants.ScrapeType.SNAPSHOT))

    # The persisted session must be the current one, now tagged with the id.
    session_kwargs['docket_ack_id'] = 'alpha'
    expected_session = ScrapeSession.new(session_key, **session_kwargs)
    mock_client.return_value.put.assert_called_with(expected_session.to_entity())
def test_add_item_happy_path(self, mock_client, mock_query):
    """Adding a docket item updates only the matching open session."""
    session_key = datastore.key.Key("session", "current", project=0)
    session_kwargs = {
        "region": "us_va",
        "scrape_type": constants.ScrapeType.SNAPSHOT,
        "phase": scrape_phase.ScrapePhase.START,
        "start": fix_dt(datetime(2014, 8, 31)),
    }
    matching_session = ScrapeSession.new(session_key, **session_kwargs)
    unrelated_session = ScrapeSession.new(
        datastore.key.Key("session", "prior", project=0),
        region="us_ny",
        scrape_type=constants.ScrapeType.SNAPSHOT,
        start=fix_dt(datetime(2014, 8, 17)),
        phase=scrape_phase.ScrapePhase.SCRAPE,
    )
    wire_sessions_to_query(
        mock_client, mock_query, [matching_session, unrelated_session]
    )

    assert sessions.add_docket_item_to_current_session(
        "alpha", ScrapeKey("us_va", constants.ScrapeType.SNAPSHOT)
    )

    # The persisted session must be the current one, now tagged with the id.
    session_kwargs["docket_ack_id"] = "alpha"
    expected_session = ScrapeSession.new(session_key, **session_kwargs)
    mock_client.return_value.put.assert_called_with(expected_session.to_entity())
def test_stop_scrape_resume_other_scrapes(
        self, mock_resume, mock_get_region, mock_sessions,
        mock_purge_scrape_tasks):
    """Tests that the stop_scrape method will launch other scrape types we
    didn't mean to stop."""
    region = "us_sd"
    queue_name = "us_sd_scraper"
    initial_task = "mail_upgrade_it"
    mock_get_region.return_value = mock_region(
        region, queue_name, is_stoppable=True)

    # Two open sessions: one of the type being stopped, one of another type.
    snapshot_session = ScrapeSession.new(
        key=None,
        scrape_type=constants.ScrapeType.SNAPSHOT,
        phase=scrape_phase.ScrapePhase.SCRAPE,
    )
    background_session = ScrapeSession.new(
        key=None,
        scrape_type=constants.ScrapeType.BACKGROUND,
        phase=scrape_phase.ScrapePhase.SCRAPE,
    )
    mock_sessions.return_value = [snapshot_session, background_session]
    mock_purge_scrape_tasks.return_value = None

    scraper = FakeScraper(region, initial_task)
    scraper.stop_scrape([constants.ScrapeType.BACKGROUND])

    mock_get_region.assert_called_with(region)
    mock_sessions.assert_called_with(region, include_closed=False)
    # Only the snapshot scrape, which we did not mean to stop, is resumed.
    mock_resume.assert_called_with(constants.ScrapeType.SNAPSHOT)
    mock_purge_scrape_tasks.assert_called_with(
        region_code=region, queue_name=queue_name)
def test_resume_scrape_background(
    self,
    mock_get_region: Mock,
    mock_sessions: Mock,
    mock_task_manager: Mock,
    mock_datetime: Mock,
) -> None:
    """Tests the resume_scrape flow for background scraping."""
    region = "us_nd"
    scrape_type = constants.ScrapeType.BACKGROUND
    queue_name = "us_nd_scraper"
    initial_task = "charge_it"
    mock_get_region.return_value = mock_region(region, queue_name)

    # Two open sessions: one without any scrape progress, one with a name.
    session_without_progress = ScrapeSession.new(
        key=None,
        scrape_type=constants.ScrapeType.BACKGROUND,
        phase=scrape_phase.ScrapePhase.SCRAPE,
    )
    session_with_progress = ScrapeSession.new(
        key=None,
        scrape_type=constants.ScrapeType.BACKGROUND,
        phase=scrape_phase.ScrapePhase.SCRAPE,
        last_scraped="Bangalter, Thomas",
    )
    mock_sessions.return_value = [
        session_without_progress,
        session_with_progress,
    ]
    mock_task_manager.return_value.create_scrape_task.return_value = None
    mock_datetime.now.return_value = _DATETIME

    scraper = FakeScraper(region, initial_task)
    scraper.resume_scrape(scrape_type)

    mock_get_region.assert_called_with(region)
    mock_sessions.assert_called_with(ScrapeKey(region, scrape_type))

    expected_params = QueueRequest(
        scrape_type=scrape_type,
        scraper_start_time=_DATETIME,
        next_task=FAKE_TASK,
    )
    mock_task_manager.return_value.create_scrape_task.assert_called_with(
        region_code=region,
        queue_name=queue_name,
        url=scraper.scraper_work_url,
        body={
            "region": region,
            "task": initial_task,
            "params": expected_params.to_serializable(),
        },
    )
def test_stop_scrape_not_executed(self, mock_get_region, mock_sessions,
                                  mock_task_manager):
    """A non-stoppable region must leave sessions and queues untouched."""
    region = "us_sd"
    initial_task = "change_it"
    mock_get_region.return_value = mock_region(
        region, "us_sd_scraper", is_stoppable=False)

    mock_sessions.return_value = [
        ScrapeSession.new(
            key=None,
            scrape_type=constants.ScrapeType.BACKGROUND,
            region=region,
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )
    ]
    mock_task_manager.return_value.purge_scrape_tasks.return_value = None

    scraper = FakeScraper(region, initial_task)
    scraper.stop_scrape(
        [constants.ScrapeType.BACKGROUND], respect_is_stoppable=True)

    mock_get_region.assert_called_with(region)
    # is_stoppable=False short-circuits the stop: nothing else may happen.
    mock_sessions.assert_not_called()
    mock_task_manager.return_value.purge_scrape_tasks.assert_not_called()
def test_infer_release(
    self,
    mock_get_region: Mock,
    mock_validate_regions: Mock,
    mock_update_phase: Mock,
    mock_get_most_recent_session: Mock,
    mock_infer_release: Mock,
) -> None:
    """The /release cron endpoint infers releases for each requested region
    and advances the most recent session's phase to DONE."""
    headers = {"X-Appengine-Cron": "test-cron"}
    mock_validate_regions.return_value = [r.region_code for r in _REGIONS]
    mock_get_region.side_effect = _REGIONS
    time = datetime(2014, 8, 31)
    recent_session = ScrapeSession.new(
        key=None,
        start=time,
        scrape_type=constants.ScrapeType.BACKGROUND,
        phase=scrape_phase.ScrapePhase.RELEASE,
    )
    mock_get_most_recent_session.return_value = recent_session

    # Fixed: the query string previously read "region=us_ut®ion=us_wy" —
    # the second "&region" had been mangled into the HTML entity '®'
    # ("&reg;"). Both regions must be passed, matching the two asserted
    # infer_release calls below.
    response = self.client.get(
        "/release?region=us_ut&region=us_wy", headers=headers
    )

    assert response.status_code == 200
    mock_infer_release.assert_has_calls(
        [
            call("us_ut", time, CustodyStatus.INFERRED_RELEASE),
            call("us_wy", time, CustodyStatus.REMOVED_WITHOUT_INFO),
        ]
    )
    mock_update_phase.assert_called_with(
        recent_session, scrape_phase.ScrapePhase.DONE
    )
def test_stop_scrape(self, mock_get_region: Mock, mock_sessions: Mock,
                     mock_task_manager: Mock) -> None:
    """Stopping a stoppable region purges its scrape queue."""
    region = "us_sd"
    scrape_type = constants.ScrapeType.BACKGROUND
    queue_name = "us_sd_scraper"
    initial_task = "change_it"
    mock_get_region.return_value = mock_region(
        region, queue_name, is_stoppable=True)

    mock_sessions.return_value = [
        ScrapeSession.new(
            key=None,
            scrape_type=scrape_type,
            region=region,
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )
    ]
    mock_task_manager.return_value.purge_scrape_tasks.return_value = None

    scraper = FakeScraper(region, initial_task)
    scraper.stop_scrape(scrape_type)

    mock_get_region.assert_called_with(region)
    mock_sessions.assert_called_with(region, include_closed=False)
    mock_task_manager.return_value.purge_scrape_tasks.assert_called_with(
        region_code=region, queue_name=queue_name)
def create_session(self, region_code, scrape_type, phase, start, end=None,
                   docket_ack_id=None):
    """Persists a new ScrapeSession and tracks its key for test cleanup."""
    new_session = ScrapeSession.new(
        key=sessions.ds().key('ScrapeSession'),
        region=region_code,
        scrape_type=scrape_type,
        phase=phase,
        docket_ack_id=docket_ack_id,
        start=start,
        end=end,
    )
    sessions.ds().put(new_session.to_entity())
    # Remember the key so the fixture teardown can delete this entity.
    self.keys_to_delete.append(new_session.to_entity().key)
    return new_session
def test_close_session(self, mock_datetime, mock_client, mock_query):
    """close_session stamps the open session's end time and persists it."""
    mock_datetime.now.return_value = fixed_now

    key = datastore.key.Key('session', 'key', project=0)
    session = ScrapeSession.new(
        key,
        start=fixed_now,
        scrape_type=constants.ScrapeType.SNAPSHOT,
        region='us_sd',
        phase=scrape_phase.ScrapePhase.SCRAPE,
    )
    wire_sessions_to_query(mock_client, mock_query, [session])

    # Fixed: the expected session is closed via the `end` attribute — the
    # field written when sessions are ended (see
    # test_create_session_with_existing) — not the stray `end_time` name the
    # original assigned, which the module never writes.
    session.end = fixed_now

    scrape_key = ScrapeKey("us_sd", constants.ScrapeType.SNAPSHOT)
    assert to_entities(sessions.close_session(scrape_key)) == \
        to_entities([session])
    mock_client.return_value.put.assert_called_with(session.to_entity())
def test_update_session(self, mock_datetime, mock_client, mock_query):
    """update_session records the last scraped name on the open session."""
    mock_datetime.now.return_value = fixed_now
    session_key = datastore.key.Key('session', 'key', project=0)
    open_session = ScrapeSession.new(
        session_key,
        start=fixed_now,
        scrape_type=constants.ScrapeType.SNAPSHOT,
        region='us_sd',
        phase=scrape_phase.ScrapePhase.START,
    )
    wire_sessions_to_query(mock_client, mock_query, [open_session])

    assert sessions.update_session(
        "CAMUS, ALBERT", ScrapeKey("us_sd", constants.ScrapeType.SNAPSHOT))

    # The persisted entity carries the newly scraped name.
    open_session.last_scraped = 'CAMUS, ALBERT'
    mock_client.return_value.put.assert_called_with(open_session.to_entity())
def test_resume_scrape_background_none_scraped(self, mock_get_region,
                                               mock_sessions):
    """Resuming a background scrape with no recorded progress is a no-op."""
    region = "us_nd"
    scrape_type = constants.ScrapeType.BACKGROUND
    initial_task = "point_it"
    mock_get_region.return_value = mock_region(region)

    # The only recent session has no last_scraped value.
    unstarted_session = ScrapeSession.new(
        key=None,
        scrape_type=constants.ScrapeType.BACKGROUND,
        phase=scrape_phase.ScrapePhase.SCRAPE,
    )
    mock_sessions.return_value = [unstarted_session]

    scraper = FakeScraper(region, initial_task)
    scraper.resume_scrape(scrape_type)

    mock_get_region.assert_called_with(region)
    mock_sessions.assert_called_with(ScrapeKey(region, scrape_type))
def test_create_session(self, mock_datetime, mock_client):
    """With no open sessions, create_session just writes the new session."""
    mock_datetime.now.return_value = fixed_now
    # Must use a full key so that the entities are equal.
    session_key = datastore.key.Key('session', 'key', project=0)
    client = mock_client.return_value
    client.key.return_value = session_key

    sessions.create_session(ScrapeKey("us_ok", constants.ScrapeType.SNAPSHOT))

    expected_session = ScrapeSession.new(
        key=session_key,
        start=fixed_now,
        scrape_type=constants.ScrapeType.SNAPSHOT,
        region='us_ok',
        phase=scrape_phase.ScrapePhase.START,
    )
    client.put.assert_called_with(expected_session.to_entity())
def create_session(
    self,
    region_code: str,
    scrape_type: constants.ScrapeType,
    phase: scrape_phase.ScrapePhase,
    start: datetime,
    end: Optional[datetime] = None,
    docket_ack_id: Optional[str] = None,
) -> ScrapeSession:
    """Persists a new ScrapeSession and tracks its key for test cleanup."""
    new_session = ScrapeSession.new(
        key=sessions.ds().key("ScrapeSession"),
        region=region_code,
        scrape_type=scrape_type,
        phase=phase,
        docket_ack_id=docket_ack_id,
        start=start,
        end=end,
    )
    sessions.ds().put(new_session.to_entity())
    # Remember the key so the fixture teardown can delete this entity.
    self.keys_to_delete.append(new_session.to_entity().key)
    return new_session