Example #1
    def test_create_session_with_existing(self, mock_datetime, mock_client, mock_query):
        mock_datetime.now.return_value = fixed_now

        existing_session = ScrapeSession.new(
            key=datastore.key.Key("session", "existing", project=0),
            start=fixed_now,
            scrape_type=constants.ScrapeType.BACKGROUND,
            region="us_ny",
            phase=scrape_phase.ScrapePhase.START,
        )
        new_key = datastore.key.Key("session", "new", project=0)
        new_session = ScrapeSession.new(
            key=new_key,
            start=fixed_now,
            scrape_type=constants.ScrapeType.BACKGROUND,
            region="us_wy",
            phase=scrape_phase.ScrapePhase.START,
        )

        client = mock_client.return_value
        client.key.return_value = new_key
        wire_sessions_to_query(mock_client, mock_query, [existing_session])

        scrape_key = ScrapeKey("us_wy", constants.ScrapeType.BACKGROUND)
        sessions.create_session(scrape_key)

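        # create_session should close the pre-existing us_ny session and persist the new us_wy one.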
        existing_session.end = fixed_now
        client.put.assert_any_call(existing_session.to_entity())
        client.put.assert_any_call(new_session.to_entity())
        assert client.put.call_count == 2
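The examples in this listing lean on a couple of module-level test fixtures that are not shown here: fixed_now and wire_sessions_to_query. A minimal sketch of what they are assumed to look like, reconstructed only from how the tests call them (the timestamp value and the mock-wiring details are assumptions, not the original helpers):

    from datetime import datetime

    # Assumed fixture: the fixed timestamp returned by the patched datetime.now().
    fixed_now = datetime(2020, 1, 1)  # hypothetical value


    def wire_sessions_to_query(mock_client, mock_query, sessions):
        """Assumed helper: wire the mocked Datastore client and query so that a
        client.query(...).fetch() chain yields the entities backing `sessions`."""
        mock_client.return_value.query.return_value = mock_query
        mock_query.fetch.return_value = [session.to_entity() for session in sessions]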
Example #2
    def test_add_item_happy_path(self, mock_client, mock_query):
        current_session_key = datastore.key.Key('session', 'current', project=0)
        current_session_vars = {
            'region': 'us_va',
            'scrape_type': constants.ScrapeType.SNAPSHOT,
            'phase': scrape_phase.ScrapePhase.START,
            'start': fix_dt(datetime(2014, 8, 31))
        }
        current_session = ScrapeSession.new(current_session_key,
                                            **current_session_vars)
        prior_session = ScrapeSession.new(
            datastore.key.Key('session', 'prior', project=0), region='us_ny',
            scrape_type=constants.ScrapeType.SNAPSHOT, start=fix_dt(
                datetime(2014, 8, 17)), phase=scrape_phase.ScrapePhase.SCRAPE,
        )

        wire_sessions_to_query(
            mock_client, mock_query, [current_session, prior_session])

        assert sessions.add_docket_item_to_current_session(
            "alpha", ScrapeKey("us_va", constants.ScrapeType.SNAPSHOT))

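        # The current us_va session should be re-persisted with the docket item attached.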
        current_session_vars.update({'docket_ack_id': 'alpha'})
        expected_session = ScrapeSession.new(
            current_session_key, **current_session_vars
        )
        mock_client.return_value.put.assert_called_with(
            expected_session.to_entity())
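fix_dt is another helper that is not shown. Since the tests pass it naive datetime(...) values that must later compare equal to what comes back from a Datastore entity, it presumably just pins the value to UTC. A hedged sketch of that assumption:

    from datetime import datetime

    import pytz


    def fix_dt(dt: datetime) -> datetime:
        """Assumed helper: make a naive datetime timezone-aware (UTC) so it survives
        the round trip through a Datastore entity unchanged."""
        return dt.replace(tzinfo=pytz.UTC)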
Example #3
    def test_add_item_happy_path(self, mock_client, mock_query):
        current_session_key = datastore.key.Key("session", "current", project=0)
        current_session_vars = {
            "region": "us_va",
            "scrape_type": constants.ScrapeType.SNAPSHOT,
            "phase": scrape_phase.ScrapePhase.START,
            "start": fix_dt(datetime(2014, 8, 31)),
        }
        current_session = ScrapeSession.new(current_session_key, **current_session_vars)
        prior_session = ScrapeSession.new(
            datastore.key.Key("session", "prior", project=0),
            region="us_ny",
            scrape_type=constants.ScrapeType.SNAPSHOT,
            start=fix_dt(datetime(2014, 8, 17)),
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )

        wire_sessions_to_query(
            mock_client, mock_query, [current_session, prior_session]
        )

        assert sessions.add_docket_item_to_current_session(
            "alpha", ScrapeKey("us_va", constants.ScrapeType.SNAPSHOT)
        )

        current_session_vars.update({"docket_ack_id": "alpha"})
        expected_session = ScrapeSession.new(
            current_session_key, **current_session_vars
        )
        mock_client.return_value.put.assert_called_with(expected_session.to_entity())
Example #4
    def test_stop_scrape_resume_other_scrapes(
            self, mock_resume, mock_get_region, mock_sessions,
            mock_purge_scrape_tasks):
        """Tests that stop_scrape resumes other open scrape types that we
        didn't mean to stop."""
        region = "us_sd"
        scrape_type = constants.ScrapeType.BACKGROUND
        queue_name = "us_sd_scraper"
        initial_task = "mail_upgrade_it"

        mock_get_region.return_value = mock_region(region, queue_name,
                                                   is_stoppable=True)
        open_session_other = ScrapeSession.new(
            key=None, scrape_type=constants.ScrapeType.SNAPSHOT,
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )
        open_session_matching = ScrapeSession.new(
            key=None, scrape_type=constants.ScrapeType.BACKGROUND,
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )
        mock_sessions.return_value = [open_session_other,
                                      open_session_matching]
        mock_purge_scrape_tasks.return_value = None

        scraper = FakeScraper(region, initial_task)
        scraper.stop_scrape([scrape_type])

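        # Only the open snapshot scrape should be resumed; the stopped background scrape's queue is purged.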
        mock_get_region.assert_called_with(region)
        mock_sessions.assert_called_with(region, include_closed=False)
        mock_resume.assert_called_with(constants.ScrapeType.SNAPSHOT)
        mock_purge_scrape_tasks.assert_called_with(
            region_code=region, queue_name=queue_name)
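The scraper tests (Examples #4 through #8 and #12) also call two helpers that are not defined in this listing: mock_region, which builds a stand-in for a region manifest, and FakeScraper, presumably a minimal Scraper subclass wired to return the canned FAKE_TASK. A sketch of mock_region under those assumptions (the exact attribute names are guesses based on how the tests use the result):

    from unittest.mock import Mock


    def mock_region(region_code, queue_name=None, is_stoppable=False):
        """Assumed helper: a Mock carrying just the region attributes these tests read."""
        region = Mock()
        region.region_code = region_code
        region.queue_name = queue_name      # attribute vs. accessor is an assumption
        region.is_stoppable = is_stoppable
        return region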
Example #5
    def test_resume_scrape_background(
        self,
        mock_get_region: Mock,
        mock_sessions: Mock,
        mock_task_manager: Mock,
        mock_datetime: Mock,
    ) -> None:
        """Tests the resume_scrape flow for background scraping."""
        region = "us_nd"
        scrape_type = constants.ScrapeType.BACKGROUND
        queue_name = "us_nd_scraper"
        initial_task = "charge_it"

        mock_get_region.return_value = mock_region(region, queue_name)
        recent_session_none_scraped = ScrapeSession.new(
            key=None,
            scrape_type=constants.ScrapeType.BACKGROUND,
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )
        recent_session = ScrapeSession.new(
            key=None,
            scrape_type=constants.ScrapeType.BACKGROUND,
            phase=scrape_phase.ScrapePhase.SCRAPE,
            last_scraped="Bangalter, Thomas",
        )
        mock_sessions.return_value = [
            recent_session_none_scraped, recent_session
        ]
        mock_task_manager.return_value.create_scrape_task.return_value = None
        mock_datetime.now.return_value = _DATETIME

        scraper = FakeScraper(region, initial_task)
        scraper.resume_scrape(scrape_type)

        mock_get_region.assert_called_with(region)
        mock_sessions.assert_called_with(ScrapeKey(region, scrape_type))

        queue_params = QueueRequest(
            scrape_type=scrape_type,
            scraper_start_time=_DATETIME,
            next_task=FAKE_TASK,
            # content=['Bangalter', 'Thomas'],
        )
        request_body = {
            "region": region,
            "task": initial_task,
            "params": queue_params.to_serializable(),
        }

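        # resume_scrape should enqueue a fresh scrape task carrying the serialized queue params.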
        mock_task_manager.return_value.create_scrape_task.assert_called_with(
            region_code=region,
            queue_name=queue_name,
            url=scraper.scraper_work_url,
            body=request_body,
        )
Example #6
    def test_stop_scrape_not_executed(self, mock_get_region, mock_sessions,
                                      mock_task_manager):
        region = "us_sd"
        scrape_type = constants.ScrapeType.BACKGROUND
        queue_name = "us_sd_scraper"
        initial_task = "change_it"

        mock_get_region.return_value = mock_region(region,
                                                   queue_name,
                                                   is_stoppable=False)
        open_session = ScrapeSession.new(
            key=None,
            scrape_type=scrape_type,
            region=region,
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )
        mock_sessions.return_value = [open_session]
        mock_task_manager.return_value.purge_scrape_tasks.return_value = None

        scraper = FakeScraper(region, initial_task)
        scraper.stop_scrape([scrape_type], respect_is_stoppable=True)

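        # The region is not stoppable and respect_is_stoppable=True, so no sessions are read and no tasks are purged.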
        mock_get_region.assert_called_with(region)
        mock_sessions.assert_not_called()
        mock_task_manager.return_value.purge_scrape_tasks.assert_not_called()
Example #7
    def test_infer_release(
        self,
        mock_get_region: Mock,
        mock_validate_regions: Mock,
        mock_update_phase: Mock,
        mock_get_most_recent_session: Mock,
        mock_infer_release: Mock,
    ) -> None:
        headers = {"X-Appengine-Cron": "test-cron"}
        mock_validate_regions.return_value = [r.region_code for r in _REGIONS]
        mock_get_region.side_effect = _REGIONS

        time = datetime(2014, 8, 31)
        recent_session = ScrapeSession.new(
            key=None,
            start=time,
            scrape_type=constants.ScrapeType.BACKGROUND,
            phase=scrape_phase.ScrapePhase.RELEASE,
        )
        mock_get_most_recent_session.return_value = recent_session

        response = self.client.get(
            "/release?region=us_ut&region=us_wy", headers=headers
        )
        assert response.status_code == 200
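        # Each region gets an infer_release call stamped with the session's start time,
        # and the session is then moved to the DONE phase.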
        mock_infer_release.assert_has_calls(
            [
                call("us_ut", time, CustodyStatus.INFERRED_RELEASE),
                call("us_wy", time, CustodyStatus.REMOVED_WITHOUT_INFO),
            ]
        )

        mock_update_phase.assert_called_with(
            recent_session, scrape_phase.ScrapePhase.DONE
        )
Example #8
    def test_stop_scrape(self, mock_get_region: Mock, mock_sessions: Mock,
                         mock_task_manager: Mock) -> None:
        region = "us_sd"
        scrape_type = constants.ScrapeType.BACKGROUND
        queue_name = "us_sd_scraper"
        initial_task = "change_it"

        mock_get_region.return_value = mock_region(region,
                                                   queue_name,
                                                   is_stoppable=True)
        open_session = ScrapeSession.new(
            key=None,
            scrape_type=scrape_type,
            region=region,
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )
        mock_sessions.return_value = [open_session]
        mock_task_manager.return_value.purge_scrape_tasks.return_value = None

        scraper = FakeScraper(region, initial_task)
        scraper.stop_scrape([scrape_type])

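        # A stoppable region should have its queued scrape tasks purged.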
        mock_get_region.assert_called_with(region)
        mock_sessions.assert_called_with(region, include_closed=False)
        mock_task_manager.return_value.purge_scrape_tasks.assert_called_with(
            region_code=region, queue_name=queue_name)
Example #9
 def create_session(self, region_code, scrape_type, phase, start, end=None,
                    docket_ack_id=None):
     session = ScrapeSession.new(
         key=sessions.ds().key('ScrapeSession'), region=region_code,
         scrape_type=scrape_type, phase=phase, docket_ack_id=docket_ack_id,
         start=start, end=end)
     sessions.ds().put(session.to_entity())
     self.keys_to_delete.append(session.to_entity().key)
     return session
Example #10
    def test_close_session(self, mock_datetime, mock_client, mock_query):
        mock_datetime.now.return_value = fixed_now

        key = datastore.key.Key('session', 'key', project=0)
        session = ScrapeSession.new(
            key, start=fixed_now, scrape_type=constants.ScrapeType.SNAPSHOT,
            region='us_sd', phase=scrape_phase.ScrapePhase.SCRAPE,
        )

        wire_sessions_to_query(mock_client, mock_query, [session])
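        # close_session should stamp the end time on the matching open session and persist it.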
        session.end = fixed_now

        scrape_key = ScrapeKey("us_sd", constants.ScrapeType.SNAPSHOT)
        assert to_entities(sessions.close_session(scrape_key)) == \
            to_entities([session])

        mock_client.return_value.put.assert_called_with(session.to_entity())
Example #11
    def test_update_session(self, mock_datetime, mock_client, mock_query):
        mock_datetime.now.return_value = fixed_now

        key = datastore.key.Key('session', 'key', project=0)
        session = ScrapeSession.new(
            key, start=fixed_now, scrape_type=constants.ScrapeType.SNAPSHOT,
            region='us_sd',
            phase=scrape_phase.ScrapePhase.START,
        )

        wire_sessions_to_query(mock_client, mock_query, [session])

        scrape_key = ScrapeKey("us_sd", constants.ScrapeType.SNAPSHOT)
        assert sessions.update_session("CAMUS, ALBERT", scrape_key)

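        # update_session should record the last scraped name on the open session and persist it.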
        session.last_scraped = 'CAMUS, ALBERT'
        mock_client.return_value.put.assert_called_with(session.to_entity())
Example #12
    def test_resume_scrape_background_none_scraped(self, mock_get_region,
                                                   mock_sessions):
        region = "us_nd"
        scrape_type = constants.ScrapeType.BACKGROUND
        initial_task = "point_it"

        mock_get_region.return_value = mock_region(region)
        recent_session_none_scraped = ScrapeSession.new(
            key=None, scrape_type=constants.ScrapeType.BACKGROUND,
            phase=scrape_phase.ScrapePhase.SCRAPE)

        mock_sessions.return_value = [recent_session_none_scraped]

        scraper = FakeScraper(region, initial_task)
        scraper.resume_scrape(scrape_type)

        mock_get_region.assert_called_with(region)
        mock_sessions.assert_called_with(ScrapeKey(region, scrape_type))
Example #13
    def test_create_session(self, mock_datetime, mock_client):
        mock_datetime.now.return_value = fixed_now

        # Must use a full key so that the entities are equal.
        key = datastore.key.Key('session', 'key', project=0)

        client = mock_client.return_value
        client.key.return_value = key

        scrape_key = ScrapeKey("us_ok", constants.ScrapeType.SNAPSHOT)
        sessions.create_session(scrape_key)

        session = ScrapeSession.new(
            key=datastore.key.Key('session', 'key', project=0), start=fixed_now,
            scrape_type=constants.ScrapeType.SNAPSHOT, region='us_ok',
            phase=scrape_phase.ScrapePhase.START,
        )
        client.put.assert_called_with(session.to_entity())
Example #14
 def create_session(
     self,
     region_code: str,
     scrape_type: constants.ScrapeType,
     phase: scrape_phase.ScrapePhase,
     start: datetime,
     end: Optional[datetime] = None,
     docket_ack_id: Optional[str] = None,
 ) -> ScrapeSession:
     session = ScrapeSession.new(
         key=sessions.ds().key("ScrapeSession"),
         region=region_code,
         scrape_type=scrape_type,
         phase=phase,
         docket_ack_id=docket_ack_id,
         start=start,
         end=end,
     )
     sessions.ds().put(session.to_entity())
     self.keys_to_delete.append(session.to_entity().key)
     return session
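For illustration, this is how the emulator-backed create_session helper above might be exercised and cleaned up. The test body and the delete_multi teardown are assumptions for the sketch, not code from the original suite:

 def test_seed_open_session(self) -> None:
     # Seed the Datastore emulator with one open background session.
     session = self.create_session(
         region_code="us_nd",
         scrape_type=constants.ScrapeType.BACKGROUND,
         phase=scrape_phase.ScrapePhase.START,
         start=datetime(2020, 1, 1),
     )
     assert session.region == "us_nd"

 def tearDown(self) -> None:
     # Delete everything the helper wrote so later tests start from a clean emulator.
     sessions.ds().delete_multi(self.keys_to_delete)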