Ejemplo n.º 1
0
    def test_add_item_happy_path(self, mock_client, mock_query):
        """Check that the docket ack id is written onto the matching session.

        Two sessions are wired into the mocked query: a us_va snapshot
        session in the START phase and a us_ny snapshot session in the
        SCRAPE phase. After adding docket item "alpha" for the us_va
        snapshot scrape key, the client's ``put`` must have been called with
        the us_va session entity carrying ``docket_ack_id == "alpha"``.
        """
        snapshot = constants.ScrapeType.SNAPSHOT

        # Session that the docket item is expected to be attached to.
        va_key = datastore.key.Key("session", "current", project=0)
        va_fields = dict(
            region="us_va",
            scrape_type=snapshot,
            phase=scrape_phase.ScrapePhase.START,
            start=fix_dt(datetime(2014, 8, 31)),
        )
        va_session = ScrapeSession.new(va_key, **va_fields)

        # Second session for another region, returned by the same query.
        ny_key = datastore.key.Key("session", "prior", project=0)
        ny_session = ScrapeSession.new(
            ny_key,
            region="us_ny",
            scrape_type=snapshot,
            start=fix_dt(datetime(2014, 8, 17)),
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )

        wire_sessions_to_query(mock_client, mock_query, [va_session, ny_session])

        was_added = sessions.add_docket_item_to_current_session(
            "alpha", ScrapeKey("us_va", snapshot)
        )
        assert was_added

        # The persisted entity is the original session plus the ack id.
        expected_fields = {**va_fields, "docket_ack_id": "alpha"}
        expected_entity = ScrapeSession.new(va_key, **expected_fields).to_entity()
        mock_client.return_value.put.assert_called_with(expected_entity)
Ejemplo n.º 2
0
    def test_add_item_happy_path(self, mock_client, mock_query):
        """Verify that adding a docket item stores its ack id on the session.

        Wires a us_va session (START phase) and a us_ny session (SCRAPE
        phase) into the mocked query, adds docket item "alpha" for the
        us_va snapshot scrape key, and asserts that ``put`` was called with
        the us_va session entity extended by ``docket_ack_id``.
        """
        scrape_type = constants.ScrapeType.SNAPSHOT
        session_key = datastore.key.Key('session', 'current', project=0)

        base_vars = {
            'region': 'us_va',
            'scrape_type': scrape_type,
            'phase': scrape_phase.ScrapePhase.START,
            'start': fix_dt(datetime(2014, 8, 31)),
        }
        session_under_test = ScrapeSession.new(session_key, **base_vars)

        other_session = ScrapeSession.new(
            datastore.key.Key('session', 'prior', project=0),
            region='us_ny',
            scrape_type=scrape_type,
            start=fix_dt(datetime(2014, 8, 17)),
            phase=scrape_phase.ScrapePhase.SCRAPE,
        )

        wired_sessions = [session_under_test, other_session]
        wire_sessions_to_query(mock_client, mock_query, wired_sessions)

        target = ScrapeKey("us_va", scrape_type)
        assert sessions.add_docket_item_to_current_session("alpha", target)

        # Expected write: same key and fields, with the ack id recorded.
        with_ack = dict(base_vars, docket_ack_id='alpha')
        expected = ScrapeSession.new(session_key, **with_ack)
        put_mock = mock_client.return_value.put
        put_mock.assert_called_with(expected.to_entity())
Ejemplo n.º 3
0
def iterate_docket_item(scrape_key, return_immediately=False):
    """Lease the next docket item and record it on the current session.

    Requests a new item from the docket for the given scraper, decodes its
    message data as JSON, and registers the item's ack id with the current
    session for that scraper.

    Args:
        scrape_key: (ScrapeKey) The scraper to retrieve a docket item for
        return_immediately: (bool) Passed through to the docket lookup;
            whether to return at once or wait for a bounded period for a
            message to arrive.

    Returns:
        The JSON-decoded payload of the leased docket item. Returns None if
        the docket yielded no item, or if the item could not be added to
        the current session.
    """
    leased = docket.get_new_docket_item(
        scrape_key, return_immediately=return_immediately
    )

    if leased:
        # Decode the payload before touching the session, as the error log
        # below reports it when the session update fails.
        payload = json.loads(leased.message.data.decode())

        if sessions.add_docket_item_to_current_session(leased.ack_id, scrape_key):
            return payload

        logging.error(
            "Failed to update session for scraper [%s] with docket item [%s].",
            scrape_key,
            str(payload),
        )
        return None

    logging.info("No items in docket for [%s]. Ending scrape.", scrape_key)
    return None
Ejemplo n.º 4
0
 def test_add_item_no_open_sessions(self, _mock_client):
     """Expect a falsy result when adding docket item "alpha" for us_va.

     NOTE(review): the mock setup is outside this block; presumably the
     patched client yields no open sessions -- confirm against the decorator.
     """
     scrape_key = ScrapeKey("us_va", constants.ScrapeType.SNAPSHOT)
     added = sessions.add_docket_item_to_current_session("alpha", scrape_key)
     assert not added