Example #1
    def test_load_target_list_last_names(self, mock_region):
        mock_region.return_value.names_file = \
            '../recidiviz/tests/ingest/testdata/docket/names/last_only.csv'
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        docket.load_target_list(scrape_key)

        names = []
        for _ in range(12):
            item = docket.get_new_docket_item(scrape_key)
            name_serialized = item.message.data.decode()
            names.append(json.loads(name_serialized))
        assert names == [
            ['SMITH', ''],
            ['JOHNSON', ''],
            ['WILLIAMS', ''],
            ['BROWN', ''],
            ['JONES', ''],
            ['MILLER', ''],
            ['DAVIS', ''],
            ['GARCIA', ''],
            ['RODRIGUEZ', ''],
            ['WILSON', ''],
            ['MARTINEZ', ''],
            ['ANDERSON', ''],
        ]
        assert not docket.get_new_docket_item(scrape_key)
Example #2
    def test_load_target_list_last_names(self, mock_region):
        mock_region.return_value.names_file = (
            "../recidiviz/tests/ingest/testdata/docket/names/last_only.csv")
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        docket.load_target_list(scrape_key)

        names = []
        for _ in range(12):
            item = docket.get_new_docket_item(scrape_key)
            name_serialized = item.message.data.decode()
            names.append(json.loads(name_serialized))
        assert names == [
            ["SMITH", ""],
            ["JOHNSON", ""],
            ["WILLIAMS", ""],
            ["BROWN", ""],
            ["JONES", ""],
            ["MILLER", ""],
            ["DAVIS", ""],
            ["GARCIA", ""],
            ["RODRIGUEZ", ""],
            ["WILSON", ""],
            ["MARTINEZ", ""],
            ["ANDERSON", ""],
        ]
        assert not docket.get_new_docket_item(scrape_key)
Example #3
    def test_load_target_list_full_names(self, mock_region: Mock) -> None:
        mock_region.return_value.names_file = (
            "../recidiviz/tests/ingest/testdata/docket/names/last_and_first.csv"
        )
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        docket.load_target_list(scrape_key)

        names = []
        for _ in range(8):
            item = docket.get_new_docket_item(scrape_key)
            assert item is not None
            name_serialized = item.message.data.decode()
            names.append(json.loads(name_serialized))
        assert names == [
            ["Smith", "James"],
            ["Smith", "Michael"],
            ["Smith", "Robert"],
            ["Smith", "David"],
            ["Johnson", "James"],
            ["Johnson", "Michael"],
            ["Smith", "William"],
            ["Williams", "James"],
        ]
        assert not docket.get_new_docket_item(scrape_key)
Example #4
    def test_load_target_list_last_names_with_bad_query(self, mock_region):
        mock_region.return_value.names_file = (
            "../recidiviz/tests/ingest/testdata/docket/names/last_only.csv")
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        docket.load_target_list(scrape_key, surname="GARBAGE")

        item = docket.get_new_docket_item(scrape_key)
        assert item.message.data.decode() == json.dumps(("GARBAGE", ""))
        assert not docket.get_new_docket_item(scrape_key)
Example #5
    def test_load_target_list_background_no_names_file(self, mock_region):
        mock_region.return_value.names_file = None
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        # With no names file configured, a single "empty" placeholder is queued.
        docket.load_target_list(scrape_key)

        item = docket.get_new_docket_item(scrape_key)
        assert item.message.data.decode() == json.dumps("empty")
Example #6
    def test_add_to_query_docket_background(self):
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        pubsub_helper.create_topic_and_subscription(scrape_key,
                                                    docket.PUBSUB_TYPE)

        docket.add_to_query_docket(scrape_key, get_payload()[0]).result()
        docket.add_to_query_docket(scrape_key, get_payload()[1]).result()

        items = [
            docket.get_new_docket_item(scrape_key),
            docket.get_new_docket_item(scrape_key),
        ]
        assert len(items) == 2

        for i, item in enumerate(items):
            assert item.message.data.decode() == json.dumps(get_payload()[i])
Example #7
    def test_purge_query_docket(self):
        scrape_key_purge = ScrapeKey(REGIONS[0],
                                     constants.ScrapeType.BACKGROUND)
        scrape_key_read = ScrapeKey(REGIONS[1],
                                    constants.ScrapeType.BACKGROUND)

        pubsub_helper.create_topic_and_subscription(scrape_key_purge,
                                                    docket.PUBSUB_TYPE)
        pubsub_helper.create_topic_and_subscription(scrape_key_read,
                                                    docket.PUBSUB_TYPE)
        docket.add_to_query_docket(scrape_key_purge, get_payload()).result()
        docket.add_to_query_docket(scrape_key_read, get_payload()).result()

        docket.purge_query_docket(scrape_key_purge)
        assert not docket.get_new_docket_item(scrape_key_purge,
                                              return_immediately=True)
        assert docket.get_new_docket_item(scrape_key_read,
                                          return_immediately=True)
Example #8
    def test_load_target_list_last_names_with_query(self, mock_region):
        mock_region.return_value.names_file = (
            "../recidiviz/tests/ingest/testdata/docket/names/last_only.csv")
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        docket.load_target_list(scrape_key, surname="WILSON")

        names = []
        for _ in range(3):
            item = docket.get_new_docket_item(scrape_key)
            name_serialized = item.message.data.decode()
            names.append(json.loads(name_serialized))
        assert names == [
            ["WILSON", ""],
            ["MARTINEZ", ""],
            ["ANDERSON", ""],
        ]
        assert not docket.get_new_docket_item(scrape_key)
Example #9
    def test_get_new_docket_item_no_matching_items(self):
        write_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
        read_key = ScrapeKey(REGIONS[1], constants.ScrapeType.BACKGROUND)

        pubsub_helper.create_topic_and_subscription(write_key,
                                                    docket.PUBSUB_TYPE)
        docket.add_to_query_docket(write_key, get_payload()).result()

        docket_item = docket.get_new_docket_item(read_key,
                                                 return_immediately=True)
        assert not docket_item
Example #10
    def test_load_target_list_full_names(self, mock_region):
        mock_region.return_value.names_file = \
            '../recidiviz/tests/ingest/testdata/docket/names/last_and_first.csv'
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)

        docket.load_target_list(scrape_key)

        names = []
        for _ in range(8):
            item = docket.get_new_docket_item(scrape_key)
            name_serialized = item.message.data.decode()
            names.append(json.loads(name_serialized))
        assert names == [
            ['Smith', 'James'],
            ['Smith', 'Michael'],
            ['Smith', 'Robert'],
            ['Smith', 'David'],
            ['Johnson', 'James'],
            ['Johnson', 'Michael'],
            ['Smith', 'William'],
            ['Williams', 'James'],
        ]
        assert not docket.get_new_docket_item(scrape_key)
Example #11
def iterate_docket_item(scrape_key, return_immediately=False):
    """Leases new docket item, updates current session, returns item contents

    Pulls an arbitrary new item from the docket type provided, adds it to the
    current session info, and returns the payload of the docket item.

    This payload should be an entity fit to scrape, or information suitable for
    retrieving an entity fit to scrape, depending on scrape type.

    Args:
        scrape_key: (ScrapeKey) The scraper to retrieve a docket item for
        return_immediately: (bool) Whether to return immediately or to wait for
            a bounded period of time for a message to enter the docket.

    Returns:
        The payload of the next docket item, if successfully retrieved and added
        to the current session for this region and scrape type. If not retrieved
        or not successfully added to the session, returns None.
    """

    docket_item = docket.get_new_docket_item(
        scrape_key, return_immediately=return_immediately
    )

    if not docket_item:
        logging.info("No items in docket for [%s]. Ending scrape.", scrape_key)
        return None

    item_content = json.loads(docket_item.message.data.decode())
    item_added = sessions.add_docket_item_to_current_session(
        docket_item.ack_id, scrape_key
    )
    if not item_added:
        logging.error(
            "Failed to update session for scraper [%s] " "with docket item [%s].",
            scrape_key,
            str(item_content),
        )
        return None

    return item_content
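
A minimal usage sketch (not from the original module) showing how a scraper loop might drain the docket with iterate_docket_item. It assumes the same ScrapeKey, constants, REGIONS, and logging names used in the snippets above; the function name drain_docket is hypothetical.

# Hypothetical sketch only: drain the background docket for one region and log
# each payload, relying on iterate_docket_item returning None when the docket
# is empty or the session update fails.
def drain_docket(region_code):
    scrape_key = ScrapeKey(region_code, constants.ScrapeType.BACKGROUND)
    while True:
        payload = iterate_docket_item(scrape_key, return_immediately=True)
        if payload is None:
            # Docket is empty, or the docket item could not be added to the session.
            break
        logging.info("Got docket payload for [%s]: %s", region_code, payload)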
Example #12
    def test_purge_query_docket_already_empty(self):
        scrape_key = ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND)
        docket.purge_query_docket(scrape_key)
        assert not docket.get_new_docket_item(scrape_key,
                                              return_immediately=True)
Example #13
    def test_get_new_docket_item_no_items_at_all(self):
        docket_item = docket.get_new_docket_item(
            ScrapeKey(REGIONS[0], constants.ScrapeType.BACKGROUND),
            return_immediately=True,
        )
        assert not docket_item