Exemple #1
0
    def testWrite_SingleCountBadDataFails(self):
        """Each malformed single count raises and leaves the table empty."""

        def assert_nothing_stored():
            stored_rows = SessionFactory.for_schema_base(JailsBase).query(
                SingleCountAggregate).all()
            self.assertEqual(stored_rows, [])

        # (expected exception, extra SingleCount fields, jurisdiction id).
        # The first case passes a valid count with the jid '1001001', which
        # is one character shorter than the jid used by the other cases.
        bad_cases = (
            (ValueError, {}, '1001001'),
            (EnumParsingError, {'ethnicity': 'Not an Ethnicity'}, '01001001'),
            (EnumParsingError, {'gender': 'Not a Gender'}, '01001001'),
            (EnumParsingError, {'race': 'Not a Race'}, '01001001'),
            (ValueError, {'date': 'Not a date'}, '01001001'),
        )

        for expected_error, extra_fields, jid in bad_cases:
            # Build the SingleCount inside the context manager so an error
            # raised at construction time is caught as well.
            with self.assertRaises(expected_error):
                store_single_count(
                    SingleCount(count=311, **extra_fields), jid)
            assert_nothing_stored()
Exemple #2
0
    def testWrite_SingleCountBadDataFails(self):
        """Each malformed single count raises and leaves the table empty."""

        def assert_nothing_stored():
            with SessionFactory.using_database(
                    self.database_key, autocommit=False) as session:
                stored_rows = session.query(SingleCountAggregate).all()
                self.assertEqual(stored_rows, [])

        # (expected exception, extra SingleCount fields, jurisdiction id).
        # The first case passes a valid count with the jid "1001001", which
        # is one character shorter than the jid used by the other cases.
        bad_cases = (
            (ValueError, {}, "1001001"),
            (EnumParsingError, {"ethnicity": "Not an Ethnicity"}, "01001001"),
            (EnumParsingError, {"gender": "Not a Gender"}, "01001001"),
            (EnumParsingError, {"race": "Not a Race"}, "01001001"),
            (ValueError, {"date": "Not a date"}, "01001001"),
        )

        for expected_error, extra_fields, jid in bad_cases:
            # Build the SingleCount inside the context manager so an error
            # raised at construction time is caught as well.
            with self.assertRaises(expected_error):
                store_single_count(
                    SingleCount(count=311, **extra_fields), jid)
            assert_nothing_stored()
Exemple #3
0
    def testWrite_SingleCountWithDate(self):
        """A count stored with a date is read back with that same date."""
        to_store = SingleCount(count=_COUNT, date=_TODAY)
        store_single_count(to_store, '01001001')

        # Exactly one row is expected; `one` enforces that.
        db_session = SessionFactory.for_schema_base(JailsBase)
        stored_rows = db_session.query(SingleCountAggregate).all()
        stored = one(stored_rows)

        self.assertEqual(stored.jid, _JID)
        self.assertEqual(stored.count, _COUNT)
        self.assertEqual(stored.date, _TODAY)
Exemple #4
0
    def testWrite_SingleCountWithRace(self):
        """A count stored with a race is read back with that same race."""
        to_store = SingleCount(count=_COUNT, race=Race.ASIAN)
        store_single_count(to_store, '01001001')

        # Exactly one row is expected; `one` enforces that.
        db_session = SessionFactory.for_schema_base(JailsBase)
        stored_rows = db_session.query(SingleCountAggregate).all()
        stored = one(stored_rows)

        self.assertEqual(stored.jid, _JID)
        self.assertEqual(stored.count, _COUNT)
        # The stored value is converted back through the enum to compare.
        self.assertEqual(Race(stored.race), Race.ASIAN)
Exemple #5
0
    def testWrite_SingleCountWithGender(self):
        """A count stored with a gender is read back with that gender."""
        to_store = SingleCount(count=_COUNT, gender=Gender.FEMALE)
        store_single_count(to_store, '01001001')

        # Exactly one row is expected; `one` enforces that.
        db_session = SessionFactory.for_schema_base(JailsBase)
        stored_rows = db_session.query(SingleCountAggregate).all()
        stored = one(stored_rows)

        self.assertEqual(stored.jid, _JID)
        self.assertEqual(stored.count, _COUNT)
        # The stored value is converted back through the enum to compare.
        self.assertEqual(Gender(stored.gender), Gender.FEMALE)
Exemple #6
0
    def testWrite_SingleCountWithDate(self):
        """A count stored with a date is read back with that same date."""
        to_store = SingleCount(count=_COUNT, date=_TODAY)
        store_single_count(to_store, "01001001")

        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            # Exactly one row is expected; `one` enforces that.
            stored_rows = session.query(SingleCountAggregate).all()
            stored = one(stored_rows)

        self.assertEqual(stored.jid, _JID)
        self.assertEqual(stored.count, _COUNT)
        self.assertEqual(stored.date, _TODAY)
Exemple #7
0
    def testWrite_SingleCountWithEthnicity(self):
        """A count stored with an ethnicity is read back with it intact."""
        to_store = SingleCount(count=_COUNT, ethnicity=Ethnicity.HISPANIC)
        store_single_count(to_store, '01001001')

        # Exactly one row is expected; `one` enforces that.
        db_session = SessionFactory.for_schema_base(JailsBase)
        stored_rows = db_session.query(SingleCountAggregate).all()
        stored = one(stored_rows)

        self.assertEqual(stored.jid, _JID)
        self.assertEqual(stored.count, _COUNT)
        # The stored value is converted back through the enum to compare.
        self.assertEqual(Ethnicity(stored.ethnicity), Ethnicity.HISPANIC)
Exemple #8
0
    def testWrite_SingleCountWithGender(self):
        """A count stored with a gender is read back with that gender."""
        to_store = SingleCount(count=_COUNT, gender=Gender.FEMALE)
        store_single_count(to_store, "01001001")

        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            # Exactly one row is expected; `one` enforces that.
            stored_rows = session.query(SingleCountAggregate).all()
            stored = one(stored_rows)

        self.assertEqual(stored.jid, _JID)
        self.assertEqual(stored.count, _COUNT)
        # The stored value is converted back through the enum to compare.
        self.assertEqual(Gender(stored.gender), Gender.FEMALE)
Exemple #9
0
    def testWrite_SingleCountWithEthnicity(self):
        """A count stored with an ethnicity is read back with it intact."""
        to_store = SingleCount(count=_COUNT, ethnicity=Ethnicity.HISPANIC)
        store_single_count(to_store, "01001001")

        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            # Exactly one row is expected; `one` enforces that.
            stored_rows = session.query(SingleCountAggregate).all()
            stored = one(stored_rows)

        self.assertEqual(stored.jid, _JID)
        self.assertEqual(stored.count, _COUNT)
        # The stored value is converted back through the enum to compare.
        self.assertEqual(Ethnicity(stored.ethnicity), Ethnicity.HISPANIC)
Exemple #10
0
def store_single_count_endpoint():
    """Endpoint to store a single count.

    Reads `jid`, `ethnicity`, `gender`, `race`, `count` and `date` from the
    request's query arguments, wraps them in a `SingleCount` and hands it to
    `store_single_count` together with the jurisdiction id.

    Returns:
        A `(body, status)` tuple with an empty body: `HTTPStatus.OK` when
        `store_single_count` reports success, otherwise
        `HTTPStatus.INTERNAL_SERVER_ERROR`.
    """

    jid = get_str_param_value('jid', request.args)
    ethnicity = get_str_param_value('ethnicity', request.args)
    gender = get_str_param_value('gender', request.args)
    race = get_str_param_value('race', request.args)
    count = get_str_param_value('count', request.args)
    date = get_str_param_value('date', request.args)
    sc = SingleCount(
        count=count,
        ethnicity=ethnicity,
        gender=gender,
        race=race,
        date=date,
    )
    stored = store_single_count(sc, jid)

    if stored:
        # `count` is the raw string read from the request, so it must be
        # formatted with %s; %d would fail inside the logging call and the
        # success message would never be emitted.
        logging.info("Stored [%s] as [%s] for [%s]", count,
                     ' '.join(filter(None, (race, gender, ethnicity))), jid)
        return '', HTTPStatus.OK

    logging.error("Failed to store single count for [%s]", jid)
    return '', HTTPStatus.INTERNAL_SERVER_ERROR
Exemple #11
0
    def _generic_scrape(self, request: QueueRequest):
        """
        General handler for all scrape tasks.  This function is a generic entry
        point into all types of scrapes.  It decides what to call based on
        the task carried by the request.

        The steps, in order: obtain the content for the task (either the
        content already attached to it or freshly fetched), optionally
        populate data from it, optionally queue follow-up tasks, and finally
        persist any scraped data that is marked for persistence.

        Args:
            request: the queued request.  Provides the task to run
                (`next_task`), the scrape type, the scraper start time and
                any ingest info carried over from earlier tasks.

        Raises:
            ScraperFetchError: if fetching the content for the task fails.
            ScraperPopulateDataError: if `populate_data` fails.
            ScraperGetMoreTasksError: if `get_more_tasks` fails.
            Exception: any other error is re-raised unchanged.  When
                `BATCH_WRITES` is set, every error is first recorded through
                `batch_persistence.write_error`.
        """
        try:
            task = request.next_task

            # Here we handle a special case where we weren't really sure
            # we were going to get data when we submitted a task, but then
            # we ended up with data, so no more requests are required,
            # just the content we already have.
            # TODO(#680): remove this
            if task.content is not None:
                content = self._parse_html_content(task.content)
                cookies = None
            else:
                post_data = task.post_data

                # Let the child transform the post_data if it wants before
                # sending the requests.  This hook is in here in case the
                # child did something like compress the post_data before
                # it put it on the queue.
                self.transform_post_data(post_data)

                # We always fetch some content before doing anything.
                try:
                    content, cookies = self._fetch_content(
                        task.endpoint,
                        task.response_type,
                        headers=task.headers,
                        cookies=task.cookies,
                        params=task.params,
                        post_data=post_data,
                        json_data=task.json)
                except Exception as e:
                    raise ScraperFetchError(str(e)) from e

            scraped_data = None
            if self.should_scrape_data(task.task_type):
                # If we want to scrape data, we should either create an
                # ingest_info object or get the one that already exists.
                logging.info("Scraping data for [%s] and endpoint: [%s]",
                             self.region.region_code, task.endpoint)
                try:
                    scraped_data = self.populate_data(
                        content, task, request.ingest_info or IngestInfo())
                except Exception as e:
                    raise ScraperPopulateDataError(str(e)) from e

            if self.should_get_more_tasks(task.task_type):
                logging.info("Getting more tasks for [%s] and endpoint: [%s]",
                             self.region.region_code, task.endpoint)

                # Only send along ingest info if it will not be persisted now.
                ingest_info_to_send = None
                if scraped_data is not None and not scraped_data.persist:
                    ingest_info_to_send = scraped_data.ingest_info

                try:
                    # pylint: disable=assignment-from-no-return
                    next_tasks = self.get_more_tasks(content, task)
                except Exception as e:
                    raise ScraperGetMoreTasksError(str(e)) from e
                for next_task in next_tasks:
                    # Include cookies received from response, if any.  The
                    # merge is done on a per-task copy so that cookies set on
                    # one task do not leak into the tasks queued after it.
                    # Cookies set on the task itself take precedence.
                    if cookies:
                        merged_cookies = cookies.copy()
                        merged_cookies.update(next_task.cookies)
                        next_task = Task.evolve(
                            next_task, cookies=merged_cookies)
                    self.add_task(
                        '_generic_scrape',
                        QueueRequest(
                            scrape_type=request.scrape_type,
                            scraper_start_time=request.scraper_start_time,
                            next_task=next_task,
                            ingest_info=ingest_info_to_send,
                        ))

            if scraped_data is not None and scraped_data.persist:
                if scraped_data.ingest_info:
                    logging.info("Logging at most 4 people (were %d):",
                                 len(scraped_data.ingest_info.people))
                    loop_count = min(len(scraped_data.ingest_info.people),
                                     constants.MAX_PEOPLE_TO_LOG)
                    for i in range(loop_count):
                        logging.info("[%s]",
                                     str(scraped_data.ingest_info.people[i]))
                    logging.info("Last seen time of person being set as: [%s]",
                                 request.scraper_start_time)
                    metadata = IngestMetadata(self.region.region_code,
                                              self.region.jurisdiction_id,
                                              request.scraper_start_time,
                                              self.get_enum_overrides())
                    if self.BATCH_WRITES:
                        logging.info(
                            "Queuing ingest_info ([%d] people) to "
                            "batch_persistence for [%s]",
                            len(scraped_data.ingest_info.people),
                            self.region.region_code)
                        scrape_key = ScrapeKey(self.region.region_code,
                                               request.scrape_type)
                        batch_persistence.write(
                            ingest_info=scraped_data.ingest_info,
                            scrape_key=scrape_key,
                            task=task,
                        )
                    else:
                        logging.info(
                            "Writing ingest_info ([%d] people) to the database"
                            " for [%s]", len(scraped_data.ingest_info.people),
                            self.region.region_code)
                        persistence.write(
                            ingest_utils.convert_ingest_info_to_proto(
                                scraped_data.ingest_info), metadata)
                for sc in scraped_data.single_counts:
                    # Counts without a date fall back to the start date of
                    # the current BACKGROUND scrape session, when there is
                    # one; otherwise they are stored without a date.
                    if not sc.date:
                        scrape_key = ScrapeKey(self.region.region_code,
                                               constants.ScrapeType.BACKGROUND)
                        session = sessions.get_current_session(scrape_key)
                        if session:
                            sc = attr.evolve(sc, date=session.start.date())
                    single_count.store_single_count(
                        sc, self.region.jurisdiction_id)
        except Exception as e:
            # Record the failure for batch writers before propagating it.
            if self.BATCH_WRITES:
                scrape_key = ScrapeKey(self.region.region_code,
                                       request.scrape_type)
                batch_persistence.write_error(
                    error=str(e),
                    trace_id=get_trace_id_from_flask(),
                    task=task,
                    scrape_key=scrape_key,
                )
            raise