Esempio n. 1
0
    def testWrite_CalculatesSum(self):
        # Persists every parsed table, then checks the SUM of
        # NyFacilityAggregate.in_house.

        # Act
        for aggregate_table, frame in self.parsed_pdf.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        rows = session.query(func.sum(NyFacilityAggregate.in_house)).all()
        # Unwrap the single row, then its single column.
        in_house_total = one(one(rows))

        self.assertEqual(in_house_total, 189012)
Esempio n. 2
0
    def testWrite_CalculatesCountyPopulationSum(self):
        # Persists every parsed table, then checks the SUM of
        # FlCountyAggregate.county_population.

        # Act
        for aggregate_table, frame in self.parsed_pdf.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        rows = session.query(
            func.sum(FlCountyAggregate.county_population)).all()
        # Unwrap the single row, then its single column.
        county_population_total = one(one(rows))

        self.assertEqual(county_population_total, 20148654)
Esempio n. 3
0
    def testWrite_CalculatesSum(self):
        # Persists every parsed table, then checks the SUM of
        # HiFacilityAggregate.total_population.

        # Act
        for aggregate_table, frame in self.parsed_pdf.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        rows = session.query(
            func.sum(HiFacilityAggregate.total_population)).all()
        # Unwrap the single row, then its single column.
        total_population = one(one(rows))

        self.assertEqual(total_population, 5241)
Esempio n. 4
0
    def testWrite_CalculatesSum(self):
        # Persists every table in PARSED_RESULT, then checks the SUM of
        # CaFacilityAggregate.average_daily_population.

        # Act
        for aggregate_table, frame in PARSED_RESULT.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        rows = session.query(
            func.sum(CaFacilityAggregate.average_daily_population)).all()
        # Unwrap the single row, then its single column.
        adp_total = one(one(rows))

        self.assertEqual(adp_total, 900124)
Esempio n. 5
0
    def testWrite_CalculatesSum(self):
        # Persists every parsed table, then checks the SUM of
        # GaCountyAggregate.total_number_of_inmates_in_jail.

        # Act
        for aggregate_table, frame in self.parsed_pdf.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        rows = session.query(
            func.sum(GaCountyAggregate.total_number_of_inmates_in_jail)).all()
        # Unwrap the single row, then its single column.
        inmates_total = one(one(rows))

        self.assertEqual(inmates_total, 37697)
Esempio n. 6
0
    def testWrite_Table2_CalculateSum(self):
        # Persists every table in PARSED_RESULT, then checks the SUM of
        # PaCountyPreSentencedAggregate.pre_sentenced_population.

        # Act
        for aggregate_table, frame in PARSED_RESULT.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        rows = session.query(
            func.sum(PaCountyPreSentencedAggregate.pre_sentenced_population)
        ).all()
        # Unwrap the single row, then its single column.
        pre_sentenced_total = one(one(rows))

        self.assertEqual(pre_sentenced_total, 82521)
Esempio n. 7
0
    def testWrite_CalculatesSum_before_1996(self):
        # Persists every table parsed from the pre-1996 report, then checks
        # the SUM of TxCountyAggregate.pretrial_felons.

        # Act
        for aggregate_table, frame in self.parsed_pdf_before_1996.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        rows = session.query(func.sum(TxCountyAggregate.pretrial_felons)).all()
        # Unwrap the single row, then its single column.
        pretrial_felons_total = one(one(rows))

        self.assertEqual(pretrial_felons_total, 14727)
Esempio n. 8
0
    def testWrite_CalculatesFacilityAdpSum(self):
        # Persists every parsed table, then checks the SUM of
        # FlFacilityAggregate.average_daily_population.

        # Act
        for aggregate_table, frame in self.parsed_pdf.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        rows = session.query(
            func.sum(FlFacilityAggregate.average_daily_population)).all()
        # Unwrap the single row, then its single column.
        facility_adp_total = one(one(rows))

        self.assertEqual(facility_adp_total, 52388)
Esempio n. 9
0
    def testWriteDf_doesNotOverrideMatchingColumnNames(self):
        # Writes a county table and then a facility table that share column
        # names (e.g. "fips"), and checks the county row keeps its own fips.

        # Arrange
        row_count = 5
        monthly = enum_strings.monthly_granularity
        bay_date_reported = datetime.datetime(2017, 9, 1)

        county_columns = {
            "county_name": ["Alachua", "Baker", "Bay", "Bradford", "Brevard"],
            "county_population": [257062, 26965, 176016, 27440, 568919],
            "average_daily_population": [799, 478, 1015, 141, 1547],
            "date_reported": [
                pd.NaT, pd.NaT, bay_date_reported, pd.NaT, pd.NaT,
            ],
            "fips": ["00000", "00001", "00002", "00003", "00004"],
            "report_date": [DATE_SCRAPED] * row_count,
            "aggregation_window": [monthly] * row_count,
            "report_frequency": [monthly] * row_count,
        }
        dao.write_df(FlCountyAggregate, pd.DataFrame(county_columns))

        facility_columns = {
            "facility_name": ["One", "Two", "Three", "Four", "Five"],
            "average_daily_population": [13, 14, 15, 16, 17],
            "number_felony_pretrial": [23, 24, 25, 26, 27],
            "number_misdemeanor_pretrial": [pd.NaT] * row_count,
            "fips": ["10000", "10111", "10222", "10333", "10444"],
            "report_date": [DATE_SCRAPED] * row_count,
            "aggregation_window": [monthly] * row_count,
            "report_frequency": [monthly] * row_count,
        }
        facility_df = pd.DataFrame(facility_columns)

        # Act
        dao.write_df(FlFacilityAggregate, facility_df)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            bay_rows = session.query(FlCountyAggregate).filter(
                FlCountyAggregate.county_name == "Bay").all()
            bay_row = one(bay_rows)

            # The county row must still carry the fips written with the
            # county table, not one from the facility table.
            self.assertEqual(bay_row.county_name, "Bay")
            self.assertEqual(bay_row.fips, "00002")
Esempio n. 10
0
    def testWrite_CalculatesSum(self) -> None:
        # Persists every parsed table, then checks the SUM of
        # MaFacilityAggregate.jail_total_male.

        # Act
        for aggregate_table, frame in self.parsed_csv.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(self.database_key) as session:
            rows = session.query(
                func.sum(MaFacilityAggregate.jail_total_male)).all()
            # Unwrap the single row, then its single column.
            male_total = one(one(rows))

        self.assertEqual(male_total, 12366)
Esempio n. 11
0
    def testWrite_Table1_CalculatesSums(self):
        # Writes every table in PARSED_RESULT and checks the SUM of
        # PaFacilityPopAggregate.housed_elsewhere_adp.

        # Act
        for table, df in PARSED_RESULT.items():
            dao.write_df(table, df)

        # Assert
        query = SessionFactory.for_schema_base(JailsBase).query(
            func.sum(PaFacilityPopAggregate.housed_elsewhere_adp))
        # Unwrap the single row, then its single column.
        result = one(one(query.all()))

        # Note: This report contains fractional averages, so the sum is
        # compared with a tolerance instead of exact float equality; a
        # floating point sum can differ in its last bits depending on the
        # order in which the rows are accumulated.
        expected_housed_elsewhere_adp = 1564.0257
        self.assertAlmostEqual(
            result, expected_housed_elsewhere_adp, places=4)
Esempio n. 12
0
    def testWriteDf_OverlappingData_WritesNewAndIgnoresDuplicateRows(self):
        # Writes an initial county table, then a second one that overlaps it,
        # and checks the resulting SUM of county_population.

        # Arrange
        monthly = enum_strings.monthly_granularity
        bay_date_reported = datetime.datetime(2017, 9, 1)

        initial_columns = {
            "county_name": ["Alachua", "Baker", "Bay", "Bradford", "Brevard"],
            "county_population": [257062, 26965, 176016, 27440, 568919],
            "average_daily_population": [799, 478, 1015, 141, 1547],
            "date_reported": [
                pd.NaT, pd.NaT, bay_date_reported, pd.NaT, pd.NaT,
            ],
            "fips": ["00000", "00001", "00002", "00003", "00004"],
            "report_date": [DATE_SCRAPED] * 5,
            "aggregation_window": [monthly] * 5,
            "report_frequency": [monthly] * 5,
        }
        dao.write_df(FlCountyAggregate, pd.DataFrame(initial_columns))

        overlapping_columns = {
            "county_name": ["Alachua", "NewCounty", "Baker"],
            "county_population": [0, 1000000000, 0],
            "average_daily_population": [0, 50, 0],
            "date_reported": [pd.NaT] * 3,
            "fips": ["00000", "01000", "00002"],
            "report_date": [DATE_SCRAPED] * 3,
            "aggregation_window": [monthly] * 3,
            "report_frequency": [monthly] * 3,
        }
        overlapping_df = pd.DataFrame(overlapping_columns)

        # Act
        dao.write_df(FlCountyAggregate, overlapping_df)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            rows = session.query(
                func.sum(FlCountyAggregate.county_population)).all()
            population_total = one(one(rows))

            # Expected total is the initial table plus NewCounty; the other
            # rows of the second write (e.g. county_population = 0 for
            # 'Alachua') must not change the stored values.
            self.assertEqual(population_total, 1001056402)
Esempio n. 13
0
    def testWrite_CalculatesSum(self) -> None:
        # Persists every parsed table, then checks the SUM of
        # CoFacilityAggregate.male_number_of_inmates.

        # Act
        for aggregate_table, frame in self.parsed_csv.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            rows = session.query(
                func.sum(CoFacilityAggregate.male_number_of_inmates)).all()
            # Unwrap the single row, then its single column.
            male_total = one(one(rows))

        self.assertEqual(male_total, 45933)
    def testWrite_CalculatesSum_1996(self) -> None:
        # Persists every table parsed from the 1996 report, then checks the
        # SUM of TxCountyAggregate.pretrial_felons.
        parsed_tables = self.parsed_pdf_1996
        if not parsed_tables:
            raise ValueError("Unexpectedly empty parsed_pdf_1996")

        # Act
        for aggregate_table, frame in parsed_tables.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            rows = session.query(
                func.sum(TxCountyAggregate.pretrial_felons)).all()
            # Unwrap the single row, then its single column.
            pretrial_felons_total = one(one(rows))

        self.assertEqual(pretrial_felons_total, 14140)
    def testWrite_CalculatesSum_Concat(self) -> None:
        # Persists every table parsed from the "concat" report, then checks
        # the SUM of TxCountyAggregate.available_beds.
        parsed_tables = self.parsed_pdf_concat
        if not parsed_tables:
            raise ValueError("Unexpectedly empty parsed_pdf_concat")

        # Act
        for aggregate_table, frame in parsed_tables.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            rows = session.query(
                func.sum(TxCountyAggregate.available_beds)).all()
            # Unwrap the single row, then its single column.
            available_beds_total = one(one(rows))

        self.assertEqual(available_beds_total, 7044)
    def testWrite_CalculatesSum(self) -> None:
        # Persists every parsed table, then checks the SUM of
        # WvFacilityAggregate.total_jail_population.

        # Act
        for aggregate_table, frame in self.parsed_csv.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            rows = session.query(
                func.sum(WvFacilityAggregate.total_jail_population)).all()
            # Unwrap the single row, then its single column.
            jail_population_total = one(one(rows))

        self.assertEqual(jail_population_total, 88)
Esempio n. 17
0
    def testWrite_CalculatesSum(self) -> None:
        # Persists every parsed table, then checks the SUM of
        # InCountyAggregate.total_jail_population.

        # Act
        for aggregate_table, frame in self.parsed_excel.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            rows = session.query(
                func.sum(InCountyAggregate.total_jail_population)).all()
            # Unwrap the single row, then its single column.
            jail_population_total = one(one(rows))

        # The excel file itself reports a different sum; 17164 is
        # nevertheless the value expected here.
        self.assertEqual(jail_population_total, 17164)
    def testWrite_Table2_CalculateSum(self) -> None:
        # Persists every table returned by _parsed_result(), then checks the
        # SUM of PaCountyPreSentencedAggregate.pre_sentenced_population.

        # Act
        for aggregate_table, frame in _parsed_result().items():
            dao.write_df(aggregate_table, frame)

        # Assert
        pre_sentenced_sum = func.sum(
            PaCountyPreSentencedAggregate.pre_sentenced_population)
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            rows = session.query(pre_sentenced_sum).all()
            # Unwrap the single row, then its single column.
            pre_sentenced_total = one(one(rows))

        self.assertEqual(pre_sentenced_total, 82521)
    def testWrite_Table1_CalculatesSums(self) -> None:
        # Writes every table returned by _parsed_result() and checks the SUM
        # of PaFacilityPopAggregate.housed_elsewhere_adp.

        # Act
        for table, df in _parsed_result().items():
            dao.write_df(table, df)

        # Assert
        with SessionFactory.using_database(self.database_key,
                                           autocommit=False) as session:
            query = session.query(
                func.sum(PaFacilityPopAggregate.housed_elsewhere_adp))
            # Unwrap the single row, then its single column.
            result = one(one(query.all()))

        # Note: This report contains fractional averages, so the sum is
        # compared with a tolerance instead of exact float equality; a
        # floating point sum can differ in its last bits depending on the
        # order in which the rows are accumulated.
        expected_housed_elsewhere_adp = 1564.0257
        self.assertAlmostEqual(
            result, expected_housed_elsewhere_adp, places=4)
Esempio n. 20
0
    def testWrite_CalculatesFacilityAdpSum(self) -> None:
        # Persists every parsed table, then checks the SUM of
        # FlFacilityAggregate.average_daily_population.
        parsed_tables = self.parsed_pdf
        if not parsed_tables:
            raise ValueError("Unexpectedly empty parsed_pdf")

        # Act
        for aggregate_table, frame in parsed_tables.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(self.database_key) as session:
            rows = session.query(
                func.sum(FlFacilityAggregate.average_daily_population)).all()
            # Unwrap the single row, then its single column.
            facility_adp_total = one(one(rows))

        self.assertEqual(facility_adp_total, 52388)
Esempio n. 21
0
    def testWrite_CalculatesCountyPopulationSum(self) -> None:
        # Persists every parsed table, then checks the SUM of
        # FlCountyAggregate.county_population.
        parsed_tables = self.parsed_pdf
        if not parsed_tables:
            raise ValueError("Unexpectedly empty parsed_pdf")

        # Act
        for aggregate_table, frame in parsed_tables.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(self.database_key) as session:
            rows = session.query(
                func.sum(FlCountyAggregate.county_population)).all()
            # Unwrap the single row, then its single column.
            county_population_total = one(one(rows))

        self.assertEqual(county_population_total, 20148654)
Esempio n. 22
0
    def testWriteDf_doesNotOverrideMatchingColumnNames(self):
        # Writes a county table and then a facility table that share column
        # names (e.g. 'fips'), and checks the county row keeps its own fips.

        # Arrange
        row_count = 5
        monthly = enum_strings.monthly_granularity
        bay_date_reported = datetime.datetime(2017, 9, 1)

        county_columns = {
            'county_name': ['Alachua', 'Baker', 'Bay', 'Bradford', 'Brevard'],
            'county_population': [257062, 26965, 176016, 27440, 568919],
            'average_daily_population': [799, 478, 1015, 141, 1547],
            'date_reported': [
                pd.NaT, pd.NaT, bay_date_reported, pd.NaT, pd.NaT,
            ],
            'fips': ['00000', '00001', '00002', '00003', '00004'],
            'report_date': [DATE_SCRAPED] * row_count,
            'aggregation_window': [monthly] * row_count,
            'report_frequency': [monthly] * row_count,
        }
        dao.write_df(FlCountyAggregate, pd.DataFrame(county_columns))

        facility_columns = {
            'facility_name': ['One', 'Two', 'Three', 'Four', 'Five'],
            'average_daily_population': [13, 14, 15, 16, 17],
            'number_felony_pretrial': [23, 24, 25, 26, 27],
            'number_misdemeanor_pretrial': [pd.NaT] * row_count,
            'fips': ['10000', '10111', '10222', '10333', '10444'],
            'report_date': [DATE_SCRAPED] * row_count,
            'aggregation_window': [monthly] * row_count,
            'report_frequency': [monthly] * row_count,
        }
        facility_df = pd.DataFrame(facility_columns)

        # Act
        dao.write_df(FlFacilityAggregate, facility_df)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        bay_rows = session.query(FlCountyAggregate).filter(
            FlCountyAggregate.county_name == 'Bay').all()
        bay_row = one(bay_rows)

        # The county row must still carry the fips written with the county
        # table, not one from the facility table.
        self.assertEqual(bay_row.county_name, 'Bay')
        self.assertEqual(bay_row.fips, '00002')
    def testWrite_CalculatesSum(self) -> None:
        # Persists every table returned by _parsed_result(), then checks the
        # SUM of CaFacilityAggregate.average_daily_population.

        # Act
        for aggregate_table, frame in _parsed_result().items():
            dao.write_df(aggregate_table, frame)

        # Assert
        adp_sum = func.sum(CaFacilityAggregate.average_daily_population)
        with SessionFactory.using_database(
            self.database_key, autocommit=False
        ) as session:
            rows = session.query(adp_sum).all()
            # Unwrap the single row, then its single column.
            adp_total = one(one(rows))

        self.assertEqual(adp_total, 900124)
Esempio n. 24
0
    def testWriteDf_OverlappingData_WritesNewAndIgnoresDuplicateRows(self):
        # Writes an initial county table, then a second one that overlaps it,
        # and checks the resulting SUM of county_population.

        # Arrange
        monthly = enum_strings.monthly_granularity
        bay_date_reported = datetime.datetime(2017, 9, 1)

        initial_columns = {
            'county_name': ['Alachua', 'Baker', 'Bay', 'Bradford', 'Brevard'],
            'county_population': [257062, 26965, 176016, 27440, 568919],
            'average_daily_population': [799, 478, 1015, 141, 1547],
            'date_reported': [
                pd.NaT, pd.NaT, bay_date_reported, pd.NaT, pd.NaT,
            ],
            'fips': ['00000', '00001', '00002', '00003', '00004'],
            'report_date': [DATE_SCRAPED] * 5,
            'aggregation_window': [monthly] * 5,
            'report_frequency': [monthly] * 5,
        }
        dao.write_df(FlCountyAggregate, pd.DataFrame(initial_columns))

        overlapping_columns = {
            'county_name': ['Alachua', 'NewCounty', 'Baker'],
            'county_population': [0, 1000000000, 0],
            'average_daily_population': [0, 50, 0],
            'date_reported': [pd.NaT] * 3,
            'fips': ['00000', '01000', '00002'],
            'report_date': [DATE_SCRAPED] * 3,
            'aggregation_window': [monthly] * 3,
            'report_frequency': [monthly] * 3,
        }
        overlapping_df = pd.DataFrame(overlapping_columns)

        # Act
        dao.write_df(FlCountyAggregate, overlapping_df)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        rows = session.query(
            func.sum(FlCountyAggregate.county_population)).all()
        population_total = one(one(rows))

        # Expected total is the initial table plus NewCounty; the other rows
        # of the second write (e.g. county_population = 0 for 'Alachua') must
        # not change the stored values.
        self.assertEqual(population_total, 1001056402)
    def testWrite_CalculatesSum(self) -> None:
        # Persists every parsed table, then checks the SUM of
        # HiFacilityAggregate.total_population.
        parsed_tables = self.parsed_pdf
        if not parsed_tables:
            raise ValueError("Unexpectedly empty parsed_pdf")

        # Act
        for aggregate_table, frame in parsed_tables.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            rows = session.query(
                func.sum(HiFacilityAggregate.total_population)).all()
            # Unwrap the single row, then its single column.
            total_population = one(one(rows))

        self.assertEqual(total_population, 5241)
    def testWrite_CorrectlyReadsHernandoCounty(self):
        # Persists every parsed table, then checks the stored values of the
        # single FlCountyAggregate row whose county_name is 'Hernando'.

        # Act
        for aggregate_table, frame in self.parsed_pdf.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        matching_rows = session.query(FlCountyAggregate).filter(
            FlCountyAggregate.county_name == 'Hernando').all()
        hernando = one(matching_rows)

        self.assertEqual(hernando.county_name, 'Hernando')
        self.assertEqual(hernando.county_population, 179503)
        self.assertEqual(hernando.average_daily_population, 632)
        self.assertEqual(hernando.date_reported, datetime.date(2017, 9, 1))
Esempio n. 27
0
def state_aggregate() -> Tuple[str, HTTPStatus]:
    """Parses one state aggregate report from GCS and writes it to the database.

    Reads the `bucket`, `state` and `filename` request parameters, downloads
    the file to a local temp file, runs the parser registered for the state in
    STATE_TO_PARSER, writes each resulting table with `dao.write_df`, and then
    moves the file into the historical bucket.

    Returns:
        An empty body with HTTPStatus.OK once the report has been written and
        the file has been moved.

    Raises:
        StateAggregateError: if any of `state`, `bucket` or `filename` is
            missing, if no parser is registered for `state`, or if the file
            cannot be downloaded.
    """
    bucket = get_str_param_value("bucket", request.args)
    state = get_str_param_value("state", request.args)
    filename = get_str_param_value("filename", request.args)
    project_id = metadata.project_id()
    logging.info("The project id is %s", project_id)
    if not bucket or not state or not filename:
        raise StateAggregateError("All of state, bucket, and filename must be provided")

    # Resolve the parser before touching GCS so that an unsupported state is
    # reported as a StateAggregateError like the other bad-input cases,
    # instead of surfacing as a bare KeyError.
    try:
        parser = STATE_TO_PARSER[state]
    except KeyError as err:
        raise StateAggregateError(
            f"No parser registered for state: {state}"
        ) from err

    directory_path = GcsfsDirectoryPath(bucket, state)
    path = GcsfsFilePath.from_directory_and_file_name(directory_path, filename)
    fs = GcsfsFactory.build()
    logging.info("The path to download from is %s", path)

    logging.info("The files in the directory are:")
    logging.info(
        fs.ls_with_blob_prefix(
            bucket_name=directory_path.bucket_name,
            blob_prefix=directory_path.relative_path,
        )
    )

    # Providing a stream buffer to tabula reader does not work because it
    # tries to load the file into the local filesystem, since appengine is a
    # read only filesystem (except for the tmpdir) we download the file into
    # the local tmpdir and pass that in.
    handle = fs.download_to_temp_file(path)
    if not handle:
        raise StateAggregateError(f"Unable to download file: {path}")
    logging.info("Successfully downloaded file from gcs: %s", handle.local_file_path)

    result = parser(handle.local_file_path)
    logging.info("Successfully parsed the report")
    for table, df in result.items():
        dao.write_df(table, df)

    # If we are successful, we want to move the file out of the cloud
    # function triggered directory, and into the historical path.
    historical_path = GcsfsFilePath.from_directory_and_file_name(
        GcsfsDirectoryPath(HISTORICAL_BUCKET.format(project_id), state), filename
    )
    fs.mv(path, historical_path)
    return "", HTTPStatus.OK
Esempio n. 28
0
    def testWriteDf(self):
        # Writes a five-row county table and checks every stored column of
        # the "Bay" row.

        # Arrange
        row_count = 5
        monthly = enum_strings.monthly_granularity
        bay_date_reported = datetime.datetime(2017, 9, 1)

        county_columns = {
            "county_name": ["Alachua", "Baker", "Bay", "Bradford", "Brevard"],
            "county_population": [257062, 26965, 176016, 27440, 568919],
            "average_daily_population": [799, 478, 1015, 141, 1547],
            "date_reported": [
                pd.NaT, pd.NaT, bay_date_reported, pd.NaT, pd.NaT,
            ],
            "fips": ["00000", "00001", "00002", "00003", "00004"],
            "report_date": [DATE_SCRAPED] * row_count,
            "aggregation_window": [monthly] * row_count,
            "report_frequency": [monthly] * row_count,
        }
        county_df = pd.DataFrame(county_columns)

        # Act
        dao.write_df(FlCountyAggregate, county_df)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            bay_rows = session.query(FlCountyAggregate).filter(
                FlCountyAggregate.county_name == "Bay").all()
            bay_row = one(bay_rows)

            self.assertEqual(bay_row.county_name, "Bay")
            self.assertEqual(bay_row.county_population, 176016)
            self.assertEqual(bay_row.average_daily_population, 1015)
            # The value was written as a datetime; it is compared to a date.
            self.assertEqual(bay_row.date_reported, datetime.date(2017, 9, 1))
            self.assertEqual(bay_row.fips, "00002")
            self.assertEqual(bay_row.report_date, DATE_SCRAPED)
            self.assertEqual(bay_row.aggregation_window, monthly)
Esempio n. 29
0
    def testWriteDf_rowsWithSameColumnsThatMustBeUnique_onlyWritesOnce(self):
        # Writes two rows sharing the same fips, report_date and
        # aggregation_window, then checks only one row is stored.

        # Arrange
        row_count = 2
        monthly = enum_strings.monthly_granularity
        duplicated_columns = {
            'county_name': ['Alachua', 'Baker'],
            'county_population': [257062, 26965],
            'average_daily_population': [799, 478],
            'date_reported': [pd.NaT, pd.NaT],
            'fips': ['12345'] * row_count,
            'report_date': [DATE_SCRAPED] * row_count,
            'aggregation_window': [monthly] * row_count,
            'report_frequency': [monthly] * row_count,
        }
        duplicated_df = pd.DataFrame(duplicated_columns)

        # Act
        dao.write_df(FlCountyAggregate, duplicated_df)

        # Assert
        session = SessionFactory.for_schema_base(JailsBase)
        stored_rows = session.query(FlCountyAggregate).all()
        self.assertEqual(len(stored_rows), 1)
Esempio n. 30
0
    def testWrite_CorrectlyReadsHernandoCounty(self) -> None:
        # Persists every parsed table, then checks the stored values of the
        # single FlCountyAggregate row whose county_name is "Hernando".
        parsed_tables = self.parsed_pdf
        if not parsed_tables:
            raise ValueError("Unexpectedly empty parsed_pdf")

        # Act
        for aggregate_table, frame in parsed_tables.items():
            dao.write_df(aggregate_table, frame)

        # Assert
        with SessionFactory.using_database(
                self.database_key, autocommit=False) as session:
            matching_rows = session.query(FlCountyAggregate).filter(
                FlCountyAggregate.county_name == "Hernando").all()
            hernando = one(matching_rows)

        self.assertEqual(hernando.county_name, "Hernando")
        self.assertEqual(hernando.county_population, 179503)
        self.assertEqual(hernando.average_daily_population, 632)
        self.assertEqual(hernando.date_reported, datetime.date(2017, 9, 1))