def test_query_multiple_variables():
    """Querying a single variable must not return rows that belong to another provider.

    Two variables that differ only in ``provider`` are loaded into the same frame. Only the
    "cdc" variable is queried, so the "hhs" series (whose last value is 40 rather than 30)
    must be absent from the result.
    """
    variable = ccd_helpers.ScraperVariable(
        variable_name="total_vaccine_completed",
        measurement="cumulative",
        unit="people",
        provider="cdc",
        common_field=CommonFields.VACCINATIONS_COMPLETED,
    )
    not_included_variable = ccd_helpers.ScraperVariable(
        variable_name="total_vaccine_completed",
        measurement="cumulative",
        unit="people",
        # Different provider, so query shouldn't return it
        provider="hhs",
        common_field=CommonFields.VACCINATIONS_COMPLETED,
    )
    input_data = build_can_scraper_dataframe(
        {variable: [10, 20, 30], not_included_variable: [10, 20, 40]}
    )
    data = ccd_helpers.CanScraperLoader(input_data)
    results, _ = data.query_multiple_variables([variable])

    # Plain string literals: the rows contain no placeholders, so an f-prefix would only
    # suggest interpolation that never happens.
    expected_buf = io.StringIO(
        "fips,date,aggregate_level,vaccinations_completed\n"
        "36,2021-01-01,state,10\n"
        "36,2021-01-02,state,20\n"
        "36,2021-01-03,state,30\n"
    )
    expected = common_df.read_csv(expected_buf, set_index=False)
    pd.testing.assert_frame_equal(expected, results)
def test_query_source_url():
    """Check that a source URL passed to the input frame is returned as per-row tags.

    The query result is compared against the expected data frame, and the returned tags
    against a frame carrying ``http://foo.com`` as the content for every date.
    """
    vaccinations_completed = ccd_helpers.ScraperVariable(
        variable_name="total_vaccine_completed",
        measurement="cumulative",
        unit="people",
        provider="cdc",
        common_field=CommonFields.VACCINATIONS_COMPLETED,
    )
    scraper_df = build_can_scraper_dataframe(
        {vaccinations_completed: [10, 20, 30]}, source_url="http://foo.com"
    )
    loader = ccd_helpers.CanScraperLoader(scraper_df)
    results, tags = loader.query_multiple_variables([vaccinations_completed])

    # The CSV literals contain padding spaces; every space is stripped from the whole
    # concatenated text before it is parsed.
    expected_data_csv = (
        "fips, date,aggregate_level,vaccinations_completed\n"
        " 36,2021-01-01, state, 10\n"
        " 36,2021-01-02, state, 20\n"
        " 36,2021-01-03, state, 30\n"
    ).replace(" ", "")
    expected_data = common_df.read_csv(io.StringIO(expected_data_csv), set_index=False)
    pd.testing.assert_frame_equal(expected_data, results)

    expected_tag_csv = (
        "fips, date, variable, content\n"
        " 36,2021-01-01,vaccinations_completed,http://foo.com\n"
        " 36,2021-01-02,vaccinations_completed,http://foo.com\n"
        " 36,2021-01-03,vaccinations_completed,http://foo.com\n"
    ).replace(" ", "")
    expected_tags = common_df.read_csv(io.StringIO(expected_tag_csv), set_index=False)
    # check_like=True: row/column order of the tag frame is not part of the contract.
    pd.testing.assert_frame_equal(expected_tags, tags, check_like=True)
class CANScraperUSAFactsProvider(data_source.CanScraperBase):
    """CAN scraper data source selecting the "usafacts" provider's cases and deaths."""

    SOURCE_TYPE = "USAFacts"

    # Both variables are cumulative people counts from the "usafacts" provider; only the
    # scraper variable name and the CommonFields column it maps to differ.
    VARIABLES = [
        ccd_helpers.ScraperVariable(
            variable_name=scraper_name,
            measurement="cumulative",
            unit="people",
            provider="usafacts",
            common_field=field,
        )
        for scraper_name, field in (
            ("cases", CommonFields.CASES),
            ("deaths", CommonFields.DEATHS),
        )
    ]
def make_hhs_variable(can_scraper_field, common_field, measurement):
    """Build the ScraperVariable for one HHS field.

    There are many HHS variables and two different measurements (one for county data, one
    for state data), so this helper fills in the parts that never change: the provider is
    always "hhs" and the unit is always "beds".

    Args:
        can_scraper_field: Variable name as it appears in the CAN scraper data.
        common_field: Common field the variable is mapped to.
        measurement: Measurement to select for this variable.

    Returns:
        The configured ``ccd_helpers.ScraperVariable``.
    """
    hhs_variable = ccd_helpers.ScraperVariable(
        variable_name=can_scraper_field,
        common_field=common_field,
        measurement=measurement,
        unit="beds",
        provider="hhs",
    )
    return hhs_variable
class CDCVaccinesDataset(data_source.CanScraperBase):
    """CAN scraper data source selecting the "cdc" provider's cumulative vaccine variables."""

    SOURCE_TYPE = "CDCVaccine"

    # Every variable is a cumulative measurement from the "cdc" provider. Each table row is
    # (scraper variable name, unit, common field); list order matches the table order.
    VARIABLES = [
        ccd_helpers.ScraperVariable(
            variable_name=scraper_name,
            measurement="cumulative",
            unit=unit,
            provider="cdc",
            common_field=field,
        )
        for scraper_name, unit, field in (
            ("total_vaccine_allocated", "doses", CommonFields.VACCINES_ALLOCATED),
            ("total_vaccine_distributed", "doses", CommonFields.VACCINES_DISTRIBUTED),
            ("total_vaccine_doses_administered", "doses", CommonFields.VACCINES_ADMINISTERED),
            ("total_vaccine_initiated", "people", CommonFields.VACCINATIONS_INITIATED),
            ("total_vaccine_completed", "people", CommonFields.VACCINATIONS_COMPLETED),
        )
    ]
class CDCTestingDataset(data_source.CanScraperBase):
    """CAN scraper data source selecting the "cdc" provider's 7-day test positivity."""

    SOURCE_TYPE = "CDCTesting"

    VARIABLES = [
        ccd_helpers.ScraperVariable(
            variable_name="pcr_tests_positive",
            provider="cdc",
            measurement="rolling_average_7_day",
            unit="percentage",
            common_field=CommonFields.TEST_POSITIVITY_7D,
        ),
    ]

    @classmethod
    @lru_cache(maxsize=None)
    def make_dataset(cls) -> MultiRegionDataset:
        """Return the parent class's dataset after passing it through ``modify_dataset``.

        The result is memoised with an unbounded ``lru_cache``, so repeated calls on the
        same class reuse the first result.
        """
        base_dataset = super().make_dataset()
        return modify_dataset(base_dataset)
class CDCTestingDataset(data_source.CanScraperBase):
    """CAN scraper data source selecting the "cdc" provider's 7-day test positivity."""

    SOURCE_NAME = "CDCTesting"

    EXPECTED_FIELDS = [CommonFields.TEST_POSITIVITY_7D]

    VARIABLES = [
        ccd_helpers.ScraperVariable(
            variable_name="pcr_tests_positive",
            provider="cdc",
            measurement="rolling_average_7_day",
            unit="percentage",
            common_field=CommonFields.TEST_POSITIVITY_7D,
        ),
    ]

    @classmethod
    def transform_data(cls, results: pd.DataFrame) -> pd.DataFrame:
        """Rescale positivity to a ratio and duplicate DC county rows as state rows.

        Note that the positivity column of the frame passed in is rescaled in place.

        Args:
            results: Queried rows; every FIPS value must be 5 characters long.

        Returns:
            The rows plus the DC state-level copies, passed through
            ``remove_trailing_zeros``.

        Raises:
            AssertionError: If any FIPS value is not 5 characters long.
        """
        positivity = CommonFields.TEST_POSITIVITY_7D
        fips = CommonFields.FIPS

        # Test positivity should be a ratio
        results.loc[:, positivity] = results.loc[:, positivity] / 100.0

        # Should only be picking up county all_df for now. May need additional logic if
        # states are included as well
        assert (results[fips].str.len() == 5).all()

        # Duplicating DC County results as state results because of a downstream
        # use of how dc state data is used to override DC county data.
        is_dc_county = results[fips] == DC_COUNTY_FIPS
        dc_as_state = results.loc[is_dc_county, :].copy()
        dc_as_state.loc[:, fips] = DC_STATE_FIPS
        dc_as_state.loc[:, CommonFields.AGGREGATE_LEVEL] = "state"

        combined = pd.concat([results, dc_as_state])
        return remove_trailing_zeros(combined)
def make_scraper_variables(provider: str):
    """Helper to generate all variables that could be captured from a state / county dashboard.

    Args:
        provider: Provider value stamped on every generated variable.

    Returns:
        A new list of ``ccd_helpers.ScraperVariable``: first the variables given by name
        only, then the variables that carry a measurement, unit and common field.
    """
    # Variables requested by name only: no measurement, unit or common field is passed.
    name_only = (
        "pcr_tests_negative",
        "unspecified_tests_total",
        "unspecified_tests_positive",
        "icu_beds_available",
        "antibody_tests_total",
        "antigen_tests_positive",
        "antigen_tests_negative",
        "total_vaccine_doses_administered",
        "hospital_beds_in_use",
        "ventilators_in_use",
        "ventilators_available",
        "ventilators_capacity",
        "pediatric_icu_beds_in_use",
        "adult_icu_beds_available",
        "pediatric_icu_beds_capacity",
        "unspecified_tests_negative",
        "antigen_tests_total",
        "adult_icu_beds_in_use",
        "hospital_beds_available",
        "pediatric_icu_beds_available",
        "adult_icu_beds_capacity",
        "icu_beds_in_use",
    )

    # Fully specified variables: (variable_name, measurement, unit, common_field).
    fully_specified = (
        ("cases", "cumulative", "people", CommonFields.CASES),
        ("deaths", "cumulative", "people", CommonFields.DEATHS),
        ("hospital_beds_in_use_covid", "current", "beds", CommonFields.CURRENT_HOSPITALIZED),
        ("hospital_beds_capacity", "current", "beds", CommonFields.STAFFED_BEDS),
        ("icu_beds_capacity", "current", "beds", CommonFields.ICU_BEDS),
        ("icu_beds_in_use_covid", "current", "beds", CommonFields.CURRENT_ICU),
        # Ignores less common unit=test_encounters and unit=unique_people
        ("pcr_tests_total", "cumulative", "specimens", CommonFields.TOTAL_TESTS_VIRAL),
        # Ignores test_encounters and unique_people
        ("pcr_tests_positive", "cumulative", "specimens", CommonFields.POSITIVE_TESTS_VIRAL),
        ("total_vaccine_allocated", "cumulative", "doses", CommonFields.VACCINES_ALLOCATED),
        ("total_vaccine_distributed", "cumulative", "doses", CommonFields.VACCINES_DISTRIBUTED),
        ("total_vaccine_initiated", "cumulative", "people", CommonFields.VACCINATIONS_INITIATED),
        (
            "total_vaccine_initiated",
            "current",
            "percentage",
            CommonFields.VACCINATIONS_INITIATED_PCT,
        ),
        ("total_vaccine_completed", "cumulative", "people", CommonFields.VACCINATIONS_COMPLETED),
        (
            "total_vaccine_completed",
            "current",
            "percentage",
            CommonFields.VACCINATIONS_COMPLETED_PCT,
        ),
        (
            "total_vaccine_doses_administered",
            "cumulative",
            "doses",
            CommonFields.VACCINES_ADMINISTERED,
        ),
    )

    variables = [
        ccd_helpers.ScraperVariable(variable_name=scraper_name, provider=provider)
        for scraper_name in name_only
    ]
    variables.extend(
        ccd_helpers.ScraperVariable(
            variable_name=scraper_name,
            measurement=measurement,
            unit=unit,
            provider=provider,
            common_field=field,
        )
        for scraper_name, measurement, unit, field in fully_specified
    )
    return variables
class CANScraperStateProviders(data_source.CanScraperBase):
    """CAN scraper data source selecting variables whose provider is "state"."""

    SOURCE_NAME = "CANScrapersStateProviders"

    EXPECTED_FIELDS = [
        CommonFields.STAFFED_BEDS,
        CommonFields.CASES,
        CommonFields.DEATHS,
        CommonFields.VACCINES_ALLOCATED,
        CommonFields.VACCINES_ADMINISTERED,
        CommonFields.VACCINES_DISTRIBUTED,
        CommonFields.VACCINATIONS_INITIATED,
        CommonFields.VACCINATIONS_COMPLETED,
        CommonFields.TOTAL_TESTS_VIRAL,
        CommonFields.ICU_BEDS,
        CommonFields.CURRENT_HOSPITALIZED,
        CommonFields.POSITIVE_TESTS_VIRAL,
        CommonFields.CURRENT_ICU,
        CommonFields.VACCINATIONS_INITIATED_PCT,
        CommonFields.VACCINATIONS_COMPLETED_PCT,
    ]

    # Built by the shared helper instead of a hand-copied list: the previous literal was
    # element-for-element the output of make_scraper_variables("state"), and keeping two
    # copies invites them drifting apart when a variable is added or changed.
    VARIABLES = make_scraper_variables("state")