예제 #1
0
def test_query_multiple_variables():
    """Check that query_multiple_variables only returns the requested variable.

    Two variables that differ only in their provider ("cdc" vs "hhs") are loaded
    into the scraper dataframe. Only the "cdc" variable is queried, so the result
    is expected to contain its values (10, 20, 30) and none of the "hhs" values.
    """
    variable = ccd_helpers.ScraperVariable(
        variable_name="total_vaccine_completed",
        measurement="cumulative",
        unit="people",
        provider="cdc",
        common_field=CommonFields.VACCINATIONS_COMPLETED,
    )
    not_included_variable = ccd_helpers.ScraperVariable(
        variable_name="total_vaccine_completed",
        measurement="cumulative",
        unit="people",
        # Different provider, so query shouldn't return it
        provider="hhs",
        common_field=CommonFields.VACCINATIONS_COMPLETED,
    )

    input_data = build_can_scraper_dataframe({
        variable: [10, 20, 30],
        not_included_variable: [10, 20, 40]
    })
    data = ccd_helpers.CanScraperLoader(input_data)
    # The second element of the returned pair is not needed by this test.
    results, _ = data.query_multiple_variables([variable])

    # Plain string literals: none of these lines interpolate anything, so the
    # f-prefix the rows previously carried was unnecessary.
    expected_buf = io.StringIO(
        "fips,date,aggregate_level,vaccinations_completed\n"
        "36,2021-01-01,state,10\n"
        "36,2021-01-02,state,20\n"
        "36,2021-01-03,state,30\n")
    expected = common_df.read_csv(expected_buf, set_index=False)
    pd.testing.assert_frame_equal(expected, results)
예제 #2
0
def test_query_source_url():
    """Check that a source_url passed to the dataframe builder comes back as tags.

    The data frame returned by query_multiple_variables is compared against the
    expected values, and the returned tags are compared against one
    "http://foo.com" row per date.
    """
    cdc_completed = ccd_helpers.ScraperVariable(
        variable_name="total_vaccine_completed",
        measurement="cumulative",
        unit="people",
        provider="cdc",
        common_field=CommonFields.VACCINATIONS_COMPLETED,
    )

    scraper_rows = build_can_scraper_dataframe(
        {cdc_completed: [10, 20, 30]}, source_url="http://foo.com")
    loader = ccd_helpers.CanScraperLoader(scraper_rows)
    results, tags = loader.query_multiple_variables([cdc_completed])

    # The CSV text is column-aligned for readability; every space is stripped
    # before it is parsed.
    data_csv = (
        "fips,      date,aggregate_level,vaccinations_completed\n"
        "  36,2021-01-01,          state,                    10\n"
        "  36,2021-01-02,          state,                    20\n"
        "  36,2021-01-03,          state,                    30\n"
    ).replace(" ", "")
    expected_data = common_df.read_csv(io.StringIO(data_csv), set_index=False)
    pd.testing.assert_frame_equal(expected_data, results)

    tag_csv = (
        "fips,      date,              variable,       content\n"
        "  36,2021-01-01,vaccinations_completed,http://foo.com\n"
        "  36,2021-01-02,vaccinations_completed,http://foo.com\n"
        "  36,2021-01-03,vaccinations_completed,http://foo.com\n"
    ).replace(" ", "")
    expected_tags = common_df.read_csv(io.StringIO(tag_csv), set_index=False)
    # check_like=True: the order of index and columns is ignored for the tags.
    pd.testing.assert_frame_equal(expected_tags, tags, check_like=True)
예제 #3
0
class CANScraperUSAFactsProvider(data_source.CanScraperBase):
    """CAN scraper source declaring the "usafacts" provider's case and death variables."""

    SOURCE_TYPE = "USAFacts"

    # Both variables are cumulative counts of people; only the scraper variable
    # name and the CommonFields column it maps to differ.
    VARIABLES = [
        ccd_helpers.ScraperVariable(
            variable_name=scraper_name,
            measurement="cumulative",
            unit="people",
            provider="usafacts",
            common_field=field,
        )
        for scraper_name, field in (
            ("cases", CommonFields.CASES),
            ("deaths", CommonFields.DEATHS),
        )
    ]
def make_hhs_variable(can_scraper_field, common_field, measurement):
    """Build a ScraperVariable for provider "hhs" measured in unit "beds".

    Exists because there are many variables to declare and two different
    measurements to handle (one for county data, one for state data).

    Args:
        can_scraper_field: Passed through as the ScraperVariable's variable_name.
        common_field: Passed through as the ScraperVariable's common_field.
        measurement: Passed through as the ScraperVariable's measurement.

    Returns:
        The constructed ccd_helpers.ScraperVariable.
    """
    hhs_settings = {
        "variable_name": can_scraper_field,
        "measurement": measurement,
        "provider": "hhs",
        "unit": "beds",
        "common_field": common_field,
    }
    return ccd_helpers.ScraperVariable(**hhs_settings)
예제 #5
0
class CDCVaccinesDataset(data_source.CanScraperBase):
    """CAN scraper source declaring the cumulative vaccine variables from provider "cdc"."""

    SOURCE_TYPE = "CDCVaccine"

    # Every variable is cumulative and comes from provider "cdc"; each row below
    # is (scraper variable name, unit, CommonFields column).
    VARIABLES = [
        ccd_helpers.ScraperVariable(
            variable_name=scraper_name,
            measurement="cumulative",
            unit=unit,
            provider="cdc",
            common_field=field,
        )
        for scraper_name, unit, field in (
            ("total_vaccine_allocated", "doses",
             CommonFields.VACCINES_ALLOCATED),
            ("total_vaccine_distributed", "doses",
             CommonFields.VACCINES_DISTRIBUTED),
            ("total_vaccine_doses_administered", "doses",
             CommonFields.VACCINES_ADMINISTERED),
            ("total_vaccine_initiated", "people",
             CommonFields.VACCINATIONS_INITIATED),
            ("total_vaccine_completed", "people",
             CommonFields.VACCINATIONS_COMPLETED),
        )
    ]
예제 #6
0
class CDCTestingDataset(data_source.CanScraperBase):
    """CAN scraper source declaring the "cdc" 7-day rolling PCR positivity variable."""

    SOURCE_TYPE = "CDCTesting"

    VARIABLES = [
        ccd_helpers.ScraperVariable(
            variable_name="pcr_tests_positive",
            measurement="rolling_average_7_day",
            unit="percentage",
            provider="cdc",
            common_field=CommonFields.TEST_POSITIVITY_7D,
        ),
    ]

    @classmethod
    @lru_cache(None)
    def make_dataset(cls) -> MultiRegionDataset:
        """Return the parent class's dataset after passing it through modify_dataset.

        The result is memoized with an unbounded lru_cache, so repeated calls on
        the same class reuse the first result.
        """
        unmodified = super().make_dataset()
        return modify_dataset(unmodified)
예제 #7
0
class CDCTestingDataset(data_source.CanScraperBase):
    """CAN scraper source declaring the "cdc" 7-day rolling PCR positivity variable."""

    SOURCE_NAME = "CDCTesting"

    EXPECTED_FIELDS = [
        CommonFields.TEST_POSITIVITY_7D,
    ]

    VARIABLES = [
        ccd_helpers.ScraperVariable(
            variable_name="pcr_tests_positive",
            measurement="rolling_average_7_day",
            unit="percentage",
            provider="cdc",
            common_field=CommonFields.TEST_POSITIVITY_7D,
        ),
    ]

    @classmethod
    def transform_data(cls, results: pd.DataFrame) -> pd.DataFrame:
        """Rescale positivity to a ratio and duplicate DC county rows as state rows.

        The positivity column is assigned back into the frame that was passed in.
        The returned frame is that data plus the DC state copies, passed through
        remove_trailing_zeros.

        Raises:
            AssertionError: if any FIPS value is not exactly 5 characters long.
        """
        positivity = CommonFields.TEST_POSITIVITY_7D
        # The scraper reports a percentage; test positivity should be a ratio.
        results.loc[:, positivity] = results.loc[:, positivity] / 100.0

        # Only county rows are expected for now. Additional logic may be needed
        # if state rows start to be included as well.
        fips_lengths = results[CommonFields.FIPS].str.len()
        assert (fips_lengths == 5).all()

        # DC county rows are duplicated as state rows because downstream code
        # uses DC state data to override DC county data.
        is_dc_county = results[CommonFields.FIPS] == DC_COUNTY_FIPS
        dc_as_state = results.loc[is_dc_county, :].copy()
        dc_as_state.loc[:, CommonFields.FIPS] = DC_STATE_FIPS
        dc_as_state.loc[:, CommonFields.AGGREGATE_LEVEL] = "state"

        combined = pd.concat([results, dc_as_state])
        return remove_trailing_zeros(combined)
def make_scraper_variables(provider: str):
    """Generate all variables that could be captured from a state / county dashboard.

    The returned list starts with variables that carry only a name and the
    provider, followed by variables that also specify a measurement, a unit and
    the CommonFields column they map to.

    Args:
        provider: Passed as the provider of every ScraperVariable created.

    Returns:
        A new list of ccd_helpers.ScraperVariable objects.
    """
    # Variables declared with just a name and provider.
    name_only = (
        "pcr_tests_negative",
        "unspecified_tests_total",
        "unspecified_tests_positive",
        "icu_beds_available",
        "antibody_tests_total",
        "antigen_tests_positive",
        "antigen_tests_negative",
        "total_vaccine_doses_administered",
        "hospital_beds_in_use",
        "ventilators_in_use",
        "ventilators_available",
        "ventilators_capacity",
        "pediatric_icu_beds_in_use",
        "adult_icu_beds_available",
        "pediatric_icu_beds_capacity",
        "unspecified_tests_negative",
        "antigen_tests_total",
        "adult_icu_beds_in_use",
        "hospital_beds_available",
        "pediatric_icu_beds_available",
        "adult_icu_beds_capacity",
        "icu_beds_in_use",
    )
    # Fully specified variables: (variable_name, measurement, unit, common_field).
    mapped = (
        ("cases", "cumulative", "people", CommonFields.CASES),
        ("deaths", "cumulative", "people", CommonFields.DEATHS),
        ("hospital_beds_in_use_covid", "current", "beds",
         CommonFields.CURRENT_HOSPITALIZED),
        ("hospital_beds_capacity", "current", "beds",
         CommonFields.STAFFED_BEDS),
        ("icu_beds_capacity", "current", "beds", CommonFields.ICU_BEDS),
        ("icu_beds_in_use_covid", "current", "beds",
         CommonFields.CURRENT_ICU),
        # unit="specimens" ignores the less common unit=test_encounters and
        # unit=unique_people for both PCR variables below.
        ("pcr_tests_total", "cumulative", "specimens",
         CommonFields.TOTAL_TESTS_VIRAL),
        ("pcr_tests_positive", "cumulative", "specimens",
         CommonFields.POSITIVE_TESTS_VIRAL),
        ("total_vaccine_allocated", "cumulative", "doses",
         CommonFields.VACCINES_ALLOCATED),
        ("total_vaccine_distributed", "cumulative", "doses",
         CommonFields.VACCINES_DISTRIBUTED),
        ("total_vaccine_initiated", "cumulative", "people",
         CommonFields.VACCINATIONS_INITIATED),
        ("total_vaccine_initiated", "current", "percentage",
         CommonFields.VACCINATIONS_INITIATED_PCT),
        ("total_vaccine_completed", "cumulative", "people",
         CommonFields.VACCINATIONS_COMPLETED),
        ("total_vaccine_completed", "current", "percentage",
         CommonFields.VACCINATIONS_COMPLETED_PCT),
        ("total_vaccine_doses_administered", "cumulative", "doses",
         CommonFields.VACCINES_ADMINISTERED),
    )

    variables = [
        ccd_helpers.ScraperVariable(variable_name=name, provider=provider)
        for name in name_only
    ]
    variables.extend(
        ccd_helpers.ScraperVariable(
            variable_name=name,
            measurement=measurement,
            unit=unit,
            provider=provider,
            common_field=field,
        )
        for name, measurement, unit, field in mapped
    )
    return variables
class CANScraperStateProviders(data_source.CanScraperBase):
    """CAN scraper source declaring the variables read from provider "state"."""

    SOURCE_NAME = "CANScrapersStateProviders"

    EXPECTED_FIELDS = [
        CommonFields.STAFFED_BEDS,
        CommonFields.CASES,
        CommonFields.DEATHS,
        CommonFields.VACCINES_ALLOCATED,
        CommonFields.VACCINES_ADMINISTERED,
        CommonFields.VACCINES_DISTRIBUTED,
        CommonFields.VACCINATIONS_INITIATED,
        CommonFields.VACCINATIONS_COMPLETED,
        CommonFields.TOTAL_TESTS_VIRAL,
        CommonFields.ICU_BEDS,
        CommonFields.CURRENT_HOSPITALIZED,
        CommonFields.POSITIVE_TESTS_VIRAL,
        CommonFields.CURRENT_ICU,
        CommonFields.VACCINATIONS_INITIATED_PCT,
        CommonFields.VACCINATIONS_COMPLETED_PCT,
    ]

    VARIABLES = [
        # Variables declared with just a name and provider.
        *(
            ccd_helpers.ScraperVariable(variable_name=name, provider="state")
            for name in (
                "pcr_tests_negative",
                "unspecified_tests_total",
                "unspecified_tests_positive",
                "icu_beds_available",
                "antibody_tests_total",
                "antigen_tests_positive",
                "antigen_tests_negative",
                "total_vaccine_doses_administered",
                "hospital_beds_in_use",
                "ventilators_in_use",
                "ventilators_available",
                "ventilators_capacity",
                "pediatric_icu_beds_in_use",
                "adult_icu_beds_available",
                "pediatric_icu_beds_capacity",
                "unspecified_tests_negative",
                "antigen_tests_total",
                "adult_icu_beds_in_use",
                "hospital_beds_available",
                "pediatric_icu_beds_available",
                "adult_icu_beds_capacity",
                "icu_beds_in_use",
            )
        ),
        # Fully specified variables: (variable_name, measurement, unit, common_field).
        *(
            ccd_helpers.ScraperVariable(
                variable_name=name,
                measurement=measurement,
                unit=unit,
                provider="state",
                common_field=field,
            )
            for name, measurement, unit, field in (
                ("cases", "cumulative", "people", CommonFields.CASES),
                ("deaths", "cumulative", "people", CommonFields.DEATHS),
                ("hospital_beds_in_use_covid", "current", "beds",
                 CommonFields.CURRENT_HOSPITALIZED),
                ("hospital_beds_capacity", "current", "beds",
                 CommonFields.STAFFED_BEDS),
                ("icu_beds_capacity", "current", "beds",
                 CommonFields.ICU_BEDS),
                ("icu_beds_in_use_covid", "current", "beds",
                 CommonFields.CURRENT_ICU),
                # unit="specimens" ignores the less common unit=test_encounters
                # and unit=unique_people for both PCR variables below.
                ("pcr_tests_total", "cumulative", "specimens",
                 CommonFields.TOTAL_TESTS_VIRAL),
                ("pcr_tests_positive", "cumulative", "specimens",
                 CommonFields.POSITIVE_TESTS_VIRAL),
                ("total_vaccine_allocated", "cumulative", "doses",
                 CommonFields.VACCINES_ALLOCATED),
                ("total_vaccine_distributed", "cumulative", "doses",
                 CommonFields.VACCINES_DISTRIBUTED),
                ("total_vaccine_initiated", "cumulative", "people",
                 CommonFields.VACCINATIONS_INITIATED),
                ("total_vaccine_initiated", "current", "percentage",
                 CommonFields.VACCINATIONS_INITIATED_PCT),
                ("total_vaccine_completed", "cumulative", "people",
                 CommonFields.VACCINATIONS_COMPLETED),
                ("total_vaccine_completed", "current", "percentage",
                 CommonFields.VACCINATIONS_COMPLETED_PCT),
                ("total_vaccine_doses_administered", "cumulative", "doses",
                 CommonFields.VACCINES_ADMINISTERED),
            )
        ),
    ]