Example #1
def single_dataset_all(c=cmdc.Client()):
    """
    This example loads the within-county mobility data for all fips codes
    and all dates
    """
    c.mobility_devices()
    df = c.fetch()
    return df
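Each example follows the same pattern: calling a dataset method such as
mobility_devices() queues that dataset on the client, and fetch() executes the
request and returns a pandas DataFrame. A minimal usage sketch for the function
above (assuming the cmdc package is installed and, if required, an API key is
configured):

import cmdc

c = cmdc.Client()
df = single_dataset_all(c)
print(df.head())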
Example #2
def single_dataset_deaths_filter(c=cmdc.Client()):
    """
    This example loads a subset of the covid data by filtering to fips codes
    below 100 and to observations with more than 100 total deaths
    """
    c.covid_us(location="<100", variable="deaths_total", value=">100")
    df = c.fetch()
    return df
Example #3
def multiple_dataset_all(c=cmdc.Client()):
    """
    This example loads multiple datasets (demographics and covid) for all
    US fips codes
    """
    c.demographics().covid_us()
    df = c.fetch()
    return df
Example #4
def multiple_dataset_counties_only(c=cmdc.Client()):
    """
    This example loads multiple datasets (demographics and covid) for all
    US counties -- it selects counties by taking only fips >= 1000
    """
    c.demographics().covid_us(location=">=1000")
    df = c.fetch()
    return df
Example #5
def single_dataset_multiplestatesallcounties(c=cmdc.Client()):
    """
    This example loads the within-county mobility data for all counties
    in multiple states and all dates
    """
    c.mobility_devices(state=["CA", "TX"])
    # Could replace "CA" or "TX" with ("Californa", 6, "06") or
    # ("Texas", 48, "48") respectively
    df = c.fetch()
    return df
Example #6
def single_dataset_onestateallcounties(c=cmdc.Client()):
    """
    This example loads the within-county mobility data for a single
    state (CA) and all dates
    """

    c.mobility_devices(state="CA")
    # Could also do any of the below
    # c.mobility_devices(state=6)
    # c.mobility_devices(state="California")
    # c.mobility_devices(state="06")
    df = c.fetch()
    return df
Example #7
def single_dataset_variableselect(c=cmdc.Client()):
    """
    This example loads a subset of the demographic data by selecting
    a few variables
    """
    c.demographics(variable=[
        "Total population",
        "Fraction of population over 65",
        "Median age",
        "Mean household income",
        "Percent of civilian population with no health insurance",
    ])
    df = c.fetch()
    return df
Example #8
def test_pass_apikey_updates_headers():
    c = cmdc.Client(apikey="foobar")
    assert "apikey" in c.sess.headers
    assert c.sess.headers["apikey"] == "foobar"
Example #9
    # Method excerpt from a data pipeline class that ingests cmdc data and
    # maps it to common fields.
    def transform(self) -> pd.DataFrame:
        cmdc_client = cmdc.Client(apikey=self.cmdc_key)

        cmdc_client.covid_us()
        df = cmdc_client.fetch()
        # Transform FIPS from an int64 to a string of 2 or 5 chars. See
        # https://github.com/valorumdata/cmdc.py/issues/3
        df[CommonFields.FIPS] = df[Fields.LOCATION].apply(
            lambda v: f"{v:0>{2 if v < 100 else 5}}")

        # Already transformed from Fields to CommonFields
        already_transformed_fields = {CommonFields.FIPS}

        extra_fields = set(
            df.columns) - set(Fields) - already_transformed_fields
        missing_fields = set(Fields) - set(df.columns)
        if extra_fields or missing_fields:
            # If this warning happens in a test you may need to edit the sample data in test/data
            # to make sure all the expected fields appear in the sample.
            self.log.warning(
                "columns from cmdc do not match Fields",
                extra_fields=extra_fields,
                missing_fields=missing_fields,
            )

        # TODO(tom): Factor out this rename and re-order code. It is stricter than
        # update_covid_data_scraper because this code expects every field in the source DataFrame
        # to appear in Fields.
        rename: MutableMapping[str, str] = {
            f: f
            for f in already_transformed_fields
        }
        for col in df.columns:
            field = Fields.get(col)
            if field and field.common_field:
                if field.value in rename:
                    raise AssertionError("Field misconfigured")
                rename[field.value] = field.common_field.value

        # Copy only columns in `rename.keys()` to a new DataFrame and rename.
        df = df.loc[:, list(rename.keys())].rename(columns=rename)

        df[CommonFields.COUNTRY] = "USA"

        # Partition df by region type so states and counties can be merged with different
        # data to get their names.
        state_mask = df[CommonFields.FIPS].str.len() == 2
        states = df.loc[state_mask, :]
        counties = df.loc[~state_mask, :]

        fips_data = load_county_fips_data(self.county_fips_csv).set_index(
            [CommonFields.FIPS])
        counties = counties.merge(
            fips_data[[CommonFields.STATE, CommonFields.COUNTY]],
            left_on=[CommonFields.FIPS],
            suffixes=(False, False),
            how="left",
            right_index=True,
        )
        no_match_counties_mask = counties.state.isna()
        if no_match_counties_mask.sum() > 0:
            self.log.warning(
                "Some counties did not match by fips",
                bad_fips=counties.loc[no_match_counties_mask,
                                      CommonFields.FIPS].unique().tolist(),
            )
        counties = counties.loc[~no_match_counties_mask, :]
        counties[CommonFields.AGGREGATE_LEVEL] = "county"

        state_df = load_census_state(self.census_state_path).set_index(
            CommonFields.FIPS)
        states = states.merge(
            state_df[[CommonFields.STATE]],
            left_on=[CommonFields.FIPS],
            suffixes=(False, False),
            how="left",
            right_index=True,
        )
        states[CommonFields.AGGREGATE_LEVEL] = "state"

        df = pd.concat([states, counties])

        df = sort_common_field_columns(df)

        df = df.set_index(COMMON_FIELDS_TIMESERIES_KEYS, verify_integrity=True)
        return df
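The select-then-rename step above keeps only the columns with a known mapping
and renames them to the common field names. A minimal, self-contained sketch of
that pandas pattern using hypothetical column names (not the real Fields /
CommonFields values):

import pandas as pd

rename = {"fips": "fips", "deaths_total": "deaths"}  # source column -> common name
df = pd.DataFrame({"fips": ["06"], "deaths_total": [10], "unmapped": [1]})
# Copy only the columns named in `rename`, then rename them to the common names.
df = df.loc[:, list(rename.keys())].rename(columns=rename)
print(df.columns.tolist())  # ['fips', 'deaths']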
Example #10
def client():
    return cmdc.Client(apikey=os.environ.get("CMDC_API_KEY", None))
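The fixture above reads the API key from the CMDC_API_KEY environment variable,
falling back to None. A hedged sketch of how it might be used in a test,
assuming client is registered as a pytest fixture:

def test_fetch_covid(client):
    client.covid_us(location="<100")
    df = client.fetch()
    assert not df.empty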