def single_dataset_all(c=None):
    """Load the within-county mobility data for all fips and all dates.

    Args:
        c: Optional ``cmdc.Client`` to reuse. When omitted, a fresh client is
           created per call. (The previous default ``c=cmdc.Client()`` was
           evaluated once at import time, so every no-arg call shared a single
           client whose query state accumulated across calls.)

    Returns:
        DataFrame with the fetched mobility data.
    """
    if c is None:
        c = cmdc.Client()
    c.mobility_devices()
    df = c.fetch()
    return df
def single_dataset_deaths_filter(c=None):
    """Load a filtered subset of the US covid data.

    Filters to locations with code < 100 and rows where ``deaths_total``
    exceeds 100. (Docstring fixed: the original said "demographic data",
    but the code queries ``covid_us``.)

    Args:
        c: Optional ``cmdc.Client`` to reuse. When omitted, a fresh client is
           created per call, avoiding the shared-state bug of a
           ``cmdc.Client()`` default evaluated once at import time.

    Returns:
        DataFrame with the fetched covid data.
    """
    if c is None:
        c = cmdc.Client()
    c.covid_us(location="<100", variable="deaths_total", value=">100")
    df = c.fetch()
    return df
def multiple_dataset_all(c=None):
    """Load multiple datasets (demographics and covid) for all US fips.

    Args:
        c: Optional ``cmdc.Client`` to reuse. When omitted, a fresh client is
           created per call instead of sharing a module-level default client
           whose chained queries would accumulate between calls.

    Returns:
        DataFrame with the merged fetched data.
    """
    if c is None:
        c = cmdc.Client()
    c.demographics().covid_us()
    df = c.fetch()
    return df
def multiple_dataset_counties_only(c=None):
    """Load multiple datasets (demographics and covid), counties only.

    Counties are selected by taking fips >= 1000 (state codes are < 100).

    Args:
        c: Optional ``cmdc.Client`` to reuse. When omitted, a fresh client is
           created per call; the former ``c=cmdc.Client()`` default was built
           once at import and shared across all calls.

    Returns:
        DataFrame with the merged fetched data.
    """
    if c is None:
        c = cmdc.Client()
    c.demographics().covid_us(location=">=1000")
    df = c.fetch()
    return df
def single_dataset_multiplestatesallcounties(c=None):
    """Load within-county mobility data for all counties in several states.

    Args:
        c: Optional ``cmdc.Client`` to reuse. A fresh client is created per
           call when omitted, rather than sharing one default client whose
           state persists between calls.

    Returns:
        DataFrame with the fetched mobility data for CA and TX counties.
    """
    if c is None:
        c = cmdc.Client()
    c.mobility_devices(state=["CA", "TX"])
    # Could replace "CA" or "TX" with ("California", 6, "06") or
    # ("Texas", 48, "48") respectively
    df = c.fetch()
    return df
def single_dataset_onestateallcounties(c=None):
    """Load within-county mobility data for a single state (CA), all dates.

    Args:
        c: Optional ``cmdc.Client`` to reuse. A fresh client is created per
           call when omitted, avoiding the import-time-evaluated shared
           default client.

    Returns:
        DataFrame with the fetched mobility data.
    """
    if c is None:
        c = cmdc.Client()
    c.mobility_devices(state="CA")
    # Could also do any of the below
    # c.mobility_devices(state=6)
    # c.mobility_devices(state="California")
    # c.mobility_devices(state="06")
    df = c.fetch()
    return df
def single_dataset_variableselect(c=None):
    """Load a subset of the demographic data by selecting a few variables.

    (Docstring fixed: the original also claimed to select "a few fips
    codes", but no location filter is applied.)

    Args:
        c: Optional ``cmdc.Client`` to reuse. A fresh client is created per
           call when omitted; the previous ``c=cmdc.Client()`` default was
           shared across every call.

    Returns:
        DataFrame with the selected demographic variables.
    """
    if c is None:
        c = cmdc.Client()
    c.demographics(variable=[
        "Total population",
        "Fraction of population over 65",
        "Median age",
        "Mean household income",
        "Percent of civilian population with no health insurance",
    ])
    df = c.fetch()
    return df
def test_pass_apikey_updates_headers():
    """An apikey given to the Client constructor ends up in session headers."""
    client = cmdc.Client(apikey="foobar")
    headers = client.sess.headers
    assert "apikey" in headers
    assert headers["apikey"] == "foobar"
def transform(self) -> pd.DataFrame:
    """Fetch US covid data from CMDC and reshape it to the common schema.

    Pulls the ``covid_us`` dataset, renames source ``Fields`` columns to
    their ``CommonFields`` equivalents, attaches state/county names from
    local reference files, and returns a DataFrame indexed by
    COMMON_FIELDS_TIMESERIES_KEYS.

    NOTE(review): relies on ``self.cmdc_key``, ``self.county_fips_csv``,
    ``self.census_state_path`` and ``self.log`` being set by the enclosing
    class — not visible from this chunk.
    """
    cmdc_client = cmdc.Client(apikey=self.cmdc_key)
    cmdc_client.covid_us()
    df = cmdc_client.fetch()
    # Transform FIPS from an int64 to a string of 2 or 5 chars. See
    # https://github.com/valorumdata/cmdc.py/issues/3
    # States have 2-digit codes (< 100); counties 5-digit, zero-padded.
    df[CommonFields.FIPS] = df[Fields.LOCATION].apply(
        lambda v: f"{v:0>{2 if v < 100 else 5}}")
    # Already transformed from Fields to CommonFields
    already_transformed_fields = {CommonFields.FIPS}
    # Diff the fetched columns against the expected Fields enum so schema
    # drift in the upstream API is surfaced as a warning rather than a crash.
    extra_fields = set(
        df.columns) - set(Fields) - already_transformed_fields
    missing_fields = set(Fields) - set(df.columns)
    if extra_fields or missing_fields:
        # If this warning happens in a test you may need to edit the sample data in test/data
        # to make sure all the expected fields appear in the sample.
        self.log.warning(
            "columns from cmdc do not match Fields",
            extra_fields=extra_fields,
            missing_fields=missing_fields,
        )
    # TODO(tom): Factor out this rename and re-order code. It is stricter than
    # update_covid_data_scraper because this code expects every field in the source DataFrame
    # to appear in Fields.
    # Build the old-name -> common-name mapping; FIPS maps to itself since
    # it was converted above.
    rename: MutableMapping[str, str] = {
        f: f for f in already_transformed_fields
    }
    for col in df.columns:
        field = Fields.get(col)
        if field and field.common_field:
            if field.value in rename:
                raise AssertionError("Field misconfigured")
            rename[field.value] = field.common_field.value
    # Copy only columns in `rename.keys()` to a new DataFrame and rename.
    df = df.loc[:, list(rename.keys())].rename(columns=rename)
    df[CommonFields.COUNTRY] = "USA"
    # Partition df by region type so states and counties can be merged with
    # different data to get their names.
    state_mask = df[CommonFields.FIPS].str.len() == 2
    states = df.loc[state_mask, :]
    counties = df.loc[~state_mask, :]
    # Attach state/county names to county rows via a left merge on FIPS;
    # suffixes=(False, False) makes pandas raise on column collisions.
    fips_data = load_county_fips_data(self.county_fips_csv).set_index(
        [CommonFields.FIPS])
    counties = counties.merge(
        fips_data[[CommonFields.STATE, CommonFields.COUNTY]],
        left_on=[CommonFields.FIPS],
        suffixes=(False, False),
        how="left",
        right_index=True,
    )
    # Drop (and warn about) counties whose FIPS has no match in the
    # reference data rather than keeping rows with NaN names.
    no_match_counties_mask = counties.state.isna()
    if no_match_counties_mask.sum() > 0:
        self.log.warning(
            "Some counties did not match by fips",
            bad_fips=counties.loc[no_match_counties_mask,
                                  CommonFields.FIPS].unique().tolist(),
        )
    counties = counties.loc[~no_match_counties_mask, :]
    counties[CommonFields.AGGREGATE_LEVEL] = "county"
    # State rows only need the state abbreviation from the census file.
    state_df = load_census_state(self.census_state_path).set_index(
        CommonFields.FIPS)
    states = states.merge(
        state_df[[CommonFields.STATE]],
        left_on=[CommonFields.FIPS],
        suffixes=(False, False),
        how="left",
        right_index=True,
    )
    states[CommonFields.AGGREGATE_LEVEL] = "state"
    # Recombine, order columns canonically, and index by the timeseries
    # keys; verify_integrity guards against duplicate (fips, date) rows.
    df = pd.concat([states, counties])
    df = sort_common_field_columns(df)
    df = df.set_index(COMMON_FIELDS_TIMESERIES_KEYS, verify_integrity=True)
    return df
def client():
    """Build a cmdc.Client with the api key from the CMDC_API_KEY env var.

    The key is None when the variable is unset.
    """
    apikey = os.environ.get("CMDC_API_KEY")
    return cmdc.Client(apikey=apikey)