def load_standardized(filename):
    df = _load_merged(filename) \
        .drop(columns=[
            'countriesAndTerritories', 'geoId',
            'day', 'month', 'year',
        ]) \
        .rename(columns={
            'dateRep': 'date',
            'cases': 'new_cases',
            'deaths': 'new_deaths'
        })
    df = df[['date', 'location', 'new_cases', 'new_deaths']]
    df = discard_rows(df)
    df = inject_owid_aggregates(df)
    df = inject_total_daily_cols(df, ['cases', 'deaths'])
    df = inject_per_million(df, [
        'new_cases', 'new_deaths',
        'total_cases', 'total_deaths'
    ])
    df = inject_cfr(df)
    df = inject_rolling_avg(df)
    df = inject_days_since_all(df)
    return df.sort_values(by=['location', 'date'])
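
# The inject_* helpers called above are not defined in this snippet. As a
# point of reference, a minimal sketch of inject_total_daily_cols follows; it
# assumes the helper derives cumulative total_* columns per location from the
# daily new_* columns. This is an illustrative sketch, not the original
# implementation.
def inject_total_daily_cols(df, measures):
    # Cumulative totals only make sense over a date-ordered series
    # within each location.
    df = df.sort_values(by=['location', 'date'])
    for measure in measures:
        # Running sum of daily counts, e.g. total_cases from new_cases.
        df['total_%s' % measure] = \
            df.groupby('location')['new_%s' % measure].cumsum()
    return df
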
def load_standardized(filename):
    df = _load_merged(filename) \
        .drop(columns=[
            'Countries and territories', 'GeoId',
            'Day', 'Month', 'Year',
        ]) \
        .rename(columns={
            'DateRep': 'date',
            'Cases': 'new_cases',
            'Deaths': 'new_deaths'
        })
    df = df[['date', 'location', 'new_cases', 'new_deaths']]
    df = inject_world(df)
    df = inject_total_daily_cols(df, ['cases', 'deaths'])
    df = inject_per_million(df, [
        'new_cases', 'new_deaths',
        'total_cases', 'total_deaths'
    ])
    df = inject_days_since_all(df)
    df = inject_cfr(df)
    return df.sort_values(by=['location', 'date'])
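
# Another helper used above, inject_per_million, is likewise not shown. A
# minimal sketch follows, assuming the *_per_million columns are the raw
# columns divided by each location's population in millions. The
# load_population helper and its location -> population mapping are
# hypothetical stand-ins for whatever population source the real pipeline
# uses.
def inject_per_million(df, measures):
    populations = load_population()  # hypothetical: dict of location -> population
    millions = df['location'].map(populations) / 1_000_000
    for measure in measures:
        df['%s_per_million' % measure] = df[measure] / millions
    return df
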
def load_standardized():
    df = _load_merged() \
        .drop(columns=[
            'Countries and territories', 'GeoId',
            'Day', 'Month', 'Year',
        ]) \
        .rename(columns={
            'DateRep': 'date',
            'Cases': 'new_cases',
            'Deaths': 'new_deaths'
        })
    df = df[['date', 'location', 'new_cases', 'new_deaths']]
    df = inject_world(df)
    df = inject_total_daily_cols(df, ['cases', 'deaths'])
    df = inject_per_million(
        df, ['new_cases', 'new_deaths', 'total_cases', 'total_deaths'])
    df = inject_days_since_all(df)
    df_dupe_mask = df.duplicated(subset=['location', 'date'])
    if df_dupe_mask.any():
        print("Dataset contains duplicates for date, location:")
        df_dupes = df[df_dupe_mask]
        print(df_dupes)
        assert False
    return df.sort_values(by=['location', 'date'])
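
# inject_world, used in the last two versions, is also undefined here. A
# minimal sketch under the assumption that it sums the per-country daily
# counts into a synthetic 'World' location, one row per date, and appends
# those rows to the frame:
import pandas as pd

def inject_world(df):
    # Aggregate every location's daily counts into one series per date.
    world = df.groupby('date', as_index=False)[['new_cases', 'new_deaths']].sum()
    world['location'] = 'World'
    return pd.concat([df, world], ignore_index=True)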