def pipe_week_to_date(df):
    """Replace the ``year_week`` key with a concrete ``date``.

    Daily records keep their own date; weekly records receive the latest
    date observed among the daily records of the same ``year_week``.

    Parameters: df with columns date, year_week, indicator (plus values).
    Returns: df with ``year_week`` dropped and a date on every row.
    """
    # NOTE(review): the int64 dtype check runs *after* the first clean —
    # presumably clean_date_series leaves int64 input untouched so the
    # second call handles compact YYYYMMDD integers; confirm against helper.
    df["date"] = clean_date_series(df.date, "%Y-%m-%d")
    if df.date.dtypes == "int64":
        df["date"] = clean_date_series(df.date, "%Y%m%d")
    daily_records = df[df["indicator"].str.contains("Daily")]
    # The latest daily date inside each year_week stands in as that week's date
    date_week_mapping = daily_records[["year_week", "date"]].groupby(
        "year_week", as_index=False).max()
    weekly_records = df[df["indicator"].str.contains("Weekly")].drop(
        columns="date")
    weekly_records = pd.merge(weekly_records, date_week_mapping, on="year_week")
    df = pd.concat([daily_records, weekly_records]).drop(columns="year_week")
    return df
def read(self) -> pd.DataFrame:
    """Download the source CSV and normalise its publication-date column."""
    columns = ["Publication date", "Total NAA and rapid antigen tests"]
    df = pd.read_csv(self.source_url, usecols=columns)
    # Dates arrive as DD/MM/YYYY strings
    df["Publication date"] = clean_date_series(df["Publication date"], "%d/%m/%Y")
    return df
def read(self) -> pd.DataFrame:
    """Fetch the JSON feed and return report_time/samples rows with clean dates."""
    payload = request_json(self.source_url)
    records = [row["value"] for row in payload["rows"]]
    # Restrict to the two columns this pipeline consumes
    df = pd.DataFrame.from_records(records, columns=["report_time", "samples"])
    # ISO timestamp with microseconds and timezone offset
    df["report_time"] = clean_date_series(df["report_time"], "%Y-%m-%dT%H:%M:%S.%f%z")
    return df
def main() -> pd.DataFrame:
    """Build the weekly new hospital admissions table for this source."""
    df = pd.read_csv(METADATA["source_url"], usecols=["Week ending:", "National"])
    df = df.rename(columns={"Week ending:": "date", "National": "value"})
    df["indicator"] = "Weekly new hospital admissions"
    # keep=False: dates that appear more than once are dropped entirely
    df = df.drop_duplicates("date", keep=False)
    df["date"] = clean_date_series(df.date, "%d/%m/%Y")
    df["entity"] = METADATA["entity"]
    return df, METADATA
def main() -> pd.DataFrame:
    """Return daily ICU occupancy rows in long (date/indicator/value) form."""
    df = pd.read_csv(METADATA["source_url"], usecols=["Fecha", "Total UTI"])
    df["Fecha"] = clean_date_series(df.Fecha, "%d/%m/%Y")
    df = df.rename(columns={"Fecha": "date"})
    df = df.melt("date", var_name="indicator")
    df["indicator"] = df.indicator.replace({"Total UTI": "Daily ICU occupancy"})
    df["entity"] = METADATA["entity"]
    return df, METADATA
def main():
    """Return daily hospital and ICU occupancy in long form."""
    column_map = {
        "ts": "date",
        "hospitalized": "Daily hospital occupancy",
        "ventilated": "Daily ICU occupancy",
    }
    df = pd.read_csv(METADATA["source_url"], usecols=list(column_map))
    df = df.rename(columns=column_map)
    df = df.melt(id_vars="date", var_name="indicator", value_name="value")
    df["entity"] = METADATA["entity"]
    # Source timestamps carry a time component that clean_date_series strips
    df["date"] = clean_date_series(df.date, "%Y-%m-%d %H:%M:%S")
    return df, METADATA
def main() -> pd.DataFrame:
    """Return daily hospital and ICU occupancy for Finland."""
    payload = requests.get(METADATA["source_url"]).json()
    df = pd.DataFrame.from_records(payload["hospitalised"])
    # Keep only the country-level series, not sub-areas
    df = df.loc[df.area == "Finland", ["date", "totalHospitalised", "inIcu"]]
    df["date"] = clean_date_series(df.date, "%Y-%m-%dT%H:%M:%S.%fZ")
    df = df.melt("date", var_name="indicator").dropna(subset=["value"])
    df["indicator"] = df.indicator.replace(
        {
            "totalHospitalised": "Daily hospital occupancy",
            "inIcu": "Daily ICU occupancy",
        }
    )
    df["entity"] = "Finland"
    return df, METADATA
def main():
    """Scrape daily admissions and roll them up to weekly totals."""
    soup = get_soup(METADATA["source_url_ref"])
    # Chart data is embedded as JSON in a Vue component attribute
    records = json.loads(soup.find("cv-stats-virus")[":charts-data"])
    df = pd.DataFrame.from_records(records, columns=["date", "hospitalized"])
    df = df.rename(columns={"hospitalized": "value"})
    df["entity"] = METADATA["entity"]
    df["indicator"] = "Weekly new hospital admissions"
    df["date"] = clean_date_series(df.date, "%d.%m.%Y")
    # Drop non-positive values, then sum daily admissions over 7-day windows
    df = df[df.value > 0].sort_values("date")
    df["value"] = df.value.rolling(7).sum()
    df = df.dropna(subset=["value"])
    return df, METADATA
def main() -> pd.DataFrame:
    """Return daily hospital and ICU occupancy in long form."""
    df = pd.read_csv(METADATA["source_url"], usecols=["data", "internados", "internados_uci"])
    df = df.rename(columns={"data": "date"})
    df["date"] = clean_date_series(df.date, "%d-%m-%Y")
    df = df.melt("date", var_name="indicator").dropna(subset=["value"])
    df["indicator"] = df.indicator.replace(
        {
            "internados": "Daily hospital occupancy",
            "internados_uci": "Daily ICU occupancy",
        }
    )
    df["entity"] = METADATA["entity"]
    return df, METADATA
def pipeline_age(self, df):
    """Reshape the wide age-disaggregated sheet into per-age-group
    vaccination coverage rows (location/date/age range/percentages).
    """
    # Melt: one row per (Date, original column)
    df = df.melt("Date")
    # Separate age group and variable; the regex splits e.g. "60-69 1st perc"
    # into ("60", "69", "1st perc") and "80+ 1st perc" into ("80", "", "1st perc")
    var = df.variable.str.extract(r"(\d+)[\+\-](\d*)\s(.+)")
    # Assign new columns and clean date
    df = df.assign(
        age_group_min=var[0],
        age_group_max=var[1],
        variable=var[2],
        date=clean_date_series(df.Date, "%Y-%m-%dT%H:%M:%S.%fZ"),
    )
    # Keep last entry for each date (sort first so "last" is the latest)
    df = df.sort_values("date")
    df = df.drop_duplicates(
        subset=["date", "variable", "age_group_min", "age_group_max"],
        keep="last")
    df = df.drop(columns="Date")
    # Pivot to one row per (date, age range); flatten the resulting
    # (value, variable) MultiIndex columns down to the variable name
    df = df.pivot(index=["date", "age_group_min", "age_group_max"],
                  columns=["variable"], values=["value"])
    df.columns = [col[1] for col in df.columns]
    df = df.reset_index()
    # Ignore age group 10-19
    df = df[(df.age_group_min != "10") | (df.age_group_max != "19")]
    # Final column creations: map dose labels onto standard metric names
    df = df.assign(location=self.location).rename(
        columns={
            "1st perc": "people_vaccinated_per_hundred",
            "2nd perc": "people_fully_vaccinated_per_hundred",
            "3rd perc": "people_with_booster_per_hundred",
        })
    # Select output columns
    df = df[[
        "location",
        "date",
        "age_group_min",
        "age_group_max",
        "people_vaccinated_per_hundred",
        "people_fully_vaccinated_per_hundred",
        "people_with_booster_per_hundred",
    ]]
    return df
def main():
    """Return daily hospital and ICU occupancy from the JSON API."""
    response = requests.get(METADATA["source_url"]).json()
    # Round-trip through a JSON string so pandas does the record parsing
    df = pd.read_json(json.dumps(response["data"]), orient="records")
    df = df[["date", "total_hospitalizations", "total_criticals"]]
    df = df.melt("date", ["total_hospitalizations", "total_criticals"], "indicator")
    df["indicator"] = df.indicator.replace(
        {
            "total_hospitalizations": "Daily hospital occupancy",
            "total_criticals": "Daily ICU occupancy",
        }
    )
    df["date"] = clean_date_series(df.date, "%Y.%m.%d")
    df["entity"] = METADATA["entity"]
    return df, METADATA
def main() -> pd.DataFrame:
    """Return national daily hospital and ICU occupancy."""
    df = read()
    # "Alle" is the country-wide aggregate; drop the per-state dimension
    df = df[df.Bundesland == "Alle"].drop(columns="Bundesland")
    df = df.rename(columns={"Meldedat": "date"})
    df["date"] = clean_date_series(df.date, "%d.%m.%Y")
    # FZHosp only includes patients in a "normal ward", i.e. all patients – ICU patients
    df["FZHosp"] = df.FZHosp + df.FZICU
    df = df.melt("date", var_name="indicator").dropna(subset=["value"])
    df["indicator"] = df.indicator.replace(
        {
            "FZHosp": "Daily hospital occupancy",
            "FZICU": "Daily ICU occupancy",
        }
    )
    df["entity"] = METADATA["entity"]
    return df, METADATA
def main():
    """Return hospital/ICU occupancy and weekly hospital admissions."""
    usecols = [
        "date",
        "total_adult_patients_hospitalized_confirmed_covid",
        "total_pediatric_patients_hospitalized_confirmed_covid",
        "staffed_icu_adult_patients_confirmed_covid",
        "previous_day_admission_adult_covid_confirmed",
        "previous_day_admission_pediatric_covid_confirmed",
    ]
    df = pd.read_csv(METADATA["source_url"], usecols=usecols)
    df["date"] = clean_date_series(df.date, "%Y/%m/%d")
    df = df[df.date >= "2020-07-15"]
    # Sum rows sharing a date (presumably per-facility/state records — TODO
    # confirm against source schema), then drop the last two (incomplete) days
    df = df.groupby("date", as_index=False).sum().sort_values("date").head(-2)
    # Occupancy = adult + pediatric inpatients, treating missing as 0
    adult_stock = df.total_adult_patients_hospitalized_confirmed_covid.fillna(0)
    pediatric_stock = df.total_pediatric_patients_hospitalized_confirmed_covid.fillna(0)
    df["total_hospital_stock"] = adult_stock.add(pediatric_stock)
    adult_flow = df.previous_day_admission_adult_covid_confirmed.fillna(0)
    pediatric_flow = df.previous_day_admission_pediatric_covid_confirmed.fillna(0)
    df["total_hospital_flow"] = adult_flow.add(pediatric_flow)
    # Daily admissions -> 7-day rolling total
    df["total_hospital_flow"] = df.total_hospital_flow.rolling(7).sum()
    keep = [
        "date",
        "total_hospital_stock",
        "total_hospital_flow",
        "staffed_icu_adult_patients_confirmed_covid",
    ]
    df = df[keep].melt("date", var_name="indicator").dropna(subset=["value"])
    df["indicator"] = df.indicator.replace(
        {
            "total_hospital_stock": "Daily hospital occupancy",
            "staffed_icu_adult_patients_confirmed_covid": "Daily ICU occupancy",
            "total_hospital_flow": "Weekly new hospital admissions",
        }
    )
    df["entity"] = METADATA["entity"]
    return df, METADATA
def main() -> pd.DataFrame:
    """Scrape the latest capacity CSV link and return stock/flow indicators."""
    soup = get_soup(METADATA["source_url_ref"])
    url = soup.find(class_="informacion").find("a")["href"]
    url = "https://www.mscbs.gob.es/profesionales/saludPublica/ccayes/alertasActual/nCov/" + url
    df = pd.read_csv(
        url,
        usecols=["Fecha", "Unidad", "OCUPADAS_COVID19", "INGRESOS_COVID19", "Provincia", "CCAA"],
        encoding="Latin-1",
        sep=";",
    )
    df["Fecha"] = clean_date_series(df.Fecha, "%d/%m/%Y")
    df = df.drop_duplicates(subset=["Fecha", "Unidad", "Provincia", "CCAA"], keep="first")
    df = df.dropna(subset=["Unidad"])
    # Collapse all critical-care unit labels into a single "ICU" bucket
    df.loc[df.Unidad.str.contains("U. Críticas"), "Unidad"] = "ICU"
    df = df.drop(columns=["Provincia", "CCAA"])
    df = df.groupby(["Fecha", "Unidad"], as_index=False).sum().sort_values("Unidad")
    df = df.pivot(index="Fecha", columns="Unidad").reset_index().sort_values("Fecha")
    # Positional rename relies on the Unidad sort fixing the pivot column order
    df.columns = ["date", "hosp_stock", "icu_stock", "hosp_flow", "icu_flow"]
    # Daily admissions -> 7-day totals
    df["hosp_flow"] = df.hosp_flow.rolling(7).sum()
    df["icu_flow"] = df.icu_flow.rolling(7).sum()
    df = df.melt("date", var_name="indicator").dropna(subset=["value"])
    df["indicator"] = df.indicator.replace(
        {
            "hosp_flow": "Weekly new hospital admissions",
            "icu_flow": "Weekly new ICU admissions",
            "hosp_stock": "Daily hospital occupancy",
            "icu_stock": "Daily ICU occupancy",
        }
    )
    df["entity"] = METADATA["entity"]
    return df, METADATA
def _load_cases(self):
    """Download the HK CHP case CSV and attach a cleaned ``Date`` column."""
    url = "http://www.chp.gov.hk/files/misc/latest_situation_of_reported_cases_covid_19_eng.csv"
    naa_col = "Number of cases tested positive for SARS-CoV-2 virus by nucleic acid tests"
    rat_col = "Number of cases tested positive for SARS-CoV-2 virus by rapid antigen tests"
    df = pd.read_csv(url, usecols=["As of date", "Number of confirmed cases", naa_col, rat_col])
    # Combined positives = NAA + RAT (missing RAT values counted as 0)
    df["Number of cases tested positive for SARS-CoV-2 virus"] = df[naa_col] + df[rat_col].fillna(0)
    # Where confirmed cases are missing, fall back to the combined positives
    df["Number of confirmed cases"] = df["Number of confirmed cases"].fillna(
        df["Number of cases tested positive for SARS-CoV-2 virus"]
    )
    return df.assign(Date=clean_date_series(df["As of date"], "%d/%m/%Y"))
def main() -> pd.DataFrame:
    """Merge hospital admission (flow) and occupancy (stock) series."""
    hosp_flow = pd.read_csv(METADATA["source_url"]["flow"], usecols=["data", "casi"])
    # Drop the trailing 5 rows (most recent, likely incomplete — TODO confirm)
    hosp_flow = hosp_flow.rename(columns={"data": "date"}).sort_values("date").head(-5)
    # Daily admissions -> 7-day totals
    hosp_flow["casi"] = hosp_flow.casi.rolling(7).sum()
    stock = pd.read_csv(
        METADATA["source_url"]["main"],
        usecols=[
            "data",
            "totale_ospedalizzati",
            "terapia_intensiva",
            "ingressi_terapia_intensiva",
        ],
    )
    stock = stock.rename(columns={"data": "date"}).sort_values("date")
    stock["date"] = clean_date_series(stock.date, "%Y-%m-%dT%H:%M:%S")
    stock["ingressi_terapia_intensiva"] = stock.ingressi_terapia_intensiva.rolling(7).sum()
    df = pd.merge(hosp_flow, stock, on="date", how="outer", validate="one_to_one")
    df = df.melt("date", var_name="indicator").dropna(subset=["value"])
    df["entity"] = METADATA["entity"]
    df["indicator"] = df.indicator.replace(
        {
            "casi": "Weekly new hospital admissions",
            "ingressi_terapia_intensiva": "Weekly new ICU admissions",
            "totale_ospedalizzati": "Daily hospital occupancy",
            "terapia_intensiva": "Daily ICU occupancy",
        }
    )
    return df, METADATA
def main() -> pd.DataFrame:
    """Return hospital/ICU occupancy and weekly new admissions."""
    df = pd.read_csv(
        METADATA["source_url"],
        usecols=[
            "Datum",
            "Kliniek_Bedden_Nederland",
            "IC_Bedden_COVID_Nederland",
            "Kliniek_Nieuwe_Opnames_COVID_Nederland",
            "IC_Nieuwe_Opnames_COVID_Nederland",
        ],
    )
    df["Datum"] = clean_date_series(df.Datum, "%d-%m-%Y")
    df = df.rename(columns={"Datum": "date"}).sort_values("date")
    # Daily admissions -> 7-day totals
    for flow_col in ("Kliniek_Nieuwe_Opnames_COVID_Nederland", "IC_Nieuwe_Opnames_COVID_Nederland"):
        df[flow_col] = df[flow_col].rolling(7).sum()
    df = df.melt("date", var_name="indicator").dropna(subset=["value"])
    df["indicator"] = df.indicator.replace(
        {
            "Kliniek_Bedden_Nederland": "Daily hospital occupancy",
            "IC_Bedden_COVID_Nederland": "Daily ICU occupancy",
            "Kliniek_Nieuwe_Opnames_COVID_Nederland": "Weekly new hospital admissions",
            "IC_Nieuwe_Opnames_COVID_Nederland": "Weekly new ICU admissions",
        }
    )
    df["entity"] = METADATA["entity"]
    # keep=False: ambiguous duplicated (date, indicator) pairs are dropped entirely
    df = df.drop_duplicates(["date", "indicator"], keep=False)
    return df, METADATA
def read(self) -> pd.DataFrame:
    """Read the source CSV, drop incomplete rows, and clean types."""
    df = pd.read_csv(self.source_url, usecols=["Unnamed: 0", "Bolivia"])
    df = df.dropna()
    df["Bolivia"] = df["Bolivia"].astype(int)
    # The first (unnamed) column holds the date; format is auto-detected
    df["Unnamed: 0"] = clean_date_series(df["Unnamed: 0"])
    return df
def pipe_date(self, df):
    """Parse the ``Date`` column from DD/MM/YYYY strings."""
    cleaned = clean_date_series(df["Date"], "%d/%m/%Y")
    return df.assign(Date=cleaned)
def pipe_date(self, df: pd.DataFrame, date_var: str) -> pd.DataFrame:
    """Add a ``date`` column parsed from ``date_var`` (DD/MM/YYYY)."""
    parsed = clean_date_series(df[date_var], "%d/%m/%Y")
    return df.assign(date=parsed)
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
    """Clean the ``("date", "date")`` MultiIndex column in place and return df.

    NOTE(review): the format string deliberately begins with a literal
    apostrophe ("'%y.%m.%d") — presumably the raw cells look like
    "'21.03.15"; confirm against the source sheet before changing it.
    """
    df.loc[:, ("date", "date")] = clean_date_series(df[("date", "date")],
                                                    format_input="'%y.%m.%d")
    return df
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
    """Normalise the ``date`` column from DD-MM-YYYY strings."""
    parsed_dates = clean_date_series(df.date, format_input="%d-%m-%Y")
    return df.assign(date=parsed_dates)
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame: """Cleans date column""" return df.assign(Date=clean_date_series(df["Date"], "%Y-%m-%d"))
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
    """Clean the ``date`` column using the helper's default format handling."""
    return df.assign(date=clean_date_series(df["date"]))
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame: """Convert date to datetime""" return df.assign(Date=clean_date_series(df["Date"], "%d/%m/%Y"))
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
    """Normalise ISO timestamps (YYYY-MM-DDTHH:MM:SS) in the ``date`` column."""
    timestamps = clean_date_series(df["date"], "%Y-%m-%dT%H:%M:%S")
    return df.assign(date=timestamps)
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
    """Clean DD/MM/YYYY dates and order rows chronologically."""
    df = df.assign(date=clean_date_series(df.date, "%d/%m/%Y"))
    return df.sort_values("date")
def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame: """Clean date""" return df.assign( Date=clean_date_series(df.iloc[:, 0], "%d.%m.%y")).sort_values("Date")
def format_date(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with its ``date`` column parsed from DD/MM/YYYY strings."""
    day_first_dates = clean_date_series(df.date, "%d/%m/%Y")
    return df.assign(date=day_first_dates)
def read(self) -> pd.DataFrame:
    """Read the testing CSV and normalise its timestamp column."""
    df = pd.read_csv(self.source_url, usecols=["data", "tamponi"])
    # "YYYY-MM-DDTHH:MM:SS" -> "YYYY-MM-DD HH:MM:SS" before parsing
    timestamps = df["data"].str.replace("T", " ")
    df["data"] = clean_date_series(timestamps, "%Y-%m-%d %H:%M:%S")
    return df