def to_csv(self, paths):
    """Export main, manufacturer and age-group datasets with their metadata.

    Main and age-group files are written once for each of Switzerland and
    Liechtenstein; the manufacturer file is written for Switzerland only.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    source_name = "Federal Office of Public Health"
    countries = ["Switzerland", "Liechtenstein"]
    df, df_manuf, df_age = self.read()

    # Main data: one file per country
    for country in countries:
        df_main = df.pipe(self.pipeline, country)
        df_main.to_csv(paths.tmp_vax_out(country), index=False)

    # Manufacturer
    df_manuf = df_manuf.pipe(self.pipeline_manufacturer)
    df_manuf.to_csv(paths.tmp_vax_out_man("Switzerland"), index=False)
    export_metadata(df_manuf, source_name, self.source_url, paths.tmp_vax_metadata_man)

    # Age: one file per country, metadata exported after each one
    for country in countries:
        df_age_country = df_age.pipe(self.pipeline_age, country)
        df_age_country.to_csv(paths.tmp_vax_out_by_age_group(country), index=False)
        export_metadata(df_age_country, source_name, self.source_url, paths.tmp_vax_metadata_age)
def to_csv(self, paths):
    """Export main, manufacturer and age-group datasets with their metadata.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    source_name = "Ministry of Health via vacuna.uy"
    raw, raw_age = self.read()

    # Main data
    main = raw.pipe(self.pipeline)
    main.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Manufacturer data (built from the same frame as the main data)
    manufacturer = raw.pipe(self.pipeline_manufacturer)
    manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    export_metadata(manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)

    # Age data (metadata points at the age-specific source URL)
    by_age = raw_age.pipe(self.pipeline_age)
    by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
    export_metadata(by_age, source_name, self.source_url_age, paths.tmp_vax_metadata_age)
def _export_metadata(self, df, output_path):
    """Export metadata for ``df`` to ``output_path``, attributed to the ECDC.

    Args:
        df: Data the metadata is exported for.
        output_path: Destination handed to ``export_metadata``.
    """
    ecdc_name = "European Centre for Disease Prevention and Control (ECDC)"
    export_metadata(
        df=df,
        source_name=ecdc_name,
        source_url=self.source_url_ref,
        output_path=output_path,
    )
def _export_datafile_secondary(self, df, metadata, output_path, output_path_meta):
    """Write a secondary dataset to CSV and export its metadata.

    Args:
        df: Data to write.
        metadata: Mapping read for its ``source_name`` and ``source_url``
            entries; checked with ``self._check_metadata`` first.
        output_path: Destination of the CSV file.
        output_path_meta: Destination handed to ``export_metadata``.
    """
    # Check the metadata before anything is written
    self._check_metadata(metadata)

    # Data file
    df.to_csv(output_path, index=False)

    # Metadata
    export_metadata(
        df,
        metadata["source_name"],
        metadata["source_url"],
        output_path_meta,
    )
def to_csv(self, paths):
    """Export the main and manufacturer datasets, plus manufacturer metadata.

    Both outputs are derived from the same base frame
    (``self.read()`` piped through ``self.pipeline_base``).

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    base = self.read().pipe(self.pipeline_base)

    # Main data
    main = base.pipe(self.pipeline)
    main.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Manufacturer data
    manufacturer = base.pipe(self.pipeline_manufacturer)
    manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    export_metadata(
        manufacturer,
        "Robert Koch Institut",
        self.source_url_ref,
        paths.tmp_vax_metadata_man,
    )
def to_csv(self, paths):
    """Export the main and manufacturer datasets, plus manufacturer metadata.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below; also passed to ``self.read_manufacturer``.
    """
    # Main data
    main = self.read().pipe(self.pipeline)
    main.to_csv(paths.tmp_vax_out("United States"), index=False)

    # Manufacturer data
    manufacturer = self.read_manufacturer(paths)
    manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    cdc_url = "https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData?id=vaccination_data"
    export_metadata(
        manufacturer,
        "Centers for Disease Control and Prevention",
        cdc_url,
        paths.tmp_vax_metadata_man,
    )
def to_csv(self, paths):
    """Export the main and age-group datasets, plus age-group metadata.

    Both outputs are derived from the single frame returned by
    ``self.read()``.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    raw = self.read()

    # Main data
    main = raw.pipe(self.pipeline)
    main.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Age data
    by_age = raw.pipe(self.pipeline_age)
    by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
    export_metadata(by_age, "Government of Jersey", self.source_url, paths.tmp_vax_metadata_age)
def export(self, paths):
    """Export the main and age-group datasets, plus age-group metadata.

    The main data comes from ``self.read()`` and the age data from
    ``self.read_age()``.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    # Main data
    main = self.read().pipe(self.pipeline)
    main.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Age data
    by_age = self.read_age().pipe(self.pipeline_age)
    by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
    source_name = "Ministerio de Salud via https://github.com/jmcastagnetto/covid-19-peru-vacunas"
    export_metadata(by_age, source_name, self.source_url_ref, paths.tmp_vax_metadata_age)
def main(paths):
    """Export the Hong Kong main and manufacturer datasets, plus metadata.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    source = "https://static.data.gov.hk/covid-vaccine/bar_vaccination_date.json"
    data = read(source).pipe(pipeline)

    # Main data: the per-vaccine totals are dropped from the main file
    main_path = paths.tmp_vax_out("Hong Kong")
    main = data.drop(columns=["total_pfizer", "total_sinovac"])
    main.to_csv(main_path, index=False)

    # Manufacturer data
    manufacturer_path = paths.tmp_vax_out_man("Hong Kong")
    manufacturer = data.pipe(manufacturer_pipeline)
    manufacturer.to_csv(manufacturer_path, index=False)
    export_metadata(
        manufacturer,
        "Government of Hong Kong",
        source,
        paths.tmp_vax_metadata_man,
    )
def export(self, paths):
    """Export the main and age-group datasets, plus age-group metadata.

    The main data comes from ``self.read()`` and the age data from
    ``self.read_age()``.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    # Main data
    main_path = paths.tmp_vax_out(self.location)
    main = self.read().pipe(self.pipeline)
    main.to_csv(main_path, index=False)

    # Age data
    by_age = self.read_age().pipe(self.pipeline_age)
    by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
    source_name = "Ministry of Health via github.com/dancarmoz/israel_moh_covid_dashboard_data"
    export_metadata(by_age, source_name, self.source_url_age, paths.tmp_vax_metadata_age)
def main(paths):
    """Export the Czechia manufacturer and main datasets, plus metadata.

    The manufacturer file and its metadata are written first, then the
    main file; both derive from the same base frame.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    location = "Czechia"
    source = "https://onemocneni-aktualne.mzcr.cz/api/v2/covid-19/ockovani.csv"
    base = read(source).pipe(base_pipeline)

    # Manufacturer data
    manufacturer = base.pipe(breakdown_per_vaccine)
    manufacturer.to_csv(paths.tmp_vax_out_man(location), index=False)
    export_metadata(
        manufacturer,
        "Ministry of Health",
        source,
        paths.tmp_vax_metadata_man,
    )

    # Main data
    main_df = base.pipe(global_pipeline)
    main_df.to_csv(paths.tmp_vax_out(location), index=False)
def to_csv(self, paths):
    """Export the main and manufacturer datasets, plus manufacturer metadata.

    Each pipeline receives its own copy of the base frame.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    base = self.read().pipe(self.pipeline_base)

    # Main data
    main = base.copy().pipe(self.pipeline)
    main.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Manufacturer data
    manufacturer = base.copy().pipe(self.pipeline_manufacturer)
    manufacturer.to_csv(paths.tmp_vax_out_man(f"{self.location}"), index=False)
    source_name = "Government of Romania via datelazi.ro"
    export_metadata(manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)
def to_csv(self, paths):
    """Export the main and manufacturer datasets, plus manufacturer metadata.

    Both outputs derive from ``self.read()`` piped through
    ``self.pipe_base``.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    base = self.read().pipe(self.pipe_base)

    # Main data
    main = base.pipe(self.pipeline)
    main.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Manufacturer data
    manufacturer = base.pipe(self.pipeline_manufacturer)
    manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    export_metadata(manufacturer, "National Health Service", self.source_url, paths.tmp_vax_metadata_man)
def to_csv(self, paths):
    """Export the main and manufacturer datasets, plus manufacturer metadata.

    Both outputs derive from the single frame returned by ``self.read()``.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    raw = self.read()

    # Main data
    main = raw.pipe(self.pipeline)
    main.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Manufacturer data
    manufacturer = raw.pipe(self.pipeline_manufacturer)
    manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    source_name = "National Security and Defense Council of Ukraine"
    export_metadata(manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)
def to_csv(self, paths) -> None:
    """Export the main and manufacturer datasets, plus manufacturer metadata.

    Also stores the result of ``self.vaccine_start_dates`` on
    ``self.vax_date_mapping`` before either pipeline runs.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    base = self.read().pipe(self.pipeline_base)
    # Set before the pipelines are applied, as in the original ordering
    self.vax_date_mapping = self.vaccine_start_dates(base)

    # Main data
    main = base.pipe(self.pipeline)
    main.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Manufacturer data
    manufacturer = base.pipe(self.pipeline_manufacturer)
    manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    source_name = "Extraordinary commissioner for the Covid-19 emergency"
    export_metadata(manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)
def to_csv(self, paths):
    """Export the manufacturer dataset with metadata, then the main dataset.

    The two datasets are read from separate URLs
    (``self.source_url_manufacturer`` and ``self.source_url_vaccinations``).

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    # Manufacturer
    raw_manufacturer = self.read(self.source_url_manufacturer)
    manufacturer = raw_manufacturer.pipe(self.pipeline_manufacturer)
    manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    source_name = "Ministerio de Ciencia, Tecnología, Conocimiento e Innovación"
    export_metadata(manufacturer, source_name, self.source_url_ref, paths.tmp_vax_metadata_man)

    # Main data
    raw_vaccinations = self.read(self.source_url_vaccinations)
    vaccinations = raw_vaccinations.pipe(self.pipeline_vaccinations)
    vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)
def to_csv(self, paths):
    """Export main data for two countries and manufacturer data for one.

    The main pipeline runs with ``country_code="CH"`` for Switzerland and
    ``country_code="FL"`` for Liechtenstein; manufacturer data and metadata
    are written for Switzerland only.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    vaccinations, manufacturers = self.read()

    # Main data, one file per country (in this order)
    for code, country in (("CH", "Switzerland"), ("FL", "Liechtenstein")):
        country_df = vaccinations.pipe(self.pipeline, country_code=code)
        country_df.to_csv(paths.tmp_vax_out(country), index=False)

    # Manufacturer data
    manufacturer = manufacturers.pipe(self.pipeline_manufacturer)
    manufacturer.to_csv(paths.tmp_vax_out_man("Switzerland"), index=False)
    export_metadata(
        manufacturer,
        "Federal Office of Public Health",
        self.source_url,
        paths.tmp_vax_metadata_man,
    )
def to_csv(self, paths):
    """Export the manufacturer dataset with metadata, then the main dataset.

    The main dataset is left-merged on ``date`` with the boosters data read
    from ``self.source_url_boosters``; the merge is validated as one-to-one.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.
    """
    base = self.read(self.source_url).pipe(self.pipeline_base)

    # Manufacturer
    manufacturer = base.pipe(self.pipeline_manufacturer)
    manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    source_name = "Ministerio de Salud Pública del Ecuador (via https://github.com/andrab/ecuacovid)"
    export_metadata(manufacturer, source_name, self.source_url_ref, paths.tmp_vax_metadata_man)

    # Main data
    main = base.pipe(self.pipeline)
    boosters = self.read(self.source_url_boosters).pipe(self.pipeline_boosters)
    merged = main.merge(boosters, on="date", how="left", validate="one_to_one")
    merged.to_csv(paths.tmp_vax_out(self.location), index=False)
def to_csv(self, paths):
    """Pass the latest main figures to ``increment`` and export manufacturer data.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below; also passed to ``increment`` and
            ``self.read_manufacturer``.
    """
    # Main data
    latest = self.read().pipe(self.pipeline)
    increment(
        paths=paths,
        location=latest["location"],
        total_vaccinations=latest["total_vaccinations"],
        people_vaccinated=latest["people_vaccinated"],
        people_fully_vaccinated=latest["people_fully_vaccinated"],
        date=latest["date"],
        source_url=latest["source_url"],
        vaccine=latest["vaccine"],
    )

    # Vaccination by manufacturer
    manufacturer = self.read_manufacturer(paths)
    manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    source_name = "Centers for Disease Control and Prevention"
    export_metadata(manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)
def main(paths):
    """Scrape the Iceland infogram page and export totals and manufacturer data.

    The embedded ``infographicData`` JSON is extracted from the page's
    script tags. The latest totals are passed to ``increment``; the
    per-manufacturer series is accumulated per vaccine and written to CSV
    together with its metadata.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below; also passed to ``increment``.

    Raises:
        ValueError: If no script tag contains ``infographicData``, or if a
            metric value cannot be parsed from its chart cell.
        AssertionError: If the vaccines in the data differ from the keys of
            ``VACCINE_MAPPING``.
    """
    url = "https://e.infogram.com/c3bc3569-c86d-48a7-9d4c-377928f102bf"
    soup = get_soup(url)

    for script in soup.find_all("script"):
        script_text = str(script)
        if "infographicData" in script_text:
            payload = script_text.replace("<script>window.infographicData=", "").replace(";</script>", "")
            json_data = json.loads(payload)
            break
    else:
        # Without this, json_data would be unbound and fail later with NameError
        raise ValueError(f"No script tag containing 'infographicData' found at {url}")

    entities = json_data["elements"]["content"]["content"]["entities"]

    metric_entities = {
        "total_vaccinations": "7287c058-7921-4abc-a667-ce298827c969",
        "people_vaccinated": "8d14f33a-d482-4176-af55-71209314b07b",
        "people_fully_vaccinated": "16a69e30-01fd-4806-920c-436f8f29e9bf",
        "total_boosters": "209af2de-9927-4c51-a704-ddc85e28bab9",
    }
    data = {}
    for metric, entity in metric_entities.items():
        cell = entities[entity]["props"]["chartData"]["data"][0][0][0]
        # The figure is embedded in markup; capture the digits/dots after the style
        match = re.search(r'18px;">([\d\.]+)', cell)
        if match is None:
            raise ValueError(f"Could not parse a value for {metric!r} from the chart data")
        data[metric] = clean_count(match.group(1))

    date = json_data["updatedAt"][:10]

    increment(
        paths=paths,
        location="Iceland",
        total_vaccinations=data["total_vaccinations"],
        people_vaccinated=data["people_vaccinated"],
        people_fully_vaccinated=data["people_fully_vaccinated"],
        total_boosters=data["total_boosters"],
        date=date,
        source_url="https://www.covid.is/tolulegar-upplysingar-boluefni",
        vaccine=", ".join(sorted(VACCINE_MAPPING.values())),
    )

    # By manufacturer
    chart = entities["e329559c-c3cc-48e9-8b7b-1a5f87ea7ad3"]["props"]["chartData"]["data"][0]
    # First row supplies the vaccine column names; remaining rows are the data
    df = pd.DataFrame(chart[1:]).reset_index(drop=True)
    df.columns = ["date"] + chart[0][1:]

    df = df.melt("date", var_name="vaccine", value_name="total_vaccinations")

    df["date"] = pd.to_datetime(df["date"], format="%d.%m.%y").astype(str)
    df["total_vaccinations"] = pd.to_numeric(df["total_vaccinations"], errors="coerce").fillna(0)
    # Running total per vaccine, in date order
    df["total_vaccinations"] = (
        df.sort_values("date").groupby("vaccine", as_index=False)["total_vaccinations"].cumsum()
    )
    df["location"] = "Iceland"

    assert set(df["vaccine"].unique()) == set(
        VACCINE_MAPPING.keys()
    ), f"Vaccines present in data: {df['vaccine'].unique()}"
    df = df.replace(VACCINE_MAPPING)

    df.to_csv(paths.tmp_vax_out_man("Iceland"), index=False)
    export_metadata(df, "Ministry of Health", url, paths.tmp_vax_metadata_man)
def main(paths):
    """Export the France manufacturer and main datasets, plus metadata.

    Reads cumulative doses per vaccine from data.gouv.fr, writes the
    per-manufacturer totals with their metadata, then aggregates across
    vaccines per date for the main file.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.

    Raises:
        AssertionError: If the vaccine codes left after filtering differ
            from the keys of ``vaccine_mapping``.
    """
    vaccine_mapping = {
        1: "Pfizer/BioNTech",
        2: "Moderna",
        3: "Oxford/AstraZeneca",
        4: "Johnson&Johnson",
    }
    one_dose_vaccines = ["Johnson&Johnson"]

    source = "https://www.data.gouv.fr/fr/datasets/r/b273cf3b-e9de-437c-af55-eda5979e92fc"

    df = pd.read_csv(
        source,
        usecols=["vaccin", "jour", "n_cum_dose1", "n_cum_dose2", "n_cum_dose3"],
        sep=";",
    )

    df = df.rename(
        columns={
            "vaccin": "vaccine",
            "jour": "date",
            "n_cum_dose1": "people_vaccinated",
            "n_cum_dose2": "people_fully_vaccinated",
            "n_cum_dose3": "total_boosters",
        }
    )

    # Map vaccine names
    # .copy() makes the filtered frame independent, so the column assignments
    # below do not write into a slice of the raw data (SettingWithCopyWarning).
    df = df[(df.vaccine.isin(vaccine_mapping.keys())) & (df.people_vaccinated > 0)].copy()
    assert set(df["vaccine"].unique()) == set(vaccine_mapping.keys())
    df["vaccine"] = df.vaccine.replace(vaccine_mapping)

    # Add total doses
    df["total_vaccinations"] = df.people_vaccinated + df.people_fully_vaccinated + df.total_boosters

    manufacturer = df[["date", "total_vaccinations", "vaccine"]].assign(location="France")
    manufacturer.to_csv(paths.tmp_vax_out_man("France"), index=False)
    export_metadata(manufacturer, "Public Health France", source, paths.tmp_vax_metadata_man)

    # Infer fully vaccinated for one-dose vaccines
    df.loc[df.vaccine.isin(one_dose_vaccines), "people_fully_vaccinated"] = df.people_vaccinated

    df = df.groupby("date", as_index=False).agg(
        {
            "total_vaccinations": "sum",
            "people_vaccinated": "sum",
            "people_fully_vaccinated": "sum",
            "total_boosters": "sum",
            "vaccine": lambda x: ", ".join(sorted(x)),
        }
    )

    df = df.assign(
        location="France",
        source_url=(
            "https://www.data.gouv.fr/fr/datasets/donnees-relatives-aux-personnes-vaccinees-contre-la-covid-19-1/"
        ),
    )

    df.to_csv(paths.tmp_vax_out("France"), index=False)
def main(paths):
    """Export the Lithuania manufacturer and main datasets, plus metadata.

    Queries the ArcGIS feature service, writes cumulative doses per vaccine
    with their metadata, then reshapes the data by dose number and
    aggregates across vaccines per date for the main file.

    Args:
        paths: Object exposing the output path builders and metadata paths
            used below.

    Raises:
        ValueError: If the data contains a vaccine name that is not a key of
            ``vaccine_mapping``.
    """
    DATA_URL = (
        "https://services3.arcgis.com/MF53hRPmwfLccHCj/arcgis/rest/services/"
        "covid_vaccinations_by_drug_name_new/FeatureServer/0/query"
    )
    PARAMS = {
        "f": "json",
        "where": "municipality_code='00'",
        "returnGeometry": False,
        "spatialRel": "esriSpatialRelIntersects",
        "outFields": "date,vaccine_name,vaccination_state,vaccinated_cum",
        "resultOffset": 0,
        "resultRecordCount": 32000,
        "resultType": "standard",
    }
    res = requests.get(DATA_URL, params=PARAMS)

    data = [elem["attributes"] for elem in json.loads(res.content)["features"]]

    df = pd.DataFrame.from_records(data)
    df["date"] = pd.to_datetime(df["date"], unit="ms")

    # Correction for vaccinations wrongly attributed to early December 2020
    df.loc[df.date < "2020-12-27", "date"] = pd.to_datetime("2020-12-27")

    # Reshape data
    df = df[(df.vaccination_state != "Dalinai") & (df.vaccinated_cum > 0)].copy()
    df.loc[df.vaccination_state == "Visi", "dose_number"] = 1
    df.loc[df.vaccination_state == "Pilnai", "dose_number"] = 2
    df = df.drop(columns="vaccination_state")

    # Data by vaccine
    vaccine_mapping = {
        "Pfizer-BioNTech": "Pfizer/BioNTech",
        "Moderna": "Moderna",
        "AstraZeneca": "Oxford/AstraZeneca",
        "Johnson & Johnson": "Johnson&Johnson",
    }
    # Name the single-dose vaccine carries AFTER the mapping is applied below
    single_dose_vaccine = vaccine_mapping["Johnson & Johnson"]
    vaccines_wrong = set(df["vaccine_name"].unique()).difference(vaccine_mapping)
    if vaccines_wrong:
        raise ValueError(f"Missing vaccines: {vaccines_wrong}")
    df = df.replace(vaccine_mapping)

    vax = (
        df.groupby(["date", "vaccine_name"], as_index=False)["vaccinated_cum"]
        .sum()
        .sort_values("date")
        .rename(columns={"vaccine_name": "vaccine", "vaccinated_cum": "total_vaccinations"})
    )
    vax["location"] = "Lithuania"
    vax.to_csv(paths.tmp_vax_out_man("Lithuania"), index=False)
    export_metadata(vax, "Ministry of Health", DATA_URL, paths.tmp_vax_metadata_man)

    # Unpivot: one column per dose number
    df = (
        df.groupby(["date", "dose_number", "vaccine_name"], as_index=False)
        .sum()
        .pivot(
            index=["date", "vaccine_name"],
            columns="dose_number",
            values="vaccinated_cum",
        )
        .fillna(0)
        .reset_index()
        .rename(columns={1: "people_vaccinated", 2: "people_fully_vaccinated"})
        .sort_values("date")
    )

    # Total vaccinations
    df = df.assign(total_vaccinations=df.people_vaccinated + df.people_fully_vaccinated)

    # Single shot
    # vaccine_name already holds mapped names here, so compare against the
    # mapped value; the unmapped "Johnson & Johnson" would never match.
    msk = df.vaccine_name == single_dose_vaccine
    df.loc[msk, "people_fully_vaccinated"] = df.loc[msk, "people_vaccinated"]

    # Group by date
    # String aggregators instead of the builtin `sum`, which pandas deprecates
    # as an agg argument.
    df = (
        df.groupby("date")
        .agg(
            {
                "people_fully_vaccinated": "sum",
                "people_vaccinated": "sum",
                "total_vaccinations": "sum",
                "vaccine_name": lambda x: ", ".join(sorted(x)),
            }
        )
        .rename(columns={"vaccine_name": "vaccine"})
        .reset_index()
    )
    df = df.replace(0, pd.NA)

    df.loc[:, "location"] = "Lithuania"
    df.loc[:, "source_url"] = "https://experience.arcgis.com/experience/cab84dcfe0464c2a8050a78f817924ca/page/page_3/"

    df.to_csv(paths.tmp_vax_out("Lithuania"), index=False)