Exemple #1
0
    def to_csv(self, paths):
        """Export main, manufacturer and age-group vaccination data to CSV.

        Main and age-group files are written once per location (Switzerland
        and Liechtenstein). The manufacturer file is written for Switzerland
        only. Metadata is exported for the manufacturer and age-group frames.
        """
        source_name = "Federal Office of Public Health"
        countries = ("Switzerland", "Liechtenstein")
        df_main, df_vaccine, df_by_age = self.read()

        # Main data: one file per country
        for country in countries:
            out_main = df_main.pipe(self.pipeline, country)
            out_main.to_csv(paths.tmp_vax_out(country), index=False)

        # Manufacturer data (Switzerland only)
        out_man = df_vaccine.pipe(self.pipeline_manufacturer)
        out_man.to_csv(paths.tmp_vax_out_man("Switzerland"), index=False)
        export_metadata(out_man, source_name, self.source_url, paths.tmp_vax_metadata_man)

        # Age data: one file per country; metadata is exported on every iteration
        for country in countries:
            out_age = df_by_age.pipe(self.pipeline_age, country)
            out_age.to_csv(paths.tmp_vax_out_by_age_group(country), index=False)
            export_metadata(out_age, source_name, self.source_url, paths.tmp_vax_metadata_age)
Exemple #2
0
 def to_csv(self, paths):
     """Write main, manufacturer and age-group CSVs plus their metadata.

     The main and manufacturer outputs are both derived from the first frame
     returned by ``self.read()``; the age output comes from the second one.
     """
     source_name = "Ministry of Health via vacuna.uy"
     df_main, df_by_age = self.read()
     # Main data
     vaccinations = df_main.pipe(self.pipeline)
     vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)
     # Manufacturer data and metadata
     by_manufacturer = df_main.pipe(self.pipeline_manufacturer)
     by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
     export_metadata(by_manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)
     # Age data and metadata (note: uses the dedicated age source URL)
     by_age = df_by_age.pipe(self.pipeline_age)
     by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
     export_metadata(by_age, source_name, self.source_url_age, paths.tmp_vax_metadata_age)
Exemple #3
0
 def _export_metadata(self, df, output_path):
     """Export metadata for ``df`` to ``output_path``, naming the ECDC as source."""
     metadata_kwargs = {
         "df": df,
         "source_name": "European Centre for Disease Prevention and Control (ECDC)",
         "source_url": self.source_url_ref,
         "output_path": output_path,
     }
     export_metadata(**metadata_kwargs)
Exemple #4
0
 def _export_datafile_secondary(self, df, metadata, output_path, output_path_meta):
     """Write a secondary dataset to CSV and export its metadata.

     ``metadata`` is validated with ``self._check_metadata`` before anything
     is written; its ``"source_name"`` and ``"source_url"`` entries are
     forwarded to ``export_metadata``.
     """
     # Validate first so nothing is written for malformed metadata
     self._check_metadata(metadata)
     # Data file
     df.to_csv(output_path, index=False)
     # Metadata file
     source_name = metadata["source_name"]
     source_url = metadata["source_url"]
     export_metadata(df, source_name, source_url, output_path_meta)
Exemple #5
0
 def to_csv(self, paths):
     """Export the main and per-manufacturer vaccination CSVs.

     Both outputs start from the same base frame (``self.read()`` passed
     through ``self.pipeline_base``). Metadata is exported only for the
     manufacturer data.
     """
     base = self.read().pipe(self.pipeline_base)
     # Main data
     vaccinations = base.pipe(self.pipeline)
     vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)
     # Manufacturer data and metadata
     by_manufacturer = base.pipe(self.pipeline_manufacturer)
     by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
     export_metadata(
         by_manufacturer,
         "Robert Koch Institut",
         self.source_url_ref,
         paths.tmp_vax_metadata_man,
     )
    def to_csv(self, paths):
        """Export main vaccination data and the manufacturer breakdown with metadata."""
        # NOTE(review): the main file is keyed by the literal "United States" while
        # the manufacturer file uses self.location; confirm both name the same place.
        vaccinations = self.read().pipe(self.pipeline)
        vaccinations.to_csv(paths.tmp_vax_out("United States"), index=False)

        # Manufacturer data and metadata
        by_manufacturer = self.read_manufacturer(paths)
        by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
        source_name = "Centers for Disease Control and Prevention"
        source_url = "https://covid.cdc.gov/covid-data-tracker/COVIDData/getAjaxData?id=vaccination_data"
        export_metadata(by_manufacturer, source_name, source_url, paths.tmp_vax_metadata_man)
Exemple #7
0
 def to_csv(self, paths):
     """Export main and age-group vaccination data from a single read.

     Metadata is exported for the age-group data only.
     """
     raw = self.read()
     # Main data
     vaccinations = raw.pipe(self.pipeline)
     vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)
     # Age data and metadata
     by_age = raw.pipe(self.pipeline_age)
     by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
     export_metadata(
         by_age,
         "Government of Jersey",
         self.source_url,
         paths.tmp_vax_metadata_age,
     )
Exemple #8
0
 def export(self, paths):
     """Export main and age-group vaccination data, each read from its own source.

     Metadata is exported for the age-group data only.
     """
     source_name = "Ministerio de Salud via https://github.com/jmcastagnetto/covid-19-peru-vacunas"
     # Main data
     vaccinations = self.read().pipe(self.pipeline)
     vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)
     # Age data and metadata
     by_age = self.read_age().pipe(self.pipeline_age)
     by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
     export_metadata(by_age, source_name, self.source_url_ref, paths.tmp_vax_metadata_age)
Exemple #9
0
def main(paths):
    """Export Hong Kong vaccination data: the main file and the manufacturer breakdown.

    The per-vaccine total columns are dropped from the main file; the
    manufacturer file is derived from the same processed frame.
    """
    location = "Hong Kong"
    source = "https://static.data.gov.hk/covid-vaccine/bar_vaccination_date.json"
    processed = read(source).pipe(pipeline)

    # Main data, without the per-vaccine totals
    main_path = paths.tmp_vax_out(location)
    main_df = processed.drop(columns=["total_pfizer", "total_sinovac"])
    main_df.to_csv(main_path, index=False)

    # Manufacturer data and metadata
    manufacturer_path = paths.tmp_vax_out_man(location)
    by_manufacturer = processed.pipe(manufacturer_pipeline)
    by_manufacturer.to_csv(manufacturer_path, index=False)
    export_metadata(
        by_manufacturer,
        "Government of Hong Kong",
        source,
        paths.tmp_vax_metadata_man,
    )
Exemple #10
0
 def export(self, paths):
     """Export main and age-group vaccination data, each read from its own source.

     Metadata is exported for the age-group data only.
     """
     source_name = "Ministry of Health via github.com/dancarmoz/israel_moh_covid_dashboard_data"
     # Main data
     main_path = paths.tmp_vax_out(self.location)
     vaccinations = self.read().pipe(self.pipeline)
     vaccinations.to_csv(main_path, index=False)
     # Age data and metadata
     by_age = self.read_age().pipe(self.pipeline_age)
     by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
     export_metadata(by_age, source_name, self.source_url_age, paths.tmp_vax_metadata_age)
Exemple #11
0
def main(paths):
    """Export Czechia vaccination data: manufacturer breakdown first, then the main file."""
    location = "Czechia"
    source = "https://onemocneni-aktualne.mzcr.cz/api/v2/covid-19/ockovani.csv"

    shared = read(source).pipe(base_pipeline)

    # Manufacturer data and metadata
    by_manufacturer = shared.pipe(breakdown_per_vaccine)
    by_manufacturer.to_csv(paths.tmp_vax_out_man(location), index=False)
    export_metadata(
        by_manufacturer,
        "Ministry of Health",
        source,
        paths.tmp_vax_metadata_man,
    )

    # Main data
    vaccinations = shared.pipe(global_pipeline)
    vaccinations.to_csv(paths.tmp_vax_out(location), index=False)
Exemple #12
0
 def to_csv(self, paths):
     """Export the main and per-manufacturer vaccination CSVs.

     Each pipeline receives its own copy of the base frame, so neither can
     affect the other's input. Metadata is exported for the manufacturer
     data only.
     """
     base = self.read().pipe(self.pipeline_base)
     # Main data
     vaccinations = base.copy().pipe(self.pipeline)
     vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)
     # Manufacturer data and metadata
     by_manufacturer = base.copy().pipe(self.pipeline_manufacturer)
     manufacturer_path = paths.tmp_vax_out_man(f"{self.location}")
     by_manufacturer.to_csv(manufacturer_path, index=False)
     export_metadata(
         by_manufacturer,
         "Government of Romania via datelazi.ro",
         self.source_url,
         paths.tmp_vax_metadata_man,
     )
Exemple #13
0
 def to_csv(self, paths):
     """Export the main and per-manufacturer vaccination CSVs.

     Both outputs are built from ``self.read()`` passed through
     ``self.pipe_base``. Metadata is exported for the manufacturer data only.
     """
     base = self.read().pipe(self.pipe_base)
     # Main data
     vaccinations = base.pipe(self.pipeline)
     vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)
     # Manufacturer data and metadata
     by_manufacturer = base.pipe(self.pipeline_manufacturer)
     by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
     export_metadata(by_manufacturer, "National Health Service", self.source_url, paths.tmp_vax_metadata_man)
Exemple #14
0
 def to_csv(self, paths):
     """Export the main and per-manufacturer vaccination CSVs from one read.

     Metadata is exported for the manufacturer data only.
     """
     source_name = "National Security and Defense Council of Ukraine"
     raw = self.read()
     # Main data
     vaccinations = raw.pipe(self.pipeline)
     vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)
     # Manufacturer data and metadata
     by_manufacturer = raw.pipe(self.pipeline_manufacturer)
     by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
     export_metadata(by_manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)
Exemple #15
0
    def to_csv(self, paths) -> None:
        """Export the main and per-manufacturer vaccination CSVs.

        Also stores the result of ``self.vaccine_start_dates`` on the instance
        as ``self.vax_date_mapping`` before either pipeline runs.
        """
        base = self.read().pipe(self.pipeline_base)

        # Set before the pipelines run (presumably they read it -- confirm).
        self.vax_date_mapping = self.vaccine_start_dates(base)

        # Main data
        vaccinations = base.pipe(self.pipeline)
        vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)

        # Manufacturer data and metadata
        by_manufacturer = base.pipe(self.pipeline_manufacturer)
        by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
        source_name = "Extraordinary commissioner for the Covid-19 emergency"
        export_metadata(by_manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)
Exemple #16
0
    def to_csv(self, paths):
        """Export the manufacturer breakdown (with metadata), then the main data.

        The two outputs are read from separate source URLs.
        """
        # Manufacturer data and metadata
        raw_manufacturer = self.read(self.source_url_manufacturer)
        by_manufacturer = raw_manufacturer.pipe(self.pipeline_manufacturer)
        by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
        source_name = "Ministerio de Ciencia, Tecnología, Conocimiento e Innovación"
        export_metadata(by_manufacturer, source_name, self.source_url_ref, paths.tmp_vax_metadata_man)

        # Main data
        raw_vaccinations = self.read(self.source_url_vaccinations)
        vaccinations = raw_vaccinations.pipe(self.pipeline_vaccinations)
        vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)
Exemple #17
0
    def to_csv(self, paths):
        """Export main data for Switzerland and Liechtenstein plus Swiss manufacturer data.

        The main pipeline is run once per country code; the manufacturer file
        and its metadata are written for Switzerland only.
        """
        df_main, df_vaccine = self.read()

        # Main data: one file per (country code, location) pair
        targets = (("CH", "Switzerland"), ("FL", "Liechtenstein"))
        for code, country in targets:
            per_country = df_main.pipe(self.pipeline, country_code=code)
            per_country.to_csv(paths.tmp_vax_out(country), index=False)

        # Manufacturer data and metadata
        by_manufacturer = df_vaccine.pipe(self.pipeline_manufacturer)
        by_manufacturer.to_csv(paths.tmp_vax_out_man("Switzerland"), index=False)
        source_name = "Federal Office of Public Health"
        export_metadata(by_manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)
Exemple #18
0
    def to_csv(self, paths):
        """Export the manufacturer breakdown (with metadata), then the main data.

        The main data is left-joined with the booster data on ``date``; the
        merge is validated as one-to-one.
        """
        base = self.read(self.source_url).pipe(self.pipeline_base)

        # Manufacturer data and metadata
        by_manufacturer = base.pipe(self.pipeline_manufacturer)
        by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
        source_name = "Ministerio de Salud Pública del Ecuador (via https://github.com/andrab/ecuacovid)"
        export_metadata(by_manufacturer, source_name, self.source_url_ref, paths.tmp_vax_metadata_man)

        # Main data, enriched with boosters
        vaccinations = base.pipe(self.pipeline)
        boosters = self.read(self.source_url_boosters).pipe(self.pipeline_boosters)
        merged = vaccinations.merge(boosters, on="date", how="left", validate="one_to_one")
        merged.to_csv(paths.tmp_vax_out(self.location), index=False)
Exemple #19
0
 def to_csv(self, paths):
     """Record the latest main figures via ``increment`` and export manufacturer data.

     The main data point is handed to ``increment`` field by field; the
     manufacturer breakdown is written to CSV together with its metadata.
     """
     # Main data
     latest = self.read().pipe(self.pipeline)
     fields = (
         "location",
         "total_vaccinations",
         "people_vaccinated",
         "people_fully_vaccinated",
         "date",
         "source_url",
         "vaccine",
     )
     increment(paths=paths, **{field: latest[field] for field in fields})
     # Manufacturer data and metadata
     by_manufacturer = self.read_manufacturer(paths)
     by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
     source_name = "Centers for Disease Control and Prevention"
     export_metadata(by_manufacturer, source_name, self.source_url, paths.tmp_vax_metadata_man)
Exemple #20
0
def main(paths):
    """Scrape Iceland's vaccination figures from an Infogram embed and export them.

    The page embeds its data as JSON inside a ``<script>`` tag that assigns
    ``window.infographicData``. Headline metrics are read from fixed chart
    entities and passed to ``increment``; the per-manufacturer chart is
    reshaped to long format, accumulated per vaccine, and written to CSV
    together with its metadata.

    Args:
        paths: Project paths object providing ``tmp_vax_out_man`` and
            ``tmp_vax_metadata_man`` (also forwarded to ``increment``).

    Raises:
        ValueError: If no script containing ``infographicData`` is found, or
            if a metric's value cannot be located in its chart markup.
        AssertionError: If the vaccines in the data differ from the keys of
            ``VACCINE_MAPPING``.
    """
    url = "https://e.infogram.com/c3bc3569-c86d-48a7-9d4c-377928f102bf"
    soup = get_soup(url)

    for script in soup.find_all("script"):
        script_text = str(script)
        if "infographicData" in script_text:
            # Strip the JavaScript assignment wrapper to leave bare JSON
            json_data = json.loads(
                script_text.replace("<script>window.infographicData=", "").replace(";</script>", "")
            )
            break
    else:
        # Without this, `json_data` would be unbound and fail later with a NameError
        raise ValueError(f"No script containing 'infographicData' found at {url}")

    entities = json_data["elements"]["content"]["content"]["entities"]

    metric_entities = {
        "total_vaccinations": "7287c058-7921-4abc-a667-ce298827c969",
        "people_vaccinated": "8d14f33a-d482-4176-af55-71209314b07b",
        "people_fully_vaccinated": "16a69e30-01fd-4806-920c-436f8f29e9bf",
        "total_boosters": "209af2de-9927-4c51-a704-ddc85e28bab9",
    }
    data = {}

    for metric, entity in metric_entities.items():
        cell = entities[entity]["props"]["chartData"]["data"][0][0][0]
        # The figure is embedded in styled markup; capture the digits/dots after the style
        match = re.search(r'18px;">([\d\.]+)', cell)
        if match is None:
            raise ValueError(f"Could not find a value for metric '{metric}' in: {cell!r}")
        data[metric] = clean_count(match.group(1))

    # Keep only the YYYY-MM-DD part of the timestamp
    date = json_data["updatedAt"][:10]

    increment(
        paths=paths,
        location="Iceland",
        total_vaccinations=data["total_vaccinations"],
        people_vaccinated=data["people_vaccinated"],
        people_fully_vaccinated=data["people_fully_vaccinated"],
        total_boosters=data["total_boosters"],
        date=date,
        source_url="https://www.covid.is/tolulegar-upplysingar-boluefni",
        vaccine=", ".join(sorted(VACCINE_MAPPING.values())),
    )

    # By manufacturer: first row is the header, remaining rows are the data
    chart = entities["e329559c-c3cc-48e9-8b7b-1a5f87ea7ad3"]["props"]["chartData"]["data"][0]
    df = pd.DataFrame(chart[1:]).reset_index(drop=True)
    df.columns = ["date"] + chart[0][1:]

    df = df.melt("date", var_name="vaccine", value_name="total_vaccinations")

    df["date"] = pd.to_datetime(df["date"], format="%d.%m.%y").astype(str)
    # Non-numeric cells become 0 so they do not break the running total
    df["total_vaccinations"] = pd.to_numeric(df["total_vaccinations"], errors="coerce").fillna(0)
    # Running total per vaccine in date order; assignment realigns on the original index
    df["total_vaccinations"] = (
        df.sort_values("date").groupby("vaccine", as_index=False)["total_vaccinations"].cumsum()
    )
    df["location"] = "Iceland"

    assert set(df["vaccine"].unique()) == set(VACCINE_MAPPING.keys(
    )), f"Vaccines present in data: {df['vaccine'].unique()}"
    df = df.replace(VACCINE_MAPPING)

    df.to_csv(paths.tmp_vax_out_man("Iceland"), index=False)
    export_metadata(df, "Ministry of Health", url, paths.tmp_vax_metadata_man)
Exemple #21
0
def main(paths):
    """Export France's vaccination data, overall and by manufacturer.

    Reads per-vaccine dose counts (columns ``n_cum_dose1``..``n_cum_dose3``)
    from data.gouv.fr, writes the per-manufacturer file with its metadata,
    then aggregates across vaccines by date and writes the main file.

    Args:
        paths: Project paths object providing ``tmp_vax_out``,
            ``tmp_vax_out_man`` and ``tmp_vax_metadata_man``.

    Raises:
        AssertionError: If the vaccine codes left after filtering are not
            exactly the keys of the vaccine mapping.
    """
    vaccine_mapping = {
        1: "Pfizer/BioNTech",
        2: "Moderna",
        3: "Oxford/AstraZeneca",
        4: "Johnson&Johnson",
    }
    one_dose_vaccines = ["Johnson&Johnson"]

    source = "https://www.data.gouv.fr/fr/datasets/r/b273cf3b-e9de-437c-af55-eda5979e92fc"

    df = pd.read_csv(
        source,
        usecols=["vaccin", "jour", "n_cum_dose1", "n_cum_dose2", "n_cum_dose3"],
        sep=";",
    )

    df = df.rename(
        columns={
            "vaccin": "vaccine",
            "jour": "date",
            "n_cum_dose1": "people_vaccinated",
            "n_cum_dose2": "people_fully_vaccinated",
            "n_cum_dose3": "total_boosters",
        }
    )

    # Map vaccine names. The explicit copy makes the filtered frame independent,
    # so the column assignments below do not write into a slice of the original
    # frame (which triggers pandas' SettingWithCopyWarning).
    df = df[(df.vaccine.isin(vaccine_mapping.keys())) & (df.people_vaccinated > 0)].copy()
    assert set(df["vaccine"].unique()) == set(vaccine_mapping.keys())
    df["vaccine"] = df.vaccine.replace(vaccine_mapping)

    # Add total doses
    df["total_vaccinations"] = df.people_vaccinated + df.people_fully_vaccinated + df.total_boosters

    # Manufacturer data and metadata
    manufacturer = df[["date", "total_vaccinations", "vaccine"]].assign(location="France")
    manufacturer.to_csv(paths.tmp_vax_out_man("France"), index=False)
    export_metadata(manufacturer, "Public Health France", source, paths.tmp_vax_metadata_man)

    # Infer fully vaccinated for one-dose vaccines. This happens after the total
    # is computed, so the single dose is not counted twice in total_vaccinations.
    df.loc[df.vaccine.isin(one_dose_vaccines), "people_fully_vaccinated"] = df.people_vaccinated

    # Aggregate across vaccines: sum the counts and list the vaccines per date
    df = df.groupby("date", as_index=False).agg(
        {
            "total_vaccinations": "sum",
            "people_vaccinated": "sum",
            "people_fully_vaccinated": "sum",
            "total_boosters": "sum",
            "vaccine": lambda x: ", ".join(sorted(x)),
        }
    )

    df = df.assign(
        location="France",
        source_url=(
            "https://www.data.gouv.fr/fr/datasets/donnees-relatives-aux-personnes-vaccinees-contre-la-covid-19-1/"
        ),
    )

    df.to_csv(paths.tmp_vax_out("France"), index=False)
Exemple #22
0
def main(paths):
    """Export Lithuania's vaccination data, overall and by manufacturer.

    Queries an ArcGIS feature service for vaccinations by vaccine name and
    vaccination state, writes the per-manufacturer file with its metadata,
    then pivots the dose states into people-vaccinated / fully-vaccinated
    columns, aggregates by date and writes the main file.

    Args:
        paths: Project paths object providing ``tmp_vax_out``,
            ``tmp_vax_out_man`` and ``tmp_vax_metadata_man``.

    Raises:
        ValueError: If the data contains a vaccine name missing from the
            vaccine mapping.
    """
    DATA_URL = (
        "https://services3.arcgis.com/MF53hRPmwfLccHCj/arcgis/rest/services/"
        "covid_vaccinations_by_drug_name_new/FeatureServer/0/query")
    PARAMS = {
        "f": "json",
        "where": "municipality_code='00'",
        "returnGeometry": False,
        "spatialRel": "esriSpatialRelIntersects",
        "outFields": "date,vaccine_name,vaccination_state,vaccinated_cum",
        "resultOffset": 0,
        "resultRecordCount": 32000,
        "resultType": "standard",
    }
    res = requests.get(DATA_URL, params=PARAMS)

    data = [elem["attributes"] for elem in json.loads(res.content)["features"]]

    df = pd.DataFrame.from_records(data)

    # Timestamps arrive as epoch milliseconds
    df["date"] = pd.to_datetime(df["date"], unit="ms")

    # Correction for vaccinations wrongly attributed to early December 2020
    df.loc[df.date < "2020-12-27", "date"] = pd.to_datetime("2020-12-27")

    # Reshape data: drop the "Dalinai" state and map the remaining states to a dose number
    df = df[(df.vaccination_state != "Dalinai")
            & (df.vaccinated_cum > 0)].copy()
    df.loc[df.vaccination_state == "Visi", "dose_number"] = 1
    df.loc[df.vaccination_state == "Pilnai", "dose_number"] = 2
    df = df.drop(columns="vaccination_state")

    # Data by vaccine
    vaccine_mapping = {
        "Pfizer-BioNTech": "Pfizer/BioNTech",
        "Moderna": "Moderna",
        "AstraZeneca": "Oxford/AstraZeneca",
        "Johnson & Johnson": "Johnson&Johnson",
    }
    vaccines_wrong = set(
        df["vaccine_name"].unique()).difference(vaccine_mapping)
    if vaccines_wrong:
        raise ValueError(f"Missing vaccines: {vaccines_wrong}")
    df = df.replace(vaccine_mapping)
    vax = (df.groupby(
        ["date", "vaccine_name"],
        as_index=False)["vaccinated_cum"].sum().sort_values("date").rename(
            columns={
                "vaccine_name": "vaccine",
                "vaccinated_cum": "total_vaccinations"
            }))
    vax["location"] = "Lithuania"
    vax.to_csv(paths.tmp_vax_out_man("Lithuania"), index=False)
    export_metadata(vax, "Ministry of Health", DATA_URL,
                    paths.tmp_vax_metadata_man)

    # Unpivot: one column per dose number, one row per (date, vaccine)
    df = (df.groupby(
        ["date", "dose_number", "vaccine_name"], as_index=False).sum().pivot(
            index=["date", "vaccine_name"],
            columns="dose_number",
            values="vaccinated_cum",
        ).fillna(0).reset_index().rename(columns={
            1: "people_vaccinated",
            2: "people_fully_vaccinated"
        }).sort_values("date"))

    # Total vaccinations (computed before the single-shot adjustment below)
    df = df.assign(total_vaccinations=df.people_vaccinated +
                   df.people_fully_vaccinated)

    # Single shot. Names were already remapped above, so the mask must use the
    # mapped name; comparing against the raw source name never matches.
    single_shot_vaccine = vaccine_mapping["Johnson & Johnson"]
    msk = df.vaccine_name == single_shot_vaccine
    df.loc[msk, "people_fully_vaccinated"] = df.loc[msk, "people_vaccinated"]

    # Group by date. String aggregator names select pandas' own reductions and
    # avoid the deprecation warning raised when the builtin `sum` is passed.
    df = (df.groupby("date").agg({
        "people_fully_vaccinated": "sum",
        "people_vaccinated": "sum",
        "total_vaccinations": "sum",
        "vaccine_name": lambda x: ", ".join(sorted(x)),
    }).rename(columns={
        "vaccine_name": "vaccine"
    }).reset_index())
    # Zeros are exported as missing values rather than as 0
    df = df.replace(0, pd.NA)

    df.loc[:, "location"] = "Lithuania"
    df.loc[:,
           "source_url"] = "https://experience.arcgis.com/experience/cab84dcfe0464c2a8050a78f817924ca/page/page_3/"

    df.to_csv(paths.tmp_vax_out("Lithuania"), index=False)