コード例 #1
0
ファイル: canada.py プロジェクト: LimLim0a0/covid-19-data
 def read(self) -> pd.DataFrame:
     """Fetch the source JSON and return the cumulative vaccination columns.

     The records stored under the payload's ``"data"`` key are loaded into a
     DataFrame, which is passed to ``check_known_columns`` (defined
     elsewhere) together with the column names listed below.

     Returns:
         The ``date``, ``total_vaccinations``, ``total_vaccinated``,
         ``total_boosters_1`` and ``total_boosters_2`` columns.
     """
     payload = request_json(self.source_url)
     frame = pd.DataFrame.from_records(payload["data"])
     known_columns = [
         "date",
         # change_* columns
         "change_cases", "change_fatalities", "change_tests",
         "change_hospitalizations", "change_criticals", "change_recoveries",
         "change_vaccinations", "change_vaccinated",
         "change_boosters_1", "change_boosters_2",
         "change_vaccines_distributed",
         # total_* columns
         "total_cases", "total_fatalities", "total_tests",
         "total_hospitalizations", "total_criticals", "total_recoveries",
         "total_vaccinations", "total_vaccinated",
         "total_boosters_1", "total_boosters_2",
         "total_vaccines_distributed",
     ]
     check_known_columns(frame, known_columns)
     kept_columns = [
         "date",
         "total_vaccinations",
         "total_vaccinated",
         "total_boosters_1",
         "total_boosters_2",
     ]
     return frame[kept_columns]
コード例 #2
0
ファイル: israel.py プロジェクト: LimLim0a0/covid-19-data
 def read(self) -> pd.DataFrame:
     """Fetch the source JSON and return it as a DataFrame.

     The JSON payload itself is used as the list of records. The frame is
     passed to ``check_known_columns`` (defined elsewhere) with the column
     names listed below and then returned without further changes.
     """
     records = request_json(self.source_url)
     frame = pd.DataFrame.from_records(records)
     known_columns = [
         "Day_Date",
         "vaccinated", "vaccinated_cum", "vaccinated_population_perc",
         "vaccinated_seconde_dose", "vaccinated_seconde_dose_cum",
         "vaccinated_seconde_dose_population_perc",
         "vaccinated_third_dose", "vaccinated_third_dose_cum",
         "vaccinated_third_dose_population_perc",
         "vaccinated_fourth_dose_population_perc",
         "vaccinated_fourth_dose",
         "vaccinated_validity_perc", "vaccinated_expired_perc",
         "not_vaccinated_perc",
         "vaccinated_fourth_dose_cum",
     ]
     check_known_columns(frame, known_columns)
     return frame
コード例 #3
0
ファイル: romania.py プロジェクト: LimLim0a0/covid-19-data
 def read(self) -> pd.DataFrame:
     """Fetch the source JSON and return the vaccine columns sorted by index.

     The mapping under ``"historicalData"`` is loaded with its keys as the
     row index. After the frame is passed to ``check_known_columns``
     (defined elsewhere), the ``vaccines`` and
     ``numberTotalDosesAdministered`` columns are kept, the index is moved
     into an ``index`` column, rows containing missing values are dropped
     and the result is sorted by that ``index`` column.
     """
     payload = request_json(self.source_url)
     frame = pd.DataFrame.from_dict(payload["historicalData"], orient="index")
     known_columns = [
         "parsedOn", "parsedOnString", "fileName", "complete",
         "averageAge",
         "numberInfected", "numberCured", "numberDeceased",
         "percentageOfWomen", "percentageOfMen", "percentageOfChildren",
         "numberTotalDosesAdministered",
         "distributionByAge", "countyInfectionsNumbers",
         "incidence", "large_cities_incidence", "small_cities_incidence",
         "vaccines",
     ]
     check_known_columns(frame, known_columns)
     subset = frame[["vaccines", "numberTotalDosesAdministered"]]
     subset = subset.reset_index()
     subset = subset.dropna()
     return subset.sort_values(by="index")
コード例 #4
0
 def _parse_pdf_table(self) -> pd.DataFrame:
     """Extract the ``Total`` row of the table in the manufacturer PDF.

     Every table that ``tabula.read_pdf`` finds in the PDF located at
     ``self.source_url_ref["manufacturer"]`` is inspected and the first one
     that has a ``"Pfizer"`` column is used.

     Returns:
         The rows whose ``"Unnamed: 0"`` cell equals ``"Total"``, with the
         ``"Unnamed: 0"`` column removed.

     Raises:
         IndexError: If none of the extracted tables has a ``"Pfizer"``
             column.
     """
     url = self.source_url_ref["manufacturer"]
     print(url)
     tables = tabula.read_pdf(url, pages="all", stream=True)
     matching = [table for table in tables if "Pfizer" in table.columns]
     if not matching:
         # Same exception type the bare ``[0]`` lookup used to raise, but
         # with a message that says what actually went wrong.
         raise IndexError(f"No table with a 'Pfizer' column found in {url}")
     df = matching[0]
     # Checks data
     check_known_columns(
         df,
         [
             "Unnamed: 0",
             "Covid Shield",
             "Unnamed: 1",
             "Verocell",
             "Unnamed: 2",
             "J & J",
             "Pfizer",
             "Unnamed: 3",
             "Unnamed: 4",
             "Unnamed: 5",
             "Moderna",
         ],
     )
     # Boolean-mask selection keeps a DataFrame (possibly with one row),
     # hence the DataFrame return annotation.
     total_rows = df[df["Unnamed: 0"] == "Total"]
     return total_rows.drop(columns=["Unnamed: 0"])
コード例 #5
0
 def read(self) -> pd.DataFrame:
     """Load the source CSV and return the date, location and cumulative columns.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below before the subset is selected.
     """
     frame = pd.read_csv(self.source_url)
     known_columns = [
         "Date", "MMWR_week", "Location",
         "Administered_Daily",
         "Administered_Cumulative",
         "Administered_7_Day_Rolling_Average",
         "Admin_Dose_1_Daily",
         "Admin_Dose_1_Cumulative",
         "Admin_Dose_1_Day_Rolling_Average",
         "date_type",
         "Administered_daily_change_report",
         "Administered_daily_change_report_7dayroll",
         "Series_Complete_Daily",
         "Series_Complete_Cumulative",
         "Series_Complete_Day_Rolling_Average",
         "Booster_Daily",
         "Booster_Cumulative",
         "Booster_7_Day_Rolling_Average",
     ]
     check_known_columns(frame, known_columns)
     kept_columns = [
         "Date", "Location",
         "Administered_Cumulative", "Admin_Dose_1_Cumulative",
         "date_type",
         "Series_Complete_Cumulative", "Booster_Cumulative",
     ]
     return frame[kept_columns]
コード例 #6
0
ファイル: singapore.py プロジェクト: LimLim0a0/covid-19-data
    def read(self) -> str:
        """Download the zipped source and merge its two CSV tables.

        The archive fetched from ``self.source_url`` is unpacked into a
        temporary directory. Two CSV files are read from it, each is passed
        to ``check_known_columns`` (defined elsewhere) with its column list,
        and both frames are handed to ``self._merge_primary_and_boosters``.

        Returns:
            Whatever ``self._merge_primary_and_boosters`` returns.
        """
        # NOTE(review): the annotation says ``str`` but the merged value is
        # named ``df``; presumably a DataFrame — confirm against
        # ``_merge_primary_and_boosters`` before changing the annotation.
        with tempfile.TemporaryDirectory() as tf:
            r = requests.get(self.source_url)
            # Close the archive deterministically once its members are on
            # disk instead of leaving the ZipFile object open.
            with zipfile.ZipFile(io.BytesIO(r.content)) as z:
                z.extractall(tf)

            df_primary = pd.read_csv(
                os.path.join(
                    tf,
                    "primary-series-vaccination-take-up-by-population.csv"))
            check_known_columns(
                df_primary,
                [
                    "vacc_date",
                    "received_at_least_one_dose",
                    "received_at_least_two_doses",
                    "received_one_dose_pcttakeup",
                    "received_two_doses_pcttakeup",
                ],
            )

            df_boosters = pd.read_csv(
                os.path.join(tf, "progress-of-vaccine-booster-programme.csv"))
            check_known_columns(
                df_boosters,
                [
                    "vacc_date",
                    "received_booster_or_three_doses",
                    "booster_or_three_doses_pcttakeup",
                ],
            )
        # Both frames are fully loaded in memory, so the temporary directory
        # can already be gone at this point.
        df = self._merge_primary_and_boosters(df_primary, df_boosters)
        return df
コード例 #7
0
ファイル: greece.py プロジェクト: LimLim0a0/covid-19-data
 def read(self) -> pd.DataFrame:
     """Fetch the token-protected source JSON and sum the totals per date.

     The request carries an ``Authorization: Token <self.token>`` header.
     After the frame is passed to ``check_known_columns`` (defined
     elsewhere), five columns are renamed, only the renamed columns are
     kept, and their values are summed for each ``date``.

     Returns:
         One row per ``date`` with the summed ``people_vaccinated``,
         ``people_fully_vaccinated``, ``total_boosters`` and
         ``total_vaccinations`` columns.
     """
     auth_headers = {"Authorization": f"Token {self.token}"}
     response = requests.get(self.source_url, headers=auth_headers)
     frame = pd.DataFrame.from_records(response.json())
     known_columns = [
         "area", "areaid",
         "dailydose1", "dailydose2", "dailydose3",
         "daydiff", "daytotal",
         "referencedate",
         "totaldistinctpersons",
         "totaldose1", "totaldose2", "totaldose3",
         "totalvaccinations",
     ]
     check_known_columns(frame, known_columns)
     new_names = {
         "referencedate": "date",
         "totaldistinctpersons": "people_vaccinated",
         "totaldose2": "people_fully_vaccinated",
         "totaldose3": "total_boosters",
         "totalvaccinations": "total_vaccinations",
     }
     kept_columns = [
         "people_vaccinated",
         "people_fully_vaccinated",
         "total_boosters",
         "total_vaccinations",
         "date",
     ]
     renamed = frame.rename(columns=new_names)[kept_columns]
     per_date = renamed.groupby("date").sum()
     return per_date.reset_index()
コード例 #8
0
ファイル: who.py プロジェクト: LimLim0a0/covid-19-data
 def pipe_checks(self, df: pd.DataFrame) -> pd.DataFrame:
     """Run sanity checks on the input frame and return it unchanged.

     Raises:
         ValueError: If the frame has more than 300 rows, or if it is not
             the case that every ``COUNTRY`` group has exactly one distinct
             ``DATE_UPDATED`` value.
     """
     known_columns = [
         "COUNTRY", "WHO_REGION", "ISO3",
         "PERSONS_VACCINATED_1PLUS_DOSE_PER100",
         "PERSONS_FULLY_VACCINATED",
         "DATA_SOURCE",
         "TOTAL_VACCINATIONS",
         "NUMBER_VACCINES_TYPES_USED",
         "TOTAL_VACCINATIONS_PER100",
         "FIRST_VACCINE_DATE",
         "PERSONS_FULLY_VACCINATED_PER100",
         "PERSONS_VACCINATED_1PLUS_DOSE",
         "VACCINES_USED",
         "DATE_UPDATED",
     ]
     check_known_columns(df, known_columns)

     row_count = len(df)
     if row_count > 300:
         raise ValueError(
             f"Check source, it may contain updates from several dates! Shape found was {df.shape}"
         )

     # Number of distinct update dates seen for each country.
     dates_per_country = df.groupby("COUNTRY").DATE_UPDATED.nunique()
     # Passing requires all countries to share a single count, and that
     # count to be 1; anything else (including no countries) is rejected.
     if dates_per_country.nunique() != 1 or dates_per_country.unique()[0] != 1:
         raise ValueError("Countries have more than one date update!")
     return df
コード例 #9
0
 def pipe_initial_check(self, df: pd.DataFrame) -> pd.DataFrame:
     """Check the vaccine names and columns of the frame, then return it.

     Raises:
         ValueError: If ``df.Vaccine`` contains a value that is not found
             in ``self.vaccine_mapping``.
     """
     # Vaccines present in the data but absent from the mapping.
     unknown_vaccines = set(df.Vaccine).difference(self.vaccine_mapping)
     if not unknown_vaccines:
         check_known_columns(df, COLUMNS)
         return df
     raise ValueError(f"Unknown vaccines found. Check {unknown_vaccines}")
コード例 #10
0
ファイル: norway.py プロジェクト: LimLim0a0/covid-19-data
 def read(self):
     """Load the source CSV and return it as a DataFrame.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below and returned without further changes.
     """
     frame = pd.read_csv(self.source_url)
     known_columns = [
         "granularity_time", "granularity_geo",
         "location_code", "border",
         "age", "sex",
         "year", "week", "yrwk", "season", "x", "date",
         "n_dose_1", "n_dose_2", "n_dose_3_all",
         "cum_n_dose_1", "cum_n_dose_2", "cum_n_dose_3_all",
         "cum_pr100_dose_1", "cum_pr100_dose_2", "cum_pr100_dose_3_all",
         "pop",
         "location_name",
         "date_of_publishing",
     ]
     check_known_columns(frame, known_columns)
     return frame
コード例 #11
0
ファイル: peru.py プロジェクト: LimLim0a0/covid-19-data
 def read(self):
     """Load the source CSV and return every checked column but ``fecha_corte``.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below before the subset is selected.
     """
     frame = pd.read_csv(self.source_url)
     known_columns = [
         "fecha_corte",
         "fecha_vacunacion",
         "fabricante",
         "dosis",
         "n_reg",
         "flag_vacunacion_general",
     ]
     check_known_columns(frame, known_columns)
     kept_columns = [
         "fecha_vacunacion",
         "fabricante",
         "dosis",
         "n_reg",
         "flag_vacunacion_general",
     ]
     return frame[kept_columns]
コード例 #12
0
ファイル: portugal.py プロジェクト: LimLim0a0/covid-19-data
 def read(self) -> pd.DataFrame:
     """Load the source CSV and keep the columns named in ``self.columns_rename``.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below; the returned frame is indexed by the
     keys of ``self.columns_rename``.
     """
     frame = pd.read_csv(self.source_url)
     known_columns = [
         "data",
         "doses", "doses_novas",
         "doses1", "doses1_novas",
         "doses2", "doses2_novas",
         "pessoas_vacinadas_completamente",
         "pessoas_vacinadas_completamente_novas",
         "pessoas_vacinadas_parcialmente",
         "pessoas_vacinadas_parcialmente_novas",
         "pessoas_inoculadas", "pessoas_inoculadas_novas",
         "vacinas", "vacinas_novas",
         "pessoas_vacinadas_completamente_continente",
         "pessoas_vacinadas_completamente_continente_novas",
         "pessoas_reforço", "pessoas_reforço_novas",
         "pessoas_reforço_continente", "pessoas_reforço_continente_novas",
         "pessoas_gripe", "pessoas_gripe_novas",
         "vacinas_reforço_e_gripe_novas",
         "reforço_80mais", "reforço_80mais_novas",
         "reforço_70_79", "reforço_70_79_novas",
         "reforço_65_69", "reforço_65_69_novas",
         "reforço_60_69", "reforço_60_69_novas",
         "reforço_50_59", "reforço_50_59_novas",
         "vacinação_iniciada_05_11", "vacinação_iniciada_05_11_novas",
         "pessoas_inoculadas_12mais",
         "reforço_40_49", "reforço_40_49_novas",
         "vacinação_completa_05_11_novas",
         "reforço_30_39_novas",
         "vacinação_completa_05_11",
         "reforço_18_29", "reforço_18_29_novas",
         "reforço_30_39",
     ]
     check_known_columns(frame, known_columns)
     selected = self.columns_rename.keys()
     return frame[selected]
コード例 #13
0
ファイル: chile.py プロジェクト: LimLim0a0/covid-19-data
 def pipe_calculate_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
     """Derive the vaccination metrics from the per-dose columns.

     Missing values in the whole frame are replaced with 0, the four metric
     columns are computed as sums of the dose columns, and the dose columns
     are then dropped.

     Returns:
         The frame with ``people_vaccinated``, ``people_fully_vaccinated``,
         ``total_vaccinations`` and ``total_boosters`` added and without
         ``Primera``, ``Refuerzo``, ``Segunda``, ``Unica`` and ``Cuarta``.
     """
     dose_columns = ["Primera", "Refuerzo", "Segunda", "Unica", "Cuarta"]
     known_columns = ["Region", "date", "Primera", "Refuerzo", "Segunda", "Unica", "Cuarta"]
     check_known_columns(df, known_columns)

     filled = df.fillna(0)
     first = filled.Primera
     booster = filled.Refuerzo
     second = filled.Segunda
     single = filled.Unica
     fourth = filled.Cuarta

     with_metrics = filled.assign(
         people_vaccinated=first + single,
         people_fully_vaccinated=second + single,
         total_vaccinations=first + booster + second + single + fourth,
         total_boosters=booster + fourth,
     )
     return with_metrics.drop(columns=dose_columns)
コード例 #14
0
 def read(self) -> pd.DataFrame:
     """Load the source Excel file and return it as a DataFrame.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below and returned without further changes.
     """
     frame = pd.read_excel(self.source_url)
     known_columns = [
         "Date",
         "Nombre de dose 1", "Nombre de dose 2",
         "Nombre de Dose complémentaire par rapport à schéma complet",
         "Nombre total de doses",
     ]
     check_known_columns(frame, known_columns)
     return frame
コード例 #15
0
 def read(self) -> pd.DataFrame:
     """Load the source CSV, strip ``\\ufeff`` from its headers and return it.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below after the headers have been cleaned.
     """
     frame = pd.read_csv(self.source_url)
     # Temporary fix: remove every "\ufeff" character from the column names.
     cleaned_names = [name.replace("\ufeff", "") for name in frame.columns]
     frame.columns = cleaned_names
     known_columns = [
         "Date of Vaccination",
         "Total Vaccination Doses",
         "Fully vaccinated (2 of 2 or 1 of 1)",
         "Received one dose (1 of 2 or 1 of 1)",
         "Total Booster doses",
     ]
     check_known_columns(frame, known_columns)
     return frame
コード例 #16
0
def read(source: str) -> pd.DataFrame:
    """Load the CSV at ``source`` and return it as a DataFrame.

    The frame is passed to ``check_known_columns`` (defined elsewhere) with
    the column names listed below and returned without further changes.
    """
    frame = pd.read_csv(source)
    known_columns = [
        "id", "datum", "vakcina",
        "kraj_nuts_kod", "kraj_nazev",
        "vekova_skupina",
        "prvnich_davek", "druhych_davek", "celkem_davek",
    ]
    check_known_columns(frame, known_columns)
    return frame
コード例 #17
0
 def read(self):
     """Fetch the source JSON and return the feature attributes as a DataFrame.

     One record is built from the ``"attributes"`` entry of every element
     under the payload's ``"features"`` key. The frame is passed to
     ``check_known_columns`` (defined elsewhere) with the column names
     listed below and returned without further changes.
     """
     payload = request_json(self.source_url)
     attribute_rows = (feature["attributes"] for feature in payload["features"])
     frame = pd.DataFrame.from_records(attribute_rows)
     known_columns = [
         "Reportdt",
         "Total_Vaccinations", "Total_Individuals",
         "LastValue", "ObjectId", "Elderly",
         "FirstDose", "SecondDose", "BoosterDose",
     ]
     check_known_columns(frame, known_columns)
     return frame
コード例 #18
0
 def read(self):
     """Load the semicolon-separated source CSV and return it as a DataFrame.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below and returned without further changes.
     """
     frame = pd.read_csv(self.source_url, sep=";")
     known_columns = [
         "iso_week", "iso_year", "week",
         "vaccine", "gender", "AgeGroup",
         "region", "district", "district_code",
         "dose", "doses_administered",
     ]
     check_known_columns(frame, known_columns)
     return frame
コード例 #19
0
ファイル: malaysia.py プロジェクト: LimLim0a0/covid-19-data
 def read(self) -> pd.DataFrame:
     """Load the source CSV and return it as a DataFrame.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below and returned without further changes.
     """
     frame = pd.read_csv(self.source_url)
     known_columns = [
         "date",
         # daily_* columns
         "daily_partial", "daily_full", "daily",
         "daily_partial_child", "daily_full_child", "daily_booster",
         # cumul_* columns
         "cumul_partial", "cumul_full", "cumul",
         "cumul_partial_child", "cumul_full_child", "cumul_booster",
         # per-product columns
         "pfizer1", "pfizer2", "pfizer3",
         "sinovac1", "sinovac2", "sinovac3",
         "astra1", "astra2", "astra3",
         "sinopharm1", "sinopharm2", "sinopharm3",
         "cansino", "cansino3",
         "pending1", "pending2", "pending3",
         # *_adol columns
         "daily_partial_adol", "daily_full_adol",
         "cumul_full_adol", "cumul_partial_adol",
     ]
     check_known_columns(frame, known_columns)
     return frame
コード例 #20
0
 def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
     """Run the processing steps on the input frame and return the result.

     After the frame is passed to ``check_known_columns`` (defined
     elsewhere), the ``pipe_*`` steps are applied in order, ``location``
     and ``source_url`` are added, the output columns are selected and
     sorted by ``date``, and finally ``self.pipe_exclude_dp`` and
     ``make_monotonic`` are applied.
     """
     known_columns = [
         "fecha",
         "dosis_total",
         "primera_dosis",
         "segunda_dosis",
         "dosis_unica",
         "refuerzo",
     ]
     check_known_columns(df, known_columns)

     output_columns = [
         "location", "date", "vaccine", "source_url",
         "total_vaccinations", "people_vaccinated",
         "people_fully_vaccinated", "total_boosters",
     ]
     processed = (
         df.pipe(self.pipe_column_rename)
         .pipe(self.pipe_metrics)
         .pipe(self.pipe_checks)
         .pipe(self.pipe_date)
         .pipe(self.pipe_vaccines)
     )
     processed = processed.assign(location=self.location, source_url=self.source_url_ref)
     processed = processed[output_columns].sort_values("date")
     return processed.pipe(self.pipe_exclude_dp).pipe(make_monotonic)
コード例 #21
0
 def read(self) -> pd.DataFrame:
     """Load the source CSV and keep the columns named in ``self.columns``.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below before the subset is selected.
     """
     frame = pd.read_csv(self.source_url)
     known_columns = [
         "data_somministrazione", "fornitore", "area", "fascia_anagrafica",
         "sesso_maschile", "sesso_femminile",
         "prima_dose", "seconda_dose",
         "pregressa_infezione",
         "dose_addizionale_booster",
         "codice_NUTS1", "codice_NUTS2", "codice_regione_ISTAT",
         "nome_area",
         "booster_immuno",
     ]
     check_known_columns(frame, known_columns)
     return frame[self.columns]
コード例 #22
0
 def pipeline_manufacturer(self, df: pd.DataFrame) -> pd.DataFrame:
     """Run the manufacturer processing steps and return the result.

     After the frame is passed to ``check_known_columns`` (defined
     elsewhere), the ``pipe_manuf_*`` steps are applied in order,
     ``location`` is added, rows are sorted by ``vaccine`` then ``date``,
     and the four output columns are selected.
     """
     known_columns = [
         "zona", "fabricante",
         "dosis_total", "primera_dosis", "segunda_dosis",
         "dosis_unica", "dosis_refuerzo",
         "administered_at",
     ]
     check_known_columns(df, known_columns)

     processed = (
         df.pipe(self.pipe_manuf_rename_cols)
         .pipe(self.pipe_manuf_aggregate)
         .pipe(self.pipe_manuf_vaccine_checks)
         .pipe(self.pipe_manuf_date)
     )
     processed = processed.assign(location=self.location)
     processed = processed.sort_values(["vaccine", "date"])
     return processed[["location", "date", "vaccine", "total_vaccinations"]]
コード例 #23
0
 def read(self) -> pd.DataFrame:
     """Download the source CSV, decode it as UTF-8 and return it as a DataFrame.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below and returned without further changes.
     """
     raw_bytes = requests.get(self.source_url).content
     csv_text = raw_bytes.decode("utf-8")
     frame = pd.read_csv(io.StringIO(csv_text))
     known_columns = [
         "Date", "Age Group", "Sex",
         "Sinovac 1st dose", "Sinovac 2nd dose", "Sinovac 3rd dose",
         "Sinovac 4th dose", "Sinovac 5th dose", "Sinovac 6th dose",
         "BioNTech 1st dose", "BioNTech 2nd dose", "BioNTech 3rd dose",
         "BioNTech 4th dose", "BioNTech 5th dose", "BioNTech 6th dose",
     ]
     check_known_columns(frame, known_columns)
     return frame
コード例 #24
0
 def read(self) -> pd.DataFrame:
     """Load the CSV at ``self.source_url["main"]`` and return it as a DataFrame.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below and returned without further changes.
     """
     main_url = self.source_url["main"]
     frame = read_csv_from_url(main_url)
     known_columns = [
         "date",
         "dose_1",
         "dose_2",
         "dose_3",
     ]
     check_known_columns(frame, known_columns)
     return frame
コード例 #25
0
 def read(self) -> pd.DataFrame:
     """Load the source CSV and return its ``DATE``, ``DOSE`` and ``COUNT`` columns.

     The frame is passed to ``check_known_columns`` (defined elsewhere) with
     the column names listed below before the subset is selected.
     """
     frame = pd.read_csv(self.source_url)
     known_columns = [
         "DATE",
         "REGION",
         "AGEGROUP",
         "SEX",
         "BRAND",
         "DOSE",
         "COUNT",
     ]
     check_known_columns(frame, known_columns)
     kept_columns = ["DATE", "DOSE", "COUNT"]
     return frame[kept_columns]
コード例 #26
0
ファイル: austria.py プロジェクト: LimLim0a0/covid-19-data
 def read(self) -> pd.DataFrame:
     """Load the semicolon-separated source CSV and drop ``state_id``.

     The file is read through ``read_csv_from_url`` with ``sep=";"`` and
     ``ciphers_low=True``. The frame is passed to ``check_known_columns``
     (defined elsewhere) with the column names listed below before the
     subset is selected.
     """
     frame = read_csv_from_url(self.source_url, sep=";", ciphers_low=True)
     known_columns = [
         "date",
         "state_id",
         "state_name",
         "vaccine",
         "dose_number",
         "doses_administered_cumulative",
     ]
     check_known_columns(frame, known_columns)
     kept_columns = [
         "date",
         "state_name",
         "vaccine",
         "dose_number",
         "doses_administered_cumulative",
     ]
     return frame[kept_columns]
コード例 #27
0
    def export(self):
        """Build and export the vaccination series, overall and per vaccine.

        Reads the semicolon-separated source CSV, maps the numeric vaccine
        codes to names, derives per-vaccine metrics from the cumulative dose
        counts, aggregates them by date and hands both the aggregated frame
        and the per-vaccine frame to ``self.export_datafile``.

        Raises:
            AssertionError: If, after filtering, the vaccine codes present in
                the data are not exactly the codes of the mapping.
        """
        vaccine_mapping = {
            1: "Pfizer/BioNTech",
            2: "Moderna",
            3: "Oxford/AstraZeneca",
            4: "Johnson&Johnson",
        }
        one_dose_vaccines = ["Johnson&Johnson"]
        dose_columns = ["n_cum_dose1", "n_cum_dose2", "n_cum_dose3", "n_cum_dose4"]

        df = read_csv_from_url(self.source_url, sep=";")
        check_known_columns(
            df,
            [
                "fra",
                "vaccin",
                "jour",
                "n_dose1",
                "n_dose2",
                "n_dose3",
                "n_dose4",
                "n_rappel",
                "n_cum_dose1",
                "n_cum_dose2",
                "n_cum_dose3",
                "n_cum_dose4",
                "n_cum_rappel",
            ],
        )
        df = df[["vaccin", "jour", *dose_columns]]

        df = df.rename(
            columns={
                "vaccin": "vaccine",
                "jour": "date",
            }
        )

        # Map vaccine names
        # Keep rows for known vaccine codes with a positive first-dose count.
        # The explicit copy makes the column assignments below act on an
        # independent frame instead of a slice of the filtered data, which
        # is what triggers pandas' SettingWithCopyWarning.
        df = df[(df.vaccine.isin(vaccine_mapping.keys())) & (df.n_cum_dose1 > 0)].copy()
        assert set(df["vaccine"].unique()) == set(vaccine_mapping.keys()), (
            "Vaccine codes in the data do not match the known vaccine mapping"
        )
        df["vaccine"] = df.vaccine.replace(vaccine_mapping)

        df["total_vaccinations"] = df.n_cum_dose1 + df.n_cum_dose2 + df.n_cum_dose3 + df.n_cum_dose4
        df["people_vaccinated"] = df.n_cum_dose1

        is_one_dose = df.vaccine.isin(one_dose_vaccines)

        # 2-dose vaccines (``~`` is the boolean inversion operator; unary
        # minus is not defined for boolean arrays in numpy).
        df.loc[~is_one_dose, "people_fully_vaccinated"] = df.n_cum_dose2
        df.loc[~is_one_dose, "total_boosters"] = df.n_cum_dose3 + df.n_cum_dose4

        # 1-dose vaccines
        df.loc[is_one_dose, "people_fully_vaccinated"] = df.n_cum_dose1
        df.loc[is_one_dose, "total_boosters"] = df.n_cum_dose2 + df.n_cum_dose3 + df.n_cum_dose4

        df = df.drop(columns=dose_columns)

        # Per-vaccine frame, taken before the per-date aggregation below.
        df_man = df[["date", "total_vaccinations", "vaccine"]].assign(location="France")

        # Earliest date present in the data for each vaccine.
        approval_timeline = df_man[["vaccine", "date"]].groupby("vaccine").min().to_dict()["date"]

        df = (
            df.groupby("date", as_index=False)
            .agg(
                {
                    "total_vaccinations": "sum",
                    "people_vaccinated": "sum",
                    "people_fully_vaccinated": "sum",
                    "total_boosters": "sum",
                }
            )
            .pipe(build_vaccine_timeline, approval_timeline)
        )

        df = df.assign(
            location=self.location,
            source_url=self.source_url_ref,
        )
        self.export_datafile(
            df,
            df_manufacturer=df_man,
            meta_manufacturer={"source_name": "Public Health France", "source_url": self.source_url},
        )
コード例 #28
0
    def parse_metrics_from_pdf(self, pdf_path) -> pd.Series:
        """Compute the vaccination totals from the first table of a PDF.

        The first table returned by ``tabula.read_pdf`` has its header fixed
        by ``self._fix_header`` and is passed to ``check_known_columns``. The
        cells of one row are then split into 11 whitespace-separated tokens,
        which are read by position as first, second and third dose counts.

        Args:
            pdf_path: Location of the PDF, handed to ``tabula.read_pdf``.

        Returns:
            Series with ``total_vaccinations``, ``people_vaccinated``,
            ``people_fully_vaccinated`` and ``total_boosters``.

        Raises:
            AssertionError: If the selected row does not yield exactly 11
                tokens.
        """
        print(pdf_path)
        dfs = tabula.read_pdf(pdf_path)
        # Only the first extracted table is used.
        df = dfs[0]

        # Fix header
        df = self._fix_header(df)
        # All calculations below assume a fixed shape of the PDF's table, and a specific order for
        # the columns and vaccines. If the following test fails, then the table should be checked
        # for potential changes.
        check_known_columns(
            df,
            [
                "දිය",
                "ේ ාවිෂීල්ඩඩ් Covishield",
                "ටයිේයාෆාම් Sinopharm",
                # Alternative spelling of the next header, currently disabled:
                # "ටුට්නිව් - V",
                "ට ට්නිව් - V",
                "Sputnik - V",
                "Unnamed: 0",
                "ෆයිසර්Pfizer",
                "Unnamed: 1",
                "Unnamed: 2",
                "ම ොඩර්ර්ො Moderna",
            ],
        )

        # Largest index label among the rows whose second column is not null.
        values_idx = df.index[df.iloc[:, 1].notnull()].max()
        # NOTE(review): the label is used as a position here, which assumes
        # a default 0..n-1 index — confirm against ``_fix_header``.
        values_raw = df.iloc[values_idx].values.flatten()
        # Split every non-null cell on whitespace and collect all tokens.
        values = []
        for val in values_raw:
            if not pd.isnull(val):
                values += val.split()
        assert len(values) == 11

        # Tokens are read by position: pairs (first, second) for the first
        # three vaccines, then three tokens for pfizer (first, second,
        # third) and a final pair for moderna.
        doses = {
            "first": {
                "covishield": clean_count(values[0]),
                "sinopharm": clean_count(values[2]),
                "sputnik": clean_count(values[4]),
                "pfizer": clean_count(values[6]),
                "moderna": clean_count(values[9]),
            },
            "second": {
                "covishield": clean_count(values[1]),
                "sinopharm": clean_count(values[3]),
                "sputnik": clean_count(values[5]),
                "pfizer": clean_count(values[7]),
                "moderna": clean_count(values[10]),
            },
            "third": {
                "pfizer": clean_count(values[8])
            },
        }

        # Each metric is the sum of one dose level across vaccines.
        people_vaccinated = sum(doses["first"].values())
        people_fully_vaccinated = sum(doses["second"].values())
        total_boosters = sum(doses["third"].values())
        total_vaccinations = people_vaccinated + people_fully_vaccinated + total_boosters

        return pd.Series({
            "total_vaccinations": total_vaccinations,
            "people_vaccinated": people_vaccinated,
            "people_fully_vaccinated": people_fully_vaccinated,
            "total_boosters": total_boosters,
        })