def main():
    """Scrape Norwegian regional vaccination counts and update OUTPUT_FILE.

    Reads the existing CSV, fetches dose-1 and dose-2 counts per region from
    the FHI chart-data API, merges them, stamps the report date scraped from
    DATA_URL, and rewrites OUTPUT_FILE with the combined history.
    """
    # Load current file
    df_source = pd.read_csv(OUTPUT_FILE)

    # Get report date from the FHI page. Fail loudly with the original cause
    # attached; a bare `except:` would also have masked KeyboardInterrupt /
    # SystemExit and discarded the underlying traceback.
    driver = load_driver(DATA_URL)
    try:
        date = load_date(driver)
    except Exception as exc:
        raise Exception("Date not found!") from exc

    # Load dose 1 data
    url = "https://www.fhi.no/api/chartdata/api/99112"
    dix = json.loads(requests.get(url).content)
    df_dose1 = pd.DataFrame(dix, columns=["region", "people_vaccinated"])

    # Load dose 2 data
    url = "https://www.fhi.no/api/chartdata/api/99111"
    dix = json.loads(requests.get(url).content)
    df_dose2 = pd.DataFrame(dix, columns=["region", "people_fully_vaccinated"])

    # Drop the aggregate "Fylke" row from each frame. BUGFIX: the original
    # filtered df_dose1 with df_dose2's boolean mask, which is only correct
    # when both API responses happen to list regions in the same order —
    # filter each frame by its own "region" column instead.
    df_dose1 = df_dose1.loc[df_dose1["region"] != "Fylke"]
    df_dose2 = df_dose2.loc[df_dose2["region"] != "Fylke"]

    # Merge dose-1 and dose-2 counts on region
    df = df_dose1.merge(df_dose2, on="region", how="left")

    # Normalize region names to the project's canonical spellings
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)

    # Add metadata columns
    df.loc[:, "date"] = date
    df.loc[:, "location"] = COUNTRY
    df.loc[:, "total_vaccinations"] = (
        df.loc[:, "people_fully_vaccinated"] + df.loc[:, "people_vaccinated"]
    )

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Drop any pre-existing rows for this report date, then append history
    df_source = df_source.loc[~(df_source.loc[:, "date"] == date)]
    df = pd.concat([df, df_source])

    # Export in canonical column order
    df = df[[
        "location", "region", "date", "location_iso", "region_iso",
        "total_vaccinations", "people_vaccinated", "people_fully_vaccinated"
    ]]
    df = keep_min_date(df)
    cols = ["total_vaccinations", "people_vaccinated", "people_fully_vaccinated"]
    # Nullable Int64 keeps missing counts as <NA> instead of coercing to float
    df[cols] = df[cols].astype("Int64").fillna(pd.NA)
    df = df.sort_values(by=["region", "date"])
    df.to_csv(OUTPUT_FILE, index=False)

    # Tracking
    update_country_tracking(
        country=COUNTRY,
        url=DATA_URL_REFERENCE,
        last_update=df["date"].max(),
    )
def _postprocess(self, df):
    """Attach ISO codes, order chronologically, and cumsum configured fields.

    Merges region/country ISO identifiers into *df*, sorts by date, then
    converts every column listed in ``self.do_cumsum_fields`` from per-day
    counts into per-region running totals.
    """
    out = ISODB().merge(df, country_iso=self.country_iso, mode=self.mode_iso_merge)
    out = out.sort_values(by="date")
    for column in self.do_cumsum_fields:
        out[column] = out.groupby("region")[column].cumsum().values
    # TODO: Insert here population info (need path to population.csv as
    # class attribute)
    return out
def main():
    """Build cumulative regional vaccination series for Czechia and export.

    Downloads the per-record CSV from DATA_URL, validates the upstream
    schema, aggregates doses per (date, region), sanity-checks the totals,
    converts daily counts to per-region running totals, and exports.
    """
    # Load data
    df = pd.read_csv(DATA_URL)

    # Check 1: guard against upstream schema changes
    expected = [
        "datum", "vakcina", "kraj_nuts_kod", "kraj_nazev",
        "vekova_skupina", "prvnich_davek", "druhych_davek", "celkem_davek",
    ]
    if not set(expected).issubset(df.columns):
        raise Exception("API changed")

    # Rename Czech column names to the project's canonical ones
    df = df.rename(columns={
        "datum": "date",
        "kraj_nazev": "region",
        "prvnich_davek": "people_vaccinated",
        "druhych_davek": "people_fully_vaccinated",
        "celkem_davek": "total_vaccinations",
    })

    # Aggregate dose counts per day and region
    df = (
        df.groupby(by=["date", "region"])
        .agg(
            people_vaccinated=("people_vaccinated", "sum"),
            people_fully_vaccinated=("people_fully_vaccinated", "sum"),
            total_vaccinations=("total_vaccinations", "sum"),
        )
        .reset_index()
    )

    # Check 2: totals must equal the sum of first and second doses
    consistent = (
        df["total_vaccinations"]
        == df["people_vaccinated"] + df["people_fully_vaccinated"]
    )
    if not consistent.all():
        raise Exception("Error in columns. dose_1 + dose_2 != total_doses")

    # Rename regions and tag the country
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)
    df.loc[:, "location"] = COUNTRY

    # ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Convert daily counts into per-region cumulative sums
    df = df.sort_values(by="date")
    for column in ("total_vaccinations", "people_vaccinated", "people_fully_vaccinated"):
        df.loc[:, column] = df.groupby("region")[column].cumsum().values

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Scrape Belgian regional vaccination counts and update OUTPUT_FILE.

    Parses three region boxes from the DATA_URL page, joins per-region report
    dates pulled from a published spreadsheet, merges the result into the
    existing CSV history, and rewrites OUTPUT_FILE.
    """
    # Load current data
    df_source = pd.read_csv(OUTPUT_FILE)
    # Fetch the page; a custom User-Agent header is sent (presumably the
    # default agent is blocked — TODO confirm)
    page_content = requests.get(DATA_URL, headers={
        'User-Agent': 'Custom'
    }).content
    soup = BeautifulSoup(page_content, "html.parser")
    # Get new data: one Bootstrap-grid box per region. If the page does not
    # have exactly 3 boxes, new_data stays empty and an empty frame is built.
    boxes = soup.findAll(class_="col-12 col-md-6 col-xl-4")
    new_data = []
    if len(boxes) == 3:
        for box in boxes:
            fields = box.findAll(class_="col-12")
            if len(fields) == 4:
                region = fields[0].text.strip()
                if "Vaccines administered" in fields[1].text:
                    # Two right-aligned cells: national total and the
                    # regional figures; only the regional cell is used.
                    total, regional = fields[1].findAll(
                        class_="col-auto text-end")
                    # Regional cell holds dose-1 and dose-2 counts on
                    # separate lines, with "," as thousands separator.
                    dose_1, dose_2 = list(
                        map(lambda x: int(x.replace(",", "")),
                            regional.text.strip().split("\n")))
                    new_data.append([region, dose_1, dose_2])
    df = pd.DataFrame(
        new_data,
        columns=["region", "people_vaccinated", "people_fully_vaccinated"])
    # Derive totals and tag the country
    df.loc[:, "total_vaccinations"] = df.loc[:, "people_vaccinated"] + df.loc[:, "people_fully_vaccinated"]
    df.loc[:, "location"] = COUNTRY
    # Join with per-region report dates taken from the published spreadsheet
    url = "https://covid-vaccinatie.be/en/vaccines-administered.xlsx"
    df_dates = get_date(url)
    df = df.merge(df_dates, left_on="region", right_on="Region", how="left")
    # ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)
    # Drop rows already covered by this report, then append the history.
    # NOTE(review): df_dates.index is assumed to hold region names matching
    # df_source["region"] while the merge above used its "Region" column —
    # verify get_date() returns a region-indexed frame.
    region = df_dates.index.tolist()
    date = df_dates.date.tolist()
    df_source = df_source.loc[~(df_source["region"].isin(region) & df_source["date"].isin(date))]
    df = pd.concat([df, df_source])
    df = df[[
        "location", "region", "date", "location_iso", "region_iso",
        "total_vaccinations", "people_vaccinated", "people_fully_vaccinated"
    ]]
    cols = [
        "total_vaccinations", "people_vaccinated", "people_fully_vaccinated"
    ]
    # Avoid repeating reports
    df = keep_min_date(df)
    # Export; nullable Int64 keeps missing counts as <NA> instead of floats
    df[cols] = df[cols].astype("Int64").fillna(pd.NA)
    df = df.sort_values(by=["region", "date"])
    df.to_csv(OUTPUT_FILE, index=False)
    # Tracking
    update_country_tracking(country=COUNTRY,
                            url=DATA_URL_REFERENCE,
                            last_update=df["date"].max())
def main():
    """Parse the newest Danish regional vaccination PDF and update OUTPUT_FILE.

    Locates the latest PDF linked from DATA_URL, extracts its tables with
    tabula, pulls per-region dose counts from the hardcoded second table,
    merges them into the existing CSV history, and rewrites OUTPUT_FILE.
    """
    # Load current data
    df_source = pd.read_csv(OUTPUT_FILE)

    # Locate newest pdf
    html_page = urllib.request.urlopen(DATA_URL)
    soup = BeautifulSoup(html_page, "html.parser")
    pdf_path = soup.find('a', text="Download her").get("href")  # Path to newest pdf

    # Extract all tables ONCE (the original parsed the same PDF twice with
    # identical arguments). Force dtype=str so the "." thousands separator
    # is not misread as a decimal point.
    kwargs = {'pandas_options': {'dtype': str, 'header': None}}
    dfs_from_pdf = tabula.read_pdf(pdf_path, pages="all", **kwargs)

    # Report date is derived from the extracted tables
    date = get_date(dfs_from_pdf)

    df = dfs_from_pdf[1]  # Hardcoded table index

    # Guard against layout changes in the PDF
    if df.shape != (11, 7):
        raise Exception("Shape of table changed!")
    if not all(region in df[0].tolist() for region in regions):
        raise Exception("Region missing!")

    # Drop the header rows and the trailing total row
    df = df.drop([0, 1, 2, 3, len(df) - 1])

    # Rename the relevant positional columns
    df = df.rename(columns={
        0: "region",
        2: "people_vaccinated",
        4: "people_fully_vaccinated"
    })
    df = df.astype(str)

    def _parse_count(x):
        # Strip the "." thousands separator; after astype(str) a missing
        # cell appears as the literal string 'nan' — treat it as 0.
        # BUGFIX: the original applied int() first and called .fillna(0) on
        # the already-converted result (a no-op), so a 'nan' cell would have
        # raised ValueError instead of becoming 0.
        return 0 if x == 'nan' else int(x.replace(".", ""))

    df.loc[:, "people_vaccinated"] = (
        df.loc[:, "people_vaccinated"].apply(_parse_count).astype(int)
    )
    df.loc[:, "people_fully_vaccinated"] = (
        df.loc[:, "people_fully_vaccinated"].apply(_parse_count).astype("Int64")
    )

    # Normalize region names to the project's canonical spellings
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)

    # Derive totals and stamp metadata
    df.loc[:, "total_vaccinations"] = df.loc[:, "people_vaccinated"] + df.loc[:, "people_fully_vaccinated"]
    df.loc[:, "location"] = COUNTRY
    df.loc[:, "date"] = date

    # Add ISO codes; the synthetic "Others" row gets the country ISO
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)
    df.loc[df["region"] == "Others", "location_iso"] = COUNTRY_ISO

    # Drop any pre-existing rows for this report date, then append history
    df_source = df_source.loc[~(df_source.loc[:, "date"] == date)]
    df = pd.concat([df, df_source])

    # Export in canonical column order
    cols = [
        "location", "region", "date", "location_iso", "region_iso",
        "total_vaccinations", "people_vaccinated", "people_fully_vaccinated"
    ]
    df = keep_min_date(df[cols])[cols]
    df = df.sort_values(by=["region", "date"])
    df.to_csv(OUTPUT_FILE, index=False)

    # Tracking
    update_country_tracking(country=COUNTRY,
                            url=DATA_URL_REFERENCE,
                            last_update=df["date"].max())