def main():
    """Build the per-region vaccination table from the CSV at DATA_URL and export it.

    Reads the ``state``, ``date`` and ``vaccinated`` columns, keeps rows on or
    after 2021-01-18, derives ``region_iso`` from the state code and hands the
    result to ``export_data``.
    """
    # Read only the columns that are used downstream
    raw = pd.read_csv(DATA_URL, usecols=["state", "date", "vaccinated"])
    df = raw.rename(columns={"vaccinated": "total_vaccinations"})

    # Discard everything reported before the start date (string comparison)
    start_date = "2021-01-18"
    on_or_after_start = df["date"] >= start_date
    df = df.loc[on_or_after_start]

    # Missing counts are treated as zero and cast to integers
    filled_counts = df["total_vaccinations"].fillna(0)
    df.loc[:, "total_vaccinations"] = filled_counts.astype(int)

    # Drop the aggregate row, then prefix the state code with the country ISO
    df = df[df["state"] != "TOTAL"]
    df.loc[:, "region_iso"] = f"{COUNTRY_ISO}-" + df["state"]

    # Resolve region names from the ISO codes
    df = ISODB().merge(df, mode="region")
    df.loc[:, "location"] = COUNTRY

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def _postprocess(self, df):
    """Attach ISO data, order rows by date and accumulate the configured fields.

    Args:
        df: Frame passed to ``ISODB().merge``; must expose ``date`` and
            ``region`` columns after the merge.

    Returns:
        The merged frame, sorted by ``date``, where every column listed in
        ``self.do_cumsum_fields`` holds its per-region running total.
    """
    merged = ISODB().merge(
        df,
        country_iso=self.country_iso,
        mode=self.mode_iso_merge,
    )
    merged = merged.sort_values(by="date")
    for column in self.do_cumsum_fields:
        running_total = merged.groupby("region")[column].cumsum()
        # Assign positionally (raw values) rather than by index alignment
        merged[column] = running_total.values
    # TODO: Insert here population info (need path to population.csv as class attribute)
    return merged
def main():
    """Scrape the latest per-region dose counts and update OUTPUT_FILE.

    Loads the previously exported file, reads the report date through the
    browser driver, downloads the dose-1 and dose-2 tables from the fhi.no
    chart API, merges them, adds ISO codes, replaces any rows already stored
    for the same date, writes the CSV and updates the country tracking entry.

    Raises:
        Exception: "Date not found!" when the report date cannot be read; the
            original error is chained as the cause.
    """
    # Load current file
    df_source = pd.read_csv(OUTPUT_FILE)

    # Get date
    driver = load_driver(DATA_URL)
    try:
        date = load_date(driver)
    except Exception as err:
        # Keep the original failure visible instead of discarding it
        raise Exception("Date not found!") from err

    # Load dose 1 data
    url = "https://www.fhi.no/api/chartdata/api/99112"
    dix = json.loads(requests.get(url).content)
    df_dose1 = pd.DataFrame(dix, columns=["region", "people_vaccinated"])

    # Load dose 2 data
    url = "https://www.fhi.no/api/chartdata/api/99111"
    dix = json.loads(requests.get(url).content)
    df_dose2 = pd.DataFrame(dix, columns=["region", "people_fully_vaccinated"])

    # Remove the "Fylke" row; each frame is filtered with its OWN mask
    # (the dose-1 frame used to be filtered with the dose-2 mask)
    df_dose1 = df_dose1.loc[df_dose1["region"] != "Fylke"]
    df_dose2 = df_dose2.loc[df_dose2["region"] != "Fylke"]

    # Merge
    df = df_dose1.merge(df_dose2, on="region", how="left")

    # Process region column
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)

    # Add columns
    df.loc[:, "date"] = date
    df.loc[:, "location"] = COUNTRY
    df.loc[:, "total_vaccinations"] = (
        df.loc[:, "people_fully_vaccinated"] + df.loc[:, "people_vaccinated"]
    )

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Concat: rows already stored for this date are superseded by the new ones
    df_source = df_source.loc[~(df_source.loc[:, "date"] == date)]
    df = pd.concat([df, df_source])

    # Export
    df = df[[
        "location",
        "region",
        "date",
        "location_iso",
        "region_iso",
        "total_vaccinations",
        "people_vaccinated",
        "people_fully_vaccinated",
    ]]
    df = keep_min_date(df)
    cols = ["total_vaccinations", "people_vaccinated", "people_fully_vaccinated"]
    df[cols] = df[cols].astype("Int64").fillna(pd.NA)
    df = df.sort_values(by=["region", "date"])
    df.to_csv(OUTPUT_FILE, index=False)

    # Tracking
    update_country_tracking(
        country=COUNTRY,
        url=DATA_URL_REFERENCE,
        last_update=df["date"].max(),
    )
def main():
    """Fetch the current per-region dose counts from the API and export them.

    Loads the previously exported file, downloads the ``vaccination_data``
    payload from DATA_URL, normalises column names, regions and dates, adds
    ISO codes (with manual ``location_iso`` overrides for a few regions),
    replaces any stored rows for the reported date and calls ``export_data``.

    Raises:
        Exception: "Multiple dates detected!" when the payload does not
            contain exactly one distinct date.
    """
    # Load current data
    df_source = pd.read_csv(OUTPUT_FILE)

    # Load new data from API
    data = requests.get(DATA_URL).json()
    df = (
        pd.DataFrame(data["vaccination_data"])
        .fillna(0)
        .astype({"Administered_Dose1": int, "Administered_Dose2": int})
    )

    # Keep only the columns that are used
    cols = ["LongName", "Date", "Administered_Dose1", "Administered_Dose2"]
    df = df[cols]

    # Process columns
    df.loc[:, "total_vaccinations"] = (
        df.loc[:, "Administered_Dose1"] + df.loc[:, "Administered_Dose2"]
    )
    df = df.rename(
        columns={
            "LongName": "region",
            "Date": "date",
            "Administered_Dose1": "people_vaccinated",
            "Administered_Dose2": "people_fully_vaccinated",
        }
    )
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)
    # Validate and normalise the date in one expression. Writing the parsed
    # datetimes back with ``df.loc[:, "date"] = ...`` may keep the column's
    # object dtype on recent pandas, which breaks the following ``.dt`` call.
    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d").dt.strftime("%Y-%m-%d")
    df = df[~df["region"].isin(["United States", "Long Term Care"])]
    df.loc[:, "location"] = "United States"

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)
    location_iso_overrides = {
        "Federated States of Micronesia": "FM",
        "Marshall Islands": "MH",
        "Puerto Rico": "PR",
        "Republic of Palau": "PW",
        "Bureau of Prisons": "US",
        "Dept of Defense": "US",
        "Indian Health Svc": "US",
        "Veterans Health": "US",
    }
    for region_name, iso_code in location_iso_overrides.items():
        df.loc[df["region"] == region_name, "location_iso"] = iso_code

    # Concat: the payload must describe a single day, which replaces stored rows
    dates = df.loc[:, "date"].unique().tolist()
    if len(dates) != 1:
        raise Exception("Multiple dates detected!")
    df_source = df_source.loc[~(df_source.loc[:, "date"] == dates[0])]
    df = pd.concat([df, df_source])

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Turn the daily administration CSV at DATA_URL into cumulative regional totals.

    Renames the source columns, formats the date as ``YYYY-MM-DD``, computes
    per-region running totals for the three count columns, drops the ``ITA``
    rows, adds ISO codes and calls ``export_data``.
    """
    column_names = {
        "data_somministrazione": "date",
        "area": "region",
        "totale": "total_vaccinations",
        "prima_dose": "people_vaccinated",
        "seconda_dose": "people_fully_vaccinated",
    }
    df = pd.read_csv(DATA_URL, parse_dates=["data_somministrazione"])
    df = df.rename(columns=column_names)

    # Date normalisation
    df.loc[:, "date"] = pd.to_datetime(df["date"], format="%Y-%m-%d")
    df.loc[:, "date"] = df["date"].dt.strftime("%Y-%m-%d")
    df.loc[:, "location"] = COUNTRY

    # Running totals per region, in chronological order
    df = df.sort_values(by="date")
    for metric in (
        "total_vaccinations",
        "people_vaccinated",
        "people_fully_vaccinated",
    ):
        df[metric] = df.groupby("region")[metric].cumsum().values

    # The "ITA" rows are removed only after the running totals are computed
    df = df[df["region"] != "ITA"]

    # Add ISO codes
    df.loc[:, "region"] = df["region"].replace(REGION_RENAMING)
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Scrape the regional totals table from DATA_URL and append it to OUTPUT_FILE.

    The table is located through the paragraph whose text matches
    "Ваксинирани лица по". Rows are stamped with yesterday's date in the
    Europe/Sofia timezone; rows already stored for that date are replaced.
    """
    # Previously exported rows
    df_source = pd.read_csv(OUTPUT_FILE)

    # Download the page and locate the table next to the matching paragraph
    response = requests.get(DATA_URL)
    soup = BeautifulSoup(response.content, "html.parser")
    anchor = soup.find("p", string=re.compile("Ваксинирани лица по"))
    table = anchor.parent.find("table")
    df = pd.read_html(str(table))[0]
    # Drop the outer level of the two-level column header
    df = df.droplevel(level=0, axis=1)

    # Yesterday in local (Sofia) time, as a string
    today_sofia = datetime.datetime.now(pytz.timezone("Europe/Sofia")).date()
    date = str(today_sofia - datetime.timedelta(days=1))

    df = df.rename(columns={"Област": "region", "Общо": "total_vaccinations"})
    # Remove the totals row
    df = df[df["region"] != "Общо"]
    df.loc[:, "region"] = df["region"].replace(replace)
    df.loc[:, "date"] = date
    df.loc[:, "location"] = COUNTRY

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Stored rows for the same date are superseded by the fresh scrape
    df_source = df_source.loc[df_source["date"] != date]
    df = pd.concat([df, df_source])

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Convert the semicolon-separated daily dose CSV into cumulative regional totals.

    First- and second-dose counts are accumulated per region in date order;
    ``total_vaccinations`` is the sum of the two running totals.
    """
    column_names = {
        "Date": "date",
        "Region": "region",
        "first_dose": "people_vaccinated",
        "second_dose": "people_fully_vaccinated",
    }
    df = pd.read_csv(DATA_URL, sep=";").rename(columns=column_names)

    # Running totals per region, in chronological order
    df = df.sort_values(by="date")
    for metric in ("people_vaccinated", "people_fully_vaccinated"):
        df[metric] = df.groupby("region")[metric].cumsum().values

    # Derived columns
    df.loc[:, "total_vaccinations"] = (
        df["people_vaccinated"] + df["people_fully_vaccinated"]
    )
    df.loc[:, "region"] = df["region"].replace(REGION_RENAMING)
    df.loc[:, "location"] = COUNTRY

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Load the semicolon-separated regional CSV at DATA_URL and export it.

    Drops the "Österreich" rows, renames the source columns, truncates the
    date to its first ten characters and adds ISO codes.
    """
    column_names = {
        "Datum": "date",
        "Name": "region",
        "EingetrageneImpfungen": "total_vaccinations",
        "Teilgeimpfte": "people_vaccinated",
        "Vollimmunisierte": "people_fully_vaccinated",
    }
    source_columns = [
        "Datum",
        "Name",
        "EingetrageneImpfungen",
        "Teilgeimpfte",
        "Vollimmunisierte",
    ]
    df = pd.read_csv(DATA_URL, sep=";", usecols=source_columns)
    # Remove the "Österreich" rows
    df = df.loc[df["Name"] != "Österreich"]

    df = df.rename(columns=column_names)
    df.loc[:, "region"] = df["region"].replace(REGION_RENAMING)

    # Keep only the first ten characters of the date string
    df.loc[:, "date"] = df.date.str.slice(0, 10)
    df.loc[:, "location"] = COUNTRY

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Load the latest per-region report and append it to OUTPUT_FILE.

    ``total_vaccinations`` is the sum of the "Moderna" and "Pfizer/BioNTech"
    columns after conversion with ``column_str2int``. Rows already stored for
    the report date are replaced.
    """
    # Previously exported rows
    df_source = pd.read_csv(OUTPUT_FILE)

    # New report and its date
    df, date = load_data(DATA_URL)
    df = df.rename(columns={"Län": "region"})

    # Derived columns
    moderna_doses = column_str2int(df["Moderna"])
    pfizer_doses = column_str2int(df["Pfizer/BioNTech"])
    df.loc[:, "total_vaccinations"] = moderna_doses + pfizer_doses
    df.loc[:, "location"] = COUNTRY
    df.loc[:, "date"] = date

    # Remove the grand-total row
    df = df.loc[df["region"] != "Totala summan"]

    # Region names and ISO codes
    df.loc[:, "region"] = df["region"].replace(REGION_RENAMING)
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Stored rows for the same date are superseded
    df_source = df_source.loc[df_source["date"] != date]
    df = pd.concat([df, df_source])

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Load today's per-region totals and append them to OUTPUT_FILE.

    Rows are stamped with the current date in the Europe/Warsaw timezone.
    ``people_vaccinated`` is computed as total doses minus second doses.
    Rows already stored for that date are replaced.
    """
    # Previously exported rows
    df_source = pd.read_csv(OUTPUT_FILE)

    # New data
    column_names = {
        "jpt_nazwa_": "region",
        "SZCZEPIENIA_SUMA": "total_vaccinations",
        "DAWKA_2_SUMA": "people_fully_vaccinated",
    }
    df = load_data(DATA_URL).rename(columns=column_names)
    df.loc[:, "location"] = COUNTRY

    # Today's date in local (Warsaw) time
    now_warsaw = datetime.datetime.now(pytz.timezone("Europe/Warsaw"))
    date = now_warsaw.date().strftime("%Y-%m-%d")
    df.loc[:, "date"] = date

    df.loc[:, "people_vaccinated"] = (
        df["total_vaccinations"] - df["people_fully_vaccinated"]
    )
    df.loc[:, "region"] = df["region"].replace(REGION_RENAMING)

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Stored rows for the same date are superseded
    df_source = df_source.loc[df_source["date"] != date]
    df = pd.concat([df, df_source])

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Load the regional report CSV at DATA_URL, clean it and export it.

    Count columns arrive as text using "." as thousands separator; they are
    parsed to integers. A missing ``people_fully_vaccinated`` value counts as
    0. ``people_vaccinated`` is computed as total doses minus fully
    vaccinated people. Dates are converted from ``DD/MM/YYYY`` to
    ``YYYY-MM-DD``.
    """
    df = pd.read_csv(DATA_URL, dtype={"personas con pauta completa": str})
    df = df.rename(
        columns={
            "informe": "date",
            "comunidad autónoma": "region",
            "dosis administradas": "total_vaccinations",
            "personas con pauta completa": "people_fully_vaccinated",
        }
    )
    # Work on text so the thousands separator can be stripped; missing
    # fully-vaccinated values are marked with the sentinel "nan"
    df["people_fully_vaccinated"] = df["people_fully_vaccinated"].fillna("nan")
    df = df.astype({"total_vaccinations": str, "people_fully_vaccinated": str})

    # Remove the totals row
    df = df[~(df.loc[:, "region"] == "Totales")]
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)

    # Parse and re-format the date in one expression. Writing the parsed
    # datetimes back with ``df.loc[:, "date"] = ...`` may keep the column's
    # object dtype on recent pandas, which breaks the following ``.dt`` call.
    df["date"] = pd.to_datetime(df["date"], format="%d/%m/%Y").dt.strftime("%Y-%m-%d")

    # Replace the text columns with parsed integers (column replacement, so
    # the resulting dtype follows the new values)
    df["total_vaccinations"] = df["total_vaccinations"].apply(
        lambda x: int(x.replace(".", ""))
    )
    df["people_fully_vaccinated"] = df["people_fully_vaccinated"].apply(
        lambda x: int(x.replace(".", "")) if x != "nan" else 0
    )
    df["people_vaccinated"] = df["total_vaccinations"] - df["people_fully_vaccinated"]
    df["location"] = COUNTRY

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Load the cumulative per-state dose CSV at DATA_URL and export it.

    ``total_vaccinations`` is the sum of cumulative first and second doses.
    Dates are validated against ``YYYY-MM-DD`` and stored as strings.
    """
    df = pd.read_csv(
        DATA_URL,
        usecols=["date", "state", "firstDosesCumulative", "secondDosesCumulative"],
    )
    df = df.rename(
        columns={
            "state": "region",
            "firstDosesCumulative": "people_vaccinated",
            "secondDosesCumulative": "people_fully_vaccinated",
        }
    )
    df.loc[:, "total_vaccinations"] = (
        df.loc[:, "people_vaccinated"] + df.loc[:, "people_fully_vaccinated"]
    )
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)
    # Parse and re-format the date in one expression. Writing the parsed
    # datetimes back with ``df.loc[:, "date"] = ...`` may keep the column's
    # object dtype on recent pandas, which breaks the following ``.dt`` call.
    df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d").dt.strftime("%Y-%m-%d")
    df.loc[:, "location"] = COUNTRY

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Combine the administered-dose and completed-vaccination CSVs and export them.

    The first file (DATA_URL_1) provides ``total_vaccinations`` per region
    and date; the second (DATA_URL_2) provides ``people_fully_vaccinated``
    and is left-joined on region and date, with missing values counted as 0.
    ``people_vaccinated`` is total doses minus fully vaccinated people.
    Dates are converted from ``DD-MM-YYYY`` to ``YYYY-MM-DD``.
    """
    # Administered doses
    COLUMNS_RENAMING = {
        "date_vaccine_administered": "date",
        "province": "region",
        "cumulative_avaccine": "total_vaccinations",
    }
    df = pd.read_csv(DATA_URL_1, usecols=COLUMNS_RENAMING.keys())
    df = df.rename(columns=COLUMNS_RENAMING)
    # Parse and re-format the date in one expression. Writing the parsed
    # datetimes back with ``df.loc[:, "date"] = ...`` may keep the column's
    # object dtype on recent pandas, which breaks the following ``.dt`` call.
    df["date"] = pd.to_datetime(df["date"], format="%d-%m-%Y").dt.strftime("%Y-%m-%d")
    # New cols
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)
    df.loc[:, "location"] = COUNTRY
    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Completed vaccinations
    COLUMNS_RENAMING = {
        "date_vaccine_completed": "date",
        "province": "region",
        "cumulative_cvaccine": "people_fully_vaccinated",
    }
    df_2 = pd.read_csv(DATA_URL_2, usecols=COLUMNS_RENAMING.keys())
    df_2 = df_2.rename(columns=COLUMNS_RENAMING)
    df_2["date"] = pd.to_datetime(df_2["date"], format="%d-%m-%Y").dt.strftime(
        "%Y-%m-%d"
    )
    df_2.loc[:, "region"] = df_2.loc[:, "region"].replace(REGION_RENAMING)

    # Left join: dates without a completed-vaccination record count as 0
    df = df.merge(df_2, on=["region", "date"], how="left")
    df["people_fully_vaccinated"] = df["people_fully_vaccinated"].fillna(0).astype(int)
    df["people_vaccinated"] = df["total_vaccinations"] - df["people_fully_vaccinated"]

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Aggregate the detailed dose CSV at DATA_URL into cumulative regional totals.

    Daily counts are summed per date and region, checked for consistency and
    then accumulated per region in date order.

    Raises:
        Exception: "API changed" when an expected source column is missing.
        Exception: when total doses differ from first plus second doses.
    """
    df = pd.read_csv(DATA_URL)

    # Check 1: every expected source column must be present
    expected_columns = [
        "datum",
        "vakcina",
        "kraj_nuts_kod",
        "kraj_nazev",
        "vekova_skupina",
        "prvnich_davek",
        "druhych_davek",
        "celkem_davek",
    ]
    if not all(name in df.columns for name in expected_columns):
        raise Exception("API changed")

    column_names = {
        "datum": "date",
        "kraj_nazev": "region",
        "prvnich_davek": "people_vaccinated",
        "druhych_davek": "people_fully_vaccinated",
        "celkem_davek": "total_vaccinations",
    }
    df = df.rename(columns=column_names)

    # Collapse the detailed rows into one row per date and region
    grouped = df.groupby(by=["date", "region"])
    df = grouped.agg(
        people_vaccinated=("people_vaccinated", sum),
        people_fully_vaccinated=("people_fully_vaccinated", sum),
        total_vaccinations=("total_vaccinations", sum),
    ).reset_index()

    # Check 2: totals must equal first plus second doses on every row
    dose_sum = df["people_vaccinated"] + df["people_fully_vaccinated"]
    if not (df["total_vaccinations"] == dose_sum).all():
        raise Exception("Error in columns. dose_1 + dose_2 != total_doses")

    # Region names, location and ISO codes
    df.loc[:, "region"] = df["region"].replace(REGION_RENAMING)
    df.loc[:, "location"] = COUNTRY
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Running totals per region, in chronological order
    df = df.sort_values(by="date")
    for metric in (
        "total_vaccinations",
        "people_vaccinated",
        "people_fully_vaccinated",
    ):
        df.loc[:, metric] = df.groupby("region")[metric].cumsum().values

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Scrape the per-region dose boxes from DATA_URL and append them to OUTPUT_FILE.

    Boxes are parsed only when exactly three are found; a box contributes a
    row only when it has four fields and its second field mentions
    "Vaccines administered". Dates come from ``get_date`` on the published
    spreadsheet and are joined on the region name.
    """
    # Previously exported rows
    df_source = pd.read_csv(OUTPUT_FILE)

    # Download and parse the page
    response = requests.get(DATA_URL, headers={"User-Agent": "Custom"})
    soup = BeautifulSoup(response.content, "html.parser")

    # Collect [region, dose 1, dose 2] from each box
    boxes = soup.findAll(class_="col-12 col-md-6 col-xl-4")
    new_data = []
    for box in boxes if len(boxes) == 3 else []:
        fields = box.findAll(class_="col-12")
        if len(fields) != 4:
            continue
        if "Vaccines administered" not in fields[1].text:
            continue
        region_name = fields[0].text.strip()
        # Two figure blocks are expected; only the second one is used
        _total, regional = fields[1].findAll(class_="col-auto text-end")
        dose_1, dose_2 = [
            int(figure.replace(",", ""))
            for figure in regional.text.strip().split("\n")
        ]
        new_data.append([region_name, dose_1, dose_2])
    df = pd.DataFrame(
        new_data,
        columns=["region", "people_vaccinated", "people_fully_vaccinated"],
    )

    # Derived columns
    df.loc[:, "total_vaccinations"] = (
        df["people_vaccinated"] + df["people_fully_vaccinated"]
    )
    df.loc[:, "location"] = COUNTRY

    # Join with date
    url = "https://covid-vaccinatie.be/en/vaccines-administered.xlsx"
    df_dates = get_date(url)
    df = df.merge(df_dates, left_on="region", right_on="Region", how="left")

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Drop stored rows whose region AND date both appear in the new dates table
    # NOTE(review): regions are taken from the index of df_dates — confirm that
    # get_date returns the region names as index.
    region = df_dates.index.tolist()
    date = df_dates.date.tolist()
    superseded = df_source["region"].isin(region) & df_source["date"].isin(date)
    df_source = df_source.loc[~superseded]
    df = pd.concat([df, df_source])

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Load the data from DATA_URL, normalise region names, add ISO codes and export."""
    df = load_data(DATA_URL)

    # Map source region names to the names used for the ISO lookup
    renamed_regions = df["region"].replace(REGION_RENAMING)
    df.loc[:, "region"] = renamed_regions

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main_ch(df):
    """Export the rows of ``df`` that do not carry the "CHFL" or "FL" region codes.

    Args:
        df: Frame with a ``region_iso`` column holding bare region codes.

    The kept codes are prefixed with ``COUNTRY_ISO_CH``, region names are
    resolved through ``ISODB`` and the result is written to OUTPUT_FILE_CH.
    """
    # Keep everything except the "CHFL" and "FL" entries
    excluded_codes = ["CHFL", "FL"]
    is_excluded = df["region_iso"].isin(excluded_codes)
    df_ch = df.loc[~is_excluded].reset_index(drop=True)

    # Build the full ISO code and set the location
    df_ch.loc[:, "region_iso"] = f"{COUNTRY_ISO_CH}-" + df_ch["region_iso"]
    df_ch.loc[:, "location"] = COUNTRY_CH

    # Resolve region names from the ISO codes
    df_ch = ISODB().merge(df_ch, mode="region")

    export_data(
        df=df_ch,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE_CH,
    )
def main():
    """Scrape the latest per-region dose counts and append them to OUTPUT_FILE.

    Loads the previously exported file, reads the report date through the
    browser driver, downloads the dose-1 and dose-2 tables from the fhi.no
    chart API, merges them, adds ISO codes, replaces any rows already stored
    for the same date and calls ``export_data``.

    Raises:
        Exception: "Date not found!" when the report date cannot be read; the
            original error is chained as the cause.
    """
    # Load current file
    df_source = pd.read_csv(OUTPUT_FILE)

    # Get date
    driver = load_driver(DATA_URL)
    try:
        date = load_date(driver)
    except Exception as err:
        # Keep the original failure visible instead of discarding it
        raise Exception("Date not found!") from err

    # Load dose 1 data
    url = "https://www.fhi.no/api/chartdata/api/99112"
    dix = json.loads(requests.get(url).content)
    df_dose1 = pd.DataFrame(dix, columns=["region", "people_vaccinated"])

    # Load dose 2 data
    url = "https://www.fhi.no/api/chartdata/api/99111"
    dix = json.loads(requests.get(url).content)
    df_dose2 = pd.DataFrame(dix, columns=["region", "people_fully_vaccinated"])

    # Remove the "Fylke" row; each frame is filtered with its OWN mask
    # (the dose-1 frame used to be filtered with the dose-2 mask)
    df_dose1 = df_dose1.loc[df_dose1["region"] != "Fylke"]
    df_dose2 = df_dose2.loc[df_dose2["region"] != "Fylke"]

    # Merge
    df = df_dose1.merge(df_dose2, on="region", how="left")

    # Process region column
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)

    # Add columns
    df.loc[:, "date"] = date
    df.loc[:, "location"] = COUNTRY
    df.loc[:, "total_vaccinations"] = (
        df.loc[:, "people_fully_vaccinated"] + df.loc[:, "people_vaccinated"]
    )

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Concat: rows already stored for this date are superseded by the new ones
    df_source = df_source.loc[~(df_source.loc[:, "date"] == date)]
    df = pd.concat([df, df_source])

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Scrape the latest per-jurisdiction dose counts and append them to OUTPUT_FILE.

    The page at DATA_URL is parsed once; ``get_date`` and ``get_df`` extract
    the report date and the data table from it. ``total_vaccinations`` is the
    sum of first and second doses. Rows already stored for the report date
    are replaced.
    """
    # Load current data
    df_source = pd.read_csv(OUTPUT_FILE)

    # Load new data; the context manager closes the HTTP response once the
    # page has been parsed (it was previously left open)
    with urllib.request.urlopen(DATA_URL) as html_page:
        soup = BeautifulSoup(html_page, "html.parser")

    # Get new date
    date = get_date(soup)

    # Get df
    df = get_df(soup)

    # Rename columns
    df = df.rename(
        columns={
            "primera_dosis_cantidad": "people_vaccinated",
            "segunda_dosis_cantidad": "people_fully_vaccinated",
            "jurisdiccion_nombre": "region",
        }
    )

    # Process columns
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)
    df.loc[:, "total_vaccinations"] = (
        df.loc[:, "people_vaccinated"] + df.loc[:, "people_fully_vaccinated"]
    )
    df.loc[:, "location"] = COUNTRY
    df.loc[:, "date"] = date

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Concatenate: rows already stored for this date are superseded
    df_source = df_source.loc[~(df_source.loc[:, "date"] == date)]
    df = pd.concat([df, df_source])

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Create the ISO database from its source, rename some names and add extra entries.

    Subdivision names are renamed first; the additional
    ``[country code, subdivision code, subdivision name]`` rows are appended
    afterwards.
    """
    db = ISODB.create_from_source()

    # Subdivision names to replace (old name -> new name)
    subdivision_renames = {
        "Friuli-Venezia Giulia": "Friuli Venezia Giulia",
        "Brussels Hoofdstedelijk Gewest": "Brussels",
        "Saha, Respublika": "Sakha, Respublika",
    }

    # Extra rows to append to the database
    extra_entries = [
        ["IT", "IT-TN", "Provincia autonoma di Trento"],
        ["IT", "IT-BZ", "Provincia autonoma di Bolzano - Alto Adige"],
        ["FR", "FR-RE", "La Reunion"],
        ["FR", "FR-YT", "Mayotte"],
        ["BE", "BE-VLG", "Flanders"],
        ["BE", "BE-WAL", "Wallonia"],
        ["US", "US-PR", "Puerto Rico"],
        ["US", "US-AS", "American Samoa"],
        ["US", "US-GU", "Guam"],
        ["US", "US-VI", "Virgin Islands"],
        ["US", "US-MP", "Northern Mariana Islands"],
        ["FR", "FR-MQ", "Martinique"],
        ["FR", "FR-GP", "Guadeloupe"],
        ["FR", "FR-GF", "Guyane"],
        ["NO", "NO-46", "Vestland"],
        ["NO", "NO-42", "Agder"],
        ["NO", "NO-30", "Viken"],
        ["NO", "NO-54", "Troms og Finnmark"],
        ["NO", "NO-50", "Trondelag"],
        ["NO", "NO-38", "Vestfold og Telemark"],
        ["NO", "NO-34", "Innlandet"],
        ["CL", "CL-NB", "Nuble"],
        ["IN", "IN-LA", "Ladakh"],
        ["FI", "FI-01", "Aland"],
        ["KR", "KR-50", "Sejong-teukbyeoljachisi"],
    ]

    db.rename_values("subdivision_name", subdivision_renames)
    db.append(extra_entries)
def main():
    """Convert the daily dose spreadsheet at DATA_URL into cumulative regional totals.

    Rows without a region are dropped, missing second-dose counts become 0,
    dates are converted from ``DD/MM/YYYY`` to ``YYYY-MM-DD`` and the three
    count columns are accumulated per region in date order.
    """
    column_names = {
        "Date": "date",
        "Region": "region",
        "1st dose": "people_vaccinated",
        "2nd dose": "people_fully_vaccinated",
    }
    df = read_xlsx_from_url(DATA_URL).rename(columns=column_names)

    # Remove rows without a region
    df = df.loc[~df["region"].isnull()]

    # Column processing
    second_doses = df["people_fully_vaccinated"].fillna(0)
    df.loc[:, "people_fully_vaccinated"] = second_doses.astype(int)
    df.loc[:, "total_vaccinations"] = (
        df["people_vaccinated"] + df["people_fully_vaccinated"]
    )
    df.loc[:, "location"] = COUNTRY
    df.loc[:, "date"] = df["date"].apply(
        lambda raw: datetime.strptime(raw, "%d/%m/%Y").strftime("%Y-%m-%d")
    )

    # Running totals per region, in chronological order
    df = df.sort_values(by="date")
    for metric in (
        "people_vaccinated",
        "people_fully_vaccinated",
        "total_vaccinations",
    ):
        df[metric] = df.groupby("region")[metric].cumsum().values

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Download the comma-separated regional data at DATA_URL and export it.

    The response body is passed through BeautifulSoup and its string form is
    parsed with ``read_psv``.
    """
    # Request and download the data
    response = requests.get(DATA_URL, headers={"User-Agent": "Custom"})
    soup = BeautifulSoup(response.content, "html.parser")

    # Build the frame from the page text
    df = read_psv(str(soup), sep=",")
    column_names = {"nom": "region", "total_vaccines": "total_vaccinations"}
    df = df.rename(columns=column_names)
    df.loc[:, "region"] = df["region"].replace(REGION_RENAMING)
    df.loc[:, "location"] = COUNTRY

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Load the cumulative dose CSV at DATA_URL, add totals and ISO codes, and export.

    ``total_vaccinations`` is the sum of cumulative first and second doses.
    """
    column_names = {
        "areaName": "region",
        "cumPeopleVaccinatedFirstDoseByPublishDate": "people_vaccinated",
        "cumPeopleVaccinatedSecondDoseByPublishDate": "people_fully_vaccinated",
    }
    df = pd.read_csv(DATA_URL).rename(columns=column_names)

    # Derived columns
    df.loc[:, "location"] = COUNTRY
    df.loc[:, "total_vaccinations"] = (
        df["people_vaccinated"] + df["people_fully_vaccinated"]
    )

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Scrape the per-region dose boxes from DATA_URL and rewrite OUTPUT_FILE.

    Boxes are parsed only when exactly three are found; a box contributes a
    row only when it has four fields and its second field mentions
    "Vaccines administered". Dates come from ``get_date`` on the published
    spreadsheet. The combined table is passed through ``keep_min_date``,
    written to OUTPUT_FILE and the country tracking entry is updated.
    """
    # Previously exported rows
    df_source = pd.read_csv(OUTPUT_FILE)

    # Download and parse the page
    response = requests.get(DATA_URL, headers={'User-Agent': 'Custom'})
    soup = BeautifulSoup(response.content, "html.parser")

    # Collect [region, dose 1, dose 2] from each box
    boxes = soup.findAll(class_="col-12 col-md-6 col-xl-4")
    new_data = []
    for box in boxes if len(boxes) == 3 else []:
        fields = box.findAll(class_="col-12")
        if len(fields) != 4:
            continue
        if "Vaccines administered" not in fields[1].text:
            continue
        region_name = fields[0].text.strip()
        # Two figure blocks are expected; only the second one is used
        _total, regional = fields[1].findAll(class_="col-auto text-end")
        dose_1, dose_2 = [
            int(figure.replace(",", ""))
            for figure in regional.text.strip().split("\n")
        ]
        new_data.append([region_name, dose_1, dose_2])
    df = pd.DataFrame(
        new_data,
        columns=["region", "people_vaccinated", "people_fully_vaccinated"],
    )

    # Derived columns
    df.loc[:, "total_vaccinations"] = (
        df["people_vaccinated"] + df["people_fully_vaccinated"]
    )
    df.loc[:, "location"] = COUNTRY

    # Join with date
    url = "https://covid-vaccinatie.be/en/vaccines-administered.xlsx"
    df_dates = get_date(url)
    df = df.merge(df_dates, left_on="region", right_on="Region", how="left")

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Drop stored rows whose region AND date both appear in the new dates table
    # NOTE(review): regions are taken from the index of df_dates — confirm that
    # get_date returns the region names as index.
    region = df_dates.index.tolist()
    date = df_dates.date.tolist()
    superseded = df_source["region"].isin(region) & df_source["date"].isin(date)
    df_source = df_source.loc[~superseded]
    df = pd.concat([df, df_source])

    # Fix the output column order
    output_columns = [
        "location",
        "region",
        "date",
        "location_iso",
        "region_iso",
        "total_vaccinations",
        "people_vaccinated",
        "people_fully_vaccinated",
    ]
    df = df[output_columns]
    cols = ["total_vaccinations", "people_vaccinated", "people_fully_vaccinated"]

    # Avoid repeating reports
    df = keep_min_date(df)

    # Nullable integers for the count columns, then write the file
    df[cols] = df[cols].astype("Int64").fillna(pd.NA)
    df = df.sort_values(by=["region", "date"])
    df.to_csv(OUTPUT_FILE, index=False)

    # Tracking
    update_country_tracking(
        country=COUNTRY,
        url=DATA_URL_REFERENCE,
        last_update=df["date"].max(),
    )
def main():
    """Extract per-region dose counts from the newest PDF report and append them.

    The PDF is found through the "Download her" link on the page at DATA_URL
    and parsed with tabula. The fourth extracted table is used after a shape
    and region check. Count cells are text using "." as thousands separator;
    missing cells count as 0. Rows already stored for the report date are
    replaced.

    Raises:
        Exception: "Shape of table changed!" when the table is not 11 x 7.
        Exception: "Region missing!" when an expected region is absent.
    """
    # Load current data
    df_source = pd.read_csv(OUTPUT_FILE)

    # Locate newest pdf; the context manager closes the HTTP response once
    # the page has been parsed (it was previously left open)
    with urllib.request.urlopen(DATA_URL) as html_page:
        soup = BeautifulSoup(html_page, "html.parser")
    pdf_path = soup.find("a", text="Download her").get("href")

    # Force dtype to be object because of the thousands separator
    pandas_options = {"dtype": str, "header": None}
    dfs_from_pdf = tabula.read_pdf(
        pdf_path, pages="all", pandas_options=pandas_options
    )
    date = get_date(dfs_from_pdf)

    # Get preliminary dataframe
    df = dfs_from_pdf[3]  # Hardcoded table position
    if df.shape != (11, 7):
        raise Exception("Shape of table changed!")
    listed_regions = df[0].tolist()
    if not all(region in listed_regions for region in regions):
        raise Exception("Region missing!")

    # Drop the first four rows and the last row (rows, not columns)
    df = df.drop([0, 1, 2, 3, len(df) - 1])

    # Rename columns
    df = df.rename(
        columns={0: "region", 2: "people_vaccinated", 4: "people_fully_vaccinated"}
    )
    df = df.astype(str)

    def del_separator(x):
        """Parse a count such as "1.234"; a missing cell ("nan") counts as 0."""
        if x == "nan":
            return 0
        return int(x.replace(".", ""))

    # Both count columns use the same parser. ``people_vaccinated`` used to
    # call int() directly, which raises on the "nan" produced by astype(str),
    # so its trailing fillna(0) could never take effect.
    df["people_vaccinated"] = df["people_vaccinated"].apply(del_separator).astype(int)
    df["people_fully_vaccinated"] = (
        df["people_fully_vaccinated"].apply(del_separator).astype("Int64")
    )

    # Process region column
    df.loc[:, "region"] = df.loc[:, "region"].replace(REGION_RENAMING)

    # Get new columns
    df["total_vaccinations"] = df["people_vaccinated"] + df["people_fully_vaccinated"]
    df.loc[:, "location"] = COUNTRY
    df.loc[:, "date"] = date

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)
    df.loc[df["region"] == "Others", "location_iso"] = COUNTRY_ISO

    # Concat: rows already stored for this date are superseded
    df_source = df_source.loc[~(df_source.loc[:, "date"] == date)]
    df = pd.concat([df, df_source])

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )