def main():
    """Scrape Estonia's first/second dose counts from the Terviseamet COVID-19 blog."""
    url = "https://www.terviseamet.ee/et/uudised"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # Walk the news listing until the COVID-19 blog entry is found; `url` then
    # points at the blog post itself.
    for heading in soup.find_all("h2", class_="views-field-title"):
        if "COVID-19 blogi" in heading.text:
            url = "https://www.terviseamet.ee" + heading.find("a")["href"]
            break
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    text = soup.find(class_="node-published").text
    # First capture: people given at least one dose; second: people with two doses.
    match = re.search(
        r"Eestis on COVID-19 vastu vaktsineerimisi tehtud ([\d\s]+) inimesele, kaks doosi on saanud ([\d\s]+) inimest",
        text,
    )
    people_vaccinated, people_fully_vaccinated = match.groups()
    people_vaccinated = vaxutils.clean_count(people_vaccinated)
    people_fully_vaccinated = vaxutils.clean_count(people_fully_vaccinated)
    total_vaccinations = people_vaccinated + people_fully_vaccinated
    date = vaxutils.clean_date(
        soup.find(class_="field-name-post-date").text, "%d.%m.%Y"
    )
    vaxutils.increment(
        location="Estonia",
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
def main():
    """Scrape Isle of Man first/second vaccination totals from the gov.im updates page."""
    url = "https://covid19.gov.im/general-information/latest-updates/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # The vaccination box is the grandparent of the "Total first vaccinations" text node.
    vax_box = soup.find(string=re.compile("Total first vaccinations")).parent.parent
    date = vaxutils.clean_date(vax_box.find("strong").text, "%d %B %Y")
    # Grab the paragraph that carries both counts.
    for paragraph in vax_box.find_all("p"):
        if "Total first vaccinations" in paragraph.text:
            data = paragraph.text
    # Non-breaking spaces (\xa0) separate the labels from the figures.
    people_vaccinated, people_fully_vaccinated = re.search(
        r"Total first vaccinations:\xa0 ([\d,]+)Total second vaccinations:\xa0 ([\d,]+)",
        data,
    ).groups()
    people_vaccinated = vaxutils.clean_count(people_vaccinated)
    people_fully_vaccinated = vaxutils.clean_count(people_fully_vaccinated)
    total_vaccinations = people_vaccinated + people_fully_vaccinated
    vaxutils.increment(
        location="Isle of Man",
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
def main():
    """Scrape Indonesia's dose-1/dose-2 counts from the Kemenkes homepage."""
    url = "https://www.kemkes.go.id/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    raw_date = (
        soup.find(class_="covid-case-container")
        .find(class_="info-date")
        .text.replace("Kondisi ", "")
    )
    # The date string is Indonesian, hence the explicit language hint.
    date = str(dateparser.parse(raw_date, languages=["id"]).date())
    people_vaccinated = vaxutils.clean_count(
        soup.find(class_="description", text="Vaksinasi-1").parent.find(class_="case").text
    )
    people_fully_vaccinated = vaxutils.clean_count(
        soup.find(class_="description", text="Vaksinasi-2").parent.find(class_="case").text
    )
    vaxutils.increment(
        location="Indonesia",
        total_vaccinations=people_vaccinated + people_fully_vaccinated,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=url,
        vaccine="Sinovac",
    )
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Extract dose-1/dose-2 totals from the status page and date them to yesterday (KST)."""
    status = soup.find(class_="status_infoArea")
    people_vaccinated = vaxutils.clean_count(
        status.find(class_="round1").find(class_="big").text
    )
    people_fully_vaccinated = vaxutils.clean_count(
        status.find(class_="round2").find(class_="big").text
    )
    # Figures are attributed to the previous day in the Asia/Seoul timezone.
    report_date = (
        datetime.datetime.now(pytz.timezone("Asia/Seoul")) - datetime.timedelta(days=1)
    ).date()
    return pd.Series(
        data={
            "date": str(report_date),
            "people_vaccinated": people_vaccinated,
            "people_fully_vaccinated": people_fully_vaccinated,
            "total_vaccinations": people_vaccinated + people_fully_vaccinated,
        }
    )
def connect_parse_data(source: str) -> pd.Series:
    """Render *source* in headless Chrome and read the dose counters and report date."""
    opts = Options()
    opts.add_argument("--headless")
    with webdriver.Chrome(options=opts) as driver:
        driver.get(source)
        time.sleep(10)  # let the dashboard widgets finish rendering
        date = (
            driver.find_element_by_class_name("as_of")
            .find_element_by_tag_name("span")
            .text
        )
        date = vaxutils.clean_date(date, "%d.%m.%Y")
        # Labels are Ukrainian: "1 ДОЗУ" = first dose, "2 ДОЗИ" = second dose.
        for block in driver.find_elements_by_class_name("counter_block"):
            if "1 ДОЗУ" in block.text:
                people_vaccinated = block.find_element_by_tag_name("h2").text
            if "2 ДОЗИ" in block.text:
                people_fully_vaccinated = block.find_element_by_tag_name("h2").text
    return pd.Series(
        data={
            "people_vaccinated": vaxutils.clean_count(people_vaccinated),
            "people_fully_vaccinated": vaxutils.clean_count(people_fully_vaccinated),
            "date": date,
        }
    )
def main():
    """Scrape Cayman Islands vaccination totals from the government statistics page."""
    url = "https://www.exploregov.ky/coronavirus-statistics#vaccine-dashboard"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    total_vaccinations = vaxutils.clean_count(
        re.search(
            r"The total number of COVID-19 vaccines administered to date is ([\d,]+)",
            soup.text,
        ).group(1)
    )
    people_fully_vaccinated = vaxutils.clean_count(
        re.search(
            r"The total number of people completing the two-dose course is ([\d,]+)",
            soup.text,
        ).group(1)
    )
    # People with at least one dose = all doses minus completed second doses.
    people_vaccinated = total_vaccinations - people_fully_vaccinated
    date = str(datetime.datetime.now(pytz.timezone("America/Cayman")).date())
    vaxutils.increment(
        location="Cayman Islands",
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
def main():
    """Scrape Turkey's dose counts from JavaScript variables on the ministry page."""
    url = "https://covid19asi.saglik.gov.tr/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    page = str(soup)
    # The counts are embedded as JS assignments rather than in the DOM.
    total_vaccinations = vaxutils.clean_count(
        re.search(r"var yapilanasisayisi = (\d+);", page).group(1)
    )
    people_vaccinated = vaxutils.clean_count(
        re.search(r"var asiyapilankisisayisi1Doz = (\d+);", page).group(1)
    )
    people_fully_vaccinated = vaxutils.clean_count(
        re.search(r"var asiyapilankisisayisi2Doz = (\d+);", page).group(1)
    )
    date = str(datetime.datetime.now(pytz.timezone("Asia/Istanbul")).date())
    vaxutils.increment(
        location="Turkey",
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=url,
        vaccine="Sinovac",
    )
def main():
    """Scrape Brazil's dose totals from a Google Data Studio dashboard via Selenium."""
    chrome_opts = Options()
    chrome_opts.add_argument("--headless")
    with webdriver.Chrome(options=chrome_opts) as driver:
        url = "https://datastudio.google.com/embed/u/0/reporting/2f2537fa-ac23-4f08-8741-794cdbedca03/page/CPFTB"
        driver.get(url)
        time.sleep(5)  # allow the embedded report to render
        for metric in driver.find_elements_by_class_name("kpimetric"):
            if "1ª Dose" in metric.text:
                people_vaccinated = metric.find_element_by_class_name("valueLabel").text
            elif "2ª Dose" in metric.text:
                people_fully_vaccinated = metric.find_element_by_class_name("valueLabel").text
    people_vaccinated = vaxutils.clean_count(people_vaccinated)
    people_fully_vaccinated = vaxutils.clean_count(people_fully_vaccinated)
    date = str(datetime.datetime.now(pytz.timezone("Brazil/East")).date())
    vaxutils.increment(
        location="Brazil",
        total_vaccinations=people_vaccinated + people_fully_vaccinated,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url="https://coronavirusbra1.github.io/",
        vaccine="Oxford/AstraZeneca, Sinovac",
    )
def parse_data(url: str) -> pd.Series:
    """Parse the "India" row of the daily bulletin PDF (single-table layout)."""
    pdf_options = {"pandas_options": {"dtype": str, "header": None}}
    tables = tabula.read_pdf(url, pages="all", **pdf_options)
    # This layout is expected to produce exactly one table.
    assert len(tables) == 1
    india = tables[0]
    india = india[india[0] == "India"]
    # Columns: 1 = first doses, 2 = second doses, 3 = total doses
    # (inferred from the key names below — confirm against the bulletin).
    return pd.Series({
        "date": str(
            (datetime.datetime.now(pytz.timezone("Asia/Kolkata")) - datetime.timedelta(days=1)).date()
        ),
        "people_vaccinated": vaxutils.clean_count(india[1].item()),
        "people_fully_vaccinated": vaxutils.clean_count(india[2].item()),
        "total_vaccinations": vaxutils.clean_count(india[3].item()),
        "source_url": url,
    })
def main():
    """Scrape Luxembourg's dose-1/dose-2 counts from the newest daily report PDF."""
    url = "https://data.public.lu/fr/datasets/covid-19-rapports-journaliers/#_"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # The first primary button on the dataset page links to the newest PDF.
    pdf_path = soup.find("a", class_="btn-primary")["href"]
    tables = tabula.read_pdf(pdf_path, pages="all")
    report = pd.DataFrame(tables[2])  # hardcoded table location in the PDF
    people_vaccinated = vaxutils.clean_count(
        report.loc[report["Unnamed: 0"] == "Personnes vaccinées - Dose 1", "Unnamed: 2"].values[0]
    )
    people_fully_vaccinated = vaxutils.clean_count(
        report.loc[report["Unnamed: 0"] == "Personnes vaccinées - Dose 2", "Unnamed: 2"].values[0]
    )
    date = vaxutils.clean_date(
        report["Unnamed: 1"].str.replace("Journée du ", "").values[0], "%d.%m.%Y"
    )
    vaxutils.increment(
        location="Luxembourg",
        total_vaccinations=people_vaccinated + people_fully_vaccinated,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=pdf_path,
        vaccine="Pfizer/BioNTech",
    )
def connect_parse_data(source: str) -> pd.Series:
    """Fetch *source* with browser-like headers and parse the vaccinated counts."""
    # Impersonate a regular browser request — presumably the site rejects
    # default client user agents; confirm before simplifying.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.16; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    page = BeautifulSoup(requests.get(source, headers=headers).content, "html.parser")
    # "De los X vacunados, Y ..." — X = people vaccinated, Y = fully vaccinated.
    match = re.search(r"De los ([\d\.]+) vacunados, ([\d\.]+)", page.text)
    return pd.Series(
        data={
            "people_vaccinated": vaxutils.clean_count(match.group(1)),
            "people_fully_vaccinated": vaxutils.clean_count(match.group(2)),
        }
    )
def parse_data(url: str) -> pd.Series:
    """Parse the "India" row of the daily bulletin PDF (multi-table layout).

    Scans every table extracted from the PDF for the one whose cells contain
    "Beneficiaries vaccinated", then reads the last three columns of the
    "India" row (first doses, second doses, total doses).
    """
    kwargs = {"pandas_options": {"dtype": str, "header": None}}
    dfs_from_pdf = tabula.read_pdf(url, pages="all", **kwargs)
    for df in dfs_from_pdf:
        # BUG FIX: the original tested dfs_from_pdf[0] on every iteration, so
        # the scan never actually looked beyond the first table.
        if "Beneficiaries vaccinated" in df.values.flatten():
            break
    df = df[df[0] == "India"]
    ncols = df.shape[1]
    people_vaccinated = vaxutils.clean_count(df[ncols - 3].item())
    people_fully_vaccinated = vaxutils.clean_count(df[ncols - 2].item())
    total_vaccinations = vaxutils.clean_count(df[ncols - 1].item())
    # The bulletin reports the previous day's figures (Asia/Kolkata time).
    return pd.Series({
        "date": str(
            (datetime.datetime.now(pytz.timezone("Asia/Kolkata")) - datetime.timedelta(days=1)).date()
        ),
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "total_vaccinations": total_vaccinations,
        "source_url": url,
    })
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Read vaccination figures from the "cifras-coronavirus" summary block."""
    figures = soup.find(class_="cifras-coronavirus").find_all(class_="cifra")
    # Positional layout: figures[1] = total doses, figures[2] = fully vaccinated
    # (inferred from the key names — confirm against the live page).
    return pd.Series(
        data={
            "total_vaccinations": vaxutils.clean_count(figures[1].text),
            "people_fully_vaccinated": vaxutils.clean_count(figures[2].text),
            "date": set_date(),
        }
    )
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Derive dose counts from the narrative text on the statistics page."""
    pattern = r"The total number of COVID-19 vaccines administered to date is ([\d,]+)\.\sSo far, ([\d,]+) .* have received at least one dose of the Pfizer-BioNTech vaccine"
    found = re.search(pattern, soup.text)
    total_vaccinations = vaxutils.clean_count(found.group(1))
    people_vaccinated = vaxutils.clean_count(found.group(2))
    # Second doses are not stated directly; infer them from the difference.
    assert total_vaccinations >= people_vaccinated
    people_fully_vaccinated = total_vaccinations - people_vaccinated
    return pd.Series({
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
    })
def main():
    """Scrape Luxembourg's total administered doses from the newest daily report PDF."""
    url = "https://data.public.lu/fr/datasets/covid-19-rapports-journaliers/#_"
    # The first primary button on the dataset page links to the newest PDF.
    html_page = urllib.request.urlopen(url)
    soup = BeautifulSoup(html_page, "html.parser")
    pdf_path = soup.find("a", class_="btn-primary")["href"]
    tables = tabula.read_pdf(pdf_path, pages="all")
    report = pd.DataFrame(tables[2])  # hardcoded table location in the PDF
    total_vaccinations = vaxutils.clean_count(
        report.loc[report["Unnamed: 0"] == "Nombre de doses administrées", "Unnamed: 1"].values[0]
    )
    # The report date is embedded in the second column header.
    date = re.search(r"\d\d\.\d\d\.202\d", report.columns[1]).group(0)
    date = vaxutils.clean_date(date, "%d.%m.%Y")
    vaxutils.increment(
        location="Luxembourg",
        total_vaccinations=total_vaccinations,
        date=date,
        source_url=pdf_path,
        vaccine="Pfizer/BioNTech",
    )
def parse_data(pdf_link) -> tuple:
    """Parse the RIVM vaccination PDF linked by ``pdf_link`` (an ``<a>`` tag).

    Returns a ``(total_vaccinations, people_vaccinated, people_fully_vaccinated)``
    tuple. NOTE: the original annotated the return type as ``str``, which did
    not match the actual 3-tuple return value; the annotation is corrected here.
    """
    url = "https://www.rivm.nl" + pdf_link["href"]
    kwargs = {"pandas_options": {"dtype": str}}
    dfs_from_pdf = tabula.read_pdf(url, pages="all", **kwargs)
    # Locate the table carrying the dose columns ("Eerste" = first, "Tweede" = second).
    for df in dfs_from_pdf:
        if "Eerste" in df.columns:
            break
    people_vaccinated = vaxutils.clean_count(
        df.loc[df.Doelgroep == "Totaal", "Eerste"].item()
    )
    people_fully_vaccinated = vaxutils.clean_count(
        df.loc[df.Doelgroep == "Totaal", "Tweede"].item()
    )
    total_vaccinations = people_vaccinated + people_fully_vaccinated
    return total_vaccinations, people_vaccinated, people_fully_vaccinated
def connect_parse_data(source: str) -> pd.Series:
    """Fetch Saint Lucia's vaccination total and report date from *source*."""
    # Impersonate a regular browser request — presumably the site rejects
    # default client user agents; confirm before simplifying.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.16; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    page = BeautifulSoup(requests.get(source, headers=headers).content, "html.parser")
    total_vaccinations = vaxutils.clean_count(page.find(class_="repart-stlucia").text)
    # Pull a "Month D, YYYY" date out of the blue sub-heading.
    heading = page.find(class_="h2-blue").text
    date = re.search(r"\w+ \d+, 202\d", heading).group(0)
    date = vaxutils.clean_date(date, "%B %d, %Y")
    return pd.Series(
        data={
            "total_vaccinations": total_vaccinations,
            "date": date,
        }
    )
def main():
    """Download Morocco's daily bulletin PDF and record the vaccination total.

    The bulletin URL embeds yesterday's date as ``D.M.YY``; the first page
    carries a cumulative beneficiaries figure that is extracted by regex.
    """
    date = datetime.date.today() - datetime.timedelta(days=1)
    url_date = date.strftime("%-d.%-m.%y")
    url = f"http://www.covidmaroc.ma/Documents/BULLETIN/{url_date}.COVID-19.pdf"
    # BUG FIX: the original curl command fetched a hardcoded 1.2.21 bulletin,
    # so the downloaded PDF disagreed with the `url` recorded as source_url
    # and the scraped figure went stale. Fetch the URL built above instead.
    os.system(f"curl {url} -o morocco.pdf -s")
    with open("morocco.pdf", "rb") as pdfFileObj:
        pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
        text = pdfReader.getPage(0).extractText()
    regex = r"Bénéficiaires de la vaccination\s+Cumul global([\d\s]+)Situation épidémiologique"
    total_vaccinations = vaxutils.clean_count(re.search(regex, text).group(1))
    vaxutils.increment(
        location="Morocco",
        total_vaccinations=total_vaccinations,
        date=str(date),
        source_url=url,
        vaccine="Oxford/AstraZeneca, Sinopharm",
    )
    os.remove("morocco.pdf")
def main():
    """Scrape Portugal's administered-dose total from the ArcGIS dashboard."""
    url = "https://esriportugal.maps.arcgis.com/apps/opsdashboard/index.html#/acf023da9a0b4f9dbb2332c13f635829"
    op = Options()
    # Headless mode was deliberately left disabled in the original —
    # presumably the dashboard fails to render headless; confirm before enabling.
    # op.add_argument("--headless")
    with webdriver.Chrome(options=op) as driver:
        driver.get(url)
        time.sleep(4)  # allow dashboard indicators to render
        for box in driver.find_elements_by_class_name("indicator-top-text"):
            if "Total de Vacinas Administradas" in box.text:
                count_text = box.find_element_by_xpath("..").text
            elif "Dados relativos ao boletim da DGS de" in box.text:
                date_text = box.find_element_by_xpath("..").text
    # The figure / date is the last line of each indicator's parent text.
    count = vaxutils.clean_count(re.search(r"\n([\d\s]+$)", count_text).group(1))
    date = vaxutils.clean_date(re.search(r"\n([\d/]+$)", date_text).group(1), "%d/%m/%Y")
    vaxutils.increment(
        location="Portugal",
        total_vaccinations=count,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
def main():
    """Scrape Israel's first-dose count from the health ministry dashboard."""
    url = "https://datadashboard.health.gov.il/COVID-19/general"
    opts = Options()
    opts.add_argument("--headless")
    with webdriver.Chrome(options=opts) as driver:
        driver.get(url)
        time.sleep(2)  # allow the dashboard counters to render
        # The Hebrew label below is the first-dose-vaccinees counter.
        for counter in driver.find_elements_by_class_name("title-header"):
            if "מתחסנים מנה ראשונה" in counter.text:
                count = vaxutils.clean_count(
                    counter.find_element_by_class_name("total-amount").text
                )
                break
    date = str(datetime.datetime.now(pytz.timezone("Asia/Jerusalem")).date())
    vaxutils.increment(
        location="Israel",
        total_vaccinations=count,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
def main():
    """Scrape San Marino's vaccinated count from the chart script on the ISS site."""
    url = "https://vaccinocovid.iss.sm/"
    # TLS verification disabled in the original — presumably the site's
    # certificate chain fails validation; confirm before removing verify=False.
    soup = BeautifulSoup(requests.get(url, verify=False).content, "html.parser")
    # The figures live inside an inline Chart.js <script> block.
    for script in soup.find_all("script"):
        if "new Chart" in str(script):
            chart_data = str(script)
            break
    people_vaccinated = vaxutils.clean_count(
        re.search(r"([\d,. ]+) [Vv]accinati", chart_data).group(1)
    )
    # Second doses hardcoded to zero (not reported at the time of writing).
    people_fully_vaccinated = 0
    total_vaccinations = people_vaccinated + people_fully_vaccinated
    date = re.search(r"Dati aggiornati al (\d{2}/\d{2}/\d{4})", chart_data).group(1)
    date = vaxutils.clean_date(date, "%d/%m/%Y")
    vaxutils.increment(
        location="San Marino",
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url="https://vaccinocovid.iss.sm/",
        vaccine="Sputnik V",
    )
def main():
    """Scrape Romania's Pfizer vaccination total from the latest daily update post."""
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # Find the most recent "Actualizare zilnică" (daily update) post link.
    for link in soup.find(class_="display-posts-listing").find_all("a", class_="title"):
        if "Actualizare zilnică" in link.text:
            url = link["href"]
            break
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    date = vaxutils.clean_date(
        soup.find(class_="post-date").find(class_="meta-text").text.strip(), "%b %d, %Y"
    )
    main_text = soup.find(class_="entry-content-text").text
    count = vaxutils.clean_count(
        re.search(
            r"Număr total de persoane vaccinate împotriva COVID-19 cu vaccinul Pfizer BioNTech \(începând cu data de 27 decembrie 2020\) – ([\d\.]+)",
            main_text,
        ).group(1)
    )
    vaxutils.increment(
        location="Romania",
        total_vaccinations=count,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
def main():
    """Scrape Finland's administered-dose count from THL's situation report."""
    url = "https://thl.fi/fi/web/infektiotaudit-ja-rokotukset/ajankohtaista/ajankohtaista-koronaviruksesta-covid-19/tilannekatsaus-koronaviruksesta"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # "Annetut rokoteannokset" marks the administered-doses paragraph.
    for p in soup.find(class_="journal-content-article").find_all("p"):
        if "Annetut rokoteannokset" in p.text:
            break
    count = vaxutils.clean_count(p.find("strong").text)
    caption = soup.find(class_="thl-image-caption").text
    found = re.compile(r"Tiedot on päivitetty (\d+)\.(\d+)").search(caption)
    # The caption omits the year; 2021 is hardcoded here.
    date = str(datetime.date(year=2021, month=int(found.group(2)), day=int(found.group(1))))
    vaxutils.increment(
        location="Finland",
        total_vaccinations=count,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
def main():
    """Download Bermuda's vaccination-update PDF and record the running total."""
    url = "https://www.gov.bm/sites/default/files/COVID-19%20Vaccination%20Updates.pdf"
    os.system(f"curl {url} -o bermuda.pdf -s")
    with open("bermuda.pdf", "rb") as pdfFileObj:
        pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
        text = pdfReader.getPage(0).extractText()
    # The total sits between the table heading and its label.
    total_vaccinations = vaxutils.clean_count(
        re.search(r"VACCINATION CENTRE(.*?)Total Vaccines Administered", text).group(1)
    )
    date = vaxutils.clean_date(
        re.search(r"As of (\w+ \d+, 20\d+)", text).group(1), "%B %d, %Y"
    )
    vaxutils.increment(
        location="Bermuda",
        total_vaccinations=total_vaccinations,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
    os.remove("bermuda.pdf")
def main():
    """Scrape Guatemala's applied-dose count from the national Shiny dashboard."""
    data = {
        "location": "Guatemala",
        "source_url": "https://gtmvigilanciacovid.shinyapps.io/3869aac0fb95d6baf2c80f19f2da5f98",
        "vaccine": "Moderna",
    }
    with webdriver.Chrome() as driver:
        driver.get(data["source_url"])
        time.sleep(2)
        # Switch to the vaccination tab (syringe icon) and let it render.
        driver.find_element_by_class_name("fa-syringe").click()
        time.sleep(4)
        date = driver.find_element_by_class_name("logo").text
        dose1 = (
            driver.find_element_by_id("dosisaplicadas")
            .find_element_by_tag_name("h3")
            .text
        )
    data["people_vaccinated"] = vaxutils.clean_count(dose1)
    # Second doses hardcoded to zero (not reported at the time of writing).
    data["people_fully_vaccinated"] = 0
    data["total_vaccinations"] = data["people_vaccinated"] + data["people_fully_vaccinated"]
    # The dashboard "logo" element carries a D/M/YYYY date.
    data["date"] = vaxutils.clean_date(
        re.search(r"\d+/\d+/202\d", date).group(0), "%d/%m/%Y"
    )
    vaxutils.increment(
        location=data["location"],
        total_vaccinations=data["total_vaccinations"],
        people_vaccinated=data["people_vaccinated"],
        people_fully_vaccinated=data["people_fully_vaccinated"],
        date=data["date"],
        source_url=data["source_url"],
        vaccine=data["vaccine"],
    )
def main():
    """Scrape Romania's total vaccinated-person count from the latest daily update."""
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # Find the most recent "Actualizare zilnică" (daily update) post link.
    for link in soup.find(class_="display-posts-listing").find_all("a", class_="title"):
        if "Actualizare zilnică" in link.text:
            url = link["href"]
            break
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    date = vaxutils.clean_date(
        soup.find(class_="post-date").find(class_="meta-text").text.strip(), "%b %d, %Y"
    )
    # The figure is split across <strong> tags inside the matching paragraph;
    # concatenate their text before cleaning.
    for paragraph in soup.find(class_="entry-content").find_all("p"):
        if "Număr total de persoane vaccinate" in paragraph.text:
            count = vaxutils.clean_count(
                "".join(c.text for c in paragraph.find_all("strong"))
            )
    vaxutils.increment(
        location="Romania",
        total_vaccinations=count,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
def main():
    """Scrape Estonia's total vaccination count from the Terviseamet blog."""
    url = "https://www.terviseamet.ee/et/uudised"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    # Walk the news listing until the COVID-19 blog entry is found.
    for heading in soup.find_all("h2", class_="views-field-title"):
        if "COVID-19 blogi" in heading.text:
            url = "https://www.terviseamet.ee" + heading.find("a")["href"]
            break
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    date = vaxutils.clean_date(soup.find(class_="field-name-post-date").text, "%d.%m.%Y")
    blurb = soup.find(string=re.compile(r"Eestis on COVID-19 vastu vaktsineerimisi"))
    count = vaxutils.clean_count(
        re.search(r"tehtud ([\d\s]+) inimesele", blurb).group(1)
    )
    vaxutils.increment(
        location="Estonia",
        total_vaccinations=count,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech",
    )
def main():
    """Scrape Italy's vaccination total from the national Power BI dashboard."""
    opts = Options()
    opts.add_argument("--headless")
    with webdriver.Chrome(options=opts) as driver:
        url = "https://app.powerbi.com/view?r=eyJrIjoiMzg4YmI5NDQtZDM5ZC00ZTIyLTgxN2MtOTBkMWM4MTUyYTg0IiwidCI6ImFmZDBhNzVjLTg2NzEtNGNjZS05MDYxLTJjYTBkOTJlNDIyZiIsImMiOjh9"
        driver.get(url)
        # Give each Power BI element up to `timeout` seconds to render; bail
        # out silently on timeout so a transient failure skips today's update.
        timeout = 25
        try:
            data = WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.CLASS_NAME, "value"))
            ).text
        except TimeoutException:
            return
        count = vaxutils.clean_count(data)
        try:
            data = WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((By.CLASS_NAME, "title"))
            ).text
        except TimeoutException:
            return
        date = vaxutils.clean_date(data, "%m/%d/%Y %H:%M:%S %p")
        vaxutils.increment(
            location="Italy",
            total_vaccinations=count,
            date=date,
            source_url=url,
            vaccine="Pfizer/BioNTech",
        )
def parse_data(data: dict) -> pd.Series:
    """Extract the cumulative total and report date from the sheets payload.

    ``data`` carries parallel ``sheetNames`` / ``data`` lists; the total lives
    in the first cell of the "Vakcinisani ukupno" sheet.
    """
    sheet_id = data["sheetNames"].index("Vakcinisani ukupno")
    total_vaccinations = vaxutils.clean_count(data["data"][sheet_id][0][0])
    # The report date is the first cell of sheet index 1, formatted "DD.MM.YYYY.".
    date = vaxutils.clean_date(data["data"][1][0][0], "%d.%m.%Y.")
    return pd.Series(data={"date": date, "total_vaccinations": total_vaccinations})
def parse_data(source: str) -> pd.Series:
    """Download the bulletin PDF at *source* and read dose counts from its third table."""
    os.system(f"curl {source} -o morocco.pdf -s")
    tables = tabula.read_pdf(
        "morocco.pdf", pages=1, pandas_options={"dtype": str, "header": None}
    )
    table = tables[2]
    # Last row holds the cumulative figures; column 0 is mapped to second
    # doses and column 1 to first doses — confirm against the bulletin layout
    # if these ever look swapped.
    result = {
        "people_fully_vaccinated": vaxutils.clean_count(table[0].values[-1]),
        "people_vaccinated": vaxutils.clean_count(table[1].values[-1]),
    }
    result["total_vaccinations"] = (
        result["people_vaccinated"] + result["people_fully_vaccinated"]
    )
    return pd.Series(data=result)