def main():
    """Scrape San Marino's vaccination figures from the ISS chart page."""
    url = "https://vaccinocovid.iss.sm/"
    # NOTE(review): TLS verification is disabled — presumably the site's
    # certificate chain does not validate; confirm before removing verify=False.
    soup = BeautifulSoup(requests.get(url, verify=False).content, "html.parser")

    # The figures are embedded in the inline <script> that builds the chart.
    for script in soup.find_all("script"):
        script_text = str(script)
        if "new Chart" in script_text:
            chart_data = script_text
            break

    people_vaccinated = vaxutils.clean_count(
        re.search(r"([\d,. ]+) [Vv]accinati", chart_data).group(1))
    # Second doses are hardcoded to zero in this scraper.
    people_fully_vaccinated = 0
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    date = re.search(r"Dati aggiornati al (\d{2}/\d{2}/\d{4})", chart_data).group(1)
    date = vaxutils.clean_date(date, "%d/%m/%Y")

    vaxutils.increment(location="San Marino",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url="https://vaccinocovid.iss.sm/",
                       vaccine="Sputnik V")
def main():
    """Scrape Romania's total vaccination count from the latest daily update post."""
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    # Jump to the newest "Actualizare zilnică" (daily update) article.
    for link in soup.find(class_="display-posts-listing").find_all("a", class_="title"):
        if "Actualizare zilnică" in link.text:
            url = link["href"]
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    date = soup.find(class_="post-date").find(class_="meta-text").text.strip()
    date = vaxutils.clean_date(date, "%b %d, %Y")

    main_text = soup.find(class_="entry-content-text").text
    match = re.search(
        r"Număr total de persoane vaccinate împotriva COVID-19 cu vaccinul Pfizer BioNTech \(începând cu data de 27 decembrie 2020\) – ([\d\.]+)",
        main_text)
    total_vaccinations = vaxutils.clean_count(match.group(1))

    vaxutils.increment(location="Romania",
                       total_vaccinations=total_vaccinations,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
def main():
    """Scrape Romania's total vaccination count from the <strong> tags of the daily post."""
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    # Jump to the newest "Actualizare zilnică" (daily update) article.
    for link in soup.find(class_="display-posts-listing").find_all("a", class_="title"):
        if "Actualizare zilnică" in link.text:
            url = link["href"]
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    date = soup.find(class_="post-date").find(class_="meta-text").text.strip()
    date = vaxutils.clean_date(date, "%b %d, %Y")

    # The count is split across <strong> tags inside the matching paragraph.
    for paragraph in soup.find(class_="entry-content").find_all("p"):
        if "Număr total de persoane vaccinate" in paragraph.text:
            pieces = paragraph.find_all("strong")
            count = "".join(piece.text for piece in pieces)
            count = vaxutils.clean_count(count)

    vaxutils.increment(
        location="Romania",
        total_vaccinations=count,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech"
    )
def main():
    """Scrape Bermuda's total vaccination count from the government PDF report.

    Downloads the PDF to a temporary local file, extracts the first page's
    text, and parses the total and the "As of" date from it.
    """
    url = "https://www.gov.bm/sites/default/files/COVID-19%20Vaccination%20Updates.pdf"

    # Download with requests (already used by the other scrapers) instead of
    # shelling out to curl via os.system: no shell dependency, and HTTP
    # failures raise instead of silently producing a missing file.
    response = requests.get(url)
    response.raise_for_status()
    with open("bermuda.pdf", "wb") as f:
        f.write(response.content)

    try:
        with open("bermuda.pdf", "rb") as pdfFileObj:
            pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
            text = pdfReader.getPage(0).extractText()

        # The running total sits between these two fixed labels on page 1.
        regex = r"VACCINATION CENTRE(.*?)Total Vaccines Administered"
        total_vaccinations = re.search(regex, text)
        total_vaccinations = vaxutils.clean_count(total_vaccinations.group(1))

        regex = r"As of (\w+ \d+, 20\d+)"
        date = re.search(regex, text)
        date = vaxutils.clean_date(date.group(1), "%B %d, %Y")

        vaxutils.increment(
            location="Bermuda",
            total_vaccinations=total_vaccinations,
            date=date,
            source_url=url,
            vaccine="Pfizer/BioNTech"
        )
    finally:
        # Remove the temp file even if parsing fails part-way through.
        os.remove("bermuda.pdf")
def main():
    """Scrape Estonia's cumulative vaccination count from the Terviseamet blog."""
    url = "https://www.terviseamet.ee/et/uudised"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    # Locate the latest "COVID-19 blogi" post in the news listing.
    for h2 in soup.find_all("h2", class_="views-field-title"):
        if "COVID-19 blogi" in h2.text:
            url = "https://www.terviseamet.ee" + h2.find("a")["href"]
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    date = vaxutils.clean_date(soup.find(class_="field-name-post-date").text, "%d.%m.%Y")

    # Find the sentence that carries the vaccination figure.
    sentence = soup.find(string=re.compile(r"Eestis on COVID-19 vastu vaktsineerimisi"))
    count = re.search(r"tehtud ([\d\s]+) inimesele", sentence).group(1)
    count = vaxutils.clean_count(count)

    vaxutils.increment(location="Estonia",
                       total_vaccinations=count,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
def main():
    """Scrape Luxembourg's total dose count from the newest daily report PDF.

    Finds the newest PDF link on the open-data portal, reads the hardcoded
    table with tabula, and parses the count and the report date from it.
    """
    url = "https://data.public.lu/fr/datasets/covid-19-rapports-journaliers/#_"
    # Use requests like every other scraper in this file (the original used
    # urllib.request.urlopen for no particular reason).
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    pdf_path = soup.find("a", class_="btn-primary")["href"]  # Get path to newest pdf

    # Fetch data
    dfs_from_pdf = tabula.read_pdf(pdf_path, pages="all")
    df = pd.DataFrame(dfs_from_pdf[2])  # Hardcoded table location

    total_vaccinations = df.loc[df["Unnamed: 0"] == "Nombre de doses administrées",
                                "Unnamed: 1"].values[0]
    total_vaccinations = vaxutils.clean_count(total_vaccinations)

    # The report date is embedded in the second column header, e.g. "12.01.2021".
    date = re.search(r"\d\d\.\d\d\.202\d", df.columns[1]).group(0)
    date = vaxutils.clean_date(date, "%d.%m.%Y")

    vaxutils.increment(location="Luxembourg",
                       total_vaccinations=total_vaccinations,
                       date=date,
                       source_url=pdf_path,
                       vaccine="Pfizer/BioNTech")
def main():
    """Scrape Luxembourg dose-1/dose-2/total figures from the newest daily PDF."""
    url = "https://data.public.lu/fr/datasets/covid-19-rapports-journaliers/#_"
    # Locate newest pdf
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    pdf_path = soup.find("a", class_="btn-primary")["href"]

    # Hardcoded table location within the PDF.
    df = pd.DataFrame(tabula.read_pdf(pdf_path, pages="all")[2])

    # Strip non-digits and sort ascending; the three figures are assumed to
    # order as dose-2 <= dose-1 <= total doses.
    numeric = pd.to_numeric(df["Unnamed: 2"].str.replace(r"[^\d]", "", regex=True))
    values = sorted(numeric.dropna().astype(int))
    assert len(values) == 3
    people_fully_vaccinated, people_vaccinated, total_vaccinations = values

    date = df["Unnamed: 1"].str.replace("Journée du ", "").values[0]
    date = vaxutils.clean_date(date, "%d.%m.%Y")

    vaxutils.increment(location="Luxembourg",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=pdf_path,
                       vaccine="Moderna, Oxford/AstraZeneca, Pfizer/BioNTech")
def main():
    """Scrape Portugal's total doses and bulletin date from the DGS ArcGIS dashboard."""
    url = "https://esriportugal.maps.arcgis.com/apps/opsdashboard/index.html#/acf023da9a0b4f9dbb2332c13f635829"

    # Options for Chrome WebDriver. Run headless, consistent with the other
    # Selenium-based scrapers in this file (the flag was left commented out,
    # which opened a visible browser window on every run).
    op = Options()
    op.add_argument("--headless")

    with webdriver.Chrome(options=op) as driver:
        driver.get(url)
        time.sleep(4)  # allow the dashboard widgets to render

        for box in driver.find_elements_by_class_name("indicator-top-text"):
            if "Total de Vacinas Administradas" in box.text:
                count_text = box.find_element_by_xpath("..").text
            elif "Dados relativos ao boletim da DGS de" in box.text:
                date_text = box.find_element_by_xpath("..").text

        # NOTE(review): count_text/date_text stay unbound if the dashboard
        # layout changes and the labels above are no longer present.
        count = re.search(r"\n([\d\s]+$)", count_text).group(1)
        count = vaxutils.clean_count(count)

        date = re.search(r"\n([\d/]+$)", date_text).group(1)
        date = vaxutils.clean_date(date, "%d/%m/%Y")

        vaxutils.increment(location="Portugal",
                           total_vaccinations=count,
                           date=date,
                           source_url=url,
                           vaccine="Pfizer/BioNTech")
def main():
    """Scrape Luxembourg dose-1/dose-2 counts from the newest daily report PDF."""
    url = "https://data.public.lu/fr/datasets/covid-19-rapports-journaliers/#_"
    # Locate newest pdf
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    pdf_path = soup.find("a", class_="btn-primary")["href"]

    # Hardcoded table location within the PDF.
    df = pd.DataFrame(tabula.read_pdf(pdf_path, pages="all")[2])

    def row_count(label):
        # Look up a row by its first-column label and clean its count cell.
        raw = df.loc[df["Unnamed: 0"] == label, "Unnamed: 2"].values[0]
        return vaxutils.clean_count(raw)

    people_vaccinated = row_count("Personnes vaccinées - Dose 1")
    people_fully_vaccinated = row_count("Personnes vaccinées - Dose 2")
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    date = df["Unnamed: 1"].str.replace("Journée du ", "").values[0]
    date = vaxutils.clean_date(date, "%d.%m.%Y")

    vaxutils.increment(location="Luxembourg",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=pdf_path,
                       vaccine="Pfizer/BioNTech")
def connect_parse_data(source: str) -> pd.Series:
    """Render the dashboard in headless Chrome and parse dose counts and the date."""
    options = Options()
    options.add_argument("--headless")
    with webdriver.Chrome(options=options) as driver:
        driver.get(source)
        time.sleep(10)  # the counters are populated client-side

        date = driver.find_element_by_class_name(
            "as_of").find_element_by_tag_name("span").text
        date = vaxutils.clean_date(date, "%d.%m.%Y")

        for block in driver.find_elements_by_class_name("counter_block"):
            if "1 ДОЗУ" in block.text:  # "1 DOSE" counter
                people_vaccinated = block.find_element_by_tag_name("h2").text
            if "2 ДОЗИ" in block.text:  # "2 DOSES" counter
                people_fully_vaccinated = block.find_element_by_tag_name("h2").text

    return pd.Series(data={
        "people_vaccinated": vaxutils.clean_count(people_vaccinated),
        "people_fully_vaccinated": vaxutils.clean_count(people_fully_vaccinated),
        "date": date,
    })
def main():
    """Scrape Isle of Man first/second dose counts from the latest-updates page."""
    url = "https://covid19.gov.im/general-information/latest-updates/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    # The box two levels above the matched string holds date and both counts.
    vax_box = soup.find(string=re.compile("Total first vaccinations")).parent.parent
    date = vaxutils.clean_date(vax_box.find("strong").text, "%d %B %Y")

    for p in vax_box.find_all("p"):
        if "Total first vaccinations" in p.text:
            data = p.text

    first, second = re.search(
        r"Total first vaccinations:\xa0 ([\d,]+)Total second vaccinations:\xa0 ([\d,]+)",
        data).groups()
    people_vaccinated = vaxutils.clean_count(first)
    people_fully_vaccinated = vaxutils.clean_count(second)
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    vaxutils.increment(
        location="Isle of Man",
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url=url,
        vaccine="Pfizer/BioNTech"
    )
def main():
    """Scrape Italy's total vaccinations from the national Power BI dashboard."""
    op = Options()
    op.add_argument("--headless")
    with webdriver.Chrome(options=op) as driver:
        url = "https://app.powerbi.com/view?r=eyJrIjoiMzg4YmI5NDQtZDM5ZC00ZTIyLTgxN2MtOTBkMWM4MTUyYTg0IiwidCI6ImFmZDBhNzVjLTg2NzEtNGNjZS05MDYxLTJjYTBkOTJlNDIyZiIsImMiOjh9"
        driver.get(url)

        # Wait for the desired element to load. If nothing is found after
        # 25 seconds, give up silently (best-effort scraper).
        timeout = 25
        try:
            locator = EC.presence_of_element_located((By.CLASS_NAME, "value"))
            data = WebDriverWait(driver, timeout).until(locator).text
        except TimeoutException:
            return
        count = vaxutils.clean_count(data)

        try:
            locator = EC.presence_of_element_located((By.CLASS_NAME, "title"))
            data = WebDriverWait(driver, timeout).until(locator).text
        except TimeoutException:
            return
        # NOTE(review): %p is inert alongside %H in strptime; only the date
        # portion of the timestamp matters here.
        date = vaxutils.clean_date(data, "%m/%d/%Y %H:%M:%S %p")

        vaxutils.increment(location="Italy",
                           total_vaccinations=count,
                           date=date,
                           source_url=url,
                           vaccine="Pfizer/BioNTech")
def connect_parse_data(source: str) -> pd.Series:
    """Fetch the stats page with browser-like headers and parse count and date."""
    # Browser-like headers; presumably the site rejects default client UAs —
    # confirm before simplifying.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.16; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    soup = BeautifulSoup(requests.get(source, headers=headers).content, "html.parser")

    total_vaccinations = vaxutils.clean_count(soup.find(class_="repart-stlucia").text)

    raw_date = re.search(r"\w+ \d+, 202\d", soup.find(class_="h2-blue").text).group(0)
    return pd.Series(data={
        "total_vaccinations": total_vaccinations,
        "date": vaxutils.clean_date(raw_date, "%B %d, %Y"),
    })
def main():
    """Scrape Estonia dose-1/dose-2 counts from the Terviseamet COVID-19 blog."""
    url = "https://www.terviseamet.ee/et/uudised"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    # Locate the latest "COVID-19 blogi" post in the news listing.
    for h2 in soup.find_all("h2", class_="views-field-title"):
        if "COVID-19 blogi" in h2.text:
            url = "https://www.terviseamet.ee" + h2.find("a")["href"]
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    text = soup.find(class_="node-published").text

    match = re.search(
        r"Eestis on COVID-19 vastu vaktsineerimisi tehtud ([\d\s]+) inimesele, kaks doosi on saanud ([\d\s]+) inimest",
        text)
    people_vaccinated = vaxutils.clean_count(match.group(1))
    people_fully_vaccinated = vaxutils.clean_count(match.group(2))
    total_vaccinations = people_vaccinated + people_fully_vaccinated

    date = vaxutils.clean_date(soup.find(class_="field-name-post-date").text, "%d.%m.%Y")

    vaxutils.increment(location="Estonia",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
def main():
    """Scrape Guatemala's first-dose count from the MSPAS Shiny dashboard."""
    data = {
        "location": "Guatemala",
        "source_url": "https://gtmvigilanciacovid.shinyapps.io/3869aac0fb95d6baf2c80f19f2da5f98",
        "vaccine": "Moderna",
    }
    with webdriver.Chrome() as driver:
        driver.get(data["source_url"])
        time.sleep(2)
        # Open the vaccination tab, then let its widgets render.
        driver.find_element_by_class_name("fa-syringe").click()
        time.sleep(4)
        date = driver.find_element_by_class_name("logo").text
        dose1 = driver.find_element_by_id(
            "dosisaplicadas").find_element_by_tag_name("h3").text

    data["people_vaccinated"] = vaxutils.clean_count(dose1)
    # Second doses are hardcoded to zero in this scraper.
    data["people_fully_vaccinated"] = 0
    data["total_vaccinations"] = data["people_vaccinated"] + data["people_fully_vaccinated"]

    raw_date = re.search(r"\d+/\d+/202\d", date).group(0)
    data["date"] = vaxutils.clean_date(raw_date, "%d/%m/%Y")

    vaxutils.increment(
        location=data["location"],
        total_vaccinations=data["total_vaccinations"],
        people_vaccinated=data["people_vaccinated"],
        people_fully_vaccinated=data["people_fully_vaccinated"],
        date=data["date"],
        source_url=data["source_url"],
        vaccine=data["vaccine"],
    )
def parse_date(soup: BeautifulSoup) -> str:
    """Extract the "as of" date from the page's "Vaccination Data" heading."""
    for heading in soup.find_all("h3"):
        if "Vaccination Data" in heading.text:
            break
    # `heading` is the first matching <h3> (or the last <h3> if none match).
    raw = re.search(r"as of (\d+ \w+ \d+)", heading.text).group(1)
    return vaxutils.clean_date(raw, "%d %b %Y")
def main():
    """Scrape Jersey dose totals from the weekly statistics table."""
    url = "https://www.gov.je/Health/Coronavirus/Vaccine/Pages/VaccinationStatistics.aspx"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    table = soup.find(class_="govstyleTable-default")
    df = pd.read_html(str(table))[0]

    def row_value(label):
        # Column 0 holds the row label, column 1 the count.
        return int(df.loc[df[0] == label, 1].values[0])

    total_vaccinations = row_value("Total number of doses")
    people_vaccinated = row_value("Total number of first dose vaccinations")
    people_fully_vaccinated = row_value("Total number of second dose vaccinations")

    date = re.search(r"Data applies to: Week ending (\d[\w\s]+\d{4})", soup.text).group(1)
    date = vaxutils.clean_date(date, "%d %B %Y")

    vaxutils.increment(location="Jersey",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=url,
                       vaccine="Oxford/AstraZeneca, Pfizer/BioNTech")
def parse_data(data: dict) -> pd.Series:
    """Extract total vaccinations and report date from the sheet-style payload."""
    sheet_id = data["sheetNames"].index("Vakcinisani ukupno")
    total_vaccinations = vaxutils.clean_count(data["data"][sheet_id][0][0])
    # The date is read from sheet 1, cell (0, 0) — presumably a dedicated
    # date sheet; confirm against the payload layout.
    date = vaxutils.clean_date(data["data"][1][0][0], "%d.%m.%Y.")
    return pd.Series(data={"date": date, "total_vaccinations": total_vaccinations})
def parse_data(data: dict) -> pd.Series:
    """Map the payload's 'updated' date and 'progress' count onto a series."""
    return pd.Series(data={
        "date": vaxutils.clean_date(data["updated"], "%Y/%m/%d"),
        "total_vaccinations": data["progress"],
    })
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Read the three odometer widgets (total, dose 1, dose 2) and the report date."""
    odometers = soup.find_all(class_="odometer")
    raw_date = re.search(r"[\d\.]{10}", soup.find(class_="counter").text).group(0)
    return pd.Series(data={
        "total_vaccinations": int(odometers[0]["data-count"]),
        "people_vaccinated": int(odometers[1]["data-count"]),
        "people_fully_vaccinated": int(odometers[2]["data-count"]),
        "date": vaxutils.clean_date(raw_date, "%d.%m.%Y")
    })
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Parse dose counts and date from the stats table's <span> cells."""
    spans = soup.find("table").find_all("span")

    # The third- and second-to-last spans hold the dose-1 and dose-2 counts;
    # strip every non-digit character before converting.
    people_vaccinated = int(re.sub(r"[^\d]", "", spans[-3].text))
    people_fully_vaccinated = int(re.sub(r"[^\d]", "", spans[-2].text))

    data = pd.Series(dtype="int")
    data["people_vaccinated"] = people_vaccinated
    data["people_fully_vaccinated"] = people_fully_vaccinated
    data["total_vaccinations"] = people_vaccinated + people_fully_vaccinated

    raw_date = re.search(r"[\d-]{10}", spans[0].text).group(0)
    data["date"] = vaxutils.clean_date(raw_date, "%d-%m-%Y")
    return data
def read(source: str) -> pd.Series:
    """Scrape the headline vaccination count and date from the source page."""
    soup = vaxutils.get_soup(source)
    total_vaccinations = vaxutils.clean_count(
        soup.find(class_="stats-decoration-title").text)
    raw_date = re.search(r"\d+ \w+ 202\d",
                         soup.find(class_="stats-decoration-text").text).group(0)
    return pd.Series(data={
        "total_vaccinations": total_vaccinations,
        # people_vaccinated mirrors the headline count; second doses are
        # hardcoded to zero by this scraper.
        "people_vaccinated": total_vaccinations,
        "people_fully_vaccinated": 0,
        "date": vaxutils.clean_date(raw_date, "%d %B %Y"),
    })
def main():
    """Scrape India's total vaccination count from the MoHFW homepage."""
    url = "https://www.mohfw.gov.in/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    total_vaccinations = vaxutils.clean_count(soup.find(class_="coviddata").text)

    heading = soup.find(id="site-dashboard").find("h5").text
    date = re.search(r"\d+\s\w+\s+202\d", heading).group(0)
    date = vaxutils.clean_date(date, "%d %B %Y")

    vaxutils.increment(location="India",
                       total_vaccinations=total_vaccinations,
                       date=date,
                       source_url=url,
                       vaccine="Covaxin, Covishield")
def connect_parse_data(source: str) -> pd.Series:
    """Load the page in headless Chrome and read the counter and update date."""
    options = Options()
    options.add_argument("--headless")
    with webdriver.Chrome(options=options) as driver:
        driver.get(source)
        time.sleep(5)  # let the client-side counters populate
        raw_count = driver.find_element_by_id("counter1").text
        raw_date = driver.find_element_by_id("pupdateddate").text

    date = vaxutils.clean_date(raw_date.replace("Updated ", ""), "%d %b, %Y")
    return pd.Series(data={
        "total_vaccinations": vaxutils.clean_count(raw_count),
        "date": date,
    })
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Parse dose-1/dose-2 counters and the report date from the page."""
    counters = soup.find_all(class_="count")  # hoisted: queried once, not twice
    people_vaccinated = int(counters[0]["data-count"])
    people_fully_vaccinated = int(counters[1]["data-count"])
    # Sanity check: dose-1 recipients can never trail dose-2 recipients.
    assert people_vaccinated >= people_fully_vaccinated

    raw_date = re.search(r"\d+ \w+ 202\d",
                         soup.find(class_="reportdate").text).group(0)
    return pd.Series(data={
        "total_vaccinations": people_vaccinated + people_fully_vaccinated,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": vaxutils.clean_date(raw_date, "%d %b %Y"),
    })
def main():
    """Scrape Isle of Man's total vaccination count from the latest-updates page."""
    url = "https://covid19.gov.im/general-information/latest-updates/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    # The syringe icon's parent element holds both the date and the count.
    box = soup.find(class_="fa-syringe").parent
    date = vaxutils.clean_date(box.find("strong").text, "%d %B %Y")

    total_vaccinations = re.search(r"Total vaccinations:\s+(\d+)", box.text).group(1)
    total_vaccinations = vaxutils.clean_count(total_vaccinations)

    vaxutils.increment(location="Isle of Man",
                       total_vaccinations=total_vaccinations,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Parse the three counters (total, dose 1, dose 2) and the source date."""
    counters = soup.find_all(class_="counter")  # hoisted: queried once
    total_vaccinations = int(counters[0].text)
    people_vaccinated = int(counters[1].text)
    people_fully_vaccinated = int(counters[2].text)
    # Sanity-check the expected ordering of the cumulative counters.
    assert total_vaccinations >= people_vaccinated
    assert people_vaccinated >= people_fully_vaccinated

    raw_date = re.search(r"\d{2}-\d{2}-\d{4}",
                         soup.find(class_="fuente").text).group(0)
    return pd.Series(data={
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": vaxutils.clean_date(raw_date, "%d-%m-%Y"),
    })
def main():
    """Scrape Romania dose-1/dose-2 figures from the latest daily update post."""
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    # Jump to the newest "Actualizare zilnică" (daily update) article.
    for link in soup.find(class_="display-posts-listing").find_all("a", class_="title"):
        if "Actualizare zilnică" in link.text:
            url = link["href"]
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    date = soup.find(class_="post-date").find(class_="meta-text").text.strip()
    date = vaxutils.clean_date(date, "%b %d, %Y")

    main_text = soup.find(class_="entry-content-text").text

    # Index [1] takes the second occurrence of each phrase — presumably the
    # first is the day's increment and the second the cumulative figure;
    # confirm against the post format.
    fully = re.findall(r"[\d.]+ persoane vaccinate cu 2 doz", main_text)[1]
    fully = fully.replace(" persoane vaccinate cu 2 doz", "")
    people_fully_vaccinated = vaxutils.clean_count(fully)

    dose1_only = re.findall(r"[\d.]+ persoane vaccinate cu 1 doz", main_text)[1]
    dose1_only = dose1_only.replace(" persoane vaccinate cu 1 doz", "")
    # "cu 1 doz" counts first doses only; add dose-2 recipients to get
    # everyone with at least one dose.
    people_vaccinated = vaxutils.clean_count(dose1_only) + people_fully_vaccinated

    total_vaccinations = people_vaccinated + people_fully_vaccinated

    vaxutils.increment(location="Romania",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=url,
                       vaccine="Pfizer/BioNTech")
def main():
    """Scrape Romania dose figures from the PDF linked in the latest daily update."""
    url = "https://vaccinare-covid.gov.ro/comunicate-oficiale/"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    # Jump to the newest "Actualizare zilnică" (daily update) article.
    for link in soup.find(class_="display-posts-listing").find_all("a", class_="title"):
        if "Actualizare zilnică" in link.text:
            url = link["href"]
            break

    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    date = soup.find(class_="post-date").find(class_="meta-text").text.strip()
    date = vaxutils.clean_date(date, "%b %d, %Y")

    # The last link in the post body points at the detailed PDF table.
    url = soup.find(class_="entry-content-text").find_all("a")[-1]["href"]
    kwargs = {'pandas_options': {'dtype': str, 'header': None}}
    df = tabula.read_pdf(url, pages="all", **kwargs)[0]

    # The "Total" row carries two space-separated numbers.
    values = df[df[0] == "Total"].dropna()[2].str.split(" ")
    values = [vaxutils.clean_count(v) for v in pd.core.common.flatten(values)]
    assert len(values) == 2
    one_dose_only, people_fully_vaccinated = values

    people_vaccinated = one_dose_only + people_fully_vaccinated
    total_vaccinations = people_fully_vaccinated + people_vaccinated

    vaxutils.increment(location="Romania",
                       total_vaccinations=total_vaccinations,
                       people_vaccinated=people_vaccinated,
                       people_fully_vaccinated=people_fully_vaccinated,
                       date=date,
                       source_url=url,
                       vaccine="Moderna, Oxford/AstraZeneca, Pfizer/BioNTech")
def read(source: str) -> pd.Series:
    """Fetch the stats page with browser-like headers and parse dose counts and date."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.16; rv:86.0) Gecko/20100101 Firefox/86.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": "fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3",
        "Accept-Encoding": "gzip, deflate, br",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    soup = BeautifulSoup(
        requests.get(source, headers=headers).content, "html.parser")
    text = soup.find("div", id="data").find("p").text

    # "На сегодня (DD.MM.YY)" — the report date.
    date = re.search(r"На сегодня \(([\d\.]{8})\)", text).group(1)
    date = vaxutils.clean_date(date, "%d.%m.%y")

    def count_from(pattern):
        # Pull a whitespace-grouped number out of the stats paragraph.
        return vaxutils.clean_count(re.search(pattern, text).group(1))

    # "at least one component", "fully vaccinated", "total shots given".
    people_vaccinated = count_from(
        r"([\d\s]+) чел\. \(.*% от населения\) - привито хотя бы одним компонентом вакцины")
    people_fully_vaccinated = count_from(
        r"([\d\s]+) чел\. \(.*% от населения\) - полностью привито")
    total_vaccinations = count_from(r"([\d\s]+) шт\. - всего прививок сделано")

    return pd.Series({
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": date,
    })