def _parse_data(self) -> dict:
    """Scrape the date and vaccination counts from ``self.source_url``.

    The page is opened with the driver returned by ``get_driver()``; all
    ``<span>`` elements that have a ``data-text`` attribute are collected and
    the values are read from fixed positions in that list (6: date text,
    8: total vaccinations, 15: people fully vaccinated).

    Returns:
        dict: Keys ``total_vaccinations``, ``people_fully_vaccinated`` and
        ``date``.

    Raises:
        ValueError: If ``total_vaccinations`` is lower than
            ``people_fully_vaccinated``.
    """
    with get_driver() as driver:
        driver.get(self.source_url)
        # Fixed pause before reading the DOM (presumably lets the page render).
        time.sleep(2)
        spans = [
            span
            for span in driver.find_elements_by_tag_name("span")
            if span.get_attribute("data-text")
        ]
        # Date. The pattern is a raw string so the backslash sequences reach
        # the regex engine untouched instead of being parsed as (invalid)
        # string escapes.
        date = extract_clean_date(
            spans[6].text.replace("Sept", "Sep"),
            r"\(as of ([a-zA-Z]+)\.\s?(\d{1,2}), (20\d{2})\)",
            "%b %d %Y",
            lang="en",
        )
        # Metrics
        total_vaccinations = clean_count(spans[8].text)
        people_fully_vaccinated = clean_count(spans[15].text)
    # Sanity check: fully vaccinated people cannot exceed the doses given.
    if total_vaccinations < people_fully_vaccinated:
        raise ValueError(
            "Check values for:\n"
            f"total_vaccinations\t\t{total_vaccinations}\npeople_fully_vaccinated\t\t{people_fully_vaccinated}"
        )
    return {
        "total_vaccinations": total_vaccinations,
        # "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "date": date,
    }
Beispiel #2
0
def read(source: str) -> pd.Series:
    """Scrape dose counts from the ``kpimetric`` blocks of the page at *source*.

    Blocks whose text contains ``%`` are ignored; the remaining blocks are
    matched on the labels "1ste dosis", "2de dosis" and "3de dosis" and their
    ``valueLabel`` element is parsed with ``clean_count``.

    Args:
        source: URL of the page to load.

    Returns:
        pd.Series: ``total_vaccinations``, ``people_vaccinated``,
        ``people_fully_vaccinated``, ``total_boosters`` and ``date``.

    Raises:
        ValueError: If one of the three expected blocks is not found.
    """
    people_partly_vaccinated = None
    people_fully_vaccinated = None
    total_boosters = None

    with get_driver() as driver:
        driver.get(source)
        # Fixed pause before reading the DOM (presumably lets the page render).
        time.sleep(10)

        for block in driver.find_elements_by_class_name("kpimetric"):
            # Read the text once: each ``.text`` access queries the browser.
            block_text = block.text
            if "%" in block_text:
                continue
            if "1ste dosis" in block_text:
                people_partly_vaccinated = clean_count(
                    block.find_element_by_class_name("valueLabel").text)
            elif "2de dosis" in block_text:
                people_fully_vaccinated = clean_count(
                    block.find_element_by_class_name("valueLabel").text)
            elif "3de dosis" in block_text:
                total_boosters = clean_count(
                    block.find_element_by_class_name("valueLabel").text)

    # Fail with an explicit message instead of an UnboundLocalError when the
    # page did not expose every expected block.
    found = {
        "1ste dosis": people_partly_vaccinated,
        "2de dosis": people_fully_vaccinated,
        "3de dosis": total_boosters,
    }
    missing = [label for label, value in found.items() if value is None]
    if missing:
        raise ValueError(
            f"Could not find KPI block(s) for: {', '.join(missing)}")

    people_vaccinated = people_partly_vaccinated + people_fully_vaccinated

    return pd.Series(
        data={
            "total_vaccinations": people_vaccinated + people_fully_vaccinated,
            "people_vaccinated": people_vaccinated,
            "people_fully_vaccinated": people_fully_vaccinated,
            "total_boosters": total_boosters,
            "date": localdate("America/Paramaribo"),
        })
Beispiel #3
0
 def _parse_date(self, text: str):
     """Build a cleaned date from a Thai-language date found in *text*.

     ``self.regex_date`` is expected to capture day, month name and year as
     groups 1-3. The month name is mapped to its number and
     ``self._year_difference_conversion`` is subtracted from the year before
     the result is passed to ``clean_date``.
     """
     # Month names, including the alternative spellings seen in the source.
     month_by_name = {
         "มกราคม": 1,
         "กุมภาพันธ์": 2,
         "มีนาคม": 3,
         "เมษายน": 4,
         "พฤษภาคม": 5,
         "พฤษภำคม": 5,
         "มิถุนายน": 6,
         "มิถุนำยน": 6,
         "กรกฎาคม": 7,
         "กรกฎำคม": 7,
         "สิงหาคม": 8,
         "สิงหำคม": 8,
         "กันยายน": 9,
         "ตุลาคม": 10,
         "พฤศจิกายน": 11,
         "ธันวาคม": 12,
     }
     found = re.search(self.regex_date, text)
     day_number = clean_count(found.group(1))
     month_number = month_by_name[found.group(2)]
     year_number = clean_count(found.group(3)) - self._year_difference_conversion
     parsed = datetime(year_number, month_number, day_number)
     return clean_date(parsed)
Beispiel #4
0
 def _parse_metrics(self, soup: BeautifulSoup):
     """Return the four metrics captured by ``self.regex["metrics"]``.

     Groups 1-4 of the match against ``soup.text`` are parsed with
     ``clean_count`` and returned, in order, as (total_vaccinations,
     people_vaccinated, people_fully_vaccinated, total_boosters).
     """
     found = re.search(self.regex["metrics"], soup.text)
     return tuple(clean_count(found.group(idx)) for idx in range(1, 5))
Beispiel #5
0
    def _parse_data(self) -> pd.Series:
        """Read first- and second-dose counts from the dashboard cards.

        The iframe URL is opened in a headless driver and the ``aria-label``
        of every visible ``card`` element is searched for the "first dose" and
        "sec dose" figures.

        Returns:
            pd.Series: ``people_vaccinated`` and ``people_fully_vaccinated``.

        Raises:
            ValueError: If either figure could not be read from the cards.
        """
        people_vaccinated = None
        people_fully_vaccinated = None

        with get_driver(headless=True) as driver:
            # Main page
            driver.get(self._get_iframe_url())
            time.sleep(5)

            data_blocks = WebDriverWait(driver, 30).until(
                EC.visibility_of_all_elements_located((By.CLASS_NAME, "card"))
            )
            for block in data_blocks:
                block_title = block.get_attribute("aria-label")
                # Cards without an aria-label yield None; skip them rather
                # than failing on the substring test.
                if not block_title:
                    continue
                if "first dose" in block_title:
                    match = re.search(r"first dose +(\d+)\.", block_title)
                    if match:
                        people_vaccinated = match.group(1)
                elif "sec dose" in block_title:
                    match = re.search(r"sec dose +(\d+)\.", block_title)
                    if match:
                        people_fully_vaccinated = match.group(1)

        if people_vaccinated is None or people_fully_vaccinated is None:
            raise ValueError(
                "Could not read the first/second dose counts from the dashboard cards."
            )

        return pd.Series(
            {
                "people_vaccinated": clean_count(people_vaccinated),
                "people_fully_vaccinated": clean_count(people_fully_vaccinated),
            }
        )
Beispiel #6
0
    def parse_data_news_page(self, soup: BeautifulSoup):
        r"""Extract the vaccination metrics and report date from a news page.

        The text of the ``page_body`` element is matched against
        ``self.regex["metrics"]``; groups 1-3 give people vaccinated, people
        fully vaccinated and total boosters. The date is taken from the first
        ``<p>`` element and shifted back by one day (``minus_days=1``).

        2021-09-10
        We received confirmation from the International Communications Office, State Secretariat
        for International Communications and Relations, that the part of the report referring to
        people who received the 2nd dose ("közülük ([\d ]+) fő már a második oltását is megkapt")
        also included those who have received the J&J vaccine.
        On the other hand, we cannot estimate the number of vaccinations administered, as adding
        the two reported metrics would count J&J vaccines twice.

        Returns:
            dict: ``people_vaccinated``, ``people_fully_vaccinated``,
            ``total_boosters`` and ``date``.
        """

        text = clean_string(soup.find(class_="page_body").text)
        match = re.search(self.regex["metrics"], text)

        people_vaccinated = clean_count(match.group(1))
        people_fully_vaccinated = clean_count(match.group(2))
        total_boosters = clean_count(match.group(3))

        return {
            "people_vaccinated": people_vaccinated,
            "people_fully_vaccinated": people_fully_vaccinated,
            "total_boosters": total_boosters,
            "date": extract_clean_date(
                soup.find("p").text,
                # Raw string: ``\d`` must reach the regex engine unchanged.
                regex=r"(202\d. .* \d+.) - .*",
                date_format="%Y. %B %d.",
                loc="hu_HU.UTF-8",
                minus_days=1,
            ),
        }
Beispiel #7
0
def read(source: str) -> pd.Series:
    """Scrape vaccination metrics from the ``<h5>`` headings of *source*.

    The page is loaded in headless Chrome. For every ``<h5>`` whose text
    contains one of the known labels, the preceding sibling ``<div>`` is
    parsed with ``clean_count``.

    Args:
        source: URL of the page to load.

    Returns:
        pd.Series: ``date``, ``people_vaccinated``,
        ``people_fully_vaccinated``, ``total_vaccinations`` and
        ``total_boosters``.

    Raises:
        ValueError: If any of the expected headings is not found.
    """
    op = Options()
    op.add_argument("--headless")

    # Heading label -> metric name; order matters, the first match wins.
    metric_by_label = {
        "Primera dosis": "people_vaccinated",
        "Total dosis aplicadas": "total_vaccinations",
        "Población completamente vacunada": "people_fully_vaccinated",
        "Dosis refuerzo": "total_boosters",
    }
    metrics = {}

    with webdriver.Chrome(options=op) as driver:
        driver.get(source)
        # Fixed pause before reading the DOM (presumably lets the page render).
        time.sleep(3)

        for h5 in driver.find_elements_by_tag_name("h5"):
            # Read the text once: each ``.text`` access queries the browser.
            heading = h5.text
            for label, metric in metric_by_label.items():
                if label in heading:
                    metrics[metric] = clean_count(
                        h5.find_element_by_xpath("./preceding-sibling::div").text)
                    break

    # Fail with an explicit message instead of an UnboundLocalError when a
    # heading is absent from the page.
    missing = [m for m in metric_by_label.values() if m not in metrics]
    if missing:
        raise ValueError(f"Could not find metric(s): {', '.join(missing)}")

    data = {
        "date": localdate("America/Santo_Domingo"),
        "people_vaccinated": metrics["people_vaccinated"],
        "people_fully_vaccinated": metrics["people_fully_vaccinated"],
        "total_vaccinations": metrics["total_vaccinations"],
        "total_boosters": metrics["total_boosters"],
    }
    return pd.Series(data=data)
Beispiel #8
0
    def _parse_data(self, worksheet):
        """Extract vaccination metrics from the rows of *worksheet*.

        Every cell of every row in ``worksheet.values()`` is inspected. When a
        cell holds one of the known labels, the last cell of that row is taken
        as the metric value. The report date is read from the trailing ten
        characters of the "Total dosis aplicadas al ..." label.

        Returns:
            pd.Series: ``date``, ``total_vaccinations``,
            ``people_fully_vaccinated`` and ``people_vaccinated`` (computed as
            total - fully vaccinated + single doses).

        Raises:
            ValueError: If any of the expected labels is not found.
        """

        def _as_count(raw):
            # Integer cells are used as they are; anything else is parsed.
            return raw if isinstance(raw, int) else clean_count(raw)

        # Start from None so a missing label is reported by the check below
        # instead of surfacing as an UnboundLocalError.
        date_str = None
        total_vaccinations = None
        people_fully_vaccinated = None
        unique_doses = None

        for row in worksheet.values():
            for value in row:
                if "Total dosis aplicadas al " in str(value):
                    total_vaccinations = _as_count(row[-1])
                    date_raw = re.search(r"[\d-]{10}$", str(value)).group(0)
                    date_str = clean_date(date_raw, "%d-%m-%Y")
                elif value == "Esquemas completos segundas + únicas dosis":
                    people_fully_vaccinated = _as_count(row[-1])
                elif value == "Total únicas dosis acumuladas":
                    unique_doses = _as_count(row[-1])

        found = {
            "date": date_str,
            "total_vaccinations": total_vaccinations,
            "people_fully_vaccinated": people_fully_vaccinated,
            "unique_doses": unique_doses,
        }
        missing = [name for name, value in found.items() if value is None]
        if missing:
            raise ValueError(
                "Data is not where it is expected to be "
                f"({', '.join(missing)} not found)! Check worksheet")
        return pd.Series({
            "date":
            date_str,
            "total_vaccinations":
            total_vaccinations,
            "people_fully_vaccinated":
            people_fully_vaccinated,
            "people_vaccinated":
            total_vaccinations - people_fully_vaccinated + unique_doses,
        })
Beispiel #9
0
def parse_vaccinations(elem) -> dict:
    """Extract vaccination metrics from the news article linked next to *elem*.

    The article URL is taken from the first link of the enclosing ``card``
    element. The text of all ``<p>`` elements in the ``<article>`` is joined
    and searched with one pattern per metric; only metrics whose pattern
    matches appear in the returned dict.
    """
    # Get news text
    link = elem.find_parent(class_="card").find("a").get("href")
    article = get_soup(link, verify=False).find("article")
    text = "\n".join(paragraph.text for paragraph in article.find_all("p"))

    # Find metrics. Patterns tried previously and since replaced:
    #   total_vaccinations:      r"疫苗共有(?P<count>[\d,]*)人次"
    #   people_vaccinated:       r"1劑疫苗共有(?P<count>[\d,]*)人次"
    #   people_fully_vaccinated: r"2劑疫苗共有(?P<count>[\d,]*)人次",
    #                            r"已完成接種2劑有(?P<count>[\d,]*)人",
    #                            r"接種2劑有(?P<count>[\d,]*)人"
    pattern_by_metric = {
        "total_vaccinations": r"疫苗劑數為(?P<count>[\d,]*)劑",
        "people_vaccinated": r"已接種人數共有(?P<count>[\d,]*)人",
        "people_fully_vaccinated": r"已接種第2劑的?有([\d,]{6,})",
    }

    metrics = {}
    for metric, pattern in pattern_by_metric.items():
        found = re.search(pattern, text)
        if found:
            metrics[metric] = clean_count(found.group(1))
    return metrics
Beispiel #10
0
    def connect_parse_data(self) -> pd.Series:
        """Read the dose counters from ``self.source_url`` with headless Chrome.

        ``counter1`` is parsed as total vaccinations and ``counter4`` as total
        boosters; the date is ``localdate("Asia/Qatar")``.

        Returns:
            pd.Series: ``total_vaccinations``, ``total_boosters`` and ``date``.
        """
        chrome_options = Options()
        chrome_options.add_argument("--headless")

        with webdriver.Chrome(options=chrome_options) as browser:
            browser.get(self.source_url)
            time.sleep(5)

            doses_element = browser.find_element_by_id("counter1")
            total_vaccinations = clean_count(doses_element.text)
            boosters_element = browser.find_element_by_id("counter4")
            total_boosters = clean_count(boosters_element.text)

        # people_vaccinated / people_fully_vaccinated are not produced here.
        # An earlier approach estimated them from "One dose" / "Two doses"
        # share counters (ids "counter4" / "counter4a") by splitting
        # total_vaccinations proportionally; that logic is only valid as long
        # as Qatar *exclusively* uses 2-dose vaccines.

        date = localdate("Asia/Qatar")

        return pd.Series(
            data={
                "total_vaccinations": total_vaccinations,
                "total_boosters": total_boosters,
                "date": date,
            }
        )
Beispiel #11
0
def read(source: str) -> pd.Series:
    """Parse vaccination metrics from the ``w3-center`` blocks of *source*.

    Each block inside the ``main`` element is identified by the text of its
    first ``<p>``; the second ``<p>`` holds the value. For the total
    vaccinations block the third ``<p>`` additionally carries the date.

    Returns:
        pd.Series: ``total_vaccinations``, ``people_vaccinated``,
        ``people_fully_vaccinated``, ``total_boosters``, ``date`` and
        ``source_url``.
    """
    page = get_soup(source)

    for block in page.find(class_="main").find_all(class_="w3-center"):
        title = block.find("p").text
        paragraphs = block.find_all("p")

        if title == "ΣΥΝΟΛΟ ΕΜΒΟΛΙΑΣΜΩΝ":
            total_vaccinations = clean_count(paragraphs[1].text)
            date_match = re.search(r"[\d/]{8,10}", paragraphs[2].text)
            date = clean_date(date_match.group(0), "%d/%m/%Y")
        elif title == "ΣΥΝΟΛΟ 1ης ΔΟΣΗΣ":
            people_vaccinated = clean_count(paragraphs[1].text)
        elif title == "ΣΥΝΟΛΟ 2ης ΔΟΣΗΣ":
            people_fully_vaccinated = clean_count(paragraphs[1].text)
        elif title == "ΣΥΝΟΛΟ 3ης ΔΟΣΗΣ":
            total_boosters = clean_count(paragraphs[1].text)

    return pd.Series(
        data={
            "total_vaccinations": total_vaccinations,
            "people_vaccinated": people_vaccinated,
            "people_fully_vaccinated": people_fully_vaccinated,
            "total_boosters": total_boosters,
            "date": date,
            "source_url": source,
        }
    )
Beispiel #12
0
def read(source: str) -> pd.Series:
    """Parse the vaccination summary paragraph of the page at *source*.

    The text of the first ``<p>`` inside ``<div id="data">`` is searched for
    the date and for four counts, each of which is parsed with
    ``clean_count``.

    Returns:
        pd.Series: ``total_vaccinations``, ``people_vaccinated``,
        ``people_fully_vaccinated``, ``total_boosters`` and ``date``.
    """
    text = get_soup(source).find("div", id="data").find("p").text

    def first_group(pattern):
        # Capture group 1 of *pattern* in the summary text.
        return re.search(pattern, text).group(1)

    date = clean_date(first_group(r"На сегодня \(([\d\.]{8})\)"), "%d.%m.%y")

    people_vaccinated = clean_count(
        first_group(
            r"([\d\s]+) чел\. \([\d\.]+% от населения[^)]*\) - привито хотя бы одним компонентом вакцины"
        )
    )
    people_fully_vaccinated = clean_count(
        first_group(r"([\d\s]+) чел\. \([\d\.]+% от населения,?[^)]*\) - полностью привито")
    )
    total_vaccinations = clean_count(
        first_group(r"([\d\s]+) шт\. - всего прививок сделано")
    )
    total_boosters = clean_count(
        first_group(r"([\d\s]+) чел\. - прошли ревакцинацию")
    )

    metrics = {
        "total_vaccinations": total_vaccinations,
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
        "total_boosters": total_boosters,
        "date": date,
    }
    return pd.Series(metrics)
 def pipe_metrics(self, ds: pd.Series) -> pd.Series:
     """Add the three vaccination metrics to *ds* via ``enrich_data``.

     ``people_vaccinated`` and ``people_fully_vaccinated`` come from the
     ``first_vaccine_number`` and ``second_vaccine_number`` fields;
     ``total_vaccinations`` is their sum.
     """
     first_doses = clean_count(ds["first_vaccine_number"])
     ds = enrich_data(ds, "people_vaccinated", first_doses)

     second_doses = clean_count(ds["second_vaccine_number"])
     ds = enrich_data(ds, "people_fully_vaccinated", second_doses)

     doses_sum = ds["people_vaccinated"] + ds["people_fully_vaccinated"]
     return enrich_data(ds, "total_vaccinations", doses_sum)
Beispiel #14
0
 def _parse_table(self, soup):
     """Read the "Total" row of the ``content-table3`` table in *soup*.

     Returns, in order, the parsed "Total dosis", "Dosis 1", "Dosis 2" and
     "Dosis 3" cells as (total_vaccinations, people_vaccinated,
     people_fully_vaccinated, total_boosters).
     """
     table_html = str(soup.find("table", id="content-table3"))
     table = pd.read_html(table_html)[0]
     total_row = table[table["Región"] == "Total"]
     columns = ("Total dosis", "Dosis 1", "Dosis 2", "Dosis 3")
     return tuple(clean_count(total_row[column].item()) for column in columns)
Beispiel #15
0
 def _parse_boosters(self, infogram_data: dict) -> int:
     """Return the sum of the two booster figures found in *infogram_data*.

     Each figure is looked up by its element id with
     ``self._get_infogram_value`` and parsed with ``clean_count``.
     """
     element_ids = (
         "2fbd1738-f9c3-49ad-8855-c933c83abc18",
         "20d53fe7-91f8-4778-a13f-c938f18dd8fe",
     )
     counts = [
         clean_count(self._get_infogram_value(infogram_data, element_id))
         for element_id in element_ids
     ]
     return counts[0] + counts[1]
Beispiel #16
0
 def _parse_people_fully_vaccinated(self, infogram_data: dict) -> int:
     """Return the sum of the two fully-vaccinated figures in *infogram_data*.

     Each figure is looked up by its element id with
     ``self._get_infogram_value`` and parsed with ``clean_count``.
     """
     element_ids = (
         "7b45d34f-b8d0-47d7-8c3a-35c89a4d4cdf",
         "9eee1f41-c398-4a15-81aa-2588250e53cb",
     )
     counts = [
         clean_count(self._get_infogram_value(infogram_data, element_id))
         for element_id in element_ids
     ]
     return counts[0] + counts[1]
Beispiel #17
0
 def _parse_people_vaccinated(self, infogram_data: dict) -> int:
     """Return the sum of the two people-vaccinated figures in *infogram_data*.

     Each figure is looked up by its element id with
     ``self._get_infogram_value`` and parsed with ``clean_count``.
     """
     element_ids = (
         "4275cc3f-7ae8-4af3-9c5a-ef94203d47d7",
         "8a007cb6-7384-4af1-9f92-c41699d77aab",
     )
     counts = [
         clean_count(self._get_infogram_value(infogram_data, element_id))
         for element_id in element_ids
     ]
     return counts[0] + counts[1]
Beispiel #18
0
 def _parse_metrics_raw(self, soup, raise_err=True):
     """Collect the raw dose counts from the ``ttip`` elements of *soup*.

     NOTE(review): this definition appears to be cut off here — there is no
     return statement and neither ``raise_err`` nor ``has_d3`` is used in the
     visible part. Confirm against the full source before relying on it.
     """
     elems = soup.find_all(class_="ttip")
     # Presumably tracks whether a third-dose figure was seen — TODO confirm.
     has_d3 = False
     for e in elems:
         # Only elements that contain a <p> label are considered.
         if p := e.find("p"):
             # The label decides which metric the element's <span> holds.
             if (text := p.text.strip()) == "1st doses administered":
                 dose1 = clean_count(e.span.text)
             elif text == "2nd doses administered":
                 dose2 = clean_count(e.span.text)
Beispiel #19
0
 def _parse_metrics(self, pdf_text: str):
     """Extract the three dose counts from the report text.

     Returns, in order, (total_vaccinations, people_vaccinated,
     people_fully_vaccinated), each parsed with ``clean_count``.
     """
     pattern = (
         r"total doses administered ([\d,]+) total partially vaccinated ([\d,]+) total fully vaccinated ([\d,]+)"
     )
     found = re.search(pattern, pdf_text)
     return tuple(clean_count(found.group(idx)) for idx in (1, 2, 3))
Beispiel #20
0
 def _parse_text_who(self, soup):
     """Extract the WHO EUL sentence figures from ``soup.text``.

     Returns (who_doses, who_people_vaccinated), both parsed with
     ``clean_count``.
     """
     pattern = (
         r"In addition, ([\d,]+) doses of other vaccines recognised in the World Health Organization.s Emergency"
         r" Use Listing \(WHO EUL\) have been administered, covering ([\d,]+) individuals\."
     )
     doses_text, people_text = re.search(pattern, soup.text).groups()
     return clean_count(doses_text), clean_count(people_text)
Beispiel #21
0
 def _parse_text_summary(self, soup):
     """Extract the date and the three percentage shares from the summary.

     Returns, in order, (date, share_fully_vaccinated, share_vaccinated,
     share_boosters).
     """
     pattern = (
         r"As of ([\d]+ [A-Za-z]+ 20\d{2}), (\d+)% of our population has completed their full regimen/"
         r" received two doses of COVID-19 vaccines, (\d+)% has received at least one dose,"
         r" and (\d+)% ha(?:ve|s) received (?:their )?booster(?:s)?")
     date_text, fully_text, one_dose_text, booster_text = re.search(
         pattern, soup.text).groups()
     return (
         clean_date(date_text, fmt="%d %B %Y", lang="en"),
         clean_count(fully_text),
         clean_count(one_dose_text),
         clean_count(booster_text),
     )
Beispiel #22
0
 def _parse_text_national(self, soup):
     """Extract the national programme figures from ``soup.text``.

     The pattern captures doses, people with at least one dose and boosters
     in that order. Note the return order differs from it:
     (national_doses, national_boosters, national_people_vaccinated).
     """
     pattern = (
         r"We have administered a total of ([\d,]+) doses of COVID-19 vaccines under the.*"
         r"In total, ([\d,]+) individuals have received at least one dose of vaccine under the national vaccination"
         r" programme,.* ([\d,]+) (?:individuals )?have (?:received|taken) their booster shots"
     )
     doses_text, people_text, boosters_text = re.search(pattern, soup.text).groups()
     doses = clean_count(doses_text)
     people = clean_count(people_text)
     boosters = clean_count(boosters_text)
     return doses, boosters, people
Beispiel #23
0
 def _parse_metrics(self, news_info: dict):
     """Fetch the news page in ``news_info["link"]`` and extract its metrics.

     Groups 1-3 of ``self.regex["metrics"]`` give total vaccinations, people
     vaccinated and people fully vaccinated. The link and the date from
     *news_info* are passed through to the returned dict.
     """
     link = news_info["link"]
     page_text = clean_string(get_soup(link).text)
     total_raw, first_raw, full_raw = re.search(
         self.regex["metrics"], page_text).group(1, 2, 3)
     return {
         "total_vaccinations": clean_count(total_raw),
         "people_vaccinated": clean_count(first_raw),
         "people_fully_vaccinated": clean_count(full_raw),
         "source_url": link,
         "date": news_info["date"],
     }
 def parse_metrics(self, soup):
     """Read the metrics from the first HTML table at ``self.source_url``.

     The *soup* argument is not used; the table is read directly with
     ``pd.read_html``. People fully vaccinated is derived as total doses
     minus first doses.
     """
     tables = pd.read_html(self.source_url, converters={"Totales": str})
     table = tables[0].rename(columns={"Unnamed: 0": "metric"})
     table = table.set_index("metric")

     people_vaccinated = clean_count(table.loc["Total Vacunados 1ª dosis", "Totales"])
     total_vaccinations = clean_count(table.loc["Total dosis administradas", "Totales"])
     return {
         "total_vaccinations": total_vaccinations,
         "people_vaccinated": people_vaccinated,
         "people_fully_vaccinated": total_vaccinations - people_vaccinated,
     }
Beispiel #25
0
 def _parse_data_metrics(self, soup) -> dict:
     """Extract first- and second-dose counts from the page counters.

     The second and third ``text-brand-blue`` elements are used; for each,
     the last ``<dd>`` of its parent is searched for the number following
     "Innuttaasut".

     Returns:
         dict: ``people_vaccinated`` and ``people_fully_vaccinated``.

     Raises:
         ValueError: If the first-dose count is lower than the second-dose
             count.
     """
     counters = soup.find_all(class_="text-brand-blue")

     def _count_for(counter):
         # The figure sits in the last <dd> of the counter's parent element.
         label = counter.parent.find_all("dd")[-1].text
         return clean_count(re.search(r"Innuttaasut ([\d\.]+)", label).group(1))

     dose_1 = _count_for(counters[1])
     dose_2 = _count_for(counters[2])
     if dose_1 < dose_2:
         # The message now matches the condition (it used to say "higher").
         raise ValueError("dose_1 cannot be lower than dose_2")
     return {"people_vaccinated": dose_1, "people_fully_vaccinated": dose_2}
Beispiel #26
0
 def parse_data(self, soup):
     """Build the metrics series from the ``textwidget`` elements of *soup*.

     The first widget is parsed as total vaccinations and the second as
     people fully vaccinated; people vaccinated is their difference. The date
     is ``localdate("Asia/Tbilisi")``.
     """
     text_widgets = soup.find_all(class_="textwidget")
     doses = clean_count(text_widgets[0].text)
     fully = clean_count(text_widgets[1].text)
     record = {
         "total_vaccinations": doses,
         "people_vaccinated": doses - fully,
         "people_fully_vaccinated": fully,
         "date": localdate("Asia/Tbilisi"),
     }
     return pd.Series(record)
Beispiel #27
0
def parse_data(soup: BeautifulSoup) -> pd.Series:
    """Read the two counters from the ``stats`` element of *soup*.

    The first ``<span>`` is parsed as total vaccinations and the second as
    people fully vaccinated.
    """
    stat_spans = soup.find(id="stats").find_all("span")
    doses = clean_count(stat_spans[0].text)
    fully = clean_count(stat_spans[1].text)
    return pd.Series(
        data={
            "total_vaccinations": doses,
            "people_fully_vaccinated": fully,
        }
    )
    def _parse_metrics(self, text: str) -> int:
        """Return the sum of the PCR and AG counts found in *text*.

        Args:
            text: Text searched with ``self.regex["pcr"]`` and
                ``self.regex["ag"]``; group 1 of each match is parsed with
                ``clean_count``.

        Returns:
            The PCR count plus the AG count.

        Raises:
            ValueError: If either pattern does not match.
        """
        pcr = re.search(self.regex["pcr"], text)
        ag = re.search(self.regex["ag"], text)

        # Both matches are dereferenced below, so either one missing is an
        # error (the previous ``and`` let a single miss through to ``.group``).
        if not pcr or not ag:
            raise ValueError("Unable to extract data from text, please update the regex.")

        return clean_count(pcr.group(1)) + clean_count(ag.group(1))
Beispiel #29
0
 def _parse_data(self, soup):
     """Map the titled ``<h3 class="ml-4">`` counters of *soup* to metrics.

     For each heading, the text of the ``<p>`` in its parent element is the
     title; headings with a known title contribute their parsed value to the
     returned dict, all others are ignored.
     """
     metric_by_title = {
         "Всего вакцинаций": "total_vaccinations",
         "Количество вакцинированных 1 дозой": "people_vaccinated",
         "Количество лиц, прошедших полный курс вакцинации": "people_fully_vaccinated",
     }
     data = {}
     for heading in soup.find_all("h3", class_="ml-4"):
         metric = metric_by_title.get(heading.parent.p.text.strip())
         if metric is not None:
             data[metric] = clean_count(heading.text)
     return data
Beispiel #30
0
 def _parse_people_fully_vaccinated(self, infogram_data: dict) -> int:
     """Return the sum of the two fully-vaccinated figures in *infogram_data*.

     Each figure is looked up by its element id with
     ``self._get_infogram_value`` and parsed with ``clean_count``.
     """
     element_ids = (
         "7b45d34f-b8d0-47d7-8c3a-35c89a4d4cdfbeb30c31-b4de-45fc-bdc5-78e39d37039b",
         "9eee1f41-c398-4a15-81aa-2588250e53cbbc920ff4-fff6-4ef4-a174-af1915c4b1a7",
     )
     counts = [
         clean_count(self._get_infogram_value(infogram_data, element_id))
         for element_id in element_ids
     ]
     return counts[0] + counts[1]