Ejemplo n.º 1
0
    def _parse_metrics(self, soup):
        """Extract the vaccination counters from the page text.

        ``total_vaccinations`` comes from capture group 2 of the ``title``
        pattern; each remaining metric comes from capture group 1 of the
        pattern stored under its own name in ``self.regex``.

        Args:
            soup: Parsed page; only its ``text`` attribute is read.

        Returns:
            dict: metric name -> value returned by ``clean_count``.
        """
        title_match = re.search(self.regex["title"], soup.text)
        metrics = {"total_vaccinations": clean_count(title_match.group(2))}

        metrics.update(
            {
                name: clean_count(re.search(self.regex[name], soup.text).group(1))
                for name in ("people_vaccinated", "people_fully_vaccinated", "total_boosters")
            }
        )
        return metrics
Ejemplo n.º 2
0
 def _parse_data_metrics(self, soup) -> dict:
     """Read the first- and second-dose counts from the page counters.

     The counters are the elements with class ``text-brand-blue``; the
     second and third of them (indices 1 and 2) are used for dose 1 and
     dose 2 respectively.

     Args:
         soup: Parsed page to search.

     Returns:
         dict: ``people_vaccinated`` and ``people_fully_vaccinated`` counts.

     Raises:
         ValueError: If the first-dose count is lower than the second-dose
             count.
     """
     counters = soup.find_all(class_="text-brand-blue")

     def _count_from(counter):
         # The figure follows the literal "Innuttaasut" in the last <dd>
         # found under the counter's parent element.
         last_dd_text = counter.parent.find_all("dd")[-1].text
         return clean_count(re.search(r"Innuttaasut ([\d\.]+)", last_dd_text).group(1))

     dose_1 = _count_from(counters[1])
     dose_2 = _count_from(counters[2])
     if dose_1 < dose_2:
         # The message now matches the condition being rejected.
         raise ValueError("dose_1 cannot be lower than dose_2")
     return {"people_vaccinated": dose_1, "people_fully_vaccinated": dose_2}
Ejemplo n.º 3
0
 def _parse_data(self) -> tuple:
     """Request every vaccine/dose metric and build the output dataframes.

     One POST request is sent to ``self.source_url`` per metric, using the
     payload produced by ``self._payload(<metric name>)``. The timestamp is
     taken from the first response only.

     Returns:
         tuple: ``(df_main, df_manufacturer)`` as produced by
         ``self._build_df``.
     """
     # Order matters: the cleaned values are passed to self._build_df as a
     # positional list.
     metric_names = (
         "Pfizer_first",
         "Pfizer_second",
         "Pfizer_booster",
         "Johnson_first",
         "Johnson_booster",
     )
     raw_values = []
     timestamp = None
     for position, metric_name in enumerate(metric_names):
         response = json.loads(
             requests.post(
                 self.source_url,
                 headers=self.headers,
                 data=json.dumps(self._payload(metric_name)),
             ).content
         )
         data = response["results"][0]["result"]["data"]
         raw_values.append(data["dsr"]["DS"][0]["PH"][0]["DM0"][0]["M0"])
         if position == 0:
             timestamp = data["timestamp"]
     # parse date
     date = self._parse_date(timestamp)
     # create metrics list
     metrics = [clean_count(raw_value) for raw_value in raw_values]
     # build dataframe
     df_main, df_manufacturer = self._build_df(metrics, date)
     return df_main, df_manufacturer
Ejemplo n.º 4
0
 def _parse_metrics(self, text: str) -> tuple:
     """Parse the five dose metrics from text.

     All figures are capture groups 1-5 of the single ``doses`` pattern, so
     the pattern is searched once instead of once per metric.

     Args:
         text: Text to search.

     Returns:
         tuple: cleaned values in the order total_vaccinations,
         people_vaccinated, people_fully_vaccinated, total_boosters,
         dose_after_positive.

     Raises:
         AttributeError: If the pattern does not match (``re.search``
             returns ``None``).
     """
     match = re.search(self.regex["doses"], text)
     # Fetch every group before cleaning, as the original did.
     raw_values = match.group(1, 2, 3, 4, 5)
     return tuple(clean_count(raw_value) for raw_value in raw_values)
Ejemplo n.º 5
0
 def _parse_metrics(self, soup: BeautifulSoup) -> dict:
     """Parse metrics from soup.

     Args:
         soup: Parsed page; its full text is searched.

     Returns:
         dict: ``people_vaccinated``, ``people_fully_vaccinated`` and
         ``total_vaccinations`` (the sum of the other two), each wrapped in
         a single-element list.
     """
     text = soup.get_text()
     # Remove commas that sit between two digits so numbers are contiguous.
     text = re.sub(r"(\d),(\d)", r"\1\2", text)
     people_vaccinated = clean_count(re.search(self.regex["people_vaccinated"], text).group(1))
     # NOTE(review): group 2 is used here, unlike group 1 above; confirm
     # against the pattern definition.
     people_fully_vaccinated = clean_count(re.search(self.regex["people_fully_vaccinated"], text).group(2))
     total_vaccinations = people_vaccinated + people_fully_vaccinated
     return {
         "people_vaccinated": [people_vaccinated],
         "people_fully_vaccinated": [people_fully_vaccinated],
         "total_vaccinations": [total_vaccinations],
     }
Ejemplo n.º 6
0
 def _parse_metrics(self, elem: element.Tag) -> int:
     """Read the count from the title block preceding *elem*.

     The text of the previous sibling ``<div class="t192__title">`` is
     stripped of spaces and passed to ``clean_count``.
     """
     title_div = elem.find_previous_sibling("div", class_="t192__title")
     compact_text = title_div.text.replace(" ", "")
     return clean_count(compact_text)
Ejemplo n.º 7
0
    def export(self):
        """Scrape the current cumulative test count and update the output CSV.

        The count is the last cell of the tenth table row on the page. The
        new row is prepended to the existing file only when both the count
        and today's date are strictly greater than the maxima already stored.
        When the output file does not exist, nothing is written.
        """
        url = "https://guineasalud.org/estadisticas/"

        rows = get_soup(url).find_all("tr")
        last_cell = rows[9].find_all("td")[-1]
        count = clean_count(last_cell.text)

        date_str = date.today().strftime("%Y-%m-%d")
        df = pd.DataFrame(
            {
                "Country": self.location,
                "Date": [date_str],
                "Cumulative total": count,
                "Source URL": url,
                "Source label": "Ministerio de Sanidad y Bienestar Social",
                "Units": "tests performed",
                "Notes": pd.NA,
            }
        )

        if not os.path.isfile(self.output_path):
            return

        existing = pd.read_csv(self.output_path)
        # Nested checks keep the original short-circuit: the date comparison
        # only runs when the count comparison succeeded.
        if count > existing["Cumulative total"].max():
            if date_str > existing["Date"].max():
                merged = pd.concat([df, existing])
                merged = merged.sort_values("Date", ascending=False)
                merged = merged.drop_duplicates()
                merged.to_csv(self.output_path, index=False)
Ejemplo n.º 8
0
    def export(self):
        """Scrape the current cumulative test count and export the data file.

        The existing data is read from ``self.output_path``. A new row is
        prepended only when both the scraped count and the local date are
        strictly greater than the maxima already stored. The resulting
        frame is always passed to ``self.export_datafile``.
        """
        data = pd.read_csv(self.output_path)

        url = "http://cdcmoh.gov.kh/"
        soup = get_soup(url)
        # The leftover debug print of an unrelated selector was removed here.

        count = clean_count(soup.select("p+ div strong:nth-child(1)")[0].text)

        date_str = localdatenow("Asia/Phnom_Penh")

        if count > data["Cumulative total"].max() and date_str > data["Date"].max():
            new = pd.DataFrame(
                {
                    "Country": self.location,
                    "Date": [date_str],
                    "Cumulative total": count,
                    "Source URL": url,
                    "Source label": "CDCMOH",
                    "Units": "tests performed",
                }
            )

            data = pd.concat([new, data], sort=False)
        self.export_datafile(data)
Ejemplo n.º 9
0
 def _parse_data(self) -> dict:
     """Fetch the stats JSON and return the tested count with today's date.

     Returns:
         dict: ``count`` (cleaned ``tested`` value of the first record) and
         ``date`` (``localdate("Atlantic/Faeroe")``).
     """
     stats = request_json(self.source_url)["stats"]
     first_record = pd.DataFrame.from_records(stats, columns=["tested"]).iloc[0]
     return {
         # Select by label: an integer key on a label-indexed Series relies
         # on pandas' deprecated positional fallback.
         "count": clean_count(first_record["tested"]),
         "date": localdate("Atlantic/Faeroe"),
     }
Ejemplo n.º 10
0
 def _parse_metrics(self, text: str) -> int:
     """Extract the booster figure from the news text.

     Args:
         text: Text searched with the ``booster`` pattern.

     Returns:
         The first capture group, passed through ``clean_count``.

     Raises:
         TypeError: If the pattern does not match.
     """
     found = re.search(self.regex["booster"], text)
     if found is None:
         raise TypeError("Website Structure Changed, please update the script")
     return clean_count(found.group(1))
Ejemplo n.º 11
0
 def _parse_metrics(self, json_data: dict) -> dict:
     """Build a metric -> count mapping from the page JSON.

     For every ``(metric, entity)`` pair in ``self.metric_entities`` the text
     of the entity's first content block is cleaned with ``clean_count``.
     """

     def _block_text(entity):
         # Walk the nested structure down to the first text block of the entity.
         entities = json_data["elements"]["content"]["content"]["entities"]
         return entities[entity]["props"]["content"]["blocks"][0]["text"]

     return {
         metric: clean_count(_block_text(entity))
         for metric, entity in self.metric_entities.items()
     }
Ejemplo n.º 12
0
 def _parse_metrics(self, json_data):
     """Build a metric -> count mapping from chart data embedded in the JSON.

     For every ``(metric, entity)`` pair in ``self.metric_entities`` the first
     chart-data cell is read, the number following ``18px;">`` is extracted
     and then cleaned with ``clean_count``.
     """
     parsed = {}
     for name, entity_id in self.metric_entities.items():
         props = json_data["elements"]["content"]["content"]["entities"][entity_id]["props"]
         cell_markup = props["chartData"]["data"][0][0][0]
         digits = re.search(r'18px;">([\d\.]+)', cell_markup).group(1)
         parsed[name] = clean_count(digits)
     return parsed
Ejemplo n.º 13
0
 def _parse_data(self, url: str) -> pd.Series:
     """Extract the vaccination metrics and report date from a PDF.

     Args:
         url: Location of the PDF, passed to ``self._get_text_from_pdf``.

     Returns:
         pd.Series: the three vaccination counts plus ``date``, after the
         values have been passed to ``self._check_data``.
     """
     text = self._get_text_from_pdf(url)

     def _first_group(key):
         # Every field is capture group 1 of its own pattern.
         return re.search(self.regex[key], text).group(1)

     data = {
         "total_vaccinations": clean_count(_first_group("total_vaccinations")),
         "people_vaccinated": clean_count(_first_group("people_vaccinated")),
         "people_fully_vaccinated": clean_count(_first_group("people_fully_vaccinated")),
         "date": clean_date(_first_group("date"), "%d %B, %Y", lang="es"),
     }
     self._check_data(data)
     return pd.Series(data)
Ejemplo n.º 14
0
 def _parse_metrics(self, soup: BeautifulSoup) -> dict:
     """Parse metrics from the soup.

     Each figure is the text of the element that follows the parent of the
     matching label. ``total_boosters`` is the sum of the ``Dose3`` and
     ``Boosters`` figures; ``people_vaccinated`` is derived as total doses
     minus fully vaccinated minus boosters.

     Args:
         soup: Parsed page to search.

     Returns:
         dict: each metric wrapped in a single-element list.

     Raises:
         ValueError: If any of the four labels is not found.
     """
     total = soup.find(text=self.regex["Total"])
     dose2 = soup.find(text=self.regex["Dose2"])
     dose3 = soup.find(text=self.regex["Dose3"])
     boosters = soup.find(text=self.regex["Boosters"])
     if not total or not dose2 or not dose3 or not boosters:
         raise ValueError("Metrics not found, please update the script")

     def _value_after(label):
         return clean_count(label.parent.find_next().text)

     total_vaccinations = _value_after(total)
     people_fully_vaccinated = _value_after(dose2)
     total_boosters = _value_after(dose3) + _value_after(boosters)
     people_vaccinated = total_vaccinations - people_fully_vaccinated - total_boosters
     return {
         "people_vaccinated": [people_vaccinated],
         "people_fully_vaccinated": [people_fully_vaccinated],
         "total_boosters": [total_boosters],
         "total_vaccinations": [total_vaccinations],
     }
Ejemplo n.º 15
0
 def _parse_metrics_from_soup(self, soup: BeautifulSoup) -> list:
     """Get metrics from soup.

     For each metric, the label matching its pattern is located, and the
     figure is read from the ``wptb-text-container`` div that follows the
     label's own ``wptb-text-container`` div.

     Args:
         soup: Parsed page to search.

     Returns:
         list: cleaned counts in the order total_vaccinations,
         people_vaccinated, people_fully_vaccinated, total_boosters.
     """
     metrics = (
         "total_vaccinations",
         "people_vaccinated",
         "people_fully_vaccinated",
         "total_boosters",
     )
     counts = []
     for metric in metrics:
         label = soup.find(text=re.compile(self.regex[metric]))
         label_box = label.find_parent("div", class_="wptb-text-container")
         value_box = label_box.find_next_sibling("div", class_="wptb-text-container")
         counts.append(clean_count(value_box.text))
     return counts
Ejemplo n.º 16
0
 def _parse_metrics(self, elem: element.Tag) -> int:
     """Return the cleaned count held in the next ``<p class="case-Number">`` sibling of *elem*."""
     number_tag = elem.find_next_sibling("p", class_="case-Number")
     return clean_count(number_tag.text)
Ejemplo n.º 17
0
 def _parse_metrics(self, soup: BeautifulSoup) -> int:
     """Return the cleaned count found in the page text.

     Dots sitting between two digits are removed before the ``count``
     pattern is searched; its first capture group is the figure.
     """
     page_text = re.sub(r"(\d)\.(\d)", r"\1\2", soup.get_text())
     found = re.search(self.regex["count"], page_text)
     return clean_count(found.group(1))
Ejemplo n.º 18
0
 def _parse_metrics(self, elem: element.Tag) -> int:
     """Return the cleaned ``endVal`` entry of the JSON stored in *elem*'s ``data-options`` attribute."""
     options = json.loads(elem.attrs["data-options"])
     return clean_count(options["endVal"])
Ejemplo n.º 19
0
 def _df_builder(self, count: str) -> pd.DataFrame:
     """Return a one-row dataframe whose ``Cumulative total`` column holds the cleaned *count*."""
     cumulative_total = clean_count(count)
     return pd.DataFrame({"Cumulative total": [cumulative_total]})
Ejemplo n.º 20
0
 def _parse_metrics(self, elem: element.Tag) -> int:
     """Return the cleaned count from the ``<td>`` that follows *elem*."""
     value_cell = elem.find_next_sibling("td")
     return clean_count(value_cell.text)
Ejemplo n.º 21
0
 def _parse_metrics(self, elem: element.Tag) -> int:
     """Return the cleaned count from *elem*'s next sibling, keeping only its digits."""
     digits_only = re.sub(r"\D", "", elem.find_next_sibling().text)
     return clean_count(digits_only)
Ejemplo n.º 22
0
 def _parse_metrics(self, elem: element.Tag) -> int:
     """Return the cleaned count from the ``<strong>`` that precedes *elem*."""
     strong_tag = elem.find_previous_sibling("strong")
     return clean_count(strong_tag.text)
Ejemplo n.º 23
0
 def _parse_metrics(self, elem: element.Tag) -> int:
     """Return the cleaned ``data-counter-value`` of the ``stats-number`` element inside *elem*."""
     counter_tag = elem.find(class_="stats-number")
     return clean_count(counter_tag["data-counter-value"])
Ejemplo n.º 24
0
 def pipe_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
     """Return *df* with a ``Cumulative total`` column set from the source JSON.

     The value is read from the ``value`` attribute of the first feature
     returned by ``self.source_url`` and cleaned with ``clean_count``.
     """
     first_feature = request_json(self.source_url)["features"][0]
     cumulative_total = clean_count(first_feature["attributes"]["value"])
     return df.assign(**{"Cumulative total": cumulative_total})
Ejemplo n.º 25
0
 def _parse_metrics(self, text: str) -> int:
     """Return the cleaned first capture group of the ``count`` pattern found in *text*."""
     found = re.search(self.regex["count"], text)
     return clean_count(found.group(1))
Ejemplo n.º 26
0
 def _parse_metrics(self, elem: element.Tag) -> int:
     """Return the cleaned count made of the digits in *elem*'s text."""
     digits_only = re.sub(r"\D", "", elem.text)
     return clean_count(digits_only)
Ejemplo n.º 27
0
 def _parse_metrics(self, soup: BeautifulSoup) -> int:
     """Return the cleaned count from the first ``<div class="test-stlucia">`` in the soup."""
     count_div = soup.find("div", class_="test-stlucia")
     return clean_count(count_div.text)
Ejemplo n.º 28
0
 def _parse_metrics(self, t_scraper: TableauScraper) -> int:
     """Return the cleaned test count from the ``Resumen`` worksheet.

     The value is read at row label 0 of the
     ``SUM(Cantidad Pruebas)-alias`` column and converted with ``int``
     before being handed to ``clean_count``.
     """
     summary = t_scraper.getWorksheet("Resumen").data
     raw_total = summary.loc[0, "SUM(Cantidad Pruebas)-alias"]
     return clean_count(int(raw_total))
Ejemplo n.º 29
0
 def _parse_metrics(self, soup: BeautifulSoup) -> int:
     """Return the cleaned count from the second ``<span>`` of the first table.

     Everything matching the ``count`` pattern is removed from the span
     text before it is passed to ``clean_count``.
     """
     second_span = soup.find("table").find_all("span")[1]
     span_text = second_span.text
     return clean_count(re.sub(self.regex["count"], "", span_text))
Ejemplo n.º 30
0
 def _parse_metrics(self, table: pd.DataFrame) -> int:
     """Return the cleaned value stored under key ``0`` of the table's last row."""
     last_row = table.iloc[-1]
     return clean_count(last_row[0])