Example #1
    def export(self):
        # Load the previously exported data
        data = pd.read_csv(self.output_path)

        url = "http://cdcmoh.gov.kh/"
        soup = get_soup(url)

        count = clean_count(soup.select("p+ div strong:nth-child(1)")[0].text)

        date_str = localdatenow("Asia/Phnom_Penh")

        # Only append if the count increased and the date is new
        if count > data["Cumulative total"].max() and date_str > data["Date"].max():

            new = pd.DataFrame({
                "Country": self.location,
                "Date": [date_str],
                "Cumulative total": count,
                "Source URL": url,
                "Source label": "CDCMOH",
                "Units": "tests performed",
            })

            data = pd.concat([new, data], sort=False)
        self.export_datafile(data)
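These snippets lean on shared helpers that are not shown here. A minimal sketch of plausible implementations, assuming requests, BeautifulSoup, and pytz are available (the names come from the snippets; the bodies are assumptions):

import datetime
import re

import pytz
import requests
from bs4 import BeautifulSoup

def get_soup(url, verify=True):
    # Fetch a page and parse it with BeautifulSoup (assumed helper)
    response = requests.get(url, verify=verify)
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")

def clean_count(raw: str) -> int:
    # Drop everything but digits before casting, e.g. "1,234" -> 1234 (assumed helper)
    return int(re.sub(r"[^\d]", "", raw))

def localdatenow(tz: str) -> str:
    # Today's date in the given timezone as YYYY-MM-DD (assumed helper)
    return datetime.datetime.now(pytz.timezone(tz)).strftime("%Y-%m-%d")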
Example #2
 def read(self) -> pd.DataFrame:
     """Reads the data from the source."""
     soup = get_soup(self.source_url_ref)
     self._read_latest(soup)
     link = self._parse_file_link(soup)
     df = read_xlsx_from_url(link, sheet_name="Date")
     return df
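Example #2 calls read_xlsx_from_url, which is not defined in these snippets. A minimal sketch under the assumption that it downloads the workbook into memory and defers to pandas (requests and an Excel engine such as openpyxl are assumed):

import io

import pandas as pd
import requests

def read_xlsx_from_url(url: str, sheet_name=0) -> pd.DataFrame:
    # Download the Excel file and load the requested sheet (assumed helper)
    response = requests.get(url)
    response.raise_for_status()
    return pd.read_excel(io.BytesIO(response.content), sheet_name=sheet_name)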
Example #3
 def _parse_data_url(self):
     soup = get_soup(self.source_url_ref)
     # The page has four accordion sections; the file link sits in the second
     accordions = soup.find_all(class_="accordion-body")
     assert len(accordions) == 4
     headers = accordions[1].find_all("h5")
     url = headers[0].a.get("href")
     return url
Example #4
    def export(self):
        url = "https://guineasalud.org/estadisticas/"

        soup = get_soup(url)
        stats = soup.find_all("tr")
        count = clean_count(stats[9].find_all("td")[-1].text)

        date_str = date.today().strftime("%Y-%m-%d")
        df = pd.DataFrame({
            "Country": self.location,
            "Date": [date_str],
            "Cumulative total": count,
            "Source URL": url,
            "Source label": "Ministerio de Sanidad y Bienestar Social",
            "Units": "tests performed",
            "Notes": pd.NA,
        })

        if os.path.isfile(self.output_path):
            existing = pd.read_csv(self.output_path)
            # Only merge if the count increased and the date is new
            if count > existing["Cumulative total"].max() and date_str > existing["Date"].max():
                df = pd.concat([df, existing]).sort_values(
                    "Date", ascending=False).drop_duplicates()
                df.to_csv(self.output_path, index=False)
        else:
            # First run: no file exists yet, so write the initial row
            df.to_csv(self.output_path, index=False)
Example #5
    def export(self):

        data = pd.read_csv(self.output_path)

        url = "https://www.gouv.bj/coronavirus/"
        soup = get_soup(url)

        stats = soup.find_all("h2", attrs={"class": "h1 adapt white regular"})

        count = int(stats[0].text) + int(stats[1].text)
        date_str = localdatenow("Africa/Porto-Novo")

        if count > data["Cumulative total"].max() and date_str > data["Date"].max():

            new = pd.DataFrame({
                "Country": self.location,
                "Date": [date_str],
                "Cumulative total": count,
                "Source URL": url,
                "Source label": "Government of Benin",
                "Units": "tests performed",
            })

            df = pd.concat([new, data], sort=False)
            self.export_datafile(df)
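Examples #1 and #5 finish by calling self.export_datafile, which is also not shown. A plausible one-line sketch, assuming it simply persists the merged frame to the scraper's output path (the real implementation may also validate or normalize columns):

def export_datafile(self, df):
    # Write the merged data frame to the scraper's output CSV (assumed helper)
    df.to_csv(self.output_path, index=False)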
Example #6
 def _parse_link(self, soup: BeautifulSoup) -> str:
     """Parse the article URL from the soup."""
     page_link = soup.find("a", text=re.compile(self.regex["title"]))
     if page_link is None:
         raise ValueError("Article page not found, please update the script")
     soup_page = get_soup(f"{self.source_url}{page_link['href']}")
     href = soup_page.find_all("a", text=re.compile(self.regex["title"]))[-1]["href"]
     return f"{self._base_url}{href}"
Example #7
 def _get_elems(self) -> list:
     soup = get_soup(self.source_url)
     elems = soup.find_all("tr")
     elems = [
         e for e in elems
         if "Progress Report of COVID - 19 Immunization" in e.text
     ]
     return elems
Example #8
 def _get_file_links_bfill(self, index=None, date_limit=None):
     soup = get_soup(self.source_url_ref)
     links = [x.a.get("href") for x in soup.find_all("h5")]
     if index is None:
         date_limit = date_limit if date_limit is not None else self.date_limit_one_dose_ddmmyyyy
         # Cut the list at the first link whose URL contains the date limit
         matches = [i for i, link in enumerate(links) if date_limit in link]
         index = matches[0]
     links = links[:index]
     return links
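The slicing in Example #8 keeps only the links published after the date limit: it finds the first link whose URL contains the limit string and truncates the list there. A tiny standalone demo of that logic (the file names are invented):

links = [
    "report_10012022.pdf",
    "report_09012022.pdf",
    "report_08012022.pdf",
]
date_limit = "08012022"
index = next(i for i, link in enumerate(links) if date_limit in link)
print(links[:index])  # ['report_10012022.pdf', 'report_09012022.pdf']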
Example #9
 def _get_list_pdf_urls(self) -> list:
     """Get list of PDF URLs."""
     soup = get_soup(self.source_url, verify=False)
     links = [
         a.get("href")
         for a in soup.find_all("a", text=re.compile("MINISTRY OF HEALTH KENYA COVID-19"))
     ]
     return links
Example #10
 def read(self) -> pd.DataFrame:
     """Read data from source"""
     soup = get_soup(self.source_url_ref, verify=False)
     date = self._parse_date(soup)
     df = pd.DataFrame({"Date": [date]})
     return df
Example #11
 def read(self) -> pd.DataFrame:
     """Read data from source."""
     data = []
     for cnt in range(1, self._num_max_pages + 1):
         url = f"{self.source_url}{cnt}/"
         soup = get_soup(url)
         # Parse this page; stop as soon as the parser signals there is no more data
         data, proceed = self._parse_data(soup)
         if not proceed:
             break
     return pd.DataFrame(data)
Example #12
 def _parse_data(self, soup: BeautifulSoup) -> pd.DataFrame:
     """Parse data from the soup."""
     # Locate the embedded dashboard iframe
     iframe = soup.find("iframe", {"title": "Covid Dashboard"})
     if iframe is None:
         raise ValueError("Dashboard not found, please update the script")
     soup = get_soup(iframe["src"])
     # Parse the metrics from the dashboard page
     metrics = self._parse_metrics(soup)
     # Build a single-row DataFrame from the metrics
     df = pd.DataFrame({
         **metrics,
     })
     return df
Example #13
 def _parse_data(self, soup: BeautifulSoup) -> pd.DataFrame:
     """Parse data from soup"""
     # Get the article URL
     link = soup.find("a", text=re.compile(self.regex["title"]))["href"]
     if not link:
         raise ValueError("Article not found, please update the script")
     self.source_url_ref = link
     soup = get_soup(link)
     # Get the metrics
     metrics = self._parse_metrics(soup)
     # Get the date
     date = self._parse_date(soup)
     df = pd.DataFrame({
         "date": [date],
         **metrics,
     })
     return df
Example #14
 def _parse_data(self, soup: BeautifulSoup) -> pd.DataFrame:
     """Parse data from soup"""
     # Get the article URL
     link = soup.find("article", {
         "class": "category-izvestuvanja"
     }).find("a")["href"]
     if not link:
         raise ValueError("Article not found, please update the script")
     self.source_url_ref = link
     soup = get_soup(link)
     # Get the metrics
     count = self._parse_metrics(soup)
     # Get the date
     date = self._parse_date(soup)
     df = pd.DataFrame({
         "Date": [date],
         "Cumulative total": [count],
     })
     return df
Example #15
    def _parse_data(self, soup: BeautifulSoup) -> pd.DataFrame:
        """Parse data from soup."""
        # Obtain the relevant link
        link = self._parse_link(soup, self.regex["title"])
        # Get soup from link
        soup = get_soup(link)
        # Extract pdf link from soup
        self.source_url_ref = self._parse_link(soup, self.regex["pdf"])
        # Extract text from pdf url
        text = self._extract_text_from_pdf()
        # Parse metrics
        count = self._parse_metrics(text)
        # Parse date
        date = self._parse_date(text)
        # Create dataframe
        data = {
            "Cumulative total": [count],
            "Date": [date],
        }

        return pd.DataFrame(data)
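Example #15 depends on _extract_text_from_pdf, which is not shown. A minimal sketch assuming the PDF is fetched from self.source_url_ref and read with pdfplumber (the library choice is an assumption):

import io

import pdfplumber
import requests

def _extract_text_from_pdf(self) -> str:
    # Download the PDF and join the text of all pages (assumed helper)
    response = requests.get(self.source_url_ref)
    response.raise_for_status()
    with pdfplumber.open(io.BytesIO(response.content)) as pdf:
        return " ".join(page.extract_text() or "" for page in pdf.pages)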
Example #16
 def _parse_link_zip(self) -> str:
     """Get link to latest pdf."""
     soup = get_soup(self.source_url_ref)
     url = soup.find("a", string="Download her").get("href")
     return url
Example #17
 def read(self) -> pd.Series:
     """Reada data from source"""
     soup = get_soup(self.source_url)
     json_data = self._get_json_data(soup)
     data = self._parse_data(json_data)
     return pd.Series(data)
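Examples #17 and #24 call _get_json_data to pull a JSON payload out of the page. A plausible sketch, assuming the data sits in an application/json script tag (the selector and payload structure are assumptions):

import json

def _get_json_data(self, soup):
    # Extract the JSON blob embedded in a <script> tag (assumed structure)
    script = soup.find("script", {"type": "application/json"})
    return json.loads(script.string)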
Example #18
 def read(self) -> pd.Series:
     soup = get_soup(self.source_url)
     return self._parse_data(soup)
Example #19
 def read(self) -> pd.Series:
     """Read data from source."""
     soup = get_soup(self.source_url)
     data = self._parse_data(soup)
     return pd.Series(data)
Example #20
 def read(self):
     """Read the data from the source"""
     soup = get_soup(self.source_page)
     link = self._parse_link_pdf(soup)
     return self._parse_data(link)
Example #21
 def _get_text_from_url(self, url: str) -> str:
     """Extract text from the url."""
     soup = get_soup(url)
     # Normalize the page text: strip newlines and non-breaking spaces, lowercase
     text = soup.get_text().replace("\n", " ").replace("\xa0", "").lower()
     # Remove thousands separators so numbers like "1,234" parse as digits
     text = re.sub(r"(\d),(\d)", r"\1\2", text)
     return text
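The substitution in Example #21 removes thousands separators so figures like "1,234,567" survive later digit parsing. A quick demonstration:

import re

text = "tests performed: 1,234,567 as of today"
print(re.sub(r"(\d),(\d)", r"\1\2", text))  # tests performed: 1234567 as of today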
Example #22
 def read(self) -> pd.Series:
     soup = get_soup(self.source_url)
     data = self._parse_data(soup)
     return pd.Series(data=data)
Example #23
 def read(self) -> pd.DataFrame:
     """Reads data from the source page."""
     soup = get_soup(self.source_url_ref)
     data = self._parse_data(soup)
     return data
Example #24
 def read(self):
     soup = get_soup(self.source_url)
     json_data = self._get_json_data(soup)
     data = self._parse_data(json_data)
     df_manuf = self._parse_data_manufacturer(json_data)
     return data, df_manuf
Example #25
 def read(self) -> pd.DataFrame:
     """Reads data from source."""
     soup = get_soup(self.source_url)
     df = self._parse_data(soup)
     return df
Example #26
 def read(self):
     soup = get_soup(self.source_url)
     url = soup.find_all(class_="data-link")[-1]["href"]
     df = pd.read_csv(url, usecols=["date", "total tests"])
     return df