Esempio n. 1
0
 def _parse_date_from_element(self, elem: element.Tag) -> str:
     """Extract the date shown in the ``dateDetalils`` div that follows ``elem``.

     The first match of ``self.regex["date"]`` in that div's text is passed to
     ``clean_date`` together with the ``%d/%m/%Y`` format.
     """
     sibling = elem.findNextSibling("div", class_="dateDetalils")
     found = re.search(self.regex["date"], sibling.text)
     return clean_date(found.group(), "%d/%m/%Y")
Esempio n. 2
0
 def _parse_date(self, text: str):
     """Build the report date from a day/month fragment found in ``text``.

     The first capture group of ``self.regex["date"]`` must hold the day and
     month as ``%d/%m``. The text carries no year, so the current year is
     assumed; if that yields a date far in the future (e.g. "31/12" scraped in
     early January) the previous year is used instead. One day is subtracted
     before the result is handed to ``clean_date``.

     Args:
         text: Text containing the day/month fragment.

     Returns:
         Whatever ``clean_date`` returns for the computed datetime.

     Raises:
         AttributeError: If the pattern does not match ``text``.
         ValueError: If the fragment is not a valid ``%d/%m`` date.
     """
     day_month = re.search(self.regex["date"], text).group(1)
     now = datetime.now()
     dt = datetime.strptime(f"{day_month}/{now.year}", "%d/%m/%Y")
     # A date more than ~6 months ahead cannot be a fresh report: it belongs to
     # the previous year (year rollover between publication and scraping).
     if dt - now > timedelta(days=180):
         dt = datetime.strptime(f"{day_month}/{now.year - 1}", "%d/%m/%Y")
     return clean_date(dt - timedelta(days=1))
Esempio n. 3
0
 def _parse_date(self):
     """Return the last edit date reported by ``self.source_url_date``.

     ``editingInfo.lastEditDate`` is divided by 1000 before being converted
     with ``datetime.fromtimestamp`` (i.e. it is treated as milliseconds).
     """
     payload = request_json(self.source_url_date)
     last_edit_ms = payload["editingInfo"]["lastEditDate"]
     last_edit = datetime.fromtimestamp(last_edit_ms / 1000)
     return clean_date(last_edit)
Esempio n. 4
0
 def _propose_df(self):
     regex_1 = (
         r"COVID-19 Vaccination Update:\n\n1st and second dose — (([a-zA-Z]+) (\d{1,2})(?:th|nd|rd|st) (202\d)), in"
         r" 36 States \+ the FCT\. \n\n([0-9,]+) eligible "
         r"Nigerians have been vaccinated with first dose while ([0-9,]+) of Nigerians vaccinated with 1st dose"
         r" have collected their 2nd dose\.")
     regex_2 = (
         r"COVID-19 Vaccination Update for (([a-zA-Z]+) (\d{1,2})(?:th|nd|rd|st),? (202\d)), in 36 States \+ the"
         r" FCT\. ")
     regex_3 = r"COVID-19 Vaccination Update"
     data = []
     for tweet in self.tweets:
         match_1 = re.search(regex_1, tweet.full_text)
         match_2 = re.search(regex_2, tweet.full_text)
         match_3 = re.search(regex_3, tweet.full_text)
         if match_1:
             people_vaccinated = clean_count(match_1.group(5))
             people_fully_vaccinated = clean_count(match_1.group(6))
             dt = clean_date(" ".join(match_1.group(2, 3, 4)), "%B %d %Y")
             if self.stop_search(dt):
                 break
             data.append({
                 "date":
                 dt,
                 "total_vaccinations":
                 people_vaccinated + people_fully_vaccinated,
                 "people_vaccinated":
                 people_vaccinated,
                 "people_fully_vaccinated":
                 people_fully_vaccinated,
                 "text":
                 tweet.full_text,
                 "source_url":
                 self.build_post_url(tweet.id),
                 "media_url":
                 tweet.extended_entities["media"][0]["media_url_https"],
             })
         elif match_2:
             dt = clean_date(" ".join(match_2.group(2, 3, 4)), "%B %d %Y")
             if self.stop_search(dt):
                 break
             data.append({
                 "date":
                 dt,
                 "text":
                 tweet.full_text,
                 "source_url":
                 self.build_post_url(tweet.id),
                 "media_url":
                 tweet.extended_entities["media"][0]["media_url_https"],
             })
         elif match_3:
             data.append({
                 "text":
                 tweet.full_text,
                 "source_url":
                 self.build_post_url(tweet.id),
                 "media_url":
                 tweet.extended_entities["media"][0]["media_url_https"],
             })
     df = pd.DataFrame(data)
     return df
Esempio n. 5
0
 def _parse_date_from_text(self, text: str) -> str:
     """Find a date in ``text`` and pass it to ``clean_date``.

     The first capture group of ``self.regex["date"]`` is parsed with the
     ``%d.%m.%Y`` format.
     """
     found = re.search(self.regex["date"], text)
     return clean_date(found.group(1), "%d.%m.%Y")
Esempio n. 6
0
 def _parse_date(self, date: str) -> str:
     """Parse the date that follows the "Përditësimi i fundit:" marker.

     The text after the marker is stripped and lower-cased, the current year
     is appended, and the result is passed to ``clean_date`` with the
     ``%d %B %Y`` format.

     NOTE(review): the year always comes from ``datetime.now()``, so text
     scraped across a year boundary may get the wrong year — confirm.
     """
     after_marker = date.split("Përditësimi i fundit:")[1]
     day_and_month = after_marker.strip().lower()
     with_year = f"{day_and_month} {datetime.now().year}"
     return clean_date(with_year, "%d %B %Y")
Esempio n. 7
0
 def parse_date(self, elem):
     """Read the date from the text of ``elem``'s parent.

     The first capture group of ``self.regex["date"]`` is passed to
     ``clean_date`` with the ``%d/%m/%Y`` format and ``minus_days=1``.
     """
     pattern = self.regex["date"]
     raw_date = re.search(pattern, elem.parent.text).group(1)
     return clean_date(raw_date, "%d/%m/%Y", minus_days=1)
Esempio n. 8
0
    def _parse_date(self, soup: str) -> str:
        """Parse the date from the first ``.elementor-element-00a2010 span``.

        NOTE(review): ``soup`` is annotated as ``str`` but is used through
        ``.select(...)``, so it is presumably a parsed document — confirm.
        """
        span_text = soup.select(".elementor-element-00a2010 span")[0].text
        matched = re.search(self.regex["date"], span_text).group(0)
        # Remove any two lowercase letters directly after a digit
        # (presumably ordinal suffixes such as "st" or "th").
        without_suffix = re.sub(r"(?<=\d)[a-z]{2}", "", matched)
        return clean_date(without_suffix, "%d %B %Y")
Esempio n. 9
0
 def _parse_date(self, soup):
     """Parse the ``dd-mm-yyyy`` date found in the ``actualiza`` element."""
     banner_text = soup.find(class_="actualiza").text
     stamp = re.search(r"\d{2}-\d{2}-\d{4}", banner_text).group(0)
     return clean_date(stamp, "%d-%m-%Y")
Esempio n. 10
0
def parse_date(soup: BeautifulSoup) -> str:
    """Parse the date assigned to the ``asidozuguncellemesaati`` variable.

    The page is searched as a string; the captured value is lower-cased and
    passed to ``clean_date`` with the ``%d %B %Y`` format and ``tr_TR`` for
    both ``lang`` and ``loc``.
    """
    pattern = rf"var asidozuguncellemesaati = '(.*202\d)"
    found = re.search(pattern, str(soup))
    lowered = found.group(1).lower()
    return clean_date(lowered, fmt="%d %B %Y", lang="tr_TR", loc="tr_TR")
Esempio n. 11
0
 def pipe_date(self, ds: pd.Series) -> pd.Series:
     """Replace ``ds["date"]`` with the cleaned date of the previous day.

     The existing value is divided by 1000 and converted with
     ``datetime.fromtimestamp``; one day is subtracted before ``clean_date``
     is applied. ``ds`` is modified in place and also returned.
     """
     moment = datetime.fromtimestamp(ds.date / 1000)
     day_before = moment - timedelta(days=1)
     ds.loc["date"] = clean_date(day_before)
     return ds
Esempio n. 12
0
 def _parse_date(self, driver: webdriver.Chrome) -> str:
     """Parse the date held in the last cell of the ``tabl_vactination`` table.

     The element's inner HTML is read with ``pd.read_html``; the bottom-right
     cell of the first table is passed to ``clean_date`` with a format that
     includes the literal "*данные на" prefix.
     """
     container = driver.find_element_by_class_name("tabl_vactination")
     tables = pd.read_html(container.get_attribute("innerHTML"))
     last_cell = tables[0].iloc[-1, -1]
     return clean_date(last_cell, "*данные на %d.%m.%Y")
Esempio n. 13
0
 def _parse_date(self, week_num, year: int = 2022) -> str:
     """Return the cleaned end date of ISO week ``week_num`` of ``year``.

     Args:
         week_num: Week number within ``year``.
         year: Year the week belongs to. Defaults to 2022, the value that
             used to be hard-coded, so existing callers are unaffected.

     Returns:
         The result of ``clean_date`` applied to the week's end date
         (annotated ``str`` like the other ``clean_date`` wrappers in this
         codebase — confirm against ``clean_date``).
     """
     week_end = Week(year, week_num, system="iso").enddate()
     return clean_date(week_end)
Esempio n. 14
0
 def _load_last_date(self) -> str:
     """Return the maximum ``Date`` value found in the current datafile.

     NOTE(review): ``clean_date`` is called with ``as_datetime=True``, so the
     result is presumably a datetime rather than the annotated ``str`` —
     confirm.
     """
     current = self.load_datafile()
     latest = current.Date.max()
     return clean_date(latest, "%Y-%m-%d", as_datetime=True)
Esempio n. 15
0
 def _parse_date(self, soup, year: int = 2022) -> str:
     """Parse the four-digit date that precedes "COVID-19疫苗".

     The text of the ``download`` element contains four digits but no year,
     so ``year`` is prepended before parsing with the ``%Y%m%d`` format.

     Args:
         soup: Parsed page exposing ``find(class_=...)``.
         year: Year to prepend. Defaults to 2022, the value that used to be
             hard-coded, so existing callers are unaffected.

     Returns:
         The result of ``clean_date`` for the assembled date string.

     Raises:
         AttributeError: If the element or the pattern is not found.
     """
     download_text = soup.find(class_="download").text
     month_day = re.search(r"(\d{4})\s*COVID-19疫苗", download_text).group(1)
     return clean_date(f"{year}{month_day}", fmt="%Y%m%d")
Esempio n. 16
0
 def _parse_date(self, elem):
     """Parse the ``cdate`` text found two levels above ``elem``.

     The ``../..`` XPath moves to the element two levels up, where the first
     ``cdate`` element is read and parsed with the ``%d.%m.%Y`` format.
     """
     ancestor = elem.find_element_by_xpath("../..")
     date_text = ancestor.find_element_by_class_name("cdate").text
     return clean_date(date_text, "%d.%m.%Y")
Esempio n. 17
0
 def _parse_date(self, elem):
     """Parse the stripped text of the first ``<p>`` inside ``elem``.

     The expected format is ``%d.%m.%Y.`` (note the trailing dot).
     """
     paragraph = elem.find("p")
     stripped = paragraph.text.strip()
     return clean_date(stripped, "%d.%m.%Y.")
Esempio n. 18
0
 def _parse_date_from_element(self, elem: element.Tag) -> str:
     """Extract the date from the span inside the div preceding ``elem``.

     The first match of ``self.regex["date"]`` in the span's text is passed
     to ``clean_date`` with the ``%Y-%m-%d`` format.
     """
     previous_div = elem.findPreviousSibling("div")
     span = previous_div.find("span")
     found = re.search(self.regex["date"], span.text)
     return clean_date(found.group(), "%Y-%m-%d")
Esempio n. 19
0
 def _parse_date(self, soup: BeautifulSoup) -> str:
     """Parse the date held in the element with class ``date``.

     Dots in the element text are replaced by spaces before
     ``self.regex["date"]`` is applied; the match is parsed with the
     ``%d %m %Y`` format.
     """
     raw_text = soup.find(class_="date").text
     spaced = raw_text.replace(".", " ")
     found = re.search(self.regex["date"], spaced)
     return clean_date(found.group(), "%d %m %Y")
Esempio n. 20
0
 def _parse_date_from_element(self, elem: element.NavigableString) -> str:
     """Find a date in the string ``elem`` and pass it to ``clean_date``.

     The first capture group of ``self.regex["date"]`` is parsed with the
     ``%d-%m-%Y`` format.
     """
     found = re.search(self.regex["date"], elem)
     return clean_date(found.group(1), "%d-%m-%Y")
Esempio n. 21
0
 def parse_date(self, elem):
     """Parse the ``date`` element located under ``elem``'s parent.

     Its text is passed to ``clean_date`` with the ``%d %B %Y`` format,
     ``minus_days=1`` and the ``fr_FR`` locale.
     """
     date_node = elem.parent.find(class_="date")
     return clean_date(date_node.text, "%d %B %Y", minus_days=1, loc="fr_FR")
Esempio n. 22
0
 def _parse_date_from_text(self, text: str) -> str:
     """Return the day before the date found in ``text`` as ``YYYY-MM-DD``.

     The first capture group of ``self.regex["date"]`` is parsed by
     ``clean_date`` (``%d/%m/%Y``, ``as_datetime=True``); one day is then
     subtracted and the result formatted with ``%Y-%m-%d``.
     """
     raw_date = re.search(self.regex["date"], text).group(1)
     parsed = clean_date(raw_date, "%d/%m/%Y", as_datetime=True)
     day_before = parsed - pd.Timedelta(days=1)
     return day_before.strftime("%Y-%m-%d")