def _parse_date_from_element(self, elem: element.Tag) -> str:
    """Get the report date from the sibling date element.

    Finds the next sibling ``div`` carrying the site's date class (class
    name copied verbatim from the page markup, typo included), extracts
    the date with the configured regex and normalises it.
    """
    # bs4's camelCase findNextSibling is a deprecated alias; use the
    # modern snake_case API with identical behavior.
    date_tag = elem.find_next_sibling("div", class_="dateDetalils")
    # Raises AttributeError if the regex misses — fail loud on layout changes.
    date = re.search(self.regex["date"], date_tag.text).group()
    return clean_date(date, "%d/%m/%Y")
def _parse_date(self, text: str):
    """Parse a day/month date from *text*, shifted back one day.

    The source text carries no year, so the current year is assumed. If
    that assumption produces a date in the future (e.g. a late-December
    report read in early January), the previous year is used instead.
    """
    now = datetime.now()
    dt_raw = re.search(self.regex["date"], text).group(1) + f"/{now.year}"
    dt = datetime.strptime(dt_raw, "%d/%m/%Y")
    if dt > now:
        # Year-rollover guard: the report cannot be from the future.
        dt = dt.replace(year=dt.year - 1)
    # Figures refer to the previous day.
    return clean_date(dt - timedelta(days=1))
def _parse_date(self):
    """Return the last-edit date reported by the source's metadata endpoint."""
    metadata = request_json(self.source_url_date)
    # The endpoint reports the timestamp in milliseconds since the epoch.
    last_edit_ms = metadata["editingInfo"]["lastEditDate"]
    return clean_date(datetime.fromtimestamp(last_edit_ms / 1000))
def _propose_df(self):
    """Build a dataframe of vaccination figures parsed from tweets.

    Each tweet is matched against three patterns, most informative first:

    * ``regex_1`` — full update with date, first-dose count and
      second-dose count (groups 2-4 are month/day/year, 5-6 the counts).
    * ``regex_2`` — update that carries only the date.
    * ``regex_3`` — bare "COVID-19 Vaccination Update" marker, kept with
      text/URLs only so it can be inspected manually.

    Iteration stops early (``break``) once ``self.stop_search`` says the
    parsed date is older than what is already collected.
    """
    regex_1 = (
        r"COVID-19 Vaccination Update:\n\n1st and second dose — (([a-zA-Z]+) (\d{1,2})(?:th|nd|rd|st) (202\d)), in"
        r" 36 States \+ the FCT\. \n\n([0-9,]+) eligible "
        r"Nigerians have been vaccinated with first dose while ([0-9,]+) of Nigerians vaccinated with 1st dose"
        r" have collected their 2nd dose\.")
    regex_2 = (
        r"COVID-19 Vaccination Update for (([a-zA-Z]+) (\d{1,2})(?:th|nd|rd|st),? (202\d)), in 36 States \+ the"
        r" FCT\. ")
    regex_3 = r"COVID-19 Vaccination Update"
    data = []
    for tweet in self.tweets:
        match_1 = re.search(regex_1, tweet.full_text)
        match_2 = re.search(regex_2, tweet.full_text)
        match_3 = re.search(regex_3, tweet.full_text)
        # Order matters: regex_1 is strictly more specific than regex_2,
        # which is strictly more specific than regex_3.
        if match_1:
            people_vaccinated = clean_count(match_1.group(5))
            people_fully_vaccinated = clean_count(match_1.group(6))
            # Groups 2/3/4 are month name, day (ordinal suffix excluded
            # by the group boundaries) and year.
            dt = clean_date(" ".join(match_1.group(2, 3, 4)), "%B %d %Y")
            if self.stop_search(dt):
                break
            data.append({
                "date": dt,
                # total = 1st doses + 2nd doses (per the tweet's wording).
                "total_vaccinations": people_vaccinated + people_fully_vaccinated,
                "people_vaccinated": people_vaccinated,
                "people_fully_vaccinated": people_fully_vaccinated,
                "text": tweet.full_text,
                "source_url": self.build_post_url(tweet.id),
                "media_url": tweet.extended_entities["media"][0]["media_url_https"],
            })
        elif match_2:
            dt = clean_date(" ".join(match_2.group(2, 3, 4)), "%B %d %Y")
            if self.stop_search(dt):
                break
            # Date-only record: counts must be read from the attached media.
            data.append({
                "date": dt,
                "text": tweet.full_text,
                "source_url": self.build_post_url(tweet.id),
                "media_url": tweet.extended_entities["media"][0]["media_url_https"],
            })
        elif match_3:
            # Marker-only record: no date parsed, so no stop_search check.
            data.append({
                "text": tweet.full_text,
                "source_url": self.build_post_url(tweet.id),
                "media_url": tweet.extended_entities["media"][0]["media_url_https"],
            })
    df = pd.DataFrame(data)
    return df
def _parse_date_from_text(self, text: str) -> str:
    """Extract and normalise the dotted date found in *text*."""
    raw = re.search(self.regex["date"], text).group(1)
    return clean_date(raw, "%d.%m.%Y")
def _parse_date(self, date: str) -> str:
    """Parse the date from the page's "last updated" label."""
    # Text after the Albanian "last update" marker holds day + month name.
    day_month = date.split("Përditësimi i fundit:")[1].strip().lower()
    # NOTE(review): appending the current year misfires for reports read
    # just after New Year — confirm whether a rollover guard is needed.
    full_date = f"{day_month} {datetime.now().year}"
    return clean_date(full_date, "%d %B %Y")
def parse_date(self, elem):
    """Return the day before the date printed next to *elem*."""
    found = re.search(self.regex["date"], elem.parent.text)
    # Figures refer to the previous day, hence minus_days=1.
    return clean_date(found.group(1), "%d/%m/%Y", minus_days=1)
def _parse_date(self, soup: str) -> str:
    """Extract the report date from the page's date span."""
    raw = soup.select(".elementor-element-00a2010 span")[0].text
    raw = re.search(self.regex["date"], raw).group(0)
    # Drop the English ordinal suffix ("1st", "2nd", ...) so "%d %B %Y" parses.
    stripped = re.sub(r"(?<=\d)[a-z]{2}", "", raw)
    return clean_date(stripped, "%d %B %Y")
def _parse_date(self, soup):
    """Return the update date shown in the page's "actualiza" banner."""
    banner = soup.find(class_="actualiza").text
    match = re.search(r"\d{2}-\d{2}-\d{4}", banner)
    return clean_date(match.group(0), "%d-%m-%Y")
def parse_date(soup: BeautifulSoup) -> str:
    """Extract the Turkish update date embedded in the page's scripts."""
    match = re.search(rf"var asidozuguncellemesaati = '(.*202\d)", str(soup))
    # Lower-case so Turkish month names match the tr_TR locale.
    raw = match.group(1).lower()
    return clean_date(raw, fmt="%d %B %Y", lang="tr_TR", loc="tr_TR")
def pipe_date(self, ds: pd.Series) -> pd.Series:
    """Replace the millisecond ``date`` timestamp with the previous day's date.

    Works on a copy so the caller's Series is not mutated (the previous
    implementation wrote through to the input in place). Item access
    ``ds["date"]`` replaces the fragile ``ds.date`` attribute lookup.
    """
    ds = ds.copy()
    # Source timestamp is in milliseconds; figures refer to the previous day.
    ds.loc["date"] = clean_date(
        datetime.fromtimestamp(ds["date"] / 1000) - timedelta(days=1))
    return ds
def _parse_date(self, driver: webdriver.Chrome) -> str:
    """Read the update date from the last cell of the vaccination table."""
    # NOTE(review): find_element_by_class_name was removed in Selenium 4 —
    # confirm the pinned selenium version before upgrading.
    table = driver.find_element_by_class_name("tabl_vactination")
    raw = pd.read_html(table.get_attribute("innerHTML"))[0].iloc[-1, -1]
    return clean_date(raw, "*данные на %d.%m.%Y")
def _parse_date(self, week_num, year: int = 2022) -> str:
    """Return the cleaned end date of ISO week *week_num*.

    Args:
        week_num: ISO week number within *year*.
        year: Year the week belongs to. Defaults to 2022, matching the
            previously hard-coded value, but can now be overridden.

    The previous ``Iterator`` return annotation was wrong: the function
    returns the single cleaned date produced by ``clean_date``.
    """
    return clean_date(Week(year, week_num, system="iso").enddate())
def _load_last_date(self) -> str:
    """Return the most recent date present in the current datafile."""
    latest = self.load_datafile().Date.max()
    # NOTE(review): as_datetime=True suggests a datetime is returned, which
    # contradicts the declared -> str — confirm clean_date's contract.
    return clean_date(latest, "%Y-%m-%d", as_datetime=True)
def _parse_date(self, soup) -> str:
    """Extract the report date (month+day digits) from the download heading."""
    heading = soup.find(class_="download").text
    # Four digits (MMDD) immediately preceding the vaccine label.
    month_day = re.search(r"(\d{4})\s*COVID-19疫苗", heading).group(1)
    # NOTE(review): the year is hard-coded — this breaks after 2022.
    return clean_date(f"2022{month_day}", fmt="%Y%m%d")
def _parse_date(self, elem):
    """Return the cleaned date found in the grandparent's "cdate" node."""
    container = elem.find_element_by_xpath("../..")
    raw = container.find_element_by_class_name("cdate").text
    return clean_date(raw, "%d.%m.%Y")
def _parse_date(self, elem):
    """Parse the dotted date from the element's first <p> tag."""
    raw = elem.find("p").text.strip()
    # Format ends with "." because the source prints e.g. "01.02.2022.".
    return clean_date(raw, "%d.%m.%Y.")
def _parse_date_from_element(self, elem: element.Tag) -> str:
    """Get the ISO date printed in the span of the preceding sibling div."""
    # bs4's camelCase findPreviousSibling is a deprecated alias; use the
    # modern snake_case API with identical behavior.
    date_tag = elem.find_previous_sibling("div").find("span")
    date = re.search(self.regex["date"], date_tag.text).group()
    return clean_date(date, "%Y-%m-%d")
def _parse_date(self, soup: BeautifulSoup) -> str:
    """Get the report date from the page's "date" element."""
    # Dots become spaces so the "%d %m %Y" format applies.
    normalized = soup.find(class_="date").text.replace(".", " ")
    found = re.search(self.regex["date"], normalized).group()
    return clean_date(found, "%d %m %Y")
def _parse_date_from_element(self, elem: element.NavigableString) -> str:
    """Extract the dashed date captured by the configured regex from *elem*."""
    # NavigableString is a str subclass, so re.search accepts it directly.
    matched = re.search(self.regex["date"], elem).group(1)
    return clean_date(matched, "%d-%m-%Y")
def parse_date(self, elem):
    """Return the day before the French-locale date printed next to *elem*."""
    raw = elem.parent.find(class_="date").text
    # French month names; figures refer to the previous day.
    return clean_date(raw, "%d %B %Y", minus_days=1, loc="fr_FR")
def _parse_date_from_text(self, text: str) -> str:
    """Get date from text, shifted back one day.

    Uses clean_date's ``minus_days`` argument — as the sibling scrapers in
    this module already do — instead of a manual pandas Timedelta round-trip
    followed by strftime; clean_date's canonical output is "%Y-%m-%d".
    """
    date = re.search(self.regex["date"], text).group(1)
    return clean_date(date, "%d/%m/%Y", minus_days=1)