def _parse_date_from_text(self, year: str, text: str) -> str:
    """Extract a month/day fragment from ``text`` and combine it with ``year``.

    The pattern stored under ``self.regex["date"]`` is searched in ``text``;
    its first capture group is joined with ``year`` and handed to
    ``clean_date`` using the ``"%B %d %Y"`` format.

    Args:
        year: Year to append to the captured month/day fragment.
        text: Text to search.

    Returns:
        Whatever ``clean_date`` returns for the assembled string, or ``None``
        when the pattern is not found in ``text``.
    """
    found = re.search(self.regex["date"], text)
    if found is None:
        # Nothing date-like in the text: signal it to the caller.
        return None
    raw_date = f"{found.group(1)} {year}"
    return clean_date(raw_date, "%B %d %Y")
def _parse_data(self, data):
    """Turn the raw list of daily entries into a DataFrame.

    Each entry is expected to expose ``year``/``month``/``day`` keys plus the
    nested ``administered``, ``administered2nd`` and ``administered3rd``
    mappings read below.

    Args:
        data: Iterable of per-day dictionaries.

    Returns:
        A ``pandas.DataFrame`` with one row per entry.
    """

    def to_record(entry):
        # One output row per source entry; the date is rebuilt from its parts.
        iso_like = f"{entry['year']}-{entry['month']}-{entry['day']}"
        return {
            "date": clean_date(iso_like, "%Y-%m-%d"),
            "total_vaccinations": entry.get("usedToDate"),
            "people_vaccinated": entry["administered"].get("toDate"),
            "people_fully_vaccinated": entry["administered2nd"].get("toDate"),
            "total_boosters": entry["administered3rd"].get("toDate"),
            "vaccine": self._build_vaccine_str(entry),
        }

    records = [to_record(entry) for entry in data]
    return pd.DataFrame(records)
def _build_df(self, data):
    """Build a per-day DataFrame for one vaccine from the raw payload.

    Args:
        data: Mapping with ``fecha_inicial`` (start timestamp), ``dias``
            (number of daily periods), ``denominacion`` and the dose series
            ``dosis1``, ``dosis2``, ``adicional``, ``esquemacompleto`` and
            ``refuerzo``.

    Returns:
        A ``pandas.DataFrame`` with a ``date`` column and the dose columns.
    """
    # Daily index: starts at "fecha_inicial" and spans "dias" periods.
    start = clean_date(data["fecha_inicial"], "%Y-%m-%dT%H:%M:%S%z", as_datetime=False)
    day_index = pd.date_range(start, periods=data["dias"], freq="D")

    # Notes on the difference between "adicional" and "refuerzo":
    # https://github.com/owid/covid-19-data/issues/2532#issuecomment-1074137207
    columns = {
        "date": clean_date_series(list(day_index)),
        "vaccine": data["denominacion"],
        "dose_1": data["dosis1"],
        "dose_2": data["dosis2"],
        "dose_additional": data["adicional"],
        "people_fully_vaccinated": data["esquemacompleto"],
        "total_boosters": data["refuerzo"],
    }
    return pd.DataFrame(columns)
def _build_df_age_group(self, data):
    """Build a per-day DataFrame for one age group from the raw payload.

    Args:
        data: Mapping with ``fecha_inicial``, ``dias``, the series ``dosis1``,
            ``esquemacompleto`` and ``refuerzo``, and the age-group fields
            ``desdeedad``, ``hastaedad`` and ``denominacion``.

    Returns:
        A ``pandas.DataFrame`` with the daily metrics plus ``age_group_min``,
        ``age_group_max`` and ``age_group`` columns. ``age_group_max`` is an
        empty string when ``hastaedad`` is ``None``.
    """
    # Daily index: starts at "fecha_inicial" and spans "dias" periods.
    start = clean_date(data["fecha_inicial"], "%Y-%m-%dT%H:%M:%S%z", as_datetime=False)
    day_index = pd.date_range(start, periods=data["dias"], freq="D")

    # Boosters use "refuerzo" only, which is likely an underestimate (doses
    # for immunocompromised are missing). An alternative that was considered
    # is the element-wise sum of "refuerzo" and "adicional".
    metrics = pd.DataFrame(
        {
            "date": day_index,
            "people_vaccinated": data["dosis1"],
            "people_fully_vaccinated": data["esquemacompleto"],
            "people_with_booster": data["refuerzo"],
        }
    )

    # Age-group descriptors are attached to every row.
    return metrics.assign(
        age_group_min=data["desdeedad"],
        age_group_max=data["hastaedad"] if data["hastaedad"] is not None else "",
        age_group=data["denominacion"],
    )
def _parse_date(self, t_scraper: TableauScraper) -> str:
    """Read the report date from the Tableau title worksheet.

    Args:
        t_scraper: Scraper exposing the ``"COVID-19 | Prensa.Titulo"``
            worksheet.

    Returns:
        The result of ``clean_date`` applied to the worksheet's first cell,
        parsed with the ``"%d/%m/%Y"`` format.
    """
    title_sheet = t_scraper.getWorksheet("COVID-19 | Prensa.Titulo")
    # The date lives in the top-left cell of the worksheet data.
    raw_date = title_sheet.data.iat[0, 0]
    return clean_date(raw_date, "%d/%m/%Y")
def _parse_date(self, soup: BeautifulSoup) -> str:
    """Read the report date from the first ``<p>`` element of the page.

    Args:
        soup: Parsed page.

    Returns:
        The result of ``clean_date`` applied to the lowercased paragraph
        text, parsed with the ``"As of %B %d, %Y"`` format.
    """
    first_paragraph = soup.find("p")
    # NOTE(review): the text is lowercased while the format starts with a
    # capitalised "As of" - presumably the parser matches case-insensitively;
    # confirm against clean_date.
    lowered = first_paragraph.text.lower()
    return clean_date(lowered, "As of %B %d, %Y")
def _parse_date(self, soup: BeautifulSoup) -> str:
    """Read the report date from the first text node matching the date regex.

    Args:
        soup: Parsed page.

    Returns:
        The result of ``clean_date`` applied to the matched text, parsed with
        the ``"%d.%m.%Yг."`` format.
    """
    date_pattern = re.compile(self.regex["date"])
    matched_text = soup.find(text=date_pattern)
    return clean_date(matched_text, "%d.%m.%Yг.")