def get_rotten_tomatoes_url(content: str) -> dict:
    """Look up a Rotten Tomatoes link inside ``content``.

    ``content`` is handed to ``regex_search`` together with a pattern for a
    ``https://www.rottentomatoes.com/<part>/<part>`` address that must be
    followed by an ``&`` (the ``&`` is only a lookahead in the pattern).

    Args:
        content: Text to search.

    Returns:
        ``{"rotten_tomatoes_url": <search result>}`` when ``regex_search``
        yields a truthy value, otherwise an empty dict.
    """
    found = regex_search(
        content=content,
        pattern=r"https://www.rottentomatoes.com/.+/.+?(?=&)",
    )
    if not found:
        return {}
    return {"rotten_tomatoes_url": found}
def get_seasons(content):
    """Extract the number of seasons mentioned in ``content``.

    ``regex_search`` is asked for either a ``No. of seasons: <n>`` fragment
    or a ``<n> seasons`` fragment; the numeric part of whichever one is
    found is converted to ``int``.

    Args:
        content: Text to search.

    Returns:
        ``{"seasons": <int>}`` when ``regex_search`` yields a truthy value,
        otherwise an empty dict.
    """
    regex_pattern = r"(No.\sof\sseasons:\s\d{1,3})|(\d{1,2}\sseasons)"
    matched = regex_search(content=content, pattern=regex_pattern)
    if not matched:
        return {}

    # "No. of seasons: 12" -> the count follows the colon.
    _, colon, after_colon = matched.rpartition(":")
    if colon:
        seasons = after_colon
    else:
        # "12 seasons" -> the count is the first token. The pattern allows
        # any whitespace (``\s``) between the tokens, so split on whitespace
        # in general instead of on a literal space, which would fail for a
        # tab or a non-breaking space.
        seasons = matched.split()[0]
    return {"seasons": int(seasons.strip())}
def get_person_awards(self) -> dict:
    """Collect award counts from the page's ``awards-blurb`` spans.

    All ``<span class="awards-blurb">`` elements are requested through
    ``self.soup_selection``. The first span is inspected for Oscar data and
    the last span for the overall wins/nominations counts.

    Returns:
        A dict that always contains ``"oscars"`` and ``"oscar_nominations"``
        (``None`` unless filled in from the first span). When at least one
        span was found it also contains ``"wins"`` and ``"nominations"``,
        each a string of digits taken from the last span.

    Raises:
        ValueError: If the text of the last span does not contain exactly
            two runs of digits (the result is unpacked into two names).
    """
    result = {"oscars": None, "oscar_nominations": None}
    soup_selection = {
        "soup": self.soup,
        "method": self.find_all,
        "tag": "span",
        "class": "awards-blurb",
    }
    if css_selection := self.soup_selection(**soup_selection):
        oscar_data = css_selection[0].get_text(strip=True)
        search = regex_search(oscar_data, r"\d{1,2}")
        if "Won" in oscar_data:
            result.update({"oscars": search})
        elif len(css_selection) > 1:
            # More than one span and no "Won" in the first one: the number
            # in the first span is recorded as Oscar nominations. With a
            # single span and no "Won", neither Oscar field is filled in.
            result.update({"oscar_nominations": search})
        wins, nominations = re.findall(
            r"\d+", css_selection[-1].get_text(strip=True))
        result.update({"wins": wins, "nominations": nominations})
    # The collected data must be handed back; without this the method
    # returned None despite its ``-> dict`` annotation.
    return result
def get_release_date(self) -> dict:
    """Read the release date from the ``title-details-releasedate`` item.

    The ``<li>`` element is requested through ``self.soup_selection``. Its
    text nodes are joined with ``~`` and split again; the second piece
    (e.g. ``'December 21, 2001 (Turkey)'``) is cut down to everything up to
    and including the first four-digit number, and that text is passed to
    ``get_date_time`` with each known format until one is accepted.

    Returns:
        ``{"release_date": <value returned by get_date_time>}`` when the
        element was found and the date could be parsed, otherwise ``{}``
        when the element is missing.

    Raises:
        ValueError: If the element is present but none of the known
            formats is accepted by ``get_date_time``.
    """
    release_date = {}
    soup_selection = {
        "soup": self.soup,
        "method": self.find,
        "tag": "li",
        "dataset-testid": "title-details-releasedate",
    }
    if css_selection := self.soup_selection(**soup_selection):
        # ['Release date', 'December 21, 2001 (Turkey)']
        selection_list = css_selection.get_text("~").split("~")
        regex_pattern = r"(^.*?\d{4})"
        search = regex_search(selection_list[1], regex_pattern)

        # Tried in order. The original code retried "%B %d. %Y" as its last
        # fallback, which could never succeed after the first attempt had
        # already failed. The month/year and year-only formats cover the
        # shorter texts the pattern above can also capture.
        # NOTE(review): assumes get_date_time takes strptime-style formats
        # - confirm against its definition.
        date_formats = ("%B %d. %Y", "%B %d, %Y", "%B %Y", "%Y")
        last_error = None
        for date_format in date_formats:
            try:
                date_time = get_date_time(search, date_format)
            except ValueError as error:
                last_error = error
            else:
                release_date = {"release_date": date_time}
                break
        else:
            # Keep the original contract: an unparseable date is an error.
            raise ValueError(
                f"Unrecognized release date: {search!r}") from last_error
    # Hand the result back; without this the method returned None despite
    # its ``-> dict`` annotation.
    return release_date
def get_tv_com_rate(content: str) -> dict:
    """Look up a TV.com rating inside ``content``.

    ``content`` is handed to ``regex_search`` together with a pattern for a
    ``<digit>/10`` or ``<digit><char><digit>/10`` score that is followed by
    whitespace, one arbitrary character, whitespace and ``TV.com``.

    Args:
        content: Text to search.

    Returns:
        ``{"tv_com_rate": <search result>}`` when ``regex_search`` yields a
        truthy value, otherwise an empty dict.
    """
    rating = regex_search(
        content=content,
        pattern=r"(\d.\d|\d)/10\s.\sTV.com",
    )
    if rating:
        return {"tv_com_rate": rating}
    return {}
def get_imdb_url(content: str) -> dict:
    """Look up an IMDb title link inside ``content``.

    ``content`` is handed to ``regex_search`` together with a pattern for an
    address that starts with ``https://www.imdb.com/title/``, continues with
    any characters other than ``;`` and ends with ``/``.

    Args:
        content: Text to search.

    Returns:
        ``{"imdb_url": <search result>}`` when ``regex_search`` yields a
        truthy value, otherwise an empty dict.
    """
    pattern = r"https://www.imdb.com/title/[^;]*/"
    url = regex_search(content=content, pattern=pattern)
    payload = {}
    if url:
        payload["imdb_url"] = url
    return payload