Ejemplo n.º 1
0
 def get_rotten_tomatoes_url(content: str) -> dict:
     """Extract a Rotten Tomatoes URL from ``content``.

     The pattern looks for
     ``https://www.rottentomatoes.com/<segment>/<rest>`` and stops right
     before the first ``&`` that follows. A URL that is not followed by
     ``&`` is therefore not matched (unchanged from the previous pattern).

     Args:
         content: Text to search.

     Returns:
         ``{"rotten_tomatoes_url": <match>}`` when ``regex_search`` returns
         a truthy value, otherwise an empty dict.
     """
     # The dots of the host name are escaped so they only match a literal
     # ".". Negated character classes replace the former greedy ".+" parts,
     # which could run past the URL to a later "/" and "&" on the same line.
     regex_pattern = (
         r"https://www\.rottentomatoes\.com/[^/&\s]+/[^&\s]+(?=&)"
     )
     rotten_tomatoes_url = regex_search(content=content,
                                        pattern=regex_pattern)
     if not rotten_tomatoes_url:
         return {}
     return {"rotten_tomatoes_url": rotten_tomatoes_url}
Ejemplo n.º 2
0
 def get_seasons(content: str) -> dict:
     """Extract the number of seasons from ``content``.

     Two spellings are recognised: ``No. of seasons: <n>`` and
     ``<n> seasons``.

     Args:
         content: Text to search.

     Returns:
         ``{"seasons": <int>}`` when a season count is found, otherwise an
         empty dict.
     """
     # "No\." matches the literal abbreviation instead of "No" + any char.
     regex_pattern = r"(No\.\sof\sseasons:\s\d{1,3})|(\d{1,2}\sseasons)"
     reg_search = regex_search(content=content, pattern=regex_pattern)
     if not reg_search:
         return {}

     if ":" in reg_search:
         # "No. of seasons: 3" -> the count is whatever follows the colon.
         seasons = reg_search.rpartition(":")[2]
     else:
         # "3 seasons" -> the count is the first whitespace-separated token.
         # split() without a separator copes with any whitespace that "\s"
         # matched (tab, non-breaking space, ...); split(" ") would leave
         # such a string in one piece and make the unpacking fail.
         seasons = reg_search.split()[0]

     seasons = seasons.strip()
     return {"seasons": int(seasons)} if seasons else {}
Ejemplo n.º 3
0
    def get_person_awards(self) -> dict:
        """Collect award figures from the ``awards-blurb`` selection.

        ``result`` starts with ``oscars`` and ``oscar_nominations`` set to
        ``None``. When the selection is non-empty, one of those two keys may
        be filled from the first element, and ``wins`` / ``nominations`` are
        filled from the last element.

        NOTE(review): no ``return`` statement is visible in this excerpt;
        confirm that ``result`` is returned further down.

        Raises:
            ValueError: If the text of the last selected element does not
                contain exactly two digit runs (the two-name unpacking of
                ``re.findall`` fails).
        """
        result = {"oscars": None, "oscar_nominations": None}

        # Keyword arguments forwarded to self.soup_selection below;
        # presumably selects every <span class="awards-blurb"> -- confirm
        # against the helper's implementation.
        soup_selection = {
            "soup": self.soup,
            "method": self.find_all,
            "tag": "span",
            "class": "awards-blurb",
        }
        if css_selection := self.soup_selection(**soup_selection):
            # Only the first element is inspected for Oscar information.
            oscar_data = css_selection[0].get_text(strip=True)
            # Looks for a one- or two-digit number in the first blurb; the
            # exact return value depends on regex_search (defined elsewhere).
            search = regex_search(oscar_data, r"\d{1,2}")
            if "Won" in oscar_data:
                result.update({"oscars": search})
            elif len(css_selection) > 1:
                # No "Won" in the first blurb but more than one blurb was
                # selected: the number is stored as the nomination count.
                result.update({"oscar_nominations": search})

            # The last blurb must yield exactly two numbers, taken in order
            # as wins and nominations; they are stored as strings.
            wins, nominations = re.findall(
                r"\d+", css_selection[-1].get_text(strip=True))
            result.update({"wins": wins, "nominations": nominations})
Ejemplo n.º 4
0
    def get_release_date(self) -> dict:
        """Build a ``{"release_date": ...}`` mapping from the release-date item.

        ``release_date`` stays an empty dict when ``self.soup_selection``
        returns a falsy value.

        NOTE(review): no ``return`` statement is visible in this excerpt;
        confirm that ``release_date`` is returned further down.
        """
        release_date = {}

        # Keyword arguments forwarded to self.soup_selection below;
        # presumably selects the <li> whose test id is
        # "title-details-releasedate" -- confirm against the helper.
        soup_selection = {
            "soup": self.soup,
            "method": self.find,
            "tag": "li",
            "dataset-testid": "title-details-releasedate",
        }
        if css_selection := self.soup_selection(**soup_selection):
            # Example of the resulting list:
            # ['Release date', 'December 21, 2001 (Turkey)']
            selection_list = css_selection.get_text("~").split("~")
            # Lazily matches from the start of the text through the first
            # run of four digits (the year in the example above).
            regex_pattern = r"(^.*?\d{4})"
            # NOTE(review): index 1 raises IndexError if the element holds
            # fewer than two text pieces.
            search = regex_search(selection_list[1], regex_pattern)
            # Try the known date layouts in turn; get_date_time is expected
            # to raise ValueError when the layout does not fit.
            try:
                date_time = get_date_time(search, "%B %d. %Y")
            except ValueError:
                try:
                    date_time = get_date_time(search, "%B %d, %Y")
                except ValueError:
                    # NOTE(review): this format string is identical to the
                    # first attempt, so this retry presumably fails the same
                    # way; a different layout was probably intended.
                    date_time = get_date_time(search, "%B %d. %Y")
            release_date = {"release_date": date_time}
Ejemplo n.º 5
0
 def get_tv_com_rate(content: str) -> dict:
     """Extract the TV.com rating snippet from ``content``.

     The pattern expects a rating (``d.d`` or a single digit) followed by
     ``/10``, whitespace, one separator character, whitespace and
     ``TV.com``.

     Args:
         content: Text to search.

     Returns:
         ``{"tv_com_rate": <match>}`` when ``regex_search`` returns a truthy
         value, otherwise an empty dict.
     """
     # The decimal point and the dot in "TV.com" are escaped so they match a
     # literal "."; the lone "." between the two "\s" stays a wildcard on
     # purpose because the separator character is not fixed.
     regex_pattern = r"(\d\.\d|\d)/10\s.\sTV\.com"
     tv_com_rate = regex_search(content=content, pattern=regex_pattern)
     if not tv_com_rate:
         return {}
     return {"tv_com_rate": tv_com_rate}
Ejemplo n.º 6
0
 def get_imdb_url(content: str) -> dict:
     """Extract an IMDb title URL from ``content``.

     The match starts at ``https://www.imdb.com/title/`` and extends to the
     last ``/`` that occurs before the next ``;``.

     Args:
         content: Text to search.

     Returns:
         ``{"imdb_url": <match>}`` when ``regex_search`` returns a truthy
         value, otherwise an empty dict.
     """
     # Escaped dots match only a literal "." in the host name; an unescaped
     # "." would accept any character in those positions.
     regex_pattern = r"https://www\.imdb\.com/title/[^;]*/"
     imdb_url = regex_search(content=content, pattern=regex_pattern)
     if not imdb_url:
         return {}
     return {"imdb_url": imdb_url}