def _get_latest_season(self):
    """
    Find the most recent season that has at least one rated episode and
    store it in ``self.latest_season``.

    The ``season=0`` episode page is fetched first (presumably IMDb resolves
    it to the newest listed season -- confirm against the site). If every
    episode in that season has an empty rating, earlier seasons are tried
    one by one. The walk stops at season 1 even if it is unrated, so the
    loop always terminates.
    """
    webpage = get_parsed_webpage(f"{self.url}/episodes?season=0")
    self.latest_season = self._get_season_data(webpage)

    # Edge case: the newest season(s) may be listed but not rated yet.
    # ``all()`` is True for an empty episode list, so without the lower
    # bound on the season number this would step to season 0 (the "latest"
    # page again) and never finish.
    while self.latest_season["number"] > 1 and all(
        episode["rating"] == ""
        for episode in self.latest_season["episodes"]
    ):
        latest = self.latest_season["number"] - 1
        webpage = get_parsed_webpage(f"{self.url}/episodes?season={latest}")
        self.latest_season = self._get_season_data(webpage)
def search_imdb(query):
    """
    Search IMDb for TV titles matching ``query``.

    The query is percent-encoded and sent to the IMDb "find" page. Every
    child of the ``findList`` results table is passed to
    ``get_data_from_row``; children for which it returns ``None`` are
    dropped. Returns a list of the remaining values, or an empty list when
    the page has no results table.
    """
    encoded = urllib.parse.quote(query)
    page = get_parsed_webpage(
        f"https://www.imdb.com/find?q={encoded}&s=tt&ttype=tv"
    )

    results_table = page.find("table", class_="findList")
    if results_table is None:
        return []

    shows = []
    for row in results_table.children:
        show = get_data_from_row(row)
        if show is not None:
            shows.append(show)
    return shows
def get_show_ids():
    """
    Yield ``(title, id)`` pairs scraped from the IMDb top TV chart page.

    Each child of the chart table body is parsed independently; children
    that cannot be parsed (for example text nodes between rows, or rows
    missing the title cell or link) are skipped.

    Raises:
        AttributeError: presumably when the ``lister`` div or chart table is
            missing from the page (the lookups would return ``None``) --
            this happens before any row is yielded.
    """
    webpage = get_parsed_webpage("https://www.imdb.com/chart/toptv/")
    list_div = webpage.find("div", class_="lister")
    lst = list_div.select_one("table.chart tbody ")

    for row in lst.children:
        try:
            title_cell = row.find("td", class_="titleColumn")
            link = title_cell.find("a")
            title = link.text.strip()
            id_ = get_id_from_link(link["href"])
        except Exception:
            # Deliberate best-effort: skip anything that is not a well-formed
            # chart row. ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt/SystemExit propagate.
            continue
        # Yield outside the ``try`` so that GeneratorExit raised here when
        # the consumer closes the generator is never swallowed.
        yield title, id_
def get_all_seasons(self):
    """
    Collect the data of every season into ``self.episode_data``.

    Seasons ``1 .. latest - 1`` are fetched and parsed one page at a time
    and appended in order; the already-parsed ``self.latest_season`` is
    appended last. When ``self.log`` is truthy, progress over the seasons is
    shown through ``tqdm``. Nothing is returned.
    """
    last_number = self.latest_season["number"]
    season_numbers = range(1, last_number)
    # Wrap the iteration in a progress bar only when logging is enabled.
    iterator = tqdm(season_numbers, desc="Seasons") if self.log else season_numbers

    for season in iterator:
        page = get_parsed_webpage(f"{self.url}/episodes?season={season}")
        season_data = self._get_season_data(season_page=page)
        self.episode_data.append(season_data)

    # The latest season was parsed beforehand, so it is reused, not re-fetched.
    self.episode_data.append(self.latest_season)
def _get_show_data(self):
    """
    Scrape show-level data from the show's main page and store it as a
    dictionary in ``self.show_data``.

    The dictionary holds the title, rating, number of ratings, number of
    episodes, plot summary, creators, stars and poster URL, merged with
    whatever ``IMDBScraper._get_additional_details`` returns for the
    "subtext" tag. Nothing is returned.
    """
    page = get_parsed_webpage(self.url)

    # Title bar: title and rating figures.
    title_bar = page.find(class_="title_bar_wrapper")
    title = title_bar.select(".title_wrapper h1")[0].text.strip()
    rating_tag = title_bar.select(".ratings_wrapper .ratingValue span")[0]
    rating = rating_tag.text.strip()
    num_ratings_tag = title_bar.select(".ratings_wrapper a span.small")[0]
    num_ratings = num_ratings_tag.text.strip()

    # Episode count: the sub heading text with the word "episodes" removed.
    nav_panel = page.find(class_="navigation_panel")
    episodes_heading = nav_panel.find(class_="bp_sub_heading")
    num_episodes = episodes_heading.text.replace("episodes", "").strip()

    subtext_tag = title_bar.find(class_="subtext")
    extra_details = IMDBScraper._get_additional_details(subtext_tag)

    # Plot summary and credits.
    plot = page.find(class_="plot_summary")
    summary = plot.find(class_="summary_text").text.strip()
    credit_items = plot.find_all(class_="credit_summary_item")
    if len(credit_items) == 1:
        # A single credit item is treated as the stars list (no creators).
        creators = []
        stars_tag = credit_items[0]
    else:
        creators_tag, stars_tag = credit_items
        creators = [anchor.text.strip() for anchor in creators_tag("a")]
    stars = [anchor.text.strip() for anchor in stars_tag("a")]

    poster_url = page.find("div", class_="poster").find("img")["src"]

    show_data = {
        "title": title,
        "rating": rating,
        "num_ratings": num_ratings,
        "num_episodes": num_episodes,
        "plot_summary": summary,
        "creators": creators,
        "stars": stars,
        "poster_url": poster_url,
    }
    show_data.update(extra_details)
    self.show_data = show_data