Python get_parsed_webpage Examples

Programming Language: Python

Namespace/Package Name: scraper.utils

Method/Function: get_parsed_webpage

Examples at hotexamples.com: 5

Python get_parsed_webpage - 5 examples found. These are the top-rated real-world Python examples of scraper.utils.get_parsed_webpage, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

 def _get_latest_season(self):
     """
         Find the most recent season that has at least one rated episode
         and store it in self.latest_season.

         The "season=0" episode listing is requested first and parsed with
         self._get_season_data (presumably the site serves the newest season
         for that value - confirm). While every episode of the stored season
         has an empty "rating", the season numbered one lower is fetched
         instead. The search never goes below season 1.
     """
     webpage = get_parsed_webpage(f"{self.url}/episodes?season=0")
     self.latest_season = self._get_season_data(webpage)
     # Edge case: the latest season(s) may not have any rated episodes yet.
     # The lower bound on the season number is required because the rating
     # check alone never fails for a show without any rated episode (and
     # all() of an empty episode list is True), which would keep the loop
     # requesting ever lower season numbers.
     while self.latest_season["number"] > 1 and all(
             episode["rating"] == ""
             for episode in self.latest_season["episodes"]):
         latest = self.latest_season["number"] - 1
         webpage = get_parsed_webpage(
             f"{self.url}/episodes?season={latest}")
         self.latest_season = self._get_season_data(webpage)

Example #2

Show file

def search_imdb(query):
    """Search IMDb TV titles for *query* and return the parsed result rows.

    The query is URL-quoted and sent to the IMDb "find" page. Every child of
    the ``findList`` table is passed to ``get_data_from_row``; children for
    which that helper returns ``None`` are left out.

    Returns:
        A list of the non-``None`` values produced by ``get_data_from_row``,
        in page order, or an empty list when the page has no ``findList``
        table.
    """
    encoded = urllib.parse.quote(query)
    page = get_parsed_webpage(
        f"https://www.imdb.com/find?q={encoded}&s=tt&ttype=tv")
    results_table = page.find("table", class_="findList")
    if results_table is None:
        return []

    shows = []
    for row in results_table.children:
        show = get_data_from_row(row)
        if show is not None:
            shows.append(show)
    return shows

Example #3

Show file

File: hundred_reports.py Project: joshuanazareth97/popviz

def get_show_ids():
    """Yield ``(title, id)`` pairs scraped from the IMDb top TV chart page.

    Each child of the chart's ``tbody`` is inspected for a ``titleColumn``
    cell containing a link. The stripped link text is the title and the id
    is derived from the link's ``href`` by ``get_id_from_link``. Children
    that cannot be parsed this way are skipped.

    Yields:
        Tuples of ``(title, id_)`` in page order.
    """
    webpage = get_parsed_webpage("https://www.imdb.com/chart/toptv/")
    list_div = webpage.find("div", class_="lister")
    lst = list_div.select_one("table.chart tbody ")
    for row in lst.children:
        try:
            title_cell = row.find("td", class_="titleColumn")
            link = title_cell.find("a")
            title = link.text.strip()
            id_ = get_id_from_link(link["href"])
        except Exception:
            # Best-effort scraping: children that are not proper chart rows
            # (or lack the expected cell/link) are skipped. Only Exception
            # is caught so KeyboardInterrupt/SystemExit still propagate.
            continue
        # The yield is kept outside the try block on purpose: closing the
        # generator raises GeneratorExit at the yield, and swallowing it
        # would make close() fail with "generator ignored GeneratorExit".
        yield title, id_

Example #4

Show file

 def get_all_seasons(self):
     """
         Collects the data of every season into self.episode_data.

         Seasons 1 up to (but not including) the number of self.latest_season
         are fetched and parsed one by one; the already known
         self.latest_season is appended last. When self.log is truthy the
         season loop is wrapped in a tqdm progress bar labelled "Seasons".
         Nothing is returned.
     """
     earlier_seasons = range(1, self.latest_season["number"])
     progress = (tqdm(earlier_seasons, desc="Seasons")
                 if self.log else earlier_seasons)
     for season in progress:
         page = get_parsed_webpage(f"{self.url}/episodes?season={season}")
         season_data = self._get_season_data(season_page=page)
         self.episode_data.append(season_data)
     # The latest season is not fetched again in the loop above.
     self.episode_data.append(self.latest_season)

Example #5

Show file

    def _get_show_data(self):
        """
            Scrapes show level data from the page at self.url and stores it
            as a dictionary in self.show_data (nothing is returned).

            The dictionary holds the title, rating, number of ratings,
            number of episodes, plot summary, creators, stars and poster URL,
            updated with whatever IMDBScraper._get_additional_details
            extracts from the "subtext" element.
        """
        page = get_parsed_webpage(self.url)

        # Title bar: title, rating and rating count.
        title_bar = page.find(class_="title_bar_wrapper")
        title_tag = title_bar.select(".title_wrapper h1")[0]
        title = title_tag.text.strip()
        rating_tag = title_bar.select(".ratings_wrapper .ratingValue span")[0]
        rating = rating_tag.text.strip()
        count_tag = title_bar.select(".ratings_wrapper a span.small")[0]
        num_ratings = count_tag.text.strip()

        # The episode count is the sub heading text without the word
        # "episodes".
        nav_panel = page.find(class_="navigation_panel")
        sub_heading = nav_panel.find(class_="bp_sub_heading")
        num_episodes = sub_heading.text.replace("episodes", "").strip()

        subtext_tag = title_bar.find(class_="subtext")
        extra_details = IMDBScraper._get_additional_details(subtext_tag)

        # Plot summary and credits.
        plot_block = page.find(class_="plot_summary")
        summary = plot_block.find(class_="summary_text").text.strip()
        credit_items = plot_block.find_all(class_="credit_summary_item")
        if len(credit_items) == 1:
            # A single credit item is treated as the stars list.
            creator_names = []
            stars_tag = credit_items[0]
        else:
            creators_tag, stars_tag = credit_items
            creator_names = [
                link.text.strip() for link in creators_tag("a")
            ]
        star_names = [link.text.strip() for link in stars_tag("a")]

        poster_url = page.find("div", class_="poster").find("img")["src"]

        show = {
            "title": title,
            "rating": rating,
            "num_ratings": num_ratings,
            "num_episodes": num_episodes,
            "plot_summary": summary,
            "creators": creator_names,
            "stars": star_names,
            "poster_url": poster_url,
        }
        show.update(extra_details)
        self.show_data = show