Ejemplo n.º 1
0
def scrape_shows(scrape_url: str) -> AbstractSet[Show]:
    """
    Fetch a page and build a Show from every artist link found on it.

    A link whose Show construction raises is logged (with traceback) and
    skipped, so one bad link does not abort the whole scrape.

    :param scrape_url: URL of the webpage to scrape for shows
    :return: the Show objects that were built successfully, in the order
        their links came back from the XPath query (collected in a list)
    """
    logging.info("Fetching shows data from {}".format(scrape_url))

    # NOTE(review): http_session and html are defined outside this block;
    # presumably an HTTP session and an HTML parser -- confirm.
    response = http_session.get(scrape_url)
    page = html(response.content)

    # href attribute of every anchor whose href contains 'artist'.
    artist_links = page.xpath("//a[contains(@href, 'artist')]/@href")

    shows = []
    for link in artist_links:
        try:
            show = Show(link)
        except Exception:
            # Best-effort scrape: record the failure and move on.
            logging.exception('Could not parse Show from %s', link)
            continue
        shows.append(show)
    return shows
Ejemplo n.º 2
0
def scrape_podcast_episodes(scrape_url: str) -> AbstractSet[PodcastEpisode]:
    """
    Fetch a page and build a PodcastEpisode from every podcast list item on it.

    An item whose PodcastEpisode construction raises is logged (with its
    position on the page and the traceback) and skipped, so one malformed
    item does not abort the whole scrape.

    :param scrape_url: URL of the webpage to scrape for podcast episodes
    :return: the PodcastEpisode objects that were built successfully, in the
        order their elements came back from the XPath query (collected in a
        list)
    """
    logging.info("Fetching podcast data from %s", scrape_url)

    # NOTE(review): http_session and html are defined outside this block;
    # presumably an HTTP session and an HTML parser -- confirm.
    podcasts_page = html(http_session.get(scrape_url).content)

    # TODO: scrape more than the front page
    # Every <div> whose class attribute contains "podcast-list-item".
    list_items = podcasts_page.xpath('//div[contains(@class, "podcast-list-item")]')

    episodes = []
    for position, div in enumerate(list_items, start=1):
        try:
            episodes.append(PodcastEpisode(div))
        except Exception:
            # Best-effort scrape: keep going, but record which item failed
            # and why, so the failure can be diagnosed from the log alone.
            logging.error(
                'Could not parse PodcastEpisode from list item %d on %s',
                position,
                scrape_url,
                exc_info=True,
            )
    return episodes