Ejemplo n.º 1
0
def get_movie_links(bestof_url):
    """Return absolute URLs for the links listed on a "best of" page.

    Args:
        bestof_url: URL of the page to fetch through ``get_soup``.

    Returns:
        A list with ``BASE_URL`` prepended to the ``href`` of every anchor
        found inside the ``<div id="top_movies_main">`` element. The list is
        empty when the page has no such element.
    """
    soup = get_soup(bestof_url)
    container = soup.find('div', {'id': 'top_movies_main'})

    # find() yields None when nothing matches; without this guard a page
    # lacking the container would fail with AttributeError on .findAll.
    if container is None:
        return []

    # href=True restricts the search to anchors that actually carry an href,
    # so bare <a name="..."> anchors cannot trigger a KeyError below.
    anchors = container.findAll('a', href=True)

    return [BASE_URL + anchor['href'] for anchor in anchors]
Ejemplo n.º 2
0
def get_movie_links(bestof_url):
    """Return absolute URLs for the links listed on a "best of" page.

    Args:
        bestof_url: URL of the page to fetch through ``get_soup``.

    Returns:
        A list with ``BASE_URL`` prepended to the ``href`` of every anchor
        found inside the ``<div id="top_movies_main">`` element. The list is
        empty when the page has no such element.
    """
    soup = get_soup(bestof_url)
    container = soup.find('div', {'id': 'top_movies_main'})

    # find() yields None when nothing matches; without this guard a page
    # lacking the container would fail with AttributeError on .findAll.
    if container is None:
        return []

    # href=True restricts the search to anchors that actually carry an href,
    # so bare <a name="..."> anchors cannot trigger a KeyError below.
    anchors = container.findAll('a', href=True)

    return [BASE_URL + anchor['href'] for anchor in anchors]
Ejemplo n.º 3
0
def get_bestof_links():
    """Return absolute URLs of the "best of" lists linked from the top page.

    The ``/top/`` page is fetched through ``get_soup``; anchors are selected
    by an ``href`` filter on the ``/top/bestofrt`` pattern, and each kept
    ``href`` is prefixed with ``BASE_URL``.
    """
    top_page = get_soup('http://www.rottentomatoes.com/top/')
    bestof_pattern = re.compile('/top/bestofrt.*')

    bestof_urls = []
    for anchor in top_page.findAll('a', href=bestof_pattern):
        # Anchors whose text starts with "View" are skipped: per the original
        # author they are "view all" links duplicating entries already kept.
        if anchor.text.startswith('View'):
            continue
        bestof_urls.append(BASE_URL + anchor['href'])

    return bestof_urls
Ejemplo n.º 4
0
def get_bestof_links():
    """Return absolute URLs of the "best of" lists linked from the top page.

    The ``/top/`` page is fetched through ``get_soup``; anchors are selected
    by an ``href`` filter on the ``/top/bestofrt`` pattern, and each kept
    ``href`` is prefixed with ``BASE_URL``.
    """
    top_page = get_soup('http://www.rottentomatoes.com/top/')
    bestof_pattern = re.compile('/top/bestofrt.*')

    bestof_urls = []
    for anchor in top_page.findAll('a', href=bestof_pattern):
        # Anchors whose text starts with "View" are skipped: per the original
        # author they are "view all" links duplicating entries already kept.
        if anchor.text.startswith('View'):
            continue
        bestof_urls.append(BASE_URL + anchor['href'])

    return bestof_urls
Ejemplo n.º 5
0
def parse_review_page(review_page_url):
    """Parse every review found on one review-listing page.

    Args:
        review_page_url: URL of the page to fetch through ``get_soup``.

    Returns:
        A list of the non-``None`` results of calling ``parse_review`` on
        each ``<div class="bottom_divider">`` inside ``<div id="reviews">``.
        The list is empty when the reviews container is missing (or falsy).
    """
    page = get_soup(review_page_url)
    container = page.find('div', {'id': 'reviews'})
    if not container:
        return []

    parsed = []
    for review_div in container.findAll('div', {'class': 'bottom_divider'}):
        review = parse_review(review_div)
        # parse_review signals "nothing usable here" with None; drop those.
        if review is not None:
            parsed.append(review)

    return parsed
Ejemplo n.º 6
0
def parse_review_page(review_page_url):
    """Parse every review found on one review-listing page.

    Args:
        review_page_url: URL of the page to fetch through ``get_soup``.

    Returns:
        A list of the non-``None`` results of calling ``parse_review`` on
        each ``<div class="bottom_divider">`` inside ``<div id="reviews">``.
        The list is empty when the reviews container is missing (or falsy).
    """
    page = get_soup(review_page_url)
    container = page.find('div', {'id': 'reviews'})
    if not container:
        return []

    parsed = []
    for review_div in container.findAll('div', {'class': 'bottom_divider'}):
        review = parse_review(review_div)
        # parse_review signals "nothing usable here" with None; drop those.
        if review is not None:
            parsed.append(review)

    return parsed
Ejemplo n.º 7
0
def get_audience_reviews(movie_url, max_pages=52):
    """Collect the de-duplicated audience reviews for one movie.

    Args:
        movie_url: Movie page URL. ``'reviews/?type=user'`` is appended to it
            verbatim, so it presumably needs a trailing ``/`` (confirm
            against callers).
        max_pages: Upper bound on the number of review pages fetched.
            Defaults to 52, the limit that was previously hard-coded.

    Returns:
        A list of the distinct values returned by ``parse_review_page``, in
        the order they were first encountered.

    Raises:
        IndexError, ValueError: if the page-count text is present but is not
            of the form ``"... of <number>"``.
    """
    reviews_url = movie_url + 'reviews/?type=user'
    soup = get_soup(reviews_url)

    page_info = soup.find('span', {'class': 'pageInfo'})
    if page_info is None:
        # No pagination widget on the page. Presumably the listing fits on a
        # single page (or has no reviews at all), so still try page 1 rather
        # than crashing on None.text.
        num_pages = 1
    else:
        num_pages = int(page_info.text.split('of ')[1])

    upper = min(max_pages, num_pages)

    all_reviews = []

    for page in range(1, upper + 1):
        review_page = '%s&page=%d' % (reviews_url, page)
        all_reviews += parse_review_page(review_page)

    # De-duplicate while keeping first-seen order; list(set(...)) would hand
    # the reviews back in an arbitrary order that can differ between runs.
    seen = set()
    deduped = []
    for review in all_reviews:
        if review not in seen:
            seen.add(review)
            deduped.append(review)

    return deduped
Ejemplo n.º 8
0
def get_audience_reviews(movie_url, max_pages=52):
    """Collect the de-duplicated audience reviews for one movie.

    Args:
        movie_url: Movie page URL. ``'reviews/?type=user'`` is appended to it
            verbatim, so it presumably needs a trailing ``/`` (confirm
            against callers).
        max_pages: Upper bound on the number of review pages fetched.
            Defaults to 52, the limit that was previously hard-coded.

    Returns:
        A list of the distinct values returned by ``parse_review_page``, in
        the order they were first encountered.

    Raises:
        IndexError, ValueError: if the page-count text is present but is not
            of the form ``"... of <number>"``.
    """
    reviews_url = movie_url + 'reviews/?type=user'
    soup = get_soup(reviews_url)

    page_info = soup.find('span', {'class': 'pageInfo'})
    if page_info is None:
        # No pagination widget on the page. Presumably the listing fits on a
        # single page (or has no reviews at all), so still try page 1 rather
        # than crashing on None.text.
        num_pages = 1
    else:
        num_pages = int(page_info.text.split('of ')[1])

    upper = min(max_pages, num_pages)

    all_reviews = []

    for page in range(1, upper + 1):
        review_page = '%s&page=%d' % (reviews_url, page)
        all_reviews += parse_review_page(review_page)

    # De-duplicate while keeping first-seen order; list(set(...)) would hand
    # the reviews back in an arbitrary order that can differ between runs.
    seen = set()
    deduped = []
    for review in all_reviews:
        if review not in seen:
            seen.add(review)
            deduped.append(review)

    return deduped
Ejemplo n.º 9
0
def get_movie_info(movie_url):
    """Return the UTF-8 encoded title of a movie page together with its URL.

    Args:
        movie_url: URL of the movie page to fetch through ``get_soup``.

    Returns:
        A ``(title, movie_url)`` tuple, where ``title`` is the text of the
        page's ``<h1 class="movie_title">`` element encoded as UTF-8.

    NOTE(review): no guard exists for a page without that heading;
    presumably ``find`` returns None there and the ``.text`` access raises
    AttributeError -- confirm whether callers rely on that.
    """
    page = get_soup(movie_url)
    heading = page.find('h1', {'class': 'movie_title'})
    encoded_title = heading.text.encode('utf8')

    return encoded_title, movie_url
Ejemplo n.º 10
0
def get_movie_info(movie_url):
    """Return the UTF-8 encoded title of a movie page together with its URL.

    Args:
        movie_url: URL of the movie page to fetch through ``get_soup``.

    Returns:
        A ``(title, movie_url)`` tuple, where ``title`` is the text of the
        page's ``<h1 class="movie_title">`` element encoded as UTF-8.

    NOTE(review): no guard exists for a page without that heading;
    presumably ``find`` returns None there and the ``.text`` access raises
    AttributeError -- confirm whether callers rely on that.
    """
    page = get_soup(movie_url)
    heading = page.find('h1', {'class': 'movie_title'})
    encoded_title = heading.text.encode('utf8')

    return encoded_title, movie_url