def scrape_movie(self):
    """Scrape the movie's main page, then collect its reviews page by page.

    Calls ``self.scrape_main_page()`` first, resets ``self.reviews`` and
    ``self.imdb_review_ranking_counter``, and then walks the review listing
    in offsets of 10. For each offset the list returned by
    ``self.get_reviews_from_page`` is appended to ``self.reviews``.

    The walk ends when the offset reaches ``self.nreviews`` or, if
    ``self.nreview_limit`` is not None, as soon as the offset reaches or
    passes that limit.

    NOTE(review): ``self.nreviews`` and ``self.main_page_url`` are presumably
    populated by ``scrape_main_page`` -- confirm against that method.
    """
    self.scrape_main_page()

    # load in all review pages
    self.reviews = []
    self.imdb_review_ranking_counter = 0

    n = 0
    while n < self.nreviews:
        imdb_review_url = url_join(self.main_page_url, 'reviews?start=%s' % n)
        if n % 50 == 0 and self.debug:
            # Single parenthesised argument: same output on Python 2 and 3.
            print(' * n=%d/%d' % (n, self.nreviews))
        self.reviews += self.get_reviews_from_page(imdb_review_url)
        n += 10  # imdb pages increment in steps of 10
        # ">=" instead of "==": n only takes multiples of 10, so an exact
        # match would never fire for a limit like 25 and the cap would be
        # silently ignored.
        if self.nreview_limit is not None and n >= self.nreview_limit:
            break
def scrape_movie(self):
    """Scrape the main page and then every review page for this movie.

    Steps:
      1. ``self.scrape_main_page()`` is called.
      2. ``self.reviews`` is reset to an empty list and
         ``self.imdb_review_ranking_counter`` to 0.
      3. Review pages are requested at offsets 0, 10, 20, ... while the
         offset is below ``self.nreviews``; each page's result from
         ``self.get_reviews_from_page`` is appended to ``self.reviews``.

    If ``self.nreview_limit`` is not None, paging stops once the offset has
    reached or passed that limit.

    NOTE(review): ``self.nreviews`` and ``self.main_page_url`` are presumably
    set up by ``scrape_main_page`` -- confirm against that method.
    """
    self.scrape_main_page()

    # load in all review pages
    self.reviews = []
    self.imdb_review_ranking_counter = 0

    n = 0
    while n < self.nreviews:
        imdb_review_url = url_join(self.main_page_url, 'reviews?start=%s' % n)
        if n % 50 == 0 and self.debug:
            # Parenthesised single argument prints identically on Python 2 and 3.
            print(' * n=%d/%d' % (n, self.nreviews))
        self.reviews += self.get_reviews_from_page(imdb_review_url)
        n += 10  # imdb pages increment in steps of 10
        # Compare with ">=": the offset advances in tens, so "==" would skip
        # straight past any limit that is not itself a multiple of 10.
        if self.nreview_limit is not None and n >= self.nreview_limit:
            break
def get_main_page_url(imdb_movie_id):
    """Return the IMDb title-page URL for a numeric movie id.

    The id is rendered as ``tt`` followed by the number zero-padded to at
    least seven digits, then combined with the IMDb title base URL through
    ``url_join``.
    """
    title_root = 'http://www.imdb.com/title/'
    title_slug = 'tt%07d' % imdb_movie_id
    return url_join(title_root, title_slug)
def get_main_page_url(imdb_movie_id):
    """Build the URL of a movie's main IMDb page from its numeric id.

    The path component is ``tt`` plus the id zero-padded to a minimum of
    seven digits; ``url_join`` attaches it to the IMDb title base URL.
    """
    base_url = 'http://www.imdb.com/title/'
    movie_path = 'tt%07d' % imdb_movie_id
    return url_join(base_url, movie_path)