def parse_resto(self, response):
    """Parse one restaurant page and schedule its reviews for parsing.

    Stores the restaurant's header-link texts on the spider, follows every
    review link found on the page with ``parse_review`` as callback, then
    asks ``get_info.go_to_next_page`` (called with ``max_page=10``) whether
    the next list of reviews should be followed with this method as
    callback.

    Args:
        response: Response for the restaurant page; must provide ``xpath``
            and ``follow``.

    Yields:
        The objects returned by ``response.follow``: one per review link,
        plus one for the next review page when pagination continues.
    """
    # NOTE(review): if `logger` is a stdlib logging.Logger, `warn` is a
    # deprecated alias of `warning` -- confirm and switch.
    logger.warn(' > PARSING NEW RESTO PAGE ({})'.format(self.resto_nb))
    self.resto_nb += 1

    # Restaurant info: the first header link is kept as the price, the
    # remaining ones as the restaurant types. Evaluate the XPath only once.
    header_links = response.xpath(
        '//div[@class="header_links"]/a/text()').extract()
    # A page without header links previously raised IndexError here, which
    # aborted the callback before any review request was yielded.
    self.resto_price = header_links[0] if header_links else None
    self.resto_type = header_links[1:]
    # NOTE(review): these values live on the spider instance; if
    # parse_review reads them, pages of different restaurants processed
    # concurrently may overwrite each other -- confirm against parse_review.

    # Get the list of reviews on the restaurant page.
    urls_review = response.xpath(
        '//div[@class="quote"]//a/@href').extract()

    # Open each review link and hand it to parse_review.
    for url_review in urls_review:
        yield response.follow(url=url_review, callback=self.parse_review)

    # Pagination: follow the next list of reviews if the helper allows it.
    next_page, next_page_number = get_info.get_urls_next_list_of_reviews(
        response)
    if get_info.go_to_next_page(next_page, next_page_number, max_page=10):
        yield response.follow(next_page, callback=self.parse_resto)
def parse_resto(self, response):
    """Second parsing stage: fan out from a restaurant page to its reviews.

    Every review URL returned by ``get_info.get_urls_review_in_resto`` is
    followed with ``parse_review`` as callback. Afterwards the next list of
    reviews is followed with this same method as callback, provided
    ``get_info.go_to_next_page`` (called with ``max_page=50``) agrees.

    Args:
        response: Response for the restaurant page; must provide ``follow``.

    Yields:
        The objects returned by ``response.follow``: one per review link,
        plus one for the next review page when pagination continues.
    """
    # NOTE(review): if `logger` is a stdlib logging.Logger, `warn` is a
    # deprecated alias of `warning` -- confirm before changing.
    page_banner = ' > PARSING NEW REVIEW PAGE ({})'.format(self.resto_pg_nb)
    logger.warn(page_banner)
    self.resto_pg_nb += 1

    # Schedule one request per review listed on this page.
    for review_link in get_info.get_urls_review_in_resto(response):
        yield response.follow(url=review_link, callback=self.parse_review)

    # Locate the next list of reviews and stop here unless we should go on.
    following_url, following_number = (
        get_info.get_urls_next_list_of_reviews(response))
    keep_going = get_info.go_to_next_page(
        following_url, following_number, max_page=50)
    if not keep_going:
        return

    yield response.follow(following_url, callback=self.parse_resto)
def parse_resto(self, response):
    """Second parsing stage: fan out from a restaurant page to its reviews.

    Logs the page counter and the review count reported by
    ``get_info.get_number_of_reviews``, follows every review URL returned by
    ``get_info.get_urls_review_in_main_search_page`` with ``parse_review``
    as callback, then follows the next review page with this same method as
    callback when ``get_info.go_to_next_review_page`` agrees. The original
    note says a page usually holds 10 reviews.

    Args:
        response: Response for the restaurant page; must provide ``follow``.

    Yields:
        The objects returned by ``response.follow``: one per review link,
        plus one for the next review page when pagination continues.
    """
    # NOTE(review): if `logger` is a stdlib logging.Logger, `warn` is a
    # deprecated alias of `warning` -- confirm before changing.
    # Console trace: which page we are on, and how many reviews it reports.
    logger.warn(' > PARSING NEW RESTO PAGE ({})'.format(self.resto_nb))
    review_count = get_info.get_number_of_reviews(response)
    logger.warn('{} reviews'.format(review_count))
    self.resto_nb += 1

    # Follow each review link found on this page.
    for link in get_info.get_urls_review_in_main_search_page(response):
        yield response.follow(url=link, callback=self.parse_review)

    # Pagination: bail out unless the helper says to continue.
    upcoming_url, upcoming_number = (
        get_info.get_urls_next_list_of_reviews(response))
    should_continue = get_info.go_to_next_review_page(
        upcoming_url, upcoming_number, max_page=None, printing=0)
    if not should_continue:
        return

    logger.warn(' > GOING TO THE NEXT REVIEW PAGE ({})'.format(upcoming_url))
    yield response.follow(upcoming_url, callback=self.parse_resto)