def parse_resto(self, response):
        """Parse one restaurant page and schedule a request per review.

        SECOND PARSING: stores the restaurant header info on the spider,
        follows every review link found on the page with ``parse_review`` as
        callback, then, when ``get_info.go_to_next_page`` agrees, follows the
        next page of reviews with this same method as callback.
        - Usually there are 10 comments per page

        Args:
            response: the downloaded restaurant page; it must provide
                ``xpath`` and ``follow``.

        Yields:
            Whatever ``response.follow`` returns: one request per review
            link, plus at most one request for the next page of reviews.
        """
        logger.warning(' > PARSING NEW RESTO PAGE ({})'.format(self.resto_nb))
        self.resto_nb += 1

        # Get restaurant info: run the header query once and reuse the result
        # instead of evaluating the same XPath twice.
        header_links = response.xpath(
            '//div[@class="header_links"]/a/text()').extract()

        # The first header link is kept as the price, the remaining ones as
        # the types. A page without header links used to raise IndexError
        # here, which stopped the generator before any review was followed;
        # fall back to None / [] so the reviews are still scheduled.
        # NOTE(review): both values live on the spider instance and are
        # overwritten by every parsed page; if parse_review reads them while
        # several restaurants are in flight it may see another restaurant's
        # values -- confirm against parse_review.
        self.resto_price = header_links[0] if header_links else None
        self.resto_type = header_links[1:]

        # Get the list of reviews on the restaurant page
        urls_review = response.xpath(
            '//div[@class="quote"]//a/@href').extract()

        # For each review open the link and parse it into the parse_review method
        for url_review in urls_review:
            yield response.follow(url=url_review, callback=self.parse_review)

        # Look up the next page of reviews only once this page's review
        # requests have been handed out.
        next_page, next_page_number = get_info.get_urls_next_list_of_reviews(
            response)

        # Follow the next page if the helper decides to (max_page=10 is
        # passed to it).
        if get_info.go_to_next_page(next_page, next_page_number, max_page=10):
            yield response.follow(next_page, callback=self.parse_resto)
Example #2
0
    def parse_resto(self, response):
        """SECOND PARSING: schedule every review of a restaurant page.

        Follows each review link returned by
        ``get_info.get_urls_review_in_resto`` with ``parse_review`` as
        callback, then, when ``get_info.go_to_next_page`` agrees, follows the
        next page of reviews with this method as callback.
            - Usually there are 10 comments per page
        """
        page_message = ' > PARSING NEW REVIEW PAGE ({})'.format(
            self.resto_pg_nb)
        logger.warn(page_message)
        self.resto_pg_nb += 1

        # Review links present on this restaurant page
        review_links = get_info.get_urls_review_in_resto(response)

        # Hand each review over to parse_review
        for link in review_links:
            yield response.follow(url=link, callback=self.parse_review)

        # Next page of reviews, looked up only after the review requests
        # above have been handed out
        following_page, following_page_number = (
            get_info.get_urls_next_list_of_reviews(response))

        # Stop here unless the helper decides to continue (max_page=50 is
        # passed to it)
        should_continue = get_info.go_to_next_page(
            following_page, following_page_number, max_page=50)
        if not should_continue:
            return

        yield response.follow(following_page, callback=self.parse_resto)
    def parse_resto(self, response):
        """SECOND PARSING: follow every review listed on a restaurant page.

        Logs the restaurant counter and the review count reported by
        ``get_info.get_number_of_reviews``, yields one request per review URL
        (callback ``parse_review``) and, when
        ``get_info.go_to_next_review_page`` agrees, one request for the next
        page of reviews (callback: this method).
            - Usually there are 10 comments per page
        """
        # Console message for the restaurant page being parsed
        resto_message = ' > PARSING NEW RESTO PAGE ({})'.format(self.resto_nb)
        logger.warn(resto_message)

        # Number of reviews: only logged, not used further in this method
        review_count = get_info.get_number_of_reviews(response)
        logger.warn('{} reviews'.format(review_count))

        self.resto_nb += 1

        # Review URLs of this page, each one followed into parse_review.
        # NOTE(review): the helper name mentions the main search page while
        # this is a restaurant page -- confirm it is the intended helper.
        links = get_info.get_urls_review_in_main_search_page(response)
        for link in links:
            yield response.follow(url=link, callback=self.parse_review)

        # Next page information, fetched after the review requests above
        following_page, following_page_number = (
            get_info.get_urls_next_list_of_reviews(response))

        # Stop here unless the helper decides to continue
        keep_going = get_info.go_to_next_review_page(
            following_page,
            following_page_number,
            max_page=None,
            printing=0)
        if not keep_going:
            return

        logger.warn(
            ' > GOING TO THE NEXT REVIEW PAGE ({})'.format(following_page))
        yield response.follow(following_page, callback=self.parse_resto)