def parse_list(self, page):
    """Collect the parent class's list records, guarding against truncation.

    The upstream site appears to cap result pages; a count at or above the
    cap means records were silently dropped, so fail loudly instead of
    returning an incomplete set.
    """
    results = list(NewsItemListDetailScraper.parse_list(self, page))
    count = len(results)
    self.logger.debug('Got %s records', count)
    if count >= 99:
        raise ScraperBroken(
            'Got %s records. Consider changing date interval' % count)
    return results
def parse_list(self, record_html):
    """Parse a detail page, enriching the list record with a zipcode when possible.

    Normally this method gets passed raw HTML, but list_pages() returns a
    (list_record, html) pair so the fields captured on the list page can be
    merged into every record parsed from the detail page.
    """
    list_record, html = record_html
    # A better version of the restaurant address is available on this page;
    # attempt to extract additional location details to resolve ambiguities.
    try:
        info = self.detail_address_re.search(html).groupdict()
        list_record['zipcode'] = info['zipcode']
    except (AttributeError, KeyError):
        # AttributeError: the regex did not match (search() returned None).
        # KeyError: the match carried no 'zipcode' group. The original bare
        # except: also swallowed KeyboardInterrupt/SystemExit and real bugs.
        self.logger.info(
            "Could not get detailed address information for record %s: %s",
            list_record['restaurant_id'], list_record['restaurant_name'])
    for record in NewsItemListDetailScraper.parse_list(self, html):
        # Detail-page fields take precedence over list-page fields on clash.
        yield dict(list_record, **record)
def parse_list(self, record_html):
    """Parse a detail page, enriching the list record with a zipcode when possible.

    Normally this method gets passed raw HTML, but list_pages() returns a
    (list_record, html) pair so the fields captured on the list page can be
    merged into every record parsed from the detail page.
    """
    list_record, html = record_html
    # A better version of the restaurant address is available on this page;
    # attempt to extract additional location details to resolve ambiguities.
    try:
        info = self.detail_address_re.search(html).groupdict()
        list_record['zipcode'] = info['zipcode']
    except (AttributeError, KeyError):
        # AttributeError: search() found no match and returned None.
        # KeyError: the match had no 'zipcode' group. Narrowed from a bare
        # except:, which also hid KeyboardInterrupt/SystemExit and coding bugs.
        self.logger.info(
            "Could not get detailed address information for record %s: %s",
            list_record['restaurant_id'], list_record['restaurant_name'])
    for record in NewsItemListDetailScraper.parse_list(self, html):
        # Detail-page fields win over list-page fields when keys collide.
        yield dict(list_record, **record)
def parse_list(self, record_html):
    """Merge the list-page record into every record parsed from the detail HTML."""
    base, markup = record_html
    for detail in NewsItemListDetailScraper.parse_list(self, markup):
        # Detail fields override list-page fields on key collisions.
        merged = dict(base)
        merged.update(detail)
        yield merged
def parse_list(self, record_html):
    """Yield detail-page records combined with the fields captured on the list page."""
    list_record, html = record_html
    # dict(list_record, **rec): detail-page values take precedence on clash.
    yield from (dict(list_record, **rec)
                for rec in NewsItemListDetailScraper.parse_list(self, html))
def parse_list(self, page):
    """Return the parent class's records, failing loudly if the set looks capped.

    Hitting 99+ records suggests the source truncated the listing, so the
    caller should narrow the date interval rather than trust a partial set.
    """
    found = list(NewsItemListDetailScraper.parse_list(self, page))
    total = len(found)
    self.logger.debug('Got %s records', total)
    if total >= 99:
        raise ScraperBroken(
            'Got %s records. Consider changing date interval' % total)
    return found
def parse_list(self, page):
    """Tag each parsed record with the facility type bundled alongside the HTML."""
    kind, markup = page
    for rec in NewsItemListDetailScraper.parse_list(self, markup):
        tagged = dict(rec)
        tagged['facility_type'] = kind
        yield tagged
def parse_list(self, page):
    """Yield the parent class's records annotated with this page's facility type."""
    ftype, html = page
    yield from (dict(item, facility_type=ftype)
                for item in NewsItemListDetailScraper.parse_list(self, html))