コード例 #1
0
 def parse_list(self, page):
     """Return every record parsed from ``page`` as a list.

     Delegates the parsing to ``NewsItemListDetailScraper.parse_list`` and
     materializes the result so it can be counted.

     Raises ScraperBroken when 99 or more records come back.
     NOTE(review): presumably the source truncates large result sets, so
     a count this high means data may be missing -- confirm.
     """
     source = NewsItemListDetailScraper.parse_list(self, page)
     parsed = list(source)
     total = len(parsed)
     self.logger.debug('Got %s records', total)
     if total < 99:
         return parsed
     # Too many records: refuse to continue rather than return the list.
     raise ScraperBroken(
         'Got %s records. Consider changing date interval' % total)
コード例 #2
0
ファイル: retrieval.py プロジェクト: DotNetWebs/openblock
    def parse_list(self, record_html):
        """Yield one merged dict per record parsed from a detail page.

        ``record_html`` is a ``(list_record, html)`` pair rather than raw
        html (per the original note, list_pages() returns both).

        Before parsing, ``list_record`` is updated in place with a
        ``'zipcode'`` key when ``self.detail_address_re`` matches ``html``.
        Each yielded dict is a copy of ``list_record`` overlaid with one
        record from ``NewsItemListDetailScraper.parse_list``; on key
        clashes the parsed record's value wins.
        """
        list_record, html = record_html
        # A better version of the restaurant address is available on this
        # page; attempt to extract additional location details to resolve
        # ambiguities.
        try:
            info = self.detail_address_re.search(html).groupdict()
            list_record['zipcode'] = info['zipcode']
        except Exception:
            # Best effort: a failed lookup (e.g. no regex match) is only
            # logged. Catching Exception instead of using a bare except
            # lets KeyboardInterrupt and SystemExit propagate.
            self.logger.info(
                "Could not get detailed address information for record %s: %s",
                list_record['restaurant_id'], list_record['restaurant_name'])

        for record in NewsItemListDetailScraper.parse_list(self, html):
            yield dict(list_record, **record)
コード例 #3
0
ファイル: retrieval.py プロジェクト: slinkp/openblock
    def parse_list(self, record_html):
        """Parse a detail page and yield records merged with list data.

        ``record_html`` is unpacked as ``(list_record, html)``; the original
        note explains that list_pages() hands back both instead of raw html.

        Side effect: ``list_record['zipcode']`` is set when
        ``self.detail_address_re`` finds a match in ``html``. Every yielded
        dict starts from ``list_record`` and is overridden by the fields of
        one record from ``NewsItemListDetailScraper.parse_list``.
        """
        list_record, html = record_html
        # A better version of the restaurant address is available on this
        # page; attempt to extract additional location details to resolve
        # ambiguities.
        try:
            info = self.detail_address_re.search(html).groupdict()
            list_record['zipcode'] = info['zipcode']
        except Exception:
            # The zipcode is optional, so failures here are logged and
            # skipped. A bare except would also trap KeyboardInterrupt and
            # SystemExit; Exception keeps the best-effort behaviour without
            # swallowing those.
            self.logger.info(
                "Could not get detailed address information for record %s: %s",
                list_record['restaurant_id'],
                list_record['restaurant_name'])

        for record in NewsItemListDetailScraper.parse_list(self, html):
            yield dict(list_record, **record)
コード例 #4
0
ファイル: retrieval.py プロジェクト: christaggart/openblock
 def parse_list(self, record_html):
     """Yield parsed records, each merged on top of the list record.

     ``record_html`` is unpacked as a ``(list_record, html)`` pair. For
     every record that ``NewsItemListDetailScraper.parse_list`` produces
     from ``html``, a new dict is yielded that starts from ``list_record``
     and is overridden by the parsed record's fields.
     """
     base_fields, markup = record_html
     parsed_records = NewsItemListDetailScraper.parse_list(self, markup)
     for parsed in parsed_records:
         yield dict(base_fields, **parsed)
コード例 #5
0
ファイル: retrieval.py プロジェクト: vijayaraju/everyblock-1
 def parse_list(self, record_html):
     """Combine the list record with each record parsed from the html.

     ``record_html`` must unpack into exactly two items: the list record
     and the html to parse. The html goes to
     ``NewsItemListDetailScraper.parse_list``; each result is yielded as
     a fresh dict built from the list record, with the parsed fields
     taking precedence on duplicate keys.
     """
     shared, page_html = record_html
     detail_iter = NewsItemListDetailScraper.parse_list(self, page_html)
     for detail in detail_iter:
         combined = dict(shared, **detail)
         yield combined
コード例 #6
0
ファイル: retrieval.py プロジェクト: frankk00/openblock
 def parse_list(self, page):
     """Parse ``page`` into a list of records, rejecting oversized results.

     The records come from ``NewsItemListDetailScraper.parse_list`` and are
     collected into a list; the count is logged at debug level.

     Raises ScraperBroken if the count is 99 or higher.
     NOTE(review): the threshold looks like a guard against a capped
     result page -- verify against the data source.
     """
     found = list(NewsItemListDetailScraper.parse_list(self, page))
     count = len(found)
     self.logger.debug('Got %s records', count)
     too_many = count >= 99
     if too_many:
         message = 'Got %s records. Consider changing date interval' % count
         raise ScraperBroken(message)
     return found
コード例 #7
0
ファイル: retrieval.py プロジェクト: frankk00/openblock
 def parse_list(self, page):
     """Yield parsed records, each tagged with the page's facility type.

     ``page`` is unpacked as a ``(facility_type, html)`` pair. Every
     record that ``NewsItemListDetailScraper.parse_list`` produces from
     ``html`` is copied into a new dict with a ``facility_type`` key set
     to the first item of the pair.
     """
     kind, markup = page
     parsed_records = NewsItemListDetailScraper.parse_list(self, markup)
     for parsed in parsed_records:
         yield dict(parsed, facility_type=kind)
コード例 #8
0
 def parse_list(self, page):
     """Attach the facility type to each record parsed from the html.

     ``page`` must unpack into two items: the facility type and the html.
     The html is handed to ``NewsItemListDetailScraper.parse_list``; each
     resulting record is yielded as a new dict whose ``facility_type``
     entry is the unpacked facility type (replacing any existing value).
     """
     facility, page_html = page
     base_iter = NewsItemListDetailScraper.parse_list(self, page_html)
     for entry in base_iter:
         tagged = dict(entry, facility_type=facility)
         yield tagged