コード例 #1
0
    def next_request(self):
        """Pop one item from the queue and turn it into a ``Request``.

        ``self.idle_before_close`` is passed to ``self.queue.pop`` as the
        blocking-pop timeout.

        The popped item is indexed like a dict.  Keys read here:

        * ``url`` (required) -- request URL.
        * ``meta`` (required) -- mapping; see the note in the body.
        * ``callback`` / ``errback`` (optional) -- names of attributes on
          ``self.spider`` to attach to the request.
        * ``useragent`` (optional) -- value for the ``User-Agent`` header.

        Returns:
            The built ``Request``, or ``None`` when the queue yields nothing
            (or a falsy item).

        Raises:
            KeyError: if the item has no ``url`` or no ``meta`` key.
            ValueError: if the URL is still rejected after being retried
                with an ``http://`` prefix.
        """
        import logging

        item = self.queue.pop(self.idle_before_close)
        if not item:
            return None

        try:
            req = Request(item['url'])
        except ValueError:
            # The URL was rejected as given (an absolute URL is needed).
            # Retrying the identical call could only fail the same way, so
            # retry with an explicit scheme instead.
            # TODO: better URL validation.
            req = Request('http://' + item['url'])

        # Resolve optional handler names against the spider.  A name the
        # spider does not define is reported and skipped (best effort)
        # rather than aborting the request.
        for hook in ('callback', 'errback'):
            handler_name = item.get(hook)
            if handler_name is None:
                continue
            try:
                setattr(req, hook, getattr(self.spider, handler_name))
            except AttributeError:
                logging.getLogger(__name__).warning(
                    "could not attach %s %r from spider; leaving it unset",
                    hook, handler_name)

        # Defaults not in schema.  These are written onto the popped item
        # itself, not onto the request.
        item.setdefault('curdepth', 0)
        item.setdefault('retry_times', 0)

        # NOTE(review): 'field_css' is first set to the whole meta mapping
        # and only narrowed when meta carries its own 'field_css' entry --
        # confirm that this fallback is intentional.
        item_meta = item['meta']
        req.meta['field_css'] = item_meta
        if 'item' in item_meta:
            req.meta['item'] = item_meta['item']
        if 'field_css' in item_meta:
            req.meta['field_css'] = item_meta['field_css']

        # Extra check to add items to request.
        if item.get('useragent') is not None:
            req.headers['User-Agent'] = item['useragent']

        return req