def process(url):
    """Fetch *url* and push its request/response metadata plus body onto the queue.

    Skips URLs disallowed by the robots helper, failed fetches, and PDF
    responses. Headers are flattened to plain dicts and JSON-encoded with
    sorted keys so the queued payload is deterministic.

    Args:
        url: The URL to download and enqueue.
    """
    try:
        # Progress output is best-effort: some URLs may not be encodable
        # on the current console. Narrowed from a bare except so that
        # KeyboardInterrupt/SystemExit still propagate.
        print('DOWNLOADING', url)
    except Exception:
        pass
    if not urlhelper.can_fetch(url):
        # Lazy %-args: the message is only formatted if the record is emitted.
        logger.info('disallowed %s', url)
        return
    # NOTE(review): crawler.load appears to return (request, response, body,
    # parsed soup) — the soup is unused here, hence the underscore name.
    request, response, data, _soup = crawler.load(url)
    if request is None:
        return  # fetch failed
    if response.geturl().endswith('.pdf'):
        return  # skip PDF documents
    # Flatten header pair-lists into plain dicts for JSON serialization.
    drequest = dict(request.header_items())
    dresponse = dict(response.getheaders())
    dresponse.update({'status': response.status, 'reason': response.reason})
    ProcessQueue.push(
        response.geturl(),
        json.dumps(drequest, sort_keys=True),
        json.dumps(dresponse, sort_keys=True),
        data)
def start():
    """Build a ProcessQueue, wire it to the ``process`` handler, and run it."""
    q = ProcessQueue()
    q.bind(process)
    q.run()