class Spider(threading.Thread):
    """Daemon worker thread that parses URLs taken from a shared queue.

    Each URL pulled from ``queue`` is handed to a ``Parser`` built from
    ``config``.  The parser's result is unpacked as a ``(data, urls)`` pair:
    truthy ``data`` is passed to ``save_data_func`` and truthy ``urls`` is
    passed to ``add_urls_func``.

    The thread starts itself at the end of ``__init__``, so constructing a
    ``Spider`` is enough to begin processing.
    """

    def __init__(self, queue, config, save_data_func, add_urls_func):
        """Store the collaborators, build the parser and start the thread.

        queue          -- object providing ``get()`` and ``task_done()``.
        config         -- passed unchanged to ``Parser``.
        save_data_func -- callable invoked with the parsed data.
        add_urls_func  -- callable invoked with the discovered URLs.
        """
        threading.Thread.__init__(self)
        self.queue = queue
        # Daemon thread: it will not keep the interpreter alive on exit.
        self.daemon = True
        self.save_data_func = save_data_func
        self.add_urls_func = add_urls_func
        self.parser = Parser(config)
        # Started last so run() never observes a half-initialised instance.
        self.start()

    def run(self):
        """Process URLs from the queue forever.

        Any ``Exception`` raised while parsing or while running a callback
        is logged together with its traceback and the loop moves on to the
        next URL.  An exception raised by ``queue.get()`` itself is not
        caught and ends the loop.
        """
        while True:
            url = self.queue.get()
            try:
                data, urls = self.parser.parse(url)
                if data:
                    self.save_data_func(data)
                if urls:
                    self.add_urls_func(urls)
            except Exception as e:
                # "as" form is valid on Python 2.6+ and Python 3 alike.
                log('ERROR', url, e, '\n', traceback.format_exc())
            finally:
                # Always pair get() with task_done(), even if something
                # other than Exception escapes, so Queue.join() callers
                # are never left waiting on this item.
                gc.collect()
                self.queue.task_done()
def __init__(self, queue, config, save_data_func, add_urls_func):
    """Initialise the thread, keep the collaborators and start running.

    queue          -- stored as ``self.queue``.
    config         -- passed unchanged to ``Parser``.
    save_data_func -- stored as ``self.save_data_func``.
    add_urls_func  -- stored as ``self.add_urls_func``.
    """
    # The base initialiser must run before any Thread attribute is touched.
    threading.Thread.__init__(self)
    self.daemon = True

    # Collaborators supplied by the caller.
    self.queue = queue
    self.add_urls_func = add_urls_func
    self.save_data_func = save_data_func

    self.parser = Parser(config)

    # Kick the thread off only once every attribute is in place.
    self.start()