Exemple #1
0
class Spider(threading.Thread):
    """Daemon worker thread that parses URLs taken from a shared queue.

    The thread starts itself at the end of ``__init__`` and then loops
    forever: it takes a URL from ``queue``, passes it to a ``Parser`` built
    from ``config``, forwards truthy parsed data to ``save_data_func`` and
    truthy discovered URLs to ``add_urls_func``.
    """

    def __init__(self, queue, config, save_data_func, add_urls_func):
        """Store the collaborators, build the parser and start the thread.

        Args:
            queue: Object providing ``get()`` and ``task_done()``; the
                source of URLs to process.
            config: Passed unchanged to ``Parser``.
            save_data_func: Called with the parsed data when it is truthy.
            add_urls_func: Called with the parsed URLs when they are truthy.
        """
        threading.Thread.__init__(self)
        self.queue = queue
        self.daemon = True

        self.save_data_func = save_data_func
        self.add_urls_func = add_urls_func

        self.parser = Parser(config)
        # NOTE: the thread is running as soon as the constructor returns.
        self.start()

    def run(self):
        """Process queued URLs one at a time, without ever returning normally.

        An ``Exception`` raised while handling a URL is logged together with
        its traceback and the loop moves on to the next URL.
        """
        while True:
            url = self.queue.get()

            try:
                data, urls = self.parser.parse(url)

                if data:
                    self.save_data_func(data)

                if urls:
                    self.add_urls_func(urls)

            except Exception as e:
                log('ERROR', url, e, '\n', traceback.format_exc())

            finally:
                # Acknowledge the item even if the logging call above fails
                # or a non-Exception error propagates, so the queue's
                # unfinished-task count stays balanced (this matters to
                # anything blocked in queue.join()).
                gc.collect()
                self.queue.task_done()
Exemple #2
0
    def __init__(self, queue, config, save_data_func, add_urls_func):
        """Set up the worker thread and start it immediately.

        Args:
            queue: Stored on the instance as ``self.queue``.
            config: Passed unchanged to ``Parser`` to build ``self.parser``.
            save_data_func: Stored as ``self.save_data_func``.
            add_urls_func: Stored as ``self.add_urls_func``.
        """
        # The base initialiser must run before any thread attribute is set.
        threading.Thread.__init__(self)
        self.daemon = True

        self.queue = queue
        self.save_data_func, self.add_urls_func = save_data_func, add_urls_func
        self.parser = Parser(config)

        # Start last, once every attribute is in place.
        self.start()