Beispiel #1
0
    def run(self):
        """Process tasks from ``self.taskqueue`` until the run flag is cleared.

        Each iteration fetches one task, asks ``Crawler().create`` for a
        crawler matching the task's type/key/data, runs ``crawl()`` on it and
        reports the outcome through ``Scheduler.finish``.  The loop exits,
        after printing a "stops" message, as soon as ``self.runValue.value``
        is falsy; the flag is re-checked at the top of every iteration,
        including after an idle sleep.

        Returns:
            None.
        """
        while True:
            if not self.runValue.value:
                # Parenthesised single-argument form: prints the same text
                # on Python 2 and is also valid syntax on Python 3.
                print("%s stops" % self.name)
                break
            # NOTE(review): signalget() is defined elsewhere; presumably it
            # polls for control signals -- confirm against its definition.
            self.signalget()
            start_t = time.time()
            self.ctask = self.taskqueue.get()
            if self.ctask.empty:
                # The returned task is flagged as empty (presumably nothing
                # was queued -- confirm): back off for 10 seconds, then retry.
                time.sleep(10)
                continue
            end_t = time.time()
            # Time spent inside taskqueue.get() for this task.
            self.log_wait_task(end_t - start_t)
            self.log_get_task()
            # Restart the clock so the next measurement covers crawler
            # creation plus the crawl itself.
            start_t = time.time()

            c = Crawler().create(self.ctask.type, self.ctask.key, self.ctask.data)
            if c:
                try:
                    c.crawl()
                    success = True
                    logger.info("CRAWL SUCCEED - <%s> %s" % (self.taskqueue.queueid, c))
                    end_t = time.time()
                    # Only successful crawls reach log_done_task().
                    self.log_done_task(end_t - start_t)
                except Exception:
                    # Broad catch on purpose: one failing task must not stop
                    # the worker loop; the failure is logged and reported.
                    msg = get_exception_info()
                    success = False
                    logger.error("CRAWL FAILED - <%s> %s, %s" % (self.taskqueue.queueid, c, msg))
            else:
                # create() returned a falsy value, so there is nothing to crawl.
                logger.error("CRAWL FAILED - <%s> %s" % (self.taskqueue.queueid, self.ctask))
                success = False

            # Always report the outcome; with no crawler an empty dict is
            # passed in place of the crawler's data.
            Scheduler.finish(self.ctask.type, self.ctask.key, c.data if c else {}, success)
Beispiel #2
0
    def run(self):
        """Run the crawl loop until ``self.runValue.value`` becomes falsy.

        Per iteration: fetch a task from ``self.taskqueue``, build a crawler
        for it with ``Crawler().create``, call ``crawl()`` and hand the
        result to ``Scheduler.finish`` together with a success flag.  When
        the run flag is found falsy at the top of an iteration, a "stops"
        message is printed and the method returns.

        Returns:
            None.
        """
        while True:
            if not self.runValue.value:
                # Single parenthesised argument: identical output on
                # Python 2, and no longer a SyntaxError on Python 3.
                print("%s stops" % self.name)
                break
            # NOTE(review): signalget() is opaque from here; presumably it
            # handles pending control signals -- verify.
            self.signalget()
            start_t = time.time()
            self.ctask = self.taskqueue.get()
            if self.ctask.empty:
                # Task is marked empty (presumably the queue had no work --
                # confirm): sleep 10 seconds before polling again.
                time.sleep(10)
                continue
            end_t = time.time()
            # Duration of the taskqueue.get() call for this task.
            self.log_wait_task(end_t - start_t)
            self.log_get_task()
            # Reset the clock: from here on it measures crawler creation
            # and the crawl.
            start_t = time.time()

            c = Crawler().create(self.ctask.type, self.ctask.key,
                                 self.ctask.data)
            if c:
                try:
                    c.crawl()
                    success = True
                    logger.info("CRAWL SUCCEED - <%s> %s" %
                                (self.taskqueue.queueid, c))
                    end_t = time.time()
                    # The done-duration is logged for successful crawls only.
                    self.log_done_task(end_t - start_t)
                except Exception:
                    # Deliberately broad so a single bad task cannot kill
                    # the loop; the error is logged and reported below.
                    msg = get_exception_info()
                    success = False
                    logger.error("CRAWL FAILED - <%s> %s, %s" %
                                 (self.taskqueue.queueid, c, msg))
            else:
                # No crawler could be created for this task.
                logger.error("CRAWL FAILED - <%s> %s" %
                             (self.taskqueue.queueid, self.ctask))
                success = False

            # Report the outcome in every case; an empty dict stands in
            # for the crawler data when no crawler exists.
            Scheduler.finish(self.ctask.type, self.ctask.key,
                             c.data if c else {}, success)