Ejemplo n.º 1
0
class RedisSource(Source):
    """
    Source whose items are taken from a redis list.

    Items are popped from the head of the list stored under ``redis_key``
    and can be appended back to its tail with :meth:`push_back`.
    """
    def __init__(self, redis_host, redis_port, redis_key, **kwargs):
        """
        Connect to redis and remember which list to consume.

        :param redis_host: host of the redis server.
        :param redis_port: port of the redis server.
        :param redis_key: key of the redis list items are popped from.
        :param kwargs: extra keyword arguments; accepted and ignored here.
        """
        # Prefer the custom client when it is installed, otherwise fall
        # back to the ``redis`` package.
        try:
            from custom_redis.client import Redis
        except ImportError:
            try:
                from redis import Redis
            except ImportError:
                warnings.warn(
                    "RedisSource depends on redis, try: pip install redis. ")
                # Raise SystemExit directly: the ``exit`` builtin is only
                # injected by the ``site`` module and is not always present.
                raise SystemExit(1)
        self.redis_key = redis_key
        self.redis_conn = Redis(redis_host, redis_port)

    async def __anext__(self):
        """
        Return the next item of the asynchronous iterator.

        The item is whatever ``lpop`` on the configured list returns.
        NOTE(review): the redis call is made synchronously inside this
        coroutine, and an empty list presumably yields ``None`` instead of
        raising ``StopAsyncIteration`` -- confirm this is what callers expect.

        :return: the value popped from the head of the redis list.
        """
        return self.redis_conn.lpop(self.redis_key)

    async def push_back(self, data):
        """
        Append ``data`` to the tail of the redis list.

        :param data: the value to push with ``rpush``.
        """
        self.redis_conn.rpush(self.redis_key, data)

    @staticmethod
    def enrich_parser(sub_parser):
        """
        Register the command line options of this source on ``sub_parser``.

        :param sub_parser: an argparse-style parser exposing ``add_argument``.
        """
        sub_parser.add_argument("-rh", "--redis-host", default="0.0.0.0")
        # type=int keeps a port given on the command line consistent with
        # the integer default instead of leaving it as a string.
        sub_parser.add_argument(
            "-rp", "--redis-port", type=int, default=6379)
        sub_parser.add_argument("-rk", "--redis-key", default="download_meta")
        sub_parser.add_argument("--idle", action="store_true", help="Idle... ")
class MultiDownloadProcess(Logger, MultiThreadClosing):
    """
    Pop download tasks from a redis list and run them on worker threads.

    Each task is decoded into a sequence of ``(url, filename, path)``
    triples that is downloaded by one ``DownloaderEngine`` taken from a
    fixed-size pool. Subclasses must implement :meth:`decode` and
    :meth:`callback`.
    """

    name = "multidownload_process"

    def __init__(self, settings):
        """
        :param settings: settings reference handed to ``Logger`` and to
            every ``DownloaderEngine`` that :meth:`start` creates.
        """
        self.settings_file = settings
        Logger.__init__(self, settings)
        self.set_logger()
        MultiThreadClosing.__init__(self)
        # Pool of idle downloader engines; it is filled by start().
        self.de_queue = Queue()
        # NOTE(review): self.settings is presumably populated by
        # Logger.__init__ -- confirm.
        if self.settings.get("CUSTOM_REDIS"):
            from custom_redis.client import Redis
        else:
            from redis import Redis
        self.redis_conn = Redis(self.settings.get("REDIS_HOST"),
                                self.settings.get("REDIS_PORT"))
        # When True, processing() calls the engine's "download_small_file"
        # method instead of "start".
        self.small = False

    @classmethod
    def parse_args(cls):
        """
        Build an instance from the command line (``-s/--settings``).

        :return: a new instance of ``cls``.
        """
        parser = ArgumentParser()
        parser.add_argument("-s", "--settings", dest="settings", default="settings.py")
        return cls(**vars(parser.parse_args()))

    def is_small(self):
        """
        Switch this process to small-file mode (sets ``self.small``).
        """
        self.small = True

    def callback(self, item, flag):
        """
        Hook called once all downloads of a task ran without raising.

        :param item: the raw task popped from redis.
        :param flag: truthy when at least one download returned a truthy
            result.
        :return:
        """
        raise NotImplementedError()

    def decode(self, item):
        """
        Turn a raw task popped from redis into download descriptions.

        :param item: the raw task popped from redis.
        :return: a sequence of ``(url, filename, directory)`` triples; a
            falsy value makes :meth:`start` skip the task.
        """
        raise NotImplementedError()

    def processing(self, de, url_paths, item):
        """
        Worker thread body: download every entry of a task with one engine.

        :param de: the downloader engine borrowed from ``self.de_queue``.
        :param url_paths: sequence of ``(url, filename, path)`` triples.
        :param item: the raw task, forwarded to :meth:`callback`.
        """
        if self.small:
            downloader = "download_small_file"
        else:
            downloader = "start"
        flag = False
        try:
            t1 = time.time()
            try:
                length = len(url_paths)
                for index, (url, filename, path) in enumerate(url_paths):
                    result = getattr(de, downloader)(url=url, filename=filename, path=path)
                    self.logger.info("download process %s/%s completed"%(index+1, length))
                    # The task counts as successful if any download is.
                    flag = flag or result
                t2 = time.time()
                self.logger.info("download finished, success:%s, seconds:%.4f"%(flag,  t2-t1))
            finally:
                # Always hand the engine back, even when a download raised;
                # otherwise every failure permanently shrinks the pool until
                # start() can no longer obtain an engine.
                self.de_queue.put(de)
            self.callback(item, flag)
            self.logger.info("callback finished, seconds:%.4f"%(time.time()-t2))
        except Exception:
            self.logger.error(traceback.format_exc())
        finally:
            try:
                self.threads.remove(current_thread())
            except ValueError:
                # This thread was not (or is no longer) registered.
                pass
            self.logger.info("the count of thread which alives is %s. "%len(self.threads))

    def start(self):
        """
        Run the dispatch loop until ``self.alive`` turns falsy, then wait
        for the worker threads that are still running.
        """
        self.logger.debug("start process %s"%self.name)
        concurrent_download_count = self.settings.get("CONCURRENT_DOWNLOAD_COUNT", 10)
        # One engine per allowed concurrent download.
        for _ in range(concurrent_download_count):
            DE = DownloaderEngine(self.settings_file, signal_open=False)
            DE.set_logger(self.logger)
            self.de_queue.put(DE)
        self.logger.debug("setup %s des"%concurrent_download_count)
        while self.alive:
            try:
                item = self.redis_conn.lpop(self.settings.get("QUEUE_KEY"))
            except Exception:
                self.logger.error("redis error %s"%traceback.format_exc())
                item = None
            if not item:
                self.logger.debug("got no message...")
                time.sleep(1)
                continue
            self.logger.debug("%s tasks  to be continue..."%self.redis_conn.llen(self.settings.get("QUEUE_KEY")))
            try:
                url_paths = self.decode(item)
            except Exception:
                self.logger.error(traceback.format_exc())
                url_paths = []
            if not url_paths:
                continue
            # Wait until an engine is idle, then run the task on a thread.
            while True:
                try:
                    DE = self.de_queue.get_nowait()
                except Empty:
                    time.sleep(1)
                    continue
                th = Thread(target=self.processing, args=(DE, url_paths, item))
                self.set_force_interrupt(th)
                self.logger.debug("start a new thread. ")
                th.start()
                break
        # filter() returns an always-truthy iterator on Python 3, which made
        # this wait loop endless; any() tests the threads themselves. The
        # list is copied because workers remove themselves concurrently.
        while any(th.is_alive() for th in list(self.threads)):
            time.sleep(1)
class MultiDownloadProcess(Logger, MultiThreadClosing):
    """
    Pop download tasks from a redis list and run them on worker threads.

    Each task is decoded into a sequence of ``(url, filename, path)``
    triples that is downloaded by one ``DownloaderEngine`` taken from a
    fixed-size pool. Subclasses must implement :meth:`decode` and
    :meth:`callback`.
    """

    name = "multidownload_process"

    def __init__(self, settings):
        """
        :param settings: settings reference handed to ``Logger`` and to
            every ``DownloaderEngine`` that :meth:`start` creates.
        """
        self.settings_file = settings
        Logger.__init__(self, settings)
        self.set_logger()
        MultiThreadClosing.__init__(self)
        # Pool of idle downloader engines; it is filled by start().
        self.de_queue = Queue()
        # NOTE(review): self.settings is presumably populated by
        # Logger.__init__ -- confirm.
        if self.settings.get("CUSTOM_REDIS"):
            from custom_redis.client import Redis
        else:
            from redis import Redis
        self.redis_conn = Redis(self.settings.get("REDIS_HOST"),
                                self.settings.get("REDIS_PORT"))
        # When True, processing() calls the engine's "download_small_file"
        # method instead of "start".
        self.small = False

    @classmethod
    def parse_args(cls):
        """
        Build an instance from the command line (``-s/--settings``).

        :return: a new instance of ``cls``.
        """
        parser = ArgumentParser()
        parser.add_argument("-s",
                            "--settings",
                            dest="settings",
                            default="settings.py")
        return cls(**vars(parser.parse_args()))

    def is_small(self):
        """
        Switch this process to small-file mode (sets ``self.small``).
        """
        self.small = True

    def callback(self, item, flag):
        """
        Hook called once all downloads of a task ran without raising.

        :param item: the raw task popped from redis.
        :param flag: truthy when at least one download returned a truthy
            result.
        :return:
        """
        raise NotImplementedError()

    def decode(self, item):
        """
        Turn a raw task popped from redis into download descriptions.

        :param item: the raw task popped from redis.
        :return: a sequence of ``(url, filename, directory)`` triples; a
            falsy value makes :meth:`start` skip the task.
        """
        raise NotImplementedError()

    def processing(self, de, url_paths, item):
        """
        Worker thread body: download every entry of a task with one engine.

        :param de: the downloader engine borrowed from ``self.de_queue``.
        :param url_paths: sequence of ``(url, filename, path)`` triples.
        :param item: the raw task, forwarded to :meth:`callback`.
        """
        if self.small:
            downloader = "download_small_file"
        else:
            downloader = "start"
        flag = False
        try:
            t1 = time.time()
            try:
                length = len(url_paths)
                for index, (url, filename, path) in enumerate(url_paths):
                    result = getattr(de, downloader)(url=url,
                                                     filename=filename,
                                                     path=path)
                    self.logger.info("download process %s/%s completed" %
                                     (index + 1, length))
                    # The task counts as successful if any download is.
                    flag = flag or result
                t2 = time.time()
                self.logger.info(
                    "download finished, success:%s, seconds:%.4f" %
                    (flag, t2 - t1))
            finally:
                # Always hand the engine back, even when a download raised;
                # otherwise every failure permanently shrinks the pool until
                # start() can no longer obtain an engine.
                self.de_queue.put(de)
            self.callback(item, flag)
            self.logger.info("callback finished, seconds:%.4f" %
                             (time.time() - t2))
        except Exception:
            self.logger.error(traceback.format_exc())
        finally:
            try:
                self.threads.remove(current_thread())
            except ValueError:
                # This thread was not (or is no longer) registered.
                pass
            self.logger.info("the count of thread which alives is %s. " %
                             len(self.threads))

    def start(self):
        """
        Run the dispatch loop until ``self.alive`` turns falsy, then wait
        for the worker threads that are still running.
        """
        self.logger.debug("start process %s" % self.name)
        concurrent_download_count = self.settings.get(
            "CONCURRENT_DOWNLOAD_COUNT", 10)
        # One engine per allowed concurrent download.
        for _ in range(concurrent_download_count):
            DE = DownloaderEngine(self.settings_file, signal_open=False)
            DE.set_logger(self.logger)
            self.de_queue.put(DE)
        self.logger.debug("setup %s des" % concurrent_download_count)
        while self.alive:
            try:
                item = self.redis_conn.lpop(self.settings.get("QUEUE_KEY"))
            except Exception:
                self.logger.error("redis error %s" % traceback.format_exc())
                item = None
            if not item:
                self.logger.debug("got no message...")
                time.sleep(1)
                continue
            self.logger.debug(
                "%s tasks  to be continue..." %
                self.redis_conn.llen(self.settings.get("QUEUE_KEY")))
            try:
                url_paths = self.decode(item)
            except Exception:
                self.logger.error(traceback.format_exc())
                url_paths = []
            if not url_paths:
                continue
            # Wait until an engine is idle, then run the task on a thread.
            while True:
                try:
                    DE = self.de_queue.get_nowait()
                except Empty:
                    time.sleep(1)
                    continue
                th = Thread(target=self.processing,
                            args=(DE, url_paths, item))
                self.set_force_interrupt(th)
                self.logger.debug("start a new thread. ")
                th.start()
                break
        # filter() returns an always-truthy iterator on Python 3, which made
        # this wait loop endless; any() tests the threads themselves. The
        # list is copied because workers remove themselves concurrently.
        while any(th.is_alive() for th in list(self.threads)):
            time.sleep(1)