コード例 #1
0
ファイル: crawler.py プロジェクト: ssd4561/web2kindle
class Downloader(Thread):
    """Worker thread that takes tasks off the download queue and fetches them.

    A task is a dict-like object providing ``tid``, ``url`` and ``method``;
    an optional ``meta`` mapping is forwarded as keyword arguments to
    ``session.request``. After the request the task is put on the
    downloader-to-parser queue with the outcome stored in
    ``task['response']``.
    """

    def __init__(self,
                 to_download_q: PriorityQueue,
                 downloader_parser_q: PriorityQueue,
                 result_q: Queue,
                 name: str,
                 session=None):
        """Store the queues and the HTTP session used by this worker.

        Args:
            to_download_q: queue this worker takes tasks from.
            downloader_parser_q: queue fetched tasks are put on.
            result_q: stored on the instance; not used by this class itself.
            name: thread name, also used as the logger name.
            session: object exposing ``request(method, url, **kwargs)``.
                When omitted, a new ``requests`` session is created for
                this instance.
        """
        super().__init__(name=name)
        self.to_download_q = to_download_q
        self.downloader_parser_q = downloader_parser_q
        self.result_q = result_q
        # The fallback session is built here rather than in the signature:
        # a default written in the signature is evaluated once, when the
        # class is defined, and would then be shared by every instance.
        self.session = requests.session() if session is None else session

        self._exit = False

        self.log = Log(self.name)

    def exit(self):
        """Ask the ``run`` loop to stop after the current iteration."""
        self._exit = True

    def request(self):
        """Handle at most one task from the download queue.

        If the queue is empty the thread blocks on ``COND`` until another
        thread notifies it, then returns without doing any work.
        """
        response = None

        try:
            task = self.to_download_q.get_nowait()
            TaskManager.register(task['tid'])
        except Empty:
            self.log.log_it(
                "Scheduler to Downloader队列为空,{}等待中。".format(self.name),
                'DEBUG')
            # NOTE(review): wait() has no timeout, so a worker parked here
            # only observes exit() after some other thread notifies COND.
            with COND:
                COND.wait()
                self.log.log_it(
                    "Downloader to Parser队列不为空。{}被唤醒。".format(self.name),
                    'DEBUG')
            return

        self.log.log_it("请求 {}".format(task['url']), 'INFO')
        try:
            response = self.session.request(task['method'], task['url'],
                                            **task.get('meta', {}))
        except Exception as e:
            traceback.print_exc()
            # `response` is still None here: the assignment above never ran.
            self.log.log_it(
                "网络请求错误。错误信息:{} URL:{} Response:{}".format(
                    str(e), task['url'], response), 'INFO')
            retry(task, self.to_download_q)
            return

        # A falsy response object is passed on as None.
        # NOTE(review): presumably this relies on requests.Response being
        # falsy for 4xx/5xx statuses - confirm that is intended.
        task['response'] = response if response else None

        self.downloader_parser_q.put(task)

    def run(self):
        """Keep processing tasks until ``exit`` has been called."""
        while not self._exit:
            self.request()
コード例 #2
0
ファイル: crawler.py プロジェクト: zhangshaoze/web2kindle
class Downloader(Thread):
    """Worker thread that takes tasks off the download queue and fetches them.

    A task is a dict-like object providing ``tid``, ``url`` and ``method``;
    optional keys are ``meta`` (keyword arguments for ``session.request``),
    ``retry`` (maximum number of retries) and ``retried`` (retries used so
    far). After the request the task is put on the downloader-to-parser
    queue with the outcome stored under ``response``.
    """

    def __init__(self, to_download_q: PriorityQueue,
                 downloader_parser_q: PriorityQueue,
                 result_q: Queue,
                 name: str,
                 lock,
                 session=None):
        """Store the queues, the lock and the HTTP session for this worker.

        Args:
            to_download_q: queue this worker takes tasks from.
            downloader_parser_q: queue fetched tasks are put on.
            result_q: stored on the instance; not used by this class itself.
            name: thread name, also used as the logger name.
            lock: passed to ``TaskManager``.
            session: object exposing ``request(method, url, **kwargs)``.
                When omitted, a new ``requests`` session is created for
                this instance.
        """
        super().__init__(name=name)
        self.to_download_q = to_download_q
        self.downloader_parser_q = downloader_parser_q
        self.result_q = result_q
        # The fallback session is built here rather than in the signature:
        # a default written in the signature is evaluated once, when the
        # class is defined, and would then be shared by every instance.
        self.session = requests.session() if session is None else session

        self._exit = False

        self.log = Log(self.name)
        self.lock = lock
        self.task_manager = TaskManager(self.lock)

    def exit(self):
        """Ask the ``run`` loop to stop after the current iteration."""
        self._exit = True

    def _requeue_if_retryable(self, task) -> bool:
        """Put *task* back on the download queue if retries remain.

        Returns True when the task was re-queued. ``retried`` is read with
        the same default (0) for both the check and the increment, so a
        task with ``retry=N`` is retried exactly N times.
        """
        max_retries = task.get('retry', None)
        if not max_retries:
            return False
        retried = task.get('retried', 0)
        if retried >= max_retries:
            return False
        task.update({'retried': retried + 1})
        self.to_download_q.put(task)
        return True

    def request(self):
        """Handle at most one task from the download queue.

        If the queue is empty the thread blocks on ``COND`` until another
        thread notifies it, then returns without doing any work.
        """
        response = None

        try:
            task = self.to_download_q.get_nowait()
            self.task_manager.register(task['tid'])
        except Empty:
            self.log.log_it("Scheduler to Downloader队列为空,{}等待中。".format(self.name), 'DEBUG')
            # NOTE(review): wait() has no timeout, so a worker parked here
            # only observes exit() after some other thread notifies COND.
            with COND:
                COND.wait()
                self.log.log_it("Downloader to Parser队列不为空。{}被唤醒。".format(self.name), 'DEBUG')
            return

        self.log.log_it("请求 {}".format(task['url']), 'INFO')
        try:
            response = self.session.request(task['method'], task['url'], **task.get('meta', {}))
        except Exception as e:
            traceback.print_exc()
            # `response` is still None here: the assignment above never ran.
            self.log.log_it("网络请求错误。错误信息:{} URL:{} Response:{}".format(str(e), task['url'], response), 'INFO')
            # NOTE(review): a task that is not re-queued stays registered
            # with the task manager - confirm that is intended.
            self._requeue_if_retryable(task)
            return

        # A falsy response object is passed on as None.
        task.update({'response': response if response else None})
        self.downloader_parser_q.put(task)

    def run(self):
        """Keep processing tasks until ``exit`` has been called."""
        while not self._exit:
            self.request()
コード例 #3
0
class SendEmail:
    """Send files as e-mail attachments to a Kindle address over SMTP.

    Usable as a context manager: entering connects and logs in, leaving
    closes the SMTP session.
    """

    def __init__(self):
        """Read the SMTP settings from ``MAIN_CONFIG`` and create the client.

        If a required key is missing, an error is logged and initialisation
        stops early.
        """
        self.log = Log('SendEmail2Kindle')

        try:
            self.username = MAIN_CONFIG['EMAIL_USERNAME']
            self.password = MAIN_CONFIG['PASSWORD']
            self.smtp_addr = MAIN_CONFIG['SMTP_ADDR']
            self.kindle_addr = MAIN_CONFIG['KINDLE_ADDR']
        except KeyError:
            self.log.log_it("无法实例化SendEmail2Kindle,请确保config.yml配置完整", 'ERROR')
            # NOTE(review): the instance is left without sender/sended/client
            # here, so later calls will fail with AttributeError - consider
            # raising instead.
            return

        self.sender = self.username
        self.sended = []
        self.client = smtplib.SMTP()

    def connect(self) -> bool:
        """Connect to the SMTP server and log in.

        Returns:
            True on success, False if connecting or logging in failed
            (the failure is logged).
        """
        try:
            self.log.log_it("正在连接邮件服务器", 'INFO')
            self.client.connect(self.smtp_addr)
            self.log.log_it("正在登录服务器", 'INFO')
            self.client.login(self.username, self.password)
            return True
        except smtplib.SMTPAuthenticationError:
            self.log.log_it("邮箱用户名或密码错误", 'WARN')
            return False
        except Exception as e:
            self.log.log_it("连接错误。错误信息:{}".format(str(e)), 'INFO')
            return False

    def disconnect(self) -> None:
        """Close the SMTP session."""
        self.client.quit()

    def __enter__(self):
        if not self.connect():
            raise Exception("SendEmail2Kindle连接服务器错误")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.disconnect()

    def send_file(self, file_path: str) -> bool:
        """Mail one file to the Kindle address.

        Args:
            file_path: path of the file to attach.

        Returns:
            True if the server accepted the message (the path is then
            appended to ``self.sended``), False if an SMTP error occurred
            (the error is logged).
        """
        import os  # local import: only needed for the attachment name

        msg = MIMEMultipart()
        msg['Subject'] = 'Web2kindle'
        msg['From'] = self.sender
        msg['To'] = self.kindle_addr

        # Read through a context manager so the handle is always closed.
        with open(file_path, 'rb') as attachment_file:
            attachment = MIMEApplication(attachment_file.read())
        # Advertise only the base name; the recipient has no use for the
        # sender's local directory layout.
        attachment.add_header('Content-Disposition',
                              'attachment',
                              filename=os.path.basename(file_path))
        msg.attach(attachment)
        try:
            self.client.sendmail(self.sender, self.kindle_addr,
                                 msg.as_string())
        except smtplib.SMTPRecipientsRefused:
            self.log.log_it("所有收件人都被拒绝。", 'WARN')
        except smtplib.SMTPSenderRefused:
            self.log.log_it("发件人地址被拒绝。", 'WARN')
        except smtplib.SMTPDataError:
            self.log.log_it("服务器拒绝接受邮件数据。", 'WARN')
        except smtplib.SMTPException as e:
            self.log.log_it(
                "未知错误。FILE_PATH:{},ERRINFO:{}".format(file_path, str(e)),
                'WARN')
        else:
            self.sended.append(file_path)
            return True
        return False

    def send_files(self, file_paths: list) -> None:
        """Mail every file in *file_paths*, one message per file."""
        for file_path in file_paths:
            self.log.log_it("正在发送:{}".format(file_path), 'INFO')
            # Report success only when the send really succeeded; failures
            # have already been logged by send_file.
            if self.send_file(file_path):
                self.log.log_it("{}发送成功".format(file_path), 'INFO')
コード例 #4
0
ファイル: html2kindle.py プロジェクト: ssd4561/web2kindle
class HTML2Kindle:
    """Render crawled articles to HTML/OPF/NCX files and run kindlegen on them.

    Usable as a context manager: on exit every intermediate file recorded
    in ``self.to_remove`` is deleted.
    """

    content_template = Template(
        read_file('./web2kindle/templates/kindle_content.html'))
    opf_template = Template(
        read_file('./web2kindle/templates/kindle_opf.html'))
    index_template = Template(
        read_file('./web2kindle/templates/kindle_table.html'))
    ncx_template = Template(read_file('./web2kindle/templates/kindle_ncx.ncx'))

    def __init__(self,
                 items: list,
                 path: str,
                 book_name: str,
                 kindlegen_path: str = KINDLE_GEN_PATH) -> None:
        """Store the book parameters and make sure the output directory exists.

        Args:
            items: article records; ``make_metadata`` reads indexes 1-5 of
                each record.
            path: directory the generated files are written to (created if
                missing).
            book_name: base name for the generated volumes.
            kindlegen_path: kindlegen executable; ``None`` falls back to
                ``KINDLE_GEN_PATH``.
        """
        self.kindlegen_path = kindlegen_path if kindlegen_path is not None else KINDLE_GEN_PATH
        self.items = items
        self.book_name = str(book_name)
        self.path = path
        self.to_remove = set()
        self.log = Log('HTML2Kindle')

        if not os.path.exists(path):
            os.makedirs(path)

    def __exit__(self, exc_type: None, exc_val: None, exc_tb: None) -> None:
        self.remove()

    def __enter__(self):
        return self

    def remove(self) -> None:
        """Delete every file marked for removal; missing files are ignored."""
        for file_path in self.to_remove:
            try:
                os.remove(file_path)
            except FileNotFoundError:
                pass

    def make_metadata(self, window: int = 20) -> None:
        """Write article pages plus table/OPF/NCX files, *window* items per volume.

        Every file written here is added to ``self.to_remove``.
        """
        window = int(window)

        # Split the book into volumes of `window` articles each.
        for index, items in enumerate(split_list(self.items, window)):
            volume_name = '{}_{}'.format(self.book_name, str(index))
            self.log.log_it("制作 {}_{} 的元数据".format(self.book_name, str(index)),
                            'INFO')
            opf = []
            table = []
            table_path = os.path.join(self.path, volume_name + '.html')
            opf_path = os.path.join(self.path, volume_name + '.opf')
            ncx_path = os.path.join(self.path, volume_name + '.ncx')

            # Mark for later removal.
            self.to_remove.update((table_path, opf_path, ncx_path))

            for item in items:
                kw = {
                    'author_name': item[5],
                    'voteup_count': item[4],
                    'created_time': item[3]
                }
                # File name = title + author.
                article_path = os.path.join(
                    self.path,
                    format_file_name(item[1], item[5]) + '.html')
                if os.path.exists(article_path):
                    # Avoid clobbering an existing article: insert a random
                    # suffix before the extension. splitext touches only
                    # the real extension, unlike replacing every '.html'
                    # occurrence in the path.
                    stem, extension = os.path.splitext(article_path)
                    article_path = stem + ''.join(random_char(3)) + extension

                self.make_content(item[1], item[2], article_path, kw)
                # Mark for later removal.
                self.to_remove.add(article_path)
                opf.append({
                    'id': article_path,
                    'href': article_path,
                    'title': item[1]
                })
                table.append({'href': article_path, 'name': item[1]})

            self.make_table(table, table_path)
            self.make_opf(volume_name, opf, table_path, opf_path, ncx_path)
            self.make_ncx(volume_name, opf, table_path, ncx_path)

    def make_opf(self, title: str, navigation: list, table_path: str,
                 opf_path: str, ncx_path: str) -> None:
        """Render the OPF template and write it to *opf_path*."""
        rendered_content = self.opf_template.render(title=title,
                                                    navigation=navigation,
                                                    table_href=table_path,
                                                    ncx_href=ncx_path)
        with codecs.open(opf_path, 'w', 'utf_8_sig') as f:
            f.write(rendered_content)

    def make_ncx(self, title: str, navigation: list, table_path: str,
                 opf_path: str) -> None:
        """Render the NCX template and write it to *opf_path*.

        Despite its name, *opf_path* is the output path of the NCX file.
        """
        rendered_content = self.ncx_template.render(title=title,
                                                    navigation=navigation,
                                                    table_href=table_path)
        with codecs.open(opf_path, 'w', 'utf_8_sig') as f:
            f.write(rendered_content)

    def make_content(self,
                     title: str,
                     content: str,
                     path: str,
                     kw: dict = None) -> None:
        """Render one article page and write it to *path*."""
        rendered_content = self.content_template.render(title=title,
                                                        content=content,
                                                        kw=kw)
        with codecs.open(path, 'w', 'utf_8_sig') as f:
            f.write(rendered_content)

    def make_table(self, navigation: list, path: str) -> None:
        """Render the table-of-contents page and write it to *path*."""
        rendered_content = self.index_template.render(navigation=navigation)
        with codecs.open(path, 'w', 'utf_8_sig') as f:
            f.write(rendered_content)

    @staticmethod
    def _run_kindlegen(kindlegen_path: str, opf_path: str,
                       log_path: str = None):
        """Run kindlegen on one OPF file without going through a shell.

        The command is passed as an argument list, so book names with
        spaces or shell metacharacters (titles come from crawled pages)
        can neither break nor alter the command.

        Args:
            kindlegen_path: kindlegen executable.
            opf_path: OPF file to convert.
            log_path: if given, kindlegen's stdout is written to this file
                (truncating it); otherwise stdout is inherited.

        Returns:
            kindlegen's exit status, or None if it could not be started.
        """
        import subprocess
        import sys

        command = [kindlegen_path, '-dont_append_source', opf_path]
        try:
            if log_path is None:
                return subprocess.run(command).returncode
            with open(log_path, 'w') as log_file:
                return subprocess.run(command, stdout=log_file).returncode
        except OSError as e:
            # Stay best-effort like the old shell invocation, but say why.
            sys.stderr.write(
                'kindlegen could not be started ({}): {}\n'.format(
                    kindlegen_path, e))
            return None

    @staticmethod
    def _make_book(kindlegen_path: str, log_path: str, path: str) -> None:
        """Convert one OPF file; worker entry point for ``make_book_multi``.

        *log_path* is accepted for interface compatibility but not used:
        output is not redirected here.
        """
        HTML2Kindle._run_kindlegen(kindlegen_path, path)

    def make_book_multi(self, rootdir: str, overwrite: bool = True) -> None:
        """Convert every selected OPF file in *rootdir* using a process pool."""
        from multiprocessing import Pool
        self.log.log_it("新建 {} 个线程制作mobi文件.正在制作中,请稍后".format(str(cpu_count())),
                        'INFO')
        opf_list = self.get_opf(rootdir, overwrite)
        worker = partial(self._make_book, self.kindlegen_path,
                         os.path.join(self.path, 'kindlegen.log'))
        # The context manager releases the worker processes once map() has
        # returned; the pool used to be left open.
        with Pool(cpu_count()) as pool:
            pool.map(worker, opf_list)

    def make_book(self, rootdir: str, overwrite: bool = True) -> None:
        """Convert every selected OPF file in *rootdir*, one after another.

        kindlegen's stdout goes to ``kindlegen.log`` in ``self.path``; the
        log is rewritten for each OPF file.
        """
        opf_list = self.get_opf(rootdir, overwrite)
        self.log.log_it("正在制作中,请稍后", 'INFO')
        log_path = os.path.join(self.path, 'kindlegen.log')
        for opf_path in opf_list:
            # get_opf already returns paths joined with rootdir; joining
            # again would duplicate a relative rootdir.
            self._run_kindlegen(self.kindlegen_path, opf_path, log_path)

    def get_opf(self, rootdir: str, overwrite: bool) -> list:
        """List the OPF files in *rootdir* that should be converted.

        Args:
            rootdir: directory to scan (not recursive).
            overwrite: when False, skip OPF files for which a ``.mobi``
                file with the same base name already exists.

        Returns:
            Paths of the selected OPF files, each joined with *rootdir*.
        """
        file_names = [
            name for name in os.listdir(rootdir)
            if not os.path.isdir(os.path.join(rootdir, name))
        ]
        # Compare base names, so only the extension is swapped and an
        # 'opf' inside the base name cannot be rewritten by accident.
        mobi_stems = {
            os.path.splitext(name)[0]
            for name in file_names
            if os.path.splitext(name)[1].lower() == '.mobi'
        }

        result = []
        for name in file_names:
            stem, extension = os.path.splitext(name)
            if extension.lower() != '.opf':
                continue
            if overwrite or stem not in mobi_stems:
                result.append(os.path.join(rootdir, name))
        return result
コード例 #5
0
ファイル: crawler.py プロジェクト: ssd4561/web2kindle
class Crawler:
    """Start the downloader, parser and resulter threads and wait for them.

    ``start`` blocks until the task manager reports that no work is left
    and all resulter threads have finished.
    """

    def __init__(self,
                 to_download_q: PriorityQueue,
                 downloader_parser_q: PriorityQueue,
                 result_q: Queue,
                 parser_worker_count,
                 downloader_worker_count,
                 resulter_worker_count,
                 session=None):
        """Store the queues and worker counts and create the task manager.

        Args:
            to_download_q: queue of tasks waiting to be downloaded.
            downloader_parser_q: queue of downloaded tasks waiting to be parsed.
            result_q: queue of parsed tasks waiting for result handling.
            parser_worker_count: number of Parser threads (coerced to int).
            downloader_worker_count: number of Downloader threads (coerced
                to int).
            resulter_worker_count: number of Resulter threads (coerced to int).
            session: HTTP session handed to every Downloader. When omitted,
                a new ``requests`` session is created for this crawler.
        """
        self.parser_worker_count = int(parser_worker_count)
        self.downloader_worker_count = int(downloader_worker_count)
        self.resulter_worker_count = int(resulter_worker_count)
        self.downloader_worker = []
        self.parser_worker = []
        self.resulter_worker = []
        self.log = Log("Crawler")

        self.to_download_q = to_download_q
        self.downloader_parser_q = downloader_parser_q
        self.result_q = result_q

        self.task_manager = TaskManager(self.to_download_q)
        # The fallback session is built here rather than in the signature:
        # a default written in the signature is evaluated once, when the
        # class is defined, and would then be shared by every Crawler.
        self.session = requests.session() if session is None else session
        self.lock = LOCK

        self.task_manager_thread = Thread(target=self.task_manager.run)

    def start(self):
        """Start all worker threads and block until the crawl has finished."""
        self.task_manager_thread.start()

        for i in range(self.downloader_worker_count):
            _worker = Downloader(
                self.to_download_q,
                self.downloader_parser_q,
                self.result_q,
                "Downloader {}".format(i),
                self.session,
            )
            self.downloader_worker.append(_worker)
            self.log.log_it("启动 Downloader {}".format(i), 'INFO')
            _worker.start()

        for i in range(self.parser_worker_count):
            _worker = Parser(self.to_download_q, self.downloader_parser_q,
                             self.result_q, "Parser {}".format(i))
            self.parser_worker.append(_worker)
            self.log.log_it("启动 Parser {}".format(i), 'INFO')
            _worker.start()

        for i in range(self.resulter_worker_count):
            _worker = Resulter(self.to_download_q, self.downloader_parser_q,
                               self.result_q, "Resulter {}".format(i))
            self.resulter_worker.append(_worker)
            self.log.log_it("启动 Resulter {}".format(i), 'INFO')
            _worker.start()

        # Poll once per second until the task manager reports no work left.
        while True:
            time.sleep(1)
            if not self.task_manager.is_empty():
                continue

            for worker in self.downloader_worker:
                worker.exit()
            for worker in self.parser_worker:
                worker.exit()

            # Wait (polling, at least one second) until every resulter
            # thread has finished.
            while True:
                time.sleep(1)
                if not any(worker.is_alive()
                           for worker in self.resulter_worker):
                    break

            for worker in self.resulter_worker:
                worker.exit()

            self.task_manager.exit()
            # Reset the class-level flag before returning.
            TaskManager.ALLDONE = False
            return
コード例 #6
0
ファイル: crawler.py プロジェクト: ssd4561/web2kindle
class Resulter(Thread):
    """Worker thread that runs each task's ``resulter`` callback.

    Tasks are taken from the result queue. The callback can raise one of
    the ``Retry*`` exceptions to have the task put back on the download,
    parse or result queue.
    """

    def __init__(self, to_download_q: PriorityQueue,
                 downloader_parser_q: PriorityQueue, result_q: Queue,
                 name: str):
        """Store the three pipeline queues; *name* is the thread/logger name."""
        super().__init__(name=name)
        self.result_q = result_q
        self.downloader_parser_q = downloader_parser_q
        self.to_download_q = to_download_q

        self._exit = False
        self.log = Log(self.name)

    def exit(self):
        """Set the exit flag (``run`` itself stops on ``TaskManager.ALLDONE``)."""
        self._exit = True

    def result(self):
        """Handle at most one task from the result queue.

        Every call first notifies all threads waiting on ``COND``. If the
        queue is empty the call sleeps briefly and returns.
        """
        with COND:
            COND.notify_all()

        try:
            task = self.result_q.get_nowait()
        except Empty:
            time.sleep(0.1)
            return

        # Nothing follows this try statement, so each handler simply falls
        # off the end of the method once it has re-queued the task.
        try:
            self.log.log_it("正在处理{}".format(task['tid']))
            task['resulter'](task)

        # --- send the task back to the download stage ---
        except RetryDownload:
            self.log.log_it("RetryDownload Exception.Task{}".format(task),
                            'INFO')
            retry(task, self.to_download_q)
        except RetryDownloadEnForceNodelay:
            self.log.log_it(
                "RetryDownloadEnForce Exception.Task{}".format(task), 'INFO')
            self.to_download_q.put(task)
        except RetryDownloadNodelay:
            self.log.log_it(
                "RetryDownloadNodelay Exception.Task{}".format(task), 'INFO')
            retry_nodelay(task, self.to_download_q)

        # --- send the task back to the parse stage ---
        except RetryParse:
            self.log.log_it("RetryParse Exception.Task{}".format(task), 'INFO')
            retry(task, self.downloader_parser_q)
        except RetryParseEnForceNodelay:
            # Label matches the sibling EnForce handlers; it used to read
            # "RetryParse", indistinguishable from the handler above.
            self.log.log_it("RetryParseEnForce Exception.Task{}".format(task),
                            'INFO')
            self.downloader_parser_q.put(task)
        except RetryParseNodelay:
            self.log.log_it("RetryParseNodelay Exception.Task{}".format(task),
                            'INFO')
            retry_nodelay(task, self.downloader_parser_q)

        # --- send the task back to the result stage ---
        except RetryResult:
            self.log.log_it("RetryResult Exception.Task{}".format(task),
                            'INFO')
            retry(task, self.result_q)
        except RetryResultEnForceNodelay:
            self.log.log_it("RetryResultEnForce Exception.Task{}".format(task),
                            'INFO')
            self.result_q.put(task)
        except RetryResultNodelay:
            self.log.log_it("RetryResultNodelay Exception.Task{}".format(task),
                            'INFO')
            retry_nodelay(task, self.result_q)

        # --- anything else: log it and retry on the result queue ---
        except Exception as e:
            traceback.print_exc()
            self.log.log_it(
                "Resulter函数错误。错误信息:{}。Task:{}".format(str(e), task), 'WARN')
            retry(task, self.result_q)

    def run(self):
        """Loop until ``TaskManager.ALLDONE`` is set and the result queue is empty."""
        while (not TaskManager.ALLDONE) or (not self.result_q.empty()):
            self.result()
コード例 #7
0
ファイル: crawler.py プロジェクト: ssd4561/web2kindle
class Parser(Thread):
    """Worker thread that runs each downloaded task's ``parser`` callback.

    The callback returns ``(task_with_parsed_data, new_tasks)``. New tasks
    are registered and put on the download queue; the parsed task, when
    truthy, is put on the result queue by ``run``.
    """

    def __init__(self, to_download_q: PriorityQueue,
                 downloader_parser_q: PriorityQueue, result_q: Queue,
                 name: str):
        """Store the three pipeline queues; *name* is the thread/logger name."""
        super().__init__(name=name)
        self.downloader_parser_q = downloader_parser_q
        self.to_download_q = to_download_q
        self.result_q = result_q

        self._exit = False
        self.log = Log(self.name)

    def exit(self):
        """Ask the ``run`` loop to stop after the current iteration."""
        self._exit = True

    def parser(self):
        """Parse at most one task from the downloader-to-parser queue.

        Returns:
            The first element returned by the task's ``parser`` callback,
            or None when the queue was empty or the callback raised.
        """
        # Wake any thread waiting on COND before looking for work.
        with COND:
            COND.notify_all()
        try:
            task = self.downloader_parser_q.get_nowait()
        except Empty:
            time.sleep(0.1)
            with COND:
                COND.notify_all()
            return

        try:
            task_with_parsed_data, tasks = task['parser'](task)
            if tasks:
                # A single new task may be returned bare; normalise to a list.
                if not isinstance(tasks, list):
                    tasks = [tasks]
                self.log.log_it("获取新任务{}个。".format(len(tasks)), 'INFO')
                for each_task in tasks:
                    TaskManager.register(each_task['tid'])
                    self.to_download_q.put(each_task)
        except RetryDownload:
            self.log.log_it("RetryDownload Exception.Task{}".format(task),
                            'INFO')
            retry(task, self.to_download_q)
            return
        except RetryDownloadEnForce:
            self.log.log_it(
                "RetryDownloadEnForce Exception.Task{}".format(task), 'INFO')
            self.to_download_q.put(task)
            return
        except RetryParse:
            self.log.log_it("RetryParse Exception.Task{}".format(task), 'INFO')
            retry(task, self.downloader_parser_q)
            return
        except RetryParseEnForce:
            # Label matches the exception actually handled; it used to read
            # "RetryParse", indistinguishable from the handler above.
            self.log.log_it("RetryParseEnForce Exception.Task{}".format(task),
                            'INFO')
            self.downloader_parser_q.put(task)
            return
        except Exception as e:
            self.log.log_it("解析错误。错误信息:{}。Task:{}".format(str(e), task),
                            'WARN')
            traceback.print_exc()
            # NOTE(review): the task is neither retried nor unregistered on
            # this path - confirm TaskManager copes with that.
            return

        # Only a task that parsed cleanly is unregistered.
        TaskManager.unregister(task['tid'])
        return task_with_parsed_data

    def run(self):
        """Parse tasks until ``exit`` is called, forwarding truthy results."""
        while not self._exit:
            task_with_parsed_data = self.parser()
            if task_with_parsed_data:
                self.result_q.put(task_with_parsed_data)
コード例 #8
0
ファイル: crawler.py プロジェクト: zhangshaoze/web2kindle
class Crawler:
    """Start the downloader, parser and resulter threads and wait for them.

    ``start`` blocks until the task manager reports that no work is left
    and all resulter threads have finished.
    """

    def __init__(self,
                 to_download_q,
                 downloader_parser_q,
                 result_q,
                 parser_worker_count=CRAWLER_CONFIG.get('PARSER_WORKER', 1),
                 downloader_worker_count=CRAWLER_CONFIG.get('DOWNLOADER_WORKER', 1),
                 resulter_worker_count=CRAWLER_CONFIG.get('RESULTER_WORKER', 1),
                 session=None):
        """Store the queues and worker counts and create the task manager.

        Args:
            to_download_q: queue of tasks waiting to be downloaded.
            downloader_parser_q: queue of downloaded tasks waiting to be parsed.
            result_q: queue of parsed tasks waiting for result handling.
            parser_worker_count: number of Parser threads; defaults to the
                ``PARSER_WORKER`` entry of ``CRAWLER_CONFIG`` (1 if absent).
            downloader_worker_count: number of Downloader threads; defaults
                to ``DOWNLOADER_WORKER`` (1 if absent).
            resulter_worker_count: number of Resulter threads; defaults to
                ``RESULTER_WORKER`` (1 if absent).
            session: HTTP session handed to every Downloader. When omitted,
                a new ``requests`` session is created for this crawler.
        """
        # Coerce to int so counts that arrive as strings (e.g. from a
        # config file or the command line) still work with range().
        self.parser_worker_count = int(parser_worker_count)
        self.downloader_worker_count = int(downloader_worker_count)
        self.resulter_worker_count = int(resulter_worker_count)
        self.downloader_worker = []
        self.parser_worker = []
        self.resulter_worker = []
        self.log = Log("Crawler")

        self.to_download_q = to_download_q
        self.downloader_parser_q = downloader_parser_q
        self.result_q = result_q

        # The fallback session is built here rather than in the signature:
        # a default written in the signature is evaluated once, when the
        # class is defined, and would then be shared by every Crawler.
        self.session = requests.session() if session is None else session
        self.lock = Lock()
        self.task_manager = TaskManager(self.lock)

    def start(self):
        """Start all worker threads and block until the crawl has finished."""
        for i in range(self.downloader_worker_count):
            _worker = Downloader(self.to_download_q, self.downloader_parser_q, self.result_q, "Downloader {}".format(i),
                                 self.lock, self.session, )
            self.downloader_worker.append(_worker)
            self.log.log_it("启动 Downloader {}".format(i), 'INFO')
            _worker.start()

        for i in range(self.parser_worker_count):
            _worker = Parser(self.to_download_q, self.downloader_parser_q, self.result_q, "Parser {}".format(i),
                             self.lock)
            self.parser_worker.append(_worker)
            self.log.log_it("启动 Parser {}".format(i), 'INFO')
            _worker.start()

        for i in range(self.resulter_worker_count):
            _worker = Resulter(self.to_download_q, self.downloader_parser_q, self.result_q, "Resulter {}".format(i),
                               self.lock)
            self.resulter_worker.append(_worker)
            self.log.log_it("启动 Resulter {}".format(i), 'INFO')
            _worker.start()

        # Poll once per second until the task manager reports no work left.
        while True:
            time.sleep(1)
            if not self.task_manager.is_empty():
                continue

            for worker in self.downloader_worker:
                worker.exit()
            for worker in self.parser_worker:
                worker.exit()

            # Wait (polling, at least one second) until every resulter
            # thread has finished.
            while True:
                time.sleep(1)
                if not any(worker.is_alive()
                           for worker in self.resulter_worker):
                    break
            return
コード例 #9
0
ファイル: crawler.py プロジェクト: zhangshaoze/web2kindle
class Resulter(Thread):
    """Worker thread that runs each task's ``resulter`` callback.

    Tasks are taken from the result queue. The callback can raise one of
    the ``Retry*`` exceptions to have the task put back on the download,
    parse or result queue. The plain variants honour the task's ``retry``
    limit; the ``EnForce`` variants re-queue unconditionally.
    """

    def __init__(
            self,
            to_download_q: PriorityQueue,
            downloader_parser_q: PriorityQueue,
            result_q: Queue,
            name: str,
            lock):
        """Store the pipeline queues and build a ``TaskManager`` from *lock*."""
        super().__init__(name=name)
        self.result_q = result_q
        self.downloader_parser_q = downloader_parser_q
        self.to_download_q = to_download_q

        self._exit = False
        self.log = Log(self.name)
        self.lock = lock
        self.task_manager = TaskManager(self.lock)

    def exit(self):
        """Set the exit flag (``run`` itself stops on ``TaskManager.ALLDONE``)."""
        self._exit = True

    @staticmethod
    def _requeue_if_retryable(task, queue) -> bool:
        """Put *task* on *queue* if its retry budget is not used up.

        ``task['retry']`` is the maximum number of retries and
        ``task['retried']`` the number used so far (0 when absent). The
        counter is read with the same default for the check and for the
        increment, so ``retry=N`` allows exactly N retries.

        Returns:
            True if the task was re-queued, False otherwise.
        """
        max_retries = task.get('retry', None)
        if not max_retries:
            return False
        retried = task.get('retried', 0)
        if retried >= max_retries:
            return False
        task.update({'retried': retried + 1})
        queue.put(task)
        return True

    def result(self):
        """Handle at most one task from the result queue.

        Every call first notifies all threads waiting on ``COND``. If the
        queue is empty the call sleeps for a second and returns.
        """
        with COND:
            COND.notify_all()

        try:
            task = self.result_q.get_nowait()
        except Empty:
            time.sleep(1)
            return

        # Nothing follows this try statement, so each handler simply falls
        # off the end of the method once it is done.
        try:
            task['resulter'](task)
        except RetryDownload:
            self.log.log_it("RetryDownload Exception.Task{}".format(task), 'INFO')
            self._requeue_if_retryable(task, self.to_download_q)
        except RetryDownloadEnForce:
            self.log.log_it("RetryDownloadEnForce Exception.Task{}".format(task), 'INFO')
            self.to_download_q.put(task)
        except RetryParse:
            self.log.log_it("RetryParse Exception.Task{}".format(task), 'INFO')
            self._requeue_if_retryable(task, self.downloader_parser_q)
        except RetryParseEnForce:
            # Label matches the exception actually handled; it used to read
            # "RetryParse", indistinguishable from the handler above.
            self.log.log_it("RetryParseEnForce Exception.Task{}".format(task), 'INFO')
            self.downloader_parser_q.put(task)
        except RetryResult:
            self.log.log_it("RetryResult Exception.Task{}".format(task), 'INFO')
            self._requeue_if_retryable(task, self.result_q)
        except RetryResultEnForce:
            self.log.log_it("RetryResultEnForce Exception.Task{}".format(task), 'INFO')
            self.result_q.put(task)
        except Exception as e:
            # FIXME FileNotFoundError
            traceback.print_exc()
            self.log.log_it("Resulter函数错误。错误信息:{}。Task:{}".format(str(e), task), 'WARN')

    def run(self):
        """Loop until ``TaskManager.ALLDONE`` is set and the result queue is empty."""
        while not (TaskManager.ALLDONE and self.result_q.empty()):
            self.result()
コード例 #10
0
ファイル: crawler.py プロジェクト: zhangshaoze/web2kindle
class Parser(Thread):
    """Worker thread that runs each downloaded task's ``parser`` callback.

    The callback returns ``(task_with_parsed_data, new_tasks)``. New tasks
    are registered and put on the download queue; the parsed task, when
    truthy, is put on the result queue by ``run``.
    """

    def __init__(
            self,
            to_download_q: PriorityQueue,
            downloader_parser_q: PriorityQueue,
            result_q: Queue,
            name: str,
            lock):
        """Store the pipeline queues and build a ``TaskManager`` from *lock*."""
        super().__init__(name=name)
        self.downloader_parser_q = downloader_parser_q
        self.to_download_q = to_download_q
        self.result_q = result_q

        self._exit = False
        self.log = Log(self.name)
        self.lock = lock
        self.task_manager = TaskManager(self.lock)

    def exit(self):
        """Ask the ``run`` loop to stop after the current iteration."""
        self._exit = True

    @staticmethod
    def _requeue_if_retryable(task, queue) -> bool:
        """Put *task* on *queue* if its retry budget is not used up.

        ``task['retry']`` is the maximum number of retries and
        ``task['retried']`` the number used so far (0 when absent). The
        counter is read with the same default for the check and for the
        increment, so ``retry=N`` allows exactly N retries.

        Returns:
            True if the task was re-queued, False otherwise.
        """
        max_retries = task.get('retry', None)
        if not max_retries:
            return False
        retried = task.get('retried', 0)
        if retried >= max_retries:
            return False
        task.update({'retried': retried + 1})
        queue.put(task)
        return True

    def parser(self):
        """Parse one task from the downloader-to-parser queue.

        Blocks until a task is available. Whatever the outcome, the task is
        unregistered from the task manager before returning.

        Returns:
            The first element returned by the task's ``parser`` callback,
            or None when the callback raised.
        """
        # Wake any thread waiting on COND before looking for work.
        with COND:
            COND.notify_all()
        # NOTE(review): get() blocks without a timeout, so a parser idling
        # here cannot observe exit() until another task arrives.
        task = self.downloader_parser_q.get()

        try:
            task_with_parsed_data, tasks = task['parser'](task)
            if tasks:
                # A single new task may be returned bare; normalise to a list.
                if not isinstance(tasks, list):
                    tasks = [tasks]
                self.log.log_it("获取新任务{}个。".format(len(tasks)), 'INFO')
                for new_task in tasks:
                    self.task_manager.register(new_task['tid'])
                    self.to_download_q.put(new_task)
        except RetryDownload:
            self.log.log_it("RetryDownload Exception.Task{}".format(task), 'INFO')
            self._requeue_if_retryable(task, self.to_download_q)
            return
        except RetryDownloadEnForce:
            self.log.log_it("RetryDownloadEnForce Exception.Task{}".format(task), 'INFO')
            self.to_download_q.put(task)
            return
        except RetryParse:
            self.log.log_it("RetryParse Exception.Task{}".format(task), 'INFO')
            self._requeue_if_retryable(task, self.downloader_parser_q)
            return
        except RetryParseEnForce:
            # Label matches the exception actually handled; it used to read
            # "RetryParse", indistinguishable from the handler above.
            self.log.log_it("RetryParseEnForce Exception.Task{}".format(task), 'INFO')
            self.downloader_parser_q.put(task)
            return
        except Exception as e:
            # FIXME FileNotFoundError
            traceback.print_exc()
            self.log.log_it("解析错误。错误信息:{}。Task:{}".format(str(e), task), 'WARN')
            return
        finally:
            # Runs on every path above, including the early returns.
            self.task_manager.unregister(task['tid'])
        return task_with_parsed_data

    def run(self):
        """Parse tasks until ``exit`` is called, forwarding truthy results."""
        while not self._exit:
            task_with_parsed_data = self.parser()
            if task_with_parsed_data:
                self.result_q.put(task_with_parsed_data)