Ejemplo n.º 1
0
    def fix_mode(self):
        """
        Fix mode: fetch the chapters that are still missing, then merge the novel.

        Parses the catalog page for all chapter links, asks
        ``CommonTool.get_not_downloaded_chapters`` which of them still need
        downloading, and queues only those on the thread pool. After the pool
        has drained, the complete chapter list is checked with
        ``CommonTool.check_completion``; on success the chapters are merged
        (``self.output_name`` is passed to the merge step), otherwise an
        error with a re-run hint is logged.

        Side effects: sets ``self.all_chapter_num`` to the number of chapters
        queued for re-download.

        :return: None
        """
        began_at = time.time()
        self.logger.info("------------------Fix Mode------------------")

        # Collect every chapter link from the catalog page.
        catalog_urls = self._parse_catalog()
        # Narrow the list down to the chapters that still have to be fetched.
        pending_urls = CommonTool.get_not_downloaded_chapters(catalog_urls)
        self.logger.debug("redownload: " + str(pending_urls))
        self.logger.info("Get novel chapters: " + str(len(pending_urls)))
        self.all_chapter_num = len(pending_urls)

        # Use threadpool to limit the number of worker threads.
        for job in threadpool.makeRequests(self._get_detail, pending_urls):
            self.pool.putRequest(job)
        # Block until every queued chapter has been processed.
        self.pool.wait()

        self.logger.info("Checking download completeness...")
        # Completeness is judged against the full catalog, not just the
        # subset fetched in this run.
        download_complete = CommonTool.check_completion(catalog_urls)
        if not download_complete:
            self.logger.error("Some chapters download failed.")
            self.logger.error("Try: python novel_download.py -url URL -t THREAD_LIMIT --fix=true")
        else:
            # Merge all chapters into the final text.
            self.logger.info("All chapters are downloaded successfully. Start merging ...")
            CommonTool.merge_all_chapters(self.output_name)
            self.logger.info("Merged. Enjoy reading!")

        elapsed = time.time() - began_at
        self.logger.info("Total cost %.2fs" % elapsed)
Ejemplo n.º 2
0
    def start(self):
        """
        Download every chapter listed on the catalog page and merge the novel.

        Steps:
          1. Clear temporary files via ``CommonTool.clean_temp``.
          2. Parse the catalog page and queue every chapter link on the
             thread pool.
          3. While ``self.progress_cnt`` is still below the chapter count
             and ``self.failed_set`` is non-empty, re-queue the failed set,
             for at most three rounds.
          4. Check completeness with ``CommonTool.check_completion``; merge
             on success, otherwise log an error telling the user to re-run
             with ``--fix=true`` to enter fix mode.

        Side effects: sets ``self.all_chapter_num`` and replaces
        ``self.failed_set`` with a fresh set before each retry round.

        :return: None
        """
        start_time = time.time()
        # Clear temporary files first.
        CommonTool.clean_temp()
        # Collect every chapter link from the catalog page.
        detail_urls = self._parse_catalog()
        self.logger.info("Get novel chapters: " + str(len(detail_urls)))
        self.all_chapter_num = len(detail_urls)
        # Use threadpool to limit the number of worker threads.
        for req in threadpool.makeRequests(self._get_detail, detail_urls):
            self.pool.putRequest(req)
        # Block until every queued chapter has been processed.
        self.pool.wait()

        # Retry up to 3 rounds; if the download is still incomplete after
        # that, the user is pointed at fix mode below.
        retry_max = 3
        for _ in range(retry_max):
            if self.progress_cnt >= self.all_chapter_num:
                break
            if not self.failed_set:
                # Nothing is recorded as failed, so a retry round would
                # submit no work; let the completeness check decide.
                break
            self.logger.info("Retry failed set. Len: " + str(len(self.failed_set)))
            # Swap in an empty set before re-queuing; presumably _get_detail
            # records new failures into self.failed_set (verify there).
            retry, self.failed_set = self.failed_set, set()
            for req in threadpool.makeRequests(self._get_detail, retry):
                self.pool.putRequest(req)
            # Block until this retry round has been processed.
            self.pool.wait()

        self.logger.info("Checking download completeness...")
        if CommonTool.check_completion(detail_urls):
            # Merge all chapters into the final text.
            self.logger.info("All chapters are downloaded successfully. Start merging ...")
            CommonTool.merge_all_chapters(self.output_name)
            self.logger.info("Merged. Enjoy reading!")
        else:
            self.logger.error("Some chapters download failed.")
            # Hint uses the --fix=true spelling that the original docstring
            # of this method documents for entering fix mode.
            self.logger.error("Try python novel_download.py -url URL --fix=true")
        self.logger.info("Total cost %.2fs" % (time.time() - start_time))