def create_single_html_book(self, book_package):
    """Render every page of ``book_package`` into one standalone HTML file.

    The file is written into a freshly recreated directory named after the
    book title under ``Path.result_path``; the book's image folder and three
    stylesheets are copied next to it.

    :param book_package: object providing ``get_title()`` and ``book_list``.
    :return: None
    """
    title = book_package.get_title()
    if not title:
        # Skip books with an empty title: otherwise the rmdir below would be
        # aimed at the result directory itself (an "rm -rf /" style disaster).
        return

    book_dir = u'./' + title
    Path.reset_path()
    Path.chdir(Path.result_path)
    Path.rmdir(book_dir)
    Path.mkdir(book_dir)
    Path.chdir(book_dir)

    # Flatten the pages of every book into one ordered list.
    page_list = []
    for book in book_package.book_list:
        page_list.extend(book.page_list)

    body_list = [Match.html_body(page.content) for page in page_list]
    body = u' \r\n '.join(body_list)
    body = body.replace(u'../images/', u'./images/')

    with open(TemplateConfig.content_base_uri) as template_file:
        template = template_file.read()
    html_text = template.format(title=title, body=body)
    html_text = html_text.replace(u'../style/', u'./')

    with open(title + u'.html', 'w') as output_file:
        output_file.write(html_text)

    image_source = Path.html_pool_path + u'/../{}/OEBPS/images'.format(title)
    Path.copy(image_source, u'./images')
    for css_path in (u'/customer.css', u'/markdown.css', u'/normalize.css'):
        Path.copy(Path.www_css + css_path, u'.' + css_path)

    Path.reset_path()
def create_book(self, command, counter):
    """Build e-book number ``counter`` from the record ``command``.

    Steps: parse the command into tasks, distribute them to the workers
    (skipped while ``Config.debug_for_create_book`` is set), extract the
    results, download their images, then split by size and create each
    resulting volume.

    :param command: the record to analyse.
    :param counter: ordinal of the book, only used in the log message.
    :return: None
    """
    Path.reset_path()
    Debug.logger.info(u"开始制作第 {} 本电子书".format(counter))
    Debug.logger.info(u"对记录 {} 进行分析".format(command))

    # Parse the command.
    task_list = CommandParser.get_task_list(command)
    if len(task_list) == 0:
        return

    for task in task_list:
        if not Config.debug_for_create_book:
            Worker.distribute(task)
    Debug.logger.info(u"网页信息抓取完毕")

    task_result_list = []
    for task in task_list:
        result = TaskResult(task)
        result.extract_data()
        task_result_list.append(result)
    Debug.logger.info(u"数据库信息获取完毕")

    # Download the images.
    for result in task_result_list:
        result.download_img()
    Debug.logger.info(u"所有任务图片获取完毕")

    # Split into volumes by size, then render HTML and pack each e-book.
    splitter = Book(task_result_list)
    size_limit = Config.max_book_size_mb * 1024
    for volume in splitter.auto_split(size_limit):
        volume.create_book()
def create_book(self, book_package):
    """Download the package's images and assemble its pages into an EPUB.

    The first page of each book opens a chapter, the remaining pages are
    added to it, and every image in ``book_package.image_list`` is attached
    before the EPUB is created.

    :param book_package: object providing ``image_container``,
        ``get_title()``, ``book_list`` and ``image_list``.
    :return: None
    """
    container = book_package.image_container
    container.set_save_path(Path.image_pool_path)
    container.start_download()

    title = book_package.get_title()
    if not title:
        # Skip books with an empty title to avoid an "rm -rf /" style
        # disaster further down the pipeline.
        return

    Path.chdir(Path.base_path + u'/知乎电子书临时资源库/')
    epub = Epub(title)
    html_tmp_path = Path.html_pool_path + u'/'
    image_tmp_path = Path.image_pool_path + u'/'

    def dump_page(page):
        # Write the page content into the HTML pool and return its path.
        target = html_tmp_path + page.filename
        with open(target, u'w') as handle:
            handle.write(page.content)
        return target

    epub.set_creator(u'ZhihuHelp1.7.0')
    epub.set_book_id()
    epub.set_output_path(Path.result_path)
    for css in (u'/www/css/markdown.css',
                u'/www/css/customer.css',
                u'/www/css/normalize.css'):
        epub.add_css(Path.base_path + css)

    for book in book_package.book_list:
        first_page = book.page_list[0]
        epub.create_chapter(dump_page(first_page), first_page.title)
        for extra_page in book.page_list[1:]:
            epub.add_html(dump_page(extra_page), extra_page.title)
        epub.finish_chapter()

    for image in book_package.image_list:
        epub.add_image(image_tmp_path + image['filename'])

    epub.create()
    Path.reset_path()
def create_book(self):
    """Render this volume's task results into an EPUB file.

    Each task result becomes one chapter: an info page chosen by the task
    type, followed by its question pages and column articles. Images
    referenced by questions and columns are attached along the way.

    :return: None
    """
    # Determine the file information.
    title = Match.fix_filename(self.book_title)
    if self.is_split:
        # Append the volume suffix to the *sanitized* title so that split
        # volumes get the same filename clean-up as unsplit books.
        title = title + u'_卷{}'.format(self.chapter_no)

    # Switch to the temporary e-book resource directory first.
    Path.chdir(Path.book_pool_path)
    epub = Epub(title)
    for task_result in self.task_result_list:
        chapter_src = ''
        # Info page: questions and answers share the question info page.
        task_type = task_result.task.task_type
        if task_type == Type.question or task_type == Type.answer:
            chapter_src = self.generate_question_info_page(task_result.info_page)
        elif task_type == Type.collection:
            chapter_src = self.generate_collection_info_page(task_result.info_page)
        elif task_type == Type.topic:
            chapter_src = self.generate_topic_info_page(task_result.info_page)
        elif task_type == Type.author:
            chapter_src = self.generate_author_info_page(task_result.info_page)
        elif task_type == Type.column:
            chapter_src = self.generate_column_info_page(task_result.info_page)
        elif task_type == Type.article:
            chapter_src = self.generate_article_info_page(task_result.info_page)
        epub.create_chapter(chapter_src, task_result.get_title())

        for question in task_result.question_list:
            # Attach the image files.
            for filename in question.img_filename_list:
                epub.add_image(Path.image_pool_path + '/' + filename)
            question_src = self.generate_question_page(question)
            epub.add_html(question_src, question.question_info.title)

        for column in task_result.column_list:
            # Attach the image files.
            for filename in column.img_filename_list:
                epub.add_image(Path.image_pool_path + '/' + filename)
            for article in column.article_list:
                article_src = self.generate_article_page(article)
                epub.add_html(article_src, article.title)
        epub.finish_chapter()

    epub.set_creator(u'ZhihuHelp1.8.0')
    epub.set_language(u'zh-cn')
    epub.set_book_id()
    epub.set_output_path(Path.result_path)
    epub.add_css(Path.base_path + u'/www/css/markdown.css')
    epub.add_css(Path.base_path + u'/www/css/customer.css')
    epub.add_css(Path.base_path + u'/www/css/normalize.css')
    epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
    epub.create()

    Path.reset_path()
    return
def create_book(command, counter):
    """Fetch the data described by ``command`` and build an e-book from it.

    :param command: the record to analyse.
    :param counter: ordinal of the book, only used in the log message.
    :return: None
    """
    Path.reset_path()
    Debug.logger.info(u"开始制作第 {} 本电子书".format(counter))
    Debug.logger.info(u"对记录 {} 进行分析".format(command))

    # Parse the command.
    task_package = ReadListParser.get_task(command)

    has_work = not task_package.is_work_list_empty()
    if has_work:
        # Run the crawlers.
        worker_factory(task_package.work_list)
        Debug.logger.info(u"网页信息抓取完毕")

    has_books = not task_package.is_book_list_empty()
    if has_books:
        Debug.logger.info(u"开始从数据库中生成电子书")
        Book(task_package.book_list).create()
def create_book(command, counter):
    """Fetch the data described by ``command`` and build an e-book from it.

    :param command: the record to analyse.
    :param counter: ordinal of the book, only used in the log message.
    :return: None
    """
    Path.reset_path()
    Debug.logger.info(u"开始制作第 {} 本电子书".format(counter))
    Debug.logger.info(u"对记录 {} 进行分析".format(command))

    # Parse the command.
    task_package = ReadListParser.get_task(command)

    has_work = not task_package.is_work_list_empty()
    if has_work:
        # Run the crawlers.
        worker_factory(task_package.work_list)
        Debug.logger.info(u"网页信息抓取完毕")

    has_books = not task_package.is_book_list_empty()
    if has_books:
        Debug.logger.info(u"开始从数据库中生成电子书")
        Book(task_package.book_list).create()
def create_book(self, command, counter):
    """Build e-book number ``counter`` from the record ``command``.

    Steps: parse the command into tasks, distribute them to the workers
    (skipped while ``Config.debug_for_create_book`` is set), extract the
    results, download their images, then split by size and create each
    resulting volume. Tasks whose type is in the list below are replaced by
    a ``ColumnTask`` built from their ``account_id`` before extraction.

    :param command: the record to analyse.
    :param counter: ordinal of the book, only used in the log message.
    :return: None
    """
    Path.reset_path()
    Debug.logger.info(u"开始制作第 {} 本电子书".format(counter))
    Debug.logger.info(u"对记录 {} 进行分析".format(command))

    # Parse the command.
    task_list = CommandParser.get_task_list(command)
    if len(task_list) == 0:
        return

    for task in task_list:
        if not Config.debug_for_create_book:
            Worker.distribute(task)
    Debug.logger.info(u"网页信息抓取完毕")

    task_result_list = []
    column_like_types = [
        Type.wechat,
        Type.huxiu,
        Type.huawei,
        Type.xueqiu,
        Type.sina,
        Type.zhengshitang,
        Type.jinwankansa,
        Type.wuxia,
        Type.doc360,
        Type.todo,
        Type.todo1,
        Type.todo2,
        Type.fiel,
        Type.taoguba_article,
    ]
    for original_task in task_list:
        effective_task = original_task
        if original_task.get_task_type() in column_like_types:
            effective_task = ColumnTask(original_task.account_id)
        result = TaskResult(effective_task)
        result.extract_data()
        task_result_list.append(result)
    Debug.logger.info(u"数据库信息获取完毕")

    # Download the images.
    for result in task_result_list:
        result.download_img()
    Debug.logger.info(u"所有任务图片获取完毕")

    # Split into volumes by size, then render HTML and pack each e-book.
    splitter = Book(task_result_list)
    size_limit = Config.max_book_size_mb * 1024
    for volume in splitter.auto_split(size_limit):
        volume.create_book()
def create_book(self, book_package):
    """Download the package's images and assemble its pages into an EPUB.

    The first page of each book opens a chapter (with any ``prefix_`` part
    of its title removed), the remaining pages are added to it, and every
    image in ``book_package.image_list`` is attached before the EPUB is
    created.

    :param book_package: object providing ``image_container``,
        ``get_title()``, ``book_list`` and ``image_list``.
    :return: None
    """
    book_package.image_container.set_save_path(Path.image_pool_path)
    book_package.image_container.start_download()
    title = book_package.get_title()
    # Use format() rather than str(): on Python 2, str() of a unicode title
    # containing non-ASCII characters raises UnicodeEncodeError.
    Debug.logger.debug(u"title of the e-book:{}".format(title))
    if not title:
        # Skip books with an empty title to avoid an "rm -rf /" style
        # disaster further down the pipeline.
        return
    Path.chdir(Path.in_base_path + u'/e-books_tmp_source')
    epub = Epub(title)
    html_tmp_path = Path.html_pool_path + u'/'
    image_tmp_path = Path.image_pool_path + u'/'
    epub.set_creator(u'EEBookV0-1')
    epub.set_language(u'zh')
    epub.set_book_id()
    epub.set_output_path(Path.result_path)
    epub.add_css(Path.in_base_path + u'/www/css/markdown.css')
    epub.add_css(Path.in_base_path + u'/www/css/customer.css')
    epub.add_css(Path.in_base_path + u'/www/css/normalize.css')
    epub.add_css(Path.in_base_path + u'/www/css/bootstrap.css')
    # TODO: article.css (taken from Sina) needs trimming before it is added.
    for book in book_package.book_list:
        page = book.page_list[0]
        with open(html_tmp_path + page.filename, 'w') as html:
            html.write(page.content)
        if '_' in page.title:
            # Drop the chapter prefix (everything up to the first '_').
            page.title = ''.join(page.title.split('_')[1:])
        epub.create_chapter(html_tmp_path + page.filename, page.title)
        for page in book.page_list[1:]:
            with open(html_tmp_path + page.filename, 'w') as html:
                html.write(page.content)
            epub.add_html(html_tmp_path + page.filename, page.title)
        epub.finish_chapter()
    for image in book_package.image_list:
        epub.add_image(image_tmp_path + image['filename'])
    epub.create()
    Path.reset_path()
    return
def create_single_html_book(self):
    """Render every page of ``self.book_list`` into one standalone HTML file.

    The title is the '_'-joined EPUB titles of all books, trimmed to 128
    characters and sanitized. The file is written into a freshly recreated
    directory of that name under ``Path.result_path``; the image folder and
    two stylesheets are copied next to it.

    :return: None
    """
    title = '_'.join([book.epub.title for book in self.book_list])
    title = title.strip()[:128]  # Stay clear of the Windows filename length limit.
    title = ExtraTools.fix_filename(title)  # Remove special characters.
    if not title:
        # An empty title would make the rmdir below operate on u'./', i.e.
        # the result directory itself, so skip the book instead.
        return

    Path.reset_path()
    Path.chdir(Path.result_path)
    Path.rmdir(u'./' + title)
    Path.mkdir(u'./' + title)
    Path.chdir(u'./' + title)

    page = []
    for book in self.book_list:
        page += book.page_list
    content = ' \r\n<hr /> \r\n '.join([Match.html_body(x.content) for x in page]).replace('../images/', './images/')
    with open(Path.base_path + '/src/template/content/single_html.html') as html:
        template = html.read().format(title=title, content=content)
    with open(title + u'.html', 'w') as html:
        html.write(template)

    shutil.copytree(Path.html_pool_path + u'/../{}/OEBPS/images'.format(title), './images')
    shutil.copy(Path.www_css + '/front.css', './front.css')
    shutil.copy(Path.www_css + '/markdown.css', './markdown.css')
    Path.reset_path()
    return
def create_book(command, counter):
    """Fetch the data described by ``command`` and build an e-book from it.

    :param command: the record to analyse.
    :param counter: ordinal of the book, only used in the log message.
    :return: up to three of the names returned by ``Book.create()`` joined
        with '-', or a fixed notice string when no book was produced.
    """
    Path.reset_path()
    Debug.logger.info(u"Ready to make No.{} e-book".format(counter))
    Debug.logger.info(u"Analysis {} ".format(command))

    # Parse the command.
    task_package = UrlParser.get_task(command)
    Debug.logger.debug(u"#Debug:#task_package是:" + str(task_package))

    has_work = not task_package.is_work_list_empty()
    if has_work:
        # Run the crawlers.
        worker_factory(task_package.work_list)
        Debug.logger.info(u"Complete fetching from web")

    produced_names = None
    has_books = not task_package.is_book_list_empty()
    if has_books:
        Debug.logger.info(u"Start generating e-book from the database")
        produced_names = Book(task_package.book_list).create()

    if produced_names is None:
        return u"Oops! no epub file produced"
    leading_names = list(produced_names)[0:3]
    return '-'.join(leading_names)
def create_book(self, book_package):
    """Download the package's images and assemble its pages into an EPUB.

    The first page of each book opens a chapter (with any ``prefix_`` part
    of its title removed), the remaining pages are added to it, and every
    image in ``book_package.image_list`` is attached before the EPUB is
    created.

    :param book_package: object providing ``image_container``,
        ``get_title()``, ``book_list`` and ``image_list``.
    :return: None
    """
    book_package.image_container.set_save_path(Path.image_pool_path)
    book_package.image_container.start_download()
    title = book_package.get_title()
    # Use format() rather than str(): on Python 2, str() of a unicode title
    # containing non-ASCII characters raises UnicodeEncodeError.
    Debug.logger.debug(u"电子书的名称是???{}".format(title))
    if not title:
        # Skip books with an empty title to avoid an "rm -rf /" style
        # disaster further down the pipeline.
        return
    Path.chdir(Path.base_path + u'/电子书临时资源库')
    epub = Epub(title)
    html_tmp_path = Path.html_pool_path + u'/'
    image_tmp_path = Path.image_pool_path + u'/'
    epub.set_creator(u'SinaBlogV01')
    epub.set_book_id()
    epub.set_output_path(Path.result_path)
    epub.add_css(Path.base_path + u'/www/css/markdown.css')
    epub.add_css(Path.base_path + u'/www/css/customer.css')
    epub.add_css(Path.base_path + u'/www/css/normalize.css')
    epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
    # TODO: article.css (taken from Sina) needs trimming before it is added.
    for book in book_package.book_list:
        page = book.page_list[0]
        with open(html_tmp_path + page.filename, u'w') as html:
            html.write(page.content)
        if '_' in page.title:
            # Drop the chapter prefix (everything up to the first '_').
            page.title = ''.join(page.title.split('_')[1:])
        epub.create_chapter(html_tmp_path + page.filename, page.title)
        for page in book.page_list[1:]:
            with open(html_tmp_path + page.filename, u'w') as html:
                html.write(page.content)
            epub.add_html(html_tmp_path + page.filename, page.title)
        epub.finish_chapter()
    for image in book_package.image_list:
        epub.add_image(image_tmp_path + image['filename'])
    epub.create()
    Path.reset_path()
    return
def create(self):
    """Download the images and assemble ``self.book_list`` into an EPUB.

    The title is the '_'-joined EPUB titles of all books, trimmed to 128
    characters and sanitized. The first page of each book is added as an
    info page and the remaining pages as ordinary HTML pages; images and
    metadata are attached before the EPUB is built.

    :return: None
    """
    self.image_container.set_save_path(Path.image_pool_path)
    self.image_container.start_download()

    joined_title = '_'.join([book.epub.title for book in self.book_list])
    # Stay clear of the Windows filename length limit.
    trimmed_title = joined_title.strip()[:128]
    # Remove special characters.
    title = ExtraTools.fix_filename(trimmed_title)
    if not title:
        # Skip books with an empty title to avoid an "rm -rf /" style
        # disaster further down the pipeline.
        return

    Path.chdir(Path.base_path + u'/知乎电子书临时资源库/')
    epub = Book(title, 27149527)
    html_tmp_path = Path.html_pool_path + '/'
    image_tmp_path = Path.image_pool_path + '/'

    def dump_page(page):
        # Write the page content into the HTML pool and return its path.
        target = html_tmp_path + page.filename
        with open(target, 'w') as handle:
            handle.write(page.content)
        return target

    for book in self.book_list:
        info_page = book.page_list[0]
        epub.addInfoPage(dump_page(info_page), info_page.title)
        for content_page in book.page_list[1:]:
            epub.addHtml(dump_page(content_page), content_page.title)

    # NOTE(review): reads ``self.book`` while the rest of the method uses
    # ``self.book_list`` -- confirm this attribute exists on the class.
    for image in self.book.image_list:
        epub.addImg(image_tmp_path + image['filename'])

    epub.addLanguage('zh-cn')
    epub.addCreator('ZhihuHelp1.7.0')
    epub.addDesc(u'该电子书由知乎助手生成,知乎助手是姚泽源为知友制作的仅供个人使用的简易电子书制作工具,源代码遵循WTFPL,希望大家能认真领会该协议的真谛,为飞面事业做出自己的贡献 XD')
    epub.addRight('CC')
    epub.addPublisher('ZhihuHelp')
    for css in (u'/www/css/markdown.css', u'/www/css/front.css'):
        epub.addCss(Path.base_path + css)

    epub.buildingEpub()
    Path.reset_path()
def create_book(self):
    """Render this volume's task results into an EPUB file with a cover.

    Each task result becomes one chapter: an info page chosen by the task
    type, followed by its question pages and column articles. The cover is
    downloaded from the first task result's ``info_page.image_url``; when
    that fails or yields 10 bytes or fewer, a fallback cover file is used.

    :return: None
    """
    # Determine the file information.
    title = Match.fix_filename(self.book_title)
    if self.is_split:
        # Append the volume suffix to the *sanitized* title so that split
        # volumes get the same filename clean-up as unsplit books.
        title = title + u'_卷{}'.format(self.chapter_no)

    # Switch to the temporary e-book resource directory first.
    Path.chdir(Path.book_pool_path)
    epub = Epub(title)
    for task_result in self.task_result_list:
        chapter_src = ''
        # Info page: questions and answers share the question info page.
        task_type = task_result.task.task_type
        if task_type == Type.question or task_type == Type.answer:
            chapter_src = self.generate_question_info_page(
                task_result.info_page)
        elif task_type == Type.collection:
            chapter_src = self.generate_collection_info_page(
                task_result.info_page)
        elif task_type == Type.topic:
            chapter_src = self.generate_topic_info_page(
                task_result.info_page)
        elif task_type == Type.author:
            chapter_src = self.generate_author_info_page(
                task_result.info_page)
        elif task_type == Type.column:
            # The article count shown on the info page comes from the first
            # column of the task result.
            task_result.info_page.article_count = len(
                task_result.column_list[0].article_list)
            chapter_src = self.generate_column_info_page(
                task_result.info_page)
        elif task_type == Type.article:
            chapter_src = self.generate_article_info_page(
                task_result.info_page)
        epub.create_chapter(chapter_src, task_result.get_title())

        for question in task_result.question_list:
            # Attach the image files.
            for filename in question.img_filename_list:
                epub.add_image(Path.image_pool_path + '/' + filename)
            question_src = self.generate_question_page(question)
            epub.add_html(question_src, question.question_info.title)

        for column in task_result.column_list:
            # Attach the image files.
            for filename in column.img_filename_list:
                epub.add_image(Path.image_pool_path + '/' + filename)
            for article in column.article_list:
                article_src = self.generate_article_page(article)
                epub.add_html(article_src, article.title)
        epub.finish_chapter()

    href = self.task_result_list[0].info_page.image_url
    if href:
        # Truthiness check instead of len(): a None URL must fall through to
        # the fallback cover rather than raise TypeError.
        print(href)
        content = Http.get_content(
            url=href, timeout=Config.timeout_download_picture)
        if not content:
            Debug.logger.debug(u'图片『{}』下载失败'.format(href))
            content = ''
        else:
            Debug.print_in_single_line(u'图片{}下载完成'.format(href))
    else:
        # No download address, so there is nothing to fetch.
        content = ''

    if len(content) > 10:
        filename = Path.image_pool_path + '/' + 'cover.jpg'
        with open(filename, 'wb') as image:
            image.write(content)
        epub.add_cover_image(filename)
    else:
        # NOTE(review): machine-specific absolute path; this fallback only
        # works on the original author's computer and should be replaced by
        # a cover shipped with the project.
        epub.add_cover_image('/Users/ex-liyan010/Desktop/cover.png')

    epub.set_creator(u'macbookpro2100')
    epub.set_language(u'zh-cn')
    epub.set_book_id()
    epub.set_output_path(Path.result_path)
    epub.add_css(Path.base_path + u'/www/css/markdown.css')
    epub.add_css(Path.base_path + u'/www/css/customer.css')
    epub.add_css(Path.base_path + u'/www/css/normalize.css')
    epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
    epub.create()

    Path.reset_path()
    return
def create_book(self):
    """Render this volume's task results into an EPUB file.

    Each task result becomes one chapter: an info page chosen by the task
    type, followed by its question pages and column articles. Images
    referenced by questions and columns are attached along the way.

    :return: None
    """
    # Determine the file information.
    title = Match.fix_filename(self.book_title)
    if self.is_split:
        # Append the volume suffix to the *sanitized* title so that split
        # volumes get the same filename clean-up as unsplit books.
        title = title + u'_卷{}'.format(self.chapter_no)

    # Switch to the temporary e-book resource directory first.
    Path.chdir(Path.book_pool_path)
    epub = Epub(title)
    for task_result in self.task_result_list:
        chapter_src = ''
        # Info page: questions and answers share the question info page.
        task_type = task_result.task.task_type
        if task_type == Type.question or task_type == Type.answer:
            chapter_src = self.generate_question_info_page(
                task_result.info_page)
        elif task_type == Type.collection:
            chapter_src = self.generate_collection_info_page(
                task_result.info_page)
        elif task_type == Type.topic:
            chapter_src = self.generate_topic_info_page(
                task_result.info_page)
        elif task_type == Type.author:
            chapter_src = self.generate_author_info_page(
                task_result.info_page)
        elif task_type == Type.column:
            chapter_src = self.generate_column_info_page(
                task_result.info_page)
        elif task_type == Type.article:
            chapter_src = self.generate_article_info_page(
                task_result.info_page)
        epub.create_chapter(chapter_src, task_result.get_title())

        for question in task_result.question_list:
            # Attach the image files.
            for filename in question.img_filename_list:
                epub.add_image(Path.image_pool_path + '/' + filename)
            question_src = self.generate_question_page(question)
            epub.add_html(question_src, question.question_info.title)

        for column in task_result.column_list:
            # Attach the image files.
            for filename in column.img_filename_list:
                epub.add_image(Path.image_pool_path + '/' + filename)
            for article in column.article_list:
                article_src = self.generate_article_page(article)
                epub.add_html(article_src, article.title)
        epub.finish_chapter()

    epub.set_creator(u'ZhihuHelp1.8.0')
    epub.set_language(u'zh-cn')
    epub.set_book_id()
    epub.set_output_path(Path.result_path)
    epub.add_css(Path.base_path + u'/www/css/markdown.css')
    epub.add_css(Path.base_path + u'/www/css/customer.css')
    epub.add_css(Path.base_path + u'/www/css/normalize.css')
    epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
    epub.create()

    Path.reset_path()
    return