def set_info(self, info):
    """Merge ``info`` into ``self.info`` and derive the epub title and id.

    The title template and the ``info`` keys that are read depend on
    ``self.kind``. When ``self.kind`` matches none of the handled ``Type``
    values, ``self.epub.title`` and ``self.epub.id`` are left as they were.
    In every case the title is finally passed through
    ``Match.fix_filename``.

    :param info: mapping holding the metadata for this kind, e.g. ``title``
        and ``id`` for questions, ``name`` and ``author_id`` for authors.
    :return: None
    """
    self.info.update(info)

    # (kind, title template, info keys fed to the template, info key of the id)
    rules = (
        (Type.question, u'知乎问题集锦({})', ('title',), 'id'),
        (Type.answer, u'知乎回答集锦({})', ('title',), 'id'),
        (Type.article, u'知乎专栏文章集锦({})', ('title',), 'id'),
        (Type.topic, u'话题_{}({})', ('title', 'topic_id'), 'topic_id'),
        (Type.collection, u'收藏夹_{}({})',
         ('title', 'collection_id'), 'collection_id'),
        (Type.author, u'作者_{}({})', ('name', 'author_id'), 'author_id'),
        (Type.column, u'专栏_{}({})', ('name', 'column_id'), 'column_id'),
    )
    for candidate, template, fields, id_key in rules:
        if self.kind == candidate:
            self.epub.title = template.format(*[info[key] for key in fields])
            self.epub.id = info[id_key]
            break

    self.epub.title = Match.fix_filename(self.epub.title)
    return
def create_book(self):
    """Build an epub from ``self.task_result_list`` and write it out.

    Every task result becomes one chapter: an info page chosen by the task
    type, followed by one html page per question and per column article.
    Images referenced by questions and columns are added from
    ``Path.image_pool_path``. The finished book is written to
    ``Path.result_path``.

    :return: None
    """
    # Work out the file name; a split book gets a volume suffix.
    title = Match.fix_filename(self.book_title)
    if self.is_split:
        # Extend the sanitised title so split volumes get the same
        # file-name clean-up as unsplit books.
        title += u'_卷{}'.format(self.chapter_no)

    # Switch to the temporary e-book resource directory first.
    Path.chdir(Path.book_pool_path)
    try:
        epub = Epub(title)
        for task_result in self.task_result_list:
            task_type = task_result.task.task_type
            info_page = task_result.info_page

            # Info page that opens the chapter; stays empty for unknown types.
            chapter_src = ''
            if task_type in (Type.question, Type.answer):
                # Answers reuse the question info page.
                chapter_src = self.generate_question_info_page(info_page)
            elif task_type == Type.collection:
                chapter_src = self.generate_collection_info_page(info_page)
            elif task_type == Type.topic:
                chapter_src = self.generate_topic_info_page(info_page)
            elif task_type == Type.author:
                chapter_src = self.generate_author_info_page(info_page)
            elif task_type == Type.column:
                chapter_src = self.generate_column_info_page(info_page)
            elif task_type == Type.article:
                chapter_src = self.generate_article_info_page(info_page)
            epub.create_chapter(chapter_src, task_result.get_title())

            for question in task_result.question_list:
                # Add the image files used by this question.
                for filename in question.img_filename_list:
                    epub.add_image(Path.image_pool_path + '/' + filename)
                question_src = self.generate_question_page(question)
                epub.add_html(question_src, question.question_info.title)

            for column in task_result.column_list:
                # Add the image files used by this column.
                for filename in column.img_filename_list:
                    epub.add_image(Path.image_pool_path + '/' + filename)
                for article in column.article_list:
                    article_src = self.generate_article_page(article)
                    epub.add_html(article_src, article.title)
            epub.finish_chapter()

        epub.set_creator(u'ZhihuHelp1.8.0')
        epub.set_language(u'zh-cn')
        epub.set_book_id()
        epub.set_output_path(Path.result_path)
        epub.add_css(Path.base_path + u'/www/css/markdown.css')
        epub.add_css(Path.base_path + u'/www/css/customer.css')
        epub.add_css(Path.base_path + u'/www/css/normalize.css')
        epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
        epub.create()
    finally:
        # Run the path reset on every exit, including a failed build
        # (presumably it restores the directory changed by Path.chdir above).
        Path.reset_path()
    return
def parse_article_title(self):
    """Return the post title followed by the article index.

    The title is the text of the last ``<div class="p_title">`` found in
    ``self.dom``, passed through ``Match.fix_filename``; it stays empty when
    no such element exists. The stripped string form of
    ``self.getArticleIndex()`` is appended to it.

    :return: the title text concatenated with the article index.
    """
    article_title = ''
    for title_div in self.dom.find_all('div', class_="p_title"):
        # Only the element text matters; each match overwrites the previous
        # one, so the last matching element wins.
        article_title = Match.fix_filename(title_div.text)
    return article_title + str(self.getArticleIndex()).strip()
def parse_article_title(self):
    """Return the post title, marked as a follow-up when by another author.

    The title is the text of the last ``<div class="p_title">`` found in
    ``self.dom``, passed through ``Match.fix_filename``; it stays empty when
    no such element exists. When ``self.parse_author_id()`` differs from
    ``self.account_id`` the stripped title is wrapped in the follow-up
    template.

    :return: the stripped title, with the follow-up prefix when applicable.
    """
    article_title = ''
    for title_div in self.dom.find_all('div', class_="p_title"):
        # Only the element text matters; each match overwrites the previous
        # one, so the last matching element wins.
        article_title = Match.fix_filename(title_div.text)

    if self.parse_author_id() != self.account_id:
        article_title = u"跟帖:{}".format(article_title.strip())
    return article_title.strip()
def get_title(self):
    """Return one combined title for all books in ``self.book_list``.

    The epub titles are joined with underscores and the result is passed
    through ``Match.fix_filename``.

    :return: the combined title as returned by ``Match.fix_filename``.
    """
    epub_titles = []
    for book in self.book_list:
        epub_titles.append(book.epub.title)
    joined = '_'.join(epub_titles)
    return Match.fix_filename(joined)
def create_book(self):
    """Build an epub with a cover from ``self.task_result_list``.

    Every task result becomes one chapter: an info page chosen by the task
    type, followed by one html page per question and per column article.
    Images referenced by questions and columns are added from
    ``Path.image_pool_path``. The cover is downloaded from the
    ``image_url`` of the first task result's info page; when that yields no
    usable image a fallback cover file is used if it exists. The finished
    book is written to ``Path.result_path``.

    :return: None
    """
    import os  # local import: only needed for the fallback-cover check

    # Work out the file name; a split book gets a volume suffix.
    title = Match.fix_filename(self.book_title)
    if self.is_split:
        # Extend the sanitised title so split volumes get the same
        # file-name clean-up as unsplit books.
        title += u'_卷{}'.format(self.chapter_no)

    # Switch to the temporary e-book resource directory first.
    Path.chdir(Path.book_pool_path)
    try:
        epub = Epub(title)
        for task_result in self.task_result_list:
            task_type = task_result.task.task_type
            info_page = task_result.info_page

            # Info page that opens the chapter; stays empty for unknown types.
            chapter_src = ''
            if task_type in (Type.question, Type.answer):
                # Answers reuse the question info page.
                chapter_src = self.generate_question_info_page(info_page)
            elif task_type == Type.collection:
                chapter_src = self.generate_collection_info_page(info_page)
            elif task_type == Type.topic:
                chapter_src = self.generate_topic_info_page(info_page)
            elif task_type == Type.author:
                chapter_src = self.generate_author_info_page(info_page)
            elif task_type == Type.column:
                # The article count comes from the first column; skipped
                # when the task result carries no column at all.
                if task_result.column_list:
                    info_page.article_count = len(
                        task_result.column_list[0].article_list)
                chapter_src = self.generate_column_info_page(info_page)
            elif task_type == Type.article:
                chapter_src = self.generate_article_info_page(info_page)
            epub.create_chapter(chapter_src, task_result.get_title())

            for question in task_result.question_list:
                # Add the image files used by this question.
                for filename in question.img_filename_list:
                    epub.add_image(Path.image_pool_path + '/' + filename)
                question_src = self.generate_question_page(question)
                epub.add_html(question_src, question.question_info.title)

            for column in task_result.column_list:
                # Add the image files used by this column.
                for filename in column.img_filename_list:
                    epub.add_image(Path.image_pool_path + '/' + filename)
                for article in column.article_list:
                    article_src = self.generate_article_page(article)
                    epub.add_html(article_src, article.title)
            epub.finish_chapter()

        # The cover URL is taken from the first task result, if there is one.
        href = ''
        if self.task_result_list:
            href = self.task_result_list[0].info_page.image_url

        # With an empty or missing URL there is nothing to download.
        content = ''
        if href:
            print(href)
            content = Http.get_content(
                url=href, timeout=Config.timeout_download_picture)
            if not content:
                Debug.logger.debug(u'图片『{}』下载失败'.format(href))
                content = ''
            else:
                Debug.print_in_single_line(u'图片{}下载完成'.format(href))

        if len(content) > 10:
            # Payloads of 10 bytes or fewer are treated as a failed download.
            filename = Path.image_pool_path + '/' + 'cover.jpg'
            with open(filename, 'wb') as image:
                image.write(content)
            epub.add_cover_image(filename)
        else:
            # NOTE(review): machine-specific fallback path; it is only used
            # when the file exists. Consider moving it to configuration.
            fallback_cover = '/Users/ex-liyan010/Desktop/cover.png'
            if os.path.isfile(fallback_cover):
                epub.add_cover_image(fallback_cover)

        epub.set_creator(u'macbookpro2100')
        epub.set_language(u'zh-cn')
        epub.set_book_id()
        epub.set_output_path(Path.result_path)
        epub.add_css(Path.base_path + u'/www/css/markdown.css')
        epub.add_css(Path.base_path + u'/www/css/customer.css')
        epub.add_css(Path.base_path + u'/www/css/normalize.css')
        epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
        epub.create()
    finally:
        # Run the path reset on every exit, including a failed build
        # (presumably it restores the directory changed by Path.chdir above).
        Path.reset_path()
    return
def create_book(self):
    """Build an epub from ``self.task_result_list`` and write it out.

    Every task result becomes one chapter: an info page chosen by the task
    type, followed by one html page per question and per column article.
    Images referenced by questions and columns are added from
    ``Path.image_pool_path``. The finished book is written to
    ``Path.result_path``.

    :return: None
    """
    # Work out the file name; a split book gets a volume suffix.
    title = Match.fix_filename(self.book_title)
    if self.is_split:
        # Extend the sanitised title so split volumes get the same
        # file-name clean-up as unsplit books.
        title += u'_卷{}'.format(self.chapter_no)

    # Switch to the temporary e-book resource directory first.
    Path.chdir(Path.book_pool_path)
    try:
        epub = Epub(title)
        for task_result in self.task_result_list:
            task_type = task_result.task.task_type
            info_page = task_result.info_page

            # Info page that opens the chapter; stays empty for unknown types.
            chapter_src = ''
            if task_type in (Type.question, Type.answer):
                # Answers reuse the question info page.
                chapter_src = self.generate_question_info_page(info_page)
            elif task_type == Type.collection:
                chapter_src = self.generate_collection_info_page(info_page)
            elif task_type == Type.topic:
                chapter_src = self.generate_topic_info_page(info_page)
            elif task_type == Type.author:
                chapter_src = self.generate_author_info_page(info_page)
            elif task_type == Type.column:
                chapter_src = self.generate_column_info_page(info_page)
            elif task_type == Type.article:
                chapter_src = self.generate_article_info_page(info_page)
            epub.create_chapter(chapter_src, task_result.get_title())

            for question in task_result.question_list:
                # Add the image files used by this question.
                for filename in question.img_filename_list:
                    epub.add_image(Path.image_pool_path + '/' + filename)
                question_src = self.generate_question_page(question)
                epub.add_html(question_src, question.question_info.title)

            for column in task_result.column_list:
                # Add the image files used by this column.
                for filename in column.img_filename_list:
                    epub.add_image(Path.image_pool_path + '/' + filename)
                for article in column.article_list:
                    article_src = self.generate_article_page(article)
                    epub.add_html(article_src, article.title)
            epub.finish_chapter()

        epub.set_creator(u'ZhihuHelp1.8.0')
        epub.set_language(u'zh-cn')
        epub.set_book_id()
        epub.set_output_path(Path.result_path)
        epub.add_css(Path.base_path + u'/www/css/markdown.css')
        epub.add_css(Path.base_path + u'/www/css/customer.css')
        epub.add_css(Path.base_path + u'/www/css/normalize.css')
        epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
        epub.create()
    finally:
        # Run the path reset on every exit, including a failed build
        # (presumably it restores the directory changed by Path.chdir above).
        Path.reset_path()
    return