Exemple #1
0
    def set_info(self, info):
        u"""
        Merge ``info`` into ``self.info`` and derive the epub title and id.

        The title template and the keys read from ``info`` depend on
        ``self.kind``. When ``self.kind`` matches none of the known types the
        existing ``self.epub.title`` is kept. In every case the resulting
        title is finally passed through ``Match.fix_filename``.

        :param info: mapping with the keys required by the current kind
            (``title``/``id`` for question, answer and article; ``title`` or
            ``name`` plus a ``*_id`` key for the remaining kinds)
        :return: None
        """
        self.info.update(info)
        current_kind = self.kind

        # Kinds whose title only embeds info['title'] and whose id is info['id'].
        title_only_rules = (
            (Type.question, u'知乎问题集锦({})'),
            (Type.answer, u'知乎回答集锦({})'),
            (Type.article, u'知乎专栏文章集锦({})'),
        )
        for candidate, template in title_only_rules:
            if current_kind == candidate:
                self.epub.title = template.format(info['title'])
                self.epub.id = info['id']
                break

        # Kinds whose title embeds a display name together with the id.
        # Columns: (kind, title template, key of the name, key of the id)
        name_and_id_rules = (
            (Type.topic, u'话题_{}({})', 'title', 'topic_id'),
            (Type.collection, u'收藏夹_{}({})', 'title', 'collection_id'),
            (Type.author, u'作者_{}({})', 'name', 'author_id'),
            (Type.column, u'专栏_{}({})', 'name', 'column_id'),
        )
        for candidate, template, name_key, id_key in name_and_id_rules:
            if current_kind == candidate:
                self.epub.title = template.format(info[name_key], info[id_key])
                self.epub.id = info[id_key]

        self.epub.title = Match.fix_filename(self.epub.title)
Exemple #2
0
    def create_book(self):
        u"""
        Build one epub from ``self.task_result_list`` and write it out.

        For every task result a chapter is opened with the info page matching
        its task type, followed by all of its question pages and column
        article pages (images are registered before the pages that use them).
        The finished book is written to ``Path.result_path``.

        :return: None
        """
        #   Work out the book title that is handed to Epub
        title = Match.fix_filename(self.book_title)
        if self.is_split:
            # Append the volume suffix to the already processed title; using
            # self.book_title here would discard the fix_filename result.
            title = title + u'_卷{}'.format(self.chapter_no)

        #   Switch to the temporary ebook resource directory first
        Path.chdir(Path.book_pool_path)
        try:
            epub = Epub(title)
            for task_result in self.task_result_list:
                task_type = task_result.task.task_type
                chapter_src = ''
                # info_page: answers reuse the question info page
                if task_type in (Type.question, Type.answer):
                    chapter_src = self.generate_question_info_page(task_result.info_page)
                elif task_type == Type.collection:
                    chapter_src = self.generate_collection_info_page(task_result.info_page)
                elif task_type == Type.topic:
                    chapter_src = self.generate_topic_info_page(task_result.info_page)
                elif task_type == Type.author:
                    chapter_src = self.generate_author_info_page(task_result.info_page)
                elif task_type == Type.column:
                    chapter_src = self.generate_column_info_page(task_result.info_page)
                elif task_type == Type.article:
                    chapter_src = self.generate_article_info_page(task_result.info_page)
                epub.create_chapter(chapter_src, task_result.get_title())

                for question in task_result.question_list:
                    #   Register the image files used by this question
                    for filename in question.img_filename_list:
                        epub.add_image(Path.image_pool_path + '/' + filename)
                    question_src = self.generate_question_page(question)
                    epub.add_html(question_src, question.question_info.title)

                for column in task_result.column_list:
                    #   Register the image files used by this column
                    for filename in column.img_filename_list:
                        epub.add_image(Path.image_pool_path + '/' + filename)
                    for article in column.article_list:
                        article_src = self.generate_article_page(article)
                        epub.add_html(article_src, article.title)
                epub.finish_chapter()

            epub.set_creator(u'ZhihuHelp1.8.0')
            epub.set_language(u'zh-cn')
            epub.set_book_id()
            epub.set_output_path(Path.result_path)
            epub.add_css(Path.base_path + u'/www/css/markdown.css')
            epub.add_css(Path.base_path + u'/www/css/customer.css')
            epub.add_css(Path.base_path + u'/www/css/normalize.css')
            epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
            epub.create()
        finally:
            # Always run the path reset, even when building the book fails,
            # so a failure does not skip it (presumably it undoes the chdir
            # above - confirm against Path.reset_path).
            Path.reset_path()
        return
    def parse_article_title(self):
        u"""
        Return the title of the blog post followed by its article index.

        The title is the text of the last ``<div class="p_title">`` found in
        ``self.dom``, passed through ``Match.fix_filename``. It stays empty
        when no such element exists.

        :return: the title concatenated with the stripped string form of
            ``self.getArticleIndex()``
        """
        article_title = ''

        # Only the text of the title block is needed. The hrefs of nested
        # links are deliberately not parsed: the derived id was never used,
        # and an href without a '/' must not break title extraction.
        for title_node in self.dom.find_all('div', class_="p_title"):
            # Later matches overwrite earlier ones, so the last block wins.
            article_title = Match.fix_filename(title_node.text)

        return article_title + str(self.getArticleIndex()).strip()
    def parse_article_title(self):
        u"""
        Return the stripped title of the blog post.

        The title is the text of the last ``<div class="p_title">`` found in
        ``self.dom``, passed through ``Match.fix_filename``. It stays empty
        when no such element exists. When ``self.parse_author_id()`` differs
        from ``self.account_id`` the title is prefixed with ``跟帖:``
        (follow-up post).

        :return: the stripped title, possibly carrying the prefix
        """
        article_title = ''

        # Only the text of the title block is needed. The hrefs of nested
        # links are deliberately not parsed: the derived id was never used,
        # and an href without a '/' must not break title extraction.
        for title_node in self.dom.find_all('div', class_="p_title"):
            # Later matches overwrite earlier ones, so the last block wins.
            article_title = Match.fix_filename(title_node.text)

        # Posts written by someone other than the tracked account get a prefix.
        if self.parse_author_id() != self.account_id:
            article_title = u"跟帖:{}".format(article_title.strip())

        return article_title.strip()
 def get_title(self):
     """
     Return the epub titles of all books in ``self.book_list`` joined by
     underscores, with special characters removed via ``Match.fix_filename``.
     """
     book_titles = [book.epub.title for book in self.book_list]
     joined_title = '_'.join(book_titles)
     # Strip special characters from the combined title
     return Match.fix_filename(joined_title)
Exemple #6
0
 def get_title(self):
     """
     Build the combined title: every book's ``epub.title`` from
     ``self.book_list`` joined with ``_`` and then passed through
     ``Match.fix_filename`` to remove special characters.
     """
     titles = [entry.epub.title for entry in self.book_list]
     return Match.fix_filename('_'.join(titles))
Exemple #7
0
    def create_book(self):
        u"""
        Build one epub (with a cover image) from ``self.task_result_list``.

        For every task result a chapter is opened with the info page matching
        its task type, followed by all of its question pages and column
        article pages (images are registered before the pages that use them).
        The cover is downloaded from the ``image_url`` of the first task
        result's info page; when no URL is available a fixed fallback image
        is used. The finished book is written to ``Path.result_path``.

        :return: None
        """
        #   Work out the book title that is handed to Epub
        title = Match.fix_filename(self.book_title)
        if self.is_split:
            # Append the volume suffix to the already processed title; using
            # self.book_title here would discard the fix_filename result.
            title = title + u'_卷{}'.format(self.chapter_no)

        #   Switch to the temporary ebook resource directory first
        Path.chdir(Path.book_pool_path)
        try:
            epub = Epub(title)
            for task_result in self.task_result_list:
                task_type = task_result.task.task_type
                chapter_src = ''
                # info_page: answers reuse the question info page
                if task_type in (Type.question, Type.answer):
                    chapter_src = self.generate_question_info_page(
                        task_result.info_page)
                elif task_type == Type.collection:
                    chapter_src = self.generate_collection_info_page(
                        task_result.info_page)
                elif task_type == Type.topic:
                    chapter_src = self.generate_topic_info_page(
                        task_result.info_page)
                elif task_type == Type.author:
                    chapter_src = self.generate_author_info_page(
                        task_result.info_page)
                elif task_type == Type.column:
                    # Store the number of articles of the first column on the
                    # info page before the page is rendered.
                    task_result.info_page.article_count = len(
                        task_result.column_list[0].article_list)
                    chapter_src = self.generate_column_info_page(
                        task_result.info_page)
                elif task_type == Type.article:
                    chapter_src = self.generate_article_info_page(
                        task_result.info_page)
                epub.create_chapter(chapter_src, task_result.get_title())

                for question in task_result.question_list:
                    #   Register the image files used by this question
                    for filename in question.img_filename_list:
                        epub.add_image(Path.image_pool_path + '/' + filename)
                    question_src = self.generate_question_page(question)
                    epub.add_html(question_src, question.question_info.title)

                for column in task_result.column_list:
                    #   Register the image files used by this column
                    for filename in column.img_filename_list:
                        epub.add_image(Path.image_pool_path + '/' + filename)
                    for article in column.article_list:
                        article_src = self.generate_article_page(article)
                        epub.add_html(article_src, article.title)
                epub.finish_chapter()

            # Cover: taken from the first task result. An empty result list
            # or a missing (None/empty) URL falls through to the fallback
            # cover instead of raising.
            href = ''
            if self.task_result_list:
                href = self.task_result_list[0].info_page.image_url
            if href:
                print(href)
                content = Http.get_content(
                    url=href, timeout=Config.timeout_download_picture)
                if not content:
                    Debug.logger.debug(u'图片『{}』下载失败'.format(href))
                    content = ''
                else:
                    Debug.print_in_single_line(u'图片{}下载完成'.format(href))
                # Responses of 10 bytes or fewer are ignored (presumably
                # treated as a failed download); no cover is set then.
                if len(content) > 10:
                    filename = Path.image_pool_path + '/' + 'cover.jpg'
                    with open(filename, 'wb') as image:
                        image.write(content)

                    epub.add_cover_image(filename)
            else:
                # NOTE(review): machine-specific absolute path - this only
                # works on the original author's computer; replace it with a
                # bundled resource or a configuration value.
                epub.add_cover_image('/Users/ex-liyan010/Desktop/cover.png')

            epub.set_creator(u'macbookpro2100')
            epub.set_language(u'zh-cn')
            epub.set_book_id()
            epub.set_output_path(Path.result_path)
            epub.add_css(Path.base_path + u'/www/css/markdown.css')
            epub.add_css(Path.base_path + u'/www/css/customer.css')
            epub.add_css(Path.base_path + u'/www/css/normalize.css')
            epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
            epub.create()
        finally:
            # Always run the path reset, even when building the book fails,
            # so a failure does not skip it (presumably it undoes the chdir
            # above - confirm against Path.reset_path).
            Path.reset_path()
        return
Exemple #8
0
    def create_book(self):
        u"""
        Build one epub from ``self.task_result_list`` and write it out.

        For every task result a chapter is opened with the info page matching
        its task type, followed by all of its question pages and column
        article pages (images are registered before the pages that use them).
        The finished book is written to ``Path.result_path``.

        :return: None
        """
        #   Work out the book title that is handed to Epub
        title = Match.fix_filename(self.book_title)
        if self.is_split:
            # Append the volume suffix to the already processed title; using
            # self.book_title here would discard the fix_filename result.
            title = title + u'_卷{}'.format(self.chapter_no)

        #   Switch to the temporary ebook resource directory first
        Path.chdir(Path.book_pool_path)
        try:
            epub = Epub(title)
            for task_result in self.task_result_list:
                task_type = task_result.task.task_type
                chapter_src = ''
                # info_page: answers reuse the question info page
                if task_type in (Type.question, Type.answer):
                    chapter_src = self.generate_question_info_page(
                        task_result.info_page)
                elif task_type == Type.collection:
                    chapter_src = self.generate_collection_info_page(
                        task_result.info_page)
                elif task_type == Type.topic:
                    chapter_src = self.generate_topic_info_page(
                        task_result.info_page)
                elif task_type == Type.author:
                    chapter_src = self.generate_author_info_page(
                        task_result.info_page)
                elif task_type == Type.column:
                    chapter_src = self.generate_column_info_page(
                        task_result.info_page)
                elif task_type == Type.article:
                    chapter_src = self.generate_article_info_page(
                        task_result.info_page)
                epub.create_chapter(chapter_src, task_result.get_title())

                for question in task_result.question_list:
                    #   Register the image files used by this question
                    for filename in question.img_filename_list:
                        epub.add_image(Path.image_pool_path + '/' + filename)
                    question_src = self.generate_question_page(question)
                    epub.add_html(question_src, question.question_info.title)

                for column in task_result.column_list:
                    #   Register the image files used by this column
                    for filename in column.img_filename_list:
                        epub.add_image(Path.image_pool_path + '/' + filename)
                    for article in column.article_list:
                        article_src = self.generate_article_page(article)
                        epub.add_html(article_src, article.title)
                epub.finish_chapter()

            epub.set_creator(u'ZhihuHelp1.8.0')
            epub.set_language(u'zh-cn')
            epub.set_book_id()
            epub.set_output_path(Path.result_path)
            epub.add_css(Path.base_path + u'/www/css/markdown.css')
            epub.add_css(Path.base_path + u'/www/css/customer.css')
            epub.add_css(Path.base_path + u'/www/css/normalize.css')
            epub.add_css(Path.base_path + u'/www/css/bootstrap.css')
            epub.create()
        finally:
            # Always run the path reset, even when building the book fails,
            # so a failure does not skip it (presumably it undoes the chdir
            # above - confirm against Path.reset_path).
            Path.reset_path()
        return