Esempio n. 1
0
    def __init__(self,
                 recipe_kind='Notset',
                 read_list='ReadList.txt',
                 url=None,
                 debug=False):
        u"""
        Store the run options, choose the log level, then prepare the
        paths, the database and the configuration.

        The configuration file uses the ``$`` sign as a separator; entries on
        the same line are merged into a single e-book.

        :param recipe_kind: website/recipe type, also handed to ``Path.init_base_path``
        :param read_list: default value: ReadList.txt
        :param url: kept on the instance as ``self.url``
        :param debug: only the literal ``True`` selects DEBUG logging, anything else selects INFO
        :return: None
        """
        self.recipe_kind = recipe_kind
        self.read_list = read_list
        self.url = url
        log.warning_log(u"website type: " + str(self.recipe_kind) + '\n')

        import logging
        # Identity check on purpose: truthy values other than True keep INFO.
        level = logging.DEBUG if debug is True else logging.INFO
        Debug.logger.setLevel(level)

        startup_values = (
            (u"read_list: ", self.read_list),
            (u"url: ", self.url),
            (u"recipe type:", recipe_kind),
        )
        for label, value in startup_values:
            Debug.logger.debug(label + str(value))

        Path.init_base_path(recipe_kind)  # set up the paths
        Path.init_work_directory()  # create the directories
        self.init_database()  # initialise the database
        Config._load()
Esempio n. 2
0
 def init_database():
     u"""
     Open the SQLite connection at ``Path.db_path`` and hand it to ``DB``.

     When the database file does not exist yet, the SQL script at
     ``Path.sql_path`` is executed on the new connection and committed.

     :return: None
     """
     already_exists = Path.is_file(Path.db_path)
     if already_exists:
         Debug.logger.debug(u"Connect to the database...")
         Debug.logger.debug(u"db_path: " + str(Path.db_path))
     else:
         Debug.logger.debug(u"Create db file...")

     # sqlite3.connect is called the same way in both cases.
     DB.set_conn(sqlite3.connect(Path.db_path))
     if already_exists:
         return

     with open(Path.sql_path) as sql_script:
         DB.cursor.executescript(sql_script.read())
     DB.commit()
Esempio n. 3
0
    def create_book(command, counter):
        u"""
        Run one command: fetch what it asks for, then build the e-book.

        :param command: command string handed to ``UrlParser.get_task``
        :param counter: ordinal of this e-book, used only in the log message
        :return: up to three entries of the set returned by ``Book.create``
                 joined with ``-``, or a fixed notice string when nothing
                 was produced
        """
        Path.reset_path()

        Debug.logger.info(u"Ready to make No.{} e-book".format(counter))
        Debug.logger.info(u"Analyzes {} ".format(command))
        task_package = UrlParser.get_task(command)  # analyse the command

        if not task_package.is_work_list_empty():
            worker_factory(task_package.work_list)  # run the fetching program
            Debug.logger.info(u"Complete fetching from web")

        produced_names = None
        if not task_package.is_book_list_empty():
            Debug.logger.info(u"Start generating e-book from the database")
            produced_names = Book(task_package.book_list).create()

        if produced_names is None:
            return u"Oops! no epub file produced"
        return '-'.join(list(produced_names)[0:3])
Esempio n. 4
0
 def create_book(self, book_package):
     u"""
     Build an epub file from ``book_package``.

     Images are downloaded through ``book_package.image_container`` into
     ``Path.image_pool_path``. Every page's content is written as a file
     under ``Path.html_pool_path`` and registered with the epub: the first
     page of each book opens a chapter, the remaining pages are added to it.
     The result is written to ``Path.result_path``.

     ``Path.reset_path()`` runs in a ``finally`` clause, so the state changed
     by ``Path.chdir`` is reset even when writing a page or building the epub
     raises; the exception itself still propagates to the caller.

     :param book_package: object providing ``image_container``, ``get_title()``,
                          ``book_list`` and ``image_list``
     :return: None
     """
     book_package.image_container.set_save_path(Path.image_pool_path)
     book_package.image_container.start_download()
     title = book_package.get_title()
     Debug.logger.debug(u"title of the e-book:" + str(title))
     if not title:
         # Skip automatically when the e-book title is empty,
         # otherwise an "rm -rf /" kind of disaster would happen.
         return
     Path.chdir(Path.pwd_path + u'/e-books_tmp_source')
     try:
         epub = Epub(title)
         html_tmp_path = Path.html_pool_path + u'/'
         image_tmp_path = Path.image_pool_path + u'/'
         epub.set_creator(u'macbookpro2100')
         epub.set_language(u'zh')
         epub.set_book_id()
         epub.set_output_path(Path.result_path)
         epub.add_css(Path.in_base_path + u'/www/css/markdown.css')
         epub.add_css(Path.in_base_path + u'/www/css/customer.css')
         epub.add_css(Path.in_base_path + u'/www/css/normalize.css')
         epub.add_css(Path.in_base_path + u'/www/css/bootstrap.css')
         # epub.add_css(Path.in_base_path + u'/www/css/article.css')    # TODO: comes from Sina, needs trimming down
         for book in book_package.book_list:
             # The first page opens the chapter.
             first_page = book.page_list[0]
             with open(html_tmp_path + first_page.filename, 'w') as html:
                 html.write(first_page.content)
             if '_' in first_page.title:
                 # Remove the chapter prefix (everything up to the first '_').
                 first_page.title = ''.join(first_page.title.split('_')[1:])
             epub.create_chapter(html_tmp_path + first_page.filename, first_page.title)
             # Remaining pages are appended to the chapter just opened.
             for extra_page in book.page_list[1:]:
                 with open(html_tmp_path + extra_page.filename, 'w') as html:
                     html.write(extra_page.content)
                 epub.add_html(html_tmp_path + extra_page.filename, extra_page.title)
             epub.finish_chapter()
         for image in book_package.image_list:
             epub.add_image(image_tmp_path + image['filename'])
         epub.create()
     finally:
         # Previously skipped whenever any step above raised.
         Path.reset_path()
     return
Esempio n. 5
0
 def create_book(self, book_package):
     u"""
     Build an epub file from ``book_package``.

     Images are downloaded through ``book_package.image_container`` into
     ``Path.image_pool_path``. Every page's content is written as a file
     under ``Path.html_pool_path`` and registered with the epub: the first
     page of each book opens a chapter, the remaining pages are added to it.
     The result is written to ``Path.result_path``.

     ``Path.reset_path()`` runs in a ``finally`` clause, so the state changed
     by ``Path.chdir`` is reset even when writing a page or building the epub
     raises; the exception itself still propagates to the caller.

     :param book_package: object providing ``image_container``, ``get_title()``,
                          ``book_list`` and ``image_list``
     :return: None
     """
     book_package.image_container.set_save_path(Path.image_pool_path)
     book_package.image_container.start_download()
     title = book_package.get_title()
     Debug.logger.debug(u"title of the e-book:" + str(title))
     if not title:
         # Skip automatically when the e-book title is empty,
         # otherwise an "rm -rf /" kind of disaster would happen.
         return
     Path.chdir(Path.pwd_path + u'/e-books_tmp_source')
     try:
         epub = Epub(title)
         html_tmp_path = Path.html_pool_path + u'/'
         image_tmp_path = Path.image_pool_path + u'/'
         epub.set_creator(u'EEBookV0-1')
         epub.set_language(u'zh')
         epub.set_book_id()
         epub.set_output_path(Path.result_path)
         epub.add_css(Path.in_base_path + u'/www/css/markdown.css')
         epub.add_css(Path.in_base_path + u'/www/css/customer.css')
         epub.add_css(Path.in_base_path + u'/www/css/normalize.css')
         epub.add_css(Path.in_base_path + u'/www/css/bootstrap.css')
         # epub.add_css(Path.in_base_path + u'/www/css/article.css')    # TODO: comes from Sina, needs trimming down
         for book in book_package.book_list:
             # The first page opens the chapter.
             first_page = book.page_list[0]
             with open(html_tmp_path + first_page.filename, 'w') as html:
                 html.write(first_page.content)
             if '_' in first_page.title:
                 # Remove the chapter prefix (everything up to the first '_').
                 first_page.title = ''.join(first_page.title.split('_')[1:])
             epub.create_chapter(html_tmp_path + first_page.filename, first_page.title)
             # Remaining pages are appended to the chapter just opened.
             for extra_page in book.page_list[1:]:
                 with open(html_tmp_path + extra_page.filename, 'w') as html:
                     html.write(extra_page.content)
                 epub.add_html(html_tmp_path + extra_page.filename, extra_page.title)
             epub.finish_chapter()
         for image in book_package.image_list:
             epub.add_image(image_tmp_path + image['filename'])
         epub.create()
     finally:
         # Previously skipped whenever any step above raised.
         Path.reset_path()
     return
Esempio n. 6
0
 def create_single_html_book(self, book_package):
     u"""
     Write all pages of ``book_package`` into one HTML file.

     A directory named after the title is recreated under
     ``Path.result_path``. The bodies of all pages (extracted with
     ``Match.html_body``) are joined, inserted into the template read from
     ``TemplateConfig.content_base_uri`` and written to ``<title>.html``.
     The images directory and three style sheets are copied next to it.

     ``Path.reset_path()`` runs in a ``finally`` clause, so the state changed
     by ``Path.chdir`` is reset even when a directory operation, the template
     read or a copy raises; the exception itself still propagates.

     :param book_package: object providing ``get_title()`` and ``book_list``
     :return: the title, or None when the title is empty
     """
     title = book_package.get_title()
     if not title:
         # Skip automatically when the e-book title is empty,
         # otherwise an "rm -rf /" kind of disaster would happen.
         return
     Path.reset_path()
     Path.chdir(Path.result_path)
     try:
         book_dir = u'./' + title
         Path.rmdir(book_dir)
         Path.mkdir(book_dir)
         Path.chdir(book_dir)
         pages = []
         for book in book_package.book_list:
             pages += book.page_list
         # Image links must point into the local ./images copy made below.
         content = u' \r\n '.join([Match.html_body(x.content) for x in pages]).replace(u'../images/', u'./images/')
         with open(TemplateConfig.content_base_uri) as html:
             # Style sheets are copied beside the HTML file, hence './'.
             content = html.read().format(title=title, body=content).replace(u'../style/', u'./')
         with open(title + u'.html', 'w') as html:
             html.write(content)
         Path.copy(Path.html_pool_path + u'/../{}/OEBPS/images'.format(title), u'./images')
         Path.copy(Path.www_css + u'/customer.css', u'./customer.css')
         Path.copy(Path.www_css + u'/markdown.css', u'./markdown.css')
         Path.copy(Path.www_css + u'/normalize.css', u'./normalize.css')
         # Path.copy(Path.www_css + u'/article.css', u'./article.css')         # TODO: needs trimming down
     finally:
         # Previously skipped whenever any step above raised.
         Path.reset_path()
     return title
Esempio n. 7
0
 def create_single_html_book(self, book_package):
     u"""
     Write all pages of ``book_package`` into one HTML file.

     A directory named after the title is recreated under
     ``Path.result_path``. The bodies of all pages (extracted with
     ``Match.html_body``) are joined, inserted into the template read from
     ``TemplateConfig.content_base_uri`` and written to ``<title>.html``.
     The images directory and three style sheets are copied next to it.

     ``Path.reset_path()`` runs in a ``finally`` clause, so the state changed
     by ``Path.chdir`` is reset even when a directory operation, the template
     read or a copy raises; the exception itself still propagates.

     :param book_package: object providing ``get_title()`` and ``book_list``
     :return: the title, or None when the title is empty
     """
     title = book_package.get_title()
     if not title:
         # Skip automatically when the e-book title is empty,
         # otherwise an "rm -rf /" kind of disaster would happen.
         return
     Path.reset_path()
     Path.chdir(Path.result_path)
     try:
         book_dir = u'./' + title
         Path.rmdir(book_dir)
         Path.mkdir(book_dir)
         Path.chdir(book_dir)
         pages = []
         for book in book_package.book_list:
             pages += book.page_list
         # Image links must point into the local ./images copy made below.
         content = u' \r\n '.join([Match.html_body(x.content) for x in pages]).replace(u'../images/', u'./images/')
         with open(TemplateConfig.content_base_uri) as html:
             # Style sheets are copied beside the HTML file, hence './'.
             content = html.read().format(title=title, body=content).replace(u'../style/', u'./')
         with open(title + u'.html', 'w') as html:
             html.write(content)
         Path.copy(Path.html_pool_path + u'/../{}/OEBPS/images'.format(title), u'./images')
         Path.copy(Path.www_css + u'/customer.css', u'./customer.css')
         Path.copy(Path.www_css + u'/markdown.css', u'./markdown.css')
         Path.copy(Path.www_css + u'/normalize.css', u'./normalize.css')
         # Path.copy(Path.www_css + u'/article.css', u'./article.css')         # TODO: needs trimming down
     finally:
         # Previously skipped whenever any step above raised.
         Path.reset_path()
     return title