def __init__(self, recipe_kind='Notset', read_list='ReadList.txt', url=None, debug=False):
    u"""
    Store the run options, set the log level, then prepare paths, database and config.

    Note carried over from the original docstring: the configuration file uses
    the ``$`` character as a separator, and entries on the same line are merged
    into a single e-book.

    :param recipe_kind: recipe/website type; logged and passed to ``Path.init_base_path``
    :param read_list: default value: ReadList.txt
    :param url: stored on the instance as ``self.url``
    :param debug: only the literal ``True`` selects DEBUG logging; any other value selects INFO
    :return: None
    """
    import logging

    self.recipe_kind = recipe_kind
    self.read_list = read_list
    self.url = url
    log.warning_log(u"website type: " + str(self.recipe_kind) + '\n')

    # ``is True`` is kept on purpose: truthy non-bool values still mean INFO.
    Debug.logger.setLevel(logging.DEBUG if debug is True else logging.INFO)

    startup_report = (
        (u"read_list: ", self.read_list),
        (u"url: ", self.url),
        (u"recipe type:", recipe_kind),
    )
    for label, value in startup_report:
        Debug.logger.debug(label + str(value))

    Path.init_base_path(recipe_kind)  # set up the paths
    Path.init_work_directory()  # create the directories
    self.init_database()  # initialise the database
    Config._load()
def init_database():
    """
    Open the SQLite connection at ``Path.db_path`` and register it with ``DB``.

    When no file exists at ``Path.db_path`` yet, the connection is opened
    first (``sqlite3.connect`` creates the file), then the SQL script at
    ``Path.sql_path`` is executed and committed.

    :return: None
    """
    # The existence check must run before connecting, because connecting
    # creates the file.
    if not Path.is_file(Path.db_path):
        Debug.logger.debug(u"Create db file...")
        DB.set_conn(sqlite3.connect(Path.db_path))
        with open(Path.sql_path) as schema_file:
            DB.cursor.executescript(schema_file.read())
        DB.commit()
        return

    Debug.logger.debug(u"Connect to the database...")
    Debug.logger.debug(u"db_path: " + str(Path.db_path))
    DB.set_conn(sqlite3.connect(Path.db_path))
def create_book(command, counter):
    u"""
    Run one command end to end: fetch its work list, then build the e-book.

    :param command: command text handed to ``UrlParser.get_task``
    :param counter: sequence number of this e-book; used only in the log message
    :return: up to three entries of the collection returned by ``Book.create()``
        joined with ``-``, or the fallback message when no book was generated
    """
    nothing_produced = u"Oops! no epub file produced"

    Path.reset_path()
    Debug.logger.info(u"Ready to make No.{} e-book".format(counter))
    Debug.logger.info(u"Analyzes {} ".format(command))

    task_package = UrlParser.get_task(command)  # parse the command

    if not task_package.is_work_list_empty():
        worker_factory(task_package.work_list)  # run the fetchers
        Debug.logger.info(u"Complete fetching from web")

    if task_package.is_book_list_empty():
        return nothing_produced

    Debug.logger.info(u"Start generating e-book from the database")
    produced_names = Book(task_package.book_list).create()
    if produced_names is None:
        return nothing_produced

    # Only the first three names (in the collection's iteration order) are used.
    return '-'.join(list(produced_names)[:3])
def create_book(self, book_package):
    u"""
    Download the package's images and assemble its books into one EPUB.

    Each book becomes a chapter: its first page opens the chapter and the
    remaining pages are appended to it. Page HTML is written to
    ``Path.html_pool_path`` before being handed to the ``Epub`` builder.

    :param book_package: object providing ``image_container``, ``get_title()``,
        ``book_list`` and ``image_list``
    :return: None
    """
    book_package.image_container.set_save_path(Path.image_pool_path)
    book_package.image_container.start_download()

    title = book_package.get_title()
    Debug.logger.debug(u"title of the e-book:" + str(title))
    if not title:
        # Skip books with an empty title; the original author warns that
        # carrying on would end in an "rm -rf /"-style disaster.
        return

    Path.chdir(Path.pwd_path + u'/e-books_tmp_source')
    epub = Epub(title)
    html_tmp_path = Path.html_pool_path + u'/'
    image_tmp_path = Path.image_pool_path + u'/'

    epub.set_creator(u'macbookpro2100')
    epub.set_language(u'zh')
    epub.set_book_id()
    epub.set_output_path(Path.result_path)

    # TODO: article.css (taken from Sina) is left out until it has been trimmed down.
    stylesheets = (
        u'/www/css/markdown.css',
        u'/www/css/customer.css',
        u'/www/css/normalize.css',
        u'/www/css/bootstrap.css',
    )
    for stylesheet in stylesheets:
        epub.add_css(Path.in_base_path + stylesheet)

    for book in book_package.book_list:
        # The first page opens the chapter.
        opening_page = book.page_list[0]
        with open(html_tmp_path + opening_page.filename, 'w') as html_file:
            html_file.write(opening_page.content)
        if '_' in opening_page.title:
            # Drop the chapter prefix (everything up to the first underscore).
            title_parts = opening_page.title.split('_')
            opening_page.title = ''.join(title_parts[1:])
        epub.create_chapter(html_tmp_path + opening_page.filename, opening_page.title)

        # Every following page is appended to the open chapter.
        for extra_page in book.page_list[1:]:
            with open(html_tmp_path + extra_page.filename, 'w') as html_file:
                html_file.write(extra_page.content)
            epub.add_html(html_tmp_path + extra_page.filename, extra_page.title)
        epub.finish_chapter()

    for image in book_package.image_list:
        epub.add_image(image_tmp_path + image['filename'])

    epub.create()
    Path.reset_path()
def create_book(self, book_package):
    u"""
    Download the package's images and assemble its books into one EPUB.

    Each book becomes a chapter: its first page opens the chapter and the
    remaining pages are appended to it. Page HTML is written to
    ``Path.html_pool_path`` before being handed to the ``Epub`` builder.

    :param book_package: object providing ``image_container``, ``get_title()``,
        ``book_list`` and ``image_list``
    :return: None
    """
    book_package.image_container.set_save_path(Path.image_pool_path)
    book_package.image_container.start_download()

    title = book_package.get_title()
    Debug.logger.debug(u"title of the e-book:" + str(title))
    if not title:
        # Skip books with an empty title; the original author warns that
        # carrying on would end in an "rm -rf /"-style disaster.
        return

    Path.chdir(Path.pwd_path + u'/e-books_tmp_source')
    epub = Epub(title)
    html_tmp_path = Path.html_pool_path + u'/'
    image_tmp_path = Path.image_pool_path + u'/'

    epub.set_creator(u'EEBookV0-1')
    epub.set_language(u'zh')
    epub.set_book_id()
    epub.set_output_path(Path.result_path)

    # TODO: article.css (taken from Sina) is left out until it has been trimmed down.
    stylesheets = (
        u'/www/css/markdown.css',
        u'/www/css/customer.css',
        u'/www/css/normalize.css',
        u'/www/css/bootstrap.css',
    )
    for stylesheet in stylesheets:
        epub.add_css(Path.in_base_path + stylesheet)

    for book in book_package.book_list:
        # The first page opens the chapter.
        opening_page = book.page_list[0]
        with open(html_tmp_path + opening_page.filename, 'w') as html_file:
            html_file.write(opening_page.content)
        if '_' in opening_page.title:
            # Drop the chapter prefix (everything up to the first underscore).
            title_parts = opening_page.title.split('_')
            opening_page.title = ''.join(title_parts[1:])
        epub.create_chapter(html_tmp_path + opening_page.filename, opening_page.title)

        # Every following page is appended to the open chapter.
        for extra_page in book.page_list[1:]:
            with open(html_tmp_path + extra_page.filename, 'w') as html_file:
                html_file.write(extra_page.content)
            epub.add_html(html_tmp_path + extra_page.filename, extra_page.title)
        epub.finish_chapter()

    for image in book_package.image_list:
        epub.add_image(image_tmp_path + image['filename'])

    epub.create()
    Path.reset_path()
def create_single_html_book(self, book_package):
    u"""
    Render every page of the package into one standalone HTML file.

    The output goes into a directory named after the title under
    ``Path.result_path``; that directory is removed and recreated first, and
    the images and stylesheets are copied in next to the HTML file.

    :param book_package: object providing ``get_title()`` and ``book_list``
    :return: the title, or None when the title is empty
    """
    title = book_package.get_title()
    if not title:
        # Skip books with an empty title: otherwise the rmdir below would be
        # handed u'./' itself (the original comment calls this an
        # "rm -rf /" disaster).
        return

    book_dir = u'./' + title
    Path.reset_path()
    Path.chdir(Path.result_path)
    Path.rmdir(book_dir)
    Path.mkdir(book_dir)
    Path.chdir(book_dir)

    all_pages = [page for book in book_package.book_list for page in book.page_list]
    page_bodies = [Match.html_body(page.content) for page in all_pages]
    # Images sit beside the HTML file here instead of one directory up.
    merged_body = u' \r\n '.join(page_bodies).replace(u'../images/', u'./images/')

    with open(TemplateConfig.content_base_uri) as template_file:
        template = template_file.read()
    # Stylesheets are copied into the same directory, so the style prefix is dropped.
    document = template.format(title=title, body=merged_body).replace(u'../style/', u'./')

    with open(title + u'.html', 'w') as output_file:
        output_file.write(document)

    Path.copy(Path.html_pool_path + u'/../{}/OEBPS/images'.format(title), u'./images')
    # TODO: article.css is not copied yet; it needs to be trimmed down first.
    stylesheet_copies = (
        (u'/customer.css', u'./customer.css'),
        (u'/markdown.css', u'./markdown.css'),
        (u'/normalize.css', u'./normalize.css'),
    )
    for source_suffix, destination in stylesheet_copies:
        Path.copy(Path.www_css + source_suffix, destination)

    Path.reset_path()
    return title