def ezpubParseBook(bookname, scid, ecid):
    """Build the ``ez_epub.Section`` list for a range of stored chapters.

    The book is opened through ``Tnovel`` and ``calc_chap_count`` turns the
    requested ``scid``/``ecid`` pair into a start chapter id and a chapter
    count.  One section is produced for every chapter returned by
    ``read_book()`` whose id lies in ``[scid, scid + count)``.

    Args:
        bookname: Name of the book to read.
        scid: Requested first chapter id, as passed to ``calc_chap_count``.
        ecid: Requested last chapter id, as passed to ``calc_chap_count``.

    Returns:
        list of ``ez_epub.Section``, in the order ``read_book()`` yields
        the chapters.
    """
    sections = []
    logger.debug("ezpub parse book func start")
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    scid, count = calc_chap_count(scid, ecid, book.get_chapter_count())
    bookdata = book.read_book()
    logger.debug("creating epub scid %d count %d", scid, count)
    end_cid = scid + count
    for chapid, chaptitle, chapdata, rawdata in bookdata:
        # Half-open range, matching etranslate_book/toolkit_book.  The former
        # test `chapid > scid + count` let one chapter past the range through.
        if chapid < scid or chapid >= end_cid:
            continue
        logger.debug("reading chapid %d for epub", chapid)
        section = ez_epub.Section()
        section.css = """.em { font-style: italic; }"""
        section.title = chaptitle
        sections.append(section)
        # Non-ASCII characters are dropped before the text is formatted.
        chapdata = chapdata.encode('ascii', 'ignore')
        for line in chapdata.splitlines():
            fparagraph = formatParagraph(line)
            for f in fparagraph:
                # Sanity check only: report any segment whose first element
                # is not ASCII-decodable; the paragraph is still appended.
                try:
                    f[0].decode('ascii')
                except UnicodeDecodeError:
                    print(f)
                    print("it was not a ascii-encoded unicode string")
            section.text.append(fparagraph)
    logger.debug("ezpub parse book func end")
    return sections
def createepub_book(bookname, scid, ecid):
    """Create an epub for the requested chapter range of *bookname*.

    An ``ez_epub.Book`` is filled with the book name as title, the authors
    read from the ``Tnovel`` record and the sections produced by
    ``ezpubParseBook``; it is then written with ``make()`` to
    ``./epub/<title>``.

    Args:
        bookname: Name of the book; also used as the epub title.
        scid: Requested first chapter id, forwarded to ``ezpubParseBook``.
        ecid: Requested last chapter id, forwarded to ``ezpubParseBook``.
    """
    epub = ez_epub.Book()
    novel = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)

    epub.title = bookname
    epub.authors = list(novel.read_book_author())
    epub.sections = ezpubParseBook(bookname, scid, ecid)

    # The output location is derived from the title stored on the epub object.
    target = r'./epub/%s' % epub.title
    epub.make(target)
# Number of chapters handed to the translator in one translate() call.
_ETRANSLATE_BATCH_SIZE = 200


def _etranslate_and_store(book, translator, chapid_list, raw_data_list):
    """Translate one batch of chapter texts and write each result to *book*.

    Returns the number of chapters updated.
    """
    trans_data_list = translator.translate(raw_data_list)
    for pos, chapid in enumerate(chapid_list):
        book.update_chapter_data(chapid, trans_data_list[pos])
    return len(chapid_list)


def etranslate_book(bookname, scid, ecid):
    """Run the e-translator over a chapter range and store the results.

    ``calc_chap_count`` converts ``scid``/``ecid`` into a start id and a
    chapter count.  Every chapter id in ``[scid, scid + count)`` must already
    exist in the book.  The ``chapdata`` of each chapter in range is passed to
    ``TNovelETranslator.translate`` in batches and the translated text is
    written back with ``update_chapter_data``.

    Args:
        bookname: Name of the book to translate.
        scid: Requested first chapter id, as passed to ``calc_chap_count``.
        ecid: Requested last chapter id, as passed to ``calc_chap_count``.

    Raises:
        ValueError: If any chapter in the range is missing from the book.
    """
    logger.debug("Enter etranslate book func")
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    etransmap = book.read_book_etransmap()
    translator = TNovelETranslator(name=bookname, etransmap=etransmap)
    bookdata = book.read_book()
    scid, chapcount = calc_chap_count(scid, ecid, book.get_chapter_count())
    logger.debug("traslate book scid %d count %d book:%d", scid, chapcount, len(bookdata))

    # Refuse to translate a range with holes in it.
    missing = [cid for cid in range(scid, scid + chapcount)
               if not book.is_chapter_exist(cid)]
    if missing:
        logger.debug('Following chapters are not available')
        logger.debug(missing)
        raise ValueError("chapters not available: %s" % missing)

    raw_data_list = []
    chapid_list = []
    transcount = 0
    for chapid, chaptitle, chapdata, rawdata in bookdata:
        if chapid < scid or chapid >= (scid + chapcount):
            continue
        logger.debug("chapid id %d is withing the range %d-%d", chapid, scid, scid+chapcount)
        chapid_list.append(chapid)
        raw_data_list.append(chapdata)
        if len(raw_data_list) >= _ETRANSLATE_BATCH_SIZE:
            # Flush a full batch before collecting more chapters.
            logger.debug("in loop going to etranslate chapter count %d", len(raw_data_list))
            transcount += _etranslate_and_store(book, translator, chapid_list, raw_data_list)
            chapid_list = []
            raw_data_list = []

    logger.debug("going to etranslate chapter count %d", len(raw_data_list))
    if raw_data_list:
        # Flush whatever is left over after the last full batch.
        transcount += _etranslate_and_store(book, translator, chapid_list, raw_data_list)
    logger.debug('total number of chapters etranslated %d', transcount)
def toolkit_book(bookname, scid, ecid):
    """Translate a chapter range with a manual step in the middle.

    The ``rawdata`` of every chapter in ``[scid, scid + chapcount)`` is run
    through ``TNovelTKTranslator.translate`` and written to
    ``/tmp/<bookname>-temp.txt``, each chapter wrapped in start/end delimiter
    lines carrying its chapter id.  The function then blocks on ``raw_input``
    until the user confirms, reads the file back, splits it on the delimiters
    and stores each chapter's text with ``update_chapter_data``.

    Args:
        bookname: Name of the book; also used to build the temp file name.
        scid: Requested first chapter id, as passed to ``calc_chap_count``.
        ecid: Requested last chapter id, as passed to ``calc_chap_count``.

    Raises:
        ValueError: If any chapter in the range is missing from the book.
    """
    logger.debug("Enter toolkit book func")
    # NOTE(review): fixed, predictable path under /tmp -- confirm this is
    # acceptable on shared machines.
    btempname = '/tmp/' + bookname + '-temp.txt'
    sdelimitter = "\nStart of chapter id:"
    edelimitter = "\nEnd of chapter id:"
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    transmap = book.read_book_transmap()
    translator = TNovelTKTranslator(name=bookname, transmap=transmap)
    bookdata = book.read_book()
    raw_data_list = []
    chapid_list = []
    trans_chapid_list = []
    trans_chapdata_list = []
    cid_list = []
    transcount = 0
    count = 0
    scid, chapcount = calc_chap_count(scid, ecid, book.get_chapter_count())
    logger.debug("traslate toolkit book scid %d count %d book:%d", scid, chapcount, len(bookdata))
    # Collect the ids in the requested range that the book does not have.
    for index in range(chapcount):
        cid = scid + index
        if book.is_chapter_exist(cid):
            continue
        else:
            cid_list.append(cid)
    if len(cid_list) > 0:
        logger.debug('Following chapters are not available')
        logger.debug(cid_list)
        raise(ValueError)
    # Gather the raw text (not chapdata) of every chapter inside the range.
    for chapid,chaptitle,chapdata,rawdata in bookdata:
        if chapid < scid or chapid >= (scid + chapcount):
            continue
        logger.debug("chapid id %d is withing the range %d-%d", chapid, scid, scid+chapcount)
        chapid_list.append(chapid)
        raw_data_list.append(rawdata)
        count = count + 1
    logger.debug("going to etranslate chapter count %d", len(raw_data_list))
    if count > 0:
        trans_data_list = translator.translate(raw_data_list)
        # Dump every translated chapter between its start/end delimiter lines.
        with codecs.open(btempname, 'w', 'utf-8') as outfile:
            for cid in range(len(chapid_list)):
                outfile.write(sdelimitter+str(chapid_list[cid])+"\n")
                outfile.write(trans_data_list[cid])
                outfile.write(edelimitter+str(chapid_list[cid])+"\n")
        # Block until the user confirms; raw_input returns once Enter is hit.
        raw_input("Press any key once the data is translated")
        with codecs.open(btempname, 'r', 'utf-8') as infile:
            data_list = infile.readlines()
        new_chapter = []
        end_chapter = False
        for data in data_list:
            # A start delimiter opens a new chapter: record its id (the text
            # after the first ':') and discard anything collected so far.
            # NOTE(review): matched by substring, so chapter text containing
            # the delimiter phrase would be taken for a delimiter -- confirm.
            if sdelimitter.strip() in data:
                trans_chapid_list.append(int(data.split(':')[1].strip()))
                end_chapter = False
                new_chapter = []
                continue
            # An end delimiter closes the chapter and stores the joined lines.
            if edelimitter.strip() in data:
                end_chapter = True
                trans_chapdata_list.append(''.join(new_chapter))
                new_chapter = []
                continue
            # Inside a chapter: keep non-blank lines; lines that follow an end
            # delimiter are ignored until the next start delimiter.
            if end_chapter is False:
                if data.strip() != '':
                    new_chapter.append(data)
        # Ids and texts are paired by position, so the file must contain one
        # end delimiter for every start delimiter.
        for cid in range(len(trans_chapid_list)):
            book.update_chapter_data(trans_chapid_list[cid],trans_chapdata_list[cid])
            transcount = transcount + 1
    logger.debug('total number of chapters etranslated %d', transcount)
def _build_parser(bookname, url):
    """Return the parser instance matching the network location of *url*."""
    dname = urlparse.urlparse(url).netloc
    logger.debug('update_raw_book() url:%s dname:%s', url, dname)
    # netloc -> (log message, parser class)
    site_parsers = {
        "www.ranwen.org": ("using ranwen parser", RanWenDotOrgParser),
        "tw.hjwzw.com": ("using tv hjwzw parser", TwDotHjwzwDotComParser),
        "tw.bsxsw.com": ("using tv bsxsw parser", TwBsxswDotComParser),
        "tw.fxnzw.com": ("using tv fxnzw parser", TwFxnzwDotComParser),
        "tw.zhsxs.com": ("using tv zhsxs parser", TwZhsxsDotComParser),
        "www.shumilou.co": ("using shumilou.co parser", ShumilouCoParser),
        "www.feizw.com": ("using feizw.com parser", FeizwDotComParser),
    }
    entry = site_parsers.get(dname)
    if entry is None:
        logger.info("using default parser")
        return TnovelParser(bookname, url)
    message, parser_cls = entry
    logger.info(message)
    return parser_cls(bookname, url, loglevel=parser_log_level)


def update_raw_book(bookname, scid, ecid):
    """Read raw chapter data through the book's parsers and store it.

    One parser is created per URL returned by ``read_book_url()``.  For each
    chapter id in ``[scid, scid + count)`` the parsers are tried in order
    until one returns more than 200 characters; between attempts the user is
    asked (``get_url_from_user``) for the chapter URL on the next parser's
    domain.  Chapters that already exist get their raw data updated, others
    are added with the title ``"Chapter <id>"`` and the URL that was used.

    Args:
        bookname: Name of the book to update.
        scid: Requested first chapter id, as passed to ``calc_chap_count``.
        ecid: Requested last chapter id, as passed to ``calc_chap_count``.

    Raises:
        ValueError: If no parser yields usable data for a chapter and
            ``continue_check()`` declines to skip it.
    """
    min_raw_len = 200  # responses of this length or shorter count as invalid
    logger.debug("Enter update book func")
    book = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    raw_data_list = []
    cid_list = []
    title_list = []
    url_list = []

    parser = [_build_parser(bookname, url) for url in book.read_book_url()]

    # print some debug info
    for p in parser:
        logger.debug("domain: " + p.dname + " total chapters: " + str(len(p.chapter_list)))

    # check the count (the first parser's chapter list is the reference)
    scid, count = calc_chap_count(scid, ecid, len(parser[0].chapter_list))
    logger.info('reading raw data url %s count %d', parser[0].index_page, count)

    for i in range(count):
        raw_data = ""
        new_url = ""
        chapid = scid + i
        for pos, p in enumerate(parser):
            logger.debug('start read raw data for chapter %d', chapid)
            if new_url == "":
                _title, raw_data = p.get_processed_data(chapid)
            else:
                _title, raw_data = p.get_processed_data(url=new_url)
            logger.debug('end read raw data for chapter %d len %d', chapid, len(raw_data))
            if len(raw_data) > min_raw_len:
                url_list.append(p.chapter_list[chapid] if new_url == "" else new_url)
                break
            # Only ask for a replacement URL when there is a next parser to
            # try.  Indexing parser[index + 1] unconditionally raised
            # IndexError on the last parser and bypassed the handling below.
            if pos + 1 < len(parser):
                new_dname = parser[pos + 1].dname
                logger.warning('Invalid url %s try using %s ', p.chapter_list[chapid], new_dname)
                new_url = get_url_from_user(p.chapter_list[chapid], new_dname)
            raw_data = ""

        if len(raw_data) <= min_raw_len:
            logger.error("Raw data is null")
            logger.warning('Invalid url %s', p.chapter_list[chapid])
            if continue_check():
                continue
            raise ValueError("no usable raw data for chapter %d" % chapid)

        raw_data_list.append(raw_data)
        cid_list.append(chapid)
        title_list.append("Chapter " + str(chapid))

    logger.info('Total number of chapters read %d', len(raw_data_list))
    # The four lists grow together (one entry per successfully read chapter).
    for cid, title, raw_data, weblink in zip(cid_list, title_list, raw_data_list, url_list):
        if book.is_chapter_exist(cid):
            logger.debug('Updating chapter id %d to db', cid)
            book.update_chapter_raw(cid, raw_data)
        else:
            logger.debug('Adding chapter id %d to db', cid)
            book.add_chapter(cid, title, raw_data, weblink=weblink)
def remove_book(bookname):
    """Open *bookname* through ``Tnovel`` and call its ``remove_book``.

    Unlike the other entry points here, no ``loglevel`` is passed to
    ``Tnovel``.

    NOTE(review): ``author`` is neither a parameter nor a local of this
    function; it is resolved from the enclosing (module) scope at call time.
    Confirm such a name exists, otherwise this call raises NameError.

    Args:
        bookname: Name of the book to remove.
    """
    logger.debug("Enter remove book func")
    novel = Tnovel(bookname, dbfile=dbfilename)
    novel.remove_book(author)
def update_transmap(bookname, mapobj, emapobj):
    """Replace the stored translation maps of *bookname*.

    Both map objects are read in full and their contents are handed to
    ``Tnovel.update_transmap``.  The objects are not closed here.

    Args:
        bookname: Name of the book to update.
        mapobj: Readable object providing the transmap contents.
        emapobj: Readable object providing the etransmap contents.
    """
    logger.debug("Enter udpate tranamap func")
    novel = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    logger.debug("updating transmap prop")
    transmap_text = mapobj.read()
    etransmap_text = emapobj.read()
    novel.update_transmap(transmap_text, etransmap_text)
def add_book(bookname, author, urllist, mapobj, emapobj):
    """Store the properties of *bookname* via ``Tnovel.update_book``.

    The URLs are joined into a single ``~``-separated string and both map
    objects are read in full before being passed on.  The map objects are
    not closed here.

    Args:
        bookname: Name of the book.
        author: Author value forwarded unchanged to ``update_book``.
        urllist: Iterable of URL strings for the book.
        mapobj: Readable object providing the transmap contents.
        emapobj: Readable object providing the etransmap contents.
    """
    logger.debug("Enter add book func")
    novel = Tnovel(bookname, loglevel=book_log_level, dbfile=dbfilename)
    logger.debug("updating book prop")
    joined_urls = '~'.join(urllist)
    transmap_text = mapobj.read()
    etransmap_text = emapobj.read()
    novel.update_book(author, joined_urls, transmap_text, etransmap_text)