def rows2txt():
    """Strip HTML from stored chapter texts, write them to disk and save them back.

    Walks the books returned by ``db.list`` page by page. For every chapter
    of every book that has a stored text, the text is passed through
    ``htmlstrip``, written to ``<mkdir(book id)>/<chapter id>``, and the
    chapter's size and text are updated in the database with the stripped
    version. A failure on one chapter is logged and the run continues with
    the next chapter.

    NOTE(review): another definition of ``rows2txt`` appears later in this
    file and shadows this one when the module is loaded.
    """
    db = DdwDb()
    count = db.get_book_count()["count"]
    page_size = 100
    # Explicit floor division: same result as the Python 2 int "/" and keeps
    # ``range`` working if the module is ever run under Python 3.
    page = count // page_size + 1
    for i in range(0, page + 1):
        # NOTE(review): the constant +94 looks like a leftover "resume from
        # here" offset; presumably db.list() takes a row offset -- confirm.
        step = i * page_size + 94
        rows = db.list(step)
        if rows is None:
            continue
        for item in rows:
            print("book -------------------------------" + str(item["id"]))
            path = mkdir(item["id"])
            chs = db.get_chapters(item["id"])
            if chs is None:
                continue
            for ch in chs:
                chtext = db.get_chapter_text(ch["id"])
                if chtext is None:
                    # No stored text for this chapter: nothing to export.
                    continue
                try:
                    file_path = os.path.join(path, str(ch["id"]))
                    text = htmlstrip(chtext["text"])
                    # Write the stripped text to its per-chapter file.
                    write2file(file_path, text)
                    # Store the stripped text and its length back in the DB.
                    db.update_chapter_size(ch["id"], len(text))
                    db.update_chapter_text(ch["id"], text)
                    print(ch["id"])
                except Exception as ex:
                    # Best-effort batch job: log and move on. Log the type of
                    # the exception actually raised, not the base class.
                    log("Error: ch " + str(ch["id"]) + " " +
                        str(type(ex)) + ":" + str(ex))
def rows2txt():
    """Strip HTML from stored chapter texts, write them to disk and save them back.

    Walks the books returned by ``db.list`` page by page. For every chapter
    of every book that has a stored text, the text is passed through
    ``htmlstrip``, written to ``<mkdir(book id)>/<chapter id>``, and the
    chapter's size and text are updated in the database with the stripped
    version. A failure on one chapter is logged and the run continues with
    the next chapter.

    NOTE(review): an earlier definition of ``rows2txt`` precedes this one in
    the file; this later definition is the one in effect after import.
    """
    db = DdwDb()
    count = db.get_book_count()['count']
    page_size = 100
    # Explicit floor division: same result as the Python 2 int "/" and keeps
    # ``range`` working if the module is ever run under Python 3.
    page = count // page_size + 1
    for i in range(0, page + 1):
        # NOTE(review): the constant +94 looks like a leftover "resume from
        # here" offset; presumably db.list() takes a row offset -- confirm.
        step = i * page_size + 94
        rows = db.list(step)
        if rows is None:
            continue
        for item in rows:
            print("book -------------------------------" + str(item["id"]))
            path = mkdir(item["id"])
            chs = db.get_chapters(item['id'])
            if chs is None:
                continue
            for ch in chs:
                chtext = db.get_chapter_text(ch['id'])
                if chtext is None:
                    # No stored text for this chapter: nothing to export.
                    continue
                try:
                    file_path = os.path.join(path, str(ch['id']))
                    text = htmlstrip(chtext["text"])
                    # Write the stripped text to its per-chapter file.
                    write2file(file_path, text)
                    # Store the stripped text and its length back in the DB.
                    db.update_chapter_size(ch['id'], len(text))
                    db.update_chapter_text(ch['id'], text)
                    print(ch['id'])
                except Exception as ex:
                    # Best-effort batch job: log and move on. Log the type of
                    # the exception actually raised, not the base class.
                    log("Error: ch " + str(ch['id']) + " " +
                        str(type(ex)) + ":" + str(ex))
def write_urls(out_dir="D:\\liubaikui\\jiushulou\\script\\urls",
               first_id=24154, last_id=24221):
    """Write, per book, the ids of chapters that have no stored text.

    For every book id in ``first_id..last_id`` (inclusive) that exists and
    has at least one chapter, a file named after the book id is created in
    ``out_dir``. It holds one chapter id per line for each chapter whose
    ``get_chapter_text`` lookup returns None. The file is created even when
    no chapter is missing (it is then empty).

    Args:
        out_dir: directory that receives the per-book files. Defaults to the
            path that used to be hard-coded here.
        first_id: first book id to examine (inclusive).
        last_id: last book id to examine (inclusive).

    NOTE(review): another definition of ``write_urls`` appears later in this
    file and shadows this one when the module is loaded.
    """
    db = DdwDb()
    print(os.path.dirname(__file__))
    for book_id in range(first_id, last_id + 1):
        if db.get_book(book_id) is None:
            continue
        chs = db.get_chapters(book_id)
        if chs is None or len(chs) == 0:
            continue
        print("write to " + str(book_id))
        # Collect first, open afterwards: a failing DB lookup can then neither
        # leak an open handle nor leave a truncated file behind.
        missing = [str(ch['id']) + "\n"
                   for ch in chs
                   if db.get_chapter_text(ch['id']) is None]
        path = os.path.join(out_dir, str(book_id))
        with open(path, 'w+') as obj_file:
            obj_file.writelines(missing)
def write_urls(out_dir="D:\\liubaikui\\jiushulou\\script\\urls",
               first_id=24154, last_id=24221):
    """Write, per book, the ids of chapters that have no stored text.

    For every book id in ``first_id..last_id`` (inclusive) that exists and
    has at least one chapter, a file named after the book id is created in
    ``out_dir``. It holds one chapter id per line for each chapter whose
    ``get_chapter_text`` lookup returns None. The file is created even when
    no chapter is missing (it is then empty).

    Args:
        out_dir: directory that receives the per-book files. Defaults to the
            path that used to be hard-coded here.
        first_id: first book id to examine (inclusive).
        last_id: last book id to examine (inclusive).

    NOTE(review): an earlier definition of ``write_urls`` precedes this one
    in the file; this later definition is the one in effect after import.
    """
    db = DdwDb()
    print(os.path.dirname(__file__))
    for book_id in range(first_id, last_id + 1):
        if db.get_book(book_id) is None:
            continue
        chs = db.get_chapters(book_id)
        if chs is None or len(chs) == 0:
            continue
        print("write to " + str(book_id))
        # Collect first, open afterwards: a failing DB lookup can then neither
        # leak an open handle nor leave a truncated file behind.
        missing = [str(ch['id']) + "\n"
                   for ch in chs
                   if db.get_chapter_text(ch['id']) is None]
        path = os.path.join(out_dir, str(book_id))
        with open(path, 'w+') as obj_file:
            obj_file.writelines(missing)
def save_text(id):
    """Fetch and store the text of every chapter of a book that lacks one.

    Does nothing when the book does not exist. For each chapter of the book,
    if no text is stored yet, the text is obtained with ``get_chapter`` and,
    when non-empty, saved with ``insert_chapter_text``. Errors for a single
    chapter are logged and processing continues with the next chapter.

    Args:
        id: id of the book to process (name kept for caller compatibility
            although it shadows the builtin).

    NOTE(review): another definition of ``save_text`` appears later in this
    file and shadows this one when the module is loaded.
    """
    dw = DdwDb()
    book = dw.get_book(id)
    if book is None:
        return
    print('-------------begin ' + str(id) + '-------------')
    chs = dw.get_chapters(id)
    # Must be "and"/"or-negated": the previous "is not None or len(...)" test
    # called len(None) for a missing result and let an empty list through.
    if chs is None or len(chs) == 0:
        print('Info: have no chapters')
        return
    for var in chs:
        print(var['id'])
        t = dw.get_chapter_text(var['id'])
        if t is not None:
            # NOTE(review): reported only when a text is already stored;
            # confirm this matches the intended placement of the message.
            print("Info: chapter " + str(var['id']) + " exists")
            continue
        try:
            text = get_chapter(id, var['id'])
            if text is not None and text != '':
                dw.insert_chapter_text(var['id'], text)
                print('-------------end of ' + str(id) + '-----------')
        except Exception as ex:
            # Best-effort: log the real exception type and keep going.
            log("Error: get_chapter " + str(id) + " " +
                str(type(ex)) + ":" + str(ex))
def save_text(id):
    """Fetch and store the text of every chapter of a book that lacks one.

    Does nothing when the book does not exist. For each chapter of the book,
    if no text is stored yet, the text is obtained with ``get_chapter`` and,
    when non-empty, saved with ``insert_chapter_text``. Errors for a single
    chapter are logged and processing continues with the next chapter.

    Args:
        id: id of the book to process (name kept for caller compatibility
            although it shadows the builtin).

    NOTE(review): an earlier definition of ``save_text`` precedes this one
    in the file; this later definition is the one in effect after import.
    """
    dw = DdwDb()
    book = dw.get_book(id)
    if book is None:
        return
    print('-------------begin ' + str(id) + '-------------')
    chs = dw.get_chapters(id)
    # Must be "and"/"or-negated": the previous "is not None or len(...)" test
    # called len(None) for a missing result and let an empty list through.
    if chs is None or len(chs) == 0:
        print('Info: have no chapters')
        return
    for var in chs:
        print(var['id'])
        t = dw.get_chapter_text(var['id'])
        if t is not None:
            # NOTE(review): reported only when a text is already stored;
            # confirm this matches the intended placement of the message.
            print("Info: chapter " + str(var['id']) + " exists")
            continue
        try:
            text = get_chapter(id, var['id'])
            if text is not None and text != '':
                dw.insert_chapter_text(var['id'], text)
                print('-------------end of ' + str(id) + '-----------')
        except Exception as ex:
            # Best-effort: log the real exception type and keep going.
            log("Error: get_chapter " + str(id) + " " +
                str(type(ex)) + ":" + str(ex))