def reader_tick():
    """Load the epub held in the global ``zf`` and restore the reading state.

    Runs readepub() and then, when a GUI is present:
      * reopens the document remembered for this book in ``mw.conf.sql``,
        or the parser's first link when nothing is remembered,
      * restores the stored cursor offset and the last tree selection,
      * starts reading aloud when ``conf.read`` is set.

    Returns None, so the timeout that main() registers with
    gobject.timeout_add fires only once. Failures are reported through
    print_exception() instead of being raised.
    """
    global conf

    try:
        readepub(zf)
        if conf.nogui:
            return

        # Seen this ebook before?
        seb = mw.conf.sql.get(conf.basefile)
        if seb:
            openHTMLcb(seb)

            # Seen this html before? Go to offset.
            offs = mw.conf.sql.get(conf.basefile + "/" + seb)
            if offs is not None:
                pos = mw.buffer_1.get_iter_at_offset(int(offs))
                mw.buffer_1.place_cursor(pos)
                mw.view.scroll_to_iter(pos, 0.2)

            # Load last selection
            lsel = mw.conf.sql.get(conf.basefile + "_sel")
            mw.sel_tree(lsel)
        else:
            # Nothing remembered, start at the first link of the book
            openHTMLcb(conf.ht.firstlink)

        # Requested command line read
        if conf.read:
            mw.read_tts(None)
    except Exception:
        print_exception("Reader tick")
def locateroot(): global gzf rootfile = "" try: fh = gzf.open("META-INF/container.xml") ht = RootHTMLParser() while 1: sss = fh.readline() if sss == "": break ht.feed(sss) if conf.rootf: print sss, rootfile = ht.rootfile except: print_exception("No meta data") return rootfile
def openHTMLcb(fname, addback=True):
    """Viewer callback: load ``fname`` from the global archive ``gzf``.

    Args:
        fname: archive member to show, optionally with a "#anchor" suffix.
        addback: when true, ``fname`` is appended to the global ``backlist``.

    Returns:
        Whatever openHTML() returned, or False when loading raised (the
        error is reported through print_exception()).
    """
    global conf, gzf, backlist

    loaded = False

    # conf.recurse is the flag the line loops in openHTML() and html_tick()
    # test to stop reading. usleep() is defined elsewhere; presumably it
    # lets pending GUI events run -- confirm against its definition.
    conf.recurse = True
    usleep(1)

    try:
        if addback:
            backlist.append(fname)
        loaded = openHTML(gzf, fname, False)
        if not mw.stopload:
            # Apply the view size shortly after the load
            gobject.timeout_add(100, size_tick)
    except:
        print_exception("Cannot load HTML")

    conf.recurse = False
    return loaded
def parse_ncx(fname): global conf ret = "", "" try: fh = zf.open(fname) ht = NcxHTMLParser(contentcb) conf.ht = ht while 1: sss = fh.readline() if sss == "": break ht.feed(sss) if conf.header: print sss, if not conf.nogui: mw.add_text(sss, True) ret = ht.title, ht.auth except: print_exception("Cannot parse .ncx file") return ret
def getimagecb(fname):
    """Return a pixbuf for the image ``fname`` referenced by a document.

    The image is looked up in the global archive, first under the exact
    name and then by base name among all members. It is extracted once
    below ``conf.data_dir/conf.basefile`` (keeping the member's directory)
    and loaded from that file.

    Returns:
        The pixbuf, or None when the image is not in the archive or cannot
        be extracted; both cases are reported through print_exception().
    """
    global gzf, conf

    fullname = ""
    fh = None
    try:
        fh = gzf.open(fname)
        fullname = fname
    except Exception:
        # Search for it ... links may carry a path relative to the html
        # file, so compare base names on both sides (as openHTML does).
        wanted = os.path.basename(fname)
        for aa in zf.infolist():
            if os.path.basename(aa.filename) == wanted:
                fullname = aa.filename
                try:
                    fh = gzf.open(aa.filename)
                except Exception:
                    print_exception("No image File")
                    return None
                break
        else:
            # Still inside the handler, so the failed open gets reported
            print_exception("No image File")
            return None

    try:
        # Extract image file
        dd = (conf.data_dir + "/" + conf.basefile + "/" +
              os.path.split(fullname)[0])
        if not os.path.isdir(dd):
            os.makedirs(dd)

        fname2 = dd + "/" + os.path.basename(fullname)
        if not os.path.isfile(fname2):
            try:
                # Image data is binary; text mode would corrupt it on
                # platforms that translate line endings
                fh2 = open(fname2, "wb")
                try:
                    while True:
                        buff = fh.read(65536)
                        if not buff:
                            break
                        fh2.write(buff)
                finally:
                    fh2.close()
            except Exception:
                print_exception("Cannot load image", fname)
                return None
    finally:
        # Release the archive member, also when the file already existed
        fh.close()

    return gtk.gdk.pixbuf_new_from_file(fname2)
def parse_opf(fname): global conf ret = ("", "") try: fh = zf.open(fname) ht = OpfHTMLParser(content2cb) conf.ht = ht while 1: sss = fh.readline() if sss == "": break if conf.header: print sss, ht.feed(sss) if conf.header: print sss, if not conf.nogui: mw.add_text(sss, True) ret = (ht.title, ht.auth) except: print_exception("Cannot parse .opf file") return ret
def html_tick(fname):
    """Load the stand-alone HTML file ``fname`` into the viewer.

    The file is fed line by line to a publib.pubhtml.HTML_Parser bound to
    the main window. Reading stops at end of file, when ``conf.recurse``
    becomes true, or when the parser's feed() returns a true value.
    Afterwards the progress text is set, the pane position reset and the
    view sent home.

    Returns None, so the timeout main() registers fires only once. Errors
    are reported through print_exception().
    """
    global conf, mw

    conf.recurse = False

    fh = None
    busy = False
    try:
        try:
            fh = open(fname)
            ht = publib.pubhtml.HTML_Parser(conf)
            ht.mw = mw
            ht.fh = fh
            ht.getimagecb = getimagecb

            ln = 0
            mw.waitcursor(True)
            busy = True
            while not conf.recurse:
                sss = fh.readline()
                if ln % 10 == 0:
                    mw.prog.set_text("Reading line: %d" % ln)
                    usleep(1)
                ln += 1
                # The flag may have been raised during usleep()
                if sss == "" or conf.recurse:
                    break
                if ht.feed(sss):
                    break
                mw.add_text(sss, True)
            mw.fname = fname
        finally:
            # Undo the busy state and release the file on errors as well
            if busy:
                mw.waitcursor(False)
            if fh is not None:
                fh.close()
    except Exception:
        print_exception("Opening HTML")

    try:
        mw.prog.set_text("Done Loading.")
        mw.hpaned.set_position(0)
        mw.gohome()
    except Exception:
        print_exception("Positioning reader")
def main(zf, fname=None):
    """Run the reader, with or without a GUI.

    Args:
        zf: truthy when an epub archive is to be read; selects
            reader_tick() over html_tick() in GUI mode. reader_tick()
            itself reads the module-level ``zf``, not this parameter.
        fname: HTML file handed to html_tick() when ``zf`` is false.

    Without a GUI (``conf.nogui``) reader_tick() is called directly.
    Otherwise the main window is created, the loader is scheduled on a
    100 ms timeout and the GTK main loop is entered.
    """
    global mw, conf

    # Console mode: do the work right away, no window and no main loop
    if conf.nogui:
        reader_tick()
        return

    mw = publib.pubdisp.PubView(conf)
    mw.fname = ""
    mw.callback = openHTMLcb
    mw.bscallback = backcb

    # Load from inside the main loop, once the window exists
    if zf:
        gobject.timeout_add(100, reader_tick)
    else:
        gobject.timeout_add(100, html_tick, fname)

    try:
        gtk.main()
    except:
        print_exception("gtk_main")
def size_tick():
    """Timeout callback scheduled by openHTMLcb(): apply the window size.

    Calls ``mw.apply_size()`` and reports any failure through
    print_exception(). Returns None.
    """
    try:
        mw.apply_size()
    except:
        print_exception("Size tick")
def openHTML(zf, fname, mark=True):
    """Load an archive member into the viewer and jump to its anchor.

    Args:
        zf: open archive providing open() and infolist().
        fname: member name, optionally followed by "#anchor".
        mark: stored on the HTML parser as its ``mark`` attribute.

    The member is only (re)loaded when it differs from ``mw.fname``. It is
    looked up under the exact name first and then by base name. Reading
    stops at end of file, when ``conf.recurse`` or ``mw.stopload`` becomes
    true, or when the parser's feed() returns a true value.

    Returns:
        True when the document is in place, False when the member is not
        in the archive, None when the member could not be opened or the
        load was stopped through ``mw.stopload``.
    """
    global conf

    conf.recurse = False

    # Is it a link with a tag?
    spl = fname.split("#")

    # Not loaded, load
    if mw.fname != spl[0]:
        mw.clearall()
        usleep(1)

        found = False
        fh = None
        try:
            fh = zf.open(spl[0])
            found = True
        except Exception:
            # The link may be relative; fall back to a base name search
            wanted = os.path.basename(spl[0])
            for aa in zf.infolist():
                if os.path.basename(aa.filename) == wanted:
                    try:
                        fh = zf.open(aa.filename)
                    except Exception:
                        print_exception("No Zip File")
                        return None
                    found = True
                    break

        if not found:
            return False

        busy = False
        try:
            try:
                ht = publib.pubhtml.HTML_Parser(conf)
                ht.mw = mw
                ht.mark = mark
                ht.fh = fh
                ht.getimagecb = getimagecb

                ln = 0
                mw.waitcursor(True)
                busy = True
                while not (conf.recurse or mw.stopload):
                    sss = fh.readline()
                    if mw.stopload:
                        break
                    if ln % 10 == 0:
                        mw.prog.set_text("Reading line: %d" % ln)
                        usleep(1)
                    ln += 1
                    # The flag may have been raised during usleep()
                    if sss == "" or conf.recurse:
                        break
                    if ht.feed(sss):
                        break
                    mw.add_text(sss, True)
                mw.fname = spl[0]
            finally:
                # Undo the busy state and release the member on errors too
                if busy:
                    mw.waitcursor(False)
                fh.close()
        except Exception:
            print_exception("Parsing HTML")

        mw.prog.set_text("Done Loading.")
        usleep(1)

        if mw.stopload:
            return None

    # Jump to tag, if any
    if len(spl) > 1:
        mm = mw.buffer_1.get_mark(spl[1])
        if mm:
            ii = mw.buffer_1.get_iter_at_mark(mm)
            mw.buffer_1.place_cursor(ii)
            mw.view.scroll_to_mark(mm, 0.0, True, 0, 0)
    else:
        mw.gohome()

    return True
def readepub(zf): global mw, conf names = [] allnames = [] if not conf.nogui: mw.waitcursor(True) if conf.extract: print "Extracting epub to:", conf.data_dir + "/" + conf.basefile + "/" for aa in zf.infolist(): if conf.list: print aa.filename, aa.file_size # Extract if conf.extract: try: dd = conf.data_dir + "/" + conf.basefile + "/" + os.path.split( aa.filename)[0] if not os.path.isdir(dd): os.makedirs(dd) fh = zf.open(aa.filename) fff = dd + "/" + os.path.basename(aa.filename) # Only extract once: if not os.path.isfile(fff): if not conf.nogui: tt = "Extracting '" + aa.filename + "'" mw.prog.set_text(tt[-18]) usleep(1) fh2 = open(fff, "w+") while 1: sss = fh.readline() if sss == "": break fh2.write(sss) fh2.close() except: print_exception("Cannot extract file") if os.path.splitext(aa.filename)[1].find("htm") >= 0: names.append(aa.filename) allnames.append(aa.filename) found = False ret1 = None ret2 = None '''rootfile = locateroot() print "rootfile:", rootfile if rootfile != "": found = True if ".ncx" in rootfile: ret1 = parse_ncs(rootfile) if ".opf" in rootfile: ret2 = parse_opf(rootfile)''' # Hack: to simplify parsing we search for an .ncx and .opf file if not found: for aa in allnames: if ".ncx" in aa: #print "Found toc", aa #found = True ret1 = parse_ncx(aa) break if not conf.nogui: mw.update_tree(" --------- ", "") # Fallback if not found: for aa in allnames: if ".opf" in aa: #print "Found opf", aa found = True ret2 = parse_opf(aa) break # Pick from the two optional places auth = ret1[0] try: if auth == "": auth = ret2[0] except: pass title = ret1[1] try: if title == "": title = ret2[1] except: pass if conf.title: print "'" + auth + "' '" + title + "'" if not conf.nogui: old = mw.get_title() mw.set_title(old + " '" + auth + "' '" + title + "'") if conf.nogui: return # No TOC Open and parse all the [x]htm[l] files if not found: nnn = "000" mw.update_tree("Start of Book", nnn) mw.add_text_mark(nnn) for bbb in names: openHTML(zf, bbb) nnn = "999" 
mw.update_tree("End", nnn) mw.add_text_mark(nnn) mw.waitcursor(False)