def walk(top):
    """Return every file found beneath ``top`` as a path relative to ``top``.

    The tree is traversed with ``os.walk`` on ``pathof(top)``.  Directory
    and file names are passed through ``utf8_str`` with the filesystem
    encoding before they are joined, and each joined path is converted
    with ``relpath`` against ``top``.  Only files are listed, never
    directories.
    """
    fs_enc = sys.getfilesystemencoding()
    found = []
    for dirpath, _subdirs, filenames in os.walk(pathof(top)):
        dirpath = utf8_str(dirpath, enc=fs_enc)
        found.extend(
            relpath(os.path.join(dirpath, utf8_str(fname, enc=fs_enc)), top)
            for fname in filenames
        )
    return found
def setspine(self, new_spine):
    """Replace the spine with the ``(idref, linear)`` pairs of ``new_spine``.

    Both members of every pair are passed through ``utf8_str``.  Each
    idref must be a key of ``self.id_to_href``.  A linear value that is
    not None is lower-cased and must then be 'yes' or 'no'.  When every
    pair passes, ``self.spine`` is replaced and 'OEBPS/content.opf' is
    flagged in ``self.modified``.

    Raises:
        WrapperException: an idref is not present in the manifest.
        Exception: a linear value is neither None, 'yes' nor 'no'.
    """
    checked = []
    for idref, linear in new_spine:
        idref = utf8_str(idref)
        linear = utf8_str(linear)
        if idref not in self.id_to_href:
            raise WrapperException('Spine Id not in Manifest')
        if linear is not None:
            linear = linear.lower()
            if linear not in ('yes', 'no'):
                raise Exception('Improper Spine Linear Attribute')
        checked.append((idref, linear))
    # Only publish the new spine once the whole list validated.
    self.spine = checked
    self.modified['OEBPS/content.opf'] = 'file'
def setspine(self, new_spine):
    """Validate ``new_spine`` and install it as ``self.spine``.

    ``new_spine`` is an iterable of ``(idref, linear)`` pairs.  Both
    values are passed through ``utf8_str``; the idref must be a key of
    ``self.id_to_href`` and a non-None linear value, once lower-cased,
    must be 'yes' or 'no'.  On success 'OEBPS/content.opf' is flagged in
    ``self.modified``.

    Raises:
        WrapperException: an idref is not present in the manifest.
        Exception: a linear value is neither None, 'yes' nor 'no'.
    """
    validated = []
    for entry_id, entry_linear in new_spine:
        entry_id = utf8_str(entry_id)
        entry_linear = utf8_str(entry_linear)
        if entry_id not in self.id_to_href:
            raise WrapperException('Spine Id not in Manifest')
        if entry_linear is not None:
            entry_linear = entry_linear.lower()
            if entry_linear not in ('yes', 'no'):
                raise Exception('Improper Spine Linear Attribute')
        validated.append((entry_id, entry_linear))
    # self.spine is untouched if any entry raised above.
    self.spine = validated
    self.modified['OEBPS/content.opf'] = 'file'
def writefile(self, id, data):
    """Write ``data`` to the output-directory copy of manifest file ``id``.

    The path is looked up in ``self.id_to_filepath`` and resolved against
    ``self.outdir``; a missing parent directory is created.  When the
    manifest mime type ends in '+xml', ``data`` is passed through
    ``utf8_str`` first.  The id is then flagged in ``self.modified``.

    Raises:
        WrapperException: ``id`` has no entry in ``self.id_to_filepath``.
    """
    key = utf8_str(id)
    relative = self.id_to_filepath.get(key)
    if relative is None:
        raise WrapperException('Id does not exist in manifest')
    if self.id_to_mime.get(key, '').endswith('+xml'):
        data = utf8_str(data)
    target = os.path.join(self.outdir, relative)
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)
    self.modified[key] = 'file'
def writefile(self, id, data):
    """Store ``data`` as the new content of the manifest file ``id``.

    The file is written in binary mode beneath ``self.outdir`` at the
    relative path recorded in ``self.id_to_filepath``; its parent
    directory is created when absent.  Data for mime types ending in
    '+xml' is passed through ``utf8_str`` before writing.  Afterwards the
    id is flagged in ``self.modified``.

    Raises:
        WrapperException: ``id`` has no entry in ``self.id_to_filepath``.
    """
    manifest_id = utf8_str(id)
    rel_path = self.id_to_filepath.get(manifest_id)
    if rel_path is None:
        raise WrapperException('Id does not exist in manifest')
    mime = self.id_to_mime.get(manifest_id, '')
    if mime.endswith('+xml'):
        data = utf8_str(data)
    out_path = os.path.join(self.outdir, rel_path)
    out_dir = os.path.dirname(out_path)
    if not path.exists(out_dir):
        os.makedirs(pathof(out_dir))
    with open(pathof(out_path), 'wb') as handle:
        handle.write(data)
    self.modified[manifest_id] = 'file'
def deletefile(self, id):
    """Remove the manifest file ``id`` from the book.

    If the file was added or modified in this session, its copy under
    ``self.outdir`` is deleted and the id is dropped from ``self.added``
    and ``self.modified``.  The id is then removed from the manifest maps
    (``id_to_href``, ``id_to_mime``, ``href_to_id``), from the spine and
    from ``id_to_filepath``.  Unless the file had only just been added, the
    id is recorded in ``self.deleted``.  'OEBPS/content.opf' is flagged in
    ``self.modified``.

    Raises:
        WrapperException: ``id`` has no entry in ``self.id_to_filepath``.
    """
    id = utf8_str(id)
    filepath = self.id_to_filepath.get(id, None)
    # Test the lookup result rather than the id itself: an unknown id has
    # to be rejected here, before any bookkeeping is modified.
    if filepath is None:
        raise WrapperException('id does not exist in manifest')
    add_to_deleted = True
    # if file was added or modified, delete file from outdir
    if id in self.added or id in self.modified:
        filepath = os.path.join(self.outdir, filepath)
        if path.exists(filepath) and path.isfile(filepath):
            os.remove(pathof(filepath))
        if id in self.added:
            # A file added in this session never existed in the source
            # book, so there is nothing to report as deleted.
            self.added.remove(id)
            add_to_deleted = False
        if id in self.modified:
            del self.modified[id]
    # remove from manifest
    href = self.id_to_href[id]
    del self.id_to_href[id]
    del self.id_to_mime[id]
    del self.href_to_id[href]
    # remove from spine
    new_spine = [(sid, linear) for sid, linear in self.spine if sid != id]
    if len(new_spine) != len(self.spine):
        # setspine is a method; calling it as a bare name raised NameError.
        self.setspine(new_spine)
    if add_to_deleted:
        self.deleted.append(id)
    self.modified['OEBPS/content.opf'] = 'file'
    del self.id_to_filepath[id]
def deletefile(self, id):
    """Delete the manifest file ``id`` and all bookkeeping that refers to it.

    A copy under ``self.outdir`` (present when the file was added or
    modified in this session) is removed from disk, and the id is dropped
    from ``self.added`` / ``self.modified``.  The manifest maps, the spine
    and ``self.id_to_filepath`` lose their entries for the id.  The id is
    appended to ``self.deleted`` unless it had only been added in this
    session, and 'OEBPS/content.opf' is flagged in ``self.modified``.

    Raises:
        WrapperException: ``id`` has no entry in ``self.id_to_filepath``.
    """
    id = utf8_str(id)
    filepath = self.id_to_filepath.get(id, None)
    # The guard must look at the lookup result; checking ``id`` let unknown
    # ids through and they failed later with an unrelated error.
    if filepath is None:
        raise WrapperException('id does not exist in manifest')
    add_to_deleted = True
    # if file was added or modified, delete file from outdir
    if id in self.added or id in self.modified:
        filepath = os.path.join(self.outdir, filepath)
        if path.exists(filepath) and path.isfile(filepath):
            os.remove(pathof(filepath))
        if id in self.added:
            # Added-then-deleted files cancel out: do not list as deleted.
            self.added.remove(id)
            add_to_deleted = False
        if id in self.modified:
            del self.modified[id]
    # remove from manifest
    href = self.id_to_href[id]
    del self.id_to_href[id]
    del self.id_to_mime[id]
    del self.href_to_id[href]
    # remove from spine
    remaining = [(sid, linear) for sid, linear in self.spine if sid != id]
    if len(remaining) != len(self.spine):
        # Must go through the instance; the bare name ``setspine`` is not
        # defined at module level.
        self.setspine(remaining)
    if add_to_deleted:
        self.deleted.append(id)
    self.modified['OEBPS/content.opf'] = 'file'
    del self.id_to_filepath[id]
def setguide(self, new_guide):
    """Replace the guide with the ``(type, title, href)`` triples given.

    All three values are passed through ``utf8_str``.  A type that is not
    in ``_guide_types`` is prefixed with "other.".  A missing title becomes
    'title missing'.  The href, ignoring any '#fragment', must be a key of
    ``self.href_to_id``.  On success ``self.guide`` is replaced and
    'OEBPS/content.opf' is flagged in ``self.modified``.

    Raises:
        WrapperException: a guide href is not present in the manifest.
    """
    guide = []
    # ``gtype`` rather than ``type`` so the builtin is not shadowed.
    for (gtype, title, href) in new_guide:
        gtype = utf8_str(gtype)
        title = utf8_str(title)
        href = utf8_str(href)
        if gtype not in _guide_types:
            gtype = "other." + gtype
        if title is None:
            title = 'title missing'
        title = utf8_str(title)
        thref = href.split('#')[0]
        if thref not in self.href_to_id:
            raise WrapperException('guide href not in manifest')
        # list.append takes a single argument: store the entry as a tuple.
        guide.append((gtype, title, href))
    self.guide = guide
    self.modified['OEBPS/content.opf'] = 'file'
def write_opf(self):
    """Write the output of ``self.build_opf()`` to the output directory.

    Nothing happens when ``self.op`` is None.  Otherwise the target is
    ``<outdir>/OEBPS/<opfname>``; its parent directory is created when
    missing and the file is written in binary mode.
    """
    if self.op is None:
        return
    target = utf8_str(os.path.join(self.outdir, 'OEBPS', self.opfname))
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(self.build_opf())
def quoteurl(href):
    """Percent-escape every character of ``href`` found in ``IRI_UNSAFE``.

    ``href`` is first passed through ``utf8_str``.  Each unsafe character
    is replaced by '%' followed by its ordinal as two lower-case hex
    digits; all other characters are copied unchanged.
    """
    href = utf8_str(href)
    return ''.join(
        "%%%02x" % ord(ch) if ch in IRI_UNSAFE else ch
        for ch in href
    )
def write_opf(self):
    """Serialise the OPF into ``<outdir>/OEBPS/<opfname>``.

    The method is a no-op while ``self.op`` is None.  The destination
    directory is created on demand and the bytes returned by
    ``self.build_opf()`` are written in binary mode.
    """
    if self.op is None:
        return
    opf_path = utf8_str(os.path.join(self.outdir, 'OEBPS', self.opfname))
    opf_dir = os.path.dirname(opf_path)
    if not path.exists(opf_dir):
        os.makedirs(pathof(opf_dir))
    with open(pathof(opf_path), 'wb') as handle:
        handle.write(self.build_opf())
def map_basename_to_id(self, basename, ow):
    """Return the manifest id of the file named ``basename``, else ``ow``.

    The lower-cased file extension is mapped to a mime type with
    ``ext_mime_map``.  The mime type selects the folder: 'Audio' for
    audio/*, 'Video' for video/*, otherwise the ``mime_base_map`` entry
    with 'Misc' as the fallback.  The resulting href ``<folder>/<basename>``
    is looked up in ``self.href_to_id``.

    An extension missing from ``ext_mime_map`` is treated like any other
    unmapped mime type (folder 'Misc') instead of raising AttributeError.
    """
    basename = utf8_str(basename)
    ext = os.path.splitext(basename)[1].lower()
    mime = ext_mime_map.get(ext, None)
    # ``mime`` is None for unknown extensions, so guard the str methods.
    if mime is not None and mime.startswith("audio"):
        base = 'Audio'
    elif mime is not None and mime.startswith("video"):
        base = "Video"
    else:
        base = mime_base_map.get(mime, 'Misc')
    href = base + "/" + basename
    return self.href_to_id.get(href, ow)
def map_basename_to_id(self, basename, ow):
    """Look up the manifest id for ``basename``; return ``ow`` if unknown.

    The href is rebuilt as ``<folder>/<basename>``.  The folder is 'Audio'
    or 'Video' for audio/* and video/* mime types, and otherwise comes
    from ``mime_base_map`` (default 'Misc').  The mime type itself comes
    from ``ext_mime_map``, keyed by the lower-cased extension.

    When the extension has no entry in ``ext_mime_map`` the lookup falls
    back to the 'Misc' folder rather than failing on ``None.startswith``.
    """
    basename = utf8_str(basename)
    ext = os.path.splitext(basename)[1].lower()
    mime = ext_mime_map.get(ext, None)
    if mime is None:
        # Unknown extension: same fallback folder the final branch would use.
        base = mime_base_map.get(mime, 'Misc')
    elif mime.startswith("audio"):
        base = 'Audio'
    elif mime.startswith("video"):
        base = "Video"
    else:
        base = mime_base_map.get(mime, 'Misc')
    href = base + "/" + basename
    return self.href_to_id.get(href, ow)
def addfile(self, uniqueid, basename, data, mime=None):
    """Add a new manifest file and write it beneath ``self.outdir``.

    When ``mime`` is not given it is derived from the lower-cased
    extension of ``basename`` via ``ext_mime_map``.  The mime type picks
    the folder ('Audio', 'Video', or the ``mime_base_map`` entry with
    'Misc' as default) and the href is ``<folder>/<basename>``.  Data for
    mime types ending in '+xml' is passed through ``utf8_str``.  The file
    is written to ``<outdir>/OEBPS/<href>``, the manifest maps are
    updated, the id is appended to ``self.added`` and 'OEBPS/content.opf'
    is flagged in ``self.modified``.

    Returns:
        The ``uniqueid`` after it was passed through ``utf8_str``.

    Raises:
        WrapperException: no mime type could be determined, the id is
            already in the manifest, or the href is already in use.
    """
    uniqueid = utf8_str(uniqueid)
    basename = utf8_str(basename)
    mime = utf8_str(mime)
    if mime is None:
        extension = os.path.splitext(basename)[1].lower()
        mime = ext_mime_map.get(extension)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime.endswith('+xml'):
        data = utf8_str(data)

    if mime.startswith("audio"):
        folder = 'Audio'
    elif mime.startswith("video"):
        folder = "Video"
    else:
        folder = mime_base_map.get(mime, 'Misc')
    href = folder + "/" + basename

    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    if href in self.href_to_id:
        raise WrapperException('Basename is not unique')

    # now actually write out the new file
    relative = os.path.join('OEBPS', href.replace("/", os.sep))
    self.id_to_filepath[uniqueid] = relative
    target = os.path.join(self.outdir, relative)
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)

    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.href_to_id[href] = uniqueid
    self.added.append(uniqueid)
    self.modified['OEBPS/content.opf'] = 'file'
    return uniqueid
def addfile(self, uniqueid, basename, data, mime=None):
    """Register a new file in the manifest and store its content.

    ``mime`` defaults to the ``ext_mime_map`` entry for the lower-cased
    extension of ``basename``.  The href is ``<folder>/<basename>``, where
    the folder is 'Audio' or 'Video' for audio/* and video/* types and
    otherwise taken from ``mime_base_map`` (default 'Misc').  ``data`` is
    passed through ``utf8_str`` for '+xml' mime types and written in
    binary mode to ``<outdir>/OEBPS/<href>``.  The manifest maps,
    ``self.added`` and ``self.modified`` are updated afterwards.

    Returns:
        The ``uniqueid`` after it was passed through ``utf8_str``.

    Raises:
        WrapperException: the mime type is unknown, or the id or the
            resulting href already exists in the manifest.
    """
    uniqueid = utf8_str(uniqueid)
    basename = utf8_str(basename)
    mime = utf8_str(mime)
    if mime is None:
        # Fall back to the extension table when no type was supplied.
        suffix = os.path.splitext(basename)[1].lower()
        mime = ext_mime_map.get(suffix)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime.endswith('+xml'):
        data = utf8_str(data)

    if mime.startswith("audio"):
        subdir = 'Audio'
    elif mime.startswith("video"):
        subdir = "Video"
    else:
        subdir = mime_base_map.get(mime, 'Misc')
    href = subdir + "/" + basename

    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    if href in self.href_to_id:
        raise WrapperException('Basename is not unique')

    # now actually write out the new file
    rel_path = os.path.join('OEBPS', href.replace("/", os.sep))
    self.id_to_filepath[uniqueid] = rel_path
    out_path = os.path.join(self.outdir, rel_path)
    out_dir = os.path.dirname(out_path)
    if not path.exists(out_dir):
        os.makedirs(pathof(out_dir))
    with open(pathof(out_path), 'wb') as handle:
        handle.write(data)

    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.href_to_id[href] = uniqueid
    self.added.append(uniqueid)
    self.modified['OEBPS/content.opf'] = 'file'
    return uniqueid
def writeotherfile(self, book_href, data):
    """Write ``data`` to the output copy of the non-manifest file ``book_href``.

    The href is passed through ``utf8_str`` and used as the key into
    ``self.id_to_filepath``.  The file is written in binary mode beneath
    ``self.outdir`` (parent directory created when missing) and the href
    is flagged in ``self.modified``.

    Raises:
        WrapperException: the href is unknown, or it is listed in
            ``PROTECTED_FILES``.
    """
    href = utf8_str(book_href)
    relative = self.id_to_filepath.get(href)
    if relative is None:
        raise WrapperException('book href does not exist')
    if href in PROTECTED_FILES:
        raise WrapperException('Attempt to modify protected file')
    target = os.path.join(self.outdir, relative)
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)
    self.modified[href] = 'file'
def writeotherfile(self, book_href, data):
    """Overwrite the file identified by ``book_href`` with ``data``.

    ``book_href`` (after ``utf8_str``) must be a key of
    ``self.id_to_filepath`` and must not be in ``PROTECTED_FILES``.  The
    data is written in binary mode under ``self.outdir``, creating the
    parent directory if needed, and the href is flagged in
    ``self.modified``.

    Raises:
        WrapperException: the href is unknown or protected.
    """
    key = utf8_str(book_href)
    rel_path = self.id_to_filepath.get(key)
    if rel_path is None:
        raise WrapperException('book href does not exist')
    if key in PROTECTED_FILES:
        raise WrapperException('Attempt to modify protected file')
    out_path = os.path.join(self.outdir, rel_path)
    out_dir = os.path.dirname(out_path)
    if not path.exists(out_dir):
        os.makedirs(pathof(out_dir))
    with open(pathof(out_path), 'wb') as handle:
        handle.write(data)
    self.modified[key] = 'file'
def addotherfile(self, book_href, data):
    """Add a new non-manifest file at ``book_href`` with content ``data``.

    The href (after ``utf8_str``) must not already be in ``self.other``
    and the target under ``self.outdir`` must not already be a file.  The
    data is written in binary mode, creating the parent directory when
    missing.  The href is then appended to ``self.other`` and
    ``self.added``, and its OS-specific relative path is stored in
    ``self.id_to_filepath``.

    Raises:
        WrapperException: the href is already registered, or a file
            already exists at the target path.
    """
    href = utf8_str(book_href)
    if href in self.other:
        raise WrapperException('book href must be unquie')
    relative = href.replace("/", os.sep)
    target = os.path.join(pathof(self.outdir), relative)
    if path.isfile(target):
        raise WrapperException('desired path already exists')
    parent = os.path.dirname(pathof(target))
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)
    self.other.append(href)
    self.added.append(href)
    self.id_to_filepath[href] = relative
def readfile(self, id):
    """Return the raw content of the manifest file ``id``.

    Files that were added or modified in this session are read from
    ``self.outdir``; all others are read from ``self.ebook_root``.

    Raises:
        WrapperException: ``id`` has no entry in ``self.id_to_filepath``,
            or the resolved file does not exist.
    """
    key = utf8_str(id)
    relative = self.id_to_filepath.get(key)
    if relative is None:
        raise WrapperException('Id does not exist in manifest')
    # already added or modified it will be in outdir
    root = self.ebook_root
    if key in self.added or key in self.modified:
        root = self.outdir
    source = os.path.join(root, relative)
    if not path.exists(source):
        raise WrapperException('File Does Not Exist')
    with open(pathof(source), 'rb') as src:
        return src.read()
def addotherfile(self, book_href, data):
    """Create the non-manifest file ``book_href`` in the output directory.

    ``book_href`` is passed through ``utf8_str``.  Its '/' separators are
    replaced by ``os.sep`` to form the relative path, which is recorded
    in ``self.id_to_filepath`` once the data has been written.  The href
    is also appended to ``self.other`` and ``self.added``.

    Raises:
        WrapperException: the href is already in ``self.other``, or the
            target path is already a file.
    """
    key = utf8_str(book_href)
    if key in self.other:
        raise WrapperException('book href must be unquie')
    rel_path = key.replace("/", os.sep)
    out_path = os.path.join(pathof(self.outdir), rel_path)
    if path.isfile(out_path):
        raise WrapperException('desired path already exists')
    out_dir = os.path.dirname(pathof(out_path))
    if not path.exists(out_dir):
        os.makedirs(pathof(out_dir))
    with open(pathof(out_path), 'wb') as handle:
        handle.write(data)
    self.other.append(key)
    self.added.append(key)
    self.id_to_filepath[key] = rel_path
def readfile(self, id):
    """Read the manifest file ``id`` and return its bytes.

    The relative path comes from ``self.id_to_filepath``.  It is resolved
    against ``self.outdir`` when the id is in ``self.added`` or
    ``self.modified``, and against ``self.ebook_root`` otherwise.

    Raises:
        WrapperException: the id is unknown or the file is missing.
    """
    manifest_id = utf8_str(id)
    rel_path = self.id_to_filepath.get(manifest_id)
    if rel_path is None:
        raise WrapperException('Id does not exist in manifest')
    # already added or modified it will be in outdir
    base_dir = self.ebook_root
    if manifest_id in self.added or manifest_id in self.modified:
        base_dir = self.outdir
    full_path = os.path.join(base_dir, rel_path)
    if not path.exists(full_path):
        raise WrapperException('File Does Not Exist')
    with open(pathof(full_path), 'rb') as handle:
        return handle.read()
def copy_book_contents_to(self, destdir):
    """Copy every file known to ``self.id_to_filepath`` into ``destdir``.

    Ids that are keys of ``self.id_to_mime`` are read with ``readfile``,
    all others with ``readotherfile``.  Each file is written in binary
    mode at its recorded relative path below ``destdir``; missing parent
    directories are created.

    Raises:
        WrapperException: ``destdir`` is None or not an existing
            directory (and whatever the read methods raise).
    """
    destdir = utf8_str(destdir)
    if destdir is None or not path.isdir(destdir):
        raise WrapperException('destination directory does not exist')
    for book_id, rpath in self.id_to_filepath.items():
        if book_id in self.id_to_mime:
            data = self.readfile(book_id)
        else:
            data = self.readotherfile(book_id)
        target = os.path.join(destdir, rpath)
        parent = os.path.dirname(target)
        if not path.exists(parent):
            os.makedirs(pathof(parent))
        with open(pathof(target), 'wb') as out:
            out.write(data)
def copy_book_contents_to(self, destdir):
    """Write the current content of the whole book beneath ``destdir``.

    Every entry of ``self.id_to_filepath`` is read, with ``readfile`` for
    manifest ids (keys of ``self.id_to_mime``) and ``readotherfile`` for
    the rest.  The bytes are stored at the same relative path under
    ``destdir``, creating directories as needed.

    Raises:
        WrapperException: ``destdir`` is None or is not a directory.
    """
    destdir = utf8_str(destdir)
    if destdir is None or not path.isdir(destdir):
        raise WrapperException('destination directory does not exist')
    for file_id, rel_path in self.id_to_filepath.items():
        reader = self.readfile if file_id in self.id_to_mime else self.readotherfile
        content = reader(file_id)
        out_path = os.path.join(destdir, rel_path)
        out_dir = os.path.dirname(out_path)
        if not path.exists(out_dir):
            os.makedirs(pathof(out_dir))
        with open(pathof(out_path), 'wb') as handle:
            handle.write(content)
def readotherfile(self, book_href):
    """Return the raw content of the non-manifest file ``book_href``.

    "OEBPS/content.opf" is special-cased: the result of
    ``self.build_opf()`` is returned instead of reading a file.  Other
    files are read from ``self.outdir`` when added or modified in this
    session, else from ``self.ebook_root``.

    Raises:
        WrapperException: the href is unknown or the file is missing.
    """
    href = utf8_str(book_href)
    # handle special case of trying to read the opf
    if href == "OEBPS/content.opf":
        return self.build_opf()
    relative = self.id_to_filepath.get(href)
    if relative is None:
        raise WrapperException('book href does not exist')
    root = self.ebook_root
    if href in self.added or href in self.modified:
        root = self.outdir
    source = os.path.join(root, relative)
    if not path.exists(source):
        raise WrapperException('File Does Not Exist')
    with open(pathof(source), 'rb') as src:
        return src.read()
def readotherfile(self, book_href):
    """Read the file identified by ``book_href`` and return its bytes.

    A request for "OEBPS/content.opf" returns ``self.build_opf()``.  For
    any other href the relative path from ``self.id_to_filepath`` is
    resolved against ``self.outdir`` (href in ``self.added`` or
    ``self.modified``) or against ``self.ebook_root``.

    Raises:
        WrapperException: the href is unknown or the file is missing.
    """
    key = utf8_str(book_href)
    # handle special case of trying to read the opf
    if key == "OEBPS/content.opf":
        return self.build_opf()
    rel_path = self.id_to_filepath.get(key)
    if rel_path is None:
        raise WrapperException('book href does not exist')
    base_dir = self.ebook_root
    if key in self.added or key in self.modified:
        base_dir = self.outdir
    full_path = os.path.join(base_dir, rel_path)
    if not path.exists(full_path):
        raise WrapperException('File Does Not Exist')
    with open(pathof(full_path), 'rb') as handle:
        return handle.read()
def __init__(self, opf_path, debug=False):
    """Load the OPF file at ``opf_path`` and parse it.

    The file is read in binary mode into ``self.opf``.  All result
    attributes start out empty before ``self._parseData()`` is called.
    """
    self._debug = debug
    self.opfname = utf8_str(os.path.basename(opf_path))

    # Raw file content and a position within it, starting at 0.
    self.opf = None
    with open(pathof(opf_path), 'rb') as opf_file:
        self.opf = opf_file.read()
    self.opos = 0

    # Package and metadata state.
    self.package_tag = [None, None]
    # self.package_version = None
    self.metadata_tag = [None, None]
    self.metadata = []
    self.cover_id = None

    # Manifest lookup tables.
    self.manifest_id_to_href = {}
    self.manifest_id_to_mime = {}
    self.href_to_manifest_id = {}

    # Spine and guide state.
    self.spine_ppd = None
    # self.spine_pageattributes = {}
    # self.spine_idrefs = {}
    self.spine = []
    self.guide = []

    self._parseData()
def __init__(self, opf_path, debug=False):
    """Read the OPF at ``opf_path`` into memory and run ``_parseData()``.

    ``self.opfname`` holds the base name of the path (after ``utf8_str``)
    and ``self.opf`` the bytes of the file.  The remaining attributes are
    initialised to empty values before parsing starts.
    """
    self._debug = debug
    self.opfname = utf8_str(os.path.basename(opf_path))

    self.opf = None
    with open(pathof(opf_path), "rb") as source:
        self.opf = source.read()
    # position within self.opf, starting at the beginning
    self.opos = 0

    # package / metadata
    self.package_tag = [None, None]
    # self.package_version = None
    self.metadata_tag = [None, None]
    self.metadata = []
    self.cover_id = None

    # manifest
    self.manifest_id_to_href = {}
    self.manifest_id_to_mime = {}
    self.href_to_manifest_id = {}

    # spine / guide
    self.spine_ppd = None
    # self.spine_pageattributes = {}
    # self.spine_idrefs = {}
    self.spine = []
    self.guide = []

    self._parseData()
def deleteotherfile(self, book_href):
    """Remove the non-manifest file ``book_href`` from the book.

    If the file was added or modified in this session, its copy under
    ``self.outdir`` is deleted and the href is dropped from ``self.added``,
    ``self.other`` and ``self.modified``.  Unless it had only just been
    added, the href is appended to ``self.deleted``.  Its
    ``self.id_to_filepath`` entry is removed last.

    Raises:
        WrapperException: the href has no entry in
            ``self.id_to_filepath``, or it is listed in ``PROTECTED_FILES``.
    """
    id = utf8_str(book_href)
    filepath = self.id_to_filepath.get(id, None)
    # Check the lookup result, not the key: an unknown href previously got
    # past this guard and failed later with an unrelated error.
    if filepath is None:
        raise WrapperException('book href does not exist')
    if id in PROTECTED_FILES:
        raise WrapperException('attempt to delete protected file')
    add_to_deleted = True
    # if file was added or modified delete file from outdir
    if id in self.added or id in self.modified:
        filepath = os.path.join(self.outdir, filepath)
        if path.exists(filepath) and path.isfile(filepath):
            os.remove(pathof(filepath))
        if id in self.added:
            # Added-then-deleted files were never part of the source book.
            self.added.remove(id)
            add_to_deleted = False
        if id in self.other:
            self.other.remove(id)
        if id in self.modified:
            del self.modified[id]
    if add_to_deleted:
        self.deleted.append(id)
    del self.id_to_filepath[id]
def deleteotherfile(self, book_href):
    """Delete the file identified by ``book_href`` and its bookkeeping.

    A copy in ``self.outdir`` (present when the href is in ``self.added``
    or ``self.modified``) is removed from disk, and the href is dropped
    from ``self.added``, ``self.other`` and ``self.modified``.  The href is
    recorded in ``self.deleted`` unless it was only added in this session,
    and finally removed from ``self.id_to_filepath``.

    Raises:
        WrapperException: the href is unknown or is a protected file.
    """
    id = utf8_str(book_href)
    filepath = self.id_to_filepath.get(id, None)
    # The existence test has to use ``filepath``; testing ``id`` never
    # rejected an unknown href.
    if filepath is None:
        raise WrapperException('book href does not exist')
    if id in PROTECTED_FILES:
        raise WrapperException('attempt to delete protected file')
    add_to_deleted = True
    # if file was added or modified delete file from outdir
    if id in self.added or id in self.modified:
        filepath = os.path.join(self.outdir, filepath)
        if path.exists(filepath) and path.isfile(filepath):
            os.remove(pathof(filepath))
        if id in self.added:
            # Nothing to report as deleted for a file created this session.
            self.added.remove(id)
            add_to_deleted = False
        if id in self.other:
            self.other.remove(id)
        if id in self.modified:
            del self.modified[id]
    if add_to_deleted:
        self.deleted.append(id)
    del self.id_to_filepath[id]
def map_id_to_href(self, id, ow):
    """Return the href recorded for manifest ``id``, or ``ow`` if unknown.

    ``id`` is passed through ``utf8_str`` before the lookup in
    ``self.id_to_href``.
    """
    return self.id_to_href.get(utf8_str(id), ow)
def map_href_to_id(self, href, ow):
    """Return the manifest id recorded for ``href``, or ``ow`` if unknown.

    ``href`` is passed through ``utf8_str`` before the lookup in
    ``self.href_to_id``.
    """
    return self.href_to_id.get(utf8_str(href), ow)
def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=False,
               dodump=False, dowriteraw=False, dosplitcombos=False):
    """Unpack the Mobipocket/KF8 file ``infile`` into ``outdir``.

    The ``do*`` flags can only switch the module-level options ``DUMP``,
    ``WRITE_RAW_DATA`` and ``SPLIT_COMBO_MOBIS`` on, never off.  The Palm
    database header is checked, the sections are scanned for a KF8
    boundary to collect every Mobi header, and the headers are handed to
    ``process_all_mobi_headers``.  For a combination file with
    ``SPLIT_COMBO_MOBIS`` set, the separate mobi7 and mobi8 files are also
    written into the output directory.

    Raises:
        unpackException: the database ident is neither 'BOOKMOBI' nor
            'TEXtREAd'.
    """
    global DUMP
    global WRITE_RAW_DATA
    global SPLIT_COMBO_MOBIS
    if DUMP or dodump:
        DUMP = True
    if WRITE_RAW_DATA or dowriteraw:
        WRITE_RAW_DATA = True
    if SPLIT_COMBO_MOBIS or dosplitcombos:
        SPLIT_COMBO_MOBIS = True

    infile = utf8_str(infile)
    outdir = utf8_str(outdir)
    if apnxfile is not None:
        apnxfile = utf8_str(apnxfile)

    files = fileNames(infile, outdir)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd':
        raise unpackException('Invalid file format')
    if DUMP:
        sect.dumppalmheader()
    else:
        print("Palm DB type: %s, %d sections." % (sect.ident, sect.num_sections))

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    mhlst = []
    mh = MobiHeader(sect, 0)
    # if this is a mobi8-only file hasK8 here will be true
    mhlst.append(mh)
    K8Boundary = -1

    if mh.isK8():
        print("Unpacking a KF8 book...")
        hasK8 = True
    else:
        # This is either a Mobipocket 7 or earlier, or a combi M7/KF8
        # Find out which
        hasK8 = False
        for i in xrange(len(sect.sectionoffsets) - 1):
            before, after = sect.sectionoffsets[i:i + 2]
            # only sections of exactly 8 bytes are compared with the marker
            if (after - before) == 8:
                data = sect.loadSection(i)
                if data == K8_BOUNDARY:
                    sect.setsectiondescription(i, "Mobi/KF8 Boundary Section")
                    mh = MobiHeader(sect, i + 1)
                    hasK8 = True
                    mhlst.append(mh)
                    K8Boundary = i
                    break
        if hasK8:
            print("Unpacking a Combination M{0:d}/KF8 book...".format(mh.version))
            if SPLIT_COMBO_MOBIS:
                # if this is a combination mobi7-mobi8 file split them up
                mobisplit = mobi_split(infile)
                if mobisplit.combo:
                    basename = files.getInputFileBasename()
                    outmobi7 = os.path.join(files.outdir, 'mobi7-' + basename + '.mobi')
                    outmobi8 = os.path.join(files.outdir, 'mobi8-' + basename + '.azw3')
                    # ``with`` closes (and flushes) each file deterministically
                    # instead of leaving the handle to the garbage collector.
                    with open(pathof(outmobi7), 'wb') as fp:
                        fp.write(mobisplit.getResult7())
                    with open(pathof(outmobi8), 'wb') as fp:
                        fp.write(mobisplit.getResult8())
        else:
            print("Unpacking a Mobipocket {0:d} book...".format(mh.version))

    if hasK8:
        files.makeK8Struct()

    process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, False, epubver, use_hd)

    if DUMP:
        sect.dumpsectionsinfo()
    return
def setpackagetag(self, new_packagetag):
    """Store ``new_packagetag`` (after ``utf8_str``) as ``self.package_tag``.

    Also flags 'OEBPS/content.opf' in ``self.modified``.
    """
    tag = utf8_str(new_packagetag)
    self.package_tag = tag
    self.modified['OEBPS/content.opf'] = 'file'
def setmetadataxml(self, new_metadata):
    """Store ``new_metadata`` (after ``utf8_str``) as ``self.metadataxml``.

    Also flags 'OEBPS/content.opf' in ``self.modified``.
    """
    xml = utf8_str(new_metadata)
    self.metadataxml = xml
    self.modified['OEBPS/content.opf'] = 'file'
def listdir(s):
    """Return the entries of directory ``s``.

    The directory is listed with ``os.listdir`` on ``pathof(s)`` and each
    entry is passed through ``utf8_str`` with the filesystem encoding.
    """
    entries = os.listdir(pathof(s))
    fs_enc = sys.getfilesystemencoding()
    return [utf8_str(entry, enc=fs_enc) for entry in entries]
def setspineppd(self, ppd):
    """Set the spine page-progression direction.

    ``ppd`` must be 'rtl', 'ltr' or None.  It is stored (after
    ``utf8_str``) in ``self.spine_ppd`` and 'OEBPS/content.opf' is
    flagged in ``self.modified``.

    Raises:
        WrapperException: ``ppd`` is not one of the accepted values.
    """
    allowed = ('rtl', 'ltr', None)
    if ppd not in allowed:
        raise WrapperException('incorrect page-progression direction')
    self.spine_ppd = utf8_str(ppd)
    self.modified['OEBPS/content.opf'] = 'file'
def getcwd():
    """Return the result of ``os.getcwdu()`` passed through ``utf8_str``."""
    cwd = os.getcwdu()
    return utf8_str(cwd)
def relpath(apath, start=None):
    """Return ``apath`` relative to ``start`` using ``os.path.relpath``.

    Both arguments are passed through ``utf8_str``.  When ``start`` is
    omitted or None, ``os.path.relpath`` is called without it so the
    standard library's own default (the current directory) applies.
    Forwarding None explicitly fails on Python 2, where ``relpath`` calls
    ``abspath(start)`` unconditionally.
    """
    apath = utf8_str(apath)
    if start is None:
        return os.path.relpath(apath)
    return os.path.relpath(apath, utf8_str(start))
def map_id_to_mime(self, id, ow):
    """Return the mime type recorded for manifest ``id``, or ``ow``.

    ``id`` is passed through ``utf8_str`` before the lookup in
    ``self.id_to_mime``.
    """
    return self.id_to_mime.get(utf8_str(id), ow)
def map_href_to_id(self, href, ow):
    """Look up the manifest id for ``href``; return ``ow`` when absent.

    The key used in ``self.href_to_id`` is ``href`` after ``utf8_str``.
    """
    key = utf8_str(href)
    return self.href_to_id.get(key, ow)
def unquoteurl(href):
    """Return ``href`` passed through ``utf8_str`` and then ``unquote``."""
    return unquote(utf8_str(href))
def abspath(apath):
    """Return the absolute form of ``apath``.

    ``os.path.abspath`` is applied to ``pathof(apath)`` and the result is
    passed through ``utf8_str``.
    """
    native = pathof(apath)
    return utf8_str(os.path.abspath(native))
def unpackBook(infile, outdir, apnxfile=None, epubver='2', use_hd=False,
               dodump=False, dowriteraw=False, dosplitcombos=False):
    """Unpack a Mobipocket / KF8 book from ``infile`` into ``outdir``.

    ``dodump``, ``dowriteraw`` and ``dosplitcombos`` turn the module-level
    options ``DUMP``, ``WRITE_RAW_DATA`` and ``SPLIT_COMBO_MOBIS`` on; an
    option that is already on stays on.  After the Palm database ident is
    verified, the sections are scanned for a KF8 boundary to build the list
    of Mobi headers, which is passed to ``process_all_mobi_headers``.
    Combination files are additionally split into separate mobi7/mobi8
    files when ``SPLIT_COMBO_MOBIS`` is set.

    Raises:
        unpackException: the database ident is neither 'BOOKMOBI' nor
            'TEXtREAd'.
    """
    global DUMP
    global WRITE_RAW_DATA
    global SPLIT_COMBO_MOBIS
    if DUMP or dodump:
        DUMP = True
    if WRITE_RAW_DATA or dowriteraw:
        WRITE_RAW_DATA = True
    if SPLIT_COMBO_MOBIS or dosplitcombos:
        SPLIT_COMBO_MOBIS = True

    infile = utf8_str(infile)
    outdir = utf8_str(outdir)
    if apnxfile is not None:
        apnxfile = utf8_str(apnxfile)

    files = fileNames(infile, outdir)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd':
        raise unpackException('Invalid file format')
    if DUMP:
        sect.dumppalmheader()
    else:
        print("Palm DB type: %s, %d sections." % (sect.ident, sect.num_sections))

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    mhlst = []
    mh = MobiHeader(sect, 0)
    # if this is a mobi8-only file hasK8 here will be true
    mhlst.append(mh)
    K8Boundary = -1

    if mh.isK8():
        print("Unpacking a KF8 book...")
        hasK8 = True
    else:
        # This is either a Mobipocket 7 or earlier, or a combi M7/KF8
        # Find out which
        hasK8 = False
        for i in xrange(len(sect.sectionoffsets) - 1):
            before, after = sect.sectionoffsets[i:i + 2]
            # the boundary marker is looked for only in 8-byte sections
            if (after - before) == 8:
                data = sect.loadSection(i)
                if data == K8_BOUNDARY:
                    sect.setsectiondescription(i, "Mobi/KF8 Boundary Section")
                    mh = MobiHeader(sect, i + 1)
                    hasK8 = True
                    mhlst.append(mh)
                    K8Boundary = i
                    break
        if hasK8:
            print("Unpacking a Combination M{0:d}/KF8 book...".format(mh.version))
            if SPLIT_COMBO_MOBIS:
                # if this is a combination mobi7-mobi8 file split them up
                mobisplit = mobi_split(infile)
                if mobisplit.combo:
                    stem = files.getInputFileBasename()
                    outmobi7 = os.path.join(files.outdir, 'mobi7-' + stem + '.mobi')
                    outmobi8 = os.path.join(files.outdir, 'mobi8-' + stem + '.azw3')
                    # Context managers guarantee the handles are closed; the
                    # bare open(...).write(...) form leaked them.
                    with open(pathof(outmobi7), 'wb') as fp:
                        fp.write(mobisplit.getResult7())
                    with open(pathof(outmobi8), 'wb') as fp:
                        fp.write(mobisplit.getResult8())
        else:
            print("Unpacking a Mobipocket {0:d} book...".format(mh.version))

    if hasK8:
        files.makeK8Struct()

    process_all_mobi_headers(files, apnxfile, sect, mhlst, K8Boundary, False, epubver, use_hd)

    if DUMP:
        sect.dumpsectionsinfo()
    return