def writeXHTML(self):
    """Build the cover XHTML page and write it into the K8 text directory.

    The page references ``self.cover_image`` through a path relative to
    ``files.k8text``.  If the output file already exists a warning is
    printed and the old file is removed before the new one is written.
    """
    files = self.files
    cover_page = self.cover_page
    cover_image = self.cover_image
    title = self.title
    lang = self.lang
    # Location of the image directory as seen from the text directory,
    # with backslashes turned into forward slashes for use in the src URL.
    image_dir = os.path.relpath(files.k8images, files.k8text).replace("\\", "/")
    # Collect the fragments and join once instead of repeated "+=".
    parts = [
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html>',
        '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"',
        ' xml:lang="{:s}">\n'.format(lang),
        "<head>\n<title>{:s}</title>\n".format(title),
        '<style type="text/css">\n',
        "body {\n\tmargin: 0;\n\tpadding: 0;\n\ttext-align: center;\n}\n",
        "div {\n\theight: 100%;\n\twidth: 100%;\n\ttext-align: center;\n\tpage-break-inside: avoid;\n}\n",
        "img {\n\tdisplay: inline-block;\n\theight: 100%;\n\tmargin: 0 auto;\n}\n",
        "</style>\n</head>\n",
        "<body><div>\n",
        '\t<img src="{:s}/{:s}" alt=""/>\n'.format(image_dir, cover_image),
        "</div></body>\n</html>",
    ]
    data = "".join(parts)

    outfile = os.path.join(files.k8text, cover_page)
    if os.path.exists(pathof(outfile)):
        print("Warning: {:s} already exists.".format(cover_page))
        os.remove(pathof(outfile))
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(outfile), "w") as fp:
        fp.write(data)
    return
def writeXHTML(self):
    """Build the cover XHTML page and write it into the K8 text directory.

    The page references ``self.cover_image`` through a path relative to
    ``files.k8text``.  If the output file already exists a warning is
    printed and the old file is removed before the new one is written.
    """
    files = self.files
    cover_page = self.cover_page
    cover_image = self.cover_image
    title = self.title
    lang = self.lang
    # Location of the image directory as seen from the text directory,
    # with backslashes turned into forward slashes for use in the src URL.
    image_dir = os.path.relpath(files.k8images, files.k8text).replace('\\', '/')
    # Collect the fragments and join once instead of repeated "+=".
    parts = [
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html>',
        '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"',
        ' xml:lang="{:s}">\n'.format(lang),
        '<head>\n<title>{:s}</title>\n'.format(title),
        '<style type="text/css">\n',
        'body {\n\tmargin: 0;\n\tpadding: 0;\n\ttext-align: center;\n}\n',
        'div {\n\theight: 100%;\n\twidth: 100%;\n\ttext-align: center;\n\tpage-break-inside: avoid;\n}\n',
        'img {\n\tdisplay: inline-block;\n\theight: 100%;\n\tmargin: 0 auto;\n}\n',
        '</style>\n</head>\n',
        '<body><div>\n',
        '\t<img src="{:s}/{:s}" alt=""/>\n'.format(image_dir, cover_image),
        '</div></body>\n</html>',
    ]
    data = ''.join(parts)

    outfile = os.path.join(files.k8text, cover_page)
    if os.path.exists(pathof(outfile)):
        print('Warning: {:s} already exists.'.format(cover_page))
        os.remove(pathof(outfile))
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(outfile), 'w') as fp:
        fp.write(data)
    return
def processImage(i, files, rscnames, sect, data, beg, rsc_ptr, cover_offset):
    """Extract the image stored in section ``i`` and record its name.

    Args:
        i: index of the section being processed.
        files: object providing the ``outdir`` and ``imgdir`` paths.
        rscnames: list that receives exactly one new entry per call: the
            image file name, or None when the data is not a known image.
        sect: section container; its description for ``i`` is updated.
        data: raw content of the section.
        beg: base section index; resource offsets are ``i - beg``.
        rsc_ptr: offset of the first resource seen so far, -1 if none yet.
        cover_offset: offset (relative to ``beg``) of the cover, or None.

    Returns:
        The tuple ``(rscnames, rsc_ptr)``.
    """
    imgtype = get_image_type(None, data)
    if imgtype is None:
        print("Warning: Section %s does not contain a recognised resource" % i)
        rscnames.append(None)
        sect.setsectiondescription(
            i, "Mysterious Section, first four bytes %s" % describe(data[0:4]))
        if DUMP:
            # Keep the unrecognised payload on disk for later inspection.
            fname = "unknown%05d.dat" % i
            outname = os.path.join(files.outdir, fname)
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            sect.setsectiondescription(
                i, "Mysterious Section, first four bytes %s extracting as %s"
                % (describe(data[0:4]), fname))
        return rscnames, rsc_ptr

    imgname = "image%05d.%s" % (i, imgtype)
    if cover_offset is not None and i == beg + cover_offset:
        # The cover gets a distinguishing file name.
        imgname = "cover%05d.%s" % (i, imgtype)
    print("Extracting image: {0:s} from section {1:d}".format(imgname, i))
    outimg = os.path.join(files.imgdir, imgname)
    with open(pathof(outimg), 'wb') as fp:
        fp.write(data)
    rscnames.append(imgname)
    sect.setsectiondescription(i, "Image {0:s}".format(imgname))
    if rsc_ptr == -1:
        # Remember where the first resource lives relative to ``beg``.
        rsc_ptr = i - beg
    return rscnames, rsc_ptr
def processPrintReplica(metadata, files, imgnames, mh):
    """Split a Print Replica payload into its embedded sections.

    The raw markup from ``mh.getRawML()`` is optionally dumped as
    ``<basename>.rawpr`` (when DUMP or WRITE_RAW_DATA is set).  The table
    count is read at offset 4, the per-table section counts follow from
    offset 8, and after them come (offset, length) pairs for every section.
    The first section of each table is written as ``.NNN.pdf``, the others
    as ``.NNN.MMM.data``.  Any exception while parsing is reported on
    stdout and swallowed.
    """
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.outdir, files.getInputFileBasename() + '.rawpr')
        with open(pathof(outraw), 'wb') as fp:
            fp.write(rawML)

    print("Print Replica ebook detected")
    try:
        numTables, = struct.unpack_from('>L', rawML, 0x04)
        tableIndexOffset = 8 + 4 * numTables
        # for each table, read in count of sections, assume first section is a PDF
        # and output other sections as binary files
        for i in xrange(numTables):
            sectionCount, = struct.unpack_from('>L', rawML, 0x08 + 4 * i)
            for j in xrange(sectionCount):
                sectionOffset, sectionLength = struct.unpack_from('>LL', rawML, tableIndexOffset)
                tableIndexOffset += 8
                if j == 0:
                    suffix = '.%03d.pdf' % (i + 1)
                else:
                    suffix = '.%03d.%03d.data' % ((i + 1), j)
                entryName = os.path.join(files.outdir, files.getInputFileBasename() + suffix)
                with open(pathof(entryName), 'wb') as fp:
                    fp.write(rawML[sectionOffset:(sectionOffset + sectionLength)])
    except Exception as e:
        # Best effort: report the problem and carry on.
        print('Error processing Print Replica: ' + str(e))
def processPrintReplica(metadata, files, rscnames, mh):
    """Split a Print Replica payload into its embedded sections.

    The raw markup from ``mh.getRawML()`` is optionally dumped as
    ``<basename>.rawpr`` (when DUMP or WRITE_RAW_DATA is set).  The table
    count is read at offset 4, the per-table section counts follow from
    offset 8, and after them come (offset, length) pairs for every section.
    The first section of each table is written as ``.NNN.pdf``, the others
    as ``.NNN.MMM.data``.  Any exception while parsing is reported on
    stdout and swallowed.
    """
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.outdir, files.getInputFileBasename() + '.rawpr')
        with open(pathof(outraw), 'wb') as fp:
            fp.write(rawML)

    print("Print Replica ebook detected")
    try:
        numTables, = struct.unpack_from('>L', rawML, 0x04)
        tableIndexOffset = 8 + 4 * numTables
        # for each table, read in count of sections, assume first section is a PDF
        # and output other sections as binary files
        for i in xrange(numTables):
            sectionCount, = struct.unpack_from('>L', rawML, 0x08 + 4 * i)
            for j in xrange(sectionCount):
                sectionOffset, sectionLength = struct.unpack_from('>LL', rawML, tableIndexOffset)
                tableIndexOffset += 8
                if j == 0:
                    suffix = '.%03d.pdf' % (i + 1)
                else:
                    suffix = '.%03d.%03d.data' % ((i + 1), j)
                entryName = os.path.join(files.outdir, files.getInputFileBasename() + suffix)
                with open(pathof(entryName), 'wb') as fp:
                    fp.write(rawML[sectionOffset:(sectionOffset + sectionLength)])
    except Exception as e:
        # Best effort: report the problem and carry on.
        print('Error processing Print Replica: ' + str(e))
def processImage(i, files, imgnames, sect, data, beg, image_ptr, cover_offset):
    """Extract the image stored in section ``i`` and record its name.

    Args:
        i: index of the section being processed.
        files: object providing the ``outdir`` and ``imgdir`` paths.
        imgnames: list that receives exactly one new entry per call: the
            image file name, or None when the data is not a known image.
        sect: section container; its description for ``i`` is updated.
        data: raw content of the section.
        beg: base section index; image offsets are ``i - beg``.
        image_ptr: offset of the first image seen so far, -1 if none yet.
        cover_offset: offset (relative to ``beg``) of the cover, or None.

    Returns:
        The tuple ``(imgnames, image_ptr)``.
    """
    imgtype = get_image_type(None, data)
    if imgtype is None:
        print("Warning: Section %s does not contain a recognised resource" % i)
        imgnames.append(None)
        sect.setsectiondescription(
            i, "Mysterious Section, first four bytes %s" % describe(data[0:4]))
        if DUMP:
            # Keep the unrecognised payload on disk for later inspection.
            fname = "unknown%05d.dat" % i
            outname = os.path.join(files.outdir, fname)
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            sect.setsectiondescription(
                i, "Mysterious Section, first four bytes %s extracting as %s"
                % (describe(data[0:4]), fname))
        return imgnames, image_ptr

    imgname = "image%05d.%s" % (i, imgtype)
    if cover_offset is not None and i == beg + cover_offset:
        # The cover gets a distinguishing file name.
        imgname = "cover%05d.%s" % (i, imgtype)
    print("Extracting image: {0:s} from section {1:d}".format(imgname, i))
    outimg = os.path.join(files.imgdir, imgname)
    with open(pathof(outimg), 'wb') as fp:
        fp.write(data)
    imgnames.append(imgname)
    sect.setsectiondescription(i, "Image {0:s}".format(imgname))
    if image_ptr == -1:
        # Remember where the first image lives relative to ``beg``.
        image_ptr = i - beg
    return imgnames, image_ptr
def processCRES(i, files, imgnames, sect, data, beg, image_ptr, use_hd):
    """Extract the HD image carried by CRES section ``i``.

    The first 12 bytes of ``data`` are skipped before the image type is
    detected.  With ``use_hd`` the HD data is written into ``files.imgdir``
    under the name stored at ``imgnames[image_ptr]`` (overwriting the lower
    resolution image); otherwise it is written to ``files.hdimgdir`` as
    ``HDimageNNNNN.<type>``.  ``None`` is appended to ``imgnames`` and
    ``image_ptr`` is advanced by one in every case.

    Returns:
        The tuple ``(imgnames, image_ptr)``.
    """
    data = data[12:]
    imgtype = get_image_type(None, data)

    if imgtype is None:
        print("Warning: CRES Section %s does not contain a recognised resource" % i)
        imgnames.append(None)
        sect.setsectiondescription(
            i, "Mysterious CRES data, first four bytes %s" % describe(data[0:4]))
        if DUMP:
            fname = "unknown%05d.dat" % i
            outname = os.path.join(files.outdir, fname)
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            sect.setsectiondescription(
                i, "Mysterious CRES data, first four bytes %s extracting as %s"
                % (describe(data[0:4]), fname))
        image_ptr += 1
        return imgnames, image_ptr

    if use_hd:
        # overwrite corresponding lower res image with hd version
        imgname = imgnames[image_ptr]
        imgdest = files.imgdir
    else:
        imgname = "HDimage%05d.%s" % (i, imgtype)
        imgdest = files.hdimgdir
    print("Extracting HD image: {0:s} from section {1:d}".format(imgname, i))
    outimg = os.path.join(imgdest, imgname)
    with open(pathof(outimg), 'wb') as fp:
        fp.write(data)
    imgnames.append(None)
    sect.setsectiondescription(i, "Optional HD Image {0:s}".format(imgname))
    image_ptr += 1
    return imgnames, image_ptr
def write_opf(self):
    """Write the output of ``self.build_opf()`` to ``<outdir>/OEBPS/<opfname>``.

    Does nothing when ``self.op`` is None.  The parent directory is created
    first when it does not exist yet.
    """
    if self.op is None:
        return
    opf_path = utf8_str(os.path.join(self.outdir, 'OEBPS', self.opfname))
    parent = os.path.dirname(opf_path)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(opf_path), 'wb') as out:
        out.write(self.build_opf())
def write_opf(self):
    """Write the output of ``self.build_opf()`` to ``<outdir>/OEBPS/<opfname>``.

    Does nothing when ``self.op`` is None.  The parent directory is created
    first when it does not exist yet.
    """
    if self.op is None:
        return
    opf_path = utf8_str(os.path.join(self.outdir, 'OEBPS', self.opfname))
    parent = os.path.dirname(opf_path)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(opf_path), 'wb') as out:
        out.write(self.build_opf())
def writeOPF(self, has_obfuscated_fonts=False):
    """Build the OPF and write it to disk.

    For K8 books the result of ``buildEPUBOPF`` is written to
    ``files.k8oebps/EPUB_OPF``; otherwise the result of ``buildMobi7OPF``
    is written to ``files.mobi7dir/<input basename>.opf``.

    Returns:
        ``self.BookId`` for K8 books, 0 otherwise.
    """
    if self.isK8:
        data = self.buildEPUBOPF(has_obfuscated_fonts)
        outopf = os.path.join(self.files.k8oebps, EPUB_OPF)
        with open(pathof(outopf), 'wb') as fp:
            fp.write(data)
        return self.BookId

    data = self.buildMobi7OPF()
    outopf = os.path.join(self.files.mobi7dir,
                          self.files.getInputFileBasename() + '.opf')
    with open(pathof(outopf), 'wb') as fp:
        fp.write(data)
    return 0
def epub_zip_up_book_contents(ebook_path, epub_filepath):
    """Zip the contents of ``ebook_path`` into the epub at ``epub_filepath``.

    The ``mimetype`` entry is added first and stored uncompressed; every
    other file is deflated.

    Raises:
        Exception: if no ``mimetype`` file is listed for ``ebook_path``.
    """
    outzip = zipfile.ZipFile(pathof(epub_filepath), 'w')
    # try/finally so the archive is closed even when an entry fails or the
    # mimetype file is missing.
    try:
        # NOTE(review): path.walk is a project helper; the code relies on it
        # returning a mutable list of names relative to ebook_path - confirm.
        files = path.walk(ebook_path)
        if 'mimetype' not in files:
            raise Exception('mimetype file is missing')
        outzip.write(pathof(os.path.join(ebook_path, 'mimetype')),
                     pathof('mimetype'), zipfile.ZIP_STORED)
        files.remove('mimetype')
        for name in files:
            filepath = os.path.join(ebook_path, name)
            outzip.write(pathof(filepath), pathof(name), zipfile.ZIP_DEFLATED)
    finally:
        outzip.close()
def writeXHTML(self):
    """Write the page returned by ``self.buildXHTML()`` to the text directory.

    The target is ``files.k8text/<cover_page>``.  If it already exists a
    warning is printed and the old file is removed before writing.
    """
    files = self.files
    cover_page = self.cover_page

    data = self.buildXHTML()

    outfile = os.path.join(files.k8text, cover_page)
    if os.path.exists(pathof(outfile)):
        print('Warning: {:s} already exists.'.format(cover_page))
        os.remove(pathof(outfile))
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(outfile), 'w') as fp:
        fp.write(data)
    return
def writeXHTML(self):
    """Write the page returned by ``self.buildXHTML()`` to the text directory.

    The target is ``files.k8text/<cover_page>``.  If it already exists a
    warning is printed and the old file is removed before writing.
    """
    files = self.files
    cover_page = self.cover_page

    data = self.buildXHTML()

    outfile = os.path.join(files.k8text, cover_page)
    if os.path.exists(pathof(outfile)):
        print('Warning: {:s} already exists.'.format(cover_page))
        os.remove(pathof(outfile))
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(outfile), 'w') as fp:
        fp.write(data)
    return
def zipUpDir(self, myzip, tdir, localname):
    """Recursively add the files below ``tdir/localname`` to ``myzip``.

    Archive names are built relative to ``tdir`` (``localname`` joined with
    each entry name); regular files are deflated and sub-directories are
    descended into.
    """
    if localname != "":
        currentdir = os.path.join(tdir, localname)
    else:
        currentdir = tdir
    for entry in path.listdir(currentdir):
        arcname = os.path.join(localname, entry)
        fullpath = os.path.join(currentdir, entry)
        if path.isfile(fullpath):
            myzip.write(pathof(fullpath), pathof(arcname), zipfile.ZIP_DEFLATED)
        elif path.isdir(fullpath):
            self.zipUpDir(myzip, tdir, arcname)
def epub_zip_up_book_contents(ebook_path, epub_filepath):
    """Zip the contents of ``ebook_path`` into the epub at ``epub_filepath``.

    The ``mimetype`` entry is added first and stored uncompressed; every
    other file is deflated.

    Raises:
        Exception: if no ``mimetype`` file is listed for ``ebook_path``.
    """
    outzip = zipfile.ZipFile(pathof(epub_filepath), 'w')
    # try/finally so the archive is closed even when an entry fails or the
    # mimetype file is missing.
    try:
        # NOTE(review): path.walk is a project helper; the code relies on it
        # returning a mutable list of names relative to ebook_path - confirm.
        files = path.walk(ebook_path)
        if 'mimetype' not in files:
            raise Exception('mimetype file is missing')
        outzip.write(pathof(os.path.join(ebook_path, 'mimetype')),
                     pathof('mimetype'), zipfile.ZIP_STORED)
        files.remove('mimetype')
        for name in files:
            filepath = os.path.join(ebook_path, name)
            outzip.write(pathof(filepath), pathof(name), zipfile.ZIP_DEFLATED)
    finally:
        outzip.close()
def zipUpDir(self, myzip, tdir, localname):
    """Recursively add the files below ``tdir/localname`` to ``myzip``.

    Archive names are built relative to ``tdir`` (``localname`` joined with
    each entry name); regular files are deflated and sub-directories are
    descended into.
    """
    if localname != "":
        currentdir = os.path.join(tdir, localname)
    else:
        currentdir = tdir
    for entry in path.listdir(currentdir):
        arcname = os.path.join(localname, entry)
        fullpath = os.path.join(currentdir, entry)
        if path.isfile(fullpath):
            myzip.write(pathof(fullpath), pathof(arcname), zipfile.ZIP_DEFLATED)
        elif path.isdir(fullpath):
            self.zipUpDir(myzip, tdir, arcname)
def writeotherfile(self, book_href, data):
    """Write ``data`` to the file registered under ``book_href``.

    The file is written below ``self.outdir`` (parent directories are
    created on demand) and the href is marked as modified.

    Raises:
        WrapperException: if the href is unknown or names a protected file.
    """
    href_id = utf8_str(book_href)
    relpath = self.id_to_filepath.get(href_id)
    if relpath is None:
        raise WrapperException('book href does not exist')
    if href_id in PROTECTED_FILES:
        raise WrapperException('Attempt to modify protected file')
    target = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)
    self.modified[href_id] = 'file'
def writeotherfile(self, book_href, data):
    """Write ``data`` to the file registered under ``book_href``.

    The file is written below ``self.outdir`` (parent directories are
    created on demand) and the href is marked as modified.

    Raises:
        WrapperException: if the href is unknown or names a protected file.
    """
    href_id = utf8_str(book_href)
    relpath = self.id_to_filepath.get(href_id)
    if relpath is None:
        raise WrapperException('book href does not exist')
    if href_id in PROTECTED_FILES:
        raise WrapperException('Attempt to modify protected file')
    target = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)
    self.modified[href_id] = 'file'
def writefile(self, id, data):
    """Write ``data`` to the manifest file identified by ``id``.

    Data for media types ending in ``+xml`` is passed through ``utf8_str``
    first.  The file is written below ``self.outdir`` (parent directories
    are created on demand) and the id is marked as modified.

    Raises:
        WrapperException: if ``id`` has no registered file path.
    """
    id = utf8_str(id)
    relpath = self.id_to_filepath.get(id)
    if relpath is None:
        raise WrapperException('Id does not exist in manifest')
    if self.id_to_mime.get(id, '').endswith('+xml'):
        data = utf8_str(data)
    target = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)
    self.modified[id] = 'file'
def writefile(self, id, data):
    """Write ``data`` to the manifest file identified by ``id``.

    Data for media types ending in ``+xml`` is passed through ``utf8_str``
    first.  The file is written below ``self.outdir`` (parent directories
    are created on demand) and the id is marked as modified.

    Raises:
        WrapperException: if ``id`` has no registered file path.
    """
    id = utf8_str(id)
    relpath = self.id_to_filepath.get(id)
    if relpath is None:
        raise WrapperException('Id does not exist in manifest')
    if self.id_to_mime.get(id, '').endswith('+xml'):
        data = utf8_str(data)
    target = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)
    self.modified[id] = 'file'
def addotherfile(self, book_href, data):
    """Add a new non-manifest file under ``self.outdir``.

    ``book_href`` (with "/" mapped to ``os.sep``) gives the relative path.
    On success the href is appended to ``self.other`` and ``self.added``
    and its path is registered in ``self.id_to_filepath``.

    Raises:
        WrapperException: if the href is already known or a file already
            exists at the desired path.
    """
    href_id = utf8_str(book_href)
    if href_id in self.other:
        raise WrapperException('book href must be unquie')
    desired_path = href_id.replace("/", os.sep)
    target = os.path.join(pathof(self.outdir), desired_path)
    if path.isfile(target):
        raise WrapperException('desired path already exists')
    parent = os.path.dirname(pathof(target))
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)
    self.other.append(href_id)
    self.added.append(href_id)
    self.id_to_filepath[href_id] = desired_path
def deletefile(self, id):
    """Remove the manifest entry ``id`` and any copy of it in ``outdir``.

    A file that was added or modified in this session is deleted from
    ``self.outdir``.  The id is then dropped from the manifest maps and the
    spine, recorded in ``self.deleted`` (unless it had only just been
    added) and the OPF is marked as modified.

    Raises:
        WrapperException: if ``id`` has no registered file path.
    """
    id = utf8_str(id)
    filepath = self.id_to_filepath.get(id, None)
    # The lookup result is what tells us whether the id is known; the
    # original tested ``id`` itself, so unknown ids slipped through.
    if filepath is None:
        raise WrapperException('id does not exist in manifest')
    add_to_deleted = True
    # if file was added or modified, delete file from outdir
    if id in self.added or id in self.modified.keys():
        filepath = os.path.join(self.outdir, filepath)
        if path.exists(filepath) and path.isfile(filepath):
            os.remove(pathof(filepath))
        if id in self.added:
            self.added.remove(id)
            # A file added in this session never existed in the source
            # book, so it is not recorded as deleted.
            add_to_deleted = False
        if id in self.modified.keys():
            del self.modified[id]
    # remove from manifest
    href = self.id_to_href[id]
    del self.id_to_href[id]
    del self.id_to_mime[id]
    del self.href_to_id[href]
    # remove from spine
    new_spine = []
    was_modified = False
    for sid, linear in self.spine:
        if sid != id:
            new_spine.append((sid, linear))
        else:
            was_modified = True
    if was_modified:
        # NOTE(review): called as a bare name; if setspine is a method of
        # this class this should probably be self.setspine(...) - confirm.
        setspine(new_spine)
    if add_to_deleted:
        self.deleted.append(id)
        self.modified['OEBPS/content.opf'] = 'file'
    del self.id_to_filepath[id]
def addotherfile(self, book_href, data):
    """Add a new non-manifest file under ``self.outdir``.

    ``book_href`` (with "/" mapped to ``os.sep``) gives the relative path.
    On success the href is appended to ``self.other`` and ``self.added``
    and its path is registered in ``self.id_to_filepath``.

    Raises:
        WrapperException: if the href is already known or a file already
            exists at the desired path.
    """
    href_id = utf8_str(book_href)
    if href_id in self.other:
        raise WrapperException('book href must be unquie')
    desired_path = href_id.replace("/", os.sep)
    target = os.path.join(pathof(self.outdir), desired_path)
    if path.isfile(target):
        raise WrapperException('desired path already exists')
    parent = os.path.dirname(pathof(target))
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)
    self.other.append(href_id)
    self.added.append(href_id)
    self.id_to_filepath[href_id] = desired_path
def deletefile(self, id):
    """Remove the manifest entry ``id`` and any copy of it in ``outdir``.

    A file that was added or modified in this session is deleted from
    ``self.outdir``.  The id is then dropped from the manifest maps and the
    spine, recorded in ``self.deleted`` (unless it had only just been
    added) and the OPF is marked as modified.

    Raises:
        WrapperException: if ``id`` has no registered file path.
    """
    id = utf8_str(id)
    filepath = self.id_to_filepath.get(id, None)
    # The lookup result is what tells us whether the id is known; the
    # original tested ``id`` itself, so unknown ids slipped through.
    if filepath is None:
        raise WrapperException('id does not exist in manifest')
    add_to_deleted = True
    # if file was added or modified, delete file from outdir
    if id in self.added or id in self.modified.keys():
        filepath = os.path.join(self.outdir, filepath)
        if path.exists(filepath) and path.isfile(filepath):
            os.remove(pathof(filepath))
        if id in self.added:
            self.added.remove(id)
            # A file added in this session never existed in the source
            # book, so it is not recorded as deleted.
            add_to_deleted = False
        if id in self.modified.keys():
            del self.modified[id]
    # remove from manifest
    href = self.id_to_href[id]
    del self.id_to_href[id]
    del self.id_to_mime[id]
    del self.href_to_id[href]
    # remove from spine
    new_spine = []
    was_modified = False
    for sid, linear in self.spine:
        if sid != id:
            new_spine.append((sid, linear))
        else:
            was_modified = True
    if was_modified:
        # NOTE(review): called as a bare name; if setspine is a method of
        # this class this should probably be self.setspine(...) - confirm.
        setspine(new_spine)
    if add_to_deleted:
        self.deleted.append(id)
        self.modified['OEBPS/content.opf'] = 'file'
    del self.id_to_filepath[id]
def writeNAV(self, ncx_data, guidetext, metadata):
    """Build the navigation document and write it to the K8 text directory.

    The first ``Title`` and ``Language`` metadata values are passed to
    ``self.buildNAV``; the result is written to
    ``files.k8text/<self.navname>`` in binary mode.
    """
    # build the xhtml
    xhtml = self.buildNAV(ncx_data, guidetext,
                          metadata.get('Title')[0],
                          metadata.get('Language')[0])
    fname = os.path.join(self.files.k8text, self.navname)
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(fname), 'wb') as fp:
        fp.write(xhtml)
def copy_book_contents_to(self, destdir):
    """Copy every registered book file into ``destdir``.

    Ids present in ``self.id_to_mime`` are read with ``readfile``, all
    others with ``readotherfile``.  Each file keeps its registered relative
    path; missing parent directories are created.

    Raises:
        WrapperException: if ``destdir`` is None or not a directory.
    """
    destdir = utf8_str(destdir)
    if destdir is None or not path.isdir(destdir):
        raise WrapperException('destination directory does not exist')
    for book_id, rpath in self.id_to_filepath.items():
        reader = self.readfile if book_id in self.id_to_mime else self.readotherfile
        data = reader(book_id)
        target = os.path.join(destdir, rpath)
        parent = os.path.dirname(target)
        if not path.exists(parent):
            os.makedirs(pathof(parent))
        with open(pathof(target), 'wb') as out:
            out.write(data)
def processSRCS(i, files, imgnames, sect, data):
    """Save the kindlegen source archive held in section ``i``.

    The payload after the first 16 bytes is written to
    ``files.outdir/KINDLEGENSRC_FILENAME``.  ``None`` is appended to
    ``imgnames`` and the section is described as "Zipped Source Files".

    Returns:
        ``imgnames``.
    """
    # extract the source zip archive and save it.
    print("File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME)
    srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME)
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(srcname), 'wb') as fp:
        fp.write(data[16:])
    imgnames.append(None)
    sect.setsectiondescription(i, "Zipped Source Files")
    return imgnames
def copy_book_contents_to(self, destdir):
    """Copy every registered book file into ``destdir``.

    Ids present in ``self.id_to_mime`` are read with ``readfile``, all
    others with ``readotherfile``.  Each file keeps its registered relative
    path; missing parent directories are created.

    Raises:
        WrapperException: if ``destdir`` is None or not a directory.
    """
    destdir = utf8_str(destdir)
    if destdir is None or not path.isdir(destdir):
        raise WrapperException('destination directory does not exist')
    for book_id, rpath in self.id_to_filepath.items():
        reader = self.readfile if book_id in self.id_to_mime else self.readotherfile
        data = reader(book_id)
        target = os.path.join(destdir, rpath)
        parent = os.path.dirname(target)
        if not path.exists(parent):
            os.makedirs(pathof(parent))
        with open(pathof(target), 'wb') as out:
            out.write(data)
def processSRCS(i, files, rscnames, sect, data):
    """Save the kindlegen source archive held in section ``i``.

    The payload after the first 16 bytes is written to
    ``files.outdir/KINDLEGENSRC_FILENAME``.  ``None`` is appended to
    ``rscnames`` and the section is described as "Zipped Source Files".

    Returns:
        ``rscnames``.
    """
    # extract the source zip archive and save it.
    print("File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME)
    srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME)
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(srcname), 'wb') as fp:
        fp.write(data[16:])
    rscnames.append(None)
    sect.setsectiondescription(i, "Zipped Source Files")
    return rscnames
def processCMET(i, files, rscnames, sect, data):
    """Save the kindlegen build log held in section ``i``.

    The payload after the first 10 bytes is written to
    ``files.outdir/KINDLEGENLOG_FILENAME``.  ``None`` is appended to
    ``rscnames`` and the section is described as "Kindlegen log".

    Returns:
        ``rscnames``.
    """
    # extract the build log
    print("File contains kindlegen build log, extracting as %s" % KINDLEGENLOG_FILENAME)
    srcname = os.path.join(files.outdir, KINDLEGENLOG_FILENAME)
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(srcname), 'wb') as fp:
        fp.write(data[10:])
    rscnames.append(None)
    sect.setsectiondescription(i, "Kindlegen log")
    return rscnames
def writeK8NCX(self, ncx_data, metadata):
    """Build the K8 NCX and write it as ``toc.ncx`` in ``files.k8oebps``.

    Sets ``self.isNCX`` to True.  The first ``Title``, ``UniqueID`` and
    ``Language`` metadata values are passed to ``self.buildK8NCX``.
    """
    # build the xml
    self.isNCX = True
    print("Write K8 ncx")
    xml = self.buildK8NCX(ncx_data,
                          metadata['Title'][0],
                          metadata['UniqueID'][0],
                          metadata.get('Language')[0])
    bname = 'toc.ncx'
    ncxname = os.path.join(self.files.k8oebps, bname)
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(ncxname), 'wb') as fp:
        fp.write(xml)
def processCMET(i, files, imgnames, sect, data):
    """Save the kindlegen build log held in section ``i``.

    The payload after the first 10 bytes is written to
    ``files.outdir/KINDLEGENLOG_FILENAME``.  ``None`` is appended to
    ``imgnames`` and the section is described as "Kindlegen log".

    Returns:
        ``imgnames``.
    """
    # extract the build log
    print("File contains kindlegen build log, extracting as %s" % KINDLEGENLOG_FILENAME)
    srcname = os.path.join(files.outdir, KINDLEGENLOG_FILENAME)
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(srcname), 'wb') as fp:
        fp.write(data[10:])
    imgnames.append(None)
    sect.setsectiondescription(i, "Kindlegen log")
    return imgnames
def get_image_type(imgname, imgdata=None):
    """Return the image type of a file or byte string, or None if unknown.

    ``imghdr.what`` is tried first.  When it finds nothing, data that starts
    with the JPEG start marker (FF D8) and, ignoring trailing NUL bytes,
    ends with the JPEG end marker (FF D9) is reported as ``"jpeg"``.

    Args:
        imgname: path of the image file; read only when ``imgdata`` is None.
        imgdata: optional image bytes to inspect instead of the file.
    """
    imgtype = imghdr.what(pathof(imgname), imgdata)
    if imgtype is not None:
        return imgtype

    # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are some
    # with only the magic JPEG bytes out there...
    # ImageMagick handles those, so, do it too.
    if imgdata is None:
        # Context manager so the file handle is not leaked.
        with open(pathof(imgname), 'rb') as f:
            imgdata = f.read()
    if imgdata[0:2] == b'\xFF\xD8':
        # Get last non-null bytes
        last = len(imgdata)
        while imgdata[last - 1:last] == b'\x00':
            last -= 1
        # Be extra safe, check the trailing bytes, too.
        if imgdata[last - 2:last] == b'\xFF\xD9':
            imgtype = "jpeg"
    return imgtype
def writeNCX(self, metadata):
    """Build the Mobi7 NCX and write it as ``<input basename>.ncx``.

    Sets ``self.isNCX`` to True.  The HTML name handed to ``self.buildNCX``
    is the base name of ``files.outbase`` plus ``.html``; the first
    ``Title``, ``UniqueID`` and ``Language`` metadata values are passed
    along.  The result is written into ``files.mobi7dir``.
    """
    # build the xml
    self.isNCX = True
    print("Write ncx")
    htmlname = os.path.basename(self.files.outbase)
    htmlname += '.html'
    xml = self.buildNCX(htmlname,
                        metadata['Title'][0],
                        metadata['UniqueID'][0],
                        metadata.get('Language')[0])
    # write the ncx file
    ncxname = os.path.join(self.files.mobi7dir,
                           self.files.getInputFileBasename() + '.ncx')
    # Context manager guarantees the handle is flushed and closed.
    with open(pathof(ncxname), 'wb') as fp:
        fp.write(xml)
def get_image_type(imgname, imgdata=None):
    """Return the image type of a file or byte string, or None if unknown.

    ``imghdr.what`` is tried first.  When it finds nothing, data that starts
    with the JPEG start marker (FF D8) and, ignoring trailing NUL bytes,
    ends with the JPEG end marker (FF D9) is reported as ``"jpeg"``.

    Args:
        imgname: path of the image file; read only when ``imgdata`` is None.
        imgdata: optional image bytes to inspect instead of the file.
    """
    imgtype = imghdr.what(pathof(imgname), imgdata)
    if imgtype is not None:
        return imgtype

    # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are some
    # with only the magic JPEG bytes out there...
    # ImageMagick handles those, so, do it too.
    if imgdata is None:
        # Context manager so the file handle is not leaked.
        with open(pathof(imgname), 'rb') as f:
            imgdata = f.read()
    if imgdata[0:2] == b'\xFF\xD8':
        # Get last non-null bytes
        last = len(imgdata)
        while imgdata[last - 1:last] == b'\x00':
            last -= 1
        # Be extra safe, check the trailing bytes, too.
        if imgdata[last - 2:last] == b'\xFF\xD9':
            imgtype = "jpeg"
    return imgtype
def processUnknownSections(mh, sect, files, K8Boundary):
    """Describe (and optionally dump) every section still without a description.

    Sections from ``mh.start`` up to ``sect.num_sections`` are examined; when
    ``mh.start`` lies below ``K8Boundary`` the scan stops at ``K8Boundary``.
    Termination markers are only labelled.  "INDX" and all other sections
    are labelled and, when DUMP is set, written to ``files.outdir``.
    """
    if DUMP:
        print("Unpacking any remaining unknown records")
    beg = mh.start
    end = sect.num_sections
    if beg < K8Boundary:
        # then we're processing the first part of a combination file
        end = K8Boundary
    for i in xrange(beg, end):
        if sect.sectiondescriptions[i] != "":
            continue
        data = sect.loadSection(i)
        magic = data[0:4]
        fname = None  # set only for sections that may be dumped
        if magic == TERMINATION_INDICATOR3:
            description = "Termination Marker 3 Nulls"
        elif magic == TERMINATION_INDICATOR2:
            description = "Termination Marker 2 Nulls"
        elif magic == TERMINATION_INDICATOR1:
            description = "Termination Marker 1 Null"
        elif magic == "INDX":
            fname = "Unknown%05d_INDX.dat" % i
            description = "Unknown INDX section"
        else:
            fname = "unknown%05d.dat" % i
            description = "Mysterious Section, first four bytes %s" % describe(data[0:4])
        if fname is not None and DUMP:
            outname = os.path.join(files.outdir, fname)
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            print("Extracting %s: %s from section %d" % (description, fname, i))
            description = description + ", extracting as %s" % fname
        sect.setsectiondescription(i, description)
def unzip_epub_to_dir(path_to_epub, destdir):
    """Extract every member of the epub at ``path_to_epub`` into ``destdir``.

    Member names have "/" replaced by ``os.sep``; missing directories are
    created on demand.  Explicit directory entries (names ending in "/")
    only create the directory.
    """
    # NOTE(review): member names are joined onto destdir unchecked, so a
    # crafted archive containing ".." could write outside destdir.
    with open(pathof(path_to_epub), 'rb') as f:
        sz = ZipFile(f)
        for name in sz.namelist():
            data = sz.read(name)
            is_dir_entry = name.endswith("/")
            name = name.replace("/", os.sep)
            filepath = os.path.join(destdir, name)
            basedir = os.path.dirname(filepath)
            if not os.path.isdir(basedir):
                os.makedirs(basedir)
            if is_dir_entry:
                # Nothing to write; opening a directory path would fail.
                continue
            with open(filepath, 'wb') as fp:
                fp.write(data)
def __init__(self, filename):
    """Load ``filename`` completely and parse its section table.

    Attributes set:
        data: full file content.
        palmheader: first 78 bytes of the file.
        palmname: first 32 bytes of the file.
        ident: 8 bytes at offset 0x3C of the header.
        num_sections: big-endian unsigned short at offset 76.
        filelength: length of ``data``.
        sectionoffsets / sectionattributes: even / odd entries of the
            ``2 * num_sections`` big-endian 32-bit values starting at
            offset 78, with ``(filelength, 0)`` appended as a sentinel.
        sectiondescriptions: one empty string per section plus a final
            "File Length Only" entry for the sentinel.
    """
    # Context manager so the file handle is closed right after reading.
    with open(pathof(filename), 'rb') as f:
        self.data = f.read()
    self.palmheader = self.data[:78]
    self.palmname = self.data[:32]
    self.ident = self.palmheader[0x3C:0x3C + 8]
    self.num_sections, = struct.unpack_from('>H', self.palmheader, 76)
    self.filelength = len(self.data)
    sectionsdata = struct.unpack_from('>%dL' % (self.num_sections * 2), self.data, 78) + (self.filelength, 0)
    self.sectionoffsets = sectionsdata[::2]
    self.sectionattributes = sectionsdata[1::2]
    self.sectiondescriptions = ["" for x in range(self.num_sections + 1)]
    self.sectiondescriptions[-1] = "File Length Only"
    return
def processUnknownSections(mh, sect, files, K8Boundary):
    """Describe (and optionally dump) every section still without a description.

    Sections from ``mh.start`` up to ``sect.num_sections`` are examined; when
    ``mh.start`` lies below ``K8Boundary`` the scan stops at ``K8Boundary``.
    Termination markers are only labelled.  "INDX" and all other sections
    are labelled and, when DUMP is set, written to ``files.outdir``.
    """
    if DUMP:
        print("Unpacking any remaining unknown records")
    beg = mh.start
    end = sect.num_sections
    if beg < K8Boundary:
        # then we're processing the first part of a combination file
        end = K8Boundary
    for i in xrange(beg, end):
        if sect.sectiondescriptions[i] != "":
            continue
        data = sect.loadSection(i)
        magic = data[0:4]
        fname = None  # set only for sections that may be dumped
        if magic == TERMINATION_INDICATOR3:
            description = "Termination Marker 3 Nulls"
        elif magic == TERMINATION_INDICATOR2:
            description = "Termination Marker 2 Nulls"
        elif magic == TERMINATION_INDICATOR1:
            description = "Termination Marker 1 Null"
        elif magic == "INDX":
            fname = "Unknown%05d_INDX.dat" % i
            description = "Unknown INDX section"
        else:
            fname = "unknown%05d.dat" % i
            description = "Mysterious Section, first four bytes %s" % describe(data[0:4])
        if fname is not None and DUMP:
            outname = os.path.join(files.outdir, fname)
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            print("Extracting %s: %s from section %d" % (description, fname, i))
            description = description + ", extracting as %s" % fname
        sect.setsectiondescription(i, description)
def addfile(self, uniqueid, basename, data, mime=None):
    """Add a new manifest file and write it below ``<outdir>/OEBPS``.

    When ``mime`` is not given it is looked up in ``ext_mime_map`` from the
    lower-cased extension of ``basename``.  The destination folder is
    ``Audio`` or ``Video`` for those media types, otherwise the value from
    ``mime_base_map`` (default ``Misc``).  Data for media types ending in
    ``+xml`` is passed through ``utf8_str``.

    Returns:
        The (utf8) ``uniqueid``.

    Raises:
        WrapperException: if no media type can be determined, or the id or
            the resulting href is already in use.
    """
    uniqueid = utf8_str(uniqueid)
    basename = utf8_str(basename)
    mime = utf8_str(mime)
    if mime is None:
        extension = os.path.splitext(basename)[1].lower()
        mime = ext_mime_map.get(extension)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime.endswith('+xml'):
        data = utf8_str(data)

    if mime.startswith("audio"):
        folder = 'Audio'
    elif mime.startswith("video"):
        folder = "Video"
    else:
        folder = mime_base_map.get(mime, 'Misc')
    href = folder + "/" + basename

    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    if href in self.href_to_id:
        raise WrapperException('Basename is not unique')

    # now actually write out the new file
    relpath = os.path.join('OEBPS', href.replace("/", os.sep))
    self.id_to_filepath[uniqueid] = relpath
    target = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)

    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.href_to_id[href] = uniqueid
    self.added.append(uniqueid)
    self.modified['OEBPS/content.opf'] = 'file'
    return uniqueid
def unzip_epub_to_dir(path_to_epub, destdir):
    """Extract every member of the epub at ``path_to_epub`` into ``destdir``.

    Member names have "/" replaced by ``os.sep``; missing directories are
    created on demand.  Explicit directory entries (names ending in "/")
    only create the directory.
    """
    # NOTE(review): member names are joined onto destdir unchecked, so a
    # crafted archive containing ".." could write outside destdir.
    with open(pathof(path_to_epub), 'rb') as f:
        sz = ZipFile(f)
        for name in sz.namelist():
            data = sz.read(name)
            is_dir_entry = name.endswith("/")
            name = name.replace("/", os.sep)
            filepath = os.path.join(destdir, name)
            basedir = os.path.dirname(filepath)
            if not os.path.isdir(basedir):
                os.makedirs(basedir)
            if is_dir_entry:
                # Nothing to write; opening a directory path would fail.
                continue
            with open(filepath, 'wb') as fp:
                fp.write(data)
def addfile(self, uniqueid, basename, data, mime=None):
    """Add a new manifest file and write it below ``<outdir>/OEBPS``.

    When ``mime`` is not given it is looked up in ``ext_mime_map`` from the
    lower-cased extension of ``basename``.  The destination folder is
    ``Audio`` or ``Video`` for those media types, otherwise the value from
    ``mime_base_map`` (default ``Misc``).  Data for media types ending in
    ``+xml`` is passed through ``utf8_str``.

    Returns:
        The (utf8) ``uniqueid``.

    Raises:
        WrapperException: if no media type can be determined, or the id or
            the resulting href is already in use.
    """
    uniqueid = utf8_str(uniqueid)
    basename = utf8_str(basename)
    mime = utf8_str(mime)
    if mime is None:
        extension = os.path.splitext(basename)[1].lower()
        mime = ext_mime_map.get(extension)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime.endswith('+xml'):
        data = utf8_str(data)

    if mime.startswith("audio"):
        folder = 'Audio'
    elif mime.startswith("video"):
        folder = "Video"
    else:
        folder = mime_base_map.get(mime, 'Misc')
    href = folder + "/" + basename

    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    if href in self.href_to_id:
        raise WrapperException('Basename is not unique')

    # now actually write out the new file
    relpath = os.path.join('OEBPS', href.replace("/", os.sep))
    self.id_to_filepath[uniqueid] = relpath
    target = os.path.join(self.outdir, relpath)
    parent = os.path.dirname(target)
    if not path.exists(parent):
        os.makedirs(pathof(parent))
    with open(pathof(target), 'wb') as out:
        out.write(data)

    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.href_to_id[href] = uniqueid
    self.added.append(uniqueid)
    self.modified['OEBPS/content.opf'] = 'file'
    return uniqueid
def __init__(self, filename):
    """Load ``filename`` completely and parse its section table.

    Attributes set:
        data: full file content.
        palmheader: first 78 bytes of the file.
        palmname: first 32 bytes of the file.
        ident: 8 bytes at offset 0x3C of the header.
        num_sections: big-endian unsigned short at offset 76.
        filelength: length of ``data``.
        sectionoffsets / sectionattributes: even / odd entries of the
            ``2 * num_sections`` big-endian 32-bit values starting at
            offset 78, with ``(filelength, 0)`` appended as a sentinel.
        sectiondescriptions: one empty string per section plus a final
            "File Length Only" entry for the sentinel.
    """
    # Context manager so the file handle is closed right after reading.
    with open(pathof(filename), 'rb') as f:
        self.data = f.read()
    self.palmheader = self.data[:78]
    self.palmname = self.data[:32]
    self.ident = self.palmheader[0x3C:0x3C + 8]
    self.num_sections, = struct.unpack_from('>H', self.palmheader, 76)
    self.filelength = len(self.data)
    sectionsdata = struct.unpack_from('>%dL' % (self.num_sections * 2), self.data, 78) + (self.filelength, 0)
    self.sectionoffsets = sectionsdata[::2]
    self.sectionattributes = sectionsdata[1::2]
    self.sectiondescriptions = ["" for x in range(self.num_sections + 1)]
    self.sectiondescriptions[-1] = "File Length Only"
    return
def processCRES(i, files, rscnames, sect, data, beg, rsc_ptr, use_hd):
    """Extract the HD image carried by CRES section ``i``.

    The first 12 bytes of ``data`` are skipped before the image type is
    detected.  With ``use_hd`` the HD data is written into ``files.imgdir``
    under the name stored at ``rscnames[rsc_ptr]`` (overwriting the lower
    resolution image); otherwise it is written to ``files.hdimgdir`` as
    ``HDimageNNNNN.<type>``.  ``None`` is appended to ``rscnames`` and
    ``rsc_ptr`` is advanced by one in every case.

    Returns:
        The tuple ``(rscnames, rsc_ptr)``.
    """
    data = data[12:]
    imgtype = get_image_type(None, data)

    if imgtype is None:
        print("Warning: CRES Section %s does not contain a recognised resource" % i)
        rscnames.append(None)
        sect.setsectiondescription(
            i, "Mysterious CRES data, first four bytes %s" % describe(data[0:4]))
        if DUMP:
            fname = "unknown%05d.dat" % i
            outname = os.path.join(files.outdir, fname)
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            sect.setsectiondescription(
                i, "Mysterious CRES data, first four bytes %s extracting as %s"
                % (describe(data[0:4]), fname))
        rsc_ptr += 1
        return rscnames, rsc_ptr

    if use_hd:
        # overwrite corresponding lower res image with hd version
        imgname = rscnames[rsc_ptr]
        imgdest = files.imgdir
    else:
        imgname = "HDimage%05d.%s" % (i, imgtype)
        imgdest = files.hdimgdir
    print("Extracting HD image: {0:s} from section {1:d}".format(imgname, i))
    outimg = os.path.join(imgdest, imgname)
    with open(pathof(outimg), 'wb') as fp:
        fp.write(data)
    rscnames.append(None)
    sect.setsectiondescription(i, "Optional HD Image {0:s}".format(imgname))
    rsc_ptr += 1
    return rscnames, rsc_ptr
def readfile(self, id):
    """Return the raw content of the manifest file identified by ``id``.

    Files that were added or modified in this session are read from
    ``self.outdir``; all others from ``self.ebook_root``.

    Raises:
        WrapperException: if the id has no registered path or the file is
            missing on disk.
    """
    id = utf8_str(id)
    relpath = self.id_to_filepath.get(id)
    if relpath is None:
        raise WrapperException('Id does not exist in manifest')
    # already added or modified it will be in outdir
    changed = id in self.added or id in self.modified
    basedir = self.outdir if changed else self.ebook_root
    fullpath = os.path.join(basedir, relpath)
    if not path.exists(fullpath):
        raise WrapperException('File Does Not Exist')
    with open(pathof(fullpath), 'rb') as src:
        return src.read()
def readfile(self, id):
    """Return the raw content of the manifest file identified by ``id``.

    Files that were added or modified in this session are read from
    ``self.outdir``; all others from ``self.ebook_root``.

    Raises:
        WrapperException: if the id has no registered path or the file is
            missing on disk.
    """
    id = utf8_str(id)
    relpath = self.id_to_filepath.get(id)
    if relpath is None:
        raise WrapperException('Id does not exist in manifest')
    # already added or modified it will be in outdir
    changed = id in self.added or id in self.modified
    basedir = self.outdir if changed else self.ebook_root
    fullpath = os.path.join(basedir, relpath)
    if not path.exists(fullpath):
        raise WrapperException('File Does Not Exist')
    with open(pathof(fullpath), 'rb') as src:
        return src.read()
def processRESC(i, files, imgnames, sect, data, k8resc):
    """Parse the K8 RESC section ``i`` and optionally dump it.

    When DUMP is set the raw section is written to
    ``files.outdir/RESCNNNNN.dat``.  The payload after the first 16 bytes
    is handed to ``K8RESCProcessor``; the incoming ``k8resc`` value is
    replaced by that new processor.  ``None`` is appended to ``imgnames``.

    Returns:
        The tuple ``(imgnames, k8resc)``.
    """
    if DUMP:
        rescname = "RESC%05d.dat" % i
        # Two spaces after the colon reproduce the output of the original
        # ``print "Extracting Resource: ", rescname`` statement.
        print("Extracting Resource:  %s" % rescname)
        outrsc = os.path.join(files.outdir, rescname)
        with open(pathof(outrsc), 'wb') as fp:
            fp.write(data)
    # parse the spine and metadata from RESC
    # The original wrapped this in a disabled try/except ("if True: #try:"),
    # so parser errors propagate to the caller; that is kept.
    k8resc = K8RESCProcessor(data[16:], DUMP)
    imgnames.append(None)
    sect.setsectiondescription(i, "K8 RESC section")
    return imgnames, k8resc
def processRESC(i, files, rscnames, sect, data, k8resc):
    """Parse the K8 RESC section ``i`` and optionally dump it.

    When DUMP is set the raw section is written to
    ``files.outdir/RESCNNNNN.dat``.  The payload after the first 16 bytes
    is handed to ``K8RESCProcessor``; the incoming ``k8resc`` value is
    replaced by that new processor.  ``None`` is appended to ``rscnames``.

    Returns:
        The tuple ``(rscnames, k8resc)``.
    """
    if DUMP:
        rescname = "RESC%05d.dat" % i
        # Two spaces after the colon reproduce the output of the original
        # ``print "Extracting Resource: ", rescname`` statement.
        print("Extracting Resource:  %s" % rescname)
        outrsc = os.path.join(files.outdir, rescname)
        with open(pathof(outrsc), 'wb') as fp:
            fp.write(data)
    # parse the spine and metadata from RESC
    # The original wrapped this in a disabled try/except ("if True: #try:"),
    # so parser errors propagate to the caller; that is kept.
    k8resc = K8RESCProcessor(data[16:], DUMP)
    rscnames.append(None)
    sect.setsectiondescription(i, "K8 RESC section")
    return rscnames, k8resc
def processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr):
    """Extract the font stored in section ``i`` into ``files.imgdir``.

    Five big-endian 32-bit values are read at offset 4 (usize, flags, data
    start, xor key length, xor key start).  Flag 0x0002 means the first
    (at most) 1040 bytes are XOR-obfuscated with the key; flag 0x0001 means
    the data is zlib compressed.  The extension is chosen from the first
    four bytes of the decoded font.  If the header cannot be unpacked the
    raw section is written with the extension ``.failed``.

    Names of obfuscated ``.ttf``/``.otf`` fonts are appended to
    ``obfuscate_data``; the written file name is appended to ``rscnames``.

    Returns:
        The tuple ``(rscnames, obfuscate_data, rsc_ptr)``.
    """
    fontname = "font%05d" % i
    ext = '.dat'
    font_error = False
    font_data = data
    try:
        usize, fflags, dstart, xor_len, xor_start = struct.unpack_from('>LLLLL', data, 4)
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit are
        # no longer swallowed.
        print("Failed to extract font: {0:s} from section {1:d}".format(fontname, i))
        font_error = True
        ext = '.failed'

    if not font_error:
        print("Extracting font: %s" % fontname)
        font_data = data[dstart:]
        extent = min(len(font_data), 1040)
        if fflags & 0x0002:
            # obfuscated so need to de-obfuscate the first 1040 bytes
            key = bytearray(data[xor_start:xor_start + xor_len])
            buf = bytearray(font_data)
            for n in xrange(extent):
                buf[n] ^= key[n % xor_len]
            font_data = bytes(buf)
        if fflags & 0x0001:
            # ZLIB compressed data
            font_data = zlib.decompress(font_data)
        hdr = font_data[0:4]
        if hdr == '\0\1\0\0' or hdr == 'true' or hdr == 'ttcf':
            ext = '.ttf'
        elif hdr == 'OTTO':
            ext = '.otf'
        else:
            print("Warning: unknown font header %s" % hdr.encode('hex'))

    # ext can only be .ttf/.otf when the header was unpacked, so fflags is
    # always defined when the second operand is evaluated.
    if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002):
        obfuscate_data.append(fontname + ext)
    fontname += ext
    outfnt = os.path.join(files.imgdir, fontname)
    with open(pathof(outfnt), 'wb') as fp:
        fp.write(font_data)
    rscnames.append(fontname)
    sect.setsectiondescription(i, "Font {0:s}".format(fontname))
    if rsc_ptr == -1:
        rsc_ptr = i - beg
    return rscnames, obfuscate_data, rsc_ptr
def readotherfile(self, book_href):
    """Return the raw contents of the file registered under ``book_href``.

    A request for "OEBPS/content.opf" is answered with ``self.build_opf()``
    instead of a disk read. Files listed in ``self.added`` or
    ``self.modified`` are read from ``self.outdir``; all others are read
    from ``self.ebook_root``.

    Raises:
        WrapperException: if the href is unknown or the file is missing.
    """
    href_id = utf8_str(book_href)
    # handle special case of trying to read the opf
    if href_id == "OEBPS/content.opf":
        return self.build_opf()
    relpath = self.id_to_filepath.get(href_id)
    if relpath is None:
        raise WrapperException('book href does not exist')
    if href_id in self.added or href_id in self.modified.keys():
        root = self.outdir
    else:
        root = self.ebook_root
    fullpath = os.path.join(root, relpath)
    if not path.exists(fullpath):
        raise WrapperException('File Does Not Exist')
    with open(pathof(fullpath), 'rb') as fp:
        return fp.read()
def readotherfile(self, book_href):
    """Read and return the bytes of the file identified by ``book_href``.

    The href "OEBPS/content.opf" is special-cased: the result of
    ``self.build_opf()`` is returned rather than anything read from disk.
    Otherwise the href is mapped through ``self.id_to_filepath`` and the
    file is looked up under ``self.outdir`` (if the href is in
    ``self.added`` or ``self.modified``) or under ``self.ebook_root``.

    Raises:
        WrapperException: if the href is unknown or the file is missing.
    """
    key = utf8_str(book_href)
    # handle special case of trying to read the opf
    if key == "OEBPS/content.opf":
        return self.build_opf()
    mapped = self.id_to_filepath.get(key)
    if mapped is None:
        raise WrapperException('book href does not exist')
    changed = key in self.added or key in self.modified.keys()
    location = os.path.join(self.outdir if changed else self.ebook_root,
                            mapped)
    if not path.exists(location):
        raise WrapperException('File Does Not Exist')
    with open(pathof(location), 'rb') as handle:
        contents = handle.read()
    return contents
def processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr): fontname = "font%05d" % i ext = '.dat' font_error = False font_data = data try: usize, fflags, dstart, xor_len, xor_start = struct.unpack_from('>LLLLL',data,4) except: print "Failed to extract font: {0:s} from section {1:d}".format(fontname,i) font_error = True ext = '.failed' pass if not font_error: print "Extracting font:", fontname font_data = data[dstart:] extent = len(font_data) extent = min(extent, 1040) if fflags & 0x0002: # obfuscated so need to de-obfuscate the first 1040 bytes key = bytearray(data[xor_start: xor_start+ xor_len]) buf = bytearray(font_data) for n in xrange(extent): buf[n] ^= key[n%xor_len] font_data = bytes(buf) if fflags & 0x0001: # ZLIB compressed data font_data = zlib.decompress(font_data) hdr = font_data[0:4] if hdr == '\0\1\0\0' or hdr == 'true' or hdr == 'ttcf': ext = '.ttf' elif hdr == 'OTTO': ext = '.otf' else: print "Warning: unknown font header %s" % hdr.encode('hex') if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002): obfuscate_data.append(fontname + ext) fontname += ext outfnt = os.path.join(files.imgdir, fontname) open(pathof(outfnt), 'wb').write(font_data) rscnames.append(fontname) sect.setsectiondescription(i,"Font {0:s}".format(fontname)) if rsc_ptr == -1: rsc_ptr = i - beg return rscnames, obfuscate_data, rsc_ptr
def processCONT(i, files, imgnames, sect, data): global DUMP # process a container header, most of this is unknown # right now only extract its EXTH dt = data[0:12] if dt == "CONTBOUNDARY": imgnames.append(None) sect.setsectiondescription(i,"CONTAINER BOUNDARY") else: sect.setsectiondescription(i,"CONT Header") imgnames.append(None) if DUMP: cpage, = struct.unpack_from('>L', data, 12) contexth = data[48:] print "\n\nContainer EXTH Dump" dump_contexth(cpage, contexth) fname = "CONT_Header%05d.dat" % i outname= os.path.join(files.outdir, fname) open(pathof(outname), 'wb').write(data) return imgnames
def processCONT(i, files, rscnames, sect, data): global DUMP # process a container header, most of this is unknown # right now only extract its EXTH dt = data[0:12] if dt == "CONTBOUNDARY": rscnames.append(None) sect.setsectiondescription(i, "CONTAINER BOUNDARY") else: sect.setsectiondescription(i, "CONT Header") rscnames.append(None) if DUMP: cpage, = struct.unpack_from('>L', data, 12) contexth = data[48:] print "\n\nContainer EXTH Dump" dump_contexth(cpage, contexth) fname = "CONT_Header%05d.dat" % i outname = os.path.join(files.outdir, fname) open(pathof(outname), 'wb').write(data) return rscnames
def __init__(self, opf_path, debug=False):
    """Read the OPF file at ``opf_path`` and parse it via ``_parseData``.

    Args:
        opf_path: Path of the OPF file; its base name is kept in
            ``self.opfname`` and its raw contents in ``self.opf``.
        debug: Stored as ``self._debug``.
    """
    self._debug = debug
    self.opfname = utf8_str(os.path.basename(opf_path))

    # Raw file contents and the current position within them.
    self.opf = None
    with open(pathof(opf_path), 'rb') as opf_file:
        self.opf = opf_file.read()
    self.opos = 0

    # <package> / <metadata> information.
    self.package_tag = [None] * 2
    # self.package_version = None
    self.metadata_tag = [None] * 2
    self.metadata = []
    self.cover_id = None

    # Manifest lookup tables.
    self.manifest_id_to_href = {}
    self.manifest_id_to_mime = {}
    self.href_to_manifest_id = {}

    # Spine and guide information.
    self.spine_ppd = None
    # self.spine_pageattributes = {}
    # self.spine_idrefs = {}
    self.spine = []
    self.guide = []

    # Everything above must exist before parsing starts.
    self._parseData()
def __init__(self, opf_path, debug=False):
    """Load the OPF file at ``opf_path`` and run ``_parseData`` on it.

    Args:
        opf_path: Path of the OPF file; its base name is kept in
            ``self.opfname`` and its raw contents in ``self.opf``.
        debug: Stored as ``self._debug``.
    """
    self._debug = debug
    opf_basename = os.path.basename(opf_path)
    self.opfname = utf8_str(opf_basename)

    # Raw file contents; opos starts at the beginning of the data.
    self.opf = None
    with open(pathof(opf_path), "rb") as source:
        raw = source.read()
        self.opf = raw
    self.opos = 0

    # Package-level and metadata results.
    self.package_tag = [None, None]
    # self.package_version = None
    self.metadata_tag = [None, None]
    self.metadata = list()
    self.cover_id = None

    # Manifest mappings in both directions, plus media types.
    self.manifest_id_to_href = dict()
    self.manifest_id_to_mime = dict()
    self.href_to_manifest_id = dict()

    # Spine and guide results.
    self.spine_ppd = None
    # self.spine_pageattributes = {}
    # self.spine_idrefs = {}
    self.spine = list()
    self.guide = list()

    # Parse only after every attribute has been initialised.
    self._parseData()
def processPAGE(i, files, imgnames, sect, data, mh, pagemapproc):
    """Process page map information and write the matching .apnx file.

    Args:
        i: Section index; used when recording the section description.
        files: Object exposing ``outdir`` and ``getInputFileBasename()``.
        imgnames: List that receives a ``None`` placeholder for this section.
        sect: Object providing ``palmname`` and ``setsectiondescription``.
        data: Raw contents of the page map section.
        mh: Header object providing ``metadata``, ``version`` and ``isK8()``.
        pagemapproc: Ignored on input; it is always replaced by a newly
            built ``PageMapProcessor``.

    Returns:
        The tuple ``(imgnames, pagemapproc)``.

    Raises:
        KeyError: if ``mh.metadata`` lacks 'cdeType', 'UniqueID' or 'ASIN'.
    """
    pagemapproc = PageMapProcessor(mh, data)
    imgnames.append(None)
    sect.setsectiondescription(i, "PageMap")
    apnx_meta = {
        'acr': str(sect.palmname).rstrip('\x00'),
        'cdeType': mh.metadata['cdeType'][0],
        # '%x' instead of hex(...)[2:]: in Python 2 hex() of a long ends in
        # 'L', which would leak into the GUID for large unique ids.
        'contentGuid': '%x' % int(mh.metadata['UniqueID'][0]),
        'asin': mh.metadata['ASIN'][0],
        'pageMap': pagemapproc.getPageMap(),
    }
    if mh.version == 8:
        apnx_meta['format'] = 'MOBI_8'
    else:
        apnx_meta['format'] = 'MOBI_7'
    apnx_data = pagemapproc.generateAPNX(apnx_meta)
    # Only the file name prefix depends on the book flavour.
    prefix = 'mobi8-' if mh.isK8() else 'mobi7-'
    outname = os.path.join(
        files.outdir, prefix + files.getInputFileBasename() + '.apnx')
    # Use a context manager so the apnx file is closed deterministically.
    with open(pathof(outname), 'wb') as fp:
        fp.write(apnx_data)
    return imgnames, pagemapproc