def writeXHTML(self):
    """Build the cover XHTML page and write it into the K8 text directory.

    The page contains a single <img> whose src is self.cover_image, reached
    through a path relative to files.k8text.  If the target file already
    exists a warning is printed and the file is replaced.
    """
    files = self.files
    cover_page = self.cover_page

    # relative href from the text directory to the image directory,
    # normalised to forward slashes for use inside the markup
    image_dir = os.path.relpath(files.k8images, files.k8text).replace("\\", "/")

    data = "".join([
        '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html>',
        '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"',
        ' xml:lang="{:s}">\n'.format(self.lang),
        "<head>\n<title>{:s}</title>\n".format(self.title),
        '<style type="text/css">\n',
        "body {\n\tmargin: 0;\n\tpadding: 0;\n\ttext-align: center;\n}\n",
        "div {\n\theight: 100%;\n\twidth: 100%;\n\ttext-align: center;\n\tpage-break-inside: avoid;\n}\n",
        "img {\n\tdisplay: inline-block;\n\theight: 100%;\n\tmargin: 0 auto;\n}\n",
        "</style>\n</head>\n",
        "<body><div>\n",
        '\t<img src="{:s}/{:s}" alt=""/>\n'.format(image_dir, self.cover_image),
        "</div></body>\n</html>",
    ])

    outfile = os.path.join(files.k8text, cover_page)
    if os.path.exists(pathof(outfile)):
        print("Warning: {:s} already exists.".format(cover_page))
        os.remove(pathof(outfile))
    # the context manager closes the handle even if write() raises
    with open(pathof(outfile), "w") as fp:
        fp.write(data)
    return
    def writeXHTML(self):
        """Build the cover XHTML page and write it into the K8 text directory.

        The page contains a single <img> whose src is self.cover_image,
        reached through a path relative to files.k8text.  If the target
        file already exists a warning is printed and the file is replaced.
        """
        files = self.files
        cover_page = self.cover_page

        # relative href from the text directory to the image directory,
        # normalised to forward slashes for use inside the markup
        image_dir = os.path.relpath(files.k8images,
                                    files.k8text).replace('\\', '/')

        pieces = [
            '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html>',
            '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"',
            ' xml:lang="{:s}">\n'.format(self.lang),
            '<head>\n<title>{:s}</title>\n'.format(self.title),
            '<style type="text/css">\n',
            'body {\n\tmargin: 0;\n\tpadding: 0;\n\ttext-align: center;\n}\n',
            'div {\n\theight: 100%;\n\twidth: 100%;\n\ttext-align: center;\n\tpage-break-inside: avoid;\n}\n',
            'img {\n\tdisplay: inline-block;\n\theight: 100%;\n\tmargin: 0 auto;\n}\n',
            '</style>\n</head>\n',
            '<body><div>\n',
            '\t<img src="{:s}/{:s}" alt=""/>\n'.format(
                image_dir, self.cover_image),
            '</div></body>\n</html>',
        ]
        data = ''.join(pieces)

        outfile = os.path.join(files.k8text, cover_page)
        if os.path.exists(pathof(outfile)):
            print('Warning: {:s} already exists.'.format(cover_page))
            os.remove(pathof(outfile))
        # the context manager closes the handle even if write() raises
        with open(pathof(outfile), 'w') as fp:
            fp.write(data)
        return
Example #3
0
def processImage(i, files, rscnames, sect, data, beg, rsc_ptr, cover_offset):
    """Write section i out as an image file and record its name.

    One entry is appended to rscnames: the image file name, or None when
    the data is not recognised as an image.  The section description is
    updated in both cases.  When DUMP is set, unrecognised data is saved
    to files.outdir as unknownNNNNN.dat.

    Returns the (rscnames, rsc_ptr) pair; rsc_ptr is set to i - beg when
    an image is extracted while it is still -1.
    """
    global DUMP
    imgtype = get_image_type(None, data)
    if imgtype is None:
        print("Warning: Section %s does not contain a recognised resource" % i)
        rscnames.append(None)
        sect.setsectiondescription(
            i, "Mysterious Section, first four bytes %s" % describe(data[0:4]))
        if DUMP:
            fname = "unknown%05d.dat" % i
            outname = os.path.join(files.outdir, fname)
            # close the handle deterministically instead of relying on GC
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            sect.setsectiondescription(
                i, "Mysterious Section, first four bytes %s extracting as %s" %
                (describe(data[0:4]), fname))
        return rscnames, rsc_ptr

    # the section sitting at the cover offset gets a "cover" file name
    if cover_offset is not None and i == beg + cover_offset:
        imgname = "cover%05d.%s" % (i, imgtype)
    else:
        imgname = "image%05d.%s" % (i, imgtype)
    print("Extracting image: {0:s} from section {1:d}".format(imgname, i))
    outimg = os.path.join(files.imgdir, imgname)
    with open(pathof(outimg), 'wb') as fp:
        fp.write(data)
    rscnames.append(imgname)
    sect.setsectiondescription(i, "Image {0:s}".format(imgname))
    if rsc_ptr == -1:
        rsc_ptr = i - beg
    return rscnames, rsc_ptr
def processPrintReplica(metadata, files, imgnames, mh):
    """Split a Print Replica book into its embedded files.

    The raw markup from mh.getRawML() starts with a table count at offset
    4, followed by one section count per table and then an index of
    (offset, length) pairs.  The first section of each table is written
    as <basename>.NNN.pdf and the remaining ones as
    <basename>.NNN.MMM.data, all in files.outdir.  When DUMP or
    WRITE_RAW_DATA is set the raw markup itself is saved as
    <basename>.rawpr.

    Any exception raised while parsing is reported with print and not
    propagated.
    """
    global DUMP
    global WRITE_RAW_DATA
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.outdir, files.getInputFileBasename() + '.rawpr')
        with open(pathof(outraw), 'wb') as fp:
            fp.write(rawML)

    print("Print Replica ebook detected")
    try:
        numTables, = struct.unpack_from('>L', rawML, 0x04)
        # the (offset, length) index starts right after the per-table counts
        tableIndexOffset = 8 + 4 * numTables
        # for each table, read in count of sections, assume first section is a PDF
        # and output other sections as binary files
        for i in xrange(numTables):
            sectionCount, = struct.unpack_from('>L', rawML, 0x08 + 4 * i)
            for j in xrange(sectionCount):
                sectionOffset, sectionLength = struct.unpack_from('>LL', rawML, tableIndexOffset)
                tableIndexOffset += 8
                if j == 0:
                    suffix = '.%03d.pdf' % (i + 1)
                else:
                    suffix = '.%03d.%03d.data' % ((i + 1), j)
                entryName = os.path.join(files.outdir, files.getInputFileBasename() + suffix)
                with open(pathof(entryName), 'wb') as fp:
                    fp.write(rawML[sectionOffset:(sectionOffset + sectionLength)])
    except Exception as e:
        # deliberate best effort: report the problem and carry on
        print('Error processing Print Replica: ' + str(e))
Example #5
0
def processPrintReplica(metadata, files, rscnames, mh):
    """Split a Print Replica book into its embedded files.

    The raw markup from mh.getRawML() starts with a table count at offset
    4, followed by one section count per table and then an index of
    (offset, length) pairs.  The first section of each table is written
    as <basename>.NNN.pdf and the remaining ones as
    <basename>.NNN.MMM.data, all in files.outdir.  When DUMP or
    WRITE_RAW_DATA is set the raw markup itself is saved as
    <basename>.rawpr.

    Any exception raised while parsing is reported with print and not
    propagated.
    """
    global DUMP
    global WRITE_RAW_DATA
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.outdir,
                              files.getInputFileBasename() + '.rawpr')
        with open(pathof(outraw), 'wb') as fp:
            fp.write(rawML)

    print("Print Replica ebook detected")
    try:
        numTables, = struct.unpack_from('>L', rawML, 0x04)
        # the (offset, length) index starts right after the per-table counts
        tableIndexOffset = 8 + 4 * numTables
        # for each table, read in count of sections, assume first section is a PDF
        # and output other sections as binary files
        for i in xrange(numTables):
            sectionCount, = struct.unpack_from('>L', rawML, 0x08 + 4 * i)
            for j in xrange(sectionCount):
                sectionOffset, sectionLength = struct.unpack_from(
                    '>LL', rawML, tableIndexOffset)
                tableIndexOffset += 8
                if j == 0:
                    suffix = '.%03d.pdf' % (i + 1)
                else:
                    suffix = '.%03d.%03d.data' % ((i + 1), j)
                entryName = os.path.join(
                    files.outdir, files.getInputFileBasename() + suffix)
                with open(pathof(entryName), 'wb') as fp:
                    fp.write(
                        rawML[sectionOffset:(sectionOffset + sectionLength)])
    except Exception as e:
        # deliberate best effort: report the problem and carry on
        print('Error processing Print Replica: ' + str(e))
def processImage(i, files, imgnames, sect, data, beg, image_ptr, cover_offset):
    """Write section i out as an image file and record its name.

    One entry is appended to imgnames: the image file name, or None when
    the data is not recognised as an image.  The section description is
    updated in both cases.  When DUMP is set, unrecognised data is saved
    to files.outdir as unknownNNNNN.dat.

    Returns the (imgnames, image_ptr) pair; image_ptr is set to i - beg
    when an image is extracted while it is still -1.
    """
    global DUMP
    imgtype = get_image_type(None, data)
    if imgtype is None:
        print("Warning: Section %s does not contain a recognised resource" % i)
        imgnames.append(None)
        sect.setsectiondescription(i, "Mysterious Section, first four bytes %s" % describe(data[0:4]))
        if DUMP:
            fname = "unknown%05d.dat" % i
            outname = os.path.join(files.outdir, fname)
            # close the handle deterministically instead of relying on GC
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            sect.setsectiondescription(i, "Mysterious Section, first four bytes %s extracting as %s" % (describe(data[0:4]), fname))
        return imgnames, image_ptr

    # the section sitting at the cover offset gets a "cover" file name
    if cover_offset is not None and i == beg + cover_offset:
        imgname = "cover%05d.%s" % (i, imgtype)
    else:
        imgname = "image%05d.%s" % (i, imgtype)
    print("Extracting image: {0:s} from section {1:d}".format(imgname, i))
    outimg = os.path.join(files.imgdir, imgname)
    with open(pathof(outimg), 'wb') as fp:
        fp.write(data)
    imgnames.append(imgname)
    sect.setsectiondescription(i, "Image {0:s}".format(imgname))
    if image_ptr == -1:
        image_ptr = i - beg
    return imgnames, image_ptr
def processCRES(i, files, imgnames, sect, data, beg, image_ptr, use_hd):
    """Extract the HD image carried by CRES section i.

    The first 12 bytes of the section are dropped before the image type
    is detected.  With use_hd set, the image is written into files.imgdir
    under the name stored at imgnames[image_ptr], replacing that file;
    otherwise it is written to files.hdimgdir as HDimageNNNNN.<type>.
    Unrecognised data is only saved (to files.outdir) when DUMP is set.

    None is always appended to imgnames and image_ptr is advanced by one.
    Returns the (imgnames, image_ptr) pair.
    """
    global DUMP
    data = data[12:]
    imgtype = get_image_type(None, data)

    if imgtype is None:
        print("Warning: CRES Section %s does not contain a recognised resource" % i)
        imgnames.append(None)
        sect.setsectiondescription(i, "Mysterious CRES data, first four bytes %s" % describe(data[0:4]))
        if DUMP:
            fname = "unknown%05d.dat" % i
            outname = os.path.join(files.outdir, fname)
            # close the handle deterministically instead of relying on GC
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            sect.setsectiondescription(i, "Mysterious CRES data, first four bytes %s extracting as %s" % (describe(data[0:4]), fname))
        image_ptr += 1
        return imgnames, image_ptr

    if use_hd:
        # overwrite corresponding lower res image with hd version
        # NOTE(review): assumes imgnames[image_ptr] holds a real file name
        # (not None) - confirm against the caller
        imgname = imgnames[image_ptr]
        imgdest = files.imgdir
    else:
        imgname = "HDimage%05d.%s" % (i, imgtype)
        imgdest = files.hdimgdir
    print("Extracting HD image: {0:s} from section {1:d}".format(imgname, i))
    outimg = os.path.join(imgdest, imgname)
    with open(pathof(outimg), 'wb') as fp:
        fp.write(data)
    imgnames.append(None)
    sect.setsectiondescription(i, "Optional HD Image {0:s}".format(imgname))
    image_ptr += 1
    return imgnames, image_ptr
Example #8
0
 def write_opf(self):
     """Write the output of self.build_opf() to <outdir>/OEBPS/<opfname>.

     Does nothing when self.op is None.  The parent directory is created
     when it does not exist yet.
     """
     if self.op is None:
         return
     opf_path = utf8_str(os.path.join(self.outdir, 'OEBPS', self.opfname))
     parent = os.path.dirname(opf_path)
     if not path.exists(parent):
         os.makedirs(pathof(parent))
     with open(pathof(opf_path), 'wb') as out:
         out.write(self.build_opf())
Example #9
0
 def write_opf(self):
     """Write the output of self.build_opf() to <outdir>/OEBPS/<opfname>.

     Does nothing when self.op is None.  The parent directory is created
     when it does not exist yet.
     """
     if self.op is None:
         return
     target = utf8_str(os.path.join(self.outdir, 'OEBPS', self.opfname))
     folder = os.path.dirname(target)
     if not path.exists(folder):
         os.makedirs(pathof(folder))
     with open(pathof(target), 'wb') as handle:
         handle.write(self.build_opf())
Example #10
0
 def writeOPF(self, has_obfuscated_fonts=False):
     """Build the OPF for this book and write it to disk.

     For a K8 book the EPUB OPF is written into files.k8oebps under the
     name EPUB_OPF and self.BookId is returned.  Otherwise the Mobi7 OPF
     is written into files.mobi7dir as <input basename>.opf and 0 is
     returned.
     """
     if self.isK8:
         data = self.buildEPUBOPF(has_obfuscated_fonts)
         outopf = os.path.join(self.files.k8oebps, EPUB_OPF)
         # the context manager closes the handle even if write() raises
         with open(pathof(outopf), 'wb') as fp:
             fp.write(data)
         return self.BookId
     else:
         data = self.buildMobi7OPF()
         outopf = os.path.join(self.files.mobi7dir, self.files.getInputFileBasename() + '.opf')
         with open(pathof(outopf), 'wb') as fp:
             fp.write(data)
         return 0
Example #11
0
def epub_zip_up_book_contents(ebook_path, epub_filepath):
    """Zip the files under ebook_path into an epub at epub_filepath.

    The 'mimetype' entry is added first and stored uncompressed; every
    other file is deflated.

    Raises:
        Exception: if ebook_path contains no 'mimetype' file.
    """
    outzip = zipfile.ZipFile(pathof(epub_filepath), 'w')
    try:
        files = path.walk(ebook_path)
        if 'mimetype' not in files:
            raise Exception('mimetype file is missing')
        outzip.write(pathof(os.path.join(ebook_path, 'mimetype')), pathof('mimetype'), zipfile.ZIP_STORED)
        files.remove('mimetype')
        for name in files:
            filepath = os.path.join(ebook_path, name)
            outzip.write(pathof(filepath), pathof(name), zipfile.ZIP_DEFLATED)
    finally:
        # always release the archive handle, also when an error is raised
        outzip.close()
Example #12
0
 def writeOPF(self, has_obfuscated_fonts=False):
     """Build the OPF for this book and write it to disk.

     For a K8 book the EPUB OPF is written into files.k8oebps under the
     name EPUB_OPF and self.BookId is returned.  Otherwise the Mobi7 OPF
     is written into files.mobi7dir as <input basename>.opf and 0 is
     returned.
     """
     if self.isK8:
         data = self.buildEPUBOPF(has_obfuscated_fonts)
         outopf = os.path.join(self.files.k8oebps, EPUB_OPF)
         # the context manager closes the handle even if write() raises
         with open(pathof(outopf), 'wb') as fp:
             fp.write(data)
         return self.BookId
     else:
         data = self.buildMobi7OPF()
         outopf = os.path.join(self.files.mobi7dir,
                               self.files.getInputFileBasename() + '.opf')
         with open(pathof(outopf), 'wb') as fp:
             fp.write(data)
         return 0
Example #13
0
    def writeXHTML(self):
        """Write the page returned by self.buildXHTML() as the cover page.

        The file goes to files.k8text under the name self.cover_page.  If
        it already exists a warning is printed and the file is replaced.
        """
        files = self.files
        cover_page = self.cover_page

        data = self.buildXHTML()

        outfile = os.path.join(files.k8text, cover_page)
        if os.path.exists(pathof(outfile)):
            print('Warning: {:s} already exists.'.format(cover_page))
            os.remove(pathof(outfile))
        # the context manager closes the handle even if write() raises
        with open(pathof(outfile), 'w') as fp:
            fp.write(data)
        return
Example #14
0
    def writeXHTML(self):
        """Write the page returned by self.buildXHTML() as the cover page.

        The file goes to files.k8text under the name self.cover_page.  If
        it already exists a warning is printed and the file is replaced.
        """
        files = self.files
        cover_page = self.cover_page

        data = self.buildXHTML()

        outfile = os.path.join(files.k8text, cover_page)
        if os.path.exists(pathof(outfile)):
            print('Warning: {:s} already exists.'.format(cover_page))
            os.remove(pathof(outfile))
        # the context manager closes the handle even if write() raises
        with open(pathof(outfile), 'w') as fp:
            fp.write(data)
        return
 def zipUpDir(self, myzip, tdir, localname):
     """Recursively add the contents of tdir/localname to myzip.

     Files are deflated and stored under their path relative to tdir;
     sub-directories are handled by a recursive call.
     """
     currentdir = os.path.join(tdir, localname) if localname != "" else tdir
     for entry in path.listdir(currentdir):
         archive_name = os.path.join(localname, entry)
         disk_name = os.path.join(currentdir, entry)
         if path.isfile(disk_name):
             myzip.write(pathof(disk_name), pathof(archive_name), zipfile.ZIP_DEFLATED)
             continue
         if path.isdir(disk_name):
             self.zipUpDir(myzip, tdir, archive_name)
Example #16
0
def epub_zip_up_book_contents(ebook_path, epub_filepath):
    """Zip the files under ebook_path into an epub at epub_filepath.

    The 'mimetype' entry is added first and stored uncompressed; every
    other file is deflated.

    Raises:
        Exception: if ebook_path contains no 'mimetype' file.
    """
    outzip = zipfile.ZipFile(pathof(epub_filepath), 'w')
    try:
        files = path.walk(ebook_path)
        if 'mimetype' not in files:
            raise Exception('mimetype file is missing')
        outzip.write(pathof(os.path.join(ebook_path, 'mimetype')),
                     pathof('mimetype'), zipfile.ZIP_STORED)
        files.remove('mimetype')
        for name in files:
            filepath = os.path.join(ebook_path, name)
            outzip.write(pathof(filepath), pathof(name), zipfile.ZIP_DEFLATED)
    finally:
        # always release the archive handle, also when an error is raised
        outzip.close()
Example #17
0
 def zipUpDir(self, myzip, tdir, localname):
     """Recursively add the contents of tdir/localname to myzip.

     Files are deflated and stored under their path relative to tdir;
     sub-directories are handled by a recursive call.
     """
     if localname != "":
         folder = os.path.join(tdir, localname)
     else:
         folder = tdir
     for child in path.listdir(folder):
         inside_zip = os.path.join(localname, child)
         on_disk = os.path.join(folder, child)
         if path.isfile(on_disk):
             myzip.write(pathof(on_disk), pathof(inside_zip),
                         zipfile.ZIP_DEFLATED)
         elif path.isdir(on_disk):
             self.zipUpDir(myzip, tdir, inside_zip)
Example #18
0
 def writeotherfile(self, book_href, data):
     """Overwrite a non-manifest file, identified by its book href, in outdir.

     Raises:
         WrapperException: if the href is unknown or names a protected file.
     """
     key = utf8_str(book_href)
     relpath = self.id_to_filepath.get(key)
     if relpath is None:
         raise WrapperException('book href does not exist')
     if key in PROTECTED_FILES:
         raise WrapperException('Attempt to modify protected file')
     target = os.path.join(self.outdir, relpath)
     parent = os.path.dirname(target)
     if not path.exists(parent):
         os.makedirs(pathof(parent))
     with open(pathof(target), 'wb') as out:
         out.write(data)
     # remember that this entry now has a changed copy in outdir
     self.modified[key] = 'file'
Example #19
0
 def writeotherfile(self, book_href, data):
     """Overwrite a non-manifest file, identified by its book href, in outdir.

     Raises:
         WrapperException: if the href is unknown or names a protected file.
     """
     href_key = utf8_str(book_href)
     stored_path = self.id_to_filepath.get(href_key)
     if stored_path is None:
         raise WrapperException('book href does not exist')
     if href_key in PROTECTED_FILES:
         raise WrapperException('Attempt to modify protected file')
     destination = os.path.join(self.outdir, stored_path)
     folder = os.path.dirname(destination)
     if not path.exists(folder):
         os.makedirs(pathof(folder))
     with open(pathof(destination), 'wb') as handle:
         handle.write(data)
     # remember that this entry now has a changed copy in outdir
     self.modified[href_key] = 'file'
Example #20
0
 def writefile(self, id, data):
     """Write new content for the manifest entry `id` into outdir.

     Data for entries whose mime type ends in '+xml' is passed through
     utf8_str() before being written.

     Raises:
         WrapperException: if the id has no known file path.
     """
     key = utf8_str(id)
     relpath = self.id_to_filepath.get(key)
     if relpath is None:
         raise WrapperException('Id does not exist in manifest')
     if self.id_to_mime.get(key, '').endswith('+xml'):
         data = utf8_str(data)
     target = os.path.join(self.outdir, relpath)
     parent = os.path.dirname(target)
     if not path.exists(parent):
         os.makedirs(pathof(parent))
     with open(pathof(target), 'wb') as out:
         out.write(data)
     # remember that this entry now has a changed copy in outdir
     self.modified[key] = 'file'
Example #21
0
 def writefile(self, id, data):
     """Write new content for the manifest entry `id` into outdir.

     Data for entries whose mime type ends in '+xml' is passed through
     utf8_str() before being written.

     Raises:
         WrapperException: if the id has no known file path.
     """
     manifest_id = utf8_str(id)
     stored_path = self.id_to_filepath.get(manifest_id)
     if stored_path is None:
         raise WrapperException('Id does not exist in manifest')
     mime = self.id_to_mime.get(manifest_id, '')
     payload = utf8_str(data) if mime.endswith('+xml') else data
     destination = os.path.join(self.outdir, stored_path)
     folder = os.path.dirname(destination)
     if not path.exists(folder):
         os.makedirs(pathof(folder))
     with open(pathof(destination), 'wb') as handle:
         handle.write(payload)
     # remember that this entry now has a changed copy in outdir
     self.modified[manifest_id] = 'file'
Example #22
0
 def addotherfile(self, book_href, data):
     """Add a new non-manifest file to the book under the given href.

     The href (with "/" turned into os.sep) is used as the path relative
     to outdir.  On success the href is recorded in self.other,
     self.added and self.id_to_filepath.

     Raises:
         WrapperException: if the href is already registered or a file
             already exists at the target path.
     """
     key = utf8_str(book_href)
     if key in self.other:
         raise WrapperException('book href must be unquie')
     relpath = key.replace("/", os.sep)
     target = os.path.join(pathof(self.outdir), relpath)
     if path.isfile(target):
         raise WrapperException('desired path already exists')
     parent = os.path.dirname(pathof(target))
     if not path.exists(parent):
         os.makedirs(pathof(parent))
     with open(pathof(target), 'wb') as out:
         out.write(data)
     # bookkeeping happens only after the file was written successfully
     self.other.append(key)
     self.added.append(key)
     self.id_to_filepath[key] = relpath
Example #23
0
 def deletefile(self, id):
     """Remove a manifest entry and any copy of its file in outdir.

     The entry is dropped from the id/href/mime/filepath maps and from
     the spine.  A file that was added or modified earlier is deleted
     from outdir.  Ids that were not added through this wrapper are
     appended to self.deleted and the OPF is marked as modified.

     Raises:
         WrapperException: if the id has no known file path.
     """
     id = utf8_str(id)
     filepath = self.id_to_filepath.get(id, None)
     # The lookup result is what tells us whether the id is known; the
     # former test on `id` itself let unknown ids continue with
     # filepath set to None.
     if filepath is None:
         raise WrapperException('id does not exist in manifest')
     add_to_deleted = True
     # if file was added or modified, delete file from outdir
     if id in self.added or id in self.modified:
         filepath = os.path.join(self.outdir, filepath)
         if path.exists(filepath) and path.isfile(filepath):
             os.remove(pathof(filepath))
         if id in self.added:
             self.added.remove(id)
             add_to_deleted = False
         if id in self.modified:
             del self.modified[id]
     # remove from manifest
     href = self.id_to_href[id]
     del self.id_to_href[id]
     del self.id_to_mime[id]
     del self.href_to_id[href]
     # remove from spine
     new_spine = [(sid, linear) for sid, linear in self.spine if sid != id]
     if len(new_spine) != len(self.spine):
         # NOTE(review): setspine is resolved as a module-level name here;
         # if it is meant to be a method this should be self.setspine - confirm
         setspine(new_spine)
     if add_to_deleted:
         self.deleted.append(id)
         self.modified['OEBPS/content.opf'] = 'file'
     del self.id_to_filepath[id]
Example #24
0
 def addotherfile(self, book_href, data):
     """Add a new non-manifest file to the book under the given href.

     The href (with "/" turned into os.sep) is used as the path relative
     to outdir.  On success the href is recorded in self.other,
     self.added and self.id_to_filepath.

     Raises:
         WrapperException: if the href is already registered or a file
             already exists at the target path.
     """
     href_key = utf8_str(book_href)
     if href_key in self.other:
         raise WrapperException('book href must be unquie')
     wanted_path = href_key.replace("/", os.sep)
     destination = os.path.join(pathof(self.outdir), wanted_path)
     if path.isfile(destination):
         raise WrapperException('desired path already exists')
     folder = os.path.dirname(pathof(destination))
     if not path.exists(folder):
         os.makedirs(pathof(folder))
     with open(pathof(destination), 'wb') as handle:
         handle.write(data)
     # bookkeeping happens only after the file was written successfully
     self.other.append(href_key)
     self.added.append(href_key)
     self.id_to_filepath[href_key] = wanted_path
Example #25
0
 def deletefile(self, id):
     """Remove a manifest entry and any copy of its file in outdir.

     The entry is dropped from the id/href/mime/filepath maps and from
     the spine.  A file that was added or modified earlier is deleted
     from outdir.  Ids that were not added through this wrapper are
     appended to self.deleted and the OPF is marked as modified.

     Raises:
         WrapperException: if the id has no known file path.
     """
     id = utf8_str(id)
     filepath = self.id_to_filepath.get(id, None)
     # The lookup result is what tells us whether the id is known; the
     # former test on `id` itself let unknown ids continue with
     # filepath set to None.
     if filepath is None:
         raise WrapperException('id does not exist in manifest')
     add_to_deleted = True
     # if file was added or modified, delete file from outdir
     if id in self.added or id in self.modified:
         filepath = os.path.join(self.outdir, filepath)
         if path.exists(filepath) and path.isfile(filepath):
             os.remove(pathof(filepath))
         if id in self.added:
             self.added.remove(id)
             add_to_deleted = False
         if id in self.modified:
             del self.modified[id]
     # remove from manifest
     href = self.id_to_href[id]
     del self.id_to_href[id]
     del self.id_to_mime[id]
     del self.href_to_id[href]
     # remove from spine
     new_spine = [(sid, linear) for sid, linear in self.spine if sid != id]
     if len(new_spine) != len(self.spine):
         # NOTE(review): setspine is resolved as a module-level name here;
         # if it is meant to be a method this should be self.setspine - confirm
         setspine(new_spine)
     if add_to_deleted:
         self.deleted.append(id)
         self.modified['OEBPS/content.opf'] = 'file'
     del self.id_to_filepath[id]
Example #26
0
 def writeNAV(self, ncx_data, guidetext, metadata):
     """Build the navigation document and write it to files.k8text.

     The markup comes from self.buildNAV(), which receives the first
     'Title' and first 'Language' value of metadata; the file is named
     self.navname.
     """
     # build the xhtml
     xhtml = self.buildNAV(ncx_data, guidetext,
                           metadata.get('Title')[0],
                           metadata.get('Language')[0])
     fname = os.path.join(self.files.k8text, self.navname)
     # the context manager closes the handle even if write() raises
     with open(pathof(fname), 'wb') as fp:
         fp.write(xhtml)
Example #27
0
 def copy_book_contents_to(self, destdir):
     """Copy every known book file into destdir, keeping relative paths.

     Manifest entries are read with self.readfile(), everything else
     with self.readotherfile().

     Raises:
         WrapperException: if destdir is None or not a directory.
     """
     destdir = utf8_str(destdir)
     if destdir is None or not path.isdir(destdir):
         raise WrapperException('destination directory does not exist')
     for key in self.id_to_filepath.keys():
         relpath = self.id_to_filepath[key]
         # ids present in the mime map belong to the manifest
         if key in self.id_to_mime.keys():
             content = self.readfile(key)
         else:
             content = self.readotherfile(key)
         target = os.path.join(destdir, relpath)
         parent = os.path.dirname(target)
         if not path.exists(parent):
             os.makedirs(pathof(parent))
         with open(pathof(target), 'wb') as out:
             out.write(content)
def processSRCS(i, files, imgnames, sect, data):
    """Save the kindlegen source archive carried by section i.

    The section data without its first 16 bytes is written to
    files.outdir as KINDLEGENSRC_FILENAME.  None is appended to imgnames
    for this section and the list is returned.
    """
    # extract the source zip archive and save it.
    print("File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME)
    srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME)
    # the context manager closes the handle even if write() raises
    with open(pathof(srcname), 'wb') as fp:
        fp.write(data[16:])
    imgnames.append(None)
    sect.setsectiondescription(i, "Zipped Source Files")
    return imgnames
Example #29
0
 def copy_book_contents_to(self, destdir):
     """Copy every known book file into destdir, keeping relative paths.

     Manifest entries are read with self.readfile(), everything else
     with self.readotherfile().

     Raises:
         WrapperException: if destdir is None or not a directory.
     """
     destdir = utf8_str(destdir)
     if destdir is None or not path.isdir(destdir):
         raise WrapperException('destination directory does not exist')
     for entry_id in self.id_to_filepath.keys():
         stored_path = self.id_to_filepath[entry_id]
         # ids present in the mime map belong to the manifest
         reader = self.readfile if entry_id in self.id_to_mime.keys() else self.readotherfile
         payload = reader(entry_id)
         destination = os.path.join(destdir, stored_path)
         folder = os.path.dirname(destination)
         if not path.exists(folder):
             os.makedirs(pathof(folder))
         with open(pathof(destination), 'wb') as handle:
             handle.write(payload)
Example #30
0
def processSRCS(i, files, rscnames, sect, data):
    """Save the kindlegen source archive carried by section i.

    The section data without its first 16 bytes is written to
    files.outdir as KINDLEGENSRC_FILENAME.  None is appended to rscnames
    for this section and the list is returned.
    """
    # extract the source zip archive and save it.
    print("File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME)
    srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME)
    # the context manager closes the handle even if write() raises
    with open(pathof(srcname), 'wb') as fp:
        fp.write(data[16:])
    rscnames.append(None)
    sect.setsectiondescription(i, "Zipped Source Files")
    return rscnames
Example #31
0
def processCMET(i, files, rscnames, sect, data):
    """Save the kindlegen build log carried by section i.

    The section data without its first 10 bytes is written to
    files.outdir as KINDLEGENLOG_FILENAME.  None is appended to rscnames
    for this section and the list is returned.
    """
    # extract the build log
    print("File contains kindlegen build log, extracting as %s" % KINDLEGENLOG_FILENAME)
    srcname = os.path.join(files.outdir, KINDLEGENLOG_FILENAME)
    # the context manager closes the handle even if write() raises
    with open(pathof(srcname), 'wb') as fp:
        fp.write(data[10:])
    rscnames.append(None)
    sect.setsectiondescription(i, "Kindlegen log")
    return rscnames
Example #32
0
 def writeK8NCX(self, ncx_data, metadata):
     """Build the K8 NCX and write it as toc.ncx into files.k8oebps.

     Sets self.isNCX to True.  The builder receives the first 'Title',
     'UniqueID' and 'Language' values of metadata.
     """
     # build the xml
     self.isNCX = True
     print("Write K8 ncx")
     xml = self.buildK8NCX(ncx_data, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0])
     ncxname = os.path.join(self.files.k8oebps, 'toc.ncx')
     # the context manager closes the handle even if write() raises
     with open(pathof(ncxname), 'wb') as fp:
         fp.write(xml)
def processCMET(i, files, imgnames, sect, data):
    """Save the kindlegen build log carried by section i.

    The section data without its first 10 bytes is written to
    files.outdir as KINDLEGENLOG_FILENAME.  None is appended to imgnames
    for this section and the list is returned.
    """
    # extract the build log
    print("File contains kindlegen build log, extracting as %s" % KINDLEGENLOG_FILENAME)
    srcname = os.path.join(files.outdir, KINDLEGENLOG_FILENAME)
    # the context manager closes the handle even if write() raises
    with open(pathof(srcname), 'wb') as fp:
        fp.write(data[10:])
    imgnames.append(None)
    sect.setsectiondescription(i, "Kindlegen log")
    return imgnames
Example #34
0
def get_image_type(imgname, imgdata=None):
    """Return the image type of a file or byte string, or None.

    imghdr.what() is tried first.  When it reports nothing, data that
    starts with the JPEG start marker (FF D8) and ends - ignoring
    trailing null bytes - with the JPEG end marker (FF D9) is reported as
    "jpeg".  The file named by imgname is read only when imgdata is None.
    """
    imgtype = imghdr.what(pathof(imgname), imgdata)
    if imgtype is not None:
        return imgtype

    # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are some
    # with only the magic JPEG bytes out there...
    # ImageMagick handles those, so, do it too.
    if imgdata is None:
        # close the handle as soon as the content has been read
        with open(pathof(imgname), 'rb') as f:
            imgdata = f.read()
    if imgdata[0:2] == b'\xFF\xD8':
        # Get last non-null bytes
        last = len(imgdata)
        while imgdata[last - 1:last] == b'\x00':
            last -= 1
        # Be extra safe, check the trailing bytes, too.
        if imgdata[last - 2:last] == b'\xFF\xD9':
            imgtype = "jpeg"
    return imgtype
Example #35
0
 def writeNCX(self, metadata):
     """Build the NCX and write it as <input basename>.ncx into files.mobi7dir.

     Sets self.isNCX to True.  The builder receives the html file name
     derived from files.outbase and the first 'Title', 'UniqueID' and
     'Language' values of metadata.
     """
     # build the xml
     self.isNCX = True
     print("Write ncx")
     htmlname = os.path.basename(self.files.outbase) + '.html'
     xml = self.buildNCX(htmlname, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0])
     # write the ncx file
     ncxname = os.path.join(self.files.mobi7dir, self.files.getInputFileBasename() + '.ncx')
     # the context manager closes the handle even if write() raises
     with open(pathof(ncxname), 'wb') as fp:
         fp.write(xml)
Example #36
0
def get_image_type(imgname, imgdata=None):
    """Return the image type of a file or byte string, or None.

    imghdr.what() is tried first.  When it reports nothing, data that
    starts with the JPEG start marker (FF D8) and ends - ignoring
    trailing null bytes - with the JPEG end marker (FF D9) is reported as
    "jpeg".  The file named by imgname is read only when imgdata is None.
    """
    imgtype = imghdr.what(pathof(imgname), imgdata)
    if imgtype is not None:
        return imgtype

    # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are some
    # with only the magic JPEG bytes out there...
    # ImageMagick handles those, so, do it too.
    if imgdata is None:
        # close the handle as soon as the content has been read
        with open(pathof(imgname), 'rb') as f:
            imgdata = f.read()
    if imgdata[0:2] == b'\xFF\xD8':
        # Get last non-null bytes
        last = len(imgdata)
        while imgdata[last - 1:last] == b'\x00':
            last -= 1
        # Be extra safe, check the trailing bytes, too.
        if imgdata[last - 2:last] == b'\xFF\xD9':
            imgtype = "jpeg"
    return imgtype
Example #37
0
def processUnknownSections(mh, sect, files, K8Boundary):
    """Describe every section that has no description yet.

    Sections from mh.start up to sect.num_sections (or up to K8Boundary
    when mh.start lies before it) are classified by their first four
    bytes as a termination marker, an INDX section or a mysterious
    section.  When DUMP is set, INDX and mysterious sections are also
    written to files.outdir and the file name is added to the
    description.
    """
    global DUMP
    global TERMINATION_INDICATOR1
    global TERMINATION_INDICATOR2
    global TERMINATION_INDICATOR3
    if DUMP:
        print("Unpacking any remaining unknown records")
    beg = mh.start
    end = sect.num_sections
    if beg < K8Boundary:
        # then we're processing the first part of a combination file
        end = K8Boundary
    for i in xrange(beg, end):
        if sect.sectiondescriptions[i] != "":
            continue
        data = sect.loadSection(i)
        magic = data[0:4]
        # fname stays None for sections that are never dumped
        fname = None
        if magic == TERMINATION_INDICATOR3:
            description = "Termination Marker 3 Nulls"
        elif magic == TERMINATION_INDICATOR2:
            description = "Termination Marker 2 Nulls"
        elif magic == TERMINATION_INDICATOR1:
            description = "Termination Marker 1 Null"
        elif magic == "INDX":
            fname = "Unknown%05d_INDX.dat" % i
            description = "Unknown INDX section"
        else:
            fname = "unknown%05d.dat" % i
            description = "Mysterious Section, first four bytes %s" % describe(
                data[0:4])
        if DUMP and fname is not None:
            outname = os.path.join(files.outdir, fname)
            # close the handle deterministically instead of relying on GC
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            print("Extracting %s: %s from section %d" % (description,
                                                         fname, i))
            description = description + ", extracting as %s" % fname
        sect.setsectiondescription(i, description)
Example #38
0
def unzip_epub_to_dir(path_to_epub, destdir):
    """Extract every member of the epub (zip) archive into destdir.

    Missing parent directories are created.  Directory entries in the
    archive are created as directories instead of being opened as files.

    Raises:
        ValueError: if a member name would resolve outside destdir.
    """
    root = os.path.abspath(destdir)
    # join(root, "") yields root with exactly one trailing separator
    prefix = os.path.join(root, "")
    # the context manager closes the archive file even when extraction fails
    with open(pathof(path_to_epub), 'rb') as f:
        sz = ZipFile(f)
        for name in sz.namelist():
            localname = name.replace("/", os.sep)
            filepath = os.path.join(destdir, localname)
            # member names come from the archive and are untrusted:
            # refuse anything ("../", absolute paths) that leaves destdir
            resolved = os.path.abspath(filepath)
            if resolved != root and not resolved.startswith(prefix):
                raise ValueError('archive member escapes destination directory: %s' % name)
            if name.endswith("/"):
                # directory entry: nothing to write, just make sure it exists
                if not os.path.isdir(filepath):
                    os.makedirs(filepath)
                continue
            basedir = os.path.dirname(filepath)
            if not os.path.isdir(basedir):
                os.makedirs(basedir)
            data = sz.read(name)
            with open(filepath, 'wb') as fp:
                fp.write(data)
Example #39
0
 def __init__(self, filename):
     """Load a Palm database file and parse its section table.

     The first 78 bytes form the header; the section count is the
     big-endian 16-bit value at offset 76.  The table that follows holds
     two 32-bit values per section, stored here as offset and attributes.
     A final (file length, 0) pair is appended, so the offset and
     attribute tuples and sectiondescriptions all have num_sections + 1
     entries.
     """
     # read the whole file and release the handle right away
     with open(pathof(filename), 'rb') as f:
         self.data = f.read()
     self.palmheader = self.data[:78]
     self.palmname = self.data[:32]
     self.ident = self.palmheader[0x3C:0x3C+8]
     self.num_sections, = struct.unpack_from('>H', self.palmheader, 76)
     self.filelength = len(self.data)
     sectionsdata = struct.unpack_from('>%dL' % (self.num_sections*2), self.data, 78) + (self.filelength, 0)
     # even positions are offsets, odd positions are attributes
     self.sectionoffsets = sectionsdata[::2]
     self.sectionattributes = sectionsdata[1::2]
     self.sectiondescriptions = ["" for x in range(self.num_sections+1)]
     self.sectiondescriptions[-1] = "File Length Only"
     return
def processUnknownSections(mh, sect, files, K8Boundary):
    """Describe every section that has no description yet.

    Sections from mh.start up to sect.num_sections (or up to K8Boundary
    when mh.start lies before it) are classified by their first four
    bytes as a termination marker, an INDX section or a mysterious
    section.  When DUMP is set, INDX and mysterious sections are also
    written to files.outdir and the file name is added to the
    description.
    """
    global DUMP
    global TERMINATION_INDICATOR1
    global TERMINATION_INDICATOR2
    global TERMINATION_INDICATOR3
    if DUMP:
        print("Unpacking any remaining unknown records")
    beg = mh.start
    end = sect.num_sections
    if beg < K8Boundary:
        # then we're processing the first part of a combination file
        end = K8Boundary
    for i in xrange(beg, end):
        if sect.sectiondescriptions[i] != "":
            continue
        data = sect.loadSection(i)
        magic = data[0:4]
        # fname stays None for sections that are never dumped
        fname = None
        if magic == TERMINATION_INDICATOR3:
            description = "Termination Marker 3 Nulls"
        elif magic == TERMINATION_INDICATOR2:
            description = "Termination Marker 2 Nulls"
        elif magic == TERMINATION_INDICATOR1:
            description = "Termination Marker 1 Null"
        elif magic == "INDX":
            fname = "Unknown%05d_INDX.dat" % i
            description = "Unknown INDX section"
        else:
            fname = "unknown%05d.dat" % i
            description = "Mysterious Section, first four bytes %s" % describe(data[0:4])
        if DUMP and fname is not None:
            outname = os.path.join(files.outdir, fname)
            # close the handle deterministically instead of relying on GC
            with open(pathof(outname), 'wb') as fp:
                fp.write(data)
            print("Extracting %s: %s from section %d" % (description, fname, i))
            description = description + ", extracting as %s" % fname
        sect.setsectiondescription(i, description)
Example #41
0
 def addfile(self, uniqueid, basename, data, mime=None):
     """Add a new manifest file to the book and return its id.

     When no mime type is given it is looked up in ext_mime_map by the
     lower-cased extension of basename.  The file is placed under
     OEBPS/<folder>/<basename>, where the folder is 'Audio' or 'Video'
     for those media types and otherwise comes from mime_base_map
     ('Misc' by default).

     Raises:
         WrapperException: if no mime type can be determined, or the id
             or resulting href is already in use.
     """
     uniqueid = utf8_str(uniqueid)
     basename = utf8_str(basename)
     mime = utf8_str(mime)
     if mime is None:
         extension = os.path.splitext(basename)[1].lower()
         mime = ext_mime_map.get(extension)
     if mime is None:
         raise WrapperException("Mime Type Missing")
     if mime.endswith('+xml'):
         data = utf8_str(data)
     if mime.startswith("audio"):
         folder = 'Audio'
     elif mime.startswith("video"):
         folder = "Video"
     else:
         folder = mime_base_map.get(mime, 'Misc')
     href = folder + "/" + basename
     if uniqueid in self.id_to_href.keys():
         raise WrapperException('Manifest Id is not unique')
     if href in self.href_to_id.keys():
         raise WrapperException('Basename is not unique')
     # now actually write out the new file
     relpath = os.path.join('OEBPS', href.replace("/", os.sep))
     self.id_to_filepath[uniqueid] = relpath
     target = os.path.join(self.outdir, relpath)
     parent = os.path.dirname(target)
     if not path.exists(parent):
         os.makedirs(pathof(parent))
     with open(pathof(target), 'wb') as out:
         out.write(data)
     # register the new entry and flag the OPF for rewriting
     self.id_to_href[uniqueid] = href
     self.id_to_mime[uniqueid] = mime
     self.href_to_id[href] = uniqueid
     self.added.append(uniqueid)
     self.modified['OEBPS/content.opf'] = 'file'
     return uniqueid
Example #42
0
def unzip_epub_to_dir(path_to_epub, destdir):
    """Extract every member of the epub (zip archive) at path_to_epub below destdir.

    Missing intermediate directories are created.  Archive members whose
    name ends with "/" are directory entries: the directory is created and
    nothing is written as a file.

    Raises ValueError when a member name would resolve to a location
    outside destdir (absolute names, '..' components); an epub is
    untrusted input and must not be able to overwrite arbitrary files.
    """
    root = os.path.abspath(destdir)
    # prefix every extracted path must share; rstrip keeps a root of "/" working
    root_prefix = root.rstrip(os.sep) + os.sep
    # context managers close both handles even when extraction fails part way
    with open(pathof(path_to_epub), 'rb') as f:
        with ZipFile(f) as sz:
            for name in sz.namelist():
                joined = os.path.join(destdir, name.replace("/", os.sep))
                # normalised absolute location, with any '..' collapsed
                target = os.path.abspath(joined)
                if target != root and not target.startswith(root_prefix):
                    raise ValueError(
                        "epub member escapes destination directory: %r" % name)
                is_dir = name.endswith("/")
                basedir = target if is_dir else os.path.dirname(target)
                if not os.path.isdir(basedir):
                    os.makedirs(basedir)
                if is_dir:
                    continue
                with open(target, 'wb') as fp:
                    fp.write(sz.read(name))
Example #43
0
 def addfile(self, uniqueid, basename, data, mime=None):
     """Write a new file into the output book and register it in the manifest maps.

     uniqueid -- manifest id for the new file; must not already be in use.
     basename -- file name (no folder); the folder is chosen from the mime type.
     data     -- file contents; passed through utf8_str when the mime type
                 ends with '+xml'.
     mime     -- media type; when None it is looked up in ext_mime_map by the
                 lower-cased extension of basename.

     The file is written below self.outdir as OEBPS/<folder>/<basename>,
     where <folder> is 'Audio' or 'Video' for mime types starting with
     "audio"/"video" and otherwise comes from mime_base_map ('Misc' when
     the mime type is not listed there).

     Returns the uniqueid (after utf8_str).

     Raises WrapperException when no mime type can be determined, when
     uniqueid is already a manifest id, or when the resulting href is
     already registered.
     """
     uniqueid = utf8_str(uniqueid)
     basename = utf8_str(basename)
     mime = utf8_str(mime)
     if mime is None:
         ext = os.path.splitext(basename)[1].lower()
         mime = ext_mime_map.get(ext, None)
     if mime is None:
         raise WrapperException("Mime Type Missing")
     if mime.endswith('+xml'):
         data = utf8_str(data)
     if mime.startswith("audio"):
         base = 'Audio'
     elif mime.startswith("video"):
         base = "Video"
     else:
         base = mime_base_map.get(mime, 'Misc')
     href = base + "/" + basename
     # test membership on the dicts directly; .keys() would build a
     # throwaway list and scan it linearly under Python 2
     if uniqueid in self.id_to_href:
         raise WrapperException('Manifest Id is not unique')
     if href in self.href_to_id:
         raise WrapperException('Basename is not unique')
     # now actually write out the new file
     relpath = os.path.join('OEBPS', href.replace("/", os.sep))
     filepath = os.path.join(self.outdir, relpath)
     destdir = os.path.dirname(filepath)
     if not path.exists(destdir):
         os.makedirs(pathof(destdir))
     with open(pathof(filepath), 'wb') as fp:
         fp.write(data)
     # register the file only once it is on disk, so a failed write does
     # not leave an id -> filepath entry without the matching href/mime
     self.id_to_filepath[uniqueid] = relpath
     self.id_to_href[uniqueid] = href
     self.id_to_mime[uniqueid] = mime
     self.href_to_id[href] = uniqueid
     self.added.append(uniqueid)
     # the manifest changed, so the opf has to be regenerated
     self.modified['OEBPS/content.opf'] = 'file'
     return uniqueid
Example #44
0
 def __init__(self, filename):
     """Read the whole file at filename and index its section table.

     The first 78 bytes are kept as self.palmheader (the first 32 of them
     as self.palmname, bytes 0x3C-0x43 as self.ident).  The section count
     is the big-endian unsigned short at offset 76; it is followed, from
     offset 78, by two big-endian unsigned longs per section which are
     split into self.sectionoffsets and self.sectionattributes.

     One extra trailing entry (offset = file length, attribute 0,
     description "File Length Only") is appended to the tables.

     Raises struct.error when the file is too short to hold the header or
     the section table it announces.
     """
     # read via a context manager so the handle is closed deterministically
     with open(pathof(filename), 'rb') as f:
         self.data = f.read()
     self.palmheader = self.data[:78]
     self.palmname = self.data[:32]
     self.ident = self.palmheader[0x3C:0x3C + 8]
     self.num_sections, = struct.unpack_from('>H', self.palmheader, 76)
     self.filelength = len(self.data)
     # (offset, attributes) pairs, plus a sentinel pair holding the file length
     sectionsdata = struct.unpack_from('>%dL' % (self.num_sections * 2),
                                       self.data, 78) + (self.filelength, 0)
     self.sectionoffsets = sectionsdata[::2]
     self.sectionattributes = sectionsdata[1::2]
     self.sectiondescriptions = [""] * (self.num_sections + 1)
     self.sectiondescriptions[-1] = "File Length Only"
Example #45
0
def processCRES(i, files, rscnames, sect, data, beg, rsc_ptr, use_hd):
    """Extract the HD image held in CRES section i.

    The first 12 bytes of data are skipped and the remainder is handed to
    get_image_type.  When no image type is recognised a warning is
    printed, the section is described as mysterious data and, if the
    module-level DUMP flag is set, the payload is written to
    files.outdir as unknownNNNNN.dat.

    Otherwise the image is written either over the lower resolution image
    named rscnames[rsc_ptr] in files.imgdir (use_hd true) or as
    HDimageNNNNN.<type> in files.hdimgdir (use_hd false).

    In every case None is appended to rscnames and rsc_ptr is advanced by
    one.  beg is accepted but not used here.

    Returns the tuple (rscnames, rsc_ptr).
    """
    global DUMP
    data = data[12:]
    imgtype = get_image_type(None, data)

    if imgtype is None:
        print("Warning: CRES Section %s does not contain a recognised resource" % i)
        rscnames.append(None)
        description = "Mysterious CRES data, first four bytes %s" % describe(data[0:4])
        if DUMP:
            fname = "unknown%05d.dat" % i
            outname = os.path.join(files.outdir, fname)
            with open(pathof(outname), 'wb') as f:
                f.write(data)
            description += " extracting as %s" % fname
        sect.setsectiondescription(i, description)
        rsc_ptr += 1
        return rscnames, rsc_ptr

    if use_hd:
        # overwrite corresponding lower res image with hd version
        imgname = rscnames[rsc_ptr]
        imgdest = files.imgdir
    else:
        imgname = "HDimage%05d.%s" % (i, imgtype)
        imgdest = files.hdimgdir
    print("Extracting HD image: {0:s} from section {1:d}".format(imgname, i))
    outimg = os.path.join(imgdest, imgname)
    with open(pathof(outimg), 'wb') as f:
        f.write(data)
    rscnames.append(None)
    sect.setsectiondescription(i, "Optional HD Image {0:s}".format(imgname))
    rsc_ptr += 1
    return rscnames, rsc_ptr
Example #46
0
 def readfile(self, id):
     """Return the raw contents of the manifest file registered under id.

     Files that were added or modified are read from self.outdir, all
     others from self.ebook_root.

     Raises WrapperException when id is not in the manifest or when the
     file it maps to does not exist on disk.
     """
     id = utf8_str(id)
     relpath = self.id_to_filepath.get(id, None)
     if relpath is None:
         raise WrapperException('Id does not exist in manifest')
     # already added or modified it will be in outdir
     # (membership is tested on the dict itself rather than on .keys())
     if id in self.added or id in self.modified:
         basedir = self.outdir
     else:
         basedir = self.ebook_root
     filepath = os.path.join(basedir, relpath)
     if not path.exists(filepath):
         raise WrapperException('File Does Not Exist')
     with open(pathof(filepath), 'rb') as fp:
         return fp.read()
Example #47
0
 def readfile(self, id):
     """Return the raw contents of the manifest file registered under id.

     Files that were added or modified are read from self.outdir, all
     others from self.ebook_root.

     Raises WrapperException when id is not in the manifest or when the
     file it maps to does not exist on disk.
     """
     id = utf8_str(id)
     relpath = self.id_to_filepath.get(id, None)
     if relpath is None:
         raise WrapperException('Id does not exist in manifest')
     # already added or modified it will be in outdir
     # (membership is tested on the dict itself rather than on .keys())
     if id in self.added or id in self.modified:
         basedir = self.outdir
     else:
         basedir = self.ebook_root
     filepath = os.path.join(basedir, relpath)
     if not path.exists(filepath):
         raise WrapperException('File Does Not Exist')
     with open(pathof(filepath), 'rb') as fp:
         return fp.read()
def processRESC(i, files, imgnames, sect, data, k8resc):
    """Handle RESC section i: build a K8RESCProcessor from its payload.

    When the module-level DUMP flag is set the raw section is also
    written to files.outdir as RESCNNNNN.dat.  None is appended to
    imgnames and the section is described as "K8 RESC section".

    The incoming k8resc argument is ignored; it is replaced by a new
    K8RESCProcessor built from data[16:].  Errors raised while building
    it propagate to the caller.

    Returns the tuple (imgnames, k8resc).
    """
    global DUMP
    if DUMP:
        rescname = "RESC%05d.dat" % i
        print("Extracting Resource:  %s" % rescname)
        outrsc = os.path.join(files.outdir, rescname)
        with open(pathof(outrsc), 'wb') as f:
            f.write(data)
    # parse the spine and metadata from RESC
    k8resc = K8RESCProcessor(data[16:], DUMP)
    imgnames.append(None)
    sect.setsectiondescription(i,"K8 RESC section")
    return imgnames, k8resc
Example #49
0
def processRESC(i, files, rscnames, sect, data, k8resc):
    """Handle RESC section i: build a K8RESCProcessor from its payload.

    When the module-level DUMP flag is set the raw section is also
    written to files.outdir as RESCNNNNN.dat.  None is appended to
    rscnames and the section is described as "K8 RESC section".

    The incoming k8resc argument is ignored; it is replaced by a new
    K8RESCProcessor built from data[16:].  Errors raised while building
    it propagate to the caller.

    Returns the tuple (rscnames, k8resc).
    """
    global DUMP
    if DUMP:
        rescname = "RESC%05d.dat" % i
        print("Extracting Resource:  %s" % rescname)
        outrsc = os.path.join(files.outdir, rescname)
        with open(pathof(outrsc), 'wb') as f:
            f.write(data)
    # parse the spine and metadata from RESC
    k8resc = K8RESCProcessor(data[16:], DUMP)
    rscnames.append(None)
    sect.setsectiondescription(i, "K8 RESC section")
    return rscnames, k8resc
Example #50
0
def processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr):
    """Extract the font stored in section i into files.imgdir.

    Five big-endian unsigned longs are read from offset 4 of data:
    usize (unused), flags, data start, xor key length and xor key start.
    Flag 0x0002 means the first 1040 bytes of the font are XOR-obfuscated
    with the key; flag 0x0001 means the font data is zlib compressed.
    The extension is chosen from the first four bytes of the decoded
    font ('.ttf', '.otf', otherwise '.dat' with a warning).

    On success the file name is appended to rscnames (and also to
    obfuscate_data when the font was obfuscated and is a ttf/otf), and
    rsc_ptr is set to i - beg if it was still -1.

    When the header cannot be unpacked a message is printed and None is
    appended to rscnames, so the list still receives one entry for this
    section like it does from the other section handlers.

    Returns the tuple (rscnames, obfuscate_data, rsc_ptr).
    """
    fontname = "font%05d" % i
    try:
        _usize, fflags, dstart, xor_len, xor_start = struct.unpack_from(
            '>LLLLL', data, 4)
    except struct.error:
        # section is too short to contain the font header
        print("Failed to extract font: {0:s} from section {1:d}".format(
            fontname, i))
        rscnames.append(None)
        sect.setsectiondescription(
            i, "Font {0:s} (extraction failed)".format(fontname))
        return rscnames, obfuscate_data, rsc_ptr

    print("Extracting font: %s" % fontname)
    font_data = data[dstart:]
    # a zero-length key cannot be applied (and would divide by zero below)
    if (fflags & 0x0002) and xor_len:
        # obfuscated so need to de-obfuscate the first 1040 bytes
        key = bytearray(data[xor_start:xor_start + xor_len])
        buf = bytearray(font_data)
        for n in xrange(min(len(buf), 1040)):
            buf[n] ^= key[n % xor_len]
        font_data = bytes(buf)
    if fflags & 0x0001:
        # ZLIB compressed data
        font_data = zlib.decompress(font_data)
    hdr = font_data[0:4]
    ext = '.dat'
    if hdr in ('\0\1\0\0', 'true', 'ttcf'):
        ext = '.ttf'
    elif hdr == 'OTTO':
        ext = '.otf'
    else:
        print("Warning: unknown font header %s" % hdr.encode('hex'))
    fontname += ext
    if ext in ('.ttf', '.otf') and (fflags & 0x0002):
        obfuscate_data.append(fontname)
    outfnt = os.path.join(files.imgdir, fontname)
    with open(pathof(outfnt), 'wb') as f:
        f.write(font_data)
    rscnames.append(fontname)
    sect.setsectiondescription(i, "Font {0:s}".format(fontname))
    if rsc_ptr == -1:
        rsc_ptr = i - beg
    return rscnames, obfuscate_data, rsc_ptr
Example #51
0
 def readotherfile(self, book_href):
     """Return the raw contents of the file identified by book_href.

     "OEBPS/content.opf" is special: it is not read from disk but
     generated with self.build_opf().  Any other value is looked up in
     self.id_to_filepath; files that were added or modified are read
     from self.outdir, all others from self.ebook_root.

     Raises WrapperException when book_href is unknown or when the file
     it maps to does not exist on disk.
     """
     id = utf8_str(book_href)
     # handle special case of trying to read the opf
     if id == "OEBPS/content.opf":
         return self.build_opf()
     relpath = self.id_to_filepath.get(id, None)
     if relpath is None:
         raise WrapperException('book href does not exist')
     # membership is tested on the dict itself rather than on .keys()
     if id in self.added or id in self.modified:
         basedir = self.outdir
     else:
         basedir = self.ebook_root
     filepath = os.path.join(basedir, relpath)
     if not path.exists(filepath):
         raise WrapperException('File Does Not Exist')
     with open(pathof(filepath), 'rb') as fp:
         return fp.read()
Example #52
0
 def readotherfile(self, book_href):
     """Return the raw contents of the file identified by book_href.

     "OEBPS/content.opf" is special: it is not read from disk but
     generated with self.build_opf().  Any other value is looked up in
     self.id_to_filepath; files that were added or modified are read
     from self.outdir, all others from self.ebook_root.

     Raises WrapperException when book_href is unknown or when the file
     it maps to does not exist on disk.
     """
     id = utf8_str(book_href)
     # handle special case of trying to read the opf
     if id == "OEBPS/content.opf":
         return self.build_opf()
     relpath = self.id_to_filepath.get(id, None)
     if relpath is None:
         raise WrapperException('book href does not exist')
     # membership is tested on the dict itself rather than on .keys()
     if id in self.added or id in self.modified:
         basedir = self.outdir
     else:
         basedir = self.ebook_root
     filepath = os.path.join(basedir, relpath)
     if not path.exists(filepath):
         raise WrapperException('File Does Not Exist')
     with open(pathof(filepath), 'rb') as fp:
         return fp.read()
Example #53
0
def processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr):
    """Extract the font stored in section i into files.imgdir.

    Five big-endian unsigned longs are read from offset 4 of data:
    usize (unused), flags, data start, xor key length and xor key start.
    Flag 0x0002 means the first 1040 bytes of the font are XOR-obfuscated
    with the key; flag 0x0001 means the font data is zlib compressed.
    The extension is chosen from the first four bytes of the decoded
    font ('.ttf', '.otf', otherwise '.dat' with a warning).

    On success the file name is appended to rscnames (and also to
    obfuscate_data when the font was obfuscated and is a ttf/otf), and
    rsc_ptr is set to i - beg if it was still -1.

    When the header cannot be unpacked a message is printed and None is
    appended to rscnames, so the list still receives one entry for this
    section like it does from the other section handlers.

    Returns the tuple (rscnames, obfuscate_data, rsc_ptr).
    """
    fontname = "font%05d" % i
    try:
        _usize, fflags, dstart, xor_len, xor_start = struct.unpack_from(
            '>LLLLL', data, 4)
    except struct.error:
        # section is too short to contain the font header
        print("Failed to extract font: {0:s} from section {1:d}".format(
            fontname, i))
        rscnames.append(None)
        sect.setsectiondescription(
            i, "Font {0:s} (extraction failed)".format(fontname))
        return rscnames, obfuscate_data, rsc_ptr

    print("Extracting font: %s" % fontname)
    font_data = data[dstart:]
    # a zero-length key cannot be applied (and would divide by zero below)
    if (fflags & 0x0002) and xor_len:
        # obfuscated so need to de-obfuscate the first 1040 bytes
        key = bytearray(data[xor_start:xor_start + xor_len])
        buf = bytearray(font_data)
        for n in xrange(min(len(buf), 1040)):
            buf[n] ^= key[n % xor_len]
        font_data = bytes(buf)
    if fflags & 0x0001:
        # ZLIB compressed data
        font_data = zlib.decompress(font_data)
    hdr = font_data[0:4]
    ext = '.dat'
    if hdr in ('\0\1\0\0', 'true', 'ttcf'):
        ext = '.ttf'
    elif hdr == 'OTTO':
        ext = '.otf'
    else:
        print("Warning: unknown font header %s" % hdr.encode('hex'))
    fontname += ext
    if ext in ('.ttf', '.otf') and (fflags & 0x0002):
        obfuscate_data.append(fontname)
    outfnt = os.path.join(files.imgdir, fontname)
    with open(pathof(outfnt), 'wb') as f:
        f.write(font_data)
    rscnames.append(fontname)
    sect.setsectiondescription(i, "Font {0:s}".format(fontname))
    if rsc_ptr == -1:
        rsc_ptr = i - beg
    return rscnames, obfuscate_data, rsc_ptr
def processCONT(i, files, imgnames, sect, data):
    """Handle CONT section i (a container header or a container boundary).

    None is appended to imgnames in every case.  A section whose first
    12 bytes are "CONTBOUNDARY" is only described as a boundary.  Any
    other section is described as a CONT header and, when the
    module-level DUMP flag is set, its EXTH block (data[48:], with the
    codepage read as a big-endian unsigned long at offset 12) is dumped
    with dump_contexth and the raw section is written to files.outdir as
    CONT_HeaderNNNNN.dat.

    Returns imgnames.
    """
    global DUMP
    # process a container header, most of this is unknown
    # right now only extract its EXTH
    imgnames.append(None)
    if data[0:12] == "CONTBOUNDARY":
        sect.setsectiondescription(i,"CONTAINER BOUNDARY")
        return imgnames
    sect.setsectiondescription(i,"CONT Header")
    if DUMP:
        cpage, = struct.unpack_from('>L', data, 12)
        contexth = data[48:]
        print("\n\nContainer EXTH Dump")
        dump_contexth(cpage, contexth)
        fname = "CONT_Header%05d.dat" % i
        outname = os.path.join(files.outdir, fname)
        with open(pathof(outname), 'wb') as f:
            f.write(data)
    return imgnames
Example #55
0
def processCONT(i, files, rscnames, sect, data):
    """Handle CONT section i (a container header or a container boundary).

    None is appended to rscnames in every case.  A section whose first
    12 bytes are "CONTBOUNDARY" is only described as a boundary.  Any
    other section is described as a CONT header and, when the
    module-level DUMP flag is set, its EXTH block (data[48:], with the
    codepage read as a big-endian unsigned long at offset 12) is dumped
    with dump_contexth and the raw section is written to files.outdir as
    CONT_HeaderNNNNN.dat.

    Returns rscnames.
    """
    global DUMP
    # process a container header, most of this is unknown
    # right now only extract its EXTH
    rscnames.append(None)
    if data[0:12] == "CONTBOUNDARY":
        sect.setsectiondescription(i, "CONTAINER BOUNDARY")
        return rscnames
    sect.setsectiondescription(i, "CONT Header")
    if DUMP:
        cpage, = struct.unpack_from('>L', data, 12)
        contexth = data[48:]
        print("\n\nContainer EXTH Dump")
        dump_contexth(cpage, contexth)
        fname = "CONT_Header%05d.dat" % i
        outname = os.path.join(files.outdir, fname)
        with open(pathof(outname), 'wb') as f:
            f.write(data)
    return rscnames
Example #56
0
 def __init__(self, opf_path, debug = False):
     """Load the OPF file at opf_path and parse it.

     opf_path -- path of the opf file; its basename is kept as self.opfname.
     debug    -- stored as self._debug.

     The file is read in binary mode into self.opf, all result
     containers are reset to their empty state and self._parseData() is
     then called.
     """
     self._debug = debug
     self.opfname = utf8_str(os.path.basename(opf_path))

     # raw opf contents
     self.opf = None
     with open(pathof(opf_path), 'rb') as opf_file:
         raw_opf = opf_file.read()
     self.opf = raw_opf
     self.opos = 0

     # package / metadata information
     self.package_tag = [None] * 2
     # self.package_version = None
     self.metadata_tag = [None] * 2
     self.metadata = list()
     self.cover_id = None

     # manifest lookup tables
     self.manifest_id_to_href = dict()
     self.manifest_id_to_mime = dict()
     self.href_to_manifest_id = dict()

     # spine and guide
     self.spine_ppd = None
     # self.spine_pageattributes = {}
     # self.spine_idrefs = {}
     self.spine = list()
     self.guide = list()

     self._parseData()
Example #57
0
 def __init__(self, opf_path, debug=False):
     """Load the OPF file at opf_path and parse it.

     opf_path -- path of the opf file; its basename is kept as self.opfname.
     debug    -- stored as self._debug.

     The file is read in binary mode into self.opf, all result
     containers are reset to their empty state and self._parseData() is
     then called.
     """
     self._debug = debug
     self.opfname = utf8_str(os.path.basename(opf_path))

     # raw opf contents
     self.opf = None
     with open(pathof(opf_path), "rb") as opf_file:
         raw_opf = opf_file.read()
     self.opf = raw_opf
     self.opos = 0

     # package / metadata information
     self.package_tag = [None] * 2
     # self.package_version = None
     self.metadata_tag = [None] * 2
     self.metadata = list()
     self.cover_id = None

     # manifest lookup tables
     self.manifest_id_to_href = dict()
     self.manifest_id_to_mime = dict()
     self.href_to_manifest_id = dict()

     # spine and guide
     self.spine_ppd = None
     # self.spine_pageattributes = {}
     # self.spine_idrefs = {}
     self.spine = list()
     self.guide = list()

     self._parseData()
def processPAGE(i, files, imgnames, sect, data, mh, pagemapproc):
    """Handle page-map section i and write the matching .apnx file.

    A new PageMapProcessor is built from mh and data (the incoming
    pagemapproc argument is ignored and replaced).  None is appended to
    imgnames and the section is described as "PageMap".

    The apnx metadata is taken from sect.palmname and from the
    'cdeType', 'UniqueID' and 'ASIN' entries of mh.metadata; a missing
    entry raises KeyError.  The file is written to files.outdir as
    mobi8-<input basename>.apnx when mh.isK8() is true, otherwise as
    mobi7-<input basename>.apnx.

    Returns the tuple (imgnames, pagemapproc).
    """
    # process any page map information and create an apnx file
    pagemapproc = PageMapProcessor(mh, data)
    imgnames.append(None)
    sect.setsectiondescription(i,"PageMap")
    apnx_meta = {}
    apnx_meta['acr'] = str(sect.palmname).rstrip('\x00')
    apnx_meta['cdeType'] = mh.metadata['cdeType'][0]
    # '%x' gives bare hex digits; hex(...)[2:] would keep the trailing 'L'
    # that Python 2 adds when the value is a long
    apnx_meta['contentGuid'] = '%x' % int(mh.metadata['UniqueID'][0])
    apnx_meta['asin'] = mh.metadata['ASIN'][0]
    apnx_meta['pageMap'] = pagemapproc.getPageMap()
    apnx_meta['format'] = 'MOBI_8' if mh.version == 8 else 'MOBI_7'
    apnx_data = pagemapproc.generateAPNX(apnx_meta)
    prefix = 'mobi8-' if mh.isK8() else 'mobi7-'
    outname = os.path.join(files.outdir, prefix + files.getInputFileBasename() + '.apnx')
    with open(pathof(outname), 'wb') as f:
        f.write(apnx_data)
    return imgnames, pagemapproc