def processPAGE(i, files, imgnames, sect, data, mh, pagemapproc):
    """Process a page map section and write the result out as an .apnx file.

    Args:
        i: index of the section; it is labelled "PageMap" on ``sect``.
        files: supplies ``outdir`` and ``getInputFileBasename()`` used to
            build the output path.
        imgnames: list to which ``None`` is appended for this section.
        sect: section object; its ``palmname`` (trailing NULs stripped)
            becomes the 'acr' entry of the apnx metadata.
        data: passed to ``PageMapProcessor`` together with ``mh``.
        mh: header object supplying ``metadata``, ``version`` and ``isK8()``.
        pagemapproc: the incoming value is not used; it is always replaced
            by a new ``PageMapProcessor(mh, data)``.

    Returns:
        The tuple ``(imgnames, pagemapproc)`` where ``pagemapproc`` is the
        newly created processor.
    """
    # process any page map information and create an apnx file
    pagemapproc = PageMapProcessor(mh, data)
    imgnames.append(None)
    sect.setsectiondescription(i, "PageMap")

    apnx_meta = {}
    apnx_meta['acr'] = str(sect.palmname).rstrip('\x00')
    apnx_meta['cdeType'] = mh.metadata['cdeType'][0]
    # the UniqueID is stored as hex digits without the leading '0x'
    apnx_meta['contentGuid'] = hex(int(mh.metadata['UniqueID'][0]))[2:]
    apnx_meta['asin'] = mh.metadata['ASIN'][0]
    apnx_meta['pageMap'] = pagemapproc.getPageMap()
    apnx_meta['format'] = 'MOBI_8' if mh.version == 8 else 'MOBI_7'
    apnx_data = pagemapproc.generateAPNX(apnx_meta)

    # NOTE(review): the 'format' field is chosen by mh.version while the
    # file name prefix is chosen by mh.isK8(); kept as in the original.
    prefix = 'mobi8-' if mh.isK8() else 'mobi7-'
    outname = os.path.join(
        files.outdir, prefix + files.getInputFileBasename() + '.apnx')

    # use a context manager so the handle is flushed and closed right away
    with open(pathof(outname), 'wb') as apnx_file:
        apnx_file.write(apnx_data)
    return imgnames, pagemapproc
# Example #2
# 0
def processPAGE(i, files, rscnames, sect, data, mh, pagemapproc):
    """Process a page map section and write the result out as an .apnx file.

    Args:
        i: index of the section; it is labelled "PageMap" on ``sect``.
        files: supplies ``outdir`` and ``getInputFileBasename()`` used to
            build the output path.
        rscnames: list to which ``None`` is appended for this section.
        sect: section object; its ``palmname`` (trailing NULs stripped)
            becomes the 'acr' entry of the apnx metadata.
        data: passed to ``PageMapProcessor`` together with ``mh``.
        mh: header object supplying ``metadata``, ``version`` and ``isK8()``.
        pagemapproc: the incoming value is not used; it is always replaced
            by a new ``PageMapProcessor(mh, data)``.

    Returns:
        The tuple ``(rscnames, pagemapproc)`` where ``pagemapproc`` is the
        newly created processor.
    """
    # process any page map information and create an apnx file
    pagemapproc = PageMapProcessor(mh, data)
    rscnames.append(None)
    sect.setsectiondescription(i, "PageMap")

    apnx_meta = {}
    apnx_meta['acr'] = str(sect.palmname).rstrip('\x00')
    apnx_meta['cdeType'] = mh.metadata['cdeType'][0]
    # the UniqueID is stored as hex digits without the leading '0x'
    apnx_meta['contentGuid'] = hex(int(mh.metadata['UniqueID'][0]))[2:]
    apnx_meta['asin'] = mh.metadata['ASIN'][0]
    apnx_meta['pageMap'] = pagemapproc.getPageMap()
    apnx_meta['format'] = 'MOBI_8' if mh.version == 8 else 'MOBI_7'
    apnx_data = pagemapproc.generateAPNX(apnx_meta)

    # NOTE(review): the 'format' field is chosen by mh.version while the
    # file name prefix is chosen by mh.isK8(); kept as in the original.
    prefix = 'mobi8-' if mh.isK8() else 'mobi7-'
    outname = os.path.join(
        files.outdir, prefix + files.getInputFileBasename() + '.apnx')

    # use a context manager so the handle is flushed and closed right away
    with open(pathof(outname), 'wb') as apnx_file:
        apnx_file.write(apnx_data)
    return rscnames, pagemapproc
# Example #3
# 0
def processMobi8(mh,
                 metadata,
                 sect,
                 files,
                 rscnames,
                 pagemapproc,
                 k8resc,
                 obfuscate_data,
                 apnxfile=None,
                 epubver='2'):
    """Unpack the KF8 (mobi8) content into an epub-like file structure.

    In order, the function: optionally dumps the raw markup, builds the
    parts with ``K8Processor``, collects the guide text, writes the page
    map (if any), extends the ncx data with file names and id tags, builds
    the xhtml, optionally creates a cover page, writes every part and every
    'file' flow to disk, writes the OPF, writes the NCX and/or NAV document
    and finally asks ``files`` to assemble the epub.

    Args:
        mh: header object; supplies ``getRawML()`` and is handed to the
            various processors.
        metadata: metadata mapping; its 'StartOffset' entry is consulted
            when the guide text is empty.
        sect: section object passed to ``K8Processor``.
        files: supplies ``k8dir``, ``k8oebps``, ``getInputFileBasename()``
            and ``makeEPUB()``.
        rscnames: resource names passed to ``XHTMLK8Processor``,
            ``CoverProcessor`` and ``OPFProcessor``.
        pagemapproc: an existing page map processor, or ``None``.
        k8resc: object exposing ``hasSpine()``, ``spine_idrefs``,
            ``spine_order`` and ``prepend_to_spine()``, or ``None``.
        obfuscate_data: passed to ``files.makeEPUB``; its truthiness is
            passed to ``opf.writeOPF``.
        apnxfile: optional path of an apnx file; only read when
            ``pagemapproc`` is ``None``.
        epubver: forwarded to ``OPFProcessor``.

    Returns:
        None. All results are written to disk through ``files``.
    """
    global DUMP
    global WRITE_RAW_DATA

    # extract raw markup language
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.k8dir,
                              files.getInputFileBasename() + '.rawml')
        with open(pathof(outraw), 'wb') as rawfile:
            rawfile.write(rawML)

    # KF8 require other indexes which contain parsing information and the FDST info
    # to process the rawml back into the xhtml files, css files, svg image files, etc
    k8proc = K8Processor(mh, sect, files, DUMP)
    k8proc.buildParts(rawML)

    # collect information for the guide first
    guidetext = k8proc.getGuideText()

    # if the guide was empty, add in any guide info from metadata, such as StartOffset
    if not guidetext and 'StartOffset' in metadata:
        # Apparently, KG 2.5 carries over the StartOffset from the mobi7 part...
        # Taking that into account, we only care about the *last* StartOffset, which
        # should always be the correct one in these cases (the one actually pointing
        # to the right place in the mobi8 part).
        last_start = int(metadata['StartOffset'][-1])
        if last_start == 0xffffffff:
            last_start = 0
        # only the sequence number of the fragment table entry is needed
        seq, _ = k8proc.getFragTblInfo(last_start)
        filename, idtext = k8proc.getIDTagByPosFid(toBase32(seq), '0000000000')
        linktgt = filename
        if idtext != '':
            linktgt += '#' + idtext
        guidetext += '<reference type="text" href="Text/%s" />\n' % linktgt

    # if apnxfile is passed in use it for page map information
    if apnxfile is not None and pagemapproc is None:
        with open(apnxfile, 'rb') as apnx:
            # NOTE(review): eight '0' characters are prepended to the file
            # contents; presumably padding expected by PageMapProcessor -
            # confirm against that class.
            apnxdata = "00000000" + apnx.read()
        pagemapproc = PageMapProcessor(mh, apnxdata)

    # generate the page map
    # (single-argument parenthesized prints give the same output under
    # Python 2 and also parse under Python 3)
    pagemapxml = ''
    if pagemapproc is not None:
        pagemapxml = pagemapproc.generateKF8PageMapXML(k8proc)
        outpm = os.path.join(files.k8oebps, 'page-map.xml')
        with open(pathof(outpm), 'wb') as pmfile:
            pmfile.write(pagemapxml)
        if DUMP:
            print(pagemapproc.getNames())
            print(pagemapproc.getOffsets())
            print("\n\nPage Map")
            print(pagemapxml)

    # process the toc ncx
    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
    print("Processing ncx / toc")
    ncx = ncxExtract(mh, files)
    ncx_data = ncx.parseNCX()
    # extend the ncx data with filenames and proper internal idtags
    # (each entry is updated in place)
    for ncxmap in ncx_data:
        # 'pos_fid' must hold exactly six ':'-separated fields; only the
        # fourth (fid) and the sixth (off) are used
        _, _, _, fid, _, off = ncxmap['pos_fid'].split(':')
        filename, idtag = k8proc.getIDTagByPosFid(fid, off)
        ncxmap['filename'] = filename
        ncxmap['idtag'] = idtag

    # convert the rawML to a set of xhtml files
    print("Building an epub-like structure")
    htmlproc = XHTMLK8Processor(rscnames, k8proc)
    usedmap = htmlproc.buildXHTML()

    # write out the xhtml svg, and css files
    # fileinfo = [skelid|coverpage, dir, name]
    fileinfo = []
    # first create a cover page if none exists
    if CREATE_COVER_PAGE:
        cover = CoverProcessor(files, metadata, rscnames)
        cover_img = cover.getImageName()
        need_to_create_cover_page = False
        if cover_img is not None:
            if k8resc is None or not k8resc.hasSpine():
                # no spine information: look for the cover image in part 0
                part = k8proc.getPart(0)
                if part.find(cover_img) == -1:
                    need_to_create_cover_page = True
            else:
                if "coverpage" not in k8resc.spine_idrefs:
                    part = k8proc.getPart(int(k8resc.spine_order[0]))
                    if part.find(cover_img) == -1:
                        k8resc.prepend_to_spine("coverpage", "inserted", "no",
                                                None)
                if k8resc.spine_order[0] == "coverpage":
                    need_to_create_cover_page = True
            if need_to_create_cover_page:
                filename = cover.getXHTMLName()
                fileinfo.append(["coverpage", 'Text', filename])
                guidetext += cover.guide_toxml()
                cover.writeXHTML()

    for partnum in range(k8proc.getNumberOfParts()):
        part = k8proc.getPart(partnum)
        skelnum, subdir, filename, _beg, _end, _aidtext = k8proc.getPartInfo(partnum)
        fileinfo.append([str(skelnum), subdir, filename])
        fname = os.path.join(files.k8oebps, subdir, filename)
        with open(pathof(fname), 'wb') as partfile:
            partfile.write(part)

    # flow 0 is deliberately not visited by this loop
    for flownum in range(1, k8proc.getNumberOfFlows()):
        _flowtype, flowformat, subdir, filename = k8proc.getFlowInfo(flownum)
        flowpart = k8proc.getFlow(flownum)
        if flowformat == 'file':
            fileinfo.append([None, subdir, filename])
            fname = os.path.join(files.k8oebps, subdir, filename)
            with open(pathof(fname), 'wb') as flowfile:
                flowfile.write(flowpart)

    # create the opf
    opf = OPFProcessor(files, metadata.copy(), fileinfo, rscnames, True, mh,
                       usedmap, pagemapxml, guidetext, k8resc, epubver)
    uuid = opf.writeOPF(bool(obfuscate_data))

    if opf.hasNCX():
        # Create a toc.ncx.
        ncx.writeK8NCX(ncx_data, metadata)
    if opf.hasNAV():
        # Create a navigation document.
        nav = NAVProcessor(files)
        nav.writeNAV(ncx_data, guidetext, metadata)

    # make an epub-like structure of it all
    print("Creating an epub-like file")
    files.makeEPUB(usedmap, obfuscate_data, uuid)
def processMobi8(mh, metadata, sect, files, imgnames, pagemapproc, k8resc, obfuscate_data, apnxfile=None, epubver='2'):
    """Unpack the KF8 (mobi8) content into an epub-like file structure.

    In order, the function: optionally dumps the raw markup, builds the
    parts with ``K8Processor``, collects the guide text, writes the page
    map (if any), extends the ncx data with file names and id tags, builds
    the xhtml, optionally creates a cover page, writes every part and every
    'file' flow to disk, writes the OPF, writes the NCX and/or NAV document
    and finally asks ``files`` to assemble the epub.

    Args:
        mh: header object; supplies ``getRawML()`` and is handed to the
            various processors.
        metadata: metadata mapping; its 'StartOffset' entry is consulted
            when the guide text is empty.
        sect: section object passed to ``K8Processor``.
        files: supplies ``k8dir``, ``k8oebps``, ``getInputFileBasename()``
            and ``makeEPUB()``.
        imgnames: image names passed to ``XHTMLK8Processor``,
            ``CoverProcessor`` and ``OPFProcessor``.
        pagemapproc: an existing page map processor, or ``None``.
        k8resc: object exposing ``hasSpine()``, ``spine_idrefs``,
            ``spine_order`` and ``prepend_to_spine()``, or ``None``.
        obfuscate_data: passed to ``files.makeEPUB``; its truthiness is
            passed to ``opf.writeOPF``.
        apnxfile: optional path of an apnx file; only read when
            ``pagemapproc`` is ``None``.
        epubver: forwarded to ``OPFProcessor``.

    Returns:
        None. All results are written to disk through ``files``.
    """
    global DUMP
    global WRITE_RAW_DATA

    # extract raw markup language
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.k8dir, files.getInputFileBasename() + '.rawml')
        with open(pathof(outraw), 'wb') as rawfile:
            rawfile.write(rawML)

    # KF8 require other indexes which contain parsing information and the FDST info
    # to process the rawml back into the xhtml files, css files, svg image files, etc
    k8proc = K8Processor(mh, sect, files, DUMP)
    k8proc.buildParts(rawML)

    # collect information for the guide first
    guidetext = k8proc.getGuideText()

    # if the guide was empty, add in any guide info from metadata, such as StartOffset
    if not guidetext and 'StartOffset' in metadata:
        # Apparently, KG 2.5 carries over the StartOffset from the mobi7 part...
        # Taking that into account, we only care about the *last* StartOffset, which
        # should always be the correct one in these cases (the one actually pointing
        # to the right place in the mobi8 part).
        last_start = int(metadata['StartOffset'][-1])
        if last_start == 0xffffffff:
            last_start = 0
        # only the sequence number of the fragment table entry is needed
        seq, _ = k8proc.getFragTblInfo(last_start)
        filename, idtext = k8proc.getIDTagByPosFid(toBase32(seq), '0000000000')
        linktgt = filename
        if idtext != '':
            linktgt += '#' + idtext
        guidetext += '<reference type="text" href="Text/%s" />\n' % linktgt

    # if apnxfile is passed in use it for page map information
    if apnxfile is not None and pagemapproc is None:
        with open(apnxfile, 'rb') as apnx:
            # NOTE(review): eight '0' characters are prepended to the file
            # contents; presumably padding expected by PageMapProcessor -
            # confirm against that class.
            apnxdata = "00000000" + apnx.read()
        pagemapproc = PageMapProcessor(mh, apnxdata)

    # generate the page map
    # (single-argument parenthesized prints give the same output under
    # Python 2 and also parse under Python 3)
    pagemapxml = ''
    if pagemapproc is not None:
        pagemapxml = pagemapproc.generateKF8PageMapXML(k8proc)
        outpm = os.path.join(files.k8oebps, 'page-map.xml')
        with open(pathof(outpm), 'wb') as pmfile:
            pmfile.write(pagemapxml)
        if DUMP:
            print(pagemapproc.getNames())
            print(pagemapproc.getOffsets())
            print("\n\nPage Map")
            print(pagemapxml)

    # process the toc ncx
    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
    print("Processing ncx / toc")
    ncx = ncxExtract(mh, files)
    ncx_data = ncx.parseNCX()
    # extend the ncx data with filenames and proper internal idtags
    # (each entry is updated in place)
    for ncxmap in ncx_data:
        # 'pos_fid' must hold exactly six ':'-separated fields; only the
        # fourth (fid) and the sixth (off) are used
        _, _, _, fid, _, off = ncxmap['pos_fid'].split(':')
        filename, idtag = k8proc.getIDTagByPosFid(fid, off)
        ncxmap['filename'] = filename
        ncxmap['idtag'] = idtag

    # convert the rawML to a set of xhtml files
    print("Building an epub-like structure")
    htmlproc = XHTMLK8Processor(imgnames, k8proc)
    usedmap = htmlproc.buildXHTML()

    # write out the xhtml svg, and css files
    # fileinfo = [skelid|coverpage, dir, name]
    fileinfo = []
    # first create a cover page if none exists
    if CREATE_COVER_PAGE:
        cover = CoverProcessor(files, metadata, imgnames)
        cover_img = cover.getImageName()
        need_to_create_cover_page = False
        if cover_img is not None:
            if k8resc is None or not k8resc.hasSpine():
                # no spine information: look for the cover image in part 0
                part = k8proc.getPart(0)
                if part.find(cover_img) == -1:
                    need_to_create_cover_page = True
            else:
                if "coverpage" not in k8resc.spine_idrefs:
                    part = k8proc.getPart(int(k8resc.spine_order[0]))
                    if part.find(cover_img) == -1:
                        k8resc.prepend_to_spine("coverpage", "inserted", "no", None)
                if k8resc.spine_order[0] == "coverpage":
                    need_to_create_cover_page = True
            if need_to_create_cover_page:
                filename = cover.getXHTMLName()
                fileinfo.append(["coverpage", 'Text', filename])
                guidetext += cover.guide_toxml()
                cover.writeXHTML()

    for partnum in range(k8proc.getNumberOfParts()):
        part = k8proc.getPart(partnum)
        skelnum, subdir, filename, _beg, _end, _aidtext = k8proc.getPartInfo(partnum)
        fileinfo.append([str(skelnum), subdir, filename])
        fname = os.path.join(files.k8oebps, subdir, filename)
        with open(pathof(fname), 'wb') as partfile:
            partfile.write(part)

    # flow 0 is deliberately not visited by this loop
    for flownum in range(1, k8proc.getNumberOfFlows()):
        _flowtype, flowformat, subdir, filename = k8proc.getFlowInfo(flownum)
        flowpart = k8proc.getFlow(flownum)
        if flowformat == 'file':
            fileinfo.append([None, subdir, filename])
            fname = os.path.join(files.k8oebps, subdir, filename)
            with open(pathof(fname), 'wb') as flowfile:
                flowfile.write(flowpart)

    # create the opf
    opf = OPFProcessor(files, metadata.copy(), fileinfo, imgnames, True, mh, usedmap, pagemapxml, guidetext, k8resc, epubver)
    uuid = opf.writeOPF(bool(obfuscate_data))

    if opf.hasNCX():
        # Create a toc.ncx.
        ncx.writeK8NCX(ncx_data, metadata)
    if opf.hasNAV():
        # Create a navigation document.
        nav = NAVProcessor(files)
        nav.writeNAV(ncx_data, guidetext, metadata)

    # make an epub-like structure of it all
    print("Creating an epub-like file")
    files.makeEPUB(usedmap, obfuscate_data, uuid)