def _xml_escape(text):
    """Return text with the XML special characters &, < and > escaped."""
    # '&' must be replaced first, otherwise the entities produced for
    # '<' and '>' would themselves be escaped again.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    return text.replace('>', '&gt;')


def _read_binary(path):
    """Return the raw contents of the file at path, closing it afterwards."""
    with open(path, 'rb') as handle:
        return handle.read()


def _write_binary(path, data):
    """Write data to path in binary mode, closing the file afterwards."""
    with open(path, 'wb') as handle:
        handle.write(data)


def _head_meta_tags(meta_array):
    """Return the Author/Title <meta> tags, plus ASIN/GUID tags when present."""
    tags = [
        '<meta name="Author" content="' + meta_array['Authors'] + '" />\n',
        '<meta name="Title" content="' + meta_array['Title'] + '" />\n',
    ]
    for key in ('ASIN', 'GUID'):
        if key in meta_array:
            tags.append('<meta name="' + key + '" content="' + meta_array[key] + '" />\n')
    return ''.join(tags)


def _image_media_type(filename):
    """Return the OPF manifest media-type for an image file name."""
    # splitext keeps the leading dot, so it has to be dropped before the
    # extension is used as the MIME subtype.
    ext = os.path.splitext(filename)[1].lower().lstrip('.')
    if ext == 'jpg':
        ext = 'jpeg'
    elif ext == 'svg':
        ext = 'svg+xml'
    return 'image/' + ext


def _build_opf(meta_array, image_files, has_cover):
    """Return the text of book.opf for the given metadata and image list."""
    olst = []
    olst.append('<?xml version="1.0" encoding="utf-8"?>\n')
    olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
    # adding metadata
    olst.append(' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
    if 'GUID' in meta_array:
        olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
    if 'ASIN' in meta_array:
        olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
    if 'oASIN' in meta_array:
        olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
    olst.append(' <dc:title>' + meta_array['Title'] + '</dc:title>\n')
    olst.append(' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
    olst.append(' <dc:language>en</dc:language>\n')
    # the date element is left out when the metadata carries no UpdateTime
    if 'UpdateTime' in meta_array:
        olst.append(' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
    if has_cover:
        olst.append(' <meta name="cover" content="bookcover"/>\n')
    olst.append(' </metadata>\n')
    olst.append('<manifest>\n')
    olst.append(' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n')
    olst.append(' <item id="stylesheet" href="style.css" media-type="text/css"/>\n')
    # adding image files to manifest
    for filename in image_files:
        imgname = os.path.splitext(filename)[0]
        olst.append(' <item id="' + imgname + '" href="img/' + filename + '" media-type="' + _image_media_type(filename) + '"/>\n')
    if has_cover:
        olst.append(' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n')
    olst.append('</manifest>\n')
    # adding spine
    olst.append('<spine>\n <itemref idref="book" />\n</spine>\n')
    if has_cover:
        olst.append(' <guide>\n')
        olst.append(' <reference href="cover.jpg" type="cover" title="Cover"/>\n')
        olst.append(' </guide>\n')
    olst.append('</package>\n')
    return ''.join(olst)


def generateBook(bookDir, raw, fixedimage):
    """Generate the HTML, SVG and OPF output for an unpacked Topaz book.

    bookDir must contain dict0000.dat, metadata0000.dat, other0000.dat and
    the page/, img/ and glyphs/ directories.  The function writes cover.jpg
    (when img/img0000.jpg exists), style.css, book.html, index_svg.xhtml and
    book.opf into bookDir, glyphs.svg and one file per page into bookDir/svg,
    and XML dumps of the processed .dat files into bookDir/xml.

    Parameters:
        bookDir    -- directory holding the unpacked book
        raw        -- when true the per-page output files are named *.svg,
                      otherwise page%04d.xhtml; the flag is also forwarded
                      to flatxml2svg.convert2SVG
        fixedimage -- forwarded unchanged to flatxml2html.convert2HTML

    Returns 0 on success and 1 when a required input file or directory is
    missing (a message is printed in that case).
    """
    # Function-scope import: only needed for the progress dots below.
    import sys

    # sanity check Topaz file extraction
    if not os.path.exists(bookDir):
        print("Can not find directory with unencrypted book")
        return 1

    dictFile = os.path.join(bookDir, 'dict0000.dat')
    if not os.path.exists(dictFile):
        print("Can not find dict0000.dat file")
        return 1

    pageDir = os.path.join(bookDir, 'page')
    if not os.path.exists(pageDir):
        print("Can not find page directory in unencrypted book")
        return 1

    imgDir = os.path.join(bookDir, 'img')
    if not os.path.exists(imgDir):
        print("Can not find image directory in unencrypted book")
        return 1

    glyphsDir = os.path.join(bookDir, 'glyphs')
    if not os.path.exists(glyphsDir):
        print("Can not find glyphs directory in unencrypted book")
        return 1

    metaFile = os.path.join(bookDir, 'metadata0000.dat')
    if not os.path.exists(metaFile):
        print("Can not find metadata0000.dat in unencrypted book")
        return 1

    svgDir = os.path.join(bookDir, 'svg')
    if not os.path.exists(svgDir):
        os.makedirs(svgDir)

    xmlDir = os.path.join(bookDir, 'xml')
    if not os.path.exists(xmlDir):
        os.makedirs(xmlDir)

    otherFile = os.path.join(bookDir, 'other0000.dat')
    if not os.path.exists(otherFile):
        print("Can not find other0000.dat in unencrypted book")
        return 1

    print("Updating to color images if available")
    spath = os.path.join(bookDir, 'color_img')
    # Color images are optional, so a missing directory is not an error.
    if os.path.isdir(spath):
        for filename in sorted(os.listdir(spath)):
            imgname = filename.replace('color', 'img')
            _write_binary(os.path.join(imgDir, imgname),
                          _read_binary(os.path.join(spath, filename)))

    print("Creating cover.jpg")
    isCover = False
    cpath = os.path.join(imgDir, 'img0000.jpg')
    if os.path.isfile(cpath):
        _write_binary(os.path.join(bookDir, 'cover.jpg'), _read_binary(cpath))
        isCover = True

    print('Processing Dictionary')
    dictionary = Dictionary(dictFile)

    print('Processing Meta Data and creating OPF')
    meta_array = getMetaArray(metaFile)

    # replace special chars in title and authors like & < >
    meta_array['Title'] = _xml_escape(meta_array.get('Title', 'No Title Provided'))
    meta_array['Authors'] = _xml_escape(meta_array.get('Authors', 'No Authors Provided'))

    metastr = ''.join('<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
                      for key in meta_array)
    _write_binary(os.path.join(xmlDir, 'metadata.xml'), metastr)

    print('Processing StyleSheet')
    # get some scaling info from metadata to use while processing styles
    fontsize = meta_array.get('fontSize', '135')

    # also get the size of a normal text page
    pagenames = sorted(os.listdir(pageDir))
    numfiles = len(pagenames)
    pnum = int(meta_array.get('firstTextPage', '1'))
    if pnum >= numfiles or pnum < 0:
        # metadata is wrong so just select a page near the front
        # (10% into the book) to get a normal text page
        pnum = int(0.10 * numfiles)

    # get page height and width from first text page for use in stylesheet scaling
    pname = 'page%04d.dat' % (pnum + 1)
    fname = os.path.join(pageDir, pname)
    flat_xml = convert2xml.fromData(dictionary, fname)
    (ph, pw) = getPageDim(flat_xml)
    if ph in ('-1', '0'):
        ph = '11000'
    if pw in ('-1', '0'):
        pw = '8500'

    flat_xml = convert2xml.fromData(dictionary, otherFile)
    cssstr, classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
    _write_binary(os.path.join(bookDir, 'style.css'), cssstr)
    _write_binary(os.path.join(xmlDir, 'other0000.xml'),
                  convert2xml.getXML(dictionary, otherFile))

    print('Processing Glyphs')
    gd = GlyphDict()
    glyphnames = sorted(os.listdir(glyphsDir))
    with open(os.path.join(svgDir, 'glyphs.svg'), 'w') as glyfile:
        glyfile.write('<?xml version="1.0" standalone="no"?>\n')
        # The public identifier needs the double slash after "W3C".
        glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
        glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
        glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
        glyfile.write('<defs>\n')
        for counter, filename in enumerate(glyphnames):
            sys.stdout.write('. ')
            fname = os.path.join(glyphsDir, filename)
            flat_xml = convert2xml.fromData(dictionary, fname)
            _write_binary(os.path.join(xmlDir, filename.replace('.dat', '.xml')),
                          convert2xml.getXML(dictionary, fname))
            gp = GParser(flat_xml)
            for i in range(gp.count):
                path = gp.getPath(i)
                maxh, maxw = gp.getGlyphDim(i)
                # glyph ids are numbered in blocks of 256 per glyph file
                glyphid = counter * 256 + i
                fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (glyphid, path, maxw, maxh)
                glyfile.write(fullpath)
                gd.addGlyph(glyphid, fullpath)
        glyfile.write('</defs>\n')
        glyfile.write('</svg>\n')
    print(" ")

    # start up the html
    htmlFileName = "book.html"
    hlst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n',
        '<head>\n',
        '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n',
        '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n',
        _head_meta_tags(meta_array),
        '<link href="style.css" rel="stylesheet" type="text/css" />\n',
        '</head>\n<body>\n',
    ]

    print('Processing Pages')
    # Books are at 1440 DPI; this value is handed to the SVG converter.
    # NOTE(review): the earlier comment said pages render at twice that
    # size, but the value passed is 1440.0 - confirm the intended scale.
    scaledpi = 1440.0

    slst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >',
        '<head>\n',
        '<title>' + meta_array['Title'] + '</title>\n',
        _head_meta_tags(meta_array),
        '</head>\n',
        '<body>\n',
    ]

    for counter, filename in enumerate(pagenames):
        sys.stdout.write('. ')
        fname = os.path.join(pageDir, filename)
        flat_xml = convert2xml.fromData(dictionary, fname)
        _write_binary(os.path.join(xmlDir, filename.replace('.dat', '.xml')),
                      convert2xml.getXML(dictionary, fname))

        # first get the html
        hlst.append(flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage))

        # now get the svg image of the page
        svgxml = flatxml2svg.convert2SVG(gd, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi)
        if raw:
            svgname = filename.replace('.dat', '.svg')
            slst.append('<a href="svg/page%04d.svg">Page %d</a>\n' % (counter, counter))
        else:
            svgname = 'page%04d.xhtml' % counter
            slst.append('<a href="svg/page%04d.xhtml">Page %d</a>\n' % (counter, counter))
        with open(os.path.join(svgDir, svgname), 'w') as pfile:
            pfile.write(svgxml)
    print(" ")

    # finish up the html string and output it
    hlst.append('</body>\n</html>\n')
    _write_binary(os.path.join(bookDir, htmlFileName), ''.join(hlst))

    # finish up the svg index string and output it
    slst.append('</body>\n</html>\n')
    _write_binary(os.path.join(bookDir, 'index_svg.xhtml'), ''.join(slst))

    # build the opf file
    opfstr = _build_opf(meta_array, sorted(os.listdir(imgDir)), isCover)
    _write_binary(os.path.join(bookDir, 'book.opf'), opfstr)

    print('Processing Complete')
    return 0
def _xml_escape(text):
    """Return text with the XML special characters &, < and > escaped."""
    # '&' must be replaced first, otherwise the entities produced for
    # '<' and '>' would themselves be escaped again.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    return text.replace('>', '&gt;')


def _read_binary(path):
    """Return the raw contents of the file at path, closing it afterwards."""
    with open(path, 'rb') as handle:
        return handle.read()


def _write_binary(path, data):
    """Write data to path in binary mode, closing the file afterwards."""
    with open(path, 'wb') as handle:
        handle.write(data)


def _head_meta_tags(meta_array):
    """Return the Author/Title <meta> tags, plus ASIN/GUID tags when present."""
    tags = [
        '<meta name="Author" content="' + meta_array['Authors'] + '" />\n',
        '<meta name="Title" content="' + meta_array['Title'] + '" />\n',
    ]
    for key in ('ASIN', 'GUID'):
        if key in meta_array:
            tags.append('<meta name="' + key + '" content="' + meta_array[key] + '" />\n')
    return ''.join(tags)


def _image_media_type(filename):
    """Return the OPF manifest media-type for an image file name."""
    # splitext keeps the leading dot, so it has to be dropped before the
    # extension is used as the MIME subtype.
    ext = os.path.splitext(filename)[1].lower().lstrip('.')
    if ext == 'jpg':
        ext = 'jpeg'
    elif ext == 'svg':
        ext = 'svg+xml'
    return 'image/' + ext


def _build_opf(meta_array, image_files, has_cover):
    """Return the text of book.opf for the given metadata and image list."""
    olst = []
    olst.append('<?xml version="1.0" encoding="utf-8"?>\n')
    olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
    # adding metadata
    olst.append(' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
    if 'GUID' in meta_array:
        olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
    if 'ASIN' in meta_array:
        olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
    if 'oASIN' in meta_array:
        olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
    olst.append(' <dc:title>' + meta_array['Title'] + '</dc:title>\n')
    olst.append(' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
    olst.append(' <dc:language>en</dc:language>\n')
    # the date element is left out when the metadata carries no UpdateTime
    if 'UpdateTime' in meta_array:
        olst.append(' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
    if has_cover:
        olst.append(' <meta name="cover" content="bookcover"/>\n')
    olst.append(' </metadata>\n')
    olst.append('<manifest>\n')
    olst.append(' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n')
    olst.append(' <item id="stylesheet" href="style.css" media-type="text/css"/>\n')
    # adding image files to manifest
    for filename in image_files:
        imgname = os.path.splitext(filename)[0]
        olst.append(' <item id="' + imgname + '" href="img/' + filename + '" media-type="' + _image_media_type(filename) + '"/>\n')
    if has_cover:
        olst.append(' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n')
    olst.append('</manifest>\n')
    # adding spine
    olst.append('<spine>\n <itemref idref="book" />\n</spine>\n')
    if has_cover:
        olst.append(' <guide>\n')
        olst.append(' <reference href="cover.jpg" type="cover" title="Cover"/>\n')
        olst.append(' </guide>\n')
    olst.append('</package>\n')
    return ''.join(olst)


def generateBook(bookDir, raw, fixedimage):
    """Generate the HTML, SVG, TOC and OPF output for an unpacked Topaz book.

    bookDir must contain dict0000.dat, metadata0000.dat, other0000.dat and
    the page/, img/ and glyphs/ directories.  The function writes cover.jpg
    (when img/img0000.jpg exists), style.css, book.html, index_svg.xhtml and
    book.opf into bookDir, and glyphs.svg, toc.xhtml and one file per page id
    into bookDir/svg.  When the module-level buildXML flag is true, XML dumps
    of the processed .dat files are also written into bookDir/xml.

    Parameters:
        bookDir    -- directory holding the unpacked book
        raw        -- when true the per-page output files and links use
                      page%04d.svg, otherwise page%04d.xhtml; the flag is
                      also forwarded to flatxml2svg.convert2SVG
        fixedimage -- forwarded unchanged to flatxml2html.convert2HTML

    Returns 0 on success and 1 when a required input file or directory is
    missing (a message is printed in that case).
    """
    # Function-scope import: only needed for the progress dots below.
    import sys

    # sanity check Topaz file extraction
    if not os.path.exists(bookDir):
        print("Can not find directory with unencrypted book")
        return 1

    dictFile = os.path.join(bookDir, 'dict0000.dat')
    if not os.path.exists(dictFile):
        print("Can not find dict0000.dat file")
        return 1

    pageDir = os.path.join(bookDir, 'page')
    if not os.path.exists(pageDir):
        print("Can not find page directory in unencrypted book")
        return 1

    imgDir = os.path.join(bookDir, 'img')
    if not os.path.exists(imgDir):
        print("Can not find image directory in unencrypted book")
        return 1

    glyphsDir = os.path.join(bookDir, 'glyphs')
    if not os.path.exists(glyphsDir):
        print("Can not find glyphs directory in unencrypted book")
        return 1

    metaFile = os.path.join(bookDir, 'metadata0000.dat')
    if not os.path.exists(metaFile):
        print("Can not find metadata0000.dat in unencrypted book")
        return 1

    svgDir = os.path.join(bookDir, 'svg')
    if not os.path.exists(svgDir):
        os.makedirs(svgDir)

    # xmlDir is only defined (and only used) when XML dumps are requested
    if buildXML:
        xmlDir = os.path.join(bookDir, 'xml')
        if not os.path.exists(xmlDir):
            os.makedirs(xmlDir)

    otherFile = os.path.join(bookDir, 'other0000.dat')
    if not os.path.exists(otherFile):
        print("Can not find other0000.dat in unencrypted book")
        return 1

    print("Updating to color images if available")
    spath = os.path.join(bookDir, 'color_img')
    # Color images are optional, so a missing directory is not an error.
    if os.path.isdir(spath):
        for filename in sorted(os.listdir(spath)):
            imgname = filename.replace('color', 'img')
            _write_binary(os.path.join(imgDir, imgname),
                          _read_binary(os.path.join(spath, filename)))

    print("Creating cover.jpg")
    isCover = False
    cpath = os.path.join(imgDir, 'img0000.jpg')
    if os.path.isfile(cpath):
        _write_binary(os.path.join(bookDir, 'cover.jpg'), _read_binary(cpath))
        isCover = True

    print('Processing Dictionary')
    dictionary = Dictionary(dictFile)

    print('Processing Meta Data and creating OPF')
    meta_array = getMetaArray(metaFile)

    # replace special chars in title and authors like & < >
    meta_array['Title'] = _xml_escape(meta_array.get('Title', 'No Title Provided'))
    meta_array['Authors'] = _xml_escape(meta_array.get('Authors', 'No Authors Provided'))

    if buildXML:
        metastr = ''.join('<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
                          for key in meta_array)
        _write_binary(os.path.join(xmlDir, 'metadata.xml'), metastr)

    print('Processing StyleSheet')
    # get some scaling info from metadata to use while processing styles
    # and first page info
    fontsize = meta_array.get('fontSize', '135')

    # also get the size of a normal text page
    # get the total number of pages unpacked as a safety check
    numfiles = len(os.listdir(pageDir))
    pnum = int(meta_array.get('firstTextPage', '1'))
    if pnum >= numfiles or pnum < 0:
        # metadata is wrong so just select a page near the front
        # 10% of the book to get a normal text page
        pnum = int(0.10 * numfiles)

    # get page height and width from first text page for use in stylesheet scaling
    pname = 'page%04d.dat' % (pnum + 1)
    fname = os.path.join(pageDir, pname)
    flat_xml = convert2xml.fromData(dictionary, fname)
    (ph, pw) = getPageDim(flat_xml)
    if ph in ('-1', '0'):
        ph = '11000'
    if pw in ('-1', '0'):
        pw = '8500'
    meta_array['pageHeight'] = ph
    meta_array['pageWidth'] = pw
    if 'fontSize' not in meta_array:
        meta_array['fontSize'] = fontsize

    # process other.dat for css info and for map of page files to svg images
    # this map is needed because some pages actually are made up of multiple
    # pageXXXX.xml files
    flat_xml = convert2xml.fromData(dictionary, otherFile)

    # extract info.original.pid to get original page information
    pageidnums = stylexml2css.getpageIDMap(flat_xml)
    if len(pageidnums) == 0:
        # no page id information: treat every page file as its own page
        for k in range(numfiles):
            pageidnums.append(k)

    # create a map from page ids to list of page file nums to process for that page
    pageIDMap = {}
    for filenum, pageid in enumerate(pageidnums):
        pageIDMap.setdefault(pageid, []).append(filenum)

    # now get the css info
    cssstr, classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
    _write_binary(os.path.join(bookDir, 'style.css'), cssstr)
    if buildXML:
        _write_binary(os.path.join(xmlDir, 'other0000.xml'),
                      convert2xml.getXML(dictionary, otherFile))

    print('Processing Glyphs')
    gd = GlyphDict()
    glyphnames = sorted(os.listdir(glyphsDir))
    with open(os.path.join(svgDir, 'glyphs.svg'), 'w') as glyfile:
        glyfile.write('<?xml version="1.0" standalone="no"?>\n')
        # The public identifier needs the double slash after "W3C".
        glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
        glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
        glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
        glyfile.write('<defs>\n')
        for counter, filename in enumerate(glyphnames):
            sys.stdout.write('. ')
            fname = os.path.join(glyphsDir, filename)
            flat_xml = convert2xml.fromData(dictionary, fname)
            if buildXML:
                _write_binary(os.path.join(xmlDir, filename.replace('.dat', '.xml')),
                              convert2xml.getXML(dictionary, fname))
            gp = GParser(flat_xml)
            for i in range(gp.count):
                path = gp.getPath(i)
                maxh, maxw = gp.getGlyphDim(i)
                # glyph ids are numbered in blocks of 256 per glyph file
                glyphid = counter * 256 + i
                fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (glyphid, path, maxw, maxh)
                glyfile.write(fullpath)
                gd.addGlyph(glyphid, fullpath)
        glyfile.write('</defs>\n')
        glyfile.write('</svg>\n')
    print(" ")

    # start up the html
    # also build up tocentries while processing html
    htmlFileName = "book.html"
    hlst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n',
        '<head>\n',
        '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n',
        '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n',
        _head_meta_tags(meta_array),
        '<link href="style.css" rel="stylesheet" type="text/css" />\n',
        '</head>\n<body>\n',
    ]

    print('Processing Pages')
    # Books are at 1440 DPI; this value is handed to the SVG converter.
    # NOTE(review): the earlier comment said pages render at twice that
    # size, but the value passed is 1440.0 - confirm the intended scale.
    scaledpi = 1440.0

    pagenames = sorted(os.listdir(pageDir))
    xmllst = []
    elst = []
    for filename in pagenames:
        sys.stdout.write('. ')
        fname = os.path.join(pageDir, filename)
        flat_xml = convert2xml.fromData(dictionary, fname)

        # keep flat_xml for later svg processing
        xmllst.append(flat_xml)

        if buildXML:
            _write_binary(os.path.join(xmlDir, filename.replace('.dat', '.xml')),
                          convert2xml.getXML(dictionary, fname))

        # first get the html
        pagehtml, tocinfo = flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage)
        elst.append(tocinfo)
        hlst.append(pagehtml)

    # finish up the html string and output it
    hlst.append('</body>\n</html>\n')
    _write_binary(os.path.join(bookDir, htmlFileName), ''.join(hlst))
    hlst = None
    print(" ")

    print('Extracting Table of Contents from Amazon OCR')

    # svg page files and the links to them share one extension
    if raw:
        pagefmt = 'page%04d.svg'
    else:
        pagefmt = 'page%04d.xhtml'

    # first create a table of contents file for the svg images
    tlst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >',
        '<head>\n',
        '<title>' + meta_array['Title'] + '</title>\n',
        _head_meta_tags(meta_array),
        '</head>\n',
        '<body>\n',
        '<h2>Table of Contents</h2>\n',
    ]
    startname = pagefmt % pageidnums[0]
    tlst.append('<h3><a href="' + startname + '">Start of Book</a></h3>\n')

    # build up a table of contents for the svg xhtml output
    toclst = "".join(elst).split('\n')
    elst = None
    # the text after the final newline is not an entry
    toclst.pop()
    for entry in toclst:
        print(entry)
        # entries look like "title|pagenum"; split on the last '|' so that
        # a '|' inside the title does not break the unpacking
        toctitle, pagenum = entry.rsplit('|', 1)
        tocname = pagefmt % pageidnums[int(pagenum)]
        tlst.append('<h3><a href="' + tocname + '">' + toctitle + '</a></h3>\n')
    tlst.append('</body>\n')
    tlst.append('</html>\n')
    _write_binary(os.path.join(svgDir, 'toc.xhtml'), ''.join(tlst))

    # now create index_svg.xhtml that points to all required files
    slst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >',
        '<head>\n',
        '<title>' + meta_array['Title'] + '</title>\n',
        _head_meta_tags(meta_array),
        '</head>\n',
        '<body>\n',
    ]

    print("Building svg images of each book page")
    slst.append('<h2>List of Pages</h2>\n')
    slst.append('<div>\n')
    idlst = sorted(pageIDMap.keys())
    cnt = len(idlst)
    previd = None
    for j, pageid in enumerate(idlst):
        if j < cnt - 1:
            nextid = idlst[j + 1]
        else:
            nextid = None
        sys.stdout.write('. ')
        # a page id may be made up of several page files; render them together
        flat_svg = "".join(xmllst[page] for page in pageIDMap[pageid])
        svgxml = flatxml2svg.convert2SVG(gd, flat_svg, pageid, previd, nextid, svgDir, raw, meta_array, scaledpi)
        if raw:
            slst.append('<a href="svg/page%04d.svg">Page %d</a>\n' % (pageid, pageid))
        else:
            slst.append('<a href="svg/page%04d.xhtml">Page %d</a>\n' % (pageid, pageid))
        with open(os.path.join(svgDir, pagefmt % pageid), 'w') as pfile:
            pfile.write(svgxml)
        previd = pageid
    slst.append('</div>\n')
    slst.append('<h2><a href="svg/toc.xhtml">Table of Contents</a></h2>\n')
    slst.append('</body>\n</html>\n')
    _write_binary(os.path.join(bookDir, 'index_svg.xhtml'), ''.join(slst))
    slst = None
    print(" ")

    # build the opf file
    opfstr = _build_opf(meta_array, sorted(os.listdir(imgDir)), isCover)
    _write_binary(os.path.join(bookDir, 'book.opf'), opfstr)

    print('Processing Complete')
    return 0
def _xml_escape(text):
    """Return text with the XML special characters &, < and > escaped."""
    # '&' must be replaced first, otherwise the entities produced for
    # '<' and '>' would themselves be escaped again.
    text = text.replace('&', '&amp;')
    text = text.replace('<', '&lt;')
    return text.replace('>', '&gt;')


def _read_binary(path):
    """Return the raw contents of the file at path, closing it afterwards."""
    with open(path, 'rb') as handle:
        return handle.read()


def _write_binary(path, data):
    """Write data to path in binary mode, closing the file afterwards."""
    with open(path, 'wb') as handle:
        handle.write(data)


def _head_meta_tags(meta_array):
    """Return the Author/Title <meta> tags, plus ASIN/GUID tags when present."""
    tags = [
        '<meta name="Author" content="' + meta_array['Authors'] + '" />\n',
        '<meta name="Title" content="' + meta_array['Title'] + '" />\n',
    ]
    for key in ('ASIN', 'GUID'):
        if key in meta_array:
            tags.append('<meta name="' + key + '" content="' + meta_array[key] + '" />\n')
    return ''.join(tags)


def _image_media_type(filename):
    """Return the OPF manifest media-type for an image file name."""
    # splitext keeps the leading dot, so it has to be dropped before the
    # extension is used as the MIME subtype.
    ext = os.path.splitext(filename)[1].lower().lstrip('.')
    if ext == 'jpg':
        ext = 'jpeg'
    elif ext == 'svg':
        ext = 'svg+xml'
    return 'image/' + ext


def _build_opf(meta_array, image_files, has_cover):
    """Return the text of book.opf for the given metadata and image list."""
    olst = []
    olst.append('<?xml version="1.0" encoding="utf-8"?>\n')
    olst.append('<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="guid_id">\n')
    # adding metadata
    olst.append(' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n')
    if 'GUID' in meta_array:
        olst.append(' <dc:identifier opf:scheme="GUID" id="guid_id">' + meta_array['GUID'] + '</dc:identifier>\n')
    if 'ASIN' in meta_array:
        olst.append(' <dc:identifier opf:scheme="ASIN">' + meta_array['ASIN'] + '</dc:identifier>\n')
    if 'oASIN' in meta_array:
        olst.append(' <dc:identifier opf:scheme="oASIN">' + meta_array['oASIN'] + '</dc:identifier>\n')
    olst.append(' <dc:title>' + meta_array['Title'] + '</dc:title>\n')
    olst.append(' <dc:creator opf:role="aut">' + meta_array['Authors'] + '</dc:creator>\n')
    olst.append(' <dc:language>en</dc:language>\n')
    # the date element is left out when the metadata carries no UpdateTime
    if 'UpdateTime' in meta_array:
        olst.append(' <dc:date>' + meta_array['UpdateTime'] + '</dc:date>\n')
    if has_cover:
        olst.append(' <meta name="cover" content="bookcover"/>\n')
    olst.append(' </metadata>\n')
    olst.append('<manifest>\n')
    olst.append(' <item id="book" href="book.html" media-type="application/xhtml+xml"/>\n')
    olst.append(' <item id="stylesheet" href="style.css" media-type="text/css"/>\n')
    # adding image files to manifest
    for filename in image_files:
        imgname = os.path.splitext(filename)[0]
        olst.append(' <item id="' + imgname + '" href="img/' + filename + '" media-type="' + _image_media_type(filename) + '"/>\n')
    if has_cover:
        olst.append(' <item id="bookcover" href="cover.jpg" media-type="image/jpeg" />\n')
    olst.append('</manifest>\n')
    # adding spine
    olst.append('<spine>\n <itemref idref="book" />\n</spine>\n')
    if has_cover:
        olst.append(' <guide>\n')
        olst.append(' <reference href="cover.jpg" type="cover" title="Cover"/>\n')
        olst.append(' </guide>\n')
    olst.append('</package>\n')
    return ''.join(olst)


def generateBook(bookDir, raw, fixedimage):
    """Generate the HTML, SVG and OPF output for an unpacked Topaz book.

    bookDir must contain dict0000.dat, metadata0000.dat, other0000.dat and
    the page/, img/ and glyphs/ directories.  The function writes cover.jpg
    (when img/img0000.jpg exists), style.css, book.html, index_svg.xhtml and
    book.opf into bookDir, glyphs.svg and one file per page into bookDir/svg,
    and XML dumps of the processed .dat files into bookDir/xml.

    Parameters:
        bookDir    -- directory holding the unpacked book
        raw        -- when true the per-page output files are named *.svg,
                      otherwise page%04d.xhtml; the flag is also forwarded
                      to flatxml2svg.convert2SVG
        fixedimage -- forwarded unchanged to flatxml2html.convert2HTML

    Returns 0 on success and 1 when a required input file or directory is
    missing (a message is printed in that case).
    """
    # Function-scope import: only needed for the progress dots below.
    import sys

    # sanity check Topaz file extraction
    if not os.path.exists(bookDir):
        print("Can not find directory with unencrypted book")
        return 1

    dictFile = os.path.join(bookDir, 'dict0000.dat')
    if not os.path.exists(dictFile):
        print("Can not find dict0000.dat file")
        return 1

    pageDir = os.path.join(bookDir, 'page')
    if not os.path.exists(pageDir):
        print("Can not find page directory in unencrypted book")
        return 1

    imgDir = os.path.join(bookDir, 'img')
    if not os.path.exists(imgDir):
        print("Can not find image directory in unencrypted book")
        return 1

    glyphsDir = os.path.join(bookDir, 'glyphs')
    if not os.path.exists(glyphsDir):
        print("Can not find glyphs directory in unencrypted book")
        return 1

    metaFile = os.path.join(bookDir, 'metadata0000.dat')
    if not os.path.exists(metaFile):
        print("Can not find metadata0000.dat in unencrypted book")
        return 1

    svgDir = os.path.join(bookDir, 'svg')
    if not os.path.exists(svgDir):
        os.makedirs(svgDir)

    xmlDir = os.path.join(bookDir, 'xml')
    if not os.path.exists(xmlDir):
        os.makedirs(xmlDir)

    otherFile = os.path.join(bookDir, 'other0000.dat')
    if not os.path.exists(otherFile):
        print("Can not find other0000.dat in unencrypted book")
        return 1

    print("Updating to color images if available")
    spath = os.path.join(bookDir, 'color_img')
    # Color images are optional, so a missing directory is not an error.
    if os.path.isdir(spath):
        for filename in sorted(os.listdir(spath)):
            imgname = filename.replace('color', 'img')
            _write_binary(os.path.join(imgDir, imgname),
                          _read_binary(os.path.join(spath, filename)))

    print("Creating cover.jpg")
    isCover = False
    cpath = os.path.join(imgDir, 'img0000.jpg')
    if os.path.isfile(cpath):
        _write_binary(os.path.join(bookDir, 'cover.jpg'), _read_binary(cpath))
        isCover = True

    print('Processing Dictionary')
    dictionary = Dictionary(dictFile)

    print('Processing Meta Data and creating OPF')
    meta_array = getMetaArray(metaFile)

    # replace special chars in title and authors like & < >
    meta_array['Title'] = _xml_escape(meta_array.get('Title', 'No Title Provided'))
    meta_array['Authors'] = _xml_escape(meta_array.get('Authors', 'No Authors Provided'))

    metastr = ''.join('<meta name="' + key + '" content="' + meta_array[key] + '" />\n'
                      for key in meta_array)
    _write_binary(os.path.join(xmlDir, 'metadata.xml'), metastr)

    print('Processing StyleSheet')
    # get some scaling info from metadata to use while processing styles
    fontsize = meta_array.get('fontSize', '135')

    # also get the size of a normal text page
    pagenames = sorted(os.listdir(pageDir))
    numfiles = len(pagenames)
    pnum = int(meta_array.get('firstTextPage', '1'))
    if pnum >= numfiles or pnum < 0:
        # metadata is wrong so just select a page near the front
        # (10% into the book) to get a normal text page
        pnum = int(0.10 * numfiles)

    # get page height and width from first text page for use in stylesheet scaling
    pname = 'page%04d.dat' % (pnum + 1)
    fname = os.path.join(pageDir, pname)
    flat_xml = convert2xml.fromData(dictionary, fname)
    (ph, pw) = getPageDim(flat_xml)
    if ph in ('-1', '0'):
        ph = '11000'
    if pw in ('-1', '0'):
        pw = '8500'

    flat_xml = convert2xml.fromData(dictionary, otherFile)
    cssstr, classlst = stylexml2css.convert2CSS(flat_xml, fontsize, ph, pw)
    _write_binary(os.path.join(bookDir, 'style.css'), cssstr)
    _write_binary(os.path.join(xmlDir, 'other0000.xml'),
                  convert2xml.getXML(dictionary, otherFile))

    print('Processing Glyphs')
    gd = GlyphDict()
    glyphnames = sorted(os.listdir(glyphsDir))
    with open(os.path.join(svgDir, 'glyphs.svg'), 'w') as glyfile:
        glyfile.write('<?xml version="1.0" standalone="no"?>\n')
        # The public identifier needs the double slash after "W3C".
        glyfile.write('<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">\n')
        glyfile.write('<svg width="512" height="512" viewBox="0 0 511 511" xmlns="http://www.w3.org/2000/svg" version="1.1">\n')
        glyfile.write('<title>Glyphs for %s</title>\n' % meta_array['Title'])
        glyfile.write('<defs>\n')
        for counter, filename in enumerate(glyphnames):
            sys.stdout.write('. ')
            fname = os.path.join(glyphsDir, filename)
            flat_xml = convert2xml.fromData(dictionary, fname)
            _write_binary(os.path.join(xmlDir, filename.replace('.dat', '.xml')),
                          convert2xml.getXML(dictionary, fname))
            gp = GParser(flat_xml)
            for i in range(gp.count):
                path = gp.getPath(i)
                maxh, maxw = gp.getGlyphDim(i)
                # glyph ids are numbered in blocks of 256 per glyph file
                glyphid = counter * 256 + i
                fullpath = '<path id="gl%d" d="%s" fill="black" /><!-- width=%d height=%d -->\n' % (glyphid, path, maxw, maxh)
                glyfile.write(fullpath)
                gd.addGlyph(glyphid, fullpath)
        glyfile.write('</defs>\n')
        glyfile.write('</svg>\n')
    print(" ")

    # start up the html
    htmlFileName = "book.html"
    hlst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1 Strict//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11-strict.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n',
        '<head>\n',
        '<meta http-equiv="content-type" content="text/html; charset=utf-8"/>\n',
        '<title>' + meta_array['Title'] + ' by ' + meta_array['Authors'] + '</title>\n',
        _head_meta_tags(meta_array),
        '<link href="style.css" rel="stylesheet" type="text/css" />\n',
        '</head>\n<body>\n',
    ]

    print('Processing Pages')
    # Books are at 1440 DPI; this value is handed to the SVG converter.
    # NOTE(review): the earlier comment said pages render at twice that
    # size, but the value passed is 1440.0 - confirm the intended scale.
    scaledpi = 1440.0

    slst = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" >',
        '<head>\n',
        '<title>' + meta_array['Title'] + '</title>\n',
        _head_meta_tags(meta_array),
        '</head>\n',
        '<body>\n',
    ]

    for counter, filename in enumerate(pagenames):
        sys.stdout.write('. ')
        fname = os.path.join(pageDir, filename)
        flat_xml = convert2xml.fromData(dictionary, fname)
        _write_binary(os.path.join(xmlDir, filename.replace('.dat', '.xml')),
                      convert2xml.getXML(dictionary, fname))

        # first get the html
        hlst.append(flatxml2html.convert2HTML(flat_xml, classlst, fname, bookDir, gd, fixedimage))

        # now get the svg image of the page
        svgxml = flatxml2svg.convert2SVG(gd, flat_xml, counter, numfiles, svgDir, raw, meta_array, scaledpi)
        if raw:
            svgname = filename.replace('.dat', '.svg')
            slst.append('<a href="svg/page%04d.svg">Page %d</a>\n' % (counter, counter))
        else:
            svgname = 'page%04d.xhtml' % counter
            slst.append('<a href="svg/page%04d.xhtml">Page %d</a>\n' % (counter, counter))
        with open(os.path.join(svgDir, svgname), 'w') as pfile:
            pfile.write(svgxml)
    print(" ")

    # finish up the html string and output it
    hlst.append('</body>\n</html>\n')
    _write_binary(os.path.join(bookDir, htmlFileName), ''.join(hlst))

    # finish up the svg index string and output it
    slst.append('</body>\n</html>\n')
    _write_binary(os.path.join(bookDir, 'index_svg.xhtml'), ''.join(slst))

    # build the opf file
    opfstr = _build_opf(meta_array, sorted(os.listdir(imgDir)), isCover)
    _write_binary(os.path.join(bookDir, 'book.opf'), opfstr)

    print('Processing Complete')
    return 0