def makehtml(itemid,doc,abbyyfile,abbyyhtml,scanfile,metafile,olid):
    """Build a complete HTML document from an Abbyy scan file.

    NOTE(review): this file contains a later ``def makehtml(bookid, ...)``;
    if both live in the same scope the later definition replaces this one.

    Parameters:
      itemid    -- item identifier; passed to ``makehtmlbody`` and used as
                   the id prefix when ``olid`` is falsy.
      doc       -- passed through to ``makehtmlbody`` unchanged.
      abbyyfile -- path of the Abbyy file; opened with ``open()``.
      abbyyhtml -- object whose ``makehtmlbody`` method produces the body
                   paragraphs.  (This parameter shadows any module-level
                   name ``abbyyhtml`` inside this function.)
      scanfile  -- path of the scan data file, parsed by ``parse_scandata``.
      metafile  -- passed to ``getmetaxml`` to obtain the metadata handed
                   to ``bighead.bighead``.
      olid      -- if truthy, used instead of ``itemid`` as the id prefix;
                   always passed to ``bighead.bighead``.

    Returns the document as a single string: XML/doctype preamble, a
    ``<head>`` built by ``bighead.bighead`` (including one generated CSS
    rule per entry of the class map), then every paragraph returned by
    ``makehtmlbody`` on its own line inside ``<body class='abbyytext'>``.
    """
    classmap = {}

    # Parse the scan data and release the file handle right away.
    with open(scanfile) as scanstream:
        scaninfo = parse_scandata(scanstream)

    bookid = olid if olid else itemid

    with open(abbyyfile) as abbyystream:
        metadata = getmetaxml(metafile)
        # Materialise the paragraphs while the stream is still open, in
        # case makehtmlbody reads from it lazily.
        pars = list(abbyyhtml.makehtmlbody(abbyystream, bookid, itemid, doc,
                                           classmap=classmap,
                                           scaninfo=scaninfo))

    # classmap maps CSS declaration text -> generated class name; keys
    # starting with '%' are bookkeeping entries ('%histogram' holds the
    # per-class usage counts) and are not emitted as rules.
    classhist = classmap['%histogram']
    rules = []
    for x in classmap:
        if not x.startswith('%'):
            rules.append(".%s { %s } /* used %d times */\n" %
                         (classmap[x], x, classhist[classmap[x]]))
    style = abbyy_css + "".join(rules)

    # NOTE(review): the XML declaration appears twice in this literal,
    # which is not well-formed XML.  Kept byte-for-byte so existing
    # output does not change; confirm whether the duplicate is intended.
    parts = [
        u"<?xml version='1.0' encoding='utf-8' ?>\n<?xml version='1.0' encoding='utf-8' ?>\n<!DOCTYPE html>\n",
        u"<html>\n<head>\n",
        bighead.bighead(metadata, olid, style),
        u"\n</head>\n<body class='abbyytext'>",
    ]
    for par in pars:
        parts.append("\n")
        parts.append(par)
    # The header above is emitted unconditionally, so the closing tags
    # must be as well (there is no ``wrap`` option in this signature).
    parts.append("\n</body>\n</html>\n")
    return u"".join(parts)
def makehtml(bookid,doc=False,path=False,
             abbyyref=False,scanref=False,
             wrap=True,mergepages=True):
    """Generate HTML for a book from its Abbyy scan file.

    Parameters:
      bookid     -- book identifier; used to locate the scan data and
                    Abbyy files and passed to ``makehtmlbody``.
      doc        -- document name; defaults to ``bookid`` when falsy.
      path       -- passed to ``find_scandata`` / ``find_abbyy``.
      abbyyref   -- if truthy, opened with ``tryopen`` as the Abbyy
                    source instead of searching with ``find_abbyy``.
      scanref    -- if truthy, opened with ``tryopen`` as the scan data
                    source instead of searching with ``find_scandata``.
      wrap       -- when true, wrap the body paragraphs in a full
                    document (preamble, ``<head>`` with generated CSS,
                    ``<body>``); when false, return only the paragraphs.
      mergepages -- accepted but not used by this function.

    When ``doc`` equals ``bookid`` the book is looked up with
    ``olib_lookup``; if found, the basename of its ``'key'`` is used as
    the id prefix for ``makehtmlbody`` and passed to ``bighead.bighead``.

    Returns the generated text; each paragraph is preceded by a newline.

    Raises:
      Exception -- if the Abbyy file cannot be opened or found.
    """
    if not doc:
        doc = bookid
    classmap = {}

    olid = False
    # Compare by value: identity of equal strings depends on interning.
    if doc == bookid:
        olib = olib_lookup(bookid)
        if olib:
            olid = os.path.basename(olib['key'])
    idprefix = olid if olid else bookid

    # Scan data is optional; continue without it when it cannot be found.
    if scanref:
        scanstream = tryopen(scanref)
    else:
        scanstream = find_scandata(bookid, doc, path)
    scaninfo = False
    if scanstream:
        try:
            scaninfo = parse_scandata(scanstream)
        finally:
            scanstream.close()

    # The Abbyy file is mandatory.
    if abbyyref:
        abbyystream = tryopen(abbyyref)
        if not abbyystream:
            raise Exception("Can't open abbyy file '%s'"%abbyyref)
    else:
        abbyystream = find_abbyy(bookid, doc, path)
        if not abbyystream:
            raise Exception("Can't find abbyy file for '%s/%s' under '%s'"%
                            (bookid,doc,path))

    try:
        # Materialise the paragraphs while the stream is still open, in
        # case makehtmlbody reads from it lazily.
        pars = list(abbyyhtml.makehtmlbody(abbyystream, idprefix, bookid, doc,
                                           classmap=classmap,
                                           scaninfo=scaninfo))
    finally:
        abbyystream.close()

    parts = []
    if wrap:
        # classmap maps CSS declaration text -> generated class name; keys
        # starting with '%' are bookkeeping entries ('%histogram' holds the
        # per-class usage counts) and are not emitted as rules.
        classhist = classmap['%histogram']
        rules = []
        for x in classmap:
            if not x.startswith('%'):
                rules.append(".%s { %s } /* used %d times */\n" %
                             (classmap[x], x, classhist[classmap[x]]))
        style = abbyy_css + "".join(rules)

        # NOTE(review): the XML declaration appears twice in this literal,
        # which is not well-formed XML.  Kept byte-for-byte so existing
        # output does not change; confirm whether the duplicate is intended.
        parts.append("<?xml version='1.0' encoding='utf-8' ?>\n<?xml version='1.0' encoding='utf-8' ?>\n<!DOCTYPE html>\n")
        parts.append("<html>\n<head>\n")
        # NOTE(review): mixing the encoded head with plain string literals
        # relies on Python 2 str semantics -- confirm before porting.
        parts.append(bighead.bighead(olid, style).encode("utf-8"))
        parts.append("\n</head>\n<body class='abbyytext'>")

    for par in pars:
        parts.append("\n")
        parts.append(par)

    if wrap:
        parts.append("\n</body>\n</html>\n")
    return "".join(parts)