def arrangeBigBook(self, fullbook):
    """Empty this book's output directory, then move the epub and cover in.

    ``fullbook`` must provide ``bookPath``, ``safeFn()`` and ``bookTag()``.
    Only regular files directly inside ``self.dir`` are deleted.
    """
    destDir = self.dir
    if not os.path.exists(destDir):
        os.makedirs(destDir)

    # Remove whatever files an earlier run left in the directory.
    for entry in os.listdir(destDir):
        stale = destDir + "\\" + entry
        if os.path.isfile(stale):
            os.remove(stale)

    # Move (copy, then delete) the finished epub into the directory.
    epubSrc = fullbook.bookPath
    epubDst = destDir + "\\" + fullbook.safeFn(fullbook.bookTag()) + ".epub"
    if os.path.isfile(epubSrc):
        copyfile(epubSrc, epubDst)
        os.remove(fullbook.bookPath)

    # The book's own HTML page is not generated here yet (topics unfinished).

    # Bring the cover image over from the scratch area when one exists.
    coverSrc = paths().scratchDir + "OEBPS\\cover.jpg"
    coverDst = destDir + "\\cover.jpg"
    if os.path.isfile(coverSrc):
        copyfile(coverSrc, coverDst)
def arrangeTTYBook(self, fullbook):
    """Empty this book's output directory and place the text/HTML book in it.

    When ``fullbook.txtPath`` is set (not "-"), the text is handed to
    ``textWrapper`` and the value returned by its ``output()`` is stored in
    ``self.volCount``. Otherwise the file at ``fullbook.bookPath`` is moved
    into the directory with an ``.htm`` extension. The scratch cover image is
    copied alongside in both cases, when it exists.
    """
    thePaths = paths()
    targetDir = self.dir
    if not os.path.exists(targetDir):
        os.makedirs(targetDir)

    # Clear out files left behind by a previous run.
    for fn in os.listdir(targetDir):
        pt = targetDir + "\\" + fn
        if os.path.isfile(pt):
            os.remove(pt)

    fromPt = fullbook.bookPath
    # Computed once and reused below (it used to be computed twice, with the
    # first result thrown away).
    filenm = fullbook.safeFn(fullbook.bookTag())
    if fullbook.txtPath != "-":
        # textWrapper arguments: source, title, target directory
        tw = textWrapper(fullbook.bookPath, self.bookTag, targetDir)
        self.volCount = tw.output()
        print(str(self.volCount) + " volumes made")
    else:
        toPt = targetDir + "\\" + filenm + ".htm"
        if os.path.isfile(fromPt):
            copyfile(fromPt, toPt)
            os.remove(fromPt)

    # Copy the cover image over.
    fromPt = thePaths.scratchDir + "cover.jpg"
    toPt = targetDir + "\\cover.jpg"
    if os.path.isfile(fromPt):
        copyfile(fromPt, toPt)
def __init__(self, line):
    """Build a single-letter entry from one ``line`` of text.

    The first character of ``line`` becomes the mark; everything from the
    third character onward becomes the description.
    """
    self.mark = line[:1]
    self.description = line[2:]  # text description of this order
    self.doubles = []  # filled in later with this entry's doubles
    htmlRoot = paths().htmlDir
    self.path = htmlRoot + "topics\\" + self.mark + ".html"
    self.link = self.mark + ".html"
def __init__(self):
    """Create the scanner, tree, title and author services; list the art dirs."""
    self.scanner = scanner()
    pathInfo = paths()
    self.thePaths = pathInfo
    self.theTree = LOCtree()
    self.bookList = LOCtitleService()
    # File names available as cover backgrounds and as clip art.
    self.covers = os.listdir(pathInfo.coversDir)
    self.clips = os.listdir(pathInfo.clipDir)
    self.authorSet = LOCauthorSet()
def __init__(self):
    """Start with 26 empty buckets for authors and 26 for allowed tags."""
    # One bucket per slot; a plain list of lists is enough for now.
    self.auths = [[] for _ in range(26)]
    self.thePaths = paths()
    self.allowedTags = [[] for _ in range(26)]
def __init__(self):
    """Initialise an author record with single-space placeholder fields."""
    # A single space is the placeholder for every not-yet-known text field.
    self.gutenId = self.name = self.birth = self.death = " "
    self.workIds = []
    self.wikiLink = " "
    self.calPath = paths().outputDir
def __init__(self, line):
    """Build an entry from ``line``: "<mark> <description>".

    The mark is everything before the first space (it may be longer than two
    characters); the description is everything after it.
    """
    cut = line.find(' ')
    self.mark = line[:cut]
    self.description = line[cut + 1:]
    htmlRoot = paths().htmlDir
    self.link = self.mark + ".html"
    topicBase = htmlRoot + "\\topics\\" + self.mark
    self.path = topicBase + ".html"
    self.subDir = topicBase + "\\"
    self.topics = []
def setStdMark(self):
    """Derive mark, link and paths from the first 15 description characters."""
    desc = self.brutalDescription()
    htmlRoot = paths().htmlDir
    self.dirPath = htmlRoot + "topics\\" + self.doubleMark + "\\"
    # Slicing already stops at the end of a shorter description.
    self.mark = desc[:15]
    self.link = self.mark + ".html"
    self.htmlRelativePath = (
        "topics/" + self.doubleMark + "/" + self.mark + ".html"
    )
def makeGBGDir(self):
    """Return the content directory path for ``self.gutenId``.

    Every digit of the id except the last becomes one directory level, and
    the full id is the final component (id 12345 -> 1\\2\\3\\4\\12345).
    Single-digit ids are placed under a "0" directory.
    """
    root = paths().contentDir
    digits = str(self.gutenId)
    pieces = [root]
    if len(digits) == 1:
        pieces.append("0\\")
    pieces.extend(d + "\\" for d in digits[:-1])
    pieces.append(digits)
    return "".join(pieces)
def writeContent(self):
    """Write the book body to ``OEBPS\\content.xhtml`` in the scratch dir.

    The plain-text source (``self.txtPath``) is tried first: the whole text is
    wrapped in a minimal XHTML page inside one ``<pre id="startyStart">``.
    If there is no text source, or converting it fails, the HTML source
    (``self.htmPath``) is written instead after a light clean-up of its first
    100 lines; if that clean-up fails the HTML file is copied verbatim.

    Returns:
        1 when a content file was written, 0 when no source was usable.
    """
    thePaths = paths()
    targPath = thePaths.scratchDir + "OEBPS\\content.xhtml"

    if self.txtPath != "-":
        html = ET.Element("html")
        html.set("xmlns", "http://www.w3.org/1999/xhtml")
        head = ET.SubElement(html, "head")
        title = ET.SubElement(head, "title")
        title.text = self.title
        link = ET.SubElement(head, "link")
        link.set('type', "text/css")
        link.set('rel', "stylesheet")
        link.set('href', "stylesheet.css")
        body = ET.SubElement(html, "body")
        pre = ET.SubElement(body, "pre")
        pre.set("id", "startyStart")
        # Read-only access is enough, and the handle is now always closed.
        # Opening stays outside the try so a missing file still raises.
        with open(self.txtPath, 'r') as txf:
            try:
                pre.text = txf.read()
                ET.ElementTree(html).write(targPath)
                return 1
            except Exception:
                # Best effort: fall through to the HTML source below.
                print("failed text convert; on to HTML")

    if self.htmPath != "-":
        try:
            with open(self.htmPath) as f:
                content = f.readlines()
            # Only the first 100 lines are inspected.
            for lnCt in range(min(len(content), 100)):
                line = content[lnCt]
                if "/*" in line and "*/" in line:
                    # Cut a same-line /* ... */ comment out of the line.
                    s1 = line.find("/*")
                    s2 = line.find("*/")
                    line = line[0:s1] + line[s2 + 2:]
                if "<body" in line and " id=" not in line:
                    # Give <body> the anchor that the text variant puts on
                    # its <pre>. The old code discarded the result of
                    # str.replace, so the anchor was never inserted. Lines
                    # that already carry an id are left alone to avoid a
                    # duplicate attribute.
                    line = line.replace("<body", '<body id="startyStart"', 1)
                content[lnCt] = line
            with open(targPath, 'w') as outf:
                outf.writelines(content)
            return 1
        except Exception:
            # Clean-up failed (e.g. undecodable bytes): copy the file as-is.
            if os.path.isfile(self.htmPath):
                copyfile(self.htmPath, targPath)
                return 1
    return 0  # no usable source
def __init__(self):
    """Load the class tree from ``LOCfams.txt`` in the data directory.

    A line whose second character is a space starts a new ``LOCsingle``;
    every other line is added as a double to the most recent single.

    Raises:
        ValueError: if a double line appears before any single line.
    """
    self.thePaths = paths()
    self.singles = []
    current = None  # the single that following double lines belong to
    with open(self.thePaths.dataDir + "LOCfams.txt", "r") as famFile:
        for ln in famFile:
            if len(ln) < 2:
                # Blank or one-character line: nothing to parse. Indexing
                # ln[1] on these used to raise IndexError.
                continue
            if ln[1] == ' ':
                current = LOCsingle(ln)
                self.singles.append(current)
            elif current is None:
                raise ValueError(
                    "LOCfams.txt: sub-class line before any class line: "
                    + repr(ln))
            else:
                current.addDouble(ln)
def makeTxt(self, coverPt, clipPt):
    """Stage this book as a plain .txt (preferred) or .htm file in scratch.

    The scratch directory and its ``images`` folder are emptied of files, the
    cover image is generated, and the source file is copied to
    ``<scratch>/<safe book tag>.txt`` or ``.htm``. ``self.bookPath`` is set to
    that copy.

    Args:
        coverPt: cover background file name, passed to ``makeCoverImage``.
        clipPt: clip-art file name, passed to ``makeCoverImage``.

    Returns:
        1 when a file was staged, 0 when the book was skipped.
    """
    if self.type != "Text":
        print("not a Text", self.gutenId)
        return 0
    if self.txtPath == "-" and self.htmPath == "-":
        print("no text", self.gutenId)
        return 0  # no book! skip
    if len(self.title) < 1:
        print("no title", self.gutenId)
        return 0  # untitled books are skipped

    # Clean out all of the last book's files.
    thePaths = paths()
    self.bookPath = thePaths.scratchDir
    for fn in os.listdir(self.bookPath):
        pt = self.bookPath + fn
        if os.path.isfile(pt):
            os.remove(pt)

    # Clean out the images folder, too.
    imgdir = self.bookPath + "images\\"
    for fn in os.listdir(imgdir):
        pt = imgdir + "\\" + fn
        if os.path.isfile(pt):
            os.remove(pt)

    self.makeCoverImage(coverPt, clipPt)

    if self.txtPath != "-":
        self.bookPath = (thePaths.scratchDir
                         + self.safeFn(self.bookTag()) + ".txt")
        copyfile(self.txtPath, self.bookPath)
        return 1

    if self.htmPath != "-":
        # HTML is the less-preferred fallback.
        self.bookPath = (thePaths.scratchDir
                         + self.safeFn(self.bookTag()) + ".htm")
        copyfile(self.htmPath, self.bookPath)
        # NOTE(review): images are not staged for the HTML variant. The
        # previous loop copied each image onto itself from the images folder
        # emptied above, so it never copied anything (and would have raised
        # SameFileError if it had matched). It was removed together with its
        # unused locals. Decide whether images from self.imgPath belong here.
        return 1
    return 0
def __init__(self):
    """Collect the record sub-directory names under the records directory.

    ``self.allFiles`` holds the names of the directories directly inside
    ``recordsDir`` and ``self.last`` their count. A missing or unreadable
    records directory yields an empty list instead of an IndexError.
    """
    self.dirIndex = 0
    self.counter = 1
    self.gbID = '-1'

    # Find the top dir.
    thePaths = paths()
    self.dir = thePaths.recordsDir
    # os.walk yields the top directory first, so one step is enough. The old
    # code walked the entire tree just to read this first entry.
    _, subdirs, _ = next(os.walk(self.dir), (self.dir, [], []))
    self.allFiles = subdirs
    self.last = len(self.allFiles)
    print("found ", self.last, " records")
def writeCover(self):
    """Write the cover page (``OEBPS\\title.xhtml``) showing ``cover.jpg``."""
    page = ET.Element("html", {
        'xmlns': "http://www.w3.org/1999/xhtml",
        'xml:lang': "en",
    })
    head = ET.SubElement(page, "head")
    ET.SubElement(head, "title").text = "Cover"
    # Marks this page as the cover for calibre.
    ET.SubElement(head, "meta", {'name': "calibre:cover", 'content': "true"})
    body = ET.SubElement(page, "body")
    ET.SubElement(body, "img", {"src": "cover.jpg"})
    document = ET.ElementTree(page)
    document.write(paths().scratchDir + "OEBPS\\title.xhtml")
def makeFromBigBook(self, fullbook):
    """Fill this lightweight record from a fully scanned book.

    Nothing is done unless ``fullbook.langOK()`` is true and
    ``fullbook.scanGBDir()`` returns 0. Otherwise the authors, title, id,
    LCC/LCSH subjects, output paths and the normalised ("brutal") title
    fields are copied or derived. ``self.valid`` becomes True when any LCC or
    LCSH subject is present and is forced to False for Punchinello titles.
    """
    if not (fullbook.langOK() and fullbook.scanGBDir() == 0):
        return

    for a in fullbook.auths:
        self.authors.append(a.duplicate())
    self.title = fullbook.title
    self.gutenId = fullbook.gutenId

    for s in fullbook.subjects:
        if s.find("LCC: ") != -1:
            if self.lcc != "-":
                print("multiple LCCs!")
            self.lcc = s[5:]
            self.valid = True
        if s.find("LCSH: ") != -1:
            # self.topics only contains LCSH strings.
            self.topics.append(s[6:])
            self.valid = True

    thePaths = paths()
    digits = '%05d' % int(self.gutenId)
    self.dir = (thePaths.htmlDir + "\\books\\" + digits[0:1] + "\\"
                + digits[1:3] + "\\" + digits[3:] + "\\")
    self.htmlRelativePath = ("books/" + digits[0:1] + "/" + digits[1:3]
                             + "/" + digits[3:] + "/" + "index.html")

    # Keep only A-Z, digits and spaces. The earlier in-place removal loop
    # skipped the character following each removed one, and its stray
    # `return` statements left the method as soon as anything was removed,
    # so the fields below were never set for most punctuated titles.
    alloweds = "ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890"
    brute = "".join(ch for ch in self.title.upper() if ch in alloweds)

    # Drop articles and join the remaining words with underscores.
    okwords = ""
    for w in brute.split(' '):
        if w != "A" and w != "AN" and w != "THE":
            if len(okwords) > 0:
                okwords = okwords + "_"
            okwords = okwords + w
    self.brutalTitle = okwords
    self.brutalTitleA = okwords[0:1]
    self.brutalTitleAB = okwords[0:2]
    self.titlePath = (thePaths.htmlDir + "\\titles\\"
                      + self.brutalTitleA + '\\')
    self.bookTag = fullbook.safeFn(fullbook.bookTag())
    print("brutal", okwords, self.brutalTitleA, self.brutalTitleAB)

    if self.title.find("Punchinello") != -1:
        self.valid = False  # this periodical is excluded on purpose
def makeTitleHTML(self):
    """Write the title index pages.

    First, one page per leading character (``titles\\<X>.html``) linking to
    every two-character page ``<X>/<XY>.html``. Then ``self.books`` is sorted
    by ``brutalTitleAB`` and each book with a non-empty ``titlePath`` is
    listed on the page for its two-character prefix.
    """
    firsts = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890"
    thePaths = paths()

    for letter in firsts:
        pt = thePaths.htmlDir + "\\titles\\" + letter + ".html"
        with open(pt, "w") as page:
            page.write("<!DOCTYPE html>")
            page.write("<html>")
            page.write("<body>")
            page.write('<h3>Page for ' + letter
                       + ' titles:</h3><table><tr>')
            for ctr, letter2 in enumerate(firsts):
                page.write('<td><a href="' + letter + '/' + letter + letter2
                           + '.html">' + letter + letter2 + '</a></td>')
                if ctr % 6 == 5:  # six links per table row
                    page.write('</tr><tr>')
            page.write("</tr></table></body></html>")

    # Books sharing a prefix are adjacent after sorting, so one page can be
    # kept open at a time.
    self.books.sort(key=lambda x: x.brutalTitleAB)
    page = None
    currentMark = None
    try:
        for bk in self.books:
            if len(bk.titlePath) == 0:
                continue
            if page is None or bk.brutalTitleAB != currentMark:
                if page is not None:
                    page.write("</body>")
                    page.write("</html>")
                    page.close()
                    page = None
                currentMark = bk.brutalTitleAB
                if not os.path.exists(bk.titlePath):
                    os.makedirs(bk.titlePath)
                page = open(bk.titlePath + bk.brutalTitleAB + ".html", "w")
                page.write("<!DOCTYPE html>")
                page.write("<html>")
                page.write("<body>")
                page.write('<h3>Titles Page for ' + bk.brutalTitleAB
                           + ':</h3>')
            page.write('<a href="../../' + bk.htmlRelativePath + '">'
                       + bk.title + '</a><br/>')
        if page is not None:
            # The final page used to be left without closing tags and was
            # never closed.
            page.write("</body>")
            page.write("</html>")
    finally:
        if page is not None:
            page.close()
def writenavx(self):
    """Write the EPUB navigation page (``OEBPS\\nav.xhtml``).

    The table of contents has a single entry, "Start", pointing at
    ``content.xhtml``.
    """
    scratch = paths().scratchDir
    page = ET.Element("html", {
        'xmlns': "http://www.w3.org/1999/xhtml",
        'xmlns:epub': "http://www.idpf.org/2007/ops",
    })
    head = ET.SubElement(page, "head")
    ET.SubElement(head, "title").text = "Navigation"
    body = ET.SubElement(page, "body")
    toc = ET.SubElement(body, "nav", {'epub:type': 'toc'})
    entries = ET.SubElement(toc, "ol")
    entry = ET.SubElement(entries, "li")
    anchor = ET.SubElement(entry, "a", {'href': "content.xhtml"})
    anchor.text = "Start"
    ET.ElementTree(page).write(scratch + "OEBPS\\nav.xhtml")
def writeNCX(self):
    """Write the NCX table of contents (``OEBPS\\toc.ncx``).

    The element tree is first serialised to ``scratch.ncx`` in the data
    directory, then copied into the final file after an XML declaration and
    the NCX DOCTYPE. The single nav point targets
    ``content.xhtml#startyStart``.
    """
    ncx = ET.Element("ncx")
    # NOTE(review): the NCX xmlns and version attributes are not set (the
    # original code had them commented out) -- confirm that is intended.
    head = ET.SubElement(ncx, "head")
    for metaName, metaContent in (
            ('dtb:uid', "urn:uuid:" + self.uuid),
            ('dtb:depth', "1"),
            ('dtb:totalPageCount', "0"),
            ('dtb:maxPageNumber', "0")):
        meta = ET.SubElement(head, "meta")
        meta.set('name', metaName)
        meta.set('content', metaContent)

    docTitle = ET.SubElement(ncx, "docTitle")
    txt = ET.SubElement(docTitle, "text")
    txt.text = self.title

    navMap = ET.SubElement(ncx, "navMap")
    navPoint = ET.SubElement(navMap, "navPoint")
    navPoint.set('id', 'navPoint-1')
    navPoint.set('playOrder', "1")
    navLabel = ET.SubElement(navPoint, "navLabel")
    text = ET.SubElement(navLabel, "text")
    text.text = "Start"
    content = ET.SubElement(navPoint, "content")
    content.set('src', 'content.xhtml#startyStart')

    thePaths = paths()
    scratchNcx = thePaths.dataDir + "scratch.ncx"
    ET.ElementTree(ncx).write(scratchNcx)

    # Both handles are now closed even on error; the scratch copy used to be
    # left open, and neither file needs update ("+") mode.
    with open(scratchNcx, 'r') as xmlvers:
        irm = xmlvers.read()
    with open(thePaths.scratchDir + 'OEBPS\\toc.ncx', 'w') as finalVers:
        finalVers.write("<?xml version='1.0' encoding='utf-8'?>")
        finalVers.write('<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"')
        finalVers.write(' "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">')
        finalVers.write(irm)
def setAltMark(self, dupe):
    """Choose a mark that differs from ``dupe``'s mark.

    Starting at index 5, the two normalised descriptions are compared
    character by character. The mark is the double mark, an underscore, the
    first five characters, and up to three characters from the first
    position where they differ (or from where the scan stopped).
    """
    mine = self.brutalDescription()
    theirs = dupe.brutalDescription()

    # Advance to the first differing character, stopping three characters
    # short of the end of our own description.
    pos = 5
    limit = len(mine) - 3
    while pos < limit and mine[pos:pos + 1] == theirs[pos:pos + 1]:
        pos += 1

    htmlRoot = paths().htmlDir
    self.dirPath = htmlRoot + "topics\\" + self.doubleMark + "\\"
    # The slice clamps itself at the end of the string.
    self.mark = self.doubleMark + "_" + mine[0:5] + mine[pos:pos + 3]
    self.link = self.mark + ".html"
def __init__(self):
    """Create the scanner, path info, title set, class tree and author set."""
    # Construction order is kept: scanner, paths, titles, tree, authors.
    self.scanner, self.thePaths = scanner(), paths()
    self.bookList = titleSet()
    self.theTree = LOCtree()
    self.authorSet = authorSet()
def makeCoverImage(self, coverFn, clipFn):
    """Compose the cover image and write it to ``<scratch>OEBPS\\cover.jpg``.

    The cover is built from a tinted background image (``coverFn`` in the
    covers directory), a white text block holding up to three title lines, up
    to three author lines and the book's gutenberg.org URL, and an optional
    clip image (``clipFn`` in the clip directory, or one of the book's own
    images when ``self.coverImgPath`` is set).

    Args:
        coverFn: file name of the background image inside ``coversDir``.
        clipFn: file name of the clip-art image inside ``clipDir``.
    """
    # make title strings and authors strings
    # brutalString returns four values; the first three are drawn as text
    # lines and the fourth is used as a number below.
    titl1, titl2, titl3, titInd = self.brutalString(self.title)
    # make 'em real ugly
    # Author names are joined with "--"; an extra "--" is added before the
    # translators whenever there are any.
    authNames = ""
    notFirst = 1
    for aut in self.auths:
        if (notFirst==1):
            notFirst =0
        else:
            authNames += "--"
        authNames += aut.name
    if (len(self.trns)>0):
        authNames += "--"
    for aut in self.trns:
        if (notFirst==1):
            notFirst =0
        else:
            authNames += "--"
        authNames += aut.name
    if (len(authNames)<1):
        authNames = "Unknown"
    aut1, aut2, aut3, autInd = self.brutalString(authNames)
    # make bg image by loading the cover
    thePaths = paths()
    coverDir = thePaths.coversDir
    coverPath = coverDir + coverFn
    # NOTE(review): unlike the clip image below, this imread result is not
    # checked for None before .shape is read.
    resultImg = cv2.imread(coverPath, cv2.IMREAD_COLOR)
    covHt, covWd, covCh = resultImg.shape
    # change tint
    # titInd % 6 picks which one or two channel factors are scaled by
    # tintRatio; the rest stay at 1.0.
    tintRatio = 0.7 + (autInd / 80.0)
    rFac = 1.0
    gFac = 1.0
    bFac = 1.0
    titInd = titInd % 6
    if (titInd==0):
        rFac = tintRatio
    if (titInd==1):
        gFac = tintRatio
    if (titInd==2):
        bFac = tintRatio
    if (titInd==3):
        rFac = tintRatio
        gFac = tintRatio
    if (titInd==4):
        rFac = tintRatio
        bFac = tintRatio
    if (titInd==5):
        gFac = tintRatio
        bFac = tintRatio
    bchan, gchan, rchan = cv2.split(resultImg);
    rchanf = rchan * rFac;
    gchanf = gchan * gFac;
    bchanf = bchan * bFac;
    rchani = rchanf.astype(np.uint8)
    gchani = gchanf.astype(np.uint8)
    bchani = bchanf.astype(np.uint8)
    # NOTE(review): channels are split as (b, g, r) but merged as (r, g, b),
    # so the first and third channels trade places -- confirm this is wanted.
    resultImg = cv2.merge((rchani,gchani,bchani))
    # make text image
    txti = np.ones((500,500, 3), np.uint8)
    txti *= 255 # white bg
    font = cv2.FONT_HERSHEY_DUPLEX
    # convert title to all caps, A-Z and space only
    # convert to up to 3 strings 30 char
    # x, y: text origin; fsc: font scale; fsp: line spacing in pixels;
    # fln: stroke thickness; fco: text colour (black).
    x = 6
    y = 26
    fsc = 0.8
    fsp = 27
    fln = 2
    fco = (0,0,0)
    cv2.putText(txti, titl1, (x, y), font, fsc, fco, fln)
    if (len(titl2)>0):
        y += fsp
        cv2.putText(txti,titl2,(x, y), font, fsc, fco, fln)
    if (len(titl3)>0):
        y += fsp
        cv2.putText(txti,titl3,(x, y), font,fsc, fco, fln)
    y += fsp
    cv2.putText(txti,aut1,(x, y), font,fsc, fco, fln)
    if (len(aut2)>0):
        y += fsp
        cv2.putText(txti,aut2,(x, y), font,fsc, fco, fln)
    if (len(aut3)>0):
        y += fsp
        cv2.putText(txti,aut3,(x, y), font,fsc, fco, fln)
    y += fsp
    cv2.putText(txti,"www.gutenberg.org/ebooks/"+str(self.gutenId),(x, y), font,fsc, fco, fln)
    # Crop the canvas just below the last text line, then scale it to the
    # cover width minus a 20-pixel margin on each side.
    cropTxt = txti[0:y+10, 0:500]
    newWd = covWd -40
    ysz = int((newWd / 500.0) * (y+10.0))
    reszTxt = cv2.resize(cropTxt, (newWd, ysz))
    # a clipart image is available
    clipDir = thePaths.clipDir
    clipPath = clipDir + clipFn
    if (self.imgPath!="-"):
        if (self.coverImgPath!=-1):
            # but if there's an image from the book, use that.
            imgdir = thePaths.scratchDir + "OEBPS\\images\\"
            clipPath = imgdir +self.imgs[self.coverImgPath]
    clipImg = cv2.imread(clipPath, cv2.IMREAD_COLOR)
    if not clipImg is None:
        clHt, clWd, clCh = clipImg.shape
        bchan, gchan, rchan = cv2.split(clipImg);
        # Grey version of the clip: one channel repeated three times.
        clipImg = cv2.merge((rchan,rchan,rchan))
        # Clip is resized to 1.5 x half the cover width, keeping its aspect.
        clpx = int(covWd/2)
        clpy = int( clpx *(clHt/clWd))
        clpSx = int(clpx*1.5)
        clpSy = int(clpy*1.5)
        reszClp = cv2.resize(clipImg, (clpSx, clpSy))
        # Centre the clip in the area below the text block.
        centerx = clpx
        centery = (((covHt - 20) - ysz) / 2.0) + (ysz +20)
        maxClipHt = covHt - ysz -50;
        # Too tall for the remaining space: only the top rows are pasted.
        if (clpSy>maxClipHt):
            clpSy = maxClipHt
        stx = int(centerx - (clpSx/2.0))
        sty = int(centery - (clpSy/2.0))
        resultImg[sty:sty+clpSy, stx:stx+clpSx] = reszClp[0:clpSy, 0:clpSx] # paste clip first
        resultImg[20:20+ysz, 20:20+newWd] = reszTxt # then text block
    # Pasted again here, so the text block is present without a clip too.
    resultImg[20:20+ysz, 20:20+newWd] = reszTxt # then text block
    resPath = thePaths.scratchDir + "OEBPS\\cover.jpg"
    # Written at JPEG quality 20.
    cv2.imwrite(resPath, resultImg, [int(cv2.IMWRITE_JPEG_QUALITY), 20])
def writeOPF(self):
    """Write the EPUB package document (``OEBPS\\content.opf``).

    Builds the ``package`` element (metadata, manifest, spine, guide),
    serialises it to ``scratch.opf`` in the data directory, then copies that
    into the final file after an XML declaration.
    """
    package = ET.Element("package")
    package.set('version', "3.0")
    package.set('xmlns', "http://www.idpf.org/2007/opf")
    package.set('xml:lang', self.language)
    package.set('unique-identifier', "bookid")

    metadata = ET.SubElement(package, "metadata")
    metadata.set("xmlns:dc", "http://purl.org/dc/elements/1.1/")
    dcidentifier = ET.SubElement(metadata, "dc:identifier")
    dcidentifier.set('id', "bookid")
    dcidentifier.text = "urn:uuid:" + self.uuid
    modified = ET.SubElement(metadata, "meta")
    modified.set('property', "dcterms:modified")
    modified.text = "2019-10-11T01:34:19Z"
    dctitle = ET.SubElement(metadata, "dc:title")
    dctitle.text = self.title
    for aut in self.auths:
        dccontr = ET.SubElement(metadata, "dc:creator")
        # NOTE(review): every author gets the same id "creator"; ids are
        # normally expected to be unique within a document.
        dccontr.set('id', "creator")
        dccontr.text = aut.name
        suj = ET.SubElement(metadata, "dc:subject")
        suj.text = aut.wikiLink
    for aut in self.trns:
        dccontr = ET.SubElement(metadata, "dc:translator")
        dccontr.text = aut.name
        suj = ET.SubElement(metadata, "dc:subject")
        suj.text = aut.wikiLink
    description = ET.SubElement(metadata, "dc:description")
    description.text = (self.description + " www.gutenberg.org/ebooks/"
                        + str(self.gutenId))
    date = ET.SubElement(metadata, "dc:date")
    date.text = "2015-08-12T04:00:00+00:00"
    publisher = ET.SubElement(metadata, "dc:publisher")
    publisher.text = "http://www.gutenberg.org"
    cover = ET.SubElement(metadata, "meta")
    cover.set("name", "cover")
    cover.set("content", "cover")
    lnggt = ET.SubElement(metadata, "dc:language")
    lnggt.text = "eng"  # need enum for language translations
    for sub in self.subjects:
        suj = ET.SubElement(metadata, "dc:subject")
        suj.text = sub
    # The wiki links are listed a second time, after the subjects.
    for aut in self.auths:
        suj = ET.SubElement(metadata, "dc:subject")
        suj.text = aut.wikiLink
    for aut in self.trns:
        suj = ET.SubElement(metadata, "dc:subject")
        suj.text = aut.wikiLink

    # (id, href, properties or None, media-type) for each manifest item.
    manifest = ET.SubElement(package, "manifest")
    manifestItems = (
        ("ncx", "toc.ncx", None, "application/x-dtbncx+xml"),
        ("content", "content.xhtml", None, "application/xhtml+xml"),
        ("cover", "title.xhtml", None, "application/xhtml+xml"),
        ("coverImg", "cover.jpg", None, "image/jpeg"),
        ("css", "stylesheet.css", None, "text/css"),
        ("nav", "nav.xhtml", "nav", "application/xhtml+xml"),
    )
    for itemId, href, props, mediaType in manifestItems:
        anItem = ET.SubElement(manifest, "item")
        anItem.set('id', itemId)
        anItem.set('href', href)
        if props is not None:
            anItem.set('properties', props)
        anItem.set("media-type", mediaType)

    spine = ET.SubElement(package, "spine")
    spine.set('page-progression-direction', "default")
    spine.set('toc', "ncx")
    for idref in ("cover", "content"):
        anItem = ET.SubElement(spine, "itemref")
        anItem.set('idref', idref)

    guide = ET.SubElement(package, "guide")
    img = ET.SubElement(guide, "reference")
    img.set('href', "title.xhtml")
    img.set('title', "Cover")
    img.set('type', "cover")

    thePaths = paths()
    scratchOpf = thePaths.dataDir + "scratch.opf"
    ET.ElementTree(package).write(scratchOpf)

    # Both handles are now closed even on error; the scratch copy used to be
    # left open, and neither file needs update ("+") mode.
    with open(scratchOpf, 'r') as xmlvers:
        irm = xmlvers.read()
    with open(thePaths.scratchDir + 'OEBPS\\content.opf', 'w') as finalVers:
        finalVers.write("<?xml version='1.0' encoding='utf-8'?>")
        finalVers.write(irm)
def setStdMark(self):
    """Set the mark to "<doubleMark>.<first five description characters>"."""
    prefix = self.brutalDescription()[0:5]
    htmlRoot = paths().htmlDir
    self.dirPath = htmlRoot + "topics\\" + self.doubleMark + "\\"
    self.mark = self.doubleMark + "." + prefix
    self.link = self.mark + ".html"
def htmlDir(self):
    """Return the authors HTML directory for this author's tag initial."""
    initial = self.authTag()[0:1]
    htmlRoot = paths().htmlDir
    return htmlRoot + "\\authors\\" + initial + "\\"
def makeFromBigBook(self, fullbook):
    """Fill this lightweight record from a fully scanned book.

    Nothing is done unless ``fullbook.langOK()`` is true and
    ``fullbook.scanGBDir()`` returns 0. A book is marked valid only when it
    has both an LCC and at least one LCSH subject; it is then invalidated
    again for juvenile/children/periodical subjects, for a few excluded
    titles, and when its language does not contain "en".
    """
    if not (fullbook.langOK() and fullbook.scanGBDir() == 0):
        return

    for a in fullbook.auths:
        self.authors.append(a.duplicate())
    self.title = fullbook.title
    self.gutenId = fullbook.gutenId

    # First pass: find the first LCC and whether any LCSH exists.
    lcc = ""
    hasLCSH = False
    for s in fullbook.subjects:
        if s.find("LCC: ") != -1 and lcc == "":
            lcc = s[5:]
        if s.find("LCSH: ") != -1:
            hasLCSH = True

    if hasLCSH and lcc != "":
        self.valid = True
        # Second pass: each LCSH topic is prefixed with the most recently
        # seen LCC (initially the first one found above).
        for s in fullbook.subjects:
            if s.find("LCC: ") != -1:
                lcc = s[5:]
            if s.find("LCSH: ") != -1:
                # self.topics only contains LCSH strings.
                self.topics.append(lcc + ' ' + s[6:])
            su = s.upper()
            if su.find("JUVENILE") != -1:
                self.valid = False
            if su.find("CHILDREN") != -1:
                self.valid = False
            if su.find("PERIODICAL") != -1:
                self.valid = False

    thePaths = paths()
    digits = '%05d' % int(self.gutenId)
    self.dir = (thePaths.htmlDir + "books\\" + digits[0:1] + "\\"
                + digits[1:3] + "\\" + digits[3:] + "\\")
    self.htmlRelativePath = ("books/" + digits[0:1] + "/" + digits[1:3]
                             + "/" + digits[3:] + "/" + "index.html")

    # Keep only A-Z, digits and spaces. The earlier in-place removal loop
    # skipped the character following each removed one (for example the
    # "\n" of a "\r\n" pair), so disallowed characters could leak into the
    # title-derived names. An empty title still yields an empty result.
    alloweds = "ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890"
    brute = "".join(ch for ch in self.title.upper() if ch in alloweds)

    # Drop articles and join the remaining words with underscores.
    okwords = ""
    for w in brute.split(' '):
        if w != "A" and w != "AN" and w != "THE":
            if len(okwords) > 0:
                okwords = okwords + "_"
            okwords = okwords + w
    self.brutalTitle = okwords
    self.brutalTitleA = okwords[0:1]
    self.brutalTitleAB = okwords[0:2]
    self.titlePath = thePaths.htmlDir + "titles\\" + self.brutalTitleA + '\\'
    self.bookTag = fullbook.safeFn(fullbook.bookTag())

    # Titles excluded on purpose.
    for banned in ("PUNCHINELLO", "CHARIVARI", "MISSIONARY"):
        if self.brutalTitle.find(banned) != -1:
            self.valid = False
    if fullbook.language.find("en") == -1:
        self.valid = False  # English only for now
def makeEpub(self, coverPt, clipPt):
    """Build the .epub for this book in the output directory.

    The scratch ``OEBPS`` folder is emptied (the stylesheet is kept), the
    content, package, cover, NCX and nav files are generated, and everything
    is zipped to ``<outputDir><safe book tag>.epub``; ``self.bookPath`` is
    set to that file.

    Args:
        coverPt: cover background file name, passed to ``makeCoverImage``.
        clipPt: clip-art file name, passed to ``makeCoverImage``.

    Returns:
        1 on success; 0 when the book is skipped, its content could not be
        written, or the finished epub is larger than 2,500,000 bytes (the
        oversized file is left on disk).
    """
    if self.type != "Text":
        print("not a Text", self.gutenId)
        return 0
    if self.txtPath == "-" and self.htmPath == "-":
        print("no text", self.gutenId)
        return 0  # no book! skip
    if len(self.title) < 1:
        print("no title", self.gutenId)
        return 0  # untitled books are skipped

    # Clean out all of the last book's files, except the stylesheet.
    thePaths = paths()
    base = thePaths.scratchDir + "OEBPS\\"
    for fn in os.listdir(base):
        if fn != "stylesheet.css":
            pt = base + fn
            if os.path.isfile(pt):
                os.remove(pt)

    # The source keeps all of a book's images in *-h/images.
    imgdir = base + "images"
    for fn in os.listdir(imgdir):
        pt = imgdir + "\\" + fn
        if os.path.isfile(pt):
            os.remove(pt)

    # Write content.
    if self.writeContent() == 0:
        print("write content failed", self.gutenId)
        return 0  # could not convert

    # Write images.
    if self.imgPath != "-" and self.txtPath != "-":
        for fn in self.imgs:
            fromPt = self.imgPath + "\\" + fn
            toPt = imgdir + "\\" + fn
            if os.path.isfile(fromPt):
                copyfile(fromPt, toPt)

    self.writeOPF()
    self.makeCoverImage(coverPt, clipPt)
    self.writeCover()
    self.writeNCX()
    self.writenavx()

    self.bookPath = (thePaths.outputDir
                     + self.safeFn(self.bookTag()) + ".epub")
    if os.path.isfile(self.bookPath):
        os.remove(self.bookPath)

    # NOTE(review): the archive is read from the relative "scratch/" folder,
    # not thePaths.scratchDir -- this assumes both name the same directory.
    # The with-block closes the archive even if a write raises.
    with zipfile.ZipFile(self.bookPath, 'a', zipfile.ZIP_DEFLATED) as zipf:
        # The mimetype entry goes first and is stored uncompressed.
        zipf.write("scratch/mimetype", "mimetype", zipfile.ZIP_STORED)
        zipf.write("scratch/META-INF/container.xml",
                   "META-INF/container.xml", zipfile.ZIP_DEFLATED)
        for file in os.listdir("scratch/OEBPS"):
            zipf.write("scratch/OEBPS/" + file, "OEBPS/" + file,
                       zipfile.ZIP_DEFLATED)
        for file in os.listdir("scratch/OEBPS/images"):
            zipf.write("scratch/OEBPS/images/" + file,
                       "OEBPS/images/" + file, zipfile.ZIP_DEFLATED)

    if os.path.getsize(self.bookPath) > 2500000:
        print("too big", self.gutenId)
        return 0
    return 1
def __init__(self):
    """Create the scanner, list the cover and clip files, zero the counters."""
    self.scanner = scanner()
    pathInfo = paths()
    self.covers = os.listdir(pathInfo.coversDir)
    self.clips = os.listdir(pathInfo.clipDir)
    # Sixteen result counters, all starting at zero.
    self.results = [0] * 16
def __init__(self):
    """Start with an empty book list and a fresh ``paths`` object."""
    self.books = list()
    self.thePaths = paths()