def mine_isbn(self):
    """Search the document's extracted text, page by page, for an ISBN.

    Each page of ``self.document`` is rendered to text.  For every
    occurrence of the marker ``"ISBN"`` on the page, the text after the
    first following space is scanned up to the end of the line and its
    digit characters are collected:

    * if the digits start with 9, 7, 8, the scan succeeds once 12 digits
      are collected and the value of ``ISBN.checksum_isbn13`` is appended;
    * otherwise it succeeds once 9 digits are collected and the value of
      ``ISBN.checksum_isbn10`` is appended.

    Returns:
        list[int]: the collected digits followed by the computed checksum.

    Raises:
        RuntimeError: ``"ExtractionError"`` when no page yields a complete
            ISBN.
    """
    marker = "ISBN"
    obuf = StringIO()
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()
    device = TextConverter(rsrcmgr, outfp=obuf, laparams=laparams)
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    try:
        for page in self.document.get_pages():
            interpreter.process_page(page)
            page_text = obuf.getvalue()
            # The converter keeps appending to the same buffer.  Empty it
            # so the next iteration sees only the next page; otherwise the
            # first (possibly incomplete) "ISBN" hit would be re-examined
            # on every page and later occurrences would never be reached.
            obuf.seek(0)
            obuf.truncate(0)

            start = page_text.find(marker)
            while start != -1:
                # Skip whatever follows the marker up to the first space
                # (e.g. ":" or "-13:"); the number is expected after it.
                candidate = page_text[start + len(marker):].partition(" ")[2]
                isbn = []
                for c in candidate:
                    if c == '\n':
                        break
                    try:
                        isbn.append(int(c))
                    except ValueError:
                        # Separators such as "-" or " " are simply skipped.
                        continue
                    if isbn[:3] == [9, 7, 8]:
                        if len(isbn) == 12:
                            return isbn + [ISBN.checksum_isbn13(isbn)]
                    elif len(isbn) == 9:
                        return isbn + [ISBN.checksum_isbn10(isbn)]
                print("ISBN " + ISBN.to_string(isbn) + " incomplete! continue...")
                start = page_text.find(marker, start + len(marker))
    finally:
        device.close()
    raise RuntimeError("ExtractionError")
def extract_isbn(self):
    """Extract an ISBN, either via an external program or via ``mine_isbn``.

    When ``self.use_external`` is falsy the work is delegated to
    ``self.mine_isbn()``.  Otherwise ``self.program`` is executed, its
    standard output is decoded as UTF-8, and the text following the first
    ``"ISBN"`` marker (after the first subsequent space, up to the end of
    the line) is scanned for digits:

    * if the digits start with 9, 7, 8, the scan succeeds once 12 digits
      are collected and the value of ``ISBN.checksum_isbn13`` is appended;
    * otherwise it succeeds once 9 digits are collected and the value of
      ``ISBN.checksum_isbn10`` is appended.

    Returns:
        list[int]: the collected digits followed by the computed checksum,
        or whatever ``self.mine_isbn()`` returns.

    Raises:
        RuntimeError: ``"ExtractionError"`` when the external program's
            output contains no complete ISBN.
    """
    if not self.use_external:
        return self.mine_isbn()

    process = subprocess.Popen(self.program, stdout=subprocess.PIPE)
    raw_output = process.communicate()[0]
    # Only the ASCII marker and digits matter, so undecodable bytes are
    # replaced instead of aborting the whole extraction.
    txt = raw_output.decode(encoding='UTF-8', errors='replace')

    _, found, tail = txt.partition("ISBN")
    if found != 'ISBN':
        raise RuntimeError("ExtractionError")

    # Skip whatever follows the marker up to the first space (e.g. ":" or
    # "-13:"); the number is expected after it.
    candidate = tail.partition(" ")[2]
    isbn = []
    for c in candidate:
        if c == '\n':
            break
        try:
            isbn.append(int(c))
        except ValueError:
            # Separators such as "-" or " " are simply skipped.
            continue
        if isbn[:3] == [9, 7, 8]:
            if len(isbn) == 12:
                return isbn + [ISBN.checksum_isbn13(isbn)]
        elif len(isbn) == 9:
            return isbn + [ISBN.checksum_isbn10(isbn)]

    raise RuntimeError("ExtractionError")