コード例 #1
0
ファイル: pdf.py プロジェクト: koe-/ebola
	def mine_isbn(self):
		"""Extract an ISBN by scanning the document's text page by page.

		Every page is rendered to text on its own. In that text, the digits
		that follow the first "ISBN" marker (starting after the next space and
		ending at the end of that line) are collected, ignoring any non-digit
		characters in between. As soon as enough digits are available the
		check digit is computed and the complete number is returned:
		12 digits when the number starts with 978, 9 digits otherwise.

		Returns:
			list: the collected digits (ints) followed by the value returned
			by ISBN.checksum_isbn13 or ISBN.checksum_isbn10.

		Raises:
			RuntimeError: "ExtractionError" if no page yields a complete ISBN.
		"""
		obuf = StringIO()
		rsrcmgr = PDFResourceManager()
		device = TextConverter(rsrcmgr, outfp=obuf, laparams=LAParams())
		interpreter = PDFPageInterpreter(rsrcmgr, device)

		try:
			for page in self.document.get_pages():
				interpreter.process_page(page)
				page_text = obuf.getvalue()
				# Start the next page with an empty buffer. Without this the
				# text of all previous pages stays in front, so the first
				# "ISBN" marker ever seen would be re-parsed on every page.
				obuf.seek(0)
				obuf.truncate()

				_, marker, tail = page_text.partition("ISBN")
				if marker != "ISBN":
					continue

				# Skip everything up to the first space after the marker
				# (e.g. ":" or "-13:") and look no further than that line.
				candidate = tail.partition(" ")[2].partition("\n")[0]

				isbn = []
				for c in candidate:
					try:
						isbn.append(int(c))
					except ValueError:
						# Separator such as "-" or " ": not part of the number.
						continue
					# Numbers starting with 978 are treated as ISBN-13,
					# everything else as ISBN-10.
					if isbn[:3] == [9, 7, 8]:
						if len(isbn) == 12:
							return isbn + [ISBN.checksum_isbn13(isbn)]
					elif len(isbn) == 9:
						return isbn + [ISBN.checksum_isbn10(isbn)]

				print("ISBN " + ISBN.to_string(isbn) + " incomplete! continue...")
		finally:
			device.close()

		raise RuntimeError("ExtractionError")
コード例 #2
0
ファイル: pdf.py プロジェクト: koe-/ebola
	def extract_isbn(self):
		"""Return the document's ISBN as a list of digits.

		If self.use_external is set, self.program is executed and its standard
		output (decoded as UTF-8) is searched: the digits that follow the
		first "ISBN" marker (starting after the next space and ending at the
		end of that line) are collected, ignoring non-digit characters. As
		soon as enough digits are available the check digit is computed and
		the complete number is returned: 12 digits when the number starts
		with 978, 9 digits otherwise.

		If self.use_external is not set, the work is delegated to
		self.mine_isbn() and its result is returned unchanged.

		Returns:
			list: the collected digits (ints) followed by the value returned
			by ISBN.checksum_isbn13 or ISBN.checksum_isbn10, or whatever
			self.mine_isbn() returns.

		Raises:
			RuntimeError: "ExtractionError" if the external program's output
			contains no complete ISBN.
		"""
		if not self.use_external:
			return self.mine_isbn()

		process = subprocess.Popen(self.program, stdout=subprocess.PIPE)
		txt = process.communicate()[0].decode(encoding='UTF-8')

		_, marker, tail = txt.partition("ISBN")
		if marker == "ISBN":
			# Skip everything up to the first space after the marker
			# (e.g. ":" or "-13:") and look no further than that line.
			candidate = tail.partition(" ")[2].partition("\n")[0]

			isbn = []
			for c in candidate:
				try:
					isbn.append(int(c))
				except ValueError:
					# Separator such as "-" or " ": not part of the number.
					continue
				# Numbers starting with 978 are treated as ISBN-13,
				# everything else as ISBN-10.
				if isbn[:3] == [9, 7, 8]:
					if len(isbn) == 12:
						return isbn + [ISBN.checksum_isbn13(isbn)]
				elif len(isbn) == 9:
					return isbn + [ISBN.checksum_isbn10(isbn)]

		raise RuntimeError("ExtractionError")