Esempio n. 1
0
File: pdf.py Progetto: koe-/ebola
	def extract_isbn(self):
		"""Return the book's ISBN as a list of digits, check digit included.

		If ``self.use_external`` equals ``True``, ``self.program`` is run and
		its standard output (decoded as UTF-8) is searched for the first
		``ISBN`` marker. Otherwise the work is delegated to ``self.mine_isbn()``.

		Digits are collected from the text after the first space following
		the marker, up to the next newline; all other characters are skipped.
		A sequence starting with 9, 7, 8 is returned once 12 digits are known,
		any other sequence once 9 digits are known. The check digit is not
		read from the text but computed by ``ISBN.checksum_isbn13`` /
		``ISBN.checksum_isbn10``.

		Raises:
			RuntimeError: with message ``"ExtractionError"`` when the output
				of the external program holds no complete ISBN.
		"""
		if self.use_external == True:
			process = subprocess.Popen(self.program, stdout=subprocess.PIPE)
			txt = process.communicate()[0].decode(encoding='UTF-8')

			_, marker, rest = txt.partition("ISBN")
			if marker == 'ISBN':
				isbn = []
				for c in rest.partition(" ")[2]:
					if c == '\n':
						# the candidate never extends past its own line
						break
					try:
						isbn.append(int(c))
					except ValueError:
						# separators such as '-' or ':' carry no digit
						continue

					if isbn[:3] == [9, 7, 8]:
						if len(isbn) == 12:
							return isbn + [ISBN.checksum_isbn13(isbn)]
					elif len(isbn) == 9:
						return isbn + [ISBN.checksum_isbn10(isbn)]
		else:
			return self.mine_isbn()

		raise RuntimeError("ExtractionError")
Esempio n. 2
0
File: pdf.py Progetto: koe-/ebola
	def mine_isbn(self):
		"""Extract an ISBN from the text of ``self.document``.

		Pages are converted to text one after another. After each page every
		``ISBN`` marker that has not been rejected yet is examined: digits are
		collected from the text after the first space following the marker,
		up to the next newline. A sequence starting with 9, 7, 8 is returned
		once 12 digits are known, any other sequence once 9 digits are known;
		the check digit is computed by ``ISBN.checksum_isbn13`` /
		``ISBN.checksum_isbn10``.

		Returns:
			list: the ISBN digits including the computed check digit.

		Raises:
			RuntimeError: with message ``"ExtractionError"`` when no page
				yields a complete ISBN.
		"""
		obuf = StringIO()

		rsrcmgr = PDFResourceManager()
		laparams = LAParams()

		device = TextConverter(rsrcmgr, outfp=obuf, laparams=laparams)
		interpreter = PDFPageInterpreter(rsrcmgr, device)

		# Offset into the accumulated text before which every "ISBN" marker
		# has already been examined and rejected. Without it each page would
		# look at the very first marker again and never reach a later one.
		searched = 0
		for page in self.document.get_pages():
			interpreter.process_page(page)
			# obuf holds the text of all pages processed so far
			text = obuf.getvalue()

			while True:
				start = text.find("ISBN", searched)
				if start == -1:
					# keep the last three characters: a marker might be
					# split between this page and the next one
					searched = max(searched, len(text) - 3)
					break

				isbn = []
				line_ended = False
				for c in text[start + 4:].partition(" ")[2]:
					if c == '\n':
						line_ended = True
						break
					try:
						isbn.append(int(c))
					except ValueError:
						# separators such as '-' or ':' carry no digit
						continue

					if isbn[:3] == [9, 7, 8]:
						if len(isbn) == 12:
							return isbn + [ISBN.checksum_isbn13(isbn)]
					elif len(isbn) == 9:
						return isbn + [ISBN.checksum_isbn10(isbn)]

				print("ISBN " + ISBN.to_string(isbn) + " incomplete! continue...")
				if not line_ended:
					# the candidate may still grow with the text of the
					# next page, so examine it again later
					break
				searched = start + 4

		raise RuntimeError("ExtractionError")
Esempio n. 3
0
File: book.py Progetto: koe-/ebola
	def to_xml(self, library=None):
		"""Serialise the book into a ``<book>`` XML fragment.

		The fields come from ``self.to_dict()`` and are written in that
		order. Authors become ``<name>`` elements inside ``<authors>``. When
		``library`` is given, categories are written as ``<item>`` elements
		holding one coloured ``<n>`` element per id returned by
		``library.categories.get_full_category_ids``. ISBNs are written
		without hyphens. Every other field that is not ``None`` is written as
		an element named after its key.

		Returns:
			str: the XML text, terminated by a newline.
		"""
		fields = self.to_dict()
		chunks = ["<book>\n"]

		for key in fields:
			if key == "authors":
				if len(fields["authors"]) > 0:
					names = [Utilities.escape_xml(author) for author in fields["authors"]]
					chunks.append("  <authors>\n")
					chunks.append("    <name>" + "</name>\n    <name>".join(names) + "</name>\n")
					chunks.append("  </authors>\n")
			elif key == "categories" and library is not None:
				if len(fields["categories"]) > 0:
					chunks.append("  <categories>\n")
					col = library.categories.collection
					for category in fields["categories"]:
						nodes = []
						for cid in library.categories.get_full_category_ids(category):
							nodes.append("<n color='" + col[cid].color + "'>" + Utilities.escape_xml(col[cid].name) + "</n>")
						chunks.append("    <item>\n")
						chunks.append("      " + "".join(nodes) + "\n")
						chunks.append("    </item>\n")
					chunks.append("  </categories>\n")
			elif key == "isbn10":
				chunks.append("  <isbn10>" + ISBN.to_string(fields["isbn10"], set_hyphen=False) + "</isbn10>\n")
			elif key == "isbn13":
				chunks.append("  <isbn13>" + ISBN.to_string(fields["isbn13"], set_hyphen=False) + "</isbn13>\n")
			elif fields[key] is not None:
				chunks.append("  <" + key + ">" + Utilities.escape_xml(str(fields[key])) + "</" + key + ">\n")

		chunks.append("</book>\n")
		return "".join(chunks)
Esempio n. 4
0
File: lookup.py Progetto: koe-/ebola
	def lookup(isbn, ignore=()):
		"""Look up book metadata for ``isbn`` through the Google Books API.

		The volume is searched by ISBN-13 first; the single match is then
		fetched by its id to read the details.

		Args:
			isbn: ISBN in the representation understood by the ``ISBN``
				helpers; a length of 10 is taken to mean ISBN-10, anything
				else ISBN-13.
			ignore: keys to remove from the result before it is returned.

		Returns:
			dict: always ``isbn10`` and ``isbn13``, plus whichever of
			``title``, ``authors``, ``publisher``, ``publicationDate``,
			``description``, ``pages``, ``language``, ``cover`` and
			``categories`` the service delivered.

		Raises:
			LookupError: the search returned no volume.
			NotImplementedError: the search returned more than one volume.
		"""
		if len(isbn) == 10:
			isbn10 = isbn
			isbn13 = ISBN.to_isbn13(isbn)
		else:
			isbn13 = isbn
			isbn10 = ISBN.to_isbn10(isbn)

		query_url = 'https://www.googleapis.com/books/v1/volumes?q=isbn:' + ISBN.to_string(isbn13, set_hyphen=False)
		print('Fetching: ' + query_url)
		up = urlopen(query_url + "&projection=lite")
		try:
			payload = up.read()
		finally:
			up.close()

		# short pause between the two requests to the service
		sleep(0.5)

		resp = json.loads(payload.decode('utf8'))
		if resp['totalItems'] == 0:
			raise LookupError("The given ISBN Number cannot be associated with a book")
		if resp['totalItems'] != 1:
			raise NotImplementedError("TODO: Handle multiple results")

		info = {}
		up = urlopen('https://www.googleapis.com/books/v1/volumes/' + resp["items"][0]["id"])
		try:
			payload = up.read()
		except Exception:
			# best effort: without the details only the ISBNs are returned
			print("Unauthorized")
		else:
			volume = json.loads(payload.decode('utf8'))
			if isinstance(volume, dict) and isinstance(volume.get("volumeInfo"), dict):
				info = volume["volumeInfo"]
		finally:
			up.close()

		cur = {"isbn10": isbn10, "isbn13": isbn13}

		# Copy each field on its own: a field missing from the response must
		# not discard the fields that come after it.
		for key, source in (("title", "title"),
							("authors", "authors"),
							("publisher", "publisher"),
							("publicationDate", "publishedDate")):
			if source in info:
				cur[key] = info[source]
		if isinstance(info.get("description"), str):
			cur["description"] = Utilities.remove_tags(info["description"])
		for key, source in (("pages", "pageCount"), ("language", "language")):
			if source in info:
				cur[key] = info[source]
		links = info.get("imageLinks")
		if isinstance(links, dict) and "thumbnail" in links:
			cur["cover"] = links["thumbnail"]
		try:
			# "A / B / C" becomes [["A", None], ["B", None], ["C", None]]
			cur["categories"] = [[[y.strip(), None] for y in x.split(" / ")] for x in info["categories"]]
		except (KeyError, TypeError, AttributeError):
			pass

		for x in ignore:
			if x in cur:
				del cur[x]

		return cur
 def add_uid(self, uid):
     """Store a unique identifier in the reference field matching its type.

     The checks run in this order: arXiv id, DOI, handle, URN, CNUM (via
     ``self.RE_VALID_CNUM``) and finally ISBN. A value that cannot be
     parsed as an ISBN either is handed to ``self.add_misc``.
     """
     # None and other falsy values are treated as the empty string.
     uid = uid or ''

     if _is_arxiv(uid):
         self._ensure_reference_field('arxiv_eprint', _normalize_arxiv(uid))
         return

     if idutils.is_doi(uid):
         self._ensure_reference_field('dois', [])
         doi_list = self.obj['reference']['dois']
         doi_list.append(idutils.normalize_doi(uid))
         return

     if idutils.is_handle(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         identifiers = self.obj['reference']['persistent_identifiers']
         identifiers.append({
             'schema': 'HDL',
             'value': idutils.normalize_handle(uid),
         })
         return

     if idutils.is_urn(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         identifiers = self.obj['reference']['persistent_identifiers']
         identifiers.append({'schema': 'URN', 'value': uid})
         return

     if self.RE_VALID_CNUM.match(uid):
         self._ensure_reference_field('publication_info', {})
         self.obj['reference']['publication_info']['cnum'] = uid
         return

     # ``idutils.is_isbn`` is too strict in what it accepts, so try to
     # build an ISBN object and fall back to a misc entry on failure.
     try:
         parsed = str(ISBN(uid))
         self._ensure_reference_field('isbn', {})
         self.obj['reference']['isbn'] = parsed
     except Exception:
         self.add_misc(uid)
Esempio n. 6
0
    def _add_uid(self, uid, skip_handle=False):
        """Store a unique identifier in the reference field matching its type.

        The checks run in this order: arXiv id, DOI, handle, URN, CNUM (via
        ``self.RE_VALID_CNUM``), CDS url, ADS url and finally ISBN. DOIs and
        CDS ids are only appended when not present yet.

        The ``skip_handle`` flag is used when adding a uid through the
        add_url function since urls can be easily confused with handle
        elements.

        Raises:
            ValueError: ``uid`` matches none of the known types.
        """
        # None and other falsy values are treated as the empty string.
        uid = uid or ''

        if is_arxiv(uid):
            self._ensure_reference_field('arxiv_eprint', normalize_arxiv(uid))
            return

        if idutils.is_doi(uid):
            self._ensure_reference_field('dois', [])
            doi = idutils.normalize_doi(uid)
            if doi not in self.obj['reference']['dois']:
                self.obj['reference']['dois'].append(doi)
            return

        if idutils.is_handle(uid) and not skip_handle:
            self._ensure_reference_field('persistent_identifiers', [])
            identifiers = self.obj['reference']['persistent_identifiers']
            identifiers.append({
                'schema': 'HDL',
                'value': idutils.normalize_handle(uid),
            })
            return

        if idutils.is_urn(uid):
            self._ensure_reference_field('persistent_identifiers', [])
            identifiers = self.obj['reference']['persistent_identifiers']
            identifiers.append({'schema': 'URN', 'value': uid})
            return

        if self.RE_VALID_CNUM.match(uid):
            self._ensure_reference_field('publication_info', {})
            self.obj['reference']['publication_info']['cnum'] = uid
            return

        if is_cds_url(uid):
            self._ensure_reference_field('external_system_identifiers', [])
            entry = {'schema': 'CDS', 'value': extract_cds_id(uid)}
            if entry not in self.obj['reference']['external_system_identifiers']:
                self.obj['reference']['external_system_identifiers'].append(entry)
            return

        if is_ads_url(uid):
            self._ensure_reference_field('external_system_identifiers', [])
            external_ids = self.obj['reference']['external_system_identifiers']
            external_ids.append({
                'schema': 'ADS',
                'value': extract_ads_id(uid),
            })
            return

        # ``idutils.is_isbn`` is too strict in what it accepts, so try to
        # build an ISBN object instead.
        try:
            parsed = str(ISBN(uid))
            self._ensure_reference_field('isbn', {})
            self.obj['reference']['isbn'] = parsed
        except Exception:
            raise ValueError('Unrecognized uid type')
Esempio n. 7
0
File: book.py Progetto: koe-/ebola
	def import_from_xml_file(self, fname):
		"""Replace ``self.data`` with the books stored in the XML file ``fname``.

		``self.data`` is reset to an empty list in every case. When ``fname``
		is an existing file, each ``<book>`` element is turned into a ``Book``
		and passed to ``self.add_book`` with ``localCover=False``:

		* ``<authors>`` becomes the list of its ``<name>`` texts,
		* ``<isbn13>`` / ``<isbn10>`` are parsed with ``ISBN.from_string``,
		* every ``<item>`` inside ``<categories>`` is registered through
		  ``self.categories.register`` as a list of ``[text, color]`` pairs,
		* any other child is stored as ``tag -> text``.

		Errors raised while parsing or importing propagate to the caller.
		"""
		is_file = os.path.isfile(fname)
		self.data = [] ## TODO: use something more efficient (tree etc.)
		if not is_file:
			return

		tree = ElementTree()
		tree.parse(fname)
		for book_el in tree.getroot().iter("book"):
			bdata = {}
			for child in book_el:
				tag = child.tag
				if tag == "authors":
					bdata["authors"] = [name_el.text for name_el in child.iter("name")]
				elif tag == "isbn13":
					bdata["isbn13"] = ISBN.from_string(child.text)
				elif tag == "isbn10":
					bdata["isbn10"] = ISBN.from_string(child.text)
				elif tag == "categories":
					registered = []
					for item_el in child.iter("item"):
						path = [[node.text, node.attrib.get('color')] for node in item_el.iter("n")]
						registered.append(self.categories.register(path))

					# keep a category only if no registered category is
					# returned unchanged by sub_category(other, category)
					kept = set()
					for category in registered:
						hits = [other == self.categories.sub_category(other, category) for other in registered]
						if not any(hits):
							kept.add(category)
					bdata["categories"] = kept
				else:
					bdata[tag] = child.text
			self.add_book(Book(bdata), localCover=False)
Esempio n. 8
0
File: book.py Progetto: koe-/ebola
	def add_book(self, b, localCover = True):
		"""Add the book ``b`` to the collection.

		Args:
			b: the book to add.
			localCover: when true and ``b.cover`` is an http(s) URL, the
				cover is downloaded below ``self.baseDir + "/covers/"`` and
				``b.cover`` is rewritten to the resulting ``file://`` path.

		Returns:
			bool: ``False`` when the book is already in ``self.data`` or, for
			the sqlite backend, already stored under the same ISBN-13;
			``True`` otherwise.
		"""
		if b in self.data:
			return False

		# A book may have no cover at all, so test for a value first.
		if localCover and b.cover and b.cover.startswith(('http://', 'https://')):
			localPath = self.baseDir + "/covers/" + ISBN.to_string(b.isbn13) + "-" + os.path.basename(urlparse(b.cover).path)
			up = urlopen(b.cover)
			try:
				with open(localPath, "wb") as fp:
					fp.write(up.read())
			finally:
				up.close()
			b.cover = "file://" + localPath

		if self.default_db == "xml":
			self.data.append(b)
		elif self.default_db == "sqlite":
			cursor = self.connection.cursor()
			isbn13 = ISBN.to_string(b.isbn13, False)
			cursor.execute("SELECT id FROM book WHERE isbn13=?", [isbn13])
			res = cursor.fetchone()
			if res:
				print("[!] BOOK ALREADY IN COLLECTION: " + str(res))
				return False

			cursor.execute("INSERT INTO book(isbn13) VALUES (?)", [isbn13])
			# remember the book's row id before the attribute inserts
			# overwrite cursor.lastrowid
			lid = cursor.lastrowid
			for key, keyid in sql.sqlbase_attribute_names.items():
				attr_val = b.get_attribute_by_name(key)
				if not attr_val:
					continue
				# list attributes are stored as one row per element
				values = attr_val if isinstance(attr_val, list) else [attr_val]
				for value in values:
					cursor.execute("INSERT INTO attribute (id_book, id_attributename, attr_value) VALUES (?, ?, ?)", [lid, str(keyid), str(value)])
			print("[!] INSERTED BOOK: id=" + str(lid))
			self.connection.commit()

		return True
Esempio n. 9
0
File: book.py Progetto: koe-/ebola
	def to_dict(self):
		"""Return the book's attributes as a dictionary.

		``isbn10`` is not read from the instance but derived from
		``self.isbn13`` through ``ISBN.to_isbn10``.
		"""
		pairs = [
			("cover", self.cover),
			("title", self.title),
			("authors", self.authors),
			("publisher", self.publisher),
			("publicationDate", self.publication_date),
			("isbn13", self.isbn13),
			("isbn10", ISBN.to_isbn10(self.isbn13)),
			("uri", self.uri),
			("categories", self.categories),
			("description", self.description),
			("binding", self.binding),
			("volumes", self.volumes),
			("pages", self.pages),
			("edition", self.edition),
		]
		return dict(pairs)
Esempio n. 10
0
def normalize_isbn(isbn):
    """Return the string form of ``ISBN(isbn)``, or ``isbn`` itself on failure.

    Any exception raised while building or formatting the ISBN results in
    the input being returned unchanged.
    """
    try:
        normalized = str(ISBN(isbn))
    except Exception:
        normalized = isbn
    return normalized
 def detect_isbn(self, value):
     """Tell whether ``value`` looks like an ISBN.

     ``value`` qualifies when all of its characters are numeric, its length
     is 10 or 13 and ``ISBN.validate_isbn`` accepts it. The validator is
     called in every case, also when one of the other checks fails.
     """
     only_numeric = all(char.isnumeric() for char in value)
     length_ok = len(value) in (10, 13)
     checksum_ok = ISBN.validate_isbn(value)
     return only_numeric and length_ok and checksum_ok
Esempio n. 12
0
def normalize_isbn(val):
    """Normalize an ISBN identifier.

    Spaces and hyphens are removed, surrounding whitespace is stripped and
    the text is upper-cased before it is passed to ``ISBN``; the result of
    its ``hyphen()`` method is returned.
    """
    without_spaces = val.replace(' ', '')
    without_hyphens = without_spaces.replace('-', '')
    cleaned = without_hyphens.strip().upper()
    return ISBN(cleaned).hyphen()
Esempio n. 13
0
	def add_book_item(self, book, library = None):
		"""Build a list row for ``book`` and add it to this widget.

		The row shows a cover image (a placeholder when the book has no
		cover, the local file for ``file://`` covers, nothing otherwise),
		title, authors, one category widget per category, publisher line and
		ISBN. Two link buttons ("Open", "Show in folder") and, when the book
		has a description, a collapsed description with a toggle link are
		attached as well. The labels are stored in ``row.ebola_labels`` under
		the keys ``title``, ``authors``, ``pubInfo`` and ``isbn``; the row is
		appended to ``self.rows``.

		Args:
			book: object providing ``cover``, ``title``, ``authors``,
				``categories``, ``edition``, ``publisher``,
				``publication_date``, ``isbn13``, ``uri`` and ``description``.
			library: passed through to ``self.category_selector_widget``.
		"""
		row = Gtk.ListBoxRow()
		row.ebola_labels = {}

		# connect() registers an additional handler on every call, so the
		# selection handler is connected once per list, not once per row.
		if not getattr(self, "_ebola_row_selected_connected", False):
			self.connect("row-selected", self.on_row_selected)
			self._ebola_row_selected_connected = True

		hbox = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=10)
		vimgbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL)
		vimgbox.pack_start(Gtk.Alignment(), False, False, 0)
		if book.cover is None:
			cover_path = "unknown_cover.svg"
		elif book.cover[:7] == "file://":
			cover_path = book.cover[7:]
		else:
			# covers that are not stored locally are not displayed
			cover_path = None
		if cover_path is not None:
			pixbuf = GdkPixbuf.Pixbuf.new_from_file_at_size(cover_path, 105, 74)
			coverImg = Gtk.Image.new_from_pixbuf(pixbuf)
			vimgbox.pack_start(coverImg, False, False, 0)
		hbox.pack_start(vimgbox, False, False, 0)

		vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=5)

		# title line; the link buttons are packed into hbox2 further down
		lbl = Gtk.Label(xalign=0)
		lbl.set_markup("<big><b>" + Utilities.escape_xml(str(book.title)) + "</b></big>")
		lbl.set_justify(Gtk.Justification.LEFT)
		lbl.set_margin_right(5)
		row.ebola_labels["title"] = lbl

		hbox2 = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL)
		hbox2.pack_start(lbl, True, True, 0)

		vbox.pack_start(hbox2, True, True, 0)

		# authors
		lbl = Gtk.Label(xalign=0)
		lbl.set_markup("<b><i>" + Utilities.escape_xml(", ".join(book.authors)) + "</i></b>")
		lbl.set_justify(Gtk.Justification.LEFT)
		vbox.pack_start(lbl, True, True, 0)
		row.ebola_labels["authors"] = lbl

		# categories
		cats = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL)
		for cid in book.categories:
			cats.pack_start(self.category_selector_widget(cid, library), False, False, 0)
		vbox.pack_start(cats, False, True, 0)

		# publisher, edition and publication date
		lbl = Gtk.Label(xalign=0)

		edition = ""
		if book.edition is not None:
			edition = str(book.edition) + ". Edition, "

		lbl.set_markup("<b>Publisher:</b> " + Utilities.escape_xml(str(book.publisher)) + ", " + Utilities.escape_xml(edition + str(book.publication_date)))
		lbl.set_justify(Gtk.Justification.LEFT)
		vbox.pack_start(lbl, True, True, 0)
		row.ebola_labels["pubInfo"] = lbl

		# ISBN
		lbl = Gtk.Label(xalign=0)
		lbl.set_markup("<i>ISBN " + ISBN.to_string(book.isbn13) + "</i>")
		lbl.set_justify(Gtk.Justification.LEFT)
		vbox.pack_start(lbl, True, True, 0)
		row.ebola_labels["isbn"] = lbl

		hbox.pack_start(vbox, True, True, 0)
		row.add(hbox)
		self.add(row)
		row.show_all()

		# Everything below is packed after row.show_all(), so that call does
		# not make these widgets visible.
		row.ebola_button_show_in_folder = Gtk.LinkButton(book.uri, "Show in folder")
		row.ebola_button_show_in_folder.set_margin_top(5)
		row.ebola_button_show_in_folder.set_margin_bottom(5)
		row.ebola_button_show_in_folder.set_margin_right(5)
		row.ebola_button_show_in_folder.connect("activate-link", self.on_show_in_folder)
		hbox2.pack_end(row.ebola_button_show_in_folder, False, True, 0)

		row.ebola_button_open = Gtk.LinkButton(book.uri, "Open")
		row.ebola_button_open.set_margin_top(5)
		row.ebola_button_open.set_margin_bottom(5)
		row.ebola_button_open.set_margin_right(5)
		hbox2.pack_end(row.ebola_button_open, False, True, 0)

		row.ebola_book_description = None
		if book.description is not None and book.description != "":
			lbl = Gtk.Label(xalign=0)
			lbl.set_markup("\n<b>Description:</b> " + Utilities.escape_xml(str(book.description)))
			lbl.set_justify(Gtk.Justification.LEFT)
			lbl.set_line_wrap(True)
			lbl.set_selectable(True)
			lbl.set_lines(3)

			# the description starts collapsed
			revealer = Gtk.Revealer()
			revealer.set_reveal_child(False)
			revealer.add(lbl)

			# the toggle link carries a reference to the revealer it controls
			lbl = Gtk.Label()
			lbl.set_markup("<a href='#'>Show Description</a>")
			lbl.ebola_revealer = revealer
			lbl.connect("activate-link", self.toggle_description)

			row.ebola_book_description = Gtk.Box(orientation=Gtk.Orientation.VERTICAL)
			row.ebola_book_description.pack_end(lbl, True, True, 0)
			row.ebola_book_description.pack_end(revealer, True, True, 0)

			vbox.pack_start(row.ebola_book_description, True, True, 0)

		self.rows.append(row)
Esempio n. 14
0
File: layout.py Progetto: koe-/ebola
	def on_add_file(self, widget):
		"""Let the user pick an eBook file and add it to the library.

		The file chooser starts in ``self.last_location`` when one is known;
		the folder of the chosen file is remembered afterwards. If adding the
		book fails with an exception whose first argument is
		``"ExtractionError"``, the user is asked for the ISBN until the book
		could be added or the prompt is cancelled. For any other exception an
		error dialog is shown and the exception is raised again.

		Args:
			widget: the widget that triggered the handler (unused).
		"""
		from xml.sax.saxutils import escape, quoteattr

		dialog = Gtk.FileChooserDialog("Add eBook to Library", self,
			Gtk.FileChooserAction.OPEN,
			(Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL,
			Gtk.STOCK_OPEN, Gtk.ResponseType.OK))
		self.add_filters(dialog)

		if self.last_location is not None:
			dialog.set_current_folder(self.last_location)

		response = dialog.run()
		fn = dialog.get_filename()
		dialog.destroy()
		if response != Gtk.ResponseType.OK:
			return

		self.last_location = os.path.dirname(fn)
		try:
			b = self.library.add_local_book(fn)
			if b is not None:
				self.filtered_list.add_book_item(b, self.library)
		except Exception as inst:
			if inst.args and inst.args[0] == "ExtractionError":
				# Ask for the ISBN until the book could be added or the
				# user gives up.
				while True:
					inputDialog = Gtk.MessageDialog(self, Gtk.DialogFlags.MODAL | Gtk.DialogFlags.DESTROY_WITH_PARENT,
													Gtk.MessageType.QUESTION, Gtk.ButtonsType.OK_CANCEL, "Insert ISBN")
					inputDialog.set_title("ISBN")
					# the file name is escaped because the text is parsed as markup
					inputDialog.format_secondary_markup("The ISBN automaitc extraction of a book failed.\n"+
										"If you want to add the book <a href=" + quoteattr(fn) + ">" + escape(os.path.basename(fn)) +
										"</a> to the library, provide a valid ISBN number:")
					dialogBox = inputDialog.get_content_area()
					# NOTE(review): presumably this walks down to the label
					# showing the secondary text; it depends on the internal
					# widget layout of Gtk.MessageDialog - confirm.
					inputDialog.get_children()[0].get_children()[0].get_children()[1].get_children()[1].connect('activate-link', self.on_file_link_clicked)
					userInput = Gtk.Entry()
					userInput.set_size_request(100,0)
					dialogBox.pack_end(userInput, False, False, 0)

					inputDialog.show_all()
					response = inputDialog.run()
					text = userInput.get_text()
					inputDialog.destroy()
					if response != Gtk.ResponseType.OK:
						break
					try:
						# non-ASCII characters are dropped before parsing
						isbn = ISBN.from_string("".join(i for i in text if ord(i)<128))
						b = self.library.add_local_book(fn, isbn=isbn)
						if b is not None:
							self.filtered_list.add_book_item(b, self.library)
					except TypeError:
						# a TypeError is treated as invalid input: ask again
						continue
					break
			else:
				errorDialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR,
												Gtk.ButtonsType.OK, "Importing the Book failed.")
				errorDialog.format_secondary_text("While importing the Book:\n'" + fn + "' the following error occured:\n" + "\n".join(map(str, inst.args)))
				errorDialog.run()
				errorDialog.destroy()
				# keep propagating the error after informing the user
				raise
Esempio n. 15
0
File: lookup.py Progetto: koe-/ebola
	def lookup(isbn, ignore=()):
		"""Look up book metadata for ``isbn`` on lookupbyisbn.com.

		The result page is fetched and cut apart with plain string
		partitioning, so the extraction depends on the exact markup of that
		page.

		Args:
			isbn: ISBN in the representation understood by the ``ISBN``
				helpers; a length of 10 is taken to mean ISBN-10, anything
				else ISBN-13.
			ignore: keys to remove from the result before it is returned.

		Returns:
			dict: ``isbn10``, ``isbn13``, ``title``, ``cover``, ``authors``,
			``publisher``, ``publicationDate``, ``edition``, ``description``,
			``binding``, ``volumes`` and ``pages`` minus the ignored keys.
			``edition`` and ``volumes`` are ``None`` when the page shows
			``--``.

		Raises:
			LookupError: the site answered with its error page.
		"""
		if len(isbn) == 10:
			isbn10 = isbn
			isbn13 = ISBN.to_isbn13(isbn)
		else:
			isbn13 = isbn
			isbn10 = ISBN.to_isbn10(isbn)

		url = 'http://www.lookupbyisbn.com/Lookup/Book/' + ISBN.to_string(isbn10, set_hyphen=False) + '/' + ISBN.to_string(isbn13, set_hyphen=False) + '/1'
		print('Fetching: ' + url)
		up = urlopen(url)
		try:
			resp = up.read().decode('utf8')
		finally:
			up.close()

		if "<title>Lookup by ISBN: Error!</title>" in resp:
			raise LookupError("[lookupisbn] The given ISBN Number cannot be associated with a book")

		# the title is the content of the first <h2>
		resp = resp.partition("<h2>")[2]
		title, _, resp = resp.partition("</h2>")

		# the cover is the first image inside the "specimage" block
		resp = resp.partition('<div class="specimage">')[2]
		resp = resp.partition('<img src="')[2]
		coverURL, _, resp = resp.partition('"')

		info = ['<span class="title">ISBN:</span>',
				'<span class="title">Author(s):</span>',
				'<span class="title">Publisher:</span>',
				'<span class="title">Publication date:</span>',
				'<span class="title">Edition:</span>',
				'<span class="title">Binding:</span>',
				'<span class="title">Volume(s):</span>',
				'<span class="title">Pages:</span>']

		# each value runs from its label to the next carriage return;
		# extracted[n] belongs to info[n]
		extracted = []
		for label in info:
			resp = resp.partition(label)[2]
			value, _, resp = resp.partition("\r")
			extracted.append(value)

		# the description follows the next <h2> heading, up to </div>
		resp = resp.partition("<h2>")[2]
		resp = resp.partition("</h2>")[2]
		description = Utilities.remove_tags(resp.partition("</div>")[0])

		cur = dict()
		cur["isbn10"] = isbn10
		cur["isbn13"] = isbn13
		cur["title"] = title
		cur["cover"] = coverURL
		cur["authors"] = [y.strip() for y in extracted[1].split(",")]
		cur["publisher"] = extracted[2]
		cur["publicationDate"] = extracted[3]
		cur["edition"] = None if extracted[4] == '--' else extracted[4]
		cur["description"] = description.strip()
		cur["binding"] = extracted[5]
		cur["volumes"] = None if extracted[6] == '--' else extracted[6]
		cur["pages"] = extracted[7]

		for x in ignore:
			if x in cur:
				del cur[x]

		return cur