def extract_isbn(self):
    """Extract the ISBN digits for this document.

    When ``self.use_external`` is set, ``self.program`` is executed and its
    standard output is searched for the first ``ISBN`` marker.  Otherwise
    the work is delegated to ``self.mine_isbn()``.

    Returns:
        A list of integer digits with the check digit computed by
        ``ISBN.checksum_isbn13`` (12 leading digits starting with 9-7-8) or
        ``ISBN.checksum_isbn10`` (9 leading digits) appended.

    Raises:
        RuntimeError: with the message ``"ExtractionError"`` when the
            external program's output contains no usable ISBN.
    """
    if not self.use_external:
        return self.mine_isbn()

    process = subprocess.Popen(self.program, stdout=subprocess.PIPE)
    output = process.communicate()[0].decode(encoding='UTF-8')

    _, marker, tail = output.partition("ISBN")
    if marker == 'ISBN':
        digits = []
        # The number is expected after the first blank that follows the
        # marker and must sit on the same line.
        for char in tail.partition(" ")[2]:
            if char == '\n':
                break
            try:
                digits.append(int(char))
            except ValueError:
                # Hyphens, blanks and other separators are skipped.
                continue
            # The printed check digit is never read; it is recomputed as
            # soon as enough leading digits have been collected.
            if digits[:3] == [9, 7, 8]:
                if len(digits) == 12:
                    return digits + [ISBN.checksum_isbn13(digits)]
            elif len(digits) == 9:
                return digits + [ISBN.checksum_isbn10(digits)]

    raise RuntimeError("ExtractionError")
def mine_isbn(self):
    """Search the text of ``self.document`` page by page for an ISBN.

    Each page is rendered to text through a ``TextConverter`` and every
    ``ISBN`` marker in the accumulated text is examined once.  Earlier code
    only ever looked at the first marker, so one incomplete occurrence
    hid all later ones.

    Returns:
        A list of integer digits with the check digit computed by
        ``ISBN.checksum_isbn13`` (12 leading digits starting with 9-7-8) or
        ``ISBN.checksum_isbn10`` (9 leading digits) appended.

    Raises:
        RuntimeError: with the message ``"ExtractionError"`` when no page
            yields a usable ISBN.
    """
    obuf = StringIO()
    rsrcmgr = PDFResourceManager()
    laparams = LAParams()
    device = TextConverter(rsrcmgr, outfp=obuf, laparams=laparams)
    interpreter = PDFPageInterpreter(rsrcmgr, device)

    marker = "ISBN"
    search_from = 0  # offset into the accumulated text already dealt with
    for page in self.document.get_pages():
        interpreter.process_page(page)
        text = obuf.getvalue()  # accumulated text of all pages so far
        while True:
            marker_at = text.find(marker, search_from)
            if marker_at == -1:
                # Keep a short overlap so a marker split across two pages
                # is still found once the next page has been appended.
                search_from = max(search_from, len(text) - len(marker) + 1)
                break

            digits = []
            line_finished = False
            # The number is expected after the first blank that follows
            # the marker and must sit on the same line.
            for char in text[marker_at + len(marker):].partition(" ")[2]:
                if char == '\n':
                    line_finished = True
                    break
                try:
                    digits.append(int(char))
                except ValueError:
                    # Hyphens, blanks and other separators are skipped.
                    continue
                if digits[:3] == [9, 7, 8]:
                    if len(digits) == 12:
                        return digits + [ISBN.checksum_isbn13(digits)]
                elif len(digits) == 9:
                    return digits + [ISBN.checksum_isbn10(digits)]

            if not line_finished:
                # The text ends in the middle of the line; the rest may
                # arrive with the next page, so retry this marker then.
                break

            print("ISBN " + ISBN.to_string(digits) + " incomplete! continue...")
            search_from = marker_at + len(marker)

    raise RuntimeError("ExtractionError")
def to_xml(self, library=None):
    """Serialise the book to a ``<book>`` XML fragment.

    The fields come from ``self.to_dict()`` and are written in that dict's
    iteration order.  Entries whose value is ``None`` (or an empty
    authors/categories collection) are omitted.

    Args:
        library: optional object providing ``categories.collection`` and
            ``categories.get_full_category_ids``.  When given, categories
            are expanded to ``<item>`` elements made of ``<n>`` nodes;
            when ``None``, a non-``None`` categories value is written as
            plain text like any other field.

    Returns:
        The XML fragment as a string, terminated by a newline.
    """
    def category_node(entry):
        # NOTE(review): the importer accepts <n> elements without a colour,
        # so the colour is presumably optional; omit the attribute instead
        # of failing on None.
        color = entry.color
        opening = "<n>" if color is None else "<n color='" + color + "'>"
        return opening + Utilities.escape_xml(entry.name) + "</n>"

    d = self.to_dict()
    chunks = ["<book>\n"]
    for key, value in d.items():
        if key == "authors":
            if value:
                names = [Utilities.escape_xml(author) for author in value]
                chunks.append(" <authors>\n")
                chunks.append(" <name>" + "</name>\n <name>".join(names) + "</name>\n")
                chunks.append(" </authors>\n")
        elif key == "categories" and library is not None:
            if value:
                col = library.categories.collection
                chunks.append(" <categories>\n")
                for category in value:
                    path = "".join(
                        category_node(col[cid])
                        for cid in library.categories.get_full_category_ids(category)
                    )
                    chunks.append(" <item>\n" + " " + path + "\n" + " </item>\n")
                chunks.append(" </categories>\n")
        elif key == "isbn10":
            chunks.append(" <isbn10>" + ISBN.to_string(value, set_hyphen=False) + "</isbn10>\n")
        elif key == "isbn13":
            chunks.append(" <isbn13>" + ISBN.to_string(value, set_hyphen=False) + "</isbn13>\n")
        elif value is not None:
            chunks.append(" <" + key + ">" + Utilities.escape_xml(str(value)) + "</" + key + ">\n")
    chunks.append("</book>\n")
    return "".join(chunks)
def lookup(isbn, ignore=()):
    """Look up book metadata for *isbn* through the Google Books API.

    Args:
        isbn: sequence holding an ISBN-10 (length 10) or an ISBN-13; the
            other form is derived with ``ISBN.to_isbn13`` /
            ``ISBN.to_isbn10``.
        ignore: iterable of result keys to drop from the returned dict.

    Returns:
        A dict that always holds ``isbn10`` and ``isbn13`` plus every one
        of ``title``, ``authors``, ``publisher``, ``publicationDate``,
        ``description``, ``pages``, ``language``, ``cover`` and
        ``categories`` that the service provided (minus *ignore*).

    Raises:
        LookupError: the service knows no volume for this ISBN.
        NotImplementedError: the service returned more than one volume.
    """
    if len(isbn) == 10:
        isbn10 = isbn
        isbn13 = ISBN.to_isbn13(isbn)
    else:
        isbn13 = isbn
        isbn10 = ISBN.to_isbn10(isbn)

    base_url = 'https://www.googleapis.com/books/v1/volumes'
    search_url = base_url + '?q=isbn:' + ISBN.to_string(isbn13, set_hyphen=False)
    print('Fetching: ' + search_url)
    with urlopen(search_url + "&projection=lite") as up:
        payload = up.read()
    sleep(0.5)  # pause between the two requests to the service
    resp = json.loads(payload.decode('utf8'))

    if resp['totalItems'] == 0:
        raise LookupError("The given ISBN Number cannot be associated with a book")
    if resp['totalItems'] != 1:
        raise NotImplementedError("TODO: Handle multiple results")

    with urlopen(base_url + '/' + resp["items"][0]["id"]) as up:
        try:
            payload = up.read()
        except Exception:
            print("Unauthorized")
            payload = None

    volume = {}
    if payload is not None:
        detail = json.loads(payload.decode('utf8'))
        if isinstance(detail, dict) and isinstance(detail.get("volumeInfo"), dict):
            volume = detail["volumeInfo"]

    cur = {"isbn10": isbn10, "isbn13": isbn13}
    extractors = (
        ("title", lambda v: v["title"]),
        ("authors", lambda v: v["authors"]),
        ("publisher", lambda v: v["publisher"]),
        ("publicationDate", lambda v: v["publishedDate"]),
        ("description", lambda v: Utilities.remove_tags(v["description"])),
        ("pages", lambda v: v["pageCount"]),
        ("language", lambda v: v["language"]),
        ("cover", lambda v: v["imageLinks"]["thumbnail"]),
        ("categories", lambda v: [[[y.strip(), None] for y in x.split(" / ")]
                                  for x in v["categories"]]),
    )
    # Every field is optional in the response.  Extract each one on its
    # own so that one missing key no longer discards all later fields.
    for key, extract in extractors:
        try:
            cur[key] = extract(volume)
        except (KeyError, TypeError, AttributeError):
            continue

    for key in ignore:
        cur.pop(key, None)
    return cur
def add_uid(self, uid):
    """Store *uid* in the reference field matching its identifier type.

    The checks run in this order: arXiv, DOI, handle, URN, CNUM and
    finally ISBN.  Anything that cannot be parsed as an ISBN is passed to
    ``self.add_misc``.
    """
    # ``None`` (or any other falsy value) may arrive here; treat it as ''.
    if not uid:
        uid = ''

    if _is_arxiv(uid):
        self._ensure_reference_field('arxiv_eprint', _normalize_arxiv(uid))
        return

    if idutils.is_doi(uid):
        self._ensure_reference_field('dois', [])
        self.obj['reference']['dois'].append(idutils.normalize_doi(uid))
        return

    if idutils.is_handle(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        self.obj['reference']['persistent_identifiers'].append({
            'schema': 'HDL',
            'value': idutils.normalize_handle(uid),
        })
        return

    if idutils.is_urn(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        self.obj['reference']['persistent_identifiers'].append({
            'schema': 'URN',
            'value': uid,
        })
        return

    if self.RE_VALID_CNUM.match(uid):
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['cnum'] = uid
        return

    # ``idutils.is_isbn`` is too strict in what it accepts, so simply try
    # to build an ISBN and fall back to the miscellaneous field.
    try:
        isbn = str(ISBN(uid))
        self._ensure_reference_field('isbn', {})
        self.obj['reference']['isbn'] = isbn
    except Exception:
        self.add_misc(uid)
def _add_uid(self, uid, skip_handle=False):
    """Store *uid* in the reference field matching its identifier type.

    The ``skip_handle`` flag is used when a uid is added through the
    add_url function, since urls can easily be confused with handle
    elements.

    Raises:
        ValueError: *uid* matches none of the known identifier types.
    """
    # ``None`` (or any other falsy value) may arrive here; treat it as ''.
    if not uid:
        uid = ''

    if is_arxiv(uid):
        self._ensure_reference_field('arxiv_eprint', normalize_arxiv(uid))
        return

    if idutils.is_doi(uid):
        self._ensure_reference_field('dois', [])
        normalized_doi = idutils.normalize_doi(uid)
        known_dois = self.obj['reference']['dois']
        if normalized_doi not in known_dois:
            known_dois.append(normalized_doi)
        return

    if idutils.is_handle(uid) and not skip_handle:
        self._ensure_reference_field('persistent_identifiers', [])
        self.obj['reference']['persistent_identifiers'].append({
            'schema': 'HDL',
            'value': idutils.normalize_handle(uid),
        })
        return

    if idutils.is_urn(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        self.obj['reference']['persistent_identifiers'].append({
            'schema': 'URN',
            'value': uid,
        })
        return

    if self.RE_VALID_CNUM.match(uid):
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['cnum'] = uid
        return

    if is_cds_url(uid):
        self._ensure_reference_field('external_system_identifiers', [])
        cds_entry = {'schema': 'CDS', 'value': extract_cds_id(uid)}
        if cds_entry not in self.obj['reference']['external_system_identifiers']:
            self.obj['reference']['external_system_identifiers'].append(cds_entry)
        return

    if is_ads_url(uid):
        self._ensure_reference_field('external_system_identifiers', [])
        self.obj['reference']['external_system_identifiers'].append({
            'schema': 'ADS',
            'value': extract_ads_id(uid),
        })
        return

    # ``idutils.is_isbn`` is too strict in what it accepts, so simply try
    # to build an ISBN and report everything else as unrecognised.
    try:
        isbn = str(ISBN(uid))
        self._ensure_reference_field('isbn', {})
        self.obj['reference']['isbn'] = isbn
    except Exception:
        raise ValueError('Unrecognized uid type')
def import_from_xml_file(self, fname):
    """Replace ``self.data`` with the books stored in the XML file *fname*.

    ``self.data`` is reset to an empty list first; if *fname* is not an
    existing file nothing else happens.  Otherwise every ``<book>``
    element is turned into a ``Book`` and handed to ``self.add_book``
    with ``localCover=False``.  Errors raised while parsing or adding
    propagate to the caller.
    """
    if not os.path.isfile(fname):
        self.data = []
        return

    self.data = []
    tree = ElementTree()
    tree.parse(fname)
    for book_node in tree.getroot().iter("book"):
        bdata = {}
        for child in book_node:
            tag = child.tag
            if tag == "authors":
                bdata["authors"] = [name.text for name in child.iter("name")]
            elif tag == "isbn13":
                bdata["isbn13"] = ISBN.from_string(child.text)
            elif tag == "isbn10":
                bdata["isbn10"] = ISBN.from_string(child.text)
            elif tag == "categories":
                bookcats = []
                for item in child.iter("item"):
                    # Each <n> contributes a [name, colour] pair; the
                    # colour is None when the attribute is absent.
                    newcat = [[node.text, node.attrib.get('color')]
                              for node in item.iter("n")]
                    bookcats.append(self.categories.register(newcat))
                bdata["categories"] = set()
                for candidate in bookcats:
                    # A category is dropped when, for any registered
                    # category ``other``, sub_category(other, candidate)
                    # returns ``other`` itself.
                    hits = [other == self.categories.sub_category(other, candidate)
                            for other in bookcats]
                    if not any(hits):
                        bdata["categories"].add(candidate)
            else:
                bdata[tag] = child.text
        self.add_book(Book(bdata), localCover=False)

## TODO: use something more efficient (tree etc.)
def add_book(self, b, localCover = True):
    """Add book *b* to the library.

    Args:
        b: the book to add.
        localCover: when true and ``b.cover`` is an ``http://`` or
            ``https://`` URL, the cover is downloaded below
            ``self.baseDir + "/covers/"`` and ``b.cover`` is rewritten to
            the resulting ``file://`` location.

    Returns:
        ``False`` when the book is already present (in ``self.data`` or,
        for the sqlite backend, in the ``book`` table), ``True`` otherwise.
    """
    if b in self.data:
        return False

    # A book may have no cover at all (None); only remote covers are fetched.
    if localCover and b.cover is not None and b.cover.startswith(('http://', 'https://')):
        localPath = (self.baseDir + "/covers/" + ISBN.to_string(b.isbn13) + "-"
                     + os.path.basename(urlparse(b.cover).path))
        with urlopen(b.cover) as up, open(localPath, "wb") as fp:
            fp.write(up.read())
        b.cover = "file://" + localPath

    if self.default_db == "xml":
        self.data.append(b)
    elif self.default_db == "sqlite":
        cursor = self.connection.cursor()
        isbn13 = ISBN.to_string(b.isbn13, False)
        cursor.execute("SELECT id FROM book WHERE isbn13=?", [isbn13])
        res = cursor.fetchone()
        if res:
            print("[!] BOOK ALREADY IN COLLECTION: " + str(res))
            return False

        cursor.execute("INSERT INTO book(isbn13) VALUES (?)", [isbn13])
        lid = cursor.lastrowid
        for key, keyid in sql.sqlbase_attribute_names.items():
            attr_val = b.get_attribute_by_name(key)
            if not attr_val:
                continue
            # List-valued attributes get one row per element.  The loop
            # variable must not reuse the cursor's name, otherwise
            # ``execute`` is looked up on the attribute value itself.
            values = attr_val if isinstance(attr_val, list) else [attr_val]
            for value in values:
                cursor.execute(
                    "INSERT INTO attribute (id_book, id_attributename, attr_value) VALUES (?, ?, ?)",
                    [lid, str(keyid), str(value)])
        # Report the id of the book row, not of the last attribute row.
        print("[!] INSERTED BOOK: id=" + str(lid))
        self.connection.commit()
    return True
def to_dict(self):
    """Return the book's fields as a plain dict.

    ``isbn10`` is not stored; it is derived from ``self.isbn13`` through
    ``ISBN.to_isbn10``.  The key order is fixed.
    """
    return dict(
        cover=self.cover,
        title=self.title,
        authors=self.authors,
        publisher=self.publisher,
        publicationDate=self.publication_date,
        isbn13=self.isbn13,
        isbn10=ISBN.to_isbn10(self.isbn13),
        uri=self.uri,
        categories=self.categories,
        description=self.description,
        binding=self.binding,
        volumes=self.volumes,
        pages=self.pages,
        edition=self.edition,
    )
def normalize_isbn(isbn):
    """Return the schema-compliant form of *isbn*.

    The value is passed through ``str(ISBN(isbn))``; if that raises, the
    input is returned unchanged.
    """
    try:
        normalized = str(ISBN(isbn))
    except Exception:
        return isbn
    return normalized
def detect_isbn(self, value):
    """Tell whether *value* looks like an ISBN.

    Three conditions are combined: every character is numeric, the
    length is 10 or 13, and ``ISBN.validate_isbn`` accepts the value.
    The checksum validation is always invoked, whatever the outcome of
    the two cheaper checks.
    """
    digits_only = all(char.isnumeric() for char in value)
    plausible_length = len(value) in (10, 13)
    checksum_ok = ISBN.validate_isbn(value)
    return digits_only and plausible_length and checksum_ok
def normalize_isbn(val):
    """Return the hyphenated form of the ISBN identifier *val*.

    Blanks and hyphens are removed and the remainder is stripped and
    upper-cased before it is handed to ``ISBN(...).hyphen()``.
    """
    separators = {ord(' '): None, ord('-'): None}
    compact = val.translate(separators).strip().upper()
    return ISBN(compact).hyphen()
def add_book_item(self, book, library = None):
    """Build a list row for *book*, add it to this widget and to ``self.rows``.

    The row shows the cover (when it is missing or a ``file://`` path),
    title, authors, category widgets, publisher line, ISBN, two link
    buttons and, if the book has one, a collapsible description.  The
    labels are stored in ``row.ebola_labels`` under the keys ``title``,
    ``authors``, ``pubInfo`` and ``isbn``.

    Args:
        book: the book to display.
        library: passed through to ``self.category_selector_widget``.
    """
    def left_label(markup):
        # Left-aligned, left-justified label showing the given markup.
        label = Gtk.Label(xalign=0)
        label.set_markup(markup)
        label.set_justify(Gtk.Justification.LEFT)
        return label

    def add_margins(button):
        # Same 5px top/bottom/right margin for both link buttons.
        button.set_margin_top(5)
        button.set_margin_bottom(5)
        button.set_margin_right(5)

    row = Gtk.ListBoxRow()
    row.ebola_labels = {}
    # NOTE(review): this connects the handler again on every call - confirm
    # that repeated connections are intended.
    self.connect("row-selected", self.on_row_selected)
    #row.connect("focus-in-event", self.on_focus_in)

    outer = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=10)

    # Cover column
    cover_column = Gtk.Box(orientation=Gtk.Orientation.VERTICAL)
    cover_column.pack_start(Gtk.Alignment(), False, False, 0)
    cover_file = None
    if book.cover is None:
        cover_file = "unknown_cover.svg"
    elif book.cover[:7] == "file://":
        cover_file = book.cover[7:]
    if cover_file is not None:
        pixbuf = GdkPixbuf.Pixbuf.new_from_file_at_size(cover_file, 105, 74)
        cover_column.pack_start(Gtk.Image.new_from_pixbuf(pixbuf), False, False, 0)
    outer.pack_start(cover_column, False, False, 0)

    # Text column
    details = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=5)

    title_label = left_label("<big><b>" + Utilities.escape_xml(str(book.title)) + "</b></big>")
    title_label.set_margin_right(5)
    row.ebola_labels["title"] = title_label
    header = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL)
    header.pack_start(title_label, True, True, 0)
    details.pack_start(header, True, True, 0)

    authors_label = left_label("<b><i>" + Utilities.escape_xml(", ".join(book.authors)) + "</i></b>")
    details.pack_start(authors_label, True, True, 0)
    row.ebola_labels["authors"] = authors_label

    category_box = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL)
    for cid in book.categories:
        category_box.pack_start(self.category_selector_widget(cid, library), False, False, 0)
    details.pack_start(category_box, False, True, 0)

    edition = "" if book.edition is None else str(book.edition) + ". Edition, "
    pub_label = left_label("<b>Publisher:</b> " + Utilities.escape_xml(str(book.publisher)) + ", "
                           + Utilities.escape_xml(edition + str(book.publication_date)))
    details.pack_start(pub_label, True, True, 0)
    row.ebola_labels["pubInfo"] = pub_label

    isbn_label = left_label("<i>ISBN " + ISBN.to_string(book.isbn13) + "</i>")
    details.pack_start(isbn_label, True, True, 0)
    row.ebola_labels["isbn"] = isbn_label

    outer.pack_start(details, True, True, 0)
    row.add(outer)
    self.add(row)
    row.show_all()

    # Widgets created from here on are added after show_all().
    row.ebola_button_show_in_folder = Gtk.LinkButton(book.uri, "Show in folder")
    add_margins(row.ebola_button_show_in_folder)
    row.ebola_button_show_in_folder.connect("activate-link", self.on_show_in_folder)
    header.pack_end(row.ebola_button_show_in_folder, False, True, 0)

    row.ebola_button_open = Gtk.LinkButton(book.uri, "Open")
    add_margins(row.ebola_button_open)
    header.pack_end(row.ebola_button_open, False, True, 0)

    row.ebola_book_description = None
    if book.description is not None and book.description != "":
        text_label = left_label("\n<b>Description:</b> " + Utilities.escape_xml(str(book.description)))
        text_label.set_line_wrap(True)
        text_label.set_selectable(True)
        text_label.set_lines(3)
        revealer = Gtk.Revealer()
        revealer.set_reveal_child(False)
        revealer.add(text_label)

        toggle = Gtk.Label()
        toggle.set_markup("<a href='#'>Show Description</a>")
        toggle.ebola_revealer = revealer
        toggle.connect("activate-link", self.toggle_description)

        row.ebola_book_description = Gtk.Box(orientation=Gtk.Orientation.VERTICAL)
        row.ebola_book_description.pack_end(toggle, True, True, 0)
        row.ebola_book_description.pack_end(revealer, True, True, 0)
        details.pack_start(row.ebola_book_description, True, True, 0)

    self.rows.append(row)
def on_add_file(self, widget):
    """Let the user pick an e-book file and add it to the library.

    The file chooser starts in the directory of the previously added file
    (``self.last_location``) when there is one.  If adding the book fails
    with an ``"ExtractionError"``, the user is asked for the ISBN until a
    valid one is entered or the prompt is cancelled.  Any other failure is
    reported in an error dialog and then re-raised.

    Args:
        widget: the widget that triggered the handler (unused).
    """
    # Local import: only needed to escape the file name for Pango markup.
    from html import escape

    dialog = Gtk.FileChooserDialog(
        "Add eBook to Library", self, Gtk.FileChooserAction.OPEN,
        (Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL,
         Gtk.STOCK_OPEN, Gtk.ResponseType.OK))
    self.add_filters(dialog)
    if self.last_location is not None:
        # Reopen where the user left off instead of a hard-coded path.
        dialog.set_current_folder(self.last_location)
    response = dialog.run()
    fn = dialog.get_filename()
    dialog.destroy()
    if response != Gtk.ResponseType.OK or fn is None:
        return

    self.last_location = os.path.dirname(fn)
    try:
        b = self.library.add_local_book(fn)
        if b is not None:
            self.filtered_list.add_book_item(b, self.library)
    except Exception as inst:
        if inst.args and inst.args[0] == "ExtractionError":
            # Keep asking until the ISBN parses or the user gives up.
            while True:
                inputDialog = Gtk.MessageDialog(
                    self,
                    Gtk.DialogFlags.MODAL | Gtk.DialogFlags.DESTROY_WITH_PARENT,
                    Gtk.MessageType.QUESTION, Gtk.ButtonsType.OK_CANCEL,
                    "Insert ISBN")
                inputDialog.set_title("ISBN")
                # The file name is user data: escape it so characters such
                # as '&', '<' or quotes cannot break the markup.
                inputDialog.format_secondary_markup(
                    "The ISBN automaitc extraction of a book failed.\n"
                    + "If you want to add the book <a href='" + escape(fn) + "'>"
                    + escape(os.path.basename(fn))
                    + "</a> to the library, provide a valid ISBN number:")
                dialogBox = inputDialog.get_content_area()
                # NOTE(review): relies on the internal widget layout of
                # Gtk.MessageDialog to reach the secondary label.
                inputDialog.get_children()[0].get_children()[0].get_children()[1].get_children()[1].connect(
                    'activate-link', self.on_file_link_clicked)
                userInput = Gtk.Entry()
                userInput.set_size_request(100, 0)
                dialogBox.pack_end(userInput, False, False, 0)
                inputDialog.show_all()
                response = inputDialog.run()
                text = userInput.get_text()
                inputDialog.destroy()

                if response != Gtk.ResponseType.OK:
                    break
                try:
                    # Non-ASCII characters are dropped before parsing.
                    isbn = ISBN.from_string("".join(i for i in text if ord(i) < 128))
                    b = self.library.add_local_book(fn, isbn=isbn)
                    if b is not None:
                        self.filtered_list.add_book_item(b, self.library)
                except TypeError:
                    # Invalid ISBN input: ask again.
                    continue
                break
        else:
            # Tell the user first (this dialog used to be unreachable
            # behind the raise), then let the error propagate as before.
            errorDialog = Gtk.MessageDialog(self, 0, Gtk.MessageType.ERROR,
                                            Gtk.ButtonsType.OK,
                                            "Importing the Book failed.")
            errorDialog.format_secondary_text(
                "While importing the Book:\n'" + fn
                + "' the following error occured:\n"
                + "\n".join(map(str, inst.args)))
            errorDialog.run()
            errorDialog.destroy()
            raise
def lookup(isbn, ignore=()):
    """Look up book metadata for *isbn* by scraping lookupbyisbn.com.

    Args:
        isbn: sequence holding an ISBN-10 (length 10) or an ISBN-13; the
            other form is derived with ``ISBN.to_isbn13`` /
            ``ISBN.to_isbn10``.
        ignore: iterable of result keys to drop from the returned dict.

    Returns:
        A dict with the keys ``isbn10``, ``isbn13``, ``title``, ``cover``,
        ``authors``, ``publisher``, ``publicationDate``, ``edition``,
        ``description``, ``binding``, ``volumes`` and ``pages`` (minus
        *ignore*).  ``edition`` and ``volumes`` are ``None`` when the page
        shows ``--``.

    Raises:
        LookupError: the site answered with its error page.
    """
    if len(isbn) == 10:
        isbn10 = isbn
        isbn13 = ISBN.to_isbn13(isbn)
    else:
        isbn13 = isbn
        isbn10 = ISBN.to_isbn10(isbn)

    url = ('http://www.lookupbyisbn.com/Lookup/Book/'
           + ISBN.to_string(isbn10, set_hyphen=False) + '/'
           + ISBN.to_string(isbn13, set_hyphen=False) + '/1')
    print('Fetching: ' + url)
    with urlopen(url) as up:
        resp = up.read().decode('utf8')

    if "<title>Lookup by ISBN: Error!</title>" in resp:
        raise LookupError("[lookupisbn] The given ISBN Number cannot be associated with a book")

    # The page is consumed front to back; every partition drops the part
    # that has already been handled.
    resp = resp.partition("<h2>")[2]
    title, _, resp = resp.partition("</h2>")
    resp = resp.partition('<div class="specimage">')[2]
    resp = resp.partition('<img src="')[2]
    coverURL, _, resp = resp.partition('"')

    labels = ['<span class="title">ISBN:</span>',
              '<span class="title">Author(s):</span>',
              '<span class="title">Publisher:</span>',
              '<span class="title">Publication date:</span>',
              '<span class="title">Edition:</span>',
              '<span class="title">Binding:</span>',
              '<span class="title">Volume(s):</span>',
              '<span class="title">Pages:</span>']
    extracted = []
    for label in labels:
        # The value is whatever follows the label up to the next "\r".
        resp = resp.partition(label)[2]
        value, _, resp = resp.partition("\r")
        extracted.append(value)

    resp = resp.partition("<h2>")[2]
    resp = resp.partition("</h2>")[2]
    description = Utilities.remove_tags(resp.partition("</div>")[0])

    cur = {
        "isbn10": isbn10,
        "isbn13": isbn13,
        "title": title,
        "cover": coverURL,
        "authors": [y.strip() for y in extracted[1].split(",")],
        "publisher": extracted[2],
        "publicationDate": extracted[3],
        "edition": None if extracted[4] == '--' else extracted[4],
        "description": description.strip(),
        "binding": extracted[5],
        "volumes": None if extracted[6] == '--' else extracted[6],
        "pages": extracted[7],
    }
    for key in ignore:
        cur.pop(key, None)
    return cur