def validate_book_data(book_data):
    """Check that the given book data has a title, authors and a usable isbn.

    Args:
        book_data (dict): The book data to validate. Required keys are
            'isbn', 'title', and 'authors'.

    Raises:
        InvalidRequest: If any of 'isbn', 'title' or 'authors' is missing
            from book_data or has a falsy value.
        InvalidRequest: If the cleaned isbn cannot be converted to ISBN-13
            by isbnlib. See
            https://en.wikipedia.org/wiki/International_Standard_Book_Number#Check_digits

    NOTE(review): the original docstring also promised an InvalidRequest
    when 'authors' is not a list of strings, a ResourceExists when the isbn
    already exists, and a dict return value. None of that is implemented in
    the code visible here, which falls off the end and returns None --
    confirm whether part of the function is missing.
    """
    isbn = book_data.get("isbn")
    title = book_data.get("title")
    authors = book_data.get("authors")

    # Ensure request is valid format: all three values must be present and
    # truthy.
    # NOTE(review): this is a plain string, not a format string, so the
    # doubled braces are emitted literally in the error message.
    if not (title and isbn and authors):
        raise InvalidRequest(
            "Request should be of the form {{isbn: 'isbn', title: 'title', authors: [author1, author2,]}}"
        )

    # Check if isbn is valid; on success `isbn` is rebound to the ISBN-13
    # form produced by isbnlib.
    if not (isbn := isbnlib.to_isbn13(isbnlib.clean(isbn))):
        raise InvalidRequest(
            "The isbn provided is not valid or could not be converted into isbn-13 format"
        )
def _isbn10toIsbn13(match):
    """Convert the ISBN captured by a regex match to ISBN-13.

    The ISBN is read from the match group named 'code' and upper-cased.
    An ISBN rejected by ``is_valid`` is returned unchanged. Otherwise the
    conversion is delegated to ``stdnum`` when that name is defined, else
    to ``isbnlib``. When neither name is defined the function returns None.
    """
    code = match.group('code').upper()
    try:
        is_valid(code)
    except InvalidIsbnException:
        # Invalid ISBNs are passed through untouched.
        return code

    try:
        stdnum.isbn
    except NameError:
        pass
    else:
        return stdnum.isbn.to_isbn13(code)

    try:
        isbnlib
    except NameError:
        return None

    # Per the original author's note, isbnlib.to_isbn13() returns None for
    # hyphenated input, so the conversion runs on the bare form.
    bare = isbnlib.canonical(code)
    converted = isbnlib.to_isbn13(bare)
    if bare == code:
        return converted
    # The input carried hyphens: hyphenate the result again and drop the
    # five-character 'ISBN ' prefix that was added for hyphenateIsbnNumbers.
    return hyphenateIsbnNumbers('ISBN ' + converted)[5:]
def isbn_differ(entry_data, suggestion_data):
    """Tell whether the entry's ISBN is missing from the suggested ISBNs.

    Both sides are normalised with ``to_isbn13(canonical(...))`` before the
    comparison. ``suggestion_data`` is an iterable of 2-item sequences whose
    first item is the ISBN. Returns True when the entry's ISBN normalises to
    a falsy value or does not appear among the normalised suggestions.
    """
    wanted = to_isbn13(canonical(entry_data))
    if not wanted:
        return True

    candidates = []
    for raw, _ in suggestion_data:
        candidates.append(to_isbn13(canonical(raw)))
    return wanted not in candidates
def type_and_identifier_for_urn(cls, identifier_string):
    """Split a URN or URL into an (identifier type, identifier) pair.

    Returns ``(None, None)`` for an empty value. The prefixes are tried in
    this order: the Gutenberg URN pattern, http/https URLs, the generic
    URN scheme prefix, the ISBN URN prefix (validated and converted to
    ISBN-13), and finally the catch-all URN prefix.

    Raises:
        ValueError: If an ISBN URN does not hold a valid ISBN, or if no
            prefix matches.
    """
    if not identifier_string:
        return None, None

    gutenberg = cls.GUTENBERG_URN_SCHEME_RE.match(identifier_string)
    if gutenberg:
        return (Identifier.GUTENBERG_ID, gutenberg.groups()[0])

    if identifier_string.startswith(("http:", "https:")):
        return (Identifier.URI, identifier_string)

    if identifier_string.startswith(Identifier.URN_SCHEME_PREFIX):
        # The remainder is "<type>/<identifier>", both URL-quoted.
        remainder = identifier_string[len(Identifier.URN_SCHEME_PREFIX):]
        identifier_type, value = map(urllib.unquote, remainder.split("/", 1))
        return (identifier_type, value)

    if identifier_string.startswith(Identifier.ISBN_URN_SCHEME_PREFIX):
        candidate = urllib.unquote(
            identifier_string[len(Identifier.ISBN_URN_SCHEME_PREFIX):])
        # Make sure this is a valid ISBN, and convert it to an ISBN-13.
        is_ten = isbnlib.is_isbn10(candidate)
        if not (is_ten or isbnlib.is_isbn13(candidate)):
            raise ValueError("%s is not a valid ISBN." % candidate)
        if is_ten:
            candidate = isbnlib.to_isbn13(candidate)
        return (Identifier.ISBN, candidate)

    if identifier_string.startswith(Identifier.OTHER_URN_SCHEME_PREFIX):
        return (Identifier.URI, identifier_string)

    raise ValueError(
        "Could not turn %s into a recognized identifier." % identifier_string)
def set_isbn13():
    """Fill in empty isbn_13 columns from the isbn_10 column.

    Selects every book whose isbn_13 is empty and whose isbn_10 is not.
    The isbn_10 field may hold several ISBNs separated by "; " (the
    original author attributes this to errors in the openlibrary.org
    data), so each one is converted separately and the results are joined
    with "; " again. Entries rejected by ``notisbn`` are reported on stdout
    and left out. All updates are written in one batch and committed on the
    module-level ``conn``.
    """
    cursor = conn.cursor()
    cursor.execute(
        "select id, isbn_10 from book where isbn_13 = ? and isbn_10 != ?;",
        ("", ""),
    )

    updates = []
    for book_id, isbn_10_field in cursor.fetchall():
        converted = []
        for candidate in isbn_10_field.split("; "):
            if notisbn(candidate):
                print(f"error, id {book_id}, isbn_10 {candidate} is not a valid isbn")
                continue
            converted.append(to_isbn13(candidate))
        updates.append(("; ".join(converted), book_id))

    cursor.executemany("UPDATE book SET isbn_13 = ? WHERE id = ?;", updates)
    conn.commit()
def type_and_identifier_for_urn(cls, identifier_string):
    """Work out the identifier type and value encoded in a URN or URL.

    An empty value yields ``(None, None)``. Otherwise the string is matched
    against, in order: the Gutenberg URN pattern, an http/https URL, the
    generic URN scheme prefix ("<type>/<identifier>", URL-quoted), the ISBN
    URN prefix, and the catch-all URN prefix. ISBNs are validated and
    returned as ISBN-13.

    Raises:
        ValueError: For an invalid ISBN or an unrecognised string.
    """
    if not identifier_string:
        return None, None

    found = cls.GUTENBERG_URN_SCHEME_RE.match(identifier_string)
    if found:
        kind = Identifier.GUTENBERG_ID
        identifier_string = found.groups()[0]
    elif identifier_string.startswith(("http:", "https:")):
        kind = Identifier.URI
    elif identifier_string.startswith(Identifier.URN_SCHEME_PREFIX):
        prefix_length = len(Identifier.URN_SCHEME_PREFIX)
        quoted_parts = identifier_string[prefix_length:].split("/", 1)
        kind, identifier_string = map(urllib.unquote, quoted_parts)
    elif identifier_string.startswith(Identifier.ISBN_URN_SCHEME_PREFIX):
        kind = Identifier.ISBN
        prefix_length = len(Identifier.ISBN_URN_SCHEME_PREFIX)
        identifier_string = urllib.unquote(identifier_string[prefix_length:])
        # Only valid ISBNs are accepted, and they are stored as ISBN-13.
        looks_like_isbn10 = isbnlib.is_isbn10(identifier_string)
        if not looks_like_isbn10 and not isbnlib.is_isbn13(identifier_string):
            raise ValueError("%s is not a valid ISBN." % identifier_string)
        if looks_like_isbn10:
            identifier_string = isbnlib.to_isbn13(identifier_string)
    elif identifier_string.startswith(Identifier.OTHER_URN_SCHEME_PREFIX):
        kind = Identifier.URI
    else:
        raise ValueError(
            "Could not turn %s into a recognized identifier." %
            identifier_string)
    return (kind, identifier_string)
def post(self, request):
    """Remove the book with the posted ISBN from the user's desired books.

    The request body is JSON with an 'ISBN' key. An ISBN-10 is converted
    to ISBN-13 before the lookup (the original author notes that only
    ISBN-13 values are stored).

    Returns:
        JsonResponse: A success message when the book was in the user's
        desired list and has been removed, otherwise a "Book not found"
        message.
    """
    env = json.loads(request.body)
    isbnlike = env['ISBN']
    if is_isbn10(isbnlike):
        isbnlike = to_isbn13(isbnlike)

    # The original dumped every Book row to stdout here (a debugging
    # leftover that loaded the whole table on each request); removed.
    try:
        b = request.user.desiredBooks.get(isbnlike=isbnlike)
    except ObjectDoesNotExist:
        return JsonResponse({
            "message": "Book not found",
            "messageCode": CREATE_BOOK_FAILED
        })

    request.user.desiredBooks.remove(b)
    return JsonResponse({
        "message": "Book successfully deleted",
        "messageCode": CREATE_BOOK_SUCESS
    })
def extractISBN(self):
    """Scan the PDF page by page for an ISBN and store both of its forms.

    Each page of ``self.pdf`` is rendered to text and handed to
    ``self.searchCodeInPage``; scanning stops at the first page that yields
    a code. On success ``self.isbn10`` and ``self.isbn13`` are set, the
    missing form being derived with isbnlib. When no code is found, or the
    code is neither an ISBN-10 nor an ISBN-13, the attributes are left
    untouched.
    """
    isbn = None
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    device = TextConverter(rsrcmgr, retstr, codec='utf-8', laparams=LAParams())
    try:
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        for page in PDFPage.get_pages(self.pdf, set(), maxpages=0, password="",
                                      caching=True, check_extractable=True):
            # Get the text from the page
            interpreter.process_page(page)
            text = retstr.getvalue()
            # Rewind before truncating: truncate() does not move the stream
            # position, so without the seek the next page would be written
            # after a gap the size of this page.
            retstr.seek(0)
            retstr.truncate(0)
            # Extract ISBN
            isbn = self.searchCodeInPage(text)
            if isbn:
                break
    finally:
        # Release the converter and buffer even if a page fails to parse.
        device.close()
        retstr.close()

    if not isbn:
        # Nothing found; the isbnlib checks below cannot take None.
        return

    # Convert to ISBN 10 and 13
    if isbnlib.is_isbn10(isbn):
        self.isbn10 = isbn
        self.isbn13 = isbnlib.to_isbn13(self.isbn10)
    elif isbnlib.is_isbn13(isbn):
        self.isbn13 = isbn
        self.isbn10 = isbnlib.to_isbn10(self.isbn13)
def _process_isbn(isbn):
    """Normalise an entered ISBN and split off an EAN-5 price add-on.

    Values that look like a "wsr"/"reg" code, a consignment number
    (digits-dash-digits), "n/a" or "none" only have their quotes removed
    and are returned as they are. Anything else has quotes, dashes and
    whitespace stripped. An 18-character value is treated as ISBN-13 plus a
    5-digit add-on, an ISBN-10 is converted to ISBN-13, and the result must
    match ``isbn13_regex`` and carry a correct check digit.

    Returns:
        tuple: ``(isbn, price)``. ``price`` is 0.00 unless an add-on
        starting with "5" supplied one.

    Raises:
        isbnlib.NotValidISBNError: If the value is not a valid ISBN.
    """
    # NOTE(review): the source was collapsed onto one line. This layout
    # assumes the whole validation belongs to the "real ISBN" branch, since
    # the wsr/reg/consignment values could not pass it -- confirm.
    price = 0.00

    # only strip quotes if wsr, reg, or consignment number, or none
    if re.match(r"^wsr|^reg|^\d{2,4}-\d{1,4}$|n/a|none", isbn, re.I):
        isbn = re.sub("['\"]", "", isbn)
        return isbn, price

    # strip quotes, dashes and whitespace. convert isbn10 to isbn13.
    # split isbn and price if it's an extended isbn
    isbn = re.sub("[\\s'\"\\-]", "", isbn)

    # note the checking for the first character of ean5 extension
    # if it's 5, it means price is in us dollars 0-99.99
    # otherwise, we need to do price ourself.
    if len(isbn) == 18:
        if isbn[-5] == "5":
            price = float(isbn[-4:]) / 100
        isbn = isbn[:-5]

    if len(isbn) == 10:
        if not isbnlib.is_isbn10(isbn):
            raise isbnlib.NotValidISBNError(isbn)
        isbn = isbnlib.to_isbn13(isbn)

    # can't use isbnlib.is_isbn13 because of internal isbns
    if not re.match(isbn13_regex, isbn):
        # Pass the offending value like the other raises in this function;
        # the original raised the bare class here.
        raise isbnlib.NotValidISBNError(isbn)
    if isbnlib.check_digit13(isbn[0:12]) != isbn[12]:
        raise isbnlib.NotValidISBNError(isbn)
    return isbn, price
def post(self, request):
    """Add the book with the posted ISBN to the user's desired books.

    The request body is JSON with an 'ISBN' key. An ISBN-10 is converted to
    ISBN-13 first. If no Book with that ISBN exists, one is created and
    validated.

    Returns:
        JsonResponse: A success message, or -- when validation of a new
        Book fails -- a status-400 response that maps each invalid field to
        its first error message.
    """
    payload = json.loads(request.body)
    isbnlike = payload['ISBN']
    # Make sure that only isbn13 data gets in the database
    if is_isbn10(isbnlike):
        isbnlike = to_isbn13(isbnlike)

    try:
        book = Book.objects.get(isbnlike=isbnlike)
    except ObjectDoesNotExist:
        # No entry yet for this ISBN: try to create one.
        try:
            book = Book(isbnlike=isbnlike)
            book.full_clean()  # Validates
            book.save()
        except ValidationError as verr:
            # Keep only the first message per field so the errors can be
            # sent back as flat JSON.
            errors = dict(verr)
            for field in errors.keys():
                errors[field] = errors[field][0]
            errors["messageCode"] = CREATE_BOOK_FAILED
            response = JsonResponse(errors)
            response.status_code = 400
            return response

    # Add the book to the list of the current logged user.
    request.user.desiredBooks.add(book)
    return JsonResponse({
        "message": "Book successfully added",
        "messageCode": CREATE_BOOK_SUCESS
    })
def isbn_lookup(isbnlike, good_reads):
    """Fetch in Good Reads for a given ISBN code.

    Args:
        isbnlike: Text containing an ISBN-10 or ISBN-13; every character
            other than a digit or the 'X' check character is ignored.
        good_reads: Client object exposing ``book(isbn=...)``.

    Returns:
        dict: Title, first author, publisher, ISBN-13, page count and link
        of the book. The dict is empty when the input is not a valid ISBN
        or the lookup fails (failures are logged).
    """
    book_info = {}
    # Keep 'X' as well as digits: it is a legal ISBN-10 check character,
    # and dropping it made such ISBNs unrecognisable.
    isbn = ''.join(c for c in isbnlike if c.isdigit() or c in 'xX').upper()
    if isbnlib.is_isbn10(isbn):
        isbn = isbnlib.to_isbn13(isbn)
    if not isbnlib.is_isbn13(isbn):
        return book_info

    try:
        book = good_reads.book(isbn=isbn)
        publisher = book.publisher if book.publisher is not None else '-'
        pages_qty = book.num_pages if book.num_pages is not None else 0
        book_info.update({
            'Título': book.title,
            'Autor': str(book.authors[0]),
            'Editora': publisher,
            'ISBN-13': isbn,
            'Qtd. de Páginas': pages_qty,
            'Link': book.link
        })
    except Exception as e:
        # Best effort: a failed lookup is logged and reported as "no data".
        logger.exception('{}'.format(e), exc_info=False)
    return book_info
def standardize_citekey(citekey, warn_if_changed=False):
    """Return `citekey` ("source:identifier") in its standard form.

    DOIs are lower-cased, after expanding a short DOI (one starting with
    "10/") when possible. ISBNs are converted with ``isbnlib.to_isbn13``.
    Other sources are returned unchanged. With ``warn_if_changed`` set, a
    warning is logged whenever the result differs from the input.
    """
    source, identifier = citekey.split(':', 1)

    if source == 'doi':
        if identifier.startswith('10/'):
            from manubot.cite.doi import expand_short_doi
            try:
                identifier = expand_short_doi(identifier)
            except Exception as error:
                # If expanding the short DOI fails, carry on with the short
                # form. The DOI metadata lookup will fail later, where the
                # error handling is appropriate.
                logging.error(f'Error in expand_short_doi for {identifier} '
                              f'due to a {error.__class__.__name__}:\n{error}')
                logging.info(error, exc_info=True)
        identifier = identifier.lower()

    if source == 'isbn':
        from isbnlib import to_isbn13
        identifier = to_isbn13(identifier)

    standard_citekey = '{}:{}'.format(source, identifier)
    changed = citekey != standard_citekey
    if warn_if_changed and changed:
        logging.warning(
            f'standardize_citekey expected citekey to already be standardized.\n'
            f'Instead citekey was changed from {citekey!r} to {standard_citekey!r}'
        )
    return standard_citekey
def search_add(isbn):
    """Fetch metadata for an ISBN, store the book and return the stored rows.

    An ISBN-10 is converted to ISBN-13 first; an ISBN-13 is used as given.
    Metadata comes from ``isbnlib.meta`` and the book is inserted into
    'test.db' through ``addBook``.

    Returns:
        Whatever ``search_equals(cursor, 'ISBN', isbn)`` returns for the
        stored ISBN, or None (after printing a message) when `isbn` is not
        a valid ISBN.
    """
    if isbnlib.is_isbn10(isbn):
        isbn = isbnlib.to_isbn13(isbn)
    elif not isbnlib.is_isbn13(isbn):
        print("Not a valid ISBN")
        return None

    db = sqlite3.connect('test.db')
    try:
        db.row_factory = sqlite3.Row
        cursor = db.cursor()
        info = isbnlib.meta(isbn)
        au = ", ".join(info['Authors'])
        addBook(cursor, info['ISBN-13'], info['Title'], au, info['Year'],
                info['Publisher'], getCoverSmall(isbn), getCover(isbn),
                getDesc(isbn))
        db.commit()
        # NOTE(review): assumes search_equals returns fetched rows, not the
        # live cursor, because the connection is closed on the way out.
        return search_equals(cursor, 'ISBN', isbn)
    finally:
        # The original returned before its close() call, leaking one
        # connection per request.
        db.close()
def isbn(self,isbn):
    """Look up metadata for `isbn` and add it as a row via __add_and_render.

    (The original comment describes this as adding the ISBN to the Google
    spreadsheet.) Metadata is looked up in ``self.trove`` first, under the
    given ISBN and then under its other form (ISBN-10 <-> ISBN-13), and
    finally through ``self.__reduce_metadata``.

    Returns:
        The result of ``self.__add_and_render``, or an error string when
        the ISBN is invalid or no metadata is found.
    """
    # NOTE(review): the source was collapsed onto one line. This layout
    # assumes the 'source'/'Authors'/'link' fix-ups apply only to metadata
    # obtained from the isbnlib fallback -- confirm.
    clean_isbn = isbnlib.clean(isbn)
    if isbnlib.notisbn(clean_isbn):
        return "not valid isbn"

    # TODO (from the original): should check if has been collected before
    # first check trove
    canonical = self.trove.extract(clean_isbn)
    if not canonical:
        # try alternative isbn form
        print("trying alternative form ")
        if isbnlib.is_isbn13(clean_isbn):
            alt_isbn = isbnlib.to_isbn10(clean_isbn)
        else:
            alt_isbn = isbnlib.to_isbn13(clean_isbn)
        canonical = self.trove.extract(alt_isbn)
        if canonical:
            clean_isbn = alt_isbn

    if not canonical:
        canonical = self.__reduce_metadata(clean_isbn, ['merge','isbndb','openl'])
        if not canonical:
            return "no metadata found for isbn: " + clean_isbn
        canonical['source'] = 'isbnlib'
        canonical["Authors"] = u', '.join(canonical["Authors"])
        canonical['link'] = None

    row_data = [
        'isbn:' + clean_isbn,
        canonical["Title"],
        canonical["Authors"],
        canonical["Year"],
        canonical["Publisher"],
        canonical['link'],
    ]
    return self.__add_and_render(row_data)
def raw_mapping(self, results):
    """Index the items of `results` by ISBN-13.

    The ISBN is read from each item's 'ItemAttributes', preferring 'ISBN'
    over 'EISBN' and falling back to an empty string, and is converted with
    ``isbnlib.to_isbn13``. Items that map to the same key overwrite each
    other, the last one winning.
    """
    return {
        isbnlib.to_isbn13(
            item['ItemAttributes'].get(
                'ISBN', item['ItemAttributes'].get('EISBN', ''))
        ): item
        for item in self.get_items(results)
    }
async def check(self, entry):
    """Report an ISBN whose form differs from the configured 'isbn_length'.

    Nothing is reported when the option is falsy, when the entry has no
    ISBN, or when the ISBN is not valid (that is left to other checks).

    Returns:
        list: Empty, or one ``(NAME, message, hint)`` tuple suggesting the
        ISBN in the configured form.

    Raises:
        ConfigurationError: If 'isbn_length' is neither 10 nor 13. This is
            only checked once a valid ISBN has been found.
    """
    length = self._cfg.get('isbn_length', entry, 13)
    if not length:
        return []

    isbn = entry.data.get('isbn')
    if not isbn:
        return []

    clean_isbn = clean(isbn)
    if not clean_isbn or notisbn(clean_isbn):
        return []

    if length not in (10, 13):
        raise ConfigurationError(
            "The option 'isbn_length' must be either of 10 or 13.")

    name = type(self).NAME
    if length == 10 and not is_isbn10(clean_isbn):
        return [(name,
                 "ISBN '{}' is not of length 10.".format(isbn),
                 "ISBN-10 would be '{}'".format(to_isbn10(clean_isbn)))]
    if length == 13 and not is_isbn13(clean_isbn):
        return [(name,
                 "ISBN '{}' is not of length 13.".format(isbn),
                 "ISBN-13 would be '{}'".format(to_isbn13(clean_isbn)))]
    return []
def set_user_recommendation():
    """Record that the current user viewed the book with the posted ISBN.

    Form params: book_isbn

    When the book is not stored yet it is looked up through the Amazon
    scraper first and Google Books second, then saved.

    Returns:
        tuple: ``(message, status)`` -- 201 when a new view is recorded,
        200 when it already existed, 500 when the book cannot be found or
        the Amazon result has no thumbnail.
    """
    user_id = current_user.id
    book_isbn = request.form.get('book_isbn')
    book = Book.get(isbn=book_isbn)
    if not book:
        book = Book()
        # First try amazon scraper
        amazon_books = AmazonScraper().get_amazon_books_for_keyword(
            book_isbn,
        )
        if not amazon_books:
            # use google books if not amazon
            google_books = get_books_for_book_title_using_google_books(
                book_isbn,
            )
            if not google_books:
                return 'Book could not be found', 500
            google_book = google_books[0]
            book.title = google_book.title
            book.author = google_book.author
            book.isbn = to_isbn13(google_book.isbn)
            book.thumbnail_link = google_book.thumbnail_link
        else:
            amazon_book = amazon_books[0]
            book.isbn = to_isbn13(book_isbn)
            if 'title' in amazon_book:
                book.title = amazon_book['title']
            if 'author' in amazon_book:
                book.author = amazon_book['author']
            # The original tested the string literal itself, which is always
            # truthy, so a missing thumbnail raised KeyError below.
            if 'thumbnail_link' not in amazon_book:
                return 'No thumbnail', 500
            book.thumbnail_link = amazon_book['thumbnail_link']
        book.save()

    existing_bv = BooksViewed.get(user_id=user_id, book_id=book.id)
    if not existing_bv:
        bv = BooksViewed()
        bv.user_id = user_id
        bv.book_id = book.id
        bv.save()
        return 'Book View added successfully', 201
    return 'Book View already exists', 200
def _isbn10toIsbn13(match):
    """Convert the ISBN captured by a regex match to ISBN-13.

    The ISBN is read from the match group named 'code' and upper-cased.
    The first available backend does the work: ``stdnum``, then
    ``isbnlib``, then the ``getIsbn`` helper. With every backend, an ISBN
    rejected by ``is_valid`` is returned unchanged.
    """
    code = match.group('code').upper()

    try:
        stdnum.isbn
    except NameError:
        pass
    else:
        try:
            is_valid(code)
        except InvalidIsbnException:
            return code
        return stdnum.isbn.to_isbn13(code)

    try:
        isbnlib
    except NameError:
        pass
    else:
        try:
            is_valid(code)
        except InvalidIsbnException:
            return code
        # Per the original author's note, isbnlib.to_isbn13() returns None
        # for hyphenated input, so the conversion runs on the bare form.
        bare = isbnlib.canonical(code)
        converted = isbnlib.to_isbn13(bare)
        if bare == code:
            return converted
        # The input carried hyphens: hyphenate the result again and drop
        # the five-character 'ISBN ' prefix added for the helper.
        return hyphenateIsbnNumbers('ISBN ' + converted)[5:]

    try:
        is_valid(code)
    except InvalidIsbnException:
        # don't change
        return code
    parsed = getIsbn(code)
    if isinstance(parsed, ISBN13):
        return parsed.code
    return parsed.toISBN13().code
def isbn_normalizer(self, isbn):
    """Return `isbn` upper-cased and without hyphens, as ISBN-13 if it was ISBN-10.

    Values that are not a valid ISBN-10 are returned upper-cased and
    hyphen-free but otherwise unchanged.
    """
    normalized = isbn.replace("-", "").upper()
    if isbnlib.is_isbn10(normalized):
        normalized = isbnlib.to_isbn13(normalized)
    # TODO (translated from the original): we want the hyphens removed
    return normalized.replace("-", "")
def fix_isbn(entry):
    """Rewrite the entry's 'isbn' as a hyphenated ISBN-13, in place.

    Entries without an 'isbn' key are returned untouched. An ISBN-10 is
    converted to ISBN-13 and the result is formatted with
    ``isbnlib.mask``.

    Raises:
        Exception: If the value is not a valid ISBN. The message names the
            entry's 'ID'.
    """
    if 'isbn' not in entry:
        return entry

    candidate = entry['isbn']
    if isbnlib.is_isbn10(candidate):
        candidate = isbnlib.to_isbn13(candidate)
    if not isbnlib.is_isbn13(candidate):
        raise Exception(f'invalid isbn in {entry["ID"]}: {entry["isbn"]}')

    entry['isbn'] = isbnlib.mask(candidate, separator='-')
    return entry
def clean_isbn(isbn):
    """Convert an ISBN to the ISBN-13 format, remove extra characters.

    Every non-digit character is removed. A valid ISBN-10 -- including one
    whose check character is 'X' -- is converted to ISBN-13. Anything else
    is returned as its bare digits.
    """
    digits = re.sub(r"\D", "", isbn)

    # Stripping non-digits also removes an 'X' check character, so an
    # ISBN-10 ending in X would be left as nine useless digits. Put the X
    # back when the text ends in "<digit>X" and the result validates.
    if len(digits) == 9 and re.search(r"\d[xX]\W*$", isbn):
        with_check = digits + "X"
        if isbnlib.is_isbn10(with_check):
            return isbnlib.to_isbn13(with_check)

    if isbnlib.is_isbn10(digits):
        return isbnlib.to_isbn13(digits)
    return digits
def clean_isbn(isbn: str) -> str:
    """Validate an ISBN and return it converted by ``isbnlib.to_isbn13``.

    :param isbn: ISBN 10 or ISBN 13
    :return: the value produced by ``isbnlib.to_isbn13`` (the original
        author describes it as ISBN 13 with hyphens stripped out)
    :raises ValueError: if ``isbnlib.notisbn`` rejects the input
    """
    if not isbnlib.notisbn(isbn):
        return isbnlib.to_isbn13(isbn)
    raise ValueError(f"{isbn} is not a valid ISBN")
def standardize_citation(citation):
    """Standardize a "source:identifier" citation based on its source.

    DOI identifiers are lower-cased and ISBN identifiers are converted with
    ``isbnlib.to_isbn13``. Other sources are returned unchanged.
    """
    source, identifier = citation.split(':', 1)
    if source == 'doi':
        identifier = identifier.lower()
    elif source == 'isbn':
        from isbnlib import to_isbn13
        identifier = to_isbn13(identifier)
    return '{}:{}'.format(source, identifier)
def isbn(self, value):
    """Store `value` in ``self._isbn`` as an ISBN-13.

    A falsy value clears the ISBN (stored as ''). An ISBN-13 is stored in
    canonical form and an ISBN-10 is converted to ISBN-13.

    Raises:
        ValueError: If `value` is neither a valid ISBN-10 nor a valid
            ISBN-13, or cannot be processed by isbnlib at all.
    """
    if not value:
        self._isbn = ''
        return

    try:
        if isbnlib.is_isbn13(value):
            normalized = isbnlib.canonical(value)
        elif isbnlib.is_isbn10(value):
            normalized = isbnlib.to_isbn13(value)
        else:
            normalized = None
    except Exception:
        raise ValueError('Invalid ISBN {}'.format(value))

    if normalized is None:
        # The original silently kept the previous ISBN here, although the
        # setter clearly means to reject invalid values.
        raise ValueError('Invalid ISBN {}'.format(value))
    self._isbn = normalized
def book_for_isbn():
    """Return the first Amazon search result for the requested ISBN as JSON.

    Query params: isbn

    Returns:
        The first scraped book as pretty-printed JSON with sorted keys, or
        a ``(message, status)`` tuple when the parameter is missing (400)
        or nothing is found (500, the status the unhandled error produced
        before).
    """
    isbn = request.args.get('isbn')
    if not isbn:
        # to_isbn13() cannot take None.
        return 'Missing isbn parameter', 400
    isbn = to_isbn13(isbn)

    amazon_books = AmazonScraper().get_amazon_books_for_keyword(isbn)
    if not amazon_books:
        # Indexing an empty result list used to raise IndexError.
        return 'Book could not be found', 500

    # Shallow copy of the first result; dict() works on Python 2 and 3,
    # unlike the iteritems() call it replaces.
    first_book = dict(amazon_books[0])
    return json.dumps(first_book, sort_keys=True, indent=4)
def clean_ISBN(self):
    """Return the form's "ISBN" value without '-' and '.', as ISBN-13 if it was ISBN-10.

    A missing or empty value is returned as it is.
    """
    isbn = self.cleaned_data.get("ISBN")
    if not isbn:
        return isbn
    if is_isbn10(isbn):
        isbn = to_isbn13(isbn)
    return isbn.replace('-', '').replace('.', '')
def converter():
    """Render the ISBN converter page, or convert the two posted ISBNs.

    On POST the 'ISBN-10' field is converted to ISBN-13 and the 'ISBN-13'
    field to ISBN-10. Both results are flashed, in that order, and the
    client is redirected back to this view. Any other method renders the
    form.
    """
    if request.method != 'POST':
        return render_template('book/converter.html')

    form = request.form
    isbn_13 = isbnlib.canonical(form['ISBN-13'])
    isbn_10 = isbnlib.canonical(form['ISBN-10'])
    # converts ISBNS
    for converted in (isbnlib.to_isbn13(isbn_10), isbnlib.to_isbn10(isbn_13)):
        flash(converted)
    return redirect(url_for('book.converter'))
def clean_isbn(isbn):
    """Check that an ISBN is valid and return it as ISBN-13 without dashes.

    A value that is not valid but starts with "978" is retried without that
    prefix, repeatedly, to recover a wrongly prefixed ISBN-10. Returns an
    empty string when nothing valid is found.
    """
    while True:
        if isbnlib.is_isbn10(isbn):
            return isbnlib.to_isbn13(isbn)
        if isbnlib.is_isbn13(isbn):
            return isbnlib.canonical(isbn)
        if not isbn.startswith("978"):
            return ""
        # test if wrongly prefixed isbn10
        isbn = isbn[3:]
def extract_identifiers_from_row(row, isbn_columns):
    """Collect the ISBNs found in the given columns of a row, in both forms.

    Args:
        row: Indexable row of strings.
        isbn_columns: Comma-separated column indexes, e.g. "3,4".

    Returns:
        set: Each raw cell value (with surrounding '"' and '=' characters
        stripped), plus its ISBN-10 or ISBN-13 counterpart when the value
        is a valid ISBN.
    """
    column_indexes = list(map(int, isbn_columns.split(',')))
    found = set()
    for index in column_indexes:
        raw = row[index].strip('"=')
        found.add(raw)
        # Transform to ISBN 10 or 13.
        if isbnlib.is_isbn13(raw):
            other_form = isbnlib.to_isbn10(raw)
        elif isbnlib.is_isbn10(raw):
            other_form = isbnlib.to_isbn13(raw)
        else:
            continue
        found.add(other_form)
    return found
def normalize(identifier):
    """Lower-case the identifier, drop hyphens and a leading 'isbn:' prefix.

    A valid ISBN-10 is additionally converted to ISBN-13.
    """
    prefix = 'isbn:'
    cleaned = identifier.lower().replace('-', '')
    if cleaned.startswith(prefix):
        cleaned = cleaned[len(prefix):]
    return isbnlib.to_isbn13(cleaned) if isbnlib.is_isbn10(cleaned) else cleaned
def get_isbn(self):
    """Return the book's ISBN from its first DC 'identifier' entry, or None.

    The metadata is a list of ``(value, attributes)`` pairs and only the
    first pair is looked at. A valid ISBN-10 is returned as ISBN-13. Any
    other valid ISBN is returned as found.
    """
    entries = self.book.get_metadata('DC', 'identifier')  # e.g. [('Ratio', {})]
    if not entries:
        return None

    candidate = entries[0][0]
    if not candidate or isbnlib.notisbn(candidate):
        return None
    if isbnlib.is_isbn10(candidate):
        return isbnlib.to_isbn13(candidate)
    return candidate
def create_book(self, isbn):
    """Return the Book stored under `isbn`, creating it when missing.

    An ISBN-10 is converted to ISBN-13 before the lookup. A new Book is
    validated with ``full_clean`` before it is saved.

    Raises:
        Exception: Whatever validation or saving raises, after the ISBN has
            been printed. Lookup errors other than "does not exist" are no
            longer mistaken for a missing book and propagate as well.
    """
    if is_isbn10(isbn):
        isbn = to_isbn13(isbn)

    try:
        return Book.objects.get(isbnlike=isbn)
    except Book.DoesNotExist:
        # Only a genuinely missing row leads to creation. The bare except
        # used before also swallowed database errors and duplicates.
        pass

    b = Book(isbnlike=isbn)
    try:
        b.full_clean()
        b.save()
    except Exception:
        print("Problem with isbn = ", isbn)
        raise
    return b
def extract_isbn(value):
    """Extract the single ISBN contained in `value` and return it masked.

    Returns:
        The ISBN formatted by ``isbnlib.mask``.

    Raises:
        ValidationError: "Bad format ..." when no ISBN-like text can be
            extracted, "Too much ISBN numbers" when there is more than one
            candidate, "It is not ISBN number" when the candidate is not a
            valid ISBN.
    """
    try:
        isbns = isbnlib.get_isbnlike(value)
    except Exception as err:
        raise ValidationError(f"Bad format {value}") from err

    if not isbns:
        # Same outcome as before, where indexing the empty list failed
        # inside a bare except.
        raise ValidationError(f"Bad format {value}")
    if len(isbns) > 1:
        raise ValidationError("Too much ISBN numbers")

    isbn = isbns[0]
    if not isbnlib.is_isbn10(isbn) and not isbnlib.to_isbn13(isbn):
        raise ValidationError("It is not ISBN number")
    return isbnlib.mask(isbn)
def format_input_data(self, isbn, price):
    """Normalise the submitted ISBN and price and collect validation errors.

    The ISBN is UTF-8 encoded, stripped of '-' and converted with
    ``to_isbn13``. The price is formatted with two decimals; if it is not a
    number it is passed on unchanged and an error is recorded.

    Returns:
        tuple: ``(isbn, price, errors)``, where ``errors`` also includes
        whatever ``self.form_error_checking`` reports.
    """
    errors = []
    # Strip '-'
    isbn = to_isbn13(isbn.encode('utf8').replace('-', ''))
    try:
        price = "{:.2f}".format(float(price))
    except ValueError:
        errors.append('Please enter a valid price (e.g. "$40.00")')
    errors += self.form_error_checking(isbn=isbn, price=price)
    return isbn, price, errors
def preprocess_isbns(isbns):
    """Return the distinct canonical ISBN-13s found among `isbns`.

    :param isbns: isbns in different formats
    :return: list of canonical isbn13s without duplicates; values that fail
        isbnlib's strict validation are dropped
    """
    unique = set()
    for raw in isbns:
        if isbnlib.notisbn(raw, level='strict'):
            continue
        if isbnlib.is_isbn10(raw):
            raw = isbnlib.to_isbn13(raw)
        unique.add(isbnlib.get_canonical_isbn(raw))
    return list(unique)
def from_asin(cls, _db, asin, autocreate=True):
    """Turn an ASIN-like string into an Identifier.

    Surrounding whitespace and all hyphens are removed first. If the
    string is an ISBN10 or ISBN13, the Identifier will be of type ISBN and
    the value will be the equivalent ISBN13. Otherwise the Identifier will
    be of type ASIN and the value will be the cleaned `asin`.
    """
    cleaned = asin.strip().replace("-", "")
    if isbnlib.is_isbn10(cleaned):
        cleaned = isbnlib.to_isbn13(cleaned)
    identifier_type = cls.ISBN if isbnlib.is_isbn13(cleaned) else cls.ASIN
    return cls.for_foreign_id(_db, identifier_type, cleaned, autocreate)
def main():
    """Print the ISBN-13 and isbnlib info for the ISBN given as first argument.

    An ISBN-10 is converted to ISBN-13 and an ISBN-13 is used as typed.
    Anything else prints "invalid ISBN".
    """
    candidate = sys.argv[1]
    if isbnlib.is_isbn10(candidate):
        isbn13 = isbnlib.to_isbn13(candidate)
    elif isbnlib.is_isbn13(candidate):
        isbn13 = candidate
    else:
        print("invalid ISBN")
        return

    print(isbn13)
    print(isbnlib.info(isbn13))
def query_isbn_data(isbn_str):
    # type: (str) -> Book
    """Look up metadata for an ISBN, trying the 'openl' service, then 'goob'.

    An ISBN-10 is converted to ISBN-13 before querying. Returns the first
    truthy result of ``_get_metadata_for_isbn``, or None when both
    services come back empty.
    """
    if isbnlib.is_isbn10(isbn_str):
        isbn_str = isbnlib.to_isbn13(isbn_str)

    logger.info('query openlibrary for %s' % isbn_str)
    meta = _get_metadata_for_isbn(isbn_str, 'openl')
    if meta:
        return meta

    logger.info('query google books for %s' % isbn_str)
    meta = _get_metadata_for_isbn(isbn_str, 'goob')
    return meta if meta else None
def _parse_book_list_item_into_books(self, item):
    """Turn one search-result element into a list of book dicts.

    One dict is produced per price link that
    ``_parse_price_bulk_item_into_book`` can parse. Each dict is tagged
    with the most recent book type seen (the original comment gives
    hardcover, softcover, ... as examples), the ISBN-13, thumbnail link
    and authors. Returns an empty list for items whose 'data-asin'
    contains "B" (treated as not being an ISBN) or that have no title
    link.
    """
    def get_book_type(element):
        # Elements containing "$" are prices; anything else is a type label.
        text = element.get_text()
        if "$" in text:
            return ""
        return text

    # Title and link contained in link element
    thumbnail_link = item.find("img").attrs["src"].encode("utf8")
    isbn = item.attrs["data-asin"].encode("utf8")
    # If B in isbn, some kind of amazon special book
    if "B" in isbn:
        return []
    isbn = to_isbn13(isbn)

    link_item = item.find("a", class_="a-link-normal s-access-detail-page a-text-normal")
    if not link_item:
        return []
    author_combined_item = item.find("div", class_="a-row a-spacing-none")
    authors = self._get_author_from_author_combined_item(author_combined_item)
    new_book_title = link_item.attrs["title"].encode("utf8")

    # Handle pricing for hardcover, Paperback, Kindle Edition, rent
    price_column_item = item.find("div", class_="a-column a-span7")
    price_bulk_items = price_column_item.find_all("a", class_="a-link-normal a-text-normal")

    new_books = []
    book_type = ""
    for price_item in price_bulk_items:
        # First check for book type (Hardcover, softcover, etc)
        new_book_type = get_book_type(price_item)
        if new_book_type:
            book_type = new_book_type
            continue
        book = self._parse_price_bulk_item_into_book(price_item, new_book_title)
        # Skip unparsable links *before* tagging them. The original
        # assigned book_type first, which crashed on None and turned an
        # empty dict into a truthy one.
        if not book:
            continue
        book["book_type"] = book_type
        new_books.append(book)

    # Add isbn
    for book in new_books:
        book["isbn"] = isbn
        book["thumbnail_link"] = thumbnail_link
        book["author"] = authors
    return new_books
def _add_openlibrary_isbn13s(reference, isbn13_array):
    """Append the ISBN-13s OpenLibrary reports for `reference` that are not yet in `isbn13_array`, and record each on the reference."""
    # NOTE(review): as in the original, the lookup goes through
    # gnomics.reference while registration goes through
    # gnomics.objects.reference -- confirm both paths exist.
    for obj in gnomics.reference.Reference.openlibrary(reference):
        if obj["isbn_13"] not in isbn13_array:
            isbn13_array.append(obj["isbn_13"])
            gnomics.objects.reference.Reference.add_identifier(reference, identifier=obj["isbn_13"], identifier_type="ISBN-13", source="OpenLibrary", language=None)


def get_isbn13(reference, verbose=False):
    """Return the ISBN-13s known or derivable for `reference`.

    ISBN-13 identifiers already on the reference are returned directly.
    Otherwise ISBN-10 identifiers are converted with isbnlib, and
    OpenLibrary is consulted once for every ISBN-10, OpenLibrary, LCCN and
    OCLC identifier on the reference. Each newly found ISBN-13 is also
    added to the reference as an identifier.

    Args:
        reference: Object with an ``identifiers`` attribute.
        verbose: Print a message when an ISBN-10 cannot be converted.

    Returns:
        list: ISBN-13 values without duplicates.
    """
    # NOTE(review): the source was collapsed onto one line. This layout
    # assumes the OpenLibrary lookup in the ISBN-10 loop runs once per
    # identifier, after the try/except -- confirm.
    filter_identifiers = gnomics.objects.auxiliary_files.identifier.filter_identifiers

    isbn13_array = []
    for iden in filter_identifiers(reference.identifiers, ["isbn13", "isbn-13", "isbn 13"]):
        if iden["identifier"] not in isbn13_array:
            isbn13_array.append(iden["identifier"])
    if isbn13_array:
        return isbn13_array

    for iden in filter_identifiers(reference.identifiers, ["isbn10", "isbn-10", "isbn 10"]):
        try:
            isbn13_temp = isbnlib.to_isbn13(iden["identifier"])
            if isbn13_temp not in isbn13_array:
                isbn13_array.append(isbn13_temp)
                gnomics.objects.reference.Reference.add_identifier(reference, identifier=isbn13_temp, identifier_type="ISBN-13", source="ISBNlib", language=None)
        except Exception:
            if verbose:
                print("No corresponding ISBN-13 found.")
        # The original passed the undefined name `ref` to the OpenLibrary
        # lookup; the parameter is called `reference`.
        _add_openlibrary_isbn13s(reference, isbn13_array)

    # The three remaining identifier families are handled identically.
    for aliases in (
        ["openlibrary", "openlibrary id", "openlibrary identifier", "olid"],
        ["lccn", "library of congress control number"],
        ["oclc", "oclc number", "oclc control number"],
    ):
        for iden in filter_identifiers(reference.identifiers, aliases):
            _add_openlibrary_isbn13s(reference, isbn13_array)

    return isbn13_array
def parse_results(self, results):
    """Build a Book object for every item in `results`.

    Title, ISBN (converted with ``isbnlib.to_isbn13``), binding and
    publication date come from the item's 'ItemAttributes'. The binding is
    lower-cased and 'paperback' is reported as 'trade paperback'. The large
    cover URL is attached to each book as ``large_cover``.

    Returns:
        list: The Book objects, in item order.
    """
    results_out = []
    for result in self.get_items(results):
        attrs = result.get('ItemAttributes')
        binding = attrs.get('Binding').lower()
        book = Book(
            title=attrs.get('Title'),
            isbn=isbnlib.to_isbn13(attrs.get('ISBN', '')),
            book_type='trade paperback' if binding == 'paperback' else binding,
            publication_date=parser.parse(attrs.get('PublicationDate')).date(),
            amazon_link=result.get('DetailPageURL', ''),
        )
        book.large_cover = result.get('LargeImage', {}).get('URL', '')
        results_out.append(book)
    return results_out
def valid():
    """Check the ISBN-10 held in the module-level ``isbnInput`` and report on it.

    Each of the ten characters is multiplied by its position weight (10
    down to 1, with a final 'X' counting as ten) and the products are
    summed. The ISBN is valid when the sum is divisible by 11. The sum is
    always printed. For a valid ISBN the metadata from ``isbnlib.meta`` is
    printed too; otherwise a message naming the rejected input is printed.
    """
    # weights 10..2 for the first nine digits
    isbnSum = sum(int(isbnInput[i]) * (10 - i) for i in range(9))
    # sometimes, the last number in the ISBN is a Ten, which is written as an 'X'
    if isbnInput[9] == 'X':
        isbnSum += 10
    else:
        isbnSum += int(isbnInput[9])
    print('The sum of the ISBN is: ' + str(isbnSum))

    if isbnSum % 11 == 0:
        print('You entered a valid ISBN number')
        # meta() takes the ISBN to look up, the service to use, and the
        # cache to use for the result
        print(isbnlib.meta(isbnInput, service = 'merge', cache = 'default'))
    else:
        # Report what the user typed. The original printed the result of
        # to_isbn13(), which is empty for an invalid ISBN.
        print(str(isbnInput) + ' is not a valid ISBN number')
def handle_isbn(val):
    """Return `val` as a masked ISBN-13, or None when `val` is falsy.

    The value is converted to ``str`` before being passed through
    ``to_isbn13`` and ``mask``.
    """
    if not val:
        return None
    isbn13 = to_isbn13(str(val))
    return mask(isbn13)
def main():
    """Query each GCIS book's ISBN through RQUERY in several spellings.

    The list of books is read from the URL given with ``--GCIS`` (or a
    default). For every book its JSON record is fetched, the ISBN is
    cleaned and validated, and thirteen spellings of it (raw, canonical,
    ISBN-10/13, hyphenated, each also prefixed with "ISBN ") are
    substituted into ``QUERY`` and run through ``RQUERY`` until one
    returns a non-empty result, which is printed. (The original comment
    calls these "DBpedia ISBN formats".) A book that fails at any step is
    reported on stdout and written to the module-level ``file``.
    """
    # Command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-path', '--GCIS', help = "Insert url path to GCIS book in JSON format [ex.'https://gcis-search-stage.jpl.net:3000/book.json?all=1'] ")
    args = parser.parse_args()
    GCIS = args.GCIS

    if GCIS is None:
        GCIS = 'https://gcis-search-stage.jpl.net:3000/book.json?all=1'
        print('NO MANUAL GCIS PATH\n ALL GCIS BOOK JSON FORMATS WILL BE USED AS DEFAULT')

    GCISPAR = parse(GCIS)

    for record in GCISPAR:
        # Reset per book so the failure report below never formats an
        # unbound name or a value left over from the previous book.
        TITLE = ISBNS = IDEN = None
        try:
            # Extracts book identifier from GCIS
            IDEN = record["identifier"]
            match = re.search(r'.*/(.*?)\..*?$', GCIS)
            if match:
                FILETYPE = match.groups()[0]
            # HREF = url that leads to book.json in GCIS-DEV
            HREF = 'https://gcis-search-stage.jpl.net:3000/{}/{}.json' .format(FILETYPE,IDEN)
            HREFPAR = parse(HREF)

            # Extracts book title and isbn from GCIS-DEV
            book = dict(HREFPAR)
            TITLE = book['title']
            ISBNS = book['isbn']

            # Cleans ISBNS to only contain valid characters
            CISBN = clean(ISBNS)
            canon = canonical(CISBN)
            # V13 = validated canonical ISBN-13
            V13 = EAN13(CISBN)
            if V13 is None:
                V13 = canon

            M = parse(HREF)
            print("GCIS-DEV\n\n\t", M, '\n\n\t', "isbn_original:", ISBNS, '\n\n\t', "isbn_mod:", V13, "\n\n")

            # DBpedia ISBN formats, in the order they are tried
            isbn10 = to_isbn10(CISBN)
            hyphenated10 = hyphenate(isbn10)
            isbn13 = to_isbn13(CISBN)
            hyphenated13 = hyphenate(isbn13)
            tests = [
                ISBNS,
                canon,
                isbn10,
                hyphenated10,
                isbn13,
                hyphenated13,
                V13,
                "ISBN {}" .format(CISBN),
                "ISBN {}" .format(canon),
                "ISBN {}" .format(hyphenated13),
                "ISBN {}" .format(V13),
                "ISBN {}" .format(isbn10),
                "ISBN {}" .format(hyphenated10),
            ]

            for indie in tests:
                # One query per spelling; the original ran it three times.
                found = RQUERY(QUERY % indie)
                if len(found) != 0:
                    print(found)
                    break

        except Exception:
            Error = '\n\t######## PROBLEM #######\n\tTitle:{}\n\tGCIS-ISBN:{}\n\tIdentifier:{}\n\n'.format(TITLE, ISBNS, IDEN)
            print(Error)
            file.write(Error)
def main():
    """Copy a CSV of books to a new file, adding web metadata looked up by ISBN.

    Command line: ``[--batchsize N] input output``. Every input row is
    written to the output. At most ``--batchsize`` rows (default 8) that
    have no 'webchecked' value are looked up with ``isbnlib.meta``; the
    outcome is recorded in the row's 'webchecked' column and, on success,
    the fields named in the module-level ``fieldnames`` are taken from the
    web data.

    NOTE(review): this is Python 2 code (print statements, ``basestring``,
    ``iteritems``). The source was collapsed onto two lines, so the nesting
    shown here is a best-effort reconstruction -- confirm against the
    original file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=8)
    parser.add_argument("input")
    parser.add_argument("output")
    args = parser.parse_args()
    # Number of web lookups still allowed in this run.
    countdown = args.batchsize
    with io.open(args.input, 'r', encoding='utf-8') as input:
        books_reader = csv.DictReader(input)
        with io.open(args.output, 'w', encoding='utf-8') as output:
            books_writer = csv.DictWriter(output, fieldnames)
            books_writer.writeheader()
            for row in books_reader:
                if countdown > 0 and not row.get('webchecked', None):
                    # NOTE(review): str() turns a missing ISBN into the
                    # text 'None', so the `if isbn:` test below is always
                    # true.
                    isbn = str(row.get('ISBN', None))
                    # Nine characters: prepend a "0" to get ten.
                    if len(isbn) == 9:
                        isbn = "0" + isbn
                    if isbn:
                        countdown = countdown - 1
                        new_isbn = isbnlib.to_isbn13(isbnlib.canonical(isbn))
                        if new_isbn is None or new_isbn == "":
                            print "Could not canonicalize isbn", isbn
                        else:
                            row['ISBN'] = new_isbn
                        details = None
                        # The lookup uses the ISBN as read from the file,
                        # not new_isbn.
                        try:
                            details = isbnlib.meta(isbn)
                        except isbnlib.dev._exceptions.NoDataForSelectorError:
                            print "No data for ISBN", isbn, "title", row.get('Title', "Unknown")
                            row['webchecked'] = "No data for ISBN"
                        except isbnlib._exceptions.NotValidISBNError:
                            print "Invalid ISBN", isbn, "for", row['Title']
                            row['webchecked'] = "Invalid ISBN"
                        except isbnlib.dev._exceptions.ISBNNotConsistentError:
                            print "Inconsistent data for", row['Title']
                            row['webchecked'] = "Inconsistent ISBN data"
                        if details:
                            if details.get('ISBN-13', "") != "" and row.get('ISBN', "") == "":
                                row['ISBN'] = details['ISBN-13']
                            # Authors are '/'-separated in the file; split
                            # here and joined again further down.
                            if 'Authors' in row:
                                row['Authors'] = row['Authors'].split('/')
                            old_title = row['Title']
                            web_title = details['Title']
                            if old_title != web_title:
                                old_canon = canonicalize_title(old_title)
                                web_canon = canonicalize_title(web_title)
                                old_len = len(old_canon)
                                web_len = len(web_canon)
                                # The web title is accepted when it extends
                                # the file's title or is the same after
                                # canonicalisation; otherwise the file's
                                # title is kept.
                                if ((web_len > old_len and old_canon in web_canon)
                                    or (web_len == old_len and old_canon == web_canon)):
                                    print "Title improvement from", old_title, "to", web_title
                                else:
                                    print "Title discrepancy:", old_title, "in file,", web_title, "found online"
                                    details['Title'] = old_title
                            # don't use 'update', because we don't want to
                            # drag in random other fields that dictwriter
                            # will then object to
                            for key in fieldnames:
                                if key in details:
                                    row[key] = details[key]
                            if 'Authors' in row:
                                row['Authors'] = '/'.join(row['Authors'])
                            row['webchecked'] = "OK"
                # from https://docs.python.org/2/library/csv.html
                # NOTE(review): encoded_row is built but `row` is what gets
                # written -- confirm which one was intended.
                encoded_row = {k: (v.encode("utf-8") if isinstance(v, basestring) else v) for k,v in row.iteritems()}
                books_writer.writerow(row)
if 'Project Gutenberg' in publisher_names and not metadata.links: # Project Gutenberg texts don't have ISBNs, so if there's an # ISBN on there, it's probably wrong. Unless someone stuck a # description on there, there's no point in discussing # OCLC+LD's view of a Project Gutenberg work. return None if publisher_names: metadata.publisher = publisher_names[0] # Grab all the ISBNs. example_graphs = self.internal_lookup(subgraph, example_uris) for example in example_graphs: for isbn_name in 'schema:isbn', 'isbn': for isbn in ldq.values(example.get(isbn_name, [])): if len(isbn) == 10: isbn = isbnlib.to_isbn13(isbn) elif len(isbn) != 13: continue if isbn: metadata.identifiers.append(IdentifierData( type = Identifier.ISBN, identifier = isbn )) for subject_type, subjects_details in subjects.items(): for subject_detail in subjects_details: if isinstance(subject_detail, dict): subject_name = subject_detail.get('name') subject_identifier = subject_detail.get('id') metadata.subjects.append(SubjectData( type=subject_type, identifier=subject_identifier, name=subject_name,
IDEN = GCISPAR[x]["identifier"] match = re.search(r'.*/(.*?)\..*?$', GCIS) if match: FILETYPE = match.groups()[0] #HREF = url that leads to book.json in GCIS-DEV HREF = 'https://gcis-search-stage.jpl.net:3000/{}/{}.json' .format(FILETYPE,IDEN) #HREF = 'https://gcis-search-stage.jpl.net:3000/book/13b8b4fc-3de1-4bd8-82aa-7d3a6aa54ad5.json' HREFPAR = parse(HREF) #Extracts book title and isbn from GCIS-DEV d = dict(HREFPAR) TITLE = d['title'] ISBNS = d['isbn'] #Cleans ISBNS to only conatian valid characters CISBN = clean(ISBNS) #Converts all listed ISBNS to a ISBN-13 format C13 = to_isbn13(CISBN) #V13 = validated canonical ISBN-13 V13 = EAN13(C13) M = parse(HREF) MV13 = M["isbn"] = V13 ORGISBN = M["org_isbn"] = ISBNS print(M, '\n\t', "isbn_original:", ISBNS) s = requests.Session() s.auth = ('alex' , '8aed39fa67049cdfd42ef612a97e8535ecd46d8955afcc8b') s.headers.update({'Accept': 'application/json'}) r = s.post(HREF, data = M , verify = False) r.raise_for_status() sys.exit() #print('Title:', TITLE, '\nIdentifier:', IDEN,'\n',HREF,'\n\tISBN:', V13, '\n') except(TypeError, ValueError): print('\n\t######## PROBLEM #######\n','\tTitle:', TITLE,'\n\tGCIS-ISBN:', ISBNS,'\n\tIdentifier:', IDEN, '\n\n')
def com_isbn_10_to_13(isbn_string):
    """Return `isbn_string` converted by ``isbnlib.to_isbn13``."""
    isbn13 = isbnlib.to_isbn13(isbn_string)
    return isbn13