def isbn_differ(entry_data, suggestion_data):
    """Tell whether the entry's ISBN is missing from the suggested ISBNs.

    Both sides are passed through ``canonical`` and ``to_isbn13`` before
    being compared.  ``suggestion_data`` is iterated as 2-item pairs whose
    first item is the ISBN-like value; the second item is ignored.

    Returns ``True`` when the entry yields no usable ISBN-13, or when no
    suggestion converts to the same value; ``False`` otherwise.
    """
    wanted = to_isbn13(canonical(entry_data))
    if not wanted:
        return True

    # Every suggestion is converted up front (no short-circuit).
    candidates = []
    for (raw, _) in suggestion_data:
        candidates.append(to_isbn13(canonical(raw)))
    return not (wanted in candidates)
def converter():
    """Convert the two submitted ISBNs and flash the results.

    On POST, the ``ISBN-13`` and ``ISBN-10`` form fields are canonicalised;
    the ISBN-10 is flashed as an ISBN-13 and the ISBN-13 as an ISBN-10, then
    the client is redirected back to this view.  Any other method renders
    the converter template.
    """
    if request.method != 'POST':
        return render_template('book/converter.html')

    form = request.form
    isbn_13 = isbnlib.canonical(form['ISBN-13'])
    isbn_10 = isbnlib.canonical(form['ISBN-10'])

    # Each field is converted to the *other* format.
    flash(isbnlib.to_isbn13(isbn_10))
    flash(isbnlib.to_isbn10(isbn_13))
    return redirect(url_for('book.converter'))
def lookup_ISBN(book_isbn):
    """Open the isbnsearch.org page for ``book_isbn`` in a web browser.

    The value is accepted when ``isbnlib`` recognises it as an ISBN-13 or,
    failing that, as an ISBN-10.  Its canonical form is appended to the
    search URL.  Anything else only prints an error message.
    """
    recognised = isbnlib.is_isbn13(book_isbn) or isbnlib.is_isbn10(book_isbn)
    if not recognised:
        print(
            "Invalid input. Please check to see if it is in either ISBN13 or ISBN10."
        )
        return

    trimmed = isbnlib.canonical(book_isbn)
    webbrowser.open(f"https://isbnsearch.org/isbn/{trimmed}")
def LookupISBN(ID):
    """Open the isbnsearch.org page for ``ID`` in a web browser.

    ``ID`` is checked with ``isbnlib.is_isbn13`` and then with
    ``isbnlib.is_isbn10``.  When either accepts it, the canonical form is
    appended to the search URL and handed to ``webbrowser.open``.
    Otherwise an error message is printed.
    """
    if isbnlib.is_isbn13(ID):
        trimmedID = isbnlib.canonical(ID)
        webbrowser.open("https://isbnsearch.org/isbn/{}".format(trimmedID))
    else:
        if isbnlib.is_isbn10(ID):
            trimmedIdISBN10 = isbnlib.canonical(ID)
            webbrowser.open(
                "https://isbnsearch.org/isbn/{}".format(trimmedIdISBN10))
        else:
            print(
                "Invalid input. Please check to see if it is in either ISBN13 or ISBN10."
            )
            # NOTE(review): presumably waits for Enter so the message can be
            # read, then terminates the program.  The nesting of these two
            # calls was reconstructed from flattened source -- confirm that
            # they belong to the invalid-input branch only.
            input()
            exit()
def main():
    """Compare every GCIS book record with its ISBNdb entry and log both.

    For each record in the module-level ``GCISPAR`` list the book JSON is
    fetched from GCIS-DEV, its ISBN is cleaned and normalised (``EAN13``,
    falling back to ``canonical``), and the ISBNdb v2 API is queried for
    that ISBN.  Both payloads are printed and written to ``file2``; an
    ISBNdb error message, when present, is written to ``file``.  Any
    failure while handling a record is reported to stdout and ``file`` and
    the loop moves on to the next record.

    Command line: ``-apikey/--isbndbkey`` overrides the built-in API key.
    """
    # Command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-apikey', '--isbndbkey', help="Insert ISBNDB apikey")
    args = parser.parse_args()

    if args.isbndbkey:
        print(args.isbndbkey)
    else:
        print('NO MANUAL API KEY')

    # Fall back to the built-in key when none was given on the command line.
    apikey = args.isbndbkey
    if apikey is None:
        apikey = 'XOATAY1G'

    for record in GCISPAR:
        # Reset per record so the error report below never shows values left
        # over from a previous record (or fails on names that were never set).
        IDEN = TITLE = ISBNS = None
        try:
            # Extracts book identifier from GCIS
            IDEN = record["identifier"]
            match = re.search(r'.*/(.*?)\..*?$', GCIS)
            if match:
                FILETYPE = match.groups()[0]

            # HREF = url that leads to book.json in GCIS-DEV
            HREF = 'https://gcis-search-stage.jpl.net:3000/{}/{}.json'.format(
                FILETYPE, IDEN)
            HREFPAR = parse(HREF)

            # Extracts book title and isbn from GCIS-DEV
            d = dict(HREFPAR)
            TITLE = d['title']
            ISBNS = d['isbn']

            # Cleans ISBNS to only contain valid characters
            CISBN = clean(ISBNS)

            # V13 = validated canonical ISBN-13
            V13 = EAN13(CISBN)
            if V13 is None:
                V13 = canonical(CISBN)

            M = parse(HREF)
            data = 'http://isbndb.com/api/v2/json/{}/book/{}'.format(
                apikey, V13)
            v = parse(data)

            GCISDATA = "GCIS-DEV\n\n\t{}\n\n\tisbn_original:{}\n\n\tisbn_mod:{}\n\n".format(
                M, ISBNS, V13)
            APIDATA = "ISBNDB\n\n\t{}\n\n------------\n\n".format(v)
            print("GCIS-DEV\n\n\t", M, '\n\n\t', "isbn_original:", ISBNS,
                  '\n\n\t', "isbn_mod:", V13, "\n\n")
            print("ISBNDB\n\n\t", v, '\n\n')

            # A reply without an 'error' key must not be treated as a
            # failure, so look the key up without raising KeyError.
            error_message = v.get('error')
            if error_message:
                file.write(error_message + "\n")

            # Writing Metadata onto file2
            file2.write(GCISDATA)
            file2.write(APIDATA)
        except Exception:
            # Best effort: report the record and continue.  KeyboardInterrupt
            # and SystemExit are deliberately allowed to propagate.
            Error = '\n\t######## PROBLEM #######\n\tTitle:{}\n\tGCIS-ISBN:{}\n\tIdentifier:{}\n\n'.format(
                TITLE, ISBNS, IDEN)
            print(Error)
            file.write(Error)
async def check(self, entry):
    """Check that the entry's ISBN is written in the configured format.

    The expected format comes from the ``isbn_format`` option and must be
    ``'canonical'`` or ``'masked'``.  Nothing is reported when the option is
    unset, when the entry has no ``isbn`` value, or when the value does not
    clean up to a valid ISBN.

    Returns a list holding one ``(NAME, message, hint)`` tuple when the
    stored ISBN differs from its expected rendering, otherwise ``[]``.

    Raises ``ConfigurationError`` for an unknown ``isbn_format`` value; this
    is only checked once a valid ISBN has been found on the entry.
    """
    fmt = self._cfg.get('isbn_format', entry)
    if not fmt:
        return []

    isbn = entry.data.get('isbn')
    if not isbn:
        return []

    clean_isbn = clean(isbn)
    if not clean_isbn or notisbn(clean_isbn):
        return []

    if fmt not in ('canonical', 'masked'):
        # Two adjacent literals instead of a backslash continuation inside
        # the string, which leaked stray characters into the message.
        raise ConfigurationError(
            "The option 'isbn_format' must be "
            "either of 'canonical' or 'masked'.")

    if fmt == 'canonical':
        cisbn = canonical(clean_isbn)
        if cisbn != isbn:
            return [(type(self).NAME,
                     "ISBN '{}' is not in canonical format.".format(isbn),
                     "Canonical format would be '{}'".format(cisbn))]
    else:  # fmt == 'masked'
        misbn = mask(clean_isbn)
        if misbn != isbn:
            return [(type(self).NAME,
                     "ISBN '{}' is not in masked format.".format(isbn),
                     "Masked format would be '{}'".format(misbn))]

    return []
def get_meta(self):
    """Return the metadata dictionary for this object's ISBN.

    The dictionary is whatever ``meta`` returns for the canonical form of
    ``self.isbnlike``, with its ``'ISBN-13'`` entry replaced by the masked
    rendering of that value.
    """
    record = meta(canonical(self.isbnlike))
    masked_isbn = mask(record['ISBN-13'])
    record['ISBN-13'] = masked_isbn
    return record
def _isbn(details_url):
    """Get the card isbn.

    - details_url: valid url leading to the card's product page

    return: a tuple ``(isbn, soup)``.  ``isbn`` is the canonical form of the
    first ISBN-like string starting with ``978`` found in the page element
    with class ``col49 floatRight``, or ``None`` when none was found or an
    error occurred.  ``soup`` is the parsed page, or ``None`` when the page
    could not be fetched or parsed.
    """
    import isbnlib

    isbn = None
    soup = None  # stays None if the request or the parsing fails
    try:
        log.info("Looking for isbn of {}...".format(details_url))
        req = requests.get(details_url)
        soup = BeautifulSoup(req.content, "lxml")
        node = soup.find(class_="col49 floatRight")
        # A real list: on Python 3 ``filter`` returns a lazy iterator that is
        # always truthy and cannot be indexed.
        candidates = [it for it in isbnlib.get_isbnlike(node.text)
                      if it.startswith('978')]
        if candidates:
            isbn = isbnlib.canonical(candidates[0])
            log.info("Found isbn of url {}: {}".format(details_url, isbn))
    except Exception as e:
        # Best effort: log and fall through to the common return.
        log.error("Error while getting the isbn from url '{}': {}".format(details_url, e))

    # Always the documented tuple, also on the error path.
    return isbn, soup
def isbn_13_to_isbn_10(isbn_13):
    """Convert an ISBN-13 to its ISBN-10 form.

    The input is canonicalised first.  ``None`` is returned unless the
    result is 13 digits long, starts with ``978`` and carries the check
    digit computed by ``check_digit_13``.
    """
    isbn_13 = canonical(isbn_13)

    if len(isbn_13) != 13:
        return None
    if not isbn_13.isdigit():
        return None
    if not isbn_13.startswith('978'):
        return None
    if check_digit_13(isbn_13[:-1]) != isbn_13[-1]:
        return None

    # Drop the '978' prefix and the old check digit, then append a new one.
    core = isbn_13[3:-1]
    return core + check_digit_10(core)
def isbn_10_to_isbn_13(isbn_10):
    """Convert an ISBN-10 to its ISBN-13 form.

    The input is canonicalised first.  ``None`` is returned unless the
    result is 10 characters long, its first nine are digits and the last
    one matches the check digit computed by ``check_digit_10``.
    """
    isbn_10 = canonical(isbn_10)

    if len(isbn_10) != 10:
        return None
    body = isbn_10[:-1]
    if not body.isdigit():
        return None
    if check_digit_10(body) != isbn_10[-1]:
        return None

    # Prefix with '978' and append a fresh ISBN-13 check digit.
    prefixed = '978' + body
    return prefixed + check_digit_13(prefixed)
def _isbn10toIsbn13(match):
    """Helper function to deal with a single ISBN.

    ``match`` must provide the ISBN in its ``code`` group.  An ISBN rejected
    by ``is_valid`` is returned upper-cased but otherwise unchanged.  A
    valid one is converted with ``stdnum.isbn`` when that name is
    available, else with ``isbnlib``; in the latter case hyphenation is
    restored if the input had any.  ``None`` results when neither library
    name is defined.
    """
    code = match.group('code').upper()

    try:
        is_valid(code)
    except InvalidIsbnException:
        # don't change
        return code

    # Probe for python-stdnum first.
    try:
        stdnum.isbn
    except NameError:
        pass
    else:
        return stdnum.isbn.to_isbn13(code)

    # Then probe for isbnlib.
    try:
        isbnlib
    except NameError:
        return None

    # remove hyphenation, otherwise isbnlib.to_isbn13() returns None
    bare = isbnlib.canonical(code)
    converted = isbnlib.to_isbn13(bare)
    if bare == code:
        return converted

    # add removed hyphenation
    hyphenated = hyphenateIsbnNumbers('ISBN ' + converted)
    return hyphenated[5:]
def post(self, request, *args, **kwargs):
    """Handle the Add / Submit / Cancel buttons of the order form.

    The posted ``create`` value selects the action:

    * ``Add``: when the posted ISBN is valid and tradeable, append a
      ``Transaction`` for the first tradeable book to the session and add
      its subtotals to the running totals; otherwise set an error.
    * ``Submit``: redirect to ``orders:confirm`` unless no transaction has
      been added yet.
    * ``Cancel``: empty the transaction list and reset both totals.

    Unless redirected, the template is rendered with the form, any error
    and the order state currently stored in the session.
    """
    action = request.POST['create']
    session = request.session
    new_order = session['new_order']
    transactions = session['transactions']
    grand_total = session['grand_total']
    grand_total_paid = session['grand_total_paid']

    context = {'form': self.get_form(self.get_form_class())}

    if action == 'Add':
        isbn_maybe = request.POST['ISBN']
        quantity = request.POST['Quantity']
        if not (is_isbn10(isbn_maybe) or is_isbn13(isbn_maybe)):
            # isbn check failed
            context['error'] = 'Not ISBN'
        else:
            books = get_tradeable_books(canonical(isbn_maybe))
            if len(books) == 0:
                context['error'] = 'Do not buy'
            else:
                offer = books[0]
                transaction = Transaction(
                    book=offer.as_book(),
                    subtotal=transaction_subtotal(new_order, offer, quantity),
                    subtotal_paid=transaction_subtotal_paid(
                        new_order, offer, quantity),
                    quantity=quantity,
                    notes='')
                transactions.append(transaction)
                grand_total += transaction.subtotal
                grand_total_paid += transaction.subtotal_paid
                session['transactions'] = transactions
                session['grand_total'] = grand_total
                session['grand_total_paid'] = grand_total_paid
    elif action == 'Submit':
        if transactions == []:
            context['error'] = 'You need to add at least one book.'
        else:
            return redirect('orders:confirm')
    elif action == 'Cancel':
        zero = Decimal(0).quantize(Decimal('0.01'), rounding=ROUND_DOWN)
        session['transactions'] = []
        session['grand_total'] = zero
        session['grand_total_paid'] = zero

    # Render from the session so the page shows the stored state.
    for key in ('new_order', 'transactions', 'grand_total', 'grand_total_paid'):
        context[key] = session[key]
    return render(request, self.template_name, context)
def normalize_isbn(isbn):
    """Return the canonical form of ``isbn``, or ``None``.

    :param: str isbn: An isbn to normalize
    :rtype: str|None
    :return: The result of ``canonical(isbn)``; ``None`` when the input or
             that result is empty/falsy
    """
    if not isbn:
        return None
    normalized = canonical(isbn)
    return normalized if normalized else None
def save(self, force_insert=False, force_update=False, using=None,
         update_fields=None):
    """Store the ISBN in canonical form, then delegate to the parent ``save``.

    A ``None`` ISBN is left untouched.  The four options are forwarded to
    the parent class by keyword: Django 5.1 deprecated passing them
    positionally to ``Model.save()``, and keyword forwarding also works on
    older versions.

    Returns whatever the parent ``save`` returns.
    """
    if self.isbn is not None:
        self.isbn = isbnlib.canonical(self.isbn)
    return super().save(
        force_insert=force_insert,
        force_update=force_update,
        using=using,
        update_fields=update_fields,
    )
def get_isbn10(self):
    """Return this record's canonical ISBN-10.

    The first entry of ``self.isbn_10`` is preferred.  When that yields
    nothing, the ISBN-13 from ``get_isbn13()`` is converted instead; a
    falsy ISBN-13 is returned as is.
    """
    own = self.isbn_10 and canonical(self.isbn_10[0])
    if own:
        return own

    fallback = self.get_isbn13()
    return fallback and isbn_13_to_isbn_10(fallback)
def newbook():
    """Record a newly captured book from the submitted barcode and ISBN.

    On POST the barcode must be unused and 24 characters long, and the
    canonicalised ISBN must be a valid ISBN-10 or ISBN-13; otherwise the
    problem is flashed and the form is rendered again.  For valid input the
    book metadata is looked up through the ``goob`` service (a lookup
    without data is tolerated), a row is inserted into ``new_book`` and the
    client is redirected back to this view.  Other methods just render the
    form.
    """
    if request.method != 'POST':
        return render_template('book/newbook.html')

    barcode = str(request.form['barcode'])
    isbn = isbnlib.canonical(request.form['isbn'])
    db = get_db()

    # Validation, first failure wins: duplicate barcode, length, ISBN.
    duplicate = db.execute('SELECT id FROM new_book WHERE barcode = ?',
                           (barcode, )).fetchone()
    if duplicate is not None:
        error = 'Barcode {} is already captured.'.format(barcode)
    elif int(len(barcode)) != 24:
        error = '{} is an incorrect length.'.format(barcode)
    elif not (isbnlib.is_isbn10(isbn) is True
              or isbnlib.is_isbn13(isbn) is True):
        error = f'{isbn} is not a valid ISBN'
    else:
        error = None

    if error is not None:
        flash(error)
        return render_template('book/newbook.html')

    # Metadata defaults, used when the lookup finds nothing.
    author = lang = publisher = title = publYear = None
    try:
        book_lib = (isbnlib.meta(isbn, service='goob', cache='default'))
        # Display meta data to user.
        flash(book_lib)
        # Copy the meta dictionary values for insertion into the DB.
        author = str(book_lib['Authors'])
        lang = book_lib['Language']
        publisher = book_lib['Publisher']
        title = book_lib['Title']
        publYear = book_lib['Year']
    except NoDataForSelectorError:
        # The book was not found by the search; it is recorded anyway.
        flash("Book recorded: Author, Title not found.")

    db.execute(
        'INSERT INTO new_book (barcode, isbn, author_id, author, lang, publisher, title, publYear)'
        ' VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
        (barcode, isbn, g.user['id'], author, lang, publisher, title,
         publYear))
    db.commit()
    return redirect(url_for('book.newbook'))
def get_isbn13(self):
    """Return this record's canonical ISBN-13.

    The first entry of ``self.isbn_13`` is preferred.  When that yields
    nothing, the first entry of ``self.isbn_10`` is converted instead; a
    falsy ``isbn_10`` value is returned as is.
    """
    own = self.isbn_13 and canonical(self.isbn_13[0])
    if own:
        return own

    first_isbn_10 = self.isbn_10 and self.isbn_10[0]
    return first_isbn_10 and isbn_10_to_isbn_13(first_isbn_10)
def normalize_isbn(isbn):
    """Reduce ``isbn`` to an ISBN-like string via ``canonical``.

    Does NOT validate length or checkdigits.

    :param: str isbn: An isbnlike string to normalize
    :rtype: str|None
    :return: the ``canonical`` result, or None when the input or that
             result is empty/falsy
    """
    if not isbn:
        return None
    stripped = canonical(isbn)
    if not stripped:
        return None
    return stripped
def isbn(self, value):
    """Store ``value`` as a canonical ISBN-13 in ``self._isbn``.

    A falsy value clears the stored ISBN to ``''``.  An ISBN-13 is stored
    in canonical form and an ISBN-10 is converted to ISBN-13.

    Raises ``ValueError`` when the value is neither, or when ``isbnlib``
    fails while inspecting it.  A value that was merely not recognised used
    to be ignored silently, leaving the previous ISBN in place.
    """
    if not value:
        self._isbn = ''
        return

    try:
        if isbnlib.is_isbn13(value):
            converted = isbnlib.canonical(value)
        elif isbnlib.is_isbn10(value):
            converted = isbnlib.to_isbn13(value)
        else:
            converted = None
    except Exception as exc:
        # e.g. a non-string value that isbnlib cannot process
        raise ValueError('Invalid ISBN {}'.format(value)) from exc

    if converted is None:
        raise ValueError('Invalid ISBN {}'.format(value))
    self._isbn = converted
def isbn_13_to_isbn_10(isbn_13):
    """Convert an ISBN-13 to its ISBN-10 form, logging invalid input.

    The input is canonicalised first.  It must be 13 digits long, start
    with ``978`` and carry the check digit computed by ``check_digit_13``;
    otherwise the problem is logged at INFO level and ``None`` is returned.
    """
    isbn_13 = canonical(isbn_13)
    try:
        well_formed = (len(isbn_13) == 13
                       and isbn_13.isdigit()
                       and isbn_13.startswith('978'))
        # The check digit is only computed for well-formed input.
        if not (well_formed
                and check_digit_13(isbn_13[:-1]) == isbn_13[-1]):
            raise ValueError("%s is not a valid ISBN 13" % isbn_13)
    except ValueError as err:
        logger.info("Exception caught in ISBN transformation: %s" % err)
        return

    # Drop the '978' prefix and the old check digit, then append a new one.
    core = isbn_13[3:-1]
    return core + check_digit_10(core)
def clean_isbn(isbn):
    """Return ``isbn`` as an ISBN-13 without dashes, or ``""`` if invalid.

    A valid ISBN-10 is converted and a valid ISBN-13 is canonicalised.  A
    value that is neither but starts with ``978`` is retried without that
    prefix (an ISBN-10 that was wrongly prefixed), repeatedly if needed.
    """
    candidate = isbn
    while True:
        if isbnlib.is_isbn10(candidate):
            return isbnlib.to_isbn13(candidate)
        if isbnlib.is_isbn13(candidate):
            return isbnlib.canonical(candidate)
        if not candidate.startswith("978"):
            return ""
        # test if wrongly prefixed isbn10
        candidate = candidate[3:]
def isbn_to_asin(isbn):
    """Return the ISBN-10 (used as ASIN) for ``isbn``.

    The decision is made on the canonical form, so hyphenated or spaced
    input is accepted.  The raw input length used to be tested instead,
    which rejected hyphenated ISBN-10s (13 characters long) and hyphenated
    ISBN-13s.

    Returns the canonical ISBN-10 for a valid ISBN-10, or the
    ``isbnlib.to_isbn10`` conversion for a valid ISBN-13.  Unrecognised
    input keeps the previous result: ``'0'`` when the raw input is 10 or 13
    characters long, ``None`` otherwise.
    """
    clean = isbnlib.canonical(isbn)

    if isbnlib.is_isbn10(clean):
        return clean
    if isbnlib.is_isbn13(clean):
        return isbnlib.to_isbn10(clean)

    # Legacy sentinel for invalid input, kept for existing callers.
    if len(isbn) in (10, 13):
        return '0'
    return None
def main():
    """Compare every GCIS book record with its ISBNdb entry and log both.

    For each record in the module-level ``GCISPAR`` list the book JSON is
    fetched from GCIS-DEV, its ISBN is cleaned and normalised (``EAN13``,
    falling back to ``canonical``), and the ISBNdb v2 API is queried for
    that ISBN.  Both payloads are printed and written to ``file2``; an
    ISBNdb error message, when present, is written to ``file``.  Any
    failure while handling a record is reported to stdout and ``file`` and
    the loop moves on to the next record.

    Command line: ``-apikey/--isbndbkey`` overrides the built-in API key.
    """
    # Command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-apikey', '--isbndbkey', help = "Insert ISBNDB apikey")
    args = parser.parse_args()

    if args.isbndbkey:
        print(args.isbndbkey)
    else:
        print('NO MANUAL API KEY')

    # Fall back to the built-in key when none was given on the command line.
    apikey = args.isbndbkey
    if apikey is None:
        apikey = 'XOATAY1G'

    for book in GCISPAR:
        # Reset per record so the error report below never shows values left
        # over from a previous record (or fails on names that were never set).
        IDEN = TITLE = ISBNS = None
        try:
            # Extracts book identifier from GCIS
            IDEN = book["identifier"]
            match = re.search(r'.*/(.*?)\..*?$', GCIS)
            if match:
                FILETYPE = match.groups()[0]

            # HREF = url that leads to book.json in GCIS-DEV
            HREF = 'https://gcis-search-stage.jpl.net:3000/{}/{}.json' .format(FILETYPE, IDEN)
            HREFPAR = parse(HREF)

            # Extracts book title and isbn from GCIS-DEV
            d = dict(HREFPAR)
            TITLE = d['title']
            ISBNS = d['isbn']

            # Cleans ISBNS to only contain valid characters
            CISBN = clean(ISBNS)

            # V13 = validated canonical ISBN-13
            V13 = EAN13(CISBN)
            if V13 is None:
                V13 = canonical(CISBN)

            M = parse(HREF)
            data = 'http://isbndb.com/api/v2/json/{}/book/{}'.format(apikey, V13)
            v = parse(data)

            GCISDATA = "GCIS-DEV\n\n\t{}\n\n\tisbn_original:{}\n\n\tisbn_mod:{}\n\n" .format(M, ISBNS, V13)
            APIDATA = "ISBNDB\n\n\t{}\n\n------------\n\n" .format(v)
            print("GCIS-DEV\n\n\t", M, '\n\n\t', "isbn_original:", ISBNS, '\n\n\t', "isbn_mod:", V13, "\n\n")
            print("ISBNDB\n\n\t", v, '\n\n')

            # A reply without an 'error' key must not be treated as a
            # failure, so look the key up without raising KeyError.
            error_message = v.get('error')
            if error_message:
                file.write(error_message + "\n")

            # Writing Metadata onto file2
            file2.write(GCISDATA)
            file2.write(APIDATA)
        except Exception:
            # Best effort: report the record and continue.  KeyboardInterrupt
            # and SystemExit are deliberately allowed to propagate.
            Error = '\n\t######## PROBLEM #######\n\tTitle:{}\n\tGCIS-ISBN:{}\n\tIdentifier:{}\n\n'.format(TITLE, ISBNS, IDEN)
            print(Error)
            file.write(Error)
def isbn_10_to_isbn_13(isbn_10):
    """Convert an ISBN-10 to its ISBN-13 form, logging invalid input.

    The input is canonicalised first.  A 13-character result is returned
    unchanged.  Otherwise it must be 10 characters long, with nine leading
    digits and the check digit computed by ``check_digit_10``; if not, the
    problem is logged at INFO level and ``None`` is returned.
    """
    isbn_10 = canonical(isbn_10)
    if len(isbn_10) == 13:
        return isbn_10

    try:
        well_formed = len(isbn_10) == 10 and isbn_10[:-1].isdigit()
        # The check digit is only computed for well-formed input.
        if not (well_formed
                and check_digit_10(isbn_10[:-1]) == isbn_10[-1]):
            raise ValueError("%s is not a valid ISBN 10" % isbn_10)
    except ValueError as err:
        logger.info("Exception caught in ISBN transformation: %s" % err)
        return

    # Prefix with '978' and append a fresh ISBN-13 check digit.
    prefixed = '978' + isbn_10[:-1]
    return prefixed + check_digit_13(prefixed)
def isbn_validator(isbnlike):
    """Validate ISBN data for the Book class, which only accepts ISBN-13.

    Raises ``ValidationError("ISBN invalid")`` when the value is not an
    ISBN-13 (an ISBN-10 is rejected too) or is not an ISBN at all, and
    ``ValidationError("ISBN valid but not used")`` when the metadata lookup
    for it fails.  On success the retrieved metadata is printed.
    """
    acceptable = is_isbn13(isbnlike) and not notisbn(isbnlike)
    if not acceptable:
        raise ValidationError("ISBN invalid")

    try:
        found = meta(canonical(isbnlike))
        print(found)
    except Exception:
        raise ValidationError("ISBN valid but not used")
def find_isbns(cls, text):
    """Collect the valid ISBNs mentioned in, or linked from, ``text``.

    The text is split on whitespace.  A token starting with ``http`` is
    resolved with a HEAD request (its ``Location`` header, if any, replaces
    the token) and the resulting URL is handed to every method named in
    ``cls.SERVICES``.  Any other token is scanned with
    ``isbnlib.get_isbnlike``.

    Returns the canonical form of every candidate that is a valid ISBN-10
    or ISBN-13, in the order found.
    """
    candidates = []
    for token in text.split():
        if not token.startswith("http"):
            candidates += isbnlib.get_isbnlike(token, level="normal")
            continue

        url = requests.head(token).headers.get("Location") or token
        for service_name in cls.SERVICES:
            service = getattr(cls, service_name)
            candidates += service(url)

    valid = []
    for candidate in candidates:
        if isbnlib.is_isbn10(candidate) or isbnlib.is_isbn13(candidate):
            valid.append(isbnlib.canonical(candidate))
    return valid
def checksum_13(isbnlike):
    '''Calculate the proper ISBN-13 check sum for a test ISBN 13 string.

    The input string must have 13 legal characters with or without dashes
    but the checksum character need not be valid.

    Return a string character of the checksum digit.  The digit is
    ``(10 - weighted_sum % 10) % 10``: the outer modulo maps a weighted sum
    that is a multiple of 10 to '0'.  Without it the result was '10'.
    '''
    isbndigits = isbn.canonical(isbnlike)
    tmp_sum = sum(num * int(value)
                  for num, value in zip(isbn13_mults, isbndigits[:12]))
    return str(int((10 - tmp_sum % 10) % 10))
def createISBNLibraryFile(filename):
    '''
    This function is to create a .csv file given a name for the file
    It will also add in the first row for the file; headings for all book info

    The headings are the keys of the metadata that isbnlib returns for a
    sample ISBN.

    Inputs: filename, name of the file as a string
    Outputs: Creates a file with first row filled in
    '''
    # Get a sample ISBN for header purposes
    sampleISBN = "978-0-099-52848-7"  # Call of Cthulu (Nominal data)
    isbn = isbnlib.canonical(sampleISBN)
    itemKeys = isbnlib.meta(isbn).keys()

    # Open file and write header row.  newline='' is what the csv module
    # requires; without it extra blank rows appear on platforms that
    # translate line endings.
    with open(filename, 'w', newline='') as file:
        write = csv.writer(file)
        write.writerow(itemKeys)
def post(self, request, *args, **kwargs):
    """Look up the posted ISBN and render the matching book.

    A value that is not a valid ISBN-10/13 renders an error.  For a valid
    one the first tradeable book is shown; when there is none, the first
    book carrying that ISBN is shown together with a 'Do not buy' error.
    """
    isbn_maybe = request.POST['ISBN']
    context = {'form': self.get_form(self.get_form_class())}

    if not (is_isbn10(isbn_maybe) or is_isbn13(isbn_maybe)):
        context['error'] = 'Book not found or not ISBN'
        return render(request, self.template_name, context)

    isbn_raw = canonical(isbn_maybe)
    tradeable = get_tradeable_books(isbn_raw)
    if len(tradeable) == 0:
        context['error'] = 'Do not buy'
        context['book'] = get_books_with_isbn(isbn_raw)[0]
    else:
        context['book'] = tradeable[0]
    return render(request, self.template_name, context)
def _isbn10toIsbn13(match):
    """Helper function to deal with a single ISBN.

    ``match`` must provide the ISBN in its ``code`` group.  Three back ends
    are tried in order: ``stdnum.isbn``, ``isbnlib``, and finally the
    ``getIsbn`` / ``ISBN13`` helpers.  A back end is skipped when looking up
    its name raises ``NameError``.  In every path an ISBN rejected by
    ``is_valid`` is returned upper-cased but otherwise unchanged.
    """
    isbn = match.group('code')
    isbn = isbn.upper()

    # First choice: stdnum (skipped when the name is not defined).
    try:
        stdnum.isbn
    except NameError:
        pass
    else:
        try:
            is_valid(isbn)
        except InvalidIsbnException:
            return isbn
        i = stdnum.isbn.to_isbn13(isbn)
        return i

    # Second choice: isbnlib (skipped when the name is not defined).
    try:
        isbnlib
    except NameError:
        pass
    else:
        try:
            is_valid(isbn)
        except InvalidIsbnException:
            return isbn
        # remove hyphenation, otherwise isbnlib.to_isbn13() returns None
        i = isbnlib.canonical(isbn)
        if i == isbn:
            # The input had no hyphenation, so none has to be restored.
            i13 = isbnlib.to_isbn13(i)
            return i13
        # add removed hyphenation
        i13 = isbnlib.to_isbn13(i)
        i13h = hyphenateIsbnNumbers('ISBN ' + i13)
        # Strip the 5-character 'ISBN ' prefix that was added above.
        return i13h[5:]

    # Fallback: the getIsbn / ISBN13 helpers.
    try:
        is_valid(isbn)
    except InvalidIsbnException:
        # don't change
        return isbn

    i1x = getIsbn(isbn)
    if not isinstance(i1x, ISBN13):
        i13 = i1x.toISBN13()
    else:
        i13 = i1x
    return i13.code
def normalizeISBN(isbn):
    """Return ``isbn`` in masked (hyphenated) form.

    The input is returned unchanged when isbnlib rejects it with
    ``NotValidISBNError``.

    >>> normalizeISBN('978800105473-4')
    '978-80-01-05473-4'
    >>> normalizeISBN('80978800105473-4')
    '80978800105473-4'
    >>> normalizeISBN('988800105473-4')
    '988800105473-4'
    >>> normalizeISBN('978-80-254-94677')
    '978-80-254-9467-7'
    """
    try:
        bare = isbnlib.canonical(isbn)
        masked = isbnlib.mask(bare)
    except isbnlib.NotValidISBNError:
        return isbn
    return masked
def addBookToISBNLibrary(filename, isbnRaw):
    '''
    Append one item to the library file, looked up by its ISBN.

    Inputs: filename, name of the library file,
            isbnRaw, ISBN of the item to be added in ISBN-13 format as
            string (e.g. 978-0-099-52848-7); the canonical version
            (e.g. 9780099528487) is accepted as well
    Outputs: Writes a new row, holding the values of the metadata returned
             by isbnlib, at the end of the given file
    '''
    # Canonicalise the ISBN, then fetch the values of its metadata.
    canonicalIsbn = isbnlib.canonical(isbnRaw)
    rowValues = isbnlib.meta(canonicalIsbn).values()

    # Append the item information to the csv.
    with open(filename, 'a', newline='') as handle:
        csv.writer(handle).writerow(rowValues)
def classify_isbn(isbnlike):
    """Classify an ISBN-like string and return its normalised forms.

    Returns a dict with:

    * ``'canon'``: the canonical form, left-padded with one or two zeros
      when only the padded value is a valid ISBN-10
    * ``'type'``: ``'isbn10'``, ``'isbn13'`` or ``'invalid?'``
    * ``'masked'``: the masked form of ``'canon'``, present for every valid
      type.  It used to be missing when the input was a valid ISBN-10 as
      given.
    """
    isbn = {}
    isbn['canon'] = ib.canonical(isbnlike)

    if ib.is_isbn10(isbnlike):
        isbn['masked'] = ib.mask(isbn['canon'])
        isbn['type'] = 'isbn10'
        return isbn

    # Leading zeros may have been lost; try restoring one, then two.
    for padding in ('0', '00'):
        padded = padding + isbn['canon']
        if ib.is_isbn10(padded):
            isbn['canon'] = padded
            isbn['masked'] = ib.mask(padded)
            isbn['type'] = 'isbn10'
            return isbn

    if ib.is_isbn13(isbn['canon']):
        isbn['masked'] = ib.mask(isbn['canon'])
        isbn['type'] = 'isbn13'
    else:
        isbn['type'] = 'invalid?'
    return isbn
def checksum(isbnlike):
    '''Calculate the proper ISBN check sum for a test ISBN string.

    The input string must have 10 or 13 legal characters with or without
    dashes but the checksum character need not be valid.

    Return a string character of the checksum digit, or None when the
    canonical form is neither 10 nor 13 characters long.
    '''
    digits = isbn.canonical(isbnlike)
    length = len(digits)

    # Pick the ISBN-10 or the ISBN-13 algorithm by length.
    if length not in (10, 13):
        return None
    if length == 10:
        return checksum_10(digits)
    return checksum_13(digits)
def checksum_10(isbnlike):
    '''Calculate the proper ISBN-10 check sum for a test ISBN 10 string.

    The input string must be 10 legal characters with or without dashes
    but the checksum character need not be valid.

    The first nine characters are weighted with ``isbn10_mults`` and the
    sum is reduced modulo 11.

    Return a string character of the checksum digit or 'X'
    '''
    digits = isbn.canonical(isbnlike)
    weighted = sum(weight * int(char)
                   for weight, char in zip(isbn10_mults, digits[:9]))
    remainder = int(fmod(weighted, 11))
    return 'X' if remainder == 10 else str(remainder)
def main():
    """Query DBpedia for every GCIS book under several ISBN spellings.

    The book list is read from the ``-path/--GCIS`` URL (a default GCIS-DEV
    URL is used when omitted).  For each book the JSON record is fetched,
    its ISBN is cleaned and normalised, and the ``QUERY`` template is run
    through ``RQUERY`` with each spelling of the ISBN in turn (raw,
    canonical, ISBN-10/13, hyphenated, each also prefixed with ``"ISBN "``)
    until one returns a non-empty result, which is printed.  Any failure
    while handling a book is reported to stdout and ``file`` and the loop
    moves on to the next book.
    """
    # Command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-path', '--GCIS', help = "Insert url path to GCIS book in JSON format [ex.'https://gcis-search-stage.jpl.net:3000/book.json?all=1'] ")
    args = parser.parse_args()
    GCIS = args.GCIS

    if GCIS is None:
        GCIS = 'https://gcis-search-stage.jpl.net:3000/book.json?all=1'
        print('NO MANUAL GCIS PATH\n ALL GCIS BOOK JSON FORMATS WILL BE USED AS DEFAULT')

    GCISPAR = parse(GCIS)

    for book in GCISPAR:
        # Reset per book so the error report below never shows values left
        # over from a previous book (or fails on names that were never set).
        IDEN = TITLE = ISBNS = None
        try:
            # Extracts book identifier from GCIS
            IDEN = book["identifier"]
            match = re.search(r'.*/(.*?)\..*?$', GCIS)
            if match:
                FILETYPE = match.groups()[0]

            # HREF = url that leads to book.json in GCIS-DEV
            HREF = 'https://gcis-search-stage.jpl.net:3000/{}/{}.json' .format(FILETYPE, IDEN)
            HREFPAR = parse(HREF)

            # Extracts book title and isbn from GCIS-DEV
            record = dict(HREFPAR)
            TITLE = record['title']
            ISBNS = record['isbn']

            # Cleans ISBNS to only contain valid characters
            CISBN = clean(ISBNS)

            # V13 = validated canonical ISBN-13
            V13 = EAN13(CISBN)
            if V13 is None:
                V13 = canonical(CISBN)

            M = parse(HREF)
            print("GCIS-DEV\n\n\t", M, '\n\n\t', "isbn_original:", ISBNS, '\n\n\t', "isbn_mod:", V13, "\n\n")

            # DBpedia ISBN formats, all built before the first query so a
            # conversion failure still skips the whole book.
            canon = canonical(CISBN)
            isbn10 = to_isbn10(CISBN)
            isbn10_hyphenated = hyphenate(isbn10)
            isbn13 = to_isbn13(CISBN)
            isbn13_hyphenated = hyphenate(isbn13)
            tests = [
                ISBNS,
                canon,
                isbn10,
                isbn10_hyphenated,
                isbn13,
                isbn13_hyphenated,
                V13,
                "ISBN {}" .format(CISBN),
                "ISBN {}" .format(canon),
                "ISBN {}" .format(isbn13_hyphenated),
                "ISBN {}" .format(V13),
                "ISBN {}" .format(isbn10),
                "ISBN {}" .format(isbn10_hyphenated),
            ]

            for indie in tests:
                # One query per spelling; its result is reused for the
                # emptiness test and for printing.
                result = RQUERY(QUERY % indie)
                if len(result) != 0:
                    print(result)
                    break
        except Exception:
            # Best effort: report the book and continue.  KeyboardInterrupt
            # and SystemExit are deliberately allowed to propagate.
            Error = '\n\t######## PROBLEM #######\n\tTitle:{}\n\tGCIS-ISBN:{}\n\tIdentifier:{}\n\n'.format(TITLE, ISBNS, IDEN)
            print(Error)
            file.write(Error)
def main():
    """Check a batch of book rows against online ISBN metadata.

    Reads the CSV file named by the ``input`` argument and writes every row
    to the CSV file named by ``output``.  At most ``--batchsize`` rows
    (default 8) whose ``webchecked`` column is still empty are looked up
    with ``isbnlib.meta``; the outcome is recorded in that column and the
    columns listed in ``fieldnames`` are refreshed from the metadata.

    NOTE(review): this is Python 2 code (print statements, ``basestring``,
    ``iteritems``).  The nesting below was reconstructed from flattened
    source -- confirm it against the original file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=8)
    parser.add_argument("input")
    parser.add_argument("output")
    args = parser.parse_args()
    # Number of online lookups still allowed in this run.
    countdown = args.batchsize
    with io.open(args.input, 'r', encoding='utf-8') as input:
        books_reader = csv.DictReader(input)
        with io.open(args.output, 'w', encoding='utf-8') as output:
            books_writer = csv.DictWriter(output, fieldnames)
            books_writer.writeheader()
            for row in books_reader:
                if countdown > 0 and not row.get('webchecked', None):
                    # NOTE(review): a missing ISBN becomes the string "None"
                    # here, so the ``if isbn`` test below still passes.
                    isbn = str(row.get('ISBN', None))
                    # presumably restores a leading zero lost from an
                    # ISBN-10 -- TODO confirm
                    if len(isbn) == 9:
                        isbn = "0" + isbn
                    if isbn:
                        countdown = countdown - 1
                        new_isbn = isbnlib.to_isbn13(isbnlib.canonical(isbn))
                        if new_isbn is None or new_isbn == "":
                            print "Could not canonicalize isbn", isbn
                        else:
                            row['ISBN'] = new_isbn
                        details = None
                        try:
                            details = isbnlib.meta(isbn)
                        except isbnlib.dev._exceptions.NoDataForSelectorError:
                            print "No data for ISBN", isbn, "title", row.get('Title', "Unknown")
                            row['webchecked'] = "No data for ISBN"
                        except isbnlib._exceptions.NotValidISBNError:
                            print "Invalid ISBN", isbn, "for", row['Title']
                            row['webchecked'] = "Invalid ISBN"
                        except isbnlib.dev._exceptions.ISBNNotConsistentError:
                            print "Inconsistent data for", row['Title']
                            row['webchecked'] = "Inconsistent ISBN data"
                        if details:
                            if details.get('ISBN-13', "") != "" and row.get('ISBN', "") == "":
                                row['ISBN'] = details['ISBN-13']
                            # Authors are stored '/'-separated in the file;
                            # split here and re-joined further down.
                            if 'Authors' in row:
                                row['Authors'] = row['Authors'].split('/')
                            old_title = row['Title']
                            web_title = details['Title']
                            if old_title != web_title:
                                old_canon = canonicalize_title(old_title)
                                web_canon = canonicalize_title(web_title)
                                old_len = len(old_canon)
                                web_len = len(web_canon)
                                # The online title is accepted when it extends
                                # the file's title or canonicalises to the
                                # same text.
                                if ((web_len > old_len and old_canon in web_canon)
                                    or (web_len == old_len and old_canon == web_canon)):
                                    print "Title improvement from", old_title, "to", web_title
                                else:
                                    print "Title discrepancy:", old_title, "in file,", web_title, "found online"
                                    # Keep the file's title on a discrepancy.
                                    details['Title'] = old_title
                            # don't use 'update', because we don't want
                            # to drag in random other fields that dictwriter will then object to
                            for key in fieldnames:
                                if key in details:
                                    row[key] = details[key]
                            if 'Authors' in row:
                                row['Authors'] = '/'.join(row['Authors'])
                            row['webchecked'] = "OK"
                # from https://docs.python.org/2/library/csv.html
                # NOTE(review): encoded_row is built but never used; the
                # un-encoded ``row`` is what gets written.  Confirm which one
                # was intended.
                encoded_row = {k: (v.encode("utf-8") if isinstance(v, basestring) else v)
                               for k,v in row.iteritems()}
                books_writer.writerow(row)
def opposite_isbn(isbn):
    """Return ``isbn`` converted to the other ISBN format.

    ISBN13 -> ISBN10 is tried first, then ISBN10 -> ISBN13, each on the
    canonical form of the input.  The first truthy result is returned;
    ``None`` when both conversions come back empty.
    """
    as_isbn_10 = isbn_13_to_isbn_10(canonical(isbn))
    if as_isbn_10:
        return as_isbn_10

    as_isbn_13 = isbn_10_to_isbn_13(canonical(isbn))
    if as_isbn_13:
        return as_isbn_13

    return None
def normalize_isbn(isbn):
    """Return ``canonical(isbn)``.

    XXX deprecated, just use isbnlib.canonical
    """
    normalized = canonical(isbn)
    return normalized
# NOTE(review): the two statements below are the tail of a definition whose
# beginning is not part of this excerpt; they are reproduced unchanged.
if i_type and i_type.startswith('isbn') and isbnlib.get_canonical_isbn(i_value) == isbn :
    return True
return False

def extract_from_link(self, link) :
    # Fetch the Trove work behind ``link`` and return the massaged metadata
    # of the version named by the link's ``versionId`` query parameter.
    # When no version matches, the first version is returned together with
    # the original link.
    # NOTE(review): Python 2 code (``urlparse`` module, ``urllib.quote``).
    # Indentation was reconstructed from flattened source -- confirm.
    url = urlparse.urlparse(link);
    res = requests.get('http://api.trove.nla.gov.au{}?{}&encoding=json&reclevel=full&include=workVersions&key={}'.format(url.path,url.query,self.key))
    queries = urlparse.parse_qs(url.query);
    # NOTE(review): ``vid`` is None when the link has no versionId
    # parameter, in which case ``vid[0]`` below raises TypeError.
    vid = queries.get('versionId')
    work = res.json()['work']
    versions = work['version']
    newlink = work.get('troveUrl', link);
    for v in versions :
        if v['id'] == vid[0] :
            newlink = newlink +'?q&versionId='+urllib.quote(v['id'])
            return self.massage_data(v, newlink)
    # NOTE(review): this value of ``newlink`` is never used -- the return
    # below passes ``link``.  Confirm which was intended.
    newlink = newlink +'?q&versionId='+urllib.quote(v['id'])
    return self.massage_data(versions[0], link)

if __name__ == '__main__':
    # Manual smoke test: look one ISBN up on Trove and pretty-print the
    # metadata of the first hit.
    #646324853
    #9780241956182
    isbn = isbnlib.canonical('9781584350118');
    trove = Trove();
    books = trove.find_isbn(isbn)
    v = trove.get_metadata(books[0], isbn)
    #print books[0]['troveUrl']+'?q&versionId='+v['id'];
    pp.pprint(v)
def main():
    """Fetch GCIS-DEV book records, normalise their ISBN and post them back.

    Credentials come from the YAML file given with ``-log/--login``, with
    ``-url``, ``-name`` and ``-pw`` as fall-backs for entries that are None
    in that file.  Each book's JSON is fetched, its ISBN is cleaned and
    normalised, and the JSON is posted back to the same URL with the
    credentials.

    NOTE(review): the nesting below was reconstructed from flattened source
    -- in particular the position of the final ``sys.exit()`` -- confirm it
    against the original file.
    """
    # Command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-log', '--login', help="Route path to Gcis.conf YAML file")
    parser.add_argument('-url', '--gcis', help='INSERT EITHER: https://data.globalchange.gov OR https://gcis-search-stage.jpl.net:3000')
    parser.add_argument('-name', '--username', help = "Insert GCIS username")
    parser.add_argument('-pw', '--apikey', help = "Insert GCIS username's api key")
    args = parser.parse_args()
    gcis = 'https://data.globalchange.gov'
    gcisdev = 'https://gcis-search-stage.jpl.net:3000'

    # Extracts login info from Gcis.conf
    if args.login:
        # NOTE(review): the file handle is never closed, ``list`` shadows
        # the builtin, and ``yaml.load`` without an explicit Loader is
        # unsafe on untrusted input (consider ``yaml.safe_load``).
        a = open(args.login, "r")
        list = (yaml.load(a))
        diction = list[0]
        path = diction['url']
        user = diction['userinfo']
        key = diction['key']
        print(path+'\n'+user+'\n'+key)
    else:
        pass

    # Echo whichever manual settings were supplied.
    if args.gcis == gcis:
        print(args.gcis)
    elif args.gcis == gcisdev:
        print(args.gcis)
    else:
        print('NO MANUAL ENDPOINT (Ignore if using Config file)')
    if args.username:
        print(args.username)
    else:
        print('NO MANUAL USERNAME (Ignore if using Config file)')
    if args.apikey:
        print(args.apikey)
    else:
        print('NO MANUAL API KEY (Ignore if using Config file)')

    # Credentials
    # NOTE(review): ``diction`` is only bound when --login was given;
    # without it the next line raises NameError.
    path = diction['url']
    if diction['url'] is None:
        path = args.gcis
    else:
        # NOTE(review): a configured url is always replaced by gcisdev here.
        path = gcisdev
    user = diction['userinfo']
    if diction['userinfo'] is None:
        user = args.username
    key = diction['key']
    if diction['key'] is None:
        key = args.apikey

    # Parses url.json
    def parse(url):
        # Fetch ``url`` and return its decoded JSON body.  Certificate
        # verification is switched off (verify = False).
        import requests
        r = requests.get(url, verify = False)
        JSONdict = r.json()
        return JSONdict

    GCIS = 'https://gcis-search-stage.jpl.net:3000/book.json?all=1'
    GCISPAR = parse(GCIS)

    for x in range(len(GCISPAR)):
        # Extracts book identifier from GCIS
        IDEN = GCISPAR[x]["identifier"]
        match = re.search(r'.*/(.*?)\..*?$', GCIS)
        if match:
            FILETYPE = match.groups()[0]
        #HREF = url that leads to book.json in GCIS-DEV
        HREF = 'https://gcis-search-stage.jpl.net:3000/{}/{}.json' .format(FILETYPE,IDEN)
        #HREF for either GCIS or GCIS-DEV
        #HREF = '{}//{}/{}.json' .format(path, FILETYPE, IDEN)
        #test
        #HREF = 'https://gcis-search-stage.jpl.net:3000/book/305e4144-39d2-4d84-8843-3f502ab890e0.json'
        HREFPAR = parse(HREF)
        print(HREFPAR)
        # Extracts book title and isbn from GCIS-DEV
        d = dict(HREFPAR)
        TITLE = d['title']
        ISBNS = d['isbn']
        # Cleans ISBNS to only contain valid characters
        CISBN = clean(ISBNS)
        # V13 = validated canonical ISBN-13
        V13 = EAN13(CISBN)
        if V13 is None:
            V13 = canonical(CISBN)
        M = parse(HREF)
        # For possible future implementation of adding original isbn into the JSON dictionary.
        """M["isbn"] = V13 M["org_isbn"] = ISBNS"""
        print(M, '\n\t', "isbn_original:", ISBNS)
        # Posts the JSON dictionary back into GCIS-DEV using the credentials
        # resolved above.
        # NOTE(review): ``M`` is posted exactly as fetched; V13 is computed
        # but not written into it (see the disabled snippet above).
        s = requests.Session()
        s.auth = ( user , key )
        s.headers.update({'Accept': 'application/json'})
        r = s.post(HREF, data = M , verify = False)
        r.raise_for_status()
        # NOTE(review): placed inside the loop here, so the script stops
        # after the first book -- confirm this is the intended position.
        sys.exit()
FILETYPE = match.groups()[0] #HREF = url that leads to book.json in GCIS-DEV try: HREF = 'https://gcis-search-stage.jpl.net:3000/{}/{}.json' .format(FILETYPE,IDEN) #HREF = 'https://gcis-search-stage.jpl.net:3000/book/13b8b4fc-3de1-4bd8-82aa-7d3a6aa54ad5.json' HREFPAR = parse(HREF) #Extracts book title and isbn from GCIS-DEV d = dict(HREFPAR) TITLE = d['title'] ISBNS = d['isbn'] #Cleans ISBNS to only conatian valid characters CISBN = clean(ISBNS) #V13 = validated canonical ISBN-13 V13 = EAN13(CISBN) if V13 is None: V13 = canonical(CISBN) M = parse(HREF) v = meta(V13, service = 'wcat', cache ='default') GCISDATA = "GCIS-DEV\n\n\t{}\n\n\tisbn_original:{}\n\n\tisbn_mod:{}\n\n" .format(M, ISBNS, V13) APIDATA = "WorldCat\n\n\t{}\n\n------------\n\n" .format(v) print("GCIS-DEV\n\n\t", M, '\n\n\t', "isbn_original:", ISBNS, '\n\n\t', "isbn_mod:", V13, "\n\n") print ("WorldCat\n\n\t", v, '\n\n') file2.write(GCISDATA) file2.write(APIDATA) except: Error = '\n\t######## PROBLEM #######\n\tTitle:{}\n\tGCIS-ISBN:{}\n\tIdentifier:{}\n\n'.format(TITLE, ISBNS, IDEN) print(Error) file.write(Error) if __name__ =='__main__':