def is_valid(isbn_id):
    """
    Tell whether a string is a valid, conventionally written ISBN.

    The string is accepted only when ``isbnlib`` does not reject it and it is
    spelled either exactly like its canonical form or exactly like the masked
    rendering of that canonical form.

    :param isbn_id: the isbn to be checked.
    :returns: boolean indicating whether the isbn is valid or not.

    >>> is_valid("978-3-16-148410-0")
    True

    >>> is_valid("9783161484100")
    True

    >>> is_valid("9783161484100aa")
    False

    >>> is_valid("abcd")
    False

    >>> is_valid("0136091814")
    True

    >>> is_valid("0136091812")
    False

    >>> is_valid("9780136091817")
    False

    >>> is_valid("123456789X")
    True
    """
    # Anything isbnlib rejects outright is invalid, whatever its spelling.
    if isbnlib.notisbn(isbn_id):
        return False

    canonical = isbnlib.get_canonical_isbn(isbn_id)
    if canonical == isbn_id:
        return True

    # Otherwise the only other accepted spelling is the masked one.
    return isbnlib.mask(canonical) == isbn_id
def get_canonical_isbn(self, line):
    """
    Collect the canonical ISBNs found in a line of text.

    Every compiled pattern in ``self.ISBN_PATTERN`` is applied to ``line``.
    Each match is normalised (surrounding whitespace stripped, the letters
    i/s/b/n upper-cased, all spaces removed, then a single space inserted
    after every ``ISBN``), skipped if it is listed in ``self.SPECIAL_ISBN``,
    and otherwise handed to ``isbnlib.get_canonical_isbn``.

    :param line: text to scan.
    :returns: list of canonical ISBNs, in match order; duplicates are kept.
    """
    # One C-level pass instead of four chained ``replace`` calls.
    uppercase_isbn_letters = str.maketrans('isbn', 'ISBN')

    isbns = []
    for regex in self.ISBN_PATTERN:
        matches = regex.findall(line)
        if not matches:
            continue
        logger.debug('Unchecked [' + ' '.join(matches) + ']')
        for match in matches:
            match = match.strip().translate(uppercase_isbn_letters)
            match = match.replace(' ', '')
            match = match.replace('ISBN', 'ISBN ')
            if match in self.SPECIAL_ISBN:
                continue
            try:
                isbn = isbnlib.get_canonical_isbn(match)
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed; the scan stays
                # best-effort for ordinary library errors.
                logger.error('Error in isbnlib while calling get_canonical_isbn')
            else:
                if isbn:
                    isbns.append(isbn)
    return isbns
def run(self, dump_filepath: str) -> None:
    """
    Performs ISBN normalization (removes hyphens and capitalizes letters)

    The dump is read row by row. Rows that are not editions, that carry no
    ISBN field, or whose ISBNs need no normalization are skipped without
    fetching anything. For the remaining rows the edition is fetched through
    ``self.ol``, each ISBN field is normalized and de-duplicated, and the
    edition is saved through ``self.save`` when a field actually changed.

    dump_filepath -- path to *.txt.gz dump containing editions that need to be operated on
    """
    if self.dry_run:
        self.logger.info(
            'dry_run set to TRUE. Script will run, but no data will be modified.'
        )

    # Column positions within one tab-separated dump row.
    header = {'type': 0, 'key': 1, 'revision': 2, 'last_modified': 3, 'JSON': 4}
    comment = 'normalize ISBN'
    isbn_fields = ('isbn_10', 'isbn_13')

    with gzip.open(dump_filepath, 'rb') as fin:
        for raw_row in fin:
            row = raw_row.decode().split('\t')
            _json = json.loads(row[header['JSON']])
            if _json['type']['key'] != '/type/edition':
                continue

            # A field that is present but null is treated as an empty list
            # instead of crashing the iteration below.
            isbns_by_type = {
                field: _json[field] or []
                for field in isbn_fields
                if field in _json
            }
            if not isbns_by_type:
                continue

            # Generator form stops at the first ISBN that needs work.
            needs_normalization = any(
                self.isbn_needs_normalization(isbn)
                for isbns in isbns_by_type.values()
                for isbn in isbns
            )
            if not needs_normalization:
                continue

            olid = _json['key'].split('/')[-1]
            edition = self.ol.Edition.get(olid)
            # The fetched record may have changed type since the dump was made.
            if edition.type['key'] != '/type/edition':
                continue

            # if an ISBN is in the wrong field this script will not move it to the appropriate one
            for isbn_type in isbns_by_type:
                # Work from the fetched edition's values, not the dump's.
                isbns = getattr(edition, isbn_type, None) or []
                normalized_isbns = dedupe([  # remove duplicates
                    isbnlib.get_canonical_isbn(isbn)
                    if self.isbn_needs_normalization(isbn)
                    else isbn
                    for isbn in isbns
                ])
                if normalized_isbns != isbns and normalized_isbns != []:
                    setattr(edition, isbn_type, normalized_isbns)
                    self.logger.info('\t'.join(
                        [olid, str(isbns), str(normalized_isbns)]))
                    self.save(lambda: edition.save(comment=comment))
def get_canonical_isbn(self, line):
    """
    Return the canonical ISBNs that can be extracted from ``line``.

    Each compiled pattern in ``self.ISBN_PATTERN`` is run over the text. A
    match is cleaned up (whitespace stripped, the letters i/s/b/n
    upper-cased, spaces removed, one space re-inserted after ``ISBN``),
    ignored when it appears in ``self.SPECIAL_ISBN``, and otherwise passed to
    ``isbnlib.get_canonical_isbn``. Falsy results are dropped.

    :param line: text to scan.
    :returns: list of canonical ISBNs in the order they were matched.
    """
    isbns = []
    for regex in self.ISBN_PATTERN:
        matches = regex.findall(line)
        if matches:
            logger.debug('Unchecked [' + ' '.join(matches) + ']')
        for match in matches:
            match = match.strip()
            for lower in 'isbn':
                match = match.replace(lower, lower.upper())
            # Drop every space, then restore exactly one after the prefix.
            match = match.replace(' ', '').replace('ISBN', 'ISBN ')
            if match in self.SPECIAL_ISBN:
                continue
            try:
                isbn = isbnlib.get_canonical_isbn(match)
            except Exception:
                # Was a bare ``except:``, which also trapped
                # KeyboardInterrupt/SystemExit; library errors are still
                # logged and skipped so one bad match does not stop the scan.
                logger.error(
                    'Error in isbnlib while calling get_canonical_isbn'
                )
            else:
                if isbn:
                    isbns.append(isbn)
    return isbns
def get(self, isbn):
    """
    Record the canonical form of ``isbn`` and return the search result.

    ``self.isbn`` and ``self.isbns`` are refreshed on every call. The search
    itself (``self.search()``) runs only while ``self.cached`` is still None;
    after that the stored result is returned as-is.
    """
    self.isbn = isbnlib.get_canonical_isbn(isbn)
    self.isbns = [self.isbn]

    if self.cached is None:
        self.cached = self.search()
    return self.cached
def version_with_isbn(self, record, isbn):
    """
    Tell whether ``record`` carries an ISBN identifier equal to ``isbn``.

    Only dict entries of ``record['identifier']`` are considered, and only
    those whose ``type`` starts with ``'isbn'``. Their ``value`` is
    canonicalised with ``isbnlib.get_canonical_isbn`` before comparison.

    :param record: mapping with an optional ``'identifier'`` list.
    :param isbn: ISBN to look for, compared against the canonical values.
    :returns: True on the first matching identifier, otherwise False.
    """
    # ``or []`` also covers a record whose identifier field is present but null.
    for entry in record.get('identifier') or []:
        if not isinstance(entry, dict):
            continue
        i_type = entry.get('type')
        if not i_type or not i_type.startswith('isbn'):
            continue
        i_value = entry.get('value')
        # An identifier without a textual value cannot match; skipping it
        # avoids passing None (or another non-string) to isbnlib.
        if not isinstance(i_value, str):
            continue
        if isbnlib.get_canonical_isbn(i_value) == isbn:
            return True
    return False
def isbn_needs_normalization(isbn: str) -> bool:
    """
    Returns True if the given ISBN is valid and needs to be normalized
    (hyphens removed, letters capitalized, etc.)

    Returns False otherwise, including when the stripped string contains a
    character outside ALLOWED_ISBN_CHARS or when isbnlib rejects it.
    """
    if not set(isbn.strip()).issubset(ALLOWED_ISBN_CHARS):
        return False
    if isbnlib.notisbn(isbn):
        return False

    normalized_isbn = isbnlib.get_canonical_isbn(isbn)
    # get_canonical_isbn returns None if ISBN is invalid; coerce so the
    # function always honours its ``-> bool`` contract instead of leaking
    # None through the ``and``.
    return bool(normalized_isbn) and normalized_isbn != isbn
def extract_from_text(text):
    """
    Pull every ISBN out of a piece of text.

    Candidates are located with ``isbnlib.get_isbnlike``; each one is then
    canonicalised, and candidates without a canonical form are discarded.

    :param text: Some text.
    :returns: A list of canonical ISBNs found in the text.

    >>> extract_from_text("978-3-16-148410-0 9783161484100 9783161484100aa abcd 0136091814 0136091812 9780136091817 123456789X")
    ['9783161484100', '9783161484100', '9783161484100', '0136091814', '123456789X']
    """
    found = []
    for candidate in isbnlib.get_isbnlike(text):
        canonical = isbnlib.get_canonical_isbn(candidate)
        if canonical is not None:
            found.append(canonical)
    return found
def extract_from_text(text):
    """
    List the canonical ISBNs mentioned in a text.

    ISBN-like substrings are found by ``isbnlib.get_isbnlike`` and mapped to
    their canonical form; entries that map to None are left out.

    :param text: Some text.
    :returns: A list of canonical ISBNs found in the text.

    >>> extract_from_text("978-3-16-148410-0 9783161484100 9783161484100aa abcd 0136091814 0136091812 9780136091817 123456789X")
    ['9783161484100', '9783161484100', '9783161484100', '0136091814', '123456789X']
    """
    candidates = isbnlib.get_isbnlike(text)
    canonical_forms = map(isbnlib.get_canonical_isbn, candidates)
    return [form for form in canonical_forms if form is not None]
def check_isbn_validity(self, isbn):
    """
    Decide whether ``isbn`` is valid, storing its ISBN-13 form on the way.

    Side effect: ``self.canonical_isbn`` is set to the result of
    canonicalising ``isbn`` as ISBN-13. Any exception raised while checking
    is logged as a warning and treated as "not valid".

    Returns boolean.
    Called by views.alternates() and views.filtered_alternates()
    """
    is_valid = False
    try:
        # will return None on bad isbn
        self.canonical_isbn = isbnlib.get_canonical_isbn(isbn, output='isbn13')
        # will raise exception on None
        is_valid = isbnlib.is_isbn13(self.canonical_isbn)
    except Exception as err:
        warning = 'exception assessing validity, ```%s```; looks like ```%s``` is not valid' % (err, isbn)
        log.warning(warning)

    log.debug('validity, `%s`' % is_valid)
    return is_valid
def is_valid(isbn_id):
    """
    Report whether the given string is a valid ISBN in an accepted spelling.

    Two spellings are accepted for a string that ``isbnlib`` does not reject:
    the canonical form itself, or the masked form of the canonical value.

    :param isbn_id: the isbn to be checked.
    :returns: boolean indicating whether the isbn is valid or not.

    >>> is_valid("978-3-16-148410-0")
    True

    >>> is_valid("9783161484100")
    True

    >>> is_valid("9783161484100aa")
    False

    >>> is_valid("abcd")
    False

    >>> is_valid("0136091814")
    True

    >>> is_valid("0136091812")
    False

    >>> is_valid("9780136091817")
    False

    >>> is_valid("123456789X")
    True
    """
    if isbnlib.notisbn(isbn_id):
        return False

    bare = isbnlib.get_canonical_isbn(isbn_id)
    written_bare = bare == isbn_id
    # The masked form is only computed when the bare form did not match.
    return written_bare or isbnlib.mask(bare) == isbn_id
def preprocess_isbns(isbns):
    """
    Convert ISBNs given in different formats into unique canonical ISBN-13s.

    Entries that fail isbnlib's strict validity check are dropped. ISBN-10s
    are converted to ISBN-13 before canonicalisation. Duplicates are removed
    and the result keeps first-seen order, so the output is deterministic
    (the previous ``set`` round-trip returned an arbitrary order).

    :param isbns: isbns in different formats
    :return: canonical isbn13s
    """
    unique = {}  # used as an insertion-ordered set
    for isbn in isbns:
        if isbnlib.notisbn(isbn, level='strict'):
            continue
        if isbnlib.is_isbn10(isbn):
            isbn = isbnlib.to_isbn13(isbn)
        canonical = isbnlib.get_canonical_isbn(isbn)
        # Guard against a None result so it can never leak into the output.
        if canonical:
            unique[canonical] = None
    return list(unique)
def get_canonical_isbn2(self, line):
    """
    Collect canonical ISBNs from ``line`` using isbnlib's own matcher.

    Candidates come from ``isbnlib.get_isbnlike``. A candidate is skipped
    when it is listed in ``self.SPECIAL_ISBN`` or when its raw text is a
    substring of an ISBN already collected; otherwise it is canonicalised
    and kept if the result is truthy.

    :param line: text to scan.
    :returns: list of canonical ISBNs in the order they were found.
    """
    isbns = []
    matches = isbnlib.get_isbnlike(line)
    if matches:
        logger.debug('Unchecked [' + ' '.join(matches) + ']')

    for match in matches:
        if match in self.SPECIAL_ISBN:
            continue
        # NOTE(review): this compares the raw match against canonical
        # results, so a hyphenated repeat is not recognised as a duplicate;
        # kept as-is, confirm whether that is intended.
        if any(match in found for found in isbns):
            continue
        try:
            isbn = isbnlib.get_canonical_isbn(match)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit propagate; library errors are logged and skipped.
            logger.error('Error in isbnlib while calling get_canonical_isbn')
        else:
            if isbn:
                isbns.append(isbn)
    return isbns
def build_keys():
    """
    Takes the hyphenated isbns and builds canonical isbns.

    Reads the source booklist JSON from ``stuff_dir``, keys every record
    that has an ISBN by its canonical ISBN-13, and writes the resulting
    mapping (sorted, indented JSON) under ``project_dir``/data.

    Records whose ISBN cannot be canonicalised are skipped with a warning.
    Previously they were all stored under the key ``None``, overwriting one
    another, and a ``None`` key next to string keys makes
    ``json.dumps(..., sort_keys=True)`` raise TypeError.
    """
    with open(f'{stuff_dir}/02_source_booklist_2019-04-26.json', 'r', encoding='utf-8') as f:
        lst = json.load(f)

    new_dct = {}
    for dct in lst:
        original_isbn = dct['ISBN']
        if not original_isbn:  # some records are empty
            continue
        canonical_isbn = isbnlib.get_canonical_isbn(original_isbn, output='isbn13')
        if not canonical_isbn:
            log.warning(f'skipping record, could not canonicalize isbn ```{original_isbn}```')
            continue
        new_dct[canonical_isbn] = {
            'isbn_original': original_isbn,
            'title': dct['Title'],
            'author': dct['Author'],
        }

    jsn = json.dumps(new_dct, sort_keys=True, indent=2)
    log.debug(f'jsn, ```{jsn}```')
    with open(f'{project_dir}/data/05_source_key_data.json', 'w', encoding='utf-8') as f:
        f.write(jsn)
async def convert(self, ctx: commands.Context, argument: str) -> str:
    """
    Convert a command argument into a canonical ISBN string.

    :param ctx: invocation context (unused here).
    :param argument: raw text supplied by the user.
    :returns: the canonical ISBN as returned by ``isbnlib.get_canonical_isbn``
        (a string, not an int as the old annotation claimed).
    :raises commands.errors.BadArgument: if isbnlib rejects the argument or
        cannot produce a canonical form for it.
    """
    if not isbnlib.notisbn(argument):
        canonical = isbnlib.get_canonical_isbn(argument)
        # Never hand None back to the command as if it were an ISBN.
        if canonical:
            return canonical
    raise commands.errors.BadArgument('Invalid ISBN: ' + argument)
itemsgen = zot.makeiter(zot.top(limit=10)) for batch in tqdm(itemsgen, total=int(nitems/10 + int(bool(nitems % 10)))): for item in batch: try: isbn = item['data']['ISBN'] except KeyError: # Not a book-ish item continue if not isbn: # No ISBN listed continue # Transform to canonical (bare) form, then return to standardised # form with hyphens. canisbn = get_canonical_isbn(isbn.replace(' ', '')) if not canisbn: # This most likely means that the ISBN given is bogus # (e.g. has a faulty checksum). Some books have bogus # ISBNs printed on them, so they are used for cataloguing # by some libraries despite being formally invalid. print("Error extracting ISBN from "+str(isbn)) continue newisbn = mask(canisbn) if newisbn != isbn: assert newisbn print("Updating "+str(isbn)+" to "+str(newisbn)) item['data']['ISBN'] = newisbn if not zot.update_item(item):
with open(infile) as f:
    for line in f:
        # Rows are tab-separated; the fifth column holds the JSON record.
        data = line.split("\t")
        book = json.loads(data[4])
        olid = book.get('key').replace('/books/', '')

        # 'NONE' is the placeholder for a record without a 'works' entry.
        wolid = book.get('works', 'NONE')
        if wolid != 'NONE':
            wolid = wolid[0]['key'].replace('/works/', '')

        # get isbn
        isbn_13 = book.get('isbn_13', [])
        isbn_10 = book.get('isbn_10', [])
        good_isbn, bad_isbn = [], []
        for isbn in isbn_13 + isbn_10:
            canonical = isbnlib.get_canonical_isbn(isbn)
            if not canonical:
                bad_isbn.append(isbn)
                continue
            # Ten-character results are converted so only ISBN-13s are kept.
            good_isbn.append(
                isbnlib.to_isbn13(canonical) if len(canonical) == 10 else canonical
            )

        # De-duplicate, then emit one "isbn<TAB>olid<TAB>wolid" line per ISBN.
        isbns = set(good_isbn)
        for isbn in isbns:
            try:
                checked = isbnlib.get_canonical_isbn(isbn)
                if checked:
                    print("\t".join([checked, olid, wolid]))
                else:
                    bad_isbn.append(isbn)
            except Exception:
                # Any failure while re-checking or printing marks it as bad.
                bad_isbn.append(isbn)
def raw_mapping(self, results):
    """
    Index the records in ``results['data']['results']`` by canonical ISBN.

    The key is ``isbnlib.get_canonical_isbn`` applied to each record's
    ``'isbn'`` entry; when two records share a key the later one wins.

    :param results: response mapping holding the records under data/results.
    :returns: dict mapping canonical ISBN to the original record.
    """
    by_isbn = {}
    for record in results['data']['results']:
        by_isbn[isbnlib.get_canonical_isbn(record['isbn'])] = record
    return by_isbn
def get_search_results(self, request, queryset, search_term):
    """
    Extend the parent's search with a lookup by canonical ISBN.

    The parent implementation runs first. If ``search_term`` has a canonical
    ISBN form, objects of ``self.model`` whose ``isbn`` equals it are OR-ed
    into the parent's result.

    :returns: the ``(queryset, use_distinct)`` pair, with ``use_distinct``
        passed through unchanged from the parent.
    """
    results, use_distinct = super().get_search_results(request, queryset, search_term)

    canonical = isbnlib.get_canonical_isbn(search_term)
    if canonical is None:
        # Not an ISBN: the parent's result stands on its own.
        return results, use_distinct

    results |= self.model.objects.filter(isbn=canonical)
    return results, use_distinct