Beispiel #1
0
def validate_book_data(book_data):
    """Checks to see if the given book data is valid

    Args:
        book_data (dict): The book data to validate. Required keys are 'isbn', 'title', and 'authors'

    Raises:
        InvalidRequest: If 'isbn', 'title', or 'authors' is missing from book_data or has a falsy value
        InvalidRequest: If the value for key 'authors' is not a list of strings
        InvalidRequest: If the isbn is not valid. See https://en.wikipedia.org/wiki/International_Standard_Book_Number#Check_digits
        ResourceExists: If a book with the provided isbn already exists

    Returns:
        dict: The validated book data

    NOTE(review): only the presence check and the ISBN-13 conversion are
    visible in this excerpt; the 'authors' type check, the duplicate lookup
    and the return statement described above are not shown here - confirm
    against the complete function.
    """
    isbn = book_data.get("isbn")
    title = book_data.get("title")
    authors = book_data.get("authors")

    # All three fields must be present AND truthy (an empty string or an
    # empty authors list is rejected the same way as a missing key).
    if not (title and isbn and authors):
        raise InvalidRequest(
            "Request should be of the form {{isbn: 'isbn', title: 'title', authors: [author1, author2,]}}"
        )

    # Clean the raw value and convert it to ISBN-13; the walrus rebinds
    # `isbn` to the converted value, and a falsy result is treated as invalid.
    if not (isbn := isbnlib.to_isbn13(isbnlib.clean(isbn))):
        raise InvalidRequest(
            "The isbn provided is not valid or could not be converted into isbn-13 format"
        )
Beispiel #2
0
def _isbn10toIsbn13(match):
    """Convert the ISBN captured in a regex match to ISBN-13.

    The 'code' group of *match* is upper-cased and checked with
    ``is_valid``; a code that fails the check is returned (upper-cased)
    without conversion. Otherwise the conversion is delegated to
    ``stdnum`` when that name is bound, else to ``isbnlib``. When neither
    name is bound the function falls through and returns None.
    """
    code = match.group('code').upper()
    try:
        is_valid(code)
    except InvalidIsbnException:
        # don't change
        return code

    # A NameError on the bare name means the optional backend is not imported.
    try:
        stdnum.isbn
    except NameError:
        pass
    else:
        return stdnum.isbn.to_isbn13(code)

    try:
        isbnlib
    except NameError:
        pass
    else:
        # remove hyphenation, otherwise isbnlib.to_isbn13() returns None
        bare = isbnlib.canonical(code)
        converted = isbnlib.to_isbn13(bare)
        if bare == code:
            return converted
        # add removed hyphenation, then drop the temporary 'ISBN ' prefix
        return hyphenateIsbnNumbers('ISBN ' + converted)[5:]
Beispiel #3
0
def isbn_differ(entry_data, suggestion_data):
    """Report whether the entry's ISBN is missing from the suggested ISBNs.

    Both sides are normalised to ISBN-13 before comparing. Returns True
    when the entry value does not convert to an ISBN-13, or when none of
    the suggestions converts to the same ISBN-13.

    *suggestion_data* is iterated as pairs; only the first element of each
    pair is used.
    """
    wanted = to_isbn13(canonical(entry_data))
    if wanted:
        candidates = [
            to_isbn13(canonical(raw)) for raw, _unused in suggestion_data
        ]
        return wanted not in candidates
    return True
 def type_and_identifier_for_urn(cls, identifier_string):
     """Split a URN/URL string into an ``(identifier type, identifier)`` pair.

     Returns ``(None, None)`` for an empty input. ISBN URNs are validated
     and converted to ISBN-13. Raises ValueError for an invalid ISBN or an
     unrecognised scheme.
     """
     if not identifier_string:
         return None, None

     gutenberg = cls.GUTENBERG_URN_SCHEME_RE.match(identifier_string)
     if gutenberg:
         return (Identifier.GUTENBERG_ID, gutenberg.groups()[0])

     if identifier_string.startswith(("http:", "https:")):
         return (Identifier.URI, identifier_string)

     if identifier_string.startswith(Identifier.URN_SCHEME_PREFIX):
         # "<prefix><type>/<identifier>", both halves percent-encoded.
         remainder = identifier_string[len(Identifier.URN_SCHEME_PREFIX):]
         identifier_type, identifier_string = map(
             urllib.unquote, remainder.split("/", 1))
         return (identifier_type, identifier_string)

     if identifier_string.startswith(Identifier.ISBN_URN_SCHEME_PREFIX):
         identifier_string = urllib.unquote(
             identifier_string[len(Identifier.ISBN_URN_SCHEME_PREFIX):])
         # Make sure this is a valid ISBN, and convert it to an ISBN-13.
         ten_digit = isbnlib.is_isbn10(identifier_string)
         if not (ten_digit or isbnlib.is_isbn13(identifier_string)):
             raise ValueError("%s is not a valid ISBN." % identifier_string)
         if ten_digit:
             identifier_string = isbnlib.to_isbn13(identifier_string)
         return (Identifier.ISBN, identifier_string)

     if identifier_string.startswith(Identifier.OTHER_URN_SCHEME_PREFIX):
         return (Identifier.URI, identifier_string)

     raise ValueError(
         "Could not turn %s into a recognized identifier." %
         identifier_string)
Beispiel #5
0
def set_isbn13():
    """Fill empty isbn_13 columns of the book table from isbn_10.

    The isbn_10 field may hold several ISBNs separated by "; " (due to
    errors in the openlibrary.org db), so each one is converted on its own
    and the results are re-joined with "; ". Values that fail ``notisbn``
    are reported on stdout and skipped.
    """
    cur = conn.cursor()
    cur.execute(
        "select id, isbn_10 from book where isbn_13 = ? and isbn_10 != ?;",
        ("", ""),
    )

    updates = []
    for book_id, raw_isbns in cur.fetchall():
        converted = []
        for candidate in raw_isbns.split("; "):
            if notisbn(candidate):
                print(f"error, id {book_id}, isbn_10 {candidate} is not a valid isbn")
            else:
                converted.append(to_isbn13(candidate))
        # A row whose ISBNs were all rejected is written back as "".
        updates.append(("; ".join(converted), book_id))

    cur.executemany("UPDATE book SET isbn_13 = ? WHERE id = ?;", updates)
    conn.commit()
Beispiel #6
0
 def type_and_identifier_for_urn(cls, identifier_string):
     """Turn a URN or URL into an ``(identifier type, identifier)`` tuple.

     An empty input gives ``(None, None)``. ISBN URNs must be a valid
     ISBN-10/13 and are returned as ISBN-13. A ValueError is raised for an
     invalid ISBN or when no known scheme matches.
     """
     if not identifier_string:
         return None, None

     urn_prefix = Identifier.URN_SCHEME_PREFIX
     isbn_prefix = Identifier.ISBN_URN_SCHEME_PREFIX

     found = cls.GUTENBERG_URN_SCHEME_RE.match(identifier_string)
     if found:
         kind = Identifier.GUTENBERG_ID
         identifier_string = found.groups()[0]
     elif identifier_string.startswith(("http:", "https:")):
         kind = Identifier.URI
     elif identifier_string.startswith(urn_prefix):
         # The remainder is "<type>/<identifier>", each part percent-encoded.
         pieces = identifier_string[len(urn_prefix):].split("/", 1)
         kind, identifier_string = map(urllib.unquote, pieces)
     elif identifier_string.startswith(isbn_prefix):
         kind = Identifier.ISBN
         identifier_string = urllib.unquote(
             identifier_string[len(isbn_prefix):])
         # Make sure this is a valid ISBN, and convert it to an ISBN-13.
         is_ten = isbnlib.is_isbn10(identifier_string)
         if not is_ten and not isbnlib.is_isbn13(identifier_string):
             raise ValueError("%s is not a valid ISBN." % identifier_string)
         if is_ten:
             identifier_string = isbnlib.to_isbn13(identifier_string)
     elif identifier_string.startswith(Identifier.OTHER_URN_SCHEME_PREFIX):
         kind = Identifier.URI
     else:
         raise ValueError(
             "Could not turn %s into a recognized identifier." %
             identifier_string)
     return (kind, identifier_string)
    def post(self, request):
        """Remove a book from the requesting user's desired-books list.

        The JSON request body must contain an 'ISBN' key. An ISBN-10 is
        converted to ISBN-13 before the lookup.

        Returns:
            JsonResponse: a success message when the book was in the user's
            list and has been removed, otherwise a "Book not found" message.
        """
        env = json.loads(request.body)
        isbnlike = env['ISBN']

        # Make sure that only isbn13 data is used for the lookup
        if is_isbn10(isbnlike):
            isbnlike = to_isbn13(isbnlike)

        # The former debug dump that printed every Book row on each request
        # was removed: it loaded the whole table just to write to stdout.
        try:
            b = request.user.desiredBooks.get(isbnlike=isbnlike)
            request.user.desiredBooks.remove(b)
            # NOTE(review): the CREATE_BOOK_* codes are reused for deletion.
            return JsonResponse({
                "message": "Book successfully deleted",
                "messageCode": CREATE_BOOK_SUCESS
            })
        except ObjectDoesNotExist:
            return JsonResponse({
                "message": "Book not found",
                "messageCode": CREATE_BOOK_FAILED
            })
Beispiel #8
0
	def extractISBN(self):
		"""Scan the PDF page by page for an ISBN and store both of its forms.

		The text of each page is passed to ``self.searchCodeInPage``; the
		scan stops at the first page that yields a value. When that value is
		a valid ISBN-10 or ISBN-13, ``self.isbn10`` and ``self.isbn13`` are
		set. When nothing is found the attributes are left untouched.
		"""
		isbn = None

		rsrcmgr = PDFResourceManager()
		retstr = StringIO()
		device = TextConverter(rsrcmgr, retstr, codec='utf-8', laparams=LAParams())
		try:
			interpreter = PDFPageInterpreter(rsrcmgr, device)

			for page in PDFPage.get_pages(self.pdf, set(), maxpages=0, password="", caching=True, check_extractable=True):

				# Get the text from the page
				interpreter.process_page(page)
				text = retstr.getvalue()
				# Rewind before truncating so the next page is written at
				# offset 0; truncate() alone does not move the stream position.
				retstr.seek(0)
				retstr.truncate(0)

				# Extract ISBN
				isbn = self.searchCodeInPage(text)

				if isbn:
					break
		finally:
			# Release the converter and the buffer even if a page fails.
			device.close()
			retstr.close()

		# Nothing found: do not hand None to isbnlib.
		if not isbn:
			return

		# Convert to ISBN 10 and 13
		if isbnlib.is_isbn10(isbn):
			self.isbn10 = isbn
			self.isbn13 = isbnlib.to_isbn13(self.isbn10)
		elif isbnlib.is_isbn13(isbn):
			self.isbn13 = isbn
			self.isbn10 = isbnlib.to_isbn10(self.isbn13)
def _process_isbn(isbn):
    # only strip quotes if wsr, reg, or consignment number, or none
    if re.match("^wsr|^reg|^\d{2,4}-\d{1,4}$|n/a|none", isbn, re.I):
        isbn = re.sub("['\"]", "", isbn)
        price = 0.00
    # strip quotes, dashes and whitespace. convert isbn10 to isbn13.
    # split isbn and price if it's an extended isbn
    else:
        isbn = re.sub("[\s'\"\-]", "", isbn)
        price = 0.00
        # note the checking for the first character of ean5 extension
        # if it's 5, it means price is in us dollars 0-99.99
        # otherwise, we need to do price ourself.
        if len(isbn) == 18:
            if isbn[-5] == "5":
                price = float(isbn[-4:]) / 100
            isbn = isbn[:-5]
        if len(isbn) == 10:
            if isbnlib.is_isbn10(isbn):
                isbn = isbnlib.to_isbn13(isbn)
            else:
                raise isbnlib.NotValidISBNError(isbn)
        print("isbn13_match", re.match(isbn13_regex, isbn))
        if re.match(isbn13_regex, isbn):
            #can't use isbnlib.is_isnb13 because of internal isbns
            if isbnlib.check_digit13(isbn[0:12]) != isbn[12]:
                raise isbnlib.NotValidISBNError(isbn)
        else:
            raise isbnlib.NotValidISBNError
    return isbn, price
    def post(self, request):
        """Add a book to the requesting user's desired-books list.

        The JSON request body must contain an 'ISBN' key. An ISBN-10 is
        converted to ISBN-13 first. A Book that does not exist yet is
        created and validated; on validation failure a 400 JSON response
        carrying the first message of each failing field is returned.
        """
        payload = json.loads(request.body)
        isbnlike = payload['ISBN']

        # Make sure that only isbn13 data gets in the database
        if is_isbn10(isbnlike):
            isbnlike = to_isbn13(isbnlike)

        try:
            # Reuse an existing entry for this ISBN when there is one.
            b = Book.objects.get(isbnlike=isbnlike)  #isDesired=True
        except ObjectDoesNotExist:
            try:
                b = Book(isbnlike=isbnlike)  #isDesired=True
                b.full_clean()  # Validates
                b.save()
            except ValidationError as verr:
                # Keep only the first message per field so the payload is
                # directly JSON-serialisable.
                failure = {
                    field: messages[0]
                    for field, messages in dict(verr).items()
                }
                failure["messageCode"] = CREATE_BOOK_FAILED
                resp = JsonResponse(failure)
                resp.status_code = 400
                return resp

        # Add the book to the list of the current logged user.
        request.user.desiredBooks.add(b)
        return JsonResponse({
            "message": "Book successfully added",
            "messageCode": CREATE_BOOK_SUCESS
        })
Beispiel #11
0
def isbn_lookup(isbnlike, good_reads):
    """
    Fetch in Good Reads for a given ISBN code

    Args:
        isbnlike (str): Text containing an ISBN-10 or ISBN-13; every
            non-digit character is ignored, except a trailing 'X' check
            character of an ISBN-10.
        good_reads: Client object exposing ``book(isbn=...)``.

    Returns:
        dict: Book details keyed by the display labels below, or an empty
        dict when the value is not a valid ISBN or the lookup fails (the
        failure is logged).
    """
    book_info = {}

    isbn = ''.join(c for c in isbnlike if c.isdigit())
    # 'X' is a legal ISBN-10 check character; a digits-only filter would
    # drop it and make such ISBNs unrecognisable.
    if len(isbn) == 9 and isbnlike.strip().upper().endswith('X'):
        isbn += 'X'

    # Hand isbnlib the joined string, not the list of characters.
    if isbnlib.is_isbn10(isbn):
        isbn = isbnlib.to_isbn13(isbn)

    if not isbnlib.is_isbn13(isbn):
        return book_info

    try:
        book = good_reads.book(isbn=isbn)

        publisher = book.publisher if book.publisher is not None else '-'
        pages_qty = book.num_pages if book.num_pages is not None else 0

        book_info.update({
            'Título': book.title,
            'Autor': str(book.authors[0]),
            'Editora': publisher,
            'ISBN-13': isbn,
            'Qtd. de Páginas': pages_qty,
            'Link': book.link
        })
    except Exception as e:
        logger.exception('{}'.format(e), exc_info=False)
    # Plain return instead of `finally: return`, which would also swallow
    # KeyboardInterrupt / SystemExit.
    return book_info
Beispiel #12
0
def standardize_citekey(citekey, warn_if_changed=False):
    """
    Return *citekey* (``source:identifier``) in its standard form.

    DOIs are lower-cased, and shortDOIs (identifier starting with ``10/``)
    are expanded first when possible. ISBNs are passed through
    ``isbnlib.to_isbn13``. Other sources are returned unchanged. With
    *warn_if_changed*, a warning is logged when the result differs from
    the input.
    """
    source, identifier = citekey.split(':', 1)

    if source == 'doi':
        if identifier.startswith('10/'):
            from manubot.cite.doi import expand_short_doi
            try:
                identifier = expand_short_doi(identifier)
            except Exception as error:
                # If DOI shortening fails, return the unshortened DOI.
                # DOI metadata lookup will eventually fail somewhere with
                # appropriate error handling, as opposed to here.
                logging.error(f'Error in expand_short_doi for {identifier} '
                              f'due to a {error.__class__.__name__}:\n{error}')
                logging.info(error, exc_info=True)
        identifier = identifier.lower()
    elif source == 'isbn':
        from isbnlib import to_isbn13
        identifier = to_isbn13(identifier)

    standard_citekey = f'{source}:{identifier}'
    changed = citekey != standard_citekey
    if warn_if_changed and changed:
        logging.warning(
            f'standardize_citekey expected citekey to already be standardized.\n'
            f'Instead citekey was changed from {citekey!r} to {standard_citekey!r}'
        )
    return standard_citekey
Beispiel #13
0
def search_add(isbn):
    """Fetch metadata for *isbn*, store it in test.db and return the lookup result.

    An ISBN-10 is converted to ISBN-13 first. For an invalid ISBN a message
    is printed and None is returned without touching the database.

    Returns:
        Whatever ``search_equals(cursor, 'ISBN', isbn)`` returns, or None
        for an invalid ISBN.
    """
    if isbnlib.is_isbn10(isbn):
        isbn = isbnlib.to_isbn13(isbn)
    elif not isbnlib.is_isbn13(isbn):
        print("Not a valid ISBN")
        return None

    db = sqlite3.connect('test.db')
    try:
        db.row_factory = sqlite3.Row
        cursor = db.cursor()

        info = isbnlib.meta(isbn)
        au = ", ".join(info['Authors'])
        addBook(cursor, info['ISBN-13'], info['Title'], au, info['Year'],
                info['Publisher'], getCoverSmall(isbn), getCover(isbn),
                getDesc(isbn))
        db.commit()

        # NOTE(review): assumes search_equals returns already-fetched rows
        # rather than a live cursor, since the connection is closed below
        # (the original intended to close it but never reached that code).
        return search_equals(cursor, 'ISBN', isbn)
    finally:
        # Always release the connection.
        db.close()
Beispiel #14
0
	def isbn(self,isbn):
		#adds isbn to google spread sheet
		
		#check if valid
		clean_isbn = isbnlib.clean(isbn)
		if isbnlib.notisbn(clean_isbn):
			return "not valid isbn"
		
		#should check if has been collected before

		canonical = None;
		#first check trove
		canonical = self.trove.extract(clean_isbn);
		if not canonical :
			# try alternative isbn form
			print "trying alternative form "
			alt_isbn = clean_isbn;
			if isbnlib.is_isbn13(clean_isbn):
				alt_isbn = isbnlib.to_isbn10(clean_isbn)
			else :
				alt_isbn = isbnlib.to_isbn13(clean_isbn)
			canonical = self.trove.extract(alt_isbn);
			if canonical :
				clean_isbn = alt_isbn
		if not canonical :
			canonical = self.__reduce_metadata(clean_isbn,['merge','isbndb','openl'])
			if not canonical:
				return "no metadata found for isbn: " + clean_isbn
			canonical['source']='isbnlib'
			canonical["Authors"] = u', '.join(canonical["Authors"])
			canonical['link']=None

		row_data = ['isbn:'+clean_isbn, canonical["Title"], canonical["Authors"], canonical["Year"], canonical["Publisher"],canonical['link']]
		return self.__add_and_render(row_data)
Beispiel #15
0
 def raw_mapping(self, results):
     """Index the items of *results* by ISBN-13.

     The ISBN is read from each item's 'ItemAttributes' ('ISBN', falling
     back to 'EISBN', then ''). Later items replace earlier ones that map
     to the same key.
     """
     by_isbn = {}
     for entry in self.get_items(results):
         attributes = entry['ItemAttributes']
         raw_isbn = attributes.get('ISBN', attributes.get('EISBN', ''))
         by_isbn[isbnlib.to_isbn13(raw_isbn)] = entry
     return by_isbn
Beispiel #16
0
    async def check(self, entry):
        """Check that the entry's ISBN is in the configured form (10 or 13).

        Returns an empty list when the check is disabled (falsy
        'isbn_length'), when the entry has no ISBN, or when the ISBN is not
        valid at all. Otherwise returns a single (name, problem, hint)
        tuple if the ISBN is not in the configured form.

        Raises:
            ConfigurationError: If 'isbn_length' is neither 10 nor 13 (only
                reached for entries carrying a valid ISBN).
        """
        length = self._cfg.get('isbn_length', entry, 13)
        if not length:
            return []

        isbn = entry.data.get('isbn')
        if not isbn:
            return []

        clean_isbn = clean(isbn)
        if not clean_isbn or notisbn(clean_isbn):
            return []

        if length not in (10, 13):
            raise ConfigurationError(
                "The option 'isbn_length' must be either of 10 or 13.")

        if length == 10 and not is_isbn10(clean_isbn):
            problem = "ISBN '{}' is not of length 10.".format(isbn)
            hint = "ISBN-10 would be '{}'".format(to_isbn10(clean_isbn))
            return [(type(self).NAME, problem, hint)]

        if length == 13 and not is_isbn13(clean_isbn):
            problem = "ISBN '{}' is not of length 13.".format(isbn)
            hint = "ISBN-13 would be '{}'".format(to_isbn13(clean_isbn))
            return [(type(self).NAME, problem, hint)]

        return []
Beispiel #17
0
def set_user_recommendation():
    """Record that the current user viewed the book given by form field 'book_isbn'.

    params: book_isbn

    When the book is not stored yet it is looked up via the Amazon scraper
    and, failing that, Google Books, and then saved.

    Returns:
        tuple: ``(message, status)`` - 201 when a new view was recorded,
        200 when it already existed, 500 when the book cannot be found or
        the Amazon result has no thumbnail.
    """
    user_id = current_user.id
    book_isbn = request.form.get('book_isbn')
    book = Book.get(isbn=book_isbn)

    if not book:
        book = Book()
        # First try amazon scraper
        amazon_books = AmazonScraper().get_amazon_books_for_keyword(
            book_isbn,
        )
        if not amazon_books:
            # use google books if not amazon
            google_books = get_books_for_book_title_using_google_books(
                book_isbn,
            )
            if not google_books:
                return 'Book could not be found', 500
            google_book = google_books[0]
            book.title = google_book.title
            book.author = google_book.author
            book.isbn = to_isbn13(google_book.isbn)
            book.thumbnail_link = google_book.thumbnail_link
        else:
            amazon_book = amazon_books[0]
            book.isbn = to_isbn13(book_isbn)
            if 'title' in amazon_book:
                book.title = amazon_book['title']
            if 'author' in amazon_book:
                book.author = amazon_book['author']
            # Test for the key itself; `not 'thumbnail_link'` was always
            # False, so a missing thumbnail ended in a KeyError below.
            if 'thumbnail_link' not in amazon_book:
                return 'No thumbnail', 500
            book.thumbnail_link = amazon_book['thumbnail_link']
        book.save()

    existing_bv = BooksViewed.get(user_id=user_id, book_id=book.id)
    if not existing_bv:
        bv = BooksViewed()
        bv.user_id = user_id
        bv.book_id = book.id
        bv.save()
        return 'Book View added successfully', 201

    return 'Book View already exists', 200
Beispiel #18
0
def _isbn10toIsbn13(match):
    """Convert the ISBN captured in a regex match to ISBN-13.

    The 'code' group of *match* is upper-cased. Whichever backend is
    available is used, in this order: ``stdnum``, ``isbnlib``, then the
    ``getIsbn`` helper. In every case a code rejected by ``is_valid`` is
    returned (upper-cased) without conversion.
    """
    code = match.group('code').upper()

    # A NameError on the bare name means the optional backend is not imported.
    try:
        stdnum.isbn
    except NameError:
        pass
    else:
        try:
            is_valid(code)
        except InvalidIsbnException:
            return code
        return stdnum.isbn.to_isbn13(code)

    try:
        isbnlib
    except NameError:
        pass
    else:
        try:
            is_valid(code)
        except InvalidIsbnException:
            return code
        # remove hyphenation, otherwise isbnlib.to_isbn13() returns None
        bare = isbnlib.canonical(code)
        converted = isbnlib.to_isbn13(bare)
        if bare == code:
            return converted
        # add removed hyphenation, then drop the temporary 'ISBN ' prefix
        return hyphenateIsbnNumbers('ISBN ' + converted)[5:]

    # Neither library is bound: use the built-in ISBN classes.
    try:
        is_valid(code)
    except InvalidIsbnException:
        # don't change
        return code
    parsed = getIsbn(code)
    as_isbn13 = parsed if isinstance(parsed, ISBN13) else parsed.toISBN13()
    return as_isbn13.code
    def isbn_normalizer(self, isbn):
        """Return *isbn* upper-cased without hyphens, as ISBN-13 if it is a valid ISBN-10."""
        normalized = isbn.replace("-", "").upper()
        if isbnlib.is_isbn10(normalized):
            converted = isbnlib.to_isbn13(normalized)
            # TODO: hyphens should be removed
            normalized = converted.replace("-", "")
        return normalized
Beispiel #20
0
def fix_isbn(entry):
    """Rewrite ``entry['isbn']`` as a hyphen-masked ISBN-13.

    Entries without an 'isbn' key are returned untouched. The entry is
    modified in place and also returned.

    Raises:
        Exception: If the value is neither a valid ISBN-10 nor ISBN-13.
    """
    if 'isbn' not in entry:
        return entry

    candidate = entry['isbn']
    if isbnlib.is_isbn10(candidate):
        candidate = isbnlib.to_isbn13(candidate)
    if not isbnlib.is_isbn13(candidate):
        raise Exception(f'invalid isbn in {entry["ID"]}: {entry["isbn"]}')

    entry['isbn'] = isbnlib.mask(candidate, separator='-')
    return entry
Beispiel #21
0
def clean_isbn(isbn):
    """Convert an ISBN to the ISBN-13 format, remove extra characters

    Every non-digit character is dropped, except that the 'X' check
    character of an ISBN-10 is kept when it ends a nine-digit number. A
    valid ISBN-10 is converted to ISBN-13; anything else is returned as the
    cleaned string.
    """
    digits = re.sub(r"\D", "", isbn)

    # 'X' is a legal ISBN-10 check character; stripping it would truncate
    # the ISBN to nine digits and prevent the conversion below.
    if len(digits) == 9 and isbn.strip().upper().endswith("X"):
        digits += "X"

    if isbnlib.is_isbn10(digits):
        digits = isbnlib.to_isbn13(digits)

    return digits
Beispiel #22
0
def clean_isbn(isbn):
    """Convert an ISBN to the ISBN-13 format, remove extra characters

    Every non-digit character is dropped, except that the 'X' check
    character of an ISBN-10 is kept when it ends a nine-digit number. A
    valid ISBN-10 is converted to ISBN-13; anything else is returned as the
    cleaned string.
    """
    digits = re.sub(r"\D", "", isbn)

    # 'X' is a legal ISBN-10 check character; stripping it would truncate
    # the ISBN to nine digits and prevent the conversion below.
    if len(digits) == 9 and isbn.strip().upper().endswith("X"):
        digits += "X"

    if isbnlib.is_isbn10(digits):
        digits = isbnlib.to_isbn13(digits)

    return digits
def clean_isbn(isbn: str) -> str:
    """Validate an ISBN and return it in ISBN-13 form.

    :param isbn: ISBN 10 or ISBN 13
    :return:     The result of ``isbnlib.to_isbn13`` for the given ISBN
    :raises ValueError: if ``isbnlib.notisbn`` rejects the value
    """
    if not isbnlib.notisbn(isbn):
        return isbnlib.to_isbn13(isbn)

    raise ValueError(f"{isbn} is not a valid ISBN")
Beispiel #24
0
def standardize_citation(citation):
    """
    Return *citation* (``source:identifier``) with a standardized identifier.

    DOIs are lower-cased and ISBNs are passed through
    ``isbnlib.to_isbn13``; identifiers of other sources are kept as they
    are.
    """
    source, identifier = citation.split(':', 1)
    if source == 'doi':
        identifier = identifier.lower()
    elif source == 'isbn':
        from isbnlib import to_isbn13
        identifier = to_isbn13(identifier)
    return f'{source}:{identifier}'
 def isbn(self, value):
     """Store *value* in ``self._isbn`` as an ISBN-13, or '' for a falsy value.

     A valid ISBN-13 is stored in canonical form and a valid ISBN-10 is
     converted to ISBN-13.

     Raises:
         ValueError: If *value* is neither a valid ISBN-10 nor a valid
             ISBN-13, or if isbnlib fails while inspecting it.
     """
     if not value:
         self._isbn = ''
         return

     try:
         if isbnlib.is_isbn13(value):
             normalized = isbnlib.canonical(value)
         elif isbnlib.is_isbn10(value):
             normalized = isbnlib.to_isbn13(value)
         else:
             normalized = None
     except Exception:
         # Narrowed from a bare except so KeyboardInterrupt/SystemExit
         # are no longer converted into ValueError.
         raise ValueError('Invalid ISBN {}'.format(value))

     # Previously an invalid value fell through silently and left the old
     # ISBN in place; report it as the error message intends.
     if normalized is None:
         raise ValueError('Invalid ISBN {}'.format(value))
     self._isbn = normalized
Beispiel #26
0
def book_for_isbn():
    """Return pretty-printed JSON for the first Amazon hit for the 'isbn' query argument.

    The argument is converted with ``to_isbn13`` before it is used as the
    search keyword.
    """
    isbn13 = to_isbn13(request.args.get('isbn'))
    amazon_books = AmazonScraper().get_amazon_books_for_keyword(isbn13)
    # Shallow-copy every result dict (keys and values are kept as they are).
    book_list = [dict(d.iteritems()) for d in amazon_books]
    first_book = book_list[0]
    return json.dumps(first_book, sort_keys=True, indent=4)
Beispiel #27
0
    def clean_ISBN(self):
        """Return the cleaned 'ISBN' value as ISBN-13 without '-' or '.'.

        A falsy value (missing or empty) is returned unchanged. A valid
        ISBN-10 is converted to ISBN-13 before the separators are removed.
        """
        isbn = self.cleaned_data.get("ISBN")
        if not isbn:
            return isbn

        if is_isbn10(isbn):
            isbn = to_isbn13(isbn)
        return isbn.replace('-', '').replace('.', '')
Beispiel #28
0
def converter():
    """Convert the submitted ISBNs and flash the results.

    On POST the 'ISBN-10' field is flashed as ISBN-13 and the 'ISBN-13'
    field as ISBN-10, then the client is redirected back to this view. Any
    other method renders the form template.
    """
    if request.method != 'POST':
        return render_template('book/converter.html')

    isbn13_input = isbnlib.canonical(request.form['ISBN-13'])
    isbn10_input = isbnlib.canonical(request.form['ISBN-10'])

    #converts ISBNS
    flash(isbnlib.to_isbn13(isbn10_input))
    flash(isbnlib.to_isbn10(isbn13_input))

    return redirect(url_for('book.converter'))
Beispiel #29
0
def clean_isbn(isbn):
    """
    Return *isbn* as a canonical ISBN-13, or "" when it is not a valid ISBN.

    A value starting with "978" that is valid neither as ISBN-10 nor as
    ISBN-13 is retried without those three characters.
    """
    if isbnlib.is_isbn10(isbn):
        return isbnlib.to_isbn13(isbn)
    if isbnlib.is_isbn13(isbn):
        return isbnlib.canonical(isbn)
    # test if wrongly prefixed isbn10
    return clean_isbn(isbn[3:]) if isbn.startswith("978") else ""
Beispiel #30
0
def extract_identifiers_from_row(row, isbn_columns):
    """Collect the ISBNs found in the given columns of *row*.

    Args:
        row: Indexable row of strings.
        isbn_columns (str): Comma-separated column indexes.

    Returns:
        set: Each raw value (with surrounding '"' and '=' stripped) plus,
        for a valid ISBN, its counterpart in the other form (ISBN-10 or
        ISBN-13) when a non-empty one exists.
    """
    cols = [int(x) for x in isbn_columns.split(',')]
    isbns = set()
    for isbn_column in cols:
        raw = row[isbn_column].strip('"=')
        isbns.add(raw)
        # Transform to ISBN 10 or 13.
        if isbnlib.is_isbn13(raw):
            converted = isbnlib.to_isbn10(raw)
        elif isbnlib.is_isbn10(raw):
            converted = isbnlib.to_isbn13(raw)
        else:
            converted = None
        # An ISBN-13 without an ISBN-10 form converts to an empty value;
        # do not add that to the identifier set.
        if converted:
            isbns.add(converted)
    return isbns
Beispiel #31
0
def normalize(identifier):
    """Lower-case *identifier*, drop hyphens and a leading 'isbn:' prefix.

    If what remains is a valid ISBN-10 it is returned as ISBN-13.
    """
    prefix = 'isbn:'
    cleaned = identifier.lower().replace('-', '')
    if cleaned.startswith(prefix):
        cleaned = cleaned[len(prefix):]

    return isbnlib.to_isbn13(cleaned) if isbnlib.is_isbn10(cleaned) else cleaned
Beispiel #32
0
 def get_isbn(self):
     """Return the book's first DC identifier as an ISBN, or None.

     None is returned when there is no identifier, when it is empty, or
     when it is not a valid ISBN. A valid ISBN-10 is returned as ISBN-13;
     any other valid ISBN is returned as it is.
     """
     identifiers = self.book.get_metadata('DC', 'identifier')
     # Example of the metadata shape: [('Ratio', {})]
     if not identifiers:
         return None

     isbn = identifiers[0][0]
     if not isbn or isbnlib.notisbn(isbn):
         return None
     return isbnlib.to_isbn13(isbn) if isbnlib.is_isbn10(isbn) else isbn
 def create_book(self, isbn):
     """Return the Book stored under *isbn*, creating it when it is missing.

     An ISBN-10 is converted to ISBN-13 before the lookup. A newly created
     Book is validated with ``full_clean`` and saved.

     Raises:
         Exception: Whatever validation or saving raises; the offending
             ISBN is printed before the exception is re-raised.
     """
     if is_isbn10(isbn):
         isbn = to_isbn13(isbn)

     try:
         return Book.objects.get(isbnlike=isbn)
     except Book.DoesNotExist:
         # Only "no such row" means "create it"; other lookup failures
         # propagate instead of being hidden by a bare except.
         pass

     try:
         b = Book(isbnlike=isbn)
         b.full_clean()
         b.save()
     except Exception:
         print("Problem with isbn = ", isbn)
         raise
     return b
Beispiel #34
0
 def extract_isbn(value):
     """Extract the single ISBN contained in *value* and return it masked.

     Raises:
         ValidationError: If no ISBN-like text can be extracted, if more
             than one is found, or if the one found is not a valid ISBN.
     """
     try:
         isbns = isbnlib.get_isbnlike(value)
         # An empty result raises IndexError and is reported as bad format.
         isbn = isbns[0]
     except Exception:
         # Narrowed from a bare except so KeyboardInterrupt/SystemExit
         # are not turned into validation errors.
         raise ValidationError(f"Bad format {value}")

     if len(isbns) > 1:
         raise ValidationError("Too much ISBN numbers")
     # to_isbn13() gives a falsy result for anything that is not a valid ISBN.
     if not isbnlib.is_isbn10(isbn) and not isbnlib.to_isbn13(isbn):
         raise ValidationError("It is not ISBN number")
     # Exactly one valid ISBN remains here; the former len == 0 and
     # "Unexpected option" branches could never be reached.
     return isbnlib.mask(isbn)
Beispiel #35
0
    def format_input_data(self, isbn, price):
        """Normalise the submitted ISBN and price and collect form errors.

        The ISBN is UTF-8 encoded, stripped of '-' and passed through
        ``to_isbn13``. The price is formatted with two decimals; when it is
        not a number an error is recorded and the price is left as given.

        Returns:
            tuple: ``(isbn, price, errors)``
        """
        errors = []

        # Strip '-'
        bare_isbn = isbn.encode('utf8').replace('-', '')
        isbn = to_isbn13(bare_isbn)

        try:
            price = "{:.2f}".format(float(price))
        except ValueError:
            errors.append('Please enter a valid price (e.g. "$40.00")')

        errors.extend(self.form_error_checking(isbn=isbn, price=price))
        return isbn, price, errors
Beispiel #36
0
def preprocess_isbns(isbns):
    """
    Reduce *isbns* to a duplicate-free list of canonical ISBN-13s.

    :param isbns: isbns in different formats
    :return: canonical isbn13s; values rejected by the strict
             ``isbnlib.notisbn`` check are left out
    """
    unique = set()
    for raw in isbns:
        if isbnlib.notisbn(raw, level='strict'):
            continue
        if isbnlib.is_isbn10(raw):
            raw = isbnlib.to_isbn13(raw)
        unique.add(isbnlib.get_canonical_isbn(raw))
    return list(unique)
Beispiel #37
0
 def from_asin(cls, _db, asin, autocreate=True):
     """Turn an ASIN-like string into an Identifier.

     Surrounding whitespace and hyphens are removed first. If the string
     is an ISBN10 or ISBN13, the Identifier will be of type ISBN and the
     value will be the equivalent ISBN13. Otherwise the Identifier will be
     of type ASIN and the value will be the cleaned `asin`.
     """
     candidate = asin.strip().replace("-", "")
     if isbnlib.is_isbn10(candidate):
         candidate = isbnlib.to_isbn13(candidate)
     identifier_type = cls.ISBN if isbnlib.is_isbn13(candidate) else cls.ASIN
     return cls.for_foreign_id(_db, identifier_type, candidate, autocreate)
Beispiel #38
0
def main():
    """Print the ISBN-13 form of the first command-line argument and its isbnlib info.

    Prints "invalid ISBN" when the argument is neither a valid ISBN-10 nor
    a valid ISBN-13.
    """
    candidate = sys.argv[1]
    if isbnlib.is_isbn10(candidate):
        isbn13 = isbnlib.to_isbn13(candidate)
    elif isbnlib.is_isbn13(candidate):
        isbn13 = candidate
    else:
        print("invalid ISBN")
        return

    print(isbn13)
    print(isbnlib.info(isbn13))
 def from_asin(cls, _db, asin, autocreate=True):
     """Build an Identifier from an ASIN-like string.

     The string is trimmed and its hyphens are dropped. An ISBN10 or
     ISBN13 gives an Identifier of type ISBN whose value is the equivalent
     ISBN13; anything else gives an Identifier of type ASIN holding the
     cleaned string.
     """
     cleaned = asin.strip().replace("-", "")
     if isbnlib.is_isbn10(cleaned):
         cleaned = isbnlib.to_isbn13(cleaned)

     if isbnlib.is_isbn13(cleaned):
         return cls.for_foreign_id(_db, cls.ISBN, cleaned, autocreate)
     return cls.for_foreign_id(_db, cls.ASIN, cleaned, autocreate)
Beispiel #40
0
def query_isbn_data(isbn_str):
    # type: (str) -> Book
    """Look up metadata for an ISBN, trying 'openl' first and 'goob' second.

    An ISBN-10 is converted to ISBN-13 before querying. Returns the first
    truthy result, or None when both lookups come back empty.
    """
    if isbnlib.is_isbn10(isbn_str):
        isbn_str = isbnlib.to_isbn13(isbn_str)

    logger.info('query openlibrary for %s' % isbn_str)
    meta = _get_metadata_for_isbn(isbn_str, 'openl')
    if not meta:
        logger.info('query google books for %s' % isbn_str)
        meta = _get_metadata_for_isbn(isbn_str, 'goob')

    return meta or None
Beispiel #41
0
    def _parse_book_list_item_into_books(self, item):
        """Turn one search-result list item into a list of book dicts.

        One dict is produced per price entry that
        ``_parse_price_bulk_item_into_book`` can parse; each carries the
        book type heading that preceded it plus the shared isbn, thumbnail
        link and author. Returns an empty list when the item's ASIN
        contains "B" or when the title link is missing.
        """
        def get_book_type(price_item):
            # Entries without a "$" are type headings, not prices.
            text = price_item.get_text()
            if "$" in text:
                return ""
            else:
                return text

        # Title and link contained in link element
        thumbnail_link = item.find("img").attrs["src"].encode("utf8")
        isbn = item.attrs["data-asin"].encode("utf8")
        # If B in isbn, some kind of amazon special book
        if "B" in isbn:
            return []
        isbn = to_isbn13(isbn)
        link_item = item.find("a", class_="a-link-normal s-access-detail-page a-text-normal")
        if not link_item:
            return []
        author_combined_item = item.find("div", class_="a-row a-spacing-none")
        authors = self._get_author_from_author_combined_item(author_combined_item)
        new_book_title = link_item.attrs["title"].encode("utf8")

        # Handle pricing for hardcover, Paperback, Kindle Edition, rent
        price_column_item = item.find("div", class_="a-column a-span7")
        price_bulk_items = price_column_item.find_all("a", class_="a-link-normal a-text-normal")

        new_books = []
        book_type = ""
        # Separate loop name so the `item` parameter is not shadowed.
        for price_item in price_bulk_items:
            # First check for book type (Hardcover, softcover, etc)
            new_book_type = get_book_type(price_item)
            if new_book_type:
                book_type = new_book_type
                continue
            book = self._parse_price_bulk_item_into_book(price_item, new_book_title)
            # Test the parse result BEFORE adding keys: assigning book_type
            # first made an empty result truthy and failed outright on None.
            if book:
                book["book_type"] = book_type
                new_books.append(book)
        # Add isbn
        for book in new_books:
            book["isbn"] = isbn
            book["thumbnail_link"] = thumbnail_link
            book["author"] = authors
        return new_books
Beispiel #42
0
def _collect_openlibrary_isbn13(reference, isbn13_array):
    """Append ISBN-13s that OpenLibrary reports for ``reference``.

    Every ISBN-13 not yet in ``isbn13_array`` is appended to it and also
    registered on ``reference`` with source "OpenLibrary".
    """
    # NOTE(review): the lookup goes through `gnomics.reference` while
    # add_identifier goes through `gnomics.objects.reference`; both paths
    # are kept as found -- confirm which module is the right one.
    for obj in gnomics.reference.Reference.openlibrary(reference):
        if obj["isbn_13"] not in isbn13_array:
            isbn13_array.append(obj["isbn_13"])
            gnomics.objects.reference.Reference.add_identifier(reference, identifier=obj["isbn_13"], identifier_type="ISBN-13", source="OpenLibrary", language=None)


def get_isbn13(reference, verbose=False):
    """Return the ISBN-13 identifiers known or derivable for ``reference``.

    If the reference already carries ISBN-13 identifiers, those are returned
    directly. Otherwise ISBN-10 identifiers are converted with isbnlib and
    OpenLibrary is consulted once per ISBN-10, OpenLibrary, LCCN and OCLC
    identifier found on the reference. Newly discovered ISBN-13s are also
    added to the reference as identifiers.

    Args:
        reference: Object exposing ``identifiers``; passed to the gnomics
            identifier and OpenLibrary helpers.
        verbose (bool): When true, print a note for each ISBN-10 that could
            not be converted.

    Returns:
        list: Distinct ISBN-13 values, in discovery order.
    """
    filter_identifiers = gnomics.objects.auxiliary_files.identifier.filter_identifiers
    isbn13_array = []

    for iden in filter_identifiers(reference.identifiers, ["isbn13", "isbn-13", "isbn 13"]):
        if iden["identifier"] not in isbn13_array:
            isbn13_array.append(iden["identifier"])

    if isbn13_array:
        return isbn13_array

    for iden in filter_identifiers(reference.identifiers, ["isbn10", "isbn-10", "isbn 10"]):
        try:
            isbn13_temp = isbnlib.to_isbn13(iden["identifier"])
            if not isbn13_temp:
                # A failed conversion yields an empty result rather than an
                # exception; never store that as an identifier.
                if verbose:
                    print("No corresponding ISBN-13 found.")
            elif isbn13_temp not in isbn13_array:
                isbn13_array.append(isbn13_temp)
                gnomics.objects.reference.Reference.add_identifier(reference, identifier=isbn13_temp, identifier_type="ISBN-13", source="ISBNlib", language=None)
        except Exception:
            # Conversion stays best-effort, but Ctrl-C / SystemExit are no
            # longer swallowed the way a bare `except:` would.
            if verbose:
                print("No corresponding ISBN-13 found.")

        # The lookups previously passed the undefined name `ref`; the
        # reference being processed is what must be queried.
        _collect_openlibrary_isbn13(reference, isbn13_array)

    # One OpenLibrary lookup per matching identifier of each remaining kind.
    for id_types in (
        ["openlibrary", "openlibrary id", "openlibrary identifier", "olid"],
        ["lccn", "library of congress control number"],
        ["oclc", "oclc number", "oclc control number"],
    ):
        for _ in filter_identifiers(reference.identifiers, id_types):
            _collect_openlibrary_isbn13(reference, isbn13_array)

    return isbn13_array
Beispiel #43
0
 def parse_results(self, results):
     """Build a ``Book`` for every item extracted from ``results``.

     Items come from ``self.get_items(results)``. The 'Binding' attribute is
     lower-cased and 'paperback' is renamed to 'trade paperback'; the ISBN is
     passed through ``isbnlib.to_isbn13`` and the publication date is parsed
     down to a date. Each book also gets a ``large_cover`` attribute holding
     the large image URL ('' when absent).

     Returns:
         list: The ``Book`` objects, in item order.
     """
     books = []
     for entry in self.get_items(results):
         item_attrs = entry.get('ItemAttributes')
         binding = item_attrs.get('Binding').lower()
         # Plain paperbacks are catalogued under the 'trade paperback' label.
         binding = 'trade paperback' if binding == 'paperback' else binding
         raw_isbn = item_attrs.get('ISBN', '')
         published = parser.parse(item_attrs.get('PublicationDate')).date()
         new_book = Book(
             title=item_attrs.get('Title'),
             isbn=isbnlib.to_isbn13(raw_isbn),
             book_type=binding,
             publication_date=published,
             amazon_link=entry.get('DetailPageURL', ''),
         )
         new_book.large_cover = entry.get('LargeImage', {}).get('URL', '')
         books.append(new_book)
     return books
def valid():
    """Check the module-level ``isbnInput`` as an ISBN-10 and report on it.

    Each of the ten characters is multiplied by its position weight
    (10 for the first character down to 1 for the last); the ISBN is valid
    when the weighted sum is divisible by 11. The sum and the verdict are
    printed, and for a valid ISBN the metadata returned by ``isbnlib.meta``
    is printed as well.

    Raises:
        IndexError: If ``isbnInput`` has fewer than ten characters.
        ValueError: If a character that should be a digit is not one.
    """
    # Weights 10..2 apply to the first nine characters.
    weighted = [
        weight * int(char)
        for weight, char in zip(range(10, 1, -1), isbnInput[:9])
    ]
    # sometimes, the last number in the ISBN is a Ten, which is written as
    # an 'X' (accepted in either case); its weight is 1.
    check_char = isbnInput[9]
    tenthNum = 10 if check_char in ('X', 'x') else int(check_char)

    # calculate the sum of all of the expanded numbers above
    isbnSum = sum(weighted) + tenthNum
    print('The sum of the ISBN is: ' + str(isbnSum))

    # this is where we check to see if the ISBN that the user entered is
    # valid at all or tell them that it's not
    if isbnSum % 11 == 0:
        print('You entered a valid ISBN number')
        # isbnlib.meta() looks the ISBN up; parameters are the ISBN, the
        # service to query, and the cache setting
        print(isbnlib.meta(isbnInput, service='merge', cache='default'))
    else:
        # Report the ISBN the user typed, not a conversion result that is
        # empty for invalid input.
        print(str(isbnInput) + ' is not a valid ISBN number')
Beispiel #45
0
def handle_isbn(val):
    """Return ``val`` as a masked ISBN-13, or ``None`` for a falsy ``val``.

    The value is stringified, passed through ``to_isbn13`` and the result is
    handed to ``mask``.
    """
    if not val:
        return None
    isbn13 = to_isbn13(str(val))
    return mask(isbn13)
def main():
    """Look up every GCIS book on DBpedia by trying several ISBN spellings.

    The GCIS listing URL comes from ``--GCIS`` (a staging default is used
    when omitted). For each listed book the full record is fetched, its ISBN
    is cleaned, and a series of ISBN formats (raw, canonical, ISBN-10/13,
    hyphenated, each also prefixed with "ISBN ") is substituted into
    ``QUERY`` and run through ``RQUERY`` until one returns results. Books
    that fail at any step are reported and written to ``file``.
    """
    # Command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-path', '--GCIS', help = "Insert url path to GCIS book in JSON format [ex.'https://gcis-search-stage.jpl.net:3000/book.json?all=1'] ")
    args = parser.parse_args()
    GCIS = args.GCIS

    if GCIS is None:
        GCIS = 'https://gcis-search-stage.jpl.net:3000/book.json?all=1'
        print('NO MANUAL GCIS PATH\n ALL GCIS BOOK JSON FORMATS WILL BE USED AS DEFAULT')

    # The resource type (e.g. "book") is the last path segment before the
    # extension. It only depends on the listing URL, so it is worked out
    # once instead of per book, and a non-matching URL is rejected up front
    # instead of leaving FILETYPE unbound.
    match = re.search(r'.*/(.*?)\..*?$', GCIS)
    if not match:
        parser.error('could not determine the GCIS resource type from path: {}'.format(GCIS))
    FILETYPE = match.groups()[0]

    for entry in parse(GCIS):
        # Pre-bind what the error report prints so that a failure early in
        # the try block cannot raise NameError inside the handler.
        TITLE = ISBNS = IDEN = None
        try:
            # Extracts book identifier from GCIS
            IDEN = entry["identifier"]
            # HREF = url that leads to book.json in GCIS-DEV
            HREF = 'https://gcis-search-stage.jpl.net:3000/{}/{}.json' .format(FILETYPE,IDEN)
            HREFPAR = parse(HREF)
            # Extracts book title and isbn from GCIS-DEV
            record = dict(HREFPAR)
            TITLE = record['title']
            ISBNS = record['isbn']
            # Cleans ISBNS to only contain valid characters
            CISBN = clean(ISBNS)
            # V13 = validated canonical ISBN-13
            V13 = EAN13(CISBN)
            if V13 is None:
                V13 = canonical(CISBN)

            # The record fetched above is printed; it is not downloaded a
            # second time.
            print("GCIS-DEV\n\n\t", HREFPAR, '\n\n\t', "isbn_original:", ISBNS, '\n\n\t', "isbn_mod:", V13, "\n\n")

            # DBpedia ISBN formats, each conversion computed once
            canon = canonical(CISBN)
            isbn10 = to_isbn10(CISBN)
            isbn10_hyphenated = hyphenate(isbn10)
            isbn13 = to_isbn13(CISBN)
            isbn13_hyphenated = hyphenate(isbn13)

            tests = [
                ISBNS,
                canon,
                isbn10,
                isbn10_hyphenated,
                isbn13,
                isbn13_hyphenated,
                V13,
                "ISBN {}" .format(CISBN),
                "ISBN {}" .format(canon),
                "ISBN {}" .format(isbn13_hyphenated),
                "ISBN {}" .format(V13),
                "ISBN {}" .format(isbn10),
                "ISBN {}" .format(isbn10_hyphenated),
            ]

            for indie in tests:
                # Run each query once and reuse the answer.
                found = RQUERY(QUERY % indie)
                if len(found) != 0:
                    print(found)
                    break

        except Exception:
            # Per-book failures are reported and skipped; KeyboardInterrupt
            # and SystemExit still stop the run.
            Error = '\n\t######## PROBLEM #######\n\tTitle:{}\n\tGCIS-ISBN:{}\n\tIdentifier:{}\n\n'.format(TITLE, ISBNS, IDEN)
            print(Error)
            # NOTE(review): `file` is not defined in this function; it is
            # presumably a module-level open log handle -- confirm.
            file.write(Error)
Beispiel #47
0
def _merge_web_details(row, details):
    """Fold the online metadata ``details`` into the CSV ``row`` in place.

    The ISBN is filled in when the row has none, title differences are
    reported (the file's title wins unless the online one extends it), and
    only keys listed in ``fieldnames`` are copied so the DictWriter never
    sees unknown columns. The row is marked as web-checked.
    """
    if details.get('ISBN-13', "") != "" and row.get('ISBN', "") == "":
        row['ISBN'] = details['ISBN-13']
    if 'Authors' in row:
        row['Authors'] = row['Authors'].split('/')
    old_title = row['Title']
    web_title = details['Title']
    if old_title != web_title:
        old_canon = canonicalize_title(old_title)
        web_canon = canonicalize_title(web_title)
        old_len = len(old_canon)
        web_len = len(web_canon)
        if ((web_len > old_len and old_canon in web_canon)
                or (web_len == old_len and old_canon == web_canon)):
            print("Title improvement from {} to {}".format(old_title, web_title))
        else:
            print("Title discrepancy: {} in file, {} found online".format(old_title, web_title))
            details['Title'] = old_title
    # don't use 'update', because we don't want to drag in random other
    # fields that dictwriter will then object to
    for key in fieldnames:
        if key in details:
            row[key] = details[key]
    if 'Authors' in row:
        row['Authors'] = '/'.join(row['Authors'])
    row['webchecked'] = "OK"


def main():
    """Copy a book CSV, enriching up to ``--batchsize`` rows from the web.

    Rows that have an ISBN and no 'webchecked' value are normalised to
    ISBN-13 and looked up with ``isbnlib.meta``; the outcome is recorded in
    the row's 'webchecked' column. Every input row is written to the output
    file, changed or not. Rows without an ISBN are passed through untouched
    and do not use up the batch budget.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--batchsize", "-b", type=int, default=8)
    parser.add_argument("input")
    parser.add_argument("output")
    args = parser.parse_args()
    countdown = args.batchsize
    # newline='' is what the csv module expects of files it reads and writes.
    with io.open(args.input, 'r', encoding='utf-8', newline='') as infile, \
            io.open(args.output, 'w', encoding='utf-8', newline='') as outfile:
        books_reader = csv.DictReader(infile)
        books_writer = csv.DictWriter(outfile, fieldnames)
        books_writer.writeheader()
        for row in books_reader:
            if countdown > 0 and not row.get('webchecked', None):
                # A missing ISBN must stay empty; str(None) would turn it
                # into the bogus ISBN "None".
                isbn = str(row.get('ISBN') or "")
                if len(isbn) == 9:
                    # presumably a leading zero lost upstream -- restore it
                    isbn = "0" + isbn
                if isbn:
                    countdown = countdown - 1
                    new_isbn = isbnlib.to_isbn13(isbnlib.canonical(isbn))
                    if not new_isbn:
                        print("Could not canonicalize isbn {}".format(isbn))
                    else:
                        row['ISBN'] = new_isbn
                    details = None
                    try:
                        details = isbnlib.meta(isbn)
                    except isbnlib.dev._exceptions.NoDataForSelectorError:
                        print("No data for ISBN {} title {}".format(isbn, row.get('Title', "Unknown")))
                        row['webchecked'] = "No data for ISBN"
                    except isbnlib._exceptions.NotValidISBNError:
                        print("Invalid ISBN {} for {}".format(isbn, row['Title']))
                        row['webchecked'] = "Invalid ISBN"
                    except isbnlib.dev._exceptions.ISBNNotConsistentError:
                        print("Inconsistent data for {}".format(row['Title']))
                        row['webchecked'] = "Inconsistent ISBN data"
                    if details:
                        _merge_web_details(row, details)
            # The files are opened in text mode with an explicit encoding,
            # so rows are written as-is without per-value encoding.
            books_writer.writerow(row)
Beispiel #48
0
        if 'Project Gutenberg' in publisher_names and not metadata.links:
            # Project Gutenberg texts don't have ISBNs, so if there's an
            # ISBN on there, it's probably wrong. Unless someone stuck a
            # description on there, there's no point in discussing
            # OCLC+LD's view of a Project Gutenberg work.
            return None
        if publisher_names:
            metadata.publisher = publisher_names[0]

        # Grab all the ISBNs.
        example_graphs = self.internal_lookup(subgraph, example_uris)
        for example in example_graphs:
            for isbn_name in 'schema:isbn', 'isbn':
                for isbn in ldq.values(example.get(isbn_name, [])):
                    if len(isbn) == 10:
                        isbn = isbnlib.to_isbn13(isbn)
                    elif len(isbn) != 13:
                        continue
                    if isbn:
                        metadata.identifiers.append(IdentifierData(
                            type = Identifier.ISBN, identifier = isbn
                        ))

        for subject_type, subjects_details in subjects.items():
            for subject_detail in subjects_details:
                if isinstance(subject_detail, dict):
                    subject_name = subject_detail.get('name')
                    subject_identifier = subject_detail.get('id')
                    metadata.subjects.append(SubjectData(
                        type=subject_type, identifier=subject_identifier,
                        name=subject_name,
        IDEN = GCISPAR[x]["identifier"]
        match =  re.search(r'.*/(.*?)\..*?$', GCIS)
        if match:
            FILETYPE = match.groups()[0]
#HREF = url that leads to book.json in GCIS-DEV
        HREF = 'https://gcis-search-stage.jpl.net:3000/{}/{}.json' .format(FILETYPE,IDEN)
        #HREF = 'https://gcis-search-stage.jpl.net:3000/book/13b8b4fc-3de1-4bd8-82aa-7d3a6aa54ad5.json'
        HREFPAR = parse(HREF)
#Extracts book title and isbn from GCIS-DEV
        d = dict(HREFPAR)
        TITLE = d['title']
        ISBNS = d['isbn']
#Cleans ISBNS to only conatian valid characters
        CISBN = clean(ISBNS)
#Converts all listed ISBNS to a ISBN-13 format
        C13 = to_isbn13(CISBN)
#V13 = validated canonical ISBN-13
        V13 = EAN13(C13)
        M = parse(HREF)
        MV13 = M["isbn"] = V13
        ORGISBN = M["org_isbn"] = ISBNS
        print(M, '\n\t', "isbn_original:", ISBNS)
        s = requests.Session()
        s.auth = ('alex' , '8aed39fa67049cdfd42ef612a97e8535ecd46d8955afcc8b')
        s.headers.update({'Accept': 'application/json'})
        r = s.post(HREF, data = M , verify = False)
        r.raise_for_status()
        sys.exit()
        #print('Title:', TITLE, '\nIdentifier:', IDEN,'\n',HREF,'\n\tISBN:', V13, '\n')
    except(TypeError, ValueError):
            print('\n\t######## PROBLEM #######\n','\tTitle:', TITLE,'\n\tGCIS-ISBN:', ISBNS,'\n\tIdentifier:', IDEN, '\n\n')
def com_isbn_10_to_13(isbn_string):
    """Convert ``isbn_string`` to ISBN-13 via ``isbnlib.to_isbn13``.

    The library's result is returned unchanged.
    """
    isbn13 = isbnlib.to_isbn13(isbn_string)
    return isbn13