def post(self):
    """Add the book matching the posted title to the current user's wishlist.

    Expects a JSON body with a required "title" field. Responds with the
    existing wishlist entry (200) when the book is already wished for, the
    newly created entry (201) otherwise, 404 when no user is logged in and
    500 on any lookup/database failure.
    """
    # "title" is the only (required) argument, read from the JSON body.
    self.reqparse.add_argument("title", type=str, required=True, location='json')
    args = self.reqparse.parse_args()

    user = g.user
    if not user:
        # No authenticated user.
        return {'message': 'User not found'}, log__(404, g.user)

    try:
        isbn = isbn_from_words(args['title'])
        title = meta(isbn)['Title']
        existing = Wishlist.query.filter_by(isbn=isbn, user_id=user.id).first()
        if existing:
            # Already on the wishlist: report it unchanged.
            return {'data': existing.serialize}, log__(200, g.user)
        # Not on the wishlist yet: create and persist a new entry.
        entry = Wishlist(isbn=isbn, title=title, user_id=user.id)
        db.session.add(entry)
        db.session.commit()
        return {'data': entry.serialize}, log__(201, g.user)
    except Exception as exc:
        print(exc)
        return {'message': 'Unexpected Error'}, log__(500, g.user)
async def search_book_2(self, isbn: str, keywords: str) -> dict:
    """Collect book metadata for an ISBN (or keyword search) across services.

    When *isbn* is None, *keywords* is required; the keyword result is
    memoised in ``self.cache``. Queries several isbnlib services and merges
    title/authors/publication/publisher/language plus a cover thumbnail.

    :return: metadata dict, or None when every service came up empty
    :raises ValueError: if both *isbn* and *keywords* are None
    """
    if isbn is None:
        if keywords is None:
            raise ValueError
        info = self.cache.get(keywords, None)
        if info is not None:
            return info
        isbn = isbnlib.isbn_from_words(keywords)
    info = dict()
    for key in ['wiki', 'default', 'openl', 'goob']:
        try:
            i = isbnlib.meta(isbn, service=key)
        except (isbnlib.dev._exceptions.DataNotFoundAtServiceError,
                isbnlib.dev._exceptions.ISBNLibHTTPError):
            continue
        if i is not None and len(i) > 0:
            # Fix: use .get() so a service record that omits a field no
            # longer raises KeyError and aborts the whole lookup.
            if i.get('Title'):
                info['title'] = i['Title']
            if i.get('Authors'):
                info['authors'] = i['Authors']
            if i.get('Year'):
                info['publication'] = i['Year']
            if i.get('Publisher'):
                info['publisher'] = i['Publisher']
            if 'language' not in info and i.get('Language'):
                info['language'] = i['Language']
    if len(info) > 0:
        # Fix: cover() can return None when no cover exists; guard before
        # the membership test.
        co = isbnlib.cover(isbn) or {}
        if 'thumbnail' in co:
            info['cover'] = co['thumbnail']
        info['isbn'] = isbn
    info = None if len(info) == 0 else info
    # Note: when called with an explicit isbn and keywords=None this caches
    # under the None key, as the original did.
    self.cache[keywords] = info
    return info
def get_unmodified_isbn_data(query):
    """Return the raw isbnlib metadata dict for the best ISBN match of *query*.

    :param query: free-text words identifying a book
    :return: metadata dict from the 'openl' service, unmodified
    :raises ValueError: when no metadata could be retrieved
    """
    import isbnlib
    isbn = isbnlib.isbn_from_words(query)
    data = isbnlib.meta(isbn, service="openl")
    # Fix: 'assert' is stripped under python -O, silently returning None;
    # validate explicitly instead.
    if data is None:
        raise ValueError("no ISBN metadata found for query: %r" % (query,))
    return data
def test_isbn_extraction():
    """For further usage, not used at the moment.

    Looks up the ISBN for a known title and prints the metadata record.
    """
    isbn = isbnlib.isbn_from_words("to kill a mockingbird")
    # isbn = '3493589182'
    # Fix: Python-2 print statement is a syntax error under Python 3.
    print(isbnlib.meta(isbn, service='default', cache='default'))
def setMetadata(self):
    """Derive and persist this record's ISBN and cover-image URL.

    Saves after each successful field update; failures are reported to
    stdout but never raised.
    """
    try:
        self.isbn = isbnlib.isbn_from_words(f"{self.title+self.author}")
        self.save()
        try:
            # Fix: cover() may return None, which raised TypeError and fell
            # through to the OUTER handler, printing the misleading
            # "isbn error" message. Catch it here so the cover message fires.
            self.imageURL = isbnlib.cover(self.isbn)['thumbnail']
            self.save()
        except (KeyError, TypeError):
            print(f"cover error -> {self.title}")
    except (IndexError, TypeError, UnboundLocalError):
        print(f"isbn error -> {self.title}")
def datahunt(isbnSearch):
    """Look up book metadata and a cover thumbnail for a free-text search.

    :param isbnSearch: words identifying the book
    :return: dict with title/author/publisher/year/isbn/coverURL keys,
             each "" when the field is unavailable
    """
    searchTerm = isbnSearch
    isbn = isbnlib.isbn_from_words(searchTerm)
    # Fix: replace six copy-pasted broad try/except blocks with .get();
    # the `or {}` guards preserve the original blanket handling of
    # meta()/cover() returning None.
    returnData = isbnlib.meta(isbn) or {}
    cover = isbnlib.cover(isbn) or {}
    #print("Cover: ", cover)
    bookInfo = {
        "title": returnData.get("Title", ""),
        "author": returnData.get("Authors", ""),
        "publisher": returnData.get("Publisher", ""),
        "year": returnData.get("Year", ""),
        "isbn": returnData.get("ISBN-13", ""),
        "coverURL": cover.get("smallThumbnail", ""),
    }
    return bookInfo
def get_data(query: str = "", service: str = 'openl') -> List[Dict[str, Any]]:
    """Resolve *query* to an ISBN and return its metadata in papis form.

    :return: list with at most one converted record; empty when the
             lookup yields nothing.
    """
    logger.debug("Trying to retrieve isbn from query: '%s'", query)
    import isbnlib
    results: List[Dict[str, Any]] = []
    isbn = isbnlib.isbn_from_words(query)
    data = isbnlib.meta(isbn, service=service)
    if data is None:
        return results
    assert isinstance(data, dict)
    results.append(data_to_papis(data))
    return results
def get_data(query="", service=None):
    """Resolve *query* to an ISBN and return its metadata in papis form.

    :param query: free-text words identifying the book
    :param service: isbnlib metadata service name (None = library default)
    :return: list with at most one converted record
    """
    # Fix: dropped the needless `global logger` (the name is only read) and
    # the duplicated 'Trying to retrieve isbn' debug line that was logged
    # AFTER retrieval had already happened.
    results = []
    logger.debug('Trying to retrieve isbn')
    isbn = isbnlib.isbn_from_words(query)
    data = isbnlib.meta(isbn, service=service)
    if data is None:
        return results
    assert isinstance(data, dict)
    results.append(data_to_papis(data))
    return results
def get_data(query: str = "", service: str = 'openl') -> List[Dict[str, Any]]:
    """Resolve *query* to an ISBN and return its metadata in papis form.

    Honours isbnlib >= 3.10's renamed default service when the caller
    explicitly passes ``service=None``.
    """
    parsed = tuple(int(piece) for piece in isbnlib.__version__.split('.'))
    if service is None and parsed >= (3, 10, 0):
        service = "default"
    logger.debug('Trying to retrieve isbn')
    isbn = isbnlib.isbn_from_words(query)
    data = isbnlib.meta(isbn, service=service)
    results = []  # type: List[Dict[str, Any]]
    if data is not None:
        assert isinstance(data, dict)
        results.append(data_to_papis(data))
    return results
def get_data(
        query: str = "",
        service: Optional[str] = None) -> List[Dict[str, Any]]:
    """Resolve *query* to an ISBN and return its metadata in papis form.

    :param query: free-text words identifying the book
    :param service: isbnlib metadata service name (None = library default)
    :return: list with at most one converted record
    """
    # Fix: dropped the needless `global logger` (the name is only read),
    # the duplicated 'Trying to retrieve isbn' debug line logged AFTER
    # retrieval, and the call-style parentheses on assert.
    results = []  # type: List[Dict[str, Any]]
    logger.debug('Trying to retrieve isbn')
    isbn = isbnlib.isbn_from_words(query)
    data = isbnlib.meta(isbn, service=service)
    if data is None:
        return results
    assert isinstance(data, dict)
    results.append(data_to_papis(data))
    return results
def abgerufen(text, pfad: Path):
    """Extract ISBN candidates from *text* (falling back to the file stem)
    and return the first metadata record isbnlib can resolve.

    :param text: extracted document text (may be falsy)
    :param pfad: path whose stem is used as a fallback search phrase
    :return: metadata dict, or None/falsy when nothing could be resolved
    """
    if not text:
        return None
    isbns = get_isbnlike(str(text), level='normal')
    if len(isbns) == 0:
        # Fix: isbn_from_words() may return None; the original wrapped it in
        # a list, making the following emptiness check always pass.
        fallback = isbn_from_words(str(pfad.stem))
        isbns = [fallback] if fallback else []
    if len(isbns) == 0:
        return None
    m = None
    for isbn in isbns:
        # Fix: bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        try:
            m = meta(isbn)
        except Exception:
            continue
        if m:
            break
    return m
""" from tqdm import tqdm from isbnlib import isbn_from_words, meta import pandas as pd from pathlib import Path parent_dir = Path(__file__).parent.parent ref_df = pd.read_csv(f'{parent_dir}/data/reference.csv') isbns = [] for _, row in tqdm(ref_df.iterrows(), desc='Retrieving ISBNs'): title = row['title'] author = row['author'] try: isbn = isbn_from_words(title + author) if isbn is None: isbn = '0' except UnboundLocalError: isbn = '0' isbns.append(isbn) dates = [] for i in tqdm(isbns, desc='Retrieving dates'): if i == '0': date = 0 else: date = meta(str(i)) dates.append(date['Year'])
csv_file_path = input("Enter your csv file path here: ") #open csv file with open(csv_file_path, "r") as file: book_dicts = csv.DictReader(file) for row in book_dicts: book = (dict(row)) #write the other necessary fields for TW5 book['created'] = book['date_added'].replace("/", "") + ("000000000") book['modified'] = current_time.strftime(fmt) + "00001" #error handling on getting book description try: book['book_desc'] = isbn_from_words(book['isbn']) except: print("There appears to have been an error.") print(f"Attempting to get isbn from title: {book['title']}") try: possible_book_isbn = isbn_from_words(book['title']) book['book_desc'] = desc(possible_book_isbn) except: print("That didn't work either...") is_correct_book = "no" while is_correct_book != "Y": possible_book_isbn = isbn_from_words( input("Type book title: ")) isbnlib_book = meta(possible_book_isbn, SERVICE) print(isbnlib_book["Title"]) is_correct_book = input(
def get_isbn_from_words(query_list):
    """Populate books_isbn_dict with ISBN, description and author for each
    title in *query_list*, then dump it to book_data_base.json.

    :param query_list: iterable of book-title query strings
    :return: None (results go to books_isbn_dict and the JSON file);
             prints the number of successfully processed titles.
    """
    # Fix: initialised before the try so the final print cannot raise
    # UnboundLocalError when the outer handler fires early.
    successful_books_list = []
    try:
        failed_books = []
        for query in query_list:
            # 10 digit or 13 digit isbn received.
            isbn = isbnlib.isbn_from_words(query)
            # Fix: `== None` -> `is None`; Python-2 prints -> print().
            if isbn is None:
                print("Failed to get isbn for ", query)
                continue
            # desc() returns None for some books; skip those.
            description = isbnlib.desc(isbn)
            if description is not None:
                # ASCII-sanitize. Fix: under Python 3 encode() yields bytes,
                # which broke the later str.replace — decode back to str.
                description = description.encode(
                    'ascii', 'ignore').decode('ascii')
            else:
                print("Failed to get description for isbn ", query)
                failed_books.append(query)
                continue
            # Keep "\n" as literal text rather than a line break.
            final_description = description.replace("\n", " ")
            meta_data_dict = isbnlib.meta(isbn)
            if meta_data_dict is None:
                print("No metadata present for the book with isbn ", query)
                failed_books.append(query)
                continue
            # Fix: the original checked `type(author) == None`, which is
            # never true; validate the Authors list before indexing instead.
            authors = meta_data_dict.get("Authors")
            if not authors:
                print("Failed to get author for isbn ", query)
                failed_books.append(query)
                continue
            author = authors[0].encode('ascii', 'ignore').decode('ascii')
            books_isbn_dict[query]["isbn"] = isbn
            books_isbn_dict[query]["description"] = final_description
            books_isbn_dict[query]["author"] = author
            successful_books_list.append(query)
        # Writing data to json file.
        try:
            with open('book_data_base.json', 'w') as json_file_handler:
                json.dump(dict(books_isbn_dict), json_file_handler, indent=4)
        except Exception as ex:
            print("This exception occured while printing to the json file")
            print(ex)
    except Exception as ex:
        print("Failed to open the file handler")
        print(ex)
    print(len(successful_books_list))
# Build "Title+Words" query pairs from the spreadsheet (rows 2..max_row).
max_row = sheet.max_row
data = []
for i in range(2, max_row + 1):
    title = sheet.cell(row=i, column=1).value
    authors = sheet.cell(row=i, column=2).value
    data.append([title.replace(" ", "+"), authors.replace(" ", "+")])

isbn_list = []
a = len(data)
# Row indices known to break the lookup; these are skipped entirely.
errors = [41, 225, 226, 227, 228, 296, 353, 354]
for i in range(a):
    if i not in errors:
        query = data[i][0] + "+" + data[i][1]
        try:
            isbn = isbn_from_words(query)
            res = [
                i + 2, isbn,
                data[i][0].replace("+", " "),
                data[i][1].replace("+", " ")
            ]
        except UnboundLocalError:
            res = [
                i + 2, 'ISBN NOT FOUND',
                data[i][0].replace("+", " "),
                data[i][1].replace("+", " ")
            ]
        # Fix: the original appended `res` twice on the failure path and
        # appended a STALE `res` for rows listed in `errors`; append exactly
        # once per processed row.
        isbn_list.append(res)
    print(str(i + 1) + '/' + str(a))

json_data = []
from isbnlib import isbn_from_words
from isbnlib import meta
from isbnlib import cover
from isbnlib import desc
from pprint import pprint
from random import randint

# Enrich every entry of booktitles.json with ISBN, metadata, description
# and cover links, writing the result to cleanbooks.json.
with open('booktitles.json') as f:
    books = json.load(f)
books = books['books']

results = []
for book in books:
    isbn = isbn_from_words(book['author'] + " " + book['title'])
    print("for ", book['title'], " the isbn is ", isbn)
    # Fix: bare `except:` also swallowed KeyboardInterrupt/SystemExit;
    # the metadata lookup stays best-effort with an empty-dict fallback.
    try:
        metadata = meta(isbn, service='default', cache=None)
    except Exception:
        metadata = {}
    images = cover(isbn)
    description = desc(isbn)
    book['isbn'] = isbn
    book['metadata'] = metadata
    book['desc'] = description
    book['cover'] = images
    results.append(book)

with open('cleanbooks.json', 'w') as outfile:
    json.dump(results, outfile)
def com_isbn_lookup_title(title_words):
    """
    Returns the most probable ISBN from a list of words
    (for your geographic area).
    """
    probable_isbn = isbnlib.isbn_from_words(title_words)
    return probable_isbn
def amazon(url):
    """Scrape an Amazon book product page.

    Extracts title, price/sale, category breadcrumb, availability, ISBNs
    (via isbnlib from the title), the ratings histogram and customer
    reviews, and returns them as one dict. Returns None implicitly on
    unrecoverable errors.
    """
    ua = UserAgent()
    headers = {'User-Agent': ua.random}
    page = requests.get(url, headers=headers)
    # NOTE(review): the page is fetched exactly once; this loop only
    # re-parses the SAME response after each failure, so a persistent parse
    # error retries forever (see the except clause at the bottom).
    while True:
        sleep(3)
        try:
            parser_page = html.fromstring(page.content)
            # Raw node extractions; each is a (possibly empty) list of text.
            raw_title = parser_page.xpath('//span[@id="productTitle"]//text()')
            raw_price = parser_page.xpath(
                '//span[@class="a-size-medium a-color-price offer-price a-text-normal"]'
                '//text()')
            raw_sale = parser_page.xpath(
                '//span[@class="a-size-base a-color-secondary"]//text()')
            raw_author = parser_page.xpath(
                '//a[@class="a-link-normal contributorNameID"]//text()')
            raw_category = parser_page.xpath(
                '//a[@class="a-link-normal a-color-tertiary"]//text()')
            raw_availability = parser_page.xpath(
                '//div[@id="availability"]//text()')
            ratings = parser_page.xpath('//table[@id="histogramTable"]//tr')
            reviews = parser_page.xpath(
                '//div[contains(@id,"reviews-summary")]')
            # Normalise each field to a stripped string, or None when absent.
            title = ''.join(''.join(raw_title).strip()) if raw_title else None
            sale = ''.join(
                ''.join(raw_sale).split()).strip() if raw_sale else None
            category = ' > '.join([i.strip() for i in raw_category
                                   ]) if raw_category else None
            price = ''.join(raw_price).strip() if raw_price else None
            availability = ''.join(
                raw_availability).strip() if raw_availability else None
            review_author = ''.join(raw_author).strip() if raw_author else None
            # Derive ISBN-13/ISBN-10 and a description from the page title.
            title_to_isbn = str(title)
            isbn = isbnlib.isbn_from_words(title_to_isbn)
            desc = str(isbnlib.desc(isbn))
            # NOTE(review): desc is already a str, so ''.join(desc) is a
            # no-op; str(None) also makes the `if desc` guard always truthy.
            description = ''.join(desc).strip() if desc else None
            isbn10 = isbnlib.to_isbn10(isbn)
            raw_isbn13 = isbn[:3] + '-' + isbn[3:]  # e.g. 978-XXXXXXXXXX
            isbn_13 = ''.join(raw_isbn13).strip() if raw_isbn13 else None
            isbn_10 = ''.join(isbn10).strip() if isbn10 else None
            # Fallback review selector for the alternate page layout.
            if not reviews:
                reviews = parser_page.xpath('//div[@data-hook="review"]')
            # Rating: histogram rows -> {stars-label: percentage}.
            ratings_dict = {}
            # NOTE(review): the loop variable shadows the `ratings` list;
            # harmless here but confusing — consider renaming.
            for ratings in ratings:
                extracted_rating = ratings.xpath('./td//a//text()')
                if extracted_rating:
                    rating_key = extracted_rating[0]
                    raw_rating_value = extracted_rating[1]
                    rating_value = raw_rating_value
                    if rating_key:
                        ratings_dict.update({rating_key: rating_value})
            # Reviews: one dict per customer review.
            reviews_list = []
            for review in reviews:
                raw_review_header = review.xpath(
                    './/a[@data-hook="review-title"]//text()')
                raw_review_author = review.xpath(
                    './/a[contains(@href,"/profile/")]/parent::span//text()')
                raw_review_rating = review.xpath(
                    './/i[@data-hook="review-star-rating"]//text()')
                raw_review_posted_date = review.xpath(
                    './/a[contains(@href,"/profile/")]'
                    '/parent::span/following-sibling::span/text()')
                raw_review_text1 = review.xpath(
                    './/div[@data-hook="review-collapsed"]//text()')
                # Collapsed reviews embed the remainder as JSON in this
                # attribute.
                raw_review_text2 = review.xpath(
                    './/div//span[@data-action="columnbalancing-showfullreview"]'
                    '/@data-columnbalancing-showfullreview')
                raw_review_text3 = review.xpath(
                    './/div[contains(@id,"dpReviews")]/div/text()')
                review_header = ' '.join(' '.join(raw_review_header).split())
                review_author = ''.join(
                    ''.join(raw_review_author).split()).strip('By')
                review_rating = ''.join(raw_review_rating).replace(
                    'out of 5 stars', '')
                review_posted_date = dateparser.parse(
                    ''.join(raw_review_posted_date)).strftime('%d %b %Y')
                review_text = ' '.join(' '.join(raw_review_text1).split())
                if raw_review_text2:
                    # Append the hidden remainder of a collapsed review,
                    # stripping any embedded HTML tags.
                    json_loaded_review_data = json.loads(raw_review_text2[0])
                    json_loaded_review_data_text = json_loaded_review_data[
                        'rest']
                    cleaned_json_loaded_review_data_text = re.sub(
                        '<.*?>', '', json_loaded_review_data_text)
                    full_review_text = review_text + cleaned_json_loaded_review_data_text
                else:
                    full_review_text = review_text
                # Alternate layout: review body lives under dpReviews divs.
                if not raw_review_text1:
                    full_review_text = ' '.join(
                        ' '.join(raw_review_text3).split())
                review_dict = {
                    'review_header': review_header,
                    'review_author': review_author,
                    'review_rating': review_rating,
                    'review_posted_date': review_posted_date,
                    'review_text': full_review_text,
                }
                reviews_list.append(review_dict)
            # Fall back to the sale price when no offer price was found.
            if not price:
                price = sale
            if page.status_code != 200:
                raise ValueError('captha')
            data = {
                'URL': url,
                'TITLE': title,
                'AUTHOR': review_author,
                'PRICE': price,
                'SALE': sale,
                'CATEGORY': category,
                'DESCRIPTION': description,
                'ISBN-10': isbn_10,
                'ISBN-13': isbn_13,
                'AVAILABILITY': availability,
                'RATING': ratings_dict,
                'REVIEW': reviews_list,
            }
            return data
        except Exception as e:
            print(e)
            # NOTE(review): comparing an exception OBJECT to the string
            # 'NoneType' is always False, so this never returns and the
            # while-loop retries indefinitely on persistent errors —
            # presumably `type(e).__name__ == 'NoneType'`-style logic was
            # intended. TODO confirm and fix.
            if e == 'NoneType':
                return None
def get_probable_isbn(words):
    """Return the most probable ISBN for the given words, via isbnlib."""
    guess = isbnlib.isbn_from_words(words)
    return guess