def GET(self, isbn):
    """Return JSON-encoded vendor metadata for ``isbn``.

    Queries Amazon and Better World Books for the ISBN, then tries to attach
    the matching Open Library edition, attempting to create one from the
    Amazon record when no edition is found.

    :param str isbn: ISBN as supplied by the caller; it is passed through
        ``normalize_isbn`` before use
    :return: JSON object with ``amazon`` and ``betterworldbooks`` entries,
        plus ``key`` (and ``ocaid`` when set) if an edition was resolved
    :rtype: str
    """
    isbn = normalize_isbn(isbn)
    isbn_type = 'isbn_' + ('13' if len(isbn) == 13 else '10')
    metadata = {
        'amazon': get_amazon_metadata(isbn) or {},
        'betterworldbooks': get_betterworldbooks_metadata(isbn) or {},
    }

    # if bwb fails and isbn10, try again with isbn13
    if len(isbn) == 10 and metadata['betterworldbooks'].get('price') is None:
        isbn_13 = isbn_10_to_isbn_13(isbn)
        metadata['betterworldbooks'] = (
            get_betterworldbooks_metadata(isbn_13) or {})

    # fetch book by isbn if it exists
    matches = web.ctx.site.things({
        'type': '/type/edition',
        isbn_type: isbn,
    })
    book_key = matches[0] if matches else None

    # if no OL edition for isbn, attempt to create
    if not book_key and metadata['amazon']:
        resp = load(clean_amazon_metadata_for_load(metadata['amazon']))
        # NOTE(review): load() is expected to answer with a dict shaped like
        # {'edition': {'key': ...}}; indexing that with [0] raised KeyError.
        # A plain sequence of keys is still accepted -- TODO confirm shape.
        if isinstance(resp, dict):
            book_key = (resp.get('edition') or {}).get('key')
        elif resp:
            book_key = resp[0]

    # include ol edition metadata in response, if available
    if book_key:
        ed = web.ctx.site.get(book_key)
        if ed:
            metadata['key'] = ed.key
            # default of None: editions without an ocaid must not raise
            if getattr(ed, 'ocaid', None):
                metadata['ocaid'] = ed.ocaid

    return simplejson.dumps(metadata)
def get_isbn13(self):
    """Return an ISBN-13 for this record, preferring its own ``isbn_13``.

    The first ``isbn_13`` entry is passed through ``canonical``; when that
    yields nothing, the first ``isbn_10`` entry is converted with
    ``isbn_10_to_isbn_13``. If neither is usable, the falsy value that
    stopped the lookup (e.g. ``None``, ``[]`` or ``''``) is returned as-is.
    """
    thirteens = self.isbn_13
    if thirteens:
        normalized = canonical(thirteens[0])
        if normalized:
            return normalized

    # No usable ISBN-13: fall back to converting the first ISBN-10.
    tens = self.isbn_10
    if not tens:
        return tens
    first_ten = tens[0]
    if not first_ten:
        return first_ten
    return isbn_10_to_isbn_13(first_ten)
def _get_amazon_metadata(isbn):
    """Look ``isbn`` up on Amazon and return the fields we care about.

    :param str isbn: ISBN-10 or ISBN-13; the lookup itself uses the ISBN-10
    :return: dict of price and bibliographic fields, or ``None`` when the
        Amazon API is not configured or the lookup raises
    :rtype: dict or None
    """
    # XXX some isbns may be < 10!
    isbn_10 = isbn if len(isbn) == 10 else isbn_13_to_isbn_10(isbn)
    isbn_13 = isbn if len(isbn) == 13 else isbn_10_to_isbn_13(isbn)

    try:
        api = lending.amazon_api
        if not api:
            return None
        product = api.lookup(ItemId=isbn_10)
    except Exception:
        # Best effort: any lookup failure means "no Amazon metadata".
        return None

    price_fmt, price, qlt = (None, None, None)
    used = product._safe_get_element_text(
        'OfferSummary.LowestUsedPrice.Amount')
    new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount')

    # prioritize lower prices and newer, all things being equal
    if used and new:
        price, qlt = (used, 'used') if int(used) < int(new) else (new, 'new')
    # accept whichever is available
    elif used or new:
        price, qlt = (used, 'used') if used else (new, 'new')

    if price:
        # amounts are divided by 100 for display (cents to dollars)
        price = '{:00,.2f}'.format(int(price) / 100.)
        if qlt:
            price_fmt = "$%s (%s)" % (price, qlt)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            isbn, h.affiliate_id('amazon')),
        'price': price_fmt,
        'price_amt': price,
        'qlt': qlt,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),  # needs to be normalized
        # never emit [None] when Amazon has no publisher
        'publishers': [product.publisher] if product.publisher else [],
        'cover': product.large_image_url,
        'isbn_10': [isbn_10],
        'isbn_13': [isbn_13],
    }

    # Products without a publication date used to crash on .strftime();
    # the key is simply left out for them.
    publication_date = product.publication_date
    if publication_date:
        data['publish_date'] = publication_date.strftime('%b %d, %Y')
    return data
def _get_amazon_metadata(isbn=None):
    """Look ``isbn`` up on Amazon (Books index) and return selected fields.

    :param str isbn: ISBN-10 or ISBN-13; normalized with ``normalize_isbn``
    :return: dict of price and bibliographic fields, or ``None`` when the
        Amazon API is not configured or the lookup raises
    :rtype: dict or None
    """
    # XXX @hornc, you should be extending this to work with
    # isbn=, asin=, title=, authors=, etc
    isbn = normalize_isbn(isbn)
    try:
        api = lending.amazon_api
        if not api:
            return None
        product = api.lookup(ItemId=isbn, IdType="ISBN", SearchIndex="Books")
    except Exception:
        # Best effort: any lookup failure means "no Amazon metadata".
        return None

    price_fmt, price, qlt = (None, None, None)
    used = product._safe_get_element_text(
        'OfferSummary.LowestUsedPrice.Amount')
    new = product._safe_get_element_text('OfferSummary.LowestNewPrice.Amount')

    # prioritize lower prices and newer, all things being equal
    if used and new:
        price, qlt = (used, 'used') if int(used) < int(new) else (new, 'new')
    # accept whichever is available
    elif used or new:
        price, qlt = (used, 'used') if used else (new, 'new')

    if price:
        # amounts are divided by 100 for display (cents to dollars)
        price = '{:00,.2f}'.format(int(price) / 100.)
        if qlt:
            price_fmt = "$%s (%s)" % (price, qlt)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            isbn, h.affiliate_id('amazon')),
        'price': price_fmt,
        'price_amt': price,
        'qlt': qlt,
        'title': product.title,
        'authors': [{'name': name} for name in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),  # needs to be normalized
        'cover': product.large_image_url,
    }

    # Products without a publication date used to crash on .strftime();
    # the key is simply left out for them.
    publication_date = product.publication_date
    if publication_date:
        data['publish_date'] = publication_date.strftime('%b %d, %Y')
    if product.publisher:
        data['publishers'] = [product.publisher]

    if len(isbn) == 10:
        data['isbn_10'] = [isbn]
        data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
    elif len(isbn) == 13:
        data['isbn_13'] = [isbn]
        # only 978-prefixed ISBN-13s are converted back to an ISBN-10
        if isbn.startswith('978'):
            data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]
    return data
def GET(self):
    """Return JSON-encoded vendor metadata for an ``isbn`` or ``asin`` query.

    Reads ``isbn`` / ``asin`` from the request, queries Amazon (and Better
    World Books for ISBNs), then tries to attach the matching Open Library
    edition, attempting to create one from the Amazon record if none exists.

    :return: JSON object with ``amazon`` and ``betterworldbooks`` entries,
        plus ``key`` (and ``ocaid`` when set) if an edition was resolved;
        ``{"error": ...}`` when neither parameter is supplied
    :rtype: str
    """
    # @hornc, add: title='', asin='', authors=''
    i = web.input(isbn='', asin='')
    if not (i.isbn or i.asin):
        return simplejson.dumps({'error': 'isbn or asin required'})

    id_ = i.asin if i.asin else normalize_isbn(i.isbn)
    id_type = 'asin' if i.asin else 'isbn_' + (
        '13' if len(id_) == 13 else '10')
    is_isbn = id_type.startswith('isbn_')

    metadata = {
        'amazon': get_amazon_metadata(id_) or {},
        # "or {}" keeps the .get('price') check below safe when the
        # Better World Books lookup comes back empty
        'betterworldbooks': (
            (get_betterworldbooks_metadata(id_) or {}) if is_isbn else {}),
    }
    # if isbn_13 fails for amazon, we may want to check isbn_10 also
    # xxx

    # if bwb fails and isbn10, try again with isbn13
    if (id_type == 'isbn_10'
            and metadata['betterworldbooks'].get('price') is None):
        isbn_13 = isbn_10_to_isbn_13(id_)
        retry = get_betterworldbooks_metadata(isbn_13) if isbn_13 else None
        metadata['betterworldbooks'] = retry or {}

    # fetch book by isbn if it exists
    # if asin... for now, it will fail (which is fine)
    matches = web.ctx.site.things({
        'type': '/type/edition',
        id_type: id_,
    })
    book_key = matches[0] if matches else None

    # if no OL edition for isbn, attempt to create
    if not book_key and metadata['amazon']:
        resp = load(clean_amazon_metadata_for_load(metadata['amazon']))
        if resp and 'edition' in resp:
            book_key = (resp.get('edition') or {}).get('key')

    # include ol edition metadata in response, if available
    if book_key:
        ed = web.ctx.site.get(book_key)
        if ed:
            metadata['key'] = ed.key
            # default of None: editions without an ocaid must not raise
            if getattr(ed, 'ocaid', None):
                metadata['ocaid'] = ed.ocaid

    return simplejson.dumps(metadata)
def GET(self):
    """Serve vendor metadata (as JSON) for the ``isbn`` or ``asin`` given.

    The identifier is looked up at Amazon, and at Better World Books when it
    is an ISBN. The matching Open Library edition is then attached; if none
    is found, creating one from the Amazon record is attempted.

    :return: JSON object holding ``amazon`` and ``betterworldbooks`` dicts,
        with ``key`` / ``ocaid`` added when an edition was resolved, or an
        ``error`` entry when no identifier was supplied
    :rtype: str
    """
    # @hornc, add: title='', asin='', authors=''
    i = web.input(isbn='', asin='')
    if not (i.isbn or i.asin):
        return simplejson.dumps({'error': 'isbn or asin required'})

    if i.asin:
        id_, id_type = i.asin, 'asin'
    else:
        id_ = normalize_isbn(i.isbn)
        id_type = 'isbn_' + ('13' if len(id_) == 13 else '10')

    if id_type.startswith('isbn_'):
        # an empty lookup result is replaced by {} so that the
        # .get('price') check below cannot hit None
        bwb = get_betterworldbooks_metadata(id_) or {}
    else:
        bwb = {}
    metadata = {
        'amazon': get_amazon_metadata(id_) or {},
        'betterworldbooks': bwb,
    }
    # if isbn_13 fails for amazon, we may want to check isbn_10 also
    # xxx

    # if bwb fails and isbn10, try again with isbn13
    if id_type == 'isbn_10' and bwb.get('price') is None:
        isbn_13 = isbn_10_to_isbn_13(id_)
        if isbn_13:
            metadata['betterworldbooks'] = (
                get_betterworldbooks_metadata(isbn_13) or {})
        else:
            metadata['betterworldbooks'] = {}

    # fetch book by isbn if it exists
    # if asin... for now, it will fail (which is fine)
    matches = web.ctx.site.things({
        'type': '/type/edition',
        id_type: id_,
    })
    book_key = matches[0] if matches else None

    # if no OL edition for isbn, attempt to create
    if not book_key and metadata['amazon']:
        resp = load(clean_amazon_metadata_for_load(metadata['amazon']))
        if resp and 'edition' in resp:
            book_key = (resp.get('edition') or {}).get('key')

    # include ol edition metadata in response, if available
    if book_key:
        ed = web.ctx.site.get(book_key)
        if ed:
            metadata['key'] = ed.key
            # default of None: editions without an ocaid must not raise
            if getattr(ed, 'ocaid', None):
                metadata['ocaid'] = ed.ocaid

    return simplejson.dumps(metadata)
def GET(self):
    """Return JSON-encoded vendor metadata for an ``isbn`` or ``asin`` query.

    Queries Amazon for the identifier (and Better World Books for ISBNs),
    then attaches the matching Open Library edition, falling back to
    ``create_edition_from_amazon_metadata`` when no edition exists yet.

    :return: JSON object with ``amazon`` and ``betterworldbooks`` entries,
        plus ``key`` (and ``ocaid`` when set) if an edition was resolved;
        ``{"error": ...}`` when neither parameter is supplied
    :rtype: str
    """
    i = web.input(isbn='', asin='')
    if not (i.isbn or i.asin):
        return json.dumps({'error': 'isbn or asin required'})

    id_ = i.asin if i.asin else normalize_isbn(i.isbn)
    id_type = 'asin' if i.asin else 'isbn_' + ('13' if len(id_) == 13 else '10')
    # the first four characters are 'asin' or 'isbn': the id kind the
    # Amazon helpers are called with
    id_kind = id_type[:4]
    is_isbn = id_type.startswith('isbn_')

    metadata = {
        'amazon': get_amazon_metadata(id_, id_type=id_kind) or {},
        # "or {}" keeps the .get('price') check below safe when the
        # Better World Books lookup comes back empty
        'betterworldbooks': (
            (get_betterworldbooks_metadata(id_) or {}) if is_isbn else {}
        ),
    }
    # if user supplied isbn_{n} fails for amazon, we may want to check the
    # alternate isbn

    # if bwb fails and isbn10, try again with isbn13
    if id_type == 'isbn_10' and metadata['betterworldbooks'].get('price') is None:
        isbn_13 = isbn_10_to_isbn_13(id_)
        retry = get_betterworldbooks_metadata(isbn_13) if isbn_13 else None
        metadata['betterworldbooks'] = retry or {}

    # fetch book by isbn if it exists
    # TODO: perform existing OL lookup by ASIN if supplied, if possible
    matches = web.ctx.site.things(
        {
            'type': '/type/edition',
            id_type: id_,
        }
    )
    book_key = matches[0] if matches else None

    # if no OL edition for isbn, attempt to create
    if not book_key and metadata['amazon']:
        book_key = create_edition_from_amazon_metadata(id_, id_kind)

    # include ol edition metadata in response, if available
    if book_key:
        ed = web.ctx.site.get(book_key)
        if ed:
            metadata['key'] = ed.key
            # default of None: editions without an ocaid must not raise
            if getattr(ed, 'ocaid', None):
                metadata['ocaid'] = ed.ocaid

    return json.dumps(metadata)
def serialize(product):
    """Extract the fields we are interested in from an Amazon product.

    Takes a product returned by the Amazon Product Advertising API (with
    multiple resource groups) and flattens it into a plain dict. Any part of
    the product that is missing yields a falsy value for the corresponding
    key instead of raising.

    :param AmazonAPI product:
    :return: Amazon metadata for one product; ``{}`` when ``product`` is
        falsy
    :rtype: dict

    Illustrative result (``isbn_10``, ``isbn_13`` and ``product_group`` are
    also present):

    {
      'price': '$54.06',
      'price_amt': 5406,
      'physical_format': 'hardcover',
      'authors': [{'name': 'Guterson, David'}],
      'publish_date': 'Jan 21, 2020',
      'publishers': ['Victory Belt Publishing'],
      'source_records': ['amazon:1628603976'],
      'title': 'Boundless: Upgrade Your Brain, Optimize Your Body & Defy Aging',
      'url': 'https://www.amazon.com/dp/1628603976/?tag=internetarchi-20',
      'number_of_pages': 640,
      'cover': 'https://m.media-amazon.com/images/I/51IT9MV3KqL._AC_.jpg',
      'edition_num': '1',
    }
    """
    if not product:
        return {}  # no match?

    # getattr() needs an explicit default to be null-safe; without one it
    # raises AttributeError exactly like plain attribute access.
    item_info = getattr(product, 'item_info', None)
    images = getattr(product, 'images', None)
    offers = getattr(product, 'offers', None)
    edition_info = item_info and getattr(item_info, 'content_info', None)
    attribution = item_info and getattr(item_info, 'by_line_info', None)
    classifications = item_info and getattr(item_info, 'classifications', None)

    price = offers and offers.listings and offers.listings[0].price
    brand = (attribution and getattr(attribution, 'brand', None)
             and getattr(attribution.brand, 'display_value', None))
    manufacturer = (attribution and getattr(attribution, 'manufacturer', None)
                    and attribution.manufacturer.display_value)
    product_group = (classifications
                     and getattr(classifications, 'product_group', None)
                     and classifications.product_group.display_value)

    try:
        publish_date = edition_info and isoparser.parse(
            edition_info.publication_date.display_value).strftime('%b %d, %Y')
    except Exception:
        # Best effort: a missing or unparsable date must not lose the record.
        logger.exception("serialize(%s)", product)
        publish_date = None

    # De-duplicate brand/manufacturer while keeping a stable order (a set
    # would make the order vary between runs).
    publishers = []
    for name in (brand, manufacturer):
        if name and name not in publishers:
            publishers.append(name)

    title = getattr(item_info, 'title', None) if item_info else item_info
    binding = getattr(classifications, 'binding', None)
    contributors = getattr(attribution, 'contributors', None) or []

    book = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            product.asin, h.affiliate_id('amazon')),
        'source_records': ['amazon:%s' % product.asin],
        'isbn_10': [product.asin],
        'isbn_13': [isbn_10_to_isbn_13(product.asin)],
        'price': price and price.display_amount,
        # round() before int(): 100 * 19.99 is 1998.9999999999998 and plain
        # truncation would report 1998 cents
        'price_amt': (price and price.amount
                      and int(round(100 * price.amount))),
        'title': title and getattr(title, 'display_value', None),
        'cover': (images and images.primary and images.primary.large
                  and images.primary.large.url),
        'authors': attribution and [
            {'name': contrib.name} for contrib in contributors],
        'publishers': publishers,
        'number_of_pages': (edition_info and edition_info.pages_count
                            and edition_info.pages_count.display_value),
        'edition_num': (edition_info and edition_info.edition
                        and edition_info.edition.display_value),
        'publish_date': publish_date,
        'product_group': product_group,
        'physical_format': (
            classifications
            and (getattr(binding, 'display_value', '') or '').lower()),
    }
    return book
def _serialize_amazon_product(product):
    """Flatten an Amazon API product into the dict of fields we use.

    :param amazon.api.AmazonProduct product:
    :return: Amazon metadata for one product (price, title, authors, offer
        summary, identifiers, ...); optional fields are only present when
        the product provides them
    :rtype: dict
    """
    lowest_used = product._safe_get_element_text(
        'OfferSummary.LowestUsedPrice.Amount')
    lowest_new = product._safe_get_element_text(
        'OfferSummary.LowestNewPrice.Amount')

    # Choose the offer to advertise: the strictly cheaper one when both
    # exist (new wins ties), otherwise whichever is available.
    best_amount = condition = None
    if lowest_used and lowest_new:
        if int(lowest_used) < int(lowest_new):
            best_amount, condition = lowest_used, 'used'
        else:
            best_amount, condition = lowest_new, 'new'
    elif lowest_used:
        best_amount, condition = lowest_used, 'used'
    elif lowest_new:
        best_amount, condition = lowest_new, 'new'

    display_price = None
    if best_amount:
        # amounts are divided by 100 for display
        best_amount = '{:00,.2f}'.format(int(best_amount) / 100.)
        if condition:
            display_price = "$%s (%s)" % (best_amount, condition)

    asin = product.asin
    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            asin, h.affiliate_id('amazon')),
        'price': display_price,
        'price_amt': best_amount,
        'qlt': condition,
        'title': product.title,
        'authors': [{'name': author} for author in product.authors],
        'source_records': ['amazon:%s' % asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),
        'cover': product.large_image_url,
        'product_group': product.product_group,
    }

    if product._safe_get_element('OfferSummary') is not None:
        summary = {
            'total_new': int(
                product._safe_get_element_text('OfferSummary.TotalNew')),
            'total_used': int(
                product._safe_get_element_text('OfferSummary.TotalUsed')),
            'total_collectible': int(
                product._safe_get_element_text(
                    'OfferSummary.TotalCollectible')),
        }
        data['offer_summary'] = summary
        lowest_collectible = product._safe_get_element_text(
            'OfferSummary.LowestCollectiblePrice.Amount')
        # Lowest prices are recorded only for the conditions on offer.
        lowest_by_key = (
            ('lowest_new', lowest_new),
            ('lowest_used', lowest_used),
            ('lowest_collectible', lowest_collectible),
        )
        for key, raw_amount in lowest_by_key:
            if raw_amount:
                summary[key] = int(raw_amount)
        total_offers = product._safe_get_element_text('Offers.TotalOffers')
        if total_offers:
            summary['amazon_offers'] = int(total_offers)

    parsed_date = product.publication_date
    if parsed_date:
        raw_date = product._safe_get_element_text(
            'ItemAttributes.PublicationDate')
        # Reformat only when the raw text matches AMAZON_FULL_DATE_RE;
        # otherwise the raw text is passed through unchanged.
        if re.match(AMAZON_FULL_DATE_RE, raw_date):
            data['publish_date'] = parsed_date.strftime('%b %d, %Y')
        else:
            data['publish_date'] = raw_date

    binding = product.binding
    if binding:
        data['physical_format'] = binding.lower()
    edition = product.edition
    if edition:
        data['edition'] = edition
    publisher = product.publisher
    if publisher:
        data['publishers'] = [publisher]

    isbn = product.isbn
    if isbn:
        digits = len(isbn)
        if digits == 10:
            data['isbn_10'] = [isbn]
            data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
        elif digits == 13:
            data['isbn_13'] = [isbn]
            # only 978-prefixed ISBN-13s are converted back to an ISBN-10
            if isbn.startswith('978'):
                data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]
    return data
def _get_amazon_metadata(id_=None, id_type='isbn'):
    """Look an identifier up on Amazon and return the fields we use.

    :param str id_: the identifier; normalized with ``normalize_isbn`` when
        ``id_type`` is ``'isbn'``
    :param str id_type: ``'isbn'`` restricts the lookup to the Books index
        by ISBN; any other value looks ``id_`` up as given
    :return: dict of price and bibliographic fields, or ``None`` when the
        Amazon API is not configured or the lookup raises
    :rtype: dict or None
    """
    # TODO: extend this to work with
    # isbn=, asin=, title=, authors=, etc
    if id_type == 'isbn':
        id_ = normalize_isbn(id_)
        lookup_args = {'SearchIndex': 'Books', 'IdType': 'ISBN', 'ItemId': id_}
    else:
        lookup_args = {'ItemId': id_}

    try:
        if not lending.amazon_api:
            return None
        product = lending.amazon_api.lookup(**lookup_args)
        # sometimes more than one product can be returned, choose first
        if isinstance(product, list):
            product = product[0]
    except Exception:
        return None

    lowest_used = product._safe_get_element_text(
        'OfferSummary.LowestUsedPrice.Amount')
    lowest_new = product._safe_get_element_text(
        'OfferSummary.LowestNewPrice.Amount')

    # Choose the offer to advertise: the strictly cheaper one when both
    # exist (new wins ties), otherwise whichever is available.
    best_amount = condition = None
    if lowest_used and lowest_new:
        if int(lowest_used) < int(lowest_new):
            best_amount, condition = lowest_used, 'used'
        else:
            best_amount, condition = lowest_new, 'new'
    elif lowest_used:
        best_amount, condition = lowest_used, 'used'
    elif lowest_new:
        best_amount, condition = lowest_new, 'new'

    display_price = None
    if best_amount:
        # amounts are divided by 100 for display
        best_amount = '{:00,.2f}'.format(int(best_amount) / 100.)
        if condition:
            display_price = "$%s (%s)" % (best_amount, condition)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            id_, h.affiliate_id('amazon')),
        'price': display_price,
        'price_amt': best_amount,
        'qlt': condition,
        'title': product.title,
        'authors': [{'name': author} for author in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),  # needs to be normalized
        'cover': product.large_image_url,
        'product_group': product.product_group,
    }

    published = product.publication_date
    if published:
        # TODO: Don't populate false month and day for older products
        data['publish_date'] = published.strftime('%b %d, %Y')
    binding = product.binding
    if binding:
        data['physical_format'] = binding.lower()
    edition = product.edition
    if edition:
        data['edition'] = edition
    publisher = product.publisher
    if publisher:
        data['publishers'] = [publisher]

    isbn = product.isbn
    if isbn:
        digits = len(isbn)
        if digits == 10:
            data['isbn_10'] = [isbn]
            data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
        elif digits == 13:
            data['isbn_13'] = [isbn]
            # only 978-prefixed ISBN-13s are converted back to an ISBN-10
            if isbn.startswith('978'):
                data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]
    return data
def test_isbn_10_to_isbn_13():
    """Check ISBN-10 to ISBN-13 conversion for valid and invalid input."""
    # hyphenated and plain spellings of one ISBN-10 give the same ISBN-13
    convertible = (
        ('0-940787-08-3', '9780940787087'),
        ('0940787083', '9780940787087'),
    )
    for isbn_10, expected in convertible:
        assert isbn_10_to_isbn_13(isbn_10) == expected
    # input that is not an ISBN yields None
    assert isbn_10_to_isbn_13('BAD-ISBN') is None
def serialize(product):
    """Extract the fields we are interested in from an Amazon product.

    Takes a product returned by the Amazon Product Advertising API (with
    multiple resource groups) and flattens it into a plain dict. Sections
    the product lacks yield a falsy value for the corresponding key instead
    of raising.

    :param AmazonAPI product:
    :return: Amazon metadata for one product; ``{}`` when ``product`` is
        falsy
    :rtype: dict

    {
      'price': '$54.06',
      'price_amt': 5406,
      'physical_format': 'Hardcover',
      'authors': [{'role': 'Author', 'name': 'Guterson, David'}],
      'publish_date': 'Jan 21, 2020',
      'dimensions': {
        'width': [1.7, 'Inches'],
        'length': [8.5, 'Inches'],
        'weight': [5.4, 'Pounds'],
        'height': [10.875, 'Inches']
      },
      'publishers': ['Victory Belt Publishing'],
      'source_records': ['amazon:1628603976'],
      'title': 'Boundless: Upgrade Your Brain, Optimize Your Body & Defy Aging',
      'url': 'https://www.amazon.com/dp/1628603976/?tag=internetarchi-20',
      'number_of_pages': 640,
      'cover': 'https://m.media-amazon.com/images/I/51IT9MV3KqL._AC_.jpg',
      'languages': ['English'],
      'edition_num': '1'
    }
    """
    if not product:
        return {}  # no match?

    # Any section may be absent; chain with "and" so that a missing parent
    # gives a falsy value rather than an AttributeError.
    item_info = product.item_info
    edition_info = item_info and item_info.content_info
    attribution = item_info and item_info.by_line_info
    classifications = item_info and item_info.classifications
    product_info = item_info and item_info.product_info
    dims = product_info and product_info.item_dimensions
    offers = product.offers
    price = offers and offers.listings and offers.listings[0].price
    images = product.images

    try:
        publish_date = isoparser.parse(
            edition_info.publication_date.display_value).strftime('%b %d, %Y')
    except Exception:
        # Best effort: a missing or unparsable date is reported as None.
        publish_date = None

    # De-duplicate language names while keeping their original order (a set
    # would make the order vary between runs).
    languages = edition_info and edition_info.languages
    if languages:
        names = []
        for lang in languages.display_values:
            if lang.display_value not in names:
                names.append(lang.display_value)
        languages = names

    # Keep only the dimensions that are actually populated.
    dimensions = dims
    if dims:
        dimensions = {}
        for name in dims.to_dict():
            dim = getattr(dims, name)
            if dim:
                dimensions[name] = [dim.display_value, dim.unit]

    contributors = (attribution and attribution.contributors) or []
    binding = classifications and classifications.binding

    book = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            product.asin, h.affiliate_id('amazon')),
        'source_records': ['amazon:%s' % product.asin],
        'isbn_10': [product.asin],
        'isbn_13': [isbn_10_to_isbn_13(product.asin)],
        'price': price and price.display_amount,
        # round() before int(): 100 * 19.99 is 1998.9999999999998 and plain
        # truncation would report 1998 cents
        'price_amt': (price and price.amount
                      and int(round(100 * price.amount))),
        'title': (item_info and item_info.title
                  and item_info.title.display_value),
        'cover': (images and images.primary and images.primary.large
                  and images.primary.large.url),
        'authors': [{
            'name': contrib.name,
            'role': contrib.role,
        } for contrib in contributors],
        'publishers': (attribution and attribution.brand
                       and [attribution.brand.display_value]),
        'number_of_pages': (edition_info and edition_info.pages_count
                            and edition_info.pages_count.display_value),
        'edition_num': (edition_info and edition_info.edition
                        and edition_info.edition.display_value),
        'publish_date': publish_date,
        'languages': languages,
        'physical_format': (
            classifications and getattr(binding, 'display_value', None)),
        'dimensions': dimensions,
    }
    return book