def serialize(product):
    """Extract the metadata we are interested in from one Amazon product.

    Takes an item returned by the Amazon Product Advertising API (with
    multiple resources requested) and flattens it into a plain dict.
    Sections that are missing from the response produce falsy values
    (usually ``None``) for the corresponding keys instead of raising.

    :param product: item object returned by the Product Advertising API
        (presumably a PA-API 5.0 SDK item -- confirm against the caller)
    :return: Amazon metadata for one product, or ``{}`` when ``product``
        is falsy
    :rtype: dict

    Example (illustrative values)::

        {
            'url': 'https://www.amazon.com/dp/1628603976/?tag=internetarchi-20',
            'source_records': ['amazon:1628603976'],
            'isbn_10': ['1628603976'],
            'isbn_13': ['9781628603972'],
            'price': '$54.06',
            'price_amt': 5406,
            'title': 'Boundless: Upgrade Your Brain, Optimize Your Body & Defy Aging',
            'cover': 'https://m.media-amazon.com/images/I/51IT9MV3KqL._AC_.jpg',
            'authors': [{'name': 'Guterson, David'}],
            'publishers': ['Victory Belt Publishing'],
            'number_of_pages': 640,
            'edition_num': '1',
            'publish_date': 'Jan 21, 2020',
            'product_group': 'Book',
            'physical_format': 'hardcover',
        }
    """
    if not product:
        return {}  # no match?

    item_info = getattr(product, 'item_info', None)
    images = getattr(product, 'images', None)
    offers = getattr(product, 'offers', None)
    edition_info = item_info and getattr(item_info, 'content_info', None)
    attribution = item_info and getattr(item_info, 'by_line_info', None)
    classifications = (item_info
                       and getattr(item_info, 'classifications', None))

    price = offers and offers.listings and offers.listings[0].price
    brand = (attribution and getattr(attribution, 'brand', None)
             and getattr(attribution.brand, 'display_value', None))
    manufacturer = (attribution
                    and getattr(attribution, 'manufacturer', None)
                    and attribution.manufacturer.display_value)
    product_group = (classifications
                     and getattr(classifications, 'product_group', None)
                     and classifications.product_group.display_value)

    # Only attempt to parse when a raw date is present, so that products
    # without a publication date do not log a traceback on every call.
    publication = (edition_info
                   and getattr(edition_info, 'publication_date', None))
    raw_date = publication and publication.display_value
    publish_date = None
    if raw_date:
        try:
            publish_date = isoparser.parse(raw_date).strftime('%b %d, %Y')
        except Exception:
            # Best effort: an unparseable date must not lose the record.
            logger.exception("serialize({})".format(product))

    # Brand and manufacturer are frequently identical; de-duplicate while
    # keeping a stable order (brand first).
    publishers = []
    for publisher in (brand, manufacturer):
        if publisher and publisher not in publishers:
            publishers.append(publisher)

    binding = classifications and getattr(classifications, 'binding', None)

    book = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            product.asin, h.affiliate_id('amazon')),
        'source_records': ['amazon:%s' % product.asin],
        'isbn_10': [product.asin],
        'isbn_13': [isbn_10_to_isbn_13(product.asin)],
        'price': price and price.display_amount,
        # round() before int(): int() truncates, so float error such as
        # 100 * 19.99 == 1998.9999999999998 would otherwise drop a cent.
        'price_amt': (price and price.amount
                      and int(round(100 * price.amount))),
        'title': (item_info and item_info.title
                  and getattr(item_info.title, 'display_value')),
        'cover': (images and images.primary and images.primary.large
                  and images.primary.large.url),
        # contributors may be None when the by-line carries no people
        'authors': attribution and [
            {'name': contrib.name}
            for contrib in (attribution.contributors or [])
        ],
        'publishers': publishers,
        'number_of_pages': (edition_info and edition_info.pages_count
                            and edition_info.pages_count.display_value),
        'edition_num': (edition_info and edition_info.edition
                        and edition_info.edition.display_value),
        'publish_date': publish_date,
        'product_group': product_group,
        # '' when there is no binding (or its display value is None)
        'physical_format': classifications and (
            getattr(binding, 'display_value', None) or '').lower(),
    }
    return book
from openlibrary.core import cache, helpers as h from openlibrary.utils import dateutil from openlibrary.utils.isbn import (normalize_isbn, isbn_13_to_isbn_10, isbn_10_to_isbn_13) from openlibrary.catalog.add_book import load from openlibrary import accounts logger = logging.getLogger("openlibrary.vendors") BETTERWORLDBOOKS_BASE_URL = 'https://betterworldbooks.com' BETTERWORLDBOOKS_API_URL = ( 'https://products.betterworldbooks.com/service.aspx?' 'IncludeAmazon=True&ItemId=') affiliate_server_url = None BWB_AFFILIATE_LINK = 'http://www.anrdoezrs.net/links/{}/type/dlg/http://www.betterworldbooks.com/-id-%s'.format( h.affiliate_id('betterworldbooks')) AMAZON_FULL_DATE_RE = re.compile(r'\d{4}-\d\d-\d\d') ISBD_UNIT_PUNCT = ' : ' # ISBD cataloging title-unit separator punctuation def setup(config): global affiliate_server_url affiliate_server_url = config.get('affiliate_server') class AmazonAPI: """Amazon Product Advertising API 5.0 wrapper for Python""" RESOURCES = { 'all': [ getattr(GetItemsResource, v) for v in # Hack: pulls all resource consts from GetItemsResource
from openlibrary.utils import dateutil
from openlibrary.utils.isbn import (
    normalize_isbn, isbn_13_to_isbn_10, isbn_10_to_isbn_13)
from openlibrary.catalog.add_book import load
from openlibrary import accounts

logger = logging.getLogger("openlibrary.vendors")

# Module-level Amazon client and its raw config section; both are
# (re)assigned by setup() and stay None until it has been called.
amazon_api = None
config_amz_api = None

BETTERWORLDBOOKS_BASE_URL = 'https://betterworldbooks.com'
BETTERWORLDBOOKS_API_URL = (
    'https://products.betterworldbooks.com/service.aspx?'
    'IncludeAmazon=True&ItemId=')
# The affiliate id is filled in here; the trailing %s is left in place
# for later %-formatting.
BWB_AFFILIATE_LINK = 'http://www.anrdoezrs.net/links/{}/type/dlg/http://www.betterworldbooks.com/-id-%s'.format(h.affiliate_id('betterworldbooks'))
AMAZON_FULL_DATE_RE = re.compile(r'\d{4}-\d\d-\d\d')
ISBD_UNIT_PUNCT = ' : '  # ISBD cataloging title-unit separator punctuation


def setup(config):
    """Initialise the module-level Amazon API client from ``config``.

    Stores ``config.get('amazon_api')`` in ``config_amz_api`` and builds
    ``amazon_api`` from its ``key``, ``secret`` and ``id`` attributes.
    If an ``AttributeError`` is raised while doing so (for example when
    the config section is missing), ``amazon_api`` is set to ``None``.

    :param config: object with a dict-like ``get`` method
    """
    global config_amz_api, amazon_api
    config_amz_api = config.get('amazon_api')
    try:
        credentials = (
            config_amz_api.key,
            config_amz_api.secret,
            config_amz_api.id,
        )
        client = AmazonAPI(*credentials, throttling=0.9)
    except AttributeError:
        client = None
    amazon_api = client
def _serialize_amazon_product(product):
    """Flatten an AmazonProduct into the plain metadata dict we keep.

    The product is expected to be a full Amazon Product Advertising API
    result with multiple ResponseGroups; only the fields we are
    interested in are copied out.

    :param amazon.api.AmazonProduct product:
    :return: Amazon metadata for one product
    :rtype: dict
    """
    element_text = product._safe_get_element_text

    lowest_used = element_text('OfferSummary.LowestUsedPrice.Amount')
    lowest_new = element_text('OfferSummary.LowestNewPrice.Amount')

    # Choose the cheaper offer; when both cost the same prefer the new
    # copy, and when only one kind is on offer take that one.
    if lowest_used and lowest_new:
        if int(lowest_used) < int(lowest_new):
            raw_amount, condition = lowest_used, 'used'
        else:
            raw_amount, condition = lowest_new, 'new'
    elif lowest_used:
        raw_amount, condition = lowest_used, 'used'
    elif lowest_new:
        raw_amount, condition = lowest_new, 'new'
    else:
        raw_amount = condition = None

    amount = display_price = None
    if raw_amount:
        # The raw amount is divided by 100 and rendered with two decimals.
        amount = '{:00,.2f}'.format(int(raw_amount) / 100.)
        display_price = "$%s (%s)" % (amount, condition)

    data = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            product.asin, h.affiliate_id('amazon')),
        'price': display_price,
        'price_amt': amount,
        'qlt': condition,
        'title': product.title,
        'authors': [{'name': author} for author in product.authors],
        'source_records': ['amazon:%s' % product.asin],
        'number_of_pages': product.pages,
        'languages': list(product.languages),
        'cover': product.large_image_url,
        'product_group': product.product_group,
    }

    if product._safe_get_element('OfferSummary') is not None:
        summary = {
            'total_new': int(element_text('OfferSummary.TotalNew')),
            'total_used': int(element_text('OfferSummary.TotalUsed')),
            'total_collectible': int(
                element_text('OfferSummary.TotalCollectible')),
        }
        data['offer_summary'] = summary
        lowest_collectible = element_text(
            'OfferSummary.LowestCollectiblePrice.Amount')
        # The lowest_* entries are only added when a value was returned.
        for key, value in (
                ('lowest_new', lowest_new),
                ('lowest_used', lowest_used),
                ('lowest_collectible', lowest_collectible)):
            if value:
                summary[key] = int(value)
        total_offers = element_text('Offers.TotalOffers')
        if total_offers:
            summary['amazon_offers'] = int(total_offers)

    published = product.publication_date
    if published:
        raw_date = element_text('ItemAttributes.PublicationDate')
        # Dates starting with YYYY-MM-DD are reformatted; anything else
        # is passed through as the raw text.
        if AMAZON_FULL_DATE_RE.match(raw_date):
            data['publish_date'] = published.strftime('%b %d, %Y')
        else:
            data['publish_date'] = raw_date

    binding = product.binding
    if binding:
        data['physical_format'] = binding.lower()

    edition = product.edition
    if edition:
        data['edition'] = edition

    publisher = product.publisher
    if publisher:
        data['publishers'] = [publisher]

    isbn = product.isbn
    if isbn:
        length = len(isbn)
        if length == 10:
            data['isbn_10'] = [isbn]
            data['isbn_13'] = [isbn_10_to_isbn_13(isbn)]
        elif length == 13:
            data['isbn_13'] = [isbn]
            # An ISBN-10 is only derived for 978-prefixed ISBN-13s.
            if isbn.startswith('978'):
                data['isbn_10'] = [isbn_13_to_isbn_10(isbn)]

    return data
def serialize(product):
    """Extract the metadata we are interested in from one Amazon product.

    Takes an item returned by the Amazon Product Advertising API (with
    multiple resources requested) and flattens it into a plain dict.
    Sections that are missing from the response produce falsy values
    (usually ``None``) for the corresponding keys instead of raising.

    :param product: item object returned by the Product Advertising API
        (presumably a PA-API 5.0 SDK item -- confirm against the caller)
    :return: Amazon metadata for one product, or ``{}`` when ``product``
        is falsy
    :rtype: dict

    Example (illustrative values; the ``dimensions`` keys are whatever
    ``item_dimensions.to_dict()`` yields)::

        {
            'price': '$54.06',
            'price_amt': 5406,
            'physical_format': 'Hardcover',
            'authors': [{'role': 'Author', 'name': 'Guterson, David'}],
            'publish_date': 'Jan 21, 2020',
            'dimensions': {
                'width': [1.7, 'Inches'],
                'length': [8.5, 'Inches'],
                'weight': [5.4, 'Pounds'],
                'height': [10.875, 'Inches'],
            },
            'publishers': ['Victory Belt Publishing'],
            'source_records': ['amazon:1628603976'],
            'title': 'Boundless: Upgrade Your Brain, Optimize Your Body & Defy Aging',
            'url': 'https://www.amazon.com/dp/1628603976/?tag=internetarchi-20',
            'number_of_pages': 640,
            'cover': 'https://m.media-amazon.com/images/I/51IT9MV3KqL._AC_.jpg',
            'languages': ['English'],
            'edition_num': '1',
        }
    """
    if not product:
        return {}  # no match?

    # Any section of the response may be absent, so every hop below is
    # guarded rather than assumed.
    item_info = product.item_info
    offers = product.offers
    images = product.images
    edition_info = item_info and item_info.content_info
    attribution = item_info and item_info.by_line_info
    classifications = item_info and item_info.classifications
    price = offers and offers.listings and offers.listings[0].price
    dims = (item_info and item_info.product_info
            and item_info.product_info.item_dimensions)
    brand = attribution and attribution.brand
    binding = classifications and classifications.binding
    language_info = edition_info and edition_info.languages

    publication = edition_info and edition_info.publication_date
    raw_date = publication and publication.display_value
    publish_date = None
    if raw_date:
        try:
            publish_date = isoparser.parse(raw_date).strftime('%b %d, %Y')
        except Exception:
            # Best effort: an unparseable date must not lose the record.
            publish_date = None

    # De-duplicate language names while keeping first-seen order.
    languages = language_info
    if language_info:
        languages = []
        for lang in language_info.display_values or []:
            if lang.display_value not in languages:
                languages.append(lang.display_value)

    book = {
        'url': "https://www.amazon.com/dp/%s/?tag=%s" % (
            product.asin, h.affiliate_id('amazon')),
        'source_records': ['amazon:%s' % product.asin],
        'isbn_10': [product.asin],
        'isbn_13': [isbn_10_to_isbn_13(product.asin)],
        'price': price and price.display_amount,
        # round() before int(): int() truncates, so float error such as
        # 100 * 19.99 == 1998.9999999999998 would otherwise drop a cent.
        'price_amt': (price and price.amount
                      and int(round(100 * price.amount))),
        'title': (item_info and item_info.title
                  and item_info.title.display_value),
        'cover': (images and images.primary and images.primary.large
                  and images.primary.large.url),
        # contributors may be None when the by-line carries no people
        'authors': [
            {'name': contrib.name, 'role': contrib.role}
            for contrib in (attribution and attribution.contributors) or []
        ],
        'publishers': brand and [brand.display_value],
        'number_of_pages': (edition_info and edition_info.pages_count
                            and edition_info.pages_count.display_value),
        'edition_num': (edition_info and edition_info.edition
                        and edition_info.edition.display_value),
        'publish_date': publish_date,
        'languages': languages,
        # None (not an AttributeError) when the binding is missing
        'physical_format': (classifications
                            and getattr(binding, 'display_value', None)),
        'dimensions': dims and {
            name: [getattr(dims, name).display_value,
                   getattr(dims, name).unit]
            for name in dims.to_dict() if getattr(dims, name)
        },
    }
    return book