def search_on_amazon(asin, album, artist):
    '''
    Try to locate the Amazon detail-page URL of ``album`` by ``artist``.

    If ``asin`` is given, it is looked up first and the URL of the first
    result whose product group is 'Music' is returned.  Otherwise (or when
    that yields nothing) an 'MP3Downloads' keyword search is made and the
    first 'Digital Music Album' whose creator and title match is used.

    Returns '' if nothing can be found, if the Amazon credentials are not
    configured, or if the lookup fails for any reason.
    '''
    from amazonproduct import API

    if not AMAZON_KEY or not AMAZON_SECRET or not AMAZON_ASSOCIATE_TAG:
        return ''

    api = API(AMAZON_KEY, AMAZON_SECRET, 'us')
    try:
        if asin:
            node = api.item_lookup(asin, AssociateTag=AMAZON_ASSOCIATE_TAG)
            for item in node.Items:
                attributes = item.Item.ItemAttributes
                if attributes.ProductGroup == 'Music':
                    url = item.Item.DetailPageURL
                    if url:
                        return url.text

        node = api.item_search('MP3Downloads',
                               Keywords=album + ' ' + artist,
                               AssociateTag=AMAZON_ASSOCIATE_TAG)
        for item in node.Items:
            attributes = item.Item.ItemAttributes
            if matching.match(artist, str(attributes.Creator)) \
                    and matching.match(album, str(attributes.Title)) \
                    and attributes.ProductGroup == 'Digital Music Album':
                url = item.Item.DetailPageURL
                if url:
                    return url.text
    except Exception:
        # Best-effort lookup: any API/parsing failure means "not found".
        # ``Exception`` (not a bare except) lets Ctrl-C and SystemExit through.
        pass
    return ''
def services_incoming(request):
    """Handle an incoming item (probably from the bookmarklet).

    Reads the ``loc`` query parameter, extracts a 10-character ASIN from
    it, looks the item up on Amazon and renders ``add-item.html`` with the
    author and title that were found.  Values that cannot be determined are
    rendered as ``u'Unknown'``; ``isbn`` is never looked up here and is
    therefore always ``u'Unknown'``.

    Returns an HTTP 400 response when ``loc`` is missing or holds no ASIN.
    """
    # TODO: this is nothing more than a test now. cleanup.
    from django.http import HttpResponseBadRequest

    url = request.GET.get('loc', None)
    # ``loc`` is user supplied: guard both the missing and the non-matching
    # case instead of failing with TypeError/AttributeError.
    matches = re.search(r'\/([A-Z0-9]{10})($|\/)', url) if url else None
    if matches is None:
        return HttpResponseBadRequest('Missing or invalid "loc" parameter')
    asin = matches.group(1)

    api = API(AMZ.KEY, AMZ.SECRET_KEY, 'us')

    # Defaults live outside the loop so they exist even if nothing comes back.
    author = u'Unknown'
    title = u'Unknown'
    isbn = u'Unknown'

    for root in api.item_lookup(asin, IdType='ASIN',
                                AssociateTag=AMZ.ASSOCIATE_TAG):
        nspace = root.nsmap.get(None, '')
        amazon_items = root.xpath('//aws:Items/aws:Item',
                                  namespaces={'aws': nspace})
        for amazon_item in amazon_items:
            if hasattr(amazon_item.ItemAttributes, 'Author'):
                author = unicode(amazon_item.ItemAttributes.Author)
            if hasattr(amazon_item.ItemAttributes, 'Title'):
                title = unicode(amazon_item.ItemAttributes.Title)

    return render_to_response('add-item.html',
                              {'user': request.user,
                               'creator': author,
                               'title': title,
                               'isbn': isbn})
def pullItemInfoFromAmazon(job_id):
    """Pull item data from Amazon for the ASINs belonging to ``job_id``.

    Only the first 10 ASINs of the job are processed.  Each ASIN is looked
    up (with a limited number of attempts), stored via ``insert_item_info``
    and its image handled via ``process_image``.  Failures are logged and
    collected in local lists.

    NOTE(review): the three failure lists are filled but never returned or
    stored -- confirm whether callers are supposed to receive them.
    """
    logger = logging.getLogger('tst')
    max_attempts = 2  # number of lookups tried per ASIN

    asin_list = [x.asin for x in Asin.objects.filter(job_id=job_id)]
    pull_fail_list = []
    insert_fail_list = []
    image_fail_list = []

    api = API(access_key_id=ACCESS_KEY_ID,
              secret_access_key=SECRET_ACCESS_KEY,
              associate_tag=ASSOCIATE_TAG,
              locale='us')

    for asin in asin_list[:10]:
        asin = asin.strip()
        result = ''
        for attempt in range(1, max_attempts + 1):
            try:
                result = api.item_lookup(
                    asin,
                    ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes',
                    MerchantId='Amazon',
                    Condition='New')
            except Exception as e:
                # Keep the reason in the log instead of discarding it.
                logger.info('ASIN: %s -- %d time -- Fail: %s' % (asin, attempt, e))
                continue
            logger.info('ASIN: %s -- %d time -- Success' % (asin, attempt))
            break

        if result == '':
            # Report the real number of attempts (the old text claimed 3).
            logger.info('ASIN: %s Fail after %d times' % (asin, max_attempts))
            pull_fail_list.append(asin)
            continue

        if not insert_item_info(result, asin):
            logger.error('Insert item info for %s fail' % asin)
            insert_fail_list.append(asin)
            continue

        if not process_image(asin):
            logger.error('Processing Image for %s fail' % asin)
            image_fail_list.append(asin)
            continue
def show_product(locale, asin):
    """Look up ``asin`` in the given Amazon ``locale`` and render it.

    Copies title, ASIN, image URL, type, region and model of the looked-up
    item onto a ``Product`` and renders ``product.html`` with it.  New and
    used prices are optional: when the offer summary lacks them the
    corresponding attributes are simply left unset.
    """
    api = API(locale=locale)
    result = api.item_lookup(
        asin,
        ResponseGroup="ItemIds, ItemAttributes, Images, OfferSummary, Offers")

    niceProduct = Product()
    for product in result.Items.Item:
        niceProduct.title = product.ItemAttributes.Title
        niceProduct.ASIN = product.ASIN.text
        niceProduct.imageUrl = product.MediumImage.URL

        try:
            lowest_new = product.OfferSummary.LowestNewPrice
            niceProduct.newPrice = float(lowest_new.Amount) / 100
            niceProduct.newFormattedPrice = lowest_new.FormattedPrice
            niceProduct.newPriceCurrency = lowest_new.CurrencyCode
        except Exception:
            # No (usable) new price -- optional, so ignore.
            pass

        try:
            lowest_used = product.OfferSummary.LowestUsedPrice
            niceProduct.usedPrice = float(lowest_used.Amount) / 100
            niceProduct.usedFormattedPrice = lowest_used.FormattedPrice
            niceProduct.usedPriceCurrency = lowest_used.CurrencyCode
        except Exception:
            # No (usable) used price -- optional, so ignore.
            pass

        niceProduct.type = product.ItemAttributes.ProductGroup
        # product.ItemAttributes.RegionCode
        niceProduct.region = getRegionFromUrl(product.DetailPageURL.text).upper()
        niceProduct.model = product.ItemAttributes.Model

    return render_template('product.html', product=niceProduct)
def get_similar_books(ASIN):
    """Return up to three items Amazon lists as similar to ``ASIN``.

    The first response page holding more than three items yields its first
    three.  If no page has that many, the (at most three) items of the last
    page are returned; with no pages at all the result is an empty list.
    Previously the function returned ``None`` in those cases, e.g. when
    exactly three similar items existed.
    """
    api = API(AWS_KEY, SECRET_KEY, 'us', ASSOCIATE_TAG)
    similar_items = []
    for root in api.similarity_lookup(str(ASIN)):
        nspace = root.nsmap.get(None, '')
        books = root.xpath('//aws:Items/aws:Item', namespaces={'aws': nspace})
        similar_items = list(books[:3])
        if len(books) > 3:
            return similar_items
    return similar_items
def lookup_price(searchTerm):
    """Search Amazon 'Electronics' (browse node 281052) for ``searchTerm``.

    Returns a ``(price, title)`` tuple taken from the first result that has
    both a title and a list price.  When nothing usable is found, or the
    request fails, ``(-1, '')`` is returned.

    Credentials are read from the environment variables ``AWS_KEY``,
    ``AWS_SECRET_KEY`` and ``ASSOCIATE_TAG`` rather than being hard-coded in
    the source; missing variables fall back to empty strings.
    """
    import os

    api = API(os.environ.get('AWS_KEY', ''),
              os.environ.get('AWS_SECRET_KEY', ''),
              'us',
              os.environ.get('ASSOCIATE_TAG', ''))
    price = -1
    title = ''
    try:
        results = api.item_search('Electronics', Keywords=searchTerm,
                                  BrowseNode='281052', ResponseGroup='Large',
                                  ItemPage=1)
        if results is not None:
            for cam in results:
                try:
                    attributes = cam.Items.Item.ItemAttributes
                    title = attributes.Title.text
                    price = attributes.ListPrice.FormattedPrice.text
                    break
                except Exception:
                    # Incomplete result: reset and try the next one.
                    price = -1
                    title = ''
    except Exception:
        print('Item not found')
    return price, title
def test_associate_tag_is_written_to_url(self):
    """The associate tag given to the API must show up in the built URL."""
    expected_tag = 'ABC12345'
    client = API(self.ACCESS_KEY, self.SECRET_KEY, 'de',
                 associate_tag=expected_tag)
    built_url = client._build_url(Operation='ItemSearch', SearchIndex='Books')
    # Index 4 of the parse result is the query string.
    query_string = urlparse(built_url)[4]
    params = parse_qs(query_string)
    assert params['AssociateTag'][0] == expected_tag
def scrape_wish_list_items(list_id):
    """Populate wish_list_items with data from wishlist.

    Every item id of the wishlist ``list_id`` is looked up on Amazon; for
    each returned item a dict with ``title``, ``price`` and ``amazonid`` is
    collected.  Items whose lookup or parsing fails are reported on stdout
    and skipped.  Returns the list of collected dicts.
    """
    print("Scraping wishlist...")
    wish = Wishlist(list_id)
    item_ids = wish.get_list_items()
    wishlist_items = []
    api = API(locale='us')
    for item_id in item_ids:
        try:
            result = api.item_lookup(item_id, ResponseGroup="Large")
            for item in result.Items.Item:
                wishlist_items.append({
                    "title": item.ItemAttributes.Title,
                    "price": item.Offers.Offer.OfferListing.Price.FormattedPrice,
                    "amazonid": item.ASIN,
                })
        except Exception:
            # %-formatting so a non-string id cannot break the error report.
            print("!!! Failed getting %s" % (item_id,))
    print("Completed scraping.")
    return wishlist_items
def joo_amazon(username, KEYWORDS):
    """Search Amazon for ``KEYWORDS`` and store the hits for ``username``.

    The Amazon API configuration is taken from the last document of the
    ``amazon`` collection of the ``credentials`` database.  Each hit that
    has a title, URL, medium image and lowest new price becomes a dict with
    the keys ``title``, ``url``, ``image``, ``price``, ``shipping`` and
    ``web``; incomplete hits are skipped.  The dicts are inserted into
    ``db_results.results.<username>`` and also returned.

    The database login is read from the environment variables
    ``MONGO_USER`` and ``MONGO_PASSWORD`` instead of being hard-coded.
    """
    import os

    items_list4 = []
    client = MongoClient('ds063186.mlab.com', 63186)
    client.credentials.authenticate(os.environ['MONGO_USER'],
                                    os.environ['MONGO_PASSWORD'])
    db = client.credentials

    # Use the last configuration document (same choice as before), but cope
    # with an empty collection instead of failing with NameError.
    stored = None
    for stored in db.amazon.find():
        pass
    if stored is None:
        return items_list4

    config = {
        "access_key": str(stored['access_key']),
        "secret_key": str(stored['secret_key']),
        "associate_tag": str(stored['associate_tag']),
        "locale": str(stored['locale']),
    }
    api = API(cfg=config)

    for hit in api.item_search('All', Keywords=KEYWORDS, ResponseGroup='Large'):
        try:
            # Build the record directly; assembling JSON text by hand broke
            # on titles containing quotes or backslashes.
            items_list4.append({
                "title": hit.ItemAttributes.Title.text,
                "url": hit.DetailPageURL.text,
                "image": hit.MediumImage.URL.text,
                "price": hit.OfferSummary.LowestNewPrice.FormattedPrice.text,
                "shipping": "-",
                "web": "Amazon",
            })
        except AttributeError:
            # A required element is missing from this hit.
            continue

    try:
        # getattr replaces exec on a string built from ``username``, which
        # allowed arbitrary code execution.
        # NOTE(review): ``db_results`` is not defined in this function --
        # presumably a module-level handle; verify.
        getattr(db_results.results, username).insert_many(items_list4)
    except Exception:
        print("No Amazon Results")
    return items_list4
def search(title=''):
    """Amazon quick search: print the ASIN of every book matching ``title``."""
    client = API(LOG['AWS_KEY'], LOG['SECRET_KEY'], LOG['LOCAL'],
                 LOG['ASSOC_TAG'])
    # NOTE(review): ``publisher`` is not defined inside this function --
    # presumably a module-level name; verify, otherwise this raises NameError.
    result_pages = client.item_search('Books', Title=title, Publisher=publisher)
    for result_page in result_pages:
        for hit in result_page.Items.Item:
            print('%s' % (hit.ASIN))
def lookup(asin):
    """Look up ``asin`` on the 'jp' locale and debug-log the item's title."""
    response = API(locale='jp').item_lookup(asin, ResponseGroup='Large')
    title = response.Items.Item.ItemAttributes.Title
    logging.debug(title)
def lookup(asin):
    """Look up ``asin`` on the "jp" locale and debug-log the item's title."""
    response = API(locale="jp").item_lookup(asin, ResponseGroup="Large")
    title = response.Items.Item.ItemAttributes.Title
    logging.debug(title)
def amazon_product_search(keyword, storing_class, store, search_index="All", nb_items=10):
    """Search Amazon for ``keyword`` and persist the hits via ``storing_class``.

    Result pages are read until page ``ceil(nb_items * 0.1)`` is reached.
    For every collected item an entry is fetched or created (keyed by store
    and ASIN), initialised from the item and saved; processing stops once
    ``nb_items`` items have been handled.

    Returns the entries whose ``item`` attribute is ``None``, or ``None``
    when Amazon reports no exact matches or the request fails with a
    ``URLError`` (which is re-raised instead when ``settings.DEBUG`` is set).
    """
    client = API(settings.AWS_PRODUCT_ACCESS_KEY_ID,
                 settings.AWS_PRODUCT_SECRET_ACCESS_KEY,
                 settings.AWS_LOCALE)
    try:
        pages = client.item_search(search_index,
                                   Keywords=keyword,
                                   ResponseGroup="Large",
                                   AssociateTag=settings.AWS_ASSOCIATE_TAG)
    except NoExactMatchesFound:
        return None
    except URLError:
        if settings.DEBUG:
            raise
        return None

    last_page = int(ceil(nb_items * 0.1))
    collected = []
    for page_root in pages:
        try:
            page_no = page_root.Items.Request.ItemSearchRequest.ItemPage.pyval
        except AttributeError:
            # A missing page number is treated as the first page.
            page_no = 1
        default_ns = page_root.nsmap.get(None, '')
        collected.extend(page_root.xpath('//aws:Items/aws:Item',
                                         namespaces={'aws': default_ns}))
        if page_no >= last_page:
            break

    unlinked_entries = list()
    for handled, amazon_item in enumerate(collected, 1):
        entry, created = storing_class.objects.get_or_create(
            store=store, object_id=amazon_item.ASIN)
        entry.store_init(store, amazon_item)
        entry.save()
        if entry.item is None:
            unlinked_entries.append(entry)
        if handled == nb_items:
            break
    return unlinked_entries
def search():
    """Print the title of every 'Books' hit for the publisher u"村上" (jp).

    Hits whose title cannot be read or printed are debug-logged and skipped.
    """
    api = API(locale="jp")
    for book in api.item_search("Books", Publisher=u"村上"):
        try:
            print("%s" % (book.ItemAttributes.Title))
        except Exception:
            # Missing title or an un-printable one; ``Exception`` rather
            # than a bare except so Ctrl-C still interrupts the loop.
            logging.debug("no author or title")
def search():
    """Print the title of every 'Books' hit for the publisher u'村上' (jp).

    Hits whose title cannot be read or printed are debug-logged and skipped.
    """
    api = API(locale='jp')
    for book in api.item_search('Books', Publisher=u'村上'):
        try:
            print('%s' % (book.ItemAttributes.Title))
        except Exception:
            # Missing title or an un-printable one; ``Exception`` rather
            # than a bare except so Ctrl-C still interrupts the loop.
            logging.debug("no author or title")
def get_image_from_amazon(artist, album):
    """Download the large cover image of the first 'Music' search hit.

    Searches with the keywords "<artist> <album>" and returns the raw bytes
    fetched from the hit's ``LargeImage`` URL.
    """
    # NOTE(review): these credential values look like placeholders -- confirm
    # where the real ones are meant to come from.
    client = API(
        access_key_id="First it was fix-ed",
        secret_access_key="And then it was enabled",
        associate_tag="But now it's broke again.",
        locale="us",
    )
    keywords = "{} {}".format(artist, album)
    hits = client.item_search('Music', ResponseGroup='Images', Keywords=keywords)
    image_url = str(hits.page(1).Items.Item.LargeImage.URL)
    return requests.get(image_url).content
def search(self, q, country):
    """Search Amazon 'Books' by title ``q`` in locale ``country``.

    Returns a flat list with three entries per book, in order: title, price
    and detail-page URL (all unicode).  The price is the list price if
    present, else the lowest used price, else the text "No price info." --
    so the list always stays in groups of three.

    Any failure while talking to Amazon ends the search; whatever has been
    collected so far is returned.
    """
    items = []
    api = API(AWS_KEY, SECRET_KEY, country)
    try:
        for root in api.item_search('Books', Title=q, AssociateTag='...',
                                    ResponseGroup='Large'):
            nspace = root.nsmap.get(None, '')
            books = root.xpath('//aws:Items/aws:Item',
                               namespaces={'aws': nspace})
            for book in books:
                items.append(unicode(book.ItemAttributes.Title))
                try:
                    if hasattr(book.ItemAttributes, 'ListPrice'):
                        price = unicode(
                            book.ItemAttributes.ListPrice.FormattedPrice)
                    elif hasattr(book.OfferSummary, 'LowestUsedPrice'):
                        price = unicode(
                            book.OfferSummary.LowestUsedPrice.FormattedPrice)
                    else:
                        # Previously nothing was appended here, which threw
                        # the title/price/url grouping out of step.
                        price = "No price info."
                except Exception:
                    price = "No price info."
                items.append(price)
                items.append(unicode(book.DetailPageURL))
    except Exception:
        # Best effort: return what was gathered before the failure.
        pass
    return items
def pytest_generate_tests(metafunc):
    """Parametrise tests taking ``api`` and ``operation`` (called per test).

    For every testable API version whose WSDL file exists, one call is
    added per operation extracted from that WSDL, with an ``API`` instance
    pinned to the version.
    """
    requested = metafunc.funcargnames
    if not ('api' in requested and 'operation' in requested):
        return

    for version in TESTABLE_API_VERSIONS:
        wsdl_path = os.path.join(XML_TEST_DIR, version,
                                 'AWSECommerceService.wsdl')
        if os.path.exists(wsdl_path):
            versioned_api = API('', '', 'de')
            versioned_api.VERSION = version
            for operation in extract_operations_from_wsdl(wsdl_path):
                call_id = '%s/%s' % (version, operation)
                metafunc.addcall(
                    id=call_id,
                    funcargs={'api': versioned_api, 'operation': operation})
def __init__(self, some_dict, list_of_numbers, number_of_recs):
    """Store the inputs and derive the known book numbers and their ISBNs.

    ``book_numbers`` keeps the entries of ``list_of_numbers`` that are keys
    of ``some_dict``.  ``isbns`` holds, for each of them, the first element
    of the mapped value, left-padded with '0' to 10 characters unless that
    element is a float (floats are kept unchanged).
    """
    self.api = API(locale='us', )
    self.image_url = []

    known_numbers = [number for number in list_of_numbers
                     if number in some_dict]
    self.book_numbers = known_numbers

    padded_isbns = []
    for number in known_numbers:
        raw_isbn = some_dict[number][0]
        if type(raw_isbn) != float:
            raw_isbn = (10 - len(raw_isbn)) * '0' + raw_isbn
        padded_isbns.append(raw_isbn)
    self.isbns = padded_isbns

    self.some_dict = some_dict
    self.list_of_numbers = list_of_numbers
    self.number_of_recs = number_of_recs
class AmazonUtil:
    """Helper that performs Amazon item lookups with a few retries."""

    def __init__(self):
        #self.associate_tag = settings.ASSOCIATE_TAG
        #self.access_key_id = settings.ACCESS_KEY_ID
        #self.secret_access_key = settings.SECRET_ACCESS_KEY
        # The API client is created in item_lookup(), where the locale is known.
        self.api = None

    def item_lookup(self,asin,locale,retry=3,time_interval=10,ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes',MerchantId=None,Condition=None):
        """Look up ``asin`` in ``locale``, trying up to ``retry`` times.

        A fresh API client (credentials from ``settings``) is created on
        every call.  URL errors and socket timeouts lead to another attempt;
        an invalid parameter value stops retrying immediately.

        NOTE(review): ``time_interval``, ``MerchantId`` and ``Condition`` are
        accepted but not used in the visible code, and neither ``result``
        nor ``status`` is returned -- the method looks truncated; confirm
        against the full source.
        """
        self.api = API(access_key_id = settings.ACCESS_KEY_ID, secret_access_key = settings.SECRET_ACCESS_KEY, associate_tag = settings.ASSOCIATE_TAG, locale=locale)
        result = ''
        #status
        #0 -- Success
        #1 -- Socket Timeout
        #2 -- Invalid ASIN
        #-1 -- Fail
        status = -1
        for i in range(0,retry):
            try:
                #result = self.api.item_lookup(asin,ResponseGroup=ResponseGroup,MerchantId = MerchantId,Condition=Condition)
                result = self.api.item_lookup(asin,ResponseGroup=ResponseGroup)
                status = 0
                break
            except urllib2.URLError,e:
                # Network-level failure: try again.
                status = 1
                continue
            except socket.timeout,e:
                # Timed out: try again.
                status = 1
                continue
            except InvalidParameterValue,e:
                # Retrying cannot help for an invalid value: give up.
                status = 2
                break
class AmazonMovies(object):
    """Resolve movie titles to Amazon ASINs and derive related movies.

    NOTE(review): ``get_similarproducts`` and ``get_titles`` are called from
    ``__init__`` but are not part of the visible code -- presumably defined
    further down in the class; verify.
    """

    def __init__(self, titles):
        """Compile the title patterns, create the API client and run the pipeline."""
        # Patterns over title text; they are only compiled here, their use
        # is not visible in this block.
        self._pattern1 = re.compile(r"(\[.*\]|\(.*\)|【.*】|<.*>|(.*)|〔.*〕)")
        self._pattern2 = re.compile(r"(DVD|Blu-ray|ブルーレイ|枚組).*")
        self._pattern3 = re.compile(r"\s.*(MovieNEX|2D|3D|エディション|ディスク|特別(編|版)).*")
        self._pattern4 = re.compile(r"\s$")
        self._api = API(cfg=amazon_keys.config)
        # The API client must exist before this: get_movie_dict() uses it.
        self._input_movies = self.get_movie_dict(titles)
        self.movies_dict = self.get_similarproducts(self._input_movies)
        self.movies = self.get_titles(self.movies_dict)

    def get_movie_dict(self, titles):
        """Return a list of ``{'title': ..., 'asin': ...}`` dicts, one per title."""
        tmp_list = []
        for title in titles:
            tmp_list.append({'title': title, 'asin': self.get_asin(title)})
        return tmp_list

    def get_asin(self, title):
        """Return the ASIN of the first 'DVD' search hit for ``title``.

        Returns an empty unicode string when the search yields nothing or
        fails with an ``AWSError`` (which is printed).
        """
        time.sleep(2)  # an interval of 1.8s might be enough to stay under the rate limit?
        asin = u""
        try:
            for items in self._api.item_search('DVD', Keywords=title, limit=1):
                for item in items:
                    # Only the very first hit is of interest.
                    asin = unicode(item.ASIN)
                    break
                break
        except AWSError, e:
            print("code:%s message:%s" % (e.code, e.message))
        return asin
class AmazonChecker(object):
    """Checks whether a title can be found under an Amazon video browse node."""

    def __init__(self):
        """Create the API client for the 'us' locale."""
        # Access key and secret are intentionally blank in the source.
        self.api = API('', '', 'us', 'stream0a-20')

    def availability(self, needle):
        """Return a one-element list describing availability of ``needle``.

        The title is searched in the "Video" index under browse node
        16261631; the entry's ``available`` flag is False only when the
        search raises ``NoExactMatchesFound``.
        """
        found = True
        try:
            self.api.item_search("Video", Title=needle, BrowseNode="16261631")
        except NoExactMatchesFound:
            found = False
        return [{"service": "amazon-instant", "available": found}]
def main(self):
    """Gather product data per locale and write it out as XML.

    Amazon consists of multiple webshops from different countries, so the
    product list is loaded and processed once for every configured locale.
    """
    for locale in self.locales:
        gathered = []
        self.api = API(locale=locale)
        for entry in self.loadProducts(locale):
            # Skip blank lines and comments.
            if entry == '' or entry is None or entry[0] == '#':
                continue

            # An entry holds the ASIN and the shipping cost, divided by `:`.
            fields = entry.split(':')
            data = self.gatherData(fields[0], locale)
            if data is not None:  # None: something went wrong retrieving data.
                data["shipping_cost"] = fields[1]
                # Collected so the whole list can be converted to XML at once.
                gathered.append(data)
            time.sleep(2)
        self.writeXML(gathered, locale)
def find_asin(title='Around the World in Eighty Days', author='Jules Verne'):
    """Return the ASIN (as ``str``) of the first book matching title/author.

    The search runs against the 'uk' locale in the 'Books' index and only
    the first result is used.
    """
    from amazonproduct import API

    matches = API(locale='uk').item_search('Books', Title=title, Author=author)
    # Stop at the first hit; ``first_hit`` stays bound after the loop.
    for first_hit in matches:
        break
    return str(first_hit.ASIN)
class Amazon:
    """Builds an Amazon cart from the active items of a list."""

    def __init__(self):
        """Create the 'us' API client and start with an empty item map."""
        self.api = API(locale='us')
        self.cart_exists = False
        self.items = {}

    def get_asin_from_url(self, url):
        """Return the last URL path segment that is a 10-character ASIN.

        Segments are checked from the end of the path; ``None`` is returned
        when no segment consists of exactly ten letters/digits.
        """
        asin_pattern = re.compile("^([A-Za-z0-9]{10})$")
        segments = urlparse(url).path.split("/")
        for segment in reversed(segments):
            if asin_pattern.match(segment):
                return segment
        return None

    def get_item_by_asin(self, asin):
        """Return the API lookup result for ``asin``."""
        return self.api.item_lookup(asin)

    def get_items(self, list):
        """Record ASIN -> quantity for every active item of ``list``."""
        for entry in Item.objects.filter(active=True, list=list):
            self.items["%s" % entry.asin] = entry.quantity

    def get_cart(self):
        """Create a cart from the recorded items, print URL and subtotal."""
        cart = self.api.cart_create(self.items)
        print(cart.Cart.PurchaseURL)
        print(cart.Cart.SubTotal.FormattedPrice)
        # May need this at some point?
        #   for item in cart.Cart.CartItems:
        #       print dir(item.CartItem)
        return cart

    def main(self):
        """Collect the items, then create the cart."""
        # NOTE(review): get_items() requires a ``list`` argument, so this
        # call raises TypeError as written.
        self.get_items()
        self.get_cart()
class Amazon:
    """Builds an Amazon cart from the active items of a list."""

    def __init__(self):
        """Create the 'us' API client and start with an empty item map."""
        self.api = API(locale='us')
        self.cart_exists = False
        self.items = {}

    def get_asin_from_url(self, url):
        """Return the last URL path segment that is a 10-character ASIN.

        Segments are checked from the end of the path; ``None`` is returned
        when no segment consists of exactly ten letters/digits.
        """
        asin_pattern = re.compile("^([A-Za-z0-9]{10})$")
        segments = urlparse(url).path.split("/")
        for segment in reversed(segments):
            if asin_pattern.match(segment):
                return segment
        return None

    def get_item_by_asin(self, asin):
        """Return the API lookup result for ``asin``."""
        return self.api.item_lookup(asin)

    def get_items(self, list):
        """Record ASIN -> quantity for every active item of ``list``."""
        for entry in Item.objects.filter(active=True, list=list):
            self.items["%s" % entry.asin] = entry.quantity

    def get_cart(self):
        """Create a cart from the recorded items, print URL and subtotal."""
        cart = self.api.cart_create(self.items)
        print(cart.Cart.PurchaseURL)
        print(cart.Cart.SubTotal.FormattedPrice)
        # May need this at some point?
        #   for item in cart.Cart.CartItems:
        #       print dir(item.CartItem)
        return cart

    def main(self):
        """Collect the items, then create the cart."""
        # NOTE(review): get_items() requires a ``list`` argument, so this
        # call raises TypeError as written.
        self.get_items()
        self.get_cart()
def amazon_res(page_type, words):
    """Search Amazon for ``words`` and describe each hit as a dict.

    ``page_type == 'food'`` searches the 'Grocery' index and matches hits
    against ``Food``; anything else searches 'HomeGarden' and matches
    against ``Equipment``.

    Each dict has the keys ``asin``, ``title``, ``link``, ``price`` (or
    "no price available"), ``image`` (or "") and ``db`` -- the database
    object with the same ASIN, or ``False`` if it cannot be fetched.
    """
    api = API(locale='us')
    topic = 'Grocery' if page_type == 'food' else 'HomeGarden'
    results = api.item_search(
        topic, Keywords=words,
        ResponseGroup="ItemAttributes, OfferSummary, Images",
        paginate=False)

    items = []
    for it in results.Items.Item:
        asin = it.ASIN
        title = it.ItemAttributes.Title
        link = it.DetailPageURL

        try:
            price = it.OfferSummary.LowestNewPrice.FormattedPrice
        except AttributeError:
            # The element is simply absent when there is no new offer.
            price = "no price available"

        try:
            image = it.SmallImage.URL
        except AttributeError:
            image = ""

        if page_type == 'food':
            try:
                item = Food.objects.get(asin=asin)
                print(item.name)
            except Exception:
                # Not found (or not uniquely found): treat as unknown.
                item = False
                print('doesnt exist')
        else:
            try:
                item = Equipment.objects.get(asin=asin)
            except Exception:
                item = False

        items.append({'asin': asin, 'title': title, 'link': link,
                      'price': price, 'image': image, 'db': item})
    return items
def get_product_details(cls,asin,product_renderer,locale = 'us'):
    """Fetch the attribute and image data of ``asin`` from Amazon.

    Two lookups are made (default response group and 'Images').  When Amazon
    throttles the requests, the call backs off -- starting at 100 ms and
    doubling each time -- and tries again.  Any other AWS error or a
    download error is logged and ends the call early, returning ``None``.

    NOTE(review): the visible code stops after the lookups; the results and
    ``product_renderer`` are not used here -- the function looks truncated.
    """
    logging.info('AmazonProductFetcher.get_product_details called, asin: %s, locale: %s' %(asin,locale))
    api = API(AWS_KEY, SECRET_KEY, locale)
    timeout_ms = 100
    while True:
        try:
            product_node = api.item_lookup(id=asin)  # title, product group
            image_node = api.item_lookup(id=asin, ResponseGroup='Images')  # Images
            break
        except amazonproduct.TooManyRequests:
            # time.sleep() takes seconds; the value is kept in milliseconds,
            # so convert (it used to sleep 100 s on the first back-off).
            time.sleep(timeout_ms / 1000.0)
            timeout_ms *= 2
        except AWSError:
            logging.error('Could not retrieve info for product %s' % asin)
            return
        except DownloadError as e:
            logging.error('%s retrieving URLfor product: %s in RPC' %(e,asin))
            return  # Early quit
def __init__(self, titles):
    """Compile the title patterns, create the API client and process ``titles``.

    The titles are turned into movie dicts first; those are passed to
    ``get_similarproducts`` and its result to ``get_titles``.
    """
    # Pattern sources, compiled below in the same order as the attributes.
    bracketed = r"(\[.*\]|\(.*\)|【.*】|<.*>|(.*)|〔.*〕)"
    media_suffix = r"(DVD|Blu-ray|ブルーレイ|枚組).*"
    edition_suffix = r"\s.*(MovieNEX|2D|3D|エディション|ディスク|特別(編|版)).*"
    trailing_space = r"\s$"

    self._pattern1 = re.compile(bracketed)
    self._pattern2 = re.compile(media_suffix)
    self._pattern3 = re.compile(edition_suffix)
    self._pattern4 = re.compile(trailing_space)

    self._api = API(cfg=amazon_keys.config)

    input_movies = self.get_movie_dict(titles)
    self._input_movies = input_movies
    similar = self.get_similarproducts(input_movies)
    self.movies_dict = similar
    self.movies = self.get_titles(similar)
def __init__(self, url, locale='us'):
    """Look up the product referenced by ``url`` in the given ``locale``.

    Stores the API client, the ASIN extracted from the URL, the raw lookup
    result and the first returned item (``None`` if indexing fails).
    """
    self.api = API(locale=locale)
    self.asin = self.__get_product_id(url)
    lookup = self.api.item_lookup(
        self.asin, ResponseGroup='ItemAttributes,Offers,Images')
    self.result = lookup
    try:
        first_item = lookup.Items.Item[0]
    except IndexError:
        first_item = None
    self.item = first_item
def amazon_lookup(asin):
    """Look up ``asin`` on the 'us' locale and summarise the item.

    Returns a dict with the keys ``asin``, ``title``, ``link`` and
    ``price``; the price is "no price available" when the item has no
    lowest new price.
    """
    api = API(locale='us')
    result = api.item_lookup(asin,
                             ResponseGroup="ItemAttributes, OfferSummary",
                             paginate=False)
    it = result.Items.Item
    asin = it.ASIN
    title = it.ItemAttributes.Title
    link = it.DetailPageURL
    try:
        price = it.OfferSummary.LowestNewPrice.FormattedPrice
    except AttributeError:
        # The element is simply absent when there is no new offer.
        price = "no price available"
    return {'asin': asin, 'title': title, 'link': link, 'price': price}
def pullItemInfoFromAmazon(job_id):
    """Pull item data from Amazon for the ASINs belonging to ``job_id``.

    Only the first 10 ASINs of the job are processed.  Each ASIN is looked
    up (with a limited number of attempts), stored via ``insert_item_info``
    and its image handled via ``process_image``.  Failures are logged and
    collected in local lists.

    NOTE(review): the three failure lists are filled but never returned or
    stored -- confirm whether callers are supposed to receive them.
    """
    logger = logging.getLogger('tst')
    max_attempts = 2  # number of lookups tried per ASIN

    asin_list = [x.asin for x in Asin.objects.filter(job_id=job_id)]
    pull_fail_list = []
    insert_fail_list = []
    image_fail_list = []

    api = API(access_key_id=ACCESS_KEY_ID,
              secret_access_key=SECRET_ACCESS_KEY,
              associate_tag=ASSOCIATE_TAG,
              locale='us')

    for asin in asin_list[:10]:
        asin = asin.strip()
        result = ''
        for attempt in range(1, max_attempts + 1):
            try:
                result = api.item_lookup(
                    asin,
                    ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes',
                    MerchantId='Amazon',
                    Condition='New')
            except Exception as e:
                # Keep the reason in the log instead of discarding it.
                logger.info('ASIN: %s -- %d time -- Fail: %s' % (asin, attempt, e))
                continue
            logger.info('ASIN: %s -- %d time -- Success' % (asin, attempt))
            break

        if result == '':
            # Report the real number of attempts (the old text claimed 3).
            logger.info('ASIN: %s Fail after %d times' % (asin, max_attempts))
            pull_fail_list.append(asin)
            continue

        if not insert_item_info(result, asin):
            logger.error('Insert item info for %s fail' % asin)
            insert_fail_list.append(asin)
            continue

        if not process_image(asin):
            logger.error('Processing Image for %s fail' % asin)
            image_fail_list.append(asin)
            continue
def request_amzn (isbn):
    r"""
    Get the book title and ASIN code from Amazon ('jp' locale).

    :param isbn: ISBN string
    :return: tuple (title nodes, ASIN nodes, XML root node); the first two
        are the lists returned by the XPath queries, not plain strings
    """
    api = API(locale='jp')
    # One request is enough; the identical lookup used to be issued twice.
    root = api.item_lookup(isbn, SearchIndex='Books', IdType='ISBN')
    namespaces = {"aws": root.nsmap.get(None, '')}
    book = root.xpath(
        "//aws:Items/aws:Item/aws:ItemAttributes/aws:Title",
        namespaces=namespaces
    )
    asin = root.xpath(
        "//aws:Items/aws:Item/aws:ASIN",
        namespaces=namespaces
    )
    return (book, asin, root)
def _fetch(self, url): """ Uses XML response from (or stores in) local file. """ # subsequent calls of this API instance # will be stored in different files self.calls += 1 path = self.local_file if self.calls > 1: head, tail = os.path.splitext(self.local_file) path = head + "-%i" % self.calls + tail # If the XML response has not been previously fetched: # retrieve it, obfuscate all sensible data and store it # with the name of the TestCase using it if not os.path.exists(path) or OVERWRITE_TESTS: try: fp = API._fetch(self, url) except urllib2.HTTPError, e: # HTTP errors 400 (Bad Request) and 410 (Gone) send a more # detailed error message as body which can be parsed, too. if e.code in (400, 410): fp = e.fp # otherwise re-raise else: raise try: tree = etree.parse(fp) except AWSError: pass root = tree.getroot() # overwrite sensible data nspace = root.nsmap.get(None, "") for arg in root.xpath("//aws:Arguments/aws:Argument", namespaces={"aws": nspace}): if arg.get("Name") in "AWSAccessKeyId Signature": arg.set("Value", "X" * 15) xml = etree.tostring(root, pretty_print=True) if AWS_KEY != "" and SECRET_KEY != "": xml = xml.replace(AWS_KEY, "X" * 15) xml = xml.replace(SECRET_KEY, "X" * 15) local_dir = os.path.dirname(path) if not os.path.exists(local_dir): # print 'creating %s...' % local_dir os.mkdir(local_dir) fp = open(path, "wb") # print 'storing response in %s...' % self.local_file fp.write(xml) fp.close() return StringIO(xml)
def create_wish_from_url(user, url):
    """Create and save a ``Wish`` for ``user`` from an amazon.com product URL.

    The ASIN is taken from the URL and looked up on Amazon; title, detail
    page URL, large image URL and price of the first returned item are
    stored on the wish.  The price is the lowest new price if available,
    else the list price, else 0.0.  Amounts are converted to currency units
    by dividing by 100.

    Credentials come from the environment variables ``AWS_KEY``,
    ``AWS_SECRET_KEY`` and ``ASSOCIATE_TAG``.

    Raises ``AttributeError`` if ``url`` does not match the expected
    amazon.com product URL pattern.
    """
    AWS_KEY = os.environ['AWS_KEY']
    AWS_SECRET_KEY = os.environ['AWS_SECRET_KEY']
    ASSOCIATE_TAG = os.environ['ASSOCIATE_TAG']

    ASIN_MATCH = r'http://www.amazon.com/([\w-]+/)?(dp|gp/product)/(\w+/)?(\w{10})'
    asin = re.match(ASIN_MATCH, url).groups()[-1]

    api = API(locale='us', associate_tag=ASSOCIATE_TAG,
              access_key_id=AWS_KEY, secret_access_key=AWS_SECRET_KEY)
    result = api.item_lookup(asin,
                             ResponseGroup='ItemAttributes, OfferFull, Images')
    item = result.Items.Item[0]
    title = item.ItemAttributes.Title
    url = item.DetailPageURL

    # Missing child elements raise AttributeError on access, so probe with
    # getattr; otherwise the fallbacks below could never be reached.
    offer_summary = getattr(item, 'OfferSummary', None)
    lowest_new = None
    if offer_summary is not None:
        lowest_new = getattr(offer_summary, 'LowestNewPrice', None)
    list_price = getattr(item.ItemAttributes, 'ListPrice', None)

    # Divide by 100.0: integer division on Python 2 would drop the cents.
    if lowest_new is not None:
        amount = lowest_new.Amount / 100.0
    elif list_price is not None:
        amount = list_price.Amount / 100.0
    else:
        amount = 0.0

    image_url = item.LargeImage.URL
    wish = Wish(
        user=user,
        asin=asin,
        title=title,
        amount=amount,
        is_credit=False,
        url=url,
        image_url=image_url
    )
    wish.save()
    return wish
class Amazon():
    """Looks up a single Amazon product by URL and exposes its key fields."""

    def __init__(self, url, locale='us'):
        """Extract the ASIN from ``url`` and look the product up in ``locale``.

        ``self.item`` is the first returned item, or ``None`` if indexing
        the result fails.
        """
        self.api = API(locale=locale)
        self.asin = self.__get_product_id(url)
        self.result = self.api.item_lookup(
            self.asin, ResponseGroup='ItemAttributes,Offers,Images'
        )
        try:
            self.item = self.result.Items.Item[0]
        except IndexError:
            self.item = None

    def __get_product_id(self, url):
        """ get a amazon asin number from url

        Returns the first run of ten upper-case letters/digits found in
        ``url``, or ``None`` when there is none.
        """
        search_result = re.search("([A-Z0-9]{10})", url)
        # re.search returns None on no match; the old code only caught
        # IndexError and therefore crashed with AttributeError here.
        if search_result is None:
            return None
        return search_result.group(0)

    @property
    def title(self):
        """ return title of Item """
        return self.item.ItemAttributes.Title.text

    @property
    def price(self):
        """ return lowest price of Amazon Item, or None if unavailable """
        try:
            return self.item.OfferSummary.LowestNewPrice.FormattedPrice.text
        except AttributeError:
            return None

    @property
    def currency(self):
        """ return currency of Amazon Item, or None if unavailable """
        try:
            return self.item.OfferSummary.LowestNewPrice.CurrencyCode
        except AttributeError:
            return None

    @property
    def photo(self):
        """ return small Image of Amazon Item, or None if unavailable """
        try:
            return self.item.SmallImage.URL.text
        except AttributeError:
            return None
def searchAmazon(API):
    """Collect the Amiibo video-game listings manufactured by Nintendo.

    ``API`` is an API client instance.  Searches 'VideoGames' for the title
    'Amiibo' (merchant Amazon, available items only) and returns a dict
    mapping product title to detail-page URL for every hit whose
    manufacturer contains 'Nintendo'.

    If the search or the parsing of a hit fails, "NAH" is printed and the
    entries collected so far are returned.
    """
    hash_of_items = {}
    try:
        for item in API.item_search('VideoGames', Title='Amiibo',
                                    MerchantId="Amazon",
                                    Availability="Available"):
            product = item.ItemAttributes
            product_name = product.Title
            product_manufacturer = product.Manufacturer.text
            product_url = item.DetailPageURL.text
            if 'Nintendo' in product_manufacturer:
                hash_of_items[product_name] = product_url
    except Exception:
        # ``Exception`` rather than a bare except: Ctrl-C and SystemExit
        # must not be reported as a failed search.
        print("NAH")
    return hash_of_items
def get_similar_books(ASIN):
    """Return up to three items Amazon lists as similar to ``ASIN``.

    The first response page holding more than three items yields its first
    three.  If no page has that many, the (at most three) items of the last
    page are returned; with no pages at all the result is an empty list.
    Previously the function returned ``None`` in those cases, e.g. when
    exactly three similar items existed.
    """
    api = API(AWS_KEY, SECRET_KEY, 'us', ASSOCIATE_TAG)
    similar_items = []
    for root in api.similarity_lookup(str(ASIN)):
        nspace = root.nsmap.get(None, '')
        books = root.xpath('//aws:Items/aws:Item', namespaces={'aws': nspace})
        similar_items = list(books[:3])
        if len(books) > 3:
            return similar_items
    return similar_items
def __init__(self, locale='de'):
    """Build the "BrowseNode Explorer" window for the given ``locale``.

    Creates the API client, a two-column (id, name) tree view inside a
    scrolled window, and fills the tree with the root nodes of
    ``NODE_IDS[locale]``, listing every node id only once.
    """
    gtk.Window.__init__(self, gtk.WINDOW_TOPLEVEL)
    self.set_title("BrowseNode Explorer")
    self.set_size_request(400, 200)
    self.connect("delete_event", self.on_delete)

    self.locale = locale
    self.api = API(AWS_KEY, SECRET_KEY, self.locale)

    # Model with an int column (id) and a string column (name), shown
    # through a tree view.
    self.treestore = gtk.TreeStore(int, str)
    self.treeview = gtk.TreeView(self.treestore)

    # Columns: (header, model column, sortable)
    for header, model_index, sortable in (('id', 0, False),
                                          ('name', 1, True)):
        cell = gtk.CellRendererText()
        view_column = gtk.TreeViewColumn(header, cell, text=model_index)
        if sortable:
            view_column.set_sort_column_id(1)  # Allow sorting on the column
        self.treeview.append_column(view_column)

    # make it clickable
    self.treeview.add_events(gtk.gdk.BUTTON_PRESS_MASK)
    self.treeview.connect('button_press_event', self.on_tree_click)

    container = gtk.ScrolledWindow()
    container.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
    container.add(self.treeview)
    self.add(container)
    self.show_all()

    # populate with root nodes, but avoid duplicated node ids
    root_nodes = NODE_IDS[self.locale]
    unseen_ids = set(root_nodes.values())
    for name, node_id in root_nodes.items():
        if node_id not in unseen_ids:
            continue
        self.treestore.append(None, [node_id, name])
        unseen_ids.remove(node_id)
class AmazonDvd():
    """Searches Amazon's DVD catalogue ('us' locale)."""

    def __init__(self):
        """Create the API client."""
        self.api = API(locale='us')

    def find_product(self, keywords, Director=None):
        '''
        return top 10 products

        Searches the 'DVD' index for ``keywords`` (optionally restricted to
        ``Director``), sorted by relevance and sold by Amazon.  Each hit is
        returned as a dict with ``ASIN``, ``Title`` and ``DetailPageURL``
        plus, where present, ``SmallImage``, ``CustomerReviews``,
        ``EditorialReviews``, ``LowestNewPrice``, ``Actor`` and ``Director``.
        '''
        items = self.api.item_search(
            'DVD',
            Keywords=keywords,
            # Pass the caller's value through; it used to be hard-coded to
            # None, so the argument was silently ignored.
            Director=Director,
            limit=10,
            Sort='relevancerank',
            MerchantId='Amazon',
            ResponseGroup='Large')

        dvds = []
        for item in items:
            json_obj = json.loads(ObjectJSONEncoder().encode(item))
            attributes = json_obj['ItemAttributes']
            dvd = {}
            dvd['ASIN'] = json_obj['ASIN']
            dvd['Title'] = attributes['Title']
            dvd['DetailPageURL'] = json_obj['DetailPageURL']
            if json_obj.get('SmallImage', None):
                dvd['SmallImage'] = json_obj['SmallImage']['URL']
            if json_obj.get('CustomerReviews', None):
                dvd['CustomerReviews'] = json_obj['CustomerReviews']['IFrameURL']
            if json_obj.get('EditorialReviews', None):
                dvd['EditorialReviews'] = json_obj[
                    'EditorialReviews']['EditorialReview']['Content']
            if json_obj.get('OfferSummary', None):
                dvd['LowestNewPrice'] = json_obj['OfferSummary'][
                    'LowestNewPrice']['FormattedPrice']
            if attributes.get('Actor', None):
                dvd['Actor'] = attributes['Actor']
            if attributes.get('Director', None):
                dvd['Director'] = attributes['Director']
            dvds.append(dvd)
        return dvds
#from amazon.api import AmazonAPI
import os

import amazonproduct
from amazonproduct import API

api = API(locale='de')

## Credentials
# Read from the environment so that no keys live in the source; the literal
# keys (and the comment repeating them) were removed from this file.
access_key = os.environ['AWS_KEY']
secret_key = os.environ['AWS_SECRET_KEY']
associate_tag = os.environ['ASSOCIATE_TAG']

# NOTE(review): ``AmazonAPI`` comes from the import that is commented out at
# the top of this script; as written this line raises NameError -- confirm
# which client library is meant to be used.
amazon = AmazonAPI(access_key, secret_key, associate_tag)
products = amazon.search_n(1, Keywords='earphone', SearchIndex='All')
len(products)
import os

from amazonproduct import API

# Credentials are read from the environment so that no keys live in the
# source file.
api = API(access_key_id=os.environ['AWS_KEY'],
          secret_access_key=os.environ['AWS_SECRET_KEY'],
          associate_tag=os.environ['ASSOCIATE_TAG'],
          locale='us')

# Print author and title of every book published by 'Galileo Press'.
for book in api.item_search('Books', Publisher='Galileo Press'):
    print('%s: "%s"' % (book.ItemAttributes.Author,
                        book.ItemAttributes.Title))
# Sentinel returned when no price could be determined.
_NO_PRICE = 999.00


def _scan_book_offers(api, items, totals, unavailable_pass):
    """Scan the first five search hits and track the cheapest matching offer.

    ``totals`` is a dict with the keys ``'best'`` (lowest price so far) and
    ``'unavailable'`` (number of offers skipped because their availability
    text contains "non disponibile"). It is updated in place so the caller
    still sees partial progress if an unexpected exception aborts the scan.

    When ``unavailable_pass`` is false, "non disponibile" offers are counted
    and skipped. When it is true, only those offers are considered, the
    lookup additionally sends ``Availability='Available'`` and the title
    stored in ``session`` gets the "(Attualmente non disponibile)" suffix.

    The first book that lowers the price fixes the (author, title) pair;
    later price improvements are accepted only from books with the same pair.
    """
    lookup_params = {
        'ResponseGroup': 'Offers',
        'Condition': 'New',
        'MerchantId': 'Amazon',
        'AssociateTag': Pk,
    }
    title_suffix = ""
    if unavailable_pass:
        lookup_params['Availability'] = 'Available'
        title_suffix = "(Attualmente non disponibile)"

    chosen = None  # (author, title) of the book that first set the price
    for position, book in enumerate(items, 1):
        # Only the first five hits are examined; the sixth ends the scan.
        if position == 6:
            break
        asin = book.ASIN
        try:
            author = str(book.ItemAttributes.Author)
            title = str(book.ItemAttributes.Title)
            url = str(book.DetailPageURL)
        except Exception:
            # Incomplete or non-convertible attributes: skip this book.
            continue
        try:
            # NOTE(review): the ASIN is passed as the ItemId keyword rather
            # than positionally; confirm the API wrapper's item_lookup
            # signature accepts that.
            node = api.item_lookup(ItemId=str(asin), **lookup_params)
        except AWSError:
            continue
        try:
            for offer in node.Items.Item.Offers.Offer:
                availability = str(offer.OfferListing.Availability).lower()
                is_unavailable = "non disponibile" in availability
                if unavailable_pass:
                    if not is_unavailable:
                        continue
                elif is_unavailable:
                    totals['unavailable'] += 1
                    continue

                # Formatted price looks like "EUR 12,50": strip the currency
                # prefix and switch to a decimal point.
                price_text = str(offer.OfferListing.Price.FormattedPrice)
                price = float(price_text.replace("EUR ", "").replace(",", "."))
                if not price < totals['best']:
                    continue
                if chosen is None:
                    chosen = (author, title)
                elif chosen != (author, title):
                    continue
                totals['best'] = price
                session['titoloAMZ'] = chosen[1] + title_suffix
                session['urlAMZ'] = url
        except Exception:
            # Any problem while reading offers abandons this book only.
            continue


def price_offers(xxx):
    """Return the lowest new Amazon-sold price among the top book hits.

    Searches the 'Books' index of the 'it' locale with ``xxx`` as keywords
    and inspects the offers of the first five hits. If no price is found but
    at least one offer was reported as "non disponibile", a second scan looks
    at those unavailable offers instead.

    Side effects: on every accepted price, ``session['titoloAMZ']`` and
    ``session['urlAMZ']`` are set to the matching book's title and URL.

    Args:
        xxx: keywords passed to the search.

    Returns:
        The lowest price found as a float, or 999.00 when the search fails
        or no usable offer exists.
    """
    api = API(Pp, Pl, 'it')
    try:
        items = api.item_search('Books', Keywords=xxx, AssociateTag=Pk)
    except Exception:
        return _NO_PRICE

    totals = {'best': _NO_PRICE, 'unavailable': 0}
    try:
        _scan_book_offers(api, items, totals, unavailable_pass=False)
        if totals['best'] == _NO_PRICE and totals['unavailable'] >= 1:
            _scan_book_offers(api, items, totals, unavailable_pass=True)
    except Exception:
        # Deliberate best effort: whatever was found before the failure is
        # still returned.
        return totals['best']
    return totals['best']
def __init__(self):
    """Create the 'us' API client and start with no cart and no items."""
    # API client bound to the 'us' locale.
    self.api = API(locale="us")
    # Initial bookkeeping: cart flag off, item mapping empty.
    self.cart_exists = False
    self.items = dict()
etree.Processor(module='cElementTree'), 'elementtree.ElementTree': etree.Processor(module='elementtree.ElementTree'), 'minidom': minidom.Processor(), } print "Collecting test files..." xml_files = [ os.path.join(root, file) for root, dirs, files in os.walk('.') for file in files if os.path.splitext(file)[1].lower() == '.xml' ] print "Parsing %i XML files..." % (len(xml_files) * RUNS, ) for label, parser in custom_parsers.items(): print label, if getattr(parser, 'etree', '') is None: print 'not installed!' continue start = time.clock() api = API(locale='de', processor=parser) for i in range(RUNS): for path in xml_files: try: api._parse(open(path)) except Exception, e: pass stop = time.clock() print stop - start
def load_amazon_api():
    """Build and return a new API client for the 'us' locale."""
    client = API(locale="us")
    return client
# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Semester.ly is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from amazonproduct import API from fake_useragent import UserAgent from bs4 import BeautifulSoup from django.db.models import Q from django.utils.encoding import smart_str import http.cookiejar, django, os, re, requests, sys, time api = API(locale="us") os.environ.setdefault("DJANGO_SETTINGS_MODULE", "semesterly.settings") django.setup() from timetable.models import * from scripts.amazon_helpers import * SESSION = requests.Session() def randomize_ua(): return UserAgent().random def get_request(url): cookies = http.cookiejar.CookieJar()
# Tail of an image routine: centres ``im`` on an 800x800 RGBA canvas, saves
# it to imageinfo.image_file.path and reports success as True/False.
        (w, h) = im.size
        # Offsets that centre the image on the canvas.
        # NOTE(review): assumes Python 2 integer division here - confirm.
        offset_w = (800 - w) / 2
        offset_h = (800 - h) / 2
        # Canvas colour is white with alpha 0.
        new_im = Image.new('RGBA', (800, 800), (255, 255, 255, 0))
        new_im.paste(im, (offset_w, offset_h))
        new_im.save(imageinfo.image_file.path)
        return True
    # NOTE(review): bare except turns every failure into a False result.
    except:
        return False


# Manual smoke test: look up one hard-coded ASIN and print the result of
# insert_item_info for it.
if __name__ == '__main__':
    #print startNewItemJob('20150114205955')
    asin = 'B00001P4ZH'
    api = API(access_key_id=ACCESS_KEY_ID,
              secret_access_key=SECRET_ACCESS_KEY,
              associate_tag=ASSOCIATE_TAG,
              locale='us')
    result = api.item_lookup(
        asin,
        ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes',
        MerchantId='Amazon',
        Condition='New')
    #print insert_item_info(result,asin)
    #print result.Items.Item.Offers.TotalOffers
    #for offer in result.Items.Item.Offers:
    #print offer.Offer.__dict__
    #print offer.Merchant.Name
    # print offer.Offer.OfferListing.Price.FormattedPrice
    print insert_item_info(result, asin)
    #print price_conv('$34.90')
from amazonproduct import API

# Module-level Amazon client for the 'us' locale.
api = API(locale='us')

from amazonproduct import errors
from random import randint
import sys
import numpy as np
from pyspark import SparkContext


def toCSVLine(data):
    """Return the items of ``data`` converted with str() and joined by ','."""
    return ','.join(str(d) for d in data)


def getPrice(price):
    """Return ``price`` shifted by a random integer between -100 and 100."""
    return price + randint(-100, 100)


count = 0
limit_reached = False
sc = SparkContext(appName="AskMeMP")
# Each RDD starts with a single header row.
# NOTE(review): the header uses '+' as separator while toCSVLine joins with
# ',' - confirm which one the output is meant to use.
amazon_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE'])
walmart_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE'])
ebay_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE'])

# Start from browse node 1000 and look up each of its child nodes.
result = api.browse_node_lookup(1000)
for child1 in result.BrowseNodes.BrowseNode.Children.BrowseNode:
    if limit_reached:
        break
    result1 = api.browse_node_lookup(child1.BrowseNodeId)
import sys
# Python 2 idiom: reload sys to regain setdefaultencoding, then make UTF-8
# the process-wide default string encoding.
reload(sys)
sys.setdefaultencoding('utf-8')
import numpy as np
from pyspark import SparkContext
from amazonproduct import API
import time

# Module-level Amazon client for the 'us' locale.
api = API(locale='us')


def parseVector(line):
    """Split a '+'-separated line into ``(key, values)``.

    Returns field 0 as the key and a tuple of fields 1, 2, 4 and 3, in that
    order (the last two are swapped relative to the line).
    """
    parts = line.split('+')
    return parts[0], (parts[1], parts[2], parts[4], parts[3])


pre = time.time()
sc = SparkContext(appName="AskMeMPQuery")
# The three data sets are read from fixed HDFS paths.
amazon_rdd = sc.textFile('hdfs://192.168.0.33:54310/final/amazon.csv')
walmart_rdd = sc.textFile('hdfs://192.168.0.33:54310/final/walmart.csv')
ebay_rdd = sc.textFile('hdfs://192.168.0.33:54310/final/ebay.csv')
count = 0
start = time.time()
# The search keywords come from the first command-line argument.
for book in api.item_search('Books',
                            Keywords=sys.argv[1],
                            Condition='New',
                            Availability='Available'):
    # print '%s %s' %(book.ASIN,book.ItemAttributes.Title)
})
    # Check the datatype of the dates and see whether the table can be
    # arranged by price and date.
    # sellerDFPage.sort_index(by=['Ebay_Price', 'Ebay_Link'], ascending=[False, False])
    # Ensure that this is acting in place.
    return sellerDFPage


######################################################################
# Amazon
######################################################################
from amazonproduct import API
import amazonproduct

# Module-level Amazon client for the 'us' locale; the credential values are
# names defined elsewhere.
apiAmazon = API(access_key_id=access_key_id_value,
                secret_access_key=secret_access_key_value,
                associate_tag=associate_tag_value,
                locale='us')


def AmazonItems(sellerDF):
    # Runs one 'All'-index search per row of sellerDF, using the row's
    # Ebay_Title with the text 'New!' removed as keywords.
    Amazon_listDF = pd.DataFrame()
    for j in sellerDF.index:
        # print "outer" + str(j)
        outerItemCall = apiAmazon.item_search(
            'All',
            Keywords=sellerDF.Ebay_Title[j].replace('New!', ''),
            ResponseGroup='OfferFull',
            Condition='New',
            Availability='Available')
        # NOTE(review): the upper bound is the child count of the first
        # page's Items element minus 4 - presumably to skip non-item
        # children; confirm.
        for i in range(0, (len(outerItemCall.page(1).Items.getchildren()) - 4)):
from server import app
import os

# third-party modules
from amazonproduct import API
from lxml import etree

# my modules
from model import Book, connect_to_db, db

# Credentials are read from the environment; a missing variable raises
# KeyError when this module is loaded.
config = {
    'access_key': os.environ['ACCESS_KEY'],
    'secret_key': os.environ['SECRET_KEY'],
    'associate_tag': os.environ['ASSOCIATE_TAG'],
    'locale': 'us'
}
api = API(cfg=config)

################################################################################
# Seed Book
################################################################################


def seed_books():
    # Reads "books.txt" line by line; each line is split on "|" and every
    # field is decoded as UTF-8 and stripped before a Book is added to the
    # database session.
    # NOTE(review): no close() for book_txt is visible in this part.
    book_txt = open("books.txt")
    for line in book_txt:
        line = line.split("|")
        for i, info in enumerate(line):
            line[i] = info.decode("utf-8").strip()
        db.session.add(
            Book(gutenberg_extraction_num=line[0],
                 name=line[1],
                 author=line[2],
import sys
# Python 2 idiom: reload sys to regain setdefaultencoding, then make UTF-8
# the process-wide default string encoding.
reload(sys)
sys.setdefaultencoding('utf-8')

# API configuration for the 'jp' locale; key and secret come from the
# ``keys`` helper, the associate tag is fixed.
config = {
    'access_key': keys.access_key(),
    'secret_key': keys.secret_key(),
    'associate_tag': 'maxime-22',
    'locale': 'jp'
}
api = API(cfg=config)

# Read tracks_mood.csv and write tracks_mood_amazon.csv alongside it.
with open('../csv/tracks_mood.csv', 'rb') as infile, open('../csv/tracks_mood_amazon.csv', 'wb') as outfile:
    # with open('tracks_mood.csv', 'rb') as infile:
    datareader = csv.reader(infile, delimiter=',')
    datawriter = csv.writer(outfile)
    # Output header row; its last column is on_amazon_jp.
    datawriter.writerow("tid,title,artist,g1,g2,g5,g6,g7,g8,g9,g11,g12,g14,g15,g16,g17,g25,g28,g29,g31,g32,on_amazon_jp".split(","))
    # Skip the first input row (presumably its header).
    next(datareader, None)
    for row in datareader:
        # Columns 1 and 2 joined by a space form the keywords
        # (presumably title and artist, going by the output header).
        kwds = str(row[1]) + ' ' + str(row[2])
        title = str(row[1])
'Bebes': 'Baby',
    # Keys are Spanish category labels; values look like Amazon search
    # index names (presumably - confirm against the lookup code).
    'Computaci\\u00f3n': 'Computers',
    'Consolas y Videojuegos': 'VideoGames',
    'Deportes y Fitness': 'SportingGoods',
    'Electr\\u00f3nica, Audio y Video': 'Electronics',
    'Industrias y Oficinas': 'OfficeProducts',
    'Instrumentos Musicales': 'MusicalInstruments',
    'Joyas y Relojes': 'Jewelry',
    'Juegos y Juguetes': 'Toys',
    'Libros, Revistas y Comics': 'Books',
    'M\\u00fasica, Pel\\u00edculas y Series': 'DVD',
    'Ropa y Accesorios': 'Apparel',
    'Salud y Belleza': 'HealthPersonalCare'
}

# Amazon client for the 'es' locale. The explicit credential arguments are
# disabled (kept below as a comment), so only the locale is passed.
amazon_api = API(locale='es')  #, access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
# secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
# associate_tag=os.environ.get('AWS_ASSOCIATE_TAG'))


@app.route('/')
def index():
    """Serve index.html from STATIC_FOLDER for the root URL."""
    return send_from_directory(STATIC_FOLDER, "index.html")


@app.route('/<path:path>')
def static_proxy(path):
    """Serve the file at ``path`` from STATIC_FOLDER."""
    return send_from_directory(STATIC_FOLDER, path)
def main():
    """Collect tampon products from Amazon and scrape their review dates.

    Steps, in order:
      1. Search the 'HealthPersonalCare' index for 'Tampons' and keep every
         hit that has a SalesRank as a ProductInfo.
      2. For the first 22 products, open the review iframe in Chrome and
         store the link to the full review listing when one is found.
      3. Page through each product's review listing and collect the review
         dates until a page ends with a review older than 365 days, holds
         fewer than 10 dates, or holds none.
      4. Write the reports through write_all_reviews_CSV and
         write_reviews_per_month.
    """
    api = API(locale='us',
              access_key_id=ACCESS_KEY,
              secret_access_key=SECRET_KEY,
              associate_tag=TAG)

    # Create the list of tampon products from Amazon.
    tampon_items = []
    response = api.item_search('HealthPersonalCare',
                               Keywords='Tampons',
                               ResponseGroup="Large, Reviews")
    for item in response:
        if hasattr(item, 'SalesRank'):
            product = ProductInfo()
            product.set_ASIN(item.ASIN)
            product.set_best_seller_rank(int(item.SalesRank))
            product.set_name(item.ItemAttributes.Title.text)
            product.set_review_iframe_url(item.CustomerReviews.IFrameURL)
            tampon_items.append(product)

    # Take the first 22 products for fetching reviews.
    top_products = tampon_items[:22]

    # Open a browser to resolve the "all reviews" link from each review
    # iframe (the page is loaded dynamically).
    browser = webdriver.Chrome()
    try:
        for product in top_products:
            browser.get(product.review_iframe_url)
            small_elements = browser.find_elements_by_class_name('small')
            if small_elements:
                href = small_elements[0].find_element_by_tag_name(
                    'a').get_attribute('href')
                product.set_all_review_url(str(href))
    finally:
        # Close the window even when scraping fails part-way.
        browser.close()

    # Filter out the products for which no review link was found.
    top_products = [
        product for product in top_products if product.all_review_url
    ]

    # Optional: filter to "all reviews"; otherwise only 'Verified Purchaser
    # Reviews' are scraped.
    # top_products = set_filter_all_review(top_products)

    # Scan all reviews.
    socket.setdefaulttimeout(50)
    brow = webdriver.Chrome()
    try:
        brow.set_page_load_timeout(30)
        for product in top_products:
            time.sleep(5)
            brow.get(str(product.all_review_url))
            valid = True
            # Keep paging until the reviews of the previous year are scraped.
            while valid:
                # NOTE(review): this retries without limit or delay until
                # the review list element exists.
                while True:
                    try:
                        review_list = brow.find_element_by_id(
                            'cm_cr-review_list')
                        break
                    except NoSuchElementException:
                        print('Excpetion')

                # Date texts of this page with their first three characters
                # dropped (presumably a leading "on ").
                date_texts = [
                    str(element.text)[3:] for element in
                    review_list.find_elements_by_class_name('review-date')
                ]
                # A real list (not map) so indexing works on Python 3 too.
                dt = [
                    datetime.strptime(text.replace(',', ''), '%B %d %Y')
                    for text in date_texts
                ]
                product.review_dates.extend(dt)

                if not dt:
                    # No dates on this page: stop instead of failing on
                    # dt[-1] with IndexError.
                    valid = False
                elif (datetime.now() - dt[-1]).days > 365:
                    # The last review on the page is more than a year old.
                    valid = False
                elif len(dt) == 10:
                    # Full page: go to the next page for more reviews.
                    last_button = brow.find_element_by_class_name("a-last")
                    next_page_url = last_button.find_element_by_tag_name(
                        'a').get_attribute('href')
                    print(next_page_url)
                    brow.get(str(next_page_url))
                else:
                    valid = False
    finally:
        brow.close()

    # Write a complete file of reviews.
    write_all_reviews_CSV(top_products)
    # Write reviews per month per product for plotting and analysis.
    write_reviews_per_month(top_products)
from amazonproduct import API
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
from django.db.models import Q
from django.utils.encoding import smart_str
import cookielib, django, os, re, requests, sys, time

# Module-level Amazon client for the 'us' locale.
api = API(locale='us')

# Point Django at the project settings and initialise it before the
# project modules below are imported.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "semesterly.settings")
django.setup()

from timetable.models import *
from scripts.amazon_helpers import *

# One requests session shared at module level.
SESSION = requests.Session()


def randomize_ua():
    """Return the ``random`` attribute of a newly created UserAgent."""
    return UserAgent().random


def get_request(url):
    cookies = cookielib.CookieJar()
    # Request headers addressed to uoftbookstore.com, with a freshly
    # randomised User-Agent on every call.
    headers = {
        'User-Agent': randomize_ua(),
        'Accept': '*/*',
        'Host': 'uoftbookstore.com',
        'Referer': 'http://uoftbookstore.com/buy_courselisting.asp',
        'Content-Length': '0',
        'Content-Type': 'application/x-www-form-urlencoded'
    }