def scrape_wish_list_items(list_id): """ Populate wish_list_items with data from wishlist """ print "Scraping wishlist..." wish = Wishlist(list_id) item_ids = wish.get_list_items() wishlist_items = [] api = API(locale='us') for item_id in item_ids: try: result = api.item_lookup(item_id, ResponseGroup="Large") for item in result.Items.Item: itm = { "title": item.ItemAttributes.Title, "price": item.Offers.Offer.OfferListing.Price.FormattedPrice, "amazonid": item.ASIN } wishlist_items.append(itm) except: print "!!! Failed getting " + item_id print "Completed scraping." return wishlist_items
def joo_amazon(username, KEYWORDS): items_list4 = [] client = MongoClient('ds063186.mlab.com', 63186) client.credentials.authenticate('shakedinero', 'a/c57821688') db = client.credentials cursor = db.amazon.find() for i in cursor: x = i config = { "access_key": str(x['access_key']), "secret_key": str(x['secret_key']), "associate_tag": str(x['associate_tag']), "locale": str(x['locale']) } api = API(cfg=config) items = api.item_search('All', Keywords=KEYWORDS, ResponseGroup='Large') for i in items: try: title = i.ItemAttributes.Title item_url = i.DetailPageURL img = i.MediumImage.URL price = i.OfferSummary.LowestNewPrice.FormattedPrice shipping = '-' x = '{"title":"' + title + '","url":"' + item_url + '","image":"' + img + '","price":"' + price + '","shipping":"' + shipping + '","web":"Amazon"}' j = json.loads(x) items_list4.append(j) except: continue command = "db_results.results." + username + ".insert_many(items_list4)" try: exec command except: print "No Amazon Results" return items_list4
def search(title=''): """Amazon quick search function.""" api = API(LOG['AWS_KEY'], LOG['SECRET_KEY'], LOG['LOCAL'], LOG['ASSOC_TAG']) node = api.item_search('Books', Title=title, Publisher=publisher) for page in node: for book in page.Items.Item: print '%s' % (book.ASIN)
def lookup(asin):
    """Look up a single product by ASIN on the JP locale and log its title."""
    jp_api = API(locale='jp')
    response = jp_api.item_lookup(asin, ResponseGroup='Large')
    # Only the title is of interest here.
    logging.debug(response.Items.Item.ItemAttributes.Title)
def amazon_product_search(keyword, storing_class, store, search_index="All", nb_items=10):
    """Search Amazon for *keyword* and persist up to *nb_items* results.

    Every result item is stored via ``storing_class.objects.get_or_create``.
    Returns the list of stored entries whose ``item`` attribute is still
    None, or None when the search found nothing / the network failed.
    """
    api = API(settings.AWS_PRODUCT_ACCESS_KEY_ID, settings.AWS_PRODUCT_SECRET_ACCESS_KEY, settings.AWS_LOCALE)
    try:
        node = api.item_search(search_index, Keywords=keyword, ResponseGroup="Large", AssociateTag=settings.AWS_ASSOCIATE_TAG)
    except NoExactMatchesFound:
        return None
    except URLError:
        # Network failures are fatal in DEBUG, silently swallowed in production.
        if settings.DEBUG:
            raise
        else:
            return None
    # Amazon returns 10 items per page -> ceil(nb_items / 10) pages needed.
    nb_pages = int(ceil(nb_items * 0.1))
    item_list = []
    for root in node:
        # total_results = root.Items.TotalResults.pyval
        # total_pages = root.Items.TotalPages.pyval
        try:
            current_page = root.Items.Request.ItemSearchRequest.ItemPage.pyval
        except AttributeError:
            # The first page carries no ItemPage element.
            current_page = 1
        # Collect <Item> nodes regardless of the document's default namespace.
        nspace = root.nsmap.get(None, '')
        items = root.xpath('//aws:Items/aws:Item', namespaces={'aws': nspace})
        item_list.extend(items)
        if current_page >= nb_pages:
            break
    counter = 0
    aff_item_list = list()
    for item in item_list:
        entry, created = storing_class.objects.get_or_create(
            store=store, object_id=item.ASIN)
        entry.store_init(store, item)
        entry.save()
        # Only entries not yet linked to an item count toward the result.
        if entry.item is None:
            aff_item_list.append(entry)
            counter += 1
        if counter == nb_items:
            break
    return aff_item_list
def search(): api = API(locale='jp') #total_results = node.Items.TotalResults.pyval #total_pages = node.Items.TotalPages.pyval for book in api.item_search('Books', Publisher=u'村上'): try: print '%s' % (book.ItemAttributes.Title) #print '%s: "%s"' % (book.ItemAttributes.Author, # book.ItemAttributes.Title) except: logging.debug("no author or title")
def get_image_from_amazon(artist, album):
    """Return the raw bytes of the first large cover image for artist/album."""
    api = API(access_key_id="First it was fix-ed",
              secret_access_key="And then it was enabled",
              associate_tag="But now it's broke again.",
              locale="us")
    search_terms = "{} {}".format(artist, album)
    results = api.item_search('Music', ResponseGroup='Images',
                              Keywords=search_terms)
    # Only the first page's first item is consulted.
    image_url = str(results.page(1).Items.Item.LargeImage.URL)
    return requests.get(image_url).content
def __init__(self, some_dict, list_of_numbers, number_of_recs):
    """Hold the recommendation inputs and pre-compute zero-padded ISBNs.

    Only book numbers present in *some_dict* are kept; string ISBNs are
    left-padded with zeros to 10 characters, floats pass through untouched.
    """
    self.api = API(locale='us')
    self.image_url = []
    # Keep only the book numbers we actually have data for.
    self.book_numbers = [n for n in list_of_numbers if n in some_dict]
    padded = []
    for number in list_of_numbers:
        if number not in some_dict:
            continue
        raw = some_dict[number][0]
        if type(raw) != float:
            raw = (10 - len(raw)) * '0' + raw
        padded.append(raw)
    self.isbns = padded
    self.some_dict = some_dict
    self.list_of_numbers = list_of_numbers
    self.number_of_recs = number_of_recs
def find_asin(title='Around the World in Eighty Days', author='Jules Verne'):
    """Return the unique ASIN identifier for a book.

    Searches Amazon UK books by title and author and takes the first hit.
    Typically completes in under a second.
    """
    from amazonproduct import API
    api = API(locale='uk')
    results = api.item_search('Books', Title=title, Author=author)
    # Stop at the very first result.
    for match in results:
        break
    return str(match.ASIN)
def get_product_details(cls,asin,product_renderer,locale = 'us'): logging.info('AmazonProductFetcher.get_product_details called, asin: %s, locale: %s' %(asin,locale)) api = API(AWS_KEY, SECRET_KEY, locale) timeout_ms = 100 while True: try: product_node = api.item_lookup(id=asin) #title,product group image_node = api.item_lookup(id=asin, ResponseGroup='Images') #Images break except amazonproduct.TooManyRequests: time.sleep(timeout_ms) timeout_ms *= 2 except AWSError: logging.error('Could not retrieve info for product %s' % asin) return except DownloadError,e: logging.error('%s retrieving URLfor product: %s in RPC' %(e,asin)) return #Early quit
def pullItemInfoFromAmazon(job_id): logger = logging.getLogger('tst') asin_obj_list = Asin.objects.filter(job_id=job_id) asin_list = [x.asin for x in asin_obj_list] pull_fail_list = [] insert_fail_list = [] image_fail_list = [] api = API(access_key_id=ACCESS_KEY_ID, secret_access_key=SECRET_ACCESS_KEY, associate_tag=ASSOCIATE_TAG, locale='us') for asin in asin_list[:10]: asin = asin.strip() result = '' for i in range(0, 2): try: result = api.item_lookup( asin, ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes', MerchantId='Amazon', Condition='New') logger.info('ASIN: %s -- %d time -- Success' % (asin, i + 1)) break except Exception, e: logger.info('ASIN: %s -- %d time -- Fail' % (asin, i + 1)) continue if result == '': logger.info('ASIN: %s Fail after 3 times' % asin) pull_fail_list.append(asin) continue if not insert_item_info(result, asin): logger.error('Insert item info for %s fail' % asin) insert_fail_list.append(asin) continue if not process_image(asin): logger.error('Processing Image for %s fail' % asin) image_fail_list.append(asin) continue
def item_lookup(self, asin, locale, retry=3, time_interval=10, ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes', MerchantId=None, Condition=None):
    """Look up *asin* via the Product Advertising API, retrying on network errors.

    NOTE(review): ``result`` and ``status`` are computed but never returned,
    status 2 ("Invalid ASIN") is never assigned, and ``time_interval``,
    ``MerchantId`` and ``Condition`` are unused -- the function looks
    truncated or unfinished; confirm against the original file.
    """
    self.api = API(access_key_id = settings.ACCESS_KEY_ID, secret_access_key = settings.SECRET_ACCESS_KEY, associate_tag = settings.ASSOCIATE_TAG, locale=locale)
    result = ''
    #status
    #0 -- Success
    #1 -- Socket Timeout
    #2 -- Invalid ASIN
    #-1 -- Fail
    status = -1
    for i in range(0, retry):
        try:
            #result = self.api.item_lookup(asin,ResponseGroup=ResponseGroup,MerchantId = MerchantId,Condition=Condition)
            result = self.api.item_lookup(asin, ResponseGroup=ResponseGroup)
            status = 0
            break
        except urllib2.URLError, e:
            # URL errors are treated like timeouts: mark and retry.
            status = 1
            continue
        except socket.timeout, e:
            status = 1
            continue
def get_similar_books(ASIN):
    """Return up to three items similar to *ASIN*.

    Bug fix: the original only returned from inside the loop once the count
    reached 3 and reset its accumulator on every result page, so any lookup
    with fewer than three similar items (or items spread across pages)
    returned None.  The accumulator now persists across pages and the list
    is returned in every case.
    """
    api = API(AWS_KEY, SECRET_KEY, 'us', ASSOCIATE_TAG)
    similar_items = []
    for root in api.similarity_lookup(str(ASIN)):
        try:
            current_page = root.Items.Request.ItemSearchRequest.ItemPage.pyval
        except AttributeError:
            # The first page carries no ItemPage element.
            current_page = 1
        # Collect <Item> nodes regardless of the document's default namespace.
        nspace = root.nsmap.get(None, '')
        books = root.xpath('//aws:Items/aws:Item', namespaces={'aws': nspace})
        for book in books:
            if len(similar_items) == 3:
                return similar_items
            similar_items.append(book)
    return similar_items
import sys
# Py2-only hack: re-expose setdefaultencoding so implicit str<->unicode
# conversions use UTF-8 (needed for the Japanese-locale data below).
reload(sys)
sys.setdefaultencoding('utf-8')

# Product Advertising API credentials for the Amazon JP locale.
config = {
    'access_key': keys.access_key(),
    'secret_key': keys.secret_key(),
    'associate_tag': 'maxime-22',
    'locale': 'jp'
}
api = API(cfg=config)

with open('../csv/tracks_mood.csv', 'rb') as infile, open('../csv/tracks_mood_amazon.csv', 'wb') as outfile:
    # with open('tracks_mood.csv', 'rb') as infile:
    datareader = csv.reader(infile, delimiter=',')
    datawriter = csv.writer(outfile)
    # Output header: the input columns plus an on_amazon_jp flag.
    datawriter.writerow("tid,title,artist,g1,g2,g5,g6,g7,g8,g9,g11,g12,g14,g15,g16,g17,g25,g28,g29,g31,g32,on_amazon_jp".split(","))
    # Skip the input header row.
    next(datareader, None)
    for row in datareader:
        # row[1] = title, row[2] = artist -- presumably; verify against the CSV.
        kwds = str(row[1]) + ' ' + str(row[2])
        title = str(row[1])
        # NOTE(review): the loop body appears truncated in this chunk -- the
        # Amazon lookup and the datawriter.writerow call are not visible here.
def load_amazon_api():
    """Build and return a Product Advertising API client for the US locale."""
    client = API(locale='us')
    return client
(w, h) = im.size offset_w = (800 - w) / 2 offset_h = (800 - h) / 2 new_im = Image.new('RGBA', (800, 800), (255, 255, 255, 0)) new_im.paste(im, (offset_w, offset_h)) new_im.save(imageinfo.image_file.path) return True except: return False if __name__ == '__main__': #print startNewItemJob('20150114205955') asin = 'B00001P4ZH' api = API(access_key_id=ACCESS_KEY_ID, secret_access_key=SECRET_ACCESS_KEY, associate_tag=ASSOCIATE_TAG, locale='us') result = api.item_lookup( asin, ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes', MerchantId='Amazon', Condition='New') #print insert_item_info(result,asin) #print result.Items.Item.Offers.TotalOffers #for offer in result.Items.Item.Offers: #print offer.Offer.__dict__ #print offer.Merchant.Name # print offer.Offer.OfferListing.Price.FormattedPrice print insert_item_info(result, asin) #print price_conv('$34.90')
from amazonproduct import API

api = API(locale='us')

from amazonproduct import errors
from random import randint
import sys
import numpy as np
from pyspark import SparkContext


def toCSVLine(data):
    """Join the fields of *data* into one comma-separated line."""
    return ','.join(str(d) for d in data)


def getPrice(price):
    """Return *price* jittered by a random offset in [-100, 100]."""
    return price + randint(-100, 100)


count = 0
limit_reached = False
sc = SparkContext(appName="AskMeMP")
# One RDD per marketplace, each seeded with a '+'-separated header record.
amazon_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE'])
walmart_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE'])
ebay_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE'])

# Walk the children of browse node 1000 -- presumably the US Books root;
# verify against the browse-node id list.
result = api.browse_node_lookup(1000)
for child1 in result.BrowseNodes.BrowseNode.Children.BrowseNode:
    if limit_reached:
        break
    result1 = api.browse_node_lookup(child1.BrowseNodeId)
    # NOTE(review): the loop body appears truncated in this chunk -- result1
    # is never used and limit_reached is never set here.
import random import re from amazonproduct import API from libraryAmz import ReadAsin api = API(locale='in') WORDS = ["SHOPPING", "SHOP", "BUY", "ELECTRONICS", "AMAZON"] def isValid(text): """ Returns True if the text is related to Jasper's status. Arguments: text -- user-input, typically transcribed speech """ shopping = bool(re.search(r'\bshopping\b', text, re.IGNORECASE)) buyelectronics = bool( re.search(r'\bbuy electronics\b', text, re.IGNORECASE)) amazon = bool(re.search(r'\bamazon\b', text, re.IGNORECASE)) shop = bool(re.search(r'\bshop\b', text, re.IGNORECASE)) if shopping: return shopping elif buyelectronics: return buyelectronics elif amazon: return amazon elif shop: return shop else:
def get_book(genre, popularity, pub_era, before):
    """Pick a pseudo-random book matching *genre* and *pub_era*.

    ``before`` selects the ``pubdate:before``/``pubdate:after`` Power-search
    operator.  ``popularity`` True draws the random index from the top-ten
    sales ranks, False from the remainder (bounded by RESULT_LIMIT).
    Returns the chosen lxml book element; falls through returning None when
    the target page is never reached.
    """
    api = API(AWS_KEY, SECRET_KEY, 'us', ASSOCIATE_TAG)
    # Power-search syntax cannot contain '&' or ','.
    genre = genre.replace('&', ' ')
    genre = genre.replace(',', ' ')
    if (before):
        param = 'before'
    else:
        param = 'after'
    #print param
    randSet = 1
    found = 0
    for root in api.item_search('Books', ResponseGroup='Large', Power='pubdate:' + param + ' ' + str(pub_era) + ' and subject:' + genre, Sort="salesrank"):
        total_results = root.Items.TotalResults.pyval
        total_pages = root.Items.TotalPages.pyval
        # Clamp the sampling ranges to what the search actually returned.
        if (total_results < RESULT_LIMIT):
            upper_bound = total_results
        else:
            upper_bound = RESULT_LIMIT
        if (total_results < TOP_TEN):
            top_results = total_results
        else:
            top_results = TOP_TEN
        #print upper_bound
        #print top_results
        if (randSet):
            # Draw the target index once, on the first iteration only.
            if (popularity):
                num = random.randrange(0, top_results)
            else:
                num = random.randrange(top_results, upper_bound)
            print num
            # Results come 10 per page; Py2 integer division locates the page.
            pageNum = num / 10 + 1  #starts on page one so if we are under ten this number will be zero
            exact = num % 10
            randSet = 0
            print pageNum
            print exact
        try:
            current_page = root.Items.Request.ItemSearchRequest.ItemPage.pyval
        except AttributeError:
            # The first page carries no ItemPage element.
            current_page = 1
        #print 'page %d of %d' % (current_page, total_pages)
        nspace = root.nsmap.get(None, '')
        books = root.xpath('//aws:Items/aws:Item', namespaces={'aws': nspace})
        if (current_page == pageNum):
            i = 0
            for book in books:
                if (i == exact):
                    return book
                i = i + 1
# Bug fix: ``AmazonAPI`` is used below but its import was commented out,
# so this module raised NameError at load time.
from amazon.api import AmazonAPI
import amazonproduct
from amazonproduct import API

api = API(locale='de')

## Credentials
# NOTE(review): real-looking AWS credentials are hard-coded here (and were
# repeated in trailing comments) -- they should be revoked and moved to
# environment variables or a config file.
access_key = 'AKIAIOWFZ4KTTJAKNLFQ'
secret_key = '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'
associate_tag = 'q0d9b-20'

amazon = AmazonAPI(access_key, secret_key, associate_tag)
products = amazon.search_n(1, Keywords='earphone', SearchIndex='All')
len(products)
'Bebes': 'Baby', 'Computaci\\u00f3n': 'Computers', 'Consolas y Videojuegos': 'VideoGames', 'Deportes y Fitness': 'SportingGoods', 'Electr\\u00f3nica, Audio y Video': 'Electronics', 'Industrias y Oficinas': 'OfficeProducts', 'Instrumentos Musicales': 'MusicalInstruments', 'Joyas y Relojes': 'Jewelry', 'Juegos y Juguetes': 'Toys', 'Libros, Revistas y Comics': 'Books', 'M\\u00fasica, Pel\\u00edculas y Series': 'DVD', 'Ropa y Accesorios': 'Apparel', 'Salud y Belleza': 'HealthPersonalCare' } amazon_api = API(locale='es') #, access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'), # secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'), # associate_tag=os.environ.get('AWS_ASSOCIATE_TAG')) @app.route('/') def index(): return send_from_directory(STATIC_FOLDER, "index.html") @app.route('/<path:path>') def static_proxy(path): return send_from_directory(STATIC_FOLDER, path)
from amazonproduct import API api = API(access_key_id='AKIAJXG6BBQM6YDLYEKA', secret_access_key='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', associate_tag='zhicheng-20', locale='us') for book in api.item_search('Books', Publisher='Galileo Press'): print '%s: "%s"' % (book.ItemAttributes.Author, book.ItemAttributes.Title)
def __init__(self):
    """Create the DataTXT entity extractor and the Amazon US client."""
    self.datatxt = DataTXT(app_id=settings['DANDELION_APP_ID'],
                           app_key=settings['DANDELION_KEY'])
    self.api = API(locale='us')
etree.Processor(module='cElementTree'), 'elementtree.ElementTree': etree.Processor(module='elementtree.ElementTree'), 'minidom': minidom.Processor(), } print "Collecting test files..." xml_files = [ os.path.join(root, file) for root, dirs, files in os.walk('.') for file in files if os.path.splitext(file)[1].lower() == '.xml' ] print "Parsing %i XML files..." % (len(xml_files) * RUNS, ) for label, parser in custom_parsers.items(): print label, if getattr(parser, 'etree', '') is None: print 'not installed!' continue start = time.clock() api = API(locale='de', processor=parser) for i in range(RUNS): for path in xml_files: try: api._parse(open(path)) except Exception, e: pass stop = time.clock() print stop - start
def __init__(self):
    """Start with an empty cart: no cart created yet, no items tracked."""
    self.cart_exists = False
    self.items = {}
    self.api = API(locale='us')
def main():
    """Scrape Amazon tampon products and their recent customer reviews.

    Pipeline: search HealthPersonalCare for "Tampons", keep ranked products,
    take the top slice, resolve each product's all-reviews URL through its
    review iframe with Selenium, then page through reviews collecting dates
    until reviews older than one year appear, and finally write two CSVs.
    """
    api = API(locale='us', access_key_id=ACCESS_KEY, secret_access_key=SECRET_KEY, associate_tag=TAG)
    #Create List of Tampon Products from Amazon
    tampon_items = []
    response = api.item_search('HealthPersonalCare', Keywords='Tampons', ResponseGroup="Large, Reviews")
    for i in response:
        # Only items that carry a sales rank are kept.
        if hasattr(i, 'SalesRank'):
            product = ProductInfo()
            product.set_ASIN(i.ASIN)
            product.set_best_seller_rank(int(i.SalesRank))
            product.set_name(i.ItemAttributes.Title.text)
            product.set_review_iframe_url(i.CustomerReviews.IFrameURL)
            tampon_items.append(product)
    #Take top 22 products for fetching reviews
    top_20_tampons = tampon_items[:22]
    #Open a Browser to get all the reviews (Dynamic Page Loading Amazon)
    browser = webdriver.Chrome()
    #Get link for all reviews from review Iframe
    for product in top_20_tampons:
        browser.get(product.review_iframe_url)
        x = browser.find_elements_by_class_name('small')
        if x:
            x = x[0].find_element_by_tag_name('a').get_attribute('href')
            product.set_all_review_url(str(x))
    browser.close()
    #filter out the product whose reviews are not present
    top_20_tampons = [
        product for product in top_20_tampons if product.all_review_url
    ]
    '''
    Filter to reviews by "all reviews" otherwise scrap only 'Verified Purchaser Reviews'
    #top_20_tampons = set_filter_all_review(top_20_tampons)
    '''
    #Scan for all reviews
    socket.setdefaulttimeout(50)
    brow = webdriver.Chrome()
    brow.set_page_load_timeout(30)
    for product in top_20_tampons:
        time.sleep(5)
        brow.get(str(product.all_review_url))
        valid = True
        #Do it till all the previous 1 year reviews are scraped
        while valid:
            # Busy-wait until the review list element has loaded.
            while True:
                try:
                    x = brow.find_element_by_id('cm_cr-review_list')
                    break
                except NoSuchElementException:
                    print 'Excpetion'
            #get all reviews for the product from that page
            dt = [
                str(i.text)[3:]
                for i in x.find_elements_by_class_name('review-date')
            ]
            dt = map(
                lambda x: datetime.strptime(x.replace(',', ''), '%B %d %Y'),
                dt)
            # setting review dates into product and Checking
            product.review_dates.extend(dt)
            #Check if the last review on the page is 1 year old
            if (datetime.now() - dt[-1]).days > 365:
                valid = False
            # Goto next page to get more reviews
            else:
                # A full page (10 reviews) implies there may be another page.
                if len(dt) == 10:
                    last_button = brow.find_element_by_class_name("a-last")
                    next_page_url = last_button.find_element_by_tag_name(
                        'a').get_attribute('href')
                    print next_page_url
                    brow.get(str(next_page_url))
                else:
                    valid = False
    brow.close()
    #Write a complete file for reviews
    write_all_reviews_CSV(top_20_tampons)
    #Write reviews per month per product for plotting and analysis
    write_reviews_per_month(top_20_tampons)
SECRET_KEY = '' if AWS_KEY == '': exit(0) if len(sys.argv) > 1: uartist = sys.argv[1] ualbum = sys.argv[2] utrack = sys.argv[3] else: exit(0) score = 0 url = {'Amazon': ''} api = API(AWS_KEY, SECRET_KEY, 'us') node = api.item_search('Music', Artist=uartist, Title=ualbum, Track=utrack, ResponseGroup="Large") for item in node.Items.Item: if not hasattr(item, 'Tracks'): continue if uartist != item.ItemAttributes.Artist.pyval and not cjb.simple_compare( uartist, item.ItemAttributes.Artist.pyval, .90): continue albumscore = 0 if ualbum == item.ItemAttributes.Title:
# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Semester.ly is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. from amazonproduct import API from fake_useragent import UserAgent from bs4 import BeautifulSoup from django.db.models import Q from django.utils.encoding import smart_str import http.cookiejar, django, os, re, requests, sys, time api = API(locale="us") os.environ.setdefault("DJANGO_SETTINGS_MODULE", "semesterly.settings") django.setup() from timetable.models import * from scripts.amazon_helpers import * SESSION = requests.Session() def randomize_ua(): return UserAgent().random def get_request(url): cookies = http.cookiejar.CookieJar()
}) # Check Datatype of dates and see if table can be arranged by price and date # sellerDFPage.sort_index(by=['Ebay_Price', 'Ebay_Link'], ascending=[False, False]) # Ensure that this is acting in place return sellerDFPage ###################################################################### # Amazon ###################################################################### from amazonproduct import API import amazonproduct apiAmazon = API(access_key_id=access_key_id_value, secret_access_key=secret_access_key_value, associate_tag=associate_tag_value, locale='us') def AmazonItems(sellerDF): Amazon_listDF = pd.DataFrame() for j in sellerDF.index: # print "outer" + str(j) outerItemCall = apiAmazon.item_search( 'All', Keywords=sellerDF.Ebay_Title[j].replace('New!', ''), ResponseGroup='OfferFull', Condition='New', Availability='Available') for i in range(0, (len(outerItemCall.page(1).Items.getchildren()) - 4)):
def price_offers(xxx):
    """Find the cheapest new Amazon.it offer for a book matching *xxx*.

    Scans up to 5 search results, looks up each item's 'New' offers from
    Amazon as merchant, and tracks the lowest price while pinning the
    author/title of the first winner (session['titoloAMZ'] / ['urlAMZ'] are
    updated as a side effect).  If nothing was priced but some offers were
    "non disponibile", a second pass prices the unavailable offers instead,
    tagging the session title accordingly.  Returns the best price found,
    or 999.00 when nothing matched.
    """
    best=999.00
    one=True
    AutFin=""
    titFin=""
    api = API(Pp, Pl, 'it')
    try:
        items = api.item_search('Books', Keywords=xxx, AssociateTag=Pk)
    except Exception:
        return 999.00
    KK=0
    noDis=0
    try:
        for book in items:
            KK+=1
            # Only the first 5 search results are considered.
            if KK==6: break
            ID=book.ASIN
            try:
                AutP=str(book.ItemAttributes.Author)
                TitP=str(book.ItemAttributes.Title)
                url=str(book.DetailPageURL)
            except Exception as o:
                continue
            str_asin = str(ID)
            try:
                node = api.item_lookup(ItemId=str_asin, ResponseGroup='Offers', Condition='New', MerchantId='Amazon', AssociateTag=Pk)
            except AWSError:
                continue
            try:
                for a in node.Items.Item.Offers.Offer:
                    # Count unavailable offers; they may trigger the 2nd pass.
                    if("non disponibile" in str(a.OfferListing.Availability).lower()):
                        noDis=noDis+1
                        continue
                    # "EUR 12,34" -> 12.34
                    prix=str(a.OfferListing.Price.FormattedPrice).replace("EUR ","").replace(",",".")
                    prox=float(prix)
                    if(prox<best and one):
                        # First priced offer: record it and pin author/title.
                        best=prox
                        AutFin=AutP
                        titFin=TitP
                        one=False
                        session['titoloAMZ']=titFin
                        session['urlAMZ']=url
                    elif(prox<best and one==False and AutP==AutFin and TitP==titFin):
                        # Cheaper offer for the SAME book only.
                        best=prox
                        session['titoloAMZ']=titFin
                        session['urlAMZ']=url
            except Exception as e:
                continue
        # Second pass: nothing priced, but unavailable offers were seen --
        # price the "non disponibile" offers instead.
        if(best==999.00 and noDis>=1):
            KK=0
            one=True
            AutFin=""
            titFin=""
            for book in items:
                KK+=1
                if KK==6: break
                ID=book.ASIN
                try:
                    AutP=str(book.ItemAttributes.Author)
                    TitP=str(book.ItemAttributes.Title)
                    url=str(book.DetailPageURL)
                except Exception as o:
                    continue
                str_asin = str(ID)
                try:
                    node = api.item_lookup(ItemId=str_asin, ResponseGroup='Offers', Availability='Available', Condition='New', MerchantId='Amazon', AssociateTag=Pk)
                except AWSError:
                    continue
                try:
                    for a in node.Items.Item.Offers.Offer:
                        # This pass keeps ONLY the unavailable offers.
                        if("non disponibile" not in str(a.OfferListing.Availability).lower()):
                            continue
                        prix=str(a.OfferListing.Price.FormattedPrice).replace("EUR ","").replace(",",".")
                        prox=float(prix)
                        if(prox<best and one):
                            best=prox
                            AutFin=AutP
                            titFin=TitP
                            one=False
                            session['titoloAMZ']=titFin+"(Attualmente non disponibile)"
                            session['urlAMZ']=url
                        elif(prox<best and one==False and AutP==AutFin and TitP==titFin):
                            best=prox
                            session['titoloAMZ']=titFin+"(Attualmente non disponibile)"
                            session['urlAMZ']=url
                except Exception as e:
                    continue
        return best
    except Exception:
        return best