Ejemplo n.º 1
0
def search_on_amazon(asin, album, artist):
    '''
    Try to locate the Amazon detail-page URL of an album by an artist.

    When ``asin`` is given it is looked up first and accepted only if the
    item's product group is 'Music'. If that yields nothing, a keyword
    search over 'MP3Downloads' is made and the first 'Digital Music Album'
    whose creator and title match ``artist`` / ``album`` is used.

    Returns '' if the Amazon credentials are not configured, nothing
    suitable is found, or the lookup fails.
    '''
    from amazonproduct import API

    if not AMAZON_KEY or not AMAZON_SECRET or not AMAZON_ASSOCIATE_TAG:
        return ''

    api = API(AMAZON_KEY, AMAZON_SECRET, 'us')
    try:
        if asin:
            node = api.item_lookup(asin, AssociateTag=AMAZON_ASSOCIATE_TAG)
            for item in node.Items:
                attributes = item.Item.ItemAttributes
                if attributes.ProductGroup == 'Music':
                    url = item.Item.DetailPageURL
                    if url:
                        return url.text
        node = api.item_search('MP3Downloads',
                               Keywords=album + ' ' + artist,
                               AssociateTag=AMAZON_ASSOCIATE_TAG)
        for item in node.Items:
            attributes = item.Item.ItemAttributes
            if matching.match(artist, str(attributes.Creator)) \
                    and matching.match(album, str(attributes.Title)) \
                    and attributes.ProductGroup == 'Digital Music Album':
                url = item.Item.DetailPageURL
                if url:
                    return url.text
    except Exception:
        # Best effort: any API or parsing failure means "not found".
        # ``Exception`` (rather than a bare ``except``) keeps
        # KeyboardInterrupt and SystemExit propagating.
        pass
    return ''
Ejemplo n.º 2
0
def services_incoming(request):
    """Render the add-item form for an incoming item (probably from the bookmarklet).

    The ``loc`` GET parameter is expected to hold a URL containing a
    10-character ASIN path segment. The ASIN is looked up on Amazon and the
    author and title of the returned items are passed to the
    ``add-item.html`` template. When ``loc`` is missing, contains no ASIN,
    or the lookup returns nothing, the template receives u'Unknown' for
    the creator, title and ISBN instead of the view raising.
    """
    # TODO: this is nothing more than a test now. cleanup.
    # Defaults are set up front so the template context is always complete.
    author = u'Unknown'
    title = u'Unknown'
    isbn = u'Unknown'  # never filled in by the code below

    url = request.GET.get('loc') or ''
    matches = re.search(r'\/([A-Z0-9]{10})($|\/)', url)
    if matches is not None:
        asin = matches.group(1)
        api = API(AMZ.KEY, AMZ.SECRET_KEY, 'us')

        for root in api.item_lookup(asin, IdType='ASIN',
                                    AssociateTag=AMZ.ASSOCIATE_TAG):
            nspace = root.nsmap.get(None, '')
            amazon_items = root.xpath('//aws:Items/aws:Item',
                                      namespaces={'aws': nspace})

            # When several items come back the last one wins.
            for amazon_item in amazon_items:
                if hasattr(amazon_item.ItemAttributes, 'Author'):
                    author = unicode(amazon_item.ItemAttributes.Author)

                if hasattr(amazon_item.ItemAttributes, 'Title'):
                    title = unicode(amazon_item.ItemAttributes.Title)

    return render_to_response('add-item.html', {'user': request.user, 'creator': author, 'title': title, 'isbn': isbn})
Ejemplo n.º 3
0
def pullItemInfoFromAmazon(job_id):
    """Pull item data from Amazon for the ASINs that belong to ``job_id``.

    Only the first ten ASINs of the job are processed. Each lookup is
    attempted up to ``max_attempts`` times. ASINs that cannot be fetched,
    stored (``insert_item_info``) or whose image cannot be processed
    (``process_image``) are logged and recorded in a per-stage failure list.
    """
    logger = logging.getLogger('tst')
    asin_obj_list = Asin.objects.filter(job_id=job_id)
    asin_list = [x.asin for x in asin_obj_list]
    # NOTE(review): these lists are filled but not returned by the code
    # visible here.
    pull_fail_list = []
    insert_fail_list = []
    image_fail_list = []
    api = API(access_key_id=ACCESS_KEY_ID,
              secret_access_key=SECRET_ACCESS_KEY,
              associate_tag=ASSOCIATE_TAG,
              locale='us')
    # The number of attempts is unchanged (two); the final log message is
    # now derived from it instead of claiming three attempts.
    max_attempts = 2
    for asin in asin_list[:10]:
        asin = asin.strip()
        result = ''
        for attempt in range(1, max_attempts + 1):
            try:
                result = api.item_lookup(asin, ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes', MerchantId='Amazon', Condition='New')
                logger.info('ASIN: %s  -- %d time  --  Success' % (asin, attempt))
                break
            except Exception:
                logger.info('ASIN: %s  -- %d time  --  Fail' % (asin, attempt))
                continue
        if result == '':
            logger.info('ASIN: %s Fail after %d times' % (asin, max_attempts))
            pull_fail_list.append(asin)
            continue

        if not insert_item_info(result, asin):
            logger.error('Insert item info for %s fail' % asin)
            insert_fail_list.append(asin)
            continue

        if not process_image(asin):
            logger.error('Processing Image for %s fail' % asin)
            image_fail_list.append(asin)
            continue
Ejemplo n.º 4
0
def show_product(locale, asin):
    """Look up ``asin`` on the given Amazon ``locale`` and render ``product.html``.

    Title, ASIN, medium image URL, product group, region and model are
    copied from the lookup result onto a ``Product``. The lowest new and
    used prices are optional: when the response lacks them (or the amount
    is not numeric) the corresponding attributes are simply left unset.
    """
    api = API(locale=locale)
    result = api.item_lookup(asin, ResponseGroup="ItemIds, ItemAttributes, Images, OfferSummary, Offers")
    niceProduct = Product()
    # If several items are returned, the last one overwrites the others.
    for product in result.Items.Item:
        niceProduct.title = product.ItemAttributes.Title
        niceProduct.ASIN = product.ASIN.text

        niceProduct.imageUrl = product.MediumImage.URL

        try:
            # Amount is divided by 100 to get the price in main currency units.
            niceProduct.newPrice = float(product.OfferSummary.LowestNewPrice.Amount) / 100
            niceProduct.newFormattedPrice = product.OfferSummary.LowestNewPrice.FormattedPrice
            niceProduct.newPriceCurrency = product.OfferSummary.LowestNewPrice.CurrencyCode
        except (AttributeError, TypeError, ValueError):
            # No (usable) new-price offer for this product.
            pass

        try:
            niceProduct.usedPrice = float(product.OfferSummary.LowestUsedPrice.Amount) / 100
            niceProduct.usedFormattedPrice = product.OfferSummary.LowestUsedPrice.FormattedPrice
            niceProduct.usedPriceCurrency = product.OfferSummary.LowestUsedPrice.CurrencyCode
        except (AttributeError, TypeError, ValueError):
            # No (usable) used-price offer for this product.
            pass

        niceProduct.type = product.ItemAttributes.ProductGroup
        # The region is derived from the detail page URL rather than
        # product.ItemAttributes.RegionCode.
        niceProduct.region = getRegionFromUrl(product.DetailPageURL.text).upper()
        niceProduct.model = product.ItemAttributes.Model

    return render_template('product.html', product=niceProduct)
Ejemplo n.º 5
0
def get_similar_books(ASIN):
    """Return up to three items Amazon lists as similar to ``ASIN``.

    The items come from the first response page that contains any. An
    empty list is returned when no page has items. (Previously the function
    fell through and returned None unless a page held more than three
    items.)
    """
    api = API(AWS_KEY, SECRET_KEY, 'us', ASSOCIATE_TAG)

    for root in api.similarity_lookup(str(ASIN)):
        nspace = root.nsmap.get(None, '')
        books = root.xpath('//aws:Items/aws:Item', namespaces={'aws': nspace})
        if books:
            return list(books[:3])

    return []
Ejemplo n.º 6
0
def lookup_price(searchTerm):
    """Return ``(price, title)`` of the first Electronics hit for ``searchTerm``.

    The search is restricted to browse node '281052'. ``price`` is the
    formatted list price text and ``title`` the item title. When the
    credentials are missing, the search fails, or no result carries both
    fields, ``(-1, '')`` is returned.

    Credentials are read from the ``AWS_KEY``, ``AWS_SECRET_KEY`` and
    ``ASSOCIATE_TAG`` environment variables.
    """
    import os

    # SECURITY: the access key, secret key and associate tag used to be
    # embedded in this function. Secrets do not belong in source control;
    # the previously committed key pair should be considered leaked and
    # rotated.
    aws_key = os.environ.get('AWS_KEY')
    secret_key = os.environ.get('AWS_SECRET_KEY')
    assoc_tag = os.environ.get('ASSOCIATE_TAG')

    price = -1
    title = ''
    if not (aws_key and secret_key and assoc_tag):
        print('AWS credentials not configured')
        return price, title

    api = API(aws_key, secret_key, 'us', assoc_tag)

    try:
        results = api.item_search('Electronics', Keywords=searchTerm,
                                  BrowseNode='281052', ResponseGroup='Large',
                                  ItemPage=1)
        if results is not None:
            for cam in results:
                try:
                    attributes = cam.Items.Item.ItemAttributes
                    title = attributes.Title.text
                    price = attributes.ListPrice.FormattedPrice.text
                    break
                except Exception:
                    # Incomplete entry: reset both values and try the next.
                    price = -1
                    title = ''
    except Exception:
        print('Item not found')

    return price, title
    def test_associate_tag_is_written_to_url(self):
        """The associate tag passed to the constructor must end up in the built URL."""
        expected_tag = 'ABC12345'
        client = API(self.ACCESS_KEY, self.SECRET_KEY, 'de',
                     associate_tag=expected_tag)
        built_url = client._build_url(Operation='ItemSearch',
                                      SearchIndex='Books')

        # Element 4 of the urlparse() result is the query string.
        query_string = urlparse(built_url)[4]
        params = parse_qs(query_string)
        assert params['AssociateTag'][0] == expected_tag
Ejemplo n.º 8
0
def scrape_wish_list_items(list_id):
    """Return title, price and ASIN for the items on wishlist ``list_id``.

    Item ids are obtained from ``Wishlist(list_id).get_list_items()`` and
    each one is looked up with the 'Large' response group. The result is a
    list of dicts with the keys "title", "price" and "amazonid". An id
    whose lookup or field extraction fails is reported on stdout and
    skipped.
    """
    print("Scraping wishlist...")

    wish = Wishlist(list_id)
    item_ids = wish.get_list_items()

    wishlist_items = []

    api = API(locale='us')
    for item_id in item_ids:
        try:
            result = api.item_lookup(item_id, ResponseGroup="Large")
            for item in result.Items.Item:
                itm = {
                    "title": item.ItemAttributes.Title,
                    "price":
                    item.Offers.Offer.OfferListing.Price.FormattedPrice,
                    "amazonid": item.ASIN
                }
                wishlist_items.append(itm)
        except Exception:
            # Best effort per item; ``Exception`` instead of a bare except
            # so Ctrl-C / SystemExit still stop the scrape.
            print("!!! Failed getting " + item_id)

    print("Completed scraping.")
    return wishlist_items
Ejemplo n.º 9
0
def joo_amazon(username, KEYWORDS):
    """Search Amazon for ``KEYWORDS`` and store the hits for ``username``.

    API settings are read from the ``amazon`` collection of the
    ``credentials`` database (the last document returned wins). Every
    search hit that has a title, detail URL, medium image and lowest new
    price becomes a dict with the keys "title", "url", "image", "price",
    "shipping" (always '-') and "web" (always "Amazon"). The dicts are
    inserted into ``db_results.results.<username>`` and also returned.
    """
    items_list4 = []
    # SECURITY NOTE(review): host and login are hard-coded here; they
    # should live in configuration and the password should be rotated.
    client = MongoClient('ds063186.mlab.com', 63186)
    client.credentials.authenticate('shakedinero', 'a/c57821688')
    db = client.credentials

    settings_doc = None
    for settings_doc in db.amazon.find():
        pass
    if settings_doc is None:
        # No stored API settings: nothing can be searched.
        print("No Amazon Results")
        return items_list4

    config = {
        "access_key": str(settings_doc['access_key']),
        "secret_key": str(settings_doc['secret_key']),
        "associate_tag": str(settings_doc['associate_tag']),
        "locale": str(settings_doc['locale'])
    }
    api = API(cfg=config)
    items = api.item_search('All', Keywords=KEYWORDS, ResponseGroup='Large')
    for entry in items:
        try:
            # Build the record directly instead of concatenating a JSON
            # string: titles containing quotes or backslashes used to
            # produce invalid JSON and were silently dropped.
            record = {
                "title": entry.ItemAttributes.Title.text or u'',
                "url": entry.DetailPageURL.text or u'',
                "image": entry.MediumImage.URL.text or u'',
                "price": entry.OfferSummary.LowestNewPrice.FormattedPrice.text or u'',
                "shipping": '-',
                "web": "Amazon",
            }
        except AttributeError:
            # Hit lacks one of the required fields.
            continue
        items_list4.append(record)

    try:
        # Attribute lookup replaces ``exec`` on a string built from
        # ``username``, which allowed arbitrary code execution.
        # ``db_results`` is expected to be defined at module level.
        getattr(db_results.results, username).insert_many(items_list4)
    except Exception:
        print("No Amazon Results")
    return items_list4
Ejemplo n.º 10
0
def search(title=''):
    """Amazon quick search: print the ASIN of every book found for ``title``."""
    # NOTE(review): ``publisher`` is not defined inside this function; it
    # has to exist at module level for the call below to work -- confirm.
    client = API(LOG['AWS_KEY'], LOG['SECRET_KEY'], LOG['LOCAL'],
                 LOG['ASSOC_TAG'])
    pages = client.item_search('Books', Title=title, Publisher=publisher)
    for page in pages:
        for book in page.Items.Item:
            print('%s' % (book.ASIN))
Ejemplo n.º 11
0
def lookup(asin):
    """Look up ``asin`` on the 'jp' locale and debug-log the item's title."""
    client = API(locale='jp')
    # The 'Large' response group is requested for the lookup.
    response = client.item_lookup(asin, ResponseGroup='Large')

    title = response.Items.Item.ItemAttributes.Title
    logging.debug(title)
Ejemplo n.º 12
0
def lookup(asin):
    """Fetch ``asin`` from the 'jp' locale and write its title to the debug log."""
    response = API(locale="jp").item_lookup(asin, ResponseGroup="Large")

    # Only the title of the returned item is reported.
    attributes = response.Items.Item.ItemAttributes
    logging.debug(attributes.Title)
Ejemplo n.º 13
0
def amazon_product_search(keyword,
                          storing_class,
                          store,
                          search_index="All",
                          nb_items=10):
    """Search Amazon for ``keyword`` and persist the hits via ``storing_class``.

    Result pages are read until page ``ceil(nb_items * 0.1)`` is reached.
    Every item found is stored with ``get_or_create(store=..., object_id=ASIN)``,
    initialised with ``store_init`` and saved. The entries whose ``item``
    attribute is None are returned, at most ``nb_items`` of them.

    Returns None when Amazon reports no exact matches, or on ``URLError``
    unless ``settings.DEBUG`` is set (then the error is re-raised).
    """
    client = API(settings.AWS_PRODUCT_ACCESS_KEY_ID,
                 settings.AWS_PRODUCT_SECRET_ACCESS_KEY, settings.AWS_LOCALE)

    try:
        pages = client.item_search(search_index,
                                   Keywords=keyword,
                                   ResponseGroup="Large",
                                   AssociateTag=settings.AWS_ASSOCIATE_TAG)
    except NoExactMatchesFound:
        return None
    except URLError:
        if not settings.DEBUG:
            return None
        raise

    page_limit = int(ceil(nb_items * 0.1))

    found = []
    for page in pages:
        # A response without an ItemPage element counts as page 1.
        try:
            page_number = page.Items.Request.ItemSearchRequest.ItemPage.pyval
        except AttributeError:
            page_number = 1

        aws_ns = {'aws': page.nsmap.get(None, '')}
        found.extend(page.xpath('//aws:Items/aws:Item', namespaces=aws_ns))

        if page_number >= page_limit:
            break

    unlinked = list()
    for amazon_item in found:
        entry = storing_class.objects.get_or_create(
            store=store, object_id=amazon_item.ASIN)[0]
        entry.store_init(store, amazon_item)
        entry.save()
        if entry.item is not None:
            continue
        unlinked.append(entry)
        if len(unlinked) == nb_items:
            break

    return unlinked
Ejemplo n.º 14
0
def search():
    """Print the title of every book found for publisher u"村上" on the 'jp' locale.

    A result whose title cannot be read or printed is reported on the
    debug log and skipped.
    """
    api = API(locale="jp")
    for book in api.item_search("Books", Publisher=u"村上"):
        try:
            print("%s" % (book.ItemAttributes.Title))
        except Exception:
            # Best effort per result; ``Exception`` instead of a bare
            # except so KeyboardInterrupt / SystemExit are not swallowed.
            logging.debug("no author or title")
Ejemplo n.º 15
0
def search():
    """Print the title of every book found for publisher u'村上' on the 'jp' locale.

    A result whose title cannot be read or printed is reported on the
    debug log and skipped.
    """
    api = API(locale='jp')
    for book in api.item_search('Books', Publisher=u'村上'):
        try:
            print('%s' % (book.ItemAttributes.Title))
        except Exception:
            # Best effort per result; ``Exception`` instead of a bare
            # except so KeyboardInterrupt / SystemExit are not swallowed.
            logging.debug("no author or title")
Ejemplo n.º 16
0
def get_image_from_amazon(artist, album):
    """Download the large image of the first 'Music' hit for ``artist`` and ``album``.

    Returns the body of the HTTP response for the image URL.
    """
    # NOTE(review): the credential strings below look like placeholders.
    client = API(access_key_id="First it was fix-ed",
                 secret_access_key="And then it was enabled",
                 associate_tag="But now it's broke again.",
                 locale="us")

    keywords = "{} {}".format(artist, album)
    results = client.item_search('Music',
                                 ResponseGroup='Images',
                                 Keywords=keywords)
    first_page = results.page(1)
    image_url = str(first_page.Items.Item.LargeImage.URL)
    return requests.get(image_url).content
Ejemplo n.º 17
0
    def search(self, q, country):
        """Search Amazon books by title and return the first page as a flat list.

        ``q`` is the title to search for and ``country`` the API locale.
        The result is laid out as ``[title, price, url, title, price, url, ...]``.
        ``price`` is the formatted list price, else the lowest used price,
        else the text "No price info." -- so every book always contributes
        exactly three entries. Whatever was collected is returned if the
        search fails; an empty list if there is nothing at all.
        """
        items = []

        api = API(AWS_KEY, SECRET_KEY, country)

        try:
            for root in api.item_search('Books', Title=q, AssociateTag='...', ResponseGroup='Large'):
                nspace = root.nsmap.get(None, '')
                books = root.xpath('//aws:Items/aws:Item',
                                   namespaces={'aws': nspace})

                for book in books:
                    items.append(unicode(book.ItemAttributes.Title))

                    price = "No price info."
                    try:
                        if hasattr(book.ItemAttributes, 'ListPrice'):
                            price = unicode(book.ItemAttributes.ListPrice.FormattedPrice)
                        elif hasattr(book.OfferSummary, 'LowestUsedPrice'):
                            price = unicode(book.OfferSummary.LowestUsedPrice.FormattedPrice)
                    except AttributeError:
                        # No OfferSummary / price element: keep the default.
                        pass
                    items.append(price)

                    items.append(unicode(book.DetailPageURL))

                # Only the first result page is reported.
                break
        except Exception:
            # Best effort: fall through and return what was collected.
            pass
        return items
def pytest_generate_tests(metafunc):
    # called once per each test function
    if 'api' in metafunc.funcargnames and 'operation' in metafunc.funcargnames:
        for version in TESTABLE_API_VERSIONS:
            wsdl = os.path.join(XML_TEST_DIR, version, 
                'AWSECommerceService.wsdl')
            if not os.path.exists(wsdl):
                continue
            api = API('', '', 'de')
            api.VERSION = version
            for operation in extract_operations_from_wsdl(wsdl):
                metafunc.addcall(
                    id='%s/%s' % (version, operation),
                    funcargs={'api' : api, 'operation' : operation})
Ejemplo n.º 19
0
 def __init__(self, some_dict, list_of_numbers, number_of_recs):
     """Store the inputs and derive the known book numbers and their ISBNs.

     ``book_numbers`` keeps the entries of ``list_of_numbers`` that are keys
     of ``some_dict``. ``isbns`` holds, for each of them, the first element
     of its ``some_dict`` value, left-padded with '0' to ten characters.
     Float values are passed through unchanged (presumably missing values
     such as NaN -- confirm against the data source).
     """
     self.api = API(locale='us')
     self.image_url = []
     self.book_numbers = [
         book for book in list_of_numbers if book in some_dict
     ]
     # isinstance() also recognises float subclasses (e.g. numpy.float64),
     # which the former ``type(i) != float`` test sent down the string path.
     self.isbns = [
         isbn if isinstance(isbn, float) else isbn.rjust(10, '0')
         for isbn in (some_dict[book][0] for book in self.book_numbers)
     ]
     self.some_dict = some_dict
     self.list_of_numbers = list_of_numbers
     self.number_of_recs = number_of_recs
Ejemplo n.º 20
0
class AmazonUtil:
    """Thin helper around the Amazon product API with a retrying item lookup."""

    def __init__(self):
        # The API client is created per call in item_lookup().
        self.api = None

    def item_lookup(self, asin, locale, retry=3, time_interval=10, ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes', MerchantId=None, Condition=None):
        """Look up ``asin`` on ``locale``, trying up to ``retry`` times.

        Network failures (URLError, socket timeout) are retried; an invalid
        ASIN stops the loop immediately.

        NOTE(review): in the code visible here ``time_interval``,
        ``MerchantId`` and ``Condition`` are not used, and ``result`` /
        ``status`` are not returned -- the method looks truncated.
        """
        self.api = API(access_key_id=settings.ACCESS_KEY_ID, secret_access_key=settings.SECRET_ACCESS_KEY, associate_tag=settings.ASSOCIATE_TAG, locale=locale)
        result = ''

        # status
        #  0 -- Success
        #  1 -- Socket Timeout
        #  2 -- Invalid ASIN
        # -1 -- Fail
        status = -1
        for i in range(0, retry):
            try:
                result = self.api.item_lookup(asin, ResponseGroup=ResponseGroup)
                status = 0
                break
            except (urllib2.URLError, socket.timeout):
                # Transient network problem: try again.
                status = 1
                continue
            except InvalidParameterValue:
                # Retrying cannot fix a bad ASIN.
                status = 2
                break
Ejemplo n.º 21
0
class AmazonMovies(object):
    """Resolve movie titles to ASINs and collect similar products.

    NOTE(review): ``get_similarproducts`` and ``get_titles`` are called by
    ``__init__`` but are not part of the code visible here.
    """

    def __init__(self, titles):
        # Bracketed annotations: [..], (..), 【..】, <..>, （..）, 〔..〕.
        # The fifth alternative must use full-width parentheses: a bare
        # ``(.*)`` is an unconstrained group that matches any text and
        # makes the remaining alternative unreachable.
        self._pattern1 = re.compile(r"(\[.*\]|\(.*\)|【.*】|<.*>|（.*）|〔.*〕)")
        # Media / disc-count markers and everything after them.
        self._pattern2 = re.compile(r"(DVD|Blu-ray|ブルーレイ|枚組).*")
        # Edition markers preceded by whitespace.
        self._pattern3 = re.compile(r"\s.*(MovieNEX|2D|3D|エディション|ディスク|特別(編|版)).*")
        # A single trailing whitespace character.
        self._pattern4 = re.compile(r"\s$")

        self._api = API(cfg=amazon_keys.config)
        self._input_movies = self.get_movie_dict(titles)
        self.movies_dict = self.get_similarproducts(self._input_movies)
        self.movies = self.get_titles(self.movies_dict)

    def get_movie_dict(self, titles):
        """Return one ``{'title': ..., 'asin': ...}`` dict per title, in order."""
        return [{'title': title, 'asin': self.get_asin(title)}
                for title in titles]

    def get_asin(self, title):
        """Return the ASIN of the first 'DVD' search hit for ``title``.

        Returns u"" when the search yields nothing or the API reports an
        error (which is printed).
        """
        # An interval of about 1.8s seems to be enough to stay under the
        # request limit(?)
        time.sleep(2)

        asin = u""
        try:
            for items in self._api.item_search('DVD', Keywords=title, limit=1):
                for item in items:
                    asin = unicode(item.ASIN)
                    break
                break
        except AWSError as e:
            print("code:%s message:%s" % (e.code, e.message))

        return asin
Ejemplo n.º 22
0
class AmazonChecker(object):
  """Checks whether a title can be found in Amazon's 'Video' search index."""

  def __init__(self):
    access_key = ''
    secret = ''
    tag = 'stream0a-20'

    self.api = API(access_key, secret, 'us', tag)

  def availability(self, needle):
    """Return a one-element list describing amazon-instant availability.

    The title ``needle`` is searched in browse node 16261631 (Instant
    Video; 2649513011 was noted as a possible alternative). The entry's
    "available" flag is False only when the API raises
    ``NoExactMatchesFound``.
    """
    found = True
    try:
      self.api.item_search("Video", Title=needle, BrowseNode="16261631")
    except NoExactMatchesFound:
      found = False

    return [{"service": "amazon-instant", "available": found}]
Ejemplo n.º 23
0
    def main(self):
        """Gather product data for every locale and write one XML per locale.

        Amazon consists of multiple webshops from different countries, so
        each locale gets its own API client, product list and output.
        """
        for locale in self.locales:
            collected = []

            self.api = API(locale=locale)

            for entry in self.loadProducts(locale):
                # Skip blank lines and `#` comments.
                if entry == '' or entry is None or entry[0] == '#':
                    continue

                # An entry holds two elements divided by `:` -- the ASIN
                # and the shipping cost.
                fields = entry.split(':')
                ASIN = fields[0]

                productData = self.gatherData(ASIN, locale)

                # None means something went wrong retrieving the data.
                if productData is not None:
                    productData["shipping_cost"] = fields[1]
                    # Collected so the list can be converted to XML once
                    # all products are parsed.
                    collected.append(productData)

                time.sleep(2)

            self.writeXML(collected, locale)
Ejemplo n.º 24
0
def find_asin(title='Around the World in Eighty Days', author='Jules Verne'):
    """Find the ASIN identifier of a book on the 'uk' locale.

    INPUT: book title and author.
    OUTPUT: the ASIN of the first search result, as a string.

    Raises LookupError when the search yields no result (previously this
    surfaced as an unbound-variable error).
    """
    from amazonproduct import API
    api = API(locale='uk')
    items = api.item_search('Books', Title=title, Author=author)
    # Take the first result only.
    book = next(iter(items), None)
    if book is None:
        raise LookupError('No book found for title %r and author %r'
                          % (title, author))

    return str(book.ASIN)
Ejemplo n.º 25
0
class Amazon:
    """Builds an Amazon cart from the active items of a list."""

    def __init__(self):
        self.api = API(locale='us')
        self.cart_exists = False
        # Maps ASIN (as string) to the quantity to put in the cart.
        self.items = {}

    def get_asin_from_url(self, url):
        """Return the last path segment of ``url`` that looks like an ASIN.

        A segment qualifies when it consists of exactly ten ASCII letters
        or digits. Returns None when no segment qualifies.
        """
        asin_pattern = re.compile("^([A-Za-z0-9]{10})$")
        # Walk the path from the end so trailing segments win.
        for segment in reversed(urlparse(url).path.split("/")):
            if asin_pattern.match(segment):
                return segment
        return None

    def get_item_by_asin(self, asin):
        """Return the API lookup result for ``asin``."""
        return self.api.item_lookup(asin)

    def get_items(self, list):
        """Load the quantities of all active items of ``list`` into ``self.items``."""
        items = Item.objects.filter(active=True, list=list)

        for item in items:
            self.items["%s" % item.asin] = item.quantity

    def get_cart(self):
        """Create a cart from ``self.items``, print its URL and subtotal, return it."""
        cart = self.api.cart_create(self.items)

        print(cart.Cart.PurchaseURL)
        print(cart.Cart.SubTotal.FormattedPrice)

        return cart

    def main(self, list=None):
        """Load the items of ``list`` and create the cart.

        ``list`` is optional only for backward compatibility: ``main()``
        used to call ``get_items()`` without its required argument and
        always raised TypeError.
        """
        self.get_items(list)
        self.get_cart()
Ejemplo n.º 26
0
class Amazon:
    """Builds an Amazon cart from the active items of a list."""

    def __init__(self):
        self.api = API(locale='us')
        self.cart_exists = False
        # Maps ASIN (as string) to the quantity to put in the cart.
        self.items = {}

    def get_asin_from_url(self, url):
        """Return the last path segment of ``url`` that looks like an ASIN.

        A segment qualifies when it consists of exactly ten ASCII letters
        or digits. Returns None when no segment qualifies.
        """
        asin_pattern = re.compile("^([A-Za-z0-9]{10})$")
        # Walk the path from the end so trailing segments win.
        for segment in reversed(urlparse(url).path.split("/")):
            if asin_pattern.match(segment):
                return segment
        return None

    def get_item_by_asin(self, asin):
        """Return the API lookup result for ``asin``."""
        return self.api.item_lookup(asin)

    def get_items(self, list):
        """Load the quantities of all active items of ``list`` into ``self.items``."""
        items = Item.objects.filter(active=True, list=list)

        for item in items:
            self.items["%s" % item.asin] = item.quantity

    def get_cart(self):
        """Create a cart from ``self.items``, print its URL and subtotal, return it."""
        cart = self.api.cart_create(self.items)

        print(cart.Cart.PurchaseURL)
        print(cart.Cart.SubTotal.FormattedPrice)

        return cart

    def main(self, list=None):
        """Load the items of ``list`` and create the cart.

        ``list`` is optional only for backward compatibility: ``main()``
        used to call ``get_items()`` without its required argument and
        always raised TypeError.
        """
        self.get_items(list)
        self.get_cart()
Ejemplo n.º 27
0
def amazon_res(page_type, words):
    """Search Amazon for ``words`` and describe each hit as a dict.

    'food' pages search the 'Grocery' index, everything else 'HomeGarden'.
    Each returned dict has the keys 'asin', 'title', 'link', 'price'
    ("no price available" when missing), 'image' ("" when missing) and
    'db' -- the matching ``Food`` / ``Equipment`` row for the ASIN, or
    False when it cannot be fetched.
    """
    api = API(locale='us')

    if page_type == 'food':
        topic = 'Grocery'
    else:
        topic = 'HomeGarden'

    results = api.item_search(topic, Keywords=words, ResponseGroup="ItemAttributes, OfferSummary, Images", paginate=False)

    items = []
    for it in results.Items.Item:
        asin = it.ASIN
        title = it.ItemAttributes.Title
        link = it.DetailPageURL

        try:
            price = it.OfferSummary.LowestNewPrice.FormattedPrice
        except AttributeError:
            price = "no price available"

        try:
            image = it.SmallImage.URL
        except AttributeError:
            image = ""

        # ``except Exception`` covers both "does not exist" and "multiple
        # rows" without hiding KeyboardInterrupt / SystemExit.
        if page_type == 'food':
            try:
                item = Food.objects.get(asin=asin)
                print(item.name)
            except Exception:
                item = False
                print('doesnt exist')
        else:
            try:
                item = Equipment.objects.get(asin=asin)
            except Exception:
                item = False

        items.append({'asin': asin, 'title': title, 'link': link, 'price': price, 'image': image, 'db': item})

    return items
Ejemplo n.º 28
0
 def get_product_details(cls, asin, product_renderer, locale='us'):
   """Look up a product and its images on Amazon, backing off when throttled.

   The two lookups are repeated while the API answers with
   ``TooManyRequests``; the wait starts at 100 ms and doubles each time.
   Any other AWS error or a ``DownloadError`` is logged and ends the call.

   NOTE(review): ``product_renderer`` and the fetched nodes are not used
   in the code visible here -- the method looks truncated.
   """
   logging.info('AmazonProductFetcher.get_product_details called, asin: %s, locale: %s' % (asin, locale))
   api = API(AWS_KEY, SECRET_KEY, locale)
   timeout_ms = 100
   while True:
     try:
       product_node = api.item_lookup(id=asin)  # title, product group
       image_node = api.item_lookup(id=asin, ResponseGroup='Images')  # Images
       break
     except amazonproduct.TooManyRequests:
       # time.sleep() takes seconds; the delay is kept in milliseconds,
       # so convert (the raw value used to mean a 100-second first wait).
       time.sleep(timeout_ms / 1000.0)
       timeout_ms *= 2
     except AWSError:
       logging.error('Could not retrieve info for product %s' % asin)
       return
     except DownloadError as e:
       logging.error('%s retrieving URLfor product: %s in RPC' % (e, asin))
       return  # Early quit
Ejemplo n.º 29
0
    def __init__(self, titles):
        """Compile the title-cleaning patterns and resolve ``titles``.

        The titles are turned into title/ASIN dicts, their similar products
        are fetched and the resulting titles stored in ``self.movies``.
        """
        # Bracketed annotations: [..], (..), 【..】, <..>, （..）, 〔..〕.
        # The fifth alternative must use full-width parentheses: a bare
        # ``(.*)`` is an unconstrained group that matches any text and
        # makes the remaining alternative unreachable.
        self._pattern1 = re.compile(r"(\[.*\]|\(.*\)|【.*】|<.*>|（.*）|〔.*〕)")
        # Media / disc-count markers and everything after them.
        self._pattern2 = re.compile(r"(DVD|Blu-ray|ブルーレイ|枚組).*")
        # Edition markers preceded by whitespace.
        self._pattern3 = re.compile(r"\s.*(MovieNEX|2D|3D|エディション|ディスク|特別(編|版)).*")
        # A single trailing whitespace character.
        self._pattern4 = re.compile(r"\s$")

        self._api = API(cfg=amazon_keys.config)
        self._input_movies = self.get_movie_dict(titles)
        self.movies_dict = self.get_similarproducts(self._input_movies)
        self.movies = self.get_titles(self.movies_dict)
Ejemplo n.º 30
0
 def __init__(self, url, locale='us'):
     """Look up the product referenced by ``url`` on the given ``locale``.

     ``self.item`` is the first item of the lookup result, or None when
     the result contains no item.
     """
     self.api = API(locale=locale)
     self.asin = self.__get_product_id(url)
     self.result = self.api.item_lookup(
         self.asin,
         ResponseGroup='ItemAttributes,Offers,Images'
     )
     try:
         self.item = self.result.Items.Item[0]
     except (AttributeError, IndexError):
         # A result without any <Item> child fails on the attribute
         # access, not on the index, so both must be handled.
         self.item = None
Ejemplo n.º 31
0
def amazon_lookup(asin):
    """Look up ``asin`` on the 'us' locale and return its basic data.

    Returns a dict with the keys 'asin', 'title', 'link' and 'price'.
    'price' is the formatted lowest new price, or the text
    "no price available" when the response carries none.
    """
    api = API(locale='us')

    result = api.item_lookup(asin, ResponseGroup="ItemAttributes, OfferSummary", paginate=False)

    it = result.Items.Item
    asin = it.ASIN
    title = it.ItemAttributes.Title
    link = it.DetailPageURL

    try:
        price = it.OfferSummary.LowestNewPrice.FormattedPrice
    except AttributeError:
        # No offer summary / new price in the response.
        price = "no price available"

    item = {'asin': asin, 'title': title, 'link': link, 'price': price}

    return item
Ejemplo n.º 32
0
def pullItemInfoFromAmazon(job_id):
    """Pull item data from Amazon for the ASINs that belong to ``job_id``.

    Only the first ten ASINs of the job are processed. Each lookup is
    attempted up to ``max_attempts`` times. ASINs that cannot be fetched,
    stored (``insert_item_info``) or whose image cannot be processed
    (``process_image``) are logged and recorded in a per-stage failure list.
    """
    logger = logging.getLogger('tst')
    asin_obj_list = Asin.objects.filter(job_id=job_id)
    asin_list = [x.asin for x in asin_obj_list]
    # NOTE(review): these lists are filled but not returned by the code
    # visible here.
    pull_fail_list = []
    insert_fail_list = []
    image_fail_list = []
    api = API(access_key_id=ACCESS_KEY_ID,
              secret_access_key=SECRET_ACCESS_KEY,
              associate_tag=ASSOCIATE_TAG,
              locale='us')
    # The number of attempts is unchanged (two); the final log message is
    # now derived from it instead of claiming three attempts.
    max_attempts = 2
    for asin in asin_list[:10]:
        asin = asin.strip()
        result = ''
        for attempt in range(1, max_attempts + 1):
            try:
                result = api.item_lookup(
                    asin,
                    ResponseGroup='Images,ItemAttributes,Offers,BrowseNodes',
                    MerchantId='Amazon',
                    Condition='New')
                logger.info('ASIN: %s  -- %d time  --  Success' %
                            (asin, attempt))
                break
            except Exception:
                logger.info('ASIN: %s  -- %d time  --  Fail' % (asin, attempt))
                continue
        if result == '':
            logger.info('ASIN: %s Fail after %d times' % (asin, max_attempts))
            pull_fail_list.append(asin)
            continue

        if not insert_item_info(result, asin):
            logger.error('Insert item info for %s fail' % asin)
            insert_fail_list.append(asin)
            continue

        if not process_image(asin):
            logger.error('Processing Image for %s fail' % asin)
            image_fail_list.append(asin)
            continue
Ejemplo n.º 33
0
def request_amzn(isbn):
    r"""
    Get the book title and ASIN code from Amazon (locale 'jp').

    :param isbn: ISBN string
    :return: tuple (title elements, ASIN elements, XML root node); the
        first two are the lists returned by the XPath queries
    """

    api = API(locale='jp')
    # One request is enough; the identical lookup used to be issued twice.
    root = api.item_lookup(isbn, SearchIndex='Books', IdType='ISBN')

    namespaces = {"aws": root.nsmap.get(None, '')}
    book = root.xpath(
        "//aws:Items/aws:Item/aws:ItemAttributes/aws:Title",
        namespaces=namespaces
    )
    asin = root.xpath(
        "//aws:Items/aws:Item/aws:ASIN",
        namespaces=namespaces
    )
    return (book, asin, root)
Ejemplo n.º 34
0
def scrape_wish_list_items(list_id):
    """Return title, price and ASIN for the items on wishlist ``list_id``.

    Item ids are obtained from ``Wishlist(list_id).get_list_items()`` and
    each one is looked up with the 'Large' response group. The result is a
    list of dicts with the keys "title", "price" and "amazonid". An id
    whose lookup or field extraction fails is reported on stdout and
    skipped.
    """
    print("Scraping wishlist...")

    wish = Wishlist(list_id)
    item_ids = wish.get_list_items()

    wishlist_items = []

    api = API(locale='us')
    for item_id in item_ids:
        try:
            result = api.item_lookup(item_id, ResponseGroup="Large")
            for item in result.Items.Item:
                itm = {
                    "title": item.ItemAttributes.Title,
                    "price": item.Offers.Offer.OfferListing.Price.FormattedPrice,
                    "amazonid": item.ASIN,
                }
                wishlist_items.append(itm)
        except Exception:
            # Best effort per item; ``Exception`` instead of a bare except
            # so Ctrl-C / SystemExit still stop the scrape.
            print("!!! Failed getting " + item_id)

    print("Completed scraping.")
    return wishlist_items
Ejemplo n.º 35
0
    def _fetch(self, url):
        """
        Uses XML response from (or stores in) local file.

        Each call on the same instance is stored in its own file: the first
        in ``self.local_file``, later ones with ``-<call number>`` inserted
        before the extension. When the file is missing (or
        ``OVERWRITE_TESTS`` is set) the response is fetched, sensitive
        values are replaced by X's, the XML is written to the file and
        returned as a ``StringIO``.
        """
        # subsequent calls of this API instance
        # will be stored in different files
        self.calls += 1
        path = self.local_file
        if self.calls > 1:
            head, tail = os.path.splitext(self.local_file)
            path = head + "-%i" % self.calls + tail

        # If the XML response has not been previously fetched:
        # retrieve it, obfuscate all sensitive data and store it
        # with the name of the TestCase using it
        if not os.path.exists(path) or OVERWRITE_TESTS:
            try:
                fp = API._fetch(self, url)
            except urllib2.HTTPError as e:
                # HTTP errors 400 (Bad Request) and 410 (Gone) send a more
                # detailed error message as body which can be parsed, too.
                if e.code in (400, 410):
                    fp = e.fp
                # otherwise re-raise
                else:
                    raise
            # A parse failure now propagates as such; it used to be
            # followed by an unbound-variable error on ``tree``.
            root = etree.parse(fp).getroot()

            # overwrite sensitive data
            nspace = root.nsmap.get(None, "")
            for arg in root.xpath("//aws:Arguments/aws:Argument", namespaces={"aws": nspace}):
                # Exact-name membership; the former substring test against
                # "AWSAccessKeyId Signature" also hit names such as "Key".
                if arg.get("Name") in ("AWSAccessKeyId", "Signature"):
                    arg.set("Value", "X" * 15)

            xml = etree.tostring(root, pretty_print=True)
            if AWS_KEY != "" and SECRET_KEY != "":
                xml = xml.replace(AWS_KEY, "X" * 15)
                xml = xml.replace(SECRET_KEY, "X" * 15)

            local_dir = os.path.dirname(path)
            if local_dir and not os.path.exists(local_dir):
                # makedirs also creates missing parent directories
                os.makedirs(local_dir)

            # ``with`` closes the file even if the write fails
            with open(path, "wb") as out:
                out.write(xml)
            return StringIO(xml)
        # NOTE(review): no return for an already-stored response is
        # visible here -- the method looks truncated.
Ejemplo n.º 36
0
def create_wish_from_url(user, url):
    """Create and save a ``Wish`` for the Amazon product that ``url`` points to.

    The ten-character ASIN is taken from the product URL, the item is looked
    up through the Product Advertising API, and its title, detail-page URL,
    price and large-image URL are stored on a new ``Wish`` owned by ``user``.

    The price is the lowest new offer if the response carries one, otherwise
    the list price, otherwise ``0.0``. The API's ``Amount`` value is divided
    by 100.

    Credentials come from the ``AWS_KEY``, ``AWS_SECRET_KEY`` and
    ``ASSOCIATE_TAG`` environment variables (``KeyError`` when one is unset).
    An ``AttributeError`` is raised when ``url`` does not match the expected
    amazon.com product URL pattern.

    Returns the saved ``Wish``.
    """
    AWS_KEY = os.environ['AWS_KEY']
    AWS_SECRET_KEY = os.environ['AWS_SECRET_KEY']
    ASSOCIATE_TAG = os.environ['ASSOCIATE_TAG']
    # Accept both http and https product links.
    ASIN_MATCH = r'https?://www.amazon.com/([\w-]+/)?(dp|gp/product)/(\w+/)?(\w{10})'

    # The ASIN is the last capture group of the pattern.
    asin = re.match(ASIN_MATCH, url).groups()[-1]

    api = API(locale='us',
              associate_tag=ASSOCIATE_TAG,
              access_key_id=AWS_KEY,
              secret_access_key=AWS_SECRET_KEY)
    result = api.item_lookup(asin, ResponseGroup='ItemAttributes, OfferFull, Images')
    item = result.Items.Item[0]

    title = item.ItemAttributes.Title
    url = item.DetailPageURL

    def _amount_of(price_node):
        # A missing node (None) or a node without an Amount child means
        # "no usable price here"; the caller then tries the next source.
        raw = getattr(price_node, 'Amount', None)
        if raw is None:
            return None
        # Divide by a float so the fractional part survives on Python 2 too.
        return raw / 100.0

    offer_summary = getattr(item, 'OfferSummary', None)
    amount = _amount_of(getattr(offer_summary, 'LowestNewPrice', None))
    if amount is None:
        amount = _amount_of(getattr(item.ItemAttributes, 'ListPrice', None))
    if amount is None:
        amount = 0.0

    image_url = item.LargeImage.URL

    wish = Wish(
        user=user,
        asin=asin,
        title=title,
        amount=amount,
        is_credit=False,
        url=url,
        image_url=image_url
    )
    wish.save()

    return wish
Ejemplo n.º 37
0
class Amazon(object):
    """Look up a single Amazon item from a product URL and expose its fields.

    ``item`` is the first ``Item`` of the lookup response, or ``None`` when
    the URL contains no ASIN or the response carries no item. Every property
    returns ``None`` when the value is not available.
    """

    def __init__(self, url, locale='us'):
        self.api = API(locale=locale)
        self.asin = self.__get_product_id(url)
        self.result = None
        self.item = None
        if self.asin is None:
            # Nothing to look up; leave result/item empty instead of sending
            # a request with no item id.
            return
        self.result = self.api.item_lookup(
            self.asin,
            ResponseGroup='ItemAttributes,Offers,Images'
        )
        try:
            self.item = self.result.Items.Item[0]
        except (AttributeError, IndexError):
            # A response without an Item child surfaces as AttributeError.
            self.item = None

    def __get_product_id(self, url):
        """ get a amazon asin number from url, or None if there is none """
        compiled_pattern = re.compile("([A-Z0-9]{10})")
        search_result = compiled_pattern.search(url)
        # search() returns None on failure, so test the match object itself.
        if search_result is None:
            return None
        return search_result.group(0)

    @property
    def title(self):
        """ return title of Item """
        try:
            return self.item.ItemAttributes.Title.text
        except AttributeError:
            return None

    @property
    def price(self):
        """ return lowest price of Amazon Item """
        try:
            return self.item.OfferSummary.LowestNewPrice.FormattedPrice.text
        except AttributeError:
            return None

    @property
    def currency(self):
        """ return currency of Amazon Item (the CurrencyCode node, not its text) """
        try:
            return self.item.OfferSummary.LowestNewPrice.CurrencyCode
        except AttributeError:
            return None

    @property
    def photo(self):
        """ return small Image of Amazon Item """
        try:
            return self.item.SmallImage.URL.text
        except AttributeError:
            return None
Ejemplo n.º 38
0
def searchAmazon(API):
    """Map product title to detail-page URL for Nintendo-made Amiibo hits.

    ``API`` is an API client instance (not the class). It is queried for
    'VideoGames' items titled 'Amiibo' that are sold by Amazon and available.
    Hits whose manufacturer text contains 'Nintendo' are collected.

    A hit that lacks one of the needed fields is skipped. If the search
    itself fails, "NAH" is printed and whatever was collected so far is
    returned.
    """
    hash_of_items = {}
    try:
        for item in API.item_search('VideoGames', Title='Amiibo', MerchantId="Amazon", Availability="Available"):
            try:
                product = item.ItemAttributes
                product_name = product.Title
                product_manufacturer = product.Manufacturer.text
                product_url = item.DetailPageURL.text
            except AttributeError:
                # Incomplete listing: skip it rather than abandoning the
                # remaining results.
                continue

            # .text can be None for an empty element; guard the `in` test.
            if product_manufacturer and 'Nintendo' in product_manufacturer:
                hash_of_items[product_name] = product_url
    except Exception:
        # Best effort: report and return the partial result.
        print("NAH")
    return hash_of_items
Ejemplo n.º 39
0
def get_similar_books(ASIN):
    """Return up to three items that Amazon lists as similar to ``ASIN``.

    The similarity lookup response is searched for ``Items/Item`` elements in
    the response's default namespace. The first response that contains any
    items supplies the result, capped at three.

    Returns a list of item elements; an empty list when nothing was found
    (previously the function returned ``None`` whenever a response held
    three or fewer items).
    """
    max_items = 3
    api = API(AWS_KEY, SECRET_KEY, 'us', ASSOCIATE_TAG)

    for root in api.similarity_lookup(str(ASIN)):
        # The response uses a default namespace; bind it to a prefix so the
        # XPath expression can address the elements.
        nspace = root.nsmap.get(None, '')
        books = root.xpath('//aws:Items/aws:Item', namespaces={'aws': nspace})
        if books:
            return list(books[:max_items])

    return []
 def __init__(self, locale='de'):
     """Build the "BrowseNode Explorer" window and list the root nodes.

     ``locale`` selects both the API locale and the ``NODE_IDS`` entry whose
     nodes are shown as top-level rows.
     """
     gtk.Window.__init__(self, gtk.WINDOW_TOPLEVEL)

     self.set_title("BrowseNode Explorer")
     self.set_size_request(400, 200)
     self.connect("delete_event", self.on_delete)

     self.locale = locale
     self.api = API(AWS_KEY, SECRET_KEY, self.locale)

     # Model: (node id, node name); the view renders one text column each.
     self.treestore = gtk.TreeStore(int, str)
     self.treeview = gtk.TreeView(self.treestore)

     for position, heading in enumerate(('id', 'name')):
         cell = gtk.CellRendererText()
         tree_column = gtk.TreeViewColumn(heading, cell, text=position)
         if position == 1:
             # Only the name column is sortable.
             tree_column.set_sort_column_id(1)
         self.treeview.append_column(tree_column)

     # React to mouse clicks on the tree.
     self.treeview.add_events(gtk.gdk.BUTTON_PRESS_MASK)
     self.treeview.connect('button_press_event', self.on_tree_click)

     scroll_area = gtk.ScrolledWindow()
     scroll_area.set_policy(gtk.POLICY_AUTOMATIC, gtk.POLICY_AUTOMATIC)
     scroll_area.add(self.treeview)

     self.add(scroll_area)
     self.show_all()

     # Add one root row per distinct node id: several names may share an
     # id, and only the first name encountered for each id is shown.
     pending_ids = set(NODE_IDS[self.locale].values())
     for label, node_id in NODE_IDS[self.locale].items():
         if node_id not in pending_ids:
             continue
         self.treestore.append(None, [node_id, label])
         pending_ids.remove(node_id)
Ejemplo n.º 41
0
class AmazonDvd():
    """Search Amazon (US locale) for DVDs and flatten the hits to dicts."""

    def __init__(self):
        self.api = API(locale='us')

    def find_product(self, keywords, Director=None):
        '''
        Search the 'DVD' index for ``keywords`` and return a list of dicts.

        ``Director`` is forwarded to the search only when it is given.
        The search is made with ``limit=10``, relevance sorting, Amazon as
        merchant and the 'Large' response group.

        Each dict always has 'ASIN', 'Title' and 'DetailPageURL'. The keys
        'SmallImage', 'CustomerReviews', 'EditorialReviews',
        'LowestNewPrice', 'Actor' and 'Director' are added only when the
        item provides them.
        '''
        search_args = {
            'Keywords': keywords,
            'limit': 10,
            'Sort': 'relevancerank',
            'MerchantId': 'Amazon',
            'ResponseGroup': 'Large',
        }
        if Director is not None:
            # Previously a literal None was always sent, so the caller's
            # value never reached the API.
            search_args['Director'] = Director
        items = self.api.item_search('DVD', **search_args)

        dvds = []
        for item in items:
            # Round-trip through JSON to get plain dicts instead of XML nodes.
            json_obj = json.loads(ObjectJSONEncoder().encode(item))
            attributes = json_obj['ItemAttributes']

            dvd = {}
            dvd['ASIN'] = json_obj['ASIN']
            dvd['Title'] = attributes['Title']
            dvd['DetailPageURL'] = json_obj['DetailPageURL']

            if json_obj.get('SmallImage', None):
                dvd['SmallImage'] = json_obj['SmallImage']['URL']

            if json_obj.get('CustomerReviews', None):
                dvd['CustomerReviews'] = json_obj['CustomerReviews']['IFrameURL']

            if json_obj.get('EditorialReviews', None):
                dvd['EditorialReviews'] = json_obj[
                    'EditorialReviews']['EditorialReview']['Content']

            # An offer summary may exist without a new-condition price.
            offer_summary = json_obj.get('OfferSummary', None)
            if offer_summary and offer_summary.get('LowestNewPrice', None):
                dvd['LowestNewPrice'] = offer_summary[
                    'LowestNewPrice']['FormattedPrice']

            if attributes.get('Actor', None):
                dvd['Actor'] = attributes['Actor']

            if attributes.get('Director', None):
                dvd['Director'] = attributes['Director']

            dvds.append(dvd)

        return dvds
Ejemplo n.º 42
0
import os

import amazonproduct
from amazonproduct import API
# AmazonAPI is used below, so its import must be active.
from amazon.api import AmazonAPI

api = API(locale='de')

## Credentials
# Read from the environment so that no secret lives in source control.
# SECURITY: the access key and secret that used to be written here were
# committed in plain text and must be treated as compromised - rotate them.
access_key = os.environ['AWS_ACCESS_KEY_ID']
secret_key = os.environ['AWS_SECRET_ACCESS_KEY']
associate_tag = os.environ['AWS_ASSOCIATE_TAG']

amazon = AmazonAPI(access_key, secret_key, associate_tag)
products = amazon.search_n(1, Keywords='earphone', SearchIndex='All')
len(products)
Ejemplo n.º 43
0
import os

from amazonproduct import API

# Credentials are read from the environment so that no secret lives in
# source control.
# SECURITY: the key pair that used to be written here was committed in plain
# text and must be treated as compromised - rotate it.
api = API(access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
          secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
          associate_tag=os.environ['AWS_ASSOCIATE_TAG'],
          locale='us')

# List author and title of every 'Books' hit published by Galileo Press.
for book in api.item_search('Books', Publisher='Galileo Press'):
    # Parenthesised single argument: same output on Python 2 and 3.
    print('%s: "%s"' % (book.ItemAttributes.Author, book.ItemAttributes.Title))
Ejemplo n.º 44
0
def price_offers(xxx):
    """Return the lowest offer price found for a 'Books' keyword search.

    The first five search hits for ``xxx`` are inspected. For each hit the
    offers are fetched (Condition 'New', MerchantId 'Amazon') and the
    cheapest parsed price is kept in ``best``.

    Side effects: whenever ``best`` improves, ``session['titoloAMZ']`` and
    ``session['urlAMZ']`` are set to the title and detail-page URL of the
    book that supplied the price.

    Returns a float; ``999.00`` is the "nothing found" sentinel and is also
    returned when the initial search raises.

    NOTE(review): ``Pp``/``Pl`` are presumably the access key and secret,
    ``Pk`` is passed as the associate tag; ``session`` is defined outside
    this function - confirm against the module.
    """
    best=999.00
    # `one` stays True until the first price is accepted; that first book's
    # author/title (AutFin/titFin) then gate all later improvements.
    one=True
    AutFin=""
    titFin=""
    api = API(Pp, Pl, 'it')
    try:items = api.item_search('Books', Keywords=xxx,AssociateTag=Pk)
    except Exception:
        return 999.00
    KK=0      # number of search hits visited so far
    noDis=0   # number of offers seen whose availability says "non disponibile"
    try:
        # ---- Pass 1: consider only offers that are NOT marked unavailable.
        for book in items:
            KK+=1
            # Stop once five hits have been examined.
            if KK==6:
                break
            ID=book.ASIN
            try:
                AutP=str(book.ItemAttributes.Author)
                TitP=str(book.ItemAttributes.Title)
                url=str(book.DetailPageURL)
            
            except Exception as o:
                # Hit without author/title/URL: ignore it.
                continue
            str_asin = str(ID)
            try:node = api.item_lookup(ItemId=str_asin, ResponseGroup='Offers', Condition='New', MerchantId='Amazon',AssociateTag=Pk)
            except AWSError:
                continue
            try:
            
                for a in node.Items.Item.Offers.Offer:
                    if("non disponibile" in str(a.OfferListing.Availability).lower()):
                        # Remember that unavailable offers exist; pass 2
                        # below depends on this counter.
                        noDis=noDis+1

                        continue
                    # Turn e.g. "EUR 12,34" into "12.34" before parsing.
                    prix=str(a.OfferListing.Price.FormattedPrice).replace("EUR ","").replace(",",".")
                
                    prox=float(prix)
                    if(prox<best and one):
                        # First accepted price: it fixes the reference book.
                        best=prox
                        AutFin=AutP
                        titFin=TitP
                        one=False
                        session['titoloAMZ']=titFin
                        session['urlAMZ']=url
                    elif(prox<best and one==False and AutP==AutFin and TitP==titFin):
                        # Later prices count only for the same author and
                        # title as the reference book.
                        best=prox
                        session['titoloAMZ']=titFin
                        session['urlAMZ']=url
            except Exception as e:
                # Any problem with this book's offers (missing nodes, a
                # price float() cannot parse) skips to the next book.
                continue
        # ---- Pass 2: nothing purchasable was found, but unavailable offers
        # were seen; repeat the scan accepting ONLY unavailable offers and
        # tag the stored title accordingly.
        # NOTE(review): `items` is iterated a second time here; if it is a
        # one-shot iterator this loop sees no books - confirm.
        if(best==999.00 and noDis>=1):
            KK=0
            one=True
            AutFin=""
            titFin=""
            for book in items:
                KK+=1
                if KK==6:
                    break
                ID=book.ASIN
                try:
                    AutP=str(book.ItemAttributes.Author)
                    TitP=str(book.ItemAttributes.Title)
                    url=str(book.DetailPageURL)
            
                except Exception as o:
                    continue
                str_asin = str(ID)
                # Same lookup as pass 1 plus Availability='Available'.
                try:node = api.item_lookup(ItemId=str_asin, ResponseGroup='Offers', Availability='Available', Condition='New', MerchantId='Amazon',AssociateTag=Pk)
                except AWSError:
                    continue
            
                try:
            
                    for a in node.Items.Item.Offers.Offer:
                        # Inverse filter of pass 1: skip offers that are
                        # NOT marked unavailable.
                        if("non disponibile" not in str(a.OfferListing.Availability).lower()):
                            continue

                        prix=str(a.OfferListing.Price.FormattedPrice).replace("EUR ","").replace(",",".")
                
                        prox=float(prix)
                        if(prox<best and one):
                          best=prox
                          AutFin=AutP
                          titFin=TitP
                          one=False
                          session['titoloAMZ']=titFin+"(Attualmente non disponibile)"
                        
                          session['urlAMZ']=url
                        elif(prox<best and one==False and AutP==AutFin and TitP==titFin):
                            best=prox
                            session['titoloAMZ']=titFin+"(Attualmente non disponibile)"
                            session['urlAMZ']=url
                except Exception as e:
                    continue
    
        return best
    except Exception:
        # Anything not handled above (e.g. an error while iterating the
        # search results) ends the scan with the best price found so far.
        return best
Ejemplo n.º 45
0
 def __init__(self):
     """Create the 'us'-locale API client and initialise the cart state.

     ``items`` starts as an empty dict and ``cart_exists`` as ``False``.
     """
     self.api = API(locale='us')
     self.items = dict()
     self.cart_exists = False
Ejemplo n.º 46
0
        etree.Processor(module='cElementTree'),
        'elementtree.ElementTree':
        etree.Processor(module='elementtree.ElementTree'),
        'minidom':
        minidom.Processor(),
    }

    print "Collecting test files..."
    xml_files = [
        os.path.join(root, file) for root, dirs, files in os.walk('.')
        for file in files if os.path.splitext(file)[1].lower() == '.xml'
    ]

    print "Parsing %i XML files..." % (len(xml_files) * RUNS, )
    for label, parser in custom_parsers.items():
        print label,
        if getattr(parser, 'etree', '') is None:
            print 'not installed!'
            continue
        start = time.clock()
        api = API(locale='de', processor=parser)
        for i in range(RUNS):
            for path in xml_files:
                try:
                    api._parse(open(path))
                except Exception, e:
                    pass

        stop = time.clock()
        print stop - start
Ejemplo n.º 47
0
def load_amazon_api():
    """Build and return an ``API`` client configured for the 'us' locale."""
    client = API(locale='us')
    return client
Ejemplo n.º 48
0
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Semester.ly is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

from amazonproduct import API
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
from django.db.models import Q
from django.utils.encoding import smart_str
import http.cookiejar, django, os, re, requests, sys, time

# Module-level Amazon API client for the US locale.
api = API(locale="us")
# Point Django at the project settings (unless the variable is already set)
# and initialise it before the model imports below.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "semesterly.settings")
django.setup()

from timetable.models import *
from scripts.amazon_helpers import *

# One requests session created at import time.
SESSION = requests.Session()


def randomize_ua():
    """Return the ``random`` attribute of a fresh ``UserAgent`` object."""
    agent_source = UserAgent()
    return agent_source.random


def get_request(url):
    cookies = http.cookiejar.CookieJar()
Ejemplo n.º 49
0
        (w, h) = im.size
        offset_w = (800 - w) / 2
        offset_h = (800 - h) / 2
        new_im = Image.new('RGBA', (800, 800), (255, 255, 255, 0))
        new_im.paste(im, (offset_w, offset_h))
        new_im.save(imageinfo.image_file.path)
        return True
    except:
        return False


if __name__ == '__main__':
    # Manual smoke run: look up one hard-coded ASIN, pass the response to
    # insert_item_info and print what it returns.
    asin = 'B00001P4ZH'
    lookup_options = {
        'ResponseGroup': 'Images,ItemAttributes,Offers,BrowseNodes',
        'MerchantId': 'Amazon',
        'Condition': 'New',
    }
    api = API(locale='us',
              access_key_id=ACCESS_KEY_ID,
              secret_access_key=SECRET_ACCESS_KEY,
              associate_tag=ASSOCIATE_TAG)
    result = api.item_lookup(asin, **lookup_options)
    outcome = insert_item_info(result, asin)
    print(outcome)
    #print price_conv('$34.90')
from amazonproduct import API
api = API(locale='us')
from amazonproduct import errors
from random import randint
import sys
import numpy as np
from pyspark import SparkContext


# Render one record as a single comma-separated line.
def toCSVLine(data):
    """Join the ``str()`` of every element of ``data`` with commas."""
    fields = [str(value) for value in data]
    return ','.join(fields)


def getPrice(price):
    """Return ``price`` shifted by a random integer offset in [-100, 100]."""
    jitter = randint(-100, 100)
    return price + jitter


# Crawl bookkeeping: `limit_reached` stops the browse-node walk below.
# NOTE(review): `count` is presumably the running item counter - confirm.
count = 0
limit_reached = False
sc = SparkContext(appName="AskMeMP")

# One RDD per store, each seeded with a single '+'-separated header row.
amazon_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE'])
walmart_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE'])
ebay_rdd = sc.parallelize(['ID+TITLE+AUTHOR+URL+PRICE'])

# Look up browse node 1000; its child nodes are walked next.
result = api.browse_node_lookup(1000)
for child1 in result.BrowseNodes.BrowseNode.Children.BrowseNode:
    if limit_reached:
        break
    result1 = api.browse_node_lookup(child1.BrowseNodeId)
import sys

reload(sys)
sys.setdefaultencoding('utf-8')

import numpy as np
from pyspark import SparkContext
from amazonproduct import API
import time

api = API(locale='us')


def parseVector(line):
    """Split a '+'-delimited record into ``(key, payload)``.

    The key is the first field. The payload is the tuple of fields
    1, 2, 4 and 3, in that order (the last two are swapped relative to the
    input). Fields beyond the fifth are ignored; fewer than five raise
    ``IndexError``.
    """
    fields = line.split('+')
    key = fields[0]
    payload = (fields[1], fields[2], fields[4], fields[3])
    return key, payload


# Timestamp taken before the Spark context and the RDDs are created.
pre = time.time()
sc = SparkContext(appName="AskMeMPQuery")
# One text-file RDD per store, read from CSV files on HDFS.
amazon_rdd = sc.textFile('hdfs://192.168.0.33:54310/final/amazon.csv')
walmart_rdd = sc.textFile('hdfs://192.168.0.33:54310/final/walmart.csv')
ebay_rdd = sc.textFile('hdfs://192.168.0.33:54310/final/ebay.csv')

count = 0
# Second timestamp, taken right before the Amazon search starts.
start = time.time()
for book in api.item_search('Books',
                            Keywords=sys.argv[1],
                            Condition='New',
                            Availability='Available'):
    #    print '%s %s' %(book.ASIN,book.ItemAttributes.Title)
Ejemplo n.º 52
0
    })
    # Check Datatype of dates and see if table can be arranged by price and date
    # sellerDFPage.sort_index(by=['Ebay_Price', 'Ebay_Link'], ascending=[False, False])
    # Ensure that this is acting in place
    return sellerDFPage


######################################################################
# Amazon
######################################################################

from amazonproduct import API
import amazonproduct

# Module-level Amazon client for the US locale; the three credential values
# are names defined elsewhere in this module.
apiAmazon = API(access_key_id=access_key_id_value,
                secret_access_key=secret_access_key_value,
                associate_tag=associate_tag_value,
                locale='us')


def AmazonItems(sellerDF):
    Amazon_listDF = pd.DataFrame()
    for j in sellerDF.index:
        # print "outer" + str(j)
        outerItemCall = apiAmazon.item_search(
            'All',
            Keywords=sellerDF.Ebay_Title[j].replace('New!', ''),
            ResponseGroup='OfferFull',
            Condition='New',
            Availability='Available')
        for i in range(0,
                       (len(outerItemCall.page(1).Items.getchildren()) - 4)):
Ejemplo n.º 53
0
from server import app
import os
# third-party modules
from amazonproduct import API
from lxml import etree
# my modules
from model import Book, connect_to_db, db

# Amazon API settings. Credentials are read from the environment; a missing
# variable raises KeyError as soon as this module is imported.
config = {
    'access_key': os.environ['ACCESS_KEY'],
    'secret_key': os.environ['SECRET_KEY'],
    'associate_tag': os.environ['ASSOCIATE_TAG'],
    'locale': 'us'
}

# Client built from the settings dict above.
api = API(cfg=config)


################################################################################
#Seed Book
################################################################################
def seed_books():
    book_txt = open("books.txt")
    for line in book_txt:
        line = line.split("|")
        for i, info in enumerate(line):
            line[i] = info.decode("utf-8").strip()
        db.session.add(
            Book(gutenberg_extraction_num=line[0],
                 name=line[1],
                 author=line[2],
Ejemplo n.º 54
0
import sys
reload(sys)
sys.setdefaultencoding('utf-8')


# Amazon API settings for the 'jp' locale. The key pair is obtained from the
# `keys` helper; the associate tag is written inline.
config = {
	'access_key': keys.access_key(),
	'secret_key': keys.secret_key(),
	'associate_tag': 'maxime-22',
	'locale': 'jp'
}



# Client built from the settings dict above.
api = API(cfg=config)


with open('../csv/tracks_mood.csv', 'rb') as infile, open('../csv/tracks_mood_amazon.csv', 'wb') as outfile:
# with open('tracks_mood.csv', 'rb') as infile:
	datareader = csv.reader(infile, delimiter=',')
	datawriter = csv.writer(outfile)

	datawriter.writerow("tid,title,artist,g1,g2,g5,g6,g7,g8,g9,g11,g12,g14,g15,g16,g17,g25,g28,g29,g31,g32,on_amazon_jp".split(","))
	next(datareader, None)


	for row in datareader:
		
		kwds = str(row[1]) + ' ' + str(row[2])
		title = str(row[1])
Ejemplo n.º 55
0
    'Bebes': 'Baby',
    'Computaci\\u00f3n': 'Computers',
    'Consolas y Videojuegos': 'VideoGames',
    'Deportes y Fitness': 'SportingGoods',
    'Electr\\u00f3nica, Audio y Video': 'Electronics',
    'Industrias y Oficinas': 'OfficeProducts',
    'Instrumentos Musicales': 'MusicalInstruments',
    'Joyas y Relojes': 'Jewelry',
    'Juegos y Juguetes': 'Toys',
    'Libros, Revistas y Comics': 'Books',
    'M\\u00fasica, Pel\\u00edculas y Series': 'DVD',
    'Ropa y Accesorios': 'Apparel',
    'Salud y Belleza': 'HealthPersonalCare'
}

# Client for the 'es' locale. No credentials are passed explicitly: the
# keyword arguments that would read them from the environment are commented
# out below.
# NOTE(review): presumably the library picks them up from its own
# configuration - confirm.
amazon_api = API(locale='es')
#, access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
#             secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
#             associate_tag=os.environ.get('AWS_ASSOCIATE_TAG'))


@app.route('/')
def index():
    """Serve ``index.html`` from ``STATIC_FOLDER`` for the root URL."""
    entry_page = "index.html"
    return send_from_directory(STATIC_FOLDER, entry_page)


@app.route('/<path:path>')
def static_proxy(path):
    """Serve the file at ``path`` from ``STATIC_FOLDER``."""
    response = send_from_directory(STATIC_FOLDER, path)
    return response

Ejemplo n.º 56
0
def main():
    """Collect recent review dates for tampon products and write reports.

    Steps:
      1. Search 'HealthPersonalCare' for 'Tampons' and keep every hit that
         has a ``SalesRank``, wrapped in a ``ProductInfo``.
      2. For the first 22 of those, open the review iframe in Chrome and
         store the link to the full review list, if one is present.
      3. For each product that has such a link, page through its reviews and
         collect the review dates until a page ends with a review older than
         365 days, or a page holds other than 10 dated reviews.
      4. Hand the products to ``write_all_reviews_CSV`` and
         ``write_reviews_per_month``.

    Both browser windows are closed even when a step raises.
    """
    api = API(locale='us',
              access_key_id=ACCESS_KEY,
              secret_access_key=SECRET_KEY,
              associate_tag=TAG)
    #Create List of Tampon Products from Amazon
    tampon_items = []
    response = api.item_search('HealthPersonalCare',
                               Keywords='Tampons',
                               ResponseGroup="Large, Reviews")
    for i in response:
        if hasattr(i, 'SalesRank'):
            product = ProductInfo()
            product.set_ASIN(i.ASIN)
            product.set_best_seller_rank(int(i.SalesRank))
            product.set_name(i.ItemAttributes.Title.text)
            product.set_review_iframe_url(i.CustomerReviews.IFrameURL)
            tampon_items.append(product)

    #Take the first 22 products for fetching reviews
    top_20_tampons = tampon_items[:22]

    #Open a Browser to get all the reviews (Dynamic Page Loading Amazon)
    browser = webdriver.Chrome()
    try:
        #Get link for all reviews from review Iframe
        for product in top_20_tampons:
            browser.get(product.review_iframe_url)
            x = browser.find_elements_by_class_name('small')
            if x:
                x = x[0].find_element_by_tag_name('a').get_attribute('href')
                product.set_all_review_url(str(x))
    finally:
        # Release the browser even if a page load or lookup failed.
        browser.close()

    #filter out the product whose reviews are not present
    top_20_tampons = [
        product for product in top_20_tampons if product.all_review_url
    ]
    '''
    Filter to reviews by "all reviews" otherwise scrap only 'Verified Purchaser Reviews'
    #top_20_tampons = set_filter_all_review(top_20_tampons)
    '''

    #Scan for all reviews
    socket.setdefaulttimeout(50)
    brow = webdriver.Chrome()
    try:
        brow.set_page_load_timeout(30)
        for product in top_20_tampons:
            time.sleep(5)
            brow.get(str(product.all_review_url))
            valid = True
            #Do it till all the previous 1 year reviews are scraped
            while valid:
                # Wait until the review list is present in the page.
                while True:
                    try:
                        review_list = brow.find_element_by_id('cm_cr-review_list')
                        break
                    except NoSuchElementException:
                        print('Excpetion')
                        # Pause briefly instead of hammering the driver.
                        time.sleep(1)

                #get all review dates from that page; the first three
                #characters of each date label are dropped before parsing
                dt = [
                    datetime.strptime(
                        str(element.text)[3:].replace(',', ''), '%B %d %Y')
                    for element in
                    review_list.find_elements_by_class_name('review-date')
                ]

                # setting review dates into product and Checking
                product.review_dates.extend(dt)

                if not dt:
                    # Page without dated reviews: nothing left to page
                    # through (dt[-1] would raise IndexError here).
                    valid = False
                #Check if last review on the page is 1 year old
                elif (datetime.now() - dt[-1]).days > 365:
                    valid = False
                # Goto next page to get more reviews
                elif len(dt) == 10:
                    last_button = brow.find_element_by_class_name("a-last")
                    next_page_url = last_button.find_element_by_tag_name(
                        'a').get_attribute('href')
                    print(next_page_url)
                    brow.get(str(next_page_url))
                else:
                    # A page with other than 10 dates is treated as the
                    # last one.
                    valid = False
    finally:
        brow.close()

    #Write a complete file for reviews
    write_all_reviews_CSV(top_20_tampons)

    #Write reviews per month per product for plotting and analysis
    write_reviews_per_month(top_20_tampons)
Ejemplo n.º 57
0
from amazonproduct import API
from fake_useragent import UserAgent
from bs4 import BeautifulSoup
from django.db.models import Q
from django.utils.encoding import smart_str
import cookielib, django, os, re, requests, sys, time

# Module-level Amazon API client for the US locale.
api = API(locale='us')
# Point Django at the project settings (unless the variable is already set)
# and initialise it before the model imports below.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "semesterly.settings")
django.setup()

from timetable.models import *
from scripts.amazon_helpers import *

# One requests session created at import time.
SESSION = requests.Session()


def randomize_ua():
    """Return the ``random`` attribute of a fresh ``UserAgent`` object."""
    agent_source = UserAgent()
    return agent_source.random


def get_request(url):
    cookies = cookielib.CookieJar()
    headers = {
        'User-Agent': randomize_ua(),
        'Accept': '*/*',
        'Host': 'uoftbookstore.com',
        'Referer': 'http://uoftbookstore.com/buy_courselisting.asp',
        'Content-Length': '0',
        'Content-Type': 'application/x-www-form-urlencoded'
    }