Python AmazonScraperの例、amazon_scraper.AmazonScraper Pythonの例

コード例 #1

0

ファイルを表示

ファイル: main.py プロジェクト: prakashpaudel/amazonreviews

def reviewmain():
    input_file_name = 'data/reviews.xlsx'
    output_file_name = 'data/reviews_out'
    input_sheet_name = 'reviews'
    output_sheet_name = 'reviews'
    
    #Initialize from given settings
    book_in = open_workbook(input_file_name)
    sheet_in = book_in.sheet_by_name(input_sheet_name)
    
    amzn = AmazonScraper(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_ASSOCIATE_TAG)
    book_out = Workbook()
    sheet_out = book_out.add_sheet(output_sheet_name)
    
    ids = sheet_in.col_values(0,1)
    
    io = input('starting point?')
    i = io
    
    
    while i < len(ids):
        row = ids[i][:-1]
        print 'Item ',i+1
        result = 0
        count = 0.0
        for j in row.split(','):
            r = amzn.review(Id=j)
            count +=1
            result += r.rating*5
        add_data(sheet_out, i, [result/count])
        book_out.save(output_file_name + 'helpdec19.xls')
        i += 1

コード例 #2

0

ファイルを表示

ファイル: scraper.py プロジェクト: CatalystOfNostalgia/hoot

def update_reviews(asin_list):
    for asin in asin_list:
        f = open(os.path.dirname(os.path.realpath(__file__)) + "/keys/aws_keys.json")
        configs = json.loads(f.read())
        amzn = AmazonScraper(configs["aws_public_key"], configs["aws_secret_key"], configs["product_api_tag"])
        try:
            p = amzn.lookup(ItemId=asin)
        except amazon.api.AsinNotFound as e:
            continue
        reviews = p.reviews()
        dates = queries.find_date_for_review(asin)
        media_type = queries.find_type_by_id(asin)
        unix_dates = []
        for date in dates:
            unix_dates.append(get_date(date))
        date = max(unix_dates)
        update = False
        for review in reviews:
            if date < int(review.date):  #check if asin needs updating
                print("needs updating")
                update = True
        list_of_review_dicts =[]
        #if the product has new reviews get them from amazon
        if(update):
            all_reviews = list(reviews)
            for review in all_reviews:  #get all reviews and add in values into the dictionary
                 product_api = aws_module.setup_product_api()
                 comment_dict = dict()
                 comment_dict["text"] = url_scrape.parser(review.url)
                 comment_dict["unixtime"] = int(review.date)
                 list_of_review_dicts.append(comment_dict)
        return data_ingester.handleReview(asin, list_of_review_dicts, product_api, media_type)

コード例 #3

0

ファイルを表示

ファイル: main.py プロジェクト: prakashpaudel/amazonreviews

def main():
    #user settings
    input_file_name = 'data/input.xlsx'
    output_file_name = 'data/output_data'
    input_sheet_name = 'product_list'
    output_sheet_name = 'processed_data'
    
    number_of_items = 100

    #Initialize from given settings
    book_in = open_workbook(input_file_name)
    sheet_in = book_in.sheet_by_name(input_sheet_name)
    
    #Get list of items from excel file
    ids = sheet_in.col_values(0,1)
    product_types = sheet_in.col_values(1,1)
    
    io = input('starting point?')
    i = io
    amzn = AmazonScraper(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_ASSOCIATE_TAG)
    book_out = Workbook()
    sheet_out = book_out.add_sheet(output_sheet_name)
    add_data_headers(sheet_out)
    p_count = 0
    
    #iterate through items
    while i < len(ids):
        p = amzn.lookup(ItemId=ids[i])
        p_count += 1
        print 'Processing', p_count
        p_data = data(amzn, p, product_types[i])
        add_data(sheet_out, p_count, p_data)
        book_out.save(output_file_name + '_' + product_types[i] + '3.xls')
        i = i+1

コード例 #4

0

ファイルを表示

def initialize(prodId):

    amzn = AmazonScraper(acess_key, secret_key, customer_tag, Region='IN')
    p = amzn.lookup(ItemId=prodId)
    rs = amzn.reviews(ItemId=prodId)
    reviews, reviews_title = [], []
    i = 1
    for r in rs:
        fr = r.full_review()
        print_review(fr.title, fr.text, i)
        reviews.append(fr.text)
        reviews_title.append(fr.title)
        i += 1
    prodName = p.title
    for x in range(len(prodName)):
        string = list(prodName)
        if string[x] == '.' or string[x] == '/': string[x] = '-'
        prodName = ''.join(string)
    return reviews, reviews_title, prodName

コード例 #5

0

ファイルを表示

 def setUpClass(cls):
     config = {}
     try:
         config['access_key'] = os.environ['AWS_ACCESS_KEY_ID']
         config['secret_key'] = os.environ['AWS_SECRET_ACCESS_KEY']
         config['associate_tag'] = os.environ['AWS_ASSOCIATE_TAG']
     except:
         raise AssertionError('''
             The following environment variables must be set:
                     "AWS_ACCESS_KEY_ID"
                     "AWS_SECRET_ACCESS_KEY"
                     "AWS_ASSOCIATE_TAG"
         ''')
     cls.amzn = AmazonScraper(MaxQPS=0.5, **config)

コード例 #6

0

ファイルを表示

ファイル: search.py プロジェクト: mikuyves/amazon-product

        time.sleep(random.expovariate(0.1))
        return True


# Amazon api 验证资料。
AUTH_ARGS = [AMZ_ACCESS_KEY, AMZ_SECRET_KEY, AMZ_ASSOC_TAG]
# Amazon api 请求设置。
AUTH_KWARGS = {
    'Region': 'CN',
    'MaxQPS': 0.9,
    'Timeout': 5.0,
    'ErrorHandler': error_handler
}

amz_product = AmazonAPI(*AUTH_ARGS, **AUTH_KWARGS)
amz_scraper = AmazonScraper(*AUTH_ARGS, **AUTH_KWARGS)
amz_nose = bottlenose.Amazon(Parser=lambda text: BeautifulSoup(text, 'xml'),
                             *AUTH_ARGS,
                             **AUTH_KWARGS)


class AmazonLookupItem(object):
    # Wrap all the useful api from AmazonAPI and add some new.
    def __init__(self, asin):
        amz = AmazonAPI(*AUTH_ARGS, **AUTH_KWARGS)
        print('\n>>> Parsing item %s from api...' % asin)
        self.item_api = amz.lookup(ItemId=asin)
        print('Done.\n')

    @property
    def is_prime(self):

コード例 #7

0

ファイルを表示

ファイル: amznsrch.py プロジェクト: seifer08ms/amzn-srch

def main(num_items, heading_level, args):
    """Main routine"""

    # Retrieve the contents of the API key file
    apikey = get_api_config(".amznrc")

    # Create AmazonScraper object using API key
    amznscpr = AmazonScraper(*apikey)

    # Check keyword list entered on the command line
    if len(args) < 1:
        print ("Missing search terms. For usage help: python amznsrch.py -h")
        sys.exit(1)

    # Loop through quoted lists of search terms from command line arguments
    for arg in args:

        # Print search terms as a markdown heading
        srch_terms = str(arg)
        if heading_level > 0 and heading_level < 7:
            print "\n" + "#" * heading_level + " " + srch_terms + "\n"

        # Fetch and return results
        for item in itertools.islice(amznscpr.search(Keywords=srch_terms, SearchIndex="Books"), num_items):

            # Skip if no title, else encode, remove parenthetical text, & quote
            if not item.title:
                continue
            else:
                bktitle = item.title.encode("utf8")
                bktitle = re.sub("\s*[(\[].*[)\]]", "", bktitle)
                bktitlesrch = urllib.quote_plus('"' + bktitle + '"')

            # Encode author, if present, and format for printing
            if not item.author:
                bkauthor = ""
            else:
                bkauthor = "by " + item.author.encode("utf8")

            # Add associate tag to item URL
            bkurl = str(item.url) + "/?tag=" + apikey[2]

            # Construct links as desired
            amzn = "[AMZN](" + bkurl + ")"
            goog = (
                "[GOOG]"
                + "(https://www.google.com/"
                + "search?tbo=p&tbm=bks&q=intitle:"
                + bktitlesrch
                + "&num=10&gws_rd=ssl)"
            )
            spl = (
                "[SPL](https://seattle.bibliocommons.com/search?"
                + "t=title&search_category=title&q="
                + bktitlesrch
                + "&commit=Search)"
            )
            uwl = (
                "[UW](http://alliance-primo.hosted.exlibrisgroup.com/"
                + "primo_library/libweb/action/search.do?fn=search&"
                + "ct=search&vid=UW&vl%28753972432UI0%29=title&"
                + "vl%281UIStartWith0%29=starts+with&vl%28freeText0%29="
                + bktitlesrch
                + "&Submit=Search)"
            )
            # Searching UW Libraries through WorldCat to be deprecated 2015-09
            # uwl = ('[UW](http://uwashington.worldcat.org' +
            #       '/search?q=ti%3A' + bktitlesrch + '&qt=advanced)')

            # Print markdown for title, author, and links as bulleted list item
            print ("- _" + bktitle + "_ " + bkauthor + " ( " + goog + " | " + amzn + " | " + spl + " | " + uwl + " )")

コード例 #8

0

ファイルを表示

def main():
    validate_args()
    scrapper = AmazonScraper('products.yml')
    print('===============================INICIO===============================')
    process_search(scrapper)
    print('===============================FIN===============================')

コード例 #9

0

ファイルを表示

import requests, time, bottlenose, math, urllib, csv
from bs4 import BeautifulSoup
from amazon_scraper import AmazonScraper  # https://github.com/adamlwgriffiths/amazon_scraper
from decimal import Decimal
from app.categories import *
from app_config import *

# Amazon scraper + Amazon API wrapper
amazon = AmazonScraper(app.config['AMZ_API_KEY'], app.config['AMZ_API_SECRET'],
                       app.config['AMZ_ASSOCIATE'])

# Access Raw Amazon XML Response
amazon_raw = bottlenose.Amazon(app.config['AMZ_API_KEY'],
                               app.config['AMZ_API_SECRET'],
                               app.config['AMZ_ASSOCIATE'])


def upc_to_asin(upc):
    time.sleep(1)
    p = amazon.lookup(ItemId=upc, IdType='UPC', SearchIndex='All')
    if type(p) != list:
        asin = [p.asin]
    else:
        asin = []
        count = 0
        while count <= len(p) - 1:
            asin.append(p[count].asin)
            count += 1
    return (asin)

コード例 #10

0

ファイルを表示