def main():
    """Read product ids from an input workbook, look each one up on Amazon,
    and append the scraped data to an output workbook, saving after each row.

    Fixes over the original:
    - ``input()`` returns a string (and Python 2's ``input`` evaluates raw
      input, which is unsafe); the starting index is now cast with ``int()``
      so ``while i < len(ids)`` and ``ids[i]`` work.
    - Python-2-only ``print`` statement replaced with the ``print()``
      function.
    """
    # User settings.
    input_file_name = 'data/input.xlsx'
    output_file_name = 'data/output_data'
    input_sheet_name = 'product_list'
    output_sheet_name = 'processed_data'

    # Initialize from the given settings.
    book_in = open_workbook(input_file_name)
    sheet_in = book_in.sheet_by_name(input_sheet_name)

    # Get the list of items from the Excel file (column 0 = ids,
    # column 1 = product types; row 0 is the header and is skipped).
    ids = sheet_in.col_values(0, 1)
    product_types = sheet_in.col_values(1, 1)

    # input() yields a string; cast so it can be compared to len(ids)
    # and used as a list index.
    i = int(input('starting point?'))

    amzn = AmazonScraper(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
                         AWS_ASSOCIATE_TAG)
    book_out = Workbook()
    sheet_out = book_out.add_sheet(output_sheet_name)
    add_data_headers(sheet_out)

    p_count = 0
    # Iterate through the remaining items; the workbook is saved after every
    # row so a crash mid-run loses at most one item.
    while i < len(ids):
        p = amzn.lookup(ItemId=ids[i])
        p_count += 1
        print('Processing', p_count)
        p_data = data(amzn, p, product_types[i])
        add_data(sheet_out, p_count, p_data)
        book_out.save(output_file_name + '_' + product_types[i] + '3.xls')
        i += 1
def update_reviews(asin_list):
    """Check each ASIN for Amazon reviews newer than the latest one stored
    locally; if any exist, scrape all reviews and hand them to the ingester.

    Returns the result of ``data_ingester.handleReview`` for the first ASIN
    that is found on Amazon (matching the original control flow, which
    returned from inside the loop), or ``None`` if every lookup raises
    ``AsinNotFound``.

    Fixes over the original:
    - The AWS key file was re-opened (and never closed) on every loop
      iteration; credentials are now loaded once with a ``with`` block.
    - ``product_api`` was only assigned inside the loop over new reviews, so
      an ASIN with no new reviews hit a ``NameError`` at the return
      statement; it is now created once per ASIN before the review scan.
    """
    # Load the AWS credentials once — they are loop-invariant.
    key_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                            "keys", "aws_keys.json")
    with open(key_path) as f:
        configs = json.loads(f.read())
    amzn = AmazonScraper(configs["aws_public_key"],
                         configs["aws_secret_key"],
                         configs["product_api_tag"])

    for asin in asin_list:
        try:
            p = amzn.lookup(ItemId=asin)
        except amazon.api.AsinNotFound:
            continue

        reviews = p.reviews()
        dates = queries.find_date_for_review(asin)
        media_type = queries.find_type_by_id(asin)

        # Most recent stored review date, as a unix timestamp.
        latest_stored = max(get_date(d) for d in dates)

        # Does Amazon have any review newer than what we have stored?
        update = False
        for review in reviews:
            if latest_stored < int(review.date):
                print("needs updating")
                update = True

        list_of_review_dicts = []
        # Created unconditionally so the return below never sees it unbound.
        product_api = aws_module.setup_product_api()
        if update:
            # The product has new reviews: fetch them all and collect each
            # review's text and timestamp.
            for review in list(reviews):
                comment_dict = {
                    "text": url_scrape.parser(review.url),
                    "unixtime": int(review.date),
                }
                list_of_review_dicts.append(comment_dict)
        return data_ingester.handleReview(asin, list_of_review_dicts,
                                          product_api, media_type)
def initialize(prodId):
    """Look up a product on Amazon and collect its full reviews.

    Parameters:
        prodId: the Amazon item id (ASIN) to look up.

    Returns:
        A tuple ``(reviews, reviews_title, prodName)`` where ``reviews`` and
        ``reviews_title`` are parallel lists of review bodies and titles, and
        ``prodName`` is the product title with '.' and '/' replaced by '-'
        (so it is safe to use as part of a file name).

    Fix over the original: the title-sanitising loop rebuilt the whole
    string once per character (O(n^2)); it is now a single pass. The manual
    review counter is replaced with ``enumerate``.
    """
    amzn = AmazonScraper(acess_key, secret_key, customer_tag, Region='IN')
    p = amzn.lookup(ItemId=prodId)
    rs = amzn.reviews(ItemId=prodId)

    reviews, reviews_title = [], []
    for i, r in enumerate(rs, start=1):
        fr = r.full_review()
        print_review(fr.title, fr.text, i)
        reviews.append(fr.text)
        reviews_title.append(fr.title)

    # Replace path-hostile characters in one pass instead of rebuilding the
    # whole string once per character.
    prodName = ''.join('-' if c in './' else c for c in p.title)
    return reviews, reviews_title, prodName
# Amazon Associates account (final code), and then you need to sign up to use # the Product Advertising API within the Associates account filename = "reviews_allinfo.csv" filename2 = "reviews_notext.csv" save_path = 'c:/output/' with open('product_ids.csv', 'rb') as f: csv_f = csv.reader(f) items = [row[0].strip() for row in csv_f] for number in items: try: p = amzn.lookup(ItemId=number) except AsinNotFound as e: print "Product {} was not found".format(number) continue rs = p.reviews() counter = 0 try: for review in rs: print review.asin print review.url print review.soup counter += 1 if (counter % 80) == 0: