def _load_amazon_scraper():
    """Build an AmazonScraper from the credentials in keys/aws_keys.json.

    The key file is looked up next to this module and must provide the
    "aws_public_key", "aws_secret_key" and "product_api_tag" entries.
    """
    key_path = os.path.dirname(os.path.realpath(__file__)) + "/keys/aws_keys.json"
    # Context manager so the handle is released even if the JSON is malformed.
    with open(key_path) as key_file:
        configs = json.load(key_file)
    return AmazonScraper(
        configs["aws_public_key"],
        configs["aws_secret_key"],
        configs["product_api_tag"],
    )


def update_reviews(asin_list):
    """Ingest Amazon reviews for the first ASIN that has reviews newer than ours.

    Each ASIN is looked up on Amazon; the dates of its reviews are compared
    with the newest date returned by ``queries.find_date_for_review``. When
    at least one Amazon review is newer, every review of that product is
    scraped and passed to ``data_ingester.handleReview``.

    ASINs that Amazon does not know (``amazon.api.AsinNotFound``) and ASINs
    with no stored review dates to compare against are skipped.

    Args:
        asin_list: Iterable of ASINs to check.

    Returns:
        The value returned by ``data_ingester.handleReview`` for the first
        ASIN that needed updating, or ``None`` if none did.
    """
    amzn = None
    for asin in asin_list:
        if amzn is None:
            # The credentials are the same for every ASIN: read them once,
            # and only when there is actually something to look up.
            amzn = _load_amazon_scraper()

        try:
            product = amzn.lookup(ItemId=asin)
        except amazon.api.AsinNotFound:
            continue

        reviews = product.reviews()
        dates = queries.find_date_for_review(asin)
        media_type = queries.find_type_by_id(asin)

        unix_dates = [get_date(stored) for stored in dates]
        if not unix_dates:
            # Nothing stored to compare against; max() on an empty list would
            # raise ValueError and abort the whole batch.
            continue
        latest_stored = max(unix_dates)

        # Materialise once so the freshness check and the collection pass see
        # the same reviews even if product.reviews() is a one-shot iterator
        # (its type is not visible from here).
        all_reviews = list(reviews)

        update = False
        for review in all_reviews:
            if latest_stored < int(review.date):  # check if asin needs updating
                print("needs updating")
                update = True
        if not update:
            continue

        # The product has new reviews: fetch all of them from Amazon.
        # One product API handle is enough for the whole product.
        product_api = aws_module.setup_product_api()
        list_of_review_dicts = [
            {"text": url_scrape.parser(review.url), "unixtime": int(review.date)}
            for review in all_reviews
        ]
        # NOTE(review): returning here ends the run after the first ASIN that
        # had new reviews; later entries of asin_list are left untouched.
        # Confirm this is intended.
        return data_ingester.handleReview(asin, list_of_review_dicts, product_api, media_type)
    return None
product_dict["creator"] = creator return product_dict if __name__ == '__main__': parser = argparse.ArgumentParser(description='Pass in UCSD review collection filename, amt of products to skip, and amt of products to upload') parser.add_argument('-f','--filename', help='filename of the UCSD review collection zip', required=True) parser.add_argument('-s','--skip', help='Skip the first n products in the set', required=False) parser.add_argument('-a', '--amount', help='The amount of products to process and upload to S3 / the database', required=False) parser.add_argument('-t', '--producttype', help='Enter the media type (Movie, TV, Books', required=True) args = vars(parser.parse_args()) filename = args['filename'] skip = args['skip'] amount = args['amount'] producttype = args['producttype'] if skip is not None: skip = int(skip) if amount is not None: amount = int(amount) startTime = datetime.now() print ("starting") productapi = setup_product_api() parse(filename, skip, amount, productapi, producttype) print ("seconds taken: ",datetime.now() - startTime)