Beispiel #1
0
def update_reviews(asin_list):
    """Check Amazon for newer reviews of an ASIN and hand them to the ingester.

    AWS credentials are read once from ``keys/aws_keys.json`` next to this
    file. ASINs that Amazon does not know (``amazon.api.AsinNotFound``) are
    skipped. For the first ASIN that resolves, the newest stored review date
    is compared against the dates of the reviews currently on Amazon; when a
    newer review exists (or nothing is stored yet), every review is scraped
    into a ``{"text": ..., "unixtime": ...}`` dict.

    Args:
        asin_list: iterable of ASIN identifiers to look up.

    Returns:
        Whatever ``data_ingester.handleReview`` returns for the first ASIN
        that could be looked up, or ``None`` when no ASIN resolved. The list
        of review dicts passed to the ingester is empty when no update was
        needed.
    """
    keys_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "keys", "aws_keys.json"
    )
    # Load the credentials once and close the file deterministically instead
    # of re-opening (and leaking) a handle for every ASIN.
    with open(keys_path) as key_file:
        configs = json.load(key_file)
    amzn = AmazonScraper(
        configs["aws_public_key"],
        configs["aws_secret_key"],
        configs["product_api_tag"],
    )

    for asin in asin_list:
        try:
            p = amzn.lookup(ItemId=asin)
        except amazon.api.AsinNotFound:
            continue

        reviews = p.reviews()
        dates = queries.find_date_for_review(asin)
        media_type = queries.find_type_by_id(asin)

        # Materialize once: the reviews are needed both for the freshness
        # check and for scraping, and a one-shot iterator would be exhausted
        # after the first pass.
        all_reviews = list(reviews)

        unix_dates = [get_date(stored_date) for stored_date in dates]
        # No stored dates means nothing has been ingested yet, so every
        # review counts as new (max() on an empty list would raise).
        latest_stored = max(unix_dates) if unix_dates else None

        update = latest_stored is None or any(
            latest_stored < int(review.date) for review in all_reviews
        )

        list_of_review_dicts = []
        # If the product has new reviews, get them from Amazon.
        if update:
            print("needs updating")
            list_of_review_dicts = [
                {
                    "text": url_scrape.parser(review.url),
                    "unixtime": int(review.date),
                }
                for review in all_reviews
            ]

        # Created unconditionally so the call below never sees an unbound
        # name when there is nothing to update.
        product_api = aws_module.setup_product_api()

        # NOTE(review): returning here ends the loop after the first ASIN
        # that Amazon resolves, so later entries of asin_list are never
        # processed. Kept to preserve the existing return contract -- confirm
        # whether all ASINs were meant to be handled.
        return data_ingester.handleReview(
            asin, list_of_review_dicts, product_api, media_type
        )

    return None

if __name__ == '__main__':
    # Command-line entry point: parse the options, build the product API
    # client, run parse() over the review collection and report elapsed time.
    parser = argparse.ArgumentParser(description='Pass in UCSD review collection filename, amt of products to skip, and amt of products to upload')
    parser.add_argument('-f', '--filename', help='filename of the UCSD review collection zip', required=True)
    # type=int lets argparse reject non-numeric input with a usage error
    # instead of a ValueError traceback; omitted options still yield None.
    parser.add_argument('-s', '--skip', type=int, help='Skip the first n products in the set', required=False)
    parser.add_argument('-a', '--amount', type=int, help='The amount of products to process and upload to S3 / the database', required=False)
    parser.add_argument('-t', '--producttype', help='Enter the media type (Movie, TV, Books)', required=True)
    args = vars(parser.parse_args())

    filename = args['filename']
    skip = args['skip']
    amount = args['amount']
    producttype = args['producttype']

    startTime = datetime.now()
    print("starting")

    productapi = setup_product_api()

    parse(filename, skip, amount, productapi, producttype)

    print("seconds taken: ", datetime.now() - startTime)