def getReviews(target_url):
    """Return analyzed reviews for ``target_url`` wrapped in a ``myParser(...)`` call.

    Reviews are read from the cached JSON file when it can be opened and
    parsed; otherwise they are fetched through ``review_parser``. Either way
    the reviews are then handed to ``nlp.nlp_analyze`` with ``top_n=10``.

    Args:
        target_url: URL whose reviews are wanted. It is passed to
            ``review_parser.get_item_id`` on a cache miss and echoed back
            under the ``url`` key of the payload.

    Returns:
        The string ``'myParser(' + payload + ');'`` where ``payload`` is the
        JSON encoding of a dict with keys ``url`` (``target_url``) and
        ``revs`` (the value returned by ``nlp.nlp_analyze``).
    """
    # TODO: confirm whether review_parser returns parsed JSON or raw text.

    # Single-argument parenthesised print behaves identically as the
    # Python 2 statement and the Python 3 function.
    print("[server] Gathering reviews from file...")
    try:
        # NOTE: hard-coded test fixture. The intended lookup is one cache file
        # per URL, named after the URL with '/' replaced by '_', i.e.
        #   'cached_reviews/' + target_url.replace('/', '_')
        with open('cached_reviews/iphone_case.json') as reviews_from_file:
            reviews = json.load(reviews_from_file)
    except (IOError, ValueError):
        # Cache miss: the file is missing/unreadable (IOError) or does not
        # contain valid JSON (ValueError). Anything else is a real bug and is
        # allowed to propagate instead of silently triggering a scrape.
        print("[server] Gathering reviews from Amazon.com...")
        item_id = review_parser.get_item_id(target_url)
        reviews = review_parser.get_reviews(item_id)
        print("[server] Success. Scraped reviews from Amazon.com.")
    else:
        print("[server] Success. Gathered reviews from file.")

    # Analyze the reviews (per the original note, nlp_analyze also writes its
    # output to stdout and a file -- verify against the nlp module).
    print("[server] Analyzing reviews...")
    parsed_reviews = nlp.nlp_analyze(reviews, top_n=10)

    payload = json.dumps(dict(url=target_url, revs=parsed_reviews))
    return 'myParser(' + payload + ');'
import json

import nlp

# Build the review list from file. The context manager closes the handle
# even if the JSON fails to parse.
with open('reviews.json') as reviews_file:
    reviews_list = json.load(reviews_file)

# Run the analysis over the loaded reviews; the positional arguments are
# passed through unchanged (see nlp.nlp_analyze for their meaning).
nlp.nlp_analyze(reviews_list, 4, 20, True)