Esempio n. 1
0
def getReviews(target_url):
    """Load reviews for ``target_url``, analyze them and return a JSONP-style string.

    The reviews are first read from a cached JSON file; if that file cannot be
    read or does not contain valid JSON, they are fetched through
    ``review_parser`` instead. The reviews are then passed to
    ``nlp.nlp_analyze`` and the result is serialized.

    Args:
        target_url: URL of the product page. It is handed to
            ``review_parser.get_item_id`` on a cache miss and echoed back in
            the response under the ``url`` key.

    Returns:
        A string of the form ``myParser(<json>);`` where ``<json>`` is an
        object with the keys ``url`` and ``revs``.
    """
    # NOTE(review): it is still unclear whether the parser returns JSON or
    # plain text -- confirm before relying on the shape of `reviews`.

    # NOTE: test scaffolding -- every URL currently reads the same cache file.
    # TODO: switch to the per-URL cache file once it is ready:
    #     reviews_file = target_url.replace('/', '_')
    #     with open('cached_reviews/' + reviews_file) as reviews_from_file:
    #         reviews = json.load(reviews_from_file)
    try:
        print("[server] Gathering reviews from file...")
        with open('cached_reviews/iphone_case.json') as reviews_from_file:
            reviews = json.load(reviews_from_file)
    except (IOError, ValueError):
        # Only a missing/unreadable cache file (IOError) or malformed JSON
        # (ValueError) counts as a cache miss. Anything else -- Ctrl-C,
        # programming errors -- is allowed to propagate instead of being
        # silently turned into a scrape.
        print("[server] Gathering reviews from Amazon.com...")
        item_id = review_parser.get_item_id(target_url)
        reviews = review_parser.get_reviews(item_id)
        print("[server] Success. Scraped reviews from Amazon.com.")
    else:
        print("[server] Success. Gathered reviews from file.")

    # Analyze the reviews; presumably top_n limits how many results are kept
    # -- TODO confirm against nlp.nlp_analyze.
    print("[server] Analyzing reviews...")
    parsed_reviews = nlp.nlp_analyze(reviews, top_n=10)

    # Wrap the JSON payload in a call to the fixed callback name `myParser`.
    payload = json.dumps(dict(url=target_url, revs=parsed_reviews))
    return 'myParser(' + payload + ');'
Esempio n. 2
0
import nlp
import json

# Build the review list from the JSON file on disk. The context manager closes
# the file handle as soon as parsing finishes instead of leaving it open.
with open('reviews.json') as reviews_file:
    reviews_list = json.load(reviews_file)

# Run the analysis on the loaded reviews. The positional arguments (4, 20, True)
# are passed through unchanged; their meaning is defined by nlp.nlp_analyze
# -- TODO confirm against its signature.
nlp.nlp_analyze(reviews_list, 4, 20, True)