def parseReviews(review_filepath, output_filepath="out.csv", reference_year=2016):
    """Score every review in a JSON-lines file and write one CSV row per review.

    Each non-blank line of ``review_filepath`` is parsed as a JSON object that
    must provide the keys ``text``, ``date``, ``stars`` and ``votes`` (itself
    holding ``funny``, ``useful`` and ``cool``). The year is taken from the
    part of ``date`` before the first ``-``.

    Columns written, in order:
        sentiment, funny, useful, cool, funny_norm, useful_norm, cool_norm, stars

    The ``*_norm`` columns are the raw vote counts divided by
    ``(reference_year - review_year) / time_normalize_factor``.
    ``time_normalize_factor`` is a module-level name defined outside this
    function.

    Args:
        review_filepath: Path of the input file, one JSON review per line.
        output_filepath: Path of the CSV file to create (overwritten if it
            exists). Defaults to ``"out.csv"``.
        reference_year: Year the review age is measured from. Defaults to 2016.

    Raises:
        ZeroDivisionError: If a review is dated in ``reference_year`` itself,
            because its age, and therefore the normalisation divisor, is zero.
        KeyError: If a review object lacks one of the expected keys.
        ValueError: If a non-blank line is not valid JSON or the date does not
            start with an integer year.
    """
    sentiment_analyzer = Sentiment()

    # Context managers guarantee both files are closed (and the CSV flushed)
    # even when a malformed review aborts the loop.
    with open(review_filepath, 'r') as f_in, open(output_filepath, 'w') as f_out:
        csv_writer = csv.writer(f_out)
        count = 0
        for line in f_in:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue

            count += 1
            if count % 1000 == 0:
                # Progress indicator for large inputs.
                print(count)

            review_obj = json.loads(line)
            text = review_obj["text"]
            date = review_obj["date"]
            votes = review_obj["votes"]
            stars = review_obj["stars"]

            funny = votes["funny"]
            useful = votes["useful"]
            cool = votes["cool"]

            # Older reviews have had more time to collect votes, so the
            # counts are scaled by the review's age in years.
            year = int(date.split("-")[0])
            normalize_factor = float(reference_year - year) / time_normalize_factor
            funny_norm = float(funny / normalize_factor)
            useful_norm = float(useful / normalize_factor)
            cool_norm = float(cool / normalize_factor)

            sentiment = sentiment_analyzer.getSentiment(text)
            csv_writer.writerow([
                sentiment,
                funny, useful, cool,
                funny_norm, useful_norm, cool_norm,
                stars,
            ])
import csv
import os

from bs4 import BeautifulSoup

from sentiment import Sentiment

# Script: for every file in ./zagat, extract the review body, state and city
# from the page's itemprop-tagged elements and write
# [sentiment, state, city] rows to out.csv.
if __name__ == '__main__':
    review_dir = './zagat'
    sentiment = Sentiment()

    # The context manager closes (and flushes) out.csv on every exit path;
    # file objects are closed with their own .close(), there is no built-in
    # close() function to call on them.
    with open('out.csv', 'w') as csvFile:
        writer = csv.writer(csvFile)
        for f in os.listdir(review_dir):
            with open(os.path.join(review_dir, f)) as review:
                # NOTE(review): no parser is named, so bs4 picks whichever is
                # installed; consider pinning one for reproducible output.
                soup = BeautifulSoup(review)
                # soup.find() returns None when the element is absent, in
                # which case the .text access below raises AttributeError.
                review_city = soup.find(itemprop='addressLocality').text
                review_state = soup.find(itemprop='addressRegion').text
                review_text = soup.find(itemprop='reviewBody').text

            review_sent = sentiment.getSentiment(review_text)
            writer.writerow([str(review_sent), review_state, review_city])