def run(self):
    """Mine the reviews of product ``self.cid`` and print per-feature opinion counts.

    Parses the review file mapped to ``self.cid``, stores the features
    returned by ``get_frequent_features(self.min_support)`` in
    ``self.features``, tallies positive / negative / neutral opinions per
    feature in ``self.ratings`` and pretty-prints that tally.
    """
    review_name = ReviewParser.map_cid_to_name(self.cid)
    # NOTE(review): `review_files` and `choice` are module-level globals that
    # are only assigned by the ``__main__`` script section, so calling run()
    # from an importing module raises NameError here.
    review_format = review_files[choice].split('.')[-1]

    # Keep the file open only while the parser needs it; the original never
    # closed the handle.
    with open(settings.reviews_path + review_name, 'rb') as review_file:
        rev = ReviewParser(review_file, review_format)
        rev.parse()
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print("Mining %d reviews" % len(rev.reviews))
        text = rev.get_raw_text()

    f = FeatureExtractor(text, review_name)
    self.features = f.get_frequent_features(self.min_support)
    for ftr in self.features:
        # The first element of each feature entry is used as the ratings key.
        self.ratings[ftr[0]] = {'positive': 0, 'negative': 0, 'neutral': 0}

    o = OpinionSentenceFinder(self.features, f.feature_sentences)

    # Flatten the (feature, sentiment) pairs of every sentence whose
    # 'opinion_sent' list holds more than one entry.
    opinion_sents = [
        opinion
        for sentence in o.feature_sentences
        if len(sentence['opinion_sent']) > 1
        for opinion in sentence['opinion_sent']
    ]

    for ftr, sentiment in opinion_sents:
        # Identity checks are deliberate: only the exact True / False objects
        # count as positive / negative, anything else is tallied as neutral.
        if sentiment[0] is True:
            self.ratings[ftr]['positive'] += 1
        elif sentiment[0] is False:
            self.ratings[ftr]['negative'] += 1
        else:
            self.ratings[ftr]['neutral'] += 1

    pp = pprint.PrettyPrinter(indent=4)
    print("Is this a %s?" % f.product_category)
    print("%d features are interesting" % len(self.features))
    pp.pprint(self.ratings)
def run(self):
    """Mine the reviews of product ``self.cid`` and print per-feature opinion counts.

    Parses the review file mapped to ``self.cid``, stores the features
    returned by ``get_frequent_features(self.min_support)`` in
    ``self.features``, tallies positive / negative / neutral opinions per
    feature in ``self.ratings`` and pretty-prints that tally.
    """
    review_name = ReviewParser.map_cid_to_name(self.cid)
    # NOTE(review): `review_files` and `choice` are module-level globals that
    # are only assigned by the ``__main__`` script section, so calling run()
    # from an importing module raises NameError here.
    review_format = review_files[choice].split(".")[-1]

    # Keep the file open only while the parser needs it; the original never
    # closed the handle.
    with open(settings.reviews_path + review_name, "rb") as review_file:
        rev = ReviewParser(review_file, review_format)
        rev.parse()
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print("Mining %d reviews" % len(rev.reviews))
        text = rev.get_raw_text()

    f = FeatureExtractor(text, review_name)
    self.features = f.get_frequent_features(self.min_support)
    for ftr in self.features:
        # The first element of each feature entry is used as the ratings key.
        self.ratings[ftr[0]] = {"positive": 0, "negative": 0, "neutral": 0}

    o = OpinionSentenceFinder(self.features, f.feature_sentences)

    # Flatten the (feature, sentiment) pairs of every sentence whose
    # "opinion_sent" list holds more than one entry.
    opinion_sents = [
        opinion
        for sentence in o.feature_sentences
        if len(sentence["opinion_sent"]) > 1
        for opinion in sentence["opinion_sent"]
    ]

    for ftr, sentiment in opinion_sents:
        # Identity checks are deliberate: only the exact True / False objects
        # count as positive / negative, anything else is tallied as neutral.
        if sentiment[0] is True:
            self.ratings[ftr]["positive"] += 1
        elif sentiment[0] is False:
            self.ratings[ftr]["negative"] += 1
        else:
            self.ratings[ftr]["neutral"] += 1

    pp = pprint.PrettyPrinter(indent=4)
    print("Is this a %s?" % f.product_category)
    print("%d features are interesting" % len(self.features))
    pp.pprint(self.ratings)
elif sentiment[0] is False: self.ratings[ftr]['negative'] += 1 else: self.ratings[ftr]['neutral'] += 1 pp = pprint.PrettyPrinter(indent = 4) print "Is this a %s?" % f.product_category print "%d features are interesting" % len(self.features) #pp.pprint(opinion_sents) pp.pprint(self.ratings) if __name__ == "__main__": try: min_support = int(sys.argv[1]) except: min_support = 4 review_files = ReviewParser.get_available_reviews() for review_file in review_files: print review_files.index(review_file), ' ' + review_file choice = int(input('#')) if choice not in xrange(0, len(review_files)): print 'Error' exit(-1) cid = review_files[choice].split(".")[0].split("_")[-2] om = OpinionMiner(cid, min_support) om.run()
from ReviewParser import ReviewParser
from FeatureExtractor import FeatureExtractor

# Exploratory script: parse one CSV review file and print its frequent features.

# Review file names, resolved relative to ../data/reviews/.
review_file = ['Apple_iPhone_4.csv', 'Blackberry_Torch_9800.csv', 'Nikon_D90.csv', 'Canon_ELPH_300_HS.csv']

# Parse the file at index 3; the handle is closed as soon as the raw text has
# been extracted (the original left it open).
with open('../data/reviews/' + review_file[3], 'rb') as reviews:
    rev = ReviewParser(reviews, 'CSV')
    rev.parse()
    text = rev.get_raw_text()

f = FeatureExtractor(text)
# Single-argument print(...) produces the same output on Python 2 and 3.
print(f.get_frequent_features(5))

# The bare string below is disabled experimental code kept for reference; as an
# expression statement it has no effect at runtime.
"""
#tokenize_patterns = ['[Nn]ikon ?[dD][0-9]+', '([0-9]+ ?mm)', '(auto[ -_]?focus)', '(Apple)[ ]?(iphone)??[0-5]?[gs]*']
features = [w.lower() for (w,t) in tags if t.startswith('N') and t != 'NNP']
features = p.stemmer(features)
dist = nltk.FreqDist(features)
obs = [ob for ob in dist.iteritems()]
logfile = open('/tmp/log.txt', 'w')
logfile.write("".join(str(obs)).replace("), (", ")\n("))
logfile.close()
"""
self.ratings[ftr]["positive"] += 1 elif sentiment[0] is False: self.ratings[ftr]["negative"] += 1 else: self.ratings[ftr]["neutral"] += 1 pp = pprint.PrettyPrinter(indent=4) print "Is this a %s?" % f.product_category print "%d features are interesting" % len(self.features) # pp.pprint(opinion_sents) pp.pprint(self.ratings) if __name__ == "__main__": try: min_support = int(sys.argv[1]) except: min_support = 4 review_files = ReviewParser.get_available_reviews() for review_file in review_files: print review_files.index(review_file), " " + review_file choice = int(input("#")) if choice not in xrange(0, len(review_files)): print "Error" exit(-1) cid = review_files[choice].split(".")[0].split("_")[-2] om = OpinionMiner(cid, min_support) om.run()