def main():
    """Search Twitter for sys.argv[1] and run NaiveBayes sentiment analysis.

    Reads API credentials from ``secret.json``, fetches up to 150 matching
    tweets, classifies each tweet's text, and prints per-tweet labels plus
    a final tally.
    """
    # Load API credentials from a local JSON file (kept out of the repo)
    with open('secret.json', 'r') as fp:
        credentials = json.load(fp)
    client = UserClient(**credentials)
    response = client.api.search.tweets.get(q=sys.argv[1], count=150)
    # Need to break down hierarchy (example 'user' column)
    tweets = pd.DataFrame(response.data['statuses'])
    # NOTE(review): classifier quality is poor — a movie-review training
    # corpus may not represent tweets well; expect false positives and a
    # few false negatives.
    print('Training classifier...')
    classifier = NaiveBayesAnalyzer()
    classifier.train()  # Train on a Movie Review Corpus
    print('Performing Sentiment Analysis...')
    counter = Counter()
    for text in tweets['text']:
        result = classifier.analyze(text)
        counter[result.classification] += 1
        # Fixed: Python 2 print statements were a SyntaxError under
        # Python 3 (the rest of this function already uses print()).
        print('%s: %s' % (result.classification, text))
    print('Total: ', counter)
class TestNaiveBayesAnalyzer(unittest.TestCase):
    """Unit tests for NaiveBayesAnalyzer's kind and analyze() contract."""

    def setUp(self):
        self.analyzer = NaiveBayesAnalyzer()

    def test_kind(self):
        assert_equal(self.analyzer.kind, DISCRETE)

    @attr('slow')
    def test_analyze(self):
        positive = 'I feel great this morning.'
        negative = 'This is a terrible car.'
        positive_result = self.analyzer.analyze(positive)
        # Item 0 is the discrete classification label
        assert_equal(positive_result[0], 'pos')
        assert_equal(self.analyzer.analyze(negative)[0], 'neg')
        # Items 1 and 2 are P(pos) and P(neg); together they form a
        # probability distribution and should sum to ~1.
        assert_true(isinstance(positive_result[1], float))
        assert_true(isinstance(positive_result[2], float))
        assert_about_equal(positive_result[1] + positive_result[2], 1)
class TestNaiveBayesAnalyzer(unittest.TestCase):
    """Checks NaiveBayesAnalyzer: its kind constant and analyze() output."""

    def setUp(self):
        self.analyzer = NaiveBayesAnalyzer()

    def test_kind(self):
        assert_equal(self.analyzer.kind, DISCRETE)

    @attr('slow')
    def test_analyze(self):
        pos_text = 'I feel great this morning.'
        neg_text = 'This is a terrible car.'
        result = self.analyzer.analyze(pos_text)
        # Classification label comes first in the result tuple
        assert_equal(result[0], 'pos')
        assert_equal(self.analyzer.analyze(neg_text)[0], 'neg')
        # Then P(pos) and P(neg) as floats that should sum to ~1
        assert_true(isinstance(result[1], float))
        assert_true(isinstance(result[2], float))
        assert_about_equal(result[1] + result[2], 1)
        # NOTE(review): this chunk begins mid-method — the two statements
        # below are the tail of a sibling plot method on a radar-chart
        # class whose header is outside this view.
        sdata = _scale_data(data, self.ranges)
        self.ax.plot(self.angle, np.r_[sdata, sdata[0]], *args, **kw)

    def fill(self, data, *args, **kw):
        # Fill the radar polygon: scale the raw values into axis ranges,
        # then close the loop by appending the first point again.
        sdata = _scale_data(data, self.ranges)
        self.ax.fill(self.angle, np.r_[sdata, sdata[0]], *args, **kw)


if __name__ == "__main__":
    start = timeit.default_timer()
    # init the analyzers
    analyzerBayes = NaiveBayesAnalyzer()
    analyzerPattern = PatternAnalyzer()
    # training first: a throwaway analyze() call forces each analyzer to
    # train/load its model up front, before timing the Spark job
    resultBayes = analyzerBayes.analyze("train this")
    resultPattern = analyzerPattern.analyze("train this")
    sc = SparkContext(appName="MovieSentiment")
    # map reduce: sentimentAnalysis presumably emits (feature, (pos, neg))
    # pairs — TODO confirm against its definition (not visible here)
    lines = sc.textFile("movieData.txt")
    posNneg = lines.map(sentimentAnalysis) \
        .reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1]))
    output = posNneg.collect()
    ScoreDict = {}
    for (feature, posAndNeg) in output:
        # NOTE(review): loop body is truncated at the end of this chunk
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer
import pickle
import sys
from collections import namedtuple

# Record mirroring NaiveBayesAnalyzer's result fields
sentimentTuple = namedtuple("Sentiment", field_names=["sentiment", "p_pos", "p_neg"])

n = NaiveBayesAnalyzer()
# Each argv entry is a pickled DataFrame-like object with a 'tweet' column.
for path in sys.argv[1:]:  # Fixed: 'file' shadowed the builtin
    with open(path, "rb") as f:
        # NOTE(review): pickle.load on externally-supplied files is unsafe
        # for untrusted input — acceptable only for self-produced pickles.
        df = pickle.load(f)
    results = []
    for tweet in df['tweet']:  # Fixed: dropped unused enumerate() index
        s = n.analyze(tweet)
        results.append((s.classification, s.p_pos, s.p_neg))
    pos = sum(1 for r in results if r[0] == 'pos')
    neg = sum(1 for r in results if r[0] == 'neg')
    print("Candidate Name: " + path)
    print("Total positive tweets: {}".format(pos))
    print("Total negative tweets: {}".format(neg))
    # Fixed: guard against ZeroDivisionError when a pickle has no tweets
    total = pos + neg
    print('negative to total ratio: {}'.format(neg / total if total else 0.0))
    # Persist the per-tweet results next to the input file
    with open(path.replace(".pkl", "") + "Sentiment.pkl", "wb") as f:
        pickle.dump(results, f)
    # Fixed: removed redundant f.close() — the with-block already closes f