def span(common_words, path='uploads/TWEETS.csv'):
    """Return the column-1 values bracketing the rows that mention ``common_words``.

    Rows of the CSV at ``path`` are scanned in file order. A row matches when
    it has more than five columns and ``common_words`` is a substring of
    column 5. Column 1 of every matching row is collected (presumably the
    tweet date, going by how the result is used -- verify against the CSV
    layout).

    Args:
        common_words: Substring to look for in column 5 of each row.
        path: CSV file to scan. Defaults to the uploaded tweets file.

    Returns:
        A two-element list ``[first, last]``. Each element is itself a list
        holding zero or one value: ``first`` is taken from the second
        matching row, ``last`` from the final matching row. Both are empty
        when nothing matches.
    """
    # The limit is global to the csv module; raise it before any row is parsed.
    csv.field_size_limit(20000000)

    with open(path, 'r', encoding="utf8") as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='|')
        dates = [
            row[1]
            for row in reader
            if len(row) > 5 and common_words in row[5]
        ]

    # NOTE(review): the lower bound is the *second* match (index 1), not the
    # first. Kept as-is because callers may rely on it -- confirm whether
    # index 0 was intended.
    return [dates[1:2], dates[-1:]]
def class_dataB_test(clf="Support Vector Machine"):
    """Tally positive and negative rows of ``uploads/dataB.csv`` per month.

    Every row with more than five columns is classified from column 5 using
    the classifier loaded for ``clf``. Column 1, lower-cased, is used as the
    month key and is printed for each such row.

    Args:
        clf: Classifier name handed to ``fact.load_classifier``.

    Returns:
        A ``(positives, negatives)`` tuple of two 12-element lists of counts
        ordered jan, feb, mar, april, may, jun, jul, aug, sep, oct, nov, dec.
    """
    model = fact.load_classifier(clf)

    with open('uploads/dataB.csv', 'r', encoding="utf8") as wash:
        reader = csv.reader(wash, delimiter=',', quotechar='|')
        csv.field_size_limit(20000000)
        rows = list(reader)

    # NOTE(review): April is keyed 'april' while every other month uses a
    # three-letter abbreviation; a key of 'apr' would never be counted --
    # confirm against the data.
    calendar = ['jan', 'feb', 'mar', 'april', 'may', 'jun',
                'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    pos_by_month = dict.fromkeys(calendar, 0)
    neg_by_month = dict.fromkeys(calendar, 0)

    for row in rows:
        if len(row) <= 5:
            continue
        body = row[5]
        try:
            month = row[1].lower()
            print(month)
            label = fact.get_sentiment(body, model)
            if label == "positive":
                if month in pos_by_month:
                    pos_by_month[month] += 1
            elif label == "negative":
                if month in neg_by_month:
                    neg_by_month[month] += 1
        except IndexError:
            continue

    positives = [pos_by_month[name] for name in calendar]
    negatives = [neg_by_month[name] for name in calendar]
    return positives, negatives
def test(testset=testset, weight_file=args.weights):
    """Score the saved classifier on a labelled test set and report accuracy.

    Args:
        testset: Labelled examples to evaluate; defaults to the module-level
            ``testset``.
        weight_file: Passed to ``load_classifier`` to locate the saved
            classifier; defaults to ``args.weights``.

    Returns:
        The value returned by ``nltk.classify.accuracy`` for the classifier
        on the featurised test set.
    """
    began = time.time()
    model = load_classifier(weight_file)

    banner = "Testing Naive Bayes Classifer on %d examples (%s)"
    print(banner % (len(testset), time_since(began)))

    # Features are extracted from the labelled examples via _gender_features.
    features = apply_features(_gender_features, testset, labeled=True)
    score = nltk.classify.accuracy(model, features)

    summary = "Testing accuracy is %.2f%% on %d examples (%s)"
    print(summary % (score * 100, len(features), time_since(began)))
    return score
def class_dataB(clf="Support Vector Machine"):
    """Classify the tweets from ``compB()`` and summarise them per month.

    Each tweet is run through ``fact.processTweet``. The month is read from
    the processed text as the second space-separated token of its second
    comma-separated field, lower-cased. Tweets whose text does not have that
    shape are skipped.

    Args:
        clf: Classifier name handed to ``fact.load_classifier``.

    Returns:
        A tuple ``(posData, negData, p, n)``:
        ``posData`` / ``negData`` are 12-element lists of monthly counts
        ordered jan, feb, mar, april, may, jun, jul, aug, sep, oct, nov, dec;
        ``p`` / ``n`` are the positive / negative share of all tweets as
        percentage strings with two decimals (``'0.00'`` when there are no
        tweets).
    """
    ts = compB()
    classifier = fact.load_classifier(clf)

    # NOTE(review): April is keyed 'april' while every other month uses a
    # three-letter abbreviation; a month token of 'apr' would never be
    # counted -- confirm against the date format in the tweets.
    months = ['jan', 'feb', 'mar', 'april', 'may', 'jun',
              'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    positive_by_month = dict.fromkeys(months, 0)
    negative_by_month = dict.fromkeys(months, 0)

    # NOTE(review): the totals count every classified tweet, including those
    # whose month token is not one of the keys above -- confirm intended.
    postweets = 0
    negtweets = 0

    for t in ts:
        text = fact.processTweet(t)
        try:
            month = text.split(',')[1].split(' ')[1].lower()
            sentiment = fact.get_sentiment(text, classifier)
        except IndexError:
            # Text without the expected "…, <x> <month> …" layout is skipped.
            continue

        if sentiment == "positive":
            if month in positive_by_month:
                positive_by_month[month] += 1
            postweets += 1
        elif sentiment == "negative":
            if month in negative_by_month:
                negative_by_month[month] += 1
            negtweets += 1

    posData = [positive_by_month[month] for month in months]
    negData = [negative_by_month[month] for month in months]
    print(posData, negData)

    # Guard the division: an empty input would otherwise raise
    # ZeroDivisionError.
    total = len(ts)
    pe = 100 * postweets / total if total else 0.0
    ne = 100 * negtweets / total if total else 0.0
    p = format(pe, '.2f')
    n = format(ne, '.2f')
    return posData, negData, p, n
def caTe(categories="categories"):
    """Return up to ten classified tweets whose text contains ``categories``.

    ``uploads/TWEETS.csv`` is read in full, then scanned in file order. A row
    matches when it has more than five columns and ``categories`` is a
    substring of column 5. Scanning stops once ten matches are collected.

    Args:
        categories: Substring to look for in column 5 of each row.

    Returns:
        A list of ``[processed_text, sentiment]`` pairs, at most ten long.
    """
    model = act.load_classifier()

    with open('uploads/TWEETS.csv', 'r', encoding="utf8") as wash:
        reader = csv.reader(wash, delimiter=',', quotechar='|')
        csv.field_size_limit(20000000)
        rows = list(reader)

    matches = []
    for row in rows:
        if len(row) <= 5 or categories not in row[5]:
            continue
        cleaned = act.processTweet2(row[5])
        label = act.get_sentiment(cleaned, model)
        matches.append([cleaned, label])
        if len(matches) == 10:
            break
    return matches
def viewMore(monthData, sentData):
    """Return up to ten tweets for a month that carry a given sentiment.

    ``uploads/TWEETS.csv`` is read in full, then scanned in file order. A row
    is considered when it has more than five columns and ``monthData`` is a
    substring of column 1. Its column-5 text is processed and classified, and
    the row is kept only if the sentiment equals ``sentData``. Scanning stops
    once ten rows are kept.

    Args:
        monthData: Substring to look for in column 1 of each row.
        sentData: Sentiment label a row must be classified as to be kept.

    Returns:
        A list of ``[column_1, processed_text, sentiment]`` entries, at most
        ten long.
    """
    model = act.load_classifier()

    with open('uploads/TWEETS.csv', 'r', encoding="utf8") as wash:
        reader = csv.reader(wash, delimiter=',', quotechar='|')
        csv.field_size_limit(20000000)
        rows = list(reader)

    picked = []
    for row in rows:
        if not (len(row) > 5 and monthData in row[1]):
            continue
        cleaned = act.processTweet2(row[5])
        label = act.get_sentiment(cleaned, model)
        if label == sentData:
            picked.append([row[1], cleaned, label])
        if len(picked) == 10:
            break
    return picked
def classify__(clf="Support Vector Machine"):
    """Classify the tweets from ``getTweetFromFile()`` and summarise them.

    Each tweet is run through ``act.processTweet``. The month is read from
    the processed text as the second space-separated token of its second
    comma-separated field, lower-cased. Tweets whose text does not have that
    shape are skipped. The overall verdict ("Negative", "Positive" or
    "Neutral") is printed.

    Args:
        clf: Classifier name handed to ``act.load_classifier``.

    Returns:
        A 9-tuple ``(pe, nee, neu, positives, negatives, neutrals, avPos,
        avNeg, getIt)``:
        ``pe`` / ``nee`` / ``neu`` are the positive / negative / remaining
        share of all tweets as percentage strings with two decimals;
        ``positives`` / ``negatives`` / ``neutrals`` are 12-element lists of
        monthly counts ordered jan, feb, mar, april, may, jun, jul, aug, sep,
        oct, nov, dec;
        ``avPos`` / ``avNeg`` are the month-bucketed positive / negative
        counts divided by the number of tweets, as strings with four
        decimals;
        ``getIt`` is the number of tweets.
        With no tweets every ratio is reported as zero.
    """
    ts = getTweetFromFile()
    getIt = len(ts)
    classifier = act.load_classifier(clf)

    # NOTE(review): April is keyed 'april' while every other month uses a
    # three-letter abbreviation; a month token of 'apr' would never be
    # counted -- confirm against the date format in the tweets.
    months = ['jan', 'feb', 'mar', 'april', 'may', 'jun',
              'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
    # One [positive, negative, neutral] counter triple per month.
    gPlot = {month: [0, 0, 0] for month in months}

    # NOTE(review): the totals count every classified tweet, including those
    # whose month token is not one of the keys above -- confirm intended.
    postweets = 0
    negtweets = 0

    for t in ts:
        text = act.processTweet(t)
        try:
            month = text.split(',')[1].split(' ')[1].lower()
            sentiment = act.get_sentiment(text, classifier)
        except IndexError:
            # Text without the expected "…, <x> <month> …" layout is skipped.
            continue

        if sentiment == "positive":
            slot = 0
            postweets += 1
        elif sentiment == "negative":
            slot = 1
            negtweets += 1
        else:
            slot = 2
        if month in gPlot:
            gPlot[month][slot] += 1

    # Guard every division: an empty input would otherwise raise
    # ZeroDivisionError.
    if getIt:
        p = 100 * postweets / getIt
        n = 100 * negtweets / getIt
        # The remainder also covers tweets that were skipped above.
        ne = 100 * (getIt - negtweets - postweets) / getIt
    else:
        p = n = ne = 0.0

    if p < n:
        print("Negative")
    elif p > n:
        print("Positive")
    else:
        print("Neutral")

    pe = format(p, '.2f')
    nee = format(n, '.2f')
    neu = format(ne, '.2f')

    positives = [gPlot[month][0] for month in months]
    negatives = [gPlot[month][1] for month in months]
    neutrals = [gPlot[month][2] for month in months]

    aPos = sum(positives) / getIt if getIt else 0.0
    aNeg = sum(negatives) / getIt if getIt else 0.0
    avPos = format(aPos, '.4f')
    avNeg = format(aNeg, '.4f')

    return pe, nee, neu, positives, negatives, neutrals, avPos, avNeg, getIt