-
Notifications
You must be signed in to change notification settings - Fork 0
/
sentimentAnalysis.py
80 lines (59 loc) · 2.62 KB
/
sentimentAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from textblob.classifiers import NaiveBayesClassifier
from textblob import TextBlob
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from rake_nltk import Rake
import re
def SA():
    """Train a Naive Bayes classifier and classify one line of user input.

    Steps, in order:

    1. Train a ``NaiveBayesClassifier`` from ``datasets/trainingData.csv``
       and print its 15 most informative features.
    2. Print the classifier's accuracy on ``datasets/testingData.csv``.
    3. Prompt for a line of text on stdin, strip every character that is
       not an ASCII letter or a space, and drop English stopwords.
    4. Feed the remaining words to RAKE, join the ranked phrases back into
       one string and classify that string.
    5. Print the classification and a binary flag: ``1`` when the label is
       ``"pos"``, ``0`` for any other label.

    The function takes no arguments and returns ``None``; every result is
    reported through ``print``.
    """
    keyword_extractor = Rake()

    # Train the classifier from the CSV training data. The file only needs
    # to stay open while the classifier reads it.
    with open("datasets/trainingData.csv", 'r') as trainingdata:
        classifier = NaiveBayesClassifier(trainingdata, format="csv")
    print("Training Data")
    classifier.show_informative_features(15)

    # Report accuracy on the held-out testing data.
    # Not needed for final product.
    with open("datasets/testingData.csv", 'r') as testingdata:
        print("Testing data accuracy", classifier.accuracy(testingdata))

    user_input = input("Please provide a test input: ")

    # Keep only ASCII letters and spaces.
    non_letters = re.compile('[^a-zA-Z ]')
    punctuation_removed = non_letters.sub('', user_input)
    print("Punctuation removed: ", punctuation_removed)

    # Tokenize and drop stopwords. The comparison is case-sensitive, so a
    # token only matches a stopword when its casing matches the list entry.
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(punctuation_removed)
    filtered_sentence = [w for w in word_tokens if w not in stop_words]
    print("Stopwords removed: ", filtered_sentence)

    # RAKE works on a single string, so re-join the surviving tokens.
    string_without_stopwords = ' '.join(filtered_sentence)
    keyword_extractor.extract_keywords_from_text(string_without_stopwords)
    ranked_phrases = keyword_extractor.get_ranked_phrases()
    print("Keywords extracted: ", ranked_phrases)

    # Classify the ranked phrases as one string with the trained classifier.
    keyword_string = ' '.join(ranked_phrases)
    final_string = TextBlob(keyword_string, classifier=classifier)

    # Classify once and reuse the label for both the report and the flag.
    label = final_string.classify()
    print("String followed by classification: ", final_string, label)

    binary_classify = 1 if label == "pos" else 0
    print(binary_classify)