def pos_feature(features, sentence):
    """Store the part-of-speech vector of ``sentence`` in ``features``.

    The sentence is passed through ``exp_replace.replace_emo``, tokenized with
    ``nltk.word_tokenize`` and lower-cased before ``sentiments.posvector`` is
    called on the tokens.  Each element of the returned vector is written
    under the key ``"POS<k>"`` where ``k`` starts at 1.

    :param features: mapping that receives the ``POS<k>`` entries (mutated in
        place).
    :param sentence: raw sentence text.
    :return: None
    """
    cleaned = exp_replace.replace_emo(sentence)
    lowered = [word.lower() for word in nltk.word_tokenize(cleaned)]
    vector = sentiments.posvector(lowered)
    # Keys are 1-based: POS1, POS2, ...
    for position, value in enumerate(vector, start=1):
        features["POS" + str(position)] = value
def pos_feature(features, sentence):
    """Add one ``POS<k>`` entry per part-of-speech vector component.

    Pipeline: ``exp_replace.replace_emo`` -> ``nltk.word_tokenize`` ->
    lower-casing -> ``sentiments.posvector``.  Component ``k`` (counted from
    1) of the resulting vector is stored as ``features['POS<k>']``.

    :param features: mapping updated in place.
    :param sentence: raw sentence text.
    :return: None
    """
    tokenized = nltk.word_tokenize(exp_replace.replace_emo(sentence))
    pos_values = sentiments.posvector([tok.lower() for tok in tokenized])
    positions = range(1, len(pos_values) + 1)
    for slot, component in zip(positions, pos_values):
        features['POS' + str(slot)] = component
def sent_feature(features, sentence):
    """Add whole-sentence and half-sentence sentiment features to ``features``.

    The sentence is passed through ``exp_replace.replace_emo`` (per the
    original author's note, ":)" is replaced by "good" and ":(" by "sad"),
    tokenized with ``nltk.word_tokenize`` and lower-cased.  For the full token
    list and for each half, ``sentiments.score_sentence`` is called; element 0
    of its result is stored as the positive score, element 1 as the negative
    score, and their difference as the net sentiment.  TextBlob polarity is
    recorded for the same three spans, falling back to ``0.0`` when TextBlob
    raises.

    Keys written: ``Positive sentiment``, ``Negative sentiment``,
    ``Sentiment``, ``BlobSentiment``, ``PosSentiment1/2``, ``Negsentiment1/2``,
    ``Sentiment1/2``, ``PosSentiment2/2``, ``NegSentiment2/2``,
    ``Sentiment2/2``, ``SentimentContrast``, ``BlobSentiment1/2``,
    ``BlobSentiment2/2`` and ``BlobSentimentContrast``.

    :param features: mapping updated in place.
    :param sentence: raw sentence text.
    :return: None
    """

    def blob_polarity(words):
        """Return the TextBlob polarity of ``words`` re-joined into text."""
        # Tokens starting with an apostrophe and punctuation tokens are glued
        # to the previous token; everything else is preceded by a space.
        text = "".join(
            word if word.startswith("'") or word in string.punctuation else " " + word
            for word in words
        ).strip()
        return TextBlob(text).sentiment.polarity

    sentence_sentiment = exp_replace.replace_emo(sentence)
    tokens = [t.lower() for t in nltk.word_tokenize(sentence_sentiment)]

    mean_sentiment = sentiments.score_sentence(tokens)
    features['Positive sentiment'] = mean_sentiment[0]
    features['Negative sentiment'] = mean_sentiment[1]
    features['Sentiment'] = mean_sentiment[0] - mean_sentiment[1]

    # TextBlob sentiment analysis for the full sentence (best effort).
    try:
        features['BlobSentiment'] = blob_polarity(tokens)
    except Exception:
        features['BlobSentiment'] = 0.0

    # Split the sentence into two parts; a single token is padded with "."
    # so the second half is never the whole sentence on its own.
    if len(tokens) == 1:
        tokens += ['.']
    # Floor division keeps the slice bound an int on Python 2 and Python 3.
    middle = len(tokens) // 2
    f_half = tokens[:middle]
    s_half = tokens[middle:]

    mean_sentiment_f = sentiments.score_sentence(f_half)
    features['PosSentiment1/2'] = mean_sentiment_f[0]
    features['Negsentiment1/2'] = mean_sentiment_f[1]
    features['Sentiment1/2'] = mean_sentiment_f[0] - mean_sentiment_f[1]

    mean_sentiment_s = sentiments.score_sentence(s_half)
    features['PosSentiment2/2'] = mean_sentiment_s[0]
    features['NegSentiment2/2'] = mean_sentiment_s[1]
    features['Sentiment2/2'] = mean_sentiment_s[0] - mean_sentiment_s[1]

    # Read back the keys written just above (no spaces before "1/2"/"2/2").
    features['SentimentContrast'] = np.abs(
        features['Sentiment1/2'] - features['Sentiment2/2'])

    # TextBlob sentiment analysis for both halves (best effort).
    try:
        features['BlobSentiment1/2'] = blob_polarity(f_half)
    except Exception:
        features['BlobSentiment1/2'] = 0.0
    try:
        features['BlobSentiment2/2'] = blob_polarity(s_half)
    except Exception:
        features['BlobSentiment2/2'] = 0.0
        # NOTE(review): this key is only written on the failure path and has
        # no counterpart on success; kept for compatibility -- confirm whether
        # it is still wanted.
        features['BlobSubjectivity2/2'] = 0.0

    features['BlobSentimentContrast'] = np.abs(
        features['BlobSentiment1/2'] - features['BlobSentiment2/2'])
def sent_feature(features, sentence):
    """Add sentiment features for the sentence, its halves and its thirds.

    The sentence is passed through ``exp_replace.replace_emo``, tokenized with
    ``nltk.word_tokenize`` and lower-cased.  For every span (whole sentence,
    1/2, 2/2, 1/3, 2/3, 3/3) the following keys are written, where
    ``<span>`` is empty for the whole sentence and e.g. ``" 1/2"`` otherwise:

    * ``Positive sentiment<span>`` / ``Negative sentiment<span>`` -- elements
      0 and 1 of ``sentiments.score_sentence(span_tokens)``;
    * ``Sentiment<span>`` -- their difference;
    * ``Blob sentiment<span>`` / ``Blob subjectivity<span>`` -- TextBlob
      polarity and subjectivity, both ``0.0`` if TextBlob raises.

    In addition ``Sentiment contrast 2`` / ``Blob Sentiment contrast 2`` hold
    the absolute difference between the two halves, and ``Sentiment contrast
    3`` / ``Blob Sentiment contrast 3`` the absolute difference between the
    first and the last third.

    :param features: mapping updated in place.
    :param sentence: raw sentence text.
    :return: None
    """

    def join_words(words):
        """Re-join tokens, gluing punctuation and "'"-prefixed tokens."""
        return "".join(
            word if word.startswith("'") or word in string.punctuation else " " + word
            for word in words
        ).strip()

    def add_lexicon_scores(span, words):
        """Write the ``sentiments.score_sentence`` based keys for one span."""
        scores = sentiments.score_sentence(words)
        features['Positive sentiment' + span] = scores[0]
        features['Negative sentiment' + span] = scores[1]
        features['Sentiment' + span] = scores[0] - scores[1]

    def add_blob_scores(span, words):
        """Write the TextBlob keys for one span, defaulting to 0.0 on error."""
        try:
            sentiment = TextBlob(join_words(words)).sentiment
            polarity = sentiment.polarity
            subjectivity = sentiment.subjectivity
        except Exception:
            polarity, subjectivity = 0.0, 0.0
        features['Blob sentiment' + span] = polarity
        features['Blob subjectivity' + span] = subjectivity

    sentence_sentiment = exp_replace.replace_emo(sentence)
    tokens = [t.lower() for t in nltk.word_tokenize(sentence_sentiment)]

    # Whole sentence
    add_lexicon_scores('', tokens)
    add_blob_scores('', tokens)

    # Split in 2 (a lone token is padded with "." first)
    if len(tokens) == 1:
        tokens += ['.']
    # Floor division keeps slice bounds integral on Python 2 and Python 3.
    cut = len(tokens) // 2
    f_half = tokens[:cut]
    s_half = tokens[cut:]
    add_lexicon_scores(' 1/2', f_half)
    add_lexicon_scores(' 2/2', s_half)
    features['Sentiment contrast 2'] = np.abs(
        features['Sentiment 1/2'] - features['Sentiment 2/2'])
    add_blob_scores(' 1/2', f_half)
    add_blob_scores(' 2/2', s_half)
    features['Blob Sentiment contrast 2'] = np.abs(
        features['Blob sentiment 1/2'] - features['Blob sentiment 2/2'])

    # Split in 3 (two tokens are padded with "." first; note that the padding
    # added for the halves is still present here)
    if len(tokens) == 2:
        tokens += ['.']
    first_cut = len(tokens) // 3
    second_cut = 2 * len(tokens) // 3
    f_third = tokens[:first_cut]
    s_third = tokens[first_cut:second_cut]
    t_third = tokens[second_cut:]
    add_lexicon_scores(' 1/3', f_third)
    add_lexicon_scores(' 2/3', s_third)
    add_lexicon_scores(' 3/3', t_third)
    features['Sentiment contrast 3'] = np.abs(
        features['Sentiment 1/3'] - features['Sentiment 3/3'])
    add_blob_scores(' 1/3', f_third)
    add_blob_scores(' 2/3', s_third)
    add_blob_scores(' 3/3', t_third)
    features['Blob Sentiment contrast 3'] = np.abs(
        features['Blob sentiment 1/3'] - features['Blob sentiment 3/3'])
def sent_feature(features, sentence):
    """Add sentiment features for the sentence, its halves and its thirds.

    Tokens come from ``nltk.word_tokenize`` applied to
    ``exp_replace.replace_emo(sentence)`` and are lower-cased.

    Whole-sentence keys: ``Positive sentiment``, ``Negative sentiment``,
    ``Sentiment``, ``Blob sentiment``, ``Blob subjectivity``.

    For each split count ``n`` in (2, 3) and each part ``k`` of ``n``:
    ``Positive sentiment k/n``, ``Negative sentiment k/n``, ``Sentiment k/n``,
    ``Blob sentiment k/n``, ``Blob subjectivity k/n``; plus
    ``Sentiment contrast n`` and ``Blob Sentiment contrast n``, the absolute
    difference between the first and the last part.

    Lexicon scores are elements 0 (positive) and 1 (negative) of
    ``sentiments.score_sentence``.  TextBlob values default to ``0.0`` when
    TextBlob raises.

    :param features: mapping updated in place.
    :param sentence: raw sentence text.
    :return: None
    """

    def lexicon(key_suffix, words):
        """Store positive / negative / net lexicon scores for ``words``."""
        scores = sentiments.score_sentence(words)
        features["Positive sentiment" + key_suffix] = scores[0]
        features["Negative sentiment" + key_suffix] = scores[1]
        features["Sentiment" + key_suffix] = scores[0] - scores[1]

    def blob(key_suffix, words):
        """Store TextBlob polarity / subjectivity for ``words`` (best effort)."""
        try:
            # Punctuation and "'"-prefixed tokens attach to the previous token.
            text = "".join(
                [" " + i if not i.startswith("'") and i not in string.punctuation else i for i in words]
            ).strip()
            sentiment = TextBlob(text).sentiment
            values = (sentiment.polarity, sentiment.subjectivity)
        except Exception:
            values = (0.0, 0.0)
        features["Blob sentiment" + key_suffix] = values[0]
        features["Blob subjectivity" + key_suffix] = values[1]

    sentence_sentiment = exp_replace.replace_emo(sentence)
    tokens = [t.lower() for t in nltk.word_tokenize(sentence_sentiment)]

    lexicon("", tokens)
    blob("", tokens)

    for n_parts in (2, 3):
        # Pad with "." when there is exactly one token fewer than parts
        # (1 token before halving, 2 tokens before splitting in thirds).
        if len(tokens) == n_parts - 1:
            tokens += ["."]
        total = len(tokens)
        # Integer boundaries 0, total//n, 2*total//n, ..., total; floor
        # division keeps them valid slice indices on Python 3 as well.
        bounds = [k * total // n_parts for k in range(n_parts + 1)]
        parts = [tokens[lo:hi] for lo, hi in zip(bounds, bounds[1:])]
        suffixes = [" %d/%d" % (k, n_parts) for k in range(1, n_parts + 1)]

        for suffix, part in zip(suffixes, parts):
            lexicon(suffix, part)
        features["Sentiment contrast %d" % n_parts] = np.abs(
            features["Sentiment" + suffixes[0]] - features["Sentiment" + suffixes[-1]]
        )

        for suffix, part in zip(suffixes, parts):
            blob(suffix, part)
        features["Blob Sentiment contrast %d" % n_parts] = np.abs(
            features["Blob sentiment" + suffixes[0]] - features["Blob sentiment" + suffixes[-1]]
        )
def extractFeatureOfASentence(sen):
    """
    This method extracts features of a single sentence.

    The returned list is laid out as follows:

    1. Number of upper-case characters in the sentence.
    2. Every element of ``posvector(tokens)``.
    3. Full sentence polarity and subjectivity.
    4. Polarity and subjectivity of each half (1/2 and 2/2), followed by the
       absolute difference between the two polarities.
    5. Polarity and subjectivity of each third (1/3, 2/3 and 3/3), followed by
       the difference between max and min polarity of the thirds.
    6. Polarity and subjectivity of each fourth (1/4, 2/4, 3/4 and 4/4),
       followed by the difference between max and min polarity of the fourths.

    Items 3-6 always contribute 23 values, so the list has
    ``1 + len(posvector(tokens)) + 23`` entries regardless of sentence length.

    :param sen: sentence as a byte string (decoded as UTF-8) or as text.
    :return: list of feature values.
    """

    def split_tokens(words, count):
        """Split ``words`` into exactly ``count`` consecutive parts.

        The first ``count - 1`` parts hold ``len(words) // count`` tokens each
        and the last part takes the remainder.  When there are fewer tokens
        than parts the leading parts are empty, which keeps the feature vector
        at a fixed length.
        """
        size = len(words) // count
        parts = [words[k * size:(k + 1) * size] for k in range(count - 1)]
        parts.append(words[(count - 1) * size:])
        return parts

    features = []

    # Capitalization feature: count of upper-case characters.  Only byte
    # strings need decoding; text input is scanned as is.
    sentence_plain = sen.decode('UTF-8') if isinstance(sen, bytes) else sen
    features.append(sum(1 for char in sentence_plain if char.isupper()))

    # Tokenize the sentence and then convert everything to lower case.
    text = sen if isinstance(sen, str) else sentence_plain
    tokens = nltk.word_tokenize(exp_replace.replace_emo(text))
    tokens = [t.lower() for t in tokens]

    # Part-of-speech feature.
    features.extend(posvector(tokens))

    # Features of the full sentence.
    fullBlob = TextBlob(joinTokens(tokens))
    features.append(fullBlob.sentiment.polarity)
    features.append(fullBlob.sentiment.subjectivity)

    # Features of halves, thirds and fourths.
    for count in (2, 3, 4):
        polarities = []
        for part in split_tokens(tokens, count):
            sentiment = TextBlob(joinTokens(part)).sentiment
            polarities.append(sentiment.polarity)
            features.append(sentiment.polarity)
            features.append(sentiment.subjectivity)
        # For two parts max - min equals the absolute polarity difference.
        features.append(np.abs(max(polarities) - min(polarities)))

    return features
def extractFeatureOfASentence(sen):
    """
    Extract 23 TextBlob sentiment features of a single sentence.

    Layout of the returned list:

    * full sentence polarity and subjectivity (2 values);
    * polarity and subjectivity of each half, then the absolute difference
      between the two polarities (5 values);
    * polarity and subjectivity of each third, then max minus min polarity of
      the thirds (7 values);
    * polarity and subjectivity of each fourth, then max minus min polarity
      of the fourths (9 values).

    :param sen: sentence; converted with ``str`` before tokenizing.
    :return: list of 23 feature values.
    """

    def chunk(words, pieces):
        """Return exactly ``pieces`` consecutive slices of ``words``.

        All but the last slice contain ``len(words) // pieces`` tokens; the
        last one takes whatever is left.  With fewer tokens than pieces the
        leading slices are empty, so the output length never varies.
        """
        step = len(words) // pieces
        slices = [words[n * step:(n + 1) * step] for n in range(pieces - 1)]
        slices.append(words[(pieces - 1) * step:])
        return slices

    # Tokenize the sentence and then convert everything to lower case.
    tokens = nltk.word_tokenize(exp_replace.replace_emo(str(sen)))
    tokens = [t.lower() for t in tokens]

    # Features of the full sentence.
    full_sentiment = TextBlob(joinTokens(tokens)).sentiment
    features = [full_sentiment.polarity, full_sentiment.subjectivity]

    # Features of halves, thirds and fourths.
    for pieces in (2, 3, 4):
        seen_polarities = []
        for piece in chunk(tokens, pieces):
            piece_sentiment = TextBlob(joinTokens(piece)).sentiment
            seen_polarities.append(piece_sentiment.polarity)
            features.append(piece_sentiment.polarity)
            features.append(piece_sentiment.subjectivity)
        # Spread of the polarities; for two pieces this is |pol_2 - pol_1|.
        features.append(np.abs(max(seen_polarities) - min(seen_polarities)))

    return features
def extract_feature_of_sentence(self, sen):
    # type: (object) -> object
    """
    This method extracts features of a single sentence.
    We have following list of features being extracted.
    1. Full sentence Polarity
    2. Full sentence Subjectivity
    3. Half sentence Polarity (1/2 and 2/2)
    4. Half sentence Subjectivity (1/2 and 2/2)
    5. Difference between polarities of two halves
    6. Third sentence Polarity (1/3, 2/3 and 3/3)
    7. Third sentence Subjectivity (1/3, 2/3 and 3/3)
    8. Difference between max and min polarity of the thirds.
    9. Fourth sentence Polarity (1/4, 2/4, 3/4 and 4/4)
    10. Fourth sentence Subjectivity (1/4, 2/4, 3/4 and 4/4)
    11. Difference between max and min polarities of the fourths.

    Like this we extract 23 features of a single sentence.  In the returned
    list the polarity and subjectivity of each part are adjacent, and each
    group of parts is followed by its polarity difference.

    :param sen: sentence; converted with ``str`` before tokenizing.
    :return: list of 23 feature values.
    """

    def partition(words, n_parts):
        """Cut ``words`` into exactly ``n_parts`` consecutive parts.

        Every part but the last has ``len(words) // n_parts`` tokens; the last
        part receives the remainder.  Sentences shorter than ``n_parts``
        produce empty leading parts, so the number of features stays fixed.
        """
        width = len(words) // n_parts
        result = [words[p * width:(p + 1) * width] for p in range(n_parts - 1)]
        result.append(words[(n_parts - 1) * width:])
        return result

    features = []

    # Tokenize the sentence and then convert everything to lower case.
    tokens = nltk.word_tokenize(exp_replace.replace_emo(str(sen)))
    tokens = [t.lower() for t in tokens]

    # Extract features of full sentence.
    full_blob = TextBlob(self.join_tokens(tokens))
    features.append(full_blob.sentiment.polarity)
    features.append(full_blob.sentiment.subjectivity)

    # Extract features of halves, thirds and fourths.
    for n_parts in (2, 3, 4):
        part_polarities = []
        for part in partition(tokens, n_parts):
            part_blob = TextBlob(self.join_tokens(part))
            pol = part_blob.sentiment.polarity
            sub = part_blob.sentiment.subjectivity
            part_polarities.append(pol)
            features.append(pol)
            features.append(sub)
        # max - min; with two parts this is the absolute polarity difference.
        features.append(np.abs(max(part_polarities) - min(part_polarities)))

    return features