def evaluate_without_negations(feature_select):
    """Train and evaluate a Naive Bayes sentiment classifier with negations
    replaced by antonyms before feature extraction.

    feature_select: callable mapping a list of words to a feature dict
    (the NLTK ``{word: True}`` convention).

    Returns the classifier's accuracy on the held-out 10% test split.
    Side effects: prints accuracy, per-class precision/recall, and the
    10 most informative features.
    """
    replacer = AntonymReplacer()

    # Strip all punctuation in one pass, then split the corpus into
    # one sentence per line.  `with` guarantees the handles are closed
    # (the original leaked both file objects).
    strip_punct = str.maketrans('', '', string.punctuation)
    with open('\\resources\\original files\\pos.txt', 'r') as fh:
        posSentences = re.split(r'\n', fh.read().translate(strip_punct))
    with open('\\resources\\original files\\neg.txt', 'r') as fh:
        negSentences = re.split(r'\n', fh.read().translate(strip_punct))

    def _featurize(sentences, label):
        # Tokenize, replace negations with antonyms, and pair the
        # selected features with the class label.
        feats = []
        for sentence in sentences:
            words = re.findall(r"[\w']+|[.,!?;]", sentence)
            words = replacer.replace_negations(words)
            feats.append([feature_select(words), label])
        return feats

    posFeatures = _featurize(posSentences, 'pos')
    negFeatures = _featurize(negSentences, 'neg')

    # 90/10 train/test split, taken per class to keep the split balanced.
    posCutoff = int(math.floor(len(posFeatures) * 9 / 10))
    negCutoff = int(math.floor(len(negFeatures) * 9 / 10))
    trainFeatures = posFeatures[:posCutoff] + negFeatures[:negCutoff]
    testFeatures = posFeatures[posCutoff:] + negFeatures[negCutoff:]

    classifier = NaiveBayesClassifier.train(trainFeatures)

    # Reference (gold) vs. predicted index sets, for precision/recall.
    referenceSets = defaultdict(set)
    testSets = defaultdict(set)
    for i, (features, label) in enumerate(testFeatures):
        referenceSets[label].add(i)
        predicted = classifier.classify(features)
        testSets[predicted].add(i)

    # Compute accuracy once and reuse it (the original evaluated it twice).
    accuracy = nltk.classify.util.accuracy(classifier, testFeatures)
    print('train on %d instances, test on %d instances'
          % (len(trainFeatures), len(testFeatures)))
    print('accuracy:', accuracy)
    print('pos precision:', nltk.metrics.precision(referenceSets['pos'], testSets['pos']))
    print('pos recall:', nltk.metrics.recall(referenceSets['pos'], testSets['pos']))
    print('neg precision:', nltk.metrics.precision(referenceSets['neg'], testSets['neg']))
    print('neg recall:', nltk.metrics.recall(referenceSets['neg'], testSets['neg']))
    classifier.show_most_informative_features(10)
    return accuracy
def negations(text):
    """Return *text* with negated phrases rewritten as antonyms.

    The text is split on whitespace, run through AntonymReplacer's
    negation replacement, and re-joined with single spaces.
    """
    replacer = AntonymReplacer()
    tokens = text.split()
    without_negations = replacer.replace_negations(tokens)
    return ' '.join(without_negations)
def antonym_dealer(document):
    """Replace negation patterns in *document* (a list of tokens) with
    antonyms, delegating to ``replacers.AntonymReplacer``."""
    from replacers import AntonymReplacer
    return AntonymReplacer().replace_negations(document)
# Load the 'text' column and normalise each entry in place.
df = pd.read_csv(csv_file)
saved_column = df['text']
list1 = list(saved_column)
#print (list1)

replacer = AntonymReplacer()
rep1 = RepeatReplacer()
rep2 = RegexpReplacer()

for i in range(len(list1)):
    list1[i] = re.sub(r'[^\x00-\x7F]', r' ', list1[i])  # replace non-ASCII characters with a space
    list1[i] = rep2.replace(list1[i])                   # expand contractions, e.g. can't -> can not
    list1[i] = list1[i].split()                         # split each sentence into words
    #list1[i]=[w for w in list1[i] if (len(w)>2)]       # (disabled) keep only words longer than 2 chars
    list1[i] = replacer.replace_negations(list1[i])     # replace negated words with antonyms

# word -> emotion lookup, one "word,emotion" line per entry.
# `with` closes each file; the original opened both files on the same
# name `f` and never closed either.
emo = {}
with open('emotions.txt', 'r') as fh:
    for line in fh:
        parts = line.split(',')
        emo[parts[0]] = parts[1].rstrip()
#print(emo)

# abbreviation -> expansion lookup, lower-cased on both sides.
abb = {}
with open('abb.txt', 'r') as fh:
    for line in fh:
        parts = line.split(',')
        abb[parts[0].lower()] = parts[1].rstrip().lower()
#print(abb)
#for i in ran
from nltk.tokenize import word_tokenize
from replacers import AntonymReplacer

# Demonstrate negation-to-antonym replacement on a few sample sentences.
replacer = AntonymReplacer()
sent = "let's not uglify our code"
for phrase in (sent, "it is not small", "it is not high", "it is not fine"):
    print(replacer.replace_negations(word_tokenize(phrase)))
def negations(text):
    """Replace negation patterns in *text* with antonyms and return the
    re-joined string.  Uses the module-level ``replacer`` bound below —
    late binding works because it exists before this is first called."""
    sent = text.split()
    noneg = replacer.replace_negations(sent)
    separator = ' '
    out = separator.join(noneg)
    return out

replacer = AntonymReplacer()
#replacer.replace('good')
#replacer.replace('uglify')
# Smoke-test the replacer on a small token list.
sent = ['good', 'do', 'not', 'go']
aaa = replacer.replace_negations(sent)
L = pd.read_csv("hip_hop_nocontracted_v4_lowercase.csv", index_col=0)
separator = ' '
bbb = separator.join(aaa)
# NOTE(review): plain assignment — L3 aliases L, so the apply below
# mutates the original DataFrame too; confirm that is intended.
L3 = L
L3['Lyrics'] = L3['Lyrics'].apply(negations)
# NOTE(review): this redefinition of `negations` appears truncated at the
# end of the visible chunk (no join/return) and would shadow the working
# version above with one that returns None — verify against the full file.
def negations(text):
    sent = text.split()
    noneg = replacer.replace_negations(sent)