# NOTE(review): this script has been collapsed onto ONE physical line — the
# original newlines/indentation were lost in a paste or extraction step, so the
# line below is NOT runnable as-is (and the inline `#allEmotion = happy+neg`
# would comment out everything after it on a single line). Code is kept
# byte-identical here; it must be re-flowed into statements before running.
#
# Visible intent: for each cleaned story returned by textClean(), load the
# happy/negative word lists, split the story into totSplit (26) chunks, build a
# word-frequency map per chunk via dictCount, and accumulate happy/negative
# word counts per chunk.
#
# Defects visible in the code (to fix once re-flowed):
#  - `range(0, len(allStories)-1)` skips the LAST story, and
#    `range(0, totSplit-1)` processes only 25 of the 26 chunks — both are
#    off-by-one; `range(len(xs))` already stops before len(xs).
#  - `split = len(story) / totSplit` is a float under Python 3, so the slice
#    `story[split*i:split*(i+1)]` would raise TypeError; use `//`.
#  - the two `open(...)` handles are never closed — use `with open(...)`, and
#    hoist the word-list loading out of the per-story loop (loop-invariant).
#  - `textChunk[happy[j]]` raises KeyError when the word is absent — presumably
#    dictCount returns a plain dict keyed by word (TODO: confirm); prefer
#    `textChunk.get(word, 0)` or have dictCount return a collections.Counter.
#  - `nCount` is initialized but its accumulation (and any use of totHap /
#    totNeg, wn, Counter, np, plt) lies past the end of this visible chunk —
#    the script continues beyond what is shown here.
from TextClean import textClean from dictCount import dictCount import numpy as np import os import re from nltk.corpus import wordnet as wn from collections import Counter from nltk.corpus import stopwords import matplotlib.pyplot as plt allStories = textClean() for k in range(0,len(allStories)-1): story = allStories[k] happy = [line.rstrip('\n') for line in open('Stories/emotions/happy.txt')] neg = [line.rstrip('\n') for line in open('Stories/emotions/negative.txt')] #allEmotion = happy+neg totHap = [] totNeg = [] totSplit = 26 split = len(story) / totSplit for i in range(0,totSplit-1): textChunk = dictCount(story[split*i:split*(i+1)]) hCount = 0.0 nCount = 0.0 for j in range(0, len(happy)): hCount += textChunk[happy[j]]
# NOTE(review): this script has also been collapsed onto ONE physical line
# (newlines lost); it is kept byte-identical and must be re-flowed before it
# can run. It is additionally TRUNCATED mid-expression — the
# `SGDClassifier(loss='hinge', penalty='l2',` call is cut off at the end of
# this view, so the Pipeline construction (and everything after) is incomplete.
#
# Visible intent: load the ISEAR dataset from 'ISEAR_FULL.csv', lower-case each
# 'SIT' sentence, strip punctuation with a regex, drop NLTK English stopwords,
# collect the cleaned sentences into `sentdf` and their 'FIELD1' labels into
# `Y_labels`, then build a sklearn text-classification pipeline
# (CountVectorizer -> TfidfTransformer -> SGDClassifier with hinge loss).
#
# Defects visible in the code (to fix once re-flowed):
#  - duplicate imports: `import re` and `from nltk.corpus import stopwords`
#    each appear twice.
#  - `Pipeline`, `CountVectorizer`, `TfidfTransformer`, `SGDClassifier` are
#    used but never imported — needs `from sklearn.pipeline import Pipeline`,
#    `from sklearn.feature_extraction.text import CountVectorizer,
#    TfidfTransformer`, `from sklearn.linear_model import SGDClassifier`.
#  - `from radar import *` is a wildcard import — import the needed names
#    explicitly.
#  - the `for i in range(0, len(df))` row loop is non-idiomatic pandas;
#    prefer vectorized `df['SIT'].str.lower().str.replace(...)` plus a
#    stopword filter, or `df['SIT'].map(clean_fn)`.
#  - the regex pattern is rebuilt every iteration — compile it once
#    (`re.compile(r"[^\w\d'\s]+")`) and reuse it in the loop.
#  - `stories = textClean()` and several imports (np, os, Counter,
#    split_into_sentences) appear unused in the visible portion — presumably
#    used past the truncation point; verify before removing.
import pandas from nltk.corpus import stopwords import re from dictCount import dictCount from TextClean import textClean import numpy as np import os import re from nltk.corpus import stopwords from splitSent import split_into_sentences from collections import Counter from radar import * df = pandas.read_csv('ISEAR_FULL.csv', sep=',', ) stories = textClean() stop = set(stopwords.words('english')) sentdf = [] Y_labels = [] for i in range(0, len(df)): sentence = df['SIT'][i] sentence = re.sub(r"[^\w\d'\s]+", ' ', sentence.lower()) sentence = sentence.split() sentence = [word for word in sentence if word not in stop] sentence = ' '.join(sentence) sentdf.append(sentence) Y_labels.append(df['FIELD1'][i]) text_clf = Pipeline([('vect', CountVectorizer()), ('tfidf', TfidfTransformer()), ('clf', SGDClassifier(loss='hinge', penalty='l2',