Example #1
from TextClean import textClean
from dictCount import dictCount
import numpy as np
import os
import re
from nltk.corpus import wordnet as wn
from collections import Counter
from nltk.corpus import stopwords
import matplotlib.pyplot as plt


allStories = textClean()

# Read the emotion word lists once, outside the loop, and close the files properly.
with open('Stories/emotions/happy.txt') as f:
    happy = [line.rstrip('\n') for line in f]
with open('Stories/emotions/negative.txt') as f:
    neg = [line.rstrip('\n') for line in f]

for k in range(len(allStories)):  # range(0, len(allStories)-1) skipped the last story
    story = allStories[k]

    #allEmotion = happy+neg

    totHap = []
    totNeg = []
    totSplit = 26
    split = len(story) // totSplit  # integer division so the slice indices below are ints

    for i in range(totSplit):  # range(0, totSplit-1) dropped the last chunk
        textChunk = dictCount(story[split*i:split*(i+1)])
        hCount = 0.0
        nCount = 0.0
        # .get handles missing words whether dictCount returns a plain dict or a Counter
        for word in happy:
            hCount += textChunk.get(word, 0)
        for word in neg:
            nCount += textChunk.get(word, 0)
        totHap.append(hCount)
        totNeg.append(nCount)
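    # The original example is truncated above; the lines below are a minimal
    # sketch of the apparent next step, assuming totHap/totNeg hold the
    # per-chunk counts built in the loop (labels and styling are illustrative,
    # not from the original):
    plt.plot(totHap, label='happy words')
    plt.plot(totNeg, label='negative words')
    plt.xlabel('story chunk')
    plt.ylabel('emotion word count')
    plt.title('Story %d' % k)
    plt.legend()
    plt.show()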
Example #2
import pandas
import re
from dictCount import dictCount
from TextClean import textClean
import numpy as np
import os
from nltk.corpus import stopwords
from splitSent import split_into_sentences
from collections import Counter
from radar import *
# Imports needed by the Pipeline below (missing from the original snippet):
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.linear_model import SGDClassifier


df = pandas.read_csv('ISEAR_FULL.csv')  # ',' is already the default separator
stories = textClean()
stop = set(stopwords.words('english'))
# Lowercase each ISEAR situation description, strip punctuation, drop English
# stopwords, and keep the emotion label ('FIELD1') as the target.
sentdf = []
Y_labels = []
for i in range(len(df)):
    sentence = df['SIT'][i]
    sentence = re.sub(r"[^\w\d'\s]+", ' ', sentence.lower())
    sentence = sentence.split()
    sentence = [word for word in sentence if word not in stop]
    sentence = ' '.join(sentence)
    sentdf.append(sentence)
    Y_labels.append(df['FIELD1'][i])

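# Illustrative only: a hypothetical row such as "When I failed my exam."
# would come out of the loop above as "failed exam".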
text_clf = Pipeline([('vect', CountVectorizer()),
                     ('tfidf', TfidfTransformer()),
                     ('clf', SGDClassifier(loss='hinge', penalty='l2',
                                           # the original snippet is truncated here;
                                           # the remaining SGD parameters are illustrative
                                           alpha=1e-3, random_state=42))])
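
The original example ends mid-pipeline. Below is a minimal sketch of how the classifier might be trained and scored on the sentdf/Y_labels built above; the 80/20 split and random_state are assumptions, not from the original:

from sklearn.model_selection import train_test_split

# Hold out 20% of the cleaned sentences for evaluation.
X_train, X_test, y_train, y_test = train_test_split(sentdf, Y_labels,
                                                    test_size=0.2,
                                                    random_state=42)
text_clf.fit(X_train, y_train)
predicted = text_clf.predict(X_test)
print('accuracy: %.3f' % np.mean(predicted == np.array(y_test)))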