Example #1
from TextClean import textClean
from dictCount import dictCount
import numpy as np
import os
import re
from nltk.corpus import wordnet as wn
from collections import Counter
from nltk.corpus import stopwords
import matplotlib.pyplot as plt


allStories = textClean()

# Read the emotion word lists once, outside the loop, and close the files properly.
with open('Stories/emotions/happy.txt') as f:
    happy = [line.rstrip('\n') for line in f]
with open('Stories/emotions/negative.txt') as f:
    neg = [line.rstrip('\n') for line in f]

for k in range(len(allStories)):  # range(0, len(allStories)-1) skipped the last story
    story = allStories[k]

    #allEmotion = happy+neg

    totHap = []
    totNeg = []
    totSplit = 26
    split = len(story) // totSplit  # integer division so the slice indices below are ints

    for i in range(totSplit):  # range(0, totSplit-1) dropped the last chunk
        textChunk = dictCount(story[split*i:split*(i+1)])
        hCount = 0.0
        nCount = 0.0
        # .get handles missing words whether dictCount returns a plain dict or a Counter
        for word in happy:
            hCount += textChunk.get(word, 0)
        for word in neg:
            nCount += textChunk.get(word, 0)
        totHap.append(hCount)
        totNeg.append(nCount)
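    # The original example is truncated above; the lines below are a minimal
    # sketch of the apparent next step, assuming totHap/totNeg hold the
    # per-chunk counts built in the loop (labels and styling are illustrative,
    # not from the original):
    plt.plot(totHap, label='happy words')
    plt.plot(totNeg, label='negative words')
    plt.xlabel('story chunk')
    plt.ylabel('emotion word count')
    plt.title('Story %d' % k)
    plt.legend()
    plt.show()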
Example #2
import pandas
import re
from dictCount import dictCount
from TextClean import textClean
import numpy as np
import os
from nltk.corpus import stopwords
from splitSent import split_into_sentences
from collections import Counter
from radar import *
# Imports needed by the Pipeline below (missing from the original snippet):
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.linear_model import SGDClassifier


df = pandas.read_csv('ISEAR_FULL.csv')  # ',' is already the default separator
stories = textClean()
stop = set(stopwords.words('english'))
# Lowercase each ISEAR situation description, strip punctuation, drop English
# stopwords, and keep the emotion label ('FIELD1') as the target.
sentdf = []
Y_labels = []
for i in range(len(df)):
    sentence = df['SIT'][i]
    sentence = re.sub(r"[^\w\d'\s]+", ' ', sentence.lower())
    sentence = sentence.split()
    sentence = [word for word in sentence if word not in stop]
    sentence = ' '.join(sentence)
    sentdf.append(sentence)
    Y_labels.append(df['FIELD1'][i])

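# Illustrative only: a hypothetical row such as "When I failed my exam."
# would come out of the loop above as "failed exam".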
text_clf = Pipeline([('vect', CountVectorizer()),
                     ('tfidf', TfidfTransformer()),
                     ('clf', SGDClassifier(loss='hinge', penalty='l2',
                                           # the original snippet is truncated here;
                                           # the remaining SGD parameters are illustrative
                                           alpha=1e-3, random_state=42))])
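
The original example ends mid-pipeline. Below is a minimal sketch of how the classifier might be trained and scored on the sentdf/Y_labels built above; the 80/20 split and random_state are assumptions, not from the original:

from sklearn.model_selection import train_test_split

# Hold out 20% of the cleaned sentences for evaluation.
X_train, X_test, y_train, y_test = train_test_split(sentdf, Y_labels,
                                                    test_size=0.2,
                                                    random_state=42)
text_clf.fit(X_train, y_train)
predicted = text_clf.predict(X_test)
print('accuracy: %.3f' % np.mean(predicted == np.array(y_test)))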