コード例 #1
0
def anew_estimator(words=None):
    """Build an unfitted TF-IDF vectorizer over a stemmed word vocabulary.

    Args:
        words: Optional iterable of words to use as the vocabulary. When
            ``None`` (the default) the word list returned by
            ``load_extend_anew()`` is used instead.

    Returns:
        A ``TfidfVectorizer`` whose fixed vocabulary is the sorted,
        de-duplicated Snowball stems of ``words``.
    """
    if words is None:
        # Only fall back to the lexicon when the caller supplies nothing;
        # the argument used to be overwritten unconditionally.
        words, _, _ = load_extend_anew()
    english_stemmer = nltk.stem.SnowballStemmer('english')
    # Several words can share one stem, so de-duplicate; sort so that the
    # term -> column mapping does not depend on set iteration order.
    vocabulary = sorted({english_stemmer.stem(w) for w in words})
    # Note: the max_features parameter is ignored if vocabulary is not None
    vectorizer = TfidfVectorizer(vocabulary=vocabulary, binary=True, norm='l1', use_idf=False,
                                 sublinear_tf=True, max_df=0.5)
    return vectorizer
コード例 #2
0
def anew_estimator(words=None):
    """Build an unfitted TF-IDF vectorizer over a stemmed word vocabulary.

    Args:
        words: Optional iterable of words to use as the vocabulary. When
            ``None`` (the default) the word list returned by
            ``load_extend_anew()`` is used instead.

    Returns:
        A ``TfidfVectorizer`` whose fixed vocabulary is the sorted,
        de-duplicated Snowball stems of ``words``.
    """
    if words is None:
        # Only fall back to the lexicon when the caller supplies nothing;
        # the argument used to be overwritten unconditionally.
        words, _, _ = load_extend_anew()
    english_stemmer = nltk.stem.SnowballStemmer('english')
    # Several words can share one stem, so de-duplicate; sort so that the
    # term -> column mapping does not depend on set iteration order.
    vocabulary = sorted({english_stemmer.stem(w) for w in words})
    # Note: the max_features parameter is ignored if vocabulary is not None
    vectorizer = TfidfVectorizer(vocabulary=vocabulary,
                                 binary=True,
                                 norm='l1',
                                 use_idf=False,
                                 sublinear_tf=True,
                                 max_df=0.5)
    return vectorizer
コード例 #3
0
 def __init__(self):
     """Load the word list and valence values and pre-compute word stems.

     Keeps the first and third items returned by ``load_extend_anew()`` as
     ``self.words`` and ``self.valence``; the middle item is discarded.
     """
     loaded_words, _, loaded_valence = load_extend_anew()
     self.words = loaded_words
     self.valence = loaded_valence
     self.stemmer = nltk.stem.SnowballStemmer('english')
     # NOTE(review): presumably the upper bound of the rating scale -- confirm.
     self.max = 9
     # One stem per entry of self.words, in the same order (duplicates kept).
     stem = self.stemmer.stem
     self.stemmed_dict = list(map(stem, self.words))
コード例 #4
0
__author__ = 'NLP-PC'
from load_data import load_pickle
import nltk
from load_data import load_extend_anew

# Only the first item returned by load_extend_anew() is needed here.
words, _, _ = load_extend_anew()

# Dump the pickled feature names for inspection.
feature_names = load_pickle('./data/features/feature_names.p')
print(feature_names)

# Stem every loaded word and report how many stems there are (duplicates kept).
english_stemmer = nltk.stem.SnowballStemmer('english')
stemmed_dict = list(map(english_stemmer.stem, words))
print(len(stemmed_dict))

# Count the feature names that also occur among the stems.
overlapping_words = set(feature_names).intersection(stemmed_dict)
print(len(overlapping_words))

# Show what the stemmer returns for an empty string.
print(english_stemmer.stem(''))

# Peek at one entry of the pickled training features and at the container type.
features = load_pickle('./data/transformed_data/transformed_train.p')
print(features[1, 249])
print(type(features))

# Stem a sample sentence token by token.
d = 'We are very nice goes I am nicely'
sent = d.split()
print(sent)
stemmed_sent = list(map(english_stemmer.stem, sent))
print(stemmed_sent)
コード例 #5
0
def generate_extend_anew_synsets_data():
    """Run ``build_synsets`` on the words from ``load_extend_anew()``.

    Only the first of the three items returned by the loader is used.
    Prints ``Saved.`` once ``build_synsets`` returns.
    """
    loaded = load_extend_anew()
    anew_words, _, _ = loaded
    # NOTE(review): presumably build_synsets persists its result -- confirm.
    build_synsets(anew_words)
    print('Saved.')
コード例 #6
0
__author__ = 'NLP-PC'
from load_data import load_anew
from visualization import draw_scatter_with_labels
from load_data import load_extend_anew
# The same two label strings are passed to both plots.
axis_names = ('arousal', 'valence')

# First plot: data returned by load_extend_anew().
words, arousal, valence = load_extend_anew()
draw_scatter_with_labels(arousal, valence, words, *axis_names)

# Second plot: data returned by load_anew(), drawn the same way.
word, a, v = load_anew()
draw_scatter_with_labels(a, v, word, *axis_names)
コード例 #7
0
__author__ = "NLP-PC"
from load_data import load_anew
from visualization import draw_scatter_with_labels
from load_data import load_extend_anew

# The same two label strings are passed to both plots.
axis_names = ("arousal", "valence")

# First plot: data returned by load_extend_anew().
words, arousal, valence = load_extend_anew()
draw_scatter_with_labels(arousal, valence, words, *axis_names)

# Second plot: data returned by load_anew(), drawn the same way.
word, a, v = load_anew()
draw_scatter_with_labels(a, v, word, *axis_names)
コード例 #8
0
 def __init__(self):
     """Load the word list and valence values and pre-compute word stems.

     Keeps the first and third items returned by ``load_extend_anew()`` as
     ``self.words`` and ``self.valence``; the middle item is discarded.
     """
     loaded_words, _, loaded_valence = load_extend_anew()
     self.words = loaded_words
     self.valence = loaded_valence
     self.stemmer = nltk.stem.SnowballStemmer("english")
     # NOTE(review): presumably the upper bound of the rating scale -- confirm.
     self.max = 9
     # One stem per entry of self.words, in the same order (duplicates kept).
     stem = self.stemmer.stem
     self.stemmed_dict = list(map(stem, self.words))
コード例 #9
0
def generate_extend_anew_synsets_data():
    """Run ``build_synsets`` on the words from ``load_extend_anew()``.

    Only the first of the three items returned by the loader is used.
    Prints ``Saved.`` once ``build_synsets`` returns.
    """
    loaded = load_extend_anew()
    anew_words, _, _ = loaded
    # NOTE(review): presumably build_synsets persists its result -- confirm.
    build_synsets(anew_words)
    print('Saved.')
コード例 #10
0
__author__ = 'NLP-PC'
from load_data import load_pickle
import nltk
from load_data import load_extend_anew

# Only the first item returned by load_extend_anew() is needed here.
words, _, _ = load_extend_anew()

# Dump the pickled feature names for inspection.
feature_names = load_pickle('./data/features/feature_names.p')
print(feature_names)

# Stem every loaded word and report how many stems there are (duplicates kept).
english_stemmer = nltk.stem.SnowballStemmer('english')
stemmed_dict = list(map(english_stemmer.stem, words))
print(len(stemmed_dict))

# Count the feature names that also occur among the stems.
overlapping_words = set(feature_names).intersection(stemmed_dict)
print(len(overlapping_words))

# Show what the stemmer returns for an empty string.
print(english_stemmer.stem(''))

# Peek at one entry of the pickled training features and at the container type.
features = load_pickle('./data/transformed_data/transformed_train.p')
print(features[1, 249])
print(type(features))

# Stem a sample sentence token by token.
d = 'We are very nice goes I am nicely'
sent = d.split()
print(sent)
stemmed_sent = list(map(english_stemmer.stem, sent))
print(stemmed_sent)