def anew_estimator(words=None):
    """Build an (unfitted) TfidfVectorizer restricted to a stemmed vocabulary.

    Args:
        words: optional iterable of vocabulary words. When None (the default),
            the extended ANEW lexicon is loaded via ``load_extend_anew()``.
            Previously this parameter was silently ignored; it is now honored.

    Returns:
        A TfidfVectorizer whose vocabulary is the set of Snowball-stemmed
        words, using binary, L1-normalized, sublinear term frequencies.
    """
    if words is None:
        # Fall back to the project's extended ANEW word list.
        words, _, _ = load_extend_anew()
    english_stemmer = nltk.stem.SnowballStemmer('english')
    stemmed_vocabulary = {english_stemmer.stem(w) for w in words}
    # Note: the max_features parameter is ignored if vocabulary is not None
    vectorizer = TfidfVectorizer(vocabulary=stemmed_vocabulary, binary=True,
                                 norm='l1', use_idf=False, sublinear_tf=True,
                                 max_df=0.5)
    return vectorizer
def __init__(self):
    """Load the extended ANEW lexicon and precompute a stemmed word list."""
    lexicon_words, _, lexicon_valence = load_extend_anew()
    self.words = lexicon_words
    self.valence = lexicon_valence
    self.stemmer = nltk.stem.SnowballStemmer('english')
    # Upper bound of the rating scale used by this lexicon.
    self.max = 9
    self.stemmed_dict = [self.stemmer.stem(word) for word in self.words]
__author__ = 'NLP-PC'
from load_data import load_pickle
import nltk
from load_data import load_extend_anew

# Inspect the overlap between the saved feature names and the stemmed
# extended-ANEW lexicon, then sanity-check the stemmer on a sample sentence.
anew_words, _, _ = load_extend_anew()
feature_names = load_pickle('./data/features/feature_names.p')
print(feature_names)

stemmer = nltk.stem.SnowballStemmer('english')
stemmed_lexicon = [stemmer.stem(word) for word in anew_words]
print(len(stemmed_lexicon))

shared_words = set(feature_names) & set(stemmed_lexicon)
print(len(shared_words))
print(stemmer.stem(''))

# Peek at one entry of the transformed training matrix.
features = load_pickle('./data/transformed_data/transformed_train.p')
print(features[1, 249])
print(type(features))

sample = 'We are very nice goes I am nicely'
tokens = list(sample.split())
print(tokens)
stemmed_tokens = [stemmer.stem(token) for token in tokens]
print(stemmed_tokens)
def generate_extend_anew_synsets_data():
    """Build and persist synsets for the extended ANEW word list."""
    lexicon, _, _ = load_extend_anew()
    build_synsets(lexicon)
    print('Saved.')
__author__ = 'NLP-PC'
from load_data import load_anew
from visualization import draw_scatter_with_labels
from load_data import load_extend_anew

# Draw arousal-vs-valence scatter plots: first for the extended ANEW
# lexicon, then for the original ANEW lexicon.
ext_words, ext_arousal, ext_valence = load_extend_anew()
draw_scatter_with_labels(ext_arousal, ext_valence, ext_words, 'arousal', 'valence')

anew_word, anew_arousal, anew_valence = load_anew()
draw_scatter_with_labels(anew_arousal, anew_valence, anew_word, 'arousal', 'valence')
__author__ = "NLP-PC" from load_data import load_anew from visualization import draw_scatter_with_labels from load_data import load_extend_anew words, arousal, valence = load_extend_anew() draw_scatter_with_labels(arousal, valence, words, "arousal", "valence") word, a, v = load_anew() draw_scatter_with_labels(a, v, word, "arousal", "valence")
def __init__(self):
    """Load the extended ANEW lexicon and precompute a stemmed word list."""
    lexicon_words, _, lexicon_valence = load_extend_anew()
    self.words = lexicon_words
    self.valence = lexicon_valence
    self.stemmer = nltk.stem.SnowballStemmer("english")
    # Upper bound of the rating scale used by this lexicon.
    self.max = 9
    self.stemmed_dict = [self.stemmer.stem(word) for word in self.words]
def generate_extend_anew_synsets_data():
    """Build and persist synsets for the extended ANEW word list."""
    lexicon, _, _ = load_extend_anew()
    build_synsets(lexicon)
    print('Saved.')
__author__ = 'NLP-PC'
from load_data import load_pickle
import nltk
from load_data import load_extend_anew

# Inspect the overlap between the saved feature names and the stemmed
# extended-ANEW lexicon, then sanity-check the stemmer on a sample sentence.
anew_words, _, _ = load_extend_anew()
feature_names = load_pickle('./data/features/feature_names.p')
print(feature_names)

stemmer = nltk.stem.SnowballStemmer('english')
stemmed_lexicon = [stemmer.stem(word) for word in anew_words]
print(len(stemmed_lexicon))

shared_words = set(feature_names) & set(stemmed_lexicon)
print(len(shared_words))
print(stemmer.stem(''))

# Peek at one entry of the transformed training matrix.
features = load_pickle('./data/transformed_data/transformed_train.p')
print(features[1, 249])
print(type(features))

sample = 'We are very nice goes I am nicely'
tokens = list(sample.split())
print(tokens)
stemmed_tokens = [stemmer.stem(token) for token in tokens]
print(stemmed_tokens)