def getFullVocab(self, data):
    if 'uni' in self.grams:
        unigrams = Vocabulary.getUniGrams(data.x_data)
    else:
        unigrams = set()
    if 'bi' in self.grams:
        bigrams = Vocabulary.getBiGrams(data.x_data)
    else:
        bigrams = set()
    allgrams = unigrams | bigrams
    # Unigrams and bigrams should never collide, so the union keeps them all
    assert len(unigrams) + len(bigrams) == len(allgrams)
    # Now reduce the vocabulary size by thresholding on feature counts
    counts = Vocabulary.getFullDict(data.x_data, allgrams, self.grams)
    counts = {k: val for k, val in counts.items()
              if NBModel.thr_condition(k, val, unigrams, bigrams,
                                       self.threshold)}
    # Store a concrete set rather than a live dict view
    self.vocabulary = set(counts.keys())
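# `NBModel.thr_condition` is referenced above but not defined in this section.
# A minimal sketch of what it could look like, assuming a single count
# threshold applied uniformly to unigrams and bigrams (an assumption, not
# necessarily the original implementation):
@staticmethod
def thr_condition(key, val, unigrams, bigrams, threshold):
    # Keep the feature only if it is a known n-gram and occurs at least
    # `threshold` times in the corpus.
    return (key in unigrams or key in bigrams) and val >= threshold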
def train(self, x_train, y_train):
    assert len(self.grams) > 0, "You must provide what n-grams to use"
    assert (self.grams == 'uni' or self.grams == 'bi' or
            self.grams == ['uni', 'bi'] or self.grams == ['bi', 'uni']), \
        "Only uni- or bi-grams are implemented!"
    # First extract the vocabulary
    if 'uni' in self.grams:
        unigrams = Vocabulary.getUniGrams(x_train)
    else:
        unigrams = set()
    if 'bi' in self.grams:
        bigrams = Vocabulary.getBiGrams(x_train)
    else:
        bigrams = set()
    allgrams = unigrams | bigrams
    assert len(unigrams) + len(bigrams) == len(allgrams)
    # Get the total number of documents
    # Pc is the prior probability of the positive class (class 1)
    N = len(x_train)
    Pc = sum(y_train) / N
    probs = {}
    # Remember that y_train == 1 -> positive
    for cl in np.unique(y_train):
        # Collect the documents belonging to this class
        cl_docs = [x_train[i]
                   for i in np.where(np.array(y_train) == cl)[0].tolist()]
        counts = Vocabulary.getFullDict(cl_docs, allgrams, self.grams)
        # Apply the threshold: drop features that appear fewer than
        # `threshold` times
        counts = {k: val for k, val in counts.items()
                  if NBModel.thr_condition(k, val, unigrams, bigrams,
                                           self.threshold)}
        probs['{}_occr'.format(cl)] = counts
        probs['{}_tot'.format(cl)] = sum(counts.values())
        probs['{}_Pc'.format(cl)] = Pc if cl == 1 else 1 - Pc
    self.model = probs
    self.y_train = y_train
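# A minimal prediction sketch (not part of the original code) showing how the
# stored `self.model` dictionary could be used at inference time. It assumes
# Laplace (add-one) smoothing and that `getFullVocab` has already been called
# so `self.vocabulary` is populated. `features` is a hypothetical list of a
# document's n-grams, extracted the same way as during training.
def predict_doc(self, features):
    import math
    V = len(self.vocabulary)
    scores = {}
    for cl in (0, 1):
        occr = self.model['{}_occr'.format(cl)]
        tot = self.model['{}_tot'.format(cl)]
        # Start from the log prior for the class
        score = math.log(self.model['{}_Pc'.format(cl)])
        for f in features:
            # Add-one smoothing keeps unseen features from zeroing the score
            score += math.log((occr.get(f, 0) + 1) / (tot + V))
        scores[cl] = score
    # Return the class with the highest log posterior
    return max(scores, key=scores.get)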