Esempi in Python per Vocabulary.sorted_tokens

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: Vocabulary

Classe/tipologia: Vocabulary

Metodo/funzione: sorted_tokens

Esempi su hotexamples.com: 1

Vocabulary.sorted_tokens in Python: 1 esempio trovato. Questo è il miglior esempio reale in Python per Vocabulary.Vocabulary.sorted_tokens, estratto da progetti open source. Lo puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

Vocabulary(30)

add_token(5)

load(5)

add_word(5)

save(3)

get_vocab(3)

from_serializable(3)

get_word(3)

index(2)

build_from_token(2)

make_vocab_charts(2)

readPostProcessingVoc(2)

get_index(2)

getIndex(2)

fetch(2)

addSentence(1)

load_bigquery_vocab_from_indexed(1)

load_vocab_from_local(1)

load_word_from_data(1)

make_array_of_words_from_sentences(1)

prune(1)

restore_text(1)

add_sentence_pair(1)

save_dict(1)

loadIndexFile(1)

sentence2indices(1)

similar(1)

size(1)

sorted_tokens(1)

startSymbolWordID(1)

symbol(1)

text2ids(1)

to_index(1)

unknownWordID(1)

sentence2index(1)

incrementDF(1)

addSymbol(1)

from_serialiable(1)

add_words(1)

build_vocabulary(1)

checkIndex(1)

create(1)

create_from_text(1)

de_tokenize_data(1)

endSymbolWordID(1)

expand(1)

export_vocabulary(1)

addWord(1)

isATerm(1)

getCF(1)

Esempio n. 1

Mostra file

class TransitionMatrix:
    """Token-to-token transition model estimated from a text corpus.

    Counts are accumulated in the sparse matrix ``tm``, indexed by the word
    ids handed out by ``self.voc``.  ``validate()`` turns the counts into
    row-normalised probabilities (``self.p``), from which ``sample()`` draws
    the token following a given one.  ``self.start`` collects the first token
    of every token sequence so that ``sample_start()`` can draw an opening
    token.
    """

    def __init__(self, corpus=""):
        """Create the model and, optionally, seed it with ``corpus``.

        Args:
            corpus: Text passed to ``add_from_text``; defaults to empty.
        """
        self.voc = Vocabulary()
        self.tm = scipy.sparse.dok_matrix((1000, 1000), dtype=np.float32)
        # All state must exist *before* add_from_text() runs: that method
        # writes to self.start, so creating it afterwards made every
        # non-empty corpus fail with AttributeError.
        self.start = UnigramLM(self.voc)
        self.valid = False
        self.sorted_tokens = []
        self.p = None

        self.add_from_text(corpus)

    def add_from_text(self, text):
        """Tokenize ``text`` and add its transitions to the count matrix.

        Invalidates the cached probabilities; they are rebuilt lazily by the
        next ``validate()`` call.

        Args:
            text: Raw text, split into token sequences by ``tokenize_corpus``.
        """
        self.valid = False

        for tokens in tokenize_corpus(text):
            # An empty sequence has no start token and no transitions, and
            # max() of its (empty) id list would raise ValueError.
            if len(tokens) == 0:
                continue

            self.start.add_token(tokens[0])

            self.voc.expand(tokens, from_tokens=True)
            wids = self.voc.get_word_id(tokens)

            # Grow the square count matrix so the largest id is addressable.
            largest = max(wids)
            if largest >= self.tm.shape[0]:
                self.tm.resize((largest + 1, largest + 1))

            # Each gram is used directly as an index into the 2-D matrix
            # (presumably a (previous id, next id) pair -- confirm against
            # getNGrams).
            for gram in getNGrams(wids):
                self.tm[gram] += 1

    def validate(self):
        """Rebuild the probability matrix and token list if they are stale."""
        if self.valid:
            return

        counts = self.tm.tocsr()
        row_sums = counts.sum(axis=1)
        probs = counts / row_sums
        # Depending on the SciPy version, sparse / dense yields either a
        # dense matrix or a COO sparse matrix; COO cannot be row-indexed, so
        # convert it to CSR for the lookups done in sample().
        if scipy.sparse.issparse(probs):
            probs = probs.tocsr()

        self.p = probs
        self.sorted_tokens = self.voc.sorted_tokens()
        self.valid = True

    def sample_start(self):
        """Draw an opening token from the start-token distribution."""
        tokens, probs = self.start.get_dist()
        return np.random.choice(tokens, p=probs)

    def sample(self, t):
        """Draw a token to follow ``t`` according to the transition row of ``t``.

        Args:
            t: The current token; it is looked up through ``self.voc``.

        Returns:
            One element of ``self.sorted_tokens``.
        """
        self.validate()
        wid = self.voc.get_word_id([t])[0]

        row = self.p[wid, :]
        if scipy.sparse.issparse(row):
            row = row.toarray()
        # ravel() always gives a 1-D vector (reshape + squeeze collapsed a
        # one-element row to a 0-d array).
        probs = np.asarray(row).ravel()

        # The matrix has at least 1000 columns, usually more than there are
        # tokens, while np.random.choice requires both arguments to have the
        # same length.  Columns past the vocabulary size never receive counts
        # (assuming word ids index into sorted_tokens -- TODO confirm), so
        # dropping them leaves the distribution unchanged.
        probs = probs[:len(self.sorted_tokens)]
        return np.random.choice(self.sorted_tokens, p=probs)

    def __str__(self):
        """Return the ``repr`` of the underlying count matrix."""
        return repr(self.tm)