from itertools import chain

import torch
from more_itertools import ilen
from tqdm import tqdm

# Vocabulary and PREPROCESSED_DIR are defined elsewhere in the repo.


def main(args):
    train_input_filepath = args.train_input
    dev_input_filepath = args.dev_input

    # Build the vocabulary from the training and dev data, then persist it.
    vocab = Vocabulary()
    vocab.load_word_from_data(train_path=train_input_filepath,
                              dev_path=dev_input_filepath)
    vocab.save(vocab_file=PREPROCESSED_DIR + 'vocab_file.vocab')
    print("vocab size: {}".format(len(vocab)))

    # Find the longest sentence across both files so that every sequence
    # can be padded to a common length.
    max_length = -1
    with open(train_input_filepath, 'r') as f_t, open(dev_input_filepath, 'r') as f_d:
        for line in chain(f_t, f_d):
            max_length = max(max_length, len(line.rstrip().split()))

    # Convert each file into a list of padded index tensors and serialize it.
    for file_name in [train_input_filepath, dev_input_filepath]:
        output_file = file_name.split('/')[-1] + '.preprocessed'
        data = []
        with open(file_name, 'r') as fin:
            total_len = ilen(fin)  # count the lines, then rewind
            fin.seek(0)
            bar = tqdm(total=total_len)
            for line in fin:
                tokens = line.rstrip().split()
                data.append(
                    torch.LongTensor(vocab.sentence2index(tokens, max_length)))
                bar.update(1)
            bar.close()
        torch.save(data, PREPROCESSED_DIR + output_file)
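# For context, main() only needs an object with train_input and dev_input
# attributes. A minimal sketch of an entry point follows; the flag names are
# assumptions, not taken from the original repo.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Preprocess train/dev corpora.')
    parser.add_argument('--train-input', dest='train_input', required=True,
                        help='path to the tokenized training file')
    parser.add_argument('--dev-input', dest='dev_input', required=True,
                        help='path to the tokenized dev file')
    main(parser.parse_args())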
import collections


def make_data_set_and_vocab(trainpath=None, vectorpath=None, threshold=0):
    vocab = Vocabulary()
    if vectorpath is not None:
        vocab.load(vectorpath)

    # Count word frequencies over the training corpus.
    # make_wakati is an external Japanese tokenizer defined elsewhere.
    counter = collections.Counter()
    with open(trainpath, 'r') as f:
        for line in f:
            for word in make_wakati(line.strip()):
                counter[word] += 1

    # most_common() yields words in descending frequency order, so we can
    # stop as soon as a count drops to the threshold.
    for word, cnt in counter.most_common():
        if cnt <= threshold:
            break
        if word not in vocab:
            vocab.add_word(word)
    vocab.save('vocab')

    # Build the dataset from here.
    data_set = MyDataset(trainpath=trainpath, vocab=vocab)
    return data_set, vocab
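# Usage sketch, assuming MyDataset is a torch.utils.data.Dataset and
# 'train.txt' points at a tokenizable corpus; both names are hypothetical.
from torch.utils.data import DataLoader

data_set, vocab = make_data_set_and_vocab(trainpath='train.txt', threshold=1)
loader = DataLoader(data_set, batch_size=32, shuffle=True)
for batch in loader:
    ...  # feed batches to the model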
import time

print("Reading data", time.asctime(time.localtime(time.time())))

# Estimate the vocabulary from the training data.
# To reuse a cached vocabulary instead:
# voc = pickle.load(open("voc.pkl", "rb"))
voc = Vocabulary()
with open(data_path, "r") as data:
    for line in data:
        voc.add_words(line.strip().split())
voc.prune(top_words)
voc.export_vocabulary(top_words, "voc.tsv")
voc.save("voc.pkl")

print("Starting training", time.asctime(time.localtime(time.time())))

reader = Reader(data_path, voc, n_contexts, window_size, k)

# Assemble the computation graph and pull out the tensors (terminals)
# needed by the training loop.
terminals = assemble_graph(top_words, n_dims)

first_batch = None

in_words_ = terminals['in_words']
out_words_ = terminals['out_words']
labels_ = terminals['labels']
train_ = terminals['train']
loss_ = terminals['loss']
adder_ = terminals['adder']
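# A minimal sketch of how these terminals might be consumed, assuming
# assemble_graph builds a TensorFlow 1.x graph and Reader exposes a
# next_batch() method returning (in_words, out_words, labels) arrays.
# Everything below other than the fetched terminals is an assumption,
# including n_iterations and the Reader API.
import tensorflow as tf  # TF1-style API; use tf.compat.v1 under TF2

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(n_iterations):  # n_iterations: hypothetical
        in_words, out_words, labels = reader.next_batch()  # hypothetical API
        _, loss_val = sess.run(
            [train_, loss_],
            feed_dict={in_words_: in_words,
                       out_words_: out_words,
                       labels_: labels})
        if step % 100 == 0:
            print("step {} loss {:.4f}".format(step, loss_val))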