Code example #1
import os
import pickle

import numpy as np
import pandas as pd
import torch
from omegaconf import DictConfig

# Project-local helpers not shown in this snippet: Vocabulary,
# BatchedIterator, pad_data and remove_diacritics (a sketch of the
# iterator helpers follows this example).
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # assumed module-level setup


def main(cfg: DictConfig):
    model_file = os.path.join(cfg.model_dir, 'model.pt')
    model = torch.load(model_file,
                       map_location=torch.device(device)).to(device)
    model.eval()

    vocab_file = os.path.join(cfg.model_dir, 'vocab.pkl')
    vocab_dec_file = os.path.join(cfg.model_dir, 'vocab_dec.pkl')
    with open(vocab_file, 'rb') as file:
        vocab_enc = pickle.load(file)
    with open(vocab_dec_file, 'rb') as file:
        vocab_dec = pickle.load(file)
    vocab = Vocabulary(vocab=vocab_enc, vocab_dec=vocab_dec)

    eval_df = pd.read_table(cfg.dev_file, header=None, names=['target'])
    eval_df = eval_df.iloc[100:102]  # restrict evaluation to a two-row slice (rows 100-101)
    eval_df['source'] = eval_df.apply(lambda x: remove_diacritics(x.target),
                                      axis=1)
    eval_df['src_encoded'] = eval_df.apply(lambda x: vocab.encode(x.source),
                                           axis=1)

    target = eval_df.target.to_numpy(dtype=str)

    target_words = np.hstack(np.char.split(target, sep=' '))
    target_words = np.array(list(filter(lambda x: len(x) > 1, target_words)))  # drop one-character tokens

    print(eval_df.iloc[0].source)
    print(eval_df.iloc[1].source)

    X_dev = eval_df.src_encoded.to_numpy()

    predicted = []
    test_iter = BatchedIterator(X_dev, batch_size=10)

    for bi, src in enumerate(test_iter.iterate_once()):
        src_padded = pad_data(src[0], vocab_enc['<PAD>']).to(device)

        outputs = model(src_padded)
        print(outputs.shape)
        outputs_pred = outputs.argmax(-1)

        for output in outputs_pred:
            decoded_sentence = vocab.decode_output(output.tolist())
            print(decoded_sentence)
            predicted.append(decoded_sentence)

    predicted = np.hstack(np.char.split(predicted, sep=' '))
    predicted = np.array(list(filter(lambda x: len(x) > 1, predicted)))

    print(predicted.shape)
    print(target_words.shape)
    correct = (target_words == predicted).sum()  # assumes predictions align word-for-word with the targets
    accuracy = correct / len(predicted)
    print(accuracy)
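Examples #1 and #2 call BatchedIterator and pad_data without showing them. Below is a minimal sketch of what those helpers might look like, inferred only from how they are used here; the project's real implementations may differ.

import torch


class BatchedIterator:
    # Sketch: wraps one or more aligned arrays and yields them in
    # fixed-size batches, one tuple of slices per step.
    def __init__(self, *arrays, batch_size=128):
        self.arrays = arrays
        self.batch_size = batch_size

    def iterate_once(self):
        n = len(self.arrays[0])
        for start in range(0, n, self.batch_size):
            yield tuple(arr[start:start + self.batch_size] for arr in self.arrays)


def pad_data(batch, pad_id):
    # Sketch: right-pads variable-length ID sequences to the longest one in
    # the batch and stacks them into a (batch, max_len) LongTensor.
    max_len = max(len(seq) for seq in batch)
    padded = [list(seq) + [pad_id] * (max_len - len(seq)) for seq in batch]
    return torch.tensor(padded, dtype=torch.long)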
Code example #2
import os
import pickle

import torch
from omegaconf import DictConfig

# Project-local helpers not shown here: Vocabulary, BatchedIterator,
# pad_data and get_processed_data (a plausible sketch of the latter
# follows this example).
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # assumed module-level setup


def main(cfg: DictConfig):
    model_file = os.path.join(cfg.exp_dir, 'model.pt')
    model = torch.load(model_file,
                       map_location=torch.device(device)).to(device)

    model.eval()

    vocab_file = os.path.join(cfg.exp_dir, 'vocab.pkl')
    vocab_dec_file = os.path.join(cfg.exp_dir, 'vocab_dec.pkl')
    with open(vocab_file, 'rb') as file:
        vocab_enc = pickle.load(file)
    with open(vocab_dec_file, 'rb') as file:
        vocab_dec = pickle.load(file)

    vocab = Vocabulary(vocab=vocab_enc, vocab_dec=vocab_dec)

    if cfg.use_file:
        source = get_processed_data(cfg.file, vocab)
        predicted = []
        test_iter = BatchedIterator(source, batch_size=128)

        for bi, src in enumerate(test_iter.iterate_once()):
            src_padded = pad_data(src[0], vocab_enc['<PAD>']).to(device)

            outputs = model(src_padded)

            outputs_pred = outputs.argmax(-1)

            for output in outputs_pred:
                predicted.append(vocab.decode_output(output.tolist()))

        pred_file = os.path.join(cfg.exp_dir,
                                 f'inference/{cfg.lang}_predicted.txt')
        os.makedirs(os.path.dirname(pred_file), exist_ok=True)

        with open(pred_file, 'w') as file:
            file.write('\n'.join(predicted))
    else:
        sentence = input("Sentence: ")
        while sentence != "exit":
            sentence = sentence.lower()
            encoded = vocab.encode(sentence)
            encoded = torch.tensor(encoded)
            encoded = torch.unsqueeze(encoded, 0).to(device)
            output = model(encoded)
            output = output.argmax(-1).to('cpu').tolist()
            decoded = vocab.decode_output(output[0])
            print(f"Restored diacritics version: {decoded}")
            sentence = input("Sentence: ")
Code example #3
''' Generates time series latent and observed states for the HMMesque models '''
import pandas as pd
# The star imports below are expected to supply the species, cells, genes,
# relations and context DataFrames used throughout this script.
from analytics import *
from vocabulary import Vocabulary
from fillin_heuristics import *

### Build vocabularies
obs_voc = Vocabulary()

for val in species.text.drop_duplicates():
    obs_voc.encode(val, 'species')

for val in cells.text.drop_duplicates():
    obs_voc.encode(val, 'cells')

for val in genes.text.drop_duplicates():
    obs_voc.encode(val, 'genes')

for _, row in relations[['first', 'second', 'type']].drop_duplicates().iterrows():
    val = '%s|%s|%s' % (row['first'], row['second'], row['type'])
    obs_voc.encode(val, 'relations')


lat_voc = Vocabulary()

for ix, t in context[['type', 'text']].drop_duplicates().iterrows():
    kind, val = t
    lat_voc.encode(val, kind)
#####################
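The Vocabulary used in this example takes a second kind argument, unlike the ones in the other snippets. A minimal sketch consistent with these calls, in which each (kind, value) pair gets a stable integer ID on first sight; this is an assumption about the vocabulary module, not its actual code.

from collections import defaultdict


class Vocabulary:
    # Sketch: one independent value -> ID table per kind.
    def __init__(self):
        self._ids = defaultdict(dict)

    def encode(self, value, kind):
        ids = self._ids[kind]
        if value not in ids:
            ids[value] = len(ids)  # assign the next free ID on first sight
        return ids[value]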
Code example #4
from vocabulary import Vocabulary
from collections import Counter
review = [
    "The", "pizza", "is", "excellent", ".", "The", "wine", "is", "not", "."
]
count = Counter(review)  # token -> frequency
print(count)
vocabulary = Vocabulary(count)
print(vocabulary)
print(vocabulary.encode(review))                     # tokens -> integer IDs
print(vocabulary.decode(vocabulary.encode(review)))  # round-trips back to the tokens
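For reference, a minimal Counter-based Vocabulary that would make this snippet run as written: IDs are assigned by descending frequency, and encode/decode map token sequences to ID sequences and back. This is a sketch under those assumptions, not the actual vocabulary module.

class Vocabulary:
    def __init__(self, counter):
        # Most frequent tokens get the smallest IDs.
        tokens = [tok for tok, _ in counter.most_common()]
        self.token_to_id = {tok: i for i, tok in enumerate(tokens)}
        self.id_to_token = tokens

    def encode(self, tokens):
        return [self.token_to_id[tok] for tok in tokens]

    def decode(self, ids):
        return [self.id_to_token[i] for i in ids]

    def __repr__(self):
        return 'Vocabulary(%d tokens)' % len(self.id_to_token)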