def main(cfg: DictConfig):
    """Evaluate a diacritics-restoration model on the dev file.

    Loads the serialized model and encoder/decoder vocabularies from
    ``cfg.model_dir``, strips diacritics from the dev sentences to build
    the model input, runs batched inference, and prints word-level
    accuracy (computed over words longer than one character).
    """
    model_file = os.path.join(cfg.model_dir, 'model.pt')
    model = torch.load(model_file, map_location=torch.device(device)).to(device)
    model.eval()

    vocab_file = os.path.join(cfg.model_dir, 'vocab.pkl')
    vocab_dec_file = os.path.join(cfg.model_dir, 'vocab_dec.pkl')
    with open(vocab_file, 'rb') as file:
        vocab_enc = pickle.load(file)
    with open(vocab_dec_file, 'rb') as file:
        vocab_dec = pickle.load(file)
    vocab = Vocabulary(vocab=vocab_enc, vocab_dec=vocab_dec)

    eval_df = pd.read_table(cfg.dev_file, header=None, names=['target'])
    # NOTE(review): removed a leftover debugging slice (iloc[100:102]) that
    # restricted evaluation to two sentences; the whole dev set is scored now.
    eval_df['source'] = eval_df.apply(lambda x: remove_diacritics(x.target), axis=1)
    eval_df['src_encoded'] = eval_df.apply(lambda x: vocab.encode(x.source), axis=1)

    target = eval_df.target.to_numpy(dtype=str)
    target_words = np.hstack(np.char.split(target, sep=' '))
    # Skip one-character tokens (punctuation etc.) when scoring.
    target_words = np.array(list(filter(lambda x: len(x) > 1, target_words)))

    X_dev = eval_df.src_encoded.to_numpy()
    predicted = []
    test_iter = BatchedIterator(X_dev, batch_size=10)
    # Inference only: no_grad avoids building autograd graphs.
    with torch.no_grad():
        for bi, src in enumerate(test_iter.iterate_once()):
            src_padded = pad_data(src[0], vocab_enc['<PAD>']).to(device)
            outputs = model(src_padded)
            outputs_pred = outputs.argmax(-1)
            for output in outputs_pred:
                predicted.append(vocab.decode_output(output.tolist()))

    predicted = np.hstack(np.char.split(predicted, sep=' '))
    predicted = np.array(list(filter(lambda x: len(x) > 1, predicted)))

    # Guard: a length mismatch would previously make the elementwise
    # comparison fail (or silently broadcast); warn and score the overlap.
    if len(predicted) != len(target_words):
        print(f'Warning: token count mismatch: {len(predicted)} predicted '
              f'vs {len(target_words)} target')
    n = min(len(predicted), len(target_words))
    correct = (target_words[:n] == predicted[:n]).sum()
    accuracy = correct / len(predicted)
    print(accuracy)
def main(cfg: DictConfig):
    """Run diacritics restoration, either over a file or interactively.

    With ``cfg.use_file`` set, restores diacritics for every line of
    ``cfg.file`` in batches and writes the predictions to
    ``<exp_dir>/inference/<lang>_predicted.txt``. Otherwise reads
    sentences from stdin until the user types "exit".
    """
    model_file = os.path.join(cfg.exp_dir, 'model.pt')
    model = torch.load(model_file, map_location=torch.device(device)).to(device)
    model.eval()

    vocab_file = os.path.join(cfg.exp_dir, 'vocab.pkl')
    vocab_dec_file = os.path.join(cfg.exp_dir, 'vocab_dec.pkl')
    with open(vocab_file, 'rb') as file:
        vocab_enc = pickle.load(file)
    with open(vocab_dec_file, 'rb') as file:
        vocab_dec = pickle.load(file)
    vocab = Vocabulary(vocab=vocab_enc, vocab_dec=vocab_dec)

    if cfg.use_file:
        source = get_processed_data(cfg.file, vocab)
        predicted = []
        test_iter = BatchedIterator(source, batch_size=128)
        # Inference only: no_grad avoids building autograd graphs.
        with torch.no_grad():
            for bi, src in enumerate(test_iter.iterate_once()):
                src_padded = pad_data(src[0], vocab_enc['<PAD>']).to(device)
                outputs = model(src_padded)
                outputs_pred = outputs.argmax(-1)
                for output in outputs_pred:
                    predicted.append(vocab.decode_output(output.tolist()))

        pred_file = os.path.join(cfg.exp_dir, f'inference/{cfg.lang}_predicted.txt')
        os.makedirs(os.path.dirname(pred_file), exist_ok=True)
        with open(pred_file, 'w') as file:
            file.write('\n'.join(predicted))
    else:
        # Interactive loop: one sentence per prompt until the user types "exit".
        sentence = input("Sentence: ")
        while sentence != "exit":
            sentence = sentence.lower()
            encoded = vocab.encode(sentence)
            encoded = torch.tensor(encoded)
            encoded = torch.unsqueeze(encoded, 0).to(device)
            with torch.no_grad():
                output = model(encoded)
            output = output.argmax(-1).to('cpu').tolist()
            decoded = vocab.decode_output(output[0])
            print(f"Restored diacritics version: {decoded}")
            sentence = input("Sentence: ")
''' Generates time series latent and observed state for the HMMesque models '''
import pandas as pd

from analytics import *
from vocabulary import Vocabulary
from fillin_heuristics import *

### Build vocabularies

# Observed-state vocabulary: one entry per distinct species/cell/gene
# mention and per distinct (first, second, type) relation triple.
obs_voc = Vocabulary()
for val in species.text.drop_duplicates():
    obs_voc.encode(val, 'species')
for val in cells.text.drop_duplicates():
    obs_voc.encode(val, 'cells')
for val in genes.text.drop_duplicates():
    obs_voc.encode(val, 'genes')

# itertuples avoids positional integer indexing on a label-indexed Series
# (t[0]/t[1]/t[2]), which is deprecated in pandas 2.x and removed in 3.0.
for first, second, rel_type in relations[['first', 'second', 'type']].drop_duplicates().itertuples(index=False):
    obs_voc.encode('%s|%s|%s' % (first, second, rel_type), 'relations')

# Latent-state vocabulary: one entry per distinct (type, text) context pair.
lat_voc = Vocabulary()
for ix, t in context[['type', 'text']].drop_duplicates().iterrows():
    kind, val = t
    lat_voc.encode(val, kind)

#####################
from vocabulary import Vocabulary
from collections import Counter

# Smoke test: build a vocabulary from token counts, then round-trip a
# tokenized review through encode/decode and print each stage.
review = [
    "The", "pizza", "is", "excellent", ".",
    "The", "wine", "is", "not", ".",
]

token_counts = Counter(review)
print(token_counts)

vocab = Vocabulary(token_counts)
print(vocab)

print(vocab.encode(review))
print(vocab.decode(vocab.encode(review)))