# Example #1
# 0
save_every = 100  # checkpoint interval in iterations (kept for parity with the training config)

# Build the tokenizer from the training vocabulary and load pretrained GloVe
# word embeddings for the language encoder.
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab)
glove = np.load(glove_path)

# A bidirectional LSTM concatenates both directions' states, so halve the
# per-direction size to keep the total encoder output width equal to
# hidden_size.
enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
feature_size = FEATURE_SIZE

# Visual trajectory encoder: consumes action embeddings + image features.
visEncoder = try_cuda(SpeakerEncoderLSTM(
        action_embedding_size, feature_size, enc_hidden_size, dropout_ratio,
        bidirectional=bidirectional))    
# Instruction (language) encoder: uni-directional, GloVe-initialized.
lanEncoder = try_cuda(EncoderLSTM(
        len(vocab), word_embedding_size, enc_hidden_size, vocab_pad_idx,
        dropout_ratio, bidirectional=False, glove=glove))
# Dot-product similarity head scoring (trajectory, instruction) pairs.
dotSim = try_cuda(dotSimilarity(batch_size, enc_hidden_size))

# Compatibility model wrapping the two encoders and the similarity head.
agent = compatModel(None, "", visEncoder, lanEncoder, dotSim)
# NOTE(review): alternative pretrained speaker checkpoint, kept for reference.
#agent.load('tasks/R2R/snapshots/release/speaker_final_release', map_location = 'cpu')
agent.load('tasks/R2R/compat/trained_1/compat_sample_imagenet_mean_pooled_train_iter_1000', map_location = 'cpu')
if __name__ == "__main__":
    # Smoke test: score one hard-coded trajectory against an empty
    # instruction.  The full 18-viewpoint path is truncated to its first two
    # viewpoints by the [:2] slice.
    traj = {'scan':'5q7pvUzZiYa', 'path':["7dc12a67ddfc4a4a849ce620db5b777b", "0e84cf4dec784bc28b78a80bee35c550", "a77784b955454209857d745976a1676d", "67971a17c26f4e2ca117b4fca73507fe", "8db06d3a0dd44508b3c078d60126ce19", "43ac37dfa1db4a13a8a9df4e454eb016", "4bd82c990a6548a994daa97c8f52db06", "6d11ca4d41e04bb1a725c2223c36b2aa", "29fb3c58b29348558d36a9f9440a1379", "c23f26401359426982d11ca494ee739b", "397403366d784caf804d741f32fd68b9", "3c6a35e15ada4b649990d6568cce8bd9", "55e4436f528c4bf09e4550079c572f7b", "69fad7dd177847dbabf69e8fb7c00ddf", "c629c7f1cf6f47a78c45a8ae9ff82247", "21fca0d6192940e580587fe317440f56", "4b85d61dd3a94e8a812affe78f3a322d", "3c025b8e3d2040969cd00dd0e9f29b09"][:2], 'heading':0.0,'elevation_init':0.0}
    # Encode the empty instruction and wrap it as a batch of one on CPU.
    encoded_instructions, _ = tok.encode_sentence('')
    encoded_instructions = torch.tensor([encoded_instructions], device = 'cpu')
    # rdv presumably replays the trajectory in the simulator to produce
    # per-step observations and actions — TODO confirm against its definition.
    rdv_test = rdv(traj)
    
    path_obs, path_actions = rdv_test.obs_and_acts()   
    # Predicted compatibility score for (trajectory, instruction).
    score = agent.predict(path_obs,path_actions,encoded_instructions)
    
    print(score)
# Example #2
# 0
weight_decay = 0.00005  # L2 regularization strength for the optimizer
#weight_decay = 0.0001
# 2048-dim image feature plus a 128-dim extra feature — presumably an
# orientation encoding; confirm against the feature loader.
FEATURE_SIZE = 2048 + 128
n_iters = 5000    # total training iterations
log_every = 100   # logging interval (iterations)
save_every = 100  # checkpoint interval (iterations)

# Build the tokenizer from the training vocabulary.
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab)


def _encode_instructions(items):
    """Tokenize each item's 'instructions' text in place.

    Adds 'instr_encoding' and 'instr_length' keys to every dict in *items*,
    as produced by ``tok.encode_sentence``.
    """
    for item in items:
        item['instr_encoding'], item['instr_length'] = tok.encode_sentence(
            item['instructions'])


def _load_hard_negatives(path):
    """Load a hard-negative JSON split from *path* and encode its instructions."""
    with open(path, 'r') as f:
        items = json.load(f)
    _encode_instructions(items)
    return items


# Hard-negative splits with tokenized instructions attached.
hardNeg_train = _load_hard_negatives('tasks/R2R/hardNeg_train.json')
hardNeg_val_seen = _load_hard_negatives('tasks/R2R/hardNeg_val_seen.json')
hardNeg_val_unseen = _load_hard_negatives('tasks/R2R/hardNeg_val_unseen.json')


def get_model_prefix(args, image_feature_list):
    image_feature_name = "+".join(