# Example #1
# 0
save_every = 100  # checkpoint interval in iterations (kept for parity with the training config)

# Build the tokenizer from the training vocabulary and load pretrained GloVe
# word embeddings for the language encoder.
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab)
glove = np.load(glove_path)

# A bidirectional LSTM concatenates both directions' states, so halve the
# per-direction size to keep the total encoder output width equal to
# hidden_size.
enc_hidden_size = hidden_size//2 if bidirectional else hidden_size
feature_size = FEATURE_SIZE

# Visual trajectory encoder: consumes action embeddings + image features.
visEncoder = try_cuda(SpeakerEncoderLSTM(
        action_embedding_size, feature_size, enc_hidden_size, dropout_ratio,
        bidirectional=bidirectional))    
# Instruction (language) encoder: uni-directional, GloVe-initialized.
lanEncoder = try_cuda(EncoderLSTM(
        len(vocab), word_embedding_size, enc_hidden_size, vocab_pad_idx,
        dropout_ratio, bidirectional=False, glove=glove))
# Dot-product similarity head scoring (trajectory, instruction) pairs.
dotSim = try_cuda(dotSimilarity(batch_size, enc_hidden_size))

# Compatibility model wrapping the two encoders and the similarity head.
agent = compatModel(None, "", visEncoder, lanEncoder, dotSim)
# NOTE(review): alternative pretrained speaker checkpoint, kept for reference.
#agent.load('tasks/R2R/snapshots/release/speaker_final_release', map_location = 'cpu')
agent.load('tasks/R2R/compat/trained_1/compat_sample_imagenet_mean_pooled_train_iter_1000', map_location = 'cpu')
if __name__ == "__main__":
    # Smoke test: score one hard-coded trajectory against an empty
    # instruction.  The full 18-viewpoint path is truncated to its first two
    # viewpoints by the [:2] slice.
    traj = {'scan':'5q7pvUzZiYa', 'path':["7dc12a67ddfc4a4a849ce620db5b777b", "0e84cf4dec784bc28b78a80bee35c550", "a77784b955454209857d745976a1676d", "67971a17c26f4e2ca117b4fca73507fe", "8db06d3a0dd44508b3c078d60126ce19", "43ac37dfa1db4a13a8a9df4e454eb016", "4bd82c990a6548a994daa97c8f52db06", "6d11ca4d41e04bb1a725c2223c36b2aa", "29fb3c58b29348558d36a9f9440a1379", "c23f26401359426982d11ca494ee739b", "397403366d784caf804d741f32fd68b9", "3c6a35e15ada4b649990d6568cce8bd9", "55e4436f528c4bf09e4550079c572f7b", "69fad7dd177847dbabf69e8fb7c00ddf", "c629c7f1cf6f47a78c45a8ae9ff82247", "21fca0d6192940e580587fe317440f56", "4b85d61dd3a94e8a812affe78f3a322d", "3c025b8e3d2040969cd00dd0e9f29b09"][:2], 'heading':0.0,'elevation_init':0.0}
    # Encode the empty instruction and wrap it as a batch of one on CPU.
    encoded_instructions, _ = tok.encode_sentence('')
    encoded_instructions = torch.tensor([encoded_instructions], device = 'cpu')
    # rdv presumably replays the trajectory in the simulator to produce
    # per-step observations and actions — TODO confirm against its definition.
    rdv_test = rdv(traj)
    
    path_obs, path_actions = rdv_test.obs_and_acts()   
    # Predicted compatibility score for (trajectory, instruction).
    score = agent.predict(path_obs,path_actions,encoded_instructions)
    
    print(score)
# Example #2
# 0
weight_decay = 0.00005  # L2 regularization strength for the optimizer
#weight_decay = 0.0001
# 2048-dim image feature plus a 128-dim extra feature — presumably an
# orientation encoding; confirm against the feature loader.
FEATURE_SIZE = 2048 + 128
n_iters = 5000    # total training iterations
log_every = 100   # logging interval (iterations)
save_every = 100  # checkpoint interval (iterations)

# Build the tokenizer from the training vocabulary.
vocab = read_vocab(TRAIN_VOCAB)
tok = Tokenizer(vocab=vocab)


def _encode_instructions(items):
    """Tokenize each item's 'instructions' text in place.

    Adds 'instr_encoding' and 'instr_length' keys to every dict in *items*,
    as produced by ``tok.encode_sentence``.
    """
    for item in items:
        item['instr_encoding'], item['instr_length'] = tok.encode_sentence(
            item['instructions'])


def _load_hard_negatives(path):
    """Load a hard-negative JSON split from *path* and encode its instructions."""
    with open(path, 'r') as f:
        items = json.load(f)
    _encode_instructions(items)
    return items


# Hard-negative splits with tokenized instructions attached.
hardNeg_train = _load_hard_negatives('tasks/R2R/hardNeg_train.json')
hardNeg_val_seen = _load_hard_negatives('tasks/R2R/hardNeg_val_seen.json')
hardNeg_val_unseen = _load_hard_negatives('tasks/R2R/hardNeg_val_unseen.json')


def get_model_prefix(args, image_feature_list):
    image_feature_name = "+".join(