Example #1
def init_uvfa_text(args, layout_vocab_size, object_vocab_size, text_vocab_size, rank=7):
    print '<Models> Using UVFA variant; consider using a lower learning rate (e.g., 0.0001)'
    print '<Models> UVFA rank: {}'.format(rank)

    # The text LSTM's output size is tied to the UVFA rank.
    args.rank = rank
    args.lstm_out = rank

    text_model = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid, args.lstm_layers, args.lstm_out)
    model = cudit(models.UVFA_text(text_model, layout_vocab_size, object_vocab_size, args, map_dim=args.map_dim))
    return model
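
The cudit helper that appears in Examples #1-#3 is not defined in these snippets. Below is a minimal sketch of what such a wrapper typically looks like in PyTorch code, assuming it simply moves a module onto the GPU when one is available; the repo's actual definition may differ.

import torch

def cudit(module):
    # Hypothetical stand-in for the undefined `cudit` helper:
    # move the module to the GPU if CUDA is available, else keep it on the CPU.
    return module.cuda() if torch.cuda.is_available() else module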
Example #2
def init_cnn_lstm(args, layout_vocab_size, object_vocab_size, text_vocab_size):
    args.lstm_out = 16
    args.cnn_out_dim = 2*args.lstm_out

    state_model = models.LookupModel(layout_vocab_size, args.state_embed)
    object_model = models.LookupModel(object_vocab_size, args.obj_embed)

    lstm = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid, args.lstm_layers, args.lstm_out)

    model = cudit(models.CNN_LSTM(state_model, object_model, lstm, args))
    return model
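
A hypothetical invocation of init_cnn_lstm, assuming args is an argparse-style namespace carrying the embedding and LSTM hyperparameters read above. The values below are illustrative rather than the repo's defaults, and models.CNN_LSTM may consume additional args fields not visible in this snippet.

from argparse import Namespace

# Hypothetical hyperparameters; the real defaults live in the repo's arg parser.
args = Namespace(state_embed=8, obj_embed=8,
                 lstm_inp=32, lstm_hid=64, lstm_layers=1)
model = init_cnn_lstm(args, layout_vocab_size=10,
                      object_vocab_size=20, text_vocab_size=500)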
Example #3
def init_nogradient(args, layout_vocab_size, object_vocab_size, text_vocab_size):
    args.global_coeffs = 0
    args.attention_in_dim = args.obj_embed
    args.lstm_out = args.attention_in_dim * args.attention_out_dim * args.attention_kernel**2

    state_model = cudit(models.LookupModel(layout_vocab_size, args.state_embed))
    object_model = models.LookupModel(object_vocab_size, args.obj_embed)

    text_model = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid, args.lstm_layers, args.lstm_out)
    heatmap_model = cudit(models.AttentionHeatmap(text_model, args, map_dim=args.map_dim))

    model = cudit(models.MultiNoBases(state_model, object_model, heatmap_model, args, map_dim=args.map_dim))
    return model
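
The sizing of args.lstm_out above presumably lets the text vector be reshaped into an (attention_out_dim, attention_in_dim, kernel, kernel) convolution weight. A worked example of the arithmetic, with hypothetical dimensions:

# Hypothetical dimensions, chosen only to illustrate the sizing above.
attention_in_dim, attention_out_dim, attention_kernel = 8, 3, 3
lstm_out = attention_in_dim * attention_out_dim * attention_kernel**2
assert lstm_out == 216  # enough values to fill a (3, 8, 3, 3) convolution weight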
Example #4
def init_full(args, layout_vocab_size, object_vocab_size, text_vocab_size):
    args.global_coeffs = 3
    args.attention_in_dim = args.obj_embed
    args.lstm_out = args.attention_in_dim * args.attention_out_dim * args.attention_kernel**2 + args.global_coeffs

    state_model = models.LookupModel(layout_vocab_size,
                                     args.state_embed).cuda()
    object_model = models.LookupModel(object_vocab_size, args.obj_embed)

    text_model = models.TextModel(text_vocab_size, args.lstm_inp,
                                  args.lstm_hid, args.lstm_layers,
                                  args.lstm_out)
    heatmap_model = models.AttentionGlobal(text_model,
                                           args,
                                           map_dim=args.map_dim).cuda()

    model = models.MultiNoRBF(state_model,
                              object_model,
                              heatmap_model,
                              args,
                              map_dim=args.map_dim).cuda()
    return model
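
init_full differs from init_nogradient mainly in args.global_coeffs = 3: the text vector carries three extra scalars beyond the flattened kernel weights. One plausible way a model such as AttentionGlobal might slice that vector (hypothetical; the repo's implementation may differ):

import torch

# Same hypothetical dimensions as in the Example #3 note.
in_dim, out_dim, kernel, n_global = 8, 3, 3, 3
text_vec = torch.randn(in_dim * out_dim * kernel**2 + n_global)
kernel_weights = text_vec[:-n_global].view(out_dim, in_dim, kernel, kernel)
global_coeffs = text_vec[-n_global:]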
Example #5
print '<Main> Vocabulary size: objects %d | text %d' % (obj_vocab_size,
                                                        text_vocab_size)
sys.stdout.flush()

######## Instruction model ########

goal_obs = pickle.load(
    open(os.path.join(args.load_path,
                      'goal_obs' + str(args.num_worlds) + '.p'), 'rb'))
indices_obs = pickle.load(
    open(os.path.join(args.load_path,
                      'indices_obs' + str(args.num_worlds) + '.p'), 'rb'))
targets = pickle.load(
    open(os.path.join(args.load_path,
                      'targets' + str(args.num_worlds) + '.p'), 'rb'))
rank = targets.size(1)

text_model = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid,
                              args.lstm_layers, args.lstm_out)
object_model = models.ObjectModel(obj_vocab_size, args.obj_embed,
                                  goal_obs[0].size(), args.lstm_out)
psi = models.Psi(text_model, object_model, args.lstm_out, args.goal_hid,
                 rank).cuda()
psi = pipeline.Trainer(psi, args.lr, args.batch_size)

print '\n<Main> Training psi: (', goal_obs.size(), 'x', indices_obs.size(), ') -->', targets.size()
psi.train((goal_obs, indices_obs), targets, iters=args.psi_iters)
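
Here psi regresses (goal observation, instruction) pairs onto rank-dimensional targets, with rank read from targets.size(1). Below is a self-contained, generic regression loop of the kind a wrapper like pipeline.Trainer might run; the shapes and the stand-in model are hypothetical, and the repo's Trainer may batch, shuffle, and log differently.

import torch
import torch.nn as nn
import torch.nn.functional as F

model = torch.nn.Linear(16, 3)     # hypothetical stand-in for the Psi network
inputs = torch.randn(100, 16)      # stand-in for the (goal_obs, indices_obs) inputs
targets = torch.randn(100, 3)      # rank-3 targets, mirroring rank = targets.size(1)
opt = torch.optim.Adam(model.parameters(), lr=1e-4)
for _ in range(50):
    loss = F.mse_loss(model(inputs), targets)
    opt.zero_grad()
    loss.backward()
    opt.step()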