def init_uvfa_text(args, layout_vocab_size, object_vocab_size, text_vocab_size, rank=7):
    print '<Models> Using UVFA variant, consider using a lower learning rate (e.g., 0.0001)'
    print '<Models> UVFA rank: {}'.format(rank)
    args.rank = rank
    args.lstm_out = rank
    # text encoder outputs a rank-dimensional goal embedding
    text_model = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid,
                                  args.lstm_layers, args.lstm_out)
    model = cudit(models.UVFA_text(text_model, layout_vocab_size, object_vocab_size,
                                   args, map_dim=args.map_dim))
    return model
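# `cudit` is used throughout these constructors but is defined elsewhere in the
# repo. A minimal sketch of such a helper, assuming it simply moves a module to
# the GPU when one is available (the name `cudit_sketch` and the fallback
# behavior are assumptions, not a transcription of the repo's helper):
import torch

def cudit_sketch(module):
    # move the module to GPU if available, otherwise leave it on the CPU
    return module.cuda() if torch.cuda.is_available() else module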
def init_cnn_lstm(args, layout_vocab_size, object_vocab_size, text_vocab_size):
    args.lstm_out = 16
    # CNN state encoder sized relative to the LSTM text encoding
    args.cnn_out_dim = 2 * args.lstm_out
    state_model = models.LookupModel(layout_vocab_size, args.state_embed)
    object_model = models.LookupModel(object_vocab_size, args.obj_embed)
    lstm = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid,
                            args.lstm_layers, args.lstm_out)
    model = cudit(models.CNN_LSTM(state_model, object_model, lstm, args))
    return model
def init_nogradient(args, layout_vocab_size, object_vocab_size, text_vocab_size):
    args.global_coeffs = 0
    args.attention_in_dim = args.obj_embed
    # the text encoding is sized so it can be reshaped into an attention
    # kernel: in_dim * out_dim * kernel^2 parameters
    args.lstm_out = args.attention_in_dim * args.attention_out_dim * args.attention_kernel**2
    state_model = cudit(models.LookupModel(layout_vocab_size, args.state_embed))
    object_model = models.LookupModel(object_vocab_size, args.obj_embed)
    text_model = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid,
                                  args.lstm_layers, args.lstm_out)
    heatmap_model = cudit(models.AttentionHeatmap(text_model, args, map_dim=args.map_dim))
    model = cudit(models.MultiNoBases(state_model, object_model, heatmap_model,
                                      args, map_dim=args.map_dim))
    return model
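# The lstm_out formula in init_nogradient sizes the text encoding so that it
# can be reshaped into a convolution kernel and slid over the map of object
# embeddings, yielding a text-conditioned attention heatmap. A minimal sketch
# of that reshape-and-convolve step (function and tensor names here are
# hypothetical, not the repo's API):
import torch.nn.functional as F

def text_kernel_heatmap(text_vec, obj_map, in_dim, out_dim, kernel):
    # text_vec: (in_dim * out_dim * kernel**2,) vector from the LSTM
    # obj_map:  (1, in_dim, H, W) map of per-cell object embeddings
    weights = text_vec.view(out_dim, in_dim, kernel, kernel)
    # padding keeps the output the same size, assuming an odd kernel
    return F.conv2d(obj_map, weights, padding=kernel // 2)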
def init_full(args, layout_vocab_size, object_vocab_size, text_vocab_size):
    args.global_coeffs = 3
    args.attention_in_dim = args.obj_embed
    # attention kernel parameters plus three global coefficients
    args.lstm_out = args.attention_in_dim * args.attention_out_dim * args.attention_kernel**2 \
                    + args.global_coeffs
    state_model = models.LookupModel(layout_vocab_size, args.state_embed).cuda()
    object_model = models.LookupModel(object_vocab_size, args.obj_embed)
    text_model = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid,
                                  args.lstm_layers, args.lstm_out)
    heatmap_model = models.AttentionGlobal(text_model, args, map_dim=args.map_dim).cuda()
    model = models.MultiNoRBF(state_model, object_model, heatmap_model,
                              args, map_dim=args.map_dim).cuda()
    return model
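# init_full reserves three extra LSTM outputs (args.global_coeffs = 3) beyond
# the attention kernel parameters. A plausible reading, sketched here, is that
# they parameterize a global, position-dependent term added on top of the
# local heatmap: a linear function of the (x, y) grid plus a bias. This is an
# assumption about AttentionGlobal, not a transcription of it:
import torch

def add_global_term(heatmap, coeffs):
    # heatmap: (H, W) local attention map; coeffs: (3,) text-derived scalars
    H, W = heatmap.size()
    ys = torch.linspace(-1, 1, H).view(H, 1).expand(H, W)
    xs = torch.linspace(-1, 1, W).view(1, W).expand(H, W)
    return heatmap + coeffs[0] * xs + coeffs[1] * ys + coeffs[2]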
print '<Main> Vocabulary size: objects %d | text %d' % (obj_vocab_size, text_vocab_size)
sys.stdout.flush()

######## Instruction model ########

# load pickled goal observations, object indices, and target goal embeddings
# (binary mode, which pickle expects; the paths are built with os.path.join
# rather than string concatenation so they do not depend on a trailing slash)
goal_obs = pickle.load(open(os.path.join(args.load_path, 'goal_obs' + str(args.num_worlds) + '.p'), 'rb'))
indices_obs = pickle.load(open(os.path.join(args.load_path, 'indices_obs' + str(args.num_worlds) + '.p'), 'rb'))
targets = pickle.load(open(os.path.join(args.load_path, 'targets' + str(args.num_worlds) + '.p'), 'rb'))

# the goal embedding dimension is read off the pickled targets
rank = targets.size(1)

text_model = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid,
                              args.lstm_layers, args.lstm_out)
object_model = models.ObjectModel(obj_vocab_size, args.obj_embed, goal_obs[0].size(), args.lstm_out)
psi = models.Psi(text_model, object_model, args.lstm_out, args.goal_hid, rank).cuda()
psi = pipeline.Trainer(psi, args.lr, args.batch_size)

print '\n<Main> Training psi: (', goal_obs.size(), 'x', indices_obs.size(), ') -->', targets.size()
psi.train((goal_obs, indices_obs), targets, iters=args.psi_iters)
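# The targets regressed here are rank-dimensional goal embeddings. In the UVFA
# scheme (Schaul et al., 2015) the goal-conditioned value factorizes as a dot
# product between a state embedding phi(s) and a goal embedding psi(g) of the
# same rank. A minimal sketch of reconstructing values from the two factors
# (phi_s and psi_g are hypothetical stand-ins for the learned embeddings):
def low_rank_values(phi_s, psi_g):
    # phi_s: (num_states, rank) state embeddings
    # psi_g: (num_goals, rank) goal embeddings
    # returns a (num_states, num_goals) matrix of approximate values
    return phi_s.mm(psi_g.t())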