import models


def init_cnn_lstm(args, layout_vocab_size, object_vocab_size, text_vocab_size):
    # LSTM output width; the CNN head emits twice as many features.
    args.lstm_out = 16
    args.cnn_out_dim = 2 * args.lstm_out
    # Embedding lookups for layout states and objects.
    state_model = models.LookupModel(layout_vocab_size, args.state_embed)
    object_model = models.LookupModel(object_vocab_size, args.obj_embed)
    # Text encoder producing a fixed-size instruction representation.
    lstm = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid,
                            args.lstm_layers, args.lstm_out)
    model = cudit(models.CNN_LSTM(state_model, object_model, lstm, args))
    return model
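# `cudit` is not defined in this section. A minimal sketch consistent with how
# it is used here, ASSUMING it is a CUDA-guard helper (hypothetical; the real
# helper may live in a utility module elsewhere in the repository):
import torch


def cudit(module):
    # Move the module (and its registered submodules) to GPU when available.
    return module.cuda() if torch.cuda.is_available() else module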
def init_nogradient(args, layout_vocab_size, object_vocab_size, text_vocab_size):
    # This variant uses no global coefficients.
    args.global_coeffs = 0
    args.attention_in_dim = args.obj_embed
    # The text encoder output is sized to fill the attention kernel:
    # in_dim * out_dim filters, each attention_kernel x attention_kernel.
    args.lstm_out = args.attention_in_dim * args.attention_out_dim * args.attention_kernel ** 2
    state_model = cudit(models.LookupModel(layout_vocab_size, args.state_embed))
    object_model = models.LookupModel(object_vocab_size, args.obj_embed)
    text_model = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid,
                                  args.lstm_layers, args.lstm_out)
    # Attention heatmap conditioned on the text representation.
    heatmap_model = cudit(models.AttentionHeatmap(text_model, args, map_dim=args.map_dim))
    model = cudit(models.MultiNoBases(state_model, object_model, heatmap_model, args,
                                      map_dim=args.map_dim))
    return model
def init_full(args, layout_vocab_size, object_vocab_size, text_vocab_size):
    # The full model appends three global coefficients to the text output.
    args.global_coeffs = 3
    args.attention_in_dim = args.obj_embed
    # Attention-kernel parameters plus the global coefficients.
    args.lstm_out = (args.attention_in_dim * args.attention_out_dim * args.attention_kernel ** 2
                     + args.global_coeffs)
    # Use cudit, as in the other initializers, instead of bare .cuda(),
    # which would fail on CPU-only machines.
    state_model = cudit(models.LookupModel(layout_vocab_size, args.state_embed))
    object_model = models.LookupModel(object_vocab_size, args.obj_embed)
    text_model = models.TextModel(text_vocab_size, args.lstm_inp, args.lstm_hid,
                                  args.lstm_layers, args.lstm_out)
    heatmap_model = cudit(models.AttentionGlobal(text_model, args, map_dim=args.map_dim))
    model = cudit(models.MultiNoRBF(state_model, object_model, heatmap_model, args,
                                    map_dim=args.map_dim))
    return model
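# A minimal usage sketch, assuming the repository's `models` module is
# importable and that the hyperparameter names below match the ones read by
# the initializers above. The default values are illustrative placeholders,
# not settings taken from this codebase.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--state_embed', type=int, default=16)
    parser.add_argument('--obj_embed', type=int, default=16)
    parser.add_argument('--lstm_inp', type=int, default=32)
    parser.add_argument('--lstm_hid', type=int, default=32)
    parser.add_argument('--lstm_layers', type=int, default=1)
    parser.add_argument('--attention_out_dim', type=int, default=1)
    parser.add_argument('--attention_kernel', type=int, default=3)
    parser.add_argument('--map_dim', type=int, default=10)
    args = parser.parse_args()

    # Vocabulary sizes would normally come from the dataset; placeholders here.
    model = init_full(args, layout_vocab_size=20, object_vocab_size=20,
                      text_vocab_size=500)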