def __init__(self, state_size=256, nested_state_size=256):
    """Build the outer cell of a Nested LSTM.

    state_size - dimensionality of the outer hidden/cell state
    nested_state_size - dimensionality handed to the inner LSTM
    """
    super(NestedLSTM, self).__init__()
    self.state_size = state_size
    self.nested_state_size = nested_state_size

    std = 1 / float(np.sqrt(state_size))
    # Gate weights applied to the incoming data, stacked along dim 0
    # (indices 0..3 are used as i, f, candidate, o in forward()).
    self.W = nn.Parameter(torch.normal(torch.zeros(4, state_size, state_size), std=std),
                          requires_grad=True)
    # Gate weights applied to the previous hidden state, same stacking.
    self.U = nn.Parameter(torch.normal(torch.zeros(4, state_size, state_size), std=std),
                          requires_grad=True)
    # Gate biases (broadcast over the batch dimension).
    self.b = nn.Parameter(torch.zeros(4, 1, state_size), requires_grad=True)
    # Inner (nested) LSTM that replaces the usual additive cell update.
    self.LSTM = LSTM(nested_state_size)
    # Non-linear activations.
    self.sigmoid = nn.Sigmoid()
    self.tanh = nn.Tanh()
def __init__(self, opt):
    """Build the MoreSupWeightModel.

    Copies sizing/configuration options from ``opt``, optionally builds a
    review-net stack, then creates the recurrent core, the word embedding
    and (optionally) linear projections for the image/attention features.

    Args:
        opt: options object; every attribute read below must be present.
    """
    super(MoreSupWeightModel, self).__init__()
    self.vocab_size = opt.vocab_size
    self.input_encoding_size = opt.input_encoding_size
    self.rnn_type = opt.rnn_type
    self.rnn_size = opt.rnn_size
    self.num_layers = opt.num_layers
    self.drop_prob_lm = opt.drop_prob_lm
    self.seq_length = opt.seq_length
    self.fc_feat_size = opt.fc_feat_size
    self.att_feat_size = opt.att_feat_size
    self.att_size = opt.att_size
    self.batch_size = opt.batch_size * opt.seq_per_img
    self.rnn_atten = opt.rnn_atten
    self.num_parallels = opt.num_parallels
    self.sample_rate = opt.sample_rate
    self.use_linear = opt.use_linear
    self.rnn_size_list = opt.rnn_size_list
    self.gram_num = opt.gram_num
    self.logprob_pool_type = opt.logprob_pool_type  # 0: mean, 1: max

    # Optional review network: one soft-attention LSTM per review step.
    self.use_reviewnet = opt.use_reviewnet
    if self.use_reviewnet == 1:
        self.review_length = opt.review_length
        self.review_nets = nn.ModuleList()
        for i in range(self.review_length):
            # BUG FIX: indexed assignment into an empty nn.ModuleList raises
            # IndexError; append() registers the sub-module correctly.
            self.review_nets.append(
                LSTM.LSTM_SOFT_ATT_NOX(self.rnn_size, self.att_size, self.drop_prob_lm))
        opt.att_size = self.review_length

    # Recurrent core (variant selected by opt.rnn_type).
    self.core = rnn_utils.get_lstm(opt)
    if self.rnn_atten == "ATT_LSTM":
        self.atten = LSTM.LSTM_ATTEN_LAYER(self.rnn_size)

    # Word embedding: vocab_size + 1 -> input_encoding_size.
    if self.gram_num > 0:
        self.embed = nn.Sequential(
            nn.Embedding(self.vocab_size + 1, self.input_encoding_size),
            Embed.WordEmbed(self.gram_num))
        # NOTE(review): embed_tc is not created on this branch — confirm no
        # caller dereferences self.embed_tc when gram_num > 0.
    else:
        self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
        self.embed_tc = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)

    if self.use_linear:
        # (batch_size, fc/att_feat_size) -> (batch_size, input_encoding_size)
        self.img_embed = nn.Linear(self.fc_feat_size, self.input_encoding_size)
        self.att_embed = nn.Linear(self.att_feat_size, self.input_encoding_size)

    self.relu = nn.ReLU()
    self.init_weight()
def __init__(self, opt):
    """Set up the SCST captioning model: optional attention layer,
    recurrent core, word embeddings and feature projections."""
    super(SCSTModel, self).__init__()
    # Mirror the scalar options we need as attributes of the module.
    for field in ('vocab_size', 'input_encoding_size', 'rnn_type', 'rnn_size',
                  'num_layers', 'drop_prob_lm', 'seq_length', 'fc_feat_size',
                  'att_feat_size', 'att_size', 'rnn_atten'):
        setattr(self, field, getattr(opt, field))
    self.batch_size = opt.batch_size * opt.seq_per_img

    if self.rnn_atten == "ATT_LSTM":
        self.atten = LSTM.LSTM_ATTEN_LAYER(self.rnn_size)

    # Recurrent core: input is word embedding + image embedding (hence * 2).
    in_size = self.input_encoding_size * 2
    out_size = self.vocab_size + 1
    if self.rnn_type == "LSTM":
        self.core = LSTM.LSTM(in_size, out_size, self.rnn_size,
                              dropout=self.drop_prob_lm)
    elif self.rnn_type == "LSTM_SOFT_ATT":
        self.core = LSTM.LSTM_SOFT_ATT(in_size, out_size, self.rnn_size,
                                       self.att_size, dropout=self.drop_prob_lm)
    elif self.rnn_type == "LSTM_DOUBLE_ATT":
        self.core = LSTM.LSTM_DOUBLE_ATT(in_size, out_size, self.rnn_size,
                                         self.att_size, dropout=self.drop_prob_lm)

    # Word embeddings: vocab_size + 1 -> input_encoding_size.
    self.embed = nn.Embedding(out_size, self.input_encoding_size)
    self.embed_tc = nn.Embedding(out_size, self.input_encoding_size)
    # Feature projections: (batch, fc/att_feat_size) -> (batch, input_encoding_size).
    self.img_embed = nn.Linear(self.fc_feat_size, self.input_encoding_size)
    self.att_embed = nn.Linear(self.att_feat_size, self.input_encoding_size)
    self.init_weights()
def __init__(self, opt):
    """Build the DoubleAttenMModel.

    Copies sizing/configuration options from ``opt``, optionally builds a
    review-net stack, then creates the recurrent core, word embedding and
    (optionally) linear projections of the image/attention features.

    Args:
        opt: options object; every attribute read below must be present.
    """
    super(DoubleAttenMModel, self).__init__()
    self.vocab_size = opt.vocab_size
    self.input_encoding_size = opt.input_encoding_size
    self.rnn_type = opt.rnn_type
    self.rnn_size = opt.rnn_size
    # (the original assigned num_layers twice; once is enough)
    self.num_layers = opt.num_layers
    self.drop_prob_lm = opt.drop_prob_lm
    self.seq_length = opt.seq_length
    self.fc_feat_size = opt.fc_feat_size
    self.att_feat_size = opt.att_feat_size
    self.att_size = opt.att_size
    self.batch_size = opt.batch_size * opt.seq_per_img
    self.rnn_atten = opt.rnn_atten
    self.num_parallels = opt.num_parallels
    self.sample_rate = opt.sample_rate
    self.use_linear = opt.use_linear
    self.rnn_size_list = opt.rnn_size_list

    # Optional review network: one soft-attention LSTM per review step.
    self.use_reviewnet = opt.use_reviewnet
    if self.use_reviewnet == 1:
        self.review_length = opt.review_length
        self.review_nets = nn.ModuleList()
        for i in range(self.review_length):
            # BUG FIX: indexed assignment into an empty nn.ModuleList raises
            # IndexError; append() registers the sub-module correctly.
            self.review_nets.append(
                LSTM.LSTM_SOFT_ATT_NOX(self.rnn_size, self.att_size, self.drop_prob_lm))
        opt.att_size = self.review_length

    # Recurrent core (variant selected by opt.rnn_type).
    self.core = rnn_utils.get_lstm(opt)
    if self.rnn_atten == "ATT_LSTM":
        self.atten = LSTM.LSTM_ATTEN_LAYER(self.rnn_size)

    # Word embedding: vocab_size + 1 -> input_encoding_size.
    self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)

    if self.use_linear:
        # Project global/attention CNN features into the RNN state size.
        self.img_embed = nn.Linear(self.fc_feat_size, self.rnn_size)
        self.att_embed = nn.Linear(self.att_feat_size, self.rnn_size)

    self.relu = nn.ReLU()
    self.init_weight()
class MPNEncoder(nn.Module):
    """Message-passing network encoder.

    Runs a recurrent cell over the message graph, aggregates incoming
    messages per node, and projects [node features | messages] to a
    hidden representation.
    """

    def __init__(self, rnn_type, input_size, node_fdim, hidden_size, depth):
        """rnn_type is 'GRU' or 'LSTM'; anything else raises ValueError."""
        super(MPNEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.depth = depth
        # Output projection of [node features | aggregated messages].
        self.W_o = nn.Sequential(
            nn.Linear(node_fdim + hidden_size, hidden_size),
            nn.ReLU(),
        )
        if rnn_type == 'GRU':
            self.rnn = GRU(input_size, hidden_size, depth)
        elif rnn_type == 'LSTM':
            self.rnn = LSTM(input_size, hidden_size, depth)
        else:
            raise ValueError('unsupported rnn cell type ' + rnn_type)

    def forward(self, fnode, fmess, agraph, bgraph, mask):
        """Encode nodes; returns (masked node hiddens, message hiddens)."""
        msg_state = self.rnn(fmess, bgraph)
        msg_hidden = self.rnn.get_hidden_state(msg_state)
        # Sum the incoming messages of every node.
        incoming = index_select_ND(msg_hidden, 0, agraph).sum(dim=1)
        node_hiddens = self.W_o(torch.cat([fnode, incoming], dim=1))
        if mask is None:
            mask = torch.ones(node_hiddens.size(0), 1, device=fnode.device)
            mask[0, 0] = 0  # first node is padding
        return node_hiddens * mask, msg_hidden
def __init__(self, rnn_type, input_size, node_fdim, hidden_size, depth):
    """Message-passing encoder setup.

    rnn_type - 'GRU' or 'LSTM' (anything else raises ValueError)
    input_size - message feature dimension fed to the recurrent cell
    node_fdim - raw node feature dimension
    hidden_size - hidden dimension of messages and node outputs
    depth - number of message-passing iterations
    """
    super(MPNEncoder, self).__init__()
    self.hidden_size = hidden_size
    self.input_size = input_size
    self.depth = depth

    # Final projection of [node features | aggregated messages].
    out_proj = nn.Sequential(
        nn.Linear(node_fdim + hidden_size, hidden_size),
        nn.ReLU(),
    )
    self.W_o = out_proj

    if rnn_type == 'GRU':
        self.rnn = GRU(input_size, hidden_size, depth)
    elif rnn_type == 'LSTM':
        self.rnn = LSTM(input_size, hidden_size, depth)
    else:
        raise ValueError('unsupported rnn cell type ' + rnn_type)
def __init__(self, opt):
    """MIXER-style captioning model: double-attention LSTM core plus a word
    embedding and linear image/attention feature projections."""
    super(MixerModel, self).__init__()
    # Mirror the scalar options we need as attributes of the module.
    for field in ('vocab_size', 'input_encoding_size', 'rnn_type', 'rnn_size',
                  'num_layers', 'drop_prob_lm', 'seq_length', 'fc_feat_size',
                  'att_feat_size', 'att_size'):
        setattr(self, field, getattr(opt, field))
    # NOTE(review): batch size is hard-coded rather than read from opt —
    # confirm this matches the training configuration.
    self.batch_size = 80

    # Double-attention LSTM core.
    self.core = LSTM.LSTM_DOUBLE_ATT_TOP(self.input_encoding_size,
                                         self.vocab_size + 1,
                                         self.rnn_size, self.att_size,
                                         dropout=self.drop_prob_lm)
    # vocab_size + 1 -> input_encoding_size
    self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
    # (batch_size, fc/att_feat_size) -> (batch_size, input_encoding_size)
    self.img_embed = nn.Linear(self.fc_feat_size, self.input_encoding_size)
    self.att_embed = nn.Linear(self.att_feat_size, self.input_encoding_size)
def main():
    """Train the LSTM recommender on batched per-user sequences and report
    recall on the test set.

    FIX: the trailing report used the Python-2-only ``print`` statement;
    parenthesising it keeps identical output on Python 2 and makes the
    function valid Python 3 syntax.
    """
    # tr_data holds one list per user as a training batch; entries are item
    # ids, not yet mapped to latent vectors.
    # tr_data = getTrainData()
    tr_data = getBatchTrainData()
    item_latent_vec = getMFData()

    # LSTM model parameters.
    # tr_data[0] is one batch; tr_data[0][0] is the length of the first
    # sequence in it, including the user id.
    # n_step = len(tr_data[0][0])-2  # the last element is kept as the target
    n_step = tr_data.shape[1] - 2  # last column is the prediction target
    # The RNN state size may match the item latent vector size (but need not).
    lat_vec_size = item_latent_vec.shape[1]
    hidden_size = 20
    # Number of users, sized the per-user bias list inside the model.
    # n_user = len(tr_data)
    model = LSTM(n_step, lat_vec_size, hidden_size)

    # Training hyper-parameters.
    epoch = 6
    learning_rate = 0.01
    batch_size = 5000
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)

    # Run training and prediction inside a single session.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # model.batch_train(sess, tr_data, item_latent_vec, optimizer, epoch)
        model.batch_train(sess, tr_data, item_latent_vec, optimizer, epoch, batch_size)
        # Predictions come back as a dict keyed by user id.
        te_input, te_target = getTestData()
        pred_res = model.pred(sess, te_input, item_latent_vec)
        recall = evaluate(pred_res, te_target, item_latent_vec)
        print("recall is %f" % recall)
class NestedLSTM(nn.Module): def __init__(self, state_size=256, nested_state_size=256): super(NestedLSTM, self).__init__() self.state_size = state_size self.nested_state_size = nested_state_size # Internal LSTM Entry Parameters means = torch.zeros(4, state_size, state_size) self.W = nn.Parameter(torch.normal(means, std=1 / float(np.sqrt(state_size))), requires_grad=True) # Internal LSTM State Parameters means = torch.zeros(4, state_size, state_size) self.U = nn.Parameter(torch.normal(means, std=1 / float(np.sqrt(state_size))), requires_grad=True) # Internal LSTM Bias Parameters self.b = nn.Parameter(torch.zeros(4, 1, state_size), requires_grad=True) # Nested LSTM self.LSTM = LSTM(nested_state_size) # Non Linear Activation Functions self.sigmoid = nn.Sigmoid() self.tanh = nn.Tanh() def forward(self, x, old_h, old_c, old_c_hat): """ x - New data coming into LSTM. FloatTensor Variable with shape (batch_size, state_size) old_h - short term memory of LSTM. FloatTensor Variable with shape (batch_size, state_size) old_c - long term memory of LSTM. FloatTensor Variable with shape (batch_size, state_size) old_c - long term memory of nested LSTM. FloatTensor Variable with shape (batch_size, state_size) returns: h - new short term memory of LSTM. FloatTensor Variable with shape (batch_size, state_size) c - new long term memory of LSTM. FloatTensor Variable with shape (batch_size, state_size) c_hat - new long term memory of nested LSTM. FloatTensor Variable of shape (batch_size, state_size) """ i = self.sigmoid(x.mm(self.W[0]) + old_h.mm(self.U[0]) + self.b[0]) f = self.sigmoid(x.mm(self.W[1]) + old_h.mm(self.U[1]) + self.b[1]) old_h_hat = f * old_c x_hat = i * self.tanh( x.mm(self.W[2]) + old_h.mm(self.U[2]) + self.b[2]) h_hat, c_hat = self.LSTM.forward(x_hat, old_h_hat, old_c_hat) c = h_hat o = self.sigmoid(x.mm(self.W[3]) + old_h.mm(self.U[3]) + self.b[3]) h = o * self.tanh(c) return h, c, c_hat
def main(args):
    """Closed-loop prediction with a trained LSTM: from step 1 onward the
    first 8 input dimensions are fed back from the model's own output,
    the rest keep coming from the dataset. Saves the trajectory as a CSV
    and a figure."""
    # Load the training split and restore the trained network.
    dataset, _ = load_data(os.path.join(args.data_path, "train"))
    rnn = LSTM(args)
    data = dataset[None, 0]
    checkpoint = load_model(args.model_load_path, "*.pt")
    rnn.load_state_dict(torch.load(checkpoint))
    rnn.eval()

    outputs = []
    state = None
    for step in range(data.shape[1] - 1):
        if step == 0:
            cur_input = data[:, 0, :]
        else:
            # Close the loop on the first 8 predicted dims.
            cur_input = torch.cat([output[0, :, :8], data[:, step, 8:]], dim=-1)
        output, state = rnn(cur_input.view(1, 1, -1), state)
        outputs.append(output.detach())
    outputs = torch.stack(outputs, dim=1)

    np.savetxt("{}LSTMPB_closed_predict.txt".format(args.log_path),
               outputs[0, 0].numpy(), delimiter=",")
    fig = make_fig([[outputs[0, :, 0, :], dataset[1]]], figsize=(16, 16))
    fig.savefig("{}LSTMPB_closed_predict.png".format(args.log_path))
    plt.close(fig)
def __init__(self, ntoken, ninp, nhid, nlayers, dropout=0.5):
    """Word-level language model: embedding -> dropout -> LSTM -> decoder.

    ntoken - vocabulary size
    ninp - embedding dimension
    nhid - hidden units per LSTM layer
    nlayers - number of LSTM layers
    dropout - dropout probability used on the embedding and inside the RNN
    """
    super(RNNModel, self).__init__()
    self.model_type = 'LSTM'
    self.ntoken = ntoken
    self.nhid = nhid
    self.nlayers = nlayers
    self.drop = nn.Dropout(dropout)
    self.encoder = nn.Embedding(ntoken, ninp)
    self.rnn = LSTM(ninp, nhid, nlayers, dropout=dropout)
    self.decoder = nn.Linear(nhid, ntoken)
    self.init_weights()
def __init__(self, opt):
    """Bidirectional show-attend-tell model: two identical attention-LSTM
    cores (forward/backward), a word embedding, feature projections and an
    output projection."""
    super(BiShowAttenTellModel, self).__init__()
    # Mirror the scalar options we need as attributes of the module.
    for field in ('vocab_size', 'input_encoding_size', 'rnn_type', 'rnn_size',
                  'num_layers', 'drop_prob_lm', 'seq_length', 'fc_feat_size',
                  'att_feat_size', 'att_size'):
        setattr(self, field, getattr(opt, field))
    self.output_size = self.vocab_size + 1

    # Pick the core constructor once, then build both directions with it.
    if self.rnn_type == "LSTM_SOFT_ATT":
        make_core = LSTM.LSTM_SOFT_ATT_TOP
    elif self.rnn_type == "LSTM_DOUBLE_ATT":
        make_core = LSTM.LSTM_DOUBLE_ATT_TOP
    else:
        raise Exception("rnn type not supported: {}".format(self.rnn_type))
    self.core = make_core(self.input_encoding_size, self.output_size,
                          self.rnn_size, self.att_size, dropout=self.drop_prob_lm)
    self.core1 = make_core(self.input_encoding_size, self.output_size,
                           self.rnn_size, self.att_size, dropout=self.drop_prob_lm)

    # vocab_size + 1 -> input_encoding_size
    self.embed = nn.Embedding(self.vocab_size + 1, self.input_encoding_size)
    # Image/attention features -> RNN state size.
    self.img_embed = nn.Linear(self.fc_feat_size, self.rnn_size)
    self.att_embed = nn.Linear(self.att_feat_size, self.rnn_size)
    # RNN state -> vocabulary logits.
    self.proj = nn.Linear(self.rnn_size, self.output_size)
    self.relu = nn.PReLU()
    self.init_weight()
def __init__(self, vocab_size, embedding_dim, hidden_dim, n_classes=1,
             bidirectional=False, padding_idx=0, n_layers=1, dropout=0.2):
    """Sentiment classifier: embedding -> bridge -> LSTM -> linear head.

    NOTE(review): bidirectional, n_layers and dropout are accepted but not
    forwarded to the recurrent cell — confirm whether that is intended.
    """
    super(SentimentLSTM, self).__init__()
    self.n_layers = n_layers
    # Linear "bridge" applied in embedding space.
    self.bridge = nn.Linear(embedding_dim, embedding_dim)
    self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx)
    self.rnn = LSTM(embedding_dim, hidden_dim)
    # Classification head over the (stacked) hidden state.
    self.out = nn.Linear(hidden_dim * n_layers, n_classes)
for rnn_type in rnn_types: print("RNN TYPE:", rnn_type) loss_logs = [] for trial in range(n_trials): loss_log = [] if 'ResNestedLSTM' == rnn_type: rnn = ResNestedLSTM(x_size, state_size, layer_norm=layer_norm) elif 'ResLSTM' == rnn_type: rnn = ResLSTM(x_size, state_size, layer_norm=layer_norm) elif 'ResRNN' == rnn_type: rnn = ResRNN(x_size, state_size, layer_norm=layer_norm) elif 'NestedLSTM' == rnn_type: rnn = NestedLSTM(x_size, state_size, layer_norm=layer_norm) elif 'LSTM' == rnn_type: rnn = LSTM(x_size, state_size, layer_norm=layer_norm) elif 'DoubleGRU' == rnn_type: rnn = DoubleGRU(x_size, state_size, layer_norm=layer_norm) elif 'GRU' == rnn_type: rnn = GRU(x_size, state_size, layer_norm=layer_norm) elif "ResGRU" == rnn_type: rnn = ResGRU(x_size, state_size, layer_norm=layer_norm) adam = optim.SGD(rnn.parameters(), lr=lr) adam.zero_grad() classifier = Variable(rand_vector.clone(), requires_grad=True) for i in range(n_epochs): X, Y = Variable(X.data), Variable(Y.data) state_vars = [ Variable(torch.zeros(batch_size, state_size)) for i in range(rnn.n_state_vars)
def __init__(self, n_words, emb_size, state_size=256, seq_len=30, rnn_type='nested_lstm'):
    super(RecurrentUnit, self).__init__()
    """
    rnn_type - Available arguments:
                    nested_lstm
                    lstm
                    gru
    """
    self.n_words = n_words
    self.emb_size = emb_size
    self.state_size = state_size
    self.seq_len = seq_len
    self.rnn_type = rnn_type.lower()

    def normal_param(rows, cols, std):
        # Gaussian-initialised trainable matrix.
        return nn.Parameter(torch.normal(torch.zeros(rows, cols), std=std),
                            requires_grad=True)

    # Word embedding table.
    self.embeddings = normal_param(n_words, emb_size, 0.01)

    # Entry projection: embedding space -> state space.
    self.entry_bnorm = nn.BatchNorm1d(emb_size)
    self.entry = normal_param(emb_size, state_size, 1 / float(np.sqrt(state_size)))
    self.pre_rnn_bnorm = nn.BatchNorm1d(state_size)

    # Recurrent core, selected by substring match on rnn_type.
    if 'nested' in self.rnn_type:
        self.rnn = NestedLSTM(state_size, state_size)
    elif "lstm" in self.rnn_type:
        self.rnn = LSTM(state_size, state_size)
    else:
        self.rnn = DoubleGRU(state_size)

    # Exit projection: state space -> embedding space.
    self.exit_bnorm = nn.BatchNorm1d(state_size)
    self.exit = normal_param(state_size, emb_size, 1 / float(np.sqrt(state_size)))

    # Classifier: embedding space -> vocabulary logits.
    self.classifier_bnorm = nn.BatchNorm1d(emb_size)
    self.classifier = normal_param(emb_size, n_words, 1 / float(np.sqrt(emb_size)))

    # Non-linear activations and loss helpers.
    self.sigmoid = nn.Sigmoid()
    self.tanh = nn.Tanh()
    self.relu = nn.ReLU()
    self.mse = nn.MSELoss()
    self.softmax = nn.Softmax(-1)
    self.log = []  # used to track the loss
def get_lstm(opt):
    """Factory: build the recurrent core selected by ``opt.rnn_type``.

    Dispatches over the family of LSTM/GRU variants spread across the
    LSTM, LSTM1..LSTM6 and GRU modules.  Every variant receives the word
    input size ``opt.input_encoding_size`` and output size
    ``opt.vocab_size + 1``; stacked/parallel variants additionally take
    layer/parallel counts, attention sizes and variant-specific options
    from ``opt``.

    Raises:
        Exception: if ``opt.rnn_type`` matches no known variant.
    """
    print('rnn_type ', opt.rnn_type)
    # LSTM
    # --- basic variants (positional dropout argument) ---
    if opt.rnn_type == "LSTM":
        core = LSTM.LSTM(opt.input_encoding_size, opt.vocab_size + 1, opt.rnn_size, opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT":
        core = LSTM.LSTM_SOFT_ATT(opt.input_encoding_size, opt.vocab_size + 1, opt.rnn_size, opt.att_size, opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT":
        core = LSTM.LSTM_DOUBLE_ATT(opt.input_encoding_size, opt.vocab_size + 1, opt.rnn_size, opt.att_size, opt.drop_prob_lm)
    # --- stacked variants ---
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK":
        core = LSTM.LSTM_SOFT_ATT_STACK(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK":
        core = LSTM.LSTM_DOUBLE_ATT_STACK(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    # --- stacked + parallel variants (module LSTM) ---
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_POLICY":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_POLICY(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_BN":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_BN(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_BN_RELU":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_BN_RELU(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_DROPOUT":
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_DROPOUT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_DROPOUT_SET":
        # Additionally takes a per-layer list of RNN sizes.
        core = LSTM.LSTM_DOUBLE_ATT_STACK_PARALLEL_DROPOUT_SET(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.rnn_size_list, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "GRU_DOUBLE_ATT_STACK_PARALLEL_DROPOUT":
        core = GRU.GRU_DOUBLE_ATT_STACK_PARALLEL_DROPOUT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    # --- "IT attention" family (module LSTM1) ---
    elif opt.rnn_type == "LSTM_IT_ATT":
        core = LSTM1.LSTM_IT_ATT(opt.input_encoding_size, opt.vocab_size + 1, opt.rnn_size, opt.att_size, opt.drop_prob_lm, opt.num_layers, opt.word_input_layer, opt.att_input_layer)
    elif opt.rnn_type == "LSTM_IT_ATT_COMBINE":
        core = LSTM1.LSTM_IT_ATT_COMBINE(opt.input_encoding_size, opt.vocab_size + 1, opt.rnn_size, opt.att_size, opt.drop_prob_lm, opt.num_layers, opt.word_input_layer, opt.att_input_layer)
    elif opt.rnn_type == "FO_IT_ATT_COMBINE":
        core = LSTM1.FO_IT_ATT_COMBINE(opt.input_encoding_size, opt.vocab_size + 1, opt.rnn_size, opt.att_size, opt.drop_prob_lm, opt.num_layers, opt.word_input_layer, opt.att_input_layer)
    elif opt.rnn_type == "CONV_IT_ATT_COMBINE":
        core = LSTM1.CONV_IT_ATT_COMBINE(opt.input_encoding_size, opt.vocab_size + 1, opt.rnn_size, opt.att_size, opt.drop_prob_lm, opt.num_layers, opt.word_input_layer, opt.att_input_layer)
    elif opt.rnn_type == "CONV_LSTM":
        core = LSTM1.CONV_LSTM(opt.input_encoding_size, opt.vocab_size + 1, opt.rnn_size, opt.drop_prob_lm, opt.num_layers, opt.block_num, opt.use_proj_mul)
    # --- "NEW"/"MUL_OUT" family (modules LSTM1/LSTM2) ---
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_NEW":
        core = LSTM1.LSTM_DOUBLE_ATT_STACK_PARALLEL(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT":
        core = LSTM1.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_NEW":
        core = LSTM1.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_NEW(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_WITH_BU":
        # Additionally consumes bottom-up feature size.
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_WITH_BU(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, opt.bu_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_NEW":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_NEW(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_LSTM_MUL":
        # NOTE: dropout and block_num are passed positionally here.
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_LSTM_MUL(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, opt.drop_prob_lm, opt.block_num)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_A":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_A(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    # --- soft-attention stacked/parallel family (module LSTM2) ---
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL":
        core = LSTM2.LSTM_SOFT_ATT_STACK_PARALLEL(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT":
        core = LSTM2.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_MUL_WEIGHT":
        core = LSTM2.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_MUL_WEIGHT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_WITH_WEIGHT":
        core = LSTM2.LSTM_DOUBLE_ATT_STACK_PARALLEL_MUL_OUT_ATT_WITH_WEIGHT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    # --- SPP variants (module LSTM3, take pooling options) ---
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT_SPP":
        core = LSTM3.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT_SPP(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, opt.pool_size, opt.spp_num, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_SPP":
        core = LSTM3.LSTM_SOFT_ATT_STACK_PARALLEL_SPP(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, opt.pool_size, opt.spp_num, dropout=opt.drop_prob_lm)
    # --- memory variants (module LSTM4) ---
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_MEMORY":
        core = LSTM4.LSTM_SOFT_ATT_STACK_PARALLEL_MEMORY(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, opt.memory_num_hop, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_NO_MEMORY":
        core = LSTM4.LSTM_SOFT_ATT_STACK_PARALLEL_NO_MEMORY(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    # --- bottom-up attention variants (modules LSTM5/LSTM6) ---
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT_BU":
        core = LSTM5.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_WEIGHT_BU(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, opt.bu_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_C_S_ATT_STACK_PARALLEL_WITH_WEIGHT_BU":
        core = LSTM5.LSTM_C_S_ATT_STACK_PARALLEL_WITH_WEIGHT_BU(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, opt.bu_size, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_WITH_TOP_DOWN_ATTEN":
        core = LSTM6.LSTM_WITH_TOP_DOWN_ATTEN(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, opt.bu_size, opt.bu_num, dropout=opt.drop_prob_lm)
    elif opt.rnn_type == "LSTM_SOFT_ATT_STACK_PARALLEL_WITH_FC_WEIGHT":
        core = LSTM2.LSTM_SOFT_ATT_STACK_PARALLEL_WITH_FC_WEIGHT(opt.input_encoding_size, opt.vocab_size + 1, opt.num_layers, opt.num_parallels, opt.rnn_size, opt.att_size, dropout=opt.drop_prob_lm)
    else:
        raise Exception("rnn type not supported: {}".format(opt.rnn_type))
    return core
# NOTE(review): this fragment assumes data_folder, test_size, train_x,
# train_y, LSTM, MLP and RecursiveMLP are defined earlier in the file
# (not visible here) — confirm against the full script.
test_x = np.load(data_folder + "test_x.npy")[0:test_size]
test_y = np.load(data_folder + "test_y.npy")[0:test_size]
# Load models.
# Placeholder shapes are derived from the first training example.
features_cols = train_x[0].shape[0]
features_rows = train_x[0].shape[1]
parameter_size = train_y[0].shape[0]
features = tf.placeholder(tf.float32, [None, features_cols, features_rows])
patches = tf.placeholder(tf.float32, [None, parameter_size])
# Dropout keep-probabilities, fed at run time.
prob_keep_input = tf.placeholder(tf.float32)
prob_keep_hidden = tf.placeholder(tf.float32)
batch_size = tf.placeholder(tf.int32)
warnings.simplefilter("ignore")
# Build the competing models over the same placeholders.
lstm = LSTM(features=features, labels=patches, batch_size=batch_size)
mlp = MLP(features=features, labels=patches, parameters=[50, 40, 30], prob_keep_input=prob_keep_input, prob_keep_hidden=prob_keep_hidden)
# NOTE(review): 155 appears to identify a specific patch parameterisation
# that warrants the hierarchical model — confirm the intended condition.
if parameter_size == 155:
    hier_mlp = RecursiveMLP(features=features, labels=patches, parameters=[50, 40, 30], prob_keep_input=prob_keep_input, prob_keep_hidden=prob_keep_hidden)
print "Initialising TensorFlow variables and building tensor graph..."