# Note: these constructors assume `import tensorflow as tf` (TensorFlow 1.x API)
# and the repo's helpers (LSTM/biLSTM, max_pooling, feature2cos_sim,
# cal_loss_and_acc, get_feature) plus the CAT_NUMBER constant, defined elsewhere.
def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, l2_reg_lambda=0.0,
             adjust_weight=False, label_weight=[], is_training=True):
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.num_unroll_steps = num_unroll_steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_q = tf.nn.embedding_lookup(W, self.test_input_q)
        test_a = tf.nn.embedding_lookup(W, self.test_input_a)

    # build LSTM network
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = LSTM(ori_quests, self.rnn_size, self.batch_size)
        ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_a = LSTM(cand_quests, self.rnn_size, self.batch_size)
        neg_a = LSTM(neg_quests, self.rnn_size, self.batch_size)
        cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
        neg_q_feat = tf.nn.tanh(max_pooling(neg_a))
        test_q_out = LSTM(test_q, self.rnn_size, self.batch_size)
        test_q_out = tf.nn.tanh(max_pooling(test_q_out))
        test_a_out = LSTM(test_a, self.rnn_size, self.batch_size)
        test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    # cosine similarity of (question, positive answer) and (question, negative answer)
    self.ori_cand = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg = feature2cos_sim(ori_q_feat, neg_q_feat)
    self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)
    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)
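# As a reading aid, the sketch below shows the pairwise ranking loss that
# cal_loss_and_acc presumably implements (the function name, the margin value,
# and the exact "accuracy" definition here are assumptions, not the repo's code).
def _hinge_loss_sketch(pos_sim, neg_sim, margin=0.1):
    # loss_i = max(0, margin - cos(q, a+) + cos(q, a-)): push the positive
    # answer's cosine score above the negative one by at least `margin`.
    losses = tf.maximum(0.0, margin - pos_sim + neg_sim)
    loss = tf.reduce_mean(losses)
    # count a pair as correct when the positive answer already outranks the negative
    acc = tf.reduce_mean(tf.cast(tf.greater(pos_sim, neg_sim), tf.float32))
    return loss, acc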
def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, attention_matrix_size,
             loss_ratio, l2_reg_lambda=0.0, adjust_weight=False, label_weight=[],
             is_training=True, m=0.1):
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.num_unroll_steps = num_unroll_steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps], name='test_q')
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps], name='test_a')
    self.cat_ids = tf.placeholder(tf.int32, [None, CAT_NUMBER], name='cat_ids')

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_q = tf.nn.embedding_lookup(W, self.test_input_q)
        test_a = tf.nn.embedding_lookup(W, self.test_input_a)

    # run lstm without attention
    with tf.variable_scope("LSTM_scope") as scope:
        ori_q = biLSTM(ori_quests, self.rnn_size)
        ori_q_feat = tf.nn.tanh(max_pooling(ori_q))

        scope.reuse_variables()

        cand_a = biLSTM(cand_quests, self.rnn_size)
        neg_a = biLSTM(neg_quests, self.rnn_size)
        cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
        neg_q_feat = tf.nn.tanh(max_pooling(neg_a))

        test_q_out = biLSTM(test_q, self.rnn_size)
        test_q_out = tf.nn.tanh(max_pooling(test_q_out))
        test_a_out = biLSTM(test_a, self.rnn_size)
        test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    # build LSTM network
    # with tf.variable_scope("LSTM_scope") as scope:
    #     ori_q = biLSTM(ori_quests, self.rnn_size)
    #     #ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
    #
    #     scope.reuse_variables()
    #
    #     cand_a = biLSTM(cand_quests, self.rnn_size)
    #     neg_a = biLSTM(neg_quests, self.rnn_size)
    #     #cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
    #     #neg_q_feat = tf.nn.tanh(max_pooling(neg_a))
    #
    #     test_q_out = biLSTM(test_q, self.rnn_size)
    #     #test_q_out = tf.nn.tanh(max_pooling(test_q_out))
    #     test_a_out = biLSTM(test_a, self.rnn_size)
    #     #test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    # with tf.name_scope("att_weight"):
    #     # attention params
    #     att_W = {
    #         'Wam': tf.Variable(tf.truncated_normal([2 * self.rnn_size, attention_matrix_size], stddev=0.1)),
    #         'Wqm': tf.Variable(tf.truncated_normal([2 * self.rnn_size, attention_matrix_size], stddev=0.1)),
    #         'Wms': tf.Variable(tf.truncated_normal([attention_matrix_size, 1], stddev=0.1))
    #     }
    #     ori_q_feat, cand_q_feat = get_feature(ori_q, cand_a, att_W)
    #     ori_nq_feat, neg_q_feat = get_feature(ori_q, neg_a, att_W)
    #     test_q_out, test_a_out = get_feature(test_q_out, test_a_out, att_W)

    # multitasking
    with tf.name_scope("multitasking"):
        feature_size = int(ori_q_feat.get_shape()[1])
        w = tf.get_variable(name='weights', shape=(feature_size, CAT_NUMBER),
                            initializer=tf.random_normal_initializer())
        b = tf.get_variable(name='bias', shape=(1, CAT_NUMBER),
                            initializer=tf.zeros_initializer())
        # positive_qa = tf.concat([out_ori, out_cand], 1, name="embedding_for_multitask")
        logits = tf.matmul(ori_q_feat, w) + b
        entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=self.cat_ids, name='loss')
        loss_multitask = tf.reduce_mean(entropy)

    # acc
    self.ori_cand_score = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg_score = feature2cos_sim(ori_q_feat, neg_q_feat)
    loss_origin, self.acc = cal_loss_and_acc(self.ori_cand_score, self.ori_neg_score, m)

    self.loss = loss_origin * (1 - loss_ratio) + loss_multitask * loss_ratio

    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)

    # multitasking_acc
    with tf.name_scope("multi_acc"):
        self.preds = tf.nn.softmax(logits)
        self.correct_preds = tf.equal(tf.argmax(self.preds, 1), tf.argmax(self.cat_ids, 1))
        self.multi_acc = tf.reduce_sum(tf.cast(self.correct_preds, tf.float32))
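# Hypothetical usage of the multitask constructor above (the class name
# `QaModel`, the session/optimizer setup, and the batch variables are
# assumptions; the attribute names match the graph built above):
#
#   model = QaModel(batch_size, num_unroll_steps, embeddings, embedding_size,
#                   rnn_size, num_rnn_layers, max_grad_norm,
#                   attention_matrix_size, loss_ratio)
#   feed = {model.ori_input_quests: q_batch,        # [batch, num_unroll_steps] int ids
#           model.cand_input_quests: pos_a_batch,
#           model.neg_input_quests: neg_a_batch,
#           model.cat_ids: cat_one_hot,             # [batch, CAT_NUMBER] one-hot labels
#           model.keep_prob: 1.0}
#   _, loss, acc, multi_acc = sess.run(
#       [train_op, model.loss, model.acc, model.multi_acc], feed_dict=feed)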
def __init__(self, batch_size, quest_len, answer_len, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, loss_ratio, l2_reg_lambda=0.0,
             adjust_weight=False, label_weight=[], is_training=True, m=0.1):
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.quest_len = quest_len
    self.answer_len = answer_len
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.quest_len], name="ori_quest")
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="cand_quest")
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="neg_quest")
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.quest_len], name="test_input_q")
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="test_input_a")
    self.cat_ids = tf.placeholder(tf.int32, [None, CAT_NUMBER], name='cat_ids')

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_quest = tf.nn.embedding_lookup(W, self.test_input_q)
        test_answer = tf.nn.embedding_lookup(W, self.test_input_a)
        #ori_quests = tf.nn.dropout(ori_quests, self.keep_prob)
        #cand_quests = tf.nn.dropout(cand_quests, self.keep_prob)
        #neg_quests = tf.nn.dropout(neg_quests, self.keep_prob)

    # build LSTM network
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = biLSTM(ori_quests, self.rnn_size)
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_a = biLSTM(cand_quests, self.rnn_size)
        neg_a = biLSTM(neg_quests, self.rnn_size)
        test_q = biLSTM(test_quest, self.rnn_size)
        test_a = biLSTM(test_answer, self.rnn_size)

    # ----------------------------- cal attention -------------------------------
    with tf.variable_scope("attention", reuse=None) as scope:
        U = tf.get_variable("U", [2 * self.rnn_size, 2 * rnn_size],
                            initializer=tf.truncated_normal_initializer(stddev=0.1))
        G = tf.matmul(tf.matmul(ori_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
                      cand_a, adjoint_b=True)
        delta_q = tf.nn.softmax(tf.reduce_max(G, 2))
        delta_a = tf.nn.softmax(tf.reduce_max(G, 1))

        neg_G = tf.matmul(tf.matmul(ori_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
                          neg_a, adjoint_b=True)
        delta_neg_q = tf.nn.softmax(tf.reduce_max(neg_G, 2))
        delta_neg_a = tf.nn.softmax(tf.reduce_max(neg_G, 1))

    with tf.variable_scope("attention", reuse=True) as scope:
        test_G = tf.matmul(tf.matmul(test_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
                           test_a, adjoint_b=True)
        delta_test_q = tf.nn.softmax(tf.reduce_max(test_G, 2))
        delta_test_a = tf.nn.softmax(tf.reduce_max(test_G, 1))

    # -------------------------- recalculate lstm output -------------------------
    #ori_q_feat = tf.squeeze(tf.matmul(ori_q, tf.reshape(delta_q, [-1, self.quest_len, 1]), adjoint_a=True))
    #cand_q_feat = tf.squeeze(tf.matmul(cand_a, tf.reshape(delta_a, [-1, self.answer_len, 1]), adjoint_a=True))
    #neg_ori_q_feat = tf.squeeze(tf.matmul(ori_q, tf.reshape(delta_neg_q, [-1, self.quest_len, 1]), adjoint_a=True))
    #neg_q_feat = tf.squeeze(tf.matmul(neg_a, tf.reshape(delta_neg_a, [-1, self.answer_len, 1]), adjoint_a=True))
    #test_q_out = tf.squeeze(tf.matmul(test_q, tf.reshape(delta_test_q, [-1, self.quest_len, 1]), adjoint_a=True))
    #test_a_out = tf.squeeze(tf.matmul(test_a, tf.reshape(delta_test_a, [-1, self.answer_len, 1]), adjoint_a=True))

    ori_q_feat = max_pooling(tf.multiply(ori_q, tf.reshape(delta_q, [-1, self.quest_len, 1])))
    cand_q_feat = max_pooling(tf.multiply(cand_a, tf.reshape(delta_a, [-1, self.answer_len, 1])))
    neg_ori_q_feat = max_pooling(tf.multiply(ori_q, tf.reshape(delta_neg_q, [-1, self.quest_len, 1])))
    neg_q_feat = max_pooling(tf.multiply(neg_a, tf.reshape(delta_neg_a, [-1, self.answer_len, 1])))
    test_q_out = max_pooling(tf.multiply(test_q, tf.reshape(delta_test_q, [-1, self.quest_len, 1])))
    test_a_out = max_pooling(tf.multiply(test_a, tf.reshape(delta_test_a, [-1, self.answer_len, 1])))
    # -------------------------- recalculate lstm output end ---------------------

    # dropout
    #self.out_ori = tf.nn.dropout(self.out_ori, self.keep_prob)
    #self.out_cand = tf.nn.dropout(self.out_cand, self.keep_prob)
    #self.out_neg = tf.nn.dropout(self.out_neg, self.keep_prob)

    # multitasking
    with tf.name_scope("multitasking"):
        feature_size = int(ori_q_feat.get_shape()[1])
        fc1 = tf.layers.dense(ori_q_feat, feature_size * 2, activation=tf.nn.relu, name='fc1')
        fc2 = tf.layers.dense(fc1, feature_size, activation=tf.nn.relu, name='fc2')
        logits = tf.layers.dense(fc2, CAT_NUMBER, activation=tf.nn.sigmoid)

        # feature_size = int(ori_q_feat.get_shape()[1])
        # w = tf.get_variable(name='weights', shape=(feature_size, CAT_NUMBER),
        #                     initializer=tf.random_normal_initializer())
        # b = tf.get_variable(name='bias', shape=(1, CAT_NUMBER), initializer=tf.zeros_initializer())
        # positive_qa = tf.concat([out_ori, out_cand], 1, name="embedding_for_multitask")
        # logits = tf.matmul(ori_q_feat, w) + b

        entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.cat_ids, name='loss')
        loss_multitask = tf.reduce_mean(entropy)

    # acc
    self.ori_cand_score = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg_score = feature2cos_sim(ori_q_feat, neg_q_feat)
    loss_origin, self.acc = cal_loss_and_acc(self.ori_cand_score, self.ori_neg_score, m)

    self.loss = loss_origin * (1 - loss_ratio) + loss_multitask * loss_ratio

    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)

    # multitasking_acc
    with tf.name_scope("multi_acc"):
        self.preds = tf.nn.softmax(logits)
        self.correct_preds = tf.equal(tf.argmax(self.preds, 1), tf.argmax(self.cat_ids, 1))
        self.multi_acc = tf.reduce_sum(tf.cast(self.correct_preds, tf.float32))
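# What the "cal attention" block in the constructor above computes (a
# restatement of the code, in the spirit of attentive pooling): with Q the
# question's biLSTM outputs [batch, quest_len, 2*rnn_size] and A the answer's
# outputs [batch, answer_len, 2*rnn_size],
#   G        = Q . U . A^T                               # [batch, quest_len, answer_len]
#   delta_q  = softmax(max over the answer axis of G)    # one weight per question step
#   delta_a  = softmax(max over the question axis of G)  # one weight per answer step
# Each time step is then rescaled by its weight and max-pooled into the
# fixed-size features fed to the cosine similarity and the multitask classifier.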
def __init__(self, batch_size, quest_len, answer_len, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, l2_reg_lambda=0.0,
             adjust_weight=False, label_weight=[], is_training=True):
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.quest_len = quest_len
    self.answer_len = answer_len
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.quest_len], name="ori_quest")
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="cand_quest")
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="neg_quest")
    self.test_input_quests = tf.placeholder(tf.int32, shape=[None, self.quest_len], name="test_quest")
    self.test_input_answer = tf.placeholder(tf.int32, shape=[None, self.answer_len], name="test_cand_quest")

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_quest = tf.nn.embedding_lookup(W, self.test_input_quests)
        test_answer = tf.nn.embedding_lookup(W, self.test_input_answer)
        #ori_quests = tf.nn.dropout(ori_quests, self.keep_prob)
        #cand_quests = tf.nn.dropout(cand_quests, self.keep_prob)
        #neg_quests = tf.nn.dropout(neg_quests, self.keep_prob)

    # build LSTM network (note: this variant uses the pre-1.0 TensorFlow API,
    # e.g. tf.batch_matmul/tf.mul, and a helper named BILSTM)
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = BILSTM(ori_quests, self.rnn_size)
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_a = BILSTM(cand_quests, self.rnn_size)
        neg_a = BILSTM(neg_quests, self.rnn_size)
        test_q = BILSTM(test_quest, self.rnn_size)
        test_a = BILSTM(test_answer, self.rnn_size)

    # ----------------------------- cal attention -------------------------------
    with tf.variable_scope("attention", reuse=None) as scope:
        U = tf.get_variable("U", [2 * self.rnn_size, 2 * rnn_size],
                            initializer=tf.truncated_normal_initializer(stddev=0.1))
        G = tf.nn.tanh(
            tf.batch_matmul(
                tf.batch_matmul(ori_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
                cand_a, adj_y=True))
        delta_q = tf.nn.softmax(tf.reduce_max(G, 2))
        delta_a = tf.nn.softmax(tf.reduce_max(G, 1))

        neg_G = tf.nn.tanh(
            tf.batch_matmul(
                tf.batch_matmul(ori_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
                neg_a, adj_y=True))
        delta_neg_q = tf.nn.softmax(tf.reduce_max(neg_G, 2))
        delta_neg_a = tf.nn.softmax(tf.reduce_max(neg_G, 1))

    with tf.variable_scope("attention", reuse=True) as scope:
        test_G = tf.nn.tanh(
            tf.batch_matmul(
                tf.batch_matmul(test_q, tf.tile(tf.expand_dims(U, 0), [batch_size, 1, 1])),
                test_a, adj_y=True))
        delta_test_q = tf.nn.softmax(tf.reduce_max(test_G, 2))
        delta_test_a = tf.nn.softmax(tf.reduce_max(test_G, 1))

    # -------------------------- recalculate lstm output -------------------------
    #ori_q_feat = tf.squeeze(tf.batch_matmul(ori_q, tf.reshape(delta_q, [-1, self.quest_len, 1]), adj_x=True))
    #cand_q_feat = tf.squeeze(tf.batch_matmul(cand_a, tf.reshape(delta_a, [-1, self.answer_len, 1]), adj_x=True))
    #neg_ori_q_feat = tf.squeeze(tf.batch_matmul(ori_q, tf.reshape(delta_neg_q, [-1, self.quest_len, 1]), adj_x=True))
    #neg_q_feat = tf.squeeze(tf.batch_matmul(neg_a, tf.reshape(delta_neg_a, [-1, self.answer_len, 1]), adj_x=True))
    #test_q_feat = tf.squeeze(tf.batch_matmul(test_q, tf.reshape(delta_test_q, [-1, self.quest_len, 1]), adj_x=True))
    #test_a_feat = tf.squeeze(tf.batch_matmul(test_a, tf.reshape(delta_test_a, [-1, self.answer_len, 1]), adj_x=True))

    ori_q_feat = max_pooling(tf.mul(ori_q, tf.reshape(delta_q, [-1, self.quest_len, 1])))
    cand_q_feat = max_pooling(tf.mul(cand_a, tf.reshape(delta_a, [-1, self.answer_len, 1])))
    neg_ori_q_feat = max_pooling(tf.mul(ori_q, tf.reshape(delta_neg_q, [-1, self.quest_len, 1])))
    neg_q_feat = max_pooling(tf.mul(neg_a, tf.reshape(delta_neg_a, [-1, self.answer_len, 1])))
    test_q_feat = max_pooling(tf.mul(test_q, tf.reshape(delta_test_q, [-1, self.quest_len, 1])))
    test_a_feat = max_pooling(tf.mul(test_a, tf.reshape(delta_test_a, [-1, self.answer_len, 1])))
    # -------------------------- recalculate lstm output end ---------------------

    # dropout
    #self.out_ori = tf.nn.dropout(self.out_ori, self.keep_prob)
    #self.out_cand = tf.nn.dropout(self.out_cand, self.keep_prob)
    #self.out_neg = tf.nn.dropout(self.out_neg, self.keep_prob)

    # cal cosine similarity
    self.ori_cand = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg = feature2cos_sim(neg_ori_q_feat, neg_q_feat)
    self.test_q_a = feature2cos_sim(test_q_feat, test_a_feat)
    self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)
def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, attention_matrix_size,
             loss_ratio, l2_reg_lambda=0.0, adjust_weight=False, label_weight=[],
             is_training=True, m=0.1):
    """
    LSTM-BASED DEEP LEARNING MODELS FOR NON-FACTOID ANSWER SELECTION
    """
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.num_unroll_steps = num_unroll_steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cat_ids = tf.placeholder(tf.int32, [None, CAT_NUMBER], name='cat_ids')

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_q = tf.nn.embedding_lookup(W, self.test_input_q)
        test_a = tf.nn.embedding_lookup(W, self.test_input_a)

    # build LSTM network
    U = tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.embedding_size], stddev=0.1),
                    name="U")
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = biLSTM(ori_quests, self.rnn_size)
        ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_att_weight = tf.sigmoid(
            tf.matmul(cand_quests,
                      tf.reshape(tf.expand_dims(tf.matmul(ori_q_feat, U), 1),
                                 [-1, self.embedding_size, 1])))
        neg_att_weight = tf.sigmoid(
            tf.matmul(neg_quests,
                      tf.reshape(tf.expand_dims(tf.matmul(ori_q_feat, U), 1),
                                 [-1, self.embedding_size, 1])))
        cand_a = biLSTM(
            tf.multiply(cand_quests, tf.tile(cand_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        neg_a = biLSTM(
            tf.multiply(neg_quests, tf.tile(neg_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
        neg_q_feat = tf.nn.tanh(max_pooling(neg_a))

        test_q_out = biLSTM(test_q, self.rnn_size)
        test_q_out = tf.nn.tanh(max_pooling(test_q_out))
        test_att_weight = tf.sigmoid(
            tf.matmul(test_a,
                      tf.reshape(tf.expand_dims(tf.matmul(test_q_out, U), 1),
                                 [-1, self.embedding_size, 1])))
        test_a_out = biLSTM(
            tf.multiply(test_a, tf.tile(test_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    # multitasking
    with tf.name_scope("multitasking"):
        feature_size = int(ori_q_feat.get_shape()[1])
        fc1 = tf.layers.dense(ori_q_feat, feature_size * 2, activation=tf.nn.relu, name='fc1')
        fc2 = tf.layers.dense(fc1, feature_size, activation=tf.nn.relu, name='fc2')
        logits = tf.layers.dense(fc2, CAT_NUMBER, activation=tf.nn.sigmoid)

        # feature_size = int(ori_q_feat.get_shape()[1])
        # w = tf.get_variable(name='weights', shape=(feature_size, CAT_NUMBER),
        #                     initializer=tf.random_normal_initializer())
        # b = tf.get_variable(name='bias', shape=(1, CAT_NUMBER), initializer=tf.zeros_initializer())
        # positive_qa = tf.concat([out_ori, out_cand], 1, name="embedding_for_multitask")
        # logits = tf.matmul(ori_q_feat, w) + b

        entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=self.cat_ids, name='loss')
        loss_multitask = tf.reduce_mean(entropy)

    # acc
    self.ori_cand_score = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg_score = feature2cos_sim(ori_q_feat, neg_q_feat)
    loss_origin, self.acc = cal_loss_and_acc(self.ori_cand_score, self.ori_neg_score, m)

    self.loss = loss_origin * (1 - loss_ratio) + loss_multitask * loss_ratio

    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)

    # multitasking_acc
    with tf.name_scope("multi_acc"):
        self.preds = tf.nn.softmax(logits)
        self.correct_preds = tf.equal(tf.argmax(self.preds, 1), tf.argmax(self.cat_ids, 1))
        self.multi_acc = tf.reduce_sum(tf.cast(self.correct_preds, tf.float32))

def assign_new_lr(self, session, lr_value):
    session.run(self._lr_update, feed_dict={self.new_lr: lr_value})
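# assign_new_lr lets a training loop decay the learning rate between epochs by
# running the _lr_update assign op, e.g. (sketch; `sess`, `init_lr`, and the
# decay schedule are assumptions):
#
#   for epoch in range(max_epochs):
#       model.assign_new_lr(sess, init_lr * (lr_decay ** epoch))
#       ...  # run the epoch's training batches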
def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size,
             rnn_size, num_rnn_layers, max_grad_norm, attention_matrix_size,
             l2_reg_lambda=0.0, adjust_weight=False, label_weight=[], is_training=True):
    """
    LSTM-BASED DEEP LEARNING MODELS FOR NON-FACTOID ANSWER SELECTION
    """
    # define input variable
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.num_unroll_steps = num_unroll_steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")

    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    self.ori_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.cand_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.neg_input_quests = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_q = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])
    self.test_input_a = tf.placeholder(tf.int32, shape=[None, self.num_unroll_steps])

    # embedding layer
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)
        test_q = tf.nn.embedding_lookup(W, self.test_input_q)
        test_a = tf.nn.embedding_lookup(W, self.test_input_a)

    # build LSTM network (pre-1.0 TensorFlow API: tf.batch_matmul/tf.mul)
    U = tf.Variable(tf.truncated_normal([2 * self.rnn_size, self.embedding_size], stddev=0.1),
                    name="U")
    with tf.variable_scope("LSTM_scope", reuse=None):
        ori_q = biLSTM(ori_quests, self.rnn_size)
        ori_q_feat = tf.nn.tanh(max_pooling(ori_q))
    with tf.variable_scope("LSTM_scope", reuse=True):
        cand_att_weight = tf.sigmoid(
            tf.batch_matmul(cand_quests,
                            tf.reshape(tf.expand_dims(tf.batch_matmul(ori_q_feat, U), 1),
                                       [-1, self.embedding_size, 1])))
        neg_att_weight = tf.sigmoid(
            tf.batch_matmul(neg_quests,
                            tf.reshape(tf.expand_dims(tf.batch_matmul(ori_q_feat, U), 1),
                                       [-1, self.embedding_size, 1])))
        cand_a = biLSTM(
            tf.mul(cand_quests, tf.tile(cand_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        neg_a = biLSTM(
            tf.mul(neg_quests, tf.tile(neg_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        cand_q_feat = tf.nn.tanh(max_pooling(cand_a))
        neg_q_feat = tf.nn.tanh(max_pooling(neg_a))

        test_q_out = biLSTM(test_q, self.rnn_size)
        test_q_out = tf.nn.tanh(max_pooling(test_q_out))
        test_att_weight = tf.sigmoid(
            tf.batch_matmul(test_a,
                            tf.reshape(tf.expand_dims(tf.batch_matmul(test_q_out, U), 1),
                                       [-1, self.embedding_size, 1])))
        test_a_out = biLSTM(
            tf.mul(test_a, tf.tile(test_att_weight, [1, 1, self.embedding_size])),
            self.rnn_size)
        test_a_out = tf.nn.tanh(max_pooling(test_a_out))

    self.ori_cand = feature2cos_sim(ori_q_feat, cand_q_feat)
    self.ori_neg = feature2cos_sim(ori_q_feat, neg_q_feat)
    self.loss, self.acc = cal_loss_and_acc(self.ori_cand, self.ori_neg)
    self.test_q_a = feature2cos_sim(test_q_out, test_a_out)
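# In this variant (and the multitask one above it), attention is applied before
# the answer-side biLSTM rather than after it: the pooled question feature is
# projected through U, matched against each answer word embedding to produce a
# per-word sigmoid weight, and the reweighted embeddings are then fed to the
# shared biLSTM, following the attentive-LSTM idea from the paper cited in the
# docstring ("LSTM-based deep learning models for non-factoid answer selection").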