def build_model(self):
    # Encode hypothesis and premise with separate LSTMs.
    rnn_outputs_hypo, final_state_hypo = lstm_layer(
        self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
        "hypo")
    rnn_outputs_prem, final_state_prem = lstm_layer(
        self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
        "prem")
    last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                    rnn_outputs_hypo,
                                                    "encoder_hypo",
                                                    sparse=self.sparse)
    last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                    rnn_outputs_prem,
                                                    "encoder_prem",
                                                    sparse=self.sparse)
    self.alphas_hypo = alphas_hypo
    self.alphas_prem = alphas_prem

    # Classify the concatenated sentence encodings into 3 NLI classes.
    self.logits = dense_layer(tf.concat(
        [last_output_hypo, last_output_prem], axis=1),
                              3,
                              activation=None,
                              name="pred_out")
    self.y = tf.nn.softmax(self.logits)

    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=3), logits=self.logits))
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
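# These methods assume three helpers: lstm_layer, attention_layer, and
# dense_layer. Their definitions are not shown in this section; the sketch
# below is a plausible TF 1.x reconstruction, with names and signatures
# inferred from the call sites above (not taken from the original source).
# In particular, the `sparse` flag and the use of `batch_size` are guesses.

import tensorflow as tf


def lstm_layer(inputs, lstm_size, batch_size, seq_len, scope="lstm"):
    # inputs: (batch_size, max_time, emb_dim); seq_len: true lengths.
    # batch_size is unused here; the original may use it for an initial state.
    with tf.variable_scope(scope):
        cell = tf.nn.rnn_cell.LSTMCell(lstm_size)
        outputs, final_state = tf.nn.dynamic_rnn(
            cell, inputs, sequence_length=seq_len, dtype=tf.float32)
    return outputs, final_state


def attention_layer(attention_size, rnn_outputs, scope, sparse=False):
    # Additive attention over time steps; returns the weighted sum of the
    # RNN outputs and the attention weights (alphas).
    with tf.variable_scope(scope):
        hidden = rnn_outputs.shape[2].value
        w = tf.get_variable("w_att", [hidden, attention_size])
        b = tf.get_variable("b_att", [attention_size])
        u = tf.get_variable("u_att", [attention_size])
        v = tf.tanh(tf.tensordot(rnn_outputs, w, axes=1) + b)
        scores = tf.tensordot(v, u, axes=1)  # (batch, max_time)
        # `sparse` presumably switches to a sparsity-inducing normalizer
        # (e.g. sparsemax); this sketch uses a plain softmax either way.
        alphas = tf.nn.softmax(scores)
        output = tf.reduce_sum(rnn_outputs * tf.expand_dims(alphas, -1), 1)
    return output, alphas


def dense_layer(inputs, units, activation=None, name="dense"):
    return tf.layers.dense(inputs, units, activation=activation, name=name)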
def build_model(self):
    rnn_outputs_hypo, final_state_hypo = lstm_layer(
        self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
        "hypo")
    rnn_outputs_prem, final_state_prem = lstm_layer(
        self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
        "prem")
    last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                    rnn_outputs_hypo,
                                                    "encoder_hypo",
                                                    sparse=self.sparse)
    last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                    rnn_outputs_prem,
                                                    "encoder_prem",
                                                    sparse=self.sparse)
    self.alphas_hypo = alphas_hypo
    self.alphas_prem = alphas_prem

    self.logits = dense_layer(tf.concat(
        [last_output_hypo, last_output_prem], axis=1),
                              3,
                              activation=None,
                              name="pred_out")
    self.y = tf.nn.softmax(self.logits)

    # Adversarial branch: a linear classifier on the flattened word
    # embeddings. Its inverse cross-entropy is added to the cost below, so
    # training pushes the embeddings to be *less* predictable by this
    # shallow bag-of-embeddings model.
    adv_in_hypo = tf.reshape(
        self.e_hypo, [-1, self.e_hypo.shape[1] * self.e_hypo.shape[2]])
    # NOTE: fixed a copy-paste bug here; the original used
    # self.e_hypo.shape[2] for the premise reshape.
    adv_in_prem = tf.reshape(
        self.e_prem, [-1, self.e_prem.shape[1] * self.e_prem.shape[2]])

    # ### Debug: explicit linear layer instead of dense_layer ###
    # self.w_adv = tf.get_variable(
    #     "w",
    #     shape=[adv_in.shape[-1], 2],
    #     initializer=tf.truncated_normal_initializer())
    # self.b_adv = tf.get_variable("b", shape=[2], dtype=tf.float32)
    # adv_logits = tf.matmul(adv_in, self.w_adv) + self.b_adv
    # ############

    adv_logits = dense_layer(tf.concat([adv_in_hypo, adv_in_prem], axis=1),
                             3,
                             activation=None,
                             name="adv_encoder")
    adv_cost = 1 / tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=3), logits=adv_logits))

    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=3), logits=self.logits))
    self.cost = self.cost + 0.01 * adv_cost

    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
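# The adversarial term above inverts the shallow classifier's cross-entropy,
# so the combined objective is roughly
#     cost = CE(pred) + 0.01 / CE(adv).
# Minimizing it fits the prediction branch while pushing the adversary's
# cross-entropy up. A tiny standalone illustration of how the 1/CE penalty
# behaves (the cross-entropy values are made up):

for adv_ce in (0.1, 1.0, 3.0):
    print(adv_ce, 0.01 / adv_ce)  # penalty shrinks as the adversary fails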
def build_model(self):
    rnn_outputs, final_state = lstm_layer(self.e, self.lstm_size,
                                          self.batch_size, self.seq_len)
    last_output, self.alphas = attention_layer(self.attention_size,
                                               rnn_outputs,
                                               "pred_encoder",
                                               sparse=self.sparse)
    last_output = tf.nn.dropout(last_output, self.keep_probs)
    self.logits = dense_layer(last_output, 2, activation=None,
                              name="pred_out")
    self.y = tf.nn.softmax(self.logits)
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=2), logits=self.logits))
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
def build_model(self):
    # input shape = (batch_size, sentence_length, emb_dim)
    rnn_outputs, final_state = lstm_layer(self.e, self.lstm_size,
                                          self.batch_size, self.seq_len)
    last_output, self.alphas = attention_layer(self.attention_size,
                                               rnn_outputs,
                                               "encoder",
                                               sparse=self.sparse)
    self.logits = dense_layer(last_output, 2, activation=None,
                              name="pred_out")
    self.y = tf.nn.softmax(self.logits)
    # WARNING: softmax_cross_entropy_with_logits expects unscaled logits,
    # since it applies softmax internally for efficiency. Do not feed it
    # the output of tf.nn.softmax, as that produces incorrect results.
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=2), logits=self.logits))
    # Penalize the attention distribution.
    reg = get_reg(self.alphas, lam=self.lam, type=self.reg)
    self.cost += reg
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
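# get_reg penalizes the attention weights. Its definition is not shown in
# this section; the sketch below is a minimal reconstruction, assuming
# `type` selects between an entropy penalty and a negative-L2 penalty on
# the alphas. The actual options in the original code may differ.

def get_reg(alphas, lam=0.001, type="entropy"):
    # alphas: (batch, max_time); each row is a probability distribution.
    if type == "entropy":
        # Penalizing entropy pushes the attention toward a few tokens.
        ent = -tf.reduce_sum(alphas * tf.log(alphas + 1e-8), axis=1)
        return lam * tf.reduce_mean(ent)
    if type == "l2":
        # Rewarding a large L2 norm has a similar sparsifying effect on a
        # probability simplex (the norm is maximal at a one-hot vector).
        return -lam * tf.reduce_mean(
            tf.reduce_sum(tf.square(alphas), axis=1))
    raise ValueError("unknown regularizer: %s" % type)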
def build_model(self):
    # input shape = (batch_size, sentence_length, emb_dim)
    rnn_outputs_hypo, final_state_hypo = lstm_layer(
        self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
        "hypo")
    rnn_outputs_prem, final_state_prem = lstm_layer(
        self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
        "prem")
    last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                    rnn_outputs_hypo,
                                                    "encoder_hypo",
                                                    sparse=self.sparse)
    last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                    rnn_outputs_prem,
                                                    "encoder_prem",
                                                    sparse=self.sparse)
    self.alphas_hypo = alphas_hypo
    self.alphas_prem = alphas_prem
    self.logits = dense_layer(tf.concat(
        [last_output_hypo, last_output_prem], axis=1),
                              3,
                              activation=None,
                              name="pred_out")
    self.y = tf.nn.softmax(self.logits)
    # WARNING: softmax_cross_entropy_with_logits expects unscaled logits,
    # since it applies softmax internally for efficiency. Do not feed it
    # the output of tf.nn.softmax, as that produces incorrect results.
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=3), logits=self.logits))
    # Regularize both attention distributions.
    reg1 = get_reg(alphas_hypo, lam=self.lam, type=self.reg)
    reg2 = get_reg(alphas_prem, lam=self.lam, type=self.reg)
    self.cost += reg1 + reg2
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
def build_model(self):
    rnn_outputs, final_state = lstm_layer(self.e, self.lstm_size,
                                          self.batch_size, self.seq_len)
    last_output, self.alphas = attention_layer(self.attention_size,
                                               rnn_outputs,
                                               "pred_encoder",
                                               sparse=self.sparse)
    last_output = tf.nn.dropout(last_output, self.keep_probs)
    self.logits = dense_layer(last_output, 2, activation=None,
                              name="pred_out")
    self.y = tf.nn.softmax(self.logits)

    # Adversarial branch (debug version): an explicit linear classifier on
    # the flattened word embeddings; the commented-out dense_layer call
    # below is the equivalent helper version.
    adv_in = tf.reshape(self.e, [-1, self.e.shape[1] * self.e.shape[2]])
    self.w_adv = tf.get_variable(
        "w",
        shape=[adv_in.shape[-1], 2],
        initializer=tf.truncated_normal_initializer())
    self.b_adv = tf.get_variable("b", shape=[2], dtype=tf.float32)
    adv_logits = tf.matmul(adv_in, self.w_adv) + self.b_adv
    # adv_logits = dense_layer(adv_in, 2, activation=None,
    #                          name="adv_encoder")

    # Inverse cross-entropy: minimizing it maximizes the adversary's loss.
    adv_cost = 1 / tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=2), logits=adv_logits))

    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=2), logits=self.logits))
    self.cost = self.cost + 0.01 * adv_cost

    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
def build_model(self):
    # Prediction RNN: encode hypothesis and premise with attention.
    rnn_outputs_hypo, final_state_hypo = lstm_layer(
        self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
        "hypo")
    rnn_outputs_prem, final_state_prem = lstm_layer(
        self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
        "prem")
    last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                    rnn_outputs_hypo,
                                                    "encoder_hypo",
                                                    sparse=self.sparse)
    last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                    rnn_outputs_prem,
                                                    "encoder_prem",
                                                    sparse=self.sparse)
    self.alphas_hypo = alphas_hypo
    self.alphas_prem = alphas_prem
    # last_output = tf.nn.dropout(last_output, self.keep_probs)

    # Keyword-model RNN: a second encoder meant to capture the superficial
    # (keyword) statistics that the HEX projection removes below.
    kwm_rnn_outputs_hypo, kwm_final_state_hypo = lstm_layer(
        self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
        scope="kwm_hypo")
    kwm_rnn_outputs_prem, kwm_final_state_prem = lstm_layer(
        self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
        scope="kwm_prem")
    kwm_last_output_hypo, kwm_alphas_hypo = attention_layer(
        self.attention_size, kwm_rnn_outputs_hypo, "kwm_encoder_hypo",
        sparse=self.sparse)
    kwm_last_output_prem, kwm_alphas_prem = attention_layer(
        self.attention_size, kwm_rnn_outputs_prem, "kwm_encoder_prem",
        sparse=self.sparse)

    last_output = tf.concat([last_output_hypo, last_output_prem], axis=1)
    kwm_last_output = tf.concat(
        [kwm_last_output_hypo, kwm_last_output_prem], axis=1)

    ############################
    # HEX projection
    ############################
    h_fc1 = last_output      # task representation
    h_fc2 = kwm_last_output  # superficial (keyword) representation

    # Explicit HEX-layer weights (replaced by dense_layer calls below):
    # self.W_cl_1 = tf.Variable(tf.random_normal([self.dim, 3], stddev=0.1))
    # self.W_cl_2 = tf.Variable(tf.random_normal([1200, 3]), trainable=True)
    # self.b_cl = tf.Variable(tf.random_normal((3,)), trainable=True)
    # self.W_cl = tf.concat([self.W_cl_1, self.W_cl_2], 0)

    # Prediction path: [h_fc1, 0(pad)].
    pad = tf.zeros_like(h_fc2, tf.float32)  # pad.shape -> (?, 600)
    yconv_contact_pred = tf.nn.dropout(tf.concat([h_fc1, pad], 1),
                                       self.keep_probs)
    # y_conv_pred = tf.matmul(yconv_contact_pred, self.W_cl) + self.b_cl
    y_conv_pred = dense_layer(yconv_contact_pred, 3, name="conv_pred")
    self.logits = y_conv_pred  # prediction logits

    # Loss path: F_g from [0(pad2), h_fc2], F_b from [h_fc1, h_fc2].
    pad2 = tf.zeros_like(h_fc1, tf.float32)
    yconv_contact_H = tf.concat([pad2, h_fc2], 1)
    # y_conv_H = tf.matmul(yconv_contact_H, self.W_cl) + self.b_cl
    y_conv_H = dense_layer(yconv_contact_H, 3, name="conv_H")  # F_g
    yconv_contact_loss = tf.nn.dropout(tf.concat([h_fc1, h_fc2], 1),
                                       self.keep_probs)
    # y_conv_loss = tf.matmul(yconv_contact_loss, self.W_cl) + self.b_cl
    y_conv_loss = dense_layer(yconv_contact_loss, 3, name="conv_loss")  # F_b

    # Project F_b onto the orthogonal complement of F_g's column space:
    # F_b - F_g (F_g^T F_g)^{-1} F_g^T F_b.
    temp = tf.matmul(y_conv_H, y_conv_H, transpose_a=True)
    self.temp = temp
    y_conv_loss = y_conv_loss - tf.matmul(
        tf.matmul(tf.matmul(y_conv_H, tf.matrix_inverse(temp)),
                  y_conv_H,
                  transpose_b=True), y_conv_loss)
    # NOTE: this overwrites the prediction logits assigned above, so both
    # the loss and the accuracy below use the projected (loss) logits.
    self.logits = y_conv_loss

    self.y = tf.nn.softmax(self.logits)
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=3), logits=self.logits))

    # Regularize the keyword model's attention.
    reg1 = get_reg(kwm_alphas_hypo, lam=self.lam, type=self.reg)
    reg2 = get_reg(kwm_alphas_prem, lam=self.lam, type=self.reg)
    self.cost += reg1 + reg2

    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
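# The HEX step above is a linear projection: with F_g = y_conv_H and
# F_b = y_conv_loss, it computes F_b - F_g (F_g^T F_g)^{-1} F_g^T F_b,
# i.e. the residual of F_b after projecting onto the column space of F_g.
# A small NumPy check of that property (illustrative only, not part of
# the model; the matrices are random stand-ins):

import numpy as np

rng = np.random.default_rng(0)
F_g = rng.normal(size=(8, 3))  # stand-in for y_conv_H
F_b = rng.normal(size=(8, 3))  # stand-in for y_conv_loss

proj = F_g @ np.linalg.inv(F_g.T @ F_g) @ F_g.T @ F_b
residual = F_b - proj

# The residual is orthogonal to every column of F_g (up to float error),
# so whatever the keyword model can explain is removed from the logits.
print(np.abs(F_g.T @ residual).max())  # ~1e-15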