def normal_sampling(features, t, k=100):
    b, n, d = features.get_shape().as_list()  # b, n, d
    # Score each point with a small MLP.
    score_h1 = model_utils.dense_layer(
        tf.reshape(features, [-1, d]), 256, 'score_h1')  # b*n, 256
    origin_score = model_utils.dense_layer(
        score_h1, 1, 'score', activation=tf.nn.sigmoid)  # b*n, 1
    score = tf.reshape(origin_score, [b, n])  # b, n
    # Perturb the scores with truncated-normal noise scaled by the temperature.
    noise = tf.nn.relu(tf.random.truncated_normal([b, n], stddev=t**2))  # b, n
    score += noise  # b, n
    # Sort all n points by their noisy score with top_k.
    sorted_score, sorted_indices = tf.nn.top_k(score, n)  # b, n
    coord1 = tf.reshape(
        tf.tile(tf.expand_dims(tf.range(b), axis=-1), [1, n]), [-1])  # b*n
    coord2 = tf.reshape(sorted_indices, [-1])  # b*n
    coords = tf.reshape(tf.stack([coord1, coord2], axis=1), [b, n, 2])  # b, n, 2
    sorted_features = tf.gather_nd(features, coords)  # b, n, d
    top_features, bot_features = tf.split(sorted_features, [k, n - k], axis=1)
    top_scores, bot_scores = tf.split(sorted_score, [k, n - k], axis=1)
    # Sampled features: keep the top-k points, weighted by their scores.
    top_scores = tf.tile(tf.expand_dims(top_scores, axis=2), [1, 1, d])  # b, k, d
    # sub_features = tf.pow(top_scores, t) * top_features
    sub_features = top_scores * top_features
    return sub_features, tf.reshape(score, [b, n])
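# --- Usage sketch (not from the original repo): how `normal_sampling` might be
# called on a [batch, num_points, dim] feature tensor. The placeholder shape,
# temperature, and k below are illustrative assumptions only.
features_ph = tf.placeholder(tf.float32, [32, 1024, 64])  # b, n, d
sampled, point_scores = normal_sampling(features_ph, t=0.5, k=100)
# sampled: [32, 100, 64] score-weighted top-k features
# point_scores: [32, 1024] noisy per-point scores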
def build_model(self):
    rnn_outputs_hypo, final_state_hypo = lstm_layer(
        self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
        "hypo")
    rnn_outputs_prem, final_state_prem = lstm_layer(
        self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
        "prem")
    last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                    rnn_outputs_hypo,
                                                    "encoder_hypo",
                                                    sparse=self.sparse)
    last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                    rnn_outputs_prem,
                                                    "encoder_prem",
                                                    sparse=self.sparse)
    self.alphas_hypo = alphas_hypo
    self.alphas_prem = alphas_prem

    self.logits = dense_layer(tf.concat(
        [last_output_hypo, last_output_prem], axis=1),
                              3,
                              activation=None,
                              name="pred_out")
    self.y = tf.nn.softmax(self.logits)

    # Flatten the embedded inputs for the adversarial encoder.
    adv_in_hypo = tf.reshape(
        self.e_hypo, [-1, self.e_hypo.shape[1] * self.e_hypo.shape[2]])
    adv_in_prem = tf.reshape(
        self.e_prem, [-1, self.e_prem.shape[1] * self.e_prem.shape[2]])
    """
    ### Debug ###
    self.w_adv = tf.get_variable("w", shape=[adv_in.shape[-1], 2],
                                 initializer=tf.truncated_normal_initializer())
    self.b_adv = tf.get_variable("b", shape=[2], dtype=tf.float32)
    adv_logits = tf.matmul(adv_in, self.w_adv) + self.b_adv
    ############
    """
    adv_logits = dense_layer(tf.concat([adv_in_hypo, adv_in_prem], axis=1),
                             3,
                             activation=None,
                             name="adv_encoder")
    # Adversarial term: the reciprocal of the adversary's cross-entropy, so
    # minimizing the total cost pushes the adversary's loss up.
    adv_cost = 1 / tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=3), logits=adv_logits))

    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
            self.y_holder, depth=3), logits=self.logits))
    self.cost = self.cost + 0.01 * adv_cost
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
def concrete_sampling(features, t, k=100):
    b, n, d = features.get_shape().as_list()  # b, n, d
    # Per-point logits for the concrete (Gumbel-softmax) distribution.
    alpha_h = model_utils.dense_layer(
        tf.reshape(features, [-1, d]), 256, 'alpha_h')  # b*n, 256
    alpha = model_utils.dense_layer(alpha_h, 1, 'alpha', activation=None)  # b*n, 1
    alpha_n = tf.tile(tf.reshape(alpha, [b, 1, n]), [1, k, 1])  # b, k, n
    # Gumbel noise from uniform samples.
    uniform_noise = tf.random_uniform([b, k, n])  # b, k, n
    gumbel_noise = -tf.log(-tf.log(uniform_noise))  # b, k, n
    noisy_alpha = (alpha_n + gumbel_noise) / (t * 10.)
    samples = tf.nn.softmax(noisy_alpha, axis=-1)  # b, k, n
    # Each of the k samples is a soft (convex) combination of the n features.
    sub_features = tf.matmul(samples, features)  # b, k, d
    return sub_features, tf.reshape(alpha, [b, n])
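# --- Usage sketch (not from the original repo): `concrete_sampling` relaxes the
# hard top-k selection above with Gumbel-softmax samples, so each of the k
# output rows is a soft mixture over all n input features. The shape,
# temperature, and k are illustrative assumptions; a smaller t gives sharper
# (closer to one-hot) mixtures.
features_ph = tf.placeholder(tf.float32, [32, 1024, 64])  # b, n, d
soft_samples, point_logits = concrete_sampling(features_ph, t=0.1, k=100)
# soft_samples: [32, 100, 64], point_logits: [32, 1024]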
def build_model(self):
    rnn_outputs_hypo, final_state_hypo = lstm_layer(
        self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
        "hypo")
    rnn_outputs_prem, final_state_prem = lstm_layer(
        self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
        "prem")
    last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                    rnn_outputs_hypo,
                                                    "encoder_hypo",
                                                    sparse=self.sparse)
    last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                    rnn_outputs_prem,
                                                    "encoder_prem",
                                                    sparse=self.sparse)
    self.alphas_hypo = alphas_hypo
    self.alphas_prem = alphas_prem

    self.logits = dense_layer(tf.concat(
        [last_output_hypo, last_output_prem], axis=1),
                              3,
                              activation=None,
                              name="pred_out")
    self.y = tf.nn.softmax(self.logits)

    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
            self.y_holder, depth=3), logits=self.logits))
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
def build_model(self):
    rnn_outputs, final_state = lstm_layer(self.e, self.lstm_size,
                                          self.batch_size, self.seq_len)
    last_output, self.alphas = attention_layer(self.attention_size,
                                               rnn_outputs,
                                               "pred_encoder",
                                               sparse=self.sparse)
    last_output = tf.nn.dropout(last_output, self.keep_probs)
    self.logits = dense_layer(last_output, 2, activation=None, name="pred_out")
    self.y = tf.nn.softmax(self.logits)
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
            self.y_holder, depth=2), logits=self.logits))
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
def build_model(self):
    # shape = (batch_size, sentence_length, emb_dim)
    rnn_outputs, final_state = lstm_layer(self.e, self.lstm_size,
                                          self.batch_size, self.seq_len)
    last_output, self.alphas = attention_layer(self.attention_size,
                                               rnn_outputs,
                                               "encoder",
                                               sparse=self.sparse)
    self.logits = dense_layer(last_output, 2, activation=None, name="pred_out")
    self.y = tf.nn.softmax(self.logits)
    # WARNING: This op expects unscaled logits, since it performs a softmax
    # on logits internally for efficiency. Do not call this op with the
    # output of softmax, as it will produce incorrect results.
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
            self.y_holder, depth=2), logits=self.logits))
    reg = get_reg(self.alphas, lam=self.lam, type=self.reg)
    self.cost += reg
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
def build_model(self):
    # Flatten the embedded input and apply a single linear layer.
    inputs = tf.reshape(self.e, [-1, self.e.shape[1] * self.e.shape[2]])
    self.logits = dense_layer(inputs, 2, name="pred_out", activation=None)
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
            self.y_holder, depth=2), logits=self.logits))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
    self.y = tf.nn.softmax(self.logits)
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
def build_model(self):
    rnn_outputs, final_state = lstm_layer(self.e, self.lstm_size,
                                          self.batch_size, self.seq_len)
    last_output, self.alphas = attention_layer(self.attention_size,
                                               rnn_outputs,
                                               "pred_encoder",
                                               sparse=self.sparse)
    last_output = tf.nn.dropout(last_output, self.keep_probs)
    self.logits = dense_layer(last_output, 2, activation=None, name="pred_out")
    self.y = tf.nn.softmax(self.logits)

    ### Debug ###
    # Linear adversary reading the flattened embeddings directly.
    adv_in = tf.reshape(self.e, [-1, self.e.shape[1] * self.e.shape[2]])
    self.w_adv = tf.get_variable(
        "w",
        shape=[adv_in.shape[-1], 2],
        initializer=tf.truncated_normal_initializer())
    self.b_adv = tf.get_variable("b", shape=[2], dtype=tf.float32)
    adv_logits = tf.matmul(adv_in, self.w_adv) + self.b_adv
    ############
    # adv_logits = dense_layer(adv_in, 2, activation=None, name="adv_encoder")

    # Adversarial term: the reciprocal of the adversary's cross-entropy, so
    # minimizing the total cost pushes the adversary's loss up.
    adv_cost = 1 / tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(self.y_holder, depth=2), logits=adv_logits))

    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
            self.y_holder, depth=2), logits=self.logits))
    self.cost = self.cost + 0.01 * adv_cost
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
def build_model(self):
    # input shape = (batch_size, sentence_length, emb_dim)
    rnn_outputs_hypo, final_state_hypo = lstm_layer(
        self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
        "hypo")
    rnn_outputs_prem, final_state_prem = lstm_layer(
        self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
        "prem")
    last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                    rnn_outputs_hypo,
                                                    "encoder_hypo",
                                                    sparse=self.sparse)
    last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                    rnn_outputs_prem,
                                                    "encoder_prem",
                                                    sparse=self.sparse)
    self.alphas_hypo = alphas_hypo
    self.alphas_prem = alphas_prem

    self.logits = dense_layer(tf.concat(
        [last_output_hypo, last_output_prem], axis=1),
                              3,
                              activation=None,
                              name="pred_out")
    self.y = tf.nn.softmax(self.logits)
    # WARNING: This op expects unscaled logits, since it performs a softmax
    # on logits internally for efficiency. Do not call this op with the
    # output of softmax, as it will produce incorrect results.
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
            self.y_holder, depth=3), logits=self.logits))
    reg1 = get_reg(alphas_hypo, lam=self.lam, type=self.reg)
    reg2 = get_reg(alphas_prem, lam=self.lam, type=self.reg)
    self.cost += reg1 + reg2
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
def build_model(self):
    # Define prediction rnn
    rnn_outputs_hypo, final_state_hypo = lstm_layer(
        self.e_hypo, self.lstm_size, self.batch_size, self.seq_len_hypo,
        "hypo")
    rnn_outputs_prem, final_state_prem = lstm_layer(
        self.e_prem, self.lstm_size, self.batch_size, self.seq_len_prem,
        "prem")
    last_output_hypo, alphas_hypo = attention_layer(self.attention_size,
                                                    rnn_outputs_hypo,
                                                    "encoder_hypo",
                                                    sparse=self.sparse)
    last_output_prem, alphas_prem = attention_layer(self.attention_size,
                                                    rnn_outputs_prem,
                                                    "encoder_prem",
                                                    sparse=self.sparse)
    self.alphas_hypo = alphas_hypo
    self.alphas_prem = alphas_prem
    # last_output = tf.nn.dropout(last_output, self.keep_probs)

    # Define key-word model rnn
    kwm_rnn_outputs_hypo, kwm_final_state_hypo = lstm_layer(
        self.e_hypo,
        self.lstm_size,
        self.batch_size,
        self.seq_len_hypo,
        scope="kwm_hypo")
    kwm_rnn_outputs_prem, kwm_final_state_prem = lstm_layer(
        self.e_prem,
        self.lstm_size,
        self.batch_size,
        self.seq_len_prem,
        scope="kwm_prem")
    kwm_last_output_hypo, kwm_alphas_hypo = attention_layer(
        self.attention_size,
        kwm_rnn_outputs_hypo,
        "kwm_encoder_hypo",
        sparse=self.sparse)
    kwm_last_output_prem, kwm_alphas_prem = attention_layer(
        self.attention_size,
        kwm_rnn_outputs_prem,
        "kwm_encoder_prem",
        sparse=self.sparse)

    last_output = tf.concat([last_output_hypo, last_output_prem], axis=1)
    kwm_last_output = tf.concat(
        [kwm_last_output_hypo, kwm_last_output_prem], axis=1)

    ############################
    # Hex
    ############################
    h_fc1 = last_output      # Prediction (task) representation.
    h_fc2 = kwm_last_output  # Key-word (superficial) representation.
    # Hex layer definition
    """
    self.W_cl_1 = tf.Variable(tf.random_normal([self.dim, 3], stddev=0.1))
    self.W_cl_2 = tf.Variable(tf.random_normal([1200, 3]), trainable=True)
    self.b_cl = tf.Variable(tf.random_normal((3,)), trainable=True)
    self.W_cl = tf.concat([self.W_cl_1, self.W_cl_2], 0)
    """

    # Compute prediction using [h_fc1, 0(pad)]
    pad = tf.zeros_like(h_fc2, tf.float32)  # print(pad.shape) -> (?, 600)
    yconv_contact_pred = tf.nn.dropout(tf.concat([h_fc1, pad], 1),
                                       self.keep_probs)
    # y_conv_pred = tf.matmul(yconv_contact_pred, self.W_cl) + self.b_cl
    y_conv_pred = dense_layer(yconv_contact_pred, 3, name="conv_pred")
    self.logits = y_conv_pred  # Prediction

    # Compute loss using [h_fc1, h_fc2] and [0(pad2), h_fc2]
    pad2 = tf.zeros_like(h_fc1, tf.float32)
    yconv_contact_H = tf.concat([pad2, h_fc2], 1)
    # y_conv_H = tf.matmul(yconv_contact_H, self.W_cl) + self.b_cl  # get F_g
    y_conv_H = dense_layer(yconv_contact_H, 3, name="conv_H")  # get F_g
    yconv_contact_loss = tf.nn.dropout(tf.concat([h_fc1, h_fc2], 1),
                                       self.keep_probs)
    # y_conv_loss = tf.matmul(yconv_contact_loss, self.W_cl) + self.b_cl  # get F_b
    y_conv_loss = dense_layer(yconv_contact_loss, 3, name="conv_loss")  # get F_b

    # Project F_b onto the orthogonal complement of F_g's column space.
    temp = tf.matmul(y_conv_H, y_conv_H, transpose_a=True)
    self.temp = temp
    y_conv_loss = y_conv_loss - tf.matmul(
        tf.matmul(tf.matmul(y_conv_H, tf.matrix_inverse(temp)),
                  y_conv_H,
                  transpose_b=True), y_conv_loss)  # get loss
    self.logits = y_conv_loss

    self.y = tf.nn.softmax(self.logits)
    self.cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(
            self.y_holder, depth=3), logits=self.logits))
    # Regularize key-word model attention
    reg1 = get_reg(kwm_alphas_hypo, lam=self.lam, type=self.reg)
    reg2 = get_reg(kwm_alphas_prem, lam=self.lam, type=self.reg)
    self.cost += reg1 + reg2
    self.optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
    self.train_op = self.optimizer.minimize(self.cost)
    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(self.y_holder, tf.argmax(self.y, 1)), tf.float32))
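# --- Sketch (not from the original repo) of the HEX-style projection used in
# build_model above, written with NumPy for clarity: the final logits are the
# component of F_b (y_conv_loss) orthogonal to the column space of F_g
# (y_conv_H). The array shapes below are illustrative assumptions.
import numpy as np

def hex_project(f_b, f_g):
    # f_b, f_g: [batch, num_classes]
    inv = np.linalg.inv(f_g.T @ f_g)   # [num_classes, num_classes]
    proj = f_g @ inv @ f_g.T           # projection onto the column space of f_g
    return f_b - proj @ f_b            # orthogonal component, [batch, num_classes]

f_b = np.random.randn(8, 3)
f_g = np.random.randn(8, 3)
out = hex_project(f_b, f_g)
assert np.allclose(f_g.T @ out, 0.0)   # out is orthogonal to f_g's columns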