# Bidirectional-LSTM dual encoder: dot-product scoring, sigmoid cross-entropy loss.
def build_model(self):
    # build index table
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list, num_oov_buckets=0, default_value=0)

    # get data iterator
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    # get inputs
    with tf.variable_scope("inputs"):
        # get next batch if there is no fed data
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length], name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length], name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None], name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None], name="reply_lengths")

        # get hyperparams
        self.embed_dropout_keep_prob = tf.placeholder(tf.float64, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(tf.float32, name="lstm_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(tf.int32, name="num_negative_samples")
        self.dense_dropout_keep_prob = tf.placeholder(tf.float64, name="dense_dropout_keep_prob")

    with tf.variable_scope("properties"):
        # length properties
        cur_batch_length = tf.shape(self.input_queries)[0]
        query_max_length = tf.shape(self.input_queries)[1]
        reply_max_length = tf.shape(self.input_replies)[1]

        # learning rate and optimizer
        learning_rate = tf.train.exponential_decay(
            self.config.learning_rate, self.global_step_tensor, decay_steps=50000, decay_rate=0.96)
        self.optimizer = tf.train.AdamOptimizer(learning_rate)

    # embedding layer
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(
            get_embeddings(self.config.vocab_list, self.config.pretrained_embed_dir,
                           self.config.vocab_size, self.config.embed_dim),
            trainable=True, name="embeddings")
        embeddings = tf.nn.dropout(
            embeddings, keep_prob=self.embed_dropout_keep_prob, noise_shape=[90000, 1])
        queries_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_queries, name="queries_embedded"))
        replies_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_replies, name="replies_embedded"))

    # build LSTM layer
    with tf.variable_scope("query_lstm_layer") as vs:
        lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        lstm_cell_fw = tf.contrib.rnn.DropoutWrapper(
            lstm_cell_fw, input_keep_prob=self.lstm_dropout_keep_prob)
        lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        lstm_cell_bw = tf.contrib.rnn.DropoutWrapper(
            lstm_cell_bw, input_keep_prob=self.lstm_dropout_keep_prob)
        _, queries_encoded = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_cell_fw, cell_bw=lstm_cell_bw, inputs=queries_embedded,
            sequence_length=self.query_lengths, dtype=tf.float32)
        self.queries_encoded = tf.cast(
            tf.concat([queries_encoded[0].h, queries_encoded[1].h], 1), tf.float64)

    with tf.variable_scope("reply_lstm_layer") as vs:
        lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True,
            reuse=tf.AUTO_REUSE)
        lstm_cell_fw = tf.contrib.rnn.DropoutWrapper(
            lstm_cell_fw, input_keep_prob=self.lstm_dropout_keep_prob)
        lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True,
            reuse=tf.AUTO_REUSE)
        lstm_cell_bw = tf.contrib.rnn.DropoutWrapper(
            lstm_cell_bw, input_keep_prob=self.lstm_dropout_keep_prob)
        _, replies_encoded = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=lstm_cell_fw, cell_bw=lstm_cell_bw, inputs=replies_embedded,
            sequence_length=self.reply_lengths, dtype=tf.float32)
        self.replies_encoded = tf.cast(
            tf.concat([replies_encoded[0].h, replies_encoded[1].h], 1), tf.float64)

    # build dense layer
    with tf.variable_scope("dense_layer"):
        M = tf.get_variable(
            "M", shape=[self.config.lstm_dim * 2, self.config.lstm_dim * 2],
            initializer=tf.contrib.layers.xavier_initializer())
        M = tf.nn.dropout(M, keep_prob=self.config.dense_dropout_keep_prob)
        self.queries_transformed = tf.matmul(self.queries_encoded, tf.cast(M, tf.float64))

    with tf.variable_scope("sampling"):
        self.distances = tf.matmul(self.queries_encoded, self.replies_encoded, transpose_b=True)
        positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
        negative_mask = tf.reshape(
            make_negative_mask(self.distances, method=self.config.negative_sampling,
                               num_negative_samples=self.num_negative_samples), [-1])

    with tf.variable_scope("prediction"):
        distances_flattened = tf.reshape(self.distances, [-1])
        self.positive_logits = tf.gather(distances_flattened, tf.where(positive_mask), 1)
        self.negative_logits = tf.gather(distances_flattened, tf.where(negative_mask), 1)
        self.logits = tf.concat([self.positive_logits, self.negative_logits], axis=0)
        self.labels = tf.concat(
            [tf.ones_like(self.positive_logits), tf.zeros_like(self.negative_logits)], axis=0)
        self.positive_probs = tf.sigmoid(self.positive_logits)
        self.probs = tf.sigmoid(self.logits)
        self.predictions = tf.cast(self.probs > 0.5, dtype=tf.int32)

    with tf.variable_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels, logits=self.logits))
        # gvs = self.optimizer.compute_gradients(self.loss)
        # capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs]
        # self.train_step = self.optimizer.apply_gradients(capped_gvs)
        self.train_step = self.optimizer.minimize(self.loss)

    with tf.variable_scope("score"):
        correct_predictions = tf.equal(self.predictions, tf.to_int32(self.labels))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

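# `make_negative_mask` is called by every variant in this file but defined elsewhere in the
# repository. The function below is only a minimal sketch of one way it could behave, NOT the
# repository's own helper: it assumes `config.negative_sampling` is either "random" or "hard"
# and that the helper returns a [batch, batch] float mask with 1s at the sampled off-diagonal
# (non-matching) query/reply pairs.
import tensorflow as tf

def make_negative_mask(distances, method="random", num_negative_samples=1):
    """Mark sampled negative pairs in a [batch, batch] score matrix (assumed behaviour)."""
    batch_size = tf.shape(distances)[0]
    off_diagonal = 1.0 - tf.eye(batch_size)
    if method == "random":
        # keep each off-diagonal cell with probability ~ num_negative_samples / (batch_size - 1)
        keep_prob = (tf.cast(num_negative_samples, tf.float32) /
                     tf.maximum(tf.cast(batch_size - 1, tf.float32), 1.0))
        sampled = tf.cast(tf.random_uniform([batch_size, batch_size]) < keep_prob, tf.float32)
        return off_diagonal * sampled
    if method == "hard":
        # take the highest-scoring non-matching replies for each query
        scores = tf.cast(distances, tf.float32) - 1e9 * tf.eye(batch_size)
        _, top_indices = tf.nn.top_k(scores, k=num_negative_samples)
        return tf.reduce_max(tf.one_hot(top_indices, depth=batch_size), axis=1)
    raise ValueError("unknown negative sampling method: {}".format(method))
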
# Shared-weights LSTM dual encoder trained with a weak-supervision margin loss.
def build_model(self):
    # build index table
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list, num_oov_buckets=0, default_value=0)

    # get data iterator
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    # get inputs
    with tf.variable_scope("inputs"):
        # get next batch if there is no fed data
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length], name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length], name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None], name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None], name="reply_lengths")
        self.weak_distances = tf.placeholder(tf.float32, [None, None], name="weak_distances")

        # get hyperparams
        self.embed_dropout_keep_prob = tf.placeholder(tf.float32, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(tf.float32, name="lstm_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(tf.int32, name="num_negative_samples")
        self.add_echo = tf.placeholder(tf.bool, name="add_echo")

    with tf.variable_scope("properties"):
        # length properties
        cur_batch_length = tf.shape(self.input_queries)[0]
        query_max_length = tf.shape(self.input_queries)[1]
        reply_max_length = tf.shape(self.input_replies)[1]

        # learning rate and optimizer
        # self.optimizer = tf.train.GradientDescentOptimizer(self.config.learning_rate)  # delstm1024_nsrandom9_ws_sgd_lr1e-1
        learning_rate = tf.train.exponential_decay(
            self.config.learning_rate, self.global_step_tensor, decay_steps=50000, decay_rate=0.96)
        self.optimizer = tf.train.AdamOptimizer(learning_rate)  # delstm1024_nsrandom9_ws_adam_lr1e-3

    # embedding layer
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(
            get_embeddings(self.config.vocab_list, self.config.pretrained_embed_dir,
                           self.config.vocab_size, self.config.embed_dim),
            trainable=True, name="embeddings")
        queries_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_queries, name="queries_embedded"))
        replies_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_replies, name="replies_embedded"))

    # build LSTM layer
    with tf.variable_scope("lstm_layer") as vs:
        query_lstm_cell = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True,
            # initializer=tf.orthogonal_initializer(),
        )
        query_lstm_cell = tf.contrib.rnn.DropoutWrapper(
            query_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
        reply_lstm_cell = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True,
            # initializer=tf.orthogonal_initializer(),
            reuse=True)
        reply_lstm_cell = tf.contrib.rnn.DropoutWrapper(
            reply_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
        _, queries_encoded = tf.nn.dynamic_rnn(
            cell=query_lstm_cell, inputs=queries_embedded,
            sequence_length=tf.cast(self.query_lengths, tf.float32), dtype=tf.float32)
        _, replies_encoded = tf.nn.dynamic_rnn(
            cell=reply_lstm_cell, inputs=replies_embedded,
            sequence_length=tf.cast(self.reply_lengths, tf.float32), dtype=tf.float32)
        self.queries_encoded = tf.cast(queries_encoded.h, tf.float64)
        self.replies_encoded = tf.cast(replies_encoded.h, tf.float64)

    # build dense layer
    with tf.variable_scope("dense_layer"):
        M = tf.get_variable(
            "M", shape=[self.config.lstm_dim, self.config.lstm_dim],
            initializer=tf.initializers.truncated_normal())
        self.queries_transformed = tf.matmul(self.queries_encoded, tf.cast(M, tf.float64))

    with tf.variable_scope("sampling"):
        self.distances = tf.matmul(self.queries_transformed, self.replies_encoded, transpose_b=True)
        positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
        negative_mask = tf.reshape(
            make_negative_mask(self.distances, method=self.config.negative_sampling,
                               num_negative_samples=self.num_negative_samples), [-1])
        candidates_mask = positive_mask + negative_mask

    with tf.variable_scope("weak_supervision"):
        self.weak_positives = tf.gather(
            tf.reshape(self.weak_distances, [-1]), tf.where(positive_mask), 1)
        self.weak_positives_tiled = tf.tile(self.weak_positives, [1, cur_batch_length])
        self.weak_distances_normalized = tf.maximum(
            0., self.weak_distances * tf.reciprocal(self.weak_positives_tiled) - 1)

    with tf.variable_scope("prediction"):
        distances_flattened = tf.reshape(self.distances, [-1])
        self.positive_logits = tf.gather(distances_flattened, tf.where(positive_mask), 1)
        self.negative_logits = tf.gather(distances_flattened, tf.where(negative_mask), 1)
        self.positive_logits_tiled = tf.transpose(
            tf.tile(self.positive_logits, [1, cur_batch_length]))
        self.logits = tf.concat([self.positive_logits, self.negative_logits], axis=0)
        self.labels = tf.concat(
            [tf.ones_like(self.positive_logits), tf.zeros_like(self.negative_logits)], axis=0)

    with tf.variable_scope("loss"):
        self.supervised_distances = tf.maximum(
            0., tf.to_float(self.distances) - tf.to_float(self.positive_logits_tiled) +
            self.weak_distances_normalized)
        self.loss = tf.reduce_sum(
            tf.gather(tf.reshape(self.supervised_distances, [-1]), tf.where(candidates_mask), 1))
        # gvs = self.optimizer.compute_gradients(self.loss)
        # capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs]
        # self.train_step = self.optimizer.apply_gradients(capped_gvs)
        self.train_step = self.optimizer.minimize(self.loss)

    with tf.variable_scope("score"):
        self.positive_probs = tf.sigmoid(self.positive_logits)
        self.probs = tf.sigmoid(self.logits)
        self.predictions = tf.to_int32(self.probs > 0.5)
        correct_predictions = tf.equal(self.predictions, tf.to_int32(self.labels))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

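# `get_embeddings` is likewise imported from elsewhere in the repository. The loader below is only
# a plausible stand-in, not the project's actual implementation: it assumes a vocab file with one
# token per line and a whitespace-separated word2vec/GloVe-style text file of pretrained vectors,
# and initialises rows without a pretrained vector randomly. The real helper may differ (for
# example, some variants above appear to treat the returned matrix as float64).
import numpy as np

def get_embeddings(vocab_list, pretrained_embed_dir, vocab_size, embed_dim):
    """Return a [vocab_size, embed_dim] matrix with pretrained vectors where available."""
    with open(vocab_list, encoding="utf-8") as f:
        vocab = [line.strip().split()[0] for line in f if line.strip()]
    pretrained = {}
    with open(pretrained_embed_dir, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            if len(parts) == embed_dim + 1:
                pretrained[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
    matrix = np.random.uniform(-0.25, 0.25, (vocab_size, embed_dim)).astype(np.float32)
    for index, token in enumerate(vocab[:vocab_size]):
        if token in pretrained:
            matrix[index] = pretrained[token]
    return matrix
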
# Text-CNN dual encoder: dot-product scoring, sigmoid cross-entropy loss, gradient clipping.
def build_model(self):
    # build index table
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list, num_oov_buckets=0, default_value=0)

    # get data iterator
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    # get inputs
    with tf.variable_scope("inputs"):
        # get next batch if there is no fed data
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length], name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length], name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None], name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None], name="reply_lengths")

        # get hyperparams
        self.embed_dropout_keep_prob = tf.placeholder(tf.float32, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(tf.float32, name="lstm_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(tf.int32, name="num_negative_samples")
        self.add_echo = tf.placeholder(tf.bool, name="add_echo")

    with tf.variable_scope("properties"):
        # length properties
        cur_batch_length = tf.shape(self.input_queries)[0]

        # learning rate and optimizer
        learning_rate = tf.train.exponential_decay(
            self.config.learning_rate, self.global_step_tensor, decay_steps=100000, decay_rate=0.9)
        self.optimizer = tf.train.AdamOptimizer(learning_rate)

    # embedding layer
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(
            get_embeddings(self.config.vocab_list, self.config.pretrained_embed_dir,
                           self.config.vocab_size, self.config.embed_dim),
            trainable=True, name="embeddings")
        queries_embedded = tf.expand_dims(
            tf.to_float(tf.nn.embedding_lookup(embeddings, self.input_queries,
                                               name="queries_embedded")), -1)
        replies_embedded = tf.expand_dims(
            tf.to_float(tf.nn.embedding_lookup(embeddings, self.input_replies,
                                               name="replies_embedded")), -1)

    # build CNN layer
    with tf.variable_scope("convolution_layer"):
        queries_pooled_outputs = list()
        replies_pooled_outputs = list()
        for i, filter_size in enumerate(self.filter_sizes):
            filter_shape = [filter_size, self.config.embed_dim, 1, self.config.num_filters]
            with tf.name_scope("conv-maxpool-query-{}".format(filter_size)):
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[self.config.num_filters]), name="b")
                conv = tf.nn.conv2d(queries_embedded, W, strides=[1, 1, 1, 1],
                                    padding="VALID", name="conv")
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                pooled = tf.nn.max_pool(
                    h, ksize=[1, self.config.max_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1], padding="VALID", name="pool")
                queries_pooled_outputs.append(pooled)
            with tf.name_scope("conv-maxpool-reply-{}".format(filter_size)):
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[self.config.num_filters]), name="b")
                conv = tf.nn.conv2d(
                    replies_embedded, W, strides=[1, 1, 1, 1], padding="VALID", name="conv",
                    # reuse=True,
                )
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                pooled = tf.nn.max_pool(
                    h, ksize=[1, self.config.max_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1], padding="VALID", name="pool")
                replies_pooled_outputs.append(pooled)
                # conv_echo = tf.nn.conv2d(queries_embedded, W, strides=[1, 1, 1, 1],
                #                          padding="VALID", name="conv", reuse=True)
                # h_echo = tf.nn.relu(tf.nn.bias_add(conv_echo, b), name="relu_echo")
                # pooled_echo = tf.nn.max_pool(h_echo,
                #                              ksize=[1, self.config.max_length - filter_size + 1, 1, 1],
                #                              strides=[1, 1, 1, 1], padding="VALID", name="pool_echo")
                # echo_pooled_outputs.append(pooled_echo)

        # combine all pooled outputs
        num_filters_total = self.config.num_filters * len(self.filter_sizes)
        self.queries_encoded = tf.reshape(
            tf.concat(queries_pooled_outputs, 3), [-1, num_filters_total], name="queries_encoded")
        self.replies_encoded = tf.reshape(
            tf.concat(replies_pooled_outputs, 3), [-1, num_filters_total], name="replies_encoded")

    with tf.variable_scope("dense_layer"):
        M = tf.get_variable(
            "M", shape=[num_filters_total, num_filters_total],
            initializer=tf.contrib.layers.xavier_initializer())
        self.queries_transformed = tf.matmul(self.queries_encoded, M)

    with tf.variable_scope("sampling"):
        self.distances = tf.matmul(self.queries_transformed, self.replies_encoded, transpose_b=True)
        # self.echo_distances = tf.matmul(self.queries_transformed, self.echo_encoded, transpose_b=True)
        positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
        negative_mask = tf.reshape(
            make_negative_mask(self.distances, method=self.config.negative_sampling,
                               num_negative_samples=self.num_negative_samples), [-1])

    with tf.variable_scope("prediction"):
        distances_flattened = tf.reshape(self.distances, [-1])
        # echo_distances_flattened = tf.reshape(self.echo_distances, [-1])
        self.positive_logits = tf.gather(distances_flattened, tf.where(positive_mask), 1)
        self.negative_logits = tf.gather(distances_flattened, tf.where(negative_mask), 1)
        self.logits = tf.concat([self.positive_logits, self.negative_logits], axis=0)
        self.labels = tf.concat(
            [tf.ones_like(self.positive_logits), tf.zeros_like(self.negative_logits)], axis=0)
        # self.echo_logits = tf.gather(echo_distances_flattened, tf.where(positive_mask), 1)
        # self.logits = tf.cond(self.add_echo,
        #                       lambda: tf.concat([self.positive_logits, self.negative_logits,
        #                                          self.echo_logits], axis=0),
        #                       lambda: tf.concat([self.positive_logits, self.negative_logits], axis=0))
        # self.labels = tf.cond(self.add_echo,
        #                       lambda: tf.concat([tf.ones_like(self.positive_logits),
        #                                          tf.zeros_like(self.negative_logits),
        #                                          tf.zeros_like(self.echo_logits)], axis=0),
        #                       lambda: tf.concat([tf.ones_like(self.positive_logits),
        #                                          tf.zeros_like(self.negative_logits)], axis=0))
        self.positive_probs = tf.sigmoid(self.positive_logits)
        # self.echo_probs = tf.sigmoid(self.echo_logits)
        self.probs = tf.sigmoid(self.logits)
        self.predictions = tf.cast(self.probs > 0.5, dtype=tf.int32)

    with tf.variable_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels, logits=self.logits))
        gvs = self.optimizer.compute_gradients(self.loss)
        capped_gvs = [(tf.clip_by_norm(grad, 5), var) for grad, var in gvs]
        self.train_step = self.optimizer.apply_gradients(capped_gvs)
        # self.train_step = self.optimizer.minimize(self.loss)

    with tf.variable_scope("score"):
        correct_predictions = tf.equal(self.predictions, tf.to_int32(self.labels))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

# GRU encoder with word- and RNN-level matching matrices, CNN features, and a matching GRU.
def build_model(self):
    # build index table
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list, num_oov_buckets=0, default_value=0)

    # get data iterator
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    with tf.variable_scope("inputs"):
        # get next batch if there is no fed data
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length], name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length], name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None], name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None], name="reply_lengths")

        # get hyperparams
        self.embed_dropout_keep_prob = tf.placeholder(tf.float64, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(tf.float32, name="lstm_dropout_keep_prob")
        self.dense_dropout_keep_prob = tf.placeholder(tf.float32, name="dense_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(tf.int32, name="num_negative_samples")

    with tf.variable_scope("properties"):
        # length properties
        cur_batch_length = tf.shape(self.input_queries)[0]

        # learning rate and optimizer
        learning_rate = tf.train.exponential_decay(
            self.config.learning_rate, self.global_step_tensor, decay_steps=100000, decay_rate=0.96)
        self.optimizer = tf.train.AdamOptimizer(learning_rate)

    # embedding layer
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(
            get_embeddings(self.config.vocab_list, self.config.pretrained_embed_dir,
                           self.config.vocab_size, self.config.embed_dim),
            trainable=True, name="embeddings")
        embeddings = tf.nn.dropout(
            embeddings, keep_prob=self.embed_dropout_keep_prob,
            noise_shape=[tf.shape(embeddings)[0], 1])
        queries_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_queries, name="queries_embedded"))
        replies_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_replies, name="replies_embedded"))

    # gru layer
    with tf.variable_scope("gru_layer"):
        sentence_gru_cell = tf.nn.rnn_cell.GRUCell(
            self.config.lstm_dim, kernel_initializer=tf.initializers.orthogonal(),
            reuse=tf.AUTO_REUSE)
        sentence_gru_cell = tf.contrib.rnn.DropoutWrapper(
            sentence_gru_cell, input_keep_prob=self.lstm_dropout_keep_prob)
        self.query_rnn_outputs, _ = tf.nn.dynamic_rnn(
            sentence_gru_cell, queries_embedded, sequence_length=self.query_lengths,
            dtype=tf.float32, scope="sentence_gru")
        self.reply_rnn_outputs, _ = tf.nn.dynamic_rnn(
            sentence_gru_cell, replies_embedded, sequence_length=self.reply_lengths,
            dtype=tf.float32, scope="sentence_gru")

    # negative sampling
    with tf.variable_scope("negative_sampling"):
        negative_mask = make_negative_mask(
            tf.zeros([cur_batch_length, cur_batch_length]),
            method=self.config.negative_sampling,
            num_negative_samples=self.num_negative_samples)
        negative_queries_indices, negative_replies_indices = tf.split(
            tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)
        self.negative_queries_indices = tf.squeeze(negative_queries_indices)
        self.negative_replies_indices = tf.squeeze(negative_replies_indices)
        self.num_negatives = tf.shape(self.negative_replies_indices)[0]
        queries_embedded_neg = tf.nn.embedding_lookup(
            queries_embedded, self.negative_queries_indices)
        replies_embedded_neg = tf.nn.embedding_lookup(
            replies_embedded, self.negative_replies_indices)
        self.query_rnn_outputs_neg = tf.reshape(
            tf.nn.embedding_lookup(self.query_rnn_outputs, self.negative_queries_indices),
            [self.num_negatives, self.config.max_length, self.config.lstm_dim])
        self.reply_rnn_outputs_neg = tf.reshape(
            tf.nn.embedding_lookup(self.reply_rnn_outputs, self.negative_replies_indices),
            [self.num_negatives, self.config.max_length, self.config.lstm_dim])

    # build matrix for convolution
    with tf.variable_scope("matrix"):
        A_matrix = tf.get_variable(
            "A_matrix_v", shape=(self.config.lstm_dim, self.config.lstm_dim),
            initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
        replies_embedded_transposed = tf.transpose(replies_embedded, [0, 2, 1])
        reply_rnn_outputs_transposed = tf.transpose(self.reply_rnn_outputs, [0, 2, 1])
        replies_embedded_neg_transposed = tf.transpose(replies_embedded_neg, [0, 2, 1])
        reply_rnn_outputs_neg_transposed = tf.transpose(self.reply_rnn_outputs_neg, [0, 2, 1])
        embed_matrix = tf.matmul(queries_embedded, replies_embedded_transposed)
        rnn_outputs = tf.einsum("aij,jk->aik", self.query_rnn_outputs, A_matrix)
        rnn_outputs = tf.matmul(rnn_outputs, reply_rnn_outputs_transposed)
        self.matrix_stacked = tf.stack([embed_matrix, rnn_outputs], axis=3, name="matrix_stacked")

    # build negative matrix for convolution
    with tf.variable_scope("matrix", reuse=True):
        A_matrix_neg = tf.get_variable(
            "A_matrix_v", shape=(self.config.lstm_dim, self.config.lstm_dim),
            initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32)
        embed_matrix_neg = tf.matmul(queries_embedded_neg, replies_embedded_neg_transposed)
        rnn_outputs_neg = tf.einsum("aij,jk->aik", self.query_rnn_outputs_neg, A_matrix_neg)
        rnn_outputs_neg = tf.matmul(rnn_outputs_neg, reply_rnn_outputs_neg_transposed)
        self.matrix_stacked_neg = tf.stack(
            [embed_matrix_neg, rnn_outputs_neg], axis=3, name="matrix_stacked_neg")

    # cnn layer
    with tf.variable_scope("convolution_layer"):
        conv = tf.layers.conv2d(
            self.matrix_stacked, filters=8, kernel_size=(3, 3), padding="VALID",
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            activation=tf.nn.relu, reuse=None, name="conv")
        pooled = tf.layers.max_pooling2d(conv, (3, 3), strides=(3, 3), padding="VALID",
                                         name="max_pooling")
        self.hidden_outputs = tf.expand_dims(
            tf.layers.dense(tf.contrib.layers.flatten(pooled), 50,
                            kernel_initializer=tf.contrib.layers.xavier_initializer()), -1)

    # cnn layer (negative pairs, weights shared with the positive branch via reuse)
    with tf.variable_scope("convolution_layer", reuse=True):
        conv_neg = tf.layers.conv2d(
            self.matrix_stacked_neg, filters=8, kernel_size=(3, 3), padding="VALID",
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            activation=tf.nn.relu, reuse=True, name="conv")
        pooled_neg = tf.layers.max_pooling2d(conv_neg, (3, 3), strides=(3, 3), padding="VALID",
                                             name="max_pooling_neg")
        self.hidden_outputs_neg = tf.expand_dims(
            tf.layers.dense(tf.contrib.layers.flatten(pooled_neg), 50,
                            kernel_initializer=tf.contrib.layers.xavier_initializer(),
                            reuse=True), -1)

    # matching gru layer
    with tf.variable_scope("matching_gru_layer"):
        matching_gru_cell = tf.nn.rnn_cell.GRUCell(
            self.config.lstm_dim, kernel_initializer=tf.initializers.orthogonal(),
            name="gru_cell", reuse=tf.AUTO_REUSE)
        _, positive_state = tf.nn.dynamic_rnn(matching_gru_cell, self.hidden_outputs,
                                              dtype=tf.float32, scope="matching_gru")
        _, negative_state = tf.nn.dynamic_rnn(matching_gru_cell, self.hidden_outputs_neg,
                                              dtype=tf.float32, scope="matching_gru")
        self.positive_logits = tf.layers.dense(
            positive_state, 2, kernel_initializer=tf.contrib.layers.xavier_initializer(),
            name="predict")
        self.negative_logits = tf.layers.dense(
            negative_state, 2, kernel_initializer=tf.contrib.layers.xavier_initializer(),
            name="predict", reuse=True)

    # build loss
    with tf.variable_scope("loss"):
        self.logits = tf.concat([self.positive_logits, self.negative_logits], 0)
        self.positive_probs = tf.nn.softmax(self.positive_logits)
        self.probs = tf.nn.softmax(self.logits)
        self.labels = tf.concat(
            [tf.ones_like(self.positive_logits), tf.zeros_like(self.negative_logits)], 0)
        losses = tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.labels, logits=self.logits)
        self.loss = tf.reduce_mean(losses)
        self.train_step = self.optimizer.minimize(self.loss)

    with tf.variable_scope("score"):
        self.predictions = tf.argmax(self.probs, 1)
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.labels, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

# Transformer encoder (tensor2tensor) with an MLP classifier over encoded query/reply pairs.
def build_model(self):
    # build index table
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list, num_oov_buckets=0, default_value=0)

    # get data iterator
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    # get inputs
    with tf.variable_scope("inputs"):
        # get next batch if there is no fed data
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length], name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length], name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None], name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None], name="reply_lengths")

        # get hyperparams
        self.embed_dropout_keep_prob = tf.placeholder(tf.float64, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(tf.float32, name="lstm_dropout_keep_prob")
        self.dense_dropout_keep_prob = tf.placeholder(tf.float32, name="dense_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(tf.int32, name="num_negative_samples")

    with tf.variable_scope("properties"):
        # length properties
        cur_batch_length = tf.shape(self.input_queries)[0]

        # get hparams from tensor2tensor.models.transformer
        hparams = transformer.transformer_small()
        hparams.batch_size = self.config.batch_size
        hparams.learning_rate_decay_steps = 10000
        hparams.learning_rate_minimum = 3e-5

        # learning rate
        lr = learning_rate.learning_rate_schedule(hparams)
        self.learning_rate = lr

    # embedding layer
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(
            get_embeddings(self.config.vocab_list, self.config.pretrained_embed_dir,
                           self.config.vocab_size, self.config.embed_dim),
            trainable=True, name="embeddings")
        embeddings = tf.nn.dropout(
            embeddings, keep_prob=self.embed_dropout_keep_prob,
            noise_shape=[tf.shape(embeddings)[0], 1])
        queries_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_queries, name="queries_embedded"))
        replies_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_replies, name="replies_embedded"))
        self.queries_embedded = queries_embedded
        self.replies_embedded = replies_embedded

    # transformer layer
    with tf.variable_scope("transformer"):
        queries_expanded = tf.expand_dims(queries_embedded, axis=2, name="queries_expanded")
        replies_expanded = tf.expand_dims(replies_embedded, axis=2, name="replies_expanded")
        hparams = transformer.transformer_small()
        hparams.set_hparam("batch_size", self.config.batch_size)
        hparams.set_hparam("hidden_size", self.config.embed_dim)
        encoder = transformer.TransformerEncoder(hparams, mode=self.mode)
        self.queries_encoded = encoder({"inputs": queries_expanded,
                                        "targets": queries_expanded})[0]
        self.replies_encoded = encoder({"inputs": replies_expanded,
                                        "targets": replies_expanded})[0]
        self.queries_encoded = tf.squeeze(
            tf.reduce_sum(self.queries_encoded, axis=1, keep_dims=True))
        self.replies_encoded = tf.squeeze(
            tf.reduce_sum(self.replies_encoded, axis=1, keep_dims=True))

    with tf.variable_scope("sampling"):
        positive_mask = tf.eye(cur_batch_length)
        negative_mask = make_negative_mask(
            tf.zeros([cur_batch_length, cur_batch_length]),
            method=self.config.negative_sampling,
            num_negative_samples=self.num_negative_samples)
        negative_queries_indices, negative_replies_indices = tf.split(
            tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)
        self.distances = tf.matmul(self.queries_encoded, self.replies_encoded, transpose_b=True)
        self.distances_flattened = tf.reshape(self.distances, [-1])
        self.positive_distances = tf.gather(
            self.distances_flattened, tf.where(tf.reshape(positive_mask, [-1])))
        self.negative_distances = tf.gather(
            self.distances_flattened, tf.where(tf.reshape(negative_mask, [-1])))
        self.negative_queries_indices = tf.squeeze(negative_queries_indices)
        self.negative_replies_indices = tf.squeeze(negative_replies_indices)
        self.positive_inputs = tf.concat(
            [self.queries_encoded, self.positive_distances, self.replies_encoded], 1)
        self.negative_inputs = tf.reshape(
            tf.concat([
                tf.nn.embedding_lookup(self.queries_encoded, self.negative_queries_indices),
                self.negative_distances,
                tf.nn.embedding_lookup(self.replies_encoded, self.negative_replies_indices)
            ], 1),
            [tf.shape(negative_queries_indices)[0], self.config.embed_dim * 2 + 1])

    with tf.variable_scope("prediction"):
        self.hidden_outputs = tf.layers.dense(
            tf.concat([self.positive_inputs, self.negative_inputs], 0), 256, tf.nn.relu,
            name="hidden_layer")
        self.logits = tf.layers.dense(self.hidden_outputs, 2, tf.nn.relu, name="output_layer")
        labels = tf.concat([
            tf.ones([tf.shape(self.positive_inputs)[0]], tf.float64),
            tf.zeros([tf.shape(self.negative_inputs)[0]], tf.float64)
        ], 0)
        self.labels = tf.one_hot(tf.to_int32(labels), 2)
        self.probs = tf.sigmoid(self.logits)
        self.predictions = tf.argmax(self.probs, 1)

    with tf.variable_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.labels, logits=self.logits))
        self.train_step = optimize.optimize(self.loss, lr, hparams, use_tpu=False)

    with tf.variable_scope("score"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.labels, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

# LSTM encoder with text-CNN features over the RNN outputs, hinge ranking loss plus L2 penalty.
def build_model(self):
    # build index table
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list, num_oov_buckets=0, default_value=0)

    # get data iterator
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    # get inputs
    with tf.variable_scope("inputs"):
        # get next batch if there is no fed data
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length], name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length], name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None], name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None], name="reply_lengths")

        # get hyperparams
        self.embed_dropout_keep_prob = tf.placeholder(tf.float64, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(tf.float32, name="lstm_dropout_keep_prob")
        self.dense_dropout_keep_prob = tf.placeholder(tf.float32, name="dense_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(tf.int32, name="num_negative_samples")

    with tf.variable_scope("properties"):
        # length properties
        cur_batch_length = tf.shape(self.input_queries)[0]
        query_max_length = tf.shape(self.input_queries)[1]
        reply_max_length = tf.shape(self.input_replies)[1]

        # learning rate and optimizer
        learning_rate = tf.train.exponential_decay(
            self.config.learning_rate, self.global_step_tensor, decay_steps=20000, decay_rate=0.96)
        self.optimizer = tf.train.AdamOptimizer(learning_rate)

    # embedding layer
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(
            get_embeddings(self.config.vocab_list, self.config.pretrained_embed_dir,
                           self.config.vocab_size, self.config.embed_dim),
            trainable=True, name="embeddings")
        embeddings = tf.nn.dropout(
            embeddings, keep_prob=self.embed_dropout_keep_prob, noise_shape=[90000, 1])
        queries_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_queries, name="queries_embedded"))
        replies_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_replies, name="replies_embedded"))
        self.queries_embedded = queries_embedded
        self.replies_embedded = replies_embedded

    # build LSTM layer
    with tf.variable_scope("lstm_layer") as vs:
        query_lstm_cell = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        query_lstm_cell = tf.contrib.rnn.DropoutWrapper(
            query_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
        reply_lstm_cell = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True,
            reuse=True)
        reply_lstm_cell = tf.contrib.rnn.DropoutWrapper(
            reply_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
        queries_encoded, queries_state = tf.nn.dynamic_rnn(
            cell=query_lstm_cell, inputs=queries_embedded,
            sequence_length=tf.cast(self.query_lengths, tf.float32), dtype=tf.float32)
        replies_encoded, replies_state = tf.nn.dynamic_rnn(
            cell=reply_lstm_cell, inputs=replies_embedded,
            sequence_length=tf.cast(self.reply_lengths, tf.float32), dtype=tf.float32)
        self.queries_encoded = tf.expand_dims(queries_encoded, -1)
        self.replies_encoded = tf.expand_dims(replies_encoded, -1)

    # Create a convolution + maxpool layer for each filter size
    queries_pooled_outputs = list()
    replies_pooled_outputs = list()
    for i, filter_size in enumerate([1, 2, 3, 4, 5]):
        filter_shape = [filter_size, self.config.lstm_dim, 1, 128]
        # queries
        with tf.name_scope("conv-maxpool-query-%s" % filter_size):
            # Convolution Layer
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[128]), name="bias")
            conv = tf.nn.conv2d(self.queries_encoded, W, strides=[1, 1, 1, 1],
                                padding="VALID", name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h, ksize=[1, self.config.max_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1], padding='VALID', name="pool")
            queries_pooled_outputs.append(pooled)
        # replies
        with tf.name_scope("conv-maxpool-reply-%s" % filter_size):
            # Convolution Layer
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[128]), name="bias")
            conv = tf.nn.conv2d(self.replies_encoded, W, strides=[1, 1, 1, 1],
                                padding="VALID", name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h, ksize=[1, self.config.max_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1], padding='VALID', name="pool")
            replies_pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = 128 * 5
    self.queries_conv_output = tf.reshape(
        tf.concat(queries_pooled_outputs, 3), [-1, num_filters_total])
    self.replies_conv_output = tf.reshape(
        tf.concat(replies_pooled_outputs, 3), [-1, num_filters_total])

    with tf.variable_scope("sampling"):
        positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
        negative_mask = make_negative_mask(
            tf.zeros([cur_batch_length, cur_batch_length]),
            method=self.config.negative_sampling,
            num_negative_samples=self.num_negative_samples)
        negative_queries_indices, negative_replies_indices = tf.split(
            tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)
        self.negative_queries_indices = tf.squeeze(negative_queries_indices)
        self.negative_replies_indices = tf.squeeze(negative_replies_indices)
        self.distances = tf.matmul(queries_state.h, replies_state.h, transpose_b=True)
        self.distances_flattened = tf.reshape(self.distances, [-1])
        self.positive_distances = tf.gather(self.distances_flattened, tf.where(positive_mask), 1)
        self.negative_distances = tf.gather(
            self.distances_flattened, tf.where(tf.reshape(negative_mask, [-1])), 1)
        self.positive_inputs = tf.concat(
            [self.queries_conv_output, self.positive_distances, self.replies_conv_output], 1)
        self.negative_inputs = tf.reshape(
            tf.concat([
                tf.nn.embedding_lookup(self.queries_conv_output, self.negative_queries_indices),
                self.negative_distances,
                tf.nn.embedding_lookup(self.replies_conv_output, self.negative_replies_indices)
            ], 1),
            [tf.shape(negative_queries_indices)[0], num_filters_total * 2 + 1])
        self.num_positives = tf.shape(self.positive_inputs)[0]
        self.num_negatives = tf.shape(self.negative_inputs)[0]

    # hidden layer
    with tf.name_scope("hidden"):
        W = tf.get_variable(
            "W_hidden", shape=[2 * num_filters_total + 1, 100],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[100]), name="bias")
        self.hidden_output = tf.nn.relu(
            tf.nn.xw_plus_b(tf.concat([self.positive_inputs, self.negative_inputs], 0), W, b,
                            name="hidden_output"))

    # Add dropout
    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.hidden_output, self.dense_dropout_keep_prob,
                                    name="hidden_output_drop")

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable(
            "W_output", shape=[100, 1],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[1]), name="bias")
        self.logits = tf.nn.xw_plus_b(self.h_drop, W, b, name="logits")
        self.positive_logits, self.negative_logits = tf.split(
            self.logits, [self.num_positives, self.num_negatives])
        self.probs = tf.sigmoid(self.logits)
        self.predictions = tf.to_int32(self.probs > 0.5, name="predictions")
        labels = tf.concat([
            tf.ones([self.num_positives], tf.float64),
            tf.zeros([self.num_negatives], tf.float64)
        ], 0)
        self.labels = tf.to_int32(labels)

    with tf.variable_scope("loss"):
        self.positive_scores = tf.expand_dims(self.positive_logits, 1)
        self.negative_scores = self.negative_logits
        self.ranking_loss = tf.reduce_sum(
            tf.maximum(0.0, self.config.hinge_loss - self.positive_scores + self.negative_scores))
        l2_vars = [v for v in tf.trainable_variables()
                   if 'bias' not in v.name and 'embedding' not in v.name]
        l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in l2_vars])
        self.loss = self.ranking_loss + l2_loss
        self.train_step = self.optimizer.minimize(self.loss, global_step=self.global_step_tensor)

    with tf.variable_scope("score"):
        correct_predictions = tf.equal(self.predictions, self.labels)
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

# LSTM dual encoder followed by an MLP classifier with softmax cross-entropy loss.
def build_model(self):
    # build index table
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list, num_oov_buckets=0, default_value=0)

    # get data iterator
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    # get inputs
    with tf.variable_scope("inputs"):
        # get next batch if there is no fed data
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length], name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length], name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None], name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None], name="reply_lengths")

        # get hyperparams
        self.embed_dropout_keep_prob = tf.placeholder(tf.float64, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(tf.float32, name="lstm_dropout_keep_prob")
        self.dense_dropout_keep_prob = tf.placeholder(tf.float32, name="dense_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(tf.int32, name="num_negative_samples")

    with tf.variable_scope("properties"):
        # length properties
        cur_batch_length = tf.shape(self.input_queries)[0]

        # learning rate and optimizer
        learning_rate = tf.train.exponential_decay(
            self.config.learning_rate, self.global_step_tensor, decay_steps=100000, decay_rate=0.96)
        self.optimizer = tf.train.AdamOptimizer(learning_rate)

    # embedding layer
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(
            get_embeddings(self.config.vocab_list, self.config.pretrained_embed_dir,
                           self.config.vocab_size, self.config.embed_dim),
            trainable=True, name="embeddings")
        embeddings = tf.nn.dropout(
            embeddings, keep_prob=self.embed_dropout_keep_prob, noise_shape=[90000, 1])
        queries_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_queries, name="queries_embedded"))
        replies_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_replies, name="replies_embedded"))
        self.queries_embedded = queries_embedded
        self.replies_embedded = replies_embedded

    # build LSTM layer
    with tf.variable_scope("lstm_layer") as vs:
        query_lstm_cell = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True)
        query_lstm_cell = tf.contrib.rnn.DropoutWrapper(
            query_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
        reply_lstm_cell = tf.nn.rnn_cell.LSTMCell(
            self.config.lstm_dim, forget_bias=2.0, use_peepholes=True, state_is_tuple=True,
            reuse=True)
        reply_lstm_cell = tf.contrib.rnn.DropoutWrapper(
            reply_lstm_cell, input_keep_prob=self.lstm_dropout_keep_prob)
        _, queries_encoded = tf.nn.dynamic_rnn(
            cell=query_lstm_cell, inputs=queries_embedded,
            sequence_length=tf.cast(self.query_lengths, tf.float32), dtype=tf.float32)
        _, replies_encoded = tf.nn.dynamic_rnn(
            cell=reply_lstm_cell, inputs=replies_embedded,
            sequence_length=tf.cast(self.reply_lengths, tf.float32), dtype=tf.float32)
        self.queries_encoded = tf.cast(queries_encoded.h, tf.float64)
        self.replies_encoded = tf.cast(replies_encoded.h, tf.float64)

    with tf.variable_scope("sampling"):
        negative_mask = make_negative_mask(
            tf.zeros([cur_batch_length, cur_batch_length]),
            method=self.config.negative_sampling,
            num_negative_samples=self.num_negative_samples)
        negative_queries_indices, negative_replies_indices = tf.split(
            tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)
        # self.distances = tf.matmul(self.queries_encoded, self.replies_encoded, transpose_b=True)
        # self.distances_flattened = tf.reshape(self.distances, [-1])
        # self.positive_distances = tf.gather(self.distances_flattened, tf.where(tf.reshape(tf.eye(cur_batch_length), -1)))
        # self.negative_distances = tf.gather(self.distances_flattened, tf.where(tf.reshape(negative_mask, -1)))
        self.negative_queries_indices = tf.squeeze(negative_queries_indices)
        self.negative_replies_indices = tf.squeeze(negative_replies_indices)
        self.positive_inputs = tf.concat([self.queries_encoded, self.replies_encoded], 1)
        self.negative_inputs = tf.reshape(
            tf.concat([
                tf.nn.embedding_lookup(self.queries_encoded, self.negative_queries_indices),
                tf.nn.embedding_lookup(self.replies_encoded, self.negative_replies_indices)
            ], 1),
            [tf.shape(negative_queries_indices)[0], self.config.lstm_dim * 2])

    with tf.variable_scope("prediction"):
        self.hidden_outputs = tf.layers.dense(
            tf.concat([self.positive_inputs, self.negative_inputs], 0), 256, tf.nn.relu,
            name="hidden_layer")
        self.logits = tf.layers.dense(self.hidden_outputs, 2, tf.nn.relu, name="output_layer")
        labels = tf.concat([
            tf.ones([tf.shape(self.positive_inputs)[0]], tf.float64),
            tf.zeros([tf.shape(self.negative_inputs)[0]], tf.float64)
        ], 0)
        self.labels = tf.one_hot(tf.to_int32(labels), 2)
        self.probs = tf.sigmoid(self.logits)
        self.predictions = tf.argmax(self.probs, 1)
        self.positive_probs = tf.slice(self.probs, [0, 0], [cur_batch_length, -1])

    with tf.variable_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.labels, logits=self.logits))
        self.train_step = self.optimizer.minimize(self.loss, global_step=self.global_step_tensor)

    with tf.variable_scope("score"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.labels, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

# Transformer encoder (tensor2tensor) with max-pooling, a dense projection, and dot-product scoring.
def build_model(self):
    # build index table
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list, num_oov_buckets=0, default_value=0)

    # get data iterator
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    # get inputs
    with tf.variable_scope("inputs"):
        # get next batch if there is no fed data
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length], name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length], name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None], name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None], name="reply_lengths")

        # get hyperparams
        self.embed_dropout_keep_prob = tf.placeholder(tf.float64, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(tf.float32, name="lstm_dropout_keep_prob")
        self.dense_dropout_keep_prob = tf.placeholder(tf.float32, name="dense_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(tf.int32, name="num_negative_samples")

    with tf.variable_scope("properties"):
        # length properties
        cur_batch_length = tf.shape(self.input_queries)[0]

        # get hparams from tensor2tensor.models.transformer
        hparams = transformer.transformer_small()
        hparams.batch_size = self.config.batch_size

        # learning rate
        lr = learning_rate.learning_rate_schedule(hparams)

    # embedding layer
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(
            get_embeddings(self.config.vocab_list, self.config.pretrained_embed_dir,
                           self.config.vocab_size, self.config.embed_dim),
            trainable=True, name="embeddings")
        embeddings = tf.nn.dropout(
            embeddings, keep_prob=self.embed_dropout_keep_prob,
            noise_shape=[tf.shape(embeddings)[0], 1])
        queries_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_queries, name="queries_embedded"))
        replies_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_replies, name="replies_embedded"))
        self.queries_embedded = queries_embedded
        self.replies_embedded = replies_embedded

    # transformer layer
    with tf.variable_scope("transformer"):
        queries_expanded = tf.expand_dims(queries_embedded, axis=2, name="queries_expanded")
        replies_expanded = tf.expand_dims(replies_embedded, axis=2, name="replies_expanded")
        hparams = transformer.transformer_small()
        hparams.set_hparam("batch_size", self.config.batch_size)
        hparams.set_hparam("hidden_size", self.config.embed_dim)
        encoder = transformer.TransformerEncoder(hparams, mode=self.mode)
        self.queries_encoded = encoder({"inputs": queries_expanded,
                                        "targets": queries_expanded})[0]
        self.replies_encoded = encoder({"inputs": replies_expanded,
                                        "targets": replies_expanded})[0]
        self.queries_pooled = tf.nn.max_pool(
            self.queries_encoded, ksize=[1, self.config.max_length, 1, 1],
            strides=[1, 1, 1, 1], padding='VALID', name="queries_pooled")
        self.replies_pooled = tf.nn.max_pool(
            self.replies_encoded, ksize=[1, self.config.max_length, 1, 1],
            strides=[1, 1, 1, 1], padding='VALID', name="replies_pooled")
        self.queries_flattened = tf.reshape(self.queries_pooled, [cur_batch_length, -1])
        self.replies_flattened = tf.reshape(self.replies_pooled, [cur_batch_length, -1])

    # build dense layer
    with tf.variable_scope("dense_layer"):
        M = tf.get_variable(
            "M", shape=[self.config.embed_dim, self.config.embed_dim],
            initializer=tf.initializers.truncated_normal())
        M = tf.nn.dropout(M, self.dense_dropout_keep_prob)
        self.queries_transformed = tf.matmul(self.queries_flattened, M)

    with tf.variable_scope("sampling"):
        self.distances = tf.matmul(self.queries_transformed, self.replies_flattened, transpose_b=True)
        positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
        negative_mask = tf.reshape(
            make_negative_mask(self.distances, method=self.config.negative_sampling,
                               num_negative_samples=self.num_negative_samples), [-1])

    with tf.variable_scope("prediction"):
        distances_flattened = tf.reshape(self.distances, [-1])
        self.positive_logits = tf.gather(distances_flattened, tf.where(positive_mask), 1)
        self.negative_logits = tf.gather(distances_flattened, tf.where(negative_mask), 1)
        self.logits = tf.concat([self.positive_logits, self.negative_logits], axis=0)
        self.labels = tf.concat(
            [tf.ones_like(self.positive_logits), tf.zeros_like(self.negative_logits)], axis=0)
        self.positive_probs = tf.sigmoid(self.positive_logits)
        self.probs = tf.sigmoid(self.logits)
        self.predictions = tf.cast(self.probs > 0.5, dtype=tf.int32)

    with tf.variable_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels, logits=self.logits))
        self.train_step = optimize.optimize(self.loss, lr, hparams, use_tpu=False)

    with tf.variable_scope("score"):
        correct_predictions = tf.equal(self.predictions, tf.to_int32(self.labels))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
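
# A hypothetical training loop for any of the variants above, not part of the repository: it
# assumes the surrounding model class exposes the attributes that build_model defines
# (data_iterator, train_step, loss, accuracy, and the keep-prob / sampling placeholders), that the
# iterator is initializable, and that `import tensorflow as tf` is in scope. The feed values and
# step counts are illustrative only; variants without dense_dropout_keep_prob would omit that entry.
def train(model, num_steps=1000):
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
        sess.run(model.data_iterator.initializer)  # assumes an initializable iterator
        for step in range(num_steps):
            _, loss, accuracy = sess.run(
                [model.train_step, model.loss, model.accuracy],
                feed_dict={
                    model.embed_dropout_keep_prob: 0.9,
                    model.lstm_dropout_keep_prob: 0.9,
                    model.dense_dropout_keep_prob: 0.9,
                    model.num_negative_samples: 4,
                })
            if step % 100 == 0:
                print("step {}: loss={:.4f} acc={:.4f}".format(step, loss, accuracy))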