def __init__(self, sequence_length, num_hidden_layers, vocab_size, feature_dim_size,
             batch_size, num_heads, ff_hidden_size, initialization, num_sampled,
             num_neighbors, use_pos):
    # Placeholders for input and output
    self.input_x = tf.placeholder(tf.int32, [batch_size, sequence_length], name="input_x")
    self.input_y = tf.placeholder(tf.int32, [batch_size * sequence_length * num_neighbors, 1],
                                  name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # Embedding layer: use the provided initialization when given, otherwise learn the features.
    with tf.name_scope("input_feature"):
        if initialization != []:
            self.input_feature = tf.get_variable(name="input_feature_1",
                                                 initializer=initialization,
                                                 trainable=False)
        else:
            self.input_feature = tf.get_variable(
                name="input_feature_2",
                shape=[vocab_size, feature_dim_size],
                initializer=tf.contrib.layers.xavier_initializer(seed=1234))

    # Inputs for the Transformer encoder
    self.inputTransfG = tf.nn.embedding_lookup(self.input_feature, self.input_x)
    self.inputTransfG = tf.expand_dims(self.inputTransfG, axis=-1)
    self.inputTransfG = squash(self.inputTransfG)
    self.inputTransfG = tf.reshape(self.inputTransfG,
                                   [batch_size, sequence_length, 1, feature_dim_size])

    self.hparams = transformer.transformer_base()
    self.hparams.hidden_size = feature_dim_size
    self.hparams.batch_size = batch_size * sequence_length
    self.hparams.max_length = sequence_length
    self.hparams.num_hidden_layers = num_hidden_layers
    self.hparams.num_heads = num_heads
    self.hparams.filter_size = ff_hidden_size
    self.hparams.use_target_space_embedding = False
    # No positional embedding
    if use_pos == 0:
        self.hparams.pos = None

    # Transformer encoder
    self.encoder = transformer.TransformerEncoder(self.hparams, mode=tf.estimator.ModeKeys.TRAIN)
    self.outputEncoder = self.encoder({"inputs": self.inputTransfG,
                                       "targets": 0,
                                       "target_space_id": 0})[0]
    self.outputEncoder = tf.reshape(self.outputEncoder,
                                    [batch_size, sequence_length, feature_dim_size, 1])
    self.outputEncoder = squash(self.outputEncoder)
    self.outputEncoder = tf.squeeze(self.outputEncoder)
    self.outputEncoderInd = tf.reshape(self.outputEncoder,
                                       [batch_size * sequence_length, feature_dim_size])
    # Repeat each encoded representation num_neighbors times, one copy per target in input_y.
    self.outputEncoder = tf.tile(self.outputEncoder, [1, 1, num_neighbors])
    self.outputEncoder = tf.reshape(self.outputEncoder,
                                    [batch_size * sequence_length * num_neighbors, feature_dim_size])
    self.outputEncoder = tf.nn.dropout(self.outputEncoder, self.dropout_keep_prob)

    with tf.name_scope("embedding"):
        self.embedding_matrix = tf.get_variable(
            "W",
            shape=[vocab_size, feature_dim_size],
            initializer=tf.contrib.layers.xavier_initializer(seed=1234))
        self.softmax_biases = tf.Variable(tf.zeros([vocab_size]))

    # Sampled softmax loss over the vocabulary, with targets taken from input_y.
    self.total_loss = tf.reduce_mean(
        tf.nn.sampled_softmax_loss(weights=self.embedding_matrix,
                                   biases=self.softmax_biases,
                                   inputs=self.outputEncoder,
                                   labels=self.input_y,
                                   num_sampled=num_sampled,
                                   num_classes=vocab_size))

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=500)
    tf.logging.info('Setting up the main structure')
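A minimal driver sketch for the constructor above, assuming TF 1.x sessions. The class name (NodeTransformerEncoder), the Adam optimizer, and the random batches are illustrative assumptions, not part of the original code.

# Hypothetical usage sketch: class name, optimizer, and batch data are assumptions.
import numpy as np
import tensorflow as tf

batch_size, sequence_length, num_neighbors, vocab_size = 64, 8, 4, 10000
model = NodeTransformerEncoder(sequence_length=sequence_length, num_hidden_layers=2,
                               vocab_size=vocab_size, feature_dim_size=128,
                               batch_size=batch_size, num_heads=8, ff_hidden_size=512,
                               initialization=[], num_sampled=256,
                               num_neighbors=num_neighbors, use_pos=1)
train_op = tf.train.AdamOptimizer(1e-4).minimize(model.total_loss)  # assumed optimizer

batch_x = np.random.randint(0, vocab_size, size=(batch_size, sequence_length))
batch_y = np.random.randint(0, vocab_size, size=(batch_size * sequence_length * num_neighbors, 1))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss = sess.run([train_op, model.total_loss],
                       feed_dict={model.input_x: batch_x,
                                  model.input_y: batch_y,
                                  model.dropout_keep_prob: 0.9})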
def transformer_encoding(node_seq_input, num_nodes, params, mode):
    """Construct a node-level encoder based on the transformer module.

    Args:
        node_seq_input: tf.Tensor. A tensor with 3 dimensions.
        num_nodes: tf.Tensor. Number of nodes per instance.
        params: dict. A parameter dictionary.
        mode: tf.estimator.ModeKeys object.

    Returns:
        node_seq_output: tf.Tensor. A tensor with 3 dimensions.
    """
    node_weights = tf.sequence_mask(num_nodes)

    hparams = transformer.transformer_tiny()
    hparams.hidden_size = params["transformer_hidden_unit"]
    hparams.num_heads = params["transformer_head"]
    hparams.num_hidden_layers = params["transformer_hidden_layer"]
    if hparams.hidden_size % hparams.num_heads != 0:
        raise ValueError("hidden_size needs to be divisible by num_heads.")
    transformer_encoder = transformer.TransformerEncoder(hparams, mode=mode)

    # Project the input to hidden_size; the encoder expects shape
    # [batch_size, sequence_length, 1, hidden_dim].
    node_seq_input = tf.layers.dense(node_seq_input, hparams.hidden_size)
    node_seq_input_reshape = tf.expand_dims(node_seq_input, 2)
    # "targets" and "target_space_id" are required by the transformer decoder;
    # both are set to 0 for the encoder.
    node_seq_output = transformer_encoder(
        {
            "inputs": node_seq_input_reshape,
            "targets": 0,
            "target_space_id": 0,
        },
        nonpadding=node_weights)
    node_seq_output = tf.squeeze(node_seq_output[0], 2)
    # Residual connection: add the projected input back to the encoder output.
    node_seq_output = tf.add(node_seq_input, node_seq_output)
    return node_seq_output
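A hedged call sketch for transformer_encoding. The shapes, placeholders, and hyperparameter values below are assumptions, chosen so the divisibility check passes.

# Illustrative call; shapes and hyperparameter values are assumptions.
import tensorflow as tf

node_seq_input = tf.placeholder(tf.float32, [None, 20, 64])  # [batch, max_nodes, feature_dim]
num_nodes = tf.placeholder(tf.int32, [None])  # assumed to reach the padded node count in the batch
params = {
    "transformer_hidden_unit": 128,   # must be divisible by "transformer_head"
    "transformer_head": 4,
    "transformer_hidden_layer": 2,
}
node_seq_output = transformer_encoding(node_seq_input, num_nodes, params,
                                       tf.estimator.ModeKeys.TRAIN)
# node_seq_output has width 128: the input is projected to hidden_size before
# the residual add, so the output dimension follows hidden_size, not the input.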
def build_model(self):
    # Build the index table mapping tokens to vocabulary ids.
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list,
        num_oov_buckets=0,
        default_value=0)

    # Get the data iterator.
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    # Get inputs.
    with tf.variable_scope("inputs"):
        # Pull the next batch when no data is fed explicitly.
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length],
            name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length],
            name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None],
            name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None],
            name="reply_lengths")

        # Hyperparameter placeholders.
        self.embed_dropout_keep_prob = tf.placeholder(
            tf.float64, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(
            tf.float32, name="lstm_dropout_keep_prob")
        self.dense_dropout_keep_prob = tf.placeholder(
            tf.float32, name="dense_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(
            tf.int32, name="num_negative_samples")

    with tf.variable_scope("properties"):
        # Length properties.
        cur_batch_length = tf.shape(self.input_queries)[0]

        # Get hparams from tensor2tensor.models.transformer.
        hparams = transformer.transformer_small()
        hparams.batch_size = self.config.batch_size
        hparams.learning_rate_decay_steps = 10000
        hparams.learning_rate_minimum = 3e-5

        # Learning rate schedule.
        lr = learning_rate.learning_rate_schedule(hparams)
        self.learning_rate = lr

    # Embedding layer.
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(get_embeddings(
            self.config.vocab_list,
            self.config.pretrained_embed_dir,
            self.config.vocab_size,
            self.config.embed_dim),
            trainable=True,
            name="embeddings")
        embeddings = tf.nn.dropout(
            embeddings,
            keep_prob=self.embed_dropout_keep_prob,
            noise_shape=[tf.shape(embeddings)[0], 1])
        queries_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_queries,
                                   name="queries_embedded"))
        replies_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_replies,
                                   name="replies_embedded"))
        self.queries_embedded = queries_embedded
        self.replies_embedded = replies_embedded

    # Transformer layer.
    with tf.variable_scope("transformer"):
        queries_expanded = tf.expand_dims(queries_embedded, axis=2,
                                          name="queries_expanded")
        replies_expanded = tf.expand_dims(replies_embedded, axis=2,
                                          name="replies_expanded")
        hparams = transformer.transformer_small()
        hparams.set_hparam("batch_size", self.config.batch_size)
        hparams.set_hparam("hidden_size", self.config.embed_dim)
        encoder = transformer.TransformerEncoder(hparams, mode=self.mode)
        self.queries_encoded = encoder({
            "inputs": queries_expanded,
            "targets": queries_expanded
        })[0]
        self.replies_encoded = encoder({
            "inputs": replies_expanded,
            "targets": replies_expanded
        })[0]
        # Sum over the time axis to get a fixed-size sentence representation.
        self.queries_encoded = tf.squeeze(
            tf.reduce_sum(self.queries_encoded, axis=1, keep_dims=True))
        self.replies_encoded = tf.squeeze(
            tf.reduce_sum(self.replies_encoded, axis=1, keep_dims=True))

    with tf.variable_scope("sampling"):
        positive_mask = tf.eye(cur_batch_length)
        negative_mask = make_negative_mask(
            tf.zeros([cur_batch_length, cur_batch_length]),
            method=self.config.negative_sampling,
            num_negative_samples=self.num_negative_samples)
        negative_queries_indices, negative_replies_indices = tf.split(
            tf.where(tf.not_equal(negative_mask, 0)), [1, 1], 1)

        self.distances = tf.matmul(self.queries_encoded, self.replies_encoded,
                                   transpose_b=True)
        self.distances_flattened = tf.reshape(self.distances, [-1])
        self.positive_distances = tf.gather(
            self.distances_flattened,
            tf.where(tf.reshape(positive_mask, [-1])))
        self.negative_distances = tf.gather(
            self.distances_flattened,
            tf.where(tf.reshape(negative_mask, [-1])))
        self.negative_queries_indices = tf.squeeze(negative_queries_indices)
        self.negative_replies_indices = tf.squeeze(negative_replies_indices)

        # Positive pairs use the aligned (diagonal) query/reply vectors plus their score;
        # negative pairs are gathered via the sampled off-diagonal indices.
        self.positive_inputs = tf.concat([
            self.queries_encoded, self.positive_distances, self.replies_encoded
        ], 1)
        self.negative_inputs = tf.reshape(
            tf.concat([
                tf.nn.embedding_lookup(self.queries_encoded,
                                       self.negative_queries_indices),
                self.negative_distances,
                tf.nn.embedding_lookup(self.replies_encoded,
                                       self.negative_replies_indices)
            ], 1),
            [tf.shape(negative_queries_indices)[0], self.config.embed_dim * 2 + 1])

    with tf.variable_scope("prediction"):
        self.hidden_outputs = tf.layers.dense(
            tf.concat([self.positive_inputs, self.negative_inputs], 0),
            256, tf.nn.relu, name="hidden_layer")
        self.logits = tf.layers.dense(self.hidden_outputs, 2, tf.nn.relu,
                                      name="output_layer")

        labels = tf.concat([
            tf.ones([tf.shape(self.positive_inputs)[0]], tf.float64),
            tf.zeros([tf.shape(self.negative_inputs)[0]], tf.float64)
        ], 0)
        self.labels = tf.one_hot(tf.to_int32(labels), 2)

        self.probs = tf.sigmoid(self.logits)
        self.predictions = tf.argmax(self.probs, 1)

    with tf.variable_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.labels,
                                                       logits=self.logits))
        self.train_step = optimize.optimize(self.loss, lr, hparams, use_tpu=False)

    with tf.variable_scope("score"):
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.labels, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                       name="accuracy")
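A hedged training-loop sketch for the classifier variant above. The surrounding model object, step count, and keep-prob values are assumptions; tf.tables_initializer() is included because the vocabulary lookup table requires explicit initialization in TF 1.x.

# Hypothetical training loop; model construction and config handling are assumed elsewhere.
import tensorflow as tf

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
    for step in range(1000):  # assumed number of steps
        _, loss, acc = sess.run(
            [model.train_step, model.loss, model.accuracy],
            feed_dict={
                model.embed_dropout_keep_prob: 0.9,
                model.lstm_dropout_keep_prob: 0.9,
                model.dense_dropout_keep_prob: 0.9,
                model.num_negative_samples: 4,
            })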
def build_model(self):
    # Build the index table mapping tokens to vocabulary ids.
    index_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=self.config.vocab_list,
        num_oov_buckets=0,
        default_value=0)

    # Get the data iterator.
    self.data_iterator = self.data.get_data_iterator(index_table, mode=self.mode)

    # Get inputs.
    with tf.variable_scope("inputs"):
        # Pull the next batch when no data is fed explicitly.
        next_batch = self.data_iterator.get_next()
        self.input_queries = tf.placeholder_with_default(
            next_batch["input_queries"], [None, self.config.max_length],
            name="input_queries")
        self.input_replies = tf.placeholder_with_default(
            next_batch["input_replies"], [None, self.config.max_length],
            name="input_replies")
        self.query_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["query_lengths"]), [None],
            name="query_lengths")
        self.reply_lengths = tf.placeholder_with_default(
            tf.squeeze(next_batch["reply_lengths"]), [None],
            name="reply_lengths")

        # Hyperparameter placeholders.
        self.embed_dropout_keep_prob = tf.placeholder(
            tf.float64, name="embed_dropout_keep_prob")
        self.lstm_dropout_keep_prob = tf.placeholder(
            tf.float32, name="lstm_dropout_keep_prob")
        self.dense_dropout_keep_prob = tf.placeholder(
            tf.float32, name="dense_dropout_keep_prob")
        self.num_negative_samples = tf.placeholder(
            tf.int32, name="num_negative_samples")

    with tf.variable_scope("properties"):
        # Length properties.
        cur_batch_length = tf.shape(self.input_queries)[0]

        # Get hparams from tensor2tensor.models.transformer.
        hparams = transformer.transformer_small()
        hparams.batch_size = self.config.batch_size

        # Learning rate schedule.
        lr = learning_rate.learning_rate_schedule(hparams)

    # Embedding layer.
    with tf.variable_scope("embedding"):
        embeddings = tf.Variable(get_embeddings(
            self.config.vocab_list,
            self.config.pretrained_embed_dir,
            self.config.vocab_size,
            self.config.embed_dim),
            trainable=True,
            name="embeddings")
        embeddings = tf.nn.dropout(
            embeddings,
            keep_prob=self.embed_dropout_keep_prob,
            noise_shape=[tf.shape(embeddings)[0], 1])
        queries_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_queries,
                                   name="queries_embedded"))
        replies_embedded = tf.to_float(
            tf.nn.embedding_lookup(embeddings, self.input_replies,
                                   name="replies_embedded"))
        self.queries_embedded = queries_embedded
        self.replies_embedded = replies_embedded

    # Transformer layer.
    with tf.variable_scope("transformer"):
        queries_expanded = tf.expand_dims(queries_embedded, axis=2,
                                          name="queries_expanded")
        replies_expanded = tf.expand_dims(replies_embedded, axis=2,
                                          name="replies_expanded")
        hparams = transformer.transformer_small()
        hparams.set_hparam("batch_size", self.config.batch_size)
        hparams.set_hparam("hidden_size", self.config.embed_dim)
        encoder = transformer.TransformerEncoder(hparams, mode=self.mode)
        self.queries_encoded = encoder({
            "inputs": queries_expanded,
            "targets": queries_expanded
        })[0]
        self.replies_encoded = encoder({
            "inputs": replies_expanded,
            "targets": replies_expanded
        })[0]
        # Max-pool over the time axis to get a fixed-size sentence representation.
        self.queries_pooled = tf.nn.max_pool(
            self.queries_encoded,
            ksize=[1, self.config.max_length, 1, 1],
            strides=[1, 1, 1, 1],
            padding='VALID',
            name="queries_pooled")
        self.replies_pooled = tf.nn.max_pool(
            self.replies_encoded,
            ksize=[1, self.config.max_length, 1, 1],
            strides=[1, 1, 1, 1],
            padding='VALID',
            name="replies_pooled")
        self.queries_flattened = tf.reshape(self.queries_pooled,
                                            [cur_batch_length, -1])
        self.replies_flattened = tf.reshape(self.replies_pooled,
                                            [cur_batch_length, -1])

    # Dense layer: bilinear transform of the query before matching.
    with tf.variable_scope("dense_layer"):
        M = tf.get_variable(
            "M",
            shape=[self.config.embed_dim, self.config.embed_dim],
            initializer=tf.initializers.truncated_normal())
        M = tf.nn.dropout(M, self.dense_dropout_keep_prob)
        self.queries_transformed = tf.matmul(self.queries_flattened, M)

    with tf.variable_scope("sampling"):
        self.distances = tf.matmul(self.queries_transformed,
                                   self.replies_flattened,
                                   transpose_b=True)
        positive_mask = tf.reshape(tf.eye(cur_batch_length), [-1])
        negative_mask = tf.reshape(
            make_negative_mask(
                self.distances,
                method=self.config.negative_sampling,
                num_negative_samples=self.num_negative_samples), [-1])

    with tf.variable_scope("prediction"):
        distances_flattened = tf.reshape(self.distances, [-1])
        self.positive_logits = tf.gather(distances_flattened,
                                         tf.where(positive_mask), 1)
        self.negative_logits = tf.gather(distances_flattened,
                                         tf.where(negative_mask), 1)
        self.logits = tf.concat([self.positive_logits, self.negative_logits],
                                axis=0)
        self.labels = tf.concat([
            tf.ones_like(self.positive_logits),
            tf.zeros_like(self.negative_logits)
        ], axis=0)
        self.positive_probs = tf.sigmoid(self.positive_logits)
        self.probs = tf.sigmoid(self.logits)
        self.predictions = tf.cast(self.probs > 0.5, dtype=tf.int32)

    with tf.variable_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels,
                                                    logits=self.logits))
        self.train_step = optimize.optimize(self.loss, lr, hparams, use_tpu=False)

    with tf.variable_scope("score"):
        correct_predictions = tf.equal(self.predictions,
                                       tf.to_int32(self.labels))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                       name="accuracy")
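Unlike the first variant, which feeds [query, score, reply] features through a small softmax classifier, this variant uses the bilinear dot product q·M·rᵀ itself as the logit and trains with sigmoid cross-entropy. A hedged scoring sketch follows; the model object, the token-id batches, and the assumed max_length of 50 are illustrative assumptions.

# Hypothetical scoring sketch; query_ids / reply_ids are assumed pre-indexed token-id arrays.
import numpy as np
import tensorflow as tf

query_ids = np.random.randint(0, 1000, size=(16, 50))  # assumes config.max_length == 50
reply_ids = np.random.randint(0, 1000, size=(16, 50))
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
    probs = sess.run(model.positive_probs,
                     feed_dict={
                         model.input_queries: query_ids,
                         model.input_replies: reply_ids,
                         model.embed_dropout_keep_prob: 1.0,   # disable dropout at inference
                         model.lstm_dropout_keep_prob: 1.0,
                         model.dense_dropout_keep_prob: 1.0,
                     })
    # probs[i] is the predicted probability that reply i matches query i.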