def z1_pre_encoder(x, z2, rhus=(256, 256)):
    """
    Pre-stochastic layer encoder for z1 (latent segment variable)

    z2 is repeated along the time axis, concatenated to every frame of x,
    and the result is run through a stack of LSTM layers.

    Args:
        x(tf.Tensor): tensor of shape (bs, T, F)
        z2(tf.Tensor): tensor of shape (bs, D1)
        rhus(list|tuple): numbers of LSTM hidden units, one entry per layer
    Return:
        out(tf.Tensor): concatenation of hidden states of all LSTM layers
    """
    bs, T = tf.shape(x)[0], tf.shape(x)[1]

    # (bs, D1) -> (bs, 1, D1) -> (bs, T, D1), so z2 can be appended per frame
    z2 = tf.tile(tf.expand_dims(z2, 1), (1, T, 1))
    x_z2 = tf.concat([x, z2], axis=-1)

    cell = MultiRNNCell([BasicLSTMCell(rhu) for rhu in rhus])
    init_state = cell.zero_state(bs, x.dtype)

    # the layer sizes are encoded in the variable scope name
    name = "z1_enc_lstm_%s" % ("_".join(map(str, rhus)), )
    _, final_state = dynamic_rnn(cell,
                                 x_z2,
                                 dtype=x.dtype,
                                 initial_state=init_state,
                                 time_major=False,
                                 scope=name)

    # keep only the hidden state (h) of every layer's final LSTM state
    out = [l_final_state.h for l_final_state in final_state]
    out = tf.concat(out, axis=-1)
    return out
def decoder(z1, z2, x, rhus=(256, 256), x_mu_nl=None, x_logvar_nl=None):
    """
    decoder

    Unrolls a stack of LSTM layers for as many steps as x has frames. Every
    step receives the same input, the concatenation of z1 and z2, and is
    followed by a Gaussian output layer.

    Args:
        z1(tf.Tensor)
        z2(tf.Tensor)
        x(tf.Tensor): tensor of shape (bs, T, F). only shape is used;
            T and F are read from the static shape
        rhus(list|tuple): numbers of LSTM hidden units, one entry per layer
        x_mu_nl: forwarded to gauss_layer
        x_logvar_nl: forwarded to gauss_layer
    Return:
        out(tf.Tensor): per-step outputs stacked along axis 1
        px_z(list): [x_mu, x_logvar], each stacked along axis 1
        x_sample(tf.Tensor): per-step samples stacked along axis 1
    Raises:
        ValueError: if the time dimension of x is not statically known
    """
    bs = tf.shape(x)[0]
    z1_z2 = tf.concat([z1, z2], axis=-1)

    cell = MultiRNNCell([BasicLSTMCell(rhu) for rhu in rhus])
    state_t = cell.zero_state(bs, x.dtype)
    name = "dec_lstm_%s_step" % ("_".join(map(str, rhus)), )

    def cell_step(inp, prev_state):
        return cell(inp, prev_state, scope=name)

    static_shape = x.get_shape().as_list()
    n_steps, gdim = static_shape[1], static_shape[2]
    if n_steps is None:
        # the loop below is unrolled in Python, so T must be known up front
        raise ValueError(
            "decoder requires x to have a statically known time dimension")
    gname = "dec_gauss_step"

    def glayer_step(inp):
        return gauss_layer(inp, gdim, x_mu_nl, x_logvar_nl, gname)

    out, x_mu, x_logvar, x_sample = [], [], [], []
    for t in range(n_steps):
        if t > 0:
            # share the step variables created at t == 0.
            # NOTE(review): this switches the *enclosing* variable scope to
            # reuse mode and it stays that way after the function returns.
            tf.get_variable_scope().reuse_variables()

        out_t, state_t, x_mu_t, x_logvar_t, x_sample_t = decoder_step(
            z1_z2, state_t, cell_step, glayer_step)
        out.append(out_t)
        x_mu.append(x_mu_t)
        x_logvar.append(x_logvar_t)
        x_sample.append(x_sample_t)

    out = tf.stack(out, axis=1, name="dec_pre_out")
    x_mu = tf.stack(x_mu, axis=1, name="dec_x_mu")
    x_logvar = tf.stack(x_logvar, axis=1, name="dec_x_logvar")
    x_sample = tf.stack(x_sample, axis=1, name="dec_x_sample")
    px_z = [x_mu, x_logvar]
    return out, px_z, x_sample
def z2_pre_encoder(x, rhus=(256, 256)):
    """
    Pre-stochastic layer encoder for z2 (latent sequence variable)

    Args:
        x(tf.Tensor): tensor of shape (bs, T, F)
        rhus(list|tuple): numbers of LSTM hidden units, one entry per layer
    Return:
        out(tf.Tensor): concatenation of hidden states of all LSTM layers
    """
    bs = tf.shape(x)[0]

    cell = MultiRNNCell([BasicLSTMCell(rhu) for rhu in rhus])
    init_state = cell.zero_state(bs, x.dtype)

    # the layer sizes are encoded in the variable scope name
    name = "z2_enc_lstm_%s" % ("_".join(map(str, rhus)), )
    _, final_state = dynamic_rnn(cell,
                                 x,
                                 dtype=x.dtype,
                                 initial_state=init_state,
                                 time_major=False,
                                 scope=name)

    # keep only the hidden state (h) of every layer's final LSTM state
    out = [l_final_state.h for l_final_state in final_state]
    out = tf.concat(out, axis=-1)
    return out
def __init__(self,
             num_symbols,
             num_embed_units,
             num_units,
             num_layers,
             is_train,
             vocab=None,
             embed=None,
             learning_rate=0.1,
             learning_rate_decay_factor=0.95,
             max_gradient_norm=5.0,
             num_samples=512,
             max_length=30,
             use_lstm=True):
    """Build the graph of the four-post, knowledge-graph-attentive model.

    Post 1 is encoded with ``dynamic_rnn`` (scope "encoder"). Posts 2, 3, 4
    and (when training) the response are each run through
    ``dynamic_rnn_decoder`` (scope "decoder", variables shared), attending
    over the outputs of the previous sequence together with that sequence's
    graph embedding. Every decoded sequence contributes a sampled loss; the
    training step minimises their sum.

    Args:
        num_symbols: vocabulary size (rows of the embedding / index table).
        num_embed_units: embedding dimension.
        num_units: RNN cell size.
        num_layers: number of stacked RNN layers.
        is_train: build the training ops if true, the inference ops otherwise.
        vocab: initial value of the ``symbols`` variable; used when is_train.
        embed: optional initial value for the embedding matrix.
        learning_rate: initial learning rate.
        learning_rate_decay_factor: factor applied by learning_rate_decay_op.
        max_gradient_norm: global-norm clipping threshold.
        num_samples: forwarded to output_projection_layer.
        max_length: forwarded to attention_decoder_fn_inference.
        use_lstm: use LSTMCell if true, GRUCell otherwise.
    """
    # ---- inputs -----------------------------------------------------------
    # NOTE: placeholders are created in the original order so that their
    # auto-generated graph names are unchanged.
    self.posts_1 = tf.placeholder(tf.string, shape=(None, None))
    self.posts_2 = tf.placeholder(tf.string, shape=(None, None))
    self.posts_3 = tf.placeholder(tf.string, shape=(None, None))
    self.posts_4 = tf.placeholder(tf.string, shape=(None, None))

    # the last axis holds the three symbols of a triple (split below into
    # head, relation, tail)
    self.entity_1 = tf.placeholder(tf.string, shape=(None, None, None, 3))
    self.entity_2 = tf.placeholder(tf.string, shape=(None, None, None, 3))
    self.entity_3 = tf.placeholder(tf.string, shape=(None, None, None, 3))
    self.entity_4 = tf.placeholder(tf.string, shape=(None, None, None, 3))

    self.entity_mask_1 = tf.placeholder(tf.float32, shape=(None, None, None))
    self.entity_mask_2 = tf.placeholder(tf.float32, shape=(None, None, None))
    self.entity_mask_3 = tf.placeholder(tf.float32, shape=(None, None, None))
    self.entity_mask_4 = tf.placeholder(tf.float32, shape=(None, None, None))

    # `(None)` is just `None` (unknown rank); lengths are rank-1 vectors.
    self.posts_length_1 = tf.placeholder(tf.int32, shape=(None,))
    self.posts_length_2 = tf.placeholder(tf.int32, shape=(None,))
    self.posts_length_3 = tf.placeholder(tf.int32, shape=(None,))
    self.posts_length_4 = tf.placeholder(tf.int32, shape=(None,))

    self.responses = tf.placeholder(tf.string, shape=(None, None))
    self.responses_length = tf.placeholder(tf.int32, shape=(None,))

    self.epoch = tf.Variable(0, trainable=False, name='epoch')
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)

    # ---- vocabulary table (string -> index) --------------------------------
    if is_train:
        self.symbols = tf.Variable(vocab, trainable=False, name="symbols")
    else:
        # dummy content of the right size; presumably overwritten when a
        # checkpoint is restored -- confirm against the caller
        self.symbols = tf.Variable(np.array(['.'] * num_symbols),
                                   name="symbols")
    self.symbol2index = HashTable(KeyValueTensorInitializer(
        self.symbols,
        tf.Variable(np.arange(num_symbols, dtype=np.int32), False)),
                                  default_value=UNK_ID,
                                  name="symbol2index")

    self.posts_input_1 = self.symbol2index.lookup(self.posts_1)
    self.posts_2_target = self.posts_2_embed = self.symbol2index.lookup(
        self.posts_2)
    self.posts_3_target = self.posts_3_embed = self.symbol2index.lookup(
        self.posts_3)
    self.posts_4_target = self.posts_4_embed = self.symbol2index.lookup(
        self.posts_4)
    self.responses_target = self.symbol2index.lookup(self.responses)

    batch_size, decoder_len = tf.shape(self.posts_1)[0], tf.shape(
        self.responses)[1]

    def shift_right(ids, tokens):
        # drop the last column of `ids` and put GO_ID in front; `tokens` is
        # the string placeholder that `ids` was looked up from
        return tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(ids, [tf.shape(tokens)[1] - 1, 1], 1)[0]
        ], 1)

    def length_mask(lengths, tokens):
        # 1.0 for positions < length, 0.0 afterwards
        return tf.reshape(
            tf.cumsum(tf.one_hot(lengths - 1, tf.shape(tokens)[1]),
                      reverse=True,
                      axis=1), [-1, tf.shape(tokens)[1]])

    self.posts_input_2 = shift_right(self.posts_2_embed, self.posts_2)
    self.posts_input_3 = shift_right(self.posts_3_embed, self.posts_3)
    self.posts_input_4 = shift_right(self.posts_4_embed, self.posts_4)

    self.responses_input = tf.concat([
        tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
        tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
    ], 1)

    self.encoder_2_mask = length_mask(self.posts_length_2, self.posts_2)
    self.encoder_3_mask = length_mask(self.posts_length_3, self.posts_3)
    self.encoder_4_mask = length_mask(self.posts_length_4, self.posts_4)
    self.decoder_mask = tf.reshape(
        tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                  reverse=True,
                  axis=1), [-1, decoder_len])

    # ---- embedding table (index -> vector) ---------------------------------
    if embed is None:
        self.embed = tf.get_variable('embed', [num_symbols, num_embed_units],
                                     tf.float32)
    else:
        self.embed = tf.get_variable('embed',
                                     dtype=tf.float32,
                                     initializer=embed)

    self.encoder_input_1 = tf.nn.embedding_lookup(self.embed,
                                                  self.posts_input_1)
    self.encoder_input_2 = tf.nn.embedding_lookup(self.embed,
                                                  self.posts_input_2)
    self.encoder_input_3 = tf.nn.embedding_lookup(self.embed,
                                                  self.posts_input_3)
    self.encoder_input_4 = tf.nn.embedding_lookup(self.embed,
                                                  self.posts_input_4)
    self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                self.responses_input)

    # ---- triple embeddings --------------------------------------------------
    def embed_triples(entity):
        # embed the 3 symbols of every triple and flatten them into one
        # vector of size 3 * num_embed_units
        return tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(entity)), [
                                       batch_size,
                                       tf.shape(entity)[1],
                                       tf.shape(entity)[2],
                                       3 * num_embed_units
                                   ])

    entity_embedding_1 = embed_triples(self.entity_1)
    entity_embedding_2 = embed_triples(self.entity_2)
    entity_embedding_3 = embed_triples(self.entity_3)
    entity_embedding_4 = embed_triples(self.entity_4)

    head_1, relation_1, tail_1 = tf.split(entity_embedding_1,
                                          [num_embed_units] * 3,
                                          axis=3)
    head_2, relation_2, tail_2 = tf.split(entity_embedding_2,
                                          [num_embed_units] * 3,
                                          axis=3)
    head_3, relation_3, tail_3 = tf.split(entity_embedding_3,
                                          [num_embed_units] * 3,
                                          axis=3)
    head_4, relation_4, tail_4 = tf.split(entity_embedding_4,
                                          [num_embed_units] * 3,
                                          axis=3)

    # ---- graph attention ------------------------------------------------------
    def graph_attention(head, relation, tail, mask, reuse):
        # Attention over the triples attached to every position; all four
        # posts share the variables under 'graph_attention'.
        with tf.variable_scope('graph_attention', reuse=reuse):
            # [batch_size, max_length, max_triple_num, 2*embed_units]
            head_tail = tf.concat([head, tail], axis=3)
            # [batch_size, max_length, max_triple_num, embed_units]
            head_tail_transformed = tf.layers.dense(
                head_tail,
                num_embed_units,
                activation=tf.tanh,
                name='head_tail_transform')
            # [batch_size, max_length, max_triple_num, embed_units]
            relation_transformed = tf.layers.dense(relation,
                                                   num_embed_units,
                                                   name='relation_transform')
            # [batch_size, max_length, max_triple_num]
            e_weight = tf.reduce_sum(relation_transformed *
                                     head_tail_transformed,
                                     axis=3)
            # [batch_size, max_length, max_triple_num]
            # NOTE(review): the softmax runs over all triples, padding
            # included; the mask is only applied to the values below.
            alpha_weight = tf.nn.softmax(e_weight)
            # [batch_size, max_length, 2*embed_units]
            return tf.reduce_sum(tf.expand_dims(alpha_weight, 3) *
                                 (tf.expand_dims(mask, 3) * head_tail),
                                 axis=2)

    graph_embed_1 = graph_attention(head_1, relation_1, tail_1,
                                    self.entity_mask_1, None)
    graph_embed_2 = graph_attention(head_2, relation_2, tail_2,
                                    self.entity_mask_2, True)
    graph_embed_3 = graph_attention(head_3, relation_3, tail_3,
                                    self.entity_mask_3, True)
    graph_embed_4 = graph_attention(head_4, relation_4, tail_4,
                                    self.entity_mask_4, True)

    # ---- recurrent cells --------------------------------------------------------
    # NOTE(review): the same cell object is repeated for every layer and used
    # in both the "encoder" and "decoder" scopes; newer TensorFlow releases
    # may reject this sharing -- confirm against the pinned TF version.
    if use_lstm:
        cell = MultiRNNCell([LSTMCell(num_units)] * num_layers)
    else:
        cell = MultiRNNCell([GRUCell(num_units)] * num_layers)

    output_fn, sampled_sequence_loss = output_projection_layer(
        num_units, num_symbols, num_samples)

    # ---- post 1 -> post 2 -----------------------------------------------------
    encoder_output_1, encoder_state_1 = dynamic_rnn(cell,
                                                    self.encoder_input_1,
                                                    self.posts_length_1,
                                                    dtype=tf.float32,
                                                    scope="encoder")

    attention_keys_1, attention_values_1, attention_score_fn_1, attention_construct_fn_1 \
        = attention_decoder_fn.prepare_attention(graph_embed_1, encoder_output_1, 'luong', num_units)
    decoder_fn_train_1 = attention_decoder_fn.attention_decoder_fn_train(
        encoder_state_1,
        attention_keys_1,
        attention_values_1,
        attention_score_fn_1,
        attention_construct_fn_1,
        max_length=tf.reduce_max(self.posts_length_2))
    encoder_output_2, encoder_state_2, alignments_ta_2 = dynamic_rnn_decoder(
        cell,
        decoder_fn_train_1,
        self.encoder_input_2,
        self.posts_length_2,
        scope="decoder")
    self.alignments_2 = tf.transpose(alignments_ta_2.stack(), perm=[1, 0, 2])
    self.decoder_loss_2 = sampled_sequence_loss(encoder_output_2,
                                                self.posts_2_target,
                                                self.encoder_2_mask)

    # ---- post 2 -> post 3 -> post 4 (variables reused from the step above) ----
    with variable_scope.variable_scope('', reuse=True):
        attention_keys_2, attention_values_2, attention_score_fn_2, attention_construct_fn_2 \
            = attention_decoder_fn.prepare_attention(graph_embed_2, encoder_output_2, 'luong', num_units)
        decoder_fn_train_2 = attention_decoder_fn.attention_decoder_fn_train(
            encoder_state_2,
            attention_keys_2,
            attention_values_2,
            attention_score_fn_2,
            attention_construct_fn_2,
            max_length=tf.reduce_max(self.posts_length_3))
        encoder_output_3, encoder_state_3, alignments_ta_3 = dynamic_rnn_decoder(
            cell,
            decoder_fn_train_2,
            self.encoder_input_3,
            self.posts_length_3,
            scope="decoder")
        self.alignments_3 = tf.transpose(alignments_ta_3.stack(),
                                         perm=[1, 0, 2])
        self.decoder_loss_3 = sampled_sequence_loss(encoder_output_3,
                                                    self.posts_3_target,
                                                    self.encoder_3_mask)

        attention_keys_3, attention_values_3, attention_score_fn_3, attention_construct_fn_3 \
            = attention_decoder_fn.prepare_attention(graph_embed_3, encoder_output_3, 'luong', num_units)
        decoder_fn_train_3 = attention_decoder_fn.attention_decoder_fn_train(
            encoder_state_3,
            attention_keys_3,
            attention_values_3,
            attention_score_fn_3,
            attention_construct_fn_3,
            max_length=tf.reduce_max(self.posts_length_4))
        encoder_output_4, encoder_state_4, alignments_ta_4 = dynamic_rnn_decoder(
            cell,
            decoder_fn_train_3,
            self.encoder_input_4,
            self.posts_length_4,
            scope="decoder")
        self.alignments_4 = tf.transpose(alignments_ta_4.stack(),
                                         perm=[1, 0, 2])
        self.decoder_loss_4 = sampled_sequence_loss(encoder_output_4,
                                                    self.posts_4_target,
                                                    self.encoder_4_mask)

        attention_keys, attention_values, attention_score_fn, attention_construct_fn \
            = attention_decoder_fn.prepare_attention(graph_embed_4, encoder_output_4, 'luong', num_units)

    # ---- response ---------------------------------------------------------------
    if is_train:
        with variable_scope.variable_scope('', reuse=True):
            decoder_fn_train = attention_decoder_fn.attention_decoder_fn_train(
                encoder_state_4,
                attention_keys,
                attention_values,
                attention_score_fn,
                attention_construct_fn,
                max_length=tf.reduce_max(self.responses_length))
            self.decoder_output, _, alignments_ta = dynamic_rnn_decoder(
                cell,
                decoder_fn_train,
                self.decoder_input,
                self.responses_length,
                scope="decoder")
            self.alignments = tf.transpose(alignments_ta.stack(),
                                           perm=[1, 0, 2])
            self.decoder_loss = sampled_sequence_loss(self.decoder_output,
                                                      self.responses_target,
                                                      self.decoder_mask)

        self.params = tf.trainable_variables()

        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # momentum SGD on the sum of the response loss and the three
        # intermediate post losses, with global-norm gradient clipping
        opt = tf.train.MomentumOptimizer(self.learning_rate, 0.9)
        gradients = tf.gradients(
            self.decoder_loss + self.decoder_loss_2 + self.decoder_loss_3 +
            self.decoder_loss_4, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)
    else:
        with variable_scope.variable_scope('', reuse=True):
            decoder_fn_inference = attention_decoder_fn.attention_decoder_fn_inference(
                output_fn, encoder_state_4, attention_keys, attention_values,
                attention_score_fn, attention_construct_fn, self.embed, GO_ID,
                EOS_ID, max_length, num_symbols)
            self.decoder_distribution, _, alignments_ta = dynamic_rnn_decoder(
                cell, decoder_fn_inference, scope="decoder")

            output_len = tf.shape(self.decoder_distribution)[1]
            self.alignments = tf.transpose(
                alignments_ta.gather(tf.range(output_len)), [1, 0, 2])

        # argmax over every symbol except the first two ids, then shift the
        # result back by 2 (original note: "for removing UNK")
        self.generation_index = tf.argmax(
            tf.split(self.decoder_distribution, [2, num_symbols - 2], 2)[1],
            2) + 2
        # map the generated indices back to their symbol strings
        self.generation = tf.nn.embedding_lookup(self.symbols,
                                                 self.generation_index,
                                                 name="generation")

        self.params = tf.trainable_variables()

    self.saver = tf.train.Saver(tf.global_variables(),
                                write_version=tf.train.SaverDef.V2,
                                max_to_keep=10,
                                pad_step_number=True,
                                keep_checkpoint_every_n_hours=1.0)
def __init__(self, model_parameters, training_parameters, directories,
             **kwargs):
    """
    Initialization of the RNN Model as TensorFlow computational graph

    Args:
        model_parameters: object providing sequence_length, input_dimension,
            n_classes, state_size, n_layers, model ('rnn', 'gru', 'lstm' or
            'nas') and the input/output keep probabilities
        training_parameters: stored on the instance, not read here
        directories: stored on the instance, not read here
        **kwargs: accepted and ignored
    Raises:
        Exception: if model_parameters.model is not a supported cell type
    """
    self.model_parameters = model_parameters
    self.training_parameters = training_parameters
    self.directories = directories

    # Define model hyperparameters Tensors
    with tf.name_scope("Parameters"):
        self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")
        self.momentum = tf.placeholder(tf.float32, name="momentum")
        self.input_keep_probability = tf.placeholder(
            tf.float32, name="input_keep_probability")
        self.output_keep_probability = tf.placeholder(
            tf.float32, name="output_keep_probability")

    # Define input, output and initialization Tensors
    with tf.name_scope("Input"):
        self.inputs = tf.placeholder("float", [
            None, self.model_parameters.sequence_length,
            self.model_parameters.input_dimension
        ],
                                     name='input_placeholder')
        self.targets = tf.placeholder("float", [
            None, self.model_parameters.sequence_length,
            self.model_parameters.n_classes
        ],
                                      name='labels_placeholder')
        # not consumed anywhere in this graph; kept because it is a public
        # attribute
        self.init = tf.placeholder(
            tf.float32,
            shape=[None, self.model_parameters.state_size],
            name="init")

    # Define the TensorFlow RNN computational graph
    with tf.name_scope("RNN"):
        cells = []
        # Define the layers
        for _ in range(self.model_parameters.n_layers):
            if self.model_parameters.model == 'rnn':
                cell = BasicRNNCell(self.model_parameters.state_size)
            elif self.model_parameters.model == 'gru':
                cell = GRUCell(self.model_parameters.state_size)
            elif self.model_parameters.model == 'lstm':
                cell = BasicLSTMCell(self.model_parameters.state_size,
                                     state_is_tuple=True)
            elif self.model_parameters.model == 'nas':
                cell = NASCell(self.model_parameters.state_size)
            else:
                raise Exception("model type not supported: {}".format(
                    self.model_parameters.model))

            # Whether a dropout wrapper is added is decided by the configured
            # (Python) keep probabilities; the rate actually applied comes
            # from the placeholders fed at run time.
            if self.model_parameters.output_keep_probability < 1.0:
                cell = DropoutWrapper(
                    cell, output_keep_prob=self.output_keep_probability)
            if self.model_parameters.input_keep_probability < 1.0:
                cell = DropoutWrapper(
                    cell, input_keep_prob=self.input_keep_probability)
            cells.append(cell)
        cell = MultiRNNCell(cells)

        # Simulate time steps and get RNN cell output
        self.outputs, self.next_state = tf.nn.dynamic_rnn(cell,
                                                          self.inputs,
                                                          dtype=tf.float32)

    # Define cost Tensors
    with tf.name_scope("Cost"):
        # Flatten to apply same weights to all time steps
        self.flattened_outputs = tf.reshape(
            self.outputs, [-1, self.model_parameters.state_size],
            name="flattened_outputs")

        self.softmax_w = tf.Variable(tf.truncated_normal([
            self.model_parameters.state_size, self.model_parameters.n_classes
        ],
                                                         stddev=0.01),
                                     name="softmax_weights")
        self.softmax_b = tf.Variable(tf.constant(
            0.1, shape=[self.model_parameters.n_classes]),
                                     name="softmax_biases")

        # Softmax activation layer, applied to every time step;
        # logits have shape [batch_size * sequence_length, n_classes]
        self.logits = tf.matmul(self.flattened_outputs,
                                self.softmax_w) + self.softmax_b
        self.unshaped_predictions = tf.nn.softmax(
            self.logits, name="unshaped_predictions")

        tf.summary.histogram('logits', self.logits)

        # Return to the initial predictions shape
        prediction_shape = [
            -1, self.model_parameters.sequence_length,
            self.model_parameters.n_classes
        ]
        self.predictions = tf.reshape(self.unshaped_predictions,
                                      prediction_shape,
                                      name="predictions")

        # Cross entropy from log_softmax of the logits rather than
        # log(softmax(...)): same value, but it cannot produce -inf/NaN when
        # a probability underflows to 0.
        log_predictions = tf.reshape(tf.nn.log_softmax(self.logits),
                                     prediction_shape)
        self.cross_entropy = tf.reduce_mean(
            -tf.reduce_sum(self.targets * log_predictions, axis=2))

        # Get the most likely label for each input
        self.label_prediction = tf.argmax(self.predictions,
                                          2,
                                          name="label_predictions")

        # Compare predictions to labels
        self.correct_prediction = tf.equal(tf.argmax(self.predictions, 2),
                                           tf.argmax(self.targets, 2),
                                           name="correct_predictions")

        self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction,
                                               tf.float32),
                                       name="accuracy")

    # Define Training Tensors
    with tf.name_scope("Train"):
        # Disabled: graph variables and scalar summaries for validation /
        # training-epoch metrics and an iteration counter.
        #self.validation_perplexity = tf.Variable(dtype=tf.float32, initial_value=float("inf"),
        #trainable=False,
        #name="validation_perplexity")
        #self.validation_accuracy = tf.Variable(dtype=tf.float32, initial_value=float("inf"),
        #trainable=False,
        #name="validation_accuracy")
        #tf.scalar_summary(self.validation_perplexity.op.name, self.validation_perplexity)
        #tf.scalar_summary(self.validation_accuracy.op.name, self.validation_accuracy)
        #self.training_epoch_perplexity = tf.Variable(dtype=tf.float32, initial_value=float("inf"),
        #trainable=False,
        #name="training_epoch_perplexity")
        #self.training_epoch_accuracy = tf.Variable(dtype=tf.float32, initial_value=float("inf"),
        #trainable=False,
        #name="training_epoch_accuracy")
        #tf.scalar_summary(self.training_epoch_perplexity.op.name, self.training_epoch_perplexity)
        #tf.scalar_summary(self.training_epoch_accuracy.op.name, self.training_epoch_accuracy)
        #self.iteration = tf.Variable(0, dtype=tf.int64, name="iteration", trainable=False)

        # Momentum optimisation
        self.optimizer = tf.train.MomentumOptimizer(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            name="optimizer")
        self.train_step = self.optimizer.minimize(self.cross_entropy,
                                                  name="train_step")

    # Initializing the variables
    self.initializer = tf.global_variables_initializer()
def __init__(self,
             num_symbols,
             num_embed_units,
             num_units,
             num_layers,
             beam_size,
             embed,
             learning_rate=0.5,
             remove_unk=False,
             learning_rate_decay_factor=0.95,
             max_gradient_norm=5.0,
             num_samples=512,
             max_length=8,
             use_lstm=False):
    """Build the graph of an attentive seq2seq model with beam search.

    One graph holds three decoders sharing the variables under the
    'decoder' scope: a training decoder (sampled loss), a greedy inference
    decoder, and a beam-search inference decoder whose outputs are also
    registered with a serving exporter.

    Args:
        num_symbols: vocabulary size.
        num_embed_units: embedding dimension (used when embed is None).
        num_units: RNN cell size.
        num_layers: number of stacked RNN layers.
        beam_size: beam width forwarded to the beam-search decoder function.
        embed: initial value of the embedding matrix, or None for the
            default initializer.
        learning_rate: initial learning rate.
        remove_unk: forwarded to the beam-search decoder function.
        learning_rate_decay_factor: factor applied by learning_rate_decay_op.
        max_gradient_norm: global-norm clipping threshold.
        num_samples: forwarded to output_projection_layer.
        max_length: maximum number of decoding steps at inference.
        use_lstm: use LSTMCell if true, GRUCell otherwise.
    """
    self.posts = tf.placeholder(tf.string, (None, None),
                                'enc_inps')  # batch*len
    # `(None)` is just `None` (unknown rank); lengths are rank-1 vectors
    self.posts_length = tf.placeholder(tf.int32, (None,), 'enc_lens')  # batch
    self.responses = tf.placeholder(tf.string, (None, None),
                                    'dec_inps')  # batch*len
    self.responses_length = tf.placeholder(tf.int32, (None,),
                                           'dec_lens')  # batch

    # initialize the training process
    self.learning_rate = tf.Variable(float(learning_rate),
                                     trainable=False,
                                     dtype=tf.float32)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    # vocabulary tables; created empty here, so their content must be
    # inserted or restored elsewhere before use
    self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                         value_dtype=tf.int64,
                                         default_value=UNK_ID,
                                         shared_name="in_table",
                                         name="in_table",
                                         checkpoint=True)
    self.index2symbol = MutableHashTable(key_dtype=tf.int64,
                                         value_dtype=tf.string,
                                         default_value='_UNK',
                                         shared_name="out_table",
                                         name="out_table",
                                         checkpoint=True)

    # build the vocab table (string to index)
    self.posts_input = self.symbol2index.lookup(self.posts)  # batch*len
    self.responses_target = self.symbol2index.lookup(
        self.responses)  # batch*len

    batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(
        self.responses)[1]
    # decoder input = GO followed by the target without its last column
    self.responses_input = tf.concat([
        tf.ones([batch_size, 1], dtype=tf.int64) * GO_ID,
        tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
    ], 1)  # batch*len
    # 1.0 for positions < response length, 0.0 afterwards
    self.decoder_mask = tf.reshape(
        tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                  reverse=True,
                  axis=1), [-1, decoder_len])

    # build the embedding table (index to vector)
    if embed is None:
        # initialize the embedding randomly
        self.embed = tf.get_variable('embed', [num_symbols, num_embed_units],
                                     tf.float32)
    else:
        # initialize the embedding by pre-trained word vectors
        self.embed = tf.get_variable('embed',
                                     dtype=tf.float32,
                                     initializer=embed)

    self.encoder_input = tf.nn.embedding_lookup(
        self.embed, self.posts_input)  # batch*len*unit
    self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                self.responses_input)

    # NOTE(review): the same cell object is repeated for every layer and used
    # by both encoder and decoder; newer TensorFlow releases may reject this
    # sharing -- confirm against the pinned TF version.
    if use_lstm:
        cell = MultiRNNCell([LSTMCell(num_units)] * num_layers)
    else:
        cell = MultiRNNCell([GRUCell(num_units)] * num_layers)

    # rnn encoder
    encoder_output, encoder_state = dynamic_rnn(cell,
                                                self.encoder_input,
                                                self.posts_length,
                                                dtype=tf.float32,
                                                scope="encoder")

    # get output projection function
    output_fn, sampled_sequence_loss = output_projection_layer(
        num_units, num_symbols, num_samples)

    # get attention function
    attention_keys, attention_values, attention_score_fn, attention_construct_fn \
        = attention_decoder_fn.prepare_attention(encoder_output, 'luong', num_units)

    # training decoder: creates the decoder variables
    with tf.variable_scope('decoder'):
        decoder_fn_train = attention_decoder_fn.attention_decoder_fn_train(
            encoder_state, attention_keys, attention_values,
            attention_score_fn, attention_construct_fn)

        self.decoder_output, _, _ = dynamic_rnn_decoder(
            cell,
            decoder_fn_train,
            self.decoder_input,
            self.responses_length,
            scope="decoder_rnn")
        self.decoder_loss = sampled_sequence_loss(self.decoder_output,
                                                  self.responses_target,
                                                  self.decoder_mask)

    # greedy inference decoder: reuses the decoder variables
    with tf.variable_scope('decoder', reuse=True):
        decoder_fn_inference = attention_decoder_fn.attention_decoder_fn_inference(
            output_fn, encoder_state, attention_keys, attention_values,
            attention_score_fn, attention_construct_fn, self.embed, GO_ID,
            EOS_ID, max_length, num_symbols)

        self.decoder_distribution, _, _ = dynamic_rnn_decoder(
            cell, decoder_fn_inference, scope="decoder_rnn")
        # argmax over every symbol except the first two ids, then shift the
        # result back by 2 (original note: "for removing UNK")
        self.generation_index = tf.argmax(
            tf.split(self.decoder_distribution, [2, num_symbols - 2], 2)[1],
            2) + 2
        self.generation = self.index2symbol.lookup(self.generation_index,
                                                   name='generation')

    # beam-search inference decoder: reuses the decoder variables
    with tf.variable_scope('decoder', reuse=True):
        decoder_fn_beam_inference = attention_decoder_fn_beam_inference(
            output_fn, encoder_state, attention_keys, attention_values,
            attention_score_fn, attention_construct_fn, self.embed, GO_ID,
            EOS_ID, max_length, num_symbols, beam_size, remove_unk)
        _, _, self.context_state = dynamic_rnn_decoder(
            cell, decoder_fn_beam_inference, scope="decoder_rnn")
        (log_beam_probs, beam_parents, beam_symbols, result_probs,
         result_parents, result_symbols) = self.context_state

        # Each stacked array is reshaped to
        # [max_length + 1, batch, width] and transposed to
        # [batch, max_length + 1, width]; width is beam_size for the beam_*
        # tensors and beam_size * 2 for the result_* tensors.
        self.beam_parents = tf.transpose(tf.reshape(
            beam_parents.stack(), [max_length + 1, -1, beam_size]),
                                         [1, 0, 2],
                                         name='beam_parents')
        self.beam_symbols = tf.transpose(
            tf.reshape(beam_symbols.stack(),
                       [max_length + 1, -1, beam_size]), [1, 0, 2])
        self.beam_symbols = self.index2symbol.lookup(tf.cast(
            self.beam_symbols, tf.int64),
                                                     name="beam_symbols")

        self.result_probs = tf.transpose(tf.reshape(
            result_probs.stack(), [max_length + 1, -1, beam_size * 2]),
                                         [1, 0, 2],
                                         name='result_probs')
        self.result_symbols = tf.transpose(
            tf.reshape(result_symbols.stack(),
                       [max_length + 1, -1, beam_size * 2]), [1, 0, 2])
        self.result_parents = tf.transpose(tf.reshape(
            result_parents.stack(), [max_length + 1, -1, beam_size * 2]),
                                           [1, 0, 2],
                                           name='result_parents')
        self.result_symbols = self.index2symbol.lookup(
            tf.cast(self.result_symbols, tf.int64), name='result_symbols')

    self.params = tf.trainable_variables()

    # calculate the gradient of parameters
    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    gradients = tf.gradients(self.decoder_loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                      global_step=self.global_step)

    self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                max_to_keep=3,
                                pad_step_number=True,
                                keep_checkpoint_every_n_hours=1.0)

    # Exporter for serving
    self.model_exporter = exporter.Exporter(self.saver)
    inputs = {"enc_inps:0": self.posts, "enc_lens:0": self.posts_length}
    outputs = {
        "beam_symbols": self.beam_symbols,
        "beam_parents": self.beam_parents,
        "result_probs": self.result_probs,
        "result_symbols": self.result_symbols,
        "result_parents": self.result_parents
    }
    self.model_exporter.init(tf.get_default_graph().as_graph_def(),
                             named_graph_signatures={
                                 "inputs":
                                 exporter.generic_signature(inputs),
                                 "outputs":
                                 exporter.generic_signature(outputs)
                             })
def __init__(self,
             num_symbols,
             num_qwords,  #modify
             num_embed_units,
             num_units,
             num_layers,
             is_train,
             vocab=None,
             embed=None,
             question_data=True,
             learning_rate=0.5,
             learning_rate_decay_factor=0.95,
             max_gradient_norm=5.0,
             num_samples=512,
             max_length=30,
             use_lstm=False):
    """Build the graph of the keyword-aware attentive seq2seq model.

    Args:
        num_symbols: vocabulary size.
        num_qwords: forwarded to output_projection_layer.
        num_embed_units: embedding dimension (used when embed is None).
        num_units: RNN cell size.
        num_layers: number of stacked RNN layers.
        is_train: build the training ops if true, the inference ops otherwise.
        vocab: initial value of the ``symbols`` variable; used when is_train.
        embed: optional initial value for the embedding matrix.
        question_data: forwarded to output_projection_layer.
        learning_rate: initial learning rate.
        learning_rate_decay_factor: factor applied by learning_rate_decay_op.
        max_gradient_norm: global-norm clipping threshold.
        num_samples: forwarded to output_projection_layer.
        max_length: maximum number of decoding steps at inference.
        use_lstm: use LSTMCell if true, GRUCell otherwise.

    All progress output uses single-argument ``print(...)`` calls so the
    module parses and prints identically on Python 2 and Python 3.
    """
    self.posts = tf.placeholder(tf.string, shape=(None, None))  # batch*len
    # `(None)` is just `None` (unknown rank); these inputs are rank-1 vectors
    self.posts_length = tf.placeholder(tf.int32, shape=(None,))  # batch
    self.responses = tf.placeholder(tf.string,
                                    shape=(None, None))  # batch*len
    self.responses_length = tf.placeholder(tf.int32, shape=(None,))  # batch
    self.keyword_tensor = tf.placeholder(
        tf.float32, shape=(None, 3, None))  # (batch * len) * 3 * numsymbol
    self.word_type = tf.placeholder(tf.int32, shape=(None,))  # (batch * len)

    # build the vocab table (string to index)
    if is_train:
        self.symbols = tf.Variable(vocab, trainable=False, name="symbols")
    else:
        # dummy content of the right size; presumably overwritten when a
        # checkpoint is restored -- confirm against the caller
        self.symbols = tf.Variable(np.array(['.'] * num_symbols),
                                   name="symbols")
    self.symbol2index = HashTable(KeyValueTensorInitializer(
        self.symbols,
        tf.Variable(
            np.array([i for i in range(num_symbols)], dtype=np.int32),
            False)),
                                  default_value=UNK_ID,
                                  name="symbol2index")

    self.posts_input = self.symbol2index.lookup(self.posts)  # batch*len
    self.responses_target = self.symbol2index.lookup(
        self.responses)  # batch*len

    batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(
        self.responses)[1]
    # delete the last column of responses_target and add GO at the front
    self.responses_input = tf.concat([
        tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
        tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
    ], 1)  # batch*len
    # 1.0 for positions < response length, 0.0 afterwards
    self.decoder_mask = tf.reshape(
        tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                  reverse=True,
                  axis=1), [-1, decoder_len])  # batch * len

    print("embedding...")
    # build the embedding table (index to vector)
    if embed is None:
        # initialize the embedding randomly
        self.embed = tf.get_variable('embed', [num_symbols, num_embed_units],
                                     tf.float32)
    else:
        # debug output; skipped when vocab is absent so that it cannot
        # crash graph construction with len(None)
        if vocab is not None:
            print("%s %s %s" % (len(vocab), len(embed), len(embed[0])))
        print(embed)
        # initialize the embedding by pre-trained word vectors
        self.embed = tf.get_variable('embed',
                                     dtype=tf.float32,
                                     initializer=embed)

    self.encoder_input = tf.nn.embedding_lookup(
        self.embed, self.posts_input)  # batch*len*unit
    self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                self.responses_input)
    print("embedding finished")

    # NOTE(review): the same cell object is repeated for every layer and used
    # by both encoder and decoder; newer TensorFlow releases may reject this
    # sharing -- confirm against the pinned TF version.
    if use_lstm:
        cell = MultiRNNCell([LSTMCell(num_units)] * num_layers)
    else:
        cell = MultiRNNCell([GRUCell(num_units)] * num_layers)

    # rnn encoder
    encoder_output, encoder_state = dynamic_rnn(cell,
                                                self.encoder_input,
                                                self.posts_length,
                                                dtype=tf.float32,
                                                scope="encoder")

    # get output projection function
    output_fn, sampled_sequence_loss = output_projection_layer(
        num_units, num_symbols, num_qwords, num_samples, question_data)

    print("encoder_output.shape: %s" % (encoder_output.get_shape(), ))

    # get attention function
    attention_keys, attention_values, attention_score_fn, attention_construct_fn \
        = attention_decoder_fn.prepare_attention(encoder_output, 'luong', num_units)

    # get decoding loop function
    decoder_fn_train = attention_decoder_fn.attention_decoder_fn_train(
        encoder_state, attention_keys, attention_values, attention_score_fn,
        attention_construct_fn)
    decoder_fn_inference = attention_decoder_fn.attention_decoder_fn_inference(
        output_fn, self.keyword_tensor, encoder_state, attention_keys,
        attention_values, attention_score_fn, attention_construct_fn,
        self.embed, GO_ID, EOS_ID, max_length, num_symbols)

    if is_train:
        # rnn decoder
        self.decoder_output, _, _ = dynamic_rnn_decoder(
            cell,
            decoder_fn_train,
            self.decoder_input,
            self.responses_length,
            scope="decoder")
        # calculate the loss of decoder
        # self.decoder_output = tf.Print(self.decoder_output, [self.decoder_output])
        self.decoder_loss, self.log_perplexity = sampled_sequence_loss(
            self.decoder_output, self.responses_target, self.decoder_mask,
            self.keyword_tensor, self.word_type)

        # building graph finished and get all parameters
        self.params = tf.trainable_variables()

        for item in tf.trainable_variables():
            print("%s %s" % (item.name, item.get_shape()))

        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.decoder_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)
    else:
        # rnn decoder
        self.decoder_distribution, _, _ = dynamic_rnn_decoder(
            cell, decoder_fn_inference, scope="decoder")
        # formatted as text; the former two-argument print() call printed a
        # tuple repr on Python 2
        print("self.decoder_distribution.shape(): %s" %
              (self.decoder_distribution.get_shape(), ))
        # debug op: logs the sum of the distribution whenever it is evaluated
        self.decoder_distribution = tf.Print(
            self.decoder_distribution,
            ["distribution.shape()",
             tf.reduce_sum(self.decoder_distribution)])
        # generating the response:
        # argmax over every symbol except the first two ids, then shift the
        # result back by 2 (original note: "for removing UNK")
        self.generation_index = tf.argmax(
            tf.split(self.decoder_distribution, [2, num_symbols - 2], 2)[1],
            2) + 2
        self.generation = tf.nn.embedding_lookup(self.symbols,
                                                 self.generation_index)

        self.params = tf.trainable_variables()

    self.saver = tf.train.Saver(tf.global_variables(),
                                write_version=tf.train.SaverDef.V2,
                                max_to_keep=3,
                                pad_step_number=True,
                                keep_checkpoint_every_n_hours=1.0)
def _build_rnn_decoder_and_recon_x(self, inputs, targets, training, reuse=False):
    """Unroll the recurrent decoder by hand and reconstruct x frame by frame.

    At every step the stacked RNN cell is fed ``inputs`` (optionally
    concatenated with a flattened history of previous frames), and the cell
    output is mapped by ``dense_latent`` to ``C * n_concur * F`` values that
    are reshaped to ``(-1, C, n_concur, F)``.  The per-step results are
    concatenated along axis 2.

    Args:
        inputs: tensor fed to the RNN at every step.  It is concatenated on
            the last axis with a ``(-1, C * n_hist * F)`` history tensor, so
            it is presumably of shape (bs, D) -- TODO confirm with caller.
        targets: ground-truth tensor, sliced as ``targets[:, :, t, :]`` when
            the history source is ``"targets"``; presumably (bs, C, T, F).
        training: if truthy, the history source is read from
            ``rec_dec_inp_train``; otherwise from ``rec_dec_inp_test``.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        outputs: per-step RNN outputs stacked along axis 1.
        px: ``[x_mu, x_logvar]``, each concatenated along axis 2.
        x: third output of ``dense_latent`` for every step (presumably a
            sample), concatenated along axis 2.

    Raises:
        NotImplementedError: if ``rec_learn_init`` is set.
        ValueError: if T is not a multiple of ``rec_dec_concur``, if the
            history source is unsupported, or if ``x_conti`` is false (the
            discrete output path is disabled).
    """
    with tf.variable_scope("dec_rec_and_recon_x", reuse=reuse):
        C, T, F = self._model_conf["target_shape"]
        Cell = _cell_dict[self._model_conf["rec_cell_type"]]
        cell = MultiRNNCell(
            [Cell(hu) for hu in self._model_conf["rec_dec"]])

        if self._model_conf["rec_learn_init"]:
            raise NotImplementedError
        else:
            input_shape = tuple(array_ops.shape(input_)
                                for input_ in nest.flatten(inputs))
            batch_size = input_shape[0][0]
            init_state = cell.zero_state(
                batch_size, self._model_conf["input_dtype"])

        # Source of the history fed back into the decoder; it may differ
        # between training and testing.
        rec_dec_inp = self._model_conf["rec_dec_inp_test"]
        if training:
            rec_dec_inp = self._model_conf["rec_dec_inp_train"]

        if rec_dec_inp is not None:
            n_concur = self._model_conf["rec_dec_concur"]
            if T % n_concur != 0:
                raise ValueError("total time steps must be " +
                                 "multiples of rec_dec_concur")
            n_frame = T // n_concur
        else:
            # One time step per RNN step.  n_concur must still be defined:
            # it is used by the log line below and by the output layer size
            # and reshapes inside the loop (previously a NameError).
            n_concur = 1
            n_frame = T
        n_hist = self._model_conf["rec_dec_inp_hist"]
        info("decoder: n_frame=%s, n_concur=%s, n_hist=%s" %
             (n_frame, n_concur, n_hist))

        def make_hist(hist, new_hist):
            """Return the last n_hist time steps after appending new_hist."""
            with tf.name_scope("make_hist"):
                if not self._model_conf["x_conti"]:
                    # TODO add target embedding?
                    new_hist = tf.cast(new_hist, tf.float32)
                if n_hist > n_concur:
                    # Keep the newest part of the old history so the total
                    # length stays n_hist.
                    diff = n_hist - n_concur
                    return tf.concat(
                        [hist[:, :, -diff:, :], new_hist], axis=-2)
                else:
                    return new_hist[:, :, -n_hist:, :]

        outputs = []
        if self._model_conf["x_conti"]:
            x_mu, x_logvar, x = [], [], []
        else:
            x_logits, x = [], []
        state_f = init_state
        hist = tf.zeros(
            (array_ops.shape(inputs)[0], C, n_hist, F),
            dtype=self._model_conf["input_dtype"],
            name="init_hist")
        for f in xrange(n_frame):
            input_f = inputs
            if rec_dec_inp:
                input_f = tf.concat(
                    [inputs, tf.reshape(hist, (-1, C * n_hist * F))],
                    axis=-1,
                    name="input_f_%s" % f)
            if f > 0:
                # Share the cell / output-layer variables across steps.
                tf.get_variable_scope().reuse_variables()

            output_f, state_f = cell(input_f, state_f)
            outputs.append(output_f)
            # TODO: input hist as well (like sampleRNN)?
            if self._model_conf["x_conti"]:
                x_mu_f, x_logvar_f, x_f = dense_latent(
                    inputs=output_f,
                    num_outputs=C * n_concur * F,
                    mu_nl=self._model_conf["x_mu_nl"],
                    logvar_nl=self._model_conf["x_logvar_nl"],
                    scope="recon_x_f")
                x_mu.append(
                    tf.reshape(x_mu_f, (-1, C, n_concur, F),
                               name="recon_x_mu_f_4d"))
                x_logvar.append(
                    tf.reshape(x_logvar_f, (-1, C, n_concur, F),
                               name="recon_x_logvar_f_4d"))
                x.append(
                    tf.reshape(x_f, (-1, C, n_concur, F),
                               name="recon_x_f_4d"))

                if rec_dec_inp == "targets":
                    t_slice = slice(f * n_concur, (f + 1) * n_concur)
                    hist = make_hist(hist, targets[:, :, t_slice, :])
                elif rec_dec_inp == "x_mu":
                    hist = make_hist(hist, x_mu[-1])
                elif rec_dec_inp == "x":
                    hist = make_hist(hist, x[-1])
                elif rec_dec_inp:
                    raise ValueError("unsupported rec_dec_inp (%s)" %
                                     (rec_dec_inp))
            else:
                # The discrete (categorical) output path is disabled; the
                # former implementation is kept below for reference.
                raise ValueError
                # n_bins = self._model_conf["n_bins"]
                # x_logits_f, x_f = cat_dense_latent(
                #         inputs=output_f,
                #         num_outputs=C * n_concur * F,
                #         n_bins=n_bins,
                #         scope="recon_x_f")
                # x_logits.append(tf.reshape(
                #         x_logits_f,
                #         (-1, C, n_concur, F, n_bins),
                #         name="recon_x_logits_f_5d"))
                # x.append(tf.reshape(
                #         x_f,
                #         (-1, C, n_concur, F),
                #         name="recon_x_f_4d"))

                # if rec_dec_inp == "targets":
                #     t_slice = slice(f * n_concur, (f + 1) * n_concur)
                #     hist = make_hist(hist, targets[:, :, t_slice, :])
                # elif rec_dec_inp == "x_max":
                #     hist = make_hist(hist, tf.argmax(x_logits[-1], -1))
                # elif rec_dec_inp == "x":
                #     hist = make_hist(hist, x[-1])
                # elif rec_dec_inp:
                #     raise ValueError("unsupported rec_dec_inp (%s)" % (
                #             rec_dec_inp))

        # (bs, n_frame, top_rnn_hu)
        outputs = tf.stack(outputs, axis=1, name="rec_outputs")
        x = tf.concat(x, axis=2, name="recon_x_t_4d")
        if self._model_conf["x_conti"]:
            x_mu = tf.concat(x_mu, axis=2, name="recon_x_mu_t_4d")
            x_logvar = tf.concat(x_logvar, axis=2,
                                 name="recon_x_logvar_t_4d")
            px = [x_mu, x_logvar]
        else:
            x_logits = tf.concat(x_logits, axis=2,
                                 name="recon_x_logits_t_5d")
            px = x_logits

    return outputs, px, x
def _build_z2_encoder(self, inputs, z1, reuse=False):
    """Build the z2 encoder and return its posterior parameters and output.

    Depending on ``rec_z2_enc`` the input is either run through a stacked
    RNN (with z1 appended to every frame) or flattened and concatenated
    with z1.  The result passes through the ``hu_z2_enc`` fully connected
    layers and a final ``dense_latent`` layer of size ``n_latent2``.

    Args:
        inputs: input tensor; it is transposed with permutation
            (0, 2, 1, 3) in the recurrent path, so presumably
            (N, C, T, F) -- TODO confirm with caller.
        z1: tensor appended to every frame (recurrent path) or to the
            flattened input (feed-forward path).
        reuse: forwarded to the variable scopes and layers.

    Returns:
        ([z2_mu, z2_logvar], z2) as produced by ``dense_latent``.

    Raises:
        ValueError: if T is not a multiple of ``rec_z2_enc_concur``.
        NotImplementedError: for bidirectional encoders or learned
            initial states.
    """
    weights_regularizer = l2_regularizer(self._train_conf["l2_weight"])
    conf = self._model_conf

    if conf["if_bn"]:
        normalizer_fn = batch_norm
        normalizer_params = {
            "scope": "BatchNorm",
            "is_training": self._feed_dict["is_train"],
            "reuse": reuse,
        }
        # TODO: need to upgrade to latest,
        # which commit support param_regularizers args
    else:
        normalizer_fn = None
        normalizer_params = None

    if not hasattr(self, "_debug_outputs"):
        self._debug_outputs = {}
    debug = self._debug_outputs

    C, T, F = conf["target_shape"]
    n_concur = conf["rec_z2_enc_concur"]
    if T % n_concur != 0:
        raise ValueError(
            "total time steps must be multiples of %s" % n_concur)
    n_frame = T // n_concur
    info("z2_encoder: n_frame=%s, n_concur=%s" % (n_frame, n_concur))

    with tf.variable_scope("z2_enc", reuse=reuse):
        if conf["rec_z2_enc"]:
            # Recurrent path: reshape to (N, n_frame, n_concur*C*F).
            inputs = array_ops.transpose(inputs, (0, 2, 1, 3))
            per_step_depth = np.prod(inputs.get_shape().as_list()[2:])
            inputs = tf.reshape(
                inputs, (-1, n_frame, n_concur * per_step_depth))

            # Append z1 to each frame.
            z1_per_frame = tf.tile(
                tf.expand_dims(z1, 1), (1, n_frame, 1))
            inputs = tf.concat([inputs, z1_per_frame], axis=-1)
            debug["inp_reshape"] = inputs

            if conf["rec_z2_enc_bi"]:
                raise NotImplementedError

            Cell = _cell_dict[conf["rec_cell_type"]]
            cell = MultiRNNCell([Cell(hu) for hu in conf["rec_z2_enc"]])

            if conf["rec_learn_init"]:
                raise NotImplementedError
            flat_shapes = tuple(
                array_ops.shape(t) for t in nest.flatten(inputs))
            batch_size = flat_shapes[0][0]
            init_state = cell.zero_state(batch_size, conf["input_dtype"])

            rnn_out, final_states = dynamic_rnn(
                cell,
                inputs,
                dtype=conf["input_dtype"],
                initial_state=init_state,
                time_major=False,
                scope="z2_enc_%sL_rec" % len(conf["rec_z2_enc"]))
            debug["raw_rnn_out"] = rnn_out
            debug["raw_rnn_final"] = final_states

            # Optionally keep only the top layer's final state.
            if conf["rec_z2_enc_out"].startswith("last"):
                final_states = final_states[-1:]

            if conf["rec_cell_type"] == "lstm":
                # The second "_"-separated field selects which LSTM state
                # parts (h and/or c) are used.
                wanted = conf["rec_z2_enc_out"].split("_")[1]
                outputs = []
                for state in final_states:
                    if "h" in wanted:
                        outputs.append(state.h)
                    if "c" in wanted:
                        outputs.append(state.c)
            else:
                outputs = final_states

            outputs = tf.concat(outputs, axis=-1)
            debug["concat_rnn_out"] = outputs
        else:
            # Feed-forward path: flatten the input and append z1.
            input_dim = np.prod(inputs.get_shape().as_list()[1:])
            flat_inputs = tf.reshape(inputs, [-1, input_dim])
            outputs = tf.concat([flat_inputs, z1], axis=1)

        # Fully connected layers.
        output_dim = np.prod(outputs.get_shape().as_list()[1:])
        outputs = tf.reshape(outputs, [-1, output_dim])
        for layer_no, hu in enumerate(conf["hu_z2_enc"], 1):
            outputs = fully_connected(
                inputs=outputs,
                num_outputs=hu,
                activation_fn=nn.relu,
                normalizer_fn=normalizer_fn,
                normalizer_params=normalizer_params,
                weights_regularizer=weights_regularizer,
                reuse=reuse,
                scope="z2_enc_fc%s" % layer_no)

        z2_mu, z2_logvar, z2 = dense_latent(
            outputs,
            conf["n_latent2"],
            logvar_nl=conf["z2_logvar_nl"],
            reuse=reuse,
            scope="z2_enc_lat")

    return [z2_mu, z2_logvar], z2
def __init__(self, model_parameters, training_parameters, directories, **kwargs):
    """Build the RNN model as a TensorFlow computational graph.

    The graph consists of hyperparameter and input placeholders, a stacked
    RNN (optionally wrapped with dropout), a linear output layer applied to
    every time step, a smoothing loop over the flattened predictions, an
    RMSE cost, thresholded label predictions with several metrics, and a
    momentum optimizer.

    Args:
        model_parameters: object providing ``sequence_length``,
            ``input_dimension``, ``state_size``, ``n_layers``, ``model``
            ('rnn', 'gru', 'lstm' or 'nas'), ``input_keep_probability``,
            ``output_keep_probability``, ``threshold`` and ``ma_step``.
        training_parameters: stored on the instance; not read here.
        directories: stored on the instance; not read here.
        **kwargs: accepted but not used here.

    Raises:
        Exception: if ``model_parameters.model`` is not a supported type.
    """
    self.model_parameters = model_parameters
    self.training_parameters = training_parameters
    self.directories = directories
    params = self.model_parameters

    # Define model hyperparameters Tensors
    with tf.name_scope("Parameters"):
        self.learning_rate = tf.placeholder(tf.float32,
                                            name="learning_rate")
        self.momentum = tf.placeholder(tf.float32, name="momentum")
        self.input_keep_probability = tf.placeholder(
            tf.float32, name="input_keep_probability")
        self.output_keep_probability = tf.placeholder(
            tf.float32, name="output_keep_probability")
        self.is_training = tf.placeholder(tf.bool)

    # Define input, output and initialization Tensors
    with tf.name_scope("Input"):
        self.inputs = tf.placeholder(
            "float",
            [None, params.sequence_length, params.input_dimension],
            name='input_placeholder')
        self.targets = tf.placeholder(
            "float",
            [None, params.sequence_length, 1],
            name='labels_placeholder')
        self.init = tf.placeholder(tf.float32,
                                   shape=[None, params.state_size],
                                   name="init")

    # Define the TensorFlow RNN computational graph
    with tf.name_scope("LSTMRNN_RNN"):
        cells = []
        # Define the layers
        for _ in range(params.n_layers):
            if params.model == 'rnn':
                cell = BasicRNNCell(params.state_size)
            elif params.model == 'gru':
                cell = GRUCell(params.state_size)
            elif params.model == 'lstm':
                cell = BasicLSTMCell(params.state_size,
                                     state_is_tuple=True)
            elif params.model == 'nas':
                cell = NASCell(params.state_size)
            else:
                raise Exception("model type not supported: {}".format(
                    params.model))

            # Whether to wrap is decided from the static configuration;
            # the actual keep probability is fed through the placeholder.
            if params.output_keep_probability < 1.0:
                cell = DropoutWrapper(
                    cell, output_keep_prob=self.output_keep_probability)
            if params.input_keep_probability < 1.0:
                cell = DropoutWrapper(
                    cell, input_keep_prob=self.input_keep_probability)
            cells.append(cell)
        cell = MultiRNNCell(cells)

        # Simulate time steps and get RNN cell output
        self.outputs, self.next_state = tf.nn.dynamic_rnn(
            cell, self.inputs, dtype=tf.float32)

    # Define cost Tensors
    with tf.name_scope("LSTMRNN_Cost"):
        # Flatten to apply same weights to all time steps
        self.flattened_outputs = tf.reshape(
            self.outputs, [-1, params.state_size],
            name="flattened_outputs")

        self.output_w = tf.Variable(
            tf.truncated_normal([params.state_size, 1], stddev=0.01),
            name="output_weights")
        self.variable_summaries(self.output_w, 'output_weights')

        self.output_b = tf.Variable(tf.constant(0.1),
                                    name="output_biases")
        # Summarize the bias itself (the weights were passed here before,
        # so the 'output_biases' summary duplicated the weights).
        self.variable_summaries(self.output_b, 'output_biases')

        # Define decision threshold Tensor
        self.decision_threshold = tf.Variable(params.threshold,
                                              name="decision_threshold")

        # Define moving average step Tensor
        self.ma_step = tf.Variable(params.ma_step, name="ma_step")

        # Linear output layer applied to every time step of the RNN
        # output: one value per (sequence, time step).
        self.logits = tf.add(
            tf.matmul(self.flattened_outputs, self.output_w),
            self.output_b, name="logits")
        self.logits_bn = self.batch_norm_wrapper(
            inputs=self.logits, is_training=self.is_training)
        tf.summary.histogram('logits', self.logits)
        tf.summary.histogram('logits_bn', self.logits_bn)

        self.predictions = tf.reshape(
            self.logits, [-1, params.sequence_length, 1],
            name="predictions")
        self.shaped_predictions = tf.reshape(self.predictions, [-1],
                                             name="shaped_predictions")

        # Pad the flattened predictions with (ma_step - 1) copies of the
        # last element before running the smoothing loop.
        last_idx = tf.shape(self.shaped_predictions)[0] - 1
        self.tmp_smoothed_predictions = tf.concat(
            [self.shaped_predictions,
             tf.fill(tf.expand_dims(self.ma_step - 1, 0),
                     self.shaped_predictions[last_idx])],
            axis=0, name="tmp_smoothed_predictions")
        self.ma_loop_idx = tf.constant(0, dtype='int32')
        self.shaped_smoothed_predictions = tf.zeros([0], dtype='float32')

        # ma_while_body is defined elsewhere; presumably it appends one
        # smoothed value per iteration -- see that method.
        _, self.shaped_smoothed_predictions = tf.while_loop(
            lambda i, _: i < tf.shape(self.shaped_predictions)[0],
            self.ma_while_body,
            [self.ma_loop_idx, self.shaped_smoothed_predictions],
            shape_invariants=[tf.TensorShape([]),
                              tf.TensorShape([None])])
        self.smoothed_predictions = tf.reshape(
            self.shaped_smoothed_predictions,
            [-1, params.sequence_length, 1],
            name="smoothed_predictions")
        self.soft_predictions_summary = tf.summary.tensor_summary(
            "soft_predictions", self.smoothed_predictions)
        # self.soft_predictions_summary = tf.summary.tensor_summary("soft_predictions", self.predictions)

        # self.shaped_logits = tf.reshape(self.logits,
        #     [-1, self.model_parameters.sequence_length, 1],
        #     name="shaped_logits")

        # Cross-Entropy
        # self.cost = tf.reduce_mean(-tf.reduce_sum(
        #     self.targets * tf.log(self.predictions),
        #     reduction_indices=[2]), name="cross_entropy")
        # self.cross_entropy = tf.reduce_mean(
        #     tf.nn.sigmoid_cross_entropy_with_logits(_sentinel=None,
        #         labels=self.targets,
        #         logits=self.predictions),
        #     name="cross_entropy")
        # self.cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        #     _sentinel=None,
        #     labels=self.targets,
        #     logits=self.shaped_logits,
        #     name="cross_entropy")

        # Root Mean Squared Error
        # self.mean_squared_error = tf.losses.mean_squared_error(
        #     labels=self.targets,
        #     predictions=self.predictions)
        self.cost = tf.sqrt(tf.reduce_mean(
            tf.squared_difference(
                self.smoothed_predictions, self.targets)))
        # self.cost = tf.sqrt(tf.reduce_mean(
        #     tf.squared_difference(
        #         self.predictions, self.targets)))
        tf.summary.scalar('training_cost', self.cost)
        # self.cost = tf.reduce_mean(
        #     self.cross_entropy,
        #     name="cost")

        # Threshold the smoothed predictions into 0/1 labels.
        voicing_condition = tf.greater(
            self.smoothed_predictions,
            tf.fill(tf.shape(self.smoothed_predictions),
                    self.decision_threshold),
            name="thresholding")
        # voicing_condition = tf.greater(self.predictions,
        #     tf.fill(tf.shape(self.predictions), self.decision_threshold),
        #     name="thresholding")
        self.label_predictions = tf.where(
            voicing_condition,
            tf.ones_like(self.smoothed_predictions),
            tf.zeros_like(self.smoothed_predictions),
            name="label_predictions")
        # self.label_predictions = tf.where(voicing_condition,
        #     tf.ones_like(self.predictions) ,
        #     tf.zeros_like(self.predictions),
        #     name="label_predictions")
        self.hard_predictions_summary = tf.summary.tensor_summary(
            "hard_predictions", self.label_predictions)
        self.correct_prediction = tf.equal(self.label_predictions,
                                           self.targets,
                                           name="correct_predictions")

        self.r = tf.reshape(self.targets, [-1])
        self.h = tf.reshape(self.label_predictions, [-1])

        # Defined outside the while loop to avoid problems
        self.dump_one = tf.constant(1, dtype=tf.int32, shape=[])
        # dtype is passed by keyword: the second positional parameter of
        # tf.Variable is `trainable`, not `dtype`.
        self.temp_pk_miss = tf.Variable([0], dtype=tf.int32,
                                        name='temp_pk_miss')
        self.temp_pk_falsealarm = tf.Variable([0], dtype=tf.int32,
                                              name='temp_pk_falsealarm')
        self.loop_idx = tf.constant(0, dtype=tf.int32, name='loop_idx')
        self.loop_vars = (self.loop_idx, self.temp_pk_miss,
                          self.temp_pk_falsealarm)

        # while_condition / while_body are defined elsewhere in the class.
        _, self.all_temp_pk_miss, self.all_temp_pk_falsealarm = \
            tf.while_loop(
                self.while_condition, self.while_body, self.loop_vars,
                shape_invariants=(self.loop_idx.get_shape(),
                                  tf.TensorShape([None]),
                                  tf.TensorShape([None])))

        self.pk_miss = tf.reduce_mean(
            tf.cast(self.all_temp_pk_miss, tf.float32))
        tf.summary.scalar('p_miss', self.pk_miss)

        self.pk_falsealarm = tf.reduce_mean(
            tf.cast(self.all_temp_pk_falsealarm, tf.float32))
        tf.summary.scalar('p_falsealarm', self.pk_falsealarm)

        self.pk = tf.reduce_mean(
            tf.cast(
                tf.add(self.all_temp_pk_miss,
                       self.all_temp_pk_falsealarm),
                tf.float32),
            name='pk')
        tf.summary.scalar('pk', self.pk)

        self.accuracy = tf.reduce_mean(
            tf.cast(self.correct_prediction, tf.float32),
            name="accuracy")
        tf.summary.scalar('accuracy', self.accuracy)

        self.recall, self.update_op_recall = tf.metrics.recall(
            labels=self.targets,
            predictions=self.label_predictions,
            name="recall")
        tf.summary.scalar('recall', self.recall)

        self.precision, self.update_op_precision = tf.metrics.precision(
            labels=self.targets,
            predictions=self.label_predictions,
            name="precision")
        tf.summary.scalar('precision', self.precision)

    # Define Training Tensors
    with tf.name_scope("LSTMRNN_Train"):
        # Momentum optimisation
        self.optimizer = tf.train.MomentumOptimizer(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            name="optimizer")
        self.train_step = self.optimizer.minimize(self.cost,
                                                  name="train_step")

        # Initializing the variables (local variables are needed by the
        # tf.metrics ops above).
        self.initializer = tf.group(tf.global_variables_initializer(),
                                    tf.local_variables_initializer())