def _encode(self):
    # the embedding is shared between encoder and decoder
    # since the source and the target for an autoencoder are the same
    with tf.variable_scope('encoder'):
        tied_embedding = tf.get_variable('tied_embedding',
                                         initializer=tf.constant(get_w2v_model(self.vocab)),
                                         trainable=True)
        lookup_result = tf.nn.embedding_lookup(tied_embedding, self.enc_inp)

        masked_emb = tf.concat([tf.zeros([1, 1]),
                                tf.ones([tied_embedding.get_shape()[0] - 1, 1])],
                               axis=0)
        mask_lookup_result = tf.nn.embedding_lookup(masked_emb, self.enc_inp)
        lookup_result = tf.multiply(lookup_result, mask_lookup_result)

        encoder_proj = tf.layers.dense(lookup_result, self.config['rnn_size'])

        if self.config.get('bi_lstm'):
            forward_encoder = self._rnn_cell(self.config['w2v_embedding_size'] // 2)
            backward_encoder = self._rnn_cell(self.config['w2v_embedding_size'] // 2)
            _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=forward_encoder,
                cell_bw=backward_encoder,
                inputs=encoder_proj,
                sequence_length=self.enc_seq_len,
                dtype=tf.float32)
            encoded_state = tf.concat((state_fw, state_bw), -1)
        else:
            _, encoded_state = tf.nn.dynamic_rnn(
                cell=self._rnn_cell(self.config['w2v_embedding_size']),
                inputs=encoder_proj,
                sequence_length=self.enc_seq_len,
                dtype=tf.float32)

        self.lookup_result = lookup_result
        self.z_mean = tf.layers.dense(encoded_state, self.config['latent_size'])
        self.z_logvar = tf.layers.dense(encoded_state, self.config['latent_size'])
        return self.z_mean, self.z_logvar
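The masked_emb construction above (and in the graphs below) zeroes out the embedding row of index 0, so padding tokens contribute nothing to downstream sums, means, or RNN inputs. A minimal NumPy sketch of the same idea, with a hypothetical 4-word vocabulary and 3-dimensional embeddings chosen only for illustration:

import numpy as np

# hypothetical embedding matrix: row 0 is the PAD token
embedding_matrix = np.random.randn(4, 3).astype(np.float32)
token_ids = np.array([[2, 3, 0, 0]])            # one utterance, padded with zeros

# zero row for PAD, ones elsewhere, mirroring masked_emb above
mask = np.concatenate([np.zeros((1, 1)), np.ones((3, 1))], axis=0)

lookup = embedding_matrix[token_ids]            # shape (1, 4, 3)
mask_lookup = mask[token_ids]                   # shape (1, 4, 1)
masked_lookup = lookup * mask_lookup            # PAD positions become all-zero rows

assert np.allclose(masked_lookup[0, 2:], 0.0)   # the two padded positions carry no signal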
def __graph__(self):
    # entry points
    input_words_ = tf.placeholder(tf.int32,
                                  [None, self.max_input_length, self.max_sequence_length],
                                  name='input_words')
    bow_features_ = tf.placeholder(tf.float32,
                                   [None, self.max_input_length, self.config['vocabulary_size']],
                                   name='bow_features')
    context_features_ = tf.placeholder(tf.float32,
                                       [None, self.max_input_length, self.feature_vector_size],
                                       name='input_features')
    action_ = tf.placeholder(tf.int32, [None, self.max_input_length], name='ground_truth_action')
    prev_action_ = tf.placeholder(tf.float32,
                                  [None, self.max_input_length, self.action_size],
                                  name='prev_action')
    action_mask_ = tf.placeholder(tf.float32,
                                  [None, self.max_input_length, self.action_size],
                                  name='action_mask')
    # action_seq_length = tf.count_nonzero(action_, -1)

    embedding_matrix = tf.get_variable('emb',
                                       initializer=tf.constant(get_w2v_model(self.vocab)),
                                       trainable=True)
    lookup_result = tf.nn.embedding_lookup(embedding_matrix, input_words_)
    masked_emb = tf.concat([tf.zeros([1, 1]),
                            tf.ones([embedding_matrix.get_shape()[0] - 1, 1])],
                           axis=0)
    mask_lookup_result = tf.nn.embedding_lookup(masked_emb, input_words_)
    lookup_result = tf.multiply(lookup_result, mask_lookup_result)

    utterance_embeddings = tf.reduce_mean(lookup_result, axis=2)
    all_input = tf.concat([utterance_embeddings,
                           bow_features_,
                           context_features_,
                           prev_action_,
                           action_mask_],
                          axis=-1)

    # input projection
    projected_features = tf.layers.dense(all_input, self.nb_hidden, name='input_projection')

    lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.nb_hidden, state_is_tuple=True)
    outputs, states = tf.nn.dynamic_rnn(lstm_cell, projected_features, dtype=tf.float32)

    # output projection
    logits = tf.layers.dense(outputs, self.action_size)

    # probabilities
    # normalization : elemwise multiply with action mask
    # not doing softmax because it's taken care of in the cross-entropy!
    probs = tf.multiply(logits, action_mask_)

    # prediction
    prediction = tf.argmax(probs, axis=-1)

    # self.all_model_weights = tf.concat([tf.reshape(var, (-1,)) for var in tf.trainable_variables()], axis=-1)
    # self.initial_weights = tf.Variable(initial_value=tf.zeros_like(self.all_model_weights), name='initial_weights', trainable=False)
    # self.euclidean_loss = tf.nn.l2_loss(self.all_model_weights - self.initial_weights)
    # euclidean_loss_weight = float(self.model_folder is not None)

    # mask_fn = lambda l: tf.sequence_mask(l, self.max_input_length, dtype=tf.float32)
    # sequence_mask = mask_fn(action_seq_length)
    sequence_mask = tf.placeholder(tf.float32, [None, self.max_input_length], name='sequence_mask')

    # loss
    l2_loss = tf.reduce_sum([tf.nn.l2_loss(v)
                             for v in tf.trainable_variables()
                             if v.name[0] != 'b']) * self.config['l2_coef']
    hcn_loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                targets=action_,
                                                weights=sequence_mask,
                                                average_across_batch=False)
    loss = hcn_loss + l2_loss  # + self.euclidean_loss * euclidean_loss_weight

    # train op
    self.lr = tf.train.exponential_decay(self.config['learning_rate'],
                                         self.global_step,
                                         self.config.get('steps_before_decay', 0),
                                         self.config.get('learning_rate_decay', 1.0),
                                         staircase=True)
    optimizer = getattr(tf.train, self.config['optimizer'])(self.lr)
    gradients, variables = zip(*optimizer.compute_gradients(loss))
    gradients_filtered, variables_filtered = [], []
    if len(self.trainable_vars):
        for gradient, variable in zip(gradients, variables):
            if variable.name in self.trainable_vars:
                gradients_filtered.append(gradient)
                variables_filtered.append(variable)
    else:
        gradients_filtered, variables_filtered = gradients, variables
    gradients_filtered, _ = tf.clip_by_global_norm(gradients_filtered, self.config['clip_norm'])
    train_op = optimizer.apply_gradients(zip(gradients_filtered, variables_filtered),
                                         global_step=self.global_step)

    # attach symbols to self
    self.loss = loss
    self.prediction = prediction
    self.probs = probs
    self.logits = logits
    self.train_op = train_op

    # attach placeholders
    self.input_words_ = input_words_
    self.context_features_ = context_features_
    self.bow_features_ = bow_features_
    self.action_ = action_
    self.prev_action_ = prev_action_
    self.action_mask_ = action_mask_
    self.sequence_mask_ = sequence_mask
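For reference, a minimal sketch of how the placeholders attached above could be fed for one training step. The `model` and `sess` names, the batch size, and the all-zero dummy arrays are assumptions for illustration only; the real dimensions come from the model's config, and the class constructor and session setup are not shown here.

import numpy as np

# hypothetical dimensions, matching the placeholder shapes above
batch_size, max_input_length, max_sequence_length = 2, 10, 20
vocabulary_size, feature_vector_size, action_size = 500, 30, 16

feed_dict = {
    model.input_words_: np.zeros((batch_size, max_input_length, max_sequence_length), dtype=np.int32),
    model.bow_features_: np.zeros((batch_size, max_input_length, vocabulary_size), dtype=np.float32),
    model.context_features_: np.zeros((batch_size, max_input_length, feature_vector_size), dtype=np.float32),
    model.action_: np.zeros((batch_size, max_input_length), dtype=np.int32),
    model.prev_action_: np.zeros((batch_size, max_input_length, action_size), dtype=np.float32),
    model.action_mask_: np.ones((batch_size, max_input_length, action_size), dtype=np.float32),
    model.sequence_mask_: np.ones((batch_size, max_input_length), dtype=np.float32),
}

# `model` is assumed to be an instance of this class, `sess` its tf.Session
_, batch_loss = sess.run([model.train_op, model.loss], feed_dict=feed_dict)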
def __graph__(self):
    # entry points
    input_words = tf.placeholder(tf.int32,
                                 [None, self.max_input_length, self.max_sequence_length],
                                 name='input_words')
    input_contexts = tf.placeholder(tf.float32,
                                    [None, self.max_input_length, self.feature_vector_size],
                                    name='input_contexts')
    bow_features = tf.placeholder(tf.float32,
                                  [None, self.max_input_length, len(self.vocab)],
                                  name='bow_features')
    action_ = tf.placeholder(tf.int32, [None, self.max_input_length], name='ground_truth_action')
    prev_action_ = tf.placeholder(tf.float32,
                                  [None, self.max_input_length, self.action_size],
                                  name='prev_action')
    action_mask_ = tf.placeholder(tf.float32,
                                  [None, self.max_input_length, self.action_size],
                                  name='action_mask')
    # action_seq_length = tf.count_nonzero(action_, -1)

    input_words_reshaped = tf.reshape(input_words, shape=[-1, self.max_sequence_length])
    embedding_matrix = tf.get_variable('emb',
                                       initializer=tf.constant(get_w2v_model(self.vocab)),
                                       trainable=self.config['trainable_embeddings'])
    lookup_result = tf.nn.embedding_lookup(embedding_matrix, input_words_reshaped)
    masked_emb = tf.concat([tf.zeros([1, 1]),
                            tf.ones([embedding_matrix.get_shape()[0] - 1, 1])],
                           axis=0)
    mask_lookup_result = tf.nn.embedding_lookup(masked_emb, input_words_reshaped)
    lookup_result = tf.multiply(lookup_result, mask_lookup_result)

    self.turn_level_proj = tf.layers.dense(lookup_result, self.nb_hidden, name='turn_level_proj')

    if self.config['bi_lstm']:
        self.turn_fw_cell = tf.contrib.rnn.BasicLSTMCell(int(self.nb_hidden / 2),
                                                         state_is_tuple=True,
                                                         name='turn_encoder_fw')
        self.turn_bw_cell = tf.contrib.rnn.BasicLSTMCell(int(self.nb_hidden / 2),
                                                         state_is_tuple=True,
                                                         name='turn_encoder_bw')
        _, turn_states = tf.nn.bidirectional_dynamic_rnn(self.turn_fw_cell,
                                                         self.turn_bw_cell,
                                                         self.turn_level_proj,
                                                         dtype=tf.float32)
        fw_states, bw_states = turn_states
        fw_states_reshaped = tf.reshape(fw_states.c,
                                        shape=[-1, self.max_input_length, int(self.nb_hidden / 2)])
        bw_states_reshaped = tf.reshape(bw_states.c,
                                        shape=[-1, self.max_input_length, int(self.nb_hidden / 2)])
        self.turn_states_reshaped = tf.concat([fw_states_reshaped, bw_states_reshaped], axis=-1)
    else:
        self.turn_lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.nb_hidden,
                                                           state_is_tuple=True,
                                                           name='turn_encoder')
        turn_outputs, turn_states = tf.nn.dynamic_rnn(self.turn_lstm_cell,
                                                      self.turn_level_proj,
                                                      dtype=tf.float32)
        self.turn_states_reshaped = tf.reshape(turn_states.c,
                                               shape=[-1, self.max_input_length, self.nb_hidden])

    self.z_mean = tf.layers.dense(self.turn_states_reshaped, self.config['latent_size'], name='z_mean')
    self.z_logvar = tf.layers.dense(self.turn_states_reshaped, self.config['latent_size'], name='z_logvar')
    gaussian_noise = tf.truncated_normal(tf.shape(self.z_logvar))
    self.z = self.z_mean + tf.exp(0.5 * self.z_logvar) * gaussian_noise

    bow_logits = tf.layers.dense(self.z, len(self.vocab), name='bow_logits')

    # input projection
    all_inputs = tf.concat([self.z, bow_features, input_contexts, action_mask_, prev_action_], axis=-1)
    # add relu/tanh here if necessary
    dialog_lstm_projection = tf.layers.dense(all_inputs,
                                             self.config['embedding_size'],
                                             name='dialog_lstm_projection')

    lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.config['embedding_size'],
                                             state_is_tuple=True,
                                             name='dialog_encoder')
    outputs, states = tf.nn.dynamic_rnn(lstm_cell, dialog_lstm_projection, dtype=tf.float32)

    # output projection
    logits = tf.layers.dense(outputs, self.action_size, name='output_projection')

    # probabilities
    # normalization : elemwise multiply with action mask
    # not doing softmax because it's taken care of in the cross-entropy!
    probs = tf.multiply(logits, action_mask_)

    # prediction
    prediction = tf.argmax(probs, axis=-1)

    # mask_fn = lambda l: tf.sequence_mask(l, self.max_input_length, dtype=tf.float32)
    # sequence_mask = mask_fn(action_seq_length)
    sequence_mask = tf.placeholder(tf.float32, [None, self.max_input_length], name='sequence_mask')

    # loss
    self.hcn_loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                     targets=action_,
                                                     weights=sequence_mask,
                                                     average_across_batch=False)
    # self.vae_kl_loss = tf.reduce_mean(tf.reshape(self.vrae._kl_loss_fn(self.vrae.z_mean, self.vrae.z_logvar), shape=[-1, self.max_input_length]), axis=-1)
    # self.vae_bow_loss = tf.reduce_mean(tf.reshape(self.vrae._bow_loss_fn(self.vrae.bow_logits, self.vrae.bow_targets), shape=[-1, self.max_input_length]), axis=-1)
    self.vae_kl_loss = -0.5 * tf.reduce_mean(
        tf.reduce_sum(1.0 + self.z_logvar - tf.square(self.z_mean) - tf.exp(self.z_logvar), axis=-1),
        axis=-1)
    self.vae_bow_loss = tf.reduce_mean(
        tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=bow_features, logits=bow_logits), axis=-1),
        axis=-1)
    self.vae_overall_loss = self.vae_kl_loss + self.vae_bow_loss
    self.l2_loss = tf.reduce_sum([tf.nn.l2_loss(v)
                                  for v in tf.trainable_variables()
                                  if v.name[0] != 'b']) * self.config['l2_coef']
    self.loss = self.hcn_loss + self.vae_overall_loss + self.l2_loss

    self.lr = tf.train.exponential_decay(self.config['learning_rate'],
                                         self.global_step,
                                         self.config.get('steps_before_decay', 0),
                                         self.config.get('learning_rate_decay', 1.0),
                                         staircase=True)
    # train op
    optimizer = getattr(tf.train, self.config['optimizer'])(self.lr)
    gradients, variables = zip(*optimizer.compute_gradients(self.loss))
    gradients_filtered, variables_filtered = [], []
    if len(self.trainable_vars):
        for gradient, variable in zip(gradients, variables):
            if variable.name in self.trainable_vars:
                gradients_filtered.append(gradient)
                variables_filtered.append(variable)
    else:
        gradients_filtered, variables_filtered = gradients, variables
    gradients_filtered, _ = tf.clip_by_global_norm(gradients_filtered, self.config['clip_norm'])
    train_op = optimizer.apply_gradients(zip(gradients_filtered, variables_filtered),
                                         global_step=self.global_step)

    # attach symbols to self
    self.prediction = prediction
    self.probs = probs
    self.logits = logits
    self.sequence_mask_ = sequence_mask
    self.train_op = train_op

    # attach placeholders
    self.input_words = input_words
    self.input_contexts = input_contexts
    self.bow_features_ = bow_features
    self.action_ = action_
    self.action_mask_ = action_mask_
    self.prev_action_ = prev_action_
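The vae_kl_loss term above is the standard closed-form KL divergence between the approximate posterior and a standard-normal prior, and self.z is the reparameterised sample. In the notation of the code, where z_logvar stands for \(\log\sigma^{2}\) (and the noise is drawn with tf.truncated_normal rather than an exact standard normal):

\[
z = \mu + \exp\!\left(\tfrac{1}{2}\log\sigma^{2}\right)\odot\epsilon,
\qquad
\mathrm{KL}\!\left(\mathcal{N}(\mu,\sigma^{2})\,\|\,\mathcal{N}(0,I)\right)
= -\tfrac{1}{2}\sum_{d}\left(1+\log\sigma_{d}^{2}-\mu_{d}^{2}-\sigma_{d}^{2}\right).
\]

The code then averages this per-turn KL over the turn dimension, and adds the bag-of-words term (a sigmoid cross-entropy between bow_logits and bow_features) to obtain vae_overall_loss.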
def __graph__(self):
    # entry points
    input_words_ = tf.placeholder(tf.int32,
                                  [None, self.max_input_length, self.max_sequence_length],
                                  name='input_words')
    bow_features_ = tf.placeholder(tf.float32,
                                   [None, self.max_input_length, self.config['vocabulary_size']],
                                   name='bow_features')
    context_features_ = tf.placeholder(tf.float32,
                                       [None, self.max_input_length, self.feature_vector_size],
                                       name='input_features')
    action_ = tf.placeholder(tf.int32, [None, self.max_input_length], name='ground_truth_action')
    prev_action_ = tf.placeholder(tf.float32,
                                  [None, self.max_input_length, self.action_size],
                                  name='prev_action')
    action_mask_ = tf.placeholder(tf.float32,
                                  [None, self.max_input_length, self.action_size],
                                  name='action_mask')
    # action_seq_length = tf.count_nonzero(action_, -1)

    embedding_matrix = tf.get_variable('emb',
                                       initializer=tf.constant(get_w2v_model(self.vocab)),
                                       trainable=True)
    lookup_result = tf.nn.embedding_lookup(embedding_matrix, input_words_)
    masked_emb = tf.concat([tf.zeros([1, 1]),
                            tf.ones([embedding_matrix.get_shape()[0] - 1, 1])],
                           axis=0)
    mask_lookup_result = tf.nn.embedding_lookup(masked_emb, input_words_)
    lookup_result = tf.multiply(lookup_result, mask_lookup_result)

    filter_sizes = self.config['filter_sizes']
    num_filters = self.config['num_filters']

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    lookup_result_reshaped_expanded = tf.expand_dims(
        tf.reshape(lookup_result,
                   shape=(-1, self.max_sequence_length, self.config['w2v_embedding_size'])),
        -1)
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, self.config['w2v_embedding_size'], 1, num_filters]
            W_conv = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W_conv")
            b_conv = tf.Variable(tf.constant(0.0, shape=[num_filters]), name="b_conv")
            conv = tf.nn.conv2d(lookup_result_reshaped_expanded,
                                W_conv,
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b_conv), name="relu_conv")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(h,
                                    ksize=[1, self.config['max_sequence_length'] - filter_size + 1, 1, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

    # Add dropout
    # with tf.name_scope("dropout"):
    #     h_drop_flat = tf.nn.dropout(h_pool_flat, self.config['conv_dropout_keep_prob'])
    h_drop = tf.reshape(h_pool_flat, shape=(-1, self.max_input_length, num_filters_total))
    # h_drop = tf.reshape(h_drop_flat, shape=(-1, self.max_input_length, num_filters_total))

    self.z_mean = tf.layers.dense(h_drop, self.config['latent_size'])
    self.z_logvar = tf.layers.dense(h_drop, self.config['latent_size'])
    gaussian_noise = tf.random_normal(tf.shape(self.z_logvar))
    self.z = self.z_mean + tf.exp(0.5 * self.z_logvar) * gaussian_noise

    bow_logits = tf.layers.dense(self.z, len(self.vocab), name='bow_logits')

    all_input = tf.concat([self.z, bow_features_, context_features_, prev_action_, action_mask_], axis=-1)

    # input projection
    projected_features = tf.layers.dense(all_input, self.nb_hidden, name='input_projection')

    lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.nb_hidden, state_is_tuple=True)
    outputs, states = tf.nn.dynamic_rnn(lstm_cell, projected_features, dtype=tf.float32)

    # output projection
    logits = tf.layers.dense(outputs, self.action_size, name='logits')

    # probabilities
    # normalization : elemwise multiply with action mask
    # not doing softmax because it's taken care of in the cross-entropy!
    probs = tf.multiply(logits, action_mask_)

    # prediction
    prediction = tf.argmax(probs, axis=-1)

    mask_fn = lambda l: tf.sequence_mask(l, self.max_input_length, dtype=tf.float32)
    # sequence_mask = mask_fn(action_seq_length)
    sequence_mask = tf.placeholder(tf.float32, [None, self.max_input_length], name='sequence_mask')

    # loss
    self.l2_loss = tf.reduce_sum([tf.nn.l2_loss(v)
                                  for v in tf.trainable_variables()
                                  if v.name[0] != 'b']) * self.config['l2_coef']
    self.kl_loss = -0.5 * tf.reduce_mean(
        tf.reduce_sum(1.0 + self.z_logvar - tf.square(self.z_mean) - tf.exp(self.z_logvar), axis=-1),
        axis=-1)
    self.bow_loss = tf.reduce_mean(
        tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=bow_features_, logits=bow_logits), axis=-1),
        axis=-1)
    self.hcn_loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                     targets=action_,
                                                     weights=sequence_mask,
                                                     average_across_batch=False)
    loss = self.hcn_loss + self.kl_loss + self.bow_loss + self.l2_loss

    # train op
    self.lr = tf.train.exponential_decay(self.config['learning_rate'],
                                         self.global_step,
                                         self.config.get('steps_before_decay', 0),
                                         self.config.get('learning_rate_decay', 1.0),
                                         staircase=True)
    optimizer = getattr(tf.train, self.config['optimizer'])(self.lr)
    gradients, variables = zip(*optimizer.compute_gradients(loss))
    gradients_filtered, variables_filtered = [], []
    if len(self.trainable_vars):
        for gradient, variable in zip(gradients, variables):
            if variable.name in self.trainable_vars:
                gradients_filtered.append(gradient)
                variables_filtered.append(variable)
    else:
        gradients_filtered, variables_filtered = gradients, variables
    gradients_filtered, _ = tf.clip_by_global_norm(gradients_filtered, self.config['clip_norm'])
    train_op = optimizer.apply_gradients(zip(gradients_filtered, variables_filtered),
                                         global_step=self.global_step)

    # attach symbols to self
    self.loss = loss
    self.prediction = prediction
    self.probs = probs
    self.logits = logits
    self.train_op = train_op

    # attach placeholders
    self.input_words_ = input_words_
    self.context_features_ = context_features_
    self.bow_features_ = bow_features_
    self.action_ = action_
    self.prev_action_ = prev_action_
    self.action_mask_ = action_mask_
    self.sequence_mask_ = sequence_mask
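The width of h_pool_flat in this graph follows directly from the VALID convolution and max-pooling above: a filter of height f slides over max_sequence_length token embeddings and produces max_sequence_length - f + 1 positions, which the max-pool collapses to one num_filters-dimensional vector per utterance, so concatenating over all filter sizes gives num_filters * len(filter_sizes) features. A small worked example with hypothetical config values (not taken from the original configuration):

# hypothetical values, only to illustrate the shape arithmetic of the conv/max-pool stack
max_sequence_length = 20
filter_sizes = [3, 4, 5]
num_filters = 64

for f in filter_sizes:
    conv_positions = max_sequence_length - f + 1   # output height of the VALID convolution
    print(f, conv_positions)                        # 3 -> 18, 4 -> 17, 5 -> 16; max-pool reduces each to 1

num_filters_total = num_filters * len(filter_sizes)
print(num_filters_total)                            # 192 features per utterance in h_pool_flat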