Example #1
0
    def _encode(self):
        """Build the encoder sub-graph and return the latent parameters.

        Token ids in ``self.enc_inp`` are embedded with a trainable matrix
        initialised from ``get_w2v_model(self.vocab)``, the vectors of id 0
        are zeroed out, and the result is projected to ``config['rnn_size']``
        features.  The projection feeds either a bidirectional RNN (when
        ``config['bi_lstm']`` is truthy) or a single forward RNN, both built
        from ``self._rnn_cell``.  Two dense layers applied to the final RNN
        state produce ``z_mean`` and ``z_logvar``.

        Side effects:
            Sets ``self.lookup_result``, ``self.z_mean`` and
            ``self.z_logvar``.

        Returns:
            The tuple ``(self.z_mean, self.z_logvar)``.
        """
        with tf.variable_scope('encoder'):
            # the embedding is tied between encoder and decoder: an
            # autoencoder reads and reproduces the very same sequence
            w2v_weights = tf.constant(get_w2v_model(self.vocab))
            tied_embedding = tf.get_variable('tied_embedding',
                                             initializer=w2v_weights,
                                             trainable=True)
            token_vectors = tf.nn.embedding_lookup(tied_embedding,
                                                   self.enc_inp)

            # One-column table holding 0 for id 0 and 1 for every other id;
            # looking it up per token gives a multiplicative mask that
            # blanks the embedding of id 0.
            vocab_size = tied_embedding.get_shape()[0]
            zero_row = tf.zeros([1, 1])
            one_rows = tf.ones([vocab_size - 1, 1])
            keep_table = tf.concat([zero_row, one_rows], axis=0)
            keep_mask = tf.nn.embedding_lookup(keep_table, self.enc_inp)
            lookup_result = tf.multiply(token_vectors, keep_mask)

            encoder_proj = tf.layers.dense(lookup_result,
                                           self.config['rnn_size'])

            if not self.config.get('bi_lstm'):
                single_cell = self._rnn_cell(self.config['w2v_embedding_size'])
                _, encoded_state = tf.nn.dynamic_rnn(
                    cell=single_cell,
                    inputs=encoder_proj,
                    sequence_length=self.enc_seq_len,
                    dtype=tf.float32)
            else:
                # each direction gets half of the configured size so that the
                # concatenated state has the full size again
                fw_cell = self._rnn_cell(
                    self.config['w2v_embedding_size'] // 2)
                bw_cell = self._rnn_cell(
                    self.config['w2v_embedding_size'] // 2)
                _, final_states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=fw_cell,
                    cell_bw=bw_cell,
                    inputs=encoder_proj,
                    sequence_length=self.enc_seq_len,
                    dtype=tf.float32)
                state_fw, state_bw = final_states
                encoded_state = tf.concat((state_fw, state_bw), -1)

        self.lookup_result = lookup_result
        latent_size = self.config['latent_size']
        self.z_mean = tf.layers.dense(encoded_state, latent_size)
        self.z_logvar = tf.layers.dense(encoded_state, latent_size)
        return self.z_mean, self.z_logvar
Example #2
0
    def __graph__(self):
        """Build the training and inference graph of the action classifier.

        The words of every turn are embedded with a trainable matrix
        initialised from ``get_w2v_model(self.vocab)``; the vectors of id 0
        are zeroed out and the token vectors are averaged over the token
        axis.  That average is concatenated with the bag-of-words, context,
        previous-action and action-mask features, projected to
        ``self.nb_hidden`` units and run through an LSTM over the turn axis.
        A dense layer on the LSTM outputs yields per-turn action logits.

        Attributes set on ``self``:
            loss, logits, probs, prediction, train_op, lr: graph outputs.
            input_words_, bow_features_, context_features_, action_,
            prev_action_, action_mask_, sequence_mask_: feed placeholders.

        Notes:
            * ``probs`` is the raw ``logits * action_mask`` product (no
              softmax), exactly as before.
            * ``prediction`` is the argmax restricted to actions whose mask
              entry is positive, so a disallowed action can no longer win
              when all allowed logits are negative.
            * The L2 penalty skips variables whose own (un-scoped) name
              starts with ``'b'``, i.e. the ``bias`` variables.
        """
        # entry points
        input_words_ = tf.placeholder(tf.int32, [None, self.max_input_length, self.max_sequence_length], name='input_words')
        bow_features_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.config['vocabulary_size']], name='bow_features')
        context_features_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.feature_vector_size], name='input_features')
        action_ = tf.placeholder(tf.int32, [None, self.max_input_length], name='ground_truth_action')
        prev_action_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.action_size], name='prev_action')
        action_mask_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.action_size], name='action_mask')

        embedding_matrix = tf.get_variable('emb',
                                           initializer=tf.constant(get_w2v_model(self.vocab)),
                                           trainable=True)
        lookup_result = tf.nn.embedding_lookup(embedding_matrix, input_words_)
        # one-column table (0 for id 0, 1 otherwise) used as a multiplicative
        # mask that blanks the embedding of id 0
        masked_emb = tf.concat([tf.zeros([1, 1]), tf.ones([embedding_matrix.get_shape()[0] - 1, 1])], axis=0)
        mask_lookup_result = tf.nn.embedding_lookup(masked_emb, input_words_)
        lookup_result = tf.multiply(lookup_result, mask_lookup_result)
        # average the token vectors of every turn (axis 2 is the token axis)
        utterance_embeddings = tf.reduce_mean(lookup_result, axis=2)

        all_input = tf.concat([utterance_embeddings, bow_features_, context_features_, prev_action_, action_mask_], axis=-1)
        # input projection
        projected_features = tf.layers.dense(all_input, self.nb_hidden, name='input_projection')

        lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.nb_hidden, state_is_tuple=True)
        outputs, states = tf.nn.dynamic_rnn(lstm_cell, projected_features, dtype=tf.float32)

        # output projection
        logits = tf.layers.dense(outputs, self.action_size)
        # "probabilities": element-wise product with the action mask.
        # No softmax here because the cross-entropy loss works on the logits.
        probs = tf.multiply(logits, action_mask_)

        # prediction: argmax over the allowed actions only.  Taking the argmax
        # of ``probs`` would select a masked-out action (value 0) whenever all
        # allowed logits are negative.
        # assumes action_mask_ is 0 for disallowed actions and > 0 otherwise
        action_allowed = tf.greater(action_mask_, 0.0)
        lowest_logits = tf.fill(tf.shape(logits), tf.float32.min)
        prediction = tf.argmax(tf.where(action_allowed, logits, lowest_logits), axis=-1)

        sequence_mask = tf.placeholder(tf.float32, [None, self.max_input_length], name='sequence_mask')
        # loss
        # Variable names are fully scoped ('input_projection/bias:0'), so the
        # bias test has to look at the last path component, not at name[0].
        l2_loss = tf.reduce_sum([tf.nn.l2_loss(v)
                                 for v in tf.trainable_variables()
                                 if not v.name.rsplit('/', 1)[-1].startswith('b')]) * self.config['l2_coef']
        hcn_loss = tf.contrib.seq2seq.sequence_loss(logits=logits, targets=action_, weights=sequence_mask, average_across_batch=False)
        loss = hcn_loss + l2_loss

        # train op
        # NOTE(review): a missing 'steps_before_decay' gives decay_steps=0 --
        # confirm that configs enabling a decay rate != 1.0 always set it.
        self.lr = tf.train.exponential_decay(self.config['learning_rate'],
                                             self.global_step,
                                             self.config.get('steps_before_decay', 0),
                                             self.config.get('learning_rate_decay', 1.0),
                                             staircase=True)
        optimizer = getattr(tf.train, self.config['optimizer'])(self.lr)
        grads_and_vars = optimizer.compute_gradients(loss)
        # optionally restrict the update to an explicit list of variable names
        if len(self.trainable_vars):
            grads_and_vars = [(gradient, variable)
                              for gradient, variable in grads_and_vars
                              if variable.name in self.trainable_vars]
        gradients_filtered = [gradient for gradient, _ in grads_and_vars]
        variables_filtered = [variable for _, variable in grads_and_vars]
        gradients_filtered, _ = tf.clip_by_global_norm(gradients_filtered, self.config['clip_norm'])
        train_op = optimizer.apply_gradients(zip(gradients_filtered, variables_filtered), global_step=self.global_step)

        # attach symbols to self
        self.loss = loss
        self.prediction = prediction
        self.probs = probs
        self.logits = logits
        self.train_op = train_op

        # attach placeholders
        self.input_words_ = input_words_
        self.context_features_ = context_features_
        self.bow_features_ = bow_features_
        self.action_ = action_
        self.prev_action_ = prev_action_
        self.action_mask_ = action_mask_
        self.sequence_mask_ = sequence_mask
Example #3
0
    def __graph__(self):
        """Build the hierarchical (turn LSTM -> latent -> dialogue LSTM) graph.

        Every turn is embedded (matrix initialised from
        ``get_w2v_model(self.vocab)``, id 0 zeroed out), projected to
        ``self.nb_hidden`` units and encoded by a uni- or bidirectional LSTM
        (``config['bi_lstm']``).  The final cell state of that encoder is
        mapped to ``z_mean`` / ``z_logvar``, a latent ``z`` is sampled with
        the reparameterisation trick, and ``z`` is used both to predict the
        bag-of-words features and, concatenated with the other per-turn
        features, as input to the dialogue-level LSTM that produces the
        action logits.

        Attributes set on ``self``:
            turn_level_proj, turn_states_reshaped, z_mean, z_logvar, z,
            hcn_loss, vae_kl_loss, vae_bow_loss, vae_overall_loss, l2_loss,
            loss, lr, logits, probs, prediction, train_op: graph tensors/ops.
            turn_fw_cell / turn_bw_cell or turn_lstm_cell: the turn encoder
            cell(s), depending on ``config['bi_lstm']``.
            input_words, input_contexts, bow_features_, action_,
            action_mask_, prev_action_, sequence_mask_: feed placeholders.

        Notes:
            * ``probs`` is the raw ``logits * action_mask`` product (no
              softmax), exactly as before.
            * ``prediction`` is the argmax restricted to actions whose mask
              entry is positive, so a disallowed action can no longer win
              when all allowed logits are negative.
            * The latent noise is drawn from ``tf.random_normal``; the KL
              term below is the closed form for a standard normal prior.
            * The L2 penalty skips variables whose own (un-scoped) name
              starts with ``'b'``, i.e. the ``bias`` variables.
        """
        # entry points
        input_words = tf.placeholder(tf.int32,
                                     [None, self.max_input_length, self.max_sequence_length],
                                     name='input_words')
        input_contexts = tf.placeholder(tf.float32,
                                        [None, self.max_input_length, self.feature_vector_size],
                                        name='input_contexts')
        bow_features = tf.placeholder(tf.float32,
                                      [None, self.max_input_length, len(self.vocab)],
                                      name='bow_features')
        action_ = tf.placeholder(tf.int32, [None, self.max_input_length], name='ground_truth_action')
        prev_action_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.action_size], name='prev_action')
        action_mask_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.action_size], name='action_mask')

        # fold the turn axis into the batch axis so every turn is encoded as
        # an independent token sequence
        input_words_reshaped = tf.reshape(input_words, shape=[-1, self.max_sequence_length])
        embedding_matrix = tf.get_variable('emb',
                                           initializer=tf.constant(get_w2v_model(self.vocab)),
                                           trainable=self.config['trainable_embeddings'])
        lookup_result = tf.nn.embedding_lookup(embedding_matrix, input_words_reshaped)
        # one-column table (0 for id 0, 1 otherwise) used as a multiplicative
        # mask that blanks the embedding of id 0
        masked_emb = tf.concat([tf.zeros([1, 1]), tf.ones([embedding_matrix.get_shape()[0] - 1, 1])], axis=0)
        mask_lookup_result = tf.nn.embedding_lookup(masked_emb, input_words_reshaped)
        lookup_result = tf.multiply(lookup_result, mask_lookup_result)

        self.turn_level_proj = tf.layers.dense(lookup_result, self.nb_hidden, name='turn_level_proj')

        if self.config['bi_lstm']:
            # each direction gets half of the hidden size; the two final cell
            # states are concatenated back to nb_hidden features per turn
            half_hidden = int(self.nb_hidden / 2)
            self.turn_fw_cell = tf.contrib.rnn.BasicLSTMCell(half_hidden,
                                                             state_is_tuple=True,
                                                             name='turn_encoder_fw')
            self.turn_bw_cell = tf.contrib.rnn.BasicLSTMCell(half_hidden,
                                                             state_is_tuple=True,
                                                             name='turn_encoder_bw')
            _, turn_states = tf.nn.bidirectional_dynamic_rnn(self.turn_fw_cell,
                                                             self.turn_bw_cell,
                                                             self.turn_level_proj,
                                                             dtype=tf.float32)
            fw_states, bw_states = turn_states
            per_turn_shape = [-1, self.max_input_length, half_hidden]
            fw_states_reshaped = tf.reshape(fw_states.c, shape=per_turn_shape)
            bw_states_reshaped = tf.reshape(bw_states.c, shape=per_turn_shape)
            self.turn_states_reshaped = tf.concat([fw_states_reshaped, bw_states_reshaped], axis=-1)
        else:
            self.turn_lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.nb_hidden, state_is_tuple=True,
                                                               name='turn_encoder')
            turn_outputs, turn_states = tf.nn.dynamic_rnn(self.turn_lstm_cell,
                                                          self.turn_level_proj,
                                                          dtype=tf.float32)
            # restore the (dialogue, turn, feature) layout
            self.turn_states_reshaped = tf.reshape(turn_states.c, shape=[-1,
                                                                         self.max_input_length,
                                                                         self.nb_hidden])

        self.z_mean = tf.layers.dense(self.turn_states_reshaped,
                                      self.config['latent_size'],
                                      name='z_mean')
        self.z_logvar = tf.layers.dense(self.turn_states_reshaped,
                                        self.config['latent_size'],
                                        name='z_logvar')
        # Reparameterisation trick.  The noise must be a plain standard
        # normal: the closed-form KL term below assumes it, and a truncated
        # normal would under-sample the tails.
        gaussian_noise = tf.random_normal(tf.shape(self.z_logvar))
        self.z = self.z_mean + tf.exp(0.5 * self.z_logvar) * gaussian_noise

        bow_logits = tf.layers.dense(self.z, len(self.vocab), name='bow_logits')

        # input projection
        all_inputs = tf.concat([self.z, bow_features, input_contexts, action_mask_, prev_action_], axis=-1)
        # add relu/tanh here if necessary
        dialog_lstm_projection = tf.layers.dense(all_inputs, self.config['embedding_size'], name='dialog_lstm_projection')

        lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.config['embedding_size'], state_is_tuple=True, name='dialog_encoder')
        outputs, states = tf.nn.dynamic_rnn(lstm_cell, dialog_lstm_projection, dtype=tf.float32)

        # output projection
        logits = tf.layers.dense(outputs, self.action_size, name='output_projection')
        # "probabilities": element-wise product with the action mask.
        # No softmax here because the cross-entropy loss works on the logits.
        probs = tf.multiply(logits, action_mask_)

        # prediction: argmax over the allowed actions only.  Taking the argmax
        # of ``probs`` would select a masked-out action (value 0) whenever all
        # allowed logits are negative.
        # assumes action_mask_ is 0 for disallowed actions and > 0 otherwise
        action_allowed = tf.greater(action_mask_, 0.0)
        lowest_logits = tf.fill(tf.shape(logits), tf.float32.min)
        prediction = tf.argmax(tf.where(action_allowed, logits, lowest_logits), axis=-1)

        sequence_mask = tf.placeholder(tf.float32, [None, self.max_input_length], name='sequence_mask')
        # loss
        self.hcn_loss = tf.contrib.seq2seq.sequence_loss(logits=logits,
                                                         targets=action_,
                                                         weights=sequence_mask,
                                                         average_across_batch=False)
        # KL divergence to a standard normal: summed over the latent axis,
        # averaged over the turn axis
        self.vae_kl_loss = -0.5 * tf.reduce_mean(tf.reduce_sum(1.0 + self.z_logvar - tf.square(self.z_mean) - tf.exp(self.z_logvar), axis=-1), axis=-1)
        # bag-of-words reconstruction: summed over the vocabulary axis,
        # averaged over the turn axis
        self.vae_bow_loss = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=bow_features, logits=bow_logits), axis=-1), axis=-1)
        self.vae_overall_loss = self.vae_kl_loss + self.vae_bow_loss

        # Variable names are fully scoped ('z_mean/bias:0',
        # 'bow_logits/kernel:0'), so the bias test has to look at the last
        # path component, not at name[0].
        self.l2_loss = tf.reduce_sum([tf.nn.l2_loss(v)
                                      for v in tf.trainable_variables()
                                      if not v.name.rsplit('/', 1)[-1].startswith('b')]) * self.config['l2_coef']
        self.loss = self.hcn_loss + self.vae_overall_loss + self.l2_loss

        # NOTE(review): a missing 'steps_before_decay' gives decay_steps=0 --
        # confirm that configs enabling a decay rate != 1.0 always set it.
        self.lr = tf.train.exponential_decay(self.config['learning_rate'],
                                             self.global_step,
                                             self.config.get('steps_before_decay', 0),
                                             self.config.get('learning_rate_decay', 1.0),
                                             staircase=True)
        # train op
        optimizer = getattr(tf.train, self.config['optimizer'])(self.lr)
        grads_and_vars = optimizer.compute_gradients(self.loss)
        # optionally restrict the update to an explicit list of variable names
        if len(self.trainable_vars):
            grads_and_vars = [(gradient, variable)
                              for gradient, variable in grads_and_vars
                              if variable.name in self.trainable_vars]
        gradients_filtered = [gradient for gradient, _ in grads_and_vars]
        variables_filtered = [variable for _, variable in grads_and_vars]
        gradients_filtered, _ = tf.clip_by_global_norm(gradients_filtered, self.config['clip_norm'])
        train_op = optimizer.apply_gradients(zip(gradients_filtered, variables_filtered), global_step=self.global_step)

        # attach symbols to self
        self.prediction = prediction
        self.probs = probs
        self.logits = logits
        self.sequence_mask_ = sequence_mask
        self.train_op = train_op

        # attach placeholders
        self.input_words = input_words
        self.input_contexts = input_contexts
        self.bow_features_ = bow_features
        self.action_ = action_
        self.action_mask_ = action_mask_
        self.prev_action_ = prev_action_
Example #4
0
    def __graph__(self):
        """Build the CNN turn encoder -> latent -> dialogue LSTM graph.

        Every turn is embedded (matrix initialised from
        ``get_w2v_model(self.vocab)``, id 0 zeroed out) and encoded by one
        convolution + max-over-time pooling branch per entry of
        ``config['filter_sizes']``.  The concatenated pooled features are
        mapped to ``z_mean`` / ``z_logvar``, a latent ``z`` is sampled with
        the reparameterisation trick, and ``z`` is used both to predict the
        bag-of-words features and, concatenated with the other per-turn
        features, as input to the dialogue-level LSTM that produces the
        action logits.

        Attributes set on ``self``:
            z_mean, z_logvar, z, l2_loss, kl_loss, bow_loss, hcn_loss, loss,
            lr, logits, probs, prediction, train_op: graph tensors/ops.
            input_words_, bow_features_, context_features_, action_,
            prev_action_, action_mask_, sequence_mask_: feed placeholders.

        Notes:
            * ``probs`` is the raw ``logits * action_mask`` product (no
              softmax), exactly as before.
            * ``prediction`` is the argmax restricted to actions whose mask
              entry is positive, so a disallowed action can no longer win
              when all allowed logits are negative.
            * The pooling window is derived from ``self.max_sequence_length``,
              the same value used to reshape the convolution input, so it
              always spans the whole convolution output.
            * The L2 penalty skips variables whose own (un-scoped) name
              starts with ``'b'`` (``bias``, ``b_conv``).
        """
        # entry points
        input_words_ = tf.placeholder(tf.int32, [None, self.max_input_length, self.max_sequence_length], name='input_words')
        bow_features_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.config['vocabulary_size']], name='bow_features')
        context_features_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.feature_vector_size], name='input_features')
        action_ = tf.placeholder(tf.int32, [None, self.max_input_length], name='ground_truth_action')
        prev_action_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.action_size], name='prev_action')
        action_mask_ = tf.placeholder(tf.float32, [None, self.max_input_length, self.action_size], name='action_mask')

        embedding_matrix = tf.get_variable('emb',
                                           initializer=tf.constant(get_w2v_model(self.vocab)),
                                           trainable=True)
        lookup_result = tf.nn.embedding_lookup(embedding_matrix, input_words_)
        # one-column table (0 for id 0, 1 otherwise) used as a multiplicative
        # mask that blanks the embedding of id 0
        masked_emb = tf.concat([tf.zeros([1, 1]), tf.ones([embedding_matrix.get_shape()[0] - 1, 1])], axis=0)
        mask_lookup_result = tf.nn.embedding_lookup(masked_emb, input_words_)
        lookup_result = tf.multiply(lookup_result, mask_lookup_result)

        filter_sizes = self.config['filter_sizes']
        num_filters = self.config['num_filters']
        embedding_size = self.config['w2v_embedding_size']
        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        # fold the turn axis into the batch axis and append a channel axis so
        # every turn becomes one single-channel "image" for conv2d
        lookup_result_reshaped_expanded = tf.expand_dims(tf.reshape(lookup_result, shape=(-1, self.max_sequence_length, embedding_size)), -1)
        for filter_size in filter_sizes:
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W_conv = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W_conv")
                b_conv = tf.Variable(tf.constant(0.0, shape=[num_filters]), name="b_conv")
                conv = tf.nn.conv2d(lookup_result_reshaped_expanded,
                                    W_conv,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b_conv), name="relu_conv")
                # Max-pool over every position the VALID convolution produced
                pooled = tf.nn.max_pool(h,
                                        ksize=[1, self.max_sequence_length - filter_size + 1, 1, 1],
                                        strides=[1, 1, 1, 1],
                                        padding='VALID',
                                        name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        h_pool = tf.concat(pooled_outputs, 3)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
        # restore the (dialogue, turn, feature) layout; dropout over the
        # pooled features is currently disabled
        h_drop = tf.reshape(h_pool_flat, shape=(-1, self.max_input_length, num_filters_total))

        self.z_mean = tf.layers.dense(h_drop, self.config['latent_size'])
        self.z_logvar = tf.layers.dense(h_drop, self.config['latent_size'])
        # reparameterisation trick: z = mean + std * eps, eps ~ N(0, I)
        gaussian_noise = tf.random_normal(tf.shape(self.z_logvar))
        self.z = self.z_mean + tf.exp(0.5 * self.z_logvar) * gaussian_noise

        bow_logits = tf.layers.dense(self.z, len(self.vocab), name='bow_logits')

        all_input = tf.concat([self.z, bow_features_, context_features_, prev_action_, action_mask_], axis=-1)
        # input projection
        projected_features = tf.layers.dense(all_input, self.nb_hidden, name='input_projection')

        lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.nb_hidden, state_is_tuple=True)
        outputs, states = tf.nn.dynamic_rnn(lstm_cell, projected_features, dtype=tf.float32)

        # output projection
        logits = tf.layers.dense(outputs, self.action_size, name='logits')
        # "probabilities": element-wise product with the action mask.
        # No softmax here because the cross-entropy loss works on the logits.
        probs = tf.multiply(logits, action_mask_)

        # prediction: argmax over the allowed actions only.  Taking the argmax
        # of ``probs`` would select a masked-out action (value 0) whenever all
        # allowed logits are negative.
        # assumes action_mask_ is 0 for disallowed actions and > 0 otherwise
        action_allowed = tf.greater(action_mask_, 0.0)
        lowest_logits = tf.fill(tf.shape(logits), tf.float32.min)
        prediction = tf.argmax(tf.where(action_allowed, logits, lowest_logits), axis=-1)

        sequence_mask = tf.placeholder(tf.float32, [None, self.max_input_length], name='sequence_mask')
        # loss
        # Variable names are fully scoped ('input_projection/bias:0',
        # 'conv-maxpool-3/b_conv:0', 'bow_logits/kernel:0'), so the bias test
        # has to look at the last path component, not at name[0].
        self.l2_loss = tf.reduce_sum([tf.nn.l2_loss(v)
                                      for v in tf.trainable_variables()
                                      if not v.name.rsplit('/', 1)[-1].startswith('b')]) * self.config['l2_coef']
        # KL divergence to a standard normal: summed over the latent axis,
        # averaged over the turn axis
        self.kl_loss = -0.5 * tf.reduce_mean(tf.reduce_sum(1.0 + self.z_logvar - tf.square(self.z_mean) - tf.exp(self.z_logvar), axis=-1), axis=-1)
        # bag-of-words reconstruction: summed over the vocabulary axis,
        # averaged over the turn axis
        self.bow_loss = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=bow_features_, logits=bow_logits), axis=-1), axis=-1)
        self.hcn_loss = tf.contrib.seq2seq.sequence_loss(logits=logits, targets=action_, weights=sequence_mask, average_across_batch=False)
        loss = self.hcn_loss + self.kl_loss + self.bow_loss + self.l2_loss

        # train op
        # NOTE(review): a missing 'steps_before_decay' gives decay_steps=0 --
        # confirm that configs enabling a decay rate != 1.0 always set it.
        self.lr = tf.train.exponential_decay(self.config['learning_rate'],
                                             self.global_step,
                                             self.config.get('steps_before_decay', 0),
                                             self.config.get('learning_rate_decay', 1.0),
                                             staircase=True)
        optimizer = getattr(tf.train, self.config['optimizer'])(self.lr)
        grads_and_vars = optimizer.compute_gradients(loss)
        # optionally restrict the update to an explicit list of variable names
        if len(self.trainable_vars):
            grads_and_vars = [(gradient, variable)
                              for gradient, variable in grads_and_vars
                              if variable.name in self.trainable_vars]
        gradients_filtered = [gradient for gradient, _ in grads_and_vars]
        variables_filtered = [variable for _, variable in grads_and_vars]
        gradients_filtered, _ = tf.clip_by_global_norm(gradients_filtered, self.config['clip_norm'])
        train_op = optimizer.apply_gradients(zip(gradients_filtered, variables_filtered), global_step=self.global_step)

        # attach symbols to self
        self.loss = loss
        self.prediction = prediction
        self.probs = probs
        self.logits = logits
        self.train_op = train_op

        # attach placeholders
        self.input_words_ = input_words_
        self.context_features_ = context_features_
        self.bow_features_ = bow_features_
        self.action_ = action_
        self.prev_action_ = prev_action_
        self.action_mask_ = action_mask_
        self.sequence_mask_ = sequence_mask