Example #1
def z1_pre_encoder(x, z2, rhus=[256, 256]):
    """
    Pre-stochastic layer encoder for z1 (latent segment variable)
    Args:
        x(tf.Tensor): tensor of shape (bs, T, F)
        z2(tf.Tensor): tensor of shape (bs, D1)
        rhus(list): list of numbers of LSTM layer hidden units
    Return:
        out(tf.Tensor): concatenation of hidden states of all LSTM layers
    """
    bs, T = tf.shape(x)[0], tf.shape(x)[1]
    z2 = tf.tile(tf.expand_dims(z2, 1), (1, T, 1))
    x_z2 = tf.concat([x, z2], axis=-1)

    cell = MultiRNNCell([BasicLSTMCell(rhu) for rhu in rhus])
    init_state = cell.zero_state(bs, x.dtype)
    name = "z1_enc_lstm_%s" % ("_".join(map(str, rhus)), )
    _, final_state = dynamic_rnn(cell,
                                 x_z2,
                                 dtype=x.dtype,
                                 initial_state=init_state,
                                 time_major=False,
                                 scope=name)

    out = [l_final_state.h for l_final_state in final_state]
    out = tf.concat(out, axis=-1)
    return out
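
A minimal usage sketch for the encoder above, assuming TensorFlow 1.x and that MultiRNNCell, BasicLSTMCell, and dynamic_rnn are the standard TF aliases (in some 1.x releases they live under tf.contrib.rnn instead); the shapes below are illustrative only:

import tensorflow as tf

# aliases assumed by the snippet above (TF 1.x)
MultiRNNCell = tf.nn.rnn_cell.MultiRNNCell
BasicLSTMCell = tf.nn.rnn_cell.BasicLSTMCell
dynamic_rnn = tf.nn.dynamic_rnn

# illustrative shapes: dynamic batch, T=20 frames, F=80 features, D1=32
x = tf.placeholder(tf.float32, shape=(None, 20, 80), name="x")
z2 = tf.placeholder(tf.float32, shape=(None, 32), name="z2")

out = z1_pre_encoder(x, z2, rhus=[256, 256])
print(out.get_shape().as_list())  # [None, 512], i.e. the two 256-unit hidden states concatenated
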
Example #2
def decoder(z1, z2, x, rhus=[256, 256], x_mu_nl=None, x_logvar_nl=None):
    """
    decoder
    Args:
        z1(tf.Tensor)
        z2(tf.Tensor)
        x(tf.Tensor): tensor of shape (bs, T, F). only shape is used
        rhus(list)
    """
    bs = tf.shape(x)[0]
    z1_z2 = tf.concat([z1, z2], axis=-1)

    cell = MultiRNNCell([BasicLSTMCell(rhu) for rhu in rhus])
    state_t = cell.zero_state(bs, x.dtype)
    name = "dec_lstm_%s_step" % ("_".join(map(str, rhus)), )

    def cell_step(inp, prev_state):
        return cell(inp, prev_state, scope=name)

    gdim = x.get_shape().as_list()[2]
    gname = "dec_gauss_step"

    def glayer_step(inp):
        return gauss_layer(inp, gdim, x_mu_nl, x_logvar_nl, gname)

    out, x_mu, x_logvar, x_sample = [], [], [], []
    for t in range(x.get_shape().as_list()[1]):
        if t > 0:
            tf.get_variable_scope().reuse_variables()

        out_t, state_t, x_mu_t, x_logvar_t, x_sample_t = decoder_step(
            z1_z2, state_t, cell_step, glayer_step)
        out.append(out_t)
        x_mu.append(x_mu_t)
        x_logvar.append(x_logvar_t)
        x_sample.append(x_sample_t)

    out = tf.stack(out, axis=1, name="dec_pre_out")
    x_mu = tf.stack(x_mu, axis=1, name="dec_x_mu")
    x_logvar = tf.stack(x_logvar, axis=1, name="dec_x_logvar")
    x_sample = tf.stack(x_sample, axis=1, name="dec_x_sample")
    px_z = [x_mu, x_logvar]
    return out, px_z, x_sample
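
The decoder above calls two helpers that are not part of this example, gauss_layer and decoder_step. A plausible sketch of both, inferred only from how they are called here; the bodies (dense mean/log-variance heads and a reparameterized sample) are assumptions, not the original implementation:

import tensorflow as tf

def gauss_layer(inp, dim, mu_nl=None, logvar_nl=None, scope=None):
    # diagonal-Gaussian output layer: mean, log-variance, reparameterized sample
    with tf.variable_scope(scope or "gauss_layer"):
        mu = tf.layers.dense(inp, dim, activation=mu_nl, name="mu")
        logvar = tf.layers.dense(inp, dim, activation=logvar_nl, name="logvar")
        eps = tf.random_normal(tf.shape(mu))
        sample = mu + tf.exp(0.5 * logvar) * eps
    return mu, logvar, sample

def decoder_step(inp, prev_state, cell_step, glayer_step):
    # one decoding step: advance the RNN cell, then emit the frame distribution
    out_t, state_t = cell_step(inp, prev_state)
    x_mu_t, x_logvar_t, x_sample_t = glayer_step(out_t)
    return out_t, state_t, x_mu_t, x_logvar_t, x_sample_t
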
def z2_pre_encoder(x, rhus=[256, 256]):
    """
    Pre-stochastic layer encoder for z2 (latent sequence variable)
    Args:
        x(tf.Tensor): tensor of shape (bs, T, F)
        rhus(list): list of numbers of LSTM layer hidden units
    Return:
        out(tf.Tensor): concatenation of hidden states of all LSTM layers
    """
    bs = tf.shape(x)[0]
    
    cell = MultiRNNCell([BasicLSTMCell(rhu) for rhu in rhus])
    init_state = cell.zero_state(bs, x.dtype)
    name = "z2_enc_lstm_%s" % ("_".join(map(str, rhus)),)
    _, final_state = dynamic_rnn(cell, x, dtype=x.dtype,
            initial_state=init_state, time_major=False, scope=name)
    
    out = [l_final_state.h for l_final_state in final_state]
    out = tf.concat(out, axis=-1)
    return out
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 is_train,
                 vocab=None,
                 embed=None,
                 learning_rate=0.1,
                 learning_rate_decay_factor=0.95,
                 max_gradient_norm=5.0,
                 num_samples=512,
                 max_length=30,
                 use_lstm=True):

        self.posts_1 = tf.placeholder(tf.string, shape=(None, None))
        self.posts_2 = tf.placeholder(tf.string, shape=(None, None))
        self.posts_3 = tf.placeholder(tf.string, shape=(None, None))
        self.posts_4 = tf.placeholder(tf.string, shape=(None, None))

        self.entity_1 = tf.placeholder(tf.string, shape=(None, None, None, 3))
        self.entity_2 = tf.placeholder(tf.string, shape=(None, None, None, 3))
        self.entity_3 = tf.placeholder(tf.string, shape=(None, None, None, 3))
        self.entity_4 = tf.placeholder(tf.string, shape=(None, None, None, 3))

        self.entity_mask_1 = tf.placeholder(tf.float32,
                                            shape=(None, None, None))
        self.entity_mask_2 = tf.placeholder(tf.float32,
                                            shape=(None, None, None))
        self.entity_mask_3 = tf.placeholder(tf.float32,
                                            shape=(None, None, None))
        self.entity_mask_4 = tf.placeholder(tf.float32,
                                            shape=(None, None, None))

        self.posts_length_1 = tf.placeholder(tf.int32, shape=(None))
        self.posts_length_2 = tf.placeholder(tf.int32, shape=(None))
        self.posts_length_3 = tf.placeholder(tf.int32, shape=(None))
        self.posts_length_4 = tf.placeholder(tf.int32, shape=(None))

        self.responses = tf.placeholder(tf.string, shape=(None, None))
        self.responses_length = tf.placeholder(tf.int32, shape=(None))

        self.epoch = tf.Variable(0, trainable=False, name='epoch')
        self.epoch_add_op = self.epoch.assign(self.epoch + 1)

        if is_train:
            self.symbols = tf.Variable(vocab, trainable=False, name="symbols")
        else:
            self.symbols = tf.Variable(np.array(['.'] * num_symbols),
                                       name="symbols")

        self.symbol2index = HashTable(KeyValueTensorInitializer(
            self.symbols,
            tf.Variable(
                np.array([i for i in range(num_symbols)], dtype=np.int32),
                False)),
                                      default_value=UNK_ID,
                                      name="symbol2index")

        self.posts_input_1 = self.symbol2index.lookup(self.posts_1)

        self.posts_2_target = self.posts_2_embed = self.symbol2index.lookup(
            self.posts_2)
        self.posts_3_target = self.posts_3_embed = self.symbol2index.lookup(
            self.posts_3)
        self.posts_4_target = self.posts_4_embed = self.symbol2index.lookup(
            self.posts_4)

        self.responses_target = self.symbol2index.lookup(self.responses)

        batch_size, decoder_len = tf.shape(self.posts_1)[0], tf.shape(
            self.responses)[1]

        self.posts_input_2 = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(self.posts_2_embed, [tf.shape(self.posts_2)[1] - 1, 1],
                     1)[0]
        ], 1)
        self.posts_input_3 = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(self.posts_3_embed, [tf.shape(self.posts_3)[1] - 1, 1],
                     1)[0]
        ], 1)
        self.posts_input_4 = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(self.posts_4_embed, [tf.shape(self.posts_4)[1] - 1, 1],
                     1)[0]
        ], 1)

        self.responses_input = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int32) * GO_ID,
            tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
        ], 1)

        self.encoder_2_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.posts_length_2 - 1,
                                 tf.shape(self.posts_2)[1]),
                      reverse=True,
                      axis=1), [-1, tf.shape(self.posts_2)[1]])
        self.encoder_3_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.posts_length_3 - 1,
                                 tf.shape(self.posts_3)[1]),
                      reverse=True,
                      axis=1), [-1, tf.shape(self.posts_3)[1]])
        self.encoder_4_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.posts_length_4 - 1,
                                 tf.shape(self.posts_4)[1]),
                      reverse=True,
                      axis=1), [-1, tf.shape(self.posts_4)[1]])

        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])

        if embed is None:
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.encoder_input_1 = tf.nn.embedding_lookup(self.embed,
                                                      self.posts_input_1)
        self.encoder_input_2 = tf.nn.embedding_lookup(self.embed,
                                                      self.posts_input_2)
        self.encoder_input_3 = tf.nn.embedding_lookup(self.embed,
                                                      self.posts_input_3)
        self.encoder_input_4 = tf.nn.embedding_lookup(self.embed,
                                                      self.posts_input_4)

        self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                    self.responses_input)

        entity_embedding_1 = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entity_1)),
            [
                batch_size,
                tf.shape(self.entity_1)[1],
                tf.shape(self.entity_1)[2], 3 * num_embed_units
            ])
        entity_embedding_2 = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entity_2)),
            [
                batch_size,
                tf.shape(self.entity_2)[1],
                tf.shape(self.entity_2)[2], 3 * num_embed_units
            ])
        entity_embedding_3 = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entity_3)),
            [
                batch_size,
                tf.shape(self.entity_3)[1],
                tf.shape(self.entity_3)[2], 3 * num_embed_units
            ])
        entity_embedding_4 = tf.reshape(
            tf.nn.embedding_lookup(self.embed,
                                   self.symbol2index.lookup(self.entity_4)),
            [
                batch_size,
                tf.shape(self.entity_4)[1],
                tf.shape(self.entity_4)[2], 3 * num_embed_units
            ])

        head_1, relation_1, tail_1 = tf.split(entity_embedding_1,
                                              [num_embed_units] * 3,
                                              axis=3)
        head_2, relation_2, tail_2 = tf.split(entity_embedding_2,
                                              [num_embed_units] * 3,
                                              axis=3)
        head_3, relation_3, tail_3 = tf.split(entity_embedding_3,
                                              [num_embed_units] * 3,
                                              axis=3)
        head_4, relation_4, tail_4 = tf.split(entity_embedding_4,
                                              [num_embed_units] * 3,
                                              axis=3)

        with tf.variable_scope('graph_attention'):
            # [batch_size, max_response_length, max_triple_num, 2*embed_units]
            head_tail_1 = tf.concat([head_1, tail_1], axis=3)
            # [batch_size, max_response_length, max_triple_num, embed_units]
            head_tail_transformed_1 = tf.layers.dense(
                head_tail_1,
                num_embed_units,
                activation=tf.tanh,
                name='head_tail_transform')
            # [batch_size, max_response_length, max_triple_num, embed_units]
            relation_transformed_1 = tf.layers.dense(relation_1,
                                                     num_embed_units,
                                                     name='relation_transform')
            # [batch_size, max_response_length, max_triple_num]
            e_weight_1 = tf.reduce_sum(relation_transformed_1 *
                                       head_tail_transformed_1,
                                       axis=3)
            # [batch_size, max_response_length, max_triple_num]
            alpha_weight_1 = tf.nn.softmax(e_weight_1)
            # [batch_size, max_response_length, embed_units]
            graph_embed_1 = tf.reduce_sum(
                tf.expand_dims(alpha_weight_1, 3) *
                (tf.expand_dims(self.entity_mask_1, 3) * head_tail_1),
                axis=2)

        with tf.variable_scope('graph_attention', reuse=True):
            head_tail_2 = tf.concat([head_2, tail_2], axis=3)
            head_tail_transformed_2 = tf.layers.dense(
                head_tail_2,
                num_embed_units,
                activation=tf.tanh,
                name='head_tail_transform')
            relation_transformed_2 = tf.layers.dense(relation_2,
                                                     num_embed_units,
                                                     name='relation_transform')
            e_weight_2 = tf.reduce_sum(relation_transformed_2 *
                                       head_tail_transformed_2,
                                       axis=3)
            alpha_weight_2 = tf.nn.softmax(e_weight_2)
            graph_embed_2 = tf.reduce_sum(
                tf.expand_dims(alpha_weight_2, 3) *
                (tf.expand_dims(self.entity_mask_2, 3) * head_tail_2),
                axis=2)

        with tf.variable_scope('graph_attention', reuse=True):
            head_tail_3 = tf.concat([head_3, tail_3], axis=3)
            head_tail_transformed_3 = tf.layers.dense(
                head_tail_3,
                num_embed_units,
                activation=tf.tanh,
                name='head_tail_transform')
            relation_transformed_3 = tf.layers.dense(relation_3,
                                                     num_embed_units,
                                                     name='relation_transform')
            e_weight_3 = tf.reduce_sum(relation_transformed_3 *
                                       head_tail_transformed_3,
                                       axis=3)
            alpha_weight_3 = tf.nn.softmax(e_weight_3)
            graph_embed_3 = tf.reduce_sum(
                tf.expand_dims(alpha_weight_3, 3) *
                (tf.expand_dims(self.entity_mask_3, 3) * head_tail_3),
                axis=2)

        with tf.variable_scope('graph_attention', reuse=True):
            head_tail_4 = tf.concat([head_4, tail_4], axis=3)
            head_tail_transformed_4 = tf.layers.dense(
                head_tail_4,
                num_embed_units,
                activation=tf.tanh,
                name='head_tail_transform')
            relation_transformed_4 = tf.layers.dense(relation_4,
                                                     num_embed_units,
                                                     name='relation_transform')
            e_weight_4 = tf.reduce_sum(relation_transformed_4 *
                                       head_tail_transformed_4,
                                       axis=3)
            alpha_weight_4 = tf.nn.softmax(e_weight_4)
            graph_embed_4 = tf.reduce_sum(
                tf.expand_dims(alpha_weight_4, 3) *
                (tf.expand_dims(self.entity_mask_4, 3) * head_tail_4),
                axis=2)

        if use_lstm:
            cell = MultiRNNCell(
                [LSTMCell(num_units) for _ in range(num_layers)])
        else:
            cell = MultiRNNCell(
                [GRUCell(num_units) for _ in range(num_layers)])

        output_fn, sampled_sequence_loss = output_projection_layer(
            num_units, num_symbols, num_samples)

        encoder_output_1, encoder_state_1 = dynamic_rnn(cell,
                                                        self.encoder_input_1,
                                                        self.posts_length_1,
                                                        dtype=tf.float32,
                                                        scope="encoder")

        attention_keys_1, attention_values_1, attention_score_fn_1, attention_construct_fn_1 \
                = attention_decoder_fn.prepare_attention(graph_embed_1, encoder_output_1, 'luong', num_units)
        decoder_fn_train_1 = attention_decoder_fn.attention_decoder_fn_train(
            encoder_state_1,
            attention_keys_1,
            attention_values_1,
            attention_score_fn_1,
            attention_construct_fn_1,
            max_length=tf.reduce_max(self.posts_length_2))
        encoder_output_2, encoder_state_2, alignments_ta_2 = dynamic_rnn_decoder(
            cell,
            decoder_fn_train_1,
            self.encoder_input_2,
            self.posts_length_2,
            scope="decoder")
        self.alignments_2 = tf.transpose(alignments_ta_2.stack(),
                                         perm=[1, 0, 2])

        self.decoder_loss_2 = sampled_sequence_loss(encoder_output_2,
                                                    self.posts_2_target,
                                                    self.encoder_2_mask)

        with variable_scope.variable_scope('', reuse=True):
            attention_keys_2, attention_values_2, attention_score_fn_2, attention_construct_fn_2 \
                    = attention_decoder_fn.prepare_attention(graph_embed_2, encoder_output_2, 'luong', num_units)
            decoder_fn_train_2 = attention_decoder_fn.attention_decoder_fn_train(
                encoder_state_2,
                attention_keys_2,
                attention_values_2,
                attention_score_fn_2,
                attention_construct_fn_2,
                max_length=tf.reduce_max(self.posts_length_3))
            encoder_output_3, encoder_state_3, alignments_ta_3 = dynamic_rnn_decoder(
                cell,
                decoder_fn_train_2,
                self.encoder_input_3,
                self.posts_length_3,
                scope="decoder")
            self.alignments_3 = tf.transpose(alignments_ta_3.stack(),
                                             perm=[1, 0, 2])

            self.decoder_loss_3 = sampled_sequence_loss(
                encoder_output_3, self.posts_3_target, self.encoder_3_mask)

            attention_keys_3, attention_values_3, attention_score_fn_3, attention_construct_fn_3 \
                    = attention_decoder_fn.prepare_attention(graph_embed_3, encoder_output_3, 'luong', num_units)
            decoder_fn_train_3 = attention_decoder_fn.attention_decoder_fn_train(
                encoder_state_3,
                attention_keys_3,
                attention_values_3,
                attention_score_fn_3,
                attention_construct_fn_3,
                max_length=tf.reduce_max(self.posts_length_4))
            encoder_output_4, encoder_state_4, alignments_ta_4 = dynamic_rnn_decoder(
                cell,
                decoder_fn_train_3,
                self.encoder_input_4,
                self.posts_length_4,
                scope="decoder")
            self.alignments_4 = tf.transpose(alignments_ta_4.stack(),
                                             perm=[1, 0, 2])

            self.decoder_loss_4 = sampled_sequence_loss(
                encoder_output_4, self.posts_4_target, self.encoder_4_mask)

            attention_keys, attention_values, attention_score_fn, attention_construct_fn \
                    = attention_decoder_fn.prepare_attention(graph_embed_4, encoder_output_4, 'luong', num_units)

        if is_train:
            with variable_scope.variable_scope('', reuse=True):
                decoder_fn_train = attention_decoder_fn.attention_decoder_fn_train(
                    encoder_state_4,
                    attention_keys,
                    attention_values,
                    attention_score_fn,
                    attention_construct_fn,
                    max_length=tf.reduce_max(self.responses_length))
                self.decoder_output, _, alignments_ta = dynamic_rnn_decoder(
                    cell,
                    decoder_fn_train,
                    self.decoder_input,
                    self.responses_length,
                    scope="decoder")
                self.alignments = tf.transpose(alignments_ta.stack(),
                                               perm=[1, 0, 2])

                self.decoder_loss = sampled_sequence_loss(
                    self.decoder_output, self.responses_target,
                    self.decoder_mask)

            self.params = tf.trainable_variables()

            self.learning_rate = tf.Variable(float(learning_rate),
                                             trainable=False,
                                             dtype=tf.float32)
            self.learning_rate_decay_op = self.learning_rate.assign(
                self.learning_rate * learning_rate_decay_factor)
            self.global_step = tf.Variable(0, trainable=False)

            #opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            opt = tf.train.MomentumOptimizer(self.learning_rate, 0.9)

            gradients = tf.gradients(
                self.decoder_loss + self.decoder_loss_2 + self.decoder_loss_3 +
                self.decoder_loss_4, self.params)
            clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
                gradients, max_gradient_norm)
            self.update = opt.apply_gradients(zip(clipped_gradients,
                                                  self.params),
                                              global_step=self.global_step)

        else:
            with variable_scope.variable_scope('', reuse=True):
                decoder_fn_inference = attention_decoder_fn.attention_decoder_fn_inference(
                    output_fn, encoder_state_4, attention_keys,
                    attention_values, attention_score_fn,
                    attention_construct_fn, self.embed, GO_ID, EOS_ID,
                    max_length, num_symbols)
                self.decoder_distribution, _, alignments_ta = dynamic_rnn_decoder(
                    cell, decoder_fn_inference, scope="decoder")
                output_len = tf.shape(self.decoder_distribution)[1]
                self.alignments = tf.transpose(
                    alignments_ta.gather(tf.range(output_len)), [1, 0, 2])

            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, num_symbols - 2],
                         2)[1], 2) + 2  # for removing UNK
            self.generation = tf.nn.embedding_lookup(self.symbols,
                                                     self.generation_index,
                                                     name="generation")

            self.params = tf.trainable_variables()

        self.saver = tf.train.Saver(tf.global_variables(),
                                    write_version=tf.train.SaverDef.V2,
                                    max_to_keep=10,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)
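
The four graph_attention scopes above repeat the same computation over (head, relation, tail) triples; a standalone restatement of that single step, with the shape comments carried over (the function name and argument order are illustrative, not from the original code):

import tensorflow as tf

def graph_attention(head, relation, tail, entity_mask, num_embed_units):
    # head / relation / tail: (batch, max_response_length, max_triple_num, embed_units)
    # entity_mask:            (batch, max_response_length, max_triple_num)
    head_tail = tf.concat([head, tail], axis=3)
    head_tail_transformed = tf.layers.dense(
        head_tail, num_embed_units, activation=tf.tanh,
        name="head_tail_transform")
    relation_transformed = tf.layers.dense(
        relation, num_embed_units, name="relation_transform")
    # one attention logit per triple, then a softmax over the triples
    e_weight = tf.reduce_sum(
        relation_transformed * head_tail_transformed, axis=3)
    alpha_weight = tf.nn.softmax(e_weight)
    # masked, attention-weighted sum of the (head, tail) pairs
    graph_embed = tf.reduce_sum(
        tf.expand_dims(alpha_weight, 3) *
        (tf.expand_dims(entity_mask, 3) * head_tail),
        axis=2)
    return graph_embed  # (batch, max_response_length, 2 * embed_units)
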
Example #5
    def __init__(self, model_parameters, training_parameters, directories,
                 **kwargs):
        """ Initialization of the RNN Model as TensorFlow computational graph
    """

        self.model_parameters = model_parameters
        self.training_parameters = training_parameters
        self.directories = directories

        # Define model hyperparameters Tensors
        with tf.name_scope("Parameters"):
            self.learning_rate = tf.placeholder(tf.float32,
                                                name="learning_rate")
            self.momentum = tf.placeholder(tf.float32, name="momentum")
            self.input_keep_probability = tf.placeholder(
                tf.float32, name="input_keep_probability")
            self.output_keep_probability = tf.placeholder(
                tf.float32, name="output_keep_probability")

        # Define input, output and initialization Tensors
        with tf.name_scope("Input"):
            self.inputs = tf.placeholder("float", [
                None, self.model_parameters.sequence_length,
                self.model_parameters.input_dimension
            ],
                                         name='input_placeholder')

            self.targets = tf.placeholder("float", [
                None, self.model_parameters.sequence_length,
                self.model_parameters.n_classes
            ],
                                          name='labels_placeholder')

            self.init = tf.placeholder(
                tf.float32,
                shape=[None, self.model_parameters.state_size],
                name="init")

        # Define the TensorFlow RNN computational graph
        with tf.name_scope("RNN"):
            cells = []

            # Define the layers
            for _ in range(self.model_parameters.n_layers):
                if self.model_parameters.model == 'rnn':
                    cell = BasicRNNCell(self.model_parameters.state_size)
                elif self.model_parameters.model == 'gru':
                    cell = GRUCell(self.model_parameters.state_size)
                elif self.model_parameters.model == 'lstm':
                    cell = BasicLSTMCell(self.model_parameters.state_size,
                                         state_is_tuple=True)
                elif self.model_parameters.model == 'nas':
                    cell = NASCell(self.model_parameters.state_size)
                else:
                    raise Exception("model type not supported: {}".format(
                        self.model_parameters.model))

                if (self.model_parameters.output_keep_probability < 1.0
                        or self.model_parameters.input_keep_probability < 1.0):

                    if self.model_parameters.output_keep_probability < 1.0:
                        cell = DropoutWrapper(
                            cell,
                            output_keep_prob=self.output_keep_probability)

                    if self.model_parameters.input_keep_probability < 1.0:
                        cell = DropoutWrapper(
                            cell, input_keep_prob=self.input_keep_probability)

                cells.append(cell)
            cell = MultiRNNCell(cells)

            # Simulate time steps and get RNN cell output
            self.outputs, self.next_state = tf.nn.dynamic_rnn(cell,
                                                              self.inputs,
                                                              dtype=tf.float32)

        # Define cost Tensors
        with tf.name_scope("Cost"):

            # Flatten to apply same weights to all time steps
            self.flattened_outputs = tf.reshape(
                self.outputs, [-1, self.model_parameters.state_size],
                name="flattened_outputs")

            self.softmax_w = tf.Variable(tf.truncated_normal([
                self.model_parameters.state_size,
                self.model_parameters.n_classes
            ],
                                                             stddev=0.01),
                                         name="softmax_weights")

            self.softmax_b = tf.Variable(tf.constant(
                0.1, shape=[self.model_parameters.n_classes]),
                                         name="softmax_biases")

            # Softmax activation layer, using RNN inner loop last output
            # logits and labels must have the same shape [batch_size, num_classes]
            self.logits = tf.matmul(self.flattened_outputs,
                                    self.softmax_w) + self.softmax_b
            self.unshaped_predictions = tf.nn.softmax(
                self.logits, name="unshaped_predictions")

            tf.summary.histogram('logits', self.logits)

            # Return to the initial predictions shape
            self.predictions = tf.reshape(self.unshaped_predictions, [
                -1, self.model_parameters.sequence_length,
                self.model_parameters.n_classes
            ],
                                          name="predictions")

            self.cross_entropy = tf.reduce_mean(-tf.reduce_sum(
                self.targets *
                tf.log(self.predictions), reduction_indices=[2]))

            # Get the most likely label for each input
            self.label_prediction = tf.argmax(self.predictions,
                                              2,
                                              name="label_predictions")

            # Compare predictions to labels
            self.correct_prediction = tf.equal(tf.argmax(self.predictions, 2),
                                               tf.argmax(self.targets, 2),
                                               name="correct_predictions")
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction,
                                                   tf.float32),
                                           name="accuracy")

        # Define Training Tensors
        with tf.name_scope("Train"):
            #self.validation_perplexity = tf.Variable(dtype=tf.float32, initial_value=float("inf"),
            #trainable=False,
            #name="validation_perplexity")

            #self.validation_accuracy = tf.Variable(dtype=tf.float32, initial_value=float("inf"),
            #trainable=False,
            #name="validation_accuracy")

            #tf.scalar_summary(self.validation_perplexity.op.name, self.validation_perplexity)
            #tf.scalar_summary(self.validation_accuracy.op.name, self.validation_accuracy)

            #self.training_epoch_perplexity = tf.Variable(dtype=tf.float32, initial_value=float("inf"),
            #trainable=False,
            #name="training_epoch_perplexity")

            #self.training_epoch_accuracy = tf.Variable(dtype=tf.float32, initial_value=float("inf"),
            #trainable=False,
            #name="training_epoch_accuracy")

            #tf.scalar_summary(self.training_epoch_perplexity.op.name, self.training_epoch_perplexity)
            #tf.scalar_summary(self.training_epoch_accuracy.op.name, self.training_epoch_accuracy)

            #self.iteration = tf.Variable(0, dtype=tf.int64, name="iteration", trainable=False)

            # Momentum optimisation
            self.optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate,
                momentum=self.momentum,
                name="optimizer")

            self.train_step = self.optimizer.minimize(self.cross_entropy,
                                                      name="train_step")

            # Initializing the variables
            self.initializer = tf.global_variables_initializer()
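
A training-step sketch for the graph defined above. The class name RNNModel, the SimpleNamespace hyperparameters, and the random batch are placeholders chosen to match the attribute names the constructor reads; only the placeholder and op names come from the snippet:

import numpy as np
import tensorflow as tf
from types import SimpleNamespace

# placeholder hyperparameters; attribute names mirror what __init__ reads
model_parameters = SimpleNamespace(
    sequence_length=50, input_dimension=10, n_classes=5,
    state_size=128, n_layers=2, model="lstm",
    input_keep_probability=0.8, output_keep_probability=0.8)

model = RNNModel(model_parameters, training_parameters=None, directories=None)

with tf.Session() as sess:
    sess.run(model.initializer)
    batch_x = np.random.rand(32, 50, 10).astype(np.float32)
    # one-hot targets of shape (batch, sequence_length, n_classes)
    batch_y = np.eye(5)[np.random.randint(0, 5, size=(32, 50))].astype(np.float32)
    _, acc = sess.run(
        [model.train_step, model.accuracy],
        feed_dict={model.inputs: batch_x,
                   model.targets: batch_y,
                   model.learning_rate: 0.01,
                   model.momentum: 0.9,
                   model.input_keep_probability: 0.8,
                   model.output_keep_probability: 0.8})
    print("batch accuracy:", acc)
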
Example #6
    def __init__(self,
                 num_symbols,
                 num_embed_units,
                 num_units,
                 num_layers,
                 beam_size,
                 embed,
                 learning_rate=0.5,
                 remove_unk=False,
                 learning_rate_decay_factor=0.95,
                 max_gradient_norm=5.0,
                 num_samples=512,
                 max_length=8,
                 use_lstm=False):

        self.posts = tf.placeholder(tf.string, (None, None),
                                    'enc_inps')  # batch*len
        self.posts_length = tf.placeholder(tf.int32, (None),
                                           'enc_lens')  # batch
        self.responses = tf.placeholder(tf.string, (None, None),
                                        'dec_inps')  # batch*len
        self.responses_length = tf.placeholder(tf.int32, (None),
                                               'dec_lens')  # batch

        # initialize the training process
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         dtype=tf.float32)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                             value_dtype=tf.int64,
                                             default_value=UNK_ID,
                                             shared_name="in_table",
                                             name="in_table",
                                             checkpoint=True)
        self.index2symbol = MutableHashTable(key_dtype=tf.int64,
                                             value_dtype=tf.string,
                                             default_value='_UNK',
                                             shared_name="out_table",
                                             name="out_table",
                                             checkpoint=True)
        # build the vocab table (string to index)

        self.posts_input = self.symbol2index.lookup(self.posts)  # batch*len
        self.responses_target = self.symbol2index.lookup(
            self.responses)  #batch*len

        batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(
            self.responses)[1]
        self.responses_input = tf.concat([
            tf.ones([batch_size, 1], dtype=tf.int64) * GO_ID,
            tf.split(self.responses_target, [decoder_len - 1, 1], 1)[0]
        ], 1)  # batch*len
        self.decoder_mask = tf.reshape(
            tf.cumsum(tf.one_hot(self.responses_length - 1, decoder_len),
                      reverse=True,
                      axis=1), [-1, decoder_len])

        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed',
                                         [num_symbols, num_embed_units],
                                         tf.float32)
        else:
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed',
                                         dtype=tf.float32,
                                         initializer=embed)

        self.encoder_input = tf.nn.embedding_lookup(
            self.embed, self.posts_input)  #batch*len*unit
        self.decoder_input = tf.nn.embedding_lookup(self.embed,
                                                    self.responses_input)

        if use_lstm:
            cell = MultiRNNCell(
                [LSTMCell(num_units) for _ in range(num_layers)])
        else:
            cell = MultiRNNCell(
                [GRUCell(num_units) for _ in range(num_layers)])

        # rnn encoder
        encoder_output, encoder_state = dynamic_rnn(cell,
                                                    self.encoder_input,
                                                    self.posts_length,
                                                    dtype=tf.float32,
                                                    scope="encoder")

        # get output projection function
        output_fn, sampled_sequence_loss = output_projection_layer(
            num_units, num_symbols, num_samples)

        # get attention function
        attention_keys, attention_values, attention_score_fn, attention_construct_fn \
                = attention_decoder_fn.prepare_attention(encoder_output, 'luong', num_units)

        with tf.variable_scope('decoder'):
            decoder_fn_train = attention_decoder_fn.attention_decoder_fn_train(
                encoder_state, attention_keys, attention_values,
                attention_score_fn, attention_construct_fn)
            self.decoder_output, _, _ = dynamic_rnn_decoder(
                cell,
                decoder_fn_train,
                self.decoder_input,
                self.responses_length,
                scope="decoder_rnn")
            self.decoder_loss = sampled_sequence_loss(self.decoder_output,
                                                      self.responses_target,
                                                      self.decoder_mask)

        with tf.variable_scope('decoder', reuse=True):
            decoder_fn_inference = attention_decoder_fn.attention_decoder_fn_inference(
                output_fn, encoder_state, attention_keys, attention_values,
                attention_score_fn, attention_construct_fn, self.embed, GO_ID,
                EOS_ID, max_length, num_symbols)

            self.decoder_distribution, _, _ = dynamic_rnn_decoder(
                cell, decoder_fn_inference, scope="decoder_rnn")
            self.generation_index = tf.argmax(
                tf.split(self.decoder_distribution, [2, num_symbols - 2],
                         2)[1], 2) + 2  # for removing UNK
            self.generation = self.index2symbol.lookup(self.generation_index,
                                                       name='generation')

        with tf.variable_scope('decoder', reuse=True):
            decoder_fn_beam_inference = attention_decoder_fn_beam_inference(
                output_fn, encoder_state, attention_keys, attention_values,
                attention_score_fn, attention_construct_fn, self.embed, GO_ID,
                EOS_ID, max_length, num_symbols, beam_size, remove_unk)
            _, _, self.context_state = dynamic_rnn_decoder(
                cell, decoder_fn_beam_inference, scope="decoder_rnn")
            (log_beam_probs, beam_parents, beam_symbols, result_probs,
             result_parents, result_symbols) = self.context_state

            self.beam_parents = tf.transpose(tf.reshape(
                beam_parents.stack(), [max_length + 1, -1, beam_size]),
                                             [1, 0, 2],
                                             name='beam_parents')
            self.beam_symbols = tf.transpose(
                tf.reshape(beam_symbols.stack(),
                           [max_length + 1, -1, beam_size]), [1, 0, 2])
            self.beam_symbols = self.index2symbol.lookup(tf.cast(
                self.beam_symbols, tf.int64),
                                                         name="beam_symbols")

            self.result_probs = tf.transpose(tf.reshape(
                result_probs.stack(), [max_length + 1, -1, beam_size * 2]),
                                             [1, 0, 2],
                                             name='result_probs')
            self.result_symbols = tf.transpose(
                tf.reshape(result_symbols.stack(),
                           [max_length + 1, -1, beam_size * 2]), [1, 0, 2])
            self.result_parents = tf.transpose(tf.reshape(
                result_parents.stack(), [max_length + 1, -1, beam_size * 2]),
                                               [1, 0, 2],
                                               name='result_parents')
            self.result_symbols = self.index2symbol.lookup(
                tf.cast(self.result_symbols, tf.int64), name='result_symbols')

        self.params = tf.trainable_variables()

        # calculate the gradient of parameters
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.decoder_loss, self.params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                          global_step=self.global_step)

        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                    max_to_keep=3,
                                    pad_step_number=True,
                                    keep_checkpoint_every_n_hours=1.0)

        # Exporter for serving
        self.model_exporter = exporter.Exporter(self.saver)
        inputs = {"enc_inps:0": self.posts, "enc_lens:0": self.posts_length}
        outputs = {
            "beam_symbols": self.beam_symbols,
            "beam_parents": self.beam_parents,
            "result_probs": self.result_probs,
            "result_symbols": self.result_symbols,
            "result_parents": self.result_parents
        }
        self.model_exporter.init(tf.get_default_graph().as_graph_def(),
                                 named_graph_signatures={
                                     "inputs":
                                     exporter.generic_signature(inputs),
                                     "outputs":
                                     exporter.generic_signature(outputs)
                                 })
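
A minimal inference sketch for the beam-search graph above, assuming the enclosing class has been instantiated as model (its class name and the code that populates the vocabulary hash tables are not part of this snippet):

import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {model.posts: [["hello", "world"]], model.posts_length: [2]}
    beam_symbols, beam_parents = sess.run(
        [model.beam_symbols, model.beam_parents], feed_dict=feed)
    print(beam_symbols.shape)  # (batch, max_length + 1, beam_size)
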
    def __init__(self,
            num_symbols,
            num_qwords, #modify
            num_embed_units,
            num_units,
            num_layers,
            is_train,
            vocab=None,
            embed=None,
            question_data=True,
            learning_rate=0.5,
            learning_rate_decay_factor=0.95,
            max_gradient_norm=5.0,
            num_samples=512,
            max_length=30,
            use_lstm=False):

        self.posts = tf.placeholder(tf.string, shape=(None, None))  # batch*len
        self.posts_length = tf.placeholder(tf.int32, shape=(None))  # batch
        self.responses = tf.placeholder(tf.string, shape=(None, None))  # batch*len
        self.responses_length = tf.placeholder(tf.int32, shape=(None))  # batch
        self.keyword_tensor = tf.placeholder(tf.float32, shape=(None, 3, None)) #(batch * len) * 3 * numsymbol
        self.word_type = tf.placeholder(tf.int32, shape=(None))   #(batch * len)

        # build the vocab table (string to index)
        if is_train:
            self.symbols = tf.Variable(vocab, trainable=False, name="symbols")
        else:
            self.symbols = tf.Variable(np.array(['.']*num_symbols), name="symbols")
        self.symbol2index = HashTable(KeyValueTensorInitializer(self.symbols,
            tf.Variable(np.array([i for i in range(num_symbols)], dtype=np.int32), False)),
            default_value=UNK_ID, name="symbol2index")
        self.posts_input = self.symbol2index.lookup(self.posts)   # batch*len
        self.responses_target = self.symbol2index.lookup(self.responses)   #batch*len
        
        batch_size, decoder_len = tf.shape(self.responses)[0], tf.shape(self.responses)[1]
        self.responses_input = tf.concat([tf.ones([batch_size, 1], dtype=tf.int32)*GO_ID,
            tf.split(self.responses_target, [decoder_len-1, 1], 1)[0]], 1)   # batch*len
        # delete the last column of responses_target and add GO_ID at the front of it
        self.decoder_mask = tf.reshape(tf.cumsum(tf.one_hot(self.responses_length-1,
            decoder_len), reverse=True, axis=1), [-1, decoder_len]) # batch * len

        print "embedding..."
        # build the embedding table (index to vector)
        if embed is None:
            # initialize the embedding randomly
            self.embed = tf.get_variable('embed', [num_symbols, num_embed_units], tf.float32)
        else:
            print(len(vocab), len(embed), len(embed[0]))
            print(embed)
            # initialize the embedding by pre-trained word vectors
            self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)

        self.encoder_input = tf.nn.embedding_lookup(self.embed, self.posts_input) #batch*len*unit
        self.decoder_input = tf.nn.embedding_lookup(self.embed, self.responses_input)

        print "embedding finished"

        if use_lstm:
            cell = MultiRNNCell(
                [LSTMCell(num_units) for _ in range(num_layers)])
        else:
            cell = MultiRNNCell(
                [GRUCell(num_units) for _ in range(num_layers)])

        # rnn encoder
        encoder_output, encoder_state = dynamic_rnn(cell, self.encoder_input,
                self.posts_length, dtype=tf.float32, scope="encoder")
        # get output projection function
        output_fn, sampled_sequence_loss = output_projection_layer(num_units,
                num_symbols, num_qwords, num_samples, question_data)

        print "encoder_output.shape:", encoder_output.get_shape()

        # get attention function
        attention_keys, attention_values, attention_score_fn, attention_construct_fn \
              = attention_decoder_fn.prepare_attention(encoder_output, 'luong', num_units)

        # get decoding loop function
        decoder_fn_train = attention_decoder_fn.attention_decoder_fn_train(encoder_state,
                attention_keys, attention_values, attention_score_fn, attention_construct_fn)
        decoder_fn_inference = attention_decoder_fn.attention_decoder_fn_inference(output_fn,
                self.keyword_tensor,
                encoder_state, attention_keys, attention_values, attention_score_fn,
                attention_construct_fn, self.embed, GO_ID, EOS_ID, max_length, num_symbols)

        if is_train:
            # rnn decoder
            self.decoder_output, _, _ = dynamic_rnn_decoder(cell, decoder_fn_train,
                    self.decoder_input, self.responses_length, scope="decoder")
            # calculate the loss of decoder
            # self.decoder_output = tf.Print(self.decoder_output, [self.decoder_output])
            self.decoder_loss, self.log_perplexity = sampled_sequence_loss(self.decoder_output,
                    self.responses_target, self.decoder_mask, self.keyword_tensor, self.word_type)

            # building graph finished and get all parameters
            self.params = tf.trainable_variables()

            for item in tf.trainable_variables():
                print(item.name, item.get_shape())

            # initialize the training process
            self.learning_rate = tf.Variable(float(learning_rate), trainable=False,
                    dtype=tf.float32)
            self.learning_rate_decay_op = self.learning_rate.assign(
                    self.learning_rate * learning_rate_decay_factor)

            self.global_step = tf.Variable(0, trainable=False)

            # calculate the gradient of parameters

            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            gradients = tf.gradients(self.decoder_loss, self.params)
            clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients,
                    max_gradient_norm)
            self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                    global_step=self.global_step)

        else:
            # rnn decoder
            self.decoder_distribution, _, _ = dynamic_rnn_decoder(cell, decoder_fn_inference,
                    scope="decoder")
            print("self.decoder_distribution.shape():",self.decoder_distribution.get_shape())
            self.decoder_distribution = tf.Print(self.decoder_distribution, ["distribution.shape()", tf.reduce_sum(self.decoder_distribution)])
            # generating the response
            self.generation_index = tf.argmax(tf.split(self.decoder_distribution,
                [2, num_symbols-2], 2)[1], 2) + 2 # for removing UNK
            self.generation = tf.nn.embedding_lookup(self.symbols, self.generation_index)

            self.params = tf.trainable_variables()

        self.saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V2,
                max_to_keep=3, pad_step_number=True, keep_checkpoint_every_n_hours=1.0)
    def _build_rnn_decoder_and_recon_x(self,
                                       inputs,
                                       targets,
                                       training,
                                       reuse=False):
        with tf.variable_scope("dec_rec_and_recon_x", reuse=reuse):
            C, T, F = self._model_conf["target_shape"]

            Cell = _cell_dict[self._model_conf["rec_cell_type"]]
            cell = MultiRNNCell([Cell(hu) \
                    for hu in self._model_conf["rec_dec"]])

            if self._model_conf["rec_learn_init"]:
                raise NotImplementedError
            else:
                input_shape = tuple(array_ops.shape(input_) \
                        for input_ in nest.flatten(inputs))
                batch_size = input_shape[0][0]
                init_state = cell.zero_state(batch_size,
                                             self._model_conf["input_dtype"])

            rec_dec_inp = self._model_conf["rec_dec_inp_test"]
            if training:
                rec_dec_inp = self._model_conf["rec_dec_inp_train"]

            if rec_dec_inp is not None:
                n_concur = self._model_conf["rec_dec_concur"]
                if T % n_concur != 0:
                    raise ValueError("total time steps must be " + \
                            "multiples of rec_dec_concur")
                n_frame = T // n_concur
            else:
                # no history is fed back: decode one time step per frame
                n_concur = 1
                n_frame = T
            n_hist = self._model_conf["rec_dec_inp_hist"]
            info("decoder: n_frame=%s, n_concur=%s, n_hist=%s" %
                 (n_frame, n_concur, n_hist))

            def make_hist(hist, new_hist):
                with tf.name_scope("make_hist"):
                    if not self._model_conf["x_conti"]:
                        # TODO add target embedding?
                        new_hist = tf.cast(new_hist, tf.float32)

                    if n_hist > n_concur:
                        diff = n_hist - n_concur
                        return tf.concat([hist[:, :, -diff:, :], new_hist],
                                         axis=-2)
                    else:
                        return new_hist[:, :, -n_hist:, :]

            outputs = []
            if self._model_conf["x_conti"]:
                x_mu, x_logvar, x = [], [], []
            else:
                x_logits, x = [], []
            state_f = init_state
            hist = tf.zeros((array_ops.shape(inputs)[0], C, n_hist, F),
                            dtype=self._model_conf["input_dtype"],
                            name="init_hist")

            for f in range(n_frame):
                input_f = inputs
                if rec_dec_inp:
                    input_f = tf.concat(
                        [inputs,
                         tf.reshape(hist, (-1, C * n_hist * F))],
                        axis=-1,
                        name="input_f_%s" % f)
                if f > 0:
                    tf.get_variable_scope().reuse_variables()

                output_f, state_f = cell(input_f, state_f)
                outputs.append(output_f)

                # TODO: input hist as well (like sampleRNN)?
                if self._model_conf["x_conti"]:
                    x_mu_f, x_logvar_f, x_f = dense_latent(
                        inputs=output_f,
                        num_outputs=C * n_concur * F,
                        mu_nl=self._model_conf["x_mu_nl"],
                        logvar_nl=self._model_conf["x_logvar_nl"],
                        scope="recon_x_f")
                    x_mu.append(
                        tf.reshape(x_mu_f, (-1, C, n_concur, F),
                                   name="recon_x_mu_f_4d"))
                    x_logvar.append(
                        tf.reshape(x_logvar_f, (-1, C, n_concur, F),
                                   name="recon_x_logvar_f_4d"))
                    x.append(
                        tf.reshape(x_f, (-1, C, n_concur, F),
                                   name="recon_x_f_4d"))

                    if rec_dec_inp == "targets":
                        t_slice = slice(f * n_concur, (f + 1) * n_concur)
                        hist = make_hist(hist, targets[:, :, t_slice, :])
                    elif rec_dec_inp == "x_mu":
                        hist = make_hist(hist, x_mu[-1])
                    elif rec_dec_inp == "x":
                        hist = make_hist(hist, x[-1])
                    elif rec_dec_inp:
                        raise ValueError("unsupported rec_dec_inp (%s)" %
                                         (rec_dec_inp))
                else:
                    raise ValueError
                    # n_bins = self._model_conf["n_bins"]
                    # x_logits_f, x_f = cat_dense_latent(
                    #         inputs=output_f,
                    #         num_outputs=C * n_concur * F,
                    #         n_bins=n_bins,
                    #         scope="recon_x_f")
                    # x_logits.append(tf.reshape(
                    #         x_logits_f,
                    #         (-1, C, n_concur, F, n_bins),
                    #         name="recon_x_logits_f_5d"))
                    # x.append(tf.reshape(
                    #         x_f,
                    #         (-1, C, n_concur, F),
                    #         name="recon_x_f_4d"))

                    # if rec_dec_inp == "targets":
                    #     t_slice = slice(f * n_concur, (f + 1) * n_concur)
                    #     hist = make_hist(hist, targets[:, :, t_slice, :])
                    # elif rec_dec_inp == "x_max":
                    #     hist = make_hist(hist, tf.argmax(x_logits[-1], -1))
                    # elif rec_dec_inp == "x":
                    #     hist = make_hist(hist, x[-1])
                    # elif rec_dec_inp:
                    #     raise ValueError("unsupported rec_dec_inp (%s)" % (
                    #             rec_dec_inp))

            # (bs, n_frame, top_rnn_hu)
            outputs = tf.stack(outputs, axis=1, name="rec_outputs")
            x = tf.concat(x, axis=2, name="recon_x_t_4d")

            if self._model_conf["x_conti"]:
                x_mu = tf.concat(x_mu, axis=2, name="recon_x_mu_t_4d")
                x_logvar = tf.concat(x_logvar,
                                     axis=2,
                                     name="recon_x_logvar_t_4d")
                px = [x_mu, x_logvar]
            else:
                x_logits = tf.concat(x_logits,
                                     axis=2,
                                     name="recon_x_logits_t_5d")
                px = x_logits

        return outputs, px, x
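
dense_latent is used here and again in the z2 encoder below, but its definition is not included in this example. A sketch of the expected behaviour (a diagonal-Gaussian latent layer with a reparameterized sample), analogous to the gauss_layer sketch earlier; the body is an assumption, not the original implementation:

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

def dense_latent(inputs, num_outputs, mu_nl=None, logvar_nl=None,
                 reuse=False, scope=None):
    # mean and log-variance heads, then a reparameterized Gaussian sample
    with tf.variable_scope(scope or "dense_latent", reuse=reuse):
        mu = fully_connected(inputs, num_outputs, activation_fn=mu_nl,
                             scope="mu")
        logvar = fully_connected(inputs, num_outputs, activation_fn=logvar_nl,
                                 scope="logvar")
        eps = tf.random_normal(tf.shape(mu))
        sample = mu + tf.exp(0.5 * logvar) * eps
    return mu, logvar, sample
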
    def _build_z2_encoder(self, inputs, z1, reuse=False):
        weights_regularizer = l2_regularizer(self._train_conf["l2_weight"])
        normalizer_fn = batch_norm if self._model_conf["if_bn"] else None
        normalizer_params = None
        if self._model_conf["if_bn"]:
            normalizer_params = {
                "scope": "BatchNorm",
                "is_training": self._feed_dict["is_train"],
                "reuse": reuse
            }
            # TODO: need to upgrade to latest,
            #       which commit support param_regularizers args

        if not hasattr(self, "_debug_outputs"):
            self._debug_outputs = {}

        C, T, F = self._model_conf["target_shape"]
        n_concur = self._model_conf["rec_z2_enc_concur"]
        if T % n_concur != 0:
            raise ValueError("total time steps must be multiples of %s" %
                             (n_concur))
        n_frame = T // n_concur
        info("z2_encoder: n_frame=%s, n_concur=%s" % (n_frame, n_concur))

        # input_dim = np.prod(inputs.get_shape().as_list()[1:])
        # outputs = tf.concat([tf.reshape(inputs, [-1, input_dim]), z1], axis=1)

        with tf.variable_scope("z2_enc", reuse=reuse):
            # recurrent layers
            if self._model_conf["rec_z2_enc"]:
                # reshape to (N, n_frame, n_concur*C*F)
                inputs = array_ops.transpose(inputs, (0, 2, 1, 3))
                inputs_shape = inputs.get_shape().as_list()
                inputs_depth = np.prod(inputs_shape[2:])
                new_shape = (-1, n_frame, n_concur * inputs_depth)
                inputs = tf.reshape(inputs, new_shape)

                # append z1 to each frame
                tiled_z1 = tf.tile(tf.expand_dims(z1, 1), (1, n_frame, 1))
                inputs = tf.concat([inputs, tiled_z1], axis=-1)

                self._debug_outputs["inp_reshape"] = inputs
                if self._model_conf["rec_z2_enc_bi"]:
                    raise NotImplementedError
                else:
                    Cell = _cell_dict[self._model_conf["rec_cell_type"]]
                    cell = MultiRNNCell([Cell(hu) \
                            for hu in self._model_conf["rec_z2_enc"]])

                    if self._model_conf["rec_learn_init"]:
                        raise NotImplementedError
                    else:
                        input_shape = tuple(array_ops.shape(input_) \
                                for input_ in nest.flatten(inputs))
                        batch_size = input_shape[0][0]
                        init_state = cell.zero_state(
                            batch_size, self._model_conf["input_dtype"])

                    rnn_outputs, final_states = dynamic_rnn(
                        cell,
                        inputs,
                        dtype=self._model_conf["input_dtype"],
                        initial_state=init_state,
                        time_major=False,
                        scope="z2_enc_%sL_rec" %
                        len(self._model_conf["rec_z2_enc"]))
                    self._debug_outputs["raw_rnn_out"] = rnn_outputs
                    self._debug_outputs["raw_rnn_final"] = final_states

                    # "rec_z2_enc_out" selects which final states to keep
                    # ("last_..." keeps only the top layer) and, for LSTM
                    # cells, which state components to concatenate
                    # ("..._h", "..._c", or "..._hc").
                    if self._model_conf["rec_z2_enc_out"].startswith("last"):
                        final_states = final_states[-1:]

                    if self._model_conf["rec_cell_type"] == "lstm":
                        state_parts = self._model_conf[
                            "rec_z2_enc_out"].split("_")[1]
                        outputs = []
                        for state in final_states:
                            if "h" in state_parts:
                                outputs.append(state.h)
                            if "c" in state_parts:
                                outputs.append(state.c)
                    else:
                        outputs = final_states

                    outputs = tf.concat(outputs, axis=-1)
                    self._debug_outputs["concat_rnn_out"] = outputs
            else:
                input_dim = np.prod(inputs.get_shape().as_list()[1:])
                outputs = tf.concat([tf.reshape(inputs, [-1, input_dim]), z1],
                                    axis=1)

            # fully connected layers
            output_dim = np.prod(outputs.get_shape().as_list()[1:])
            outputs = tf.reshape(outputs, [-1, output_dim])

            for i, hu in enumerate(self._model_conf["hu_z2_enc"]):
                outputs = fully_connected(
                    inputs=outputs,
                    num_outputs=hu,
                    activation_fn=nn.relu,
                    normalizer_fn=normalizer_fn,
                    normalizer_params=normalizer_params,
                    weights_regularizer=weights_regularizer,
                    reuse=reuse,
                    scope="z2_enc_fc%s" % (i + 1))

            z2_mu, z2_logvar, z2 = dense_latent(
                outputs,
                self._model_conf["n_latent2"],
                logvar_nl=self._model_conf["z2_logvar_nl"],
                reuse=reuse,
                scope="z2_enc_lat")

        return [z2_mu, z2_logvar], z2
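
The [z2_mu, z2_logvar] pair returned above follows the usual Gaussian-posterior
convention used throughout these examples. As a rough sketch only (the body of
dense_latent is not shown here, and the helper name below is hypothetical),
such a pair is typically turned into a reparameterized sample and a KL term
against a standard-normal prior as follows:

import tensorflow as tf

def gauss_sample_and_kl(mu, logvar):
    # Reparameterization trick: sample = mu + sigma * eps, with eps ~ N(0, I).
    eps = tf.random_normal(tf.shape(mu), dtype=mu.dtype)
    sample = mu + tf.exp(0.5 * logvar) * eps
    # Per-example KL( N(mu, diag(sigma^2)) || N(0, I) ).
    kl = -0.5 * tf.reduce_sum(
        1. + logvar - tf.square(mu) - tf.exp(logvar), axis=-1)
    return sample, kl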
Exemple #10
0
  def __init__(self,
    model_parameters,
    training_parameters,
    directories,
    **kwargs):
    """ Initialization of the RNN model as a TensorFlow computational graph
    """

    self.model_parameters = model_parameters
    self.training_parameters = training_parameters
    self.directories = directories

    # Define model hyperparameter Tensors
    with tf.name_scope("Parameters"):
      self.learning_rate = tf.placeholder(tf.float32, 
        name="learning_rate")
      self.momentum = tf.placeholder(tf.float32, 
        name="momentum")
      self.input_keep_probability = tf.placeholder(tf.float32, 
        name="input_keep_probability")
      self.output_keep_probability = tf.placeholder(tf.float32, 
        name="output_keep_probability")


      self.is_training = tf.placeholder(tf.bool)

    # Define input, output and initialization Tensors
    with tf.name_scope("Input"):
      self.inputs = tf.placeholder(tf.float32, [None,
        self.model_parameters.sequence_length,
        self.model_parameters.input_dimension],
        name='input_placeholder')

      self.targets = tf.placeholder(tf.float32, [None,
        self.model_parameters.sequence_length,
        1],
        name='labels_placeholder')

      self.init = tf.placeholder(tf.float32, shape=[None,
        self.model_parameters.state_size],
        name="init")

    # Define the TensorFlow RNN computational graph
    with tf.name_scope("LSTMRNN_RNN"):
      cells = []

      # Define the layers
      for _ in range(self.model_parameters.n_layers):
        if self.model_parameters.model == 'rnn':
          cell = BasicRNNCell(self.model_parameters.state_size)
        elif self.model_parameters.model == 'gru':
          cell = GRUCell(self.model_parameters.state_size)
        elif self.model_parameters.model == 'lstm':
          cell = BasicLSTMCell(self.model_parameters.state_size, state_is_tuple=True)
        elif self.model_parameters.model == 'nas':
          cell = NASCell(self.model_parameters.state_size)
        else:
          raise ValueError("model type not supported: {}".format(self.model_parameters.model))

        # Wrap the cell with dropout on its inputs and/or outputs when the
        # corresponding keep probability is below 1.0.
        if self.model_parameters.output_keep_probability < 1.0:
          cell = DropoutWrapper(cell,
            output_keep_prob=self.output_keep_probability)

        if self.model_parameters.input_keep_probability < 1.0:
          cell = DropoutWrapper(cell,
            input_keep_prob=self.input_keep_probability)

        cells.append(cell)
      cell = MultiRNNCell(cells)

      # Simulate time steps and get RNN cell output
      self.outputs, self.next_state = tf.nn.dynamic_rnn(cell, self.inputs, dtype=tf.float32)


    # Define cost Tensors
    with tf.name_scope("LSTMRNN_Cost"):

      # Flatten to apply same weights to all time steps
      self.flattened_outputs = tf.reshape(self.outputs, [-1, 
        self.model_parameters.state_size], 
        name="flattened_outputs")

      self.output_w = tf.Variable(tf.truncated_normal([
        self.model_parameters.state_size, 
        1], stddev=0.01), 
        name="output_weights")

      self.variable_summaries(self.output_w, 'output_weights')

      self.output_b = tf.Variable(tf.constant(0.1),
        name="output_biases")

      self.variable_summaries(self.output_b, 'output_biases')

      # Define decision threshold Tensor
      self.decision_threshold = tf.Variable(self.model_parameters.threshold, 
        name="decision_threshold")

      # Define moving average step Tensor
      self.ma_step = tf.Variable(self.model_parameters.ma_step, 
        name="ma_step")

      # Linear output layer applied to every flattened time step; the
      # resulting logits are reshaped back to
      # [batch_size, sequence_length, 1] to match the targets.

      self.logits = tf.add(tf.matmul(self.flattened_outputs, self.output_w),
        self.output_b, 
        name="logits")

      self.logits_bn = self.batch_norm_wrapper(inputs=self.logits, 
        is_training=self.is_training)

      tf.summary.histogram('logits', self.logits)
      tf.summary.histogram('logits_bn', self.logits_bn)

      self.predictions = tf.reshape(self.logits, 
        [-1, self.model_parameters.sequence_length, 1], 
        name="predictions")

      self.shaped_predictions = tf.reshape(self.predictions, 
        [-1], 
        name="shaped_predictions")

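      # Pad the flattened predictions by repeating the last value
      # (ma_step - 1) times so the moving-average window computed in
      # ma_while_body has enough samples at the end of the sequence.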
      self.tmp_smoothed_predictions = tf.concat([self.shaped_predictions,
        tf.fill(tf.expand_dims(self.ma_step-1, 0), self.shaped_predictions[tf.shape(self.shaped_predictions)[0]-1])],
        axis=0,
        name="tmp_smoothed_predictions")

      self.ma_loop_idx = tf.constant(0, dtype='int32')
      self.shaped_smoothed_predictions = tf.zeros([0], dtype='float32')

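      # Smooth the padded predictions with a tf.while_loop; ma_while_body
      # is defined elsewhere in the class.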
      _, self.shaped_smoothed_predictions = tf.while_loop(lambda i, _: i < tf.shape(self.shaped_predictions)[0],
        self.ma_while_body,
        [self.ma_loop_idx, self.shaped_smoothed_predictions],
        shape_invariants=[tf.TensorShape([]),
        tf.TensorShape([None])])

      self.smoothed_predictions = tf.reshape(self.shaped_smoothed_predictions, 
        [-1, self.model_parameters.sequence_length, 1], 
        name="smoothed_predictions")


      self.soft_predictions_summary = tf.summary.tensor_summary("soft_predictions", self.smoothed_predictions)
      # self.soft_predictions_summary = tf.summary.tensor_summary("soft_predictions", self.predictions)

      # self.shaped_logits = tf.reshape(self.logits, 
      #   [-1, self.model_parameters.sequence_length, 1], 
      #   name="shaped_logits")

      # Cross-Entropy
      # self.cost = tf.reduce_mean(-tf.reduce_sum(
      #   self.targets * tf.log(self.predictions), 
      #   reduction_indices=[2]), name="cross_entropy")


      # self.cross_entropy = tf.reduce_mean(
      #   tf.nn.sigmoid_cross_entropy_with_logits(_sentinel=None,
      #     labels=self.targets,
      #     logits=self.predictions),
      #   name="cross_entropy")

      # self.cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
      #   _sentinel=None,
      #   labels=self.targets,
      #   logits=self.shaped_logits,
      #   name="cross_entropy")

      # Root Mean Squared Error
      # self.mean_squared_error = tf.losses.mean_squared_error(
      #   labels=self.targets,
      #   predictions=self.predictions)

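      # Active cost: root mean squared error between the smoothed
      # predictions and the targets (the alternatives above are kept
      # commented out).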
      self.cost = tf.sqrt(tf.reduce_mean(
        tf.squared_difference(
          self.smoothed_predictions, self.targets)))

      # self.cost = tf.sqrt(tf.reduce_mean(
      #   tf.squared_difference(
      #     self.predictions, self.targets)))

      tf.summary.scalar('training_cost', self.cost)

      # self.cost = tf.reduce_mean(
      #   self.cross_entropy,
      #   name="cost")

      voicing_condition = tf.greater(self.smoothed_predictions, 
        tf.fill(tf.shape(self.smoothed_predictions), self.decision_threshold),
        name="thresholding")

      # voicing_condition = tf.greater(self.predictions, 
      #   tf.fill(tf.shape(self.predictions), self.decision_threshold),
      #   name="thresholding")


      self.label_predictions = tf.where(voicing_condition, 
        tf.ones_like(self.smoothed_predictions) , 
        tf.zeros_like(self.smoothed_predictions),
        name="label_predictions")

      # self.label_predictions = tf.where(voicing_condition, 
      #   tf.ones_like(self.predictions) , 
      #   tf.zeros_like(self.predictions),
      #   name="label_predictions")

      self.hard_predictions_summary = tf.summary.tensor_summary("hard_predictions", self.label_predictions)


      self.correct_prediction = tf.equal(self.label_predictions, 
        self.targets, 
        name="correct_predictions")

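      # Flattened reference (r) and hypothesis (h) label sequences used by
      # the while loop below to accumulate the miss / false-alarm counts
      # behind the pk metric.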
      self.r = tf.reshape(self.targets, [-1])
      self.h = tf.reshape(self.label_predictions, [-1])

      # Defined outside the while loop to avoid problems
      self.dump_one = tf.constant(1, dtype=tf.int32, shape=[])

      self.temp_pk_miss = tf.Variable([0], dtype=tf.int32, name='temp_pk_miss')
      self.temp_pk_falsealarm = tf.Variable([0], dtype=tf.int32, name='temp_pk_falsealarm')
      self.loop_idx = tf.constant(0, dtype=tf.int32, name='loop_idx')
        
      self.loop_vars = self.loop_idx, self.temp_pk_miss, self.temp_pk_falsealarm
        
      _, self.all_temp_pk_miss,  self.all_temp_pk_falsealarm = tf.while_loop(
        self.while_condition,
        self.while_body,
        self.loop_vars,
        shape_invariants=(self.loop_idx.get_shape(), tf.TensorShape([None]), tf.TensorShape([None])))
        
      self.pk_miss = tf.reduce_mean(
        tf.cast(self.all_temp_pk_miss, tf.float32))
      tf.summary.scalar('p_miss', self.pk_miss)
      
      self.pk_falsealarm = tf.reduce_mean(
        tf.cast(self.all_temp_pk_falsealarm, tf.float32))
      tf.summary.scalar('p_falsealarm', self.pk_falsealarm)

      self.pk = tf.reduce_mean(
        tf.cast(
          tf.add(self.all_temp_pk_miss, self.all_temp_pk_falsealarm), 
          tf.float32),
        name='pk')

      tf.summary.scalar('pk', self.pk)
      
      self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32), 
        name="accuracy")

      tf.summary.scalar('accuracy', self.accuracy)

      self.recall, self.update_op_recall = tf.metrics.recall(
        labels=self.targets,
        predictions=self.label_predictions,
        name="recall")

      tf.summary.scalar('recall', self.recall)

      self.precision, self.update_op_precision = tf.metrics.precision(
        labels=self.targets,
        predictions=self.label_predictions,
        name="precision")

      tf.summary.scalar('precision', self.precision)



    # Define Training Tensors
    with tf.name_scope("LSTMRNN_Train"):

      # Momentum optimisation
      self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, 
        momentum=self.momentum, 
        name="optimizer")


      self.train_step = self.optimizer.minimize(self.cost, 
        name="train_step")

      # Initializing the variables
      self.initializer = tf.group(tf.global_variables_initializer(),
        tf.local_variables_initializer())
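
A minimal driver sketch, assuming TF1-style sessions, of how the graph built in
this __init__ might be run; model, batches, and the literal hyperparameter
values below are illustrative assumptions, while the placeholders and ops that
are fed and fetched are the ones defined above:

# Hypothetical training loop; only the attributes created in __init__
# (inputs, targets, learning_rate, momentum, keep probabilities,
# is_training, train_step, cost, initializer) come from the code above.
with tf.Session() as sess:
  sess.run(model.initializer)
  for inputs_batch, targets_batch in batches:  # user-provided iterator
    _, cost = sess.run(
      [model.train_step, model.cost],
      feed_dict={
        model.inputs: inputs_batch,
        model.targets: targets_batch,
        model.learning_rate: 0.01,
        model.momentum: 0.9,
        model.input_keep_probability: 1.0,
        model.output_keep_probability: 1.0,
        model.is_training: True,
      })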