Example #1
    def encode(self, inputs, sequence_length):
        with tf.variable_scope(self.shared_scope or "RNNEncoder") as scope:
            # TODO: flatten tensors with rank >= 4 down to rank-3 tensors.
            sequence_length, _ = flatten(sequence_length, 1)
            inputs, prev_shape = flatten(
                inputs, 3)  # [*, max_sequence_length, hidden_size]
            output_shape = prev_shape[:-1] + [self.output_size]
            state_shape = prev_shape[:-2] + [self.output_size]

            outputs, state = tf.nn.bidirectional_dynamic_rnn(
                self.cell_fw,
                self.cell_bw,
                inputs,
                sequence_length=sequence_length,
                dtype=tf.float32,
                scope=scope)
            with tf.variable_scope("outputs"):
                outputs = tf.concat(outputs, -1)
                outputs = linear(outputs, self.output_size)
                outputs = tf.nn.dropout(outputs, self.keep_prob)

            with tf.variable_scope("state"):
                state = merge_state(state)
                state = linear(state, self.output_size)
            outputs = tf.reshape(outputs, output_shape)
            state = tf.reshape(state, state_shape)
        return outputs, state
Example #2
            def attention(decoder_state, response_last_hidden):
                with tf.variable_scope("Attention"):
                    with tf.variable_scope("decoder_features"):
                        decoder_features = tf_utils.linear(
                            decoder_state, attention_vec_size,
                            True)  # shape (batch_size, attention_vec_size)
                        decoder_features = tf.expand_dims(
                            tf.expand_dims(decoder_features, 1), 1
                        )  # reshape to (batch_size, 1, 1, attention_vec_size)
                    with tf.variable_scope("response_features"):
                        response_features = tf_utils.linear(
                            response_last_hidden, attention_vec_size, True)
                        response_features = tf.expand_dims(
                            tf.expand_dims(response_features, 1), 1)

                    def masked_attention(e):
                        attn_dist = tf.nn.softmax(
                            e)  # take softmax. shape (batch_size, attn_length)
                        masked_sums = tf.reduce_sum(
                            attn_dist, axis=1)  # shape (batch_size)
                        return attn_dist / tf.reshape(masked_sums,
                                                      [-1, 1])  # re-normalize

                    e = tf.reduce_sum(
                        v * tf.tanh(encoder_features + decoder_features +
                                    response_features), [2, 3])  # calculate e
                    attn_dist = masked_attention(e)
                    context_vector = tf.reduce_sum(
                        tf.reshape(attn_dist,
                                   [-1, self.config.max_utter_len, 1, 1]) *
                        encoder_states,
                        [1, 2])  # shape (batch_size, attn_size).
                    context_vector = tf.reshape(context_vector,
                                                [-1, attn_size])
                return context_vector, attn_dist
Example #3
    def encode(self, input_embeddings, sequence_length):
        with tf.variable_scope(self.shared_scope
                               or "SentenceEncoder") as scope:
            if self.cell_bw is not None:
                outputs, state = tf.nn.bidirectional_dynamic_rnn(
                    self.cell_fw,
                    self.cell_bw,
                    input_embeddings,
                    sequence_length=sequence_length,
                    dtype=tf.float32,
                    scope=scope)
                with tf.variable_scope("outputs"):
                    outputs = tf.concat(outputs, 2)
                    outputs = linear(outputs, self.rnn_size)
                    outputs = tf.nn.dropout(outputs, self.keep_prob)

                with tf.variable_scope("state"):
                    state = merge_state(state)
                    state = linear(state, self.rnn_size)
            else:
                outputs, state = tf.nn.dynamic_rnn(
                    self.cell_fw,
                    input_embeddings,
                    sequence_length=sequence_length,
                    dtype=tf.float32,
                    scope=scope)
        return outputs, state
Example #4
def _get_distribution(state, output_size):
    h = state
    num_layers = 1
    for i in range(num_layers):
        with tf.variable_scope('linear%d' % i) as scope:
            h = linear(h, output_size, scope=scope)
    with tf.variable_scope('Mean'):
        mean = linear(h, output_size, activation=None)
    with tf.variable_scope('Var'):
        var = linear(h, output_size, activation=tf.nn.softplus)
    return tfd.MultivariateNormalDiag(mean, var)
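
A minimal usage sketch for the helper above (assuming TensorFlow 1.x graph mode, tensorflow_probability available as tfd, and the same linear() helper in scope; none of these are shown in the example):

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

state = tf.placeholder(tf.float32, [None, 256])  # e.g. a final encoder state
dist = _get_distribution(state, output_size=64)  # tfd.MultivariateNormalDiag

z = dist.sample()            # [batch_size, 64] sample from the diagonal Gaussian
log_prob = dist.log_prob(z)  # [batch_size] log-density of that sample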
Example #5
    def encode(self, input_embeddings, sequence_length):
        with tf.variable_scope(self.shared_scope
                               or "MultiEncoderWrapper") as scope:
            outputs, state = zip(*[
                e.encode(input_embeddings, sequence_length)
                for e in self.encoders
            ])
            with tf.variable_scope('outputs'):
                outputs = tf.concat(outputs, 2)
                outputs = linear(outputs, self.rnn_size)
            with tf.variable_scope('state'):
                state = merge_state(state, self.rnn_size)
        return outputs, state
Example #6
def merge_state(state, rnn_size, activation=tf.nn.tanh):
    """
  This function assumes that the state is an output from 'tf.nn.bidirectional_dynamic_rnn' i.e. state = (fw_state, bw_state). the state can also be a nested tuple such as state = ((fw_state_0, fw_state_1, ...), (bw_state_0, bw_state_1)) if our RNN has multiple layers. 
  """

    if not type(state) == tuple:
        raise ValueError
    if isinstance(state[0], LSTMStateTuple):
        raise NotImplementedError

    # linear() combines the forward and backward states (each of shape
    # [batch_size, rnn_size]) by concatenating them and projecting the result
    # back to a [batch_size, rnn_size] tensor, so that the encoder's and the
    # decoder's state sizes match.

    if type(state[0]) == tuple:  # num_layers >= 2
        new_state = []
        for fs, bs in zip(*state):
            ns = tf.concat([fs, bs], axis=-1)
            if rnn_size is not None:
                ns = linear(ns, rnn_size, activation=activation)
            new_state.append(ns)
        new_state = tuple(new_state)
    else:
        new_state = tf.concat(state, 1)
        new_state = linear(new_state, rnn_size, activation=activation)
    return new_state
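
A minimal usage sketch for merge_state, assuming TensorFlow 1.x graph mode, GRU cells (so each direction's state is a plain [batch_size, rnn_size] tensor; as noted above, the LSTMStateTuple case raises NotImplementedError), and the same linear() helper in scope:

import tensorflow as tf

rnn_size = 128
inputs = tf.placeholder(tf.float32, [None, None, 300])  # [batch, time, embed]
lengths = tf.placeholder(tf.int32, [None])               # [batch]

cell_fw = tf.nn.rnn_cell.GRUCell(rnn_size)
cell_bw = tf.nn.rnn_cell.GRUCell(rnn_size)
outputs, state = tf.nn.bidirectional_dynamic_rnn(
    cell_fw, cell_bw, inputs, sequence_length=lengths, dtype=tf.float32)

# state == (fw_state, bw_state); merge_state concatenates the two directions
# and projects the result back to rnn_size, e.g. for a decoder's initial state.
merged = merge_state(state, rnn_size)  # [batch_size, rnn_size]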
Example #7
    def build_model(self):
        # Build the Computation Graph
        inputs = tf.nn.embedding_lookup(
            self.data.embed, self.input_x)  # [batch_size, sent_len, embed_size]
        avg_pooling = tf_utils.AvgPooling(inputs, self.input_x_len,
                                          self.seq_len)
        logits = tf_utils.linear(avg_pooling,
                                 self.num_class,
                                 bias=True,
                                 scope='softmax')

        # Obtain the Predict, Loss, Train_op
        predict_prob = tf.nn.softmax(logits, name='predict_prob')
        predict_label = tf.cast(tf.argmax(logits, 1), tf.int32)
        with tf.name_scope("loss"):
            loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=self.input_y)
            loss = tf.reduce_mean(loss)
            l2_loss = tf.add_n([
                tf.nn.l2_loss(v) for v in tf.trainable_variables()
                if v.get_shape().ndims > 1
            ])
            reg_loss = loss + self.config.lambda_l2 * l2_loss
            # Build the train_op
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            # optimizer = tf.train.AdagradOptimizer(self.learning_rate)
            if self.config.clipper:
                tvars = tf.trainable_variables()
                grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                                  self.config.clipper)
                train_step = optimizer.apply_gradients(list(zip(grads, tvars)))
            else:
                train_step = optimizer.minimize(
                    loss, global_step=self.global_step_tensor)
            self.predict_prob = predict_prob
            self.predict_label = predict_label
            self.logits = logits
            self.loss = loss
            self.reg_loss = reg_loss
            self.train_step = train_step
Example #8
    def build_model(self):
        # Build the Computation Graph
        self.layers = self.config.layers
        self.lstm_size = self.config.lstm_size
        inputs = tf.nn.embedding_lookup(
            self.data.embed, self.input_x)  # [batch_size, sent_len, embed_size]

        def BiLSTM(input_x,
                   input_x_len,
                   hidden_size,
                   num_layers=1,
                   dropout_keep_rate=None,
                   return_sequence=True):
            def lstm_cell():
                return tf.contrib.rnn.BasicLSTMCell(hidden_size)

            def gru_cell():
                return tf.contrib.rnn.GRUCell(hidden_size)

            cell_fw = lstm_cell()
            cell_bw = lstm_cell()

            if num_layers > 1:
                cell_fw = tf.contrib.rnn.MultiRNNCell(
                    [lstm_cell() for _ in range(num_layers)])
                cell_bw = tf.contrib.rnn.MultiRNNCell(
                    [lstm_cell() for _ in range(num_layers)])

            if dropout_keep_rate is not None:
                cell_fw = tf.contrib.rnn.DropoutWrapper(
                    cell_fw, output_keep_prob=dropout_keep_rate)
                cell_bw = tf.contrib.rnn.DropoutWrapper(
                    cell_bw, output_keep_prob=dropout_keep_rate)

            b_outputs, b_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                input_x,
                sequence_length=input_x_len,
                dtype=tf.float32)
            if return_sequence:
                outputs = tf.concat(b_outputs, axis=2)
            else:
                # each direction's state is an LSTMStateTuple (c, h); index [1] takes h
                outputs = tf.concat([b_states[0][1], b_states[1][1]], axis=-1)
            return outputs

        with tf.variable_scope("bilstm") as s:
            lstm_x = BiLSTM(inputs,
                            self.input_x_len,
                            self.lstm_size,
                            num_layers=self.layers,
                            dropout_keep_rate=self.drop_keep_rate,
                            return_sequence=True)

        avg_pooling = tf_utils.AvgPooling(inputs, self.input_x_len,
                                          self.seq_len)
        max_pooling = tf_utils.MaxPooling(lstm_x, self.input_x_len)
        logits = tf_utils.linear([max_pooling, avg_pooling],
                                 self.num_class,
                                 bias=True,
                                 scope='softmax')

        # Obtain the Predict, Loss, Train_op
        predict_prob = tf.nn.softmax(logits, name='predict_prob')
        predict_label = tf.cast(tf.argmax(logits, 1), tf.int32)
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                       labels=self.input_y)
        loss = tf.reduce_mean(loss)
        l2_loss = tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if v.get_shape().ndims > 1
        ])
        reg_loss = loss + self.config.lambda_l2 * l2_loss
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        if self.config.clipper:
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                              self.config.clipper)
            train_step = optimizer.apply_gradients(list(zip(grads, tvars)))
        else:
            train_step = optimizer.minimize(
                loss, global_step=self.global_step_tensor)
        self.predict_prob = predict_prob
        self.predict_label = predict_label
        self.loss = loss
        self.reg_loss = reg_loss
        self.train_step = train_step
Example #9
    def build_model(self):
        # Build the Computation Graph
        self.filter_sizes = self.config.filter_sizes
        self.num_filters = self.config.num_filters
        self.initializer = tf.random_normal_initializer(stddev=0.1)
        inputs = tf.nn.embedding_lookup(self.data.embed, self.input_x)
        inputs_ = tf.expand_dims(inputs, -1)
        pooled_outputs = []
        for i, filter_size in enumerate(self.filter_sizes):
            with tf.name_scope("convolution-pooling-%s" % filter_size):
                filter = tf.get_variable(
                    "filter-%s" % filter_size,
                    [filter_size, self.embed_size, 1, self.num_filters],
                    initializer=self.initializer)
                conv = tf.nn.conv2d(inputs_,
                                    filter,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")
                b = tf.get_variable("b-%s" % filter_size, [self.num_filters])
                h = tf.nn.relu(tf.nn.bias_add(conv, b), "relu")
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, self.seq_len - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)
        h_pool = tf.concat(pooled_outputs, -1)
        num_filters_total = self.num_filters * len(self.filter_sizes)
        outputs = tf.reshape(h_pool, [-1, num_filters_total])
        if self.drop_keep_rate is not None:
            outputs = tf.nn.dropout(outputs, keep_prob=self.drop_keep_rate)
        logits = tf_utils.linear(outputs,
                                 self.num_class,
                                 bias=True,
                                 scope='softmax')

        # Obtain the Predict, Loss, Train_op
        predict_prob = tf.nn.softmax(logits, name='predict_prob')
        predict_label = tf.cast(tf.argmax(logits, 1), tf.int32)
        with tf.name_scope("loss"):
            loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                        labels=self.input_y))
            l2_loss = tf.add_n([
                tf.nn.l2_loss(v) for v in tf.trainable_variables()
                if v.get_shape().ndims > 1
            ])
            reg_loss = loss + self.config.lambda_l2 * l2_loss
            optimizer = tf.train.AdamOptimizer(self.learning_rate)
            if self.config.clipper:
                tvars = tf.trainable_variables()
                grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars),
                                                  self.config.clipper)
                train_step = optimizer.apply_gradients(list(zip(grads, tvars)))
            else:
                train_step = optimizer.minimize(
                    loss, global_step=self.global_step_tensor)
            self.predict_prob = predict_prob
            self.predict_label = predict_label
            self.logits = logits
            self.loss = loss
            self.reg_loss = reg_loss
            self.train_step = train_step