Example #1
def rnn_forward(config, inputs, scope=None):
    with tf.variable_scope(scope or "forward"):

        JX, JQ = config.max_context_size, config.max_ques_size
        d = config.hidden_size
        x, x_len, q, q_len = [inputs[key] for key in ['x', 'x_len', 'q', 'q_len']]
        x_mask = tf.sequence_mask(x_len, JX)
        q_mask = tf.sequence_mask(q_len, JQ)

        # emb_mat = tf.get_variable('emb_mat', shape=[V, d])
        emb_mat = config.emb_mat_ph if config.serve else config.emb_mat
        emb_mat = tf.slice(emb_mat, [2, 0], [-1, -1])
        emb_mat = tf.concat([tf.get_variable('emb_mat', shape=[2, d]), emb_mat], axis=0)
        xx = tf.nn.embedding_lookup(emb_mat, x, name='xx')  # [N, JX, d]
        qq = tf.nn.embedding_lookup(emb_mat, q, name='qq')  # [N, JQ, d]
        
        # now run the embedded xx and qq through the bidirectional GRU encoders below
        with tf.variable_scope('xx-encoder'):
            fw_xx_cell = GRUCell(d)
            fw_xx_cell = DropoutWrapper(cell=fw_xx_cell, output_keep_prob=config.keep_prob)

            bw_xx_cell = GRUCell(d)
            bw_xx_cell = DropoutWrapper(cell=bw_xx_cell, output_keep_prob=config.keep_prob)
            outputs_xx, _ = bidirectional_dynamic_rnn(
                    fw_xx_cell, bw_xx_cell, xx, dtype=tf.float32)

            with tf.variable_scope('qq-encoder'):
                fw_qq_cell = GRUCell(d)
                fw_qq_cell = DropoutWrapper(cell=fw_qq_cell, output_keep_prob=config.keep_prob)
                bw_qq_cell = GRUCell(d)
                bw_qq_cell = DropoutWrapper(cell=bw_qq_cell, output_keep_prob=config.keep_prob)
               
                outputs_qq, _ = bidirectional_dynamic_rnn(
                        fw_qq_cell, bw_qq_cell, qq, dtype=tf.float32)

                xx_fwbw = tf.concat(outputs_xx, 2)  # [N, JX, 2d]
                qq_fwbw = tf.concat(outputs_qq, 2)  # [N, JQ, 2d]
                qq_avg = tf.reduce_mean(bool_mask(qq_fwbw, q_mask, expand=True), axis=1)  # [N, 2d]
                qq_avg_exp = tf.expand_dims(qq_avg, axis=1)  # [N, 1, 2d]
                qq_avg_tiled = tf.tile(qq_avg_exp, [1, JX, 1])  # [N, JX, 2d]

                xq = tf.concat([xx_fwbw, qq_avg_tiled, xx_fwbw * qq_avg_tiled], axis=2)  # [N, JX, 6d]
                xq_flat = tf.reshape(xq, [-1, 2*3*d])  # [N * JX, 6d]
                # Compute logits
                with tf.variable_scope('start'):
                    logits1 = exp_mask(tf.reshape(tf.layers.dense(inputs=xq_flat, units=1), [-1, JX]), x_mask)  # [N, JX]
                    yp1 = tf.argmax(logits1, axis=1)  # [N]
                with tf.variable_scope('stop'):
                    logits2 = exp_mask(tf.reshape(tf.layers.dense(inputs=xq_flat, units=1), [-1, JX]), x_mask)  # [N, JX]
                    yp2 = tf.argmax(logits2, axis=1)  # [N]

                outputs = {'logits1': logits1, 'logits2': logits2, 'yp1': yp1, 'yp2': yp2}
                variables = {'emb_mat': emb_mat}
                return variables, outputs
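Several examples on this page (#1, #18, #20, #21) call bool_mask and exp_mask, which are not shown here. The sketch below is a minimal guess at what such helpers typically look like in this kind of QA code; the constant and the exact signatures are assumptions, not the original implementation.

import tensorflow as tf

VERY_NEGATIVE_NUMBER = -1e30  # assumed constant

def bool_mask(val, mask, expand=False):
    # zero out masked positions; optionally add a trailing dim so a [N, J] mask
    # broadcasts against [N, J, d] values
    if expand:
        mask = tf.expand_dims(mask, -1)
    return val * tf.cast(mask, val.dtype)

def exp_mask(logits, mask):
    # push masked-out logits to a very negative value so softmax/argmax ignore them
    return logits + (1.0 - tf.cast(mask, logits.dtype)) * VERY_NEGATIVE_NUMBER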
Example #2
    def _rnn_layer(self, name, is_train_mode, in_sequence, in_lengths):
        with tf.variable_scope(name):
            fw_cell = tfcontrib.rnn.LSTMCell(
                self.hidden_size,
                state_is_tuple=True,
                initializer=tf.truncated_normal_initializer(stddev=.001))
            bw_cell = tfcontrib.rnn.LSTMCell(
                self.hidden_size,
                state_is_tuple=True,
                initializer=tf.truncated_normal_initializer(stddev=.001))
            if is_train_mode:
                fw_cell = tfcontrib.rnn.DropoutWrapper(
                    fw_cell, output_keep_prob=self.out_keep_prob)
                bw_cell = tfcontrib.rnn.DropoutWrapper(
                    bw_cell, output_keep_prob=self.out_keep_prob)

            output, _ = nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                in_sequence,
                sequence_length=in_lengths,
                time_major=False,
                dtype=tf.float32)
            output = tf.concat(output, 2)
            return output
Example #3
    def _projection_lstm_layer(self):
        """pass"""
        with tf.variable_scope("projection_lstm_layer"):
            cells = {
                "fw": nn.rnn_cell.DropoutWrapper(
                    nn.rnn_cell.LSTMCell(num_units = self.hidden_dim,
                                         initializer = tc.layers.xavier_initializer(),
                                         state_is_tuple = True),
                    output_keep_prob = self.dropout),
                "bw": nn.rnn_cell.DropoutWrapper(
                    nn.rnn_cell.LSTMCell(num_units = self.hidden_dim,
                                         initializer = tc.layers.xavier_initializer(),
                                         state_is_tuple = True),
                    output_keep_prob = self.dropout)}

            outputs, state = nn.bidirectional_dynamic_rnn(
                cell_fw = cells["fw"],
                cell_bw = cells["bw"],
                inputs = self.embedding,
                sequence_length = self.length,
                dtype = tf.float32
            )
            outputs = tf.concat(outputs, axis = 2)

            w = tf.get_variable("W",
                                shape = [self.hidden_dim * 2, self.num_tags],
                                dtype = tf.float32,
                                initializer = tc.layers.xavier_initializer())
            b = tf.get_variable("b", shape = [self.num_tags], dtype = tf.float32,
                                initializer = tf.zeros_initializer())

            # project each timestep: flatten to 2-D, apply W/b, then restore the time axis
            num_steps = tf.shape(outputs)[1]
            outputs = tf.reshape(outputs, [-1, self.hidden_dim * 2])
            outputs = tf.nn.xw_plus_b(outputs, w, b)
            outputs = tf.reshape(outputs, [-1, num_steps, self.num_tags])

        return outputs
Example #4
def BiLSTM_Correlation_BiLSTM(cn, cor, weights, biases):
    # BiLSTM
    lstm_fw_cell = rnn.BasicLSTMCell(n_bilstm_hidden, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(n_bilstm_hidden, forget_bias=1.0)
    bilstm_outputs, _ = nn.bidirectional_dynamic_rnn(lstm_fw_cell, lstm_bw_cell, cn,
                                                     dtype=tf.float32, scope='input')
    bilstm_output = tf.matmul(tf.concat(bilstm_outputs, axis=2)[0], weights['bilstm']) + biases['bilstm']

    # Correlation (attention-style weighting over the BiLSTM output)
    lstm2_input = tf.convert_to_tensor([tf.matmul(cor, bilstm_output)])

    # BiLSTM
    lstm2_fw_cell = rnn.BasicLSTMCell(n_bilstm2_hidden, forget_bias=1.0)
    lstm2_bw_cell = rnn.BasicLSTMCell(n_bilstm2_hidden, forget_bias=1.0)
    bilstm2_outputs, _ = nn.bidirectional_dynamic_rnn(lstm2_fw_cell, lstm2_bw_cell, lstm2_input,
                                                      dtype=tf.float32, scope='output')
    bilstm2_output = tf.matmul(tf.concat(bilstm2_outputs, axis=2)[0], weights['bilstm2']) + biases['bilstm2']

    return bilstm2_output
Example #5
def BiLSTM(x, x_len, n_hidden, biRnnScopeName):

    lstm_fw_cell = rnn.LSTMCell(n_hidden)

    lstm_bw_cell = rnn.LSTMCell(n_hidden)

    outputs, output_states = nn.bidirectional_dynamic_rnn(
        lstm_fw_cell,
        lstm_bw_cell,
        x,
        dtype=tf.float32,
        sequence_length=x_len,
        scope=biRnnScopeName)
    return outputs, output_states
Example #6
def BiGRU(x, x_len, n_hidden, biRnnScopeName):

    gru_fw_cell = rnn.GRUCell(n_hidden)

    gru_bw_cell = rnn.GRUCell(n_hidden)

    outputs, output_states = nn.bidirectional_dynamic_rnn(
        gru_fw_cell,
        gru_bw_cell,
        x,
        dtype=tf.float32,
        sequence_length=x_len,
        scope=biRnnScopeName)
    return outputs, output_states
Example #7
def separable_lstm2(cell,
                    num_units,
                    inputs,
                    seq_lengths1,
                    seq_lengths2,
                    scope=None):
    """Run bidirectional LSTMs first horizontally then vertically.
  Args:
    cell: an RNN cell
    num_units: number of neurons
    inputs: input sequence (length, batch_size, ninput)
    sequence_lengths: array of length 'batch_size' containing sequence_lengths
    scope: optional scope name
  Returns:
    (batch_size, height, width, num_units*2) tensor
  """
    with variable_scope.variable_scope(scope, "SeparableLstm", [inputs]):
        batch_size = tf.shape(inputs)[0]
        _, height, width, depth = _shape(inputs)
        reshaped = array_ops.reshape(inputs,
                                     [batch_size * width, height, depth])
        _, states = bidirectional_dynamic_rnn(cell(num_units),
                                              cell(num_units),
                                              reshaped,
                                              sequence_length=seq_lengths1,
                                              dtype=tf.float32)
        stacked_state = array_ops.concat(states, 1)
        with variable_scope.variable_scope("vertical"):
            unpacked = array_ops.reshape(stacked_state,
                                         [batch_size, width, num_units * 2])
            _, states = bidirectional_dynamic_rnn(cell(num_units),
                                                  cell(num_units),
                                                  unpacked,
                                                  sequence_length=seq_lengths2,
                                                  dtype=tf.float32)
        return array_ops.concat(states, 1)
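_shape is used in this example but not defined in the excerpt; a plausible version just returns the static shape as a list. The call below is purely illustrative: the cell class, the sizes, and the way the two length vectors are built are assumptions, chosen so that the two passes line up with the reshapes above (GRU cells keep the state concatenation simple).

def _shape(tensor):
    # assumed helper: static shape as a list of Python ints (or None)
    return tensor.get_shape().as_list()

# illustrative call on a 4-D feature grid [batch, height, width, depth]
images = tf.placeholder(tf.float32, [8, 16, 32, 64])
seq_lengths1 = tf.fill([8 * 32], 16)   # horizontal pass: batch*width sequences of length height
seq_lengths2 = tf.fill([8], 32)        # vertical pass: batch sequences of length width
states = separable_lstm2(tf.contrib.rnn.GRUCell, 128, images, seq_lengths1, seq_lengths2)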
Example #8
 def BiRNN(self, x, x_lens, n_steps, n_hidden, biRnnScopeName, dropoutName):
     x = tf.nn.dropout(x, 0.5, name=dropoutName)
     # Forward direction cell
     lstm_fw_cell = rnn.LSTMCell(n_hidden)
     # Backward direction cell
     lstm_bw_cell = rnn.LSTMCell(n_hidden)
     # a named scope is needed to distinguish these cells from other RNNs in the graph
     outputs, output_states = nn.bidirectional_dynamic_rnn(
         lstm_fw_cell,
         lstm_bw_cell,
         x,
         sequence_length=x_lens,
         dtype=tf.float32,
         scope=biRnnScopeName)
     return outputs, output_states
Example #9
def BiLSTM(x, x_len, n_hidden, biRnnScopeName):

    lstm_fw_cell = rnn.LSTMCell(n_hidden)

    lstm_bw_cell = rnn.LSTMCell(n_hidden)

    outputs, output_states = nn.bidirectional_dynamic_rnn(
        lstm_fw_cell,
        lstm_bw_cell,
        x,
        dtype=tf.float32,
        sequence_length=x_len,
        scope=biRnnScopeName)
    # output_states is a tuple (fw, bw); each element is an LSTMStateTuple (c, h).
    return outputs, output_states
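As the comment notes, output_states is a pair of final states, and for LSTM cells each element is an LSTMStateTuple(c, h). A short usage sketch (the placeholder sizes and scope name are made up, assuming the same imports as the example above):

x = tf.placeholder(tf.float32, [None, 20, 300])    # [batch, time, features]
x_len = tf.placeholder(tf.int32, [None])
outputs, output_states = BiLSTM(x, x_len, 100, 'bilstm_usage_example')

state_fw, state_bw = output_states                  # each is an LSTMStateTuple(c, h)
final_hidden = tf.concat([state_fw.h, state_bw.h], axis=1)   # [batch, 2 * 100]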
Example #10
 def biLSTMCell(x, x_lens, hiddenSize):
     # forward LSTM cell
     lstm_fw_cell = rnn.LSTMCell(hiddenSize)
     # backward LSTM cell
     lstm_bw_cell = rnn.LSTMCell(hiddenSize)
     # bidirectional LSTM
     outputs, output_states = nn.bidirectional_dynamic_rnn(
         lstm_fw_cell,
         lstm_bw_cell,
         x,
         sequence_length=x_lens,
         dtype=tf.float32)
     output_fw, output_bw = outputs
     # concatenate the two directions along the feature axis:
     # (batch_size, max_time, hiddenSize) x 2 -> (batch_size, max_time, 2 * hiddenSize)
     return tf.concat([output_fw, output_bw], 2)
Example #11
    def bilstm(self, sequence, sequence_length, lstm_unit, reuse=None):
        with tf.variable_scope('BiLSTM', reuse=reuse, dtype=tf.float32):
            cell_fw = LSTMCell(num_units=lstm_unit,
                               reuse=tf.get_variable_scope().reuse)
            cell_bw = LSTMCell(num_units=lstm_unit,
                               reuse=tf.get_variable_scope().reuse)

            ((output_fw, output_bw),
             _) = bidirectional_dynamic_rnn(cell_fw,
                                            cell_bw,
                                            sequence,
                                            dtype=tf.float32,
                                            sequence_length=sequence_length)

            return tf.concat([output_fw, output_bw],
                             axis=2)  # (batch_size, num_step, lstm_unit * 2)
Example #12
 def forward(self, X, is_training=False):
     if self.cell_type == CellType.Bidir_Dynamic:
         return bidirectional_dynamic_rnn(cell_fw=self.cells,
                                          cell_bw=self.cells,
                                          inputs=X,
                                          dtype=tf.float32)
     elif self.cell_type == CellType.Bidir_Static:
         X = tf.unstack(X, num=self.seq_length, axis=1)
         return static_bidirectional_rnn(cell_fw=self.cells,
                                         cell_bw=self.cells,
                                         inputs=X,
                                         dtype=tf.float32)
     elif self.cell_type == CellType.Dynamic:
         return dynamic_rnn(self.cells, X, dtype=tf.float32)
     elif self.cell_type == CellType.Static:
         X = tf.unstack(X, num=self.seq_length, axis=1)
         return static_rnn(self.cells, X, dtype=tf.float32)
Example #13
def bidirectional_horizontal_lstm(cell,
                                  num_units,
                                  inputs,
                                  seq_lengths,
                                  scope=None):
    he_init = tf.contrib.layers.variance_scaling_initializer()
    with variable_scope.variable_scope(scope, "BiHorizontalLstm", [inputs]):
        batch_size = tf.shape(inputs)[0]
        height = _shape(inputs)[1]
        sequence = grid_to_sequence(inputs)

        forward_cell = cell(num_units)
        backward_cell = cell(num_units)
        outputs, states = bidirectional_dynamic_rnn(
            forward_cell,
            backward_cell,
            sequence,
            sequence_length=seq_lengths,
            time_major=True,
            dtype=tf.float32)
        stacked_state = tf.expand_dims(array_ops.concat(states, 1), 0)
        output = sequence_to_grid(stacked_state, batch_size, height)
        return output
Example #14
 def forward(self, x, computation_mode=MakiRestorable.INFERENCE_MODE):
     if self._cell_type == CellType.BIDIR_DYNAMIC:
         (outputs_f, outputs_b), (states_f, states_b) = \
             bidirectional_dynamic_rnn(cell_fw=self._cells, cell_bw=self._cells, inputs=x, dtype=tf.float32)
         # Creating two separate MakiTensors for `outputs_f` and `outputs_b` is not an option:
         # the algorithm that builds the computational graph does not handle that case and
         # would raise an error, so the two outputs are concatenated instead.
         self._cells_state = tf.concat([states_f, states_b], axis=-1)
         return tf.concat([outputs_f, outputs_b], axis=-1)
     elif self._cell_type == CellType.BIDIR_STATIC:
         x = tf.unstack(x, num=self._seq_length, axis=1)
         outputs_fb, states_f, states_b = \
             static_bidirectional_rnn(cell_fw=self._cells, cell_bw=self._cells, inputs=x, dtype=tf.float32)
         self._cells_state = tf.concat([states_f, states_b], axis=-1)
         return outputs_fb
     elif self._cell_type == CellType.DYNAMIC:
         outputs, states = dynamic_rnn(self._cells, x, dtype=tf.float32)
         self._cells_state = states
         return outputs
     elif self._cell_type == CellType.STATIC:
         x = tf.unstack(x, num=self._seq_length, axis=1)
         outputs, states = static_rnn(self._cells, x, dtype=tf.float32)
         self._cells_state = states
         return tf.stack(outputs, axis=1)
Example #15
    with tf.variable_scope('Forward'):
        fw_cells = [GRUCell(num_units) for _ in range(num_layers)]
        fw_cells = [
            DropoutWrapper(fw_cell, output_keep_prob=keep_prob)
            for fw_cell in fw_cells
        ]
        fw_cells = MultiRNNCell(fw_cells)

    with tf.variable_scope('Backward'):
        bw_cells = [GRUCell(num_units) for _ in range(num_layers)]
        bw_cells = [
            DropoutWrapper(bw_cell, output_keep_prob=keep_prob)
            for bw_cell in bw_cells
        ]
        bw_cells = MultiRNNCell(bw_cells)

    outputs, states = bidirectional_dynamic_rnn(
        fw_cells,
        bw_cells,
        rnn_input,
        dtype=tf.float32,
        sequence_length=sequence_length)
    # NOTE:
    # 'outputs' is a tuple (output_fw, output_bw); each element has shape
    # [batch_size, max_time, num_units].
    # 'states' is a tuple (fw_states, bw_states); with MultiRNNCell each element
    # is itself a tuple with one GRU state per layer.

    fw_states, bw_states = states
    fw_states = fw_states[-1]  #[batch_size,num_of_units]
    bw_states = bw_states[-1]  #[batch_size,num_of_units]
    fc_states = tf.concat([fw_states, bw_states], 1)

with tf.variable_scope('full_connected'):
    fc = tf.contrib.layers.fully_connected(fc_states,
                                           num_class,
Example #16
def build_graph(training_setting):
    tf.reset_default_graph()
    graph = tf.Graph()

    with graph.as_default():
        with tf.name_scope('inputs') as name_scope:
            X_sent = tf.placeholder(
                tf.int32, [None, training_setting['maximum_sent_length']],
                name='x_sent')
            y = tf.placeholder(tf.float32,
                               [None, training_setting['classes_num']],
                               name='y')
            dropout = tf.placeholder(tf.float32, shape=[], name='dropout')

            pretrained_embeddings_input = tf.placeholder(
                tf.float32,
                shape=[
                    training_setting['pretrained_vocab_length'],
                    training_setting['embedding_size']
                ],
                name='pretrained_embeddings_ph')

        with tf.name_scope('embedding') as name_scope:
            reserved_embeddings = tf.Variable(tf.random_uniform([
                training_setting['reserved_vocab_length'],
                training_setting['embedding_size']
            ], -1.0, 1.0),
                                              trainable=True,
                                              name='reserved_embeddings')

            if training_setting['use_pretrained_embeddings']:
                pretrained_embeddings = tf.Variable(
                    tf.random_uniform([
                        training_setting['pretrained_vocab_length'],
                        training_setting['embedding_size']
                    ], -1.0, 1.0),
                    trainable=False,
                    name='pretrained_embeddings')

                assign_pretrained_embeddings = tf.assign(
                    pretrained_embeddings,
                    pretrained_embeddings_input,
                    name='assign_pretrained_embeddings')
            else:

                pretrained_embeddings = tf.Variable(
                    tf.random_uniform([
                        training_setting['pretrained_vocab_length'],
                        training_setting['embedding_size']
                    ], -1.0, 1.0),
                    trainable=True,
                    name='pretrained_embeddings')

            X_sent = tf.where(
                tf.less(X_sent,
                        tf.constant(
                            training_setting['reserved_vocab_length'])),
                X_sent * 2,
                (X_sent - training_setting['reserved_vocab_length']) * 2 + 1)

            word_embeddings_sent = tf.nn.embedding_lookup(
                [reserved_embeddings, pretrained_embeddings],
                X_sent,
                name='word_embeddings_sent')

            # word_embeddings_sent = tf_print(word_embeddings_sent, 'word_embeddings_sent')

        with tf.name_scope('gru_cell_sent') as name_scope:
            gru_forward_sent = rnn.DropoutWrapper(rnn.GRUCell(
                training_setting['hidden_units']),
                                                  output_keep_prob=dropout)
            gru_backward_sent = rnn.DropoutWrapper(rnn.GRUCell(
                training_setting['hidden_units']),
                                                   output_keep_prob=dropout)

            (gru_output_forward,
             gru_output_backward), _ = nn.bidirectional_dynamic_rnn(
                 gru_forward_sent,
                 gru_backward_sent,
                 word_embeddings_sent,
                 dtype=tf.float32,
                 scope=name_scope)
            bidirectional_gru_output_sent = tf.concat(
                axis=2,
                values=(gru_output_forward, gru_output_backward),
                name='output_sent')
            # bidirectional_gru_output_sent1 = tf_print(bidirectional_gru_output_sent1, 'bidirectional_gru_output_sent1')

        with tf.name_scope('pooling') as name_scope:
            W = tf.Variable(
                tf.random_normal([2 * training_setting['hidden_units']],
                                 name='attention_weight'))
            b = tf.Variable(tf.random_normal([1]), name='attention_bias')
            # W = tf_print(W, 'W')
            attentions = tf.reduce_sum(
                tf.multiply(W, bidirectional_gru_output_sent), axis=2) + b
            attentions = tf.nn.softmax(attentions)
            # attentions = tf_print(attentions, 'attentions')

            expand_attentions = tf.expand_dims(attentions, 1)
            transpose_outputs = tf.transpose(bidirectional_gru_output_sent,
                                             perm=[0, 2, 1])

            attentions_output = tf.reduce_sum(tf.transpose(tf.multiply(
                expand_attentions, transpose_outputs),
                                                           perm=[0, 2, 1]),
                                              axis=1)
            # attentions_output = tf_print(attentions_output, 'attentions_output')

        with tf.name_scope('mlp') as name_scope:
            W_mlp = tf.Variable(
                tf.random_normal([
                    2 * training_setting['hidden_units'],
                    training_setting['classes_num']
                ]))
            b_mlp = tf.Variable(
                tf.random_normal([training_setting['classes_num']]))

            logits = tf.matmul(attentions_output, W_mlp) + b_mlp
            probability = tf.nn.softmax(logits, name='probability')
            y_pred = tf.one_hot(tf.argmax(probability, 1),
                                depth=training_setting['classes_num'],
                                name='y_pred')

        with tf.name_scope('loss') as name_scope:
            loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                           labels=y,
                                                           name='loss')

        with tf.name_scope('optimizer') as name_scope:
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=training_setting['learning_rate']).minimize(
                    loss, name='optimizer')
    return graph
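The index remapping in the embedding block above exists because tf.nn.embedding_lookup with a list of parameter tensors uses the default 'mod' partition strategy: a combined id i is fetched from params[i % 2] at row i // 2, so reserved ids land on even combined ids and pretrained ids on odd ones. The tiny standalone demo below (made-up tensors, not from the example) shows that mapping:

import tensorflow as tf

reserved = tf.constant([[0.0], [1.0], [2.0]])    # 3 reserved rows  -> combined ids 0, 2, 4
pretrained = tf.constant([[10.0], [11.0]])       # 2 pretrained rows -> combined ids 1, 3

# remapped as in the graph above: reserved id i -> 2*i, pretrained id j -> 2*j + 1
ids = tf.constant([0, 2, 1, 3])
looked_up = tf.nn.embedding_lookup([reserved, pretrained], ids)

with tf.Session() as sess:
    print(sess.run(looked_up))   # [[0.], [1.], [10.], [11.]]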
Example #17
def build_graph(training_setting):
    tf.reset_default_graph()
    graph = tf.Graph()

    with graph.as_default():
        with tf.name_scope('inputs') as name_scope:
            sequence_length = tf.placeholder(tf.int32, [None],
                                             name='sequence_length')
            X_sent = tf.placeholder(
                tf.int32, [None, training_setting['maximum_sent_length']],
                name='x_sent')
            y_slot = tf.placeholder(
                tf.int32, [None, training_setting['maximum_sent_length']],
                name='y_slot')
            y_intent = tf.placeholder(tf.int32, [None], name='y_intent')
            y_intent = tf.one_hot(y_intent,
                                  depth=training_setting['intent_num'],
                                  dtype=tf.int32)
            dropout = tf.placeholder(tf.float32, shape=[], name='dropout')

            pretrained_embeddings_input = tf.placeholder(
                tf.float32,
                shape=[
                    training_setting['pretrained_vocab_length'],
                    training_setting['embedding_size']
                ],
                name='pretrained_embeddings_ph')

        with tf.name_scope('embedding') as name_scope:
            reserved_embeddings = tf.Variable(tf.random_uniform([
                training_setting['reserved_vocab_length'],
                training_setting['embedding_size']
            ], -1.0, 1.0),
                                              trainable=True,
                                              name='reserved_embeddings')

            if training_setting['use_pretrained_embeddings']:
                pretrained_embeddings = tf.Variable(
                    tf.random_uniform([
                        training_setting['pretrained_vocab_length'],
                        training_setting['embedding_size']
                    ], -1.0, 1.0),
                    trainable=False,
                    name='pretrained_embeddings')

                assign_pretrained_embeddings = tf.assign(
                    pretrained_embeddings,
                    pretrained_embeddings_input,
                    name='assign_pretrained_embeddings')
            else:

                pretrained_embeddings = tf.Variable(
                    tf.random_uniform([
                        training_setting['pretrained_vocab_length'],
                        training_setting['embedding_size']
                    ], -1.0, 1.0),
                    trainable=True,
                    name='pretrained_embeddings')

            X_sent = tf.where(
                tf.less(X_sent,
                        tf.constant(
                            training_setting['reserved_vocab_length'])),
                X_sent * 2,
                (X_sent - training_setting['reserved_vocab_length']) * 2 + 1)

            word_embeddings_sent = tf.nn.embedding_lookup(
                [reserved_embeddings, pretrained_embeddings],
                X_sent,
                name='word_embeddings_sent')

            # word_embeddings_sent = tf_print(word_embeddings_sent, 'word_embeddings_sent')

        with tf.name_scope('gru_cell_sent') as name_scope:
            gru_forward_sent = rnn.DropoutWrapper(rnn.GRUCell(
                training_setting['hidden_units']),
                                                  output_keep_prob=dropout)
            gru_backward_sent = rnn.DropoutWrapper(rnn.GRUCell(
                training_setting['hidden_units']),
                                                   output_keep_prob=dropout)

            (gru_output_forward,
             gru_output_backward), _ = nn.bidirectional_dynamic_rnn(
                 gru_forward_sent,
                 gru_backward_sent,
                 word_embeddings_sent,
                 dtype=tf.float32,
                 sequence_length=sequence_length,
                 scope=name_scope)

            bidirectional_gru_output_sent = tf.concat(
                axis=2,
                values=(gru_output_forward, gru_output_backward),
                name='output_sent')

            # bidirectional_gru_output_sent = tf_print(bidirectional_gru_output_sent, 'bidirectional_gru_output_sent')

        with tf.name_scope('pooling') as name_scope:
            W = tf.Variable(
                tf.random_normal([2 * training_setting['hidden_units']],
                                 name='attention_weight'))
            b = tf.Variable(tf.random_normal([1]), name='attention_bias')
            # W = tf_print(W, 'W')
            attentions = tf.reduce_sum(
                tf.multiply(W, bidirectional_gru_output_sent), axis=2) + b
            attentions = tf.nn.softmax(attentions)
            # attentions = tf_print(attentions, 'attentions')

            expand_attentions = tf.expand_dims(attentions, 1)
            transpose_outputs = tf.transpose(bidirectional_gru_output_sent,
                                             perm=[0, 2, 1])

            attentions_output = tf.reduce_sum(tf.transpose(tf.multiply(
                expand_attentions, transpose_outputs),
                                                           perm=[0, 2, 1]),
                                              axis=1)
            # attentions_output = tf_print(attentions_output, 'attentions_output')

        with tf.name_scope('slot') as name_scope:
            # attentions_output = tf_print(attentions_output, 'attention output')
            W_projection_slot = tf.get_variable(
                "W_projection_slot", shape=[64, training_setting['slot_num']])
            b_projection_slot = tf.get_variable(
                "b_projection_slot", shape=[training_setting['slot_num']])
            logits = []
            hidden_states_list = []
            for i in range(training_setting['maximum_sent_length']):
                feature = bidirectional_gru_output_sent[:, i, :]
                hidden_states = tf.layers.dense(feature,
                                                64,
                                                activation=tf.nn.tanh)
                output = tf.matmul(hidden_states,
                                   W_projection_slot) + b_projection_slot
                logits.append(output)
                hidden_states_list.append(hidden_states)

            logits_slots = tf.stack(logits, axis=1)
            y_pred_slot = tf.argmax(logits_slots, axis=2, name="y_pred")

        with tf.name_scope('intent') as name_scope:
            W_mlp = tf.Variable(
                tf.random_normal([
                    2 * training_setting['hidden_units'],
                    training_setting['intent_num']
                ]))
            b_mlp = tf.Variable(
                tf.random_normal([training_setting['intent_num']]))

            logits_intent = tf.matmul(attentions_output, W_mlp) + b_mlp
            y_pred_intent = tf.argmax(logits_intent, 1, name='y_pred')

        with tf.name_scope('loss') as name_scope:
            mask = tf.to_float(tf.not_equal(sequence_length, 0))

            loss_slot = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=y_slot, logits=(logits_slots))
            loss_slot = tf.reduce_sum(
                loss_slot, axis=1) / training_setting['maximum_sent_length']
            loss_slot = tf.reduce_mean(loss_slot)

            loss_intent = tf.nn.softmax_cross_entropy_with_logits(
                labels=y_intent, logits=logits_intent)
            loss_intent = tf.reduce_mean(loss_intent)

            # l2_losses = tf.add_n(
            #     [tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * self.l2_lambda
            # weights_intent = tf.nn.sigmoid(tf.cast(self.global_step / 1000, dtype=tf.float32)) / 2

            loss = loss_slot + loss_intent
            loss = tf.identity(loss, name='loss')

            # loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_intent, name='loss')

        with tf.name_scope('optimizer') as name_scope:
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=training_setting['learning_rate']).minimize(
                    loss, name='optimizer')
    return graph
Example #18
def attention_forward(config, inputs, scope=None):
    with tf.variable_scope(scope or "rnn_attention"):

        JX, JQ = config.max_context_size, config.max_ques_size
        d = config.hidden_size
        x, x_len, q, q_len = [inputs[key]
                              for key in ['x', 'x_len', 'q', 'q_len']]
        x_mask = tf.sequence_mask(x_len, JX)

        # emb_mat = tf.get_variable('emb_mat', shape=[V, d])
        emb_mat = config.emb_mat_ph if config.serve else config.emb_mat
        emb_mat = tf.slice(emb_mat, [2, 0], [-1, -1])
        emb_mat = tf.concat(
            [tf.get_variable('emb_mat', shape=[2, d]), emb_mat], axis=0)
        xx = tf.nn.embedding_lookup(emb_mat, x, name='xx')  # [N, JX, d]
        qq = tf.nn.embedding_lookup(emb_mat, q, name='qq')  # [N, JQ, d]

        with tf.variable_scope("context_rnn"):

            # run context embeddings through GRU
            dropout = 0.1
            fw_cell = GRUCell(64)
            bw_cell = GRUCell(64)
            if config.is_train:
                fw_cell = DropoutWrapper(
                    fw_cell, output_keep_prob=(1.0 - dropout))
                bw_cell = DropoutWrapper(
                    bw_cell, output_keep_prob=(1.0 - dropout))
            (output_fw, output_bw), _ = bidirectional_dynamic_rnn(
                fw_cell, bw_cell, xx, dtype=tf.float32, sequence_length=x_len)
            xx_rnn_toobig = tf.concat([output_fw, output_bw], axis=2)
            xx_rnn = tf.layers.dense(xx_rnn_toobig, d, activation=None)  # project 2*64 back down to d

        with tf.variable_scope("question_rnn"):

            # run question embeddings through GRU
            dropout = 0.1
            fw_cell2 = GRUCell(64)
            bw_cell2 = GRUCell(64)
            if config.is_train:
                fw_cell2 = DropoutWrapper(
                    fw_cell2, output_keep_prob=(1.0 - dropout))
                bw_cell2 = DropoutWrapper(
                    bw_cell2, output_keep_prob=(1.0 - dropout))
            (output_fw2, output_bw2), _ = bidirectional_dynamic_rnn(
                fw_cell2, bw_cell2, qq, dtype=tf.float32, sequence_length=q_len)
            qq_rnn_toobig = tf.concat([output_fw2, output_bw2], axis=2)
            qq_rnn = tf.layers.dense(qq_rnn_toobig, d, activation=None)  # project 2*64 back down to d

        # equation 10: xx_rnn ([N, JX, d]) and qq_rnn ([N, JQ, d]) have different lengths,
        # so both are tiled to a common [N, JX, JQ, d] shape before the point-wise interaction
        xx_rnn_exp = tf.expand_dims(xx_rnn, axis=2)
        xx_rnn_tiled = tf.tile(xx_rnn_exp, [1, 1, JQ, 1])
        qq_rnn_exp = tf.expand_dims(qq_rnn, axis=1)
        qq_rnn_tiled = tf.tile(qq_rnn_exp, [1, JX, 1, 1])

        weights = tf.get_variable(name="weights", shape=[3*d, 1])
        bScalar = tf.get_variable(name="bScalar", shape=[])
        insideBrackets = tf.concat([xx_rnn_tiled, qq_rnn_tiled, tf.math.multiply(
            xx_rnn_tiled, qq_rnn_tiled)], axis=3)
        insideBracketsReshaped = tf.reshape(insideBrackets, [tf.shape(insideBrackets)[
            0] * tf.shape(insideBrackets)[1] * tf.shape(insideBrackets)[2], 3*d])
        dotProductWithWeightsPlusScalar = tf.matmul(
            insideBracketsReshaped, weights) + bScalar
        dotProductWithWeightsReshaped = tf.reshape(dotProductWithWeightsPlusScalar, [tf.shape(insideBrackets)[
            0], tf.shape(insideBrackets)[1], tf.shape(insideBrackets)[2]])
        p = tf.nn.softmax(dotProductWithWeightsReshaped, 2)

        p_exp = tf.expand_dims(p, axis=3)
        p_tiled = tf.tile(p_exp, [1, 1, 1, d])

        # equation 9
        qk_bar = tf.reduce_sum(tf.multiply(p_tiled, qq_rnn_tiled), axis=2)

        # plug qk_bar in place of qq_avg_tiled below

        xq = tf.concat([xx_rnn, qk_bar, xx_rnn * qk_bar],
                       axis=2)  # [N, JX, 3d]
        xq_flat = tf.reshape(xq, [-1, 3*d])  # [N * JX, 3*d]

        # Compute logits
        with tf.variable_scope('start'):
            logits1 = exp_mask(tf.reshape(tf.layers.dense(
                inputs=xq_flat, units=1), [-1, JX]), x_mask)  # [N, JX]
            yp1 = tf.argmax(logits1, axis=1)  # [N]
        with tf.variable_scope('stop'):
            logits2 = exp_mask(tf.reshape(tf.layers.dense(
                inputs=xq_flat, units=1), [-1, JX]), x_mask)  # [N, JX]
            yp2 = tf.argmax(logits2, axis=1)  # [N]

        outputs = {'logits1': logits1,
                   'logits2': logits2, 'yp1': yp1, 'yp2': yp2}
        variables = {'emb_mat': emb_mat}
        return variables, outputs
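The tiling in the attention block above makes the [N, JX, JQ, 3d] interaction tensor explicit. A broadcast-style alternative is sketched below; it is only an illustration, and it swaps the learned [3d, 1] scoring weights for a plain dot product, so it is not equivalent to the example's equation 10, just the same softmax-then-weighted-sum pattern without tiling.

import tensorflow as tf

def broadcast_attention(xx_rnn, qq_rnn):
    # xx_rnn: [N, JX, d], qq_rnn: [N, JQ, d] -> attended question qk_bar: [N, JX, d]
    scores = tf.matmul(xx_rnn, qq_rnn, transpose_b=True)   # [N, JX, JQ]
    p = tf.nn.softmax(scores, axis=2)                      # attention over question positions
    return tf.matmul(p, qq_rnn)                            # weighted sum, no tiling needed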
Example #19
with graph.as_default():
    SEQUENCE_INPUT = tf.placeholder(tf.float32, shape=(None, None, 2))
    LABEL_INPUT = tf.placeholder(tf.int32, shape=(None, 1))
    LABEL_ONE_HOT = tf.one_hot(LABEL_INPUT, L_UNIQUE)
    DROPOUT_IN = tf.placeholder_with_default(1.0, ())

    batch_size = tf.shape(SEQUENCE_INPUT)[0]

    CELL_FW, CELL_FW_INIT = create_lstm(batch_size, 32, activation=tf.nn.relu)
    CELL_BW, CELL_BW_INIT = create_lstm(batch_size, 32, activation=tf.nn.relu)

    x = SEQUENCE_INPUT

    outputs, states = bidirectional_dynamic_rnn(cell_fw=CELL_FW,
                                                cell_bw=CELL_BW,
                                                initial_state_fw=CELL_FW_INIT,
                                                initial_state_bw=CELL_BW_INIT,
                                                dtype=tf.float32,
                                                inputs=x)

    output_fw, output_bw = outputs
    outputs = (output_fw[:, -1, :], output_bw[:, -1, :])

    x = tf.concat(outputs, -1)
    x = tf.nn.dropout(x, DROPOUT_IN)
    x = tf.contrib.layers.fully_connected(x, 256)
    x = tf.nn.dropout(x, DROPOUT_IN)
    x = tf.contrib.layers.fully_connected(x, L_UNIQUE, activation_fn=None)
    PREDICTION_TENSOR = tf.contrib.layers.softmax(x)

    PREDICTED_LABEL_T = tf.round(PREDICTION_TENSOR)
    EQUAL_T = tf.equal(PREDICTED_LABEL_T, LABEL_ONE_HOT)
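create_lstm is not defined in this snippet. Judging only from the call site (it returns a cell and a batch-sized initial state), a plausible helper might look like the sketch below; the signature and behaviour are assumptions, not the original code.

def create_lstm(batch_size, num_units, activation=None):
    # hypothetical helper: an LSTM cell plus a matching zero initial state
    cell = tf.nn.rnn_cell.LSTMCell(num_units, activation=activation)
    init_state = cell.zero_state(batch_size, dtype=tf.float32)
    return cell, init_state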
Example #20
def rnn_forward(config, inputs, scope=None):
    with tf.variable_scope(scope or "rnn"):

        JX, JQ = config.max_context_size, config.max_ques_size
        d = config.hidden_size
        x, x_len, q, q_len = [inputs[key]
                              for key in ['x', 'x_len', 'q', 'q_len']]
        x_mask = tf.sequence_mask(x_len, JX)
        q_mask = tf.sequence_mask(q_len, JQ)

        # emb_mat = tf.get_variable('emb_mat', shape=[V, d])
        emb_mat = config.emb_mat_ph if config.serve else config.emb_mat
        emb_mat = tf.slice(emb_mat, [2, 0], [-1, -1])
        emb_mat = tf.concat(
            [tf.get_variable('emb_mat', shape=[2, d]), emb_mat], axis=0)
        xx = tf.nn.embedding_lookup(emb_mat, x, name='xx')  # [N, JX, d]
        qq = tf.nn.embedding_lookup(emb_mat, q, name='qq')  # [N, JQ, d]

        with tf.variable_scope("context_rnn"):

            # run context embeddings through GRU
            dropout = 0.1
            fw_cell = GRUCell(64)
            bw_cell = GRUCell(64)
            if config.is_train:
                fw_cell = DropoutWrapper(
                    fw_cell, output_keep_prob=(1.0 - dropout))
                bw_cell = DropoutWrapper(
                    bw_cell, output_keep_prob=(1.0 - dropout))
            (output_fw, output_bw), _ = bidirectional_dynamic_rnn(
                fw_cell, bw_cell, xx, dtype=tf.float32)
            xx_rnn_toobig = tf.concat([output_fw, output_bw], axis=2)
            xx_rnn = tf.layers.dense(xx_rnn_toobig, d, activation=None)  # project 2*64 back down to d

        with tf.variable_scope("question_rnn"):

            # run question embeddings through GRU
            dropout = 0.1
            fw_cell2 = GRUCell(64)
            bw_cell2 = GRUCell(64)
            if config.is_train:
                fw_cell2 = DropoutWrapper(
                    fw_cell2, output_keep_prob=(1.0 - dropout))
                bw_cell2 = DropoutWrapper(
                    bw_cell2, output_keep_prob=(1.0 - dropout))
            (output_fw2, output_bw2), _ = bidirectional_dynamic_rnn(
                fw_cell2, bw_cell2, qq, dtype=tf.float32)
            qq_rnn_toobig = tf.concat([output_fw2, output_bw2], axis=2)
            qq_rnn = tf.layers.dense(qq_rnn_toobig, d, activation=None)  # project 2*64 back down to d

        # equation 1 (averaging)
        qq_avg = tf.reduce_mean(
            bool_mask(qq_rnn, q_mask, expand=True), axis=1)  # [N, d]
        qq_avg_exp = tf.expand_dims(qq_avg, axis=1)  # [N, 1, d]
        qq_avg_tiled = tf.tile(qq_avg_exp, [1, JX, 1])  # [N, JX, d]

        xq = tf.concat([xx_rnn, qq_avg_tiled, xx_rnn * qq_avg_tiled],
                       axis=2)  # [N, JX, 3d]
        xq_flat = tf.reshape(xq, [-1, 3*d])  # [N * JX, 3*d]

        # Compute logits
        with tf.variable_scope('start'):
            logits1 = exp_mask(tf.reshape(tf.layers.dense(
                inputs=xq_flat, units=1), [-1, JX]), x_mask)  # [N, JX]
            yp1 = tf.argmax(logits1, axis=1)  # [N]
        with tf.variable_scope('stop'):
            logits2 = exp_mask(tf.reshape(tf.layers.dense(
                inputs=xq_flat, units=1), [-1, JX]), x_mask)  # [N, JX]
            yp2 = tf.argmax(logits2, axis=1)  # [N]

        outputs = {'logits1': logits1,
                   'logits2': logits2, 'yp1': yp1, 'yp2': yp2}
        variables = {'emb_mat': emb_mat}
        return variables, outputs
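The forward functions in these QA examples return start/end logits but no training objective. A minimal sketch of how such logits are commonly turned into a span loss, assuming integer answer-start and answer-end labels y1 and y2 (names invented here, shape [N]):

def span_loss(outputs, y1, y2):
    # hypothetical loss over the dict returned by rnn_forward / attention_forward
    loss_start = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y1, logits=outputs['logits1'])
    loss_end = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y2, logits=outputs['logits2'])
    return tf.reduce_mean(loss_start + loss_end)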
Example #21
def attention_forward(config, inputs, scope=None):
    with tf.variable_scope(scope or "forward"):

        JX, JQ = config.max_context_size, config.max_ques_size
        d = config.hidden_size
        x, x_len, q, q_len = [inputs[key] for key in ['x', 'x_len', 'q', 'q_len']]
        x_mask = tf.sequence_mask(x_len, JX)
        q_mask = tf.sequence_mask(q_len, JQ)

        # emb_mat = tf.get_variable('emb_mat', shape=[V, d])
        emb_mat = config.emb_mat_ph if config.serve else config.emb_mat
        emb_mat = tf.slice(emb_mat, [2, 0], [-1, -1])
        emb_mat = tf.concat([tf.get_variable('emb_mat', shape=[2, d]), emb_mat], axis=0)
        xx = tf.nn.embedding_lookup(emb_mat, x, name='xx')  # [N, JX, d]
        qq = tf.nn.embedding_lookup(emb_mat, q, name='qq')  # [N, JQ, d]
        
        # now run the embedded xx and qq through the bidirectional GRU encoders below
        with tf.variable_scope('xx-encoder'):
            fw_xx_cell = GRUCell(d)
            fw_xx_cell = DropoutWrapper(cell=fw_xx_cell, output_keep_prob=config.keep_prob)

            bw_xx_cell = GRUCell(d)
            bw_xx_cell = DropoutWrapper(cell=bw_xx_cell, output_keep_prob=config.keep_prob)
            outputs_xx, _ = bidirectional_dynamic_rnn(
                    fw_xx_cell, bw_xx_cell, xx, dtype=tf.float32)

            with tf.variable_scope('qq-encoder'):
                fw_qq_cell = GRUCell(d)
                fw_qq_cell = DropoutWrapper(cell=fw_qq_cell, output_keep_prob=config.keep_prob)
                bw_qq_cell = GRUCell(d)
                bw_qq_cell = DropoutWrapper(cell=bw_qq_cell, output_keep_prob=config.keep_prob)
               
                outputs_qq, _ = bidirectional_dynamic_rnn(
                        fw_qq_cell, bw_qq_cell, qq, dtype=tf.float32)

                xx_fwbw=tf.concat(outputs_xx, 2 ) #[N,JX,2d]
                qq_fwbw=tf.concat(outputs_qq, 2) #[N,JQ,2d]
                qq_exp= tf.expand_dims(qq_fwbw, axis=2)  # [N,JQ, 1, 2d]
                qq_tiled = tf.tile(qq_exp, [1,1, JX, 1])  # [N,JQ, JX, 2d]

                xx_exp= tf.expand_dims(xx_fwbw, axis=1)  # [N, 1,JX, 2d]
                xx_tiled = tf.tile(xx_exp, [1,JQ, 1, 1])  # [N,JQ, JX, 2d]
                pre_pk= tf.concat([xx_tiled,qq_tiled, xx_tiled * qq_tiled], axis=-1)  # [N,JQ,JX, 6d]
                pre_pk_flat=tf.reshape(pre_pk,[-1,6*d])
                with tf.variable_scope('weights'):
                    logits_p=tf.layers.dense(inputs=pre_pk_flat, units=1)
                print('logitsp shape:', logits_p.shape)
                logits_p=tf.reshape(logits_p, [-1,JQ,JX,1])
                pk=tf.nn.softmax(logits_p,axis=1) #softmax along JQ 
                print('logitsp shape after:', logits_p.shape)
                print('pk shape:', pk.shape)
                # now compute the attended question representation
                qq_rew = tf.reduce_sum(qq_tiled * pk, axis=1)  # [N, JX, 2d]
                print('attended question shape:', qq_rew.shape)
                # now proceed as in the previous methods
                xq = tf.concat([xx_fwbw, qq_rew, xx_fwbw * qq_rew], axis=2)  # [N, JX, 6d]
                xq_flat = tf.reshape(xq, [-1, 2*3*d])  # [N * JX, 6d]
                # Compute logits
                with tf.variable_scope('start'):
                    logits1 = exp_mask(tf.reshape(tf.layers.dense(inputs=xq_flat, units=1), [-1, JX]), x_mask)  # [N, JX]
                    yp1 = tf.argmax(logits1, axis=1)  # [N]
                with tf.variable_scope('stop'):
                    logits2 = exp_mask(tf.reshape(tf.layers.dense(inputs=xq_flat, units=1), [-1, JX]), x_mask)  # [N, JX]
                    yp2 = tf.argmax(logits2, axis=1)  # [N]

                outputs = {'logits1': logits1, 'logits2': logits2, 'yp1': yp1, 'yp2': yp2}
                variables = {'emb_mat': emb_mat}
                return variables, outputs