예제 #1
0
 def spatial_dropout(self, inputs, keep_prob):
     """Apply dropout using a noise shape of ``[dim0, 1, last_dim]``.

     Because the middle entry of the noise shape is 1, ``tf.nn.dropout``
     makes one keep/drop decision per (axis-0, last-axis) position and
     reuses it along axis 1.
     NOTE(review): the three-element noise shape presumes ``inputs`` is
     rank 3 -- confirm against callers.

     :param inputs: tensor to apply dropout to
     :param keep_prob: keep probability; dropout is skipped unless it is
         below 1
     :return: ``inputs``, with dropout applied only when ``keep_prob < 1``
     """
     if not keep_prob < 1:
         return inputs

     # ``shape`` is a helper defined elsewhere; presumably it yields the
     # size of the requested axis -- verify against its definition.
     mask_dims = [shape(inputs, 0), 1, shape(inputs, -1)]
     return tf.nn.dropout(inputs,
                          keep_prob,
                          noise_shape=tf.stack(mask_dims))
예제 #2
0
    def loss_layer(self, project_logits, lengths, name=None):
        """
        Compute the CRF loss as the mean negative log-likelihood.

        A ``[num_tags, num_tags]`` variable named "transitions" is created
        inside the variable scope and handed to ``crf_log_likelihood``
        together with the gold tags read from ``self.labels_ids``.

        :param project_logits: per-step tag scores,
            [batch_size, num_steps, num_tags]
        :param lengths: forwarded as ``sequence_lengths`` to
            ``crf_log_likelihood``
        :param name: optional variable scope name; "crf_loss" is used when
            it is empty or None
        :return: tuple ``(loss, trans)`` -- the mean of the negated
            log-likelihood and the transition parameters returned by
            ``crf_log_likelihood``
        """
        with tf.variable_scope(name or "crf_loss"):
            trans = tf.get_variable("transitions",
                                    shape=[self.num_tags, self.num_tags],
                                    initializer=self.initializer)

            # NOTE(review): presumably tf.contrib.crf.crf_log_likelihood;
            # confirm against the file's imports.
            log_likelihood, trans = crf_log_likelihood(
                inputs=project_logits,
                tag_indices=self.labels_ids,
                transition_params=trans,
                sequence_lengths=lengths)

            # Negate so that minimising the loss maximises the likelihood.
            return tf.reduce_mean(-log_likelihood), trans
예제 #3
0
    def biLSTM_layer(self,
                     lstm_inputs,
                     lstm_dim,
                     lengths,
                     num_layers,
                     keep_prob=1.):
        """
        Stack ``num_layers`` bidirectional LSTM layers over the inputs.

        For every layer the forward and backward outputs are concatenated
        on axis 2 and passed through dropout. Each layer after the first
        also blends its output with its own input through a sigmoid gate.

        :param lstm_inputs: [batch_size, num_steps, emb_size]
        :param lstm_dim: size handed to every ``CustomLSTMCell``
        :param lengths: forwarded as ``sequence_length`` to the dynamic RNN
        :param num_layers: number of stacked bidirectional layers
        :param keep_prob: keep probability given to the cells and to the
            dropout applied on each layer's output
        :return: [batch_size, num_steps, 2*lstm_dim]
        """
        batch_size = shape(lstm_inputs, 0)

        def tiled_initial_state(cell):
            # Repeat the cell's initial state batch_size times along axis 0.
            return tf.contrib.rnn.LSTMStateTuple(
                tf.tile(cell.initial_state.c, [batch_size, 1]),
                tf.tile(cell.initial_state.h, [batch_size, 1]))

        layer_inputs = lstm_inputs
        with tf.variable_scope("char_BiLSTM"):
            for depth in range(num_layers):
                with tf.variable_scope("layer_" + str(depth)):
                    # Separate scopes keep the two directions' variables
                    # apart.
                    with tf.variable_scope("forward"):
                        cell_fw = CustomLSTMCell(lstm_dim, batch_size,
                                                 keep_prob)
                    with tf.variable_scope("backward"):
                        cell_bw = CustomLSTMCell(lstm_dim, batch_size,
                                                 keep_prob)
                    state_fw = tiled_initial_state(cell_fw)
                    state_bw = tiled_initial_state(cell_bw)

                    directional_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw=cell_fw,
                        cell_bw=cell_bw,
                        inputs=layer_inputs,
                        sequence_length=lengths,
                        initial_state_fw=state_fw,
                        initial_state_bw=state_bw)
                    fw_outputs, bw_outputs = directional_outputs

                    # Join both directions on the feature axis.
                    layer_outputs = tf.concat([fw_outputs, bw_outputs],
                                              axis=2)
                    layer_outputs = tf.nn.dropout(layer_outputs, keep_prob)

                    if depth > 0:
                        # Gated mix of this layer's output and its input;
                        # the first layer is skipped.
                        gate = tf.sigmoid(
                            projection(layer_outputs,
                                       shape(layer_outputs, 2)))
                        transformed = gate * layer_outputs
                        carried = (1 - gate) * layer_inputs
                        layer_outputs = transformed + carried

                    layer_inputs = layer_outputs

            return layer_inputs
예제 #4
0
    def project_layer(self, lstm_outputs):
        """
        Project the LSTM outputs to one score per tag with a linear layer.

        The input is flattened to rows of width ``2 * self.lstm_dim``,
        multiplied by the "W" variable, offset by the "b" variable, and
        reshaped back to three dimensions.

        :param lstm_outputs: [batch_size, num_steps, emb_size]; reshaped to
            rows of width ``2 * self.lstm_dim``
        :return: [batch_size, num_steps, num_tags], where ``num_tags`` is
            ``len(self.label2id)``
        """
        num_steps = shape(lstm_outputs, 1)
        num_tags = len(self.label2id)
        feature_dim = self.lstm_dim * 2

        with tf.variable_scope("project"):
            # Merge the leading axes so one matmul scores every step.
            flat_outputs = tf.reshape(lstm_outputs, shape=[-1, feature_dim])

            with tf.variable_scope("logits"):
                weights = tf.get_variable(
                    "W",
                    shape=[feature_dim, num_tags],
                    dtype=tf.float32,
                    initializer=self.initializer)
                bias = tf.get_variable(
                    "b",
                    shape=[num_tags],
                    dtype=tf.float32,
                    initializer=tf.zeros_initializer())
                flat_scores = tf.nn.xw_plus_b(flat_outputs, weights, bias)

            # Restore the step axis that was folded away above.
            return tf.reshape(flat_scores, [-1, num_steps, num_tags])
예제 #5
0
    def layer_norm(self, inputs, epsilon=1e-6):
        """
        Normalise ``inputs`` over the last axis, then scale and shift.

        The mean and the mean squared deviation are taken along axis -1.
        The centred input is multiplied by ``rsqrt(variance + epsilon)``,
        then by the "scale" variable (initialised to ones), and the
        "offset" variable (initialised to zeros) is added.

        :param inputs: tensor to normalise; the size of its last axis sets
            the shape of both variables
        :param epsilon: added to the variance before the reciprocal square
            root
        :return: normalised tensor times ``scale`` plus ``offset``
        """
        with tf.variable_scope("layer_norm", values=[inputs]):
            param_shape = [shape(inputs, -1)]
            gain = tf.get_variable("scale",
                                   shape=param_shape,
                                   initializer=tf.ones_initializer())
            shift = tf.get_variable("offset",
                                    shape=param_shape,
                                    initializer=tf.zeros_initializer())

            mean = tf.reduce_mean(inputs, axis=-1, keep_dims=True)
            squared_deviation = tf.square(inputs - mean)
            variance = tf.reduce_mean(squared_deviation,
                                      axis=-1,
                                      keep_dims=True)

            centered = inputs - mean
            inverse_std = tf.rsqrt(variance + epsilon)
            normalized = centered * inverse_std

            return normalized * gain + shift