Example #1
    def loss(self):
        # Mean loss: CRF negative log-likelihood when using Viterbi/CRF decoding,
        # otherwise masked token-level cross-entropy
        with tf.name_scope("loss"):
            labels = tf.cast(self.input_y, 'int32')
            if self.viterbi:
                log_likelihood, transition_params = crf.crf_log_likelihood(
                    self.unflat_scores,
                    labels,
                    self.label_mask,
                    self.flat_sequence_lengths,
                    transition_params=self.transition_params)
                # self.transition_params = transition_params
                loss = tf.reduce_mean(-log_likelihood)
            else:
                losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.unflat_scores, labels=labels)
                masked_losses = tf.multiply(losses, self.input_mask)
                loss = tf.div(tf.reduce_sum(masked_losses),
                              tf.reduce_sum(self.input_mask))
            loss += self.l2_penalty * self.l2_loss

            drop_loss = tf.nn.l2_loss(
                tf.subtract(self.unflat_scores, self.unflat_no_dropout_scores))
            loss += self.drop_penalty * drop_loss

        return loss
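
The same loss choice can be sketched in isolation, which may make the branch above easier to follow. This is a minimal, hedged sketch assuming TF 1.x with tf.contrib.crf; the tensors scores, labels, mask, and seq_lens are illustrative stand-ins for the model attributes used above, not names from the source.

import tensorflow as tf
from tensorflow.contrib import crf

batch, steps, n_tags = 4, 20, 9
scores = tf.random_normal([batch, steps, n_tags])    # unary tag scores
labels = tf.zeros([batch, steps], dtype=tf.int32)    # gold tag ids
mask = tf.ones([batch, steps], dtype=tf.float32)     # 1.0 for real tokens, 0.0 for padding
seq_lens = tf.fill([batch], steps)                   # true sequence lengths

use_crf = True
if use_crf:
    # CRF negative log-likelihood, averaged over the batch
    log_likelihood, transition_params = crf.crf_log_likelihood(scores, labels, seq_lens)
    loss = tf.reduce_mean(-log_likelihood)
else:
    # Token-level cross-entropy, masked and normalized by the number of real tokens
    ce = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=scores, labels=labels)
    loss = tf.reduce_sum(ce * mask) / tf.reduce_sum(mask)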
Example #2
    def __call__(self, y_true, y_pred, sample_weight=None, **kwargs):
        assert sample_weight is not None, "your model has to support masking"
        if len(y_true.shape) == 3:
            # One-hot labels: collapse to sparse tag ids
            y_true = tf.argmax(y_true, axis=-1)
        # Number of real (non-padded) tokens per sequence, derived from the mask
        sequence_lengths = tf.math.count_nonzero(sample_weight, axis=1)
        y_pred = tf.convert_to_tensor(y_pred, dtype=self.dtype)
        log_likelihood, self.crf.transitions = crf_log_likelihood(
            y_pred,
            tf.cast(y_true, dtype=tf.int32),
            sequence_lengths,
            transition_params=self.crf.transitions)
        return tf.reduce_mean(-log_likelihood)
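
For context, here is a hedged eager-mode sketch of the tensorflow_addons call that the loss object above wraps. The shapes, the transitions variable, and the padding mask are illustrative; in the class above they come from the wrapped CRF layer and the Keras sample_weight.

import tensorflow as tf
from tensorflow_addons.text import crf_log_likelihood

batch, steps, n_tags = 2, 7, 5
logits = tf.random.normal([batch, steps, n_tags])      # y_pred from the tagger
tags = tf.zeros([batch, steps], dtype=tf.int32)        # y_true as tag ids
mask = tf.concat([tf.ones([batch, 5]), tf.zeros([batch, 2])], axis=1)  # padding mask
transitions = tf.Variable(tf.random.normal([n_tags, n_tags]))

sequence_lengths = tf.math.count_nonzero(mask, axis=1)
log_likelihood, _ = crf_log_likelihood(
    logits, tags, sequence_lengths, transition_params=transitions)
loss = tf.reduce_mean(-log_likelihood)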
Example #3
    def __init__(self, name, batch, config, v_shape):
        words, pos, gazetteer, chars, len_chars, labels, len_words = batch
        n_words, n_pos, n_categories, n_chars, n_tags = v_shape

        batch_size = tf.shape(words)[0]
        max_words = tf.shape(words)[1]

        embeddings = char_embeddings(chars, len_chars, n_chars, config)
        embedding_pool = tf.reshape(
            conv_max_pool(embeddings, len_words, config),
            [batch_size, max_words, config.pool_size])

        fw = tf.nn.rnn_cell.MultiRNNCell([
            tf.nn.rnn_cell.LSTMCell(config.h_size) for _ in range(config.depth)
        ])
        bw = tf.nn.rnn_cell.MultiRNNCell([
            tf.nn.rnn_cell.LSTMCell(config.h_size) for _ in range(config.depth)
        ])

        self.dropout = tf.placeholder(tf.float32, [])

        features = tf.nn.dropout(tf.concat(
            [words, pos, gazetteer, embedding_pool], axis=2),
                                 keep_prob=1 - self.dropout)

        output, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=fw,
                                                    cell_bw=bw,
                                                    inputs=features,
                                                    sequence_length=len_words,
                                                    dtype=tf.float32)
        output = tf.concat(output, axis=2)
        output = tf.layers.dense(tf.nn.dropout(output,
                                               keep_prob=1 - self.dropout),
                                 units=n_tags,
                                 name="output")

        log_likelihood, transition = crf.crf_log_likelihood(
            output, tag_indices=labels, sequence_lengths=len_words)

        # Viterbi decode
        self.predict, self.score = crf.crf_decode(output,
                                                  transition_params=transition,
                                                  sequence_length=len_words)

        # CRF negative log-likelihood, averaged over the batch
        self.loss = tf.reduce_mean(-log_likelihood, name="loss")

        tvars = tf.trainable_variables()
        gradients, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clip_norm=5.0)

        optimizer = tf.train.AdamOptimizer(config.learning_rate, epsilon=0.1)
        self.train = optimizer.apply_gradients(zip(gradients, tvars))
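
A hedged sketch of how a constructor like this might be driven with the TF 1.x session API. The class name NERModel, the batch/config/v_shape arguments, and the feed value for dropout are illustrative assumptions, not names from the source; note that self.dropout is fed as a drop rate because the graph uses keep_prob = 1 - self.dropout.

with tf.Session() as sess:
    model = NERModel("ner", batch, config, v_shape)   # hypothetical class name
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        # Feed the dropout *rate*; use 0.0 when evaluating
        _, loss = sess.run([model.train, model.loss],
                           feed_dict={model.dropout: 0.5})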
Example #4
    def loss_layer(self, project_logits, lengths, name=None):
        """
        calculate crf loss
        :param project_logits: [1, num_steps, num_tags]
        :return: scalar loss
        """
        with tf.variable_scope("crf_loss" if not name else name):
            small = -1000.0

            start_logits = tf.concat([
                small * tf.ones(shape=[self.batch_size, 1, self.num_tags]),
                tf.zeros(shape=[self.batch_size, 1, 1])
            ], axis=-1)

            pad_logits = tf.cast(
                small * tf.ones([self.batch_size, self.num_steps, 1]),
                tf.float32)

            logits = tf.concat([project_logits, pad_logits], axis=-1)

            logits = tf.concat([start_logits, logits], axis=1)

            # Prepend a start-tag column to targets: its value is num_tags
            # (13, the start tag); the original tags run from 0 to 12
            targets = tf.concat(
                [
                    tf.cast(self.num_tags * tf.ones([self.batch_size, 1]),
                            tf.int32), self.targets
                ],
                axis=-1)
            # One extra start tag, so the transition matrix needs num_tags + 1 states
            self.trans = tf.get_variable(
                "transitions",
                shape=[self.num_tags + 1, self.num_tags + 1],
                initializer=self.initializer)
            # Likewise, the prepended start tag adds one step, hence lengths + 1
            log_likelihood, self.trans = crf_log_likelihood(
                inputs=logits,
                tag_indices=targets,
                transition_params=self.trans,
                sequence_lengths=lengths + 1)
            # Negative log-likelihood, averaged over the batch
            return tf.reduce_mean(-log_likelihood)
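
A hedged, standalone walk-through of the start-tag padding above may help with the shape bookkeeping. The sizes are illustrative (13 tags, matching the comments); only tensor shapes are exercised, no training happens.

import tensorflow as tf

batch_size, num_steps, num_tags = 2, 4, 13
small = -1000.0
project_logits = tf.zeros([batch_size, num_steps, num_tags])
raw_targets = tf.zeros([batch_size, num_steps], dtype=tf.int32)

# One extra leading time step whose only plausible tag is the start tag
start_logits = tf.concat([small * tf.ones([batch_size, 1, num_tags]),
                          tf.zeros([batch_size, 1, 1])], axis=-1)   # [B, 1, T+1]
# Every real step gets a start-tag column it can never take
pad_logits = small * tf.ones([batch_size, num_steps, 1])
logits = tf.concat([project_logits, pad_logits], axis=-1)           # [B, S, T+1]
logits = tf.concat([start_logits, logits], axis=1)                  # [B, S+1, T+1]
# Targets gain a leading start tag (id == num_tags), so lengths grow by 1 as well
targets = tf.concat([num_tags * tf.ones([batch_size, 1], dtype=tf.int32),
                     raw_targets], axis=-1)                         # [B, S+1]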
Example #5
    def __init__(self, max_seq_len, max_word_len, char_dim, char_rnn_dim,
                 char_bidirect, word_dim, rnn_dim, word_bidirect, cap_dim,
                 pos_dim, load_path, num_word, num_char, num_cap, num_pos,
                 num_tag):
        self.word_ids = tf.placeholder(tf.int32, [None, max_seq_len],
                                       name="word_ids")
        self.seq_lengths = tf.placeholder(
            tf.int64, [None], name="seq_lengths")  # number of valid words
        self.char_for_ids = tf.placeholder(tf.int32,
                                           [None, max_seq_len, max_word_len],
                                           name="char_for_ids")
        self.char_rev_ids = tf.placeholder(tf.int32,
                                           [None, max_seq_len, max_word_len],
                                           name="char_rev_ids")
        self.word_lengths = tf.placeholder(tf.int32, [None, max_seq_len],
                                           name="char_pos_ids")
        self.tag_ids = tf.placeholder(tf.int32, [None, max_seq_len],
                                      name='tag_ids')
        self.cap_ids = tf.placeholder(tf.int32, [None, max_seq_len],
                                      name='cap_ids')
        self.pos_ids = tf.placeholder(tf.int32, [None, max_seq_len],
                                      name='pos_ids')
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.word_dim = word_dim
        self.char_dim = char_dim
        self.cap_dim = cap_dim
        self.pos_dim = pos_dim
        self.char_bidirect = char_bidirect
        initializer = tf.contrib.layers.xavier_initializer(uniform=True,
                                                           seed=None,
                                                           dtype=tf.float32)
        inputs = []
        input_dim = 0
        with tf.device("/gpu:2"):
            if word_dim:
                word_embedding = tf.get_variable('word_embedding',
                                                 [num_word, word_dim],
                                                 initializer=initializer)
                word_embedded = tf.nn.embedding_lookup(word_embedding,
                                                       self.word_ids,
                                                       name="word_layer")
                inputs.append(word_embedded)
                input_dim += word_dim
            if char_dim:
                char_embedding = tf.get_variable('char_embedding',
                                                 [num_char, char_dim],
                                                 initializer=initializer)
                word_lengths = tf.reshape(self.word_lengths, [-1])
                with tf.variable_scope('char_forward_rnn'):

                    char_for_embedded = tf.reshape(
                        tf.nn.embedding_lookup(char_embedding,
                                               self.char_for_ids),
                        [-1, max_word_len, char_dim])
                    char_for_state = self.rnn(char_for_embedded, char_rnn_dim,
                                              word_lengths)
                    char_for_out = tf.reshape(char_for_state,
                                              [-1, max_seq_len, char_rnn_dim])

                    inputs.append(char_for_out)
                    input_dim += char_rnn_dim
                if char_bidirect:
                    with tf.variable_scope('char_backward_rnn'):
                        char_rev_embedded = tf.reshape(
                            tf.nn.embedding_lookup(char_embedding,
                                                   self.char_rev_ids),
                            [-1, max_word_len, char_dim])
                        char_rev_state = self.rnn(char_rev_embedded,
                                                  char_rnn_dim, word_lengths)
                        char_rev_out = tf.reshape(
                            char_rev_state, [-1, max_seq_len, char_rnn_dim])

                        inputs.append(char_rev_out)
                        input_dim += char_rnn_dim
            if cap_dim:
                cap_embedding = tf.get_variable('cap_embedding',
                                                [num_cap, cap_dim],
                                                initializer=initializer)
                cap_embedded = tf.nn.embedding_lookup(cap_embedding,
                                                      self.cap_ids,
                                                      name="cap_layer")
                inputs.append(cap_embedded)
                input_dim += cap_dim
            if pos_dim:
                pos_embedding = tf.get_variable('pos_embedding',
                                                [num_pos, pos_dim],
                                                initializer=initializer)
                pos_embedded = tf.nn.embedding_lookup(pos_embedding,
                                                      self.pos_ids,
                                                      name='pos_layer')
                inputs.append(pos_embedded)
                input_dim += pos_dim

            # Concatenate word, char, cap, and pos features along the embedding axis
            inputs = tf.concat(inputs, axis=2)

            inputs = tf.nn.dropout(inputs, self.dropout_keep_prob)

            with tf.variable_scope('forward_rnn'):
                word_for_output = self.rnn(inputs, rnn_dim, None)

            if word_bidirect:
                inputs_rev = tf.reverse_sequence(inputs,
                                                 self.seq_lengths,
                                                 seq_dim=1,
                                                 batch_dim=None)
                with tf.variable_scope('backward_rnn'):
                    word_rev_output = self.rnn(inputs_rev, rnn_dim, None)
                word_rev_output = tf.reverse_sequence(word_rev_output,
                                                      self.seq_lengths,
                                                      seq_dim=1,
                                                      batch_dim=None)
                final_output = tf.concat([word_for_output, word_rev_output],
                                         axis=2)
                final_output = self.hidden_layer(final_output,
                                                 2 * rnn_dim,
                                                 rnn_dim,
                                                 "tanh_layer",
                                                 initializer,
                                                 activation=tf.tanh)

            else:
                final_output = word_for_output

            self.tag_scores = self.hidden_layer(
                final_output,
                rnn_dim,
                num_tag,
                'final_layer',
                initializer,
                activation=None)  # [batch_size, seq_dim, num_tags]

            # Compute the log-likelihood of the gold sequences and keep the transition
            # params for inference at test time
            self.transitions = tf.get_variable("transitions",
                                               [num_tag, num_tag])
            log_likelihood, _ = crf.crf_log_likelihood(self.tag_scores,
                                                       self.tag_ids,
                                                       self.seq_lengths,
                                                       self.transitions)
            self.loss = tf.reduce_mean(-log_likelihood)
            self.likelihood = tf.exp(log_likelihood)
            tvars = tf.trainable_variables()
            max_grad_norm = 5
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              max_grad_norm)
            optimizer = tf.train.AdamOptimizer(1e-3)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        self.session = tf.Session(config=config)
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
        if load_path:
            self.saver.restore(self.session, load_path)
        else:
            self.session.run(tf.global_variables_initializer())
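
Since the constructor keeps self.transitions and self.tag_scores around "for inference at test time", test-time decoding would typically look like the hedged sketch below (TF 1.x assumed). The names model and feed, and the slicing by length, are illustrative additions, not part of the source class.

from tensorflow.contrib import crf

scores, transitions, lengths = model.session.run(
    [model.tag_scores, model.transitions, model.seq_lengths], feed_dict=feed)
for score, length in zip(scores, lengths):
    # Viterbi-decode one sentence with the learned transition matrix,
    # dropping the padded positions beyond its true length
    best_path, _ = crf.viterbi_decode(score[:length], transitions)
    print(best_path)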