Beispiel #1
0
 def metric_fn(label_ids, logits, num_labels, mask):
     """Build the evaluation metric dict holding a streaming confusion matrix.

     Args:
         label_ids: Ground-truth label ids.
         logits: Model scores; the predicted id is the argmax over the
             last axis.
         num_labels: Number of classes handed to the confusion matrix.
         mask: Weights applied to each label/prediction pair
             (presumably 0 on padding positions -- confirm with caller).

     Returns:
         A dict mapping "confusion_matrix" to whatever
         ``metrics.streaming_confusion_matrix`` returns.
     """
     predicted_ids = tf.math.argmax(logits, axis=-1, output_type=tf.int32)
     confusion = metrics.streaming_confusion_matrix(
         label_ids, predicted_ids, num_labels, weights=mask)
     return {"confusion_matrix": confusion}
Beispiel #2
0
            def metric_fn(label_ids, logits, num_labels, mask):
                """Assemble the evaluation metrics for predicted label ids.

                The predicted id is the argmax of ``logits`` over the last
                axis; every metric is weighted by ``mask``.

                Args:
                    label_ids: Ground-truth label ids.
                    logits: Model scores for each label.
                    num_labels: Label count; the confusion matrix is built
                        with ``num_labels - 1`` classes.
                    mask: Per-position weights passed to every metric.

                Returns:
                    A dict with the keys "confusion_matrix", "eval_accuracy",
                    "eval_precision" and "eval_recall".
                """
                predicted_ids = tf.math.argmax(logits, axis=-1, output_type=tf.int32)

                # NOTE(review): the class count is num_labels - 1 here --
                # presumably num_labels includes an extra padding id; confirm.
                confusion = metrics.streaming_confusion_matrix(
                    label_ids, predicted_ids, num_labels - 1, weights=mask)

                # The three scalar metrics take exactly the same arguments.
                # NOTE(review): tf.metrics.precision/recall are binary metrics
                # per the TF docs -- confirm they are meaningful for these ids.
                shared_args = dict(labels=label_ids,
                                   predictions=predicted_ids,
                                   weights=mask)
                acc = tf.metrics.accuracy(**shared_args)
                prec = tf.metrics.precision(**shared_args)
                rec = tf.metrics.recall(**shared_args)

                return {
                    "confusion_matrix": confusion,
                    "eval_accuracy": acc,
                    "eval_precision": prec,
                    "eval_recall": rec,
                }
Beispiel #3
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Estimator ``model_fn``: build the spec for train, eval or predict.

        Reads ``bert_config``, ``num_labels``, ``init_checkpoint``,
        ``learning_rate``, ``num_train_steps`` and ``num_warmup_steps`` from
        the enclosing scope.

        Args:
            features: Dict of input tensors; must contain "input_ids",
                "input_mask", "segment_ids" and "label_ids".
            labels: Unused (labels are read from ``features``).
            mode: A ``tf.estimator.ModeKeys`` value.
            params: Unused.

        Returns:
            A ``tf.estimator.EstimatorSpec``. TRAIN carries the loss and the
            train op; EVAL carries the loss, metrics and a logging hook; any
            other mode carries ``decode_tags`` as predictions.
        """

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            # lazy %-args, consistent with the variable logging below
            tf.logging.info("  name = %s, shape = %s", name, features[name].shape)

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        (total_loss, logits, decode_tags, mask_length) = create_model(bert_config, is_training,
                                                                      input_ids, input_mask, segment_ids,
                                                                      label_ids, num_labels)
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss,
                                                     learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              train_op=train_op)

        if mode == tf.estimator.ModeKeys.EVAL:
            # NOTE(review): the confusion matrix uses argmax(logits) while the
            # accuracy uses decode_tags -- confirm the two are meant to differ.
            predictions = tf.math.argmax(logits, axis=-1, output_type=tf.int32)
            cm = metrics.streaming_confusion_matrix(label_ids, predictions, num_labels, weights=input_mask)
            evl_metrics = {
                'accuracy': tf.metrics.accuracy(label_ids, decode_tags, input_mask),
                'cm': cm,
            }
            for metric_name, (_, update_op) in evl_metrics.items():
                # tf.summary.scalar only accepts rank-0 tensors. The
                # confusion-matrix update op is a matrix, so it is skipped
                # here; it is still reported through eval_metric_ops.
                op_shape = getattr(update_op, "shape", None)
                if op_shape is not None and op_shape.ndims == 0:
                    tf.summary.scalar(metric_name, update_op)

            eval_to_log = {"label_ids": label_ids,
                           "decode_tags": decode_tags}
            eval_hooks = tf.train.LoggingTensorHook(eval_to_log, every_n_iter=100)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=total_loss,
                                              evaluation_hooks=[eval_hooks],
                                              eval_metric_ops=evl_metrics)

        # any remaining mode (prediction): only the decoded tags are exposed
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=decode_tags)
Beispiel #4
0
 def metric_fn(intent_per_example_loss, intent_label_ids, intent_logits,
               slot_label_ids, num_slot_labels, slot_predict,
               is_real_example, mask):
     """Build the evaluation metrics for joint intent / slot prediction.

     Args:
         intent_per_example_loss: Intent loss value of each example.
         intent_label_ids: Ground-truth intent ids.
         intent_logits: Intent scores; the predicted intent is the argmax
             over the last axis.
         slot_label_ids: Ground-truth slot ids.
         num_slot_labels: Number of classes of the slot confusion matrix.
         slot_predict: Already-decoded slot predictions.
         is_real_example: Weights for the intent accuracy and loss
             (presumably 0 for padding examples -- confirm with caller).
         mask: Weights for the slot confusion matrix.

     Returns:
         A dict with the keys "intent_eval_accuracy", "intent_eval_loss"
         and "slot_cm".
     """
     slot_confusion = metrics.streaming_confusion_matrix(
         slot_label_ids, slot_predict, num_slot_labels, weights=mask)

     predicted_intents = tf.argmax(intent_logits, axis=-1, output_type=tf.int32)
     accuracy = tf.metrics.accuracy(labels=intent_label_ids,
                                    predictions=predicted_intents,
                                    weights=is_real_example)
     mean_loss = tf.metrics.mean(values=intent_per_example_loss,
                                 weights=is_real_example)

     result = {}
     result["intent_eval_accuracy"] = accuracy
     result["intent_eval_loss"] = mean_loss
     result["slot_cm"] = slot_confusion
     return result
Beispiel #5
0
    def _build_graph(self, hparams, scope=None):
        """Build the bidirectional RNN graph with its loss and eval metrics.

        One batch is pulled from ``self.iterator`` and run through a
        forward and a backward RNN. Their concatenated outputs pass through
        two ReLU dense layers (each followed by dropout) and a final linear
        layer that yields the logits. The loss is the masked softmax
        cross-entropy against the target outputs.

        Args:
            hparams: The hyperparameters for configuration
            scope: The variable scope name for this subgraph,
                default "dynamic_bdrnn"
        Returns:
            A tuple with (logits, loss, metrics, update_ops). ``metrics``
            and ``update_ops`` are empty lists unless the mode is EVAL.
        """
        inputs, tgt_outputs, seq_len = self.iterator.get_next()

        with tf.variable_scope(scope or "dynamic_bdrnn", dtype=tf.float32):
            # TODO: hidden activations are passed thru FC net
            # TODO: hidden-to-hidden network has skip connections (residual)
            # TODO: initial hidden and cell states are learned

            # both directions are built from the same configuration
            cell_config = dict(
                unit_type=hparams.unit_type,
                num_units=hparams.num_units,
                num_layers=hparams.num_layers,
                depth=0,
                num_residual_layers=0,
                forget_bias=hparams.forget_bias,
                dropout=0.,
                mode=self.mode,
                num_gpus=1,
                base_gpu=0)
            fw_cells = mdl_help.create_rnn_cell(**cell_config)
            bw_cells = mdl_help.create_rnn_cell(**cell_config)

            # run the bdrnn; the final states of both directions are unused
            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=fw_cells,
                cell_bw=bw_cells,
                inputs=inputs,
                sequence_length=seq_len,
                initial_state_fw=None,
                initial_state_bw=None,
                dtype=tf.float32)

            # join the two directions along the feature axis
            hidden = tf.concat([output_fw, output_bw], axis=-1)

            # two identical dense + dropout stages
            is_training = self.mode == tf.contrib.learn.ModeKeys.TRAIN
            for _ in range(2):
                hidden = tf.layers.dense(inputs=hidden,
                                         units=hparams.num_dense_units,
                                         activation=tf.nn.relu,
                                         use_bias=True)
                hidden = tf.layers.dropout(inputs=hidden,
                                           rate=hparams.dropout,
                                           training=is_training)

            logits = tf.layers.dense(inputs=hidden,
                                     units=hparams.num_labels,
                                     use_bias=False)

            # zero weight for steps past each sequence's length
            mask = tf.sequence_mask(seq_len, dtype=tf.float32)

            # labels are constants for the cross-entropy op: block gradients
            tgt_outputs = tf.stop_gradient(tgt_outputs)

            crossent = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits, labels=tgt_outputs, name="crossent")

            # loss = masked sum / (batch_size * mean(seq_len))
            # NOTE(review): hparams.batch_size is a static value -- a smaller
            # final batch would be normalised by the wrong size; confirm.
            masked_total = tf.reduce_sum(crossent * mask)
            normaliser = hparams.batch_size * tf.reduce_mean(
                tf.cast(seq_len, tf.float32))
            loss = masked_total / normaliser

            # metrics are only built for evaluation
            if self.mode != tf.contrib.learn.ModeKeys.EVAL:
                return logits, loss, [], []

            predictions = tf.argmax(input=logits, axis=-1)
            tgt_labels = tf.argmax(input=tgt_outputs, axis=-1)
            acc, acc_update = tf.metrics.accuracy(predictions=predictions,
                                                  labels=tgt_labels,
                                                  weights=mask)

            # the confusion matrix is fed flattened labels/predictions/weights
            flat_targets = tf.reshape(tgt_labels, [-1])
            flat_predictions = tf.reshape(predictions, [-1])
            flat_mask = tf.reshape(mask, [-1])
            cm, cm_update = streaming_confusion_matrix(
                labels=flat_targets,
                predictions=flat_predictions,
                num_classes=hparams.num_labels,
                weights=flat_mask)
            tf.add_to_collection("eval", cm_summary(cm, hparams.num_labels))

            return logits, loss, [acc, cm], [acc_update, cm_update]
Beispiel #6
0
    def _build_graph(self, hparams, scope=None):
        """Construct the train and evaluation graphs of the seq2seq model.

        An RNN encoder consumes the encoder inputs and its final state
        initialises an RNN decoder, whose outputs are projected onto
        ``hparams.num_labels`` logits. The loss is the masked softmax
        cross-entropy, averaged over each target sequence's length and then
        over the batch.

        Args:
            hparams: The hyperparameters for configuration
            scope: The variable scope name for this subgraph, default "dynamic_seq2seq"
        Returns:
            A tuple with (logits, loss, metrics, update_ops). ``metrics``
            and ``update_ops`` are empty lists unless the mode is EVAL.
        Raises:
            ValueError: If the mode is neither TRAIN nor EVAL (no inference
                decoder exists yet), or if ``hparams.train_helper`` is
                neither "teacher" nor "sched" in TRAIN mode.
        """

        enc_inputs, dec_inputs, dec_outputs, seq_len = self.iterator.get_next()

        # get the size of the batch
        batch_size = tf.shape(enc_inputs)[0]

        with tf.variable_scope(scope or "dynamic_seq2seq", dtype=tf.float32):
            # create encoder
            dense_input_layer = tf.layers.Dense(hparams.num_units, use_bias=False)

            if hparams.dense_input:
                enc_inputs = dense_input_layer(enc_inputs)

            enc_cells = mdl_help.create_rnn_cell(unit_type=hparams.unit_type,
                                                 num_units=hparams.num_units,
                                                 num_layers=hparams.num_layers,
                                                 depth=hparams.depth,
                                                 num_residual_layers=hparams.num_residual_layers,
                                                 forget_bias=hparams.forget_bias,
                                                 dropout=hparams.dropout,
                                                 mode=self.mode,
                                                 use_highway_as_residual=hparams.use_highway_as_residual)

            # run encoder; only the final state is used downstream
            _, enc_state = tf.nn.dynamic_rnn(cell=enc_cells,
                                             inputs=enc_inputs,
                                             sequence_length=seq_len,
                                             swap_memory=True,
                                             dtype=tf.float32,
                                             scope="encoder")

            # target sequences are one step longer than the encoder inputs
            tgt_seq_len = tf.add(seq_len, tf.constant(1, tf.int32))

            # TODO: Add Inference decoder
            # create decoder
            dec_cells = mdl_help.create_rnn_cell(unit_type=hparams.unit_type,
                                                 num_units=hparams.num_units,
                                                 num_layers=hparams.num_layers,
                                                 depth=hparams.depth,
                                                 num_residual_layers=hparams.num_residual_layers,
                                                 forget_bias=hparams.forget_bias,
                                                 dropout=hparams.dropout,
                                                 mode=self.mode,
                                                 use_highway_as_residual=hparams.use_highway_as_residual)

            # decoder embedding
            decoder_embedding = tf.get_variable("decoder_embedding",
                                                [hparams.num_labels, hparams.num_units])
            if hparams.dense_input:
                # convert to int32 argmax values for embedding to work
                dec_inputs = tf.argmax(dec_inputs, axis=-1, output_type=tf.int32)
                dec_inputs = tf.nn.embedding_lookup(decoder_embedding, dec_inputs)

            # output project layer
            projection_layer = tf.layers.Dense(hparams.num_labels, use_bias=False)

            def sampling_embedding():
                # Embedding used to feed sampled ids back into the decoder:
                # the learned table for dense inputs, otherwise an identity
                # matrix so that each id maps to its one-hot row.
                if hparams.dense_input:
                    return decoder_embedding
                return tf.eye(hparams.num_labels)

            if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
                if hparams.train_helper == "teacher":
                    # teacher forcing
                    helper = tf.contrib.seq2seq.TrainingHelper(inputs=dec_inputs,
                                                               sequence_length=tgt_seq_len)
                elif hparams.train_helper == "sched":
                    # scheduled sampling
                    helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                        inputs=dec_inputs,
                        sequence_length=tgt_seq_len,
                        embedding=sampling_embedding(),
                        sampling_probability=self.sample_probability)
                else:
                    # fail early instead of hitting an unbound `helper` below
                    raise ValueError(
                        "Unsupported train_helper %r; expected 'teacher' or 'sched'"
                        % (hparams.train_helper,))
            elif self.mode == tf.contrib.learn.ModeKeys.EVAL:
                # sampling probability 1.0: always feed back sampled outputs
                helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
                    inputs=dec_inputs,
                    sequence_length=tgt_seq_len,
                    embedding=sampling_embedding(),
                    sampling_probability=tf.constant(1.0))
            else:
                # no inference decoder yet (see TODO above)
                raise ValueError(
                    "No decoder helper is implemented for mode %r" % (self.mode,))

            decoder = tf.contrib.seq2seq.BasicDecoder(cell=dec_cells,
                                                      helper=helper,
                                                      initial_state=enc_state,
                                                      output_layer=projection_layer)

            # run decoder
            final_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                    decoder=decoder,
                    impute_finished=True,
                    swap_memory=True,
                    scope="decoder")

            logits = final_outputs.rnn_output

            # mask out entries longer than target sequence length
            mask = tf.sequence_mask(tgt_seq_len, dtype=tf.float32)

            #stop gradient thru labels by crossent op
            labels = tf.stop_gradient(dec_outputs)

            crossent = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                                  labels=labels,
                                                                  name="crossent")

            # Per-sequence mean of the masked cross-entropy, averaged over the
            # batch. crossent and mask are [batch, time], so the lengths get
            # exactly ONE trailing axis ([batch, 1]). A second expand_dims
            # would broadcast the quotient to [batch, batch, time] and divide
            # every sequence by every other sequence's length.
            per_example_len = tf.expand_dims(tf.cast(tgt_seq_len, tf.float32), -1)
            loss = tf.reduce_sum((crossent * mask) / per_example_len) / tf.cast(
                batch_size, tf.float32)

            metrics = []
            update_ops = []
            if self.mode == tf.contrib.learn.ModeKeys.EVAL:
                predictions = tf.argmax(input=logits, axis=-1)
                targets = tf.argmax(input=dec_outputs, axis=-1)
                acc, acc_update = tf.metrics.accuracy(predictions=predictions,
                                                      labels=targets,
                                                      weights=mask)
                # flatten for confusion matrix
                targets_flat = tf.reshape(targets, [-1])
                predictions_flat = tf.reshape(predictions, [-1])
                mask_flat = tf.reshape(mask, [-1])
                cm, cm_update = streaming_confusion_matrix(labels=targets_flat,
                                                           predictions=predictions_flat,
                                                           num_classes=hparams.num_labels,
                                                           weights=mask_flat)
                tf.add_to_collection("eval", cm_summary(cm, hparams.num_labels))
                metrics = [acc, cm]
                update_ops = [acc_update, cm_update]

            return logits, loss, metrics, update_ops