def model_fn(features, mode):
     input_ids = features["input_ids"]
     input_mask = features["input_mask"]
     label_ids = features["label_ids"]
     is_training = mode == tf.estimator.ModeKeys.TRAIN
     total_loss, per_example_loss, logits, probabilities = create_model(
         bert_config, is_training, input_ids, input_mask, num_labels,
         label_ids)
     tvars = tf.trainable_variables()
     assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
         tvars, init_checkpoint)
     tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
     if mode == tf.estimator.ModeKeys.TRAIN:
         train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                  num_train_steps,
                                                  num_warmup_steps)
         output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=total_loss,
                                                  train_op=train_op)
     elif mode == tf.estimator.ModeKeys.EVAL:
     accu = tf.metrics.accuracy(
         labels=label_ids,
         predictions=tf.argmax(logits, axis=-1, output_type=tf.int32))
         loss = tf.metrics.mean(values=per_example_loss)
         output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=total_loss,
                                                  eval_metric_ops={
                                                      "eval_accu": accu,
                                                      "eval_loss": loss
                                                  })
     else:
         output_spec = tf.estimator.EstimatorSpec(
             mode=mode, predictions={"prob": probabilities})
     return output_spec
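Usage note: a minimal sketch of wiring a model_fn closure like the one above into an Estimator. The model directory and `train_input_fn` are hypothetical placeholders, not part of the original example.

estimator = tf.estimator.Estimator(
    model_fn=model_fn,                 # the closure defined above
    model_dir="/tmp/bert_classifier")  # hypothetical output directory
estimator.train(input_fn=train_input_fn,  # hypothetical input_fn
                max_steps=num_train_steps)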
Example #2
    def _model_builder(self):
        self.ids_placeholder = tf.placeholder(
            tf.int32, shape=[None, FLAGS.max_seq_length])
        self.mask_placeholder = tf.placeholder(
            tf.int32, shape=[None, FLAGS.max_seq_length])
        self.segment_placeholder = tf.placeholder(
            tf.int32, shape=[None, FLAGS.max_seq_length])
        self.labels_placeholder = tf.placeholder(
            tf.int32, shape=[None, FLAGS.max_seq_length])
        self.loss, self.logits, self.output = self.create_model()

        self.sess_config = tf.ConfigProto()
        self.sess_config.allow_soft_placement = True
        self.sess_config.log_device_placement = False
        self.sess_config.gpu_options.allow_growth = True
        self.sess_config.gpu_options.per_process_gpu_memory_fraction = 1
        self.sess = tf.Session(config=self.sess_config)

        # if not self.is_training:
        #     variables = tf.contrib.framework.get_variables_to_restore()
        #     variables_to_restore = [v for v in variables if v.name.split('/')[0] == 'bert']
        #     saver = tf.train.Saver(variables_to_restore)
        #     saver.restore(self.sess, self.init_checkpoint)
        # else:
        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, self.init_checkpoint)
        tf.train.init_from_checkpoint(self.init_checkpoint, assignment_map)
        print("restore from the checkpoint {0}".format(self.init_checkpoint))
Example #3
 def fit(self):
     '''
     Train the model.
     :return:
     '''
     train_data = self.get_input_feature(self.train_path)
     input_ids = tf.placeholder(shape=[None, None],
                                dtype=tf.int32,
                                name="input_ids")
     input_mask = tf.placeholder(shape=[None, None],
                                 dtype=tf.int32,
                                 name="input_mask")
     seq_lens = tf.placeholder(tf.int32, [None], name='seq_lens')
     labels = tf.placeholder(tf.int32, [None, None], name='labels')
     preds_seq, log_likelihood = self.model(input_ids, input_mask, seq_lens,
                                            labels)
     init_checkpoint = os.path.join(self.bert_path, 'bert_model.ckpt')
     tvars = tf.trainable_variables()
     assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
         tvars, init_checkpoint)
     tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
     tf.add_to_collection('preds_seq', preds_seq)
     loss = -log_likelihood / tf.cast(seq_lens, tf.float32)
     loss = tf.reduce_mean(loss)
     # NOTE: `self.loss` here is a string selecting the optimizer (it
     # confusingly shares its name with the loss tensor above); plain SGD
     # is the fallback.
     if self.loss.lower() == 'adam':
         train_op = tf.train.AdamOptimizer(self.rate).minimize(loss)
     else:
         train_op = tf.train.GradientDescentOptimizer(
             self.rate).minimize(loss)
     saver = tf.train.Saver(tf.global_variables())
     with tf.Session(config=self.tf_config) as sess:
         sess.run(tf.global_variables_initializer())
         for i in range(self.epoch):
             for step, (input_ids_batch, input_mask_batch, seq_lens_batch,
                        labels_batch) in enumerate(
                            self.batch_yield(train_data)):
                 _, curr_loss = sess.run(
                     [train_op, loss],
                     feed_dict={
                         input_ids: input_ids_batch,
                         input_mask: input_mask_batch,
                         seq_lens: seq_lens_batch,
                         labels: labels_batch
                     })
                 if step % 10 == 0:
                     self.logger.info(
                         'epoch:%d, batch: %d, current loss: %f' %
                         (i, step + 1, curr_loss))
         saver.save(sess, self.model_path)
         tf.summary.FileWriter(self.summary_path, sess.graph)
         self.evaluate(sess, input_ids, input_mask, seq_lens, labels,
                       preds_seq)
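The example assumes a `batch_yield` helper. A minimal hypothetical implementation, assuming `train_data` is a list of (input_ids, input_mask, seq_len, labels) tuples and the class has a `batch_size` attribute:

def batch_yield(self, data):
    '''Yield fixed-size batches as parallel lists (hypothetical sketch).'''
    for start in range(0, len(data), self.batch_size):
        batch = data[start:start + self.batch_size]
        input_ids, input_mask, seq_lens, labels = zip(*batch)
        yield list(input_ids), list(input_mask), list(seq_lens), list(labels)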
Example #4
def optimize_graph(params: Params):

    config = tf.ConfigProto(device_count={'GPU': 0}, allow_soft_placement=True)

    init_checkpoint = params.ckpt_dir

    tf.logging.info('build graph...')
    # input placeholders, not sure if they are friendly to XLA
    input_ids = tf.placeholder(tf.int32, (None, params.max_seq_len),
                               'input_ids')
    input_mask = tf.placeholder(tf.int32, (None, params.max_seq_len),
                                'input_mask')
    input_type_ids = tf.placeholder(tf.int32, (None, params.max_seq_len),
                                    'segment_ids')

    jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

    with jit_scope():
        features = {}
        features['input_ids'] = input_ids
        features['input_mask'] = input_mask
        features['segment_ids'] = input_type_ids
        model = BertMultiTask(params)
        hidden_feature = model.body(features, tf.estimator.ModeKeys.PREDICT)
        pred = model.top(features, hidden_feature,
                         tf.estimator.ModeKeys.PREDICT)

        output_tensors = [pred[k] for k in pred]

        tvars = tf.trainable_variables()

        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)

        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tmp_g = tf.get_default_graph().as_graph_def()

    with tf.Session(config=config) as sess:
        tf.logging.info('load parameters from checkpoint...')
        sess.run(tf.global_variables_initializer())
        tf.logging.info('freeze...')
        tmp_g = tf.graph_util.convert_variables_to_constants(
            sess, tmp_g, [n.name[:-2] for n in output_tensors])
    tmp_file = os.path.join(params.ckpt_dir, 'export_model')
    tf.logging.info('write graph to a tmp file: %s' % tmp_file)
    with tf.gfile.GFile(tmp_file, 'wb') as f:
        f.write(tmp_g.SerializeToString())
    return tmp_file
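Usage note: a minimal sketch of loading the frozen graph written above and running it. The input tensor names follow the placeholders defined in optimize_graph; the output tensor name depends on `model.top` and is hypothetical here.

graph_def = tf.GraphDef()
with tf.gfile.GFile(tmp_file, 'rb') as f:
    graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name='')
    with tf.Session(graph=g, config=config) as sess:
        out = sess.run(
            'some_problem_top/some_problem_predict:0',  # hypothetical name
            feed_dict={'input_ids:0': ids,       # int32 [batch, max_seq_len]
                       'input_mask:0': mask,
                       'segment_ids:0': segment_ids})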
Example #5
 def model_fn(features, mode):
     input_ids = features["input_ids"]
     input_mask = features["input_mask"]
     proba = create_model(bert_config, False, input_ids, input_mask,
                          num_labels)
     tvars = tf.trainable_variables()
     assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
         tvars, init_checkpoint)
     tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
     export_outputs = {'predict': tf.estimator.export.PredictOutput(proba)}
     output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=proba,
                                              export_outputs=export_outputs)
     return output_spec
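Usage note: a minimal sketch of exporting the predict-only model_fn above for serving; `max_seq_len` and the export directory are hypothetical, and `export_saved_model` is the TF 1.13+ spelling (`export_savedmodel` in earlier 1.x).

def serving_input_receiver_fn():
    features = {
        "input_ids": tf.placeholder(tf.int32, [None, max_seq_len],
                                    name="input_ids"),
        "input_mask": tf.placeholder(tf.int32, [None, max_seq_len],
                                     name="input_mask"),
    }
    return tf.estimator.export.ServingInputReceiver(features, features)

estimator = tf.estimator.Estimator(model_fn=model_fn)
estimator.export_saved_model("export_dir", serving_input_receiver_fn)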
Example #6
    def _model_builder(self):
        self.ids_placeholder = tf.placeholder(tf.int32, shape=[None, FLAGS.max_seq_length])
        self.mask_placeholder = tf.placeholder(tf.int32, shape=[None, FLAGS.max_seq_length])
        self.segment_placeholder = tf.placeholder(tf.int32, shape=[None, FLAGS.max_seq_length])
        self.labels_placeholder = tf.placeholder(tf.float32, shape=[None, self.num_labels])
        self.loss, self.logits, self.sigmoid_logits = self.create_model()

        self.sess_config = tf.ConfigProto()
        self.sess_config.allow_soft_placement = True
        self.sess_config.log_device_placement = False
        self.sess_config.gpu_options.allow_growth = True
        self.sess_config.gpu_options.per_process_gpu_memory_fraction = 1
        self.sess = tf.Session(config=self.sess_config)
        tvars = tf.trainable_variables()
        (assignment_map, initialized_variable_names) = \
            modeling.get_assignment_map_from_checkpoint(tvars, self.init_checkpoint)
        tf.train.init_from_checkpoint(self.init_checkpoint, assignment_map)
        print("restore from the checkpoint {0}".format(self.init_checkpoint))
Example #7
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        masked_lm_positions = features["masked_lm_positions"]
        masked_lm_ids = features["masked_lm_ids"]
        masked_lm_weights = features["masked_lm_weights"]

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        model = modeling.DTIBertModel(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids,
            input_mask=input_mask,
            use_one_hot_embeddings=use_one_hot_embeddings)

        (masked_lm_loss, masked_lm_example_loss,
         masked_lm_log_probs) = get_masked_lm_output(
             bert_config, model.get_sequence_output(),
             model.get_embedding_table(), masked_lm_positions, masked_lm_ids,
             masked_lm_weights)

        total_loss = masked_lm_loss

        tvars = tf.trainable_variables()

        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None

        # masked_lm_predictions = tf.argmax(
        #     masked_lm_log_probs, axis=-1, output_type=tf.int32)
        # masked_lm_accuracy = tf.metrics.accuracy(
        #     labels=tf.reshape(masked_lm_ids, [-1]),
        #     predictions=masked_lm_predictions,
        #     weights=tf.reshape(masked_lm_weights, [-1]))
        # logging_hook = tf.train.LoggingTensorHook({"loss": total_loss,
        #                                            "accuracy": masked_lm_accuracy[1]}, every_n_iter=100)

        # logging_hook = tf.train.LoggingTensorHook({"loss": total_loss}, every_n_iter=100)

        def metric_fn(masked_lm_example_loss, masked_lm_log_probs,
                      masked_lm_ids, masked_lm_weights):
            """Computes the loss and accuracy of the model."""
            masked_lm_log_probs = tf.reshape(
                masked_lm_log_probs, [-1, masked_lm_log_probs.shape[-1]])
            masked_lm_predictions = tf.argmax(masked_lm_log_probs,
                                              axis=-1,
                                              output_type=tf.int32)
            masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])
            masked_lm_ids = tf.reshape(masked_lm_ids, [-1])
            masked_lm_weights = tf.reshape(masked_lm_weights, [-1])
            masked_lm_accuracy = tf.metrics.accuracy(
                labels=masked_lm_ids,
                predictions=masked_lm_predictions,
                weights=masked_lm_weights)
            masked_lm_mean_loss = tf.metrics.mean(
                values=masked_lm_example_loss, weights=masked_lm_weights)

            return {
                "masked_lm_accuracy": masked_lm_accuracy,
                "masked_lm_loss": masked_lm_mean_loss,
            }

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                # training_hooks=[logging_hook],
                training_hooks=[],
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = (metric_fn, [
                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,
                masked_lm_weights
            ])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            raise ValueError("Only TRAIN and EVAL modes are supported: %s" %
                             (mode))

        return output_spec
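Usage note: a minimal sketch of driving the model_fn above with a TPUEstimator, in the style of the original BERT pretraining script. The model directory, batch sizes, and `train_input_fn` are hypothetical; with `use_tpu=False` the estimator falls back to CPU/GPU.

run_config = tf.contrib.tpu.RunConfig(
    model_dir="/tmp/bert_pretraining",  # hypothetical
    tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=1000))
estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=use_tpu,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=32,  # hypothetical
    eval_batch_size=8)    # hypothetical
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)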
Example #8
    def model_fn(features, labels, mode, params):
        logging.info("*** Features ***")
        for name in sorted(features.keys()):
            logging.info("  name = %s, shape = %s" %
                         (name, features[name].shape))
        input_ids = features["input_ids"]
        mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        # FLAGS.crf is presumably consumed inside create_model; both the CRF
        # and softmax paths call it with the same arguments.
        (total_loss, logits,
         predicts) = create_model(bert_config, is_training, input_ids, mask,
                                  segment_ids, label_ids, num_labels,
                                  use_one_hot_embeddings)
        tvars = tf.trainable_variables()
        scaffold_fn = None
        initialized_variable_names = {}
        if init_checkpoint:
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:

                def tpu_scaffold():
                    # On TPU, restore inside the Scaffold so it runs on the worker.
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                         init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(total_loss, learning_rate,
                                                     num_train_steps,
                                                     num_warmup_steps, use_tpu)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)

        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(label_ids, logits, num_labels, mask):
                predictions = tf.math.argmax(logits,
                                             axis=-1,
                                             output_type=tf.int32)
                cm = metrics.streaming_confusion_matrix(label_ids,
                                                        predictions,
                                                        num_labels - 1,
                                                        weights=mask)
                return {"confusion_matrix": cm}

            eval_metrics = (metric_fn, [label_ids, logits, num_labels, mask])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
        return output_spec
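`metrics.streaming_confusion_matrix` is a project-local helper rather than core TensorFlow. A hedged sketch of turning the accumulated confusion matrix (a numpy array with rows = true labels, columns = predictions) into per-class precision, recall, and F1:

import numpy as np

def cm_to_prf(cm):
    '''Per-class precision/recall/F1 from a confusion matrix.'''
    tp = np.diag(cm).astype(np.float64)
    precision = tp / np.maximum(cm.sum(axis=0), 1)  # guard empty columns
    recall = tp / np.maximum(cm.sum(axis=1), 1)     # guard empty rows
    f1 = 2 * precision * recall / np.maximum(precision + recall, 1e-12)
    return precision, recall, f1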
Example #9
def optimize_graph(params):

    config = tf.ConfigProto(device_count={'GPU': 0}, allow_soft_placement=True)

    init_checkpoint = params.ckpt_dir

    tf.logging.info('build graph...')
    # input placeholders, not sure if they are friendly to XLA
    input_ids = tf.placeholder(tf.int32, (None, params.max_seq_len),
                               'input_ids')
    input_mask = tf.placeholder(tf.int32, (None, params.max_seq_len),
                                'input_mask')
    input_type_ids = tf.placeholder(tf.int32, (None, params.max_seq_len),
                                    'segment_ids')

    jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

    with jit_scope():
        features = {}
        features['input_ids'] = input_ids
        features['input_mask'] = input_mask
        features['segment_ids'] = input_type_ids
        model = BertMultiTask(params)
        hidden_feature = model.body(features, tf.estimator.ModeKeys.PREDICT)
        pred = model.top(features, hidden_feature,
                         tf.estimator.ModeKeys.PREDICT)

        output_tensors = [pred[k] for k in pred]

        tvars = tf.trainable_variables()

        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)

        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tmp_g = tf.get_default_graph().as_graph_def()

    input_node_names = ['input_ids', 'input_mask', 'segment_ids']
    output_node_names = [
        '%s_top/%s_predict' %
        (params.share_top[problem], params.share_top[problem])
        for problem in params.problem_list
    ]

    transforms = [
        'remove_nodes(op=Identity)',
        'fold_constants(ignore_errors=true)',
        'fold_batch_norms',
        # 'quantize_weights',
        # 'quantize_nodes',
        'merge_duplicate_nodes',
        'strip_unused_nodes',
        'sort_by_execution_order'
    ]

    with tf.Session(config=config) as sess:
        tf.logging.info('load parameters from checkpoint...')
        sess.run(tf.global_variables_initializer())
        tf.logging.info('freeze...')
        tmp_g = tf.graph_util.convert_variables_to_constants(
            sess, tmp_g, [n.name[:-2] for n in output_tensors])
        tmp_g = TransformGraph(tmp_g, input_node_names, output_node_names,
                               transforms)
    tmp_file = os.path.join(params.ckpt_dir, 'export_model')
    tf.logging.info('write graph to a tmp file: %s' % tmp_file)
    with tf.gfile.GFile(tmp_file, 'wb') as f:
        f.write(tmp_g.SerializeToString())
    return tmp_file
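Note: `TransformGraph` here comes from the TF 1.x Graph Transform Tool; the import this snippet assumes is:

from tensorflow.tools.graph_transforms import TransformGraph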