Example #1
        def model_fn(features,
                     labels,
                     mode,
                     params):  # pylint: disable=unused-argument
            """The `model_fn` for TPUEstimator."""
            tf.logging.info("*** Features ***")
            for name in sorted(features.keys()):
                tf.logging.info("  name = %s, shape = %s" %
                                (name, features[name].shape))

            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            segment_ids = features["segment_ids"]

            embeddings = self._create_model(model_config, run_config, input_ids,
                                            input_mask, segment_ids, model_type)
            scaffold_fn = model_utils.init_from_checkpoint(FLAGS)
            
            output_spec = None
            if mode == tf.estimator.ModeKeys.TRAIN:
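                # A zero-valued variable appears to serve as a placeholder loss:
                # this embedding-extraction model_fn has no real training objective.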
                loss = tf.Variable(0.0, name="loss", dtype=tf.float32)
                train_op, _, _ = model_utils.get_train_op(FLAGS, loss)
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={"embeddings": embeddings},
                    scaffold_fn=scaffold_fn)
            
            return output_spec
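
For context, a model_fn like the one above is normally handed to tf.contrib.tpu.TPUEstimator. The sketch below shows that wiring under stated assumptions: tf and FLAGS are defined as in the example, and the specific flag names (FLAGS.master, FLAGS.iterations, FLAGS.num_core_per_host, FLAGS.predict_batch_size) are illustrative rather than taken from the source.

    # Hedged wiring sketch; flag names below are assumptions for illustration.
    run_config = tf.contrib.tpu.RunConfig(
        master=FLAGS.master,
        model_dir=FLAGS.model_dir,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations,
            num_shards=FLAGS.num_core_per_host))

    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)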
Example #2
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            """The `model_fn` for TPUEstimator."""
            def metric_fn(label_ids, predict_ids):
                precision = tf.metrics.precision(labels=label_ids,
                                                 predictions=predict_ids)
                recall = tf.metrics.recall(labels=label_ids,
                                           predictions=predict_ids)

                metric = {
                    "precision": precision,
                    "recall": recall,
                }

                return metric

            tf.logging.info("*** Features ***")
            for name in sorted(features.keys()):
                tf.logging.info("  name = %s, shape = %s" %
                                (name, features[name].shape))

            input_ids = features["input_ids"]
            input_masks = features["input_masks"]
            segment_ids = features["segment_ids"]
            label_ids = features["label_ids"] if mode in [
                tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL
            ] else None

            loss, predict_ids = self._create_model(input_ids, input_masks,
                                                   segment_ids, label_ids,
                                                   label_list, mode)

            scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

            output_spec = None
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_op, _, _ = model_utils.get_train_op(FLAGS, loss)
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)
            elif mode == tf.estimator.ModeKeys.EVAL:
                masked_label_ids = self._get_masked_data(label_ids, label_list)
                masked_predict_ids = self._get_masked_data(
                    predict_ids, label_list)
                eval_metrics = (metric_fn,
                                [masked_label_ids, masked_predict_ids])
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    eval_metrics=eval_metrics,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={"predict": predict_ids},
                    scaffold_fn=scaffold_fn)

            return output_spec
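
On TPU, eval_metrics must be a (metric_fn, tensors) pair rather than a dict: the tensors are gathered onto the host and passed positionally to metric_fn, which returns a dict of (value, update_op) pairs, exactly what tf.metrics.precision and tf.metrics.recall produce. A hedged usage sketch for driving evaluation with an estimator built around this model_fn, where eval_input_fn and FLAGS.max_eval_steps are assumed names:

    # Hedged usage sketch; `eval_input_fn` and `FLAGS.max_eval_steps` are assumptions.
    results = estimator.evaluate(input_fn=eval_input_fn,
                                 steps=FLAGS.max_eval_steps)
    tf.logging.info("precision = %.4f, recall = %.4f",
                    results["precision"], results["recall"])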
Example #3
    def model_fn(features, labels, mode, params):
        """doc."""
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        assert is_training

        #### Retrieve `mems` from `params["cache"]`
        mems = {}
        idx = 0
        if FLAGS.mem_len > 0:
            mems["mems"] = params["cache"]

        #### Get loss from inputs
        total_loss, new_mems, monitor_dict = function_builder.get_loss(
            FLAGS, features, labels, mems, is_training)

        #### Turn `new_mems` into `new_cache`
        new_cache = []
        if FLAGS.mem_len > 0:
            new_cache += new_mems["mems"]

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info("#params: {}".format(num_params))

        #### Configuring the optimizer
        train_op, learning_rate, gnorm = model_utils.get_train_op(
            FLAGS, total_loss)
        monitor_dict["lr"] = learning_rate
        monitor_dict["gnorm"] = gnorm

        #### Customized initial checkpoint
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS, global_vars=True)

        #### Creating host calls
        host_call = function_builder.construct_scalar_host_call(
            monitor_dict=monitor_dict,
            model_dir=FLAGS.model_dir,
            prefix="train/",
            reduce_fn=tf.reduce_mean)

        #### Constructing training TPUEstimatorSpec with new cache.
        train_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                     loss=total_loss,
                                                     train_op=train_op,
                                                     host_call=host_call,
                                                     scaffold_fn=scaffold_fn)

        train_spec.cache = new_cache

        return train_spec
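
construct_scalar_host_call is not shown in this example. A host_call is a (fn, tensors) pair in which fn runs on the CPU host, typically to write scalar summaries with tf.contrib.summary; every tensor must carry a leading batch dimension. The sketch below is an assumption about the pattern that helper follows, not its actual implementation:

    # Minimal host_call sketch (assumed pattern, not the source implementation).
    def host_call_fn(global_step, lr, gnorm):
        step = global_step[0]
        with tf.contrib.summary.create_file_writer(FLAGS.model_dir).as_default():
            with tf.contrib.summary.always_record_summaries():
                tf.contrib.summary.scalar("train/lr", lr[0], step=step)
                tf.contrib.summary.scalar("train/gnorm", gnorm[0], step=step)
                return tf.contrib.summary.all_summary_ops()

    host_call = (host_call_fn, [
        tf.reshape(tf.train.get_or_create_global_step(), [1]),
        tf.reshape(learning_rate, [1]),
        tf.reshape(gnorm, [1]),
    ])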
Example #4
    def __init__(self, config, FLAGS):
        self.config = config
        self.max_segment_len = config['max_segment_len']
        self.max_span_width = config["max_span_width"]
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        self.subtoken_maps = {}
        self.gold = {}
        self.eval_data = None  # Load eval data lazily.
        self.FLAGS = FLAGS
        self.xlnet_config = xlnet.XLNetConfig(
            json_path=FLAGS.xlnet_config_file)

        print("################ spiece_model_file #############")
        print(FLAGS.spiece_model_file)
        self.tokenizer = spm.SentencePieceProcessor()
        self.tokenizer.load(FLAGS.spiece_model_file)

        input_props = []
        input_props.append((tf.int32, [None, None]))  # input_ids.
        input_props.append((tf.int32, [None, None]))  # seg_ids.
        input_props.append((tf.float32, [None, None]))  # input_mask
        input_props.append((tf.int32, [None]))  # Text lengths.
        input_props.append((tf.int32, [None, None]))  # Speaker IDs.
        input_props.append((tf.int32, []))  # Genre.
        input_props.append((tf.bool, []))  # Is training.
        input_props.append((tf.int32, [None]))  # Gold starts.
        input_props.append((tf.int32, [None]))  # Gold ends.
        input_props.append((tf.int32, [None]))  # Cluster ids.
        input_props.append((tf.int32, [None]))  # Sentence Map

        self.queue_input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in input_props
        ]
        dtypes, shapes = zip(*input_props)
        queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue()

        self.predictions, self.loss = self.get_predictions_and_loss(
            *self.input_tensors)
        ##### xlnet ######
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)
        self.train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, self.loss)
        self.global_step = tf.train.get_or_create_global_step()
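
The PaddingFIFOQueue built above is typically fed by a background thread that tensorizes raw examples and runs enqueue_op; dequeue then supplies get_predictions_and_loss with already-padded batches. A hedged sketch of that feeder loop, where tensorize_example and train_examples are assumed names and random is Python's standard-library module:

    # Hedged feeder-loop sketch; `tensorize_example` / `train_examples` are assumptions.
    def enqueue_loop(self, session, train_examples):
        while True:
            random.shuffle(train_examples)
            for example in train_examples:
                tensorized = self.tensorize_example(example, is_training=True)
                feed_dict = dict(zip(self.queue_input_tensors, tensorized))
                session.run(self.enqueue_op, feed_dict=feed_dict)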
Example #5
def train(ps_device):
    ##### Get input function and model function

    train_input_fn, record_info_dict = data_utils.get_input_fn(
        tfrecord_dir=FLAGS.record_info_dir,
        split="train",
        bsz_per_host=FLAGS.train_batch_size,
        seq_len=FLAGS.seq_len,
        reuse_len=FLAGS.reuse_len,
        bi_data=FLAGS.bi_data,
        num_hosts=1,
        num_core_per_host=1,  # set to one no matter how many GPUs
        perm_size=FLAGS.perm_size,
        mask_alpha=FLAGS.mask_alpha,
        mask_beta=FLAGS.mask_beta,
        uncased=FLAGS.uncased,
        num_passes=FLAGS.num_passes,
        use_bfloat16=FLAGS.use_bfloat16,
        num_predict=FLAGS.num_predict)

    # for key, info in record_info_dict.items():
    tf.logging.info("num of batches {}".format(record_info_dict["num_batch"]))

    ##### Create input tensors / placeholders
    bsz_per_core = FLAGS.train_batch_size // FLAGS.num_core_per_host

    params = {
        "batch_size": FLAGS.train_batch_size  # the whole batch
    }
    train_set = train_input_fn(params)

    example = train_set.make_one_shot_iterator().get_next()

    if FLAGS.num_core_per_host > 1:
        examples = [{} for _ in range(FLAGS.num_core_per_host)]
        for key in example.keys():
            vals = tf.split(example[key], FLAGS.num_core_per_host, 0)
            for device_id in range(FLAGS.num_core_per_host):
                examples[device_id][key] = vals[device_id]
    else:
        examples = [example]

    ##### Create computational graph
    tower_mems, tower_losses, tower_new_mems, tower_grads_and_vars = [], [], [], []

    for i in range(FLAGS.num_core_per_host):
        reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, ps_device)), \
            tf.variable_scope(tf.get_variable_scope(), reuse=reuse):

            # The mems for each tower are stored in a dictionary
            mems_i = {}
            if FLAGS.mem_len:
                mems_i["mems"] = create_mems_tf(bsz_per_core)

            loss_i, new_mems_i, grads_and_vars_i = single_core_graph(
                is_training=True, features=examples[i], mems=mems_i)

            tower_mems.append(mems_i)
            tower_losses.append(loss_i)
            tower_new_mems.append(new_mems_i)
            tower_grads_and_vars.append(grads_and_vars_i)

    ## average losses and gradients across towers
    if len(tower_losses) > 1:
        loss = tf.add_n(tower_losses) / len(tower_losses)
        grads_and_vars = average_grads_and_vars(tower_grads_and_vars)
    else:
        loss = tower_losses[0]
        grads_and_vars = tower_grads_and_vars[0]

    ## get train op
    train_op, learning_rate, gnorm = model_utils.get_train_op(
        FLAGS, None, grads_and_vars=grads_and_vars)
    global_step = tf.train.get_global_step()

    ##### Training loop
    # initialize mems
    tower_mems_np = []
    for i in range(FLAGS.num_core_per_host):
        mems_i_np = {}
        for key in tower_mems[i].keys():
            mems_i_np[key] = initialize_mems_np(bsz_per_core)
        tower_mems_np.append(mems_i_np)

    saver = tf.train.Saver()

    gpu_options = tf.GPUOptions(allow_growth=True)

    model_utils.init_from_checkpoint(FLAGS, global_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())

        fetches = [
            loss, tower_new_mems, global_step, gnorm, learning_rate, train_op
        ]

        total_loss, prev_step = 0., -1
        while True:
            feed_dict = {}
            for i in range(FLAGS.num_core_per_host):
                for key in tower_mems_np[i].keys():
                    for m, m_np in zip(tower_mems[i][key],
                                       tower_mems_np[i][key]):
                        feed_dict[m] = m_np

            fetched = sess.run(fetches, feed_dict=feed_dict)

            loss_np, tower_mems_np, curr_step = fetched[:3]
            total_loss += loss_np

            if curr_step > 0 and curr_step % FLAGS.iterations == 0:
                curr_loss = total_loss / (curr_step - prev_step)
                tf.logging.info(
                    "[{}] | gnorm {:.2f} lr {:8.6f} "
                    "| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
                        curr_step, fetched[-3], fetched[-2], curr_loss,
                        math.exp(curr_loss), curr_loss / math.log(2)))
                total_loss, prev_step = 0., curr_step

            if curr_step > 0 and curr_step % FLAGS.save_steps == 0:
                save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
                saver.save(sess, save_path)
                tf.logging.info("Model saved in path: {}".format(save_path))

            if curr_step >= FLAGS.train_steps:
                break
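
average_grads_and_vars is called above but not shown. A hedged sketch of what it is assumed to do: average each variable's gradient across towers, keeping the (grad, var) pairing expected by get_train_op:

    # Hedged sketch; assumes every tower yields (grad, var) pairs for the same
    # variables in the same order.
    def average_grads_and_vars(tower_grads_and_vars):
        averaged = []
        for pairs in zip(*tower_grads_and_vars):
            grads = [g for g, _ in pairs if g is not None]
            grad = tf.add_n(grads) / float(len(grads)) if grads else None
            averaged.append((grad, pairs[0][1]))
        return averaged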
Example #6
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Get loss from inputs
        if FLAGS.is_regression:
            (total_loss, per_example_loss,
             logits) = function_builder.get_regression_loss(
                 FLAGS, features, is_training)
        else:
            (total_loss, per_example_loss, logits,
             p) = function_builder.get_classification_loss(
                 FLAGS, features, n_class, is_training)

        # Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        # load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        # Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            assert FLAGS.num_hosts == 1

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                eval_input_dict = {
                    'labels': label_ids,
                    'predictions': predictions,
                    'weights': is_real_example
                }
                accuracy = tf.metrics.accuracy(**eval_input_dict)
                """
                change
                """
                conf_mat = get_metrics_ops(label_ids, predictions, 3,
                                           is_real_example)

                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                return {
                    'eval_accuracy': accuracy,
                    'eval_loss': loss,
                    "conf_mat": conf_mat
                }

            def regression_metric_fn(per_example_loss, label_ids, logits,
                                     is_real_example):
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                pearsonr = tf.contrib.metrics.streaming_pearson_correlation(
                    logits, label_ids, weights=is_real_example)

                return {'eval_loss': loss, 'eval_pearsonr': pearsonr}

            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)

            # Constructing evaluation TPUEstimatorSpec.
            label_ids = tf.reshape(features['label_ids'], [-1])

            if FLAGS.is_regression:
                metric_fn = regression_metric_fn
            metric_args = [
                per_example_loss, label_ids, logits, is_real_example
            ]

            if FLAGS.use_tpu:
                eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=(metric_fn, metric_args),
                    scaffold_fn=scaffold_fn)
            else:
                eval_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metric_fn(*metric_args))

            return eval_spec

        elif mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.reshape(features["label_ids"], [-1])

            predictions = {
                "logits": logits,
                "labels": label_ids,
                "is_real": features["is_real_example"],
                "p": p,
            }

            if FLAGS.use_tpu:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions=predictions)
            return output_spec

        # Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        # Constructing training TPUEstimatorSpec.
        if FLAGS.use_tpu:
            # Creating host calls
            if not FLAGS.is_regression:
                label_ids = tf.reshape(features['label_ids'], [-1])
                predictions = tf.argmax(logits,
                                        axis=-1,
                                        output_type=label_ids.dtype)
                is_correct = tf.equal(predictions, label_ids)
                accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

                monitor_dict["accuracy"] = accuracy

                host_call = function_builder.construct_scalar_host_call(
                    monitor_dict=monitor_dict,
                    model_dir=FLAGS.model_dir,
                    prefix="train/",
                    reduce_fn=tf.reduce_mean)
            else:
                host_call = None

            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        else:
            train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                    loss=total_loss,
                                                    train_op=train_op)

        return train_spec
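
get_metrics_ops is referenced in metric_fn but not defined in this example. A hedged sketch of a streaming confusion-matrix metric that satisfies the (value, update_op) contract the metric dict expects; the real helper may differ:

    # Hedged sketch of a streaming confusion matrix; not the source implementation.
    def get_metrics_ops(labels, predictions, num_classes, weights):
        batch_cm = tf.confusion_matrix(labels, predictions,
                                       num_classes=num_classes,
                                       weights=weights,
                                       dtype=tf.float32)
        total_cm = tf.get_variable(
            "total_confusion_matrix",
            shape=[num_classes, num_classes],
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
            trainable=False,
            collections=[tf.GraphKeys.LOCAL_VARIABLES,
                         tf.GraphKeys.METRIC_VARIABLES])
        update_op = tf.assign_add(total_cm, batch_cm)
        return total_cm, update_op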
Example #7
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        total_loss, per_example_loss, logits = function_builder.get_race_loss(
            FLAGS, features, is_training)

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        #### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            assert FLAGS.num_hosts == 1

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                eval_input_dict = {
                    'labels': label_ids,
                    'predictions': predictions,
                    'weights': is_real_example
                }
                accuracy = tf.metrics.accuracy(**eval_input_dict)

                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                return {'eval_accuracy': accuracy, 'eval_loss': loss}

            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)

            #### Constructing evaluation TPUEstimatorSpec.
            label_ids = tf.reshape(features['label_ids'], [-1])
            metric_args = [
                per_example_loss, label_ids, logits, is_real_example
            ]

            if FLAGS.use_tpu:
                eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=(metric_fn, metric_args),
                    scaffold_fn=scaffold_fn)
            else:
                eval_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metric_fn(*metric_args))

            return eval_spec

        elif mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.reshape(features["label_ids"], [-1])

            predictions = {
                "logits": logits,
                "labels": label_ids,
                "is_real": features["is_real_example"]
            }

            if FLAGS.use_tpu:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions=predictions)
            return output_spec

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### Constructing training TPUEstimatorSpec.
        if FLAGS.use_tpu:
            #### Creating host calls
            host_call = None

            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        else:
            train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                    loss=total_loss,
                                                    train_op=train_op)

        return train_spec
Example #8
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Get loss from inputs
        outputs = function_builder.get_qa_outputs(FLAGS, features, is_training)

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        scaffold_fn = None

        #### Prediction mode
        if mode == tf.estimator.ModeKeys.PREDICT:
            if FLAGS.init_checkpoint:
                tf.logging.info(
                    "init_checkpoint not being used in predict mode.")

            predictions = {
                "unique_ids": features["unique_ids"],
                "start_top_index": outputs["start_top_index"],
                "start_top_log_probs": outputs["start_top_log_probs"],
                "end_top_index": outputs["end_top_index"],
                "end_top_log_probs": outputs["end_top_log_probs"],
                "cls_logits": outputs["cls_logits"]
            }

            if FLAGS.use_tpu:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions=predictions)
            return output_spec

        #### Compute loss
        seq_length = tf.shape(features["input_ids"])[1]

        def compute_loss(log_probs, positions):
            one_hot_positions = tf.one_hot(positions,
                                           depth=seq_length,
                                           dtype=tf.float32)

            loss = -tf.reduce_sum(one_hot_positions * log_probs, axis=-1)
            loss = tf.reduce_mean(loss)
            return loss

        start_loss = compute_loss(outputs["start_log_probs"],
                                  features["start_positions"])
        end_loss = compute_loss(outputs["end_log_probs"],
                                features["end_positions"])

        total_loss = (start_loss + end_loss) * 0.5

        cls_logits = outputs["cls_logits"]
        is_impossible = tf.reshape(features["is_impossible"], [-1])
        regression_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=is_impossible, logits=cls_logits)
        regression_loss = tf.reduce_mean(regression_loss)

        # note(zhiliny): multiply the loss by 0.5 by default so that its scale
        # is comparable to start_loss and end_loss
        total_loss += regression_loss * 0.5

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Constructing training TPUEstimatorSpec.
        if FLAGS.use_tpu:
            host_call = function_builder.construct_scalar_host_call(
                monitor_dict=monitor_dict,
                model_dir=FLAGS.model_dir,
                prefix="train/",
                reduce_fn=tf.reduce_mean)

            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        else:
            train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                    loss=total_loss,
                                                    train_op=train_op)

        return train_spec
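
In PREDICT mode the spec above yields one dict per example from Estimator.predict. A hedged usage sketch for consuming those outputs, where predict_input_fn is an assumed name:

    # Hedged usage sketch; `predict_input_fn` is an assumption.
    for result in estimator.predict(input_fn=predict_input_fn,
                                    yield_single_examples=True):
        unique_id = int(result["unique_ids"])
        start_top_log_probs = [float(x) for x in result["start_top_log_probs"].flat]
        cls_logits = float(result["cls_logits"])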