Example #1
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            """The `model_fn` for TPUEstimator."""
            tf.logging.info("*** Features ***")
            for name in sorted(features.keys()):
                tf.logging.info("  name = %s, shape = %s" %
                                (name, features[name].shape))

            input_ids = features["input_ids"]
            input_mask = features["input_mask"]
            segment_ids = features["segment_ids"]

            embeddings = self._create_model(model_config, run_config,
                                            input_ids, input_mask, segment_ids,
                                            model_type)
            scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

            output_spec = None
            if mode == tf.estimator.ModeKeys.TRAIN:
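                # Note: this embedding-export model has no real training
                # objective; a constant dummy variable stands in as the loss.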
                loss = tf.Variable(0.0, name="loss", dtype=tf.float32)
                train_op, _, _ = model_utils.get_train_op(FLAGS, loss)
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={"embeddings": embeddings},
                    scaffold_fn=scaffold_fn)

            return output_spec
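
A minimal sketch (not from the original repository) of how a `model_fn` like the one above is handed to a TPUEstimator; the flag values and paths below are placeholder assumptions:

    import tensorflow as tf

    run_config = tf.contrib.tpu.RunConfig(
        model_dir="/tmp/model",  # placeholder checkpoint directory
        tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=1000))

    estimator = tf.contrib.tpu.TPUEstimator(
        model_fn=model_fn,      # a model_fn as defined above
        config=run_config,
        use_tpu=False,          # falls back to CPU/GPU when False
        train_batch_size=32,
        predict_batch_size=8)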
Example #2
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            """The `model_fn` for TPUEstimator."""
            def metric_fn(label_ids, predict_ids):
                precision = tf.metrics.precision(labels=label_ids,
                                                 predictions=predict_ids)
                recall = tf.metrics.recall(labels=label_ids,
                                           predictions=predict_ids)

                metric = {
                    "precision": precision,
                    "recall": recall,
                }

                return metric

            tf.logging.info("*** Features ***")
            for name in sorted(features.keys()):
                tf.logging.info("  name = %s, shape = %s" %
                                (name, features[name].shape))

            input_ids = features["input_ids"]
            input_masks = features["input_masks"]
            segment_ids = features["segment_ids"]
            label_ids = features["label_ids"] if mode in [
                tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL
            ] else None

            loss, predict_ids = self._create_model(model_config, run_config,
                                                   input_ids, input_masks,
                                                   segment_ids, label_ids,
                                                   label_list, mode)

            scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

            output_spec = None
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_op, _, _ = model_utils.get_train_op(FLAGS, loss)
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)
            elif mode == tf.estimator.ModeKeys.EVAL:
                masked_label_ids = self._get_masked_data(label_ids, label_list)
                masked_predict_ids = self._get_masked_data(
                    predict_ids, label_list)
                eval_metrics = (metric_fn,
                                [masked_label_ids, masked_predict_ids])
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    eval_metrics=eval_metrics,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={"predict": predict_ids},
                    scaffold_fn=scaffold_fn)

            return output_spec
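
For reference, a sketch of the non-TPU equivalent of the `eval_metrics` tuple above: TPUEstimatorSpec defers `metric_fn` to the host, while a plain Estimator builds the metrics dict directly (assuming the same `metric_fn`, `masked_label_ids` and `masked_predict_ids` as above):

    output_spec = tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        eval_metric_ops=metric_fn(masked_label_ids, masked_predict_ids))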
Example #3
    def model_fn(features, labels, mode, params):
        """doc."""
        #### Training or Evaluation
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        assert is_training

        #### Retrieve `mems` from `params["cache"]`
        mems = {}
        if FLAGS.mem_len > 0:
            mems['mems'] = params['cache']

        #### Get loss from inputs
        total_loss, new_mems, monitor_dict = function_builder.get_loss(
            FLAGS, features, labels, mems, is_training
        )

        #### Turn `new_mems` into `new_cache`
        new_cache = []
        if FLAGS.mem_len > 0:
            new_cache += new_mems['mems']

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        #### Configuring the optimizer
        train_op, learning_rate, gnorm = model_utils.get_train_op(
            FLAGS, total_loss
        )
        monitor_dict['lr'] = learning_rate
        monitor_dict['gnorm'] = gnorm

        #### Customized initial checkpoint
        scaffold_fn = model_utils.init_from_checkpoint(
            FLAGS, global_vars=True)

        #### Creating host calls
        host_call = function_builder.construct_scalar_host_call(
            monitor_dict=monitor_dict,
            model_dir=FLAGS.model_dir,
            prefix='train/',
            reduce_fn=tf.reduce_mean)

        #### Constructing training TPUEstimatorSpec with new cache.
        train_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            loss=total_loss,
            train_op=train_op,
            host_call=host_call,
            scaffold_fn=scaffold_fn)

        train_spec.cache = new_cache

        return train_spec
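
Note on the cache handling above: a stock TPUEstimatorSpec has no `cache` attribute; XLNet's pretraining relies on a customized estimator loop that reads `spec.cache` and feeds it back in through `params["cache"]`. A sketch of an initial cache, with assumed shapes of [mem_len, batch_per_core, d_model] per layer:

    import numpy as np

    mem_len, bsz_per_core, d_model, n_layer = 384, 8, 1024, 24  # illustrative
    initial_cache = [
        np.zeros([mem_len, bsz_per_core, d_model], dtype=np.float32)
        for _ in range(n_layer)]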
Example #4
        def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
            """The `model_fn` for TPUEstimator."""
            def metric_fn(sent_label_ids, sent_predict_ids):
                sent_accuracy = tf.metrics.accuracy(
                    labels=sent_label_ids, predictions=sent_predict_ids)

                metric = {
                    "sent_accuracy": sent_accuracy,
                }

                return metric

            tf.logging.info("*** Features ***")
            for name in sorted(features.keys()):
                tf.logging.info("  name = %s, shape = %s" %
                                (name, features[name].shape))

            input_ids = features["input_ids"]
            input_masks = features["input_masks"]
            segment_ids = features["segment_ids"]
            sent_label_ids = features["sent_label_ids"] if mode in [
                tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL
            ] else None

            loss, sent_predict_ids, sent_predict_scores, sent_predict_probs = self._create_model(
                input_ids, input_masks, segment_ids, sent_label_ids,
                sent_label_list, mode)

            scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

            output_spec = None
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_op, _, _ = model_utils.get_train_op(FLAGS, loss)
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    scaffold_fn=scaffold_fn)
            elif mode == tf.estimator.ModeKeys.EVAL:
                eval_metrics = (metric_fn, [sent_label_ids, sent_predict_ids])
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=loss,
                    eval_metrics=eval_metrics,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={
                        "sent_predict_id": sent_predict_ids,
                        "sent_predict_score": sent_predict_scores,
                        "sent_predict_prob": sent_predict_probs
                    },
                    scaffold_fn=scaffold_fn)

            return output_spec
Example #5
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        unique_ids = features["unique_ids"]
        inp = tf.transpose(features["input_ids"], [1, 0])
        seg_id = tf.transpose(features["segment_ids"], [1, 0])
        inp_mask = tf.transpose(features["input_mask"], [1, 0])

        xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)

        # no need for dropout in prediction mode
        xlnet_config.dropout = 0.0
        xlnet_config.dropatt = 0.0

        run_config = xlnet.create_run_config(False, True, FLAGS)

        # no need for dropout in prediction mode
        run_config.dropout = 0.0
        run_config.dropatt = 0.0

        xlnet_model = xlnet.XLNetModel(xlnet_config=xlnet_config,
                                       run_config=run_config,
                                       input_ids=inp,
                                       seg_ids=seg_id,
                                       input_mask=inp_mask)

        # Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        # load pretrained models
        scaffold_fn = init_from_checkpoint(FLAGS)

        # Get a sequence output
        seq_out = xlnet_model.get_sequence_output()

        tokens = tf.transpose(seq_out, [1, 0, 2])

        predictions = {
            "unique_id": unique_ids,
            'tokens': tokens,
            'input_mask': tf.transpose(inp_mask, [1, 0])
        }

        if FLAGS.use_tpu:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=predictions)
        return output_spec
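
Sketch of the transposes above: XLNet consumes time-major tensors, so [batch, seq_len] inputs are flipped to [seq_len, batch] on the way in, and the [seq_len, batch, d_model] sequence output is flipped back for batch-major consumers:

    import tensorflow as tf

    input_ids = tf.zeros([8, 128], dtype=tf.int32)  # [batch, seq_len]
    inp = tf.transpose(input_ids, [1, 0])           # [seq_len, batch]
    seq_out = tf.zeros([128, 8, 1024])              # [seq_len, batch, d_model]
    tokens = tf.transpose(seq_out, [1, 0, 2])       # [batch, seq_len, d_model]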
Example #6
    def model_fn(features, labels, mode, params):
        """doc."""
        #### Training or Evaluation
        is_eval = mode == tf.estimator.ModeKeys.EVAL
        assert is_eval

        #### Retrieve `mems` from `params["cache"]`
        mems = {}
        if FLAGS.mem_len > 0:
            mems['mems'] = params['cache']

        #### Get loss from inputs
        total_loss, total_accuracy, new_mems, monitor_dict = custom_function_builder.get_loss(
            FLAGS, features, labels, mems, False)

        #### Turn `new_mems` into `new_cache`
        new_cache = []
        if FLAGS.mem_len > 0:
            new_cache += new_mems['mems']

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        #### Configuring the optimizer
        train_op, learning_rate, gnorm = model_utils.get_train_op(
            FLAGS, total_loss)
        monitor_dict['lr'] = learning_rate
        monitor_dict['gnorm'] = gnorm

        #### Customized initial checkpoint
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS, global_vars=True)

        # def metric_fn(accuracy):
        #     return
        #
        # eval_metrics = (metric_fn, [total_accuracy])

        # `train_op` is ignored outside TRAIN mode, so it is omitted here.
        # `EstimatorSpec` expects a `tf.train.Scaffold` instance (or None),
        # not the `scaffold_fn` callable used by TPUEstimatorSpec.
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            loss=total_loss,
            eval_metric_ops={'accuracy': total_accuracy},
            scaffold=scaffold_fn() if callable(scaffold_fn) else scaffold_fn,
        )

        return output_spec
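
Each value in `eval_metric_ops` must be a (value, update_op) pair as returned by the `tf.metrics` ops: the update op accumulates state across batches and the value op reads the result. A self-contained sketch:

    import tensorflow as tf

    labels = tf.constant([1, 0, 1])
    preds = tf.constant([1, 1, 1])
    acc, acc_update = tf.metrics.accuracy(labels=labels, predictions=preds)
    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())  # metric state is local
        sess.run(acc_update)
        print(sess.run(acc))  # 2/3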
Example #7
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Get loss from inputs
        #********************************************************************************************#
        bsz_per_core = tf.shape(features["input_ids"])[0]
        inp = tf.transpose(features["input_ids"], [1, 0])
        seg_id = tf.transpose(features["segment_ids"], [1, 0])
        inp_mask = tf.transpose(features["input_mask"], [1, 0])
        label_ids = features["label_ids"]

        xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
        run_config = xlnet.create_run_config(is_training, True, FLAGS)
        xlnet_model = xlnet.XLNetModel(xlnet_config=xlnet_config,
                                       run_config=run_config,
                                       input_ids=inp,
                                       seg_ids=seg_id,
                                       input_mask=inp_mask)
        #summary = xlnet_model.get_pooled_out(FLAGS.summary_type, FLAGS.use_summ_proj)
        # Get the corresponding embeddings: input of shape
        # [batch_size, seq_length, embedding_size]
        xlnet_model_out = xlnet_model.get_sequence_output()
        embedding = tf.transpose(xlnet_model_out, [1, 0, 2])
        max_seq_length = embedding.shape[1].value
        # Compute the true sequence lengths
        used = tf.sign(tf.abs(features["input_ids"]))
        lengths = tf.reduce_sum(
            used, axis=1)  # a [batch_size] vector holding each sequence's length in the current batch
        # Add the CRF output layer
        blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                              hidden_unit=10,
                              cell_type="lstm",
                              num_layers=1,
                              dropout_rate=0.5,
                              initializers=initializers,
                              num_labels=n_class,
                              seq_length=max_seq_length,
                              labels=label_ids,
                              lengths=lengths,
                              is_training=is_training)
        total_loss, logits, trans, pred_ids = blstm_crf.add_blstm_crf_layer(
            crf_only=True)
        #********************************************************************************************#

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        #### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(label_ids, pred_ids):
                return {
                    "eval_loss":
                    tf.metrics.mean_squared_error(labels=label_ids,
                                                  predictions=pred_ids),
                }

            eval_metrics = metric_fn(features["label_ids"], pred_ids)
            eval_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, eval_metric_ops=eval_metrics)
            return eval_spec
        elif mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "logits": logits,
                "labels": label_ids,
                "pred_ids": pred_ids,
                "input_mask": features["input_mask"]
            }
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=predictions)
            return output_spec

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### Constructing training EstimatorSpec.
        train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                loss=total_loss,
                                                train_op=train_op)
        return train_spec
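
Sketch of the sequence-length trick used above: non-padding ids (assumed non-zero) map to 1 under sign(|x|), so a row sum yields each sequence's true length:

    import numpy as np

    input_ids = np.array([[5, 17, 3, 0, 0],
                          [9,  2, 0, 0, 0]])
    lengths = np.sign(np.abs(input_ids)).sum(axis=1)  # -> [3, 2]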
Example #8
def train(ps_device):
    # Get input function and model function

    train_input_fn, record_info_dict = data_utils.get_input_fn(
        tfrecord_dir=FLAGS.record_info_dir,
        split="train",
        bsz_per_host=FLAGS.train_batch_size,
        seq_len=FLAGS.seq_len,
        reuse_len=FLAGS.reuse_len,
        bi_data=FLAGS.bi_data,
        num_hosts=1,
        num_core_per_host=1,  # set to one no matter how many GPUs
        perm_size=FLAGS.perm_size,
        mask_alpha=FLAGS.mask_alpha,
        mask_beta=FLAGS.mask_beta,
        uncased=FLAGS.uncased,
        num_passes=FLAGS.num_passes,
        use_bfloat16=FLAGS.use_bfloat16,
        num_predict=FLAGS.num_predict)

    # for key, info in record_info_dict.items():
    tf.logging.info("num of batches {}".format(record_info_dict["num_batch"]))

    # Create input tensors / placeholders
    bsz_per_core = FLAGS.train_batch_size // FLAGS.num_core_per_host

    params = {
        "batch_size": FLAGS.train_batch_size  # the whole batch
    }
    train_set = train_input_fn(params)

    example = train_set.make_one_shot_iterator().get_next()

    if FLAGS.num_core_per_host > 1:
        examples = [{} for _ in range(FLAGS.num_core_per_host)]
        for key in example.keys():
            vals = tf.split(example[key], FLAGS.num_core_per_host, 0)
            for device_id in range(FLAGS.num_core_per_host):
                examples[device_id][key] = vals[device_id]
    else:
        examples = [example]

    # Create computational graph
    tower_mems, tower_losses, tower_new_mems, tower_grads_and_vars = [], [], [], []

    for i in range(FLAGS.num_core_per_host):
        reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, ps_device)), \
                tf.variable_scope(tf.get_variable_scope(), reuse=reuse):

            # The mems for each tower is a dictionary
            mems_i = {}
            if FLAGS.mem_len:
                mems_i["mems"] = create_mems_tf(bsz_per_core)

            loss_i, new_mems_i, grads_and_vars_i = single_core_graph(
                is_training=True,
                features=examples[i],
                mems=mems_i)

            tower_mems.append(mems_i)
            tower_losses.append(loss_i)
            tower_new_mems.append(new_mems_i)
            tower_grads_and_vars.append(grads_and_vars_i)

    # average losses and gradients across towers
    if len(tower_losses) > 1:
        loss = tf.add_n(tower_losses) / len(tower_losses)
        grads_and_vars = average_grads_and_vars(tower_grads_and_vars)
    else:
        loss = tower_losses[0]
        grads_and_vars = tower_grads_and_vars[0]

    # get train op
    train_op, learning_rate, gnorm = model_utils.get_train_op(FLAGS, None,
                                                              grads_and_vars=grads_and_vars)
    global_step = tf.train.get_global_step()

    # Training loop
    # initialize mems
    tower_mems_np = []
    for i in range(FLAGS.num_core_per_host):
        mems_i_np = {}
        for key in tower_mems[i].keys():
            mems_i_np[key] = initialize_mems_np(bsz_per_core)
        tower_mems_np.append(mems_i_np)

    saver = tf.train.Saver()

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.97)  # allow_growth=True

    model_utils.init_from_checkpoint(FLAGS, global_vars=True)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False,
                                          gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())
        sess.graph.finalize()
        run_metadata = tf.RunMetadata()
        options = tf.RunOptions(trace_level=tf.RunOptions.SOFTWARE_TRACE)

        dot_rep = graph_to_dot(tf.get_default_graph())
        # s = Source(dot_rep, filename="test.gv", format="PNG")
        with open('profs/xln.dot', 'w') as fwr:
            fwr.write(str(dot_rep))

        operations_tensors = {}
        operations_attributes = {}
        operations_names = tf.get_default_graph().get_operations()
        count1 = 0
        count2 = 0

        for operation in operations_names:
            operation_name = operation.name
            operations_info = tf.get_default_graph(
            ).get_operation_by_name(operation_name).values()

            try:
                operations_attributes[operation_name] = []
                operations_attributes[operation_name].append(
                    operation.type)
                operations_attributes[operation_name].append(tf.get_default_graph(
                ).get_tensor_by_name(operation_name + ':0').dtype._is_ref_dtype)
            except Exception:
                pass

            if len(operations_info) > 0:
                if operations_info[0].shape.ndims is not None:
                    operation_shape = operations_info[0].shape.as_list()
                    operation_dtype_size = operations_info[0].dtype.size
                    if operation_dtype_size is not None:
                        operation_no_of_elements = 1
                        for dim in operation_shape:
                            if dim is not None:
                                operation_no_of_elements = operation_no_of_elements * dim
                        total_size = operation_no_of_elements * operation_dtype_size
                        operations_tensors[operation_name] = total_size
                    else:
                        count1 = count1 + 1
                else:
                    count1 = count1 + 1
                    operations_tensors[operation_name] = -1

                #   print('no shape_1: ' + operation_name)
                #  print('no shape_2: ' + str(operations_info))
                #  operation_namee = operation_name + ':0'
                # tensor = tf.get_default_graph().get_tensor_by_name(operation_namee)
                # print('no shape_3:' + str(tf.shape(tensor)))
                # print('no shape:' + str(tensor.get_shape()))

            else:
                # print('no info :' + operation_name)
                # operation_namee = operation.name + ':0'
                count2 = count2 + 1
                operations_tensors[operation_name] = -1

                # try:
                #   tensor = tf.get_default_graph().get_tensor_by_name(operation_namee)
                # print(tensor)
                # print(tf.shape(tensor))
                # except:
                # print('no tensor: ' + operation_namee)
        print(count1)
        print(count2)

        with open('./profs/tensors_sz_32.txt', 'w') as f:
            for tensor, size in operations_tensors.items():
                f.write('"' + tensor + '"::' + str(size) + '\n')

        with open('./profs/operations_attributes.txt', 'w') as f:
            for op, attrs in operations_attributes.items():
                strr = op
                for attr in attrs:
                    strr += '::' + str(attr)
                strr += '\n'
                f.write(strr)

        fetches = [loss, tower_new_mems, global_step,
                   gnorm, learning_rate, train_op]
        step = 0
        total_loss, prev_step = 0., -1
        while True:
            feed_dict = {}
            for i in range(FLAGS.num_core_per_host):
                for key in tower_mems_np[i].keys():
                    for m, m_np in zip(tower_mems[i][key], tower_mems_np[i][key]):
                        feed_dict[m] = m_np
            if step % 10 == 7 or step == 0:
                fetched = sess.run(fetches, feed_dict=feed_dict, options=options, run_metadata=run_metadata)
                #if step > 0:
                profile(run_metadata, step)
            else:
                t0 = time.time()
                fetched = sess.run(fetches, feed_dict=feed_dict)
                print(time.time() - t0)
            if step == 0:
                mem_options = tf.profiler.ProfileOptionBuilder.time_and_memory()
                mem_options["min_bytes"] = 0
                mem_options["min_micros"] = 0
                mem_options["output"] = 'file:outfile=./profs/mem.txt'
                mem_options["select"] = ("bytes", "peak_bytes", "output_bytes",
                                         "residual_bytes")
                mem = tf.profiler.profile(
                    tf.Graph(), run_meta=run_metadata, cmd="scope", options=mem_options)
                with open('profs/mem2.txt', 'w') as f:
                    f.write(str(mem))
            step += 1

            loss_np, tower_mems_np, curr_step = fetched[:3]
            total_loss += loss_np

            if curr_step > 0 and curr_step % FLAGS.iterations == 0:
                curr_loss = total_loss / (curr_step - prev_step)
                tf.logging.info("[{}] | gnorm {:.2f} lr {:8.6f} "
                                "| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
                                    curr_step, fetched[-3], fetched[-2],
                                    curr_loss, math.exp(curr_loss), curr_loss / math.log(2)))
                total_loss, prev_step = 0., curr_step

            if curr_step > 0 and curr_step % FLAGS.save_steps == 0:
                save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
                saver.save(sess, save_path)
                tf.logging.info("Model saved in path: {}".format(save_path))

            if curr_step >= FLAGS.train_steps:
                break
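
A minimal sketch of what `average_grads_and_vars` (a helper not shown in this example) is assumed to do: average each variable's gradient across the per-GPU towers. Dense gradients only; None and IndexedSlices handling is omitted:

    import tensorflow as tf

    def average_grads_and_vars(tower_grads_and_vars):
        averaged = []
        for grads_and_vars in zip(*tower_grads_and_vars):
            grads = [g for g, _ in grads_and_vars]
            grad = tf.reduce_mean(tf.stack(grads, axis=0), axis=0)
            _, var = grads_and_vars[0]  # the same variable in every tower
            averaged.append((grad, var))
        return averaged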
Example #9
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        total_loss, per_example_loss, logits = function_builder.get_race_loss(
            FLAGS, features, is_training)

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        logger.info('#params: {}'.format(num_params))

        #### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            assert FLAGS.num_hosts == 1

            def metric_fn(per_example_loss, label_ids, logits, is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                eval_input_dict = {
                    'labels': label_ids,
                    'predictions': predictions,
                    'weights': is_real_example
                }
                accuracy = tf.metrics.accuracy(**eval_input_dict)

                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                return {
                    'eval_accuracy': accuracy,
                    'eval_loss': loss}

            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)

            #### Constructing evaluation TPUEstimatorSpec with new cache.
            label_ids = tf.reshape(features['label_ids'], [-1])
            metric_args = [per_example_loss, label_ids, logits, is_real_example]

            if FLAGS.use_tpu:
                eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=(metric_fn, metric_args),
                    scaffold_fn=scaffold_fn)
            else:
                eval_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metric_fn(*metric_args))

            return eval_spec

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### Constructing training TPUEstimatorSpec with new cache.
        if FLAGS.use_tpu:
            #### Creating host calls
            host_call = None

            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, loss=total_loss, train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        else:
            train_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, train_op=train_op)

        return train_spec
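
Sketch of the `is_real_example` weighting above: padded examples get weight 0, so they cannot move the metric. With weights [1, 1, 0], one correct prediction out of two real examples gives 0.5:

    import numpy as np

    labels = np.array([1, 0, 0])
    preds = np.array([1, 1, 1])
    weights = np.array([1., 1., 0.])
    accuracy = ((labels == preds) * weights).sum() / weights.sum()  # 0.5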
Example #10
    def model_fn(features, labels, mode, params):
        # ### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        return_dict = function_builder.get_classification_outputs(
            FLAGS, features, is_training)
        # per_example_loss = return_dict["per_example_loss"]
        cls_logits = return_dict["cls_logits"]
        # ### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        logger.info('#params: {}'.format(num_params))

        # ### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        if mode == tf.estimator.ModeKeys.PREDICT:
            # label_ids = tf.reshape(features["cls"], [-1])
            predictions = {
                "feature_id": features["feature_id"],
                "cls_logits": cls_logits,
                # "cls": label_ids,
            }

            if FLAGS.use_tpu:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions=predictions)
            return output_spec

        def compute_loss(log_probs, positions, depth):
            one_hot_positions = tf.one_hot(positions,
                                           depth=depth,
                                           dtype=tf.float32)

            loss = -tf.reduce_sum(one_hot_positions * log_probs, axis=-1)
            loss = tf.reduce_mean(loss)
            return loss

        cls_log_probs = return_dict["cls_log_probs"]
        num_choices = FLAGS.num_choices
        if num_choices:
            num_classes = num_choices
        else:
            num_classes = FLAGS.num_classes
        total_loss = compute_loss(cls_log_probs,
                                  features["cls"],
                                  depth=num_classes)

        # ### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {'loss/cls': total_loss, "lr": learning_rate}

        # ### Constructing training TPUEstimatorSpec with new cache.
        if FLAGS.use_tpu:
            # ### Creating host calls
            if not FLAGS.is_regression:
                label_ids = tf.reshape(features['cls'], [-1])
                predictions = tf.argmax(cls_logits,
                                        axis=-1,
                                        output_type=label_ids.dtype)
                is_correct = tf.equal(predictions, label_ids)
                accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

                monitor_dict["accuracy"] = accuracy

                host_call = function_builder.construct_scalar_host_call(
                    monitor_dict=monitor_dict,
                    model_dir=FLAGS.model_dir,
                    prefix="train/",
                    reduce_fn=tf.reduce_mean)
            else:
                host_call = None

            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        else:
            train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                    loss=total_loss,
                                                    train_op=train_op)

        return train_spec
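
Sketch of `compute_loss` above: with one-hot targets, the inner sum picks out the log-probability of the true class, so the loss is the mean negative log-likelihood over the batch:

    import numpy as np

    log_probs = np.log([[0.7, 0.2, 0.1],
                        [0.1, 0.8, 0.1]])
    positions = np.array([0, 1])
    one_hot = np.eye(3)[positions]
    loss = -(one_hot * log_probs).sum(axis=-1).mean()  # mean NLL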
Example #11
  def model_fn(features, labels, mode, params):
    #### Training or Evaluation
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    #### Get loss from inputs
    outputs = function_builder.get_qa_outputs(FLAGS, features, is_training)

    #### Check model parameters
    num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
    tf.logging.info('#params: {}'.format(num_params))

    scaffold_fn = None

    #### Evaluation mode
    if mode == tf.estimator.ModeKeys.PREDICT:
      if FLAGS.init_checkpoint:
        tf.logging.info("init_checkpoint not being used in predict mode.")

      predictions = {
          "unique_ids": features["unique_ids"],
          "start_top_index": outputs["start_top_index"],
          "start_top_log_probs": outputs["start_top_log_probs"],
          "end_top_index": outputs["end_top_index"],
          "end_top_log_probs": outputs["end_top_log_probs"],
          "cls_logits": outputs["cls_logits"]
      }

      if FLAGS.use_tpu:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)
      else:
        output_spec = tf.estimator.EstimatorSpec(
            mode=mode, predictions=predictions)
      return output_spec

    ### Compute loss
    seq_length = tf.shape(features["input_ids"])[1]
    def compute_loss(log_probs, positions):
      one_hot_positions = tf.one_hot(
          positions, depth=seq_length, dtype=tf.float32)

      loss = - tf.reduce_sum(one_hot_positions * log_probs, axis=-1)
      loss = tf.reduce_mean(loss)
      return loss

    start_loss = compute_loss(
        outputs["start_log_probs"], features["start_positions"])
    end_loss = compute_loss(
        outputs["end_log_probs"], features["end_positions"])

    total_loss = (start_loss + end_loss) * 0.5

    cls_logits = outputs["cls_logits"]
    is_impossible = tf.reshape(features["is_impossible"], [-1])
    regression_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=is_impossible, logits=cls_logits)
    regression_loss = tf.reduce_mean(regression_loss)

    # note(zhiliny): by default multiply the loss by 0.5 so that the scale is
    # comparable to start_loss and end_loss
    total_loss += regression_loss * 0.5

    #### Configuring the optimizer
    train_op, learning_rate, _ = model_utils.get_train_op(FLAGS, total_loss)

    monitor_dict = {}
    monitor_dict["lr"] = learning_rate

    #### load pretrained models
    scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

    #### Constructing training TPUEstimatorSpec with new cache.
    if FLAGS.use_tpu:
      host_call = function_builder.construct_scalar_host_call(
          monitor_dict=monitor_dict,
          model_dir=FLAGS.model_dir,
          prefix="train/",
          reduce_fn=tf.reduce_mean)

      train_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode, loss=total_loss, train_op=train_op, host_call=host_call,
          scaffold_fn=scaffold_fn)
    else:
      train_spec = tf.estimator.EstimatorSpec(
          mode=mode, loss=total_loss, train_op=train_op)

    return train_spec
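
Sketch of the answerability term above: a sigmoid cross-entropy on `cls_logits` against the 0/1 `is_impossible` label, scaled by 0.5 so its magnitude stays comparable to the span losses. The numerically stable form computed by tf.nn.sigmoid_cross_entropy_with_logits is max(x, 0) - x*z + log(1 + exp(-|x|)):

    import numpy as np

    def sigmoid_xent(logit, label):
        return max(logit, 0) - logit * label + np.log1p(np.exp(-abs(logit)))

    print(sigmoid_xent(2.0, 1.0))  # small loss: confident and correct
    print(sigmoid_xent(2.0, 0.0))  # large loss: confident and wrong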
Example #12
def train(ps_device):
    ##### Get input function and model function

    train_input_fn, record_info_dict = data_utils.get_input_fn(
        info_dir=os.path.join(FLAGS.record_info_dir, "train"),
        split="train",
        bsz_per_host=FLAGS.train_batch_size,
        seq_len=FLAGS.seq_len,
        reuse_len=FLAGS.reuse_len,
        bi_data=FLAGS.bi_data,
        num_hosts=1,
        num_core_per_host=1,  # set to one no matter how many GPUs
        perm_size=FLAGS.perm_size,
        mask_alpha=FLAGS.mask_alpha,
        mask_beta=FLAGS.mask_beta,
        use_bfloat16=FLAGS.use_bfloat16,
        num_predict=FLAGS.num_predict)

    valid_input_fn, record_info_dict_valid = data_utils.get_input_fn(
        info_dir=os.path.join(FLAGS.record_info_dir, "valid"),
        split="valid",
        bsz_per_host=FLAGS.train_batch_size,
        seq_len=FLAGS.seq_len,
        reuse_len=FLAGS.reuse_len,
        bi_data=FLAGS.bi_data,
        num_hosts=1,
        num_core_per_host=1,
        perm_size=FLAGS.perm_size,
        mask_alpha=FLAGS.mask_alpha,
        mask_beta=FLAGS.mask_beta,
        use_bfloat16=FLAGS.use_bfloat16,
        num_predict=FLAGS.num_predict)

    # for key, info in record_info_dict.items():
    num_train_batches = record_info_dict["num_batch"]
    tf.logging.info("num of train batches {}".format(
        record_info_dict["num_batch"]))
    tf.logging.info("num of validation batches {}".format(
        record_info_dict_valid["num_batch"]))

    ##### Create input tensors / placeholders
    bsz_per_core = FLAGS.train_batch_size // FLAGS.num_core_per_host

    params = {
        "batch_size": FLAGS.train_batch_size  # the whole batch
    }
    train_set = train_input_fn(params)
    valid_set = valid_input_fn(params)

    t_iter = train_set.make_initializable_iterator()
    example = t_iter.get_next()
    v_iter = valid_set.make_initializable_iterator()
    v_example = v_iter.get_next()

    if FLAGS.num_core_per_host > 1:
        # train set
        examples = [{} for _ in range(FLAGS.num_core_per_host)]
        for key in example.keys():
            vals = tf.split(example[key], FLAGS.num_core_per_host, 0)
            for device_id in range(FLAGS.num_core_per_host):
                examples[device_id][key] = vals[device_id]

        # validation set
        v_examples = [{} for _ in range(FLAGS.num_core_per_host)]
        for key in v_example.keys():
            vals = tf.split(v_example[key], FLAGS.num_core_per_host, 0)
            for device_id in range(FLAGS.num_core_per_host):
                v_examples[device_id][key] = vals[device_id]
    else:
        examples = [example]
        v_examples = [v_example]

    ##### Create computational graph
    tower_mems, tower_losses, tower_new_mems, tower_grads_and_vars = [], [], [], []
    v_tower_mems, v_tower_losses, v_tower_new_mems = [], [], []

    for i in range(FLAGS.num_core_per_host):
        reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, ps_device)), \
            tf.variable_scope(tf.get_variable_scope(), reuse=reuse):

            # The mems for each tower is a dictionary
            mems_i = {}
            v_mems_i = {}
            if FLAGS.mem_len:
                mems_i["mems"] = create_mems_tf(bsz_per_core)
                v_mems_i["mems"] = create_mems_tf(bsz_per_core)

            loss_i, new_mems_i, grads_and_vars_i = single_core_graph(
                is_training=True, features=examples[i], mems=mems_i)

            v_loss_i, v_new_mems_i = single_core_graph(is_training=False,
                                                       features=v_examples[i],
                                                       mems=v_mems_i)

            tower_mems.append(mems_i)
            tower_losses.append(loss_i)
            tower_new_mems.append(new_mems_i)
            tower_grads_and_vars.append(grads_and_vars_i)

            v_tower_mems.append(v_mems_i)
            v_tower_losses.append(v_loss_i)
            v_tower_new_mems.append(v_new_mems_i)

    ## average losses and gradients across towers
    if len(tower_losses) > 1:
        loss = tf.add_n(tower_losses) / len(tower_losses)
        grads_and_vars = average_grads_and_vars(tower_grads_and_vars)
    else:
        loss = tower_losses[0]
        grads_and_vars = tower_grads_and_vars[0]

    if len(v_tower_losses) > 1:
        v_loss = tf.add_n(v_tower_losses) / len(v_tower_losses)
    else:
        v_loss = v_tower_losses[0]

    ## get train op
    train_op, learning_rate, gnorm = model_utils.get_train_op(
        FLAGS, None, num_train_batches, grads_and_vars=grads_and_vars)
    global_step = tf.train.get_global_step()

    ##### Training loop
    # initialize mems
    tower_mems_np = []
    v_tower_mems_np = []
    for i in range(FLAGS.num_core_per_host):
        mems_i_np = {}
        v_mems_i_np = {}
        for key in tower_mems[i].keys():
            mems_i_np[key] = initialize_mems_np(bsz_per_core)
            v_mems_i_np[key] = initialize_mems_np(bsz_per_core)
        tower_mems_np.append(mems_i_np)
        v_tower_mems_np.append(v_mems_i_np)

    saver = tf.train.Saver()

    gpu_options = tf.GPUOptions(allow_growth=True)

    model_utils.init_from_checkpoint(FLAGS, global_vars=True)

    # Create performance summaries for Tensorboard logging
    training_performance_summaries, valid_performance_summaries = tb.tensorboard_setup()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())

        # variables that are run in the session
        fetches = [
            loss, tower_new_mems, global_step, gnorm, learning_rate, train_op
        ]
        v_fetches = [v_loss, v_tower_new_mems]

        # Create writers for Tensorboard logging
        info_dict = {
            "id": FLAGS.run_id,
            "n_layers": FLAGS.n_layers,
            "d_model": FLAGS.d_model,
            "n_heads": FLAGS.n_head
        }
        train_summary_writer, valid_summary_writer = tb.create_writers(
            sess, info_dict, logging_dir=FLAGS.tb_logging_dir)

        total_loss, prev_step = 0., -1
        for epoch in range(FLAGS.epochs):

            # Train loop
            try:
                sess.run(t_iter.initializer)
                while True:
                    feed_dict = {}
                    for i in range(FLAGS.num_core_per_host):
                        for key in tower_mems_np[i].keys():
                            for m, m_np in zip(tower_mems[i][key],
                                               tower_mems_np[i][key]):
                                feed_dict[m] = m_np

                    fetched = sess.run(fetches, feed_dict=feed_dict)
                    loss_np, tower_mems_np, curr_step = fetched[:3]
                    total_loss += loss_np
                    print(curr_step)

                    # Log training progress
                    if curr_step > 0 and curr_step % FLAGS.log_steps == 0:
                        curr_loss = total_loss / (curr_step - prev_step)
                        summ = tb.run_train(sess,
                                            training_performance_summaries,
                                            curr_loss)
                        train_summary_writer.add_summary(summ, curr_step)
                        tf.logging.info(
                            "[{}] | gnorm {:.2f} lr {:8.6f} "
                            "| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".
                            format(curr_step, fetched[-3], fetched[-2],
                                   curr_loss, math.exp(curr_loss),
                                   curr_loss / math.log(2)))
                        total_loss, prev_step = 0., curr_step

                    # Save checkpoint
                    if curr_step > 0 and FLAGS.save_steps is not None and curr_step % FLAGS.save_steps == 0:
                        save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
                        saver.save(sess, save_path)
                        tf.logging.info(
                            "Model saved in path: {}".format(save_path))

            except tf.errors.OutOfRangeError:
                pass

            # Validation loop
            try:
                sess.run(v_iter.initializer)
                v_total_loss, v_steps = 0., 0
                while True:
                    v_feed_dict = {}
                    for i in range(FLAGS.num_core_per_host):
                        for key in v_tower_mems_np[i].keys():
                            for m, m_np in zip(v_tower_mems[i][key],
                                               v_tower_mems_np[i][key]):
                                v_feed_dict[m] = m_np

                    v_fetched = sess.run(v_fetches, feed_dict=v_feed_dict)
                    v_loss_np, v_tower_mems_np = v_fetched[:]
                    v_total_loss += v_loss_np
                    v_steps += 1

            except tf.errors.OutOfRangeError:
                val_loss = v_total_loss / v_steps
                v_pplx = math.exp(val_loss)
                tf.logging.info(
                    "Validation: [{}] | loss {:.2f} | pplx {:>7.2f}".format(
                        curr_step, val_loss, v_pplx))

                summ_valid = tb.run_valid(sess, valid_performance_summaries,
                                          val_loss, v_pplx)
                valid_summary_writer.add_summary(summ_valid, curr_step)

            tf.logging.info("------------ Epoch {} ------------".format(i))
Example #13
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Get loss from inputs
        if FLAGS.is_regression:
            (total_loss, per_example_loss,
             logits) = function_builder.get_regression_loss(
                 FLAGS, features, is_training)
        else:
            (total_loss, per_example_loss,
             logits) = function_builder.get_classification_loss(
                 FLAGS, features, n_class, is_training)

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        #### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            assert FLAGS.num_hosts == 1

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                eval_input_dict = {
                    'labels': label_ids,
                    'predictions': predictions,
                    'weights': is_real_example
                }
                accuracy = tf.metrics.accuracy(**eval_input_dict)

                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)

                f1 = tf.contrib.metrics.f1_score(label_ids, predictions)

                # Earlier attempts to also compute a confusion matrix and
                # sklearn-style precision/recall/F1/MCC here, by evaluating
                # `label_ids` and `predictions` in a fresh tf.Session (or via
                # eval()/InteractiveSession), all failed with
                # FailedPreconditionError: "GetNext() failed because the
                # iterator has not been initialized", so this metric_fn only
                # returns accuracy, loss and f1.

                return {'eval_accuracy': accuracy, 'eval_loss': loss, 'f1': f1}

            def regression_metric_fn(per_example_loss, label_ids, logits,
                                     is_real_example):
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                pearsonr = tf.contrib.metrics.streaming_pearson_correlation(
                    logits, label_ids, weights=is_real_example)
                return {'eval_loss': loss, 'eval_pearsonr': pearsonr}

            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)

            #### Constructing evaluation TPUEstimatorSpec with new cache.
            label_ids = tf.reshape(features['label_ids'], [-1])

            if FLAGS.is_regression:
                metric_fn = regression_metric_fn
            metric_args = [
                per_example_loss, label_ids, logits, is_real_example
            ]

            if FLAGS.use_tpu:
                eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=(metric_fn, metric_args),
                    scaffold_fn=scaffold_fn)
            else:
                eval_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metric_fn(*metric_args))

            return eval_spec

        elif mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.reshape(features["label_ids"], [-1])

            predictions = {
                "logits": logits,
                "labels": label_ids,
                "is_real": features["is_real_example"]
            }

            if FLAGS.use_tpu:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions=predictions)
            return output_spec

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### Constructing training TPUEstimatorSpec with new cache.
        if FLAGS.use_tpu:
            #### Creating host calls
            if not FLAGS.is_regression:
                label_ids = tf.reshape(features['label_ids'], [-1])
                predictions = tf.argmax(logits,
                                        axis=-1,
                                        output_type=label_ids.dtype)
                is_correct = tf.equal(predictions, label_ids)
                accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

                monitor_dict["accuracy"] = accuracy

                host_call = function_builder.construct_scalar_host_call(
                    monitor_dict=monitor_dict,
                    model_dir=FLAGS.model_dir,
                    prefix="train/",
                    reduce_fn=tf.reduce_mean)
            else:
                host_call = None

            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        else:
            train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                    loss=total_loss,
                                                    train_op=train_op)

        return train_spec
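
Sketch of the host-call accuracy computed above for TensorBoard monitoring: a plain batch accuracy from argmax predictions:

    import numpy as np

    logits = np.array([[2.0, 0.1], [0.3, 1.2], [1.5, 0.5]])
    label_ids = np.array([0, 1, 1])
    predictions = logits.argmax(axis=-1)          # tf.argmax(logits, -1)
    accuracy = (predictions == label_ids).mean()  # tf.reduce_mean(tf.cast(...))
    print(accuracy)  # 2/3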
Example #14
    def model_fn(features, labels, mode, params):
        # Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        total_loss, per_example_loss, logits, label, mask = function_builder.get_crf_outputs(
            FLAGS, features, is_training)

        # Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        # predict mode
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                "logits": logits,
                "labels": label,
                'mask': features['input_mask'],
                'label_mask': mask
            }
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=predictions)
            return output_spec

        # Evaluation mode
        elif mode == tf.estimator.ModeKeys.EVAL:
            assert FLAGS.num_hosts == 1

            def metric_fn(per_example_loss, label_ids, logits, weight):
                eval_input_dict = {
                    'labels': label_ids,
                    'predictions': logits,
                    'weights': weight
                }

                accuracy = tf.metrics.accuracy(**eval_input_dict)

                eval_input_dict = {
                    'labels': tf.one_hot(label_ids, FLAGS.crf_classes),
                    'predictions': tf.one_hot(logits, FLAGS.crf_classes),
                    'weights': weight
                }
                f1 = tf.contrib.metrics.f1_score(**eval_input_dict)
                loss = tf.metrics.mean(values=per_example_loss)
                return {'eval_accuracy': accuracy, 'eval_loss': loss, 'f1': f1}

            metric_args = [per_example_loss, label, logits, mask]

            eval_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops=metric_fn(*metric_args))

            return eval_spec

        # load pretrained models
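        # (Note: the PREDICT and EVAL branches above return before this
        # point, so the checkpoint is only restored in TRAIN mode.)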
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                loss=total_loss,
                                                train_op=train_op)

        return train_spec
Example #15
def main(_):
    if FLAGS.server_ip and FLAGS.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(FLAGS.server_ip, FLAGS.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)

    tf.logging.set_verbosity(tf.logging.INFO)

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.log_step_count_steps = min(FLAGS.log_step_count_steps,
                                         FLAGS.save_steps)

    if FLAGS.do_predict:
        predict_dir = FLAGS.predict_dir
        if not tf.gfile.Exists(predict_dir):
            tf.gfile.MakeDirs(predict_dir)

    processors = {
        "mnli_matched": MnliMatchedProcessor,
        "mnli_mismatched": MnliMismatchedProcessor,
        'sts-b': StsbProcessor,
        'imdb': ImdbProcessor,
        "yelp5": Yelp5Processor
    }

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval, `do_predict` or "
            "`do_submit` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    if not tf.gfile.Exists(FLAGS.model_dir):
        tf.gfile.MakeDirs(FLAGS.model_dir)

#   ########################### LOAD PT model
#   ########################### LOAD PT model
#   import torch
#   from pytorch_transformers import CONFIG_NAME, TF_WEIGHTS_NAME, XLNetTokenizer, XLNetConfig, XLNetForSequenceClassification

#   save_path = os.path.join(FLAGS.model_dir, TF_WEIGHTS_NAME)
#   tf.logging.info("Model loaded from path: {}".format(save_path))

#   device = torch.device("cuda", 4)
#   config = XLNetConfig.from_pretrained('xlnet-large-cased', finetuning_task=u'sts-b')
#   config_path = os.path.join(FLAGS.model_dir, CONFIG_NAME)
#   config.to_json_file(config_path)
#   pt_model = XLNetForSequenceClassification.from_pretrained(FLAGS.model_dir, from_tf=True, num_labels=1)
#   pt_model.to(device)
#   pt_model = torch.nn.DataParallel(pt_model, device_ids=[4, 5, 6, 7])

#   from torch.optim import Adam
#   optimizer = Adam(pt_model.parameters(), lr=0.001, betas=(0.9, 0.999),
#                     eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay,
#                     amsgrad=False)
#   ########################### LOAD PT model

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels() if not FLAGS.is_regression else None

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)
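    # Illustrative behavior of tokenize_fn (the exact ids depend on the
    # loaded sentencepiece model): preprocess_text normalizes the string
    # (lowercasing it when FLAGS.uncased is set), then encode_ids maps it to
    # piece ids, e.g. "Hello, world!" -> a list like [17, 923, 84]
    # (hypothetical values).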

    # run_config = model_utils.configure_tpu(FLAGS)


#   model_fn = get_model_fn(len(label_list) if label_list is not None else None)

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    # estimator = tf.estimator.Estimator(
    #     model_fn=model_fn,
    #     config=run_config)

    if FLAGS.do_train:
        train_file_base = "{}.len-{}.train.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_file = os.path.join(FLAGS.output_dir, train_file_base)
        tf.logging.info("Use tfrecord file {}".format(train_file))

        train_examples = processor.get_train_examples(FLAGS.data_dir)
        tf.logging.info("Num of train samples: {}".format(len(train_examples)))

        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, train_file,
                                                FLAGS.num_passes)

        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)

        # estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

        ##### Create input tensors / placeholders
        bsz_per_core = FLAGS.train_batch_size // FLAGS.num_core_per_host

        params = {
            "batch_size": FLAGS.train_batch_size  # the whole batch
        }
        train_set = train_input_fn(params)

        example = train_set.make_one_shot_iterator().get_next()
        if FLAGS.num_core_per_host > 1:
            examples = [{} for _ in range(FLAGS.num_core_per_host)]
            for key in example.keys():
                vals = tf.split(example[key], FLAGS.num_core_per_host, 0)
                for device_id in range(FLAGS.num_core_per_host):
                    examples[device_id][key] = vals[device_id]
        else:
            examples = [example]
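        # Sharding sketch (hypothetical shapes): with train_batch_size = 8 and
        # num_core_per_host = 4, tf.split(tensor, 4, 0) slices the leading
        # batch dimension into four [2, ...] pieces, one per tower below.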

        ##### Create computational graph
        (tower_losses, tower_grads_and_vars, tower_inputs,
         tower_hidden_states, tower_logits) = [], [], [], [], []

        for i in range(FLAGS.num_core_per_host):
            reuse = True if i > 0 else None
            with tf.device(assign_to_gpu(i, "/gpu:0")), \
                tf.variable_scope(tf.get_variable_scope(), reuse=reuse):

                loss_i, grads_and_vars_i, inputs_i, hidden_states_i, logits_i = single_core_graph(
                    is_training=True,
                    features=examples[i],
                    label_list=label_list)

                tower_losses.append(loss_i)
                tower_grads_and_vars.append(grads_and_vars_i)
                tower_inputs.append(inputs_i)
                tower_hidden_states.append(hidden_states_i)
                tower_logits.append(logits_i)

        ## average losses and gradients across towers
        if len(tower_losses) > 1:
            loss = tf.add_n(tower_losses) / len(tower_losses)
            grads_and_vars = average_grads_and_vars(tower_grads_and_vars)
            inputs = dict((n, tf.concat([t[n] for t in tower_inputs], 0))
                          for n in tower_inputs[0])
            hidden_states = list(
                tf.concat(t, 0) for t in zip(*tower_hidden_states))
            logits = tf.concat(tower_logits, 0)
        else:
            loss = tower_losses[0]
            grads_and_vars = tower_grads_and_vars[0]
            inputs = tower_inputs[0]
            hidden_states = tower_hidden_states[0]
            logits = tower_logits[0]

        # Summaries
        merged = tf.summary.merge_all()

        ## get train op
        train_op, learning_rate, gnorm = model_utils.get_train_op(
            FLAGS, None, grads_and_vars=grads_and_vars)
        global_step = tf.train.get_global_step()

        ##### Training loop
        saver = tf.train.Saver(max_to_keep=FLAGS.max_save)

        gpu_options = tf.GPUOptions(allow_growth=True)

        #### load pretrained models
        model_utils.init_from_checkpoint(FLAGS, global_vars=True)

        writer = tf.summary.FileWriter(logdir=FLAGS.model_dir,
                                       graph=tf.get_default_graph())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, gpu_options=gpu_options)) as sess:
            sess.run(tf.global_variables_initializer())

            #########
            ##### PYTORCH
            import torch
            from torch.optim import Adam
            from pytorch_transformers import CONFIG_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME, XLNetTokenizer, XLNetConfig, XLNetForSequenceClassification, BertAdam

            save_path = os.path.join(FLAGS.model_dir, TF_WEIGHTS_NAME + '-00')
            saver.save(sess, save_path)
            tf.logging.info("Model saved in path: {}".format(save_path))

            device = torch.device("cuda", 4)
            config = XLNetConfig.from_pretrained('xlnet-large-cased',
                                                 finetuning_task=u'sts-b',
                                                 num_labels=1)
            tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')

            # pt_model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased', num_labels=1)
            pt_model = XLNetForSequenceClassification.from_pretrained(
                save_path, from_tf=True, config=config)
            pt_model.to(device)
            pt_model = torch.nn.DataParallel(pt_model, device_ids=[4, 5, 6, 7])

            optimizer = Adam(pt_model.parameters(),
                             lr=0.001,
                             betas=(0.9, 0.999),
                             eps=FLAGS.adam_epsilon,
                             weight_decay=FLAGS.weight_decay,
                             amsgrad=False)
            # optimizer = BertAdam(pt_model.parameters(), lr=FLAGS.learning_rate, t_total=FLAGS.train_steps, warmup=FLAGS.warmup_steps / FLAGS.train_steps,
            #                      eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay)
            ##### PYTORCH
            #########

            fetches = [
                loss, global_step, gnorm, learning_rate, train_op, merged,
                inputs, hidden_states, logits
            ]

            total_loss, total_loss_pt, prev_step, gnorm_pt = 0., 0., -1, 0.0
            total_logits = None
            total_labels = None
            while True:
                feed_dict = {}
                # for i in range(FLAGS.num_core_per_host):
                #   for key in tower_mems_np[i].keys():
                #     for m, m_np in zip(tower_mems[i][key], tower_mems_np[i][key]):
                #       feed_dict[m] = m_np

                fetched = sess.run(fetches)

                loss_np, curr_step, gnorm_np, learning_rate_np, _, summary_np, inputs_np, hidden_states_np, logits_np = fetched
                total_loss += loss_np

                if total_logits is None:
                    total_logits = logits_np
                    total_labels = inputs_np['label_ids']
                else:
                    total_logits = np.append(total_logits, logits_np, axis=0)
                    total_labels = np.append(total_labels,
                                             inputs_np['label_ids'],
                                             axis=0)

                #########
                ##### PYTORCH
                f_inp = torch.tensor(inputs_np["input_ids"],
                                     dtype=torch.long,
                                     device=device)
                f_seg_id = torch.tensor(inputs_np["segment_ids"],
                                        dtype=torch.long,
                                        device=device)
                f_inp_mask = torch.tensor(inputs_np["input_mask"],
                                          dtype=torch.float,
                                          device=device)
                f_label = torch.tensor(inputs_np["label_ids"],
                                       dtype=torch.float,
                                       device=device)

                # with torch.no_grad():
                #   _, hidden_states_pt, _ = pt_model.transformer(f_inp, f_seg_id, f_inp_mask)
                # logits_pt, _ = pt_model(f_inp, token_type_ids=f_seg_id, input_mask=f_inp_mask)

                pt_model.train()
                outputs = pt_model(f_inp,
                                   token_type_ids=f_seg_id,
                                   input_mask=f_inp_mask,
                                   labels=f_label)
                loss_pt = outputs[0]
                loss_pt = loss_pt.mean()
                total_loss_pt += loss_pt.item()

                # # hidden_states_pt = list(t.detach().cpu().numpy() for t in hidden_states_pt)
                # # special_pt = special_pt.detach().cpu().numpy()

                # # Optimizer pt
                pt_model.zero_grad()
                loss_pt.backward()
                gnorm_pt = torch.nn.utils.clip_grad_norm_(
                    pt_model.parameters(), FLAGS.clip)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate_np
                optimizer.step()
                ##### PYTORCH
                #########

                if curr_step > 0 and curr_step % FLAGS.log_step_count_steps == 0:
                    curr_loss = total_loss / (curr_step - prev_step)
                    curr_loss_pt = total_loss_pt / (curr_step - prev_step)
                    tf.logging.info(
                        "[{}] | gnorm {:.2f} lr {:8.6f} "
                        "| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
                            curr_step, gnorm_np, learning_rate_np, curr_loss,
                            math.exp(curr_loss), curr_loss / math.log(2)))
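                    # pplx = exp(mean loss); bpc = mean loss / ln 2 -- the
                    # same quantity expressed in bits instead of nats.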

                    #########
                    ##### PYTORCH
                    tf.logging.info(
                        "  PT [{}] | gnorm PT {:.2f} lr PT {:8.6f} "
                        "| loss PT {:.2f} | pplx PT {:>7.2f}, bpc PT {:>7.4f}".
                        format(curr_step, gnorm_pt, learning_rate_np,
                               curr_loss_pt, math.exp(curr_loss_pt),
                               curr_loss_pt / math.log(2)))
                    ##### PYTORCH
                    #########

                    total_loss, total_loss_pt, prev_step = 0., 0., curr_step
                    writer.add_summary(summary_np, global_step=curr_step)

                if curr_step > 0 and curr_step % FLAGS.save_steps == 0:
                    save_path = os.path.join(FLAGS.model_dir,
                                             "model.ckpt-{}".format(curr_step))
                    saver.save(sess, save_path)
                    tf.logging.info(
                        "Model saved in path: {}".format(save_path))

                    #########
                    ##### PYTORCH
                    # Save a trained model, configuration and tokenizer
                    model_to_save = pt_model.module if hasattr(
                        pt_model,
                        'module') else pt_model  # Only save the model it-self
                    # If we save using the predefined names, we can load using `from_pretrained`
                    output_dir = os.path.join(
                        FLAGS.output_dir, "pytorch-ckpt-{}".format(curr_step))
                    if not tf.gfile.Exists(output_dir):
                        tf.gfile.MakeDirs(output_dir)
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    tf.logging.info(
                        "PyTorch Model saved in path: {}".format(output_dir))
                    ##### PYTORCH
                    #########

                if curr_step >= FLAGS.train_steps:
                    break

    if FLAGS.do_eval:
        # eval examples are never loaded elsewhere in this snippet; the
        # processor's dev split is the assumed source, mirroring the
        # training branch above.
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        # TPU requires a fixed batch size for all batches, therefore the number
        # of examples must be a multiple of the batch size, or else examples
        # will get dropped. So we pad with fake examples which are ignored
        # later on. These do NOT count towards the metric (all tf.metrics
        # support a per-instance weight, and these get a weight of 0.0).
        #
        # Modified in XL: We also adopt the same mechanism for GPUs.
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())
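        # Sketch with hypothetical numbers: eval_batch_size = 8 and 30 real
        # examples means two PaddingInputExamples are appended (32 total);
        # their per-instance weight of 0.0 keeps them out of every metric.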

        eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        gpu_options = tf.GPUOptions(allow_growth=True)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, gpu_options=gpu_options)) as sess:
            sess.run(tf.global_variables_initializer())

            ########################### LOAD PT model
            #   import torch
            #   from pytorch_transformers import CONFIG_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME, XLNetTokenizer, XLNetConfig, XLNetForSequenceClassification, BertAdam

            #   save_path = os.path.join(FLAGS.model_dir, TF_WEIGHTS_NAME)
            #   saver.save(sess, save_path)
            #   tf.logging.info("Model saved in path: {}".format(save_path))

            #   device = torch.device("cuda", 4)
            #   config = XLNetConfig.from_pretrained('xlnet-large-cased', finetuning_task=u'sts-b', num_labels=1)
            #   tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
            #   config_path = os.path.join(FLAGS.model_dir, CONFIG_NAME)
            #   config.to_json_file(config_path)
            #   # pt_model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased', num_labels=1)
            #   pt_model = XLNetForSequenceClassification.from_pretrained(FLAGS.model_dir, from_tf=True)
            #   pt_model.to(device)
            #   pt_model = torch.nn.DataParallel(pt_model, device_ids=[4, 5, 6, 7])
            #   from torch.optim import Adam
            #   optimizer = Adam(pt_model.parameters(), lr=0.001, betas=(0.9, 0.999),
            #                    eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay,
            #                    amsgrad=False)
            #   optimizer = BertAdam(pt_model.parameters(), lr=FLAGS.learning_rate, t_total=FLAGS.train_steps, warmup=FLAGS.warmup_steps / FLAGS.train_steps,
            #                        eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay)

            ########################### LOAD PT model

            # Evaluation-only fetches: train_op is deliberately left out so
            # the weights stay fixed during evaluation.
            fetches = [loss, global_step, inputs, hidden_states, logits]

            total_loss, prev_step = 0., -1
            total_logits = None
            total_labels = None
            while True:
                feed_dict = {}
                # for i in range(FLAGS.num_core_per_host):
                #   for key in tower_mems_np[i].keys():
                #     for m, m_np in zip(tower_mems[i][key], tower_mems_np[i][key]):
                #       feed_dict[m] = m_np

                fetched = sess.run(fetches)

                loss_np, curr_step, inputs_np, hidden_states_np, logits_np = fetched
                total_loss += loss_np

                if total_logits is None:
                    total_logits = logits_np
                    total_labels = inputs_np['label_ids']
                else:
                    total_logits = np.append(total_logits, logits_np, axis=0)
                    total_labels = np.append(total_labels,
                                             inputs_np['label_ids'],
                                             axis=0)
Beispiel #16
0
def main(_):

    assert tf.gfile.Exists(FLAGS.init_checkpoint)

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    processor = SubLocProcessor()

    labels = processor.get_labels()
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    test_examples = processor.get_test_examples(FLAGS.data_dir)

    train_file_path = os.path.join(FLAGS.output_dir,
                                   get_basename(FLAGS.max_seq_length, "train"))
    test_file_path = os.path.join(FLAGS.output_dir,
                                  get_basename(FLAGS.max_seq_length, "test"))

    def tokenize_fn(text):
        text = preprocess_text(text)
        return encode_ids(text)

    # Create TF-Record for train examples
    file_based_convert_examples_to_features(train_examples, labels,
                                            FLAGS.max_seq_length, tokenize_fn,
                                            train_file_path)

    # Create TF-Record for test examples
    file_based_convert_examples_to_features(test_examples, labels,
                                            FLAGS.max_seq_length, tokenize_fn,
                                            test_file_path)

    train_set = get_dataset(train_file_path, FLAGS.max_seq_length,
                            FLAGS.batch_size)
    train_iter = train_set.make_one_shot_iterator()
    example = train_iter.get_next()

    inp = tf.transpose(example["input_ids"], [1, 0])
    seg_id = tf.transpose(example["segment_ids"], [1, 0])
    inp_mask = tf.transpose(example["input_mask"], [1, 0])

    xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(False, True, FLAGS)

    xlnet_model = xlnet.XLNetModel(xlnet_config=xlnet_config,
                                   run_config=run_config,
                                   input_ids=inp,
                                   seg_ids=seg_id,
                                   input_mask=inp_mask)

    output = xlnet_model.get_sequence_output()

    init_from_checkpoint(FLAGS)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        try:
            while True:
                outs = sess.run(output)
                print(outs.shape)
        except tf.errors.OutOfRangeError:
            tf.logging.info("DONE")
Beispiel #17
0
def train(ps_device):
  ##### Get input function and model function

  train_input_fn, record_info_dict = data_utils.get_input_fn(
      tfrecord_dir=FLAGS.record_info_dir,
      split="train",
      bsz_per_host=FLAGS.train_batch_size,
      seq_len=FLAGS.seq_len,
      reuse_len=FLAGS.reuse_len,
      bi_data=FLAGS.bi_data,
      num_hosts=1,
      num_core_per_host=1, # set to one no matter how many GPUs
      perm_size=FLAGS.perm_size,
      mask_alpha=FLAGS.mask_alpha,
      mask_beta=FLAGS.mask_beta,
      uncased=FLAGS.uncased,
      num_passes=FLAGS.num_passes,
      use_bfloat16=FLAGS.use_bfloat16,
      num_predict=FLAGS.num_predict)

  tf.compat.v1.logging.info("num of batches {}".format(record_info_dict["num_batch"]))

  ##### Create input tensors / placeholders
  bsz_per_core = FLAGS.train_batch_size // FLAGS.num_core_per_host

  params = {
      "batch_size": FLAGS.train_batch_size # the whole batch
  }
  train_set = train_input_fn(params)

  example = train_set.make_one_shot_iterator().get_next()

  if FLAGS.num_core_per_host > 1:
    examples = [{} for _ in range(FLAGS.num_core_per_host)]
    for key in example.keys():
      vals = tf.split(example[key], FLAGS.num_core_per_host, 0)
      for device_id in range(FLAGS.num_core_per_host):
        examples[device_id][key] = vals[device_id]
  else:
    examples = [example]

  ##### Create computational graph
  tower_mems, tower_losses, tower_new_mems, tower_grads_and_vars = [], [], [], []

  for i in range(FLAGS.num_core_per_host):
    reuse = True if i > 0 else None
    with tf.device(assign_to_gpu(i, ps_device)), \
        tf.variable_scope(tf.get_variable_scope(), reuse=reuse):

      # The mems for each tower is a dictionary
      mems_i = {}
      if FLAGS.mem_len:
        mems_i["mems"] = create_mems_tf(bsz_per_core)

      loss_i, new_mems_i, grads_and_vars_i = single_core_graph(
          is_training=True,
          features=examples[i],
          mems=mems_i)

      tower_mems.append(mems_i)
      tower_losses.append(loss_i)
      tower_new_mems.append(new_mems_i)
      tower_grads_and_vars.append(grads_and_vars_i)

  ## average losses and gradients across towers
  if len(tower_losses) > 1:
    loss = tf.add_n(tower_losses) / len(tower_losses)
    grads_and_vars = average_grads_and_vars(tower_grads_and_vars)
  else:
    loss = tower_losses[0]
    grads_and_vars = tower_grads_and_vars[0]

  ## get train op
  train_op, learning_rate, gnorm = model_utils.get_train_op(FLAGS, None,
      grads_and_vars=grads_and_vars)
  global_step = tf.train.get_global_step()

  ##### Training loop
  # initialize mems
  tower_mems_np = []
  for i in range(FLAGS.num_core_per_host):
    mems_i_np = {}
    for key in tower_mems[i].keys():
      mems_i_np[key] = initialize_mems_np(bsz_per_core)
    tower_mems_np.append(mems_i_np)

  saver = tf.train.Saver()

  gpu_options = tf.GPUOptions(allow_growth=True)

  model_utils.init_from_checkpoint(FLAGS, global_vars=True)

  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
      gpu_options=gpu_options)) as sess:
    sess.run(tf.global_variables_initializer())

    fetches = [loss, tower_new_mems, global_step, gnorm, learning_rate, train_op]

    total_loss, prev_step = 0., -1
    while True:
      feed_dict = {}
      for i in range(FLAGS.num_core_per_host):
        for key in tower_mems_np[i].keys():
          for m, m_np in zip(tower_mems[i][key], tower_mems_np[i][key]):
            feed_dict[m] = m_np

      fetched = sess.run(fetches, feed_dict=feed_dict)

      loss_np, tower_mems_np, curr_step = fetched[:3]
      total_loss += loss_np

      if curr_step > 0 and curr_step % FLAGS.iterations == 0:
        curr_loss = total_loss / (curr_step - prev_step)
        tf.compat.v1.logging.info("[{}] | gnorm {:.2f} lr {:8.6f} "
            "| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
            curr_step, fetched[-3], fetched[-2],
            curr_loss, math.exp(curr_loss), curr_loss / math.log(2)))
        total_loss, prev_step = 0., curr_step

      if curr_step > 0 and curr_step % FLAGS.save_steps == 0:
        save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
        saver.save(sess, save_path)
        tf.compat.v1.logging.info("Model saved in path: {}".format(save_path))

      if curr_step >= FLAGS.train_steps:
        break
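
The training loop above threads the Transformer-XL memory through feed_dict: each sess.run fetches tower_new_mems, and the fetched arrays are fed back in as tower_mems on the next step. A self-contained sketch of that recurrence pattern (the placeholder names and the 0.5 update rule are illustrative only):

    import numpy as np
    import tensorflow as tf

    mem = tf.placeholder(tf.float32, [None, 16])
    new_mem = mem * 0.5                       # stand-in for the model's update
    state = np.zeros([4, 16], np.float32)     # initialize_mems_np analogue
    with tf.Session() as sess:
        for _ in range(3):
            state = sess.run(new_mem, feed_dict={mem: state})  # recurrence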
Beispiel #18
0
def main():
    """Main function routine"""

    tf.logging.set_verbosity(tf.logging.INFO)

    # Text encoding
    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    # Temporary fix for context problem.
    pad_txt = """In 1991, the remains of Russian Tsar Nicholas II and his family
                (except for Alexei and Maria) are discovered.
                The voice of Nicholas's young son, Tsarevich Alexei Nikolaevich, narrates the
                remainder of the story. 1883 Western Siberia,
                a young Grigori Rasputin is asked by his father and a group of men to perform magic.
                Rasputin has a vision and denounces one of the men as a horse thief. Although his
                father initially slaps him for making such an accusation, Rasputin watches as the
                man is chased outside and beaten. Twenty years later, Rasputin sees a vision of
                the Virgin Mary, prompting him to become a priest. Rasputin quickly becomes famous,
                 with people, even a bishop, begging for his blessing. """
    pad_ids = tokenize_fn(pad_txt)
    pad_ids.append(EOD_ID)

    to_special_symbol = {v: k for k, v in special_symbols.items()}

    def parse_ids(toks):
        """Uses sentencepiece to convert ids to text. Substitutes
        EOP_ID and EOD_ID with new lines, and the rest with their names."""
        start = 0
        sent = ""
        for i in range(len(toks)):
            if toks[i] in to_special_symbol:
                if start < i:
                    sent += sp.decode_ids(toks[start:i])
                if toks[i] in [EOD_ID, EOP_ID]:
                    replace_by = "\n\n"
                else:
                    replace_by = to_special_symbol[toks[i]]

                sent += f" {replace_by} "
                start = i + 1
        if start < len(toks):
            sent += sp.decode_ids(toks[start:])

        return sent
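    # Illustrative call: with toks = [17, EOP_ID, 42], parse_ids decodes [17]
    # and [42] with sentencepiece and joins them with a blank line, since
    # EOP_ID/EOD_ID are rendered as "\n\n".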

    if not FLAGS.bidirectional_eachstep:
        prediction_graph = prediction_graph_memory
    else:
        prediction_graph = prediction_graph_no_memory

    predictions, features = prediction_graph()
    gpu_options = tf.GPUOptions(allow_growth=True)
    model_utils.init_from_checkpoint(FLAGS, global_vars=False)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())

        def predict(examples):
            """Given a list of texts in examples
            return the result"""
            preprocessor = get_preprocessor(examples, tokenize_fn, pad_ids)
            dataset = get_input_dataset(preprocessor)
            example = dataset.make_one_shot_iterator().get_next()

            num_examples = len(examples)
            num_batches = int(np.ceil(num_examples / FLAGS.batch_size))

            for _ in tqdm(range(num_batches)):
                inputs = sess.run(example)
                output, conf = sess.run(
                    predictions,
                    feed_dict={features[k]: v
                               for k, v in inputs.items()})
                for _output, _conf in zip(output, conf):
                    yield _output, _conf

        if FLAGS.unconditional or FLAGS.interactive:
            tf.logging.info("Interactive flag received."
                            " Ignoring input files if any.")
            while True:
                if FLAGS.unconditional:
                    text = ""
                else:
                    text = input("----PROMPT----\n")
                outputs = predict([text] * FLAGS.num_samples)
                for i, (output, _) in enumerate(outputs):
                    out = parse_ids(output.tolist())
                    print("======SAMPLE {}======".format(i))
                    print(out)
                    print("=====================")
                if FLAGS.unconditional:
                    break
        else:
            assert FLAGS.input_file != "", (
                "Please provide either an input file or set the interactive "
                "flag for command line input")
            assert os.path.exists(FLAGS.input_file), (
                FLAGS.input_file + " does not exist")

            with open(FLAGS.input_file) as f:
                texts = []
                text = ""
                for line in f:
                    if line.strip() == "":
                        if text != "":
                            # Removing the last <eop> of prompt
                            # since it is not desired
                            if text.endswith("<eop>"):
                                text = text[:-5]
                            texts.extend([text] * FLAGS.num_samples)
                            text = ""
                        continue
                    text += re.sub(r'\n', '<eop>', line)
                if text != "":
                    texts.extend([text] * FLAGS.num_samples)

            tf.logging.info("Got %s lines in the input file",
                            len(texts) // FLAGS.num_samples)
            tf.logging.info("Sampling each line %s times", FLAGS.num_samples)

            outputs = iter(predict(texts))
            with open(FLAGS.input_file + ".xlnet", 'w') as f:
                for i in range(0, len(texts), FLAGS.num_samples):
                    f.write("\n======Example {}=================\n".format(i))
                    f.write(texts[i])
                    for j in range(FLAGS.num_samples):
                        output, _ = next(outputs)
                        out = parse_ids(output.tolist())
                        f.write("\n======Example {} SAMPLE {}======\n".format(
                            i, j))
                        f.write(out)
                        f.write("\n==================================\n")
Beispiel #19
0
    def model_fn(features, labels, mode, params):

        for name in sorted(features.keys()):
            logging.info("  name = %s, shape = %s" % (name, features[name].shape))
        input_ids = features["input_ids"]
        mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Get loss from inputs
        total_loss, logits, predicts = function_builder.get_ner_loss(FLAGS, features, is_training, num_labels)

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        #### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        if mode == tf.estimator.ModeKeys.EVAL:
            def metric_fn(label_ids, logits, num_labels, mask):
                predictions = tf.math.argmax(logits, axis=-1, output_type=tf.int32)
                cm = metrics.streaming_confusion_matrix(label_ids, predictions, num_labels - 1, weights=mask)
                return {
                    "confusion_matrix": cm
                }

            eval_metrics = (metric_fn, [label_ids, logits, num_labels, mask])
            eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
            return eval_spec

        elif mode == tf.estimator.ModeKeys.PREDICT:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, predictions=predicts, scaffold_fn=scaffold_fn)
            return output_spec

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### Constructing training TPUEstimatorSpec with new cache.
        train_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn)

        return train_spec
Beispiel #20
0
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Get loss from inputs
        if FLAGS.is_regression:
            (total_loss, per_example_loss,
             logits) = function_builder.get_regression_loss(
                 FLAGS, features, is_training)
        else:
            flag_val_dict = {
                "dropout": FLAGS.dropout,
                "model_dir": FLAGS.model_dir,
                "data_dir": FLAGS.data_dir,
                "use_tpu": FLAGS.use_tpu,
                "num_core_per_host": FLAGS.num_core_per_host,
                "master": FLAGS.master,
                "iterations": FLAGS.iterations,
                "learning_rate": FLAGS.learning_rate,
                "train_batch_size": FLAGS.train_batch_size,
                "model_config_path": FLAGS.model_config_path,
            }
            for name in list(features.keys()):
                t = features[name]
                if t.dtype == tf.int64:
                    t = tf.cast(t, tf.int32)
                features[name] = t
            tf.logging.info(json.dumps(flag_val_dict))
            (total_loss, per_example_loss, logits,
             probabilities) = function_builder.get_classification_loss(
                 FLAGS, features, n_class, is_training)

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        #### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            assert FLAGS.num_hosts == 1

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                eval_input_dict = {
                    'labels': label_ids,
                    'predictions': predictions,
                    'weights': is_real_example
                }
                accuracy = tf.metrics.accuracy(**eval_input_dict)

                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                ###################################
                #  precision,recall, f1 score     #
                ###################################
                precision = metrics.precision(label_ids,
                                              predictions,
                                              20,
                                              average="macro")
                recall = metrics.recall(label_ids,
                                        predictions,
                                        20,
                                        average="macro")
                f = metrics.f1(label_ids, predictions, 20, average="macro")

                ###################################
                #      confusion matrix           #
                ###################################

                def eval_confusion_matrix(labels, predictions, num_classes):
                    with tf.variable_scope("eval_confusion_matrix"):
                        con_matrix = tf.confusion_matrix(
                            labels=labels,
                            predictions=predictions,
                            num_classes=num_classes)

                        con_matrix_sum = tf.Variable(
                            tf.zeros(shape=(num_classes, num_classes),
                                     dtype=tf.int32),
                            trainable=False,
                            name="confusion_matrix_result",
                            collections=[tf.GraphKeys.LOCAL_VARIABLES])
                        update_op = tf.assign_add(con_matrix_sum, con_matrix)
                        return tf.convert_to_tensor(con_matrix_sum), update_op
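                # eval_confusion_matrix returns the (value, update_op) pair
                # that eval metrics expect: update_op adds this batch's counts
                # into a local variable, so the matrix accumulates across all
                # evaluation batches.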

                return {
                    'eval_accuracy': accuracy,
                    'eval_loss': loss,
                    'eval_precision': precision,
                    'eval_recall': recall,
                    'eval_f': f,
                    'conf_mat': eval_confusion_matrix(label_ids,
                                                      predictions,
                                                      num_classes=20)
                }

            def regression_metric_fn(per_example_loss, label_ids, logits,
                                     is_real_example):
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                pearsonr = tf.contrib.metrics.streaming_pearson_correlation(
                    logits, label_ids, weights=is_real_example)
                return {'eval_loss': loss, 'eval_pearsonr': pearsonr}

            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)

            #### Constructing evaluation TPUEstimatorSpec with new cache.
            label_ids = tf.reshape(features['label_ids'], [-1])

            if FLAGS.is_regression:
                metric_fn = regression_metric_fn
            metric_args = [
                per_example_loss, label_ids, logits, is_real_example
            ]

            if FLAGS.use_tpu:
                eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=(metric_fn, metric_args),
                    scaffold_fn=scaffold_fn)
            else:
                eval_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metric_fn(*metric_args))

            return eval_spec

        elif mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.reshape(features["label_ids"], [-1])

            predictions = {
                "logits": logits,
                "labels": label_ids,
                #           "is_real": features["is_real_example"]
            }

            if FLAGS.use_tpu:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions={"probabilities": probabilities},
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions={"probabilities": probabilities})
            return output_spec
        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### Constructing training TPUEstimatorSpec with new cache.
        if FLAGS.use_tpu:
            #### Creating host calls
            if not FLAGS.is_regression:
                label_ids = tf.reshape(features['label_ids'], [-1])
                predictions = tf.argmax(logits,
                                        axis=-1,
                                        output_type=label_ids.dtype)
                is_correct = tf.equal(predictions, label_ids)
                accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

                monitor_dict["accuracy"] = accuracy

                host_call = function_builder.construct_scalar_host_call(
                    monitor_dict=monitor_dict,
                    model_dir=FLAGS.model_dir,
                    prefix="train/",
                    reduce_fn=tf.reduce_mean)
            else:
                host_call = None

            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        else:
            train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                    loss=total_loss,
                                                    train_op=train_op)

        return train_spec
Beispiel #21
0
    def __init__(self, model_path):
        config_file = os.path.join(model_path, 'xlnet_config.json')
        spiece_file = os.path.join(model_path, 'spiece.model')
        ckpt_path = os.path.join(model_path, 'xlnet_model.ckpt')
        self.predict_batch_size = 100
        self.max_seq_length = 256
        self.max_predictions_per_seq = 20

        is_training = False

        # construct xlnet config and save to model_dir
        xlnet_config = xlnet.XLNetConfig(json_path=config_file)

        # construct run config from FLAGS
        # self.run_config = xlnet.create_run_config(is_training, False, FLAGS)
        run_config = xlnet.RunConfig(is_training,
                                     False,  # use_tpu
                                     False,  # use_bfloat16
                                     0.0,    # dropout
                                     0.0,    # dropatt
                                     init="normal",
                                     init_range=0.1,
                                     init_std=0.02,
                                     mem_len=None,
                                     reuse_len=256,
                                     bi_data=False,
                                     clamp_len=-1,
                                     same_length=False)

        graph = tf.Graph()
        with graph.as_default():
            self.session = tf.Session()

            # shape: max_sentence_length x num_sentence
            self.input_ids = tf.placeholder(tf.int32, [None, None],
                                            name="input_ids")
            # shape: max_sentence_length x num_sentence
            self.seg_ids = tf.placeholder(tf.int32, [None, None],
                                          name="seg_ids")
            # shape: max_sentence_length x num_sentence
            self.input_mask = tf.placeholder(tf.int32, [None, None],
                                             name="input_mask")
            # shape: max_sentence_length x max_sentence_length x num_sentence
            self.perm_mask = tf.placeholder(tf.int32, [None, None, None],
                                            name="perm_mask")
            # shape: max_predictions_per_seq x max_sentence_length x num_sentence
            self.target_mapping = tf.placeholder(tf.int32, [None, None, None],
                                                 name="target_mapping")
            # shape: max_sentence_length x num_sentence
            self.inp_q = tf.placeholder(tf.int32, [None, None], name="inp_q")
            # shape: max_sentence_length x num_sentence
            self.target = tf.placeholder(tf.int32, [None, None], name="target")
            # # shape: bool scaler
            # self.is_training = tf.placeholder(tf.bool, name="is_training")

            xlnet_model = xlnet.XLNetModel(xlnet_config=xlnet_config,
                                           run_config=run_config,
                                           input_ids=self.input_ids,
                                           seg_ids=self.seg_ids,
                                           input_mask=self.input_mask,
                                           mems=None,
                                           perm_mask=self.perm_mask,
                                           target_mapping=self.target_mapping,
                                           inp_q=self.inp_q)

            output = xlnet_model.get_sequence_output()
            lookup_table = xlnet_model.get_embedding_table()
            initializer = xlnet_model.get_initializer()

            with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
                # LM loss
                lm_loss, logits = modeling.lm_loss(
                    hidden=output,
                    target=self.target,
                    n_token=xlnet_config.n_token,
                    d_model=xlnet_config.d_model,
                    initializer=initializer,
                    lookup_table=lookup_table,
                    tie_weight=True,
                    bi_data=run_config.bi_data,
                    use_tpu=run_config.use_tpu)

            self.lm_loss = lm_loss
            self.logits = logits

            #### load the pretrained XLNet parameters from ckpt_path
            # (assumes all model variables live under the "model/" scope)
            tf.train.init_from_checkpoint(ckpt_path, {"model/": "model/"})

            self.session.run(tf.global_variables_initializer())

        # sentencepiece tokenizer built from the spiece.model shipped with
        # the checkpoint
        self.sp = spm.SentencePieceProcessor()
        self.sp.Load(spiece_file)
Beispiel #22
0
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        #### Get loss from inputs
        inp_ids = tf.transpose(features["input_ids"], [1, 0])
        inp_mask = tf.transpose(features["input_mask"], [1, 0])
        output_ids = features["output_ids"]
        output_mask = tf.transpose(features["output_mask"], [1, 0])
        # define decoder inputs: prepend the <S> id (2) and drop the last
        # target token (the standard teacher-forcing shift)
        decoder_inputs = tf.concat(
            (tf.ones_like(output_ids[:, :1]) * 2, output_ids[:, :-1]), -1)
        decoder_inputs = tf.transpose(decoder_inputs, [1, 0])
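        # e.g. an output_ids row [5, 9, 7] becomes decoder input [2, 5, 9]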
        args = dict(FLAGS=FLAGS,
                    is_training=is_training,
                    inp_ids=inp_ids,
                    inp_mask=inp_mask,
                    source_ntoken=params.get("source_ntoken"),
                    target_ntoken=params.get("target_ntoken"),
                    output_mask=output_mask,
                    output_ids=output_ids,
                    decoder_inputs=decoder_inputs)
        s2sm = seq2seq_models.seq2seqmodel(**args)
        total_loss, per_example_loss, logits = s2sm.total_loss, s2sm.per_example_loss, s2sm.logits

        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        #### load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            assert FLAGS.num_hosts == 1

            def metric_fn(per_example_loss, logits, output_ids):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                istarget = tf.to_float(tf.not_equal(output_ids, 0))
                accuracy = tf.reduce_sum(
                    tf.to_float(tf.equal(predictions, output_ids)) * istarget /
                    (tf.reduce_sum(istarget)))
                loss = tf.metrics.mean(values=per_example_loss)
                return {
                    # wrap the plain accuracy tensor so it satisfies the
                    # (value, update_op) contract of eval_metric_ops
                    'eval_accuracy': tf.metrics.mean(accuracy),
                    'eval_loss': loss,
                }

            #### Constructing evaluation EstimatorSpec with new cache.
            metric_args = [per_example_loss, logits, output_ids]
            eval_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metric_ops=metric_fn(*metric_args))

            return eval_spec

        elif mode == tf.estimator.ModeKeys.PREDICT:
            pred = tf.argmax(logits, axis=-1, output_type=tf.int32)
            predictions = {
                "logits": logits,
                "pred": pred,
                "output_ids": output_ids,
            }
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=predictions)
            return output_spec

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### Constructing training EstimatorSpec with new cache.
        train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                loss=total_loss,
                                                train_op=train_op)

        return train_spec
Beispiel #23
0
    def model_fn(features, labels, mode, params):
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        total_loss, per_example_loss, logits = create_model(FLAGS, features, is_training, num_labels)

        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        #### Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            assert FLAGS.num_hosts == 1

            def metric_fn(per_example_loss, label_list, logits, input_mask):
                # input_mask is 1 on padding positions, so flip it into
                # per-token weights: 1 for real tokens, 0 for padding.
                input_mask = 1 - input_mask

                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                eval_input_dict = {
                    'labels': label_list,
                    'predictions': predictions,
                    'weights': input_mask
                }
                accuracy = tf.metrics.accuracy(**eval_input_dict)

                loss = tf.metrics.mean(values=per_example_loss, weights=input_mask)
                return {
                    'eval_accuracy': accuracy,
                    'eval_loss': loss}

            input_mask = tf.cast(features['input_mask'], dtype=tf.float32)

            label_list = features['label_list']
            metric_args = [per_example_loss, label_list, logits, input_mask]

            if FLAGS.use_tpu:
                eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=(metric_fn, metric_args),
                    scaffold_fn=scaffold_fn)
            else:
                eval_spec = tf.estimator.EstimatorSpec(
                        mode=mode,
                        loss=total_loss,
                        eval_metric_ops=metric_fn(*metric_args))
            return eval_spec

        #### Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        #### Constructing training TPUEstimatorSpec with new cache.
        if FLAGS.use_tpu:
            #### Creating host calls
            host_call = None

            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode, loss=total_loss, train_op=train_op, host_call=host_call,
                scaffold_fn=scaffold_fn)
        else:
            train_spec = tf.estimator.EstimatorSpec(
                mode=mode, loss=total_loss, train_op=train_op)

        return train_spec
Beispiel #24
0
def test(ps_device):

    test_input_fn, record_info_dict_test = data_utils.get_input_fn(
          info_dir=os.path.join(FLAGS.record_info_dir, "test"),
          split="test",
          bsz_per_host=FLAGS.test_batch_size,
          seq_len=FLAGS.seq_len,
          reuse_len=FLAGS.reuse_len,
          bi_data=FLAGS.bi_data,
          num_hosts=1,
          num_core_per_host=1,
          perm_size=FLAGS.perm_size,
          mask_alpha=FLAGS.mask_alpha,
          mask_beta=FLAGS.mask_beta,
          use_bfloat16=FLAGS.use_bfloat16,
          num_predict=FLAGS.num_predict)

    tf.logging.info("num of test batches {}".format(record_info_dict_test["num_batch"]))

    ##### Create input tensors / placeholders
    bsz_per_core = FLAGS.test_batch_size // FLAGS.num_core_per_host

    params = {
        "batch_size": FLAGS.test_batch_size # the whole batch
    }
    test_set = test_input_fn(params)

    t_iter = test_set.make_initializable_iterator()
    t_example = t_iter.get_next()

    if FLAGS.num_core_per_host > 1:
        # test set
        t_examples = [{} for _ in range(FLAGS.num_core_per_host)]
        for key in t_example.keys():
            vals = tf.split(t_example[key], FLAGS.num_core_per_host, 0)
            for device_id in range(FLAGS.num_core_per_host):
                t_examples[device_id][key] = vals[device_id]
    else:
        t_examples = [t_example]

    ##### Create computational graph
    v_tower_mems, v_tower_losses, v_tower_new_mems = [], [], []

    for i in range(FLAGS.num_core_per_host):
        reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, ps_device)), \
            tf.variable_scope(tf.get_variable_scope(), reuse=reuse):

            # The mems for each tower is a dictionary
            v_mems_i = {}
            if FLAGS.mem_len:
                v_mems_i["mems"] = create_mems_tf(bsz_per_core)
            
            v_loss_i, v_new_mems_i = single_core_graph(
                features=t_examples[i],
                mems=v_mems_i)
            
            v_tower_mems.append(v_mems_i)
            v_tower_losses.append(v_loss_i)
            v_tower_new_mems.append(v_new_mems_i)

    ## average losses and gradients across towers
    if len(v_tower_losses) > 1:
        v_loss = tf.add_n(v_tower_losses) / len(v_tower_losses)
    else:
        v_loss = v_tower_losses[0]

    gpu_options = tf.GPUOptions(allow_growth=True)

    model_utils.init_from_checkpoint(FLAGS, global_vars=True)

    # Create performance summaries for Tensorboard logging
    test_performance_summaries = tb.tensorboard_setup_test()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
        gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())

        # Create writers for Tensorboard logging
        test_summary_writer = tb.create_test_writer(sess, logging_dir=FLAGS.tb_logging_dir)

        # initialize mems
        v_tower_mems_np = []
        for i in range(FLAGS.num_core_per_host):
            v_mems_i_np = {}
            for key in v_tower_mems[i].keys():
                v_mems_i_np[key] = initialize_mems_np(bsz_per_core)
            v_tower_mems_np.append(v_mems_i_np)
        
        v_fetches = [v_loss, v_tower_new_mems]
        
        sess.run(t_iter.initializer)
        v_total_loss = 0.
        v_steps = 0

        try:
            while True:
                v_feed_dict = {}
                for i in range(FLAGS.num_core_per_host):
                    for key in v_tower_mems_np[i].keys():
                        for m, m_np in zip(v_tower_mems[i][key], v_tower_mems_np[i][key]):
                            v_feed_dict[m] = m_np
                    
                v_fetched = sess.run(v_fetches, feed_dict=v_feed_dict)
                v_loss_np, v_tower_mems_np = v_fetched
                v_total_loss += v_loss_np
                v_steps += 1
                print(v_steps)
            
        except tf.errors.OutOfRangeError:
            test_loss = v_total_loss/v_steps
            t_pplx = math.exp(test_loss)
            tf.logging.info("Test: loss {:.2f} | pplx {:>7.2f}".format(
                            test_loss,  t_pplx))
            
            summ_test = tb.run_test(sess, test_performance_summaries, test_loss, t_pplx)
            test_summary_writer.add_summary(summ_test, 1)
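
The loop above treats create_mems_tf and initialize_mems_np as given; neither helper appears in this listing. A minimal sketch of what they might look like, assuming FLAGS.n_layer, FLAGS.mem_len and FLAGS.d_model exist (these names are assumptions, following the usual Transformer-XL memory layout):

    import numpy as np
    import tensorflow as tf

    # Sketch only: FLAGS fields below are assumptions, not taken from
    # this listing.
    def create_mems_tf(bsz_per_core):
        """One memory placeholder per layer, filled via feed_dict each step."""
        return [
            tf.placeholder(tf.float32,
                           [FLAGS.mem_len, bsz_per_core, FLAGS.d_model])
            for _ in range(FLAGS.n_layer)
        ]

    def initialize_mems_np(bsz_per_core):
        """Zero-filled numpy arrays matching the placeholder shapes above."""
        return [
            np.zeros([FLAGS.mem_len, bsz_per_core, FLAGS.d_model],
                     dtype=np.float32)
            for _ in range(FLAGS.n_layer)
        ]

This pairs up with the feed loop, which zips each placeholder with a numpy array of the same shape before every session run.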
Beispiel #25
0
    def model_fn(features, labels, mode, params):
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        # Get loss from inputs
        if FLAGS.is_regression:
            (total_loss, per_example_loss,
             logits) = function_builder.get_regression_loss(
                 FLAGS, features, is_training)
        else:
            (total_loss, per_example_loss,
             logits) = function_builder.get_classification_loss(
                 FLAGS, features, n_class, is_training)

        # Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info('#params: {}'.format(num_params))

        # load pretrained models
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS)

        # Evaluation mode
        if mode == tf.estimator.ModeKeys.EVAL:
            assert FLAGS.num_hosts == 1

            def metric_fn(per_example_loss, label_ids, logits,
                          is_real_example):
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                eval_input_dict = {
                    'labels': label_ids,
                    'predictions': predictions,
                    'weights': is_real_example
                }
                accuracy = tf.metrics.accuracy(**eval_input_dict)

                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                return {'eval_accuracy': accuracy, 'eval_loss': loss}

            def regression_metric_fn(per_example_loss, label_ids, logits,
                                     is_real_example):
                loss = tf.metrics.mean(values=per_example_loss,
                                       weights=is_real_example)
                pearsonr = tf.contrib.metrics.streaming_pearson_correlation(
                    logits, label_ids, weights=is_real_example)
                return {'eval_loss': loss, 'eval_pearsonr': pearsonr}

            is_real_example = tf.cast(features["is_real_example"],
                                      dtype=tf.float32)

            # Constructing the evaluation TPUEstimatorSpec.
            label_ids = tf.reshape(features['label_ids'], [-1])

            if FLAGS.is_regression:
                metric_fn = regression_metric_fn
            metric_args = [
                per_example_loss, label_ids, logits, is_real_example
            ]

            if FLAGS.use_tpu:
                eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metrics=(metric_fn, metric_args),
                    scaffold_fn=scaffold_fn)
            else:
                eval_spec = tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=total_loss,
                    eval_metric_ops=metric_fn(*metric_args))

            return eval_spec

        elif mode == tf.estimator.ModeKeys.PREDICT:
            label_ids = tf.reshape(features["label_ids"], [-1])

            predictions = {
                "logits": logits,
                "labels": label_ids,
                "is_real": features["is_real_example"]
            }

            if FLAGS.use_tpu:
                output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                    mode=mode,
                    predictions=predictions,
                    scaffold_fn=scaffold_fn)
            else:
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, predictions=predictions)
            return output_spec

        # Configuring the optimizer
        train_op, learning_rate, _ = model_utils.get_train_op(
            FLAGS, total_loss)

        monitor_dict = {}
        monitor_dict["lr"] = learning_rate

        # Constructing the training TPUEstimatorSpec.
        if FLAGS.use_tpu:
            # Creating host calls
            if not FLAGS.is_regression:
                label_ids = tf.reshape(features['label_ids'], [-1])
                predictions = tf.argmax(logits,
                                        axis=-1,
                                        output_type=label_ids.dtype)
                is_correct = tf.equal(predictions, label_ids)
                accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

                monitor_dict["accuracy"] = accuracy

                host_call = function_builder.construct_scalar_host_call(
                    monitor_dict=monitor_dict,
                    model_dir=FLAGS.model_dir,
                    prefix="train/",
                    reduce_fn=tf.reduce_mean)
            else:
                host_call = None

            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                host_call=host_call,
                scaffold_fn=scaffold_fn)
        else:
            train_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                    loss=total_loss,
                                                    train_op=train_op)

        return train_spec
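
A model_fn like the one above is meant to be handed to an estimator. None of the wiring below appears in the original listing; it is a hypothetical sketch, and names such as FLAGS.iterations, FLAGS.train_steps and train_input_fn are assumptions:

    # Hypothetical wiring sketch; not part of the original example.
    run_config = tf.contrib.tpu.RunConfig(
        model_dir=FLAGS.model_dir,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations))

    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

TPUEstimator injects params["batch_size"] itself, which is why the model_fn never reads a batch size from FLAGS directly.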
    def model_fn(features, labels, mode, params):
        """doc."""
        #### Training or Evaluation
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        #assert is_training
        assert tf.gfile.Exists(logdir)

        #### Retrieve `mems` from `params["cache"]`
        mems = {}
        idx = 0
        if FLAGS.mem_len > 0:
            mems["mems"] = params["cache"]

        #### Get loss from inputs
        if is_training:
            total_loss, new_mems, monitor_dict = function_builder.get_loss(
                FLAGS, features, labels, mems, is_training)
        else:
            total_loss, batch_loss, batch_tgt_mask, new_mems = function_builder.get_loss(
                FLAGS, features, labels, mems, is_training)

        #### Turn `new_mems` into `new_cache`
        new_cache = []
        if FLAGS.mem_len > 0:
            new_cache += new_mems["mems"]
        #### Check model parameters
        num_params = sum([np.prod(v.shape) for v in tf.trainable_variables()])
        tf.logging.info("#params: {}".format(num_params))

        #### Customized initial checkpoint
        scaffold_fn = model_utils.init_from_checkpoint(FLAGS, global_vars=True)

        if is_training:
            #### Configuring the optimizer
            train_op, learning_rate, gnorm = model_utils.get_train_op(
                FLAGS, total_loss, None)
            monitor_dict["gnorm"] = gnorm
            monitor_dict["lr"] = learning_rate
            monitor_dict['pplx'] = tf.math.exp(total_loss)
            # Host-call construction is disabled in this example:
            # host_call = function_builder.construct_scalar_host_call(
            #     monitor_dict=monitor_dict,
            #     log_dir=logdir,
            #     prefix="train/",
            #     reduce_fn=tf.reduce_mean)

            #### Constructing training TPUEstimatorSpec with new cache.
            train_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,  #host_call=host_call,
                scaffold_fn=scaffold_fn)
            train_spec.cache = new_cache

            return train_spec

        else:
            #### Constructing validation TPUEstimatorSpec with new cache.
            eval_metrics = function_builder.construct_scalar_metric_fn(
                batch_loss, batch_tgt_mask)

            eval_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
            eval_spec.cache = new_cache

            return eval_spec
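
The spec.cache = new_cache assignments and the params["cache"] read at the top rely on an estimator that threads a memory cache from one step into the next (the Transformer-XL/XLNet training setup uses a TPUEstimator with such cache support); a plain EstimatorSpec has no cache attribute. A minimal sketch of the initial cache such an estimator might be given, reusing the assumed FLAGS fields from the earlier sketch:

    # Hypothetical initial cache passed in as params["cache"]; the
    # estimator then round-trips spec.cache back into the next step.
    # FLAGS.n_layer / FLAGS.mem_len / FLAGS.d_model are assumptions.
    def cache_fn(batch_size):
        mems = []
        if FLAGS.mem_len > 0:
            for _ in range(FLAGS.n_layer):
                mems.append(
                    tf.zeros([FLAGS.mem_len, batch_size, FLAGS.d_model],
                             dtype=tf.float32))
        return mems

Each step, the model_fn reads params["cache"], computes new_mems, and returns them through spec.cache, so the following step sees the updated memory.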