Code example #1 (score: 0)
# NOTE(review): this fragment is truncated by the scrape — the `while/try`
# loop at the bottom has no matching `except`, so it is not runnable as-is.
# It appears to iterate a TFRecord input pipeline once to count/inspect
# batches; `epoch`, `sess_config`, `hvd`, `Bunch`, `tf_data_utils` come
# from the surrounding (not shown) script.
graph = tf.Graph()
with graph.as_default():
    # Fixed-length feature schema for 128-token single-sentence examples.
    name_to_features = {
        "input_ids": tf.FixedLenFeature([128], tf.int64),
        "input_mask": tf.FixedLenFeature([128], tf.int64),
        "segment_ids": tf.FixedLenFeature([128], tf.int64),
        "label_ids": tf.FixedLenFeature([], tf.int64),
    }

    params = Bunch({})
    params.epoch = epoch
    params.batch_size = 32
    jd_test = "/data/xuht/jd_comment/train.tfrecords"
    print(params["batch_size"], "===batch size===")
    input_fn = tf_data_utils.train_input_fn(jd_test,
                                            tf_data_utils._decode_record,
                                            name_to_features, params)

    sess = tf.Session(config=sess_config)

    # Only local variables are initialized here (no trainable weights used).
    init_op = tf.group(tf.local_variables_initializer())
    sess.run(init_op)

    # Sync initial variable state from rank 0 to all Horovod workers.
    sess.run(hvd.broadcast_global_variables(0))

    i = 0
    cnt = 0
    while True:
        try:
            features = sess.run(input_fn)
            i += 1
Code example #2 (score: 0)
def main(_):
    """Horovod-distributed training entry point for a BERT sentence-pair
    (order) classifier.

    Builds train and eval graphs from ``FLAGS``, trains until the input
    pipeline is exhausted, checkpoints periodically on rank 0, then rank 0
    evaluates with sklearn accuracy / macro-F1.
    """
    hvd.init()

    # Pin each Horovod process to its own GPU.
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.visible_device_list = str(hvd.local_rank())

    print("== hvd local rank == {}".format(hvd.local_rank()))

    graph = tf.Graph()
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
    with graph.as_default():
        import json

        # Model hyper-parameters come from a JSON config file.
        config = json.load(open(FLAGS.config_file, "r"))

        init_checkpoint = FLAGS.init_checkpoint
        print("===init checkoutpoint==={}".format(init_checkpoint))

        config = Bunch(config)
        config.use_one_hot_embeddings = True
        config.scope = "bert"
        config.dropout_prob = 0.1
        config.label_type = "single_label"

        label_dict = json.load(open(FLAGS.label_id))

        # Optional per-class weights for weighted eval metrics.
        # None means "unweighted" (see eval_fn below).
        label_tensor = None

        sess = tf.Session(config=sess_config)

        # Each worker sees 1/size of the data, so per-worker step counts
        # are scaled down accordingly.
        train_size = int(FLAGS.train_size / hvd.size())

        num_train_steps = int(train_size / FLAGS.batch_size * FLAGS.epoch)
        num_warmup_steps = int(num_train_steps * 0.01)

        # Checkpoint/log roughly once per (per-worker) epoch.
        num_storage_steps = int(train_size / FLAGS.batch_size)

        print(num_train_steps, num_warmup_steps, "=============")

        opt_config = Bunch({
            # Learning rate is scaled down by the worker count.
            "init_lr": (1e-5 / hvd.size()),
            "num_train_steps": num_train_steps,
            "num_warmup_steps": num_warmup_steps
        })

        model_io_config = Bunch({"fix_lm": False})

        model_io_fn = model_io.ModelIO(model_io_config)

        num_choice = FLAGS.num_classes
        max_seq_length = FLAGS.max_length

        # Select the classifier graph builder by model variant.
        if FLAGS.model_type == "original":
            model_function = bert_order_classifier.classifier_model_fn_builder
        elif FLAGS.model_type == "attn":
            model_function = bert_order_classifier.classifier_attn_model_fn_builder
        elif FLAGS.model_type == "orignal_nonlinear":
            model_function = bert_order_classifier.classifier_model_fn_builder_v1

        model_train_fn = model_function(config,
                                        num_choice,
                                        init_checkpoint,
                                        model_reuse=None,
                                        load_pretrained=True,
                                        model_io_fn=model_io_fn,
                                        model_io_config=model_io_config,
                                        opt_config=opt_config,
                                        input_name=["a", "b"],
                                        label_tensor=label_tensor)

        # Eval graph shares variables with the train graph (model_reuse=True).
        model_eval_fn = model_function(config,
                                       num_choice,
                                       init_checkpoint,
                                       model_reuse=True,
                                       load_pretrained=True,
                                       model_io_fn=model_io_fn,
                                       model_io_config=model_io_config,
                                       opt_config=opt_config,
                                       input_name=["a", "b"],
                                       label_tensor=label_tensor)

        def metric_fn(features, logits, loss):
            """Build eval fetches: batch accuracy, loss, predictions, labels."""
            print(logits.get_shape(), "===logits shape===")
            pred_label = tf.argmax(logits, axis=-1, output_type=tf.int32)
            prob = tf.nn.softmax(logits)
            correct = tf.equal(
                tf.cast(pred_label, tf.int32),
                tf.cast(features["label_ids"], tf.int32))
            accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
            return {
                "accuracy": accuracy,
                "loss": loss,
                "pred_label": pred_label,
                "label_ids": features["label_ids"]
            }

        # Pair input: every feature exists for sentence "a" and sentence "b".
        name_to_features = {
            "input_ids_a": tf.FixedLenFeature([max_seq_length], tf.int64),
            "input_mask_a": tf.FixedLenFeature([max_seq_length], tf.int64),
            "segment_ids_a": tf.FixedLenFeature([max_seq_length], tf.int64),
            "input_ids_b": tf.FixedLenFeature([max_seq_length], tf.int64),
            "input_mask_b": tf.FixedLenFeature([max_seq_length], tf.int64),
            "segment_ids_b": tf.FixedLenFeature([max_seq_length], tf.int64),
            "label_ids": tf.FixedLenFeature([], tf.int64),
        }

        def _decode_record(record, name_to_features):
            """Decode a serialized tf.Example and cast int64 features to int32.

            tf.Example only supports tf.int64, but the TPU only supports
            tf.int32, so all int64 features are downcast.
            """
            example = tf.parse_single_example(record, name_to_features)
            for name in list(example.keys()):
                t = example[name]
                if t.dtype == tf.int64:
                    t = tf.to_int32(t)
                example[name] = t
            return example

        params = Bunch({})
        params.epoch = FLAGS.epoch
        params.batch_size = FLAGS.batch_size

        train_features = tf_data_utils.train_input_fn(FLAGS.train_file,
                                                      _decode_record,
                                                      name_to_features, params)
        eval_features = tf_data_utils.eval_input_fn(FLAGS.dev_file,
                                                    _decode_record,
                                                    name_to_features, params)

        [train_op, train_loss, train_per_example_loss,
         train_logits] = model_train_fn(train_features, [],
                                        tf.estimator.ModeKeys.TRAIN)
        [_, eval_loss, eval_per_example_loss,
         eval_logits] = model_eval_fn(eval_features, [],
                                      tf.estimator.ModeKeys.EVAL)
        result = metric_fn(eval_features, eval_logits, eval_loss)

        model_io_fn.set_saver()

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        # Every worker starts from rank 0's initial weights.
        sess.run(hvd.broadcast_global_variables(0))

        def eval_fn(result):
            """Run the eval graph to exhaustion.

            Returns (mean batch accuracy, macro F1).
            """
            i = 0
            total_accuracy = 0
            label, label_id = [], []
            label_weight = []
            while True:
                try:
                    eval_result = sess.run(result)
                    total_accuracy += eval_result["accuracy"]
                    label_id.extend(eval_result["label_ids"])
                    label.extend(eval_result["pred_label"])
                    # Fix: only collect per-sample weights when class weights
                    # were provided. The original indexed into label_tensor
                    # unconditionally, which raises TypeError when it is None.
                    if label_tensor is not None:
                        for item in eval_result["label_ids"]:
                            label_weight.append(label_tensor[item])
                    i += 1
                except tf.errors.OutOfRangeError:
                    print("End of dataset")
                    break
            # sklearn expects None (not an empty list) to mean "unweighted".
            sample_weight = label_weight if label_weight else None
            f1 = f1_score(label_id,
                          label,
                          average="macro",
                          sample_weight=sample_weight)
            accuracy = accuracy_score(label_id,
                                      label,
                                      sample_weight=sample_weight)
            print("test accuracy accuracy {} {} f1 {}".format(
                total_accuracy / i, accuracy, f1))
            return total_accuracy / i, f1

        def train_fn(op, loss):
            """Run train steps until the input pipeline is exhausted,
            logging mean loss and checkpointing (rank 0 only) every
            num_storage_steps steps."""
            i = 0
            cnt = 0
            total_loss = 0.0
            while True:
                try:
                    [_, train_loss] = sess.run([op, loss])
                    total_loss += train_loss
                    i += 1
                    cnt += 1
                    if np.mod(i, num_storage_steps) == 0:
                        print(total_loss / cnt)
                        if hvd.rank() == 0:
                            model_io_fn.save_model(
                                sess,
                                FLAGS.model_output + "/oqmrc_{}.ckpt".format(
                                    int(i / num_storage_steps)))
                            print("==successful storing model=={}".format(
                                int(i / num_storage_steps)))
                        total_loss = 0
                        cnt = 0
                except tf.errors.OutOfRangeError:
                    break

        print("===========begin to train============")
        train_fn(train_op, train_loss)

        # Only rank 0 saves the final model and runs evaluation.
        if hvd.rank() == 0:
            model_io_fn.save_model(sess, FLAGS.model_output + "/oqmrc.ckpt")

            print("===========begin to eval============")
            accuracy, f1 = eval_fn(result)
            print("==accuracy {} f1 {}==".format(accuracy, f1))
Code example #3 (score: 0)
File: base_train.py — Project: P79N6A/BERT
def main(_):
    """Horovod-distributed BERT single-sentence classifier training.

    Builds train/eval graphs from FLAGS, trains until the input pipeline is
    exhausted, checkpoints on rank 0 every num_storage_steps steps, then
    rank 0 saves the final model and prints a sklearn classification report.
    """
    hvd.init()

    # One GPU per Horovod process.
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.visible_device_list = str(hvd.local_rank())

    graph = tf.Graph()
    with graph.as_default():
        import json

        config = json.load(open(FLAGS.config_file, "r"))
        init_checkpoint = FLAGS.init_checkpoint

        config = Bunch(config)
        config.use_one_hot_embeddings = True
        config.scope = "bert"
        config.dropout_prob = 0.1
        config.label_type = "single_label"

        # if_shard == "1": each worker reads 1/size of the data for the full
        # epoch count; if_shard == "0": full data but epochs divided by size.
        if FLAGS.if_shard == "0":
            train_size = FLAGS.train_size
            epoch = int(FLAGS.epoch / hvd.size())
        elif FLAGS.if_shard == "1":
            train_size = int(FLAGS.train_size / hvd.size())
            epoch = FLAGS.epoch

        init_lr = 2e-5

        # Maps label names to ids; used for the classification report below.
        label_dict = json.load(open(FLAGS.label_id))

        num_train_steps = int(train_size / FLAGS.batch_size * epoch)
        num_warmup_steps = int(num_train_steps * 0.1)

        # Checkpoint/log roughly once per (per-worker) epoch.
        num_storage_steps = int(train_size / FLAGS.batch_size)

        print(" model type {}".format(FLAGS.model_type))

        print(num_train_steps, num_warmup_steps, "=============")

        opt_config = Bunch({
            # Learning rate scaled down by the worker count.
            "init_lr": init_lr / hvd.size(),
            "num_train_steps": num_train_steps,
            "num_warmup_steps": num_warmup_steps
        })

        sess = tf.Session(config=sess_config)

        model_io_config = Bunch({"fix_lm": False})

        model_io_fn = model_io.ModelIO(model_io_config)

        optimizer_fn = optimizer.Optimizer(opt_config)

        num_classes = FLAGS.num_classes

        model_train_fn = bert_classifier.classifier_model_fn_builder(
            config,
            num_classes,
            init_checkpoint,
            reuse=None,
            load_pretrained=True,
            model_io_fn=model_io_fn,
            optimizer_fn=optimizer_fn,
            model_io_config=model_io_config,
            opt_config=opt_config)

        # Eval graph shares variables with the train graph (reuse=True).
        model_eval_fn = bert_classifier.classifier_model_fn_builder(
            config,
            num_classes,
            init_checkpoint,
            reuse=True,
            load_pretrained=True,
            model_io_fn=model_io_fn,
            optimizer_fn=optimizer_fn,
            model_io_config=model_io_config,
            opt_config=opt_config)

        def metric_fn(features, logits, loss):
            """Build eval fetches: batch accuracy, loss, predictions, labels."""
            print(logits.get_shape(), "===logits shape===")
            pred_label = tf.argmax(logits, axis=-1, output_type=tf.int32)
            prob = tf.nn.softmax(logits)
            # NOTE(review): the first binding of `accuracy` here is dead —
            # it is overwritten by the reduce_mean on the next line.
            accuracy = correct = tf.equal(
                tf.cast(pred_label, tf.int32),
                tf.cast(features["label_ids"], tf.int32))
            accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
            return {
                "accuracy": accuracy,
                "loss": loss,
                "pred_label": pred_label,
                "label_ids": features["label_ids"]
            }

        name_to_features = {
            "input_ids": tf.FixedLenFeature([FLAGS.max_length], tf.int64),
            "input_mask": tf.FixedLenFeature([FLAGS.max_length], tf.int64),
            "segment_ids": tf.FixedLenFeature([FLAGS.max_length], tf.int64),
            "label_ids": tf.FixedLenFeature([], tf.int64),
        }

        def _decode_record(record, name_to_features):
            """Decode a serialized tf.Example and cast int64 features to int32.
            """
            example = tf.parse_single_example(record, name_to_features)

            # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
            # So cast all int64 to int32.
            for name in list(example.keys()):
                t = example[name]
                if t.dtype == tf.int64:
                    t = tf.to_int32(t)
                example[name] = t
            return example

        params = Bunch({})
        params.epoch = FLAGS.epoch
        params.batch_size = FLAGS.batch_size

        train_features = tf_data_utils.train_input_fn(FLAGS.train_file,
                                                      _decode_record,
                                                      name_to_features,
                                                      params,
                                                      if_shard=FLAGS.if_shard)
        eval_features = tf_data_utils.eval_input_fn(FLAGS.dev_file,
                                                    _decode_record,
                                                    name_to_features,
                                                    params,
                                                    if_shard=FLAGS.if_shard)

        [train_op, train_loss, train_per_example_loss,
         train_logits] = model_train_fn(train_features, [],
                                        tf.estimator.ModeKeys.TRAIN)
        train_dict = {"train_op": train_op, "train_loss": train_loss}
        [_, eval_loss, eval_per_example_loss,
         eval_logits] = model_eval_fn(eval_features, [],
                                      tf.estimator.ModeKeys.EVAL)
        eval_dict = metric_fn(eval_features, eval_logits, eval_loss)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        # Every worker starts from rank 0's initial weights.
        sess.run(hvd.broadcast_global_variables(0))

        model_io_fn.set_saver()

        print("===horovod rank==={}".format(hvd.rank()))

        def run_eval(steps):
            """Run one full evaluation pass; rank 0 pickles the result dict
            to model_output/eval_dict_{steps}.pkl."""
            import _pickle as pkl
            # eval_features = tf_data_utils.eval_input_fn(
            # 							FLAGS.dev_file,
            # 							_decode_record,
            # 							name_to_features, params)
            # [_, eval_loss,
            # eval_per_example_loss, eval_logits] = model_eval_fn(eval_features, [], tf.estimator.ModeKeys.EVAL)
            # eval_dict = metric_fn(eval_features, eval_logits, eval_loss)
            # sess.run(tf.local_variables_initializer())
            eval_finial_dict = eval_fn(eval_dict)
            if hvd.rank() == 0:
                pkl.dump(
                    eval_finial_dict,
                    open(
                        FLAGS.model_output + "/eval_dict_{}.pkl".format(steps),
                        "wb"))
            return eval_finial_dict

        def eval_fn(result):
            """Run the eval fetches to exhaustion, accumulating scalar
            metrics (summed) and prediction/label lists, then append a
            sklearn classification report and return the whole dict."""
            i = 0
            total_accuracy = 0
            eval_total_dict = {}

            while True:
                try:
                    eval_result = sess.run(result)
                    # Lists are extended; scalar metrics are summed across
                    # batches (not averaged here).
                    for key in eval_result:
                        if key not in eval_total_dict:
                            if key in ["pred_label", "label_ids"]:
                                eval_total_dict[key] = []
                                eval_total_dict[key].extend(eval_result[key])
                            if key in ["accuracy", "loss"]:
                                eval_total_dict[key] = 0.0
                                eval_total_dict[key] += eval_result[key]
                        else:
                            if key in ["pred_label", "label_ids"]:
                                eval_total_dict[key].extend(eval_result[key])
                            if key in ["accuracy", "loss"]:
                                eval_total_dict[key] += eval_result[key]

                    i += 1
                    # if i == 100:
                    # 	break
                except tf.errors.OutOfRangeError:
                    print("End of dataset")
                    break

            label_id = eval_total_dict["label_ids"]
            pred_label = eval_total_dict["pred_label"]

            result = classification_report(label_id,
                                           pred_label,
                                           target_names=list(
                                               label_dict["label2id"].keys()))

            print(result)
            eval_total_dict["classification_report"] = result
            return eval_total_dict

        def train_fn(op_dict):
            """Run train steps until the pipeline is exhausted; every
            num_storage_steps steps print averaged losses and checkpoint
            on rank 0; on exhaustion rank 0 pickles the loss history."""
            i = 0
            cnt = 0
            loss_dict = {}
            monitoring_train = []
            monitoring_eval = []
            while True:
                try:
                    train_result = sess.run(op_dict)
                    for key in train_result:
                        if key == "train_op":
                            continue
                        else:
                            if np.isnan(train_result[key]):
                                # NOTE(review): `train_loss` here is the
                                # outer-scope graph Tensor (prints the object,
                                # not the value), and this `break` only exits
                                # the inner for-loop — training continues.
                                print(train_loss, "get nan loss")
                                break
                            else:
                                if key in loss_dict:
                                    loss_dict[key] += train_result[key]
                                else:
                                    loss_dict[key] = train_result[key]

                    i += 1
                    cnt += 1

                    if np.mod(i, num_storage_steps) == 0:
                        string = ""
                        for key in loss_dict:
                            tmp = key + " " + str(loss_dict[key] / cnt) + "\t"
                            string += tmp
                        print(string)
                        monitoring_train.append(loss_dict)

                        if hvd.rank() == 0:
                            model_io_fn.save_model(
                                sess,
                                FLAGS.model_output + "/model_{}.ckpt".format(
                                    int(i / num_storage_steps)))

                        print("==successful storing model=={}".format(
                            int(i / num_storage_steps)))
                        cnt = 0

                        # eval_finial_dict = run_eval(int(i/num_storage_steps))
                        # monitoring_eval.append(eval_finial_dict)

                        for key in loss_dict:
                            loss_dict[key] = 0.0

                except tf.errors.OutOfRangeError:
                    if hvd.rank() == 0:
                        import _pickle as pkl
                        pkl.dump(
                            {
                                "train": monitoring_train,
                                "eval": monitoring_eval
                            },
                            open(FLAGS.model_output + "/monitoring.pkl", "wb"))

                    break

        print("===========begin to train============")
        train_fn(train_dict)
        if hvd.rank() == 0:
            model_io_fn.save_model(sess, FLAGS.model_output + "/model.ckpt")
            print("===========begin to eval============")
            eval_finial_dict = run_eval("final")
Code example #4 (score: 0)
File: test_model_distributed.py — Project: P79N6A/BERT
def main(_):
    """Distributed-training throughput benchmark for a BERT classifier.

    Trains on hard-coded JD-comment TFRecords with two GPUs per Horovod
    process and prints the total wall-clock time; evaluation is disabled
    (the eval graph is commented out below).
    """
    hvd.init()

    sess_config = tf.ConfigProto()
    # sess_config.gpu_options.visible_device_list = str(hvd.local_rank())

    # Two GPUs per worker: local rank r owns GPUs 2r and 2r+1.
    sess_config.gpu_options.visible_device_list = \
           '%d,%d' % (hvd.local_rank() * 2, hvd.local_rank() * 2 + 1)

    graph = tf.Graph()
    with graph.as_default():
        import json

        config = json.load(
            open("/data/xuht/bert/chinese_L-12_H-768_A-12/bert_config.json",
                 "r"))
        init_checkpoint = "/data/xuht/bert/chinese_L-12_H-768_A-12/bert_model.ckpt"
        config = Bunch(config)
        config.use_one_hot_embeddings = True
        config.scope = "bert"
        config.dropout_prob = 0.1
        config.label_type = "single_label"
        config.loss = "focal_loss"
        #     config.num_hidden_layers =

        # os.environ["CUDA_VISIBLE_DEVICES"] = "0"

        # 33056 = hard-coded total training-example count; each worker
        # sees 1/size of it.
        num_train = int(33056 / hvd.size())

        batch_size = 32

        # Log mean loss roughly once per (per-worker) epoch.
        valid_step = int(num_train / batch_size)

        epoch = 2
        num_train_steps = int(num_train / (batch_size) * epoch)

        decay_train_steps = num_train_steps

        # decay_train_steps = int(
        # 		33056 / batch_size * epoch)

        num_warmup_steps = int(num_train_steps * 0.01)

        sess = tf.Session(config=sess_config)

        opt_config = Bunch({
            # Learning rate scaled down by the worker count.
            "init_lr": float(1e-5 / hvd.size()),
            "num_train_steps": decay_train_steps,
            "cycle": False,
            "num_warmup_steps": num_warmup_steps,
            "lr_decay": "polynomial_decay"
        })
        model_io_config = Bunch({"fix_lm": False})

        model_io_fn = model_io.ModelIO(model_io_config)

        optimizer_fn = optimizer.Optimizer(opt_config)

        # NOTE(review): "calsses" is a typo for "classes" (kept as-is; it
        # is a local name only).
        num_calsses = 2

        # Single train graph split across two GPUs (gpu_id base 0, 2 GPUs).
        model_train_fn = bert_classifier.classifier_model_fn_builder(
            config,
            num_calsses,
            init_checkpoint,
            reuse=None,
            load_pretrained=True,
            model_io_fn=model_io_fn,
            optimizer_fn=optimizer_fn,
            model_io_config=model_io_config,
            opt_config=opt_config,
            gpu_id=0,
            gpu_nums=2)

        # model_eval_fn = bert_classifier.classifier_model_fn_builder(config, num_calsses, init_checkpoint,
        # 										reuse=True,
        # 										load_pretrained=True,
        # 										model_io_fn=model_io_fn,
        # 										optimizer_fn=optimizer_fn,
        # 										model_io_config=model_io_config,
        # 										opt_config=opt_config,
        # 										gpu_id=0,
        # 										gpu_nums=2)

        def metric_fn(features, logits, loss):
            """Build eval fetches: batch accuracy, loss, predictions, labels.

            NOTE(review): unused in this benchmark — the eval graph above
            is commented out.
            """
            print(logits.get_shape(), "===logits shape===")
            pred_label = tf.argmax(logits, axis=-1, output_type=tf.int32)
            prob = tf.nn.softmax(logits)
            # The first binding of `accuracy` is dead — overwritten below.
            accuracy = correct = tf.equal(
                tf.cast(pred_label, tf.int32),
                tf.cast(features["label_ids"], tf.int32))
            accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
            return {
                "accuracy": accuracy,
                "loss": loss,
                "pred_label": pred_label,
                "label_ids": features["label_ids"]
            }

        # Fixed-length schema for 128-token single-sentence examples.
        name_to_features = {
            "input_ids": tf.FixedLenFeature([128], tf.int64),
            "input_mask": tf.FixedLenFeature([128], tf.int64),
            "segment_ids": tf.FixedLenFeature([128], tf.int64),
            "label_ids": tf.FixedLenFeature([], tf.int64),
        }

        params = Bunch({})
        params.epoch = epoch
        params.batch_size = 32
        # Candidate dataset paths; only jd_train / jd_test are used below.
        train_file = "/data/xuht/eventy_detection/event/model/train.tfrecords"
        train_file1 = "/data/xuht/eventy_detection/sentiment/model/sentiment_11_14/train.tfrecords"
        title_sentiment = "/data/xuht/eventy_detection/sentiment/model/test/train.tfrecords"
        sentiment = "/data/xuht/eventy_detection/sentiment/model/bert/train_11_15.tfrecords"
        jd_train = "/data/xuht/jd_comment/train.tfrecords"
        train_features = tf_data_utils.train_input_fn(
            jd_train, tf_data_utils._decode_record, name_to_features, params)

        test_file = [
            "/data/xuht/eventy_detection/sentiment/model/sentiment_11_14/test.tfrecords"
        ]
        test_file1_1 = [
            "/data/xuht/eventy_detection/sentiment/model/test/train.tfrecords",
            "/data/xuht/eventy_detection/sentiment/model/test/test.tfrecords"
        ]
        test_file2 = "/data/xuht/eventy_detection/event/model/test.tfrecords"
        title_test = "/data/xuht/eventy_detection/sentiment/model/test/test.tfrecords"
        jd_test = "/data/xuht/jd_comment/test.tfrecords"
        sentiment_test = "/data/xuht/eventy_detection/sentiment/model/bert/test_11_15.tfrecords"

        eval_features = tf_data_utils.eval_input_fn(
            jd_test, tf_data_utils._decode_record, name_to_features, params)

        [train_op, train_loss, train_per_example_loss,
         train_logits] = model_train_fn(train_features, [],
                                        tf.estimator.ModeKeys.TRAIN)
        # [_, eval_loss, eval_per_example_loss, eval_logits] = model_eval_fn(eval_features, [], tf.estimator.ModeKeys.EVAL)
        # result = metric_fn(eval_features, eval_logits, eval_loss)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        # Every worker starts from rank 0's initial weights.
        sess.run(hvd.broadcast_global_variables(0))

        model_io_fn.set_saver()

        print("===horovod rank==={}".format(hvd.rank()))

        def eval_fn(result):
            """Run eval fetches to exhaustion; return (mean accuracy,
            true labels, predicted labels).

            NOTE(review): dead code here — its only caller is commented out.
            """
            i = 0
            total_accuracy = 0
            label, label_id = [], []
            while True:
                try:
                    eval_result = sess.run(result)
                    total_accuracy += eval_result["accuracy"]
                    label_id.extend(eval_result["label_ids"])
                    label.extend(eval_result["pred_label"])
                    i += 1
                except tf.errors.OutOfRangeError:
                    print("End of dataset")
                    break
            macro_f1 = f1_score(label_id, label, average="macro")
            micro_f1 = f1_score(label_id, label, average="micro")
            accuracy = accuracy_score(label_id, label)
            print("test accuracy {} macro_f1 score {} micro_f1 {} accuracy {}".
                  format(total_accuracy / i, macro_f1, micro_f1, accuracy))
            return total_accuracy / i, label_id, label

        def train_fn(op, loss):
            """Run train steps until the pipeline is exhausted, logging the
            mean loss every valid_step steps (no checkpointing here)."""
            i = 0
            total_loss = 0
            cnt = 0
            while True:
                try:
                    [_, train_loss] = sess.run([op, loss])
                    i += 1
                    cnt += 1
                    total_loss += train_loss
                    # print("==device id {} global step {}".format(hvd.rank(), step))
                    if np.mod(i, valid_step) == 0:
                        print(total_loss / cnt)
                        cnt = 0
                        total_loss = 0
                except tf.errors.OutOfRangeError:
                    print("End of dataset")
                    break

        # Time the full training run for the throughput report.
        import time
        start = time.time()
        train_fn(train_op, train_loss)
        # acc, true_label, pred_label = eval_fn(result)
        end = time.time()
        print("==total time {} numbers of devices {}".format(
            end - start, hvd.size()))
Code example #5 (score: 0) — fragment truncated below
def main(_):
	"""Horovod-distributed multi-choice BERT training/eval entry point.

	Builds a training graph and a variable-sharing eval graph on every
	worker, trains with a per-worker-scaled learning rate, then evaluates
	and checkpoints from rank 0 only.
	"""

	hvd.init()

	# Pin each Horovod process to a single visible GPU.
	sess_config = tf.ConfigProto()
	sess_config.gpu_options.visible_device_list = str(hvd.local_rank())

	graph = tf.Graph()
	from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
	with graph.as_default():
		import json
		
		# config = json.load(open("/data/xuht/bert/chinese_L-12_H-768_A-12/bert_config.json", "r"))
		# init_checkpoint = "/data/xuht/bert/chinese_L-12_H-768_A-12/bert_model.ckpt"

		config = json.load(open(FLAGS.config_file))
		init_checkpoint = FLAGS.init_checkpoint

	#     init_checkpoint = "/data/xuht/ai_challenge_cqmrc/bert/concat/model/oqmrc.ckpt"
		config = Bunch(config)
		config.use_one_hot_embeddings = True
		config.scope = "bert"
		config.dropout_prob = 0.1
		config.label_type = "single_label"
		
		# os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
		sess = tf.Session(config=sess_config)

		# Data is assumed sharded across workers, so each worker trains on
		# 1/size of the examples per epoch.
		train_size = int(FLAGS.train_size/hvd.size())
		print("===train size===", train_size)

		num_train_steps = int(
				train_size / FLAGS.batch_size * FLAGS.epoch)
		decay_train_steps = num_train_steps
		# decay_train_steps = int(
		# 		FLAGS.train_size / FLAGS.batch_size * FLAGS.epoch)

		# 1% linear warmup of the per-worker step budget.
		num_warmup_steps = int(num_train_steps * 0.01)
		# num_warmup_steps = 0

		# Log/checkpoint once per (per-worker) epoch.
		num_storage_steps = int(train_size / FLAGS.batch_size)

		# NOTE(review): the LR is *divided* by the worker count here; the
		# common Horovod recipe scales it *up* — presumably deliberate, confirm.
		init_lr = 1e-5
		lr = float(init_lr/hvd.size())
		# lr = init_lr
		# end_lr = (hvd.size()-1)/(np.power(hvd.size(), 2)) * init_lr
		end_lr = 0.0

		opt_config = Bunch({"init_lr":lr, 
							"num_train_steps":decay_train_steps,
							"num_warmup_steps":num_warmup_steps,
							"end_learning_rate":end_lr})

		model_io_config = Bunch({"fix_lm":False})
		
		model_io_fn = model_io.ModelIO(model_io_config)
		
		num_choice = FLAGS.num_classes
		max_seq_length = FLAGS.max_length

		# Training graph: creates the model variables and the train op.
		model_train_fn = bert_classifier.multichoice_model_fn_builder(config, num_choice, init_checkpoint, 
												reuse=None, 
												load_pretrained=True,
												model_io_fn=model_io_fn,
												model_io_config=model_io_config, 
												opt_config=opt_config)
		
		# Eval graph: reuse=True shares the variables created above.
		model_eval_fn = bert_classifier.multichoice_model_fn_builder(config, num_choice, init_checkpoint, 
												reuse=True, 
												load_pretrained=True,
												model_io_fn=model_io_fn,
												model_io_config=model_io_config, 
												opt_config=opt_config)
		
		def metric_fn(features, logits, loss):
			"""Build per-batch accuracy plus raw label/prediction tensors."""
			print(logits.get_shape(), "===logits shape===")
			pred_label = tf.argmax(logits, axis=-1, output_type=tf.int32)
			prob = tf.nn.softmax(logits)
			accuracy = correct = tf.equal(
				tf.cast(pred_label, tf.int32),
				tf.cast(features["label_ids"], tf.int32)
			)
			accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
			return {"accuracy":accuracy, "loss":loss, "pred_label":pred_label, "label_ids":features["label_ids"]}
		
		# The num_choice candidate sequences of one example are stored
		# concatenated flat in the TFRecord; _decode_record reshapes them
		# back to [num_choice, max_seq_length].
		name_to_features = {
				"input_ids":
						tf.FixedLenFeature([max_seq_length*num_choice], tf.int64),
				"input_mask":
						tf.FixedLenFeature([max_seq_length*num_choice], tf.int64),
				"segment_ids":
						tf.FixedLenFeature([max_seq_length*num_choice], tf.int64),
				"label_ids":
						tf.FixedLenFeature([], tf.int64),
		}
		
		def _decode_record(record, name_to_features):
			"""Decodes a record to a TensorFlow example.
			"""
			example = tf.parse_single_example(record, name_to_features)

			# tf.Example only supports tf.int64, but the TPU only supports tf.int32.
			# So cast all int64 to int32.
			for name in list(example.keys()):
				t = example[name]
				if t.dtype == tf.int64:
					t = tf.to_int32(t)
				example[name] = t
			# Un-flatten the concatenated choices into per-choice rows.
			for name in ["input_ids", "input_mask", "segment_ids"]:
				example[name] = tf.reshape(example[name], [-1, max_seq_length])
			return example 

		params = Bunch({})
		params.epoch = FLAGS.epoch
		params.batch_size = FLAGS.batch_size
		train_features = tf_data_utils.train_input_fn(FLAGS.train_file,
									_decode_record, name_to_features, params)
		eval_features = tf_data_utils.eval_input_fn(FLAGS.dev_file,
									_decode_record, name_to_features, params)
		
		[train_op, train_loss, train_per_example_loss, train_logits] = model_train_fn(train_features, [], tf.estimator.ModeKeys.TRAIN)
		[_, eval_loss, eval_per_example_loss, eval_logits] = model_eval_fn(eval_features, [], tf.estimator.ModeKeys.EVAL)
		result = metric_fn(eval_features, eval_logits, eval_loss)
		
		model_io_fn.set_saver()

		print("====succeeded in set savering====", hvd.rank())
		
		init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
		sess.run(init_op)

		print("====succeeded in initializing global and local variables====", hvd.rank())

		# Broadcast rank 0's initialized weights so all workers start identical.
		sess.run(hvd.broadcast_global_variables(0))

		print("====succeeded in initializing params====", hvd.rank())
		
		def eval_fn(result):
			"""Drain the eval pipeline; print and return avg accuracy and macro F1."""
			i = 0
			total_accuracy = 0
			label, label_id = [], []
			while True:
				try:
					eval_result = sess.run(result)
					total_accuracy += eval_result["accuracy"]
					label_id.extend(eval_result["label_ids"])
					label.extend(eval_result["pred_label"])
					i += 1
				except tf.errors.OutOfRangeError:
					print("End of dataset")
					break
			# NOTE(review): an empty eval set (i == 0) raises ZeroDivisionError.
			f1 = f1_score(label_id, label, average="macro")
			accuracy = accuracy_score(label_id, label)
			print("test accuracy accuracy {} {}, f1 {}".format(total_accuracy/i, 
				accuracy, f1))
			return total_accuracy/ i, f1

		output_dict = []
		fwobj = open(FLAGS.model_output+"/log.txt", "w")
		
		def train_fn(op, loss):
			"""Training loop: log mean loss and checkpoint (rank 0) each epoch."""
			i = 0
			cnt = 0
			total_loss = 0.0
			while True:
				try:
					[_, train_loss] = sess.run([op, loss])
#					print(train_loss, i)

					total_loss += train_loss
					i += 1
					cnt += 1
					if np.mod(i, num_storage_steps) == 0:
						
						# Only rank 0 writes logs/checkpoints to avoid clobbering.
						if hvd.rank() == 0:
							fwobj.write("loss {} rank {}\n".format(total_loss/cnt, hvd.rank()))
							model_io_fn.save_model(sess, FLAGS.model_output+"/oqmrc_epoch_{}_hvd_{}.ckpt".format(int(i/num_storage_steps), hvd.rank()))
						total_loss = 0
						cnt = 0
				except tf.errors.OutOfRangeError:
					break
		# Snapshot the broadcast initial weights before any gradient updates.
		if hvd.rank() == 0:
			model_io_fn.save_model(sess, FLAGS.model_output+"/oqmrc_hvd_{}_initial.ckpt".format(hvd.rank()))		
		print("===========begin to train============")        
		train_fn(train_op, train_loss)
		if hvd.rank() == 0:
			print("===========begin to eval============")
			eval_fn(result)
			model_io_fn.save_model(sess, FLAGS.model_output+"/oqmrc_hvd_{}.ckpt".format(hvd.rank()))
			fwobj.close()
Code example #6
0
def main(_):
    """Horovod-distributed BERT classifier training/eval entry point.

    Builds a training graph and a variable-sharing eval graph on every
    worker, trains with the sharding strategy selected by ``FLAGS.if_shard``,
    evaluates, and checkpoints / dumps eval predictions from rank 0 only.
    """

    hvd.init()

    # Pin each Horovod process to a single visible GPU.
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.visible_device_list = str(hvd.local_rank())

    graph = tf.Graph()
    with graph.as_default():
        import json

        # Use a context manager so the config file handle is not leaked.
        with open(FLAGS.config_file, "r") as config_file:
            config = json.load(config_file)
        init_checkpoint = FLAGS.init_checkpoint

        config = Bunch(config)
        config.use_one_hot_embeddings = True
        config.scope = "bert"
        config.dropout_prob = 0.1
        config.label_type = "single_label"

        # if_shard == "0": every worker reads the full dataset, so split the
        # epoch budget across workers.  if_shard == "1": the input pipeline
        # shards the records, so split the per-worker train size instead.
        if FLAGS.if_shard == "0":
            train_size = FLAGS.train_size
            epoch = int(FLAGS.epoch / hvd.size())
        elif FLAGS.if_shard == "1":
            train_size = int(FLAGS.train_size / hvd.size())
            epoch = FLAGS.epoch
        else:
            # Fail loudly instead of hitting a NameError on train_size below.
            raise ValueError(
                "unsupported if_shard value: {}".format(FLAGS.if_shard))

        init_lr = 2e-5

        num_train_steps = int(train_size / FLAGS.batch_size * epoch)
        # 10% linear warmup of the total step budget.
        num_warmup_steps = int(num_train_steps * 0.1)

        # Log/checkpoint once per (per-worker) epoch.
        num_storage_steps = int(train_size / FLAGS.batch_size)

        print(" model type {}".format(FLAGS.model_type))

        print(num_train_steps, num_warmup_steps, "=============")

        # NOTE(review): the LR is *divided* by the worker count; the usual
        # Horovod recipe multiplies it — presumably deliberate, confirm.
        opt_config = Bunch({
            "init_lr": init_lr / hvd.size(),
            "num_train_steps": num_train_steps,
            "num_warmup_steps": num_warmup_steps
        })

        sess = tf.Session(config=sess_config)

        model_io_config = Bunch({"fix_lm": False})

        model_io_fn = model_io.ModelIO(model_io_config)

        optimizer_fn = optimizer.Optimizer(opt_config)

        num_classes = FLAGS.num_classes

        # Training graph: creates the model variables and the train op.
        model_train_fn = bert_classifier.classifier_model_fn_builder(
            config,
            num_classes,
            init_checkpoint,
            reuse=None,
            load_pretrained=True,
            model_io_fn=model_io_fn,
            optimizer_fn=optimizer_fn,
            model_io_config=model_io_config,
            opt_config=opt_config)

        # Eval graph: reuse=True shares the variables created above.
        model_eval_fn = bert_classifier.classifier_model_fn_builder(
            config,
            num_classes,
            init_checkpoint,
            reuse=True,
            load_pretrained=True,
            model_io_fn=model_io_fn,
            optimizer_fn=optimizer_fn,
            model_io_config=model_io_config,
            opt_config=opt_config)

        def metric_fn(features, logits, loss):
            """Build per-batch accuracy plus raw label/prediction tensors."""
            print(logits.get_shape(), "===logits shape===")
            pred_label = tf.argmax(logits, axis=-1, output_type=tf.int32)
            # Simplified from `accuracy = correct = tf.equal(...)`; the first
            # assignment to accuracy was dead, and softmax prob was unused.
            correct = tf.equal(
                tf.cast(pred_label, tf.int32),
                tf.cast(features["label_ids"], tf.int32))
            accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
            return {
                "accuracy": accuracy,
                "loss": loss,
                "pred_label": pred_label,
                "label_ids": features["label_ids"]
            }

        name_to_features = {
            "input_ids": tf.FixedLenFeature([FLAGS.max_length], tf.int64),
            "input_mask": tf.FixedLenFeature([FLAGS.max_length], tf.int64),
            "segment_ids": tf.FixedLenFeature([FLAGS.max_length], tf.int64),
            "label_ids": tf.FixedLenFeature([], tf.int64),
        }

        def _decode_record(record, name_to_features):
            """Decode a serialized tf.Example into a feature dict."""
            example = tf.parse_single_example(record, name_to_features)

            # tf.Example only supports tf.int64, but the TPU only supports
            # tf.int32, so cast all int64 features down.
            for name in list(example.keys()):
                t = example[name]
                if t.dtype == tf.int64:
                    t = tf.to_int32(t)
                example[name] = t
            return example

        params = Bunch({})
        params.epoch = FLAGS.epoch
        params.batch_size = FLAGS.batch_size

        train_features = tf_data_utils.train_input_fn(FLAGS.train_file,
                                                      _decode_record,
                                                      name_to_features,
                                                      params,
                                                      if_shard=FLAGS.if_shard)
        eval_features = tf_data_utils.eval_input_fn(FLAGS.dev_file,
                                                    _decode_record,
                                                    name_to_features,
                                                    params,
                                                    if_shard=FLAGS.if_shard)

        [train_op, train_loss, train_per_example_loss,
         train_logits] = model_train_fn(train_features, [],
                                        tf.estimator.ModeKeys.TRAIN)
        [_, eval_loss, eval_per_example_loss,
         eval_logits] = model_eval_fn(eval_features, [],
                                      tf.estimator.ModeKeys.EVAL)
        result = metric_fn(eval_features, eval_logits, eval_loss)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        # Broadcast rank 0's initialized weights so all workers start identical.
        sess.run(hvd.broadcast_global_variables(0))

        model_io_fn.set_saver()

        print("===horovod rank==={}".format(hvd.rank()))

        def eval_fn(result):
            """Drain the eval pipeline; return (avg accuracy, golds, preds)."""
            i = 0
            total_accuracy = 0
            label, label_id = [], []
            while True:
                try:
                    eval_result = sess.run(result)
                    total_accuracy += eval_result["accuracy"]
                    label_id.extend(eval_result["label_ids"])
                    label.extend(eval_result["pred_label"])
                    i += 1
                except tf.errors.OutOfRangeError:
                    print("End of dataset")
                    break
            macro_f1 = f1_score(label_id, label, average="macro")
            micro_f1 = f1_score(label_id, label, average="micro")
            # NOTE(review): precision/recall are computed but never reported
            # or returned — kept for parity with the original; wire them into
            # the log line if they are wanted.
            macro_precision = precision_score(label_id, label, average="macro")
            micro_precision = precision_score(label_id, label, average="micro")
            macro_recall = recall_score(label_id, label, average="macro")
            micro_recall = recall_score(label_id, label, average="micro")
            accuracy = accuracy_score(label_id, label)
            # NOTE(review): an empty eval set (i == 0) raises ZeroDivisionError.
            print("test accuracy {} macro_f1 score {} micro_f1 {} accuracy {}".
                  format(total_accuracy / i, macro_f1, micro_f1, accuracy))
            return total_accuracy / i, label_id, label

        def train_fn(op, loss):
            """Train until the pipeline is exhausted; checkpoint on rank 0."""
            i = 0
            total_loss = 0
            cnt = 0
            while True:
                try:
                    [_, train_loss] = sess.run([op, loss])
                    i += 1
                    cnt += 1
                    total_loss += train_loss
                    if np.mod(i, num_storage_steps) == 0:
                        print(total_loss / cnt)
                        # Only rank 0 writes checkpoints to avoid clobbering.
                        if hvd.rank() == 0:
                            model_io_fn.save_model(
                                sess,
                                FLAGS.model_output + "/oqmrc_{}.ckpt".format(
                                    int(i / num_storage_steps)))
                        cnt = 0
                        total_loss = 0
                except tf.errors.OutOfRangeError:
                    print("End of dataset")
                    break

        # Time the full training run for throughput comparison across devices.
        # (The original imported time twice; once suffices.)
        import time
        start = time.time()
        train_fn(train_op, train_loss)
        acc, true_label, pred_label = eval_fn(result)
        end = time.time()
        print("==total time {} numbers of devices {}".format(
            end - start, hvd.size()))
        if hvd.rank() == 0:
            model_io_fn.save_model(sess, FLAGS.model_output + "/oqmrc.ckpt")
            import _pickle as pkl
            # NOTE(review): this writes a binary *pickle* to a ".json" path —
            # consumers must load it with pickle, not json. The context
            # manager also closes the previously-leaked file handle.
            with open(FLAGS.model_output + "/eval_result.json", "wb") as fout:
                pkl.dump({
                    "true_label": true_label,
                    "pred_label": pred_label
                }, fout)