Example #1
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "cola": ColaProcessor,
      "mnli": MnliProcessor,
      "mrpc": MrpcProcessor,
      "xnli": XnliProcessor,
  }

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict' must be True.")

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()

  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()

  label_list = processor.get_labels()

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
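    # Illustrative arithmetic (hypothetical values): 3,668 training examples,
    # train_batch_size=32 and num_train_epochs=3.0 give
    # num_train_steps = int(3668 / 32 * 3.0) = 343; with warmup_proportion=0.1,
    # num_warmup_steps = int(343 * 0.1) = 34.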

  model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list),
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())
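      # Illustration with hypothetical counts: 1,043 dev examples and
      # eval_batch_size=32 leave a remainder of 19, so 13 PaddingInputExample
      # instances are appended to reach 1,056 = 33 full batches.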

    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      assert len(eval_examples) % FLAGS.eval_batch_size == 0
      eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
      tf.logging.info("***** Eval results *****")
      for key in sorted(result.keys()):
        tf.logging.info("  %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))

  if FLAGS.do_predict:
    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    num_actual_predict_examples = len(predict_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on.
      while len(predict_examples) % FLAGS.predict_batch_size != 0:
        predict_examples.append(PaddingInputExample())

    predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    file_based_convert_examples_to_features(predict_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            predict_file)

    tf.logging.info("***** Running prediction*****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    result = estimator.predict(input_fn=predict_input_fn)

    output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
    with tf.gfile.GFile(output_predict_file, "w") as writer:
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples
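
The loop above writes one tab-separated row of class probabilities per real example to test_results.tsv. A minimal post-processing sketch (the label order is an assumption and must mirror processor.get_labels() for the task):

import csv

label_list = ["0", "1"]  # hypothetical label order

with open("test_results.tsv") as tsv:
    for row in csv.reader(tsv, delimiter="\t"):
        probabilities = [float(p) for p in row]
        best = probabilities.index(max(probabilities))
        print(label_list[best], probabilities[best])
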
def bert_lr_model(num_epoches,
                  shuffle,
                  train_batch_size,
                  eval_batch_size,
                  test_batch_size,
                  learning_rate,
                  save_frequence,
                  start_eval,
                  early_stop,
                  num_labels=2):
    start1 = time.perf_counter()
    start2 = datetime.now()

    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {"mrpc": MrpcProcessor}

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)
    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model was only trained up to sequence length %d"
            % (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    train_examples = processor.get_train_examples(FLAGS.data_dir)
    train_len = len(train_examples)
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    file_based_convert_examples_to_features(train_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            train_file)

    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    eval_len = len(eval_examples)
    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    file_based_convert_examples_to_features(eval_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            eval_file)

    test_examples = processor.get_test_examples(FLAGS.data_dir)
    test_len = len(test_examples)
    test_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    file_based_convert_examples_to_features(test_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            test_file)

    params = {
        "train_file": train_file,
        "eval_file": eval_file,
        "test_file": test_file,
        "num_epochs": num_epoches,
        "shuffle": shuffle,
        "train_batch_size": train_batch_size,
        "eval_batch_size": eval_batch_size,
        "test_batch_size": test_batch_size
    }

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # sess = tf.Session()
    with sess.as_default():
        data = file_based_input_fn_builder(FLAGS.max_seq_length, False, params)
        data_iter = data.creat_train_dataset()
        iterator = data_iter.make_one_shot_iterator()
        next_element = iterator.get_next()
        sess_bert_lr = model_fn_builder(FLAGS.max_seq_length,
                                        bert_config,
                                        num_labels,
                                        use_one_hot_embeddings=FLAGS.use_tpu)

        tvars = tf.trainable_variables()

        if FLAGS.init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, FLAGS.init_checkpoint)
            tf.train.init_from_checkpoint(FLAGS.init_checkpoint,
                                          assignment_map)

        sess_only_input_fro_QA = only_input_fro_QA()

        global_step = tf.get_variable(initializer=0,
                                      name="globle_step",
                                      trainable=False)

        num_train_steps = int(train_len * num_epoches / train_batch_size)

        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

        # train_op = optimization.create_optimizer(
        #     sess_only_input_fro_QA.loss, learning_rate, num_train_steps, num_warmup_steps, global_step, use_tpu=FLAGS.use_tpu)
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(
            sess_only_input_fro_QA.loss, global_step=global_step)

        # optimizer = tf.train.AdamOptimizer(learning_rate)
        # gradients, variables = zip(*optimizer.compute_gradients(sess_bert_lr.total_loss))
        # gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.max_grad_norm)
        # train_op = optimizer.apply_gradients(zip(gradients, variables), global_step=global_step)
        #
        # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # train_op = tf.group([train_op, update_ops])

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        saver = tf.train.Saver()
        best_auc = 0.0
        best_iter = 0
        early_stop_iter = 1
        for epoch in range(num_epoches):
            elapsed1 = (time.perf_counter() - start1)
            elapsed2 = (datetime.now() - start2)
            print("Time used1:", elapsed1)
            print("Time used2:", elapsed2)
            flag = True
            while flag:
                train_data = sess.run(next_element)

                output_layer1, output_layer2 = sess.run(
                    [sess_bert_lr.output_layer1, sess_bert_lr.output_layer2],
                    feed_dict={
                        sess_bert_lr.input_ids1:
                        train_data["input_ids1"],
                        sess_bert_lr.input_mask1:
                        train_data["input_mask1"],
                        sess_bert_lr.segment_ids1:
                        train_data["segment_ids1"],
                        sess_bert_lr.label_ids1:
                        train_data["label_ids1"],
                        sess_bert_lr.is_real_example1:
                        train_data["is_real_example1"],
                        sess_bert_lr.input_ids2:
                        train_data["input_ids2"],
                        sess_bert_lr.input_mask2:
                        train_data["input_mask2"],
                        sess_bert_lr.segment_ids2:
                        train_data["segment_ids2"],
                        sess_bert_lr.label_ids2:
                        train_data["label_ids2"],
                        sess_bert_lr.is_real_example2:
                        train_data["is_real_example2"],
                        sess_bert_lr.is_training:
                        False
                    })
                _, loss, acc = sess.run(
                    [
                        train_op, sess_only_input_fro_QA.loss,
                        sess_only_input_fro_QA.acc
                    ],
                    feed_dict={
                        sess_only_input_fro_QA.output_layer1: output_layer1,
                        sess_only_input_fro_QA.output_layer2: output_layer2,
                    })

                cur_step = tf.train.global_step(sess, global_step)
                if int(cur_step * train_batch_size / train_len) > epoch:
                    flag = False
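                # An "epoch" here is one pass over train_len examples: once
                # cur_step * train_batch_size reaches (epoch + 1) * train_len,
                # the inner loop ends and the next epoch begins.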

                print("epoch:{},global_step:{},loss:{},acc:{}".format(
                    epoch, cur_step, loss, acc))
                if cur_step % save_frequence == 0 and cur_step > start_eval:
                    valid_dev(sess,
                              sess_bert_lr,
                              sess_only_input_fro_QA,
                              data,
                              test_batch_size,
                              eval_len,
                              file="eval")
                    step_auc = valid_dev(sess,
                                         sess_bert_lr,
                                         sess_only_input_fro_QA,
                                         data,
                                         test_batch_size,
                                         test_len,
                                         file="test")
                    if step_auc > best_auc and cur_step >= start_eval:
                        early_stop_iter = 1
                        best_auc = step_auc
                        best_iter = cur_step
                        print('Saving model for step {}'.format(cur_step))
                        saver.save(sess,
                                   FLAGS.checkpoint_model_path,
                                   global_step=cur_step)
                    elif step_auc < best_auc and cur_step > start_eval:
                        early_stop_iter += 1
                    if early_stop_iter >= early_stop:
                        print("train_over, best_iter={}, best_auc={}".format(
                            best_iter, best_auc))
                        sess.close()
                        exit()
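
A minimal invocation sketch for bert_lr_model (the hyperparameter values below are illustrative, not the author's settings; flags are parsed via tf.app.run as in the surrounding examples):

def _run_bert_lr(_):
    bert_lr_model(num_epoches=3,
                  shuffle=True,
                  train_batch_size=32,
                  eval_batch_size=32,
                  test_batch_size=32,
                  learning_rate=2e-5,
                  save_frequence=100,
                  start_eval=500,
                  early_stop=5,
                  num_labels=2)

if __name__ == "__main__":
    tf.app.run(_run_bert_lr)
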
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "xnli": XnliProcessor,
        "clef2019": CLEF2019Processor,
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)
        if FLAGS.file_with_predictions is not None:
            output_predict_file = os.path.join(FLAGS.output_dir,
                                               FLAGS.file_with_predictions)
        else:
            output_predict_file = os.path.join(FLAGS.output_dir,
                                               "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
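
The file_with_predictions flag used above is not one of the stock run_classifier.py flags; a plausible definition (a sketch under that assumption, using the flags module the BERT scripts already import) would be:

flags.DEFINE_string(
    "file_with_predictions", None,
    "Optional file name inside output_dir for the prediction TSV; "
    "test_results.tsv is used when unset.")
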
Example #4
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "xnli": XnliProcessor,
        "tnews": TnewsProcessor,
        "afqmc": AFQMCProcessor,
        "iflytek": iFLYTEKDataProcessor,
        "copa": COPAProcessor,
        "cmnli": CMNLIProcessor,
        "wsc": WSCProcessor,
        "csl": CslProcessor,
        # "cola": classifier_utils.ColaProcessor,
        # "mnli": classifier_utils.MnliProcessor,
        # "mismnli": classifier_utils.MisMnliProcessor,
        # "mrpc": classifier_utils.MrpcProcessor,
        # "rte": classifier_utils.RteProcessor,
        # "sst-2": classifier_utils.Sst2Processor,
        # "sts-b": classifier_utils.StsbProcessor,
        # "qqp": classifier_utils.QqpProcessor,
        # "qnli": classifier_utils.QnliProcessor,
        # "wnli": classifier_utils.WnliProcessor,
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    if not FLAGS.albert_config_file and not FLAGS.albert_hub_module_handle:
        raise ValueError("At least one of `--albert_config_file` and "
                         "`--albert_hub_module_handle` must be set")

    if FLAGS.albert_config_file:
        albert_config = modeling.AlbertConfig.from_json_file(
            FLAGS.albert_config_file)
        if FLAGS.max_seq_length > albert_config.max_position_embeddings:
            raise ValueError(
                "Cannot use sequence length %d because the ALBERT model "
                "was only trained up to sequence length %d" %
                (FLAGS.max_seq_length, albert_config.max_position_embeddings))
    else:
        albert_config = None  # Get the config from TF-Hub.

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name](
        use_spm=True if FLAGS.spm_model_file else False,
        do_lower_case=FLAGS.do_lower_case)

    label_list = processor.get_labels()

    if FLAGS.albert_hub_module_handle:
        tokenizer = tokenization.FullTokenizer.from_hub_module(
            hub_module=FLAGS.albert_hub_module_handle,
            spm_model_file=FLAGS.spm_model_file)
    else:
        tokenizer = tokenization.FullTokenizer.from_scratch(
            vocab_file=FLAGS.vocab_file,
            do_lower_case=FLAGS.do_lower_case,
            spm_model_file=FLAGS.spm_model_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=int(FLAGS.save_checkpoints_steps),
        keep_checkpoint_max=0,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
    model_fn = classifier_utils.model_fn_builder(
        albert_config=albert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.train_step,
        num_warmup_steps=FLAGS.warmup_step,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        task_name=task_name,
        hub_module=FLAGS.albert_hub_module_handle,
        optimizer=FLAGS.optimizer)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        cached_dir = FLAGS.cached_dir
        if not cached_dir:
            cached_dir = FLAGS.output_dir
        train_file = os.path.join(cached_dir, task_name + "_train.tf_record")
        if not tf.gfile.Exists(train_file):
            classifier_utils.file_based_convert_examples_to_features(
                train_examples, label_list, FLAGS.max_seq_length, tokenizer,
                train_file, task_name)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", FLAGS.train_step)
        train_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_step)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(classifier_utils.PaddingInputExample())

        cached_dir = FLAGS.cached_dir
        if not cached_dir:
            cached_dir = FLAGS.output_dir
        eval_file = os.path.join(cached_dir, task_name + "_eval.tf_record")
        if not tf.gfile.Exists(eval_file):
            classifier_utils.file_based_convert_examples_to_features(
                eval_examples, label_list, FLAGS.max_seq_length, tokenizer,
                eval_file, task_name)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.eval_batch_size)

        best_trial_info_file = os.path.join(FLAGS.output_dir, "best_trial.txt")

        def _best_trial_info():
            """Returns information about which checkpoints have been evaled so far."""
            if tf.gfile.Exists(best_trial_info_file):
                with tf.gfile.GFile(best_trial_info_file, "r") as best_info:
                    global_step, best_metric_global_step, metric_value = (
                        best_info.read().split(":"))
                    global_step = int(global_step)
                    best_metric_global_step = int(best_metric_global_step)
                    metric_value = float(metric_value)
            else:
                metric_value = -1
                best_metric_global_step = -1
                global_step = -1
            tf.logging.info(
                "Best trial info: Step: %s, Best Value Step: %s, "
                "Best Value: %s", global_step, best_metric_global_step,
                metric_value)
            return global_step, best_metric_global_step, metric_value
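        # best_trial.txt stores a single "global_step:best_metric_global_step:metric_value"
        # line; a hypothetical "2000:1500:0.8765" parses to (2000, 1500, 0.8765).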

        def _remove_checkpoint(checkpoint_path):
            for ext in ["meta", "data-00000-of-00001", "index"]:
                src_ckpt = checkpoint_path + ".{}".format(ext)
                tf.logging.info("removing {}".format(src_ckpt))
                tf.gfile.Remove(src_ckpt)

        def _find_valid_cands(curr_step):
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")

        if task_name == "sts-b":
            key_name = "pearson"
        elif task_name == "cola":
            key_name = "matthew_corr"
        else:
            key_name = "eval_accuracy"

        global_step, best_perf_global_step, best_perf = _best_trial_info()
        writer = tf.gfile.GFile(output_eval_file, "w")
        while global_step < FLAGS.train_step:
            steps_and_files = {}
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
                    gstep = int(cur_filename.split("-")[-1])
                    if gstep not in steps_and_files:
                        tf.logging.info(
                            "Add {} to eval list.".format(cur_filename))
                        steps_and_files[gstep] = cur_filename
            tf.logging.info("found {} files.".format(len(steps_and_files)))
            if not steps_and_files:
                tf.logging.info(
                    "found 0 file, global step: {}. Sleeping.".format(
                        global_step))
                time.sleep(60)
            else:
                for checkpoint in sorted(steps_and_files.items()):
                    step, checkpoint_path = checkpoint
                    if global_step >= step:
                        if (best_perf_global_step != step
                                and len(_find_valid_cands(step)) > 1):
                            _remove_checkpoint(checkpoint_path)
                        continue
                    result = estimator.evaluate(
                        input_fn=eval_input_fn,
                        steps=eval_steps,
                        checkpoint_path=checkpoint_path)
                    global_step = result["global_step"]
                    tf.logging.info("***** Eval results *****")
                    for key in sorted(result.keys()):
                        tf.logging.info("  %s = %s", key, str(result[key]))
                        writer.write("%s = %s\n" % (key, str(result[key])))
                    writer.write("best = {}\n".format(best_perf))
                    if result[key_name] > best_perf:
                        best_perf = result[key_name]
                        best_perf_global_step = global_step
                    elif len(_find_valid_cands(global_step)) > 1:
                        _remove_checkpoint(checkpoint_path)
                    writer.write("=" * 50 + "\n")
                    writer.flush()
                    with tf.gfile.GFile(best_trial_info_file,
                                        "w") as best_info:
                        best_info.write("{}:{}:{}".format(
                            global_step, best_perf_global_step, best_perf))
        writer.close()

        for ext in ["meta", "data-00000-of-00001", "index"]:
            src_ckpt = "model.ckpt-{}.{}".format(best_perf_global_step, ext)
            tgt_ckpt = "model.ckpt-best.{}".format(ext)
            tf.logging.info("saving {} to {}".format(src_ckpt, tgt_ckpt))
            tf.io.gfile.rename(os.path.join(FLAGS.output_dir, src_ckpt),
                               os.path.join(FLAGS.output_dir, tgt_ckpt),
                               overwrite=True)

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(classifier_utils.PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        classifier_utils.file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
            predict_file, task_name)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size)

        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        result = estimator.predict(input_fn=predict_input_fn,
                                   checkpoint_path=checkpoint_path)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        output_submit_file = os.path.join(FLAGS.output_dir,
                                          "submit_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as pred_writer,\
            tf.gfile.GFile(output_submit_file, "w") as sub_writer:
            sub_writer.write("index" + "\t" + "prediction\n")
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, (example, prediction)) in\
                enumerate(zip(predict_examples, result)):
                probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                pred_writer.write(output_line)

                if task_name != "sts-b":
                    actual_label = label_list[int(prediction["predictions"])]
                else:
                    actual_label = str(prediction["predictions"])
                sub_writer.write(example.guid + "\t" + actual_label + "\n")
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
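
For reference, the submit_results.tsv written above starts with an "index\tprediction" header followed by one "<guid>\t<label>" row per real example; with hypothetical guids and a CMNLI-style label set, the first lines could read:

index	prediction
test-0	neutral
test-1	entailment
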
Example #5
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "imdb": ImdbProcessor
  }

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)


  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  if not tf.gfile.IsDirectory(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  if not tf.gfile.IsDirectory(os.path.join(FLAGS.output_dir, FLAGS.subset_dir)):
    # no model has been trained on this subdataset before
    tf.gfile.MakeDirs(os.path.join(FLAGS.output_dir, FLAGS.subset_dir))


  # verify model id
  if FLAGS.mode == "train":
    FLAGS.model_id = model_hash()      # generate model hash
    new_dir = os.path.join(FLAGS.output_dir, FLAGS.subset_dir, FLAGS.model_id)
    tf.gfile.MakeDirs(new_dir) # make directory based on hash

    # write config
    flag_dict = FLAGS.flag_values_dict()
    with tf.gfile.GFile(os.path.join(new_dir, "config.txt"), "w") as writer:
      tf.logging.info("***** Writing training hyperparams to directory *****")
      for key in flag_dict.keys():
        if key in CONFIG_HYPERPARAMS:
          writer.write("%s = %s\n" % (key, str(flag_dict[key])))
  elif FLAGS.model_id is None:
    # not training: eval or predict was requested, so an existing model ID is required
    raise ValueError(
        "No model ID provided. Model ID is required for eval/predict when not training.")

  ## change FLAGS.output_dir to new dir
  FLAGS.output_dir = os.path.join(FLAGS.output_dir, FLAGS.subset_dir, FLAGS.model_id)


  task_name = FLAGS.task_name.lower()

  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name](FLAGS.data_dir)

  label_list = processor.get_labels()

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  # ============== TPU settings ==================== 

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      keep_checkpoint_max=None,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  # =================================================

  train_examples = None
  num_train_steps_per_epoch = None
  num_train_steps_total = None
  num_warmup_steps = None
  if FLAGS.mode == "train":
    train_examples = processor.get_train_examples(FLAGS.subset_dir)
    eval_examples = processor.get_dev_examples()
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())

    num_train_steps_per_epoch = int(
        len(train_examples) / FLAGS.train_batch_size) # * FLAGS.num_train_epochs)
    num_train_steps_total = num_train_steps_per_epoch * FLAGS.num_train_epochs
    num_warmup_steps = int(num_train_steps_total * FLAGS.warmup_proportion)
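    # Illustration with hypothetical sizes: 25,000 IMDB training examples and
    # train_batch_size=32 give int(25000 / 32) = 781 steps per epoch, 2,343
    # total steps over 3 epochs, and int(2343 * 0.1) = 234 warmup steps when
    # warmup_proportion is 0.1.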

  model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list),
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps_total,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.mode == "train":
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
    train_input_fn = file_based_input_fn_builder(
          input_file=train_file,
          seq_length=FLAGS.max_seq_length,
          is_training=True,
          drop_remainder=True)

    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)
    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
          input_file=eval_file,
          seq_length=FLAGS.max_seq_length,
          is_training=False,
          drop_remainder=eval_drop_remainder)

    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num epochs = %d", FLAGS.num_train_epochs)
    # tf.logging.info("  Num steps = %d", num_train_steps)

    best_val_loss = np.inf
    best_val_acc = 0.
    best_epoch = None
    best_result = None
    patience = FLAGS.patience
    for i in range(FLAGS.num_train_epochs):
      curr_epoch = i + 1
      tf.logging.info("===== Running training for EPOCH %d =====", curr_epoch)

      estimator.train(input_fn=train_input_fn, max_steps=num_train_steps_per_epoch * curr_epoch)

      tf.logging.info("===== Running evaluation for EPOCH %d =====", curr_epoch)
      tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
      tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

      # This tells the estimator to run through the entire set.
      eval_steps = None
      # However, if running eval on the TPU, you will need to specify the
      # number of steps.
      if FLAGS.use_tpu:
        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)
      result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

      output_eval_file = os.path.join(FLAGS.output_dir, "eval_results_epoch_{}.txt".format(curr_epoch))
      with tf.gfile.GFile(output_eval_file, "w") as writer:
        tf.logging.info("***** Eval results for EPOCH %d *****", curr_epoch)
        for key in sorted(result.keys()):
          tf.logging.info("  %s = %s", key, str(result[key]))
          writer.write("%s = %s\n" % (key, str(result[key])))
      if FLAGS.early_stopping_criterion == "acc":
        if result['eval_accuracy'] > best_val_acc:
          best_val_acc = result['eval_accuracy']
          best_epoch = curr_epoch
          best_result = result
          patience = FLAGS.patience
        else:
          tf.logging.info("Validation accuracy did not increase.")
          if patience == 0:
            tf.logging.info("Early stopping.")
            break
          else:
            tf.logging.info("Will try for %d more epochs.", patience)
            patience -= 1
      else: # use loss as early stopping criterion
        if result['eval_loss'] < best_val_loss:
          best_val_loss = result['eval_loss']
          best_epoch = curr_epoch
          best_result = result
          patience = FLAGS.patience
        else:
          tf.logging.info("Validation loss did not decrease.")
          if patience == 0:
            tf.logging.info("Early stopping.")
            break
          else:
            tf.logging.info("Will try for %d more epochs.", patience)
            patience -= 1
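      # With FLAGS.patience = 2 (hypothetical), two consecutive epochs without
      # improvement are tolerated; the third consecutive one triggers the break.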

    best_output_eval_file = os.path.join(FLAGS.output_dir, "best_eval_results.txt")
    best_global_step = best_result['global_step']
    best_checkpoint_path = os.path.join(FLAGS.output_dir, 'model.ckpt-{}'.format(best_global_step))
    with tf.gfile.GFile(best_output_eval_file, "w") as writer:
      tf.logging.info("***** Best eval results: EPOCH %d *****", best_epoch)
      writer.write("Best checkpoint path: {}\n".format(best_checkpoint_path))
      for key in sorted(best_result.keys()):
        tf.logging.info("  %s = %s", key, str(best_result[key]))
        writer.write("%s = %s\n" % (key, str(best_result[key])))

    # training complete. start autoeval on test set using best checkpoint.
  if FLAGS.mode == "eval" or FLAGS.mode == "train":
    # get checkpoint
    best_output_eval_file = os.path.join(FLAGS.output_dir, "best_eval_results.txt")
    with tf.gfile.GFile(best_output_eval_file, "r") as reader:
      best_checkpoint_path = reader.readline().replace("Best checkpoint path: ", "").replace("\n", "")
    
    eval_examples = processor.get_test_examples()
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())

    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      assert len(eval_examples) % FLAGS.eval_batch_size == 0
      eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps, checkpoint_path=best_checkpoint_path)

    output_eval_file = os.path.join(FLAGS.output_dir, "test_eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
      tf.logging.info("***** Eval results on TEST set *****")
      writer.write("Checkpoint path: {}\n".format(best_checkpoint_path))
      for key in sorted(result.keys()):
        tf.logging.info("  %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))

  if FLAGS.mode == "predict":
    # get checkpoint
    best_output_eval_file = os.path.join(FLAGS.output_dir, "best_eval_results.txt")
    with tf.gfile.GFile(best_output_eval_file, "r") as reader:
      best_checkpoint_path = reader.readline().replace("Best checkpoint path: ", "").replace("\n", "")

    if FLAGS.pred_ds == 'train':
      predict_examples = processor.get_train_examples('og')
    elif FLAGS.pred_ds == 'dev':
      predict_examples = processor.get_dev_examples()
    else: # = 'test'
      predict_examples = processor.get_test_examples()
    num_actual_predict_examples = len(predict_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on.
      while len(predict_examples) % FLAGS.predict_batch_size != 0:
        predict_examples.append(PaddingInputExample())

    predict_file = os.path.join(FLAGS.output_dir, "predict_{}.tf_record".format(FLAGS.pred_ds))
    file_based_convert_examples_to_features(predict_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            predict_file)

    tf.logging.info("***** Running prediction on {} set*****".format(FLAGS.pred_ds))
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    result = estimator.predict(input_fn=predict_input_fn, checkpoint_path=best_checkpoint_path)

    output_predict_file = os.path.join(FLAGS.output_dir, "preds_on_{}.tsv".format(FLAGS.pred_ds))
    with tf.gfile.GFile(output_predict_file, "w") as writer:
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "sentence_pair": SentencePairClassificationProcessor,
        "lcqmc_pair": LCQMCPairClassificationProcessor
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    # Note: on Cloud TPU, omitting the cluster resolver fails with "Invalid TPU configuration, ensure ClusterResolver is passed to tpu."
    print("###tpu_cluster_resolver:", tpu_cluster_resolver)
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)  # TODO
        print("###length of total train_examples:", len(train_examples))
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        train_file_exists = os.path.exists(train_file)
        print("###train_file_exists:", train_file_exists, " ;train_file:",
              train_file)
        if not train_file_exists:  # if the tf_record file does not exist, convert it from the raw text file.  # TODO
            file_based_convert_examples_to_features(train_examples, label_list,
                                                    FLAGS.max_seq_length,
                                                    tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        #######################################################################################################################
        # evaluate all checkpoints; you can use the checkpoint with the best dev accuracy
        steps_and_files = []
        filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
        for filename in filenames:
            if filename.endswith(".index"):
                ckpt_name = filename[:-6]
                cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
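                # checkpoint files are named like "model.ckpt-1000.index", so the global step is the suffix after the last "-"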
                global_step = int(cur_filename.split("-")[-1])
                tf.logging.info("Add {} to eval list.".format(cur_filename))
                steps_and_files.append([global_step, cur_filename])
        steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

        output_eval_file = os.path.join(FLAGS.data_dir,
                                        "eval_results_albert_zh.txt")
        print("output_eval_file:", output_eval_file)
        tf.logging.info("output_eval_file:" + output_eval_file)
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            for global_step, filename in sorted(steps_and_files,
                                                key=lambda x: x[0]):
                result = estimator.evaluate(input_fn=eval_input_fn,
                                            steps=eval_steps,
                                            checkpoint_path=filename)

                tf.logging.info("***** Eval results %s *****" % (filename))
                writer.write("***** Eval results %s *****\n" % (filename))
                for key in sorted(result.keys()):
                    tf.logging.info("  %s = %s", key, str(result[key]))
                    writer.write("%s = %s\n" % (key, str(result[key])))
        #######################################################################################################################

        #result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
        #
        #output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        #with tf.gfile.GFile(output_eval_file, "w") as writer:
        #  tf.logging.info("***** Eval results *****")
        #  for key in sorted(result.keys()):
        #    tf.logging.info("  %s = %s", key, str(result[key]))
        #    writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
Example #7
0
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "iflytek": iFLYTEKDataProcessor,
        "cluewsc2020": WSCProcessor,
        "cmnli": CMNLIProcessor,
        "csl": CslProcessor,
        "afqmc": AFQMCProcessor,
        "tnews": TnewsProcessor,
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "xnli": XnliProcessor,
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
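    # cap iterations_per_loop at save_checkpoints_steps: the TPU loop only returns to the host (where checkpoints are written) at the end of each loop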
    if FLAGS.do_train:
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = tf.contrib.tpu.RunConfig(
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train or FLAGS.do_eval:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        train_file_exists = os.path.exists(train_file)
        if not train_file_exists:
            file_based_convert_examples_to_features(train_examples, label_list,
                                                    FLAGS.max_seq_length,
                                                    tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        ### select best model ###

        best_trial_info_file = os.path.join(FLAGS.output_dir, "best_trial.txt")
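        # best_trial.txt stores "global_step:best_metric_global_step:best_metric_value", so an interrupted evaluation can resume where it left off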

        def _best_trial_info():
            """Returns information about which checkpoints have been evaled so far."""
            if tf.gfile.Exists(best_trial_info_file):
                with tf.gfile.GFile(best_trial_info_file, "r") as best_info:
                    global_step, best_metric_global_step, metric_value = (
                        best_info.read().split(":"))
                    global_step = int(global_step)
                    best_metric_global_step = int(best_metric_global_step)
                    metric_value = float(metric_value)
            else:
                metric_value = -1
                best_metric_global_step = -1
                global_step = -1
            tf.logging.info(
                "Best trial info: Step: %s, Best Value Step: %s, "
                "Best Value: %s", global_step, best_metric_global_step,
                metric_value)
            return global_step, best_metric_global_step, metric_value

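        # delete the three files (.meta / .data-00000-of-00001 / .index) that make up one checkpoint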
        def _remove_checkpoint(checkpoint_path):
            for ext in ["meta", "data-00000-of-00001", "index"]:
                src_ckpt = checkpoint_path + ".{}".format(ext)
                tf.logging.info("removing {}".format(src_ckpt))
                tf.gfile.Remove(src_ckpt)

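        # list checkpoints with a global step greater than curr_step; a checkpoint is only removed while more than one newer candidate exists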
        def _find_valid_cands(curr_step):
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")

        key_name = "eval_accuracy"

        global_step, best_perf_global_step, best_perf = _best_trial_info()
        writer = tf.gfile.GFile(output_eval_file, "w")
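        # poll the output dir until training reaches num_train_steps, evaluating each new checkpoint as it appears and keeping track of the best one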
        while global_step < num_train_steps:
            #while global_step < FLAGS.train_step:
            steps_and_files = {}
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
                    gstep = int(cur_filename.split("-")[-1])
                    if gstep not in steps_and_files:
                        tf.logging.info(
                            "Add {} to eval list.".format(cur_filename))
                        steps_and_files[gstep] = cur_filename
            tf.logging.info("found {} files.".format(len(steps_and_files)))
            if not steps_and_files:
                tf.logging.info(
                    "found 0 file, global step: {}. Sleeping.".format(
                        global_step))
                time.sleep(60)
            else:
                for checkpoint in sorted(steps_and_files.items()):
                    step, checkpoint_path = checkpoint
                    if global_step >= step:
                        if (best_perf_global_step != step
                                and len(_find_valid_cands(step)) > 1):
                            _remove_checkpoint(checkpoint_path)
                        continue
                    result = estimator.evaluate(
                        input_fn=eval_input_fn,
                        steps=eval_steps,
                        checkpoint_path=checkpoint_path)
                    global_step = result["global_step"]
                    tf.logging.info("***** Eval results *****")
                    for key in sorted(result.keys()):
                        tf.logging.info("  %s = %s", key, str(result[key]))
                        writer.write("%s = %s\n" % (key, str(result[key])))
                    writer.write("best = {}\n".format(best_perf))
                    if result[key_name] > best_perf:
                        best_perf = result[key_name]
                        best_perf_global_step = global_step
                    elif len(_find_valid_cands(global_step)) > 1:
                        _remove_checkpoint(checkpoint_path)
                    writer.write("=" * 50 + "\n")
                    writer.flush()
                    with tf.gfile.GFile(best_trial_info_file,
                                        "w") as best_info:
                        best_info.write("{}:{}:{}".format(
                            global_step, best_perf_global_step, best_perf))
        writer.close()

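        # rename the three files of the best checkpoint to model.ckpt-best; the predict branch below loads this checkpoint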
        for ext in ["meta", "data-00000-of-00001", "index"]:
            src_ckpt = "model.ckpt-{}.{}".format(best_perf_global_step, ext)
            tgt_ckpt = "model.ckpt-best.{}".format(ext)
            tf.logging.info("saving {} to {}".format(src_ckpt, tgt_ckpt))
            tf.gfile.Rename(os.path.join(FLAGS.output_dir, src_ckpt),
                            os.path.join(FLAGS.output_dir, tgt_ckpt),
                            overwrite=True)
        #######################################################################################################################
        '''output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
      tf.logging.info("***** Eval results *****")
      for key in sorted(result.keys()):
        tf.logging.info("  %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))
    '''
    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

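        # load the checkpoint that the do_eval branch renamed to model.ckpt-best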
        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        result = estimator.predict(input_fn=predict_input_fn,
                                   checkpoint_path=checkpoint_path)
        index2label_map = {}
        for (i, label) in enumerate(label_list):
            index2label_map[i] = label
        output_predict_file_label_name = task_name + "_predict.json"
        output_predict_file_label = os.path.join(
            FLAGS.output_dir, output_predict_file_label_name)
        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file_label, "w") as writer_label:
            with tf.gfile.GFile(output_predict_file, "w") as writer:
                num_written_lines = 0
                tf.logging.info("***** Predict results *****")
                for (i, prediction) in enumerate(result):
                    probabilities = prediction["probabilities"]
                    label_index = probabilities.argmax(0)
                    if i >= num_actual_predict_examples:
                        break
                    output_line = "\t".join(
                        str(class_probability)
                        for class_probability in probabilities) + "\n"
                    test_label_dict = {}
                    test_label_dict["id"] = i
                    test_label_dict["label"] = str(
                        index2label_map[label_index])
                    if task_name == "tnews":
                        test_label_dict["label_desc"] = ""
                    writer.write(output_line)
                    json.dump(test_label_dict, writer_label)
                    writer_label.write("\n")
                    num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
Example #8
0
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "emotion": EmotionProcessor,
        "entity": EntityProcessor,
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    session_config = tf.ConfigProto(log_device_placement=False,
                                    inter_op_parallelism_threads=0,
                                    intra_op_parallelism_threads=0,
                                    allow_soft_placement=True)

    run_config = tf.estimator.RunConfig(model_dir=FLAGS.output_dir,
                                        save_summary_steps=500,
                                        save_checkpoints_steps=500,
                                        session_config=session_config)

    # run_config = tf.contrib.tpu.RunConfig(
    #     cluster=tpu_cluster_resolver,
    #     master=FLAGS.master,
    #     model_dir=FLAGS.output_dir,
    #     save_checkpoints_steps=FLAGS.save_checkpoints_steps,
    #     tpu_config=tf.contrib.tpu.TPUConfig(
    #         iterations_per_loop=FLAGS.iterations_per_loop,
    #         num_shards=FLAGS.num_tpu_cores,
    #         per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train and FLAGS.do_eval:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        # use_tpu=FLAGS.use_tpu,
        # use_one_hot_embeddings=FLAGS.use_tpu
    )

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    # estimator = tf.contrib.tpu.TPUEstimator(
    #     use_tpu=FLAGS.use_tpu,
    #     model_fn=model_fn,
    #     config=run_config,
    #     train_batch_size=FLAGS.train_batch_size,
    #     eval_batch_size=FLAGS.eval_batch_size,
    #     predict_batch_size=FLAGS.predict_batch_size)
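    # the plain tf.estimator.Estimator, unlike TPUEstimator, does not inject batch_size automatically, so it is passed in via params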
    params = {'batch_size': FLAGS.train_batch_size}
    estimator = tf.estimator.Estimator(model_fn,
                                       params=params,
                                       config=run_config)
    if FLAGS.do_train and FLAGS.do_eval:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        if not os.path.exists(train_file):
            file_based_convert_examples_to_features(train_examples, label_list,
                                                    FLAGS.max_seq_length,
                                                    tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        #estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        #result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
        # early stop hook
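        # stop_if_no_decrease_hook requests an early stop once the eval "loss" metric has not decreased
        # for max_steps_without_decrease global steps; with run_every_steps=save_checkpoints_steps the
        # check coincides with checkpoint saves. Here max_steps_without_decrease equals num_train_steps,
        # so it effectively never triggers before training completes.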
        early_stopping_hook = tf.contrib.estimator.stop_if_no_decrease_hook(
            estimator=estimator,
            metric_name='loss',
            max_steps_without_decrease=num_train_steps,
            eval_dir=None,
            min_steps=0,
            run_every_secs=None,
            run_every_steps=FLAGS.save_checkpoints_steps)
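        # train_and_evaluate alternates training and evaluation: eval runs periodically on the most recently written checkpoint (subject to EvalSpec's default throttling)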
        train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                            max_steps=num_train_steps,
                                            hooks=[early_stopping_hook])
        eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)


#     output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
#     with tf.gfile.GFile(output_eval_file, "w") as writer:
#       tf.logging.info("***** Eval results *****")
#       for key in sorted(result.keys()):
#         tf.logging.info("  %s = %s", key, str(result[key]))
#         writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.txt")
        tf.logging.info("***** Predict results *****")
        res = []
        label_lists = processor.get_labels()
        for (i, prediction) in enumerate(result):
            #print(prediction)
            probabilities = prediction["probabilities"]
            pred_label = label_lists[np.argmax(probabilities)]
            res.append(pred_label)

        processor.save_predict(res, output_predict_file)
Example #9
0
def main(_):
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

    processors = {
        "cola": ColaProcessor,
        "mnlim": MnliMProcessor,
        "mnlimm": MnliMMProcessor,
        "mrpc": MrpcProcessor,
        "qnli": QnliProcessor,
        "qqp": QqpProcessor,
        "rte": RteProcessor,
        "sst2": Sst2Processor,
        "stsb": StsbProcessor,
        "wnli": WnliProcessor,
        "ax": AxProcessor,
        "mnlimdevastest": MnliMDevAsTestProcessor
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and \
       not FLAGS.do_eval and \
       not FLAGS.do_pred:
        raise ValueError(
            "At least one of 'do_train', 'do_eval' or 'do_pred' must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.io.gfile.makedirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    print("Current task", task_name)

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    # special handling for mnlimdevastest
    if task_name == 'mnlimdevastest':
        task_name = 'mnlim'

    label_list = processor.get_labels()
    print("Label list of current task", label_list)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    train_examples = processor.get_train_examples(FLAGS.data_dir)
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    num_actual_train_examples = len(train_examples)
    num_actual_eval_examples = len(eval_examples)
    print("num_actual_train_examples", num_actual_train_examples)
    print("num_actual_eval_examples", num_actual_eval_examples)
    if FLAGS.do_pred:
        test_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_test_examples = len(test_examples)
        print("num_actual_test_examples", num_actual_test_examples)

    batch_size = FLAGS.train_batch_size
    epochs = FLAGS.num_train_epochs
    embed_dim = FLAGS.hidden_size  # hidden size, 768 for BERT-base, 512 for BERT-small
    seq_length = FLAGS.max_seq_length
    num_labels = len(label_list)

    # Define some placeholders for the input
    input_ids_ph = tf.compat.v1.placeholder(tf.int32,
                                            shape=[None, seq_length],
                                            name='input_ids')
    input_mask_ph = tf.compat.v1.placeholder(tf.int32,
                                             shape=[None, seq_length],
                                             name='input_mask')
    segment_ids_ph = tf.compat.v1.placeholder(tf.int32,
                                              shape=[None, seq_length],
                                              name='segment_ids')
    label_ids_ph = tf.compat.v1.placeholder(tf.int32,
                                            shape=[
                                                None,
                                            ],
                                            name='label_ids')

    tf.compat.v1.logging.info("Running freezing experiments!")

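    # total optimization steps: full batches per epoch (integer division) times the number of epochs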
    num_train_steps = num_actual_train_examples // batch_size * epochs
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    # get the layer(s) which need freezing.
    if FLAGS.layers is None:
        raise ValueError("In freezing experiments, layers must not be None. ")
    layer_folder_name = FLAGS.layers
    freeze_layers = list(map(int, FLAGS.layers.split(',')))
    freeze_layers.sort()
    print("Current layers: ", freeze_layers)

    # multiple random runs
    if FLAGS.tf_seed is not None:
        tf.compat.v1.random.set_random_seed(FLAGS.tf_seed)
        layer_folder_name = "{}_{}".format(layer_folder_name, FLAGS.tf_seed)

    # these placeholders control the dropout keep probability and the training/eval mode flag
    keep_prob_ph = tf.compat.v1.placeholder(tf.float32, name="keep_prob")
    is_training_ph = tf.compat.v1.placeholder(tf.bool, name='is_training')

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training_ph,
        input_ids=input_ids_ph,  # input_ids,
        input_mask=input_mask_ph,  # input_mask,
        token_type_ids=segment_ids_ph,  # segment_ids,
        use_one_hot_embeddings=False,
        use_estimator=False)

    output_layer = model.get_pooled_output()
    output_layer = tf.nn.dropout(output_layer, keep_prob=keep_prob_ph)
    output_weights = tf.get_variable(
        "output_weights", [num_labels, embed_dim],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels],
                                  initializer=tf.zeros_initializer())
    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    with tf.compat.v1.variable_scope("loss"):
        # for stsb
        if num_labels == 1:
            logits = tf.squeeze(logits, [-1])
            per_example_loss = tf.square(logits - label_ids_ph)
            loss = tf.reduce_mean(per_example_loss)
        else:
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            one_hot_labels = tf.one_hot(label_ids_ph,
                                        depth=num_labels,
                                        dtype=tf.float32)
            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs,
                                              axis=-1)
            loss = tf.reduce_mean(per_example_loss)
            predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)

    # metrics and summaries
    # metric is a dict of tf.metrics results, each a (value_tensor, update_op) pair
    metric = metric_fn(per_example_loss, label_ids_ph, logits, num_labels,
                       task_name)
    metric_name = list(metric.keys())
    metric_val = [m[0] for m in metric.values()]
    metric_op = [m[1] for m in metric.values()]

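    # metric values are fed back in through placeholders so the same scalar summaries can be written by both the train and eval summary writers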
    metric_phs = [
        tf.compat.v1.placeholder(tf.float32, name="{}_ph".format(key))
        for key in metric.keys()
    ]
    summaries = [
        tf.compat.v1.summary.scalar(key, metric_phs[i])
        for i, key in enumerate(metric.keys())
    ]
    train_summary_total = tf.summary.merge(summaries)
    eval_summary_total = tf.summary.merge(summaries)

    log_dir = FLAGS.output_dir + 'layer_{}/'.format(layer_folder_name)

    init_checkpoint = FLAGS.init_checkpoint
    tvars = tf.compat.v1.trainable_variables()
    var_init = [
        v for v in tvars
        if 'output_weights' not in v.name and 'output_bias' not in v.name
    ]
    var_output = [
        v for v in tvars
        if 'output_weights' in v.name or "output_bias" in v.name
    ]

    # parameters need to be frozen
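    # variable names follow BERT's scoping, e.g. "bert/encoder/layer_0/attention/self/query/kernel", so substring matches on "layer_{i}/..." select per-layer parameter groups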
    if FLAGS.freeze_part == 'ffn':
        var_freeze = [
            v for i in freeze_layers for v in var_init
            if "layer_{}/intermediate/dense".format(i) in v.name
            or "layer_{}/output/dense".format(i) in v.name
        ]
    elif FLAGS.freeze_part == 'att':
        var_freeze = [
            v for i in freeze_layers for v in var_init
            if "layer_{}/attention/output/dense".format(i) in v.name
            or "layer_{}/attention/self".format(i) in v.name
        ]
    elif FLAGS.freeze_part == 'encoder':
        var_freeze = [
            v for i in freeze_layers for v in var_init
            if "layer_{}/".format(i) in v.name
        ]
    elif FLAGS.freeze_part == 'pooler+embedding':
        var_freeze = [
            v for v in var_init if "pooler" in v.name or "embeddings" in v.name
        ]
    elif FLAGS.freeze_part == 'allbutoutput':
        var_freeze = var_init
    elif FLAGS.freeze_part == 'allbutpooler+output':
        var_freeze = [v for v in var_init if 'pooler' not in v.name]
    elif FLAGS.freeze_part == 'allbutonelayer+output':
        var_freeze = [
            v for i in freeze_layers for v in var_init
            if "layer_{}/".format(i) not in v.name
        ]
    elif FLAGS.freeze_part == 'allbutonelayer+pooler+output':
        var_freeze = [
            v for i in freeze_layers for v in var_init
            if "layer_{}/".format(i) not in v.name and 'pooler' not in v.name
        ]
    elif FLAGS.freeze_part == 'nothing':
        var_freeze = []
    else:
        raise ValueError("freeze_part should be specified. ")

    print("Freezing parameters")
    for v in var_freeze:
        print(v)

    if not FLAGS.load_from_finetuned:
        # Init from Model0: the pre-trained checkpoint (encoder/embedding variables only)
        saver_init = tf.train.Saver(var_init)
    else:
        # Init from Model1: a fine-tuned checkpoint that also contains the output layer
        saver_init = tf.train.Saver(var_init + var_output)

    var_train = [v for v in var_init if v not in var_freeze] + var_output
    print("Training parameters")
    for v in var_train:
        print(v)

    train_op = optimization.create_optimizer(loss=loss,
                                             init_lr=FLAGS.learning_rate,
                                             num_train_steps=num_train_steps,
                                             num_warmup_steps=num_warmup_steps,
                                             use_tpu=False,
                                             tvars=var_train)

    saver_all = tf.train.Saver(var_list=var_train + var_freeze, max_to_keep=1)

    # Isolate the variables stored behind the scenes by the metric operation
    var_metric = []
    for key in metric.keys():
        var_metric.extend(
            tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope=key))
    # Define initializer to initialize/reset running variables
    metric_vars_initializer = tf.variables_initializer(var_list=var_metric)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.compat.v1.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        saver_init.restore(sess, init_checkpoint)

        writer = tf.compat.v1.summary.FileWriter(log_dir + 'log/train/',
                                                 sess.graph)
        writer_eval = tf.compat.v1.summary.FileWriter(log_dir + 'log/eval/')

        # if the number of eval examples is at most 1000, load them all at once; otherwise they are loaded batch by batch.
        if num_actual_eval_examples <= 1000:
            eval_input_ids, eval_input_mask, eval_segment_ids, \
            eval_label_ids, eval_is_real_example = generate_ph_input(batch_size=num_actual_eval_examples,
                                                                     seq_length=seq_length,
                                                                     examples=eval_examples,
                                                                     label_list=label_list,
                                                                     tokenizer=tokenizer)

        start_metric = {"eval_{}".format(key): 0 for key in metric_name}
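        # start_metric records the dev-set metrics measured at step 1, i.e. before fine-tuning has had any effect (filled in the training loop below)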
        if FLAGS.do_train:
            tf.logging.info("***** Run training *****")
            step = 1
            for n in range(epochs):

                np.random.shuffle(train_examples)
                num_batch = num_actual_train_examples // batch_size if num_actual_train_examples % batch_size == 0 \
                    else num_actual_train_examples // batch_size + 1
                id = 0

                for b in range(num_batch):

                    input_ids, input_mask, \
                    segment_ids, label_ids, is_real_example = generate_ph_input(batch_size=batch_size,
                                                                                seq_length=seq_length,
                                                                                examples=train_examples,
                                                                                label_list=label_list,
                                                                                tokenizer=tokenizer,
                                                                                train_idx_offset=id)
                    id += batch_size

                    sess.run(metric_vars_initializer)
                    sess.run([train_op] + metric_op,
                             feed_dict={
                                 input_ids_ph: input_ids,
                                 input_mask_ph: input_mask,
                                 segment_ids_ph: segment_ids,
                                 label_ids_ph: label_ids,
                                 is_training_ph: True,
                                 keep_prob_ph: 0.9
                             })
                    train_metric_val = sess.run(metric_val)
                    train_summary_str = sess.run(
                        train_summary_total,
                        feed_dict={
                            ph: value
                            for ph, value in zip(metric_phs, train_metric_val)
                        })
                    writer.add_summary(train_summary_str, step)

                    if step % 100 == 0 or step % num_batch == 0 or step == 1:
                        # evaluate on dev set

                        if num_actual_eval_examples <= 1000:

                            sess.run(metric_vars_initializer)
                            sess.run(metric_op,
                                     feed_dict={
                                         input_ids_ph: eval_input_ids,
                                         input_mask_ph: eval_input_mask,
                                         segment_ids_ph: eval_segment_ids,
                                         label_ids_ph: eval_label_ids,
                                         is_training_ph: False,
                                         keep_prob_ph: 1
                                     })
                            eval_metric_val = sess.run(metric_val)
                            eval_summary_str = sess.run(
                                eval_summary_total,
                                feed_dict={
                                    ph: value
                                    for ph, value in zip(
                                        metric_phs, eval_metric_val)
                                })

                        else:
                            num_batch_eval = num_actual_eval_examples // batch_size \
                                if num_actual_eval_examples % batch_size == 0 \
                                else num_actual_eval_examples // batch_size + 1
                            id_eval = 0

                            sess.run(metric_vars_initializer)
                            for _ in range(num_batch_eval):
                                eval_input_ids, eval_input_mask, eval_segment_ids, \
                                eval_label_ids, eval_is_real_example = generate_ph_input(batch_size=batch_size,
                                                                                         seq_length=seq_length,
                                                                                         examples=eval_examples,
                                                                                         label_list=label_list,
                                                                                         tokenizer=tokenizer,
                                                                                         train_idx_offset=id_eval)
                                id_eval += batch_size
                                sess.run(metric_op,
                                         feed_dict={
                                             input_ids_ph: eval_input_ids,
                                             input_mask_ph: eval_input_mask,
                                             segment_ids_ph: eval_segment_ids,
                                             label_ids_ph: eval_label_ids,
                                             is_training_ph: False,
                                             keep_prob_ph: 1
                                         })

                            eval_metric_val = sess.run(metric_val)
                            eval_summary_str = sess.run(
                                eval_summary_total,
                                feed_dict={
                                    ph: value
                                    for ph, value in zip(
                                        metric_phs, eval_metric_val)
                                })

                        writer_eval.add_summary(eval_summary_str, step)

                        if step == 1:
                            for key, val in zip(metric_name, eval_metric_val):
                                start_metric["eval_{}".format(key)] = val

                    if step % 100 == 0 or step % num_batch == 0 or step == 1:
                        train_metric_list = []
                        for i in range(len(train_metric_val)):
                            if metric_name[i] == 'loss':
                                train_metric_list.append(
                                    "{}: %2.4f".format(metric_name[i]) %
                                    train_metric_val[i])
                            else:
                                train_metric_list.append(
                                    "{}: %.4f".format(metric_name[i]) %
                                    train_metric_val[i])
                        train_str = 'Train ' + '|'.join(train_metric_list)

                        eval_metric_list = []
                        for i in range(len(eval_metric_val)):
                            if metric_name[i] == 'loss':
                                eval_metric_list.append(
                                    "{}: %2.4f".format(metric_name[i]) %
                                    eval_metric_val[i])
                            else:
                                eval_metric_list.append(
                                    "{}: %.4f".format(metric_name[i]) %
                                    eval_metric_val[i])
                        eval_str = 'Eval ' + '|'.join(eval_metric_list)

                        print(
                            "Freezing {} | Epoch: %4d/%4d | Batch: %4d/%4d | {} | {}"
                            .format(layer_folder_name, train_str, eval_str) %
                            (n, epochs, b, num_batch))

                    if step % num_batch == 0:
                        saver_all.save(sess,
                                       log_dir +
                                       'freeze_{}'.format(layer_folder_name),
                                       global_step=step)

                    step += 1

            writer.close()
            writer_eval.close()

        end_metric = {"eval_{}".format(key): 0 for key in metric_name}
        if FLAGS.do_eval:
            tf.logging.info("***** Run evaluation *****")
            if num_actual_eval_examples <= 1000:

                sess.run(metric_vars_initializer)
                sess.run(metric_op,
                         feed_dict={
                             input_ids_ph: eval_input_ids,
                             input_mask_ph: eval_input_mask,
                             segment_ids_ph: eval_segment_ids,
                             label_ids_ph: eval_label_ids,
                             is_training_ph: False,
                             keep_prob_ph: 1
                         })
                eval_metric_val = sess.run(metric_val)
                preds = sess.run(predictions,
                                 feed_dict={
                                     input_ids_ph: eval_input_ids,
                                     input_mask_ph: eval_input_mask,
                                     segment_ids_ph: eval_segment_ids,
                                     label_ids_ph: eval_label_ids,
                                     is_training_ph: False,
                                     keep_prob_ph: 1
                                 })
                eval_label_ids_lst = eval_label_ids
            else:
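                # The dev set is too large to feed in a single pass: run it in
                # batches and let the streaming metric ops accumulate across
                # batches before reading them out once at the end.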
                num_batch_eval = num_actual_eval_examples // batch_size \
                    if num_actual_eval_examples % batch_size == 0 \
                    else num_actual_eval_examples // batch_size + 1
                id_eval = 0
                preds = np.zeros(num_actual_eval_examples)
                eval_label_ids_lst = np.zeros(num_actual_eval_examples)

                sess.run(metric_vars_initializer)
                for i in range(num_batch_eval):
                    eval_input_ids, eval_input_mask, eval_segment_ids, \
                    eval_label_ids, eval_is_real_example = generate_ph_input(batch_size=batch_size,
                                                                             seq_length=seq_length,
                                                                             examples=eval_examples,
                                                                             label_list=label_list,
                                                                             tokenizer=tokenizer,
                                                                             train_idx_offset=id_eval)
                    id_eval += batch_size
                    sess.run(metric_op,
                             feed_dict={
                                 input_ids_ph: eval_input_ids,
                                 input_mask_ph: eval_input_mask,
                                 segment_ids_ph: eval_segment_ids,
                                 label_ids_ph: eval_label_ids,
                                 is_training_ph: False,
                                 keep_prob_ph: 1
                             })
                    pred = sess.run(predictions,
                                    feed_dict={
                                        input_ids_ph: eval_input_ids,
                                        input_mask_ph: eval_input_mask,
                                        segment_ids_ph: eval_segment_ids,
                                        label_ids_ph: eval_label_ids,
                                        is_training_ph: False,
                                        keep_prob_ph: 1
                                    })
                    preds[i * batch_size:min(id_eval, num_actual_eval_examples
                                             )] = pred[:]
                    eval_label_ids_lst[i * batch_size:min(
                        id_eval, num_actual_eval_examples)] = eval_label_ids[:]
                eval_metric_val = sess.run(metric_val)

            for key, val in zip(metric_name, eval_metric_val):
                end_metric["eval_{}".format(key)] = val

            output_predict_file = os.path.join(log_dir, 'dev_predictions.tsv')
            writer_output = tf.io.gfile.GFile(output_predict_file, "w")
            preds = preds.astype(int)
            eval_label_ids_lst = eval_label_ids_lst.astype(int)

            num_written_lines = 0
            if task_name != 'stsb':
                writer_output.write(
                    "ID \t Label ID \t Label \t Ground Truth ID \t Ground Truth \n"
                )
            else:
                writer_output.write("ID \t Label \n")
            for (i, pred) in enumerate(preds):
                if task_name != 'stsb':
                    writer_output.write("{} \t {} \t {} \t {} \t {} \n".format(
                        i, pred, label_list[pred], eval_label_ids_lst[i],
                        label_list[eval_label_ids_lst[i]]))
                else:
                    writer_output.write("{} \t {} \n".format(i, pred))
            writer_output.close()
            tf.logging.info("***** Finished writing *****")

        print("Start metric", start_metric)
        print("End metric", end_metric)

        test_metric = {"test_{}".format(key): 0 for key in metric_name}
        if FLAGS.do_pred:
            # if number of test examples < 1000, just load it directly, or load by batch.
            # prediction
            tf.logging.info("***** Predict results *****")
            if num_actual_test_examples <= 1000:
                test_input_ids, test_input_mask, test_segment_ids, \
                test_label_ids, test_is_real_example = generate_ph_input(batch_size=num_actual_test_examples,
                                                                         seq_length=seq_length,
                                                                         examples=test_examples,
                                                                         label_list=label_list,
                                                                         tokenizer=tokenizer)
                sess.run(metric_vars_initializer)
                sess.run(metric_op,
                         feed_dict={
                             input_ids_ph: test_input_ids,
                             input_mask_ph: test_input_mask,
                             segment_ids_ph: test_segment_ids,
                             label_ids_ph: test_label_ids,
                             is_training_ph: False,
                             keep_prob_ph: 1
                         })
                test_metric_val = sess.run(metric_val)
                preds = sess.run(predictions,
                                 feed_dict={
                                     input_ids_ph: test_input_ids,
                                     input_mask_ph: test_input_mask,
                                     segment_ids_ph: test_segment_ids,
                                     label_ids_ph: test_label_ids,
                                     is_training_ph: False,
                                     keep_prob_ph: 1
                                 })
                test_label_ids_lst = test_label_ids
            else:
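                # Same batching scheme for the test set: accumulate the metrics
                # batch by batch and stitch the per-batch predictions back into
                # a single array.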
                num_batch_test = num_actual_test_examples // batch_size \
                    if num_actual_test_examples % batch_size == 0 \
                    else num_actual_test_examples // batch_size + 1
                id_test = 0

                preds = np.zeros(num_actual_test_examples)
                test_label_ids_lst = np.zeros(num_actual_test_examples)
                sess.run(metric_vars_initializer)
                for i in range(num_batch_test):
                    test_input_ids, test_input_mask, test_segment_ids, \
                    test_label_ids, test_is_real_example = generate_ph_input(batch_size=batch_size,
                                                                             seq_length=seq_length,
                                                                             examples=test_examples,
                                                                             label_list=label_list,
                                                                             tokenizer=tokenizer,
                                                                             train_idx_offset=id_test)
                    id_test += batch_size
                    sess.run(metric_op,
                             feed_dict={
                                 input_ids_ph: test_input_ids,
                                 input_mask_ph: test_input_mask,
                                 segment_ids_ph: test_segment_ids,
                                 label_ids_ph: test_label_ids,
                                 is_training_ph: False,
                                 keep_prob_ph: 1
                             })
                    pred = sess.run(predictions,
                                    feed_dict={
                                        input_ids_ph: test_input_ids,
                                        input_mask_ph: test_input_mask,
                                        segment_ids_ph: test_segment_ids,
                                        label_ids_ph: test_label_ids,
                                        is_training_ph: False,
                                        keep_prob_ph: 1
                                    })
                    preds[i * batch_size:min(id_test, num_actual_test_examples
                                             )] = pred[:]
                    test_label_ids_lst[i * batch_size:min(
                        id_test, num_actual_test_examples)] = test_label_ids[:]
                test_metric_val = sess.run(metric_val)
            for key, val in zip(metric_name, test_metric_val):
                test_metric["test_{}".format(key)] = val

            output_predict_file = os.path.join(log_dir, 'test_predictions.tsv')
            submit_predict_file = os.path.join(
                log_dir, "{}.tsv".format(standard_file_name[task_name]))
            writer_output = tf.io.gfile.GFile(output_predict_file, "w")
            writer_submit = tf.io.gfile.GFile(submit_predict_file, 'w')
            preds = preds.astype(int)
            test_label_ids_lst = test_label_ids_lst.astype(int)

            num_written_lines = 0
            if task_name != 'stsb':
                writer_output.write(
                    "ID \t Label ID \t Label \t Ground Truth ID \t Ground Truth \n"
                )
            else:
                writer_output.write("ID \t Label \n")
            writer_submit.write("ID \t Label \n")
            for (i, pred) in enumerate(preds):
                if task_name != 'stsb':
                    writer_output.write("{} \t {} \t {} \t {} \t {} \n".format(
                        i, pred, label_list[pred], test_label_ids_lst[i],
                        label_list[test_label_ids_lst[i]]))
                    writer_submit.write("{} \t {} \n".format(
                        i, label_list[pred]))
                else:
                    writer_output.write("{} \t {} \n".format(i, pred))
                    writer_submit.write("{} \t {} \n".format(i, pred))
            writer_output.close()
            writer_submit.close()
            tf.logging.info("***** Finished writing *****")

        with tf.io.gfile.GFile(FLAGS.output_dir + 'results.txt',
                               'a') as writer:
            eval_start, eval_end, test_end = [], [], []
            for metric in metric_name:
                if metric != 'loss':
                    eval_start.append("{}: %.4f".format(metric) %
                                      start_metric["eval_{}".format(metric)])
                    eval_end.append("{}: %.4f".format(metric) %
                                    end_metric["eval_{}".format(metric)])
                    test_end.append("{}: %.4f".format(metric) %
                                    test_metric["test_{}".format(metric)])

            writer.write(
                "Freezing {}: Dev start: {} | Dev end: {} | Test end: {}\n".
                format(layer_folder_name, ','.join(eval_start),
                       ','.join(eval_end), ','.join(test_end)))
Beispiel #10
0
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "ner": CoNLL2013Processor,
  }

  skip_flags = ('load_config_file', 'do_train', 'do_eval', 'do_predict',
                'eval_file', 'test_file', 'pred_results_file', 'checkpoint_path')
  if FLAGS.load_config_file:
    with open(FLAGS.load_config_file, 'r') as reader:
      for name, value in json.loads(reader.read()).items():
        if name not in skip_flags:
          FLAGS.__flags[name].value = value

  bert_config_file = os.path.join(FLAGS.init_checkpoint_dir, 'bert_config.json')
  vocab_file = os.path.join(FLAGS.init_checkpoint_dir, 'vocab.txt')
  if FLAGS.finetune_checkpoint:
    init_checkpoint = FLAGS.finetune_checkpoint
  else:
    init_checkpoint = os.path.join(FLAGS.init_checkpoint_dir, 'bert_model.ckpt')
  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                init_checkpoint)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict' must be True.")

  bert_config = modeling.BertConfig.from_json_file(bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  tf.gfile.MakeDirs(FLAGS.output_dir)

  # NOTE get TF logger
  logging.basicConfig(
      level=logging.INFO,
      format="INFO#%(asctime)s# %(message)s",
      datefmt='%Y-%m-%d %H:%M:%S',
      handlers=[logging.FileHandler(os.path.join(FLAGS.output_dir, 'log.log'))]
  )
  tf.logging.info("*"*10+" Config "+"*"*10+": \n{}".format(
      FLAGS.flag_values_dict()))
  if FLAGS.do_train:
    with open(os.path.join(FLAGS.output_dir, 'config.json'), 
              'w', encoding='utf-8') as writer:
      writer.write(to_json_string(FLAGS.flag_values_dict()))

  task_name = FLAGS.task_name.lower()

  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()

  label_list = processor.get_labels()
  label_map = {}
  for (i, label) in enumerate(label_list):
    label_map[label] = i

  tokenizer = tokenization.FullTokenizer(
      vocab_file=vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      keep_checkpoint_max=FLAGS.keep_checkpoint_max,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      label_list=label_list,
      init_checkpoint=init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
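    # Shard the training examples across several TFRecord files; the double
    # negation below is just a ceiling division (ceil(len / shard_size)).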
    num_train_files = -(-len(train_examples) // FLAGS.shard_size)
    train_files = [os.path.join(FLAGS.output_dir, "train.tf_record_"+f"{i:05d}") 
                    for i in range(num_train_files)]
    tf.logging.info("*** Writing training examples to tf record files ***")
    for train_file in train_files:
      tf.logging.info("  %s", train_file)                    

    if not FLAGS.read_record:
      for _f in tf.gfile.Glob(
          os.path.join(FLAGS.output_dir, "train.tf_record*")):
        tf.gfile.Remove(_f)
      file_based_convert_examples_to_features(
          train_examples, label_map, FLAGS.max_seq_length, FLAGS.seq_overlap,
          tokenizer, train_files)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = file_based_input_fn_builder(
        input_files=train_files,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())

    eval_files = [os.path.join(FLAGS.output_dir, "eval.tf_record")]
    file_based_convert_examples_to_features(
        eval_examples, label_map, FLAGS.max_seq_length, FLAGS.seq_overlap, tokenizer, eval_files)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      assert len(eval_examples) % FLAGS.eval_batch_size == 0
      eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_files=eval_files,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    tf.logging.info("***** Eval results *****")
    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps, 
        checkpoint_path=FLAGS.checkpoint_path)
    with open(output_eval_file, "a") as writer:
      tf.logging.info(write_eval(writer, results, label_list, label_list[1:]))

    # NOTE self-defined metric
    predictions = estimator.predict(input_fn=eval_input_fn, 
        checkpoint_path=FLAGS.checkpoint_path,
        yield_single_examples=True)
    predict_entitys = reconstruct_from_estimator(eval_examples, predictions)
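    # After reconstruction the predictions generator should be exhausted;
    # anything left over means examples were lost while splitting long text.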
    try:
      next(predictions)
    except StopIteration:
      print("Evaluation complete.")
    else:
      tf.logging.info("ERROR: the number of output examples does not match! "
                      "This is likely due to bugs in splitting and "
                      "reconstructing long text.")

    pred = [[e.decode() for e in entity] for entity in predict_entitys]
    label = [example.text_entity for example in eval_examples]
    sb_measure = SpanBasedF1Measure()
    sb_measure(pred, label)
    tf.logging.info(sb_measure.log_measure(output_eval_file))

    
  if FLAGS.do_predict:
    " For demo only, the actual predict phase should be run by other API "
    predict_examples = processor.get_test_examples(FLAGS.data_dir)

    predict_files = [os.path.join(FLAGS.output_dir, "predict.tf_record")]
    file_based_convert_examples_to_features(predict_examples, label_map,
                                            FLAGS.max_seq_length, FLAGS.seq_overlap,
                                            tokenizer, predict_files)

    tf.logging.info("***** Running prediction*****")
    tf.logging.info("  Num examples = %d", len(predict_examples))
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = file_based_input_fn_builder(
        input_files=predict_files,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    predict_results = estimator.predict(input_fn=predict_input_fn, 
        checkpoint_path=FLAGS.checkpoint_path,
        yield_single_examples=True)

    output_predict_file = os.path.join(FLAGS.output_dir, FLAGS.pred_results_file)
    tf.logging.info("***** Predict results *****")
    # Writing prediction results for CoNLL-2003 perl evaluation conlleval.pl
    with open(output_predict_file, 'w', encoding='utf-8') as writer:
      for example, predict_result in zip(predict_examples, predict_results):
        tokens = [token for word in example.words
                  for token in tokenizer.tokenize(word)]
        words = (word for word in example.words)
        for i, tag in enumerate(predict_result):
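          # Skip WordPiece continuation tokens ("##...") so that each original
          # word is written out exactly once.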
          if tokens[i].startswith("##"):
            continue
          line = "{}\t{}\t{}\n".format(next(words), example.labels[i], tag)
          writer.write(line)
Beispiel #11
0
def main(_):

    tf.logging.set_verbosity(tf.logging.INFO)
    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)
    processor = MrpcProcessor()
    label_list = processor.get_labels()
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)
    tpu_cluster_resolver = None

    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)
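    # A small Flask app wraps the estimator so sentence pairs can be scored
    # over HTTP, e.g. (illustrative request against the port opened below):
    #   curl -X POST -H "Content-Type: application/json" \
    #        -d '{"sentence1": "...", "sentence2": "..."}' http://localhost:5005/pred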
    app = Flask(__name__)
    if FLAGS.do_predict:

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        predict_drop_remainder = True if FLAGS.use_tpu else False

        @app.route('/pred', methods=['POST', 'GET'])
        def index():
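            # Expects a JSON body such as (illustrative values)
            #   {"sentence1": "He ate the apple.", "sentence2": "The apple was eaten."}
            # and returns the probability of each of the two classes.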
            response = {}
            try:
                data = request.json
                predict_examples = [
                    InputExample('predict', data["sentence1"],
                                 data["sentence2"], '0')
                ]
                num_actual_predict_examples = len(predict_examples)
                if FLAGS.use_tpu:
                    # TPU requires a fixed batch size for all batches, therefore the number
                    # of examples must be a multiple of the batch size, or else examples
                    # will get dropped. So we pad with fake examples which are ignored
                    # later on.
                    while len(
                            predict_examples) % FLAGS.predict_batch_size != 0:
                        predict_examples.append(PaddingInputExample())

                file_based_convert_examples_to_features(
                    predict_examples, label_list, FLAGS.max_seq_length,
                    tokenizer, predict_file)

                predict_input_fn = file_based_input_fn_builder(
                    input_file=predict_file,
                    seq_length=FLAGS.max_seq_length,
                    is_training=False,
                    drop_remainder=predict_drop_remainder)

                result = estimator.predict(input_fn=predict_input_fn)

                api_return = []
                # The list is kept so that scoring several sentence pairs in one request can be added later.
                for (i, prediction) in enumerate(result):
                    probabilities = prediction["probabilities"]
                    api_return.append(probabilities)
                response["prediction_0"] = str(list(api_return[0])[0])
                response["prediction_1"] = str(list(api_return[0])[1])

            except Exception:
                response["error_message"] = (
                    "An error occurred; please read the documentation or "
                    "contact me at [email protected]")
            return json.dumps(response)

        app.run("0.0.0.0", port=5005, threaded=True)
Beispiel #12
0
def main(_):

    import time
    start_time = time.time()

    if FLAGS.use_perseus and FLAGS.use_horovod:
        raise ValueError(
            "Could not set use_perseus and use_horovod at the same time.")

    if FLAGS.use_perseus:
        import perseus.tensorflow.horovod as hvd
        hvd.init()

    if FLAGS.use_horovod:
        import horovod.tensorflow as hvd
        hvd.init()

    # Build the session config unconditionally: it is passed to RunConfig
    # below even when neither Perseus nor Horovod is enabled.
    config = tf.ConfigProto()
    if FLAGS.use_perseus or FLAGS.use_horovod:
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list = str(hvd.local_rank())
    if FLAGS.use_xla:
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "news": NewsProcessor,
        "xnli": XnliProcessor,
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

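    # Under Perseus/Horovod only rank 0 keeps a model_dir (and therefore writes
    # checkpoints); the checkpoint interval is divided by the worker count.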
    save_checkpoints_steps = FLAGS.save_checkpoints_steps
    learning_rate = FLAGS.learning_rate
    if FLAGS.use_perseus or FLAGS.use_horovod:
        model_dir = FLAGS.output_dir if hvd.rank() == 0 else None
        save_checkpoints_steps = save_checkpoints_steps // hvd.size()
    else:
        model_dir = FLAGS.output_dir

    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=model_dir,
        session_config=config,
        save_checkpoints_steps=save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    num_workers = 1
    worker_index = 0
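    # Broadcast the initial variables from rank 0 so every worker starts
    # training from identical weights.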
    if FLAGS.use_perseus or FLAGS.use_horovod:
        bcast_hook = [hvd.BroadcastGlobalVariablesHook(0)]
        num_workers = hvd.size()
        worker_index = hvd.rank()
    else:
        bcast_hook = []
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

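        # Each worker consumes only its shard of the data, so the total number
        # of optimizer steps and warmup steps is divided by the worker count.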
        if FLAGS.use_perseus or FLAGS.use_horovod:
            num_train_steps = num_train_steps // hvd.size()
            num_warmup_steps = num_warmup_steps // hvd.size()
            num_workers = hvd.size()
            worker_index = hvd.rank()

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_perseus=FLAGS.use_perseus,
                                use_horovod=FLAGS.use_horovod,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        if FLAGS.use_perseus or FLAGS.use_horovod:
            train_file = os.path.join(
                FLAGS.output_dir, "train-" + str(hvd.rank()) + ".tf_record")
        else:
            train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            num_workers=num_workers,
            worker_index=worker_index)
        estimator.train(input_fn=train_input_fn,
                        max_steps=num_train_steps,
                        hooks=bcast_hook)

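    # Run evaluation and prediction on rank 0 only, to avoid duplicated work
    # and clashing output files.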
    do_eval = FLAGS.do_eval
    if FLAGS.use_perseus or FLAGS.use_horovod:
        if hvd.rank() == 0:
            do_eval = FLAGS.do_eval
        else:
            do_eval = False

    if do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder,
            num_workers=1,
            worker_index=0)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    do_predict = FLAGS.do_predict
    if FLAGS.use_perseus or FLAGS.use_horovod:
        if hvd.rank() == 0:
            do_predict = FLAGS.do_predict
        else:
            do_predict = False

    if do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder,
            num_workers=1,
            worker_index=0)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples

    if FLAGS.do_export:
        estimator._export_to_tpu = False
        estimator.export_savedmodel(FLAGS.export_dir, serving_input_fn)

    elapsed_time = time.time() - start_time
    print(f'overall time is {elapsed_time} s')