def main(unused_argv):
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
    steps_per_checkpoint = FLAGS.steps_per_checkpoint
    iterations_per_loop = FLAGS.iterations_per_loop
    eval_steps = _NUM_EVAL_IMAGES // FLAGS.eval_batch_size
    if iterations_per_loop is None or steps_per_checkpoint < iterations_per_loop:
        iterations_per_loop = steps_per_checkpoint
    if FLAGS.mode == "eval":
        iterations_per_loop = eval_steps
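    # Illustrative numbers (not from the original script): with
    # steps_per_checkpoint=1000 and iterations_per_loop=2000, the loop is
    # clamped down to 1000 so each TPU training loop ends on a checkpoint
    # boundary; in eval mode a single loop covers all eval batches.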
    params = {
        "batches_per_epoch": batches_per_epoch,
    }

    config = contrib_tpu.RunConfig(cluster=tpu_cluster_resolver,
                                   model_dir=FLAGS.model_dir,
                                   save_checkpoints_steps=steps_per_checkpoint,
                                   log_step_count_steps=iterations_per_loop,
                                   tpu_config=contrib_tpu.TPUConfig(
                                       iterations_per_loop=iterations_per_loop,
                                       num_shards=FLAGS.num_shards))

    densenet_estimator = contrib_tpu.TPUEstimator(
        model_fn=model_fn,
        config=config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        params=params)

    if FLAGS.mode == "train":
        tf.logging.info(
            "Training for %d steps (%.2f epochs in total)." %
            (FLAGS.train_steps, FLAGS.train_steps / batches_per_epoch))
        densenet_estimator.train(input_fn=ImageNetInput(True),
                                 max_steps=FLAGS.train_steps)

    elif FLAGS.mode == "train_and_eval":
        current_step = 0
        tf.logging.info(
            "Training for %d steps (%.2f epochs in total). Current "
            "step %d" % (FLAGS.train_steps,
                         FLAGS.train_steps / batches_per_epoch, current_step))
        while current_step < FLAGS.train_steps:
            next_checkpoint = min(current_step + steps_per_checkpoint,
                                  FLAGS.train_steps)
            num_steps = next_checkpoint - current_step
            current_step = next_checkpoint
            densenet_estimator.train(input_fn=ImageNetInput(True),
                                     steps=num_steps)

            tf.logging.info("Starting to evaluate.")
            eval_results = densenet_estimator.evaluate(
                input_fn=ImageNetInput(False),
                steps=_NUM_EVAL_IMAGES // FLAGS.eval_batch_size)
            tf.logging.info("Eval results: %s" % eval_results)

    else:

        def terminate_eval():
            tf.logging.info(
                "Terminating eval after %d seconds of no checkpoints" %
                FLAGS.eval_timeout)
            return True

        # Run evaluation when there's a new checkpoint.
        # If the evaluation worker is delayed in processing a new checkpoint,
        # the checkpoint file may be deleted by the trainer before it can be
        # evaluated.
        # Ignore the error in this case.
        for ckpt in evaluation.checkpoints_iterator(
                FLAGS.model_dir,
                min_interval_secs=FLAGS.min_eval_interval,
                timeout=FLAGS.eval_timeout,
                timeout_fn=terminate_eval):

            tf.logging.info("Starting to evaluate.")
            try:
                eval_results = densenet_estimator.evaluate(
                    input_fn=ImageNetInput(False),
                    steps=eval_steps,
                    checkpoint_path=ckpt)
                tf.logging.info("Eval results: %s" % eval_results)
            except tf.errors.NotFoundError:
                tf.logging.info(
                    "Checkpoint %s no longer exists, skipping checkpoint", ckpt)
Example #2
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": classifier_utils.ColaProcessor,
        "mnli": classifier_utils.MnliProcessor,
        "mismnli": classifier_utils.MisMnliProcessor,
        "mrpc": classifier_utils.MrpcProcessor,
        "rte": classifier_utils.RteProcessor,
        "sst-2": classifier_utils.Sst2Processor,
        "sts-b": classifier_utils.StsbProcessor,
        "qqp": classifier_utils.QqpProcessor,
        "qnli": classifier_utils.QnliProcessor,
        "wnli": classifier_utils.WnliProcessor,
    }

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True.")

    if not FLAGS.albert_config_file and not FLAGS.albert_hub_module_handle:
        raise ValueError("At least one of `--albert_config_file` and "
                         "`--albert_hub_module_handle` must be set")

    if FLAGS.albert_config_file:
        albert_config = modeling.AlbertConfig.from_json_file(
            FLAGS.albert_config_file)
        if FLAGS.max_seq_length > albert_config.max_position_embeddings:
            raise ValueError(
                "Cannot use sequence length %d because the ALBERT model "
                "was only trained up to sequence length %d" %
                (FLAGS.max_seq_length, albert_config.max_position_embeddings))
    else:
        albert_config = None  # Get the config from TF-Hub.

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name](
        use_spm=bool(FLAGS.spm_model_file),
        do_lower_case=FLAGS.do_lower_case)

    label_list = processor.get_labels()

    tokenizer = fine_tuning_utils.create_vocab(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file,
        hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        iterations_per_loop = int(min(FLAGS.iterations_per_loop,
                                      FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=int(FLAGS.save_checkpoints_steps),
        keep_checkpoint_max=0,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
    model_fn = classifier_utils.model_fn_builder(
        albert_config=albert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.train_step,
        num_warmup_steps=FLAGS.warmup_step,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        task_name=task_name,
        hub_module=FLAGS.albert_hub_module_handle,
        optimizer=FLAGS.optimizer)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        cached_dir = FLAGS.cached_dir
        if not cached_dir:
            cached_dir = FLAGS.output_dir
        train_file = os.path.join(cached_dir, task_name + "_train.tf_record")
        if not tf.gfile.Exists(train_file):
            classifier_utils.file_based_convert_examples_to_features(
                train_examples, label_list, FLAGS.max_seq_length, tokenizer,
                train_file, task_name)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", FLAGS.train_step)
        train_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_step)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(classifier_utils.PaddingInputExample())
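            # Worked example with hypothetical numbers: 1043 dev examples at
            # eval_batch_size=64 get 45 PaddingInputExamples appended, giving
            # 1088 = 17 * 64 examples, i.e. exactly 17 full TPU batches.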

        cached_dir = FLAGS.cached_dir
        if not cached_dir:
            cached_dir = FLAGS.output_dir
        eval_file = os.path.join(cached_dir, task_name + "_eval.tf_record")
        if not tf.gfile.Exists(eval_file):
            classifier_utils.file_based_convert_examples_to_features(
                eval_examples, label_list, FLAGS.max_seq_length, tokenizer,
                eval_file, task_name)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = FLAGS.use_tpu
        eval_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.eval_batch_size)

        best_trial_info_file = os.path.join(FLAGS.output_dir, "best_trial.txt")

        def _best_trial_info():
            """Returns information about which checkpoints have been evaled so far."""
            if tf.gfile.Exists(best_trial_info_file):
                with tf.gfile.GFile(best_trial_info_file, "r") as best_info:
                    global_step, best_metric_global_step, metric_value = (
                        best_info.read().split(":"))
                    global_step = int(global_step)
                    best_metric_global_step = int(best_metric_global_step)
                    metric_value = float(metric_value)
            else:
                metric_value = -1
                best_metric_global_step = -1
                global_step = -1
            tf.logging.info(
                "Best trial info: Step: %s, Best Value Step: %s, "
                "Best Value: %s", global_step, best_metric_global_step, metric_value)
            return global_step, best_metric_global_step, metric_value
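        # The file holds a single "global_step:best_step:best_value" record,
        # e.g. a hypothetical "2000:1500:0.8812" resumes the sweep at step
        # 2000 with the best metric 0.8812 seen at step 1500.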

        def _remove_checkpoint(checkpoint_path):
            for ext in ["meta", "data-00000-of-00001", "index"]:
                src_ckpt = checkpoint_path + ".{}".format(ext)
                tf.logging.info("removing {}".format(src_ckpt))
                tf.gfile.Remove(src_ckpt)
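        # A checkpoint at step 1500, say, is the file triple
        # model.ckpt-1500.meta / .index / .data-00000-of-00001; removing all
        # three deletes it completely.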

        def _find_valid_cands(curr_step):
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if int(idx) > curr_step:
                        candidates.append(filename)
            return candidates
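        # E.g. a file "model.ckpt-1500.index" yields ckpt_name
        # "model.ckpt-1500" and idx "1500"; the checkpoint counts as a
        # candidate only if its step lies beyond curr_step.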

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")

        if task_name == "sts-b":
            key_name = "pearson"
        elif task_name == "cola":
            key_name = "matthew_corr"
        else:
            key_name = "eval_accuracy"

        global_step, best_perf_global_step, best_perf = _best_trial_info()
        writer = tf.gfile.GFile(output_eval_file, "w")
        while global_step < FLAGS.train_step:
            steps_and_files = {}
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
                    if cur_filename.split("-")[-1] == "best":
                        continue
                    gstep = int(cur_filename.split("-")[-1])
                    if gstep not in steps_and_files:
                        tf.logging.info("Add {} to eval list.".format(cur_filename))
                        steps_and_files[gstep] = cur_filename
            tf.logging.info("found {} files.".format(len(steps_and_files)))
            if not steps_and_files:
                tf.logging.info("found 0 file, global step: {}. Sleeping."
                                .format(global_step))
                time.sleep(60)
            else:
                for checkpoint in sorted(steps_and_files.items()):
                    step, checkpoint_path = checkpoint
                    if global_step >= step:
                        if (best_perf_global_step != step and
                                len(_find_valid_cands(step)) > 1):
                            _remove_checkpoint(checkpoint_path)
                        continue
                    result = estimator.evaluate(
                        input_fn=eval_input_fn,
                        steps=eval_steps,
                        checkpoint_path=checkpoint_path)
                    global_step = result["global_step"]
                    tf.logging.info("***** Eval results *****")
                    for key in sorted(result.keys()):
                        tf.logging.info("  %s = %s", key, str(result[key]))
                        writer.write("%s = %s\n" % (key, str(result[key])))
                    writer.write("best = {}\n".format(best_perf))
                    if result[key_name] > best_perf:
                        best_perf = result[key_name]
                        best_perf_global_step = global_step
                    elif len(_find_valid_cands(global_step)) > 1:
                        _remove_checkpoint(checkpoint_path)
                    writer.write("=" * 50 + "\n")
                    writer.flush()
                    with tf.gfile.GFile(best_trial_info_file, "w") as best_info:
                        best_info.write("{}:{}:{}".format(
                            global_step, best_perf_global_step, best_perf))
        writer.close()

        for ext in ["meta", "data-00000-of-00001", "index"]:
            src_ckpt = "model.ckpt-{}.{}".format(best_perf_global_step, ext)
            tgt_ckpt = "model.ckpt-best.{}".format(ext)
            tf.logging.info("saving {} to {}".format(src_ckpt, tgt_ckpt))
            tf.io.gfile.rename(
                os.path.join(FLAGS.output_dir, src_ckpt),
                os.path.join(FLAGS.output_dir, tgt_ckpt),
                overwrite=True)

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(classifier_utils.PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        classifier_utils.file_based_convert_examples_to_features(
            predict_examples, label_list,
            FLAGS.max_seq_length, tokenizer,
            predict_file, task_name)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = FLAGS.use_tpu
        predict_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size)

        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        result = estimator.predict(
            input_fn=predict_input_fn,
            checkpoint_path=checkpoint_path)

        output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
        output_submit_file = os.path.join(FLAGS.output_dir, "submit_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as pred_writer, \
                tf.gfile.GFile(output_submit_file, "w") as sub_writer:
            sub_writer.write("index" + "\t" + "prediction\n")
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, (example, prediction)) in \
                    enumerate(zip(predict_examples, result)):
                probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                pred_writer.write(output_line)

                if task_name != "sts-b":
                    actual_label = label_list[int(prediction["predictions"])]
                else:
                    actual_label = str(prediction["predictions"])
                sub_writer.write(example.guid + "\t" + actual_label + "\n")
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
Example #3
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    validate_flags_or_throw(albert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        keep_checkpoint_max=0,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    num_warmup_steps = None
    train_examples = squad_utils.read_squad_examples(
        input_file=FLAGS.train_file, is_training=True)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
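    # Illustrative arithmetic with hypothetical numbers: 130,000 training
    # examples, batch size 32, 3 epochs -> int(130000 / 32 * 3) = 12187
    # total training steps; warmup below is a fixed fraction of that.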
    if FLAGS.do_train:
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

        # Pre-shuffle the input to avoid having to make a very large shuffle
        # buffer in the `input_fn`.
        rng = random.Random(12345)
        rng.shuffle(train_examples)

    model_fn = squad_utils.v2_model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        start_n_top=FLAGS.start_n_top,
        end_n_top=FLAGS.end_n_top,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant tensors
        # in memory.

        if not tf.gfile.Exists(FLAGS.train_feature_file):
            train_writer = squad_utils.FeatureWriter(
                filename=FLAGS.train_feature_file, is_training=True)
            squad_utils.convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=True,
                output_fn=train_writer.process_feature,
                do_lower_case=FLAGS.do_lower_case)
            train_writer.close()

        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num orig examples = %d", len(train_examples))
        # tf.logging.info("  Num split examples = %d", train_writer.num_features)
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        del train_examples

        train_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.train_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            is_v2=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        with tf.gfile.Open(FLAGS.predict_file) as predict_file:
            prediction_json = json.load(predict_file)["data"]
        eval_examples = squad_utils.read_squad_examples(
            input_file=FLAGS.predict_file, is_training=False)

        if (tf.gfile.Exists(FLAGS.predict_feature_file)
                and tf.gfile.Exists(FLAGS.predict_feature_left_file)):
            tf.logging.info("Loading eval features from {}".format(
                FLAGS.predict_feature_left_file))
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "rb") as fin:
                eval_features = pickle.load(fin)
        else:
            eval_writer = squad_utils.FeatureWriter(
                filename=FLAGS.predict_feature_file, is_training=False)
            eval_features = []

            def append_feature(feature):
                eval_features.append(feature)
                eval_writer.process_feature(feature)
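            # The closure feeds each feature to two sinks: the TFRecord file
            # consumed by the input_fn, and the in-memory list needed later to
            # map raw predictions back to examples during post-processing.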

            squad_utils.convert_examples_to_features(
                examples=eval_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature,
                do_lower_case=FLAGS.do_lower_case)
            eval_writer.close()

            with tf.gfile.Open(FLAGS.predict_feature_left_file, "wb") as fout:
                pickle.dump(eval_features, fout)

        tf.logging.info("***** Running predictions *****")
        tf.logging.info("  Num orig examples = %d", len(eval_examples))
        tf.logging.info("  Num split examples = %d", len(eval_features))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.predict_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            is_v2=True)

        def get_result(checkpoint):
            """Evaluate the checkpoint on SQuAD v2.0."""
            # If running eval on the TPU, you will need to specify the number of
            # steps.
            reader = tf.train.NewCheckpointReader(checkpoint)
            global_step = reader.get_tensor(tf.GraphKeys.GLOBAL_STEP)
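            # tf.GraphKeys.GLOBAL_STEP is just the string "global_step", so
            # the step count is read straight from the checkpoint without
            # building a graph.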
            all_results = []
            for result in estimator.predict(predict_input_fn,
                                            yield_single_examples=True,
                                            checkpoint_path=checkpoint):
                if len(all_results) % 1000 == 0:
                    tf.logging.info("Processing example: %d" %
                                    (len(all_results)))
                unique_id = int(result["unique_ids"])
                start_top_log_probs = ([
                    float(x) for x in result["start_top_log_probs"].flat
                ])
                start_top_index = [
                    int(x) for x in result["start_top_index"].flat
                ]
                end_top_log_probs = ([
                    float(x) for x in result["end_top_log_probs"].flat
                ])
                end_top_index = [int(x) for x in result["end_top_index"].flat]

                cls_logits = float(result["cls_logits"].flat[0])
                all_results.append(
                    squad_utils.RawResultV2(
                        unique_id=unique_id,
                        start_top_log_probs=start_top_log_probs,
                        start_top_index=start_top_index,
                        end_top_log_probs=end_top_log_probs,
                        end_top_index=end_top_index,
                        cls_logits=cls_logits))

            output_prediction_file = os.path.join(FLAGS.output_dir,
                                                  "predictions.json")
            output_nbest_file = os.path.join(FLAGS.output_dir,
                                             "nbest_predictions.json")
            output_null_log_odds_file = os.path.join(FLAGS.output_dir,
                                                     "null_odds.json")

            result_dict = {}
            cls_dict = {}
            squad_utils.accumulate_predictions_v2(
                result_dict, cls_dict, eval_examples, eval_features,
                all_results, FLAGS.n_best_size, FLAGS.max_answer_length,
                FLAGS.start_n_top, FLAGS.end_n_top)

            return squad_utils.evaluate_v2(
                result_dict, cls_dict, prediction_json, eval_examples,
                eval_features, all_results, FLAGS.n_best_size,
                FLAGS.max_answer_length, output_prediction_file,
                output_nbest_file, output_null_log_odds_file), int(global_step)

        def _find_valid_cands(curr_step):
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        key_name = "f1"
        writer = tf.gfile.GFile(output_eval_file, "w")
        if tf.gfile.Exists(checkpoint_path + ".index"):
            result = get_result(checkpoint_path)
            best_perf = result[0][key_name]
            global_step = result[1]
        else:
            global_step = -1
            best_perf = -1
            checkpoint_path = None
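        # Polling loop: list checkpoints in output_dir, evaluate any step not
        # yet covered, copy an improved checkpoint to model.ckpt-best, prune
        # stale checkpoints, and sleep for a minute when nothing new appears.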
        while global_step < num_train_steps:
            steps_and_files = {}
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
                    if cur_filename.split("-")[-1] == "best":
                        continue
                    gstep = int(cur_filename.split("-")[-1])
                    if gstep not in steps_and_files:
                        tf.logging.info(
                            "Add {} to eval list.".format(cur_filename))
                        steps_and_files[gstep] = cur_filename
            tf.logging.info("found {} files.".format(len(steps_and_files)))
            if not steps_and_files:
                tf.logging.info(
                    "found 0 file, global step: {}. Sleeping.".format(
                        global_step))
                time.sleep(60)
            else:
                for ele in sorted(steps_and_files.items()):
                    step, checkpoint_path = ele
                    if global_step >= step:
                        if len(_find_valid_cands(step)) > 1:
                            for ext in [
                                    "meta", "data-00000-of-00001", "index"
                            ]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tf.logging.info("removing {}".format(src_ckpt))
                                tf.gfile.Remove(src_ckpt)
                        continue
                    result, global_step = get_result(checkpoint_path)
                    tf.logging.info("***** Eval results *****")
                    for key in sorted(result.keys()):
                        tf.logging.info("  %s = %s", key, str(result[key]))
                        writer.write("%s = %s\n" % (key, str(result[key])))
                    if result[key_name] > best_perf:
                        best_perf = result[key_name]
                        for ext in ["meta", "data-00000-of-00001", "index"]:
                            src_ckpt = checkpoint_path + ".{}".format(ext)
                            tgt_ckpt = checkpoint_path.rsplit(
                                "-", 1)[0] + "-best.{}".format(ext)
                            tf.logging.info("saving {} to {}".format(
                                src_ckpt, tgt_ckpt))
                            tf.gfile.Copy(src_ckpt, tgt_ckpt, overwrite=True)
                            writer.write("saved {} to {}\n".format(
                                src_ckpt, tgt_ckpt))
                    writer.write("best {} = {}\n".format(key_name, best_perf))
                    tf.logging.info("  best {} = {}\n".format(
                        key_name, best_perf))

                    if len(_find_valid_cands(global_step)) > 2:
                        for ext in ["meta", "data-00000-of-00001", "index"]:
                            src_ckpt = checkpoint_path + ".{}".format(ext)
                            tf.logging.info("removing {}".format(src_ckpt))
                            tf.gfile.Remove(src_ckpt)
                    writer.write("=" * 50 + "\n")

        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        result, global_step = get_result(checkpoint_path)
        tf.logging.info("***** Final Eval results *****")
        for key in sorted(result.keys()):
            tf.logging.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))
        writer.write("best perf happened at step: {}".format(global_step))
Example #4
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(","):
        input_files.extend(tf.gfile.Glob(input_pattern))

    tf.logging.info("*** Input Files ***")
    for input_file in input_files:
        tf.logging.info("  %s" % input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = model_fn_builder(albert_config=albert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=FLAGS.num_train_steps,
                                num_warmup_steps=FLAGS.num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                optimizer=FLAGS.optimizer,
                                poly_power=FLAGS.poly_power,
                                start_warmup_step=FLAGS.start_warmup_step)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        train_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=True)
        estimator.train(input_fn=train_input_fn,
                        max_steps=FLAGS.num_train_steps)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        global_step = -1
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        writer = tf.gfile.GFile(output_eval_file, "w")
        tf.gfile.MakeDirs(FLAGS.export_dir)
        eval_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=False)
        while global_step < FLAGS.num_train_steps:
            if estimator.latest_checkpoint() is None:
                tf.logging.info("No checkpoint found yet. Sleeping.")
                time.sleep(1)
            else:
                result = estimator.evaluate(input_fn=eval_input_fn,
                                            steps=FLAGS.max_eval_steps)
                global_step = result["global_step"]
                tf.logging.info("***** Eval results *****")
                for key in sorted(result.keys()):
                    tf.logging.info("  %s = %s", key, str(result[key]))
                    writer.write("%s = %s\n" % (key, str(result[key])))
Example #5
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "cola": ColaProcessor,
      "mnli": MnliProcessor,
      "mrpc": MrpcProcessor,
      "xnli": XnliProcessor,
      "qqp": QqpProcessor,
      "qnli": QnliProcessor,
      "sst2": Sst2Processor,
      "rte": QnliProcessor,
  }

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict' must be True.")

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()

  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()

  label_list = processor.get_labels()

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
  run_config = contrib_tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=contrib_tpu.TPUConfig(
          tpu_job_name=FLAGS.tpu_job_name,
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list),
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = contrib_tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    file_based_convert_examples_to_features(train_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            train_file)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())

    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    if not tf.gfile.Exists(eval_file):
      file_based_convert_examples_to_features(eval_examples, label_list,
                                              FLAGS.max_seq_length, tokenizer,
                                              eval_file)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      assert len(eval_examples) % FLAGS.eval_batch_size == 0
      eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_drop_remainder = FLAGS.use_tpu
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    if FLAGS.do_continuous_eval:
      for ckpt in contrib_training.checkpoints_iterator(estimator.model_dir):
        try:
          result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
          output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
          with tf.gfile.GFile(output_eval_file, "w+") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
              tf.logging.info("  %s = %s", key, str(result[key]))
              writer.write("%s = %s\n" % (key, str(result[key])))
        except tf.errors.NotFoundError:
          tf.logging.error("Checkpoint path '%s' no longer exists.", ckpt)

    else:
      ########################################################################
      def _find_valid_cands(curr_step):
        filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
        candidates = []
        for filename in filenames:
          if filename.endswith(".index"):
            ckpt_name = filename[:-6]
            idx = ckpt_name.split("-")[-1]
            if idx != "best" and int(idx) > curr_step:
              candidates.append(filename)
        return candidates

      tf.logging.info("Evaling all models in output dir")
      output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
      checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
      key_name = "eval_accuracy"
      tf.logging.info("Checkpoint path " + checkpoint_path)
      if tf.gfile.Exists(checkpoint_path + ".index"):
        tf.logging.info("Found a best model... not good")
        result = estimator.evaluate(
            input_fn=eval_input_fn,
            steps=eval_steps,
            checkpoint_path=checkpoint_path)
        best_perf = result[key_name]
        global_step = result["global_step"]
      else:
        tf.logging.info("Setting global step to -1")
        global_step = -1
        best_perf = -1
        checkpoint_path = None
      tf.logging.info("Openning writer " + output_eval_file)
      writer = tf.gfile.GFile(output_eval_file, "w")

      steps_and_files = {}
      filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
      tf.logging.info("Models found " + "\n".join(filenames))
      for filename in filenames:
        if filename.endswith(".index"):
          ckpt_name = filename[:-6]
          cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
          if cur_filename.split("-")[-1] == "best":
            continue
          gstep = int(cur_filename.split("-")[-1])
          if gstep not in steps_and_files:
            tf.logging.info("Add {} to eval list.".format(cur_filename))
            steps_and_files[gstep] = cur_filename
      tf.logging.info("found {} files.".format(len(steps_and_files)))
      # steps_and_files = sorted(steps_and_files, key=lambda x: x[0])
      if not steps_and_files:
        tf.logging.info(
            "found 0 file, global step: {}. Sleeping.".format(global_step))
      else:
        for ele in sorted(steps_and_files.items()):
          step, checkpoint_path = ele
          if global_step >= step:
            if len(_find_valid_cands(step)) > 1:
              for ext in ["meta", "data-00000-of-00001", "index"]:
                src_ckpt = checkpoint_path + ".{}".format(ext)
                tf.logging.info("removing {}".format(src_ckpt))
                # GOOGLE-INTERNAL TODO(daniter):
                # Why should we remove checkpoints?
                # tf.gfile.Remove(src_ckpt)
            tf.logging.info("Skipping candidate for some reason")
            continue
          result = estimator.evaluate(
              input_fn=eval_input_fn,
              steps=eval_steps,
              checkpoint_path=checkpoint_path)
          global_step = result["global_step"]
          tf.logging.info("***** Eval results *****")
          for key in sorted(result.keys()):
            tf.logging.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))
          writer.write("best = {}\n".format(best_perf))
          # No need to keep the best this way
          # if result[key_name] > best_perf:
          #   best_perf = result[key_name]
          #   for ext in ["meta", "data-00000-of-00001", "index"]:
          #     src_ckpt = checkpoint_path + ".{}".format(ext)
          #     tgt_ckpt = checkpoint_path.rsplit("-",
          #                                1)[0] + "-best.{}".format(ext)
          #     tf.logging.info("saving {} to {}".format(src_ckpt, tgt_ckpt))
          #     tf.gfile.Copy(src_ckpt, tgt_ckpt, overwrite=True)
          #     writer.write("saved {} to {}\n".format(src_ckpt, tgt_ckpt))

          if len(_find_valid_cands(global_step)) > 1:
            for ext in ["meta", "data-00000-of-00001", "index"]:
              src_ckpt = checkpoint_path + ".{}".format(ext)
              tf.logging.info("removing {}".format(src_ckpt))
              # tf.gfile.Remove(src_ckpt)
          writer.write("=" * 50 + "\n")
      writer.close()

    ########################################################################

    # output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    # with tf.gfile.GFile(output_eval_file, "w") as writer:
    #   tf.logging.info("***** Eval results *****")
    #   for key in sorted(result.keys()):
    #     tf.logging.info("  %s = %s", key, str(result[key]))
    #     writer.write("%s = %s\n" % (key, str(result[key])))

  if FLAGS.do_predict:
    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    num_actual_predict_examples = len(predict_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on.
      while len(predict_examples) % FLAGS.predict_batch_size != 0:
        predict_examples.append(PaddingInputExample())

    predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    file_based_convert_examples_to_features(predict_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            predict_file)

    tf.logging.info("***** Running prediction*****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = FLAGS.use_tpu
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    result = estimator.predict(input_fn=predict_input_fn)

    output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
    with tf.gfile.GFile(output_predict_file, "w") as writer:
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples
Example #6
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval_dev and not FLAGS.do_eval_test:
        raise ValueError("At least one of `do_train`, `do_eval_dev` or "
                         "`do_eval_test' must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    label_list = ["Yes", "No"]
    if FLAGS.from_three_class_model:
        label_list.append("Neutral")

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = get_train()
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    eval_on = []
    if FLAGS.do_eval_dev:
        eval_on.append((get_dev(), "dev"))
    if FLAGS.do_eval_test:
        eval_on.append((get_test(), "test"))

    for eval_examples, name in eval_on:
        eval_file = os.path.join(FLAGS.output_dir, "%s.tf_record" % name)
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running %s *****" % name)
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)
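            # E.g. (hypothetical) 1043 examples at eval_batch_size=64 give
            # int(1043 / 64) = 16 steps, silently dropping the last 19
            # examples; that is the slight error the comment above warns of.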

        eval_drop_remainder = FLAGS.use_tpu
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir,
                                        "%s_eval_results.txt" % name)
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** %s eval results *****" % name)
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
Example #7
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)
  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
  validate_flags_or_throw(bert_config)
  tf.gfile.MakeDirs(FLAGS.output_dir)

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
  run_config = contrib_tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=contrib_tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    num_train_features = FLAGS.train_num_precomputed
    num_train_steps = int(num_train_features / FLAGS.train_batch_size *
                          FLAGS.num_train_epochs)

    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
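    # For example, 100000 precomputed features with train_batch_size = 32 and
    # num_train_epochs = 3 give num_train_steps = 9375; a warmup_proportion of
    # 0.1 then yields num_warmup_steps = 937.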

  model_fn = model_fn_builder(
      bert_config=bert_config,
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this falls back to normal Estimator on CPU or GPU.
  estimator = contrib_tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    tf.logging.info("***** Running training on precomputed features *****")
    tf.logging.info("  Num split examples = %d", num_train_features)
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_filenames = tf.gfile.Glob(FLAGS.train_precomputed_file)
    train_input_fn = input_fn_builder(
        input_file=train_filenames,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_predict:
    if not FLAGS.output_prediction_file:
      raise ValueError(
          "--output_prediction_file must be defined in predict mode.")

    eval_examples = read_nq_examples(
        input_file=FLAGS.predict_file, is_training=False)
    eval_writer = FeatureWriter(
        filename=os.path.join(FLAGS.output_dir, "eval.tf_record"),
        is_training=False)
    eval_features = []

    def append_feature(feature):
      eval_features.append(feature)
      eval_writer.process_feature(feature)

    num_spans_to_ids = convert_examples_to_features(
        examples=eval_examples,
        tokenizer=tokenizer,
        is_training=False,
        output_fn=append_feature)
    eval_writer.close()
    eval_filename = eval_writer.filename

    tf.logging.info("***** Running predictions *****")
    tf.logging.info("  Num orig examples = %d", len(eval_examples))
    tf.logging.info("  Num split examples = %d", len(eval_features))
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)
    for spans, ids in num_spans_to_ids.items():
      tf.logging.info("  Num split into %d = %d", spans, len(ids))

    predict_input_fn = input_fn_builder(
        input_file=eval_filename,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    # If running eval on the TPU, you will need to specify the number of steps.
    all_results = []
    for result in estimator.predict(
        predict_input_fn, yield_single_examples=True):
      if len(all_results) % 1000 == 0:
        tf.logging.info("Processing example: %d" % (len(all_results)))
      unique_id = int(result["unique_ids"])
      start_logits = [float(x) for x in result["start_logits"].flat]
      end_logits = [float(x) for x in result["end_logits"].flat]
      answer_type_logits = [float(x) for x in result["answer_type_logits"].flat]
      all_results.append(
          RawResult(
              unique_id=unique_id,
              start_logits=start_logits,
              end_logits=end_logits,
              answer_type_logits=answer_type_logits))

    candidates_dict = read_candidates(FLAGS.predict_file)
    eval_features = [
        tf.train.Example.FromString(r)
        for r in tf.python_io.tf_record_iterator(eval_filename)
    ]
    nq_pred_dict = compute_pred_dict(candidates_dict, eval_features,
                                     [r._asdict() for r in all_results])

    predictions_json = {"predictions": list(nq_pred_dict.values())}
    with tf.gfile.Open(FLAGS.output_prediction_file, "w") as f:
      json.dump(predictions_json, f, indent=4)
def reason(
    input_dir,
    output_dir,
    overwrite=False,
    model=gin.REQUIRED,
    num_iterations=gin.REQUIRED,
    training_steps_per_iteration=gin.REQUIRED,
    eval_steps_per_iteration=gin.REQUIRED,
    random_seed=gin.REQUIRED,
    batch_size=gin.REQUIRED,
    name="",
):
    """Trains the estimator and exports the snapshot and the gin config.

  The use of this function requires the gin binding 'dataset.name' to be
  specified if a model is trained from scratch, as that determines the data
  set used for training.

  Args:
    input_dir: String with path to directory where the representation function
      is saved.
    output_dir: String with the path where the results should be saved.
    overwrite: Boolean indicating whether to overwrite output directory.
    model: GaussianEncoderModel that should be trained and exported.
    num_iterations: Integer with number of training iterations.
    training_steps_per_iteration: Integer with number of training steps per
      iteration.
    eval_steps_per_iteration: Integer with number of validation and test steps
      per iteration.
    random_seed: Integer with random seed used for training.
    batch_size: Integer with the batch size.
    name: Optional string with name of the model (can be used to name models).
  """
    # We do not use the variable 'name'. Instead, it can be used to name results
    # as it will be part of the saved gin config.
    del name

    # Delete the output directory if it already exists.
    if tf.gfile.IsDirectory(output_dir):
        if overwrite:
            tf.gfile.DeleteRecursively(output_dir)
        else:
            raise ValueError(
                "Directory already exists and overwrite is False.")

    # Create a numpy random state. We will sample the random seeds for training
    # and evaluation from this.
    random_state = np.random.RandomState(random_seed)

    # Automatically set the proper data set if necessary. We replace the active
    # gin config as this will lead to a valid gin config file where the data set
    # is present.
    if gin.query_parameter("dataset.name") == "auto":
        if input_dir is None:
            raise ValueError(
                "Cannot automatically infer data set for methods with"
                " no prior model directory.")
        # Obtain the dataset name from the gin config of the previous step.
        gin_config_file = os.path.join(input_dir, "results", "gin",
                                       "postprocess.gin")
        gin_dict = results.gin_dict(gin_config_file)
        with gin.unlock_config():
            gin.bind_parameter("dataset.name",
                               gin_dict["dataset.name"].replace("'", ""))
    dataset = pgm_data.get_pgm_dataset()

    # Set the path to the TFHub embedding if we are training based on a
    # pre-trained embedding.
    if input_dir is not None:
        tfhub_dir = os.path.join(input_dir, "tfhub")
        with gin.unlock_config():
            gin.bind_parameter("HubEmbedding.hub_path", tfhub_dir)

    # We create a TPUEstimator based on the provided model. This is primarily so
    # that we could switch to TPU training in the future. For now, we train
    # locally on GPUs.
    run_config = contrib_tpu.RunConfig(
        tf_random_seed=random_seed,
        keep_checkpoint_max=1,
        tpu_config=contrib_tpu.TPUConfig(iterations_per_loop=500))
    tpu_estimator = contrib_tpu.TPUEstimator(use_tpu=False,
                                             model_fn=model.model_fn,
                                             model_dir=os.path.join(
                                                 output_dir, "tf_checkpoint"),
                                             train_batch_size=batch_size,
                                             eval_batch_size=batch_size,
                                             config=run_config)

    # Set up time to keep track of elapsed time in results.
    experiment_timer = time.time()

    # Create a dictionary to keep track of all relevant information.
    results_dict_of_dicts = {}
    validation_scores = []
    all_dicts = []

    for i in range(num_iterations):
        steps_so_far = i * training_steps_per_iteration
        tf.logging.info("Training to %d steps.", steps_so_far)
        # Train the model for the specified steps.
        tpu_estimator.train(input_fn=dataset.make_input_fn(
            random_state.randint(2**32)),
                            steps=training_steps_per_iteration)
        # Compute validation scores used for model selection.
        validation_results = tpu_estimator.evaluate(
            input_fn=dataset.make_input_fn(
                random_state.randint(2**32),
                num_batches=eval_steps_per_iteration))
        validation_scores.append(validation_results["accuracy"])
        tf.logging.info("Validation results %s", validation_results)
        # Compute test scores for final results.
        test_results = tpu_estimator.evaluate(input_fn=dataset.make_input_fn(
            random_state.randint(2**32), num_batches=eval_steps_per_iteration),
                                              name="test")
        dict_at_iteration = results.namespaced_dict(val=validation_results,
                                                    test=test_results)
        results_dict_of_dicts["step{}".format(
            steps_so_far)] = dict_at_iteration
        all_dicts.append(dict_at_iteration)

    # Select the best number of steps based on the validation scores and add
    # it as a special key to the dictionary.
    best_index = np.argmax(validation_scores)
    results_dict_of_dicts["best"] = all_dicts[best_index]

    # Save the results. The result dir will contain all the results and config
    # files that we copied along, as we progress in the pipeline. The idea is that
    # these files will be available for analysis at the end.
    if input_dir is not None:
        original_results_dir = os.path.join(input_dir, "results")
    else:
        original_results_dir = None
    results_dict = results.namespaced_dict(**results_dict_of_dicts)
    results_dir = os.path.join(output_dir, "results")
    results_dict["elapsed_time"] = time.time() - experiment_timer
    results.update_result_directory(results_dir, "abstract_reasoning",
                                    results_dict, original_results_dir)
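
# A minimal usage sketch for `reason` (the binding values and the model class
# below are hypothetical): every gin.REQUIRED argument above must be bound
# before the call, e.g.
#
#   import gin
#   gin.parse_config("""
#       dataset.name = 'auto'
#       reason.model = @SomeReasoningModel()
#       reason.num_iterations = 10
#       reason.training_steps_per_iteration = 1000
#       reason.eval_steps_per_iteration = 50
#       reason.random_seed = 0
#       reason.batch_size = 32
#   """)
#   reason("path/to/input_dir", "path/to/output_dir", overwrite=True)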
Example #9
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # validate_flags_or_throw(bert_config)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    for fold_i in [2]:
        tf.gfile.MakeDirs(FLAGS.output_dir + "_{}".format(fold_i))

        run_config = tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=FLAGS.master,
            model_dir=FLAGS.output_dir + "_{}".format(fold_i),
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tpu_config=tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop,
                num_shards=FLAGS.num_tpu_cores,
                per_host_input_for_training=is_per_host))

        session_config = tf.ConfigProto(log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        run_config = run_config.replace(session_config=session_config)
        run_config = run_config.replace(keep_checkpoint_max=2)

        train_examples = None
        num_train_steps = None
        num_warmup_steps = None
        if FLAGS.do_train:
            train_examples = read_squad_examples(
                input_file="CV_data/data{}/train_data.csv".format(fold_i),
                is_training=True)
            num_train_steps = int(
                len(train_examples) / FLAGS.train_batch_size *
                FLAGS.num_train_epochs)
            num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

            # Pre-shuffle the input to avoid having to make a very large shuffle
            # buffer in the `input_fn`.
            rng = random.Random(12345)
            rng.shuffle(train_examples)

        model_fn = model_fn_builder(bert_config=bert_config,
                                    init_checkpoint=FLAGS.init_checkpoint,
                                    learning_rate=FLAGS.learning_rate,
                                    num_train_steps=num_train_steps,
                                    num_warmup_steps=num_warmup_steps,
                                    use_tpu=FLAGS.use_tpu,
                                    use_one_hot_embeddings=FLAGS.use_tpu)

        # If TPU is not available, this will fall back to normal Estimator on CPU
        # or GPU.
        estimator = tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        if FLAGS.do_train:
            # We write to a temporary file to avoid storing very large constant tensors
            # in memory.
            train_writer = FeatureWriter(filename=os.path.join(
                FLAGS.output_dir + "_{}".format(fold_i), "train.tf_record"),
                                         is_training=True)
            convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=True,
                output_fn=train_writer.process_feature)
            train_writer.close()

            tf.logging.info("***** Running training *****")
            tf.logging.info("  Num orig examples = %d", len(train_examples))
            tf.logging.info("  Num split examples = %d",
                            train_writer.num_features)
            tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
            tf.logging.info("  Num steps = %d", num_train_steps)
            del train_examples

            eval_examples = read_squad_examples(
                input_file="CV_data/data{}/dev_data.csv".format(fold_i),
                is_training=True)

            eval_writer = FeatureWriter(filename=os.path.join(
                FLAGS.output_dir + "_{}".format(fold_i), "eval.tf_record"),
                                        is_training=False)
            eval_features = []

            def append_feature(feature):
                eval_features.append(feature)
                eval_writer.process_feature(feature)

            convert_examples_to_features(
                examples=eval_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature)
            eval_writer.close()

            tf.logging.info("***** Running predictions *****")
            tf.logging.info("  Num orig examples = %d", len(eval_examples))
            tf.logging.info("  Num split examples = %d", len(eval_features))
            tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

            train_input_fn = input_fn_builder(input_file=train_writer.filename,
                                              seq_length=FLAGS.max_seq_length,
                                              is_training=True,
                                              drop_remainder=True)
            estimator.train(
                input_fn=train_input_fn,
                max_steps=num_train_steps,
                hooks=[
                    EvalHook(estimator,
                             eval_writer.filename,
                             "CV_data/data{}/dev_data.csv".format(fold_i),
                             eval_features,
                             eval_steps=FLAGS.save_checkpoints_steps,
                             max_seq_length=FLAGS.max_seq_length,
                             max_answer_length=FLAGS.max_answer_length,
                             checkpoint_dir="SAVE_MODEL",
                             input_fn_builder=input_fn_builder,
                             th=85.5,
                             model_name="output_model_LSTM_{}".format(fold_i))
                ])

    if FLAGS.do_predict:
        test_examples = read_squad_examples(
            input_file="filter_data/test_data.csv", is_training=False)

        test_writer = FeatureWriter(filename=os.path.join(
            FLAGS.output_dir, "test.tf_record"),
                                    is_training=False)
        test_features = []

        def append_feature(feature):
            test_features.append(feature)
            test_writer.process_feature(feature)

        convert_examples_to_features(examples=test_examples,
                                     tokenizer=tokenizer,
                                     max_seq_length=FLAGS.max_seq_length,
                                     doc_stride=FLAGS.doc_stride,
                                     max_query_length=FLAGS.max_query_length,
                                     is_training=False,
                                     output_fn=append_feature)
        test_writer.close()

        tf.logging.info("***** Running predictions *****")
        tf.logging.info("  Num orig examples = %d", len(test_examples))
        tf.logging.info("  Num split examples = %d", len(test_features))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = input_fn_builder(input_file=test_writer.filename,
                                            seq_length=FLAGS.max_seq_length,
                                            is_training=False,
                                            drop_remainder=False)

        # If running eval on the TPU, you will need to specify the number of
        # steps.
        predictions = estimator.predict(predict_input_fn,
                                        yield_single_examples=True)

        #             predictions = {
        #                 "unique_ids": unique_ids,
        #                 "start_logits": start_logits,
        #                 "end_logits": end_logits,
        #             }

        instances = []
        with open("results2.csv", "w", encoding="utf-8") as fw:
            # for qa_id in hoka_test:
            #     value = "NaN"
            #     fw.write(f"\"{qa_id}\",\"{value}\"\n")
            for i, item in enumerate(predictions):
                unique_ids = item["unique_ids"]
                qa_id = test_features[i].unique_id
                # print(unique_ids, type(unique_ids))
                # print(qa_id, type(qa_id))
                assert qa_id == unique_ids

                start_logits = item["start_logits"]
                end_logits = item["end_logits"]

                n_best_item = write_prediction(
                    test_features[i],
                    start_logits,
                    end_logits,
                    n_best_size=20,
                    max_answer_length=FLAGS.max_answer_length)

                best_list = [a["text"] for a in n_best_item[:3]]

                fw.write("\"{}\",\"{}\",\"{}\",\"{}\"\n".format(
                    qa_id, *best_list))
Example #10
def train(model_dir,
          overwrite=False,
          model=gin.REQUIRED,
          training_steps=gin.REQUIRED,
          random_seed=gin.REQUIRED,
          batch_size=gin.REQUIRED,
          eval_steps=1000,
          name="",
          model_num=None):
  """Trains the estimator and exports the snapshot and the gin config.

  The use of this function requires the gin binding 'dataset.name' to be
  specified, as that determines the data set used for training.

  Args:
    model_dir: String with path to directory where model output should be saved.
    overwrite: Boolean indicating whether to overwrite output directory.
    model: GaussianEncoderModel that should be trained and exported.
    training_steps: Integer with number of training steps.
    random_seed: Integer with random seed used for training.
    batch_size: Integer with the batch size.
    eval_steps: Optional integer with number of steps used for evaluation.
    name: Optional string with name of the model (can be used to name models).
    model_num: Optional integer with model number (can be used to identify
      models).
  """
  # We do not use the variables 'name' and 'model_num'. Instead, they can be
  # used to name results as they will be part of the saved gin config.
  del name, model_num

  # Delete the output directory if it already exists.
  if tf.gfile.IsDirectory(model_dir):
    if overwrite:
      tf.gfile.DeleteRecursively(model_dir)
    else:
      raise ValueError("Directory already exists and overwrite is False.")

  # Create a numpy random state. We will sample the random seeds for training
  # and evaluation from this.
  random_state = np.random.RandomState(random_seed)

  # Obtain the dataset.
  dataset = named_data.get_named_ground_truth_data()

  # We create a TPUEstimator based on the provided model. This is primarily so
  # that we could switch to TPU training in the future. For now, we train
  # locally on GPUs.
  run_config = contrib_tpu.RunConfig(
      tf_random_seed=random_seed,
      keep_checkpoint_max=1,
      tpu_config=contrib_tpu.TPUConfig(iterations_per_loop=500))
  tpu_estimator = contrib_tpu.TPUEstimator(
      use_tpu=False,
      model_fn=model.model_fn,
      model_dir=os.path.join(model_dir, "tf_checkpoint"),
      train_batch_size=batch_size,
      eval_batch_size=batch_size,
      config=run_config)

  # Set up time to keep track of elapsed time in results.
  experiment_timer = time.time()

  # Do the actual training.
  tpu_estimator.train(
      input_fn=_make_input_fn(dataset, random_state.randint(2**32)),
      steps=training_steps)

  # Save model as a TFHub module.
  output_shape = named_data.get_named_ground_truth_data().observation_shape
  module_export_path = os.path.join(model_dir, "tfhub")
  gaussian_encoder_model.export_as_tf_hub(model, output_shape,
                                          tpu_estimator.latest_checkpoint(),
                                          module_export_path)

  # Save the results. The result dir will contain all the results and config
  # files that we copied along, as we progress in the pipeline. The idea is that
  # these files will be available for analysis at the end.
  results_dict = tpu_estimator.evaluate(
      input_fn=_make_input_fn(
          dataset, random_state.randint(2**32), num_batches=eval_steps))
  results_dir = os.path.join(model_dir, "results")
  results_dict["elapsed_time"] = time.time() - experiment_timer
  results.update_result_directory(results_dir, "train", results_dict)
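
# A minimal sketch (assuming a TF1-style tensorflow_hub is available) of how
# the module exported above could be reloaded later in the pipeline:
#
#   import tensorflow_hub as hub
#   module = hub.Module(os.path.join(model_dir, "tfhub"))
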
def main(_):
    # Set the logging verbosity; levels are Debug, Info, Warning and Error.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Map each task name to its data processing class.
    processors = {"imdb": IMDBProcessor}

    # Validate the flags.
    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    # Load the BERT model configuration.
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Validate parameters (e.g. the code below raises an error immediately if
    # the given max sequence length exceeds 512, since the BERT model here
    # supports sequences of at most 512 positions).
    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    # Create the output directory.
    tf.gfile.MakeDirs(FLAGS.output_dir)

    # Get the task name.
    task_name = FLAGS.task_name.lower()

    # Raise an error if the task name is not in the processor dictionary.
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    # Get the data processor for this task name.
    processor = processors[task_name]()

    # Get the concrete label list from the data processor.
    label_list = processor.get_labels(data_dir=FLAGS.data_dir)

    # Build a tokenizer (token-to-id mapping) from the vocabulary file.
    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    # Whether to use a TPU.
    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    # Run configuration.
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        # Load the training data.
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        # Compute the total number of training steps.
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    # Build model_config.
    with open(
            os.path.join(os.path.abspath(os.getcwd()), 'config',
                         'bilstm_config.json'), "r") as fr:
        model_config = json.load(fr)

    # Build the model function.
    model_fn = model_fn_builder(bert_config=bert_config,
                                model_config=model_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                fine_tune=False)

    # If TPU is not available, this will fall back to normal Estimator on CPU or GPU.
    estimator = tpu.TPUEstimator(use_tpu=FLAGS.use_tpu,
                                 model_fn=model_fn,
                                 config=run_config,
                                 train_batch_size=FLAGS.train_batch_size,
                                 eval_batch_size=FLAGS.eval_batch_size,
                                 predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # Convert the training data to TFRecord format and save it.
        train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        # Log the training settings.
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        # Build the input function from the TFRecord data.
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        # Train the model.
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())
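            # For example, 1005 eval examples with eval_batch_size = 8 get
            # three PaddingInputExamples appended so that 1008 % 8 == 0.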

        eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(result):
                # probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                # output_line = "\t".join(
                #     str(class_probability)
                #     for class_probability in probabilities) + "\n"
                output_line = str(prediction['predictions']) + '\n'
                writer.write(output_line)
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
Example #12
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    validate_flags_or_throw(bert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    if FLAGS.do_train:
        train_examples = read_squad_examples(input_file=FLAGS.train_file,
                                             is_training=True)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)

        # Pre-shuffle the input to avoid having to make a very large shuffle
        # buffer in the `input_fn`.
        rng = random.Random(12345)
        rng.shuffle(train_examples)

    model_fn = model_fn_builder(
        bert_config=bert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        membership_features_str=FLAGS.membership_features_str)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant tensors
        # in memory.
        train_writer = FeatureWriter(filename=os.path.join(
            FLAGS.output_dir, "train.tf_record." + FLAGS.exp_name),
                                     is_training=True)
        convert_examples_to_features(examples=train_examples,
                                     tokenizer=tokenizer,
                                     max_seq_length=FLAGS.max_seq_length,
                                     doc_stride=FLAGS.doc_stride,
                                     max_query_length=FLAGS.max_query_length,
                                     is_training=True,
                                     output_fn=train_writer.process_feature)
        train_writer.close()

        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num orig examples = %d", len(train_examples))
        tf.logging.info("  Num split examples = %d", train_writer.num_features)
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        del train_examples

        train_input_fn = input_fn_builder(input_file=train_writer.filename,
                                          seq_length=FLAGS.max_seq_length,
                                          is_training=True,
                                          drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        # marking is_training = True to keep the labels
        eval_examples = read_squad_examples(input_file=FLAGS.dev_file,
                                            is_training=True)

        eval_writer = FeatureWriter(filename=os.path.join(
            FLAGS.output_dir, "eval.tf_record." + FLAGS.exp_name),
                                    is_training=True)
        eval_features = []

        def append_feature(feature):
            eval_features.append(feature)
            eval_writer.process_feature(feature)

        convert_examples_to_features(examples=eval_examples,
                                     tokenizer=tokenizer,
                                     max_seq_length=FLAGS.max_seq_length,
                                     doc_stride=FLAGS.doc_stride,
                                     max_query_length=FLAGS.max_query_length,
                                     is_training=True,
                                     output_fn=append_feature)
        eval_writer.close()

        tf.logging.info("  Num orig examples = %d", len(eval_examples))
        tf.logging.info("  Num split examples = %d", len(eval_features))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        all_results = []

        eval_input_fn = input_fn_builder(input_file=eval_writer.filename,
                                         seq_length=FLAGS.max_seq_length,
                                         is_training=True,
                                         drop_remainder=True)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        if FLAGS.use_tpu:
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)
            tf.logging.info("  Num examples used = %d",
                            FLAGS.eval_batch_size * eval_steps)
        else:
            eval_steps = None
            tf.logging.info("  Num examples used = %d", len(eval_examples))

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        eval_examples = read_squad_examples(
            input_file=FLAGS.predict_input_file, is_training=False)

        eval_writer = FeatureWriter(filename=os.path.join(
            FLAGS.output_dir, "predict.tf_record." + FLAGS.exp_name),
                                    is_training=False)
        eval_features = []

        # Redefine append_feature locally so this branch also works when
        # `do_eval` did not run (otherwise the name would be undefined here).
        def append_feature(feature):
            eval_features.append(feature)
            eval_writer.process_feature(feature)

        convert_examples_to_features(examples=eval_examples,
                                     tokenizer=tokenizer,
                                     max_seq_length=FLAGS.max_seq_length,
                                     doc_stride=FLAGS.doc_stride,
                                     max_query_length=FLAGS.max_query_length,
                                     is_training=False,
                                     output_fn=append_feature)
        eval_writer.close()

        tf.logging.info("***** Running predictions *****")
        tf.logging.info("  Num orig examples = %d", len(eval_examples))
        tf.logging.info("  Num split examples = %d", len(eval_features))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = input_fn_builder(input_file=eval_writer.filename,
                                            seq_length=FLAGS.max_seq_length,
                                            is_training=False,
                                            drop_remainder=False)

        # If running eval on the TPU, you will need to specify the number of
        # steps.
        all_results = []
        for result in estimator.predict(predict_input_fn,
                                        yield_single_examples=True):
            if len(all_results) % 1000 == 0:
                tf.logging.info("Processing example: %d" % (len(all_results)))
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError("At least one of `do_train`, `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        num_train_steps = int(FLAGS.train_data_size / FLAGS.train_batch_size)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
        num_loop_steps = int(num_train_steps / FLAGS.num_train_rounds)
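        # For example, train_data_size = 1000000 with train_batch_size = 64
        # gives num_train_steps = 15625; num_train_rounds = 5 then yields
        # 3125 steps per round.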

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.eval_data_size < 0:
        # Count the number of records in the eval file to get the data size and
        # needed number of iterations
        # We don't do this for training because the training set is much larger
        # and would take too long to iterate over just to get a count. Please
        # pass the training size via the 'train_data_size' flag instead.
        reader = tf.io.tf_record_iterator(FLAGS.eval_file)
        eval_data_size = 0
        for _ in reader:
            eval_data_size += 1
    else:
        eval_data_size = FLAGS.eval_data_size

    # For each "round", train for a fraction of the training data and then
    # evaluate on the validation data. Allows for intermediate progress updates
    train_rounds = FLAGS.num_train_rounds if FLAGS.do_train else 1
    for train_round in range(train_rounds):
        if FLAGS.do_train:
            tf.logging.info("***** Running training *****")
            tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
            tf.logging.info("  Num steps = %d", num_loop_steps)
            train_input_fn = file_based_input_fn_builder(
                input_file=FLAGS.train_file,
                seq_length=FLAGS.max_seq_length,
                is_training=True,
                drop_remainder=True,
                skip=num_loop_steps * train_round * FLAGS.train_batch_size)
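            # For example, with num_loop_steps = 3125 and train_batch_size =
            # 64, round 2 skips the 400000 examples consumed by rounds 0 and 1.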
            estimator.train(input_fn=train_input_fn, steps=num_loop_steps)

        if FLAGS.do_eval:
            # This tells the estimator to run through the entire set.
            eval_steps = None
            if FLAGS.use_tpu:
                eval_steps = int(eval_data_size / FLAGS.eval_batch_size)

            eval_drop_remainder = True if FLAGS.use_tpu else False
            eval_input_fn = file_based_input_fn_builder(
                input_file=FLAGS.eval_file,
                seq_length=FLAGS.max_seq_length,
                is_training=False,
                drop_remainder=eval_drop_remainder)

            result = estimator.evaluate(input_fn=eval_input_fn,
                                        steps=eval_steps)
            tf.logging.info("********** Eval results: %d *******\n",
                            train_round)
            for key in sorted(result.keys()):
                tf.logging.info("%s = %s" % (key, str(result[key])))
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # validate_flags_or_throw(bert_config)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2

    for fold_i in [4]:
        model_dir = FLAGS.output_dir + "_{}".format(fold_i)

        run_config = tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=FLAGS.master,
            model_dir=model_dir,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tpu_config=tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop,
                num_shards=FLAGS.num_tpu_cores,
                per_host_input_for_training=is_per_host))

        session_config = tf.ConfigProto(log_device_placement=False)
        session_config.gpu_options.allow_growth = True
        run_config = run_config.replace(session_config=session_config)
        run_config = run_config.replace(keep_checkpoint_max=2)

        train_examples = None
        num_train_steps = None
        num_warmup_steps = None

        model_fn = model_fn_builder(bert_config=bert_config,
                                    init_checkpoint=FLAGS.init_checkpoint,
                                    learning_rate=FLAGS.learning_rate,
                                    num_train_steps=num_train_steps,
                                    num_warmup_steps=num_warmup_steps,
                                    use_tpu=FLAGS.use_tpu,
                                    use_one_hot_embeddings=FLAGS.use_tpu)

        # If TPU is not available, this will fall back to normal Estimator on CPU
        # or GPU.
        estimator = tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        test_examples = read_squad_examples(
            input_file="filter_data/test_data.csv", is_training=False)

        test_writer = FeatureWriter(filename=os.path.join(
            model_dir, "test.tf_record"),
                                    is_training=False)
        test_features = []

        def append_feature(feature):
            test_features.append(feature)
            test_writer.process_feature(feature)

        convert_examples_to_features(examples=test_examples,
                                     tokenizer=tokenizer,
                                     max_seq_length=FLAGS.max_seq_length,
                                     doc_stride=FLAGS.doc_stride,
                                     max_query_length=FLAGS.max_query_length,
                                     is_training=False,
                                     output_fn=append_feature)
        test_writer.close()

        tf.logging.info("***** Running predictions *****")
        tf.logging.info("  Num orig examples = %d", len(test_examples))
        tf.logging.info("  Num split examples = %d", len(test_features))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = input_fn_builder(input_file=test_writer.filename,
                                            seq_length=FLAGS.max_seq_length,
                                            is_training=False,
                                            drop_remainder=False)

        # If running eval on the TPU, you will need to specify the number of
        # steps.
        predictions = estimator.predict(predict_input_fn,
                                        yield_single_examples=True)

        #             predictions = {
        #                 "unique_ids": unique_ids,
        #                 "start_logits": start_logits,
        #                 "end_logits": end_logits,
        #             }

        instances = []

        with open("./results/cv_results_lstm_2th_{}.csv".format(fold_i),
                  "w",
                  encoding="utf-8") as fw:
            for i, item in enumerate(predictions):
                unique_ids = item["unique_ids"]
                qa_id = test_features[i].unique_id

                assert qa_id == unique_ids

                start_logits = item["start_logits"]
                end_logits = item["end_logits"]

                n_best_item = write_prediction(
                    test_features[i],
                    start_logits,
                    end_logits,
                    n_best_size=20,
                    max_answer_length=FLAGS.max_answer_length)

                best_list = [a["text"] for a in n_best_item[:3]]
                # json.dump({"qa_id": qa_id, "n_best_items": n_best_item}, fw)
                # fw.write("\n")
                if len(best_list) < 3:
                    print(n_best_item)
                while len(best_list) < 3:
                    best_list.append("empty")
                fw.write("\"{}\",\"{}\",\"{}\",\"{}\"\n".format(
                    qa_id, *best_list))
Example #15
def main(_):
    hvd.init()

    FLAGS.output_dir = FLAGS.output_dir if hvd.rank() == 0 else\
                       os.path.join(FLAGS.output_dir, str(hvd.rank()))
    FLAGS.train_batch_size = FLAGS.train_batch_size // hvd.size()
    FLAGS.eval_batch_size = FLAGS.eval_batch_size // hvd.size()
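    # For example, a global train_batch_size of 256 across hvd.size() == 8
    # workers gives each worker a per-worker batch size of 32.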
    tf.logging.set_verbosity(tf.logging.INFO)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    input_files = []
    for input_pattern in FLAGS.input_file.split(","):
        input_files.extend(tf.gfile.Glob(input_pattern))
    input_files_local = []
    for input_file in input_files:
        fname = input_file.split('/')[-1]
        if re.match(r'^.+\d+\.tfrecord', fname):
            fid = int(fname[-13:-9])
            if fid % hvd.size() == hvd.rank():
                input_files_local.append(input_file)

    if input_files_local:
        input_files = input_files_local
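    # With this sharding, worker r of hvd.size() == n keeps the shard files
    # whose four-digit id satisfies fid % n == r; e.g. worker 3 of 8 reads
    # shards 0003, 0011, 0019, and so on.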

    tf.logging.info("*** Input Files ***")
    for input_file in input_files:
        tf.logging.info("  %s" % input_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    config = tf.ConfigProto()
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=FLAGS.keep_checkpoint_max,
        session_config=config,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    model_fn = model_fn_builder(albert_config=albert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=FLAGS.num_train_steps,
                                num_warmup_steps=FLAGS.num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                optimizer=FLAGS.optimizer,
                                poly_power=FLAGS.poly_power,
                                start_warmup_step=FLAGS.start_warmup_step)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        hooks = [hvd.BroadcastGlobalVariablesHook(0)]
        train_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=True)
        estimator.train(input_fn=train_input_fn,
                        max_steps=FLAGS.num_train_steps,
                        hooks=hooks)

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)
        global_step = -1
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        writer = tf.gfile.GFile(output_eval_file, "w")
        eval_input_fn = input_fn_builder(
            input_files=input_files,
            max_seq_length=FLAGS.max_seq_length,
            max_predictions_per_seq=FLAGS.max_predictions_per_seq,
            is_training=False)
        best_perf = 0
        key_name = "masked_lm_accuracy"
        while global_step < FLAGS.num_train_steps:
            if estimator.latest_checkpoint() is None:
                tf.logging.info("No checkpoint found yet. Sleeping.")
                time.sleep(1)
            else:
                result = estimator.evaluate(input_fn=eval_input_fn,
                                            steps=FLAGS.max_eval_steps)
                global_step = result["global_step"]
                tf.logging.info("***** Eval results *****")
                checkpoint_path = estimator.latest_checkpoint()
                for key in sorted(result.keys()):
                    tf.logging.info("  %s = %s", key, str(result[key]))
                    writer.write("%s = %s\n" % (key, str(result[key])))
                # Keep a copy of the best checkpoint; this check runs once
                # per eval, not once per metric key.
                if result[key_name] > best_perf:
                    best_perf = result[key_name]
                    for ext in ["meta", "data-00000-of-00001", "index"]:
                        src_ckpt = checkpoint_path + ".{}".format(ext)
                        tgt_ckpt = checkpoint_path.rsplit(
                            "-", 1)[0] + "-best.{}".format(ext)
                        tf.logging.info("saving {} to {}".format(
                            src_ckpt, tgt_ckpt))
                        tf.gfile.Copy(src_ckpt, tgt_ckpt, overwrite=True)
                        writer.write("saved {} to {}\n".format(
                            src_ckpt, tgt_ckpt))

        writer.close()
Example #16
def main(argv):
    del argv  # Unused
    if FLAGS.use_tpu:
        assert FLAGS.model_dir.startswith("gs://"), ("'model_dir' should be a "
                                                     "GCS bucket path!")

    # Fetch the data
    (train_x, train_y), (test_x, test_y) = iris_data.load_data()

    # Feature columns describe how to use the input.
    my_feature_columns = []
    for key in train_x.keys():
        my_feature_columns.append(tf.feature_column.numeric_column(key=key))

    # Resolve the TPU cluster and build the RunConfig for it.
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu)

    run_config = contrib_tpu.RunConfig(
        model_dir=FLAGS.model_dir,
        cluster=tpu_cluster_resolver,
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=True),
        tpu_config=contrib_tpu.TPUConfig(FLAGS.iterations),
    )

    # Build 2 hidden layer DNN with 10, 10 units respectively.
    classifier = contrib_tpu.TPUEstimator(
        model_fn=my_model,
        use_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.batch_size,
        eval_batch_size=FLAGS.batch_size,
        predict_batch_size=FLAGS.batch_size,
        config=run_config,
        params={
            # Name of the feature columns in the input data.
            "feature_columns": my_feature_columns,
            # Two hidden layers of 10 nodes each.
            "hidden_units": [10, 10],
            # The model must choose between 3 classes.
            "n_classes": 3,
            "use_tpu": FLAGS.use_tpu,
        })

    # Train the Model.
    classifier.train(input_fn=lambda params: iris_data.train_input_fn(
        train_x, train_y, params["batch_size"]),
                     max_steps=FLAGS.train_steps)

    # Evaluate the model.
    eval_result = classifier.evaluate(
        input_fn=lambda params: iris_data.eval_input_fn(
            test_x, test_y, params["batch_size"]),
        steps=FLAGS.eval_steps)

    print("\nTest set accuracy: {accuracy:0.3f}\n".format(**eval_result))

    # Generate predictions from the model
    predictions = classifier.predict(
        input_fn=lambda params: iris_data.predict_input_fn(
            iris_data.PREDICTION_INPUT_DATA, params["batch_size"]))

    for pred_dict, expec in zip(predictions, iris_data.PREDICTION_OUTPUT_DATA):
        template = ("\nPrediction is \"{}\" ({:.1f}%), expected \"{}\"")

        class_id = pred_dict["class_ids"][0]
        probability = pred_dict["probabilities"][class_id]

        print(
            template.format(iris_data.SPECIES[class_id], 100 * probability,
                            expec))
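# For context, the my_model model_fn used above must follow the TPUEstimator
# contract: read the batch size from `params`, return a TPUEstimatorSpec, and
# wrap its optimizer in CrossShardOptimizer when running on TPU. A minimal
# sketch of the TRAIN path only, assuming the usual iris DNN shape
# (my_model's real body is defined elsewhere and may differ):
def my_model_sketch(features, labels, mode, params):
    # Build the DNN from the feature columns passed through `params`.
    net = tf.feature_column.input_layer(features, params["feature_columns"])
    for units in params["hidden_units"]:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
    logits = tf.layers.dense(net, params["n_classes"], activation=None)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
    if params["use_tpu"]:
        # Required on TPU: aggregates gradients across all shards.
        optimizer = contrib_tpu.CrossShardOptimizer(optimizer)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return contrib_tpu.TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op)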
Example #17
def main(unused_argv):
    del unused_argv  # Unused

    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    assert FLAGS.precision == 'bfloat16' or FLAGS.precision == 'float32', (
        'Invalid value for --precision flag; must be bfloat16 or float32.')
    tf.logging.info('Precision: %s', FLAGS.precision)

    params = {
        'input_perm': [0, 1, 2, 3],
        'output_perm': [0, 1, 2, 3],
    }

    batch_axis = 0
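    # With transpose_enabled, inputs are fed with the batch dimension moved
    # off axis 0 (a common TPU infeed optimization); batch_axis below tells
    # TPUEstimator which axis holds the batch for features (and 0 for labels).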
    if FLAGS.transpose_enabled:
        params['input_perm'] = [3, 0, 1, 2]
        params['output_perm'] = [1, 2, 3, 0]
        batch_axis = 3

    if FLAGS.eval_total_size > 0:
        eval_size = FLAGS.eval_total_size
    else:
        eval_size = _NUM_EVAL_IMAGES
    eval_steps = eval_size // FLAGS.eval_batch_size

    iterations = (eval_steps if FLAGS.mode == 'eval' else FLAGS.iterations)

    eval_batch_size = (None
                       if FLAGS.mode == 'train' else FLAGS.eval_batch_size)

    tpu_config = contrib_tpu.TPUConfig(iterations_per_loop=iterations,
                                       num_shards=FLAGS.num_shards)

    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_secs=FLAGS.save_checkpoints_secs,
        save_summary_steps=FLAGS.save_summary_steps,
        session_config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement),
        tpu_config=tpu_config)

    inception_classifier = contrib_tpu.TPUEstimator(
        model_fn=inception_model_fn,
        use_tpu=FLAGS.use_tpu,
        config=run_config,
        params=params,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=eval_batch_size,
        batch_axis=(batch_axis, 0))

    # Input pipelines are slightly different (with regard to shuffling and
    # preprocessing) between training and evaluation.
    use_bfloat16 = FLAGS.precision == 'bfloat16'
    imagenet_train = InputPipeline(is_training=True,
                                   data_dir=FLAGS.data_dir,
                                   use_bfloat16=use_bfloat16)
    imagenet_eval = InputPipeline(is_training=False,
                                  data_dir=FLAGS.data_dir,
                                  use_bfloat16=use_bfloat16)

    if FLAGS.moving_average:
        eval_hooks = [LoadEMAHook(FLAGS.model_dir)]
    else:
        eval_hooks = []

    if FLAGS.mode == 'eval':
        # Run evaluation when there is a new checkpoint
        for checkpoint in evaluation.checkpoints_iterator(
                FLAGS.model_dir, timeout=FLAGS.eval_timeout):
            tf.logging.info('Starting to evaluate.')
            try:
                start_timestamp = time.time()  # Includes compilation time
                eval_results = inception_classifier.evaluate(
                    input_fn=imagenet_eval.input_fn,
                    steps=eval_steps,
                    hooks=eval_hooks,
                    checkpoint_path=checkpoint)
                elapsed_time = int(time.time() - start_timestamp)
                tf.logging.info('Eval results: %s. Elapsed seconds: %d',
                                eval_results, elapsed_time)

                # Terminate eval job when final checkpoint is reached
                current_step = int(os.path.basename(checkpoint).split('-')[1])
                if current_step >= FLAGS.train_steps:
                    tf.logging.info(
                        'Evaluation finished after training step %d',
                        current_step)
                    break
            except tf.errors.NotFoundError:
                # Since the coordinator is on a different job than the TPU worker,
                # sometimes the TPU worker does not finish initializing until long after
                # the CPU job tells it to start evaluating. In this case, the checkpoint
                # file could have been deleted already.
                tf.logging.info(
                    'Checkpoint %s no longer exists, skipping checkpoint',
                    checkpoint)

    elif FLAGS.mode == 'train_and_eval':
        for cycle in range(FLAGS.train_steps // FLAGS.train_steps_per_eval):
            tf.logging.info('Starting training cycle %d.' % cycle)
            inception_classifier.train(input_fn=imagenet_train.input_fn,
                                       steps=FLAGS.train_steps_per_eval)

            tf.logging.info('Starting evaluation cycle %d.' % cycle)
            eval_results = inception_classifier.evaluate(
                input_fn=imagenet_eval.input_fn,
                steps=eval_steps,
                hooks=eval_hooks)
            tf.logging.info('Evaluation results: %s' % eval_results)

    else:
        tf.logging.info('Starting training ...')
        inception_classifier.train(input_fn=imagenet_train.input_fn,
                                   max_steps=FLAGS.train_steps)

    if FLAGS.export_dir is not None:
        tf.logging.info('Starting to export model.')
        inception_classifier.export_saved_model(
            export_dir_base=FLAGS.export_dir,
            serving_input_receiver_fn=image_serving_input_fn)
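# For context, export_saved_model above requires a serving_input_receiver_fn;
# image_serving_input_fn is defined elsewhere in this file. A plausible
# minimal form (an assumption, not the original) accepts raw JPEG bytes and
# resizes them to the Inception input size:
def image_serving_input_fn_sketch():
    def _preprocess(image_bytes):
        image = tf.image.decode_jpeg(image_bytes, channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        return tf.image.resize_images(image, [299, 299])

    image_bytes_list = tf.placeholder(shape=[None], dtype=tf.string)
    images = tf.map_fn(_preprocess, image_bytes_list, dtype=tf.float32)
    return tf.estimator.export.ServingInputReceiver(
        images, {'image_bytes': image_bytes_list})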
Example #18
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  validate_flags_or_throw(bert_config)

  tf.gfile.MakeDirs(FLAGS.output_dir)

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
  run_config = contrib_tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=contrib_tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = read_squad_examples(
        input_file=FLAGS.train_file, is_training=True)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    # Pre-shuffle the input to avoid having to make a very large shuffle
    # buffer in in the `input_fn`.
    rng = random.Random(12345)
    rng.shuffle(train_examples)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = contrib_tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    # We write to a temporary file to avoid storing very large constant tensors
    # in memory.
    train_writer = FeatureWriter(
        filename=os.path.join(FLAGS.output_dir, "train.tf_record"),
        is_training=True)
    convert_examples_to_features(
        examples=train_examples,
        tokenizer=tokenizer,
        max_seq_length=FLAGS.max_seq_length,
        doc_stride=FLAGS.doc_stride,
        max_query_length=FLAGS.max_query_length,
        is_training=True,
        output_fn=train_writer.process_feature)
    train_writer.close()

    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num orig examples = %d", len(train_examples))
    tf.logging.info("  Num split examples = %d", train_writer.num_features)
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    del train_examples

    train_input_fn = input_fn_builder(
        input_file=train_writer.filename,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_predict:
    eval_examples = read_squad_examples(
        input_file=FLAGS.predict_file, is_training=False)

    eval_writer = FeatureWriter(
        filename=os.path.join(FLAGS.output_dir, "eval.tf_record"),
        is_training=False)
    eval_features = []

    def append_feature(feature):
      eval_features.append(feature)
      eval_writer.process_feature(feature)

    convert_examples_to_features(
        examples=eval_examples,
        tokenizer=tokenizer,
        max_seq_length=FLAGS.max_seq_length,
        doc_stride=FLAGS.doc_stride,
        max_query_length=FLAGS.max_query_length,
        is_training=False,
        output_fn=append_feature)
    eval_writer.close()

    tf.logging.info("***** Running predictions *****")
    tf.logging.info("  Num orig examples = %d", len(eval_examples))
    tf.logging.info("  Num split examples = %d", len(eval_features))
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_input_fn = input_fn_builder(
        input_file=eval_writer.filename,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    # If running eval on the TPU, you will need to specify the number of
    # steps.
    all_results = []
    for result in estimator.predict(
        predict_input_fn, yield_single_examples=True):
      if len(all_results) % 1000 == 0:
        tf.logging.info("Processing example: %d" % (len(all_results)))
      unique_id = int(result["unique_ids"])
      start_logits = [float(x) for x in result["start_logits"].flat]
      end_logits = [float(x) for x in result["end_logits"].flat]
      all_results.append(
          RawResult(
              unique_id=unique_id,
              start_logits=start_logits,
              end_logits=end_logits))

    output_prediction_file = os.path.join(FLAGS.output_dir, "predictions.json")
    output_nbest_file = os.path.join(FLAGS.output_dir, "nbest_predictions.json")
    output_null_log_odds_file = os.path.join(FLAGS.output_dir, "null_odds.json")

    write_predictions(eval_examples, eval_features, all_results,
                      FLAGS.n_best_size, FLAGS.max_answer_length,
                      FLAGS.do_lower_case, output_prediction_file,
                      output_nbest_file, output_null_log_odds_file)
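# For reference, the RawResult record collected above is defined earlier in
# run_squad.py as a plain namedtuple (reproduced here for readability):
import collections

RawResult = collections.namedtuple(
    "RawResult", ["unique_id", "start_logits", "end_logits"])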
Example #19
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=tpu.TPUConfig(iterations_per_loop=FLAGS.iterations_per_loop,
                                 num_shards=FLAGS.num_tpu_cores,
                                 per_host_input_for_training=is_per_host))

    session_config = tf.ConfigProto(log_device_placement=True)
    session_config.gpu_options.allow_growth = True
    # RunConfig.replace returns a new config; keep the result.
    run_config = run_config.replace(session_config=session_config)

    num_train_steps = None
    num_warmup_steps = None

    with open('cqa_data.pkl', 'rb') as fr:
        train_features, dev_cid, dev_features = pkl.load(fr)
        dev_label = [feature.label_id for feature in dev_features]

    if FLAGS.do_train:
        num_train_steps = int(
            len(train_features) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                num_labels=2,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu,
                                dev_cid=dev_cid)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        # params={'batch_size': FLAGS.train_batch_size},
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.eval_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_features))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = input_fn_builder(features=train_features,
                                          seq_length=FLAGS.max_seq_length,
                                          is_training=True,
                                          drop_remainder=True)

        estimator.train(input_fn=train_input_fn,
                        max_steps=num_train_steps,
                        hooks=[
                            EvalHook(estimator=estimator,
                                     dev_features=dev_features,
                                     dev_label=dev_label,
                                     dev_cid=dev_cid,
                                     max_seq_length=FLAGS.max_seq_length,
                                     eval_steps=FLAGS.save_checkpoints_steps,
                                     checkpoint_dir=FLAGS.output_dir)
                        ])

    if FLAGS.do_eval:
        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(dev_features))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = int(len(dev_features) / FLAGS.eval_batch_size)

        eval_drop_remainder = FLAGS.use_tpu
        eval_input_fn = input_fn_builder(features=dev_features,
                                         seq_length=FLAGS.max_seq_length,
                                         is_training=False,
                                         drop_remainder=eval_drop_remainder)

        predictions = estimator.predict(eval_input_fn,
                                        yield_single_examples=False)
        res = np.concatenate(list(predictions), axis=0)
        print(res.shape, np.array(dev_label).shape)
        metrics = PRF(np.array(dev_label), res.argmax(axis=-1))
        # print((np.array(dev_label) != res.argmax(axis=-1))[:1000])
        MAP, AvgRec, MRR = eval_reranker(dev_cid, dev_label, res[:, 0])
        metrics['MAP'] = MAP
        metrics['AvgRec'] = AvgRec
        metrics['MRR'] = MRR

        print_metrics(metrics, 'dev')
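# Note: with yield_single_examples=False, estimator.predict yields one array
# per batch rather than per example, which is why the results above are
# concatenated along axis 0 before metrics are computed. A small
# illustration with made-up sizes:
import numpy as np

batches = [np.zeros((32, 2)), np.zeros((32, 2))]  # two (batch, num_labels) outputs
res = np.concatenate(batches, axis=0)             # -> shape (64, 2)
preds = res.argmax(axis=-1)                       # hard labels for PRF-style metrics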
Example #20
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError("At least one of `do_train`, `do_eval` must be True.")

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    if FLAGS.max_seq_length > bert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        num_train_steps = int(
            FLAGS.train_data_size / FLAGS.train_batch_size) * FLAGS.epochs
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        if not tf.gfile.Exists(FLAGS.train_file):
            tf.logging.info(
                "Train file doesn't exist; creating tfrecord data")
            examples = model_builder.load_hellaswag(FLAGS.train_raw_data)
            tf.logging.info("Read raw data as json")
            model_builder.file_based_convert_examples_for_bilinear(
                examples, 512, tokenizer, FLAGS.train_file, do_copa=True)
        train_input_fn = file_based_input_fn_builder(
            input_file=FLAGS.train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, steps=num_train_steps)

    if FLAGS.do_eval:
        # This tells the estimator to run through the entire set.
        if FLAGS.eval_data_size < 0:
            eval_steps = None
        else:
            eval_steps = int(FLAGS.eval_data_size / FLAGS.eval_batch_size)

        eval_drop_remainder = FLAGS.use_tpu
        if not tf.gfile.Exists(FLAGS.eval_file):
            examples = model_builder.load_hellaswag(FLAGS.eval_raw_data)
            model_builder.file_based_convert_examples_for_bilinear(
                examples, 512, tokenizer, FLAGS.eval_file, do_copa=True)
        eval_input_fn = file_based_input_fn_builder(
            input_file=FLAGS.eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)
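
        # Helper used below: list checkpoint files in output_dir whose global
        # step is newer than curr_step, to decide when older checkpoints
        # could be pruned.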

        def _find_valid_cands(curr_step):
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        tf.logging.info("Evaling all models in output dir")
        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        key_name = "eval_accuracy"
        tf.logging.info("Checkpoint path " + checkpoint_path)
        if tf.gfile.Exists(checkpoint_path + ".index"):
            tf.logging.info("Found a best model... not good")
            result = estimator.evaluate(input_fn=eval_input_fn,
                                        steps=eval_steps,
                                        checkpoint_path=checkpoint_path)
            best_perf = result[key_name]
            global_step = result["global_step"]
        else:
            tf.logging.info("Setting global step to -1")
            global_step = -1
            best_perf = -1
            checkpoint_path = None
        tf.logging.info("Openning writer " + output_eval_file)
        writer = tf.gfile.GFile(output_eval_file, "w")

        steps_and_files = {}
        filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
        tf.logging.info("Models found " + "\n".join(filenames))
        for filename in filenames:
            if filename.endswith(".index"):
                ckpt_name = filename[:-6]
                cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
                if cur_filename.split("-")[-1] == "best":
                    continue
                gstep = int(cur_filename.split("-")[-1])
                if gstep not in steps_and_files:
                    tf.logging.info(
                        "Add {} to eval list.".format(cur_filename))
                    steps_and_files[gstep] = cur_filename
        tf.logging.info("found {} files.".format(len(steps_and_files)))
        # steps_and_files = sorted(steps_and_files, key=lambda x: x[0])
        if not steps_and_files:
            tf.logging.info(
                "No checkpoint files found; global step: {}".format(
                    global_step))
        else:
            for ele in sorted(steps_and_files.items()):
                step, checkpoint_path = ele
                if global_step >= step:
                    if len(_find_valid_cands(step)) > 1:
                        for ext in ["meta", "data-00000-of-00001", "index"]:
                            src_ckpt = checkpoint_path + ".{}".format(ext)
                            tf.logging.info("removing {}".format(src_ckpt))
                            # Why should we remove checkpoints?
                            # tf.gfile.Remove(src_ckpt)
                    tf.logging.info("Skipping candidate for some reason")
                    continue
                result = estimator.evaluate(input_fn=eval_input_fn,
                                            steps=eval_steps,
                                            checkpoint_path=checkpoint_path)
                global_step = result["global_step"]
                tf.logging.info("***** Eval results *****")
                for key in sorted(result.keys()):
                    tf.logging.info("  %s = %s", key, str(result[key]))
                    writer.write("%s = %s\n" % (key, str(result[key])))
                writer.write("best = {}\n".format(best_perf))

                if len(_find_valid_cands(global_step)) > 1:
                    for ext in ["meta", "data-00000-of-00001", "index"]:
                        src_ckpt = checkpoint_path + ".{}".format(ext)
                        tf.logging.info("removing {}".format(src_ckpt))
                        # tf.gfile.Remove(src_ckpt)
                writer.write("=" * 50 + "\n")
        writer.close()
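# For context, file_based_input_fn_builder used above follows the standard
# BERT TFRecord pattern. A condensed sketch, where the feature names (the
# usual BERT ones) are an assumption, as is omitting the int64-to-int32 cast
# the reference code applies for TPU:
def file_based_input_fn_builder_sketch(input_file, seq_length, is_training,
                                       drop_remainder):
    name_to_features = {
        "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
        "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
        "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
        "label_ids": tf.FixedLenFeature([], tf.int64),
    }

    def input_fn(params):
        d = tf.data.TFRecordDataset(input_file)
        if is_training:
            d = d.repeat()
            d = d.shuffle(buffer_size=100)
        d = d.map(lambda rec: tf.parse_single_example(rec, name_to_features))
        return d.batch(params["batch_size"], drop_remainder=drop_remainder)

    return input_fn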
Example #21
def handler(queue, kvm_fd, mm):
    global object_dict
    global object_id
    global callback_stack

    global initialized
    if not initialized:
        callback_stack = []
        object_dict = dict()
        object_id = 1
        # TODO: forward logging or disable it in test
        tf.logging.set_verbosity(tf.logging.INFO)
        initialized = True
        print("handler is initialized")

    while True:
        # Poll with a timeout so pending-callback deadlines are still
        # checked while no task is available; a plain blocking get would
        # never reach the deadline check below.
        task = None
        while task is None:
            try:
                task = queue.get(block=True, timeout=5)
            except Queue.Empty:
                task = None
            if callback_stack:
                if time.time() > callback_stack[-1]["deadline"]:
                    print("callback missed its deadline")
                    return STATUS_CALLBACK_TIMEOUT

        vm_id = task.vm_id
        if vm_id == STOP_HANDLER:
            break
        param = TF_PY_PARAM.from_buffer(mm, task.data_ptr)
        callback_param = TF_PY_PARAM.from_buffer(
            mm, task.data_ptr + param.base.callback_param_offset)
        print(
            "retrieve [vm#%d] tensorflow task=%d cmd=%d, obj=%d, dstore=%lx, done=%d"
            % (task.vm_id, task.node_id, param.base.cmd_id,
               param.base.object_id, param.base.dstore_size, param.base.done))
        print(
            "retrieve [vm#%d] callback node cmd=%d, obj=%d, dstore=%lx, done=%d"
            % (task.vm_id, callback_param.base.cmd_id,
               callback_param.base.object_id, callback_param.base.dstore_size,
               callback_param.base.done))

        cmd_id = param.base.cmd_id

        try:
            if cmd_id == TF_PY_NW_CALLBACK_DONE:
                param.base.done = STATUS_TASK_DONE
                ret = fcntl.ioctl(kvm_fd, IOCTL_KVM_NOTIFY_TASK_FINISHED,
                                  task.node_id)
                if ret < 0:
                    print("notify task completion failed: %d\n" % ret)
                if callback_stack and \
                   callback_stack[-1]["callback_id"] == param.base.object_id:
                    print("callback is finished")
                    return STATUS_CALLBACK_DONE
                else:
                    print("callback is error")
                    return STATUS_CALLBACK_ERROR

            if cmd_id == TF_PY_SESSION_INIT:
                print("SessionInit!!!")
                param1 = parse_param(vm_id, mm, param, param.param1)
                print(param1)
                sess = tf.Session(param1)

                # assign object_id
                object_dict[object_id] = sess
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_SESSION_ENTER:
                sess = object_dict[param.base.object_id]
                ctx_sess = sess.__enter__()
                if sess is ctx_sess:
                    pass
                else:  # unlikely
                    print("unlikely to search for sess")
                    param.base.object_id = next(
                        obj_id for obj_id, obj in object_dict.items()
                        if obj is ctx_sess)

            elif cmd_id == TF_PY_SESSION_EXIT:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)

                sess = object_dict[param.base.object_id]
                sess.__exit__(param1, param2, param3)

            elif cmd_id == TF_PY_SESSION_DEL:
                sess = object_dict[param.base.object_id]
                sess.__del__()

            # deprecated
            elif cmd_id == TF_PY_SESSION_RUN:
                sess = object_dict[param.base.object_id]
                param1 = parse_param(vm_id, mm, param, param.param1)

                if type(param1) == NwObject:
                    print("get NwObject=%d" % param1.object_id())
                    param1 = object_dict[param1.object_id()]
                    print(param1)

                ret_val = sess.run(param1)
                print(ret_val)

                writeback_result(vm_id, mm, param, param.ret_val1, ret_val)

            elif cmd_id == TF_PY_TPU_CLUSTER_RESOLVER_INIT:
                print("resloverInit!!!")
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                # param1..param3 default to None, so no substitution is needed.
                print("TPUClusterResolver", param1, param2, param3)
                tpu_grpc = tf.contrib.cluster_resolver.TPUClusterResolver(
                    tpu=param1, zone=param2, project=param3)

                # assign object_id
                object_dict[object_id] = tpu_grpc
                param.base.object_id = object_id
                print("assign obj_id=%d" % object_id)
                object_id += 1

            # deprecated
            elif cmd_id == TF_PY_TPU_CLUSTER_RESOLVER_MASTER:
                # FIXED: use __getattr__
                print("master!!")
                tpu_grpc = object_dict[param.base.object_id]
                # FIXED: may have parameters
                tpu_grpc_url = tpu_grpc.master()

                # serialize return value
                writeback_result(vm_id, mm, param, param.ret_val1,
                                 tpu_grpc_url)

            elif cmd_id == TF_PY_TPU_INITIALIZE_SYSTEM:
                # TODO: may have parameters
                ts = tpu.initialize_system()

                object_dict[object_id] = ts
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_TPU_SHUTDOWN_SYSTEM:
                # TODO: may have parameters
                ts = tpu.shutdown_system()

                object_dict[object_id] = ts
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_GLOBAL_VARIABLES_INITIALIZER:
                # TODO: may have parameters
                ts = tf.global_variables_initializer()

                object_dict[object_id] = ts
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_ONES:
                print("param1 size=%ld,offset=%ld" %
                      (param.param1.size, param.param1.offset))
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                if param2 is None:
                    param2 = dtypes.float32
                print(param2)
                var = tf.ones(param1, param2)

                object_dict[object_id] = var
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_RANDOM_UNIFORM:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                param5 = parse_param(vm_id, mm, param, param.param5)
                param6 = parse_param(vm_id, mm, param, param.param6)
                if param2 is None:
                    param2 = 0
                if param4 is None:
                    param4 = dtypes.float32
                print(param1, param2, param3, param4)
                var = tf.random_uniform(param1, param2, param3, param4, param5,
                                        param6)

                object_dict[object_id] = var
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_TRANSPOSE:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                param1 = object_dict[param1.object_id()]
                if param3 is None:
                    param3 = "transpose"
                if param4 is None:
                    param4 = False
                print("transpose", param1, param2, param3, param4)
                var = tf.transpose(param1, param2, param3, param4)

                object_dict[object_id] = var
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_CAST:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param1 = object_dict[param1.object_id()]
                print("cast", param1, param2, param3)
                var = tf.cast(param1, param2, param3)

                object_dict[object_id] = var
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_EXPAND_DIMS:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                param1 = object_dict[param1.object_id()]
                print("expand_dims", param1, param2, param3, param4)
                var = tf.expand_dims(param1, param2, param3, param4)

                object_dict[object_id] = var
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_CONCAT:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param1 = object_dict[param1.object_id()]
                if param3 is None:
                    param3 = "concat"
                print("concat", param1, param2, param3)
                var = tf.concat(param1, param2, param3)

                object_dict[object_id] = var
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_EQUAL:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param1 = object_dict[param1.object_id()]
                print("equal", param1, param2, param3)
                if isinstance(param2, NwObject):
                    param2 = object_dict[param2.object_id()]
                result = tf.equal(param1, param2, param3)
                print(result)

                object_dict[object_id] = result
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_FIXED_LEN_FEATURE:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)

                feature = tf.FixedLenFeature(param1, param2, param3)
                print(feature)

                object_dict[object_id] = feature
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_VAR_LEN_FEATURE:
                param1 = parse_param(vm_id, mm, param, param.param1)

                feature = tf.VarLenFeature(param1)
                print(feature)

                object_dict[object_id] = feature
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_PARSE_SINGLE_EXAMPLE:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                print(param1, param2)

                # expand embedded NwObject
                if isinstance(param1, NwObject):
                    param1 = object_dict[param1.object_id()]
                dict_walker(param2)
                print("after translation", param1, param2)

                result = tf.parse_single_example(param1, param2, param3,
                                                 param4)
                print(result)
                dict_mapper(result)
                print(result)
                writeback_result(vm_id, mm, param, param.ret_val1, result)

            elif cmd_id == TF_PY_CONTROL_FLOW_OPS_SWITCH:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                param1 = object_dict[param1.object_id()]
                param2 = object_dict[param2.object_id()]
                print("switch", param1, param2, param3, param4)
                result = control_flow_ops.switch(param1, param2, param3,
                                                 param4)
                print(result)

                mapped_tuple = tuple_mapper(result, [0, 1])
                print(mapped_tuple)
                writeback_result(vm_id, mm, param, param.ret_val1,
                                 mapped_tuple)

            elif cmd_id == TF_PY_CONTROL_FLOW_OPS_MERGE:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param1 = object_dict[param1.object_id()]
                print("merge", param1, param2)
                list_walker(param1)
                print("merge-new", param1, param2)
                result = control_flow_ops.merge(param1, param2)
                print(result)

                mapped_tuple = tuple_mapper(result, [0])
                print(mapped_tuple)
                writeback_result(vm_id, mm, param, param.ret_val1,
                                 mapped_tuple)

            elif cmd_id == TF_PY_TPU_REWRITE:
                # TODO: may have parameters
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                # param2 defaults to None, so no substitution is needed.
                # expand embedded NwObject
                list_walker(param2)
                func = tpu.rewrite(param1, param2)

                object_dict[object_id] = func
                param.base.object_id = object_id
                print("rewrite object_id=%d" % object_id)
                object_id += 1

            elif cmd_id == TF_PY_TPU_RUN_CONFIG:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                param5 = parse_param(vm_id, mm, param, param.param5)
                # param1..param4 default to None, so no substitution is needed.

                # expand embedded NwObject
                param4 = object_dict[param4.object_id()]
                print(param4, param5)
                func = tpu.RunConfig(param1, param2, param3, param4, **param5)

                object_dict[object_id] = func
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_TPU_TPU_ESTIMATOR:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                param5 = parse_param(vm_id, mm, param, param.param5)
                param6 = parse_param(vm_id, mm, param, param.param6)
                param7 = parse_param(vm_id, mm, param, param.param7)
                param8 = parse_param(vm_id, mm, param, param.param8)
                param9 = parse_param(vm_id, mm, param, param.param9)
                param10 = parse_param(vm_id, mm, param, param.param10)
                param11 = parse_param(vm_id, mm, param, param.param11)
                param12 = parse_param(vm_id, mm, param, param.param12)
                # param1..4, param6..9 and param12 default to None, so only
                # the parameters with non-None defaults need substitution.
                if param5 is None:
                    param5 = True
                if param10 is None:
                    param10 = True
                if param11 is None:
                    param11 = True

                # expand embedded NwObject
                param3 = object_dict[param3.object_id()]
                print(param3)
                func = tpu.TPUEstimator(param1, param2, param3, param4, param5,
                                        param6, param7, param8, param9,
                                        param10, param11, param12)

                object_dict[object_id] = func
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_IMAGE_RESIZE_IMAGES:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                param5 = parse_param(vm_id, mm, param, param.param5)
                # default parameter
                if param3 is None:
                    param3 = ResizeMethod.BILINEAR
                if param4 is None:
                    param4 = False
                if param5 is None:
                    param5 = False

                # expand embedded NwObject
                param1 = object_dict[param1.object_id()]
                print(param1)
                img = tf.image.resize_images(param1, param2, param3, param4,
                                             param5)

                # TODO: it may return a float
                object_dict[object_id] = img
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_SLICE:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)

                # expand embedded NwObject
                print(param1, param2, param3)
                param1 = object_dict[param1.object_id()]
                ret = tf.slice(param1, param2, param3, param4)

                object_dict[object_id] = ret
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_SHAPE:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                if param3 is None:
                    param3 = dtypes.int32

                # expand embedded NwObject
                print(param1, param2, param3)
                param1 = object_dict[param1.object_id()]
                ret = tf.shape(param1, param2, param3)

                object_dict[object_id] = ret
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_IMAGE_SAMPLE_DISTORTED_BOUNDING_BOX:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                param5 = parse_param(vm_id, mm, param, param.param5)
                param6 = parse_param(vm_id, mm, param, param.param6)
                param7 = parse_param(vm_id, mm, param, param.param7)
                param8 = parse_param(vm_id, mm, param, param.param8)
                param9 = parse_param(vm_id, mm, param, param.param9)
                param10 = parse_param(vm_id, mm, param, param.param10)
                # default parameter
                if param5 is None:
                    param5 = 0.1

                print("sample_distorted_bounding_box", param1, param2)
                result = tf.image.sample_distorted_bounding_box(
                    param1, param2, param3, param4, param5, param6, param7,
                    param8, param9, param10)
                print(result)

                mapped_tuple = tuple_mapper(result, [0, 1, 2])
                print(mapped_tuple)
                writeback_result(vm_id, mm, param, param.ret_val1,
                                 mapped_tuple)

            elif cmd_id == TF_PY_IMAGE_DRAW_BOUNDING_BOXES:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)

                # expand embedded NwObject
                print(param1, param2, param3)
                param1 = object_dict[param1.object_id()]
                param2 = object_dict[param2.object_id()]
                ret = tf.image.draw_bounding_boxes(param1, param2, param3)

                object_dict[object_id] = ret
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_IMAGE_DECODE_JPEG:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)
                param5 = parse_param(vm_id, mm, param, param.param5)
                param6 = parse_param(vm_id, mm, param, param.param6)
                param7 = parse_param(vm_id, mm, param, param.param7)
                param8 = parse_param(vm_id, mm, param, param.param8)

                if param2 is None:
                    param2 = 0
                if param3 is None:
                    param3 = 1
                if param4 is None:
                    param4 = True
                if param5 is None:
                    param5 = False
                if param6 is None:
                    param6 = 1
                if param7 is None:
                    param7 = ""
                param1 = object_dict[param1.object_id()]
                img = tf.image.decode_jpeg(param1, param2, param3, param4,
                                           param5, param6, param7, param8)

                object_dict[object_id] = img
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_IMAGE_CONVERT_IMAGE_DTYPE:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)
                param4 = parse_param(vm_id, mm, param, param.param4)

                # expand embedded NwObject
                print(param1, param2, param3)
                param1 = object_dict[param1.object_id()]
                if param3 is None:
                    param3 = False
                ret = tf.image.convert_image_dtype(param1, param2, param3,
                                                   param4)

                object_dict[object_id] = ret
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_DATA_DATASET_LIST_FILES:
                param1 = parse_param(vm_id, mm, param, param.param1)
                param2 = parse_param(vm_id, mm, param, param.param2)
                param3 = parse_param(vm_id, mm, param, param.param3)

                print(param1, param2, param3)
                if isinstance(param1, NwObject):
                    param1 = object_dict[param1.object_id()]
                ret = tf.data.Dataset.list_files(param1, param2, param3)

                object_dict[object_id] = ret
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_NW_OBJECT:
                print("nw_object!! id = %d" % param.base.object_id)
                obj = object_dict[param.base.object_id]
                name = parse_param(vm_id, mm, param, param.param1)
                args = parse_param(vm_id, mm, param, param.param2)
                kwargs = parse_param(vm_id, mm, param, param.param3)
                print("NwObject", obj, name, args, kwargs)

                # expand embedded NwObject
                args = list(args)
                list_walker(args)
                args = tuple(args)
                dict_walker(kwargs)
                print("after translation", obj, name, args, kwargs)

                # run
                result = getattr(obj, name)(*(args or []), **(kwargs or {}))
                param.base.object_id = -1
                param.ret_val1.size = 0
                print("analyze type", type(result), result)

                # TODO: go through tuple, dict or list
                if isinstance(result, tuple):
                    result = tuple_mapper(result, range(len(result)))
                if isinstance(result, dict):
                    dict_mapper(result)
                if isinstance(result, list):
                    list_mapper(result)

                # serialize return value
                if is_unpickleable_type(result) or \
                   pickle.pickles(result) is False:
                    object_dict[object_id] = result
                    param.base.object_id = object_id
                    object_id += 1

                elif result is not None:
                    writeback_result(vm_id, mm, param, param.ret_val1, result)

            elif cmd_id == TF_PY_NW_METHOD:
                # Reuse as callback

                #ins = parse_param(vm_id, mm, param, param.param1)
                #name = parse_param(vm_id, mm, param, param.param2)
                #print(ins, name)

                #method = getattr(ins, name)
                #print(method)
                #object_dict[object_id] = method

                cw = callback_constructor(object_id, callback_param, param, mm,
                                          vm_id, queue, kvm_fd)
                object_dict[object_id] = cw
                param.base.object_id = object_id
                object_id += 1

            elif cmd_id == TF_PY_NW_CALLBACK_TEST:
                nw_func = parse_param(vm_id, mm, param, param.param1)
                print(nw_func, nw_func.object_id())
                func = object_dict[nw_func.object_id()]
                print("callback func", func)
                x = parse_param(vm_id, mm, param, param.param2)
                y = parse_param(vm_id, mm, param, param.param3)
                result = func(x, y)
                print(result)
                writeback_result(vm_id, mm, param, param.ret_val1, result)

            else:
                print("unsupported Tensorflow API")

        except Exception as error:
            param.base.done = STATUS_TASK_ERROR
            #mm.flush(task.data_ptr, sizeof(PARAM_BASE))

            print "fault: ", str(error)
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
            traceback.print_stack()

        print("finished [vm#%d] TF task %d cmd %d" %
              (task.vm_id, task.node_id, param.base.cmd_id))

        param.base.done = STATUS_TASK_DONE
        #mm.flush(task.data_ptr, sizeof(PARAM_BASE))
        #mm.flush(INVOKER_FIFO_SIZE + VGPU_DSTORE_SIZE * (vm_id - 1) +
        #         param.base.dstore_offset + param.ret_val1.offset,
        #         param.ret_val1.size)

        # notify hypervisor
        ret = fcntl.ioctl(kvm_fd, IOCTL_KVM_NOTIFY_TASK_FINISHED, task.node_id)
        if ret < 0:
            print("notify task completion failed: %d\n" % ret)
Example #22
def main(unused_argv):
    params = params_dict.ParamsDict(squeezenet_config.SQUEEZENET_CFG,
                                    squeezenet_config.SQUEEZENET_RESTRICTIONS)
    params = params_dict.override_params_dict(params,
                                              FLAGS.config_file,
                                              is_strict=True)
    params = params_dict.override_params_dict(params,
                                              FLAGS.params_override,
                                              is_strict=True)

    params = flags_to_params.override_params_from_input_flags(params, FLAGS)

    total_steps = (
        (params.train.num_epochs * params.train.num_examples_per_epoch) //
        params.train.train_batch_size)
    params.override(
        {
            "train": {
                "total_steps": total_steps
            },
            "eval": {
                "num_steps_per_eval": (total_steps // params.eval.num_evals)
            },
        },
        is_strict=False)

    params.validate()
    params.lock()

    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    save_checkpoints_steps = None
    if not params.use_async_checkpointing:
        save_checkpoints_steps = max(5000, params.train.iterations_per_loop)

    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=params.model_dir,
        save_checkpoints_steps=save_checkpoints_steps,
        session_config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False),
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=params.train.iterations_per_loop,
            num_shards=params.train.num_cores_per_replica,
        ),
    )

    estimator = contrib_tpu.TPUEstimator(
        model_fn=squeezenet_model.model_fn,
        use_tpu=params.use_tpu,
        config=run_config,
        train_batch_size=params.train.train_batch_size,
        eval_batch_size=params.eval.eval_batch_size,
        params=params.as_dict(),
    )

    for eval_cycle in range(params.eval.num_evals):
        current_cycle_last_train_step = ((eval_cycle + 1) *
                                         params.eval.num_steps_per_eval)
        estimator.train(input_fn=data_pipeline.InputReader(FLAGS.data_dir,
                                                           is_training=True),
                        steps=current_cycle_last_train_step)

        tf.logging.info("Running evaluation")
        tf.logging.info(
            "%s",
            estimator.evaluate(input_fn=data_pipeline.InputReader(
                FLAGS.data_dir, is_training=False),
                               steps=(params.eval.num_eval_examples //
                                      params.eval.eval_batch_size)))
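
The step bookkeeping in this snippet is plain integer arithmetic; a small worked example with made-up numbers (all values hypothetical):

# Hypothetical configuration values, for illustration only.
num_epochs = 100
num_examples_per_epoch = 50000
train_batch_size = 1024
num_evals = 10

total_steps = (num_epochs * num_examples_per_epoch) // train_batch_size
num_steps_per_eval = total_steps // num_evals
print(total_steps, num_steps_per_eval)  # 4882 488

# Each cycle then trains up to a cumulative step count, as in the loop above:
# current_cycle_last_train_step = (eval_cycle + 1) * num_steps_per_eval
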
Example #23
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": ColaProcessor,
        "mnli": MnliProcessor,
        "mrpc": MrpcProcessor,
        "xnli": XnliProcessor,
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    if FLAGS.max_seq_length > albert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the ALBERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, albert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.vocab_file,
        sp_cdc_file=FLAGS.cdc_spm_model_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        keep_checkpoint_max=8,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(albert_config=albert_config,
                                num_labels=len(label_list),
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        if FLAGS.data_examples:
            tf.gfile.MakeDirs(FLAGS.data_examples)
            train_file = os.path.join(FLAGS.data_examples, "train.tf_record")
        else:
            train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, train_file)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        tf.logging.set_verbosity(tf.logging.INFO)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(PaddingInputExample())

        if FLAGS.data_examples:
            tf.gfile.MakeDirs(FLAGS.data_examples)
            eval_file = os.path.join(FLAGS.data_examples, "eval.tf_record")
        else:
            eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, eval_file)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(PaddingInputExample())

        if FLAGS.data_examples:
            tf.gfile.MakeDirs(FLAGS.data_examples)
            predict_file = os.path.join(FLAGS.data_examples,
                                        "predict.tf_record")
        else:
            predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        file_based_convert_examples_to_features(predict_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenizer, predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        output_submit_file = os.path.join(FLAGS.output_dir,
                                          "submit_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as pred_writer,\
            tf.gfile.GFile(output_submit_file, "w") as sub_writer:
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, (example, prediction)) in\
                enumerate(zip(predict_examples, result)):
                probabilities = prediction["probabilities"]
                if i >= num_actual_predict_examples:
                    break
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                pred_writer.write(output_line)

                actual_label = label_list[int(prediction["predictions"])]
                sub_writer.write(
                    six.ensure_str(example.guid) + "\t" + actual_label + "\n")
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
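
The padding idiom used in do_eval and do_predict can be read as a small helper; a sketch assuming, as the comment above notes, that the padding sentinels get weight 0.0 in the metrics (pad_to_batch_multiple is a hypothetical name):

def pad_to_batch_multiple(examples, batch_size, make_padding):
    # TPU batches have a fixed size, so grow the list until its length is a
    # multiple of batch_size; padding examples are ignored downstream.
    while len(examples) % batch_size != 0:
        examples.append(make_padding())
    return examples

# eval_examples = pad_to_batch_multiple(eval_examples, FLAGS.eval_batch_size,
#                                       PaddingInputExample)
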
Example #24
def experiment(model_config):
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("SCRIPT START")

    if model_config["use_tpu"]:
        assert 'COLAB_TPU_ADDR' in os.environ, 'Missing TPU; did you request a TPU in Notebook Settings?'

    auth.authenticate_user()

    tf.logging.info("TPU resolver started")

    if 'COLAB_TPU_ADDR' in os.environ:
        TF_MASTER = 'grpc://{}'.format(os.environ['COLAB_TPU_ADDR'])

        # Upload credentials to TPU.
        with tf.Session(TF_MASTER) as sess:
            with open('/content/adc.json', 'r') as f:
                auth_info = json.load(f)
            tf.contrib.cloud.configure_gcs(sess, credentials=auth_info)
        # Now credentials are set for all future sessions on this TPU.
    else:
        TF_MASTER = ''

    # os.environ['PROJECT_NAME']='nnproj'
    # os.environ['PROJECT_ZONE']='boh'
    # os.environ['TPU_NAME']='bah'
    #
    # tpu_cluster_resolver = TPUClusterResolver(
    #     tpu=os.environ['TPU_NAME'],
    #     project=os.environ['PROJECT_NAME'],
    #     zone=os.environ['PROJECT_ZONE'])

    if model_config["use_tpu"]:
        config = tpu.RunConfig(
            # cluster=tpu_cluster_resolver,
            tf_random_seed=RANDOM_SEED,
            master=TF_MASTER,
            model_dir=model_config['model_base_dir'] + os.path.sep + str(model_config["experiment_id"]),
            save_checkpoints_steps=500,
            save_summary_steps=250,
            tpu_config=tpu.TPUConfig(
                iterations_per_loop=500,
                num_shards=8,
                per_host_input_for_training=tpu.InputPipelineConfig.PER_HOST_V1
                ))  # pylint: disable=line-too-long
    else:
        config = tpu.RunConfig(
            # cluster=tpu_cluster_resolver,
            # model_dir=model_config['model_base_dir'] + os.path.sep + str(model_config["experiment_id"]),
            save_checkpoints_steps=500,
            save_summary_steps=250)  # pylint: disable=line-too-long

    tf.logging.info("Creating datasets")
    urmp_train, urmp_eval, urmp_test = [urmp_input.URMPInput(
        mode=mode,
        data_dir=model_config['data_path'],
        transpose_input=False,
        use_bfloat16=model_config['use_bfloat16']) for mode in ['train', 'eval', 'test']]

    tf.logging.info("Assigning TPUEstimator")
    # Optimize in a supervised fashion until validation loss worsens
    separator = tpu.TPUEstimator(
        use_tpu=model_config["use_tpu"],
        model_fn=unet_separator,
        config=config,
        train_batch_size=model_config['batch_size'],
        eval_batch_size=model_config['batch_size'],
        predict_batch_size=model_config['batch_size'],
        params={i: model_config[i] for i in model_config if (i != 'batch_size')}
    )

    if model_config['load_model']:
        tf.logging.info("Load the model")
        current_step = estimator._load_global_step_from_checkpoint_dir(
            model_config['model_base_dir'] + os.path.sep + str(model_config["experiment_id"]))

    if model_config['mode'] == 'train_and_eval':
        tf.logging.info("Train the model")
        # Should be an early stopping here, but it will come with tf 1.10
        separator.train(
            input_fn=urmp_train.input_fn,
            steps=model_config['training_steps'])
        # ...zzz...
        tf.logging.info("Supervised training finished!")
        tf.logging.info("Evaluate model")
        # Evaluate the model.
        eval_result = separator.evaluate(
            input_fn=urmp_eval.input_fn,
            steps=model_config['evaluation_steps'])
        tf.logging.info('Evaluation results: %s' % eval_result)

    elif model_config['mode'] == 'predict':
        tf.logging.info("Test results and save predicted sources:")
        predictions = separator.predict(
            input_fn=urmp_test.input_fn)

        for prediction in predictions:
            Test.save_prediction(prediction,
                                 estimates_path=model_config["estimates_path"],
                                 sample_rate=model_config["expected_sr"])
        Utils.concat_and_upload(model_config["estimates_path"],
                                model_config['model_base_dir'] + os.path.sep + str(model_config["experiment_id"]))
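
TPUEstimator reserves the batch_size key in params (it injects the per-shard batch size itself), which is presumably why the dict comprehension above strips it from model_config. A minimal sketch of that filtering (strip_reserved is a hypothetical name):

RESERVED_PARAMS = ("batch_size",)  # populated by TPUEstimator itself

def strip_reserved(config):
    # Equivalent to {i: config[i] for i in config if i != 'batch_size'}.
    return {k: v for k, v in config.items() if k not in RESERVED_PARAMS}
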
Example #25
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "cola": classifier_utils.ColaProcessor,
        "mnli": classifier_utils.MnliProcessor,
        "mrpc": classifier_utils.MrpcProcessor,
    }

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = create_tokenizer_from_hub_module(
        FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    model_fn = model_fn_builder(
        num_labels=len(label_list),
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        albert_hub_module_handle=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        train_features = classifier_utils.convert_examples_to_features(
            train_examples, label_list, FLAGS.max_seq_length, tokenizer,
            task_name)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_input_fn = classifier_utils.input_fn_builder(
            features=train_features,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        eval_features = classifier_utils.convert_examples_to_features(
            eval_examples, label_list, FLAGS.max_seq_length, tokenizer,
            task_name)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            # Eval will be slightly WRONG on the TPU because it will truncate
            # the last batch.
            eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = classifier_utils.input_fn_builder(
            features=eval_features,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder)

        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            tf.logging.info("***** Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        if FLAGS.use_tpu:
            # Discard batch remainder if running on TPU
            n = len(predict_examples)
            predict_examples = predict_examples[:(
                n - n % FLAGS.predict_batch_size)]

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        classifier_utils.file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
            predict_file)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=FLAGS.use_tpu)

        result = estimator.predict(input_fn=predict_input_fn)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "test_results.tsv")
        with tf.gfile.GFile(output_predict_file, "w") as writer:
            tf.logging.info("***** Predict results *****")
            for prediction in result:
                probabilities = prediction["probabilities"]
                output_line = "\t".join(
                    str(class_probability)
                    for class_probability in probabilities) + "\n"
                writer.write(output_line)
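
Unlike the padding in Example #23, the do_predict branch above truncates the prediction set to a whole number of batches, silently dropping the remainder. The same slice as a helper (truncate_to_batch_multiple is a hypothetical name):

def truncate_to_batch_multiple(examples, batch_size):
    # Drop the remainder so every TPU batch is full; truncated examples
    # simply receive no predictions.
    n = len(examples)
    return examples[:n - n % batch_size]
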
Example #26
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {"race": race_utils.RaceProcessor}

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict' must be True."
        )

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    if FLAGS.max_seq_length > albert_config.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the ALBERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, albert_config.max_position_embeddings))

    tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name](
        use_spm=True if FLAGS.spm_model_file else False,
        do_lower_case=FLAGS.do_lower_case,
        high_only=FLAGS.high_only,
        middle_only=FLAGS.middle_only)

    label_list = processor.get_labels()

    tokenizer = fine_tuning_utils.create_vocab(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file,
        hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=int(FLAGS.save_checkpoints_steps),
        keep_checkpoint_max=0,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    if FLAGS.do_train:
        train_examples = processor.get_train_examples(FLAGS.data_dir)

    model_fn = race_utils.model_fn_builder(
        albert_config=albert_config,
        num_labels=len(label_list),
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=FLAGS.train_step,
        num_warmup_steps=FLAGS.warmup_step,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        if not tf.gfile.Exists(FLAGS.train_file):
            race_utils.file_based_convert_examples_to_features(
                train_examples, label_list, FLAGS.max_seq_length, tokenizer,
                FLAGS.train_file, FLAGS.max_qa_length)
        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", FLAGS.train_step)
        train_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=FLAGS.train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            multiple=len(label_list))
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_step)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        num_actual_eval_examples = len(eval_examples)
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on. These do NOT count towards the metric (all tf.metrics
            # support a per-instance weight, and these get a weight of 0.0).
            while len(eval_examples) % FLAGS.eval_batch_size != 0:
                eval_examples.append(classifier_utils.PaddingInputExample())

        if not tf.gfile.Exists(FLAGS.eval_file):
            race_utils.file_based_convert_examples_to_features(
                eval_examples, label_list, FLAGS.max_seq_length, tokenizer,
                FLAGS.eval_file, FLAGS.max_qa_length)

        tf.logging.info("***** Running evaluation *****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(eval_examples), num_actual_eval_examples,
                        len(eval_examples) - num_actual_eval_examples)
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        # This tells the estimator to run through the entire set.
        eval_steps = None
        # However, if running eval on the TPU, you will need to specify the
        # number of steps.
        if FLAGS.use_tpu:
            assert len(eval_examples) % FLAGS.eval_batch_size == 0
            eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=FLAGS.eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=eval_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.eval_batch_size,
            multiple=len(label_list))

        def _find_valid_cands(curr_step):
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        key_name = "eval_accuracy"
        if tf.gfile.Exists(checkpoint_path + ".index"):
            result = estimator.evaluate(input_fn=eval_input_fn,
                                        steps=eval_steps,
                                        checkpoint_path=checkpoint_path)
            best_perf = result[key_name]
            global_step = result["global_step"]
        else:
            global_step = -1
            best_perf = -1
            checkpoint_path = None
        writer = tf.gfile.GFile(output_eval_file, "w")
        while global_step < FLAGS.train_step:
            steps_and_files = {}
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
                    if cur_filename.split("-")[-1] == "best":
                        continue
                    gstep = int(cur_filename.split("-")[-1])
                    if gstep not in steps_and_files:
                        tf.logging.info(
                            "Add {} to eval list.".format(cur_filename))
                        steps_and_files[gstep] = cur_filename
            tf.logging.info("found {} files.".format(len(steps_and_files)))
            # steps_and_files = sorted(steps_and_files, key=lambda x: x[0])
            if not steps_and_files:
                tf.logging.info(
                    "found 0 file, global step: {}. Sleeping.".format(
                        global_step))
                time.sleep(1)
            else:
                for ele in sorted(steps_and_files.items()):
                    step, checkpoint_path = ele
                    if global_step >= step:
                        if len(_find_valid_cands(step)) > 1:
                            for ext in [
                                    "meta", "data-00000-of-00001", "index"
                            ]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tf.logging.info("removing {}".format(src_ckpt))
                                tf.gfile.Remove(src_ckpt)
                        continue
                    result = estimator.evaluate(
                        input_fn=eval_input_fn,
                        steps=eval_steps,
                        checkpoint_path=checkpoint_path)
                    global_step = result["global_step"]
                    tf.logging.info("***** Eval results *****")
                    for key in sorted(result.keys()):
                        tf.logging.info("  %s = %s", key, str(result[key]))
                        writer.write("%s = %s\n" % (key, str(result[key])))
                    writer.write("best = {}\n".format(best_perf))
                    if result[key_name] > best_perf:
                        best_perf = result[key_name]
                        for ext in ["meta", "data-00000-of-00001", "index"]:
                            src_ckpt = checkpoint_path + ".{}".format(ext)
                            tgt_ckpt = checkpoint_path.rsplit(
                                "-", 1)[0] + "-best.{}".format(ext)
                            tf.logging.info("saving {} to {}".format(
                                src_ckpt, tgt_ckpt))
                            tf.gfile.Copy(src_ckpt, tgt_ckpt, overwrite=True)
                            writer.write("saved {} to {}\n".format(
                                src_ckpt, tgt_ckpt))

                    if len(_find_valid_cands(global_step)) > 1:
                        for ext in ["meta", "data-00000-of-00001", "index"]:
                            src_ckpt = checkpoint_path + ".{}".format(ext)
                            tf.logging.info("removing {}".format(src_ckpt))
                            tf.gfile.Remove(src_ckpt)
                    writer.write("=" * 50 + "\n")
        writer.close()
    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        # Default for non-TPU runs, where evaluate() can run through the
        # whole set; defined here so the call below never sees an unbound
        # predict_steps.
        predict_steps = None
        if FLAGS.use_tpu:
            # TPU requires a fixed batch size for all batches, therefore the number
            # of examples must be a multiple of the batch size, or else examples
            # will get dropped. So we pad with fake examples which are ignored
            # later on.
            while len(predict_examples) % FLAGS.predict_batch_size != 0:
                predict_examples.append(classifier_utils.PaddingInputExample())
            assert len(predict_examples) % FLAGS.predict_batch_size == 0
            predict_steps = int(
                len(predict_examples) // FLAGS.predict_batch_size)

        predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
        race_utils.file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
            predict_file, FLAGS.max_qa_length)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = True if FLAGS.use_tpu else False
        predict_input_fn = classifier_utils.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder,
            task_name=task_name,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            multiple=len(label_list))

        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        result = estimator.evaluate(input_fn=predict_input_fn,
                                    steps=predict_steps,
                                    checkpoint_path=checkpoint_path)

        output_predict_file = os.path.join(FLAGS.output_dir,
                                           "predict_results.txt")
        with tf.gfile.GFile(output_predict_file, "w") as pred_writer:
            # num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            pred_writer.write("***** Predict results *****\n")
            for key in sorted(result.keys()):
                tf.logging.info("  %s = %s", key, str(result[key]))
                pred_writer.write("%s = %s\n" % (key, str(result[key])))
            pred_writer.write("best = {}\n".format(best_perf))
Example #27
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  if not FLAGS.do_train and not FLAGS.do_eval:
    raise ValueError("At least one of `do_train`, `do_eval` must be True.")

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  tf.gfile.MakeDirs(FLAGS.finetune_output_dir)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
  run_config = contrib_tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.finetune_output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=contrib_tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    num_train_steps = int(FLAGS.train_data_size / FLAGS.train_batch_size)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu,
      num_choices=FLAGS.num_choices)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = contrib_tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    if not tf.gfile.Exists(FLAGS.train_file):
      file_based_convert_examples_to_features(
          FLAGS.train_raw_data, 512, tokenizer, FLAGS.train_file)
    train_input_fn = file_based_input_fn_builder(
        input_file=FLAGS.train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True,
        num_choices=FLAGS.num_choices)
    estimator.train(input_fn=train_input_fn, steps=num_train_steps)

  if FLAGS.do_eval:
    # This tells the estimator to run through the entire set.
    if FLAGS.eval_data_size < 0:
      eval_steps = None
    else:
      eval_steps = int(FLAGS.eval_data_size / FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    if not tf.gfile.Exists(FLAGS.eval_file):
      file_based_convert_examples_to_features(
          FLAGS.eval_raw_data, 512, tokenizer, FLAGS.eval_file)
    eval_input_fn = file_based_input_fn_builder(
        input_file=FLAGS.eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder,
        num_choices=FLAGS.num_choices)

    # checkpoints_iterator blocks until a new checkpoint appears.
    for ckpt in contrib_training.checkpoints_iterator(estimator.model_dir):
      try:
        result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
        tf.logging.info("********** Eval results:*******\n")
        for key in sorted(result.keys()):
          tf.logging.info("%s = %s" % (key, str(result[key])))
      except tf.errors.NotFoundError:
        tf.logging.error("Checkpoint path '%s' no longer exists.", ckpt)
Example #28
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    validate_flags_or_throw(albert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = fine_tuning_utils.create_vocab(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file,
        hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        keep_checkpoint_max=0,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    train_examples = None
    num_train_steps = None
    num_warmup_steps = None
    # if FLAGS.do_train:
    #     train_examples = squad_utils.read_squad_examples(
    #         input_file=FLAGS.train_file, is_training=True)
    #     num_train_steps = int(
    #         len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    #     num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
    #
    #     # Pre-shuffle the input to avoid having to make a very large shuffle
    #     # buffer in in the `input_fn`.
    #     rng = random.Random(12345)
    #     rng.shuffle(train_examples)
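    # NOTE: with the example-reading block above commented out, train_examples
    # and num_train_steps stay None, so the do_train branch below (which calls
    # len(train_examples) and passes max_steps=num_train_steps) would fail as
    # written.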

    model_fn = squad_utils.v2_model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        start_n_top=FLAGS.start_n_top,
        end_n_top=FLAGS.end_n_top,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant tensors
        # in memory.

        if not tf.gfile.Exists(FLAGS.train_feature_file):
            train_writer = squad_utils.FeatureWriter(filename=os.path.join(
                FLAGS.train_feature_file),
                                                     is_training=True)
            squad_utils.convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=True,
                output_fn=train_writer.process_feature,
                do_lower_case=FLAGS.do_lower_case)
            train_writer.close()

        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num orig examples = %d", len(train_examples))
        # tf.logging.info("  Num split examples = %d", train_writer.num_features)
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        del train_examples

        train_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.train_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            is_v2=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        with tf.gfile.Open(FLAGS.predict_file) as predict_file:
            prediction_json = json.load(predict_file)["data"]
        eval_examples = squad_utils.read_squad_examples(
            input_file=FLAGS.predict_file, is_training=False)

        if (tf.gfile.Exists(FLAGS.predict_feature_file)
                and tf.gfile.Exists(FLAGS.predict_feature_left_file)):
            tf.logging.info("Loading eval features from {}".format(
                FLAGS.predict_feature_left_file))
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "rb") as fin:
                eval_features = pickle.load(fin)
        else:
            eval_writer = squad_utils.FeatureWriter(
                filename=FLAGS.predict_feature_file, is_training=False)
            eval_features = []

            def append_feature(feature):
                eval_features.append(feature)
                eval_writer.process_feature(feature)

            squad_utils.convert_examples_to_features(
                examples=eval_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature,
                do_lower_case=FLAGS.do_lower_case)
            eval_writer.close()

            with tf.gfile.Open(FLAGS.predict_feature_left_file, "wb") as fout:
                pickle.dump(eval_features, fout)

        tf.logging.info("***** Running predictions *****")
        tf.logging.info("  Num orig examples = %d", len(eval_examples))
        tf.logging.info("  Num split examples = %d", len(eval_features))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.predict_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            is_v2=True)

        def get_result(checkpoint):
            """Evaluate the checkpoint on SQuAD v2.0."""
            # If running eval on the TPU, you will need to specify the number of
            # steps.
            reader = tf.train.NewCheckpointReader(checkpoint)
            global_step = reader.get_tensor(tf.GraphKeys.GLOBAL_STEP)
            all_results = []
            for result in estimator.predict(predict_input_fn,
                                            yield_single_examples=True,
                                            checkpoint_path=checkpoint):
                if len(all_results) % 1000 == 0:
                    tf.logging.info("Processing example: %d" %
                                    (len(all_results)))
                unique_id = int(result["unique_ids"])

                cls_logits = float(result["cls_logits"].flat[0])
                all_results.append(
                    squad_utils.RawResultV2(unique_id=unique_id,
                                            cls_logits=cls_logits))

            output_prediction_file = os.path.join(FLAGS.output_dir,
                                                  "predictions.json")
            output_nbest_file = os.path.join(FLAGS.output_dir,
                                             "nbest_predictions.json")
            output_null_log_odds_file = os.path.join(FLAGS.output_dir,
                                                     "null_odds.json")

            result_dict = {}
            cls_dict = {}
            squad_utils.accumulate_predictions_v2(
                result_dict, cls_dict, eval_examples, eval_features,
                all_results, FLAGS.n_best_size, FLAGS.max_answer_length,
                FLAGS.start_n_top, FLAGS.end_n_top)

            from squad_utils import make_qid_to_has_ans
            import numpy as np
            qid_to_has_ans = make_qid_to_has_ans(
                prediction_json)  # maps qid to True/False
            has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
            no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
            print("has_ans", len(has_ans_qids))
            print("no_ans", len(no_ans_qids))

            def compute_metrics_with_threshold(threshold):
                nonlocal result_dict
                result_dict = {}
                tp = 0
                tn = 0
                fp = 0
                fn = 0
                for example_index, example in enumerate(eval_examples):
                    m = np.min(cls_dict[example_index])
                    predict_is_impossible = 1 / (1 + np.exp(-m)) > threshold
                    # predict_is_impossible = m > threshold
                    result_dict[example.qas_id] = m
                    if example.is_impossible:
                        if predict_is_impossible:
                            tp += 1
                        else:
                            fn += 1
                    else:
                        if predict_is_impossible:
                            fp += 1
                        else:
                            tn += 1
                precision = tp / (tp + fp)
                recall = tp / (fn + tp)
                f1 = 2 * tp / (2 * tp + fp + fn)
                tf.logging.info(f"precision: {precision}"
                                f"recall: {recall}"
                                f"f1: {f1}")
                return precision, recall, f1

            # precision, recall, f1 = compute_metrics_with_threshold(0.4)
            precision, recall, f1 = compute_metrics_with_threshold(0.5)
            # precision, recall, f1 = compute_metrics_with_threshold(0.6)

            with tf.gfile.GFile(output_prediction_file, "w") as writer:
                writer.write(json.dumps(result_dict, indent=4) + "\n")

            return {
                "precision": precision,
                "recall": recall,
                "f1": f1,
                "total": len(eval_examples)
            }, int(global_step)

        def _find_valid_cands(curr_step):
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        # output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        # checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        # key_name = "f1"
        # writer = tf.gfile.GFile(output_eval_file, "w")
        # if tf.gfile.Exists(checkpoint_path + ".index"):
        #     result = get_result(checkpoint_path)
        #     best_perf = result[0][key_name]
        #     global_step = result[1]
        # else:
        #     global_step = -1
        #     best_perf = -1
        #     checkpoint_path = None
        # while global_step < num_train_steps:
        #     steps_and_files = {}
        #     filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
        #     for filename in filenames:
        #         if filename.endswith(".index"):
        #             ckpt_name = filename[:-6]
        #             cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
        #             if cur_filename.split("-")[-1] == "best":
        #                 continue
        #             gstep = int(cur_filename.split("-")[-1])
        #             if gstep not in steps_and_files:
        #                 tf.logging.info("Add {} to eval list.".format(cur_filename))
        #                 steps_and_files[gstep] = cur_filename
        #     tf.logging.info("found {} files.".format(len(steps_and_files)))
        #     if not steps_and_files:
        #         tf.logging.info("found 0 file, global step: {}. Sleeping."
        #                         .format(global_step))
        #         time.sleep(60)
        #     else:
        #         for ele in sorted(steps_and_files.items()):
        #             step, checkpoint_path = ele
        #             if global_step >= step:
        #                 if len(_find_valid_cands(step)) > 1:
        #                     for ext in ["meta", "data-00000-of-00001", "index"]:
        #                         src_ckpt = checkpoint_path + ".{}".format(ext)
        #                         tf.logging.info("removing {}".format(src_ckpt))
        #                         tf.gfile.Remove(src_ckpt)
        #                 continue
        #             result, global_step = get_result(checkpoint_path)
        #             tf.logging.info("***** Eval results *****")
        #             for key in sorted(result.keys()):
        #                 tf.logging.info("  %s = %s", key, str(result[key]))
        #                 writer.write("%s = %s\n" % (key, str(result[key])))
        #             if result[key_name] > best_perf:
        #                 best_perf = result[key_name]
        #                 for ext in ["meta", "data-00000-of-00001", "index"]:
        #                     src_ckpt = checkpoint_path + ".{}".format(ext)
        #                     tgt_ckpt = checkpoint_path.rsplit(
        #                         "-", 1)[0] + "-best.{}".format(ext)
        #                     tf.logging.info("saving {} to {}".format(src_ckpt, tgt_ckpt))
        #                     tf.gfile.Copy(src_ckpt, tgt_ckpt, overwrite=True)
        #                     writer.write("saved {} to {}\n".format(src_ckpt, tgt_ckpt))
        #             writer.write("best {} = {}\n".format(key_name, best_perf))
        #             tf.logging.info("  best {} = {}\n".format(key_name, best_perf))
        #
        #             if len(_find_valid_cands(global_step)) > 2:
        #                 for ext in ["meta", "data-00000-of-00001", "index"]:
        #                     src_ckpt = checkpoint_path + ".{}".format(ext)
        #                     tf.logging.info("removing {}".format(src_ckpt))
        #                     tf.gfile.Remove(src_ckpt)
        #             writer.write("=" * 50 + "\n")

        result, global_step = get_result(FLAGS.init_checkpoint)
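
The commented-out block above preserves a common best-checkpoint pattern: poll the output directory for new checkpoints, evaluate each one, copy the best performer to a "model.ckpt-best" alias, and delete stale files. Below is a minimal, self-contained sketch of that pattern; evaluate_fn is a hypothetical stand-in for get_result, and the model.ckpt-<step> file layout is assumed from the code above.

import os
import time

import tensorflow as tf


def track_best_checkpoint(output_dir, evaluate_fn, num_train_steps,
                          key_name="f1", poll_secs=60):
    """Polls output_dir, evaluating new checkpoints and keeping the best.

    evaluate_fn(ckpt_path) must return (metrics_dict, global_step).
    """
    best_perf, global_step = -1.0, -1
    while global_step < num_train_steps:
        # Index checkpoints by global step, skipping the "-best" alias.
        steps_and_files = {}
        for filename in tf.gfile.ListDirectory(output_dir):
            if filename.endswith(".index"):
                ckpt_name = filename[:-len(".index")]
                suffix = ckpt_name.split("-")[-1]
                if suffix != "best":
                    steps_and_files[int(suffix)] = os.path.join(
                        output_dir, ckpt_name)
        new_steps = sorted(s for s in steps_and_files if s > global_step)
        if not new_steps:
            time.sleep(poll_secs)  # The trainer has not saved anything new.
            continue
        for step in new_steps:
            ckpt_path = steps_and_files[step]
            result, global_step = evaluate_fn(ckpt_path)
            if result[key_name] > best_perf:
                best_perf = result[key_name]
                # A checkpoint is three files; copy each to the "-best" alias.
                for ext in ["meta", "data-00000-of-00001", "index"]:
                    tf.gfile.Copy(
                        "%s.%s" % (ckpt_path, ext),
                        "%s-best.%s" % (ckpt_path.rsplit("-", 1)[0], ext),
                        overwrite=True)
    return best_perf
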
Example #29
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)
    validate_flags_or_throw(bert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=FLAGS.iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    num_train_steps = None
    num_warmup_steps = None
    if FLAGS.do_train:
        num_train_features = FLAGS.train_num_precomputed
        num_train_steps = int(num_train_features / FLAGS.train_batch_size *
                              FLAGS.num_train_epochs)
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)
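        # Worked example with illustrative (not default) numbers: 100,000
        # precomputed features, batch size 32, and 3 epochs give
        # int(100000 / 32 * 3) = 9375 train steps; warmup_proportion 0.1
        # then gives int(9375 * 0.1) = 937 warmup steps.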

    model_fn = model_fn_builder(bert_config=bert_config,
                                init_checkpoint=FLAGS.init_checkpoint,
                                learning_rate=FLAGS.learning_rate,
                                num_train_steps=num_train_steps,
                                num_warmup_steps=num_warmup_steps,
                                use_tpu=FLAGS.use_tpu,
                                use_one_hot_embeddings=FLAGS.use_tpu)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        tf.logging.info("***** Running training on precomputed features *****")
        tf.logging.info("  Num split examples = %d", num_train_features)
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", num_train_steps)
        train_filename = FLAGS.train_precomputed_file
        train_input_fn = input_fn_builder(input_file=train_filename,
                                          seq_length=FLAGS.max_seq_length,
                                          is_training=True,
                                          drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        tf.logging.info(
            "***** Running predictions on precomputed features *****")
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)
        eval_filename = FLAGS.predict_precomputed_file
        predict_input_fn = input_fn_builder(input_file=eval_filename,
                                            seq_length=FLAGS.max_seq_length,
                                            is_training=False,
                                            drop_remainder=False)

        def create_int_feature(values):
            return tf.train.Feature(int64_list=tf.train.Int64List(
                value=list(values)))

        def create_float_feature(values):
            return tf.train.Feature(float_list=tf.train.FloatList(
                value=list(values)))

        # If running eval on the TPU, you will need to specify the number of
        # steps.
        processed_examples = 0
        output_file = os.path.join(FLAGS.output_dir, FLAGS.output_pred_file)
        tf.logging.info("Writing results to: %s", output_file)
        with tf.python_io.TFRecordWriter(output_file) as writer:
            for result in estimator.predict(predict_input_fn,
                                            yield_single_examples=True):
                if processed_examples % 1000 == 0:
                    tf.logging.info("Processing example: %d" %
                                    processed_examples)
                features = collections.OrderedDict()
                features["img_id"] = create_int_feature([result["img_id"]])
                features["annot_id"] = create_int_feature([result["annot_id"]])
                features["choice_id"] = create_int_feature(
                    [result["choice_id"]])
                features["label"] = create_int_feature([result["label"]])
                features["output_logits"] = create_float_feature(
                    result["output_logits"])
                writer.write(
                    tf.train.Example(features=tf.train.Features(
                        feature=features)).SerializeToString())
                processed_examples += 1
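
The records written above can be read back with tf.parse_single_example. A minimal reader sketch follows; num_logits is an assumption and must match the length of the output_logits vector the model actually emitted.

import tensorflow as tf


def parse_prediction_record(serialized_example, num_logits):
    """Parses one TFRecord written by the predict loop above."""
    feature_spec = {
        "img_id": tf.FixedLenFeature([], tf.int64),
        "annot_id": tf.FixedLenFeature([], tf.int64),
        "choice_id": tf.FixedLenFeature([], tf.int64),
        "label": tf.FixedLenFeature([], tf.int64),
        # num_logits must match the model head that produced the records.
        "output_logits": tf.FixedLenFeature([num_logits], tf.float32),
    }
    return tf.parse_single_example(serialized_example, feature_spec)
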
Example #30
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "sst-2": run_classifier.SST2Processor,
        "mnli": run_classifier.MnliProcessor
    }

    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint1)
    tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                  FLAGS.init_checkpoint2)

    if not tf.train.checkpoint_exists(FLAGS.init_checkpoint1):
        raise TFCheckpointNotFoundError("checkpoint1 does not exist!")

    if not tf.train.checkpoint_exists(FLAGS.init_checkpoint2) and \
       not FLAGS.use_random:
        raise TFCheckpointNotFoundError("checkpoint2 does not exist!")

    bert_config1 = modeling.BertConfig.from_json_file(FLAGS.bert_config_file1)
    bert_config2 = modeling.BertConfig.from_json_file(FLAGS.bert_config_file2)

    if FLAGS.max_seq_length > bert_config1.max_position_embeddings:
        raise ValueError(
            "Cannot use sequence length %d because the BERT model "
            "was only trained up to sequence length %d" %
            (FLAGS.max_seq_length, bert_config1.max_position_embeddings))

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name, ))

    processor = processors[task_name]()

    label_list = processor.get_labels()

    tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file,
                                           do_lower_case=FLAGS.do_lower_case)

    all_results = []

    predict_examples = processor.get_test_examples(FLAGS.diff_input_file)
    num_actual_predict_examples = len(predict_examples)

    # For single-sentence tasks (like SST-2), eg.text_b is None.
    original_data = [(eg.text_a, eg.text_b) for eg in predict_examples]
    if FLAGS.use_tpu:
        # TPU requires a fixed batch size for all batches, therefore the number
        # of examples must be a multiple of the batch size, or else examples
        # will get dropped. So we pad with fake examples which are ignored
        # later on.
        while len(predict_examples) % FLAGS.predict_batch_size != 0:
            predict_examples.append(run_classifier.PaddingInputExample())
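        # Example: 1,003 examples with predict_batch_size=32 are padded up to
        # 1,024 (32 batches); the 21 fake examples are sliced off again below
        # using num_actual_predict_examples.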

    predict_file = os.path.join(FLAGS.init_checkpoint1,
                                FLAGS.exp_name + ".predict.tf_record")

    run_classifier.file_based_convert_examples_to_features(
        predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
        predict_file)

    for bert_config_type, output_dir in [
        (bert_config1, FLAGS.init_checkpoint1),
        (bert_config2, FLAGS.init_checkpoint2)
    ]:
        tpu_cluster_resolver = None
        if FLAGS.use_tpu and FLAGS.tpu_name:
            tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
                FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

        is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
        run_config = contrib_tpu.RunConfig(
            cluster=tpu_cluster_resolver,
            master=FLAGS.master,
            model_dir=output_dir,
            save_checkpoints_steps=FLAGS.save_checkpoints_steps,
            tpu_config=contrib_tpu.TPUConfig(
                iterations_per_loop=FLAGS.iterations_per_loop,
                num_shards=FLAGS.num_tpu_cores,
                per_host_input_for_training=is_per_host))

        model_fn = run_classifier.model_fn_builder(
            bert_config=bert_config_type,
            num_labels=len(label_list),
            # This init checkpoint is eventually overridden by the estimator.
            init_checkpoint=FLAGS.output_dir,
            learning_rate=FLAGS.learning_rate,
            num_train_steps=None,
            num_warmup_steps=None,
            use_tpu=FLAGS.use_tpu,
            use_one_hot_embeddings=FLAGS.use_tpu)

        # If TPU is not available, this will fall back to normal Estimator on CPU
        # or GPU.
        estimator = contrib_tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)

        tf.logging.info("***** Running prediction*****")
        tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                        len(predict_examples), num_actual_predict_examples,
                        len(predict_examples) - num_actual_predict_examples)
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_drop_remainder = FLAGS.use_tpu
        predict_input_fn = run_classifier.file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=predict_drop_remainder)

        result = list(estimator.predict(input_fn=predict_input_fn))
        all_results.append(result)

    all_results[0] = all_results[0][:num_actual_predict_examples]
    all_results[1] = all_results[1][:num_actual_predict_examples]

    assert len(all_results[0]) == len(all_results[1])

    # Assuming model1's predictions are gold labels, calculate model2's accuracy
    score = 0
    for prob1, prob2 in zip(all_results[0], all_results[1]):
        if np.argmax(prob1["probabilities"]) == np.argmax(
                prob2["probabilities"]):
            score += 1

    tf.logging.info("Agreement score = %.6f",
                    float(score) / num_actual_predict_examples)
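    # Worked example: if model2 matches model1's argmax on 850 of 1,000
    # actual examples, the agreement score logged above is 0.850000.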

    # Compute the average value of |p1 - p2|, the L2 distance on the
    # probability simplex. Unlike KL divergence this is a bounded metric,
    # but the results are not comparable across tasks with different
    # numbers of classes.
    distances = []
    for prob1, prob2 in zip(all_results[0], all_results[1]):
        distances.append(
            np.linalg.norm(prob1["probabilities"] - prob2["probabilities"]))
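    # Worked example: for p1 = [0.7, 0.3] and p2 = [0.5, 0.5],
    # |p1 - p2| = sqrt(0.2**2 + 0.2**2), approximately 0.282843.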

    tf.logging.info("Average length |p1 - p2| = %.8f", np.mean(distances))
    tf.logging.info("Max length |p1 - p2| = %.8f", np.max(distances))
    tf.logging.info("Min length |p1 - p2| = %.8f", np.min(distances))
    tf.logging.info("Std length |p1 - p2| = %.8f", np.std(distances))

    if FLAGS.diff_type == "kld1":
        all_kld = []

        for prob1, prob2 in zip(all_results[0], all_results[1]):
            all_kld.append(
                stats.entropy(prob1["probabilities"], prob2["probabilities"]))
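        # scipy.stats.entropy(pk, qk) returns KL(pk || qk) =
        # sum(pk * log(pk / qk)), normalizing both arguments to sum to 1.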

        tf.logging.info("Average kl-divergence (p1, p2) = %.8f",
                        np.mean(all_kld))
        tf.logging.info("Max kl-divergence (p1, p2) = %.8f", np.max(all_kld))
        tf.logging.info("Min kl-divergence (p1, p2) = %.8f", np.min(all_kld))
        tf.logging.info("Std kl-divergence (p1, p2) = %.8f", np.std(all_kld))

    elif FLAGS.diff_type == "kld2":
        all_kld = []

        for prob1, prob2 in zip(all_results[0], all_results[1]):
            all_kld.append(
                stats.entropy(prob2["probabilities"], prob1["probabilities"]))

        tf.logging.info("Average kl-divergence (p2, p1) = %.8f",
                        np.mean(all_kld))
        tf.logging.info("Max kl-divergence (p2, p1) = %.8f", np.max(all_kld))
        tf.logging.info("Min kl-divergence (p2, p1) = %.8f", np.min(all_kld))
        tf.logging.info("Std kl-divergence (p2, p1) = %.8f", np.std(all_kld))

    if FLAGS.diff_output_file:

        # Trim any padded examples; len(original_data) equals
        # num_actual_predict_examples, matching the slicing above.
        all_results[0] = all_results[0][:len(original_data)]
        all_results[1] = all_results[1][:len(original_data)]

        with tf.gfile.GFile(FLAGS.diff_output_file, "w") as f:
            for i, (eg, prob1, prob2) in enumerate(
                    zip(original_data, all_results[0], all_results[1])):

                if i % 1000 == 0:
                    tf.logging.info("Writing instance %d", i + 1)

                p1_items = [p1.item() for p1 in prob1["probabilities"]]
                p2_items = [p2.item() for p2 in prob2["probabilities"]]

                # Format one probability per class so two-class tasks
                # like SST-2 work as well as three-class tasks like MNLI.
                prob1_str = "\t".join("%.6f" % p for p in p1_items)
                prob2_str = "\t".join("%.6f" % p for p in p2_items)

                output = "%s\t%s\t%s\t%s\n" % (eg[0], eg[1], prob1_str,
                                               prob2_str)
                f.write(output)

    return