Example #1
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    # Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if FLAGS.do_predict:
        predict_dir = FLAGS.predict_dir
        if not tf.gfile.Exists(predict_dir):
            tf.gfile.MakeDirs(predict_dir)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval, `do_predict` or "
            "`do_submit` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    processor = PaperProcessor(features=FLAGS.features.split(","))
    label_list = processor.get_labels()

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn(
        len(label_list) if label_list is not None else None)

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           config=run_config)

    if FLAGS.do_train:
        train_file_base = "{}.len-{}.train.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_file = os.path.join(FLAGS.output_dir, train_file_base)
        tf.logging.info("Use tfrecord file {}".format(train_file))

        train_examples = processor.get_train_examples(FLAGS.data_dir)
        np.random.shuffle(train_examples)
        tf.logging.info("Num of train samples: {}".format(len(train_examples)))

        FLAGS.train_steps = int(
            len(train_examples) / FLAGS.train_batch_size *
            FLAGS.num_train_epochs)
        FLAGS.warmup_steps = int(FLAGS.train_steps * FLAGS.warmup_proportion)

        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, train_file,
                                                FLAGS.num_passes)

        tf.logging.info("***** Running training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", FLAGS.train_steps)

        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)

        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_eval or FLAGS.do_predict:
        if FLAGS.eval_split == "dev":
            eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        elif FLAGS.eval_split == "train":
            eval_examples = processor.get_train_examples(FLAGS.data_dir,
                                                         shuffle=False)
        else:
            eval_examples = processor.get_test_examples(FLAGS.data_dir)

        tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

    if FLAGS.do_eval:
        # TPU requires a fixed batch size for all batches, therefore the number
        # of examples must be a multiple of the batch size, or else examples
        # will get dropped. So we pad with fake examples which are ignored
        # later on. These do NOT count towards the metric (all tf.metrics
        # support a per-instance weight, and these get a weight of 0.0).
        #
        # Modified in XL: We also adopt the same mechanism for GPUs.
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        # Filter out all checkpoints in the directory
        steps_and_files = []
        filenames = tf.gfile.ListDirectory(FLAGS.model_dir)

        for filename in filenames:
            if filename.endswith(".index"):
                ckpt_name = filename[:-6]
                cur_filename = join(FLAGS.model_dir, ckpt_name)
                global_step = int(cur_filename.split("-")[-1])
                tf.logging.info("Add {} to eval list.".format(cur_filename))
                steps_and_files.append([global_step, cur_filename])
        steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

        # Decide whether to evaluate all ckpts
        if not FLAGS.eval_all_ckpt:
            steps_and_files = steps_and_files[-1:]

        eval_results = []
        for global_step, filename in sorted(steps_and_files,
                                            key=lambda x: x[0]):
            ret = estimator.evaluate(input_fn=eval_input_fn,
                                     steps=eval_steps,
                                     checkpoint_path=filename)

            ret["step"] = global_step
            ret["path"] = filename

            eval_results.append(ret)

            tf.logging.info("=" * 80)
            log_str = "Eval result | "
            for key, val in sorted(ret.items(), key=lambda x: x[0]):
                log_str += "{} {} | ".format(key, val)
            tf.logging.info(log_str)

        key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy"
        eval_results.sort(key=lambda x: x[key_name], reverse=True)

        tf.logging.info("=" * 80)
        log_str = "Best result | "
        for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
            log_str += "{} {} | ".format(key, val)
        tf.logging.info(log_str)

    if FLAGS.do_predict:
        eval_file_base = "{}.len-{}.{}.predict.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, eval_file)

        pred_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False)

        predict_results = []
        with tf.gfile.Open(
                os.path.join(predict_dir, "{}.tsv".format(task_name)),
                "w") as fout:
            fout.write("index\tprediction\n")

            for pred_cnt, result in enumerate(
                    estimator.predict(input_fn=pred_input_fn,
                                      yield_single_examples=True,
                                      checkpoint_path=FLAGS.predict_ckpt)):
                if pred_cnt % 1000 == 0:
                    tf.logging.info(
                        "Predicting submission for example: {}".format(
                            pred_cnt))

                logits = [float(x) for x in result["logits"].flat]
                predict_results.append(logits)

                if len(logits) == 1:
                    label_out = logits[0]
                elif len(logits) == 2:
                    if logits[1] - logits[0] > FLAGS.predict_threshold:
                        label_out = label_list[1]
                    else:
                        label_out = label_list[0]
                elif len(logits) > 2:
                    max_index = np.argmax(np.array(logits, dtype=np.float32))
                    label_out = label_list[max_index]
                else:
                    raise NotImplementedError

                fout.write("{}\t{}\n".format(pred_cnt, label_out))

        predict_json_path = os.path.join(predict_dir,
                                         "{}.logits.json".format(task_name))

        with tf.gfile.Open(predict_json_path, "w") as fp:
            json.dump(predict_results, fp, indent=4)
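Note: every example on this page builds the same `tokenize_fn` closure around a SentencePiece model. The sketch below shows, in isolation, roughly what the `preprocess_text` plus `encode_ids` pair reduces to; it uses only the stock `sentencepiece` API (`Load`, `EncodeAsIds`), while the real XLNet helpers additionally handle accent stripping and special symbols.

import sentencepiece as spm

def make_tokenize_fn(spiece_model_file, uncased=False):
    """Map raw text to SentencePiece ids.

    A simplified stand-in for XLNet's preprocess_text/encode_ids pair.
    """
    sp = spm.SentencePieceProcessor()
    sp.Load(spiece_model_file)

    def tokenize_fn(text):
        if uncased:
            text = text.lower()
        return sp.EncodeAsIds(text)

    return tokenize_fn

# Usage (hypothetical model path):
# tokenize_fn = make_tokenize_fn("spiece.model", uncased=True)
# ids = tokenize_fn("XLNet fine-tuning example")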
Example #2
def main(_):
    logger.set_verbosity(logger.INFO)

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    # TPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn()

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(
            model_fn=model_fn,
            config=run_config)

    if FLAGS.do_train:
        train_file_base = "{}.len-{}.train.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_file = os.path.join(FLAGS.output_dir, train_file_base)

        if not tf.gfile.Exists(train_file) or FLAGS.overwrite_data:
            train_examples = get_examples(FLAGS.data_dir, "train")
            random.shuffle(train_examples)
            file_based_convert_examples_to_features(
                train_examples, tokenize_fn, train_file)

        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_eval:
        eval_examples = get_examples(FLAGS.data_dir, FLAGS.eval_split)
        logger.info("Num of eval samples: {}".format(len(eval_examples)))

        # TPU requires a fixed batch size for all batches, therefore the number
        # of examples must be a multiple of the batch size, or else examples
        # will get dropped. So we pad with fake examples which are ignored
        # later on. These do NOT count towards the metric (all tf.metrics
        # support a per-instance weight, and these get a weight of 0.0).
        #
        # Modified in XL: We also adopt the same mechanism for GPUs.

        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)

        if FLAGS.high_only:
            eval_file_base = "high." + eval_file_base
        elif FLAGS.middle_only:
            eval_file_base = "middle." + eval_file_base

        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)
        file_based_convert_examples_to_features(
            eval_examples, tokenize_fn, eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        ret = estimator.evaluate(
            input_fn=eval_input_fn,
            steps=eval_steps)

        # Log current result
        logger.info("=" * 80)
        log_str = "Eval | "
        for key, val in ret.items():
            log_str += "{} {} | ".format(key, val)
        logger.info(log_str)
        logger.info("=" * 80)
Example #3
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)
    if FLAGS.do_predict:
        predict_dir = FLAGS.predict_dir
        if not tf.gfile.Exists(predict_dir):
            tf.gfile.MakeDirs(predict_dir)
    processors = {
        "mnli_matched": MnliMatchedProcessor,
        "mnli_mismatched": MnliMismatchedProcessor,
        "sts-b": StsbProcessor,
        "imdb": ImdbProcessor,
        "imdb_t": ImdbThreeClassProcessor,
        "yelp5": Yelp5Processor,
        "imdb_reg": ImdbRegressionClassProcessor
    }
    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval, `do_predict` or "
            "`do_submit` must be True.")
    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)
    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels() if not FLAGS.is_regression else None

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    tf.logging.info("Configuring TPU cluster resolver and run config")
    tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        tpu_address)
    is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
    run_config = tf.contrib.tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        model_dir=FLAGS.model_dir,
        save_checkpoints_steps=2000,
        keep_checkpoint_max=10,
        tpu_config=tf.contrib.tpu.TPUConfig(
            iterations_per_loop=1000,
            num_shards=8,
            per_host_input_for_training=is_per_host))
    model_fn = get_model_fn(
        len(label_list) if label_list is not None else None)

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           config=run_config)

    if FLAGS.do_train:
        train_file_base = "{}.len-{}.train.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_file = os.path.join(FLAGS.output_dir, train_file_base)
        tf.logging.info("Use tfrecord file {}".format(train_file))

        train_examples = processor.get_train_examples(FLAGS.data_dir)
        np.random.shuffle(train_examples)
        tf.logging.info("Num of train samples: {}".format(len(train_examples)))

        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, train_file,
                                                FLAGS.num_passes)

        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)

        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_eval or FLAGS.do_predict:
        if FLAGS.eval_split == "dev":
            eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        else:
            eval_examples = processor.get_test_examples(FLAGS.data_dir)

        tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

    if FLAGS.do_eval:
        # TPU requires a fixed batch size for all batches, therefore the number
        # of examples must be a multiple of the batch size, or else examples
        # will get dropped. So we pad with fake examples which are ignored
        # later on. These do NOT count towards the metric (all tf.metrics
        # support a per-instance weight, and these get a weight of 0.0).
        #
        # Modified in XL: We also adopt the same mechanism for GPUs.
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        # Filter out all checkpoints in the directory
        steps_and_files = []
        filenames = tf.gfile.ListDirectory(FLAGS.model_dir)

        for filename in filenames:
            if filename.endswith(".index"):
                ckpt_name = filename[:-6]
                cur_filename = join(FLAGS.model_dir, ckpt_name)
                global_step = int(cur_filename.split("-")[-1])
                tf.logging.info("Add {} to eval list.".format(cur_filename))
                steps_and_files.append([global_step, cur_filename])
        steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

        # Decide whether to evaluate all ckpts
        if not FLAGS.eval_all_ckpt:
            steps_and_files = steps_and_files[-1:]

        eval_results = []
        for global_step, filename in sorted(steps_and_files,
                                            key=lambda x: x[0]):
            ret = estimator.evaluate(input_fn=eval_input_fn,
                                     steps=eval_steps,
                                     checkpoint_path=filename)

            ret["step"] = global_step
            ret["path"] = filename

            eval_results.append(ret)

            tf.logging.info("=" * 80)
            log_str = "Eval result | "
            for key, val in sorted(ret.items(), key=lambda x: x[0]):
                log_str += "{} {} | ".format(key, val)
            tf.logging.info(log_str)

        key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy"
        eval_results.sort(key=lambda x: x[key_name], reverse=True)

        tf.logging.info("=" * 80)
        log_str = "Best result | "
        for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
            log_str += "{} {} | ".format(key, val)
        tf.logging.info(log_str)

    if FLAGS.do_predict:
        eval_file_base = "{}.len-{}.{}.predict.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)
        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, eval_file)

        pred_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False)

        predict_results = []
        with tf.gfile.Open(
                os.path.join(predict_dir, "{}.tsv".format(task_name)),
                "w") as fout:
            fout.write("index\tprediction\n")
            for pred_cnt, result in enumerate(
                    estimator.predict(input_fn=pred_input_fn,
                                      yield_single_examples=True,
                                      checkpoint_path=FLAGS.predict_ckpt)):
                if pred_cnt % 1000 == 0:
                    tf.logging.info(
                        "Predicting submission for example: {}".format(
                            pred_cnt))

                logits = [float(x) for x in result["logits"].flat]
                predict_results.append(logits)

                if len(logits) == 1:
                    label_out = logits[0]
                elif len(logits) == 2:
                    if logits[1] - logits[0] > FLAGS.predict_threshold:
                        label_out = label_list[1]
                    else:
                        label_out = label_list[0]
                elif len(logits) > 2:
                    max_index = np.argmax(np.array(logits, dtype=np.float32))
                    label_out = label_list[max_index]
                else:
                    raise NotImplementedError

                fout.write("{}\t{}\n".format(pred_cnt, label_out))

        predict_json_path = os.path.join(predict_dir,
                                         "{}.logits.json".format(task_name))

        with tf.gfile.Open(predict_json_path, "w") as fp:
            json.dump(predict_results, fp, indent=4)
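The checkpoint-enumeration loop above (scanning `model_dir` for `.index` files and parsing the global step from the `model.ckpt-<step>` prefix) is easy to test in isolation. A sketch using plain `os.listdir` in place of `tf.gfile.ListDirectory` for a local directory:

import os

def list_checkpoints(model_dir):
    """Return (global_step, checkpoint_prefix) pairs, sorted by step."""
    steps_and_files = []
    for filename in os.listdir(model_dir):
        if filename.endswith(".index"):
            ckpt_name = filename[:-len(".index")]        # e.g. "model.ckpt-2000"
            global_step = int(ckpt_name.split("-")[-1])  # e.g. 2000
            steps_and_files.append(
                (global_step, os.path.join(model_dir, ckpt_name)))
    return sorted(steps_and_files)

# Latest checkpoint only, as when FLAGS.eval_all_ckpt is False:
# steps_and_files = list_checkpoints(FLAGS.model_dir)[-1:]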
Example #4
def main(_):
    score_reader = ScoreReader(
        '/u02/datasets/psytesting/scores_combined.csv',
        '/u02/texts/', count_words=False,
        system_idx=-1, fname_idx=-2,
        score_idx=17,
        testname_idx=None)
    
    score_reader.normalize_scores()
    ## 365194 train scores, 25013 val scores, 16675 test scores
    ## 374377 train scores, 24959 val scores, 16638 test scores
    trainScores, valScores, testScores = score_reader.split_train_test(0.9, 1)

    # Truncate each split so its size is a multiple of the batch size.
    trainScores = trainScores[:10548]
    valScores = valScores[:1170]
    testScores = testScores[:12000]
    filename_train = [item[0] for item in trainScores]
    score_train = [item[1] for item in trainScores]

    filename_dev = [item[0] for item in valScores]
    score_dev = [item[1] for item in valScores]

    filename_test = [item[0] for item in testScores]
    score_test = [item[1] for item in testScores]

#############################################################
############## SETTING FOR THE FUNCTION #####################
#############################################################

    tf.logging.set_verbosity(tf.logging.INFO)

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if FLAGS.do_predict:
        predict_dir = FLAGS.predict_dir
        if not tf.gfile.Exists(predict_dir):
            tf.gfile.MakeDirs(predict_dir)

    processor = GLUEProcessor(filename=filename_train, label=score_train,
                              filename_dev=filename_dev, label_dev=score_dev,
                              filename_test=filename_test, label_test=score_test)


    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
        "At least one of `do_train`, `do_eval, `do_predict` or "
        "`do_submit` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()


    
####################################################################
####################### tokenization ###############################
####################################################################

    # Changed from the original GLUE script: keep the label list for the
    # regression task (processor.get_labels() returns [0.0]) instead of
    # setting it to None.
    label_list = processor.get_labels() if FLAGS.is_regression else None

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)
    def tokenize_fn(text):
        text = str(text[0])
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn(len(label_list) if label_list is not None else None)

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(
            model_fn=model_fn,
            config=run_config)
    
######################################################################################
##############################TRAINING SETTING #######################################
######################################################################################

    if FLAGS.do_train:
        train_file_base = "{}.len-{}.train.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_file = os.path.join(FLAGS.output_dir, train_file_base)
        tf.logging.info("Use tfrecord file {}".format(train_file))

        train_examples = processor.get_train_examples(
            FLAGS.data_dir,
            filename_train, filename_dev, filename_test,
            score_train, score_dev, score_test)
        
        np.random.shuffle(train_examples)
        tf.logging.info("Num of train samples: {}".format(len(train_examples)))

        file_based_convert_examples_to_features(
            train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
            train_file, FLAGS.num_passes
        )

        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)

        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

######################################################################################
##############################  VAL   SETTING ########################################
######################################################################################
    print('----------------------------------')        
    print('------start to evaluate ----------')
    print('----------------------------------')

    if FLAGS.do_eval or FLAGS.do_predict:
        eval_examples = processor.get_dev_examples(
            FLAGS.data_dir,
            filename_train, filename_dev, filename_test,
            score_train, score_dev, score_test)
        tf.logging.info(
            "Num of eval/predicted samples: {}".format(len(eval_examples)))
    
    if FLAGS.do_eval:
        # TPU requires a fixed batch size for all batches, therefore the number
        # of examples must be a multiple of the batch size, or else examples
        # will get dropped. So we pad with fake examples which are ignored
        # later on. These do NOT count towards the metric (all tf.metrics
        # support a per-instance weight, and these get a weight of 0.0).
        #
        # Modified in XL: We also adopt the same mechanism for GPUs.
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

        file_based_convert_examples_to_features_dev(                             
            eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
            eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder_dev(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        # Filter out all checkpoints in the directory
        steps_and_files = []
        filenames = tf.gfile.ListDirectory(FLAGS.model_dir)

        for filename in filenames:
            if filename.endswith(".index"):
                ckpt_name = filename[:-6]
                cur_filename = join(FLAGS.model_dir, ckpt_name)
                global_step = int(cur_filename.split("-")[-1])
                tf.logging.info("Add {} to eval list.".format(cur_filename))
                steps_and_files.append([global_step, cur_filename])
        steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

        # Start evaluating from the checkpoint index given by the
        # `star_from_dev` flag, then skip the first remaining checkpoint.
        start_idx = FLAGS.star_from_dev
        steps_and_files = steps_and_files[start_idx:]
        steps_and_files = steps_and_files[1:]

        eval_results = []

        for global_step, filename in sorted(steps_and_files,
                                            key=lambda x: x[0]):
            ret = estimator.evaluate(
                input_fn=eval_input_fn,
                steps=eval_steps,
                checkpoint_path=filename)

            ret["step"] = global_step
            ret["path"] = filename

            eval_results.append(ret)

            tf.logging.info("=" * 80)
            log_str = "Eval result | "
            for key, val in sorted(ret.items(), key=lambda x: x[0]):
                log_str += "{} {} | ".format(key, val)
            tf.logging.info(log_str)

######################################################################################
############################## PREDICT SETTING #######################################
######################################################################################

    if FLAGS.do_predict:
        print('----------------------------------')        
        print('---start to compute Prediction----')
        print('----------------------------------')
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

        file_based_convert_examples_to_features_dev(                             
            eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
            eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder_dev(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        # Filter out all checkpoints in the directory
        steps_and_files = []
        filenames = tf.gfile.ListDirectory(FLAGS.model_dir)

        for filename in filenames:
            if filename.endswith(".index"):
                ckpt_name = filename[:-6]
                cur_filename = join(FLAGS.model_dir, ckpt_name)
                global_step = int(cur_filename.split("-")[-1])
                tf.logging.info("Add {} to eval list.".format(cur_filename))
                steps_and_files.append([global_step, cur_filename])
        steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

        # Use only the latest checkpoint for prediction.
        steps_and_files = steps_and_files[-1:]

        eval_results = []

        for global_step, filename in sorted(steps_and_files,
                                            key=lambda x: x[0]):
            ret = estimator.evaluate(
                input_fn=eval_input_fn,
                steps=eval_steps,
                checkpoint_path=filename)

            ret["step"] = global_step
            ret["path"] = filename

            eval_results.append(ret)

            tf.logging.info("=" * 80)
            log_str = "Eval result | "
            for key, val in sorted(ret.items(), key=lambda x: x[0]):
                log_str += "{} {} | ".format(key, val)
            tf.logging.info(log_str)
        
        print('----------------------------------')        
        print('----start to compute Pearson------')
        print('----------------------------------')
    ############################ Pearson correlation ############################

        pearson = []
        with tf.gfile.Open(
                os.path.join(predict_dir, "{}.tsv".format(task_name)),
                "w") as fout:
            fout.write("index\tprediction\n")

            for pred_cnt, result in enumerate(
                    estimator.predict(input_fn=eval_input_fn,
                                      yield_single_examples=False,
                                      checkpoint_path=FLAGS.predict_ckpt)):
                # Pair each prediction in the batch with its gold label.
                preds = [float(x) for x in result["logits"].flat]
                labels = list(result["labels"])
                for pred, label in zip(preds, labels):
                    pearson.append([(pred, label)])

            pearson = np.array(pearson)  # shape: [num_examples, 1, 2]

            preds_col = pearson[:, 0, 0]
            labels_col = pearson[:, 0, 1]
            pearson_res = [pd.Series(preds_col).corr(pd.Series(labels_col))]

            print('pearson ###################')
            print(list(pearson_res))

        pearson_path = os.path.join(predict_dir, "{}.pearson.json".format(
            task_name))

        with tf.gfile.Open(pearson_path, "w") as fp:
            json.dump(pearson_res, fp, indent=4)

if __name__ == "__main__":
  tf.app.run()




import json

with open('/home/calvin/xlnet/predict_result/sts-b.pearson.json') as json_file:
    data = json.load(json_file)
    print(data)
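Example #4's prediction block boils down to a Pearson correlation between predicted and gold scores. A minimal, self-contained check of that computation with made-up numbers; the `pd.Series(...).corr(...)` call used above and NumPy's `corrcoef` agree:

import numpy as np
import pandas as pd

preds = np.array([0.1, 0.4, 0.35, 0.8])
labels = np.array([0.0, 0.5, 0.3, 0.9])

r_pd = pd.Series(preds).corr(pd.Series(labels))  # pandas route, as above
r_np = np.corrcoef(preds, labels)[0, 1]          # equivalent numpy route

assert abs(r_pd - r_np) < 1e-10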


Example #5
def main(_):
    if FLAGS.server_ip and FLAGS.server_port:
        # Remote debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(FLAGS.server_ip, FLAGS.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)

    tf.logging.set_verbosity(tf.logging.INFO)

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.log_step_count_steps = min(FLAGS.log_step_count_steps,
                                         FLAGS.save_steps)

    if FLAGS.do_predict:
        predict_dir = FLAGS.predict_dir
        if not tf.gfile.Exists(predict_dir):
            tf.gfile.MakeDirs(predict_dir)

    processors = {
        "mnli_matched": MnliMatchedProcessor,
        "mnli_mismatched": MnliMismatchedProcessor,
        "sts-b": StsbProcessor,
        "imdb": ImdbProcessor,
        "yelp5": Yelp5Processor
    }

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval, `do_predict` or "
            "`do_submit` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    if not tf.gfile.Exists(FLAGS.model_dir):
        tf.gfile.MakeDirs(FLAGS.model_dir)

#   ########################### LOAD PT model
#   ########################### LOAD PT model
#   import torch
#   from pytorch_transformers import CONFIG_NAME, TF_WEIGHTS_NAME, XLNetTokenizer, XLNetConfig, XLNetForSequenceClassification

#   save_path = os.path.join(FLAGS.model_dir, TF_WEIGHTS_NAME)
#   tf.logging.info("Model loaded from path: {}".format(save_path))

#   device = torch.device("cuda", 4)
#   config = XLNetConfig.from_pretrained('xlnet-large-cased', finetuning_task=u'sts-b')
#   config_path = os.path.join(FLAGS.model_dir, CONFIG_NAME)
#   config.to_json_file(config_path)
#   pt_model = XLNetForSequenceClassification.from_pretrained(FLAGS.model_dir, from_tf=True, num_labels=1)
#   pt_model.to(device)
#   pt_model = torch.nn.DataParallel(pt_model, device_ids=[4, 5, 6, 7])

#   from torch.optim import Adam
#   optimizer = Adam(pt_model.parameters(), lr=0.001, betas=(0.9, 0.999),
#                     eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay,
#                     amsgrad=False)
#   ########################### LOAD PT model
#   ########################### LOAD PT model

    task_name = FLAGS.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels() if not FLAGS.is_regression else None

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    # run_config = model_utils.configure_tpu(FLAGS)


#   model_fn = get_model_fn(len(label_list) if label_list is not None else None)

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    # estimator = tf.estimator.Estimator(
    #     model_fn=model_fn,
    #     config=run_config)

    if FLAGS.do_train:
        train_file_base = "{}.len-{}.train.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_file = os.path.join(FLAGS.output_dir, train_file_base)
        tf.logging.info("Use tfrecord file {}".format(train_file))

        train_examples = processor.get_train_examples(FLAGS.data_dir)
        tf.logging.info("Num of train samples: {}".format(len(train_examples)))

        file_based_convert_examples_to_features(train_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, train_file,
                                                FLAGS.num_passes)

        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)

        # estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

        ##### Create input tensors / placeholders
        bsz_per_core = FLAGS.train_batch_size // FLAGS.num_core_per_host

        params = {
            "batch_size": FLAGS.train_batch_size  # the whole batch
        }
        train_set = train_input_fn(params)

        example = train_set.make_one_shot_iterator().get_next()
        if FLAGS.num_core_per_host > 1:
            examples = [{} for _ in range(FLAGS.num_core_per_host)]
            for key in example.keys():
                vals = tf.split(example[key], FLAGS.num_core_per_host, 0)
                for device_id in range(FLAGS.num_core_per_host):
                    examples[device_id][key] = vals[device_id]
        else:
            examples = [example]

        ##### Create computational graph
        tower_losses, tower_grads_and_vars = [], []
        tower_inputs, tower_hidden_states, tower_logits = [], [], []

        for i in range(FLAGS.num_core_per_host):
            reuse = True if i > 0 else None
            with tf.device(assign_to_gpu(i, "/gpu:0")), \
                tf.variable_scope(tf.get_variable_scope(), reuse=reuse):

                loss_i, grads_and_vars_i, inputs_i, hidden_states_i, logits_i = single_core_graph(
                    is_training=True,
                    features=examples[i],
                    label_list=label_list)

                tower_losses.append(loss_i)
                tower_grads_and_vars.append(grads_and_vars_i)
                tower_inputs.append(inputs_i)
                tower_hidden_states.append(hidden_states_i)
                tower_logits.append(logits_i)

        ## average losses and gradients across towers
        if len(tower_losses) > 1:
            loss = tf.add_n(tower_losses) / len(tower_losses)
            grads_and_vars = average_grads_and_vars(tower_grads_and_vars)
            inputs = dict((n, tf.concat([t[n] for t in tower_inputs], 0))
                          for n in tower_inputs[0])
            hidden_states = list(
                tf.concat(t, 0) for t in zip(*tower_hidden_states))
            logits = tf.concat(tower_logits, 0)
        else:
            loss = tower_losses[0]
            grads_and_vars = tower_grads_and_vars[0]
            inputs = tower_inputs[0]
            hidden_states = tower_hidden_states[0]
            logits = tower_logits[0]

        # Summaries
        merged = tf.summary.merge_all()

        ## get train op
        train_op, learning_rate, gnorm = model_utils.get_train_op(
            FLAGS, None, grads_and_vars=grads_and_vars)
        global_step = tf.train.get_global_step()

        ##### Training loop
        saver = tf.train.Saver(max_to_keep=FLAGS.max_save)

        gpu_options = tf.GPUOptions(allow_growth=True)

        #### load pretrained models
        model_utils.init_from_checkpoint(FLAGS, global_vars=True)

        writer = tf.summary.FileWriter(logdir=FLAGS.model_dir,
                                       graph=tf.get_default_graph())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, gpu_options=gpu_options)) as sess:
            sess.run(tf.global_variables_initializer())

            #########
            ##### PYTORCH
            import torch
            from torch.optim import Adam
            from pytorch_transformers import CONFIG_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME, XLNetTokenizer, XLNetConfig, XLNetForSequenceClassification, BertAdam

            save_path = os.path.join(FLAGS.model_dir, TF_WEIGHTS_NAME + '-00')
            saver.save(sess, save_path)
            tf.logging.info("Model saved in path: {}".format(save_path))

            device = torch.device("cuda", 4)
            config = XLNetConfig.from_pretrained('xlnet-large-cased',
                                                 finetuning_task=u'sts-b',
                                                 num_labels=1)
            tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')

            # pt_model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased', num_labels=1)
            pt_model = XLNetForSequenceClassification.from_pretrained(
                save_path, from_tf=True, config=config)
            pt_model.to(device)
            pt_model = torch.nn.DataParallel(pt_model, device_ids=[4, 5, 6, 7])

            optimizer = Adam(pt_model.parameters(),
                             lr=0.001,
                             betas=(0.9, 0.999),
                             eps=FLAGS.adam_epsilon,
                             weight_decay=FLAGS.weight_decay,
                             amsgrad=False)
            # optimizer = BertAdam(pt_model.parameters(), lr=FLAGS.learning_rate, t_total=FLAGS.train_steps, warmup=FLAGS.warmup_steps / FLAGS.train_steps,
            #                      eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay)
            ##### PYTORCH
            #########

            fetches = [
                loss, global_step, gnorm, learning_rate, train_op, merged,
                inputs, hidden_states, logits
            ]

            total_loss, total_loss_pt, prev_step, gnorm_pt = 0., 0., -1, 0.0
            total_logits = None
            total_labels = None
            while True:
                feed_dict = {}
                # for i in range(FLAGS.num_core_per_host):
                #   for key in tower_mems_np[i].keys():
                #     for m, m_np in zip(tower_mems[i][key], tower_mems_np[i][key]):
                #       feed_dict[m] = m_np

                fetched = sess.run(fetches)

                (loss_np, curr_step, gnorm_np, learning_rate_np, _, summary_np,
                 inputs_np, hidden_states_np, logits_np) = fetched
                total_loss += loss_np

                if total_logits is None:
                    total_logits = logits_np
                    total_labels = inputs_np['label_ids']
                else:
                    total_logits = np.append(total_logits, logits_np, axis=0)
                    total_labels = np.append(total_labels,
                                             inputs_np['label_ids'],
                                             axis=0)

                #########
                ##### PYTORCH
                f_inp = torch.tensor(inputs_np["input_ids"],
                                     dtype=torch.long,
                                     device=device)
                f_seg_id = torch.tensor(inputs_np["segment_ids"],
                                        dtype=torch.long,
                                        device=device)
                f_inp_mask = torch.tensor(inputs_np["input_mask"],
                                          dtype=torch.float,
                                          device=device)
                f_label = torch.tensor(inputs_np["label_ids"],
                                       dtype=torch.float,
                                       device=device)

                # with torch.no_grad():
                #   _, hidden_states_pt, _ = pt_model.transformer(f_inp, f_seg_id, f_inp_mask)
                # logits_pt, _ = pt_model(f_inp, token_type_ids=f_seg_id, input_mask=f_inp_mask)

                pt_model.train()
                outputs = pt_model(f_inp,
                                   token_type_ids=f_seg_id,
                                   input_mask=f_inp_mask,
                                   labels=f_label)
                loss_pt = outputs[0]
                loss_pt = loss_pt.mean()
                total_loss_pt += loss_pt.item()

                # # hidden_states_pt = list(t.detach().cpu().numpy() for t in hidden_states_pt)
                # # special_pt = special_pt.detach().cpu().numpy()

                # # Optimizer pt
                pt_model.zero_grad()
                loss_pt.backward()
                gnorm_pt = torch.nn.utils.clip_grad_norm_(
                    pt_model.parameters(), FLAGS.clip)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate_np
                optimizer.step()
                ##### PYTORCH
                #########

                if curr_step > 0 and curr_step % FLAGS.log_step_count_steps == 0:
                    curr_loss = total_loss / (curr_step - prev_step)
                    curr_loss_pt = total_loss_pt / (curr_step - prev_step)
                    tf.logging.info(
                        "[{}] | gnorm {:.2f} lr {:8.6f} "
                        "| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format(
                            curr_step, gnorm_np, learning_rate_np, curr_loss,
                            math.exp(curr_loss), curr_loss / math.log(2)))

                    #########
                    ##### PYTORCH
                    tf.logging.info(
                        "  PT [{}] | gnorm PT {:.2f} lr PT {:8.6f} "
                        "| loss PT {:.2f} | pplx PT {:>7.2f}, bpc PT {:>7.4f}".
                        format(curr_step, gnorm_pt, learning_rate_np,
                               curr_loss_pt, math.exp(curr_loss_pt),
                               curr_loss_pt / math.log(2)))
                    ##### PYTORCH
                    #########

                    total_loss, total_loss_pt, prev_step = 0., 0., curr_step
                    writer.add_summary(summary_np, global_step=curr_step)

                if curr_step > 0 and curr_step % FLAGS.save_steps == 0:
                    save_path = os.path.join(FLAGS.model_dir,
                                             "model.ckpt-{}".format(curr_step))
                    saver.save(sess, save_path)
                    tf.logging.info(
                        "Model saved in path: {}".format(save_path))

                    #########
                    ##### PYTORCH
                    # Save a trained model, configuration and tokenizer
                    model_to_save = pt_model.module if hasattr(
                        pt_model,
                        'module') else pt_model  # Only save the model it-self
                    # If we save using the predefined names, we can load using `from_pretrained`
                    output_dir = os.path.join(
                        FLAGS.output_dir, "pytorch-ckpt-{}".format(curr_step))
                    if not tf.gfile.Exists(output_dir):
                        tf.gfile.MakeDirs(output_dir)
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    tf.logging.info(
                        "PyTorch Model saved in path: {}".format(output_dir))
                    ##### PYTORCH
                    #########

                if curr_step >= FLAGS.train_steps:
                    break

    if FLAGS.do_eval:
        # TPU requires a fixed batch size for all batches, therefore the number
        # of examples must be a multiple of the batch size, or else examples
        # will get dropped. So we pad with fake examples which are ignored
        # later on. These do NOT count towards the metric (all tf.metrics
        # support a per-instance weight, and these get a weight of 0.0).
        #
        # Modified in XL: We also adopt the same mechanism for GPUs.
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

        file_based_convert_examples_to_features(eval_examples, label_list,
                                                FLAGS.max_seq_length,
                                                tokenize_fn, eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, gpu_options=gpu_options)) as sess:
            sess.run(tf.global_variables_initializer())

            ########################### LOAD PT model
            #   import torch
            #   from pytorch_transformers import CONFIG_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME, XLNetTokenizer, XLNetConfig, XLNetForSequenceClassification, BertAdam

            #   save_path = os.path.join(FLAGS.model_dir, TF_WEIGHTS_NAME)
            #   saver.save(sess, save_path)
            #   tf.logging.info("Model saved in path: {}".format(save_path))

            #   device = torch.device("cuda", 4)
            #   config = XLNetConfig.from_pretrained('xlnet-large-cased', finetuning_task=u'sts-b', num_labels=1)
            #   tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
            #   config_path = os.path.join(FLAGS.model_dir, CONFIG_NAME)
            #   config.to_json_file(config_path)
            #   # pt_model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased', num_labels=1)
            #   pt_model = XLNetForSequenceClassification.from_pretrained(FLAGS.model_dir, from_tf=True)
            #   pt_model.to(device)
            #   pt_model = torch.nn.DataParallel(pt_model, device_ids=[4, 5, 6, 7])
            #   from torch.optim import Adam
            #   optimizer = Adam(pt_model.parameters(), lr=0.001, betas=(0.9, 0.999),
            #                    eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay,
            #                    amsgrad=False)
            #   optimizer = BertAdam(pt_model.parameters(), lr=FLAGS.learning_rate, t_total=FLAGS.train_steps, warmup=FLAGS.warmup_steps / FLAGS.train_steps,
            #                        eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay)

            ##### PYTORCH
            #########

            fetches = [
                loss, global_step, gnorm, learning_rate, train_op, merged,
                inputs, hidden_states, logits
            ]

            total_loss, total_loss_pt, prev_step, gnorm_pt = 0., 0., -1, 0.0
            total_logits = None
            total_labels = None
            while True:
                feed_dict = {}
                # for i in range(FLAGS.num_core_per_host):
                #   for key in tower_mems_np[i].keys():
                #     for m, m_np in zip(tower_mems[i][key], tower_mems_np[i][key]):
                #       feed_dict[m] = m_np

                fetched = sess.run(fetches)

                (loss_np, curr_step, gnorm_np, learning_rate_np, _, summary_np,
                 inputs_np, hidden_states_np, logits_np) = fetched
                total_loss += loss_np

                if total_logits is None:
                    total_logits = logits_np
                    total_labels = inputs_np['label_ids']
                else:
                    total_logits = np.append(total_logits, logits_np, axis=0)
                    total_labels = np.append(total_labels,
                                             inputs_np['label_ids'],
                                             axis=0)
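                # Note: np.append copies the accumulated arrays on every step;
                # for long evaluations it is cheaper to collect per-batch
                # arrays in a list and np.concatenate once at the end.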
Exemple #6
0
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    all_labels = get_labels(FLAGS.data_dir, FLAGS.train_file)
    all_labels.append('##')
    all_labels.append('PAD')

    ### only for testing
    all_labels.append('SYM')

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)
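    # e.g. tokenize_fn("Hello, world!") returns the SentencePiece id sequence
    # for the (optionally lower-cased) text; the exact ids depend on the
    # loaded .model file.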

    # TPU configuration
    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn(len(all_labels))

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(
            model_fn=model_fn,
            config=run_config)

    if FLAGS.do_train:

        train_examples = create_examples(FLAGS.data_dir, FLAGS.train_file)
        random.shuffle(train_examples)

        train_features = conver_examples_to_features(train_examples, all_labels, tokenize_fn)

        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
        tf.logging.info('Train steps: ' + str(num_train_steps) + '.')

        train_input_fn = input_fn_builder(
            features=train_features,
            drop_remainder=True,
            is_training=True
        )
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = create_examples(FLAGS.data_dir, FLAGS.test_file)

        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())
        assert len(eval_examples) % FLAGS.eval_batch_size == 0

        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)
        eval_features = conver_examples_to_features(eval_examples, all_labels, tokenize_fn)

        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = input_fn_builder(
                features=eval_features,
                drop_remainder=eval_drop_remainder,
                is_training=False
            )

        ret = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        tf.logging.info("=" * 80)
        log_str = "Eval | "
        for key, val in ret.items():
            log_str += "{} {} | ".format(key, val)
        tf.logging.info(log_str)
        tf.logging.info("=" * 80)
Exemple #7
0
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  #### Validate flags
  if FLAGS.save_steps is not None:
    FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

  processors = {
      "detect": DetectProcessor,
  }

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict` "
        "must be True.")

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()

  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()
  label_list = processor.get_labels() if not FLAGS.is_regression else None

  sp = spm.SentencePieceProcessor()
  sp.Load(FLAGS.spiece_model_file)
  def tokenize_fn(text):
    text = preprocess_text(text, lower=FLAGS.uncased)
    return encode_ids(sp, text)

  run_config = model_utils.configure_tpu(FLAGS)

  model_fn = get_model_fn(len(label_list) if label_list is not None else None)

  spm_basename = os.path.basename(FLAGS.spiece_model_file)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  if FLAGS.use_tpu:
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
  else:
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config)

  if FLAGS.do_train:
    train_file_base = "{}.len-{}.train.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    train_file = os.path.join(FLAGS.output_dir, train_file_base)
    tf.logging.info("Use tfrecord file {}".format(train_file))

    train_examples = processor.get_train_examples(FLAGS.data_dir)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    np.random.shuffle(train_examples)
    tf.logging.info("Num of train samples: {}".format(len(train_examples)))
    tf.logging.info("Num of train steps: {}".format(num_train_steps))

    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        train_file, FLAGS.num_passes)

    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    estimator.train(input_fn=train_input_fn, steps=num_train_steps)

  # TODO
  if FLAGS.do_train_test:
    train_test_file_base = "{}.len-{}.train_test.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    train_test_file = os.path.join(FLAGS.output_dir, train_test_file_base)
    tf.logging.info("Use tfrecord file {}".format(train_test_file))

    train_test_examples = processor.get_train_test_examples(FLAGS.data_dir)
    num_train_test_steps = int(
        len(train_test_examples) / FLAGS.train_batch_size * 1)
    np.random.shuffle(train_test_examples)
    tf.logging.info("Num of test samples: {}".format(len(train_test_examples)))
    tf.logging.info("Num of test steps: {}".format(num_train_test_steps))

    file_based_convert_examples_to_features(
        train_test_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        train_test_file, FLAGS.num_passes)

    train_test_input_fn = file_based_input_fn_builder(
        input_file=train_test_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    estimator.train(input_fn=train_test_input_fn, steps=num_train_test_steps)

  if FLAGS.do_eval:
    # TPU requires a fixed batch size for all batches, therefore the number
    # of examples must be a multiple of the batch size, or else examples
    # will get dropped. So we pad with fake examples which are ignored
    # later on. These do NOT count towards the metric (all tf.metrics
    # support a per-instance weight, and these get a weight of 0.0).
    #
    # Modified in XL: We also adopt the same mechanism for GPUs.

    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

    while len(eval_examples) % FLAGS.eval_batch_size != 0:
      eval_examples.append(PaddingInputExample())

    eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    assert len(eval_examples) % FLAGS.eval_batch_size == 0
    eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)

    # Filter out all checkpoints in the directory
    steps_and_files = []
    filenames = tf.gfile.ListDirectory(FLAGS.model_dir)

    for filename in filenames:
      if filename.endswith(".index"):
        ckpt_name = filename[:-6]
        cur_filename = join(FLAGS.model_dir, ckpt_name)
        global_step = int(cur_filename.split("-")[-1])
        tf.logging.info("Add {} to eval list.".format(cur_filename))
        steps_and_files.append([global_step, cur_filename])
    steps_and_files = sorted(steps_and_files, key=lambda x: x[0])
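    # e.g. a checkpoint file "model.ckpt-12000.index" yields global step 12000
    # and checkpoint path "<model_dir>/model.ckpt-12000".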

    # Decide whether to evaluate all ckpts
    if not FLAGS.eval_all_ckpt:
      steps_and_files = steps_and_files[-1:]

    eval_results = []
    for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]):
      ret = estimator.evaluate(
          input_fn=eval_input_fn,
          steps=eval_steps,
          checkpoint_path=filename)

      ret["step"] = global_step
      ret["path"] = filename

      eval_results.append(ret)

      tf.logging.info("=" * 80)
      log_str = "Eval result | "
      for key, val in sorted(ret.items(), key=lambda x: x[0]):
        log_str += "{} {} | ".format(key, val)
      tf.logging.info(log_str)

    key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy"
    eval_results.sort(key=lambda x: x[key_name], reverse=True)

    tf.logging.info("=" * 80)
    log_str = "Best result | "
    for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
      log_str += "{} {} | ".format(key, val)
    tf.logging.info(log_str)

  if FLAGS.do_predict:

    predict_dir = FLAGS.predict_dir
    if not tf.gfile.Exists(predict_dir):
      tf.gfile.MakeDirs(predict_dir)

    predict_file_base = "{}.len-{}.{}.predict.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.predict_split)
    predict_file = os.path.join(FLAGS.output_dir, predict_file_base)

    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    num_actual_predict_examples = len(predict_examples)
    tf.logging.info("Num of predict samples: {}".format(len(predict_examples)))
    file_based_convert_examples_to_features(
        predict_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        predict_file)

    pred_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    if FLAGS.predict_batch_size != 1:
        result = estimator.predict(input_fn=pred_input_fn)
    else:
        result = estimator.predict(input_fn=pred_input_fn, yield_single_examples=False)
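    # Note: with yield_single_examples=False, `predict` yields whole batches
    # (dicts whose arrays keep a leading batch dimension) rather than one
    # example at a time.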

    if FLAGS.use_stack:
        # `result` is a generator; materialize it so it can be re-iterated
        # when writing the prediction file below.
        result = list(result)
        logits = [prediction["logits"] for prediction in result]
        save_pickle(FLAGS.stack_dir, logits)

    # TODO
    output_predict_file = FLAGS.test_save
    original_file = os.path.join(FLAGS.data_dir, FLAGS.test_set)

    df = pd.read_csv(original_file)

    lines = [row['id'] for index, row in df.iterrows()]

    with open(output_predict_file, "w") as f:
      writer = csv.writer(f, delimiter=',')
      writer.writerow(['id','label'])
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(zip(lines, result)):
        ID = prediction[0]
        label = prediction[1]["labels"]
        if i >= num_actual_predict_examples:
          break
        writer.writerow([ID, label])
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples
Exemple #8
0
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  #### Validate flags
  if FLAGS.save_steps is not None:
    FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

  if FLAGS.calc_ists_metrics and FLAGS.metrics_dir:
    predictions = []

    if not FLAGS.pred_file:
      predictions = model_utils.get_predictions(FLAGS.predict_dir)
    else:
      predictions = [(model_utils.extract_global_step(FLAGS.pred_file[:-4]), FLAGS.pred_file)]
    
    if not tf.gfile.Exists(FLAGS.metrics_dir):
      tf.gfile.MakeDirs(FLAGS.metrics_dir)
    
    dataset_name = FLAGS.data_dir.split("/")[-1]

    # Write metrics to file
    with tf.gfile.Open(os.path.join(FLAGS.metrics_dir, "{}.tsv".format("metrics-" + dataset_name)), "w") as fout:
      fout.write("step\tf1-type\tf1-socre\tf1-t+s\tpearson-type\tpearson-score\n")

      # Calc metric for all predictions
      for global_step, pred_file_path in sorted(predictions, key=lambda x: x[0]):
        metrics = calc_ists_metrics(pred_file_path, FLAGS.data_dir + "/test.tsv")
        print('\n Dataset: {}\n Step: {}\n [F1 Type]: {}\n [F1 Score]: {}\n [F1 T+S]: {}\n [P T]: {}\n [P S]: {}'.format(dataset_name, global_step, *metrics))
        fout.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(global_step, *metrics))
        

    # End execution after calculations
    return None

  if FLAGS.do_predict:
    predict_dir = FLAGS.predict_dir
    if not tf.gfile.Exists(predict_dir):
      tf.gfile.MakeDirs(predict_dir)

  processors = {
      "mnli_matched": MnliMatchedProcessor,
      "mnli_mismatched": MnliMismatchedProcessor,
      'sts-b': StsbProcessor,
      'imdb': ImdbProcessor,
      "yelp5": Yelp5Processor,
      "ists": IStsProcessor
  }

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict and not FLAGS.calc_ists_metrics:
    raise ValueError(
        "At least one of `do_train`, `do_eval`, `do_predict` or "
        "`calc_ists_metrics` must be True.")

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()

  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()
  label_list = processor.get_labels() if not FLAGS.is_regression else None

  sp = spm.SentencePieceProcessor()
  sp.Load(FLAGS.spiece_model_file)
  def tokenize_fn(text):
    text = preprocess_text(text, lower=FLAGS.uncased)
    return encode_ids(sp, text)

  run_config = model_utils.configure_tpu(FLAGS)

  model_fn = get_model_fn(len(label_list) if label_list is not None else None)

  spm_basename = os.path.basename(FLAGS.spiece_model_file)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  if FLAGS.use_tpu:
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
  else:
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config)

  if FLAGS.do_train:
    train_file_base = "{}.len-{}.train.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    train_file = os.path.join(FLAGS.output_dir, train_file_base)
    tf.logging.info("Use tfrecord file {}".format(train_file))

    train_examples = processor.get_train_examples(FLAGS.data_dir)
    np.random.shuffle(train_examples)
    tf.logging.info("Num of train samples: {}".format(len(train_examples)))

    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        train_file, FLAGS.num_passes)

    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

  if FLAGS.do_eval or FLAGS.do_predict:
    if FLAGS.eval_split == "dev":
      eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    else:
      eval_examples = processor.get_test_examples(FLAGS.data_dir)

    tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

  if FLAGS.do_eval:
    # TPU requires a fixed batch size for all batches, therefore the number
    # of examples must be a multiple of the batch size, or else examples
    # will get dropped. So we pad with fake examples which are ignored
    # later on. These do NOT count towards the metric (all tf.metrics
    # support a per-instance weight, and these get a weight of 0.0).
    #
    # Modified in XL: We also adopt the same mechanism for GPUs.
    while len(eval_examples) % FLAGS.eval_batch_size != 0:
      eval_examples.append(PaddingInputExample())

    eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    assert len(eval_examples) % FLAGS.eval_batch_size == 0
    eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)

    # Filter out all checkpoints in the directory
    steps_and_files = model_utils.get_checkpoints(FLAGS.model_dir)

    # Decide whether to evaluate all ckpts
    if not FLAGS.eval_all_ckpt:
      steps_and_files = steps_and_files[-1:]

    eval_results = []
    for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]):
      ret = estimator.evaluate(
          input_fn=eval_input_fn,
          steps=eval_steps,
          checkpoint_path=filename)

      ret["step"] = global_step
      ret["path"] = filename

      eval_results.append(ret)

      tf.logging.info("=" * 80)
      log_str = "Eval result | "
      for key, val in sorted(ret.items(), key=lambda x: x[0]):
        log_str += "{} {} | ".format(key, val)
      tf.logging.info(log_str)

    key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy"
    eval_results.sort(key=lambda x: x[key_name], reverse=True)

    tf.logging.info("=" * 80)
    log_str = "Best result | "
    for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
      log_str += "{} {} | ".format(key, val)
    tf.logging.info(log_str)

  if FLAGS.do_predict:
    eval_file_base = "{}.len-{}.{}.predict.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    pred_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    steps_and_files = []
    if not FLAGS.predict_ckpt:

      # Filter out all checkpoints in the directory
      steps_and_files = model_utils.get_checkpoints(FLAGS.model_dir)

      # Decide whether to predict all ckpts
      if not FLAGS.pred_all_ckpt:
        steps_and_files = steps_and_files[-1:]
    else:
      steps_and_files = [(model_utils.extract_global_step(FLAGS.predict_ckpt), FLAGS.predict_ckpt)]

    for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]):
      predict_results = []

      with tf.gfile.Open(os.path.join(predict_dir, "{}.tsv".format(
          "step-" + str(global_step))), "w") as fout:
        fout.write("index\tprediction\n")

        for pred_cnt, result in enumerate(estimator.predict(
            input_fn=pred_input_fn,
            yield_single_examples=True,
            checkpoint_path=filename)):
          if pred_cnt % 1000 == 0:
            tf.logging.info("Predicting submission for example: {}".format(
                pred_cnt))

          logits = [float(x) for x in result["logits"].flat]
          predict_results.append(logits)

          if len(logits) == 1:
            label_out = logits[0]
          elif len(logits) == 2:
            if logits[1] - logits[0] > FLAGS.predict_threshold:
              label_out = label_list[1]
            else:
              label_out = label_list[0]
          elif len(logits) > 2:
            max_index = np.argmax(np.array(logits, dtype=np.float32))
            label_out = label_list[max_index]
          else:
            raise NotImplementedError
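          # Decision rule above: one logit -> raw regression output; two
          # logits -> binary label by margin against predict_threshold;
          # more than two -> argmax over classes.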

          fout.write("{}\t{}\n".format(pred_cnt, label_out))

      predict_json_path = os.path.join(predict_dir, "{}.logits.json".format(
         "step-" + str(global_step)))

      with tf.gfile.Open(predict_json_path, "w") as fp:
        json.dump(predict_results, fp, indent=4)
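      # The saved logits can be reloaded later, e.g. for ensembling
      # (a sketch; assumes one row of logits per example):
      #
      #   with tf.gfile.Open(predict_json_path) as fp:
      #     logits_matrix = np.array(json.load(fp), dtype=np.float32)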