def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Validate flags
  if FLAGS.save_steps is not None:
    FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

  if FLAGS.do_predict:
    predict_dir = FLAGS.predict_dir
    if not tf.gfile.Exists(predict_dir):
      tf.gfile.MakeDirs(predict_dir)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict` must be True.")

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()
  processor = PaperProcessor(features=FLAGS.features.split(","))
  label_list = processor.get_labels()

  sp = spm.SentencePieceProcessor()
  sp.Load(FLAGS.spiece_model_file)

  def tokenize_fn(text):
    text = preprocess_text(text, lower=FLAGS.uncased)
    return encode_ids(sp, text)

  run_config = model_utils.configure_tpu(FLAGS)

  model_fn = get_model_fn(len(label_list) if label_list is not None else None)

  spm_basename = os.path.basename(FLAGS.spiece_model_file)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  if FLAGS.use_tpu:
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
  else:
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

  if FLAGS.do_train:
    train_file_base = "{}.len-{}.train.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    train_file = os.path.join(FLAGS.output_dir, train_file_base)
    tf.logging.info("Use tfrecord file {}".format(train_file))

    train_examples = processor.get_train_examples(FLAGS.data_dir)
    np.random.shuffle(train_examples)
    tf.logging.info("Num of train samples: {}".format(len(train_examples)))

    FLAGS.train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    FLAGS.warmup_steps = int(FLAGS.train_steps * FLAGS.warmup_proportion)

    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        train_file, FLAGS.num_passes)

    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", FLAGS.train_steps)

    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

  if FLAGS.do_eval or FLAGS.do_predict:
    if FLAGS.eval_split == "dev":
      eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    elif FLAGS.eval_split == "train":
      eval_examples = processor.get_train_examples(FLAGS.data_dir,
                                                   shuffle=False)
    else:
      eval_examples = processor.get_test_examples(FLAGS.data_dir)

    tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

  if FLAGS.do_eval:
    # TPU requires a fixed batch size for all batches, therefore the number
    # of examples must be a multiple of the batch size, or else examples
    # will get dropped. So we pad with fake examples which are ignored
    # later on. These do NOT count towards the metric (all tf.metrics
    # support a per-instance weight, and these get a weight of 0.0).
    #
    # Modified in XL: We also adopt the same mechanism for GPUs.
    while len(eval_examples) % FLAGS.eval_batch_size != 0:
      eval_examples.append(PaddingInputExample())

    eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    assert len(eval_examples) % FLAGS.eval_batch_size == 0
    eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)

    # Filter out all checkpoints in the directory
    steps_and_files = []
    filenames = tf.gfile.ListDirectory(FLAGS.model_dir)
    for filename in filenames:
      if filename.endswith(".index"):
        ckpt_name = filename[:-6]
        cur_filename = join(FLAGS.model_dir, ckpt_name)
        global_step = int(cur_filename.split("-")[-1])
        tf.logging.info("Add {} to eval list.".format(cur_filename))
        steps_and_files.append([global_step, cur_filename])
    steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

    # Decide whether to evaluate all ckpts
    if not FLAGS.eval_all_ckpt:
      steps_and_files = steps_and_files[-1:]

    eval_results = []
    for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]):
      ret = estimator.evaluate(
          input_fn=eval_input_fn,
          steps=eval_steps,
          checkpoint_path=filename)

      ret["step"] = global_step
      ret["path"] = filename

      eval_results.append(ret)

      tf.logging.info("=" * 80)
      log_str = "Eval result | "
      for key, val in sorted(ret.items(), key=lambda x: x[0]):
        log_str += "{} {} | ".format(key, val)
      tf.logging.info(log_str)

    key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy"
    eval_results.sort(key=lambda x: x[key_name], reverse=True)

    tf.logging.info("=" * 80)
    log_str = "Best result | "
    for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
      log_str += "{} {} | ".format(key, val)
    tf.logging.info(log_str)

  if FLAGS.do_predict:
    eval_file_base = "{}.len-{}.{}.predict.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    pred_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    predict_results = []
    with tf.gfile.Open(
        os.path.join(predict_dir, "{}.tsv".format(task_name)), "w") as fout:
      fout.write("index\tprediction\n")

      for pred_cnt, result in enumerate(
          estimator.predict(
              input_fn=pred_input_fn,
              yield_single_examples=True,
              checkpoint_path=FLAGS.predict_ckpt)):
        if pred_cnt % 1000 == 0:
          tf.logging.info(
              "Predicting submission for example: {}".format(pred_cnt))

        logits = [float(x) for x in result["logits"].flat]
        predict_results.append(logits)

        if len(logits) == 1:
          label_out = logits[0]
        elif len(logits) == 2:
          if logits[1] - logits[0] > FLAGS.predict_threshold:
            label_out = label_list[1]
          else:
            label_out = label_list[0]
        elif len(logits) > 2:
          max_index = np.argmax(np.array(logits, dtype=np.float32))
          label_out = label_list[max_index]
        else:
          raise NotImplementedError

        fout.write("{}\t{}\n".format(pred_cnt, label_out))

    predict_json_path = os.path.join(
        predict_dir, "{}.logits.json".format(task_name))

    with tf.gfile.Open(predict_json_path, "w") as fp:
      json.dump(predict_results, fp, indent=4)
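# A minimal, hypothetical sketch of the eval-padding step used above: the
# example count is rounded up to a multiple of the (fixed) TPU batch size so
# real examples are never dropped, and the sentinel examples are ignored by
# the metrics (they get a per-instance weight of 0.0). PaddingInputExample
# here is a stand-in for the class defined in the full script.

class PaddingInputExample(object):
  """Fake example appended so the example count divides the batch size."""


def pad_to_batch_multiple(examples, batch_size):
  # Append sentinels until len(examples) is an exact multiple of batch_size.
  while len(examples) % batch_size != 0:
    examples.append(PaddingInputExample())
  return examples


if __name__ == "__main__":
  examples = ["ex{}".format(i) for i in range(10)]
  padded = pad_to_batch_multiple(examples, batch_size=8)
  assert len(padded) % 8 == 0    # 16 slots: 10 real + 6 padding
  eval_steps = len(padded) // 8  # 2 full batches, safe with drop_remainder
  print(eval_steps)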
def main(_):
  logger.set_verbosity(logger.INFO)

  #### Validate flags
  if FLAGS.save_steps is not None:
    FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

  if not FLAGS.do_train and not FLAGS.do_eval:
    raise ValueError(
        "At least one of `do_train` or `do_eval` must be True.")

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  sp = spm.SentencePieceProcessor()
  sp.Load(FLAGS.spiece_model_file)

  def tokenize_fn(text):
    text = preprocess_text(text, lower=FLAGS.uncased)
    return encode_ids(sp, text)

  # TPU Configuration
  run_config = model_utils.configure_tpu(FLAGS)

  model_fn = get_model_fn()

  spm_basename = os.path.basename(FLAGS.spiece_model_file)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  if FLAGS.use_tpu:
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
  else:
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config)

  if FLAGS.do_train:
    train_file_base = "{}.len-{}.train.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    train_file = os.path.join(FLAGS.output_dir, train_file_base)

    if not tf.gfile.Exists(train_file) or FLAGS.overwrite_data:
      train_examples = get_examples(FLAGS.data_dir, "train")
      random.shuffle(train_examples)

      file_based_convert_examples_to_features(
          train_examples, tokenize_fn, train_file)

    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

  if FLAGS.do_eval:
    eval_examples = get_examples(FLAGS.data_dir, FLAGS.eval_split)
    logger.info("Num of eval samples: {}".format(len(eval_examples)))

    # TPU requires a fixed batch size for all batches, therefore the number
    # of examples must be a multiple of the batch size, or else examples
    # will get dropped. So we pad with fake examples which are ignored
    # later on. These do NOT count towards the metric (all tf.metrics
    # support a per-instance weight, and these get a weight of 0.0).
    #
    # Modified in XL: We also adopt the same mechanism for GPUs.
    while len(eval_examples) % FLAGS.eval_batch_size != 0:
      eval_examples.append(PaddingInputExample())

    eval_file_base = "{}.len-{}.{}.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    if FLAGS.high_only:
      eval_file_base = "high." + eval_file_base
    elif FLAGS.middle_only:
      eval_file_base = "middle." + eval_file_base
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, tokenize_fn, eval_file)

    assert len(eval_examples) % FLAGS.eval_batch_size == 0
    eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)

    ret = estimator.evaluate(
        input_fn=eval_input_fn,
        steps=eval_steps)

    # Log current result
    logger.info("=" * 80)
    log_str = "Eval | "
    for key, val in ret.items():
      log_str += "{} {} | ".format(key, val)
    logger.info(log_str)
    logger.info("=" * 80)
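# A small sketch (assuming TF 1.x tf.gfile, as in the variant above) of the
# "convert once, then reuse" pattern guarding TFRecord creation: the file is
# regenerated only when it is missing or an explicit overwrite flag is set.
# maybe_convert is a hypothetical helper name, not part of the original code.
import tensorflow as tf


def maybe_convert(examples, output_file, convert_fn, overwrite=False):
  """Run convert_fn only if output_file is missing or overwrite is True."""
  if tf.gfile.Exists(output_file) and not overwrite:
    tf.logging.info("Reusing existing tfrecord: %s", output_file)
    return output_file
  convert_fn(examples, output_file)
  return output_file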
def main(_): tf.logging.set_verbosity(tf.logging.INFO) print('826') #### Validate flags if FLAGS.save_steps is not None: FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps) print('830') if FLAGS.do_predict: predict_dir = FLAGS.predict_dir if not tf.gfile.Exists(predict_dir): tf.gfile.MakeDirs(predict_dir) print('835') processors = { "mnli_matched": MnliMatchedProcessor, "mnli_mismatched": MnliMismatchedProcessor, 'sts-b': StsbProcessor, 'imdb': ImdbProcessor, 'imdb_t': ImdbThreeClassProcessor, "yelp5": Yelp5Processor, "imdb_reg": ImdbRegressionClassProcessor } print('845') if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train`, `do_eval, `do_predict` or " "`do_submit` must be True.") print('850') if not tf.gfile.Exists(FLAGS.output_dir): tf.gfile.MakeDirs(FLAGS.output_dir) print('853') task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() if not FLAGS.is_regression else None sp = spm.SentencePieceProcessor() sp.Load(FLAGS.spiece_model_file) def tokenize_fn(text): text = preprocess_text(text, lower=FLAGS.uncased) return encode_ids(sp, text) print('start getting the tpu') tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR'] tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( tpu_address) is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 run_config = tf.contrib.tpu.RunConfig( cluster=tpu_cluster_resolver, model_dir=FLAGS.model_dir, save_checkpoints_steps=2000, keep_checkpoint_max=10, tpu_config=tf.contrib.tpu.TPUConfig( iterations_per_loop=1000, num_shards=8, per_host_input_for_training=is_per_host)) print('finish getting the tpu') print('Start getting model') model_fn = get_model_fn( len(label_list) if label_list is not None else None) spm_basename = os.path.basename(FLAGS.spiece_model_file) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. if FLAGS.use_tpu: estimator = tf.contrib.tpu.TPUEstimator( use_tpu=FLAGS.use_tpu, model_fn=model_fn, config=run_config, train_batch_size=FLAGS.train_batch_size, predict_batch_size=FLAGS.predict_batch_size, eval_batch_size=FLAGS.eval_batch_size) else: estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config) if FLAGS.do_train: train_file_base = "{}.len-{}.train.tf_record".format( spm_basename, FLAGS.max_seq_length) train_file = os.path.join(FLAGS.output_dir, train_file_base) tf.logging.info("Use tfrecord file {}".format(train_file)) train_examples = processor.get_train_examples(FLAGS.data_dir) np.random.shuffle(train_examples) tf.logging.info("Num of train samples: {}".format(len(train_examples))) file_based_convert_examples_to_features(train_examples, label_list, FLAGS.max_seq_length, tokenize_fn, train_file, FLAGS.num_passes) train_input_fn = file_based_input_fn_builder( input_file=train_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True) estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps) if FLAGS.do_eval or FLAGS.do_predict: if FLAGS.eval_split == "dev": eval_examples = processor.get_dev_examples(FLAGS.data_dir) else: eval_examples = processor.get_test_examples(FLAGS.data_dir) tf.logging.info("Num of eval samples: {}".format(len(eval_examples))) if FLAGS.do_eval: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. 
So we pad with fake examples which are ignored # later on. These do NOT count towards the metric (all tf.metrics # support a per-instance weight, and these get a weight of 0.0). # # Modified in XL: We also adopt the same mechanism for GPUs. while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_file_base = "{}.len-{}.{}.eval.tf_record".format( spm_basename, FLAGS.max_seq_length, FLAGS.eval_split) eval_file = os.path.join(FLAGS.output_dir, eval_file_base) file_based_convert_examples_to_features(eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn, eval_file) assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) eval_input_fn = file_based_input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=True) # Filter out all checkpoints in the directory steps_and_files = [] filenames = tf.gfile.ListDirectory(FLAGS.model_dir) for filename in filenames: if filename.endswith(".index"): ckpt_name = filename[:-6] cur_filename = join(FLAGS.model_dir, ckpt_name) global_step = int(cur_filename.split("-")[-1]) tf.logging.info("Add {} to eval list.".format(cur_filename)) steps_and_files.append([global_step, cur_filename]) steps_and_files = sorted(steps_and_files, key=lambda x: x[0]) # Decide whether to evaluate all ckpts if not FLAGS.eval_all_ckpt: steps_and_files = steps_and_files[-1:] eval_results = [] for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]): ret = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps, checkpoint_path=filename) ret["step"] = global_step ret["path"] = filename eval_results.append(ret) tf.logging.info("=" * 80) log_str = "Eval result | " for key, val in sorted(ret.items(), key=lambda x: x[0]): log_str += "{} {} | ".format(key, val) tf.logging.info(log_str) key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy" eval_results.sort(key=lambda x: x[key_name], reverse=True) tf.logging.info("=" * 80) log_str = "Best result | " print(type(eval_results)) print(len(eval_results)) for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]): log_str += "{} {} | ".format(key, val) tf.logging.info(log_str) if FLAGS.do_predict: eval_file_base = "{}.len-{}.{}.predict.tf_record".format( spm_basename, FLAGS.max_seq_length, FLAGS.eval_split) eval_file = os.path.join(FLAGS.output_dir, eval_file_base) # for example in eval_examples: # print('{}:{}'.format(example.text_a, example.label)) print('*' * 10000) file_based_convert_examples_to_features(eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn, eval_file) pred_input_fn = file_based_input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=False) predict_results = [] with tf.gfile.Open( os.path.join(predict_dir, "{}.tsv".format(task_name)), "w") as fout: fout.write("index\tprediction\n") print( enumerate( estimator.predict(input_fn=pred_input_fn, yield_single_examples=True, checkpoint_path=FLAGS.predict_ckpt))) for pred_cnt, result in enumerate( estimator.predict(input_fn=pred_input_fn, yield_single_examples=True, checkpoint_path=FLAGS.predict_ckpt)): print(result) if pred_cnt % 1000 == 0: tf.logging.info( "Predicting submission for example: {}".format( pred_cnt)) logits = [float(x) for x in result["logits"].flat] predict_results.append(logits) if len(logits) == 1: label_out = logits[0] elif len(logits) == 2: if logits[1] - logits[0] > FLAGS.predict_threshold: label_out = 
label_list[1] else: label_out = label_list[0] elif len(logits) > 2: max_index = np.argmax(np.array(logits, dtype=np.float32)) label_out = label_list[max_index] else: raise NotImplementedError fout.write("{}\t{}\n".format(pred_cnt, label_out)) predict_json_path = os.path.join(predict_dir, "{}.logits.json".format(task_name)) with tf.gfile.Open(predict_json_path, "w") as fp: json.dump(predict_results, fp, indent=4)
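# Minimal sketch (TF 1.x contrib APIs, as in the Colab variant above) of
# building a TPU RunConfig from the Colab-provided TPU address instead of
# model_utils.configure_tpu. The checkpointing and sharding values are
# illustrative, copied from the snippet above, not prescriptive.
import os
import tensorflow as tf


def colab_tpu_run_config(model_dir):
  tpu_address = "grpc://" + os.environ["COLAB_TPU_ADDR"]
  resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu_address)
  return tf.contrib.tpu.RunConfig(
      cluster=resolver,
      model_dir=model_dir,
      save_checkpoints_steps=2000,
      keep_checkpoint_max=10,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=1000,
          num_shards=8,
          per_host_input_for_training=(
              tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2)))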
def main(_): ################################################################### ################################################################### score_reader = ScoreReader( '/u02/datasets/psytesting/scores_combined.csv', '/u02/texts/' ,count_words=False, system_idx=-1, fname_idx=-2, score_idx=17, testname_idx=None) score_reader.normalize_scores() ## 365194 train scores, 25013 val scores, 16675 test scores ## 374377 train scores, 24959 val scores, 16638 test scores trainScores, valScores, testScores = score_reader.split_train_test(0.9,1) ########################### need to be the multiple times of batch size trainScores=trainScores[:10548] valScores=valScores[:1170] testScores=testScores[:12000] ########################### filename_train=[] score_train=[] for i,data in enumerate(trainScores): filename_train.append(trainScores[i][0]) score_train.append(trainScores[i][1]) filename_dev=[] score_dev=[] for i,data in enumerate(valScores): filename_dev.append(valScores[i][0]) score_dev.append(valScores[i][1]) filename_test=[] score_test=[] for i,data in enumerate(testScores): filename_test.append(testScores[i][0]) score_test.append(testScores[i][1]) ############################################################# ############## SETTING FOT THE FUNCTION ##################### ############################################################# tf.logging.set_verbosity(tf.logging.INFO) #### Validate flags if FLAGS.save_steps is not None: FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps) if FLAGS.do_predict: predict_dir = FLAGS.predict_dir if not tf.gfile.Exists(predict_dir): tf.gfile.MakeDirs(predict_dir) processor=GLUEProcessor(filename=filename_train, label=score_train, filename_dev=filename_dev,label_dev=score_dev, filename_test=filename_test,label_test=score_test) if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train`, `do_eval, `do_predict` or " "`do_submit` must be True.") if not tf.gfile.Exists(FLAGS.output_dir): tf.gfile.MakeDirs(FLAGS.output_dir) task_name = FLAGS.task_name.lower() #################################################################### ####################### tokenization ############################### #################################################################### # changed # label_list = processor.get_labels() if not FLAGS.is_regression else None # label list Nonetype label_list = processor.get_labels() if FLAGS.is_regression else None # label list is not None # def get_labels(self): # return [0.0] sp = spm.SentencePieceProcessor() sp.Load(FLAGS.spiece_model_file) def tokenize_fn(text): text=str(text[0]) text = preprocess_text(text, lower=FLAGS.uncased) return encode_ids(sp, text) run_config = model_utils.configure_tpu(FLAGS) model_fn = get_model_fn(len(label_list) if label_list is not None else None) spm_basename = os.path.basename(FLAGS.spiece_model_file) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. 
if FLAGS.use_tpu: estimator = tf.contrib.tpu.TPUEstimator( use_tpu=FLAGS.use_tpu, model_fn=model_fn, config=run_config, train_batch_size=FLAGS.train_batch_size, predict_batch_size=FLAGS.predict_batch_size, eval_batch_size=FLAGS.eval_batch_size) else: estimator = tf.estimator.Estimator( model_fn=model_fn, config=run_config) ###################################################################################### ##############################TRAINING SETTING ####################################### ###################################################################################### if FLAGS.do_train: train_file_base = "{}.len-{}.train.tf_record".format( spm_basename, FLAGS.max_seq_length) train_file = os.path.join(FLAGS.output_dir, train_file_base) tf.logging.info("Use tfrecord file {}".format(train_file)) train_examples = processor.get_train_examples(FLAGS.data_dir, filename_train,filename_dev ,filename_test, score_train,score_dev,score_test) # get training data np.random.shuffle(train_examples) tf.logging.info("Num of train samples: {}".format(len(train_examples))) file_based_convert_examples_to_features( train_examples, label_list, FLAGS.max_seq_length, tokenize_fn, train_file, FLAGS.num_passes ) train_input_fn = file_based_input_fn_builder( input_file=train_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True) # estimator.train(input_fn=train_input_fn,steps=1000, max_steps=FLAGS.train_steps) estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps) ###################################################################################### ############################## VAL SETTING ######################################## ###################################################################################### print('----------------------------------') print('------start to evaluate ----------') print('----------------------------------') if FLAGS.do_eval : eval_examples = processor.get_dev_examples(FLAGS.data_dir, filename_train,filename_dev ,filename_test, score_train,score_dev,score_test) if FLAGS.do_predict: eval_examples = processor.get_dev_examples(FLAGS.data_dir, filename_train,filename_dev ,filename_test, score_train,score_dev,score_test) ## print(len(eval_examples)) - >25035 ## print(eval_examples[1]) <__main__.InputExample object at 0x7fa0ff19ecc0> # data_dir: Directory for input data." default="/home/calvin/111/glue_data/STS-B/", # tf.logging.info("Num of eval/predicted samples: {}".format(len(eval_examples))) if FLAGS.do_eval: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. These do NOT count towards the metric (all tf.metrics # support a per-instance weight, and these get a weight of 0.0). # # Modified in XL: We also adopt the same mechanism for GPUs. 
while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_file_base = "{}.len-{}.{}.eval.tf_record".format( spm_basename, FLAGS.max_seq_length, FLAGS.eval_split) eval_file = os.path.join(FLAGS.output_dir, eval_file_base) file_based_convert_examples_to_features_dev( eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn, eval_file) assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) eval_input_fn = file_based_input_fn_builder_dev( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=True) # Filter out all checkpoints in the directory steps_and_files = [] filenames = tf.gfile.ListDirectory(FLAGS.model_dir) for filename in filenames: if filename.endswith(".index"): ckpt_name = filename[:-6] cur_filename = join(FLAGS.model_dir, ckpt_name) global_step = int(cur_filename.split("-")[-1]) tf.logging.info("Add {} to eval list.".format(cur_filename)) steps_and_files.append([global_step, cur_filename]) steps_and_files = sorted(steps_and_files, key=lambda x: x[0]) # Decide whether to evaluate all ckpts # if not FLAGS.eval_all_ckpt: aa=FLAGS.star_from_dev steps_and_files = steps_and_files[aa:] # steps_and_files = steps_and_files[::10] steps_and_files = steps_and_files[1:] eval_results = [] for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]): ret = estimator.evaluate( input_fn=eval_input_fn, steps=eval_steps, checkpoint_path=filename) #################### #### error #### ret["step"] = global_step ret["path"] = filename eval_results.append(ret) tf.logging.info("=" * 80) log_str = "Eval result | " for key, val in sorted(ret.items(), key=lambda x: x[0]): log_str += "{} {} | ".format(key, val) tf.logging.info(log_str) ###################################################################################### ############################## PREDICT SETTING ####################################### ###################################################################################### if FLAGS.do_predict: print('----------------------------------') print('---start to compute Prediction----') print('----------------------------------') while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_file_base = "{}.len-{}.{}.eval.tf_record".format( spm_basename, FLAGS.max_seq_length, FLAGS.eval_split) eval_file = os.path.join(FLAGS.output_dir, eval_file_base) file_based_convert_examples_to_features_dev( eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn, eval_file) assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples)) eval_input_fn = file_based_input_fn_builder_dev( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=True) # Filter out all checkpoints in the directory steps_and_files = [] filenames = tf.gfile.ListDirectory(FLAGS.model_dir) for filename in filenames: if filename.endswith(".index"): ckpt_name = filename[:-6] cur_filename = join(FLAGS.model_dir, ckpt_name) global_step = int(cur_filename.split("-")[-1]) tf.logging.info("Add {} to eval list.".format(cur_filename)) steps_and_files.append([global_step, cur_filename]) steps_and_files = sorted(steps_and_files, key=lambda x: x[0]) # Decide whether to evaluate all ckpts # if not FLAGS.eval_all_ckpt: steps_and_files = steps_and_files[-1:] eval_results = [] for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]): ret = estimator.evaluate( 
input_fn=eval_input_fn, steps=eval_steps, checkpoint_path=filename) ret["step"] = global_step ret["path"] = filename eval_results.append(ret) tf.logging.info("=" * 80) log_str = "Eval result | " for key, val in sorted(ret.items(), key=lambda x: x[0]): log_str += "{} {} | ".format(key, val) tf.logging.info(log_str) print('----------------------------------') print('----start to compute Pearson------') print('----------------------------------') ################################################################################# Peason start predict_results = [] label_results=[] pearson=[] with tf.gfile.Open(os.path.join(predict_dir, "{}.tsv".format( task_name)), "w") as fout: fout.write("index\tprediction\n") import copy import math for pred_cnt, result in enumerate(estimator.predict( input_fn=eval_input_fn, yield_single_examples=False, checkpoint_path=FLAGS.predict_ckpt)): # if pred_cnt % 1000 == 0: # tf.logging.info("Predicting submission for example: {}".format( # pred_cnt)) ################## logits = [float(x) for x in result["logits"].flat] list_pre=[] # predict_results.append(logits) list_pre=list(result['labels']).copy() for i in range(len(result['labels'])): # if math.isnan(list_pre[i]) is False: # list_pre[i]=list(result['logits']).pop(0) ## need to add as many as batch size ##### pearson.append(list(zip(list(list_pre[:1]), list(result["labels"][:1])))) pearson.append(list(zip(list(list_pre[1:2]), list(result["labels"][1:2])))) pearson.append(list(zip(list(list_pre[2:3]), list(result["labels"][2:3])))) pearson.append(list(zip(list(list_pre[3:4]), list(result["labels"][3:4])))) pearson.append(list(zip(list(list_pre[4:5]), list(result["labels"][4:5])))) pearson.append(list(zip(list(list_pre[5:]), list(result["labels"][5:])))) pearson=np.array(pearson) # dimension: [data count,31 ,2] import pdb pdb.set_trace() pearson_res=[] from sklearn.metrics import mean_squared_error sk_mse_res=[] register1=pearson[:,0,0] register2=pearson[:,0,1] # register1=pearson[:,i,0] # register2=pearson[:,i,1] # register1 = register1[~np.isnan(register1)] # register2 = register2[~np.isnan(register2)] pearson_res.append(pd.Series(register1).corr(pd.Series(register2))) # sk_mse_res.append(np.sqrt(mean_squared_error(register1,register2))) # pearson = list((zip(predict_results, label_results))) print('pearson ###################') print(list(pearson_res)) # print('sk_mse_res ################') # print(list(sk_mse_res)) # print('label_results ###################') # print(label_results) ################## # logits = [float(x) for x in result["logits"].flat] # # list_pre=[] # predict_results.append(logits) pearson_path = os.path.join(predict_dir, "{}.pearson.json".format( task_name)) with tf.gfile.Open(pearson_path, "w") as fp: json.dump(pearson_res, fp, indent=4) if __name__ == "__main__": tf.app.run() # In[ ]: import json with open('/home/calvin/xlnet/predict_result/sts-b.pearson.json') as json_file: data = json.load(json_file) print(data) # In[ ]: # with open('/home/calvin/xlnet/predict_result/sts-b.label_results.json') as json_file: # data = json.load(json_file) # print(data) # file_based_convert_examples_to_features_dev( # eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn, # eval_file)
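# A more direct sketch of the Pearson step in the variant above: given flat
# arrays of predicted scores and gold scores, the correlation is one call.
# scipy.stats.pearsonr is assumed to be available; pandas' Series.corr (used
# above) returns the same value. The numbers are illustrative only.
import numpy as np
from scipy.stats import pearsonr

preds = np.array([2.1, 3.4, 0.5, 4.8])
golds = np.array([2.0, 3.0, 1.0, 5.0])

r, p_value = pearsonr(preds, golds)
print("pearson r = {:.4f}".format(r))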
def main(_): if FLAGS.server_ip and FLAGS.server_port: # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script import ptvsd print("Waiting for debugger attach") ptvsd.enable_attach(address=(FLAGS.server_ip, FLAGS.server_port), redirect_output=True) ptvsd.wait_for_attach() tf.set_random_seed(FLAGS.seed) np.random.seed(FLAGS.seed) tf.logging.set_verbosity(tf.logging.INFO) #### Validate flags if FLAGS.save_steps is not None: FLAGS.log_step_count_steps = min(FLAGS.log_step_count_steps, FLAGS.save_steps) if FLAGS.do_predict: predict_dir = FLAGS.predict_dir if not tf.gfile.Exists(predict_dir): tf.gfile.MakeDirs(predict_dir) processors = { "mnli_matched": MnliMatchedProcessor, "mnli_mismatched": MnliMismatchedProcessor, 'sts-b': StsbProcessor, 'imdb': ImdbProcessor, "yelp5": Yelp5Processor } if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train`, `do_eval, `do_predict` or " "`do_submit` must be True.") if not tf.gfile.Exists(FLAGS.output_dir): tf.gfile.MakeDirs(FLAGS.output_dir) if not tf.gfile.Exists(FLAGS.model_dir): tf.gfile.MakeDirs(FLAGS.model_dir) # ########################### LOAD PT model # ########################### LOAD PT model # import torch # from pytorch_transformers import CONFIG_NAME, TF_WEIGHTS_NAME, XLNetTokenizer, XLNetConfig, XLNetForSequenceClassification # save_path = os.path.join(FLAGS.model_dir, TF_WEIGHTS_NAME) # tf.logging.info("Model loaded from path: {}".format(save_path)) # device = torch.device("cuda", 4) # config = XLNetConfig.from_pretrained('xlnet-large-cased', finetuning_task=u'sts-b') # config_path = os.path.join(FLAGS.model_dir, CONFIG_NAME) # config.to_json_file(config_path) # pt_model = XLNetForSequenceClassification.from_pretrained(FLAGS.model_dir, from_tf=True, num_labels=1) # pt_model.to(device) # pt_model = torch.nn.DataParallel(pt_model, device_ids=[4, 5, 6, 7]) # from torch.optim import Adam # optimizer = Adam(pt_model.parameters(), lr=0.001, betas=(0.9, 0.999), # eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay, # amsgrad=False) # ########################### LOAD PT model # ########################### LOAD PT model task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() if not FLAGS.is_regression else None sp = spm.SentencePieceProcessor() sp.Load(FLAGS.spiece_model_file) def tokenize_fn(text): text = preprocess_text(text, lower=FLAGS.uncased) return encode_ids(sp, text) # run_config = model_utils.configure_tpu(FLAGS) # model_fn = get_model_fn(len(label_list) if label_list is not None else None) spm_basename = os.path.basename(FLAGS.spiece_model_file) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. 
# estimator = tf.estimator.Estimator( # model_fn=model_fn, # config=run_config) if FLAGS.do_train: train_file_base = "{}.len-{}.train.tf_record".format( spm_basename, FLAGS.max_seq_length) train_file = os.path.join(FLAGS.output_dir, train_file_base) tf.logging.info("Use tfrecord file {}".format(train_file)) train_examples = processor.get_train_examples(FLAGS.data_dir) tf.logging.info("Num of train samples: {}".format(len(train_examples))) file_based_convert_examples_to_features(train_examples, label_list, FLAGS.max_seq_length, tokenize_fn, train_file, FLAGS.num_passes) train_input_fn = file_based_input_fn_builder( input_file=train_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True) # estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps) ##### Create input tensors / placeholders bsz_per_core = FLAGS.train_batch_size // FLAGS.num_core_per_host params = { "batch_size": FLAGS.train_batch_size # the whole batch } train_set = train_input_fn(params) example = train_set.make_one_shot_iterator().get_next() if FLAGS.num_core_per_host > 1: examples = [{} for _ in range(FLAGS.num_core_per_host)] for key in example.keys(): vals = tf.split(example[key], FLAGS.num_core_per_host, 0) for device_id in range(FLAGS.num_core_per_host): examples[device_id][key] = vals[device_id] else: examples = [example] ##### Create computational graph tower_losses, tower_grads_and_vars, tower_inputs, tower_hidden_states, tower_logits = [], [], [], [], [] for i in range(FLAGS.num_core_per_host): reuse = True if i > 0 else None with tf.device(assign_to_gpu(i, "/gpu:0")), \ tf.variable_scope(tf.get_variable_scope(), reuse=reuse): loss_i, grads_and_vars_i, inputs_i, hidden_states_i, logits_i = single_core_graph( is_training=True, features=examples[i], label_list=label_list) tower_losses.append(loss_i) tower_grads_and_vars.append(grads_and_vars_i) tower_inputs.append(inputs_i) tower_hidden_states.append(hidden_states_i) tower_logits.append(logits_i) ## average losses and gradients across towers if len(tower_losses) > 1: loss = tf.add_n(tower_losses) / len(tower_losses) grads_and_vars = average_grads_and_vars(tower_grads_and_vars) inputs = dict((n, tf.concat([t[n] for t in tower_inputs], 0)) for n in tower_inputs[0]) hidden_states = list( tf.concat(t, 0) for t in zip(*tower_hidden_states)) logits = tf.concat(tower_logits, 0) else: loss = tower_losses[0] grads_and_vars = tower_grads_and_vars[0] inputs = tower_inputs[0] hidden_states = tower_hidden_states[0] logits = tower_logits[0] # Summaries merged = tf.summary.merge_all() ## get train op train_op, learning_rate, gnorm = model_utils.get_train_op( FLAGS, None, grads_and_vars=grads_and_vars) global_step = tf.train.get_global_step() ##### Training loop saver = tf.train.Saver(max_to_keep=FLAGS.max_save) gpu_options = tf.GPUOptions(allow_growth=True) #### load pretrained models model_utils.init_from_checkpoint(FLAGS, global_vars=True) writer = tf.summary.FileWriter(logdir=FLAGS.model_dir, graph=tf.get_default_graph()) with tf.Session(config=tf.ConfigProto( allow_soft_placement=True, gpu_options=gpu_options)) as sess: sess.run(tf.global_variables_initializer()) ######### ##### PYTORCH import torch from torch.optim import Adam from pytorch_transformers import CONFIG_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME, XLNetTokenizer, XLNetConfig, XLNetForSequenceClassification, BertAdam save_path = os.path.join(FLAGS.model_dir, TF_WEIGHTS_NAME + '-00') saver.save(sess, save_path) tf.logging.info("Model saved in path: {}".format(save_path)) device = 
torch.device("cuda", 4) config = XLNetConfig.from_pretrained('xlnet-large-cased', finetuning_task=u'sts-b', num_labels=1) tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') # pt_model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased', num_labels=1) pt_model = XLNetForSequenceClassification.from_pretrained( save_path, from_tf=True, config=config) pt_model.to(device) pt_model = torch.nn.DataParallel(pt_model, device_ids=[4, 5, 6, 7]) optimizer = Adam(pt_model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay, amsgrad=False) # optimizer = BertAdam(pt_model.parameters(), lr=FLAGS.learning_rate, t_total=FLAGS.train_steps, warmup=FLAGS.warmup_steps / FLAGS.train_steps, # eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay) ##### PYTORCH ######### fetches = [ loss, global_step, gnorm, learning_rate, train_op, merged, inputs, hidden_states, logits ] total_loss, total_loss_pt, prev_step, gnorm_pt = 0., 0., -1, 0.0 total_logits = None total_labels = None while True: feed_dict = {} # for i in range(FLAGS.num_core_per_host): # for key in tower_mems_np[i].keys(): # for m, m_np in zip(tower_mems[i][key], tower_mems_np[i][key]): # feed_dict[m] = m_np fetched = sess.run(fetches) loss_np, curr_step, gnorm_np, learning_rate_np, _, summary_np, inputs_np, hidden_states_np, logits_np = fetched total_loss += loss_np if total_logits is None: total_logits = logits_np total_labels = inputs_np['label_ids'] else: total_logits = np.append(total_logits, logits_np, axis=0) total_labels = np.append(total_labels, inputs_np['label_ids'], axis=0) ######### ##### PYTORCH f_inp = torch.tensor(inputs_np["input_ids"], dtype=torch.long, device=device) f_seg_id = torch.tensor(inputs_np["segment_ids"], dtype=torch.long, device=device) f_inp_mask = torch.tensor(inputs_np["input_mask"], dtype=torch.float, device=device) f_label = torch.tensor(inputs_np["label_ids"], dtype=torch.float, device=device) # with torch.no_grad(): # _, hidden_states_pt, _ = pt_model.transformer(f_inp, f_seg_id, f_inp_mask) # logits_pt, _ = pt_model(f_inp, token_type_ids=f_seg_id, input_mask=f_inp_mask) pt_model.train() outputs = pt_model(f_inp, token_type_ids=f_seg_id, input_mask=f_inp_mask, labels=f_label) loss_pt = outputs[0] loss_pt = loss_pt.mean() total_loss_pt += loss_pt.item() # # hidden_states_pt = list(t.detach().cpu().numpy() for t in hidden_states_pt) # # special_pt = special_pt.detach().cpu().numpy() # # Optimizer pt pt_model.zero_grad() loss_pt.backward() gnorm_pt = torch.nn.utils.clip_grad_norm_( pt_model.parameters(), FLAGS.clip) for param_group in optimizer.param_groups: param_group['lr'] = learning_rate_np optimizer.step() ##### PYTORCH ######### if curr_step > 0 and curr_step % FLAGS.log_step_count_steps == 0: curr_loss = total_loss / (curr_step - prev_step) curr_loss_pt = total_loss_pt / (curr_step - prev_step) tf.logging.info( "[{}] | gnorm {:.2f} lr {:8.6f} " "| loss {:.2f} | pplx {:>7.2f}, bpc {:>7.4f}".format( curr_step, gnorm_np, learning_rate_np, curr_loss, math.exp(curr_loss), curr_loss / math.log(2))) ######### ##### PYTORCH tf.logging.info( " PT [{}] | gnorm PT {:.2f} lr PT {:8.6f} " "| loss PT {:.2f} | pplx PT {:>7.2f}, bpc PT {:>7.4f}". 
format(curr_step, gnorm_pt, learning_rate_np, curr_loss_pt, math.exp(curr_loss_pt), curr_loss_pt / math.log(2))) ##### PYTORCH ######### total_loss, total_loss_pt, prev_step = 0., 0., curr_step writer.add_summary(summary_np, global_step=curr_step) if curr_step > 0 and curr_step % FLAGS.save_steps == 0: save_path = os.path.join(FLAGS.model_dir, "model.ckpt-{}".format(curr_step)) saver.save(sess, save_path) tf.logging.info( "Model saved in path: {}".format(save_path)) ######### ##### PYTORCH # Save a trained model, configuration and tokenizer model_to_save = pt_model.module if hasattr( pt_model, 'module') else pt_model # Only save the model it-self # If we save using the predefined names, we can load using `from_pretrained` output_dir = os.path.join( FLAGS.output_dir, "pytorch-ckpt-{}".format(curr_step)) if not tf.gfile.Exists(output_dir): tf.gfile.MakeDirs(output_dir) model_to_save.save_pretrained(output_dir) tokenizer.save_pretrained(output_dir) tf.logging.info( "PyTorch Model saved in path: {}".format(output_dir)) ##### PYTORCH ######### if curr_step >= FLAGS.train_steps: break if FLAGS.do_eval: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. These do NOT count towards the metric (all tf.metrics # support a per-instance weight, and these get a weight of 0.0). # # Modified in XL: We also adopt the same mechanism for GPUs. while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_file_base = "{}.len-{}.{}.eval.tf_record".format( spm_basename, FLAGS.max_seq_length, FLAGS.eval_split) eval_file = os.path.join(FLAGS.output_dir, eval_file_base) file_based_convert_examples_to_features(eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn, eval_file) assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) eval_input_fn = file_based_input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=True) with tf.Session(config=tf.ConfigProto( allow_soft_placement=True, gpu_options=gpu_options)) as sess: sess.run(tf.global_variables_initializer()) ########################### LOAD PT model # import torch # from pytorch_transformers import CONFIG_NAME, TF_WEIGHTS_NAME, WEIGHTS_NAME, XLNetTokenizer, XLNetConfig, XLNetForSequenceClassification, BertAdam # save_path = os.path.join(FLAGS.model_dir, TF_WEIGHTS_NAME) # saver.save(sess, save_path) # tf.logging.info("Model saved in path: {}".format(save_path)) # device = torch.device("cuda", 4) # config = XLNetConfig.from_pretrained('xlnet-large-cased', finetuning_task=u'sts-b', num_labels=1) # tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased') # config_path = os.path.join(FLAGS.model_dir, CONFIG_NAME) # config.to_json_file(config_path) # # pt_model = XLNetForSequenceClassification.from_pretrained('xlnet-large-cased', num_labels=1) # pt_model = XLNetForSequenceClassification.from_pretrained(FLAGS.model_dir, from_tf=True) # pt_model.to(device) # pt_model = torch.nn.DataParallel(pt_model, device_ids=[4, 5, 6, 7]) # from torch.optim import Adam # optimizer = Adam(pt_model.parameters(), lr=0.001, betas=(0.9, 0.999), # eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay, # amsgrad=False) # optimizer = BertAdam(pt_model.parameters(), lr=FLAGS.learning_rate, t_total=FLAGS.train_steps, warmup=FLAGS.warmup_steps / 
FLAGS.train_steps, # eps=FLAGS.adam_epsilon, weight_decay=FLAGS.weight_decay) ##### PYTORCH ######### fetches = [ loss, global_step, gnorm, learning_rate, train_op, merged, inputs, hidden_states, logits ] total_loss, total_loss_pt, prev_step, gnorm_pt = 0., 0., -1, 0.0 total_logits = None total_labels = None while True: feed_dict = {} # for i in range(FLAGS.num_core_per_host): # for key in tower_mems_np[i].keys(): # for m, m_np in zip(tower_mems[i][key], tower_mems_np[i][key]): # feed_dict[m] = m_np fetched = sess.run(fetches) loss_np, curr_step, gnorm_np, learning_rate_np, _, summary_np, inputs_np, hidden_states_np, logits_np = fetched total_loss += loss_np if total_logits is None: total_logits = logits_np total_labels = inputs_np['label_ids'] else: total_logits = np.append(total_logits, logits_np, axis=0) total_labels = np.append(total_labels, inputs_np['label_ids'], axis=0)
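# The training-loop logging above reports perplexity and bits-per-character
# derived from the mean cross-entropy loss (in nats). A tiny worked example
# of that conversion; the loss value is illustrative only.
import math

mean_loss = 1.25                # average cross-entropy in nats
pplx = math.exp(mean_loss)      # perplexity = e^loss
bpc = mean_loss / math.log(2)   # nats -> bits
print("pplx {:.2f}, bpc {:.4f}".format(pplx, bpc))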
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  all_labels = get_labels(FLAGS.data_dir, FLAGS.train_file)
  all_labels.append('##')
  all_labels.append('PAD')
  ### only for testing
  all_labels.append('SYM')

  #### Validate flags
  if FLAGS.save_steps is not None:
    FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

  if not FLAGS.do_train and not FLAGS.do_eval:
    raise ValueError(
        "At least one of `do_train` or `do_eval` must be True.")

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  sp = spm.SentencePieceProcessor()
  sp.Load(FLAGS.spiece_model_file)

  def tokenize_fn(text):
    text = preprocess_text(text, lower=FLAGS.uncased)
    return encode_ids(sp, text)

  # TPU Configuration
  run_config = model_utils.configure_tpu(FLAGS)

  model_fn = get_model_fn(len(all_labels))

  spm_basename = os.path.basename(FLAGS.spiece_model_file)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  if FLAGS.use_tpu:
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
  else:
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config)

  if FLAGS.do_train:
    train_examples = create_examples(FLAGS.data_dir, FLAGS.train_file)
    random.shuffle(train_examples)

    train_features = conver_examples_to_features(
        train_examples, all_labels, tokenize_fn)

    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    tf.logging.info('Train steps: ' + str(num_train_steps) + '.')

    train_input_fn = input_fn_builder(
        features=train_features,
        drop_remainder=True,
        is_training=True)

    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_eval:
    eval_examples = create_examples(FLAGS.data_dir, FLAGS.test_file)

    while len(eval_examples) % FLAGS.eval_batch_size != 0:
      eval_examples.append(PaddingInputExample())

    assert len(eval_examples) % FLAGS.eval_batch_size == 0
    eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_features = conver_examples_to_features(
        eval_examples, all_labels, tokenize_fn)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = input_fn_builder(
        features=eval_features,
        drop_remainder=eval_drop_remainder,
        is_training=False)

    ret = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

    tf.logging.info("=" * 80)
    log_str = "Eval | "
    for key, val in ret.items():
      log_str += "{} {} | ".format(key, val)
    tf.logging.info(log_str)
    tf.logging.info("=" * 80)
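# Sketch of how the number of training steps is derived in the variant above:
# examples per epoch divided by the batch size, scaled by the epoch count.
# The numbers are illustrative only.
num_examples = 12000
train_batch_size = 32
num_train_epochs = 3.0

num_train_steps = int(num_examples / train_batch_size * num_train_epochs)
print(num_train_steps)  # 1125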
def main(_): tf.logging.set_verbosity(tf.logging.INFO) #### Validate flags if FLAGS.save_steps is not None: FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps) processors = { "detect": DetectProcessor, } if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train`, `do_eval, `do_predict` or " "`do_submit` must be True.") if not tf.gfile.Exists(FLAGS.output_dir): tf.gfile.MakeDirs(FLAGS.output_dir) task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() if not FLAGS.is_regression else None sp = spm.SentencePieceProcessor() sp.Load(FLAGS.spiece_model_file) def tokenize_fn(text): text = preprocess_text(text, lower=FLAGS.uncased) return encode_ids(sp, text) run_config = model_utils.configure_tpu(FLAGS) model_fn = get_model_fn(len(label_list) if label_list is not None else None) spm_basename = os.path.basename(FLAGS.spiece_model_file) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. if FLAGS.use_tpu: estimator = tf.contrib.tpu.TPUEstimator( use_tpu=FLAGS.use_tpu, model_fn=model_fn, config=run_config, train_batch_size=FLAGS.train_batch_size, predict_batch_size=FLAGS.predict_batch_size, eval_batch_size=FLAGS.eval_batch_size) else: estimator = tf.estimator.Estimator( model_fn=model_fn, config=run_config) if FLAGS.do_train: train_file_base = "{}.len-{}.train.tf_record".format( spm_basename, FLAGS.max_seq_length) train_file = os.path.join(FLAGS.output_dir, train_file_base) tf.logging.info("Use tfrecord file {}".format(train_file)) train_examples = processor.get_train_examples(FLAGS.data_dir) num_train_steps = int( len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) np.random.shuffle(train_examples) tf.logging.info("Num of train samples: {}".format(len(train_examples))) tf.logging.info("Num of train steps: {}".format(num_train_steps)) file_based_convert_examples_to_features( train_examples, label_list, FLAGS.max_seq_length, tokenize_fn, train_file, FLAGS.num_passes) train_input_fn = file_based_input_fn_builder( input_file=train_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True) estimator.train(input_fn=train_input_fn, steps=num_train_steps) # TODO if FLAGS.do_train_test: train_test_file_base = "{}.len-{}.train_test.tf_record".format( spm_basename, FLAGS.max_seq_length) train_test_file = os.path.join(FLAGS.output_dir, train_test_file_base) tf.logging.info("Use tfrecord file {}".format(train_test_file)) train_test_examples = processor.get_train_test_examples(FLAGS.data_dir) num_train_test_steps = int( len(train_test_examples) / FLAGS.train_batch_size * 1) np.random.shuffle(train_examples) tf.logging.info("Num of test samples: {}".format(len(train_test_examples))) tf.logging.info("Num of test steps: {}".format(num_train_test_steps)) file_based_convert_examples_to_features( train_test_examples, label_list, FLAGS.max_seq_length, tokenize_fn, train_test_file, FLAGS.num_passes) train_test_input_fn = file_based_input_fn_builder( input_file=train_test_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True) estimator.train(input_fn=train_test_input_fn, steps=num_train_test_steps) if FLAGS.do_eval: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. 
These do NOT count towards the metric (all tf.metrics # support a per-instance weight, and these get a weight of 0.0). # # Modified in XL: We also adopt the same mechanism for GPUs. eval_examples = processor.get_dev_examples(FLAGS.data_dir) tf.logging.info("Num of eval samples: {}".format(len(eval_examples))) while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_file_base = "{}.len-{}.{}.eval.tf_record".format( spm_basename, FLAGS.max_seq_length, FLAGS.eval_split) eval_file = os.path.join(FLAGS.output_dir, eval_file_base) file_based_convert_examples_to_features( eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn, eval_file) assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) eval_input_fn = file_based_input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=True) # Filter out all checkpoints in the directory steps_and_files = [] filenames = tf.gfile.ListDirectory(FLAGS.model_dir) for filename in filenames: if filename.endswith(".index"): ckpt_name = filename[:-6] cur_filename = join(FLAGS.model_dir, ckpt_name) global_step = int(cur_filename.split("-")[-1]) tf.logging.info("Add {} to eval list.".format(cur_filename)) steps_and_files.append([global_step, cur_filename]) steps_and_files = sorted(steps_and_files, key=lambda x: x[0]) # Decide whether to evaluate all ckpts if not FLAGS.eval_all_ckpt: steps_and_files = steps_and_files[-1:] eval_results = [] for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]): ret = estimator.evaluate( input_fn=eval_input_fn, steps=eval_steps, checkpoint_path=filename) ret["step"] = global_step ret["path"] = filename eval_results.append(ret) tf.logging.info("=" * 80) log_str = "Eval result | " for key, val in sorted(ret.items(), key=lambda x: x[0]): log_str += "{} {} | ".format(key, val) tf.logging.info(log_str) key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy" eval_results.sort(key=lambda x: x[key_name], reverse=True) tf.logging.info("=" * 80) log_str = "Best result | " for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]): log_str += "{} {} | ".format(key, val) tf.logging.info(log_str) if FLAGS.do_predict: predict_dir = FLAGS.predict_dir if not tf.gfile.Exists(predict_dir): tf.gfile.MakeDirs(predict_dir) predict_file_base = "{}.len-{}.{}.predict.tf_record".format(spm_basename, FLAGS.max_seq_length, FLAGS.predict_split) predict_file = os.path.join(FLAGS.output_dir, predict_file_base) predict_examples = processor.get_test_examples(FLAGS.data_dir) num_actual_predict_examples = len(predict_examples) tf.logging.info("Num of predict samples: {}".format(len(predict_examples))) file_based_convert_examples_to_features( predict_examples, label_list, FLAGS.max_seq_length, tokenize_fn, predict_file) pred_input_fn = file_based_input_fn_builder( input_file=predict_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=False) if FLAGS.predict_batch_size != 1: result = estimator.predict(input_fn=pred_input_fn) else: result = estimator.predict(input_fn=pred_input_fn, yield_single_examples=False) if FLAGS.use_stack: logits = [ prediction["logits"] for prediction in result ] save_pickle(FLAGS.stack_dir, logits) # TODO output_predict_file = FLAGS.test_save original_file = os.path.join(FLAGS.data_dir, FLAGS.test_set) df = pd.read_csv(original_file) lines = [row['id'] for index, row in df.iterrows()] with open(output_predict_file, "w") as 
f: writer = csv.writer(f, delimiter=',') writer.writerow(['id','label']) num_written_lines = 0 tf.logging.info("***** Predict results *****") for (i, prediction) in enumerate(zip(lines, result)): ID = prediction[0] label = prediction[1]["labels"] if i >= num_actual_predict_examples: break writer.writerow([ID, label]) num_written_lines += 1 assert num_written_lines == num_actual_predict_examples
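# A compact sketch of the CSV-writing step above: each prediction is paired
# with its id and writing stops once the count of real (non-padded) examples
# is reached. File name and values are illustrative only.
import csv

ids = ["a1", "a2", "a3"]
predicted_labels = [0, 1, 1, 0]  # may include extras from padded batches
num_actual = len(ids)

with open("predictions.csv", "w", newline="") as f:
  writer = csv.writer(f, delimiter=",")
  writer.writerow(["id", "label"])
  for i, (example_id, label) in enumerate(zip(ids, predicted_labels)):
    if i >= num_actual:
      break
    writer.writerow([example_id, label])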
def main(_): tf.logging.set_verbosity(tf.logging.INFO) #### Validate flags if FLAGS.save_steps is not None: FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps) if FLAGS.calc_ists_metrics and FLAGS.metrics_dir: predictions = [] if not FLAGS.pred_file: predictions = model_utils.get_predictions(FLAGS.predict_dir) else: predictions = [model_utils.extract_global_step(FLAGS.pred_file[:-4]), FLAGS.pred_file] if not tf.gfile.Exists(FLAGS.metrics_dir): tf.gfile.MakeDirs(FLAGS.metrics_dir) dataset_name = FLAGS.data_dir.split("/")[-1] # Write metrics to file with tf.gfile.Open(os.path.join(FLAGS.metrics_dir, "{}.tsv".format("metrics-" + dataset_name)), "w") as fout: fout.write("step\tf1-type\tf1-socre\tf1-t+s\tpearson-type\tpearson-score\n") # Calc metric for all predictions for global_step, pred_file_path in sorted(predictions, key=lambda x: x[0]): metrics = calc_ists_metrics(pred_file_path, FLAGS.data_dir + "/test.tsv") print('\n Dataset: {}\n Step: {}\n [F1 Type]: {}\n [F1 Score]: {}\n [F1 T+S]: {}\n [P T]: {}\n [P S]: {}'.format(dataset_name, global_step, *metrics)) fout.write('{}\t{}\t{}\t{}\t{}\t\n'.format(global_step, *metrics)) # End execution after caclulations return None if FLAGS.do_predict: predict_dir = FLAGS.predict_dir if not tf.gfile.Exists(predict_dir): tf.gfile.MakeDirs(predict_dir) processors = { "mnli_matched": MnliMatchedProcessor, "mnli_mismatched": MnliMismatchedProcessor, 'sts-b': StsbProcessor, 'imdb': ImdbProcessor, "yelp5": Yelp5Processor, "ists": IStsProcessor } if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict and not FLAGS.calc_ists_metrics: raise ValueError( "At least one of `do_train`, `do_eval, `do_predict`, `calc_ists_metrics` or " "`do_submit` must be True.") if not tf.gfile.Exists(FLAGS.output_dir): tf.gfile.MakeDirs(FLAGS.output_dir) task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() if not FLAGS.is_regression else None sp = spm.SentencePieceProcessor() sp.Load(FLAGS.spiece_model_file) def tokenize_fn(text): text = preprocess_text(text, lower=FLAGS.uncased) return encode_ids(sp, text) run_config = model_utils.configure_tpu(FLAGS) model_fn = get_model_fn(len(label_list) if label_list is not None else None) spm_basename = os.path.basename(FLAGS.spiece_model_file) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. 
if FLAGS.use_tpu: estimator = tf.contrib.tpu.TPUEstimator( use_tpu=FLAGS.use_tpu, model_fn=model_fn, config=run_config, train_batch_size=FLAGS.train_batch_size, predict_batch_size=FLAGS.predict_batch_size, eval_batch_size=FLAGS.eval_batch_size) else: estimator = tf.estimator.Estimator( model_fn=model_fn, config=run_config) if FLAGS.do_train: train_file_base = "{}.len-{}.train.tf_record".format( spm_basename, FLAGS.max_seq_length) train_file = os.path.join(FLAGS.output_dir, train_file_base) tf.logging.info("Use tfrecord file {}".format(train_file)) train_examples = processor.get_train_examples(FLAGS.data_dir) np.random.shuffle(train_examples) tf.logging.info("Num of train samples: {}".format(len(train_examples))) file_based_convert_examples_to_features( train_examples, label_list, FLAGS.max_seq_length, tokenize_fn, train_file, FLAGS.num_passes) train_input_fn = file_based_input_fn_builder( input_file=train_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True) estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps) if FLAGS.do_eval or FLAGS.do_predict: if FLAGS.eval_split == "dev": eval_examples = processor.get_dev_examples(FLAGS.data_dir) else: eval_examples = processor.get_test_examples(FLAGS.data_dir) tf.logging.info("Num of eval samples: {}".format(len(eval_examples))) if FLAGS.do_eval: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. These do NOT count towards the metric (all tf.metrics # support a per-instance weight, and these get a weight of 0.0). # # Modified in XL: We also adopt the same mechanism for GPUs. while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_file_base = "{}.len-{}.{}.eval.tf_record".format( spm_basename, FLAGS.max_seq_length, FLAGS.eval_split) eval_file = os.path.join(FLAGS.output_dir, eval_file_base) file_based_convert_examples_to_features( eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn, eval_file) assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) eval_input_fn = file_based_input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=True) # Filter out all checkpoints in the directory steps_and_files = model_utils.get_checkpoints(FLAGS.model_dir) # Decide whether to evaluate all ckpts if not FLAGS.eval_all_ckpt: steps_and_files = steps_and_files[-1:] eval_results = [] for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]): ret = estimator.evaluate( input_fn=eval_input_fn, steps=eval_steps, checkpoint_path=filename) ret["step"] = global_step ret["path"] = filename eval_results.append(ret) tf.logging.info("=" * 80) log_str = "Eval result | " for key, val in sorted(ret.items(), key=lambda x: x[0]): log_str += "{} {} | ".format(key, val) tf.logging.info(log_str) key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy" eval_results.sort(key=lambda x: x[key_name], reverse=True) tf.logging.info("=" * 80) log_str = "Best result | " for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]): log_str += "{} {} | ".format(key, val) tf.logging.info(log_str) if FLAGS.do_predict: eval_file_base = "{}.len-{}.{}.predict.tf_record".format( spm_basename, FLAGS.max_seq_length, FLAGS.eval_split) eval_file = os.path.join(FLAGS.output_dir, eval_file_base) 
file_based_convert_examples_to_features( eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn, eval_file) pred_input_fn = file_based_input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=False) steps_and_files = [] if not FLAGS.predict_ckpt: steps_and_files = steps_and_files[-1:] # Filter out all checkpoints in the directory steps_and_files = model_utils.get_checkpoints(FLAGS.model_dir) # Decide whether to predict all ckpts if not FLAGS.pred_all_ckpt: steps_and_files = steps_and_files[-1:] else: steps_and_files = [model_utils.extract_global_step(FLAGS.predict_ckpt), FLAGS.predict_ckpt] for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]): predict_results = [] with tf.gfile.Open(os.path.join(predict_dir, "{}.tsv".format( "step-" + str(global_step))), "w") as fout: fout.write("index\tprediction\n") for pred_cnt, result in enumerate(estimator.predict( input_fn=pred_input_fn, yield_single_examples=True, checkpoint_path=filename)): if pred_cnt % 1000 == 0: tf.logging.info("Predicting submission for example: {}".format( pred_cnt)) logits = [float(x) for x in result["logits"].flat] predict_results.append(logits) if len(logits) == 1: label_out = logits[0] elif len(logits) == 2: if logits[1] - logits[0] > FLAGS.predict_threshold: label_out = label_list[1] else: label_out = label_list[0] elif len(logits) > 2: max_index = np.argmax(np.array(logits, dtype=np.float32)) label_out = label_list[max_index] else: raise NotImplementedError fout.write("{}\t{}\n".format(pred_cnt, label_out)) predict_json_path = os.path.join(predict_dir, "{}.logits.json".format( "step-" + str(global_step))) with tf.gfile.Open(predict_json_path, "w") as fp: json.dump(predict_results, fp, indent=4)
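# The predict loops above map raw logits to an output label with the same
# three-way rule (regression score, thresholded binary margin, or argmax for
# multi-class). A hypothetical standalone version of that rule for clarity:
import numpy as np


def logits_to_label(logits, label_list, threshold=0.0):
  if len(logits) == 1:    # regression: emit the raw score
    return logits[0]
  if len(logits) == 2:    # binary: compare the logit margin to a threshold
    if logits[1] - logits[0] > threshold:
      return label_list[1]
    return label_list[0]
  if len(logits) > 2:     # multi-class: argmax over logits
    return label_list[int(np.argmax(np.array(logits, dtype=np.float32)))]
  raise NotImplementedError


print(logits_to_label([0.2, 1.7, -0.3], ["neg", "neu", "pos"]))  # "neu"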