def __init__(self):
    self.sp = spm.SentencePieceProcessor()
    self.sp.Load(FLAGS.spiece_model_file)
    tf.logging.set_verbosity(tf.logging.INFO)
    self.processor = CSCProcessor()
    self.run_config = model_utils.configure_tpu(FLAGS)
    label_list = self.processor.get_labels() if not FLAGS.is_regression else None
    self.model_fn = get_model_fn(len(label_list) if label_list is not None else None)
    self.estimator = tf.estimator.Estimator(
        model_fn=self.model_fn,
        config=self.run_config,
        model_dir=FLAGS.predict_ckpt)
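# A minimal usage sketch, assuming the __init__ above belongs to a
# predictor class (hypothetically named CSCPredictor) in a module that
# defines CSCProcessor and get_model_fn as used above:
#
#     predictor = CSCPredictor()
#     results = predictor.estimator.predict(input_fn=some_input_fn)
#
# where some_input_fn is a placeholder for an input function built from
# the processor's examples.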
def main(unused_argv):
    del unused_argv  # Unused
    tf.logging.set_verbosity(tf.logging.INFO)

    assert FLAGS.seq_len > 0
    assert FLAGS.perm_size > 0

    FLAGS.n_token = data_utils.VOCAB_SIZE
    tf.logging.info("n_token {}".format(FLAGS.n_token))

    if not tf.gfile.Exists(FLAGS.model_dir):
        tf.gfile.MakeDirs(FLAGS.model_dir)

    # Get train input function
    train_input_fn, train_record_info_dict = get_input_fn("train")
    tf.logging.info("num of batches {}".format(
        train_record_info_dict["num_batch"]))

    # Get train cache function
    train_cache_fn = get_cache_fn(FLAGS.mem_len)

    ##### Get model function
    model_fn = get_model_fn()

    ##### Create TPUEstimator
    # TPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)

    # TPU Estimator
    estimator = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        train_cache_fn=train_cache_fn,
        use_tpu=FLAGS.use_tpu,
        config=run_config,
        params={"track_mean": FLAGS.track_mean},
        train_batch_size=FLAGS.train_batch_size,
        eval_on_tpu=FLAGS.use_tpu)

    hooks = None
    if FLAGS.debug:
        if FLAGS.debug_dump_dir:
            hooks = [tf_debug.DumpingDebugHook(FLAGS.debug_dump_dir)]
        else:
            hooks = [tf_debug.LocalCLIDebugHook()]

    #### Training
    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps,
                    hooks=hooks)
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    tpu_config = model_utils.configure_tpu(FLAGS)
    model_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(False, True, FLAGS)

    model_builder = XLNetModelBuilder(
        default_model_config=model_config,
        default_run_config=run_config,
        default_init_checkpoint=FLAGS.init_checkpoint,
        use_tpu=FLAGS.use_tpu)

    model_fn = model_builder.get_model_fn(
        model_config, run_config, FLAGS.init_checkpoint, FLAGS.model_type)

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=tpu_config,
        export_to_tpu=FLAGS.use_tpu,
        train_batch_size=1)

    tokenizer = XLNetTokenizer(
        sp_model_file=FLAGS.spiece_model_file,
        lower_case=FLAGS.lower_case)

    example_converter = XLNetExampleConverter(
        label_list=[],
        max_seq_length=FLAGS.max_seq_length,
        tokenizer=tokenizer)

    features = example_converter.convert_examples_to_features(
        [PaddingInputExample()])

    input_fn = XLNetInputBuilder.get_input_builder(
        features, FLAGS.max_seq_length, True, False)

    # Run a single step on a padding example so the estimator has a
    # checkpoint to export.
    estimator.train(input_fn, max_steps=1)

    tf.gfile.MakeDirs(FLAGS.export_dir)
    serving_input_fn = XLNetInputBuilder.get_serving_input_fn(FLAGS.max_seq_length)
    estimator.export_savedmodel(FLAGS.export_dir, serving_input_fn, as_text=False)
def main(unused_argv):
    del unused_argv  # Unused
    tf.logging.set_verbosity(tf.logging.INFO)

    assert FLAGS.seq_len > 0
    assert FLAGS.perm_size > 0

    FLAGS.n_token = data_utils.VOCAB_SIZE
    tf.logging.info('n_token {}'.format(FLAGS.n_token))

    if not tf.gfile.Exists(FLAGS.model_dir):
        tf.gfile.MakeDirs(FLAGS.model_dir)

    # Get train input function
    train_input_fn, train_record_info_dict = get_input_fn('train')
    tf.logging.info(
        'num of batches {}'.format(train_record_info_dict['num_batch']))

    # Get train cache function
    train_cache_fn = get_cache_fn(FLAGS.mem_len)

    ##### Get model function
    model_fn = get_model_fn()

    ##### Create TPUEstimator
    # TPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)

    # TPU Estimator
    estimator = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        train_cache_fn=train_cache_fn,
        use_tpu=FLAGS.use_tpu,
        config=run_config,
        params={'track_mean': FLAGS.track_mean},
        train_batch_size=FLAGS.train_batch_size,
        eval_on_tpu=FLAGS.use_tpu,
    )

    #### Training
    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)
def run():
    # Set the logging verbosity to INFO
    tf.logging.set_verbosity(tf.logging.INFO)
    # Load the training data
    x_train, y_train, label_train = gen_train_samples(FLAGS.train_samples)
    # Configure the run
    run_config = model_utils.configure_tpu(FLAGS)

    # Build the Estimator
    model = tf.estimator.Estimator(model_fn, params={"seq_len": SEQ_LEN},
                                   config=run_config)

    # Alternative: define the input function for training from in-memory arrays
    # input_fn = tf.estimator.inputs.numpy_input_fn(
    #     x={'entity_ids': x_train, 'entity_ids_list': y_train,
    #        'labels': label_train},
    #     batch_size=FLAGS.batch_size, num_epochs=None, shuffle=True)

    # Define the input function based on the tf.record file
    input_fn = gen_train_input_fn(FLAGS.train_samples)

    # Train the model
    model.train(input_fn, steps=FLAGS.train_steps)

    # Save the model for serving
    feature_spec = {
        'entity_ids': tf.placeholder(dtype=tf.int32, shape=[None, SEQ_LEN],
                                     name='entity'),
        'entity_ids_list': tf.placeholder(dtype=tf.int32,
                                          shape=[None, None, SEQ_LEN],
                                          name='entity_list'),
    }
    serving_input_receiver_fn = (
        tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec))
    model.export_savedmodel(FLAGS.serving_model_dir, serving_input_receiver_fn)
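# A minimal sketch of querying the exported SavedModel with TF 1.x's
# tf.contrib.predictor. The timestamped export subdirectory layout is how
# export_savedmodel writes models; the output keys depend on the
# export_outputs defined in model_fn and are an assumption here.
import os
import numpy as np
import tensorflow as tf

export_base = FLAGS.serving_model_dir
latest = os.path.join(export_base,
                      sorted(tf.gfile.ListDirectory(export_base))[-1])
predict_fn = tf.contrib.predictor.from_saved_model(latest)
outputs = predict_fn({
    'entity_ids': np.zeros([1, SEQ_LEN], dtype=np.int32),
    'entity_ids_list': np.zeros([1, 3, SEQ_LEN], dtype=np.int32),
})
print(outputs)  # inspect the available output keys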
def main(_):
    ###################################################################
    score_reader = ScoreReader(
        '/u02/datasets/psytesting/scores_combined.csv', '/u02/texts/',
        count_words=False, system_idx=-1, fname_idx=-2, score_idx=17,
        testname_idx=None)
    score_reader.normalize_scores()

    ## 365194 train scores, 25013 val scores, 16675 test scores
    ## 374377 train scores, 24959 val scores, 16638 test scores
    trainScores, valScores, testScores = score_reader.split_train_test(0.9, 1)

    ########################### must be a multiple of the batch size
    trainScores = trainScores[:10548]
    valScores = valScores[:1170]
    testScores = testScores[:12000]
    ###########################

    filename_train = [s[0] for s in trainScores]
    score_train = [s[1] for s in trainScores]
    filename_dev = [s[0] for s in valScores]
    score_dev = [s[1] for s in valScores]
    filename_test = [s[0] for s in testScores]
    score_test = [s[1] for s in testScores]

    #############################################################
    ############## SETTINGS FOR THE FUNCTION ####################
    #############################################################
    tf.logging.set_verbosity(tf.logging.INFO)

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if FLAGS.do_predict:
        predict_dir = FLAGS.predict_dir
        if not tf.gfile.Exists(predict_dir):
            tf.gfile.MakeDirs(predict_dir)

    processor = GLUEProcessor(
        filename=filename_train, label=score_train,
        filename_dev=filename_dev, label_dev=score_dev,
        filename_test=filename_test, label_test=score_test)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval`, `do_predict` or "
            "`do_submit` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()

    ####################################################################
    ####################### tokenization ###############################
    ####################################################################
    # Changed from the stock XLNet script, which used
    #   label_list = processor.get_labels() if not FLAGS.is_regression else None
    # Here label_list is non-None for the regression task; get_labels()
    # returns [0.0].
    label_list = processor.get_labels() if FLAGS.is_regression else None

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = str(text[0])
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn(len(label_list) if label_list is not None else None)

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

    ######################################################################
    ############################## TRAINING ##############################
    ######################################################################
    if FLAGS.do_train:
        train_file_base = "{}.len-{}.train.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_file = os.path.join(FLAGS.output_dir, train_file_base)
        tf.logging.info("Use tfrecord file {}".format(train_file))

        # Get training data
        train_examples = processor.get_train_examples(
            FLAGS.data_dir, filename_train, filename_dev, filename_test,
            score_train, score_dev, score_test)
        np.random.shuffle(train_examples)
        tf.logging.info("Num of train samples: {}".format(len(train_examples)))

        file_based_convert_examples_to_features(
            train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
            train_file, FLAGS.num_passes)

        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)

        # estimator.train(input_fn=train_input_fn, steps=1000,
        #                 max_steps=FLAGS.train_steps)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    ######################################################################
    ############################# VALIDATION #############################
    ######################################################################
    print('----------------------------------')
    print('------start to evaluate ----------')
    print('----------------------------------')

    if FLAGS.do_eval or FLAGS.do_predict:
        eval_examples = processor.get_dev_examples(
            FLAGS.data_dir, filename_train, filename_dev, filename_test,
            score_train, score_dev, score_test)
        tf.logging.info(
            "Num of eval/predicted samples: {}".format(len(eval_examples)))

    if FLAGS.do_eval:
        # TPU requires a fixed batch size for all batches, therefore the
        # number of examples must be a multiple of the batch size, or else
        # examples will get dropped. So we pad with fake examples which are
        # ignored later on. These do NOT count towards the metric (all
        # tf.metrics support a per-instance weight, and these get a weight
        # of 0.0).
        #
        # Modified in XL: We also adopt the same mechanism for GPUs.
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)
        file_based_convert_examples_to_features_dev(
            eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
            eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder_dev(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        # Collect all checkpoints in the directory
        steps_and_files = []
        filenames = tf.gfile.ListDirectory(FLAGS.model_dir)
        for filename in filenames:
            if filename.endswith(".index"):
                ckpt_name = filename[:-6]
                cur_filename = join(FLAGS.model_dir, ckpt_name)
                global_step = int(cur_filename.split("-")[-1])
                tf.logging.info("Add {} to eval list.".format(cur_filename))
                steps_and_files.append([global_step, cur_filename])
        steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

        # Skip the first `star_from_dev` checkpoints, then drop the earliest
        # of the remaining ones.
        steps_and_files = steps_and_files[FLAGS.star_from_dev:]
        steps_and_files = steps_and_files[1:]

        eval_results = []
        for global_step, filename in sorted(steps_and_files,
                                            key=lambda x: x[0]):
            ret = estimator.evaluate(
                input_fn=eval_input_fn,
                steps=eval_steps,
                checkpoint_path=filename)
            ret["step"] = global_step
            ret["path"] = filename
            eval_results.append(ret)

            tf.logging.info("=" * 80)
            log_str = "Eval result | "
            for key, val in sorted(ret.items(), key=lambda x: x[0]):
                log_str += "{} {} | ".format(key, val)
            tf.logging.info(log_str)

    ######################################################################
    ############################# PREDICTION #############################
    ######################################################################
    if FLAGS.do_predict:
        print('----------------------------------')
        print('---start to compute Prediction----')
        print('----------------------------------')

        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)
        file_based_convert_examples_to_features_dev(
            eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
            eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        # Note: unlike the eval branch, this is the raw example count, not
        # divided by the batch size.
        eval_steps = int(len(eval_examples))

        eval_input_fn = file_based_input_fn_builder_dev(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        # Collect all checkpoints in the directory and keep the latest one
        steps_and_files = []
        filenames = tf.gfile.ListDirectory(FLAGS.model_dir)
        for filename in filenames:
            if filename.endswith(".index"):
                ckpt_name = filename[:-6]
                cur_filename = join(FLAGS.model_dir, ckpt_name)
                global_step = int(cur_filename.split("-")[-1])
                tf.logging.info("Add {} to eval list.".format(cur_filename))
                steps_and_files.append([global_step, cur_filename])
        steps_and_files = sorted(steps_and_files, key=lambda x: x[0])[-1:]

        eval_results = []
        for global_step, filename in sorted(steps_and_files,
                                            key=lambda x: x[0]):
            ret = estimator.evaluate(
                input_fn=eval_input_fn,
                steps=eval_steps,
                checkpoint_path=filename)
            ret["step"] = global_step
            ret["path"] = filename
            eval_results.append(ret)

            tf.logging.info("=" * 80)
            log_str = "Eval result | "
            for key, val in sorted(ret.items(), key=lambda x: x[0]):
                log_str += "{} {} | ".format(key, val)
            tf.logging.info(log_str)

        print('----------------------------------')
        print('----start to compute Pearson------')
        print('----------------------------------')

        ############################################################ Pearson
        pearson = []
        with tf.gfile.Open(os.path.join(predict_dir,
                                        "{}.tsv".format(task_name)),
                           "w") as fout:
            fout.write("index\tprediction\n")
            for pred_cnt, result in enumerate(estimator.predict(
                    input_fn=eval_input_fn,
                    yield_single_examples=False,
                    checkpoint_path=FLAGS.predict_ckpt)):
                list_pre = list(result['labels']).copy()
                # Pair each prediction with its label, one entry per example
                # in the batch (batch size 6 here).
                pearson.append(list(zip(list(list_pre[:1]),
                                        list(result["labels"][:1]))))
                pearson.append(list(zip(list(list_pre[1:2]),
                                        list(result["labels"][1:2]))))
                pearson.append(list(zip(list(list_pre[2:3]),
                                        list(result["labels"][2:3]))))
                pearson.append(list(zip(list(list_pre[3:4]),
                                        list(result["labels"][3:4]))))
                pearson.append(list(zip(list(list_pre[4:5]),
                                        list(result["labels"][4:5]))))
                pearson.append(list(zip(list(list_pre[5:]),
                                        list(result["labels"][5:]))))

        pearson = np.array(pearson)  # dimension: [data count, 1, 2]
        pearson_res = []
        register1 = pearson[:, 0, 0]
        register2 = pearson[:, 0, 1]
        pearson_res.append(pd.Series(register1).corr(pd.Series(register2)))
        # (Optionally compute RMSE via sklearn.metrics.mean_squared_error.)

        print('pearson ###################')
        print(list(pearson_res))

        pearson_path = os.path.join(predict_dir,
                                    "{}.pearson.json".format(task_name))
        with tf.gfile.Open(pearson_path, "w") as fp:
            json.dump(pearson_res, fp, indent=4)


if __name__ == "__main__":
    tf.app.run()


# Read back the dumped Pearson result:
import json

with open('/home/calvin/xlnet/predict_result/sts-b.pearson.json') as json_file:
    data = json.load(json_file)
print(data)
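# A quick sanity check of the Pearson computation above: pandas'
# Series.corr defaults to the Pearson coefficient and agrees with
# np.corrcoef on the same data.
import numpy as np
import pandas as pd

a = pd.Series([1.0, 2.0, 3.0, 4.0])
b = pd.Series([1.1, 1.9, 3.2, 3.8])
print(a.corr(b), np.corrcoef(a, b)[0, 1])  # both ~0.99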
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    if FLAGS.do_prepro:
        preprocess()
        return

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if not FLAGS.do_train and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train` and `do_predict` must be True.")

    if FLAGS.do_predict and not tf.gfile.Exists(FLAGS.predict_dir):
        tf.gfile.MakeDirs(FLAGS.predict_dir)

    sp_model = spm.SentencePieceProcessor()
    sp_model.Load(FLAGS.spiece_model_file)

    ### TPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn()
    spm_basename = _get_spm_basename()

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           config=run_config)

    if FLAGS.do_train:
        train_rec_glob = os.path.join(
            FLAGS.output_dir,
            "{}.*.slen-{}.qlen-{}.train.tf_record".format(
                spm_basename, FLAGS.max_seq_length, FLAGS.max_query_length))

        train_input_fn = input_fn_builder(
            input_glob=train_rec_glob,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            num_hosts=FLAGS.num_hosts)

        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_predict:
        eval_examples = read_squad_examples(FLAGS.predict_file,
                                            is_training=False)

        with tf.gfile.Open(FLAGS.predict_file) as f:
            orig_data = json.load(f)["data"]

        eval_rec_file = os.path.join(
            FLAGS.output_dir,
            "{}.slen-{}.qlen-{}.eval.tf_record".format(
                spm_basename, FLAGS.max_seq_length, FLAGS.max_query_length))
        eval_feature_file = os.path.join(
            FLAGS.output_dir,
            "{}.slen-{}.qlen-{}.eval.features.pkl".format(
                spm_basename, FLAGS.max_seq_length, FLAGS.max_query_length))

        if (tf.gfile.Exists(eval_rec_file)
                and tf.gfile.Exists(eval_feature_file)
                and not FLAGS.overwrite_data):
            tf.logging.info(
                "Loading eval features from {}".format(eval_feature_file))
            with tf.gfile.Open(eval_feature_file, 'rb') as fin:
                eval_features = pickle.load(fin)
        else:
            eval_writer = FeatureWriter(filename=eval_rec_file,
                                        is_training=False)
            eval_features = []

            def append_feature(feature):
                eval_features.append(feature)
                eval_writer.process_feature(feature)

            convert_examples_to_features(
                examples=eval_examples,
                sp_model=sp_model,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature)
            eval_writer.close()

            with tf.gfile.Open(eval_feature_file, 'wb') as fout:
                pickle.dump(eval_features, fout)

        eval_input_fn = input_fn_builder(
            input_glob=eval_rec_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
            num_hosts=1)

        cur_results = []
        for result in estimator.predict(input_fn=eval_input_fn,
                                        yield_single_examples=True):
            if len(cur_results) % 1000 == 0:
                tf.logging.info("Processing example: %d" % (len(cur_results)))

            unique_id = int(result["unique_ids"])
            start_top_log_probs = (
                [float(x) for x in result["start_top_log_probs"].flat])
            start_top_index = [int(x) for x in result["start_top_index"].flat]
            end_top_log_probs = (
                [float(x) for x in result["end_top_log_probs"].flat])
            end_top_index = [int(x) for x in result["end_top_index"].flat]
            cls_logits = float(result["cls_logits"].flat[0])

            cur_results.append(
                RawResult(
                    unique_id=unique_id,
                    start_top_log_probs=start_top_log_probs,
                    start_top_index=start_top_index,
                    end_top_log_probs=end_top_log_probs,
                    end_top_index=end_top_index,
                    cls_logits=cls_logits))

        output_prediction_file = os.path.join(
            FLAGS.predict_dir, "predictions.json")
        output_nbest_file = os.path.join(
            FLAGS.predict_dir, "nbest_predictions.json")
        output_null_log_odds_file = os.path.join(
            FLAGS.predict_dir, "null_odds.json")

        ret = write_predictions(eval_examples, eval_features, cur_results,
                                FLAGS.n_best_size, FLAGS.max_answer_length,
                                output_prediction_file, output_nbest_file,
                                output_null_log_odds_file, orig_data)

        # Log current result
        tf.logging.info("=" * 80)
        log_str = "Result | "
        for key, val in ret.items():
            log_str += "{} {} | ".format(key, val)
        tf.logging.info(log_str)
        tf.logging.info("=" * 80)
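# For reference, a sketch of the RawResult container appended to above; in
# the XLNet SQuAD code it is a plain namedtuple whose fields mirror the
# keyword arguments used when collecting predictions.
import collections

RawResult = collections.namedtuple(
    "RawResult",
    ["unique_id", "start_top_log_probs", "start_top_index",
     "end_top_log_probs", "end_top_index", "cls_logits"])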
def main(_):
    FLAGS.use_tpu = True if FLAGS.tpu else False

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)
    if not FLAGS.do_train and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train` and `do_predict` must be True.")

    logger.info("FLAGS: {}".format(FLAGS.flag_values_dict()))

    #### TPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)
    model_fn = function_builder.get_qa_model_fn(FLAGS)

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           config=run_config)

    if FLAGS.do_train:
        train_input_fn = function_builder.qa_input_fn_builder(
            FLAGS, is_training=True)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_predict:
        eval_input_fn = function_builder.qa_input_fn_builder(
            FLAGS, is_training=False)

        cur_results = []
        checkpoint_path = FLAGS.checkpoint_path
        if not checkpoint_path:
            checkpoint_path = None
        for result in estimator.predict(input_fn=eval_input_fn,
                                        checkpoint_path=checkpoint_path,
                                        yield_single_examples=True):
            if len(cur_results) % 1000 == 0:
                logger.info("Processing example: %d" % (len(cur_results)))

            unique_id = int(result["feature_id"])
            start_logits = [float(x) for x in result["start_logits"].flat]
            end_logits = [float(x) for x in result["end_logits"].flat]
            cls_logits = [float(x) for x in result["cls_logits"].flat]

            cur_results.append(
                (unique_id, start_logits, end_logits, cls_logits))

        eval_examples = load_examples(FLAGS.eval_example_file)

        final_cls = collections.OrderedDict()
        final_cls_prob = collections.OrderedDict()
        final_predictions = collections.OrderedDict()
        final_span_scores = collections.OrderedDict()
        all_ground_truths = collections.OrderedDict()
        pred_info = collections.defaultdict(dict)

        for cur_result in cur_results:
            unique_id, start_logits, end_logits, cls_logits = cur_result
            item = eval_examples[int(unique_id)]
            orig_id = item['orig_id']

            # Save cls scores and span scores for tuning the final outputs
            pred_item = {
                'orig_id': orig_id,
                'pred_cls_scores': [float(s) for s in np.exp(cls_logits)],
            }

            context_len = len(item['context_spans'])
            context_start = 0  # self.get_context_start(item)
            context_end = context_start + context_len

            # Only consider valid context logits
            x_s = np.exp(start_logits[context_start:context_end])
            y_s = np.exp(end_logits[context_start:context_end])
            z = np.outer(x_s, y_s)
            zn = np.tril(np.triu(z), FLAGS.max_answer_length)
            pred_start, pred_end = np.unravel_index(np.argmax(zn), zn.shape)
            pred_score = zn[pred_start, pred_end]
            pred_item['pred_score'] = pred_score
            pred_item['pred_span'] = [pred_start, pred_end]

            if pred_score > final_span_scores.get(orig_id, 0):
                start_span = item['context_spans'][pred_start]
                predicted_char_start = start_span[0]
                end_span = item['context_spans'][pred_end]
                predicted_char_end = end_span[1]
                predicted_text = item['context'][
                    predicted_char_start:predicted_char_end]
                pred_item['pred_text'] = predicted_text.strip()
                final_predictions[orig_id] = predicted_text.strip()
                final_span_scores[orig_id] = pred_score

            if 'label' in item:
                answer_cls = item['label']['cls']
                pred_item['label_cls'] = answer_cls
                if answer_cls == 0:
                    answers = item['label']['ans']
                    answer_texts = [a[1] for a in answers]
                    all_ground_truths[orig_id] = answer_texts
                    pred_item['label_span'] = answer_texts
                else:
                    answer = 'yes' if answer_cls == 1 else 'no'
                    all_ground_truths[orig_id] = [answer]
                    pred_item['label_span'] = [answer]

            cls_prob = np.exp(cls_logits)
            cls_idx = np.argmax(cls_prob)
            pred_cls_prob = cls_prob[cls_idx]
            if pred_cls_prob > final_cls_prob.get(orig_id, 0):
                final_cls_prob[orig_id] = pred_cls_prob
                final_cls[orig_id] = cls_idx

            if final_cls[orig_id] == 1:
                # yes for hotpot, impossible for squad 2.0
                if FLAGS.task == 'squad_v2.0':
                    final_predictions[orig_id] = ''
                elif FLAGS.task == 'hotpot':
                    final_predictions[orig_id] = 'yes'
            elif final_cls[orig_id] == 2:  # cls == 2 -> no
                final_predictions[orig_id] = 'no'

            pred_info[int(unique_id)] = pred_item

        ckpt = os.path.basename(checkpoint_path) if checkpoint_path else ''
        dec_suffix = '_s{}'.format(FLAGS.sep_layer) if FLAGS.decompose else ''
        prediction_prefix = FLAGS.eval_file + ckpt + dec_suffix + '.predictions'
        prediction_file = prediction_prefix + '.json'
        pred_path = FLAGS.prediction_file or prediction_file
        pred_dir = os.path.dirname(pred_path)
        if not tf.io.gfile.exists(pred_dir):
            tf.io.gfile.makedirs(pred_dir)

        if FLAGS.task == 'hotpot':
            final_predictions_data = {"answer": final_predictions, "sp": {}}
        else:
            final_predictions_data = final_predictions

        with tf.io.gfile.GFile(pred_path, "w") as f:
            f.write(json.dumps(final_predictions_data, indent=2,
                               ensure_ascii=False) + "\n")
        logger.info("final predictions written to {}".format(pred_path))

        em_score, f1_score = get_em_f1(final_predictions, all_ground_truths)
        logger.info("em={:.4f}, f1={:.4f}".format(em_score, f1_score))
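# A tiny worked example of the span-selection trick used in the predict
# branch above: np.outer scores every (start, end) pair, np.triu keeps
# pairs with end >= start, and the np.tril band caps the answer length.
import numpy as np

x_s = np.array([0.1, 0.7, 0.2])  # start probabilities
y_s = np.array([0.2, 0.2, 0.6])  # end probabilities
z = np.outer(x_s, y_s)
zn = np.tril(np.triu(z), 2)      # allow spans of up to 3 tokens
start, end = np.unravel_index(np.argmax(zn), zn.shape)
print(start, end, zn[start, end])  # -> 1 2 0.42 (the 0.7 * 0.6 span)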
def main(_):
    FLAGS.use_tpu = True if FLAGS.tpu else False

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)
    if not FLAGS.do_train and not FLAGS.do_predict:
        raise ValueError("At least one of `do_train`, `do_predict` or "
                         "`do_submit` must be True.")

    logger.info("FLAGS: {}".format(FLAGS.flag_values_dict()))

    run_config = model_utils.configure_tpu(FLAGS)
    model_fn = get_model_fn(FLAGS)

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size,
            eval_batch_size=FLAGS.predict_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           config=run_config)

    if FLAGS.do_train:
        train_input_fn = file_based_input_fn_builder(FLAGS, is_training=True)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_predict:
        pred_input_fn = file_based_input_fn_builder(FLAGS, is_training=False)

        cur_results = []
        checkpoint_path = FLAGS.checkpoint_path
        if not checkpoint_path:
            checkpoint_path = None
        for result in estimator.predict(input_fn=pred_input_fn,
                                        checkpoint_path=checkpoint_path,
                                        yield_single_examples=True):
            if len(cur_results) % 1000 == 0:
                logger.info("Processing example: %d" % (len(cur_results)))
            unique_id = int(result["feature_id"])
            cls_logits = [float(x) for x in result["cls_logits"].flat]
            cur_results.append((unique_id, cls_logits))

        eval_examples = load_examples(FLAGS.eval_example_file)

        final_predictions = collections.OrderedDict()
        labels, predicted_labels = [], []
        for cur_result in cur_results:
            unique_id, cls_logits = cur_result
            item = eval_examples[int(unique_id)]
            orig_id = item['orig_id']
            scores = softmax(cls_logits)
            if 'label' in item:
                answer_cls = item['label']['cls']
                label_id = int(answer_cls)
                labels.append(label_id)
            predicted_label = int(np.argmax(scores))
            final_predictions[orig_id] = predicted_label
            predicted_labels.append(predicted_label)

        acc = accuracy_score(y_true=labels, y_pred=predicted_labels)
        acc *= 100.0
        f1_str = ''
        if FLAGS.num_classes == 2 and not FLAGS.num_choices:
            f1 = f1_score(y_true=labels, y_pred=predicted_labels)
            f1 *= 100.0
            f1_str = ", f1={:.4f}".format(f1)

        ckpt = os.path.basename(checkpoint_path) if checkpoint_path else ''
        dec_suffix = '_s{}'.format(FLAGS.sep_layer) if FLAGS.decompose else ''
        prediction_prefix = FLAGS.eval_file + ckpt + dec_suffix + '.predictions'
        prediction_file = prediction_prefix + '.json'
        pred_path = FLAGS.prediction_file or prediction_file
        pred_dir = os.path.dirname(pred_path)
        if not tf.io.gfile.exists(pred_dir):
            tf.io.gfile.makedirs(pred_dir)

        final_predictions_data = final_predictions
        with tf.io.gfile.GFile(pred_path, "w") as f:
            f.write(json.dumps(final_predictions_data, indent=2,
                               ensure_ascii=False) + "\n")
        logger.info("final predictions written to {}".format(pred_path))
        logger.info("acc={:.4f}{}".format(acc, f1_str))
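# The softmax() helper called above is not shown in this file; a minimal,
# numerically stable sketch consistent with how it is applied to raw
# cls_logits:
import numpy as np

def softmax(x):
    x = np.asarray(x, dtype=np.float64)
    e = np.exp(x - np.max(x))  # subtract the max for numerical stability
    return e / e.sum()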
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    all_labels = get_labels(FLAGS.data_dir, FLAGS.train_file)
    all_labels.append('##')
    all_labels.append('PAD')  ### only for testing
    all_labels.append('SYM')

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    # TPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn(len(all_labels))

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           config=run_config)

    if FLAGS.do_train:
        train_examples = create_examples(FLAGS.data_dir, FLAGS.train_file)
        random.shuffle(train_examples)
        train_features = conver_examples_to_features(
            train_examples, all_labels, tokenize_fn)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size
            * FLAGS.num_train_epochs)
        tf.logging.info('Train steps: ' + str(num_train_steps) + '.')
        train_input_fn = input_fn_builder(
            features=train_features,
            drop_remainder=True,
            is_training=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_eval:
        eval_examples = create_examples(FLAGS.data_dir, FLAGS.test_file)
        # Pad so the number of examples is a multiple of the batch size
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())
        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)
        eval_features = conver_examples_to_features(
            eval_examples, all_labels, tokenize_fn)
        eval_drop_remainder = True if FLAGS.use_tpu else False
        eval_input_fn = input_fn_builder(
            features=eval_features,
            drop_remainder=eval_drop_remainder,
            is_training=False)
        ret = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        tf.logging.info("=" * 80)
        log_str = "Eval | "
        for key, val in ret.items():
            log_str += "{} {} | ".format(key, val)
        tf.logging.info(log_str)
        tf.logging.info("=" * 80)
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    processors = {
        "detect": DetectProcessor,
    }

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval`, `do_predict` or "
            "`do_submit` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    task_name = FLAGS.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]()
    label_list = processor.get_labels() if not FLAGS.is_regression else None

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn(len(label_list) if label_list is not None else None)

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            predict_batch_size=FLAGS.predict_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           config=run_config)

    if FLAGS.do_train:
        train_file_base = "{}.len-{}.train.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_file = os.path.join(FLAGS.output_dir, train_file_base)
        tf.logging.info("Use tfrecord file {}".format(train_file))

        train_examples = processor.get_train_examples(FLAGS.data_dir)
        num_train_steps = int(
            len(train_examples) / FLAGS.train_batch_size
            * FLAGS.num_train_epochs)
        np.random.shuffle(train_examples)
        tf.logging.info("Num of train samples: {}".format(len(train_examples)))
        tf.logging.info("Num of train steps: {}".format(num_train_steps))

        file_based_convert_examples_to_features(
            train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
            train_file, FLAGS.num_passes)

        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)

        estimator.train(input_fn=train_input_fn, steps=num_train_steps)

    # TODO
    if FLAGS.do_train_test:
        train_test_file_base = "{}.len-{}.train_test.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_test_file = os.path.join(FLAGS.output_dir, train_test_file_base)
        tf.logging.info("Use tfrecord file {}".format(train_test_file))

        train_test_examples = processor.get_train_test_examples(FLAGS.data_dir)
        num_train_test_steps = int(
            len(train_test_examples) / FLAGS.train_batch_size * 1)
        np.random.shuffle(train_examples)
        tf.logging.info(
            "Num of test samples: {}".format(len(train_test_examples)))
        tf.logging.info("Num of test steps: {}".format(num_train_test_steps))

        file_based_convert_examples_to_features(
            train_test_examples, label_list, FLAGS.max_seq_length,
            tokenize_fn, train_test_file, FLAGS.num_passes)

        train_test_input_fn = file_based_input_fn_builder(
            input_file=train_test_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)

        estimator.train(input_fn=train_test_input_fn,
                        steps=num_train_test_steps)

    if FLAGS.do_eval:
        # TPU requires a fixed batch size for all batches, therefore the
        # number of examples must be a multiple of the batch size, or else
        # examples will get dropped. So we pad with fake examples which are
        # ignored later on. These do NOT count towards the metric (all
        # tf.metrics support a per-instance weight, and these get a weight
        # of 0.0).
        #
        # Modified in XL: We also adopt the same mechanism for GPUs.
        eval_examples = processor.get_dev_examples(FLAGS.data_dir)
        tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)
        file_based_convert_examples_to_features(
            eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
            eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        # Collect all checkpoints in the directory
        steps_and_files = []
        filenames = tf.gfile.ListDirectory(FLAGS.model_dir)
        for filename in filenames:
            if filename.endswith(".index"):
                ckpt_name = filename[:-6]
                cur_filename = join(FLAGS.model_dir, ckpt_name)
                global_step = int(cur_filename.split("-")[-1])
                tf.logging.info("Add {} to eval list.".format(cur_filename))
                steps_and_files.append([global_step, cur_filename])
        steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

        # Decide whether to evaluate all ckpts
        if not FLAGS.eval_all_ckpt:
            steps_and_files = steps_and_files[-1:]

        eval_results = []
        for global_step, filename in sorted(steps_and_files,
                                            key=lambda x: x[0]):
            ret = estimator.evaluate(
                input_fn=eval_input_fn,
                steps=eval_steps,
                checkpoint_path=filename)
            ret["step"] = global_step
            ret["path"] = filename
            eval_results.append(ret)

            tf.logging.info("=" * 80)
            log_str = "Eval result | "
            for key, val in sorted(ret.items(), key=lambda x: x[0]):
                log_str += "{} {} | ".format(key, val)
            tf.logging.info(log_str)

        key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy"
        eval_results.sort(key=lambda x: x[key_name], reverse=True)

        tf.logging.info("=" * 80)
        log_str = "Best result | "
        for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
            log_str += "{} {} | ".format(key, val)
        tf.logging.info(log_str)

    if FLAGS.do_predict:
        predict_dir = FLAGS.predict_dir
        if not tf.gfile.Exists(predict_dir):
            tf.gfile.MakeDirs(predict_dir)

        predict_file_base = "{}.len-{}.{}.predict.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.predict_split)
        predict_file = os.path.join(FLAGS.output_dir, predict_file_base)

        predict_examples = processor.get_test_examples(FLAGS.data_dir)
        num_actual_predict_examples = len(predict_examples)
        tf.logging.info(
            "Num of predict samples: {}".format(len(predict_examples)))

        file_based_convert_examples_to_features(
            predict_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
            predict_file)

        pred_input_fn = file_based_input_fn_builder(
            input_file=predict_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False)

        if FLAGS.predict_batch_size != 1:
            result = estimator.predict(input_fn=pred_input_fn)
        else:
            result = estimator.predict(input_fn=pred_input_fn,
                                       yield_single_examples=False)

        if FLAGS.use_stack:
            logits = [prediction["logits"] for prediction in result]
            save_pickle(FLAGS.stack_dir, logits)  # TODO

        output_predict_file = FLAGS.test_save
        original_file = os.path.join(FLAGS.data_dir, FLAGS.test_set)
        df = pd.read_csv(original_file)
        lines = [row['id'] for index, row in df.iterrows()]

        with open(output_predict_file, "w") as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow(['id', 'label'])
            num_written_lines = 0
            tf.logging.info("***** Predict results *****")
            for (i, prediction) in enumerate(zip(lines, result)):
                ID = prediction[0]
                label = prediction[1]["labels"]
                if i >= num_actual_predict_examples:
                    break
                writer.writerow([ID, label])
                num_written_lines += 1
        assert num_written_lines == num_actual_predict_examples
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    choice = ["A", "B", "C", "D"]

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if not FLAGS.do_predict:
        raise ValueError("`do_predict` must be True.")

    if not tf.gfile.Exists(FLAGS.predict_dir):
        tf.gfile.MakeDirs(FLAGS.predict_dir)
    if not tf.gfile.Exists(FLAGS.result_dir):
        tf.gfile.MakeDirs(FLAGS.result_dir)

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    # TPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn()
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

    if FLAGS.do_predict:
        for level in ["middle", "high"]:
            if level == "middle" and FLAGS.high_only:
                continue
            if level == "high" and FLAGS.middle_only:
                continue

            cur_dir = os.path.join(FLAGS.data_dir, FLAGS.predict_split, level)
            for filename in tf.gfile.ListDirectory(cur_dir):
                cur_path = os.path.join(cur_dir, filename)
                with tf.gfile.Open(cur_path) as f:
                    cur_data = json.load(f)

                answers = cur_data["answers"]
                options = cur_data["options"]
                questions = cur_data["questions"]
                context = cur_data["article"]
                id_ = cur_data["id"]

                for i in range(len(answers)):
                    label = ord(answers[i]) - ord("A")
                    qa_list = []
                    question = questions[i]  # the question being answered
                    for j in range(4):
                        option = options[i][j]
                        if "_" in question:
                            qa_cat = question.replace("_", option)
                        else:
                            qa_cat = " ".join([question, option])
                        qa_list.append(qa_cat)

                    # The example built from a single question
                    example = InputExample(context, qa_list, label, level, id_)

                    predict_file_base = "{}|{}tf_record".format(
                        id_, questions[i])
                    predict_file = os.path.join(FLAGS.predict_dir,
                                                predict_file_base)
                    file_based_convert_examples_to_features(
                        example, tokenize_fn, predict_file)

                    predict_input_fn = file_based_input_fn_builder(
                        input_file=predict_file,
                        seq_length=FLAGS.max_seq_length,
                        is_training=False,
                        drop_remainder=True)

                    result_file_base = "{}|{}.txt".format(id_[:-4], i)
                    result_file = os.path.join(FLAGS.result_dir,
                                               result_file_base)
                    if not tf.gfile.Exists(result_file):
                        predictions = estimator.predict(
                            input_fn=predict_input_fn)
                        predictions = list(predictions)
                        choose = predictions[0]["predictions"]
                        result = {
                            "id": id_,
                            "question": questions[i],
                            "answer": choice[choose],
                        }
                        with open(result_file, "a+") as fout:
                            fout.write(json.dumps(result))
                        tf.logging.info("Finish output!")
                        tf.logging.info(list(predictions))
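# A tiny demo of the question/option pairing used above: cloze-style
# questions have their blank replaced by the option, all others get the
# option appended.
question = "The author thinks _ ."
option = "reading matters"
qa_cat = (question.replace("_", option) if "_" in question
          else " ".join([question, option]))
print(qa_cat)  # -> "The author thinks reading matters ."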
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    processors = {
        "conll2003": crf_util.Conll2003Processor,
        "bc5cdr": crf_util.BC5CDRProcessor,
        "anatem": crf_util.AnatEMProcessor,
        "bc2gm": crf_util.BC2GMProcessor,
        "bc4chemd": crf_util.BC4CHEMDProcessor,
        "bionlp09": crf_util.BioNLP09Processor,
        "bionlp11id": crf_util.BioNLP11IDProcessor,
        "bionlp11ep": crf_util.BioNLP11EPIProcessor,
        "bionlp13cg": crf_util.BioNLP13CGProcessor,
        "bionlp13ge": crf_util.BioNLP13GEProcessor,
        "bionlp13pc": crf_util.BioNLP13PCProcessor,
        "craft": crf_util.CRAFTProcessor,
        "exptm": crf_util.ExPTMProcessor,
        "jnlpba": crf_util.JNLPBAProcessor,
        "linnaeus": crf_util.linnaeusProcessor,
        "ncbi": crf_util.NCBIProcessor,
    }

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
        raise ValueError(
            "At least one of `do_train`, `do_eval` or `do_predict` "
            "must be True.")

    processor = processors[FLAGS.task](
        FLAGS.spiece_model_file, FLAGS.lower, "data", FLAGS.cache_dir,
        FLAGS.max_seq_length)
    FLAGS.crf_classes = processor.classes
    if not FLAGS.train_steps:
        FLAGS.train_steps = processor.train_step
    FLAGS.model_dir = "{}_{}_{}_{}_{}".format(
        FLAGS.model_dir, FLAGS.task, "nocrf" if FLAGS.no_crf else "crf",
        FLAGS.label_mode, FLAGS.label_mask)

    ### GPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn()

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           config=run_config)

    if FLAGS.do_train:
        train_file = processor.get_train_data()
        if not tf.gfile.Exists(train_file):
            raise ValueError("no train file")
        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)
        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    # Collect all checkpoints in the directory
    steps_and_files = []
    filenames = tf.gfile.ListDirectory(FLAGS.model_dir)
    for filename in filenames:
        if filename.endswith(".index"):
            ckpt_name = filename[:-6]
            cur_filename = os.path.join(FLAGS.model_dir, ckpt_name)
            global_step = int(cur_filename.split("-")[-1])
            tf.logging.info("Add {} to eval list.".format(cur_filename))
            steps_and_files.append([global_step, cur_filename])
    steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

    if FLAGS.do_eval:
        eval_file = processor.get_dev_data()
        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        eval_results = []
        for global_step, filename in steps_and_files:
            ret = estimator.evaluate(input_fn=eval_input_fn,
                                     checkpoint_path=filename)
            ret["step"] = global_step
            ret["path"] = filename
            eval_results.append(ret)

            tf.logging.info("=" * 80)
            log_str = "Eval result | "
            for key, val in sorted(ret.items(), key=lambda x: x[0]):
                log_str += "{} {} | ".format(key, val)
            tf.logging.info(log_str)

        with open(os.path.join(FLAGS.predict_dir,
                               "{}_eval.txt".format(FLAGS.model_dir)),
                  "w") as f:
            for ret in eval_results:
                log_str = "Eval result : "
                for key, val in sorted(ret.items(), key=lambda x: x[0]):
                    log_str += "{} {} \n ".format(key, val)
                f.write(log_str)

    if FLAGS.do_predict:
        f = open(os.path.join(FLAGS.predict_dir,
                              "{}.txt".format(FLAGS.model_dir)), "w")
        pred_file = processor.get_test_data()
        pred_input_fn = file_based_input_fn_builder(
            input_file=pred_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False)

        for global_step, filename in steps_and_files:
            predict_results = []
            for pred_cnt, result in enumerate(
                    estimator.predict(input_fn=pred_input_fn,
                                      yield_single_examples=True,
                                      checkpoint_path=filename)):
                if pred_cnt % 100 == 0:
                    tf.logging.info(
                        "Predicting submission for example: {}".format(
                            pred_cnt))
                for key in result.keys():
                    result[key] = result[key].tolist()
                predict_results.append(result)

            predict_json_path = os.path.join(
                FLAGS.predict_dir,
                "{}_{}.json".format(FLAGS.model_dir, global_step))
            with tf.gfile.Open(predict_json_path, "w") as fp:
                json.dump(predict_results, fp, indent=4)
            f.write("%d\n" % global_step)
            acc_f1.get_result(predict_json_path, f, processor.decode_map)
        f.close()

    def _remove_checkpoint(checkpoint_path):
        for ext in ["meta", "data-00000-of-00001", "index"]:
            src_ckpt = checkpoint_path + ".{}".format(ext)
            tf.logging.info("removing {}".format(src_ckpt))
            tf.gfile.Remove(src_ckpt)

    # Keep only the latest checkpoint on disk
    for global_step, filename in steps_and_files[:-1]:
        _remove_checkpoint(filename)
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    np.random.seed(FLAGS.random_seed)

    processor = NerProcessor(
        data_dir=FLAGS.data_dir,
        input_file=FLAGS.input_file,
        task_name=FLAGS.task_name.lower())

    label_list = processor.get_labels()
    tf.logging.info(label_list)

    tpu_config = model_utils.configure_tpu(FLAGS)
    model_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(False, True, FLAGS)

    model_builder = XLNetModelBuilder(
        default_model_config=model_config,
        default_run_config=run_config,
        default_init_checkpoint=FLAGS.init_checkpoint,
        use_tpu=FLAGS.use_tpu)

    model_fn = model_builder.get_model_fn(
        model_config, run_config, FLAGS.init_checkpoint, label_list)

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=tpu_config,
        export_to_tpu=FLAGS.use_tpu,
        train_batch_size=FLAGS.train_batch_size,
        eval_batch_size=FLAGS.eval_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    tokenizer = XLNetTokenizer(
        sp_model_file=FLAGS.spiece_model_file,
        lower_case=FLAGS.lower_case)

    example_converter = XLNetExampleConverter(
        label_list=label_list,
        max_seq_length=FLAGS.max_seq_length,
        tokenizer=tokenizer)

    if FLAGS.do_train:
        train_examples = processor.get_chem_examples()

        tf.logging.info("***** Run training *****")
        tf.logging.info("  Num examples = %d", len(train_examples))
        tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info("  Num steps = %d", FLAGS.train_steps)

        train_features = example_converter.convert_examples_to_features(
            train_examples)
        train_input_fn = XLNetInputBuilder.get_input_builder(
            train_features, FLAGS.max_seq_length, True, True)

        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_eval:
        eval_examples = processor.get_dev_examples()

        tf.logging.info("***** Run evaluation *****")
        tf.logging.info("  Num examples = %d", len(eval_examples))
        tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

        eval_features = example_converter.convert_examples_to_features(
            eval_examples)
        eval_input_fn = XLNetInputBuilder.get_input_builder(
            eval_features, FLAGS.max_seq_length, False, False)

        result = estimator.evaluate(input_fn=eval_input_fn)

        precision = result["precision"]
        recall = result["recall"]
        f1_score = 2.0 * precision * recall / (precision + recall)

        tf.logging.info("***** Evaluation result *****")
        tf.logging.info("  Precision (token-level) = %s", str(precision))
        tf.logging.info("  Recall (token-level) = %s", str(recall))
        tf.logging.info("  F1 score (token-level) = %s", str(f1_score))

    if FLAGS.do_predict:
        predict_examples = processor.get_test_examples()
        pmids = [e.guid for e in predict_examples]
        tokens = [e.guid for e in predict_examples]

        tf.logging.info("***** Run prediction *****")
        tf.logging.info("  Num examples = %d", len(predict_examples))
        tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

        predict_features = example_converter.convert_examples_to_features(
            predict_examples)
        predict_input_fn = XLNetInputBuilder.get_input_builder(
            predict_features, FLAGS.max_seq_length, False, False)

        result = estimator.predict(input_fn=predict_input_fn)

        predict_recorder = XLNetPredictRecorder(
            output_dir=FLAGS.output_dir,
            label_list=label_list,
            guids=pmids,
            max_seq_length=FLAGS.max_seq_length,
            tokenizer=tokenizer,
            predict_tag=FLAGS.predict_tag)

        predicts = [{
            "input_ids": feature.input_ids,
            "input_masks": feature.input_masks,
            "label_ids": feature.label_ids,
            "predict_ids": predict["predict"].tolist(),
        } for feature, predict in zip(predict_features, result)]

        predict_recorder.record(predicts)

    if FLAGS.do_export:
        tf.logging.info("***** Running exporting *****")
        tf.gfile.MakeDirs(FLAGS.export_dir)
        serving_input_fn = XLNetInputBuilder.get_serving_input_fn(
            FLAGS.max_seq_length)
        estimator.export_savedmodel(FLAGS.export_dir, serving_input_fn,
                                    as_text=False)
def main(_):
    logger.set_verbosity(logger.INFO)

    #### Validate flags
    if FLAGS.save_steps is not None:
        FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

    if not FLAGS.do_train and not FLAGS.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.spiece_model_file)

    def tokenize_fn(text):
        text = preprocess_text(text, lower=FLAGS.uncased)
        return encode_ids(sp, text)

    # TPU Configuration
    run_config = model_utils.configure_tpu(FLAGS)

    model_fn = get_model_fn()

    spm_basename = os.path.basename(FLAGS.spiece_model_file)

    # If TPU is not available, this will fall back to a normal Estimator on
    # CPU or GPU.
    if FLAGS.use_tpu:
        estimator = tf.contrib.tpu.TPUEstimator(
            use_tpu=FLAGS.use_tpu,
            model_fn=model_fn,
            config=run_config,
            train_batch_size=FLAGS.train_batch_size,
            eval_batch_size=FLAGS.eval_batch_size)
    else:
        estimator = tf.estimator.Estimator(model_fn=model_fn,
                                           config=run_config)

    if FLAGS.do_train:
        train_file_base = "{}.len-{}.train.tf_record".format(
            spm_basename, FLAGS.max_seq_length)
        train_file = os.path.join(FLAGS.output_dir, train_file_base)

        if not tf.gfile.Exists(train_file) or FLAGS.overwrite_data:
            train_examples = get_examples(FLAGS.data_dir, "train")
            random.shuffle(train_examples)
            file_based_convert_examples_to_features(
                train_examples, tokenize_fn, train_file)

        train_input_fn = file_based_input_fn_builder(
            input_file=train_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True)

        estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

    if FLAGS.do_eval:
        eval_examples = get_examples(FLAGS.data_dir, FLAGS.eval_split)
        logger.info("Num of eval samples: {}".format(len(eval_examples)))

        # TPU requires a fixed batch size for all batches, therefore the
        # number of examples must be a multiple of the batch size, or else
        # examples will get dropped. So we pad with fake examples which are
        # ignored later on. These do NOT count towards the metric (all
        # tf.metrics support a per-instance weight, and these get a weight
        # of 0.0).
        #
        # Modified in XL: We also adopt the same mechanism for GPUs.
        while len(eval_examples) % FLAGS.eval_batch_size != 0:
            eval_examples.append(PaddingInputExample())

        eval_file_base = "{}.len-{}.{}.tf_record".format(
            spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
        if FLAGS.high_only:
            eval_file_base = "high." + eval_file_base
        elif FLAGS.middle_only:
            eval_file_base = "middle." + eval_file_base
        eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

        file_based_convert_examples_to_features(
            eval_examples, tokenize_fn, eval_file)

        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

        eval_input_fn = file_based_input_fn_builder(
            input_file=eval_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=True)

        ret = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

        # Log current result
        logger.info("=" * 80)
        log_str = "Eval | "
        for key, val in ret.items():
            log_str += "{} {} | ".format(key, val)
        logger.info(log_str)
        logger.info("=" * 80)
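# For reference, a sketch of PaddingInputExample as defined in the
# BERT/XLNet reference scripts: a do-nothing marker class whose instances
# pad the eval set to a multiple of the batch size and are given weight
# 0.0 in the metrics.
class PaddingInputExample(object):
    """Fake example so the num input examples is a multiple of the batch size."""
    pass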
def main(_): tf.logging.set_verbosity(tf.logging.INFO) #### Validate flags if FLAGS.save_steps is not None: FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps) if FLAGS.calc_ists_metrics and FLAGS.metrics_dir: predictions = [] if not FLAGS.pred_file: predictions = model_utils.get_predictions(FLAGS.predict_dir) else: predictions = [model_utils.extract_global_step(FLAGS.pred_file[:-4]), FLAGS.pred_file] if not tf.gfile.Exists(FLAGS.metrics_dir): tf.gfile.MakeDirs(FLAGS.metrics_dir) dataset_name = FLAGS.data_dir.split("/")[-1] # Write metrics to file with tf.gfile.Open(os.path.join(FLAGS.metrics_dir, "{}.tsv".format("metrics-" + dataset_name)), "w") as fout: fout.write("step\tf1-type\tf1-socre\tf1-t+s\tpearson-type\tpearson-score\n") # Calc metric for all predictions for global_step, pred_file_path in sorted(predictions, key=lambda x: x[0]): metrics = calc_ists_metrics(pred_file_path, FLAGS.data_dir + "/test.tsv") print('\n Dataset: {}\n Step: {}\n [F1 Type]: {}\n [F1 Score]: {}\n [F1 T+S]: {}\n [P T]: {}\n [P S]: {}'.format(dataset_name, global_step, *metrics)) fout.write('{}\t{}\t{}\t{}\t{}\t\n'.format(global_step, *metrics)) # End execution after caclulations return None if FLAGS.do_predict: predict_dir = FLAGS.predict_dir if not tf.gfile.Exists(predict_dir): tf.gfile.MakeDirs(predict_dir) processors = { "mnli_matched": MnliMatchedProcessor, "mnli_mismatched": MnliMismatchedProcessor, 'sts-b': StsbProcessor, 'imdb': ImdbProcessor, "yelp5": Yelp5Processor, "ists": IStsProcessor } if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict and not FLAGS.calc_ists_metrics: raise ValueError( "At least one of `do_train`, `do_eval, `do_predict`, `calc_ists_metrics` or " "`do_submit` must be True.") if not tf.gfile.Exists(FLAGS.output_dir): tf.gfile.MakeDirs(FLAGS.output_dir) task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() if not FLAGS.is_regression else None sp = spm.SentencePieceProcessor() sp.Load(FLAGS.spiece_model_file) def tokenize_fn(text): text = preprocess_text(text, lower=FLAGS.uncased) return encode_ids(sp, text) run_config = model_utils.configure_tpu(FLAGS) model_fn = get_model_fn(len(label_list) if label_list is not None else None) spm_basename = os.path.basename(FLAGS.spiece_model_file) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. 
  if FLAGS.use_tpu:
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
  else:
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

  if FLAGS.do_train:
    train_file_base = "{}.len-{}.train.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    train_file = os.path.join(FLAGS.output_dir, train_file_base)
    tf.logging.info("Use tfrecord file {}".format(train_file))

    train_examples = processor.get_train_examples(FLAGS.data_dir)
    np.random.shuffle(train_examples)
    tf.logging.info("Num of train samples: {}".format(len(train_examples)))

    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        train_file, FLAGS.num_passes)

    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

  if FLAGS.do_eval or FLAGS.do_predict:
    if FLAGS.eval_split == "dev":
      eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    else:
      eval_examples = processor.get_test_examples(FLAGS.data_dir)

    tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

  if FLAGS.do_eval:
    # TPU requires a fixed batch size for all batches, therefore the number
    # of examples must be a multiple of the batch size, or else examples
    # will get dropped. So we pad with fake examples which are ignored
    # later on. These do NOT count towards the metric (all tf.metrics
    # support a per-instance weight, and these get a weight of 0.0).
    #
    # Modified in XL: We also adopt the same mechanism for GPUs.
    while len(eval_examples) % FLAGS.eval_batch_size != 0:
      eval_examples.append(PaddingInputExample())

    eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    assert len(eval_examples) % FLAGS.eval_batch_size == 0
    eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)

    # Filter out all checkpoints in the directory
    steps_and_files = model_utils.get_checkpoints(FLAGS.model_dir)

    # Decide whether to evaluate all ckpts
    if not FLAGS.eval_all_ckpt:
      steps_and_files = steps_and_files[-1:]

    eval_results = []
    for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]):
      ret = estimator.evaluate(
          input_fn=eval_input_fn,
          steps=eval_steps,
          checkpoint_path=filename)

      ret["step"] = global_step
      ret["path"] = filename

      eval_results.append(ret)

      tf.logging.info("=" * 80)
      log_str = "Eval result | "
      for key, val in sorted(ret.items(), key=lambda x: x[0]):
        log_str += "{} {} | ".format(key, val)
      tf.logging.info(log_str)

    key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy"
    eval_results.sort(key=lambda x: x[key_name], reverse=True)

    tf.logging.info("=" * 80)
    log_str = "Best result | "
    for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
      log_str += "{} {} | ".format(key, val)
    tf.logging.info(log_str)

  if FLAGS.do_predict:
    eval_file_base = "{}.len-{}.{}.predict.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)
    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    pred_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    steps_and_files = []
    if not FLAGS.predict_ckpt:
      # Filter out all checkpoints in the directory
      steps_and_files = model_utils.get_checkpoints(FLAGS.model_dir)
      # Decide whether to predict all ckpts
      if not FLAGS.pred_all_ckpt:
        steps_and_files = steps_and_files[-1:]
    else:
      # A single explicit checkpoint, wrapped as one (step, path) pair.
      steps_and_files = [(model_utils.extract_global_step(FLAGS.predict_ckpt),
                          FLAGS.predict_ckpt)]

    for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]):
      predict_results = []
      with tf.gfile.Open(
          os.path.join(predict_dir,
                       "{}.tsv".format("step-" + str(global_step))),
          "w") as fout:
        fout.write("index\tprediction\n")

        for pred_cnt, result in enumerate(
            estimator.predict(input_fn=pred_input_fn,
                              yield_single_examples=True,
                              checkpoint_path=filename)):
          if pred_cnt % 1000 == 0:
            tf.logging.info(
                "Predicting submission for example: {}".format(pred_cnt))

          logits = [float(x) for x in result["logits"].flat]
          predict_results.append(logits)

          if len(logits) == 1:
            label_out = logits[0]
          elif len(logits) == 2:
            if logits[1] - logits[0] > FLAGS.predict_threshold:
              label_out = label_list[1]
            else:
              label_out = label_list[0]
          elif len(logits) > 2:
            max_index = np.argmax(np.array(logits, dtype=np.float32))
            label_out = label_list[max_index]
          else:
            raise NotImplementedError

          fout.write("{}\t{}\n".format(pred_cnt, label_out))

      predict_json_path = os.path.join(
          predict_dir, "{}.logits.json".format("step-" + str(global_step)))
      with tf.gfile.Open(predict_json_path, "w") as fp:
        json.dump(predict_results, fp, indent=4)
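
The logits-to-label rule in the prediction loop above recurs in several scripts below, so it is worth reading in isolation. A sketch only: `logits_to_label` is an illustrative name, and the threshold semantics mirror `FLAGS.predict_threshold` in the script.

import numpy as np

def logits_to_label(logits, label_list, predict_threshold=0.0):
  if len(logits) == 1:
    # Regression: the single logit is the prediction itself.
    return logits[0]
  if len(logits) == 2:
    # Binary: predict the positive class only when its margin over the
    # negative class exceeds the threshold.
    if logits[1] - logits[0] > predict_threshold:
      return label_list[1]
    return label_list[0]
  # Multi-class: plain argmax over the logits.
  return label_list[int(np.argmax(np.array(logits, dtype=np.float32)))]

# logits_to_label([0.1, 0.4], ["0", "1"], predict_threshold=0.5) -> "0",
# because the margin 0.3 does not beat the threshold 0.5.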

def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Validate flags
  if FLAGS.save_steps is not None:
    FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

  if FLAGS.do_predict:
    predict_dir = FLAGS.predict_dir
    if not tf.gfile.Exists(predict_dir):
      tf.gfile.MakeDirs(predict_dir)

  processors = {
      "xnli": XnliProcessor,
      "tnews": TnewsProcessor,
      "afqmc": AFQMCProcessor,
      "iflytek": iFLYTEKDataProcessor,
      "copa": COPAProcessor,
      "cmnli": CMNLIProcessor,
      "wsc": WSCProcessor,
      "csl": CslProcessor,
  }

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval`, `do_predict` or "
        "`do_submit` must be True.")

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()
  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()
  label_list = processor.get_labels() if not FLAGS.is_regression else None

  sp = spm.SentencePieceProcessor()
  sp.Load(FLAGS.spiece_model_file)

  def tokenize_fn(text):
    text = preprocess_text(text, lower=FLAGS.uncased)
    return encode_ids(sp, text)

  run_config = model_utils.configure_tpu(FLAGS)

  model_fn = get_model_fn(len(label_list) if label_list is not None else None)

  spm_basename = os.path.basename(FLAGS.spiece_model_file)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  if FLAGS.use_tpu:
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
  else:
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

  if FLAGS.do_train:
    train_file_base = "{}.len-{}.train.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    train_file = os.path.join(FLAGS.output_dir, train_file_base)
    tf.logging.info("Use tfrecord file {}".format(train_file))

    train_examples = processor.get_train_examples(FLAGS.data_dir)
    np.random.shuffle(train_examples)
    tf.logging.info("Num of train samples: {}".format(len(train_examples)))

    if task_name == "inews":
      file_based_convert_examples_to_features_for_inews(
          train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
          train_file, FLAGS.num_passes)
    else:
      file_based_convert_examples_to_features(
          train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
          train_file, FLAGS.num_passes)

    # Here we use the epoch number to calculate the total train_steps
    # (a worked example of this arithmetic follows this function).
    train_steps = int(
        len(train_examples) * FLAGS.num_train_epochs / FLAGS.train_batch_size)
    FLAGS.warmup_steps = int(0.1 * train_steps)

    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    estimator.train(input_fn=train_input_fn, max_steps=train_steps)

  if FLAGS.do_eval or FLAGS.do_predict:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

  if FLAGS.do_eval:
    # TPU requires a fixed batch size for all batches, therefore the number
    # of examples must be a multiple of the batch size, or else examples
    # will get dropped. So we pad with fake examples which are ignored
    # later on. These do NOT count towards the metric (all tf.metrics
    # support a per-instance weight, and these get a weight of 0.0).
    #
    # Modified in XL: We also adopt the same mechanism for GPUs.
    while len(eval_examples) % FLAGS.eval_batch_size != 0:
      eval_examples.append(PaddingInputExample())

    eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    if task_name == "inews":
      file_based_convert_examples_to_features_for_inews(
          eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
          eval_file)
    else:
      file_based_convert_examples_to_features(
          eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
          eval_file)

    assert len(eval_examples) % FLAGS.eval_batch_size == 0
    eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)

    # Filter out all checkpoints in the directory
    steps_and_files = []
    filenames = tf.gfile.ListDirectory(FLAGS.model_dir)
    for filename in filenames:
      if filename.endswith(".index"):
        ckpt_name = filename[:-6]
        cur_filename = join(FLAGS.model_dir, ckpt_name)
        global_step = int(cur_filename.split("-")[-1])
        tf.logging.info("Add {} to eval list.".format(cur_filename))
        steps_and_files.append([global_step, cur_filename])
    steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

    # Decide whether to evaluate all ckpts
    if not FLAGS.eval_all_ckpt:
      steps_and_files = steps_and_files[-1:]

    eval_results = []
    output_eval_file = os.path.join(FLAGS.data_dir, "dev_results_bert.txt")
    print("output_eval_file:", output_eval_file)
    tf.logging.info("output_eval_file:" + output_eval_file)

    with tf.gfile.GFile(output_eval_file, "w") as writer:
      for global_step, filename in sorted(steps_and_files,
                                          key=lambda x: x[0]):
        ret = estimator.evaluate(
            input_fn=eval_input_fn,
            steps=eval_steps,
            checkpoint_path=filename)

        ret["step"] = global_step
        ret["path"] = filename

        eval_results.append(ret)

        tf.logging.info("=" * 80)
        log_str = "Eval result | "
        for key, val in sorted(ret.items(), key=lambda x: x[0]):
          log_str += "{} {} | ".format(key, val)
          writer.write("%s = %s\n" % (key, val))
        tf.logging.info(log_str)

    key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy"
    eval_results.sort(key=lambda x: x[key_name], reverse=True)

    tf.logging.info("=" * 80)
    log_str = "Best result | "
    for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
      log_str += "{} {} | ".format(key, val)
    tf.logging.info(log_str)

  if FLAGS.do_predict:
    eval_examples = processor.get_test_examples(FLAGS.data_dir)
    # No padding is appended for prediction, so every example is real;
    # this count is needed by the write loop below.
    num_actual_predict_examples = len(eval_examples)

    eval_file_base = "{}.len-{}.{}.predict.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    if task_name == "inews":
      file_based_convert_examples_to_features_for_inews(
          eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
          eval_file)
    else:
      file_based_convert_examples_to_features(
          eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
          eval_file)

    pred_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    result = estimator.predict(input_fn=pred_input_fn)

    index2label_map = {}
    for (i, label) in enumerate(label_list):
      index2label_map[i] = label

    output_predict_file_label_name = task_name + "_predict.json"
    output_predict_file_label = os.path.join(
        FLAGS.output_dir, output_predict_file_label_name)
    output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")

    with tf.gfile.GFile(output_predict_file_label, "w") as writer_label, \
        tf.gfile.GFile(output_predict_file, "w") as writer:
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        label_index = probabilities.argmax(0)
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        test_label_dict = {}
        test_label_dict["id"] = i
        test_label_dict["label"] = str(index2label_map[label_index])
        if task_name == "tnews":
          test_label_dict["label_desc"] = ""
        writer.write(output_line)
        json.dump(test_label_dict, writer_label)
        writer_label.write("\n")
        num_written_lines += 1
      assert num_written_lines == num_actual_predict_examples

    output_predict_file_label_name = task_name + "_predict.json"
    output_predict_file_label = os.path.join(
        FLAGS.output_dir, output_predict_file_label_name)

    predict_results = []
    # Reopen the label and results files together with the submission file,
    # so that every handle used inside this block is live.
    with tf.gfile.GFile(output_predict_file_label, "w") as writer_label, \
        tf.gfile.GFile(output_predict_file, "w") as writer, \
        tf.gfile.Open(os.path.join(predict_dir, "{}.tsv".format(task_name)),
                      "w") as fout:
      fout.write("index\tprediction\n")

      for pred_cnt, result in enumerate(
          estimator.predict(input_fn=pred_input_fn,
                            yield_single_examples=True,
                            checkpoint_path=FLAGS.predict_ckpt)):
        if pred_cnt % 1000 == 0:
          tf.logging.info(
              "Predicting submission for example: {}".format(pred_cnt))

        logits = [float(x) for x in result["logits"].flat]
        predict_results.append(logits)

        if len(logits) == 1:
          label_out = logits[0]
        elif len(logits) == 2:
          if logits[1] - logits[0] > FLAGS.predict_threshold:
            label_out = label_list[1]
          else:
            label_out = label_list[0]
        elif len(logits) > 2:
          max_index = np.argmax(np.array(logits, dtype=np.float32))
          label_out = label_list[max_index]
        else:
          raise NotImplementedError

        fout.write("{}\t{}\n".format(pred_cnt, label_out))

        # Recompute the line from the current logits rather than reusing
        # the stale output_line left over from the loop above.
        output_line = "\t".join(str(l) for l in logits) + "\n"
        test_label_dict = {}
        test_label_dict["id"] = pred_cnt
        test_label_dict["label"] = str(label_out)
        if task_name == "tnews":
          test_label_dict["label_desc"] = ""
        writer.write(output_line)
        json.dump(test_label_dict, writer_label)
        writer_label.write("\n")

    predict_json_path = os.path.join(
        predict_dir, "{}.logits.json".format(task_name))
    with tf.gfile.Open(predict_json_path, "w") as fp:
      json.dump(predict_results, fp, indent=4)
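
The epoch-to-steps arithmetic in the function above deserves a worked example. The numbers below are illustrative, not from the source.

# With 10,000 training examples, 3 epochs, and a batch size of 32:
num_train_examples = 10000
num_train_epochs = 3.0
train_batch_size = 32
train_steps = int(num_train_examples * num_train_epochs / train_batch_size)
warmup_steps = int(0.1 * train_steps)
print(train_steps, warmup_steps)  # 937 93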

def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  #### Validate flags
  if FLAGS.save_steps is not None:
    FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

  if FLAGS.do_predict:
    predict_dir = FLAGS.predict_dir
    if not tf.gfile.Exists(predict_dir):
      tf.gfile.MakeDirs(predict_dir)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval`, `do_predict` or "
        "`do_submit` must be True.")

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  if not tf.gfile.Exists(FLAGS.model_dir):
    tf.gfile.MakeDirs(FLAGS.model_dir)

  if not tf.gfile.Exists(FLAGS.init_checkpoint):
    FLAGS.init_checkpoint = None

  processor = Processors[TaskType]()
  source_ntoken = len(processor.src_idx2word)
  target_ntoken = len(processor.trg_idx2word)

  with tf.gfile.Open(FLAGS.model_config_path, "w") as fp:
    json.dump(
        {
            "source_ntoken": source_ntoken,
            "target_ntoken": target_ntoken
        }, fp, indent=4)

  sp = spm.SentencePieceProcessor()
  sp.Load(FLAGS.spiece_model_file)

  def tokenize_fn(text):
    text = preprocess_text(text, lower=FLAGS.uncased)
    return encode_ids(sp, text)

  run_config = model_utils.configure_tpu(FLAGS)

  model_fn = get_model_fn()

  spm_basename = os.path.basename(FLAGS.spiece_model_file)

  estimator = tf.estimator.Estimator(
      params={
          "source_ntoken": source_ntoken,
          "target_ntoken": target_ntoken
      },
      model_fn=model_fn,
      config=run_config)

  if FLAGS.do_train:
    train_file_base = "{}.len-{}.train.tf_record".format(
        TaskType, FLAGS.max_seq_length)
    train_file = os.path.join(FLAGS.output_dir, train_file_base)
    tf.logging.info("Use tfrecord file {}".format(train_file))

    train_examples = processor.get_train_examples()
    np.random.shuffle(train_examples)
    tf.logging.info("Num of train samples: {}".format(len(train_examples)))

    file_based_convert_examples_to_features(
        train_examples, FLAGS.max_seq_length, train_file, FLAGS.num_passes)

    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

  if FLAGS.do_eval or FLAGS.do_predict:
    eval_examples = processor.get_dev_examples()
    tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

  if FLAGS.do_eval:
    eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
        TaskType, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, FLAGS.max_seq_length, eval_file, FLAGS.num_passes)

    # assert len(eval_examples) % FLAGS.eval_batch_size == 0
    eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)

    # Filter out all checkpoints in the directory
    steps_and_files = []
    filenames = tf.gfile.ListDirectory(FLAGS.model_dir)
    for filename in filenames:
      if filename.endswith(".index"):
        ckpt_name = filename[:-6]
        cur_filename = join(FLAGS.model_dir, ckpt_name)
        global_step = int(cur_filename.split("-")[-1])
        tf.logging.info("Add {} to eval list.".format(cur_filename))
        steps_and_files.append([global_step, cur_filename])
    steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

    # Decide whether to evaluate all ckpts
    if not FLAGS.eval_all_ckpt:
      steps_and_files = steps_and_files[-1:]

    eval_results = []
    for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]):
      ret = estimator.evaluate(
          input_fn=eval_input_fn,
          steps=eval_steps,
          checkpoint_path=filename)

      ret["step"] = global_step
      ret["path"] = filename

      eval_results.append(ret)

      tf.logging.info("=" * 80)
      log_str = "Eval result | "
      for key, val in sorted(ret.items(), key=lambda x: x[0]):
        log_str += "{} {} | ".format(key, val)
      tf.logging.info(log_str)

    # Sort ascending so that the checkpoint with the lowest loss comes first.
    eval_results.sort(key=lambda x: x["loss"])

    tf.logging.info("=" * 80)
    log_str = "Best result | "
    for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
      log_str += "{} {} | ".format(key, val)
    tf.logging.info(log_str)

  if FLAGS.do_predict:
    eval_file_base = "{}.len-{}.{}.predict.tf_record".format(
        TaskType, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, FLAGS.max_seq_length, eval_file, FLAGS.num_passes)

    pred_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    predict_results = []
    with tf.gfile.Open(
        os.path.join(predict_dir, "{}.tsv".format(TaskType)), "w") as fout:
      fout.write("cnt\tsource\ttarget\n")

      for pred_cnt, result in enumerate(
          estimator.predict(input_fn=pred_input_fn,
                            yield_single_examples=True,
                            checkpoint_path=FLAGS.predict_ckpt)):
        if pred_cnt % 1000 == 0:
          tf.logging.info(
              "Predicting submission for example: {}".format(pred_cnt))

        pred = " ".join([str(e) for e in result["pred"]])
        output_ids = " ".join([str(e) for e in result["output_ids"]])
        predict_results.append({"pred": pred, "output_ids": output_ids})
        fout.write("{}\t{}\t{}\n".format(pred_cnt, pred, output_ids))

    predict_json_path = os.path.join(
        predict_dir, "{}.logits.json".format(TaskType))
    with tf.gfile.Open(predict_json_path, "w") as fp:
      json.dump(predict_results, fp, indent=4)
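
Several of these scripts enumerate checkpoints by scanning the model directory for ".index" files. Here is that pattern as a standalone sketch, assuming TF 1.x checkpoint naming ("model.ckpt-1234.index"); `list_checkpoints` is an illustrative name.

import os
import tensorflow as tf

def list_checkpoints(model_dir):
  steps_and_files = []
  for filename in tf.gfile.ListDirectory(model_dir):
    if filename.endswith(".index"):
      ckpt_name = filename[:-len(".index")]        # strip the ".index" suffix
      ckpt_path = os.path.join(model_dir, ckpt_name)
      global_step = int(ckpt_name.split("-")[-1])  # "model.ckpt-1234" -> 1234
      steps_and_files.append((global_step, ckpt_path))
  # Ascending by step; taking [-1:] keeps only the latest checkpoint.
  return sorted(steps_and_files)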

def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  if FLAGS.do_predict:
    predict_dir = FLAGS.predict_dir
    if not tf.gfile.Exists(predict_dir):
      tf.gfile.MakeDirs(predict_dir)

  spiece_model_file = FLAGS.spiece_model_file
  sp_model = spm.SentencePieceProcessor()
  sp_model.Load(spiece_model_file)

  model_fn = model_fn_builder()

  run_config = configure_tpu(FLAGS)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  if FLAGS.use_tpu:
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
  else:
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

  examples = read_examples(FLAGS.input_file)
  original_examples_length = len(examples)

  # TPU requires a fixed batch size for all batches, therefore the number
  # of examples must be a multiple of the batch size, or else examples
  # will get dropped. So we pad with fake examples which are ignored
  # later on. These do NOT count towards the metric (all tf.metrics
  # support a per-instance weight, and these get a weight of 0.0).
  #
  # Modified in XL: We also adopt the same mechanism for GPUs.
  while len(examples) % FLAGS.predict_batch_size != 0:
    examples.append(PaddingInputExample())

  features = convert_examples_to_features(
      examples=examples,
      max_seq_length=FLAGS.max_seq_length,
      sp_model=sp_model,
      uncased=FLAGS.uncased)

  unique_id_to_feature = {}
  for feature in features:
    unique_id_to_feature[feature.unique_id] = feature

  assert len(examples) % FLAGS.predict_batch_size == 0

  input_fn = input_fn_builder(
      features=features, seq_length=FLAGS.max_seq_length)

  with codecs.getwriter("utf-8")(tf.gfile.Open(FLAGS.output_file,
                                               "w")) as writer:
    for example_cnt, result in enumerate(
        estimator.predict(input_fn=input_fn,
                          yield_single_examples=True,
                          checkpoint_path=FLAGS.predict_ckpt)):
      if example_cnt % 1000 == 0:
        tf.logging.info(
            "Predicting submission for example_cnt: {}".format(example_cnt))

      # output only real examples, and not padded examples
      if example_cnt < original_examples_length:
        unique_id = int(result["unique_id"])
        feature = unique_id_to_feature[unique_id]

        output_json = collections.OrderedDict()
        output_json["linex_index"] = unique_id

        # The first real token starts at the first zero entry of the input
        # mask; flatten() (rather than .flat) yields an ndarray that
        # supports the elementwise == comparison.
        input_mask = result["input_mask"].flatten()
        first_real_token_index = np.where(input_mask == 0)[0][0]

        all_features = []
        for (i, token) in enumerate(feature.tokens):
          if len(token) > 0:
            # A distinct name avoids shadowing the outer `features` list.
            token_feature = collections.OrderedDict()
            token_feature["token"] = token
            token_feature["values"] = _round_vector(
                result["tokens"][first_real_token_index + i].flat, 6)
            all_features.append(token_feature)

        output_json["features"] = all_features
        writer.write(json.dumps(output_json) + "\n")
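
The offset logic in the embedding-extraction script above hinges on XLNet's left padding. A minimal sketch with made-up mask values:

import numpy as np

# input_mask is 1.0 for padding and 0.0 for real tokens; with left padding,
# the first real token sits at the first zero entry.
input_mask = np.array([1., 1., 1., 0., 0., 0.])
first_real_token_index = np.where(input_mask == 0)[0][0]  # -> 3
# Token i of the example then lives at position first_real_token_index + i
# of the per-position outputs.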

def main(unused_argv):
  del unused_argv  # Unused

  tf.logging.set_verbosity(tf.logging.INFO)

  assert FLAGS.seq_len > 0
  assert FLAGS.perm_size > 0

  FLAGS.n_token = data_utils.VOCAB_SIZE
  tf.logging.info("n_token {}".format(FLAGS.n_token))

  if not tf.gfile.Exists(FLAGS.model_dir):
    tf.gfile.MakeDirs(FLAGS.model_dir)

  # Get train input function
  train_input_fn, train_record_info_dict = get_input_fn("train")

  tf.logging.info("num of batches {}".format(
      train_record_info_dict["num_batch"]))

  # Get train cache function
  train_cache_fn = get_cache_fn(FLAGS.mem_len)

  ##### Get model function
  model_fn = get_model_fn()

  ##### Create TPUEstimator
  # TPU Configuration
  run_config = model_utils.configure_tpu(FLAGS)

  if FLAGS.use_tpu:
    # TPU Estimator
    estimator = tpu_estimator.TPUEstimator(
        model_fn=model_fn,
        train_cache_fn=train_cache_fn,
        use_tpu=FLAGS.use_tpu,
        config=run_config,
        params={"track_mean": FLAGS.track_mean},
        train_batch_size=FLAGS.train_batch_size,
        eval_on_tpu=FLAGS.use_tpu)
  else:
    estimator = tf.estimator.Estimator(
        params={"batch_size": FLAGS.train_batch_size},
        model_fn=model_fn,
        config=run_config)

  #### Training
  estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

  ### tensorflow serving save model ###
  model_path = "./serving_model/"
  '''
  self.input_ids = tf.placeholder(dtype=tf.int64,
                                  shape=[batch_size, FLAGS.seq_len],
                                  name="input_ids")
  self.segment_ids = tf.placeholder(dtype=tf.int32,
                                    shape=[batch_size, FLAGS.seq_len],
                                    name="segment_ids")
  self.input_mask = tf.placeholder(dtype=tf_float,
                                   shape=[batch_size, FLAGS.seq_len],
                                   name="input_mask")
  self.label_ids = tf.placeholder(dtype=tf.int64,
                                  shape=[batch_size],
                                  name="label_ids")
  '''
  feature_spec = {
      'input': tf.placeholder(dtype=tf.int32, shape=[None, 16],
                              name='feat_input')
  }
  serving_input_receiver_fn = (
      tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec))
  estimator.export_savedmodel(model_path, serving_input_receiver_fn)
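
One plausible way to exercise the SavedModel exported above is via tf.contrib.predictor from TF 1.x. The feed key "input" matches the feature_spec name used in the export; the timestamped-subdirectory lookup is an assumption about export_savedmodel's on-disk layout.

import glob
import numpy as np
import tensorflow as tf

# export_savedmodel writes into a timestamped subdirectory; pick the latest.
export_dir = sorted(glob.glob("./serving_model/*"))[-1]
predict_fn = tf.contrib.predictor.from_saved_model(export_dir)

# Shape (1, 16) matches the 'feat_input' placeholder declared in the export.
dummy_batch = np.zeros((1, 16), dtype=np.int32)
print(predict_fn({"input": dummy_batch}))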

def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  np.random.seed(FLAGS.random_seed)

  processor = ClassificationProcessor(
      data_dir=FLAGS.data_dir, task_name=FLAGS.task_name.lower())

  sent_label_list = processor.get_sent_labels()

  model_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)

  model_builder = XLNetModelBuilder(
      model_config=model_config, use_tpu=FLAGS.use_tpu)

  model_fn = model_builder.get_model_fn(sent_label_list)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  tpu_config = model_utils.configure_tpu(FLAGS)

  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=tpu_config,
      export_to_tpu=FLAGS.use_tpu,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  tokenizer = XLNetTokenizer(
      sp_model_file=FLAGS.spiece_model_file, lower_case=FLAGS.lower_case)

  example_converter = XLNetExampleConverter(
      sent_label_list=sent_label_list,
      max_seq_length=FLAGS.max_seq_length,
      tokenizer=tokenizer)

  if FLAGS.do_train:
    train_examples = processor.get_train_examples()

    tf.logging.info("***** Run training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", FLAGS.train_steps)

    train_features = example_converter.convert_examples_to_features(
        train_examples)
    train_input_fn = XLNetInputBuilder.get_input_builder(
        train_features, FLAGS.max_seq_length, True, True)

    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples()

    tf.logging.info("***** Run evaluation *****")
    tf.logging.info("  Num examples = %d", len(eval_examples))
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    eval_features = example_converter.convert_examples_to_features(
        eval_examples)
    eval_input_fn = XLNetInputBuilder.get_input_builder(
        eval_features, FLAGS.max_seq_length, False, False)

    result = estimator.evaluate(input_fn=eval_input_fn)

    sent_accuracy = result["sent_accuracy"]
    tf.logging.info("***** Evaluation result *****")
    tf.logging.info("  Accuracy (sent-level) = %s", str(sent_accuracy))

  if FLAGS.do_predict:
    predict_examples = processor.get_test_examples()

    tf.logging.info("***** Run prediction *****")
    tf.logging.info("  Num examples = %d", len(predict_examples))
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_features = example_converter.convert_examples_to_features(
        predict_examples)
    predict_input_fn = XLNetInputBuilder.get_input_builder(
        predict_features, FLAGS.max_seq_length, False, False)

    result = estimator.predict(input_fn=predict_input_fn)

    predict_recorder = XLNetPredictRecorder(
        output_dir=FLAGS.output_dir,
        sent_label_list=sent_label_list,
        max_seq_length=FLAGS.max_seq_length,
        tokenizer=tokenizer,
        predict_tag=FLAGS.predict_tag)

    predicts = [{
        "input_ids": feature.input_ids,
        "input_masks": feature.input_masks,
        "sent_label_id": feature.sent_label_id,
        "sent_predict_id": predict["sent_predict_id"],
        "sent_predict_score": predict["sent_predict_score"],
        "sent_predict_prob": predict["sent_predict_prob"].tolist()
    } for feature, predict in zip(predict_features, result)]

    predict_recorder.record(predicts)

  if FLAGS.do_export:
    tf.logging.info("***** Running exporting *****")
    tf.gfile.MakeDirs(FLAGS.export_dir)
    serving_input_fn = XLNetInputBuilder.get_serving_input_fn(
        FLAGS.max_seq_length)
    estimator.export_savedmodel(
        FLAGS.export_dir, serving_input_fn, as_text=False)

def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  #### Validate flags
  if FLAGS.save_steps is not None:
    FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

  if FLAGS.do_predict:
    predict_dir = FLAGS.predict_dir
    if not tf.gfile.Exists(predict_dir):
      tf.gfile.MakeDirs(predict_dir)

  processors = {
      "ner": NerProcessor,
  }

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval`, `do_predict` or "
        "`do_submit` must be True.")

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()
  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()
  label_list = processor.get_labels() if not FLAGS.is_regression else None
  print('label_list')
  print(label_list)

  sp = spm.SentencePieceProcessor()
  sp.Load(FLAGS.spiece_model_file)

  def tokenize_fn(text):
    text = preprocess_text(text, lower=FLAGS.uncased)
    # Map pieces that are not in the vocabulary to a sentinel id.
    if sp.PieceToId(text) == 0:
      return 99999
    return sp.PieceToId(text)

  run_config = model_utils.configure_tpu(FLAGS)

  model_fn = get_model_fn(len(label_list))

  spm_basename = os.path.basename(FLAGS.spiece_model_file)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  # if FLAGS.use_tpu:
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      predict_batch_size=FLAGS.predict_batch_size,
      eval_batch_size=FLAGS.eval_batch_size)
  # else:
  #   estimator = tf.estimator.Estimator(
  #       model_fn=model_fn,
  #       config=run_config)

  if FLAGS.do_train:
    train_file_base = "{}.len-{}.train.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    train_file = os.path.join(FLAGS.output_dir, train_file_base)
    tf.logging.info("Use tfrecord file {}".format(train_file))

    # print('get train examples')
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    np.random.shuffle(train_examples)
    tf.logging.info("Num of train samples: {}".format(len(train_examples)))

    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        train_file, FLAGS.num_passes)

    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True,
        batch_size=FLAGS.train_batch_size)

    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

  # if FLAGS.do_eval or FLAGS.do_predict:
  #   if FLAGS.eval_split == "dev":
  #     eval_examples = processor.get_dev_examples(FLAGS.data_dir)
  #   else:
  #     eval_examples = processor.get_test_examples(FLAGS.data_dir)
  #   tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")

    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    logging.info("***** Running evaluation *****")
    logging.info("  Num examples = %d", len(eval_examples))
    logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # if FLAGS.use_tpu:
    #   eval_steps = int(len(eval_examples) / FLAGS.eval_batch_size)
    # eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True,
        batch_size=FLAGS.eval_batch_size)  # evaluate with the eval batch size

    result = estimator.evaluate(input_fn=eval_input_fn)

    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    with open(output_eval_file, "w") as wf:
      logging.info("***** Eval results *****")
      confusion_matrix = result["confusion_matrix"]
      p, r, f = metrics.calculate(confusion_matrix, len(label_list) - 1)
      logging.info("***********************************************")
      logging.info("********************P = %s*********************", str(p))
      logging.info("********************R = %s*********************", str(r))
      logging.info("********************F = %s*********************", str(f))
      logging.info("***********************************************")
      # Persist the metrics in the opened results file as well.
      wf.write("P = {}\nR = {}\nF = {}\n".format(p, r, f))

  if FLAGS.do_predict:
    # eval_file_base = "{}.len-{}.{}.predict.tf_record".format(
    #     spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    # eval_file = os.path.join(FLAGS.output_dir, eval_file_base)
    test_file_base = "{}.len-{}.predict.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    test_file = os.path.join(FLAGS.output_dir, test_file_base)

    test_examples = processor.get_test_examples(FLAGS.data_dir)
    file_based_convert_examples_to_features(
        test_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        test_file)

    pred_input_fn = file_based_input_fn_builder(
        input_file=test_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False,
        batch_size=FLAGS.predict_batch_size)  # predict with the predict batch size

    predict_results = []
    result = estimator.predict(input_fn=pred_input_fn)
    output_predict_file = os.path.join(FLAGS.output_dir, "label_test.txt")
    # estimator.predict returns a generator; materializing it runs inference.
    print(list(result))

def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Validate flags
  if FLAGS.save_steps is not None:
    FLAGS.iterations = min(FLAGS.iterations, FLAGS.save_steps)

  if FLAGS.do_predict:
    predict_dir = FLAGS.predict_dir
    if not tf.gfile.Exists(predict_dir):
      tf.gfile.MakeDirs(predict_dir)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval`, `do_predict` or "
        "`do_submit` must be True.")

  if not tf.gfile.Exists(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()

  processor = PaperProcessor(features=FLAGS.features.split(","))
  label_list = processor.get_labels()

  sp = spm.SentencePieceProcessor()
  sp.Load(FLAGS.spiece_model_file)

  def tokenize_fn(text):
    text = preprocess_text(text, lower=FLAGS.uncased)
    return encode_ids(sp, text)

  run_config = model_utils.configure_tpu(FLAGS)

  model_fn = get_model_fn(len(label_list) if label_list is not None else None)

  spm_basename = os.path.basename(FLAGS.spiece_model_file)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  if FLAGS.use_tpu:
    estimator = tf.contrib.tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size,
        eval_batch_size=FLAGS.eval_batch_size)
  else:
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

  if FLAGS.do_train:
    train_file_base = "{}.len-{}.train.tf_record".format(
        spm_basename, FLAGS.max_seq_length)
    train_file = os.path.join(FLAGS.output_dir, train_file_base)
    tf.logging.info("Use tfrecord file {}".format(train_file))

    train_examples = processor.get_train_examples(FLAGS.data_dir)
    np.random.shuffle(train_examples)
    tf.logging.info("Num of train samples: {}".format(len(train_examples)))

    FLAGS.train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    FLAGS.warmup_steps = int(FLAGS.train_steps * FLAGS.warmup_proportion)

    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        train_file, FLAGS.num_passes)

    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", FLAGS.train_steps)

    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)

  if FLAGS.do_eval or FLAGS.do_predict:
    if FLAGS.eval_split == "dev":
      eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    elif FLAGS.eval_split == "train":
      eval_examples = processor.get_train_examples(FLAGS.data_dir,
                                                   shuffle=False)
    else:
      eval_examples = processor.get_test_examples(FLAGS.data_dir)

    tf.logging.info("Num of eval samples: {}".format(len(eval_examples)))

  if FLAGS.do_eval:
    # TPU requires a fixed batch size for all batches, therefore the number
    # of examples must be a multiple of the batch size, or else examples
    # will get dropped. So we pad with fake examples which are ignored
    # later on. These do NOT count towards the metric (all tf.metrics
    # support a per-instance weight, and these get a weight of 0.0).
    #
    # Modified in XL: We also adopt the same mechanism for GPUs.
    while len(eval_examples) % FLAGS.eval_batch_size != 0:
      eval_examples.append(PaddingInputExample())

    eval_file_base = "{}.len-{}.{}.eval.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    assert len(eval_examples) % FLAGS.eval_batch_size == 0
    eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=True)

    # Filter out all checkpoints in the directory
    steps_and_files = []
    filenames = tf.gfile.ListDirectory(FLAGS.model_dir)
    for filename in filenames:
      if filename.endswith(".index"):
        ckpt_name = filename[:-6]
        cur_filename = join(FLAGS.model_dir, ckpt_name)
        global_step = int(cur_filename.split("-")[-1])
        tf.logging.info("Add {} to eval list.".format(cur_filename))
        steps_and_files.append([global_step, cur_filename])
    steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

    # Decide whether to evaluate all ckpts
    if not FLAGS.eval_all_ckpt:
      steps_and_files = steps_and_files[-1:]

    eval_results = []
    for global_step, filename in sorted(steps_and_files, key=lambda x: x[0]):
      ret = estimator.evaluate(
          input_fn=eval_input_fn,
          steps=eval_steps,
          checkpoint_path=filename)

      ret["step"] = global_step
      ret["path"] = filename

      eval_results.append(ret)

      tf.logging.info("=" * 80)
      log_str = "Eval result | "
      for key, val in sorted(ret.items(), key=lambda x: x[0]):
        log_str += "{} {} | ".format(key, val)
      tf.logging.info(log_str)

    key_name = "eval_pearsonr" if FLAGS.is_regression else "eval_accuracy"
    eval_results.sort(key=lambda x: x[key_name], reverse=True)

    tf.logging.info("=" * 80)
    log_str = "Best result | "
    for key, val in sorted(eval_results[0].items(), key=lambda x: x[0]):
      log_str += "{} {} | ".format(key, val)
    tf.logging.info(log_str)

  if FLAGS.do_predict:
    eval_file_base = "{}.len-{}.{}.predict.tf_record".format(
        spm_basename, FLAGS.max_seq_length, FLAGS.eval_split)
    eval_file = os.path.join(FLAGS.output_dir, eval_file_base)

    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenize_fn,
        eval_file)

    pred_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=False)

    predict_results = []
    with tf.gfile.Open(
        os.path.join(predict_dir, "{}.tsv".format(task_name)), "w") as fout:
      fout.write("index\tprediction\n")

      for pred_cnt, result in enumerate(
          estimator.predict(input_fn=pred_input_fn,
                            yield_single_examples=True,
                            checkpoint_path=FLAGS.predict_ckpt)):
        if pred_cnt % 1000 == 0:
          tf.logging.info(
              "Predicting submission for example: {}".format(pred_cnt))

        logits = [float(x) for x in result["logits"].flat]
        predict_results.append(logits)

        if len(logits) == 1:
          label_out = logits[0]
        elif len(logits) == 2:
          if logits[1] - logits[0] > FLAGS.predict_threshold:
            label_out = label_list[1]
          else:
            label_out = label_list[0]
        elif len(logits) > 2:
          max_index = np.argmax(np.array(logits, dtype=np.float32))
          label_out = label_list[max_index]
        else:
          raise NotImplementedError

        fout.write("{}\t{}\n".format(pred_cnt, label_out))

    predict_json_path = os.path.join(
        predict_dir, "{}.logits.json".format(task_name))
    with tf.gfile.Open(predict_json_path, "w") as fp:
      json.dump(predict_results, fp, indent=4)

def main(unused_argv):
  del unused_argv  # Unused

  tf.logging.set_verbosity(tf.logging.INFO)

  assert FLAGS.seq_len > 0
  assert FLAGS.perm_size > 0

  FLAGS.batch_size = FLAGS.batch_size * FLAGS.num_hosts

  FLAGS.n_token = data_utils.VOCAB_SIZE
  tf.logging.info("n_token {}".format(FLAGS.n_token))

  if FLAGS.bucket_uri is not None:
    FLAGS.model_dir = os.path.join(FLAGS.bucket_uri, FLAGS.model_dir)

  if not tf.gfile.Exists(FLAGS.model_dir):
    tf.gfile.MakeDirs(FLAGS.model_dir)

  # Get train input function
  train_input_fn, train_record_info_dict = get_input_fn("train")
  valid_input_fn, valid_record_info_dict = get_input_fn("valid")

  train_steps = train_record_info_dict["num_batch"]
  valid_steps = valid_record_info_dict["num_batch"]
  FLAGS.train_steps = train_steps
  FLAGS.save_steps = train_steps * FLAGS.epochs

  tf.logging.info("num of batches {}".format(
      train_record_info_dict["num_batch"]))

  # Get train cache function
  train_cache_fn = get_cache_fn(FLAGS.mem_len)
  eval_cache_fn = get_cache_fn(FLAGS.mem_len)

  ##### Get model function
  info_dict = {
      "id": FLAGS.run_id,
      "n_layers": FLAGS.n_layer,
      "d_model": FLAGS.d_model,
      "n_heads": FLAGS.n_head
  }
  _dir = get_logdir(os.path.join(FLAGS.bucket_uri, FLAGS.logDir), info_dict)
  model_fn = get_model_fn(_dir)

  ##### Create TPUEstimator
  # TPU Configuration
  run_config = model_utils.configure_tpu(FLAGS)

  # TPU Estimator
  estimator = tpu_estimator.TPUEstimator(
      model_fn=model_fn,
      train_cache_fn=train_cache_fn,
      eval_cache_fn=eval_cache_fn,
      use_tpu=FLAGS.use_tpu,
      config=run_config,
      params={"track_mean": FLAGS.track_mean},
      train_batch_size=FLAGS.batch_size,
      eval_batch_size=FLAGS.batch_size,
      eval_on_tpu=FLAGS.use_tpu)

  #### Training and Validation
  eval_errs = []
  xs = list(range(PATIENCE))
  train_times, eval_times = [], []
  stopped_early = False
  # Initialize here so do_save_results works even when the early-stopping
  # branch never ran.
  last_errs = None
  slope = 0

  for i in range(FLAGS.epochs):
    if FLAGS.do_train:
      tf.logging.info("#### Starting training cycle")
      start = time.time()
      train_ret = estimator.train(input_fn=train_input_fn, steps=train_steps)
      end = time.time()
      train_times.append((end - start) / 60)
      tf.logging.info(
          "##################################### EPOCH {} "
          "#####################################".format(i + 1))

    if FLAGS.do_eval:
      tf.logging.info("#### Starting evaluation/validation cycle")
      start = time.time()
      eval_ret = estimator.evaluate(input_fn=valid_input_fn,
                                    steps=valid_steps)
      end = time.time()
      eval_times.append((end - start) / 60)

      if FLAGS.do_early_stop:
        # Early Stopping based on gradient from last PATIENCE points
        eval_errs.append(eval_ret['avg_loss'])
        if len(eval_errs) > PATIENCE:
          last_errs = eval_errs[-PATIENCE:]
          slope = round(
              np.polyfit(xs, last_errs, deg=1)[0], ROUNDING_PRECISION)
          if slope >= 0:
            stopped_early = True
            break

    if not FLAGS.do_train:
      break

  if FLAGS.do_save_results:
    best_loss = min(eval_errs)
    best_pplx = np.exp(best_loss)
    std = np.std(list(map(np.exp, eval_errs)))

    if last_errs is None:
      last_errs = []
      slope = 0

    result = {
        'loss': str(best_loss),
        'pplx': str(best_pplx),
        'std': str(std),
        'avg_train_time': str(np.mean(train_times)),
        'avg_eval_time': str(np.mean(eval_times)),
        'stopped_early': str(stopped_early),
        'last_errors': str(last_errs),
        'slope': str(slope),
        'epoch': str(i)
    }
    # Keep the full loss history alongside the summary instead of
    # overwriting the dict above.
    result['eval_errs'] = eval_errs

    with tf.gfile.Open(
        os.path.join(FLAGS.bucket_uri, "results",
                     "{}.json".format(FLAGS.run_id)), "w") as fp:
      json.dump(result, fp)
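
The early-stopping rule above can be read in isolation: fit a line to the last PATIENCE validation losses and stop once its slope is non-negative (the loss has flattened or begun rising). The constants and sample values below are illustrative.

import numpy as np

PATIENCE = 5
ROUNDING_PRECISION = 4

def should_stop(eval_errs):
  if len(eval_errs) <= PATIENCE:
    return False
  last_errs = eval_errs[-PATIENCE:]
  xs = list(range(PATIENCE))
  slope = round(np.polyfit(xs, last_errs, deg=1)[0], ROUNDING_PRECISION)
  return slope >= 0

print(should_stop([3.1, 2.7, 2.5, 2.4, 2.35, 2.33]))    # False: still falling
print(should_stop([2.5, 2.4, 2.41, 2.43, 2.44, 2.46]))  # True: tail is rising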