def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "cola": ColaProcessor,
      "mnli": MnliProcessor,
      "mrpc": MrpcProcessor,
      "xnli": XnliProcessor,
  }

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict` must be True.")

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()
  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()
  label_list = processor.get_labels()

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list),
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())

    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      assert len(eval_examples) % FLAGS.eval_batch_size == 0
      eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
      tf.logging.info("***** Eval results *****")
      for key in sorted(result.keys()):
        tf.logging.info("  %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))

  if FLAGS.do_predict:
    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    num_actual_predict_examples = len(predict_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on.
      while len(predict_examples) % FLAGS.predict_batch_size != 0:
        predict_examples.append(PaddingInputExample())

    predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    file_based_convert_examples_to_features(predict_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            predict_file)

    tf.logging.info("***** Running prediction *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    result = estimator.predict(input_fn=predict_input_fn)

    output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
    with tf.gfile.GFile(output_predict_file, "w") as writer:
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples
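
# Usage sketch for the main() above. main() is normally reached through
# tf.app.run(), which parses the flags referenced by FLAGS. The flag names
# below are the standard run_classifier.py flags from the upstream BERT
# README; the directory paths are placeholders you would adapt:
#
#   python run_classifier.py \
#     --task_name=MRPC \
#     --do_train=true \
#     --do_eval=true \
#     --data_dir=$GLUE_DIR/MRPC \
#     --vocab_file=$BERT_BASE_DIR/vocab.txt \
#     --bert_config_file=$BERT_BASE_DIR/bert_config.json \
#     --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt \
#     --max_seq_length=128 \
#     --train_batch_size=32 \
#     --learning_rate=2e-5 \
#     --num_train_epochs=3.0 \
#     --output_dir=/tmp/mrpc_output/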
def bert_lr_model(num_epoches, shuffle, train_batch_size, eval_batch_size,
                  test_batch_size, learning_rate, save_frequence, start_eval,
                  early_stop, num_labels=2):
  start1 = time.perf_counter()  # time.clock() was removed in Python 3.8
  start2 = datetime.now()
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {"mrpc": MrpcProcessor}

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict` must be True.")

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model was only "
        "trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()
  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()
  label_list = processor.get_labels()

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  train_examples = processor.get_train_examples(FLAGS.data_dir)
  train_len = len(train_examples)
  train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
  file_based_convert_examples_to_features(
      train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)

  eval_examples = processor.get_dev_examples(FLAGS.data_dir)
  eval_len = len(eval_examples)
  eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
  file_based_convert_examples_to_features(
      eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)

  test_examples = processor.get_test_examples(FLAGS.data_dir)
  test_len = len(test_examples)
  test_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
  file_based_convert_examples_to_features(
      test_examples, label_list, FLAGS.max_seq_length, tokenizer, test_file)

  params = {
      "train_file": train_file,
      "eval_file": eval_file,
      "test_file": test_file,
      "num_epochs": num_epoches,
      "shuffle": shuffle,
      "train_batch_size": train_batch_size,
      "eval_batch_size": eval_batch_size,
      "test_batch_size": test_batch_size
  }

  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True
  sess = tf.Session(config=config)
  # sess = tf.Session()
  with sess.as_default():
    data = file_based_input_fn_builder(FLAGS.max_seq_length, False, params)
    data_iter = data.creat_train_dataset()
    iterator = data_iter.make_one_shot_iterator()
    next_element = iterator.get_next()

    sess_bert_lr = model_fn_builder(
        FLAGS.max_seq_length, bert_config, num_labels,
        use_one_hot_embeddings=FLAGS.use_tpu)

    tvars = tf.trainable_variables()
    if FLAGS.init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars,
                                                      FLAGS.init_checkpoint)
      tf.train.init_from_checkpoint(FLAGS.init_checkpoint, assignment_map)

    sess_only_input_fro_QA = only_input_fro_QA()
    global_step = tf.get_variable(
        initializer=0, name="globle_step", trainable=False)
    num_train_steps = int(train_len * num_epoches / train_batch_size)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

    # train_op = optimization.create_optimizer(
    #     sess_only_input_fro_QA.loss, learning_rate, num_train_steps,
    #     num_warmup_steps, global_step, use_tpu=FLAGS.use_tpu)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(
        sess_only_input_fro_QA.loss, global_step=global_step)
    # Alternative with gradient clipping:
    # optimizer = tf.train.AdamOptimizer(learning_rate)
    # gradients, variables = zip(
    #     *optimizer.compute_gradients(sess_bert_lr.total_loss))
    # gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.max_grad_norm)
    # train_op = optimizer.apply_gradients(
    #     zip(gradients, variables), global_step=global_step)
    #
    # update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # train_op = tf.group([train_op, update_ops])

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    saver = tf.train.Saver()

    best_auc = 0.0
    best_iter = 0
    early_stop_iter = 1
    for epoch in range(num_epoches):
      elapsed1 = time.perf_counter() - start1
      elapsed2 = datetime.now() - start2
      print("Time used1:", elapsed1)
      print("Time used2:", elapsed2)
      flag = True
      while flag:
        train_data = sess.run(next_element)
        # First pass: run the frozen BERT towers to get the two pooled
        # output layers for the sentence pair.
        output_layer1, output_layer2 = sess.run(
            [sess_bert_lr.output_layer1, sess_bert_lr.output_layer2],
            feed_dict={
                sess_bert_lr.input_ids1: train_data["input_ids1"],
                sess_bert_lr.input_mask1: train_data["input_mask1"],
                sess_bert_lr.segment_ids1: train_data["segment_ids1"],
                sess_bert_lr.label_ids1: train_data["label_ids1"],
                sess_bert_lr.is_real_example1: train_data["is_real_example1"],
                sess_bert_lr.input_ids2: train_data["input_ids2"],
                sess_bert_lr.input_mask2: train_data["input_mask2"],
                sess_bert_lr.segment_ids2: train_data["segment_ids2"],
                sess_bert_lr.label_ids2: train_data["label_ids2"],
                sess_bert_lr.is_real_example2: train_data["is_real_example2"],
                sess_bert_lr.is_training: False
            })
        # Second pass: train the lightweight classifier head on top of the
        # two pooled outputs.
        _, loss, acc = sess.run(
            [train_op, sess_only_input_fro_QA.loss,
             sess_only_input_fro_QA.acc],
            feed_dict={
                sess_only_input_fro_QA.output_layer1: output_layer1,
                sess_only_input_fro_QA.output_layer2: output_layer2,
            })
        cur_step = tf.train.global_step(sess, global_step)
        if int(cur_step * train_batch_size / train_len) > epoch:
          flag = False
        print("epoch:{},global_step:{},loss:{},acc:{}".format(
            epoch, cur_step, loss, acc))
        if cur_step % save_frequence == 0 and cur_step > start_eval:
          valid_dev(sess, sess_bert_lr, sess_only_input_fro_QA, data,
                    test_batch_size, eval_len, file="eval")
          step_auc = valid_dev(sess, sess_bert_lr, sess_only_input_fro_QA,
                               data, test_batch_size, test_len, file="test")
          if step_auc > best_auc and cur_step >= start_eval:
            early_stop_iter = 1
            best_auc = step_auc
            best_iter = cur_step
            print('Saving model for step {}'.format(cur_step))
            saver.save(sess, FLAGS.checkpoint_model_path,
                       global_step=cur_step)
          elif step_auc < best_auc and cur_step > start_eval:
            early_stop_iter += 1
          if early_stop_iter >= early_stop:
            print("train_over, best_iter={}, best_auc={}".format(
                best_iter, best_auc))
            sess.close()
            exit()
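
# bert_lr_model above has no per-epoch iterator reset; it infers the current
# epoch from the global step via int(cur_step * train_batch_size / train_len).
# A minimal standalone sketch of that arithmetic (the numbers are made up for
# illustration):
def epoch_of(step, batch_size, num_train_examples):
  # Number of full passes over the data completed after `step` batches.
  return int(step * batch_size / num_train_examples)

assert epoch_of(step=0, batch_size=32, num_train_examples=3200) == 0
assert epoch_of(step=99, batch_size=32, num_train_examples=3200) == 0
assert epoch_of(step=100, batch_size=32, num_train_examples=3200) == 1  # epoch boundary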
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "cola": ColaProcessor,
      "mnli": MnliProcessor,
      "mrpc": MrpcProcessor,
      "xnli": XnliProcessor,
      "clef2019": CLEF2019Processor,
  }

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict` must be True.")

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()
  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()
  label_list = processor.get_labels()

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list),
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())

    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      assert len(eval_examples) % FLAGS.eval_batch_size == 0
      eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
      tf.logging.info("***** Eval results *****")
      for key in sorted(result.keys()):
        tf.logging.info("  %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))

  if FLAGS.do_predict:
    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    num_actual_predict_examples = len(predict_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on.
      while len(predict_examples) % FLAGS.predict_batch_size != 0:
        predict_examples.append(PaddingInputExample())

    predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    file_based_convert_examples_to_features(predict_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            predict_file)

    tf.logging.info("***** Running prediction *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    result = estimator.predict(input_fn=predict_input_fn)

    if FLAGS.file_with_predictions is not None:
      output_predict_file = os.path.join(FLAGS.output_dir,
                                         FLAGS.file_with_predictions)
    else:
      output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
    with tf.gfile.GFile(output_predict_file, "w") as writer:
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples
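
# The TPU padding loop above appends PaddingInputExample objects until the
# example count is a multiple of the batch size. A standalone sketch of the
# arithmetic it converges to (example numbers are hypothetical):
def num_padding_examples(num_examples, batch_size):
  # How many fake examples the while-loop will append.
  return (-num_examples) % batch_size

assert num_padding_examples(1043, 8) == 5  # pads 1043 -> 1048, i.e. 131 full batches
assert num_padding_examples(1048, 8) == 0  # already a multiple; no padding needed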
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "xnli": XnliProcessor,
      "tnews": TnewsProcessor,
      "afqmc": AFQMCProcessor,
      "iflytek": iFLYTEKDataProcessor,
      "copa": COPAProcessor,
      "cmnli": CMNLIProcessor,
      "wsc": WSCProcessor,
      "csl": CslProcessor,
      # "cola": classifier_utils.ColaProcessor,
      # "mnli": classifier_utils.MnliProcessor,
      # "mismnli": classifier_utils.MisMnliProcessor,
      # "mrpc": classifier_utils.MrpcProcessor,
      # "rte": classifier_utils.RteProcessor,
      # "sst-2": classifier_utils.Sst2Processor,
      # "sts-b": classifier_utils.StsbProcessor,
      # "qqp": classifier_utils.QqpProcessor,
      # "qnli": classifier_utils.QnliProcessor,
      # "wnli": classifier_utils.WnliProcessor,
  }

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict` must be True.")

  if not FLAGS.albert_config_file and not FLAGS.albert_hub_module_handle:
    raise ValueError("At least one of `--albert_config_file` and "
                     "`--albert_hub_module_handle` must be set")

  if FLAGS.albert_config_file:
    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)
    if FLAGS.max_seq_length > albert_config.max_position_embeddings:
      raise ValueError(
          "Cannot use sequence length %d because the ALBERT model "
          "was only trained up to sequence length %d" %
          (FLAGS.max_seq_length, albert_config.max_position_embeddings))
  else:
    albert_config = None  # Get the config from TF-Hub.

  tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()
  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name](
      use_spm=True if FLAGS.spm_model_file else False,
      do_lower_case=FLAGS.do_lower_case)
  label_list = processor.get_labels()

  if FLAGS.albert_hub_module_handle:
    tokenizer = tokenization.FullTokenizer.from_hub_module(
        hub_module=FLAGS.albert_hub_module_handle,
        spm_model_file=FLAGS.spm_model_file)
  else:
    tokenizer = tokenization.FullTokenizer.from_scratch(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
  if FLAGS.do_train:
    iterations_per_loop = int(
        min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
  else:
    iterations_per_loop = FLAGS.iterations_per_loop
  run_config = contrib_tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=int(FLAGS.save_checkpoints_steps),
      keep_checkpoint_max=0,
      tpu_config=contrib_tpu.TPUConfig(
          iterations_per_loop=iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)

  model_fn = classifier_utils.model_fn_builder(
      albert_config=albert_config,
      num_labels=len(label_list),
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=FLAGS.train_step,
      num_warmup_steps=FLAGS.warmup_step,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu,
      task_name=task_name,
      hub_module=FLAGS.albert_hub_module_handle,
      optimizer=FLAGS.optimizer)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = contrib_tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    cached_dir = FLAGS.cached_dir
    if not cached_dir:
      cached_dir = FLAGS.output_dir
    train_file = os.path.join(cached_dir, task_name + "_train.tf_record")
    if not tf.gfile.Exists(train_file):
      classifier_utils.file_based_convert_examples_to_features(
          train_examples, label_list, FLAGS.max_seq_length, tokenizer,
          train_file, task_name)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", FLAGS.train_step)
    train_input_fn = classifier_utils.file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True,
        task_name=task_name,
        use_tpu=FLAGS.use_tpu,
        bsz=FLAGS.train_batch_size)
    estimator.train(input_fn=train_input_fn, max_steps=FLAGS.train_step)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(classifier_utils.PaddingInputExample())

    cached_dir = FLAGS.cached_dir
    if not cached_dir:
      cached_dir = FLAGS.output_dir
    eval_file = os.path.join(cached_dir, task_name + "_eval.tf_record")
    if not tf.gfile.Exists(eval_file):
      classifier_utils.file_based_convert_examples_to_features(
          eval_examples, label_list, FLAGS.max_seq_length, tokenizer,
          eval_file, task_name)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      assert len(eval_examples) % FLAGS.eval_batch_size == 0
      eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = classifier_utils.file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder,
        task_name=task_name,
        use_tpu=FLAGS.use_tpu,
        bsz=FLAGS.eval_batch_size)

    best_trial_info_file = os.path.join(FLAGS.output_dir, "best_trial.txt")

    def _best_trial_info():
      """Returns information about which checkpoints have been evaled so far."""
      if tf.gfile.Exists(best_trial_info_file):
        with tf.gfile.GFile(best_trial_info_file, "r") as best_info:
          global_step, best_metric_global_step, metric_value = (
              best_info.read().split(":"))
          global_step = int(global_step)
          best_metric_global_step = int(best_metric_global_step)
          metric_value = float(metric_value)
      else:
        metric_value = -1
        best_metric_global_step = -1
        global_step = -1
      tf.logging.info(
          "Best trial info: Step: %s, Best Value Step: %s, "
          "Best Value: %s", global_step, best_metric_global_step, metric_value)
      return global_step, best_metric_global_step, metric_value

    def _remove_checkpoint(checkpoint_path):
      for ext in ["meta", "data-00000-of-00001", "index"]:
        src_ckpt = checkpoint_path + ".{}".format(ext)
        tf.logging.info("removing {}".format(src_ckpt))
        tf.gfile.Remove(src_ckpt)

    def _find_valid_cands(curr_step):
      filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
      candidates = []
      for filename in filenames:
        if filename.endswith(".index"):
          ckpt_name = filename[:-6]
          idx = ckpt_name.split("-")[-1]
          if int(idx) > curr_step:
            candidates.append(filename)
      return candidates

    output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    if task_name == "sts-b":
      key_name = "pearson"
    elif task_name == "cola":
      key_name = "matthew_corr"
    else:
      key_name = "eval_accuracy"

    global_step, best_perf_global_step, best_perf = _best_trial_info()
    writer = tf.gfile.GFile(output_eval_file, "w")
    # Keep evaluating newly written checkpoints until training has produced
    # its final step.
    while global_step < FLAGS.train_step:
      steps_and_files = {}
      filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
      for filename in filenames:
        if filename.endswith(".index"):
          ckpt_name = filename[:-6]
          cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
          gstep = int(cur_filename.split("-")[-1])
          if gstep not in steps_and_files:
            tf.logging.info("Add {} to eval list.".format(cur_filename))
            steps_and_files[gstep] = cur_filename
      tf.logging.info("found {} files.".format(len(steps_and_files)))
      if not steps_and_files:
        tf.logging.info(
            "found 0 file, global step: {}. Sleeping.".format(global_step))
        time.sleep(60)
      else:
        for checkpoint in sorted(steps_and_files.items()):
          step, checkpoint_path = checkpoint
          if global_step >= step:
            if (best_perf_global_step != step and
                len(_find_valid_cands(step)) > 1):
              _remove_checkpoint(checkpoint_path)
            continue
          result = estimator.evaluate(
              input_fn=eval_input_fn,
              steps=eval_steps,
              checkpoint_path=checkpoint_path)
          global_step = result["global_step"]
          tf.logging.info("***** Eval results *****")
          for key in sorted(result.keys()):
            tf.logging.info("  %s = %s", key, str(result[key]))
            writer.write("%s = %s\n" % (key, str(result[key])))
          writer.write("best = {}\n".format(best_perf))
          if result[key_name] > best_perf:
            best_perf = result[key_name]
            best_perf_global_step = global_step
          elif len(_find_valid_cands(global_step)) > 1:
            _remove_checkpoint(checkpoint_path)
          writer.write("=" * 50 + "\n")
          writer.flush()
          with tf.gfile.GFile(best_trial_info_file, "w") as best_info:
            best_info.write("{}:{}:{}".format(
                global_step, best_perf_global_step, best_perf))
    writer.close()

    for ext in ["meta", "data-00000-of-00001", "index"]:
      src_ckpt = "model.ckpt-{}.{}".format(best_perf_global_step, ext)
      tgt_ckpt = "model.ckpt-best.{}".format(ext)
      tf.logging.info("saving {} to {}".format(src_ckpt, tgt_ckpt))
      tf.io.gfile.rename(
          os.path.join(FLAGS.output_dir, src_ckpt),
          os.path.join(FLAGS.output_dir, tgt_ckpt),
          overwrite=True)

  if FLAGS.do_predict:
    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    num_actual_predict_examples = len(predict_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on.
      while len(predict_examples) % FLAGS.predict_batch_size != 0:
        predict_examples.append(classifier_utils.PaddingInputExample())

    predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    classifier_utils.file_based_convert_examples_to_features(
        predict_examples, label_list, FLAGS.max_seq_length, tokenizer,
        predict_file, task_name)

    tf.logging.info("***** Running prediction *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = classifier_utils.file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder,
        task_name=task_name,
        use_tpu=FLAGS.use_tpu,
        bsz=FLAGS.predict_batch_size)

    checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
    result = estimator.predict(
        input_fn=predict_input_fn, checkpoint_path=checkpoint_path)

    output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
    output_submit_file = os.path.join(FLAGS.output_dir, "submit_results.tsv")
    with tf.gfile.GFile(output_predict_file, "w") as pred_writer, \
        tf.gfile.GFile(output_submit_file, "w") as sub_writer:
      sub_writer.write("index" + "\t" + "prediction\n")
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, (example, prediction)) in \
          enumerate(zip(predict_examples, result)):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        pred_writer.write(output_line)
        if task_name != "sts-b":
          actual_label = label_list[int(prediction["predictions"])]
        else:
          actual_label = str(prediction["predictions"])
        sub_writer.write(example.guid + "\t" + actual_label + "\n")
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples
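
# Standalone sketch of the best_trial.txt bookkeeping used by
# _best_trial_info() above. The file holds one "global_step:best_step:metric"
# record so an interrupted eval loop can resume where it left off. The file
# name and field order follow the code above; the values are illustrative.
def parse_best_trial(text):
  global_step, best_step, best_metric = text.split(":")
  return int(global_step), int(best_step), float(best_metric)

def format_best_trial(global_step, best_step, best_metric):
  return "{}:{}:{}".format(global_step, best_step, best_metric)

assert parse_best_trial("1500:1000:0.8725") == (1500, 1000, 0.8725)
assert format_best_trial(1500, 1000, 0.8725) == "1500:1000:0.8725"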
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {"imdb": ImdbProcessor}

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  if not tf.gfile.IsDirectory(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)
  if not tf.gfile.IsDirectory(os.path.join(FLAGS.output_dir,
                                           FLAGS.subset_dir)):
    # No model has been trained on this subdataset before.
    tf.gfile.MakeDirs(os.path.join(FLAGS.output_dir, FLAGS.subset_dir))

  # Verify the model id.
  if FLAGS.mode == "train":
    FLAGS.model_id = model_hash()  # generate model hash
    new_dir = os.path.join(FLAGS.output_dir, FLAGS.subset_dir, FLAGS.model_id)
    tf.gfile.MakeDirs(new_dir)  # make directory based on hash
    # Write the config.
    flag_dict = FLAGS.flag_values_dict()
    with tf.gfile.GFile(os.path.join(new_dir, "config.txt"), "w") as writer:
      tf.logging.info("***** Writing training hyperparams to directory *****")
      for key in flag_dict.keys():
        if key in CONFIG_HYPERPARAMS:
          writer.write("%s = %s\n" % (key, str(flag_dict[key])))
  elif FLAGS.model_id is None:  # train off; either pred or eval is on
    raise ValueError("No model ID provided. Model ID is required for "
                     "eval/predict when not training.")

  # Change FLAGS.output_dir to the new directory.
  FLAGS.output_dir = os.path.join(FLAGS.output_dir, FLAGS.subset_dir,
                                  FLAGS.model_id)

  task_name = FLAGS.task_name.lower()
  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name](FLAGS.data_dir)
  label_list = processor.get_labels()

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  # ============== TPU settings ====================
  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      keep_checkpoint_max=None,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))
  # =================================================

  train_examples = None
  num_train_steps_per_epoch = None
  num_train_steps_total = None
  num_warmup_steps = None
  if FLAGS.mode == "train":
    train_examples = processor.get_train_examples(FLAGS.subset_dir)
    eval_examples = processor.get_dev_examples()
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())
    num_train_steps_per_epoch = int(
        len(train_examples) / FLAGS.train_batch_size)  # * FLAGS.num_train_epochs
    num_train_steps_total = num_train_steps_per_epoch * FLAGS.num_train_epochs
    num_warmup_steps = int(num_train_steps_total * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list),
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps_total,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.mode == "train":
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)

    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)
    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num epochs = %d", FLAGS.num_train_epochs)
    # tf.logging.info("  Num steps = %d", num_train_steps)

    best_val_loss = np.inf
    best_val_acc = 0.
    best_epoch = None
    best_result = None
    patience = FLAGS.patience

    for i in range(FLAGS.num_train_epochs):
      curr_epoch = i + 1
      tf.logging.info("===== Running training for EPOCH %d =====", curr_epoch)
      estimator.train(input_fn=train_input_fn,
                      max_steps=num_train_steps_per_epoch * curr_epoch)

      tf.logging.info("===== Running evaluation for EPOCH %d =====",
                      curr_epoch)
      tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                      len(eval_examples), num_actual_eval_examples,
                      len(eval_examples) - num_actual_eval_examples)
      tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

      # This tells the estimator to run through the entire set.
      eval_steps = None
      # However, if running eval on the TPU, you will need to specify the
      # number of steps.
      if FLAGS.use_tpu:
        assert len(eval_examples) % FLAGS.eval_batch_size == 0
        eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

      result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)

      output_eval_file = os.path.join(
          FLAGS.output_dir, "eval_results_epoch_{}.txt".format(curr_epoch))
      with tf.gfile.GFile(output_eval_file, "w") as writer:
        tf.logging.info("***** Eval results for EPOCH %d *****", curr_epoch)
        for key in sorted(result.keys()):
          tf.logging.info("  %s = %s", key, str(result[key]))
          writer.write("%s = %s\n" % (key, str(result[key])))

      if FLAGS.early_stopping_criterion == "acc":
        if result['eval_accuracy'] > best_val_acc:
          best_val_acc = result['eval_accuracy']
          best_epoch = curr_epoch
          best_result = result
          patience = FLAGS.patience
        else:
          tf.logging.info("Validation accuracy did not increase.")
          if patience == 0:
            tf.logging.info("Early stopping.")
            break
          else:
            tf.logging.info("Will try for %d more epochs.", patience)
            patience -= 1
      else:  # use loss as the early stopping criterion
        if result['eval_loss'] < best_val_loss:
          best_val_loss = result['eval_loss']
          best_epoch = curr_epoch
          best_result = result
          patience = FLAGS.patience
        else:
          tf.logging.info("Validation loss did not decrease.")
          if patience == 0:
            tf.logging.info("Early stopping.")
            break
          else:
            tf.logging.info("Will try for %d more epochs.", patience)
            patience -= 1

    best_output_eval_file = os.path.join(FLAGS.output_dir,
                                         "best_eval_results.txt")
    best_global_step = best_result['global_step']
    best_checkpoint_path = os.path.join(
        FLAGS.output_dir, 'model.ckpt-{}'.format(best_global_step))
    with tf.gfile.GFile(best_output_eval_file, "w") as writer:
      tf.logging.info("***** Best eval results: EPOCH %d *****", best_epoch)
      writer.write("Best checkpoint path: {}\n".format(best_checkpoint_path))
      for key in sorted(best_result.keys()):
        tf.logging.info("  %s = %s", key, str(best_result[key]))
        writer.write("%s = %s\n" % (key, str(best_result[key])))
    # Training complete. Start auto-eval on the test set using the best
    # checkpoint.

  if FLAGS.mode == "eval" or FLAGS.mode == "train":
    # Get the best checkpoint.
    best_output_eval_file = os.path.join(FLAGS.output_dir,
                                         "best_eval_results.txt")
    with tf.gfile.GFile(best_output_eval_file, "r") as reader:
      best_checkpoint_path = reader.readline().replace(
          "Best checkpoint path: ", "").replace("\n", "")

    eval_examples = processor.get_test_examples()
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())

    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      assert len(eval_examples) % FLAGS.eval_batch_size == 0
      eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps,
                                checkpoint_path=best_checkpoint_path)

    output_eval_file = os.path.join(FLAGS.output_dir, "test_eval_results.txt")
    with tf.gfile.GFile(output_eval_file, "w") as writer:
      tf.logging.info("***** Eval results on TEST set *****")
      writer.write("Checkpoint path: {}\n".format(best_checkpoint_path))
      for key in sorted(result.keys()):
        tf.logging.info("  %s = %s", key, str(result[key]))
        writer.write("%s = %s\n" % (key, str(result[key])))

  if FLAGS.mode == "predict":
    # Get the best checkpoint.
    best_output_eval_file = os.path.join(FLAGS.output_dir,
                                         "best_eval_results.txt")
    with tf.gfile.GFile(best_output_eval_file, "r") as reader:
      best_checkpoint_path = reader.readline().replace(
          "Best checkpoint path: ", "").replace("\n", "")

    if FLAGS.pred_ds == 'train':
      predict_examples = processor.get_train_examples('og')
    elif FLAGS.pred_ds == 'dev':
      predict_examples = processor.get_dev_examples()
    else:  # = 'test'
      predict_examples = processor.get_test_examples()
    num_actual_predict_examples = len(predict_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on.
      while len(predict_examples) % FLAGS.predict_batch_size != 0:
        predict_examples.append(PaddingInputExample())

    predict_file = os.path.join(FLAGS.output_dir,
                                "predict_{}.tf_record".format(FLAGS.pred_ds))
    file_based_convert_examples_to_features(predict_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            predict_file)

    tf.logging.info("***** Running prediction on {} set *****".format(
        FLAGS.pred_ds))
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    result = estimator.predict(input_fn=predict_input_fn,
                               checkpoint_path=best_checkpoint_path)

    output_predict_file = os.path.join(
        FLAGS.output_dir, "preds_on_{}.tsv".format(FLAGS.pred_ds))
    with tf.gfile.GFile(output_predict_file, "w") as writer:
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples
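
# Sketch of the best-checkpoint handshake between the train and eval/predict
# branches above: training writes "Best checkpoint path: ..." as the first
# line of best_eval_results.txt, and the eval/predict branches strip that
# prefix back off. Standalone; the path below is hypothetical.
def read_best_checkpoint_path(first_line):
  return first_line.replace("Best checkpoint path: ", "").replace("\n", "")

line = "Best checkpoint path: /tmp/output/model.ckpt-1250\n"
assert read_best_checkpoint_path(line) == "/tmp/output/model.ckpt-1250"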
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "sentence_pair": SentencePairClassificationProcessor,
      "lcqmc_pair": LCQMCPairClassificationProcessor
  }

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict` must be True.")

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()
  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()
  label_list = processor.get_labels()

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  # Cloud TPU: Invalid TPU configuration, ensure ClusterResolver is passed
  # to tpu.
  print("###tpu_cluster_resolver:", tpu_cluster_resolver)
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)  # TODO
    print("###length of total train_examples:", len(train_examples))
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list),
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    train_file_exists = os.path.exists(train_file)
    print("###train_file_exists:", train_file_exists, " ;train_file:",
          train_file)
    if not train_file_exists:
      # If the tf_record file does not exist, convert from the raw text
      # file. TODO
      file_based_convert_examples_to_features(
          train_examples, label_list, FLAGS.max_seq_length, tokenizer,
          train_file)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_eval:
    eval_examples = processor.get_dev_examples(FLAGS.data_dir)
    num_actual_eval_examples = len(eval_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on. These do NOT count towards the metric (all tf.metrics
      # support a per-instance weight, and these get a weight of 0.0).
      while len(eval_examples) % FLAGS.eval_batch_size != 0:
        eval_examples.append(PaddingInputExample())

    eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record")
    file_based_convert_examples_to_features(
        eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file)

    tf.logging.info("***** Running evaluation *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(eval_examples), num_actual_eval_examples,
                    len(eval_examples) - num_actual_eval_examples)
    tf.logging.info("  Batch size = %d", FLAGS.eval_batch_size)

    # This tells the estimator to run through the entire set.
    eval_steps = None
    # However, if running eval on the TPU, you will need to specify the
    # number of steps.
    if FLAGS.use_tpu:
      assert len(eval_examples) % FLAGS.eval_batch_size == 0
      eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size)

    eval_drop_remainder = True if FLAGS.use_tpu else False
    eval_input_fn = file_based_input_fn_builder(
        input_file=eval_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=eval_drop_remainder)

    ###########################################################################
    # Evaluate all checkpoints; you can use the checkpoint with the best dev
    # accuracy.
    steps_and_files = []
    filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
    for filename in filenames:
      if filename.endswith(".index"):
        ckpt_name = filename[:-6]
        cur_filename = os.path.join(FLAGS.output_dir, ckpt_name)
        global_step = int(cur_filename.split("-")[-1])
        tf.logging.info("Add {} to eval list.".format(cur_filename))
        steps_and_files.append([global_step, cur_filename])
    steps_and_files = sorted(steps_and_files, key=lambda x: x[0])

    output_eval_file = os.path.join(FLAGS.data_dir,
                                    "eval_results_albert_zh.txt")
    print("output_eval_file:", output_eval_file)
    tf.logging.info("output_eval_file:" + output_eval_file)
    with tf.gfile.GFile(output_eval_file, "w") as writer:
      for global_step, filename in sorted(steps_and_files,
                                          key=lambda x: x[0]):
        result = estimator.evaluate(input_fn=eval_input_fn,
                                    steps=eval_steps,
                                    checkpoint_path=filename)
        tf.logging.info("***** Eval results %s *****" % (filename))
        writer.write("***** Eval results %s *****\n" % (filename))
        for key in sorted(result.keys()):
          tf.logging.info("  %s = %s", key, str(result[key]))
          writer.write("%s = %s\n" % (key, str(result[key])))
    ###########################################################################
    # result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps)
    #
    # output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
    # with tf.gfile.GFile(output_eval_file, "w") as writer:
    #   tf.logging.info("***** Eval results *****")
    #   for key in sorted(result.keys()):
    #     tf.logging.info("  %s = %s", key, str(result[key]))
    #     writer.write("%s = %s\n" % (key, str(result[key])))

  if FLAGS.do_predict:
    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    num_actual_predict_examples = len(predict_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on.
      while len(predict_examples) % FLAGS.predict_batch_size != 0:
        predict_examples.append(PaddingInputExample())

    predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    file_based_convert_examples_to_features(predict_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            predict_file)

    tf.logging.info("***** Running prediction *****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    result = estimator.predict(input_fn=predict_input_fn)

    output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
    with tf.gfile.GFile(output_predict_file, "w") as writer:
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples
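
# Standalone sketch of the evaluate-every-checkpoint discovery above: each
# checkpoint is identified by its ".index" file, and the global step is the
# integer suffix after the last "-" in the checkpoint name. Filenames below
# are illustrative.
def checkpoints_by_step(filenames):
  steps_and_names = []
  for filename in filenames:
    if filename.endswith(".index"):
      ckpt_name = filename[:-6]  # strip ".index"
      steps_and_names.append((int(ckpt_name.split("-")[-1]), ckpt_name))
  return sorted(steps_and_names)

files = ["model.ckpt-200.index", "model.ckpt-100.index", "checkpoint"]
assert checkpoints_by_step(files) == [(100, "model.ckpt-100"),
                                      (200, "model.ckpt-200")]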
def main(_): tf.logging.set_verbosity(tf.logging.INFO) processors = { "iflytek": iFLYTEKDataProcessor, "cluewsc2020": WSCProcessor, "cmnli": CMNLIProcessor, "csl": CslProcessor, "afqmc": AFQMCProcessor, "tnews": TnewsProcessor, "cola": ColaProcessor, "mnli": MnliProcessor, "mrpc": MrpcProcessor, "xnli": XnliProcessor, } tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, FLAGS.init_checkpoint) if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train`, `do_eval` or `do_predict' must be True." ) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) if FLAGS.max_seq_length > bert_config.max_position_embeddings: raise ValueError( "Cannot use sequence length %d because the BERT model " "was only trained up to sequence length %d" % (FLAGS.max_seq_length, bert_config.max_position_embeddings)) tf.gfile.MakeDirs(FLAGS.output_dir) task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) tpu_cluster_resolver = None if FLAGS.use_tpu and FLAGS.tpu_name: tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 if FLAGS.do_train: iterations_per_loop = int( min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps)) else: iterations_per_loop = FLAGS.iterations_per_loop run_config = tf.contrib.tpu.RunConfig( keep_checkpoint_max=FLAGS.keep_checkpoint_max, cluster=tpu_cluster_resolver, master=FLAGS.master, model_dir=FLAGS.output_dir, save_checkpoints_steps=FLAGS.save_checkpoints_steps, tpu_config=tf.contrib.tpu.TPUConfig( iterations_per_loop=iterations_per_loop, num_shards=FLAGS.num_tpu_cores, per_host_input_for_training=is_per_host)) train_examples = None num_train_steps = None num_warmup_steps = None if FLAGS.do_train or FLAGS.do_eval: train_examples = processor.get_train_examples(FLAGS.data_dir) num_train_steps = int( len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) model_fn = model_fn_builder(bert_config=bert_config, num_labels=len(label_list), init_checkpoint=FLAGS.init_checkpoint, learning_rate=FLAGS.learning_rate, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, use_tpu=FLAGS.use_tpu, use_one_hot_embeddings=FLAGS.use_tpu) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. 
estimator = tf.contrib.tpu.TPUEstimator( use_tpu=FLAGS.use_tpu, model_fn=model_fn, config=run_config, train_batch_size=FLAGS.train_batch_size, eval_batch_size=FLAGS.eval_batch_size, predict_batch_size=FLAGS.predict_batch_size) if FLAGS.do_train: train_file = os.path.join(FLAGS.output_dir, "train.tf_record") train_file_exists = os.path.exists(train_file) if not train_file_exists: file_based_convert_examples_to_features(train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file) tf.logging.info("***** Running training *****") tf.logging.info(" Num examples = %d", len(train_examples)) tf.logging.info(" Batch size = %d", FLAGS.train_batch_size) tf.logging.info(" Num steps = %d", num_train_steps) train_input_fn = file_based_input_fn_builder( input_file=train_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True) estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) if FLAGS.do_eval: eval_examples = processor.get_dev_examples(FLAGS.data_dir) num_actual_eval_examples = len(eval_examples) if FLAGS.use_tpu: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. These do NOT count towards the metric (all tf.metrics # support a per-instance weight, and these get a weight of 0.0). while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") file_based_convert_examples_to_features(eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file) tf.logging.info("***** Running evaluation *****") tf.logging.info(" Num examples = %d (%d actual, %d padding)", len(eval_examples), num_actual_eval_examples, len(eval_examples) - num_actual_eval_examples) tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size) # This tells the estimator to run through the entire set. eval_steps = None # However, if running eval on the TPU, you will need to specify the # number of steps. 
if FLAGS.use_tpu: assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) eval_drop_remainder = True if FLAGS.use_tpu else False eval_input_fn = file_based_input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=eval_drop_remainder) result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps) ### select best model ### best_trial_info_file = os.path.join(FLAGS.output_dir, "best_trial.txt") def _best_trial_info(): """Returns information about which checkpoints have been evaled so far.""" if tf.gfile.Exists(best_trial_info_file): with tf.gfile.GFile(best_trial_info_file, "r") as best_info: global_step, best_metric_global_step, metric_value = ( best_info.read().split(":")) global_step = int(global_step) best_metric_global_step = int(best_metric_global_step) metric_value = float(metric_value) else: metric_value = -1 best_metric_global_step = -1 global_step = -1 tf.logging.info( "Best trial info: Step: %s, Best Value Step: %s, " "Best Value: %s", global_step, best_metric_global_step, metric_value) return global_step, best_metric_global_step, metric_value def _remove_checkpoint(checkpoint_path): for ext in ["meta", "data-00000-of-00001", "index"]: src_ckpt = checkpoint_path + ".{}".format(ext) tf.logging.info("removing {}".format(src_ckpt)) tf.gfile.Remove(src_ckpt) def _find_valid_cands(curr_step): filenames = tf.gfile.ListDirectory(FLAGS.output_dir) candidates = [] for filename in filenames: if filename.endswith(".index"): ckpt_name = filename[:-6] idx = ckpt_name.split("-")[-1] if int(idx) > curr_step: candidates.append(filename) return candidates output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") key_name = "eval_accuracy" global_step, best_perf_global_step, best_perf = _best_trial_info() writer = tf.gfile.GFile(output_eval_file, "w") while global_step < num_train_steps: #while global_step < FLAGS.train_step: steps_and_files = {} filenames = tf.gfile.ListDirectory(FLAGS.output_dir) for filename in filenames: if filename.endswith(".index"): ckpt_name = filename[:-6] cur_filename = os.path.join(FLAGS.output_dir, ckpt_name) gstep = int(cur_filename.split("-")[-1]) if gstep not in steps_and_files: tf.logging.info( "Add {} to eval list.".format(cur_filename)) steps_and_files[gstep] = cur_filename tf.logging.info("found {} files.".format(len(steps_and_files))) if not steps_and_files: tf.logging.info( "found 0 file, global step: {}. 
Sleeping.".format( global_step)) time.sleep(60) else: for checkpoint in sorted(steps_and_files.items()): step, checkpoint_path = checkpoint if global_step >= step: if (best_perf_global_step != step and len(_find_valid_cands(step)) > 1): _remove_checkpoint(checkpoint_path) continue result = estimator.evaluate( input_fn=eval_input_fn, steps=eval_steps, checkpoint_path=checkpoint_path) global_step = result["global_step"] tf.logging.info("***** Eval results *****") for key in sorted(result.keys()): tf.logging.info(" %s = %s", key, str(result[key])) writer.write("%s = %s\n" % (key, str(result[key]))) writer.write("best = {}\n".format(best_perf)) if result[key_name] > best_perf: best_perf = result[key_name] best_perf_global_step = global_step elif len(_find_valid_cands(global_step)) > 1: _remove_checkpoint(checkpoint_path) writer.write("=" * 50 + "\n") writer.flush() with tf.gfile.GFile(best_trial_info_file, "w") as best_info: best_info.write("{}:{}:{}".format( global_step, best_perf_global_step, best_perf)) writer.close() for ext in ["meta", "data-00000-of-00001", "index"]: src_ckpt = "model.ckpt-{}.{}".format(best_perf_global_step, ext) tgt_ckpt = "model.ckpt-best.{}".format(ext) tf.logging.info("saving {} to {}".format(src_ckpt, tgt_ckpt)) tf.gfile.Rename(os.path.join(FLAGS.output_dir, src_ckpt), os.path.join(FLAGS.output_dir, tgt_ckpt), overwrite=True) if FLAGS.do_predict: predict_examples = processor.get_test_examples(FLAGS.data_dir) num_actual_predict_examples = len(predict_examples) if FLAGS.use_tpu: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. 
while len(predict_examples) % FLAGS.predict_batch_size != 0: predict_examples.append(PaddingInputExample()) predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record") file_based_convert_examples_to_features(predict_examples, label_list, FLAGS.max_seq_length, tokenizer, predict_file) tf.logging.info("***** Running prediction *****") tf.logging.info(" Num examples = %d (%d actual, %d padding)", len(predict_examples), num_actual_predict_examples, len(predict_examples) - num_actual_predict_examples) tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size) predict_drop_remainder = True if FLAGS.use_tpu else False predict_input_fn = file_based_input_fn_builder( input_file=predict_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=predict_drop_remainder) checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best") result = estimator.predict(input_fn=predict_input_fn, checkpoint_path=checkpoint_path) index2label_map = {} for (i, label) in enumerate(label_list): index2label_map[i] = label output_predict_file_label_name = task_name + "_predict.json" output_predict_file_label = os.path.join( FLAGS.output_dir, output_predict_file_label_name) output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv") with tf.gfile.GFile(output_predict_file_label, "w") as writer_label: with tf.gfile.GFile(output_predict_file, "w") as writer: num_written_lines = 0 tf.logging.info("***** Predict results *****") for (i, prediction) in enumerate(result): probabilities = prediction["probabilities"] label_index = probabilities.argmax(0) if i >= num_actual_predict_examples: break output_line = "\t".join( str(class_probability) for class_probability in probabilities) + "\n" test_label_dict = {} test_label_dict["id"] = i test_label_dict["label"] = str( index2label_map[label_index]) if task_name == "tnews": test_label_dict["label_desc"] = "" writer.write(output_line) json.dump(test_label_dict, writer_label) writer_label.write("\n") num_written_lines += 1 assert num_written_lines == num_actual_predict_examples
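# A minimal standalone sketch of the checkpoint-discovery step used by the
# best-model selection loop above: scan a model directory for ".index" files
# and map each global step to its checkpoint prefix. Illustration only: it
# uses plain `os` instead of `tf.gfile`, and `model_dir` is a hypothetical
# local path, not a name taken from the script.
import os

def list_checkpoints_by_step(model_dir):
    """Return {global_step: checkpoint_prefix} for model.ckpt-<step> files."""
    steps_and_files = {}
    for filename in os.listdir(model_dir):
        if filename.endswith(".index"):
            ckpt_name = filename[:-len(".index")]  # e.g. "model.ckpt-1500"
            step = int(ckpt_name.split("-")[-1])   # e.g. 1500
            steps_and_files[step] = os.path.join(model_dir, ckpt_name)
    return steps_and_files

# Usage, mirroring the loop above: evaluate checkpoints in step order.
# for step, path in sorted(list_checkpoints_by_step("/tmp/output").items()):
#     result = estimator.evaluate(input_fn=eval_input_fn, checkpoint_path=path)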
def main(_): tf.logging.set_verbosity(tf.logging.INFO) processors = { "emotion": EmotionProcessor, "entity": EntityProcessor, } tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, FLAGS.init_checkpoint) if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train`, `do_eval` or `do_predict' must be True." ) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) if FLAGS.max_seq_length > bert_config.max_position_embeddings: raise ValueError( "Cannot use sequence length %d because the BERT model " "was only trained up to sequence length %d" % (FLAGS.max_seq_length, bert_config.max_position_embeddings)) tf.gfile.MakeDirs(FLAGS.output_dir) task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) tpu_cluster_resolver = None if FLAGS.use_tpu and FLAGS.tpu_name: tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 session_config = tf.ConfigProto(log_device_placement=False, inter_op_parallelism_threads=0, intra_op_parallelism_threads=0, allow_soft_placement=True) run_config = tf.estimator.RunConfig(model_dir=FLAGS.output_dir, save_summary_steps=500, save_checkpoints_steps=500, session_config=session_config) # run_config = tf.contrib.tpu.RunConfig( # cluster=tpu_cluster_resolver, # master=FLAGS.master, # model_dir=FLAGS.output_dir, # save_checkpoints_steps=FLAGS.save_checkpoints_steps, # tpu_config=tf.contrib.tpu.TPUConfig( # iterations_per_loop=FLAGS.iterations_per_loop, # num_shards=FLAGS.num_tpu_cores, # per_host_input_for_training=is_per_host)) train_examples = None num_train_steps = None num_warmup_steps = None if FLAGS.do_train and FLAGS.do_eval: train_examples = processor.get_train_examples(FLAGS.data_dir) num_train_steps = int( len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) model_fn = model_fn_builder( bert_config=bert_config, num_labels=len(label_list), init_checkpoint=FLAGS.init_checkpoint, learning_rate=FLAGS.learning_rate, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, # use_tpu=FLAGS.use_tpu, # use_one_hot_embeddings=FLAGS.use_tpu ) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. 
# estimator = tf.contrib.tpu.TPUEstimator( # use_tpu=FLAGS.use_tpu, # model_fn=model_fn, # config=run_config, # train_batch_size=FLAGS.train_batch_size, # eval_batch_size=FLAGS.eval_batch_size, # predict_batch_size=FLAGS.predict_batch_size) params = {'batch_size': FLAGS.train_batch_size} estimator = tf.estimator.Estimator(model_fn, params=params, config=run_config) if FLAGS.do_train and FLAGS.do_eval: train_file = os.path.join(FLAGS.output_dir, "train.tf_record") if not os.path.exists(train_file): file_based_convert_examples_to_features(train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file) tf.logging.info("***** Running training *****") tf.logging.info(" Num examples = %d", len(train_examples)) tf.logging.info(" Batch size = %d", FLAGS.train_batch_size) tf.logging.info(" Num steps = %d", num_train_steps) train_input_fn = file_based_input_fn_builder( input_file=train_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True) #estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) eval_examples = processor.get_dev_examples(FLAGS.data_dir) num_actual_eval_examples = len(eval_examples) if FLAGS.use_tpu: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. These do NOT count towards the metric (all tf.metrics # support a per-instance weight, and these get a weight of 0.0). while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") file_based_convert_examples_to_features(eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file) tf.logging.info("***** Running evaluation *****") tf.logging.info(" Num examples = %d (%d actual, %d padding)", len(eval_examples), num_actual_eval_examples, len(eval_examples) - num_actual_eval_examples) tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size) # This tells the estimator to run through the entire set. eval_steps = None # However, if running eval on the TPU, you will need to specify the # number of steps. 
if FLAGS.use_tpu: assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) eval_drop_remainder = True if FLAGS.use_tpu else False eval_input_fn = file_based_input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=eval_drop_remainder) #result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps) # early stop hook early_stopping_hook = tf.contrib.estimator.stop_if_no_decrease_hook( estimator=estimator, metric_name='loss', max_steps_without_decrease=num_train_steps, eval_dir=None, min_steps=0, run_every_secs=None, run_every_steps=FLAGS.save_checkpoints_steps) train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=num_train_steps, hooks=[early_stopping_hook]) eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn) tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) # output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") # with tf.gfile.GFile(output_eval_file, "w") as writer: # tf.logging.info("***** Eval results *****") # for key in sorted(result.keys()): # tf.logging.info(" %s = %s", key, str(result[key])) # writer.write("%s = %s\n" % (key, str(result[key]))) if FLAGS.do_predict: predict_examples = processor.get_test_examples(FLAGS.data_dir) num_actual_predict_examples = len(predict_examples) if FLAGS.use_tpu: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. while len(predict_examples) % FLAGS.predict_batch_size != 0: predict_examples.append(PaddingInputExample()) predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record") file_based_convert_examples_to_features(predict_examples, label_list, FLAGS.max_seq_length, tokenizer, predict_file) tf.logging.info("***** Running prediction*****") tf.logging.info(" Num examples = %d (%d actual, %d padding)", len(predict_examples), num_actual_predict_examples, len(predict_examples) - num_actual_predict_examples) tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size) predict_drop_remainder = True if FLAGS.use_tpu else False predict_input_fn = file_based_input_fn_builder( input_file=predict_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=predict_drop_remainder) result = estimator.predict(input_fn=predict_input_fn) output_predict_file = os.path.join(FLAGS.output_dir, "test_results.txt") tf.logging.info("***** Predict results *****") res = [] label_lists = processor.get_labels() for (i, prediction) in enumerate(result): #print(prediction) probabilities = prediction["probabilities"] pred_label = label_lists[np.argmax(probabilities)] res.append(pred_label) processor.save_predict(res, output_predict_file)
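# A hedged, self-contained sketch of the early-stopping pattern used above:
# tf.estimator.train_and_evaluate with tf.contrib.estimator.stop_if_no_decrease_hook
# watching the eval loss. The toy model_fn/input_fn are illustrative stand-ins
# (TF 1.x APIs), not parts of the original script; the model_dir path and step
# counts are made-up values.
import numpy as np
import tensorflow as tf

def toy_input_fn():
    x = np.random.rand(128, 4).astype(np.float32)
    y = (x.sum(axis=1) > 2.0).astype(np.int32)
    return tf.data.Dataset.from_tensor_slices(({"x": x}, y)).repeat().batch(32)

def toy_model_fn(features, labels, mode, params):
    logits = tf.layers.dense(features["x"], 2)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer(1e-3).minimize(
            loss, global_step=tf.train.get_or_create_global_step())
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
    return tf.estimator.EstimatorSpec(mode, loss=loss)

toy_estimator = tf.estimator.Estimator(toy_model_fn, model_dir="/tmp/toy_model")
# Stop once the eval loss has not decreased within 500 train steps; the hook
# reads the metrics written by the eval spec and checks every 100 steps.
stop_hook = tf.contrib.estimator.stop_if_no_decrease_hook(
    toy_estimator, metric_name="loss", max_steps_without_decrease=500,
    run_every_secs=None, run_every_steps=100)
train_spec = tf.estimator.TrainSpec(toy_input_fn, max_steps=5000, hooks=[stop_hook])
eval_spec = tf.estimator.EvalSpec(toy_input_fn, steps=10)
tf.estimator.train_and_evaluate(toy_estimator, train_spec, eval_spec)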
def main(_): tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) processors = { "cola": ColaProcessor, "mnlim": MnliMProcessor, "mnlimm": MnliMMProcessor, "mrpc": MrpcProcessor, "qnli": QnliProcessor, "qqp": QqpProcessor, "rte": RteProcessor, "sst2": Sst2Processor, "stsb": StsbProcessor, "wnli": WnliProcessor, "ax": AxProcessor, "mnlimdevastest": MnliMDevAsTestProcessor } tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, FLAGS.init_checkpoint) if not FLAGS.do_train and \ not FLAGS.do_eval and \ not FLAGS.do_pred: raise ValueError( "At least one of 'do_train', 'do_eval' or 'do_pred' must be True.") bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) if FLAGS.max_seq_length > bert_config.max_position_embeddings: raise ValueError( "Cannot use sequence length %d because the BERT model " "was only trained up to sequence length %d" % (FLAGS.max_seq_length, bert_config.max_position_embeddings)) tf.io.gfile.makedirs(FLAGS.output_dir) task_name = FLAGS.task_name.lower() print("Current task", task_name) if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() # special handling for mnlimdevastest if task_name == 'mnlimdevastest': task_name = 'mnlim' label_list = processor.get_labels() print("Label list of current task", label_list) tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) train_examples = processor.get_train_examples(FLAGS.data_dir) eval_examples = processor.get_dev_examples(FLAGS.data_dir) num_actual_train_examples = len(train_examples) num_actual_eval_examples = len(eval_examples) print("num_actual_train_examples", num_actual_train_examples) print("num_actual_eval_examples", num_actual_eval_examples) if FLAGS.do_pred: test_examples = processor.get_test_examples(FLAGS.data_dir) num_actual_test_examples = len(test_examples) print("num_actual_test_examples", num_actual_test_examples) batch_size = FLAGS.train_batch_size epochs = FLAGS.num_train_epochs embed_dim = FLAGS.hidden_size # hidden size, 768 for BERT-base, 512 for BERT-small seq_length = FLAGS.max_seq_length num_labels = len(label_list) # Define some placeholders for the input input_ids_ph = tf.compat.v1.placeholder(tf.int32, shape=[None, seq_length], name='input_ids') input_mask_ph = tf.compat.v1.placeholder(tf.int32, shape=[None, seq_length], name='input_mask') segment_ids_ph = tf.compat.v1.placeholder(tf.int32, shape=[None, seq_length], name='segment_ids') label_ids_ph = tf.compat.v1.placeholder(tf.int32, shape=[ None, ], name='label_ids') tf.compat.v1.logging.info("Running freezing experiments!") num_train_steps = num_actual_train_examples // batch_size * epochs num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) # get the layer(s) which need freezing. if FLAGS.layers is None: raise ValueError("In freezing experiments, layers must not be None. 
") layer_folder_name = FLAGS.layers freeze_layers = list(map(int, FLAGS.layers.split(','))) freeze_layers.sort() print("Current layers: ", freeze_layers) # multiple random runs if FLAGS.tf_seed is not None: tf.compat.v1.random.set_random_seed(FLAGS.tf_seed) layer_folder_name = "{}_{}".format(layer_folder_name, FLAGS.tf_seed) # this placeholder is to control the flag for the dropout keep_prob_ph = tf.compat.v1.placeholder(tf.float32, name="keep_prob") is_training_ph = tf.compat.v1.placeholder(tf.bool, name='is_training') model = modeling.BertModel( config=bert_config, is_training=is_training_ph, input_ids=input_ids_ph, # input_ids, input_mask=input_mask_ph, # input_mask, token_type_ids=segment_ids_ph, # segment_ids, use_one_hot_embeddings=False, use_estimator=False) output_layer = model.get_pooled_output() output_layer = tf.nn.dropout(output_layer, keep_prob=keep_prob_ph) output_weights = tf.get_variable( "output_weights", [num_labels, embed_dim], initializer=tf.truncated_normal_initializer(stddev=0.02)) output_bias = tf.get_variable("output_bias", [num_labels], initializer=tf.zeros_initializer()) logits = tf.matmul(output_layer, output_weights, transpose_b=True) logits = tf.nn.bias_add(logits, output_bias) with tf.compat.v1.variable_scope("loss"): # for stsb if num_labels == 1: logits = tf.squeeze(logits, [-1]) per_example_loss = tf.square(logits - label_ids_ph) loss = tf.reduce_mean(per_example_loss) else: log_probs = tf.nn.log_softmax(logits, axis=-1) one_hot_labels = tf.one_hot(label_ids_ph, depth=num_labels, dtype=tf.float32) per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1) loss = tf.reduce_mean(per_example_loss) predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) # metric and summary # metric is tf.metric object, (val, op) metric = metric_fn(per_example_loss, label_ids_ph, logits, num_labels, task_name) metric_name = list(metric.keys()) metric_val = [m[0] for m in metric.values()] metric_op = [m[1] for m in metric.values()] metric_phs = [ tf.compat.v1.placeholder(tf.float32, name="{}_ph".format(key)) for key in metric.keys() ] summaries = [ tf.compat.v1.summary.scalar(key, metric_phs[i]) for i, key in enumerate(metric.keys()) ] train_summary_total = tf.summary.merge(summaries) eval_summary_total = tf.summary.merge(summaries) log_dir = FLAGS.output_dir + 'layer_{}/'.format(layer_folder_name) init_checkpoint = FLAGS.init_checkpoint tvars = tf.compat.v1.trainable_variables() var_init = [ v for v in tvars if 'output_weights' not in v.name and 'output_bias' not in v.name ] var_output = [ v for v in tvars if 'output_weights' in v.name or "output_bias" in v.name ] # parameters need to be frozen if FLAGS.freeze_part == 'ffn': var_freeze = [ v for i in freeze_layers for v in var_init if "layer_{}/intermediate/dense".format(i) in v.name or "layer_{}/output/dense".format(i) in v.name ] elif FLAGS.freeze_part == 'att': var_freeze = [ v for i in freeze_layers for v in var_init if "layer_{}/attention/output/dense".format(i) in v.name or "layer_{}/attention/self".format(i) in v.name ] elif FLAGS.freeze_part == 'encoder': var_freeze = [ v for i in freeze_layers for v in var_init if "layer_{}/".format(i) in v.name ] elif FLAGS.freeze_part == 'pooler+embedding': var_freeze = [ v for v in var_init if "pooler" in v.name or "embeddings" in v.name ] elif FLAGS.freeze_part == 'allbutoutput': var_freeze = var_init elif FLAGS.freeze_part == 'allbutpooler+output': var_freeze = [v for v in var_init if 'pooler' not in v.name] elif FLAGS.freeze_part == 'allbutonelayer+output': 
var_freeze = [ v for i in freeze_layers for v in var_init if "layer_{}/".format(i) not in v.name ] elif FLAGS.freeze_part == 'allbutonelayer+pooler+output': var_freeze = [ v for i in freeze_layers for v in var_init if "layer_{}/".format(i) not in v.name and 'pooler' not in v.name ] elif FLAGS.freeze_part == 'nothing': var_freeze = [] else: raise ValueError("freeze_part should be specified. ") print("Freezing parameters") for v in var_freeze: print(v) if not FLAGS.load_from_finetuned: # Init from Model0 saver_init = tf.train.Saver(var_init) else: # Init from Model1 saver_init = tf.train.Saver(var_init + var_output) var_train = [v for v in var_init if v not in var_freeze] + var_output print("Training parameters") for v in var_train: print(v) train_op = optimization.create_optimizer(loss=loss, init_lr=FLAGS.learning_rate, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, use_tpu=False, tvars=var_train) saver_all = tf.train.Saver(var_list=var_train + var_freeze, max_to_keep=1) # Isolate the variables stored behind the scenes by the metric operation var_metric = [] for key in metric.keys(): var_metric.extend( tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope=key)) # Define initializer to initialize/reset running variables metric_vars_initializer = tf.variables_initializer(var_list=var_metric) config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True with tf.compat.v1.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) saver_init.restore(sess, init_checkpoint) writer = tf.compat.v1.summary.FileWriter(log_dir + 'log/train/', sess.graph) writer_eval = tf.compat.v1.summary.FileWriter(log_dir + 'log/eval/') # if number of eval examples < 1000, just load it directly, or load by batch. if num_actual_eval_examples <= 1000: eval_input_ids, eval_input_mask, eval_segment_ids, \ eval_label_ids, eval_is_real_example = generate_ph_input(batch_size=num_actual_eval_examples, seq_length=seq_length, examples=eval_examples, label_list=label_list, tokenizer=tokenizer) start_metric = {"eval_{}".format(key): 0 for key in metric_name} if FLAGS.do_train: tf.logging.info("***** Run training *****") step = 1 for n in range(epochs): np.random.shuffle(train_examples) num_batch = num_actual_train_examples // batch_size if num_actual_train_examples % batch_size == 0 \ else num_actual_train_examples // batch_size + 1 id = 0 for b in range(num_batch): input_ids, input_mask, \ segment_ids, label_ids, is_real_example = generate_ph_input(batch_size=batch_size, seq_length=seq_length, examples=train_examples, label_list=label_list, tokenizer=tokenizer, train_idx_offset=id) id += batch_size sess.run(metric_vars_initializer) sess.run([train_op] + metric_op, feed_dict={ input_ids_ph: input_ids, input_mask_ph: input_mask, segment_ids_ph: segment_ids, label_ids_ph: label_ids, is_training_ph: True, keep_prob_ph: 0.9 }) train_metric_val = sess.run(metric_val) train_summary_str = sess.run( train_summary_total, feed_dict={ ph: value for ph, value in zip(metric_phs, train_metric_val) }) writer.add_summary(train_summary_str, step) if step % 100 == 0 or step % num_batch == 0 or step == 1: # evaluate on dev set if num_actual_eval_examples <= 1000: sess.run(metric_vars_initializer) sess.run(metric_op, feed_dict={ input_ids_ph: eval_input_ids, input_mask_ph: eval_input_mask, segment_ids_ph: eval_segment_ids, label_ids_ph: eval_label_ids, is_training_ph: False, keep_prob_ph: 1 }) eval_metric_val = sess.run(metric_val) eval_summary_str = sess.run( eval_summary_total, feed_dict={ ph: 
value for ph, value in zip( metric_phs, eval_metric_val) }) else: num_batch_eval = num_actual_eval_examples // batch_size \ if num_actual_eval_examples % batch_size == 0 \ else num_actual_eval_examples // batch_size + 1 id_eval = 0 sess.run(metric_vars_initializer) for _ in range(num_batch_eval): eval_input_ids, eval_input_mask, eval_segment_ids, \ eval_label_ids, eval_is_real_example = generate_ph_input(batch_size=batch_size, seq_length=seq_length, examples=eval_examples, label_list=label_list, tokenizer=tokenizer, train_idx_offset=id_eval) id_eval += batch_size sess.run(metric_op, feed_dict={ input_ids_ph: eval_input_ids, input_mask_ph: eval_input_mask, segment_ids_ph: eval_segment_ids, label_ids_ph: eval_label_ids, is_training_ph: False, keep_prob_ph: 1 }) eval_metric_val = sess.run(metric_val) eval_summary_str = sess.run( eval_summary_total, feed_dict={ ph: value for ph, value in zip( metric_phs, eval_metric_val) }) writer_eval.add_summary(eval_summary_str, step) if step == 1: for key, val in zip(metric_name, eval_metric_val): start_metric["eval_{}".format(key)] = val if step % 100 == 0 or step % num_batch == 0 or step == 1: train_metric_list = [] for i in range(len(train_metric_val)): if metric_name[i] == 'loss': train_metric_list.append( "{}: %2.4f".format(metric_name[i]) % train_metric_val[i]) else: train_metric_list.append( "{}: %.4f".format(metric_name[i]) % train_metric_val[i]) train_str = 'Train ' + '|'.join(train_metric_list) eval_metric_list = [] for i in range(len(eval_metric_val)): if metric_name[i] == 'loss': eval_metric_list.append( "{}: %2.4f".format(metric_name[i]) % eval_metric_val[i]) else: eval_metric_list.append( "{}: %.4f".format(metric_name[i]) % eval_metric_val[i]) eval_str = 'Eval ' + '|'.join(eval_metric_list) print( "Freezing {} | Epoch: %4d/%4d | Batch: %4d/%4d | {} | {}" .format(layer_folder_name, train_str, eval_str) % (n, epochs, b, num_batch)) if step % num_batch == 0: saver_all.save(sess, log_dir + 'freeze_{}'.format(layer_folder_name), global_step=step) step += 1 writer.close() writer_eval.close() end_metric = {"eval_{}".format(key): 0 for key in metric_name} if FLAGS.do_eval: tf.logging.info("***** Run evaluation *****") if num_actual_eval_examples <= 1000: sess.run(metric_vars_initializer) sess.run(metric_op, feed_dict={ input_ids_ph: eval_input_ids, input_mask_ph: eval_input_mask, segment_ids_ph: eval_segment_ids, label_ids_ph: eval_label_ids, is_training_ph: False, keep_prob_ph: 1 }) eval_metric_val = sess.run(metric_val) preds = sess.run(predictions, feed_dict={ input_ids_ph: eval_input_ids, input_mask_ph: eval_input_mask, segment_ids_ph: eval_segment_ids, label_ids_ph: eval_label_ids, is_training_ph: False, keep_prob_ph: 1 }) eval_label_ids_lst = eval_label_ids else: num_batch_eval = num_actual_eval_examples // batch_size \ if num_actual_eval_examples % batch_size == 0 \ else num_actual_eval_examples // batch_size + 1 id_eval = 0 preds = np.zeros(num_actual_eval_examples) eval_label_ids_lst = np.zeros(num_actual_eval_examples) sess.run(metric_vars_initializer) for i in range(num_batch_eval): eval_input_ids, eval_input_mask, eval_segment_ids, \ eval_label_ids, eval_is_real_example = generate_ph_input(batch_size=batch_size, seq_length=seq_length, examples=eval_examples, label_list=label_list, tokenizer=tokenizer, train_idx_offset=id_eval) id_eval += batch_size sess.run(metric_op, feed_dict={ input_ids_ph: eval_input_ids, input_mask_ph: eval_input_mask, segment_ids_ph: eval_segment_ids, label_ids_ph: eval_label_ids, is_training_ph: False, 
keep_prob_ph: 1 }) pred = sess.run(predictions, feed_dict={ input_ids_ph: eval_input_ids, input_mask_ph: eval_input_mask, segment_ids_ph: eval_segment_ids, label_ids_ph: eval_label_ids, is_training_ph: False, keep_prob_ph: 1 }) preds[i * batch_size:min(id_eval, num_actual_eval_examples )] = pred[:] eval_label_ids_lst[i * batch_size:min( id_eval, num_actual_eval_examples)] = eval_label_ids[:] eval_metric_val = sess.run(metric_val) for key, val in zip(metric_name, eval_metric_val): end_metric["eval_{}".format(key)] = val output_predict_file = os.path.join(log_dir, 'dev_predictions.tsv') writer_output = tf.io.gfile.GFile(output_predict_file, "w") preds = preds.astype(int) eval_label_ids_lst = eval_label_ids_lst.astype(int) num_written_lines = 0 if task_name != 'stsb': writer_output.write( "ID \t Label ID \t Label \t Ground Truth ID \t Ground Truth \n" ) else: writer_output.write("ID \t Label \n") for (i, pred) in enumerate(preds): if task_name != 'stsb': writer_output.write("{} \t {} \t {} \t {} \t {} \n".format( i, pred, label_list[pred], eval_label_ids_lst[i], label_list[eval_label_ids_lst[i]])) else: writer_output.write("{} \t {} \n".format( num_written_lines, pred)) writer_output.close() tf.logging.info("***** Finish writing *****") print("Start metric", start_metric) print("End metric", end_metric) test_metric = {"test_{}".format(key): 0 for key in metric_name} if FLAGS.do_pred: # if number of test examples < 1000, just load it directly, or load by batch. # prediction tf.logging.info("***** Predict results *****") if num_actual_test_examples <= 1000: test_input_ids, test_input_mask, test_segment_ids, \ test_label_ids, test_is_real_example = generate_ph_input(batch_size=num_actual_test_examples, seq_length=seq_length, examples=test_examples, label_list=label_list, tokenizer=tokenizer) sess.run(metric_vars_initializer) sess.run(metric_op, feed_dict={ input_ids_ph: test_input_ids, input_mask_ph: test_input_mask, segment_ids_ph: test_segment_ids, label_ids_ph: test_label_ids, is_training_ph: False, keep_prob_ph: 1 }) test_metric_val = sess.run(metric_val) preds = sess.run(predictions, feed_dict={ input_ids_ph: test_input_ids, input_mask_ph: test_input_mask, segment_ids_ph: test_segment_ids, label_ids_ph: test_label_ids, is_training_ph: False, keep_prob_ph: 1 }) test_label_ids_lst = test_label_ids else: num_batch_test = num_actual_test_examples // batch_size \ if num_actual_test_examples % batch_size == 0 \ else num_actual_test_examples // batch_size + 1 id_test = 0 preds = np.zeros(num_actual_test_examples) test_label_ids_lst = np.zeros(num_actual_test_examples) sess.run(metric_vars_initializer) for i in range(num_batch_test): test_input_ids, test_input_mask, test_segment_ids, \ test_label_ids, test_is_real_example = generate_ph_input(batch_size=batch_size, seq_length=seq_length, examples=test_examples, label_list=label_list, tokenizer=tokenizer, train_idx_offset=id_test) id_test += batch_size sess.run(metric_op, feed_dict={ input_ids_ph: test_input_ids, input_mask_ph: test_input_mask, segment_ids_ph: test_segment_ids, label_ids_ph: test_label_ids, is_training_ph: False, keep_prob_ph: 1 }) pred = sess.run(predictions, feed_dict={ input_ids_ph: test_input_ids, input_mask_ph: test_input_mask, segment_ids_ph: test_segment_ids, label_ids_ph: test_label_ids, is_training_ph: False, keep_prob_ph: 1 }) preds[i * batch_size:min(id_test, num_actual_test_examples )] = pred[:] test_label_ids_lst[i * batch_size:min( id_test, num_actual_test_examples)] = test_label_ids[:] test_metric_val = 
sess.run(metric_val) for key, val in zip(metric_name, test_metric_val): test_metric["test_{}".format(key)] = val output_predict_file = os.path.join(log_dir, 'test_predictions.tsv') submit_predict_file = os.path.join( log_dir, "{}.tsv".format(standard_file_name[task_name])) writer_output = tf.io.gfile.GFile(output_predict_file, "w") writer_submit = tf.io.gfile.GFile(submit_predict_file, 'w') preds = preds.astype(int) test_label_ids_lst = test_label_ids_lst.astype(int) num_written_lines = 0 if task_name != 'stsb': writer_output.write( "ID \t Label ID \t Label \t Ground Truth ID \t Ground Truth \n" ) else: writer_output.write("ID \t Label \n") writer_submit.write("ID \t Label \n") for (i, pred) in enumerate(preds): if task_name != 'stsb': writer_output.write("{} \t {} \t {} \t {} \t {} \n".format( i, pred, label_list[pred], test_label_ids_lst[i], label_list[test_label_ids_lst[i]])) writer_submit.write("{} \t {} \n".format( i, label_list[pred])) else: writer_output.write("{} \t {} \n".format( num_written_lines, pred)) writer_submit.write("{} \t {} \n".format(i, pred)) writer_output.close() writer_submit.close() tf.logging.info("***** Finish writing *****") with tf.io.gfile.GFile(FLAGS.output_dir + 'results.txt', 'a') as writer: eval_start, eval_end, test_end = [], [], [] for metric in metric_name: if metric != 'loss': eval_start.append("{}: %.4f".format(metric) % start_metric["eval_{}".format(metric)]) eval_end.append("{}: %.4f".format(metric) % end_metric["eval_{}".format(metric)]) test_end.append("{}: %.4f".format(metric) % test_metric["test_{}".format(metric)]) writer.write( "Freezing {}: Dev start: {} | Dev end: {} | Test end: {}\n". format(layer_folder_name, ','.join(eval_start), ','.join(eval_end), ','.join(test_end)))
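# A minimal sketch of the layer-freezing mechanism used above: partition the
# trainable variables by scope name and hand only the still-trainable subset
# to the optimizer via var_list. The tiny two-"layer" graph is a stand-in
# whose "layer_<i>/" scopes mimic the BERT variable naming the script
# filters on; it is not the script's own model.
import tensorflow as tf

tf.compat.v1.disable_eager_execution()  # TF 1.x-style graph mode
x = tf.compat.v1.placeholder(tf.float32, [None, 8])
with tf.compat.v1.variable_scope("layer_0"):
    h = tf.compat.v1.layers.dense(x, 8)
with tf.compat.v1.variable_scope("layer_1"):
    out = tf.compat.v1.layers.dense(h, 1)
loss = tf.reduce_mean(tf.square(out))

freeze_layers = [0]  # freeze everything under layer_0/
tvars = tf.compat.v1.trainable_variables()
var_freeze = [v for i in freeze_layers for v in tvars
              if "layer_{}/".format(i) in v.name]
var_train = [v for v in tvars if v not in var_freeze]
# Gradients are computed and applied only for var_train; frozen variables keep
# their initial (or checkpoint-restored) values throughout training.
train_op = tf.compat.v1.train.AdamOptimizer(1e-3).minimize(loss, var_list=var_train)
print([v.name for v in var_train])  # only layer_1/* variables remain trainable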
def main(_): tf.logging.set_verbosity(tf.logging.INFO) processors = { "ner": CoNLL2013Processor, } skip_flags = ('load_config_file', 'do_train', 'do_eval', 'do_predict', 'eval_fole', 'test_file', 'pred_results_file', 'checkpoint_path') if FLAGS.load_config_file: with open(FLAGS.load_config_file, 'r') as reader: for name, value in json.loads(reader.read()).items(): if name not in skip_flags: FLAGS.__flags[name].value = value bert_config_file = os.path.join(FLAGS.init_checkpoint_dir, 'bert_config.json') vocab_file = os.path.join(FLAGS.init_checkpoint_dir, 'vocab.txt') if FLAGS.finetune_checkpoint: init_checkpoint = FLAGS.finetune_checkpoint else: init_checkpoint = os.path.join(FLAGS.init_checkpoint_dir, 'bert_model.ckpt') tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, init_checkpoint) if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train`, `do_eval` or `do_predict' must be True.") bert_config = modeling.BertConfig.from_json_file(bert_config_file) if FLAGS.max_seq_length > bert_config.max_position_embeddings: raise ValueError( "Cannot use sequence length %d because the BERT model " "was only trained up to sequence length %d" % (FLAGS.max_seq_length, bert_config.max_position_embeddings)) tf.gfile.MakeDirs(FLAGS.output_dir) # NOTE get TF logger logging.basicConfig( level=logging.INFO, format="INFO#%(asctime)s# %(message)s", datefmt='%Y-%m-%d %H:%M:%S', handlers=[logging.FileHandler(os.path.join(FLAGS.output_dir, 'log.log'))] ) tf.logging.info("*"*10+" Config "+"*"*10+": \n{}".format( FLAGS.flag_values_dict())) if FLAGS.do_train: with open(os.path.join(FLAGS.output_dir, 'config.json'), 'w', encoding='utf-8') as writer: writer.write(to_json_string(FLAGS.flag_values_dict())) task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() label_map = {} for (i, label) in enumerate(label_list): label_map[label] = i tokenizer = tokenization.FullTokenizer( vocab_file=vocab_file, do_lower_case=FLAGS.do_lower_case) tpu_cluster_resolver = None if FLAGS.use_tpu and FLAGS.tpu_name: tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 run_config = tf.contrib.tpu.RunConfig( cluster=tpu_cluster_resolver, master=FLAGS.master, model_dir=FLAGS.output_dir, save_checkpoints_steps=FLAGS.save_checkpoints_steps, keep_checkpoint_max=FLAGS.keep_checkpoint_max, tpu_config=tf.contrib.tpu.TPUConfig( iterations_per_loop=FLAGS.iterations_per_loop, num_shards=FLAGS.num_tpu_cores, per_host_input_for_training=is_per_host)) train_examples = None num_train_steps = None num_warmup_steps = None if FLAGS.do_train: train_examples = processor.get_train_examples(FLAGS.data_dir) num_train_steps = int( len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) model_fn = model_fn_builder( bert_config=bert_config, label_list=label_list, init_checkpoint=init_checkpoint, learning_rate=FLAGS.learning_rate, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, use_tpu=FLAGS.use_tpu, use_one_hot_embeddings=FLAGS.use_tpu) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. 
estimator = tf.contrib.tpu.TPUEstimator( use_tpu=FLAGS.use_tpu, model_fn=model_fn, config=run_config, train_batch_size=FLAGS.train_batch_size, eval_batch_size=FLAGS.eval_batch_size, predict_batch_size=FLAGS.predict_batch_size) if FLAGS.do_train: num_train_files = -(-len(train_examples) // FLAGS.shard_size) train_files = [os.path.join(FLAGS.output_dir, "train.tf_record_"+f"{i:05d}") for i in range(num_train_files)] tf.logging.info("*** Writing training examples to tf record files ***") for train_file in train_files: tf.logging.info(" %s", train_file) if not FLAGS.read_record: [tf.gfile.Remove(_f) for _f in tf.gfile.Glob( os.path.join(FLAGS.output_dir, "train.tf_record*"))] file_based_convert_examples_to_features( train_examples, label_map, FLAGS.max_seq_length, FLAGS.seq_overlap, tokenizer, train_files) tf.logging.info("***** Running training *****") tf.logging.info(" Num examples = %d", len(train_examples)) tf.logging.info(" Batch size = %d", FLAGS.train_batch_size) tf.logging.info(" Num steps = %d", num_train_steps) train_input_fn = file_based_input_fn_builder( input_files=train_files, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True) estimator.train(input_fn=train_input_fn, max_steps=num_train_steps) if FLAGS.do_eval: eval_examples = processor.get_dev_examples(FLAGS.data_dir) num_actual_eval_examples = len(eval_examples) if FLAGS.use_tpu: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. These do NOT count towards the metric (all tf.metrics # support a per-instance weight, and these get a weight of 0.0). while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_files = [os.path.join(FLAGS.output_dir, "eval.tf_record")] file_based_convert_examples_to_features( eval_examples, label_map, FLAGS.max_seq_length, FLAGS.seq_overlap, tokenizer, eval_files) tf.logging.info("***** Running evaluation *****") tf.logging.info(" Num examples = %d (%d actual, %d padding)", len(eval_examples), num_actual_eval_examples, len(eval_examples) - num_actual_eval_examples) tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size) # This tells the estimator to run through the entire set. eval_steps = None # However, if running eval on the TPU, you will need to specify the # number of steps. if FLAGS.use_tpu: assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) eval_drop_remainder = True if FLAGS.use_tpu else False eval_input_fn = file_based_input_fn_builder( input_files=eval_files, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=eval_drop_remainder) tf.logging.info("***** Eval results *****") output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") results = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps, checkpoint_path=FLAGS.checkpoint_path) with open(output_eval_file, "a") as writer: tf.logging.info(write_eval(writer, results, label_list, label_list[1:])) # NOTE self-defined metric predictions = estimator.predict(input_fn=eval_input_fn, checkpoint_path=FLAGS.checkpoint_path, yield_single_examples=True) predict_entitys = reconstruct_from_estimator(eval_examples, predictions) try: next(predictions) except StopIteration: print("Complete Evaluation!") else: tf.logging.info("ERROR: Output examples number not matched! 
\ This is likely due to bugs in splitting and reconstructing long text ") pred = [[e.decode() for e in entity] for entity in predict_entitys] label = [example.text_entity for example in eval_examples] sb_measure = SpanBasedF1Measure() sb_measure(pred, label) tf.logging.info(sb_measure.log_measure(output_eval_file)) if FLAGS.do_predict: # For demo only; the actual predict phase should be run through another API predict_examples = processor.get_test_examples(FLAGS.data_dir) predict_files = [os.path.join(FLAGS.output_dir, "predict.tf_record")] file_based_convert_examples_to_features(predict_examples, label_map, FLAGS.max_seq_length, FLAGS.seq_overlap, tokenizer, predict_files) tf.logging.info("***** Running prediction *****") tf.logging.info(" Num examples = %d", len(predict_examples)) tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size) predict_drop_remainder = True if FLAGS.use_tpu else False predict_input_fn = file_based_input_fn_builder( input_files=predict_files, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=predict_drop_remainder) predict_results = estimator.predict(input_fn=predict_input_fn, checkpoint_path=FLAGS.checkpoint_path, yield_single_examples=True) output_predict_file = os.path.join(FLAGS.output_dir, FLAGS.pred_results_file) tf.logging.info("***** Predict results *****") # Writing prediction results for CoNLL-2003 perl evaluation conlleval.pl with open(output_predict_file, 'w', encoding='utf-8') as writer: for example, predict_result in zip(predict_examples, predict_results): tokens = [token for word in example.words for token in tokenizer.tokenize(word)] words = (word for word in example.words) for i, tag in enumerate(predict_result): if tokens[i].startswith("##"): continue line = "{}\t{}\t{}\n".format(next(words), example.labels[i], tag) writer.write(line)
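# A small, self-contained sketch of the WordPiece-to-word realignment used
# when writing the CoNLL-style predictions above: keep one predicted tag per
# word by skipping "##" continuation pieces. The token/tag lists here are
# hand-made illustrations, not output of tokenization.FullTokenizer.
def align_tags_to_words(words, wordpieces, piece_tags):
    """wordpieces and piece_tags are parallel lists from a BERT-style tokenizer."""
    word_tags = []
    for piece, tag in zip(wordpieces, piece_tags):
        if piece.startswith("##"):  # continuation of the previous word
            continue
        word_tags.append(tag)
    assert len(word_tags) == len(words)
    return list(zip(words, word_tags))

# Example: "washington" split into "washing", "##ton" keeps only the first tag.
print(align_tags_to_words(
    ["john", "visited", "washington"],
    ["john", "visited", "washing", "##ton"],
    ["B-PER", "O", "B-LOC", "B-LOC"]))
# -> [('john', 'B-PER'), ('visited', 'O'), ('washington', 'B-LOC')]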
def main(_): tf.logging.set_verbosity(tf.logging.INFO) tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, FLAGS.init_checkpoint) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) if FLAGS.max_seq_length > bert_config.max_position_embeddings: raise ValueError( "Cannot use sequence length %d because the BERT model " "was only trained up to sequence length %d" % (FLAGS.max_seq_length, bert_config.max_position_embeddings)) tf.gfile.MakeDirs(FLAGS.output_dir) processor = MrpcProcessor() label_list = processor.get_labels() tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) tpu_cluster_resolver = None if FLAGS.use_tpu and FLAGS.tpu_name: tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 run_config = tf.contrib.tpu.RunConfig( cluster=tpu_cluster_resolver, master=FLAGS.master, model_dir=FLAGS.output_dir, save_checkpoints_steps=FLAGS.save_checkpoints_steps, tpu_config=tf.contrib.tpu.TPUConfig( iterations_per_loop=FLAGS.iterations_per_loop, num_shards=FLAGS.num_tpu_cores, per_host_input_for_training=is_per_host)) train_examples = None num_train_steps = None num_warmup_steps = None model_fn = model_fn_builder(bert_config=bert_config, num_labels=len(label_list), init_checkpoint=FLAGS.init_checkpoint, learning_rate=FLAGS.learning_rate, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, use_tpu=FLAGS.use_tpu, use_one_hot_embeddings=FLAGS.use_tpu) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. estimator = tf.contrib.tpu.TPUEstimator( use_tpu=FLAGS.use_tpu, model_fn=model_fn, config=run_config, train_batch_size=FLAGS.train_batch_size, eval_batch_size=FLAGS.eval_batch_size, predict_batch_size=FLAGS.predict_batch_size) app = Flask(__name__) if FLAGS.do_predict: predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record") predict_drop_remainder = True if FLAGS.use_tpu else False @app.route('/pred', methods=['POST', 'GET']) def index(): response = {} try: data = request.json predict_examples = [ InputExample('predict', data["sentence1"], data["sentence2"], '0') ] num_actual_predict_examples = len(predict_examples) if FLAGS.use_tpu: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. while len( predict_examples) % FLAGS.predict_batch_size != 0: predict_examples.append(PaddingInputExample()) file_based_convert_examples_to_features( predict_examples, label_list, FLAGS.max_seq_length, tokenizer, predict_file) predict_input_fn = file_based_input_fn_builder( input_file=predict_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=predict_drop_remainder) result = estimator.predict(input_fn=predict_input_fn) api_return = [] # a list, so that batched predictions for multiple sentence pairs can be returned in the future for (i, prediction) in enumerate(result): probabilities = prediction["probabilities"] api_return.append(probabilities) response["prediction_0"] = str(list(api_return[0])[0]) response["prediction_1"] = str(list(api_return[0])[1]) except Exception as e: response[ "error_message"] = "An error occurred; please read the documentation or contact me at [email protected]" return json.dumps(response) app.run("0.0.0.0", port=5005, threaded=True)
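# A hedged usage sketch for the /pred endpoint above, written with the
# third-party `requests` library (an assumption; any HTTP client works). The
# host and port match the app.run(...) call; the field names mirror the
# handler, which reads "sentence1"/"sentence2" and returns two class
# probabilities as strings. The example sentences are made up.
import requests

resp = requests.post(
    "http://localhost:5005/pred",
    json={"sentence1": "The company reported record profits.",
          "sentence2": "Profits hit an all-time high."})
print(resp.json())
# e.g. {"prediction_0": "0.03...", "prediction_1": "0.96..."}
# or {"error_message": "..."} if the request body was malformed.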
def main(_): import time start_time = time.time() if FLAGS.use_perseus and FLAGS.use_horovod: raise ValueError( "Could not set use_perseus and use_horovod at the same time.") if FLAGS.use_perseus: import perseus.tensorflow.horovod as hvd hvd.init() if FLAGS.use_horovod: import horovod.tensorflow as hvd hvd.init() config = tf.ConfigProto() config.gpu_options.allow_growth = True if FLAGS.use_perseus or FLAGS.use_horovod: config.gpu_options.visible_device_list = str(hvd.local_rank()) if FLAGS.use_xla: config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1 tf.logging.set_verbosity(tf.logging.INFO) processors = { "cola": ColaProcessor, "mnli": MnliProcessor, "mrpc": MrpcProcessor, "news": NewsProcessor, "xnli": XnliProcessor, } tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, FLAGS.init_checkpoint) if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict: raise ValueError( "At least one of `do_train`, `do_eval` or `do_predict' must be True." ) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) if FLAGS.max_seq_length > bert_config.max_position_embeddings: raise ValueError( "Cannot use sequence length %d because the BERT model " "was only trained up to sequence length %d" % (FLAGS.max_seq_length, bert_config.max_position_embeddings)) if not tf.gfile.Exists(FLAGS.output_dir): tf.gfile.MakeDirs(FLAGS.output_dir) task_name = FLAGS.task_name.lower() if task_name not in processors: raise ValueError("Task not found: %s" % (task_name)) processor = processors[task_name]() label_list = processor.get_labels() tokenizer = tokenization.FullTokenizer(vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case) tpu_cluster_resolver = None if FLAGS.use_tpu and FLAGS.tpu_name: tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver( FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project) is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 save_checkpoints_steps = FLAGS.save_checkpoints_steps learning_rate = FLAGS.learning_rate if FLAGS.use_perseus or FLAGS.use_horovod: model_dir = FLAGS.output_dir if hvd.rank() == 0 else None save_checkpoints_steps = save_checkpoints_steps // hvd.size() else: model_dir = FLAGS.output_dir run_config = tf.contrib.tpu.RunConfig( cluster=tpu_cluster_resolver, master=FLAGS.master, model_dir=model_dir, session_config=config, save_checkpoints_steps=save_checkpoints_steps, tpu_config=tf.contrib.tpu.TPUConfig( iterations_per_loop=FLAGS.iterations_per_loop, num_shards=FLAGS.num_tpu_cores, per_host_input_for_training=is_per_host)) train_examples = None num_train_steps = None num_warmup_steps = None num_workers = 1 worker_index = 0 if FLAGS.use_perseus or FLAGS.use_horovod: bcast_hook = [hvd.BroadcastGlobalVariablesHook(0)] num_workers = hvd.size() worker_index = hvd.rank() else: bcast_hook = [] if FLAGS.do_train: train_examples = processor.get_train_examples(FLAGS.data_dir) num_train_steps = int( len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs) num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion) if FLAGS.use_perseus or FLAGS.use_horovod: num_train_steps = num_train_steps // hvd.size() num_warmup_steps = num_warmup_steps // hvd.size() num_workers = hvd.size() worker_index = hvd.rank() model_fn = model_fn_builder(bert_config=bert_config, num_labels=len(label_list), init_checkpoint=FLAGS.init_checkpoint, learning_rate=learning_rate, num_train_steps=num_train_steps, num_warmup_steps=num_warmup_steps, use_tpu=FLAGS.use_tpu, 
use_perseus=FLAGS.use_perseus, use_horovod=FLAGS.use_horovod, use_one_hot_embeddings=FLAGS.use_tpu) # If TPU is not available, this will fall back to normal Estimator on CPU # or GPU. estimator = tf.contrib.tpu.TPUEstimator( use_tpu=FLAGS.use_tpu, model_fn=model_fn, config=run_config, train_batch_size=FLAGS.train_batch_size, eval_batch_size=FLAGS.eval_batch_size, predict_batch_size=FLAGS.predict_batch_size) if FLAGS.do_train: if FLAGS.use_perseus or FLAGS.use_horovod: train_file = os.path.join( FLAGS.output_dir, "train-" + str(hvd.rank()) + ".tf_record") else: train_file = os.path.join(FLAGS.output_dir, "train.tf_record") file_based_convert_examples_to_features(train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file) tf.logging.info("***** Running training *****") tf.logging.info(" Num examples = %d", len(train_examples)) tf.logging.info(" Batch size = %d", FLAGS.train_batch_size) tf.logging.info(" Num steps = %d", num_train_steps) train_input_fn = file_based_input_fn_builder( input_file=train_file, seq_length=FLAGS.max_seq_length, is_training=True, drop_remainder=True, num_workers=num_workers, worker_index=worker_index) estimator.train(input_fn=train_input_fn, max_steps=num_train_steps, hooks=bcast_hook) do_eval = FLAGS.do_eval if FLAGS.use_perseus or FLAGS.use_horovod: if hvd.rank() == 0: do_eval = FLAGS.do_eval else: do_eval = False if do_eval: eval_examples = processor.get_dev_examples(FLAGS.data_dir) num_actual_eval_examples = len(eval_examples) if FLAGS.use_tpu: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. These do NOT count towards the metric (all tf.metrics # support a per-instance weight, and these get a weight of 0.0). while len(eval_examples) % FLAGS.eval_batch_size != 0: eval_examples.append(PaddingInputExample()) eval_file = os.path.join(FLAGS.output_dir, "eval.tf_record") file_based_convert_examples_to_features(eval_examples, label_list, FLAGS.max_seq_length, tokenizer, eval_file) tf.logging.info("***** Running evaluation *****") tf.logging.info(" Num examples = %d (%d actual, %d padding)", len(eval_examples), num_actual_eval_examples, len(eval_examples) - num_actual_eval_examples) tf.logging.info(" Batch size = %d", FLAGS.eval_batch_size) # This tells the estimator to run through the entire set. eval_steps = None # However, if running eval on the TPU, you will need to specify the # number of steps. 
if FLAGS.use_tpu: assert len(eval_examples) % FLAGS.eval_batch_size == 0 eval_steps = int(len(eval_examples) // FLAGS.eval_batch_size) eval_drop_remainder = True if FLAGS.use_tpu else False eval_input_fn = file_based_input_fn_builder( input_file=eval_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=eval_drop_remainder, num_workers=1, worker_index=0) result = estimator.evaluate(input_fn=eval_input_fn, steps=eval_steps) output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt") with tf.gfile.GFile(output_eval_file, "w") as writer: tf.logging.info("***** Eval results *****") for key in sorted(result.keys()): tf.logging.info(" %s = %s", key, str(result[key])) writer.write("%s = %s\n" % (key, str(result[key]))) do_predict = FLAGS.do_predict if FLAGS.use_perseus or FLAGS.use_horovod: if hvd.rank() == 0: do_predict = FLAGS.do_predict else: do_predict = False if do_predict: predict_examples = processor.get_test_examples(FLAGS.data_dir) num_actual_predict_examples = len(predict_examples) if FLAGS.use_tpu: # TPU requires a fixed batch size for all batches, therefore the number # of examples must be a multiple of the batch size, or else examples # will get dropped. So we pad with fake examples which are ignored # later on. while len(predict_examples) % FLAGS.predict_batch_size != 0: predict_examples.append(PaddingInputExample()) predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record") file_based_convert_examples_to_features(predict_examples, label_list, FLAGS.max_seq_length, tokenizer, predict_file) tf.logging.info("***** Running prediction *****") tf.logging.info(" Num examples = %d (%d actual, %d padding)", len(predict_examples), num_actual_predict_examples, len(predict_examples) - num_actual_predict_examples) tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size) predict_drop_remainder = True if FLAGS.use_tpu else False predict_input_fn = file_based_input_fn_builder( input_file=predict_file, seq_length=FLAGS.max_seq_length, is_training=False, drop_remainder=predict_drop_remainder, num_workers=1, worker_index=0) result = estimator.predict(input_fn=predict_input_fn) output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv") with tf.gfile.GFile(output_predict_file, "w") as writer: num_written_lines = 0 tf.logging.info("***** Predict results *****") for (i, prediction) in enumerate(result): probabilities = prediction["probabilities"] if i >= num_actual_predict_examples: break output_line = "\t".join( str(class_probability) for class_probability in probabilities) + "\n" writer.write(output_line) num_written_lines += 1 assert num_written_lines == num_actual_predict_examples if FLAGS.do_export: estimator._export_to_tpu = False estimator.export_savedmodel(FLAGS.export_dir, serving_input_fn) elapsed_time = time.time() - start_time print(f'overall time is {elapsed_time} s')
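# A condensed, hedged sketch of the Horovod recipe used above: hvd.init() plus
# one-GPU-per-process pinning, per-worker scaling of step counts, and
# BroadcastGlobalVariablesHook so every worker starts from rank 0's weights.
# (The optimizer itself is wrapped with hvd.DistributedOptimizer inside
# model_fn_builder, which is not shown here.) `total_steps` and the model_dir
# path are made-up illustration values, not flags from the script.
import horovod.tensorflow as hvd
import tensorflow as tf

hvd.init()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())  # one GPU per process

total_steps = 10000
steps_per_worker = total_steps // hvd.size()  # each worker runs its share of steps
bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
model_dir = "/tmp/output" if hvd.rank() == 0 else None  # only rank 0 checkpoints
# estimator.train(input_fn=train_input_fn, max_steps=steps_per_worker,
#                 hooks=[bcast_hook])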