def get_squad_data_loader(tokenizer, file, shuffle, args):
    """Read a SQuAD file and wrap its answer-tagged features in a DataLoader.

    Args:
        tokenizer: Tokenizer forwarded to the feature converter.
        file: Path of the SQuAD-format file to read.
        shuffle: Forwarded to ``DataLoader(shuffle=...)``.
        args: Namespace providing ``debug``, ``max_c_len``, ``max_q_len``
            and ``batch_size``.

    Returns:
        A ``(data_loader, examples, features)`` tuple. Each batch holds, in
        order: context ids, question ids, a 0/1 mask of non-zero tag ids,
        and the ``noq`` start / end positions.
    """
    examples = read_squad_examples(file, is_training=True, debug=args.debug)
    # NOTE(review): max_ans_length reuses args.max_q_len -- confirm intended.
    features = convert_examples_to_features_answer_id(
        examples,
        tokenizer=tokenizer,
        max_seq_length=args.max_c_len,
        max_query_length=args.max_q_len,
        max_ans_length=args.max_q_len,
        doc_stride=128,
        is_training=True,
    )

    def _long_column(attr):
        # Stack one per-feature attribute into a single int64 tensor.
        rows = [getattr(feat, attr) for feat in features]
        return torch.tensor(rows, dtype=torch.long)

    context_ids = _long_column("c_ids")
    question_ids = _long_column("q_ids")
    tag_ids = _long_column("tag_ids")
    # Any non-zero tag marks an answer token.
    answer_mask = (tag_ids != 0).long()
    starts = _long_column("noq_start_position")
    ends = _long_column("noq_end_position")

    dataset = TensorDataset(context_ids, question_ids, answer_mask,
                            starts, ends)
    loader = DataLoader(dataset, args.batch_size, shuffle=shuffle)
    return loader, examples, features
def get_data_loader(self, file):
    """Build a sequential, batch-size-1 loader over SQuAD features.

    Besides returning the loader, this stores three attributes on ``self``:
    ``all_c_tokens`` (context tokens per feature), ``all_answer_text``
    (answer text per feature) and ``golden_q_ids`` (the question-id tensor).

    Args:
        file: Path of the SQuAD-format file to read.

    Returns:
        DataLoader yielding ``(c_ids, c_lens, tag_ids, q_ids)`` batches.
    """
    examples = read_squad_examples(file, is_training=True,
                                   debug=config.debug)
    features = convert_examples_to_features(
        examples,
        tokenizer=self.tokenizer,
        max_seq_length=config.max_seq_len,
        max_query_length=config.max_query_len,
        doc_stride=128,
        is_training=True,
    )

    def _long_column(attr):
        return torch.tensor([getattr(feat, attr) for feat in features],
                            dtype=torch.long)

    context_ids = _long_column("c_ids")
    # sign() maps every positive id to 1, so the row sum counts them.
    context_lens = torch.sign(context_ids).sum(1)
    question_ids = _long_column("q_ids")
    tag_ids = _long_column("tag_ids")

    dataset = TensorDataset(context_ids, context_lens, tag_ids, question_ids)
    loader = DataLoader(dataset, shuffle=False, batch_size=1)

    self.all_c_tokens = [feat.context_tokens for feat in features]
    self.all_answer_text = [feat.answer_text for feat in features]
    self.golden_q_ids = question_ids
    return loader
def get_data_loader(self, file):
    """Build a randomly-sampled training loader over SQuAD features.

    The per-step batch size is ``config.batch_size`` divided by
    ``config.gradient_accumulation_steps`` (truncated to an int).

    Args:
        file: Path of the SQuAD-format file to read.

    Returns:
        DataLoader whose batches hold, in order: c_ids, c_lens, tag_ids,
        q_ids, input_ids, input_mask, segment_ids, start_position,
        end_position, noq_start_position, noq_end_position.
    """
    examples = read_squad_examples(file, is_training=True,
                                   debug=config.debug)
    features = convert_examples_to_features(
        examples,
        tokenizer=self.tokenizer,
        max_seq_length=config.max_seq_len,
        max_query_length=config.max_query_len,
        doc_stride=128,
        is_training=True,
    )

    def _long_column(attr):
        return torch.tensor([getattr(feat, attr) for feat in features],
                            dtype=torch.long)

    context_ids = _long_column("c_ids")
    # sign() maps every positive id to 1, so the row sum counts them.
    context_lens = torch.sign(context_ids).sum(1)

    # Remaining columns, in the order the dataset exposes them.
    trailing_fields = (
        "tag_ids",
        "q_ids",
        "input_ids",
        "input_mask",
        "segment_ids",
        "start_position",
        "end_position",
        "noq_start_position",
        "noq_end_position",
    )
    trailing = [_long_column(name) for name in trailing_fields]

    dataset = TensorDataset(context_ids, context_lens, *trailing)
    per_step = int(config.batch_size / config.gradient_accumulation_steps)
    return DataLoader(dataset, sampler=RandomSampler(dataset),
                      batch_size=per_step)
def get_harv_data_loader(tokenizer, file, shuffle, ratio, args):
    """Build a context-only loader over a random fraction of a SQuAD file.

    The examples are shuffled in place with the global ``random`` state and
    the first ``int(len(examples) * ratio)`` of them are kept.

    Args:
        tokenizer: Tokenizer forwarded to the feature converter.
        file: Path of the SQuAD-format file to read.
        shuffle: Forwarded to ``DataLoader(shuffle=...)``.
        ratio: Fraction of the examples to keep.
        args: Namespace providing ``debug``, ``max_c_len``, ``max_q_len``
            and ``batch_size``.

    Returns:
        A ``(features, dataloader)`` tuple; batches contain only ``c_ids``.
    """
    examples = read_squad_examples(file, is_training=True, debug=args.debug)
    random.shuffle(examples)
    keep = int(len(examples) * ratio)
    subset = examples[:keep]

    features = convert_examples_to_harv_features(
        subset,
        tokenizer=tokenizer,
        max_seq_length=args.max_c_len,
        max_query_length=args.max_q_len,
        doc_stride=128,
        is_training=True,
    )
    context_ids = torch.tensor([feat.c_ids for feat in features],
                               dtype=torch.long)
    loader = DataLoader(TensorDataset(context_ids), shuffle=shuffle,
                        batch_size=args.batch_size)
    return features, loader
def get_harv_data_loader(tokenizer, file, shuffle, args, max_examples=5000):
    """Build a context-only loader over a random sample of a SQuAD file.

    The examples are shuffled in place with the global ``random`` state and
    at most ``max_examples`` of them are converted to features.

    Args:
        tokenizer: Tokenizer forwarded to the feature converter.
        file: Path of the SQuAD-format file to read.
        shuffle: Forwarded to ``DataLoader(shuffle=...)``.
        args: Namespace providing ``debug``, ``max_c_len``, ``max_q_len``
            and ``batch_size``.
        max_examples: Upper bound on the number of examples kept after
            shuffling. Defaults to 5000, the previously hard-coded value.

    Returns:
        DataLoader whose batches contain only the ``c_ids`` tensor.
    """
    examples = read_squad_examples(file, is_training=True, debug=args.debug)
    random.shuffle(examples)
    examples = examples[:max_examples]

    features = convert_examples_to_features_answer_id(
        examples,
        tokenizer=tokenizer,
        max_seq_length=args.max_c_len,
        max_query_length=args.max_q_len,
        doc_stride=128,
        is_training=True,
    )
    all_c_ids = torch.tensor([f.c_ids for f in features], dtype=torch.long)
    all_data = TensorDataset(all_c_ids)
    return DataLoader(all_data, args.batch_size, shuffle=shuffle)
def get_data_loader(self, file):
    """Build a shuffled loader of contexts with their answer span positions.

    Args:
        file: Path of the SQuAD-format file to read.

    Returns:
        DataLoader yielding ``(c_ids, c_lens, noq_start_position,
        noq_end_position)`` batches of size ``config.batch_size``.
    """
    examples = read_squad_examples(file, is_training=True,
                                   debug=config.debug)
    features = convert_examples_to_features(
        examples,
        tokenizer=self.tokenizer,
        max_seq_length=config.max_seq_len,
        max_query_length=config.max_query_len,
        doc_stride=128,
        is_training=True,
    )

    def _long_column(attr):
        return torch.tensor([getattr(feat, attr) for feat in features],
                            dtype=torch.long)

    context_ids = _long_column("c_ids")
    # sign() maps every positive id to 1, so the row sum counts them.
    context_lens = torch.sign(context_ids).sum(1).long()
    span_starts = _long_column("noq_start_position")
    span_ends = _long_column("noq_end_position")

    dataset = TensorDataset(context_ids, context_lens, span_starts, span_ends)
    return DataLoader(dataset, shuffle=True, batch_size=config.batch_size)
def main(_):
    """Fine-tune ALBERT on SQuAD v2.0 and/or sweep checkpoints for the best F1.

    With ``FLAGS.do_train`` the training features are written (unless the
    feature file already exists) and the estimator is trained. With
    ``FLAGS.do_predict`` every ``model.ckpt-<step>`` found in
    ``FLAGS.output_dir`` is evaluated in step order until the evaluated
    global step reaches ``num_train_steps``; the best-F1 checkpoint is copied
    to ``model.ckpt-best`` and older checkpoints are removed. Results are
    appended to ``eval_results.txt`` in the output directory.

    Args:
        _: Unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.get_logger().propagate = False

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    validate_flags_or_throw(albert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)
    print("Output:", FLAGS.output_dir)

    tokenizer = fine_tuning_utils.create_vocab(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file,
        hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        keep_checkpoint_max=0,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    # The training examples are read even when only predicting, because the
    # checkpoint sweep below stops once global_step reaches num_train_steps.
    num_warmup_steps = None
    train_examples = squad_utils.read_squad_examples(
        input_file=FLAGS.train_file, is_training=True)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    if FLAGS.do_train:
        num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

        # Pre-shuffle the input to avoid having to make a very large shuffle
        # buffer in the `input_fn`.
        rng = random.Random(12345)
        rng.shuffle(train_examples)

    model_fn = squad_utils.v2_model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        start_n_top=FLAGS.start_n_top,
        end_n_top=FLAGS.end_n_top,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant
        # tensors in memory.
        if not tf.gfile.Exists(FLAGS.train_feature_file):
            train_writer = squad_utils.FeatureWriter(
                filename=os.path.join(FLAGS.train_feature_file),
                is_training=True)
            squad_utils.convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=True,
                output_fn=train_writer.process_feature,
                do_lower_case=FLAGS.do_lower_case)
            train_writer.close()

        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num orig examples = %d", len(train_examples))
        # The split-example count is not logged: train_writer only exists
        # when the feature file was created in this run.
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        del train_examples

        train_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.train_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            is_v2=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        with tf.gfile.Open(FLAGS.predict_file) as predict_file:
            prediction_json = json.load(predict_file)["data"]
        eval_examples = squad_utils.read_squad_examples(
            input_file=FLAGS.predict_file, is_training=False)

        if (tf.gfile.Exists(FLAGS.predict_feature_file) and
                tf.gfile.Exists(FLAGS.predict_feature_left_file)):
            tf.logging.info("Loading eval features from {}".format(
                FLAGS.predict_feature_left_file))
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only point predict_feature_left_file at a trusted cache.
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "rb") as fin:
                eval_features = pickle.load(fin)
        else:
            eval_writer = squad_utils.FeatureWriter(
                filename=FLAGS.predict_feature_file, is_training=False)
            eval_features = []

            def append_feature(feature):
                # Keep the feature in memory and also write it to the record.
                eval_features.append(feature)
                eval_writer.process_feature(feature)

            squad_utils.convert_examples_to_features(
                examples=eval_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature,
                do_lower_case=FLAGS.do_lower_case)
            eval_writer.close()

            with tf.gfile.Open(FLAGS.predict_feature_left_file, "wb") as fout:
                pickle.dump(eval_features, fout)

        tf.logging.info("***** Running predictions *****")
        tf.logging.info(" Num orig examples = %d", len(eval_examples))
        tf.logging.info(" Num split examples = %d", len(eval_features))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.predict_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            is_v2=True)

        def get_result(checkpoint):
            """Evaluate the checkpoint on SQuAD v2.0.

            Returns:
                ``(metrics, global_step)`` where ``metrics`` is whatever
                ``squad_utils.evaluate_v2`` returns and ``global_step`` is
                read from the checkpoint.
            """
            # If running eval on the TPU, you will need to specify the number
            # of steps.
            reader = tf.train.NewCheckpointReader(checkpoint)
            global_step = reader.get_tensor(tf.GraphKeys.GLOBAL_STEP)
            all_results = []
            for result in estimator.predict(predict_input_fn,
                                            yield_single_examples=True,
                                            checkpoint_path=checkpoint):
                if len(all_results) % 1000 == 0:
                    tf.logging.info(
                        "Processing example: %d" % (len(all_results)))
                unique_id = int(result["unique_ids"])
                start_top_log_probs = ([
                    float(x) for x in result["start_top_log_probs"].flat
                ])
                start_top_index = [
                    int(x) for x in result["start_top_index"].flat
                ]
                end_top_log_probs = ([
                    float(x) for x in result["end_top_log_probs"].flat
                ])
                end_top_index = [int(x) for x in result["end_top_index"].flat]

                cls_logits = float(result["cls_logits"].flat[0])
                all_results.append(
                    squad_utils.RawResultV2(
                        unique_id=unique_id,
                        start_top_log_probs=start_top_log_probs,
                        start_top_index=start_top_index,
                        end_top_log_probs=end_top_log_probs,
                        end_top_index=end_top_index,
                        cls_logits=cls_logits))

            output_prediction_file = os.path.join(FLAGS.output_dir,
                                                  "predictions.json")
            output_nbest_file = os.path.join(FLAGS.output_dir,
                                             "nbest_predictions.json")
            output_null_log_odds_file = os.path.join(FLAGS.output_dir,
                                                     "null_odds.json")

            result_dict = {}
            cls_dict = {}
            squad_utils.accumulate_predictions_v2(
                result_dict, cls_dict, eval_examples, eval_features,
                all_results, FLAGS.n_best_size, FLAGS.max_answer_length,
                FLAGS.start_n_top, FLAGS.end_n_top)

            return squad_utils.evaluate_v2(
                result_dict, cls_dict, prediction_json, eval_examples,
                eval_features, all_results, FLAGS.n_best_size,
                FLAGS.max_answer_length, output_prediction_file,
                output_nbest_file,
                output_null_log_odds_file), int(global_step)

        def _find_valid_cands(curr_step):
            """Return the .index files of checkpoints newer than curr_step."""
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        output_eval_file = os.path.join(FLAGS.output_dir, "eval_results.txt")
        checkpoint_path = os.path.join(FLAGS.output_dir, "model.ckpt-best")
        key_name = "f1"
        # The context manager guarantees the results file is flushed and
        # closed, including when an evaluation raises.
        with tf.gfile.GFile(output_eval_file, "w") as writer:
            if tf.gfile.Exists(checkpoint_path + ".index"):
                # Resume from a previously selected best checkpoint.
                result = get_result(checkpoint_path)
                best_perf = result[0][key_name]
                global_step = result[1]
            else:
                global_step = -1
                best_perf = -1
                checkpoint_path = None

            while global_step < num_train_steps:
                steps_and_files = {}
                filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
                for filename in filenames:
                    if filename.endswith(".index"):
                        ckpt_name = filename[:-6]
                        cur_filename = os.path.join(FLAGS.output_dir,
                                                    ckpt_name)
                        if cur_filename.split("-")[-1] == "best":
                            continue
                        gstep = int(cur_filename.split("-")[-1])
                        if gstep not in steps_and_files:
                            tf.logging.info(
                                "Add {} to eval list.".format(cur_filename))
                            steps_and_files[gstep] = cur_filename
                tf.logging.info(
                    "found {} files.".format(len(steps_and_files)))
                if not steps_and_files:
                    tf.logging.info(
                        "found 0 file, global step: {}. Sleeping.".format(
                            global_step))
                    time.sleep(60)
                else:
                    for ele in sorted(steps_and_files.items()):
                        step, checkpoint_path = ele
                        print("GS: ", global_step, step)
                        if global_step >= step:
                            # Already evaluated: delete it once newer
                            # checkpoints exist, then move on.
                            if len(_find_valid_cands(step)) > 1:
                                for ext in [
                                        "meta", "data-00000-of-00001", "index"
                                ]:
                                    src_ckpt = checkpoint_path + ".{}".format(
                                        ext)
                                    tf.logging.info(
                                        "removing {}".format(src_ckpt))
                                    tf.gfile.Remove(src_ckpt)
                            continue
                        result, global_step = get_result(checkpoint_path)
                        print("EVAL RESULTS")
                        tf.logging.info("***** Eval results *****")
                        for key in sorted(result.keys()):
                            tf.logging.info(" %s = %s", key,
                                            str(result[key]))
                            writer.write(
                                "%s = %s\n" % (key, str(result[key])))
                        if result[key_name] > best_perf:
                            best_perf = result[key_name]
                            for ext in [
                                    "meta", "data-00000-of-00001", "index"
                            ]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tgt_ckpt = checkpoint_path.rsplit(
                                    "-", 1)[0] + "-best.{}".format(ext)
                                tf.logging.info("saving {} to {}".format(
                                    src_ckpt, tgt_ckpt))
                                tf.gfile.Copy(src_ckpt, tgt_ckpt,
                                              overwrite=True)
                                writer.write("saved {} to {}\n".format(
                                    src_ckpt, tgt_ckpt))
                        writer.write(
                            "best {} = {}\n".format(key_name, best_perf))
                        tf.logging.info(" best {} = {}\n".format(
                            key_name, best_perf))

                        if len(_find_valid_cands(global_step)) > 2:
                            for ext in [
                                    "meta", "data-00000-of-00001", "index"
                            ]:
                                src_ckpt = checkpoint_path + ".{}".format(ext)
                                tf.logging.info(
                                    "removing {}".format(src_ckpt))
                                tf.gfile.Remove(src_ckpt)
                        writer.write("=" * 50 + "\n")
                # Pause between directory scans while waiting for new
                # checkpoints.
                print("Sleeping")
                time.sleep(10)

            checkpoint_path = os.path.join(FLAGS.output_dir,
                                           "model.ckpt-best")
            result, global_step = get_result(checkpoint_path)
            tf.logging.info("***** Final Eval results *****")
            for key in sorted(result.keys()):
                tf.logging.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))
            writer.write(
                "best perf happened at step: {}".format(global_step))
def main(_):
    """Evaluate an ALBERT SQuAD v2.0 checkpoint as an "is impossible" classifier.

    With ``FLAGS.do_predict`` the checkpoint at ``FLAGS.init_checkpoint`` is
    run over the prediction file. Each example's minimum ``cls`` logit is
    passed through a sigmoid and thresholded at 0.5 to predict whether the
    question is unanswerable; precision, recall and F1 of that prediction
    are logged and the per-question logits are written to
    ``predictions.json`` in ``FLAGS.output_dir``.

    NOTE(review): loading of the training examples is disabled in this
    variant, so ``train_examples`` and ``num_train_steps`` stay ``None`` and
    the ``FLAGS.do_train`` branch will fail when it dereferences them.

    Args:
        _: Unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    albert_config = modeling.AlbertConfig.from_json_file(
        FLAGS.albert_config_file)

    validate_flags_or_throw(albert_config)

    tf.gfile.MakeDirs(FLAGS.output_dir)

    tokenizer = fine_tuning_utils.create_vocab(
        vocab_file=FLAGS.vocab_file,
        do_lower_case=FLAGS.do_lower_case,
        spm_model_file=FLAGS.spm_model_file,
        hub_module=FLAGS.albert_hub_module_handle)

    tpu_cluster_resolver = None
    if FLAGS.use_tpu and FLAGS.tpu_name:
        tpu_cluster_resolver = contrib_cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

    is_per_host = contrib_tpu.InputPipelineConfig.PER_HOST_V2
    if FLAGS.do_train:
        iterations_per_loop = int(
            min(FLAGS.iterations_per_loop, FLAGS.save_checkpoints_steps))
    else:
        iterations_per_loop = FLAGS.iterations_per_loop
    run_config = contrib_tpu.RunConfig(
        cluster=tpu_cluster_resolver,
        master=FLAGS.master,
        model_dir=FLAGS.output_dir,
        keep_checkpoint_max=0,
        save_checkpoints_steps=FLAGS.save_checkpoints_steps,
        tpu_config=contrib_tpu.TPUConfig(
            iterations_per_loop=iterations_per_loop,
            num_shards=FLAGS.num_tpu_cores,
            per_host_input_for_training=is_per_host))

    # Training-example loading, step computation and pre-shuffling are
    # intentionally disabled here; these values are passed on as None.
    train_examples = None
    num_train_steps = None
    num_warmup_steps = None

    model_fn = squad_utils.v2_model_fn_builder(
        albert_config=albert_config,
        init_checkpoint=FLAGS.init_checkpoint,
        learning_rate=FLAGS.learning_rate,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        use_tpu=FLAGS.use_tpu,
        use_one_hot_embeddings=FLAGS.use_tpu,
        max_seq_length=FLAGS.max_seq_length,
        start_n_top=FLAGS.start_n_top,
        end_n_top=FLAGS.end_n_top,
        dropout_prob=FLAGS.dropout_prob,
        hub_module=FLAGS.albert_hub_module_handle)

    # If TPU is not available, this will fall back to normal Estimator on CPU
    # or GPU.
    estimator = contrib_tpu.TPUEstimator(
        use_tpu=FLAGS.use_tpu,
        model_fn=model_fn,
        config=run_config,
        train_batch_size=FLAGS.train_batch_size,
        predict_batch_size=FLAGS.predict_batch_size)

    if FLAGS.do_train:
        # We write to a temporary file to avoid storing very large constant
        # tensors in memory.
        if not tf.gfile.Exists(FLAGS.train_feature_file):
            train_writer = squad_utils.FeatureWriter(
                filename=os.path.join(FLAGS.train_feature_file),
                is_training=True)
            squad_utils.convert_examples_to_features(
                examples=train_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=True,
                output_fn=train_writer.process_feature,
                do_lower_case=FLAGS.do_lower_case)
            train_writer.close()

        tf.logging.info("***** Running training *****")
        tf.logging.info(" Num orig examples = %d", len(train_examples))
        # The split-example count is not logged: train_writer only exists
        # when the feature file was created in this run.
        tf.logging.info(" Batch size = %d", FLAGS.train_batch_size)
        tf.logging.info(" Num steps = %d", num_train_steps)
        del train_examples

        train_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.train_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=True,
            drop_remainder=True,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.train_batch_size,
            is_v2=True)
        estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

    if FLAGS.do_predict:
        with tf.gfile.Open(FLAGS.predict_file) as predict_file:
            prediction_json = json.load(predict_file)["data"]
        eval_examples = squad_utils.read_squad_examples(
            input_file=FLAGS.predict_file, is_training=False)

        if (tf.gfile.Exists(FLAGS.predict_feature_file) and
                tf.gfile.Exists(FLAGS.predict_feature_left_file)):
            tf.logging.info("Loading eval features from {}".format(
                FLAGS.predict_feature_left_file))
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only point predict_feature_left_file at a trusted cache.
            with tf.gfile.Open(FLAGS.predict_feature_left_file, "rb") as fin:
                eval_features = pickle.load(fin)
        else:
            eval_writer = squad_utils.FeatureWriter(
                filename=FLAGS.predict_feature_file, is_training=False)
            eval_features = []

            def append_feature(feature):
                # Keep the feature in memory and also write it to the record.
                eval_features.append(feature)
                eval_writer.process_feature(feature)

            squad_utils.convert_examples_to_features(
                examples=eval_examples,
                tokenizer=tokenizer,
                max_seq_length=FLAGS.max_seq_length,
                doc_stride=FLAGS.doc_stride,
                max_query_length=FLAGS.max_query_length,
                is_training=False,
                output_fn=append_feature,
                do_lower_case=FLAGS.do_lower_case)
            eval_writer.close()

            with tf.gfile.Open(FLAGS.predict_feature_left_file, "wb") as fout:
                pickle.dump(eval_features, fout)

        tf.logging.info("***** Running predictions *****")
        tf.logging.info(" Num orig examples = %d", len(eval_examples))
        tf.logging.info(" Num split examples = %d", len(eval_features))
        tf.logging.info(" Batch size = %d", FLAGS.predict_batch_size)

        predict_input_fn = squad_utils.input_fn_builder(
            input_file=FLAGS.predict_feature_file,
            seq_length=FLAGS.max_seq_length,
            is_training=False,
            drop_remainder=False,
            use_tpu=FLAGS.use_tpu,
            bsz=FLAGS.predict_batch_size,
            is_v2=True)

        def get_result(checkpoint):
            """Evaluate the checkpoint on SQuAD v2.0.

            Returns:
                ``(metrics, global_step)`` where ``metrics`` has the keys
                ``precision``, ``recall``, ``f1`` and ``total``, and
                ``global_step`` is read from the checkpoint.
            """
            # If running eval on the TPU, you will need to specify the number
            # of steps.
            reader = tf.train.NewCheckpointReader(checkpoint)
            global_step = reader.get_tensor(tf.GraphKeys.GLOBAL_STEP)
            all_results = []
            for result in estimator.predict(predict_input_fn,
                                            yield_single_examples=True,
                                            checkpoint_path=checkpoint):
                if len(all_results) % 1000 == 0:
                    tf.logging.info(
                        "Processing example: %d" % (len(all_results)))
                unique_id = int(result["unique_ids"])
                cls_logits = float(result["cls_logits"].flat[0])
                all_results.append(
                    squad_utils.RawResultV2(unique_id=unique_id,
                                            cls_logits=cls_logits))

            output_prediction_file = os.path.join(FLAGS.output_dir,
                                                  "predictions.json")

            result_dict = {}
            cls_dict = {}
            squad_utils.accumulate_predictions_v2(
                result_dict, cls_dict, eval_examples, eval_features,
                all_results, FLAGS.n_best_size, FLAGS.max_answer_length,
                FLAGS.start_n_top, FLAGS.end_n_top)

            from squad_utils import make_qid_to_has_ans
            import numpy as np
            qid_to_has_ans = make_qid_to_has_ans(
                prediction_json)  # maps qid to True/False
            has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
            no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
            print("has_ans", len(has_ans_qids))
            print("no_ans", len(no_ans_qids))

            def compute_metrics_with_threshold(threshold):
                """Score "is impossible" predictions at the given threshold.

                The positive class is ``example.is_impossible``. As a side
                effect ``result_dict`` is rebuilt to map each ``qas_id`` to
                the example's minimum cls logit.

                Returns:
                    ``(precision, recall, f1)``; a metric whose denominator
                    is zero is reported as 0.0.
                """
                nonlocal result_dict
                result_dict = {}
                tp = 0
                tn = 0
                fp = 0
                fn = 0
                for example_index, example in enumerate(eval_examples):
                    m = np.min(cls_dict[example_index])
                    # Sigmoid of the logit, compared against the threshold.
                    predict_is_impossible = 1 / (1 + np.exp(-m)) > threshold
                    result_dict[example.qas_id] = m
                    if example.is_impossible:
                        if predict_is_impossible:
                            tp += 1
                        else:
                            fn += 1
                    else:
                        if predict_is_impossible:
                            fp += 1
                        else:
                            tn += 1

                # Guard each ratio: with no positive predictions or no
                # positive labels the plain division would raise
                # ZeroDivisionError.
                predicted_pos = tp + fp
                actual_pos = tp + fn
                f1_denom = 2 * tp + fp + fn
                precision = tp / predicted_pos if predicted_pos else 0.0
                recall = tp / actual_pos if actual_pos else 0.0
                f1 = 2 * tp / f1_denom if f1_denom else 0.0
                tf.logging.info("precision: %s recall: %s f1: %s",
                                precision, recall, f1)
                return precision, recall, f1

            precision, recall, f1 = compute_metrics_with_threshold(0.5)

            with tf.gfile.GFile(output_prediction_file, "w") as writer:
                writer.write(json.dumps(result_dict, indent=4) + "\n")
            return {
                "precision": precision,
                "recall": recall,
                "f1": f1,
                "total": len(eval_examples)
            }, int(global_step)

        def _find_valid_cands(curr_step):
            """Return the .index files of checkpoints newer than curr_step.

            Not called by the single-checkpoint evaluation below.
            """
            filenames = tf.gfile.ListDirectory(FLAGS.output_dir)
            candidates = []
            for filename in filenames:
                if filename.endswith(".index"):
                    ckpt_name = filename[:-6]
                    idx = ckpt_name.split("-")[-1]
                    if idx != "best" and int(idx) > curr_step:
                        candidates.append(filename)
            return candidates

        # Only the checkpoint named by FLAGS.init_checkpoint is evaluated;
        # no sweep over the checkpoints in FLAGS.output_dir is performed.
        result, global_step = get_result(FLAGS.init_checkpoint)
exact_match += metric_max_over_ground_truths( exact_match_score, prediction, ground_truths) f1 += metric_max_over_ground_truths( f1_score, prediction, ground_truths) exact_match = 100.0 * exact_match / total f1 = 100.0 * f1 / total return {'exact_match': exact_match, 'f1': f1} tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) test_file = "./squad/new_test-v1.1.json" eval_examples = read_squad_examples(test_file, is_training=False, debug=False) eval_features = convert_examples_to_features(eval_examples, tokenizer=tokenizer, max_seq_length=config.max_seq_len, max_query_length=config.max_query_len, doc_stride=128, is_training=False) all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long) all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long) all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long) all_example_index = torch.arange(all_input_ids.size(0)) eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index) eval_sampler = SequentialSampler(eval_data) eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=8)
def main(args):
    """Generate synthetic QA features from contexts with a trained VAE.

    Loads the ``DiscreteVAE`` stored in ``args.checkpoint``, reads contexts
    from ``args.data_file``, samples ``args.k`` question/answer pairs per
    context batch and pickles the resulting ``InputFeatures`` list to
    ``args.output_file``.

    Args:
        args: Namespace providing ``bert_model``, ``checkpoint``, ``squad``,
            ``data_file``, ``debug``, ``max_c_len``, ``max_q_len``,
            ``ratio``, ``batch_size``, ``k`` and ``output_file``. The
            tokenizer is also stored on it as ``args.tokenizer``.
    """
    tokenizer = BertTokenizer.from_pretrained(args.bert_model)
    args.tokenizer = tokenizer

    device = torch.cuda.current_device()
    # Load on CPU first, then move the whole model to the GPU.
    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    vae = DiscreteVAE(checkpoint["args"])
    vae.load_state_dict(checkpoint["state_dict"])
    vae.eval()
    vae = vae.to(device)

    # Only the reader differs between the two data formats.
    if args.squad:
        examples = read_squad_examples(args.data_file, is_training=True,
                                       debug=args.debug)
    else:
        examples = read_examples(args.data_file, is_training=True,
                                 debug=args.debug)
    features = convert_examples_to_harv_features(
        examples,
        tokenizer=tokenizer,
        max_seq_length=args.max_c_len,
        max_query_length=args.max_q_len,
        doc_stride=128,
        is_training=True)

    # Keep only the leading fraction of the features.
    features = features[:int(len(features) * args.ratio)]
    all_c_ids = torch.tensor([f.c_ids for f in features], dtype=torch.long)
    data = TensorDataset(all_c_ids)
    data_loader = DataLoader(data, shuffle=False, batch_size=args.batch_size)

    new_features = []
    for batch in tqdm(data_loader, total=len(data_loader)):
        c_ids = batch[0]
        _, c_len = return_mask_lengths(c_ids)
        # Trim the batch to its longest context.
        max_c_len = torch.max(c_len)
        c_ids = c_ids[:, :max_c_len].to(device)

        # sample latent variable K times
        for _ in range(args.k):
            with torch.no_grad():
                _, _, zq, _, za = vae.prior_encoder(c_ids)
                batch_q_ids, batch_start, batch_end = vae.generate(
                    zq, za, c_ids)

                all_input_ids, all_seg_ids, \
                    all_input_mask, all_start, all_end = post_process(
                        batch_q_ids, batch_start, batch_end, c_ids)

            for i in range(c_ids.size(0)):
                new_features.append(
                    InputFeatures(
                        unique_id=None,
                        example_index=None,
                        doc_span_index=None,
                        tokens=None,
                        token_to_orig_map=None,
                        token_is_max_context=None,
                        input_ids=all_input_ids[i].cpu().tolist(),
                        input_mask=all_input_mask[i].cpu().tolist(),
                        c_ids=None,
                        context_tokens=None,
                        q_ids=None,
                        q_tokens=None,
                        answer_text=None,
                        tag_ids=None,
                        segment_ids=all_seg_ids[i].cpu().tolist(),
                        noq_start_position=None,
                        noq_end_position=None,
                        start_position=all_start[i].cpu().tolist(),
                        end_position=all_end[i].cpu().tolist(),
                        is_impossible=None))

    # A bare file name has an empty dirname, and os.makedirs("") raises, so
    # only create the directory when there is one.
    dir_name = os.path.dirname(args.output_file)
    if dir_name:
        os.makedirs(dir_name, exist_ok=True)
    with open(args.output_file, "wb") as f:
        pickle.dump(new_features, f)
examples = processor.get_dev_examples('glue_data') print('RTE Dev: ' + str(len(examples))) processor = classifier_utils.Sst2Processor(use_spm=True, do_lower_case=True) examples = processor.get_train_examples('glue_data') print('SST-2 Train: ' + str(len(examples))) examples = processor.get_dev_examples('glue_data') print('SST-2 Dev: ' + str(len(examples))) processor = classifier_utils.StsbProcessor(use_spm=True, do_lower_case=True) examples = processor.get_train_examples('glue_data') print('STS-B Train: ' + str(len(examples))) examples = processor.get_dev_examples('glue_data') print('STS-B Dev: ' + str(len(examples))) examples = squad_utils.read_squad_examples( input_file='squad_data/train-v1.1.json', is_training=True) print('SQuAD Train: ' + str(len(examples))) examples = squad_utils.read_squad_examples( input_file='squad_data/dev-v1.1.json', is_training=False) print('SQuAD Dev: ' + str(len(examples))) examples = squad_utils.read_squad_examples( input_file='squad_data/train-v2.0.json', is_training=True) print('SQuADV2 Train: ' + str(len(examples))) examples = squad_utils.read_squad_examples( input_file='squad_data/dev-v2.0.json', is_training=False) print('SQuADV2 Dev: ' + str(len(examples))) examples = race_utils.RaceProcessor(