def evaluate(model: TransformerModelWrapper, eval_data: List[InputExample],
             config: EvalConfig) -> Dict:
    metrics = config.metrics if config.metrics else ['acc']
    results = model.eval(eval_data=eval_data,
                         per_gpu_eval_batch_size=config.per_gpu_eval_batch_size,
                         n_gpu=config.n_gpu)

    predictions = np.argmax(results['logits'], axis=1)
    scores = {}
    for metric in metrics:
        if metric == 'acc':
            scores[metric] = simple_accuracy(predictions, results['labels'])
        elif metric == 'f1':
            scores[metric] = f1_score(results['labels'], predictions)
        elif metric == 'f1-macro':
            scores[metric] = f1_score(results['labels'], predictions, average='macro')
        elif metric == 'em':
            scores[metric] = exact_match(predictions, results['labels'], results['question_ids'])
        else:
            raise ValueError(f"Metric '{metric}' not implemented")

    results['scores'] = scores
    results['predictions'] = predictions
    return results
def train_single_model(train_data: List[InputExample],
                       eval_data: List[InputExample],
                       dev32_data: List[InputExample],
                       pattern_iter_output_dir: str,
                       model: TransformerModelWrapper,
                       config: TrainConfig,
                       eval_config: EvalConfig):
    """
    Train a single model.

    :param model: the model to train
    :param train_data: the training examples to use
    :param config: the training config
    :param eval_config: the evaluation config
    :return: a dictionary containing the global step, average loss and (optionally) results on the train set
    """
    results_dict = {}
    results_dict['train_set_before_training'] = evaluate(model, train_data, eval_config)['scores']['acc']

    if not train_data:
        logger.warning('Training method was called without training examples')
    else:
        global_step, tr_loss = model.train(
            pattern_iter_output_dir=pattern_iter_output_dir,
            eval_config=eval_config,
            train_data=train_data,
            dev32_data=dev32_data,
            eval_data=eval_data,
            per_gpu_train_batch_size=config.per_gpu_train_batch_size,
            n_gpu=config.n_gpu,
            num_train_epochs=config.num_train_epochs,
            max_steps=config.max_steps,
            gradient_accumulation_steps=config.gradient_accumulation_steps,
            weight_decay=config.weight_decay,
            learning_rate=config.learning_rate,
            adam_epsilon=config.adam_epsilon,
            warmup_steps=config.warmup_steps,
            max_grad_norm=config.max_grad_norm,
            alpha=config.alpha)
        results_dict['global_step'] = global_step
        results_dict['average_loss'] = tr_loss

    model = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)
    results_dict['train_set_after_training'] = evaluate(model, train_data, eval_config)['scores']['acc']
    return results_dict
def evaluate(model: TransformerModelWrapper,
             eval_data: List[InputExample],
             config: EvalConfig,
             priming_data: List[InputExample] = None,
             local_rank=-1) -> Dict:
    """
    Evaluate a model.

    :param model: the model to evaluate
    :param eval_data: the examples for evaluation
    :param config: the evaluation config
    :param priming_data: an optional list of priming data to use
    :return: a dictionary containing the model's logits, predictions and (if any metrics are given) scores
    """
    if config.priming:
        for example in eval_data:
            example.meta["priming_data"] = priming_data

    metrics = config.metrics if config.metrics else ["acc"]
    device = torch.device(config.device if config.device
                          else "cuda" if torch.cuda.is_available() else "cpu")
    model.model.to(device)

    results = model.eval(
        eval_data,
        device,
        per_gpu_eval_batch_size=config.per_gpu_eval_batch_size,
        n_gpu=config.n_gpu,
        decoding_strategy=config.decoding_strategy,
        priming=config.priming,
        local_rank=local_rank,
    )

    predictions = np.argmax(results["logits"], axis=1)
    scores = {}
    for metric in metrics:
        if metric == "acc":
            scores[metric] = simple_accuracy(predictions, results["labels"])
        elif metric == "f1":
            scores[metric] = f1_score(results["labels"], predictions)
        elif metric == "f1-macro":
            scores[metric] = f1_score(results["labels"], predictions, average="macro")
        elif metric == "em":
            scores[metric] = exact_match(predictions, results["labels"], results["question_ids"])
        else:
            raise ValueError(f"Metric '{metric}' not implemented")

    results["scores"] = scores
    results["predictions"] = predictions
    return results
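# --- Usage sketch (not from the repository) --------------------------------
# A minimal, hypothetical example of calling the evaluate() variant above.
# The checkpoint path and EvalConfig values are illustrative assumptions;
# the field names simply mirror the attributes evaluate() reads from its
# config, and the keyword-constructor form is assumed.
def _example_evaluate_usage(test_examples):
    wrapper = TransformerModelWrapper.from_pretrained("output/p0-i0")  # assumed path
    eval_config = EvalConfig(device="cuda", n_gpu=1,
                             per_gpu_eval_batch_size=8,
                             metrics=["acc", "f1-macro"],
                             decoding_strategy="default",  # assumed default
                             priming=False)
    results = evaluate(wrapper, test_examples, eval_config)
    print(results["scores"])  # e.g. {'acc': ..., 'f1-macro': ...}
    return results["predictions"]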
def init_model(config: WrapperConfig) -> TransformerModelWrapper:
    """Initialize a new model from the given config."""
    assert config.pattern_id is not None, 'A pattern_id must be set for initializing a new PET model'
    # edit by wjn
    # For a single task, build the single-task model wrapper;
    # in the multi-task (cross-task) setting, build the cross-task wrapper.
    if config.task_type is None or config.task_type == 'single_task':
        model = TransformerModelWrapper(config)
    else:
        model = TransPromptModelWrapper(config)
    return model
def train_single_model(model: TransformerModelWrapper,
                       train_data: List[InputExample],
                       config: TrainConfig,
                       output_dir,
                       dev_data: List[InputExample] = None,
                       eval_config: EvalConfig = None,
                       ipet_train_data: List[InputExample] = None,
                       unlabeled_data: List[InputExample] = None,
                       return_train_set_results: bool = True,
                       local_rank=-1):
    """
    Train a single model.

    :param model: the model to train
    :param train_data: the training examples to use
    :param config: the training config
    :param eval_config: the evaluation config
    :param ipet_train_data: an optional list of iPET training examples to use
    :param unlabeled_data: an optional list of unlabeled examples to use
    :param return_train_set_results: whether results on the train set before and after training
           should be computed and returned
    :return: a dictionary containing the global step, average loss and (optionally) results on the train set
    """
    device = torch.device(config.device if config.device
                          else "cuda" if torch.cuda.is_available() else "cpu")
    if not ipet_train_data:
        ipet_train_data = []

    results_dict = {}
    model.model.to(device)

    if train_data and return_train_set_results:
        results_dict["train_set_before_training"] = evaluate(
            model, train_data, eval_config, local_rank=local_rank)["scores"]["acc"]

    all_train_data = train_data + ipet_train_data

    if dev_data is not None and eval_config is not None:
        eval_kwargs = {
            "eval_data": dev_data,
            "device": device,
            "per_gpu_eval_batch_size": eval_config.per_gpu_eval_batch_size,
            "n_gpu": eval_config.n_gpu,
            "decoding_strategy": eval_config.decoding_strategy,
            "priming": eval_config.priming,
            "local_rank": local_rank,
            "metrics": eval_config.metrics,
        }
    else:
        eval_kwargs = None

    if not all_train_data and not config.use_logits:
        logger.warning("Training method was called without training examples")
    else:
        global_step, tr_loss = model.train(
            all_train_data,
            device,
            per_gpu_train_batch_size=config.per_gpu_train_batch_size,
            per_gpu_unlabeled_batch_size=config.per_gpu_unlabeled_batch_size,
            n_gpu=config.n_gpu,
            num_train_epochs=config.num_train_epochs,
            max_steps=config.max_steps,
            min_steps=config.min_steps,
            gradient_accumulation_steps=config.gradient_accumulation_steps,
            weight_decay=config.weight_decay,
            learning_rate=config.learning_rate,
            adam_epsilon=config.adam_epsilon,
            warmup_steps=config.warmup_steps,
            max_grad_norm=config.max_grad_norm,
            logging_steps=config.logging_steps,
            logging_number=config.logging_number,
            unlabeled_data=unlabeled_data if config.lm_training or config.use_logits else None,
            lm_training=config.lm_training,
            use_logits=config.use_logits,
            alpha=config.alpha,
            temperature=config.temperature,
            output_dir=output_dir,
            eval_kwargs=eval_kwargs,
            local_rank=local_rank,
        )
        results_dict["global_step"] = global_step
        results_dict["average_loss"] = tr_loss

    if train_data and return_train_set_results:
        results_dict["train_set_after_training"] = evaluate(
            model, train_data, eval_config, local_rank=local_rank)["scores"]["acc"]

    return results_dict
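# --- Usage sketch (not from the repository) --------------------------------
# A hypothetical call into train_single_model() above. The configs are
# assumed to be constructed by the repo's setup code (their field names
# mirror the attributes read inside the function), and "output/p0-i0" is an
# illustrative output path.
def _example_train_single_model(wrapper, train_examples, dev_examples,
                                train_config, eval_config):
    stats = train_single_model(wrapper, train_examples, train_config,
                               "output/p0-i0",
                               dev_data=dev_examples,
                               eval_config=eval_config,
                               unlabeled_data=None,
                               return_train_set_results=True)
    # stats holds 'global_step', 'average_loss' and, when requested,
    # train-set accuracy before and after training.
    return stats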
def train_pet_ensemble(model_config: WrapperConfig,
                       train_config: TrainConfig,
                       eval_config: EvalConfig,
                       pattern_ids: List[Union[str, int]],
                       output_dir: str,
                       ipet_data_dir: str = None,
                       repetitions: int = 3,
                       train_data: List[InputExample] = None,
                       unlabeled_data: List[InputExample] = None,
                       dev_data: List[InputExample] = None,
                       test_data: List[InputExample] = None,
                       do_train: bool = True,
                       do_eval: bool = True,
                       save_unlabeled_logits: bool = False,
                       seed: int = 42,
                       overwrite_dir: bool = False,
                       save_model=False,
                       local_rank=-1):
    """
    Train and evaluate an ensemble of PET models without knowledge distillation.

    :param model_config: the model configuration to use
    :param train_config: the training configuration to use
    :param eval_config: the evaluation configuration to use
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param ipet_data_dir: optional directory containing additional training data for iPET
    :param repetitions: the number of training repetitions
    :param train_data: the training examples to use
    :param unlabeled_data: the unlabeled examples to use
    :param dev_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param save_unlabeled_logits: whether logits for unlabeled examples should be saved in a file
           ``logits.txt``. This is required for both iPET and knowledge distillation.
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    set_seed(seed)

    for pattern_id in pattern_ids:
        for iteration in range(repetitions):
            model_config.pattern_id = pattern_id
            results_dict = {}

            shots = 0 if train_data is None else len(train_data)
            pattern_iter_output_dir = "{}/{}shots-{}-i{}-seed{}".format(
                output_dir, shots, pattern_name(pattern_id), iteration, seed)

            if os.path.exists(pattern_iter_output_dir) and not overwrite_dir:
                logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
                continue

            if not os.path.exists(pattern_iter_output_dir) and local_rank in [-1, 0]:
                os.makedirs(pattern_iter_output_dir)

            wrapper = init_model(model_config)

            # Training
            if do_train:
                if ipet_data_dir:
                    p = os.path.join(ipet_data_dir,
                                     "{}-i{}-train.bin".format(pattern_name(pattern_id), iteration))
                    ipet_train_data = InputExample.load_examples(p)
                    for example in ipet_train_data:
                        example.logits = None
                else:
                    ipet_train_data = None

                results_dict.update(
                    train_single_model(
                        wrapper,
                        train_data,
                        train_config,
                        pattern_iter_output_dir,
                        dev_data,
                        eval_config,
                        ipet_train_data=ipet_train_data,
                        unlabeled_data=unlabeled_data,
                        return_train_set_results=False,
                        local_rank=local_rank,
                    ))

                with open(os.path.join(pattern_iter_output_dir, "results.txt"), "w") as fh:
                    fh.write(str(results_dict))

                if local_rank in [-1, 0]:
                    logger.info("Saving trained model at {}...".format(pattern_iter_output_dir))
                    train_config.save(os.path.join(pattern_iter_output_dir, "train_config.json"))
                    eval_config.save(os.path.join(pattern_iter_output_dir, "eval_config.json"))
                    logger.info("Saving complete")

                    if save_unlabeled_logits:
                        logits = evaluate(wrapper, unlabeled_data, eval_config,
                                          local_rank=local_rank)["logits"]
                        save_logits(os.path.join(pattern_iter_output_dir, "logits.txt"), logits)

                if not do_eval:
                    wrapper.model = None
                    wrapper = None
                    torch.cuda.empty_cache()

            # Evaluation
            if do_eval:
                logger.info("Starting evaluation...")

                try:
                    wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)
                except OSError:
                    warnings.warn("No model found saved, proceeding with current model instead of best")

                for split, eval_data in {"dev": dev_data, "test": test_data}.items():
                    if eval_data is None:
                        continue

                    eval_result = evaluate(wrapper, eval_data, eval_config,
                                           priming_data=train_data, local_rank=local_rank)

                    if local_rank in [-1, 0]:
                        save_predictions(
                            os.path.join(pattern_iter_output_dir, "predictions.jsonl"),
                            wrapper, eval_result)
                        save_logits(
                            os.path.join(pattern_iter_output_dir, "eval_logits.txt"),
                            eval_result["logits"])

                        scores = eval_result["scores"]
                        logger.info("--- {} result (pattern_id={}, iteration={}) ---".format(
                            split, pattern_id, iteration))
                        logger.info(scores)

                        results_dict[f"{split}_set_after_training"] = scores
                        with open(os.path.join(pattern_iter_output_dir, "results.json"), "w") as fh:
                            json.dump(results_dict, fh)

                        for metric, value in scores.items():
                            results[split][metric][pattern_id].append(value)

                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

    if do_eval:
        logger.info("=== OVERALL RESULTS ===")
        results_to_log = _write_results(os.path.join(output_dir, "result_test.txt"), results)
    else:
        logger.info("=== ENSEMBLE TRAINING COMPLETE ===")
        results_to_log = None

    if do_train and not save_model:
        outputs = os.listdir(pattern_iter_output_dir)
        for item in outputs:
            if item.endswith(".bin"):
                os.remove(os.path.join(pattern_iter_output_dir, item))

    return results_to_log
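# --- Usage sketch (not from the repository) --------------------------------
# A hypothetical end-to-end call of train_pet_ensemble() above. The three
# configs are assumed to come from the repo's setup code; the output
# directory is illustrative, and per-run subdirectories follow the
# "{shots}shots-{pattern}-i{iteration}-seed{seed}" format string used above.
def _example_ensemble_run(model_config, train_config, eval_config,
                          train_examples, dev_examples, test_examples):
    return train_pet_ensemble(model_config, train_config, eval_config,
                              pattern_ids=[0, 1, 2],
                              output_dir="output/ensemble",  # assumed path
                              repetitions=3,
                              train_data=train_examples,
                              dev_data=dev_examples,
                              test_data=test_examples,
                              do_train=True, do_eval=True,
                              save_unlabeled_logits=False)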
def init_model(config: WrapperConfig) -> TransformerModelWrapper:
    """Initialize a new model from the given config."""
    assert config.pattern_id is not None, "A pattern_id must be set for initializing a new PET model"
    model = TransformerModelWrapper(config)
    return model
def evaluate(model: TransformerModelWrapper,
             eval_data: List[InputExample],
             config: EvalConfig,
             priming_data: List[InputExample] = None) -> Dict:
    """
    Evaluate a model.

    :param model: the model to evaluate
    :param eval_data: the examples for evaluation
    :param config: the evaluation config
    :param priming_data: an optional list of priming data to use
    :return: a dictionary containing the model's logits, predictions and (if any metrics are given) scores
    """
    if config.priming:
        for example in eval_data:
            example.meta['priming_data'] = priming_data

    metrics = config.metrics if config.metrics else ['acc']
    device = torch.device(config.device if config.device
                          else "cuda" if torch.cuda.is_available() else "cpu")
    model.model.to(device)

    results = model.eval(eval_data,
                         device,
                         per_gpu_eval_batch_size=config.per_gpu_eval_batch_size,
                         n_gpu=config.n_gpu,
                         decoding_strategy=config.decoding_strategy,
                         priming=config.priming)

    predictions = np.argmax(results['logits'], axis=1)
    scores = {}
    for metric in metrics:
        if metric == 'acc':
            scores[metric] = simple_accuracy(predictions, results['labels'])
        elif metric == 'f1':
            scores[metric] = f1_score(results['labels'], predictions)
        elif metric == 'f1-macro':
            scores[metric] = f1_score(results['labels'], predictions, average='macro')
        elif metric == 'em':
            scores[metric] = exact_match(predictions, results['labels'], results['question_ids'])
        elif metric == 'dist-loss':
            if eval_data[0].logits is not None:
                scores[metric] = distillation_loss(
                    torch.tensor(results['logits']),
                    torch.stack([torch.tensor(ex.logits, dtype=torch.float32) for ex in eval_data]),
                    config.temperature)
            else:
                scores[metric] = 0.0
        else:
            raise ValueError(f"Metric '{metric}' not implemented")

    results['scores'] = scores
    results['predictions'] = predictions
    return results
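# --- Reference sketch (not the repository's implementation) -----------------
# distillation_loss() used by the 'dist-loss' metric above is defined
# elsewhere in the repo. The version below is a standard temperature-scaled
# soft cross-entropy in the style of Hinton et al. (2015), offered only as a
# plausible stand-in for readers; the actual helper may differ (e.g. use a
# KL divergence with temperature-squared scaling).
import torch
import torch.nn.functional as F

def distillation_loss_sketch(student_logits: torch.Tensor,
                             teacher_logits: torch.Tensor,
                             temperature: float) -> torch.Tensor:
    """Mean soft cross-entropy between temperature-scaled teacher and
    student distributions."""
    teacher_probs = F.softmax(teacher_logits / temperature, dim=-1)
    student_log_probs = F.log_softmax(student_logits / temperature, dim=-1)
    # Cross-entropy H(teacher, student) per example, averaged over the batch.
    return -(teacher_probs * student_log_probs).sum(dim=-1).mean()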
def train_single_model(model: TransformerModelWrapper,
                       train_data: List[InputExample],
                       config: TrainConfig,
                       eval_config: EvalConfig = None,
                       ipet_train_data: List[InputExample] = None,
                       unlabeled_data: List[InputExample] = None,
                       return_train_set_results: bool = True):
    """
    Train a single model.

    :param model: the model to train
    :param train_data: the training examples to use
    :param config: the training config
    :param eval_config: the evaluation config
    :param ipet_train_data: an optional list of iPET training examples to use
    :param unlabeled_data: an optional list of unlabeled examples to use
    :param return_train_set_results: whether results on the train set before and after training
           should be computed and returned
    :return: a dictionary containing the global step, average loss and (optionally) results on the train set
    """
    device = torch.device(config.device if config.device
                          else "cuda" if torch.cuda.is_available() else "cpu")
    if not ipet_train_data:
        ipet_train_data = []

    results_dict = {}
    model.model.to(device)

    if train_data and return_train_set_results:
        results_dict['train_set_before_training'] = evaluate(
            model, train_data, eval_config)['scores']['acc']

    all_train_data = train_data + ipet_train_data

    if not all_train_data and not config.use_logits:
        logger.warning('Training method was called without training examples')
    else:
        global_step, tr_loss = model.train(
            all_train_data,
            device,
            per_gpu_train_batch_size=config.per_gpu_train_batch_size,
            per_gpu_unlabeled_batch_size=config.per_gpu_unlabeled_batch_size,
            n_gpu=config.n_gpu,
            num_train_epochs=config.num_train_epochs,
            max_steps=config.max_steps,
            gradient_accumulation_steps=config.gradient_accumulation_steps,
            weight_decay=config.weight_decay,
            learning_rate=config.learning_rate,
            adam_epsilon=config.adam_epsilon,
            warmup_steps=config.warmup_steps,
            max_grad_norm=config.max_grad_norm,
            unlabeled_data=unlabeled_data if config.lm_training or config.use_logits else None,
            lm_training=config.lm_training,
            use_logits=config.use_logits,
            alpha=config.alpha,
            temperature=config.temperature,
            mlm_logits=config.mlm_logits)
        results_dict['global_step'] = global_step
        results_dict['average_loss'] = tr_loss

    if train_data and return_train_set_results:
        results_dict['train_set_after_training'] = evaluate(
            model, train_data, eval_config)['scores']['acc']

    return results_dict
def train_pet_ensemble(model_config: WrapperConfig,
                       train_config: TrainConfig,
                       eval_config: EvalConfig,
                       pattern_ids: List[int],
                       output_dir: str,
                       ipet_data_dir: str = None,
                       repetitions: int = 3,
                       train_data: List[InputExample] = None,
                       unlabeled_data: List[InputExample] = None,
                       eval_data: List[InputExample] = None,
                       do_train: bool = True,
                       do_eval: bool = True,
                       save_unlabeled_logits: bool = False,
                       seed: int = 42):
    """
    Train and evaluate an ensemble of PET models without knowledge distillation.

    :param model_config: the model configuration to use
    :param train_config: the training configuration to use
    :param eval_config: the evaluation configuration to use
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param ipet_data_dir: optional directory containing additional training data for iPET
    :param repetitions: the number of training repetitions
    :param train_data: the training examples to use
    :param unlabeled_data: the unlabeled examples to use
    :param eval_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param save_unlabeled_logits: whether logits for unlabeled examples should be saved in a file
           ``logits.txt``. This is required for both iPET and knowledge distillation.
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    set_seed(seed)

    for pattern_id in pattern_ids:
        for iteration in range(repetitions):
            model_config.pattern_id = pattern_id
            results_dict = {}

            pattern_iter_output_dir = "{}/p{}-i{}".format(output_dir, pattern_id, iteration)

            if os.path.exists(pattern_iter_output_dir):
                logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
                continue

            if not os.path.exists(pattern_iter_output_dir):
                os.makedirs(pattern_iter_output_dir)

            wrapper = init_model(model_config)

            # Training
            if do_train:
                if ipet_data_dir:
                    p = os.path.join(ipet_data_dir, 'p{}-i{}-train.bin'.format(pattern_id, iteration))
                    ipet_train_data = InputExample.load_examples(p)
                    for example in ipet_train_data:
                        example.logits = None
                else:
                    ipet_train_data = None

                results_dict.update(
                    train_single_model(wrapper, train_data, train_config, eval_config,
                                       ipet_train_data=ipet_train_data,
                                       unlabeled_data=unlabeled_data))

                with open(os.path.join(pattern_iter_output_dir, 'results.txt'), 'w') as fh:
                    fh.write(str(results_dict))

                logger.info("Saving trained model at {}...".format(pattern_iter_output_dir))
                wrapper.save(pattern_iter_output_dir)
                train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
                eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
                logger.info("Saving complete")

                if save_unlabeled_logits:
                    logits = evaluate(wrapper, unlabeled_data, eval_config)['logits']
                    save_logits(os.path.join(pattern_iter_output_dir, 'logits.txt'), logits)

                if not do_eval:
                    wrapper.model = None
                    wrapper = None
                    torch.cuda.empty_cache()

            # Evaluation
            if do_eval:
                logger.info("Starting evaluation...")
                if not wrapper:
                    wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)

                eval_result = evaluate(wrapper, eval_data, eval_config, priming_data=train_data)

                save_predictions(os.path.join(pattern_iter_output_dir, 'predictions.jsonl'),
                                 wrapper, eval_result)
                save_logits(os.path.join(pattern_iter_output_dir, 'eval_logits.txt'),
                            eval_result['logits'])

                scores = eval_result['scores']
                logger.info("--- RESULT (pattern_id={}, iteration={}) ---".format(pattern_id, iteration))
                logger.info(scores)

                results_dict['test_set_after_training'] = scores
                with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                    json.dump(results_dict, fh)

                for metric, value in scores.items():
                    results[metric][pattern_id].append(value)

                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

    if do_eval:
        logger.info("=== OVERALL RESULTS ===")
        _write_results(os.path.join(output_dir, 'result_test.txt'), results)
    else:
        logger.info("=== ENSEMBLE TRAINING COMPLETE ===")
def main():
    parser = argparse.ArgumentParser()

    # required parameters
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory. The verbalizers are written to a file "
                             "'verbalizers.json' in this directory.")
    parser.add_argument("--data_dir", default=None, type=str, required=True,
                        help="The input data dir. Should contain the data files for the task.")
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="The model type")
    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
                        help="Path to pre-trained model or shortcut name")
    parser.add_argument("--task_name", default=None, type=str, required=True,
                        help="The name of the task to train selected in the list: " +
                             ", ".join(PROCESSORS.keys()))

    # verbalizer search hyperparameters
    parser.add_argument("--normalize", action="store_true",
                        help="Whether to normalize the loss as proposed in the paper. "
                             "It is recommended to set this to 'true'.")
    parser.add_argument("--combine_patterns", action="store_true",
                        help="If set to true, a single joint verbalizer is searched for all patterns")
    parser.add_argument("--num_candidates", default=1000, type=int,
                        help="The number of candidate tokens to consider as verbalizers "
                             "(see Section 4.1 of the paper)")
    parser.add_argument("--words_per_label", default=10, type=int,
                        help="The number of verbalizer tokens to assign to each label")
    parser.add_argument("--score_fct", default="llr", choices=["llr", "ce", "random"],
                        help="The function used to score verbalizers. Choices are: the log-likelihood "
                             "ratio loss proposed in the paper ('llr'), cross-entropy loss ('ce') and "
                             "'random', which assigns random tokens to each label.")

    # other optional parameters
    parser.add_argument("--train_examples", default=50, type=int,
                        help="The total number of train examples to use, where -1 equals all examples.")
    parser.add_argument("--pattern_ids", default=[0], type=int, nargs="+",
                        help="The ids of the PVPs to be used")
    parser.add_argument("--max_seq_length", default=256, type=int,
                        help="The maximum total input sequence length after tokenization. Sequences "
                             "longer than this will be truncated, sequences shorter will be padded.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--words_file", default=None, type=str,
                        help="Path to a file containing (unlabeled) texts from the task's domain. "
                             "This text is used to compute verbalization candidates by selecting "
                             "the most frequent words.")
    parser.add_argument("--max_words", default=10000, type=int,
                        help="Only the 10,000 tokens that occur most frequently in the task's "
                             "unlabeled data (see --words_file) are considered as verbalization "
                             "candidates")
    parser.add_argument("--additional_input_examples", type=str,
                        help="An optional path to an additional set of input examples "
                             "(e.g., obtained using iPET)")
    parser.add_argument("--seed", default=42, type=int,
                        help="random seed for initialization")

    args = parser.parse_args()
    random.seed(args.seed)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    with open(os.path.join(args.output_dir, "config.txt"), "w", encoding="utf8") as fh:
        json.dump(args.__dict__, fh, indent=2)

    # setup gpu/cpu
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.n_gpu = torch.cuda.device_count()

    # prepare task
    args.task_name = args.task_name.lower()
    if args.task_name not in PROCESSORS:
        raise ValueError("Task not found: {}".format(args.task_name))
    processor = PROCESSORS[args.task_name]()
    args.label_list = processor.get_labels()

    args.cache_dir = ""
    args.do_lower_case = False
    args.verbalizer_file = None
    args.wrapper_type = "mlm"

    # get training data
    train_examples_per_label = (eq_div(args.train_examples, len(args.label_list))
                                if args.train_examples != -1 else -1)
    train_data = load_examples(args.task_name, args.data_dir, set_type=TRAIN_SET,
                               num_examples_per_label=train_examples_per_label)

    if args.additional_input_examples:
        additional_data = InputExample.load_examples(args.additional_input_examples)
        train_data += additional_data
        logger.info(f"Loaded {len(additional_data)} additional examples from "
                    f"{args.additional_input_examples}, total training set size is now "
                    f"{len(train_data)}")

    expected = {
        label: np.array([1 if x.label == label else 0 for x in train_data])
        for label in args.label_list
    }

    if args.words_file:
        with open(args.words_file, "r", encoding="utf8") as fh:
            word_counts = Counter(fh.read().split())
    else:
        word_counts = None

    tokenizer_class = MODEL_CLASSES[args.model_type]["tokenizer"]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
    word2idx = get_word_to_id_map(tokenizer, word_counts=word_counts, max_words=args.max_words)

    logits = []

    for pattern_id in args.pattern_ids:
        logger.info(f"Processing examples with pattern id {pattern_id}...")
        args.pattern_id = pattern_id

        config = WrapperConfig(model_type=args.model_type,
                               model_name_or_path=args.model_name_or_path,
                               wrapper_type="mlm",
                               task_name=args.task_name,
                               max_seq_length=args.max_seq_length,
                               label_list=args.label_list,
                               pattern_id=args.pattern_id)

        wrapper = TransformerModelWrapper(config)
        wrapper.model.to(device)

        # modify all patterns so that they return a single text segment instead of two segments
        get_parts = wrapper.preprocessor.pvp.get_parts
        wrapper.preprocessor.pvp.get_parts = lambda example: (
            get_parts(example)[0] + get_parts(example)[1], [])
        wrapper.preprocessor.pvp.convert_mlm_logits_to_cls_logits = \
            lambda mask, x, _=None: x[mask >= 0]

        pattern_logits = wrapper.eval(
            train_data, device,
            per_gpu_eval_batch_size=args.per_gpu_eval_batch_size,
            n_gpu=args.n_gpu)["logits"]
        pattern_logits = pattern_logits - np.expand_dims(np.max(pattern_logits, axis=1), axis=1)
        logits.append(pattern_logits)

    logger.info("Starting verbalizer search...")

    if args.combine_patterns:
        avs = AutomaticVerbalizerSearch(word2idx, args.label_list, logits, expected)
        verbalizer = avs.find_verbalizer(num_candidates=args.num_candidates,
                                         words_per_label=args.words_per_label,
                                         normalize=args.normalize,
                                         score_fct=args.score_fct)
        verbalizers = {pattern_id: verbalizer for pattern_id in args.pattern_ids}
    else:
        verbalizers = {}
        for idx, pattern_id in enumerate(args.pattern_ids):
            avs = AutomaticVerbalizerSearch(word2idx, args.label_list, [logits[idx]], expected)
            verbalizers[pattern_id] = avs.find_verbalizer(num_candidates=args.num_candidates,
                                                          words_per_label=args.words_per_label,
                                                          normalize=args.normalize,
                                                          score_fct=args.score_fct)

    print(json.dumps(verbalizers, indent=2))
    logger.info("Verbalizer search complete, writing output...")

    with open(os.path.join(args.output_dir, "verbalizers.json"), "w", encoding="utf8") as fh:
        json.dump(verbalizers, fh, indent=2)

    logger.info("Done")
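# --- Output format sketch (hypothetical) ------------------------------------
# The script above writes one verbalizer per pattern id, assigning
# --words_per_label tokens to each label. Only the nesting
# (pattern id -> label -> token list) is implied by the code; the concrete
# words below are made up for illustration:
#
#   {
#     "0": {"1": ["great", "good", ...], "0": ["terrible", "bad", ...]}
#   }
#
# A minimal consumer of the produced file might look like this:
import json, os

def _load_verbalizers(output_dir):
    with open(os.path.join(output_dir, "verbalizers.json"), encoding="utf8") as fh:
        return json.load(fh)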
def train_pet(train_data: List[InputExample],
              eval_data: List[InputExample],
              dev32_data: List[InputExample],
              model_config: WrapperConfig,
              train_config: TrainConfig,
              eval_config: EvalConfig,
              pattern_ids: List[int],
              output_dir: str,
              repetitions: int = 3,
              do_train: bool = True,
              do_eval: bool = True,
              seed: int = 42):
    """
    Train and evaluate a new PET model for a given task.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param eval_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    # set_seed(seed)

    assert model_config.task_type == "single_task"

    for pattern_id in pattern_ids:  # only a single pattern is used here
        model_config.pattern_id = pattern_id
        results_dict = {}

        pattern_iter_output_dir = "{}/p{}-i{}".format(output_dir, pattern_id, 1)

        # if os.path.exists(pattern_iter_output_dir):
        #     logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
        #     continue

        if not os.path.exists(pattern_iter_output_dir):
            os.makedirs(pattern_iter_output_dir)

        wrapper = init_model(model_config)  # initialize a model

        # Training
        if do_train:
            # train for multiple epochs and store the training results in results_dict
            results_dict.update(train_single_model(train_data, eval_data, dev32_data,
                                                   pattern_iter_output_dir,
                                                   wrapper, train_config, eval_config))

            with open(os.path.join(pattern_iter_output_dir, 'results.txt'), 'w') as fh:
                fh.write(str(results_dict))

            train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
            eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
            logger.info("Saving complete")

            if not do_eval:
                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

        # Evaluation
        if do_eval:
            logger.info("Starting evaluation...")
            logger.info("Single task {}: {} test examples".format(
                model_config.task_name, len(eval_data)))
            logger.info("************Test Example:**************")
            logger.info("text_a={}".format(eval_data[0].text_a))
            logger.info("text_b={}".format(eval_data[0].text_b))
            logger.info("task={}".format(eval_data[0].task))
            logger.info("label={}".format(eval_data[0].label))
            logger.info("**********************************")

            # if not wrapper:
            wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)

            eval_result = evaluate(wrapper, eval_data, eval_config)
            # dev32_result = evaluate(wrapper, dev32_data, eval_config)

            save_predictions(os.path.join(pattern_iter_output_dir, 'eval_predictions.jsonl'),
                             wrapper, eval_result)
            save_logits(os.path.join(pattern_iter_output_dir, 'eval_logits.txt'),
                        eval_result['logits'])
            # save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'),
            #                  wrapper, dev32_result)
            # save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'),
            #             dev32_result['logits'])

            logger.info("--- RESULT (pattern_id={}, Task={}) ---".format(
                pattern_id, model_config.task_name))
            logger.info("eval_results:")
            logger.info(eval_result['scores'])
            # logger.info("dev32_results:")
            # logger.info(dev32_result['scores'])

            # results_dict['eval_set_after_training'] = eval_result['scores']
            # results_dict['dev32_set_after_training'] = dev32_result['scores']
            # with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
            #     json.dump(results_dict, fh)
            #
            # for metric, value in eval_result['scores'].items():
            #     results[metric][pattern_id].append(value)
            #
            # for metric, value in dev32_result['scores'].items():
            #     dev32_results[metric][pattern_id].append(value)

            wrapper.model = None
            wrapper = None
            torch.cuda.empty_cache()
def train_pet_cross(train_data: List[InputExample],
                    # eval_data: List[InputExample],
                    dev32_data: List[InputExample],
                    model_config: WrapperConfig,
                    train_config: TrainConfig,
                    eval_config: EvalConfig,
                    pattern_ids: List[int],
                    output_dir: str,
                    repetitions: int = 3,
                    do_train: bool = True,
                    do_eval: bool = True,
                    seed: int = 42):
    """
    Train and evaluate a new PET model for a given task.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    # set_seed(seed)

    assert model_config.task_type == "cross_task"
    # In the cross-task setting, task_name is the name of a task group,
    # so fetch all tasks belonging to that group.
    tasks = groups[model_config.task_name]

    for pattern_id in pattern_ids:  # only a single pattern is selected
        model_config.pattern_id = pattern_id
        results_dict = {}

        pattern_iter_output_dir = "{}/p{}-i{}".format(output_dir, pattern_id, 1)

        # if os.path.exists(pattern_iter_output_dir):
        #     logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
        #     continue

        if not os.path.exists(pattern_iter_output_dir):
            os.makedirs(pattern_iter_output_dir)

        wrapper = init_model(model_config)  # initialize a model

        # Training
        if do_train:
            # train for multiple epochs and store the training results in results_dict
            # edit by wjn: eval_data -> None
            results_dict.update(train_single_model(train_data, None, dev32_data,
                                                   pattern_iter_output_dir,
                                                   wrapper, train_config, eval_config))

            with open(os.path.join(pattern_iter_output_dir, 'results.txt'), 'w') as fh:
                fh.write(str(results_dict))

            train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
            eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
            logger.info("Saving complete")

            if not do_eval:
                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

        # Evaluation
        if do_eval:
            logger.info("Starting evaluation...")
            # if not wrapper:
            wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)

            cross_data_dir = "data/k-shot-cross/"  # add by wjn
            # In the cross-task setting, evaluate every task in the current group separately.
            for task_name in tasks:
                eval_data = load_examples(
                    task_name,
                    cross_data_dir + data_to_name[task_name] + "/" +
                    str(model_config.k) + "-" + str(seed),
                    TEST_SET,
                    num_examples=-1,
                    num_examples_per_label=None)

                logger.info("Group {}, task {}: {} test examples".format(
                    model_config.task_name, task_name, len(eval_data)))
                logger.info("************Test Example:**************")
                logger.info("text_a={}".format(eval_data[0].text_a))
                logger.info("text_b={}".format(eval_data[0].text_b))
                logger.info("task={}".format(eval_data[0].task))
                logger.info("label={}".format(eval_data[0].label))
                logger.info("**********************************")

                # update the metrics for the current group's task
                eval_config.metrics = METRICS.get(task_name, DEFAULT_METRICS)

                eval_result = evaluate(wrapper, eval_data, eval_config)
                # dev32_result = evaluate(wrapper, dev32_data, eval_config)

                save_predictions(os.path.join(pattern_iter_output_dir, 'eval_predictions.jsonl'),
                                 wrapper, eval_result)
                save_logits(os.path.join(pattern_iter_output_dir, 'eval_logits.txt'),
                            eval_result['logits'])
                # save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'),
                #                  wrapper, dev32_result)
                # save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'),
                #             dev32_result['logits'])

                logger.info("--- RESULT (pattern_id={}, Group={}, Task={}) ---".format(
                    pattern_id, model_config.task_name, task_name))
                logger.info("eval_results:")
                logger.info(eval_result['scores'])
                # logger.info("dev32_results:")
                # logger.info(dev32_result['scores'])

                # results_dict['eval_set_after_training'] = eval_result['scores']
                # results_dict['dev32_set_after_training'] = dev32_result['scores']
                # with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                #     json.dump(results_dict, fh)
                #
                # for metric, value in eval_result['scores'].items():
                #     results[metric][pattern_id].append(value)
                #
                # for metric, value in dev32_result['scores'].items():
                #     dev32_results[metric][pattern_id].append(value)

            wrapper.model = None
            wrapper = None
            torch.cuda.empty_cache()
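# --- Shape sketch (hypothetical) ---------------------------------------------
# The groups and data_to_name mappings referenced by train_pet_cross() are
# defined elsewhere in the repository. The values below only illustrate the
# shape the function relies on; the group compositions and directory names
# here are made up.
groups_example = {
    "sentiment": ["sst-2", "mr", "cr"],  # group name -> member task names (assumed)
}
data_to_name_example = {
    "sst-2": "SST-2",                    # task name -> dataset directory name (assumed)
    "mr": "MR",
    "cr": "CR",
}
# Test data for a member task is then read from a path of the form:
#   data/k-shot-cross/{data_to_name[task]}/{k}-{seed}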
def train_pet(train_data: List[InputExample],
              eval_data: List[InputExample],
              dev32_data: List[InputExample],
              model_config: WrapperConfig,
              train_config: TrainConfig,
              eval_config: EvalConfig,
              pattern_ids: List[int],
              output_dir: str,
              repetitions: int = 3,
              do_train: bool = True,
              do_eval: bool = True,
              seed: int = 42):
    """
    Train and evaluate a new PET model for a given task.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param eval_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    set_seed(seed)

    for pattern_id in pattern_ids:
        for iteration in range(repetitions):
            model_config.pattern_id = pattern_id
            results_dict = {}

            pattern_iter_output_dir = "{}/p{}-i{}".format(output_dir, pattern_id, iteration)

            if os.path.exists(pattern_iter_output_dir):
                logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
                continue

            if not os.path.exists(pattern_iter_output_dir):
                os.makedirs(pattern_iter_output_dir)

            wrapper = init_model(model_config)

            # Training
            if do_train:
                results_dict.update(train_single_model(train_data, eval_data, dev32_data,
                                                       pattern_iter_output_dir,
                                                       wrapper, train_config, eval_config))

                with open(os.path.join(pattern_iter_output_dir, 'results.txt'), 'w') as fh:
                    fh.write(str(results_dict))

                train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
                eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
                logger.info("Saving complete")

                if not do_eval:
                    wrapper.model = None
                    wrapper = None
                    torch.cuda.empty_cache()

            # Evaluation
            if do_eval:
                logger.info("Starting evaluation...")
                # if not wrapper:
                wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)

                eval_result = evaluate(wrapper, eval_data, eval_config)
                dev32_result = evaluate(wrapper, dev32_data, eval_config)

                save_predictions(os.path.join(pattern_iter_output_dir, 'eval_predictions.jsonl'),
                                 wrapper, eval_result)
                save_logits(os.path.join(pattern_iter_output_dir, 'eval_logits.txt'),
                            eval_result['logits'])
                save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'),
                                 wrapper, dev32_result)
                save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'),
                            dev32_result['logits'])

                logger.info("--- RESULT (pattern_id={}, iteration={}) ---".format(
                    pattern_id, iteration))
                logger.info("eval_results:")
                logger.info(eval_result['scores'])
                logger.info("dev32_results:")
                logger.info(dev32_result['scores'])

                results_dict['eval_set_after_training'] = eval_result['scores']
                results_dict['dev32_set_after_training'] = dev32_result['scores']
                with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                    json.dump(results_dict, fh)

                for metric, value in eval_result['scores'].items():
                    results[metric][pattern_id].append(value)

                for metric, value in dev32_result['scores'].items():
                    dev32_results[metric][pattern_id].append(value)

                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

    if do_eval:
        logger.info("=== OVERALL RESULTS ===")
        _write_results(os.path.join(output_dir, 'result_test.txt'), results, dev32_results)
    else:
        logger.info("=== ENSEMBLE TRAINING COMPLETE ===")
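# --- Usage sketch (not from the repository) --------------------------------
# A hypothetical driver for the train_pet() variant above. The configs and
# example lists are assumed to come from the repo's data-loading and setup
# code, and "output/pet" is an illustrative output path; per-run
# subdirectories follow the "p{pattern_id}-i{iteration}" format used above.
def _example_train_pet(train_examples, test_examples, dev32_examples,
                       model_config, train_config, eval_config):
    train_pet(train_examples, test_examples, dev32_examples,
              model_config, train_config, eval_config,
              pattern_ids=[0],
              output_dir="output/pet",  # assumed path
              repetitions=3,
              do_train=True, do_eval=True,
              seed=42)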