def train_pet_ensemble(model_config: WrapperConfig, train_config: TrainConfig, eval_config: EvalConfig,
                       pattern_ids: List[int], output_dir: str, ipet_data_dir: str = None, repetitions: int = 3,
                       train_data: List[InputExample] = None, unlabeled_data: List[InputExample] = None,
                       eval_data: List[InputExample] = None, do_train: bool = True, do_eval: bool = True,
                       save_unlabeled_logits: bool = False, seed: int = 42):
    """
    Train and evaluate an ensemble of PET models without knowledge distillation.

    :param model_config: the model configuration to use
    :param train_config: the training configuration to use
    :param eval_config: the evaluation configuration to use
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param ipet_data_dir: optional directory containing additional training data for iPET
    :param repetitions: the number of training repetitions
    :param train_data: the training examples to use
    :param unlabeled_data: the unlabeled examples to use
    :param eval_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param save_unlabeled_logits: whether logits for unlabeled examples should be saved in a file ``logits.txt``.
           This is required for both iPET and knowledge distillation.
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    set_seed(seed)

    for pattern_id in pattern_ids:
        for iteration in range(repetitions):
            model_config.pattern_id = pattern_id
            results_dict = {}
            pattern_iter_output_dir = "{}/p{}-i{}".format(output_dir, pattern_id, iteration)

            if os.path.exists(pattern_iter_output_dir):
                logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
                continue

            if not os.path.exists(pattern_iter_output_dir):
                os.makedirs(pattern_iter_output_dir)

            wrapper = init_model(model_config)

            # Training
            if do_train:
                if ipet_data_dir:
                    p = os.path.join(ipet_data_dir, 'p{}-i{}-train.bin'.format(pattern_id, iteration))
                    ipet_train_data = InputExample.load_examples(p)
                    for example in ipet_train_data:
                        example.logits = None
                else:
                    ipet_train_data = None

                results_dict.update(train_single_model(wrapper, train_data, train_config, eval_config,
                                                       ipet_train_data=ipet_train_data,
                                                       unlabeled_data=unlabeled_data))

                with open(os.path.join(pattern_iter_output_dir, 'results.txt'), 'w') as fh:
                    fh.write(str(results_dict))

                logger.info("Saving trained model at {}...".format(pattern_iter_output_dir))
                wrapper.save(pattern_iter_output_dir)
                train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
                eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
                logger.info("Saving complete")

                if save_unlabeled_logits:
                    logits = evaluate(wrapper, unlabeled_data, eval_config)['logits']
                    save_logits(os.path.join(pattern_iter_output_dir, 'logits.txt'), logits)

                if not do_eval:
                    wrapper.model = None
                    wrapper = None
                    torch.cuda.empty_cache()

            # Evaluation
            if do_eval:
                logger.info("Starting evaluation...")
                if not wrapper:
                    wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)

                eval_result = evaluate(wrapper, eval_data, eval_config, priming_data=train_data)

                save_predictions(os.path.join(pattern_iter_output_dir, 'predictions.jsonl'), wrapper, eval_result)
                save_logits(os.path.join(pattern_iter_output_dir, 'eval_logits.txt'), eval_result['logits'])

                scores = eval_result['scores']
                logger.info("--- RESULT (pattern_id={}, iteration={}) ---".format(pattern_id, iteration))
                logger.info(scores)

                results_dict['test_set_after_training'] = scores
                with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                    json.dump(results_dict, fh)

                for metric, value in scores.items():
                    results[metric][pattern_id].append(value)

                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

    if do_eval:
        logger.info("=== OVERALL RESULTS ===")
        _write_results(os.path.join(output_dir, 'result_test.txt'), results)
    else:
        logger.info("=== ENSEMBLE TRAINING COMPLETE ===")
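# Usage sketch (illustrative only, not part of the original module): assuming `model_cfg`, `train_cfg`
# and `eval_cfg` have already been built elsewhere and the example lists loaded from the task
# processor, an ensemble over three PVPs could be trained and evaluated with:
#
#   train_pet_ensemble(model_cfg, train_cfg, eval_cfg, pattern_ids=[0, 1, 2],
#                      output_dir='output/pet', train_data=train_examples,
#                      unlabeled_data=unlabeled_examples, eval_data=test_examples,
#                      save_unlabeled_logits=True)
#
# With save_unlabeled_logits=True, each p{pattern_id}-i{iteration} directory also receives a
# logits.txt file, which later iPET generations or knowledge distillation can consume.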
def train_pet_ensemble(model_config: WrapperConfig, train_config: TrainConfig, eval_config: EvalConfig,
                       pattern_ids: List[Union[str, int]], output_dir: str, ipet_data_dir: str = None,
                       repetitions: int = 3, train_data: List[InputExample] = None,
                       unlabeled_data: List[InputExample] = None, dev_data: List[InputExample] = None,
                       test_data: List[InputExample] = None, do_train: bool = True, do_eval: bool = True,
                       save_unlabeled_logits: bool = False, seed: int = 42, overwrite_dir: bool = False,
                       save_model=False, local_rank=-1):
    """
    Train and evaluate an ensemble of PET models without knowledge distillation.

    :param model_config: the model configuration to use
    :param train_config: the training configuration to use
    :param eval_config: the evaluation configuration to use
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param ipet_data_dir: optional directory containing additional training data for iPET
    :param repetitions: the number of training repetitions
    :param train_data: the training examples to use
    :param unlabeled_data: the unlabeled examples to use
    :param dev_data: the development examples to use
    :param test_data: the test examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param save_unlabeled_logits: whether logits for unlabeled examples should be saved in a file ``logits.txt``.
           This is required for both iPET and knowledge distillation.
    :param seed: the random seed to use
    :param overwrite_dir: whether to reuse an existing pattern/iteration output directory
    :param save_model: whether to keep the trained model checkpoints (``*.bin``) after training
    :param local_rank: the local rank for distributed training (-1 for non-distributed)
    """
    results = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    set_seed(seed)

    for pattern_id in pattern_ids:
        for iteration in range(repetitions):
            model_config.pattern_id = pattern_id
            results_dict = {}
            shots = 0 if train_data is None else len(train_data)
            pattern_iter_output_dir = "{}/{}shots-{}-i{}-seed{}".format(
                output_dir, shots, pattern_name(pattern_id), iteration, seed)

            if os.path.exists(pattern_iter_output_dir) and not overwrite_dir:
                logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
                continue

            if not os.path.exists(pattern_iter_output_dir) and local_rank in [-1, 0]:
                os.makedirs(pattern_iter_output_dir)

            wrapper = init_model(model_config)

            # Training
            if do_train:
                if ipet_data_dir:
                    p = os.path.join(ipet_data_dir, "{}-i{}-train.bin".format(pattern_name(pattern_id), iteration))
                    ipet_train_data = InputExample.load_examples(p)
                    for example in ipet_train_data:
                        example.logits = None
                else:
                    ipet_train_data = None

                results_dict.update(train_single_model(wrapper, train_data, train_config, pattern_iter_output_dir,
                                                       dev_data, eval_config, ipet_train_data=ipet_train_data,
                                                       unlabeled_data=unlabeled_data,
                                                       return_train_set_results=False, local_rank=local_rank))

                with open(os.path.join(pattern_iter_output_dir, "results.txt"), "w") as fh:
                    fh.write(str(results_dict))

                if local_rank in [-1, 0]:
                    logger.info("Saving trained model at {}...".format(pattern_iter_output_dir))
                    train_config.save(os.path.join(pattern_iter_output_dir, "train_config.json"))
                    eval_config.save(os.path.join(pattern_iter_output_dir, "eval_config.json"))
                    logger.info("Saving complete")

                    if save_unlabeled_logits:
                        logits = evaluate(wrapper, unlabeled_data, eval_config, local_rank=local_rank)["logits"]
                        save_logits(os.path.join(pattern_iter_output_dir, "logits.txt"), logits)

                if not do_eval:
                    wrapper.model = None
                    wrapper = None
                    torch.cuda.empty_cache()

            # Evaluation
            if do_eval:
                logger.info("Starting evaluation...")
                try:
                    wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)
                except OSError:
                    warnings.warn("No model found saved, proceeding with current model instead of best")

                for split, eval_data in {"dev": dev_data, "test": test_data}.items():
                    if eval_data is None:
                        continue

                    eval_result = evaluate(wrapper, eval_data, eval_config, priming_data=train_data,
                                           local_rank=local_rank)

                    if local_rank in [-1, 0]:
                        save_predictions(os.path.join(pattern_iter_output_dir, "predictions.jsonl"),
                                         wrapper, eval_result)
                        save_logits(os.path.join(pattern_iter_output_dir, "eval_logits.txt"), eval_result["logits"])

                        scores = eval_result["scores"]
                        logger.info("--- {} result (pattern_id={}, iteration={}) ---".format(
                            split, pattern_id, iteration))
                        logger.info(scores)

                        results_dict[f"{split}_set_after_training"] = scores
                        with open(os.path.join(pattern_iter_output_dir, "results.json"), "w") as fh:
                            json.dump(results_dict, fh)

                        for metric, value in scores.items():
                            results[split][metric][pattern_id].append(value)

                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

    if do_eval:
        logger.info("=== OVERALL RESULTS ===")
        results_to_log = _write_results(os.path.join(output_dir, "result_test.txt"), results)
    else:
        logger.info("=== ENSEMBLE TRAINING COMPLETE ===")
        results_to_log = None

    if do_train and not save_model:
        outputs = os.listdir(pattern_iter_output_dir)
        for item in outputs:
            if item.endswith(".bin"):
                os.remove(os.path.join(pattern_iter_output_dir, item))

    return results_to_log
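# Usage sketch (illustrative only): this variant evaluates on both a dev and a test split and returns
# the value of _write_results. Assuming pre-built configs and data, a hypothetical call looks like:
#
#   scores = train_pet_ensemble(model_cfg, train_cfg, eval_cfg, pattern_ids=[0],
#                               output_dir='output/pet', train_data=train_examples,
#                               unlabeled_data=unlabeled_examples, dev_data=dev_examples,
#                               test_data=test_examples, overwrite_dir=True, save_model=False)
#
# pattern_ids may be ints or strings here (List[Union[str, int]]), and with save_model=False the
# *.bin checkpoints left in the pattern output directory are removed after training to save disk.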
def train_pet(train_data: List[InputExample], eval_data: List[InputExample], dev32_data: List[InputExample],
              model_config: WrapperConfig, train_config: TrainConfig, eval_config: EvalConfig,
              pattern_ids: List[int], output_dir: str, repetitions: int = 3,
              do_train: bool = True, do_eval: bool = True, seed: int = 42):
    """
    Train and evaluate a new PET model for a given task.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param eval_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    # set_seed(seed)

    assert model_config.task_type == "single_task"

    for pattern_id in pattern_ids:  # only a single pattern is used
        model_config.pattern_id = pattern_id
        results_dict = {}
        pattern_iter_output_dir = "{}/p{}-i{}".format(output_dir, pattern_id, 1)

        # if os.path.exists(pattern_iter_output_dir):
        #     logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
        #     continue

        if not os.path.exists(pattern_iter_output_dir):
            os.makedirs(pattern_iter_output_dir)

        wrapper = init_model(model_config)  # initialize a model

        # Training
        if do_train:
            # run multi-epoch training and store the training results in results_dict
            results_dict.update(train_single_model(train_data, eval_data, dev32_data, pattern_iter_output_dir,
                                                   wrapper, train_config, eval_config))

            with open(os.path.join(pattern_iter_output_dir, 'results.txt'), 'w') as fh:
                fh.write(str(results_dict))

            train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
            eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
            logger.info("Saving complete")

            if not do_eval:
                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

        # Evaluation
        if do_eval:
            logger.info("Starting evaluation...")
            logger.info("Single: Task {} 's Test examples number: {}".format(model_config.task_name, len(eval_data)))
            logger.info("************Test Example:**************")
            logger.info("text_a={}".format(eval_data[0].text_a))
            logger.info("text_b={}".format(eval_data[0].text_b))
            logger.info("task={}".format(eval_data[0].task))
            logger.info("label={}".format(eval_data[0].label))
            logger.info("**********************************")

            # if not wrapper:
            wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)

            eval_result = evaluate(wrapper, eval_data, eval_config)
            # dev32_result = evaluate(wrapper, dev32_data, eval_config)

            save_predictions(os.path.join(pattern_iter_output_dir, 'eval_predictions.jsonl'), wrapper, eval_result)
            save_logits(os.path.join(pattern_iter_output_dir, 'eval_logits.txt'), eval_result['logits'])
            # save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'), wrapper, dev32_result)
            # save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'), dev32_result['logits'])

            logger.info("--- RESULT (pattern_id={}, Task={}) ---".format(pattern_id, model_config.task_name))
            logger.info("eval_results:")
            logger.info(eval_result['scores'])
            # logger.info("dev32_results:")
            # logger.info(dev32_result['scores'])

            # results_dict['eval_set_after_training'] = eval_result['scores']
            # results_dict['dev32_set_after_training'] = dev32_result['scores']
            # with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
            #     json.dump(results_dict, fh)
            #
            # for metric, value in eval_result['scores'].items():
            #     results[metric][pattern_id].append(value)
            #
            # for metric, value in dev32_result['scores'].items():
            #     dev32_results[metric][pattern_id].append(value)

            wrapper.model = None
            wrapper = None
            torch.cuda.empty_cache()
def train_generalization_cross(unseen_task_train_data: List[InputExample], unseen_task_dev_data: List[InputExample],
                               seen_task_train_data: List[InputExample], seen_task_dev_data: List[InputExample],
                               # dev32_data: List[InputExample],
                               unseen_task: str, model_config: WrapperConfig, train_config: TrainConfig,
                               eval_config: EvalConfig, pattern_ids: List[int], output_dir: str,
                               repetitions: int = 3, do_train: bool = True, do_eval: bool = True, seed: int = 42):
    """
    Train and evaluate a new PET model for a given task.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param eval_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    set_seed(seed)

    assert model_config.task_type == "cross_task"

    for pattern_id in pattern_ids:  # only a single pattern is used
        model_config.pattern_id = pattern_id
        results_dict = {}
        pattern_iter_output_dir = "{}/{}/generalization/{}".format(output_dir, model_config.scene,
                                                                   model_config.task_name)
        if not os.path.exists(pattern_iter_output_dir):
            os.makedirs(pattern_iter_output_dir)

        # wrapper = TransPromptModelWrapper(model_config)  # initialize a TransPrompt model
        wrapper = TransPromptModelWrapper2(model_config)  # initialize a TransPrompt model
        # wrapper = TransformerModelWrapper(model_config)

        # Multi-Task Meta-Learning Training
        if do_train:
            logger.info("========= Stage1: Starting Fine-tuning Multi-Task Meta-Learner ... =========")
            # run multi-epoch training and store the training results in results_dict
            # edit by wjn: eval_data -> None
            results_dict.update(train_single_model(seen_task_train_data, None, seen_task_dev_data,
                                                   pattern_iter_output_dir, wrapper, train_config, eval_config,
                                                   use_debias=True))

            train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
            eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
            logger.info("Saving complete")

            if not do_eval:
                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()
            logger.info("========= Stage1: Finish Fine-tuning Multi-Task Meta-Learner =========")

        # Task Adaptation Fine-tune
        if do_eval:
            logger.info("========= Stage2: Starting Task Generalization (Unseen Task-Specific Fine-tuning) ... =========")
            # reload the trained meta-learner
            # wrapper = TransPromptModelWrapper.from_pretrained(pattern_iter_output_dir)
            wrapper = TransPromptModelWrapper2.from_pretrained(pattern_iter_output_dir)

            cross_data_dir = "data/k-shot-cross/"  # add by wjn

            # Task generalization: for each task of each group, fine-tune the trained meta-learner
            # again on the task-specific training set, select the model on the dev set, and finally
            # evaluate on the corresponding test set. Each task within a group is handled
            # independently at this stage.

            ### task-specific fine-tune
            logger.info("========= Stage2: Specific fine-tuning on Unseen Task {} =========".format(unseen_task))
            # wrapper.config.task_name = task_name  # switch the current task name for task-specific fine-tuning
            train_config.max_steps = eval_config.max_steps  # change max_steps for task-specific fine-tuning
            train_config.per_gpu_train_batch_size = eval_config.per_gpu_eval_batch_size  # change the batch size

            # continue task-specific fine-tuning of the meta-learner on the unseen task and save the model
            train_single_model(unseen_task_train_data, None, unseen_task_dev_data,
                               pattern_iter_output_dir + '/' + unseen_task,
                               wrapper, train_config, eval_config, use_debias=False)

            # load the model saved after task-specific fine-tuning
            # task_specific_wrapper = TransPromptModelWrapper.from_pretrained(pattern_iter_output_dir + '/' + unseen_task)
            task_specific_wrapper = TransPromptModelWrapper2.from_pretrained(pattern_iter_output_dir + '/' + unseen_task)

            logger.info("========= Stage2: Evaluating test set on Task {}".format(unseen_task))
            ### evaluate on test dataset
            eval_data = load_examples(
                unseen_task, cross_data_dir + data_to_name[unseen_task] + "/" + str(model_config.k) + "-" + str(seed),
                TEST_SET, num_examples=-1, num_examples_per_label=None)
            logger.info("Group {}: Task {} 's Test examples number: {}".format(model_config.task_name, unseen_task,
                                                                               len(eval_data)))
            # logger.info("************Test Example:**************")
            # logger.info("text_a={}".format(eval_data[0].text_a))
            # logger.info("text_b={}".format(eval_data[0].text_b))
            # logger.info("task={}".format(eval_data[0].task))
            # logger.info("label={}".format(eval_data[0].label))
            # logger.info("**********************************")

            # update the metrics for the current group task:
            eval_config.metrics = METRICS.get(unseen_task, DEFAULT_METRICS)  # metrics of the cross-task group
            eval_result = evaluate(task_specific_wrapper, eval_data, eval_config)

            save_predictions(os.path.join(pattern_iter_output_dir + '/' + unseen_task, 'eval_predictions.jsonl'),
                             task_specific_wrapper, eval_result)
            save_logits(os.path.join(pattern_iter_output_dir + '/' + unseen_task, 'eval_logits.txt'),
                        eval_result['logits'])
            # save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'), wrapper, dev32_result)
            # save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'), dev32_result['logits'])

            logger.info("--- Unseen Task Generalization Result (pattern_id={}, Group={}, Task={}) ---".format(
                pattern_id, model_config.task_name, unseen_task))
            logger.info("eval_results: {}".format(eval_result['scores']))

            task_specific_wrapper.model = None
            task_specific_wrapper = None
            wrapper.model = None
            wrapper = None
            torch.cuda.empty_cache()
def train_adaptation_cross(train_data: List[InputExample],
                           # eval_data: List[InputExample],
                           dev32_data: List[InputExample], model_config: WrapperConfig, train_config: TrainConfig,
                           eval_config: EvalConfig, pattern_ids: List[int], output_dir: str, repetitions: int = 3,
                           do_train: bool = True, do_eval: bool = True, seed: int = 42):
    """
    Train and evaluate a new PET model for a given task.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param eval_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    set_seed(seed)

    assert model_config.task_type == "cross_task"
    # In the cross-task setting, task_name is the name of the group, so collect all tasks in that group.
    tasks = groups[model_config.task_name]

    for pattern_id in pattern_ids:  # only a single pattern is used
        model_config.pattern_id = pattern_id
        results_dict = {}
        pattern_iter_output_dir = "{}/{}/adaptation/{}".format(output_dir, model_config.scene, model_config.task_name)
        if not os.path.exists(pattern_iter_output_dir):
            os.makedirs(pattern_iter_output_dir)

        # wrapper = TransPromptModelWrapper(model_config)  # initialize a TransPrompt model
        wrapper = TransPromptModelWrapper2(model_config)  # initialize a TransPrompt model
        # wrapper = TransformerModelWrapper(model_config)

        # Multi-Task Meta-Learning Training
        if do_train:
            logger.info("========= Stage1: Starting Fine-tuning Multi-Task Meta-Learner ... =========")
            # run multi-epoch training and store the training results in results_dict
            # edit by wjn: eval_data -> None
            results_dict.update(train_single_model(train_data, None, dev32_data, pattern_iter_output_dir,
                                                   wrapper, train_config, eval_config, use_debias=True))

            train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
            eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
            logger.info("Saving complete")

            if not do_eval:
                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()
            logger.info("========= Stage1: Finish Fine-tuning Multi-Task Meta-Learner =========")

        # Task Adaptation Fine-tune
        if do_eval:
            logger.info("========= Stage2: Starting Task Adaptation (Task-Specific Fine-tuning) ... =========")
            # keep the results of each run; load previous results if they exist
            t = time.time()
            ada_res_acc = dict()
            if os.path.exists('ada_res_acc.npy'):
                ada_res_acc = np.load('ada_res_acc.npy', allow_pickle=True)[()]  # dict {time: {task: acc, ...}}
            accs = dict()

            # reload the trained meta-learner
            # wrapper = TransPromptModelWrapper.from_pretrained(pattern_iter_output_dir)
            wrapper = TransPromptModelWrapper2.from_pretrained(pattern_iter_output_dir)

            cross_data_dir = "data/k-shot-cross/"  # add by wjn

            # Task adaptation: for each task of each group, fine-tune the trained meta-learner again
            # on the task-specific training set, select the model on the dev set, and finally evaluate
            # on the corresponding test set. Each task within a group is handled independently at this stage.

            # get the task-specific training and dev sets for every task in the cross-task group
            task_to_train_example, task_to_dev_example = dict(), dict()  # {task_name: [.., ..], ..}
            task_to_train_example = load_examples(model_config.task_name, None, SPE_TRAIN_SET, num_examples=-1,
                                                  num_examples_per_label=None, examples=train_data)
            task_to_dev_example = load_examples(model_config.task_name, None, SPE_DEV_SET, num_examples=-1,
                                                num_examples_per_label=None, examples=dev32_data)

            for ei, task_name in enumerate(tasks):
                ### task-specific fine-tune
                logger.info("========= Stage2.{}: Specific fine-tuning on Task {} =========".format(ei + 1, task_name))
                # wrapper.config.task_name = task_name  # switch the current task name for task-specific fine-tuning
                train_config.max_steps = eval_config.max_steps  # change max_steps for task-specific fine-tuning
                train_config.per_gpu_train_batch_size = eval_config.per_gpu_eval_batch_size  # change the batch size
                if task_name == 'mrpc':
                    # The training sets of the two tasks in group3 (MRPC and QQP) differ greatly in size,
                    # so MRPC gets its own fixed step budget and batch sizes.
                    train_config.max_steps = 4800
                    train_config.per_gpu_train_batch_size = 16
                    eval_config.per_gpu_eval_batch_size = 8

                # continue task-specific fine-tuning of the meta-learner and save the model
                train_single_model(task_to_train_example[data_to_name[task_name]], None,
                                   task_to_dev_example[data_to_name[task_name]],
                                   pattern_iter_output_dir + '/' + task_name,
                                   wrapper, train_config, eval_config, use_debias=False)

                # load the model saved after task-specific fine-tuning
                # task_specific_wrapper = TransPromptModelWrapper.from_pretrained(pattern_iter_output_dir + '/' + task_name)
                task_specific_wrapper = TransPromptModelWrapper2.from_pretrained(
                    pattern_iter_output_dir + '/' + task_name)

                logger.info("========= Stage2.{}: Evaluating test set on Task {}".format(ei + 1, task_name))
                ### evaluate on test dataset
                eval_data = load_examples(
                    task_name, cross_data_dir + data_to_name[task_name] + "/" + str(model_config.k) + "-" + str(seed),
                    TEST_SET, num_examples=-1, num_examples_per_label=None)
                logger.info("Group {}: Task {} 's Test examples number: {}".format(model_config.task_name, task_name,
                                                                                   len(eval_data)))
                # logger.info("************Test Example:**************")
                # logger.info("text_a={}".format(eval_data[0].text_a))
                # logger.info("text_b={}".format(eval_data[0].text_b))
                # logger.info("task={}".format(eval_data[0].task))
                # logger.info("label={}".format(eval_data[0].label))
                # logger.info("**********************************")

                # update the metrics for the current group task:
                eval_config.metrics = METRICS.get(task_name, DEFAULT_METRICS)  # metrics of the cross-task group
                eval_result = evaluate(task_specific_wrapper, eval_data, eval_config)

                save_predictions(os.path.join(pattern_iter_output_dir + '/' + task_name, 'eval_predictions.jsonl'),
                                 task_specific_wrapper, eval_result)
                save_logits(os.path.join(pattern_iter_output_dir + '/' + task_name, 'eval_logits.txt'),
                            eval_result['logits'])
                # save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'), wrapper, dev32_result)
                # save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'), dev32_result['logits'])

                logger.info("--- Task Adaptation Result (pattern_id={}, Group={}, Task={}) ---".format(
                    pattern_id, model_config.task_name, task_name))
                logger.info("eval_results: {}".format(eval_result['scores']))

                accs[task_name] = eval_result['scores']

                task_specific_wrapper.model = None
                task_specific_wrapper = None

            ada_res_acc[t] = accs
            np.save('ada_res_acc.npy', ada_res_acc)

            wrapper.model = None
            wrapper = None
            torch.cuda.empty_cache()
def train_pet_cross(train_data: List[InputExample],
                    # eval_data: List[InputExample],
                    dev32_data: List[InputExample], model_config: WrapperConfig, train_config: TrainConfig,
                    eval_config: EvalConfig, pattern_ids: List[int], output_dir: str, repetitions: int = 3,
                    do_train: bool = True, do_eval: bool = True, seed: int = 42):
    """
    Train and evaluate a new PET model for a given task.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param eval_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    # set_seed(seed)

    assert model_config.task_type == "cross_task"
    # In the cross-task setting, task_name is the name of the group, so collect all tasks in that group.
    tasks = groups[model_config.task_name]

    for pattern_id in pattern_ids:  # only a single pattern is used
        model_config.pattern_id = pattern_id
        results_dict = {}
        pattern_iter_output_dir = "{}/p{}-i{}".format(output_dir, pattern_id, 1)

        # if os.path.exists(pattern_iter_output_dir):
        #     logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
        #     continue

        if not os.path.exists(pattern_iter_output_dir):
            os.makedirs(pattern_iter_output_dir)

        wrapper = init_model(model_config)  # initialize a model

        # Training
        if do_train:
            # run multi-epoch training and store the training results in results_dict
            # edit by wjn: eval_data -> None
            results_dict.update(train_single_model(train_data, None, dev32_data, pattern_iter_output_dir,
                                                   wrapper, train_config, eval_config))

            with open(os.path.join(pattern_iter_output_dir, 'results.txt'), 'w') as fh:
                fh.write(str(results_dict))

            train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
            eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
            logger.info("Saving complete")

            if not do_eval:
                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

        # Evaluation
        if do_eval:
            logger.info("Starting evaluation...")
            # if not wrapper:
            wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)

            cross_data_dir = "data/k-shot-cross/"  # add by wjn
            # In the cross-task setting, evaluate each task of the current group separately.
            for task_name in tasks:
                eval_data = load_examples(
                    task_name, cross_data_dir + data_to_name[task_name] + "/" + str(model_config.k) + "-" + str(seed),
                    TEST_SET, num_examples=-1, num_examples_per_label=None)
                logger.info("Group {}: Task {} 's Test examples number: {}".format(model_config.task_name, task_name,
                                                                                   len(eval_data)))
                logger.info("************Test Example:**************")
                logger.info("text_a={}".format(eval_data[0].text_a))
                logger.info("text_b={}".format(eval_data[0].text_b))
                logger.info("task={}".format(eval_data[0].task))
                logger.info("label={}".format(eval_data[0].label))
                logger.info("**********************************")

                # update the metrics for the current group task:
                eval_config.metrics = METRICS.get(task_name, DEFAULT_METRICS)  # metrics of the cross-task group
                eval_result = evaluate(wrapper, eval_data, eval_config)
                # dev32_result = evaluate(wrapper, dev32_data, eval_config)

                save_predictions(os.path.join(pattern_iter_output_dir, 'eval_predictions.jsonl'), wrapper, eval_result)
                save_logits(os.path.join(pattern_iter_output_dir, 'eval_logits.txt'), eval_result['logits'])
                # save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'), wrapper, dev32_result)
                # save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'), dev32_result['logits'])

                logger.info("--- RESULT (pattern_id={}, Group={}, Task={}) ---".format(
                    pattern_id, model_config.task_name, task_name))
                logger.info("eval_results:")
                logger.info(eval_result['scores'])
                # logger.info("dev32_results:")
                # logger.info(dev32_result['scores'])

                # results_dict['eval_set_after_training'] = eval_result['scores']
                # results_dict['dev32_set_after_training'] = dev32_result['scores']
                # with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                #     json.dump(results_dict, fh)
                #
                # for metric, value in eval_result['scores'].items():
                #     results[metric][pattern_id].append(value)
                #
                # for metric, value in dev32_result['scores'].items():
                #     dev32_results[metric][pattern_id].append(value)

            wrapper.model = None
            wrapper = None
            torch.cuda.empty_cache()
def train_pet(train_data: List[InputExample], eval_data: List[InputExample], dev32_data: List[InputExample],
              model_config: WrapperConfig, train_config: TrainConfig, eval_config: EvalConfig,
              pattern_ids: List[int], output_dir: str, repetitions: int = 3,
              do_train: bool = True, do_eval: bool = True, seed: int = 42):
    """
    Train and evaluate a new PET model for a given task.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param eval_data: the evaluation examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    set_seed(seed)

    for pattern_id in pattern_ids:
        for iteration in range(repetitions):
            model_config.pattern_id = pattern_id
            results_dict = {}
            pattern_iter_output_dir = "{}/p{}-i{}".format(output_dir, pattern_id, iteration)

            if os.path.exists(pattern_iter_output_dir):
                logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
                continue

            if not os.path.exists(pattern_iter_output_dir):
                os.makedirs(pattern_iter_output_dir)

            wrapper = init_model(model_config)

            # Training
            if do_train:
                results_dict.update(train_single_model(train_data, eval_data, dev32_data, pattern_iter_output_dir,
                                                       wrapper, train_config, eval_config))

                with open(os.path.join(pattern_iter_output_dir, 'results.txt'), 'w') as fh:
                    fh.write(str(results_dict))

                train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
                eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
                logger.info("Saving complete")

                if not do_eval:
                    wrapper.model = None
                    wrapper = None
                    torch.cuda.empty_cache()

            # Evaluation
            if do_eval:
                logger.info("Starting evaluation...")
                # if not wrapper:
                wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)

                eval_result = evaluate(wrapper, eval_data, eval_config)
                dev32_result = evaluate(wrapper, dev32_data, eval_config)

                save_predictions(os.path.join(pattern_iter_output_dir, 'eval_predictions.jsonl'), wrapper, eval_result)
                save_logits(os.path.join(pattern_iter_output_dir, 'eval_logits.txt'), eval_result['logits'])
                save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'), wrapper, dev32_result)
                save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'), dev32_result['logits'])

                logger.info("--- RESULT (pattern_id={}, iteration={}) ---".format(pattern_id, iteration))
                logger.info("eval_results:")
                logger.info(eval_result['scores'])
                logger.info("dev32_results:")
                logger.info(dev32_result['scores'])

                results_dict['eval_set_after_training'] = eval_result['scores']
                results_dict['dev32_set_after_training'] = dev32_result['scores']
                with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                    json.dump(results_dict, fh)

                for metric, value in eval_result['scores'].items():
                    results[metric][pattern_id].append(value)

                for metric, value in dev32_result['scores'].items():
                    dev32_results[metric][pattern_id].append(value)

                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

    if do_eval:
        logger.info("=== OVERALL RESULTS ===")
        _write_results(os.path.join(output_dir, 'result_test.txt'), results, dev32_results)
    else:
        logger.info("=== ENSEMBLE TRAINING COMPLETE ===")
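# Usage sketch (illustrative only): assuming the configs and the train/dev32/eval splits have been
# prepared elsewhere, a hypothetical run over a single PVP with three repetitions would be:
#
#   train_pet(train_examples, test_examples, dev32_examples, model_cfg, train_cfg, eval_cfg,
#             pattern_ids=[0], output_dir='output/pet', repetitions=3, seed=42)
#
# Each repetition writes eval and dev32 predictions/logits under output/pet/p0-i{iteration}, and the
# aggregated metrics are written to output/pet/result_test.txt via _write_results.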