def main():
    parser = argparse.ArgumentParser(description="Command line interface for P-Tuning.")

    # Required parameters
    parser.add_argument("--data_dir", default=None, type=str, required=True,
                        help="The input data dir. Should contain the data files for the task.")
    parser.add_argument("--model_type", default="albert", type=str, required=True, choices=MODEL_CLASSES.keys(),
                        help="The type of the pretrained language model to use")
    parser.add_argument("--model_name_or_path", default="roberta-large", type=str, required=True,
                        help="Path to the pre-trained model or shortcut name")
    parser.add_argument("--task_type", default='cross_task', type=str, required=False,
                        choices=['single_task', 'cross_task'],
                        help="The type of the task to train/evaluate on")  # add by wjn
    parser.add_argument("--task_name", default=None, type=str, required=True, choices=['g1', 'g2', 'g3'],
                        help="The name of the task to train/evaluate on")
    parser.add_argument("--k", default=16, type=int, required=False,
                        help="The number of examples per label")  # add by wjn
    parser.add_argument("--scene", default="few-shot", type=str, required=True, choices=['few-shot', 'full'],
                        help="The data scene; if 'few-shot' is chosen, please set k, otherwise k is ignored")  # add by wjn
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model predictions and checkpoints will be written")

    # PET-specific optional parameters
    parser.add_argument("--pattern_ids", default=[1], type=int, nargs='+',
                        help="The ids of the PVPs to be used (only for PET)")
    parser.add_argument("--cross_prompt", action='store_true',
                        help="If true, when task_type is cross_task, each task in one group has its own "
                             "task-specific PVPs; if false, all tasks in one group share the same PVPs")
    parser.add_argument("--alpha", default=0.9999, type=float,
                        help="Weighting term for the auxiliary language modeling task (only for PET)")
    parser.add_argument("--pet_repetitions", default=3, type=int,
                        help="The number of times to repeat PET training and testing with different seeds.")
    parser.add_argument("--pet_max_seq_length", default=256, type=int,
                        help="The maximum total input sequence length after tokenization for PET. Sequences longer "
                             "than this will be truncated, sequences shorter will be padded.")
    parser.add_argument("--pet_per_gpu_train_batch_size", default=4, type=int,
                        help="Batch size per GPU/CPU for PET training.")
    parser.add_argument("--pet_per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for PET evaluation.")
    parser.add_argument('--pet_gradient_accumulation_steps', type=int, default=1,
                        help="Number of update steps to accumulate before performing a backward/update pass in PET.")
    parser.add_argument("--pet_num_train_epochs", default=3, type=float,
                        help="Total number of training epochs to perform in PET.")
    parser.add_argument("--pet_max_meta_steps", default=-1, type=int,
                        help="If > 0: total number of multi-task meta-learning training steps to perform in PET. "
                             "Overrides num_train_epochs.")
    parser.add_argument("--pet_max_adaptation_steps", default=-1, type=int,
                        help="If > 0: total number of task-specific adaptation training steps to perform in PET. "
                             "Overrides num_train_epochs.")

    # Other optional parameters
    parser.add_argument("--train_examples", default=-1, type=int,
                        help="The total number of train examples to use, where -1 equals all examples.")
    parser.add_argument("--eval_examples", default=-1, type=int,
                        help="The total number of test examples to use, where -1 equals all examples.")
    parser.add_argument("--dev32_examples", default=-1, type=int,
                        help="The total number of dev32 examples to use, where -1 equals all examples.")
    parser.add_argument("--split_examples_evenly", action='store_true',
                        help="If true, train examples are not chosen randomly, but split evenly across all labels.")
    parser.add_argument("--cache_dir", default="", type=str,
                        help="Where to store the pre-trained models downloaded from S3.")
    parser.add_argument("--learning_rate", default=1e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.1, type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument('--logging_steps', type=int, default=50,
                        help="Log every X update steps.")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Avoid using CUDA when available")
    parser.add_argument('--overwrite_output_dir', action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument('--seed', type=int, default=42,
                        help="Random seed for initialization")
    parser.add_argument('--do_train', action='store_true',
                        help="Whether to perform training")
    parser.add_argument('--do_eval', action='store_true',
                        help="Whether to perform evaluation")
    parser.add_argument("--eval_set", choices=['dev', 'test'], default='dev',
                        help="Whether to perform evaluation on the dev set or the test set")
    parser.add_argument("--embed_size", default=128, type=int,
                        help="Embedding size. albert: 128, roberta-large: 1024, roberta-base: 768")
    parser.add_argument('--prompt_encoder_type', type=str, default="lstm", choices=['lstm', 'mlp'])
    parser.add_argument("--eval_every_step", default=20, type=int,
                        help="Evaluate every X update steps.")

    args = parser.parse_args()
    logger.info("Parameters: {}".format(args))

    # if os.path.exists(args.output_dir) and os.listdir(args.output_dir) \
    #         and args.do_train and not args.overwrite_output_dir:
    #     raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))

    # Setup CUDA, GPU & distributed training
    args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    args.n_gpu = torch.cuda.device_count()

    # Prepare the task
    args.task_name = args.task_name.lower()
    if args.task_name not in PROCESSORS:
        raise ValueError("Task '{}' not found".format(args.task_name))
    processor = PROCESSORS[args.task_name](args.task_name)
    # if args.task_name in ['g1', 'mr', 'cr']:
    #     args.label_list = processor.get_labels(args.task_name)
    # else:
    args.label_list = processor.get_labels()

    train_ex_per_label, eval_ex_per_label, dev32_ex_per_label = None, None, None
    train_ex, eval_ex, dev32_ex = args.train_examples, args.eval_examples, args.dev32_examples
    if args.split_examples_evenly:
        train_ex_per_label = eq_div(args.train_examples, len(args.label_list)) if args.train_examples != -1 else -1
        eval_ex_per_label = eq_div(args.eval_examples, len(args.label_list)) if args.eval_examples != -1 else -1
        dev32_ex_per_label = eq_div(args.dev32_examples, len(args.label_list)) if args.dev32_examples != -1 else -1
        train_ex, eval_ex, dev32_ex = None, None, None

    eval_set = TEST_SET if args.eval_set == 'test' else DEV_SET

    # Task adaptation only supports the cross-task setting.
    assert args.task_type == 'cross_task'
    # In the cross_task setting, first load the data of the whole group.
    train_data = load_examples(
        args.task_name, args.data_dir, TRAIN_SET, num_examples=-1, num_examples_per_label=None)
    dev_data = load_examples(
        args.task_name, args.data_dir, DEV_SET, num_examples=-1, num_examples_per_label=None)

    args.metrics = METRICS.get(args.task_name, DEFAULT_METRICS)  # metrics for the cross-task group

    pet_model_cfg, pet_train_cfg, pet_eval_cfg = load_pet_configs(args)

    logger.info("************ Training Example: **************")
    logger.info("text_a={}".format(train_data[0].text_a))
    logger.info("text_b={}".format(train_data[0].text_b))
    logger.info("task={}".format(train_data[0].task))
    logger.info("label={}".format(train_data[0].label))
    logger.info("*********************************************")

    # Run multi-task meta-learning followed by task adaptation.
    train_adaptation_cross(dev32_data=dev_data,  # serves as the validation set
                           train_data=train_data,  # serves as the training set
                           train_config=pet_train_cfg,
                           eval_config=pet_eval_cfg,
                           model_config=pet_model_cfg,
                           pattern_ids=args.pattern_ids,
                           output_dir=args.output_dir,
                           repetitions=args.pet_repetitions,
                           do_train=args.do_train,
                           do_eval=args.do_eval,
                           seed=args.seed)
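# An illustrative invocation of this CLI (a sketch only: the script filename
# "cli.py" is assumed, and the argument values are placeholders; every flag
# below is defined in main() above):
#
#   python cli.py \
#       --data_dir data/k-shot-cross \
#       --model_type albert \
#       --model_name_or_path roberta-large \
#       --task_type cross_task \
#       --task_name g1 \
#       --scene few-shot \
#       --k 16 \
#       --output_dir output \
#       --pattern_ids 1 \
#       --do_train --do_eval \
#       --seed 42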
def train_adaptation_cross(train_data: List[InputExample],
                           # eval_data: List[InputExample],
                           dev32_data: List[InputExample],
                           model_config: WrapperConfig,
                           train_config: TrainConfig,
                           eval_config: EvalConfig,
                           pattern_ids: List[int],
                           output_dir: str,
                           repetitions: int = 3,
                           do_train: bool = True,
                           do_eval: bool = True,
                           seed: int = 42):
    """
    Meta-train a PET model on a cross-task group, then adapt and evaluate it on each task in the group.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    set_seed(seed)

    assert model_config.task_type == "cross_task"
    # In the cross-task setting, task_name is the name of a group; fetch all tasks in that group.
    tasks = groups[model_config.task_name]

    for pattern_id in pattern_ids:  # only one pattern is selected
        model_config.pattern_id = pattern_id
        results_dict = {}
        pattern_iter_output_dir = "{}/{}/adaptation/{}".format(output_dir, model_config.scene,
                                                               model_config.task_name)
        if not os.path.exists(pattern_iter_output_dir):
            os.makedirs(pattern_iter_output_dir)

        # wrapper = TransPromptModelWrapper(model_config)  # initialize a TransPrompt model
        wrapper = TransPromptModelWrapper2(model_config)  # initialize a TransPrompt model
        # wrapper = TransformerModelWrapper(model_config)

        # Multi-Task Meta-Learning Training
        if do_train:
            logger.info("========= Stage1: Starting Fine-tuning Multi-Task Meta-Learner ... =========")
            # Train for multiple epochs and store the training results in results_dict.
            # edit by wjn: eval_data -> None
            results_dict.update(train_single_model(train_data, None, dev32_data, pattern_iter_output_dir,
                                                   wrapper, train_config, eval_config, use_debias=True))

            train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
            eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
            logger.info("Saving complete")

            if not do_eval:
                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()
            logger.info("========= Stage1: Finish Fine-tuning Multi-Task Meta-Learner =========")

        # Task Adaptation Fine-tuning
        if do_eval:
            logger.info("========= Stage2: Starting Task Adaptation (Task-Specific Fine-tuning) ... =========")
            # Keep the results of each run; load previous results if they exist.
            t = time.time()
            ada_res_acc = dict()
            if os.path.exists('ada_res_acc.npy'):
                ada_res_acc = np.load('ada_res_acc.npy', allow_pickle=True)[()]  # dict {time: {task: acc, ...}}
            accs = dict()

            # Reload the trained meta-learner.
            # wrapper = TransPromptModelWrapper.from_pretrained(pattern_iter_output_dir)
            wrapper = TransPromptModelWrapper2.from_pretrained(pattern_iter_output_dir)
            cross_data_dir = "data/k-shot-cross/"  # add by wjn

            ## This is the task adaptation stage. For each task of each group:
            # fine-tune the trained meta-learner again on the task-specific training set,
            # select the model on the dev set, and finally test on the corresponding test set.
            # Each task in the group goes through this stage independently.

            # Get the task-specific training and dev sets for every task in the cross-task group.
            task_to_train_example, task_to_dev_example = dict(), dict()  # {task_name: [.., ..], ..}
            task_to_train_example = load_examples(
                model_config.task_name, None, SPE_TRAIN_SET, num_examples=-1, num_examples_per_label=None,
                examples=train_data)
            task_to_dev_example = load_examples(
                model_config.task_name, None, SPE_DEV_SET, num_examples=-1, num_examples_per_label=None,
                examples=dev32_data)

            for ei, task_name in enumerate(tasks):
                ### Task-specific fine-tuning
                logger.info("========= Stage2.{}: Specific fine-tuning on Task {} =========".format(ei + 1, task_name))
                # wrapper.config.task_name = task_name  # switch the task name during task-specific fine-tuning
                train_config.max_steps = eval_config.max_steps  # adjust max_steps for task-specific fine-tuning
                train_config.per_gpu_train_batch_size = eval_config.per_gpu_eval_batch_size  # adjust the batch size
                if task_name == 'mrpc':
                    # The training sets of the two tasks in group3 (MRPC and QQP) differ greatly in size,
                    # so the training schedule for MRPC is fixed explicitly here.
                    train_config.max_steps = 4800
                    train_config.per_gpu_train_batch_size = 16
                    eval_config.per_gpu_eval_batch_size = 8
                # Continue task-specific fine-tuning on top of the meta-learner, then save.
                train_single_model(task_to_train_example[data_to_name[task_name]], None,
                                   task_to_dev_example[data_to_name[task_name]],
                                   pattern_iter_output_dir + '/' + task_name,
                                   wrapper, train_config, eval_config, use_debias=False)

                # Load the model saved after task-specific fine-tuning.
                # task_specific_wrapper = TransPromptModelWrapper.from_pretrained(pattern_iter_output_dir + '/' + task_name)
                task_specific_wrapper = TransPromptModelWrapper2.from_pretrained(
                    pattern_iter_output_dir + '/' + task_name)

                logger.info("========= Stage2.{}: Evaluating test set on Task {} =========".format(ei + 1, task_name))
                ### Evaluate on the test set.
                eval_data = load_examples(
                    task_name, cross_data_dir + data_to_name[task_name] + "/" + str(model_config.k) + "-" + str(seed),
                    TEST_SET, num_examples=-1, num_examples_per_label=None)
                logger.info("Group {}: Task {}: number of test examples: {}".format(
                    model_config.task_name, task_name, len(eval_data)))

                # logger.info("************ Test Example: **************")
                # logger.info("text_a={}".format(eval_data[0].text_a))
                # logger.info("text_b={}".format(eval_data[0].text_b))
                # logger.info("task={}".format(eval_data[0].task))
                # logger.info("label={}".format(eval_data[0].label))
                # logger.info("*****************************************")

                # Update the metrics for the current task in the group.
                eval_config.metrics = METRICS.get(task_name, DEFAULT_METRICS)  # metrics for the cross-task group
                eval_result = evaluate(task_specific_wrapper, eval_data, eval_config)

                save_predictions(os.path.join(pattern_iter_output_dir + '/' + task_name, 'eval_predictions.jsonl'),
                                 task_specific_wrapper, eval_result)
                save_logits(os.path.join(pattern_iter_output_dir + '/' + task_name, 'eval_logits.txt'),
                            eval_result['logits'])
                # save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'), wrapper, dev32_result)
                # save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'), dev32_result['logits'])

                logger.info("--- Task Adaptation Result (pattern_id={}, Group={}, Task={}) ---".format(
                    pattern_id, model_config.task_name, task_name))
                logger.info("eval_results: {}".format(eval_result['scores']))
                accs[task_name] = eval_result['scores']

                task_specific_wrapper.model = None
                task_specific_wrapper = None

            ada_res_acc[t] = accs
            np.save('ada_res_acc.npy', ada_res_acc)

        wrapper.model = None
        wrapper = None
        torch.cuda.empty_cache()
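# A minimal helper sketch (not part of the original pipeline; the function name
# is hypothetical) showing how the accuracy dict persisted by the adaptation
# stage above can be read back. It mirrors the np.load pattern already used in
# train_adaptation_cross.
def load_adaptation_results(path: str = 'ada_res_acc.npy') -> dict:
    """Load the accumulated {run_time: {task_name: scores}} dict saved by task adaptation."""
    if not os.path.exists(path):
        return {}
    # np.save stores the dict inside a 0-d object array; indexing with [()] unwraps it.
    return np.load(path, allow_pickle=True)[()]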
def train_generalization_cross(unseen_task_train_data: List[InputExample],
                               unseen_task_dev_data: List[InputExample],
                               seen_task_train_data: List[InputExample],
                               seen_task_dev_data: List[InputExample],
                               # dev32_data: List[InputExample],
                               unseen_task: str,
                               model_config: WrapperConfig,
                               train_config: TrainConfig,
                               eval_config: EvalConfig,
                               pattern_ids: List[int],
                               output_dir: str,
                               repetitions: int = 3,
                               do_train: bool = True,
                               do_eval: bool = True,
                               seed: int = 42):
    """
    Meta-train a PET model on the seen tasks of a group, then fine-tune and evaluate it on an unseen task.

    :param unseen_task_train_data: the training examples of the unseen task
    :param unseen_task_dev_data: the dev examples of the unseen task
    :param seen_task_train_data: the training examples of the seen tasks used for meta-learning
    :param seen_task_dev_data: the dev examples of the seen tasks used for meta-learning
    :param unseen_task: the name of the unseen task
    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    set_seed(seed)

    assert model_config.task_type == "cross_task"

    for pattern_id in pattern_ids:  # only one pattern is selected
        model_config.pattern_id = pattern_id
        results_dict = {}
        pattern_iter_output_dir = "{}/{}/generalization/{}".format(output_dir, model_config.scene,
                                                                   model_config.task_name)
        if not os.path.exists(pattern_iter_output_dir):
            os.makedirs(pattern_iter_output_dir)

        # wrapper = TransPromptModelWrapper(model_config)  # initialize a TransPrompt model
        wrapper = TransPromptModelWrapper2(model_config)  # initialize a TransPrompt model
        # wrapper = TransformerModelWrapper(model_config)

        # Multi-Task Meta-Learning Training
        if do_train:
            logger.info("========= Stage1: Starting Fine-tuning Multi-Task Meta-Learner ... =========")
            # Train for multiple epochs and store the training results in results_dict.
            # edit by wjn: eval_data -> None
            results_dict.update(train_single_model(seen_task_train_data, None, seen_task_dev_data,
                                                   pattern_iter_output_dir,
                                                   wrapper, train_config, eval_config, use_debias=True))

            train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
            eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
            logger.info("Saving complete")

            if not do_eval:
                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()
            logger.info("========= Stage1: Finish Fine-tuning Multi-Task Meta-Learner =========")

        # Task Generalization Fine-tuning
        if do_eval:
            logger.info("========= Stage2: Starting Task Generalization (Unseen Task-Specific Fine-tuning) ... =========")
            # Reload the trained meta-learner.
            # wrapper = TransPromptModelWrapper.from_pretrained(pattern_iter_output_dir)
            wrapper = TransPromptModelWrapper2.from_pretrained(pattern_iter_output_dir)
            cross_data_dir = "data/k-shot-cross/"  # add by wjn

            ## This is the task generalization stage. For the unseen task:
            # fine-tune the trained meta-learner again on that task's training set,
            # select the model on the dev set, and finally test on the corresponding test set.

            ### Task-specific fine-tuning
            logger.info("========= Stage2: Specific fine-tuning on Unseen Task {} =========".format(unseen_task))
            # wrapper.config.task_name = task_name  # switch the task name during task-specific fine-tuning
            train_config.max_steps = eval_config.max_steps  # adjust max_steps for task-specific fine-tuning
            train_config.per_gpu_train_batch_size = eval_config.per_gpu_eval_batch_size  # adjust the batch size
            # Continue task-specific fine-tuning of the unseen task on top of the meta-learner, then save.
            train_single_model(unseen_task_train_data, None, unseen_task_dev_data,
                               pattern_iter_output_dir + '/' + unseen_task,
                               wrapper, train_config, eval_config, use_debias=False)

            # Load the model saved after task-specific fine-tuning.
            # task_specific_wrapper = TransPromptModelWrapper.from_pretrained(pattern_iter_output_dir + '/' + unseen_task)
            task_specific_wrapper = TransPromptModelWrapper2.from_pretrained(
                pattern_iter_output_dir + '/' + unseen_task)

            logger.info("========= Stage2: Evaluating test set on Task {} =========".format(unseen_task))
            ### Evaluate on the test set.
            eval_data = load_examples(
                unseen_task, cross_data_dir + data_to_name[unseen_task] + "/" + str(model_config.k) + "-" + str(seed),
                TEST_SET, num_examples=-1, num_examples_per_label=None)
            logger.info("Group {}: Task {}: number of test examples: {}".format(
                model_config.task_name, unseen_task, len(eval_data)))

            # logger.info("************ Test Example: **************")
            # logger.info("text_a={}".format(eval_data[0].text_a))
            # logger.info("text_b={}".format(eval_data[0].text_b))
            # logger.info("task={}".format(eval_data[0].task))
            # logger.info("label={}".format(eval_data[0].label))
            # logger.info("*****************************************")

            # Update the metrics for the current task in the group.
            eval_config.metrics = METRICS.get(unseen_task, DEFAULT_METRICS)  # metrics for the cross-task group
            eval_result = evaluate(task_specific_wrapper, eval_data, eval_config)

            save_predictions(os.path.join(pattern_iter_output_dir + '/' + unseen_task, 'eval_predictions.jsonl'),
                             task_specific_wrapper, eval_result)
            save_logits(os.path.join(pattern_iter_output_dir + '/' + unseen_task, 'eval_logits.txt'),
                        eval_result['logits'])
            # save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'), wrapper, dev32_result)
            # save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'), dev32_result['logits'])

            logger.info("--- Unseen Task Generalization Result (pattern_id={}, Group={}, Task={}) ---".format(
                pattern_id, model_config.task_name, unseen_task))
            logger.info("eval_results: {}".format(eval_result['scores']))

            task_specific_wrapper.model = None
            task_specific_wrapper = None

        wrapper.model = None
        wrapper = None
        torch.cuda.empty_cache()
def train_pet_cross(train_data: List[InputExample],
                    # eval_data: List[InputExample],
                    dev32_data: List[InputExample],
                    model_config: WrapperConfig,
                    train_config: TrainConfig,
                    eval_config: EvalConfig,
                    pattern_ids: List[int],
                    output_dir: str,
                    repetitions: int = 3,
                    do_train: bool = True,
                    do_eval: bool = True,
                    seed: int = 42):
    """
    Train and evaluate a new PET model for a given cross-task group.

    :param model_config: the model configuration for each model corresponding to an individual PVP
    :param train_config: the training configuration for each model corresponding to an individual PVP
    :param eval_config: the evaluation configuration for each model corresponding to an individual PVP
    :param pattern_ids: the ids of all PVPs to use
    :param output_dir: the output directory
    :param repetitions: the number of training repetitions for each model corresponding to an individual PVP
    :param train_data: the training examples to use
    :param dev32_data: the dev32 examples to use
    :param do_train: whether to perform training
    :param do_eval: whether to perform evaluation
    :param seed: the random seed to use
    """
    results = defaultdict(lambda: defaultdict(list))
    dev32_results = defaultdict(lambda: defaultdict(list))
    # set_seed(seed)

    assert model_config.task_type == "cross_task"
    # In the cross-task setting, task_name is the name of a group; fetch all tasks in that group.
    tasks = groups[model_config.task_name]

    for pattern_id in pattern_ids:  # only one pattern is selected
        model_config.pattern_id = pattern_id
        results_dict = {}

        pattern_iter_output_dir = "{}/p{}-i{}".format(output_dir, pattern_id, 1)
        # if os.path.exists(pattern_iter_output_dir):
        #     logger.warning(f"Path {pattern_iter_output_dir} already exists, skipping it...")
        #     continue
        if not os.path.exists(pattern_iter_output_dir):
            os.makedirs(pattern_iter_output_dir)

        wrapper = init_model(model_config)  # initialize a model

        # Training
        if do_train:
            # Train for multiple epochs and store the training results in results_dict.
            # edit by wjn: eval_data -> None
            results_dict.update(train_single_model(train_data, None, dev32_data, pattern_iter_output_dir,
                                                   wrapper, train_config, eval_config))

            with open(os.path.join(pattern_iter_output_dir, 'results.txt'), 'w') as fh:
                fh.write(str(results_dict))

            train_config.save(os.path.join(pattern_iter_output_dir, 'train_config.json'))
            eval_config.save(os.path.join(pattern_iter_output_dir, 'eval_config.json'))
            logger.info("Saving complete")

            if not do_eval:
                wrapper.model = None
                wrapper = None
                torch.cuda.empty_cache()

        # Evaluation
        if do_eval:
            logger.info("Starting evaluation...")
            # if not wrapper:
            wrapper = TransformerModelWrapper.from_pretrained(pattern_iter_output_dir)
            cross_data_dir = "data/k-shot-cross/"  # add by wjn

            ## In the cross-task setting, evaluate each task in the current group separately.
            for task_name in tasks:
                eval_data = load_examples(
                    task_name, cross_data_dir + data_to_name[task_name] + "/" + str(model_config.k) + "-" + str(seed),
                    TEST_SET, num_examples=-1, num_examples_per_label=None)
                logger.info("Group {}: Task {}: number of test examples: {}".format(
                    model_config.task_name, task_name, len(eval_data)))
                logger.info("************ Test Example: **************")
                logger.info("text_a={}".format(eval_data[0].text_a))
                logger.info("text_b={}".format(eval_data[0].text_b))
                logger.info("task={}".format(eval_data[0].task))
                logger.info("label={}".format(eval_data[0].label))
                logger.info("*****************************************")

                # Update the metrics for the current task in the group.
                eval_config.metrics = METRICS.get(task_name, DEFAULT_METRICS)  # metrics for the cross-task group
                eval_result = evaluate(wrapper, eval_data, eval_config)
                # dev32_result = evaluate(wrapper, dev32_data, eval_config)

                save_predictions(os.path.join(pattern_iter_output_dir, 'eval_predictions.jsonl'), wrapper, eval_result)
                save_logits(os.path.join(pattern_iter_output_dir, 'eval_logits.txt'), eval_result['logits'])
                # save_predictions(os.path.join(pattern_iter_output_dir, 'dev32_predictions.jsonl'), wrapper, dev32_result)
                # save_logits(os.path.join(pattern_iter_output_dir, 'dev32_logits.txt'), dev32_result['logits'])

                logger.info("--- RESULT (pattern_id={}, Group={}, Task={}) ---".format(
                    pattern_id, model_config.task_name, task_name))
                logger.info("eval_results:")
                logger.info(eval_result['scores'])
                # logger.info("dev32_results:")
                # logger.info(dev32_result['scores'])

                # results_dict['eval_set_after_training'] = eval_result['scores']
                # # results_dict['dev32_set_after_training'] = dev32_result['scores']
                # with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                #     json.dump(results_dict, fh)
                #
                # for metric, value in eval_result['scores'].items():
                #     results[metric][pattern_id].append(value)
                #
                # for metric, value in dev32_result['scores'].items():
                #     dev32_results[metric][pattern_id].append(value)

            wrapper.model = None
            wrapper = None
            torch.cuda.empty_cache()
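# Standard entry-point guard (an assumption: the argparse description marks this
# module as the command line interface, so it is presumably executed directly).
if __name__ == "__main__":
    main()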