merged_preds[i] = merge_regression(prediction_dict[i]) elif args.task_type == 'multiclass': merged_preds[i] = avg_probs_multiclass( np.array(prediction_dict[i])) return merged_preds for predictor_params in grid: print(predictor_params, flush=True) predictor = Classifier(**predictor_params).to(device) if n_gpu > 1: predictor = torch.nn.DataParallel(predictor) if not(args.freeze_bert) and not(args.use_adversary): param_optimizer = list(model.named_parameters()) + \ list(predictor.named_parameters()) elif args.freeze_bert and not(args.use_adversary): param_optimizer = list(predictor.named_parameters()) elif args.freeze_bert and args.use_adversary: raise Exception( 'No purpose in using an adversary if BERT layers are frozen') else: param_optimizer = list(model.named_parameters( )) + list(predictor.named_parameters()) + list(discriminator.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [ {'params': [p for n, p in param_optimizer if not any( nd in n for nd in no_decay)], 'weight_decay': 0.01}, {'params': [p for n, p in param_optimizer if any( nd in n for nd in no_decay)], 'weight_decay': 0.0}
merged_preds[i] = merge_regression(prediction_dict[i]) elif args.task_type == 'multiclass': merged_preds[i] = avg_probs_multiclass(np.array( prediction_dict[i])) return merged_preds for predictor_params in grid: print(predictor_params, flush=True) predictor = Classifier(**predictor_params).to(device) if n_gpu > 1: predictor = torch.nn.DataParallel(predictor) if not (args.freeze_bert) and not (args.use_adversary): param_optimizer = list(model.named_parameters()) + list( predictor.named_parameters()) elif args.freeze_bert and not (args.use_adversary): param_optimizer = list(predictor.named_parameters()) elif args.freeze_bert and args.use_adversary: raise Exception( 'No purpose in using an adversary if BERT layers are frozen') else: param_optimizer = list(model.named_parameters()) + list( predictor.named_parameters()) + list( discriminator.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay':