def fairBO_debiasing(model_state_dict, data, config, device):

    def evaluate(lr, beta1, beta2, alpha, T0, verbose=False):
        model = load_model(data.num_features, config.get('hyperparameters', {}))
        model.load_state_dict(model_state_dict)
        model.to(device)
        loss_fn = torch.nn.BCELoss()
        optimizer = optim.Adam(model.parameters(), lr=lr, betas=(beta1, beta2),
                               weight_decay=alpha)
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, int(T0))
        for epoch in range(201):
            model.train()
            batch_idxs = torch.split(torch.randperm(data.X_valid.size(0)), 64)
            train_loss = 0
            for batch in batch_idxs:
                X = data.X_valid_gpu[batch, :]
                y = data.y_valid_gpu[batch]
                optimizer.zero_grad()
                loss = loss_fn(model(X)[:, 0], y)
                loss.backward()
                train_loss += loss.item()
                optimizer.step()
                scheduler.step(X.size(0))
            if epoch % 10 == 0 and verbose:
                model.eval()
                with torch.no_grad():
                    valid_loss = loss_fn(
                        model(data.X_valid_valid.to(device))[:, 0],
                        data.y_valid_valid.to(device))
                print(f'=======> Epoch: {epoch} '
                      f'Train loss: {train_loss / len(batch_idxs)} '
                      f'Valid loss: {valid_loss}')
        model.eval()
        with torch.no_grad():
            scores = model(data.X_valid_gpu)[:, 0].reshape(-1).cpu().numpy()
        best_thresh, _ = get_best_thresh(scores, np.linspace(0, 1, 1001), data,
                                         config, valid=False,
                                         margin=config['fairBO']['margin'])
        return (get_valid_objective(scores > best_thresh, data, config,
                                    valid=False), model, best_thresh)

    # Build the discrete search space and box bounds for each hyperparameter.
    space = config['fairBO']['hyperparameters']
    search_space = {}
    bounds_dict = {}
    for var in space:
        search_space[var] = np.arange(space[var]['start'], space[var]['end'],
                                      space[var]['step'])
        bounds_dict[var] = torch.tensor([space[var]['start'], space[var]['end']])
        if space[var]['log_scale']:
            # Log-scale variables are specified as base-10 exponents.
            search_space[var] = np.exp(np.log(10) * search_space[var])
            bounds_dict[var] = torch.exp(float(np.log(10)) * bounds_dict[var])

    def sample_space():
        return {var: np.random.choice(rng) for var, rng in search_space.items()}

    # Random initialization phase.
    X_hyp = []
    y_hyp = []
    best_model = [None, -math.inf, -1]
    for it in range(config['fairBO']['initial_budget']):
        X_hyp.append(sample_space())
        logger.info(f'(Iteration {it}) Evaluating fairBO with sample {X_hyp[-1]}')
        y_eval, model_candidate, thresh = evaluate(**X_hyp[-1])
        logger.info(f'Result: {y_eval}')
        if y_eval['objective'] > best_model[1]:
            best_model[0] = copy.deepcopy(model_candidate)
            best_model[1] = y_eval['objective']
            best_model[2] = thresh
        y_hyp.append(y_eval)

    # Bayesian optimization phase with a constrained EI acquisition.
    X_df = pd.DataFrame(X_hyp)
    X = torch.tensor(X_df.to_numpy())
    y = torch.tensor(pd.DataFrame(y_hyp)[['performance', 'bias']].to_numpy())
    for it in range(config['fairBO']['total_budget'] -
                    config['fairBO']['initial_budget']):
        xscaler = StandardScaler()
        gp = SingleTaskGP(torch.tensor(xscaler.fit_transform(X)), y)
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)
        # Maximize expected improvement in performance (output 0) subject to
        # the bias (output 1) lying in [-0.05, 0.05].
        cEI = ConstrainedExpectedImprovement(gp, y[:, 0].max().item(), 0,
                                             {1: (-0.05, 0.05)})
        bounds = torch.stack([bounds_dict[x] for x in X_df.columns])
        # The GP is fit on standardized inputs, so optimize the acquisition in
        # the standardized space and map the candidate back afterwards.
        scaled_bounds = torch.tensor(xscaler.transform(bounds.T.numpy()))
        candidate, _ = optimize_acqf(cEI, scaled_bounds, 1, 100, 1024)
        inv_candidate = xscaler.inverse_transform(candidate)
        hyp = {k: v.item() for k, v in zip(X_df.columns, inv_candidate[0])}
        logger.info(f'(Iteration {it + config["fairBO"]["initial_budget"]}) '
                    f'Evaluating fairBO with sample {hyp}')

        # Record the candidate in the original (unscaled) space to match X.
        X = torch.cat((X, torch.tensor(inv_candidate)))
        y_eval, model_candidate, thresh = evaluate(**hyp)
        logger.info(f'Result: {y_eval}')
        if y_eval['objective'] > best_model[1]:
            best_model[0] = copy.deepcopy(model_candidate)
            best_model[1] = y_eval['objective']
            best_model[2] = thresh
        y = torch.cat((y, torch.tensor([[y_eval['performance'],
                                         y_eval['bias']]])))

    logger.info('Evaluating best fairBO debiased model.')
    best_model[0].eval()
    with torch.no_grad():
        y_pred = (best_model[0](data.X_valid_gpu)[:, 0] >
                  best_model[2]).reshape(-1).cpu().numpy()
    results_valid = get_valid_objective(y_pred, data, config)
    logger.info(f'Results: {results_valid}')

    best_model[0].eval()
    with torch.no_grad():
        y_pred = (best_model[0](data.X_test_gpu)[:, 0] >
                  best_model[2]).reshape(-1).cpu().numpy()
    results_test = get_test_objective(y_pred, data, config)
    return results_valid, results_test
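
# A minimal, self-contained sketch (not used by the pipeline) of the
# constrained-EI step in fairBO_debiasing, on synthetic data: output 0 plays
# the role of 'performance' and output 1 the role of 'bias'. It relies only
# on the botorch/gpytorch imports used above; names and numbers here are
# illustrative, not config values.
def _constrained_ei_demo():
    train_X = torch.rand(10, 2, dtype=torch.double)
    # Output 0: objective to maximize; output 1: the constrained quantity.
    train_Y = torch.stack(
        [train_X.sum(dim=1), train_X[:, 0] - train_X[:, 1]], dim=-1)
    gp = SingleTaskGP(train_X, train_Y)
    fit_gpytorch_model(ExactMarginalLogLikelihood(gp.likelihood, gp))
    acqf = ConstrainedExpectedImprovement(
        gp, best_f=train_Y[:, 0].max().item(), objective_index=0,
        constraints={1: (-0.05, 0.05)})
    bounds = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double)
    candidate, _ = optimize_acqf(acqf, bounds, q=1, num_restarts=5,
                                 raw_samples=64)
    return candidate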
def random_debiasing(model_state_dict, data, config, device, verbose=True):
    logger.info('Generating Random Debiased models.')
    rand_model = load_model(data.num_features, config.get('hyperparameters', {}))
    rand_model.to(device)
    rand_result = {
        'objective': -math.inf,
        'model': rand_model.state_dict(),
        'thresh': -1
    }
    for iteration in range(config['random']['num_trials']):
        # Perturb every weight of the trained model with multiplicative
        # Gaussian noise centered at 1.
        rand_model.load_state_dict(model_state_dict)
        for param in rand_model.parameters():
            param.data = param.data * (torch.randn_like(param) *
                                       config['random']['stddev'] + 1)

        rand_model.eval()
        with torch.no_grad():
            scores = rand_model(data.X_valid_gpu)[:, 0].reshape(-1).cpu().numpy()

        threshs = np.linspace(0, 1, 501)
        best_rand_thresh, best_obj = get_best_thresh(
            scores, threshs, data, config, valid=False,
            margin=config['random']['margin'])
        if best_obj > rand_result['objective']:
            rand_result = {
                'objective': best_obj,
                'model': copy.deepcopy(rand_model.state_dict()),
                'thresh': best_rand_thresh
            }
            rand_model.eval()
            with torch.no_grad():
                y_pred = (rand_model(data.X_test_gpu)[:, 0] >
                          best_rand_thresh).reshape(-1).cpu().numpy()
            best_test_result = get_test_objective(y_pred, data,
                                                  config)['objective']

        if iteration % 10 == 0 and verbose:
            logger.info(f'{iteration} / {config["random"]["num_trials"]} '
                        'trials have been sampled.')
            logger.info(f'Best result so far = {rand_result["objective"]}')
            logger.info(f'Best test result so far = {best_test_result}')

    logger.info('Evaluating best random debiased model.')
    rand_model.load_state_dict(rand_result['model'])
    rand_model.eval()
    with torch.no_grad():
        y_pred = (rand_model(data.X_valid_gpu)[:, 0] >
                  rand_result['thresh']).reshape(-1).cpu().numpy()
    results_valid = get_valid_objective(y_pred, data, config)
    logger.info(f'Results: {results_valid}')

    rand_model.eval()
    with torch.no_grad():
        y_pred = (rand_model(data.X_test_gpu)[:, 0] >
                  rand_result['thresh']).reshape(-1).cpu().numpy()
    results_test = get_test_objective(y_pred, data, config)
    return results_valid, results_test
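
# Toy sketch (not used by the pipeline) of the perturbation applied in
# random_debiasing: each weight is scaled by a factor drawn from
# N(1, stddev^2). The `stddev` default here is illustrative, not a config
# value.
def _random_perturbation_demo(stddev=0.1):
    layer = torch.nn.Linear(4, 1)
    with torch.no_grad():
        for param in layer.parameters():
            param.mul_(torch.randn_like(param) * stddev + 1)
    return layer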
def adversarial_debiasing(model_state_dict, data, config, device):
    logger.info('Training Adversarial model.')
    actor = load_model(data.num_features, config.get('hyperparameters', {}))
    actor.load_state_dict(model_state_dict)
    actor.to(device)

    hid = config.get('hyperparameters', {}).get('hid', 32)
    critic = Critic(hid * config['adversarial']['batch_size'],
                    num_deep=config['adversarial']['num_deep'], hid=hid)
    critic.to(device)
    critic_optimizer = optim.Adam(critic.parameters())
    critic_loss_fn = torch.nn.MSELoss()

    actor_optimizer = optim.Adam(actor.parameters(),
                                 lr=config['adversarial']['lr'])
    actor_loss_fn = torch.nn.BCELoss()

    for epoch in range(config['adversarial']['epochs']):
        # Critic steps: train the critic to predict the batch bias from the
        # actor's hidden representations, with the actor frozen.
        for param in critic.parameters():
            param.requires_grad = True
        for param in actor.parameters():
            param.requires_grad = False
        actor.eval()
        critic.train()
        for step in range(config['adversarial']['critic_steps']):
            critic_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0),
                                    (config['adversarial']['batch_size'], ))
            cX_valid = data.X_valid_gpu[indices]
            cy_valid = data.y_valid[indices]
            cp_valid = data.p_valid[indices]
            with torch.no_grad():
                scores = actor(cX_valid)[:, 0].reshape(-1).cpu().numpy()
            bias = compute_bias(scores, cy_valid.numpy(), cp_valid,
                                config['metric'])
            res = critic(actor.trunc_forward(cX_valid))
            loss = critic_loss_fn(
                res[0], torch.tensor([bias], device=device,
                                     dtype=torch.float32))
            loss.backward()
            train_loss = loss.item()
            critic_optimizer.step()
            if (epoch % 10 == 0) and (step % 100 == 0):
                logger.info(
                    f'=======> Critic Epoch: {(epoch, step)} loss: {train_loss}')

        # Actor steps: train the actor against the frozen critic, scaling the
        # BCE loss up whenever the predicted bias exceeds the tolerance.
        for param in critic.parameters():
            param.requires_grad = False
        for param in actor.parameters():
            param.requires_grad = True
        actor.train()
        critic.eval()
        for step in range(config['adversarial']['actor_steps']):
            actor_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0),
                                    (config['adversarial']['batch_size'], ))
            cy_valid = data.y_valid_gpu[indices]
            cX_valid = data.X_valid_gpu[indices]
            pred_bias = critic(actor.trunc_forward(cX_valid))
            bceloss = actor_loss_fn(actor(cX_valid)[:, 0], cy_valid)
            # loss = lam*abs(pred_bias) + (1-lam)*loss
            objloss = max(
                1, config['adversarial']['lambda'] *
                (abs(pred_bias[0][0]) - config['objective']['epsilon'] +
                 config['adversarial']['margin']) + 1) * bceloss
            objloss.backward()
            train_loss = objloss.item()
            actor_optimizer.step()
            if (epoch % 10 == 0) and (step % 100 == 0):
                logger.info(
                    f'=======> Actor Epoch: {(epoch, step)} loss: {train_loss}')

        if epoch % 10 == 0:
            with torch.no_grad():
                scores = actor(data.X_valid_gpu)[:, 0].reshape(-1, 1).cpu().numpy()
            _, best_adv_obj = get_best_thresh(
                scores, np.linspace(0, 1, 1001), data, config, valid=False,
                margin=config['adversarial']['margin'])
            logger.info(f'Objective: {best_adv_obj}')

    logger.info('Finding optimal threshold for Adversarial model.')
    with torch.no_grad():
        scores = actor(data.X_valid_gpu)[:, 0].reshape(-1, 1).cpu().numpy()
    best_adv_thresh, _ = get_best_thresh(
        scores, np.linspace(0, 1, 1001), data, config, valid=False,
        margin=config['adversarial']['margin'])

    logger.info('Evaluating Adversarial model on best threshold.')
    with torch.no_grad():
        labels = (actor(data.X_valid_gpu)[:, 0] > best_adv_thresh).reshape(
            -1, 1).cpu().numpy()
    results_valid = get_valid_objective(labels, data, config)
    logger.info(f'Results: {results_valid}')

    with torch.no_grad():
        labels = (actor(data.X_test_gpu)[:, 0] > best_adv_thresh).reshape(
            -1, 1).cpu().numpy()
    results_test = get_test_objective(labels, data, config)
    return results_valid, results_test
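
# Sketch (not used by the pipeline) of the multiplier applied to the BCE loss
# in adversarial_debiasing: the penalty only kicks in once the critic's
# predicted bias exceeds epsilon - margin, and it is clamped below at 1 so
# the task loss is never down-weighted. The defaults are illustrative, not
# config values.
def _adversarial_multiplier(pred_bias, lam=10.0, epsilon=0.05, margin=0.01):
    # e.g. pred_bias=0.10 gives 10*(0.10-0.05+0.01)+1 = 1.6;
    #      pred_bias=0.01 falls inside the tolerance and gives 1.
    return max(1, lam * (abs(pred_bias) - epsilon + margin) + 1)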
def layerwiseOpt_debiasing(model_state_dict, data, config, device):
    logger.info('Training layerwiseOpt model.')
    base_model = load_model(data.num_features, config.get('hyperparameters', {}))
    base_model.load_state_dict(model_state_dict)
    base_model.to(device)

    # best_obj tracks the minimized value (-objective), so smaller is better.
    best_state_dict, best_obj, best_thresh = None, math.inf, -1
    total_params = len(list(base_model.parameters()))
    for index, param in enumerate(base_model.parameters()):
        # Only optimize the last `num_layers` parameter tensors.
        if index < total_params - config['layerwiseOpt']['num_layers']:
            continue
        logger.info(f'Evaluating param number {index} of {total_params}')
        param_copy = copy.deepcopy(param)

        def objective(new_param, return_thresh=False):
            param.data[indices] = torch.tensor(new_param, device=device,
                                               dtype=param.dtype)
            base_model.eval()
            with torch.no_grad():
                scores = base_model(data.X_valid_gpu)[:, 0].reshape(-1).cpu().numpy()
            best_thresh, best_obj = get_best_thresh(
                scores, np.linspace(0, 1, 501), data, config, valid=False,
                margin=config['layerwiseOpt']['margin'])
            if return_thresh:
                return -float(best_obj), float(best_thresh)
            return -float(best_obj)

        # Randomly select on average `max_sparsity` entries of this tensor and
        # search a box of +/- 2.2 standard deviations around each one.
        std = param.flatten().cpu().detach().numpy().std()
        num_elems = param.size().numel()
        ratio = min(1., config['layerwiseOpt']['max_sparsity'] / num_elems)
        indices = (torch.rand(param.size()) < ratio).to(param.device)
        space = [
            Real(float(x.cpu().detach()) - 2.2 * std,
                 float(x.cpu().detach()) + 2.2 * std)
            for x in param[indices]
        ]
        logger.info(f'Number of sparse indices: {indices.sum().item()}')

        res_gbrt = gbrt_minimize(objective, space,
                                 n_calls=config['layerwiseOpt']['n_calls'],
                                 verbose=True)
        if res_gbrt.fun < best_obj:
            param.data[indices] = torch.tensor(res_gbrt.x, device=device,
                                               dtype=param.dtype)
            best_state_dict = copy.deepcopy(base_model.state_dict())
            # Keep best_obj in minimized form so later comparisons against
            # res_gbrt.fun stay consistent.
            best_obj, best_thresh = objective(res_gbrt.x, return_thresh=True)
        param.data = param_copy.data

    best_model = load_model(data.num_features, config.get('hyperparameters', {}))
    best_model.to(device)
    best_model.load_state_dict(best_state_dict)

    best_model.eval()
    with torch.no_grad():
        y_pred = (best_model(data.X_valid_gpu)[:, 0] >
                  best_thresh).reshape(-1).cpu().numpy()
    results_valid = get_valid_objective(y_pred, data, config)

    best_model.eval()
    with torch.no_grad():
        y_pred = (best_model(data.X_test_gpu)[:, 0] >
                  best_thresh).reshape(-1).cpu().numpy()
    results_test = get_test_objective(y_pred, data, config)
    return results_valid, results_test
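
# Sketch (not used by the pipeline) of the sparse coordinate selection in
# layerwiseOpt_debiasing: a random mask picks on average `max_sparsity`
# entries of a layer, and each selected weight gets a skopt Real box of
# +/- 2.2 standard deviations around its current value. Relies on the skopt
# `Real` import used above; the layer and default are illustrative.
def _sparse_search_space_demo(max_sparsity=8):
    param = torch.nn.Linear(4, 4).weight
    std = param.detach().flatten().numpy().std()
    ratio = min(1., max_sparsity / param.numel())
    indices = torch.rand(param.size()) < ratio
    space = [Real(float(x.detach()) - 2.2 * std,
                  float(x.detach()) + 2.2 * std)
             for x in param[indices]]
    return indices, space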
def mitigating_debiasing(model_state_dict, data, config, device):
    logger.info('Training Mitigating model.')
    actor = load_model(data.num_features, config.get('hyperparameters', {}))
    actor.load_state_dict(model_state_dict)
    actor.to(device)

    # The critic predicts the protected attribute from the actor's hidden
    # representation.
    critic = nn.Sequential(nn.Linear(32, 32), nn.Dropout(0.2), nn.ReLU(),
                           nn.Linear(32, 32), nn.Dropout(0.2), nn.ReLU(),
                           nn.Linear(32, 32), nn.Dropout(0.2), nn.ReLU(),
                           nn.Linear(32, 2), nn.Softmax(dim=1))
    critic.to(device)
    critic_optimizer = optim.Adam(critic.parameters())
    critic_loss_fn = torch.nn.BCELoss()

    actor_optimizer = optim.Adam(actor.parameters(),
                                 lr=config['mitigating']['lr'])
    actor_loss_fn = torch.nn.BCELoss()

    for epoch in range(config['mitigating']['epochs']):
        # Critic steps: learn to predict the protected attribute with the
        # actor frozen.
        for param in critic.parameters():
            param.requires_grad = True
        for param in actor.parameters():
            param.requires_grad = False
        actor.eval()
        critic.train()
        for step in range(config['mitigating']['critic_steps']):
            critic_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0),
                                    (config['mitigating']['batch_size'], ))
            cX_valid = data.X_valid_gpu[indices]
            cp_valid = data.p_valid_gpu[indices]
            res = critic(actor.trunc_forward(cX_valid))
            loss = critic_loss_fn(res[:, 0], cp_valid.type(torch.float32))
            loss.backward()
            train_loss = loss.item()
            critic_optimizer.step()
            if (epoch % 5 == 0) and (step % 100 == 0):
                logger.info(
                    f'=======> Critic Epoch: {(epoch, step)} loss: {train_loss}')

        # Actor steps: update each parameter with the prediction gradient
        # minus its projection onto the adversary's gradient, minus alpha
        # times the adversary's gradient.
        for param in critic.parameters():
            param.requires_grad = False
        for param in actor.parameters():
            param.requires_grad = True
        actor.train()
        critic.eval()
        for step in range(config['mitigating']['actor_steps']):
            actor_optimizer.zero_grad()
            indices = torch.randint(0, data.X_valid.size(0),
                                    (config['mitigating']['batch_size'], ))
            cy_valid = data.y_valid_gpu[indices]
            cX_valid = data.X_valid_gpu[indices]
            cp_valid = data.p_valid_gpu[indices]

            cx_predict = actor(cX_valid)
            loss_pred = actor_loss_fn(cx_predict[:, 0], cy_valid)
            cp_predict = critic(actor.trunc_forward(cX_valid))
            loss_adv = critic_loss_fn(cp_predict[:, 0],
                                      cp_valid.type(torch.float32))
            for param in actor.parameters():
                try:
                    lp = torch.autograd.grad(loss_pred, param,
                                             retain_graph=True)[0]
                    la = torch.autograd.grad(loss_adv, param,
                                             retain_graph=True)[0]
                except RuntimeError:
                    # Parameters past the truncation point receive no
                    # adversary gradient; skip them.
                    continue
                shape = la.shape
                lp = lp.flatten()
                la = la.flatten()
                lp_proj = (lp.T @ la) * la
                grad = lp - lp_proj - config['mitigating']['alpha'] * la
                grad = grad.reshape(shape)
                param.backward(grad)
            actor_optimizer.step()
            if (epoch % 5 == 0) and (step % 100 == 0):
                logger.info(f'=======> Actor Epoch: {(epoch, step)}')

        if epoch % 5 == 0:
            with torch.no_grad():
                scores = actor(data.X_valid_gpu)[:, 0].reshape(-1, 1).cpu().numpy()
            _, best_mit_obj = get_best_thresh(
                scores, np.linspace(0, 1, 1001), data, config, valid=False,
                margin=config['mitigating']['margin'])
            logger.info(f'Objective: {best_mit_obj}')

    logger.info('Finding optimal threshold for Mitigating model.')
    with torch.no_grad():
        scores = actor(data.X_valid_gpu)[:, 0].reshape(-1, 1).cpu().numpy()
    best_mit_thresh, _ = get_best_thresh(scores, np.linspace(0, 1, 1001),
                                         data, config, valid=False,
                                         margin=config['mitigating']['margin'])

    logger.info('Evaluating Mitigating model on best threshold.')
    with torch.no_grad():
        labels = (actor(data.X_valid_gpu)[:, 0] > best_mit_thresh).reshape(
            -1, 1).cpu().numpy()
    results_valid = get_valid_objective(labels, data, config)
    logger.info(f'Results: {results_valid}')

    with torch.no_grad():
        labels = (actor(data.X_test_gpu)[:, 0] > best_mit_thresh).reshape(
            -1, 1).cpu().numpy()
    results_test = get_test_objective(labels, data, config)
    return results_valid, results_test
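
# Toy sketch (not used by the pipeline) of the gradient surgery in
# mitigating_debiasing's actor step: the component of the prediction gradient
# lying along the adversary's gradient is removed (unnormalized projection,
# as above), and an additional alpha-scaled step is taken against the
# adversary. The vectors and default are illustrative.
def _project_gradient_demo(alpha=1.0):
    lp = torch.tensor([1.0, 2.0, 3.0])  # gradient of the prediction loss
    la = torch.tensor([0.0, 1.0, 0.0])  # gradient of the adversary's loss
    lp_proj = (lp @ la) * la            # projection of lp onto la
    return lp - lp_proj - alpha * la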
def main(config):
    seed = np.random.randint(0, high=10000)
    if 'seed' in config:
        seed = config['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Set up directories to save models and results.
    Path('models').mkdir(exist_ok=True)
    Path('results').mkdir(exist_ok=True)

    # Get data.
    logger.info(f'Loading Data from dataset: {config["dataset"]}.')
    data = Data(config, seed)

    # Get trained model.
    model = load_model(data.num_features, config.get('hyperparameters', {}))
    model_path = Path('models') / Path(config['modelpath'])
    if model_path.is_file():
        logger.info(f'Loading Model from {model_path}.')
        model.load_state_dict(torch.load(model_path))
    else:
        logger.info(f'{model_path} does not exist. Retraining model from scratch.')
        train_model(model, data, epochs=config.get('epochs', 1001))
        torch.save(model.state_dict(), model_path)
    model_state_dict = copy.deepcopy(model.state_dict())

    # Preliminaries.
    logger.info('Setting up preliminaries.')
    model.eval()
    with torch.no_grad():
        valid_pred = data.valid.copy(deepcopy=True)
        valid_pred.scores = model(data.X_valid)[:, 0].reshape(-1, 1).numpy()
        valid_pred.labels = np.array(valid_pred.scores > 0.5)

        test_pred = data.test.copy(deepcopy=True)
        test_pred.scores = model(data.X_test)[:, 0].reshape(-1, 1).numpy()
        test_pred.labels = np.array(test_pred.scores > 0.5)

    results_valid = {}
    results_test = {}

    # Evaluate default model.
    if 'default' in config['models']:
        logger.info('Finding best threshold for default model by maximizing '
                    'balanced accuracy.')
        threshs = np.linspace(0, 1, 1001)
        performances = []
        for thresh in threshs:
            perf = balanced_accuracy_score(data.y_valid,
                                           valid_pred.scores > thresh)
            performances.append(perf)
        best_thresh = threshs[np.argmax(performances)]

        logger.info('Evaluating default model with best threshold.')
        results_valid['default'] = get_valid_objective(
            valid_pred.scores > best_thresh, data, config)
        logger.info(f'Results: {results_valid["default"]}')
        results_test['default'] = get_test_objective(
            test_pred.scores > best_thresh, data, config)

    # Evaluate ROC.
    if 'ROC' in config['models']:
        metric_map = {
            'spd': 'Statistical parity difference',
            'aod': 'Average odds difference',
            'eod': 'Equal opportunity difference'
        }
        ROC = RejectOptionClassification(
            unprivileged_groups=data.unpriv,
            privileged_groups=data.priv,
            low_class_thresh=0.01, high_class_thresh=0.99,
            num_class_thresh=100, num_ROC_margin=50,
            metric_name=metric_map[config['metric']],
            metric_ub=0.05, metric_lb=-0.05)

        logger.info('Training ROC model with validation dataset.')
        ROC = ROC.fit(data.valid, valid_pred)

        logger.info('Evaluating ROC model.')
        y_pred = ROC.predict(valid_pred).labels.reshape(-1)
        results_valid['ROC'] = get_valid_objective(y_pred, data, config)
        logger.info(f'Results: {results_valid["ROC"]}')

        y_pred = ROC.predict(test_pred).labels.reshape(-1)
        results_test['ROC'] = get_test_objective(y_pred, data, config)
        ROC = None

    # Evaluate Equality of Odds.
    if 'EqOdds' in config['models']:
        eqodds = EqOddsPostprocessing(privileged_groups=data.priv,
                                      unprivileged_groups=data.unpriv)

        logger.info('Training Equality of Odds model with validation dataset.')
        eqodds = eqodds.fit(data.valid, valid_pred)

        logger.info('Evaluating Equality of Odds model.')
        y_pred = eqodds.predict(valid_pred).labels.reshape(-1)
        results_valid['EqOdds'] = get_valid_objective(y_pred, data, config)
        logger.info(f'Results: {results_valid["EqOdds"]}')

        y_pred = eqodds.predict(test_pred).labels.reshape(-1)
        results_test['EqOdds'] = get_test_objective(y_pred, data, config)
        eqodds = None

    # Evaluate Calibrated Equality of Odds.
    if 'CalibEqOdds' in config['models']:
        cost_constraint = config['CalibEqOdds']['cost_constraint']
        cpp = CalibratedEqOddsPostprocessing(
            privileged_groups=data.priv,
            unprivileged_groups=data.unpriv,
            cost_constraint=cost_constraint)

        logger.info('Training Calibrated Equality of Odds model with '
                    'validation dataset.')
        cpp = cpp.fit(data.valid, valid_pred)

        logger.info('Evaluating Calibrated Equality of Odds model.')
        y_pred = cpp.predict(valid_pred).labels.reshape(-1)
        results_valid['CalibEqOdds'] = get_valid_objective(y_pred, data, config)
        logger.info(f'Results: {results_valid["CalibEqOdds"]}')

        y_pred = cpp.predict(test_pred).labels.reshape(-1)
        results_test['CalibEqOdds'] = get_test_objective(y_pred, data, config)
        cpp = None

    # Evaluate Random Debiasing.
    if 'random' in config['models']:
        from algorithms.random import random_debiasing
        results_valid['random'], results_test['random'] = random_debiasing(
            model_state_dict, data, config, device)

    # Evaluate fairBO.
    if 'fairBO' in config['models']:
        from algorithms.fairBO import fairBO_debiasing
        results_valid['fairBO'], results_test['fairBO'] = fairBO_debiasing(
            model_state_dict, data, config, device)

    # Evaluate Layerwise Optimizer.
    if 'layerwiseOpt' in config['models']:
        from algorithms.layerwiseOpt import layerwiseOpt_debiasing
        results_valid['layerwiseOpt'], results_test['layerwiseOpt'] = \
            layerwiseOpt_debiasing(model_state_dict, data, config, device)

    # Evaluate Adversarial.
    if 'adversarial' in config['models']:
        from algorithms.adversarial import adversarial_debiasing
        results_valid['adversarial'], results_test['adversarial'] = \
            adversarial_debiasing(model_state_dict, data, config, device)

    # Mitigating Unwanted Biases with Adversarial Learning.
    if 'mitigating' in config['models']:
        from algorithms.mitigating import mitigating_debiasing
        results_valid['mitigating'], results_test['mitigating'] = \
            mitigating_debiasing(model_state_dict, data, config, device)

    # Save results.
    results_valid['config'] = config
    logger.info(f'Validation Results: {results_valid}')
    logger.info(f'Saving validation results to '
                f'{config["experiment_name"]}_valid_output.json')
    with open(Path('results') / f'{config["experiment_name"]}_valid_output.json',
              'w') as fh:
        json.dump(results_valid, fh)

    results_test['config'] = config
    logger.info(f'Test Results: {results_test}')
    logger.info(f'Saving test results to '
                f'{config["experiment_name"]}_test_output.json')
    with open(Path('results') / f'{config["experiment_name"]}_test_output.json',
              'w') as fh:
        json.dump(results_test, fh)
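
# Hypothetical entry point, shown only to illustrate the config keys that
# main() above reads directly; the real script may build its config
# differently (e.g. from a file), and Data / the objective helpers read
# further keys not listed here. All values are illustrative.
if __name__ == '__main__':
    example_config = {
        'dataset': 'adult',            # illustrative dataset name
        'modelpath': 'default.pt',     # saved under models/
        'experiment_name': 'example',  # prefixes the JSON outputs
        'metric': 'spd',               # one of 'spd', 'aod', 'eod'
        'models': ['default'],         # which methods to evaluate
    }
    main(example_config)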