def objective(conf):
    """Hyperopt objective: train a DNN for `conf` and return its best dev cost.

    NOTE(review): this function reads NF, NOUT, args, trnX/trnY, devX/devY,
    dd, STATUS_OK, model, np, logging and tabulate from an enclosing scope —
    it only works nested inside (or alongside) the surrounding driver.
    Python 2 code: relies on dict.iteritems() and list-returning map().
    """
    # 'dpart' holds the sampled layer sizes ('h1', 'h2', ...) and dropout
    # rates ('d0', 'd1', ...); sort by key so layer order is preserved.
    conf['n_hidden'] = map(
        lambda x: x[1],
        sorted((k, v) for k, v in conf['dpart'].iteritems()
               if k.startswith('h')))
    conf['drates'] = map(
        lambda x: x[1],
        sorted((k, v) for k, v in conf['dpart'].iteritems()
               if k.startswith('d')))
    dnn = model.DNN(NF, NOUT, conf)
    dcosts = []
    # Train for args['hepoch'] epochs, tracking dev cost each epoch.
    for e in range(args['hepoch']):
        tcost = dnn.train(trnX, trnY)
        dcost, pred = dnn.predict(devX, devY)
        dcosts.append(dcost)
    # Score the configuration by its best (minimum) dev cost; map NaN to a
    # huge sentinel so hyperopt treats diverged runs as worst-possible.
    dcost = min(dcosts)
    dcost = np.iinfo(np.int32).max if np.isnan(dcost) else dcost
    # defaultdict(None) so tabulate lookups for absent headers don't raise.
    info = dd(lambda: None)
    info.update(conf)
    # info = conf.copy()
    info['loss'] = dcost
    # Flatten per-layer values into 'h1..hN' / 'dr0..drN' columns.
    info.update(
        ('h%d' % i, nh) for i, nh in enumerate(info['n_hidden'], 1))
    info.update(('dr%d' % i, dr) for i, dr in enumerate(info['drates']))
    # map(info.pop, ('dpart','n_hidden','drates'))
    headers = [
        'loss', 'n_batch', 'opt', 'activation', 'lr', 'norm', 'bnorm'
    ] + ['h%d' % i for i in range(1, args['max_layers'] + 1)
    ] + ['dr%d' % i for i in range(args['max_layers'] + 1)]
    logging.critical(
        tabulate([map(lambda x: info[x], headers)],
                 headers=headers,
                 floatfmt='.4f'))
    # Return the dict shape hyperopt's fmin expects.
    return {
        'loss': dcost,
        'status': STATUS_OK,
    }
def KFold_cross_validation(df_X, df_Y, n, model_flag):
    """Run n-fold cross validation and print mean train/test error.

    Args:
        df_X: feature DataFrame (indexed with .iloc).
        df_Y: target DataFrame aligned with df_X.
        n: number of folds.
        model_flag: 1=linear regression, 2=polynomial regression,
            3=GDBT, 4=DNN (softmax output).

    Raises:
        ValueError: if model_flag is not in {1, 2, 3, 4}.  (The original
        silently collected no errors and crashed later with IndexError.)

    Each model.* call is expected to return a (train_error, test_error) pair.
    """
    kf = KFold(n_splits=n, shuffle=True)
    error = []
    for train_idx, test_idx in kf.split(df_X):
        train_X = df_X.iloc[train_idx, :]
        train_Y = df_Y.iloc[train_idx, :]
        test_X = df_X.iloc[test_idx, :]
        test_Y = df_Y.iloc[test_idx, :]
        # elif chain: the flags are mutually exclusive, no need to test all.
        if model_flag == 1:
            error.append(model.linear_reg(train_X, train_Y, test_X, test_Y))
        elif model_flag == 2:
            error.append(model.poly_reg(train_X, train_Y, test_X, test_Y))
        elif model_flag == 3:
            error.append(model.GDBT(train_X, train_Y, test_X, test_Y))
        elif model_flag == 4:
            error.append(
                model.DNN(train_X, train_Y, test_X, test_Y,
                          activation_function="softmax"))
        else:
            raise ValueError("unknown model_flag: {}".format(model_flag))
    # Mean over folds; replaces the manual index-accumulation loop.
    train_mean_error = sum(e[0] for e in error) / n
    test_mean_error = sum(e[1] for e in error) / n
    print("training error = {}".format(train_mean_error))
    print("testing error = {}".format(test_mean_error))
def main():
    """Train a DNN on the (toy or full) dataset and log the best dev cost.

    Python 2 script (uses the `print` statement).  Relies on module-level
    helpers: get_arg_parser, setup_logger, prep, model, np, tabulate, logging.
    """
    parser = get_arg_parser()
    args = vars(parser.parse_args())
    setup_logger(args)
    logging.info(tabulate([args], headers='keys', tablefmt='plain'))
    # NF input features per sample; first NOUT columns of each row are targets.
    NF, NOUT = 400, 200
    logging.info('loading data...')
    if args['toy']:
        # Small pre-packaged dataset for quick smoke runs.
        dat = np.load('data/toy.npz')
        trn, dev, tst = dat['trn'], dat['dev'], dat['tst']
    else:
        trn, dev, tst = map(prep.get_dset, ('trn', 'dev', 'tst'))
    logging.info('loading data done.')
    # Each row is [targets(NOUT) | features(NF)]; split into X and Y.
    trnX, trnY = trn[:, NOUT:], trn[:, :NOUT]
    devX, devY = dev[:, NOUT:], dev[:, :NOUT]
    dnn = model.DNN(NF, NOUT, args)
    costs = []
    for e in range(args['fepoch']):
        tcost = dnn.train(trnX, trnY)
        dcost, pred = dnn.predict(devX, devY)
        costs.append(dcost)
        # Per-epoch diagnostics: dev cost plus prediction/target norm averages.
        print 'dcost: {} pred: {} pred avg norm: {} truth avg norm: {}'.format(dcost, pred.shape, np.mean(np.linalg.norm(pred, axis=1)), np.mean(np.linalg.norm(devY, axis=1)))
        # Dead debug snippet kept as a string literal (original author's note):
        """
        t, p = devY[5,:], pred[5,:]
        print t
        print p
        print np.sum((t-p)**2)/2
        break
        """
    logging.info('dcost with best model: {}'.format(min(costs)))
def main(dataset, gpu, model_name, epochs, taus, alphas, with_regularization=False, sigmoid_approx=False, probabilities=False):
    """Load saved checkpoints for every (epoch, tau, alpha) combination and
    print test-set accuracy for each.

    Args:
        dataset: dataset key understood by hp.get_data_loder_objects.
        gpu: CUDA device index.
        model_name: architecture name passed to model.DNN.
        epochs: iterable of checkpoint epochs to evaluate.
        taus / alphas: regularization hyperparameter grids.
        with_regularization / sigmoid_approx / probabilities: flags forwarded
            into the regularized-loss construction.

    Uses module-level globals: batch_size, learning_rate, aggregate_coeff,
    PHASES, hp, model, nn, torch, np, accuracy_score, itertools.
    """
    device = torch.device('cuda:{}'.format(gpu))
    ds_obj, datasets, data_loaders = \
        hp.get_data_loder_objects(dataset, PHASES,
                                  **hp.get_loader_kwargs(batch_size))
    for epoch in epochs:
        for (tau_idx, tau), (alpha_idx, alpha) in itertools.product(
                *[enumerate(taus), enumerate(alphas)]):
            regularization_params = {
                'tau': tau,
                'alpha': alpha,
                'sigmoid_approx': sigmoid_approx,
                'probabilities': probabilities,
                'device': device
            }
            model_to_load = model.DNN(
                model_name=model_name,
                num_classes=ds_obj.num_classes(),
                learning_rate=learning_rate,
                aggregate_coeff=aggregate_coeff,
                with_regularization=with_regularization,
                regularization_params=regularization_params)
            # FIX: this expression was duplicated verbatim after the weight
            # load below; compute it once.  Regularized models embed the
            # criterion name in their checkpoint filename.
            complete_model_name = '{}_{}'.format(model_to_load.model_name, model_to_load.criterion._get_name()) \
                if not isinstance(model_to_load.criterion, nn.CrossEntropyLoss) else model_to_load.model_name
            filename = '{}_epoch_{}_lr_{}.pth'.format(complete_model_name,
                                                      epoch, learning_rate)
            model_to_load.model_ft.load_state_dict(
                torch.load('../{}/model_weights/{}'.format(
                    ds_obj.name, filename), map_location=device))
            model_to_load.model_ft.eval()
            print('Loaded weights from: ../{}/model_weights/{}'.format(
                ds_obj.name, filename))
            # FIX: moving the model to the device is loop-invariant; the
            # original re-issued .to(device) for every test batch.
            model_to_load.model_ft = model_to_load.model_ft.to(device)
            predicted_classes, true_classes = None, None
            for _, inputs, labels, _ in data_loaders['test']:
                inputs = inputs.to(device)
                outputs = model_to_load.model_ft(inputs.float())
                _, preds = torch.max(outputs, 1)
                # Accumulate batch predictions/labels as flat numpy arrays.
                predicted_classes = preds.detach().cpu().numpy() if predicted_classes is None else \
                    np.concatenate((predicted_classes, preds.detach().cpu().numpy()))
                true_classes = labels.numpy() if true_classes is None else \
                    np.concatenate((true_classes, labels.numpy()))
            print("Accuracy for {}: {}".format(
                complete_model_name,
                accuracy_score(true_classes, predicted_classes)))
def main(all_datasets, gpu, epoch):
    """For every trained model found on disk, compute mean adversarial
    perturbation distances for minority vs. majority groups and save one
    CSV per attack under pickled_ubs/.

    Args:
        all_datasets: dataset keys to scan.
        gpu: CUDA device index.
        epoch: checkpoint epoch whose adversarial images are loaded.

    Model hyperparameters (tau, alpha, beta, gamma, flags) are recovered by
    parsing the adversarial-image directory names.  Uses module-level
    globals: batch_size, learning_rate, aggregate_coeff, PHASES, hp, model,
    nn, np, pd, os, glob, torch, image_differences.
    """
    device = torch.device('cuda:{}'.format(gpu))
    attack_names = ['DeepFool', 'CarliniWagner']
    for attack_name in attack_names:
        csv_rows = []
        for dataset in all_datasets:
            ds_obj, datasets, data_loaders = \
                hp.get_data_loder_objects(dataset, PHASES, **hp.get_loader_kwargs(batch_size))
            for dir_name in os.listdir('../{}/adversarial_images/'.format(
                    ds_obj.name)):
                # dir_name contains model name and other params, process them here
                if 'RegularizedLoss' in dir_name:
                    # e.g. "<model>_RegularizedLoss_tau_<t>_alpha_<a>...".
                    model_name = dir_name.split('_RegularizedLoss_')[0]
                    with_regularization = True
                    tau = float(dir_name.split('_tau_')[1].split('_')[0])
                    alpha = float(dir_name.split('_alpha_')[1].split('_')[0])
                    if 'probabilities' in dir_name:
                        probabilities = True
                    else:
                        probabilities = False
                    # 'exact' in the name means no sigmoid approximation.
                    if 'exact' in dir_name:
                        sigmoid_approx = False
                    else:
                        sigmoid_approx = True
                else:
                    # Unregularized baseline: dir name is just the model name.
                    model_name = dir_name
                    with_regularization = False
                    tau, alpha, sigmoid_approx, probabilities = None, None, None, None
                if 'robust' in dir_name:
                    robust_regularization = True
                    beta = float(dir_name.split('_beta_')[1].split('_')[0])
                    gamma = float(dir_name.split('_gamma_')[1].split('_')[0])
                else:
                    robust_regularization = False
                    beta, gamma = None, None
                # for model_name in DATASET_TO_MODEL_NAMES[dataset.split('_')[0].lower()]:
                # taus = DATASET_TO_MODEL_TO_TAUS[dataset.split('_')[0].lower()][model_name]
                # alphas = DATASET_TO_MODEL_TO_ALPHAS[dataset.split('_')[0].lower()][model_name]
                # for (tau_idx, tau), (alpha_idx, alpha) in itertools.product(*[enumerate(taus), enumerate(alphas)]):
                regularization_params = {
                    'tau': tau,
                    'alpha': alpha,
                    'sigmoid_approx': sigmoid_approx,
                    'probabilities': probabilities,
                    'robust_regularization': robust_regularization,
                    'beta': beta,
                    'gamma': gamma,
                    'device': device
                }
                # Rebuild the model only to recover its canonical name/criterion;
                # no weights are loaded here.
                model_to_load = model.DNN(
                    model_name=model_name,
                    num_classes=ds_obj.num_classes(),
                    learning_rate=learning_rate,
                    aggregate_coeff=aggregate_coeff,
                    with_regularization=with_regularization,
                    regularization_params=regularization_params)
                complete_model_name = '{}_{}'.format(model_to_load.model_name, model_to_load.criterion._get_name()) \
                    if not isinstance(model_to_load.criterion, nn.CrossEntropyLoss) else model_to_load.model_name
                print('Attack: {}, Dataset: {}, Model: {}'.format(
                    attack_name, dataset, complete_model_name))
                adv_folder = '../{}/adversarial_images/{}/{}'.format(
                    ds_obj.name, complete_model_name, attack_name)
                adv_image_ids, all_adv_objs = hp.load_adversarial_objects(
                    folder=adv_folder, epoch=epoch, ds_obj=ds_obj,
                    device=device)
                all_images_adversarial = [x.image for x in all_adv_objs]
                print(adv_folder)
                print(
                    len(glob.glob("{}/*_epoch_{}*".format(adv_folder, epoch))))
                if 'cifar' in ds_obj.name.lower():
                    if ds_obj.name.lower() == 'cifar10':
                        # Plain CIFAR-10: treat each class in turn as "minority".
                        sensitive_attrs, sensitive_attrs_names = [], []
                        for cname in ds_obj.classes:
                            sensitive_attrs_names.append(cname)
                            sensitive_attrs.append(np.array([1 if ds_obj.classes[ds_obj.test_labels[int(img_id)]] == cname \
                                else 0 for img_id in adv_image_ids]))
                    else:
                        # Dataset variant named "cifar..._<class>": that class
                        # is the minority group.
                        sensitive_attrs = [np.array(
                            [1 if ds_obj.classes[ds_obj.test_labels[int(img_id)]] == ds_obj.name.split('_')[-1].lower() \
                                else 0 for img_id in adv_image_ids])]
                        sensitive_attrs_names = [
                            ds_obj.name.lower().split('_')[-1]
                        ]
                else:
                    # Face datasets: protected attribute (race/sex) from metadata.
                    attr = ds_obj.name.lower().split('_')[-1]
                    sensitive_attrs = [np.array([ds_obj.get_image_protected_class('test', int(img_id), attr=attr) \
                        for img_id in adv_image_ids])]  # sens_attr = 1 means minority
                    sensitive_attrs_names = [
                        'Black' if attr == 'race' else 'Female'
                    ]
                majority_differences, minority_differences = [], []
                for sensitive_attr in sensitive_attrs:
                    minority_difference, majority_difference = image_differences(
                        adv_image_ids, all_images_adversarial, sensitive_attr,
                        ds_obj)
                    majority_differences.append(majority_difference)
                    minority_differences.append(minority_difference)
                for minority_difference, majority_difference, sensitive_attr_name in zip(
                        minority_differences, majority_differences,
                        sensitive_attrs_names):
                    # Mean perturbation distance per group -> one CSV row.
                    mu_minority, mu_majority = np.mean(
                        minority_difference), np.mean(majority_difference)
                    csv_rows.append([
                        dataset, complete_model_name, sensitive_attr_name,
                        mu_minority, mu_majority
                    ])
        hp.create_dir("pickled_ubs")
        df = pd.DataFrame(
            csv_rows,
            columns=['dataset', 'model', 'minority', 'mu_min', 'mu_maj'])
        df.to_csv('pickled_ubs/{}_cdf_mus_regularized.csv'.format(attack_name),
                  index=False)
        print('Saved to pickled_ubs/{}_cdf_mus_regularized.csv!'.format(
            attack_name))
def main(dataset, gpu, epochs, model_names, with_regularization=False, taus=None, alphas=None, sigmoid_approx=False, probabilities=False, robust_regularization=False, betas=None, gammas=None):
    """Train (or reload) one model per (model_name, tau, alpha, beta, gamma)
    combination and save train/test history plots.

    Args:
        dataset: dataset key understood by hp.get_data_loder_objects.
        gpu: CUDA device index.
        epochs: number of training epochs.
        model_names: architectures to train.
        with_regularization: enables the fairness-regularized loss; then
            taus/alphas (and, if robust_regularization, betas/gammas) are
            required grids.

    Uses module-level globals: batch_size, learning_rate, aggregate_coeff,
    PHASES, hp, model, np, os, torch, itertools.
    """
    if with_regularization:
        assert taus is not None and alphas is not None
        if robust_regularization:
            assert betas is not None and gammas is not None
    else:
        # Single pass with all hyperparameters disabled.
        taus, alphas, betas, gammas = [None], [None], [None], [None]
    device = torch.device('cuda:{}'.format(gpu))
    ds_obj, datasets, data_loaders = \
        hp.get_data_loder_objects(dataset, PHASES, **hp.get_loader_kwargs(batch_size))
    for (tau_idx, tau), (alpha_idx, alpha), (beta_idx, beta), (gamma_idx, gamma) in \
            itertools.product(*[enumerate(taus), enumerate(alphas), enumerate(betas), enumerate(gammas)]):
        regularization_params = {
            'tau': tau,
            'alpha': alpha,
            'sigmoid_approx': sigmoid_approx,
            'probabilities': probabilities,
            'robust_regularization': robust_regularization,
            'beta': beta,
            'gamma': gamma,
            'device': device
        }
        # The regularized criterion is called with these extra kwargs filled
        # in at train time; plain cross-entropy takes none.
        criterion_kwargs = {} if not with_regularization else {
            'inputs': None,
            'protected_classes': None
        }
        # assert model_name in hp.get_model_names(dataset, with_regularization)
        for model_name in model_names:
            model_to_train = model.DNN(
                model_name=model_name,
                num_classes=ds_obj.num_classes(),
                learning_rate=learning_rate,
                aggregate_coeff=aggregate_coeff,
                with_regularization=with_regularization,
                regularization_params=regularization_params)
            # Train only if this configuration's history pickle is absent;
            # otherwise reload the saved per-epoch values.
            if not os.path.exists(
                    '../{}/training_values/{}_{}_lr_{}_train_acc_history.pkl'.
                    format(ds_obj.name, model_to_train.model_name,
                           model_to_train.criterion._get_name(),
                           learning_rate)):
                (train_acc_history, train_overall_loss_history,
                 train_cross_entropy, train_regularization,
                 train_minority_dist, train_majority_dist, test_acc_history,
                 test_overall_loss_history, test_cross_entropy,
                 test_regularization, test_minority_dist, test_majority_dist) = \
                    model.train_model(model_to_train, epochs, device, data_loaders, criterion_kwargs)
                print((train_acc_history, train_overall_loss_history,
                       train_cross_entropy, train_regularization,
                       train_minority_dist, train_majority_dist,
                       test_acc_history, test_overall_loss_history,
                       test_cross_entropy, test_regularization,
                       test_minority_dist, test_majority_dist))
                hp.persist_model_weights(model_to_train, ds_obj,
                                         learning_rate, 'best', root_dir='.')
                hp.persist_epoch_values(
                    model_to_train, ds_obj, learning_rate,
                    (train_acc_history, train_overall_loss_history,
                     train_cross_entropy, train_regularization,
                     train_minority_dist, train_majority_dist,
                     test_acc_history, test_overall_loss_history,
                     test_cross_entropy, test_regularization,
                     test_minority_dist, test_majority_dist),
                    ('train_acc_history', 'train_overall_loss_history',
                     'train_cross_entropy', 'train_regularization',
                     'train_minority_dist', 'train_majority_dist',
                     'test_acc_history', 'test_overall_loss_history',
                     'test_cross_entropy', 'test_regularization',
                     'test_minority_dist', 'test_majority_dist'))
            else:
                (train_acc_history, train_overall_loss_history,
                 train_cross_entropy, train_regularization,
                 train_minority_dist, train_majority_dist, test_acc_history,
                 test_overall_loss_history, test_cross_entropy,
                 test_regularization, test_minority_dist, test_majority_dist) = \
                    hp.load_epoch_values(model_to_train, ds_obj, learning_rate,
                                         ('train_acc_history', 'train_overall_loss_history',
                                          'train_cross_entropy', 'train_regularization',
                                          'train_minority_dist', 'train_majority_dist',
                                          'test_acc_history', 'test_overall_loss_history',
                                          'test_cross_entropy', 'test_regularization',
                                          'test_minority_dist',
                                          'test_majority_dist'))
            # with torch.no_grad():
            # train_acc_history, train_acc_history_s0, train_acc_history_s1, train_loss_history = model.load_model_history(model, ds_obj, num_epochs, portion='train', device=device,
            # override_criterion=nn.CrossEntropyLoss())
            # test_acc_history, test_acc_history_s0, test_acc_history_s1, test_loss_history = model.load_model_history(model, ds_obj, num_epochs, portion='test', device=device,
            # override_criterion=nn.CrossEntropyLoss())
            # One plot per tracked quantity; x axis is every
            # aggregate_coeff-th epoch.
            hp.line_plots([train_acc_history, test_acc_history],
                          np.arange(0, epochs, aggregate_coeff),
                          x_label="Epoch",
                          y_label="Accuracy",
                          subfolder=ds_obj.name,
                          filename='{}_{}_train_test_acc.png'.format(
                              model_to_train.model_name,
                              model_to_train.criterion._get_name()),
                          title="Accuracy ({})".format(
                              model_to_train.model_name),
                          legend_vals=["Train", "Test"])
            hp.line_plots(
                [train_overall_loss_history, test_overall_loss_history],
                np.arange(0, epochs, aggregate_coeff),
                x_label="Epoch",
                y_label="Total Loss",
                subfolder=ds_obj.name,
                filename='{}_{}_train_test_overall_loss.png'.format(
                    model_to_train.model_name,
                    model_to_train.criterion._get_name()),
                title="Overall Loss ({})".format(model_to_train.model_name),
                legend_vals=["Train", "Test"])
            hp.line_plots([train_cross_entropy, test_cross_entropy],
                          np.arange(0, epochs, aggregate_coeff),
                          x_label="Epoch",
                          y_label="Cross Entropy Loss",
                          subfolder=ds_obj.name,
                          filename='{}_{}_train_test_ce_loss.png'.format(
                              model_to_train.model_name,
                              model_to_train.criterion._get_name()),
                          title="CE Loss ({})".format(
                              model_to_train.model_name),
                          legend_vals=["Train", "Test"])
            hp.line_plots([train_regularization, test_regularization],
                          np.arange(0, epochs, aggregate_coeff),
                          x_label="Epoch",
                          y_label="Reg. Term",
                          subfolder=ds_obj.name,
                          filename='{}_{}_train_test_reg_term.png'.format(
                              model_to_train.model_name,
                              model_to_train.criterion._get_name()),
                          title="Reg Term ({})".format(
                              model_to_train.model_name),
                          legend_vals=["Train", "Test"])
            # NOTE(review): the two plots below reuse title "Reg Term (...)" —
            # looks like a copy-paste slip (probably meant "Minority Dist" /
            # "Majority Dist"); confirm before changing the output images.
            hp.line_plots([train_minority_dist, test_minority_dist],
                          np.arange(0, epochs, aggregate_coeff),
                          x_label="Epoch",
                          y_label="Minority Dist Approx",
                          subfolder=ds_obj.name,
                          filename='{}_{}_train_test_minority_dist.png'.format(
                              model_to_train.model_name,
                              model_to_train.criterion._get_name()),
                          title="Reg Term ({})".format(
                              model_to_train.model_name),
                          legend_vals=["Train", "Test"])
            hp.line_plots([train_majority_dist, test_majority_dist],
                          np.arange(0, epochs, aggregate_coeff),
                          x_label="Epoch",
                          y_label="Majority Dist Approx",
                          subfolder=ds_obj.name,
                          filename='{}_{}_train_test_majority_dist.png'.format(
                              model_to_train.model_name,
                              model_to_train.criterion._get_name()),
                          title="Reg Term ({})".format(
                              model_to_train.model_name),
                          legend_vals=["Train", "Test"])
from torch import nn, optim from torch.autograd import Variable from torch.utils.data import DataLoader from torch.optim.lr_scheduler import ReduceLROnPlateau from pre_process import transform_train, transform_test # whether use gpu use_cuda = torch.cuda.is_available() # default parameters DATA_ROOT = '../data/' num_epochs = 50 batch_size = 128 model_names = { 'dnn': model.DNN(3072, 4096, 10), 'cnn': model.CNN(), 'resnet18': model.ResNet18(), 'resnet34': model.ResNet34(), 'resnet50': model.ResNet50() } def get_args(): parser = argparse.ArgumentParser() parser.add_argument('--model_type', type=str, default='dnn', help="the type of model") parser.add_argument('--lr', type=float,
def main(dataset, gpu, model_name, epochs, taus, alphas, with_regularization=False, sigmoid_approx=False, probabilities=False, robust_regularization=False, betas=None, gammas=None):
    """Plot the inverse CDF of adversarial perturbation distances (minority
    vs. majority group) for each saved model configuration and attack.

    Args:
        dataset / gpu / model_name / epochs: as in the sibling drivers.
        taus, alphas, betas, gammas: regularization hyperparameter grids;
            betas/gammas default to [None] (single unregularized pass).

    Uses module-level globals: batch_size, learning_rate, aggregate_coeff,
    PHASES, paper_friendly_plots, hp, model, nn, np, plt, glob, torch,
    itertools, image_differences, set_paper_friendly_plots_params.
    """
    # FIX: the original used mutable default arguments betas=[None],
    # gammas=[None]; use a None sentinel with the same effective default.
    if betas is None:
        betas = [None]
    if gammas is None:
        gammas = [None]
    device = torch.device('cuda:{}'.format(gpu))
    attack_names = ['DeepFool', 'CarliniWagner']
    ds_obj, datasets, data_loaders = \
        hp.get_data_loder_objects(dataset, PHASES, **hp.get_loader_kwargs(batch_size))
    for epoch in epochs:
        for (tau_idx, tau), (alpha_idx, alpha), (beta_idx, beta), (gamma_idx, gamma) in \
                itertools.product(*[enumerate(taus), enumerate(alphas), enumerate(betas), enumerate(gammas)]):
            regularization_params = {
                'tau': tau,
                'alpha': alpha,
                'sigmoid_approx': sigmoid_approx,
                'probabilities': probabilities,
                'robust_regularization': robust_regularization,
                'beta': beta,
                'gamma': gamma,
                'device': device
            }
            # Built only to recover the canonical model/criterion name; the
            # weight-loading below is intentionally disabled.
            model_to_load = model.DNN(
                model_name=model_name,
                num_classes=ds_obj.num_classes(),
                learning_rate=learning_rate,
                aggregate_coeff=aggregate_coeff,
                with_regularization=with_regularization,
                regularization_params=regularization_params)
            # filename = '{}_{}_epoch_{}_lr_{}.pth'.format(model_to_load.model_name, model_to_load.criterion._get_name(),
            # epoch, learning_rate)
            # model_to_load.model_ft.load_state_dict(torch.load('../{}/model_weights/{}'.format(ds_obj.name, filename),
            # map_location=device))
            # model_to_load.model_ft.eval()
            # print ('Loaded weights from: ../{}/model_weights/{}'.format(ds_obj.name, filename))
            complete_model_name = '{}_{}'.format(model_to_load.model_name, model_to_load.criterion._get_name()) \
                if not isinstance(model_to_load.criterion, nn.CrossEntropyLoss) else model_to_load.model_name
            for attack_name in attack_names:
                adv_folder = '../{}/adversarial_images/{}/{}'.format(
                    ds_obj.name, complete_model_name, attack_name)
                adv_image_ids, all_adv_objs = hp.load_adversarial_objects(
                    folder=adv_folder, epoch=epoch, ds_obj=ds_obj,
                    device=device)
                all_images_adversarial = [x.image for x in all_adv_objs]
                print(adv_folder)
                print(
                    len(glob.glob("{}/*_epoch_{}*".format(adv_folder, epoch))))
                if 'cifar' in ds_obj.name.lower():
                    if ds_obj.name.lower() == 'cifar10':
                        # Plain CIFAR-10: each class in turn is "minority".
                        sensitive_attrs, sensitive_attrs_names = [], []
                        for cname in ds_obj.classes:
                            sensitive_attrs_names.append(cname)
                            sensitive_attrs.append(np.array([1 if ds_obj.classes[ds_obj.test_labels[int(img_id)]] == cname \
                                else 0 for img_id in adv_image_ids]))
                    else:
                        sensitive_attrs = [np.array(
                            [1 if ds_obj.classes[ds_obj.test_labels[int(img_id)]] == ds_obj.name.split('_')[-1].lower() \
                                else 0 for img_id in adv_image_ids])]
                        sensitive_attrs_names = [
                            ds_obj.name.lower().split('_')[-1]
                        ]
                else:
                    # Face datasets: protected attribute from metadata;
                    # sens_attr = 1 means minority.
                    attr = ds_obj.name.lower().split('_')[-1]
                    sensitive_attrs = [np.array([ds_obj.get_image_protected_class('test', int(img_id), attr=attr) \
                        for img_id in adv_image_ids])]
                    sensitive_attrs_names = [
                        'Black' if attr == 'race' else 'Female'
                    ]
                majority_differences, minority_differences = [], []
                for sensitive_attr in sensitive_attrs:
                    minority_difference, majority_difference = image_differences(
                        adv_image_ids, all_images_adversarial, sensitive_attr,
                        ds_obj)
                    majority_differences.append(majority_difference)
                    minority_differences.append(minority_difference)
                    # print (minority_difference, majority_difference)
                hp.create_dir("plots/{}".format(ds_obj.name))
                hp.create_dir("plots/{}/{}".format(ds_obj.name,
                                                   model_to_load.model_name))
                hp.create_dir("plots/{}/{}/{}".format(
                    ds_obj.name, model_to_load.model_name, attack_name))
                dir_to_save = "plots/{}/{}/{}".format(
                    ds_obj.name, model_to_load.model_name, attack_name)
                # BUG FIX: the original assigned this grid to `taus`,
                # clobbering the parameter that itertools.product above
                # iterates — every epoch after the first looped over 2000
                # threshold values instead of the caller's tau grid.
                # tau_thresholds = np.linspace(0.0, 0.5, 2000)
                tau_thresholds = np.linspace(0.0, 2.0, 2000)
                # tau_thresholds = np.linspace(0.0, 2.0, 2000) if 'deepfool' in attack_name.lower() else np.linspace(2.9, 3.1, 2000)
                for minority_difference, majority_difference, sensitive_attr_name in zip(
                        minority_differences, majority_differences,
                        sensitive_attrs_names):
                    # Fraction of each group whose perturbation distance
                    # exceeds each threshold t (empirical inverse CDF).
                    frac_greater_than_tau_majority = np.array([
                        np.sum(majority_difference > t) /
                        len(majority_difference) for t in tau_thresholds
                    ])
                    frac_greater_than_tau_minority = np.array([
                        np.sum(minority_difference > t) /
                        len(minority_difference) for t in tau_thresholds
                    ])
                    if paper_friendly_plots:
                        set_paper_friendly_plots_params()
                    fig = plt.figure()
                    if not paper_friendly_plots:
                        fig.suptitle(
                            r'fraction $d_\theta > \tau$ for {}'.format(
                                ds_obj.name), fontsize=20)
                    ax = fig.add_subplot(111)
                    ax.plot(tau_thresholds,
                            frac_greater_than_tau_majority,
                            color='blue',
                            label='Other Classes')
                    ax.plot(tau_thresholds,
                            frac_greater_than_tau_minority,
                            color='red',
                            label='{}'.format(sensitive_attr_name))
                    ax.set_xlabel('Distance to Adv. Sample' + r' ($\tau$)')
                    ax.set_ylabel(r'$ \widehat{I^\tau_s} $')
                    plt.legend()
                    extension = 'png' if not paper_friendly_plots else 'pdf'
                    filename = '{}_inv_cdf'.format(model_to_load.criterion._get_name()) \
                        if not isinstance(model_to_load.criterion, nn.CrossEntropyLoss) else \
                        'inv_cdf_{}'.format(sensitive_attr_name)
                    plt.savefig('{}/{}.{}'.format(dir_to_save, filename,
                                                  extension),
                                bbox_inches='tight')
                    plt.show()
                    plt.close()
def training():
    """Build the DNN, train it with generators, and evaluate on the
    validation and test splits.

    Uses module-level globals: path_params, train_params, model_params,
    DataPipeline, model, ModelCheckpoint, EarlyStopping, time.
    """
    # Save the model according to the conditions
    filepath = "model_weight-{epoch:02d}-{loss:.4f}-m1.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)
    early = EarlyStopping(monitor='val_acc',
                          min_delta=5,
                          patience=10,
                          verbose=1,
                          mode='auto')
    data_pipeline = DataPipeline(path_params, train_params)
    train_generator = data_pipeline.build_training_data()
    validation_generator = data_pipeline.build_validation_data()
    test_generator = data_pipeline.build_testing_data()
    STEP_SIZE_TRAIN = train_generator.n // data_pipeline.batch_size
    STEP_SIZE_VALID = validation_generator.n // data_pipeline.batch_size
    STEP_SIZE_TEST = test_generator.n // data_pipeline.batch_size
    #1
    dnn = model.DNN(model_params)
    dnn.build_model()
    # Start training
    time_start = time.time()
    # BUG FIX: checkpoint/early were constructed but never registered, so
    # no weights were ever saved and early stopping never fired; pass them
    # to fit_generator via `callbacks`.
    dnn.model.fit_generator(train_generator,
                            epochs=train_params.epochs,
                            steps_per_epoch=STEP_SIZE_TRAIN,
                            validation_data=validation_generator,
                            validation_steps=STEP_SIZE_VALID,
                            callbacks=[checkpoint, early])
    dnn.model.evaluate_generator(generator=validation_generator,
                                 steps=STEP_SIZE_VALID)
    #test_generator.reset()
    dnn.model.evaluate_generator(generator=test_generator,
                                 steps=STEP_SIZE_TEST)
    """
    test_generator.reset()
    pred=model.predict_generator(
    test_generator,
    steps=STEP_SIZE_TEST,
    verbose=1)
    predicted_class_indices=np.argmax(pred,axis=1)
    labels = (train_generator.class_indices)
    labels = dict((v,k) for k,v in labels.items())
    predictions = [labels[k] for k in predicted_class_indices]
    """
    time_elapsed = time.time() - time_start
    print('Training time = ', time_elapsed)
def main(dataset_reg, dataset_original, gpu, model_name_reg, model_name_original, epochs, taus, alphas, sigmoid_approx=False, probabilities=False):
    """Compare inverse-CDF curves of adversarial perturbation distances for
    an unregularized baseline vs. regularized models, one grid figure per
    (epoch, attack).

    Args:
        dataset_reg / dataset_original: dataset keys for the regularized and
            baseline models respectively.
        model_name_reg / model_name_original: their architectures.
        epochs: checkpoint epochs to compare.
        taus, alphas: regularization hyperparameter grids to sweep.

    Uses module-level globals: batch_size, learning_rate, aggregate_coeff,
    PHASES, hp, model, nn, np, glob, torch, itertools, image_differences.
    """
    device = torch.device('cuda:{}'.format(gpu))
    attack_names = ['DeepFool', 'CarliniWagner']
    ds_obj_original, _, _ = \
        hp.get_data_loder_objects(dataset_original, PHASES, **hp.get_loader_kwargs(batch_size))
    ds_obj_reg, _, _ = \
        hp.get_data_loder_objects(dataset_reg, PHASES, **hp.get_loader_kwargs(batch_size))
    # BUG FIX: this threshold grid was assigned to `taus`, silently
    # discarding the caller's tau grid — the product loop below then swept
    # 2000 spurious tau values.  Keep the grid under its own name.
    tau_thresholds = np.linspace(0.0, 2.0, 2000)
    for epoch in epochs:
        # Unregularized baseline model (name/criterion only; no weights).
        model_original = model.DNN(model_name=model_name_original,
                                   num_classes=ds_obj_reg.num_classes(),
                                   learning_rate=learning_rate,
                                   aggregate_coeff=aggregate_coeff,
                                   with_regularization=False)
        complete_model_name = '{}_{}'.format(model_original.model_name, model_original.criterion._get_name()) \
            if not isinstance(model_original.criterion, nn.CrossEntropyLoss) else model_original.model_name
        for attack_name in attack_names:
            adv_folder = '../{}/adversarial_images/{}/{}'.format(
                ds_obj_original.name, complete_model_name, attack_name)
            adv_image_ids, all_adv_objs = hp.load_adversarial_objects(
                folder=adv_folder, epoch=epoch, ds_obj=ds_obj_original,
                device=device)
            all_images_adversarial = [x.image for x in all_adv_objs]
            print(adv_folder)
            print(len(glob.glob("{}/*_epoch_{}*".format(adv_folder, epoch))))
            if 'cifar' in ds_obj_original.name.lower():
                # get the sens attr name from reg model
                sensitive_attrs_name = ds_obj_reg.name.split('_')[-1].lower()
                sensitive_attr = np.array([1 if ds_obj_original.classes[ds_obj_original.test_labels[int(img_id)]] == sensitive_attrs_name \
                    else 0 for img_id in adv_image_ids])
            else:
                attr = ds_obj_original.name.lower().split('_')[-1]
                sensitive_attrs_name = 'Black' if attr == 'race' else 'Female'
                sensitive_attr = np.array([ds_obj_original.get_image_protected_class('test', int(img_id), attr=attr) \
                    for img_id in adv_image_ids])
            minority_difference, majority_difference = image_differences(
                adv_image_ids, all_images_adversarial, sensitive_attr,
                ds_obj_original)
            # Empirical inverse CDF of each group's perturbation distances.
            frac_greater_than_tau_majority = np.array([
                np.sum(majority_difference > t) / len(majority_difference)
                for t in tau_thresholds
            ])
            frac_greater_than_tau_minority = np.array([
                np.sum(minority_difference > t) / len(minority_difference)
                for t in tau_thresholds
            ])
            all_lines = [[frac_greater_than_tau_majority,
                          frac_greater_than_tau_minority]]
            titles = ['Original']
            # One extra panel per regularized (tau, alpha) configuration.
            for (tau_idx, tau), (alpha_idx, alpha) in itertools.product(
                    *[enumerate(taus), enumerate(alphas)]):
                regularization_params = {
                    'tau': tau,
                    'alpha': alpha,
                    'sigmoid_approx': sigmoid_approx,
                    'probabilities': probabilities,
                    'device': device
                }
                model_reg = model.DNN(model_name=model_name_reg,
                                      num_classes=ds_obj_reg.num_classes(),
                                      learning_rate=learning_rate,
                                      aggregate_coeff=aggregate_coeff,
                                      with_regularization=True,
                                      regularization_params=regularization_params)
                complete_model_name = '{}_{}'.format(model_reg.model_name, model_reg.criterion._get_name()) \
                    if not isinstance(model_reg.criterion, nn.CrossEntropyLoss) else model_reg.model_name
                adv_folder = '../{}/adversarial_images/{}/{}'.format(
                    ds_obj_reg.name, complete_model_name, attack_name)
                adv_image_ids, all_adv_objs = hp.load_adversarial_objects(
                    folder=adv_folder, epoch=epoch, ds_obj=ds_obj_reg,
                    device=device)
                all_images_adversarial = [x.image for x in all_adv_objs]
                print(adv_folder)
                print(
                    len(glob.glob("{}/*_epoch_{}*".format(adv_folder, epoch))))
                if 'cifar' in ds_obj_reg.name.lower():
                    sensitive_attrs_name = ds_obj_reg.name.lower().split('_')[-1]
                    sensitive_attr = np.array([1 if ds_obj_reg.classes[ds_obj_reg.test_labels[int(img_id)]] == sensitive_attrs_name \
                        else 0 for img_id in adv_image_ids])
                    partition_name = 'Partition by class: {}'.format(
                        sensitive_attrs_name)
                else:
                    attr = ds_obj_reg.name.lower().split('_')[-1]
                    sensitive_attrs_name = 'Black' if attr == 'race' else 'Female'
                    sensitive_attr = np.array([ds_obj_reg.get_image_protected_class('test', int(img_id), attr=attr) \
                        for img_id in adv_image_ids])
                    partition_name = 'Partition by {}: {}'.format(
                        attr, sensitive_attrs_name)
                minority_difference, majority_difference = image_differences(
                    adv_image_ids, all_images_adversarial, sensitive_attr,
                    ds_obj_reg)
                frac_greater_than_tau_majority = np.array([
                    np.sum(majority_difference > t) / len(majority_difference)
                    for t in tau_thresholds
                ])
                frac_greater_than_tau_minority = np.array([
                    np.sum(minority_difference > t) / len(minority_difference)
                    for t in tau_thresholds
                ])
                all_lines.append([frac_greater_than_tau_majority,
                                  frac_greater_than_tau_minority])
                titles.append(r'$\tau = $' + ' {:.2f}, '.format(tau) +
                              r'$\alpha$' + ' = {:.2f}'.format(alpha))
            x_label = 'Distance to Adv. Sample' + r' ($\tau$)'
            y_label = r'$ \widehat{I^\tau_s} $'
            filename = 'inv_cdf_{}_comparison'.format(sensitive_attrs_name)
            dir_to_save = "plots/{}/{}/{}".format(ds_obj_reg.name,
                                                  model_reg.model_name,
                                                  attack_name)
            hp.line_plots_grid(all_lines, [tau_thresholds] * len(all_lines),
                               x_label, y_label, filename, titles,
                               partition_name,
                               subfolder=dir_to_save,
                               y_lims=(0, 1),
                               columns=len(all_lines))
for i in range(numeroMuestras): X[i, 2] = funciones.definir_salida(X[i, 0], X[i, 1]) train, test = train_test_split(X, test_size=0.3) trainDataset = funciones.MiDataset(train) testDataset = funciones.MiDataset(test) trainLoader = DataLoader(trainDataset, batch_size=batchSize, shuffle=True) testLoader = DataLoader(testDataset, batch_size=testDataset.__len__(), shuffle=False) classWeight = torch.from_numpy(funciones.calcula_class_weights( train[:, -1])).float() miRed = model.DNN(2, 300, 150, 5) lossFunction = nn.NLLLoss(weight=classWeight) optimizer = Adam(miRed.parameters()) lossTrain = [] lossTest = [] minAccuracy = 0 for epoch in range(numeroEpoch): for data, target in trainLoader: data = data.detach().requires_grad_(True).float() target = target.detach().requires_grad_(True).long() optimizer.zero_grad() out = miRed(data) loss = lossFunction(out, target) loss.backward()
def main():
    """Hyperparameter search driver: run hyperopt's TPE over DNN configs,
    then retrain with the best configuration and log its dev cost.

    Python 2 code (dict.iteritems, list-returning map).  Relies on
    module-level helpers: get_arg_parser, setup_logger, prep, model,
    create_space, best2mparams, fmin, tpe, STATUS_OK, dd, np, tabulate,
    logging.
    """
    parser = get_arg_parser()
    args = vars(parser.parse_args())
    setup_logger(args)
    logging.critical(tabulate([args], headers='keys', tablefmt='plain'))
    # Search space options; n_batch/hidden are enlarged for the full dataset.
    OPTS = {
        'activation': ['sigmoid', 'tanh', 'relu', 'elu'],
        'opt': ['adam'],
        'n_batch': [32, 64, 128, 256],
        'hidden': [128, 256],
        'bnorm': [0, 1],
        # 'n_batch' : [128,256,512],
        # 'hidden' : [512,1024],
    }
    NF, NOUT = 400, 200
    logging.critical('loading data...')
    if args['toy']:
        dat = np.load('toy.npz')
        trn, dev, tst = dat['trn'], dat['dev'], dat['tst']
    else:
        trn, dev, tst = map(prep.get_dset, ('trn', 'dev', 'tst'))
        OPTS['n_batch'] = [128, 256, 512]
        OPTS['hidden'] = [512, 1024, 2048]
    logging.critical('loading data done.')
    logging.critical(tabulate([OPTS], headers='keys'))
    logging.critical('')
    # Each row is [targets(NOUT) | features(NF)].
    trnX, trnY = trn[:, NOUT:], trn[:, :NOUT]
    devX, devY = dev[:, NOUT:], dev[:, :NOUT]

    def objective(conf):
        """Train a DNN for `conf`; return its best dev cost for hyperopt."""
        # Recover ordered layer sizes / dropout rates from the sampled dpart.
        conf['n_hidden'] = map(
            lambda x: x[1],
            sorted((k, v) for k, v in conf['dpart'].iteritems()
                   if k.startswith('h')))
        conf['drates'] = map(
            lambda x: x[1],
            sorted((k, v) for k, v in conf['dpart'].iteritems()
                   if k.startswith('d')))
        dnn = model.DNN(NF, NOUT, conf)
        dcosts = []
        for e in range(args['hepoch']):
            tcost = dnn.train(trnX, trnY)
            dcost, pred = dnn.predict(devX, devY)
            dcosts.append(dcost)
        # Best dev cost; NaN (diverged) maps to a huge sentinel.
        dcost = min(dcosts)
        dcost = np.iinfo(np.int32).max if np.isnan(dcost) else dcost
        info = dd(lambda: None)
        info.update(conf)
        # info = conf.copy()
        info['loss'] = dcost
        info.update(
            ('h%d' % i, nh) for i, nh in enumerate(info['n_hidden'], 1))
        info.update(('dr%d' % i, dr) for i, dr in enumerate(info['drates']))
        # map(info.pop, ('dpart','n_hidden','drates'))
        headers = [
            'loss', 'n_batch', 'opt', 'activation', 'lr', 'norm', 'bnorm'
        ] + ['h%d' % i for i in range(1, args['max_layers'] + 1)
        ] + ['dr%d' % i for i in range(args['max_layers'] + 1)]
        logging.critical(
            tabulate([map(lambda x: info[x], headers)],
                     headers=headers,
                     floatfmt='.4f'))
        return {
            'loss': dcost,
            'status': STATUS_OK,
        }

    space = create_space(args['max_layers'], OPTS)
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                max_evals=args['max_evals'])
    logging.critical(best)
    logging.critical('')
    best_params = best2mparams(best, OPTS)
    logging.critical(tabulate([best_params], headers='keys'))
    # Retrain from scratch with the best configuration.
    dnn = model.DNN(NF, NOUT, best_params)
    for e in range(args['fepoch']):
        tcost = dnn.train(trnX, trnY)
        # BUG FIX: predict() returns (cost, predictions) — every other call
        # site unpacks it.  The original assigned the whole tuple to dcost,
        # so the final log line printed a tuple instead of the cost.
        dcost, _ = dnn.predict(devX, devY)
    logging.critical('dcost with best model: {}'.format(dcost))