Ejemplo n.º 1
0
    def objective(conf):
        """Hyperopt objective: train a DNN with the sampled config `conf`
        and return the best dev-set cost observed over `args['hepoch']`
        epochs, in the {'loss': ..., 'status': STATUS_OK} format hyperopt
        expects.
        """
        # Collect hidden-layer sizes and dropout rates from the sampled
        # 'dpart' sub-dict, ordered by key name ('h1', 'h2', ... and
        # 'd0', 'd1', ...).  List comprehensions replace the original
        # Python-2-only iteritems()/map() combination and guarantee real
        # (re-iterable) lists under Python 3.
        conf['n_hidden'] = [v for k, v in sorted(conf['dpart'].items())
                            if k.startswith('h')]
        conf['drates'] = [v for k, v in sorted(conf['dpart'].items())
                          if k.startswith('d')]

        dnn = model.DNN(NF, NOUT, conf)

        # Train for the configured number of epochs, recording the dev
        # cost after each one.
        dcosts = []
        for e in range(args['hepoch']):
            tcost = dnn.train(trnX, trnY)
            dcost, pred = dnn.predict(devX, devY)
            dcosts.append(dcost)

        dcost = min(dcosts)
        # A NaN loss would confuse hyperopt's minimizer; substitute a huge
        # sentinel so the trial is effectively discarded.
        dcost = np.iinfo(np.int32).max if np.isnan(dcost) else dcost

        # defaultdict(None-factory) so columns missing from `conf` render
        # as None in the log table below instead of raising KeyError.
        info = dd(lambda: None)
        info.update(conf)
        info['loss'] = dcost
        # Expose per-layer values under flat keys: h1..hN, dr0..drN.
        info.update(
            ('h%d' % i, nh) for i, nh in enumerate(info['n_hidden'], 1))
        info.update(('dr%d' % i, dr) for i, dr in enumerate(info['drates']))
        headers = [
            'loss', 'n_batch', 'opt', 'activation', 'lr', 'norm', 'bnorm'
        ] + ['h%d' % i for i in range(1, args['max_layers'] + 1)
             ] + ['dr%d' % i for i in range(args['max_layers'] + 1)]
        logging.critical(
            tabulate([[info[h] for h in headers]],
                     headers=headers,
                     floatfmt='.4f'))

        return {
            'loss': dcost,
            'status': STATUS_OK,
        }
Ejemplo n.º 2
0
def KFold_cross_validation(df_X, df_Y, n, model_flag):
    """Run n-fold cross validation for the selected model and print the
    mean train/test errors.

    Parameters
    ----------
    df_X, df_Y : row-aligned pandas DataFrames of features / targets.
    n : int, number of folds.
    model_flag : int, 1=linear regression, 2=polynomial regression,
                 3=GDBT, 4=DNN (softmax activation).

    Raises
    ------
    ValueError if model_flag is not one of 1-4.
    """
    kf = KFold(n_splits=n, shuffle=True)
    error = []  # one (train_error, test_error) pair per fold

    for train_idx, test_idx in kf.split(df_X):
        train_X = df_X.iloc[train_idx, :]
        train_Y = df_Y.iloc[train_idx, :]
        test_X = df_X.iloc[test_idx, :]
        test_Y = df_Y.iloc[test_idx, :]

        # elif chain replaces the original independent ifs: only one model
        # can match, so the remaining comparisons are skipped.
        if model_flag == 1:
            error.append(model.linear_reg(train_X, train_Y, test_X, test_Y))
        elif model_flag == 2:
            error.append(model.poly_reg(train_X, train_Y, test_X, test_Y))
        elif model_flag == 3:
            error.append(model.GDBT(train_X, train_Y, test_X, test_Y))
        elif model_flag == 4:
            error.append(
                model.DNN(train_X,
                          train_Y,
                          test_X,
                          test_Y,
                          activation_function="softmax"))
        else:
            # The original silently left `error` empty for unknown flags and
            # crashed later with IndexError; fail fast with a clear message.
            raise ValueError('unknown model_flag: {}'.format(model_flag))

    # Average the per-fold errors (index 0 = train, index 1 = test).
    train_mean_error = sum(e[0] for e in error) / n
    test_mean_error = sum(e[1] for e in error) / n

    print("training error = {}".format(train_mean_error))
    print("testing error = {}".format(test_mean_error))
Ejemplo n.º 3
0
def main():
    """Train a DNN on the toy or full dataset and log the best dev cost."""
    parser = get_arg_parser()
    args = vars(parser.parse_args())
    setup_logger(args)

    logging.info(tabulate([args], headers='keys', tablefmt='plain'))

    NF, NOUT = 400, 200  # feature / output dimensionalities
    logging.info('loading data...')
    if args['toy']:
        dat = np.load('data/toy.npz')
        trn, dev, tst = dat['trn'], dat['dev'], dat['tst']
    else:
        trn, dev, tst = map(prep.get_dset, ('trn', 'dev', 'tst'))

    logging.info('loading data done.')

    # Each row is [targets | features]: the first NOUT columns are Y,
    # the remaining NF columns are X.
    trnX, trnY = trn[:, NOUT:], trn[:, :NOUT]
    devX, devY = dev[:, NOUT:], dev[:, :NOUT]

    dnn = model.DNN(NF, NOUT, args)
    costs = []
    for e in range(args['fepoch']):
        tcost = dnn.train(trnX, trnY)
        dcost, pred = dnn.predict(devX, devY)
        costs.append(dcost)
        # print() call replaces the Python-2 print statement so this file
        # is importable under Python 3.
        print('dcost: {} pred: {} pred avg norm: {} truth avg norm: {}'.format(
            dcost, pred.shape, np.mean(np.linalg.norm(pred, axis=1)),
            np.mean(np.linalg.norm(devY, axis=1))))

    logging.info('dcost with best model: {}'.format(min(costs)))
Ejemplo n.º 4
0
def main(dataset,
         gpu,
         model_name,
         epochs,
         taus,
         alphas,
         with_regularization=False,
         sigmoid_approx=False,
         probabilities=False):
    """Evaluate saved model checkpoints on the test split.

    For every (epoch, tau, alpha) combination, rebuilds the model, loads
    the matching weight file from ../<dataset>/model_weights/, runs the
    test loader, and prints accuracy.

    NOTE(review): relies on module-level globals `batch_size`,
    `learning_rate`, `aggregate_coeff`, `PHASES` — confirm they are
    defined at import time.
    """

    device = torch.device('cuda:{}'.format(gpu))

    ds_obj, datasets, data_loaders = \
        hp.get_data_loder_objects(dataset, PHASES, **hp.get_loader_kwargs(batch_size))

    for epoch in epochs:
        for (tau_idx, tau), (alpha_idx, alpha) in itertools.product(
                *[enumerate(taus), enumerate(alphas)]):
            regularization_params = {
                'tau': tau,
                'alpha': alpha,
                'sigmoid_approx': sigmoid_approx,
                'probabilities': probabilities,
                'device': device
            }
            model_to_load = model.DNN(
                model_name=model_name,
                num_classes=ds_obj.num_classes(),
                learning_rate=learning_rate,
                aggregate_coeff=aggregate_coeff,
                with_regularization=with_regularization,
                regularization_params=regularization_params)

            # Regularized models get the criterion name appended so their
            # weight files don't collide with the plain cross-entropy
            # variant.  (Computed once; the original recomputed this exact
            # value a second time further down.)
            complete_model_name = '{}_{}'.format(model_to_load.model_name, model_to_load.criterion._get_name()) \
                if not isinstance(model_to_load.criterion, nn.CrossEntropyLoss) else model_to_load.model_name
            filename = '{}_epoch_{}_lr_{}.pth'.format(complete_model_name,
                                                      epoch, learning_rate)
            model_to_load.model_ft.load_state_dict(
                torch.load('../{}/model_weights/{}'.format(
                    ds_obj.name, filename),
                           map_location=device))
            model_to_load.model_ft.eval()
            print('Loaded weights from: ../{}/model_weights/{}'.format(
                ds_obj.name, filename))

            # Accumulate predictions and labels across the whole test loader.
            predicted_classes, true_classes = None, None
            for _, inputs, labels, _ in data_loaders['test']:
                inputs = inputs.to(device)
                model_to_load.model_ft = model_to_load.model_ft.to(device)
                outputs = model_to_load.model_ft(inputs.float())
                _, preds = torch.max(outputs, 1)
                predicted_classes = preds.detach().cpu().numpy() if predicted_classes is None else \
                    np.concatenate((predicted_classes, preds.detach().cpu().numpy()))
                true_classes = labels.numpy() if true_classes is None else \
                    np.concatenate((true_classes, labels.numpy()))

            print("Accuracy for {}: {}".format(
                complete_model_name,
                accuracy_score(true_classes, predicted_classes)))
Ejemplo n.º 5
0
def main(all_datasets, gpu, epoch):
    """Compare mean adversarial-perturbation sizes between sensitive groups.

    For each attack and dataset, loads previously generated adversarial
    images, splits them by a sensitive attribute (per-class membership for
    CIFAR variants; race/sex indicator otherwise), and writes the mean
    perturbation magnitude of the minority and majority groups to a CSV
    under pickled_ubs/.

    Args:
        all_datasets: iterable of dataset identifiers accepted by
            hp.get_data_loder_objects.
        gpu: CUDA device index.
        epoch: checkpoint epoch whose adversarial images are loaded.
    """

    device = torch.device('cuda:{}'.format(gpu))

    attack_names = ['DeepFool', 'CarliniWagner']

    for attack_name in attack_names:
        csv_rows = []
        for dataset in all_datasets:
            ds_obj, datasets, data_loaders = \
                hp.get_data_loder_objects(dataset, PHASES, **hp.get_loader_kwargs(batch_size))

            for dir_name in os.listdir('../{}/adversarial_images/'.format(
                    ds_obj.name)):
                # dir_name contains model name and other params, process them here
                # (e.g. '<model>_RegularizedLoss_..._tau_<t>_..._alpha_<a>...');
                # directories without the marker are unregularized models.
                if 'RegularizedLoss' in dir_name:
                    model_name = dir_name.split('_RegularizedLoss_')[0]
                    with_regularization = True
                    tau = float(dir_name.split('_tau_')[1].split('_')[0])
                    alpha = float(dir_name.split('_alpha_')[1].split('_')[0])
                    if 'probabilities' in dir_name:
                        probabilities = True
                    else:
                        probabilities = False
                    if 'exact' in dir_name:
                        sigmoid_approx = False
                    else:
                        sigmoid_approx = True
                else:
                    model_name = dir_name
                    with_regularization = False
                    tau, alpha, sigmoid_approx, probabilities = None, None, None, None
                if 'robust' in dir_name:
                    robust_regularization = True
                    beta = float(dir_name.split('_beta_')[1].split('_')[0])
                    gamma = float(dir_name.split('_gamma_')[1].split('_')[0])
                else:
                    robust_regularization = False
                    beta, gamma = None, None

            # for model_name in DATASET_TO_MODEL_NAMES[dataset.split('_')[0].lower()]:
            # taus = DATASET_TO_MODEL_TO_TAUS[dataset.split('_')[0].lower()][model_name]
            # alphas = DATASET_TO_MODEL_TO_ALPHAS[dataset.split('_')[0].lower()][model_name]
            # for (tau_idx, tau), (alpha_idx, alpha) in itertools.product(*[enumerate(taus), enumerate(alphas)]):

                # Rebuild the model matching this directory so the derived
                # complete_model_name reproduces the on-disk folder layout.
                regularization_params = {
                    'tau': tau,
                    'alpha': alpha,
                    'sigmoid_approx': sigmoid_approx,
                    'probabilities': probabilities,
                    'robust_regularization': robust_regularization,
                    'beta': beta,
                    'gamma': gamma,
                    'device': device
                }
                model_to_load = model.DNN(
                    model_name=model_name,
                    num_classes=ds_obj.num_classes(),
                    learning_rate=learning_rate,
                    aggregate_coeff=aggregate_coeff,
                    with_regularization=with_regularization,
                    regularization_params=regularization_params)

                # Criterion name is appended for regularized models only.
                complete_model_name = '{}_{}'.format(model_to_load.model_name, model_to_load.criterion._get_name()) \
                    if not isinstance(model_to_load.criterion, nn.CrossEntropyLoss) else model_to_load.model_name

                print('Attack: {}, Dataset: {}, Model: {}'.format(
                    attack_name, dataset, complete_model_name))

                adv_folder = '../{}/adversarial_images/{}/{}'.format(
                    ds_obj.name, complete_model_name, attack_name)
                adv_image_ids, all_adv_objs = hp.load_adversarial_objects(
                    folder=adv_folder,
                    epoch=epoch,
                    ds_obj=ds_obj,
                    device=device)
                all_images_adversarial = [x.image for x in all_adv_objs]

                print(adv_folder)
                print(
                    len(glob.glob("{}/*_epoch_{}*".format(adv_folder, epoch))))

                # Build binary group indicators (1 = member of the sensitive
                # group) aligned with adv_image_ids.
                if 'cifar' in ds_obj.name.lower():
                    if ds_obj.name.lower() == 'cifar10':
                        # One indicator vector per CIFAR-10 class.
                        sensitive_attrs, sensitive_attrs_names = [], []
                        for cname in ds_obj.classes:
                            sensitive_attrs_names.append(cname)
                            sensitive_attrs.append(np.array([1 if ds_obj.classes[ds_obj.test_labels[int(img_id)]] == cname \
                                                            else 0 for img_id in adv_image_ids]))
                    else:
                        # Dataset name encodes the single class of interest,
                        # e.g. 'cifar_<class>'.
                        sensitive_attrs = [np.array(
                                                [1 if ds_obj.classes[ds_obj.test_labels[int(img_id)]] == ds_obj.name.split('_')[-1].lower() \
                                                else 0 for img_id in adv_image_ids])]
                        sensitive_attrs_names = [
                            ds_obj.name.lower().split('_')[-1]
                        ]
                else:
                    attr = ds_obj.name.lower().split('_')[-1]
                    sensitive_attrs = [np.array([ds_obj.get_image_protected_class('test', int(img_id), attr=attr) \
                                            for img_id in adv_image_ids])] # sens_attr = 1 means minority
                    sensitive_attrs_names = [
                        'Black' if attr == 'race' else 'Female'
                    ]

                # Per-group perturbation magnitudes for each attribute.
                majority_differences, minority_differences = [], []
                for sensitive_attr in sensitive_attrs:
                    minority_difference, majority_difference = image_differences(
                        adv_image_ids, all_images_adversarial, sensitive_attr,
                        ds_obj)
                    majority_differences.append(majority_difference)
                    minority_differences.append(minority_difference)

                for minority_difference, majority_difference, sensitive_attr_name in zip(
                        minority_differences, majority_differences,
                        sensitive_attrs_names):
                    mu_minority, mu_majority = np.mean(
                        minority_difference), np.mean(majority_difference)
                    csv_rows.append([
                        dataset, complete_model_name, sensitive_attr_name,
                        mu_minority, mu_majority
                    ])

        # One CSV per attack, accumulated over all datasets/models above.
        hp.create_dir("pickled_ubs")
        df = pd.DataFrame(
            csv_rows,
            columns=['dataset', 'model', 'minority', 'mu_min', 'mu_maj'])
        df.to_csv('pickled_ubs/{}_cdf_mus_regularized.csv'.format(attack_name),
                  index=False)

        print('Saved to pickled_ubs/{}_cdf_mus_regularized.csv!'.format(
            attack_name))
Ejemplo n.º 6
0
def main(dataset,
         gpu,
         epochs,
         model_names,
         with_regularization=False,
         taus=None,
         alphas=None,
         sigmoid_approx=False,
         probabilities=False,
         robust_regularization=False,
         betas=None,
         gammas=None):
    """Train each model (or load its cached epoch values) and plot curves.

    For every (tau, alpha, beta, gamma) combination and every model name,
    either trains a model.DNN and persists its weights/epoch values, or
    loads previously persisted values, then renders six train/test plots
    (accuracy, total loss, CE loss, regularization term, minority and
    majority distance approximations).

    NOTE(review): relies on module-level globals `batch_size`,
    `learning_rate`, `aggregate_coeff`, `PHASES`.
    """

    if with_regularization:
        assert taus is not None and alphas is not None
        if robust_regularization:
            assert betas is not None and gammas is not None
    else:
        # Single dummy combination so the product loop below runs once.
        taus, alphas, betas, gammas = [None], [None], [None], [None]

    device = torch.device('cuda:{}'.format(gpu))

    ds_obj, datasets, data_loaders = \
        hp.get_data_loder_objects(dataset, PHASES, **hp.get_loader_kwargs(batch_size))

    # Shared name list for persisting/loading the 12 per-epoch metric series
    # (declared once instead of being repeated at each call site).
    metric_names = ('train_acc_history', 'train_overall_loss_history',
                    'train_cross_entropy', 'train_regularization',
                    'train_minority_dist', 'train_majority_dist',
                    'test_acc_history', 'test_overall_loss_history',
                    'test_cross_entropy', 'test_regularization',
                    'test_minority_dist', 'test_majority_dist')

    for (tau_idx, tau), (alpha_idx, alpha), (beta_idx, beta), (gamma_idx, gamma) in \
        itertools.product(*[enumerate(taus), enumerate(alphas), enumerate(betas), enumerate(gammas)]):

        regularization_params = {
            'tau': tau,
            'alpha': alpha,
            'sigmoid_approx': sigmoid_approx,
            'probabilities': probabilities,
            'robust_regularization': robust_regularization,
            'beta': beta,
            'gamma': gamma,
            'device': device
        }
        # Placeholders filled in per batch by the regularized criterion.
        criterion_kwargs = {} if not with_regularization else {
            'inputs': None,
            'protected_classes': None
        }

        for model_name in model_names:
            model_to_train = model.DNN(
                model_name=model_name,
                num_classes=ds_obj.num_classes(),
                learning_rate=learning_rate,
                aggregate_coeff=aggregate_coeff,
                with_regularization=with_regularization,
                regularization_params=regularization_params)

            # Presence of the first persisted metric file is used as a
            # proxy for "this model was already trained".
            values_path = ('../{}/training_values/{}_{}_lr_{}_train_acc_history.pkl'
                           .format(ds_obj.name, model_to_train.model_name,
                                   model_to_train.criterion._get_name(),
                                   learning_rate))
            if not os.path.exists(values_path):
                # Train from scratch; train_model returns the 12 metric
                # series in metric_names order.
                metrics = model.train_model(model_to_train, epochs, device,
                                            data_loaders, criterion_kwargs)
                print(metrics)
                hp.persist_model_weights(model_to_train,
                                         ds_obj,
                                         learning_rate,
                                         'best',
                                         root_dir='.')
                hp.persist_epoch_values(model_to_train, ds_obj, learning_rate,
                                        metrics, metric_names)
            else:
                metrics = hp.load_epoch_values(model_to_train, ds_obj,
                                               learning_rate, metric_names)

            (train_acc_history, train_overall_loss_history, train_cross_entropy,
             train_regularization, train_minority_dist, train_majority_dist,
             test_acc_history, test_overall_loss_history, test_cross_entropy,
             test_regularization, test_minority_dist, test_majority_dist) = metrics

            # Render the six train/test figures from one spec table instead
            # of six copy-pasted hp.line_plots calls.
            # NOTE: the two dist plots intentionally keep the original
            # (copy-pasted) "Reg Term (...)" titles to preserve output.
            epoch_axis = np.arange(0, epochs, aggregate_coeff)
            crit_name = model_to_train.criterion._get_name()
            plot_specs = [
                ([train_acc_history, test_acc_history],
                 "Accuracy", 'train_test_acc',
                 "Accuracy ({})".format(model_to_train.model_name)),
                ([train_overall_loss_history, test_overall_loss_history],
                 "Total Loss", 'train_test_overall_loss',
                 "Overall Loss ({})".format(model_to_train.model_name)),
                ([train_cross_entropy, test_cross_entropy],
                 "Cross Entropy Loss", 'train_test_ce_loss',
                 "CE Loss ({})".format(model_to_train.model_name)),
                ([train_regularization, test_regularization],
                 "Reg. Term", 'train_test_reg_term',
                 "Reg Term ({})".format(model_to_train.model_name)),
                ([train_minority_dist, test_minority_dist],
                 "Minority Dist Approx", 'train_test_minority_dist',
                 "Reg Term ({})".format(model_to_train.model_name)),
                ([train_majority_dist, test_majority_dist],
                 "Majority Dist Approx", 'train_test_majority_dist',
                 "Reg Term ({})".format(model_to_train.model_name)),
            ]
            for series, y_label, fname_stem, title in plot_specs:
                hp.line_plots(series,
                              epoch_axis,
                              x_label="Epoch",
                              y_label=y_label,
                              subfolder=ds_obj.name,
                              filename='{}_{}_{}.png'.format(
                                  model_to_train.model_name, crit_name,
                                  fname_stem),
                              title=title,
                              legend_vals=["Train", "Test"])
Ejemplo n.º 7
0
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from pre_process import transform_train, transform_test

# whether use gpu
use_cuda = torch.cuda.is_available()

# default parameters
DATA_ROOT = '../data/'
num_epochs = 50
batch_size = 128

# Registry mapping the CLI --model_type value to a model instance.
# NOTE(review): every model is constructed eagerly at import time;
# consider factories/lambdas if startup cost or memory matters.
model_names = {
    'dnn': model.DNN(3072, 4096, 10),  # 3072 = 32*32*3 input? — TODO confirm arg meaning
    'cnn': model.CNN(),
    'resnet18': model.ResNet18(),
    'resnet34': model.ResNet34(),
    'resnet50': model.ResNet50()
}


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_type',
                        type=str,
                        default='dnn',
                        help="the type of model")
    parser.add_argument('--lr',
                        type=float,
def main(dataset,
         gpu,
         model_name,
         epochs,
         taus,
         alphas,
         with_regularization=False,
         sigmoid_approx=False,
         probabilities=False,
         robust_regularization=False,
         betas=[None],
         gammas=[None]):
    """Plot inverse-CDF curves of adversarial perturbation distances.

    For every epoch and (tau, alpha, beta, gamma) combination, rebuilds the
    corresponding model (weight loading is commented out below), loads its
    saved adversarial images per attack, and plots the fraction of examples
    whose perturbation exceeds a threshold, separately for the sensitive
    group and everyone else.

    NOTE(review): betas/gammas use mutable default arguments ([None]);
    harmless here only because they are never mutated.
    """

    device = torch.device('cuda:{}'.format(gpu))

    attack_names = ['DeepFool', 'CarliniWagner']

    ds_obj, datasets, data_loaders = \
        hp.get_data_loder_objects(dataset, PHASES, **hp.get_loader_kwargs(batch_size))

    for epoch in epochs:
        for (tau_idx, tau), (alpha_idx, alpha), (beta_idx, beta), (gamma_idx, gamma) in \
            itertools.product(*[enumerate(taus), enumerate(alphas), enumerate(betas), enumerate(gammas)]):

            regularization_params = {
                'tau': tau,
                'alpha': alpha,
                'sigmoid_approx': sigmoid_approx,
                'probabilities': probabilities,
                'robust_regularization': robust_regularization,
                'beta': beta,
                'gamma': gamma,
                'device': device
            }
            # Model is built only to derive complete_model_name / criterion;
            # its weights are not loaded (see commented block below).
            model_to_load = model.DNN(
                model_name=model_name,
                num_classes=ds_obj.num_classes(),
                learning_rate=learning_rate,
                aggregate_coeff=aggregate_coeff,
                with_regularization=with_regularization,
                regularization_params=regularization_params)

            # filename = '{}_{}_epoch_{}_lr_{}.pth'.format(model_to_load.model_name, model_to_load.criterion._get_name(),
            #     epoch, learning_rate)
            # model_to_load.model_ft.load_state_dict(torch.load('../{}/model_weights/{}'.format(ds_obj.name, filename),
            #                                          map_location=device))
            # model_to_load.model_ft.eval()
            # print ('Loaded weights from: ../{}/model_weights/{}'.format(ds_obj.name, filename))

            complete_model_name = '{}_{}'.format(model_to_load.model_name, model_to_load.criterion._get_name()) \
                if not isinstance(model_to_load.criterion, nn.CrossEntropyLoss) else model_to_load.model_name

            for attack_name in attack_names:

                adv_folder = '../{}/adversarial_images/{}/{}'.format(
                    ds_obj.name, complete_model_name, attack_name)
                adv_image_ids, all_adv_objs = hp.load_adversarial_objects(
                    folder=adv_folder,
                    epoch=epoch,
                    ds_obj=ds_obj,
                    device=device)
                all_images_adversarial = [x.image for x in all_adv_objs]

                print(adv_folder)
                print(
                    len(glob.glob("{}/*_epoch_{}*".format(adv_folder, epoch))))

                # Binary group indicators (1 = member of the sensitive
                # group), aligned with adv_image_ids.
                if 'cifar' in ds_obj.name.lower():
                    if ds_obj.name.lower() == 'cifar10':
                        sensitive_attrs, sensitive_attrs_names = [], []
                        for cname in ds_obj.classes:
                            sensitive_attrs_names.append(cname)
                            sensitive_attrs.append(np.array([1 if ds_obj.classes[ds_obj.test_labels[int(img_id)]] == cname \
                                                            else 0 for img_id in adv_image_ids]))
                    else:
                        sensitive_attrs = [np.array(
                                                [1 if ds_obj.classes[ds_obj.test_labels[int(img_id)]] == ds_obj.name.split('_')[-1].lower() \
                                                else 0 for img_id in adv_image_ids])]
                        sensitive_attrs_names = [
                            ds_obj.name.lower().split('_')[-1]
                        ]
                else:
                    attr = ds_obj.name.lower().split('_')[-1]
                    sensitive_attrs = [np.array([ds_obj.get_image_protected_class('test', int(img_id), attr=attr) \
                                            for img_id in adv_image_ids])]
                    sensitive_attrs_names = [
                        'Black' if attr == 'race' else 'Female'
                    ]

                # Per-group perturbation magnitudes.
                majority_differences, minority_differences = [], []
                for sensitive_attr in sensitive_attrs:
                    minority_difference, majority_difference = image_differences(
                        adv_image_ids, all_images_adversarial, sensitive_attr,
                        ds_obj)
                    majority_differences.append(majority_difference)
                    minority_differences.append(minority_difference)

                # print (minority_difference, majority_difference)

                hp.create_dir("plots/{}".format(ds_obj.name))
                hp.create_dir("plots/{}/{}".format(ds_obj.name,
                                                   model_to_load.model_name))
                hp.create_dir("plots/{}/{}/{}".format(ds_obj.name,
                                                      model_to_load.model_name,
                                                      attack_name))

                dir_to_save = "plots/{}/{}/{}".format(ds_obj.name,
                                                      model_to_load.model_name,
                                                      attack_name)

                # taus = np.linspace(0.0, 0.5, 2000)
                # NOTE(review): this rebinds the 'taus' argument as a grid of
                # plotting thresholds; the hyperparameter loop above is
                # unaffected (itertools.product consumed it eagerly), but the
                # name reuse is confusing.
                taus = np.linspace(0.0, 2.0, 2000)
                # taus = np.linspace(0.0, 2.0, 2000) if 'deepfool' in attack_name.lower() else np.linspace(2.9, 3.1, 2000)

                for minority_difference, majority_difference, sensitive_attr_name in zip(
                        minority_differences, majority_differences,
                        sensitive_attrs_names):
                    # Empirical survival function: fraction of examples whose
                    # perturbation exceeds each threshold t.
                    frac_greater_than_tau_majority = np.array([
                        np.sum(majority_difference > t) /
                        len(majority_difference) for t in taus
                    ])
                    frac_greater_than_tau_minority = np.array([
                        np.sum(minority_difference > t) /
                        len(minority_difference) for t in taus
                    ])

                    if paper_friendly_plots:
                        set_paper_friendly_plots_params()

                    fig = plt.figure()
                    if not paper_friendly_plots:
                        fig.suptitle(
                            r'fraction $d_\theta > \tau$ for {}'.format(
                                ds_obj.name),
                            fontsize=20)
                    ax = fig.add_subplot(111)
                    ax.plot(taus,
                            frac_greater_than_tau_majority,
                            color='blue',
                            label='Other Classes')
                    ax.plot(taus,
                            frac_greater_than_tau_minority,
                            color='red',
                            label='{}'.format(sensitive_attr_name))
                    ax.set_xlabel('Distance to Adv. Sample' + r' ($\tau$)')
                    ax.set_ylabel(r'$ \widehat{I^\tau_s} $')
                    plt.legend()

                    extension = 'png' if not paper_friendly_plots else 'pdf'
                    filename = '{}_inv_cdf'.format(model_to_load.criterion._get_name()) \
                        if not isinstance(model_to_load.criterion, nn.CrossEntropyLoss) else \
                            'inv_cdf_{}'.format(sensitive_attr_name)
                    plt.savefig('{}/{}.{}'.format(dir_to_save, filename,
                                                  extension),
                                bbox_inches='tight')
                    plt.show()
                    plt.close()
Ejemplo n.º 9
0
def training():
    """Build, train, and evaluate the DNN defined by module-level config.

    Relies on module-level names supplied by the surrounding script:
    ``path_params``, ``train_params``, ``model_params``, ``DataPipeline``,
    ``model``, ``ModelCheckpoint``, ``EarlyStopping`` and ``time``.

    Side effects: writes best-model checkpoints to ``filepath`` and prints
    validation/test metrics and the elapsed training time.
    """
    # Save the model whenever validation accuracy improves.
    filepath = "model_weight-{epoch:02d}-{loss:.4f}-m1.hdf5"

    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='auto',
                                 period=1)

    # Stop training once validation accuracy stops improving.
    # NOTE(review): min_delta=5 requires val_acc to improve by 5 whole
    # units to count as progress; if accuracy is in [0, 1] this disables
    # the delta check entirely — confirm whether 0.005 was intended.
    early = EarlyStopping(monitor='val_acc',
                          min_delta=5,
                          patience=10,
                          verbose=1,
                          mode='auto')

    data_pipeline = DataPipeline(path_params, train_params)
    train_generator = data_pipeline.build_training_data()
    validation_generator = data_pipeline.build_validation_data()
    test_generator = data_pipeline.build_testing_data()

    # Steps per epoch = whole batches available in each split.
    STEP_SIZE_TRAIN = train_generator.n // data_pipeline.batch_size
    STEP_SIZE_VALID = validation_generator.n // data_pipeline.batch_size
    STEP_SIZE_TEST = test_generator.n // data_pipeline.batch_size

    dnn = model.DNN(model_params)
    dnn.build_model()

    # Start training.
    time_start = time.time()

    # BUG FIX: the checkpoint and early-stopping callbacks were created
    # but never passed to fit_generator, so they had no effect. They are
    # now wired in via the ``callbacks`` argument.
    dnn.model.fit_generator(
        train_generator,
        epochs=train_params.epochs,
        steps_per_epoch=STEP_SIZE_TRAIN,
        validation_data=validation_generator,
        validation_steps=STEP_SIZE_VALID,
        callbacks=[checkpoint, early])

    # BUG FIX: evaluation results were previously computed and discarded;
    # capture and report them so the run actually surfaces its metrics.
    val_metrics = dnn.model.evaluate_generator(
        generator=validation_generator,
        steps=STEP_SIZE_VALID)
    print('Validation metrics = ', val_metrics)

    test_metrics = dnn.model.evaluate_generator(
        generator=test_generator,
        steps=STEP_SIZE_TEST)
    print('Test metrics = ', test_metrics)

    time_elapsed = time.time() - time_start
    print('Training time = ', time_elapsed)
def main(dataset_reg, dataset_original, gpu, model_name_reg, model_name_original, 
    epochs, taus, alphas, sigmoid_approx=False, probabilities=False):
    """Compare the inverse-CDF fairness curves of an unregularized model
    against regularized models over a (tau, alpha) grid.

    For each epoch and attack, computes the fraction of adversarial-distance
    values exceeding each threshold, split by sensitive attribute, and saves
    a grid of line plots via ``hp.line_plots_grid``.

    Relies on module-level names: ``torch``, ``np``, ``nn``, ``model``,
    ``hp``, ``glob``, ``itertools``, ``image_differences``, ``PHASES``,
    ``batch_size``, ``learning_rate``, ``aggregate_coeff``.
    """

    device = torch.device('cuda:{}'.format(gpu))

    attack_names = ['DeepFool', 'CarliniWagner']
    
    # Load dataset wrappers for the original and regularized experiments.
    ds_obj_original, _, _ = \
        hp.get_data_loder_objects(dataset_original, PHASES, **hp.get_loader_kwargs(batch_size))
    ds_obj_reg, _, _ = \
        hp.get_data_loder_objects(dataset_reg, PHASES, **hp.get_loader_kwargs(batch_size))

    # NOTE(review): this overwrites the caller-supplied ``taus`` argument
    # with a 2000-point grid used as CDF thresholds below. The same array
    # is later iterated at the itertools.product loop to build models,
    # which would instantiate 2000 * len(alphas) models — confirm whether
    # the parameter and the threshold grid were meant to be distinct names.
    taus = np.linspace(0.0, 2.0, 2000)

    for epoch in epochs:

        # Baseline model trained without the fairness regularizer.
        model_original = model.DNN(model_name=model_name_original, num_classes=ds_obj_reg.num_classes(), 
                learning_rate=learning_rate, aggregate_coeff=aggregate_coeff,
                with_regularization=False)
        # Non-default criteria get their name appended to the folder name.
        complete_model_name = '{}_{}'.format(model_original.model_name, model_original.criterion._get_name()) \
            if not isinstance(model_original.criterion, nn.CrossEntropyLoss) else model_original.model_name

        for attack_name in attack_names:

            # Load precomputed adversarial examples for the baseline model.
            adv_folder = '../{}/adversarial_images/{}/{}'.format(ds_obj_original.name, 
                complete_model_name, attack_name)
            adv_image_ids, all_adv_objs = hp.load_adversarial_objects(folder=adv_folder, epoch=epoch, ds_obj=ds_obj_original, device=device)
            all_images_adversarial = [x.image for x in all_adv_objs]

            print (adv_folder)
            print (len(glob.glob("{}/*_epoch_{}*".format(adv_folder, epoch))))

            # Build a 0/1 sensitive-attribute vector per adversarial image.
            # CIFAR-style datasets partition by class name; otherwise by a
            # protected attribute (race -> Black, else -> Female).
            if 'cifar' in ds_obj_original.name.lower():
                sensitive_attrs_name = ds_obj_reg.name.split('_')[-1].lower() # get the sens attr name from reg model
                sensitive_attr = np.array([1 if ds_obj_original.classes[ds_obj_original.test_labels[int(img_id)]] == sensitive_attrs_name \
                    else 0 for img_id in adv_image_ids])
            else:
                attr = ds_obj_original.name.lower().split('_')[-1]
                sensitive_attrs_name = 'Black' if attr == 'race' else 'Female'
                sensitive_attr = np.array([ds_obj_original.get_image_protected_class('test', int(img_id), attr=attr) \
                                        for img_id in adv_image_ids])
                
            # Empirical fraction of distances exceeding each threshold tau,
            # computed separately for the minority/majority partitions.
            minority_difference, majority_difference = image_differences(adv_image_ids, all_images_adversarial, sensitive_attr, ds_obj_original)
            frac_greater_than_tau_majority = np.array([np.sum(majority_difference > t) / len(majority_difference) for t in taus])
            frac_greater_than_tau_minority = np.array([np.sum(minority_difference > t) / len(minority_difference) for t in taus])

            # First panel of the plot grid is the unregularized baseline.
            all_lines = [[frac_greater_than_tau_majority, frac_greater_than_tau_minority]]
            titles = ['Original']

            # One panel per (tau, alpha) regularization setting.
            # NOTE(review): iterates the 2000-point linspace assigned above,
            # not the caller's ``taus`` — see note at the top of the loop.
            for (tau_idx, tau), (alpha_idx, alpha) in itertools.product(*[enumerate(taus), enumerate(alphas)]):
                regularization_params = {'tau': tau, 'alpha': alpha, 'sigmoid_approx': sigmoid_approx, 
                    'probabilities': probabilities, 'device': device}
                model_reg = model.DNN(model_name=model_name_reg, num_classes=ds_obj_reg.num_classes(), 
                    learning_rate=learning_rate, aggregate_coeff=aggregate_coeff,
                    with_regularization=True, 
                    regularization_params=regularization_params)
                
                complete_model_name = '{}_{}'.format(model_reg.model_name, model_reg.criterion._get_name()) \
                    if not isinstance(model_reg.criterion, nn.CrossEntropyLoss) else model_reg.model_name

                # Same loading + partitioning as above, for the regularized model.
                adv_folder = '../{}/adversarial_images/{}/{}'.format(ds_obj_reg.name, 
                    complete_model_name, attack_name)
                adv_image_ids, all_adv_objs = hp.load_adversarial_objects(folder=adv_folder, epoch=epoch, ds_obj=ds_obj_reg, device=device)
                all_images_adversarial = [x.image for x in all_adv_objs]

                print (adv_folder)
                print (len(glob.glob("{}/*_epoch_{}*".format(adv_folder, epoch))))

                if 'cifar' in ds_obj_reg.name.lower():
                    sensitive_attrs_name = ds_obj_reg.name.lower().split('_')[-1]
                    sensitive_attr = np.array([1 if ds_obj_reg.classes[ds_obj_reg.test_labels[int(img_id)]] == sensitive_attrs_name \
                        else 0 for img_id in adv_image_ids])
                    partition_name = 'Partition by class: {}'.format(sensitive_attrs_name)
                else:
                    attr = ds_obj_reg.name.lower().split('_')[-1]
                    sensitive_attrs_name = 'Black' if attr == 'race' else 'Female'
                    sensitive_attr = np.array([ds_obj_reg.get_image_protected_class('test', int(img_id), attr=attr) \
                                            for img_id in adv_image_ids])
                    partition_name = 'Partition by {}: {}'.format(attr, sensitive_attrs_name)
                    
                minority_difference, majority_difference = image_differences(adv_image_ids, all_images_adversarial, sensitive_attr, ds_obj_reg)
                frac_greater_than_tau_majority = np.array([np.sum(majority_difference > t) / len(majority_difference) for t in taus])
                frac_greater_than_tau_minority = np.array([np.sum(minority_difference > t) / len(minority_difference) for t in taus])

                all_lines.append([frac_greater_than_tau_majority, frac_greater_than_tau_minority])
                titles.append(r'$\tau = $' + ' {:.2f}, '.format(tau) + r'$\alpha$' + ' = {:.2f}'.format(alpha))

            
            # Render all panels in one grid and save under the reg model's dir.
            x_label = 'Distance to Adv. Sample' + r' ($\tau$)'
            y_label = r'$ \widehat{I^\tau_s} $'
            filename = 'inv_cdf_{}_comparison'.format(sensitive_attrs_name)
            dir_to_save = "plots/{}/{}/{}".format(ds_obj_reg.name, model_reg.model_name, attack_name)
            hp.line_plots_grid(all_lines, [taus] * len(all_lines), x_label, y_label, filename, titles, 
                partition_name, subfolder=dir_to_save, y_lims=(0,1), columns=len(all_lines))
Ejemplo n.º 11
0
# Label each sample: the third column of X is the class derived from the
# first two feature columns.
for i in range(numeroMuestras):
    X[i, 2] = funciones.definir_salida(X[i, 0], X[i, 1])

# 70/30 train/test split.
train, test = train_test_split(X, test_size=0.3)

trainDataset = funciones.MiDataset(train)
testDataset = funciones.MiDataset(test)
trainLoader = DataLoader(trainDataset, batch_size=batchSize, shuffle=True)
# Single-batch test loader: the whole test set is evaluated at once.
testLoader = DataLoader(testDataset,
                        batch_size=testDataset.__len__(),
                        shuffle=False)

# Per-class weights (from the label column) to counter class imbalance
# in the NLL loss below.
classWeight = torch.from_numpy(funciones.calcula_class_weights(
    train[:, -1])).float()

# Network with 2 inputs and 5 outputs; middle args are presumably hidden
# layer sizes (300, 150) — TODO confirm against model.DNN's signature.
miRed = model.DNN(2, 300, 150, 5)
lossFunction = nn.NLLLoss(weight=classWeight)
optimizer = Adam(miRed.parameters())

lossTrain = []
lossTest = []
minAccuracy = 0

for epoch in range(numeroEpoch):
    for data, target in trainLoader:
        data = data.detach().requires_grad_(True).float()
        target = target.detach().requires_grad_(True).long()
        optimizer.zero_grad()
        out = miRed(data)
        loss = lossFunction(out, target)
        loss.backward()
        # NOTE(review): snippet appears truncated here — there is no
        # optimizer.step() after loss.backward(), so as shown the weights
        # are never updated. Confirm against the original source.
Ejemplo n.º 12
0
def main():
    """Run a hyperopt TPE search over DNN hyperparameters, then retrain
    the best configuration and report its dev cost.

    Relies on module-level names: ``get_arg_parser``, ``setup_logger``,
    ``logging``, ``tabulate``, ``np``, ``prep``, ``model``, ``dd``
    (presumably collections.defaultdict — TODO confirm), ``create_space``,
    ``best2mparams``, and hyperopt's ``fmin``/``tpe``/``STATUS_OK``.
    """
    parser = get_arg_parser()
    args = vars(parser.parse_args())
    setup_logger(args)

    logging.critical(tabulate([args], headers='keys', tablefmt='plain'))

    # Discrete search-space choices; batch/hidden sizes are enlarged below
    # when running on the full (non-toy) dataset.
    OPTS = {
        'activation': ['sigmoid', 'tanh', 'relu', 'elu'],
        'opt': ['adam'],
        'n_batch': [32, 64, 128, 256],
        'hidden': [128, 256],
        'bnorm': [0, 1],
        # 'n_batch' : [128,256,512],
        # 'hidden' : [512,1024],
    }

    # NF input features, NOUT output targets; each data row is [Y | X].
    NF, NOUT = 400, 200
    logging.critical('loading data...')
    if args['toy']:
        dat = np.load('toy.npz')
        trn, dev, tst = dat['trn'], dat['dev'], dat['tst']
    else:
        trn, dev, tst = map(prep.get_dset, ('trn', 'dev', 'tst'))
        OPTS['n_batch'] = [128, 256, 512]
        OPTS['hidden'] = [512, 1024, 2048]

    logging.critical('loading data done.')
    logging.critical(tabulate([OPTS], headers='keys'))
    logging.critical('')

    # First NOUT columns are targets, the rest are features.
    trnX, trnY = trn[:, NOUT:], trn[:, :NOUT]
    devX, devY = dev[:, NOUT:], dev[:, :NOUT]

    def objective(conf):
        """Hyperopt objective: train briefly, return the best dev cost.

        NOTE(review): ``dict.iteritems()`` is Python 2 only, and ``map``
        returns a lazy iterator on Python 3 — this function as written
        requires Python 2; confirm the target interpreter before porting.
        """
        # Recover ordered hidden sizes (h1, h2, ...) and dropout rates
        # (d0, d1, ...) from the sampled 'dpart' sub-dict by key prefix.
        conf['n_hidden'] = map(
            lambda x: x[1],
            sorted((k, v) for k, v in conf['dpart'].iteritems()
                   if k.startswith('h')))
        conf['drates'] = map(
            lambda x: x[1],
            sorted((k, v) for k, v in conf['dpart'].iteritems()
                   if k.startswith('d')))

        dnn = model.DNN(NF, NOUT, conf)

        # Short training run; score is the best dev cost over the epochs.
        dcosts = []
        for e in range(args['hepoch']):
            tcost = dnn.train(trnX, trnY)
            dcost, pred = dnn.predict(devX, devY)
            dcosts.append(dcost)

        dcost = min(dcosts)
        # Penalize diverged runs (NaN cost) with a huge finite value so
        # hyperopt never selects them.
        dcost = np.iinfo(np.int32).max if np.isnan(dcost) else dcost

        # defaultdict(None) so missing header keys render blank in the log.
        info = dd(lambda: None)
        info.update(conf)
        # info = conf.copy()
        info['loss'] = dcost
        # Flatten layer sizes/dropouts into h1..hN / dr0..drN columns.
        info.update(
            ('h%d' % i, nh) for i, nh in enumerate(info['n_hidden'], 1))
        info.update(('dr%d' % i, dr) for i, dr in enumerate(info['drates']))
        # map(info.pop, ('dpart','n_hidden','drates'))
        headers = [
            'loss', 'n_batch', 'opt', 'activation', 'lr', 'norm', 'bnorm'
        ] + ['h%d' % i for i in range(1, args['max_layers'] + 1)
             ] + ['dr%d' % i for i in range(args['max_layers'] + 1)]
        logging.critical(
            tabulate([map(lambda x: info[x], headers)],
                     headers=headers,
                     floatfmt='.4f'))

        return {
            'loss': dcost,
            'status': STATUS_OK,
        }

    space = create_space(args['max_layers'], OPTS)

    # TPE search over the configuration space.
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                max_evals=args['max_evals'])
    logging.critical(best)
    logging.critical('')
    best_params = best2mparams(best, OPTS)
    logging.critical(tabulate([best_params], headers='keys'))

    # Retrain the winning configuration for the full epoch budget and
    # report its final dev cost.
    dnn = model.DNN(NF, NOUT, best_params)
    for e in range(args['fepoch']):
        tcost = dnn.train(trnX, trnY)
        dcost = dnn.predict(devX, devY)

    logging.critical('dcost with best model: {}'.format(dcost))