def collect_leakage(device,
                    base_folder='./models/BAM/',
                    specific="bowling_alley",
                    seed=0,
                    module="layer3",
                    experiment="sgd_finetuned",
                    ratios=["0.0","0.1","0.2","0.3","0.4","0.5","0.6","0.7","0.8","0.9", "1.0"],
                    adv=False,
                    baseline=False,
                    epoch=None,
                    multiple=True,
                    force=False,
                    dataset='bam',
                    args=None):
    """Score the post-hoc leakage probe of one seed at every ratio.

    For each entry of ``ratios`` the classifier and its leakage network are
    obtained from ``load_models`` (with ``post=True`` and ``leakage=True``),
    both are switched to eval mode, and ``utils.net2vec_accuracy`` is run on
    the test loader with ``train_labels=[-2, -1]``.

    For ``dataset == 'bam'`` and for any dataset other than ``'coco'`` one
    test loader is built up front and reused. For ``'coco'`` a fresh test
    loader is built per ratio from a deep copy of ``args``.

    Returns:
        dict mapping each ratio to the first of the two values returned by
        ``utils.net2vec_accuracy``.
    """
    is_coco = dataset == 'coco'

    if dataset == 'bam':
        _, testloader = dataload.get_data_loader_SceneBAM(
            seed=seed, ratio=float(0.5), specific=specific)
    elif not is_coco:
        _, testloader = dataload.get_data_loader_idenProf(
            'idenprof',
            train_shuffle=True,
            train_batch_size=64,
            test_batch_size=64,
            exclusive=True)

    # Everything load_models needs that does not change between ratios.
    fixed_kwargs = dict(
        base_folder=base_folder,
        specific=specific,
        seed=seed,
        module=module,
        experiment=experiment,
        adv=adv,
        baseline=baseline,
        epoch=epoch,
        post=True,
        multiple=multiple,
        leakage=True,
        force=force,
        dataset=dataset,
        args=args,
    )

    results = {}
    for ratio in ratios:
        model, net, net_forward, _probe = load_models(
            device, ratio=ratio, **fixed_kwargs)
        model.eval()
        net.eval()

        if is_coco:
            # Evaluate on a loader configured for this ratio without
            # touching the caller's args object.
            eval_args = copy.deepcopy(args)
            eval_args.ratio = ratio
            eval_args.gender_balanced = True
            if int(ratio) > 0:
                eval_args.balanced = True
            _, testloader = coco_dataload.get_data_loader_coco(eval_args)

        accuracy, _ = utils.net2vec_accuracy(
            testloader,
            net_forward,
            device,
            train_labels=[-2, -1])
        results[ratio] = accuracy
    return results
def collect_accuracies(
    device,
    base_folder='./models/BAM/',
    specific="bowling_alley",
    seeds=[0,1,2,3,4],
    module="layer3",
    experiment="sgd_finetuned",
    ratios=["0.0","0.1","0.2","0.3","0.4","0.5","0.6","0.7","0.8","0.9", "1.0"],
    adv=False,
    baseline=False,
    epoch=None,
    post=False,
    multiple=True,
    dataset='bam'):
    """Collect classifier accuracy for every (seed, ratio) combination.

    For each seed a test loader is built once (SceneBAM when
    ``dataset == 'bam'``, idenProf otherwise). For each ratio the model is
    loaded through ``load_models``, put in eval mode and scored with
    ``utils.classification_specific_accuracy``.

    Note that the ``post`` argument is accepted but not used: ``load_models``
    is always called with ``post=False``.

    Returns:
        list with one inner list per seed; each inner list holds the
        accuracy value for every ratio, in the order of ``ratios``.
    """
    # Arguments to load_models that stay fixed over the whole sweep.
    fixed_kwargs = dict(
        base_folder=base_folder,
        specific=specific,
        module=module,
        experiment=experiment,
        adv=adv,
        baseline=baseline,
        epoch=epoch,
        post=False,  # n2v doesnt matter, we should at least have the during n2v trained
        multiple=multiple,
        leakage=False,
        dataset=dataset,
    )

    per_seed = []
    for seed in seeds:
        if dataset == 'bam':
            _, testloader = dataload.get_data_loader_SceneBAM(
                seed=seed, ratio=float(0.5), specific=specific)
        else:
            _, testloader = dataload.get_data_loader_idenProf(
                'idenprof',
                train_shuffle=True,
                train_batch_size=64,
                test_batch_size=32,
                exclusive=True)

        row = []
        for ratio in ratios:
            model, _net, _forward, _probe = load_models(
                device, seed=seed, ratio=ratio, **fixed_kwargs)
            model.eval()
            row.append(
                utils.classification_specific_accuracy(testloader, model, device))
        per_seed.append(row)
    return per_seed
Exemple #3
0
 # Prepare the on-disk datasets (BAM only) and build the train/test loaders
 # for the dataset selected by args.dataset.
 if args.dataset == 'bam':
     # A one-element args.specific sequence is collapsed to its single entry.
     if args.specific is not None and len(args.specific) == 1:
         args.specific = args.specific[0]
     # setupBAM is invoked twice, with True and then False as its second
     # positional argument, for every ratio in args.verify_ratios.
     # NOTE(review): presumably the flag selects the train vs. test split;
     # confirm against setup_datasets.setupBAM.
     setup_datasets.setupBAM(args.seed,
                             True,
                             args.specific,
                             ratios=args.verify_ratios)
     setup_datasets.setupBAM(args.seed,
                             False,
                             args.specific,
                             ratios=args.verify_ratios)
     # The train loader uses the requested ratio; the test loader is always
     # built with ratio 0.5.
     trainloader, _ = dataload.get_data_loader_SceneBAM(
         seed=args.seed,
         ratio=float(args.ratio),
         specific=args.specific,
         train_batch_size=args.train_bs)
     _, testloader = dataload.get_data_loader_SceneBAM(
         seed=args.seed,
         ratio=float("0.5"),
         specific=args.specific,
         test_batch_size=args.test_bs)
 elif args.dataset == 'idenprof':
     # idenProf yields both loaders from a single call.
     trainloader, testloader = dataload.get_data_loader_idenProf(
         'idenprof',
         train_shuffle=True,
         train_batch_size=args.train_bs,
         test_batch_size=args.test_bs,
         exclusive=True)
 elif args.dataset == 'coco':
Exemple #4
0
    # Prepare the on-disk datasets (BAM only) and build the train/test
    # loaders for the dataset selected by args.dataset.
    if args.dataset == 'bam':
        # A one-element args.specific sequence is collapsed to its single entry.
        if args.specific is not None and len(args.specific) == 1:
            args.specific = args.specific[0]
        # Make sure the training ratio itself is among the ratios passed to
        # setupBAM; the list is kept sorted in descending (string) order.
        if str(args.ratio) not in args.verify_ratios:
            args.verify_ratios = sorted(args.verify_ratios + [str(args.ratio)],
                                        reverse=True)
        # setupBAM is invoked twice, with True and then False as its second
        # positional argument.
        # NOTE(review): presumably the flag selects the train vs. test split;
        # confirm against setup_datasets.setupBAM.
        setup_datasets.setupBAM(args.seed,
                                True,
                                args.specific,
                                ratios=args.verify_ratios)
        setup_datasets.setupBAM(args.seed,
                                False,
                                args.specific,
                                ratios=args.verify_ratios)
        # The train loader uses the requested ratio; the test loader is
        # always built with ratio 0.5.
        trainloader, _ = dataload.get_data_loader_SceneBAM(
            seed=args.seed, ratio=float(args.ratio), specific=args.specific)
        _, testloader = dataload.get_data_loader_SceneBAM(
            seed=args.seed, ratio=float("0.5"), specific=args.specific)
    elif args.dataset == 'idenprof':
        # idenProf yields both loaders from a single call.
        trainloader, testloader = dataload.get_data_loader_idenProf(
            'idenprof',
            train_shuffle=True,
            train_batch_size=args.train_bs,
            test_batch_size=args.test_bs,
            exclusive=True)
    elif args.dataset == 'coco':
        # The COCO loader takes the whole args object.
        trainloader, testloader = coco_dataload.get_data_loader_coco(args)
    # Re-seed after building the loaders: iterating a dataloader to compute
    # the mean/mu statistics is noted here as the reason for resetting.
    set_seed(args.seed)
    # Select the CUDA device by index.
    device = torch.device('cuda:' + str(args.device))
def collect_accuracies(
    device,
    base_folder='./models/BAM/',
    specific="bowling_alley",
    seeds=[0,1,2,3,4],
    module="layer3",
    experiment="sgd_finetuned",
    ratios=["0.0","0.1","0.2","0.3","0.4","0.5","0.6","0.7","0.8","0.9", "1.0"],
    adv=False,
    baseline=False,
    epoch=None,
    post=False,
    multiple=True,
    dataset='bam',
    args=None):
    """Collect a per-model score for every (seed, ratio) combination.

    For ``dataset == 'bam'`` and for any dataset other than ``'coco'`` a test
    loader is built once per seed and each model is scored with
    ``utils.classification_specific_accuracy``. For ``'coco'`` a test loader
    is built per ratio from a deep copy of ``args`` (``ratio=1``,
    ``balanced=True``, ``gender_balanced=True``) and the model is scored with
    ``utils.detection_disparity`` under ``torch.no_grad()``.

    Models are loaded through ``load_models`` with ``post=False``,
    ``leakage=False`` and ``ignore_net=True``; the ``post`` argument of this
    function is accepted but not used.

    Args:
        device: device handed to ``load_models`` and the scoring helpers.
        seeds: seeds to sweep; one result row is produced per seed.
        ratios: ratios to sweep; one result entry is produced per ratio.
        args: options object forwarded to ``load_models``; it is also
            deep-copied to configure the COCO loader, so it is needed when
            ``dataset == 'coco'``.
        (remaining arguments are forwarded to ``load_models`` unchanged)

    Returns:
        list with one inner list per seed; each inner list holds one score
        per ratio, in the order of ``ratios``.
    """
    res = []
    for seed in seeds:
        curr = []
        if dataset == 'bam':
            _, testloader = dataload.get_data_loader_SceneBAM(
                seed=seed, ratio=float(0.5), specific=specific)
        elif dataset != 'coco':
            _, testloader = dataload.get_data_loader_idenProf(
                'idenprof',
                train_shuffle=True,
                train_batch_size=64,
                test_batch_size=64,
                exclusive=True)
        for ratio in ratios:
            model, _, _, _ = load_models(
                device,
                base_folder=base_folder,
                specific=specific,
                seed=seed,
                module=module,
                experiment=experiment,
                ratio=ratio,
                adv=adv,
                baseline=baseline,
                epoch=epoch,
                post=False,  # n2v doesnt matter, we should at least have the during n2v trained
                multiple=multiple,
                leakage=False,
                dataset=dataset,
                args=args,
                ignore_net=True
            )
            model.eval()
            if dataset == 'coco':
                # The evaluation loader is always the balanced one. The
                # former `if int(ratio) > 0` branch only re-assigned
                # balanced=True and could raise ValueError on decimal ratio
                # strings such as "0.0", so it has been dropped.
                tmp_args = copy.deepcopy(args)
                tmp_args.ratio = 1
                tmp_args.balanced = True
                tmp_args.gender_balanced = True
                _, testloader = coco_dataload.get_data_loader_coco(tmp_args)
                with torch.no_grad():
                    acc = utils.detection_disparity(
                        testloader,
                        model,
                        device
                    )
            else:
                acc = utils.classification_specific_accuracy(
                    testloader,
                    model,
                    device
                )
            curr.append(acc)
        res.append(curr)
    return res
def collect_tcav(
    device,
    base_folder='./models/BAM/',
    specific="bowling_alley",
    seed=0,
    module="layer3",
    experiment="sgd_finetuned",
    ratios=["0.0","0.1","0.2","0.3","0.4","0.5","0.6","0.7","0.8","0.9", "1.0"],
    adv=False,
    baseline=False,
    epoch=None,
    post=False,
    multiple=True,
    force=False,
    dataset='bam',
    args=None):
    """Compute TCAV values per class for every ratio and return them as a table.

    For each ratio the model and its probe are loaded through ``load_models``
    (``tcav=True``, ``leakage=False``), ``analysis.compute_tcavs`` is run on
    the test loader, and the returned values are grouped by class label.

    Args:
        device: device handed to ``load_models`` and ``compute_tcavs``.
        ratios: ratios to sweep.
        adv, baseline: select the model variant; they also determine the
            ``model`` column ("Baseline", "Adversarial" or "Debias").
        dataset: ``'bam'``, ``'coco'`` or anything else (treated as idenProf).
            ``'coco'`` switches to 79 classes and object-detection mode.
        args: options object forwarded to ``load_models``; required when
            ``dataset == 'coco'`` because the COCO loader is configured from
            a deep copy of it.
        (remaining arguments are forwarded to ``load_models`` unchanged)

    Returns:
        pandas.DataFrame with columns ``values``, ``ratio``, ``model`` and
        ``class``, one row per TCAV value, so that results of different
        experiments can be concatenated.
    """
    is_coco = dataset == 'coco'
    if dataset == 'bam':
        _, testloader = dataload.get_data_loader_SceneBAM(
            seed=seed, ratio=float(0.5), specific=specific)
    elif not is_coco:
        _, testloader = dataload.get_data_loader_idenProf(
            'idenprof',
            train_shuffle=True,
            train_batch_size=64,
            test_batch_size=64,
            exclusive=True)

    model_type = "Baseline" if baseline else (
        "Adversarial" if adv else "Debias"
    )
    num_classes = 79 if is_coco else 10

    values = []
    r = []
    m = []
    s = []
    for ratio in ratios:
        print("loading model")
        model, net, _net_forward, activation_probe = load_models(
            device,
            base_folder=base_folder,
            specific=specific,
            seed=seed,
            module=module,
            experiment=experiment,
            ratio=ratio,
            adv=adv,
            baseline=baseline,
            epoch=epoch,
            post=post,
            multiple=multiple,
            leakage=False,
            force=force,
            tcav=True,
            dataset=dataset,
            args=args
        )
        print("did load?")
        if is_coco:
            assert args is not None
            # Build the evaluation loader from a private copy so the
            # caller's args are left untouched.
            tmp_args = copy.deepcopy(args)
            tmp_args.ratio = 1
            tmp_args.balanced = True
            tmp_args.gender_balanced = True
            tmp_args.train_bs = 64
            tmp_args.test_bs = 32
            _, testloader = coco_dataload.get_data_loader_coco(tmp_args)
        print(net)
        collect, labels = analysis.compute_tcavs(
            testloader,
            model,
            activation_probe,
            # Difference of the probe's last two weight rows, used as the
            # bias vector.
            net.weight[-2] - net.weight[-1],
            device,
            # Use the `dataset` parameter rather than args.dataset: args
            # defaults to None and `dataset` drives every other branch here.
            object_detect=is_coco,
            objects=79
        )
        for s_idx in range(num_classes):
            current = collect[labels == s_idx]
            count = current.shape[0]
            values.append(current)
            r += [ratio] * count
            m += [model_type] * count
            s += [s_idx] * count

    df = {
        'values': np.concatenate(values),
        'ratio': r,
        'model': m,
        'class': s,
    }
    # return a Dataframe, then we can join together data from different experiments...
    return pd.DataFrame(df)
def load_models(device,
                base_folder='./models/BAM/',
                specific="bowling_alley",
                seed=0,
                module="layer3",
                experiment="sgd_finetuned",
                ratio="0.5",
                adv=False,
                baseline=False,
                epoch=None,
                post=False,
                multiple=True,
                leakage=False,
                tcav=False,
                force=False,
                dataset='bam',
                args=None,
                ignore_net=False):
    """Load a trained classifier together with its attribute probe.

    The checkpoint file names are derived from the model variant (baseline,
    adversarial or debias), ``ratio``, ``epoch`` and, for the post-hoc debias
    probe only, ``args.custom_end``. After resolving the paths and building
    the dataloaders for ``dataset``, the post-hoc probe is retrained when
    ``post`` and ``force`` are set, and loading is delegated to
    ``models.load_models``.

    Args:
        device: device handed to the training helpers and the model loader.
        base_folder, specific, seed, module, experiment: locate the
            checkpoints on disk.
        ratio: ratio the model was trained with; part of the file names for
            ``'bam'`` and ``'coco'``.
        adv, baseline: select the model variant. ``baseline`` wins for file
            names; ``adv`` wins for the ``model_extra`` tag passed to the
            post-training helpers.
        epoch: optional epoch tag appended to the file names.
        post: use the probe trained after the main model ("_after_" files).
        multiple: selects 39 vs. 12 probe attributes (non-COCO, non-leakage).
        leakage: load the MLP leakage probe instead; requires ``post``.
        tcav: with ``post`` and without ``leakage``, skip retraining.
        force: with ``post``, retrain the probe before loading.
        dataset: ``'bam'``, ``'coco'`` or anything else (treated as idenProf).
        args: optional options object. Only ``custom_end`` is read from it,
            except for ``'coco'`` where a deep copy configures the loader.
            The object itself is never modified.
        ignore_net: forwarded to ``models.load_models``.

    Returns:
        The 4-tuple ``(model, net, net_forward, activation_probe)`` produced
        by ``models.load_models``.

    Raises:
        AssertionError: ``leakage`` without ``post``, or a required
            checkpoint file is missing.
        ValueError: ``dataset == 'coco'`` but ``args`` is None.
        Exception: ``post`` is set and the probe would have to be trained
            but ``force`` is not set ('Run trial again').
    """
    # NOTE(review): an earlier revision kept a disabled code path here that
    # built balanced_models.ObjectMultiLabelAdv directly for COCO + adv. That
    # combination now passes None as the constructor to models.load_models;
    # confirm that helper handles it.
    if leakage:
        assert post
    if epoch is not None:
        epoch = "_" + str(epoch)
    else:
        epoch = ""

    # Derive the suffix locally instead of rewriting args.custom_end: the
    # same args object is reused for every ratio of a sweep, and mutating it
    # would prepend one more "_" on each call. This also makes the default
    # args=None usable.
    raw_custom_end = getattr(args, 'custom_end', None)
    custom_end = "" if raw_custom_end is None else str(raw_custom_end)
    if custom_end:
        custom_end = "_" + custom_end

    if baseline:
        model_end = "resnet_base_"+str(ratio)+epoch+'.pt'
        if not post:
            n2v_end   = "resnet_n2v_base_"+str(ratio)+epoch+'.pt'
        else:
            n2v_end   = "resnet_n2v_base_after_"+str(ratio)+epoch+'.pt'
    else:
        if not adv:
            model_end = "resnet_debias_"+str(ratio)+epoch+'.pt'
            if not post:
                n2v_end   = "resnet_n2v_debias_"+str(ratio)+epoch+'.pt'
            else:
                n2v_end   = "resnet_n2v_debias_after_"+str(ratio)+epoch+custom_end+'.pt'
        else:
            # Adversarial checkpoints carry no epoch tag, except for the
            # post-hoc probe.
            model_end = "resnet_adv_"+str(ratio)+'.pt'
            if not post:
                n2v_end   = "resnet_n2v_adv_"+str(ratio)+'.pt'
            else:
                n2v_end   = "resnet_n2v_adv_after_"+str(ratio)+epoch+'.pt'

    ratio_in_name = dataset == 'bam' or dataset == 'coco'
    if not ratio_in_name:
        # Other datasets have no ratio in their checkpoint names.
        model_end = model_end.replace('_'+str(ratio), '')
        n2v_end   = n2v_end.replace('_'+str(ratio), '')

    # Leakage probes live in a "leakage/" sub-folder and use "mlp" in place
    # of "n2v" in their file name.
    probe_end = ('leakage/' + n2v_end.replace('n2v', 'mlp')) if leakage else n2v_end
    if ratio_in_name:
        model_path, n2v_path = utils.get_paths(
                base_folder,
                seed,
                specific,
                model_end=model_end,
                n2v_end=probe_end,
                n2v_module=module,
                experiment=experiment,
                with_n2v=True,
        )
    else:
        model_path = os.path.join(base_folder, str(seed), experiment, module, model_end)
        n2v_path = os.path.join(base_folder, str(seed), experiment, module, probe_end)

    if dataset == 'bam':
        trainloader, _ = dataload.get_data_loader_SceneBAM(seed=seed,ratio=float(ratio), specific=specific)
        _, testloader = dataload.get_data_loader_SceneBAM(seed=seed,ratio=float(0.5), specific=specific)
    elif dataset == 'coco':
        if args is None:
            raise ValueError("args is required when dataset == 'coco'")
        tmp_args = copy.deepcopy(args)
        # Keep the copy consistent with the suffix used for the file names.
        tmp_args.custom_end = custom_end
        tmp_args.ratio = ratio
        if int(ratio) > 0:
            tmp_args.balanced = True
        if leakage:
            tmp_args.gender_balanced = True
        trainloader, testloader = coco_dataload.get_data_loader_coco(
            tmp_args
        )
    else:
        trainloader,testloader = dataload.get_data_loader_idenProf('idenprof',train_shuffle=True,
                                                                   train_batch_size=64,
                                                                   test_batch_size=64,
                                                                   exclusive=True)

    if not (dataset == 'coco' and adv):
        assert os.path.exists(model_path), model_path

    if post:
        # since we have to run a separate script, might not have finished...
        model_extra = '_adv' if adv else ('_base' if baseline else '_debias')
        n2v_extra   = model_extra + '_after'
        if not leakage:
            if tcav:
                pass
            elif force:
                post_train.train_net2vec(trainloader,
                                        testloader,
                                        device,
                                        seed,
                                        specific=specific,
                                        p=ratio,
                                        n_epochs=20,
                                        module=module,
                                        lr=.01,
                                        out_file=None,
                                        base_folder=base_folder,
                                        experiment1=experiment,
                                        experiment2=experiment,
                                        model_extra=model_extra,
                                        n2v_extra=n2v_extra,
                                        with_n2v=True,
                                        nonlinear=False, # might want to change this later
                                        model_custom_end=epoch.replace('_',''),
                                        n2v_custom_end=epoch.replace('_',''),
                                        multiple=multiple,
                                        dataset=dataset
                )
            else:
                raise Exception('Run trial again')
        else:
            if force:
                post_train.train_leakage(trainloader,
                                        testloader,
                                        device,
                                        seed,
                                        specific=specific,
                                        p=ratio,
                                        n_epochs=20,
                                        module=module,
                                        lr=5e-5, # leakage model uses adam
                                        out_file=None,
                                        base_folder=base_folder,
                                        experiment1=experiment,
                                        experiment2=experiment,
                                        model_extra=model_extra,
                                        n2v_extra=n2v_extra,
                                        with_n2v=True,
                                        nonlinear=True, # MLP leakage model
                                        model_custom_end='',
                                        n2v_custom_end='',
                                        dataset=dataset
                )
            else:
                raise Exception('Run trial again')
    else:
        # should've been saved during training if not ported from tianlu
        if not (dataset == 'coco' and adv):
            assert os.path.exists(n2v_path)

    num_attributes = 10 + 9 + 20 if multiple else 12
    num_classes=10
    if dataset == 'coco':
        num_attributes = 81
        num_classes = 79
    model, net, net_forward, activation_probe = models.load_models(
        device,
        None if (dataset == 'coco' and adv) else
        lambda x,y,z: models.resnet_(
            pretrained=True,
            custom_path=x,
            device=y,
            initialize=z,
            num_classes=num_classes,
            size=50 if (dataset == 'bam') or (dataset == 'coco') else 34
        ),
        model_path=model_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_path,
        module='fc' if leakage else module,
        num_attributes=2 if leakage else num_attributes,
        model_init = False,
        n2v_init = False,
        nonlinear = leakage,
        ignore_net = ignore_net
    )
    print(n2v_path)
    return model, net, net_forward, activation_probe
def load_models(device,
                base_folder='./models/BAM/',
                specific="bowling_alley",
                seed=0,
                module="layer3",
                experiment="sgd_finetuned",
                ratio="0.5",
                adv=False,
                baseline=False,
                epoch=None,
                post=False,
                multiple=True,
                leakage=False,
                tcav=False,
                force=False,
                dataset='bam'):
    """Load a trained classifier together with its attribute probe.

    Checkpoint file names are derived from the model variant (baseline,
    adversarial or debias), ``ratio`` and ``epoch``. The dataloaders for
    ``dataset`` are built, the post-hoc probe is retrained when ``post`` and
    ``force`` are set, and loading is delegated to ``models.load_models``.

    Returns:
        The 4-tuple ``(model, net, net_forward, activation_probe)`` produced
        by ``models.load_models``.

    Raises:
        AssertionError: ``leakage`` without ``post``, or a required
            checkpoint file is missing.
        Exception: ``post`` is set and the probe would have to be trained
            but ``force`` is not set ('Run trial again').
    """
    if leakage:
        assert post

    epoch = "" if epoch is None else "_" + str(epoch)
    ratio_str = str(ratio)
    is_bam = dataset == 'bam'

    # (model stem, probe stem, post-hoc probe stem) per model variant.
    # `baseline` takes precedence over `adv` for the file names.
    stems = {
        'base': ("resnet_base_", "resnet_n2v_base_", "resnet_n2v_base_after_"),
        'debias': ("resnet_debias_", "resnet_n2v_debias_", "resnet_n2v_debias_after_"),
        'adv': ("resnet_adv_", "resnet_n2v_adv_", "resnet_n2v_adv_after_"),
    }
    if baseline:
        variant = 'base'
    elif adv:
        variant = 'adv'
    else:
        variant = 'debias'
    model_stem, n2v_stem, n2v_after_stem = stems[variant]

    # Adversarial checkpoints carry no epoch tag, except for the post-hoc probe.
    plain_epoch = "" if variant == 'adv' else epoch
    model_end = model_stem + ratio_str + plain_epoch + '.pt'
    if post:
        n2v_end = n2v_after_stem + ratio_str + epoch + '.pt'
    else:
        n2v_end = n2v_stem + ratio_str + plain_epoch + '.pt'

    if not is_bam:
        # Non-BAM checkpoints have no ratio in their names.
        ratio_token = '_' + ratio_str
        model_end = model_end.replace(ratio_token, '')
        n2v_end = n2v_end.replace(ratio_token, '')

    # Leakage probes live in "leakage/" and use "mlp" in place of "n2v".
    if leakage:
        probe_end = 'leakage/' + n2v_end.replace('n2v','mlp')
    else:
        probe_end = n2v_end

    if is_bam:
        model_path, n2v_path = utils.get_paths(
            base_folder,
            seed,
            specific,
            model_end=model_end,
            n2v_end=probe_end,
            n2v_module=module,
            experiment=experiment,
            with_n2v=True,
        )
        trainloader, _ = dataload.get_data_loader_SceneBAM(
            seed=seed, ratio=float(ratio), specific=specific)
        _, testloader = dataload.get_data_loader_SceneBAM(
            seed=seed, ratio=float(0.5), specific=specific)
    else:
        checkpoint_dir = (base_folder, str(seed), experiment, module)
        model_path = os.path.join(*checkpoint_dir, model_end)
        n2v_path = os.path.join(*checkpoint_dir, probe_end)
        trainloader, testloader = dataload.get_data_loader_idenProf(
            'idenprof',
            train_shuffle=True,
            train_batch_size=64,
            test_batch_size=64,
            exclusive=True)

    assert os.path.exists(model_path), model_path

    if not post:
        # should've been saved during training
        assert os.path.exists(n2v_path)
    else:
        # since we have to run a separate script, might not have finished...
        # Note that `adv` takes precedence over `baseline` for these tags.
        model_extra = '_adv' if adv else ('_base' if baseline else '_debias')
        n2v_extra = model_extra + '_after'
        if leakage:
            if not force:
                raise Exception('Run trial again')
            post_train.train_leakage(
                trainloader,
                testloader,
                device,
                seed,
                specific=specific,
                p=ratio,
                n_epochs=20,
                module=module,
                lr=5e-5,  # leakage model uses adam
                out_file=None,
                base_folder=base_folder,
                experiment1=experiment,
                experiment2=experiment,
                model_extra=model_extra,
                n2v_extra=n2v_extra,
                with_n2v=True,
                nonlinear=True,  # MLP leakage model
                model_custom_end='',
                n2v_custom_end='',
                dataset=dataset
            )
        elif tcav:
            pass
        elif force:
            epoch_tag = epoch.replace('_','')
            post_train.train_net2vec(
                trainloader,
                testloader,
                device,
                seed,
                specific=specific,
                p=ratio,
                n_epochs=20,
                module=module,
                lr=.01,
                out_file=None,
                base_folder=base_folder,
                experiment1=experiment,
                experiment2=experiment,
                model_extra=model_extra,
                n2v_extra=n2v_extra,
                with_n2v=True,
                nonlinear=False,  # might want to change this later
                model_custom_end=epoch_tag,
                n2v_custom_end=epoch_tag,
                multiple=multiple,
                dataset=dataset
            )
        else:
            raise Exception('Run trial again')

    if leakage:
        probe_module = 'fc'
        probe_attributes = 2
    else:
        probe_module = module
        probe_attributes = 10 + 9 + 20 if multiple else 12

    def _build_resnet(x, y, z):
        # x: checkpoint path, y: device, z: initialize flag.
        return models.resnet_(
            pretrained=True,
            custom_path=x,
            device=y,
            initialize=z,
            size=50 if dataset == 'bam' else 34)

    model, net, net_forward, activation_probe = models.load_models(
        device,
        _build_resnet,
        model_path=model_path,
        net2vec_pretrained=True,
        net2vec_path=n2v_path,
        module=probe_module,
        num_attributes=probe_attributes,
        model_init=False,
        n2v_init=False,
        nonlinear=leakage
    )
    return model, net, net_forward, activation_probe