Example 1
    def __init__(self, args):
        self.s = args.s
        self.z = args.z
        self.batch_size = args.batch_size
        self.epochs = 200
        self.alpha = 1
        self.beta = args.beta
        self.target = args.target
        self.use_bn = args.use_bn
        self.bias = args.bias
        self.n_hidden = args.n_hidden
        self.pretrain_e = args.pretrain_e
        self.dataset = args.dataset
        self.test_ensemble = args.test_ensemble
        self.test_uncertainty = args.test_uncertainty
        self.vote = args.vote

        self.device = torch.device('cuda')
        torch.manual_seed(8734)        

        self.hypergan = HyperGAN(args, self.device)
        self.hypergan.print_hypergan()
        self.hypergan.attach_optimizers(5e-3, 1e-4, 5e-5)
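        # the three values passed to attach_optimizers above are presumably learning rates for the HyperGAN components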

        if self.dataset == 'mnist':
            self.data_train, self.data_test = datagen.load_mnist()
        elif self.dataset == 'cifar':
            self.data_train, self.data_test = datagen.load_cifar()

        self.best_test_acc = 0.
        self.best_test_loss = np.inf
Example 2
def _run_anomaly_mnist(args, hypernet):
    arch = get_network(args)
    train, test = datagen.load_mnist(args)
    _vars, _stds, _ents = [], [], []
    model = sample_model(hypernet, arch)
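    # sweep ensemble sizes: for every test batch, sample n classifiers from the hypernetwork and record the variance/entropy of their predicted labels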
    for n in [5, 10, 100]:
        for idx, (data, target) in enumerate(test):
            data, target = data.cuda(), target.cuda()
            pred_labels = []
            for _ in range(n):
                model = sample_model(hypernet, arch)
                output = model(data)
                pred = output.data.max(1, keepdim=True)[1]
                pred_labels.append(pred.view(pred.numel()))

            p_labels = torch.stack(pred_labels).float().transpose(0, 1)
            _vars.append(p_labels.var(1).mean())
            _stds.append(p_labels.std(1).mean())
            _ents.append(np.apply_along_axis(entropy, 1, p_labels.cpu().numpy()))

        plot_empirical_cdf(args, _ents, n)
        print('mean var: {}, max var: {}, min var:{}, std: {}'.format(
            torch.tensor(_vars).mean(),
            torch.tensor(_vars).max(),
            torch.tensor(_vars).min(),
            torch.tensor(_stds).mean()))
Example 3
def train(args, model, grad=False):
    train_loader, _ = datagen.load_mnist(args)
    train_loss, train_acc = 0., 0.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    for epoch in range(args.epochs):
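        # one full pass over the MNIST training set, followed by a test-set evaluation below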
        model.train()
        correct = 0.
        train_loss = 0.
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            pred = output.data.max(
                1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
            train_loss += loss.item()
        train_loss /= len(train_loader.dataset)
        acc = (correct.float() / len(train_loader.dataset)).item()
        print('train_acc: {}, train loss: {}'.format(acc, train_loss))
        acc, loss = test(args, model)

    return acc, loss
Example 4
def test_mnist(args, Z, names, arch):
    _, test_loader = datagen.load_mnist(args)
    criterion = nn.CrossEntropyLoss()
    pop_size = args.batch_size
    with torch.no_grad():
        correct = 0.
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for i in range(pop_size):
                params = [Z[0][i], Z[1][i], Z[2][i]]
                model = weights_to_clf(params, names, arch)
                output = model(data)
                outputs.append(output)
            pop_outputs = torch.stack(outputs)
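            # majority vote over the population's predicted labels (the reshape below assumes a test batch size of 100)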
            pop_labels = pop_outputs.max(2, keepdim=True)[1].view(
                pop_size, 100, 1)
            modes = torch.mode(pop_labels, dim=0, keepdim=True)[0].view(100, )
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()

        test_loss /= len(test_loader.dataset)
        acc = (correct.float() / len(test_loader.dataset)).item()
    return acc, test_loss
Example 5
def train(args, model, grad=False):
    if args.dataset == 'mnist':
        train_loader, _ = datagen.load_mnist(args)
    elif args.dataset == 'fashion_mnist':
        train_loader, _ = datagen.load_fashion_mnist(args)
    train_loss, train_acc = 0., 0.
    criterion = nn.CrossEntropyLoss()
    if args.ft:
        for child in list(model.children())[:2]:
            print('removing {}'.format(child))
            for param in child.parameters():
                param.requires_grad = False
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=1e-3)
    for epoch in range(args.epochs):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
        acc, loss = test(args, model, epoch)
    return acc, loss
Example 6
def test(args, model, epoch=None, grad=False):
    model.eval()
    if args.dataset == 'mnist':
        _, test_loader = datagen.load_mnist(args)
    elif args.dataset == 'fashion_mnist':
        _, test_loader = datagen.load_fashion_mnist(args)
    test_loss = 0
    correct = 0.
    criterion = nn.CrossEntropyLoss()
    for data, target in test_loader:
        data, target = data.cuda(), target.cuda()
        output = model(data)
        if grad is False:
            test_loss += criterion(output, target).item()  # sum up batch loss
        else:
            test_loss += criterion(output, target)

        pred = output.data.max(
            1, keepdim=True)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()

    test_loss /= len(test_loader.dataset)
    acc = (correct.float() / len(test_loader.dataset)).item()
    print(acc)

    if epoch:
        print('Average loss: {}, Accuracy: {}/{} ({}%)'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    return acc, test_loss
Example 7
def eval_mnist_ensemble(ensemble, outlier=False):
    for model in ensemble:
        model.eval()

    if outlier is True:
        trainloader, testloader = datagen.load_notmnist()
    else:
        trainloader, testloader = datagen.load_mnist()

    model_outputs = torch.zeros(len(ensemble), len(testloader.dataset), 10)
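    # model_outputs buffers per-model logits for the whole test set: [ensemble size, test points, classes]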
    for i, (data, target) in enumerate(testloader):
        data = data.cuda()
        target = target.cuda()
        outputs = []
        for model in ensemble:
            outputs.append(model(data))
        outputs = torch.stack(outputs)
        model_outputs[:, i * len(data):(i + 1) * len(data), :] = outputs

    # Soft Voting (entropy in confidence)
    probs_soft = F.softmax(model_outputs, dim=-1)  # [ens, data, 10]
    preds_soft = probs_soft.mean(0)  # [data, 10]
    entropy = entropy_fn(preds_soft.T.cpu().numpy())  # [data]

    # Hard Voting (variance in predicted classes)
    probs_hard = F.softmax(model_outputs, dim=-1)  #[ens, data, 10]
    preds_hard = probs_hard.var(0).cpu()  # [data, 10]
    variance = preds_hard.sum(1).numpy()  # [data]
    for model in ensemble:
        model.train()

    return entropy, variance
Example 8
def eval_mnist_hypergan(hypergan, ens_size, s_dim, outlier=False):
    hypergan.eval_()
    if outlier is True:
        trainloader, testloader = datagen.load_notmnist()
    else:
        trainloader, testloader = datagen.load_mnist()

    model_outputs = torch.zeros(ens_size, len(testloader.dataset), 10)
    for i, (data, target) in enumerate(testloader):
        data = data.cuda()
        target = target.cuda()
        z = torch.randn(ens_size, s_dim).to(hypergan.device)
        codes = hypergan.mixer(z)
        params = hypergan.generator(codes)
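        # zip across the generated per-layer tensors so each iteration yields the weights of one sampled network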
        outputs = []
        for (layers) in zip(*params):
            output = hypergan.eval_f(layers, data)
            outputs.append(output)
        outputs = torch.stack(outputs)
        model_outputs[:, i * len(data):(i + 1) * len(data), :] = outputs

    # Soft Voting (entropy in confidence)
    probs_soft = F.softmax(model_outputs, dim=-1)  # [ens, data, 10]
    preds_soft = probs_soft.mean(0)  # [data, 10]
    entropy = entropy_fn(preds_soft.T.cpu().numpy())  # [data]

    # Hard Voting (variance in predicted classes)
    probs_hard = F.softmax(model_outputs, dim=-1)  #[ens, data, 10]
    preds_hard = probs_hard.var(0).cpu()  # [data, 10]
    variance = preds_hard.sum(1).numpy()  # [data]
    hypergan.train_()

    return entropy, variance
Example 9
def test_ent(args, model, grad=False, epoch=None):
    from scipy.stats import entropy
    _, test_loader = datagen.load_mnist(args)
    runs = []
    ent_runs = []
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        preds = []
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for _ in range(100):
                output = model(data)
                outputs.append(output)
            if grad is False:
                test_loss += criterion(output,
                                       target).item()  # sum up batch loss
            else:
                test_loss += criterion(output, target)
            pred = output.data.max(
                1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()

            preds.append(F.softmax(torch.stack(outputs).mean(0), dim=1))

        test_loss /= len(test_loader.dataset)
        acc = (correct.float() / len(test_loader.dataset)).item()

        preds = torch.stack(preds).view(-1, 10)
        print(preds)

    preds = preds.cpu().detach().numpy()
    print('PREDS: ', preds.shape)
    ent = entropy(preds.T)
    print('ENT: ', ent, ent.shape, ent.mean())
    """
    def plot_e(a):
        ecdf = sm.distributions.ECDF(a)
        x = np.linspace(min(a), max(a))
        y = ecdf(x)
        return x, y
    a1, b1, = plot_e(ent)
    plt.plot(a1, b1, label='inlier dropout')
    plt.grid(True)
    plt.xlabel('Entropy')
    plt.show()
    """
    if epoch:
        print('Average loss: {}, Accuracy: {}/{} ({}%)'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    return acc, test_loss
Example 10
def load_data(args):
    if args.dataset == 'mnist':
        return datagen.load_mnist(args)
    elif args.dataset == 'cifar':
        return datagen.load_cifar(args)
    elif args.dataset == 'fmnist':
        return datagen.load_fashion_mnist(args)
    elif args.dataset == 'cifar_hidden':
        class_list = [0]  # just load class 0
        return datagen.load_cifar_hidden(args, class_list)
    else:
        print('Dataset not specified correctly')
        print('choose --dataset <mnist, fmnist, cifar, cifar_hidden>')
Example 11
def run_adv_hyper(args, hypernet):
    arch = get_network(args)
    arch.lnames = args.stat['layer_names']
    model_base, fmodel_base = sample_fmodel(hypernet, arch)
    fgs = foolbox.attacks.HyperBIM(fmodel_base)
    _, test_loader = datagen.load_mnist(args)
    adv, y = [],  []
    for eps in [0.2, .3, 1.0]:
        total_adv = 0
        acc, _accs, _vars, _stds = [], [], [], []
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.cuda(), target.cuda()
            adv_batch, target_batch, _ = attack_batch_hyper(
                    data, target, fmodel_base, eps, fgs, hypernet, arch)
            if adv_batch is None:
                continue
            output = model_base(adv_batch)
            pred = output.data.max(1, keepdim=True)[1]
            correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
            n_adv = len(target_batch) - correct.item()
            total_adv += n_adv
            padv = np.argmax(fmodel_base.predictions(
                adv_batch[0].cpu().numpy()))
            sample_adv, pred_labels = [], []
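            # re-sample 10 networks from the hypernetwork and measure their agreement on the adversarial batch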
            for _ in range(10):
                model, fmodel = sample_fmodel(hypernet, arch) 
                output = model(adv_batch)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                acc.append(correct.item())
                n_adv_sample = len(target_batch)-correct.item()
                sample_adv.append(n_adv_sample)
                pred_labels.append(pred.view(pred.numel()))

            p_labels = torch.stack(pred_labels).float().transpose(0, 1)
            acc = torch.tensor(acc, dtype=torch.float)
            _accs.append(torch.mean(acc))
            _vars.append(p_labels.var(1).mean())
            _stds.append(p_labels.std(1).mean())
            acc, adv, y = [], [], []

        print ('Eps: {}, Adv: {}/{}, var: {}, std: {}'.format(eps,
            total_adv, len(test_loader.dataset), torch.tensor(_vars).mean(),
            torch.tensor(_stds).mean()))
Example 12
def test_acc_single(args, Z, names, arch, pop_size):
    _, test_loader = datagen.load_mnist(args)
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for i in range(pop_size):
                params = [Z[0][i], Z[1][i], Z[2][i]]
                model = weights_to_clf(params, names, arch)
                output = model(data)
                outputs.append(output)
            pop_outputs = F.softmax(torch.stack(outputs).mean(0), dim=1)
            pop_outputs = pop_outputs.view(-1, 10)
            pred = pop_outputs.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
            # loss is over one network only, I don't want to backcalc the loss from indexes
        acc = (correct.float() / len(test_loader.dataset)).item()
    return acc, 0
Example 13
def measure_acc(args, hypernet, arch):
    _, test_loader = datagen.load_mnist(args)
    test_loss = 0
    correct = 0.
    criterion = nn.CrossEntropyLoss()
    e1, e5, e10, e100 = 0., 0., 0., 0.
    for n in [1, 5, 10, 100]:
        test_acc = 0.
        test_loss = 0.
        weights = utils.sample_hypernet(hypernet, n)
        for i, (data, y) in enumerate(test_loader):
            n_votes = []
            for k in range(n):
                sample_w = (weights[0][k], weights[1][k], weights[2][k])
                model = utils.weights_to_clf(sample_w, arch,
                                             args.stat['layer_names'])
                votes = model(data)
                n_votes.append(votes.cpu().numpy())
            votes = np.array(n_votes)
            vote_modes = stats.mode(votes, axis=0)[0]
            vote_modes = torch.tensor(vote_modes)
            if n == 1:
                e1 += vote_modes.eq(
                    y.data.view_as(vote_modes)).long().cpu().sum()
            elif n == 5:
                e5 += vote_modes.eq(
                    y.data.view_as(vote_modes)).long().cpu().sum()
            elif n == 10:
                e10 += vote_modes.eq(
                    y.data.view_as(vote_modes)).long().cpu().sum()
            elif n == 100:
                e100 += vote_modes.eq(
                    y.data.view_as(vote_modes)).long().cpu().sum()

    test_loss /= len(test_loader.dataset) * args.batch_size
    test_acc /= len(test_loader.dataset) * args.batch_size
    e1 = e1.item() / len(test_loader.dataset)
    e5 = e5.item() / len(test_loader.dataset)
    e10 = e10.item() / len(test_loader.dataset)
    e100 = e100.item() / len(test_loader.dataset)
    print('Test Accuracy: {}, Test Loss: {}'.format(test_acc, test_loss))
Example 14
def run_adv_model(args, models):
    fmodels = [attacks.load_model(model) for model in models]
    criterion = Misclassification()
    fgs = foolbox.attacks.HyperBIM(fmodels[0])
    _, test_loader = datagen.load_mnist(args)

    adv, y, inter = [],  [], []
    acc, _accs = [], []
    total_adv, total_correct = 0, 0
    missed = 0
    
    for eps in [0.01, 0.03, 0.08, 0.1, .2, .3, 1]:
        total_adv = 0
        _accs, _vars, _stds = [], [], []
        pred_labels = []
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            adv_batch, target_batch, _ = attack_batch_ensemble(data, target, eps, fgs, fmodels)
            if adv_batch is None:
                continue
            n_adv = 0.
            acc, pred_labels = [], []
            output = ensemble_prediction(models, adv_batch)
            for i in range(5):
                pred = output[i].data.max(1, keepdim=True)[1]
                correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                n_adv += len(target_batch)-correct.item()
                pred_labels.append(pred.view(pred.numel()))

            ens_pred = output.mean(0).data.max(1, keepdim=True)[1]
            ens_correct = ens_pred.eq(target_batch.data.view_as(ens_pred)).long().cpu().sum()
            total_adv += len(target_batch) - ens_correct.item()

            p_labels = torch.stack(pred_labels).float().transpose(0, 1)
            _vars.append(p_labels.var(1).mean())
            _stds.append(p_labels.std(1).mean())
            acc, adv, y = [], [], []

        print ('Eps: {}, Adv: {}/{}, var: {}, std: {}'.format(eps,
            total_adv, len(test_loader.dataset), torch.tensor(_vars).mean(),
            torch.tensor(_stds).mean()))
Example 15
    def __init__(self, args):
        self.lr = args.lr
        self.wd = args.wd
        self.epochs = 200
        self.dataset = args.dataset
        self.test_uncertainty = args.test_uncertainty
        self.vote = args.vote
        self.device = torch.device('cuda')
        torch.manual_seed(8734)        
        
        self.model = models.LeNet_Dropout().to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(), self.lr, weight_decay=self.wd)

        if self.dataset == 'mnist':
            self.data_train, self.data_test = datagen.load_mnist()
        elif self.dataset == 'cifar':
            self.data_train, self.data_test = datagen.load_cifar()

        self.best_test_acc = 0.
        self.best_test_loss = np.inf
        print (self.model)
Example 16
def run_anomaly_mnist(args, hypernet):
    arch = get_network(args)
    train, test = datagen.load_mnist(args)
    _vars, _stds, _ents = [], [], []
    model = sample_model(hypernet, arch)
    for n in [5, 10, 100]:
        for idx, (data, target) in enumerate(test):
            data, target = data.cuda(), target.cuda()
            pred_labels = []
            logits = []
            for _ in range(n):
                model = sample_model(hypernet, arch)
                output = model(data)
                logits.append(output)
            probs = torch.stack(logits).mean(0).float()
            _ents.append(np.apply_along_axis(E, 1, probs.detach().cpu().numpy()))

        plot_empirical_cdf(args, _ents, n)
        print('mean E: {}, max E: {}, min E:{}'.format(
            torch.tensor(_ents).mean(),
            torch.tensor(_ents).max(),
            torch.tensor(_ents).min()))
Example 17
def test_ensemble(args, models, pop_size):
    _, test_loader = datagen.load_mnist(args)
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for model in models:
                output = model(data)
                outputs.append(output)
            pop_outputs = torch.stack(outputs)
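            # hard vote: take the mode of the ensemble's predicted labels (the reshape assumes a test batch size of 100)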
            pop_labels = pop_outputs.max(2, keepdim=True)[1].view(pop_size, 100, 1)
            modes, idxs = torch.mode(pop_labels, dim=0, keepdim=True)
            modes = modes.view(100, 1)
            correct += modes.eq(target.data.view_as(modes)).long().cpu().sum()
            # loss is over one network only, I don't want to backcalc the loss from indexes
            test_loss += criterion(output, target).item()
        test_loss /= len(test_loader.dataset)
        acc = (correct.float() / len(test_loader.dataset)).item()
    return acc, test_loss
Example 18
def test(args, model):
    from scipy.stats import entropy
    _, test_loader = datagen.load_mnist(args)
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        preds = []
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.data.max(
                1, keepdim=True)[1]  # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).long().cpu().sum()
        test_loss /= len(test_loader.dataset)
        acc = (correct.float() / len(test_loader.dataset)).item()

    print('Average loss: {}, Accuracy: {}/{} ({}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return acc, test_loss
Example 19
def test_f(args, path, model, n):
    def clf(data, target, Z):
        data, target = data.cuda(), target.cuda()
        out = F.conv2d(data, Z[0], stride=1)
        out = F.relu(out)
        out = F.max_pool2d(out, 2, 2)
        out = F.conv2d(out, Z[1], stride=1)
        out = F.relu(out)
        out = F.max_pool2d(out, 2, 2)
        out = out.view(-1, 1024)
        out = F.linear(out, Z[2])
        out = F.relu(out)
        out = F.linear(out, Z[3])
        return out
    netE, W1, W2, W3, W4 = model
    x_dist = utils.create_d(args.ze)
    _, test_loader = datagen.load_mnist(args)
    correct = 0
    z = utils.sample_d(x_dist, n)
    codes = netE(z)
    l1 = W1(codes[0])
    l2 = W2(codes[1])
    l3 = W3(codes[2])
    l4 = W4(codes[3])
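    # each of the n rows in l1..l4 holds the generated weights for one sampled classifier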

    for i, (data, target) in enumerate(test_loader):
        data = data.cuda()
        target = target.cuda()
        outputs = []
        for (g1, g2, g3, g4) in zip(l1, l2, l3, l4):
            output = clf(data, target, [g1, g2, g3, g4])
            outputs.append(output)
        pop_outputs = torch.stack(outputs)
        pop_labels = pop_outputs.max(2, keepdim=True)[1].view(n, 100, 1)
        modes, idxs = torch.mode(pop_labels, dim=0, keepdim=True)
        modes = modes.view(100, 1)
        correct += modes.eq(target.data.view_as(modes)).long().cpu().sum()
    return correct.float().item() / 10000.
Example 20
def test_ent(args, Z, names, arch, pop_size, ds):
    if ds == 'mnist':
        _, test_loader = datagen.load_mnist(args)
    if ds == 'notmnist':
        _, test_loader = datagen.load_notmnist(args)
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        correct = 0.
        test_loss = 0
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            outputs = []
            for i in range(pop_size):
                params = [Z[0][i], Z[1][i], Z[2][i]]
                model = weights_to_clf(params, names, arch)
                output = F.softmax(model(data), dim=1)
                outputs.append(output)
            pop_outputs = torch.stack(outputs)
        
        pop_outputs = pop_outputs.view(pop_size, 10000, 10)
        pop_mean = pop_outputs.mean(0).view(10000, 10)
        ent = entropy(pop_mean.cpu().numpy().T)
    return ent
Example 21
    def __init__(self, args):
        self.lr = args.lr
        self.wd = args.wd
        self.epochs = 200
        self.dataset = args.dataset
        self.test_uncertainty = args.test_uncertainty
        self.vote = args.vote
        self.n_models = args.n_models
        self.device = torch.device('cuda')
        torch.manual_seed(8734)

        self.ensemble = [
            models.LeNet().to(self.device) for _ in range(self.n_models)
        ]
        self.attach_optimizers()

        if self.dataset == 'mnist':
            self.data_train, self.data_test = datagen.load_mnist()
        elif self.dataset == 'cifar':
            self.data_train, self.data_test = datagen.load_cifar()

        self.best_test_acc = 0.
        self.best_test_loss = np.inf
        print(self.ensemble[0], ' X {}'.format(self.n_models))
Example 22
def train(args):

    torch.manual_seed(1)
    netE = models.Encoderz(args).cuda()
    W1 = models.GeneratorW1(args).cuda()
    W2 = models.GeneratorW2(args).cuda()
    W3 = models.GeneratorW3(args).cuda()
    netD = models.DiscriminatorQz(args).cuda()
    print(netE, W1, W2, W3)

    if args.resume is not None:
        d = torch.load(args.resume)
        netE = utils.load_net_only(netE, d['E'])
        netD = utils.load_net_only(netD, d['D'])
        W1 = utils.load_net_only(W1, d['W1'])
        W2 = utils.load_net_only(W2, d['W2'])
        W3 = utils.load_net_only(W3, d['W3'])

    optimE = optim.Adam(netE.parameters(),
                        lr=args.lr,
                        betas=(0.5, 0.9),
                        weight_decay=5e-4)
    optimW1 = optim.Adam(W1.parameters(),
                         lr=args.lr,
                         betas=(0.5, 0.9),
                         weight_decay=5e-4)
    optimW2 = optim.Adam(W2.parameters(),
                         lr=args.lr,
                         betas=(0.5, 0.9),
                         weight_decay=5e-4)
    optimW3 = optim.Adam(W3.parameters(),
                         lr=args.lr,
                         betas=(0.5, 0.9),
                         weight_decay=5e-4)
    optimD = optim.Adam(netD.parameters(),
                        lr=args.lr,
                        betas=(0.5, 0.9),
                        weight_decay=5e-4)

    best_test_acc, best_clf_acc, best_test_loss, = 0., 0., np.inf
    args.best_loss, args.best_acc = best_test_loss, best_test_acc
    args.best_clf_loss, args.best_clf_acc = np.inf, 0

    mnist_train, mnist_test = datagen.load_mnist(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    qz_dist = utils.create_d(args.z * 3)
    one = torch.tensor(1.).cuda()
    mone = one * -1

    print("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.resume is None:
        if args.pretrain_e is True:
            for j in range(1000):
                x = utils.sample_d(x_dist, e_batch_size)
                z = utils.sample_d(z_dist, e_batch_size)
                codes = torch.stack(netE(x)).view(-1, args.z * 3)
                qz = utils.sample_d(qz_dist, e_batch_size)
                mean_loss, cov_loss = ops.pretrain_loss(codes, qz)
                loss = mean_loss + cov_loss
                loss.backward()
                optimE.step()
                netE.zero_grad()
                print('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.
                      format(j, mean_loss.item(), cov_loss.item()))
                final = loss.item()
                if loss.item() < 0.1:
                    print('Finished Pretraining Encoder')
                    break

    print('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (data, target) in enumerate(mnist_train):
            z = utils.sample_d(x_dist, args.batch_size)
            ze = utils.sample_d(z_dist, args.batch_size)
            qz = utils.sample_d(qz_dist, args.batch_size)
            codes = netE(z)
            noise = utils.sample_d(qz_dist, args.batch_size)
            log_pz = ops.log_density(ze, 2).view(-1, 1)
            d_loss, d_q = ops.calc_d_loss(args, netD, ze, codes, log_pz)
            optimD.zero_grad()
            d_loss.backward(retain_graph=True)
            optimD.step()

            l1_w, l1_b = W1(codes[0])
            l2_w, l2_b = W2(codes[1])
            l3_w, l3_b = W3(codes[2])
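            # each row of the generated weight/bias tensors parameterizes one classifier; train on the batch through every sampled network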

            clf_loss = 0
            for (g1_w, g1_b, g2_w, g2_b, g3_w,
                 g3_b) in zip(l1_w, l1_b, l2_w, l2_b, l3_w, l3_b):
                g1 = (g1_w, g1_b)
                g2 = (g2_w, g2_b)
                g3 = (g3_w, g3_b)
                loss, correct = train_clf(args, [g1, g2, g3], data, target)
                clf_loss += loss
            G_loss = clf_loss / args.batch_size  # * args.beta
            one_qz = torch.ones((args.batch_size * 3, 1),
                                requires_grad=True).cuda()
            log_qz = ops.log_density(torch.ones(args.batch_size * 3, 1),
                                     2).view(-1, 1)
            Q_loss = F.binary_cross_entropy_with_logits(d_q + log_qz, one_qz)
            total_hyper_loss = Q_loss + G_loss  #+ (gp.sum().cuda())#mean().cuda()
            total_hyper_loss.backward()

            optimE.step()
            optimW1.step()
            optimW2.step()
            optimW3.step()
            optimE.zero_grad()
            optimW1.zero_grad(), optimW2.zero_grad(), optimW3.zero_grad()

            total_loss = total_hyper_loss.item()

            if batch_idx % 50 == 0:
                acc = correct
                print('**************************************')
                print('Iter: {}'.format(batch_idx))
                print('Acc: {}, MD Loss: {}, D loss: {}'.format(
                    acc, total_hyper_loss, d_loss))
                #print ('penalties: ', gp[0].item(), gp[1].item(), gp[2].item())
                #print ('grads: ', grads)
                print('best test loss: {}'.format(args.best_loss))
                print('best test acc: {}'.format(args.best_acc))
                #print ('best clf acc: {}'.format(args.best_clf_acc))
                print('**************************************')

            if batch_idx > 1 and batch_idx % 100 == 0:
                test_acc = 0.
                test_loss = 0.
                with torch.no_grad():
                    for i, (data, y) in enumerate(mnist_test):
                        z = utils.sample_d(x_dist, args.batch_size)
                        codes = netE(z)
                        l1_w, l1_b = W1(codes[0])
                        l2_w, l2_b = W2(codes[1])
                        l3_w, l3_b = W3(codes[2])
                        for (g1_w, g1_b, g2_w, g2_b, g3_w,
                             g3_b) in zip(l1_w, l1_b, l2_w, l2_b, l3_w, l3_b):
                            g1 = (g1_w, g1_b)
                            g2 = (g2_w, g2_b)
                            g3 = (g3_w, g3_b)
                            loss, correct = train_clf(args, [g1, g2, g3], data,
                                                      y)
                            test_acc += correct.item()
                            test_loss += loss.item()
                    test_loss /= len(mnist_test.dataset) * args.batch_size
                    test_acc /= len(mnist_test.dataset) * args.batch_size

                    print('Test Accuracy: {}, Test Loss: {}'.format(
                        test_acc, test_loss))
                    #print ('Clf Accuracy: {}, Clf Loss: {}'.format(clf_acc, clf_loss))
                    if test_loss < best_test_loss:
                        best_test_loss, args.best_loss = test_loss, test_loss
                    if test_acc > best_test_acc:
                        #    best_clf_acc, args.best_clf_acc = clf_acc, clf_acc
                        utils.save_hypernet_mnist(args,
                                                  [netE, netD, W1, W2, W3],
                                                  test_acc)
                    if test_acc > best_test_acc:
                        best_test_acc, args.best_acc = test_acc, test_acc
Example 23
def train(args):
    
    torch.manual_seed(8734)
    
    netG = Generator(args).cuda()
    netD = Discriminator(args).cuda()
    print (netG, netD)

    optimG = optim.Adam(netG.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    
    mnist_train, mnist_test = datagen.load_mnist(args)
    train = inf_gen(mnist_train)
    print ('saving reals')
    reals, _ = next(train)
    if not os.path.exists('results/'): 
        os.makedirs('results')

    save_image(reals, 'results/reals.png')
    
    one = torch.tensor(1.).cuda()
    mone = (one * -1)
    
    print ('==> Begin Training')
    for iter in range(args.epochs):
        ops.batch_zero_grad([netG, netD])
        for p in netD.parameters():
            p.requires_grad = True
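        # WGAN-GP: several discriminator updates with gradient penalty per generator update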
        for _ in range(args.disc_iters):
            data, targets = next(train)
            data = data.view(args.batch_size, 28*28).cuda()
            netD.zero_grad()
            d_real = netD(data).mean()
            d_real.backward(mone, retain_graph=True)
            noise = torch.randn(args.batch_size, args.z, requires_grad=True).cuda()
            with torch.no_grad():
                fake = netG(noise)
            fake.requires_grad_(True)
            d_fake = netD(fake)
            d_fake = d_fake.mean()
            d_fake.backward(one, retain_graph=True)
            gp = ops.grad_penalty_1dim(args, netD, data, fake)
            gp.backward()
            d_cost = d_fake - d_real + gp
            wasserstein_d = d_real - d_fake
            optimD.step()

        for p in netD.parameters():
            p.requires_grad=False
        netG.zero_grad()
        noise = torch.randn(args.batch_size, args.z, requires_grad=True).cuda()
        fake = netG(noise)
        G = netD(fake)
        G = G.mean()
        G.backward(mone)
        g_cost = -G
        optimG.step()
       
        if iter % 100 == 0:
            print('iter: ', iter, 'train D cost', d_cost.cpu().item())
            print('iter: ', iter, 'train G cost', g_cost.cpu().item())
        if iter % 300 == 0:
            val_d_costs = []
            for i, (data, target) in enumerate(mnist_test):
                data = data.cuda()
                d = netD(data)
                val_d_cost = -d.mean().item()
                val_d_costs.append(val_d_cost)
            utils.generate_image(args, iter, netG)
Example 24
def train_gan(args):
    netG = init(Generator(args)).cuda()
    netD = Discriminator(args).cuda()
    optimG = optim.Adam(netG.parameters(),
                        lr=1e-4,
                        betas=(0.5, 0.9),
                        weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(),
                        lr=1e-4,
                        betas=(0.5, 0.9),
                        weight_decay=1e-4)
    mnist_train, mnist_test = datagen.load_mnist(args)
    train = inf_gen(mnist_train)
    print('saving reals')
    reals, _ = next(train)
    utils.save_images(reals.detach().cpu().numpy(), 'gan_training/reals.png')

    one = torch.tensor(1.).cuda()
    mone = one * -1
    args.batch_size = 32
    args.gan = True
    print('==> Begin Training')
    for iter in range(args.epochs):
        netG.zero_grad()
        netD.zero_grad()
        for p in netD.parameters():
            p.requires_grad = True
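        # five discriminator updates with gradient penalty per generator step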
        for _ in range(5):
            data, targets = next(train)
            data = data.view(args.batch_size, 28 * 28).cuda()
            netD.zero_grad()
            d_real = netD(data).mean()
            d_real.backward(mone, retain_graph=True)
            noise = torch.randn(args.batch_size, args.z,
                                requires_grad=True).cuda()
            fake = []
            with torch.no_grad():
                fake = sample(args, netG, noise, gan=True).view(32, -1)
            fake.requires_grad_(True)
            d_fake = netD(fake)
            d_fake = d_fake.mean()
            d_fake.backward(one, retain_graph=True)
            gp = ops.grad_penalty_1dim(args, netD, data, fake)
            gp.backward()
            d_cost = d_fake - d_real + gp
            wasserstein_d = d_real - d_fake
            optimD.step()

        for p in netD.parameters():
            p.requires_grad = False
        netG.zero_grad()
        noise = torch.randn(args.batch_size, args.z, requires_grad=True).cuda()
        fake = []
        for z in noise:
            fake.append(sample(args, netG, noise, gan=True))
        fake = torch.stack(fake)
        G = netD(fake)
        G = G.mean()
        G.backward(mone)
        g_cost = -G
        optimG.step()
        if iter % 100 == 0:
            with torch.no_grad():
                noise = torch.randn(args.batch_size,
                                    args.z,
                                    requires_grad=True).cuda()
                samples = sample(args, netG, noise, gan=True)
                samples = samples.view(-1, 28, 28).cpu().data.numpy()
                path = 'gan_training/gan_sample_{}.png'.format(iter)
                print('saving gan sample: ', path)
                utils.save_images(samples, path)
            args.gan = False
            args.batch_size = 1  # accomodate large images
            cppn(args, netG, iter, noise[:args.n])
            args.batch_size = 32
            args.gan = True
            print('iter: ', iter, 'G cost', g_cost.cpu().item())
            print('iter: ', iter, 'D cost', d_cost.cpu().item())
Example 25
def train(args):
    from torch import optim
    #torch.manual_seed(8734)
    netE = models.Encoderz(args).cuda()
    netD = models.DiscriminatorZ(args).cuda()
    E1 = models.GeneratorE1(args).cuda()
    E2 = models.GeneratorE2(args).cuda()
    #E3 = models.GeneratorE3(args).cuda()
    #E4 = models.GeneratorE4(args).cuda()
    #D1 = models.GeneratorD1(args).cuda()
    D1 = models.GeneratorD2(args).cuda()
    D2 = models.GeneratorD3(args).cuda()
    D3 = models.GeneratorD4(args).cuda()
    print(netE, netD)
    print(E1, E2, D1, D2, D3)

    optimE = optim.Adam(netE.parameters(),
                        lr=5e-4,
                        betas=(0.5, 0.9),
                        weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(),
                        lr=1e-4,
                        betas=(0.5, 0.9),
                        weight_decay=1e-4)

    Eoptim = [
        optim.Adam(E1.parameters(),
                   lr=1e-4,
                   betas=(0.5, 0.9),
                   weight_decay=1e-4),
        optim.Adam(E2.parameters(),
                   lr=1e-4,
                   betas=(0.5, 0.9),
                   weight_decay=1e-4),
        #optim.Adam(E3.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
        #optim.Adam(E4.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    ]
    Doptim = [
        #optim.Adam(D1.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4),
        optim.Adam(D1.parameters(),
                   lr=1e-4,
                   betas=(0.5, 0.9),
                   weight_decay=1e-4),
        optim.Adam(D2.parameters(),
                   lr=1e-4,
                   betas=(0.5, 0.9),
                   weight_decay=1e-4),
        optim.Adam(D3.parameters(),
                   lr=1e-4,
                   betas=(0.5, 0.9),
                   weight_decay=1e-4)
    ]

    Enets = [E1, E2]
    Dnets = [D1, D2, D3]

    best_test_loss = np.inf
    args.best_loss = best_test_loss

    mnist_train, mnist_test = datagen.load_mnist(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    one = torch.FloatTensor([1]).cuda()
    mone = (one * -1).cuda()
    print("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.pretrain_e:
        for j in range(100):
            x = utils.sample_d(x_dist, e_batch_size)
            z = utils.sample_d(z_dist, e_batch_size)
            codes = netE(x)
            for i, code in enumerate(codes):
                code = code.view(e_batch_size, args.z)
                mean_loss, cov_loss = ops.pretrain_loss(code, z)
                loss = mean_loss + cov_loss
                loss.backward(retain_graph=True)
            optimE.step()
            netE.zero_grad()
            print('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.format(
                j, mean_loss.item(), cov_loss.item()))
            final = loss.item()
            if loss.item() < 0.1:
                print('Finished Pretraining Encoder')
                break

    print('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (data, target) in enumerate(mnist_train):
            netE.zero_grad()
            for opt in Eoptim:
                opt.zero_grad()
            for opt in Doptim:
                opt.zero_grad()
            z = utils.sample_d(x_dist, args.batch_size)
            codes = netE(z)
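            # adversarially push each latent code toward the sampled noise distribution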
            for code in codes:
                noise = utils.sample_z_like((args.batch_size, args.z))
                d_real = netD(noise)
                d_fake = netD(code)
                d_real_loss = torch.log((1 - d_real).mean())
                d_fake_loss = torch.log(d_fake.mean())
                d_real_loss.backward(torch.tensor(-1,
                                                  dtype=torch.float).cuda(),
                                     retain_graph=True)
                d_fake_loss.backward(torch.tensor(-1,
                                                  dtype=torch.float).cuda(),
                                     retain_graph=True)
                d_loss = d_real_loss + d_fake_loss
            optimD.step()
            netD.zero_grad()
            z = utils.sample_d(x_dist, args.batch_size)
            codes = netE(z)
            Eweights, Dweights = [], []
            i = 0
            for net in Enets:
                Eweights.append(net(codes[i]))
                i += 1
            for net in Dnets:
                Dweights.append(net(codes[i]))
                i += 1
            d_real = []
            for code in codes:
                d = netD(code)
                d_real.append(d)

            netD.zero_grad()
            d_loss = torch.stack(d_real).log().mean() * 10.

            for layers in zip(*(Eweights + Dweights)):
                loss, _ = train_clf(args, layers, data, target)
                scaled_loss = args.beta * loss
                scaled_loss.backward(retain_graph=True)
                d_loss.backward(torch.tensor(-1, dtype=torch.float).cuda(),
                                retain_graph=True)
            optimE.step()
            for opt in Eoptim:
                opt.step()
            for opt in Doptim:
                opt.step()
            loss = loss.item()

            if batch_idx % 50 == 0:
                print('**************************************')
                print('AE MNIST Test, beta: {}'.format(args.beta))
                print('MSE Loss: {}'.format(loss))
                print('D loss: {}'.format(d_loss))
                print('best test loss: {}'.format(args.best_loss))
                print('**************************************')

            if batch_idx > 1 and batch_idx % 199 == 0:
                test_acc = 0.
                test_loss = 0.
                for i, (data, y) in enumerate(mnist_test):
                    z = utils.sample_d(x_dist, args.batch_size)
                    codes = netE(z)
                    Eweights, Dweights = [], []
                    k = 0
                    for net in Enets:
                        Eweights.append(net(codes[k]))
                        k += 1
                    for net in Dnets:
                        Dweights.append(net(codes[k]))
                        k += 1
                    for layers in zip(*(Eweights + Dweights)):
                        loss, out = train_clf(args, layers, data, y)
                        test_loss += loss.item()
                    if i == 10:
                        break
                test_loss /= 10 * len(y) * args.batch_size
                print('Test Loss: {}'.format(test_loss))
                if test_loss < best_test_loss:
                    print('==> new best stats, saving')
                    #utils.save_clf(args, z_test, test_acc)
                    if test_loss < best_test_loss:
                        best_test_loss = test_loss
                        args.best_loss = test_loss
                archE = sampleE(args).cuda()
                archD = sampleD(args).cuda()
                rand = np.random.randint(args.batch_size)
                eweight = list(zip(*Eweights))[rand]
                dweight = list(zip(*Dweights))[rand]
                modelE = utils.weights_to_clf(eweight, archE,
                                              args.statE['layer_names'])
                modelD = utils.weights_to_clf(dweight, archD,
                                              args.statD['layer_names'])
                utils.generate_image(args, batch_idx, modelE, modelD,
                                     data.cuda())
Example 26
def run_adv_hyper(args, hypernet):
    arch = get_network(args)
    models, fmodels = [], []
    #for i in range(10):
    #    model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    #    models.append(model_base)
    #    fmodels.append(fmodel_base)   
    #fmodel_base = attacks.load_model(FusedNet(models))
    model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    criterion = Misclassification()
    fgs = foolbox.attacks.BIM(fmodel_base, criterion)
    _, test_loader = datagen.load_mnist(args)
    adv, y = [],  []
    for n_models in [5, 10, 100, 1000]:
        print ('ensemble of {}'.format(n_models))
        for eps in [0.01, 0.03, 0.08, 0.1, 0.3, 0.5, 1.0]:
            total_adv = 0
            acc, _accs = [], []
            _kl_real, _kl_adv = [], []
            _soft, _logs, _vars, _ents, _lsoft = [], [], [], [], []
            _soft_adv, _logs_adv, _vars_adv, _ents_adv, _lsoft_adv = [], [], [], [], []
            for idx, (data, target) in enumerate(test_loader):
                data, target = data.cuda(), target.cuda()
                adv_batch, target_batch, _ = sample_adv_batch(
                        data, target, fmodel_base, eps, fgs)
                if adv_batch is None:
                    continue
                if len(adv_batch) < 2:
                    continue
                # get the base hypermodel's output on the adversarial batch
                output = model_base(adv_batch)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                n_adv = len(target_batch) - correct.item()
                total_adv += n_adv
               
                soft_out, pred_out, logits, lsoft_out = [], [], [], []
                soft_out_adv, pred_out_adv, logits_adv, lsoft_out_adv = [], [], [], []
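                # sample n_models classifiers and collect softmax / log-softmax outputs on both clean and adversarial batches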
                with torch.no_grad():
                    for n in range(n_models):
                        model, fmodel = sample_fmodel(args, hypernet, arch) 
                        output = model(data)
                        soft_out.append(F.softmax(output, dim=1))
                        lsoft_out.append(F.log_softmax(output, dim=1))
                        #pred_out.append(output.data.max(1, keepdim=True)[1])
                        logits.append(output)

                        output = model(adv_batch)
                        soft_out_adv.append(F.softmax(output, dim=1))
                        lsoft_out_adv.append(F.log_softmax(output, dim=1))
                        #pred_out_adv.append(output.data.max(1, keepdim=True)[1])
                        logits_adv.append(output)
                        
                softs = torch.stack(soft_out).float()
                lsoft = torch.stack(lsoft_out).float()
                #preds = torch.stack(pred_out).float()
                logs = torch.stack(logits).float()
                softs_adv = torch.stack(soft_out_adv).float()
                lsoft_adv = torch.stack(lsoft_out_adv).float()
                #preds_adv = torch.stack(pred_out_adv).float()
                logs_adv = torch.stack(logits_adv).float()
                # Measure the variance of individual logits across models.
                # The HyperGAN ensemble has lower variance across the 10 class predictions,
                # but a single logit has high variance across models.
                units_softmax = softs.var(0).mean().item() # var across models across images
                ent = float(entropy(softs.mean(0).transpose(0, 1).detach()).mean())
                #units_logprob = logs.var(0).mean().item()
                units_softmax_adv = softs_adv.var(0).mean().item() # var across models - images
                ent_adv = float(entropy(softs_adv.mean(0).transpose(0, 1).detach()).mean())
                
                log_var = lsoft.var(2).var(0)
                pop_var = softs.var(2).var(0)
                
                log_var_adv = lsoft_adv.var(0).var(1)
                pop_var_adv = softs_adv.var(0).var(1)

        
                """ Core Debug """
                # print ('softmax var: ', units_softmax)
                # print ('logprob var: ', units_logprob)
                # print ('ensemble var: ', ensemble_var)

                # build lists
                # softmax probs
                _soft.append(units_softmax)
                _soft_adv.append(units_softmax_adv)
                # softmax variance 
                _vars.append(pop_var)
                _vars_adv.append(pop_var_adv)
                # log variance
                _ents.append(ent)
                _ents_adv.append(ent_adv)

                #_logs.append(units_logprob)
                #_logs_adv.append(units_logprob_adv)

                if idx > 10:
                    print ('REAL: ent: {}'.format(torch.tensor(_ents).mean()))
                    print ('ADV Eps: {}, ent: {}'.format(eps, torch.tensor(_ents_adv).mean()))
                    break
Example 27
def train(args):

    torch.manual_seed(8734)

    netE = models.Encoder(args).cuda()
    W1 = models.GeneratorW1(args).cuda()
    W2 = models.GeneratorW2(args).cuda()
    W3 = models.GeneratorW3(args).cuda()
    W4 = models.GeneratorW4(args).cuda()
    W5 = models.GeneratorW5(args).cuda()
    netD = models.DiscriminatorZ(args).cuda()
    print(netE, W1, W2, W3, W4, W5, netD)

    optimE = optim.Adam(netE.parameters(),
                        lr=5e-3,
                        betas=(0.5, 0.9),
                        weight_decay=1e-4)
    optimW1 = optim.Adam(W1.parameters(),
                         lr=5e-4,
                         betas=(0.5, 0.9),
                         weight_decay=1e-4)
    optimW2 = optim.Adam(W2.parameters(),
                         lr=5e-4,
                         betas=(0.5, 0.9),
                         weight_decay=1e-4)
    optimW3 = optim.Adam(W3.parameters(),
                         lr=5e-4,
                         betas=(0.5, 0.9),
                         weight_decay=1e-4)
    optimW4 = optim.Adam(W4.parameters(),
                         lr=5e-4,
                         betas=(0.5, 0.9),
                         weight_decay=1e-4)
    optimW5 = optim.Adam(W5.parameters(),
                         lr=5e-4,
                         betas=(0.5, 0.9),
                         weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(),
                        lr=5e-5,
                        betas=(0.5, 0.9),
                        weight_decay=1e-4)

    m_best_test_acc, m_best_test_loss = 0., np.inf
    c_best_test_acc, c_best_test_loss = 0., np.inf
    args.m_best_loss, args.m_best_acc = m_best_test_loss, m_best_test_acc
    args.c_best_loss, args.c_best_acc = c_best_test_loss, c_best_test_acc

    mnist_train, mnist_test = datagen.load_mnist(args)
    cifar_train, cifar_test = datagen.load_cifar(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    one = torch.FloatTensor([1]).cuda()
    mone = (one * -1).cuda()
    print("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.pretrain_e:
        mask1 = torch.zeros(e_batch_size, args.ze).cuda()
        mask2 = torch.ones(e_batch_size, args.ze).cuda()
        for j in range(500):
            x = utils.sample_d(x_dist, e_batch_size)
            z = utils.sample_d(z_dist, e_batch_size)
            if j % 2 == 0: x = torch.cat((x, mask1), dim=0)
            if j % 2 == 1: x = torch.cat((x, mask2), dim=0)
            codes = netE(x)
            for i, code in enumerate(codes):
                code = code.view(e_batch_size, args.z)
                mean_loss, cov_loss = pretrain_loss(code, z)
                loss = mean_loss + cov_loss
                loss.backward(retain_graph=True)
            optimE.step()
            netE.zero_grad()
            print('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.format(
                j, mean_loss.item(), cov_loss.item()))
            final = loss.item()
            if loss.item() < 0.1:
                print('Finished Pretraining Encoder')
                break

    print('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (mnist,
                        cifar) in enumerate(zip(mnist_train, cifar_train)):
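            # alternate batches between MNIST and CIFAR, tagging the latent input with a per-dataset mask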
            if batch_idx % 2 == 0:
                data, target = mnist
                mask = torch.zeros(args.batch_size, args.ze).cuda()
            else:
                data, target = cifar
                mask = torch.ones(args.batch_size, args.ze).cuda()

            batch_zero_grad([netE, W1, W2, W3, W4, W5, netD])
            z = utils.sample_d(x_dist, args.batch_size)
            z = torch.cat((z, mask), dim=0)
            codes = netE(z)
            l1 = W1(codes[0])
            l2 = W2(codes[1])
            l3 = W3(codes[2])
            l4 = W4(codes[3])
            l5 = W5(codes[4])
            # Z Adversary
            for code in codes:
                noise = utils.sample_d(z_dist, args.batch_size)
                d_real = netD(noise)
                d_fake = netD(code)
                d_real_loss = -1 * torch.log((1 - d_real).mean())
                d_fake_loss = -1 * torch.log(d_fake.mean())
                d_real_loss.backward(retain_graph=True)
                d_fake_loss.backward(retain_graph=True)
                d_loss = d_real_loss + d_fake_loss
            optimD.step()
            # Generator (Mean test)
            netD.zero_grad()
            z = utils.sample_d(x_dist, args.batch_size)
            z = torch.cat((z, mask), dim=0)
            codes = netE(z)
            l1 = W1(codes[0])
            l2 = W2(codes[1])
            l3 = W3(codes[2])
            l4 = W4(codes[3])
            l5 = W5(codes[4])
            d_real = []
            for code in codes:
                d = netD(code)
                d_real.append(d)
            netD.zero_grad()
            d_loss = torch.stack(d_real).log().mean() * 10.
            for (g1, g2, g3, g4, g5) in zip(l1, l2, l3, l4, l5):
                correct, loss = train_clf(args, [g1, g2, g3, g4, g5], data,
                                          target)
                scaled_loss = args.beta * loss
                if loss != loss:  # stop if the classification loss has gone NaN
                    sys.exit(0)
                scaled_loss.backward(retain_graph=True)
                d_loss.backward(torch.tensor(-1, dtype=torch.float).cuda(),
                                retain_graph=True)
            optimE.step()
            optimW1.step()
            optimW2.step()
            optimW3.step()
            optimW4.step()
            optimW5.step()

            loss = loss.item()
            """ Update Statistics """
            if batch_idx % 50 == 0 or batch_idx % 50 == 1:
                acc = correct
                print('**************************************')
                if batch_idx % 50 == 0:
                    print('MNIST Test: Enc, Dz, Lscale: {} test'.format(
                        args.beta))
                if batch_idx % 50 == 1:
                    print('CIFAR Test: Enc, Dz, Lscale: {} test'.format(
                        args.beta))
                print('Acc: {}, G Loss: {}, D Loss: {}'.format(
                    acc, loss, d_loss))
                print('best test loss: {}, {}'.format(args.m_best_loss,
                                                      args.c_best_loss))
                print('best test acc: {}, {}'.format(args.m_best_acc,
                                                     args.c_best_acc))
                print('**************************************')
            if batch_idx > 1 and batch_idx % 100 == 0:
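                # Periodic evaluation: sample fresh weights for every generated
                # network and score them on the MNIST and CIFAR test sets.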
                m_test_acc = 0.
                m_test_loss = 0.
                for i, (data, y) in enumerate(mnist_test):
                    z = utils.sample_d(x_dist, args.batch_size)
                    z = torch.cat(
                        (z, torch.zeros(args.batch_size, args.ze).cuda()),
                        dim=0)
                    w1_code, w2_code, w3_code, w4_code, w5_code = netE(z)
                    l1 = W1(w1_code)
                    l2 = W2(w2_code)
                    l3 = W3(w3_code)
                    l4 = W4(w4_code)
                    l5 = W5(w5_code)
                    for (g1, g2, g3, g4, g5) in zip(l1, l2, l3, l4, l5):
                        correct, loss = train_clf(args, [g1, g2, g3, g4, g5],
                                                  data, y)
                        m_test_acc += correct.item()
                        m_test_loss += loss.item()
                m_test_loss /= len(mnist_test.dataset) * args.batch_size
                m_test_acc /= len(mnist_test.dataset) * args.batch_size
                print('MNIST Test Accuracy: {}, Test Loss: {}'.format(
                    m_test_acc, m_test_loss))

                c_test_acc = 0.
                c_test_loss = 0.
                for i, (data, y) in enumerate(cifar_test):
                    z = utils.sample_d(x_dist, args.batch_size)
                    z = torch.cat(
                        (z, torch.ones(args.batch_size, args.ze).cuda()),
                        dim=0)
                    w1_code, w2_code, w3_code, w4_code, w5_code = netE(z)
                    l1 = W1(w1_code)
                    l2 = W2(w2_code)
                    l3 = W3(w3_code)
                    l4 = W4(w4_code)
                    l5 = W5(w5_code)
                    for (g1, g2, g3, g4, g5) in zip(l1, l2, l3, l4, l5):
                        correct, loss = train_clf(args, [g1, g2, g3, g4, g5],
                                                  data, y)
                        c_test_acc += correct.item()
                        c_test_loss += loss.item()
                c_test_loss /= len(cifar_test.dataset) * args.batch_size
                c_test_acc /= len(cifar_test.dataset) * args.batch_size
                print('CIFAR Test Accuracy: {}, Test Loss: {}'.format(
                    c_test_acc, c_test_loss))

                if m_test_loss < m_best_test_loss or m_test_acc > m_best_test_acc:
                    #utils.save_hypernet_cifar(args, [netE, W1, W2, W3, W4, W5, netD], test_acc)
                    print('==> new best stats, saving')
                    if m_test_loss < m_best_test_loss:
                        m_best_test_loss = m_test_loss
                        args.m_best_loss = m_test_loss
                    if m_test_acc > m_best_test_acc:
                        m_best_test_acc = m_test_acc
                        args.m_best_acc = m_test_acc

                if c_test_loss < c_best_test_loss or c_test_acc > c_best_test_acc:
                    #utils.save_hypernet_cifar(args, [netE, W1, W2, W3, W4, W5, netD], test_acc)
                    print('==> new best stats, saving')
                    if c_test_loss < c_best_test_loss:
                        c_best_test_loss = c_test_loss
                        args.c_best_loss = c_test_loss
                    if c_test_acc > c_best_test_acc:
                        c_best_test_acc = c_test_acc
                        args.c_best_acc = c_test_acc
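The helpers utils.create_d and utils.sample_d used throughout these examples are not shown. A minimal sketch, assuming they wrap an isotropic Gaussian prior over the latent space (the exact prior and the cuda flag are assumptions):

import torch
from torch.distributions import Normal

def create_d(dim):
    # factorized standard normal prior over `dim` latent units
    return Normal(torch.zeros(dim), torch.ones(dim))

def sample_d(dist, batch_size, cuda=True):
    # draw a (batch_size, dim) sample and move it to the GPU if requested
    z = dist.sample((batch_size,))
    return z.cuda() if cuda else z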
Esempio n. 28
0
def train(args, model):

    torch.manual_seed(1)
    netE = models.Encoderz(args).cuda()
    netG = models.Final_Small(args).cuda()
    netD = models.DiscriminatorQz(args).cuda()
    print(netE, netG, netD)

    optimE = optim.Adam(netE.parameters(),
                        lr=1e-4,
                        betas=(0.5, 0.9),
                        weight_decay=5e-4)
    optimG = optim.Adam(netG.parameters(),
                        lr=1e-4,
                        betas=(0.5, 0.9),
                        weight_decay=5e-4)
    optimD = optim.Adam(netD.parameters(),
                        lr=1e-4,
                        betas=(0.5, 0.9),
                        weight_decay=5e-4)

    best_test_acc, best_clf_acc, best_test_loss = 0., 0., np.inf
    args.best_loss, args.best_acc = best_test_loss, best_test_acc
    args.best_clf_loss, args.best_clf_acc = np.inf, 0

    mnist_train, mnist_test = datagen.load_mnist(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    one = torch.tensor(1.).cuda()
    mone = one * -1
    print("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.pretrain_e:
        for j in range(500):
            x = utils.sample_d(x_dist, e_batch_size)
            z = utils.sample_d(z_dist, e_batch_size)
            code = netE(x)
            qz = utils.sample_d(z_dist, e_batch_size)
            mean_loss, cov_loss = ops.pretrain_loss(code, qz)
            loss = mean_loss + cov_loss
            loss.backward()
            optimE.step()
            netE.zero_grad()
            print('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.format(
                j, mean_loss.item(), cov_loss.item()))
            final = loss.item()
            if loss.item() < 0.1:
                print('Finished Pretraining Encoder')
                break

    print('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (data, target) in enumerate(mnist_train):

            data, target = data.cuda(), target.cuda()
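            # Discriminator step: push the encoder codes toward the prior before
            # the generated weights are evaluated.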
            z = utils.sample_d(x_dist, args.batch_size)
            ze = utils.sample_d(z_dist, args.batch_size)
            qz = utils.sample_d(z_dist, args.batch_size)
            code = netE(z)
            log_pz = ops.log_density(ze, 2).view(-1, 1)
            d_loss, d_q = ops.calc_d_loss(args, netD, ze, [code], log_pz)
            d_loss.backward(retain_graph=True)
            optimD.step()
            optimE.step()
            optimD.zero_grad()
            optimE.zero_grad()

            gen_layers = netG(code)

            gp, grads, norms = ops.calc_gradient_penalty_layer(z, netG, netE)
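            # Gradient penalty statistics are logged (grads, norms), but gp itself
            # is not added to the hypernet loss below.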
            grads = grads.mean(0).mean(0).item()
            accs = torch.zeros(len(gen_layers)).cuda()
            losses = torch.zeros(len(gen_layers)).cuda()
            for i, layer in enumerate(gen_layers):
                output = model(data, layer)
                loss = F.cross_entropy(output, target)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(target.data.view_as(pred)).long().cpu().sum()
                losses[i] = loss
                accs[i] = correct
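            # Train against the worst sampled network (max loss); report the mean correct count.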
            G_loss, correct = losses.max(), accs.mean()
            one_qz = torch.ones((args.batch_size, 1)).cuda()
            log_qz = ops.log_density(torch.ones(args.batch_size, 1),
                                     2).view(-1, 1)
            Q_loss = F.binary_cross_entropy_with_logits(d_q + log_qz, one_qz)
            total_hyper_loss = Q_loss + G_loss  # gp is computed for logging but not added here
            total_hyper_loss.backward()

            optimE.step()
            optimG.step()
            optimE.zero_grad()
            optimG.zero_grad()
            total_loss = total_hyper_loss.item()

            if batch_idx % 50 == 0:
                acc = correct
                print('**************************************')
                print('Iter: {}'.format(len(logger['acc'])))
                print('Acc: {}, MD Loss: {}, D loss: {}'.format(
                    acc, total_hyper_loss, d_loss))
                print('penalties: ', gp.item())
                print('grads: ', grads)
                print('best test loss: {}'.format(args.best_loss))
                print('best test acc: {}'.format(args.best_acc))
                print('best clf acc: {}'.format(args.best_clf_acc))
                print('**************************************')

            if batch_idx % 100 == 0:
                test_acc = 0.
                test_loss = 0.
                with torch.no_grad():
                    for i, (data, target) in enumerate(mnist_test):
                        data, target = data.cuda(), target.cuda()
                        z = utils.sample_d(x_dist, args.batch_size)
                        code = netE(z)
                        gen_layers = netG(code)
                        for layer in gen_layers:
                            output = model(data, layer)
                            test_loss += F.cross_entropy(output, target)
                            pred = output.data.max(1, keepdim=True)[1]
                            test_acc += pred.eq(
                                target.data.view_as(pred)).float().sum()
                    test_loss /= len(mnist_test.dataset) * args.batch_size
                    test_acc /= len(mnist_test.dataset) * args.batch_size
                    clf_acc, clf_loss = test_clf(args, gen_layers)
                    stats.update_logger(gen_layers, logger)
                    stats.update_acc(logger, test_acc)
                    stats.update_grad(logger, grads, norms)
                    stats.save_logger(logger, args.exp)
                    stats.plot_logger(logger)
                    print('Test Accuracy: {}, Test Loss: {}'.format(
                        test_acc, test_loss))
                    print('Clf Accuracy: {}, Clf Loss: {}'.format(
                        clf_acc, clf_loss))
                    if test_loss < best_test_loss:
                        best_test_loss, args.best_loss = test_loss, test_loss
                    if test_acc > best_test_acc:
                        best_test_acc, args.best_acc = test_acc, test_acc
                    if clf_acc > best_clf_acc:
                        best_clf_acc, args.best_clf_acc = clf_acc, clf_acc
                        utils.save_hypernet_layer(args, [netE, netD, netG],
                                                  clf_acc)
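ops.pretrain_loss is not shown in these examples. Given the printed 'Mean Loss' and 'Cov Loss', a plausible sketch is first- and second-moment matching between the encoder codes and samples from the target prior (the implementation below is an assumption, not the repository's code):

import torch

def pretrain_loss(code, prior_samples):
    # match the per-dimension means of the codes to the prior samples
    mean_loss = (code.mean(0) - prior_samples.mean(0)).pow(2).mean()
    # match the empirical covariance matrices
    c_code = code - code.mean(0, keepdim=True)
    c_prior = prior_samples - prior_samples.mean(0, keepdim=True)
    cov_code = c_code.t() @ c_code / (code.size(0) - 1)
    cov_prior = c_prior.t() @ c_prior / (prior_samples.size(0) - 1)
    cov_loss = (cov_code - cov_prior).pow(2).mean()
    return mean_loss, cov_loss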
Esempio n. 29
0
def run_adv_model(args, models):
    for model in models:
        model.eval()
    print ('models loaded')
    #models = models[:5]
    model = FusedNet(models)
    print ('made fusednet')
    fmodel = attacks.load_model(model)
    criterion = Misclassification()
    fgs = foolbox.attacks.FGSM(fmodel)
    print ('created attack')
    _, test_loader = datagen.load_mnist(args)
    print ('loaded dataset')
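    # Sweep FGSM perturbation sizes and compare predictive entropy on clean vs
    # adversarial batches.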
    for eps in [0.01, 0.03, 0.08, .1, .3, .5, 1.0]:
        total_adv = 0
        _soft, _logs, _vars, _ents, _lsoft = [], [], [], [], []
        _soft_adv, _logs_adv, _vars_adv, _ents_adv, _lsoft_adv = [], [], [], [], []
        _kl_real, _kl_adv = [], []
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.cuda(), target.cuda()
            adv_batch, target_batch, _ = sample_adv_batch(data, target, fmodel, eps, fgs)
            
            if adv_batch is None:
                continue
            # get intial prediction of ensemble, sure
            output = model(adv_batch)
            pred = output.data.max(1, keepdim=True)[1]
            correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
            n_adv = len(target_batch)-correct.item()
            
            # set up to sample from individual models
            soft_out, pred_out, logits, lsoft_out = [], [], [], []
            soft_out_adv, pred_out_adv, logits_adv, lsoft_out_adv = [], [], [], []

            for i in range(len(models)):
                output = models[i](data)
                soft_out.append(F.softmax(output, dim=1))
                pred_out.append(output.data.max(1, keepdim=True)[1])
                lsoft_out.append(F.log_softmax(output, dim=1))
                logits.append(output)
                
                output = models[i](adv_batch)
                soft_out_adv.append(F.softmax(output, dim=1))
                lsoft_out_adv.append(F.log_softmax(output, dim=1))

            softs = torch.stack(soft_out).float()
            preds = torch.stack(pred_out).float()
            lsoft = torch.stack(lsoft_out).float()
            logs = torch.stack(logits).float()
            softs_adv = torch.stack(soft_out_adv).float()
            lsoft_adv = torch.stack(lsoft_out_adv).float()
            
            # Measure variance of individual logits across models.
            # The HyperGAN ensemble has low variance across the 10 class predictions,
            # but a single logit has high variance across models.
            units_softmax = softs.var(0).mean().item() # var across models across images
            units_logprob = logs.var(0).mean().item()
            ensemble_var = softs.mean(0).var(1).mean().item()  
            ent = float(entropy(softs.mean(0).transpose(0, 1).detach().cpu().numpy()).mean())
            ent_adv = float(entropy(softs_adv.mean(0).transpose(0, 1).detach().cpu().numpy()).mean())
            units_softmax_adv = softs_adv.var(0).mean().item() # var across models - images
            ensemble_var_adv = softs_adv.mean(0).var(1).mean().item()
            
            """ Core Debug """
            # print ('softmax var: ', units_softmax)
            # print ('logprob var: ', units_logprob)
            # print ('ensemble var: ', ensemble_var)
            
            # build lists
            _soft.append(units_softmax)
            _soft_adv.append(units_softmax_adv)
            
            _logs.append(units_logprob)
            # log variance
            _ents.append(ent)
            _ents_adv.append(ent_adv)

            total_adv += n_adv
            if idx > 10:
                print ('REAL: ent: {}'.format(torch.tensor(_ents).mean()))
                print ('ADV Eps: {}, ent: {}'.format(
                    eps, torch.tensor(_ents_adv).mean()))
                break
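FusedNet is not defined in these examples. A hypothetical sketch, assuming it simply averages the member models' class probabilities so foolbox can attack the ensemble as a single model (the averaging choice is an assumption):

import torch
import torch.nn as nn
import torch.nn.functional as F

class FusedNet(nn.Module):
    def __init__(self, models):
        super().__init__()
        self.models = nn.ModuleList(models)

    def forward(self, x):
        # average class probabilities over the ensemble members
        probs = torch.stack([F.softmax(m(x), dim=1) for m in self.models])
        return probs.mean(0)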
Esempio n. 30
0
def train(args):
    
    torch.manual_seed(8734)
    netE = models.Encoder(args).cuda()
    W1 = models.GeneratorW1(args).cuda()
    W2 = models.GeneratorW2(args).cuda()
    W3 = models.GeneratorW3(args).cuda()
    netD = models.DiscriminatorZ(args).cuda()
    print (netE, W1, W2, W3)

    optimE = optim.Adam(netE.parameters(), lr=5e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW1 = optim.Adam(W1.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW2 = optim.Adam(W2.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimW3 = optim.Adam(W3.parameters(), lr=1e-4, betas=(0.5, 0.9), weight_decay=1e-4)
    optimD = optim.Adam(netD.parameters(), lr=1e-5, betas=(0.5, 0.9), weight_decay=1e-4)
    
    best_test_acc, best_test_loss = 0., np.inf
    args.best_loss, args.best_acc = best_test_loss, best_test_acc

    mnist_train, mnist_test = datagen.load_mnist(args)
    x_dist = utils.create_d(args.ze)
    z_dist = utils.create_d(args.z)
    one = torch.FloatTensor([1]).cuda()
    mone = (one * -1).cuda()
    print ("==> pretraining encoder")
    j = 0
    final = 100.
    e_batch_size = 1000
    if args.pretrain_e:
        for j in range(2000):
            x = utils.sample_d(x_dist, e_batch_size)
            z = utils.sample_d(z_dist, e_batch_size)
            codes = netE(x)
            for i, code in enumerate(codes):
                code = code.view(e_batch_size, args.z)
                mean_loss, cov_loss = ops.pretrain_loss(code, z)
                loss = mean_loss + cov_loss
                loss.backward(retain_graph=True)
            optimE.step()
            netE.zero_grad()
            print ('Pretrain Enc iter: {}, Mean Loss: {}, Cov Loss: {}'.format(
                j, mean_loss.item(), cov_loss.item()))
            final = loss.item()
            if loss.item() < 0.1:
                print ('Finished Pretraining Encoder')
                break

    print ('==> Begin Training')
    for _ in range(args.epochs):
        for batch_idx, (data, target) in enumerate(mnist_train):
            ops.batch_zero_grad([netE, W1, W2, W3, netD])
            z = utils.sample_d(x_dist, args.batch_size)
            codes = netE(z)
            l1 = W1(codes[0])
            l2 = W2(codes[1])
            l3 = W3(codes[2])
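            # Optional z-space adversary: regularize the layer codes toward the prior
            # while the encoder and weight generators are frozen.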
            if args.use_d:
                ops.free_params([netD])
                ops.frozen_params([netE, W1, W2, W3])
                for code in codes:
                    noise = utils.sample_d(z_dist, args.batch_size)
                    d_real = netD(noise)
                    d_fake = netD(code)
                    d_real_loss = -1 * torch.log((1-d_real).mean())
                    d_fake_loss = -1 * torch.log(d_fake.mean())
                    d_real_loss.backward(retain_graph=True)
                    d_fake_loss.backward(retain_graph=True)
                    d_loss = d_real_loss + d_fake_loss
                optimD.step()
                ops.frozen_params([netD])
                ops.free_params([netE, W1, W2, W3])
            
            for (g1, g2, g3) in zip(l1, l2, l3):
                correct, loss = train_clf(args, [g1, g2, g3], data, target)
                scaled_loss = args.beta * loss
                scaled_loss.backward(retain_graph=True)
            optimE.step()
            optimW1.step()
            optimW2.step()
            optimW3.step()
            loss = loss.item()
                
            if batch_idx % 50 == 0:
                acc = correct
                print ('**************************************')
                print ('{} MNIST Test, beta: {}'.format(args.model, args.beta))
                print ('Acc: {}, Loss: {}'.format(acc, loss))
                print ('best test loss: {}'.format(args.best_loss))
                print ('best test acc: {}'.format(args.best_acc))
                print ('**************************************')
            
            if batch_idx > 1 and batch_idx % 199 == 0:
                test_acc = 0.
                test_loss = 0.
                ensemble = 5
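                # Build an ensemble of 5 weight samples (each taken from a random
                # index of the generated batch) and average them before evaluation.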
                for i, (data, y) in enumerate(mnist_test):
                    en1, en2, en3 = [], [], []
                    for _ in range(ensemble):
                        z = utils.sample_d(x_dist, args.batch_size)
                        codes = netE(z)
                        rand = np.random.randint(32)
                        en1.append(W1(codes[0])[rand])
                        en2.append(W2(codes[1])[rand])
                        en3.append(W3(codes[2])[rand])
                    g1 = torch.stack(en1).mean(0)
                    g2 = torch.stack(en2).mean(0)
                    g3 = torch.stack(en3).mean(0)
                    correct, loss = train_clf(args, [g1, g2, g3], data, y)
                    test_acc += correct.item()
                    test_loss += loss.item()
                test_loss /= len(mnist_test.dataset)
                test_acc /= len(mnist_test.dataset)
                """
                for (g1, g2, g3) in zip(l1, l2, l3):
                        correct, loss = train_clf(args, [g1, g2, g3], data, y)
                        test_acc += correct.item()
                        test_loss += loss.item()
                test_loss /= len(mnist_test.dataset) * args.batch_size
                test_acc /= len(mnist_test.dataset) * args.batch_size
                """
                print ('Test Accuracy: {}, Test Loss: {}'.format(test_acc, test_loss))
                if test_loss < best_test_loss or test_acc > best_test_acc:
                    print ('==> new best stats, saving')
                    #utils.save_clf(args, z_test, test_acc)
                    if test_acc > .95:
                        utils.save_hypernet_mnist(args, [netE, W1, W2, W3], test_acc)
                    if test_loss < best_test_loss:
                        best_test_loss = test_loss
                        args.best_loss = test_loss
                    if test_acc > best_test_acc:
                        best_test_acc = test_acc
                        args.best_acc = test_acc
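train_clf, which scores a set of generated weight tensors on a data batch, is not shown in these examples. A hypothetical sketch for the three-layer MNIST target of this last example, applying the generated tensors through the functional API; the layer types and shapes are assumptions:

import torch.nn.functional as F

def train_clf(args, layers, data, target):
    # layers: generated weight tensors for the target network (shapes assumed:
    # two conv filter banks and one final linear weight matrix)
    w1, w2, w3 = layers
    data, target = data.cuda(), target.cuda()
    x = F.max_pool2d(F.relu(F.conv2d(data, w1)), 2)
    x = F.max_pool2d(F.relu(F.conv2d(x, w2)), 2)
    x = x.view(x.size(0), -1)
    out = F.linear(x, w3)
    loss = F.cross_entropy(out, target)
    pred = out.data.max(1, keepdim=True)[1]
    correct = pred.eq(target.data.view_as(pred)).long().cpu().sum()
    return correct, loss

In the loops above, the returned loss is scaled by args.beta and backpropagated through the weight generators, so the evaluated classifier itself holds no trainable parameters of its own.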