import numpy as np
import torch
import torch.nn as nn

# to_var and VGG are helpers from the surrounding project (not shown on this page).


def perturb(X_nat, y, epsilon, Modell=None):
    """
    Given examples (X_nat, y), returns their adversarial
    counterparts with an attack length of epsilon.
    """
    X = np.copy(X_nat)

    X_var = to_var(torch.from_numpy(X), requires_grad=True)
    y_var = to_var(torch.LongTensor(y))
    if Modell is None:
        Modell = VGG('VGG19')
    Modell.cuda()
    scores = Modell(X_var)
    #loss = Modell.loss_fn(scores,y_var)
    criterion = nn.CrossEntropyLoss()
    loss = criterion(scores, y_var)
    loss.backward()
    grad_sign = X_var.grad.data.cpu().sign().numpy()

    X += epsilon * grad_sign
    X = np.clip(X, 0, 1)

    return X
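Every example on this page calls a to_var helper that is defined elsewhere in these projects. A minimal sketch of what such a helper usually looks like in this older, Variable-based PyTorch code, shown only as an assumption rather than the original definition:

import torch
from torch.autograd import Variable

def to_var(x, requires_grad=False):
    # Move the tensor to the GPU when available and wrap it for autograd.
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, requires_grad=requires_grad)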
Example #2
    def perturb(self, X_nat, y):
        """
        Given examples (X_nat, y), returns adversarial
        examples within epsilon of X_nat in l_infinity norm.
        """
        if self.rand:
            X = X_nat + np.random.uniform(-self.epsilon, self.epsilon,
                                          X_nat.shape).astype('float32')
        else:
            X = np.copy(X_nat)

        for i in range(self.k):
            X_var = to_var(torch.from_numpy(X), requires_grad=True)
            y_var = to_var(torch.LongTensor(y))

            scores = self.model(X_var)
            loss = self.loss_fn(scores, y_var)
            loss.backward()
            grad = X_var.grad.data.cpu().numpy()

            X += self.a * np.sign(grad)

            X = np.clip(X, X_nat - self.epsilon, X_nat + self.epsilon)
            X = np.clip(X, 0, 1)  # ensure valid pixel range

        return X
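The method above reads self.model, self.epsilon, self.k, self.a, self.rand and self.loss_fn, but the class it belongs to is not shown. A hedged sketch of such a constructor, with illustrative default values only:

import torch.nn as nn

class LinfPGDAttack(object):
    def __init__(self, model=None, epsilon=0.3, k=40, a=0.01, random_start=True):
        self.model = model                    # classifier under attack
        self.epsilon = epsilon                # l_infinity radius of the perturbation
        self.k = k                            # number of PGD iterations
        self.a = a                            # step size per iteration
        self.rand = random_start              # random start inside the epsilon ball
        self.loss_fn = nn.CrossEntropyLoss()  # loss maximized by the attack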
Example #3
    def perturb(self, x_nat, y):
        """
        Given one example (x_nat, y), returns an adversarial
        example within epsilon of x_nat in l_infinity norm.
        """
        if self.rand:
            x = x_nat + np.random.uniform(-self.epsilon, self.epsilon, 
                x_nat.shape).astype('float32')
        else:
            x = np.copy(x_nat)

        for i in range(self.k):
            x_var = to_var(torch.from_numpy(x), requires_grad=True)
            y_var = to_var(torch.LongTensor([y]))

            scores = self.model(x_var)
            loss = self.loss_fn(scores, y_var)
            loss.backward()
            grad = x_var.grad.data.cpu().numpy()

            x += self.a * np.sign(grad)

            x = np.clip(x, x_nat - self.epsilon, x_nat + self.epsilon)
            x = np.clip(x, 0, 1) # ensure valid pixel range

        return x
Example #4
def main():

    # Use the GPU if available, otherwise fall back to the CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = Net().to(device)

    optimizer = torch.optim.Adam(model.parameters())
    criterion = torch.nn.CrossEntropyLoss()

    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=batch_size,
                                   shuffle=True)
    test_loader = Data.DataLoader(dataset=test_data, batch_size=batch_size)

    adversary = FGSMAttack(epsilon=0.2)

    for epoch in range(epochs):
        for t, (x, y) in enumerate(train_loader):

            x_var, y_var = to_var(x), to_var(y.long())
            loss = criterion(model(x_var), y_var)

            # adversarial training
            if epoch + 1 > delay:
                # use predicted label to prevent label leaking
                y_pred = pred_batch(x, model)
                x_adv = adv_train(x, y_pred, model, criterion, adversary)
                x_adv_var = to_var(x_adv)
                loss_adv = criterion(model(x_adv_var), y_var)
                loss = (loss + loss_adv) / 2

            if (t + 1) % 10 == 0:
                print('t = %d, loss = %.8f' % (t + 1, loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # After each epoch, measure test-set accuracy (evaluation pass, no gradient tracking)
        with torch.no_grad():
            correct = 0
            total = 0
            sum_val_loss = 0
            for data in test_loader:
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)

                val_loss = criterion(outputs, labels)
                sum_val_loss += val_loss.item()
                # take the class with the highest score
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum()
            print('epoch=%d accuracy=%.02f%% val_loss=%.02f' %
                  (epoch + 1, (100. * correct.item() / total), sum_val_loss))
            sum_val_loss = 0.0

    torch.save(model.state_dict(), './cifar-adv-pytorch/net.pth')
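The loop above relies on pred_batch and adv_train, which are not shown on this page. A rough, assumed sketch of what they do in this style of adversarial training (the exact originals may differ):

import torch

def pred_batch(x, model):
    # Predicted labels for a clean batch; used instead of the ground truth
    # to avoid label leaking during adversarial training.
    scores = model(to_var(x))
    _, preds = torch.max(scores.data, 1)
    return preds.cpu()

def adv_train(X, y, model, criterion, adversary):
    # Craft adversarial examples for the batch with the given attack object.
    adversary.model = model
    adversary.loss_fn = criterion
    X_adv = adversary.perturb(X.numpy(), y.numpy())
    return torch.from_numpy(X_adv)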
Example #5
    def perturb(self, X_nat, y):
        """
        Given examples (X_nat, y), returns adversarial
        examples within epsilon of X_nat in l_infinity norm.
        """
        X = np.copy(X_nat)

        for i in range(self.k):
            X_var = to_var(torch.from_numpy(X), requires_grad=True)
            y_var = to_var(torch.LongTensor(y))

            scores = self.model(X_var)
            loss = self.loss_fn(scores, y_var)
            loss.backward()
            grad = X_var.grad.data.cpu().numpy()

            if self.order == 'inf':
                X += self.epsilon * np.sign(grad)
            elif self.order == '2':
                # per-example L2 norm over channel, height and width
                square = np.sum(grad ** 2, axis=(1, 2, 3))
                normalized_grad = (grad.T / np.sqrt(square)).T
                X += self.epsilon * normalized_grad
            else:
                raise NotImplementedError(
                    'Only L-inf, L2 norms FGSM attacks are implemented')

            #X = np.clip(X, X_nat - self.epsilon, X_nat + self.epsilon)
        if not self.is_train:
            X = np.clip(X, 0, 1)  # ensure valid pixel range
        #print('maximum diff adv', np.max(np.abs(X-X_nat)))
        if self.storeadv:
            X = np.clip(X, 0, 1) * 255
            print('self.k', self.k, 'self.order', self.order)

            if self.k == 1 and self.order == '2':
                cv2.imwrite(
                    os.path.join(advtrainfolder, 'PGD_Advtraining.png'),
                    np.squeeze(X[0, :]))
            else:
                cv2.imwrite(
                    os.path.join(advtrainfolder, 'IFGSM_Advtraining.png'),
                    np.squeeze(X[0, :]))

            exit(0)

        return X
Example #6
    def perturb(self, x_nat, y):
        """
        Given one example (x_nat, y), returns its adversarial
        counterpart with an attack length of epsilon.
        """
        x = np.copy(x_nat)

        x_var = to_var(torch.from_numpy(x), requires_grad=True)
        y_var = to_var(torch.LongTensor([int(y)]))

        scores = self.model(x_var)
        loss = self.loss_fn(scores, y_var)
        loss.backward()
        grad_sign = x_var.grad.data.cpu().sign().numpy()

        x += self.epsilon * grad_sign
        x = np.clip(x, 0, 1)

        return x
Example #7
 def feval():
     loss_adv = 0
     for k in range(len(x_adv)):
         x_adv_var = to_var(torch.from_numpy(x_adv[k].astype(np.float32)))
         #loss_adv = loss_adv + criterion(net(x_adv_var), y_var)
         loss_adv = loss_adv + criterion(model_list[i](x_adv_var), y_var)
         loss = criterion(model_list[i](x_var), y_var)
         loss_adv = loss_adv + loss
     loss_adv = loss_adv/2.0
     #loss_adv = loss_adv/len(x_adv)
     optimizer.zero_grad()
     loss_adv.backward()
     
     return loss_adv #TODO return loss for extension
Example #8
    def perturb(self, X_nat, y, epsilons=None):
        """
        Given examples (X_nat, y), returns their adversarial
        counterparts with an attack length of epsilon.
        """
        # Providing epsilons in batch
        if epsilons is not None:
            self.epsilon = epsilons

        X = np.copy(X_nat)

        X_var = to_var(torch.from_numpy(X), requires_grad=True)
        y_var = to_var(torch.LongTensor(y))

        scores = self.model(X_var)
        loss = self.loss_fn(scores, y_var)
        loss.backward()
        grad_sign = X_var.grad.data.cpu().sign().numpy()

        X += self.epsilon * grad_sign
        X = np.clip(X, 0, 1)

        return X
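A short, hypothetical usage of this batched FGSM perturb; the constructor call matches Example #4 above, but net, loader_test and the attribute assignments are placeholders, not part of the original code:

attack = FGSMAttack(epsilon=0.1)
attack.model = net                                # model and loss_fn assumed as attributes
attack.loss_fn = nn.CrossEntropyLoss()
for x, y in loader_test:
    x_adv = attack.perturb(x.numpy(), y.numpy())  # numpy in, numpy out, clipped to [0, 1]
    y_adv = pred_batch(torch.from_numpy(x_adv), net)
    print('fooled:', (y_adv != y).float().mean().item())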
Example #9
def jacobian(model, x, nb_classes=10):
    """
    This function will return a list of PyTorch gradients
    """
    list_derivatives = []
    x_var = to_var(torch.from_numpy(x), requires_grad=True)

    # derivatives for each class
    for class_ind in range(nb_classes):
        score = model(x_var)[:, class_ind]
        score.backward()
        list_derivatives.append(x_var.grad.data.cpu().numpy())
        x_var.grad.data.zero_()

    return list_derivatives
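Example #18 below passes this jacobian into jacobian_augmentation to grow the substitute's training set. A hedged sketch of that step in the spirit of arXiv:1602.02697 (the step size lmbda and the exact update rule are assumptions):

import numpy as np

def jacobian_augmentation(model, X_sub, y_sub, nb_classes=10, lmbda=0.1):
    # For every substitute training point, add a new point shifted along the
    # sign of the Jacobian row of its current label.
    X_aug = np.vstack([X_sub, X_sub])
    for ind in range(len(X_sub)):
        grads = jacobian(model, X_sub[ind:ind + 1], nb_classes)
        X_aug[len(X_sub) + ind] = (X_sub[ind]
                                   + lmbda * np.sign(grads[y_sub[ind]][0]))
    return X_aug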
Example #10
 def feval():
     loss_adv = 0
     for k in range(len(x_adv_temp)):
         x_adv_var = to_var(torch.from_numpy(x_adv_temp[k].astype(np.float32)))
         #loss_adv = loss_adv + criterion(net(x_adv_var), y_var)
         #add adversarial loss
         loss_adv = loss_adv + criterion(self.model_list[i](x_adv_var), y_var)
         #add clean loss
         loss_adv = loss_adv + criterion(self.model_list[i](x_var), y_var)
     loss_adv = loss_adv/2.0
     
     optimizer.zero_grad()
     loss_adv.backward()
 
     return loss_adv #TODO return loss for extension
Example #11
#adversary = FGSMAttack(epsilon=0.3)
adversary = LinfPGDAttack()

# Train the model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(net.parameters(),
                                lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])

for epoch in range(param['num_epochs']):

    print('Starting epoch %d / %d' % (epoch + 1, param['num_epochs']))

    for t, (x, y) in enumerate(loader_train):

        x_var, y_var = to_var(x), to_var(y.long())
        loss = criterion(net(x_var), y_var)

        # adversarial training
        if epoch + 1 > param['delay']:
            # use predicted label to prevent label leaking
            y_pred = pred_batch(x, net)
            x_adv = adv_train(x, y_pred, net, criterion, adversary)
            x_adv_var = to_var(x_adv)
            loss_adv = criterion(net(x_adv_var), y_var)
            loss = (loss + loss_adv) / 2

        if (t + 1) % 100 == 0:
            print('t = %d, loss = %.8f' % (t + 1, loss.item()))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example #12
# early stopping parameters
patience = param['patience']
best_loss = 1e4

# Print model to logfile
#print(net, file=logfile)

# Change optimizer for finetuning
optimizer = optim.Adam(net.parameters())

for e in range(param['nepochs']):
    print('Starting epoch %d' % (e + 1))

    for t, (x_input, y_label) in enumerate(train_loader):
        #print('t:',t)
        x_var, y_var = to_var(x_input), to_var(y_label.long())

        if args.advtraining == 'BayesWRM' or args.advtraining == 'Bayes':
            if args.advtraining == 'BayesWRM':
                x_adv = x_input.cpu()
                x_adv = adv_train(X=x_adv,
                                  y=y_label.cpu().long(),
                                  model=model_list,
                                  criterion=criterion,
                                  adversary=adversary)
            for i in range(len(model_list)):
                optimizer = SGAdaHMC(model_list[i].parameters(),
                                     config=dict(lr=args.initlr, T=args.T_out))
                #optimizer = optimizer_list[i]
                if advtraining == 'BayesWRM':
Example #13
    def perturb(self, X_nat, y):
        """
        Given examples (X_nat, y), returns adversarial
        examples within epsilon of X_nat in l_infinity norm.
        """
        import time
        start_time = time.time()
        if self.rand:
            X = X_nat + np.random.uniform(-self.epsilon, self.epsilon,
                X_nat.shape).astype('float32')
        else:
            X = np.copy(X_nat)
        Sz = self.Sz
        Stheta = self.Stheta
        MC = 15
        #List of numpy arrays
        z_list = [copy.deepcopy(X) for i in range(Sz)]
        y_list = [copy.deepcopy(y) for i in range(Sz)]

        y = np.concatenate(y_list)
        #print('X.shape', X.shape) 
        z_stack = np.concatenate(z_list)
         
        #print('Z_stack shape', z_stack.shape)
        
        y_var = to_var(torch.LongTensor(y))
        loss_fn = nn.CrossEntropyLoss()
            
                
        znn = ZNN(zinput=z_stack, y_var=y_var)         
        #optimizer = torch.optim.SGD(znn.parameters(), lr=1e-4)  
        
        if self.optim == 'SGHMC':
            optimizer = SGHMC(znn.parameters(), config=dict(lr=self.a, T=self.T, L=self.k))
        elif self.optim == 'SGAdaHMC':
            optimizer = SGAdaHMC(znn.parameters(), config=dict(lr=self.a, T=self.T, L=self.k))
            
        def helper():
            def feval():
                total_loss = znn(model_list=self.model_list, gamma=self.gamma, Stheta=Stheta)
                optimizer.zero_grad()
                total_loss.backward()
                return total_loss #TODO return loss for extension
            return feval
        total_loss = optimizer.step(helper())
        z_stack = znn.Z_var.data.cpu().numpy()

        #print('z_stack.shape', z_stack.shape)
        
        #print('inner maximization time %s' % (time.time()-start_time))
        #print('Sz step finished')
        
        z_list = []
        for i in range(Sz):
            batch_size = int(z_stack.shape[0]/Sz)
            #z_list.append(z_stack[i*batch_size:(i+1)*batch_size, :, :, :])
            z_adv = z_stack[i*batch_size:(i+1)*batch_size, :, :, :]

            z_list.append(z_adv)

            #print('maximum diff adv', np.max(np.abs(z_list[-1]-X_nat)))

        #print('Sz', Sz)
        #print('Stheta', Stheta)
        #print('len(z_list)', len(z_list))
        if self.storeadv:
            for ind, X in enumerate(z_list):
                X = np.clip(X, 0, 1)*255
                cv2.imwrite(os.path.join('advtrain_sample', 'BayesWRM_Advtraining_'+str(ind)+'.png'), np.squeeze(X[0,:]))
            exit(0)
        
        return z_list
Example #14
    def train(self):
        self.epochs = self.params.epochs

        criterion = nn.CrossEntropyLoss()
        start_epoch = 0

        if advtraining == 'BayesWRM' or advtraining == 'Bayes':
            for net in self.model_list:
                net.train()
        else:
            self.model.train()
        
        print("Starting training")
        self.print_info()

        init_lr = args.initlr

        for epoch in range(start_epoch, self.params.epochs):
            print('start epoch', str(epoch))
            print('advtraining method', advtraining)
            #break
            for i, (images, labels) in enumerate(self.train_loader):
                

                X, y = images.cuda(), labels.cuda()
                x_var, y_var = to_var(X), to_var(y)
                
                if adversary is not None:
                    x_adv = X.cpu()
                    
                    if advtraining == 'BayesWRM':
                        x_adv = adv_train(X=x_adv, y=labels.cpu().long(), model=self.model_list, criterion=criterion, adversary=adversary)   
                        
                        for i in range(Stheta):
                            x_adv_temp = list(x_adv)  # copy so that deleting below does not modify x_adv
                            if args.multi:
                                del x_adv_temp[i]

                            if epoch < 2:
                                lr = init_lr
                            elif epoch < 5:
                                lr = 0.1*init_lr
                            elif epoch < 10:
                                lr = 0.1*init_lr
                            else:
                                lr = 0.05*init_lr
                            #optimizer = SGHMC(self.model_list[i].parameters(), config=dict(lr=lr))
                            if args.outoptimizer == 'SGHMC':
                                optimizer = SGHMC(filter(lambda x: x.requires_grad, self.model_list[i].parameters()), config=dict(lr=lr, T=args.T_out))
                            elif args.outoptimizer == 'SGAdaHMC':
                                optimizer = SGAdaHMC(filter(lambda x: x.requires_grad, self.model_list[i].parameters()), config=dict(lr=0.01, T=args.T_out))
                            else:
                                raise NotImplementedError('Inner optimizer not implemented')

                            def helper():
                                def feval():
                                    loss_adv = 0
                                    for k in range(len(x_adv_temp)):
                                        x_adv_var = to_var(torch.from_numpy(x_adv_temp[k].astype(np.float32)))
                                        #loss_adv = loss_adv + criterion(net(x_adv_var), y_var)
                                        #add adversarial loss
                                        loss_adv = loss_adv + criterion(self.model_list[i](x_adv_var), y_var)
                                        #add clean loss
                                        loss_adv = loss_adv + criterion(self.model_list[i](x_var), y_var)
                                    loss_adv = loss_adv/2.0
                                    
                                    optimizer.zero_grad()
                                    loss_adv.backward()
                                
                                    return loss_adv #TODO return loss for extension
                                return feval
                            #Tracer()()
                            loss_adv = optimizer.step(helper())
                            #print('Epoch:', epoch, 'model:', i, 'loss', loss_adv.data.cpu().numpy()[0])
                        #print("Current timestamp: %s" % (utils.get_time_hhmmss()))


                    else:    
                        x_adv = adv_train(x_adv, y.cpu().long(), self.model, criterion, adversary)
                
                        x_adv_var = to_var(x_adv)
                        loss_adv = criterion(self.model(x_adv_var), y_var)
                        loss = (loss_adv + criterion(self.model(x_var),  y_var))/2.0

                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()
                
                else:
                    if advtraining == 'Bayes':
                        for i in range(Stheta):
                            if epoch < 2:
                                lr = init_lr
                            elif epoch < 5:
                                lr = 0.1*init_lr
                            elif epoch < 10:
                                lr = 0.1*init_lr
                            else:
                                lr = 0.05*init_lr
                            #optimizer = SGHMC(self.model_list[i].parameters(), config=dict(lr=lr))
                            if args.outoptimizer == 'SGHMC':
                                optimizer = SGHMC(filter(lambda x: x.requires_grad, self.model_list[i].parameters()), config=dict(lr=lr, T=args.T_out))
                            elif args.outoptimizer == 'SGAdaHMC':
                                optimizer = SGAdaHMC(filter(lambda x: x.requires_grad, self.model_list[i].parameters()), config=dict(lr=0.01, T=args.T_out))
                            else:
                                raise NotImplementedError('Outer optimizer not implemented')

                            def helper():
                                def feval():
                                    loss_adv =  criterion(self.model_list[i](x_var), y_var)
                                    optimizer.zero_grad()
                                    loss_adv.backward()
                                
                                    return loss_adv #TODO return loss for extension
                                return feval
                            #Tracer()()
                            loss = optimizer.step(helper())
                    else:
                        loss = criterion(self.model(x_var), y_var)
                    
                        self.optimizer.zero_grad()
                        loss.backward()
                        self.optimizer.step()


                if self.params.extra_debug and (i + 1) % (self.params.batch_size * 4) == 0:
                    print(('Epoch: [{0}/{1}], Step: [{2}/{3}], Loss: {4},')
                          .format(epoch + 1,
                                  self.params.epochs,
                                  i + 1,
                                  len(self.train_loader),
                                  loss.item()))

            print('entering validation; advtraining method is', advtraining)
            if  advtraining == 'BayesWRM' or advtraining == 'Bayes':
                train_acc, train_loss = self.validate_model(self.train_loader, self.model_list[0])
                val_acc, val_loss = self.validate_model(self.val_loader, self.model_list[0])
            else:
                train_acc, train_loss = self.validate_model(self.train_loader, self.model)
                val_acc, val_loss = self.validate_model(self.val_loader, self.model)
                

            self.histories['train_loss'] = np.append(self.histories['train_loss'], [train_loss])
            self.histories['val_loss'] = np.append(self.histories['val_loss'], [val_loss])
            self.histories['val_acc'] = np.append(self.histories['val_acc'], [val_acc])
            self.histories['train_acc'] = np.append(self.histories['train_acc'], [train_acc])
            print('train acc', str(train_acc), 'val acc', str(val_acc))

            print('advtraining method', advtraining)
Example #15
            total += y_train.size(0)
            correct += predicted.eq(y_train.data).cuda().sum()
            torch.cuda.empty_cache()
        else:
            loss_cl = loss2(c_pre, y_train)

            loss_sum = torch.mul(loss, 1 / 1) + loss_cl
            if epoch + 1 > param['delay']:
                # use predicted label to prevent label leaking
                y_pred = pred_batch(torch.cat((x_train, x_train, x_train), 1),
                                    n)
                x_adv = adv_train(torch.cat((x_train, x_train, x_train), 1),
                                  y_pred, n, loss2, adversary)
                n.zero_grad()
                optimizer.zero_grad()
                x_adv_var = to_var(x_adv)
                y_pre, c_pre = n(x_adv_var)
                loss_adv = loss2(c_pre, y_train) + loss1(
                    torch.mul(y_pre, 1.0),
                    torch.mul(torch.cat(
                        (x_train, x_train, x_train), 1), 1.0)) / 1
                loss_sum = (loss_sum + loss_adv) / 2
            loss_sum.backward(retain_graph=True)
            torch.nn.utils.clip_grad_norm_(n.parameters(), 10.0)
            optimizer.step()
            epoch_loss += loss_sum.data.item()
            _, predicted = torch.max(c_pre.data, 1)
            total += y_train.size(0)
            correct += predicted.eq(y_train.data).cuda().sum()

            train_globa_step += 1
Example #16
    def perturb(self, X_nat, y):
        # follows the TensorFlow CleverHans implementation
        X = np.copy(X_nat)
        #X = np.clip(X, 0, 1)
        #X = 2*X - 1
        #X = np.arctanh(X*.9999)
        batch_size = X.shape[0]

        index = y.view(-1, 1)

        #print('X.shape', X.shape)
        tlab = torch_extras.one_hot((X.shape[0], self.classes), index)
        tlab = to_var(torch.from_numpy(tlab.numpy().astype(np.int64)),
                      requires_grad=False)

        lower_bound = np.zeros(batch_size)

        X_var0 = to_var(torch.from_numpy(X), requires_grad=False)

        X_adv = np.copy(X_nat)

        if isinstance(self.model, list):
            for t in range(self.steps):
                grad_list = []
                for model in self.model:
                    X_var = to_var(torch.from_numpy(X), requires_grad=True)
                    y_var = to_var(torch.LongTensor(y))

                    scores = model(X_var)
                    tlab = tlab.type(torch.cuda.FloatTensor)
                    real = torch.sum(torch.mul(scores, tlab))
                    other = torch.sum(torch.mul(scores, 1 - tlab))

                    loss1 = torch.clamp(real - other, min=0.0)
                    #loss1 = real-other
                    loss2 = (torch.sum((X_var - X_var0)**2) + 1e-9)**0.5

                    loss = loss1 + loss2

                    loss.backward()
                    grad = X_var.grad.data.cpu().numpy()
                    grad_list.append(grad)

                grad = np.mean(grad_list, axis=0)

                # move in the opposite direction: we want to minimize the true-label score and maximize the wrong-label scores
                X_adv = X_adv - 1. / np.sqrt(t + 2) * grad

        else:
            for t in range(self.steps):
                X_var = to_var(torch.from_numpy(X), requires_grad=True)
                y_var = to_var(torch.LongTensor(y))

                scores = self.model(X_var)
                tlab = tlab.type(torch.cuda.FloatTensor)
                real = torch.sum(torch.mul(scores, tlab))
                other = torch.sum(torch.mul(scores, 1 - tlab))

                loss1 = torch.clamp(real - other, min=0.0)
                #loss1 = real-other
                loss2 = (torch.sum((X_var - X_var0)**2) + 1e-9)**0.5

                loss = loss1 + loss2

                loss.backward()
                grad = X_var.grad.data.cpu().numpy()

                X_adv = X_adv - 1. / np.sqrt(t + 2) * grad

        #print('maximum diff adv', np.max(np.abs(X_adv-X_nat)))
        if self.storeadv:
            X = np.clip(X, 0, 1) * 255
            cv2.imwrite(
                os.path.join(advtrainfolder,
                             self.advtraining + 'CWattacked.png'),
                np.squeeze(X[0, :]))
            exit(0)
        X_adv = np.clip(X_adv, 0, 1)
        return X_adv
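torch_extras.one_hot above comes from a small external package. An equivalent, assumed stand-in in plain PyTorch, should that dependency be unavailable:

import torch

def one_hot(size, index):
    # Zero tensor of the given size with ones at the label positions in
    # index (shape [batch, 1]), matching how tlab is built above.
    mask = torch.zeros(*size)
    return mask.scatter_(1, index.long(), 1.0)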
Example #17
    def perturb(self, X_nat, y, epsilons=None):
        """
        Given examples (X_nat, y), returns their adversarial
        counterparts with an attack length of epsilon.
        """
        # Providing epsilons in batch
        if epsilons is not None:
            self.epsilon = epsilons

        X = np.copy(X_nat)

        if isinstance(self.model, list):
            grad_list = []
            for model in self.model:
                X_var = to_var(torch.from_numpy(X), requires_grad=True)

                y_var = to_var(torch.LongTensor(y))
                scores = model(X_var)
                loss = self.loss_fn(scores, y_var)
                loss.backward()
                grad_list.append(X_var.grad.data.cpu().numpy())
            #grad_sign = np.mean(grad_list).sign().numpy()
            grad_sign = np.sign(np.mean(grad_list, axis=0))
            X += self.epsilon * grad_sign
            X = np.clip(X, 0, 1)
            #print('maximum diff adv', np.max(np.abs(X-X_nat)))
        else:
            X_var = to_var(torch.from_numpy(X), requires_grad=True)
            y_var = to_var(torch.LongTensor(y))

            scores = self.model(X_var)
            loss = self.loss_fn(scores, y_var)
            loss.backward()

            if self.pixelattack == 0:
                if self.order == 'inf':
                    normalized_grad = X_var.grad.data.cpu().sign().numpy()
                elif self.order == '2':
                    grad = X_var.grad.data.cpu().numpy()
                    # per-example L2 norm over channel, height and width
                    square = np.sum(grad ** 2, axis=(1, 2, 3), keepdims=True)
                    normalized_grad = grad / np.sqrt(square)
                else:
                    raise NotImplementedError(
                        'Only L-inf, L2 norms FGSM attacks are implemented')
            else:
                # keep only the top self.pixelattack pixels of the gradient
                grad = X_var.grad.data.cpu().numpy()
                topk = grad.flatten()
                topk.sort()
                topk = topk[-self.pixelattack]
                grad[grad < topk] = 0.0
                grad[grad >= topk] = 1.0
                normalized_grad = grad

            X += self.epsilon * normalized_grad

            if not self.is_train:
                X = np.clip(X, 0, 1)
            #print('maximum diff adv', np.max(np.abs(X-X_nat)))

        if self.storeadv:
            X_display = np.clip(X, 0, 1) * 255
            cv2.imwrite(
                os.path.join(
                    advtrainfolder,
                    self.advtraining + '_epsilon_' + str(self.epsilon) +
                    '_fixedindex_' + str(self.storeindex) + '_FGSMAttack.png'),
                np.squeeze(X_display[0, :]))
            #exit(0)

        return X
Example #18
def MNIST_bbox_sub(param, loader_hold_out, loader_test):
    """
    Train a substitute model using Jacobian data augmentation
    arXiv:1602.02697
    """

    # Setup the substitute
    net = SubstituteModel()

    if torch.cuda.is_available():
        print('CUDA enabled for the substitute.')
        net.cuda()
    net.train()

    # Setup the oracle
    oracle = LeNet5()

    if torch.cuda.is_available():
        print('CUDA enabled for the oracle.')
        oracle.cuda()
    oracle.load_state_dict(torch.load(param['oracle_name'] + '.pkl'))
    oracle.eval()

    # Setup training
    criterion = nn.CrossEntropyLoss()
    # Careful optimization is crucial to train a well-representative
    # substitute. In TensorFlow, Adam has some problems
    # (https://github.com/tensorflow/cleverhans/issues/183),
    # but it works fine here in PyTorch (you may try other optimizers).
    optimizer = torch.optim.Adam(net.parameters(), lr=param['learning_rate'])

    # Data held out for initial training
    data_iter = iter(loader_hold_out)
    X_sub, y_sub = next(data_iter)
    X_sub, y_sub = X_sub.numpy(), y_sub.numpy()

    # Train the substitute and augment dataset alternatively
    for rho in range(param['data_aug']):
        print("Substitute training epoch #" + str(rho))
        print("Training data: " + str(len(X_sub)))

        rng = np.random.RandomState()

        # model training
        for epoch in range(param['nb_epochs']):

            print('Starting epoch %d / %d' % (epoch + 1, param['nb_epochs']))

            # Compute number of batches
            nb_batches = int(
                np.ceil(float(len(X_sub)) / param['test_batch_size']))
            assert nb_batches * param['test_batch_size'] >= len(X_sub)

            # Indices to shuffle training set
            index_shuf = list(range(len(X_sub)))
            rng.shuffle(index_shuf)

            for batch in range(nb_batches):

                # Compute batch start and end indices
                start, end = batch_indices(batch, len(X_sub),
                                           param['test_batch_size'])

                x = X_sub[index_shuf[start:end]]
                y = y_sub[index_shuf[start:end]]

                scores = net(to_var(torch.from_numpy(x)))
                loss = criterion(scores, to_var(torch.from_numpy(y).long()))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            print('loss = %.8f' % loss.item())
        test(net,
             loader_test,
             blackbox=True,
             hold_out_size=param['hold_out_size'])

        # If we are not at last substitute training iteration, augment dataset
        if rho < param['data_aug'] - 1:
            print("Augmenting substitute training data.")
            # Perform the Jacobian augmentation
            X_sub = jacobian_augmentation(net, X_sub, y_sub)

            print("Labeling substitute training data.")
            # Label the newly generated synthetic points using the black-box
            scores = oracle(to_var(torch.from_numpy(X_sub)))
            # Note here that we take the argmax because the adversary
            # only has access to the label (not the probabilities) output
            # by the black-box model
            y_sub = np.argmax(scores.data.cpu().numpy(), axis=1)

    torch.save(net.state_dict(), param['oracle_name'] + '_sub.pkl')
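batch_indices in the loop above is also defined outside this snippet. A minimal sketch consistent with how it is called and with the assertion on the batch count (assumed behavior, mirroring the CleverHans utility of the same name):

def batch_indices(batch_nb, data_length, batch_size):
    # Start and end indices of one batch; the final batch is shifted back
    # so that every batch keeps the full batch_size.
    start = int(batch_nb * batch_size)
    end = int((batch_nb + 1) * batch_size)
    if end > data_length:
        shift = end - data_length
        start -= shift
        end -= shift
    return start, end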