def compute_and_compare_bounds(self, eps, norm, IBP, method):
    input_data = torch.randn((N, 256))
    model = BoundedModule(self.original_model, torch.empty_like(input_data))
    ptb = PerturbationLpNorm(norm=norm, eps=eps)
    ptb_data = BoundedTensor(input_data, ptb)
    pred = model(ptb_data)
    label = torch.argmax(pred, dim=1).cpu().detach().numpy()
    # Compute bounds.
    lb, ub = model.compute_bounds(IBP=IBP, method=method)
    # Compute dual norm.
    if norm == 1:
        q = np.inf
    elif norm == np.inf:
        q = 1.0
    else:
        q = 1.0 / (1.0 - (1.0 / norm))
    # Compute reference manually.
    weight, bias = list(model.parameters())
    norm = weight.norm(p=q, dim=1)
    expected_pred = input_data.matmul(weight.t()) + bias
    expected_ub = eps * norm + expected_pred
    expected_lb = -eps * norm + expected_pred
    # Check equivalence.
    self.assertEqual(expected_pred, pred)
    self.assertEqual(expected_ub, ub)
    self.assertEqual(expected_lb, lb)
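# The manual reference above relies on Hoelder's inequality: for a linear layer f(x) = Wx + b
# under an l_p input perturbation of radius eps, each output neuron moves by at most
# eps * ||w_i||_q, where q is the dual norm of p. A minimal self-contained sketch of that
# computation in plain PyTorch (no auto_LiRPA; the function name and sizes are illustrative):
import numpy as np
import torch

def linear_interval_bounds(x, weight, bias, eps, p):
    """Exact output bounds of x @ weight.T + bias under an l_p ball of radius eps around x."""
    # Dual norm exponent q with 1/p + 1/q = 1.
    if p == 1:
        q = np.inf
    elif p == np.inf:
        q = 1.0
    else:
        q = 1.0 / (1.0 - 1.0 / p)
    center = x.matmul(weight.t()) + bias    # nominal prediction
    radius = eps * weight.norm(p=q, dim=1)  # per-neuron worst-case deviation (Hoelder)
    return center - radius, center + radius

# Example usage with arbitrary sizes.
x = torch.randn(4, 256)
w, b = torch.randn(10, 256), torch.randn(10)
lb, ub = linear_interval_bounds(x, w, b, eps=0.1, p=np.inf)
assert (lb <= ub).all()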
def test(self):
    model_oris = [
        models.model_resnet(width=1, mult=2),
        models.ResNet18(in_planes=2)
    ]
    self.result = []
    for model_ori in model_oris:
        conv_mode = 'patches'  # conv_mode can be set as 'matrix' or 'patches'
        normalize = torchvision.transforms.Normalize(
            mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010])
        test_data = torchvision.datasets.CIFAR10(
            "./data", train=False, download=True,
            transform=torchvision.transforms.Compose(
                [torchvision.transforms.ToTensor(), normalize]))
        N = 1
        n_classes = 10
        image = torch.Tensor(test_data.data[:N]).reshape(N, 3, 32, 32)
        image = image.to(torch.float32) / 255.0
        model = BoundedModule(model_ori, image, bound_opts={"conv_mode": conv_mode})
        ptb = PerturbationLpNorm(norm=np.inf, eps=0.03)
        image = BoundedTensor(image, ptb)
        pred = model(image)
        lb, ub = model.compute_bounds(IBP=False, C=None, method='backward')
        self.result += [lb, ub]
    self.check()
def test():
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    model = cnn_MNIST()
    checkpoint = torch.load("../examples/vision/pretrain/mnist_cnn_small.pth", map_location="cpu")
    model.load_state_dict(checkpoint)
    N = 2
    n_classes = 10
    image = torch.randn(N, 1, 28, 28)
    image = image.to(torch.float32) / 255.0
    model = BoundedModule(model, torch.empty_like(image), device="cpu")
    eps = 0.3
    norm = np.inf
    ptb = PerturbationLpNorm(norm=norm, eps=eps)
    image = BoundedTensor(image, ptb)
    pred = model(image)
    lb, ub = model.compute_bounds()
    assert lb.shape == ub.shape == torch.Size((2, 10))
    path = 'data/constant_test_data'
    if args.gen_ref:
        torch.save((lb, ub), path)
    else:
        lb_ref, ub_ref = torch.load(path)
        print(lb)
        print(lb_ref)
        assert torch.allclose(lb, lb_ref)
        assert torch.allclose(ub, ub_ref)
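# The snippets in this collection all follow the same wrapping pattern. A condensed,
# self-contained sketch of that workflow (the tiny MLP and eps value are illustrative only;
# 'backward' is the CROWN-style bound method used in several snippets below):
import numpy as np
import torch
import torch.nn as nn
from auto_LiRPA import BoundedModule, BoundedTensor
from auto_LiRPA.perturbations import PerturbationLpNorm

net = nn.Sequential(nn.Linear(784, 64), nn.ReLU(), nn.Linear(64, 10))
x = torch.randn(2, 784)

# Wrap the model; the dummy input only fixes the shapes of the traced computational graph.
bounded = BoundedModule(net, torch.empty_like(x), device="cpu")

# Declare an l_inf perturbation of radius eps around x.
ptb = PerturbationLpNorm(norm=np.inf, eps=0.1)
x_bounded = BoundedTensor(x, ptb)

pred = bounded(x_bounded)  # clean prediction, as usual
lb, ub = bounded.compute_bounds(x=(x_bounded,), method='backward')
print('lower bounds:', lb)
print('upper bounds:', ub)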
def __init__(self, model_ori, pred, test, solve_slope=False, device='cuda',
             simplify=True, in_size=(1, 3, 32, 32)):
    """ convert pytorch model to auto_LiRPA module """
    layers = list(model_ori.children())
    if simplify:
        added_prop_layers = add_single_prop(layers, pred, test)
        self.layers = added_prop_layers
    else:
        self.layers = layers
    net = nn.Sequential(*self.layers)
    self.solve_slope = solve_slope
    if solve_slope:
        self.net = BoundedModule(net, torch.rand(in_size),
                                 bound_opts={
                                     'relu': 'random_evaluation',
                                     'conv_mode': 'patches'
                                 },
                                 device=device)
    else:
        self.net = BoundedModule(net, torch.rand(in_size),
                                 bound_opts={'relu': 'same-slope'}, device=device)
    self.net.eval()
def verify_model(pert_weight=True, pert_bias=True, norm=np.inf, lb_name='', ub_name=''):
    model_ori_ = models.Models['mlp_3layer_weight_perturb'](
        pert_weight=pert_weight, pert_bias=pert_bias, norm=norm).eval()
    model_ori_.load_state_dict(state_dict)
    model_ = BoundedModule(model_ori_, inputs)
    model_.ptb = model_ori.ptb
    self.verify_bounds(model_, dummy_input, IBP=True, method='backward', forward_ret=forward_ret,
                       lb_name=lb_name + '_CROWN-IBP', ub_name=ub_name + '_CROWN-IBP')  # CROWN-IBP
    self.verify_bounds(model_, dummy_input, IBP=False, method='backward', forward_ret=forward_ret,
                       lb_name=lb_name + '_CROWN', ub_name=ub_name + '_CROWN')  # CROWN
def setup_sarsa(self, lr_schedule, eps_scheduler, beta_scheduler):
    # Create the Sarsa model, with S and A as the input.
    self.sarsa_model = ValueDenseNet(self.NUM_FEATURES + self.NUM_ACTIONS, self.INITIALIZATION)
    self.sarsa_opt = optim.Adam(self.sarsa_model.parameters(), lr=self.VAL_LR, eps=1e-5)
    self.sarsa_scheduler = optim.lr_scheduler.LambdaLR(self.sarsa_opt, lr_schedule)
    self.sarsa_eps_scheduler = eps_scheduler
    self.sarsa_beta_scheduler = beta_scheduler
    # Convert model with relaxation wrapper.
    dummy_input = torch.randn(1, self.NUM_FEATURES + self.NUM_ACTIONS)
    self.relaxed_sarsa_model = BoundedModule(self.sarsa_model, dummy_input)
def train(epoch, batches, type): meter = MultiAverageMeter() assert(optimizer is not None) train = type == 'train' if args.robust: eps_scheduler.set_epoch_length(len(batches)) if train: eps_scheduler.train() eps_scheduler.step_epoch() else: eps_scheduler.eval() for i, batch in enumerate(batches): if args.robust: eps_scheduler.step_batch() eps = eps_scheduler.get_eps() else: eps = 0 acc, loss, acc_robust, loss_robust = \ step(model, ptb, batch, eps=eps, train=train) meter.update('acc', acc, len(batch)) meter.update('loss', loss, len(batch)) meter.update('acc_rob', acc_robust, len(batch)) meter.update('loss_rob', loss_robust, len(batch)) if train: if (i + 1) % args.gradient_accumulation_steps == 0 or (i + 1) == len(batches): scale_gradients(optimizer, i % args.gradient_accumulation_steps + 1, args.grad_clip) optimizer.step() optimizer.zero_grad() if lr_scheduler is not None: lr_scheduler.step() writer.add_scalar('loss_train_{}'.format(epoch), meter.avg('loss'), i + 1) writer.add_scalar('loss_robust_train_{}'.format(epoch), meter.avg('loss_rob'), i + 1) writer.add_scalar('acc_train_{}'.format(epoch), meter.avg('acc'), i + 1) writer.add_scalar('acc_robust_train_{}'.format(epoch), meter.avg('acc_rob'), i + 1) if (i + 1) % args.log_interval == 0 or (i + 1) == len(batches): logger.info('Epoch {}, {} step {}/{}: eps {:.5f}, {}'.format( epoch, type, i + 1, len(batches), eps, meter)) if lr_scheduler is not None: logger.info('lr {}'.format(lr_scheduler.get_lr())) writer.add_scalar('loss/{}'.format(type), meter.avg('loss'), epoch) writer.add_scalar('loss_robust/{}'.format(type), meter.avg('loss_rob'), epoch) writer.add_scalar('acc/{}'.format(type), meter.avg('acc'), epoch) writer.add_scalar('acc_robust/{}'.format(type), meter.avg('acc_rob'), epoch) if train: if args.loss_fusion: state_dict_loss = model_loss.state_dict() state_dict = {} for name in state_dict_loss: assert(name.startswith('model.')) state_dict[name[6:]] = state_dict_loss[name] model_ori.load_state_dict(state_dict) model_bound = BoundedModule( model_ori, (dummy_embeddings, dummy_mask), bound_opts=bound_opts, device=args.device) model.model_from_embeddings = model_bound model.save(epoch) return meter.avg('acc_rob')
def torch2network(self, torch_model):
    from auto_LiRPA import BoundedModule
    my_input = torch.empty((1, ) + self.input_shape)
    if hasattr(torch_model, "core"):
        torch_model = torch_model.core
    model = BoundedModule(torch_model, my_input, bound_opts=self.bound_opts)
    return model
def compute_and_compare_bounds(self, eps, norm, IBP, method):
    input_data = torch.randn((N, 1, input_dim, input_dim))
    model = BoundedModule(self.original_model, torch.empty_like(input_data))
    ptb = PerturbationLpNorm(norm=norm, eps=eps)
    ptb_data = BoundedTensor(input_data, ptb)
    pred = model(ptb_data)
    label = torch.argmax(pred, dim=1).cpu().detach().numpy()
    # Compute bounds.
    lb, ub = model.compute_bounds(IBP=IBP, method=method)
    # Compute reference.
    conv_weight, conv_bias = list(model.parameters())
    conv_bias = conv_bias.view(1, out_channel, 1, 1)
    matrix_eye = torch.eye(input_dim * input_dim).view(
        input_dim * input_dim, 1, input_dim, input_dim)
    # Obtain equivalent weight and bias for convolution.
    weight = self.original_model.conv(
        matrix_eye) - conv_bias  # Output is (batch, channel, height, width).
    weight = weight.view(
        input_dim * input_dim, -1)  # Dimension is (flattened_input, flattened_output).
    bias = conv_bias.repeat(1, 1, input_dim // 2, input_dim // 2).view(-1)
    flattened_data = input_data.view(N, -1)
    # Compute dual norm.
    if norm == 1:
        q = np.inf
    elif norm == np.inf:
        q = 1.0
    else:
        q = 1.0 / (1.0 - (1.0 / norm))
    # Manually compute bounds.
    norm = weight.t().norm(p=q, dim=1)
    expected_pred = flattened_data.matmul(weight) + bias
    expected_ub = eps * norm + expected_pred
    expected_lb = -eps * norm + expected_pred
    # Check equivalence.
    if method == 'backward' or method == 'forward':
        self.assertEqual(expected_pred, pred)
        self.assertEqual(expected_ub, ub)
        self.assertEqual(expected_lb, lb)
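# The identity-basis trick above (feeding torch.eye reshaped into one-hot images through the
# convolution to recover an equivalent dense weight matrix) is useful on its own. A small
# self-contained sketch of the idea, using an arbitrary example convolution rather than the
# test's self.original_model.conv:
import torch
import torch.nn as nn

# Arbitrary example: 1 input channel, 2 output channels, stride-2 convolution on 8x8 inputs.
input_dim = 8
conv = nn.Conv2d(1, 2, kernel_size=4, stride=2, padding=1)

# One-hot "images": row i of the identity becomes an image with a single 1 at pixel i.
eye = torch.eye(input_dim * input_dim).view(input_dim * input_dim, 1, input_dim, input_dim)

with torch.no_grad():
    out = conv(eye)                                           # (num_pixels, out_channels, H', W')
    bias = conv.bias.view(1, -1, 1, 1)
    weight = (out - bias).view(input_dim * input_dim, -1)     # (flattened_input, flattened_output)
    flat_bias = bias.expand(1, out.shape[1], out.shape[2], out.shape[3]).reshape(-1)

# The convolution is now the linear map x_flat @ weight + flat_bias.
x = torch.randn(3, 1, input_dim, input_dim)
with torch.no_grad():
    ref = conv(x).view(3, -1)
    lin = x.view(3, -1).matmul(weight) + flat_bias
assert torch.allclose(ref, lin, atol=1e-5)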
def main():
    torch.manual_seed(1234)
    torch.cuda.manual_seed_all(1234)
    random.seed(1234)
    np.random.seed(123)
    input_size = 11
    ## Step 1: Initial original model as usual; note that this model has BoundedParameter as its weight parameters
    model_ori = SimpleNet(input_dim=input_size)
    ## Step 2: Prepare dataset as usual
    dummy_input1 = torch.randn(1, input_size)
    inputs = (dummy_input1, )
    ## Step 3: wrap model with auto_LiRPA
    # The second parameter dummy_input is for constructing the trace of the computational graph.
    model = BoundedModule(model_ori, inputs)
    model.weight_perturbations = model_ori.weight_perturbations
    model.bias_perturbations = model_ori.bias_perturbations
    x = torch.randn(1, input_size)
    model(x)
    print('prediction', model_ori(x).squeeze().detach().cpu().numpy())
    compute_perturbations(model, x, np.linspace(0, 0.01, 5))
def test(self):
    model = cnn_MNIST()
    checkpoint = torch.load(
        "../examples/vision/pretrain/mnist_cnn_small.pth", map_location="cpu")
    model.load_state_dict(checkpoint)
    N = 2
    n_classes = 10
    image = torch.randn(N, 1, 28, 28)
    image = image.to(torch.float32) / 255.0
    model = BoundedModule(model, torch.empty_like(image), device="cpu")
    eps = 0.3
    norm = np.inf
    ptb = PerturbationLpNorm(norm=norm, eps=eps)
    image = BoundedTensor(image, ptb)
    pred = model(image)
    lb, ub = model.compute_bounds()
    assert lb.shape == ub.shape == torch.Size((2, 10))
    self.result = (lb, ub)
    self.check()
def create_relaxed_model(self, time_in_state=False):
    # Create state perturbation model for robust PPO training.
    if isinstance(self.policy_model, CtsPolicy):
        from .convex_relaxation import RelaxedCtsPolicyForState
        relaxed_policy_model = RelaxedCtsPolicyForState(
            self.NUM_FEATURES, self.NUM_ACTIONS, time_in_state=time_in_state,
            activation=self.policy_activation, policy_model=self.policy_model)
        dummy_input1 = torch.randn(1, self.NUM_FEATURES)
        inputs = (dummy_input1, )
        self.relaxed_policy_model = BoundedModule(relaxed_policy_model, inputs)
        self.robust_eps_scheduler = LinearScheduler(self.params.ROBUST_PPO_EPS,
                                                    self.params.ROBUST_PPO_EPS_SCHEDULER_OPTS)
        if self.params.ROBUST_PPO_BETA_SCHEDULER_OPTS == "same":
            self.robust_beta_scheduler = LinearScheduler(self.params.ROBUST_PPO_BETA,
                                                         self.params.ROBUST_PPO_EPS_SCHEDULER_OPTS)
        else:
            self.robust_beta_scheduler = LinearScheduler(self.params.ROBUST_PPO_BETA,
                                                         self.params.ROBUST_PPO_BETA_SCHEDULER_OPTS)
    else:
        raise NotImplementedError
def __init__(self, state_dim, action_dim, actor_network, critic_network, mini_batch_size, actor_opt_fn, critic_opt_fn, robust_params=None): super(RobustDeterministicActorCriticNet, self).__init__() if robust_params is None: robust_params = {} self.use_loss_fusion = robust_params.get('use_loss_fusion', False) # Use loss fusion to reduce complexity for convex relaxation. Default is False. self.use_full_backward = robust_params.get('use_full_backward', False) if self.use_loss_fusion: # Use auto_LiRPA to compute the L2 norm directly. self.fc_action = model_mlp_any_with_loss(state_dim, actor_network, action_dim) modules = self.fc_action._modules # Auto LiRPA wrapper self.fc_action = BoundedModule( self.fc_action, (torch.empty(size=(1, state_dim)), torch.empty(size=(1, action_dim))), device=Config.DEVICE) # self.fc_action._modules = modules for n in self.fc_action.nodes: # Find the tanh neuron in computational graph if isinstance(n, BoundTanh): self.fc_action_after_tanh = n self.fc_action_pre_tanh = n.inputs[0] break else: # Fully connected layer with [state_dim, 400, 300, action_dim] neurons and ReLU activation function self.fc_action = model_mlp_any(state_dim, actor_network, action_dim) # auto_lirpa wrapper self.fc_action = BoundedModule( self.fc_action, (torch.empty(size=(1, state_dim)), ), device=Config.DEVICE) # Fully connected layer with [state_dim + action_dim, 400, 300, 1] self.fc_critic = model_mlp_any(state_dim + action_dim, critic_network, 1) # auto_lirpa wrapper self.fc_critic = BoundedModule( self.fc_critic, (torch.empty(size=(1, state_dim + action_dim)), ), device=Config.DEVICE) self.actor_params = self.fc_action.parameters() self.critic_params = self.fc_critic.parameters() self.actor_opt = actor_opt_fn(self.actor_params) self.critic_opt = critic_opt_fn(self.critic_params) self.to(Config.DEVICE) # Create identity specification matrices self.actor_identity = torch.eye(action_dim).repeat(mini_batch_size,1,1).to(Config.DEVICE) self.critic_identity = torch.eye(1).repeat(mini_batch_size,1,1).to(Config.DEVICE) self.action_dim = action_dim self.state_dim = state_dim
def test_perturbation(self):
    np.random.seed(123)  # FIXME This seed is inconsistent with other seeds (1234)
    model_ori = models.Models['mlp_3layer_weight_perturb'](pert_weight=True, pert_bias=True).eval()
    self.result['model'] = model_ori.state_dict()
    self.result['data'] = torch.randn(8, 1, 28, 28)
    model_ori.load_state_dict(self.result['model'])
    state_dict = copy.deepcopy(model_ori.state_dict())
    dummy_input = self.result['data'].requires_grad_()
    inputs = (dummy_input,)
    model = BoundedModule(model_ori, inputs)
    forward_ret = model(dummy_input)
    model_ori.eval()
    # Check that the wrapped model agrees with the original model on the clean input.
    assert torch.isclose(model_ori(dummy_input), forward_ret, 1e-8).all()

    def verify_model(pert_weight=True, pert_bias=True, norm=np.inf, lb_name='', ub_name=''):
        model_ori_ = models.Models['mlp_3layer_weight_perturb'](
            pert_weight=pert_weight, pert_bias=pert_bias, norm=norm).eval()
        model_ori_.load_state_dict(state_dict)
        model_ = BoundedModule(model_ori_, inputs)
        model_.ptb = model_ori.ptb
        self.verify_bounds(model_, dummy_input, IBP=True, method='backward', forward_ret=forward_ret,
                           lb_name=lb_name + '_CROWN-IBP', ub_name=ub_name + '_CROWN-IBP')  # CROWN-IBP
        self.verify_bounds(model_, dummy_input, IBP=False, method='backward', forward_ret=forward_ret,
                           lb_name=lb_name + '_CROWN', ub_name=ub_name + '_CROWN')  # CROWN

    # Linf
    verify_model(pert_weight=True, pert_bias=True, norm=np.inf,
                 lb_name='l_inf_weights_bias_lb', ub_name='l_inf_weights_bias_ub')
    verify_model(pert_weight=True, pert_bias=False, norm=np.inf,
                 lb_name='l_inf_weights_lb', ub_name='l_inf_weights_ub')
    verify_model(pert_weight=False, pert_bias=True, norm=np.inf,
                 lb_name='l_inf_bias_lb', ub_name='l_inf_bias_ub')
    # L2
    verify_model(pert_weight=True, pert_bias=True, norm=2,
                 lb_name='l_2_weights_bias_lb', ub_name='l_2_weights_bias_ub')
    verify_model(pert_weight=True, pert_bias=False, norm=2,
                 lb_name='l_2_weights_lb', ub_name='l_2_weights_ub')
    verify_model(pert_weight=False, pert_bias=True, norm=2,
                 lb_name='l_2_bias_lb', ub_name='l_2_bias_ub')
    if self.generate:
        self.save()
def __init__(self, name, env, input_shape, num_actions, robust=False, width=1):
    super(QNetwork, self).__init__()
    self.env = env
    self.input_shape = input_shape
    self.num_actions = num_actions
    self.robust = robust
    if name == 'DQN':
        self.features = nn.Sequential(
            nn.Linear(input_shape[0], 128 * width),
            nn.ReLU(),
            nn.Linear(128 * width, 128 * width),
            nn.ReLU(),
            nn.Linear(128 * width, self.env.action_space.n))
    elif name == 'CnnDQN':
        self.features = nn.Sequential(
            nn.Conv2d(input_shape[0], 32 * width, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32 * width, 64 * width, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64 * width, 64 * width, kernel_size=3, stride=1),
            nn.ReLU(),
            Flatten(),
            nn.Linear(3136 * width, 512 * width),
            nn.ReLU(),
            nn.Linear(512 * width, self.num_actions))
    elif name == 'DuelingCnnDQN':
        self.features = DuelingCnnDQN(input_shape, num_actions, width)
    else:
        raise NotImplementedError(
            '{} network structure not implemented.'.format(name))
    if self.robust:
        dummy_input = torch.empty_like(torch.randn((1, ) + input_shape))
        self.features = BoundedModule(self.features, dummy_input,
                                      device="cuda" if USE_CUDA else "cpu")
def test():
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    models = [2, 3]
    paddings = [1, 2]
    strides = [1, 3]
    N = 2
    n_classes = 10
    image = torch.randn(N, 1, 28, 28)
    image = image.to(torch.float32) / 255.0
    for layer_num in models:
        for padding in paddings:
            for stride in strides:
                # print(layer_num, padding, stride)
                try:
                    model_ori = cnn_model(layer_num, padding, stride)
                except:
                    continue
                model = BoundedModule(model_ori, torch.empty_like(image), device="cpu",
                                      bound_opts={"conv_mode": "patches"})
                eps = 0.3
                norm = np.inf
                ptb = PerturbationLpNorm(norm=norm, eps=eps)
                image = BoundedTensor(image, ptb)
                pred = model(image)
                lb, ub = model.compute_bounds()
                model = BoundedModule(model_ori, torch.empty_like(image), device="cpu",
                                      bound_opts={"conv_mode": "matrix"})
                pred = model(image)
                lb_ref, ub_ref = model.compute_bounds()
                assert lb.shape == ub.shape == torch.Size((N, n_classes))
                assert torch.allclose(lb, lb_ref)
                assert torch.allclose(ub, ub_ref)
def test():
    net = ResNet18()
    N = 2
    n_classes = 10
    x = torch.randn(N, 3, 32, 32)
    y = net(x)
    device = 'cpu'
    if device == 'cuda':
        x = x.cuda()
        y = y.cuda()
    model = BoundedModule(net, torch.empty_like(x),
                          bound_opts={"conv_mode": "patches"}, device=device)
    print("Model structure: \n", str(net))
    eps = 0.3
    norm = np.inf
    ptb = PerturbationLpNorm(norm=norm, eps=eps)
    image = BoundedTensor(x, ptb)
    pred = model(image)
    lb, ub = model.compute_bounds()
    model = BoundedModule(net, torch.empty_like(x),
                          bound_opts={"conv_mode": "matrix"}, device=device)
    eps = 0.3
    norm = np.inf
    ptb = PerturbationLpNorm(norm=norm, eps=eps)
    image = BoundedTensor(x, ptb)
    pred = model(image)
    lb_ref, ub_ref = model.compute_bounds()
    # assert lb.shape == ub.shape == torch.Size((N, n_classes))
    print((lb - lb_ref).sum(), (ub - ub_ref).sum())
    assert torch.allclose(lb, lb_ref)
    assert torch.allclose(ub, ub_ref)
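# The two tests above share the same structure: compute bounds once with conv_mode 'patches'
# and once with 'matrix', then check the results agree. A small reusable helper along those
# lines (a sketch; the helper name, the tiny conv net, and the tolerance are illustrative and
# not part of the original tests):
import numpy as np
import torch
import torch.nn as nn
from auto_LiRPA import BoundedModule, BoundedTensor
from auto_LiRPA.perturbations import PerturbationLpNorm

def bounds_for_conv_mode(net, x, eps, conv_mode):
    """Compute default (backward-mode) bounds for net around x with the given conv_mode."""
    model = BoundedModule(net, torch.empty_like(x), device="cpu",
                          bound_opts={"conv_mode": conv_mode})
    ptb = PerturbationLpNorm(norm=np.inf, eps=eps)
    return model.compute_bounds(x=(BoundedTensor(x, ptb),))

# Illustrative model, not one of the models used in the tests above.
net = nn.Sequential(nn.Conv2d(1, 4, 3, stride=2, padding=1), nn.ReLU(),
                    nn.Flatten(), nn.Linear(4 * 14 * 14, 10))
x = torch.randn(2, 1, 28, 28)
lb_p, ub_p = bounds_for_conv_mode(net, x, eps=0.1, conv_mode="patches")
lb_m, ub_m = bounds_for_conv_mode(net, x, eps=0.1, conv_mode="matrix")
assert torch.allclose(lb_p, lb_m, atol=1e-5) and torch.allclose(ub_p, ub_m, atol=1e-5)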
def main(): torch.manual_seed(1234) torch.cuda.manual_seed_all(1234) random.seed(1234) np.random.seed(123) input_size = 17 action_size = 6 ## Step 1: Initial original model as usual; note that this model has BoundedParameter as its weight parameters model_ori = RelaxedCtsPolicyForState(state_dim=input_size, action_dim=action_size) state_dict = torch.load('test_policy_net.model') if not forward_one: state_dict['log_weight'] = state_dict['log_stdev'] del state_dict['log_stdev'] # model_ori.load_state_dict(state_dict) ## Step 2: Prepare dataset as usual dummy_input1 = torch.randn(1, input_size) dummy_input2 = torch.randn(1, action_size) if forward_one: inputs = (dummy_input1, ) else: inputs = (dummy_input1, dummy_input2) model_ori(*inputs) # inputs = (dummy_input1, ) # dummy_input2 = model_ori.get_means(dummy_input1) ## Step 3: wrap model with auto_LiRPA # The second parameter dummy_input is for constructing the trace of the computational graph. model = BoundedModule(model_ori, inputs) all_states = x = torch.randn(2, input_size) means = model_ori.get_means(x).detach() if forward_one: print('prediction', model_ori(x).sum()) else: print('prediction', model_ori(x, means).sum()) action_means = means perturbations = np.arange(0.0, 0.1, 0.01) compute_perturbations(model, x, means, perturbations) if forward_one: # pred = model_ori(all_states) # pred = ((pred - means) ** 2).mean() ikl, ckl = get_kl_bound(model, all_states, action_means, 0.1) ikl, ckl = get_kl_bound(model, all_states, action_means, 0.0) print('ikl', ikl.mean().item()) print('ckl', ckl.mean().item()) pred = (0.5 * ikl + 0.5 * ckl).mean() pred.backward() print('pred', pred.item()) else: iub, cub = get_kl_bound(model, all_states, action_means, 0.1) # iub, cub = get_kl_bound(model, all_states, action_means, 0) # iub, cub = model_ori(all_states, action_means).mean() print('iub', iub.mean().item()) print('cub', cub.mean().item()) kl = (0.5 * iub + 0.5 * cub).mean() kl.backward() print('kl', kl.item()) for p in model.parameters(): if p.grad is not None: print(p.size(), p.grad.abs().sum().item()) # print(p.size(), p.grad) else: print(p.size(), p.grad)
                                         shuffle=False, num_workers=4, pin_memory=True)
# For illustration we only use 2 images from the dataset
n_classes = 100
for batch_idx, (inputs, targets) in enumerate(testloader):
    if True:
        inputs, targets = inputs.cuda(), targets.cuda()
    if batch_idx < 2:
        ## Step 3: wrap model with auto_LiRPA
        # The second parameter is for constructing the trace of the computational graph, and its content is not important.
        model = BoundedModule(model, inputs, device="cuda")
        ## Step 4: Compute bounds using LiRPA given a perturbation
        eps = 0.3
        norm = np.inf
        ptb = PerturbationLpNorm(norm=norm, eps=eps)
        image = BoundedTensor(inputs, ptb)
        # Get model prediction as usual
        pred = model(image)
        label = torch.argmax(pred, dim=1).cpu().numpy()
        # Compute bounds
        lb, ub = model.compute_bounds()
        ## Step 5: Final output
        pred = pred.detach().cpu().numpy()
        lb = lb.detach().cpu().numpy()
def main(args): torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) random.seed(args.seed) np.random.seed(args.seed) ## Step 1: Initial original model as usual, see model details in models/example_feedforward.py and models/example_resnet.py model_ori = models.Models[args.model]() epoch = 0 if args.load: checkpoint = torch.load(args.load) epoch, state_dict = checkpoint['epoch'], checkpoint['state_dict'] opt_state = None try: opt_state = checkpoint['optimizer'] except KeyError: print('no opt_state found') for k, v in state_dict.items(): assert torch.isnan(v).any().cpu().numpy() == 0 and torch.isinf( v).any().cpu().numpy() == 0 model_ori.load_state_dict(state_dict) logger.log('Checkpoint loaded: {}'.format(args.load)) ## Step 2: Prepare dataset as usual dummy_input = torch.randn(1, 3, 56, 56) normalize = transforms.Normalize(mean=[0.4802, 0.4481, 0.3975], std=[0.2302, 0.2265, 0.2262]) train_data = datasets.ImageFolder(args.data_dir + '/train', transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop( 56, padding_mode='edge'), transforms.ToTensor(), normalize, ])) test_data = datasets.ImageFolder( args.data_dir + '/val', transform=transforms.Compose([ # transforms.RandomResizedCrop(64, scale=(0.875, 0.875), ratio=(1., 1.)), transforms.CenterCrop(56), transforms.ToTensor(), normalize ])) train_data = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=min( multiprocessing.cpu_count(), 4)) test_data = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size // 5, pin_memory=True, num_workers=min( multiprocessing.cpu_count(), 4)) train_data.mean = test_data.mean = torch.tensor([0.4802, 0.4481, 0.3975]) train_data.std = test_data.std = torch.tensor([0.2302, 0.2265, 0.2262]) ## Step 3: wrap model with auto_LiRPA # The second parameter dummy_input is for constructing the trace of the computational graph. 
model = BoundedModule(model_ori, dummy_input, bound_opts={'relu': args.bound_opts}, device=args.device) model_loss = BoundedModule(CrossEntropyWrapper(model_ori), (dummy_input, torch.zeros(1, dtype=torch.long)), bound_opts={ 'relu': args.bound_opts, 'loss_fusion': True }, device=args.device) model_loss = BoundDataParallel(model_loss) ## Step 4 prepare optimizer, epsilon scheduler and learning rate scheduler opt = optim.Adam(model_loss.parameters(), lr=args.lr) norm = float(args.norm) lr_scheduler = optim.lr_scheduler.MultiStepLR( opt, milestones=args.lr_decay_milestones, gamma=0.1) eps_scheduler = eval(args.scheduler_name)(args.eps, args.scheduler_opts) logger.log(str(model_ori)) if args.load: if opt_state: opt.load_state_dict(opt_state) logger.log('resume opt_state') # skip epochs if epoch > 0: epoch_length = int( (len(train_data.dataset) + train_data.batch_size - 1) / train_data.batch_size) eps_scheduler.set_epoch_length(epoch_length) eps_scheduler.train() for i in range(epoch): lr_scheduler.step() eps_scheduler.step_epoch(verbose=True) for j in range(epoch_length): eps_scheduler.step_batch() logger.log('resume from eps={:.12f}'.format(eps_scheduler.get_eps())) ## Step 5: start training if args.verify: eps_scheduler = FixedScheduler(args.eps) with torch.no_grad(): Train(model, 1, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False, final_node_name=None) else: timer = 0.0 best_err = 1e10 # with torch.autograd.detect_anomaly(): for t in range(epoch + 1, args.num_epochs + 1): logger.log("Epoch {}, learning rate {}".format( t, lr_scheduler.get_last_lr())) start_time = time.time() Train(model_loss, t, train_data, eps_scheduler, norm, True, opt, args.bound_type, loss_fusion=True) lr_scheduler.step() epoch_time = time.time() - start_time timer += epoch_time logger.log('Epoch time: {:.4f}, Total time: {:.4f}'.format( epoch_time, timer)) logger.log("Evaluating...") torch.cuda.empty_cache() # remove 'model.' in state_dict state_dict_loss = model_loss.state_dict() state_dict = {} for name in state_dict_loss: assert (name.startswith('model.')) state_dict[name[6:]] = state_dict_loss[name] with torch.no_grad(): if int(eps_scheduler.params['start']) + int( eps_scheduler.params['length']) > t >= int( eps_scheduler.params['start']): m = Train(model_loss, t, test_data, eps_scheduler, norm, False, None, args.bound_type, loss_fusion=True) else: model_ori.load_state_dict(state_dict) model = BoundedModule(model_ori, dummy_input, bound_opts={'relu': args.bound_opts}, device=args.device) model = BoundDataParallel(model) m = Train(model, t, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False) del model save_dict = { 'state_dict': state_dict, 'epoch': t, 'optimizer': opt.state_dict() } if t < int(eps_scheduler.params['start']): torch.save(save_dict, 'saved_models/natural_' + exp_name) elif t > int(eps_scheduler.params['start']) + int( eps_scheduler.params['length']): current_err = m.avg('Verified_Err') if current_err < best_err: best_err = current_err torch.save( save_dict, 'saved_models/' + exp_name + '_best_' + str(best_err)[:6]) else: torch.save(save_dict, 'saved_models/' + exp_name) torch.cuda.empty_cache()
test_data = torchvision.datasets.CIFAR100(root=DATAROOT, train=False, download=True,
                                          transform=transform_test)
testloader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False,
                                         num_workers=4, pin_memory=True)
n_classes = 100
for batch_idx, (image, targets) in enumerate(testloader):
    image, targets = image.cuda(), targets.cuda()
    if batch_idx < 2:
        print(batch_idx)
        model = BoundedModule(model, torch.empty_like(image), device="cuda")
        eps = 0.3
        norm = np.inf
        ptb = PerturbationLpNorm(norm=norm, eps=eps)
        image = BoundedTensor(image, ptb)
        # Get model prediction as usual
        pred = model(image)
        label = torch.argmax(pred, dim=1).cpu().numpy()
        # Compute bounds
        lb, ub = model.compute_bounds()
        pred = pred.detach().cpu().numpy()
        lb = lb.detach().cpu().numpy()
        ub = ub.detach().cpu().numpy()
        for i in range(batch_idx):
            print("Image {} top-1 prediction {}".format(i, label[i]))
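# After compute_bounds, the bounds can be turned into a per-example certificate: a prediction
# is verified under the perturbation if the lower bound of the predicted class exceeds the
# upper bound of every other class. A minimal sketch of that check on NumPy arrays like the
# lb, ub, label produced above (the helper name is illustrative):
import numpy as np

def verified_mask(lb, ub, label):
    """For each example, True if the predicted class is certifiably the argmax under the perturbation."""
    n = lb.shape[0]
    verified = np.zeros(n, dtype=bool)
    for i in range(n):
        others = np.ones(lb.shape[1], dtype=bool)
        others[label[i]] = False
        # Worst case: the predicted class drops to its lower bound while every other class
        # rises to its upper bound.
        verified[i] = lb[i, label[i]] > ub[i, others].max()
    return verified

# Shape-only demo with random numbers, just to show the call.
lb_demo = np.random.randn(4, 100)
ub_demo = lb_demo + np.abs(np.random.randn(4, 100))
label_demo = np.argmax((lb_demo + ub_demo) / 2, axis=1)
print(verified_mask(lb_demo, ub_demo, label_demo))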
def main(args): torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) random.seed(args.seed) np.random.seed(args.seed) ## Step 1: Initial original model as usual, see model details in models/example_feedforward.py and models/example_resnet.py if args.data == 'MNIST': model_ori = models.Models[args.model](in_ch=1, in_dim=28) else: model_ori = models.Models[args.model]() epoch = 0 if args.load: checkpoint = torch.load(args.load) epoch, state_dict = checkpoint['epoch'], checkpoint['state_dict'] opt_state = None try: opt_state = checkpoint['optimizer'] except KeyError: print('no opt_state found') for k, v in state_dict.items(): assert torch.isnan(v).any().cpu().numpy() == 0 and torch.isinf( v).any().cpu().numpy() == 0 model_ori.load_state_dict(state_dict) logger.log('Checkpoint loaded: {}'.format(args.load)) ## Step 2: Prepare dataset as usual if args.data == 'MNIST': dummy_input = torch.randn(1, 1, 28, 28) train_data = datasets.MNIST("./data", train=True, download=True, transform=transforms.ToTensor()) test_data = datasets.MNIST("./data", train=False, download=True, transform=transforms.ToTensor()) elif args.data == 'CIFAR': dummy_input = torch.randn(1, 3, 32, 32) normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]) train_data = datasets.CIFAR10("./data", train=True, download=True, transform=transforms.Compose([ transforms.RandomHorizontalFlip(), transforms.RandomCrop( 32, 4, padding_mode='edge'), transforms.ToTensor(), normalize ])) test_data = datasets.CIFAR10("./data", train=False, download=True, transform=transforms.Compose( [transforms.ToTensor(), normalize])) train_data = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=min( multiprocessing.cpu_count(), 4)) test_data = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size // 2, pin_memory=True, num_workers=min( multiprocessing.cpu_count(), 4)) if args.data == 'MNIST': train_data.mean = test_data.mean = torch.tensor([0.0]) train_data.std = test_data.std = torch.tensor([1.0]) elif args.data == 'CIFAR': train_data.mean = test_data.mean = torch.tensor( [0.4914, 0.4822, 0.4465]) train_data.std = test_data.std = torch.tensor([0.2023, 0.1994, 0.2010]) ## Step 3: wrap model with auto_LiRPA # The second parameter dummy_input is for constructing the trace of the computational graph. 
model = BoundedModule(model_ori, dummy_input, bound_opts={'relu': args.bound_opts}, device=args.device) final_name1 = model.final_name model_loss = BoundedModule(CrossEntropyWrapper(model_ori), (dummy_input, torch.zeros(1, dtype=torch.long)), bound_opts={ 'relu': args.bound_opts, 'loss_fusion': True }, device=args.device) # after CrossEntropyWrapper, the final name will change because of one additional input node in CrossEntropyWrapper final_name2 = model_loss._modules[final_name1].output_name[0] assert type(model._modules[final_name1]) == type( model_loss._modules[final_name2]) if args.no_loss_fusion: model_loss = BoundedModule(model_ori, dummy_input, bound_opts={'relu': args.bound_opts}, device=args.device) final_name2 = None model_loss = BoundDataParallel(model_loss) macs, params = profile(model_ori, (dummy_input.cuda(), )) logger.log('macs: {}, params: {}'.format(macs, params)) ## Step 4 prepare optimizer, epsilon scheduler and learning rate scheduler opt = optim.Adam(model_loss.parameters(), lr=args.lr) norm = float(args.norm) lr_scheduler = optim.lr_scheduler.MultiStepLR( opt, milestones=args.lr_decay_milestones, gamma=0.1) eps_scheduler = eval(args.scheduler_name)(args.eps, args.scheduler_opts) logger.log(str(model_ori)) # skip epochs if epoch > 0: epoch_length = int( (len(train_data.dataset) + train_data.batch_size - 1) / train_data.batch_size) eps_scheduler.set_epoch_length(epoch_length) eps_scheduler.train() for i in range(epoch): lr_scheduler.step() eps_scheduler.step_epoch(verbose=True) for j in range(epoch_length): eps_scheduler.step_batch() logger.log('resume from eps={:.12f}'.format(eps_scheduler.get_eps())) if args.load: if opt_state: opt.load_state_dict(opt_state) logger.log('resume opt_state') ## Step 5: start training if args.verify: eps_scheduler = FixedScheduler(args.eps) with torch.no_grad(): Train(model, 1, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False, final_node_name=None) else: timer = 0.0 best_acc = 1e10 # with torch.autograd.detect_anomaly(): for t in range(epoch + 1, args.num_epochs + 1): logger.log("Epoch {}, learning rate {}".format( t, lr_scheduler.get_last_lr())) start_time = time.time() Train(model_loss, t, train_data, eps_scheduler, norm, True, opt, args.bound_type, loss_fusion=not args.no_loss_fusion) lr_scheduler.step() epoch_time = time.time() - start_time timer += epoch_time logger.log('Epoch time: {:.4f}, Total time: {:.4f}'.format( epoch_time, timer)) logger.log("Evaluating...") torch.cuda.empty_cache() # remove 'model.' in state_dict for CrossEntropyWrapper state_dict_loss = model_loss.state_dict() state_dict = {} if not args.no_loss_fusion: for name in state_dict_loss: assert (name.startswith('model.')) state_dict[name[6:]] = state_dict_loss[name] else: state_dict = state_dict_loss with torch.no_grad(): if t > int(eps_scheduler.params['start']) + int( eps_scheduler.params['length']): m = Train(model_loss, t, test_data, FixedScheduler(8. 
/ 255), norm, False, None, 'IBP', loss_fusion=False, final_node_name=final_name2) else: m = Train(model_loss, t, test_data, eps_scheduler, norm, False, None, 'IBP', loss_fusion=False, final_node_name=final_name2) save_dict = { 'state_dict': state_dict, 'epoch': t, 'optimizer': opt.state_dict() } if t < int(eps_scheduler.params['start']): torch.save(save_dict, 'saved_models/natural_' + exp_name) elif t > int(eps_scheduler.params['start']) + int( eps_scheduler.params['length']): current_acc = m.avg('Verified_Err') if current_acc < best_acc: best_acc = current_acc torch.save( save_dict, 'saved_models/' + exp_name + '_best_' + str(best_acc)[:6]) else: torch.save(save_dict, 'saved_models/' + exp_name) else: torch.save(save_dict, 'saved_models/' + exp_name) torch.cuda.empty_cache()
def main(args): torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) random.seed(args.seed) np.random.seed(args.seed) ## Load the model with BoundedParameter for weight perturbation. model_ori = models.Models['mlp_3layer_weight_perturb']() epoch = 0 ## Load a checkpoint, if requested. if args.load: checkpoint = torch.load(args.load) epoch, state_dict = checkpoint['epoch'], checkpoint['state_dict'] opt_state = None try: opt_state = checkpoint['optimizer'] except KeyError: print('no opt_state found') for k, v in state_dict.items(): assert torch.isnan(v).any().cpu().numpy() == 0 and torch.isinf(v).any().cpu().numpy() == 0 model_ori.load_state_dict(state_dict) logger.log('Checkpoint loaded: {}'.format(args.load)) ## Step 2: Prepare dataset as usual dummy_input = torch.randn(1, 1, 28, 28) train_data, test_data = mnist_loaders(datasets.MNIST, batch_size=args.batch_size, ratio=args.ratio) train_data.mean = test_data.mean = torch.tensor([0.0]) train_data.std = test_data.std = torch.tensor([1.0]) ## Step 3: wrap model with auto_LiRPA # The second parameter dummy_input is for constructing the trace of the computational graph. model = BoundedModule(model_ori, dummy_input, bound_opts={'relu':args.bound_opts}, device=args.device) final_name1 = model.final_name model_loss = BoundedModule(CrossEntropyWrapper(model_ori), (dummy_input, torch.zeros(1, dtype=torch.long)), bound_opts= { 'relu': args.bound_opts, 'loss_fusion': True }, device=args.device) # after CrossEntropyWrapper, the final name will change because of one more input node in CrossEntropyWrapper final_name2 = model_loss._modules[final_name1].output_name[0] assert type(model._modules[final_name1]) == type(model_loss._modules[final_name2]) if args.multigpu: model_loss = BoundDataParallel(model_loss) model_loss.ptb = model.ptb = model_ori.ptb # Perturbation on the parameters ## Step 4 prepare optimizer, epsilon scheduler and learning rate scheduler if args.opt == 'ADAM': opt = optim.Adam(model_loss.parameters(), lr=args.lr, weight_decay=0.01) elif args.opt == 'SGD': opt = optim.SGD(model_loss.parameters(), lr=args.lr, weight_decay=0.01) norm = float(args.norm) lr_scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=args.lr_decay_milestones, gamma=0.1) eps_scheduler = eval(args.scheduler_name)(args.eps, args.scheduler_opts) logger.log(str(model_ori)) # Skip epochs if we continue training from a checkpoint. if epoch > 0: epoch_length = int((len(train_data.dataset) + train_data.batch_size - 1) / train_data.batch_size) eps_scheduler.set_epoch_length(epoch_length) eps_scheduler.train() for i in range(epoch): lr_scheduler.step() eps_scheduler.step_epoch(verbose=True) for j in range(epoch_length): eps_scheduler.step_batch() logger.log('resume from eps={:.12f}'.format(eps_scheduler.get_eps())) if args.load: if opt_state: opt.load_state_dict(opt_state) logger.log('resume opt_state') ## Step 5: start training. 
if args.verify: eps_scheduler = FixedScheduler(args.eps) with torch.no_grad(): Train(model, 1, test_data, eps_scheduler, norm, False, None, 'CROWN-IBP', loss_fusion=False, final_node_name=None) else: timer = 0.0 best_loss = 1e10 # Main training loop for t in range(epoch + 1, args.num_epochs+1): logger.log("Epoch {}, learning rate {}".format(t, lr_scheduler.get_last_lr())) start_time = time.time() # Training one epoch Train(model_loss, t, train_data, eps_scheduler, norm, True, opt, args.bound_type, loss_fusion=True) lr_scheduler.step() epoch_time = time.time() - start_time timer += epoch_time logger.log('Epoch time: {:.4f}, Total time: {:.4f}'.format(epoch_time, timer)) logger.log("Evaluating...") torch.cuda.empty_cache() # remove 'model.' in state_dict (hack for saving models so far...) state_dict_loss = model_loss.state_dict() state_dict = {} for name in state_dict_loss: assert (name.startswith('model.')) state_dict[name[6:]] = state_dict_loss[name] # Test one epoch. with torch.no_grad(): m = Train(model_loss, t, test_data, eps_scheduler, norm, False, None, args.bound_type, loss_fusion=False, final_node_name=final_name2) # Save checkpoints. save_dict = {'state_dict': state_dict, 'epoch': t, 'optimizer': opt.state_dict()} if not os.path.exists('saved_models'): os.mkdir('saved_models') if t < int(eps_scheduler.params['start']): torch.save(save_dict, 'saved_models/natural_' + exp_name) elif t > int(eps_scheduler.params['start']) + int(eps_scheduler.params['length']): current_loss = m.avg('Loss') if current_loss < best_loss: best_loss = current_loss torch.save(save_dict, 'saved_models/' + exp_name + '_best_' + str(best_loss)[:6]) else: torch.save(save_dict, 'saved_models/' + exp_name) else: torch.save(save_dict, 'saved_models/' + exp_name) torch.cuda.empty_cache()
elif args.model == 'lstm':
    dummy_mask = torch.zeros(1, args.max_sent_length, device=args.device)
    model = LSTM(args, data_train)

dev_batches = get_batches(data_dev, args.batch_size)
test_batches = get_batches(data_test, args.batch_size)

ptb = PerturbationSynonym(budget=args.budget)
dummy_embeddings = BoundedTensor(dummy_embeddings, ptb)
model_ori = model.model_from_embeddings
bound_opts = {'relu': args.bound_opts_relu, 'exp': 'no-max-input'}
if isinstance(model_ori, BoundedModule):
    model_bound = model_ori
else:
    model_bound = BoundedModule(model_ori, (dummy_embeddings, dummy_mask),
                                bound_opts=bound_opts, device=args.device)
model.model_from_embeddings = model_bound
if args.loss_fusion:
    bound_opts['loss_fusion'] = True
    model_loss = BoundedModule(
        CrossEntropyWrapperMultiInput(model_ori),
        (torch.zeros(1, dtype=torch.long), dummy_embeddings, dummy_mask),
        bound_opts=bound_opts, device=args.device)
ptb.model = model
optimizer = model.build_optimizer()
if args.lr_decay < 1:
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10,
class LiRPAConvNet: def __init__(self, model_ori, pred, test, solve_slope=False, device='cuda', simplify=True, in_size=(1, 3, 32, 32)): """ convert pytorch model to auto_LiRPA module """ layers = list(model_ori.children()) if simplify: added_prop_layers = add_single_prop(layers, pred, test) self.layers = added_prop_layers else: self.layers = layers net = nn.Sequential(*self.layers) self.solve_slope = solve_slope if solve_slope: self.net = BoundedModule(net, torch.rand(in_size), bound_opts={ 'relu': 'random_evaluation', 'conv_mode': 'patches' }, device=device) else: self.net = BoundedModule(net, torch.rand(in_size), bound_opts={'relu': 'same-slope'}, device=device) self.net.eval() def get_lower_bound(self, pre_lbs, pre_ubs, decision, slopes=None, history=[], decision_thresh=0, layer_set_bound=True, beta=True): """ # (in) pre_lbs: layers list -> tensor(batch, layer shape) # (in) relu_mask: relu layers list -> tensor(batch, relu layer shape (view-1)) # (in) slope: relu layers list -> tensor(batch, relu layer shape) # (out) lower_bounds: batch list -> layers list -> tensor(layer shape) # (out) masks_ret: batch list -> relu layers list -> tensor(relu layer shape) # (out) slope: batch list -> relu layers list -> tensor(relu layer shape) """ start = time.time() lower_bounds, upper_bounds, masks_ret, slopes = self.update_bounds_parallel( pre_lbs, pre_ubs, decision, slopes, beta=beta, early_stop=False, opt_choice="adam", iteration=20, history=history, decision_thresh=decision_thresh, layer_set_bound=layer_set_bound) end = time.time() print('batch time: ', end - start) return [i[-1] for i in upper_bounds ], [i[-1] for i in lower_bounds ], None, masks_ret, lower_bounds, upper_bounds, slopes def get_relu(self, model, idx): # find the i-th ReLU layer i = 0 for layer in model.children(): if isinstance(layer, BoundRelu): i += 1 if i == idx: return layer def get_candidate(self, model, lb, ub): # get the intermediate bounds in the current model and build self.name_dict which contains the important index # and model name pairs if self.input_domain.ndim == 2: lower_bounds = [self.input_domain[:, 0].squeeze(-1)] upper_bounds = [self.input_domain[:, 1].squeeze(-1)] else: lower_bounds = [self.input_domain[:, :, :, 0].squeeze(-1)] upper_bounds = [self.input_domain[:, :, :, 1].squeeze(-1)] self.pre_relu_indices = [] idx, i, model_i = 0, 0, 0 # build a name_dict to map layer idx in self.layers to BoundedModule self.name_dict = {0: model.root_name[0]} model_names = list(model._modules) for layer in self.layers: if isinstance(layer, nn.ReLU): i += 1 this_relu = self.get_relu(model, i) lower_bounds[-1] = this_relu.inputs[0].lower.squeeze().detach() upper_bounds[-1] = this_relu.inputs[0].upper.squeeze().detach() lower_bounds.append(F.relu(lower_bounds[-1]).detach()) upper_bounds.append(F.relu(upper_bounds[-1]).detach()) self.pre_relu_indices.append(idx) self.name_dict[idx + 1] = model_names[model_i] model_i += 1 elif isinstance(layer, Flatten): lower_bounds.append(lower_bounds[-1].reshape(-1).detach()) upper_bounds.append(upper_bounds[-1].reshape(-1).detach()) self.name_dict[idx + 1] = model_names[model_i] model_i += 8 # Flatten is split to 8 ops in BoundedModule elif isinstance(layer, ZeroPad2d): lower_bounds.append(F.pad(lower_bounds[-1], layer.padding)) upper_bounds.append(F.pad(upper_bounds[-1], layer.padding)) self.name_dict[idx + 1] = model_names[model_i] model_i += 24 else: self.name_dict[idx + 1] = model_names[model_i] lower_bounds.append([]) upper_bounds.append([]) model_i += 1 idx += 1 # Also add the bounds 
on the final thing lower_bounds[-1] = (lb.view(-1).detach()) upper_bounds[-1] = (ub.view(-1).detach()) return lower_bounds, upper_bounds, self.pre_relu_indices def get_candidate_parallel(self, model, lb, ub, batch): # get the intermediate bounds in the current model lower_bounds = [ self.input_domain[:, :, :, 0].squeeze(-1).repeat(batch, 1, 1, 1) ] upper_bounds = [ self.input_domain[:, :, :, 1].squeeze(-1).repeat(batch, 1, 1, 1) ] idx, i, = 0, 0 for layer in self.layers: if isinstance(layer, nn.ReLU): i += 1 this_relu = self.get_relu(model, i) lower_bounds[-1] = this_relu.inputs[0].lower.detach() upper_bounds[-1] = this_relu.inputs[0].upper.detach() lower_bounds.append(F.relu(lower_bounds[-1]).detach( )) # TODO we actually do not need the bounds after ReLU upper_bounds.append(F.relu(upper_bounds[-1]).detach()) elif isinstance(layer, Flatten): lower_bounds.append(lower_bounds[-1].reshape(batch, -1).detach()) upper_bounds.append(upper_bounds[-1].reshape(batch, -1).detach()) elif isinstance(layer, nn.ZeroPad2d): lower_bounds.append( F.pad(lower_bounds[-1], layer.padding).detach()) upper_bounds.append( F.pad(upper_bounds[-1], layer.padding).detach()) else: lower_bounds.append([]) upper_bounds.append([]) idx += 1 # Also add the bounds on the final thing lower_bounds[-1] = (lb.view(batch, -1).detach()) upper_bounds[-1] = (ub.view(batch, -1).detach()) return lower_bounds, upper_bounds def get_mask_parallel(self, model): # get the mask of status of ReLU, 0 means inactive neurons, -1 means unstable neurons, 1 means active neurons mask = [] idx, i, = 0, 0 for layer in self.layers: if isinstance(layer, nn.ReLU): i += 1 this_relu = self.get_relu(model, i) mask_tmp = torch.zeros_like(this_relu.inputs[0].lower) unstable = ((this_relu.inputs[0].lower < 0) & (this_relu.inputs[0].upper > 0)) mask_tmp[unstable] = -1 active = (this_relu.inputs[0].lower >= 0) mask_tmp[active] = 1 # otherwise 0, for inactive neurons mask.append(mask_tmp.reshape(mask_tmp.size(0), -1)) ret = [] for i in range(mask[0].size(0)): ret.append([j[i] for j in mask]) return ret def get_beta(self, model): b = [] bm = [] for m in model._modules.values(): if isinstance(m, BoundRelu): b.append(m.beta.clone().detach()) bm.append(m.beta_mask.clone().detach()) retb = [] retbm = [] for i in range(b[0].size(0)): retb.append([j[i] for j in b]) retbm.append([j[i] for j in bm]) return (retb, retbm) def get_slope(self, model): s = [] for m in model._modules.values(): if isinstance(m, BoundRelu): s.append(m.slope.transpose(0, 1).clone().detach()) ret = [] for i in range(s[0].size(0)): ret.append([j[i] for j in s]) return ret def set_slope(self, model, slope): idx = 0 for m in model._modules.values(): if isinstance(m, BoundRelu): # m.slope = slope[idx].repeat(2, *([1] * (slope[idx].ndim - 1))).requires_grad_(True) m.slope = slope[idx].repeat( 2, *([1] * (slope[idx].ndim - 1))).transpose( 0, 1).requires_grad_(True) idx += 1 def reset_beta(self, model, batch=0): if batch == 0: for m in model._modules.values(): if isinstance(m, BoundRelu): m.beta.data = m.beta.data * 0. m.beta_mask.data = m.beta_mask.data * 0. 
# print("beta[{}]".format(batch), m.beta.shape, m.beta_mask.shape) else: for m in model._modules.values(): if isinstance(m, BoundRelu): ndim = m.beta.data.ndim # m.beta.data=(m.beta.data[0:1]*0.).repeat(batch*2, *([1] * (ndim - 1))).requires_grad_(True) # m.beta_mask.data=(m.beta_mask.data[0:1]*0.).repeat(batch*2, *([1] * (ndim - 1))).requires_grad_(True) m.beta = torch.zeros(m.beta[:, 0:1].shape).repeat( 1, batch * 2, *([1] * (ndim - 2))).detach().to( m.beta.device).requires_grad_(True) m.beta_mask = torch.zeros(m.beta_mask[0:1].shape).repeat( batch * 2, *([1] * (ndim - 2))).detach().to( m.beta.device).requires_grad_(False) # print("beta[{}]".format(batch), m.beta.shape, m.beta_mask.shape) def update_bounds_parallel(self, pre_lb_all=None, pre_ub_all=None, decision=None, slopes=None, beta=True, early_stop=True, opt_choice="default", iteration=20, history=[], decision_thresh=0, layer_set_bound=True): # update optimize-CROWN bounds in a parallel way total_batch = len(decision) decision = np.array(decision) layers_need_change = np.unique(decision[:, 0]) layers_need_change.sort() # initial results with empty list ret_l = [[] for _ in range(len(decision) * 2)] ret_u = [[] for _ in range(len(decision) * 2)] masks = [[] for _ in range(len(decision) * 2)] ret_s = [[] for _ in range(len(decision) * 2)] pre_lb_all_cp = copy.deepcopy(pre_lb_all) pre_ub_all_cp = copy.deepcopy(pre_ub_all) for idx in layers_need_change: # iteratively change upper and lower bound from former to later layer tmp_d = np.argwhere(decision[:, 0] == idx) # .squeeze() # idx is the index of relu layers, change_idx is the index of all layers change_idx = self.pre_relu_indices[idx] batch = len(tmp_d) select_history = [ history[idx] for idx in tmp_d.squeeze().reshape(-1) ] if beta: # update beta mask, put it after reset_beta # reset beta according to the shape of batch self.reset_beta(self.net, batch) # print("select history", select_history) bound_relus = [] for m in self.net._modules.values(): if isinstance(m, BoundRelu): bound_relus.append(m) m.beta_mask.data = m.beta_mask.data.view(batch * 2, -1) for bi in range(batch): d = tmp_d[bi][0] # assign current decision to each point of a batch bound_relus[int(decision[d][0])].beta_mask.data[ bi, int(decision[d][1])] = 1 bound_relus[int(decision[d][0])].beta_mask.data[ bi + batch, int(decision[d][1])] = -1 # print("assign", bi, decision[d], 1, bound_relus[decision[d][0]].beta_mask.data[bi, decision[d][1]]) # print("assign", bi+batch, decision[d], -1, bound_relus[decision[d][0]].beta_mask.data[bi+batch, decision[d][1]]) # assign history decision according to select_history for (hid, hl), hc in select_history[bi]: bound_relus[hid].beta_mask.data[bi, hl] = int( (hc - 0.5) * 2) bound_relus[hid].beta_mask.data[bi + batch, hl] = int( (hc - 0.5) * 2) # print("assign", bi, [hid, hl], hc, bound_relus[hid].beta_mask.data[bi, hl]) # print("assign", bi+batch, [hid, hl], hc, bound_relus[hid].beta_mask.data[bi+batch, hl]) # sanity check: beta_mask should only be assigned for split nodes for m in bound_relus: m.beta_mask.data = m.beta_mask.data.view(m.beta[0].shape) slope_select = [i[tmp_d.squeeze()].clone() for i in slopes] pre_lb_all = [i[tmp_d.squeeze()].clone() for i in pre_lb_all_cp] pre_ub_all = [i[tmp_d.squeeze()].clone() for i in pre_ub_all_cp] if batch == 1: pre_lb_all = [i.clone().unsqueeze(0) for i in pre_lb_all] pre_ub_all = [i.clone().unsqueeze(0) for i in pre_ub_all] slope_select = [i.clone().unsqueeze(0) for i in slope_select] upper_bounds = [i.clone() for i in pre_ub_all[:change_idx + 
1]] lower_bounds = [i.clone() for i in pre_lb_all[:change_idx + 1]] upper_bounds_cp = copy.deepcopy(upper_bounds) lower_bounds_cp = copy.deepcopy(lower_bounds) for i in range(batch): d = tmp_d[i][0] upper_bounds[change_idx].view(batch, -1)[i][decision[d][1]] = 0 lower_bounds[change_idx].view(batch, -1)[i][decision[d][1]] = 0 pre_lb_all = [torch.cat(2 * [i]) for i in pre_lb_all] pre_ub_all = [torch.cat(2 * [i]) for i in pre_ub_all] # merge the inactive and active splits together new_candidate = {} for i, (l, uc, lc, u) in enumerate( zip(lower_bounds, upper_bounds_cp, lower_bounds_cp, upper_bounds)): # we set lower = 0 in first half batch, and upper = 0 in second half batch new_candidate[self.name_dict[i]] = [ torch.cat((l, lc), dim=0), torch.cat((uc, u), dim=0) ] if not layer_set_bound: new_candidate_p = {} for i, (l, u) in enumerate(zip(pre_lb_all[:-2], pre_ub_all[:-2])): # we set lower = 0 in first half batch, and upper = 0 in second half batch new_candidate_p[self.name_dict[i]] = [l, u] # create new_x here since batch may change ptb = PerturbationLpNorm( norm=self.x.ptb.norm, eps=self.x.ptb.eps, x_L=self.x.ptb.x_L.repeat(batch * 2, 1, 1, 1), x_U=self.x.ptb.x_U.repeat(batch * 2, 1, 1, 1)) new_x = BoundedTensor(self.x.data.repeat(batch * 2, 1, 1, 1), ptb) self.net( new_x ) # batch may change, so we need to do forward to set some shapes here if len(slope_select) > 0: # set slope here again self.set_slope(self.net, slope_select) torch.cuda.empty_cache() if layer_set_bound: # we fix the intermediate bounds before change_idx-th layer by using normal CROWN if self.solve_slope and change_idx >= self.pre_relu_indices[-1]: # we split the ReLU at last layer, directly use Optimized CROWN self.net.set_bound_opts({ 'ob_start_idx': sum(change_idx <= x for x in self.pre_relu_indices), 'ob_beta': beta, 'ob_update_by_layer': layer_set_bound, 'ob_iteration': iteration }) lb, ub, = self.net.compute_bounds( x=(new_x, ), IBP=False, C=None, method='CROWN-Optimized', new_interval=new_candidate, return_A=False, bound_upper=False) else: # we split the ReLU before the last layer, calculate intermediate bounds by using normal CROWN self.net.set_relu_used_count( sum(change_idx <= x for x in self.pre_relu_indices)) with torch.no_grad(): lb, ub, = self.net.compute_bounds( x=(new_x, ), IBP=False, C=None, method='backward', new_interval=new_candidate, bound_upper=False, return_A=False) # we don't care about the upper bound of the last layer lower_bounds_new, upper_bounds_new = self.get_candidate_parallel( self.net, lb, lb + 99, batch * 2) if change_idx < self.pre_relu_indices[-1]: # check whether we have a better bounds before, and preset all intermediate bounds for i, (l, u) in enumerate( zip(lower_bounds_new[change_idx + 2:-1], upper_bounds_new[change_idx + 2:-1])): new_candidate[self.name_dict[i + change_idx + 2]] = [ torch.max(l, pre_lb_all[i + change_idx + 2]), torch.min(u, pre_ub_all[i + change_idx + 2]) ] if self.solve_slope: self.net.set_bound_opts({ 'ob_start_idx': sum(change_idx <= x for x in self.pre_relu_indices), 'ob_beta': beta, 'ob_update_by_layer': layer_set_bound, 'ob_iteration': iteration }) lb, ub, = self.net.compute_bounds( x=(new_x, ), IBP=False, C=None, method='CROWN-Optimized', new_interval=new_candidate, return_A=False, bound_upper=False) else: self.net.set_relu_used_count( sum(change_idx <= x for x in self.pre_relu_indices)) with torch.no_grad(): lb, ub, = self.net.compute_bounds( x=(new_x, ), IBP=False, C=None, method='backward', new_interval=new_candidate, bound_upper=False, return_A=False) 
else: # all intermediate bounds are re-calculate by optimized CROWN self.net.set_bound_opts({ 'ob_start_idx': 99, 'ob_beta': beta, 'ob_update_by_layer': layer_set_bound, 'ob_iteration': iteration }) lb, ub, = self.net.compute_bounds(x=(new_x, ), IBP=False, C=None, method='CROWN-Optimized', new_interval=new_candidate_p, return_A=False, bound_upper=False) # print('best results of parent nodes', pre_lb_all[-1].repeat(2, 1)) # print('finally, after optimization:', lower_bounds_new[-1]) # primal = self.get_primals(A_dict, return_x=True) lower_bounds_new, upper_bounds_new = self.get_candidate_parallel( self.net, lb, lb + 99, batch * 2) lower_bounds_new[-1] = torch.max(lower_bounds_new[-1], pre_lb_all[-1]) upper_bounds_new[-1] = torch.min(upper_bounds_new[-1], pre_ub_all[-1]) mask = self.get_mask_parallel(self.net) if len(slope_select) > 0: slope = self.get_slope(self.net) # reshape the results for i in range(len(tmp_d)): ret_l[int(tmp_d[i])] = [j[i] for j in lower_bounds_new] ret_l[int(tmp_d[i] + total_batch)] = [ j[i + batch] for j in lower_bounds_new ] ret_u[int(tmp_d[i])] = [j[i] for j in upper_bounds_new] ret_u[int(tmp_d[i] + total_batch)] = [ j[i + batch] for j in upper_bounds_new ] masks[int(tmp_d[i])] = mask[i] masks[int(tmp_d[i] + total_batch)] = mask[i + batch] if len(slope_select) > 0: ret_s[int(tmp_d[i])] = slope[i] ret_s[int(tmp_d[i] + total_batch)] = slope[i + batch] return ret_l, ret_u, masks, ret_s def fake_forward(self, x): for layer in self.layers: if type(layer) is nn.Linear: x = F.linear(x, layer.weight, layer.bias) elif type(layer) is nn.Conv2d: x = F.conv2d(x, layer.weight, layer.bias, layer.stride, layer.padding, layer.dilation, layer.groups) elif type(layer) == nn.ReLU: x = F.relu(x) elif type(layer) == Flatten: x = x.reshape(x.shape[0], -1) elif type(layer) == nn.ZeroPad2d: x = F.pad(x, layer.padding) else: print(type(layer)) raise NotImplementedError return x def get_primals(self, A, return_x=False): # get primal input by using A matrix input_A_lower = A[self.layer_names[-1]][self.net.input_name[0]][0] batch = input_A_lower.shape[1] l = self.input_domain[:, :, :, 0].repeat(batch, 1, 1, 1) u = self.input_domain[:, :, :, 1].repeat(batch, 1, 1, 1) diff = 0.5 * (l - u) # already flip the sign by using lower - upper net_input = diff * torch.sign(input_A_lower.squeeze(0)) + self.x if return_x: return net_input primals = [net_input] for layer in self.layers: if type(layer) is nn.Linear: pre = primals[-1] primals.append(F.linear(pre, layer.weight, layer.bias)) elif type(layer) is nn.Conv2d: pre = primals[-1] primals.append( F.conv2d(pre, layer.weight, layer.bias, layer.stride, layer.padding, layer.dilation, layer.groups)) elif type(layer) == nn.ReLU: primals.append(F.relu(primals[-1])) elif type(layer) == Flatten: primals.append(primals[-1].reshape(primals[-1].shape[0], -1)) else: print(type(layer)) raise NotImplementedError # primals = primals[1:] primals = [i.detach().clone() for i in primals] # print('primals', primals[-1]) return net_input, primals def get_relu_mask(self): relu_mask = [] relu_idx = 0 for layer in self.layers: if type(layer) == nn.ReLU: relu_idx += 1 this_relu = self.get_relu(self.net, relu_idx) new_layer_mask = [] ratios_all = this_relu.d.squeeze(0) for slope in ratios_all.flatten(): if slope.item() == 1.0: new_layer_mask.append(1) elif slope.item() == 0.0: new_layer_mask.append(0) else: new_layer_mask.append(-1) relu_mask.append( torch.tensor(new_layer_mask).to(self.x.device)) return relu_mask def build_the_model(self, input_domain, x, no_lp=True, 
decision_thresh=0): self.x = x self.input_domain = input_domain slope_opt = None # first get CROWN bounds if self.solve_slope: self.net.init_slope(self.x) self.net.set_bound_opts({ 'ob_iteration': 100, 'ob_beta': False, 'ob_alpha': True, 'ob_opt_choice': "adam", 'ob_decision_thresh': decision_thresh, 'ob_early_stop': False, 'ob_log': False, 'ob_start_idx': 99, 'ob_keep_best': True, 'ob_update_by_layer': True, 'ob_lr': 0.1 }) lb, ub, A_dict = self.net.compute_bounds(x=(x, ), IBP=False, C=None, method='CROWN-Optimized', return_A=True, bound_upper=False) slope_opt = self.get_slope( self.net)[0] # initial with one node only else: with torch.no_grad(): lb, ub, A_dict = self.net.compute_bounds(x=(x, ), IBP=False, C=None, method='backward', return_A=True) # build a complete A_dict self.layer_names = list(A_dict[list(A_dict.keys())[-1]].keys())[2:] self.layer_names.sort() # update bounds print('initial CROWN bounds:', lb, ub) primals, mini_inp = None, None # mini_inp, primals = self.get_primals(self.A_dict) lb, ub, pre_relu_indices = self.get_candidate( self.net, lb, lb + 99) # primals are better upper bounds duals = None return ub[-1], lb[-1], mini_inp, duals, primals, self.get_relu_mask( ), lb, ub, pre_relu_indices, slope_opt
        [torchvision.transforms.ToTensor(), normalize]))

# For illustration we only use 1 image from the dataset.
N = 1
n_classes = 10
image = torch.Tensor(test_data.data[:N]).reshape(N, 3, 32, 32)
# Convert to float in [0, 1].
image = image.to(torch.float32) / 255.0
if device == 'cuda':
    image = image.cuda()

## Step 3: wrap model with auto_LiRPA
# The second parameter is for constructing the trace of the computational graph,
# and its content is not important.
# The new "patches" conv_mode provides a more efficient implementation for
# convolutional neural networks.
model = BoundedModule(model_ori, image, bound_opts={"conv_mode": conv_mode}, device=device)

## Step 4: Compute bounds using LiRPA given a perturbation
eps = 0.03
norm = np.inf
ptb = PerturbationLpNorm(norm=norm, eps=eps)
image = BoundedTensor(image, ptb)
# Get model prediction as usual
pred = model(image)
# Compute bounds
torch.cuda.empty_cache()
print('Using {} mode to compute convolution.'.format(conv_mode))
lb, ub = model.compute_bounds(IBP=False, C=None, method='backward')
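# Sanity check (an illustrative addition, not part of the original example): any sound
# bound must contain the clean prediction, i.e. lb <= f(x) <= ub elementwise.
assert torch.all(lb <= pred + 1e-5) and torch.all(pred <= ub + 1e-5)
for i in range(N):
    for j in range(n_classes):
        print('f_{j}(x_{i}): {l:8.4f} <= {p:8.4f} <= {u:8.4f}'.format(
            i=i, j=j, l=lb[i][j].item(), p=pred[i][j].item(), u=ub[i][j].item()))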
def main(args):
    config = load_config(args.config)
    logger.info('config: {}'.format(json.dumps(config)))
    set_seed(args.seed or config['seed'])

    model_ori, checkpoint, epoch, best = prepare_model(args, logger, config)
    logger.info('Model structure: \n {}'.format(str(model_ori)))

    custom_ops = {}
    bound_config = config['bound_params']
    batch_size = (args.batch_size or config['batch_size'])
    test_batch_size = args.test_batch_size or batch_size
    dummy_input, train_data, test_data = load_data(
        args, config['data'], batch_size, test_batch_size, aug=not args.no_data_aug)

    lf = args.loss_fusion and args.bound_type == 'CROWN-IBP'
    bound_opts = bound_config['bound_opts']

    model_ori.train()
    model = BoundedModule(model_ori, dummy_input, bound_opts=bound_opts,
                          custom_ops=custom_ops, device=args.device)
    model_ori.to(args.device)

    if checkpoint is None:
        if args.manual_init:
            manual_init(args, model_ori, model, train_data)
        if args.kaiming_init:
            kaiming_init(model_ori)

    if lf:
        model_loss = BoundedModule(
            CrossEntropyWrapper(model_ori),
            (dummy_input.cuda(), torch.zeros(1, dtype=torch.long).cuda()),
            bound_opts=get_bound_opts_lf(bound_opts), device=args.device)
        params = list(model_loss.parameters())
    else:
        model_loss = model
        params = list(model_ori.parameters())
    logger.info('Parameter shapes: {}'.format([p.shape for p in params]))

    if args.multi_gpu:
        raise NotImplementedError('Multi-GPU is not supported yet')
        model = BoundDataParallel(model)
        model_loss = BoundDataParallel(model_loss)

    opt = get_optimizer(args, params, checkpoint)
    max_eps = args.eps or bound_config['eps']
    eps_scheduler = get_eps_scheduler(args, max_eps, train_data)
    lr_scheduler = get_lr_scheduler(args, opt)
    if epoch > 0 and not args.plot:
        # Skip epochs already covered by the loaded checkpoint.
        eps_scheduler.train()
        for i in range(epoch):
            # FIXME Can use `last_epoch` argument of lr_scheduler
            lr_scheduler.step()
            eps_scheduler.step_epoch(verbose=False)

    if args.verify:
        logger.info('Inference')
        meter = Train(model, model_ori, 10000, test_data, eps_scheduler, None,
                      loss_fusion=False)
        logger.info(meter)
    else:
        timer = 0.0
        for t in range(epoch + 1, args.num_epochs + 1):
            logger.info('Epoch {}, learning rate {}, dir {}'.format(
                t, lr_scheduler.get_last_lr(), args.dir))
            start_time = time.time()

            if lf:
                Train(model_loss, model_ori, t, train_data, eps_scheduler, opt,
                      loss_fusion=True)
            else:
                Train(model, model_ori, t, train_data, eps_scheduler, opt)
            update_state_dict(model_ori, model_loss)

            epoch_time = time.time() - start_time
            timer += epoch_time
            lr_scheduler.step()
            logger.info('Epoch time: {:.4f}, Total time: {:.4f}'.format(
                epoch_time, timer))

            is_best = False
            if t % args.test_interval == 0:
                logger.info('Test without loss fusion')
                with torch.no_grad():
                    meter = Train(model, model_ori, t, test_data, eps_scheduler,
                                  None, loss_fusion=False)
                if eps_scheduler.get_eps() == eps_scheduler.get_max_eps():
                    if meter.avg('Rob_Err') < best[1]:
                        is_best, best = True, (meter.avg('Err'), meter.avg('Rob_Err'), t)
                    logger.info('Best epoch {}, error {:.4f}, robust error {:.4f}'.format(
                        best[-1], best[0], best[1]))
            save(args, epoch=t, best=best, model=model_ori, opt=opt, is_best=is_best)
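# Conceptual sketch of the loss-fusion idea behind CrossEntropyWrapper above (hedged:
# this only illustrates the principle, not the wrapper's actual implementation).
# Fusing the classifier and the loss into one module lets BoundedModule bound the
# training loss directly, which is what the `lf` branch does and which can be much
# cheaper than bounding every class margin separately when there are many classes.
class FusedLossSketch(torch.nn.Module):  # hypothetical name, illustration only
    def __init__(self, classifier):
        super().__init__()
        self.classifier = classifier

    def forward(self, x, labels):
        logits = self.classifier(x)
        # An upper bound on this per-example loss under the input perturbation is a
        # valid certified-training objective.
        return torch.nn.functional.cross_entropy(logits, labels, reduction='none')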
from auto_LiRPA.perturbations import PerturbationLpNorm

input_vec = torch.rand(5, 5)


class mynet(nn.Module):
    def __init__(self):
        super(mynet, self).__init__()
        self.features = nn.Sequential(nn.Linear(5, 10), nn.ReLU(),
                                      nn.Linear(10, 3))

    def forward(self, input):
        return self.features(input)


raw_model = mynet()
bound_model = BoundedModule(raw_model, input_vec)
num_actions = 3
batchsize = 5
label = torch.tensor([0, 2, 1, 1, 0])
bnd_state = BoundedTensor(input_vec, PerturbationLpNorm(norm=np.inf, eps=0.1))

# Specification matrix: one row per incorrect class, e_label - e_other.
c = torch.eye(3).type_as(input_vec)[label].unsqueeze(1) - torch.eye(3).type_as(
    input_vec).unsqueeze(0)
I = (~(label.data.unsqueeze(1) == torch.arange(3).type_as(
    label.data).unsqueeze(0)))
c = (c[I].view(input_vec.size(0), 2, 3))

# Feed the BoundedTensor so that compute_bounds below sees the perturbation.
pred = bound_model(bnd_state)
basic_bound, _ = bound_model.compute_bounds(IBP=False, method='backward')
advance_bound, _ = bound_model.compute_bounds(C=c, IBP=False, method='backward')
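# Note on the specification matrix C built above: each row of c is e_label - e_j for
# one incorrect class j, so the bounds returned with C=c are bounds on the margins
# f_label(x) - f_j(x) rather than on raw logits; a positive lower bound on every
# margin certifies the prediction under the perturbation. For the first sample
# (label 0) the two rows are [1, -1, 0] and [1, 0, -1].
assert c.shape == (batchsize, num_actions - 1, num_actions)
assert torch.equal(c[0], torch.tensor([[1., -1., 0.], [1., 0., -1.]]))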
def main(args):
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    random.seed(args.seed)
    np.random.seed(args.seed)

    ## Step 1: Initialize the original model as usual; see model details in
    ## models/example_feedforward.py and models/example_resnet.py
    if args.data == 'MNIST':
        model_ori = models.Models[args.model](in_ch=1, in_dim=28)
    else:
        model_ori = models.Models[args.model](in_ch=3, in_dim=32)
    if args.load:
        state_dict = torch.load(args.load)['state_dict']
        model_ori.load_state_dict(state_dict)

    ## Step 2: Prepare dataset as usual
    if args.data == 'MNIST':
        dummy_input = torch.randn(1, 1, 28, 28)
        train_data = datasets.MNIST("./data", train=True, download=True,
                                    transform=transforms.ToTensor())
        test_data = datasets.MNIST("./data", train=False, download=True,
                                   transform=transforms.ToTensor())
    elif args.data == 'CIFAR':
        dummy_input = torch.randn(1, 3, 32, 32)
        normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                         std=[0.2023, 0.1994, 0.2010])
        train_data = datasets.CIFAR10("./data", train=True, download=True,
                                      transform=transforms.Compose([
                                          transforms.RandomHorizontalFlip(),
                                          transforms.RandomCrop(32, 4),
                                          transforms.ToTensor(), normalize]))
        test_data = datasets.CIFAR10("./data", train=False, download=True,
                                     transform=transforms.Compose(
                                         [transforms.ToTensor(), normalize]))

    train_data = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True,
        num_workers=min(multiprocessing.cpu_count(), 4))
    test_data = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, pin_memory=True,
        num_workers=min(multiprocessing.cpu_count(), 4))
    if args.data == 'MNIST':
        train_data.mean = test_data.mean = torch.tensor([0.0])
        train_data.std = test_data.std = torch.tensor([1.0])
    elif args.data == 'CIFAR':
        train_data.mean = test_data.mean = torch.tensor([0.4914, 0.4822, 0.4465])
        train_data.std = test_data.std = torch.tensor([0.2023, 0.1994, 0.2010])

    ## Step 3: wrap model with auto_LiRPA
    # The second parameter dummy_input is for constructing the trace of the computational graph.
    model = BoundedModule(model_ori, dummy_input,
                          bound_opts={'relu': args.bound_opts}, device=args.device)

    ## Step 4: Prepare optimizer, epsilon scheduler and learning rate scheduler
    opt = optim.Adam(model.parameters(), lr=args.lr)
    norm = float(args.norm)
    lr_scheduler = optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.5)
    eps_scheduler = eval(args.scheduler_name)(args.eps, args.scheduler_opts)
    print("Model structure: \n", str(model_ori))

    ## Step 5: start training
    if args.verify:
        eps_scheduler = FixedScheduler(args.eps)
        with torch.no_grad():
            Train(model, 1, test_data, eps_scheduler, norm, False, None, args.bound_type)
    else:
        timer = 0.0
        for t in range(1, args.num_epochs + 1):
            if eps_scheduler.reached_max_eps():
                # Only decay learning rate after reaching the maximum eps.
                lr_scheduler.step()
            print("Epoch {}, learning rate {}".format(t, lr_scheduler.get_lr()))
            start_time = time.time()
            Train(model, t, train_data, eps_scheduler, norm, True, opt, args.bound_type)
            epoch_time = time.time() - start_time
            timer += epoch_time
            print('Epoch time: {:.4f}, Total time: {:.4f}'.format(epoch_time, timer))
            print("Evaluating...")
            with torch.no_grad():
                Train(model, t, test_data, eps_scheduler, norm, False, None, args.bound_type)
            torch.save({'state_dict': model.state_dict(), 'epoch': t}, args.model)
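# Hedged sketch of the core computation a certified-training step like Train() above
# typically performs (this follows the standard CROWN-IBP recipe and is not
# necessarily the exact code inside Train); it assumes BoundedTensor and
# PerturbationLpNorm are imported as in the other snippets in this document. The idea:
# bound the class margins with a specification matrix, then feed the negated lower
# bounds to cross-entropy as worst-case logits.
def robust_loss_sketch(model, x, labels, num_classes, eps):
    x = BoundedTensor(x, PerturbationLpNorm(norm=np.inf, eps=eps))
    eye = torch.eye(num_classes, device=labels.device)
    # Row j of C for sample i is e_{labels[i]} - e_j, so the bounds below are on the
    # margins f_true(x) - f_j(x); the row with j = labels[i] is all zeros (margin 0).
    c = eye[labels].unsqueeze(1) - eye.unsqueeze(0)
    lb, _ = model.compute_bounds(x=(x,), C=c, IBP=True, method='backward')
    # -lb acts as worst-case logits: 0 for the true class, the negated margin lower
    # bound for every other class.
    return torch.nn.functional.cross_entropy(-lb, labels)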
    def __init__(self):
        super().__init__()
        self.features = BoundedModule(FeatureExtraction(), torch.empty((1, 1, 28, 28)))
        self.fc = nn.Linear(256, 10)
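    # Hedged guess (the original forward() is not shown in this fragment): the bounded
    # feature extractor produces a 256-dimensional vector that the plain nn.Linear
    # head then classifies.
    def forward(self, x):
        return self.fc(self.features(x))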