def bab(gt_prop, verif_layers, domain, return_dict, timeout, batch_size,
        method, tot_iter, parent_init, args, gurobi_dict=None, writer=None):
    epsilon = 1e-4

    if gpu:
        cuda_verif_layers = [copy.deepcopy(lay).cuda() for lay in verif_layers]
        domain = domain.cuda()
    else:
        cuda_verif_layers = [copy.deepcopy(lay) for lay in verif_layers]

    # use best of naive interval propagation and KW as intermediate bounds
    intermediate_net = SaddleLP(cuda_verif_layers, store_bounds_primal=False,
                                max_batch=args.max_solver_batch)
    intermediate_net.set_solution_optimizer('best_naive_kw', None)

    anderson_bounds_net = None
    hard_crit = None
    prob_hard_crit = None
    # might need a smaller batch size for hard domains
    hard_batch_size = batch_size if args.hard_batch_size == -1 else args.hard_batch_size

    # Split domains into easy and hard, define two separate bounding methods
    # to handle their last layer.
    if method in ["cut", "gurobi-anderson"]:
        # Set criteria for identifying subproblems as hard
        hard_crit = {
            "lb_threshold": 0.5,
            "depth_threshold": 0,  # 15
            "impr_threshold": 1e-1,
            "doms_len_threshold": 200,
            "auto": args.auto_strat,
            "hard_overhead": args.hard_overhead,  # assumed at full batch
        }

        # Set bounds net for easy domains.
        if method in ["cut"]:
            bigm_adam_params = {
                "bigm_algorithm": "adam",
                "bigm": "only",
                "nb_outer_iter": int(tot_iter),  # cifar_oval: 180
                'initial_step_size': args.dualinit_init_step,  # cifar_oval: 1e-2
                'initial_step_size_pinit': args.dualinit_init_step / 10,
                'final_step_size': args.dualinit_fin_step,  # cifar_oval: 1e-4
                'betas': (0.9, 0.999)
            }
            bounds_net = ExpLP(cuda_verif_layers, params=bigm_adam_params,
                               store_bounds_primal=True)
        else:
            bounds_net = LinearizedNetwork(verif_layers)

        # Set bounds net for hard domains.
        if method == "cut":
            anderson_iter = args.hard_iter  # 100
            explp_params = {
                "nb_iter": anderson_iter,
                'bigm': "init",
                'cut': "only",
                "bigm_algorithm": "adam",
                'cut_frequency': 450,
                'max_cuts': 8,
                'cut_add': args.cut_add,  # 2
                'betas': (0.9, 0.999),
                'initial_step_size': args.init_step,
                'final_step_size': args.fin_step,
                "init_params": {
                    "nb_outer_iter": 500,  # 500 for our datasets, 1000 for cifar10_8_255
                    'initial_step_size': args.dualinit_init_step,
                    'initial_step_size_pinit': args.dualinit_init_step / 10,
                    'final_step_size': args.dualinit_fin_step,
                    'betas': (0.9, 0.999),
                },
            }
            anderson_bounds_net = ExpLP(cuda_verif_layers, params=explp_params,
                                        fixed_M=True, store_bounds_primal=True)
            print(f"Running cut for {anderson_iter} iterations")
        elif method == "gurobi-anderson":
            anderson_bounds_net = AndersonLinearizedNetwork(
                verif_layers, mode="lp-cut", n_cuts=args.n_cuts,
                cuts_per_neuron=True, decision_boundary=decision_bound)

        if args.no_easy:
            # Ignore the easy problems bounding, use the hard one for all.
            bounds_net = anderson_bounds_net
            anderson_bounds_net = None

    # Use only a single last layer bounding method for all problems.
    elif method == "prox":
        bounds_net = SaddleLP(cuda_verif_layers, store_bounds_primal=True,
                              max_batch=args.max_solver_batch)
        bounds_net.set_decomposition('pairs', 'KW')
        optprox_params = {
            'nb_total_steps': int(tot_iter),
            'max_nb_inner_steps': 2,  # this is 2/5 as simpleprox
            'initial_eta': args.eta,
            'final_eta': args.feta,
            'log_values': False,
            'maintain_primal': True
        }
        bounds_net.set_solution_optimizer('optimized_prox', optprox_params)
        print(f"Running prox with {tot_iter} steps")
    elif method == "adam":
        bounds_net = SaddleLP(cuda_verif_layers, store_bounds_primal=True,
                              max_batch=args.max_solver_batch)
        bounds_net.set_decomposition('pairs', 'KW')
        adam_params = {
            'nb_steps': int(tot_iter),
            'initial_step_size': args.init_step,
            'final_step_size': args.fin_step,
            'betas': (0.9, 0.999),
            'log_values': False
        }
        bounds_net.set_solution_optimizer('adam', adam_params)
        print(f"Running adam with {tot_iter} steps")
    elif method == "bigm-adam":
        bigm_adam_params = {
            "bigm_algorithm": "adam",
            "bigm": "only",
            "nb_outer_iter": int(tot_iter),
            'initial_step_size': args.init_step,
            'initial_step_size_pinit': args.init_step / 10,
            'final_step_size': args.fin_step,
            'betas': (0.9, 0.999)
        }
        bounds_net = ExpLP(cuda_verif_layers, params=bigm_adam_params,
                           store_bounds_primal=True)
    elif method == "gurobi":
        bounds_net = LinearizedNetwork(verif_layers)

    # branching
    if args.branching_choice == 'heuristic':
        branching_net_name = None
    else:
        raise NotImplementedError

    with torch.no_grad():
        min_lb, min_ub, ub_point, nb_states, fail_safe_ratio = relu_bab(
            intermediate_net, bounds_net, branching_net_name, domain,
            decision_bound, eps=epsilon, timeout=timeout,
            batch_size=batch_size, parent_init_flag=parent_init,
            gurobi_specs=gurobi_dict, anderson_bounds_net=anderson_bounds_net,
            writer=writer, hard_crit=hard_crit,
            hard_batch_size=hard_batch_size)

    if not (min_lb or min_ub or ub_point):
        return_dict["min_lb"] = None
        return_dict["min_ub"] = None
        return_dict["ub_point"] = None
        return_dict["nb_states"] = nb_states
        return_dict["bab_out"] = "timeout"
        return_dict["fs_ratio"] = fail_safe_ratio
    else:
        return_dict["min_lb"] = min_lb.cpu()
        return_dict["min_ub"] = min_ub.cpu()
        return_dict["ub_point"] = ub_point.cpu()
        return_dict["nb_states"] = nb_states
        return_dict["fs_ratio"] = fail_safe_ratio
def reluify_maxpool(layers, domain, no_opt=False):
    '''
    Remove all the Maxpool units of a feedforward network represented by
    `layers` and replace them by an equivalent combination of ReLU + Linear.

    This is only valid over the domain `domain` because we use some knowledge
    about upper and lower bounds of certain neurons.

    Args:
      no_opt: Boolean. If set to True, don't optimize the bounds used to
              convert the maxpool into ReLU; use interval_analysis instead.
              If set to False, use the tight optimized bounds.
    '''
    if no_opt:
        # We're building a MIPNetwork but we are not going to solve it. This
        # is just because this is the class that has the code for
        # interval_analysis.
        # TODO: Importing here sucks but avoiding it and importing at the top
        # level would mean a larger refactoring that I'm not willing to do
        # right now.
        from plnn.mip_solver import MIPNetwork

        mip_net = MIPNetwork(layers)
        mip_net.do_interval_analysis(domain)
        lbs = mip_net.lower_bounds
    else:
        # We will need some lower bounds for the inputs to the maxpooling.
        # We will simply use those given by a LinearizedNetwork.
        lin_net = LinearizedNetwork(layers)
        lin_net.define_linear_approximation(domain)
        lbs = lin_net.lower_bounds

    layers = layers[:]

    new_all_layers = []

    idx_of_inp_lbs = 0
    layer_idx = 0
    while layer_idx < len(layers):
        layer = layers[layer_idx]
        if type(layer) is nn.MaxPool1d:
            # We need to decompose this MaxPool until it only has a size of 2
            assert layer.padding == 0
            assert layer.dilation == 1
            if layer.kernel_size > 2:
                assert layer.kernel_size % 2 == 0, "Not supported yet"
                assert layer.stride % 2 == 0, "Not supported yet"
                # We're going to decompose this maxpooling into two maxpoolings:
                # max(in_1, in_2, in_3, in_4)
                # will become
                # max(max(in_1, in_2), max(in_3, in_4))
                first_mp = nn.MaxPool1d(2, stride=2)
                second_mp = nn.MaxPool1d(layer.kernel_size // 2,
                                         stride=layer.stride // 2)
                # We will replace the Maxpooling that was originally there
                # with those two layers.
                # We need to add a corresponding layer of lower bounds.
                first_lbs = lbs[idx_of_inp_lbs]
                intermediate_lbs = []
                for pair_idx in range(len(first_lbs) // 2):
                    intermediate_lbs.append(max(first_lbs[2 * pair_idx],
                                                first_lbs[2 * pair_idx + 1]))
                # Do the replacement
                del layers[layer_idx]
                layers.insert(layer_idx, first_mp)
                layers.insert(layer_idx + 1, second_mp)
                lbs.insert(idx_of_inp_lbs + 1, intermediate_lbs)

                # Now continue so that we re-go through the loop with the now
                # simplified maxpool.
                continue
            elif layer.kernel_size == 2:
                # Each pair needs two neurons in the intermediate layer that
                # is going to be ReLU-ified.
                pre_nb_inp_lin = len(lbs[idx_of_inp_lbs])
                # How many starting positions can we fit in?
                # 1 + the number of strides that fit before we're too far
                # into the array for a full kernel.
                pre_nb_out_lin = (1 + ((pre_nb_inp_lin - layer.kernel_size)
                                       // layer.stride)) * 2
                pre_relu_lin = nn.Linear(pre_nb_inp_lin, pre_nb_out_lin, bias=True)
                pre_relu_weight = pre_relu_lin.weight.data
                pre_relu_bias = pre_relu_lin.bias.data
                pre_relu_weight.zero_()
                pre_relu_bias.zero_()
                # For each (x, y) that needs to be transformed to max(x, y),
                # we create (x - y, y - y_lb)
                first_in_index = 0
                first_out_index = 0
                while first_in_index + 1 < pre_nb_inp_lin:
                    pre_relu_weight[first_out_index, first_in_index] = 1
                    pre_relu_weight[first_out_index, first_in_index + 1] = -1
                    pre_relu_weight[first_out_index + 1, first_in_index + 1] = 1
                    pre_relu_bias[first_out_index + 1] = -lbs[idx_of_inp_lbs][first_in_index + 1]

                    # Now shift
                    first_in_index += layer.stride
                    first_out_index += 2
                new_all_layers.append(pre_relu_lin)
                new_all_layers.append(nn.ReLU())

                # We now need to create the second layer.
                # It will sum [max(x - y, 0)], [max(y - y_lb, 0)] and y_lb
                post_nb_inp_lin = pre_nb_out_lin
                post_nb_out_lin = post_nb_inp_lin // 2
                post_relu_lin = nn.Linear(post_nb_inp_lin, post_nb_out_lin)
                post_relu_weight = post_relu_lin.weight.data
                post_relu_bias = post_relu_lin.bias.data
                post_relu_weight.zero_()
                post_relu_bias.zero_()
                first_in_index = 0
                out_index = 0
                while first_in_index + 1 < post_nb_inp_lin:
                    post_relu_weight[out_index, first_in_index] = 1
                    post_relu_weight[out_index, first_in_index + 1] = 1
                    post_relu_bias[out_index] = lbs[idx_of_inp_lbs][layer.stride * out_index + 1]
                    first_in_index += 2
                    out_index += 1
                new_all_layers.append(post_relu_lin)
                idx_of_inp_lbs += 1
            else:
                # This should have been cleaned up in one of the simplify passes
                raise NotImplementedError
        elif type(layer) in [nn.Linear, nn.ReLU]:
            new_all_layers.append(layer)
            idx_of_inp_lbs += 1
        elif type(layer) is View:
            # We shouldn't add the View layers, as we are getting rid of them
            pass
        layer_idx += 1

    return new_all_layers
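
# --- Sanity check for the decomposition above (illustrative) ---
# The Linear/ReLU pair built in `reluify_maxpool` relies on the identity
#     max(x, y) = max(x - y, 0) + max(y - y_lb, 0) + y_lb,   valid when y >= y_lb:
# the last two terms always sum to y, and the first term adds x - y exactly
# when x >= y. A quick numeric check of the identity:
#
#   import random
#   for _ in range(1000):
#       y_lb = random.uniform(-10.0, 0.0)
#       x = random.uniform(-10.0, 10.0)
#       y = random.uniform(y_lb, 10.0)
#       assert abs(max(x, y) - (max(x - y, 0) + max(y - y_lb, 0) + y_lb)) < 1e-9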
class MIPNetwork:

    def __init__(self, layers):
        '''
        layers: A list of Pytorch layers containing only Linear/ReLU/MaxPools
        '''
        self.layers = layers
        self.net = nn.Sequential(*layers)

        # Initialize a LinearizedNetwork object to determine the lower and
        # upper bounds at each layer.
        self.lin_net = LinearizedNetwork(layers)

    def solve(self, inp_domain, timeout=None):
        '''
        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension
        timeout: Maximum allowed time to run, if it is not None

        Returns:
        sat     : boolean indicating whether the MIP is satisfiable.
        solution: Feasible point if the MIP is satisfiable,
                  None otherwise.
        '''
        if self.lower_bounds[-1][0] > 0:
            # The problem is infeasible, and we haven't setup the MIP
            return (False, None, 0)

        if timeout is not None:
            self.model.setParam('TimeLimit', timeout)

        if self.check_obj_value_callback:
            def early_stop_cb(model, where):
                if where == grb.GRB.Callback.MIP:
                    best_bound = model.cbGet(grb.GRB.Callback.MIP_OBJBND)
                    if best_bound > 0:
                        model.terminate()

                if where == grb.GRB.Callback.MIPNODE:
                    nodeCount = model.cbGet(grb.GRB.Callback.MIPNODE_NODCNT)
                    if (nodeCount % 100) == 0:
                        # print(f"Running Nb states visited: {nodeCount}")
                        pass

                if where == grb.GRB.Callback.MIPSOL:
                    obj = model.cbGet(grb.GRB.Callback.MIPSOL_OBJ)
                    if obj < 0:
                        # Does it have a chance at being a valid
                        # counter-example?

                        # Check it with the network
                        input_vals = model.cbGetSolution(self.gurobi_vars[0])
                        with torch.no_grad():
                            inps = torch.Tensor(input_vals).view(1, -1)
                            out = self.net(inps).squeeze().item()

                        if out < 0:
                            model.terminate()
        else:
            def early_stop_cb(model, where):
                if where == grb.GRB.Callback.MIPNODE:
                    nodeCount = model.cbGet(grb.GRB.Callback.MIPNODE_NODCNT)
                    if (nodeCount % 100) == 0:
                        # print(f"Running Nb states visited: {nodeCount}")
                        pass

        self.model.optimize(early_stop_cb)
        nb_visited_states = self.model.nodeCount

        if self.model.status is grb.GRB.INFEASIBLE:
            # Infeasible: No solution
            return (False, None, nb_visited_states)
        elif self.model.status is grb.GRB.OPTIMAL:
            # There is a feasible solution. Return the feasible solution as well.
            len_inp = len(self.gurobi_vars[0])

            # Get the input that gives the feasible solution.
            inp = torch.Tensor(len_inp)
            for idx, var in enumerate(self.gurobi_vars[0]):
                inp[idx] = var.x
            optim_val = self.gurobi_vars[-1][-1].x

            return (optim_val < 0, (inp, optim_val), nb_visited_states)
        elif self.model.status is grb.GRB.INTERRUPTED:
            obj_bound = self.model.ObjBound

            if obj_bound > 0:
                return (False, None, nb_visited_states)
            else:
                # There is a feasible solution. Return the feasible solution as well.
                len_inp = len(self.gurobi_vars[0])

                # Get the input that gives the feasible solution.
                inp = torch.Tensor(len_inp)
                for idx, var in enumerate(self.gurobi_vars[0]):
                    inp[idx] = var.x
                optim_val = self.gurobi_vars[-1][-1].x

            return (optim_val < 0, (inp, optim_val), nb_visited_states)
        elif self.model.status is grb.GRB.TIME_LIMIT:
            # We timed out, return a None Status
            return (None, None, nb_visited_states)
        else:
            raise Exception("Unexpected Status code")

    def tune(self, param_outfile, tune_timeout):
        self.model.Params.tuneOutput = 1
        self.model.Params.tuneTimeLimit = tune_timeout
        self.model.tune()

        # Get the best set of parameters
        self.model.getTuneResult(0)

        self.model.write(param_outfile)

    def do_interval_analysis(self, inp_domain):
        self.lower_bounds = []
        self.upper_bounds = []

        self.lower_bounds.append(inp_domain[:, 0])
        self.upper_bounds.append(inp_domain[:, 1])

        layer_idx = 1
        for layer in self.layers:
            new_layer_lb = []
            new_layer_ub = []
            if type(layer) is nn.Linear:
                pos_weights = torch.clamp(layer.weight, min=0)
                neg_weights = torch.clamp(layer.weight, max=0)

                new_layer_lb = torch.mv(pos_weights, self.lower_bounds[-1]) + \
                               torch.mv(neg_weights, self.upper_bounds[-1]) + \
                               layer.bias
                new_layer_ub = torch.mv(pos_weights, self.upper_bounds[-1]) + \
                               torch.mv(neg_weights, self.lower_bounds[-1]) + \
                               layer.bias
            elif type(layer) == nn.ReLU:
                new_layer_lb = torch.clamp(self.lower_bounds[-1], min=0)
                new_layer_ub = torch.clamp(self.upper_bounds[-1], min=0)
            elif type(layer) == nn.MaxPool1d:
                assert layer.padding == 0, "Non supported Maxpool option"
                assert layer.dilation == 1, "Non supported Maxpool option"

                nb_pre = len(self.lower_bounds[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size
                while pre_window_end <= nb_pre:
                    lb = max(self.lower_bounds[-1][pre_start_idx:pre_window_end])
                    ub = max(self.upper_bounds[-1][pre_start_idx:pre_window_end])

                    new_layer_lb.append(lb)
                    new_layer_ub.append(ub)

                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                new_layer_lb = torch.Tensor(new_layer_lb)
                new_layer_ub = torch.Tensor(new_layer_ub)
            elif type(layer) == View:
                continue
            else:
                raise NotImplementedError

            self.lower_bounds.append(new_layer_lb)
            self.upper_bounds.append(new_layer_ub)

            layer_idx += 1

    def setup_model(self, inp_domain,
                    sym_bounds=False,
                    use_obj_function=False,
                    bounds="opt",
                    parameter_file=None):
        '''
        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension
        use_obj_function: If False, don't use any objective function, simply
                          add a constraint on the output.
                          If True, perform optimization and use a callback to
                          interrupt the solving when a counterexample is found.
        bounds: string, indicates what type of method should be used to get
                the intermediate bounds
        parameter_file: Load a set of parameters for the MIP solver if a path
                        is given.

        Setup the model to be optimized by Gurobi.
        '''
        if bounds == "opt":
            # First use define_linear_approximation from LinearizedNetwork to
            # compute upper and lower bounds to be able to define Ms
            self.lin_net.define_linear_approximation(inp_domain)

            self.lower_bounds = list(map(torch.Tensor, self.lin_net.lower_bounds))
            self.upper_bounds = list(map(torch.Tensor, self.lin_net.upper_bounds))
        elif bounds == "interval":
            self.do_interval_analysis(inp_domain)
            if self.lower_bounds[-1][0] > 0:
                # The problem is already guaranteed to be infeasible,
                # let's not waste time setting up the MIP.
                return
        elif bounds == "interval-kw":
            self.do_interval_analysis(inp_domain)
            kw_dual = LooseDualNetworkApproximation(self.layers)
            kw_dual.remove_maxpools(inp_domain, no_opt=True)
            lower_bounds, upper_bounds = kw_dual.get_intermediate_bounds(inp_domain)

            # We want to get the best out of interval-analysis and K&W.

            # TODO: There is a slight problem. To use the K&W code directly,
            # we need to make a bunch of changes, notably remove all of the
            # Maxpooling and convert them to ReLUs. Quick and temporary fix:
            # take the max of both things if the shapes are all the same so
            # far, and use the one from interval analysis after the first
            # difference.

            # If the network is fully ReLU, there should be no problem.
            # If the network is fully ReLU with a MaxPool at the end,
            # that's still okay because we get the best bounds until the
            # maxpool, and that's the last thing that we use the bounds for.
            # This is just going to suck if we have a Maxpool early in the
            # network, and even then, that just means we use interval
            # analysis, so stop complaining.
            for i in range(len(lower_bounds)):
                if lower_bounds[i].shape == self.lower_bounds[i].shape:
                    # Keep the best bound from either method
                    torch.max(lower_bounds[i], self.lower_bounds[i],
                              out=self.lower_bounds[i])
                    torch.min(upper_bounds[i], self.upper_bounds[i],
                              out=self.upper_bounds[i])
                else:
                    # Mismatch in dimension.
                    # Drop it and stop trying to improve on the interval
                    # analysis bounds.
                    break
            if self.lower_bounds[-1][0] > 0:
                # The problem is already guaranteed to be infeasible,
                # let's not waste time setting up the MIP.
                return
        else:
            raise NotImplementedError("Unknown bound computation method.")

        self.gurobi_vars = []

        self.model = grb.Model()
        self.model.setParam('OutputFlag', False)
        self.model.setParam('Threads', 1)
        self.model.setParam('DualReductions', 0)
        if parameter_file is not None:
            self.model.read(parameter_file)

        # First add the input variables as Gurobi variables.
        inp_gurobi_vars = []
        for dim, (lb, ub) in enumerate(inp_domain):
            v = self.model.addVar(lb=lb, ub=ub, obj=0,
                                  vtype=grb.GRB.CONTINUOUS,
                                  name=f'inp_{dim}')
            inp_gurobi_vars.append(v)
        self.gurobi_vars.append(inp_gurobi_vars)
        self.model.update()

        layer_idx = 1
        for layer in self.layers:
            new_layer_gurobi_vars = []
            if type(layer) is nn.Linear:
                for neuron_idx in range(layer.weight.size(0)):
                    lin_expr = layer.bias[neuron_idx].item()
                    for prev_neuron_idx_ten in torch.nonzero(layer.weight[neuron_idx]):
                        prev_neuron_idx = prev_neuron_idx_ten[0]
                        coeff = layer.weight[neuron_idx, prev_neuron_idx].item()
                        lin_expr += coeff * self.gurobi_vars[-1][prev_neuron_idx]
                    v = self.model.addVar(lb=-grb.GRB.INFINITY,
                                          ub=grb.GRB.INFINITY,
                                          vtype=grb.GRB.CONTINUOUS,
                                          name=f'lin_v_{layer_idx}_{neuron_idx}')
                    self.model.addConstr(v == lin_expr)
                    self.model.update()

                    # We are now done with this neuron.
                    new_layer_gurobi_vars.append(v)
            elif type(layer) == nn.ReLU:
                for neuron_idx, pre_var in enumerate(self.gurobi_vars[-1]):
                    pre_lb = self.lower_bounds[layer_idx - 1][neuron_idx].item()
                    pre_ub = self.upper_bounds[layer_idx - 1][neuron_idx].item()

                    # Use the constraints specified by
                    # Verifying Neural Networks with Mixed Integer Programming
                    #
                    # MIP formulation of ReLU:
                    #
                    # x = max(pre_var, 0)
                    #
                    # Introduce binary variable b, such that:
                    # b = 1 if pre_var is the maximum value, 0 otherwise
                    #
                    # We know the lower (pre_lb) and upper bounds (pre_ub) for pre_var.
                    #
                    # The MIP must then satisfy the following constraints:
                    # Constr_13: x <= pre_var - pre_lb * (1 - b)
                    # Constr_14: x >= pre_var
                    # Constr_15: x <= b * pre_ub
                    # Constr_16: x >= 0
                    if sym_bounds:
                        # We're going to use the big-M encoding of the other papers.
                        M = max(-pre_lb, pre_ub)
                        pre_lb = -M
                        pre_ub = M

                    if pre_lb <= 0 and pre_ub <= 0:
                        # The ReLU is always blocked: its output is 0.
                        x = 0
                    elif (pre_lb >= 0) and (pre_ub >= 0):
                        # The ReLU is always passing: its output is its input.
                        x = pre_var
                    else:
                        x = self.model.addVar(lb=0,
                                              ub=grb.GRB.INFINITY,
                                              vtype=grb.GRB.CONTINUOUS,
                                              name=f'ReLU_x_{layer_idx}_{neuron_idx}')
                        b = self.model.addVar(vtype=grb.GRB.BINARY,
                                              name=f'ReLU_b_{layer_idx}_{neuron_idx}')

                        self.model.addConstr(x <= pre_var - pre_lb * (1 - b),
                                             f'constr_{layer_idx}_{neuron_idx}_c13')
                        self.model.addConstr(x >= pre_var,
                                             f'constr_{layer_idx}_{neuron_idx}_c14')
                        self.model.addConstr(x <= b * pre_ub,
                                             f'constr_{layer_idx}_{neuron_idx}_c15')
                        # Constr_16 (x >= 0) is already implied by the bound on x.

                    self.model.update()
                    new_layer_gurobi_vars.append(x)
            elif type(layer) == nn.MaxPool1d:
                assert layer.padding == 0, "Non supported Maxpool option"
                assert layer.dilation == 1, "Non supported MaxPool option"
                nb_pre = len(self.gurobi_vars[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size
                while pre_window_end <= nb_pre:
                    ub_max = max(self.upper_bounds[layer_idx - 1]
                                 [pre_start_idx:pre_window_end]).item()
                    window_bin_vars = []
                    neuron_idx = pre_start_idx % stride
                    v = self.model.addVar(vtype=grb.GRB.CONTINUOUS,
                                          lb=-grb.GRB.INFINITY,
                                          ub=grb.GRB.INFINITY,
                                          name=f'MaxPool_out_{layer_idx}_{neuron_idx}')
                    for pre_var_idx, pre_var in enumerate(
                            self.gurobi_vars[-1][pre_start_idx:pre_window_end]):
                        lb = self.lower_bounds[layer_idx - 1][pre_start_idx + pre_var_idx].item()
                        b = self.model.addVar(
                            vtype=grb.GRB.BINARY,
                            name=f'MaxPool_b_{layer_idx}_{neuron_idx}_{pre_var_idx}')
                        # MIP formulation of max pooling:
                        #
                        # y = max(x_1, x_2, ..., x_n)
                        #
                        # Introduce binary variables d_1, d_2, ..., d_n:
                        # d_i = 1 if x_i is the maximum value, 0 otherwise
                        #
                        # We know the lower (l_i) and upper bounds (u_i) for x_i.
                        #
                        # Denote the maximum of the upper bounds of all inputs x_i as u_max.
                        #
                        # The MIP must then satisfy the following constraints:
                        #
                        # Constr_1: l_i <= x_i <= u_i
                        # Constr_2: y >= x_i
                        # Constr_3: y <= x_i + (u_max - l_i)*(1 - d_i)
                        # Constr_4: sum(d_1, d_2, ..., d_n) = 1

                        # Constr_1 is already satisfied due to the implementation of LinearizedNetworks.
                        # Constr_2
                        self.model.addConstr(v >= pre_var)
                        # Constr_3
                        self.model.addConstr(v <= pre_var + (ub_max - lb) * (1 - b))

                        window_bin_vars.append(b)
                    # Constr_4
                    self.model.addConstr(sum(window_bin_vars) == 1)
                    self.model.update()
                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                    new_layer_gurobi_vars.append(v)
            elif type(layer) == View:
                continue
            else:
                raise NotImplementedError

            self.gurobi_vars.append(new_layer_gurobi_vars)
            layer_idx += 1

        # Assert that this is as expected: a network with a single output
        assert len(self.gurobi_vars[-1]) == 1, "Network doesn't have scalar output"

        # Add the final constraint that the output must be less than or equal
        # to zero.
        if not use_obj_function:
            self.model.addConstr(self.gurobi_vars[-1][-1] <= 0)
            self.model.setObjective(0, grb.GRB.MAXIMIZE)
            self.check_obj_value_callback = False
        else:
            self.model.setObjective(self.gurobi_vars[-1][-1], grb.GRB.MINIMIZE)
            self.check_obj_value_callback = True

        self.model.update()
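
# --- Usage sketch (illustrative; not part of the original file) ---
# Typical flow for this class: compute intermediate bounds and build the MIP
# once with `setup_model`, then call `solve`. `layers` and `domain` are
# assumed to come from the surrounding verification code.
#
#   mip = MIPNetwork(layers)
#   mip.setup_model(domain, use_obj_function=True, bounds="interval-kw")
#   sat, solution, nb_states = mip.solve(domain, timeout=3600)
#   if sat is None:
#       print(f"timed out after visiting {nb_states} nodes")
#   elif sat:
#       inp, val = solution  # counter-example input and its network output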
class MIPNetwork:

    def __init__(self, layers):
        '''
        layers: A list of Pytorch layers containing only Linear/ReLU/MaxPools
        '''
        self.layers = layers
        self.net = nn.Sequential(*layers)

        # Initialize a LinearizedNetwork object to determine the lower and
        # upper bounds at each layer.
        self.lin_net = LinearizedNetwork(layers)

    def solve(self, inp_domain):
        '''
        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension

        Returns:
        sat     : boolean indicating whether the MIP is satisfiable.
        solution: Feasible point if the MIP is satisfiable,
                  None otherwise.
        '''
        # First use define_linear_approximation from LinearizedNetwork to
        # compute upper and lower bounds to be able to define Ms
        self.lin_net.define_linear_approximation(inp_domain)

        self.lower_bounds = self.lin_net.lower_bounds
        self.upper_bounds = self.lin_net.upper_bounds
        self.gurobi_vars = []

        self.model = grb.Model()
        self.model.setParam('OutputFlag', False)
        self.model.setParam('Threads', 1)

        # First add the input variables as Gurobi variables.
        inp_gurobi_vars = []
        for dim, (lb, ub) in enumerate(inp_domain):
            v = self.model.addVar(lb=lb, ub=ub, obj=0,
                                  vtype=grb.GRB.CONTINUOUS,
                                  name=f'inp_{dim}')
            inp_gurobi_vars.append(v)
        self.gurobi_vars.append(inp_gurobi_vars)
        self.model.update()

        layer_idx = 1
        for layer in self.layers:
            new_layer_gurobi_vars = []
            if type(layer) is nn.Linear:
                for neuron_idx in range(layer.weight.size(0)):
                    lin_expr = layer.bias.data[neuron_idx]
                    for prev_neuron_idx in range(layer.weight.size(1)):
                        coeff = layer.weight.data[neuron_idx, prev_neuron_idx]
                        lin_expr += coeff * self.gurobi_vars[-1][prev_neuron_idx]
                    v = self.model.addVar(lb=-grb.GRB.INFINITY,
                                          ub=grb.GRB.INFINITY,
                                          vtype=grb.GRB.CONTINUOUS,
                                          name=f'lin_v_{layer_idx}_{neuron_idx}')
                    self.model.addConstr(v == lin_expr)
                    self.model.update()

                    # We are now done with this neuron.
                    new_layer_gurobi_vars.append(v)
            elif type(layer) == nn.ReLU:
                for neuron_idx, pre_var in enumerate(self.gurobi_vars[-1]):
                    pre_lb = self.lower_bounds[layer_idx - 1][neuron_idx]
                    pre_ub = self.upper_bounds[layer_idx - 1][neuron_idx]

                    # Use the constraints specified by the
                    # Maximum Resilience of Artificial Neural Networks paper.
                    #
                    # MIP formulation of ReLU:
                    #
                    # x = max(pre_var, 0)
                    #
                    # Introduce binary variable b, such that:
                    # b = 1 if pre_var is the maximum value, 0 otherwise
                    #
                    # Introduce a constant M, such that -M <= pre_var <= M.
                    #
                    # We know the lower (pre_lb) and upper bounds (pre_ub) for pre_var.
                    # We can thus write the following:
                    # M = max(-pre_lb, pre_ub)
                    #
                    # The MIP must then satisfy the following constraints:
                    # Constr_2a: x >= 0
                    # Constr_2b: x >= pre_var
                    # Constr_3a: pre_var - b*M <= 0
                    # Constr_3b: pre_var + (1-b)*M >= 0
                    # Constr_4a: x <= pre_var + (1-b)*M
                    # Constr_4b: x <= b*M
                    M = max(-pre_lb, pre_ub)
                    x = self.model.addVar(lb=0,
                                          ub=grb.GRB.INFINITY,
                                          vtype=grb.GRB.CONTINUOUS,
                                          name=f'ReLU_x_{layer_idx}_{neuron_idx}')
                    b = self.model.addVar(vtype=grb.GRB.BINARY,
                                          name=f'ReLU_b_{layer_idx}_{neuron_idx}')

                    self.model.addConstr(x >= 0,
                                         f'constr_{layer_idx}_{neuron_idx}_c2a')
                    self.model.addConstr(x >= pre_var,
                                         f'constr_{layer_idx}_{neuron_idx}_c2b')
                    self.model.addConstr(pre_var - b * M <= 0,
                                         f'constr_{layer_idx}_{neuron_idx}_c3a')
                    self.model.addConstr(pre_var + (1 - b) * M >= 0,
                                         f'constr_{layer_idx}_{neuron_idx}_c3b')
                    self.model.addConstr(x <= pre_var + (1 - b) * M,
                                         f'constr_{layer_idx}_{neuron_idx}_c4a')
                    self.model.addConstr(x <= b * M,
                                         f'constr_{layer_idx}_{neuron_idx}_c4b')
                    self.model.update()

                    new_layer_gurobi_vars.append(x)
            elif type(layer) == nn.MaxPool1d:
                assert layer.padding == 0, "Non supported Maxpool option"
                assert layer.dilation == 1, "Non supported MaxPool option"
                nb_pre = len(self.gurobi_vars[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size
                while pre_window_end <= nb_pre:
                    ub_max = max(self.upper_bounds[layer_idx - 1][pre_start_idx:pre_window_end])
                    window_bin_vars = []
                    neuron_idx = pre_start_idx % stride
                    v = self.model.addVar(vtype=grb.GRB.CONTINUOUS,
                                          lb=-grb.GRB.INFINITY,
                                          ub=grb.GRB.INFINITY,
                                          name=f'MaxPool_out_{layer_idx}_{neuron_idx}')
                    for pre_var_idx, pre_var in enumerate(
                            self.gurobi_vars[-1][pre_start_idx:pre_window_end]):
                        lb = self.lower_bounds[layer_idx - 1][pre_start_idx + pre_var_idx]
                        b = self.model.addVar(
                            vtype=grb.GRB.BINARY,
                            name=f'MaxPool_b_{layer_idx}_{neuron_idx}_{pre_var_idx}')
                        # MIP formulation of max pooling:
                        #
                        # y = max(x_1, x_2, ..., x_n)
                        #
                        # Introduce binary variables d_1, d_2, ..., d_n:
                        # d_i = 1 if x_i is the maximum value, 0 otherwise
                        #
                        # We know the lower (l_i) and upper bounds (u_i) for x_i.
                        #
                        # Denote the maximum of the upper bounds of all inputs x_i as u_max.
                        #
                        # The MIP must then satisfy the following constraints:
                        #
                        # Constr_1: l_i <= x_i <= u_i
                        # Constr_2: y >= x_i
                        # Constr_3: y <= x_i + (u_max - l_i)*(1 - d_i)
                        # Constr_4: sum(d_1, d_2, ..., d_n) = 1

                        # Constr_1 is already satisfied due to the implementation of LinearizedNetworks.
                        # Constr_2
                        self.model.addConstr(v >= pre_var)
                        # Constr_3
                        self.model.addConstr(v <= pre_var + (ub_max - lb) * (1 - b))

                        window_bin_vars.append(b)
                    # Constr_4
                    self.model.addConstr(sum(window_bin_vars) == 1)
                    self.model.update()
                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                    new_layer_gurobi_vars.append(v)
            elif type(layer) == View:
                continue
            else:
                raise NotImplementedError

            self.gurobi_vars.append(new_layer_gurobi_vars)
            layer_idx += 1

        # Assert that this is as expected: a network with a single output
        assert len(self.gurobi_vars[-1]) == 1, "Network doesn't have scalar output"

        # Add the final constraint that the output must be less than or equal
        # to zero.
        self.model.addConstr(self.gurobi_vars[-1][-1] <= 0)

        # Optimize the model.
        self.model.update()
        self.model.setObjective(0, grb.GRB.MAXIMIZE)
        self.model.optimize()

        if self.model.status is grb.GRB.INFEASIBLE:
            # Infeasible: No solution
            return (False, None)
        else:
            # There is a feasible solution. Return the feasible solution as well.
            len_inp = len(self.gurobi_vars[0])

            # Get the input that gives the feasible solution.
            inp = torch.Tensor(len_inp)
            for idx, var in enumerate(self.gurobi_vars[0]):
                inp[idx] = var.x

            return (True, inp)
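
# --- Sanity check for the big-M ReLU encoding above (illustrative) ---
# For b in {0, 1}, the six constraints collapse x to max(pre_var, 0):
#   b = 0  =>  pre_var <= 0 (3a); x >= 0 (2a) and x <= 0 (4b)           => x = 0
#   b = 1  =>  pre_var >= 0 (3b); x >= pre_var (2b), x <= pre_var (4a)  => x = pre_var
# A brute-force check of the feasible interval for x:
#
#   pre_lb, pre_ub = -5.0, 3.0
#   M = max(-pre_lb, pre_ub)
#   for pre in [-4.2, -1.0, 0.0, 1.5, 3.0]:
#       for b in (0, 1):
#           if pre - b * M > 0 or pre + (1 - b) * M < 0:
#               continue  # this value of b is infeasible for this pre_var
#           lo = max(0.0, pre)                  # Constr_2a, Constr_2b
#           hi = min(pre + (1 - b) * M, b * M)  # Constr_4a, Constr_4b
#           if lo <= hi:
#               assert lo == hi == max(pre, 0.0)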
def main():
    parser = argparse.ArgumentParser(
        description="Compute and time a bunch of bounds.")
    parser.add_argument('eps', type=float, help='Epsilon - default: 0.1')
    parser.add_argument('target_directory', type=str,
                        help='Where to store the results')
    parser.add_argument('--modulo', type=int,
                        help='Numbers of a job to split the dataset over.')
    parser.add_argument('--modulo_do', type=int,
                        help='Which job_id is this one.')
    parser.add_argument('--from_intermediate_bounds', action='store_true',
                        help="if this flag is true, intermediate bounds are "
                             "computed w/ best of naive-KW")
    parser.add_argument('--network', type=str, help='which network to use',
                        default="wide", choices=["wide", "deep"])
    args = parser.parse_args()

    results_dir = args.target_directory
    os.makedirs(results_dir, exist_ok=True)

    testset_size = int(1e5)
    for idx in range(testset_size):
        if (args.modulo is not None) and (idx % args.modulo != args.modulo_do):
            continue
        target_dir = os.path.join(results_dir, f"{idx}")
        os.makedirs(target_dir, exist_ok=True)

        X, y, elided_models = load_mnist_wide_net(idx, mnist_test=None)
        if X is None:
            continue
        elided_model = elided_models[y]
        to_ignore = y

        domain = torch.stack([
            torch.clamp(X.squeeze(0) - args.eps, 0, None),
            torch.clamp(X.squeeze(0) + args.eps, None, 1.0)
        ], -1).unsqueeze(0)

        lin_approx_string = "" if not args.from_intermediate_bounds else "-fromintermediate"

        # compute intermediate bounds with KW. Use only these for every method
        # to allow comparison on the last layer, and optimize only the last layer.
        if args.from_intermediate_bounds:
            cuda_elided_model = copy.deepcopy(elided_model).cuda()
            cuda_domain = domain.cuda()
            intermediate_net = SaddleLP([lay for lay in cuda_elided_model])
            with torch.no_grad():
                intermediate_net.set_solution_optimizer('best_naive_kw', None)
                intermediate_net.define_linear_approximation(
                    cuda_domain, no_conv=False, override_numerical_errors=True)
            intermediate_ubs = intermediate_net.upper_bounds
            intermediate_lbs = intermediate_net.lower_bounds

        ## Proximal methods
        for optprox_steps in [400]:
            optprox_params = {
                'nb_total_steps': optprox_steps,
                'max_nb_inner_steps': 2,  # this is 2/5 as simpleprox
                'initial_eta': 1e0,
                'final_eta': 5e1,
                'log_values': False,
                'inner_cutoff': 0,
                'maintain_primal': True,
                'acceleration_dict': {
                    'momentum': 0.3,  # decent momentum: 0.9 w/ increasing eta
                }
            }
            optprox_target_file = os.path.join(
                target_dir,
                f"Proximal_finalmomentum_{optprox_steps}{lin_approx_string}.txt")
            if not os.path.exists(optprox_target_file):
                cuda_elided_model = copy.deepcopy(elided_model).cuda()
                cuda_domain = domain.cuda()
                optprox_net = SaddleLP([lay for lay in cuda_elided_model])
                optprox_start = time.time()
                with torch.no_grad():
                    optprox_net.set_decomposition('pairs', 'KW')
                    optprox_net.set_solution_optimizer('optimized_prox',
                                                       optprox_params)
                    if not args.from_intermediate_bounds:
                        optprox_net.define_linear_approximation(cuda_domain,
                                                                no_conv=False)
                        ub = optprox_net.upper_bounds[-1]
                    else:
                        optprox_net.build_model_using_bounds(
                            cuda_domain, (intermediate_lbs, intermediate_ubs))
                        _, ub = optprox_net.compute_lower_bound()
                optprox_end = time.time()
                optprox_time = optprox_end - optprox_start
                optprox_ubs = ub.cpu()

                del optprox_net
                dump_bounds(optprox_target_file, optprox_time, optprox_ubs)

        ## Gurobi PLANET Bounds
        grb_target_file = os.path.join(target_dir,
                                       f"Gurobi{lin_approx_string}-fixed.txt")
        if not os.path.exists(grb_target_file):
            grb_net = LinearizedNetwork([lay for lay in elided_model])
            grb_start = time.time()
            if not args.from_intermediate_bounds:
                grb_net.define_linear_approximation(domain[0], n_threads=4)
                ub = grb_net.upper_bounds[-1]
            else:
                grb_net.build_model_using_bounds(
                    domain[0],
                    ([lbs[0].cpu() for lbs in intermediate_lbs],
                     [ubs[0].cpu() for ubs in intermediate_ubs]),
                    n_threads=4)
                _, ub = grb_net.compute_lower_bound(ub_only=True)
            grb_end = time.time()
            grb_time = grb_end - grb_start
            grb_ubs = torch.Tensor(ub).cpu()
            dump_bounds(grb_target_file, grb_time, grb_ubs)

        ## Cuts
        for cut_steps in [80, 600, 1050, 1650, 2500]:
            explp_params = {
                "nb_iter": cut_steps,
                'bigm': "init",
                'cut': "only",
                "bigm_algorithm": "adam",
                'cut_frequency': 450,
                'max_cuts': 12,
                'cut_add': 2,
                'betas': (0.9, 0.999),
                'initial_step_size': 1e-3,
                'final_step_size': 1e-6,
                "init_params": {
                    "nb_outer_iter": 500,
                    'initial_step_size': 1e-1,
                    'final_step_size': 1e-3,
                    'betas': (0.9, 0.999)
                },
            }
            cut_target_file = os.path.join(
                target_dir, f"Cuts_{cut_steps}{lin_approx_string}.txt")
            if not os.path.exists(cut_target_file):
                cuda_elided_model = copy.deepcopy(elided_model).cuda()
                cuda_domain = domain.cuda()
                exp_net = ExpLP([lay for lay in cuda_elided_model],
                                params=explp_params, use_preactivation=True,
                                fixed_M=True)
                exp_start = time.time()
                with torch.no_grad():
                    if not args.from_intermediate_bounds:
                        exp_net.define_linear_approximation(cuda_domain)
                        ub = exp_net.upper_bounds[-1]
                    else:
                        exp_net.build_model_using_bounds(
                            cuda_domain, (intermediate_lbs, intermediate_ubs))
                        _, ub = exp_net.compute_lower_bound()
                exp_end = time.time()
                exp_time = exp_end - exp_start
                exp_ubs = ub.cpu()

                del exp_net
                dump_bounds(cut_target_file, exp_time, exp_ubs)

        ## Big-M supergradient (iters tuned to take same time as prox)
        for bigm_steps in [850]:
            bigm_adam_params = {
                "bigm_algorithm": "adam",
                "bigm": "only",
                "nb_outer_iter": bigm_steps,
                'initial_step_size': 1e-1,
                'final_step_size': 1e-3,
                'betas': (0.9, 0.999)
            }
            bigm_target_file = os.path.join(
                target_dir, f"Big-M_{bigm_steps}{lin_approx_string}.txt")
            if not os.path.exists(bigm_target_file):
                cuda_elided_model = copy.deepcopy(elided_model).cuda()
                cuda_domain = domain.cuda()
                bigm_net = ExpLP([lay for lay in cuda_elided_model],
                                 params=bigm_adam_params,
                                 use_preactivation=True, fixed_M=True)
                bigm_start = time.time()
                with torch.no_grad():
                    if not args.from_intermediate_bounds:
                        bigm_net.define_linear_approximation(cuda_domain)
                        ub = bigm_net.upper_bounds[-1]
                    else:
                        bigm_net.build_model_using_bounds(
                            cuda_domain, (intermediate_lbs, intermediate_ubs))
                        _, ub = bigm_net.compute_lower_bound()
                bigm_end = time.time()
                bigm_time = bigm_end - bigm_start
                bigm_ubs = ub.cpu()

                del bigm_net
                dump_bounds(bigm_target_file, bigm_time, bigm_ubs)

        ## Gurobi Anderson Bounds
        for n_cuts in [1]:
            grb_and_target_file = os.path.join(
                target_dir,
                f"Anderson-{n_cuts}cuts{lin_approx_string}-fixed.txt")
            if not os.path.exists(grb_and_target_file):
                lp_and_grb_net = AndersonLinearizedNetwork(
                    [lay for lay in elided_model], mode="lp-cut",
                    n_cuts=n_cuts, cuts_per_neuron=True)
                lp_and_grb_start = time.time()
                if not args.from_intermediate_bounds:
                    lp_and_grb_net.define_linear_approximation(domain[0],
                                                               n_threads=4)
                    ub = lp_and_grb_net.upper_bounds[-1]
                else:
                    lp_and_grb_net.build_model_using_bounds(
                        domain[0],
                        ([lbs[0].cpu() for lbs in intermediate_lbs],
                         [ubs[0].cpu() for ubs in intermediate_ubs]),
                        n_threads=4)
                    _, ub = lp_and_grb_net.compute_lower_bound(ub_only=True)
                lp_and_grb_end = time.time()
                lp_and_grb_time = lp_and_grb_end - lp_and_grb_start
                lp_and_grb_ubs = torch.Tensor(ub).cpu()
                dump_bounds(grb_and_target_file, lp_and_grb_time, lp_and_grb_ubs)
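
# --- Invocation sketch (illustrative) ---
# The script is driven entirely by the CLI defined in `main`. A sharded run
# over the test set might look like the following (the script filename and
# paths are assumptions; the flags match the argparse definition above):
#
#   python compute_mnist_bounds.py 0.1 ./results/wide_eps0.1 \
#       --network wide --from_intermediate_bounds --modulo 4 --modulo_do 0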
class MIPNetwork:

    def __init__(self, layers):
        '''
        layers: A list of Pytorch layers containing only Linear/ReLU/MaxPools
        '''
        self.layers = layers
        self.net = nn.Sequential(*layers)

        # Initialize a LinearizedNetwork object to determine the lower and
        # upper bounds at each layer.
        self.lin_net = LinearizedNetwork(layers)

    def solve(self, inp_domain, timeout=None):
        '''
        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension
        timeout: Maximum allowed time to run, if it is not None

        Returns:
        sat     : boolean indicating whether the MIP is satisfiable.
        solution: Feasible point if the MIP is satisfiable,
                  None otherwise.
        '''
        if self.lower_bounds[-1].min() > 0:
            print("Early stopping")
            # The problem is infeasible, and we haven't setup the MIP
            return (False, None, 0)

        if timeout is not None:
            self.model.setParam('TimeLimit', timeout)

        if self.check_obj_value_callback:
            def early_stop_cb(model, where):
                if where == grb.GRB.Callback.MIP:
                    best_bound = model.cbGet(grb.GRB.Callback.MIP_OBJBND)
                    if best_bound > 0:
                        model.terminate()

                if where == grb.GRB.Callback.MIPNODE:
                    nodeCount = model.cbGet(grb.GRB.Callback.MIPNODE_NODCNT)
                    if (nodeCount % 100) == 0:
                        print(f"Running Nb states visited: {nodeCount}")

                if where == grb.GRB.Callback.MIPSOL:
                    obj = model.cbGet(grb.GRB.Callback.MIPSOL_OBJ)
                    if obj < 0:
                        # Does it have a chance at being a valid
                        # counter-example?

                        # Check it with the network
                        input_vals = model.cbGetSolution(self.gurobi_vars[0])

                        with torch.no_grad():
                            if isinstance(input_vals, list):
                                inps = torch.Tensor(input_vals).view(1, -1)
                            else:
                                assert isinstance(input_vals, grb.tupledict)
                                inps = torch.Tensor([val for val in input_vals.values()])
                                inps = inps.view((1,) + self.lower_bounds[0].shape)
                            out = self.net(inps).squeeze()
                            # In case there are several outputs of the network,
                            # get the minimum one.
                            out = out.min().item()

                        if out < 0:
                            model.terminate()
        else:
            def early_stop_cb(model, where):
                if where == grb.GRB.Callback.MIPNODE:
                    nodeCount = model.cbGet(grb.GRB.Callback.MIPNODE_NODCNT)
                    if (nodeCount % 100) == 0:
                        print(f"Running Nb states visited: {nodeCount}")

        self.model.optimize(early_stop_cb)
        nb_visited_states = self.model.nodeCount

        if self.model.status is grb.GRB.INFEASIBLE:
            # Infeasible: No solution
            return (False, None, nb_visited_states)
        elif self.model.status is grb.GRB.OPTIMAL:
            # There is a feasible solution. Return only the objective value;
            # the input extraction is skipped here.
            optim_val = self.gurobi_vars[-1][-1].x

            return (optim_val < 0, (None, optim_val), nb_visited_states)
        elif self.model.status is grb.GRB.INTERRUPTED:
            obj_bound = self.model.ObjBound

            if obj_bound > 0:
                return (False, None, nb_visited_states)
            else:
                # There is a feasible solution. Return the feasible solution as well.
                len_inp = len(self.gurobi_vars[0])

                # Get the input that gives the feasible solution.
                inp = torch.Tensor(len_inp)
                if isinstance(self.gurobi_vars[0], list):
                    for idx, var in enumerate(self.gurobi_vars[0]):
                        inp[idx] = var.x
                else:
                    # assert isinstance(self.gurobi_vars[0], grb.tupledict)
                    inp = torch.zeros_like(self.lower_bounds[0])
                    for idx, var in self.gurobi_vars[0].items():
                        inp[idx] = var.x
                optim_val = self.gurobi_vars[-1][-1].x

            return (optim_val < 0, (inp, optim_val), nb_visited_states)
        elif self.model.status is grb.GRB.TIME_LIMIT:
            # We timed out, return a None Status
            return (None, None, nb_visited_states)
        else:
            raise Exception("Unexpected Status code")

    def tune(self, param_outfile, tune_timeout):
        self.model.Params.tuneOutput = 1
        self.model.Params.tuneTimeLimit = tune_timeout
        self.model.tune()

        # Get the best set of parameters
        self.model.getTuneResult(0)

        self.model.write(param_outfile)

    def do_interval_analysis(self, inp_domain):
        self.lower_bounds = []
        self.upper_bounds = []

        self.lower_bounds.append(inp_domain.select(-1, 0))
        self.upper_bounds.append(inp_domain.select(-1, 1))

        layer_idx = 1
        current_lb = self.lower_bounds[-1]
        current_ub = self.upper_bounds[-1]
        for layer in self.layers:
            if isinstance(layer, nn.Linear) or isinstance(layer, nn.Conv2d):
                if type(layer) is nn.Linear:
                    pos_weights = torch.clamp(layer.weight, min=0)
                    neg_weights = torch.clamp(layer.weight, max=0)

                    new_layer_lb = torch.mv(pos_weights, current_lb) + \
                                   torch.mv(neg_weights, current_ub) + \
                                   layer.bias
                    new_layer_ub = torch.mv(pos_weights, current_ub) + \
                                   torch.mv(neg_weights, current_lb) + \
                                   layer.bias
                elif type(layer) is nn.Conv2d:
                    pre_lb = torch.Tensor(current_lb).unsqueeze(0)
                    pre_ub = torch.Tensor(current_ub).unsqueeze(0)
                    pos_weight = torch.clamp(layer.weight, 0, None)
                    neg_weight = torch.clamp(layer.weight, None, 0)

                    out_lbs = (F.conv2d(pre_lb, pos_weight, layer.bias,
                                        layer.stride, layer.padding,
                                        layer.dilation, layer.groups)
                               + F.conv2d(pre_ub, neg_weight, None,
                                          layer.stride, layer.padding,
                                          layer.dilation, layer.groups))
                    out_ubs = (F.conv2d(pre_ub, pos_weight, layer.bias,
                                        layer.stride, layer.padding,
                                        layer.dilation, layer.groups)
                               + F.conv2d(pre_lb, neg_weight, None,
                                          layer.stride, layer.padding,
                                          layer.dilation, layer.groups))
                    new_layer_lb = out_lbs.squeeze(0)
                    new_layer_ub = out_ubs.squeeze(0)
                self.lower_bounds.append(new_layer_lb)
                self.upper_bounds.append(new_layer_ub)
                current_lb = new_layer_lb
                current_ub = new_layer_ub
            elif type(layer) == nn.ReLU:
                current_lb = torch.clamp(current_lb, min=0)
                current_ub = torch.clamp(current_ub, min=0)
            elif type(layer) == nn.MaxPool1d:
                new_layer_lb = []
                new_layer_ub = []
                assert layer.padding == 0, "Non supported Maxpool option"
                assert layer.dilation == 1, "Non supported Maxpool option"

                nb_pre = len(self.lower_bounds[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size
                while pre_window_end <= nb_pre:
                    lb = max(current_lb[pre_start_idx:pre_window_end])
                    ub = max(current_ub[pre_start_idx:pre_window_end])

                    new_layer_lb.append(lb)
                    new_layer_ub.append(ub)

                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                current_lb = torch.Tensor(new_layer_lb)
                current_ub = torch.Tensor(new_layer_ub)
                self.lower_bounds.append(current_lb)
                self.upper_bounds.append(current_ub)
            elif type(layer) == View:
                continue
            elif type(layer) == Flatten:
                current_lb = current_lb.view(-1)
                current_ub = current_ub.view(-1)
            else:
                raise NotImplementedError

    def setup_model(self, inp_domain,
                    use_obj_function=False,
                    bounds="opt",
                    parameter_file=None):
        '''
        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension
        use_obj_function: If False, don't use any objective function, simply
                          add a constraint on the output.
                          If True, perform optimization and use a callback to
                          interrupt the solving when a counterexample is found.
        bounds: string, indicates what type of method should be used to get
                the intermediate bounds
        parameter_file: Load a set of parameters for the MIP solver if a path
                        is given.

        Setup the model to be optimized by Gurobi.
        '''
        if bounds == "opt":
            # First use define_linear_approximation from LinearizedNetwork to
            # compute upper and lower bounds to be able to define Ms
            self.lin_net.define_linear_approximation(inp_domain)

            self.lower_bounds = list(map(torch.Tensor, self.lin_net.lower_bounds))
            self.upper_bounds = list(map(torch.Tensor, self.lin_net.upper_bounds))
        elif bounds == "interval":
            self.do_interval_analysis(inp_domain)
            if self.lower_bounds[-1][0] > 0:
                # The problem is already guaranteed to be infeasible,
                # let's not waste time setting up the MIP.
                return
        elif bounds == "interval-kw":
            self.do_interval_analysis(inp_domain)
            kw_dual = LooseDualNetworkApproximation(self.layers)
            kw_dual.remove_maxpools(inp_domain, no_opt=True)
            lower_bounds, upper_bounds = kw_dual.get_intermediate_bounds(inp_domain)

            # We want to get the best out of interval-analysis and K&W.

            # TODO: There is a slight problem. To use the K&W code directly,
            # we need to make a bunch of changes, notably remove all of the
            # Maxpooling and convert them to ReLUs. Quick and temporary fix:
            # take the max of both things if the shapes are all the same so
            # far, and use the one from interval analysis after the first
            # difference.

            # If the network is fully ReLU, there should be no problem.
            # If the network is fully ReLU with a MaxPool at the end,
            # that's still okay because we get the best bounds until the
            # maxpool, and that's the last thing that we use the bounds for.
            # This is just going to suck if we have a Maxpool early in the
            # network, and even then, that just means we use interval
            # analysis, so stop complaining.
            for i in range(len(lower_bounds)):
                if lower_bounds[i].shape == self.lower_bounds[i].shape:
                    # Keep the best bound from either method
                    torch.max(lower_bounds[i], self.lower_bounds[i],
                              out=self.lower_bounds[i])
                    torch.min(upper_bounds[i], self.upper_bounds[i],
                              out=self.upper_bounds[i])
                else:
                    # Mismatch in dimension.
                    # Drop it and stop trying to improve on the interval
                    # analysis bounds.
                    break

            if self.lower_bounds[-1].min() > 0:
                # The problem is already guaranteed to be infeasible,
                # let's not waste time setting up the MIP.
                return
        else:
            raise NotImplementedError("Unknown bound computation method.")

        self.gurobi_vars = []

        self.model = grb.Model()
        self.model.setParam('OutputFlag', False)
        self.model.setParam('Threads', 1)
        self.model.setParam('DualReductions', 0)
        if parameter_file is not None:
            self.model.read(parameter_file)

        self.zero_var = self.model.addVar(lb=0, ub=0, obj=0,
                                          vtype=grb.GRB.CONTINUOUS,
                                          name='zero')

        # First add the input variables as Gurobi variables.
        if inp_domain.dim() == 2:
            inp_gurobi_vars = self.model.addVars(
                [i for i in range(inp_domain.numel() // 2)],
                lb=self.lower_bounds[0],
                ub=self.upper_bounds[0],
                name='inp')
            inp_gurobi_vars = [var for key, var in inp_gurobi_vars.items()]
        else:
            inp_gurobi_vars = {}
            for chan in range(inp_domain.size(0)):
                for row in range(inp_domain.size(1)):
                    for col in range(inp_domain.size(2)):
                        lb = inp_domain[chan, row, col, 0]
                        ub = inp_domain[chan, row, col, 1]
                        v = self.model.addVar(lb=lb, ub=ub, obj=0,
                                              vtype=grb.GRB.CONTINUOUS,
                                              name=f'inp_[{chan},{row},{col}]')
                        inp_gurobi_vars[(chan, row, col)] = v
        self.gurobi_vars.append(inp_gurobi_vars)

        layer_idx = 1
        for layer in self.layers:
            if type(layer) is nn.Linear:
                layer_nb_out = layer.out_features
                pre_vars = self.gurobi_vars[-1]
                if isinstance(pre_vars, grb.tupledict):
                    pre_vars = [var for key, var in sorted(pre_vars.items())]
                # Build all the outputs of the linear layer
                new_vars = self.model.addVars([i for i in range(layer_nb_out)],
                                              lb=self.lower_bounds[layer_idx],
                                              ub=self.upper_bounds[layer_idx],
                                              name=f'zhat{layer_idx}')
                new_layer_gurobi_vars = [var for key, var in new_vars.items()]
                self.model.addConstrs(
                    ((grb.LinExpr(layer.weight[neuron_idx, :], pre_vars)
                      + layer.bias[neuron_idx].item()) == new_vars[neuron_idx]
                     for neuron_idx in range(layer.out_features)),
                    name=f'lay{layer_idx}'
                )
            elif type(layer) is nn.Conv2d:
                in_shape = self.lower_bounds[layer_idx - 1].shape
                out_shape = self.lower_bounds[layer_idx].shape

                flat_idxs = [elt for elt in product(range(out_shape[0]),
                                                    range(out_shape[1]),
                                                    range(out_shape[2]))]
                flat_out_lbs = [self.lower_bounds[layer_idx][chan, row, col]
                                for chan, row, col in product(range(out_shape[0]),
                                                              range(out_shape[1]),
                                                              range(out_shape[2]))]
                flat_out_ubs = [self.upper_bounds[layer_idx][chan, row, col]
                                for chan, row, col in product(range(out_shape[0]),
                                                              range(out_shape[1]),
                                                              range(out_shape[2]))]
                new_layer_gurobi_vars = self.model.addVars(flat_idxs,
                                                           lb=flat_out_lbs,
                                                           ub=flat_out_ubs,
                                                           name=f'zhat{layer_idx}')
                coeffs = []
                for out_chan_idx in range(out_shape[0]):
                    coeffs.append(layer.weight[out_chan_idx, :].view(-1))

                def make_lin_expr(out_chan_idx, out_row_idx, out_col_idx):
                    lin_bias = layer.bias[out_chan_idx].item()
                    lin_coeffs = coeffs[out_chan_idx]

                    start_row_idx = -layer.padding[0] + layer.stride[0] * out_row_idx
                    end_row_idx = start_row_idx + layer.weight.shape[2]
                    start_col_idx = -layer.padding[1] + layer.stride[1] * out_col_idx
                    end_col_idx = start_col_idx + layer.weight.shape[3]

                    # Out-of-bounds indices correspond to the zero padding,
                    # encoded through self.zero_var.
                    lin_vars = [
                        (self.zero_var if ((row_idx < 0) or (row_idx == in_shape[1])
                                           or (col_idx < 0) or (col_idx == in_shape[2]))
                         else self.gurobi_vars[-1][(chan_idx, row_idx, col_idx)])
                        for chan_idx in range(in_shape[0])
                        for row_idx in range(start_row_idx, end_row_idx)
                        for col_idx in range(start_col_idx, end_col_idx)
                    ]
                    lin_expr = grb.LinExpr(lin_coeffs, lin_vars) + lin_bias
                    return lin_expr

                constrs = []
                for out_chan_idx in range(out_shape[0]):
                    for out_row_idx in range(out_shape[1]):
                        for out_col_idx in range(out_shape[2]):
                            constrs.append(
                                make_lin_expr(out_chan_idx, out_row_idx, out_col_idx)
                                == new_layer_gurobi_vars[(out_chan_idx,
                                                          out_row_idx,
                                                          out_col_idx)])
                self.model.addConstrs(constr for constr in constrs)
            elif type(layer) == nn.ReLU:
                pre_lbs = self.lower_bounds[layer_idx]
                pre_ubs = self.upper_bounds[layer_idx]
                if isinstance(self.gurobi_vars[-1], grb.tupledict):
                    amb_mask = (pre_lbs < 0) & (pre_ubs > 0)
                    if amb_mask.sum().item() != 0:
                        to_new_preubs = pre_ubs[amb_mask]
                        to_new_prelbs = pre_lbs[amb_mask]

                        new_var_idxs = torch.nonzero((pre_lbs < 0) & (pre_ubs > 0)).numpy().tolist()
                        new_var_idxs = [tuple(idxs) for idxs in new_var_idxs]
                        new_layer_gurobi_vars = self.model.addVars(new_var_idxs,
                                                                   lb=0,
                                                                   ub=to_new_preubs,
                                                                   name=f'z{layer_idx}')
                        new_binary_vars = self.model.addVars(new_var_idxs,
                                                             lb=0, ub=1,
                                                             vtype=grb.GRB.BINARY,
                                                             name=f'delta{layer_idx}')

                        flat_new_vars = [new_layer_gurobi_vars[idx] for idx in new_var_idxs]
                        flat_binary_vars = [new_binary_vars[idx] for idx in new_var_idxs]
                        pre_amb_vars = [self.gurobi_vars[-1][idx] for idx in new_var_idxs]

                        # C1: Superior to 0
                        # C2: Add the constraint that it's superior to the inputs
                        self.model.addConstrs(
                            (flat_new_vars[idx] >= pre_amb_vars[idx]
                             for idx in range(len(flat_new_vars))),
                            name=f'ReLU_lb{layer_idx}'
                        )
                        # C3: Below binary*upper_bound
                        self.model.addConstrs(
                            (flat_new_vars[idx] <= to_new_preubs[idx].item() * flat_binary_vars[idx]
                             for idx in range(len(flat_new_vars))),
                            name=f'ReLU{layer_idx}_ub1-'
                        )
                        # C4: Below binary*lower_bound
                        self.model.addConstrs(
                            (flat_new_vars[idx] <= (pre_amb_vars[idx]
                                                    - to_new_prelbs[idx].item()
                                                    * (1 - flat_binary_vars[idx]))
                             for idx in range(len(flat_new_vars))),
                            name=f'ReLU{layer_idx}_ub2-'
                        )
                    else:
                        new_layer_gurobi_vars = grb.tupledict()

                    for pos in torch.nonzero(pre_lbs >= 0).numpy().tolist():
                        pos = tuple(pos)
                        new_layer_gurobi_vars[pos] = self.gurobi_vars[-1][pos]
                    for pos in torch.nonzero(pre_ubs <= 0).numpy().tolist():
                        new_layer_gurobi_vars[tuple(pos)] = self.zero_var
                else:
                    assert isinstance(self.gurobi_vars[-1][0], grb.Var)
                    amb_mask = (pre_lbs < 0) & (pre_ubs > 0)
                    if amb_mask.sum().item() == 0:
                        # print("WARNING: No ambiguous ReLU at a layer")
                        pass
                    else:
                        to_new_preubs = pre_ubs[amb_mask]
                        new_var_idxs = torch.nonzero(amb_mask).squeeze(1).numpy().tolist()
                        new_vars = self.model.addVars(new_var_idxs,
                                                      lb=0,
                                                      ub=to_new_preubs,
                                                      name=f'z{layer_idx}')
                        new_binary_vars = self.model.addVars(new_var_idxs,
                                                             lb=0, ub=1,
                                                             vtype=grb.GRB.BINARY,
                                                             name=f'delta{layer_idx}')

                        # C1: Superior to 0
                        # C2: Add the constraint that it's superior to the inputs
                        self.model.addConstrs(
                            (new_vars[idx] >= self.gurobi_vars[-1][idx]
                             for idx in new_var_idxs),
                            name=f'ReLU_lb{layer_idx}'
                        )
                        # C3: Below binary*upper_bound
                        self.model.addConstrs(
                            (new_vars[idx] <= pre_ubs[idx].item() * new_binary_vars[idx]
                             for idx in new_var_idxs),
                            name=f'ReLU{layer_idx}_ub1-'
                        )
                        # C4: Below binary*lower_bound
                        self.model.addConstrs(
                            (new_vars[idx] <= (self.gurobi_vars[-1][idx]
                                               - pre_lbs[idx].item()
                                               * (1 - new_binary_vars[idx]))
                             for idx in new_var_idxs),
                            name=f'ReLU{layer_idx}_ub2-'
                        )

                    # Get all the variables in a list, such that we have the
                    # output of the layer
                    new_layer_gurobi_vars = []
                    new_idx = 0
                    for idx in range(layer_nb_out):
                        if pre_lbs[idx] >= 0:
                            # Pass through variable
                            new_layer_gurobi_vars.append(self.gurobi_vars[-1][idx])
                        elif pre_ubs[idx] <= 0:
                            # Blocked variable
                            new_layer_gurobi_vars.append(self.zero_var)
                        else:
                            new_layer_gurobi_vars.append(new_vars[idx])
                layer_idx += 1
            elif type(layer) == nn.MaxPool1d:
                assert layer.padding == 0, "Non supported Maxpool option"
                assert layer.dilation == 1, "Non supported MaxPool option"
                new_layer_gurobi_vars = []
                nb_pre = len(self.gurobi_vars[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size
                while pre_window_end <= nb_pre:
                    ub_max = max(self.upper_bounds[layer_idx - 1]
                                 [pre_start_idx:pre_window_end]).item()
                    window_bin_vars = []
                    neuron_idx = pre_start_idx % stride
                    v = self.model.addVar(vtype=grb.GRB.CONTINUOUS,
                                          lb=-grb.GRB.INFINITY,
                                          ub=grb.GRB.INFINITY,
                                          name=f'MaxPool_out_{layer_idx}_{neuron_idx}')
                    for pre_var_idx, pre_var in enumerate(
                            self.gurobi_vars[-1][pre_start_idx:pre_window_end]):
                        lb = self.lower_bounds[layer_idx - 1][pre_start_idx + pre_var_idx].item()
                        b = self.model.addVar(
                            vtype=grb.GRB.BINARY,
                            name=f'MaxPool_b_{layer_idx}_{neuron_idx}_{pre_var_idx}')
                        # MIP formulation of max pooling:
                        #
                        # y = max(x_1, x_2, ..., x_n)
                        #
                        # Introduce binary variables d_1, d_2, ..., d_n:
                        # d_i = 1 if x_i is the maximum value, 0 otherwise
                        #
                        # We know the lower (l_i) and upper bounds (u_i) for x_i.
                        #
                        # Denote the maximum of the upper bounds of all inputs x_i as u_max.
                        #
                        # The MIP must then satisfy the following constraints:
                        #
                        # Constr_1: l_i <= x_i <= u_i
                        # Constr_2: y >= x_i
                        # Constr_3: y <= x_i + (u_max - l_i)*(1 - d_i)
                        # Constr_4: sum(d_1, d_2, ..., d_n) = 1

                        # Constr_1 is already satisfied due to the implementation of LinearizedNetworks.
                        # Constr_2
                        self.model.addConstr(v >= pre_var)
                        # Constr_3
                        self.model.addConstr(v <= pre_var + (ub_max - lb) * (1 - b))

                        window_bin_vars.append(b)
                    # Constr_4
                    self.model.addConstr(sum(window_bin_vars) == 1)
                    self.model.update()
                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                    new_layer_gurobi_vars.append(v)
            elif isinstance(layer, View) or isinstance(layer, Flatten):
                continue
            else:
                raise NotImplementedError

            self.gurobi_vars.append(new_layer_gurobi_vars)

        if len(self.gurobi_vars[-1]) == 1:
            # The network has a scalar output, it works like this.
            pass
        else:
            # The network has multiple outputs; we need to encode that the
            # minimum is below 0. Add a variable here that corresponds to
            # the minimum.
            min_var = self.model.addVar(vtype=grb.GRB.CONTINUOUS,
                                        lb=self.lower_bounds[-1].min().item(),
                                        ub=self.upper_bounds[-1].min().item(),
                                        name="final_output")
            self.model.addConstrs(
                (min_var <= self.gurobi_vars[-1][out_idx]
                 for out_idx in range(len(self.gurobi_vars[-1]))),
                name=f'final_constraint_min_ub'
            )
            bin_min_vars = self.model.addVars(range(len(self.gurobi_vars[-1])),
                                              vtype=grb.GRB.BINARY,
                                              lb=0, ub=1,
                                              name='final_binary')
            out_lbmin = self.lower_bounds[-1].min()
            self.model.addConstrs(
                (min_var >= (self.gurobi_vars[-1][out_idx]
                             + (out_lbmin - self.upper_bounds[-1][out_idx]).item()
                             * (1 - bin_min_vars[out_idx]))
                 for out_idx in range(len(self.gurobi_vars[-1]))),
                name=f'final_constraint_min_lb'
            )
            self.model.addConstr(sum(var for var in bin_min_vars.values()) == 1)

            self.gurobi_vars.append([min_var])
            self.lower_bounds.append(self.lower_bounds[-1].min())
            self.upper_bounds.append(self.upper_bounds[-1].min())

        # Add the final constraint that the output must be less than or equal
        # to zero.
        if not use_obj_function:
            self.model.addConstr(self.gurobi_vars[-1][0] <= 0)
            self.model.setObjective(0, grb.GRB.MAXIMIZE)
            self.check_obj_value_callback = False
        else:
            # Set the minimization of the network output
            self.model.setObjective(self.gurobi_vars[-1][-1], grb.GRB.MINIMIZE)
            self.check_obj_value_callback = True

        self.model.update()
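
# --- Sanity check for the final min-output encoding above (illustrative) ---
# With one binary per output and sum(bins) == 1, the constraints
#     min_var <= out_j                                      for all j
#     min_var >= out_j + (lb_min - ub_j) * (1 - bin_j)      for all j
# pin min_var to min_j out_j: the first family gives min_var <= min_j out_j,
# and for the single j* with bin_j* = 1 the second family gives
# min_var >= out_j*, which is only consistent when out_j* is the minimum.
# For every j with bin_j = 0, the right-hand side is at most lb_min (since
# out_j <= ub_j), so that constraint is vacuous given min_var's lower bound.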