Example no. 1
# NOTE: this fragment is the constructor of the MIPNetwork class shown in the
# later examples; the class header and imports are added here so that the
# snippet is self-contained (the LinearizedNetwork import path is assumed).
from torch import nn

from plnn.network_linear_approximation import LinearizedNetwork


class MIPNetwork:
    def __init__(self, layers):
        '''
        layers: A list of Pytorch layers containing only Linear/ReLU/MaxPools
        '''
        self.layers = layers
        self.net = nn.Sequential(*layers)

        # Initialize a LinearizedNetwork object to determine the lower and
        # upper bounds at each layer.
        self.lin_net = LinearizedNetwork(layers)
Example no. 2
def bab(gt_prop,
        verif_layers,
        domain,
        return_dict,
        timeout,
        batch_size,
        method,
        tot_iter,
        parent_init,
        args,
        gurobi_dict=None,
        writer=None):
    epsilon = 1e-4

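    # NOTE: `gpu` and `decision_bound` are used below but are not defined in this
    # excerpt; they are assumed to be set by the calling script (e.g. as
    # module-level globals or derived from `args`).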
    if gpu:
        cuda_verif_layers = [copy.deepcopy(lay).cuda() for lay in verif_layers]
        domain = domain.cuda()
    else:
        cuda_verif_layers = [copy.deepcopy(lay) for lay in verif_layers]

    # use best of naive interval propagation and KW as intermediate bounds
    intermediate_net = SaddleLP(cuda_verif_layers,
                                store_bounds_primal=False,
                                max_batch=args.max_solver_batch)
    intermediate_net.set_solution_optimizer('best_naive_kw', None)
    anderson_bounds_net = None
    hard_crit = None
    prob_hard_crit = None

    # might need a smaller batch size for hard domains
    hard_batch_size = batch_size if args.hard_batch_size == -1 else args.hard_batch_size

    # Split domains into easy and hard ones, and define two separate bounding methods to handle their last layer.
    if method in ["cut", "gurobi-anderson"]:

        # Set criteria for identifying subproblems as hard
        hard_crit = {
            "lb_threshold": 0.5,
            "depth_threshold": 0,  # 15
            "impr_threshold": 1e-1,
            "doms_len_threshold": 200,
            "auto": args.auto_strat,
            "hard_overhead": args.hard_overhead,  # assumed at full batch
        }

        # Set bounds net for easy domains.
        if method in ["cut"]:
            bigm_adam_params = {
                "bigm_algorithm": "adam",
                "bigm": "only",
                "nb_outer_iter": int(tot_iter),  # cifar_oval: 180
                'initial_step_size': args.dualinit_init_step,  # cifar_oval: 1e-2
                'initial_step_size_pinit': args.dualinit_init_step / 10,
                'final_step_size': args.dualinit_fin_step,  # cifar_oval: 1e-4
                'betas': (0.9, 0.999)
            }
            bounds_net = ExpLP(cuda_verif_layers,
                               params=bigm_adam_params,
                               store_bounds_primal=True)
        else:
            bounds_net = LinearizedNetwork(verif_layers)

        # Set bounds net for hard domains.
        if method == "cut":
            anderson_iter = args.hard_iter  # 100
            explp_params = {
                "nb_iter": anderson_iter,
                'bigm': "init",
                'cut': "only",
                "bigm_algorithm": "adam",
                'cut_frequency': 450,
                'max_cuts': 8,
                'cut_add': args.cut_add,  # 2
                'betas': (0.9, 0.999),
                'initial_step_size': args.init_step,
                'final_step_size': args.fin_step,
                "init_params": {
                    "nb_outer_iter":
                    500,  #500 for our datasets, 1000 for cifar10_8_255
                    'initial_step_size': args.dualinit_init_step,
                    'initial_step_size_pinit': args.dualinit_init_step / 10,
                    'final_step_size': args.dualinit_fin_step,
                    'betas': (0.9, 0.999),
                },
            }
            anderson_bounds_net = ExpLP(cuda_verif_layers,
                                        params=explp_params,
                                        fixed_M=True,
                                        store_bounds_primal=True)
            print(f"Running cut for {anderson_iter} iterations")
        elif method == "gurobi-anderson":
            anderson_bounds_net = AndersonLinearizedNetwork(
                verif_layers,
                mode="lp-cut",
                n_cuts=args.n_cuts,
                cuts_per_neuron=True,
                decision_boundary=decision_bound)

        if args.no_easy:
            # Ignore the easy problems bounding, use the hard one for all.
            bounds_net = anderson_bounds_net
            anderson_bounds_net = None

    # Use only a single last layer bounding method for all problems.
    elif method == "prox":
        bounds_net = SaddleLP(cuda_verif_layers,
                              store_bounds_primal=True,
                              max_batch=args.max_solver_batch)
        bounds_net.set_decomposition('pairs', 'KW')
        optprox_params = {
            'nb_total_steps': int(tot_iter),
            'max_nb_inner_steps': 2,  # this is 2/5 as simpleprox
            'initial_eta': args.eta,
            'final_eta': args.feta,
            'log_values': False,
            'maintain_primal': True
        }
        bounds_net.set_solution_optimizer('optimized_prox', optprox_params)
        print(f"Running prox with {tot_iter} steps")
    elif method == "adam":
        bounds_net = SaddleLP(cuda_verif_layers,
                              store_bounds_primal=True,
                              max_batch=args.max_solver_batch)
        bounds_net.set_decomposition('pairs', 'KW')
        adam_params = {
            'nb_steps': int(tot_iter),
            'initial_step_size': args.init_step,
            'final_step_size': args.fin_step,
            'betas': (0.9, 0.999),
            'log_values': False
        }
        bounds_net.set_solution_optimizer('adam', adam_params)
        print(f"Running adam with {tot_iter} steps")
    elif method == "bigm-adam":
        bigm_adam_params = {
            "bigm_algorithm": "adam",
            "bigm": "only",
            "nb_outer_iter": int(tot_iter),
            'initial_step_size': args.init_step,
            'initial_step_size_pinit': args.init_step / 10,
            'final_step_size': args.fin_step,
            'betas': (0.9, 0.999)
        }
        bounds_net = ExpLP(cuda_verif_layers,
                           params=bigm_adam_params,
                           store_bounds_primal=True)
    elif method == "gurobi":
        bounds_net = LinearizedNetwork(verif_layers)

    # branching
    if args.branching_choice == 'heuristic':
        branching_net_name = None
    else:
        raise NotImplementedError

    # try:
    with torch.no_grad():
        min_lb, min_ub, ub_point, nb_states, fail_safe_ratio = relu_bab(
            intermediate_net,
            bounds_net,
            branching_net_name,
            domain,
            decision_bound,
            eps=epsilon,
            timeout=timeout,
            batch_size=batch_size,
            parent_init_flag=parent_init,
            gurobi_specs=gurobi_dict,
            anderson_bounds_net=anderson_bounds_net,
            writer=writer,
            hard_crit=hard_crit,
            hard_batch_size=hard_batch_size)

    if not (min_lb or min_ub or ub_point):
        return_dict["min_lb"] = None
        return_dict["min_ub"] = None
        return_dict["ub_point"] = None
        return_dict["nb_states"] = nb_states
        return_dict["bab_out"] = "timeout"
        return_dict["fs_ratio"] = fail_safe_ratio
    else:
        return_dict["min_lb"] = min_lb.cpu()
        return_dict["min_ub"] = min_ub.cpu()
        return_dict["ub_point"] = ub_point.cpu()
        return_dict["nb_states"] = nb_states
        return_dict["fs_ratio"] = fail_safe_ratio
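
# Sketch of how `bab` might be driven (illustrative; `gt_prop`, `verif_layers`,
# `domain` and `args` are assumed to come from the calling script, which is not
# part of this excerpt):
#
#   from multiprocessing import Manager
#   return_dict = Manager().dict()
#   bab(gt_prop, verif_layers, domain, return_dict, timeout=3600,
#       batch_size=150, method="prox", tot_iter=100, parent_init=True, args=args)
#   if return_dict.get("bab_out") == "timeout":
#       print("timed out after", return_dict["nb_states"], "visited states")
#   else:
#       print("bounds:", return_dict["min_lb"], return_dict["min_ub"])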
Example no. 3
def reluify_maxpool(layers, domain, no_opt=False):
    '''
    Remove all the Maxpool units of a feedforward network represented by
    `layers` and replace them by an equivalent combination of ReLU + Linear

    This is only valid over the domain `domain` because we use some knowledge
    about upper and lower bounds of certain neurons

    Args:
      no_opt: Boolean. If set to True, use interval_analysis instead of
              optimizing the bounds needed to convert the maxpool into ReLU.
              If set to False, use the tighter optimized bounds.
    '''
    if no_opt:
        # We're building a MIPNetwork but we are not going to solve it. This is
        # just because this is the class that has the code for interval_analysis.

        # TODO: Importing here sucks, but avoiding it by importing at the top
        # level would mean a larger refactoring that I'm not willing to do right now.
        from plnn.mip_solver import MIPNetwork

        mip_net = MIPNetwork(layers)
        mip_net.do_interval_analysis(domain)
        lbs = mip_net.lower_bounds
    else:
        # We will need some lower bounds for the inputs to the maxpooling
        # We will simply use those given by a LinearizedNetwork
        lin_net = LinearizedNetwork(layers)
        lin_net.define_linear_approximation(domain)
        lbs = lin_net.lower_bounds

    layers = layers[:]

    new_all_layers = []

    idx_of_inp_lbs = 0
    layer_idx = 0
    while layer_idx < len(layers):
        layer = layers[layer_idx]
        if type(layer) is nn.MaxPool1d:
            # We need to decompose this MaxPool until it only has a size of 2
            assert layer.padding == 0
            assert layer.dilation == 1
            if layer.kernel_size > 2:
                assert layer.kernel_size % 2 == 0, "Not supported yet"
                assert layer.stride % 2 == 0, "Not supported yet"
                # We're going to decompose this maxpooling into two maxpooling layers:
                # max(     in_1, in_2 ,      in_3, in_4)
                # will become
                # max( max(in_1, in_2),  max(in_3, in_4))
                first_mp = nn.MaxPool1d(2, stride=2)
                second_mp = nn.MaxPool1d(layer.kernel_size // 2,
                                         stride=layer.stride // 2)
                # We will replace the Maxpooling that was originally there with
                # those two layers
                # We need to add a corresponding layer of lower bounds
                first_lbs = lbs[idx_of_inp_lbs]
                intermediate_lbs = []
                for pair_idx in range(len(first_lbs) // 2):
                    intermediate_lbs.append(
                        max(first_lbs[2 * pair_idx],
                            first_lbs[2 * pair_idx + 1]))
                # Do the replacement
                del layers[layer_idx]
                layers.insert(layer_idx, first_mp)
                layers.insert(layer_idx + 1, second_mp)
                lbs.insert(idx_of_inp_lbs + 1, intermediate_lbs)

                # Now continue so that we re-go through the loop with the now
                # simplified maxpool
                continue
            elif layer.kernel_size == 2:
                # Each pair needs two entries in the intermediate layer that is
                # going to be ReLU-ified
                pre_nb_inp_lin = len(lbs[idx_of_inp_lbs])
                # How many starting positions can we fit?
                # 1 + the number of strides we can take before the window no longer fits in the array
                pre_nb_out_lin = (1 + (
                    (pre_nb_inp_lin - layer.kernel_size) // layer.stride)) * 2
                pre_relu_lin = nn.Linear(pre_nb_inp_lin,
                                         pre_nb_out_lin,
                                         bias=True)
                pre_relu_weight = pre_relu_lin.weight.data
                pre_relu_bias = pre_relu_lin.bias.data
                pre_relu_weight.zero_()
                pre_relu_bias.zero_()
                # For each pair (x, y) that needs to be transformed into max(x, y),
                # we create (x - y, y - y_lb)
                first_in_index = 0
                first_out_index = 0
                while first_in_index + 1 < pre_nb_inp_lin:
                    pre_relu_weight[first_out_index, first_in_index] = 1
                    pre_relu_weight[first_out_index, first_in_index + 1] = -1

                    pre_relu_weight[first_out_index + 1,
                                    first_in_index + 1] = 1
                    pre_relu_bias[first_out_index +
                                  1] = -lbs[idx_of_inp_lbs][first_in_index + 1]

                    # Now shift
                    first_in_index += layer.stride
                    first_out_index += 2
                new_all_layers.append(pre_relu_lin)
                new_all_layers.append(nn.ReLU())

                # We now need to create the second layer
                # It will sum [max(x-y, 0)], [max(y - y_lb, 0)] and y_lb
                post_nb_inp_lin = pre_nb_out_lin
                post_nb_out_lin = post_nb_inp_lin // 2
                post_relu_lin = nn.Linear(post_nb_inp_lin, post_nb_out_lin)
                post_relu_weight = post_relu_lin.weight.data
                post_relu_bias = post_relu_lin.bias.data
                post_relu_weight.zero_()
                post_relu_bias.zero_()
                first_in_index = 0
                out_index = 0
                while first_in_index + 1 < post_nb_inp_lin:
                    post_relu_weight[out_index, first_in_index] = 1
                    post_relu_weight[out_index, first_in_index + 1] = 1
                    post_relu_bias[out_index] = lbs[idx_of_inp_lbs][
                        layer.stride * out_index + 1]
                    first_in_index += 2
                    out_index += 1
                new_all_layers.append(post_relu_lin)
                idx_of_inp_lbs += 1
            else:
                # This should have been cleaned up in one of the simplify passes
                raise NotImplementedError
        elif type(layer) in [nn.Linear, nn.ReLU]:
            new_all_layers.append(layer)
            idx_of_inp_lbs += 1
        elif type(layer) is View:
            # We shouldn't add the view as we are getting rid of them
            pass
        layer_idx += 1
    return new_all_layers
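
# Quick standalone check (illustrative) of the identity the two inserted layers
# rely on: max(x, y) == relu(x - y) + relu(y - y_lb) + y_lb whenever y_lb <= y.
if __name__ == "__main__":
    import torch

    x = torch.tensor([0.3, -1.2, 2.0])
    y = torch.tensor([0.5, -2.0, 1.0])
    y_lb = torch.tensor([-1.0, -3.0, 0.0])  # valid lower bounds on y
    assert torch.allclose(torch.relu(x - y) + torch.relu(y - y_lb) + y_lb,
                          torch.max(x, y))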
Example no. 4
class MIPNetwork:
    def __init__(self, layers):
        '''
        layers: A list of Pytorch layers containing only Linear/ReLU/MaxPools
        '''
        self.layers = layers
        self.net = nn.Sequential(*layers)

        # Initialize a LinearizedNetwork object to determine the lower and
        # upper bounds at each layer.
        self.lin_net = LinearizedNetwork(layers)

    def solve(self, inp_domain, timeout=None):
        '''
        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension
        timeout   : Maximum allowed running time, or None for no time limit

        Returns:
        sat     : boolean indicating whether the MIP is satisfiable.
        solution: Feasible point if the MIP is satisfiable,
                  None otherwise.
        '''
        if self.lower_bounds[-1][0] > 0:
            # The problem is infeasible, and we haven't setup the MIP
            return (False, None, 0)

        if timeout is not None:
            self.model.setParam('TimeLimit', timeout)

        if self.check_obj_value_callback:

            def early_stop_cb(model, where):
                if where == grb.GRB.Callback.MIP:
                    best_bound = model.cbGet(grb.GRB.Callback.MIP_OBJBND)
                    if best_bound > 0:
                        model.terminate()

                if where == grb.GRB.Callback.MIPNODE:
                    nodeCount = model.cbGet(grb.GRB.Callback.MIPNODE_NODCNT)
                    if (nodeCount % 100) == 0:
                        # print(f"Running Nb states visited: {nodeCount}")
                        pass

                if where == grb.GRB.Callback.MIPSOL:
                    obj = model.cbGet(grb.GRB.Callback.MIPSOL_OBJ)
                    if obj < 0:
                        # Does it have a chance at being a valid
                        # counter-example?

                        # Check it with the network
                        input_vals = model.cbGetSolution(self.gurobi_vars[0])

                        with torch.no_grad():
                            inps = torch.Tensor(input_vals).view(1, -1)
                            out = self.net(inps).squeeze().item()

                        if out < 0:
                            model.terminate()
        else:

            def early_stop_cb(model, where):
                if where == grb.GRB.Callback.MIPNODE:
                    nodeCount = model.cbGet(grb.GRB.Callback.MIPNODE_NODCNT)
                    if (nodeCount % 100) == 0:
                        pass
                        # print(f"Running Nb states visited: {nodeCount}")

        self.model.optimize(early_stop_cb)
        nb_visited_states = self.model.nodeCount

        if self.model.status is grb.GRB.INFEASIBLE:
            # Infeasible: No solution
            return (False, None, nb_visited_states)
        elif self.model.status is grb.GRB.OPTIMAL:
            # There is a feasible solution. Return the feasible solution as well.
            len_inp = len(self.gurobi_vars[0])

            # Get the input that gives the feasible solution.
            inp = torch.Tensor(len_inp)
            for idx, var in enumerate(self.gurobi_vars[0]):
                inp[idx] = var.x
            optim_val = self.gurobi_vars[-1][-1].x

            return (optim_val < 0, (inp, optim_val), nb_visited_states)
        elif self.model.status is grb.GRB.INTERRUPTED:
            obj_bound = self.model.ObjBound

            if obj_bound > 0:
                return (False, None, nb_visited_states)
            else:
                # There is a feasible solution. Return the feasible solution as well.
                len_inp = len(self.gurobi_vars[0])

                # Get the input that gives the feasible solution.
                inp = torch.Tensor(len_inp)
                for idx, var in enumerate(self.gurobi_vars[0]):
                    inp[idx] = var.x
                optim_val = self.gurobi_vars[-1][-1].x
            return (optim_val < 0, (inp, optim_val), nb_visited_states)
        elif self.model.status is grb.GRB.TIME_LIMIT:
            # We timed out, return a None Status
            return (None, None, nb_visited_states)
        else:
            raise Exception("Unexpected Status code")

    def tune(self, param_outfile, tune_timeout):
        self.model.Params.tuneOutput = 1
        self.model.Params.tuneTimeLimit = tune_timeout
        self.model.tune()

        # Get the best set of parameters
        self.model.getTuneResult(0)

        self.model.write(param_outfile)

    def do_interval_analysis(self, inp_domain):
        self.lower_bounds = []
        self.upper_bounds = []

        inp_lb = []
        inp_ub = []

        self.lower_bounds.append(inp_domain[:, 0])
        self.upper_bounds.append(inp_domain[:, 1])

        layer_idx = 1
        for layer in self.layers:
            new_layer_lb = []
            new_layer_ub = []
            if type(layer) is nn.Linear:
                pos_weights = torch.clamp(layer.weight, min=0)
                neg_weights = torch.clamp(layer.weight, max=0)

                new_layer_lb = torch.mv(pos_weights, self.lower_bounds[-1]) + \
                               torch.mv(neg_weights, self.upper_bounds[-1]) + \
                               layer.bias
                new_layer_ub = torch.mv(pos_weights, self.upper_bounds[-1]) + \
                               torch.mv(neg_weights, self.lower_bounds[-1]) + \
                               layer.bias
            elif type(layer) == nn.ReLU:
                new_layer_lb = torch.clamp(self.lower_bounds[-1], min=0)
                new_layer_ub = torch.clamp(self.upper_bounds[-1], min=0)
            elif type(layer) == nn.MaxPool1d:
                assert layer.padding == 0, "Unsupported MaxPool option"
                assert layer.dilation == 1, "Unsupported MaxPool option"

                nb_pre = len(self.lower_bounds[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size

                while pre_window_end <= nb_pre:
                    lb = max(
                        self.lower_bounds[-1][pre_start_idx:pre_window_end])
                    ub = max(
                        self.upper_bounds[-1][pre_start_idx:pre_window_end])

                    new_layer_lb.append(lb)
                    new_layer_ub.append(ub)

                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                new_layer_lb = torch.Tensor(new_layer_lb)
                new_layer_ub = torch.Tensor(new_layer_ub)
            elif type(layer) == View:
                continue
            else:
                raise NotImplementedError

            self.lower_bounds.append(new_layer_lb)
            self.upper_bounds.append(new_layer_ub)

            layer_idx += 1
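
    # Worked example of the interval propagation above (illustrative only): for a
    # single Linear neuron with weights [2, -3], bias 1 and input bounds
    # [0, 1] x [0, 1]:
    #   lb = 2 * 0 + (-3) * 1 + 1 = -2   (positive weights take the input lower
    #   ub = 2 * 1 + (-3) * 0 + 1 =  3    bounds and negative ones the upper
    #                                     bounds for lb, and vice versa for ub)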

    def setup_model(self,
                    inp_domain,
                    sym_bounds=False,
                    use_obj_function=False,
                    bounds="opt",
                    parameter_file=None):
        '''
        Set up the model to be optimized by Gurobi.

        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension
        use_obj_function: If False, don't use any objective function and simply add a
                          constraint on the output.
                          If True, minimize the output and use a callback to interrupt
                          the solving when a counterexample is found.
        bounds: string indicating which method should be used to compute the intermediate bounds
        parameter_file: If a path is given, load a set of parameters for the MIP solver.
        '''
        if bounds == "opt":
            # First use define_linear_approximation from LinearizedNetwork to
            # compute upper and lower bounds to be able to define Ms
            self.lin_net.define_linear_approximation(inp_domain)

            self.lower_bounds = list(
                map(torch.Tensor, self.lin_net.lower_bounds))
            self.upper_bounds = list(
                map(torch.Tensor, self.lin_net.upper_bounds))
        elif bounds == "interval":
            self.do_interval_analysis(inp_domain)
            if self.lower_bounds[-1][0] > 0:
                # The problem is already guaranteed to be infeasible,
                # Let's not waste time setting up the MIP
                return
        elif bounds == "interval-kw":
            self.do_interval_analysis(inp_domain)
            kw_dual = LooseDualNetworkApproximation(self.layers)
            kw_dual.remove_maxpools(inp_domain, no_opt=True)
            lower_bounds, upper_bounds = kw_dual.get_intermediate_bounds(
                inp_domain)

            # We want to get the best out of interval-analysis and K&W

            # TODO: There is a slight problem. To use the K&W code directly, we
            # need to make a bunch of changes, notably remove all of the
            # Maxpooling and convert them to ReLUs. Quick and temporary fix:
            # take the max of both things if the shapes are all the same so
            # far, and use the one from interval analysis after the first
            # difference.

            # If the network is fully ReLU, there should be no problem.
            # If the network is fully ReLU with just a MaxPool at the end,
            # that's still okay because we get the best bounds up to the
            # maxpool, and that's the last thing we use the bounds for.
            # This is just going to suck if we have a MaxPool early in the
            # network, and even then, that just means we fall back to interval
            # analysis, so stop complaining.
            for i in range(len(lower_bounds)):
                if lower_bounds[i].shape == self.lower_bounds[i].shape:
                    # Keep the best lower bound
                    torch.max(lower_bounds[i],
                              self.lower_bounds[i],
                              out=self.lower_bounds[i])
                    torch.min(upper_bounds[i],
                              self.upper_bounds[i],
                              out=self.upper_bounds[i])
                else:
                    # Mismatch in dimension.
                    # Drop it and stop trying to improve the stuff of interval analysis
                    break
            if self.lower_bounds[-1][0] > 0:
                # The problem is already guaranteed to be infeasible,
                # Let's not waste time setting up the MIP
                return
        else:
            raise NotImplementedError("Unknown bound computation method.")

        self.gurobi_vars = []
        self.model = grb.Model()
        self.model.setParam('OutputFlag', False)
        self.model.setParam('Threads', 1)
        self.model.setParam('DualReductions', 0)
        if parameter_file is not None:
            self.model.read(parameter_file)

        # First add the input variables as Gurobi variables.
        inp_gurobi_vars = []
        for dim, (lb, ub) in enumerate(inp_domain):
            v = self.model.addVar(lb=lb,
                                  ub=ub,
                                  obj=0,
                                  vtype=grb.GRB.CONTINUOUS,
                                  name=f'inp_{dim}')
            inp_gurobi_vars.append(v)

        self.gurobi_vars.append(inp_gurobi_vars)
        self.model.update()

        layer_idx = 1
        for layer in self.layers:
            new_layer_gurobi_vars = []
            if type(layer) is nn.Linear:
                for neuron_idx in range(layer.weight.size(0)):
                    lin_expr = layer.bias[neuron_idx].item()
                    for prev_neuron_idx_ten in torch.nonzero(
                            layer.weight[neuron_idx]):
                        prev_neuron_idx = prev_neuron_idx_ten[0]
                        coeff = layer.weight[neuron_idx,
                                             prev_neuron_idx].item()
                        lin_expr += coeff * self.gurobi_vars[-1][
                            prev_neuron_idx]
                    v = self.model.addVar(
                        lb=-grb.GRB.INFINITY,
                        ub=grb.GRB.INFINITY,
                        vtype=grb.GRB.CONTINUOUS,
                        name=f'lin_v_{layer_idx}_{neuron_idx}')
                    self.model.addConstr(v == lin_expr)
                    self.model.update()

                    # We are now done with this neuron.
                    new_layer_gurobi_vars.append(v)

            elif type(layer) == nn.ReLU:

                for neuron_idx, pre_var in enumerate(self.gurobi_vars[-1]):
                    pre_lb = self.lower_bounds[layer_idx -
                                               1][neuron_idx].item()
                    pre_ub = self.upper_bounds[layer_idx -
                                               1][neuron_idx].item()

                    # Use the constraints specified by
                    # Verifying Neural Networks with Mixed Integer Programming
                    # MIP formulation of ReLU:
                    #
                    # x = max(pre_var, 0)
                    #
                    # Introduce binary variable b, such that:
                    # b = 1 if pre_var is positive (i.e. takes the max), 0 otherwise
                    #
                    # We know the lower (pre_lb) and upper (pre_ub) bounds for pre_var,
                    # so the MIP must satisfy the following constraints:
                    # Constr_13: x <= pre_var - pre_lb * (1 - b)
                    # Constr_14: x >= pre_var
                    # Constr_15: x <= b* pre_ub
                    # Constr_16: x >= 0
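                    #
                    # Sanity check of the encoding (illustrative): if b = 0,
                    # Constr_15 and Constr_16 force x = 0, and Constr_14 then
                    # implies pre_var <= 0; if b = 1, Constr_13 and Constr_14
                    # force x = pre_var, and Constr_16 implies pre_var >= 0.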

                    if sym_bounds:
                        # We're going to use the big-M encoding of the other papers.
                        M = max(-pre_lb, pre_ub)
                        pre_lb = -M
                        pre_ub = M

                    if pre_lb <= 0 and pre_ub <= 0:
                        # x = self.model.addVar(lb=0, ub=0,
                        #                       vtype=grb.GRB.CONTINUOUS,
                        #                       name = f'ReLU_x_{layer_idx}_{neuron_idx}')
                        x = 0
                    elif (pre_lb >= 0) and (pre_ub >= 0):
                        # x = self.model.addVar(lb=pre_lb, ub=pre_ub,
                        #                       vtype=grb.GRB.CONTINUOUS,
                        #                       name = f'ReLU_x_{layer_idx}_{neuron_idx}')
                        # self.model.addConstr(x == pre_var, f'constr_{layer_idx}_{neuron_idx}_fixedpassing')
                        x = pre_var
                    else:
                        x = self.model.addVar(
                            lb=0,
                            ub=grb.GRB.INFINITY,
                            vtype=grb.GRB.CONTINUOUS,
                            name=f'ReLU_x_{layer_idx}_{neuron_idx}')
                        b = self.model.addVar(
                            vtype=grb.GRB.BINARY,
                            name=f'ReLU_b_{layer_idx}_{neuron_idx}')

                        self.model.addConstr(
                            x <= pre_var - pre_lb * (1 - b),
                            f'constr_{layer_idx}_{neuron_idx}_c13')
                        self.model.addConstr(
                            x >= pre_var,
                            f'constr_{layer_idx}_{neuron_idx}_c14')
                        self.model.addConstr(
                            x <= b * pre_ub,
                            f'constr_{layer_idx}_{neuron_idx}_c15')
                        # self.model.addConstr(x >= 0, f'constr_{layer_idx}_{neuron_idx}_c16')
                        # (implied already by bound on x)

                    self.model.update()

                    new_layer_gurobi_vars.append(x)
            elif type(layer) == nn.MaxPool1d:
                assert layer.padding == 0, "Unsupported MaxPool option"
                assert layer.dilation == 1, "Unsupported MaxPool option"
                nb_pre = len(self.gurobi_vars[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size

                while pre_window_end <= nb_pre:
                    ub_max = max(self.upper_bounds[layer_idx - 1]
                                 [pre_start_idx:pre_window_end]).item()
                    window_bin_vars = []
                    neuron_idx = pre_start_idx % stride
                    v = self.model.addVar(
                        vtype=grb.GRB.CONTINUOUS,
                        lb=-grb.GRB.INFINITY,
                        ub=grb.GRB.INFINITY,
                        name=f'MaxPool_out_{layer_idx}_{neuron_idx}')
                    for pre_var_idx, pre_var in enumerate(
                            self.gurobi_vars[-1]
                        [pre_start_idx:pre_window_end]):
                        lb = self.lower_bounds[layer_idx -
                                               1][pre_start_idx +
                                                  pre_var_idx].item()
                        b = self.model.addVar(
                            vtype=grb.GRB.BINARY,
                            name=f'MaxPool_b_{layer_idx}_{neuron_idx}_{pre_var_idx}')
                        # MIP formulation of max pooling:
                        #
                        # y = max(x_1, x_2, ..., x_n)
                        #
                        # Introduce binary variables d_1, d_2, ..., d_n:
                        # d_i = 1 if x_i is the maximum value, 0 otherwise
                        #
                        # We know the lower (l_i) and upper bounds (u_i) for x_i
                        #
                        # Denote the maximum of the upper_bounds of all inputs x_i as u_max
                        #
                        # MIP must then satisfy the following constraints:
                        #
                        # Constr_1: l_i <= x_i <= u_i
                        # Constr_2: y >= x_i
                        # Constr_3: y <= x_i + (u_max - l_i)*(1 - d_i)
                        # Constr_4: sum(d_1, d_2, ..., d_n) = 1
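                        #
                        # Sanity check (illustrative): for the input attaining the
                        # max, d_i = 1 turns Constr_3 into y <= x_i, which together
                        # with Constr_2 forces y = x_i; for the other inputs d_j = 0
                        # and Constr_3 stays slack, since u_max - l_j bounds how far
                        # x_j can lie below y.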

                        # Constr_1 is already satisfied due to the implementation of LinearizedNetworks.
                        # Constr_2
                        self.model.addConstr(v >= pre_var)
                        # Constr_3
                        self.model.addConstr(v <= pre_var + (ub_max - lb) *
                                             (1 - b))

                        window_bin_vars.append(b)
                    # Constr_4
                    self.model.addConstr(sum(window_bin_vars) == 1)
                    self.model.update()
                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                    new_layer_gurobi_vars.append(v)
            elif type(layer) == View:
                continue
            else:
                raise NotImplementedError

            self.gurobi_vars.append(new_layer_gurobi_vars)
            layer_idx += 1
        # Assert that this is as expected: a network with a single output
        assert len(
            self.gurobi_vars[-1]) == 1, "Network doesn't have scalar output"

        # Add the final constraint that the output must be less than or equal
        # to zero.
        if not use_obj_function:
            self.model.addConstr(self.gurobi_vars[-1][-1] <= 0)
            self.model.setObjective(0, grb.GRB.MAXIMIZE)
            self.check_obj_value_callback = False
        else:
            self.model.setObjective(self.gurobi_vars[-1][-1], grb.GRB.MINIMIZE)
            self.check_obj_value_callback = True

        # Optimize the model.
        self.model.update()
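
# Minimal usage sketch for the MIPNetwork class above (illustrative only; it
# assumes the file's own imports, i.e. torch, torch.nn as nn, gurobipy as grb,
# LinearizedNetwork and View, which are not shown in this excerpt):
if __name__ == "__main__":
    toy_layers = [nn.Linear(2, 4), nn.ReLU(), nn.Linear(4, 1)]
    toy_domain = torch.tensor([[0.0, 1.0], [0.0, 1.0]])  # per-dimension [lb, ub]

    toy_net = MIPNetwork(toy_layers)
    toy_net.setup_model(toy_domain, use_obj_function=True, bounds="interval")
    sat, solution, nb_states = toy_net.solve(toy_domain, timeout=60.0)
    print("SAT:", sat, "visited MIP states:", nb_states)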
Example no. 5
class MIPNetwork:

    def __init__(self, layers):
        '''
        layers: A list of Pytorch layers containing only Linear/ReLU/MaxPools
        '''
        self.layers = layers
        self.net = nn.Sequential(*layers)

        # Initialize a LinearizedNetwork object to determine the lower and
        # upper bounds at each layer.
        self.lin_net = LinearizedNetwork(layers)

    def solve(self, inp_domain):
        '''
        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension

        Returns:
        sat     : boolean indicating whether the MIP is satisfiable.
        solution: Feasible point if the MIP is satisfiable,
                  None otherwise.
        '''
        # First use define_linear_approximation from LinearizedNetwork to
        # compute upper and lower bounds to be able to define Ms
        self.lin_net.define_linear_approximation(inp_domain)

        self.lower_bounds = self.lin_net.lower_bounds
        self.upper_bounds = self.lin_net.upper_bounds
        self.gurobi_vars = []

        self.model = grb.Model()
        self.model.setParam('OutputFlag', False)
        self.model.setParam('Threads', 1)

        # First add the input variables as Gurobi variables.
        inp_gurobi_vars = []
        for dim, (lb, ub) in enumerate(inp_domain):
            v = self.model.addVar(lb=lb, ub=ub, obj=0,
                                  vtype=grb.GRB.CONTINUOUS,
                                  name=f'inp_{dim}')
            inp_gurobi_vars.append(v)

        self.gurobi_vars.append(inp_gurobi_vars)
        self.model.update()

        layer_idx = 1
        for layer in self.layers:
            new_layer_gurobi_vars = []
            if type(layer) is nn.Linear:
                for neuron_idx in range(layer.weight.size(0)):

                    lin_expr = layer.bias.data[neuron_idx]
                    for prev_neuron_idx in range(layer.weight.size(1)):
                        coeff = layer.weight.data[neuron_idx, prev_neuron_idx]
                        lin_expr += coeff * self.gurobi_vars[-1][prev_neuron_idx]
                    v = self.model.addVar(lb=-grb.GRB.INFINITY,
                                          ub=grb.GRB.INFINITY,
                                          vtype=grb.GRB.CONTINUOUS,
                                          name=f'lin_v_{layer_idx}_{neuron_idx}')
                    self.model.addConstr(v == lin_expr)
                    self.model.update()

                    # We are now done with this neuron.
                    new_layer_gurobi_vars.append(v)

            elif type(layer) == nn.ReLU:

                for neuron_idx, pre_var in enumerate(self.gurobi_vars[-1]):
                    pre_lb = self.lower_bounds[layer_idx-1][neuron_idx]
                    pre_ub = self.upper_bounds[layer_idx-1][neuron_idx]

                    # Use the constraints specified by
                    # Maximum Resilience of Artificial Neural Networks paper.
                    # MIP formulation of ReLU:
                    #
                    # x = max(pre_var, 0)
                    #
                    # Introduce binary variable b, such that:
                    # b = 1 if pre_var is positive (i.e. takes the max), 0 otherwise
                    #
                    # Introduce a continuous variable M, such that -M <= pre_var <= M:
                    #
                    # We know the lower (pre_lb) and upper bounds (pre_ub) for pre_var
                    # We can thus write the following:
                    # M = max(-pre_lb, pre_ub)
                    #
                    # MIP must then satisfy the following constraints:
                    # Constr_2a: x >= 0
                    # Constr_2b: x >= pre_var
                    # Constr_3a: pre_var - b*M <= 0
                    # Constr_3b: pre_var + (1-b)*M >= 0
                    # Constr_4a: x <= pre_var + (1-b)*M
                    # Constr_4b: x <= b*M
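                    #
                    # Sanity check (illustrative): with b = 1, Constr_3b forces
                    # pre_var >= 0 while Constr_2b and Constr_4a force x = pre_var;
                    # with b = 0, Constr_3a forces pre_var <= 0 while Constr_2a and
                    # Constr_4b force x = 0.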

                    M = max(-pre_lb, pre_ub)
                    x = self.model.addVar(lb=0,
                                          ub=grb.GRB.INFINITY,
                                          vtype=grb.GRB.CONTINUOUS,
                                          name=f'ReLU_x_{layer_idx}_{neuron_idx}')
                    b = self.model.addVar(vtype=grb.GRB.BINARY,
                                          name=f'ReLU_b_{layer_idx}_{neuron_idx}')

                    self.model.addConstr(x >= 0, f'constr_{layer_idx}_{neuron_idx}_c2a')
                    self.model.addConstr(x >= pre_var, f'constr_{layer_idx}_{neuron_idx}_c2b')
                    self.model.addConstr(pre_var - b * M <= 0, f'constr_{layer_idx}_{neuron_idx}_c3a')
                    self.model.addConstr(pre_var + (1 - b) * M >= 0, f'constr_{layer_idx}_{neuron_idx}_c3b')
                    self.model.addConstr(x <= pre_var + (1 - b) * M, f'constr_{layer_idx}_{neuron_idx}_c4a')
                    self.model.addConstr(x <= b * M, f'constr_{layer_idx}_{neuron_idx}_c4b')

                    self.model.update()

                    new_layer_gurobi_vars.append(x)
            elif type(layer) == nn.MaxPool1d:
                assert layer.padding == 0, "Unsupported MaxPool option"
                assert layer.dilation == 1, "Unsupported MaxPool option"
                nb_pre = len(self.gurobi_vars[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size

                while pre_window_end <= nb_pre:
                    ub_max = max(self.upper_bounds[layer_idx-1][pre_start_idx:pre_window_end])
                    window_bin_vars = []
                    neuron_idx = pre_start_idx % stride
                    v = self.model.addVar(vtype=grb.GRB.CONTINUOUS,
                                          lb=-grb.GRB.INFINITY,
                                          ub=grb.GRB.INFINITY,
                                          name=f'MaxPool_out_{layer_idx}_{neuron_idx}')
                    for pre_var_idx, pre_var in enumerate(self.gurobi_vars[-1][pre_start_idx:pre_window_end]):
                        lb = self.lower_bounds[layer_idx-1][pre_start_idx + pre_var_idx]
                        b = self.model.addVar(vtype=grb.GRB.BINARY,
                                              name= f'MaxPool_b_{layer_idx}_{neuron_idx}_{pre_var_idx}')
                        # MIP formulation of max pooling:
                        #
                        # y = max(x_1, x_2, ..., x_n)
                        #
                        # Introduce binary variables d_1, d_2, ..., d_n:
                        # d_i = 1 if x_i is the maximum value, 0 otherwise
                        #
                        # We know the lower (l_i) and upper bounds (u_i) for x_i
                        #
                        # Denote the maximum of the upper_bounds of all inputs x_i as u_max
                        #
                        # MIP must then satisfy the following constraints:
                        #
                        # Constr_1: l_i <= x_i <= u_i
                        # Constr_2: y >= x_i
                        # Constr_3: y <= x_i + (u_max - l_i)*(1 - d_i)
                        # Constr_4: sum(d_1, d_2, ..., d_n) = 1

                        # Constr_1 is already satisfied due to the implementation of LinearizedNetworks.
                        # Constr_2
                        self.model.addConstr(v >= pre_var)
                        # Constr_3
                        self.model.addConstr(v <= pre_var + (ub_max - lb)*(1-b))

                        window_bin_vars.append(b)
                    # Constr_4
                    self.model.addConstr(sum(window_bin_vars) == 1)
                    self.model.update()
                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                    new_layer_gurobi_vars.append(v)
            elif type(layer) == View:
                continue
            else:
                raise NotImplementedError

            self.gurobi_vars.append(new_layer_gurobi_vars)
            layer_idx += 1
        # Assert that this is as expected: a network with a single output
        assert len(self.gurobi_vars[-1]) == 1, "Network doesn't have scalar output"

        # Add the final constraint that the output must be less than or equal
        # to zero.
        self.model.addConstr(self.gurobi_vars[-1][-1] <= 0)

        # Optimize the model.
        self.model.update()
        self.model.setObjective(0, grb.GRB.MAXIMIZE)
        self.model.optimize()

        if self.model.status is grb.GRB.INFEASIBLE:
            # Infeasible: No solution
            return (False, None)
        else:
            # There is a feasible solution. Return the feasible solution as well.
            len_inp = len(self.gurobi_vars[0])

            # Get the input that gives the feasible solution.
            inp = torch.Tensor(len_inp)
            for idx, var in enumerate(self.gurobi_vars[0]):
                inp[idx] = var.x

            return (True, inp)
def main():
    parser = argparse.ArgumentParser(
        description="Compute and time a bunch of bounds.")
    parser.add_argument('eps', type=float, help='Epsilon - default: 0.1')
    parser.add_argument('target_directory',
                        type=str,
                        help='Where to store the results')
    parser.add_argument('--modulo',
                        type=int,
                        help='Number of jobs to split the dataset over.')
    parser.add_argument('--modulo_do',
                        type=int,
                        help='Which job id this run handles.')
    parser.add_argument(
        '--from_intermediate_bounds',
        action='store_true',
        help="If set, intermediate bounds are computed with the best of naive interval propagation and KW")
    parser.add_argument('--network',
                        type=str,
                        help='which network to use',
                        default="wide",
                        choices=["wide", "deep"])
    args = parser.parse_args()

    results_dir = args.target_directory
    os.makedirs(results_dir, exist_ok=True)

    testset_size = int(1e5)
    for idx in range(testset_size):
        if (args.modulo is not None) and (idx % args.modulo != args.modulo_do):
            continue
        target_dir = os.path.join(results_dir, f"{idx}")
        os.makedirs(target_dir, exist_ok=True)

        X, y, elided_models = load_mnist_wide_net(idx, mnist_test=None)
        if X is None:
            continue
        elided_model = elided_models[y]
        to_ignore = y

        domain = torch.stack([
            torch.clamp(X.squeeze(0) - args.eps, 0, None),
            torch.clamp(X.squeeze(0) + args.eps, None, 1.0)
        ], -1).unsqueeze(0)
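        # `domain` stacks, for every input coordinate, the pair
        # [max(x - eps, 0), min(x + eps, 1)], i.e. the L-infinity ball of radius
        # eps around the image clipped to the valid pixel range, with a leading
        # batch dimension added by unsqueeze(0).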

        lin_approx_string = "" if not args.from_intermediate_bounds else "-fromintermediate"

        # compute intermediate bounds with KW. Use only these for every method to allow comparison on the last layer
        # and optimize only the last layer
        if args.from_intermediate_bounds:
            cuda_elided_model = copy.deepcopy(elided_model).cuda()
            cuda_domain = domain.cuda()
            intermediate_net = SaddleLP([lay for lay in cuda_elided_model])
            with torch.no_grad():
                intermediate_net.set_solution_optimizer('best_naive_kw', None)
                intermediate_net.define_linear_approximation(
                    cuda_domain, no_conv=False, override_numerical_errors=True)
            intermediate_ubs = intermediate_net.upper_bounds
            intermediate_lbs = intermediate_net.lower_bounds

        ## Proximal methods
        for optprox_steps in [400]:
            optprox_params = {
                'nb_total_steps': optprox_steps,
                'max_nb_inner_steps': 2,  # this is 2/5 as simpleprox
                'initial_eta': 1e0,
                'final_eta': 5e1,
                'log_values': False,
                'inner_cutoff': 0,
                'maintain_primal': True,
                'acceleration_dict': {
                    'momentum': 0.3,  # decent momentum: 0.9 w/ increasing eta
                }
            }
            optprox_target_file = os.path.join(
                target_dir,
                f"Proximal_finalmomentum_{optprox_steps}{lin_approx_string}.txt"
            )
            if not os.path.exists(optprox_target_file):
                cuda_elided_model = copy.deepcopy(elided_model).cuda()
                cuda_domain = domain.cuda()
                optprox_net = SaddleLP([lay for lay in cuda_elided_model])
                optprox_start = time.time()
                with torch.no_grad():
                    optprox_net.set_decomposition('pairs', 'KW')
                    optprox_net.set_solution_optimizer('optimized_prox',
                                                       optprox_params)
                    if not args.from_intermediate_bounds:
                        optprox_net.define_linear_approximation(cuda_domain,
                                                                no_conv=False)
                        ub = optprox_net.upper_bounds[-1]
                    else:
                        optprox_net.build_model_using_bounds(
                            cuda_domain, (intermediate_lbs, intermediate_ubs))
                        _, ub = optprox_net.compute_lower_bound()
                optprox_end = time.time()
                optprox_time = optprox_end - optprox_start
                optprox_ubs = ub.cpu()

                del optprox_net
                dump_bounds(optprox_target_file, optprox_time, optprox_ubs)

        ## Gurobi PLANET Bounds
        grb_target_file = os.path.join(target_dir,
                                       f"Gurobi{lin_approx_string}-fixed.txt")
        if not os.path.exists(grb_target_file):
            grb_net = LinearizedNetwork([lay for lay in elided_model])
            grb_start = time.time()
            if not args.from_intermediate_bounds:
                grb_net.define_linear_approximation(domain[0], n_threads=4)
                ub = grb_net.upper_bounds[-1]
            else:
                grb_net.build_model_using_bounds(
                    domain[0],
                    ([lbs[0].cpu() for lbs in intermediate_lbs
                      ], [ubs[0].cpu() for ubs in intermediate_ubs]),
                    n_threads=4)
                _, ub = grb_net.compute_lower_bound(ub_only=True)
            grb_end = time.time()
            grb_time = grb_end - grb_start
            grb_ubs = torch.Tensor(ub).cpu()
            dump_bounds(grb_target_file, grb_time, grb_ubs)

        ## Cuts
        for cut_steps in [80, 600, 1050, 1650, 2500]:
            explp_params = {
                "nb_iter": cut_steps,
                'bigm': "init",
                'cut': "only",
                "bigm_algorithm": "adam",
                'cut_frequency': 450,
                'max_cuts': 12,
                'cut_add': 2,
                'betas': (0.9, 0.999),
                'initial_step_size': 1e-3,
                'final_step_size': 1e-6,
                "init_params": {
                    "nb_outer_iter": 500,
                    'initial_step_size': 1e-1,
                    'final_step_size': 1e-3,
                    'betas': (0.9, 0.999)
                },
            }
            cut_target_file = os.path.join(
                target_dir, f"Cuts_{cut_steps}{lin_approx_string}.txt")
            if not os.path.exists(cut_target_file):
                cuda_elided_model = copy.deepcopy(elided_model).cuda()
                cuda_domain = domain.cuda()
                exp_net = ExpLP([lay for lay in cuda_elided_model],
                                params=explp_params,
                                use_preactivation=True,
                                fixed_M=True)
                exp_start = time.time()
                with torch.no_grad():
                    if not args.from_intermediate_bounds:
                        exp_net.define_linear_approximation(cuda_domain)
                        ub = exp_net.upper_bounds[-1]
                    else:
                        exp_net.build_model_using_bounds(
                            cuda_domain, (intermediate_lbs, intermediate_ubs))
                        _, ub = exp_net.compute_lower_bound()
                exp_end = time.time()
                exp_time = exp_end - exp_start
                exp_ubs = ub.cpu()

                del exp_net
                dump_bounds(cut_target_file, exp_time, exp_ubs)

        # Big-M supergradient. (iters tuned to take same time as prox)
        for bigm_steps in [850]:
            bigm_adam_params = {
                "bigm_algorithm": "adam",
                "bigm": "only",
                "nb_outer_iter": bigm_steps,
                'initial_step_size': 1e-1,
                'final_step_size': 1e-3,
                'betas': (0.9, 0.999)
            }
            bigm_target_file = os.path.join(
                target_dir, f"Big-M_{bigm_steps}{lin_approx_string}.txt")
            if not os.path.exists(bigm_target_file):
                cuda_elided_model = copy.deepcopy(elided_model).cuda()
                cuda_domain = domain.cuda()
                bigm_net = ExpLP([lay for lay in cuda_elided_model],
                                 params=bigm_adam_params,
                                 use_preactivation=True,
                                 fixed_M=True)
                bigm_start = time.time()
                with torch.no_grad():
                    if not args.from_intermediate_bounds:
                        bigm_net.define_linear_approximation(cuda_domain)
                        ub = bigm_net.upper_bounds[-1]
                    else:
                        bigm_net.build_model_using_bounds(
                            cuda_domain, (intermediate_lbs, intermediate_ubs))
                        _, ub = bigm_net.compute_lower_bound()
                bigm_end = time.time()
                bigm_time = bigm_end - bigm_start
                bigm_ubs = ub.cpu()

                del bigm_net
                dump_bounds(bigm_target_file, bigm_time, bigm_ubs)

        ## Gurobi Anderson Bounds
        for n_cuts in [1]:
            grb_and_target_file = os.path.join(
                target_dir,
                f"Anderson-{n_cuts}cuts{lin_approx_string}-fixed.txt")
            if not os.path.exists(grb_and_target_file):
                lp_and_grb_net = AndersonLinearizedNetwork(
                    [lay for lay in elided_model],
                    mode="lp-cut",
                    n_cuts=n_cuts,
                    cuts_per_neuron=True)
                lp_and_grb_start = time.time()
                if not args.from_intermediate_bounds:
                    lp_and_grb_net.define_linear_approximation(domain[0],
                                                               n_threads=4)
                    ub = lp_and_grb_net.upper_bounds[-1]
                else:
                    lp_and_grb_net.build_model_using_bounds(
                        domain[0],
                        ([lbs[0].cpu() for lbs in intermediate_lbs
                          ], [ubs[0].cpu() for ubs in intermediate_ubs]),
                        n_threads=4)
                    _, ub = lp_and_grb_net.compute_lower_bound(ub_only=True)
                lp_and_grb_end = time.time()
                lp_and_grb_time = lp_and_grb_end - lp_and_grb_start
                lp_and_grb_ubs = torch.Tensor(ub).cpu()
                dump_bounds(grb_and_target_file, lp_and_grb_time,
                            lp_and_grb_ubs)
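
# The excerpt above does not show the script's entry point; a typical invocation
# (file name hypothetical) would be:
#   python compare_mnist_bounds.py 0.1 ./results --network wide --from_intermediate_bounds
if __name__ == '__main__':
    main()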
Example no. 7
class MIPNetwork:
    def __init__(self, layers):
        '''
        layers: A list of Pytorch layers containing only Linear/ReLU/MaxPools
        '''
        self.layers = layers
        self.net = nn.Sequential(*layers)

        # Initialize a LinearizedNetwork object to determine the lower and
        # upper bounds at each layer.
        self.lin_net = LinearizedNetwork(layers)

    def solve(self, inp_domain, timeout=None):
        '''
        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension
        timeout   : Maximum allowed running time, or None for no time limit

        Returns:
        sat     : boolean indicating whether the MIP is satisfiable.
        solution: Feasible point if the MIP is satisfiable,
                  None otherwise.
        '''
        if self.lower_bounds[-1].min() > 0:
            print("Early stopping")
            # The problem is infeasible, and we haven't setup the MIP
            return (False, None, 0)

        if timeout is not None:
            self.model.setParam('TimeLimit', timeout)

        if self.check_obj_value_callback:

            def early_stop_cb(model, where):
                if where == grb.GRB.Callback.MIP:
                    best_bound = model.cbGet(grb.GRB.Callback.MIP_OBJBND)
                    if best_bound > 0:
                        model.terminate()

                if where == grb.GRB.Callback.MIPNODE:
                    nodeCount = model.cbGet(grb.GRB.Callback.MIPNODE_NODCNT)
                    if (nodeCount % 100) == 0:
                        print(f"Running Nb states visited: {nodeCount}")

                if where == grb.GRB.Callback.MIPSOL:
                    obj = model.cbGet(grb.GRB.Callback.MIPSOL_OBJ)
                    if obj < 0:
                        # Does it have a chance at being a valid
                        # counter-example?

                        # Check it with the network
                        input_vals = model.cbGetSolution(self.gurobi_vars[0])

                        with torch.no_grad():
                            if isinstance(input_vals, list):
                                inps = torch.Tensor(input_vals).view(1, -1)
                            else:
                                assert isinstance(input_vals, grb.tupledict)
                                inps = torch.Tensor(
                                    [val for val in input_vals.values()])
                                inps = inps.view((1, ) +
                                                 self.lower_bounds[0].shape)
                            out = self.net(inps).squeeze()
                            # In case the network has several outputs, take the minimum one.
                            out = out.min().item()

                        if out < 0:
                            model.terminate()
        else:

            def early_stop_cb(model, where):
                if where == grb.GRB.Callback.MIPNODE:
                    nodeCount = model.cbGet(grb.GRB.Callback.MIPNODE_NODCNT)
                    if (nodeCount % 100) == 0:
                        print(f"Running Nb states visited: {nodeCount}")

        self.model.optimize(early_stop_cb)
        nb_visited_states = self.model.nodeCount

        if self.model.status == grb.GRB.INFEASIBLE:
            # Infeasible: No solution
            return (False, None, nb_visited_states)
        elif self.model.status == grb.GRB.OPTIMAL:
            # There is a feasible solution. Return the feasible solution as well.
            len_inp = len(self.gurobi_vars[0])

            # Get the input that gives the feasible solution.
            #input_vals = model.cbGetSolution(self.gurobi_vars[0])
            #inps = torch.Tensor([val for val in input_vals.values()])
            #inps = inps.view((1,) + self.lower_bounds[0].shape)
            optim_val = self.gurobi_vars[-1][-1].x

            return (optim_val < 0, (None, optim_val), nb_visited_states)
        elif self.model.status == grb.GRB.INTERRUPTED:
            obj_bound = self.model.ObjBound

            if obj_bound > 0:
                return (False, None, nb_visited_states)
            else:
                # There is a feasible solution. Return the feasible solution as well.
                len_inp = len(self.gurobi_vars[0])

                # Get the input that gives the feasible solution.
                inp = torch.Tensor(len_inp)
                if isinstance(self.gurobi_vars[0], list):
                    for idx, var in enumerate(self.gurobi_vars[0]):
                        inp[idx] = var.x
                else:
                    #assert isinstance(self.gurobi_vars[0], grb.tupledict)
                    inp = torch.zeros_like(self.lower_bounds[0])
                    for idx, var in self.gurobi_vars[0].items():
                        inp[idx] = var.x
                optim_val = self.gurobi_vars[-1][-1].x
            return (optim_val < 0, (inp, optim_val), nb_visited_states)
        elif self.model.status == grb.GRB.TIME_LIMIT:
            # We timed out, return a None Status
            return (None, None, nb_visited_states)
        else:
            raise Exception("Unexpected Status code")

    def tune(self, param_outfile, tune_timeout):
        self.model.Params.tuneOutput = 1
        self.model.Params.tuneTimeLimit = tune_timeout
        self.model.tune()

        # Get the best set of parameters
        self.model.getTuneResult(0)

        self.model.write(param_outfile)

    def do_interval_analysis(self, inp_domain):
        self.lower_bounds = []
        self.upper_bounds = []

        self.lower_bounds.append(inp_domain.select(-1, 0))
        self.upper_bounds.append(inp_domain.select(-1, 1))
        layer_idx = 1
        current_lb = self.lower_bounds[-1]
        current_ub = self.upper_bounds[-1]
        for layer in self.layers:
            if isinstance(layer, nn.Linear) or isinstance(layer, nn.Conv2d):
                if type(layer) is nn.Linear:
                    pos_weights = torch.clamp(layer.weight, min=0)
                    neg_weights = torch.clamp(layer.weight, max=0)

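                    # Interval arithmetic through an affine layer: the lower
                    # bound pairs positive weights with input lower bounds and
                    # negative weights with input upper bounds (and vice versa
                    # for the upper bound).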
                    new_layer_lb = torch.mv(pos_weights, current_lb) + \
                                   torch.mv(neg_weights, current_ub) + \
                                   layer.bias
                    new_layer_ub = torch.mv(pos_weights, current_ub) + \
                                   torch.mv(neg_weights, current_lb) + \
                                   layer.bias
                elif type(layer) is nn.Conv2d:
                    pre_lb = torch.Tensor(current_lb).unsqueeze(0)
                    pre_ub = torch.Tensor(current_ub).unsqueeze(0)
                    pos_weight = torch.clamp(layer.weight, 0, None)
                    neg_weight = torch.clamp(layer.weight, None, 0)

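                    # Same positive/negative weight split as for Linear,
                    # applied channel-wise through F.conv2d.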
                    out_lbs = (
                        F.conv2d(pre_lb, pos_weight, layer.bias, layer.stride,
                                 layer.padding, layer.dilation, layer.groups) +
                        F.conv2d(pre_ub, neg_weight, None, layer.stride,
                                 layer.padding, layer.dilation, layer.groups))
                    out_ubs = (
                        F.conv2d(pre_ub, pos_weight, layer.bias, layer.stride,
                                 layer.padding, layer.dilation, layer.groups) +
                        F.conv2d(pre_lb, neg_weight, None, layer.stride,
                                 layer.padding, layer.dilation, layer.groups))
                    new_layer_lb = out_lbs.squeeze(0)
                    new_layer_ub = out_ubs.squeeze(0)
                self.lower_bounds.append(new_layer_lb)
                self.upper_bounds.append(new_layer_ub)
                current_lb = new_layer_lb
                current_ub = new_layer_ub
            elif type(layer) == nn.ReLU:
                current_lb = torch.clamp(current_lb, min=0)
                current_ub = torch.clamp(current_ub, min=0)
            elif type(layer) == nn.MaxPool1d:
                new_layer_lb = []
                new_layer_ub = []
                assert layer.padding == 0, "Unsupported MaxPool option"
                assert layer.dilation == 1, "Unsupported MaxPool option"

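                # max is monotone, so the bounds of each pooling window are
                # simply the max of the input lower/upper bounds in the window.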
                nb_pre = len(self.lower_bounds[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size

                while pre_window_end <= nb_pre:
                    lb = max(current_lb[pre_start_idx:pre_window_end])
                    ub = max(current_ub[pre_start_idx:pre_window_end])

                    new_layer_lb.append(lb)
                    new_layer_ub.append(ub)

                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                current_lb = torch.Tensor(new_layer_lb)
                current_ub = torch.Tensor(new_layer_ub)
                self.lower_bounds.append(current_lb)
                self.upper_bounds.append(current_ub)
            elif type(layer) == View:
                continue
            elif type(layer) == Flatten:
                current_lb = current_lb.view(-1)
                current_ub = current_ub.view(-1)
            else:
                raise NotImplementedError

    def setup_model(self,
                    inp_domain,
                    use_obj_function=False,
                    bounds="opt",
                    parameter_file=None):
        '''
        inp_domain: Tensor containing in each row the lower and upper bound
                    for the corresponding dimension

        use_obj_function: If False, don't use any objective function and simply
                          add a constraint on the output.
                          If True, minimise the network output and use a callback
                          to interrupt the solve as soon as a counterexample is found.
        bounds: string indicating which method to use for the intermediate bounds:
                "opt", "interval" or "interval-kw".
        parameter_file: Load a set of parameters for the MIP solver if a path is given.

        Set up the model to be optimized by Gurobi.
        '''
        if bounds == "opt":
            # First use define_linear_approximation from LinearizedNetwork to
            # compute upper and lower bounds, which are needed to define the
            # big-M constants.
            self.lin_net.define_linear_approximation(inp_domain)

            self.lower_bounds = list(
                map(torch.Tensor, self.lin_net.lower_bounds))
            self.upper_bounds = list(
                map(torch.Tensor, self.lin_net.upper_bounds))
        elif bounds == "interval":
            self.do_interval_analysis(inp_domain)
            if self.lower_bounds[-1][0] > 0:
                # The problem is already guaranteed to be infeasible,
                # Let's not waste time setting up the MIP
                return
        elif bounds == "interval-kw":
            self.do_interval_analysis(inp_domain)
            kw_dual = LooseDualNetworkApproximation(self.layers)
            kw_dual.remove_maxpools(inp_domain, no_opt=True)
            lower_bounds, upper_bounds = kw_dual.get_intermediate_bounds(
                inp_domain)
            #print(lower_bounds)
            #print(upper_bounds)

            # We want to get the best out of interval analysis and K&W.

            # TODO: There is a slight problem. To use the K&W code directly, we
            # would need to make a bunch of changes, notably removing all the
            # MaxPools and converting them to ReLUs. Quick and temporary fix:
            # take the elementwise best of both bounds as long as the shapes
            # match, and fall back to the interval-analysis bounds after the
            # first mismatch.

            # If the network is purely ReLU, there should be no problem.
            # If the network is purely ReLU with a MaxPool at the end, that is
            # still fine because we get the best bounds up to the MaxPool, and
            # that is the last place where the bounds are used. This only
            # degrades if there is a MaxPool early in the network, and even
            # then we simply fall back to interval analysis for the later
            # layers.
            for i in range(len(lower_bounds)):
                if lower_bounds[i].shape == self.lower_bounds[i].shape:
                    # Keep the best lower bound
                    lb_diff = lower_bounds[i] - self.lower_bounds[i]
                    ub_diff = upper_bounds[i] - self.upper_bounds[i]
                    # print(f"LB Difference (kw to interval) min: {lb_diff.min()} \t max:{lb_diff.max()}")
                    # print(f"UB Difference (kw to interval) min: {ub_diff.min()} \t max:{ub_diff.max()}")
                    torch.max(lower_bounds[i],
                              self.lower_bounds[i],
                              out=self.lower_bounds[i])
                    torch.min(upper_bounds[i],
                              self.upper_bounds[i],
                              out=self.upper_bounds[i])
                else:
                    # Mismatch in dimension: stop here and keep the
                    # interval-analysis bounds for the remaining layers.
                    break
            if self.lower_bounds[-1].min() > 0:
                # The problem is already guaranteed to be infeasible,
                # Let's not waste time setting up the MIP
                return
        else:
            raise NotImplementedError("Unknown bound computation method.")

        self.gurobi_vars = []
        self.model = grb.Model()
        self.model.setParam('OutputFlag', False)
        self.model.setParam('Threads', 1)
        self.model.setParam('DualReductions', 0)
        if parameter_file is not None:
            self.model.read(parameter_file)

        self.zero_var = self.model.addVar(lb=0,
                                          ub=0,
                                          obj=0,
                                          vtype=grb.GRB.CONTINUOUS,
                                          name=f'zero')

        # First add the input variables as Gurobi variables.
        if inp_domain.dim() == 2:
            inp_gurobi_vars = self.model.addVars(
                [i for i in range(inp_domain.numel() // 2)],
                lb=self.lower_bounds[0],
                ub=self.upper_bounds[0],
                name='inp')
            inp_gurobi_vars = [var for key, var in inp_gurobi_vars.items()]
        else:
            inp_shape = self.lower_bounds[0].shape
            #inp_gurobi_vars = self.model.addVars([chan for chan in range(inp_shape[0])],
            #                                     [row for row in range(inp_shape[1])],
            #                                     [col for col in range(inp_shape[2])],
            #                                     lb=self.lower_bounds[0].numpy(),
            #                                     ub=self.upper_bounds[0].numpy(),
            #                                     name='inp')
            inp_gurobi_vars = {}
            for chan in range(inp_domain.size(0)):
                chan_vars = []
                for row in range(inp_domain.size(1)):
                    row_vars = []
                    for col in range(inp_domain.size(2)):
                        lb = inp_domain[chan, row, col, 0]
                        ub = inp_domain[chan, row, col, 1]
                        v = self.model.addVar(lb=lb,
                                              ub=ub,
                                              obj=0,
                                              vtype=grb.GRB.CONTINUOUS,
                                              name=f'inp_[{chan},{row},{col}]')
                        inp_gurobi_vars[(chan, row, col)] = v
        self.gurobi_vars.append(inp_gurobi_vars)

        layer_idx = 1
        for layer in self.layers:
            if type(layer) is nn.Linear:
                layer_nb_out = layer.out_features
                pre_vars = self.gurobi_vars[-1]
                if isinstance(pre_vars, grb.tupledict):
                    pre_vars = [var for key, var in sorted(pre_vars.items())]
                # Build all the outputs of the linear layer
                new_vars = self.model.addVars([i for i in range(layer_nb_out)],
                                              lb=self.lower_bounds[layer_idx],
                                              ub=self.upper_bounds[layer_idx],
                                              name=f'zhat{layer_idx}')
                new_layer_gurobi_vars = [var for key, var in new_vars.items()]
                self.model.addConstrs(
                    ((grb.LinExpr(layer.weight[neuron_idx, :], pre_vars) +
                      layer.bias[neuron_idx].item()) == new_vars[neuron_idx]
                     for neuron_idx in range(layer.out_features)),
                    name=f'lay{layer_idx}')
            elif type(layer) is nn.Conv2d:
                in_shape = self.lower_bounds[layer_idx - 1].shape
                out_shape = self.lower_bounds[layer_idx].shape

                flat_idxs = [
                    elt
                    for elt in product(range(out_shape[0]), range(
                        out_shape[1]), range(out_shape[2]))
                ]
                flat_out_lbs = [
                    self.lower_bounds[layer_idx][chan, row, col]
                    for chan, row, col in product(range(out_shape[0]),
                                                  range(out_shape[1]),
                                                  range(out_shape[2]))
                ]
                flat_out_ubs = [
                    self.upper_bounds[layer_idx][chan, row, col]
                    for chan, row, col in product(range(out_shape[0]),
                                                  range(out_shape[1]),
                                                  range(out_shape[2]))
                ]
                new_layer_gurobi_vars = self.model.addVars(
                    flat_idxs,
                    lb=flat_out_lbs,
                    ub=flat_out_ubs,
                    name=f'zhat{layer_idx}')
                coeffs = []
                for out_chan_idx in range(out_shape[0]):
                    coeffs.append(layer.weight[out_chan_idx, :].view(-1))

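                # Builds the affine expression of one convolution output by
                # gathering the window of input variables (zero_var stands in
                # for padded positions) and pairing them with the flattened
                # kernel weights.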
                def make_lin_expr(out_chan_idx, out_row_idx, out_col_idx):
                    lin_bias = layer.bias[out_chan_idx].item()
                    lin_coeffs = coeffs[out_chan_idx]

                    start_row_idx = -layer.padding[0] + layer.stride[
                        0] * out_row_idx
                    end_row_idx = start_row_idx + layer.weight.shape[2]
                    start_col_idx = -layer.padding[1] + layer.stride[
                        1] * out_col_idx
                    end_col_idx = start_col_idx + layer.weight.shape[3]

                    lin_vars = [
                        (self.zero_var if
                         ((row_idx < 0) or (row_idx == in_shape[1]) or
                          (col_idx < 0) or (col_idx == in_shape[2])) else
                         self.gurobi_vars[-1][(chan_idx, row_idx, col_idx)])
                        for chan_idx in range(in_shape[0])
                        for row_idx in range(start_row_idx, end_row_idx)
                        for col_idx in range(start_col_idx, end_col_idx)
                    ]
                    lin_expr = grb.LinExpr(lin_coeffs, lin_vars) + lin_bias
                    return lin_expr

                constrs = []
                for out_chan_idx in range(out_shape[0]):
                    for out_row_idx in range(out_shape[1]):
                        for out_col_idx in range(out_shape[2]):
                            constrs.append(
                                make_lin_expr(out_chan_idx, out_row_idx,
                                              out_col_idx) ==
                                new_layer_gurobi_vars[(out_chan_idx,
                                                       out_row_idx,
                                                       out_col_idx)])
                self.model.addConstrs(constr for constr in constrs)
            elif type(layer) == nn.ReLU:
                pre_lbs = self.lower_bounds[layer_idx]
                pre_ubs = self.upper_bounds[layer_idx]
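                # Big-M encoding of y = max(0, x): for ambiguous neurons
                # (lb < 0 < ub) introduce a binary delta and impose
                #   y >= 0, y >= x, y <= ub * delta, y <= x - lb * (1 - delta).
                # Neurons that are provably passing or blocked are wired
                # directly to the input variable or to zero_var.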
                if isinstance(self.gurobi_vars[-1], grb.tupledict):
                    amb_mask = (pre_lbs < 0) & (pre_ubs > 0)
                    if amb_mask.sum().item() != 0:
                        to_new_preubs = pre_ubs[amb_mask]
                        to_new_prelbs = pre_lbs[amb_mask]

                        new_var_idxs = torch.nonzero(
                            (pre_lbs < 0) & (pre_ubs > 0)).numpy().tolist()
                        new_var_idxs = [tuple(idxs) for idxs in new_var_idxs]
                        new_layer_gurobi_vars = self.model.addVars(
                            new_var_idxs,
                            lb=0,
                            ub=to_new_preubs,
                            name=f'z{layer_idx}')
                        new_binary_vars = self.model.addVars(
                            new_var_idxs,
                            lb=0,
                            ub=1,
                            vtype=grb.GRB.BINARY,
                            name=f'delta{layer_idx}')

                        flat_new_vars = [
                            new_layer_gurobi_vars[idx] for idx in new_var_idxs
                        ]
                        flat_binary_vars = [
                            new_binary_vars[idx] for idx in new_var_idxs
                        ]
                        pre_amb_vars = [
                            self.gurobi_vars[-1][idx] for idx in new_var_idxs
                        ]

                        # C1: y >= 0 (already enforced by the variable lower bound)
                        # C2: y >= x, the ReLU output is at least its input
                        self.model.addConstrs(
                            (flat_new_vars[idx] >= pre_amb_vars[idx]
                             for idx in range(len(flat_new_vars))),
                            name=f'ReLU_lb{layer_idx}')
                        # C3: y <= ub * delta
                        self.model.addConstrs(
                            (flat_new_vars[idx] <=
                             to_new_preubs[idx].item() * flat_binary_vars[idx]
                             for idx in range(len(flat_new_vars))),
                            name=f'ReLU{layer_idx}_ub1-')
                        # C4: y <= x - lb * (1 - delta)
                        self.model.addConstrs(
                            (flat_new_vars[idx] <=
                             (pre_amb_vars[idx] - to_new_prelbs[idx].item() *
                              (1 - flat_binary_vars[idx]))
                             for idx in range(len(flat_new_vars))),
                            name=f'ReLU{layer_idx}_ub2-')
                    else:
                        new_layer_gurobi_vars = grb.tupledict()

                    for pos in torch.nonzero(pre_lbs >= 0).numpy().tolist():
                        pos = tuple(pos)
                        new_layer_gurobi_vars[pos] = self.gurobi_vars[-1][pos]
                    for pos in torch.nonzero(pre_ubs <= 0).numpy().tolist():
                        new_layer_gurobi_vars[tuple(pos)] = self.zero_var
                else:
                    assert isinstance(self.gurobi_vars[-1][0], grb.Var)

                    amb_mask = (pre_lbs < 0) & (pre_ubs > 0)
                    if amb_mask.sum().item() == 0:
                        pass
                        # print("WARNING: No ambiguous ReLU at a layer")
                    else:
                        to_new_preubs = pre_ubs[amb_mask]
                        new_var_idxs = torch.nonzero(amb_mask).squeeze(
                            1).numpy().tolist()
                        new_vars = self.model.addVars(new_var_idxs,
                                                      lb=0,
                                                      ub=to_new_preubs,
                                                      name=f'z{layer_idx}')
                        new_binary_vars = self.model.addVars(
                            new_var_idxs,
                            lb=0,
                            ub=1,
                            vtype=grb.GRB.BINARY,
                            name=f'delta{layer_idx}')

                        # C1: y >= 0 (already enforced by the variable lower bound)
                        # C2: y >= x, the ReLU output is at least its input
                        self.model.addConstrs(
                            (new_vars[idx] >= self.gurobi_vars[-1][idx]
                             for idx in new_var_idxs),
                            name=f'ReLU_lb{layer_idx}')
                        # C3: y <= ub * delta
                        self.model.addConstrs(
                            (new_vars[idx] <=
                             pre_ubs[idx].item() * new_binary_vars[idx]
                             for idx in new_var_idxs),
                            name=f'ReLU{layer_idx}_ub1-')
                        # C4: y <= x - lb * (1 - delta)
                        self.model.addConstrs(
                            (new_vars[idx] <=
                             (self.gurobi_vars[-1][idx] - pre_lbs[idx].item() *
                              (1 - new_binary_vars[idx]))
                             for idx in new_var_idxs),
                            name=f'ReLU{layer_idx}_ub2-')

                    # Get all the variables in a list, such that we have the
                    # output of the layer
                    new_layer_gurobi_vars = []
                    new_idx = 0
                    for idx in range(layer_nb_out):
                        if pre_lbs[idx] >= 0:
                            # Pass through variable
                            new_layer_gurobi_vars.append(
                                self.gurobi_vars[-1][idx])
                        elif pre_ubs[idx] <= 0:
                            # Blocked variable
                            new_layer_gurobi_vars.append(self.zero_var)
                        else:
                            new_layer_gurobi_vars.append(new_vars[idx])
                layer_idx += 1
            elif type(layer) == nn.MaxPool1d:
                assert layer.padding == 0, "Unsupported MaxPool option"
                assert layer.dilation == 1, "Unsupported MaxPool option"
                # Output variables of this max-pooling layer.
                new_layer_gurobi_vars = []
                nb_pre = len(self.gurobi_vars[-1])
                window_size = layer.kernel_size
                stride = layer.stride

                pre_start_idx = 0
                pre_window_end = pre_start_idx + window_size

                while pre_window_end <= nb_pre:
                    ub_max = max(self.upper_bounds[layer_idx - 1]
                                 [pre_start_idx:pre_window_end]).item()
                    window_bin_vars = []
                    neuron_idx = pre_start_idx // stride
                    v = self.model.addVar(
                        vtype=grb.GRB.CONTINUOUS,
                        lb=-grb.GRB.INFINITY,
                        ub=grb.GRB.INFINITY,
                        name=f'MaxPool_out_{layer_idx}_{neuron_idx}')
                    for pre_var_idx, pre_var in enumerate(
                            self.gurobi_vars[-1]
                        [pre_start_idx:pre_window_end]):
                        lb = self.lower_bounds[layer_idx -
                                               1][pre_start_idx +
                                                  pre_var_idx].item()
                        b = self.model.addVar(
                            vtype=grb.GRB.BINARY,
                            name=
                            f'MaxPool_b_{layer_idx}_{neuron_idx}_{pre_var_idx}'
                        )
                        # MIP formulation of max pooling:
                        #
                        # y = max(x_1, x_2, ..., x_n)
                        #
                        # Introduce binary variables d_1, d_2, ..., d_n:
                        # d_i = 1 if x_i is the maximum value, 0 otherwise
                        #
                        # We know the lower (l_i) and upper bounds (u_i) for x_i
                        #
                        # Denote the maximum of the upper_bounds of all inputs x_i as u_max
                        #
                        # MIP must then satisfy the following constraints:
                        #
                        # Constr_1: l_i <= x_i <= u_i
                        # Constr_2: y >= x_i
                        # Constr_3: y <= x_i + (u_max - l_i)*(1 - d_i)
                        # Constr_4: sum(d_1, d_2, ..., d_n) == 1

                        # Constr_1 is already satisfied because the pre-layer
                        # variables are created with their interval bounds.
                        # Constr_2
                        self.model.addConstr(v >= pre_var)
                        # Constr_3
                        self.model.addConstr(v <= pre_var + (ub_max - lb) *
                                             (1 - b))

                        window_bin_vars.append(b)
                    # Constr_4
                    self.model.addConstr(sum(window_bin_vars) == 1)
                    self.model.update()
                    pre_start_idx += stride
                    pre_window_end = pre_start_idx + window_size
                    new_layer_gurobi_vars.append(v)
            elif isinstance(layer, View) or isinstance(layer, Flatten):
                continue
            else:
                raise NotImplementedError

            self.gurobi_vars.append(new_layer_gurobi_vars)

        if len(self.gurobi_vars[-1]) == 1:
            # The network has a scalar output; nothing more to do.
            pass
        else:
            # The network has multiple outputs; we need to encode that their
            # minimum is below 0, so add a variable that corresponds to that
            # minimum.
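            # min_var <= out_i for every output keeps it below the minimum;
            # the binary-selected lower constraints below, together with
            # sum(delta) == 1, force min_var to reach one of the outputs, so
            # min_var ends up equal to the minimum output.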
            min_var = self.model.addVar(vtype=grb.GRB.CONTINUOUS,
                                        lb=self.lower_bounds[-1].min().item(),
                                        ub=self.upper_bounds[-1].min().item(),
                                        name="final_output")
            self.model.addConstrs(
                (min_var <= self.gurobi_vars[-1][out_idx]
                 for out_idx in range(len(self.gurobi_vars[-1]))),
                name=f'final_constraint_min_ub')

            bin_min_vars = self.model.addVars(range(len(self.gurobi_vars[-1])),
                                              vtype=grb.GRB.BINARY,
                                              lb=0,
                                              ub=1,
                                              name='final_binary')
            out_lbmin = self.lower_bounds[-1].min()
            self.model.addConstrs(
                (min_var >=
                 (self.gurobi_vars[-1][out_idx] +
                  (out_lbmin - self.upper_bounds[-1][out_idx]).item() *
                  (1 - bin_min_vars[out_idx]))
                 for out_idx in range(len(self.gurobi_vars[-1]))),
                name=f'final_constraint_min_lb')
            self.model.addConstr(
                sum(var for var in bin_min_vars.values()) == 1)

            self.gurobi_vars.append([min_var])
            self.lower_bounds.append(self.lower_bounds[-1].min())
            self.upper_bounds.append(self.upper_bounds[-1].min())

        # Add the final constraint that the output must be less than or equal
        # to zero.
        if not use_obj_function:
            self.model.addConstr(self.gurobi_vars[-1][0] <= 0)
            self.model.setObjective(0, grb.GRB.MAXIMIZE)
            self.check_obj_value_callback = False
        else:
            # Set the minimization of the network output
            self.model.setObjective(self.gurobi_vars[-1][-1], grb.GRB.MINIMIZE)
            self.check_obj_value_callback = True

        # Optimize the model.
        self.model.update()
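A minimal usage sketch for the class above (the toy layer sizes, input domain
and timeout are hypothetical and only illustrate the call sequence; gurobipy
and the helper classes used by MIPNetwork are assumed to be importable):

import torch
from torch import nn

# Hypothetical toy fully-connected ReLU network.
layers = [nn.Linear(2, 10), nn.ReLU(), nn.Linear(10, 1)]
mip_net = MIPNetwork(layers)

# One row of [lower_bound, upper_bound] per input dimension.
inp_domain = torch.tensor([[-1.0, 1.0],
                           [-1.0, 1.0]])

# Build the MIP with interval intermediate bounds, minimising the output and
# using the early-stopping callback, then solve with a time limit.
mip_net.setup_model(inp_domain, use_obj_function=True, bounds="interval")
sat, solution, nb_visited_states = mip_net.solve(inp_domain, timeout=60)
print(sat, solution, nb_visited_states)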