Example #1
    def __init__(self, layers):
        '''
        layers: A list of PyTorch layers, containing only Linear/ReLU/MaxPool
                modules
        '''
        self.layers = layers
        self.net = nn.Sequential(*layers)

        # Initialize a LinearizedNetwork object to determine the lower and
        # upper bounds at each layer.
        self.lin_net = LinearizedNetwork(layers)
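This constructor is a fragment of a larger wrapper class. A minimal usage sketch follows; the class name `VerifiedNetwork` stands in for whatever class the constructor actually belongs to, and the example layers are illustrative assumptions:

import torch
import torch.nn as nn

# Hypothetical usage of the constructor above (the class name is assumed).
layers = [nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10)]
net = VerifiedNetwork(layers)
out = net.net(torch.rand(1, 784))  # forward pass through the nn.Sequential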
Example #2
def bab(gt_prop,
        verif_layers,
        domain,
        return_dict,
        timeout,
        batch_size,
        method,
        tot_iter,
        parent_init,
        args,
        gurobi_dict=None,
        writer=None):
    epsilon = 1e-4

    # NOTE: `gpu` and `decision_bound` are not defined in this snippet;
    # reasonable defaults are assumed here (run on GPU when available,
    # verify the property against a decision boundary of 0).
    gpu = torch.cuda.is_available()
    decision_bound = 0

    if gpu:
        cuda_verif_layers = [copy.deepcopy(lay).cuda() for lay in verif_layers]
        domain = domain.cuda()
    else:
        cuda_verif_layers = [copy.deepcopy(lay) for lay in verif_layers]

    # use best of naive interval propagation and KW as intermediate bounds
    intermediate_net = SaddleLP(cuda_verif_layers,
                                store_bounds_primal=False,
                                max_batch=args.max_solver_batch)
    intermediate_net.set_solution_optimizer('best_naive_kw', None)
    anderson_bounds_net = None
    hard_crit = None
    prob_hard_crit = None

    # might need a smaller batch size for hard domains
    hard_batch_size = batch_size if args.hard_batch_size == -1 else args.hard_batch_size

    # Split domains into easy and hard, define two separate bounding methods to handle their last layer.
    if method in ["cut", "gurobi-anderson"]:

        # Set criteria for identifying subproblems as hard
        hard_crit = {
            "lb_threshold": 0.5,
            "depth_threshold": 0,  # 15
            "impr_threshold": 1e-1,
            "doms_len_threshold": 200,
            "auto": args.auto_strat,
            "hard_overhead": args.hard_overhead,  # assumed at full batch
        }

        # Set bounds net for easy domains.
        if method in ["cut"]:
            bigm_adam_params = {
                "bigm_algorithm": "adam",
                "bigm": "only",
                "nb_outer_iter": int(tot_iter),  # cifar_oval: 180
                'initial_step_size': args.dualinit_init_step,  # cifar_oval: 1e-2
                'initial_step_size_pinit': args.dualinit_init_step / 10,
                'final_step_size': args.dualinit_fin_step,  # cifar_oval: 1e-4
                'betas': (0.9, 0.999)
            }
            bounds_net = ExpLP(cuda_verif_layers,
                               params=bigm_adam_params,
                               store_bounds_primal=True)
        else:
            bounds_net = LinearizedNetwork(verif_layers)

        # Set bounds net for hard domains.
        if method == "cut":
            anderson_iter = args.hard_iter  # 100
            explp_params = {
                "nb_iter": anderson_iter,
                'bigm': "init",
                'cut': "only",
                "bigm_algorithm": "adam",
                'cut_frequency': 450,
                'max_cuts': 8,
                'cut_add': args.cut_add,  # 2
                'betas': (0.9, 0.999),
                'initial_step_size': args.init_step,
                'final_step_size': args.fin_step,
                "init_params": {
                    "nb_outer_iter":
                    500,  #500 for our datasets, 1000 for cifar10_8_255
                    'initial_step_size': args.dualinit_init_step,
                    'initial_step_size_pinit': args.dualinit_init_step / 10,
                    'final_step_size': args.dualinit_fin_step,
                    'betas': (0.9, 0.999),
                },
            }
            anderson_bounds_net = ExpLP(cuda_verif_layers,
                                        params=explp_params,
                                        fixed_M=True,
                                        store_bounds_primal=True)
            print(f"Running cut for {anderson_iter} iterations")
        elif method == "gurobi-anderson":
            anderson_bounds_net = AndersonLinearizedNetwork(
                verif_layers,
                mode="lp-cut",
                n_cuts=args.n_cuts,
                cuts_per_neuron=True,
                decision_boundary=decision_bound)

        if args.no_easy:
            # Ignore the easy problems bounding, use the hard one for all.
            bounds_net = anderson_bounds_net
            anderson_bounds_net = None

    # Use only a single last layer bounding method for all problems.
    elif method == "prox":
        bounds_net = SaddleLP(cuda_verif_layers,
                              store_bounds_primal=True,
                              max_batch=args.max_solver_batch)
        bounds_net.set_decomposition('pairs', 'KW')
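        # 'pairs' splits the problem over pairs of consecutive layers; 'KW'
        # initializes the duals from the Kolter-Wong bound (a description
        # assumed from the standard usage of these options in this codebase).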
        optprox_params = {
            'nb_total_steps': int(tot_iter),
            'max_nb_inner_steps': 2,  # this is 2/5 as simpleprox
            'initial_eta': args.eta,
            'final_eta': args.feta,
            'log_values': False,
            'maintain_primal': True
        }
        bounds_net.set_solution_optimizer('optimized_prox', optprox_params)
        print(f"Running prox with {tot_iter} steps")
    elif method == "adam":
        bounds_net = SaddleLP(cuda_verif_layers,
                              store_bounds_primal=True,
                              max_batch=args.max_solver_batch)
        bounds_net.set_decomposition('pairs', 'KW')
        adam_params = {
            'nb_steps': int(tot_iter),
            'initial_step_size': args.init_step,
            'final_step_size': args.fin_step,
            'betas': (0.9, 0.999),
            'log_values': False
        }
        bounds_net.set_solution_optimizer('adam', adam_params)
        print(f"Running adam with {tot_iter} steps")
    elif method == "bigm-adam":
        bigm_adam_params = {
            "bigm_algorithm": "adam",
            "bigm": "only",
            "nb_outer_iter": int(tot_iter),
            'initial_step_size': args.init_step,
            'initial_step_size_pinit': args.init_step / 10,
            'final_step_size': args.fin_step,
            'betas': (0.9, 0.999)
        }
        bounds_net = ExpLP(cuda_verif_layers,
                           params=bigm_adam_params,
                           store_bounds_primal=True)
    elif method == "gurobi":
        bounds_net = LinearizedNetwork(verif_layers)

    # branching
    if args.branching_choice == 'heuristic':
        branching_net_name = None
    else:
        raise NotImplementedError

    with torch.no_grad():
        min_lb, min_ub, ub_point, nb_states, fail_safe_ratio = relu_bab(
            intermediate_net,
            bounds_net,
            branching_net_name,
            domain,
            decision_bound,
            eps=epsilon,
            timeout=timeout,
            batch_size=batch_size,
            parent_init_flag=parent_init,
            gurobi_specs=gurobi_dict,
            anderson_bounds_net=anderson_bounds_net,
            writer=writer,
            hard_crit=hard_crit,
            hard_batch_size=hard_batch_size)

    # relu_bab returns None bounds on timeout
    if min_lb is None or min_ub is None or ub_point is None:
        return_dict["min_lb"] = None
        return_dict["min_ub"] = None
        return_dict["ub_point"] = None
        return_dict["nb_states"] = nb_states
        return_dict["bab_out"] = "timeout"
        return_dict["fs_ratio"] = fail_safe_ratio
    else:
        return_dict["min_lb"] = min_lb.cpu()
        return_dict["min_ub"] = min_ub.cpu()
        return_dict["ub_point"] = ub_point.cpu()
        return_dict["nb_states"] = nb_states
        return_dict["fs_ratio"] = fail_safe_ratio
Example #3
def reluify_maxpool(layers, domain, no_opt=False):
    '''
    Remove all the Maxpool units of a feedforward network represented by
    `layers` and replace them by an equivalent combination of ReLU + Linear

    This is only valid over the domain `domain` because we use some knowledge
    about upper and lower bounds of certain neurons

    Args:
      no_opt: Boolean. If True, don't optimize the bounds used to convert the
              maxpool into ReLU; rely on interval analysis instead. If False,
              use the tighter optimized bounds.
    '''
    if no_opt:
        # We build a MIPNetwork here, but we are not going to solve it; it is
        # simply the class that implements interval analysis.

        # TODO: Importing here is ugly, but importing at the top level instead
        # would mean a larger refactoring than we are willing to do right now.
        from plnn.mip_solver import MIPNetwork

        mip_net = MIPNetwork(layers)
        mip_net.do_interval_analysis(domain)
        lbs = mip_net.lower_bounds
    else:
        # We will need some lower bounds for the inputs to the maxpooling
        # We will simply use those given by a LinearizedNetwork
        lin_net = LinearizedNetwork(layers)
        lin_net.define_linear_approximation(domain)
        lbs = lin_net.lower_bounds

    layers = layers[:]

    new_all_layers = []

    idx_of_inp_lbs = 0
    layer_idx = 0
    while layer_idx < len(layers):
        layer = layers[layer_idx]
        if type(layer) is nn.MaxPool1d:
            # We need to decompose this MaxPool until its kernel size is 2
            assert layer.padding == 0
            assert layer.dilation == 1
            if layer.kernel_size > 2:
                assert layer.kernel_size % 2 == 0, "Not supported yet"
                assert layer.stride % 2 == 0, "Not supported yet"
                # We're going to decompose this max-pooling into two
                # max-poolings:
                # max(     in_1, in_2,       in_3, in_4)
                # becomes
                # max( max(in_1, in_2),  max(in_3, in_4))
                first_mp = nn.MaxPool1d(2, stride=2)
                second_mp = nn.MaxPool1d(layer.kernel_size // 2,
                                         stride=layer.stride // 2)
                # We will replace the Maxpooling that was originally there with
                # those two layers
                # We need to add a corresponding layer of lower bounds
                first_lbs = lbs[idx_of_inp_lbs]
                intermediate_lbs = []
                for pair_idx in range(len(first_lbs) // 2):
                    intermediate_lbs.append(
                        max(first_lbs[2 * pair_idx],
                            first_lbs[2 * pair_idx + 1]))
                # Do the replacement
                del layers[layer_idx]
                layers.insert(layer_idx, first_mp)
                layers.insert(layer_idx + 1, second_mp)
                lbs.insert(idx_of_inp_lbs + 1, intermediate_lbs)

                # Continue without incrementing layer_idx, so that we go
                # through the loop again with the now-simplified maxpool
                continue
            elif layer.kernel_size == 2:
                # Each input pair needs two units in the intermediate layer
                # that is going to be ReLU-ified
                pre_nb_inp_lin = len(lbs[idx_of_inp_lbs])
                # How many kernel starting positions can we fit?
                # 1 + the number of strides that fit before the kernel would
                # run past the end of the array
                pre_nb_out_lin = 2 * (
                    1 + (pre_nb_inp_lin - layer.kernel_size) // layer.stride)
                pre_relu_lin = nn.Linear(pre_nb_inp_lin,
                                         pre_nb_out_lin,
                                         bias=True)
                pre_relu_weight = pre_relu_lin.weight.data
                pre_relu_bias = pre_relu_lin.bias.data
                pre_relu_weight.zero_()
                pre_relu_bias.zero_()
                # For each pair (x, y) that must become max(x, y),
                # we create (x - y, y - y_lb)
                first_in_index = 0
                first_out_index = 0
                while first_in_index + 1 < pre_nb_inp_lin:
                    pre_relu_weight[first_out_index, first_in_index] = 1
                    pre_relu_weight[first_out_index, first_in_index + 1] = -1

                    pre_relu_weight[first_out_index + 1, first_in_index + 1] = 1
                    pre_relu_bias[first_out_index + 1] = (
                        -lbs[idx_of_inp_lbs][first_in_index + 1])

                    # Now shift
                    first_in_index += layer.stride
                    first_out_index += 2
                new_all_layers.append(pre_relu_lin)
                new_all_layers.append(nn.ReLU())

                # We now need to create the second layer
                # It will sum [max(x-y, 0)], [max(y - y_lb, 0)] and y_lb
                post_nb_inp_lin = pre_nb_out_lin
                post_nb_out_lin = post_nb_inp_lin // 2
                post_relu_lin = nn.Linear(post_nb_inp_lin, post_nb_out_lin)
                post_relu_weight = post_relu_lin.weight.data
                post_relu_bias = post_relu_lin.bias.data
                post_relu_weight.zero_()
                post_relu_bias.zero_()
                first_in_index = 0
                out_index = 0
                while first_in_index + 1 < post_nb_inp_lin:
                    post_relu_weight[out_index, first_in_index] = 1
                    post_relu_weight[out_index, first_in_index + 1] = 1
                    post_relu_bias[out_index] = lbs[idx_of_inp_lbs][
                        layer.stride * out_index + 1]
                    first_in_index += 2
                    out_index += 1
                new_all_layers.append(post_relu_lin)
                idx_of_inp_lbs += 1
            else:
                # This should have been cleaned up in one of the simplify passes
                raise NotImplementedError
        elif type(layer) in [nn.Linear, nn.ReLU]:
            new_all_layers.append(layer)
            idx_of_inp_lbs += 1
        elif type(layer) is View:
            # We don't append View layers, since we are getting rid of them
            pass
        layer_idx += 1
    return new_all_layers
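The ReLU-ification in the kernel_size == 2 branch relies on the identity max(x, y) = relu(x - y) + relu(y - y_lb) + y_lb, which holds whenever y >= y_lb. A standalone numerical check of that identity (not part of the original code):

import torch
import torch.nn.functional as F

# Check: max(x, y) == relu(x - y) + relu(y - y_lb) + y_lb  whenever y >= y_lb.
x = torch.tensor([3.0, -1.0, 0.5])
y = torch.tensor([2.0, 4.0, 0.5])
y_lb = torch.full_like(y, -5.0)  # any valid lower bound on y

lhs = torch.max(x, y)
rhs = F.relu(x - y) + F.relu(y - y_lb) + y_lb
assert torch.allclose(lhs, rhs)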
def main():
    parser = argparse.ArgumentParser(
        description="Compute and time a bunch of bounds.")
    parser.add_argument('eps',
                        type=float,
                        help='Epsilon: radius of the input perturbation')
    parser.add_argument('target_directory',
                        type=str,
                        help='Where to store the results')
    parser.add_argument('--modulo',
                        type=int,
                        help='Number of jobs the dataset is split over.')
    parser.add_argument('--modulo_do',
                        type=int,
                        help='Which of the modulo jobs this one runs.')
    parser.add_argument(
        '--from_intermediate_bounds',
        action='store_true',
        help="If set, intermediate bounds are computed with the best of "
        "naive interval propagation and KW")
    parser.add_argument('--network',
                        type=str,
                        help='which network to use',
                        default="wide",
                        choices=["wide", "deep"])
    args = parser.parse_args()

    results_dir = args.target_directory
    os.makedirs(results_dir, exist_ok=True)

    testset_size = int(1e5)
    for idx in range(testset_size):
        if (args.modulo is not None) and (idx % args.modulo != args.modulo_do):
            continue
        target_dir = os.path.join(results_dir, f"{idx}")
        os.makedirs(target_dir, exist_ok=True)

        X, y, elided_models = load_mnist_wide_net(idx, mnist_test=None)
        if X is None:
            continue
        elided_model = elided_models[y]
        to_ignore = y

        domain = torch.stack([
            torch.clamp(X.squeeze(0) - args.eps, 0, None),
            torch.clamp(X.squeeze(0) + args.eps, None, 1.0)
        ], -1).unsqueeze(0)

        lin_approx_string = "" if not args.from_intermediate_bounds else "-fromintermediate"

        # Compute intermediate bounds with the best of naive interval
        # propagation and KW. Use the same intermediate bounds for every
        # method, so that the comparison isolates the last-layer optimization.
        if args.from_intermediate_bounds:
            cuda_elided_model = copy.deepcopy(elided_model).cuda()
            cuda_domain = domain.cuda()
            intermediate_net = SaddleLP([lay for lay in cuda_elided_model])
            with torch.no_grad():
                intermediate_net.set_solution_optimizer('best_naive_kw', None)
                intermediate_net.define_linear_approximation(
                    cuda_domain, no_conv=False, override_numerical_errors=True)
            intermediate_ubs = intermediate_net.upper_bounds
            intermediate_lbs = intermediate_net.lower_bounds

        ## Proximal methods
        for optprox_steps in [400]:
            optprox_params = {
                'nb_total_steps': optprox_steps,
                'max_nb_inner_steps': 2,  # this is 2/5 as simpleprox
                'initial_eta': 1e0,
                'final_eta': 5e1,
                'log_values': False,
                'inner_cutoff': 0,
                'maintain_primal': True,
                'acceleration_dict': {
                    'momentum': 0.3,  # decent momentum: 0.9 w/ increasing eta
                }
            }
            optprox_target_file = os.path.join(
                target_dir,
                f"Proximal_finalmomentum_{optprox_steps}{lin_approx_string}.txt"
            )
            if not os.path.exists(optprox_target_file):
                cuda_elided_model = copy.deepcopy(elided_model).cuda()
                cuda_domain = domain.cuda()
                optprox_net = SaddleLP([lay for lay in cuda_elided_model])
                optprox_start = time.time()
                with torch.no_grad():
                    optprox_net.set_decomposition('pairs', 'KW')
                    optprox_net.set_solution_optimizer('optimized_prox',
                                                       optprox_params)
                    if not args.from_intermediate_bounds:
                        optprox_net.define_linear_approximation(cuda_domain,
                                                                no_conv=False)
                        ub = optprox_net.upper_bounds[-1]
                    else:
                        optprox_net.build_model_using_bounds(
                            cuda_domain, (intermediate_lbs, intermediate_ubs))
                        _, ub = optprox_net.compute_lower_bound()
                optprox_end = time.time()
                optprox_time = optprox_end - optprox_start
                optprox_ubs = ub.cpu()

                del optprox_net
                dump_bounds(optprox_target_file, optprox_time, optprox_ubs)

        ## Gurobi PLANET Bounds
        grb_target_file = os.path.join(target_dir,
                                       f"Gurobi{lin_approx_string}-fixed.txt")
        if not os.path.exists(grb_target_file):
            grb_net = LinearizedNetwork([lay for lay in elided_model])
            grb_start = time.time()
            if not args.from_intermediate_bounds:
                grb_net.define_linear_approximation(domain[0], n_threads=4)
                ub = grb_net.upper_bounds[-1]
            else:
                grb_net.build_model_using_bounds(
                    domain[0],
                    ([lbs[0].cpu() for lbs in intermediate_lbs],
                     [ubs[0].cpu() for ubs in intermediate_ubs]),
                    n_threads=4)
                _, ub = grb_net.compute_lower_bound(ub_only=True)
            grb_end = time.time()
            grb_time = grb_end - grb_start
            grb_ubs = torch.Tensor(ub).cpu()
            dump_bounds(grb_target_file, grb_time, grb_ubs)

        ## Cuts
        for cut_steps in [80, 600, 1050, 1650, 2500]:
            explp_params = {
                "nb_iter": cut_steps,
                'bigm': "init",
                'cut': "only",
                "bigm_algorithm": "adam",
                'cut_frequency': 450,
                'max_cuts': 12,
                'cut_add': 2,
                'betas': (0.9, 0.999),
                'initial_step_size': 1e-3,
                'final_step_size': 1e-6,
                "init_params": {
                    "nb_outer_iter": 500,
                    'initial_step_size': 1e-1,
                    'final_step_size': 1e-3,
                    'betas': (0.9, 0.999)
                },
            }
            cut_target_file = os.path.join(
                target_dir, f"Cuts_{cut_steps}{lin_approx_string}.txt")
            if not os.path.exists(cut_target_file):
                cuda_elided_model = copy.deepcopy(elided_model).cuda()
                cuda_domain = domain.cuda()
                exp_net = ExpLP([lay for lay in cuda_elided_model],
                                params=explp_params,
                                use_preactivation=True,
                                fixed_M=True)
                exp_start = time.time()
                with torch.no_grad():
                    if not args.from_intermediate_bounds:
                        exp_net.define_linear_approximation(cuda_domain)
                        ub = exp_net.upper_bounds[-1]
                    else:
                        exp_net.build_model_using_bounds(
                            cuda_domain, (intermediate_lbs, intermediate_ubs))
                        _, ub = exp_net.compute_lower_bound()
                exp_end = time.time()
                exp_time = exp_end - exp_start
                exp_ubs = ub.cpu()

                del exp_net
                dump_bounds(cut_target_file, exp_time, exp_ubs)

        # Big-M supergradient. (iters tuned to take same time as prox)
        for bigm_steps in [850]:
            bigm_adam_params = {
                "bigm_algorithm": "adam",
                "bigm": "only",
                "nb_outer_iter": bigm_steps,
                'initial_step_size': 1e-1,
                'final_step_size': 1e-3,
                'betas': (0.9, 0.999)
            }
            bigm_target_file = os.path.join(
                target_dir, f"Big-M_{bigm_steps}{lin_approx_string}.txt")
            if not os.path.exists(bigm_target_file):
                cuda_elided_model = copy.deepcopy(elided_model).cuda()
                cuda_domain = domain.cuda()
                bigm_net = ExpLP([lay for lay in cuda_elided_model],
                                 params=bigm_adam_params,
                                 use_preactivation=True,
                                 fixed_M=True)
                bigm_start = time.time()
                with torch.no_grad():
                    if not args.from_intermediate_bounds:
                        bigm_net.define_linear_approximation(cuda_domain)
                        ub = bigm_net.upper_bounds[-1]
                    else:
                        bigm_net.build_model_using_bounds(
                            cuda_domain, (intermediate_lbs, intermediate_ubs))
                        _, ub = bigm_net.compute_lower_bound()
                bigm_end = time.time()
                bigm_time = bigm_end - bigm_start
                bigm_ubs = ub.cpu()

                del bigm_net
                dump_bounds(bigm_target_file, bigm_time, bigm_ubs)

        ## Gurobi Anderson Bounds
        for n_cuts in [1]:
            grb_and_target_file = os.path.join(
                target_dir,
                f"Anderson-{n_cuts}cuts{lin_approx_string}-fixed.txt")
            if not os.path.exists(grb_and_target_file):
                lp_and_grb_net = AndersonLinearizedNetwork(
                    [lay for lay in elided_model],
                    mode="lp-cut",
                    n_cuts=n_cuts,
                    cuts_per_neuron=True)
                lp_and_grb_start = time.time()
                if not args.from_intermediate_bounds:
                    lp_and_grb_net.define_linear_approximation(domain[0],
                                                               n_threads=4)
                    ub = lp_and_grb_net.upper_bounds[-1]
                else:
                    lp_and_grb_net.build_model_using_bounds(
                        domain[0],
                        ([lbs[0].cpu() for lbs in intermediate_lbs],
                         [ubs[0].cpu() for ubs in intermediate_ubs]),
                        n_threads=4)
                    _, ub = lp_and_grb_net.compute_lower_bound(ub_only=True)
                lp_and_grb_end = time.time()
                lp_and_grb_time = lp_and_grb_end - lp_and_grb_start
                lp_and_grb_ubs = torch.Tensor(ub).cpu()
                dump_bounds(grb_and_target_file, lp_and_grb_time,
                            lp_and_grb_ubs)
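For reference, the `domain` tensor built in main() stacks elementwise lower and upper bounds of the epsilon-ball around X, clamped to the valid pixel range [0, 1]. A standalone sketch of that construction (the 1x1x28x28 input shape is an assumption based on the MNIST loader used above):

import torch

# Dummy image standing in for load_mnist_wide_net's output (assumed shape).
X = torch.rand(1, 1, 28, 28)
eps = 0.1

domain = torch.stack([
    torch.clamp(X.squeeze(0) - eps, 0, None),  # elementwise lower bounds >= 0
    torch.clamp(X.squeeze(0) + eps, None, 1.0)  # elementwise upper bounds <= 1
], -1).unsqueeze(0)

print(domain.shape)  # torch.Size([1, 1, 28, 28, 2]); last dim holds (lb, ub)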