Ejemplo n.º 1
0
def test_risk_slim(data_csv_file,
                   sample_weights_csv_file=None,
                   max_coefficient=5,
                   max_L0_value=5,
                   max_offset=50,
                   c0_value=1e-6,
                   w_pos=1.00,
                   settings=None):
    """Run lattice CPA end to end on a CSV dataset and print the results.

    Args:
        data_csv_file: Forwarded to ``load_data_from_csv`` as
            ``dataset_csv_file``.
        sample_weights_csv_file: Forwarded to ``load_data_from_csv`` as
            ``sample_weights_csv_file``.
        max_coefficient: Coefficient bounds are set to
            ``[-max_coefficient, max_coefficient]``.
        max_L0_value: Requested value for the ``'L0_max'`` constraint; it is
            lowered to ``P - (number of zero entries in coef_set.C_0j)`` when
            that quantity is smaller.
        max_offset: Requested bound on the ``'(Intercept)'`` coefficient; it
            is lowered to the value of ``get_conservative_offset`` when that
            is smaller.
        c0_value: Accepted for interface compatibility; not read by this
            function.
        w_pos: Accepted for interface compatibility; not read by this
            function.
        settings: Passed unchanged as the third argument of
            ``run_lattice_cpa``.

    Returns:
        Always ``True`` once every step has completed without raising.
    """
    # Load the dataset; only the column count of X is needed below.
    dataset = load_data_from_csv(
        dataset_csv_file=data_csv_file,
        sample_weights_csv_file=sample_weights_csv_file,
    )
    _, n_columns = dataset['X'].shape

    # Symmetric bounds on every coefficient.
    coefficients = CoefficientSet(
        variable_names=dataset['variable_names'],
        lb=-max_coefficient,
        ub=max_coefficient,
        sign=0,
    )

    # Bound the intercept by the smaller of the requested and the
    # conservative offset (upper bound is assigned before the lower bound).
    intercept_bound = min(
        max_offset,
        get_conservative_offset(dataset, coefficients, max_L0_value),
    )
    coefficients['(Intercept)'].ub = intercept_bound
    coefficients['(Intercept)'].lb = -intercept_bound

    # Cap the model size at P minus the number of zero entries in C_0j.
    model_size_cap = min(
        max_L0_value,
        n_columns - np.sum(coefficients.C_0j == 0),
    )
    constraints = dict(
        L0_min=0,
        L0_max=model_size_cap,
        coef_set=coefficients,
    )

    # Train the model with lattice CPA.
    model_info, mip_info, lcpa_info = run_lattice_cpa(
        dataset, constraints, settings)

    # model_info holds the key results; lcpa_info holds details about LCPA.
    pprint(model_info)
    pprint(lcpa_info)

    # TODO: check solution

    # mip_info gives access to the MIP: 'risk_slim_mip' is the CPLEX mip and
    # 'risk_slim_idx' the indices of the relevant constraints. The lookups
    # are kept so a missing key still raises KeyError.
    for required_key in ('risk_slim_mip', 'risk_slim_idx'):
        _ = mip_info[required_key]

    return True
Ejemplo n.º 2
0
# Quickstart script: load a dataset, build the coefficient set, bound the
# intercept, and compute the maximum model size.

# data
data_name = "breastcancer"                                  # name of the data
data_dir = os.getcwd() + '/examples/data/'                  # directory where datasets are stored
data_csv_file = data_dir + data_name + '_data.csv'          # csv file for the dataset
sample_weights_csv_file = None                              # csv file of sample weights for the dataset (optional)

# problem parameters
max_coefficient = 5                                         # value of largest/smallest coefficient
max_L0_value = 5                                            # maximum model size
max_offset = 50                                             # maximum value of offset parameter (optional)
c0_value = 1e-6                                             # L0-penalty parameter such that c0_value > 0; larger values -> sparser models; we set to a small value (1e-6) so that we get a model with max_L0_value terms
w_pos = 1.00                                                # relative weight on examples with y = +1; w_neg = 1.00 (optional)

# load dataset
data = load_data_from_csv(dataset_csv_file=data_csv_file, sample_weights_csv_file=sample_weights_csv_file)
N, P = data['X'].shape                                      # P is required for the model-size bound below

# coefficient set
coef_set = CoefficientSet(variable_names=data['variable_names'], lb=-max_coefficient, ub=max_coefficient, sign=0)

# offset value: use the smaller of the requested and the conservative offset
conservative_offset = get_conservative_offset(data, coef_set, max_L0_value)
max_offset = min(max_offset, conservative_offset)
coef_set['(Intercept)'].ub = max_offset
coef_set['(Intercept)'].lb = -max_offset

# create constraint dictionary
# cap the model size at P minus the number of zero entries in C_0j
trivial_L0_max = P - np.sum(coef_set.C_0j == 0)
max_L0_value = min(max_L0_value, trivial_L0_max)
Ejemplo n.º 3
0
                str(key): loaded_settings[key]
                for key in loaded_settings if key in settings
            }
            settings.update(loaded_settings)

    #overwrite parameters specified by the user
    settings['max_runtime'] = float(
        'inf') if parsed.timelimit == -1 else parsed.timelimit
    settings['c0_value'] = parsed.c0_value
    settings['w_pos'] = parsed.w_pos

    # check if sample weights file was specified, if not set as None
    logger.info("loading data and sample weights")

    data = load_data_from_csv(dataset_csv_file=parsed.data,
                              sample_weights_csv_file=parsed.weights,
                              fold_csv_file=parsed.cvindices,
                              fold_num=parsed.fold)
    N, P = data['X'].shape

    # initialize coefficient set and offset parameter
    logger.info("creating coefficient set and constraints")
    max_coefficient = parsed.max_coef
    max_model_size = parsed.max_size if parsed.max_size >= 0 else float('inf')
    max_offset = parsed.max_offset if parsed.max_offset >= 0 else float('inf')

    coef_set = CoefficientSet(variable_names=data['variable_names'],
                              lb=-max_coefficient,
                              ub=max_coefficient,
                              sign=0)

    trivial_model_size = P - np.sum(coef_set.C_0j == 0)