def create_rows_CG(depth: int, segments_set):
    """Build the constraint rows of the column-generation (CG) master model.

    Returns ``(row_names, row_values, row_right_sides, row_senses)`` where
    ``row_values[k]`` is a ``[col_names, col_values]`` coefficient pair for
    row ``k``, ``row_right_sides[k]`` is its right-hand side, and
    ``row_senses[k]`` is its sense ("L" = <=, "E" = =) — the triplet format
    CPLEX-style solvers accept.

    Side effect: rebinds the module-global ``constraint_indicators`` to the
    list of row indices at which each constraint family begins.

    NOTE(review): the constraint numbers (15)-(21) refer to an accompanying
    paper that is not visible in this file. ``segments_set`` is indexed as
    ``segments_set[leaf][segment]`` and each segment is iterated as a
    collection of data-row indices — inferred from the loops below; confirm
    against the caller.
    """
    global TARGETS
    global constraint_indicators
    constraint_indicators = []
    row_value = 0  # index of the next row to be appended
    row_names = []
    row_values = []
    row_right_sides = []
    row_senses = ""
    num_features = get_num_features()
    data_size = get_data_size()
    num_leafs = 2**depth  # perfect binary tree of the given depth
    num_nodes = num_leafs - 1  # internal (decision) nodes
    # big-M spanning the whole feature-value range, used to deactivate
    # branching rows when node j does not test feature i.
    big_M = get_max_value() - get_min_value()
    constraint_indicators.append(row_value)
    # Constraint (15): for every segment assigned to a leaf in the LEFT
    # subtree of node j, the segment's maximum value of feature i must not
    # exceed the node constant when node j tests feature i
    # (coefficient big_M on node_feature with RHS big_M relaxes the row
    # whenever node_feature_{j,i} = 0).
    for i in range(num_features):  #constraint (15), indicator 0
        for j in range(num_nodes):
            for l in get_left_leafs(j, num_nodes):
                col_names = [
                    "segment_leaf_" + str(s) + "_" + str(l)
                    for s in range(len(segments_set[l]))
                ]  #x_{l,s}
                col_values = [
                    max([get_feature_value(r, i) for r in s])
                    for s in segments_set[l]
                ]  #mu^{i,s} max
                col_names.extend([
                    "node_feature_" + str(j) + "_" + str(i),
                    "node_constant_" + str(j)
                ])
                col_values.extend([big_M, -1])
                row_names.append("constraint_15_" + str(i) + "_" + str(j) +
                                 "_" + str(l))
                row_values.append([col_names, col_values])
                row_right_sides.append(big_M)
                row_senses = row_senses + "L"
                row_value = row_value + 1
    constraint_indicators.append(row_value)
    # Constraint (16): mirror of (15) for leaves in the RIGHT subtree —
    # the segment's minimum feature value must exceed the node constant.
    # NOTE(review): the extra 0.01 on the RHS presumably emulates a strict
    # inequality for the right branch — confirm against the paper.
    for i in range(num_features):  #constraint (16), indicator 1
        for j in range(num_nodes):
            for l in get_right_leafs(j, num_nodes):
                col_names = [
                    "segment_leaf_" + str(s) + "_" + str(l)
                    for s in range(len(segments_set[l]))
                ]  #x_{l,s}
                col_values = [
                    -min([get_feature_value(r, i) for r in s])
                    for s in segments_set[l]
                ]  #mu^{i,s} min
                col_names.extend([
                    "node_feature_" + str(j) + "_" + str(i),
                    "node_constant_" + str(j)
                ])
                col_values.extend([big_M, 1])
                row_names.append("constraint_16_" + str(i) + "_" + str(j) +
                                 "_" + str(l))
                row_values.append([col_names, col_values])
                row_right_sides.append(big_M + 0.01)
                row_senses = row_senses + "L"
                row_value = row_value + 1
    constraint_indicators.append(row_value)
    # Constraint (17): every data row r is covered by exactly one selected
    # segment (summed over all leaves and their segments that contain r).
    for r in range(data_size):  #constraint (17), indicator 2
        col_names, col_values = [], []
        for l in range(num_leafs):
            for s in range(len(segments_set[l])):
                if r in segments_set[l][s]:
                    col_names.extend(["segment_leaf_" + str(s) + "_" +
                                      str(l)])  #x_{l,s}
                    col_values.extend([1])
        row_names.append("constraint_17_" + str(r))
        row_values.append([col_names, col_values])
        row_right_sides.append(1)
        row_senses = row_senses + "E"
        row_value = row_value + 1
    constraint_indicators.append(row_value)
    # Constraint (18): each leaf selects exactly one of its segments.
    for l in range(num_leafs):  #constraint (18), indicator 3
        col_names = [
            "segment_leaf_" + str(s) + "_" + str(l)
            for s in range(len(segments_set[l]))
        ]  #x_{l,s}
        col_values = [1 for s in range(len(segments_set[l]))]  #x_{l,s}
        row_names.append("constraint_18_" + str(l))
        row_values.append([col_names, col_values])
        row_right_sides.append(1)
        row_senses = row_senses + "E"
        row_value = row_value + 1
    constraint_indicators.append(row_value)
    # Constraint (19): if row r's segment is selected at leaf l and the leaf
    # predicts a target different from r's true target, the row error
    # variable must absorb the violation (coefficient -1 on row_error_r).
    for r in range(data_size):  #constraint (19), indicator 4
        for l in range(num_leafs):
            col_names, col_values = [], []
            for s in range(len(segments_set[l])):
                if r in segments_set[l][s]:
                    col_names.extend(["segment_leaf_" + str(s) + "_" +
                                      str(l)])  #x_{l,s}
                    col_values.extend([1])
            for t in range(get_num_targets()):
                if TARGETS[t] != get_target(r):
                    col_names.extend(
                        ["prediction_type_" + str(t) + "_" + str(l)])
                    col_values.extend([1])
            col_names.extend(["row_error_" + str(r)])
            col_values.extend([-1])
            row_names.append("constraint_19_" + str(r) + "_" + str(l))
            row_values.append([col_names, col_values])
            row_right_sides.append(1)
            row_senses = row_senses + "L"
            row_value = row_value + 1
    constraint_indicators.append(row_value)
    # Constraint (20): each leaf predicts exactly one target type.
    for l in range(num_leafs):  #constraint (20), indicator 5
        col_names = [
            "prediction_type_" + str(s) + "_" + str(l)
            for s in range(get_num_targets())
        ]
        col_values = [1 for s in range(get_num_targets())]
        row_names.append("constraint_20_" + str(l))
        row_values.append([col_names, col_values])
        row_right_sides.append(1)
        row_senses = row_senses + "E"
        row_value = row_value + 1
    constraint_indicators.append(row_value)
    # Constraint (21): each internal node tests exactly one feature.
    for j in range(num_nodes):  #constraint (21), indicator 6
        col_names = [
            "node_feature_" + str(j) + "_" + str(i)
            for i in range(num_features)
        ]
        col_values = [1 for i in range(num_features)]
        row_names.append("constraint_21_" + str(j))
        row_values.append([col_names, col_values])
        row_right_sides.append(1)
        row_senses = row_senses + "E"
        row_value = row_value + 1
    return row_names, row_values, row_right_sides, row_senses
def create_variables(depth, prob):
    """Register the MIP variables for a decision tree of the given depth.

    Populates the module-global ``VARS`` mapping (descriptive variable key
    -> solver column index) and returns the parallel lists
    ``(var_names, var_types, var_lb, var_ub, var_obj)``.  Solver-side names
    are the anonymous ``"#<index>"``; the descriptive keys live in ``VARS``.
    ``prob`` is accepted but not used by this function.

    Variable families, in registration order (indices must stay stable,
    the row builders look them up through VARS):
      f_{i,j}  node_feature_<j>_<i>   binary   node j tests feature i
      c_{j}    node_constant_<j>      C or I   threshold tested at node j
      p_{l,t}  prediction_type_<t>_<l> binary  leaf l predicts target t
      e_{r}    row_error_<r>          continuous, objective coefficient 1
      pt_{r,j} path_node_<j>_<r> / path_leaf_<l>_<r>  binary path indicators
    """
    global VARS
    names = []
    kinds = []
    lowers = []
    uppers = []
    objectives = []

    def _declare(key, kind, lo, hi, cost):
        # One solver column: remember its index under `key`, name it "#<index>".
        VARS[key] = len(names)
        names.append("#" + str(len(names)))
        kinds.append(kind)
        lowers.append(lo)
        uppers.append(hi)
        objectives.append(cost)

    feature_count = get_num_features()
    row_count = get_data_size()
    leaf_count = 2 ** depth
    node_count = leaf_count - 1

    # f_{i,j}: node j applies a boolean test on feature i.
    for node in range(node_count):
        for feat in range(feature_count):
            _declare("node_feature_%d_%d" % (node, feat), "B", 0, 1, 0)

    # c_{j}: the constant tested at node j; continuous or integer depending
    # on the module-global `continuousconstant` switch.
    for node in range(node_count):
        kind = "C" if continuousconstant == 1 else "I"
        _declare("node_constant_%d" % node, kind,
                 get_min_value(), get_max_value(), 0)

    # p_{l,t}: leaf l predicts target t.
    for leaf in range(leaf_count):
        for tgt in range(get_num_targets()):
            _declare("prediction_type_%d_%d" % (tgt, leaf), "B", 0, 1, 0)

    # e_{r}: misclassification error of data row r; these carry the objective.
    for row in range(row_count):
        _declare("row_error_%d" % row, "C", 0, 1, 1)

    # pt_{r,j}: row r passes node j (plus the leaf-level indicators).
    # Careful: leafs are included, interleaved per row.
    for row in range(row_count):
        for node in range(node_count):
            _declare("path_node_%d_%d" % (node, row), "B", 0, 1, 0)
        for leaf in range(leaf_count):
            _declare("path_leaf_%d_%d" % (leaf, row), "B", 0, 1, 0)

    return names, "".join(kinds), lowers, uppers, objectives
def create_variables_CG(depth, segments_set):
    """Build the variable set of the column-generation LP relaxation.

    Returns the parallel lists ``(var_names, var_types, var_lb, var_ub,
    var_obj)``.  Unlike :func:`create_variables`, every variable here is
    continuous ("C") — this is the LP master problem — and the solver names
    are the descriptive keys themselves (no VARS indirection).

    Families, in order:
      f_{i,j}  node_feature_<j>_<i>    in [0, 1]
      c_{j}    node_constant_<j>       in [min value, max value]
      p_{l,t}  prediction_type_<t>_<l> in [0, 1]
      e_{r}    row_error_<r>           in [0, 1], objective coefficient 1
      x_{l,s}  segment_leaf_<s>_<l>    in [0, 1], one per segment of each leaf
    """
    names = []
    kinds = []
    lowers = []
    uppers = []
    costs = []

    def _declare(name, lo, hi, cost):
        # All CG master variables are continuous.
        names.append(name)
        kinds.append("C")
        lowers.append(lo)
        uppers.append(hi)
        costs.append(cost)

    feature_count = get_num_features()
    row_count = get_data_size()
    leaf_count = 2 ** depth
    node_count = leaf_count - 1

    # f_{i,j}: node j tests feature i (relaxed to [0, 1]).
    for node in range(node_count):
        for feat in range(feature_count):
            _declare("node_feature_%d_%d" % (node, feat), 0, 1, 0)

    # c_{j}: threshold constant at node j.
    for node in range(node_count):
        _declare("node_constant_%d" % node,
                 get_min_value(), get_max_value(), 0)

    # p_{l,t}: leaf l predicts target t (relaxed).
    for leaf in range(leaf_count):
        for tgt in range(get_num_targets()):
            _declare("prediction_type_%d_%d" % (tgt, leaf), 0, 1, 0)

    # e_{r}: row error, the objective being minimized.
    for row in range(row_count):
        _declare("row_error_%d" % row, 0, 1, 1)

    # x_{l,s}: segment s is the one assigned to leaf l.
    for leaf in range(leaf_count):
        for seg in range(len(segments_set[leaf])):
            _declare("segment_leaf_%d_%d" % (seg, leaf), 0, 1, 0)

    return names, "".join(kinds), lowers, uppers, costs
def create_rows(depth: int):
    """Build all constraint rows of the full (non-CG) decision-tree MIP.

    Returns ``(row_names, row_values, row_right_sides, row_senses)`` in the
    same triplet format as :func:`create_rows_CG`; here column identifiers
    are the integer indices stored in the module-global ``VARS`` (filled by
    ``create_variables``) rather than name strings, and row names are the
    anonymous ``"#<index>"``.

    Also reads the module globals ``TARGETS``, ``eps`` (strict-inequality
    tolerance) and ``inputsym`` (symmetry-breaking switch).
    NOTE(review): constraint numbers (2)-(9) refer to an accompanying paper
    that is not visible in this file.
    """
    global VARS
    global TARGETS
    row_value = 0  # index of the next row to be appended
    row_names = []
    row_values = []
    row_right_sides = []
    row_senses = ""
    num_features = get_num_features()
    data_size = get_data_size()
    num_leafs = 2**depth  # perfect binary tree
    num_nodes = num_leafs - 1
    big_M = get_max_value() - get_min_value()
    # Widened big-M so the relaxed rows stay slack even at the value bounds.
    big_M = 10 * big_M + 10
    # Constraint (2): if row r passes node j AND continues to j's left
    # child, then r's value of the tested feature must be <= the node
    # constant; both path indicators carry big_M so the row relaxes
    # unless both are 1 (RHS is 2 * big_M).
    for r in range(data_size):  #constraint (2)
        for j in range(num_nodes):
            col_names = [
                VARS["node_feature_" + str(j) + "_" + str(i)]
                for i in range(num_features)
            ]  #f_{i,j}
            col_values = [
                get_feature_value(r, i) for i in range(num_features)
            ]  #mu^{i,r}
            left_node = get_left_node(j, num_nodes)
            if int(left_node) != -1:  #if not a leaf
                col_names.extend([
                    VARS["path_node_" + str(j) + "_" + str(r)],
                    VARS["path_node_" + str(left_node) + "_" + str(r)]
                ])  #VARS["depth_true_" + str(r) + "_" + str(get_depth(j,num_nodes)-1)]])
            else:
                # Left child is a leaf; under this numbering node j's left
                # leaf shares index j (its right leaf is j + 1, see (3)).
                col_names.extend([
                    VARS["path_node_" + str(j) + "_" + str(r)],
                    VARS["path_leaf_" + str(j) + "_" + str(r)]
                ])
            col_values.extend([big_M, big_M])
            col_names.extend([VARS["node_constant_" + str(j)]])
            col_values.extend([-1])
            row_names.append("#" + str(row_value))
            row_values.append([col_names, col_values])
            row_right_sides.append(2 * big_M)
            row_senses = row_senses + "L"
            row_value = row_value + 1
    # Constraint (3): mirror of (2) for the right branch — the feature
    # value must exceed the node constant; `eps` turns >= into a strict >.
    for r in range(data_size):  #constraint (3)
        for j in range(num_nodes):
            col_names = [
                VARS["node_feature_" + str(j) + "_" + str(i)]
                for i in range(num_features)
            ]  #f_{i,j}
            col_values = [
                -get_feature_value(r, i) for i in range(num_features)
            ]  #mu^{i,r}
            right_node = get_right_node(j, num_nodes)
            if int(right_node) != -1:  #if not a leaf
                col_names.extend([
                    VARS["path_node_" + str(j) + "_" + str(r)],
                    VARS["path_node_" + str(right_node) + "_" + str(r)]
                ])  #VARS["depth_true_" + str(r) + "_" + str(get_depth(j,num_nodes)-1)]])
            else:
                col_names.extend([
                    VARS["path_node_" + str(j) + "_" + str(r)],
                    VARS["path_leaf_" + str(j + 1) + "_" + str(r)]
                ])
            #col_names.extend([VARS["path_node_" + str(j) + "_" + str(r)], VARS["depth_true_" + str(r) + "_" + str(get_depth(j,num_nodes)-1)]])
            col_values.extend([big_M, big_M])
            col_names.extend([VARS["node_constant_" + str(j)]])
            col_values.extend([1])
            row_names.append("#" + str(row_value))
            row_values.append([col_names, col_values])
            row_right_sides.append(2 * big_M - eps)
            row_senses = row_senses + "L"
            row_value = row_value + 1
    # Constraint (4): each leaf predicts exactly one target type.
    for l in range(num_leafs):  #constraint (4)
        col_names = [
            VARS["prediction_type_" + str(s) + "_" + str(l)]
            for s in range(get_num_targets())
        ]
        col_values = [1 for s in range(get_num_targets())]
        row_names.append("#" + str(row_value))
        row_values.append([col_names, col_values])
        row_right_sides.append(1)
        row_senses = row_senses + "E"
        row_value = row_value + 1
    # Constraint (5): each internal node tests exactly one feature.
    for j in range(num_nodes):  #constraint (5)
        col_names = [
            VARS["node_feature_" + str(j) + "_" + str(i)]
            for i in range(num_features)
        ]
        col_values = [1 for i in range(num_features)]
        row_names.append("#" + str(row_value))
        row_values.append([col_names, col_values])
        row_right_sides.append(1)
        row_senses = row_senses + "E"
        row_value = row_value + 1
    # Constraint (6): row r's active path indicators (nodes + leaf) sum
    # to exactly depth + 1, i.e. a full root-to-leaf path.
    for r in range(data_size):  # constraint (6)
        col_names = [
            VARS["path_leaf_" + str(l) + "_" + str(r)]
            for l in range(num_leafs)
        ]
        col_values = [1 for l in range(num_leafs)]
        col_names.extend([
            VARS["path_node_" + str(j) + "_" + str(r)]
            for j in range(num_nodes)
        ])
        col_values.extend([1 for j in range(num_nodes)])
        row_names.append("#" + str(row_value))
        row_values.append([col_names, col_values])
        row_right_sides.append(depth + 1)
        row_senses = row_senses + "E"
        row_value = row_value + 1
    # Constraint (7): each row ends in exactly one leaf.
    for r in range(data_size):  # contraint (7)
        col_names = []
        col_values = []
        for l in range(num_leafs):
            col_names.extend([VARS["path_leaf_" + str(l) + "_" + str(r)]])
            col_values.extend([1])
        row_names.append("#" + str(row_value))
        row_values.append([col_names, col_values])
        row_right_sides.append(1)
        row_senses = row_senses + "E"
        row_value = row_value + 1
    # Constraint (8), internal nodes: a node can be on r's path only if its
    # parent is.  The index (num_nodes - 1) / 2 is skipped — presumably the
    # root, which has no parent; confirm against get_parent's numbering.
    for r in range(data_size):  # constraint (8) for internal nodes
        for j in range(num_nodes):
            if j != (num_nodes - 1) / 2:
                col_names = [VARS["path_node_" + str(j) + "_" + str(r)]]
                col_values = [1]
                col_names.extend([
                    VARS["path_node_" + str(get_parent(j, depth)) + "_" +
                         str(r)]
                ])
                col_values.extend([-1])
                row_names.append("#" + str(row_value))
                row_values.append([col_names, col_values])
                row_right_sides.append(0)
                row_senses = row_senses + "L"
                row_value = row_value + 1
    # Constraint (8), leaves: a leaf can be active only if its parent node
    # is on the path.  NOTE(review): l - l % 2 rounds the leaf index down to
    # the even node index, presumably its parent under this numbering
    # (consistent with the j / j + 1 leaf indices used in (2)/(3)) — confirm.
    for r in range(data_size):  # constraint (8) for leaves
        for l in range(num_leafs):
            col_names = [VARS["path_leaf_" + str(l) + "_" + str(r)]]
            col_values = [1]
            col_names.extend(
                [VARS["path_node_" + str(l - l % 2) + "_" + str(r)]])
            col_values.extend([-1])
            row_names.append("#" + str(row_value))
            row_values.append([col_names, col_values])
            row_right_sides.append(0)
            row_senses = row_senses + "L"
            row_value = row_value + 1
    # Constraint (9): if row r reaches leaf l and the leaf predicts any
    # target other than r's true target, row_error_r is forced to 1.
    for r in range(data_size):  #constraint (9)
        for l in range(num_leafs):
            col_names = [VARS["path_leaf_" + str(l) + "_" + str(r)]]
            col_values = [1]
            for s in range(get_num_targets()):
                if TARGETS[s] != get_target(r):
                    col_names.extend(
                        [VARS["prediction_type_" + str(s) + "_" + str(l)]])
                    col_values.extend([1])
            col_names.extend([VARS["row_error_" + str(r)]])
            col_values.extend([-1])
            row_names.append("#" + str(row_value))
            row_values.append([col_names, col_values])
            row_right_sides.append(1)
            row_senses = row_senses + "L"
            row_value = row_value + 1
    # Optional symmetry-breaking valid inequalities: for bottom nodes whose
    # children are single leaves, forbid both sibling leaves predicting the
    # same target (at most one of the pair per target type).
    if inputsym == 1:  #valid inequalties ?
        for n in range(num_nodes):
            left_leaf = get_left_leafs(n, num_nodes)
            right_leaf = get_right_leafs(n, num_nodes)
            if len(left_leaf) != 1:
                continue  # only nodes whose subtrees are single leaves
            for s in range(get_num_targets()):
                col_names = [
                    VARS["prediction_type_" + str(s) + "_" +
                         str(left_leaf[0])]
                ]
                col_values = [1]
                col_names.extend([
                    VARS["prediction_type_" + str(s) + "_" +
                         str(right_leaf[0])]
                ])
                col_values.extend([1])
                row_names.append("#" + str(row_value))
                row_values.append([col_names, col_values])
                row_right_sides.append(1)
                row_senses = row_senses + "L"
                row_value = row_value + 1
    return row_names, row_values, row_right_sides, row_senses