def warm_start_multi(self, dataframe, dataframe2, y, d, modello):
    """Build a warm start for ``modello`` node by node, draw it, solve, and draw the result.

    For each visited branch node a sub-model is obtained from
    ``self.fit_with_cart`` on the rows currently assigned to that node. Its
    root split (``a0_*``, ``b_0``, ``d_0``) is copied into the corresponding
    variables of ``modello``; when the node's children are leaves, the
    sub-model's leaf values (``Nt``, ``l``, ``c``, ``Nkt``, ``z``) are copied
    too. The rows are then split by ``z_<i>_1`` and the two halves are queued
    for the following iterations.

    Args:
        dataframe: Training features for the root node.
        dataframe2: Not used inside this method.
        y: Labels; wrapped in a ``pd.DataFrame`` and read through column ``0``.
        d: Tree depth; the tree has ``2**(d + 1) - 1`` nodes.
        modello: Model that receives the MIP start and is solved.

    Returns:
        ``modello`` after ``solve`` has been called on it.
    """
    # Node ids visited at iteration t of the left / right loop below.
    # NOTE(review): only 8 entries each, so the loops below can index at most
    # 8 iterations before an IndexError.
    ordine_l = [0, 1, 4, 3, 10, 9, 8, 7]
    ordine_r = [0, 2, 6, 5, 14, 13, 12, 11]
    mm = SolveSolution(modello)
    T = pow(2, (d + 1)) - 1  # nodes number
    floorTb = int(floor(T / 2))  # number of branch nodes
    Tb = np.arange(0, floorTb)  # range branch nodes
    Tl = np.arange(floorTb, T)  # range leaf nodes
    classes = np.unique(y.values)  # possible labels of classification
    lista_df = []
    lista_y = []
    y = pd.DataFrame(y)
    lista_df.insert(0, dataframe)
    lista_y.insert(0, y)
    # Left pass: root plus the branch nodes listed in ordine_l.
    for t in range(int((len(Tb) - 1) / 2) + 1):
        yy = lista_y[t]
        print(yy)
        df_split1 = []
        df_split2 = []
        y_1 = []
        y_2 = []
        ind = lista_y[t].index
        ind_df = lista_df[t].index
        mdl = self.fit_with_cart(lista_df[t], lista_df[t], lista_y[t])
        # Labels present in this subset, sorted; position k in cl is the class
        # index used in the sub-model's variable names.
        cl = yy[0].unique()
        cl.sort()
        print(cl)
        print(classes)
        # Copy the sub-model's root split to node ordine_l[t].
        for f in self.features:
            mm.add_var_value('a%d_%d' % (ordine_l[t], f), mdl.solution.get_value('a0_%d' % (f)))
        mm.add_var_value('b_%d' % (ordine_l[t]), mdl.solution.get_value('b_0'))
        mm.add_var_value('d_%d' % (ordine_l[t]), mdl.solution.get_value('d_0'))
        # Left child is a leaf: copy the sub-model's leaf 1 into it.
        if 2 * ordine_l[t] + 1 in Tl:
            kl = classes
            leaf = 2 * ordine_l[t] + 1
            mm.add_var_value('Nt_%d' % leaf, mdl.solution.get_value('Nt_1'))
            mm.add_var_value('l_%d' % (leaf), mdl.solution.get_value('l_1'))
            for k in range(len(cl)):
                # Map the subset-local class index k to its index in `classes`.
                print(k, cl[k], list(classes).index(cl[k]))
                mm.add_var_value('c_%d_%d' % (list(classes).index(cl[k]), leaf), mdl.solution.get_value('c_%d_1' % (k)))
                mm.add_var_value('Nkt_%d_%d' % (list(classes).index(cl[k]), leaf), mdl.solution.get_value('Nkt_%d_1' % (k)))
            # NOTE(review): list(kl) is a temporary copy, so this loop leaves
            # kl and classes unchanged.
            for k1 in range(len(cl)):
                list(kl).remove(cl[k1])
            # Disabled: zero-fill of the classes absent from this subset.
            # for k2 in range(len(kl)):
            #     mm.add_var_value('c_%d_%d'%(list(classes).index(kl[k2]), leaf), 0)
            # NOTE(review): z is keyed by the position n inside this subset;
            # the sibling warm_start keys it by the index label instead.
            for n in range(len(lista_df[t])):
                mm.add_var_value('z_%d_%d' % (n, leaf), mdl.solution.get_value('z_%d_1' % (n)))
        # Right child is a leaf: copy the sub-model's leaf 2 into it.
        if 2 * ordine_l[t] + 2 in Tl:
            kl = classes
            leaf = 2 * ordine_l[t] + 2
            mm.add_var_value('Nt_%d' % leaf, mdl.solution.get_value('Nt_2'))
            mm.add_var_value('l_%d' % (leaf), mdl.solution.get_value('l_2'))
            for k in range(len(cl)):
                mm.add_var_value('c_%d_%d' % (list(classes).index(cl[k]), leaf), mdl.solution.get_value('c_%d_2' % (k)))
                mm.add_var_value('Nkt_%d_%d' % (list(classes).index(cl[k]), leaf), mdl.solution.get_value('Nkt_%d_2' % (k)))
            for k1 in range(len(cl)):
                list(kl).remove(cl[k1])
            # Disabled: zero-fill of the classes absent from this subset.
            # for k2 in range(len(kl)):
            #     mm.add_var_value('c_%d_%d'%(list(classes).index(kl[k2]), leaf), 0)
            for n in range(len(lista_df[t])):
                mm.add_var_value('z_%d_%d' % (n, leaf), mdl.solution.get_value('z_%d_2' % (n)))
        # Split the rows of this node by the sub-model's assignment to leaf 1.
        # NOTE(review): insert(-1, x) places x before the current last item,
        # so the first row collected ends up last; features and labels are
        # reordered the same way.
        for i in range(len(lista_df[t])):
            j = ind[i]
            m = ind_df[i]
            if mdl.solution.get_value('z_%d_1' % (i)) == 1:
                df_split1.insert(-1, lista_df[t].loc[m])
                y_1.insert(-1, lista_y[t].loc[j])
            else:
                df_split2.insert(-1, lista_df[t].loc[m])
                y_2.insert(-1, lista_y[t].loc[j])
        df_1 = pd.DataFrame(df_split1)
        df_2 = pd.DataFrame(df_split2)
        y_1 = pd.DataFrame(y_1)
        y_2 = pd.DataFrame(y_2)
        # The children go to positions 1 and 2, shifting older entries right;
        # ordine_l mirrors the visiting order this produces.
        lista_df.insert(1, df_1)
        lista_df.insert(2, df_2)
        lista_y.insert(1, y_1)
        lista_y.insert(2, y_2)
    # Right pass: restart from the root and the last queued subset of the left pass.
    lista_df_r = []
    lista_y_r = []
    lista_df_r.insert(0, lista_df[0])
    lista_df_r.insert(1, lista_df[-1])
    lista_y_r.insert(0, lista_y[0])
    lista_y_r.insert(1, lista_y[-1])
    for t in range(1, int((len(Tb) - 1) / 2) + 1):
        yy = lista_y_r[t]
        print(yy)
        df_split1 = []
        df_split2 = []
        y_1 = []
        y_2 = []
        ind = lista_y_r[t].index
        ind_df = lista_df_r[t].index
        mdl = self.fit_with_cart(lista_df_r[t], lista_df_r[t], lista_y_r[t])
        cl = yy[0].unique()
        cl.sort()
        for f in self.features:
            mm.add_var_value('a%d_%d' % (ordine_r[t], f), mdl.solution.get_value('a0_%d' % (f)))
        mm.add_var_value('b_%d' % (ordine_r[t]), mdl.solution.get_value('b_0'))
        mm.add_var_value('d_%d' % (ordine_r[t]), mdl.solution.get_value('d_0'))
        if 2 * ordine_r[t] + 1 in Tl:
            kl = classes
            leaf = 2 * ordine_r[t] + 1
            mm.add_var_value('l_%d' % (leaf), mdl.solution.get_value('l_1'))
            mm.add_var_value('Nt_%d' % (leaf), mdl.solution.get_value('Nt_1'))
            for k in range(len(cl)):
                mm.add_var_value('c_%d_%d' % (list(classes).index(cl[k]), leaf), mdl.solution.get_value('c_%d_1' % (k)))
                mm.add_var_value('Nkt_%d_%d' % (list(classes).index(cl[k]), leaf), mdl.solution.get_value('Nkt_%d_1' % (k)))
            for k1 in range(len(cl)):
                list(kl).remove(cl[k1])
            # Disabled: zero-fill of the classes absent from this subset.
            # for k2 in range(len(kl)):
            #     mm.add_var_value('c_%d_%d'%(list(classes).index(kl[k2], leaf)), 0)
            # NOTE(review): repeats the 'l_<leaf>' assignment made a few lines above.
            mm.add_var_value('l_%d' % (leaf), mdl.solution.get_value('l_1'))
            for n in range(len(lista_df_r[t])):
                mm.add_var_value('z_%d_%d' % (n, leaf), mdl.solution.get_value('z_%d_1' % (n)))
        if 2 * ordine_r[t] + 2 in Tl:
            kl = classes
            leaf = 2 * ordine_r[t] + 2
            mm.add_var_value('l_%d' % (leaf), mdl.solution.get_value('l_2'))
            mm.add_var_value('Nt_%d' % (leaf), mdl.solution.get_value('Nt_2'))
            for k in range(len(cl)):
                mm.add_var_value('c_%d_%d' % (list(classes).index(cl[k]), leaf), mdl.solution.get_value('c_%d_2' % (k)))
                mm.add_var_value('Nkt_%d_%d' % (list(classes).index(cl[k]), leaf), mdl.solution.get_value('Nkt_%d_2' % (k)))
            for k1 in range(len(cl)):
                list(kl).remove(cl[k1])
            # Disabled: zero-fill of the classes absent from this subset.
            # for k2 in range(len(kl)):
            #     mm.add_var_value('c_%d_%d'%(list(classes).index(kl[k2]), leaf), 0)
            for n in range(len(lista_df_r[t])):
                mm.add_var_value('z_%d_%d' % (n, leaf), mdl.solution.get_value('z_%d_2' % (n)))
        for i in range(len(lista_df_r[t])):
            j = ind[i]
            m = ind_df[i]
            if mdl.solution.get_value('z_%d_1' % (i)) == 1:
                df_split1.insert(-1, lista_df_r[t].loc[m])
                y_1.insert(-1, lista_y_r[t].loc[j])
            else:
                df_split2.insert(-1, lista_df_r[t].loc[m])
                y_2.insert(-1, lista_y_r[t].loc[j])
        df_1 = pd.DataFrame(df_split1)
        df_2 = pd.DataFrame(df_split2)
        y_1 = pd.DataFrame(y_1)
        y_2 = pd.DataFrame(y_2)
        lista_df_r.insert(1, df_1)
        lista_df_r.insert(2, df_2)
        lista_y_r.insert(1, y_1)
        lista_y_r.insert(2, y_2)
    # GRAPH WARM START
    g = pgv.AGraph(directed=True)  # initialize the graph
    nodes = np.append(Tb, Tl)
    for n in nodes:  # the graph has a node for each node of the tree
        g.add_node(n, shape='circle', size=8)
        if n != 0:
            # Parent of node n when the children of t are 2t+1 and 2t+2.
            father = ceil(n / 2) - 1
            g.add_edge(father, n)
    for t in Tb:
        coeff = []
        feat = []
        # Disabled: colour branch nodes that apply no split.
        # if mdl.solution.get_value('d_' + str(t))==0:
        #     g.get_node(t).attr['color']='red'
        for f in range(len(self.features)):
            if mm.get_value('a' + str(t) + '_' + str(f)) != 0:
                coeff.insert(-1, '%.3f' % (mm.get_value('a' + str(t) + '_' + str(f))))
                feat.insert(-1, f)
        g.get_node(t).attr['label'] = str(coeff) + '*X' + str(feat) + str('<=') + str('%.3f' % (mm.get_value('b_' + str(t))))
    for leaf in Tl:
        if mm.get_value('l_' + str(leaf)) == 0:  # these leaves haven't got points
            g.get_node(leaf).attr['color'] = 'red'
    for leaf in Tl:
        # Per-class counts shown in the leaf label.
        s = []
        for k in range(len(classes)):
            s.append(round(mm.get_value('Nkt_' + str(k) + '_' + str(leaf))))
        for k in range(len(classes)):
            if mm.get_value('c_' + str(k) + '_' + str(leaf)) == 1:
                g.get_node(leaf).attr['label'] = str(s) + '\\n' + 'class %d' % (classes[k])
    g.layout(prog='dot')
    # NOTE(review): absolute, user-specific output path.
    g.draw('/Users/giuliaciarimboli/Desktop/warm_start.pdf')
    print('la soluzione warm start:', mm)
    print(mm.check_as_mip_start())
    modello.add_mip_start(mm)
    modello.set_time_limit(3600)
    modello.solve(log_output=True)
    modello.print_solution()
    # GRAPH (same drawing as above, read from the solved model)
    g = pgv.AGraph(directed=True)  # initialize the graph
    nodes = np.append(Tb, Tl)
    for n in nodes:  # the graph has a node for each node of the tree
        g.add_node(n, shape='circle', size=8)
        if n != 0:
            father = ceil(n / 2) - 1
            g.add_edge(father, n)
    for t in Tb:
        coeff = []
        feat = []
        # Disabled: colour branch nodes that apply no split.
        # if mdl.solution.get_value('d_' + str(t))==0:
        #     g.get_node(t).attr['color']='red'
        for f in range(len(self.features)):
            if modello.solution.get_value('a' + str(t) + '_' + str(f)) != 0:
                coeff.insert(-1, '%.3f' % (modello.solution.get_value('a' + str(t) + '_' + str(f))))
                feat.insert(-1, f)
        g.get_node(t).attr['label'] = str(coeff) + '*X' + str(feat) + str('<=') + str('%.3f' % (modello.solution.get_value('b_' + str(t))))
    for leaf in Tl:
        if modello.solution.get_value('l_' + str(leaf)) == 0:  # these leaves haven't got points
            g.get_node(leaf).attr['color'] = 'red'
    for leaf in Tl:
        s = []
        for k in range(len(classes)):
            s.append(round(modello.solution.get_value('Nkt_' + str(k) + '_' + str(leaf))))
        for k in range(len(classes)):
            if modello.solution.get_value('c_' + str(k) + '_' + str(leaf)) == 1:
                g.get_node(leaf).attr['label'] = str(s) + '\\n' + 'class %d' % (classes[k])
    g.layout(prog='dot')
    # NOTE(review): absolute, user-specific output path.
    g.draw('/Users/giuliaciarimboli/Desktop/sol finale.pdf')
    return modello
def find_cart_warmstart(self, dataframe, dataframe2, y):
    """Build the model and attach a MIP start derived from a scikit-learn CART tree.

    A ``DecisionTreeClassifier`` limited by ``self.depth`` and ``self.Nmin``
    is fitted on ``dataframe``/``y``. Its nodes are renumbered in
    breadth-first order and that position is used as the node id in the
    variable names of the model (``a``, ``b``, ``d``, ``c``, ``Nt``, ``Nkt``,
    ``z``).

    Args:
        dataframe: Training features; passed to ``self.model`` and to the tree.
        dataframe2: Second frame, forwarded unchanged to ``self.model``.
        y: Training labels.

    Returns:
        The model returned by ``self.model`` with the MIP start added.
    """
    mdl = self.model(dataframe, dataframe2, y)

    clf = DecisionTreeClassifier(max_depth=self.depth, min_samples_leaf=self.Nmin, random_state=1)
    clf.fit(dataframe, y)

    sk_features = clf.tree_.feature
    sk_b = clf.tree_.threshold
    # NOTE(review): recent scikit-learn releases store per-class fractions
    # rather than counts in tree_.value -- confirm against the installed version.
    sk_val = clf.tree_.value
    # Leaf reached by every training row. The original read an undefined
    # `df` here instead of the `dataframe` argument.
    sk_z = clf.apply(dataframe)
    children_left = clf.tree_.children_left
    children_right = clf.tree_.children_right

    # Breadth-first listing of the scikit-learn node ids: idx[p] is the
    # scikit-learn node found at breadth-first position p.
    idx = [0]
    head = 0
    while head < len(idx):
        node = idx[head]
        head += 1
        for child in (children_left[node], children_right[node]):
            if child > 0:  # scikit-learn marks a missing child with -1
                idx.append(int(child))
    # Inverse map, replacing the repeated list.index scans.
    position = {sk_node: pos for pos, sk_node in enumerate(idx)}

    m = SolveSolution(mdl)
    for j, feat in enumerate(sk_features):
        if feat >= 0:  # negative feature ids mark scikit-learn leaves
            i = position[j]
            m.add_var_value('a%d_%d' % (i, feat), 1)
            m.add_var_value('b_%d' % i, sk_b[j])
            # Every node that splits gets d = 1. The original looped over
            # self.Tb and used those ids to index the scikit-learn arrays,
            # which skipped split nodes whose scikit-learn id is not in Tb.
            m.add_var_value('d_%d' % i, 1)

    for leaf in self.Tl:
        m.add_var_value('l_%d' % leaf, 1)

    for jj in range(len(idx)):
        ii = position[jj]
        if ii in self.Tl:
            per_class = sk_val[jj][0]
            m.add_var_value('c_%d_%d' % (np.argmax(per_class), ii), 1)
            m.add_var_value('Nt_%d' % ii, np.sum(per_class))
            for kl in range(len(self.classes)):
                m.add_var_value('Nkt_%d_%d' % (kl, ii), per_class[kl])

    # Offset applied when the fitted tree has fewer nodes than the model's tree.
    missing = len(self.Tb) + len(self.Tl) - len(idx)
    for data in range(len(dataframe)):
        foglia = position[sk_z[data]] + missing
        m.add_var_value('z_%d_%d' % (data, foglia), 1)

    print(m.check_as_mip_start())
    print(m)
    mdl.add_mip_start(m)
    return mdl
def fit_with_oct_mip_start(self, dataframe, dataframe2, y, warm_start):
    """Solve the model using a previously computed solution as MIP start.

    ``warm_start`` is indexed positionally: ``[0]`` a, ``[1]`` b, ``[2]`` d,
    ``[3]`` l, ``[4]`` c, ``[5]`` the leaf of each point, ``[6]`` Nt and
    ``[7]`` Nkt. Leaf-related entries are read in the order of ``self.Tl``.
    Each point is placed in node ``2 * leaf + 2`` of the new tree, where
    ``leaf`` is its entry in ``warm_start[5]``.

    After solving, the split thresholds, leaf flags, class assignments and
    split coefficients are stored in ``self.B``, ``self.l_test``, ``self.C``
    and ``self.A``.

    Returns:
        The solved model.
    """
    sol = self.model(dataframe, dataframe2, y)
    start = SolveSolution(sol)
    class_ids = range(len(self.classes))

    # Branch nodes: threshold, split flag and coefficients.
    for t in self.Tb:
        start.add_var_value('b_%d' % t, warm_start[1][t])
        start.add_var_value('d_%d' % t, warm_start[2][t])
        for f in self.features:
            start.add_var_value('a%d_%d' % (t, f), warm_start[0][t][f])

    # Leaves: the pos-th leaf of self.Tl reads the pos-th warm-start entry.
    for pos, leaf in enumerate(self.Tl):
        start.add_var_value('l_%d' % leaf, warm_start[3][pos])
    for pos, leaf in enumerate(self.Tl):
        for k in class_ids:
            start.add_var_value('c_%d_%d' % (k, leaf), warm_start[4][pos][k])

    # Points: right child of the leaf recorded in the warm start.
    for point in range(len(dataframe)):
        target = 2 * warm_start[5][point] + 2
        start.add_var_value('z_%d_%d' % (point, target), 1)

    for pos, leaf in enumerate(self.Tl):
        start.add_var_value('Nt_%d' % leaf, warm_start[6][pos])
        for k in class_ids:
            print(pos, k)
            start.add_var_value('Nkt_%d_%d' % (k, leaf), warm_start[7][pos][k])

    print(start)
    print(start.check_as_mip_start())
    sol.add_mip_start(start)
    sol.set_time_limit(30)
    print('finding solution with OCT as MIP START:')
    solved = sol.solve(log_output=True)

    train_error = sum(solved.get_value('L_' + str(leaf)) for leaf in self.Tl) / self.M
    print('train_error:', train_error)

    # GRAPH
    self.draw_graph(solved)

    # Keep the fitted parameters on the instance.
    value_of = sol.solution.get_value
    self.B.extend(value_of('b_' + str(t)) for t in self.Tb)
    self.l_test.extend(value_of('l_' + str(leaf)) for leaf in self.Tl)
    self.C.update({
        (k, leaf): value_of('c_' + str(k) + '_' + str(leaf))
        for k in class_ids
        for leaf in self.Tl
    })
    for t in self.Tb:
        self.A.append([value_of('a' + str(t) + '_' + str(f)) for f in self.features])
    return sol
def warm_start(self, dataframe, y, d, modello):
    """Seed ``modello`` with a greedy node-by-node warm start, solve it and plot both trees.

    Starting from the root, ``self.fit_with_cart`` is fitted on the rows that
    reach a branch node. Its root split (``a``, ``a_hat``, ``b``, ``d``) is
    copied to that node; when the node's children are leaves, the sub-model's
    leaf values (``l``, ``Nt``, ``c``, ``Nkt``, ``z``) are copied as well.
    The rows are then split by the sub-model's ``z_<i>_1`` values and the two
    halves are queued. The left subtree is expanded first, then the right one.

    Changes with respect to the previous implementation:
      * Node ids are tracked next to the queued frames instead of being read
        from hard-coded tables. This yields the same visiting order
        (0, 1, 4, 3, 10, 9, 8, 7, ... / 2, 6, 5, 14, ...) and removes two
        typos in those tables (6 for 7 and 20 for 30).
      * ``z`` is set to 0 for every row that does not reach a leaf; three of
        the four branches computed that row set the wrong way round.
      * The label column is read by position instead of the name ``9``.
      * Binary values returned by the solver are compared against 0.5.

    Args:
        dataframe: Training features. Index labels are used as row ids in the
            ``z_<row>_<leaf>`` variable names (assumes labels are
            ``0..len(dataframe)-1`` -- TODO confirm with callers).
        y: Training labels; wrapped in a ``pd.DataFrame`` whose first column
            holds the label.
        d: Tree depth; the tree has ``2**(d + 1) - 1`` nodes.
        modello: Model that receives the MIP start and is solved.

    Returns:
        Tuple ``(a_test, b_test, c_test, train_error)``: split coefficients
        per branch node, thresholds per branch node, class indicators per
        leaf, and the sum of the ``L_<leaf>`` values divided by ``len(y)``.
    """
    mm = SolveSolution(modello)
    T = pow(2, (d + 1)) - 1  # nodes number
    floorTb = int(floor(T / 2))  # number of branch nodes
    Tb = np.arange(0, floorTb)  # range branch nodes
    Tl = np.arange(floorTb, T)  # range leaf nodes
    classes = np.unique(y.values)  # possible labels of classification
    class_list = list(classes)
    all_rows = set(range(len(dataframe)))

    def is_one(value):
        # Solvers return binaries within a tolerance, so avoid `== 1`.
        return value > 0.5

    def copy_leaf(mdl, child, leaf, cl, rows):
        """Copy leaf `child` (1 or 2) of the sub-model into leaf `leaf` of the warm start."""
        value = mdl.solution.get_value
        mm.add_var_value('l_%d' % leaf, value('l_%d' % child))
        mm.add_var_value('Nt_%d' % leaf, value('Nt_%d' % child))
        for k, label in enumerate(cl):
            # k is the class index inside the sub-model; pos its index in `classes`.
            pos = class_list.index(label)
            mm.add_var_value('c_%d_%d' % (pos, leaf), value('c_%d_%d' % (k, child)))
            mm.add_var_value('Nkt_%d_%d' % (pos, leaf), value('Nkt_%d_%d' % (k, child)))
        for label in set(classes) - set(cl):
            # Classes that do not occur among the rows of this node.
            pos = class_list.index(label)
            mm.add_var_value('c_%d_%d' % (pos, leaf), 0)
            mm.add_var_value('Nkt_%d_%d' % (pos, leaf), 0)
        for n, row in enumerate(rows):
            mm.add_var_value('z_%d_%d' % (row, leaf), value('z_%d_%d' % (n, child)))
        for row in all_rows - set(rows):
            # Rows that never reach this node cannot sit in its leaves.
            mm.add_var_value('z_%d_%d' % (row, leaf), 0)

    def grow(frames, targets, node_ids, t):
        """Fit the t-th queued subset, copy its split and queue its two children."""
        X_t, y_t, node = frames[t], targets[t], node_ids[t]
        if len(y_t) <= self.Nmin:
            # NOTE(review): as before, a subset that is too small is skipped
            # and nothing is queued, so later positions no longer line up
            # with the intended nodes -- confirm the desired behaviour.
            return
        mdl = self.fit_with_cart(X_t, y_t)
        value = mdl.solution.get_value
        cl = y_t.iloc[:, 0].unique()
        cl.sort()
        for f in self.features:
            mm.add_var_value('a%d_%d' % (node, f), value('a0_%d' % f))
            mm.add_var_value('a_hat%d_%d' % (node, f), value('a_hat0_%d' % f))
        mm.add_var_value('b_%d' % node, value('b_0'))
        mm.add_var_value('d_%d' % node, value('d_0'))

        rows = X_t.index
        y_rows = y_t.index
        for child in (1, 2):
            leaf = 2 * node + child
            if leaf in Tl:
                copy_leaf(mdl, child, leaf, cl, rows)

        # Split the rows between the two children of this node.
        left_X, right_X, left_y, right_y = [], [], [], []
        for i in range(len(X_t)):
            if is_one(value('z_%d_1' % i)):
                left_X.append(X_t.loc[rows[i]])
                left_y.append(y_t.loc[y_rows[i]])
            else:
                right_X.append(X_t.loc[rows[i]])
                right_y.append(y_t.loc[y_rows[i]])
        # Children go to positions 1 and 2; older entries shift right, which
        # is what makes index t land on the next node to expand.
        frames.insert(1, pd.DataFrame(left_X))
        frames.insert(2, pd.DataFrame(right_X))
        targets.insert(1, pd.DataFrame(left_y))
        targets.insert(2, pd.DataFrame(right_y))
        node_ids.insert(1, 2 * node + 1)
        node_ids.insert(2, 2 * node + 2)

    def draw(get_value, path):
        """Render the tree described by `get_value` to `path` with graphviz."""
        g = pgv.AGraph(directed=True)  # initialize the graph
        for n in np.append(Tb, Tl):  # one graph node per tree node
            g.add_node(n, shape='circle', size=8)
            if n != 0:
                father = ceil(n / 2) - 1
                g.add_edge(father, n)
        for t in Tb:
            coeff = []
            feat = []
            for f in range(len(self.features)):
                a_tf = get_value('a' + str(t) + '_' + str(f))
                if a_tf != 0:
                    coeff.append('%.3f' % a_tf)
                    feat.append(f)
            g.get_node(t).attr['label'] = str(coeff) + '*X' + str(feat) + str('<=') + str(
                '%.3f' % (get_value('b_' + str(t))))
        for leaf in Tl:
            if get_value('l_' + str(leaf)) < 0.5:  # these leaves haven't got points
                g.get_node(leaf).attr['color'] = 'red'
        for leaf in Tl:
            counts = [round(get_value('Nkt_' + str(k) + '_' + str(leaf))) for k in range(len(classes))]
            for k in range(len(classes)):
                if is_one(get_value('c_' + str(k) + '_' + str(leaf))):
                    g.get_node(leaf).attr['label'] = str(counts) + '\\n' + 'class %d' % (classes[k])
        g.layout(prog='dot')
        g.draw(path)

    # Left pass: root and the branch nodes of the left subtree.
    lista_df = [dataframe]
    lista_y = [pd.DataFrame(y)]
    nodes_l = [0]
    half = int((len(Tb) - 1) / 2) + 1
    for t in range(half):
        grow(lista_df, lista_y, nodes_l, t)

    # Right pass: restart from the root and the last queued subset, which is
    # the right child of the root when the root was split.
    lista_df_r = [lista_df[0], lista_df[-1]]
    lista_y_r = [lista_y[0], lista_y[-1]]
    nodes_r = [0, nodes_l[-1]]
    for t in range(1, half):
        grow(lista_df_r, lista_y_r, nodes_r, t)

    # GRAPH WARM START
    # NOTE(review): absolute, user-specific output paths are kept unchanged.
    draw(mm.get_value, '/Users/giuliaciarimboli/Desktop/warm_start_LDA.pdf')
    print('la soluzione warm start:', mm)
    print(mm.check_as_mip_start())
    modello.add_mip_start(mm)
    modello.set_time_limit(900)
    modello.parameters.emphasis.mip = 4
    s = modello.solve(log_output=True)
    modello.print_solution()

    train_error = sum(s.get_value('L_' + str(leaf)) for leaf in Tl) / len(y)
    print('train_error:', train_error)

    a_test = [[s.get_value('a%d_%d' % (t, f)) for f in self.features] for t in Tb]
    b_test = [s.get_value('b_%d' % t) for t in Tb]
    # Appending keeps the classes in order; the former insert(leaf, ...) could
    # misplace entries when the leaf id was smaller than the list length.
    c_test = [[s.get_value('c_%d_%d' % (k, leaf)) for k in range(len(classes))] for leaf in Tl]

    # GRAPH
    draw(modello.solution.get_value, '/Users/giuliaciarimboli/Desktop/solfinale_LDA.pdf')
    return a_test, b_test, c_test, train_error