Ejemplo n.º 1
0
    def warm_start_multi(self, dataframe, dataframe2, y, d, modello):
        """Seed ``modello`` with a node-by-node warm start, then solve it.

        Each branch node of a depth-``d`` tree is fitted on its own through
        ``self.fit_with_cart``. The split, the leaf statistics and the point
        assignments of that small model are copied into a ``SolveSolution``
        under the variable names of the full model (``a<t>_<f>``, ``b_<t>``,
        ``d_<t>``, ``l_<leaf>``, ``Nt_<leaf>``, ``c_<k>_<leaf>``,
        ``Nkt_<k>_<leaf>``, ``z_<point>_<leaf>``). The start is registered on
        ``modello``, the model is solved with a 3600 s time limit, and both
        the warm start and the final solution are drawn as PDF trees.

        Args:
            dataframe: training features. Rows are addressed through the
                frame's index labels, and those labels are used as the point
                numbers in ``z_<point>_<leaf>`` (assumes the index is
                0..len-1, as ``warm_start`` does -- TODO confirm with caller).
            dataframe2: not used by this method; kept so callers keep working.
            y: training labels aligned with ``dataframe``.
            d: tree depth; the tree has ``2 ** (d + 1) - 1`` nodes. The
                traversal tables below hold 8 entries, i.e. they cover
                ``d <= 4``.
            modello: model that receives the MIP start and is solved.

        Returns:
            ``modello``, after ``solve`` has been called on it.
        """
        # Node handled at step t while walking the left / right subtree.
        # The order mirrors the work lists below, where the two children of
        # the node just processed are always inserted at positions 1 and 2.
        ordine_l = [0, 1, 4, 3, 10, 9, 8, 7]
        ordine_r = [0, 2, 6, 5, 14, 13, 12, 11]

        mm = SolveSolution(modello)

        T = pow(2, (d + 1)) - 1  # number of nodes
        floorTb = int(floor(T / 2))  # number of branch nodes
        Tb = np.arange(0, floorTb)  # branch nodes
        Tl = np.arange(floorTb, T)  # leaf nodes
        classes = np.unique(y.values)  # possible class labels
        class_pos = list(classes)
        steps = int((len(Tb) - 1) / 2) + 1
        y = pd.DataFrame(y)

        def fit_node(node, df, y_df):
            """Fit one split on (df, y_df), store it at ``node``, return child data."""
            idx_y = y_df.index
            idx_df = df.index
            mdl = self.fit_with_cart(df, df, y_df)
            sub = mdl.solution.get_value
            # NOTE(review): the label column is addressed as 0, i.e. ``y`` is
            # presumably an unnamed Series -- confirm against callers.
            cl = y_df[0].unique()
            cl.sort()

            for f in self.features:
                mm.add_var_value('a%d_%d' % (node, f), sub('a0_%d' % f))
            mm.add_var_value('b_%d' % node, sub('b_0'))
            mm.add_var_value('d_%d' % node, sub('d_0'))

            # Children 1 and 2 of the small model map to the left and right
            # child of ``node``; their data is copied only when that child is
            # a leaf of the full tree.
            for child in (1, 2):
                leaf = 2 * node + child
                if leaf not in Tl:
                    continue
                mm.add_var_value('Nt_%d' % leaf, sub('Nt_%d' % child))
                mm.add_var_value('l_%d' % leaf, sub('l_%d' % child))
                for k, label in enumerate(cl):
                    # The small model numbers only the classes present in
                    # this subset; translate to the global class position.
                    pos = class_pos.index(label)
                    mm.add_var_value('c_%d_%d' % (pos, leaf),
                                     sub('c_%d_%d' % (k, child)))
                    mm.add_var_value('Nkt_%d_%d' % (pos, leaf),
                                     sub('Nkt_%d_%d' % (k, child)))
                for n in range(len(df)):
                    # The small model numbers points by position; the full
                    # model is addressed by the point's own index label.
                    mm.add_var_value('z_%d_%d' % (idx_df[n], leaf),
                                     sub('z_%d_%d' % (n, child)))

            # Split the rows between the two children, keeping row order.
            rows_1, rows_2, labels_1, labels_2 = [], [], [], []
            for i in range(len(df)):
                if sub('z_%d_1' % i) == 1:
                    rows_1.append(df.loc[idx_df[i]])
                    labels_1.append(y_df.loc[idx_y[i]])
                else:
                    rows_2.append(df.loc[idx_df[i]])
                    labels_2.append(y_df.loc[idx_y[i]])
            return (pd.DataFrame(rows_1), pd.DataFrame(rows_2),
                    pd.DataFrame(labels_1), pd.DataFrame(labels_2))

        def draw_tree(value_of, path):
            """Render the tree described by ``value_of(name)`` to ``path``."""
            g = pgv.AGraph(directed=True)
            for n in np.append(Tb, Tl):  # one graph node per tree node
                g.add_node(n, shape='circle', size=8)
                if n != 0:
                    g.add_edge(ceil(n / 2) - 1, n)  # edge from the parent

            for t in Tb:
                coeff = []
                feat = []
                for f in range(len(self.features)):
                    a_tf = value_of('a' + str(t) + '_' + str(f))
                    if a_tf != 0:
                        coeff.append('%.3f' % a_tf)
                        feat.append(f)
                g.get_node(t).attr['label'] = (
                    str(coeff) + '*X' + str(feat) + '<=' +
                    '%.3f' % value_of('b_' + str(t)))

            for leaf in Tl:
                if value_of('l_' + str(leaf)) == 0:  # leaf holds no points
                    g.get_node(leaf).attr['color'] = 'red'
                counts = [round(value_of('Nkt_' + str(k) + '_' + str(leaf)))
                          for k in range(len(classes))]
                for k in range(len(classes)):
                    if value_of('c_' + str(k) + '_' + str(leaf)) == 1:
                        g.get_node(leaf).attr['label'] = (
                            str(counts) + '\\n' + 'class %d' % (classes[k]))
            g.layout(prog='dot')
            g.draw(path)

        # Left subtree (root included).
        lista_df = [dataframe]
        lista_y = [y]
        for t in range(steps):
            df_1, df_2, y_1, y_2 = fit_node(ordine_l[t], lista_df[t],
                                            lista_y[t])
            lista_df.insert(1, df_1)
            lista_df.insert(2, df_2)
            lista_y.insert(1, y_1)
            lista_y.insert(2, y_2)

        # Right subtree: the last entry of the left work list is still the
        # data of the root's right child.
        lista_df_r = [lista_df[0], lista_df[-1]]
        lista_y_r = [lista_y[0], lista_y[-1]]
        for t in range(1, steps):
            df_1, df_2, y_1, y_2 = fit_node(ordine_r[t], lista_df_r[t],
                                            lista_y_r[t])
            lista_df_r.insert(1, df_1)
            lista_df_r.insert(2, df_2)
            lista_y_r.insert(1, y_1)
            lista_y_r.insert(2, y_2)

        # Draw the warm start once, after every node has been filled in.
        draw_tree(mm.get_value,
                  '/Users/giuliaciarimboli/Desktop/warm_start.pdf')

        print('la soluzione warm start:', mm)

        print(mm.check_as_mip_start())
        modello.add_mip_start(mm)
        modello.set_time_limit(3600)

        modello.solve(log_output=True)
        modello.print_solution()

        draw_tree(modello.solution.get_value,
                  '/Users/giuliaciarimboli/Desktop/sol finale.pdf')

        return modello
    def warm_start(self, dataframe, y, d, modello):
        """Seed ``modello`` with a node-by-node warm start, solve, and report.

        Each branch node of a depth-``d`` tree that holds more than
        ``self.Nmin`` points is fitted on its own through
        ``self.fit_with_cart``. Its split (``a``, ``a_hat``, ``b``, ``d``),
        leaf statistics (``l``, ``Nt``, ``c``, ``Nkt``) and point assignments
        (``z``) are copied into a ``SolveSolution`` under the variable names
        of the full model. On every leaf that is filled in, classes absent
        from the node's subset get ``c = Nkt = 0`` and points outside the
        subset get ``z = 0``. The start is registered on ``modello``, which is
        then solved with a 900 s time limit; the warm start and the final
        solution are drawn as PDF trees.

        Args:
            dataframe: training features. Rows are addressed through the
                frame's index labels, which are also used as point numbers in
                ``z_<point>_<leaf>`` (the code assumes they are 0..len-1).
            y: training labels aligned with ``dataframe``.
            d: tree depth; the tree has ``2 ** (d + 1) - 1`` nodes. The
                traversal tables below hold 16 entries, i.e. they cover
                ``d <= 5``.
            modello: model that receives the MIP start and is solved.

        Returns:
            Tuple ``(a_test, b_test, c_test, train_error)``: split
            coefficients per branch node, thresholds per branch node, class
            indicators per leaf (in class order), and the sum of the ``L_``
            variables over the leaves divided by the number of labels.
        """
        # Node handled at step t while walking the left / right subtree.
        # The order mirrors the work lists below, where the two children of
        # the node just processed are always inserted at positions 1 and 2,
        # so each tree level is visited from its highest node number down.
        ordine_l = [0, 1, 4, 3, 10, 9, 8, 7, 22, 21, 20, 19, 18, 17, 16, 15]
        ordine_r = [0, 2, 6, 5, 14, 13, 12, 11, 30, 29, 28, 27, 26, 25, 24, 23]

        mm = SolveSolution(modello)

        T = pow(2, (d + 1)) - 1  # number of nodes
        floorTb = int(floor(T / 2))  # number of branch nodes
        Tb = np.arange(0, floorTb)  # branch nodes
        Tl = np.arange(floorTb, T)  # leaf nodes
        classes = np.unique(y.values)  # possible class labels
        class_pos = list(classes)
        all_points = set(range(len(dataframe)))
        steps = int((len(Tb) - 1) / 2) + 1
        y = pd.DataFrame(y)

        def fit_node(node, df, y_df):
            """Fit one split on (df, y_df), store it at ``node``, return child data."""
            idx_y = y_df.index
            idx_df = df.index
            mdl = self.fit_with_cart(df, y_df)
            sub = mdl.solution.get_value
            # NOTE(review): the label column is addressed by the hard-coded
            # name 9 -- presumably the label column of the dataset in use;
            # confirm before reusing with other data.
            cl = y_df[9].unique()
            cl.sort()
            # Global positions of the classes that do not occur in this node.
            absent = [class_pos.index(c) for c in set(classes) - set(cl)]
            # Points of the full training set that did not reach this node.
            outside = all_points - set(idx_df)

            for f in self.features:
                mm.add_var_value('a%d_%d' % (node, f), sub('a0_%d' % f))
                mm.add_var_value('a_hat%d_%d' % (node, f),
                                 sub('a_hat0_%d' % f))
            mm.add_var_value('b_%d' % node, sub('b_0'))
            mm.add_var_value('d_%d' % node, sub('d_0'))

            # Children 1 and 2 of the small model map to the left and right
            # child of ``node``; their data is copied only when that child is
            # a leaf of the full tree.
            for child in (1, 2):
                leaf = 2 * node + child
                if leaf not in Tl:
                    continue
                mm.add_var_value('Nt_%d' % leaf, sub('Nt_%d' % child))
                mm.add_var_value('l_%d' % leaf, sub('l_%d' % child))
                for k, label in enumerate(cl):
                    # The small model numbers only the classes present in
                    # this subset; translate to the global class position.
                    pos = class_pos.index(label)
                    mm.add_var_value('c_%d_%d' % (pos, leaf),
                                     sub('c_%d_%d' % (k, child)))
                    mm.add_var_value('Nkt_%d_%d' % (pos, leaf),
                                     sub('Nkt_%d_%d' % (k, child)))
                for pos in absent:
                    mm.add_var_value('c_%d_%d' % (pos, leaf), 0)
                    mm.add_var_value('Nkt_%d_%d' % (pos, leaf), 0)
                for n in range(len(df)):
                    # The small model numbers points by position; the full
                    # model is addressed by the point's own index label.
                    mm.add_var_value('z_%d_%d' % (idx_df[n], leaf),
                                     sub('z_%d_%d' % (n, child)))
                for n in outside:
                    mm.add_var_value('z_%d_%d' % (n, leaf), 0)

            # Split the rows between the two children, keeping row order.
            rows_1, rows_2, labels_1, labels_2 = [], [], [], []
            for i in range(len(df)):
                if sub('z_%d_1' % i) == 1:
                    rows_1.append(df.loc[idx_df[i]])
                    labels_1.append(y_df.loc[idx_y[i]])
                else:
                    rows_2.append(df.loc[idx_df[i]])
                    labels_2.append(y_df.loc[idx_y[i]])
            return (pd.DataFrame(rows_1), pd.DataFrame(rows_2),
                    pd.DataFrame(labels_1), pd.DataFrame(labels_2))

        def draw_tree(value_of, path):
            """Render the tree described by ``value_of(name)`` to ``path``."""
            g = pgv.AGraph(directed=True)
            for n in np.append(Tb, Tl):  # one graph node per tree node
                g.add_node(n, shape='circle', size=8)
                if n != 0:
                    g.add_edge(ceil(n / 2) - 1, n)  # edge from the parent

            for t in Tb:
                coeff = []
                feat = []
                for f in range(len(self.features)):
                    a_tf = value_of('a' + str(t) + '_' + str(f))
                    if a_tf != 0:
                        coeff.append('%.3f' % a_tf)
                        feat.append(f)
                g.get_node(t).attr['label'] = (
                    str(coeff) + '*X' + str(feat) + '<=' +
                    '%.3f' % value_of('b_' + str(t)))

            for leaf in Tl:
                if value_of('l_' + str(leaf)) == 0:  # leaf holds no points
                    g.get_node(leaf).attr['color'] = 'red'
                counts = [round(value_of('Nkt_' + str(k) + '_' + str(leaf)))
                          for k in range(len(classes))]
                for k in range(len(classes)):
                    if value_of('c_' + str(k) + '_' + str(leaf)) == 1:
                        g.get_node(leaf).attr['label'] = (
                            str(counts) + '\\n' + 'class %d' % (classes[k]))
            g.layout(prog='dot')
            g.draw(path)

        # Left subtree (root included).
        # NOTE(review): a node with at most self.Nmin points is skipped and
        # adds no children, so later positions in the work list shift with
        # respect to the traversal table -- confirm this is intended.
        lista_df = [dataframe]
        lista_y = [y]
        for t in range(steps):
            if len(lista_y[t]) > self.Nmin:
                df_1, df_2, y_1, y_2 = fit_node(ordine_l[t], lista_df[t],
                                                lista_y[t])
                lista_df.insert(1, df_1)
                lista_df.insert(2, df_2)
                lista_y.insert(1, y_1)
                lista_y.insert(2, y_2)

        # Right subtree: the last entry of the left work list is still the
        # data of the root's right child.
        lista_df_r = [lista_df[0], lista_df[-1]]
        lista_y_r = [lista_y[0], lista_y[-1]]
        for t in range(1, steps):
            if len(lista_y_r[t]) > self.Nmin:
                df_1, df_2, y_1, y_2 = fit_node(ordine_r[t], lista_df_r[t],
                                                lista_y_r[t])
                lista_df_r.insert(1, df_1)
                lista_df_r.insert(2, df_2)
                lista_y_r.insert(1, y_1)
                lista_y_r.insert(2, y_2)

        # Draw the warm start once, after every node has been filled in.
        draw_tree(mm.get_value,
                  '/Users/giuliaciarimboli/Desktop/warm_start_LDA.pdf')

        print('la soluzione warm start:', mm)

        print(mm.check_as_mip_start())
        modello.add_mip_start(mm)

        modello.set_time_limit(900)
        modello.parameters.emphasis.mip = 4

        s = modello.solve(log_output=True)
        modello.print_solution()

        train_error = 0
        for leaf in Tl:
            train_error += s.get_value('L_' + str(leaf))
        train_error = train_error / len(y)
        print('train_error:', train_error)

        # Collect the fitted tree: one row of coefficients and one threshold
        # per branch node, one row of class indicators per leaf.
        a_test = []
        b_test = []
        c_test = []
        for t in Tb:
            b_test.append(s.get_value('b_%d' % t))
            a_test.append(
                [s.get_value('a%d_%d' % (t, f)) for f in self.features])
        for leaf in Tl:
            c_test.append([s.get_value('c_%d_%d' % (k, leaf))
                           for k in range(len(classes))])

        draw_tree(modello.solution.get_value,
                  '/Users/giuliaciarimboli/Desktop/solfinale_LDA.pdf')

        return a_test, b_test, c_test, train_error
Ejemplo n.º 3
0
    def fit_with_oct_mip_start(self, dataframe, dataframe2, y, warm_start):
        """Solve the model from ``self.model`` using ``warm_start`` as MIP start.

        ``warm_start`` is read positionally:

        * ``[0][t][f]`` -> ``a<t>_<f>``, ``[1][t]`` -> ``b_<t>``,
          ``[2][t]`` -> ``d_<t>`` for every branch node ``t`` in ``self.Tb``;
        * ``[3][i]`` -> ``l_<leaf>``, ``[4][i][k]`` -> ``c_<k>_<leaf>``,
          ``[6][i]`` -> ``Nt_<leaf>``, ``[7][i][k]`` -> ``Nkt_<k>_<leaf>``,
          where ``i`` is the position of ``leaf`` inside ``self.Tl``;
        * ``[5][point]`` is a node number; the point is assigned
          (``z = 1``) to that node's right child, ``2 * node + 2``.

        The model is solved with a 30 s time limit. Afterwards the training
        error is printed, ``self.draw_graph`` is called with the solution,
        and the fitted values are appended to ``self.B``, ``self.l_test``,
        ``self.A`` and stored in ``self.C``.

        Returns:
            The model object returned by ``self.model`` after solving.
        """
        sol = self.model(dataframe, dataframe2, y)
        start = SolveSolution(sol)

        # Branch nodes: threshold, split indicator and coefficients.
        for node in self.Tb:
            start.add_var_value('b_%d' % node, warm_start[1][node])
            start.add_var_value('d_%d' % node, warm_start[2][node])
            for feature in self.features:
                start.add_var_value('a%d_%d' % (node, feature),
                                    warm_start[0][node][feature])

        # Leaves: the warm-start rows follow the order of self.Tl.
        for pos, leaf in enumerate(self.Tl):
            start.add_var_value('l_%d' % leaf, warm_start[3][pos])

        for pos, leaf in enumerate(self.Tl):
            for k in range(len(self.classes)):
                start.add_var_value('c_%d_%d' % (k, leaf),
                                    warm_start[4][pos][k])

        # Every point goes to the right child of the node recorded for it.
        for point in range(len(dataframe)):
            target = 2 * warm_start[5][point] + 2
            start.add_var_value('z_%d_%d' % (point, target), 1)

        for pos, leaf in enumerate(self.Tl):
            start.add_var_value('Nt_%d' % leaf, warm_start[6][pos])
            for k in range(len(self.classes)):
                print(pos, k)
                start.add_var_value('Nkt_%d_%d' % (k, leaf),
                                    warm_start[7][pos][k])

        print(start)
        print(start.check_as_mip_start())
        sol.add_mip_start(start)
        sol.set_time_limit(30)
        print('finding solution with OCT as MIP START:')
        solved = sol.solve(log_output=True)

        # Training error: sum of the L_ variables over the leaves, over M.
        total_loss = 0
        for leaf in self.Tl:
            total_loss += solved.get_value('L_' + str(leaf))
        train_error = total_loss / self.M
        print('train_error:', train_error)

        self.draw_graph(solved)

        # Store the fitted tree on the instance.
        for node in self.Tb:
            self.B.append(sol.solution.get_value('b_' + str(node)))

        for leaf in self.Tl:
            self.l_test.append(sol.solution.get_value('l_' + str(leaf)))

        for k in range(len(self.classes)):
            for leaf in self.Tl:
                name = 'c_' + str(k) + '_' + str(leaf)
                self.C.update({(k, leaf): sol.solution.get_value(name)})

        for node in self.Tb:
            row = []
            for feature in self.features:
                name = 'a' + str(node) + '_' + str(feature)
                row.append(sol.solution.get_value(name))
            self.A.append(row)

        return sol
Ejemplo n.º 4
0
    k = np.argmax(sk_val[jj][0])
    num = np.sum(sk_val[jj][0])
    ii = list(idx_sk).index(jj)
    if ii in Tl:
        m.add_var_value('c_%d_%d'%(k,ii), 1)
        m.add_var_value('Nt_%d'%(ii), num)
        for kl in range(len(classes)):
            m.add_var_value('Nkt_%d_%d'%(kl,ii), sk_val[jj][0][kl])
for data in range(Num_points):
    foglia = list(idx_sk).index(sk_z[data])
    m.add_var_value('z_%d_%d'%(data,foglia), 1)


for i in Tb:
    for f in features:
        if m.get_value('a%d_%d'%(i,f))==1:
            print('la feature del nodo %d è la %d:'%(i,f))
for i in Tb:
    print('il valore di b nel nodo %d è pari a:'%(i),m.get_value('b_%d'%(i)))
for leaf in Tl:
    for kl in range(len(classes)):
        print(kl, leaf, m.get_value('Nkt_%d_%d'%(kl,leaf)))

for leaf in Tl:
    for kl in range(len(classes)):
        if m.get_value(('c_%d_%d'%(kl,leaf)))==1:
             print('la classe nella foglia %d è la %d'%(leaf,kl))
for da in range(Num_points):
    for leaf in Tl:

        if m.get_value('z_%d_%d'%(da,leaf))==1: