Ejemplo n.º 1
0
 def vnd(self, n_gen, kmax, n_neighbors, solution, best_res, best_solution,  best_accuracy, best_precision,
         best_recall, best_fscore, best_cols, best_model, data, mode, dummiesList, createDummies, normalize, metric):
     """Scan the neighbourhoods of ``solution`` and keep the best-scoring neighbour found.

     For each of ``n_gen`` rounds, ``self.generate_neighbors(solution, n_neighbors)``
     is called and every neighbour in its first ``kmax`` entries is evaluated with
     ``utility.fitness2``. A neighbour replaces the incumbent when its score for
     ``metric`` is strictly greater than ``best_res``.

     Args:
         n_gen: Number of rounds.
         kmax: Number of neighbourhoods (indices ``0 .. kmax - 1``) scanned per round.
         n_neighbors: Forwarded to ``self.generate_neighbors``.
         solution: Starting point the neighbourhoods are generated from.
         best_res, best_solution, best_accuracy, best_precision, best_recall,
         best_fscore, best_cols, best_model: Current incumbent and its scores.
         data, mode, dummiesList, createDummies, normalize: Forwarded to
             ``utility.fitness2``.
         metric: Score to optimise: ``'accuracy'``/``'exactitude'``,
             ``'recall'``/``'rappel'``, ``'precision'``/``'précision'`` or
             ``'fscore'``. Any other value falls back to accuracy.

     Returns:
         Tuple ``(best_res, best_solution, best_accuracy, best_precision,
         best_recall, best_fscore, best_cols, best_model)``.
     """
     iteration = 0
     while iteration < n_gen:
         k = 0
         neighbor_space = self.generate_neighbors(solution, n_neighbors)
         while k < kmax:
             for n in neighbor_space[k]:
                 accuracy_n, recall_n, precision_n, fscore_n, cols_n, model_n, obj = \
                     utility.fitness2(self=self, mode=mode, solution=n, data=data, dummiesList=dummiesList,
                                      createDummies=createDummies, normalize=normalize)
                 # Carry the lookup-table state returned by fitness2 back onto this instance.
                 self.tab_data, self.tab_vals, self.tab_insert, self.tab_find = \
                     obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

                 # Explicit lookup: the former `metric == 'a' or 'b'` chain was always
                 # true, so every metric silently resolved to accuracy.
                 scores_by_metric = {
                     'accuracy': accuracy_n, 'exactitude': accuracy_n,
                     'recall': recall_n, 'rappel': recall_n,
                     'precision': precision_n, 'précision': precision_n,
                     'fscore': fscore_n,
                 }
                 res_nei = scores_by_metric.get(metric, accuracy_n)

                 if res_nei > best_res:
                     best_solution = n
                     best_res = res_nei
                     best_accuracy = accuracy_n
                     best_precision = precision_n
                     best_recall = recall_n
                     best_fscore = fscore_n
                     best_cols = cols_n
                     best_model = model_n
             k = k + 1
         iteration = iteration + 1
     return best_res, best_solution, best_accuracy, best_precision, best_recall, best_fscore, best_cols, best_model
Ejemplo n.º 2
0
    def k_opt(self, best_res, best_solution, best_accuracy, best_precision, best_recall, best_fscore, best_cols,
              best_model, data, mods, dummiesList, createDummies, normalize, metric):
        """Flip one random position of the incumbent and keep the result if it scores higher.

        A copy of ``best_solution`` has one randomly chosen entry negated, is
        evaluated with ``utility.fitness2``, and replaces the incumbent when its
        score for ``metric`` is strictly greater than ``best_res``.

        Args:
            best_res, best_solution, best_accuracy, best_precision, best_recall,
            best_fscore, best_cols, best_model: Current incumbent and its scores.
            data, dummiesList, createDummies, normalize: Forwarded to
                ``utility.fitness2``.
            mods: Forwarded to ``utility.fitness2`` as ``mode``.
            metric: Score to optimise: ``'accuracy'``/``'exactitude'``,
                ``'recall'``/``'rappel'``, ``'precision'``/``'précision'`` or
                ``'fscore'``. Any other value falls back to accuracy.

        Returns:
            Tuple ``(best_res, best_solution, best_accuracy, best_precision,
            best_recall, best_fscore, best_cols, best_model)``.
        """
        # Pick a single random index and invert it on a copy of the incumbent.
        mutate_index = random.sample(range(0, len(best_solution)), 1)
        perturbation = best_solution.copy()
        for m in mutate_index:
            perturbation[m] = not perturbation[m]

        # NOTE(review): unpacked as (accuracy, recall, precision, ...) to agree with the
        # other fitness2 call sites; this method previously had precision and recall
        # swapped. Confirm against utility.fitness2's return order.
        accuracy_ij, recall_ij, precision_ij, fscore_ij, cols_ij, model_ij, obj =\
            utility.fitness2(self=self, mode=mods, solution=perturbation, data=data, dummiesList=dummiesList,
                             createDummies=createDummies, normalize=normalize)

        # Carry the lookup-table state returned by fitness2 back onto this instance.
        self.tab_data, self.tab_vals, self.tab_insert, self.tab_find = \
            obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

        # Explicit lookup: the former `metric == 'a' or 'b'` chain was always true,
        # so every metric silently resolved to accuracy.
        scores_by_metric = {
            'accuracy': accuracy_ij, 'exactitude': accuracy_ij,
            'recall': recall_ij, 'rappel': recall_ij,
            'precision': precision_ij, 'précision': precision_ij,
            'fscore': fscore_ij,
        }
        res_ij = scores_by_metric.get(metric, accuracy_ij)

        if res_ij > best_res:
            best_solution = perturbation
            best_res = res_ij
            best_accuracy = accuracy_ij
            best_precision = precision_ij
            best_recall = recall_ij
            best_fscore = fscore_ij
            best_cols = cols_ij
            best_model = model_ij

        return best_res, best_solution, best_accuracy, best_precision, best_recall, best_fscore, best_cols, best_model
Ejemplo n.º 3
0
    def optimization(self, part, n_gen, n_gen_vnd, kmax, n_neighbors, data,
                     dummiesList, createDummies, normalize, metric, x, y, besties, names, iters):
        """Run the iterated local search once for every mode in ``part``.

        For each mode a random boolean solution is drawn and scored, then for
        ``n_gen`` generations the incumbent is perturbed with ``self.k_opt`` and
        refined with ``self.vnd``. After every generation the score curves are
        re-plotted to PNG files under ``self.path2/<MODE>`` and ``self.write_res``
        is called; every fifth generation the lookup table is dumped with
        ``tab.dump``. The final results are pushed onto the ``x``, ``y``,
        ``besties``, ``names`` and ``iters`` objects through their ``put`` method.

        Args:
            part: Iterable of mode names (strings) to process.
            n_gen: Number of generations of the outer loop.
            n_gen_vnd, kmax, n_neighbors: Forwarded to ``self.vnd``.
            data: Dataset; indexed with ``self.target`` to list the classes.
            dummiesList, createDummies, normalize: Forwarded to the fitness evaluation.
            metric: Score to optimise: ``'accuracy'``/``'exactitude'``,
                ``'recall'``/``'rappel'``, ``'precision'``/``'précision'`` or
                ``'fscore'``. Any other value falls back to accuracy.
            x, y, besties, names, iters: Output containers exposing ``put``.
        """
        debut = time.time()

        for mode in part:

            folderName = mode.upper()

            utility.createDirectory(path=self.path2, folderName=folderName)

            feature_cols = self.data.drop([self.target], axis=1).columns

            unique = list(np.unique(data[self.target]))

            # Initialise the lookup table
            self.tab_data, self.tab_vals = tab.init(size=len(feature_cols), matrix_size=len(unique),
                                                    filename='tab_' + self.data_name + '_' + mode)

            x1 = []
            y1 = []
            y2 = []  # never filled here; still handed to write_res below
            yX = []

            solution = np.random.choice(a=[False, True], size=self.copy.columns.size - 1)

            accuracy, recall, precision, fscore, cols, model, obj = \
                utility.fitness2(self, mode, solution, data, dummiesList, createDummies, normalize)

            # Carry the lookup-table state returned by fitness2 back onto this instance.
            self.tab_data, self.tab_vals, self.tab_insert, self.tab_find =\
                obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

            # Explicit lookup: the former `metric == 'a' or 'b'` chain was always true,
            # so every metric silently resolved to accuracy.
            scores_by_metric = {
                'accuracy': accuracy, 'exactitude': accuracy,
                'recall': recall, 'rappel': recall,
                'precision': precision, 'précision': precision,
                'fscore': fscore,
            }

            best_solution = solution
            best_res = scores_by_metric.get(metric, accuracy)
            best_accuracy = accuracy
            best_precision = precision
            best_recall = recall
            best_fscore = fscore
            best_cols = cols
            best_model = model

            iteration = 0
            while iteration < n_gen:
                instant = time.time()

                # Perturbation step.
                res_prime, res_sol_prime, accuracy_prime, precision_prime, recall_prime, fscore_prime, cols_prime,\
                    model_prime = self.k_opt(
                        best_res, best_solution, best_accuracy, best_precision, best_recall, best_fscore, best_cols,
                        best_model, data, mode, dummiesList, createDummies, normalize, metric
                    )

                if res_prime > best_res:
                    best_solution = res_sol_prime
                    best_res = res_prime
                    best_accuracy = accuracy_prime
                    best_precision = precision_prime
                    best_recall = recall_prime
                    best_fscore = fscore_prime
                    best_cols = cols_prime
                    best_model = model_prime

                # Local-search step, started from the solution k_opt returned.
                res_nei, res_sol, accuracy_n, precision_n, recall_n, fscore_n, cols_n, model_n = self.vnd(
                    n_gen_vnd, kmax, n_neighbors, res_sol_prime, best_res, best_solution, best_accuracy, best_precision,
                    best_recall, best_fscore, best_cols, best_model, data, mode, dummiesList, createDummies, normalize,
                    metric)

                if res_nei > best_res:
                    best_solution = res_sol
                    best_res = res_nei
                    best_accuracy = accuracy_n
                    best_precision = precision_n
                    best_recall = recall_n
                    best_fscore = fscore_n
                    best_cols = cols_n
                    best_model = model_n

                print("mode: ", mode, " valeur: ", best_res, " iteration: ", iteration,
                      " temps exe: ", str(timedelta(seconds=(time.time() - instant))),
                      " temps total: ", str(timedelta(seconds=(time.time() - debut))))

                x1.append(iteration)
                y1.append(best_res)

                # One value per index of best_model: TP / (FN + TP).
                yX.append([
                    utility.getTruePositive(best_model, i) /
                    (utility.getFalseNegative(best_model, i) + utility.getTruePositive(best_model, i))
                    for i in range(len(best_model))
                ])

                # Best-score curve; the same file is overwritten every generation.
                fig, ax = plt.subplots()
                ax.plot(x1, y1)
                ax.set_title("Evolution du score par génération (" + folderName + ")"
                             + "\nRecherche locale itérée")
                ax.set_xlabel("génération")
                ax.set_ylabel(metric)
                ax.grid()
                ax.legend(labels=["Le meilleur"],
                          loc='center left', bbox_to_anchor=(1.04, 0.5), borderaxespad=0)
                a = os.path.join(os.path.join(self.path2, folderName), 'plot_' + str(n_gen) + '.png')
                b = os.path.join(os.getcwd(), a)
                fig.savefig(os.path.abspath(b), bbox_inches="tight")
                plt.close(fig)

                # Per-class curves; the same file is overwritten every generation.
                fig2, ax2 = plt.subplots()

                ax2.plot(x1, yX)

                ax2.set_title("Evolution du score par génération pour chacune des classes (" + folderName + ")"
                              + "\nRecherrche locale itérée")
                ax2.set_xlabel("génération")
                ax2.set_ylabel(metric)
                ax2.grid()
                ax2.legend(labels=unique, loc='center left', bbox_to_anchor=(1.04, 0.5), borderaxespad=0)
                a = os.path.join(os.path.join(self.path2, folderName), 'plotb_' + str(n_gen) + '.png')
                b = os.path.join(os.getcwd(), a)
                fig2.savefig(os.path.abspath(b), bbox_inches="tight")
                plt.close(fig2)

                iteration = iteration + 1

                self.write_res(folderName=folderName, mode=mode, n_gen=n_gen, n_gen_vnd=n_gen_vnd,
                               kmax=kmax, n_neighbors=n_neighbors, y1=y1, y2=y2, yX=yX, colMax=best_cols,
                               bestScore=best_res, bestScoreA=best_accuracy, bestScoreP=best_precision,
                               bestScoreR=best_recall, bestScoreF=best_fscore, bestModel=best_model, debut=debut)

                if (iteration % 5) == 0:
                    print("Sauvegarde du tableau actuel dans les fichiers, itération:", iteration)
                    tab.dump(self.tab_data, self.tab_vals, 'tab_' + self.data_name + '_' + mode)

            arg1, arg2 = utility.getList(bestModel=best_model, bestScore=best_res, bestScoreA=best_accuracy,
                                         bestScoreP=best_precision, bestScoreR=best_recall, bestScoreF=best_fscore,
                                         bestCols=best_cols, indMax=best_solution, unique=unique, mode=mode)

            x.put(list(arg1))
            y.put(list(arg2))
            besties.put(y1)
            names.put(folderName + ": " + "{:.3f}".format(best_res))
            iters.put(iteration)
Ejemplo n.º 4
0
    def optimization(self, part, temperature, alpha, final_temperature, data,
                     dummiesList, createDummies, normalize, metric, x, y,
                     besties, names, iters):
        """Run simulated annealing once for every mode in ``part``.

        For each mode a random boolean solution is drawn. While the working
        temperature is above ``final_temperature``, one random position is
        flipped to build a neighbour, both the current solution and the
        neighbour are scored with ``utility.fitness2``, and the neighbour is
        accepted if it is at least as good, or otherwise with probability
        ``exp((score_neighbour - score_current) / temperature)``. The
        temperature decreases by ``alpha`` per iteration. The score curves are
        re-plotted to PNG files under ``self.path2/<MODE>`` on every iteration.
        After the loop ``self.write_res`` is called, the lookup table is dumped
        with ``tab.dump`` and the results are pushed onto ``x``, ``y``,
        ``besties``, ``names`` and ``iters`` through their ``put`` method.

        Args:
            part: Iterable of mode names (strings) to process.
            temperature: Starting temperature, applied afresh to every mode.
            alpha: Amount subtracted from the temperature after each iteration.
            final_temperature: The loop stops once the temperature is no longer
                above this value.
            data: Dataset; indexed with ``self.target`` to list the classes.
            dummiesList, createDummies, normalize: Forwarded to ``utility.fitness2``.
            metric: Score to optimise: ``'accuracy'``/``'exactitude'``,
                ``'recall'``/``'rappel'``, ``'precision'``/``'précision'`` or
                ``'fscore'``. Any other value falls back to accuracy.
            x, y, besties, names, iters: Output containers exposing ``put``.
        """
        debut = time.time()

        for mode in part:

            folderName = mode.upper()

            utility.createDirectory(path=self.path2, folderName=folderName)

            iteration = 0

            feature_cols = self.data.drop([self.target], axis=1).columns

            unique = list(np.unique(data[self.target]))

            # Initialise the lookup table
            self.tab_data, self.tab_vals = tab.init(
                size=len(feature_cols),
                matrix_size=len(unique),
                filename='tab_' + self.data_name + '_' + mode)

            x1 = []
            y1 = []
            y2 = []
            yX = []

            solution = np.random.choice(a=[False, True],
                                        size=self.copy.columns.size - 1)

            best_solution = None
            best_res = 0
            best_accuracy = None
            best_precision = None
            best_recall = None
            best_fscore = None
            best_cols = None
            best_model = None

            begin_temperature = temperature
            # Cool a per-mode copy: decrementing the parameter itself left every
            # mode after the first with an already-exhausted temperature.
            current_temperature = temperature

            while current_temperature > final_temperature:
                instant = time.time()

                # Neighbour = current solution with one random position inverted.
                mutate_index = random.sample(range(0, len(solution)), 1)
                neighbor = solution.copy()
                for m in mutate_index:
                    neighbor[m] = not neighbor[m]

                accuracy, recall, precision, fscore, cols, model, obj = \
                    utility.fitness2(self, mode, solution, data, dummiesList, createDummies, normalize)

                # Carry the lookup-table state returned by fitness2 back onto this instance.
                self.tab_data, self.tab_vals, self.tab_insert, self.tab_find = \
                    obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

                accuracy_n, recall_n, precision_n, fscore_n, cols_n, model_n, obj = \
                    utility.fitness2(self, mode, neighbor, data, dummiesList, createDummies, normalize)

                self.tab_data, self.tab_vals, self.tab_insert, self.tab_find = \
                    obj.tab_data, obj.tab_vals, obj.tab_insert, obj.tab_find

                # Explicit lookup of (current, neighbour) scores: the former
                # `metric == 'a' or 'b'` chain was always true, so every metric
                # silently resolved to accuracy.
                scores_by_metric = {
                    'accuracy': (accuracy, accuracy_n),
                    'exactitude': (accuracy, accuracy_n),
                    'recall': (recall, recall_n),
                    'rappel': (recall, recall_n),
                    'precision': (precision, precision_n),
                    'précision': (precision, precision_n),
                    'fscore': (fscore, fscore_n),
                }
                res_sol, res_nei = scores_by_metric.get(metric, (accuracy, accuracy_n))

                # Always record the first evaluated solution so best_model is never
                # None when the per-class curve is computed below.
                if best_solution is None or res_sol > best_res:
                    best_solution = solution
                    best_res = res_sol
                    best_accuracy = accuracy
                    best_precision = precision
                    best_recall = recall
                    best_fscore = fscore
                    best_cols = cols
                    best_model = model

                cost = res_nei - res_sol
                if cost >= 0:
                    solution = neighbor
                    res_sol = res_nei
                else:
                    # The score is maximised and cost < 0 here, so the acceptance
                    # probability is exp(cost / T) in (0, 1). The former
                    # exp(-cost / T) was always > 1 and accepted every worse move.
                    r = random.uniform(0, 1)
                    if r < math.exp(cost / current_temperature):
                        solution = neighbor
                        res_sol = res_nei

                print("mode: ", mode, " valeur: ", best_res, " iteration: ",
                      iteration, " temps exe: ",
                      str(timedelta(seconds=(time.time() - instant))),
                      " temps total: ",
                      str(timedelta(seconds=(time.time() - debut))))

                x1.append(iteration)
                y1.append(best_res)
                y2.append(res_sol)

                # One value per index of best_model: TP / (FN + TP).
                yX.append([
                    utility.getTruePositive(best_model, i) /
                    (utility.getFalseNegative(best_model, i) +
                     utility.getTruePositive(best_model, i))
                    for i in range(len(best_model))
                ])

                # Best / current score curves; the same file is overwritten every iteration.
                fig, ax = plt.subplots()
                ax.plot(x1, y1)
                ax.plot(x1, y2)
                ax.set_title("Evolution du score par génération (" +
                             folderName + ")" + "\nRecuit simulé")
                ax.set_xlabel("génération")
                ax.set_ylabel(metric)
                ax.grid()
                ax.legend(labels=["Le meilleur", "Valeur actuelle"],
                          loc='center left',
                          bbox_to_anchor=(1.04, 0.5),
                          borderaxespad=0)
                a = os.path.join(os.path.join(self.path2, folderName),
                                 'plot_' + str(begin_temperature) + '.png')
                b = os.path.join(os.getcwd(), a)
                fig.savefig(os.path.abspath(b), bbox_inches="tight")
                plt.close(fig)

                # Per-class curves; the same file is overwritten every iteration.
                fig2, ax2 = plt.subplots()

                ax2.plot(x1, yX)

                ax2.set_title(
                    "Evolution du score par génération pour chacune des classes ("
                    + folderName + ")" + "\nRecuit simulé")
                ax2.set_xlabel("génération")
                ax2.set_ylabel(metric)
                ax2.grid()
                ax2.legend(labels=unique,
                           loc='center left',
                           bbox_to_anchor=(1.04, 0.5),
                           borderaxespad=0)
                a = os.path.join(os.path.join(self.path2, folderName),
                                 'plotb_' + str(begin_temperature) + '.png')
                b = os.path.join(os.getcwd(), a)
                fig2.savefig(os.path.abspath(b), bbox_inches="tight")
                plt.close(fig2)

                current_temperature = current_temperature - alpha
                iteration = iteration + 1

            self.write_res(folderName=folderName,
                           mode=mode,
                           temperature=begin_temperature,
                           alpha=alpha,
                           final_temperature=final_temperature,
                           y1=y1,
                           y2=y2,
                           yX=yX,
                           colMax=best_cols,
                           bestScore=best_res,
                           bestScoreA=best_accuracy,
                           bestScoreP=best_precision,
                           bestScoreR=best_recall,
                           bestScoreF=best_fscore,
                           bestModel=best_model,
                           debut=debut)

            # NOTE(review): this check runs once after the loop, so it only duplicates
            # the unconditional dump at the end of this mode; it may have been meant
            # to sit inside the loop as a periodic save — confirm before moving.
            if (iteration % 5) == 0:
                print(
                    "Sauvegarde du tableau actuel dans les fichiers, itération:",
                    iteration)
                tab.dump(self.tab_data, self.tab_vals,
                         'tab_' + self.data_name + '_' + mode)

            arg1, arg2 = utility.getList(bestModel=best_model,
                                         bestScore=best_res,
                                         bestScoreA=best_accuracy,
                                         bestScoreP=best_precision,
                                         bestScoreR=best_recall,
                                         bestScoreF=best_fscore,
                                         bestCols=best_cols,
                                         indMax=best_solution,
                                         unique=unique,
                                         mode=mode)

            x.put(list(arg1))
            y.put(list(arg2))
            besties.put(y2)
            names.put(folderName + ": " + "{:.3f}".format(best_res))
            iters.put(iteration)

            tab.dump(self.tab_data, self.tab_vals,
                     'tab_' + self.data_name + '_' + mode)