Example #1
       def model_baseline(self, prob):

           n_cols = prob.data.shape[1]
           particle = np.zeros(shape=(1, n_cols + 1))
           particle[:] = 1
           evaluator_ = SolutionEvaluator(prob, 1)
           score = evaluator_.evaluate(particle)
           return score[0]
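model_baseline scores a single particle with every feature switched on and reports the resulting fitness as the all-features baseline. A small stand-alone sketch of the particle layout assumed throughout these examples (n_cols feature entries plus one trailing slot that the evaluator overwrites with the fitness); the sizes below are made up for illustration:

import numpy as np

n_cols = 4
particle = np.zeros(shape=(1, n_cols + 1))
particle[:] = 1                    # every feature enabled
features = particle[0, :-1]        # feature activation values
fitness_slot = particle[0, -1]     # filled in by SolutionEvaluator.evaluate
print(features, fitness_slot)      # [1. 1. 1. 1.] 1.0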
Example #2
       def fit(self, X, unused_y, **kargs):
           
           if not isinstance(X, pd.DataFrame):
               raise PSOException('The "X" parameter must be a data frame')
               
           self._initialize(X)
           prob = Problem(X, unused_y, self.estimator,
                          self.cv, **kargs)
           

           self.N_ = prob.n_cols
           self.evaluator_ = SolutionEvaluator(prob, self.num_particles)

           score_all = self.model_baseline(prob)
           rootLogger.info((
                       f'Score with all features - {score_all[-1]}'))

           self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
           self.best_global_ = np.full(shape=(1, self.N_ + 1),
                                       fill_value=np.nan)
           self.best_individual_ = np.full(
               shape=(self.num_particles, self.N_ + 1), fill_value=np.nan)
           self.solution_ = np.zeros(shape=(self.max_iter + 1, self.N_ + 1))
           
           
           while not self._is_stop_criteria_accepted():
               self.init_search_()
               count_sel_feat = self.count_features(self.best_global_[0])
               
               best_glob = self.best_global_[0]
               self.selected_features_  = np.ma.masked_where(best_glob[:-1]>0.6, best_glob[:-1])
               self.selected_features_, = np.where(self.selected_features_.mask == True)
               colunas = list(prob.data.iloc[:, self.selected_features_].columns)
               rootLogger.info((
                       f'Iteration: {self.iteration_}/{self.max_iter} \n , '
                       f'Best global metric: {self.best_global_[:, -1]} \n , '
                       f'Index features_selected: {self.selected_features_} \n , '
                       f'Number of selected features: {count_sel_feat} \n , '
                       f'Columns selected: {colunas}'))
               
               
               for i in range(0, self.num_particles):
                   self.count.append(self.count_features(
                           self.best_individual_[i, :]))
               
           best_glob = self.best_global_[0]
           self.selected_features_ = np.ma.masked_where(best_glob[:-1]>0.6, best_glob[:-1])
           self.selected_features_, = np.where(self.selected_features_.mask == True)
           colunas = list(prob.data.iloc[:, self.selected_features_].columns)
           rootLogger.info(f'Final Index features selected: {self.selected_features_} \n, '
                           f'Final Columns selected: {colunas} \n')
           
           self._final_cols = colunas     
           self._final_index = self.selected_features_
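The selection step inside fit() treats a position of the global best as a selected feature when its value exceeds 0.6 (the fitness stored in the last slot is excluded). A self-contained numpy sketch of that masking logic, using made-up values:

import numpy as np

best_glob = np.array([0.9, 0.1, 0.7, 0.3, 0.85])   # last entry is the fitness
masked = np.ma.masked_where(best_glob[:-1] > 0.6, best_glob[:-1])
selected_idx, = np.where(masked.mask)
print(selected_idx)                                 # [0 2]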
Example #3
class BBASelector(object):
    def __init__(self,
                 estimator,
                 theta=1.0,
                 gamma=1.0,
                 epsilon=1,
                 num_particles=30,
                 max_iter=100,
                 max_local_improvement=50,
                 maximize_objective=True,
                 initialization='uniform',
                 cv=3):

        self.theta = theta
        self.gamma = gamma
        self.epsilon = epsilon
        self.num_particles = num_particles
        self.max_iter = max_iter
        self.cv = cv
        self.evaluator_ = None
        self.estimator = estimator
        self.velocity_ = None
        self.solution_ = None
        self.initialize = 0
        self.initialize_1 = 0
        self.maxfit = 0
        self.maxindex = 0
        self.N = None
        self.max_local_improvement = max_local_improvement
        self.local_improvement = 0
        self.particles = None
        self.count = []
        self.N_ = 0
        self.iteration_ = 0
        self.pop_ = None
        self.count_global = 0
        self._final_cols = None
        self._final_index = None
        self._setup_initialization(initialization)
        self._setup_solution_comparator(maximize_objective)
        self.selected_features_ = None

    def _setup_initialization(self, initialization):

        init_method = {
            'uniform': create_population_uniform_strategy,
            '20_50': create_population_20_50_strategy
        }

        self._initialization = initialization
        if initialization not in init_method:
            raise BBAException(f'Invalid method {initialization!r}')
        self.init_method_ = init_method[initialization]
        #self.init_search_ = init_search[type_search]

    def _setup_solution_comparator(self, maximize_objective):

        self.maximize_objective = maximize_objective
        if self.maximize_objective:
            self.is_solution_better = maximize_comparator
        else:
            self.is_solution_better = minimize_comparator

    def model_baseline(self, prob):

        n_cols = prob.data.shape[1]
        particle = np.zeros(shape=(1, n_cols + 1))
        particle[:] = 1
        evaluator_ = SolutionEvaluator(prob, 1)
        score = evaluator_.evaluate(particle)

        return score[0]

    def fit(self, X, unused_y, **kargs):

        if not isinstance(X, pd.DataFrame):
            raise BBAException('The "X" parameter must be a data frame')

        prob = Problem(X, unused_y, self.estimator, self.cv, **kargs)

        self.N_ = prob.n_cols
        self._initialize(X)

        self.evaluator_ = SolutionEvaluator(prob, self.num_particles)

        score_all = self.model_baseline(prob)
        rootLogger.info((f'Score with all features - {score_all[-1]}'))

        while not self._is_stop_criteria_accepted():
            self.init_search()

            count_sel_feat = self.count_features(self.best_global_[0])

            best_glob = self.best_global_[0]
            self.selected_features_ = np.ma.masked_where(
                best_glob[:-1] > 0.6, best_glob[:-1])
            self.selected_features_, = np.where(
                self.selected_features_.mask == True)
            colunas = list(prob.data.iloc[:, self.selected_features_].columns)
            rootLogger.info(
                (f'Iteration: {self.iteration_}/{self.max_iter} \n , '
                 f'Best global metric: {self.best_global_[:, -1]} \n , '
                 f'Index features_selected: {self.selected_features_} \n , '
                 f'Number of selected features: {count_sel_feat} \n , '
                 f'Columns selected: {colunas}'))

        best_glob = self.best_global_[0]
        self.selected_features_ = np.ma.masked_where(best_glob[:-1] > 0.6,
                                                     best_glob[:-1])
        self.selected_features_, = np.where(
            self.selected_features_.mask == True)
        colunas = list(prob.data.iloc[:, self.selected_features_].columns)
        rootLogger.info(
            f'Final Index features selected: {self.selected_features_} \n, '
            f'Final Columns selected: {colunas} \n')

        self._final_cols = colunas
        self._final_index = self.selected_features_

    def _initialize(self, X):

        self.iteration_ = 0
        self.pop_ = self.init_method_(X, self.num_particles)
        self.particles_loudness, self.particles_rate = pulse_frequency_rate(
            self.num_particles)
        self.particles_rate_ = np.zeros(shape=(self.num_particles, 1))
        self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
        self.best_individual_ = np.zeros(shape=(self.num_particles, 1))
        self.best_global_ = np.zeros(shape=(1, self.N_ + 1))

    def _is_stop_criteria_accepted(self):

        no_global_improv = self.local_improvement >= self.max_local_improvement
        max_iter_reached = self.iteration_ >= self.max_iter
        return max_iter_reached or no_global_improv

    def init_search(self):

        self.pop_ = self.evaluator_.evaluate(self.pop_)
        self.evaluate_score(self.pop_)
        self.calculate_best_global()
        self.bat_position()
        self.update_velocity()
        self.iteration_ += 1

    def evaluate_score(self, pop):

        if self.initialize == 0:
            for i in np.arange(0, len(pop)):
                self.best_individual_[i] = pop[i, -1]

            self.initialize = 1

        for _i in np.arange(0, self.num_particles):
            rand = np.random.choice([0, 1], 1)[0]
            if (rand < self.particles_loudness[_i]) & (
                    pop[_i, -1] > self.best_individual_[_i]):
                self.best_individual_[_i] = pop[_i, -1]
                self.particles_loudness[
                    _i] = self.theta * self.particles_loudness[_i]
                self.particles_rate_[_i] = self.particles_rate[_i] * (
                    1 - np.exp(-self.gamma * self.iteration_))

        self.maxfit, self.maxindex = np.max(self.best_individual_), np.argmax(
            self.best_individual_)

    def calculate_best_global(self):

        if self.initialize_1 == 0:
            for i in np.arange(0, self.N_ + 1):
                self.best_global_[0, i] = self.pop_[0, i]

            self.initialize_1 = 1

        if self.is_solution_better(self.maxfit, self.best_global_[0, -1]):
            for j in np.arange(0, self.N_ + 1):
                self.best_global_[0, j] = self.pop_[self.maxindex, j]

    def bat_position(self):

        for _i in np.arange(0, self.num_particles):

            rand = np.random.choice([0, 1], 1)[0]

            if rand > self.particles_rate_[_i]:
                for j in range(0, self.N_):
                    self.pop_[_i,
                              j] = self.pop_[_i, j] + self.epsilon * np.mean(
                                  self.particles_loudness)
                    sigma = uniform(0, 1)
                    if sigma < 1 * (1 / (1 + np.exp(-self.pop_[_i, j]))):
                        self.pop_[_i, j] = 1
                    else:
                        self.pop_[_i, j] = 0

    def update_velocity(self):

        for _i in np.arange(0, self.num_particles):
            betha = np.random.choice([0, 1], 1)[0]
            rand = np.random.choice([0, 1], 1)[0]
            if (rand < self.particles_loudness[_i]) & (
                    self.best_individual_[_i] < self.best_global_[0, -1]):
                for j in range(0, self.N_):
                    fi = 0 + (0 + 1) * betha
                    self.velocity_[_i, j] = self.velocity_[_i, j] + (
                        self.best_global_[0, j] - self.pop_[_i, j]) * fi
                    self.pop_[_i, j] = self.pop_[_i, j] + self.velocity_[_i, j]
                    sigma = uniform(0, 1)
                    if sigma < (1 / (1 + np.exp(-self.pop_[_i, j]))):
                        self.pop_[_i, j] = 1
                    else:
                        self.pop_[_i, j] = 0

    def count_features(self, particle_proportions, threshold=1):

        count = 0
        for i in range(0, self.N_):
            if particle_proportions[i] == threshold:
                count = count + 1
        return count

    @property
    def final_cols(self):
        return self._final_cols

    @property
    def final_index(self):
        return self._final_index
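A minimal usage sketch for BBASelector. It assumes the module-level helpers the class relies on (Problem, SolutionEvaluator, pulse_frequency_rate and the population strategies) are importable alongside it, and that the chosen estimator exposes whatever interface SolutionEvaluator expects; the dataset and estimator below are purely illustrative:

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

data = load_iris(as_frame=True)
X, y = data.data, data.target              # fit() requires X as a DataFrame

selector = BBASelector(estimator=LogisticRegression(max_iter=200),
                       num_particles=20, max_iter=50, cv=3)
selector.fit(X, y)
print(selector.final_cols)   # names of the selected columns
print(selector.final_index)  # positional indices of the selected columns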
Example #4
class PSOSelector(object):
      
       def __init__(self, estimator, w=0.7298, c1=1.49618, c2=1.49618,
                    num_particles=30, max_iter=100, max_local_improvement=50,
                    maximize_objective=True, initialization='uniform',
                    fitness_method='type_2', cv = 3):
                                                 
           self.w = w
           self.c1 = c1
           self.c2 = c2
           self.num_particles = num_particles
           self.max_iter = max_iter
           self.cv = cv
           self.evaluator_ = None
           self.estimator = estimator
           self.velocity_ = None
           self.solution_ = None
           self.initialize = 0
           self.initialize_1 = 0
           self.N = None
           self.max_local_improvement = max_local_improvement
           self.local_improvement = 0
           self.particles = None
           self.count = []
           self.N_ = 0
           self.iteration_ = 0
           self.pop_ = None
           self.count_global = 0
           self._final_cols = None
           self._final_index = None
           self._setup_initialization(initialization, fitness_method)
           self._setup_solution_comparator(maximize_objective)
           self.selected_features_ = None
           
       def _setup_initialization(self, initialization, type_search):
           
           init_method = {
                   'uniform': create_population_uniform_strategy,
                   '20_50': create_population_20_50_strategy
                         }
           
           init_search = {
                    'type_1': self.search_type_1,
                    'type_2': self.search_type_2,
                    'type_3': self.search_type_3,
                    'type_4': self.search_type_4
                          }     
                                                     
           self._initialization = initialization
           if initialization not in init_method:
               raise PSOException(f'Invalid method {initialization!r}')
           self.init_method_ = init_method[initialization]
           self.init_search_ = init_search[type_search]
           
       def _setup_solution_comparator(self, maximize_objective):
           
           self.maximize_objective = maximize_objective
           if self.maximize_objective:
               self.is_solution_better = maximize_comparator
           else:
               self.is_solution_better = minimize_comparator
               
       def model_baseline(self, prob):

           n_cols = prob.data.shape[1]
           particle = np.zeros(shape=(1, n_cols + 1))
           particle[:] = 1
           evaluator_ = SolutionEvaluator(prob, 1)
           score = evaluator_.evaluate(particle)
           return score[0]


       def fit(self, X, unused_y, **kargs):
           
           if not isinstance(X, pd.DataFrame):
               raise PSOException('The "X" parameter must be a data frame')
               
           self._initialize(X)
           prob = Problem(X, unused_y, self.estimator,
                          self.cv, **kargs)
           

           self.N_ = prob.n_cols
           self.evaluator_ = SolutionEvaluator(prob, self.num_particles)

           score_all = self.model_baseline(prob)
           rootLogger.info((
                       f'Score with all features - {score_all[-1]}'))

           self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
           self.best_global_ = np.full(shape=(1, self.N_ + 1),
                                       fill_value=np.nan)
           self.best_individual_ = np.full(
               shape=(self.num_particles, self.N_ + 1), fill_value=np.nan)
           self.solution_ = np.zeros(shape=(self.max_iter + 1, self.N_ + 1))
           
           
           while not self._is_stop_criteria_accepted():
               self.init_search_()
               count_sel_feat = self.count_features(self.best_global_[0])
               
               best_glob = self.best_global_[0]
               self.selected_features_  = np.ma.masked_where(best_glob[:-1]>0.6, best_glob[:-1])
               self.selected_features_, = np.where(self.selected_features_.mask == True)
               colunas = list(prob.data.iloc[:, self.selected_features_].columns)
               rootLogger.info((
                       f'Iteration: {self.iteration_}/{self.max_iter} \n , '
                       f'Best global metric: {self.best_global_[:, -1]} \n , '
                       f'Index features_selected: {self.selected_features_} \n , '
                       f'Number of selected features: {count_sel_feat} \n , '
                       f'Columns selected: {colunas}'))
               
               
               for i in range(0, self.num_particles):
                   self.count.append(self.count_features(
                           self.best_individual_[i, :]))
               
           best_glob = self.best_global_[0]
           self.selected_features_ = np.ma.masked_where(best_glob[:-1]>0.6, best_glob[:-1])
           self.selected_features_, = np.where(self.selected_features_.mask == True)
           colunas = list(prob.data.iloc[:, self.selected_features_].columns)
           rootLogger.info(f'Final Index features selected: {self.selected_features_} \n, '
                           f'Final Columns selected: {colunas} \n')
           
           self._final_cols = colunas     
           self._final_index = self.selected_features_
           
       def _initialize(self, X):
           
          self.iteration_ = 0
          self.pop_ = self.init_method_(X, self.num_particles)
              
       def _is_stop_criteria_accepted(self):
          
          no_global_improv = self.local_improvement >= self.max_local_improvement
          max_iter_reached = self.iteration_ >= self.max_iter
          return max_iter_reached or no_global_improv
      
       def search_type_1(self):
           
          self.pop_ = self.evaluator_.evaluate(self.pop_)
          self.calculate_best_individual_type_1(self.pop_)
          self.calculate_best_global_type_1()
          self.solution_[self.iteration_, :] = self.best_global_
          self.update_velocity()
          self.iteration_ += 1
             
       def search_type_2(self):
         
          self.pop_ = self.evaluator_.evaluate(self.pop_)
          self.calculate_best_individual_type_2(self.pop_)
          self.calculate_best_global_type_2()
          self.solution_[self.iteration_, :] = self.best_global_
          self.update_velocity()
          self.iteration_ += 1

       def search_type_3(self):
         
          self.pop_ = self.evaluator_.evaluate(self.pop_)
          self.calculate_best_individual_type_3(self.pop_)
          self.calculate_best_global_type_3()
          self.solution_[self.iteration_, :] = self.best_global_
          self.update_velocity()
          self.iteration_ += 1

       def search_type_4(self):
         
          self.pop_ = self.evaluator_.evaluate(self.pop_)
          self.calculate_best_individual_type_4(self.pop_)
          self.calculate_best_global_type_4()
          self.solution_[self.iteration_, :] = self.best_global_
          self.update_velocity()
          self.iteration_ += 1        
         
       def update_velocity(self):
         
          w = self.w
          c1, c2 = self.c1, self.c2

          for i in range(0, len(self.pop_)):
             for j in range(0, self.N_):
                 r1= round(uniform(0,1), 2)
                 r2 = round(uniform(0, 1), 2)
                 pop = self.pop_[i, j]
                 inertia = w * self.velocity_[i,j]
                 cognitive = c1 * r1 * (self.best_individual_[i,j] - pop)
                 social = c2 * r2 * (self.best_global_[0, j] - pop)
                 velocity = inertia + cognitive + social
                 self.velocity_[i,j] = velocity
                 self.pop_[i,j] += velocity
                 
       def calculate_best_individual_type_2(self, pop):
         
         if self.initialize == 0:
            for i in range(0, len(pop)):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i,j] = pop[i,j]
                         
            self.initialize = 1
            return
             
         for i in range(0, len(pop)):
             candidate_a = pop[i, self.N_]
             candidate_b = self.best_individual_[i, self.N_]
             if self.is_solution_better(candidate_a,candidate_b):
                 for j in range(0 , self.N_ + 1):
                     self.best_individual_[i,j] = pop[i,j]
                 continue    
             
             particle_count = self.count_features(self.pop_[i, :])
             count_best_individual = self.count_features(
                     self.best_individual_[i, :])
             
             if particle_count > 0:
                 if (candidate_a == candidate_b
                        and particle_count < count_best_individual):
                     
                     for j in range(0, self.N_ + 1):
                         self.best_individual_[i,j] = pop[i,j]
                         
                         
       def calculate_best_global_type_2(self):
         
         if self.initialize_1 == 0:
             for i in range(0, self.N_ + 1):
                 self.best_global_[0,i] = self.best_individual_[0, i]
                 
             self.initialize_1 = 1
             self.count_global = self.count_features(self.best_global_[0, :])

                        
         for i in range(0, self.num_particles): 
             best_ind = self.best_individual_[i, self.N_]
             best_global = self.best_global_[0, self.N_]

             if self.is_solution_better(best_ind,
                                        best_global):
                 
                self.local_improvement = 1
                for j in range(0, self.N_ + 1):
                    
                    self.best_global_[0,j] = self.best_individual_[i,j]
                 

                self.count_global = self.count_features(
                         self.best_global_[0, :])
                 
                continue
             
             count_best_individual = self.count_features(self.best_individual_[i, :])
             
             if (best_global == best_ind
                       and count_best_individual < self.count_global):
                 
                 self.local_improvement = 1
                 self.count_global = 0
                 for j in range(0, self.N_ + 1):
                     self.best_global_[0, j] = self.best_individual_[i,j]
                     
                 self.count_global = self.count_features(
                         self.best_global_[0, :])
                 
                           
       def calculate_best_individual_type_1(self,pop):

         if self.initialize == 0:
             for i in range(0, len(pop)):
                 for j in range(0, self.N_ + 1):
                     self.best_individual_[i,j] = pop[i,j]
             self.initialize = 1
             return
         
         for i in range(0, len(pop)):
             if self.is_solution_better(pop[i,self.N_],
                                        self.best_individual_[i, self.N_]): 
                 for j in range(0, self.N_ + 1):
                     self.best_individual_[i, j] = pop[i,j]
                    
                 
       def calculate_best_global_type_1(self):
           if self.initialize_1 == 0:
               for i in range(0, self.N_ + 1):
                   self.best_global_[0, i] = self.best_individual_[0, i]
               self.initialize_1 = 1

           for i in range(0, len(self.pop_)):
               if self.is_solution_better(self.best_individual_[i, self.N_],
                                          self.best_global_[0, self.N_]):
                   self.local_improvement = 1
                   for j in range(0, self.N_ + 1):
                       self.best_global_[0, j] = self.best_individual_[i, j]

           self.local_improvement += 1
          

       def calculate_best_individual_type_3(self, pop):
         
         if self.initialize == 0:
            for i in range(0, len(pop)):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i,j] = pop[i,j]
                         
            self.initialize = 1
            return
             
         for i in range(0, len(pop)):
             candidate_a = pop[i, self.N_]
             candidate_b = self.best_individual_[i, self.N_]

             particle_count = self.count_features(self.pop_[i, :])
             count_best_individual = self.count_features(
                     self.best_individual_[i, :])
             
             if particle_count > 0:
                 if (self.is_solution_better(candidate_a,candidate_b)
                        and particle_count <= count_best_individual):
                     
                     for j in range(0, self.N_ + 1):
                         self.best_individual_[i,j] = pop[i,j]

                 elif (candidate_a == candidate_b 
                      and particle_count < count_best_individual):   

                     for j in range(0, self.N_ + 1):
                         self.best_individual_[i,j] = pop[i,j]    
                 else:
                     continue        
                         
       def calculate_best_global_type_3(self):
         
         if self.initialize_1 == 0:
             for i in range(0, self.N_ + 1):
                 self.best_global_[0,i] = self.best_individual_[0, i]
                 
             self.initialize_1 = 1
             self.count_global = self.count_features(self.best_global_[0, :])

                        
         for i in range(0, self.num_particles): 
             best_ind = self.best_individual_[i, self.N_]
             best_global = self.best_global_[0, self.N_]
             count_best_individual = self.count_features(self.best_individual_[i, :])
             
             if (self.is_solution_better(best_ind,best_global)
                 and count_best_individual <= self.count_global):
                 
                self.local_improvement = 1
                for j in range(0, self.N_ + 1):
                    self.best_global_[0,j] = self.best_individual_[i,j]
                 

                self.count_global = self.count_features(
                         self.best_global_[0, :])
                 
             elif (best_ind == best_global
                  and count_best_individual < self.count_global):

                 self.local_improvement = 1
                 self.count_global = 0
                 for j in range(0, self.N_ + 1):
                     self.best_global_[0, j] = self.best_individual_[i,j]
                     
                 self.count_global = self.count_features(
                         self.best_global_[0, :])

             else:
                 continue            



       def calculate_best_individual_type_4(self, pop):
         
         if self.initialize == 0:
            for i in range(0, len(pop)):
                for j in range(0, self.N_ + 1):
                    self.best_individual_[i,j] = pop[i,j]
                         
            self.initialize = 1
            return
             
         for i in range(0, len(pop)):
             candidate_a = pop[i, self.N_]
             candidate_b = self.best_individual_[i, self.N_]

             particle_count = self.count_features(self.pop_[i, :])
             count_best_individual = self.count_features(
                     self.best_individual_[i, :])
             
             if particle_count > 0:
                 if (self.is_solution_better(candidate_a,candidate_b)
                        and particle_count <= count_best_individual):
                     
                     for j in range(0, self.N_ + 1):
                         self.best_individual_[i,j] = pop[i,j]

                 elif (self.is_solution_better(candidate_a, 0.95 * candidate_b) 
                      and particle_count < count_best_individual):   

                     for j in range(0, self.N_ + 1):
                         self.best_individual_[i,j] = pop[i,j]    
                 else:
                     continue        
                         
       def calculate_best_global_type_4(self):
         
         if self.initialize_1 == 0:
             for i in range(0, self.N_ + 1):
                 self.best_global_[0,i] = self.best_individual_[0, i]
                 
             self.initialize_1 = 1
             self.count_global = self.count_features(self.best_global_[0, :])

                        
         for i in range(0, self.num_particles): 
             best_ind = self.best_individual_[i, self.N_]
             best_global = self.best_global_[0, self.N_]
             count_best_individual = self.count_features(self.best_individual_[i, :])
             
             if (self.is_solution_better(best_ind,best_global)
                 and count_best_individual <= self.count_global):
                 
                self.local_improvement = 1
                for j in range(0, self.N_ + 1):
                    self.best_global_[0,j] = self.best_individual_[i,j]
                 

                self.count_global = self.count_features(
                         self.best_global_[0, :])
                 
             elif (self.is_solution_better(best_ind, 0.95 * best_global)
                  and count_best_individual < self.count_global):

                 self.local_improvement = 1
                 self.count_global = 0
                 for j in range(0, self.N_ + 1):
                     self.best_global_[0, j] = self.best_individual_[i,j]
                     
                 self.count_global = self.count_features(
                         self.best_global_[0, :])

             else:
                 continue   


       def count_features(self, particle_proportions, threshold=0.6):
         
         count = 0
         for i in range(0, self.N_):
             if particle_proportions[i] > threshold:
                 count = count + 1
         return count        
         
       @property   
       def final_cols(self):           
           return self._final_cols 

       @property
       def final_index(self):
           return self._final_index      
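A minimal usage sketch for this PSOSelector, under the same assumption that Problem, SolutionEvaluator and the population helpers are importable; fitness_method picks one of the four personal/global best acceptance rules (type_1 .. type_4), and the dataset and estimator are illustrative only:

from sklearn.datasets import load_wine
from sklearn.linear_model import LogisticRegression

data = load_wine(as_frame=True)
X, y = data.data, data.target

selector = PSOSelector(estimator=LogisticRegression(max_iter=500),
                       fitness_method='type_2',    # one of type_1 .. type_4
                       num_particles=20, max_iter=50, cv=3)
selector.fit(X, y)
print(selector.final_cols)   # selected column names
print(selector.final_index)  # selected column indices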
Example #5
class PSOSelector(object):
      
       def __init__(self, estimator, w=0.7298, c1=1.49618, c2=1.49618,
                    num_particles=30, max_iter=100, max_local_improvement=50,
                    maximize_objective=True, initialization='uniform',
                    fitness_method='type_2', cv = 3, verbose=True):
                                                 
           self.w = w
           self.c1 = c1
           self.c2 = c2
           self.num_particles = num_particles
           self.max_iter = max_iter
           self.cv = cv
           self.evaluator_ = None
           self.estimator = estimator
           self.velocity_ = None
           self.best_individual_ = None
           self.best_global_ = None
           self.best_global_fm = 0
           self.best_global_cp = 0
           self.best_cp_ = 0
           self.best_fm_ = 0
           self.solution_ = None
           self.initialize = 0
           self.initialize_1 = 0
           self.verbose = verbose
           self.N = None
           self.max_local_improvement = max_local_improvement
           self.local_improvement = 0
           self.particles = None
           self.count = []
           self.N_ = 0
           self.iteration_ = 0
           self.pop_ = {}
           self.count_global = 0
           self._setup_initialization(initialization, fitness_method)
           self._setup_solution_comparator(maximize_objective)
           self.selected_features_ = None
           
       def _setup_initialization(self, initialization, type_search):
           
           init_method = {
                   'uniform': create_population_uniform_strategy,
                   '20_50': create_population_20_50_strategy
                         }
           
           init_search = {
                    'type_1': self.search_type_1,
                    'type_2': self.search_type_2
                          }     
                                                     
           self._initialization = initialization
           if initialization not in init_method:
               raise PSOException(f'Invalid method {initialization!r}')
           self.init_method_ = init_method[initialization]
           self.init_search_ = init_search[type_search]
           
       def _setup_solution_comparator(self, maximize_objective):
           
           self.maximize_objective = maximize_objective
           if self.maximize_objective:
               self.is_solution_better = maximize_comparator
           else:
               self.is_solution_better = minimize_comparator
               
               
               
       def fit(self, X, unused_y = None, **kargs):
           
           if not isinstance(X, pd.DataFrame):
               raise PSOException('The "X" parameter must be a data frame')
        
           colunas_full  = X.columns
            
           self._initialize(X)
           

           if unused_y is not None:
               self.pop_['cp'] = np.zeros(shape=(1, self.num_particles))[0]
               self.pop_['fm'] = np.zeros(shape=(1, self.num_particles))[0]

           prob = Problem(X, unused_y, self.estimator,
                          self.cv, **kargs)
           
           self.N_ = prob.n_cols
           self.evaluator_ = SolutionEvaluator(prob, self.num_particles)
           
           self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
           self.best_individual_ = np.full(
               shape=(self.num_particles, self.N_ + 1), fill_value=np.nan)
           self.best_global_ = np.full(shape=(1, self.N_ + 1),
                                       fill_value=np.nan)
           self.solution_ = np.zeros(shape=(self.max_iter + 1, self.N_ + 1))
           
           #Parameters to store the purity and fmeasure metric
           self.best_fm_ = np.zeros(shape=(1, self.num_particles))[0]
           self.best_cp_ = np.zeros(shape=(1, self.num_particles))[0]

           while not self._is_stop_criteria_accepted():
               self.init_search_()
               count_sel_feat = self.count_features(self.best_global_[0])
               
               best_glob = self.best_global_[0]
               self.selected_features_  = np.ma.masked_where(best_glob[:-1]>0.6, best_glob[:-1])
               self.selected_features_, = np.where(self.selected_features_.mask == True)
               colunas = colunas_full[self.selected_features_]

               if self.verbose:
                   interm_var = f'Iteration: {self.iteration_}/{self.max_iter} \n ,'
                   interm_var = interm_var + f'Best global metric - CP X PF: {self.best_global_[:, -1]} \n , '
                   if unused_y is not None:
                       interm_var = interm_var + f'Best global metric purity: {self.best_global_cp} \n ,'
                       interm_var = interm_var + f'Best global metric f-measure: {self.best_global_fm} \n ,'
                   interm_var = interm_var + f'Index features_selected: {self.selected_features_} \n , '
                   interm_var = interm_var + f'Number of selected features: {count_sel_feat} \n , '
                   interm_var = interm_var + f'Columns selected: {colunas}'
                   rootLogger.info(interm_var)
              
               
               for i in range(0, self.num_particles):
                   self.count.append(self.count_features(
                           self.best_individual_[i, :]))
                       
               best_glob = self.best_global_[0]
               self.selected_features_ = np.ma.masked_where(best_glob[:-1]>0.6, best_glob[:-1])
               self.selected_features_, = np.where(self.selected_features_.mask == True)
               
       def _initialize(self, X):
           
          self.iteration_ = 0
          self.pop_['pop'] = self.init_method_(X, self.num_particles)
            
       def _is_stop_criteria_accepted(self):
            
          no_global_improv = self.local_improvement >= self.max_local_improvement
          max_iter_reached = self.iteration_ >= self.max_iter
          return max_iter_reached or no_global_improv
      
       def search_type_1(self):
          
          self.pop_ = self.evaluator_.evaluate(self.pop_)
          self.calculate_best_individual_pso_1_1(self.pop_)
          self.calculate_best_global_pso_1_1()
          self.solution_[self.iteration_, :] = self.best_global_
          self.update_velocity()
          self.iteration_ += 1
          
          
       def search_type_2(self):
         
          self.pop_ = self.evaluator_.evaluate(self.pop_)
          self.calculate_best_individual(self.pop_)
          self.calculate_best_global()
          self.solution_[self.iteration_, :] = self.best_global_
          self.update_velocity()
          self.iteration_ += 1
         
       def update_velocity(self):
         
          w = self.w
          c1, c2 = self.c1, self.c2

          for i in range(0, len(self.pop_['pop'])):
             for j in range(0, self.N_):
                 r1= round(uniform(0,1), 2)
                 r2 = round(uniform(0, 1), 2)
                 pop = self.pop_['pop'][i, j]
                 inertia = w * self.velocity_[i,j]
                 cognitive = c1 * r1 * (self.best_individual_[i,j] - pop)
                 social = c2 * r2 * (self.best_global_[0, j] - pop)
                 velocity = inertia + cognitive + social
                 self.velocity_[i,j] = velocity
                 self.pop_['pop'][i,j] += velocity
                 
       def calculate_best_individual(self, pop):

           if self.initialize == 0:
               for i in range(0, len(pop['pop'])):
                   for j in range(0, self.N_ + 1):
                       self.best_individual_[i, j] = pop['pop'][i, j]
                   if 'fm' in pop.keys():
                       self.best_fm_[i] = pop['fm'][i]
                   if 'cp' in pop.keys():
                       self.best_cp_[i] = pop['cp'][i]
               self.initialize = 1
               return

           for i in range(0, len(pop['pop'])):
               candidate_a = pop['pop'][i, self.N_]
               candidate_b = self.best_individual_[i, self.N_]
               if self.is_solution_better(candidate_a, candidate_b):
                   for j in range(0, self.N_ + 1):
                       self.best_individual_[i, j] = pop['pop'][i, j]
                   if 'fm' in pop.keys():
                       self.best_fm_[i] = pop['fm'][i]
                   if 'cp' in pop.keys():
                       self.best_cp_[i] = pop['cp'][i]
                   continue

               particle_count = self.count_features(self.pop_['pop'][i, :])
               count_best_individual = self.count_features(
                       self.best_individual_[i, :])

               if particle_count > 0:
                   if (candidate_a == candidate_b
                          and particle_count < count_best_individual):

                       for j in range(0, self.N_ + 1):
                           self.best_individual_[i, j] = pop['pop'][i, j]
                       if 'fm' in pop.keys():
                           self.best_fm_[i] = pop['fm'][i]
                       if 'cp' in pop.keys():
                           self.best_cp_[i] = pop['cp'][i]

                         
                         
       def calculate_best_global(self):

         if self.initialize_1 == 0:
             
             for i in range(0, self.N_ + 1):
                 self.best_global_[0,i] = self.best_individual_[0, i]
             if 'fm' in self.pop_.keys():
                 self.best_global_fm = self.best_fm_[0]
             if 'cp' in self.pop_.keys():
                 self.best_global_cp = self.best_cp_[0]

             self.initialize_1 = 1
             
             self.count_global = self.count_features(self.best_global_[0, :])
                         
         for i in range(0, self.num_particles):   
             if self.is_solution_better(self.best_individual_[i, self.N_],
                                        self.best_global_[0, self.N_]):
                 
                 self.local_improvement = 1
                 self.count_global = 0
                 
                 for j in range(0, self.N_ + 1):
                     self.best_global_[0,j] = self.best_individual_[i,j]

                 if 'fm' in self.pop_.keys():
                     self.best_global_fm = self.best_fm_[i]
                 if 'cp' in self.pop_.keys():
                     self.best_global_cp = self.best_cp_[i]    
                 self.count_global = self.count_features(
                         self.best_global_[0, :])
                 
                 continue
             
             count_best_individual = self.count_features(self.best_individual_[i, :])
             
             best_global = self.best_global_[0, self.N_]
             best_ind = self.best_individual_[i, self.N_]
             
             if (best_global == best_ind
                       and count_best_individual < self.count_global):
                 
                 self.local_improvement = 1
                 self.count_global = 0
                 for j in range(0, self.N_ + 1):
                     self.best_global_[0, j] = self.best_individual_[i,j]

                 if 'fm' in self.pop_.keys():
                      self.best_global_fm = self.best_fm_[i]
                 if 'cp' in self.pop_.keys():
                     self.best_global_cp = self.best_cp_[i]

                 self.count_global = self.count_features(
                         self.best_global_[0, :])
                 
         self.local_improvement += 1        
                 
                 
       def calculate_best_individual_pso_1_1(self,pop):

         if self.initialize == 0:
             for i in range(0, len(pop['pop'])):
                 for j in range(0, self.N_ + 1):
                     self.best_individual_[i,j] = pop['pop'][i,j]
                 if 'fm' in pop.keys():
                     self.best_fm_[i] = pop['fm'][i]
                 if 'cp' in pop.keys():
                     self.best_cp_[i] = pop['cp'][i]


             self.initialize = 1
             return
         
         for i in range(0, len(pop['pop'])):
             if self.is_solution_better(pop['pop'][i,self.N_],
                                        self.best_individual_[i, self.N_]):
                 
                 for j in range(0, self.N_ + 1):
                     self.best_individual_[i, j] = pop['pop'][i,j]
                 if 'fm' in pop.keys():
                     self.best_fm_[i] = pop['fm'][i]
                 if 'cp' in pop.keys():
                     self.best_cp_[i] = pop['cp'][i]        
                    
                 
       def calculate_best_global_pso_1_1(self):
           if self.initialize_1 == 0:
               for i in range(0, self.N_ + 1):
                   self.best_global_[0, i] = self.best_individual_[0, i]

               if 'fm' in self.pop_.keys():
                   self.best_global_fm = self.best_fm_[0]
               if 'cp' in self.pop_.keys():
                   self.best_global_cp = self.best_cp_[0]
               self.initialize_1 = 1

           for i in range(0, len(self.pop_['pop'])):
               if self.is_solution_better(self.best_individual_[i, self.N_],
                                          self.best_global_[0, self.N_]):
                   self.local_improvement = 1

                   for j in range(0, self.N_ + 1):
                       self.best_global_[0, j] = self.best_individual_[i, j]

                   if 'fm' in self.pop_.keys():
                       self.best_global_fm = self.best_fm_[i]
                   if 'cp' in self.pop_.keys():
                       self.best_global_cp = self.best_cp_[i]
           self.local_improvement += 1
          
          
       def count_features(self, particle_proportions, threshold=0.6):
          
         count = 0
         for i in range(0, self.N_):
             if particle_proportions[i] > threshold:
                 count = count + 1
         return count        
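Unlike the previous variant, this PSOSelector keeps its population in a dictionary so that per-particle purity ('cp') and f-measure ('fm') values can be tracked next to the particle matrix when labels are supplied. A small numpy sketch of the shapes this implies, with made-up sizes:

import numpy as np

num_particles, n_features = 3, 4
pop_ = {
    'pop': np.zeros((num_particles, n_features + 1)),  # particles + fitness slot
    'cp': np.zeros(num_particles),                      # one purity value per particle
    'fm': np.zeros(num_particles),                      # one f-measure value per particle
}
print(pop_['pop'].shape, pop_['cp'].shape, pop_['fm'].shape)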
         
Example #6
       def fit(self, X, unused_y = None, **kargs):
           
           if not isinstance(X, pd.DataFrame):
               raise PSOException('The "X" parameter must be a data frame')
        
           colunas_full  = X.columns
            
           self._initialize(X)
           

           if unused_y is not None:
               self.pop_['cp'] = np.zeros(shape=(1, self.num_particles))[0]
               self.pop_['fm'] = np.zeros(shape=(1, self.num_particles))[0]

           prob = Problem(X, unused_y, self.estimator,
                          self.cv, **kargs)
           
           self.N_ = prob.n_cols
           self.evaluator_ = SolutionEvaluator(prob, self.num_particles)
           
           self.velocity_ = np.zeros(shape=(self.num_particles, self.N_))
           self.best_individual_ = np.full(
               shape=(self.num_particles, self.N_ + 1), fill_value=np.nan)
           self.best_global_ = np.full(shape=(1, self.N_ + 1),
                                       fill_value=np.nan)
           self.solution_ = np.zeros(shape=(self.max_iter + 1, self.N_ + 1))
           
           #Parameters to store the purity and fmeasure metric
           self.best_fm_ = np.zeros(shape=(1, self.num_particles))[0]
           self.best_cp_ = np.zeros(shape=(1, self.num_particles))[0]

           while not self._is_stop_criteria_accepted():
               self.init_search_()
               count_sel_feat = self.count_features(self.best_global_[0])
               
               best_glob = self.best_global_[0]
               self.selected_features_  = np.ma.masked_where(best_glob[:-1]>0.6, best_glob[:-1])
               self.selected_features_, = np.where(self.selected_features_.mask == True)
               colunas = colunas_full[self.selected_features_]

               if self.verbose:
                   interm_var = f'Iteration: {self.iteration_}/{self.max_iter} \n ,'
                   interm_var = interm_var + f'Best global metric - CP X PF: {self.best_global_[:, -1]} \n , '
                   if unused_y is not None:
                       interm_var = interm_var + f'Best global metric purity: {self.best_global_cp} \n ,'
                       interm_var = interm_var + f'Best global metric f-measure: {self.best_global_fm} \n ,'
                   interm_var = interm_var + f'Index features_selected: {self.selected_features_} \n , '
                   interm_var = interm_var + f'Number of selected features: {count_sel_feat} \n , '
                   interm_var = interm_var + f'Columns selected: {colunas}'
                   rootLogger.info(interm_var)
              
               
               for i in range(0, self.num_particles):
                   self.count.append(self.count_features(
                           self.best_individual_[i, :]))
                       
               best_glob = self.best_global_[0]
               self.selected_features_ = np.ma.masked_where(best_glob[:-1]>0.6, best_glob[:-1])
               self.selected_features_, = np.where(self.selected_features_.mask == True)