def __init__(self, model, x, y, cv_split=5, verbose=0, do_parallel=False):
        """
            Store the model, the data and the bookkeeping state used for
            feature selection.

            Parameters
            -----------
            model : scikit-learn supported model
                     Kept as-is on the instance.
                x : {array-like}, shape = [n_samples, n_features]
                     Training vectors; ``x.shape[1]`` is recorded as the
                     number of features.
                y : {array-like}, shape = [n_samples]
                     Target values.
            cv_split : int
                     Number of splits for cross-validation to calculate
                     fitness; also passed to ``ff.FitenessFunction``.
            verbose : 0 or 1
                     When 1, a summary of the setup is printed.
            do_parallel : bool
                     Stored on the instance; not otherwise used here.
        """
        self.model = model
        feature_count = x.shape[1]
        self.n_features = feature_count
        self.toolbox = None
        # _create() runs only after model, n_features and toolbox are in place.
        self.creator = self._create()

        self.cv_split = cv_split
        self.x, self.y = x, y
        self.verbose = verbose

        if verbose == 1:
            summary = "Model {} will select best features among {} features using cv_split :{}.".format(
                model, feature_count, cv_split)
            print(summary)
            shapes = "Shape of train_x: {} and target: {}".format(x.shape, y.shape)
            print(shapes)

        self.do_parallel = do_parallel

        # Result holders, empty until a search has been run.
        self.final_fitness = []
        self.fitness_in_generation = {}
        self.best_ind = None

        self.fit_obj = ff.FitenessFunction(cv_split)
 def train(self):
     """
         Score the feature subset encoded by ``self.best_ind`` and print it.

         The columns of ``self.x`` whose position in ``self.best_ind`` equals 1
         are selected with ``.iloc`` and handed, together with ``self.model``
         and ``self.y``, to ``ff.FitenessFunction.calculate_fitness``. The
         result is printed as a percentage; nothing is returned.

         Raises
         ------
         ValueError
             If ``self.best_ind`` is None, i.e. no individual is available
             to pick the feature subset from.
     """
     # Fail early with a clear message instead of letting np.where run on a
     # 0-d comparison result.
     if self.best_ind is None:
         raise ValueError(
             "best_ind is not set; select features before calling train()")

     feature_idx = np.where(np.asarray(self.best_ind) == 1)[0]

     # NOTE(review): 10 is hard-coded here instead of self.cv_split --
     # presumably the number of CV splits for the final score; confirm.
     fit_obj = ff.FitenessFunction(10)
     fitness = fit_obj.calculate_fitness(self.model,
                                         self.x.iloc[:, feature_idx],
                                         self.y)
     print("The accuracy using feature set {} is {}%".format(
         feature_idx, fitness * 100))
 def evaluate(self, individual):
     """
         Compute the fitness of one individual (a 0/1 feature mask).

         Parameters
         ----------
         individual : sequence of numbers
             Positions equal to 1 mark the selected feature columns.

         Returns
         -------
         tuple
             One-element tuple ``(fitness,)``. The fitness is 0.0 when the
             entries of ``individual`` sum to zero; otherwise it is whatever
             ``ff.FitenessFunction.calculate_fitness`` returns for the
             selected columns of the train, test and development sets.
     """
     np_ind = np.asarray(individual)

     # Nothing selected: skip building the fitness object altogether.
     if np.sum(np_ind) == 0:
         return 0.0,

     feature_idx = np.where(np_ind == 1)[0]
     fit_obj = ff.FitenessFunction()
     fitness = fit_obj.calculate_fitness(
         self.model,
         self.x[:, feature_idx], self.y,
         self.x_test[:, feature_idx], self.y_test,
         self.x_development[:, feature_idx], self.y_development)
     return fitness,
Example #4
0
    def evaluate(self, individual):
        """
        Return the fitness of ``individual`` as a one-element tuple.

        ``individual`` is read as a mask: positions equal to 1 pick the
        columns of ``self.x`` that are scored with ``self.model`` and
        ``self.y``. A mask whose entries sum to zero gets a fitness of 0.0
        without calling the fitness function.
        """
        scorer = ff.FitenessFunction(self.cv_split)
        mask = np.asarray(individual)

        fitness = 0.0
        if np.sum(mask) != 0:
            selected = np.where(mask == 1)[0]
            fitness = scorer.calculate_fitness(
                self.model, self.x[:, selected], self.y)

        if self.verbose == 1:
            report = "Individual: {}  Fitness_score: {} ".format(individual, fitness)
            print(report)

        return (fitness,)
Example #5
0
    def evaluate(self, individual):
        """
        Return the fitness of ``individual`` as a one-element tuple.

        Positions of ``individual`` equal to 1 pick the columns of the train,
        test and development matrices passed to
        ``ff.FitenessFunction.calculate_fitness``. A mask whose entries sum to
        zero gets a fitness of 0.0. As a side effect, ``self.f1_score_test``
        is overwritten on every call.
        """
        scorer = ff.FitenessFunction()
        # NOTE(review): get_f1() is read from the freshly built object, before
        # calculate_fitness has run on it -- confirm this is intended.
        self.f1_score_test = scorer.get_f1()

        mask = np.asarray(individual)
        fitness = 0.0
        if np.sum(mask) != 0:
            selected = np.where(mask == 1)[0]
            fitness = scorer.calculate_fitness(
                self.model,
                self.x[:, selected], self.y,
                self.x_test[:, selected], self.y_test,
                self.x_development[:, selected], self.y_development)

        if self.verbose == 1:
            report = "Individual: {}  Fitness_score: {} ".format(individual, fitness)
            print(report)
            print(len(individual))

        return (fitness,)
    def f_per_particle(self, m, alpha=0.88):
        """
        Evaluate one particle's binary feature mask.

        Parameters
        ----------
        m : array-like
            Mask over the columns of ``self.X``; positions equal to 1 are
            kept. If ``m`` has no non-zero entry, all of ``self.X`` is used.
        alpha : float
            Accepted for the caller's benefit but not used: the weighted
            objective it belonged to is no longer computed here.

        Returns
        -------
        tuple
            The two values unpacked from
            ``ff.FitenessFunction.calculate_fitness`` for
            ``self.classifier``, the chosen columns and ``self.Y``.
        """
        scorer = ff.FitenessFunction()

        # Narrow the data to the masked columns, unless the mask is empty.
        if np.count_nonzero(m) != 0:
            chosen = np.where(np.asarray(m) == 1)[0]
            X_subset = self.X.iloc[:, chosen]
        else:
            X_subset = self.X

        score, cv_set = scorer.calculate_fitness(self.classifier, X_subset, self.Y)
        return score, cv_set