def __init__(self, model, x, y, cv_split=5, verbose=0, do_parallel=False):
    """Store the model, the data and the bookkeeping state on the instance.

    Parameters
    ----------
    model : scikit-learn supported model
        Stored as ``self.model``.
    x : {array-like}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features. ``x.shape[1]`` is recorded
        as ``self.n_features``.
    y : {array-like}, shape = [n_samples]
        Target values.
    cv_split : int
        Number of splits for cross-validation used to calculate fitness;
        forwarded to ``ff.FitenessFunction``.
    verbose : 0 or 1
        When equal to 1, a short summary of the setup is printed.
    do_parallel : bool
        Stored as ``self.do_parallel``; not otherwise used in this method.
    """
    self.model = model
    self.n_features = x.shape[1]
    self.toolbox = None
    # _create() runs at this point, before the attributes below exist.
    self.creator = self._create()

    self.cv_split, self.x, self.y = cv_split, x, y
    self.verbose = verbose

    if self.verbose == 1:
        summary_template = (
            "Model {} will select best features among {} features using cv_split :{}."
        )
        print(summary_template.format(model, x.shape[1], cv_split))
        shape_template = "Shape of train_x: {} and target: {}"
        print(shape_template.format(x.shape, y.shape))

    self.do_parallel = do_parallel

    # Result containers start out empty; no individual is selected yet.
    self.final_fitness = []
    self.fitness_in_generation = {}
    self.best_ind = None

    self.fit_obj = ff.FitenessFunction(self.cv_split)
def train(self):
    """Score the model on the selected feature subset and print the result.

    The positions where ``self.best_ind`` equals 1 are used as column
    indices into ``self.x`` (via ``.iloc``, so ``self.x`` must support
    positional indexing of that kind). The resulting subset, together with
    ``self.y`` and ``self.model``, is passed to
    ``ff.FitenessFunction(10).calculate_fitness`` and the returned value is
    printed as a percentage.

    Raises
    ------
    RuntimeError
        If ``self.best_ind`` is ``None``, i.e. no individual has been
        selected yet.
    """
    # Fail fast with a clear message: np.asarray(None) == 1 is a 0-d boolean,
    # which np.where either rejects or turns into an empty feature set.
    if self.best_ind is None:
        raise RuntimeError(
            "train() needs a selected individual but self.best_ind is None; "
            "run the feature search before calling train()."
        )

    selection_mask = np.asarray(self.best_ind) == 1
    feature_idx = np.where(selection_mask)[0]

    fit_obj = ff.FitenessFunction(10)
    fitness = fit_obj.calculate_fitness(
        self.model, self.x.iloc[:, feature_idx], self.y
    )
    print("The accuracy using feature set {} is {}%".format(
        feature_idx, fitness * 100))
def evaluate(self, individual):
    """Return the fitness of ``individual`` as a one-element tuple.

    Entries of ``individual`` equal to 1 select columns of ``self.x``,
    ``self.x_test`` and ``self.x_development``. Those subsets are passed,
    with the matching targets and ``self.model``, to
    ``ff.FitenessFunction().calculate_fitness``. An individual whose
    entries sum to zero scores 0.0 without calling the fitness function.
    """
    scorer = ff.FitenessFunction()
    mask = np.asarray(individual)

    if np.sum(mask) != 0:
        selected = np.where(mask == 1)[0]
        fitness = scorer.calculate_fitness(
            self.model,
            self.x[:, selected],
            self.y,
            self.x_test[:, selected],
            self.y_test,
            self.x_development[:, selected],
            self.y_development,
        )
    else:
        fitness = 0.0

    if self.verbose == 1:
        # Verbose mode currently prints nothing in this method.
        pass

    return (fitness,)
def evaluate(self, individual):
    """Return the cross-validated fitness of ``individual`` as a 1-tuple.

    Entries of ``individual`` equal to 1 select columns of ``self.x``; the
    subset is scored by ``ff.FitenessFunction(self.cv_split)`` using
    ``self.model`` and ``self.y``. An individual whose entries sum to zero
    scores 0.0 without calling the fitness function. When
    ``self.verbose == 1`` the individual and its score are printed.
    """
    scorer = ff.FitenessFunction(self.cv_split)
    mask = np.asarray(individual)

    if np.sum(mask) != 0:
        chosen_columns = np.where(mask == 1)[0]
        x_subset = self.x[:, chosen_columns]
        fitness = scorer.calculate_fitness(self.model, x_subset, self.y)
    else:
        fitness = 0.0

    if self.verbose == 1:
        report = "Individual: {} Fitness_score: {} "
        print(report.format(individual, fitness))

    return (fitness,)
def evaluate(self, individual):
    """Return the fitness of ``individual`` as a one-element tuple.

    Entries of ``individual`` equal to 1 select columns of ``self.x``,
    ``self.x_test`` and ``self.x_development``. Those subsets are passed,
    with the matching targets and ``self.model``, to
    ``ff.FitenessFunction().calculate_fitness``. An individual whose
    entries sum to zero scores 0.0 without calling the fitness function.

    Side effects: ``self.f1_score_test`` is overwritten on every call, and
    when ``self.verbose == 1`` the individual, its score and its length
    are printed.
    """
    scorer = ff.FitenessFunction()
    # NOTE(review): get_f1() is read from a freshly created object, before
    # calculate_fitness has run on it -- confirm this is the intended value.
    self.f1_score_test = scorer.get_f1()

    mask = np.asarray(individual)
    if np.sum(mask) != 0:
        selected = np.where(mask == 1)[0]
        fitness = scorer.calculate_fitness(
            self.model,
            self.x[:, selected],
            self.y,
            self.x_test[:, selected],
            self.y_test,
            self.x_development[:, selected],
            self.y_development,
        )
    else:
        fitness = 0.0

    if self.verbose == 1:
        report = "Individual: {} Fitness_score: {} "
        print(report.format(individual, fitness))
        print(len(individual))

    return (fitness,)
def f_per_particle(self, m, alpha=0.88):
    """Score the feature subset described by one particle's mask.

    Parameters
    ----------
    m : array-like
        Mask over the columns of ``self.X``; entries equal to 1 select
        columns (via ``.iloc``). If ``m`` has no nonzero entries, all of
        ``self.X`` is used instead.
    alpha : float
        Not referenced by the current body. An alpha-weighted objective
        combining the score with the fraction of selected features is not
        computed here.

    Returns
    -------
    tuple
        ``(P, cv_set)``, the two values returned by
        ``ff.FitenessFunction().calculate_fitness`` for ``self.classifier``,
        the chosen subset and ``self.Y``.
    """
    fit_obj = ff.FitenessFunction()

    # Resolve the binary mask to a column subset; an all-zero mask keeps
    # every column.
    if np.count_nonzero(m) != 0:
        selected_columns = np.where(np.asarray(m) == 1)[0]
        X_subset = self.X.iloc[:, selected_columns]
    else:
        X_subset = self.X

    P, cv_set = fit_obj.calculate_fitness(self.classifier, X_subset, self.Y)
    return P, cv_set