def calc_fitness(pop):
    """Score every gene combination in *pop* with stratified k-fold CV.

    For each gene combination, the columns of the feature matrix ``X``
    selected by that combination are used to fit and evaluate a
    ``GaussianNB`` classifier on stratified folds of ``(X, y)``. The values
    returned by ``calc_metrics`` for each fold's confusion matrix are
    averaged over the folds and stored on a freshly created ``Individual``.

    The names ``X``, ``y``, ``K``, ``metrics_names``, ``is_imbalanced``,
    ``Individual`` and ``calc_metrics`` are not parameters; they are read
    from the enclosing (module) scope.

    Args:
        pop: Sequence of gene combinations. Each combination is used as a
            column indexer into ``X`` (``X[:, gene_rg]``).

    Returns:
        A list with one ``Individual`` per gene combination, in the same
        order as *pop*. Each individual has ``ft_rg``, ``gene_len``,
        ``metrics``, ``clf``, ``acc`` and ``index`` set. An empty *pop*
        yields an empty list.
    """
    # Guard first: the fold count below is derived from pop[0], which would
    # raise IndexError on an empty population.
    if len(pop) == 0:
        return []

    # Use fewer folds when the gene combinations are long (more than 20
    # genes) to reduce the time cost. The decision is made from the first
    # combination only and then applied to the whole population.
    k = K - 2 if len(pop[0]) > 20 else K

    # Weights used to blend two metrics into the fitness of an individual
    # when the data set is flagged as imbalanced.
    acc_weight = 0.6
    secondary_weight = 0.4

    rsts = []  # one Individual per gene combination, in input order
    for gene_rg in pop:
        indiv = Individual()
        indiv.ft_rg = gene_rg
        indiv.gene_len = len(gene_rg)

        # Running sum of the per-fold metric vectors; divided by k below.
        mt_clf = np.zeros(len(metrics_names))
        X_cur = X[:, gene_rg]

        # A new random seed per gene combination, so each combination is
        # evaluated on its own shuffled stratified split.
        skf = StratifiedKFold(
            n_splits=k,
            random_state=random.randint(0, 100),
            shuffle=True,
        )

        # Placeholder kept from the original; it is overwritten by the
        # classifier fitted on each fold.
        clf = []
        for train_index, test_index in skf.split(X_cur, y):
            clf = GaussianNB()
            X_train = X_cur[train_index, :]
            X_test = X_cur[test_index, :]
            y_train = y[train_index]
            y_test = y[test_index]

            clf.fit(X_train, y_train)
            predicts = clf.predict(X_test)

            # The confusion matrix is restricted to the labels 0 and 1.
            result = confusion_matrix(y_test, predicts, labels=[0, 1])
            mt_clf += np.array(calc_metrics(result))

        # Average of the metric vectors over the k folds.
        indiv.metrics = list(mt_clf / k)
        # Only the classifier fitted on the last fold is kept.
        indiv.clf = clf
        indiv.acc = indiv.metrics[2]

        if is_imbalanced:
            # Blend metrics[2] (stored as `acc`) with metrics[3].
            # NOTE(review): what metrics[3] measures is defined by
            # calc_metrics / metrics_names, not visible here -- confirm.
            indiv.index = (
                acc_weight * indiv.metrics[2]
                + secondary_weight * indiv.metrics[3]
            )
        else:
            indiv.index = indiv.acc

        rsts.append(indiv)

    return rsts