Example #1
0
def calc_fitness(pop):
    """Evaluate every gene combination in *pop* with stratified k-fold CV.

    For each gene combination an ``Individual`` is created, the module-level
    data ``X`` is restricted to the columns selected by that combination, and
    a fresh ``GaussianNB`` is fitted and scored on every fold. The per-fold
    outputs of ``calc_metrics`` are summed and divided by the number of folds.

    The following attributes are set on each individual: ``ft_rg``,
    ``gene_len``, ``metrics``, ``clf`` (the classifier fitted on the last
    fold), ``acc`` and ``index``.

    Args:
        pop: A sequence of gene combinations. Each combination is used to
            index the columns of ``X`` (``X[:, gene_rg]``).

    Returns:
        A list with one ``Individual`` per gene combination, in the same
        order as *pop*. An empty list when *pop* is empty.
    """
    # Nothing to evaluate; returning early also avoids the IndexError that
    # ``pop[0]`` would raise below. ``len`` is used rather than truthiness so
    # that array-like populations do not trigger an ambiguous-truth error.
    if len(pop) == 0:
        return []

    # Use fewer folds for lower time cost when the gene combination has more
    # than 20 genes.
    # NOTE(review): only the first combination's length is inspected, and the
    # resulting k is applied to the whole population -- confirm that all
    # combinations in a population are expected to have comparable lengths.
    k = K - 2 if len(pop[0]) > 20 else K

    rsts = []  # one evaluated Individual per gene combination
    for gene_rg in pop:
        indiv = Individual()
        indiv.ft_rg = gene_rg
        indiv.gene_len = len(gene_rg)

        # Running sum of the per-fold metrics (one slot per metric name).
        mt_clf = np.zeros(len(metrics_names))
        X_cur = X[:, gene_rg]

        # A new random seed is drawn per individual, so the fold assignment
        # differs between individuals and between calls.
        skf = StratifiedKFold(n_splits=k, random_state=random.randint(0, 100), shuffle=True)
        # StratifiedKFold requires n_splits >= 2, so this loop always runs
        # and ``clf`` is bound when it finishes.
        for train_index, test_index in skf.split(X_cur, y):
            clf = GaussianNB()
            clf.fit(X_cur[train_index, :], y[train_index])
            predicts = clf.predict(X_cur[test_index, :])
            # Binary confusion matrix with the label order fixed to [0, 1].
            result = confusion_matrix(y[test_index], predicts, labels=[0, 1])
            mt_clf += np.array(calc_metrics(result))

        # Average the accumulated metrics over the k folds.
        indiv.metrics = list(mt_clf / k)
        # Only the classifier trained on the last fold is kept.
        indiv.clf = clf
        # presumably metrics[2] is accuracy -- verify against calc_metrics
        indiv.acc = indiv.metrics[2]
        if is_imbalanced:
            # For imbalanced data sets, blend metrics[2] with metrics[3]
            # (60% / 40%) instead of ranking on metrics[2] alone.
            indiv.index = (0.6 * indiv.metrics[2] + 0.4 * indiv.metrics[3])
        else:
            indiv.index = indiv.acc
        rsts.append(indiv)
    return rsts