def feature_selection(self, X, y, state='train'):
    """Fit a FeatureBand selector and apply it to the stored training data.

    The method only acts when ``state == 'train'``; for any other value it
    does nothing and returns immediately.

    In the training case it:
      * builds a ``FeatureBand`` from the ``self.fb_*`` settings, using the
        classifier returned by ``load_clf('logistic')``;
      * fits it on ``X`` / ``y`` with ``metrics=self.fb_metric``;
      * replaces every value of ``self.train_data`` with its transformed
        version;
      * stores the fitted selector in ``self.fb_operator``;
      * when ``self.headers`` is not ``None``, stores the selected headers
        in ``self.selected_headers``.

    Args:
        X: Feature data handed to ``FeatureBand.fit``.
        y: Targets handed to ``FeatureBand.fit``.
        state: Pipeline stage; only ``'train'`` triggers any work.

    Returns:
        Always ``0``.
    """
    # Guard clause: nothing to do outside of training.
    if state != 'train':
        return 0

    print('Feature_selection starts!')
    selector = FeatureBand(
        r0=self.fb_r0,
        n0=self.fb_n0,
        clf=load_clf('logistic'),
        max_iter=self.fb_max_iter,
        k=self.fb_k,
        population_size=self.fb_population_size,
        local_search=True,
    )

    # fit() is unpacked into three values; none of them is used afterwards,
    # the unpacking is kept so the expected return arity is still enforced.
    _times, _iter_best, _global_best = selector.fit(
        X, y, metrics=self.fb_metric)

    # Reassigning existing keys while iterating is safe: the dict size
    # never changes.
    for name in self.train_data:
        self.train_data[name] = selector.transform(self.train_data[name])
    self.fb_operator = selector

    if self.headers is not None:
        # NOTE(review): `featrue_selected` is spelled exactly as in the
        # original code; confirm it matches the attribute FeatureBand exposes.
        self.selected_headers = self.headers[selector.featrue_selected]
    return 0
Example #2
0
# --- Experiment configuration -------------------------------------------
# DATASET is only referenced by the disabled load_dataset() call below; the
# data actually used comes from the CSV files.
DATASET = "rna"  # ["madelon", "basehock", "usps", "coil20"]
FINAL_CLASSIFIER = "logistic"  # ["knn", "logistic", "linear_svm"]
k = 300
n_splits = 5

# FeatureBand settings. r0, n0 and max_iter are passed to the FeatureBand
# constructor in the fold loop that follows; k and population_size are
# presumably consumed there as well -- TODO confirm.
r0 = 50
max_iter = 200
population_size = 10
n0 = 500

# --- Data loading ---------------------------------------------------------
# The generic loader is disabled in favour of reading the CSV files directly.
# x, y = load_dataset(DATASET)
# Xtrain.csv is read with pandas' default header handling (first row becomes
# the column names), while ytrain.csv is read with header=None (every row is
# data).
x = np.array(pd.read_csv('./medicaldata/tpotfssRNASeq/Xtrain.csv'))
y = np.array(pd.read_csv('./medicaldata/tpotfssRNASeq/ytrain.csv',
                         header=None))
print(x.shape, y.shape)
clf = load_clf(FINAL_CLASSIFIER)

# --- Cross-validation setup -----------------------------------------------
# random_state only has an effect together with shuffle=True. Passing it
# without shuffling was silently ignored by old scikit-learn releases and
# raises ValueError in recent ones. The splits were never shuffled here, so
# dropping the argument keeps the exact same folds and works on every version.
skf = StratifiedKFold(n_splits=n_splits)
fold_index = 0
perfs = []
# Iterate over the stratified folds produced by skf.
# NOTE(review): the remainder of this loop body is not visible in this
# excerpt; fold_index is presumably incremented further down -- confirm.
for train_index, test_index in skf.split(x, y):
    print("fold:", fold_index + 1)
    # Skip the rest of the body whenever fold_index is not 0.
    if fold_index != 0:
        continue
    # Per-fold train/test slicing is disabled; the complete data set is used
    # for training instead, so train_index/test_index are unused here.
    #x_train, x_test = x[train_index], x[test_index]
    #y_train, y_test = y[train_index], y[test_index]
    x_train, y_train = x, y
    fb = FeatureBand(r0=r0,
                     n0=n0,
                     clf=clf,
                     max_iter=max_iter,
Example #3
0
from featureband.CCM.core import ccm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# --- Experiment configuration -------------------------------------------
DATASET = "usps"  # ["madelon", "basehock", "usps", "coil20"]
FINAL_CLASSIFIER = "knn"  # ["knn", "logistic", "linear_svm"]
n_splits = 5

# --- Data loading and subsampling ------------------------------------------
x, y = load_dataset(DATASET)

# Draw 3000 distinct row indices (despite its name, n_samples holds indices,
# not a count) and keep only those rows. The draw is unseeded, so the subset
# differs between runs. np.random.choice raises ValueError when the data set
# has fewer than 3000 rows.
n_samples = np.random.choice(x.shape[0], 3000, replace=False)
x = x[n_samples, :]
y = y[n_samples]

print(x.shape, y.shape)
knn = load_clf("knn")
logistic = load_clf("logistic")

# One slot per value of k tried in the fold loop below
# (np.arange(10, 101, 10) yields 10 values).
knn_perfs = np.zeros(10)
logistic_perfs = np.zeros(10)

# --- Cross-validation setup -----------------------------------------------
# random_state only has an effect together with shuffle=True. Passing it
# without shuffling was silently ignored by old scikit-learn releases and
# raises ValueError in recent ones. The splits were never shuffled here, so
# dropping the argument keeps the exact same folds and works on every version.
skf = StratifiedKFold(n_splits=n_splits)
fold_index = 0
# NOTE(review): no matching close() is visible in this excerpt; make sure the
# handle is closed (or wrapped in a `with` block) once the loop has finished.
fout = open("usps_data.txt", 'w')
for train_index, test_index in skf.split(x, y):
    print("fold:", fold_index + 1)

    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    for i, k in enumerate(np.arange(10, 101, 10)):
        rank = ccm.ccm(x_train,