def feature_selection(self, X, y, state='train'):
    """Fit a FeatureBand selector and apply it to the stored training data.

    When ``state`` is ``'train'``, a ``FeatureBand`` is built from the
    ``fb_*`` attributes on ``self`` (with the ``'logistic'`` classifier
    returned by ``load_clf`` and ``local_search=True``), fitted on ``X``
    and ``y`` with ``self.fb_metric``, and then used to transform every
    entry of ``self.train_data`` in place. The fitted selector is kept on
    ``self.fb_operator``. If ``self.headers`` is not ``None``, it is
    indexed with the selector's ``featrue_selected`` attribute and the
    result is stored on ``self.selected_headers``.

    For any other ``state`` no work is done.

    Args:
        X: Data passed as the first argument to ``FeatureBand.fit``.
        y: Targets passed as the second argument to ``FeatureBand.fit``.
        state: Only the value ``'train'`` triggers fitting.

    Returns:
        ``0``.
    """
    # NOTE(review): the original indentation of this method was lost; this
    # layout assumes `return 0` applies to every state and that the
    # post-loop assignments are not part of the loop body -- confirm.
    if state != 'train':
        return 0

    print('Feature_selection starts!')

    selector_kwargs = dict(
        r0=self.fb_r0,
        n0=self.fb_n0,
        clf=load_clf('logistic'),
        max_iter=self.fb_max_iter,
        k=self.fb_k,
        population_size=self.fb_population_size,
        local_search=True,
    )
    selector = FeatureBand(**selector_kwargs)

    # fit() is unpacked into three values; none of them is used afterwards.
    _times, _iter_best, _global_best = selector.fit(
        X, y, metrics=self.fb_metric
    )

    # Replace each stored training split with its transformed version.
    for split_name in self.train_data:
        self.train_data[split_name] = selector.transform(
            self.train_data[split_name]
        )

    self.fb_operator = selector

    if self.headers is not None:
        # NOTE(review): `featrue_selected` looks like a misspelling of
        # `feature_selected`; it is kept as-is because it must match the
        # attribute name defined by FeatureBand -- confirm before renaming.
        self.selected_headers = self.headers[selector.featrue_selected]

    return 0
DATASET = "rna" # ["madelon", "basehock", "usps", "coil20"] FINAL_CLASSIFIER = "logistic" # ["knn", "logistic", "linear_svm"] k = 300 n_splits = 5 r0 = 50 max_iter = 200 population_size = 10 n0 = 500 #x, y = load_dataset(DATASET) x = np.array(pd.read_csv('./medicaldata/tpotfssRNASeq/Xtrain.csv')) y = np.array(pd.read_csv('./medicaldata/tpotfssRNASeq/ytrain.csv', header=None)) print(x.shape, y.shape) clf = load_clf(FINAL_CLASSIFIER) skf = StratifiedKFold(n_splits=n_splits, random_state=42) fold_index = 0 perfs = [] for train_index, test_index in skf.split(x, y): print("fold:", fold_index + 1) if fold_index != 0: continue #x_train, x_test = x[train_index], x[test_index] #y_train, y_test = y[train_index], y[test_index] x_train, y_train = x, y fb = FeatureBand(r0=r0, n0=n0, clf=clf, max_iter=max_iter,
from featureband.CCM.core import ccm from sklearn.model_selection import StratifiedKFold from sklearn.metrics import accuracy_score DATASET = "usps" # ["madelon", "basehock", "usps", "coil20"] FINAL_CLASSIFIER = "knn" # ["knn", "logistic", "linear_svm"] n_splits = 5 x, y = load_dataset(DATASET) n_samples = np.random.choice(x.shape[0], 3000, replace=False) x = x[n_samples, :] y = y[n_samples] print(x.shape, y.shape) knn = load_clf("knn") logistic = load_clf("logistic") knn_perfs = np.zeros(10) logistic_perfs = np.zeros(10) skf = StratifiedKFold(n_splits=n_splits, random_state=42) fold_index = 0 fout = open("usps_data.txt", 'w') for train_index, test_index in skf.split(x, y): print("fold:", fold_index + 1) x_train, x_test = x[train_index], x[test_index] y_train, y_test = y[train_index], y[test_index] for i, k in enumerate(np.arange(10, 101, 10)): rank = ccm.ccm(x_train,