# Build the gcForest model from its JSON configuration file.
config = utils.load_json("demo_ca.json")
gc = GCForest(config)

# Dataset name -> loader callable; each loader returns an (X, Y) pair.
dataset_loaders = {
    'cirrhosis': load.cirrhosis_data,
    't2d': load.t2d_data,
    'obesity': load.obesity_data,
}

datasets = ['cirrhosis', 'obesity', 't2d']
for dataset_idx, name in enumerate(datasets):
    thre_features = {}

    # Reject names that have no registered loader.
    if name not in dataset_loaders:
        raise Exception('the dataset is not defined!!!')
    X, Y = dataset_loaders[name]()

    output_features = pd.Series()
    # NOTE(review): `cv` is not defined in this part of the script; it is
    # presumably a scikit-learn style splitter created elsewhere - confirm.
    for train, test in cv.split(X, Y):
        # Row-wise split of the feature table and the labels for this fold.
        x_train = X.iloc[train]
        y_train = Y[train]
        x_test = X.iloc[test]
        y_test = Y[test]

        # Insert a singleton middle axis: (n_samples, 1, n_features).
        X_train = x_train.values.reshape(-1, 1, x_train.shape[1])
        X_test = x_test.values.reshape(-1, 1, x_test.shape[1])

        # Fit on the training fold; the second return value is kept as `_features`.
        X_train_enc, _features = gc.fit_transform(X_train, y_train)
# Experiment setup: for each dataset, report the mean 5-fold stratified
# cross-validation accuracy of a linear-kernel SVM, then sweep over a list of
# feature-set sizes.
data_sets = ["cirrhosis", 'obesity', 't2d']
feature_sets = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
# feature_len = [542, 572, 465]
clf_svm = SVC(kernel='linear')
# `random_state` is intentionally not passed: it only has an effect when
# shuffle=True, and scikit-learn >= 0.24 raises ValueError when it is combined
# with shuffle=False. The unshuffled folds are deterministic either way.
cv = StratifiedKFold(n_splits=5, shuffle=False)

for k, data_name in enumerate(data_sets):
    print("==================")
    print(data_name)
    ll = []

    # Any name other than 'obesity' / 'cirrhosis' falls through to the t2d loader.
    if data_name == 'obesity':
        X, y = load.obesity_data()
    elif data_name == 'cirrhosis':
        X, y = load.cirrhosis_data()
    else:
        X, y = load.t2d_data()

    # Per-fold accuracy of the SVM on the data as loaded; print the mean.
    clf_acc_before = cross_val_score(clf_svm, X, y, cv=cv, scoring='accuracy')
    print(np.mean(clf_acc_before))

    for feat in feature_sets:
        llm = []
        print("------------")
        # ###### deep forest
        output_dir = osp.join("output", "result")
        mat = []
        for i in range(5):
            # NOTE(review): the body of this 5-iteration loop is not visible in
            # this part of the file; `pass` is only a placeholder so the loop
            # header stays well-formed.
            pass