# Shuffled, seeded 5-fold stratified splitter reused for every dataset below.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Build the GCForest model from the JSON configuration file.
config = utils.load_json("demo_ca.json")
gc = GCForest(config)

# Map each supported dataset name to the function that loads it.
data_loaders = {
    'cirrhosis': load.cirrhosis_data,
    't2d': load.t2d_data,
    'obesity': load.obesity_data,
}

datasets = ['cirrhosis', 'obesity', 't2d']
for dataset_idx, name in enumerate(datasets):
    thre_features = {}

    # An unknown dataset name aborts the run.
    fetch_data = data_loaders.get(name)
    if fetch_data is None:
        raise Exception('the dataset is not defined!!!')
    X, Y = fetch_data()

    # NOTE(review): some pandas releases warn when an empty Series is built
    # without an explicit dtype -- consider pinning one once the downstream
    # use of output_features is confirmed.
    output_features = pd.Series()

    for train, test in cv.split(X, Y):
        # Positional row selection for the features; Y is indexed directly
        # with the fold indices.
        x_train, x_test = X.iloc[train], X.iloc[test]
        y_train, y_test = Y[train], Y[test]

        # Insert a singleton middle axis: (rows, 1, n_columns).
        # Presumably the layout GCForest expects -- TODO confirm.
        X_train = x_train.values.reshape(-1, 1, x_train.shape[1])
        X_test = x_test.values.reshape(-1, 1, x_test.shape[1])
# Baseline classifier: a linear-kernel SVM scored with cross-validation.
clf_svm = SVC(kernel='linear')

# Unshuffled 5-fold stratified splitter. With shuffle=False the folds are
# already deterministic, and recent scikit-learn releases raise ValueError
# when random_state is supplied alongside shuffle=False, so no seed is set.
cv = StratifiedKFold(n_splits=5, shuffle=False)

for k, data_name in enumerate(data_sets):
    print("==================")
    print(data_name)
    ll = []

    # Any name other than 'obesity' / 'cirrhosis' falls through to t2d.
    if data_name == 'obesity':
        X, y = load.obesity_data()
    elif data_name == 'cirrhosis':
        X, y = load.cirrhosis_data()
    else:
        X, y = load.t2d_data()

    # Mean cross-validated accuracy of the SVM on the data as loaded.
    clf_acc_before = cross_val_score(clf_svm, X, y, cv=cv, scoring='accuracy')
    print(np.mean(clf_acc_before))

    for feat in feature_sets:
        llm = []
        print("------------")
        # ###### deep forest
        output_dir = osp.join("output", "result")
        mat = []
        for i in range(5):
            # One JSON result file per fold: output/result/<data_name>-cv-<i>
            path = osp.join(output_dir, data_name + "-cv-" + str(i))
            # Context manager closes the handle even if parsing fails; the
            # name `file` stays bound (closed) afterwards for any later use.
            with open(path, 'r') as file:
                dicts = json.load(file)