# 5-fold stratified cross-validation; shuffling is seeded so that the folds
# are reproducible from run to run.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Build the GCForest model from the JSON configuration file.
config = utils.load_json("demo_ca.json")
gc = GCForest(config)

datasets = ['cirrhosis', 'obesity', 't2d']

# Maps each dataset name to the loader returning its (X, Y) pair.
dataset_loaders = {
    'cirrhosis': load.cirrhosis_data,
    't2d': load.t2d_data,
    'obesity': load.obesity_data,
}

for dataset_idx, name in enumerate(datasets):
    thre_features = {}
    if name not in dataset_loaders:
        raise ValueError('the dataset is not defined!!!')
    X, Y = dataset_loaders[name]()

    # An explicit dtype keeps the empty Series identical across pandas
    # versions (the implicit default changed from float64 to object) and
    # avoids the deprecation warning pandas 1.x emits for a bare pd.Series().
    # NOTE(review): float64 assumes numeric values are accumulated here -
    # confirm against the code that fills output_features.
    output_features = pd.Series(dtype='float64')
    for train, test in cv.split(X, Y):
        # `train` / `test` are positional row indices for this fold.
        # NOTE(review): X is indexed positionally via .iloc while Y is
        # indexed with plain []; this assumes Y is a NumPy array (or is
        # otherwise positionally indexable) - confirm against load.*_data().
        x_train = X.iloc[train]
        y_train = Y[train]

        x_test = X.iloc[test]
        y_test = Y[test]

        # Insert a singleton middle axis: (rows, columns) -> (rows, 1, columns).
        # Presumably the input layout the GCForest model expects - TODO confirm.
        X_train = x_train.values.reshape(-1, 1, len(x_train.columns))
        X_test = x_test.values.reshape(-1, 1, len(x_test.columns))
Beispiel #2
0
# Linear-kernel support vector classifier.
clf_svm = SVC(kernel='linear')
# 5-fold stratified splitter that takes the folds in order (no shuffling).
# random_state is deliberately not passed: it has no effect when
# shuffle=False, and recent scikit-learn releases raise a ValueError when
# the two are combined.
cv = StratifiedKFold(n_splits=5, shuffle=False)

for k, data_name in enumerate(data_sets):
    print("==================")
    print(data_name)
    ll = []

    # Load the (X, y) pair for this dataset.
    # NOTE(review): any name other than 'obesity' or 'cirrhosis' silently
    # falls through to the t2d loader - confirm that data_sets can only
    # contain these three names.
    if data_name == 'obesity':
        X, y = load.obesity_data()
    elif data_name == 'cirrhosis':
        X, y = load.cirrhosis_data()
    else:
        X, y = load.t2d_data()

    # Baseline: mean cross-validated accuracy of the SVM on X as loaded.
    clf_acc_before = cross_val_score(clf_svm, X, y, cv=cv, scoring='accuracy')
    print(np.mean(clf_acc_before))

    for feat in feature_sets:
        llm = []
        print("------------")
        # ###### deep forest
        output_dir = osp.join("output", "result")
        mat = []
        # Read the JSON result files named "<data_name>-cv-<i>" for i = 0..4.
        # Presumably one file per cross-validation fold - TODO confirm.
        for i in range(5):
            path = osp.join(output_dir, data_name + "-cv-" + str(i))
            # Context manager closes the handle even if parsing fails,
            # instead of leaking one open file per iteration.
            with open(path, 'r') as file:
                dicts = json.load(file)