コード例 #1
0
# Build the cascade-forest model from its JSON configuration file.
config = utils.load_json("demo_ca.json")
gc = GCForest(config)

datasets = ['cirrhosis', 'obesity', 't2d']

# Dataset name -> loader callable; every loader returns an (X, Y) pair.
_dataset_loaders = {
    'cirrhosis': load.cirrhosis_data,
    't2d': load.t2d_data,
    'obesity': load.obesity_data,
}

for dataset_idx, name in enumerate(datasets):
    thre_features = {}

    # Resolve the loader by name; unknown dataset names are rejected.
    _loader = _dataset_loaders.get(name)
    if _loader is None:
        raise Exception('the dataset is not defined!!!')
    X, Y = _loader()

    output_features = pd.Series()
    # `cv` is not defined in this snippet; it only needs a
    # split(X, Y) method yielding (train, test) index pairs.
    for train, test in cv.split(X, Y):
        # Positional row selection for the features; labels are indexed
        # directly with the same index arrays.
        x_train, x_test = X.iloc[train], X.iloc[test]
        y_train, y_test = Y[train], Y[test]

        # Insert a singleton middle axis: (n_rows, 1, n_columns).
        X_train = x_train.values.reshape(-1, 1, x_train.shape[1])
        X_test = x_test.values.reshape(-1, 1, x_test.shape[1])

        # fit_transform returns a pair: the encoded training data and a
        # second value bound to `_features`.
        X_train_enc, _features = gc.fit_transform(X_train, y_train)
コード例 #2
0
ファイル: acc_svm.py プロジェクト: chestnut55/cascade-forest
# Names of the datasets evaluated by the per-dataset loop.
data_sets = ['cirrhosis', 'obesity', 't2d']
# Values iterated as `feat` inside the per-dataset loop
# (presumably numbers of selected features -- TODO confirm).
feature_sets = [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
# feature_len = [542, 572, 465]

# Linear-kernel SVM scored with cross_val_score in the per-dataset loop.
clf_svm = SVC(kernel='linear')
# 5-fold stratified CV without shuffling, so the folds are deterministic.
# `random_state` is deliberately not passed: it has no effect when
# shuffle=False, and scikit-learn >= 0.24 raises ValueError if it is set
# in that case.
cv = StratifiedKFold(n_splits=5, shuffle=False)

for k, data_name in enumerate(data_sets):
    print("==================")
    print(data_name)
    ll = []

    X, y = None, None

    if data_name == 'obesity':
        X, y = load.obesity_data()
    elif data_name == 'cirrhosis':
        X, y = load.cirrhosis_data()
    else:
        X, y = load.t2d_data()

    clf_acc_before = cross_val_score(clf_svm, X, y, cv=cv, scoring='accuracy')
    print(np.mean(clf_acc_before))

    for feat in feature_sets:
        llm = []
        print("------------")
        # ###### deep forest
        output_dir = osp.join("output", "result")
        mat = []
        for i in range(5):