def RRExtra_rf_dis(n_trees, X, Y, train_indices, test_indices, seed):
    """Build forest-proximity features from an ``RRExtraTreesClassifier``.

    A 500-tree ``RRExtraTreesClassifier`` is fitted on the training rows of
    ``X``/``Y``.  Every sample of ``X`` is then passed through ``clf.apply``
    and the proximity of two samples is the fraction of trees in which both
    samples receive the same ``apply`` value.  Despite the ``dis`` in the
    name, larger values mean *more similar* samples (a sample has proximity
    1.0 with itself).

    Args:
        n_trees: Kept for backward compatibility with existing callers.  The
            forest size is fixed at 500, and proximities are normalized by
            the number of trees the fitted model actually reports, so this
            value no longer influences the result.  (Previously the counts
            were divided by ``n_trees``, which produced proximities above 1
            whenever ``n_trees`` differed from the real forest size.)
        X: 2-D sample matrix, indexable by ``train_indices``/``test_indices``
            along the first axis and exposing ``.shape`` (assumed to be a
            NumPy array -- confirm against callers).
        Y: Labels aligned with the rows of ``X``.
        train_indices: Row indices used to fit the classifier; they also
            select the columns of the returned feature matrices.
        test_indices: Row indices used for prediction and scoring.
        seed: Passed to the classifier as ``random_state``.

    Returns:
        A 4-tuple ``(train_features, test_features, weight, pred)``:

        * ``train_features[a, j]`` -- proximity between training sample ``a``
          and training sample ``j``.
        * ``test_features[b, j]`` -- proximity between test sample ``b`` and
          training sample ``j``.
        * ``weight`` -- ``clf.score`` on the test rows (presumably mean
          accuracy, as for scikit-learn classifiers -- confirm).
        * ``pred`` -- ``clf.predict`` on the test rows.
    """
    # NOTE(review): oob_score=True is forwarded unchanged; scikit-learn's own
    # extra-trees estimators require bootstrap=True for it -- confirm that
    # RRExtraTreesClassifier accepts this combination.
    clf = RRExtraTreesClassifier(
        n_estimators=500, random_state=seed, oob_score=True, n_jobs=-1
    )
    clf = clf.fit(X[train_indices], Y[train_indices])
    pred = clf.predict(X[test_indices])
    weight = clf.score(X[test_indices], Y[test_indices])

    n_samples = X.shape[0]
    # One row of leaf ids per sample; flatten any trailing axes so that each
    # row is a flat vector (the original used np.ravel per row).
    leaves = np.asarray(clf.apply(X)).reshape(n_samples, -1)
    # Normalize by the real number of trees, not by the caller's n_trees, so
    # that off-diagonal values stay in [0, 1] like the diagonal.
    n_fitted_trees = float(leaves.shape[1])

    dis = np.empty((n_samples, n_samples))
    for i in range(n_samples):
        # Compare sample i with every sample at once; row i of the result is
        # the share of trees where the leaf ids coincide (1.0 on the diagonal).
        dis[i] = (leaves == leaves[i]).sum(axis=1) / n_fitted_trees

    # dis is symmetric, so selecting the training columns is equivalent to
    # the original transpose / row-select / transpose sequence.
    train_columns = dis[:, train_indices]
    return train_columns[train_indices], train_columns[test_indices], weight, pred
return X_features3[train_indices], X_features3[test_indices], weight, pred from uci_loader import * X, y = getdataset('diabetes') print(X.shape) clf = RRForestClassifier(n_estimators=20, random_state=1000, n_jobs=-1) indd = int(len(y) / 2) clf.fit(X[:indd], y[:indd]) print("Random Rotation Forest Accuracy:", np.mean(clf.predict(X[indd:]) == y[indd:])) classifier = RRExtraTreesClassifier(n_estimators=20) classifier.fit(X[:indd], y[:indd]) print("Random Rotation Extra Trees Accuracy:", np.mean(classifier.predict(X[indd:]) == y[indd:])) classifier = RandomForestClassifier(n_estimators=20) classifier.fit(X[:indd], y[:indd]) print("Random Forest Accuracy:", np.mean(classifier.predict(X[indd:]) == y[indd:])) train_indices, test_indices = splitdata(X=X, Y=y, ratio=0.5, seed=1000) print("Start rest") # view1 seed = 1000 X_features_train1, X_features_test1, w1, pred1 = RR_rf_dis( n_trees=10, X=X, Y=y, train_indices=train_indices,