import numpy as np

from rr_extra_forest import RRExtraTreesClassifier


def RRExtra_rf_dis(n_trees, X, Y, train_indices, test_indices, seed):
    # Fit a random-rotation extra-trees ensemble on the training split.
    clf = RRExtraTreesClassifier(n_estimators=n_trees, random_state=seed,
                                 bootstrap=True, n_jobs=-1)
    clf.fit(X[train_indices], Y[train_indices])
    pred = clf.predict(X[test_indices])
    weight = clf.score(X[test_indices], Y[test_indices])
    # print(1 - clf.oob_score_)

    # Pairwise forest similarity: the fraction of trees in which two samples
    # end up in the same leaf.
    n_samples = X.shape[0]
    dis = np.zeros((n_samples, n_samples))
    for i in range(n_samples):
        dis[i][i] = 1
    res = clf.apply(X)
    for i in range(n_samples):
        for j in range(i + 1, n_samples):
            a = np.ravel(res[i])
            b = np.ravel(res[j])
            score = (a == b)
            d = float(score.sum()) / n_trees
            dis[i][j] = dis[j][i] = d

    # Keep only the columns corresponding to training samples, so each sample
    # is described by its similarity to the training set.
    X_features1 = np.transpose(dis)
    X_features2 = X_features1[train_indices]
    X_features3 = np.transpose(X_features2)
    return X_features3[train_indices], X_features3[test_indices], weight, pred
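# Minimal usage sketch (illustrative only): build RF-dis features on a small
# synthetic problem. The make_classification data, the 10-tree setting, and
# the index split below are assumptions for this example, not part of the
# original script.
from sklearn.datasets import make_classification

X_demo, y_demo = make_classification(n_samples=60, n_features=8, random_state=0)
tr = np.arange(0, 40)   # first 40 samples as training indices
te = np.arange(40, 60)  # last 20 samples as test indices
F_train, F_test, w, p = RRExtra_rf_dis(n_trees=10, X=X_demo, Y=y_demo,
                                       train_indices=tr, test_indices=te,
                                       seed=0)
# F_train is (40, 40): each training sample's similarity to every training
# sample; F_test is (20, 40). w is the ensemble's test accuracy.
print(F_train.shape, F_test.shape, w)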
from uci_loader import *

from rr_forest import RRForestClassifier
from sklearn.ensemble import RandomForestClassifier

X, y = getdataset('diabetes')
print(X.shape)

# Quick accuracy comparison on a 50/50 train/test split.
clf = RRForestClassifier(n_estimators=20, random_state=1000, n_jobs=-1)
indd = int(len(y) / 2)
clf.fit(X[:indd], y[:indd])
print("Random Rotation Forest Accuracy:",
      np.mean(clf.predict(X[indd:]) == y[indd:]))

classifier = RRExtraTreesClassifier(n_estimators=20)
classifier.fit(X[:indd], y[:indd])
print("Random Rotation Extra Trees Accuracy:",
      np.mean(classifier.predict(X[indd:]) == y[indd:]))

classifier = RandomForestClassifier(n_estimators=20)
classifier.fit(X[:indd], y[:indd])
print("Random Forest Accuracy:",
      np.mean(classifier.predict(X[indd:]) == y[indd:]))

train_indices, test_indices = splitdata(X=X, Y=y, ratio=0.5, seed=1000)
print("Start rest")

# view1: RF-dis features from a random-rotation forest. The argument list
# below was truncated in the original; the remaining arguments are filled in
# assuming RR_rf_dis follows the same signature as RRExtra_rf_dis above.
seed = 1000
X_features_train1, X_features_test1, w1, pred1 = RR_rf_dis(
    n_trees=10, X=X, Y=y, train_indices=train_indices,
    test_indices=test_indices, seed=seed)
from uci_comparison import compare_estimators
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from rr_forest import RRForestClassifier
from rr_extra_forest import RRExtraTreesClassifier

estimators = {
    'RandomForest': RandomForestClassifier(n_estimators=20),
    'RndRotForest': RRForestClassifier(n_estimators=20),
    'ExtraTrees': ExtraTreesClassifier(n_estimators=20),
    'RndRotETrees': RRExtraTreesClassifier(n_estimators=20),
}

# optionally, pass a list of UCI dataset identifiers as the datasets parameter,
# e.g. datasets=['iris', 'diabetes']
# optionally, pass a dict of scoring functions as the metrics parameter,
# e.g. metrics={'F1-score': f1_score}
compare_estimators(estimators)
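A short sketch of the optional arguments mentioned in the comments above. That compare_estimators accepts datasets and metrics keyword arguments in exactly this form is an assumption based on those comments, not something verified here.

from sklearn.metrics import f1_score

# Assumed call signature, following the comments above: restrict the
# comparison to two UCI datasets and report F1 in place of the default metric.
compare_estimators(estimators,
                   datasets=['iris', 'diabetes'],
                   metrics={'F1-score': f1_score})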
from uci_loader import *
from rr_forest import RRForestClassifier
from rr_extra_forest import RRExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier

X, y = getdataset('diabetes')
half = len(y) // 2

classifier = RRForestClassifier(n_estimators=20)
classifier.fit(X[:half], y[:half])
print("Random Rotation Forest Accuracy:",
      np.mean(classifier.predict(X[half:]) == y[half:]))

classifier = RRExtraTreesClassifier(n_estimators=20)
classifier.fit(X[:half], y[:half])
print("Random Rotation Extra Trees Accuracy:",
      np.mean(classifier.predict(X[half:]) == y[half:]))

classifier = RandomForestClassifier(n_estimators=20)
classifier.fit(X[:half], y[:half])
print("Random Forest Accuracy:",
      np.mean(classifier.predict(X[half:]) == y[half:]))