Example 1
def __init__(self,
             n_estimators=100,
             max_features="auto",
             max_depth=6,
             min_samples_split=2,
             min_weight_fraction_leaf=0.0,
             n_jobs=-1,
             random_state=None,
             random_seed=0,
             **kwargs):
    # Collect the hyperparameters that are forwarded to scikit-learn.
    parameters = {
        "n_estimators": n_estimators,
        "max_features": max_features,
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_weight_fraction_leaf": min_weight_fraction_leaf,
        "n_jobs": n_jobs
    }
    parameters.update(kwargs)
    # random_state is deprecated in favor of random_seed; honor it if passed.
    random_seed = deprecate_arg("random_state", "random_seed",
                                random_state, random_seed)
    # Wrap the underlying scikit-learn estimator.
    et_classifier = SKExtraTreesClassifier(random_state=random_seed,
                                           **parameters)
    super().__init__(parameters=parameters,
                     component_obj=et_classifier,
                     random_seed=random_seed)
Example 2
def test_feature_importance(X_y_binary):
    X, y = X_y_binary

    clf = ExtraTreesClassifier(n_jobs=1)
    sk_clf = SKExtraTreesClassifier(max_depth=6, random_state=0, n_jobs=1)
    sk_clf.fit(X, y)
    sk_feature_importance = sk_clf.feature_importances_

    clf.fit(X, y)
    feature_importance = clf.feature_importance

    np.testing.assert_almost_equal(sk_feature_importance,
                                   feature_importance,
                                   decimal=5)
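`X_y_binary` is a pytest fixture defined elsewhere in the test suite; the test also assumes `numpy as np` plus imports of both ExtraTreesClassifier variants. A minimal stand-in, assuming the fixture simply returns a seeded binary classification dataset, could be:

import pytest
from sklearn.datasets import make_classification

@pytest.fixture
def X_y_binary():
    # Hypothetical stand-in: a small, reproducible binary problem.
    return make_classification(n_samples=100, n_features=20,
                               n_informative=2, random_state=0)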
Example 3
def test_fit_predict_binary(X_y_binary):
    X, y = X_y_binary

    sk_clf = SKExtraTreesClassifier(max_depth=6, random_state=0)
    sk_clf.fit(X, y)
    y_pred_sk = sk_clf.predict(X)
    y_pred_proba_sk = sk_clf.predict_proba(X)

    clf = ExtraTreesClassifier()
    clf.fit(X, y)
    y_pred = clf.predict(X)
    y_pred_proba = clf.predict_proba(X)

    np.testing.assert_almost_equal(y_pred, y_pred_sk, decimal=5)
    np.testing.assert_almost_equal(y_pred_proba, y_pred_proba_sk, decimal=5)
Example 4
def test_fit_predict_multi(X_y_multi):
    X, y = X_y_multi

    sk_clf = SKExtraTreesClassifier(max_depth=6, random_state=0)
    sk_clf.fit(X, y)
    y_pred_sk = sk_clf.predict(X)
    y_pred_proba_sk = sk_clf.predict_proba(X)

    clf = ExtraTreesClassifier()
    fitted = clf.fit(X, y)
    assert isinstance(fitted, ExtraTreesClassifier)

    y_pred = clf.predict(X)
    y_pred_proba = clf.predict_proba(X)

    np.testing.assert_almost_equal(y_pred, y_pred_sk, decimal=5)
    np.testing.assert_almost_equal(y_pred_proba, y_pred_proba_sk, decimal=5)
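The `isinstance` assertion holds because `fit` returns the fitted component itself, which also permits chained calls, for example (illustrative only):

# fit() returning self allows a one-expression train-and-predict:
y_pred = ExtraTreesClassifier().fit(X, y).predict(X)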
Example 5
# model_selection, RandomForestClassifier and the sklearn ExtraTrees alias
# are used below but were missing from the original imports
from sklearn import model_selection
from sklearn.ensemble import (RandomForestClassifier,
                              ExtraTreesClassifier as SKExtraTreesClassifier)
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.datasets import load_breast_cancer, load_iris, load_wine
from extra_trees.ensemble.forest import ExtraTreesClassifier

# prepare models
classification_models = [
    ('Logistic', LogisticRegression()),
    ('Nearest Neighbors', KNeighborsClassifier()),
    ('SVM', SVC()),
    ('DecisionTree', DecisionTreeClassifier()),
    ('RandomForest', RandomForestClassifier()),
    ('ExtraTrees (SciKit)', SKExtraTreesClassifier()),
    ('ExtraTrees', ExtraTreesClassifier()),
]
seed = 7

print("breast_cancer")
breast_cancer = load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target
# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
for name, model in classification_models:
    # shuffle=True is required for random_state to take effect
    # (recent scikit-learn versions raise an error otherwise)
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X, y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    print("%s: %f (%f)" % (name, cv_results.mean(), cv_results.std()))
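The loop collects per-model scores in `results` and `names`; a plausible continuation (not part of the original snippet) is the usual side-by-side boxplot comparison:

import matplotlib.pyplot as plt

# Compare the cross-validation score distributions across models.
fig = plt.figure(figsize=(10, 6))
fig.suptitle("Cross-validation accuracy on breast_cancer")
ax = fig.add_subplot(111)
ax.boxplot(results)
ax.set_xticklabels(names, rotation=45, ha="right")
plt.show()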
Example 6
    "ExtraTrees (SciKit)",
    "ExtraTrees",
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
    MLPClassifier(alpha=1),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    AdaBoostClassifier(),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=2),
    SKExtraTreesClassifier(n_estimators=10, max_features=2),
    ExtraTreesClassifier(n_estimators=10, max_features=2),
]

X, y = make_classification(n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           random_state=1,
                           n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
linearly_separable = (X, y)

datasets = [
    make_moons(noise=0.3, random_state=0),
    make_circles(noise=0.2, factor=0.5, random_state=1),
    linearly_separable,
]
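The fragment stops after building `datasets`; a sketch of the evaluation it is setting up, modeled on scikit-learn's classifier-comparison example (an assumption, not the original continuation):

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Score every classifier on every dataset after a common split and scaling.
for ds_X, ds_y in datasets:
    ds_X = StandardScaler().fit_transform(ds_X)
    X_train, X_test, y_train, y_test = train_test_split(
        ds_X, ds_y, test_size=0.4, random_state=42)
    for clf_name, clf in zip(names, classifiers):
        clf.fit(X_train, y_train)
        print("{}: {:.3f}".format(clf_name, clf.score(X_test, y_test)))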
Example 7
# Imports needed by this fragment (not shown in the source):
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import (RandomForestClassifier,
                              ExtraTreesClassifier as SKExtraTreesClassifier)
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from extra_trees.ensemble.forest import ExtraTreesClassifier

# The snippet begins mid-call; this opening is an assumption, modeled on
# scikit-learn's calibration comparison example, keeping the surviving
# n_redundant=2 argument:
X, y = make_classification(n_samples=100000, n_features=20, n_informative=2,
                           n_redundant=2)

train_samples = 100  # Samples used for training the models

X_train = X[:train_samples]
X_test = X[train_samples:]
y_train = y[:train_samples]
y_test = y[train_samples:]

# Create classifiers
lr = LogisticRegression()
gnb = GaussianNB()
svc = LinearSVC(C=1.0)
dtc = DecisionTreeClassifier(min_samples_split=2)
rfc = RandomForestClassifier(n_estimators=100)
sk_etc = SKExtraTreesClassifier(n_estimators=100)
etc = ExtraTreesClassifier(n_estimators=100)

# #############################################################################
# Plot calibration plots

plt.figure(figsize=(10, 10))
ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
ax2 = plt.subplot2grid((3, 1), (2, 0))

ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
for clf, name in [
    (lr, 'Logistic'),
    (gnb, 'Naive Bayes'),
    (svc, 'Support Vector Classification'),
    (dtc, 'Decision Tree'),