Ejemplo n.º 1
0
    def test_class_weights(self, iris_X, iris_y):
        """Exercise ``fit`` with ``class_weights`` for int and str labels.

        Three scenarios are covered in order:

        1. a weight mapping keyed by the integer labels 0, 1 and 2;
        2. the same weights keyed by the string labels "a", "b" and "c";
        3. a mapping that only contains the key 0, for which ``fit`` is
           expected to raise ``ValueError``.
        """
        X_train, X_test, y_train, _ = train_test_split(
            iris_X, iris_y, test_size=0.5, random_state=42)

        # integer labels with one weight per label
        int_weights = {0: 0.7, 1: 0.2, 2: 0.1}
        int_forest = RangerForestClassifier()
        int_forest.fit(X_train, y_train, class_weights=int_weights)
        int_forest.predict(X_test)

        # same data, but with the labels translated to strings
        label_names = {0: "a", 1: "b", 2: "c"}
        str_labels = list(map(label_names.get, y_train))
        str_weights = {"a": 0.7, "b": 0.2, "c": 0.1}
        str_forest = RangerForestClassifier()
        str_forest.fit(X_train, str_labels, class_weights=str_weights)
        str_forest.predict(X_test)

        # a mapping with a single key must be rejected; the estimator fitted
        # on the string labels is reused for this call
        partial_weights = {0: 0.7}
        with pytest.raises(ValueError):
            str_forest.fit(X_train, y_train, class_weights=partial_weights)
    def test_predict(self, iris_X, iris_y):
        """Check that ``predict`` yields one prediction per input row."""
        classifier = RangerForestClassifier()
        classifier.fit(iris_X, iris_y)

        # predicting on the full training matrix
        n_rows = iris_X.shape[0]
        all_predictions = classifier.predict(iris_X)
        assert len(all_predictions) == n_rows

        # predicting on a slice that holds only the first row
        first_row = iris_X[:1, :]
        one_prediction = classifier.predict(first_row)
        assert len(one_prediction) == 1
Ejemplo n.º 3
0
    def test_categorical_features(self, iris_X, iris_y,
                                  respect_categorical_features):
        """Fit with an extra random 0/1 column declared as categorical.

        When ``respect_categorical_features`` is one of "partition",
        "ignore" or "order", fitting and predicting must run through.
        For any other value, ``fit`` is expected to raise ``ValueError``.
        """
        # append a random binary column; its index equals the original
        # number of columns
        n_rows = iris_X.shape[0]
        binary_values = [random.choice([0, 1]) for _ in range(n_rows)]
        binary_col = np.array(binary_values)[:, np.newaxis]
        X_with_cat = np.hstack((iris_X, binary_col))
        cat_indices = [iris_X.shape[1]]

        forest = RangerForestClassifier(
            respect_categorical_features=respect_categorical_features)

        accepted_modes = ("partition", "ignore", "order")
        if respect_categorical_features in accepted_modes:
            forest.fit(X_with_cat, iris_y, categorical_features=cat_indices)
            forest.predict(X_with_cat)
        else:
            # any other mode must be refused at fit time
            with pytest.raises(ValueError):
                forest.fit(X_with_cat,
                           iris_y,
                           categorical_features=cat_indices)
    def test_sample_fraction(self, iris_X, iris_y):
        """Check ``sample_fraction_`` after fitting and single-row predictions.

        ``sample_fraction`` is passed once as a one-element list and once as
        a bare float; in both cases the fitted ``sample_fraction_`` must
        compare equal to ``[0.69]``. The second estimator is then used to
        predict on a single row with each of the three prediction methods.
        """
        # given as a list
        list_rfc = RangerForestClassifier(sample_fraction=[0.69])
        list_rfc.fit(iris_X, iris_y)
        assert list_rfc.sample_fraction_ == [0.69]

        # given as a scalar
        scalar_rfc = RangerForestClassifier(sample_fraction=0.69)
        scalar_rfc.fit(iris_X, iris_y)
        assert scalar_rfc.sample_fraction_ == [0.69]

        # every prediction method must return exactly one result for one row
        single_row = iris_X[:1, :]
        for method_name in ("predict", "predict_proba", "predict_log_proba"):
            output = getattr(scalar_rfc, method_name)(single_row)
            assert len(output) == 1
    def test_accuracy(self, iris_X, iris_y):
        """Require > 0.9 test accuracy from both forest implementations.

        A ``RandomForestClassifier`` and a ``RangerForestClassifier`` are
        trained on the same split (33% held out, ``random_state=42``) and
        each is scored with ``accuracy_score`` on the held-out part.
        """
        X_train, X_test, y_train, y_test = train_test_split(
            iris_X, iris_y, test_size=0.33, random_state=42
        )

        # baseline: RandomForestClassifier
        baseline_forest = RandomForestClassifier()
        baseline_forest.fit(X_train, y_train)
        baseline_acc = accuracy_score(y_test, baseline_forest.predict(X_test))

        # model under test: RangerForestClassifier
        ranger_forest = RangerForestClassifier()
        ranger_forest.fit(X_train, y_train)
        ranger_acc = accuracy_score(y_test, ranger_forest.predict(X_test))

        # both classifiers are expected to clear the same threshold
        assert baseline_acc > 0.9
        assert ranger_acc > 0.9
Ejemplo n.º 6
0
def RangerForestIMPL(train_embeddings, train_labels,test_embeddings,test_labels,single_label=True):
    """Fit a RangerForestClassifier and evaluate its predictions on the test set.

    Args:
        train_embeddings: Training inputs passed to ``fit``.
        train_labels: Training targets passed to ``fit``.
        test_embeddings: Inputs passed to ``predict``.
        test_labels: Reference labels handed to ``single_label_metrics``.
        single_label: When it compares equal to ``True`` (the default), the
            predictions are scored with ``single_label_metrics``.

    Returns:
        The value of ``single_label_metrics(test_labels, predictions)`` when
        ``single_label == True``.

    NOTE(review): only the ``single_label == True`` branch is visible in this
    excerpt; if nothing follows, the function implicitly returns ``None`` for
    any other value -- confirm against the full file.
    """
    # default-constructed classifier; no hyperparameters are set here
    rfc = RangerForestClassifier()
    rfc.fit(train_embeddings, train_labels)
    predsranger = rfc.predict(test_embeddings)
    # reference labels go first, predictions second
    if single_label==True:
        return single_label_metrics(test_labels,predsranger)