def test_class_weights(self, iris_X, iris_y):
    """Fit with per-class weights for integer and string labels.

    A weights mapping that covers every label must fit and predict
    without error; a mapping that lists only one label while the
    targets are the integer iris labels must raise ``ValueError``.
    """
    split = train_test_split(iris_X, iris_y, test_size=0.5, random_state=42)
    X_train, X_test, y_train, y_test = split

    # Integer labels, one weight per class.
    int_weights = {0: 0.7, 1: 0.2, 2: 0.1}
    int_tree = RangerTreeClassifier()
    int_tree.fit(X_train, y_train, class_weights=int_weights)
    int_tree.predict(X_test)

    # Same data with the labels mapped to strings.
    tree = RangerTreeClassifier()
    label_names = {0: "a", 1: "b", 2: "c"}
    y_train_str = [label_names.get(label) for label in y_train]
    str_weights = {"a": 0.7, "b": 0.2, "c": 0.1}
    tree.fit(X_train, y_train_str, class_weights=str_weights)
    tree.predict(X_test)

    # Incomplete weights: refitting the same estimator is expected to fail.
    partial_weights = {0: 0.7}
    with pytest.raises(ValueError):
        tree.fit(X_train, y_train, class_weights=partial_weights)
def test_predict(self, iris_X, iris_y):
    """Check that ``predict`` returns one prediction per input row."""
    tree = RangerTreeClassifier()
    tree.fit(iris_X, iris_y)

    n_rows = iris_X.shape[0]
    full_pred = tree.predict(iris_X)
    assert len(full_pred) == n_rows

    # test with single record
    single_row = iris_X[0:1, :]
    single_pred = tree.predict(single_row)
    assert len(single_pred) == 1
def test_categorical_features(self, iris_X, iris_y, respect_categorical_features):
    """Fit with an extra categorical column under each handling mode.

    A randomly drawn 0/1 column is appended to the iris features and
    declared categorical. For the modes ``"partition"``, ``"ignore"``
    and ``"order"`` fit and predict must succeed; for any other value
    fit must raise ``ValueError``.
    """
    n_rows, n_cols = iris_X.shape[0], iris_X.shape[1]

    # add a categorical feature
    flags = np.array([random.choice([0, 1]) for _ in range(n_rows)])
    iris_X_c = np.hstack((iris_X, flags.reshape(-1, 1)))
    # The appended column sits right after the original features.
    categorical_features = [n_cols]

    tree = RangerTreeClassifier(
        respect_categorical_features=respect_categorical_features,
    )

    supported_modes = ("partition", "ignore", "order")
    if respect_categorical_features in supported_modes:
        tree.fit(iris_X_c, iris_y, categorical_features=categorical_features)
        tree.predict(iris_X_c)
    else:
        with pytest.raises(ValueError):
            tree.fit(iris_X_c, iris_y, categorical_features=categorical_features)
def test_sample_fraction(self, iris_X, iris_y):
    """Check ``sample_fraction`` handling for list and scalar inputs.

    Both spellings must leave ``sample_fraction_`` equal to ``[0.69]``
    after fitting. The scalar-built tree is then used to check that
    every prediction method returns a single entry for a single row.
    """
    # List form first, scalar form second; the scalar-built tree stays
    # bound to ``tree`` for the single-record checks below.
    for fraction in ([0.69], 0.69):
        tree = RangerTreeClassifier(sample_fraction=fraction)
        tree.fit(iris_X, iris_y)
        assert tree.sample_fraction_ == [0.69]

    # test with single record
    iris_X_record = iris_X[0:1, :]
    for predictor in (tree.predict, tree.predict_proba, tree.predict_log_proba):
        pred = predictor(iris_X_record)
        assert len(pred) == 1
def test_accuracy(self, iris_X, iris_y):
    """Check that the ranger tree reaches good accuracy on iris.

    A scikit-learn random forest is trained on the same split as a
    reference; both models must score above 0.9 accuracy on the
    held-out third of the data.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        iris_X, iris_y, test_size=0.33, random_state=42
    )

    # train and test a random forest classifier
    # random_state is pinned so the reference accuracy (and therefore the
    # threshold assertion below) does not vary between test runs.
    rf = RandomForestClassifier(random_state=42)
    rf.fit(X_train, y_train)
    y_pred_rf = rf.predict(X_test)
    rf_acc = accuracy_score(y_test, y_pred_rf)

    # train and test a ranger classifier
    ra = RangerTreeClassifier()
    ra.fit(X_train, y_train)
    y_pred_ra = ra.predict(X_test)
    ranger_acc = accuracy_score(y_test, y_pred_ra)

    # the accuracy should be good
    assert rf_acc > 0.9
    assert ranger_acc > 0.9