def test_class_weights(self, iris_X, iris_y):
    """Exercise ``fit(..., class_weights=...)`` with int and str labels.

    A complete weight mapping must allow fitting and predicting for both
    integer labels and the same labels mapped to strings. A mapping that
    only has an entry for class ``0`` is expected to raise ``ValueError``.
    """
    X_train, X_test, y_train, _ = train_test_split(
        iris_X, iris_y, test_size=0.5, random_state=42
    )

    # Integer labels, one weight per class.
    int_weights = {0: 0.7, 1: 0.2, 2: 0.1}
    int_forest = RangerForestClassifier()
    int_forest.fit(X_train, y_train, class_weights=int_weights)
    int_forest.predict(X_test)

    # Same labels translated to strings, weights keyed by those strings.
    label_names = {0: "a", 1: "b", 2: "c"}
    str_labels = [label_names.get(label) for label in y_train]
    str_weights = {"a": 0.7, "b": 0.2, "c": 0.1}
    str_forest = RangerForestClassifier()
    str_forest.fit(X_train, str_labels, class_weights=str_weights)
    str_forest.predict(X_test)

    # Weights covering only one class must be rejected; the previously
    # fitted string-label forest is reused for this check.
    incomplete_weights = {0: 0.7}
    with pytest.raises(ValueError):
        str_forest.fit(X_train, y_train, class_weights=incomplete_weights)
def test_predict(self, iris_X, iris_y):
    """Check that ``predict`` returns one prediction per input row.

    Verified both for the full feature matrix and for a single-row slice.
    """
    classifier = RangerForestClassifier()
    classifier.fit(iris_X, iris_y)

    all_predictions = classifier.predict(iris_X)
    assert len(all_predictions) == iris_X.shape[0]

    # A one-row slice keeps the input two-dimensional.
    first_row = iris_X[0:1, :]
    assert len(classifier.predict(first_row)) == 1
def test_categorical_features(self, iris_X, iris_y, respect_categorical_features):
    """Fit with an appended 0/1 column declared as categorical.

    For ``respect_categorical_features`` in ``"partition"``, ``"ignore"``
    or ``"order"`` the forest must fit and predict; for any other value
    ``fit`` is expected to raise ``ValueError``.
    """
    # Append a random binary column as the last feature.
    binary_values = [random.choice([0, 1]) for _ in range(iris_X.shape[0])]
    binary_column = np.array(binary_values)[:, np.newaxis]
    augmented_X = np.hstack((iris_X, binary_column))

    # The new column sits right after the original features.
    categorical_features = [iris_X.shape[1]]

    forest = RangerForestClassifier(
        respect_categorical_features=respect_categorical_features,
    )

    accepted_modes = ("partition", "ignore", "order")
    if respect_categorical_features in accepted_modes:
        forest.fit(augmented_X, iris_y, categorical_features=categorical_features)
        forest.predict(augmented_X)
    else:
        with pytest.raises(ValueError):
            forest.fit(
                augmented_X, iris_y, categorical_features=categorical_features
            )
def test_sample_fraction(self, iris_X, iris_y):
    """Check how ``sample_fraction`` is stored and that prediction works.

    Whether passed as a one-element list or as a bare float, the fitted
    estimator must expose ``sample_fraction_ == [0.69]``. The last fitted
    estimator must then return exactly one result for a single-row input
    from ``predict``, ``predict_proba`` and ``predict_log_proba``.
    """
    classifier = None
    for fraction in ([0.69], 0.69):
        classifier = RangerForestClassifier(sample_fraction=fraction)
        classifier.fit(iris_X, iris_y)
        assert classifier.sample_fraction_ == [0.69]

    # Single-record checks run against the estimator built from the float.
    first_row = iris_X[0:1, :]
    for method_name in ("predict", "predict_proba", "predict_log_proba"):
        result = getattr(classifier, method_name)(first_row)
        assert len(result) == 1
def test_accuracy(self, iris_X, iris_y):
    """Require better than 0.9 accuracy on a held-out third of the data.

    Both a ``RandomForestClassifier`` and a ``RangerForestClassifier`` are
    trained on the same split and scored with ``accuracy_score``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        iris_X, iris_y, test_size=0.33, random_state=42
    )

    # Reference random forest.
    reference_forest = RandomForestClassifier()
    reference_forest.fit(X_train, y_train)
    reference_accuracy = accuracy_score(y_test, reference_forest.predict(X_test))

    # Ranger forest on the identical split.
    ranger_forest = RangerForestClassifier()
    ranger_forest.fit(X_train, y_train)
    ranger_accuracy = accuracy_score(y_test, ranger_forest.predict(X_test))

    # Both models are expected to do well on this data.
    assert reference_accuracy > 0.9
    assert ranger_accuracy > 0.9
def RangerForestIMPL(train_embeddings, train_labels, test_embeddings, test_labels, single_label=True):
    """Train a ``RangerForestClassifier`` and score it on the test set.

    Args:
        train_embeddings: Features passed to ``fit``.
        train_labels: Targets passed to ``fit``.
        test_embeddings: Features passed to ``predict``.
        test_labels: Reference labels handed to ``single_label_metrics``.
        single_label: When true, the predictions are scored with
            ``single_label_metrics``.

    Returns:
        The value returned by ``single_label_metrics(test_labels,
        predictions)`` when ``single_label`` is true, otherwise ``None``.
    """
    classifier = RangerForestClassifier()
    classifier.fit(train_embeddings, train_labels)
    predictions = classifier.predict(test_embeddings)

    # Truthiness check instead of ``== True`` (PEP 8).
    if single_label:
        return single_label_metrics(test_labels, predictions)

    # No scoring path exists here for the non-single-label case; make the
    # ``None`` result explicit rather than falling off the end.
    return None