def test_OutSamplerTransformer_regressor(multi_output):
    """Check OutSamplerTransformer wrapped around a RandomForestRegressor.

    After ``fit``, ``transform(X)`` must agree (within 1e-10) with
    ``model.model.predict(X)``, the wrapper itself must be reported as neither
    a classifier nor a regressor, and ``get_feature_names()`` must return the
    expected column names.

    ``multi_output`` selects a two-column target (truthy) or a 1-D target
    (falsy).
    """
    # NOTE(review): `multi_output` is presumably supplied by a fixture or a
    # parametrize decorator that is not visible here -- confirm.
    # NOTE(review): another function with this exact name appears later in the
    # file; if both live in the same module the later one replaces this one.
    tolerance = 10 ** (-10)

    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = np.random.randn(100, 2) if multi_output else np.random.randn(100)

    base_regressor = RandomForestRegressor(n_estimators=10, random_state=123)
    model = OutSamplerTransformer(base_regressor, cv=10)
    model.fit(X, y)

    direct_pred = model.model.predict(X)
    transformed = model.transform(X)

    assert not is_classifier(model)
    assert not is_regressor(model)

    if not multi_output:
        # Single target: transform yields one column holding the prediction.
        assert np.abs(direct_pred - transformed[:, 0]).max() <= tolerance
        assert transformed.shape == (100, 1)
        assert model.get_feature_names() == ["RandomForestRegressor__target"]
        return

    # Two targets: each output column is compared with the matching prediction.
    for col in (0, 1):
        assert np.abs(direct_pred[:, col] - transformed[:, col]).max() <= tolerance
    assert transformed.shape == (100, 2)

    expected_names = []
    for d in range(y.shape[1]):
        expected_names.append("output%d__RandomForestRegressor__target" % d)
    assert model.get_feature_names() == expected_names
def test_OutSamplerTransformer_classifier():
    """Check OutSamplerTransformer wrapped around a RandomForestClassifier.

    Two scenarios are exercised with the same feature matrix:

    * a 0/1 target, where ``transform`` must return a single column equal
      (within 1e-10) to column 1 of ``model.model.predict_proba(X)``;
    * a three-class string target ("a", "b", "c"), where ``transform`` must
      return all three probability columns.

    In both cases ``get_feature_names()`` is compared with the expected names.
    """
    # NOTE(review): another function with this exact name appears later in the
    # file; if both live in the same module the later one replaces this one.
    tolerance = 10 ** (-10)

    np.random.seed(123)
    X = np.random.randn(100, 10)

    # --- 0/1 integer target -------------------------------------------------
    y = 1 * (np.random.randn(100) > 0)
    binary_forest = RandomForestClassifier(n_estimators=10, random_state=123)
    model = OutSamplerTransformer(binary_forest)
    model.fit(X, y)

    direct_proba = model.model.predict_proba(X)
    transformed = model.transform(X)

    assert not is_classifier(model)
    assert not is_regressor(model)

    gap = np.abs(direct_proba[:, 1] - transformed[:, 0])
    assert gap.max() <= tolerance
    assert transformed.shape == (100, 1)
    assert model.get_feature_names() == ["RandomForestClassifier__1"]

    # --- three-class string target -----------------------------------------
    labels = np.array(["a", "b", "c"])
    y = labels[np.random.randint(0, 3, 100)]
    string_forest = RandomForestClassifier(n_estimators=10, random_state=123)
    model = OutSamplerTransformer(string_forest)
    model.fit(X, y)

    direct_proba = model.model.predict_proba(X)
    transformed = model.transform(X)

    assert direct_proba.shape == (100, 3)
    assert transformed.shape == (100, 3)
    assert np.abs(direct_proba - transformed).max() <= tolerance

    expected_names = [
        "RandomForestClassifier__a",
        "RandomForestClassifier__b",
        "RandomForestClassifier__c",
    ]
    assert model.get_feature_names() == expected_names
def test_OutSamplerTransformer_regressor():
    """Check OutSamplerTransformer wrapped around a RandomForestRegressor.

    With a 1-D target, ``transform(X)`` must return a single column equal
    (within 1e-10) to ``model.model.predict(X)``; the wrapper must be reported
    as neither a classifier nor a regressor; and ``get_feature_names()`` must
    return ``["RandomForestRegressor__target"]``.
    """
    # NOTE(review): a function with this exact name also appears earlier in the
    # file; if both live in the same module this definition replaces it.
    tolerance = 10 ** (-10)

    np.random.seed(123)
    X = np.random.randn(100, 10)
    y = np.random.randn(100)

    base_regressor = RandomForestRegressor(n_estimators=10, random_state=123)
    model = OutSamplerTransformer(base_regressor, cv=10)
    model.fit(X, y)

    direct_pred = model.model.predict(X)
    transformed = model.transform(X)

    assert not is_classifier(model)
    assert not is_regressor(model)

    gap = np.abs(direct_pred - transformed[:, 0])
    assert gap.max() <= tolerance
    assert transformed.shape == (100, 1)

    expected_names = ["RandomForestRegressor__target"]
    assert model.get_feature_names() == expected_names
def test_OutSamplerTransformer_classifier(multi_output):
    """Check OutSamplerTransformer wrapped around a RandomForestClassifier.

    Two scenarios are exercised with the same feature matrix, each with either
    a single target or two target columns depending on ``multi_output``:

    * 0/1 targets: ``transform`` must return one column per output, equal
      (within 1e-10) to column 1 of the matching ``predict_proba`` result;
    * three-class string targets ("a", "b", "c"): ``transform`` must return
      the three probability columns of every output, side by side.
    """
    # NOTE(review): `multi_output` is presumably supplied by a fixture or a
    # parametrize decorator that is not visible here -- confirm.
    # NOTE(review): a function with this exact name also appears earlier in the
    # file; if both live in the same module this definition replaces it.
    tolerance = 10 ** (-10)

    np.random.seed(123)
    X = np.random.randn(100, 10)

    # --- 0/1 integer targets -----------------------------------------------
    noise = np.random.randn(100, 2) if multi_output else np.random.randn(100)
    y = 1 * (noise > 0)

    binary_forest = RandomForestClassifier(n_estimators=10, random_state=123)
    model = OutSamplerTransformer(binary_forest)
    model.fit(X, y)

    direct_proba = model.model.predict_proba(X)
    transformed = model.transform(X)

    assert not is_classifier(model)
    assert not is_regressor(model)

    if multi_output:
        # predict_proba is indexed per output here; column 1 of each entry is
        # compared with the matching transform column.
        for k in range(2):
            assert np.abs(direct_proba[k][:, 1] - transformed[:, k]).max() <= tolerance
        assert transformed.shape == (100, 1 + 1 * multi_output)
        assert model.get_feature_names() == [
            "output%d__RandomForestClassifier__1" % d for d in range(y.shape[1])
        ]
    else:
        assert np.abs(direct_proba[:, 1] - transformed[:, 0]).max() <= tolerance
        assert transformed.shape == (100, 1 + 1 * multi_output)
        assert model.get_feature_names() == ["RandomForestClassifier__1"]

    # --- three-class string targets ----------------------------------------
    labels = np.array(["a", "b", "c"])
    if multi_output:
        y = labels[np.random.randint(0, 3, 200).reshape((100, 2))]
    else:
        y = labels[np.random.randint(0, 3, 100)]

    string_forest = RandomForestClassifier(n_estimators=10, random_state=123)
    model = OutSamplerTransformer(string_forest)
    model.fit(X, y)

    direct_proba = model.model.predict_proba(X)
    transformed = model.transform(X)

    if not multi_output:
        assert direct_proba.shape == (100, 3)
        assert transformed.shape == (100, 3)
        assert np.abs(direct_proba - transformed).max() <= tolerance
        assert model.get_feature_names() == [
            "RandomForestClassifier__a",
            "RandomForestClassifier__b",
            "RandomForestClassifier__c",
        ]
        return

    # One probability block per output, laid out side by side in `transformed`.
    assert isinstance(direct_proba, list)
    assert len(direct_proba) == y.shape[1]
    assert transformed.shape == (100, 6)
    assert np.abs(direct_proba[0] - transformed[:, 0:3]).max() <= tolerance
    assert np.abs(direct_proba[1] - transformed[:, 3:]).max() <= tolerance