def test_subsample_regression_4k(self):
    """Check the split sizes returned by `_subsample` for 4000 regression rows.

    Builds a random 4000x3 feature frame with a random continuous target,
    runs `GoldenFeaturesTransformer._subsample` on it and verifies the number
    of rows in each of the four returned parts.
    """
    rows = 4000
    X = pd.DataFrame(
        np.random.rand(rows, 3), columns=[f"f{i}" for i in range(3)]
    )
    y = pd.Series(np.random.rand(rows), name="target")

    gft3 = GoldenFeaturesTransformer(self.automl_dir, REGRESSION)
    X_train, X_test, y_train, y_test = gft3._subsample(X, y)

    # assertEqual is required here: assertTrue(value, 2000) would treat 2000
    # as the failure message and pass for any non-zero row count.
    # NOTE(review): 2000 is the size the original assertions intended to
    # check; confirm it against the split that `_subsample` really produces.
    self.assertEqual(X_train.shape[0], 2000)
    self.assertEqual(X_test.shape[0], 2000)
    self.assertEqual(y_train.shape[0], 2000)
    self.assertEqual(y_test.shape[0], 2000)
def test_subsample_multiclass_4k(self):
    """Check `_subsample` on 4000 multiclass rows: split sizes and class coverage.

    Builds a random 4000x3 feature frame with integer labels drawn from
    {0, 1, 2, 3}, runs `GoldenFeaturesTransformer._subsample` on it, verifies
    the number of rows in each returned part, and verifies that every one of
    the four labels appears in both the train and the test target.
    """
    rows = 4000
    X = pd.DataFrame(
        np.random.rand(rows, 3), columns=[f"f{i}" for i in range(3)]
    )
    y = pd.Series(np.random.randint(0, 4, rows), name="target")

    gft3 = GoldenFeaturesTransformer(self.automl_dir, MULTICLASS_CLASSIFICATION)
    X_train, X_test, y_train, y_test = gft3._subsample(X, y)

    # assertEqual is required here: assertTrue(value, 2000) would treat 2000
    # as the failure message and pass for any non-zero row count.
    # NOTE(review): 2000 is the size the original assertions intended to
    # check; confirm it against the split that `_subsample` really produces.
    self.assertEqual(X_train.shape[0], 2000)
    self.assertEqual(X_test.shape[0], 2000)
    self.assertEqual(y_train.shape[0], 2000)
    self.assertEqual(y_test.shape[0], 2000)

    # Each of the four labels must be present in both parts of the split.
    for present_labels in (np.unique(y_train), np.unique(y_test)):
        for label in range(4):
            self.assertIn(label, present_labels)