コード例 #1
0
 def test_predict_proba_in_regression(self):
     """`predict_proba()` on a model fitted to the boston target must raise AutoMLException."""
     regressor = AutoML(
         explain_level=0,
         verbose=0,
         random_state=1,
         results_path=self.automl_dir,
     )
     regressor.fit(boston.data, boston.target)
     # Asking for class probabilities in a regression task is the error
     # condition under test.
     with self.assertRaises(AutoMLException) as context:
         regressor.predict_proba(boston.data)
コード例 #2
0
    def test_too_small_time_limit(self):
        """Fitting 100000 x 100 random data with `total_time_limit=1` must raise AutoMLException."""
        n_rows = 100000
        features = np.random.uniform(size=(n_rows, 100))
        labels = np.random.randint(0, 2, size=(n_rows, ))

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            train_ensemble=False,
        )
        # The `fit()` call itself is the operation expected to fail.
        with self.assertRaises(AutoMLException) as context:
            automl.fit(features, labels)
コード例 #3
0
 def test_new_directory(self):
     """Directory does not exist before fitting; it must exist afterwards."""
     # Precondition: the results directory is absent.
     # assertFalse reports the offending value directly instead of the
     # opaque "False is not true" produced by assertTrue(not ...).
     self.assertFalse(os.path.exists(self.automl_dir))
     # Create the model pointing at the missing directory
     model = AutoML(results_path=self.automl_dir)
     # Generate a small classification dataset
     X, y = datasets.make_classification(n_samples=30)
     # AutoML only validates constructor params on `fit()` call
     model.fit(X, y)
     # Postcondition: the directory exists after fitting
     self.assertTrue(os.path.exists(self.automl_dir))
コード例 #4
0
    def test_different_input_types(self):
        """Test the different data input types for AutoML.

        The same regression data is fed to a fresh model as numpy arrays,
        as pandas DataFrames and as plain Python lists. In every case
        `fit(...).predict(...)` must return a numpy array with one entry per
        input row.
        """
        X, y = datasets.make_regression()
        X_pandas = pd.DataFrame(X)
        y_pandas = pd.DataFrame(y)

        input_variants = [
            ("numpy arrays", X, y),
            ("pandas dataframes", X_pandas, y_pandas),
            # The list variant previously built these lists but then fitted
            # on the DataFrames again, so list input was never exercised.
            # NOTE(review): y becomes a list of single-element lists (same
            # column layout as y_pandas) - confirm AutoML accepts this shape.
            ("lists", X_pandas.values.tolist(), y_pandas.values.tolist()),
        ]

        for variant, X_in, y_in in input_variants:
            # A fresh model per variant so no fitted state carries over.
            model = AutoML(
                total_time_limit=10,
                explain_level=0,
                start_random_models=1,
                algorithms=["Linear"],
                verbose=0,
            )
            pred = model.fit(X_in, y_in).predict(X_in)

            # The variant name is passed as msg to identify a failing case.
            self.assertIsInstance(pred, np.ndarray, variant)
            self.assertEqual(len(pred), X.shape[0], variant)

            del model
コード例 #5
0
    def test_one_column_input_regression(self):
        """Fit and predict on a single-column DataFrame with a random float target.

        The prediction must contain a "prediction" column and one row per
        input row.
        """
        a = AutoML(results_path=self.automl_dir,
                   total_time_limit=5,
                   explain_level=0)
        a.set_advanced(start_random_models=1)

        # Single source of truth for the sample count used in the assertions.
        n_rows = 100
        X = pd.DataFrame({"feature_1": np.random.rand(n_rows)})
        y = np.random.rand(n_rows)

        a.fit(X, y)
        p = a.predict(X)

        # assertIn / assertEqual print the actual values on failure, unlike
        # assertTrue(<comparison>).
        self.assertIn("prediction", p.columns)
        self.assertEqual(p.shape[0], n_rows)
コード例 #6
0
    def test_tune_only_default(self):
        """With `tuning_mode="Insane"` and `total_time_limit=1`, exactly one model must be recorded."""
        feature_names = [f"f{i}" for i in range(3)]
        features = pd.DataFrame(np.random.rand(self.rows, 3),
                                columns=feature_names)
        labels = np.random.randint(0, 2, self.rows)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            tuning_mode="Insane",
            algorithms=["Xgboost"],
        )
        automl.fit(features, labels)

        # Only a single entry is expected in the private model list.
        model_count = len(automl._models)
        self.assertEqual(model_count, 1)
コード例 #7
0
    def test_regression(self):
        """Fit Xgboost on a random float target; the prediction must have the single column "prediction"."""
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.rand(self.rows)

        automl = AutoML(results_path=self.automl_dir,
                        total_time_limit=1,
                        algorithms=["Xgboost"],
                        train_ensemble=False)
        automl.set_advanced(start_random_models=1)
        automl.fit(X, y)
        pred = automl.predict(X)
        # One equality check covers both "exactly one column" and "it is
        # named prediction", and shows the actual columns on failure.
        self.assertEqual(pred.columns.tolist(), ["prediction"])
コード例 #8
0
    def test_one_column_input_regression(self):
        """Fit and predict on a one-feature regression dataset; predictions must be a numpy array with one entry per row."""
        features, target = datasets.make_regression(n_features=1)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=5,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(features, target)
        predictions = automl.predict(features)

        expected_rows = features.shape[0]
        self.assertIsInstance(predictions, np.ndarray)
        self.assertEqual(len(predictions), expected_rows)
コード例 #9
0
    def test_encoding_strange_characters(self):
        """Fitting must succeed when the two class labels are non-ASCII characters."""
        feature_names = [f"f{i}" for i in range(3)]
        features = pd.DataFrame(np.random.rand(self.rows, 3),
                                columns=feature_names)
        # Two unusual Unicode labels, repeated and then shuffled.
        half = int(self.rows / 2)
        labels = np.random.permutation(["ɛ", "🂲"] * half)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Baseline"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        # The test passes as long as fit() does not raise.
        automl.fit(features, labels)
コード例 #10
0
 def test_fit_returns_self(self):
     """Tests if the `fit()` method returns an AutoML instance. This allows to quickly implement one-liners with AutoML"""
     model = AutoML()
     fitted = model.fit(iris.data, iris.target)
     # assertIsInstance reports the actual returned object on failure and
     # matches the assertion style used by the other tests.
     self.assertIsInstance(
         fitted,
         AutoML,
         "`fit()` method must return 'self'",
     )
コード例 #11
0
 def test_breast_cancer_dataset(self):
     """Tests AutoML on the breast cancer data (binary classification): score must exceed 0.5."""
     features = breast_cancer.data
     labels = breast_cancer.target

     model = AutoML(explain_level=0, verbose=0, random_state=1)
     fitted = model.fit(features, labels)
     # Scored on the same data the model was fitted on.
     score = fitted.score(features, labels)
     self.assertGreater(score, 0.5)
コード例 #12
0
 def test_empty_directory(self):
     """Directory exists and is empty; fitting must use it without error."""
     # Precondition: the directory does not exist yet.
     # assertFalse gives a clearer failure than assertTrue(not ...).
     self.assertFalse(os.path.exists(self.automl_dir))
     # Create the (empty) directory up front
     os.mkdir(self.automl_dir)
     # Sanity check: the directory now exists
     self.assertTrue(os.path.exists(self.automl_dir))
     # Create automl pointing at the pre-existing directory
     model = AutoML(results_path=self.automl_dir)
     # Generate a small classification dataset
     X, y = datasets.make_classification(n_samples=30)
     # AutoML only validates constructor params on `fit()` call
     model.fit(X, y)
     # The directory must still be there after fitting
     self.assertTrue(os.path.exists(self.automl_dir))
コード例 #13
0
    def test_disable_stack_models_adjusted_validation(self):
        """In "Compete" mode with `total_time_limit=5`, all stacking flags must be False after fit."""
        n_rows = 100
        features = np.random.uniform(size=(n_rows, 2))
        labels = np.random.randint(0, 2, size=(n_rows, ))
        # Overwrite both feature columns with the label and its negation.
        features[:, 0] = labels
        features[:, 1] = -labels

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=5,
            mode="Compete",
        )
        automl.fit(features, labels)

        # the stacking should be disabled
        # because of small time limit
        self.assertFalse(automl._stack_models)
        self.assertFalse(automl.tuner._stack_models)
        self.assertFalse(automl._time_ctrl._is_stacking)
コード例 #14
0
 def test_no_constructor_args(self):
     """Tests the use of AutoML without passing any args. Should work without any arguments"""
     # Build the model with no constructor arguments at all
     default_model = AutoML()
     # Fit on iris and score on the same data
     fitted = default_model.fit(iris.data, iris.target)
     score = fitted.score(iris.data, iris.target)
     # The score only has to exceed 0.5
     self.assertGreater(score, 0.5)
コード例 #15
0
    def test_one_column_input_bin_class(self):
        """Fit and predict on a single-column DataFrame with a random 0/1 target; predictions must be a numpy array with one entry per row."""
        features = pd.DataFrame({"feature_1": np.random.rand(100)})
        # Threshold uniform noise at 0.5 to get integer 0/1 labels.
        noise = np.random.rand(features.shape[0])
        labels = (noise > 0.5).astype(int)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=5,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(features, labels)
        predictions = automl.predict(features)

        self.assertIsInstance(predictions, np.ndarray)
        self.assertEqual(len(predictions), features.shape[0])
コード例 #16
0
 def test_get_params(self):
     """
     Passes params in AutoML constructor and uses `get_params()` after fitting.
     Initial params must be equal to the ones returned by `get_params()`.
     """
     # Create model
     model = AutoML(hill_climbing_steps=3, start_random_models=1)
     # Get params before fit
     params_before_fit = model.get_params()
     # Generate data
     X, y = datasets.make_classification(n_samples=30)
     # Fit data
     model.fit(X, y)
     # Get params after fit
     params_after_fit = model.get_params()
     # Assert before and after params are equal.
     # `assertEquals` is a deprecated alias that was removed in Python 3.12;
     # `assertEqual` is the supported spelling.
     self.assertEqual(params_before_fit, params_after_fit)
コード例 #17
0
    def test_bin_class_01(self):
        """Binary classification with integer 0/1 labels.

        The prediction must contain the columns "prediction_0",
        "prediction_1" and "label", and the "label" column must hold at most
        two distinct values, at least one of which is 0 or 1.
        """
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 2, self.rows)

        automl = AutoML(results_path=self.automl_dir,
                        total_time_limit=1,
                        algorithms=["Xgboost"],
                        train_ensemble=False)
        automl.set_advanced(start_random_models=1)
        automl.fit(X, y)
        pred = automl.predict(X)

        # Convert once instead of on every loop iteration.
        pred_columns = pred.columns.tolist()
        for col in ["prediction_0", "prediction_1", "label"]:
            # assertIn names the missing column when it fails.
            self.assertIn(col, pred_columns)

        u = np.unique(pred["label"].values)
        self.assertTrue(0 in u or 1 in u)
        self.assertLessEqual(len(u), 2)
コード例 #18
0
    def test_disable_stack_models(self):
        """In "Compete" mode with split validation and `total_time_limit=5`, all stacking flags must be False after fit."""
        n_rows = 100
        features = np.random.uniform(size=(n_rows, 2))
        labels = np.random.randint(0, 2, size=(n_rows, ))
        # Overwrite both feature columns with the label and its negation.
        features[:, 0] = labels
        features[:, 1] = -labels

        split_validation = {"validation_type": "split"}
        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=5,
            mode="Compete",
            validation_strategy=split_validation,
        )
        automl.fit(features, labels)

        # Stacking must be off on the model, its tuner and its time controller.
        self.assertFalse(automl._stack_models)
        self.assertFalse(automl.tuner._stack_models)
        self.assertFalse(automl._time_ctrl._is_stacking)
コード例 #19
0
    def test_category_data_type(self):
        """Fitting CatBoost must succeed when one feature column has pandas "category" dtype."""
        feature_names = [f"f{i}" for i in range(3)]
        features = pd.DataFrame(np.random.rand(self.rows, 3),
                                columns=feature_names)
        labels = np.random.randint(0, 2, self.rows)

        # Cast a single column to the categorical dtype.
        features["f1"] = features["f1"].astype("category")

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["CatBoost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        # The test passes as long as fit() does not raise.
        automl.fit(features, labels)
コード例 #20
0
    def test_repeated_kfold(self):
        """Repeated k-fold validation must leave one learner file and one training log per (repeat, fold).

        Fits a single Random Forest with 2 folds x 3 repeats and checks the
        files listed in the "1_Default_RandomForest" results sub-directory.
        """
        REPEATS = 3
        FOLDS = 2

        a = AutoML(
            results_path=self.automl_dir,
            total_time_limit=10,
            algorithms=["Random Forest"],
            train_ensemble=False,
            validation_strategy={
                "validation_type": "kfold",
                "k_folds": FOLDS,
                "repeats": REPEATS,
                "shuffle": True,
                "stratify": True,
            },
            start_random_models=1,
        )

        X, y = datasets.make_classification(
            n_samples=100,
            n_features=5,
            n_informative=4,
            n_redundant=1,
            n_classes=2,
            n_clusters_per_class=3,
            n_repeated=0,
            shuffle=False,
            random_state=0,
        )
        X = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])

        a.fit(X, y)

        result_files = os.listdir(
            os.path.join(self.automl_dir, "1_Default_RandomForest"))

        cnt = 0
        for repeat in range(REPEATS):
            for fold in range(FOLDS):
                learner_name = construct_learner_name(fold, repeat, REPEATS)
                # assertIn names the missing file when the check fails.
                self.assertIn(f"{learner_name}.random_forest", result_files)
                self.assertIn(f"{learner_name}_training.log", result_files)
                cnt += 1
        # Previously `assertTrue(cnt, 6)`, which treats 6 as the failure
        # message and passes for any non-zero cnt; compare the count instead.
        self.assertEqual(cnt, REPEATS * FOLDS)
コード例 #21
0
 def test_iris_dataset(self):
     """Tests AutoML on the iris dataset (multiclass classification): score must exceed 0.5."""
     features = iris.data
     labels = iris.target

     model = AutoML(
         explain_level=0,
         verbose=0,
         random_state=1,
         results_path=self.automl_dir,
     )
     fitted = model.fit(features, labels)
     # Scored on the same data the model was fitted on.
     score = fitted.score(features, labels)
     self.assertGreater(score, 0.5)
コード例 #22
0
    def test_custom_init(self):
        """With custom search settings passed to the constructor, more than four models must be recorded after fit."""
        n_rows = 30
        features = np.random.uniform(size=(n_rows, 2))
        labels = np.random.randint(0, 2, size=(n_rows, ))

        automl = AutoML(
            results_path=self.automl_dir,
            model_time_limit=1,
            algorithms=["Xgboost"],
            explain_level=0,
            train_ensemble=False,
            stack_models=False,
            validation_strategy={"validation_type": "split"},
            start_random_models=3,
            hill_climbing_steps=1,
            top_models_to_improve=1,
        )
        automl.fit(features, labels)

        # Count the entries of the private model list.
        model_count = len(automl._models)
        self.assertGreater(model_count, 4)
コード例 #23
0
 def test_boston_dataset(self):
     """Tests AutoML on the boston dataset (regression): score must exceed 0.5."""
     features = boston.data
     target = boston.target

     model = AutoML(
         explain_level=0,
         verbose=0,
         random_state=1,
         results_path=self.automl_dir,
     )
     fitted = model.fit(features, target)
     # Scored on the same data the model was fitted on.
     score = fitted.score(features, target)
     self.assertGreater(score, 0.5)
コード例 #24
0
    def test_regression(self):
        """Fit Xgboost on a random float target; predictions must be a numpy array with one entry per row."""
        feature_names = [f"f{i}" for i in range(3)]
        features = pd.DataFrame(np.random.rand(self.rows, 3),
                                columns=feature_names)
        target = np.random.rand(self.rows)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(features, target)
        predictions = automl.predict(features)

        expected_rows = features.shape[0]
        self.assertIsInstance(predictions, np.ndarray)
        self.assertEqual(len(predictions), expected_rows)
コード例 #25
0
    def test_regression_missing_target(self):
        """Regression with one missing target value; the prediction must have the single column "prediction"."""
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(np.random.rand(self.rows), name="target")

        # Blank out one target value to simulate a missing label.
        y.iloc[1] = None

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
        )
        automl.set_advanced(start_random_models=1)
        automl.fit(X, y)
        pred = automl.predict(X)
        # One equality check covers both "exactly one column" and "it is
        # named prediction", and shows the actual columns on failure.
        self.assertEqual(pred.columns.tolist(), ["prediction"])
コード例 #26
0
    def test_bin_class_AB(self):
        """Binary classification with string labels "a" and "B".

        The predictions must contain at most two distinct values, at least
        one of which is "a" or "B".
        """
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.permutation(["a", "B"] * int(self.rows / 2))

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        # A second, unused predict() call was dropped; one is enough.
        pred = automl.predict(X)
        u = np.unique(pred)
        self.assertTrue("a" in u or "B" in u)
        # assertLessEqual reports the actual count on failure.
        self.assertLessEqual(len(u), 2)
コード例 #27
0
    def test_score_without_y(self):
        """Tests the use of `score()` without passing y. Should raise AutoMLException"""
        model = AutoML(explain_level=0, verbose=0, random_state=1)
        # Assert that an exception is raised
        with self.assertRaises(AutoMLException) as context:
            # Try to score without passing 'y'; the return value is
            # irrelevant because the call is expected to raise.
            model.fit(breast_cancer.data,
                      breast_cancer.target).score(breast_cancer.data)

        # assertIn shows the actual exception text when the message differs.
        self.assertIn("y must be specified", str(context.exception))
コード例 #28
0
    def test_multi_class_abcd_mixed_int(self):
        """Multiclass classification with mixed int/str labels 1, "B", "CC", "d".

        The predictions must contain at most four distinct values, at least
        one of which is a training label.
        """
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(np.random.permutation([1, "B", "CC", "d"] * self.rows),
                      name="target")

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)
        u = np.unique(pred)

        # The expected list previously contained "a", which is not a
        # training label, and omitted the first label.
        # NOTE(review): NumPy presumably coerces the mixed list to strings,
        # so the integer label 1 is expected to surface as "1" - confirm.
        expected_labels = ["1", "B", "CC", "d"]
        self.assertTrue(np.intersect1d(u, expected_labels).shape[0] > 0)
        self.assertLessEqual(len(u), 4)
コード例 #29
0
    def test_predict_on_empty_dataframe(self):
        """`predict()` must raise AutoMLException for an empty DataFrame and for a zero-row array."""
        feature_names = [f"f{i}" for i in range(3)]
        features = pd.DataFrame(np.random.rand(self.rows, 3),
                                columns=feature_names)
        target = pd.Series(np.random.rand(self.rows), name="target")

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(features, target)

        # First: a DataFrame with no rows and no columns.
        empty_frame = pd.DataFrame()
        with self.assertRaises(AutoMLException) as context:
            pred = automl.predict(empty_frame)

        # Second: an array with three columns but zero rows.
        empty_array = np.empty(shape=(0, 3))
        with self.assertRaises(AutoMLException) as context:
            pred = automl.predict(empty_array)
コード例 #30
0
    def test_multi_class_0123(self):
        """Multiclass classification with integer labels 0-3.

        The predictions must contain at most four distinct values, at least
        one of which is 0, 1, 2 or 3.
        """
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.randint(0, 4, self.rows * 4)

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)

        u = np.unique(pred)

        self.assertTrue(0 in u or 1 in u or 2 in u or 3 in u)
        # assertLessEqual reports the actual count on failure.
        self.assertLessEqual(len(u), 4)