Esempio n. 1
0
    def test_multi_class_abcd_missing_target(self):
        """Fit on four string classes with one target set to None and check the prediction frame."""
        class_names = ["a", "B", "CC", "d"]

        features = pd.DataFrame(
            np.random.rand(self.rows * 4, 3),
            columns=[f"f{i}" for i in range(3)],
        )
        target = pd.Series(
            np.random.permutation(class_names * self.rows), name="target"
        )
        # Replace a single target value with None before fitting.
        target.iloc[1] = None

        model = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
        )
        model.set_advanced(start_random_models=1)
        model.fit(features, target)
        predictions = model.predict(features)

        # One probability column per class plus the predicted label.
        available_columns = predictions.columns.tolist()
        for expected in (
            "prediction_a",
            "prediction_B",
            "prediction_CC",
            "prediction_d",
            "label",
        ):
            self.assertTrue(expected in available_columns)

        # At least one known class is predicted, and never more than four labels.
        predicted_labels = np.unique(predictions["label"].values)
        known_labels = np.intersect1d(predicted_labels, class_names)
        self.assertTrue(known_labels.shape[0] > 0)
        self.assertTrue(len(predicted_labels) <= 4)
Esempio n. 2
0
    def test_bin_class_AB_missing_targets(self):
        """Fit a binary problem with string labels and several missing targets.

        Three target entries are blanked out (one with ``None``, two with
        ``np.nan``) before fitting; the prediction frame must still expose
        the per-class probability columns and a ``label`` column.
        """
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(
            np.random.permutation(["a", "B"] * int(self.rows / 2)),
            name="target",
        )

        # Mix both spellings of "missing". `np.nan` is used instead of the
        # `np.NaN` alias, which was removed in NumPy 2.0.
        y.iloc[1] = None
        y.iloc[3] = np.nan
        y.iloc[13] = np.nan

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
        )
        automl.set_advanced(start_random_models=1)
        automl.fit(X, y)
        pred = automl.predict(X)

        columns = pred.columns.tolist()
        for col in ["prediction_a", "prediction_B", "label"]:
            self.assertIn(col, columns)

        u = np.unique(pred["label"].values)
        self.assertTrue("a" in u or "B" in u)
        self.assertLessEqual(len(u), 2)
Esempio n. 3
0
    def test_save_load(self):
        """Fit one AutoML, then predict from a second instance sharing its results path."""
        trained = AutoML(
            results_path=self.automl_dir,
            total_time_limit=10,
            explain_level=0,
            mode="Explain",
            train_ensemble=True,
        )
        trained.set_advanced(start_random_models=1)

        raw_features, target = datasets.make_classification(
            n_samples=100,
            n_features=5,
            n_informative=4,
            n_redundant=1,
            n_classes=2,
            n_clusters_per_class=3,
            n_repeated=0,
            shuffle=False,
            random_state=0,
        )
        column_names = [f"f_{i}" for i in range(raw_features.shape[1])]
        features = pd.DataFrame(raw_features, columns=column_names)

        trained.fit(features, target)
        first_predictions = trained.predict(features)

        # The second instance is never fitted; it only receives the same results_path.
        reloaded = AutoML(results_path=self.automl_dir)
        second_predictions = reloaded.predict(features)

        labels_match = first_predictions["label"] == second_predictions["label"]
        self.assertTrue(labels_match.all())
Esempio n. 4
0
    def test_multi_class_0123(self):
        """Fit on integer targets 0..3 and check the per-class prediction columns."""
        n_samples = self.rows * 4
        features = pd.DataFrame(
            np.random.rand(n_samples, 3),
            columns=[f"f{i}" for i in range(3)],
        )
        target = np.random.randint(0, 4, n_samples)

        model = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
        )
        model.set_advanced(start_random_models=1)
        model.fit(features, target)
        predictions = model.predict(features)

        available_columns = predictions.columns.tolist()
        for expected in (
            "prediction_0",
            "prediction_1",
            "prediction_2",
            "prediction_3",
            "label",
        ):
            self.assertTrue(expected in available_columns)

        predicted_labels = np.unique(predictions["label"].values)

        # NOTE(review): the targets are integers, yet membership is checked
        # against string labels here -- confirm the dtype of the "label" column.
        self.assertTrue(
            any(text in predicted_labels for text in ("0", "1", "2", "3"))
        )
        self.assertTrue(len(predicted_labels) <= 4)
Esempio n. 5
0
    def test_explain_just_permutation_importance(self):
        """Check which explanation artifacts are written with ``explain_level=1``.

        After fitting, the ``model_1`` directory under the results path must
        contain a permutation importance CSV and must NOT contain SHAP
        importance, SHAP dependence, or SHAP decision files.
        """
        a = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            validation={
                "validation_type": "kfold",
                "k_folds": 2,
                "shuffle": True,
                "stratify": True,
            },
            explain_level=1,
        )
        a.set_advanced(start_random_models=1)

        X, y = datasets.make_regression(
            n_samples=100,
            n_features=5,
            n_informative=4,
            shuffle=False,
            random_state=0,
        )
        X = pd.DataFrame(X, columns=[f"f_{i}" for i in range(X.shape[1])])

        a.fit(X, y)

        result_files = os.listdir(os.path.join(self.automl_dir, "model_1"))

        # Expected artifacts:
        # - permutation importance: present
        # - shap importance: absent
        # - shap dependence: absent
        # - shap decisions: absent

        # Permutation importance is an importance CSV without "shap" in its name.
        self.assertTrue(
            any("importance.csv" in f and "shap" not in f for f in result_files)
        )
        # SHAP importance is an importance CSV with "shap" in its name.
        self.assertFalse(
            any("importance.csv" in f and "shap" in f for f in result_files)
        )
        # SHAP dependence plots.
        self.assertFalse(any("dependence.png" in f for f in result_files))
        # SHAP decision plots.
        self.assertFalse(any("decisions.png" in f for f in result_files))
Esempio n. 6
0
    def test_one_column_input_regression(self):
        """Fit a regression on a single-feature frame and check the prediction output."""
        n_rows = 100

        model = AutoML(
            results_path=self.automl_dir,
            total_time_limit=5,
            explain_level=0,
        )
        model.set_advanced(start_random_models=1)

        features = pd.DataFrame({"feature_1": np.random.rand(n_rows)})
        target = np.random.rand(n_rows)

        model.fit(features, target)
        predictions = model.predict(features)

        # One prediction row per input row, in a "prediction" column.
        self.assertTrue("prediction" in predictions.columns)
        self.assertTrue(predictions.shape[0] == n_rows)
Esempio n. 7
0
    def test_regression(self):
        """Fit on continuous random targets and expect a single "prediction" column."""
        features = pd.DataFrame(
            np.random.rand(self.rows, 3),
            columns=[f"f{i}" for i in range(3)],
        )
        target = np.random.rand(self.rows)

        model = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
        )
        model.set_advanced(start_random_models=1)
        model.fit(features, target)
        predictions = model.predict(features)

        output_columns = predictions.columns.tolist()
        self.assertTrue(len(output_columns) == 1)
        self.assertTrue(predictions.columns[0] == "prediction")
Esempio n. 8
0
    def test_bin_class_AB(self):
        """Fit a binary problem with string labels ``'a'`` / ``'B'``.

        The prediction frame must expose one probability column per class
        plus a ``label`` column holding at most two distinct values.
        """
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = np.random.permutation(['a', 'B'] * int(self.rows / 2))

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
        )
        automl.set_advanced(start_random_models=1)
        automl.fit(X, y)
        # Predict once; the earlier duplicate call only produced an unused local.
        pred = automl.predict(X)

        columns = pred.columns.tolist()
        for col in ["prediction_a", "prediction_B", "label"]:
            self.assertIn(col, columns)

        u = np.unique(pred["label"].values)
        self.assertTrue('a' in u or 'B' in u)
        self.assertLessEqual(len(u), 2)
Esempio n. 9
0
    def test_regression_missing_target(self):
        """Fit a regression with one target set to None and expect a single "prediction" column."""
        features = pd.DataFrame(
            np.random.rand(self.rows, 3),
            columns=[f"f{i}" for i in range(3)],
        )
        target = pd.Series(np.random.rand(self.rows), name="target")

        # Replace a single target value with None before fitting.
        target.iloc[1] = None

        model = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
        )
        model.set_advanced(start_random_models=1)
        model.fit(features, target)
        predictions = model.predict(features)

        output_columns = predictions.columns.tolist()
        self.assertTrue(len(output_columns) == 1)
        self.assertTrue(predictions.columns[0] == "prediction")
Esempio n. 10
0
    def test_integration(self):
        """Run fit and predict end to end on a synthetic binary dataset."""
        model = AutoML(results_path=self.automl_dir, model_time_limit=1)
        model.set_advanced(start_random_models=1)

        raw_features, target = datasets.make_classification(
            n_samples=100,
            n_features=5,
            n_informative=4,
            n_redundant=1,
            n_classes=2,
            n_clusters_per_class=3,
            n_repeated=0,
            shuffle=False,
            random_state=0,
        )
        column_names = [f"f_{i}" for i in range(raw_features.shape[1])]
        features = pd.DataFrame(raw_features, columns=column_names)

        model.fit(features, target)
        predictions = model.predict(features)

        # Classification output must carry a "label" column.
        self.assertTrue("label" in predictions.columns)
Esempio n. 11
0
    def test_bin_class_11(self):
        """Fit a binary problem whose integer targets are ``-1`` and ``1``.

        The prediction frame must expose ``prediction_-1`` and
        ``prediction_1`` probability columns plus a ``label`` column holding
        at most two distinct values.
        """
        X = np.random.rand(self.rows, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        # Map random {0, 1} draws onto {-1, 1}.
        y = np.random.randint(0, 2, self.rows) * 2 - 1

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
        )
        automl.set_advanced(start_random_models=1)
        automl.fit(X, y)
        # Predict once; the earlier duplicate call only produced an unused local.
        pred = automl.predict(X)

        columns = pred.columns.tolist()
        for col in ["prediction_-1", "prediction_1", "label"]:
            self.assertIn(col, columns)

        u = np.unique(pred["label"].values)
        self.assertTrue(-1 in u or 1 in u)
        self.assertLessEqual(len(u), 2)