def test_regression_missing_target(self):
        """Fit and predict on a float target that contains one missing value.

        Passes when ``predict`` returns a NumPy array with one entry per
        input row.
        """
        feature_names = [f"f{i}" for i in range(3)]
        X = pd.DataFrame(np.random.rand(self.rows, 3), columns=feature_names)
        y = pd.Series(np.random.rand(self.rows), name="target")

        # Blank out a single target entry before training.
        y.iloc[1] = None

        automl_params = dict(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl = AutoML(**automl_params)
        automl.fit(X, y)
        predictions = automl.predict(X)

        self.assertIsInstance(predictions, np.ndarray)
        self.assertEqual(len(predictions), X.shape[0])
    def test_multi_class_0123(self):
        """Fit and predict on an integer target drawn from {0, 1, 2, 3}.

        Passes when at least one of those integers appears among the
        predictions and no more than four distinct values are predicted.
        """
        n_samples = self.rows * 4
        X = pd.DataFrame(
            np.random.rand(n_samples, 3),
            columns=[f"f{i}" for i in range(3)],
        )
        y = np.random.randint(0, 4, n_samples)

        automl_params = dict(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl = AutoML(**automl_params)
        automl.fit(X, y)

        predicted_labels = np.unique(automl.predict(X))

        self.assertTrue(any(label in predicted_labels for label in range(4)))
        self.assertTrue(len(predicted_labels) <= 4)
    def test_multi_class_abcd_mixed_int(self):
        """Fit and predict on a target that mixes an integer with strings.

        The target is built from ``[1, "B", "CC", "d"]``. NumPy coerces such
        a mixed list to a string array, so the labels seen during training
        are ``"1"``, ``"B"``, ``"CC"`` and ``"d"``. The test passes when at
        least one of those labels is predicted and no more than four distinct
        values are predicted.
        """
        X = np.random.rand(self.rows * 4, 3)
        X = pd.DataFrame(X, columns=[f"f{i}" for i in range(3)])
        y = pd.Series(
            np.random.permutation([1, "B", "CC", "d"] * self.rows),
            name="target",
        )

        automl = AutoML(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
            start_random_models=1,
        )
        automl.fit(X, y)
        pred = automl.predict(X)
        u = np.unique(pred)

        # Compare against the labels actually present in the target: the
        # integer 1 became the string "1"; there is no "a" class in this test.
        expected_labels = ["1", "B", "CC", "d"]
        self.assertTrue(np.intersect1d(u, expected_labels).shape[0] > 0)
        self.assertTrue(len(u) <= 4)
Beispiel #4
0
    def test_bin_class_01(self):
        """Fit and predict on a binary integer target drawn from {0, 1}.

        Passes when the prediction frame has the columns ``prediction_0``,
        ``prediction_1`` and ``label``, and the ``label`` column holds at
        most two distinct values including 0 or 1.
        """
        feature_names = [f"f{i}" for i in range(3)]
        X = pd.DataFrame(np.random.rand(self.rows, 3), columns=feature_names)
        y = np.random.randint(0, 2, self.rows)

        automl_params = dict(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
            explain_level=0,
        )
        automl = AutoML(**automl_params)
        automl.set_advanced(start_random_models=1)
        automl.fit(X, y)
        pred = automl.predict(X)

        output_columns = pred.columns.tolist()
        for expected in ("prediction_0", "prediction_1", "label"):
            self.assertTrue(expected in output_columns)

        predicted_labels = np.unique(pred["label"].values)
        self.assertTrue(any(value in predicted_labels for value in (0, 1)))
        self.assertTrue(len(predicted_labels) <= 2)
Beispiel #5
0
    def test_integration(self):
        """Fit and predict on a synthetic two-class dataset.

        Passes when the prediction frame contains a ``label`` column.
        """
        automl = AutoML(results_path=self.automl_dir, model_time_limit=1)
        automl.set_advanced(start_random_models=1)

        dataset_params = dict(
            n_samples=100,
            n_features=5,
            n_informative=4,
            n_redundant=1,
            n_classes=2,
            n_clusters_per_class=3,
            n_repeated=0,
            shuffle=False,
            random_state=0,
        )
        X, y = datasets.make_classification(**dataset_params)
        feature_names = [f"f_{i}" for i in range(X.shape[1])]
        X = pd.DataFrame(X, columns=feature_names)

        automl.fit(X, y)
        predictions = automl.predict(X)

        self.assertTrue("label" in predictions.columns)
Beispiel #6
0
    def test_multi_class_abcd(self):
        """Fit and predict on a four-class string target.

        Passes when the prediction frame has one ``prediction_<class>``
        column per class plus ``label``, and the ``label`` column holds at
        most four distinct values including at least one known class.
        """
        class_labels = ["a", "B", "CC", "d"]

        X = pd.DataFrame(
            np.random.rand(self.rows * 4, 3),
            columns=[f"f{i}" for i in range(3)],
        )
        y = pd.Series(
            np.random.permutation(class_labels * self.rows),
            name="target",
        )

        automl_params = dict(
            results_path=self.automl_dir,
            total_time_limit=1,
            algorithms=["Xgboost"],
            train_ensemble=False,
        )
        automl = AutoML(**automl_params)
        automl.set_advanced(start_random_models=1)
        automl.fit(X, y)
        pred = automl.predict(X)

        output_columns = pred.columns.tolist()
        expected_columns = [f"prediction_{name}" for name in class_labels]
        expected_columns.append("label")
        for expected in expected_columns:
            self.assertTrue(expected in output_columns)

        predicted_labels = np.unique(pred["label"].values)
        self.assertTrue(
            np.intersect1d(predicted_labels, class_labels).shape[0] > 0
        )
        self.assertTrue(len(predicted_labels) <= 4)
Beispiel #7
0
class JarAutoML(AutoMachineLearning):
    """``AutoMachineLearning`` implementation backed by an ``AutoML`` estimator."""

    def __init__(self, n_folds_validation: int, shuffle_data: bool,
                 max_rand: int) -> None:
        """Forward the settings to the base class and build the estimator.

        NOTE(review): ``_random_state``, ``_n_folds_validation`` and
        ``_shuffle_data`` are presumably set by the base-class initializer
        from these arguments -- confirm against ``AutoMachineLearning``.
        """
        super().__init__(n_folds_validation, shuffle_data, max_rand)
        # K-fold validation settings handed to the estimator.
        kfold_validation = {
            "validation_type": "kfold",
            "k_folds": self._n_folds_validation,
            "shuffle": self._shuffle_data,
        }
        # The wrapped AutoML instance used by fit_model / predict_model.
        self.estimator = AutoML(
            mode="Compete",
            explain_level=0,
            random_state=self._random_state,
            validation_strategy=kfold_validation,
        )

    # abstract class method implementation
    def fit_model(self, x_train: DataFrame, y_train: NpArray) -> None:
        """Fit the wrapped estimator on ``x_train`` / ``y_train``."""
        self.estimator.fit(x_train, y_train)

    # abstract class method implementation
    def predict_model(self, x_test: DataFrame) -> tuple:
        """Return the estimator's predictions for ``x_test`` as a tuple."""
        predictions = self.estimator.predict(x_test)
        return tuple(predictions)
    def test_integration(self):
        """Fit and predict on a synthetic two-class dataset.

        Passes when ``predict`` returns a NumPy array with one entry per
        input row.
        """
        automl_params = dict(
            results_path=self.automl_dir,
            total_time_limit=1,
            explain_level=0,
            start_random_models=1,
        )
        automl = AutoML(**automl_params)

        dataset_params = dict(
            n_samples=100,
            n_features=5,
            n_informative=4,
            n_redundant=1,
            n_classes=2,
            n_clusters_per_class=3,
            n_repeated=0,
            shuffle=False,
            random_state=0,
        )
        X, y = datasets.make_classification(**dataset_params)

        automl.fit(X, y)
        predictions = automl.predict(X)

        self.assertIsInstance(predictions, np.ndarray)
        self.assertEqual(len(predictions), X.shape[0])
Beispiel #9
0
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from supervised import AutoML

# Train AutoML on the Titanic training set and report accuracy on the
# labelled test set.
train = pd.read_csv(
    "https://raw.githubusercontent.com/pplonski/datasets-for-start/master/Titanic/train.csv"
)

# Every column from the third one onwards is used as a feature.
feature_columns = train.columns[2:]
X = train[feature_columns]
y = train["Survived"]

automl = AutoML(results_path="AutoML_3")
automl.fit(X, y)

test = pd.read_csv(
    "https://raw.githubusercontent.com/pplonski/datasets-for-start/master/Titanic/test_with_Survived.csv"
)
predictions = automl.predict(test)

accuracy = accuracy_score(test["Survived"], predictions)
print(f"Accuracy: {accuracy*100.0:.2f}%")
Beispiel #10
0
from db import get_live_data
from db import insert_predictions

from supervised import AutoML

# Fetch pending rows, score them with the AutoML stored under
# "Response_Classifier", and write the predictions back.
X_live, ids = get_live_data()
has_rows = X_live is not None and X_live.shape[0]
if has_rows:
    print("Compute predictions")
    # No fit() call here: predict() is invoked directly on this instance.
    automl = AutoML(results_path="Response_Classifier")
    predictions = automl.predict(X_live)
    print("Insert prediction into DB")
    insert_predictions(predictions, ids)
else:
    print("No new data")
Beispiel #11
0
from sheets import get_train_data,get_client,write_out
from supervised import AutoML
import pandas as pd
from sklearn.model_selection import train_test_split


# Settings for the sheets client.
df_name = "sheet name"
cred_path = "path/credentials.json"
email = "*****@*****.**"

client = get_client(cred_path)

# Load the data, then hold out 10% of the rows as a test split.
features, target = get_train_data(client, df_name)
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.1
)

# NOTE(review): fit() is disabled below, so predict() presumably relies on
# models already saved under "Automl_output" -- confirm before running.
automl = AutoML(results_path="Automl_output", total_time_limit=10)
# automl.fit(X_train, y_train)

train_pred = automl.predict(X_train)
test_pred = automl.predict(X_test)

# Targets and predictions for both splits, written out together.
data = {
    "train_target": y_train,
    "train_prediction": train_pred,
    "test_target": y_test,
    "test_prediction": test_pred,
}

write_out(client, data, email)
Beispiel #12
0
import pandas as pd
from supervised import AutoML

# Train AutoML on train.csv and write predictions for test.csv into the
# sample-submission layout.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sub = pd.read_csv("sample_submission.csv")

# Every column from the third one onwards is used as a feature.
x_cols = train.columns[2:]
print(x_cols)

time_budget_seconds = 4 * 3600
automl = AutoML(
    results_path="AutoML_3",
    mode="Compete",
    total_time_limit=time_budget_seconds,
    eval_metric="r2",
)
automl.fit(train[x_cols], train["y"])

# Fill every submission column after the first with the predictions.
predictions = automl.predict(test)
sub[sub.columns[1:]] = predictions
sub.to_csv("sub_1.csv", index=False)
Beispiel #13
0
import numpy as np
import pandas as pd
from supervised import AutoML

# Train AutoML on the log of the target and write back-transformed
# predictions for test.csv into the sample-submission layout.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sub = pd.read_csv("sample_submission.csv")

# Every column from the third one onwards is used as a feature.
x_cols = train.columns[2:]
print(x_cols)
print(train.columns)

# Print the target range.
target = train["target"]
print(target.min())
print(target.max())

time_budget_seconds = 4 * 3600
automl = AutoML(
    mode="Compete",
    eval_metric="rmse",
    total_time_limit=time_budget_seconds,
)
automl.fit(train[x_cols], np.log(train["target"]))

# The model was fitted on log(target), so exponentiate its predictions.
log_predictions = automl.predict(test)
sub[sub.columns[1]] = np.exp(log_predictions)
sub.to_csv("sub_1.csv", index=False)