Esempio n. 1
0
 def test_predict_proba_in_regression(self):
     """Check that predict_proba raises AutoMLException after fitting on the Boston data.

     The model is fitted on ``boston.data`` / ``boston.target`` (used here as
     a regression task) and then asked for class probabilities; that call is
     expected to fail with ``AutoMLException``.
     """
     features, target = boston.data, boston.target
     automl = AutoML(
         results_path=self.automl_dir,
         random_state=1,
         verbose=0,
         explain_level=0,
     )
     automl.fit(features, target)
     # Requesting probabilities is the call under test; it must raise.
     with self.assertRaises(AutoMLException):
         automl.predict_proba(features)
Esempio n. 2
0
def define_and_evaluate_mljar_pipeline(X, y, random_state=0):
    """Score an MLJAR AutoML model over a 4-fold stratified outer loop.

    For every outer fold a new ``AutoML`` instance (mode ``"Compete"``) is
    fitted on the training part, ``predict_proba`` is called on the held-out
    part, and the result is scored with weighted one-vs-rest ROC AUC.

    Args:
        X: Feature matrix. It is indexed as ``X[rows, :]``, so a 2-D
            NumPy-style array is assumed.
        y: Target labels, indexed as ``y[rows]``. Exactly two distinct values
            select the binary task; any other count selects multiclass.
        random_state: Seed passed to the shuffling ``StratifiedKFold``.

    Returns:
        list: One ROC AUC score per outer fold, in fold order.
    """
    outer_cv = StratifiedKFold(n_splits=4, shuffle=True, random_state=random_state)

    # The task type depends only on the complete label vector, so it is
    # derived once here instead of being recomputed in every fold.
    binary = len(set(y)) == 2
    eval_metric = "auc" if binary else "logloss"
    ml_task = "binary_classification" if binary else "multiclass_classification"

    results_path = "AutoML_1"
    nested_scores = []
    for train_inds, test_inds in outer_cv.split(X, y):
        X_train, y_train = X[train_inds, :], y[train_inds]
        X_test, y_test = X[test_inds, :], y[test_inds]

        # Remove the results directory left by the previous fold before
        # creating the next AutoML instance on the same path.
        shutil.rmtree(results_path, ignore_errors=True)
        automl = AutoML(
            results_path=results_path,
            mode="Compete",
            eval_metric=eval_metric,
            total_time_limit=SEC,  # SEC is not defined in this block; it must exist at module level
            ml_task=ml_task,
        )
        automl.fit(X_train, y_train)
        y_pred = automl.predict_proba(X_test)

        # Binary scoring uses column 1 of the predicted probabilities;
        # multiclass scoring uses the full probability matrix.
        y_score = y_pred[:, 1] if binary else y_pred
        # same as roc_auc_ovr_weighted
        score = roc_auc_score(y_test, y_score, average="weighted", multi_class="ovr")
        nested_scores.append(score)
    return nested_scores
Esempio n. 3
0
import pandas as pd
from supervised import AutoML

# Load the training data, the test data and the submission template.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sub = pd.read_csv("sample_submission.csv")

# Every training column after the first two is used as a feature.
x_cols = train.columns[2:]
print(x_cols)

# "Compete" mode, AUC metric, time limit of 4 * 3600
# (four hours if the limit is expressed in seconds).
automl = AutoML(
    mode="Compete",
    eval_metric="auc",
    total_time_limit=4 * 3600,
)
automl.fit(train[x_cols], train["target"])

# Column 1 of the predicted probabilities is written into every submission
# column after the first one.
test_proba = automl.predict_proba(test)
sub[sub.columns[1:]] = test_proba[:, 1]
sub.to_csv("sub_1.csv", index=False)
import pandas as pd
from supervised import AutoML

# Read the Tabular Playground Series (March 2021) train and test files.
train = pd.read_csv("~/Downloads/tabular-playground-series-mar-2021/train.csv")
test = pd.read_csv("~/Downloads/tabular-playground-series-mar-2021/test.csv")

# Features are all columns except the identifier (and the label for train).
X_train = train.drop(columns=["id", "target"])
y_train = train["target"]
X_test = test.drop(columns=["id"])

automl = AutoML(
    mode="Optuna",
    eval_metric="auc",
    algorithms=["CatBoost"],
    # tune each algorithm for 30 minutes
    optuna_time_budget=1800,
    # total time limit, set large enough to have time to compute all steps
    total_time_limit=48 * 3600,
    features_selection=False,
)
automl.fit(X_train, y_train)

# Column 1 of the predicted probabilities becomes the submitted "target".
preds = automl.predict_proba(X_test)
submission = pd.DataFrame({"id": test["id"], "target": preds[:, 1]})
submission.to_csv("1_submission.csv", index=False)
Esempio n. 5
0
import pandas as pd
from supervised import AutoML

# Load the training data, the test data and the submission template.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sub = pd.read_csv("sample_submission.csv")

# Every training column after the first two is used as a feature.
x_cols = train.columns[2:]

# "Compete" mode with model stacking enabled and feature selection disabled.
automl = AutoML(
    mode="Compete",
    total_time_limit=4 * 3600,
    stack_models=True,
    features_selection=False,
)
automl.fit(train[x_cols], train["target"])

# Column 1 of the predicted probabilities is stored as "PredictedProb".
test_proba = automl.predict_proba(test)
sub["PredictedProb"] = test_proba[:, 1]
sub.to_csv("sub_1.csv", index=False)
Esempio n. 6
0
import pandas as pd
from supervised import AutoML

# Load the training data, the test data and the submission template.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
sub = pd.read_csv("sample_submission.csv")

# Every training column after the first two is used as a feature.
x_cols = train.columns[2:]

# "Compete" mode, optimising AUC, with a time limit of 4 * 3600.
automl = AutoML(
    mode="Compete",
    total_time_limit=4 * 3600,
    eval_metric="auc",
)
automl.fit(train[x_cols], train["target"])

# Column 1 of the predicted probabilities is stored as "target".
test_proba = automl.predict_proba(test)
sub["target"] = test_proba[:, 1]
sub.to_csv("sub_1.csv", index=False)