Esempio n. 1
0
def test_imputer_feature_boston():
    """Check that imputing the boston "train" dataset yields shape (506, 14)."""
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/boston.yaml"
    )
    frame = loader.transform(dataset="train")
    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    imputed = imputer.transform(frame, inplace=True, verbose=True)
    assert imputed.shape == (506, 14)
Esempio n. 2
0
def test_booL_some(City):
    """Check that boolean_to_integer maps a True cell in column "Alm" to 1.

    NOTE(review): ``City`` is not used in the body; presumably it is a pytest
    fixture defined elsewhere -- confirm before removing.
    """
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = loader.transform(dataset="train")

    # Overwrite "Alm" with booleans: False everywhere, True at label 0.
    frame["Alm"] = False
    frame.loc[0, "Alm"] = True

    converted = boolean_to_integer(frame, inplace=True)
    assert converted.loc[0, "Alm"] == 1
Esempio n. 3
0
def test_inputer_datasets():
    """Check the list returned by ``datasets()`` for the iris description."""
    inputer = Inputers(description_filepath="../../descriptions/pre/inputers/iris.yaml")
    expected_names = ["train", "valid", "test", "sampleSubmission", "directory_path"]
    assert inputer.datasets() == expected_names
Esempio n. 4
0
def test_learn_train_kw_target_iris(flower):
    """Check that the pima-diabetes description reports "Outcome" as target.

    NOTE(review): ``flower`` is unused (presumably a pytest fixture), and the
    label/feature split and the learner are built but never asserted on.
    """
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = inputer.transform()

    # Built only for their side effects; none of these is inspected below.
    _labels = frame[inputer.target].values
    _feature_columns = frame.columns.difference([inputer.target])
    _features = frame[_feature_columns]
    _learner = Learners(
        description_filepath="../../descriptions/learners/RFC.sm.yaml"
    )

    assert inputer.target == "Outcome"
Esempio n. 5
0
def test_imputer_features_nans_found():
    """Check that the imputed result still contains at least one null value.

    An all-NaN column "bad" is added to the yeast3 "train" dataset, while only
    "Alm" is passed through ``features`` to the imputer.
    """
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    train = loader.transform(dataset="train")
    # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` works on every version.
    train["bad"] = np.nan
    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    result = imputer.transform(train, features=["Alm"], inplace=True, verbose=True)
    # Reduce over rows, then over columns: truthy when any cell is null.
    assert result.isnull().any().any()
Esempio n. 6
0
def test_learn_model_name():
    """Check that a learner built from RFC.yaml reports model_name "RandomForest"."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    feature_columns = frame.columns.difference([inputer.target])
    features = frame[feature_columns]

    learner = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    learner.train(features, labels, checkpoint="RandomForest.ckp")

    assert learner.model_name == "RandomForest"
Esempio n. 7
0
def test_inputer_transform_splitter_X_valid():
    """Check the validation shapes after splitting iris with split_30_stratify."""
    inputer = Inputers(description_filepath="../../descriptions/pre/inputers/iris.yaml")
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_30_stratify.yaml"
    )
    _x_train, x_valid, _y_train, y_valid = splitter.transform(features, labels)

    assert x_valid.shape == (45, 4)
    assert y_valid.shape == (45,)
Esempio n. 8
0
def test_evaluate_otto_group():
    """Check that ``evaluate`` returns 7 keys for the otto_group data (XGBC.yaml)."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/otto_group.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/XGBC.yaml")
    learner.train(features, labels, checkpoint="pima_LGBMC.ckp")

    metrics = learner.evaluate(features, labels)
    assert len(metrics.keys()) == 7
Esempio n. 9
0
def test_train_predict_prob_LGBC_wine():
    """Check that ``predict_proba`` on the wine data has shape (178, 3)."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")
    learner.train(
        features,
        labels,
        target=inputer.target,
        checkpoint="wineLGBMC.ckp",
    )

    probabilities = learner.predict_proba(features)
    assert probabilities.shape == (178, 3)
Esempio n. 10
0
def test_train_predict_prob_XGBC_yeast3():
    """Check that ``predict_proba`` on the yeast3 data has shape (1484, 2)."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/XGBC.yaml")
    learner.train(features, labels, checkpoint="yeast3LGBMC.ckp")

    probabilities = learner.predict_proba(features)
    assert probabilities.shape == (1484, 2)
Esempio n. 11
0
def test_train_predict_prob_LGBC():
    """Check that ``predict_proba`` on the pima-diabetes data has shape (768, 2)."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")
    learner.train(features, labels, checkpoint="diabetesLGBMC.ckp")

    probabilities = learner.predict_proba(features)
    assert probabilities.shape == (768, 2)
Esempio n. 12
0
def test_learn_train_predict_prob_LGBC():
    """Check that ``predict`` on the otto_group data has shape (61878,).

    NOTE(review): the name mentions "predict_prob" but the body calls
    ``predict`` -- confirm which one was intended.
    """
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/otto_group.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")
    learner.train(features, labels, checkpoint="otto_group_LGBMC.ckp")

    predictions = learner.predict(features)
    assert predictions.shape == (61878,)
Esempio n. 13
0
def test_learn_train_predict_XGBC():
    """Check that ``predict`` on the wine data (XGBC.yaml) has shape (178,)."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/XGBC.yaml")
    learner.train(features, labels, checkpoint="LGBMClassifier.ckp")

    predictions = learner.predict(features)
    assert predictions.shape == (178,)
Esempio n. 14
0
def test_imputer_feature_boston_err():
    """Check that imputing raises PasoError once an all-NaN column is added.

    The boston "train" dataset gets an extra column "ggoo" filled with NaN
    before it is handed to the imputer.
    """
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/boston.yaml"
    )
    train = loader.transform(dataset="train")
    train["ggoo"] = np.nan
    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    # Only the raising call belongs in the block: a shape assertion here would
    # never run when the test passes, so it was dropped as dead code.
    with pytest.raises(PasoError):
        imputer.transform(train, inplace=True, verbose=True)
Esempio n. 15
0
def test_learn_train_kw_target_pima():
    """Check that ``trained`` equals True after training on the wine data.

    NOTE(review): the name mentions "pima" but wine.yaml is loaded -- confirm.
    """
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    learner.train(features, labels, checkpoint="diabetesRandomForest1.ckp")

    trained_flag = learner.trained
    assert trained_flag == True
Esempio n. 16
0
def test_train_predict_prob_XGBC_creditdard():
    """Check that ``predict_proba`` on the creditcard data has shape (284807, 2)."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/creditcard.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/XGBC.yaml")
    learner.train(features, labels, checkpoint="creditdardLGBMC.ckp")

    probabilities = learner.predict_proba(features)
    assert probabilities.shape == (284807, 2)
Esempio n. 17
0
def test_learn_train_no_y():
    """Check that ``train`` raises IndexError when called without labels."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    dataset = inputer.transform()
    # Only the feature frame is needed: the point of the test is that no
    # label vector is passed to ``train``.
    X = dataset[dataset.columns.difference([inputer.target])]
    learner = Learners(
        description_filepath="../../descriptions/learners/RFC.sm.yaml"
    )
    # The former ``== o`` comparison was a discarded expression with no effect
    # on the outcome; the bare call states the intent.
    with pytest.raises(IndexError):
        learner.train(X)
Esempio n. 18
0
def test_inputer_transform_splitter_onto_wrong_place():
    """Check that PasoError is raised when the description file is passed to
    ``Splitters.transform`` instead of to the ``Splitters`` constructor."""
    inputer = Inputers(description_filepath="../../descriptions/pre/inputers/iris.yaml")
    iris_frame = inputer.transform()
    splitter = Splitters()

    misplaced_kwargs = {
        "target": inputer.target,
        "description_filepath": "../../descriptions/pre/inputers/test_size_30.yaml",
    }
    with pytest.raises(PasoError):
        train, valid = splitter.transform(iris_frame, **misplaced_kwargs)
Esempio n. 19
0
def test_imputer_features_not_found():
    """Check that PasoError is raised when ``features`` lists "bad" and
    "badder" alongside "Alm" for the yeast3 "train" dataset."""
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = loader.transform(dataset="train")
    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    requested_features = ["bad", "badder", "Alm"]
    with pytest.raises(PasoError):
        imputer.transform(
            frame, features=requested_features, inplace=True, verbose=True
        )
Esempio n. 20
0
def test_inputer_imputer():
    """Check the list returned by ``inputers()`` for the iris description."""
    inputer = Inputers(description_filepath="../../descriptions/pre/inputers/iris.yaml")
    expected_kinds = ["exec", "cvs", "xls", "xlsm", "text", "image2D", "image3D"]
    assert inputer.inputers() == expected_kinds
Esempio n. 21
0
def test_spltter_transform_creditcard_20__url_cvs_zip():
    """Check the train/valid feature shapes after splitting the creditcard
    "train" dataset with split_20_stratify.yaml."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/creditcard.yaml"
    )
    frame = inputer.transform(dataset="train")
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_20_stratify.yaml"
    )
    # The label halves of the split are not inspected by this test.
    x_train, x_valid, _, _ = splitter.transform(features, labels)

    assert x_train.shape == (227845, 30)
    assert x_valid.shape == (56962, 30)
Esempio n. 22
0
def test_predict_Prob_error():
    """Check that ``predict_proba`` raises PasoError when ``train`` was never
    called on the learner."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    diabetes = inputer.transform()
    # ``learner.train`` is deliberately not called before predicting.
    learner = Learners(
        description_filepath="../../descriptions/learners/LGBC.yaml")
    X_train = diabetes[diabetes.columns.difference([inputer.target])]
    # Only the raising call belongs in the block: a shape assertion here would
    # never run when the test passes, so it was dropped as dead code.
    with pytest.raises(PasoError):
        learner.predict_proba(X_train)
Esempio n. 23
0
def test_imputer_features_allnans():
    """Check that imputing "Alm" raises PasoError when the column is NaN
    everywhere except at label 0."""
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    train = loader.transform(dataset="train")
    # ``np.NaN`` was removed in NumPy 2.0; ``np.nan`` works on every version.
    train["Alm"] = np.nan
    train.loc[0, "Alm"] = 0
    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    # Only the raising call belongs in the block: the former ``== False``
    # assertion could never run when the test passes.
    with pytest.raises(PasoError):
        imputer.transform(train, features=["Alm"], inplace=True, verbose=True)
Esempio n. 24
0
def test_splitter_transform__onto_group():
    """Check all four shapes after splitting iris with split_20_stratify.yaml.

    NOTE(review): the name mentions "onto_group" but iris.yaml is loaded.
    """
    inputer = Inputers(description_filepath="../../descriptions/pre/inputers/iris.yaml")
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_20_stratify.yaml"
    )
    x_train, x_valid, y_train, y_valid = splitter.transform(features, labels)

    assert x_train.shape == (120, 4)
    assert x_valid.shape == (30, 4)
    assert y_train.shape == (120,)
    assert y_valid.shape == (30,)
Esempio n. 25
0
def test_spitter_transform_s_wine():
    """Check all four shapes after splitting wine with split_30_stratify.yaml."""
    inputer = Inputers(description_filepath="../../descriptions/pre/inputers/wine.yaml")
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_30_stratify.yaml"
    )
    x_train, x_valid, y_train, y_valid = splitter.transform(features, labels)

    assert x_train.shape == (124, 13)
    assert x_valid.shape == (54, 13)
    assert y_train.shape == (124,)
    assert y_valid.shape == (54,)
Esempio n. 26
0
def test_learner_cross_validate_LGBC():
    """Check that ``evaluate`` returns 9 keys after train + cross_validate on
    the pima-diabetes data (LGBC.yaml)."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")
    learner.train(features, labels, checkpoint="pima_LGBMC.ckp")
    # The cross-validation result itself is not inspected by this test.
    learner.cross_validate(
        features,
        labels,
        cv_description_filepath="../../descriptions/learners/Cross_validation_classification.yaml",
    )

    metrics = learner.evaluate(features, labels)
    assert len(metrics.keys()) == 9
Esempio n. 27
0
def test_learner_cross_validate_RFC_iris_multiclass_evaluate_AO():
    """Check that ``evaluate`` reports "accuracy" == 1.0 on the iris data the
    learner was trained on (RFC.yaml), after a cross_validate call."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    learner.train(features, labels, checkpoint="iris)RC.ckp")
    # The cross-validation result itself is not inspected by this test.
    learner.cross_validate(
        features,
        labels,
        cv_description_filepath="../../descriptions/learners/Cross_validation_classification.yaml",
    )

    metrics = learner.evaluate(features, labels)
    assert metrics["accuracy"] == 1.0
Esempio n. 28
0
def test_spltter_transform_creditcard_url_30__cvs_zip():
    """Check all four shapes after splitting the yeast3 "train" dataset with
    split_20_stratify.yaml.

    NOTE(review): the name mentions "creditcard" and "30" but yeast3.yaml and
    split_20_stratify.yaml are used -- confirm.
    """
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = inputer.transform(dataset="train")
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_20_stratify.yaml"
    )
    x_train, x_valid, y_train, y_valid = splitter.transform(features, labels)

    assert x_train.shape == (1187, 8)
    assert x_valid.shape == (297, 8)
    assert y_train.shape == (1187,)
    assert y_valid.shape == (297,)
Esempio n. 29
0
def test_learner_cross_validate_RFC_iris_milticlass_evaluate_test_accuracy():
    """Check that the cross_validate result's ``["mean"]["test_accuracy"]`` is
    at least 0.95 on the iris data (RFC.yaml)."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    frame = inputer.transform()
    labels = frame[inputer.target].values
    features = frame[frame.columns.difference([inputer.target])]

    learner = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    learner.train(features, labels, checkpoint="pima_LGBMC.ckp")
    cv_result = learner.cross_validate(
        features,
        labels,
        cv_description_filepath="../../descriptions/learners/Cross_validation_classification.yaml",
    )

    mean_test_accuracy = cv_result["mean"]["test_accuracy"]
    assert mean_test_accuracy >= 0.95
Esempio n. 30
0
def test_inputer_transform_cvs_url():
    """Compare the wine inputer's columns with the names given to a CSV read
    directly from the wine.data URL.

    NOTE(review): the assertion uses ``.any()``, so a single matching column
    name is enough for it to pass; ``.all()`` may have been intended -- confirm.
    """
    column_names = [
        "Class",
        "Alcohol",
        "Malic-acid",
        "Ash",
        "Alcalinity-ash",
        "Magnesium",
        "phenols",
        "Flavanoids",
        "Nonflavanoid-phenols",
        "Proanthocyanins",
        "Color-intensity",
        "Hue",
        "OD280-OD315-diluted-wines",
        "Proline",
    ]
    link = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"

    # Only the column labels of this reference frame are used below.
    reference = pd.read_csv(link, names=column_names).head()
    inputer = Inputers(description_filepath="../../descriptions/pre/inputers/wine.yaml")

    column_matches = inputer.transform().columns == reference.columns
    assert column_matches.any()