def test_imputer_feature_boston():
    """Impute the boston train split in place and check the resulting shape.

    The Imputers instance is configured from ``most_frequent_impute.yaml``.
    """
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/boston.yaml"
    )
    boston_train = loader.transform(dataset="train")

    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    imputed = imputer.transform(boston_train, inplace=True, verbose=True)

    assert imputed.shape == (506, 14)
def test_booL_some(City):
    """Check that boolean_to_integer turns a True cell of column "Alm" into 1.

    ``City`` is supplied by pytest (defined outside this block) and is not
    used in the body.
    """
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = loader.transform(dataset="train")

    # Fill the whole column with False, then flip only the first row to True.
    frame["Alm"] = False
    frame.loc[0, "Alm"] = True

    converted = boolean_to_integer(frame, inplace=True)
    assert converted.loc[0, "Alm"] == 1
def test_inputer_datasets():
    """Inputers.datasets() returns the expected list of dataset keys (iris)."""
    iris_inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    expected_keys = [
        "train",
        "valid",
        "test",
        "sampleSubmission",
        "directory_path",
    ]
    assert iris_inputer.datasets() == expected_keys
def test_learn_train_kw_target_iris(flower):
    """The pima-diabetes description exposes "Outcome" as the target name.

    ``flower`` is supplied by pytest (defined outside this block) and is not
    used in the body. The feature/label split and the Learners construction
    are performed but their results are not asserted on.
    """
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = loader.transform()

    _labels = frame[loader.target].values
    _features = frame[frame.columns.difference([loader.target])]
    _learner = Learners(
        description_filepath="../../descriptions/learners/RFC.sm.yaml"
    )

    assert loader.target == "Outcome"
def test_imputer_features_nans_found():
    """Imputing only "Alm" leaves nulls in the frame.

    An all-NaN column "bad" is added before imputation; since the imputer is
    restricted to ``features=["Alm"]``, the returned frame is expected to
    still contain at least one null value.
    """
    o = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    train = o.transform(dataset="train")
    # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
    train["bad"] = np.nan

    imp = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    imputed = imp.transform(train, features=["Alm"], inplace=True, verbose=True)

    # Truthiness of the reduced boolean is enough; no comparison to True.
    assert imputed.isnull().any().any()
def test_learn_model_name():
    """A learner built from RFC.yaml reports model_name "RandomForest" after training."""
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = source.transform()
    labels = frame[source.target].values
    feature_columns = frame.columns.difference([source.target])
    features = frame[feature_columns]

    forest = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    forest.train(features, labels, checkpoint="RandomForest.ckp")

    assert forest.model_name == "RandomForest"
def test_inputer_transform_splitter_X_valid():
    """Splitting iris with split_30_stratify.yaml yields a 45-row validation part."""
    iris = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    frame = iris.transform()
    labels = frame[iris.target].values
    features = frame[frame.columns.difference([iris.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_30_stratify.yaml"
    )
    # Only the validation halves of the split are inspected.
    _train_X, valid_X, _train_y, valid_y = splitter.transform(features, labels)

    assert valid_X.shape == (45, 4)
    assert valid_y.shape == (45,)
def test_evaluate_otto_group():
    """evaluate() on the otto_group data returns a mapping with 7 keys (XGBC.yaml learner)."""
    otto = Inputers(
        description_filepath="../../descriptions/pre/inputers/otto_group.yaml"
    )
    frame = otto.transform()
    labels = frame[otto.target].values
    features = frame[frame.columns.difference([otto.target])]

    model = Learners(description_filepath="../../descriptions/learners/XGBC.yaml")
    model.train(features, labels, checkpoint="pima_LGBMC.ckp")

    metrics = model.evaluate(features, labels)
    assert len(metrics.keys()) == 7
def test_train_predict_prob_LGBC_wine():
    """predict_proba on the wine data returns an array of shape (178, 3)."""
    wine_source = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    frame = wine_source.transform()
    labels = frame[wine_source.target].values
    features = frame[frame.columns.difference([wine_source.target])]

    model = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")
    # This test also passes the target name to train() as a keyword.
    model.train(
        features,
        labels,
        target=wine_source.target,
        checkpoint="wineLGBMC.ckp",
    )

    probabilities = model.predict_proba(features)
    assert probabilities.shape == (178, 3)
def test_train_predict_prob_XGBC_yeast3():
    """predict_proba on the yeast3 data returns an array of shape (1484, 2)."""
    yeast_source = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = yeast_source.transform()
    labels = frame[yeast_source.target].values
    features = frame[frame.columns.difference([yeast_source.target])]

    model = Learners(description_filepath="../../descriptions/learners/XGBC.yaml")
    model.train(features, labels, checkpoint="yeast3LGBMC.ckp")

    expected_shape = (1484, 2)
    assert model.predict_proba(features).shape == expected_shape
def test_train_predict_prob_LGBC():
    """predict_proba on the pima-diabetes data returns an array of shape (768, 2)."""
    pima = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = pima.transform()
    labels = frame[pima.target].values
    feature_names = frame.columns.difference([pima.target])
    features = frame[feature_names]

    model = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")
    model.train(features, labels, checkpoint="diabetesLGBMC.ckp")

    probabilities = model.predict_proba(features)
    assert probabilities.shape == (768, 2)
def test_learn_train_predict_prob_LGBC():
    """predict() on the otto_group data returns a 1-D result of length 61878."""
    otto = Inputers(
        description_filepath="../../descriptions/pre/inputers/otto_group.yaml"
    )
    frame = otto.transform()
    labels = frame[otto.target].values
    features = frame[frame.columns.difference([otto.target])]

    model = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")
    model.train(features, labels, checkpoint="otto_group_LGBMC.ckp")

    predictions = model.predict(features)
    assert predictions.shape == (61878,)
def test_learn_train_predict_XGBC():
    """predict() on the wine data returns a 1-D result of length 178."""
    wine_source = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    frame = wine_source.transform()
    labels = frame[wine_source.target].values
    features = frame[frame.columns.difference([wine_source.target])]

    model = Learners(description_filepath="../../descriptions/learners/XGBC.yaml")
    model.train(features, labels, checkpoint="LGBMClassifier.ckp")

    expected_shape = (178,)
    assert model.predict(features).shape == expected_shape
def test_imputer_feature_boston_err():
    """Imputing boston with an added all-NaN column is expected to raise PasoError."""
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/boston.yaml"
    )
    boston_train = loader.transform(dataset="train")
    # Extra column containing nothing but NaN.
    boston_train["ggoo"] = np.nan

    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    with pytest.raises(PasoError):
        imputed = imputer.transform(boston_train, inplace=True, verbose=True)
        assert imputed.shape == (506, 14)
def test_learn_train_kw_target_pima():
    """After train() the learner's ``trained`` attribute compares equal to True.

    Note: despite the test name, the data is loaded from wine.yaml.
    """
    source = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    frame = source.transform()
    labels = frame[source.target].values
    features = frame[frame.columns.difference([source.target])]

    forest = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    forest.train(features, labels, checkpoint="diabetesRandomForest1.ckp")

    trained_flag = forest.trained
    assert trained_flag == True  # noqa: E712 - equality check kept as written
def test_train_predict_prob_XGBC_creditdard():
    """predict_proba on the creditcard data returns an array of shape (284807, 2)."""
    card_source = Inputers(
        description_filepath="../../descriptions/pre/inputers/creditcard.yaml"
    )
    frame = card_source.transform()
    labels = frame[card_source.target].values
    features = frame[frame.columns.difference([card_source.target])]

    model = Learners(description_filepath="../../descriptions/learners/XGBC.yaml")
    model.train(features, labels, checkpoint="creditdardLGBMC.ckp")

    probabilities = model.predict_proba(features)
    assert probabilities.shape == (284807, 2)
def test_learn_train_no_y():
    """Calling Learners.train with features only is expected to raise IndexError."""
    inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    dataset = inputer.transform()
    # Labels are deliberately not extracted: this test omits y from train().
    X = dataset[dataset.columns.difference([inputer.target])]

    o = Learners(description_filepath="../../descriptions/learners/RFC.sm.yaml")
    with pytest.raises(IndexError):
        # Call train() on its own; the previous `== o` comparison had its
        # result discarded and asserted nothing.
        o.train(X)
def test_inputer_transform_splitter_onto_wrong_place():
    """Splitters.transform raises PasoError for this call shape.

    The Splitters instance is built without arguments, and ``target`` and
    ``description_filepath`` are passed to ``transform`` instead.
    """
    iris = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    iris_frame = iris.transform()
    bare_splitter = Splitters()

    with pytest.raises(PasoError):
        train, valid = bare_splitter.transform(
            iris_frame,
            target=iris.target,
            description_filepath="../../descriptions/pre/inputers/test_size_30.yaml",
        )
def test_imputer_features_not_found():
    """Imputers.transform raises PasoError for the feature list ["bad", "badder", "Alm"]."""
    loader = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = loader.transform(dataset="train")
    imputer = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    requested_features = ["bad", "badder", "Alm"]

    with pytest.raises(PasoError):
        frame = imputer.transform(
            frame,
            features=requested_features,
            inplace=True,
            verbose=True,
        )
def test_inputer_imputer():
    """Inputers.inputers() returns the expected list of input format names."""
    iris_inputer = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    expected_formats = [
        "exec",
        "cvs",
        "xls",
        "xlsm",
        "text",
        "image2D",
        "image3D",
    ]
    assert iris_inputer.inputers() == expected_formats
def test_spltter_transform_creditcard_20__url_cvs_zip():
    """Splitting creditcard with split_20_stratify.yaml gives 227845 / 56962 rows."""
    card_source = Inputers(
        description_filepath="../../descriptions/pre/inputers/creditcard.yaml"
    )
    frame = card_source.transform(dataset="train")
    labels = frame[card_source.target].values
    features = frame[frame.columns.difference([card_source.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_20_stratify.yaml"
    )
    # The label halves of the split are not inspected here.
    train_X, valid_X, _, _ = splitter.transform(features, labels)

    assert train_X.shape == (227845, 30)
    assert valid_X.shape == (56962, 30)
def test_predict_Prob_error():
    """predict_proba on a learner that was never trained raises PasoError."""
    pima = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = pima.transform()
    model = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")

    # No model.train(...) call here: the learner is left untrained so that
    # predict_proba is the call expected to fail.
    feature_columns = frame.columns.difference([pima.target])
    features = frame[feature_columns]

    with pytest.raises(PasoError):
        assert model.predict_proba(features).shape == (768, 2)
def test_imputer_features_allnans():
    """Imputing "Alm" when it is NaN except for one cell is expected to raise PasoError."""
    o = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    train = o.transform(dataset="train")
    # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
    train["Alm"] = np.nan
    # Leave exactly one non-null value in the column.
    train.loc[0, "Alm"] = 0

    imp = Imputers(
        description_filepath="../../descriptions/pre/cleaners/most_frequent_impute.yaml"
    )
    with pytest.raises(PasoError):
        imputed = imp.transform(
            train, features=["Alm"], inplace=True, verbose=True
        )
        assert not imputed.isnull().all().all()
def test_splitter_transform__onto_group():
    """Splitting iris with split_20_stratify.yaml gives 120 train / 30 valid rows."""
    iris = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    frame = iris.transform()
    labels = frame[iris.target].values
    features = frame[frame.columns.difference([iris.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_20_stratify.yaml"
    )
    train_X, valid_X, train_y, valid_y = splitter.transform(features, labels)

    assert train_X.shape == (120, 4)
    assert valid_X.shape == (30, 4)
    assert train_y.shape == (120,)
    assert valid_y.shape == (30,)
def test_spitter_transform_s_wine():
    """Splitting wine with split_30_stratify.yaml gives 124 train / 54 valid rows."""
    wine_source = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    frame = wine_source.transform()
    labels = frame[wine_source.target].values
    features = frame[frame.columns.difference([wine_source.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_30_stratify.yaml"
    )
    train_X, valid_X, train_y, valid_y = splitter.transform(features, labels)

    assert train_X.shape == (124, 13)
    assert valid_X.shape == (54, 13)
    assert train_y.shape == (124,)
    assert valid_y.shape == (54,)
def test_learner_cross_validate_LGBC():
    """After train + cross_validate, evaluate() returns a mapping with 9 keys."""
    pima = Inputers(
        description_filepath="../../descriptions/pre/inputers/pima-diabetes.yaml"
    )
    frame = pima.transform()
    labels = frame[pima.target].values
    features = frame[frame.columns.difference([pima.target])]

    model = Learners(description_filepath="../../descriptions/learners/LGBC.yaml")
    model.train(features, labels, checkpoint="pima_LGBMC.ckp")
    # The cross-validation result itself is not inspected in this test.
    model.cross_validate(
        features,
        labels,
        cv_description_filepath="../../descriptions/learners/Cross_validation_classification.yaml",
    )

    metrics = model.evaluate(features, labels)
    assert len(metrics.keys()) == 9
def test_learner_cross_validate_RFC_iris_multiclass_evaluate_AO():
    """evaluate() on the iris training data reports accuracy exactly 1.0."""
    iris = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    frame = iris.transform()
    labels = frame[iris.target].values
    features = frame[frame.columns.difference([iris.target])]

    forest = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    forest.train(features, labels, checkpoint="iris)RC.ckp")
    forest.cross_validate(
        features,
        labels,
        cv_description_filepath="../../descriptions/learners/Cross_validation_classification.yaml",
    )

    metrics = forest.evaluate(features, labels)
    assert metrics["accuracy"] == 1.0
def test_spltter_transform_creditcard_url_30__cvs_zip():
    """Splitting yeast3 with split_20_stratify.yaml gives 1187 train / 297 valid rows.

    Note: despite the test name, the data is loaded from yeast3.yaml.
    """
    yeast_source = Inputers(
        description_filepath="../../descriptions/pre/inputers/yeast3.yaml"
    )
    frame = yeast_source.transform(dataset="train")
    labels = frame[yeast_source.target].values
    features = frame[frame.columns.difference([yeast_source.target])]

    splitter = Splitters(
        description_filepath="../../descriptions/pre/inputers/split_20_stratify.yaml"
    )
    train_X, valid_X, train_y, valid_y = splitter.transform(features, labels)

    assert train_X.shape == (1187, 8)
    assert valid_X.shape == (297, 8)
    assert train_y.shape == (1187,)
    assert valid_y.shape == (297,)
def test_learner_cross_validate_RFC_iris_milticlass_evaluate_test_accuracy():
    """cross_validate on iris reports a mean test_accuracy of at least 0.95."""
    iris = Inputers(
        description_filepath="../../descriptions/pre/inputers/iris.yaml"
    )
    frame = iris.transform()
    labels = frame[iris.target].values
    features = frame[frame.columns.difference([iris.target])]

    forest = Learners(description_filepath="../../descriptions/learners/RFC.yaml")
    forest.train(features, labels, checkpoint="pima_LGBMC.ckp")
    cv_scores = forest.cross_validate(
        features,
        labels,
        cv_description_filepath="../../descriptions/learners/Cross_validation_classification.yaml",
    )

    mean_test_accuracy = cv_scores["mean"]["test_accuracy"]
    assert mean_test_accuracy >= 0.95
def test_inputer_transform_cvs_url():
    """Compare column names from wine.yaml against a direct read of the UCI wine CSV.

    The reference frame is read from the URL with explicit column names.
    """
    column_names = [
        "Class",
        "Alcohol",
        "Malic-acid",
        "Ash",
        "Alcalinity-ash",
        "Magnesium",
        "phenols",
        "Flavanoids",
        "Nonflavanoid-phenols",
        "Proanthocyanins",
        "Color-intensity",
        "Hue",
        "OD280-OD315-diluted-wines",
        "Proline",
    ]
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
    reference = pd.read_csv(url, names=column_names).head()

    wine_source = Inputers(
        description_filepath="../../descriptions/pre/inputers/wine.yaml"
    )
    loaded_columns = wine_source.transform().columns

    # NOTE(review): .any() passes when a single column name matches; confirm
    # whether .all() was intended.
    column_matches = loaded_columns == reference.columns
    assert column_matches.any()