Example No. 1
import numpy as np
from autofeat import AutoFeatClassifier


def test_classification():
    # autofeat with numpy arrays, but as a classification problem
    # (get_random_data() is a test-suite helper returning a random
    # feature matrix and a continuous target; see the sketch below)
    X, target = get_random_data()
    # binarize the continuous target by thresholding at its mean
    target = np.array(target > target.mean(), dtype=int)
    afreg = AutoFeatClassifier(verbose=1, feateng_steps=3)
    df = afreg.fit_transform(X, target)
    # training accuracy should be (near) perfect on both the raw and
    # the transformed features
    assert afreg.score(X, target) >= 0.9999, "Accuracy should be 1."
    assert afreg.score(df, target) >= 0.9999, "Accuracy should be 1."
    # autofeat renames the original columns to x000, x001, ...
    assert list(df.columns)[:3] == ["x000", "x001", "x002"], "Wrong column names"
Example No. 2
        # run each estimator check only once: skip any check whose
        # underlying function name is already in successful_tests
        if check.func.__name__ not in successful_tests:
            print(check.func.__name__)
            successful_tests.add(check.func.__name__)
            check(estimator)
    # additionally check the class, but don't run all the other tests
    for estimator, check in check_estimator(AutoFeatRegressor,
                                            generate_only=True):
        if check.func.__name__ not in successful_tests:
            print(check.func.__name__)
            successful_tests.add(check.func.__name__)
            check(estimator)

    print("## Running sklearn Classifier tests")
    # transform may return NaNs, so pre-seed the set of "already run"
    # tests to skip sklearn's NaN/inf check
    successful_tests = {"check_estimators_nan_inf"}
    for estimator, check in check_estimator(AutoFeatClassifier(
            feateng_steps=1, featsel_runs=1, always_return_numpy=True),
                                            generate_only=True):
        if check.func.__name__ not in successful_tests:
            print(check.func.__name__)
            successful_tests.add(check.func.__name__)
            check(estimator)
    # additionally check the class, but don't run all the other tests
    for estimator, check in check_estimator(AutoFeatClassifier,
                                            generate_only=True):
        if check.func.__name__ not in successful_tests:
            print(check.func.__name__)
            successful_tests.add(check.func.__name__)
            check(estimator)

    print("## Running custom tests")
    print("# test_do_almost_nothing")
Example No. 3
import ast
import os
import pickle
import warnings

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split

from autofeat import AutoFeatClassifier


def test_autofeat(dataset, feateng_steps=2):
    # load data (load_classification_dataset is a helper defined
    # elsewhere in the test suite)
    X, y, units = load_classification_dataset(dataset)
    # split in training and test parts
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=12)
    # run autofeat
    afreg = AutoFeatClassifier(verbose=1,
                               feateng_steps=feateng_steps,
                               units=units)
    # fit autofeat on the training split only; otherwise the
    # cross-validated model would overfit on the new features
    X_train_tr = afreg.fit_transform(X_train, y_train)
    X_test_tr = afreg.transform(X_test)
    print("autofeat new features:", len(afreg.new_feat_cols_))
    print("autofeat Acc. on training data:",
          accuracy_score(y_train, afreg.predict(X_train_tr)))
    print("autofeat Acc. on test data:",
          accuracy_score(y_test, afreg.predict(X_test_tr)))
    # train a logistic regression on the transformed training split,
    # with cross-validation for parameter selection
    print("# Logistic Regression")
    rreg = LogisticRegression(class_weight="balanced")
    param_grid = {"C": np.logspace(-4, 4, 10)}
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        gsmodel = GridSearchCV(rreg, param_grid, cv=5)
        gsmodel.fit(X_train_tr, y_train)
    print("best params:", gsmodel.best_params_)
    print("best score:", gsmodel.best_score_)
    print("Acc. on training data:",
          accuracy_score(y_train, gsmodel.predict(X_train_tr)))
    print("Acc. on test data:",
          accuracy_score(y_test, gsmodel.predict(X_test_tr)))
    print("# Random Forest")
    rforest = RandomForestClassifier(n_estimators=100, random_state=13)
    param_grid = {"min_samples_leaf": [0.0001, 0.001, 0.01, 0.05, 0.1, 0.2]}
    gsmodel = GridSearchCV(rforest, param_grid, cv=5)
    gsmodel.fit(X_train_tr, y_train)
    print("best params:", gsmodel.best_params_)
    print("best score:", gsmodel.best_score_)
    print("Acc. on training data:",
          accuracy_score(y_train, gsmodel.predict(X_train_tr)))
    print("Acc. on test data:",
          accuracy_score(y_test, gsmodel.predict(X_test_tr)))

    # store the fitted model as an artifact (e.g. inside the docker container)
    with open("model.pkl", "wb") as f:
        pickle.dump(gsmodel, f)

    # read inputs passed in via the GitHub Actions environment
    if os.environ["INPUT_MYINPUT"] != "zeroinputs":
        inputs = ast.literal_eval(os.environ["INPUT_MYINPUT"])
        print("\nThe predicted output is:")
        output = gsmodel.predict([inputs])
        print(output)
    else:
        output = ["None"]
        print("\nUser didn't provide inputs to predict")

    print("\n=======================Action Completed========================")
    print(f"::set-output name=myOutput::{output[0]}")