Ejemplo n.º 1
0
def naive_bayes(X_train, y_train, X_test, y_test):
    """Fit a Gaussian Naive Bayes classifier and score it on the test split.

    Returns:
        (err, pred): the project error margin divided by 10 (the scaling
        convention used throughout this module), and the test predictions.
    """
    model = GaussianNB()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    return scaled_err, predictions
Ejemplo n.º 2
0
def logistic_reg_op_params(X_train, y_train, X_test, y_test):
    """Grid-search LogisticRegression hyper-parameters and score the best model.

    Exhaustively searches solver and regularization strength, refits on the
    combination that minimizes the project error margin, then evaluates on
    the held-out test split.

    Side effects:
        Prints the best parameter combination found.

    Returns:
        The test error margin divided by 10 (the module-wide scaling
        convention).
    """
    parameters = {
        "solver": ['newton-cg', 'lbfgs', 'liblinear'],
        "multi_class": ['auto'],
        "penalty": ['l2'],
        "C": [0.01, 0.1, 1, 10, 100],
    }
    # Lower error margin is better, so the scorer negates it
    # (greater_is_better=False) to fit GridSearchCV's maximize semantics.
    scoring = {
        'err_margin':
        metrics.make_scorer(evaluate.error_margin, greater_is_better=False)
    }
    clf = GridSearchCV(estimator=LogisticRegression(),
                       param_grid=parameters,
                       verbose=2,
                       n_jobs=2,
                       scoring=scoring,
                       refit='err_margin')
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    err = evaluate.error_margin(pred, y_test) / 10.0
    print("Best params:", clf.best_params_)
    return err
Ejemplo n.º 3
0
def svm_clf_op_params(X_train, y_train, X_test, y_test):
    """Grid-search SVC hyper-parameters and score the best model.

    Searches C and gamma over an RBF kernel, refits on the combination that
    minimizes the project error margin, then evaluates on the held-out test
    split.

    Side effects:
        Prints the best parameter combination found.

    Returns:
        The test error margin divided by 10 (the module-wide scaling
        convention).
    """
    parameters = {
        'C': [0.1, 1, 10, 100],
        'gamma': ['auto', 'scale'],
        'kernel': ['rbf'],
    }
    # Lower error margin is better, so the scorer negates it
    # (greater_is_better=False) to fit GridSearchCV's maximize semantics.
    scoring = {
        'err_margin':
        metrics.make_scorer(evaluate.error_margin, greater_is_better=False)
    }
    clf = GridSearchCV(estimator=SVC(),
                       param_grid=parameters,
                       n_jobs=2,
                       scoring=scoring,
                       refit='err_margin',
                       verbose=2)
    clf.fit(X_train, y_train)
    pred = clf.predict(X_test)
    err = evaluate.error_margin(pred, y_test) / 10.0
    print("Best params:", clf.best_params_)
    return err
Ejemplo n.º 4
0
def svm_clf(X_train, y_train, X_test, y_test):
    """Train a support-vector classifier (gamma='auto') on the training split.

    Returns:
        (err, pred): the project error margin divided by 10 (the module-wide
        scaling convention), and the test predictions.
    """
    classifier = SVC(gamma="auto")
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    return scaled_err, predictions
Ejemplo n.º 5
0
def random_forest_reg(X_train, y_train, X_test, y_test):
    """Fit a 100-tree random forest regressor and score it on the test split.

    Uses min_samples_split=5 and min_samples_leaf=2 to limit overfitting on
    small leaves.

    Returns:
        (err, pred): the project error margin divided by 10 (the module-wide
        scaling convention), and the test predictions.
    """
    forest = RandomForestRegressor(
        n_estimators=100, min_samples_split=5, min_samples_leaf=2)
    forest.fit(X_train, y_train)
    predictions = forest.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    return scaled_err, predictions
Ejemplo n.º 6
0
def logistic_reg(X_train, y_train, X_test, y_test):
    """Fit an L2-penalized logistic regression and score it on the test split.

    Fixed configuration: newton-cg solver, one-vs-rest multiclass scheme,
    200 max iterations, all cores (n_jobs=-1), deterministic seed.

    Returns:
        (err, pred): the project error margin divided by 10 (the module-wide
        scaling convention), and the test predictions.
    """
    model = LogisticRegression(random_state=0,
                               solver='newton-cg',
                               multi_class='ovr',
                               max_iter=200,
                               penalty='l2',
                               n_jobs=-1)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    return scaled_err, predictions
Ejemplo n.º 7
0
def knn(X_train, y_train, X_test, y_test):
    """Fit a 15-neighbor, distance-weighted KNN classifier and score it.

    Uses the Euclidean metric and lets sklearn pick the search algorithm
    (algorithm='auto'); runs on all cores.

    Returns:
        (err, pred): the project error margin divided by 10 (the module-wide
        scaling convention), and the test predictions.
    """
    classifier = KNeighborsClassifier(algorithm='auto',
                                      n_jobs=-1,
                                      n_neighbors=15,
                                      weights='distance',
                                      metric='euclidean')
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    return scaled_err, predictions
Ejemplo n.º 8
0
def svm_reg(X_train, y_train, X_test, y_test):
    """Fit a support-vector regressor and score it on the test split.

    Fixed configuration: gamma='scale', C=1.1, epsilon=1.1.

    Returns:
        (err, pred): the project error margin divided by 10 (the module-wide
        scaling convention), and the test predictions.
    """
    regressor = SVR(gamma="scale", C=1.1, epsilon=1.1)
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    return scaled_err, predictions
Ejemplo n.º 9
0
def ridge_clf_op_params(X_train, y_train, X_test, y_test):
    """Grid-search RidgeClassifier hyper-parameters and score the best model.

    Searches the regularization strength (alpha) and every available solver,
    refits on the combination minimizing the project error margin, and
    evaluates on the held-out test split. Prints the winning parameters.

    Returns:
        The test error margin divided by 10 (the module-wide scaling
        convention).
    """
    search_space = {
        "alpha": [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
        "solver": ['svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'],
    }
    # Lower error margin is better, so the scorer negates it for the search.
    scorers = {
        'err_margin':
        metrics.make_scorer(evaluate.error_margin, greater_is_better=False)
    }
    search = GridSearchCV(estimator=RidgeClassifier(),
                          param_grid=search_space,
                          verbose=2,
                          n_jobs=2,
                          scoring=scorers,
                          refit='err_margin')
    search.fit(X_train, y_train)
    predictions = search.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    print("Best params:", search.best_params_)
    return scaled_err
Ejemplo n.º 10
0
def knn_op_params(X_train, y_train, X_test, y_test):
    """Grid-search KNeighborsClassifier hyper-parameters and score the best.

    Searches weighting scheme, neighbor-search algorithm, and odd neighbor
    counts from 1 to 21 under the Euclidean metric; refits on the combination
    minimizing the project error margin and evaluates on the test split.
    Prints the winning parameters.

    Returns:
        The test error margin divided by 10 (the module-wide scaling
        convention).
    """
    search_space = {
        'weights': ['distance', 'uniform'],
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        'n_neighbors': [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21],
        'metric': ['euclidean'],
    }
    # Lower error margin is better, so the scorer negates it for the search.
    scorers = {
        'err_margin':
        metrics.make_scorer(evaluate.error_margin, greater_is_better=False)
    }
    search = GridSearchCV(KNeighborsClassifier(),
                          search_space,
                          scoring=scorers,
                          refit='err_margin',
                          n_jobs=-1,
                          verbose=2)
    search.fit(X_train, y_train)
    predictions = search.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    print("Best params:", search.best_params_)
    return scaled_err
Ejemplo n.º 11
0
def random_forest_reg_op_params(X_train, y_train, X_test, y_test):
    """Randomized search over RandomForestRegressor hyper-parameters.

    Samples 20 configurations (3-fold CV, fixed seed 42) over tree count,
    depth, leaf/split sizes, and feature-subset size, refits on the one
    minimizing the project error margin, then evaluates on the test split.
    Prints the winning parameters.

    Returns:
        The test error margin divided by 10 (the module-wide scaling
        convention).
    """
    # max_features ranges from 1 up to half the feature count; assumes
    # X_train is indexable row-wise so len(X_train[0]) is the feature count.
    search_space = {
        "n_estimators": [10, 100, 500, 1000],
        "max_depth": [5, 8, 15, 25, 30],
        "min_samples_split": [2, 5, 10, 15, 100],
        "min_samples_leaf": [1, 2, 5, 10],
        "max_features": list(range(1, len(X_train[0]) // 2 + 1)),
    }
    # Lower error margin is better, so the scorer negates it for the search.
    scorers = {
        'err_margin':
        metrics.make_scorer(evaluate.error_margin, greater_is_better=False)
    }
    search = RandomizedSearchCV(estimator=RandomForestRegressor(),
                                param_distributions=search_space,
                                n_iter=20,
                                cv=3,
                                verbose=2,
                                random_state=42,
                                n_jobs=-1,
                                scoring=scorers,
                                refit='err_margin')
    search.fit(X_train, y_train)
    predictions = search.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    print("Best params:", search.best_params_)
    return scaled_err
Ejemplo n.º 12
0
def ridge_clf(X_train, y_train, X_test, y_test):
    """Fit a ridge classifier (alpha=0.9, lsqr solver) and score it.

    Returns:
        (err, pred): the project error margin divided by 10 (the module-wide
        scaling convention), and the test predictions.
    """
    model = RidgeClassifier(alpha=0.9, solver='lsqr')
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    return scaled_err, predictions
Ejemplo n.º 13
0
def ridge_reg(X_train, y_train, X_test, y_test):
    """Fit a ridge regressor (alpha=0.5, sag solver) and score it.

    Returns:
        (err, pred): the project error margin divided by 10 (the module-wide
        scaling convention), and the test predictions.
    """
    model = Ridge(alpha=0.5, solver='sag')
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    scaled_err = evaluate.error_margin(predictions, y_test) / 10.0
    return scaled_err, predictions