Example #1
0
def k_fold_opt(images, labels):
    """Run 5-fold cross-validation, fitting a fresh hyperopt-tuned SVC on
    each training split and printing the held-out score and the best
    model found for that fold.

    Parameters are index-sliceable arrays of samples and matching labels.
    """
    splitter = KFold(n_splits=5)
    for train_idx, test_idx in splitter.split(images):
        x_tr, x_te = images[train_idx], images[test_idx]
        y_tr, y_te = labels[train_idx], labels[test_idx]
        # One independent hyperparameter search per fold.
        fold_estimator = HyperoptEstimator(classifier=svc('clf'))
        fold_estimator.fit(x_tr, y_tr)

        print(fold_estimator.score(x_te, y_te))
        print(fold_estimator.best_model())
Example #2
0
def svm_model_tpe():
    """Tune an SVC (linear/sigmoid kernels) with a PCA preprocessing step
    via TPE search, then print the test score, the accuracy, and the best
    model found.

    Relies on module-level x_train / y_train / x_test / y_test.
    """
    search = HyperoptEstimator(
        classifier=svc('my_clf', kernels=['linear', 'sigmoid']),
        preprocessing=[pca('my_pca')],
        algo=tpe.suggest,
        max_evals=150,
        trial_timeout=60,
        verbose=0,
    )
    search.fit(x_train, y_train)

    print("score", search.score(x_test, y_test))
    print("accuracy score", accuracy_score(search.predict(x_test), y_test))
    print(search.best_model())
def bayesian_opt_pipeline():
    """Fit a TPE-driven hyperopt pipeline (any preprocessing + SVC) on a
    generated dataset, timing the search and printing the best model."""
    X, y = generate_dataset()

    searcher = HyperoptEstimator(
        classifier=svc("hyperopt_svc"),
        preprocessing=any_preprocessing("hyperopt_preprocess"),
        algo=tpe.suggest,
        max_evals=100,
        trial_timeout=120,
    )

    start_time = time.time()
    searcher.fit(X, y)
    print(f"Time taken for fitting {time.time() - start_time} seconds")

    print("best model:")
    print(searcher.best_model())
def main():
    """Load the train/test CSVs, tune an SVC with TPE, and print the
    held-out score plus the winning model.

    NOTE(review): column 0 is used as the label and columns 2+ as
    features (column 1 is skipped) -- confirm against the dataset layout.
    """
    train_df = pd.read_csv('../train_dataset.csv')
    test_df = pd.read_csv('../test_dataset.csv')

    X_train = train_df.iloc[:, 2:].values
    y_train = train_df.iloc[:, 0].values
    X_test = test_df.iloc[:, 2:].values
    y_test = test_df.iloc[:, 0].values

    estim = HyperoptEstimator(
        classifier=svc('mySVC'),
        algo=tpe.suggest,
        max_evals=100,
        trial_timeout=120,
        verbose=True,
    )

    estim.fit(X_train, y_train)

    print("\n\n{}\n\n".format(estim.score(X_test, y_test)))
    print("\n\n{}\n\n".format(estim.best_model()))
Example #5
0
import time
from hpsklearn import HyperoptEstimator, svc
from sklearn import svm

# Select the estimator: hyperopt-tuned SVC vs. plain sklearn SVC.
# NOTE(review): `use_hpsklearn` is not defined in this fragment; it is
# presumably set earlier in the original script -- confirm.
if use_hpsklearn:
    estim = HyperoptEstimator(classifier=svc('mySVC'))
else:
    estim = svm.SVC()
# Synthetic classification data: 100 samples, 10 features
# (5 informative + 5 redundant), fixed seed for reproducibility.
# NOTE(review): `make_classification` (sklearn.datasets) is not imported
# in this fragment.
X, y = make_classification(n_samples=100,
                           n_features=10,
                           n_informative=5,
                           n_redundant=5,
                           random_state=1)
X.shape

# 67/33 train/test split with a fixed seed.
# NOTE(review): `train_test_split` is also not imported in this fragment.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)
X_train.shape
# ---

# Same estimator selection again (notebook-style repetition).
if use_hpsklearn:
    estimator = HyperoptEstimator(classifier=svc("mySVC"))
    # AttributeError: 'hyperopt_estimator' object has no attribute 'classifier'
    # https://github.com/hyperopt/hyperopt-sklearn/issues/168
    # solved by including `from hyperopt import tpe` after importing hpsklearn
    # https://github.com/hyperopt/hyperopt-sklearn/issues/168#issuecomment-799733587
else:
    estimator = svm.SVC()
#%%
# e = HyperoptEstimator(
#     classifier=any_classifier('my_clf'),
#   preprocessing=any_preprocessing('my_pre'),
#   algo=tpe.suggest,
#   max_evals=2,
#   trial_timeout=5)
# # e.get_params()
Example #7
0
File: zz.py  Project: hzhou256/py
                        skiprows=1)
        # NOTE(review): this excerpt starts mid-function -- `f1` is produced
        # by a truncated np.loadtxt call immediately above this fragment.
        # Training labels, one value per sample, from a comma-separated file.
        f2 = np.loadtxt('D:/Study/Bioinformatics/AFP/feature_matrix/' +
                        name_ds + '/train_label.csv',
                        delimiter=',')
        # Test feature matrix; the first row is a header, hence skiprows=1.
        f3 = np.loadtxt('D:/Study/Bioinformatics/AFP/feature_matrix/' +
                        name_ds + '/' + name + '/test_' + name + '.csv',
                        delimiter=',',
                        skiprows=1)
        # Test labels.
        f4 = np.loadtxt('D:/Study/Bioinformatics/AFP/feature_matrix/' +
                        name_ds + '/test_label.csv',
                        delimiter=',')

        # Print arrays without scientific notation (cosmetic only).
        np.set_printoptions(suppress=True)
        X_train = get_feature(f1)
        y_train = f2
        X_test = get_feature(f3)
        y_test = f4
        # Plain RBF SVC; not used below -- the hyperopt search builds its own.
        clf = svm.SVC(kernel='rbf', probability=True)

        # NOTE(review): an indented __main__ guard is unusual; when imported,
        # the file loads above still execute and only the search is skipped.
        if __name__ == '__main__':
            # TPE search over SVC hyperparameters: 100 evaluations,
            # 2-minute cap per trial.
            estim = HyperoptEstimator(classifier=svc('clf'),
                                      algo=tpe.suggest,
                                      max_evals=100,
                                      trial_timeout=120)
            # 5-fold shuffled CV inside the fit, seeded for reproducibility.
            estim.fit(X_train,
                      y_train,
                      n_folds=5,
                      cv_shuffle=True,
                      random_state=0)
            print(estim.score(X_test, y_test))
            print(estim.best_model())
Example #8
0
def ml_algo(x_input, y_output):
    """Train either a grid-searched MLP or a hyperopt-tuned SVC on an
    80/20 chronological split of the data.

    Returns a tuple of (fitted search object, accuracy on the held-out
    tail, confusion matrix with label order [1, -1]).

    Relies on module-level `choosen_model`, `standardization`,
    `cross_validation_split` and `standardize_me`.
    """
    # 1. Neural network -- MLPClassifier
    if choosen_model == "Multi-layer perceptron":
        # Only the activation is searched here; a wider grid over
        # learning_rate and solver was tried previously.
        param_search = [{'activation': ['logistic']}]

        model = MLPClassifier(solver='adam',
                              learning_rate='constant',
                              hidden_layer_sizes=(10, 2),
                              random_state=None,
                              max_iter=4,
                              alpha=0.00001,
                              shuffle=False)

    # --- D. Model validation ---

    # Chronological 80/20 split (no shuffling: ordered data).
    n_train_hours = int(len(x_input) * 0.8)
    X_train = x_input[:n_train_hours, :]
    X_test = x_input[n_train_hours:, :]
    y_train = y_output[:n_train_hours]
    y_test = y_output[n_train_hours:]

    # Optional variable standardization (robust / min-max / binary).
    if standardization == 1:
        X_train, X_test = standardize_me(X_train, X_test, standardization)

    # Time-series forward cross-validation vs. hyperopt search.
    if choosen_model == "Multi-layer perceptron":
        print("ok")
        forward_cv = TimeSeriesSplit(
            n_splits=cross_validation_split).split(X_train)
        gsearch = GridSearchCV(estimator=model,
                               cv=forward_cv,
                               param_grid=param_search)
    else:
        # 2. SVM via hyperopt (TPE), small search budget.
        gsearch = HyperoptEstimator(classifier=svc('mySVC'),
                                    algo=tpe.suggest,
                                    max_evals=4,
                                    trial_timeout=50)

    gsearch.fit(X_train, y_train)
    predicted_y = gsearch.predict(X_test)

    confusion_matrix_final = confusion_matrix(y_test,
                                              predicted_y,
                                              labels=[1, -1])

    score_final = accuracy_score(y_test, predicted_y)
    # Persist predictions for offline inspection.
    predictions_df = pd.DataFrame(data=predicted_y)
    predictions_df.to_csv("backsjds_final.csv")

    return (gsearch, score_final, confusion_matrix_final)
Example #9
0
        # NOTE(review): this excerpt starts and ends mid-function; f1..f4 are
        # loaded by truncated code outside the fragment.
        X_train = get_feature(f1)
        y_train = f2
        X_test = get_feature(f3)
        y_test = f4

        # Log-uniform search spaces for the SVC penalty C and RBF gamma
        # (hyperopt samples exp(uniform(low, high)) -- confirm against the
        # hyperopt docs for the exact ranges intended).
        C_space = hp.loguniform('C', low=-5, high=8)
        gamma_space = hp.loguniform('g', low=-8, high=3)

        # NOTE(review): an indented __main__ guard is unusual; it only skips
        # the search on import.
        if __name__ == '__main__':
            # RBF-only SVC with probability estimates; every remaining
            # hyperparameter is pinned, so effectively only C and gamma
            # are searched. No preprocessing steps are allowed.
            estim = HyperoptEstimator(classifier=svc('SVM',
                                                     kernels=['rbf'],
                                                     probability=True,
                                                     C=C_space,
                                                     gamma=gamma_space,
                                                     shrinking=True,
                                                     tol=0.001,
                                                     cache_size=200,
                                                     verbose=False,
                                                     max_iter=-1,
                                                     random_state=0,
                                                     degree=3,
                                                     coef0=0.0),
                                      algo=tpe.suggest,
                                      max_evals=100,
                                      trial_timeout=120,
                                      preprocessing=[],
                                      ex_preprocs=[])
            # 5-fold shuffled CV; this call is truncated at the end of the
            # fragment.
            estim.fit(X_train,
                      y_train,
                      n_folds=5,
                      cv_shuffle=True,
Example #10
0
def main():
    """Benchmark a series of classifiers -- with and without isotonic
    calibration -- printing multiclass log-loss after each experiment.

    NOTE(review): exploratory notebook-style code; every model is fit
    sequentially and nothing is returned. The function may continue past
    the end of this excerpt.
    """
    train_size = 0.8


    # Scaled, square-root-transformed train/validation split plus matching
    # test set; a second "full" variant uses all training rows.
    X_train, X_valid, y_train, y_valid, scaler = load_train_data(train_size=train_size, scale_it=True, square_root_it=True)
    X_test, X_test_ids = load_test_data(scaler=scaler, square_root_it=True)

    full_X_train, _, full_y_train, _, full_scaler = load_train_data(full_train=True, scale_it=True, square_root_it=True)
    X_test_for_full, X_test_ids = load_test_data(scaler=full_scaler, square_root_it=True)


    # logistic
    # loss = ~0.6...
    # clf = LogisticRegression()
    # clf.fit(X_train, y_train)
    # clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    # clf_isotonic.fit(X_train, y_train)
    # y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    # log_loss_mc(y_valid, y_valid_predicted)
    

    # gnb
    # loss = ~1.6...
    # clf = GaussianNB()
    # clf.fit(X_train, y_train)
    # clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    # clf_isotonic.fit(X_train, y_train)
    # y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    # log_loss_mc(y_valid, y_valid_predicted)
    

    # rf
    # when n_estimators=100, without calibration, loss = ~0.6
    # when n_estimators=100, with calibration, loss = ~0.483
    clf = RandomForestClassifier(n_estimators=600, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
    

    # linear svc: no predict_proba, so min-max rescale the decision
    # function into [0, 1] as a crude probability proxy.
    clf = LinearSVC(C=1.0, verbose=2)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = \
            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)
    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # well, non-linear svc
    # NOTE(review): gamma=0.0 relied on old sklearn semantics (auto
    # 1/n_features); recent versions reject 0.0 -- confirm sklearn version.
    clf = SVC(C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=False, cache_size=2000, class_weight=None, verbose=True, max_iter=-1)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = \
            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    clf_isotonic = CalibratedClassifierCV(OneVsRestClassifier(clf), cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # non-linear svc using sigmoidal
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    # probability=True
    clf = SVC(C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=True, cache_size=2000, class_weight=None, verbose=True, max_iter=-1)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # nusvc, wtf?
    clf = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=2000, verbose=True, max_iter=-1, random_state=None)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = \
            (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    clf_isotonic = CalibratedClassifierCV(OneVsRestClassifier(clf), cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # nusvc using sigmoidal?
    clf = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, shrinking=True, probability=True, tol=0.001, cache_size=2000, verbose=True, max_iter=-1, random_state=None)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # k means
    # NOTE(review): this is k-nearest-neighbors, not k-means.
    clf = KNeighborsClassifier(n_neighbors=9, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # hyperopt?!
    estim = HyperoptEstimator( classifier=svc('mySVC') )
    estim.fit(X_train, y_train)


    # pca?!
    # http://scikit-learn.org/stable/auto_examples/plot_digits_pipe.html#example-plot-digits-pipe-py
    # NOTE(review): `pca` here shadows any hpsklearn `pca` imported above.
    pca = PCA()
    logistic = LogisticRegression()
    pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])
    pipe.fit(X_train, y_train)
    y_valid_predicted = pipe.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # pca + svc
    # NOTE(review): `svc` now shadows the hpsklearn `svc` used earlier in
    # this function -- any later HyperoptEstimator call would break.
    pca = PCA()
    svc = SVC(probability=False, cache_size=1000, verbose=True)
    pipe = Pipeline(steps=[('pca', pca), ('svc', svc)])
    n_components = [20, 40, 64, 90]
    Cs = np.logspace(-4, 4, 5)
    #gammas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 1]
    gammas = [0.001, 0.005, 0.01, 0.1, 1]
    estimator = GridSearchCV(pipe,
                         dict(pca__n_components=n_components,
                              svc__C=Cs,
                              svc__gamma=gammas), verbose=2)
    estimator.fit(X_train, y_train)
    # NOTE(review): the SVC has probability=False, so predict_proba here
    # should raise -- confirm whether this branch ever ran.
    y_valid_predicted = estimator.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)


    # wow

    # Reload without the square-root transform and add min-max scaling.
    from sklearn.preprocessing import MinMaxScaler
    train_size = 0.8
    X_train, X_valid, y_train, y_valid, scaler = load_train_data(train_size=train_size, scale_it=True, square_root_it=False)
    X_test, X_test_ids = load_test_data(scaler=scaler, square_root_it=False)
    full_X_train, _, full_y_train, _, full_scaler = load_train_data(full_train=True, scale_it=True, square_root_it=False)
    X_test_for_full, X_test_ids = load_test_data(scaler=full_scaler, square_root_it=False)

    mm_scaler = MinMaxScaler()
    X_train = mm_scaler.fit_transform(X_train)
    X_valid = mm_scaler.transform(X_valid)

    # Power-of-two grid for C and gamma.
    svc = SVC(probability=False, cache_size=1000, verbose=False)
    gammas = np.exp2([-7, -5, -3, 0, 3, 5, 7])
    Cs = np.exp2([-7, -5, -3, 0, 3, 5, 7])
    pipe = Pipeline(steps=[('svc', svc)])
    estimator = GridSearchCV(pipe,
                         dict(svc__C=Cs,
                              svc__gamma=gammas), verbose=2)
    estimator.fit(X_train, y_train)
    # NOTE(review): probability=False again -- predict_proba should raise.
    y_valid_predicted = estimator.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)