from hpsklearn import HyperoptEstimator, svc
from sklearn.model_selection import KFold


def k_fold_opt(images, labels):
    kf = KFold(n_splits=5)
    for train, test in kf.split(images):
        images_train, images_test = images[train], images[test]
        labels_train, labels_test = labels[train], labels[test]
        # svm = run_svm(images_train, labels_train)  # trained SVM model
        estim = HyperoptEstimator(classifier=svc('clf'))
        estim.fit(images_train, labels_train)
        print(estim.score(images_test, labels_test))
        print(estim.best_model())
    return
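# A hedged variant of the fold loop above that collects the per-fold scores
# and returns their mean instead of only printing them -- a sketch assuming
# the same hpsklearn setup, not part of the original code:
import numpy as np


def k_fold_opt_mean(images, labels, n_splits=5):
    kf = KFold(n_splits=n_splits)
    scores = []
    for train, test in kf.split(images):
        estim = HyperoptEstimator(classifier=svc('clf'))
        estim.fit(images[train], labels[train])
        scores.append(estim.score(images[test], labels[test]))
    return np.mean(scores)  # mean held-out accuracy across the folds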
from hpsklearn import HyperoptEstimator, svc, pca
from hyperopt import tpe
from sklearn.metrics import accuracy_score


def svm_model_tpe():
    # x_train/y_train/x_test/y_test are assumed to be defined at module level.
    estim = HyperoptEstimator(classifier=svc('my_clf', kernels=['linear', 'sigmoid']),
                              preprocessing=[pca('my_pca')],
                              algo=tpe.suggest,
                              max_evals=150,
                              trial_timeout=60,
                              verbose=0)
    estim.fit(x_train, y_train)
    print("score", estim.score(x_test, y_test))
    print("accuracy score", accuracy_score(estim.predict(x_test), y_test))
    print(estim.best_model())
import time

from hpsklearn import HyperoptEstimator, svc, any_preprocessing
from hyperopt import tpe


def bayesian_opt_pipeline():
    X, y = generate_dataset()  # assumed to be defined elsewhere
    estimator = HyperoptEstimator(
        classifier=svc("hyperopt_svc"),
        preprocessing=any_preprocessing("hyperopt_preprocess"),
        algo=tpe.suggest,
        max_evals=100,
        trial_timeout=120)
    start_time = time.time()
    estimator.fit(X, y)
    print(f"Time taken for fitting: {time.time() - start_time:.2f} seconds")
    print("best model:")
    print(estimator.best_model())
import pandas as pd
from hpsklearn import HyperoptEstimator, svc
from hyperopt import tpe


def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')
    X_train, y_train = df_train.iloc[:, 2:].values, df_train.iloc[:, 0].values
    X_test, y_test = df_test.iloc[:, 2:].values, df_test.iloc[:, 0].values
    estim = HyperoptEstimator(classifier=svc('mySVC'),
                              algo=tpe.suggest,
                              max_evals=100,
                              trial_timeout=120,
                              verbose=True)
    estim.fit(X_train, y_train)
    print("\n\n{}\n\n".format(estim.score(X_test, y_test)))
    print("\n\n{}\n\n".format(estim.best_model()))
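# Standard entry-point guard for the script above (an addition; assumes the
# file is meant to be run directly):
if __name__ == '__main__':
    main()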
import time

from hpsklearn import HyperoptEstimator, svc
from sklearn import svm

use_hpsklearn = True  # toggle: hyperopt-sklearn search vs. a plain sklearn SVC

if use_hpsklearn:
    estim = HyperoptEstimator(classifier=svc('mySVC'))
else:
    estim = svm.SVC()
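# Usage sketch for the toggle above (assumes X_train/y_train/X_test/y_test
# already exist): both branches expose the same fit/score interface, so the
# downstream code does not depend on which estimator was chosen.
estim.fit(X_train, y_train)
print(estim.score(X_test, y_test))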
from hpsklearn import HyperoptEstimator, svc
from hyperopt import tpe
from sklearn import svm
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=100, n_features=10, n_informative=5,
                           n_redundant=5, random_state=1)
X.shape

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                    random_state=42)
X_train.shape

# ---
if use_hpsklearn:
    estimator = HyperoptEstimator(classifier=svc("mySVC"))
    # AttributeError: 'hyperopt_estimator' object has no attribute 'classifier'
    # https://github.com/hyperopt/hyperopt-sklearn/issues/168
    # Solved by adding "from hyperopt import tpe" after importing hpsklearn:
    # https://github.com/hyperopt/hyperopt-sklearn/issues/168#issuecomment-799733587
else:
    estimator = svm.SVC()

#%%
# e = HyperoptEstimator(
#     classifier=any_classifier('my_clf'),
#     preprocessing=any_preprocessing('my_pre'),
#     algo=tpe.suggest,
#     max_evals=2,
#     trial_timeout=5)
#
# e.get_params()
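# A hedged, runnable version of the commented-out search above, fit on the
# synthetic split from this snippet; the tiny max_evals/trial_timeout values
# come from the original comment and only make sense as a smoke test:
from hpsklearn import any_classifier, any_preprocessing

e = HyperoptEstimator(
    classifier=any_classifier('my_clf'),
    preprocessing=any_preprocessing('my_pre'),
    algo=tpe.suggest,
    max_evals=2,
    trial_timeout=5)
e.fit(X_train, y_train)
print(e.score(X_test, y_test))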
# f1: training feature matrix. The start of this call was truncated in the
# source; it is reconstructed here to mirror the test-set load below.
f1 = np.loadtxt('D:/Study/Bioinformatics/AFP/feature_matrix/' + name_ds + '/' + name + '/train_' + name + '.csv',
                delimiter=',', skiprows=1)
f2 = np.loadtxt('D:/Study/Bioinformatics/AFP/feature_matrix/' + name_ds + '/train_label.csv',
                delimiter=',')
f3 = np.loadtxt('D:/Study/Bioinformatics/AFP/feature_matrix/' + name_ds + '/' + name + '/test_' + name + '.csv',
                delimiter=',', skiprows=1)
f4 = np.loadtxt('D:/Study/Bioinformatics/AFP/feature_matrix/' + name_ds + '/test_label.csv',
                delimiter=',')
np.set_printoptions(suppress=True)

X_train = get_feature(f1)
y_train = f2
X_test = get_feature(f3)
y_test = f4

clf = svm.SVC(kernel='rbf', probability=True)  # note: unused; the search below builds its own SVC

if __name__ == '__main__':
    estim = HyperoptEstimator(classifier=svc('clf'),
                              algo=tpe.suggest,
                              max_evals=100,
                              trial_timeout=120)
    estim.fit(X_train, y_train, n_folds=5, cv_shuffle=True, random_state=0)
    print(estim.score(X_test, y_test))
    print(estim.best_model())
def ml_algo(x_input, y_output):
    # 1. Neural network - MLPClassifier
    if choosen_model == "Multi-layer perceptron":
        param_search = [{'activation': ['logistic']}]
        # param_search = [{'activation': ['logistic', 'relu'],
        #                  'learning_rate': ['constant', 'adaptive'],
        #                  'solver': ['adam', 'lbfgs']}]
        model = MLPClassifier(solver='adam', learning_rate='constant',
                              hidden_layer_sizes=(10, 2), random_state=None,
                              max_iter=4, alpha=0.00001, shuffle=False)

    ############### D. MODEL VALIDATION ##############
    # Chronological 80/20 split (no shuffling, since this is time-series data).
    n_train_hours = int(len(x_input) * 0.8)
    X_train, X_test = x_input[:n_train_hours, :], x_input[n_train_hours:, :]
    y_train, y_test = y_output[:n_train_hours], y_output[n_train_hours:]

    # Variable standardization
    if standardization == 1:
        (X_train, X_test) = standardize_me(X_train, X_test, standardization)  # robust / min / bina

    # Time-series forward validation vs. cross-validation
    if choosen_model == "Multi-layer perceptron":
        print("ok")
        nested_cross_v = TimeSeriesSplit(n_splits=cross_validation_split).split(X_train)
        gsearch = GridSearchCV(estimator=model, cv=nested_cross_v,
                               param_grid=param_search)
        gsearch.fit(X_train, y_train)
        predicted_y = gsearch.predict(X_test)
    else:
        # 2. SVM
        gsearch = HyperoptEstimator(classifier=svc('mySVC'),
                                    algo=tpe.suggest,
                                    max_evals=4,
                                    trial_timeout=50)
        gsearch.fit(X_train, y_train)
        predicted_y = gsearch.predict(X_test)

    confusion_matrix_final = confusion_matrix(y_test, predicted_y, labels=[1, -1])
    # score_final = f1_score(y_test, predicted_y, average='binary')
    score_final = accuracy_score(y_test, predicted_y)

    predictions_df = pd.DataFrame(data=predicted_y)  # predicted y output
    predictions_df.to_csv("backsjds_final.csv")
    return (gsearch, score_final, confusion_matrix_final)
X_train = get_feature(f1)
y_train = f2
X_test = get_feature(f3)
y_test = f4

# hp.loguniform samples exp(uniform(low, high)), so C ranges over
# [e^-5, e^8] and gamma over [e^-8, e^3].
C_space = hp.loguniform('C', low=-5, high=8)
gamma_space = hp.loguniform('g', low=-8, high=3)

if __name__ == '__main__':
    estim = HyperoptEstimator(classifier=svc('SVM',
                                             kernels=['rbf'],
                                             probability=True,
                                             C=C_space,
                                             gamma=gamma_space,
                                             shrinking=True,
                                             tol=0.001,
                                             cache_size=200,
                                             verbose=False,
                                             max_iter=-1,
                                             random_state=0,
                                             degree=3,
                                             coef0=0.0),
                              algo=tpe.suggest,
                              max_evals=100,
                              trial_timeout=120,
                              preprocessing=[],
                              ex_preprocs=[])
    # The original snippet was cut off after cv_shuffle=True; the call is
    # completed here to match the earlier AFP snippet.
    estim.fit(X_train, y_train, n_folds=5, cv_shuffle=True,
              random_state=0)
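    # Hedged follow-up, mirroring the evaluation pattern of the earlier AFP
    # snippet; assumes the same X_test/y_test produced from f3/f4 above.
    print(estim.score(X_test, y_test))
    print(estim.best_model())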
def main():
    train_size = 0.8
    X_train, X_valid, y_train, y_valid, scaler = load_train_data(
        train_size=train_size, scale_it=True, square_root_it=True)
    X_test, X_test_ids = load_test_data(scaler=scaler, square_root_it=True)
    full_X_train, _, full_y_train, _, full_scaler = load_train_data(
        full_train=True, scale_it=True, square_root_it=True)
    X_test_for_full, X_test_ids = load_test_data(scaler=full_scaler,
                                                 square_root_it=True)

    # logistic
    # loss = ~0.6...
    # clf = LogisticRegression()
    # clf.fit(X_train, y_train)
    # clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    # clf_isotonic.fit(X_train, y_train)
    # y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    # log_loss_mc(y_valid, y_valid_predicted)

    # gnb
    # loss = ~1.6...
    # clf = GaussianNB()
    # clf.fit(X_train, y_train)
    # clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    # clf_isotonic.fit(X_train, y_train)
    # y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    # log_loss_mc(y_valid, y_valid_predicted)

    # rf
    # when n_estimators=100, without calibration, loss = ~0.6
    # when n_estimators=100, with calibration, loss = ~0.483
    clf = RandomForestClassifier(n_estimators=600, n_jobs=-1, verbose=1)
    clf.fit(X_train, y_train)
    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # linear svc
    clf = LinearSVC(C=1.0, verbose=2)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    # Min-max scale the decision values into [0, 1] as a crude probability proxy.
    prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)

    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # well, non-linear svc
    # (gamma=0.0 meant "auto" in old scikit-learn; modern versions require 'auto')
    clf = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0,
              shrinking=True, probability=False, cache_size=2000,
              class_weight=None, verbose=True, max_iter=-1)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)

    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    clf_isotonic = CalibratedClassifierCV(OneVsRestClassifier(clf), cv=5,
                                          method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # non-linear svc using sigmoidal
    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    # probability=True
    clf = SVC(C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0,
              shrinking=True, probability=True, cache_size=2000,
              class_weight=None, verbose=True, max_iter=-1)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # nusvc, wtf?
    clf = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma='auto',  # gamma=0.0 meant 'auto' in old scikit-learn
                coef0=0.0, shrinking=True, probability=False, tol=0.001,
                cache_size=2000, verbose=True, max_iter=-1, random_state=None)
    clf.fit(X_train, y_train)
    prob_pos = clf.decision_function(X_valid)
    prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    y_valid_predicted = prob_pos
    log_loss_mc(y_valid, y_valid_predicted)

    # http://stackoverflow.com/questions/29873981/error-with-sklearn-calibratedclassifiercv-and-svm
    clf_isotonic = CalibratedClassifierCV(OneVsRestClassifier(clf), cv=5,
                                          method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # nusvc using sigmoidal?
    clf = NuSVC(nu=0.5, kernel='rbf', degree=3, gamma='auto', coef0=0.0,
                shrinking=True, probability=True, tol=0.001, cache_size=2000,
                verbose=True, max_iter=-1, random_state=None)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # k nearest neighbors (the original comment said "k means", but this is KNN)
    clf = KNeighborsClassifier(n_neighbors=9, weights='uniform', algorithm='auto',
                               leaf_size=30, p=2, metric='minkowski',
                               metric_params=None)
    clf.fit(X_train, y_train)
    y_valid_predicted = clf.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    clf_isotonic = CalibratedClassifierCV(clf, cv=5, method='isotonic')
    clf_isotonic.fit(X_train, y_train)
    y_valid_predicted = clf_isotonic.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # hyperopt?!
    estim = HyperoptEstimator(classifier=svc('mySVC'))
    estim.fit(X_train, y_train)

    # pca?!
    # http://scikit-learn.org/stable/auto_examples/plot_digits_pipe.html#example-plot-digits-pipe-py
    pca = PCA()
    logistic = LogisticRegression()
    pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])
    pipe.fit(X_train, y_train)
    y_valid_predicted = pipe.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # pca + svc
    # (renamed the local SVC to svc_clf: assigning to "svc" here would shadow
    # hpsklearn's svc used above and raise UnboundLocalError inside main())
    pca = PCA()
    svc_clf = SVC(probability=True, cache_size=1000, verbose=True)  # probability=True so predict_proba works below
    pipe = Pipeline(steps=[('pca', pca), ('svc', svc_clf)])
    n_components = [20, 40, 64, 90]
    Cs = np.logspace(-4, 4, 5)
    # gammas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1, 1]
    gammas = [0.001, 0.005, 0.01, 0.1, 1]
    estimator = GridSearchCV(pipe,
                             dict(pca__n_components=n_components,
                                  svc__C=Cs,
                                  svc__gamma=gammas),
                             verbose=2)
    estimator.fit(X_train, y_train)
    y_valid_predicted = estimator.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)

    # wow
    from sklearn.preprocessing import MinMaxScaler

    train_size = 0.8
    X_train, X_valid, y_train, y_valid, scaler = load_train_data(
        train_size=train_size, scale_it=True, square_root_it=False)
    X_test, X_test_ids = load_test_data(scaler=scaler, square_root_it=False)
    full_X_train, _, full_y_train, _, full_scaler = load_train_data(
        full_train=True, scale_it=True, square_root_it=False)
    X_test_for_full, X_test_ids = load_test_data(scaler=full_scaler,
                                                 square_root_it=False)

    mm_scaler = MinMaxScaler()
    X_train = mm_scaler.fit_transform(X_train)
    X_valid = mm_scaler.transform(X_valid)

    svc_clf = SVC(probability=True, cache_size=1000, verbose=False)  # probability=True so predict_proba works below
    gammas = np.exp2([-7, -5, -3, 0, 3, 5, 7])
    Cs = np.exp2([-7, -5, -3, 0, 3, 5, 7])
    pipe = Pipeline(steps=[('svc', svc_clf)])
    estimator = GridSearchCV(pipe, dict(svc__C=Cs, svc__gamma=gammas), verbose=2)
    estimator.fit(X_train, y_train)
    y_valid_predicted = estimator.predict_proba(X_valid)
    log_loss_mc(y_valid, y_valid_predicted)
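# log_loss_mc is referenced throughout main() but not defined in this snippet.
# A plausible sketch of such a multiclass log-loss helper, built on
# sklearn.metrics.log_loss (an assumption, not the author's original code):
from sklearn.metrics import log_loss


def log_loss_mc(y_true, y_proba):
    loss = log_loss(y_true, y_proba)  # multiclass logarithmic loss
    print('multiclass log loss:', loss)
    return loss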