def objective(trial):
    """Optuna objective: fit a cuML logistic regression on Iris and
    return validation accuracy (the value Optuna maximizes).

    Parameters
    ----------
    trial : optuna.Trial
        Trial object used to sample hyperparameters.

    Returns
    -------
    float
        Mean accuracy on the held-out validation split.
    """
    iris = load_iris()
    # cuML wants float32; route through pandas to build the cuDF frames.
    X = cudf.DataFrame(pd.DataFrame(iris.data.astype('float32')))
    y = cudf.DataFrame(pd.DataFrame(iris.target.astype('float32')))

    solver = trial.suggest_categorical("solver", ["qn"])
    # suggest_float replaces the deprecated suggest_uniform
    # (same uniform distribution over [0.0, 1.0]).
    C = trial.suggest_float("C", 0.0, 1.0)
    if solver == "qn":
        penalty = trial.suggest_categorical("penalty", ["l1", "l2"])
    else:
        # 'penalty' parameter isn't relevant for this solver,
        # so we always specify 'l2' as the dummy value.
        penalty = "l2"

    classifier = LogisticRegression(max_iter=200,
                                    solver=solver,
                                    C=C,
                                    penalty=penalty)

    X_train, X_valid, y_train, y_valid = train_test_split(X, y)
    classifier.fit(X_train, y_train)
    return classifier.score(X_valid, y_valid)
def run_log_reg(scaled_df):
    """Run five train/test trials of a grid-searched logistic regression.

    In each trial, (penalty, C) are tuned by cross-validated grid search
    separately for accuracy, F1, and ROC-AUC; one classifier per metric
    is then refit on the training split and scored on train and test.

    Parameters
    ----------
    scaled_df : pandas.DataFrame
        Feature columns followed by a final binary label column ``y``.
        # assumes binary labels — f1_score/roc_auc_score are used in
        # their binary form, matching the original code.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Train and test scores, one row per trial,
        columns ['accuracy', 'f1', 'auc'].
    """
    raw_train_arr = []
    raw_test_arr = []

    # Metrics the grid search evaluates simultaneously (refit disabled).
    metrics = ['accuracy', 'f1', 'roc_auc_ovr']

    # C grid: 1e-8 ... 1e4, log-spaced.
    C_vals = [10 ** val for val in range(-8, 5)]
    penalty = ['none', 'l1', 'l2']
    params = {'penalty': penalty, 'C': C_vals}

    for _ in range(5):
        # Fresh train/test split each trial.
        X_train, X_test, y_train, y_test = train_test_split(
            scaled_df.iloc[:, :-1], scaled_df.y, train_size=5000)

        # BUG FIX: the default 'lbfgs' solver does not support the 'l1'
        # penalty in the grid; 'saga' supports all three grid penalties.
        clf = LogisticRegression(solver='saga')
        search_results = GridSearchCV(clf, params, scoring=metrics,
                                      refit=False)
        search_results.fit(X_train, y_train)

        # Collect per-candidate mean CV scores into one frame.
        results = pd.DataFrame(search_results.cv_results_['params'])
        results['mean_accuracy'] = search_results.cv_results_[
            'mean_test_accuracy']
        results['mean_f1'] = search_results.cv_results_['mean_test_f1']
        results['mean_auc'] = search_results.cv_results_[
            'mean_test_roc_auc_ovr']

        # Best hyperparameter row per metric.
        opt_acc_inf = results.sort_values(by='mean_accuracy',
                                          ascending=False).iloc[0]
        opt_f1_inf = results.sort_values(by='mean_f1',
                                         ascending=False).iloc[0]
        opt_auc_inf = results.sort_values(by='mean_auc',
                                          ascending=False).iloc[0]

        def _refit(inf):
            # Refit on the full training split with the winning params.
            model = LogisticRegression(C=inf.C, penalty=inf.penalty,
                                       solver='saga', max_iter=100000)
            model.fit(X_train, y_train)
            return model

        opt_acc_clf = _refit(opt_acc_inf)
        opt_f1_clf = _refit(opt_f1_inf)
        opt_auc_clf = _refit(opt_auc_inf)

        # BUG FIX: ROC-AUC must be computed from probability scores,
        # not from hard class predictions.
        raw_train_arr.append([
            opt_acc_clf.score(X_train, y_train),
            f1_score(y_train, opt_f1_clf.predict(X_train)),
            roc_auc_score(y_train,
                          opt_auc_clf.predict_proba(X_train)[:, 1]),
        ])
        raw_test_arr.append([
            opt_acc_clf.score(X_test, y_test),
            f1_score(y_test, opt_f1_clf.predict(X_test)),
            roc_auc_score(y_test,
                          opt_auc_clf.predict_proba(X_test)[:, 1]),
        ])

    # One row per trial, one column per metric.
    raw_train_df = pd.DataFrame(np.array(raw_train_arr).reshape(5, 3),
                                columns=['accuracy', 'f1', 'auc'])
    raw_test_df = pd.DataFrame(np.array(raw_test_arr).reshape(5, 3),
                               columns=['accuracy', 'f1', 'auc'])
    return raw_train_df, raw_test_df