# Hyperparameter objective for the SVM: the search runs over (sqrt(C), sqrt(gamma))
# so that squaring keeps both parameters positive. Returns the summed F1 score
# over the stratified cross-validation folds (higher is better).
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score


def objective_svm(x):
    C_in, gamma_in = x[0] ** 2, x[1] ** 2
    # all other SVC parameters are left at their scikit-learn defaults
    svm_classifier = SVC(C=C_in, gamma=gamma_in, kernel='rbf', tol=0.001)
    k = 5
    kf = StratifiedKFold(k)
    score = 0.0
    for train_index, test_index in kf.split(X_train, Y_train):
        svm_classifier.fit(X_train[train_index], Y_train[train_index])
        Y_pred = svm_classifier.predict(X_train[test_index])
        score += f1_score(Y_train[test_index], Y_pred)
    return score
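# A hedged usage sketch (the optimizer driving objective_svm is not shown in
# this snippet): since the objective returns a summed F1 to be maximized, it is
# negated for scipy's minimizer. The starting point x0 is hypothetical.
import numpy as np
from scipy.optimize import minimize

x0 = np.array([1.0, 0.3])  # hypothetical initial (sqrt(C), sqrt(gamma))
result = minimize(lambda x: -objective_svm(x), x0, method='Nelder-Mead')
C_best, gamma_best = result.x[0] ** 2, result.x[1] ** 2
print("best C:", C_best, "best gamma:", gamma_best)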
# LightGBM cross-validation: train one model per fold, track F1 on both splits,
# and accumulate per-fold probabilities on the test set. The fold-loop header,
# the k/array initialisation and the lgb_train Dataset are reconstructed here
# from the surrounding snippets; they are cut off in this fragment.
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

k = 5
kf = KFold(k)
predictions = np.zeros((X_test.shape[0], k))
predictions_train = np.zeros(X_train.shape[0])
i = 0
for train_index, test_index in kf.split(X_train, Y_train):
    lgb_train = lgb.Dataset(X_train[train_index], Y_train[train_index])
    lgb_eval = lgb.Dataset(X_train[test_index], Y_train[test_index], reference=lgb_train)
    gbm = lgb.train(parameters,
                    train_set=lgb_train,
                    num_boost_round=100,
                    valid_sets=lgb_eval,
                    verbose_eval=40,
                    feval=f1_score_lgbm)
    res = gbm.predict(X_test)                         # fold probabilities on the test set
    Y_pred = gbm.predict(X_train[test_index])         # out-of-fold probabilities
    Y_pred_train = gbm.predict(X_train[train_index])
    predictions[:, i] = res
    predictions_train[test_index] = Y_pred
    print("train: " + str(f1_score(Y_train[train_index], Y_pred_train.round())))
    print("test: " + str(f1_score(Y_train[test_index], Y_pred.round())))
    i += 1

# save submission file: a summed fold probability above k/2 = 2.5 means a mean
# predicted probability above 0.5
Y_test = (np.sum(predictions, axis=1) > 2.5).astype(int)
submission = pd.DataFrame(Y_test)
submission.to_csv(path_or_buf=path_to_submissions + "-".join(my_features_acronym) + "lgbm" + ".csv",
                  index=True,
                  index_label="id",
                  header=["category"])

# save probabilities for stacking
stacking_logits_test = np.sum(predictions, axis=1)
stacking_test = pd.DataFrame(stacking_logits_test)
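# Hypothetical definition of the custom feval passed to lgb.train above (its
# real definition lives elsewhere in the project). LightGBM calls it with the
# raw predictions and the evaluation Dataset, and expects a
# (name, value, is_higher_better) triple back.
def f1_score_lgbm(preds, eval_data):
    y_true = eval_data.get_label()
    y_pred = (preds > 0.5).astype(int)  # assumed 0.5 cut-off for the binary objective
    return "f1", f1_score(y_true, y_pred), True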
# Random forest cross-validation: store each fold model's test-set predictions
# (labels and probabilities) plus out-of-fold train probabilities for stacking.
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

k = 5  # assumed: k is not set in this fragment, but 5 matches the 2.5 vote threshold below
kf = KFold(k)
predictions = np.zeros((X_test.shape[0], k))
predictions_test = np.zeros((X_test.shape[0], k))
predictions_train = np.zeros(X_train.shape[0])
i = 0
# for each fold store predictions on test set and print validation results
test_score = 0.0
for train_index, test_index in kf.split(X_train, Y_train):
    RF.fit(X_train[train_index], Y_train[train_index])
    Y_pred = RF.predict(X_train[test_index])
    Y_pred_train = RF.predict(X_train[train_index])
    predictions[:, i] = RF.predict(X_test)
    predictions_test[:, i] = RF.predict_proba(X_test)[:, 1]
    predictions_train[test_index] = RF.predict_proba(X_train[test_index])[:, 1]
    current_test_score = f1_score(Y_train[test_index], Y_pred)
    test_score += current_test_score
    print("train: " + str(f1_score(Y_train[train_index], Y_pred_train)))
    print("test: " + str(current_test_score))
    i += 1
print("CV test score: " + str(test_score / k))

# save submission file: majority vote over the k fold models
Y_test = (np.sum(predictions, axis=1) > 2.5).astype(int)
submission = pd.DataFrame(Y_test)
submission.to_csv(
    path_or_buf=path_to_submissions + "-".join(my_features_acronym) + "RF.csv",
    index=True,
    index_label="id",
    header=["category"]
)
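# Sanity check of the vote threshold used above: with k = 5 fold models casting
# 0/1 votes, a row sum above 2.5 means at least 3 of the 5 voted positive.
votes = np.array([[1, 1, 1, 0, 0],   # 3 of 5 positive -> 1
                  [0, 1, 0, 0, 0]])  # 1 of 5 positive -> 0
print((np.sum(votes, axis=1) > 2.5).astype(int))  # [1 0]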
print("parameters:") print(parameters) print("cross validation:") LogReg = LogisticRegressionCV(max_iter=parameters['max_iter'], tol=parameters['tol'], penalty=parameters['penalty']) k = 5 kf = KFold(k) predictions = np.zeros((X_test.shape[0], k)) i = 0 for train_index, test_index in kf.split(X_train, Y_train): LogReg.fit(X_train[train_index], Y_train[train_index]) Y_pred = LogReg.predict(X_train[test_index]) Y_pred_train = LogReg.predict(X_train[train_index]) predictions[:, i] = LogReg.predict(X_test) print("train: " + str(f1_score(Y_train[train_index], Y_pred_train))) print("test: " + str(f1_score(Y_train[test_index], Y_pred))) i += 1 Y_test = (np.sum(predictions, axis=1) > 2.5).astype(int) # submission = pd.DataFrame(Y_test) # submission.to_csv( # path_or_buf=path_to_submissions+"-".join(my_features_string)+"LogReg.csv", # index=True, # index_label="id", # header=["category"] # )
# Quick 2-fold evaluation of a random forest built from the current parameter
# dict, used inside a feature-screening loop (see the bookkeeping around
# best_index and i). The constructor head is reconstructed here from the
# keyword arguments; it is cut off in this fragment.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

RF = RandomForestClassifier(n_estimators=parameters["n_estimators"],
                            criterion=parameters["criterion"],
                            max_depth=parameters["max_depth"],
                            min_samples_leaf=parameters["min_samples_leaf"],
                            bootstrap=parameters["bootstrap"],
                            n_jobs=parameters["n_jobs"])
k = 2
kf = KFold(k)
train_score = 0.0
test_score = 0.0
for train_index, test_index in kf.split(X_train, Y_train):
    RF.fit(X_train[train_index], Y_train[train_index])
    Y_pred = RF.predict(X_train[test_index])
    Y_pred_train = RF.predict(X_train[train_index])
    train_score += f1_score(Y_train[train_index], Y_pred_train)
    test_score += f1_score(Y_train[test_index], Y_pred)
train_score /= k
test_score /= k
if test_score > best_test_score:
    best_index = i
    best_train_score = train_score
    best_test_score = test_score
print("train score: " + str(train_score))
print("test score: " + str(test_score))
print("")
print("for this round, the best feature was " +  # the fragment ends mid-statement in the source
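# Hedged reconstruction of the feature-screening loop this fragment appears to
# run inside (its header and the final print's argument are cut off in the
# source); the feature list and matrix builder below are hypothetical stand-ins.
best_index, best_train_score, best_test_score = -1, 0.0, 0.0
for i, feature in enumerate(candidate_features):      # hypothetical feature list
    X_train = build_feature_matrix(feature)           # hypothetical helper
    # the 2-fold evaluation above runs here, updating best_* when test_score improves
if best_index >= 0:
    print("for this round, the best feature was " + candidate_features[best_index])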