def gen_sub():
    """Train an XGBoost model on the full training set and write test-set
    class probabilities to ``submission.csv``.

    Relies on module-level globals: ``train_data``, ``test_data``,
    ``target_num_map``, ``RS``, ``coreProcess``, ``xgboostClassifier``, ``pd``.
    """
    train = train_data.copy()
    train_idx = list(range(train.shape[0]))
    test = test_data.copy()
    # Test rows are offset by the train length so they index into the
    # concatenated frame built below.
    test_idx = [i + train.shape[0] for i in range(test.shape[0])]

    # Map string labels to integer classes via target_num_map.
    y = train["interest_level"].apply(lambda x: target_num_map[x])
    del train["interest_level"]

    # Feature engineering runs on train+test together; coreProcess splits
    # them back apart using the index lists.
    data = pd.concat([train, test]).reset_index()
    X_train, X_test, feats = coreProcess(data, y, train_idx, test_idx)

    xgb_clf = xgboostClassifier(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        nthread=12,
        eta=0.02,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=1.0,
        colsample_bylevel=0.8,
        min_child_weight=1,
        silent=1,
        num_rounds=1700,
        seed=RS,
    )
    print("Training:...")  # fixed typo: was "Trainning:..."
    xgb_clf.fit(X_train, y)

    preds = xgb_clf.predict_proba(X_test)
    sub = pd.DataFrame(preds)
    # Column order must match the integer encoding used in target_num_map
    # — TODO confirm against that mapping's definition.
    sub.columns = ["high", "medium", "low"]
    sub["listing_id"] = test.listing_id.values
    sub.to_csv("submission.csv", index=False)
def validation_score(early_stop=False):
    """Run stratified 3-fold cross-validation and return per-fold log-losses.

    Parameters
    ----------
    early_stop : bool
        If True, stop after the first fold (quick smoke check).

    Returns
    -------
    list of float
        multiclass log-loss for each completed fold.

    Relies on module-level globals: ``train_data``, ``target_num_map``,
    ``RS``, ``coreProcess``, ``xgboostClassifier``, ``StratifiedKFold``,
    ``log_loss``.
    """
    clf = xgboostClassifier(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        nthread=3,
        eta=0.04,
        max_depth=6,
        subsample=0.7,
        colsample_bytree=1.0,
        colsample_bylevel=0.7,
        min_child_weight=1,
        silent=1,
        num_rounds=700,
        seed=RS,
    )
    print("*** Validation start ***")
    data = train_data.copy()
    y = data["interest_level"].apply(lambda x: target_num_map[x])
    del data["interest_level"]

    # NOTE(review): shuffle=False keeps folds deterministic across runs;
    # a shuffled 5-fold variant (random_state=RS) was tried previously.
    skf = StratifiedKFold(n_splits=3, shuffle=False)

    cv_scores = []
    for i, (train_idx, val_idx) in enumerate(skf.split(data, y), start=1):
        # Fresh copy per fold: coreProcess may mutate its input — TODO confirm.
        X = data.copy()
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        X_train, X_val, feats = coreProcess(X, y_train, train_idx, val_idx)
        clf.fit(X_train, y_train)
        y_val_pred = clf.predict_proba(X_val)
        loss = log_loss(y_val, y_val_pred)
        print("Iteration {}'s loss: {}".format(i, loss))
        cv_scores.append(loss)
        if early_stop:
            break
    print("*** Validation finished ***\n")
    return cv_scores
def search():
    """Sweep XGBoost hyper-parameters with paramSearch over a base model.

    Relies on module-level globals: ``xgboostClassifier``, ``paramSearch``,
    ``RS``.
    """
    # Values to try; every other setting stays fixed on the base classifier.
    param_dict = {
        'eta': [0.02],
        'max_depth': [6],
        'subsample': [0.8],
        'colsample_bylevel': [0.7],
        'num_rounds': [1400, 1500, 1600, 1650],
    }

    # Base configuration the search starts from.
    base_config = dict(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        nthread=12,
        eta=0.04,
        max_depth=6,
        subsample=0.7,
        colsample_bytree=1.0,
        colsample_bylevel=1.0,
        min_child_weight=1,
        silent=1,
        num_rounds=700,
        seed=RS,
    )
    clf = xgboostClassifier(**base_config)
    paramSearch(clf, param_dict)
# colsample_bytree = 1.0, # colsample_bylevel = 0.8, # min_child_weight=1, # silent = 1, # num_rounds = 1700, # seed = 0, # )) clfs.append( xgboostClassifier( objective='multi:softprob', eval_metric='mlogloss', num_class=3, nthread=9, eta=0.02, max_depth=6, subsample=0.8, colsample_bytree=1.0, colsample_bylevel=0.7, min_child_weight=1, silent=1, num_rounds=1500, seed=0, )) clfs.append( xgboostClassifier( objective='multi:softprob', eval_metric='mlogloss', num_class=3, nthread=9, eta=0.02, max_depth=6,