def gen_sub():
    """Train on all of train_data and write submission.csv for test_data."""
    train = train_data.copy()
    test = test_data.copy()
    # Positional indices of the train/test rows inside the concatenated frame.
    train_idx = list(range(train.shape[0]))
    test_idx = list(range(train.shape[0], train.shape[0] + test.shape[0]))
    y = train["interest_level"].apply(lambda x: target_num_map[x])
    del train["interest_level"]
    # drop=True stops the old index from leaking in as an extra column.
    data = pd.concat([train, test]).reset_index(drop=True)
    X_train, X_test, feats = coreProcess(data, y, train_idx, test_idx)
    xgb_clf = xgboostClassifier(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        nthread=12,
        eta=0.02,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=1.0,
        colsample_bylevel=0.8,
        min_child_weight=1,
        silent=1,
        num_rounds=1700,
        seed=RS,
    )
    print("Trainning:...")
    xgb_clf.fit(X_train, y)

    preds = xgb_clf.predict_proba(X_test)
    sub = pd.DataFrame(preds)
    # Column order must match the class encoding in target_num_map.
    sub.columns = ["high", "medium", "low"]
    sub["listing_id"] = test.listing_id.values
    sub.to_csv("submission.csv", index=False)
def validation_score(early_stop=False):
    """Stratified CV; when early_stop is True, only the first fold is scored."""
    clf = xgboostClassifier(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        nthread=3,
        eta=0.04,
        max_depth=6,
        subsample=0.7,
        colsample_bytree=1.0,
        colsample_bylevel=0.7,
        min_child_weight=1,
        silent=1,
        num_rounds=700,
        seed=RS,
    )
    print("*** Validation start ***")
    data = train_data.copy()
    y = data["interest_level"].apply(lambda x: target_num_map[x])
    del data["interest_level"]

    # skf = StratifiedKFold(n_splits=5, random_state=RS, shuffle=True)
    skf = StratifiedKFold(n_splits=3, shuffle=False)
    cv_scores = []
    for i, (train_idx, val_idx) in enumerate(skf.split(data, y), start=1):
        X = data.copy()
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]
        X_train, X_val, feats = coreProcess(X, y_train, train_idx, val_idx)
        clf.fit(X_train, y_train)
        # clf.fit_CV(X_train, X_val, y_train, y_val)
        y_val_pred = clf.predict_proba(X_val)
        loss = log_loss(y_val, y_val_pred)
        print("Iteration {}'s loss: {}".format(i, loss))
        cv_scores.append(loss)
        if early_stop:
            break
    print("*** Validation finished ***\n")
    return cv_scores
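
# Small helper to summarize what validation_score() returns -- a convenience
# sketch, not part of the original pipeline; numpy is imported locally so the
# function stays self-contained. Example: report_cv(validation_score())
def report_cv(scores):
    import numpy as np
    print("CV log loss: {:.5f} +/- {:.5f} over {} fold(s)".format(
        np.mean(scores), np.std(scores), len(scores)))
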
def search():
    """Grid-search over param_dict on top of the base classifier config below."""
    param_dict = {
        'eta': [0.02],
        'max_depth': [6],
        'subsample': [0.8],
        'colsample_bylevel': [0.7],
        'num_rounds': [1400, 1500, 1600, 1650],
    }
    clf = xgboostClassifier(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        nthread=12,
        eta=0.04,
        max_depth=6,
        subsample=0.7,
        colsample_bytree=1.0,
        colsample_bylevel=1.0,
        min_child_weight=1,
        silent=1,
        num_rounds=700,
        seed=RS,
    )
    paramSearch(clf, param_dict)
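
# Illustrative stand-in for the paramSearch helper used above; the real
# implementation lives elsewhere in this repo and may differ. This sketch
# walks the Cartesian product of param_dict, assumes the wrapper exposes its
# params as plain attributes, and scores each combo with a caller-supplied
# evaluate(clf) -> loss callable.
def grid_search_sketch(clf, param_dict, evaluate):
    import itertools
    best_loss, best_params = float("inf"), None
    keys = list(param_dict)
    for values in itertools.product(*(param_dict[k] for k in keys)):
        params = dict(zip(keys, values))
        for k, v in params.items():
            setattr(clf, k, v)  # assumption: params are attributes on the wrapper
        loss = evaluate(clf)
        print("{} -> {:.5f}".format(params, loss))
        if loss < best_loss:
            best_loss, best_params = loss, params
    return best_params, best_loss
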
# Collect xgboost configs for the ensemble; clfs must exist before the appends.
clfs = []
clfs.append(
    xgboostClassifier(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        nthread=9,
        eta=0.02,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=1.0,
        colsample_bylevel=0.7,
        min_child_weight=1,
        silent=1,
        num_rounds=1500,
        seed=0,
    ))
clfs.append(
    xgboostClassifier(
        objective='multi:softprob',
        eval_metric='mlogloss',
        num_class=3,
        nthread=9,
        eta=0.02,
        max_depth=6,