コード例 #1
0
# Two regressors to compare: an XGBoost model capped at depth 3 and a
# GBMRegressor driven through the executable at `path_to_exec` (reported as
# "lightgbm" below). Each is configured for at most 1000 rounds.
clf_xgb = XGBRegressor(n_estimators=1000, max_depth=3)
clf_gbm = GBMRegressor(
    exec_path=path_to_exec,
    num_iterations=1000,
    learning_rate=0.01,
    num_leaves=255,
    min_data_in_leaf=1,
    early_stopping_round=20,
    verbose=False,
)

# Seeded split, so repeated runs with the same `seed` use the same partition.
split = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=seed)
x_train, x_test, y_train, y_test = split

# The held-out pair is handed to both models as their evaluation data.
holdout = (x_test, y_test)

# Training the two models
clf_gbm.fit(x_train, y_train, test_data=[holdout])
clf_xgb.fit(
    x_train,
    y_train,
    eval_set=[holdout],
    eval_metric='rmse',
    early_stopping_rounds=20,
    verbose=False,
)

print("xgboost: feature importance")
dic_fi = clf_xgb.booster().get_fscore()
# Each key has its first character dropped and the remainder parsed as an
# index into `feature_names` (presumably keys look like "f<index>" -- verify
# against the xgboost version in use).
xgb_fi = [(feature_names[int(key[1:])], score)
          for key, score in dic_fi.items()]
# Highest score first.
xgb_fi.sort(key=lambda pair: pair[1], reverse=True)
print(xgb_fi)

print("lightgbm: feature importance")
gbm_fi = clf_gbm.feature_importance(feature_names)
print(gbm_fi)
コード例 #2
0
        feature_fraction=0.7,
        feature_fraction_seed=seed,
        bagging_fraction=1,
        bagging_freq=10,
        bagging_seed=seed,
        metric_freq=1,
        early_stopping_round=50)
    json.dump(
        gbmr.param,
        open('{0}_lgbm_{1}{2}'.format(model_path, exec_time, model_params),
             'wb+'))
    gbmr.fit(validate_features.values,
             validate_labels.values[:, 0],
             test_data=[(train_features.values, train_labels.values[:, 0])])

    importance = dict(gbmr.feature_importance(train_features.columns.tolist()))
    importance = sorted(importance.items(), key=operator.itemgetter(1))
    df = pd.DataFrame(gbmr.feature_importance(train_features.columns.tolist()),
                      columns=['feature', 'importance'])
    df['importance'] = df['importance'] / df['importance'].sum()
    df.to_csv('{0}_lgbm_{1}{2}'.format(model_path, exec_time,
                                       model_feature_importance_csv),
              index=False)

    val_label = gbmr.predict(validate_features)
    val_frame = pd.Series(val_label, index=validate_features.index)
    val_frame.name = probability_consumed_label
    val_coupons = pd.read_csv(validate_path + 'dataset.csv')
    val_coupons = val_coupons.join(val_frame).join(
        val_frame.map(lambda x: 0. if x < 0.5 else 1.).rename('map')).join(
            pd.read_csv(validate_path + 'labels.csv')['Label'])
コード例 #3
0
        learning_rate=0.1,
        tree_learner='serial',
        min_data_in_leaf=10,
        metric='auc',
        feature_fraction=0.7,
        feature_fraction_seed=seed,
        bagging_fraction=1,
        bagging_freq=10,
        bagging_seed=seed,
        metric_freq=1,
        early_stopping_round=50
    )
    json.dump(gbmr.param, open('{0}_lgbm_{1}{2}'.format(model_path, exec_time, model_params), 'wb+'))
    gbmr.fit(validate_features.values, validate_labels.values[:, 0], test_data=[(train_features.values, train_labels.values[:, 0])])

    importance = dict(gbmr.feature_importance(train_features.columns.tolist()))
    importance = sorted(importance.items(), key=operator.itemgetter(1))
    df = pd.DataFrame(gbmr.feature_importance(train_features.columns.tolist()), columns=['feature', 'importance'])
    df['importance'] = df['importance'] / df['importance'].sum()
    df.to_csv('{0}_lgbm_{1}{2}'.format(model_path, exec_time, model_feature_importance_csv), index=False)

    val_label = gbmr.predict(validate_features)
    val_frame = pd.Series(val_label, index=validate_features.index)
    val_frame.name = probability_consumed_label
    val_coupons = pd.read_csv(validate_path + 'dataset.csv')
    val_coupons = val_coupons.join(val_frame).join(val_frame.map(lambda x: 0. if x < 0.5 else 1.).rename('map')).join(pd.read_csv(validate_path + 'labels.csv')['Label'])
    val_coupons.to_csv('{0}_lgbm_{1}{2}'.format(model_path, exec_time, val_diff_file), index=False)
    print confusion_matrix(val_coupons['Label'], val_coupons['map'])

    print gbmr.best_round
    print 'generate submission'