# test['logvar38'] = test['var38'].map(np.log1p)
# # Encode var36 as category
# test['var36'] = test['var36'].astype('category')
# test = pd.get_dummies(test)
#
# NOTE(review): the remainder of this comment is a disabled copy of the
# scoring and plotting statements that follow it.
# test_normalized = normalize(test, axis=0)
# test_pca = pca.fit_transform(test_normalized)
# test['PCA1'] = test_pca[:,0]
# test['PCA2'] = test_pca[:,1]
# sel_test = test[features]
# sel_test = fs.transform(sel_test)
# probs = clf.predict_proba(sel_test, ntree_limit=clf.best_iteration)
# submission = pd.DataFrame({"ID":test.index, "TARGET":probs[:,1]})
# submission.to_csv("submission.csv", index=False)
# # plot feature importance
# print 'plot feature importance ...'
# #mapFeat = dict(zip(["f"+str(i) for i in range(len(features))],features))
# mapFeat = dict(zip(features,features))
# ts = pd.Series(clf.booster().get_fscore())
# #ts.index = ts.reset_index()['index'].map(mapFeat)
# ts.sort_values()[-15:].plot(kind="barh", title=("features importance"))
# featp = ts.sort_values()[-15:].plot(kind='barh', x='feature', y='fscore', legend=False, figsize=(6, 10))
# plt.title('XGBoost Feature Importance')
# fig_featp = featp.get_figure()
# fig_featp.savefig('feature_importance_xgb.png', bbox_inches='tight', pad_inches=1)
# ---------------------------------------------------------------------------
# Score the test set, write the submission file, and save a feature
# importance chart for the trained XGBoost classifier.
# ---------------------------------------------------------------------------

# Add the two PCA components to the test frame.
# The projection must be the one already held by `pca`: calling
# fit_transform() here would refit the PCA on the test rows and yield
# components that differ from those the selector/classifier were built on.
# NOTE(review): assumes `pca` was fitted on the training data earlier in
# this script -- confirm; an unfitted `pca` makes transform() raise.
test_normalized = normalize(test, axis=0)
test_pca = pca.transform(test_normalized)
test['PCA1'] = test_pca[:, 0]
test['PCA2'] = test_pca[:, 1]

# Restrict to the model's feature columns, apply the feature selector, and
# predict class probabilities with the number of trees recorded in
# clf.best_iteration.
sel_test = fs.transform(test[features])
probs = clf.predict_proba(sel_test, ntree_limit=clf.best_iteration)

# Column 1 of predict_proba is written out as TARGET, keyed by the test
# frame's index.
submission = pd.DataFrame({"ID": test.index, "TARGET": probs[:, 1]})
submission.to_csv("submission.csv", index=False)

# plot feature importance
# Parenthesised form prints the same text on Python 2 and also parses on
# Python 3.
print('plot feature importance ...')
#mapFeat = dict(zip(["f"+str(i) for i in range(len(features))],features))
mapFeat = dict(zip(features, features))
ts = pd.Series(clf.booster().get_fscore())
#ts.index = ts.reset_index()['index'].map(mapFeat)

# Keep the 15 highest f-scores (ascending sort, take the tail) and draw them
# once as a horizontal bar chart; the title is set explicitly afterwards.
top_scores = ts.sort_values()[-15:]
featp = top_scores.plot(kind='barh', x='feature', y='fscore',
                        legend=False, figsize=(6, 10))
plt.title('XGBoost Feature Importance')
fig_featp = featp.get_figure()
fig_featp.savefig('feature_importance_xgb.png',
                  bbox_inches='tight', pad_inches=1)