# ---------------------------------------------------------------------------
# Pair plot of the selected features, coloured by TARGET
# ---------------------------------------------------------------------------
# Drop rows containing NaN, then slice rows by index *label* up to and
# including 100000 (``.loc`` slices are label-based and end-inclusive).
# NOTE(review): this is a label slice, not a row count -- confirm intended.
plot_data = plot_data.dropna().loc[:100000, :]

# Every column except the hue column is plotted.
pair_plot_vars = [col for col in plot_data.columns if col != 'TARGET']

# ``height`` replaces the ``size`` keyword, which seaborn renamed in 0.9 and
# no longer accepts in recent releases.
grid = sns.PairGrid(
    data=plot_data,
    height=3,
    diag_sharey=False,
    hue='TARGET',
    vars=pair_plot_vars,
)
grid.map_upper(plt.scatter, alpha=0.2)  # upper triangle: scatter plots
grid.map_diag(sns.kdeplot)  # diagonal: 1-D density estimates
grid.map_lower(sns.kdeplot, cmap=plt.cm.OrRd_r)  # lower triangle: 2-D density

# y > 1 places the title above the top row of axes.
plt.suptitle('Ext Source And Age Features Pair Plot', size=32, y=1.05)
htmlutils.write_image(plt)
htmlutils.close_section()

# ---------------------------------------------------------------------------
# Polynomial features built from the EXT_SOURCE_* and DAYS_BIRTH columns
# ---------------------------------------------------------------------------
# NOTE(review): PolynomialFeatures is constructed from the train/test frames,
# so it is presumably a project wrapper rather than sklearn's class -- confirm.
poly_features_obj = PolynomialFeatures(app_train, app_test)
poly_features, poly_features_test, poly_target, poly_names = (
    poly_features_obj.get_polynomial_features(
        ['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'DAYS_BIRTH']
    )
)
htmlutils.write_text(f"Polynomial features shape: {poly_features.shape}")
htmlutils.write_text(f"Polynomial features name: {poly_names[:15]}")
htmlutils.close_section()

# ---------------------------------------------------------------------------
# Correlation of each polynomial feature with TARGET
# ---------------------------------------------------------------------------
poly_features = pd.DataFrame(poly_features, columns=poly_names)
poly_features['TARGET'] = poly_target

# sort_values() is ascending, so head() holds the most negative correlations
# and tail() the most positive ones.
poly_corrs = poly_features.corr()['TARGET'].sort_values()
htmlutils.write_dataframe('Most negative correlations', poly_corrs.head(10))
htmlutils.write_dataframe('Most positive correlations', poly_corrs.tail(5))
htmlutils.close_section()

# ---------------------------------------------------------------------------
# Train/test frames with the polynomial features appended
# ---------------------------------------------------------------------------
app_train_poly, app_test_poly = (
    poly_features_obj.append_data_polynom_features()
)
htmlutils.write_text(
    f"Training data with polynomial feature shape: {app_train_poly.shape}")
htmlutils.write_text(
    f"Test data with polynomial features shape: {app_test_poly.shape}")