def _generate_oof_predictions(self, X_train, y_train, feature_selection_func=None, **kwargs):
    """Build out-of-fold (OOF) predictions for the training set with 10-fold CV.

    For each fold the feature selector and the model are refit on the
    training part, using targets passed through
    ``self.target_transform.transform``. The held-out part is then predicted
    and the predictions are mapped back with
    ``self.target_transform.transform_back`` before being scored with
    ``normalized_gini`` against the untransformed held-out targets.

    Side effects: ``self.feature_selector`` and ``self.model`` are refit on
    every fold, and per-fold / final scores are printed.

    Args:
        X_train: feature table; rows are picked positionally via ``.iloc``.
        y_train: targets aligned with ``X_train``; also indexed via ``.iloc``.
        feature_selection_func: not used by this method.
        **kwargs: not used by this method.

    Returns:
        A tuple ``(oof_predictions, oof_gini, cv_scores)``: the 1-D array of
        held-out predictions (one per training row), the normalized Gini of
        that array against ``y_train``, and the list of per-fold Gini scores.
    """
    n_rows = X_train.shape[0]
    # NOTE(review): no ``shuffle`` argument is passed, so ``random_state``
    # presumably has no effect on the split - confirm against the KFold in use.
    folds = KFold(n=n_rows, n_folds=10, random_state=123)
    cv_scores = []
    oof_predictions = np.zeros(shape=(n_rows,))

    for fold, (tr_idx, te_idx) in enumerate(folds):
        fit_X = X_train.iloc[tr_idx]
        fit_y = self.target_transform.transform(y_train.iloc[tr_idx])
        holdout_X = X_train.iloc[te_idx]
        holdout_y = y_train.iloc[te_idx]

        # select features (fitted on the training part of the fold only)
        self.feature_selector.fit(fit_X, fit_y)
        fit_X = self.feature_selector.transform(fit_X)
        holdout_X = self.feature_selector.transform(holdout_X)

        self.model.fit(fit_X, fit_y)
        # Bring predictions back to the original target scale before scoring.
        fold_preds = self.target_transform.transform_back(
            self.model.predict(holdout_X))
        oof_predictions[te_idx] = fold_preds

        fold_gini = normalized_gini(holdout_y, fold_preds)
        cv_scores.append(fold_gini)
        # Single-argument parenthesised print: same output on Python 2,
        # valid syntax on Python 3.
        print('Fold %d: %.4f' % (fold + 1, fold_gini))

    oof_gini = normalized_gini(y_train, oof_predictions)
    print('Final: %.4f' % (oof_gini))
    return oof_predictions, oof_gini, cv_scores
def ensemble_predictions(self):
    """Fit ensemble weights on the OOF predictions and apply them.

    Reads ``self.opt_func``, ``self.oof_predictions``, ``self.oof_ginis``,
    ``self.lb_predictions``, ``self.verbose`` and ``self.kwargs``, plus the
    module-level ``Y_TRAIN``.

    Sets:
        self.opt_weights: the ``'x'`` entry of the ``find_ensemble_weights``
            result.
        self.ensembled_oof_predictions_: weighted blend of the OOF
            predictions.
        self.ensembled_oof_gini_: normalized Gini of that blend vs ``Y_TRAIN``.
        self.ensembled_lb_predictions_: the same weights applied to
            ``self.lb_predictions``.
    """
    # find optimal weights, starting from the square root of each model's
    # OOF Gini
    opt_result = find_ensemble_weights(
        opt_func=self.opt_func,
        predictions=self.oof_predictions,
        y_true=Y_TRAIN,
        w_init=np.array(self.oof_ginis) ** 0.5,
        verbose=self.verbose,
        **self.kwargs)
    self.opt_weights = opt_result['x']

    # ensemble oof predictions
    self.ensembled_oof_predictions_ = _ensemble_predictions(
        predictions=self.oof_predictions,
        ensemble_weights=self.opt_weights)
    self.ensembled_oof_gini_ = normalized_gini(
        Y_TRAIN, self.ensembled_oof_predictions_)

    if self.verbose:
        # Single-argument parenthesised print: same output on Python 2,
        # valid syntax on Python 3.
        print('\nEnsembled CV: {}\n'.format(
            np.round(self.ensembled_oof_gini_, 6)))
        # NOTE(review): the per-weight report is assumed to belong to the
        # verbose branch - confirm against the original layout.
        for w, pred_col in zip(self.opt_weights, self.oof_predictions.columns):
            print('{} : {}'.format(np.round(w, 4), pred_col))

    # ensemble lb predictions
    self.ensembled_lb_predictions_ = _ensemble_predictions(
        predictions=self.lb_predictions,
        ensemble_weights=self.opt_weights)
def ensemble_predictions(self):
    """Optimise blending weights on OOF predictions, then blend OOF and LB.

    The weights come from ``find_ensemble_weights`` (its ``'x'`` entry),
    initialised with ``np.array(self.oof_ginis) ** 0.5``. They are stored in
    ``self.opt_weights`` and passed to ``_ensemble_predictions`` for both
    ``self.oof_predictions`` and ``self.lb_predictions``. Results are stored
    in ``self.ensembled_oof_predictions_``, ``self.ensembled_oof_gini_``
    (normalized Gini vs the module-level ``Y_TRAIN``) and
    ``self.ensembled_lb_predictions_``. Nothing is returned.
    """
    # find optimal weights
    initial_weights = np.array(self.oof_ginis) ** 0.5
    self.opt_weights = find_ensemble_weights(
        opt_func=self.opt_func,
        predictions=self.oof_predictions,
        y_true=Y_TRAIN,
        w_init=initial_weights,
        verbose=self.verbose,
        **self.kwargs
    )['x']

    # ensemble oof predictions
    blended_oof = _ensemble_predictions(
        predictions=self.oof_predictions,
        ensemble_weights=self.opt_weights
    )
    self.ensembled_oof_predictions_ = blended_oof
    self.ensembled_oof_gini_ = normalized_gini(Y_TRAIN, blended_oof)

    if self.verbose:
        # Parenthesised single-argument print keeps the Python 2 output and
        # also parses on Python 3.
        print('\nEnsembled CV: {}\n'.format(np.round(self.ensembled_oof_gini_, 6)))
        # NOTE(review): weight listing assumed to be part of the verbose
        # output - confirm against the original layout.
        for w, pred_col in zip(self.opt_weights, self.oof_predictions.columns):
            print('{} : {}'.format(np.round(w, 4), pred_col))

    # ensemble lb predictions
    self.ensembled_lb_predictions_ = _ensemble_predictions(
        predictions=self.lb_predictions,
        ensemble_weights=self.opt_weights
    )
def ensemble_predictions(self):
    """Blend per-model predictions row by row with ``self.ensemble_func``.

    ``self.ensemble_func`` is applied along ``axis=1`` of
    ``self.oof_predictions`` and ``self.lb_predictions``. The blended OOF
    predictions are scored with ``normalized_gini`` against the module-level
    ``Y_TRAIN``.

    Sets ``self.ensembled_oof_predictions_``, ``self.ensembled_oof_gini_``
    and ``self.ensembled_lb_predictions_``. Nothing is returned.
    """
    # ensemble oof predictions
    blended_oof = self.oof_predictions.apply(self.ensemble_func, axis=1)
    self.ensembled_oof_predictions_ = blended_oof
    self.ensembled_oof_gini_ = normalized_gini(Y_TRAIN, blended_oof)

    if self.verbose:
        # Parenthesised single-argument print: identical output on Python 2,
        # valid syntax on Python 3.
        print('\nEnsembled CV: {}\n'.format(np.round(self.ensembled_oof_gini_, 6)))

    # ensemble lb predictions
    self.ensembled_lb_predictions_ = self.lb_predictions.apply(self.ensemble_func, axis=1)
def ensemble_predictions(self):
    """Combine model predictions per row and score the OOF blend.

    Applies ``self.ensemble_func`` with ``axis=1`` to
    ``self.oof_predictions`` and to ``self.lb_predictions``, storing the
    results in ``self.ensembled_oof_predictions_`` and
    ``self.ensembled_lb_predictions_``. The normalized Gini of the OOF blend
    against the module-level ``Y_TRAIN`` is stored in
    ``self.ensembled_oof_gini_`` and printed when ``self.verbose`` is truthy.
    """
    # ensemble oof predictions
    self.ensembled_oof_predictions_ = self.oof_predictions.apply(
        self.ensemble_func, axis=1)
    self.ensembled_oof_gini_ = normalized_gini(
        Y_TRAIN, self.ensembled_oof_predictions_)

    if self.verbose:
        rounded_gini = np.round(self.ensembled_oof_gini_, 6)
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print('\nEnsembled CV: {}\n'.format(rounded_gini))

    # ensemble lb predictions
    self.ensembled_lb_predictions_ = self.lb_predictions.apply(
        self.ensemble_func, axis=1)
def fit(self, X, y):
    """Select the columns of ``X`` whose Gini against ``y`` is strong enough.

    A column is kept when the absolute value of ``normalized_gini(y, X[col])``
    is strictly greater than ``self.min_gini``. Kept column names are appended
    to ``self.current_features``, and afterwards the whole of
    ``self.current_features`` is added onto ``self.selected_features``.

    Args:
        X: table exposing ``.columns`` and column access via ``X[col]``.
        y: target values passed as the first argument to ``normalized_gini``.

    Returns:
        None.
    """
    # NOTE(review): ``current_features`` is not cleared here, so repeated
    # calls keep accumulating names - confirm whether a reset happens
    # elsewhere.
    self.current_features.extend(
        name for name in X.columns
        if abs(normalized_gini(y, X[name])) > self.min_gini
    )
    self.selected_features += self.current_features