コード例 #1
0
    def _generate_oof_predictions(self, X_train, y_train, feature_selection_func=None, **kwargs):
        """Produce out-of-fold (OOF) predictions for the training set with 10-fold CV.

        For every fold the feature selector and the model are refit on the
        training split (with the target passed through
        ``self.target_transform.transform``), the held-out split is predicted,
        and the predictions are mapped back with
        ``self.target_transform.transform_back``.

        Args:
            X_train: training features; rows are selected with ``.iloc``.
            y_train: training target; rows are selected with ``.iloc``.
            feature_selection_func: accepted but not used by this method.
            **kwargs: accepted but not used by this method.

        Returns:
            A tuple ``(oof_predictions, oof_gini, cv_scores)``: the array of
            held-out predictions (one per training row), the normalized Gini
            of those predictions against ``y_train``, and the list of
            per-fold normalized Gini scores.
        """
        train_n = X_train.shape[0]
        # NOTE(review): `shuffle` is not passed, so `random_state` presumably
        # has no effect on the fold layout -- confirm against the KFold docs.
        cv = KFold(n=train_n, n_folds=10, random_state=123)
        cv_scores = []

        oof_predictions = np.zeros(shape=(train_n,))
        for fold, (tr_idx, te_idx) in enumerate(cv):

            X_train_ = X_train.iloc[tr_idx]
            # The model is trained on the transformed target ...
            y_train_ = self.target_transform.transform(y_train.iloc[tr_idx])
            X_test_ = X_train.iloc[te_idx]
            # ... but scored against the untransformed held-out target.
            y_test_ = y_train.iloc[te_idx]

            # select features using the training split only
            self.feature_selector.fit(X_train_, y_train_)
            X_train_ = self.feature_selector.transform(X_train_)
            X_test_ = self.feature_selector.transform(X_test_)

            self.model.fit(X_train_, y_train_)
            # Map predictions back to the original target scale before storing.
            preds_k = self.target_transform.transform_back(self.model.predict(X_test_))
            oof_predictions[te_idx] = preds_k

            gini_k = normalized_gini(y_test_, preds_k)
            cv_scores.append(gini_k)
            # Single-argument parenthesised print: same output on Python 2,
            # and valid syntax on Python 3.
            print('Fold %d: %.4f' % (fold + 1, gini_k))

        oof_gini = normalized_gini(y_train, oof_predictions)
        print('Final: %.4f' % (oof_gini))
        return oof_predictions, oof_gini, cv_scores
コード例 #2
0
ファイル: ensemble.py プロジェクト: cavaunpeu/kaggle-liberty
    def ensemble_predictions(self):
        """Fit ensemble weights on the OOF predictions and apply them.

        Calls ``find_ensemble_weights`` with the square root of
        ``self.oof_ginis`` as the initial weights and stores the ``'x'``
        entry of its result in ``self.opt_weights``. The weights are then
        applied to both the out-of-fold and the leaderboard predictions.

        Sets ``self.opt_weights``, ``self.ensembled_oof_predictions_``,
        ``self.ensembled_oof_gini_`` and ``self.ensembled_lb_predictions_``.
        Returns nothing.
        """
        # find optimal weights
        self.opt_weights = find_ensemble_weights(
            opt_func=self.opt_func,
            predictions=self.oof_predictions,
            y_true=Y_TRAIN,
            w_init=np.array(self.oof_ginis)**0.5,
            verbose=self.verbose,
            **self.kwargs)['x']

        # ensemble oof predictions
        self.ensembled_oof_predictions_ = _ensemble_predictions(
            predictions=self.oof_predictions,
            ensemble_weights=self.opt_weights)
        self.ensembled_oof_gini_ = normalized_gini(
            Y_TRAIN, self.ensembled_oof_predictions_)
        if self.verbose:
            # Single-argument parenthesised print: same output on Python 2,
            # and valid syntax on Python 3.
            print('\nEnsembled CV: {}\n'.format(
                np.round(self.ensembled_oof_gini_, 6)))
            # Report the weight assigned to each prediction column.
            for w, pred_col in zip(self.opt_weights,
                                   self.oof_predictions.columns):
                print('{} : {}'.format(np.round(w, 4), pred_col))

        # ensemble lb predictions
        self.ensembled_lb_predictions_ = _ensemble_predictions(
            predictions=self.lb_predictions, ensemble_weights=self.opt_weights)
コード例 #3
0
ファイル: ensemble.py プロジェクト: cavaunpeu/kaggle-liberty
    def ensemble_predictions(self):
        """Fit ensemble weights on the OOF predictions and apply them.

        Calls ``find_ensemble_weights`` with the square root of
        ``self.oof_ginis`` as the initial weights and stores the ``'x'``
        entry of its result in ``self.opt_weights``. The weights are then
        applied to both the out-of-fold and the leaderboard predictions.

        Sets ``self.opt_weights``, ``self.ensembled_oof_predictions_``,
        ``self.ensembled_oof_gini_`` and ``self.ensembled_lb_predictions_``.
        Returns nothing.
        """
        # find optimal weights
        self.opt_weights = find_ensemble_weights(
            opt_func=self.opt_func,
            predictions=self.oof_predictions,
            y_true=Y_TRAIN,
            w_init=np.array(self.oof_ginis)**0.5,
            verbose=self.verbose,
            **self.kwargs
        )['x']

        # ensemble oof predictions
        self.ensembled_oof_predictions_ = _ensemble_predictions(
            predictions=self.oof_predictions,
            ensemble_weights=self.opt_weights
        )
        self.ensembled_oof_gini_ = normalized_gini(Y_TRAIN, self.ensembled_oof_predictions_)
        if self.verbose:
            # Single-argument parenthesised print: same output on Python 2,
            # and valid syntax on Python 3.
            print('\nEnsembled CV: {}\n'.format(np.round(self.ensembled_oof_gini_, 6)))
            # Report the weight assigned to each prediction column.
            for w, pred_col in zip(self.opt_weights, self.oof_predictions.columns):
                print('{} : {}'.format(np.round(w, 4), pred_col))

        # ensemble lb predictions
        self.ensembled_lb_predictions_ = _ensemble_predictions(
            predictions=self.lb_predictions,
            ensemble_weights=self.opt_weights
        )
コード例 #4
0
ファイル: ensemble.py プロジェクト: cavaunpeu/kaggle-liberty
    def ensemble_predictions(self):
        """Combine the per-model predictions row by row with ``self.ensemble_func``.

        ``self.ensemble_func`` is applied along ``axis=1`` to both
        ``self.oof_predictions`` and ``self.lb_predictions``; the combined
        out-of-fold predictions are scored against ``Y_TRAIN`` with
        ``normalized_gini``.

        Sets ``self.ensembled_oof_predictions_``, ``self.ensembled_oof_gini_``
        and ``self.ensembled_lb_predictions_``. Returns nothing.
        """
        # ensemble oof predictions
        self.ensembled_oof_predictions_ = self.oof_predictions.apply(self.ensemble_func, axis=1)
        self.ensembled_oof_gini_ = normalized_gini(Y_TRAIN, self.ensembled_oof_predictions_)
        if self.verbose:
            # Single-argument parenthesised print: same output on Python 2,
            # and valid syntax on Python 3.
            print('\nEnsembled CV: {}\n'.format(np.round(self.ensembled_oof_gini_, 6)))

        # ensemble lb predictions
        self.ensembled_lb_predictions_ = self.lb_predictions.apply(self.ensemble_func, axis=1)
コード例 #5
0
ファイル: ensemble.py プロジェクト: cavaunpeu/kaggle-liberty
    def ensemble_predictions(self):
        """Combine the per-model predictions row by row with ``self.ensemble_func``.

        ``self.ensemble_func`` is applied along ``axis=1`` to both
        ``self.oof_predictions`` and ``self.lb_predictions``; the combined
        out-of-fold predictions are scored against ``Y_TRAIN`` with
        ``normalized_gini``.

        Sets ``self.ensembled_oof_predictions_``, ``self.ensembled_oof_gini_``
        and ``self.ensembled_lb_predictions_``. Returns nothing.
        """
        # ensemble oof predictions
        self.ensembled_oof_predictions_ = self.oof_predictions.apply(
            self.ensemble_func, axis=1)
        self.ensembled_oof_gini_ = normalized_gini(
            Y_TRAIN, self.ensembled_oof_predictions_)
        if self.verbose:
            # Single-argument parenthesised print: same output on Python 2,
            # and valid syntax on Python 3.
            print('\nEnsembled CV: {}\n'.format(
                np.round(self.ensembled_oof_gini_, 6)))

        # ensemble lb predictions
        self.ensembled_lb_predictions_ = self.lb_predictions.apply(
            self.ensemble_func, axis=1)
コード例 #6
0
 def fit(self, X, y):
     """Pick the columns of X whose normalized Gini against y exceeds the threshold.

     Each column name in ``X.columns`` is scored with
     ``normalized_gini(y, X[name])``; names whose absolute score is strictly
     greater than ``self.min_gini`` are appended to ``self.current_features``.
     Afterwards ``self.current_features`` is added onto
     ``self.selected_features`` with ``+=``.

     NOTE(review): ``self.current_features`` is not cleared here, so repeated
     calls accumulate unless it is reset elsewhere -- confirm against the
     rest of the class.
     """
     for name in X.columns:
         score = normalized_gini(y, X[name])
         # Written as "not >" rather than "<=" so a NaN score is skipped,
         # exactly as a failed ">" comparison would skip it.
         if not abs(score) > self.min_gini:
             continue
         self.current_features.append(name)
     self.selected_features += self.current_features
コード例 #7
0
 def fit(self, X, y):
     """Pick the columns of X whose normalized Gini against y exceeds the threshold.

     Each column name in ``X.columns`` is scored with
     ``normalized_gini(y, X[name])``; names whose absolute score is strictly
     greater than ``self.min_gini`` are appended to ``self.current_features``.
     Afterwards ``self.current_features`` is added onto
     ``self.selected_features`` with ``+=``.

     NOTE(review): ``self.current_features`` is not cleared here, so repeated
     calls accumulate unless it is reset elsewhere -- confirm against the
     rest of the class.
     """
     for name in X.columns:
         score = normalized_gini(y, X[name])
         # Written as "not >" rather than "<=" so a NaN score is skipped,
         # exactly as a failed ">" comparison would skip it.
         if not abs(score) > self.min_gini:
             continue
         self.current_features.append(name)
     self.selected_features += self.current_features