Beispiel #1
0
    def fit(self, X, y=None):
        """Run every feature selector and record which features to keep.

        Six selector helpers (correlation, chi2, RFE, logistic regression,
        random forest, XGBoost) are called in turn. Their per-feature
        results are gathered into ``self.feature_selection_df_`` together
        with a ``total`` column holding the row-wise sum of the selector
        columns. Features whose total is at least ``self.min_selections``,
        plus everything in ``self.always_keep`` (when it is not ``None``),
        are stored in ``self.keep_features_`` in the order they appear in
        ``self.feature_names``.

        Parameters
        ----------
        X : array-like or scipy sparse matrix
            Feature matrix. Sparse input is converted to CSC before being
            passed to the selectors.
        y : array-like, optional
            Target values, forwarded unchanged to every selector.

        Returns
        -------
        self
            The fitted instance, with ``feature_selection_df_`` and
            ``keep_features_`` set.

        Notes
        -----
        NOTE(review): the selector helpers are defined outside this block;
        this code presumes each returns one boolean-like flag per entry of
        ``self.feature_names`` -- confirm against their definitions.
        """
        # The max-abs scaled copy feeds the chi2 / RFE / logistic-regression
        # selectors; the remaining selectors receive the unscaled data.
        X_norm = MaxAbsScaler().fit_transform(X)
        if issparse(X):
            if not isinstance(X, csc_matrix):
                X = X.tocsc()
            X_norm = X_norm.tocsc()

        print('Running Cor')
        cor_support = cor_selector(X, y, self.feature_names, self.num_features)
        print('Running Chi2')
        chi_support = chi2_selector(X_norm, y, self.num_features)
        print('Running RFE')
        rfe_support = rfe_selector(X_norm, y, self.num_features,
                                   self.random_state)
        print('Running LR')
        embeded_lr_support = embeded_lr_selector(X_norm, y, self.num_features,
                                                 self.random_state)
        print('Running RF')
        embeded_rf_support = embeded_rf_selector(
            X,
            y,
            self.num_features,
            n_jobs=self.n_jobs,
            random_state=self.random_state)
        print('Running XG')
        embeded_xgb_support = embeded_xgb_selector(
            X,
            y,
            self.num_features,
            n_jobs=self.n_jobs,
            random_state=self.random_state)

        feature_selection_df = pd.DataFrame({
            'feature': self.feature_names,
            'pearson': cor_support,
            'chi_2': chi_support,
            'rfe': rfe_support,
            'logistics': embeded_lr_support,
            'random_forest': embeded_rf_support,
            'xgboost': embeded_xgb_support
        })

        # Sum only the selector columns. Summing the whole frame would pull
        # in the non-numeric 'feature' column, which recent pandas versions
        # no longer drop silently and instead fail on.
        selector_columns = [
            'pearson', 'chi_2', 'rfe', 'logistics', 'random_forest', 'xgboost'
        ]
        feature_selection_df['total'] = (
            feature_selection_df[selector_columns].sum(axis=1))
        self.feature_selection_df_ = feature_selection_df

        # Direct boolean mask instead of a string-built query expression.
        reached_threshold = (
            feature_selection_df['total'] >= self.min_selections)
        keep_features = set(
            feature_selection_df.loc[reached_threshold, 'feature'])

        # Keep the features that we always want (e.g. domain expertise)
        if self.always_keep is not None:
            keep_features.update(self.always_keep)

        # Iterate feature_names so the result keeps the original feature
        # order and contains no duplicates; the set makes each lookup O(1).
        self.keep_features_ = [
            f for f in self.feature_names if f in keep_features
        ]

        return self