def predict_proba(self, X, *args, **kwargs):
    """Predict probabilities with the ensembler on top of the base models.

    Each base model contributes one column named ``proba_<name>`` that holds
    column 1 (P1) of its ``predict_proba`` output. When ``self.full_train``
    is falsy, each name maps to several fitted models and their P1 columns
    are averaged row-wise.

    Parameters
    ----------
    X : input samples; passed to every base model's ``predict_proba`` and
        must support ``len``.
    *args, **kwargs : forwarded to ``self.ensembler.predict_proba``.

    Returns
    -------
    Whatever ``self.ensembler.predict_proba`` returns for the base-model
    prediction frame.
    """
    check_is_fitted(self, 'fitted')

    def _p1(estimator):
        # second column of the estimator's probability output
        return estimator.predict_proba(X)[:, 1]

    if self.full_train:
        # one fully trained model per name
        frames = [
            pd.DataFrame({'proba_{}'.format(name): _p1(model)})
            for name, model in self.full_fitted_models
        ]
    else:
        # several fitted models per name: average their P1 columns
        frames = []
        for name, models in self.fitted_models:
            stacked = np.hstack(
                [_p1(model).reshape((len(X), 1)) for model in models]
            )
            mean_proba = np.average(stacked, axis=1)
            frames.append(
                pd.DataFrame({'proba_{}'.format(name): mean_proba})
            )

    # one column per base model, side by side
    base_pred = pd.concat(frames, axis=1)
    if not self.proba:
        # feed hard decisions instead of probabilities to the ensembler
        base_pred = base_pred > 0.5
    return self.ensembler.predict_proba(base_pred, *args, **kwargs)
def transform(self, X):
    """Merge the RFECV-retained columns with PCA components of the others.

    Parameters
    ----------
    X: array-like, shape [n_samples, n_features]

    Returns
    -------
    X : new array with one row per sample: the columns selected by
        ``self.rfecv.support_``, followed by the PCA components of the
        columns flagged in ``self.cols_for_pca`` when more than one column
        is flagged.
    """
    check_is_fitted(self, 'pca')
    if self.copy:
        X = X.copy()
    X = force_array(X)
    # implement RFECV transform method
    X_reserved = X[:, self.rfecv.support_]
    if sum(self.cols_for_pca) > 1:
        # Use the standardizer's already-learned statistics. Calling
        # fit_transform here would re-fit the scaler on the data being
        # transformed, so results would depend on the incoming batch.
        # NOTE(review): assumes fit() fits self.standardizer - confirm.
        X_pca = self.pca.transform(
            self.standardizer.transform(X[:, self.cols_for_pca])
        )
        return np.hstack((X_reserved, X_pca))
    # special case: at most one column is flagged for PCA, so only the
    # retained columns are returned
    return X_reserved
def get_fitted_models_(self):
    """Return the fitted base models.

    Returns ``self.full_fitted_models`` when ``self.full_train`` is truthy,
    otherwise ``self.fitted_models``. ``check_is_fitted`` is run on the
    'fitted' attribute first.
    """
    check_is_fitted(self, 'fitted')
    return (
        self.full_fitted_models if self.full_train else self.fitted_models
    )
def predict_proba(self, X):
    """Predict probabilities by routing each row to its group's model.

    The dispatcher assigns a group to every row, ``self.le`` encodes the
    group labels, and each non-empty group is scored by the matching model
    in ``self.model_dict``. The per-group results are concatenated and
    restored to the original row order.

    Parameters
    ----------
    X : DataFrame, Series or array-like; anything else is wrapped into a
        DataFrame via ``force_array``.

    Returns
    -------
    ``force_array`` of the per-row probabilities, ordered by row position.
    """
    check_is_fitted(self, 'model_dict')
    # NOTE: DataFrame / Series inputs are used as they are
    if not isinstance(X, (pd.DataFrame, pd.Series)):
        X = pd.DataFrame(force_array(X))
    # predict on dispatcher and get group
    group_new = self.dispatcher.predict(X)
    group_new = self.le.transform(group_new)
    # positional row indices belonging to each known group
    index_dict = {
        group: np.where(group_new == group)[0]
        for group in self.unique_groups
    }
    # predict by group
    proba_dfs = []
    for group, index in index_dict.items():
        if len(index):
            # fixed: the method is predict_proba (was misspelled
            # "prodict_proba", which raised AttributeError)
            df_proba = pd.DataFrame(
                self.model_dict[group].predict_proba(X.iloc[index]),
                index=index,
            )
            proba_dfs.append(df_proba)
    # concat all predictions into one dataframe; sorting by the positional
    # index puts rows back in input order
    df_proba = pd.concat(proba_dfs)
    return force_array(df_proba.sort_index())
def transform(self, X, y=None):
    """Keep only the selected feature columns of X.

    Parameters
    ----------
    X : array-like input; converted with ``force_array`` before slicing.
    y : optional, only handed to ``self._type_check``.

    Returns
    -------
    The columns ``self.features_selected`` of X. When ``self.is_dataframe``
    is truthy the result is a DataFrame labelled with
    ``self.df_cols[self.support_]`` and indexed by ``self.df_idx``.
    """
    check_is_fitted(self, 'features_selected')
    # NOTE(review): presumably refreshes is_dataframe / df_cols / df_idx
    # for this input - confirm against _type_check
    self._type_check(X, y)
    selected = force_array(X)[:, self.features_selected]
    if not self.is_dataframe:
        return selected
    return pd.DataFrame(
        selected,
        columns=self.df_cols[self.support_],
        index=self.df_idx,
    )
def get_score_dict_(self):
    """Return the feature scores plus the base score as a new dict.

    When the estimator was fitted on a DataFrame (``self.is_dataframe``),
    the positional keys of ``self.score_dict`` are replaced by the matching
    entries of ``self.df_cols``. A ``'base_score'`` entry holding
    ``self.base_score`` is always added.

    ``self.score_dict`` itself is left untouched. Previously it was
    overwritten with the renamed keys, so a second call tried to look
    column names (and ``'base_score'``) up as positional ids and raised
    ``KeyError``.

    Returns
    -------
    dict mapping feature id (or column name) to score, plus 'base_score'.
    """
    check_is_fitted(self, 'features_selected')
    # work on a copy so repeated calls always start from the id-keyed dict
    score_dict = dict(self.score_dict)
    # change col name if underlying data is data frame
    if self.is_dataframe:
        dict_id_to_col = dict(enumerate(self.df_cols))
        score_dict = {
            dict_id_to_col[key]: value
            for key, value in score_dict.items()
        }
    # append base score
    score_dict['base_score'] = self.base_score
    return score_dict
def predict_proba(self, X, *args, **kwargs):
    """Predict probabilities with the ensembler on top of the base models.

    Each base model contributes one column named ``proba_<i>`` (``i`` being
    its position in the model list) that holds column 1 (P1) of its
    ``predict_proba`` output.

    Parameters
    ----------
    X : input samples passed to every base model's ``predict_proba``.
    *args, **kwargs : forwarded to ``self.ensembler.predict_proba``.

    Returns
    -------
    Whatever ``self.ensembler.predict_proba`` returns for the base-model
    prediction frame.
    """
    check_is_fitted(self, 'fitted')
    # choose between the fully trained models and the regular fitted ones
    base_models = (
        self.full_fitted_models if self.full_train else self.fitted_models
    )
    # one single-column frame per base model
    frames = [
        pd.DataFrame({'proba_{}'.format(pos): est.predict_proba(X)[:, 1]})
        for pos, est in enumerate(base_models)
    ]
    # place the P1 columns side by side
    stacked = pd.concat(frames, axis=1)
    if not self.proba:
        # feed hard decisions instead of probabilities to the ensembler
        stacked = stacked >= 0.5
    return self.ensembler.predict_proba(stacked, *args, **kwargs)
def transform(self, X, *args, **kwargs):
    """Produce a single prediction column for X.

    With ``self.full_train`` set, column 1 of
    ``self.full_fitted_model.predict_proba(X)`` is used. Otherwise column 1
    is taken from every model in ``self.fitted_models`` and averaged
    row-wise. When ``self.proba`` is falsy the result is thresholded with
    ``> 0.5``.

    Parameters
    ----------
    X : input samples; must support ``len``.
    *args, **kwargs : accepted but not used here.

    Returns
    -------
    Array of shape (len(X), 1): probabilities, or booleans when
    ``self.proba`` is falsy.
    """
    check_is_fitted(self, 'fitted_models')
    if self.full_train:
        # probabilities from the single fully fitted model
        proba = self.full_fitted_model.predict_proba(X)[:, 1]
        pred = proba.reshape((len(X), 1))
    else:
        # one column per fitted model, then the row-wise mean
        per_model = [
            model.predict_proba(X)[:, 1].reshape((len(X), 1))
            for model in self.fitted_models
        ]
        averaged = np.average(np.hstack(per_model), axis=1)
        pred = averaged.reshape((len(X), 1))
    if self.proba:
        return pred
    # convert probabilities to hard predictions
    return pred > 0.5
def transform(self, X):
    """Return the final transformed X: standardise, PCA, then RFECV pruning.

    Parameters
    ----------
    X: array-like, shape [n_samples, n_features]

    Returns
    -------
    X : new array with dimension reduction, one row per sample. The PCA
        output is pruned by ``self.rfecv`` when the PCA has more than one
        component; otherwise the PCA output is returned as is.
    """
    check_is_fitted(self, 'rfecv')
    if self.copy:
        X = X.copy()
    X = force_array(X)
    # first step - PCA transformation.
    # Use the standardizer's already-learned statistics. Calling
    # fit_transform here would re-fit the scaler on the data being
    # transformed, so results would depend on the incoming batch.
    # NOTE(review): assumes fit() fits self.standardizer - confirm.
    X_pca = self.pca.transform(self.standardizer.transform(X))
    if self.pca.n_components_ > 1:
        # second step - drop the components RFECV eliminated
        return self.rfecv.transform(X_pca)
    return X_pca
def get_fitted_ensembler_(self):
    """Return ``self.ensembler`` after running ``check_is_fitted`` on 'fitted'."""
    check_is_fitted(self, 'fitted')
    ensembler = self.ensembler
    return ensembler
def transform(self, X):
    """Return X unchanged.

    The only work done is running ``check_is_fitted`` on the 'type_check'
    attribute before handing the input back.
    """
    check_is_fitted(self, 'type_check')
    unchanged = X
    return unchanged
def get_model_dict_(self):
    """Return ``self.model_dict`` after running ``check_is_fitted`` on it."""
    check_is_fitted(self, 'model_dict')
    model_dict = self.model_dict
    return model_dict