def get_importance(self, X=None, tau=None, model_tau_feature=None, features=None, method='auto', normalize=True): """ Builds a model (using X to predict estimated/actual tau), and then calculates feature importances based on a specified method. Currently supported methods include: - auto (calculates importance based on estimator's default implementation of feature importance; estimator must be tree-based) Note: if none provided, it uses lightgbm's LGBMRegressor as estimator, and "gain" as importance type - permutation (calculates importance based on mean decrease in accuracy; estimator can be any form) Hint: for permutation, downsample data for better performance especially if X.shape[1] is large Args: X (np.matrix or np.array or pd.Dataframe): a feature matrix tau (np.array): a treatment effect vector (estimated/actual) model_tau_feature (sklearn/lightgbm/xgboost model object): an unfitted model object features (np.array): list/array of feature names. If None, an enumerated list will be used. method (str): auto, permutation normalize (bool): normalize by sum of importances if method=gini (defaults to True) """ explainer = Explainer(method=method, control_name=self.control_name, X=X, tau=tau, model_tau=None, r_learners=self.models_tau, features=features, classes=self._classes, normalize=normalize) return explainer.get_importance()
def get_importance(self, X=None, tau=None, model_tau_feature=None, features=None, method='auto', normalize=True, test_size=0.3, random_state=None): """ Builds a model (using X to predict estimated/actual tau), and then calculates feature importances based on a specified method. Currently supported methods are: - auto (calculates importance based on estimator's default implementation of feature importance; estimator must be tree-based) Note: if none provided, it uses lightgbm's LGBMRegressor as estimator, and "gain" as importance type - permutation (calculates importance based on mean decrease in accuracy when a feature column is permuted; estimator can be any form) Hint: for permutation, downsample data for better performance especially if X.shape[1] is large Args: X (np.matrix or np.array or pd.Dataframe): a feature matrix tau (np.array): a treatment effect vector (estimated/actual) model_tau_feature (sklearn/lightgbm/xgboost model object): an unfitted model object features (np.array): list/array of feature names. If None, an enumerated list will be used method (str): auto, permutation normalize (bool): normalize by sum of importances if method=auto (defaults to True) test_size (float/int): if float, represents the proportion of the dataset to include in the test split. If int, represents the absolute number of test samples (used for estimating permutation importance) random_state (int/RandomState instance/None): random state used in permutation importance estimation """ explainer = Explainer(method=method, control_name=self.control_name, X=X, tau=tau, model_tau=model_tau_feature, features=features, classes=self._classes, normalize=normalize, test_size=test_size, random_state=random_state) return explainer.get_importance()