def test_fried_scale():
    x_scale_test = np.zeros([100, 2])
    x_scale_test[0:5, 0] = -100
    x_scale_test[5:10, 0] = 100
    x_scale_test[10:55, 0] = 1
    x_scale_test[5:55, 1] = 1
    # The first column gets winsorised at trim=0.1; note that the second column
    # will not be scaled because it is already an indicator function, as per FP 2004.
    fs = FriedScale()  # trim_quantile=0.1
    fs.train(x_scale_test)
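# A minimal NumPy-only sketch of the linear-term scaling that FriedScale implements
# (FP 2004, Sec. 3.2): winsorise each column at a trim quantile, then multiply by
# 0.4 / std(winsorised column) so linear terms get roughly the same a-priori
# influence as a typical rule. Indicator columns are left unscaled, matching the
# note in test_fried_scale above. This is an illustrative re-derivation under those
# assumptions, not the library code; it relies on this module's `numpy as np` import.
def _friedman_scale_sketch(x, trim_quantile=0.025):
    lo = np.quantile(x, trim_quantile, axis=0)
    hi = np.quantile(x, 1 - trim_quantile, axis=0)
    x_wins = np.clip(x, lo, hi)  # winsorised copy of each column
    std = np.std(x_wins, axis=0)
    is_indicator = np.all((x == 0) | (x == 1), axis=0)
    safe_std = np.where(std == 0, 1.0, std)  # avoid division by zero
    multipliers = np.where(is_indicator | (std == 0), 1.0, 0.4 / safe_std)
    return x_wins * multipliers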
class RuleFit(BaseEstimator, TransformerMixin, RuleSet):
    """RuleFit class. Rather than using this class directly, use RuleFitRegressor or RuleFitClassifier.

    Parameters
    ----------
    tree_size: Number of terminal nodes in generated trees. If exp_rand_tree_size=True,
        this will be the mean number of terminal nodes.
    sample_fract: Fraction of randomly chosen training observations used to produce
        each tree. FP 2004 (Sec. 2).
    max_rules: Total number of terms included in the final model (both linear and rules).
        The approximate total number of candidate rules generated for fitting is also
        based on this. Note that the actual number of candidate rules will usually be
        lower than this due to duplicates.
    memory_par: Scale multiplier (shrinkage factor) applied to each new tree when
        sequentially induced. FP 2004 (Sec. 2).
    lin_standardise: If True, the linear terms will be standardised as per Friedman
        Sec. 3.2 by multiplying the winsorised variable by 0.4/stdev.
    lin_trim_quantile: If lin_standardise is True, this quantile will be used to trim
        linear terms before standardisation.
    exp_rand_tree_size: If True, each boosted tree will have a different maximum number
        of terminal nodes based on an exponential distribution about tree_size.
        (Friedman Sec. 3.3)
    include_linear: Include linear terms as opposed to only rules.
    random_state: Integer to initialise random objects and provide repeatability.
    tree_generator: Optional: this object will be used as provided to generate the rules.
        This will override almost all the other properties above.
        Must be GradientBoostingRegressor or GradientBoostingClassifier, optional
        (default=None).

    Attributes
    ----------
    rule_ensemble: RuleEnsemble
        The rule ensemble
    feature_names: list of strings, optional (default=None)
        The names of the features (columns)
    """

    def __init__(self, n_estimators=100, tree_size=4, sample_fract='default',
                 max_rules=30, memory_par=0.01, tree_generator=None,
                 lin_trim_quantile=0.025, lin_standardise=True,
                 exp_rand_tree_size=True, include_linear=True,
                 alpha=None, random_state=None):
        self.n_estimators = n_estimators
        self.tree_size = tree_size
        self.sample_fract = sample_fract
        self.max_rules = max_rules
        self.memory_par = memory_par
        self.tree_generator = tree_generator
        self.lin_trim_quantile = lin_trim_quantile
        self.lin_standardise = lin_standardise
        self.exp_rand_tree_size = exp_rand_tree_size
        self.include_linear = include_linear
        self.alpha = alpha
        self.random_state = random_state
        self.winsorizer = Winsorizer(trim_quantile=self.lin_trim_quantile)
        self.friedscale = FriedScale(self.winsorizer)
        self.stddev = None
        self.mean = None
        self._init_prediction_task()  # decides between regressor and classifier

    def _init_prediction_task(self):
        """RuleFitRegressor and RuleFitClassifier override this method to alter the
        prediction task. When using this class directly, it is equivalent to
        RuleFitRegressor.
        """
        self.prediction_task = 'regression'

    def fit(self, X, y=None, feature_names=None):
        """Fit and estimate linear combination of rule ensemble
        """
        X, y = check_X_y(X, y)
        if self.prediction_task == 'classification':
            self.classes_ = unique_labels(y)
        self.n_features_in_ = X.shape[1]
        self.n_features_ = X.shape[1]
        self.feature_dict_ = get_feature_dict(X.shape[1], feature_names)
        self.feature_placeholders = np.array(list(self.feature_dict_.keys()))
        self.feature_names = np.array(list(self.feature_dict_.values()))

        extracted_rules = self._extract_rules(X, y)
        self.rules_without_feature_names_, self.coef, self.intercept = self._score_rules(
            X, y, extracted_rules)
        self.rules_ = [
            replace_feature_name(rule, self.feature_dict_)
            for rule in self.rules_without_feature_names_
        ]
        self.complexity_ = self._get_complexity()
        return self

    def predict_continuous_output(self, X):
        """Predict outcome of linear model for X
        """
        if isinstance(X, pd.DataFrame):
            X = X.values.astype(np.float32)
        y_pred = np.zeros(X.shape[0])
        y_pred += self.eval_weighted_rule_sum(X)
        if self.include_linear:
            if self.lin_standardise:
                X = self.friedscale.scale(X)
            y_pred += X @ self.coef[:X.shape[1]]
        return y_pred + self.intercept

    def predict(self, X):
        """Predict. For regression, returns continuous output.
        For classification, returns discrete output.
        """
        check_is_fitted(self)
        X = check_array(X)
        if self.prediction_task == 'regression':
            return self.predict_continuous_output(X)
        else:
            return np.argmax(self.predict_proba(X), axis=1)

    def predict_proba(self, X):
        check_is_fitted(self)
        X = check_array(X)
        continuous_output = self.predict_continuous_output(X)
        logits = np.vstack((1 - continuous_output, continuous_output)).transpose()
        return softmax(logits, axis=1)

    def transform(self, X=None, rules=None):
        """Transform dataset.

        Parameters
        ----------
        X : array-like matrix, shape=(n_samples, n_features)
            Input data to be transformed. Use ``dtype=np.float32`` for maximum
            efficiency.

        Returns
        -------
        X_transformed: matrix, shape=(n_samples, n_out)
            Transformed data set
        """
        df = pd.DataFrame(X, columns=self.feature_placeholders)
        X_transformed = np.zeros((X.shape[0], len(rules)))
        for i, r in enumerate(rules):
            features_r_uses = [term.split(' ')[0] for term in r.split(' and ')]
            X_transformed[df[features_r_uses].query(r).index.values, i] = 1
        return X_transformed

    def get_rules(self, exclude_zero_coef=False, subregion=None):
        """Return the estimated rules

        Parameters
        ----------
        exclude_zero_coef: If True, returns only the rules with an estimated
            coefficient not equal to zero.
        subregion: If None (default), returns global importances (FP 2004 eq. 28/29);
            else returns importance over a subregion of inputs (FP 2004 eq. 30/31/32).

        Returns
        -------
        rules: pandas.DataFrame with the rules. Column 'rule' describes the rule,
            'coef' holds the coefficients and 'support' the support of the rule
            in the training data set (X).
        """
        n_features = len(self.coef) - len(self.rules_)
        rule_ensemble = list(self.rules_without_feature_names_)
        output_rules = []

        ## Add coefficients for linear effects
        for i in range(0, n_features):
            if self.lin_standardise:
                coef = self.coef[i] * self.friedscale.scale_multipliers[i]
            else:
                coef = self.coef[i]
            if subregion is None:
                importance = abs(coef) * self.stddev[i]
            else:
                subregion = np.array(subregion)
                importance = sum(
                    abs(coef) * abs(np.array([x[i] for x in self.winsorizer.trim(subregion)])
                                    - self.mean[i])) / len(subregion)
            output_rules += [(self.feature_names[i], 'linear', coef, 1, importance)]

        ## Add rules
        for i in range(0, len(self.rules_)):
            rule = rule_ensemble[i]
            coef = self.coef[i + n_features]
            if subregion is None:
                importance = abs(coef) * (rule.support * (1 - rule.support)) ** (1 / 2)
            else:
                rkx = self.transform(subregion, [rule])[:, -1]
                importance = sum(abs(coef) * abs(rkx - rule.support)) / len(subregion)
            output_rules += [(self.rules_[i].rule, 'rule', coef, rule.support, importance)]

        rules = pd.DataFrame(output_rules,
                             columns=["rule", "type", "coef", "support", "importance"])
        if exclude_zero_coef:
            rules = rules[rules.coef != 0]  # .ix was removed from pandas; use boolean indexing
        return rules

    def visualize(self):
        rules = self.get_rules()
        rules = rules[rules.coef != 0].sort_values("support", ascending=False)
        pd.set_option('display.max_colwidth', None)  # -1 is deprecated in recent pandas
        return rules[['rule', 'coef']].round(3)

    def _extract_rules(self, X, y) -> List[Rule]:
        return extract_rulefit(X, y,
                               feature_names=self.feature_placeholders,
                               n_estimators=self.n_estimators,
                               tree_size=self.tree_size,
                               memory_par=self.memory_par,
                               tree_generator=self.tree_generator,
                               exp_rand_tree_size=self.exp_rand_tree_size,
                               random_state=self.random_state)

    def _score_rules(self, X, y, rules) -> Tuple[List[Rule], List[float], float]:
        X_concat = np.zeros([X.shape[0], 0])

        # standardise linear variables if requested (for regression model only)
        if self.include_linear:
            # standard deviation and mean of winsorized features
            self.winsorizer.train(X)
            winsorized_X = self.winsorizer.trim(X)
            self.stddev = np.std(winsorized_X, axis=0)
            self.mean = np.mean(winsorized_X, axis=0)
            if self.lin_standardise:
                self.friedscale.train(X)
                X_regn = self.friedscale.scale(X)
            else:
                X_regn = X.copy()
            X_concat = np.concatenate((X_concat, X_regn), axis=1)

        X_rules = self.transform(X, rules)
        if X_rules.shape[0] > 0:
            X_concat = np.concatenate((X_concat, X_rules), axis=1)

        # no rules fit and self.include_linear == False
        if X_concat.shape[1] == 0:
            return [], [], 0

        return score_linear(X_concat, y, rules,
                            prediction_task=self.prediction_task,
                            max_rules=self.max_rules,
                            alpha=self.alpha,
                            random_state=self.random_state)
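# Hedged usage sketch for the RuleFit class above (illustrative only; relies on this
# module's existing imports, e.g. `numpy as np`). The names X_demo / y_demo are
# hypothetical. The target mixes a threshold effect on the first feature with a
# linear effect of the second, which is the kind of structure RuleFit's
# rules-plus-linear-terms model is meant to recover.
def _demo_rulefit():
    rng = np.random.RandomState(0)
    X_demo = rng.uniform(size=(200, 3))
    y_demo = 2 * (X_demo[:, 0] > 0.5) + X_demo[:, 1] + 0.1 * rng.randn(200)
    model = RuleFit(max_rules=10, random_state=0).fit(X_demo, y_demo)
    preds = model.predict(X_demo[:5])
    rules_df = model.get_rules(exclude_zero_coef=True)  # linear terms first, then rules
    return preds, rules_df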
class RuleFitRegressor(BaseEstimator, TransformerMixin, RuleSet):
    """RuleFit class

    Parameters
    ----------
    tree_size: Number of terminal nodes in generated trees. If exp_rand_tree_size=True,
        this will be the mean number of terminal nodes.
    sample_fract: Fraction of randomly chosen training observations used to produce
        each tree. FP 2004 (Sec. 2).
    max_rules: Approximate total number of rules generated for fitting. Note that the
        actual number of rules will usually be lower than this due to duplicates.
    memory_par: Scale multiplier (shrinkage factor) applied to each new tree when
        sequentially induced. FP 2004 (Sec. 2).
    lin_standardise: If True, the linear terms will be standardised as per Friedman
        Sec. 3.2 by multiplying the winsorised variable by 0.4/stdev.
    lin_trim_quantile: If lin_standardise is True, this quantile will be used to trim
        linear terms before standardisation.
    exp_rand_tree_size: If True, each boosted tree will have a different maximum number
        of terminal nodes based on an exponential distribution about tree_size.
        (Friedman Sec. 3.3)
    include_linear: Include linear terms as opposed to only rules.
    random_state: Integer to initialise random objects and provide repeatability.
    tree_generator: Optional: this object will be used as provided to generate the rules.
        This will override almost all the other properties above.
        Must be GradientBoostingRegressor or GradientBoostingClassifier, optional
        (default=None).

    Attributes
    ----------
    rule_ensemble: RuleEnsemble
        The rule ensemble
    feature_names: list of strings, optional (default=None)
        The names of the features (columns)
    """

    def __init__(self, tree_size=4, sample_fract='default', max_rules=2000,
                 memory_par=0.01, tree_generator=None, lin_trim_quantile=0.025,
                 lin_standardise=True, exp_rand_tree_size=True, include_linear=True,
                 alphas=None, cv=3, random_state=None):
        self.tree_generator = tree_generator
        self.lin_trim_quantile = lin_trim_quantile
        self.lin_standardise = lin_standardise
        self.winsorizer = Winsorizer(trim_quantile=lin_trim_quantile)
        self.friedscale = FriedScale(self.winsorizer)
        self.stddev = None
        self.mean = None
        self.exp_rand_tree_size = exp_rand_tree_size
        self.max_rules = max_rules
        self.sample_fract = sample_fract
        self.memory_par = memory_par
        self.tree_size = tree_size
        self.random_state = random_state
        self.include_linear = include_linear
        self.cv = cv
        self.alphas = alphas

    def fit(self, X, y=None, feature_names=None):
        """Fit and estimate linear combination of rule ensemble
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, (pd.DataFrame, pd.Series)):
            y = y.values
        self.n_obs = X.shape[0]
        self.n_features_ = X.shape[1]
        self.feature_names_, self.feature_dict_ = enum_features(X, feature_names)

        self.tree_generator = self._get_tree_ensemble(classify=False)
        self._fit_tree_ensemble(X, y)

        extracted_rules = self._extract_rules()
        self.rules_without_feature_names_, self.coef, self.intercept = self._score_rules(
            X, y, extracted_rules)
        return self

    def predict(self, X):
        """Predict outcome for X
        """
        if isinstance(X, pd.DataFrame):
            X = X.values.astype(np.float32)
        y_pred = np.zeros(X.shape[0])
        y_pred += self.eval_weighted_rule_sum(X)
        if self.include_linear:
            if self.lin_standardise:
                X = self.friedscale.scale(X)
            y_pred += X @ self.coef[:X.shape[1]]
        return y_pred + self.intercept

    def predict_proba(self, X):
        y = self.predict(X)
        preds = np.vstack((1 - y, y)).transpose()
        return softmax(preds, axis=1)

    def transform(self, X=None, rules=None):
        """Transform dataset.

        Parameters
        ----------
        X : array-like matrix, shape=(n_samples, n_features)
            Input data to be transformed. Use ``dtype=np.float32`` for maximum
            efficiency.

        Returns
        -------
        X_transformed: matrix, shape=(n_samples, n_out)
            Transformed data set
        """
        df = pd.DataFrame(X, columns=self.feature_names_)
        X_transformed = np.zeros([X.shape[0], 0])
        for r in rules:
            curr_rule_feature = np.zeros(X.shape[0])
            curr_rule_feature[list(df.query(r).index)] = 1
            curr_rule_feature = np.expand_dims(curr_rule_feature, axis=1)
            X_transformed = np.concatenate((X_transformed, curr_rule_feature), axis=1)
        return X_transformed

    def get_rules(self, exclude_zero_coef=False, subregion=None):
        """Return the estimated rules

        Parameters
        ----------
        exclude_zero_coef: If True, returns only the rules with an estimated
            coefficient not equal to zero.
        subregion: If None (default), returns global importances (FP 2004 eq. 28/29);
            else returns importance over a subregion of inputs (FP 2004 eq. 30/31/32).

        Returns
        -------
        rules: pandas.DataFrame with the rules. Column 'rule' describes the rule,
            'coef' holds the coefficients and 'support' the support of the rule
            in the training data set (X).
        """
        n_features = len(self.coef) - len(self.rules_without_feature_names_)
        rule_ensemble = list(self.rules_without_feature_names_)
        output_rules = []

        ## Add coefficients for linear effects
        for i in range(0, n_features):
            if self.lin_standardise:
                coef = self.coef[i] * self.friedscale.scale_multipliers[i]
            else:
                coef = self.coef[i]
            if subregion is None:
                importance = abs(coef) * self.stddev[i]
            else:
                subregion = np.array(subregion)
                importance = sum(
                    abs(coef) * abs(np.array([x[i] for x in self.winsorizer.trim(subregion)])
                                    - self.mean[i])) / len(subregion)
            output_rules += [(self.feature_names_[i], 'linear', coef, 1, importance)]

        ## Add rules
        for i in range(0, len(self.rules_without_feature_names_)):
            rule = rule_ensemble[i]
            coef = self.coef[i + n_features]
            if subregion is None:
                importance = abs(coef) * (rule.support * (1 - rule.support)) ** (1 / 2)
            else:
                rkx = self.transform(subregion, [rule])[:, -1]
                importance = sum(abs(coef) * abs(rkx - rule.support)) / len(subregion)
            output_rules += [(rule.__str__(), 'rule', coef, rule.support, importance)]

        rules = pd.DataFrame(output_rules,
                             columns=["rule", "type", "coef", "support", "importance"])
        if exclude_zero_coef:
            rules = rules[rules.coef != 0]  # .ix was removed from pandas; use boolean indexing
        return rules

    def visualize(self):
        rules = self.get_rules()
        rules = rules[rules.coef != 0].sort_values("support", ascending=False)
        pd.set_option('display.max_colwidth', None)  # -1 is deprecated in recent pandas
        return rules[['rule', 'coef']].round(3)

    def _get_tree_ensemble(self, classify=False):
        if self.tree_generator is None:
            n_estimators_default = int(np.ceil(self.max_rules / self.tree_size))
            self.sample_fract_ = min(0.5, (100 + 6 * np.sqrt(self.n_obs)) / self.n_obs)
            tree_generator = GradientBoostingRegressor(n_estimators=n_estimators_default,
                                                       max_leaf_nodes=self.tree_size,
                                                       learning_rate=self.memory_par,
                                                       subsample=self.sample_fract_,
                                                       random_state=self.random_state,
                                                       max_depth=100)
        else:
            # a user-supplied generator must still pass the type check below
            tree_generator = self.tree_generator
        if type(tree_generator) not in [GradientBoostingRegressor, RandomForestRegressor]:
            raise ValueError("RuleFit only works with RandomForest and BoostingRegressor")
        return tree_generator

    def _fit_tree_ensemble(self, X, y):
        ## fit tree generator
        if not self.exp_rand_tree_size:
            # simply fit with constant tree size
            self.tree_generator.fit(X, y)
        else:
            # randomise tree size as per FP 2004 (Sec. 3.3)
            np.random.seed(self.random_state)
            tree_sizes = np.random.exponential(
                scale=self.tree_size - 2,
                size=int(np.ceil(self.max_rules * 2 / self.tree_size)))
            tree_sizes = np.asarray(
                [2 + np.floor(tree_sizes[i_]) for i_ in np.arange(len(tree_sizes))],
                dtype=int)
            i = int(len(tree_sizes) / 4)
            while np.sum(tree_sizes[0:i]) < self.max_rules:
                i = i + 1
            tree_sizes = tree_sizes[0:i]
            self.tree_generator.set_params(warm_start=True)
            curr_est_ = 0
            for i_size in np.arange(len(tree_sizes)):
                size = tree_sizes[i_size]
                self.tree_generator.set_params(n_estimators=curr_est_ + 1)
                self.tree_generator.set_params(max_leaf_nodes=size)
                random_state_add = self.random_state if self.random_state else 0
                # warm_start=True seems to reset random_state, such that the trees are
                # highly correlated, unless we manually change the random_state here.
                self.tree_generator.set_params(random_state=i_size + random_state_add)
                self.tree_generator.fit(np.copy(X, order='C'), np.copy(y, order='C'))
                curr_est_ = curr_est_ + 1
            self.tree_generator.set_params(warm_start=False)
        if isinstance(self.tree_generator, RandomForestRegressor):
            self.estimators_ = [[x] for x in self.tree_generator.estimators_]
        else:
            self.estimators_ = self.tree_generator.estimators_

    def _extract_rules(self):
        seen_antecedents = set()
        extracted_rules = []
        for estimator in self.estimators_:
            for rule_value_pair in tree_to_rules(estimator[0],
                                                 np.array(self.feature_names_),
                                                 prediction_values=True):
                if rule_value_pair[0] not in seen_antecedents:
                    extracted_rules.append(rule_value_pair)
                    seen_antecedents.add(rule_value_pair[0])
        extracted_rules = sorted(extracted_rules, key=lambda x: x[1])
        extracted_rules = list(map(lambda x: x[0], extracted_rules))
        return extracted_rules

    def _score_rules(self, X, y, rules):
        X_concat = np.zeros([X.shape[0], 0])

        # standardise linear variables if requested (for regression model only)
        if self.include_linear:
            # standard deviation and mean of winsorized features
            self.winsorizer.train(X)
            winsorized_X = self.winsorizer.trim(X)
            self.stddev = np.std(winsorized_X, axis=0)
            self.mean = np.mean(winsorized_X, axis=0)
            if self.lin_standardise:
                self.friedscale.train(X)
                X_regn = self.friedscale.scale(X)
            else:
                X_regn = X.copy()
            X_concat = np.concatenate((X_concat, X_regn), axis=1)

        X_rules = self.transform(X, rules)
        if X_rules.shape[0] > 0:
            X_concat = np.concatenate((X_concat, X_rules), axis=1)

        return score_lasso(X_concat, y, rules,
                           alphas=self.alphas, cv=self.cv,
                           max_rules=self.max_rules,
                           random_state=self.random_state)
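# Hedged usage sketch for the RuleFitRegressor defined immediately above. Because
# this module later re-binds the name RuleFitRegressor, the class is captured here
# via a default argument (evaluated at definition time). alphas=None lets the
# cross-validated lasso inside score_lasso choose the regularisation path (cv=3 by
# default). X_demo / y_demo are hypothetical; relies on the module's imports.
def _demo_rulefit_lasso(model_cls=RuleFitRegressor):
    rng = np.random.RandomState(0)
    X_demo = rng.uniform(size=(300, 4))
    y_demo = 3 * (X_demo[:, 0] > 0.5) + X_demo[:, 2] + 0.1 * rng.randn(300)
    model = model_cls(max_rules=200, random_state=0)
    model.fit(X_demo, y_demo, feature_names=['a', 'b', 'c', 'd'])
    return model.predict(X_demo[:5]), model.get_rules(exclude_zero_coef=True)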
class RuleFitRegressor(BaseEstimator, TransformerMixin):
    """RuleFit class

    Parameters
    ----------
    tree_size: Number of terminal nodes in generated trees. If exp_rand_tree_size=True,
        this will be the mean number of terminal nodes.
    sample_fract: Fraction of randomly chosen training observations used to produce
        each tree. FP 2004 (Sec. 2).
    max_rules: Approximate total number of rules generated for fitting. Note that the
        actual number of rules will usually be lower than this due to duplicates.
    memory_par: Scale multiplier (shrinkage factor) applied to each new tree when
        sequentially induced. FP 2004 (Sec. 2).
    rfmode: 'regress' for regression or 'classify' for binary classification.
    lin_standardise: If True, the linear terms will be standardised as per Friedman
        Sec. 3.2 by multiplying the winsorised variable by 0.4/stdev.
    lin_trim_quantile: If lin_standardise is True, this quantile will be used to trim
        linear terms before standardisation.
    exp_rand_tree_size: If True, each boosted tree will have a different maximum number
        of terminal nodes based on an exponential distribution about tree_size.
        (Friedman Sec. 3.3)
    model_type: 'r': rules only; 'l': linear terms only; 'rl': both rules and linear terms.
    random_state: Integer to initialise random objects and provide repeatability.
    tree_generator: Optional: this object will be used as provided to generate the rules.
        This will override almost all the other properties above.
        Must be GradientBoostingRegressor or GradientBoostingClassifier, optional
        (default=None).

    Attributes
    ----------
    rule_ensemble: RuleEnsemble
        The rule ensemble
    feature_names: list of strings, optional (default=None)
        The names of the features (columns)
    """

    def __init__(self, tree_size=4, sample_fract='default', max_rules=2000,
                 memory_par=0.01, tree_generator=None, rfmode='regress',
                 lin_trim_quantile=0.025, lin_standardise=True,
                 exp_rand_tree_size=True, model_type='rl', Cs=None, cv=3,
                 random_state=None):
        self.tree_generator = tree_generator
        self.rfmode = rfmode
        self.lin_trim_quantile = lin_trim_quantile
        self.lin_standardise = lin_standardise
        self.winsorizer = Winsorizer(trim_quantile=lin_trim_quantile)
        self.friedscale = FriedScale(self.winsorizer)
        self.stddev = None
        self.mean = None
        self.exp_rand_tree_size = exp_rand_tree_size
        self.max_rules = max_rules
        self.sample_fract = sample_fract
        self.memory_par = memory_par
        self.tree_size = tree_size
        self.random_state = random_state
        self.model_type = model_type
        self.cv = cv
        self.Cs = Cs

    def fit(self, X, y=None, feature_names=None, verbose=False):
        """Fit and estimate linear combination of rule ensemble
        """
        if isinstance(X, pd.DataFrame):
            X = X.values
        if isinstance(y, (pd.DataFrame, pd.Series)):
            y = y.values

        ## Enumerate features if feature names not provided
        N = X.shape[0]
        if feature_names is None:
            self.feature_names = ['feature_' + str(x) for x in range(0, X.shape[1])]
        else:
            self.feature_names = feature_names

        if 'r' in self.model_type:
            ## initialise tree generator
            if self.tree_generator is None:
                n_estimators_default = int(np.ceil(self.max_rules / self.tree_size))
                self.sample_fract_ = min(0.5, (100 + 6 * np.sqrt(N)) / N)
                if self.rfmode == 'regress':
                    self.tree_generator = GradientBoostingRegressor(
                        n_estimators=n_estimators_default,
                        max_leaf_nodes=self.tree_size,
                        learning_rate=self.memory_par,
                        subsample=self.sample_fract_,
                        random_state=self.random_state,
                        max_depth=100)
                else:
                    self.tree_generator = GradientBoostingClassifier(
                        n_estimators=n_estimators_default,
                        max_leaf_nodes=self.tree_size,
                        learning_rate=self.memory_par,
                        subsample=self.sample_fract_,
                        random_state=self.random_state,
                        max_depth=100)

            if self.rfmode == 'regress':
                if type(self.tree_generator) not in [GradientBoostingRegressor,
                                                     RandomForestRegressor]:
                    raise ValueError(
                        "RuleFit only works with RandomForest and BoostingRegressor")
            else:
                if type(self.tree_generator) not in [GradientBoostingClassifier,
                                                     RandomForestClassifier]:
                    raise ValueError(
                        "RuleFit only works with RandomForest and BoostingClassifier")

            ## fit tree generator
            if not self.exp_rand_tree_size:
                # simply fit with constant tree size
                self.tree_generator.fit(X, y)
            else:
                # randomise tree size as per FP 2004 (Sec. 3.3)
                np.random.seed(self.random_state)
                tree_sizes = np.random.exponential(
                    scale=self.tree_size - 2,
                    size=int(np.ceil(self.max_rules * 2 / self.tree_size)))
                tree_sizes = np.asarray(
                    [2 + np.floor(tree_sizes[i_]) for i_ in np.arange(len(tree_sizes))],
                    dtype=int)
                i = int(len(tree_sizes) / 4)
                while np.sum(tree_sizes[0:i]) < self.max_rules:
                    i = i + 1
                tree_sizes = tree_sizes[0:i]
                self.tree_generator.set_params(warm_start=True)
                curr_est_ = 0
                for i_size in np.arange(len(tree_sizes)):
                    size = tree_sizes[i_size]
                    self.tree_generator.set_params(n_estimators=curr_est_ + 1)
                    self.tree_generator.set_params(max_leaf_nodes=size)
                    random_state_add = self.random_state if self.random_state else 0
                    # warm_start=True seems to reset random_state, such that the trees
                    # are highly correlated, unless we manually change the random_state
                    # here.
                    self.tree_generator.set_params(random_state=i_size + random_state_add)
                    self.tree_generator.fit(np.copy(X, order='C'), np.copy(y, order='C'))
                    curr_est_ = curr_est_ + 1
                self.tree_generator.set_params(warm_start=False)

            tree_list = self.tree_generator.estimators_
            if isinstance(self.tree_generator, RandomForestRegressor) or \
                    isinstance(self.tree_generator, RandomForestClassifier):
                tree_list = [[x] for x in self.tree_generator.estimators_]

            ## extract rules
            self.rule_ensemble = RuleEnsemble(tree_list=tree_list,
                                              feature_names=self.feature_names)

            ## concatenate original features and rules
            X_rules = self.rule_ensemble.transform(X)

        ## standardise linear variables if requested (for regression model only)
        if 'l' in self.model_type:
            ## standard deviation and mean of winsorized features
            self.winsorizer.train(X)
            winsorized_X = self.winsorizer.trim(X)
            self.stddev = np.std(winsorized_X, axis=0)
            self.mean = np.mean(winsorized_X, axis=0)
            if self.lin_standardise:
                self.friedscale.train(X)
                X_regn = self.friedscale.scale(X)
            else:
                X_regn = X.copy()

        ## Compile Training data
        X_concat = np.zeros([X.shape[0], 0])
        if 'l' in self.model_type:
            X_concat = np.concatenate((X_concat, X_regn), axis=1)
        if 'r' in self.model_type:
            if X_rules.shape[0] > 0:
                X_concat = np.concatenate((X_concat, X_rules), axis=1)

        ## fit Lasso
        if self.rfmode == 'regress':
            if self.Cs is None:  # use defaults
                n_alphas = 100
                alphas = None
            elif hasattr(self.Cs, "__len__"):
                n_alphas = None
                alphas = 1. / self.Cs
            else:
                n_alphas = self.Cs
                alphas = None
            self.lscv = LassoCV(n_alphas=n_alphas, alphas=alphas, cv=self.cv,
                                random_state=self.random_state)
            self.lscv.fit(X_concat, y)
            self.coef_ = self.lscv.coef_
            self.intercept_ = self.lscv.intercept_
        else:
            Cs = 10 if self.Cs is None else self.Cs
            self.lscv = LogisticRegressionCV(Cs=Cs, cv=self.cv, penalty='l1',
                                             random_state=self.random_state,
                                             solver='liblinear')
            self.lscv.fit(X_concat, y)
            self.coef_ = self.lscv.coef_[0]
            self.intercept_ = self.lscv.intercept_[0]
        return self

    def predict(self, X):
        """Predict outcome for X
        """
        if isinstance(X, pd.DataFrame):
            X = X.values.astype(np.float32)
        X_concat = np.zeros([X.shape[0], 0])
        if 'l' in self.model_type:
            if self.lin_standardise:
                X_concat = np.concatenate((X_concat, self.friedscale.scale(X)), axis=1)
            else:
                X_concat = np.concatenate((X_concat, X), axis=1)
        if 'r' in self.model_type:
            rule_coefs = self.coef_[-len(self.rule_ensemble.rules):]
            if len(rule_coefs) > 0:
                X_rules = self.rule_ensemble.transform(X, coefs=rule_coefs)
                if X_rules.shape[0] > 0:
                    X_concat = np.concatenate((X_concat, X_rules), axis=1)
        return self.lscv.predict(X_concat)

    def predict_proba(self, X):
        y = self.predict(X)
        return np.vstack((1 - y, y)).transpose()

    def transform(self, X=None, y=None):
        """Transform dataset.

        Parameters
        ----------
        X : array-like matrix, shape=(n_samples, n_features)
            Input data to be transformed. Use ``dtype=np.float32`` for maximum
            efficiency.

        Returns
        -------
        X_transformed: matrix, shape=(n_samples, n_out)
            Transformed data set
        """
        return self.rule_ensemble.transform(X)

    def get_rules(self, exclude_zero_coef=False, subregion=None):
        """Return the estimated rules

        Parameters
        ----------
        exclude_zero_coef: If True, returns only the rules with an estimated
            coefficient not equal to zero.
        subregion: If None (default), returns global importances (FP 2004 eq. 28/29);
            else returns importance over a subregion of inputs (FP 2004 eq. 30/31/32).

        Returns
        -------
        rules: pandas.DataFrame with the rules. Column 'rule' describes the rule,
            'coef' holds the coefficients and 'support' the support of the rule
            in the training data set (X).
        """
        n_features = len(self.coef_) - len(self.rule_ensemble.rules)
        rule_ensemble = list(self.rule_ensemble.rules)
        output_rules = []

        ## Add coefficients for linear effects
        for i in range(0, n_features):
            if self.lin_standardise:
                coef = self.coef_[i] * self.friedscale.scale_multipliers[i]
            else:
                coef = self.coef_[i]
            if subregion is None:
                importance = abs(coef) * self.stddev[i]
            else:
                subregion = np.array(subregion)
                importance = sum(
                    abs(coef) * abs(np.array([x[i] for x in self.winsorizer.trim(subregion)])
                                    - self.mean[i])) / len(subregion)
            output_rules += [(self.feature_names[i], 'linear', coef, 1, importance)]

        ## Add rules
        for i in range(0, len(self.rule_ensemble.rules)):
            rule = rule_ensemble[i]
            coef = self.coef_[i + n_features]
            if subregion is None:
                importance = abs(coef) * (rule.support * (1 - rule.support)) ** (1 / 2)
            else:
                rkx = rule.transform(subregion)
                importance = sum(abs(coef) * abs(rkx - rule.support)) / len(subregion)
            output_rules += [(rule.__str__(), 'rule', coef, rule.support, importance)]

        rules = pd.DataFrame(output_rules,
                             columns=["rule", "type", "coef", "support", "importance"])
        if exclude_zero_coef:
            rules = rules[rules.coef != 0]  # .ix was removed from pandas; use boolean indexing
        return rules

    def visualize(self):
        rules = self.get_rules()
        rules = rules[rules.coef != 0].sort_values("support", ascending=False)
        pd.set_option('display.max_colwidth', None)  # -1 is deprecated in recent pandas
        return rules[['rule', 'coef']].round(3)
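# Hedged usage sketch for the rfmode/model_type variant above: binary classification
# ('classify') with both rules and linear terms ('rl'). Cs=None lets
# LogisticRegressionCV fall back to its default grid of 10 values under the L1
# penalty set in fit(). X_demo / y_demo are hypothetical; relies on the module's
# imports, and RuleFitRegressor here resolves to the class defined directly above.
def _demo_rulefit_classify():
    rng = np.random.RandomState(0)
    X_demo = rng.uniform(size=(300, 3))
    y_demo = (X_demo[:, 0] + X_demo[:, 1] > 1).astype(int)
    model = RuleFitRegressor(rfmode='classify', model_type='rl', max_rules=200,
                             random_state=0)
    model.fit(X_demo, y_demo)
    return model.predict(X_demo[:5]), model.get_rules(exclude_zero_coef=True)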