def deserialize_random_forest_regressor(model_dict):
    """Rebuild a fitted RandomForestRegressor from its serialized dict form.

    `model_dict` is expected to carry the constructor arguments under
    'params', one serialized decision tree per entry of 'estimators_',
    and the fitted/hyper attributes listed below.  Inverse of the
    corresponding serializer.
    """
    model = RandomForestRegressor(**model_dict['params'])

    # Restore the fitted trees; kept as an object ndarray to match the
    # serialized representation this function has always produced.
    model.estimators_ = np.array([
        deserialize_decision_tree_regressor(tree_dict)
        for tree_dict in model_dict['estimators_']
    ])

    # Scalar attributes copied straight across, in the original order
    # (a missing key raises KeyError at the same point it always did).
    for attr in (
        'n_features_',
        'n_outputs_',
        'max_depth',
        'min_samples_split',
        'min_samples_leaf',
        'min_weight_fraction_leaf',
        'max_features',
        'max_leaf_nodes',
        'min_impurity_decrease',
        'min_impurity_split',
    ):
        setattr(model, attr, model_dict[attr])

    # Out-of-bag results exist only when the forest was fit with
    # oob_score=True, so restore them conditionally.
    if 'oob_score_' in model_dict:
        model.oob_score_ = model_dict['oob_score_']
    if 'oob_prediction_' in model_dict:
        model.oob_prediction_ = np.array(model_dict['oob_prediction_'])

    return model
def _fit_random_forest(X_train, y_train, obj_func_name = "mse", sample_weight = None, model_params = None, log_target_reg = False):
    """Fit a RandomForestRegressor, optionally on a log-transformed target.

    Parameters
    ----------
    X_train, y_train : training features and target.
    obj_func_name : split criterion; validated via _check_obj_func_name.
    sample_weight : optional per-sample weights forwarded to fit().
    model_params : optional dict of extra RandomForestRegressor kwargs.
        Must not set 'criterion' to anything other than obj_func_name.
    log_target_reg : if True, wrap the forest with _log_target_regressor
        and fit on the transformed target.

    Returns
    -------
    The fitted model.  When log_target_reg is True, `oob_prediction_`
    (back-transformed to original units, or None when OOB was not
    computed) and `feature_importances_` are copied onto the wrapper.

    Raises
    ------
    AssertionError : if model_params['criterion'] conflicts with
        obj_func_name.
    """
    # Work on a copy so the caller's dict is never mutated — the original
    # wrote 'criterion' back into the argument, a side effect that leaked
    # into later calls reusing the same dict.
    model_params = {} if model_params is None else dict(model_params)

    # checking if obj_func is correct
    _check_obj_func_name(obj_func_name, "random_forest")

    if "criterion" in model_params:
        assert obj_func_name == model_params["criterion"], "use the obj_func_name argument instead of setting criterion in model_params"
    model_params["criterion"] = obj_func_name

    model = RandomForestRegressor(**model_params)
    if log_target_reg:
        model = _log_target_regressor(model)

    model.fit(X = X_train, y = y_train, sample_weight = sample_weight)

    if log_target_reg:
        # Surface OOB predictions and importances from the wrapped
        # regressor so the returned object looks like a plain forest.
        if hasattr(model.regressor_, "oob_prediction_"):
            oob_pred = model.regressor_.oob_prediction_
            # must convert back to original units
            oob_pred = model.inverse_func(oob_pred)
        else:
            oob_pred = None
        model.oob_prediction_ = oob_pred
        model.feature_importances_ = model.regressor_.feature_importances_

    return model