def __init__(self, loss='ls', learning_rate=0.1, n_estimators=100,
             subsample=1.0, criterion='friedman_mse', min_samples_split=2,
             min_samples_leaf=1, min_weight_fraction_leaf=0., max_depth=3,
             min_impurity_decrease=0., min_impurity_split=None, init=None,
             random_state=None, max_features=None, alpha=0.9, verbose=0,
             max_leaf_nodes=None, warm_start=False, presort='auto'):
    """
    Initialise the wrapped gradient boosting regressor.

    Every hyperparameter is handed on unchanged to
    ``_GradientBoostingRegressor.__init__``, except ``n_estimators``, which
    is coerced with ``int()`` first. ``BaseWrapperReg.__init__`` is run
    afterwards.
    """
    # The arguments are forwarded positionally, so their order here must
    # stay in step with the parameter order of the wrapped constructor.
    _GradientBoostingRegressor.__init__(
        self,
        loss,
        learning_rate,
        int(n_estimators),
        subsample,
        criterion,
        min_samples_split,
        min_samples_leaf,
        min_weight_fraction_leaf,
        max_depth,
        min_impurity_decrease,
        min_impurity_split,
        init,
        random_state,
        max_features,
        alpha,
        verbose,
        max_leaf_nodes,
        warm_start,
        presort,
    )
    BaseWrapperReg.__init__(self)
def __init__(self, pmml):
    """
    Build a gradient boosting regressor from a PMML document.

    The constructor locates the ``MiningModel``/``Segmentation`` elements,
    turns every segment that has both a ``True`` predicate and a
    ``TreeModel`` into one boosting stage via ``get_tree``, and sets the
    initial estimator's constant from the target's ``rescaleConstant``.

    Parameters
    ----------
    pmml
        Passed unchanged to ``PMMLBaseRegressor.__init__``.
        NOTE(review): presumably a path or file-like object holding the
        PMML document -- confirm against the base class.

    Raises
    ------
    Exception
        If the document has no ``MiningModel``, no ``Segmentation``, or if
        the segmentation's ``multipleModelMethod`` is not ``'sum'``.
    ValueError
        If ``rescaleConstant`` or ``rescaleFactor`` is present but cannot
        be converted to ``float``.
    """
    PMMLBaseRegressor.__init__(self, pmml)

    mining_model = self.root.find('MiningModel')
    if mining_model is None:
        raise Exception('PMML model does not contain MiningModel.')

    segmentation = mining_model.find('Segmentation')
    if segmentation is None:
        raise Exception('PMML model does not contain Segmentation.')

    if segmentation.get('multipleModelMethod') not in ['sum']:
        raise Exception('PMML model ensemble should use sum.')

    # Parse segments
    segments = segmentation.findall('Segment')
    valid_segments = [
        segment for segment in segments
        if segment.find('True') is not None
        and segment.find('TreeModel') is not None
    ]

    n_estimators = len(valid_segments)
    self.n_outputs_ = 1
    GradientBoostingRegressor.__init__(self, n_estimators=n_estimators)

    clf = DecisionTreeRegressor(random_state=123)
    try:
        clf.n_features_in_ = self.n_features_in_
    except AttributeError:
        # Fall back to the attribute name used when n_features_in_ is absent.
        clf.n_features_ = self.n_features_
    clf.n_outputs_ = self.n_outputs_
    self.template_estimator = clf

    self._check_params()
    self._init_state()

    # Read the target rescaling once. XML attribute values are strings, so
    # convert them to float explicitly; a missing <Targets>/<Target>
    # element falls back to the same defaults used for missing attributes
    # (constant 0, factor 1).
    targets = mining_model.find('Targets')
    target_element = targets.find('Target') if targets is not None else None
    if target_element is None:
        rescale_constant = 0.0
        rescale_factor = 1.0
    else:
        rescale_constant = float(target_element.get('rescaleConstant', 0))
        rescale_factor = float(target_element.get('rescaleFactor', 1))

    self.init_.constant_ = np.array([rescale_constant])
    self.init_.n_outputs_ = 1

    for x, y in np.ndindex(self.estimators_.shape):
        self.estimators_[x, y] = get_tree(self, valid_segments[x],
                                          rescale_factor=rescale_factor)

    # Required after constructing trees, because categories may be inferred in
    # the parsing process
    target = self.target_field.get('name')
    # Number of categories per non-target DataField, -1 for fields that are
    # not categorical. Identical for every tree, so it is computed once.
    category_counts = [
        len(self.field_mapping[field.get('name')][1].categories)
        if field.get('optype') == 'categorical' else -1
        for name, field in self.fields.items()
        if name != target and field.tag == 'DataField'
    ]

    for x, y in np.ndindex(self.estimators_.shape):
        clf = self.estimators_[x, y]
        # Each tree gets its own array so no buffer is shared between trees.
        n_categories = np.asarray(category_counts, dtype=np.int32, order='C')
        clf.n_categories = n_categories
        clf.tree_.set_n_categories(n_categories)

    self.categorical = [
        x != -1 for x in self.estimators_[0, 0].n_categories
    ]