Beispiel #1
0
 def __init__(self,
              loss='ls',
              learning_rate=0.1,
              n_estimators=100,
              subsample=1.0,
              criterion='friedman_mse',
              min_samples_split=2,
              min_samples_leaf=1,
              min_weight_fraction_leaf=0.,
              max_depth=3,
              min_impurity_decrease=0.,
              min_impurity_split=None,
              init=None,
              random_state=None,
              max_features=None,
              alpha=0.9,
              verbose=0,
              max_leaf_nodes=None,
              warm_start=False,
              presort='auto'):
     """Initialise the wrapped gradient boosting regressor.

     Every argument is forwarded unchanged to
     ``_GradientBoostingRegressor.__init__`` under the same name, except
     ``n_estimators``, which is first coerced with ``int()``.
     ``BaseWrapperReg.__init__`` is then called without arguments.

     NOTE(review): ``_GradientBoostingRegressor`` is presumably an alias of
     scikit-learn's ``GradientBoostingRegressor``; the parameter names and
     defaults here mirror that estimator -- confirm against the import.
     """
     # Coerce before forwarding so a non-int count (e.g. a float) is
     # handed to the base class as an int.
     n_estimators = int(n_estimators)

     # Forward by keyword instead of by position: with this many arguments a
     # positional call silently mis-assigns values if the base signature is
     # ever reordered, and scikit-learn deprecates positional constructor
     # arguments.
     _GradientBoostingRegressor.__init__(
         self,
         loss=loss,
         learning_rate=learning_rate,
         n_estimators=n_estimators,
         subsample=subsample,
         criterion=criterion,
         min_samples_split=min_samples_split,
         min_samples_leaf=min_samples_leaf,
         min_weight_fraction_leaf=min_weight_fraction_leaf,
         max_depth=max_depth,
         min_impurity_decrease=min_impurity_decrease,
         min_impurity_split=min_impurity_split,
         init=init,
         random_state=random_state,
         max_features=max_features,
         alpha=alpha,
         verbose=verbose,
         max_leaf_nodes=max_leaf_nodes,
         warm_start=warm_start,
         presort=presort)
     BaseWrapperReg.__init__(self)
Beispiel #2
0
    def __init__(self, pmml):
        """Build a gradient boosting regressor from a PMML document.

        The PMML must contain a ``MiningModel`` with a ``Segmentation`` whose
        ``multipleModelMethod`` is ``sum``. Every ``Segment`` that has both a
        ``True`` predicate and a ``TreeModel`` becomes one estimator, built
        with ``get_tree``.

        Parameters
        ----------
        pmml
            Passed straight through to ``PMMLBaseRegressor.__init__``.
            NOTE(review): presumably a path or file-like object; that
            constructor is expected to populate ``self.root``,
            ``self.fields``, ``self.field_mapping`` and ``self.target_field``
            -- confirm against the base class.

        Raises
        ------
        Exception
            If ``MiningModel`` or ``Segmentation`` is missing, or the
            ensemble method is not ``sum``.
        ValueError
            If ``rescaleConstant`` or ``rescaleFactor`` is not numeric.
        """
        PMMLBaseRegressor.__init__(self, pmml)

        mining_model = self.root.find('MiningModel')
        if mining_model is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = mining_model.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        if segmentation.get('multipleModelMethod') != 'sum':
            raise Exception('PMML model ensemble should use sum.')

        # Parse segments: keep only those with a ``True`` predicate and a
        # ``TreeModel`` child.
        valid_segments = [
            segment for segment in segmentation.findall('Segment')
            if segment.find('True') is not None
            and segment.find('TreeModel') is not None
        ]

        n_estimators = len(valid_segments)
        self.n_outputs_ = 1
        GradientBoostingRegressor.__init__(self, n_estimators=n_estimators)

        clf = DecisionTreeRegressor(random_state=123)
        try:
            clf.n_features_in_ = self.n_features_in_
        except AttributeError:
            # Fall back to the other attribute name when ``n_features_in_``
            # is not available on this instance.
            clf.n_features_ = self.n_features_
        clf.n_outputs_ = self.n_outputs_
        self.template_estimator = clf

        self._check_params()
        self._init_state()

        # Read the target rescaling once. A missing ``Targets``/``Target``
        # element falls back to the same defaults used for missing attributes
        # (constant 0, factor 1) instead of failing on ``None.find``.
        targets = mining_model.find('Targets')
        target_element = None
        if targets is not None:
            target_element = targets.find('Target')
        # Compare with ``is not None``: an XML element without children is
        # falsy, so a plain truth test would wrongly discard it.
        rescale = target_element if target_element is not None else {}

        # XML attribute values are strings; cast so the constant is numeric,
        # matching how the factor is handled.
        mean = float(rescale.get('rescaleConstant', 0))
        factor = float(rescale.get('rescaleFactor', 1))

        self.init_.constant_ = np.array([mean])
        self.init_.n_outputs_ = 1

        for x, y in np.ndindex(self.estimators_.shape):
            self.estimators_[x, y] = get_tree(self,
                                              valid_segments[x],
                                              rescale_factor=factor)

        # Required after constructing trees, because categories may be inferred in
        # the parsing process
        target = self.target_field.get('name')
        fields = [
            field for name, field in self.fields.items() if name != target
        ]
        # Number of categories per DataField, -1 for non-categorical fields.
        # The counts are the same for every tree, so compute them once.
        category_counts = [
            len(self.field_mapping[field.get('name')][1].categories)
            if field.get('optype') == 'categorical' else -1
            for field in fields if field.tag == 'DataField'
        ]
        for x, y in np.ndindex(self.estimators_.shape):
            clf = self.estimators_[x, y]
            # Build a fresh array per tree so estimators never share a buffer.
            n_categories = np.asarray(category_counts,
                                      dtype=np.int32,
                                      order='C')
            clf.n_categories = n_categories
            clf.tree_.set_n_categories(n_categories)

        self.categorical = [
            x != -1 for x in self.estimators_[0, 0].n_categories
        ]