Beispiel #1
0
 def __init__(self, loss='deviance', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse',
              min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0., max_depth=3,
              min_impurity_decrease=0., min_impurity_split=None, init=None, random_state=None, max_features=None,
              verbose=0, max_leaf_nodes=None, warm_start=False, presort='auto'):
     """Initialise the wrapped gradient boosting classifier.

     Every parameter is forwarded under its own name to
     ``_skGradientBoostingClassifier.__init__``; afterwards
     ``BaseWrapperClf.__init__`` is run on the same instance.

     ``n_estimators`` is coerced with ``int()`` before being forwarded, so
     integral values supplied as another numeric type (for example a float)
     are accepted.
     """
     n_estimators = int(n_estimators)
     # Forward by keyword rather than by position: a positional call binds
     # values by index, so it breaks (or silently mis-assigns settings) as
     # soon as the underlying signature is reordered or made keyword-only.
     _skGradientBoostingClassifier.__init__(
         self,
         loss=loss,
         learning_rate=learning_rate,
         n_estimators=n_estimators,
         subsample=subsample,
         criterion=criterion,
         min_samples_split=min_samples_split,
         min_samples_leaf=min_samples_leaf,
         min_weight_fraction_leaf=min_weight_fraction_leaf,
         max_depth=max_depth,
         min_impurity_decrease=min_impurity_decrease,
         min_impurity_split=min_impurity_split,
         init=init,
         random_state=random_state,
         max_features=max_features,
         verbose=verbose,
         max_leaf_nodes=max_leaf_nodes,
         warm_start=warm_start,
         presort=presort,
     )
     BaseWrapperClf.__init__(self)
Beispiel #2
0
 def __init__(self, n_estimators=20, learning_rate=0.1):
     """Initialise the instance through ``GradientBoostingClassifier.__init__``.

     Only ``n_estimators`` and ``learning_rate`` are passed on, both by
     keyword; no other argument is supplied to that initialiser.
     """
     forwarded = dict(n_estimators=n_estimators, learning_rate=learning_rate)
     GradientBoostingClassifier.__init__(self, **forwarded)
Beispiel #3
0
    def __init__(self, pmml):
        """Build a gradient boosting classifier from a PMML document.

        After delegating to ``PMMLBaseClassifier.__init__``, the constructor
        walks the ``MiningModel`` / ``Segmentation`` / ``Segment`` elements
        below ``self.root`` and fills every slot of ``self.estimators_`` with
        a tree produced by ``get_tree``. It finishes by attaching per-feature
        category counts to each tree and setting ``self.categorical``.

        Parameters
        ----------
        pmml
            Passed unchanged to ``PMMLBaseClassifier.__init__``.
            NOTE(review): presumably a path or file-like PMML source --
            confirm against the base class.

        Raises
        ------
        Exception
            If the document has no ``MiningModel`` element, that element has
            no ``Segmentation`` child, or the segmentation's
            ``multipleModelMethod`` attribute is not ``modelChain``.

        Notes
        -----
        ``self.root``, ``self.n_classes_``, ``self.classes_``,
        ``self.n_outputs_``, ``self.fields``, ``self.field_mapping`` and
        ``self.target_field`` are read here but not assigned here;
        presumably the base initialiser provides them -- TODO confirm.
        """
        PMMLBaseClassifier.__init__(self, pmml)

        mining_model = self.root.find('MiningModel')
        if mining_model is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = mining_model.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        # Only chained ensembles are accepted.
        if segmentation.get('multipleModelMethod') not in ['modelChain']:
            raise Exception('PMML model ensemble should use modelChain.')

        # Parse segments
        segments = segmentation.findall('Segment')
        # One slot per class; a slot stays None when its class index is not
        # part of `indices` below.
        valid_segments = [None] * self.n_classes_

        indices = range(self.n_classes_)
        # For binary classification, only the predictions of the first class
        # need to be described; the other can be inferred. Not all PMML
        # models do this, but we assume the following conditions (two
        # top-level segments, the last one without a TreeModel) imply this
        # approach.
        if self.n_classes_ == 2 and len(
                segments) == 2 and segments[-1].find('TreeModel') is None:
            indices = [0]

        # For each parsed class, keep the nested segments that carry both a
        # `True` element and a `TreeModel` element.
        for i in indices:
            valid_segments[i] = [
                segment for segment in segments[i].find('MiningModel').find(
                    'Segmentation').findall('Segment')
                if segment.find('True') is not None
                and segment.find('TreeModel') is not None
            ]

        # The ensemble size is taken from the first class's segment count.
        n_estimators = len(valid_segments[0])
        GradientBoostingClassifier.__init__(self, n_estimators=n_estimators)

        # Template regressor stored on the instance as `template_estimator`.
        clf = DecisionTreeRegressor(random_state=123)
        # Prefer `n_features_in_`; fall back to `n_features_` when the
        # attribute lookup raises AttributeError.
        # NOTE(review): presumably bridges differing scikit-learn versions --
        # confirm.
        try:
            clf.n_features_in_ = self.n_features_in_
        except AttributeError:
            clf.n_features_ = self.n_features_
        clf.n_outputs_ = self.n_outputs_
        self.template_estimator = clf

        # NOTE(review): `_check_params` is not defined in this block;
        # presumably the inherited private hook that prepares `self.loss_`
        # -- confirm against the scikit-learn version in use.
        self._check_params()

        if self.n_classes_ == 2 and len(
                segments) == 3 and segments[-1].find('TreeModel') is None:
            # For binary classification where both sides are specified, we need to force multinomial deviance
            self.loss_ = _gb_losses.MultinomialDeviance(self.n_classes_ + 1)
            self.loss_.K = 2

        # Preferred path: configure the initial estimator from the PMML
        # targets. Any AttributeError raised inside this block (including a
        # missing element in a `find` chain, or a missing `strategy`
        # attribute) switches to the `init='zero'` fallback below.
        # NOTE(review): `_init_state` presumably creates `self.init_` and
        # `self.estimators_`, both used right after -- confirm.
        try:
            self.init = None
            self._init_state()

            # One prior per parsed class: expit of the negated
            # `rescaleConstant` attribute of that class's Target element.
            self.init_.class_prior_ = [
                expit(-float(segments[i].find('MiningModel').find(
                    'Targets').find('Target').get('rescaleConstant')))
                for i in indices
            ]

            # Binary case: the second prior is the complement of the first.
            if self.n_classes_ == 2:
                self.init_.class_prior_ = [
                    self.init_.class_prior_[0], 1 - self.init_.class_prior_[0]
                ]

            # Classes are represented by their positional index.
            self.init_.classes_ = [i for i, _ in enumerate(self.classes_)]
            self.init_.n_classes_ = self.n_classes_
            self.init_.n_outputs_ = 1
            self.init_._strategy = self.init_.strategy
        except AttributeError:
            self.init = 'zero'
            self._init_state()

        # Fill every (estimator x, class y) slot with a parsed tree. The
        # `rescaleFactor` attribute (default 1) is passed to `get_tree`; if
        # anything inside the try raises AttributeError -- the lookup chain
        # or `get_tree` itself -- the tree is built again without a factor.
        for x, y in np.ndindex(self.estimators_.shape):
            try:
                factor = float(segments[y].find('MiningModel').find(
                    'Targets').find('Target').get('rescaleFactor', 1))
                self.estimators_[x, y] = get_tree(self,
                                                  valid_segments[y][x],
                                                  rescale_factor=factor)
            except AttributeError:
                self.estimators_[x, y] = get_tree(self, valid_segments[y][x])

        # Required after constructing trees, because categories may be inferred in
        # the parsing process
        target = self.target_field.get('name')
        # Every field except the target field.
        fields = [
            field for name, field in self.fields.items() if name != target
        ]
        for x, y in np.ndindex(self.estimators_.shape):
            clf = self.estimators_[x, y]
            # One entry per non-target DataField: the number of categories
            # for fields whose optype is 'categorical', otherwise -1.
            n_categories = np.asarray([
                len(self.field_mapping[field.get('name')][1].categories)
                if field.get('optype') == 'categorical' else -1
                for field in fields if field.tag == 'DataField'
            ],
                                      dtype=np.int32,
                                      order='C')
            clf.n_categories = n_categories
            clf.tree_.set_n_categories(n_categories)

        # Boolean mask derived from the first tree's category counts: True
        # where the count is not -1.
        self.categorical = [
            x != -1 for x in self.estimators_[0, 0].n_categories
        ]