コード例 #1
0
    def __init__(self, pmml):
        PMMLBaseClassifier.__init__(self, pmml)

        mining_model = self.root.find('MiningModel')
        if mining_model is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = mining_model.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        if segmentation.get('multipleModelMethod') not in ['modelChain']:
            raise Exception('PMML model ensemble should use modelChain.')

        # Parse segments
        segments = segmentation.findall('Segment')
        valid_segments = [None] * self.n_classes_

        indices = range(self.n_classes_)
        # For binary classification, only the predictions of the first class need to be described, the other can be inferred
        # Not all PMML models do this, but we assume the following conditions imply this approach.
        if self.n_classes_ == 2 and len(
                segments) == 2 and segments[-1].find('TreeModel') is None:
            indices = [0]

        for i in indices:
            valid_segments[i] = [
                segment for segment in segments[i].find('MiningModel').find(
                    'Segmentation').findall('Segment')
                if segment.find('True') is not None
                and segment.find('TreeModel') is not None
            ]

        n_estimators = len(valid_segments[0])
        GradientBoostingClassifier.__init__(self, n_estimators=n_estimators)

        clf = DecisionTreeRegressor(random_state=123)
        try:
            clf.n_features_in_ = self.n_features_in_
        except AttributeError:
            clf.n_features_ = self.n_features_
        clf.n_outputs_ = self.n_outputs_
        self.template_estimator = clf

        self._check_params()

        if self.n_classes_ == 2 and len(
                segments) == 3 and segments[-1].find('TreeModel') is None:
            # For binary classification where both sides are specified, we need to force multinomial deviance
            self.loss_ = _gb_losses.MultinomialDeviance(self.n_classes_ + 1)
            self.loss_.K = 2

        try:
            self.init = None
            self._init_state()

            self.init_.class_prior_ = [
                expit(-float(segments[i].find('MiningModel').find(
                    'Targets').find('Target').get('rescaleConstant')))
                for i in indices
            ]

            if self.n_classes_ == 2:
                self.init_.class_prior_ = [
                    self.init_.class_prior_[0], 1 - self.init_.class_prior_[0]
                ]

            self.init_.classes_ = [i for i, _ in enumerate(self.classes_)]
            self.init_.n_classes_ = self.n_classes_
            self.init_.n_outputs_ = 1
            self.init_._strategy = self.init_.strategy
        except AttributeError:
            self.init = 'zero'
            self._init_state()

        for x, y in np.ndindex(self.estimators_.shape):
            try:
                factor = float(segments[y].find('MiningModel').find(
                    'Targets').find('Target').get('rescaleFactor', 1))
                self.estimators_[x, y] = get_tree(self,
                                                  valid_segments[y][x],
                                                  rescale_factor=factor)
            except AttributeError:
                self.estimators_[x, y] = get_tree(self, valid_segments[y][x])

        # Required after constructing trees, because categories may be inferred in
        # the parsing process
        target = self.target_field.get('name')
        fields = [
            field for name, field in self.fields.items() if name != target
        ]
        for x, y in np.ndindex(self.estimators_.shape):
            clf = self.estimators_[x, y]
            n_categories = np.asarray([
                len(self.field_mapping[field.get('name')][1].categories)
                if field.get('optype') == 'categorical' else -1
                for field in fields if field.tag == 'DataField'
            ],
                                      dtype=np.int32,
                                      order='C')
            clf.n_categories = n_categories
            clf.tree_.set_n_categories(n_categories)

        self.categorical = [
            x != -1 for x in self.estimators_[0, 0].n_categories
        ]
コード例 #2
0
    def __init__(self, pmml):
        PMMLBaseRegressor.__init__(self, pmml)

        mining_model = self.root.find('MiningModel')
        if mining_model is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = mining_model.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        if segmentation.get('multipleModelMethod') not in ['sum']:
            raise Exception('PMML model ensemble should use sum.')

        # Parse segments
        segments = segmentation.findall('Segment')
        valid_segments = [
            segment for segment in segments if segment.find('True') is not None
            and segment.find('TreeModel') is not None
        ]

        n_estimators = len(valid_segments)
        self.n_outputs_ = 1
        GradientBoostingRegressor.__init__(self, n_estimators=n_estimators)

        clf = DecisionTreeRegressor(random_state=123)
        try:
            clf.n_features_in_ = self.n_features_in_
        except AttributeError:
            clf.n_features_ = self.n_features_
        clf.n_outputs_ = self.n_outputs_
        self.template_estimator = clf

        self._check_params()
        self._init_state()

        mean = mining_model.find('Targets').find('Target').get(
            'rescaleConstant', 0)
        self.init_.constant_ = np.array([mean])
        self.init_.n_outputs_ = 1

        for x, y in np.ndindex(self.estimators_.shape):
            factor = float(
                mining_model.find('Targets').find('Target').get(
                    'rescaleFactor', 1))
            self.estimators_[x, y] = get_tree(self,
                                              valid_segments[x],
                                              rescale_factor=factor)

        # Required after constructing trees, because categories may be inferred in
        # the parsing process
        target = self.target_field.get('name')
        fields = [
            field for name, field in self.fields.items() if name != target
        ]
        for x, y in np.ndindex(self.estimators_.shape):
            clf = self.estimators_[x, y]
            n_categories = np.asarray([
                len(self.field_mapping[field.get('name')][1].categories)
                if field.get('optype') == 'categorical' else -1
                for field in fields if field.tag == 'DataField'
            ],
                                      dtype=np.int32,
                                      order='C')
            clf.n_categories = n_categories
            clf.tree_.set_n_categories(n_categories)

        self.categorical = [
            x != -1 for x in self.estimators_[0, 0].n_categories
        ]