예제 #1
0
    def __init__(self, pmml):
        """Populate a linear SVC from the ``RegressionModel`` of a PMML file.

        Parameters
        ----------
        pmml
            Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

        Raises
        ------
        Exception
            If the PMML root has no ``RegressionModel`` child.
        """
        PMMLBaseClassifier.__init__(self, pmml)
        OneHotEncodingMixin.__init__(self)
        LinearSVC.__init__(self)

        regression = self.root.find('RegressionModel')
        if regression is None:
            raise Exception('PMML model does not contain RegressionModel.')

        # Only tables holding at least one NumericPredictor contribute a row.
        usable_tables = []
        for candidate in regression.findall('RegressionTable'):
            if candidate.find('NumericPredictor') is not None:
                usable_tables.append(candidate)

        weights = [_linear_get_coefficients(self, t) for t in usable_tables]
        offsets = [float(t.get('intercept')) for t in usable_tables]

        # One row of coefficients and one intercept per retained table.
        self.coef_ = np.array(weights)
        self.intercept_ = np.array(offsets)
예제 #2
0
    def __init__(self, pmml, n_jobs=None):
        """Assemble a random forest from the segments of a PMML ``MiningModel``.

        Parameters
        ----------
        pmml
            Forwarded unchanged to ``PMMLBaseClassifier.__init__``.
        n_jobs
            Forwarded unchanged to ``RandomForestClassifier.__init__``.

        Raises
        ------
        Exception
            If ``MiningModel`` or its ``Segmentation`` is missing, or if the
            segmentation's ``multipleModelMethod`` is neither
            ``majorityVote`` nor ``average``.

        Warns
        -----
        One warning is issued when some ``Segment`` elements lack a ``True``
        predicate; those segments are left out of the forest.
        """
        PMMLBaseClassifier.__init__(self, pmml)

        ensemble = self.root.find('MiningModel')
        if ensemble is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = ensemble.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        supported_methods = ('majorityVote', 'average')
        if segmentation.get('multipleModelMethod') not in supported_methods:
            raise Exception(
                'PMML model ensemble should use majority vote or average.')

        # Keep only the segments guarded by a <True/> predicate.
        all_segments = segmentation.findall('Segment')
        usable_segments = []
        for seg in all_segments:
            if seg.find('True') is not None:
                usable_segments.append(seg)

        skipped = len(all_segments) - len(usable_segments)
        if skipped > 0:
            warnings.warn(
                'Warning: {} segment(s) ignored because of unsupported predicate.'
                .format(skipped))

        RandomForestClassifier.__init__(self,
                                        n_estimators=len(usable_segments),
                                        n_jobs=n_jobs)
        self._validate_estimator()

        # Unfitted estimator carrying this forest's metadata; stored on the
        # instance before the segments are parsed.
        template = self._make_estimator(append=False, random_state=123)
        template.classes_ = self.classes_
        template.n_features_ = self.n_features_
        template.n_outputs_ = self.n_outputs_
        template.n_classes_ = self.n_classes_
        self.template_estimator = template

        self.estimators_ = [self.get_tree(seg) for seg in usable_segments]

        # Category counts are computed only now, because categories may be
        # inferred while the trees are being parsed.
        target_name = self.target_field.get('name')
        input_fields = [
            field for name, field in self.fields.items()
            if name != target_name
        ]

        def category_count(field):
            # -1 marks a field that is not categorical.
            if field.get('optype') != 'categorical':
                return -1
            return len(self.field_mapping[field.get('name')][1].categories)

        for estimator in self.estimators_:
            counts = np.asarray(
                [
                    category_count(field) for field in input_fields
                    if field.tag == 'DataField'
                ],
                dtype=np.int32,
                order='C')
            estimator.n_categories = counts
            estimator.tree_.set_n_categories(counts)
예제 #3
0
    def __init__(self, pmml):
        """Build a decision tree classifier from a PMML ``TreeModel``.

        Parameters
        ----------
        pmml
            Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

        Raises
        ------
        Exception
            If the PMML root has no ``TreeModel`` child.
        """
        PMMLBaseClassifier.__init__(self, pmml)

        tree_element = self.root.find('TreeModel')
        if tree_element is None:
            raise Exception('PMML model does not contain TreeModel.')

        def make_empty_tree(feature_count):
            # Allocate an empty Tree sized for this classifier.
            return Tree(feature_count,
                        np.array([self.n_classes_], dtype=np.intp),
                        self.n_outputs_, np.array([], dtype=np.int32))

        # Prefer ``n_features_in_``; fall back to ``n_features_`` when an
        # AttributeError is raised.
        try:
            self.tree_ = make_empty_tree(self.n_features_in_)
        except AttributeError:
            self.tree_ = make_empty_tree(self.n_features_)

        # Anything other than a binary-split tree goes through ``unflatten``
        # before being parsed.
        root_node = tree_element.find('Node')
        if tree_element.get('splitCharacteristic') != 'binarySplit':
            root_node = unflatten(root_node)

        nodes, values = construct_tree(root_node, self.classes_,
                                       self.field_mapping)

        node_array = np.ascontiguousarray(nodes, dtype=NODE_DTYPE)
        self.tree_.__setstate__({
            'max_depth': (2**31) - 1,
            'node_count': node_array.shape[0],
            'nodes': node_array,
            'values': np.ascontiguousarray(values),
        })

        # Category counts are computed only now, because categories may be
        # inferred while the tree is being parsed.
        target_name = self.target_field.get('name')
        category_counts = []
        for name, field in self.fields.items():
            if name == target_name or field.tag != 'DataField':
                continue
            if field.get('optype') == 'categorical':
                mapping = self.field_mapping[field.get('name')]
                category_counts.append(len(mapping[1].categories))
            else:
                # -1 marks a field that is not categorical.
                category_counts.append(-1)

        self.tree_.set_n_categories(
            np.asarray(category_counts, dtype=np.int32, order='C'))
  def __init__(self, pmml):
    """Load a logistic regression classifier from a PMML document.

    Two layouts are handled. When the document has a ``MiningModel`` and
    there are more than two classes, one ``RegressionModel`` per class is read
    from the ``modelChain`` segmentation and ``multi_class`` is set to
    ``'ovr'``. Otherwise a single top-level ``RegressionModel`` is read and
    ``multi_class`` is set to ``'auto'``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

    Raises
    ------
    Exception
        If neither layout is present, if the ``MiningModel`` has no
        ``Segmentation``, if the segmentation does not use ``modelChain``,
        or if there are fewer usable ``RegressionModel`` segments than
        classes.
    """
    PMMLBaseClassifier.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)
    LogisticRegression.__init__(self)

    # Import coefficients and intercepts
    model = self.root.find('RegressionModel')
    mining_model = self.root.find('MiningModel')

    if mining_model is not None and self.n_classes_ > 2:
      self.multi_class = 'ovr'
      segmentation = mining_model.find('Segmentation')

      # Fail with a clear message instead of an AttributeError on None.
      if segmentation is None:
        raise Exception('PMML model does not contain Segmentation.')

      if segmentation.get('multipleModelMethod') not in ['modelChain']:
        raise Exception('PMML model for multi-class logistic regression should use modelChain method.')

      # Parse segments: only those guarded by a <True/> predicate are used.
      segments = segmentation.findall('Segment')
      valid_segments = [segment for segment in segments if segment.find('True') is not None]
      models = [segment.find('RegressionModel') for segment in valid_segments]

      # The first n_classes_ segments supply one regression table each.
      class_models = models[:self.n_classes_]
      if len(class_models) < self.n_classes_ or any(m is None for m in class_models):
        raise Exception('PMML model does not contain a RegressionModel segment for every class.')

      tables = [class_model.find('RegressionTable') for class_model in class_models]
    elif model is not None:
      self.multi_class = 'auto'
      # Only tables holding at least one NumericPredictor contribute a row.
      tables = [
        table for table in model.findall('RegressionTable')
        if table.find('NumericPredictor') is not None
      ]
    else:
      raise Exception('PMML model does not contain RegressionModel or Segmentation.')

    coefficients = [_get_coefficients(self, table) for table in tables]
    intercepts = [float(table.get('intercept')) for table in tables]

    # One row of coefficients and one intercept per regression table.
    self.coef_ = np.array(coefficients)
    self.intercept_ = np.array(intercepts)
    self.solver = 'lbfgs'
예제 #5
0
    def __init__(self, pmml):
        """Load coefficients and intercept from a ``GeneralRegressionModel``.

        Parameters
        ----------
        pmml
            Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

        Raises
        ------
        Exception
            If the PMML root has no ``GeneralRegressionModel`` child.
        """
        PMMLBaseClassifier.__init__(self, pmml)
        OneHotEncodingMixin.__init__(self)

        regression = self.root.find('GeneralRegressionModel')
        if regression is None:
            raise Exception(
                'PMML model does not contain GeneralRegressionModel.')

        # The coefficients form the single row of a 2-D ``coef_`` array.
        coefficient_row = _get_coefficients(self, regression)
        self.coef_ = np.array([coefficient_row])
        self.intercept_ = _get_intercept(regression)
    def __init__(self, pmml):
        """Load a Gaussian naive Bayes classifier from a ``NaiveBayesModel``.

        Sets ``class_prior_``, ``theta_`` and the per-class variances. The
        variances are stored under ``var_`` and, where the estimator allows
        it, also under the older name ``sigma_``.

        Parameters
        ----------
        pmml
            Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

        Raises
        ------
        Exception
            If the PMML root has no ``NaiveBayesModel`` child.
        """
        PMMLBaseClassifier.__init__(self, pmml)
        OneHotEncodingMixin.__init__(self)

        model = self.root.find('NaiveBayesModel')

        if model is None:
            raise Exception('PMML model does not contain NaiveBayesModel.')

        inputs = model.find('BayesInputs')

        target_values = {
            target: self._get_target_values(inputs, target)
            for target in self.classes_
        }

        try:
            outputs = model.find('BayesOutput').find(
                'TargetValueCounts').findall('TargetValueCount')
            counts = [int(x.get('count')) for x in outputs]
            total = np.sum(counts)
            self.class_prior_ = np.array([x / total for x in counts])
        except AttributeError:
            # BayesOutput or TargetValueCounts is absent (``find`` returned
            # None): fall back to a uniform prior over the classes.
            self.class_prior_ = np.array(
                [1 / len(self.classes_) for _ in self.classes_])

        self.theta_ = np.array(
            [[float(value.get('mean', 0)) for value in target_values[target]]
             for target in self.classes_])

        variances = np.array([[
            float(value.get('variance', 0))
            for value in target_values[target]
        ] for target in self.classes_])

        # Always populate ``var_``. Previously it was written only when
        # assigning ``sigma_`` failed, so estimators that accept a plain
        # ``sigma_`` attribute but read ``var_`` were left without variances.
        self.var_ = variances
        try:
            self.sigma_ = variances
        except AttributeError:
            # ``sigma_`` is a read-only alias here; ``var_`` already holds
            # the values, so there is nothing more to do.
            pass
예제 #7
0
    def __init__(self, pmml):
        """Load a Gaussian naive Bayes classifier from a ``NaiveBayesModel``.

        Sets a uniform ``class_prior_``, ``theta_`` and the per-class
        variances. The variances are stored under ``var_`` and, where the
        estimator allows it, also under the older name ``sigma_``.

        Parameters
        ----------
        pmml
            Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

        Raises
        ------
        Exception
            If the PMML root has no ``NaiveBayesModel`` child.
        """
        PMMLBaseClassifier.__init__(self, pmml)

        model = self.root.find('NaiveBayesModel')

        if model is None:
            raise Exception('PMML model does not contain NaiveBayesModel.')

        inputs = model.find('BayesInputs')

        target_values = {
            target: self._get_target_values(inputs, target)
            for target in self.classes_
        }

        # Every class gets the same prior probability.
        self.class_prior_ = np.array(
            [1 / len(self.classes_) for _ in self.classes_])
        self.theta_ = np.array(
            [[float(value.get('mean', 0)) for value in target_values[target]]
             for target in self.classes_])

        variances = np.array([[
            float(value.get('variance', 0)) for value in target_values[target]
        ] for target in self.classes_])

        # Store the variances under ``var_`` as well as ``sigma_`` so the
        # estimator works whether it reads one name or the other. Assigning
        # ``sigma_`` can raise AttributeError when it is a read-only alias.
        self.var_ = variances
        try:
            self.sigma_ = variances
        except AttributeError:
            # ``var_`` already holds the values; nothing more to do.
            pass
예제 #8
0
  def __init__(self, pmml, n_jobs=None):
    """Initialise a k-nearest-neighbours classifier from a PMML document.

    Runs the three base initialisers in order and then fits the scikit-learn
    classifier on ``self._X`` and ``self._y``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.
    n_jobs
        Forwarded unchanged to ``KNeighborsClassifier.__init__``.
    """
    PMMLBaseClassifier.__init__(self, pmml)
    KNeighborsClassifier.__init__(self, n_jobs=n_jobs)
    PMMLBaseKNN.__init__(self)

    # ``_X`` / ``_y`` are not assigned in this method; presumably one of the
    # base initialisers above sets them -- TODO confirm.
    KNeighborsClassifier.fit(self, self._X, self._y)
예제 #9
0
 def __init__(self, pmml):
     """Initialise a support vector classifier from a PMML document.

     Only chains the four base initialisers, in this order; no parsing is
     done in this method itself.

     Parameters
     ----------
     pmml
         Forwarded unchanged to ``PMMLBaseClassifier.__init__``.
     """
     PMMLBaseClassifier.__init__(self, pmml)
     OneHotEncodingMixin.__init__(self)
     SVC.__init__(self)
     # NOTE(review): presumably PMMLBaseSVM reads state set by the calls
     # above, which is why it runs last -- confirm against its definition.
     PMMLBaseSVM.__init__(self)
예제 #10
0
    def __init__(self, pmml):
        """Build a gradient boosting classifier from a PMML ``MiningModel``.

        The outer ``Segmentation`` must use ``modelChain``. Each class
        segment is expected to hold a nested ``MiningModel`` whose own
        segments contain the individual ``TreeModel`` elements.

        Parameters
        ----------
        pmml
            Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

        Raises
        ------
        Exception
            If ``MiningModel`` or ``Segmentation`` is missing, or if the
            segmentation's ``multipleModelMethod`` is not ``modelChain``.
        """
        PMMLBaseClassifier.__init__(self, pmml)

        mining_model = self.root.find('MiningModel')
        if mining_model is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = mining_model.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        if segmentation.get('multipleModelMethod') not in ['modelChain']:
            raise Exception('PMML model ensemble should use modelChain.')

        # Parse segments
        segments = segmentation.findall('Segment')
        # One slot per class; slots not visited below stay None.
        valid_segments = [None] * self.n_classes_

        indices = range(self.n_classes_)
        # For binary classification, only the predictions of the first class need to be described, the other can be inferred
        # Not all PMML models do this, but we assume the following conditions imply this approach.
        if self.n_classes_ == 2 and len(
                segments) == 2 and segments[-1].find('TreeModel') is None:
            indices = [0]

        # Per class: keep the nested segments that have both a <True/>
        # predicate and a TreeModel.
        for i in indices:
            valid_segments[i] = [
                segment for segment in segments[i].find('MiningModel').find(
                    'Segmentation').findall('Segment')
                if segment.find('True') is not None
                and segment.find('TreeModel') is not None
            ]

        # The number of boosting stages is taken from the first class only.
        n_estimators = len(valid_segments[0])
        GradientBoostingClassifier.__init__(self, n_estimators=n_estimators)

        # Unfitted regressor carrying this model's feature/output counts;
        # stored on the instance before the trees are parsed.
        clf = DecisionTreeRegressor(random_state=123)
        try:
            clf.n_features_in_ = self.n_features_in_
        except AttributeError:
            # Fall back to the ``n_features_`` attribute name.
            clf.n_features_ = self.n_features_
        clf.n_outputs_ = self.n_outputs_
        self.template_estimator = clf

        # NOTE(review): private scikit-learn hook; presumably what sets up
        # ``loss_`` before it is overridden below -- confirm.
        self._check_params()

        if self.n_classes_ == 2 and len(
                segments) == 3 and segments[-1].find('TreeModel') is None:
            # For binary classification where both sides are specified, we need to force multinomial deviance
            self.loss_ = _gb_losses.MultinomialDeviance(self.n_classes_ + 1)
            self.loss_.K = 2

        # Initial estimator: try to seed class priors from each class's
        # ``rescaleConstant``. Any AttributeError raised in this block (for
        # example a ``find`` returning None along the chain) switches to the
        # 'zero' initialiser instead.
        try:
            self.init = None
            self._init_state()

            self.init_.class_prior_ = [
                expit(-float(segments[i].find('MiningModel').find(
                    'Targets').find('Target').get('rescaleConstant')))
                for i in indices
            ]

            if self.n_classes_ == 2:
                # Binary case: the second prior is the complement of the first.
                self.init_.class_prior_ = [
                    self.init_.class_prior_[0], 1 - self.init_.class_prior_[0]
                ]

            self.init_.classes_ = [i for i, _ in enumerate(self.classes_)]
            self.init_.n_classes_ = self.n_classes_
            self.init_.n_outputs_ = 1
            self.init_._strategy = self.init_.strategy
        except AttributeError:
            self.init = 'zero'
            self._init_state()

        # Fill every cell of ``estimators_``: x indexes into the class's
        # segment list, y selects the class segment.
        for x, y in np.ndindex(self.estimators_.shape):
            try:
                # ``rescaleFactor`` defaults to 1 when the attribute is absent.
                factor = float(segments[y].find('MiningModel').find(
                    'Targets').find('Target').get('rescaleFactor', 1))
                self.estimators_[x, y] = get_tree(self,
                                                  valid_segments[y][x],
                                                  rescale_factor=factor)
            except AttributeError:
                # On AttributeError, build the tree without a rescale factor.
                self.estimators_[x, y] = get_tree(self, valid_segments[y][x])

        # Required after constructing trees, because categories may be inferred in
        # the parsing process
        target = self.target_field.get('name')
        fields = [
            field for name, field in self.fields.items() if name != target
        ]
        for x, y in np.ndindex(self.estimators_.shape):
            clf = self.estimators_[x, y]
            # Category count per DataField; -1 marks a non-categorical field.
            n_categories = np.asarray([
                len(self.field_mapping[field.get('name')][1].categories)
                if field.get('optype') == 'categorical' else -1
                for field in fields if field.tag == 'DataField'
            ],
                                      dtype=np.int32,
                                      order='C')
            clf.n_categories = n_categories
            clf.tree_.set_n_categories(n_categories)

        # Boolean mask over fields: True where the field is categorical.
        self.categorical = [
            x != -1 for x in self.estimators_[0, 0].n_categories
        ]