def __init__(self, pmml):
    """Initialise the classifier and load its linear model from PMML.

    Runs the ``PMMLBaseClassifier``, ``OneHotEncodingMixin`` and
    ``LinearSVC`` initialisers, then looks up the ``RegressionModel``
    element on ``self.root`` and stores one coefficient row and one
    intercept for every ``RegressionTable`` that contains a
    ``NumericPredictor``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.
        NOTE(review): presumably a path or file-like PMML source; confirm
        against the base class.

    Raises
    ------
    Exception
        If no ``RegressionModel`` element is found on ``self.root``.
    """
    PMMLBaseClassifier.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)
    LinearSVC.__init__(self)

    # Import coefficients and intercepts
    model = self.root.find('RegressionModel')
    if model is None:
        raise Exception('PMML model does not contain RegressionModel.')

    # Only tables that carry numeric predictors contribute a row.
    tables = [
        table for table in model.findall('RegressionTable')
        if table.find('NumericPredictor') is not None
    ]

    # The previous single-table special case re-wrapped a one-element list
    # in an identical one-element list, so the arrays are built directly.
    self.coef_ = np.array(
        [_linear_get_coefficients(self, table) for table in tables])
    self.intercept_ = np.array(
        [float(table.get('intercept')) for table in tables])
def __init__(self, pmml, n_jobs=None):
    """Initialise the forest and build its trees from a PMML ``MiningModel``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.
    n_jobs
        Forwarded unchanged to ``RandomForestClassifier.__init__``.

    Raises
    ------
    Exception
        If no ``MiningModel`` or ``Segmentation`` element is found, or if
        the segmentation's ``multipleModelMethod`` is neither
        ``majorityVote`` nor ``average``.

    Warns
    -----
    A warning is issued when one or more segments have no ``True``
    predicate; those segments are ignored.
    """
    PMMLBaseClassifier.__init__(self, pmml)

    mining_model = self.root.find('MiningModel')
    if mining_model is None:
        raise Exception('PMML model does not contain MiningModel.')

    segmentation = mining_model.find('Segmentation')
    if segmentation is None:
        raise Exception('PMML model does not contain Segmentation.')

    combine_method = segmentation.get('multipleModelMethod')
    if combine_method not in ('majorityVote', 'average'):
        raise Exception(
            'PMML model ensemble should use majority vote or average.')

    # Parse segments: only segments with a ``True`` predicate are used.
    all_segments = segmentation.findall('Segment')
    usable_segments = [
        candidate for candidate in all_segments
        if candidate.find('True') is not None
    ]
    ignored = len(all_segments) - len(usable_segments)
    if ignored > 0:
        warnings.warn(
            'Warning: {} segment(s) ignored because of unsupported predicate.'
            .format(ignored))

    RandomForestClassifier.__init__(self,
                                    n_estimators=len(usable_segments),
                                    n_jobs=n_jobs)
    self._validate_estimator()

    # Template estimator carrying the metadata copied from this ensemble.
    template = self._make_estimator(append=False, random_state=123)
    template.classes_ = self.classes_
    template.n_features_ = self.n_features_
    template.n_outputs_ = self.n_outputs_
    template.n_classes_ = self.n_classes_
    self.template_estimator = template

    self.estimators_ = list(map(self.get_tree, usable_segments))

    # Required after constructing trees, because categories may be inferred
    # in the parsing process
    target_name = self.target_field.get('name')
    data_fields = [
        field for name, field in self.fields.items()
        if name != target_name and field.tag == 'DataField'
    ]

    for tree in self.estimators_:
        sizes = []
        for field in data_fields:
            if field.get('optype') == 'categorical':
                mapping = self.field_mapping[field.get('name')]
                sizes.append(len(mapping[1].categories))
            else:
                # -1 marks a field that is not categorical.
                sizes.append(-1)
        category_counts = np.asarray(sizes, dtype=np.int32, order='C')
        tree.n_categories = category_counts
        tree.tree_.set_n_categories(category_counts)
def __init__(self, pmml):
    """Initialise the classifier and build ``self.tree_`` from PMML.

    Looks up the ``TreeModel`` element on ``self.root``, converts its node
    hierarchy into node/value arrays with ``construct_tree`` and loads them
    into a freshly created ``Tree`` through ``__setstate__``. Afterwards the
    per-field category counts are pushed into the tree.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

    Raises
    ------
    Exception
        If no ``TreeModel`` element is found on ``self.root``.
    """
    PMMLBaseClassifier.__init__(self, pmml)

    tree_model = self.root.find('TreeModel')
    if tree_model is None:
        raise Exception('PMML model does not contain TreeModel.')

    # Parse tree
    # Only the attribute lookup differs between the two supported
    # spellings, so only that lookup is guarded.
    try:
        n_features = self.n_features_in_
    except AttributeError:
        n_features = self.n_features_

    self.tree_ = Tree(n_features,
                      np.array([self.n_classes_], dtype=np.intp),
                      self.n_outputs_,
                      np.array([], dtype=np.int32))

    # Trees that are not declared as binary splits go through ``unflatten``
    # before construction.
    if tree_model.get('splitCharacteristic') == 'binarySplit':
        first_node = tree_model.find('Node')
    else:
        first_node = unflatten(tree_model.find('Node'))

    nodes, values = construct_tree(first_node, self.classes_,
                                   self.field_mapping)

    node_ndarray = np.ascontiguousarray(nodes, dtype=NODE_DTYPE)
    value_ndarray = np.ascontiguousarray(values)

    # No depth limit is read from the PMML, so the largest signed 32-bit
    # value is always used.
    self.tree_.__setstate__({
        'max_depth': (2**31) - 1,
        'node_count': node_ndarray.shape[0],
        'nodes': node_ndarray,
        'values': value_ndarray
    })

    # Required after constructing trees, because categories may be inferred
    # in the parsing process
    target = self.target_field.get('name')
    fields = [
        field for name, field in self.fields.items() if name != target
    ]

    # One entry per DataField: the number of categories for categorical
    # fields, -1 otherwise.
    n_categories = np.asarray([
        len(self.field_mapping[field.get('name')][1].categories)
        if field.get('optype') == 'categorical' else -1
        for field in fields
        if field.tag == 'DataField'
    ], dtype=np.int32, order='C')

    self.tree_.set_n_categories(n_categories)
def __init__(self, pmml):
    """Initialise the classifier and load logistic regression weights.

    Two PMML layouts are handled:

    * a ``MiningModel`` when ``self.n_classes_ > 2``: the first
      ``self.n_classes_`` segments that have a ``True`` predicate each
      supply one ``RegressionTable`` and ``multi_class`` is set to
      ``'ovr'``;
    * otherwise a plain ``RegressionModel``: every ``RegressionTable``
      that contains a ``NumericPredictor`` supplies one row and
      ``multi_class`` is set to ``'auto'``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

    Raises
    ------
    Exception
        If neither layout is present, if the ``MiningModel`` has no
        ``Segmentation`` element, or if the segmentation does not use
        the ``modelChain`` method.
    """
    PMMLBaseClassifier.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)
    LogisticRegression.__init__(self)

    # Import coefficients and intercepts
    model = self.root.find('RegressionModel')
    mining_model = self.root.find('MiningModel')

    if mining_model is not None and self.n_classes_ > 2:
        self.multi_class = 'ovr'
        segmentation = mining_model.find('Segmentation')

        # Fail with an explicit message instead of an AttributeError on
        # ``None.get`` when the element is absent.
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        if segmentation.get('multipleModelMethod') not in ['modelChain']:
            raise Exception('PMML model for multi-class logistic regression should use modelChain method.')

        # Parse segments
        segments = segmentation.findall('Segment')
        valid_segments = [
            segment for segment in segments
            if segment.find('True') is not None
        ]
        models = [
            segment.find('RegressionModel') for segment in valid_segments
        ]

        # Only the first ``n_classes_`` models are read; any further
        # segments in the chain are not used here.
        tables = [
            models[i].find('RegressionTable')
            for i in range(self.n_classes_)
        ]
    elif model is not None:
        self.multi_class = 'auto'
        tables = [
            table for table in model.findall('RegressionTable')
            if table.find('NumericPredictor') is not None
        ]
    else:
        raise Exception('PMML model does not contain RegressionModel or Segmentation.')

    # The previous single-table special case re-wrapped a one-element list
    # in an identical one-element list, so the arrays are built directly.
    self.coef_ = np.array(
        [_get_coefficients(self, table) for table in tables])
    self.intercept_ = np.array(
        [float(table.get('intercept')) for table in tables])
    self.solver = 'lbfgs'
def __init__(self, pmml):
    """Initialise the classifier from a PMML ``GeneralRegressionModel``.

    Stores the coefficients returned by ``_get_coefficients`` as a
    single-row array in ``coef_`` and the value returned by
    ``_get_intercept`` in ``intercept_``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

    Raises
    ------
    Exception
        If no ``GeneralRegressionModel`` element is found on ``self.root``.
    """
    PMMLBaseClassifier.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)

    # Import coefficients and intercepts
    regression = self.root.find('GeneralRegressionModel')
    if regression is None:
        raise Exception(
            'PMML model does not contain GeneralRegressionModel.')

    coefficient_row = _get_coefficients(self, regression)
    self.coef_ = np.array([coefficient_row])
    self.intercept_ = _get_intercept(regression)
def __init__(self, pmml):
    """Initialise the classifier from a PMML ``NaiveBayesModel``.

    Sets ``class_prior_`` from the ``TargetValueCount`` counts when the
    ``BayesOutput``/``TargetValueCounts`` elements are present, and to a
    uniform distribution over ``self.classes_`` otherwise. ``theta_``
    holds the ``mean`` attribute and ``sigma_`` (or ``var_``) the
    ``variance`` attribute of every value returned by
    ``self._get_target_values``, one row per class; a missing attribute
    counts as 0.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

    Raises
    ------
    Exception
        If no ``NaiveBayesModel`` element is found on ``self.root``.
    """
    PMMLBaseClassifier.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)

    model = self.root.find('NaiveBayesModel')
    if model is None:
        raise Exception('PMML model does not contain NaiveBayesModel.')

    inputs = model.find('BayesInputs')
    target_values = {
        target: self._get_target_values(inputs, target)
        for target in self.classes_
    }

    def _stat_matrix(attribute):
        # One row per class, one column per target value.
        return np.array([[
            float(value.get(attribute, 0))
            for value in target_values[target]
        ] for target in self.classes_])

    # Only the element lookup can hit a missing (None) element, so only
    # that lookup is guarded.
    try:
        outputs = model.find('BayesOutput').find(
            'TargetValueCounts').findall('TargetValueCount')
    except AttributeError:
        self.class_prior_ = np.array(
            [1 / len(self.classes_) for _ in self.classes_])
    else:
        # Counts are parsed as floats so real-valued counts such as
        # "50.0" are accepted; the total is computed once.
        counts = np.array([float(x.get('count')) for x in outputs])
        self.class_prior_ = counts / counts.sum()

    self.theta_ = _stat_matrix('mean')

    variances = _stat_matrix('variance')
    # NOTE(review): presumably ``sigma_`` cannot be assigned on some
    # scikit-learn versions, in which case ``var_`` is used - confirm.
    try:
        self.sigma_ = variances
    except AttributeError:
        self.var_ = variances
def __init__(self, pmml):
    """Initialise the classifier from a PMML ``NaiveBayesModel``.

    ``class_prior_`` is set to a uniform distribution over
    ``self.classes_``. ``theta_`` and ``sigma_`` hold the ``mean`` and
    ``variance`` attributes (0 when missing) of every value returned by
    ``self._get_target_values``, one row per class.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

    Raises
    ------
    Exception
        If no ``NaiveBayesModel`` element is found on ``self.root``.
    """
    PMMLBaseClassifier.__init__(self, pmml)

    bayes_model = self.root.find('NaiveBayesModel')
    if bayes_model is None:
        raise Exception('PMML model does not contain NaiveBayesModel.')

    bayes_inputs = bayes_model.find('BayesInputs')
    target_values = {}
    for label in self.classes_:
        target_values[label] = self._get_target_values(bayes_inputs, label)

    def _stat_matrix(attribute):
        # One row per class, one column per target value.
        rows = []
        for label in self.classes_:
            rows.append([
                float(entry.get(attribute, 0))
                for entry in target_values[label]
            ])
        return np.array(rows)

    # Every class gets the same prior probability.
    self.class_prior_ = np.array(
        [1 / len(self.classes_) for _ in self.classes_])
    self.theta_ = _stat_matrix('mean')
    self.sigma_ = _stat_matrix('variance')
def fit(self, x, y):
    """Delegate to ``PMMLBaseClassifier.fit`` and return its result.

    ``x`` and ``y`` are passed through unchanged.
    NOTE(review): the behaviour of the base implementation is not visible
    from here - confirm against ``PMMLBaseClassifier``.
    """
    return PMMLBaseClassifier.fit(self, x, y)
def __init__(self, pmml, n_jobs=None):
    """Initialise the nearest-neighbour classifier from PMML and fit it.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.
    n_jobs
        Forwarded unchanged to ``KNeighborsClassifier.__init__``.
    """
    PMMLBaseClassifier.__init__(self, pmml)
    KNeighborsClassifier.__init__(self, n_jobs=n_jobs)
    PMMLBaseKNN.__init__(self)

    # NOTE(review): ``self._X`` and ``self._y`` are presumably populated by
    # the initialisers above (likely ``PMMLBaseKNN.__init__``) - confirm.
    KNeighborsClassifier.fit(self, self._X, self._y)
def __init__(self, pmml):
    """Initialise the support vector classifier from PMML.

    Runs the four base initialisers in a fixed order; ``pmml`` is
    forwarded unchanged to ``PMMLBaseClassifier.__init__``.
    NOTE(review): ``PMMLBaseSVM.__init__`` runs last, presumably because it
    relies on state set by the earlier initialisers - confirm.
    """
    PMMLBaseClassifier.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)
    SVC.__init__(self)
    PMMLBaseSVM.__init__(self)
def __init__(self, pmml):
    """Initialise the gradient boosting classifier from a PMML model chain.

    Reads the top-level ``MiningModel``/``Segmentation`` on ``self.root``,
    collects the tree segments of each nested ``MiningModel``, initialises
    ``GradientBoostingClassifier`` with the number of trees found for the
    first class and fills ``self.estimators_`` with trees built by
    ``get_tree``. Finally the per-field category counts are pushed into
    every tree and ``self.categorical`` is derived from them.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseClassifier.__init__``.

    Raises
    ------
    Exception
        If no ``MiningModel`` or ``Segmentation`` element is found, or if
        the segmentation's ``multipleModelMethod`` is not ``modelChain``.
    """
    PMMLBaseClassifier.__init__(self, pmml)

    mining_model = self.root.find('MiningModel')
    if mining_model is None:
        raise Exception('PMML model does not contain MiningModel.')

    segmentation = mining_model.find('Segmentation')
    if segmentation is None:
        raise Exception('PMML model does not contain Segmentation.')

    if segmentation.get('multipleModelMethod') not in ['modelChain']:
        raise Exception('PMML model ensemble should use modelChain.')

    # Parse segments
    segments = segmentation.findall('Segment')
    # One slot per class; slots whose index is not visited below stay None.
    valid_segments = [None] * self.n_classes_

    indices = range(self.n_classes_)
    # For binary classification, only the predictions of the first class need to be described, the other can be inferred
    # Not all PMML models do this, but we assume the following conditions imply this approach.
    if self.n_classes_ == 2 and len(
            segments) == 2 and segments[-1].find('TreeModel') is None:
        indices = [0]

    # Keep only nested segments that have both a ``True`` predicate and a
    # ``TreeModel``.
    for i in indices:
        valid_segments[i] = [
            segment for segment in segments[i].find('MiningModel').find(
                'Segmentation').findall('Segment')
            if segment.find('True') is not None
            and segment.find('TreeModel') is not None
        ]

    # The number of trees found for the first class determines n_estimators.
    n_estimators = len(valid_segments[0])
    GradientBoostingClassifier.__init__(self, n_estimators=n_estimators)

    clf = DecisionTreeRegressor(random_state=123)
    # Use ``n_features_in_`` when this instance has it, otherwise fall back
    # to ``n_features_``.
    try:
        clf.n_features_in_ = self.n_features_in_
    except AttributeError:
        clf.n_features_ = self.n_features_
    clf.n_outputs_ = self.n_outputs_
    self.template_estimator = clf

    self._check_params()

    # NOTE(review): this condition checks for three top-level segments,
    # whereas the shortcut above checks for two - confirm both are intended.
    if self.n_classes_ == 2 and len(
            segments) == 3 and segments[-1].find('TreeModel') is None:
        # For binary classification where both sides are specified, we need to force multinomial deviance
        self.loss_ = _gb_losses.MultinomialDeviance(self.n_classes_ + 1)
        self.loss_.K = 2

    # Configure the initial estimator from the ``rescaleConstant`` of each
    # visited segment. Any AttributeError raised in this block (for example
    # from a lookup on a missing element, or from ``self.init_``) switches
    # to the 'zero' initialisation instead.
    try:
        self.init = None
        self._init_state()

        self.init_.class_prior_ = [
            expit(-float(segments[i].find('MiningModel').find(
                'Targets').find('Target').get('rescaleConstant')))
            for i in indices
        ]

        # For two classes the second prior is the complement of the first.
        if self.n_classes_ == 2:
            self.init_.class_prior_ = [
                self.init_.class_prior_[0], 1 - self.init_.class_prior_[0]
            ]

        self.init_.classes_ = [i for i, _ in enumerate(self.classes_)]
        self.init_.n_classes_ = self.n_classes_
        self.init_.n_outputs_ = 1
        self.init_._strategy = self.init_.strategy
    except AttributeError:
        self.init = 'zero'
        self._init_state()

    # Build one tree per cell of ``self.estimators_``; ``x`` indexes the
    # boosting stage and ``y`` the top-level segment. The tree is built
    # without a rescale factor when an AttributeError is raised inside the
    # ``try`` body.
    for x, y in np.ndindex(self.estimators_.shape):
        try:
            factor = float(segments[y].find('MiningModel').find(
                'Targets').find('Target').get('rescaleFactor', 1))
            self.estimators_[x, y] = get_tree(self,
                                              valid_segments[y][x],
                                              rescale_factor=factor)
        except AttributeError:
            self.estimators_[x, y] = get_tree(self, valid_segments[y][x])

    # Required after constructing trees, because categories may be inferred in
    # the parsing process
    target = self.target_field.get('name')
    fields = [
        field for name, field in self.fields.items() if name != target
    ]
    for x, y in np.ndindex(self.estimators_.shape):
        clf = self.estimators_[x, y]
        # One entry per DataField: the number of categories for categorical
        # fields, -1 otherwise.
        n_categories = np.asarray([
            len(self.field_mapping[field.get('name')][1].categories)
            if field.get('optype') == 'categorical' else -1
            for field in fields
            if field.tag == 'DataField'
        ], dtype=np.int32, order='C')
        clf.n_categories = n_categories
        clf.tree_.set_n_categories(n_categories)

    # True for every field whose category count is not the -1 marker.
    self.categorical = [
        x != -1 for x in self.estimators_[0, 0].n_categories
    ]