def replace_value(field, original, replacement):
    """
    Build a derived-feature transformation that substitutes one value of a field with another.

    Two cases are handled:

    * ``original`` is not None: every occurrence of ``original`` is replaced,
      expressed in PMML as ``if(equals(field, original), replacement, field)``;
    * ``original`` is None: missing values are replaced, expressed in PMML as
      ``0 + field`` with ``mapMissingTo=replacement``.

    :param field: name of the field (dataframe column) to transform
    :param original: value to be replaced; None means "replace missing values"
    :param replacement: value to put in place of ``original``
    :return: dict holding the PMML expression under
        DerivedFeatureTransformations.TRANSFORMATION and the equivalent
        dataframe function under DerivedFeatureTransformations.FUNCTION
    """
    if original is not None:
        condition = pmml.Apply(function='equals')
        condition.append(pmml.FieldRef(field=field))
        condition.append(pmml.Constant(original))

        # The 'if' function expects (condition, then-value, else-value); the
        # condition has to come first, otherwise the PMML expression does not
        # match the np.where() below.
        transformation = pmml.Apply(function='if')
        transformation.append(condition)
        transformation.append(pmml.Constant(replacement))
        transformation.append(pmml.FieldRef(field=field))
        return {
            DerivedFeatureTransformations.TRANSFORMATION: transformation,
            DerivedFeatureTransformations.FUNCTION:
                lambda df: np.where(df[field] == original, replacement, df[field])
        }
    else:
        # Adding zero leaves present values untouched; the replacement is
        # supplied through the mapMissingTo attribute of the Apply element.
        transformation = pmml.Apply(function='+', mapMissingTo=replacement)
        transformation.append(pmml.Constant(0))
        transformation.append(pmml.FieldRef(field=field))
        return {
            DerivedFeatureTransformations.TRANSFORMATION: transformation,
            DerivedFeatureTransformations.FUNCTION:
                lambda df: np.where(df[field].isnull(), replacement, df[field])
        }
def output_transformation(self):
    """
    Create the Output element that maps the raw prediction through a sigmoid.

    The emitted expression is
    1 / (1 + exp(predictedValue * -(1 + n_estimators * learning_rate)))

    :return: Output element holding the raw 'predictedValue' field followed by
        the transformed field named after the first output-schema feature
    """
    estimator = self.estimator
    result = pmml.Output()
    result.append(
        pmml.OutputField(feature='predictedValue', name='predictedValue'))

    target = self.context.schemas[self.SCHEMA_OUTPUT][0]
    transformed = pmml.OutputField(
        dataType='double',
        feature='transformedValue',
        name=target.full_name,
        optype=target.optype)

    # Segment aggregation has no weighted sum, only a weighted average, so the
    # averaged value is scaled back by the total weight (and negated for exp).
    negated_total_weight = -(
        1 + estimator.n_estimators * estimator.learning_rate)
    exponent = pmml.Apply(function='*')
    exponent.append(pmml.FieldRef(field='predictedValue'))
    exponent.append(pmml.Constant(negated_total_weight, dataType='double'))

    exponential = pmml.Apply(function='exp')
    exponential.append(exponent)

    denominator = pmml.Apply(function='+')
    denominator.append(pmml.Constant(1.0, dataType='double'))
    denominator.append(exponential)

    sigmoid = pmml.Apply(function='/')
    sigmoid.append(pmml.Constant(1.0, dataType='double'))
    sigmoid.append(denominator)

    transformed.append(sigmoid)
    result.append(transformed)
    return result
def greedy_evaluation(node):
    """
    Recursively translate an expression node into a dataframe function and a PMML expression.

    :param node: one of
        * a str, treated as a reference to a field (dataframe column);
        * a tuple/list ``(operation_code, argument, argument)`` whose arguments
          are nodes themselves; supported codes are '*', '-', '+', '/' and '%';
        * any other value, treated as a numeric constant
    :return: tuple ``(function, expression)`` where ``function`` accepts a
        dataframe and computes the value, and ``expression`` is the matching
        PMML element
    """
    if isinstance(node, str):
        # field reference
        return (lambda df: df[node]), pmml.FieldRef(field=node)
    elif isinstance(node, (tuple, list)):
        # Evaluate the arguments into a list. The results are consumed twice
        # (once for the PMML expression, once every time the returned function
        # is called), and a bare map() is a one-shot iterator on Python 3, which
        # would leave the returned function without operands.
        args = [greedy_evaluation(child) for child in node[1:]]
        functions = {
            '*': np.multiply,
            '-': np.subtract,
            '+': np.add,
            '/': np.divide,
            '%': np.mod,
        }
        assert isinstance(
            node[0], str), 'First element should be a code of operation'
        assert node[
            0] in functions, 'Unknown function code {}. Supported codes: {}'.format(
            node[0], functions.keys())
        expression_builders = {
            '*': partial(basic_function, '*'),
            '-': partial(basic_function, '-'),
            '+': partial(basic_function, '+'),
            '/': partial(basic_function, '/'),
            '%': mod_function
        }
        expr = expression_builders[node[0]]([a[1] for a in args])
        operation = functions[node[0]]
        evaluators = [a[0] for a in args]

        def func(df):
            # apply the numpy operation to the evaluated arguments
            return operation(*[evaluate(df) for evaluate in evaluators])

        return func, expr
    else:
        # numeric terminal
        return lambda df: node, pmml.Constant(node, dataType='double')
def output_transformation(self):
    """
    Assemble the Output element of a two-class model.

    The output consists of, in order:

    * one raw 'predictedValue' field per feature of the INTERNAL schema;
    * the probability of the second category (treated as the positive one):
      1 / (1 + exp(raw * -(1 + n_estimators * learning_rate)));
    * the probability of the first category: 1 - positive probability;
    * the label, obtained by comparing the raw value with 0.

    :return: Output element
    """
    schemas = self.context.schemas
    double = FeatureType.DOUBLE.value
    result = pmml.Output()

    # keep the raw predictions available under their internal names
    for internal_feature in schemas[Schema.INTERNAL]:
        raw_name = Schema.INTERNAL.extract_feature_name(internal_feature)
        result.append(
            pmml.OutputField(feature='predictedValue', name=raw_name))

    # logistic transformation for the positive category
    positive_category = schemas[Schema.CATEGORIES][1]
    positive_name = Schema.CATEGORIES.extract_feature_name(positive_category)
    positive_field = pmml.OutputField(
        dataType=positive_category.data_type.value,
        feature='transformedValue',
        name=positive_name,
        optype=positive_category.optype.value)

    raw_reference = pmml.FieldRef(
        field=Schema.INTERNAL.extract_feature_name(
            positive_category.namespace))
    # Segment aggregation has no weighted sum, only a weighted average, so the
    # averaged value is scaled back by the total weight (and negated for exp).
    negated_total_weight = -(
        1 + self.estimator.n_estimators * self.estimator.learning_rate)
    exponent = pmml.Apply(function='*')
    exponent.append(raw_reference)
    exponent.append(pmml.Constant(negated_total_weight, dataType=double))

    exponential = pmml.Apply(function='exp')
    exponential.append(exponent)

    denominator = pmml.Apply(function='+')
    denominator.append(pmml.Constant(1.0, dataType=double))
    denominator.append(exponential)

    sigmoid = pmml.Apply(function='/')
    sigmoid.append(pmml.Constant(1.0, dataType=double))
    sigmoid.append(denominator)

    positive_field.append(sigmoid)
    result.append(positive_field)

    # the negative category gets the complementary probability
    negative_category = schemas[Schema.CATEGORIES][0]
    negative_field = pmml.OutputField(
        dataType=negative_category.data_type.value,
        feature='transformedValue',
        name=Schema.CATEGORIES.extract_feature_name(negative_category),
        optype=negative_category.optype.value)
    complement = pmml.Apply(function='-')
    complement.append(pmml.Constant(1, dataType=double))
    complement.append(
        pmml.FieldRef(
            field=Schema.CATEGORIES.extract_feature_name(positive_category)))
    negative_field.append(complement)
    result.append(negative_field)

    # the label is derived from the sign of the raw prediction
    label = schemas[Schema.OUTPUT][0]
    label_field = pmml.OutputField(
        feature='transformedValue',
        name=Schema.OUTPUT.extract_feature_name(label),
        optype=label.optype.value,
        dataType=label.data_type.value)
    by_sign = pmml.Discretize(
        field=Schema.INTERNAL.extract_feature_name(label))
    bins = (
        (label.value_list[0], {'closure': "openOpen", 'rightMargin': 0}),
        (label.value_list[1], {'closure': "closedOpen", 'leftMargin': 0}),
    )
    for bin_value, interval_attributes in bins:
        sign_bin = pmml.DiscretizeBin(binValue=bin_value)
        sign_bin.append(pmml.Interval(**interval_attributes))
        by_sign.append(sign_bin)
    label_field.append(by_sign)
    result.append(label_field)

    return result