Example #1
0
    def replace_value(field, original, replacement):
        """
        Build a derived-feature transformation that substitutes one value of a field with another.

        If ``original`` is not None, values equal to ``original`` are replaced;
        if ``original`` is None, missing values are replaced instead.

        :param field: name of the field; also used as the data frame column key
        :param original: value to be replaced, or None to replace missing values
        :param replacement: value to put in place of ``original``
        :return: dict with the PMML expression under DerivedFeatureTransformations.TRANSFORMATION
            and the equivalent data frame callable under DerivedFeatureTransformations.FUNCTION
        """
        if original is not None:
            condition = pmml.Apply(function='equals')
            condition.append(pmml.FieldRef(field=field))
            condition.append(pmml.Constant(original))

            # PMML 'if' expects (condition, then-value, else-value) in this order;
            # without the condition the replacement constant would be read as the condition
            transformation = pmml.Apply(function='if')
            transformation.append(condition)
            transformation.append(pmml.Constant(replacement))
            transformation.append(pmml.FieldRef(field=field))

            return {
                DerivedFeatureTransformations.TRANSFORMATION:
                transformation,
                DerivedFeatureTransformations.FUNCTION:
                lambda df: np.where(df[field] == original, replacement, df[
                    field])
            }
        else:
            # "0 + field" leaves present values as they are, while mapMissingTo
            # yields the replacement whenever the field is missing
            # NOTE(review): the '+' trick presumably restricts this branch to numeric fields - confirm
            transformation = pmml.Apply(function='+', mapMissingTo=replacement)
            transformation.append(pmml.Constant(0))
            transformation.append(pmml.FieldRef(field=field))
            return {
                DerivedFeatureTransformations.TRANSFORMATION:
                transformation,
                DerivedFeatureTransformations.FUNCTION:
                lambda df: np.where(df[field].isnull(), replacement, df[field])
            }
Example #2
0
 def greedy_evaluation(node):
     """
     Recursively translate an expression tree node into a data frame callable and a PMML expression.

     :param node: a field name (str), a tuple/list of the form ``(operation_code, argument, ...)``,
         or any other value, which is treated as a numeric constant
     :return: pair (function taking a data frame, PMML expression)
     """
     if isinstance(node, str):
         # field reference
         return (lambda df: df[node]), pmml.FieldRef(field=node)
     if not isinstance(node, (tuple, list)):
         # numeric terminal
         return (lambda df: node), pmml.Constant(node, dataType='double')

     # Evaluate the arguments into a list: they are traversed once to build the PMML expression
     # and again on every call of the returned function. A lazy ``map`` iterator (Python 3)
     # would already be exhausted on the second traversal.
     args = [greedy_evaluation(child) for child in node[1:]]
     operations = {
         '*': np.multiply,
         '-': np.subtract,
         '+': np.add,
         '/': np.divide,
         '%': np.mod,
     }
     assert isinstance(
         node[0],
         str), 'First element should be a code of operation'
     assert node[
         0] in operations, 'Unknown function code {}. Supported codes: {}'.format(
             node[0], operations.keys())
     expression_builders = {
         '*': partial(basic_function, '*'),
         '-': partial(basic_function, '-'),
         '+': partial(basic_function, '+'),
         '/': partial(basic_function, '/'),
         '%': mod_function
     }
     expr = expression_builders[node[0]]([pmml_expr for _, pmml_expr in args])
     operation = operations[node[0]]

     def func(df):
         # apply the numpy operation to the evaluated arguments, in their original order
         return operation(*[evaluate(df) for evaluate, _ in args])

     return func, expr
Example #3
0
    def output_transformation(self):
        """
        Assemble the Output element holding the raw prediction and its sigmoid transformation.

        The transformed field evaluates
        1 / (1 + exp(predictedValue * -(1 + n_estimators * learning_rate)))
        :return: Output element
        """
        result = pmml.Output()
        raw_field = pmml.OutputField(feature='predictedValue',
                                     name='predictedValue')
        result.append(raw_field)

        target = self.context.schemas[self.SCHEMA_OUTPUT][0]
        probability_field = pmml.OutputField(name=target.full_name,
                                             optype=target.optype,
                                             feature='transformedValue',
                                             dataType='double')

        # segment aggregation offers a weighted average but no weighted sum, hence the
        # aggregated value is multiplied by the total weight (negated for the exponent)
        negated_total_weight = -(
            1 + self.estimator.n_estimators * self.estimator.learning_rate)
        exponent_argument = pmml.Apply(function='*')
        exponent_argument.append(pmml.FieldRef(field='predictedValue'))
        exponent_argument.append(
            pmml.Constant(negated_total_weight, dataType='double'))

        exponent = pmml.Apply(function='exp')
        exponent.append(exponent_argument)

        denominator = pmml.Apply(function='+')
        denominator.append(pmml.Constant(1.0, dataType='double'))
        denominator.append(exponent)

        sigmoid = pmml.Apply(function='/')
        sigmoid.append(pmml.Constant(1.0, dataType='double'))
        sigmoid.append(denominator)

        probability_field.append(sigmoid)
        result.append(probability_field)
        return result
Example #4
0
    def output_transformation(self):
        """
        Assemble the Output element of the classifier.

        The output fields are appended in this order:
        - the predicted value, once per feature of the INTERNAL schema;
        - the score of the second category:
          1 / (1 + exp(internal_value * -(1 + n_estimators * learning_rate)));
        - the score of the first category: 1 - score of the second category;
        - the label, discretized from the internal field of the label: the first label
          value below 0, the second label value from 0 upwards.
        :return: Output element
        """

        def apply(function, *operands):
            # Apply element with the operands appended in the given order
            expression = pmml.Apply(function=function)
            for operand in operands:
                expression.append(operand)
            return expression

        def double_constant(value):
            # Constant element typed as double
            return pmml.Constant(value, dataType=FeatureType.DOUBLE.value)

        result = pmml.Output()

        # the raw prediction is kept under the internal name of each feature
        for internal_feature in self.context.schemas[Schema.INTERNAL]:
            result.append(
                pmml.OutputField(
                    name=Schema.INTERNAL.extract_feature_name(internal_feature),
                    feature='predictedValue'))

        # logistic transformation for the positive label
        positive = self.context.schemas[Schema.CATEGORIES][1]
        positive_field = pmml.OutputField(
            name=Schema.CATEGORIES.extract_feature_name(positive),
            optype=positive.optype.value,
            dataType=positive.data_type.value,
            feature='transformedValue')
        raw_reference = pmml.FieldRef(
            field=Schema.INTERNAL.extract_feature_name(positive.namespace))
        # segment aggregation offers a weighted average but no weighted sum, hence the
        # aggregated value is multiplied by the total weight
        total_weight = (
            1 + self.estimator.n_estimators * self.estimator.learning_rate)
        scaled = apply('*', raw_reference, double_constant(-total_weight))
        denominator = apply('+', double_constant(1.0), apply('exp', scaled))
        positive_field.append(apply('/', double_constant(1.0), denominator))
        result.append(positive_field)

        # the negative label gets the complement of the positive score
        negative = self.context.schemas[Schema.CATEGORIES][0]
        negative_field = pmml.OutputField(
            name=Schema.CATEGORIES.extract_feature_name(negative),
            optype=negative.optype.value,
            dataType=negative.data_type.value,
            feature='transformedValue')
        positive_reference = pmml.FieldRef(
            field=Schema.CATEGORIES.extract_feature_name(positive))
        negative_field.append(
            apply('-', double_constant(1), positive_reference))
        result.append(negative_field)

        # the label itself comes from comparing the raw predicted output with 0
        label = self.context.schemas[Schema.OUTPUT][0]
        label_field = pmml.OutputField(
            name=Schema.OUTPUT.extract_feature_name(label),
            optype=label.optype.value,
            dataType=label.data_type.value,
            feature='transformedValue')
        label_bins = pmml.Discretize(
            field=Schema.INTERNAL.extract_feature_name(label))
        below_zero = pmml.DiscretizeBin(binValue=label.value_list[0])
        below_zero.append(pmml.Interval(closure="openOpen", rightMargin=0))
        label_bins.append(below_zero)
        from_zero = pmml.DiscretizeBin(binValue=label.value_list[1])
        from_zero.append(pmml.Interval(closure="closedOpen", leftMargin=0))
        label_bins.append(from_zero)
        label_field.append(label_bins)
        result.append(label_field)

        return result