Example #1
0
 def field_not_in_list(field, values):
     """
     Describe a "field is not one of the listed values" derived feature.

     Two equivalent representations are produced:

     * a PMML ``MapValues`` element that reads ``field`` through the ``input``
       column, maps every entry of ``values`` to 0 and defaults to 1;
     * a callable that takes a data frame and folds the element-wise
       ``df[field] != v`` comparisons for every ``v`` in ``values`` with
       ``np.logical_and``.

     :param field: name of the field to test
     :param values: the values the field is compared against
     :return: dict keyed by ``DerivedFeatureTransformations.TRANSFORMATION``
         (the ``MapValues`` element) and
         ``DerivedFeatureTransformations.FUNCTION`` (the callable)
     """
     mapping = pmml.MapValues(outputColumn='output', defaultValue=1)
     mapping.append(pmml.FieldColumnPair(field=field, column='input'))

     # One table row per excluded value; anything not listed falls back to
     # the defaultValue declared on the MapValues element.
     excluded_rows = pmml.InlineTable()
     for excluded in values:
         excluded_rows.append(pmml_row(input=excluded, output=0))
     mapping.append(excluded_rows)

     def differs_from_all(df):
         # One inequality mask per excluded value, AND-ed together pairwise.
         masks = [df[field] != excluded for excluded in values]
         return reduce(np.logical_and, masks)

     return {
         DerivedFeatureTransformations.TRANSFORMATION: mapping,
         DerivedFeatureTransformations.FUNCTION: differs_from_all,
     }
Example #2
0
    def transformation_dictionary(self):
        """
        Build and return the PMML TransformationDictionary element.

        Every derived feature of the context contributes a DerivedField that
        wraps its transformation. Every categorical input feature contributes
        a DerivedField holding a MapValues table that maps each entry of the
        feature's ``value_list`` to its position in that list. Non-categorical
        input features are used as they are.

        As a side effect ``self.context.schemas[Schema.NUMERIC]`` is replaced
        by a list that mirrors ``Schema.MODEL``, with each model feature
        swapped for the feature registered under the same name here.

        :return: the populated ``pmml.TransformationDictionary``
        """
        td = pmml.TransformationDictionary()
        # Start from an empty numeric schema; the real one is installed once
        # every field has been registered.
        self.context.schemas[Schema.NUMERIC] = []
        features_by_name = {}

        # Pass 1: all derived fields are emitted up front, because any of
        # them can be requested later.
        for derived in self.context.schemas[Schema.DERIVED]:
            numeric = RealNumericFeature(name=derived.name)
            derived_field = pmml.DerivedField(
                name=numeric.full_name,
                optype=numeric.optype.value,
                dataType=numeric.data_type.value,
            )
            derived_field.append(derived.transformation)
            td.append(derived_field)
            assert derived.name not in features_by_name, \
                'Duplicate field definition: {}'.format(derived.name)
            features_by_name[derived.name] = numeric

        # Pass 2: categorical inputs get a numeric counterpart, everything
        # else is registered unchanged.
        for raw in self.context.schemas[Schema.INPUT]:
            assert raw.name not in features_by_name, \
                'Duplicate field definition: {}'.format(raw.name)
            if not isinstance(raw, CategoricalFeature):
                features_by_name[raw.name] = raw
                continue

            numeric = RealNumericFeature(name=raw.name,
                                         namespace=Schema.NUMERIC.namespace)
            derived_field = pmml.DerivedField(
                name=numeric.full_name,
                optype=numeric.optype.value,
                dataType=numeric.data_type.value,
            )
            value_mapping = pmml.MapValues(outputColumn='output',
                                           dataType=numeric.data_type.value)
            value_mapping.append(
                pmml.FieldColumnPair(field=raw.full_name, column='input'))

            # Raw category -> index of that category in value_list.
            codes = pmml.InlineTable()
            for code, category in enumerate(raw.value_list):
                codes.append(pmml_row(input=category, output=code))

            # The value returned by each append() is what gets nested into
            # the enclosing element.
            mapping_with_table = value_mapping.append(codes)
            field_with_mapping = derived_field.append(mapping_with_table)
            td.append(field_with_mapping)
            features_by_name[raw.name] = numeric

        # Mirror the model schema into the numeric schema.
        numeric_schema = []
        for model_feature in self.context.schemas[Schema.MODEL]:
            numeric_schema.append(features_by_name[model_feature.name])
        self.context.schemas[Schema.NUMERIC] = numeric_schema

        return td
Example #3
0
 def map_values(field, value_map, default_value):
     """
     Describe a derived feature that maps values of ``field`` through a lookup table.

     Two representations are produced:

     * a PMML ``MapValues`` element that reads ``field`` through the ``input``
       column, holds one table row per ``value_map`` item and declares
       ``default_value`` as its ``defaultValue``;
     * a callable that takes a data frame and applies the same lookup
       element-wise to ``df[field]``.

     :param field: name of the field whose values are mapped
     :param value_map: dict from input value to output value
     :param default_value: output used for inputs missing from ``value_map``
     :return: dict keyed by ``DerivedFeatureTransformations.TRANSFORMATION``
         (the ``MapValues`` element) and
         ``DerivedFeatureTransformations.FUNCTION`` (the callable)
     """
     # The attribute is spelled ``defaultValue`` (as in field_not_in_list);
     # the previous ``default_value`` keyword did not match that attribute name.
     mv = pmml.MapValues(outputColumn='output', defaultValue=default_value)
     mv.append(pmml.FieldColumnPair(field=field, column='input'))
     it = pmml.InlineTable()
     for k, v in value_map.items():
         it.append(pmml_row(input=k, output=v))
     mv.append(it)

     def lookup(value):
         # The element being mapped is the key and default_value the fallback.
         # The previous partial(value_map.get, default_value) bound
         # default_value as the *key*, so it looked up the default and
         # returned the raw input whenever that key was absent.
         return value_map.get(value, default_value)

     return {
         DerivedFeatureTransformations.TRANSFORMATION:
         mv,
         DerivedFeatureTransformations.FUNCTION:
         lambda df: np.vectorize(lookup)(df[field])
     }
 def test_transform_with_derived_field(self):
     """
     Smoke test: export a decision tree whose model schema includes a derived field.

     A depth-2 classifier is fitted on three features. The third feature,
     ``x3``, is declared as a derived field computed from ``x1`` and ``x2``
     through a MapValues lookup table. The test only checks that building
     the PMML document and serialising it with ``toxml()`` does not raise.
     """
     samples = [
         [0, 0, 0],
         [0, 1, 0],
         [1, 0, 0],
         [1, 1, 1],
     ]
     labels = [0, 1, 1, 1]
     self.est = DecisionTreeClassifier(max_depth=2)
     self.est.fit(samples, labels)

     # x3 is looked up from the (x1, x2) pair.
     x3_mapping = pmml.MapValues(dataType="double", outputColumn="output")
     for column_name in ("x1", "x2"):
         x3_mapping.append(
             pmml.FieldColumnPair(column=column_name, field=column_name))
     lookup_table = pmml.InlineTable()
     table_rows = (
         (0, 'zero', 0),
         (0, 'one', 0),
         (1, 'zero', 0),
         (1, 'one', 1),
     )
     for x1, x2, output in table_rows:
         lookup_table.append(pmml_row(x1=x1, x2=x2, output=output))
     x3_mapping.append(lookup_table)

     input_features = [
         IntegerNumericFeature('x1'),
         StringCategoricalFeature('x2', ['zero', 'one']),
     ]
     derived_features = [
         DerivedFeature(feature=RealNumericFeature(name='x3'),
                        transformation=x3_mapping),
     ]
     model_features = [
         IntegerNumericFeature('x1'),
         StringCategoricalFeature('x2', ['zero', 'one']),
         RealNumericFeature(name='x3'),
     ]
     output_features = [IntegerCategoricalFeature('output', ['neg', 'pos'])]
     self.ctx = TransformationContext({
         Schema.INPUT: input_features,
         Schema.DERIVED: derived_features,
         Schema.MODEL: model_features,
         Schema.OUTPUT: output_features,
     })

     self.converter = DecisionTreeConverter(estimator=self.est,
                                            context=self.ctx,
                                            mode=ModelMode.CLASSIFICATION)
     # No assertion: serialisation completing without an exception is the check.
     self.converter.pmml().toxml()