Example #1
    def testSetRequestOnly(self):
        input_record = schema.Scalar(np.int64)
        schema.attach_metadata_to_scalars(
            input_record,
            schema.Metadata(
                categorical_limit=100000000,
                expected_value=99,
                feature_specs=schema.FeatureSpec(feature_ids=[1, 100, 1001])))

        set_request_only(input_record)
        self.assertEqual(input_record.metadata.categorical_limit, 100000000)
        self.assertEqual(input_record.metadata.expected_value, 99)
        self.assertEqual(input_record.metadata.feature_specs.feature_ids,
                         [1, 100, 1001])
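
The assertions above verify that set_request_only preserves the metadata already attached to the scalar. A minimal follow-up check, assuming (as the name suggests) that the call also marks the attached FeatureSpec as request-only:

        # Hedged sketch: assuming set_request_only flips
        # feature_is_request_only on the scalar's FeatureSpec while leaving
        # the rest of the metadata untouched.
        self.assertTrue(
            input_record.metadata.feature_specs.feature_is_request_only)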
Example #2
    def testSetRequestOnly(self):
        input_record = schema.Scalar(np.int64)
        schema.attach_metadata_to_scalars(
            input_record,
            schema.Metadata(
                categorical_limit=100000000,
                expected_value=99,
                feature_specs=schema.FeatureSpec(
                    feature_ids=[1, 100, 1001]
                )
            )
        )

        set_request_only(input_record)
        self.assertEqual(input_record.metadata.categorical_limit, 100000000)
        self.assertEqual(input_record.metadata.expected_value, 99)
        self.assertEqual(
            input_record.metadata.feature_specs.feature_ids,
            [1, 100, 1001]
        )
Example #3
    def __init__(self,
                 model,
                 input_record,
                 input_specs,
                 name='sparse_to_dense',
                 **kwargs):
        """
        `input_specs` follows the format of FeatureSpec from schema. To be more
        precise it's a namedtuple that should have:
            'feature_type', 'feature_names', 'feature_ids'
        """
        super(SparseToDense, self).__init__(model, name, input_record,
                                            **kwargs)

        self.input_specs = input_specs

        outputs = []
        for field, feature_specs in self.input_specs:
            assert len(feature_specs.feature_names) == \
                len(feature_specs.feature_ids)
            if feature_specs.feature_type == 'FLOAT':
                outputs.append(
                    (field,
                     schema.Scalar(
                         (np.float32, (len(feature_specs.feature_ids), )),
                         model.net.NextScopedBlob(name + '_' + field +
                                                  '_output'))))
            elif feature_specs.feature_type == 'ID_LIST':
                outputs.append(
                    (field,
                     schema.Struct(
                         (
                             'ranges',
                             schema.Scalar(
                                 (np.int32,
                                  (len(feature_specs.feature_ids), 2)),
                                 model.net.NextScopedBlob(name + '_' + field +
                                                          '_ranges')),
                         ),
                         ('values', input_record[field].values.items),
                     )))
            elif feature_specs.feature_type == 'ID_SCORE_LIST':
                outputs.append(
                    (field,
                     schema.Struct(
                         (
                             'ranges',
                             schema.Scalar(
                                 (np.int32,
                                  (len(feature_specs.feature_ids), 2)),
                                 model.net.NextScopedBlob(name + '_' + field +
                                                          '_ranges')),
                         ),
                         ('ids', input_record[field].values.keys),
                         ('scores', input_record[field].values.values),
                     )))
            else:
                raise TypeError("Unsupported input type: {0}".format(
                    feature_specs.feature_type))

        # TODO(amalevich): This schema produces ranges, so anything consuming
        # it has to support ranges as well. That might be confusing unless we
        # add better support for ranges / keep this as a first layer.
        self.output_schema = schema.Struct(*outputs)

        # TODO(amalevich): Consider moving this data into schema instead.
        # Structs don't support attaching metadata to them, and cloning would
        # break things badly, but this is the most elegant way to pass this
        # info around. Should we change it, or is it too much work and not
        # worth it?
        for field, feature_specs in input_specs:
            schema.attach_metadata_to_scalars(
                self.output_schema[field],
                schema.Metadata(feature_specs=feature_specs))
        self.zero = model.global_constants['ZERO']
        self.zero_range = model.global_constants['ZERO_RANGE']
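
The docstring above describes input_specs as (field, FeatureSpec) pairs carrying 'feature_type', 'feature_names', and 'feature_ids'. A minimal sketch of such a list; the field names and ids below are hypothetical:

    # Hypothetical input_specs: one (field, FeatureSpec) pair per input
    # field; feature_names and feature_ids must have the same length.
    input_specs = [
        ('float_features', schema.FeatureSpec(
            feature_type='FLOAT',
            feature_names=['f1', 'f2'],
            feature_ids=[1, 2])),
        ('id_list_features', schema.FeatureSpec(
            feature_type='ID_LIST',
            feature_names=['g1'],
            feature_ids=[10])),
    ]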
Example #4
    def __init__(self, model, input_record, input_specs,
                 name='feature_sparse_to_dense', **kwargs):
        """
        `input_specs` follows the format of FeatureSpec from schema. To be more
        precise it's a namedtuple that should have:
            'feature_type', 'feature_names', 'feature_ids'
        """
        super(FeatureSparseToDense, self).__init__(model, name,
                                                   input_record, **kwargs)

        self.input_specs = input_specs

        outputs = []
        for field, feature_specs in self.input_specs:
            assert len(feature_specs.feature_names) == \
                len(feature_specs.feature_ids)
            if feature_specs.feature_type == 'FLOAT':
                outputs.append((
                    field,
                    schema.Scalar(
                        (np.float32, (len(feature_specs.feature_ids), )),
                        self.get_next_blob_reference(field + '_output')
                    )
                ))
            elif feature_specs.feature_type == 'ID_LIST':
                outputs.append((
                    field,
                    schema.Struct(
                        ('ranges',
                            schema.Scalar(
                                (
                                    np.int32,
                                    (len(feature_specs.feature_ids), 2)
                                ),
                                self.get_next_blob_reference(
                                    field + '_ranges')
                            ),
                         ),
                        ('values',
                         schema.Scalar(np.int64,
                                       self.get_next_blob_reference(
                                           field + '_values')
                                       ),
                         )
                    )
                ))
            elif feature_specs.feature_type == 'ID_SCORE_LIST':
                outputs.append((
                    field,
                    schema.Struct(
                        ('ranges',
                            schema.Scalar(
                                (
                                    np.int32,
                                    (len(feature_specs.feature_ids), 2)
                                ),
                                self.get_next_blob_reference(
                                    field + '_ranges')
                            ),
                         ),
                        ('ids',
                         schema.Scalar(np.int64,
                                       self.get_next_blob_reference(
                                           field + '_ids')
                                       ),
                         ),
                        ('scores',
                         schema.Scalar(np.float32,
                                       self.get_next_blob_reference(
                                           field + '_scores')
                                       ),
                         )
                    )
                ))
            else:
                raise TypeError("Unsupported input type: {0}".format(
                    feature_specs.feature_type))

        # TODO(amalevich): This schema produces ranges, so anything consuming
        # it has to support ranges as well. That might be confusing unless we
        # add better support for ranges / keep this as a first layer.
        self.output_schema = schema.Struct(
            *outputs
        )

        # TODO(amalevich): Consider moving this data into schema instead.
        # Structs don't support attaching metadata to them, and cloning would
        # break things badly, but this is the most elegant way to pass this
        # info around. Should we change it, or is it too much work and not
        # worth it?
        for field, feature_specs in input_specs:
            schema.attach_metadata_to_scalars(
                self.output_schema[field],
                schema.Metadata(
                    feature_specs=feature_specs)
            )
        self.zero = model.global_constants['ZERO']
        self.zero_range = model.global_constants['ZERO_RANGE']
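
For context, a hedged usage sketch of the layer above; model, input_record, and input_specs are assumed to come from the surrounding layer-model setup (input_specs as sketched after Example #3):

    # Hedged sketch: instantiating the layer directly. In practice Caffe2
    # layers are usually added through the model's layer-registry helpers.
    layer = FeatureSparseToDense(model, input_record, input_specs)
    dense_record = layer.output_schema  # one Struct entry per input field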
Example #5
    def __init__(self,
                 model,
                 input_record,
                 inner_shape,
                 reducer,
                 weight_init=None,
                 weight_optim=None,
                 name='sparse_lookup',
                 **kwargs):
        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, (list, tuple)), \
            "Unexpected type for inner_shape, expected list or tuple, " \
            "got {0}".format(type(inner_shape))

        # TODO Add some asserts about input type
        assert reducer in self._supported_reducers, \
            "Unsupported reducer: {}".format(reducer)
        self.reducer = reducer

        assert input_record.items.metadata is not None, \
            "Features without metadata are not supported"
        input_dim = input_record.items.metadata.categorical_limit
        assert input_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            model.net.NextScopedBlob(name + '_output'),
        )

        if self.request_only:
            schema.attach_metadata_to_scalars(
                self.output_schema,
                schema.Metadata(categorical_limit=None,
                                expected_value=None,
                                feature_specs=schema.FeatureSpec(
                                    feature_is_request_only=True)))
        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + list(inner_shape)  # list() so tuples work
        self.weight_init = weight_init if weight_init else ('UniformFill', {
            'min': -scale,
            'max': scale
        })

        self.w = model.net.NextScopedBlob(name + "_w")
        self.params.append(
            LayerParameter(parameter=self.w,
                           initializer=core.CreateOperator(
                               self.weight_init[0], [],
                               self.w,
                               shape=self.shape,
                               **self.weight_init[1]),
                           optimizer=weight_optim))

        if reducer == 'PositionWeighted':
            self.pos_w = model.net.NextScopedBlob(name + "_pos_w")
            self.params.append(
                LayerParameter(parameter=self.pos_w,
                               initializer=core.CreateOperator('ConstantFill',
                                                               [],
                                                               self.pos_w,
                                                               shape=[
                                                                   input_dim,
                                                               ],
                                                               value=1.0),
                               optimizer=weight_optim))
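
weight_init defaults to a UniformFill bounded by +/-sqrt(1/input_dim); callers can pass any (operator_name, kwargs) pair instead. A hedged sketch with hypothetical values, assuming 'Sum' is among _supported_reducers:

    # Hypothetical override: Gaussian initialization instead of the default
    # UniformFill; the tuple format is (operator_name, operator_kwargs).
    custom_init = ('GaussianFill', {'mean': 0.0, 'std': 0.01})
    lookup = SparseLookup(model, input_record, inner_shape=[64],
                          reducer='Sum', weight_init=custom_init)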
Example #6
    def __init__(self,
                 model,
                 input_record,
                 input_specs,
                 name="feature_sparse_to_dense",
                 default_dense_value=None,
                 **kwargs):
        """
        `input_specs` follows the format of FeatureSpec from schema. To be more
        precise it's a namedtuple that should have:
            'feature_type', 'feature_names', 'feature_ids'
        `default_dense_value` can only be 0.0 or float("NaN"); passing None
        defaults it to 0.0.
        """
        super(FeatureSparseToDense, self).__init__(model, name, input_record,
                                                   **kwargs)
        if default_dense_value is None:
            default_dense_value = 0.0
        default_dense_value = float(default_dense_value)
        assert np.isnan(default_dense_value) or default_dense_value == 0.0, \
            "default_dense_value can only be 0.0 or NaN"

        self.input_specs = input_specs
        self.default_float_value = (model.global_constants["NAN"]
                                    if np.isnan(default_dense_value) else
                                    model.global_constants["ZERO"])
        self.zero_range = model.global_constants["ZERO_RANGE"]

        outputs = []
        for field, feature_specs in self.input_specs:
            assert len(feature_specs.feature_names) == len(
                feature_specs.feature_ids)
            if feature_specs.feature_type == "FLOAT":
                outputs.append((
                    field,
                    schema.Scalar(
                        (np.float32, (len(feature_specs.feature_ids), )),
                        self.get_next_blob_reference(field + "_output"),
                    ),
                ))
            elif feature_specs.feature_type == "ID_LIST":
                outputs.append((
                    field,
                    schema.Struct(
                        (
                            "ranges",
                            schema.Scalar(
                                (np.int32,
                                 (len(feature_specs.feature_ids), 2)),
                                self.get_next_blob_reference(field +
                                                             "_ranges"),
                            ),
                        ),
                        (
                            "values",
                            schema.Scalar(
                                np.int64,
                                self.get_next_blob_reference(field +
                                                             "_values"),
                            ),
                        ),
                    ),
                ))
            elif feature_specs.feature_type == "ID_SCORE_LIST":
                outputs.append((
                    field,
                    schema.Struct(
                        (
                            "ranges",
                            schema.Scalar(
                                (np.int32,
                                 (len(feature_specs.feature_ids), 2)),
                                self.get_next_blob_reference(field +
                                                             "_ranges"),
                            ),
                        ),
                        (
                            "ids",
                            schema.Scalar(
                                np.int64,
                                self.get_next_blob_reference(field + "_ids"),
                            ),
                        ),
                        (
                            "scores",
                            schema.Scalar(
                                np.float32,
                                self.get_next_blob_reference(field +
                                                             "_scores"),
                            ),
                        ),
                    ),
                ))
            elif feature_specs.feature_type == "EMBEDDING":
                # We don't know dimensions of embeddings in input data.
                # Even though they should match dimensions from feature config,
                # we keep ranges blob to check input data later.
                outputs.append((
                    field,
                    schema.Struct(
                        (
                            "ranges",
                            schema.Scalar(
                                (np.int32,
                                 (len(feature_specs.feature_ids), 2)),
                                self.get_next_blob_reference(field +
                                                             "_ranges"),
                            ),
                        ),
                        (
                            "values",
                            schema.Scalar(
                                np.float32,
                                self.get_next_blob_reference(field +
                                                             "_values"),
                            ),
                        ),
                    ),
                ))
            elif feature_specs.feature_type == "GENERIC_FEATURE":
                # We don't know dimensions of embeddings in input data.
                # Even though they should match dimensions from feature config,
                # we keep ranges blob to check input data later.
                # Currently this schema with ranges and values is only for
                # generic type enum 1. If new types are implemented, we need to
                # modify the ParseGeneric operator, and this part accordingly
                outputs.append((
                    field,
                    schema.Struct(
                        (
                            "ranges",
                            schema.Scalar(
                                (np.int32,
                                 (len(feature_specs.feature_ids), 2)),
                                self.get_next_blob_reference(field +
                                                             "_ranges"),
                            ),
                        ),
                        (
                            "values",
                            schema.Scalar(
                                np.float32,
                                self.get_next_blob_reference(field +
                                                             "_values"),
                            ),
                        ),
                    ),
                ))
            else:
                raise TypeError("Unsupported input type: {0}".format(
                    feature_specs.feature_type))

        # TODO(amalevich): This schema produces ranges, so anything consuming
        # it has to support ranges as well. That might be confusing unless we
        # add better support for ranges / keep this as a first layer.
        self.output_schema = schema.Struct(*outputs)

        # TODO(amalevich): Consider moving this data into schema instead.
        # Structs don't support attaching metadata to them, and cloning would
        # break things badly, but this is the most elegant way to pass this
        # info around. Should we change it, or is it too much work and not
        # worth it?
        for field, feature_specs in input_specs:
            schema.attach_metadata_to_scalars(
                self.output_schema[field],
                schema.Metadata(feature_specs=feature_specs))
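
The assert near the top of __init__ pins down the default_dense_value contract. A short sketch of accepted and rejected values:

    # Hedged sketch of the contract enforced above: None falls back to 0.0,
    # 0.0 and NaN pass the assert, anything else raises.
    FeatureSparseToDense(model, input_record, input_specs)  # fills with 0.0
    FeatureSparseToDense(model, input_record, input_specs,
                         default_dense_value=float("NaN"))  # fills with NaN
    FeatureSparseToDense(model, input_record, input_specs,
                         default_dense_value=1.0)  # AssertionError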
Example #7
    def __init__(self, model, input_record, inner_shape, reducer,
                 weight_init=None, weight_optim=None,
                 name='sparse_lookup', **kwargs):
        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, (list, tuple)), \
            "Unexpected type for inner_shape, expected list or tuple, " \
            "got {0}".format(type(inner_shape))

        # TODO Add some asserts about input type
        assert reducer in self._supported_reducers, \
            "Unsupported reducer: {}".format(reducer)
        self.reducer = reducer

        assert input_record.items.metadata is not None, \
            "Features without metadata are not supported"
        input_dim = input_record.items.metadata.categorical_limit
        assert input_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            core.ScopedBlobReference(model.net.NextName(self.name + '_output')))

        if self.request_only:
            schema.attach_metadata_to_scalars(
                self.output_schema,
                schema.Metadata(
                    categorical_limit=None,
                    expected_value=None,
                    feature_specs=schema.FeatureSpec(
                        feature_is_request_only=True
                    )
                )
            )
        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + list(inner_shape)  # list() so tuples work
        self.weight_init = weight_init if weight_init else (
            'UniformFill', {'min': -scale, 'max': scale})

        self.w = core.ScopedBlobReference(model.net.NextName(self.name + "_w"))
        self.params.append(
            LayerParameter(
                parameter=self.w,
                initializer=core.CreateOperator(self.weight_init[0],
                                                [],
                                                self.w,
                                                shape=self.shape,
                                                **self.weight_init[1]
                                                ),
                optimizer=weight_optim
            ))

        if reducer == 'PositionWeighted':
            self.pos_w = core.ScopedBlobReference(
                model.net.NextName(self.name + "_pos_w"))
            self.params.append(
                LayerParameter(
                    parameter=self.pos_w,
                    initializer=core.CreateOperator('ConstantFill',
                                                    [],
                                                    self.pos_w,
                                                    shape=[input_dim, ],
                                                    value=1.0
                                                    ),
                    optimizer=weight_optim
                ))
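
As the last branch shows, reducer='PositionWeighted' registers a second parameter next to the embedding table. A hedged sketch, assuming the base layer registers no parameters of its own:

    # Hedged sketch: with PositionWeighted the layer holds two
    # LayerParameters: the embedding table `_w` (shape [input_dim] +
    # inner_shape) and the position weights `_pos_w` (shape [input_dim],
    # filled with 1.0).
    lookup = SparseLookup(model, input_record, inner_shape=[32],
                          reducer='PositionWeighted')
    assert len(lookup.params) == 2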