Example #1
    def __init__(self, model, input_record, inner_shape, reducer,
                 weight_init=None, weight_optim=None,
                 name='sparse_lookup', **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        if reducer == "PositionWeighted":
            self.external_weights = input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)
        assert input_dim is not None, "Unbounded features are not supported"

        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + inner_shape
        self.weight_init = weight_init if weight_init else (
            'UniformFill', {'min': -scale, 'max': scale})

        if schema.equal_schemas(self.input_record, IdList):
            sparse_key = self.input_record.items()
        elif schema.equal_schemas(
                self.input_record,
                IdScoreList,
                check_field_types=False):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=sparse_key,
                                       average_length=avg_length
                                   ))

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #2
    def __init__(self,
                 model,
                 input_record,
                 inner_shape,
                 reducer,
                 weight_init=None,
                 weight_optim=None,
                 name='sparse_lookup',
                 regularizer=None,
                 **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        if reducer == "PositionWeighted":
            assert _is_id_score_list(self.input_record), (
                "PositionWeighted only support IdScoreList, but got {} " +
                "please use PositionWeighted layer to convert IdList " +
                "to IdScoreList").format(repr(self.input_record))
            self.external_weights = input_record.values()

        elif reducer == "RecencyWeighted":
            assert _is_id_score_list(self.input_record), (
                "RecencyWeighted only supports IdScoreList.")
            self.external_weights = input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)
        assert input_dim > 0, (
            "{} should have categorical limit > 0, but got {}".format(
                get_key(input_record)(), input_dim))

        self.input_dim = input_dim
        self.shape = [input_dim] + inner_shape

        self.trainer_version = get_trainer_version_based_on_optim(weight_optim)

        default_init_op = self._get_default_init_op()

        self.weight_init = weight_init or default_init_op

        # If fp16 is used, make sure fp16 init op is used
        if self.trainer_version == "fp16":
            assert self.reducer in self._fp16_compatible_reducers, (
                "Fp16 training is enabled. The reducer specified is not supported. "
                "Got {}. Supported reducers: {}. Right now, in general, sum, mean, "
                "positional pooling are supported. Attention is not. Please check "
                "if there is fp16 trained sparse features using advanced pooling."
                .format(self.reducer, self._fp16_compatible_reducers))

            # if init op is UniformFill, we replace it directly
            if self.weight_init[0] == "UniformFill":
                self.weight_init = ("Float16UniformFill", self.weight_init[1])
            assert self.weight_init[0] in self._fp16_compatible_init_op_types, (
                "Fp16 training is enabled. Init op for weight parameter must be fp16 "
                "compatibale. Got {}. Supported ops: {}".format(
                    self.weight_init[0], self._fp16_compatible_init_op_types))

            assert regularizer is None, "Regularizer is not compatible with fp16"

        if _is_id_list(self.input_record):
            sparse_key = self.input_record.items()
        elif _is_id_score_list(self.input_record):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=sparse_key,
                                       average_length=avg_length),
                                   regularizer=regularizer)

        self.scale_bias_init = ('ConstantFill', {'value': 0.0})

        self.scale_bias = self.create_param(
            param_name='scale_bias',
            shape=[],
            initializer=self.scale_bias_init,
            optimizer=model.NoOptim,
        )

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #3
    def __init__(self,
                 model,
                 input_record,
                 inner_shape,
                 reducer,
                 weight_init=None,
                 weight_optim=None,
                 name='sparse_lookup',
                 regularizer=None,
                 **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        if reducer == "PositionWeighted":
            assert _is_id_score_list(self.input_record), (
                "PositionWeighted only support IdScoreList, but got {} " +
                "please use PositionWeighted layer to convert IdList " +
                "to IdScoreList").format(repr(self.input_record))
            self.external_weights = input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)
        assert input_dim > 0, (
            "{} should have categorical limit > 0, but got {}".format(
                get_key(input_record)(), input_dim))

        self.input_dim = input_dim
        self.shape = [input_dim] + inner_shape

        default_init_op = self._get_default_init_op()

        self.weight_init = weight_init or default_init_op

        if _is_id_list(self.input_record):
            sparse_key = self.input_record.items()
        elif _is_id_score_list(self.input_record):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=sparse_key,
                                       average_length=avg_length),
                                   regularizer=regularizer)

        self.scale_bias_init = ('ConstantFill', {'value': 0.0})

        self.scale_bias = self.create_param(
            param_name='scale_bias',
            shape=[],
            initializer=self.scale_bias_init,
            optimizer=model.NoOptim,
        )

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #4
    def __init__(self,
                 model,
                 input_record,
                 inner_shape,
                 reducer,
                 weight_init=None,
                 weight_optim=None,
                 name='sparse_lookup',
                 regularizer=None,
                 use_external_weights=False,
                 **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        self.sparse_key = get_key(self.input_record)()
        logger.info("Setup the sparse lookup layer for " + self.sparse_key)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0} for {1}".\
            format(type(inner_shape), self.sparse_key)

        if reducer == "PositionWeighted":
            assert _is_id_score_list(self.input_record), (
                "PositionWeighted only support IdScoreList, but got {} for {}"
                + "please use PositionWeighted layer to convert IdList " +
                "to IdScoreList").format(repr(self.input_record),
                                         self.sparse_key)
            self.external_weights = self.input_record.values()

        elif reducer == "RecencyWeighted":
            assert _is_id_score_list(self.input_record), (
                "RecencyWeighted only supports IdScoreList, "
                "while the sparse feature {} is not.".format(self.sparse_key))
            self.external_weights = self.input_record.values()
        # TODO: create a new type of reducer with external weights to wrap
        # this and the above two cases since essentially their input formats
        # are the same.
        elif use_external_weights:
            assert _is_id_score_list(self.input_record), (
                "Use_external_weights only supports IdScoreList, "
                "while the sparse feature {} is not.".format(self.sparse_key))
            assert reducer in [
                "Sum", "WeightedSum"
            ], ("Use_external_weights only supports Sum reducer, "
                "while the reducer is {}.".format(reducer))
            self.external_weights = self.input_record.values()
        self.reducer = reducer
        self.use_external_weights = use_external_weights

        input_dim = get_categorical_limit(self.input_record)
        assert input_dim > 0, "{} should have categorical limit > 0, but got {}".format(
            self.sparse_key, input_dim)

        self.input_dim = input_dim
        self.shape = [input_dim] + inner_shape

        self.trainer_version = get_trainer_version_based_on_optim(weight_optim)

        default_init_op = self._get_default_init_op()

        self.weight_init = weight_init or default_init_op

        self.evicted_values = None
        if schema.equal_schemas(self.input_record,
                                IdListWithEvicted) or schema.equal_schemas(
                                    self.input_record,
                                    IdScoreListWithEvicted,
                                    check_field_types=False):
            self.evicted_values = self.input_record._evicted_values

        # If fp16 is used, make sure fp16 init op is used
        if self.trainer_version == "fp16":
            assert self.reducer in self._fp16_compatible_reducers or use_external_weights, (
                "Fp16 training is enabled. The reducer specified is not supported. "
                "Got {}. Supported reducers: {}. Right now, in general, sum, mean, "
                "positional pooling are supported. Attention is not. Please check "
                "if there is fp16 trained sparse features using advanced pooling."
                .format(self.reducer, self._fp16_compatible_reducers))

            # if init op is UniformFill, we replace it directly
            if self.weight_init[0] == "UniformFill":
                self.weight_init = ("Float16UniformFill", self.weight_init[1])
            assert self.weight_init[0] in self._fp16_compatible_init_op_types, (
                "Fp16 training is enabled. Init op for weight parameter must be fp16 "
                "compatibale. Got {}. Supported ops: {}".format(
                    self.weight_init[0], self._fp16_compatible_init_op_types))

            assert regularizer is None, "Regularizer is not compatible with fp16"

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=self.sparse_key,
                                       average_length=avg_length),
                                   regularizer=regularizer)
        if self.evicted_values:
            self.reinit_vec = self.create_param(
                param_name="reinit_vec",
                shape=inner_shape,
                initializer=self.weight_init,
                optimizer=model.NoOptim,
                regularizer=None,
            )

        self.scale_bias_init = ('ConstantFill', {'value': 0.0})

        self.scale_bias = self.create_param(
            param_name='scale_bias',
            shape=[],
            initializer=self.scale_bias_init,
            optimizer=model.NoOptim,
        )

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #5
    def __init__(self, model, input_record, inner_shape, reducer,
                 weight_init=None, weight_optim=None,
                 name='sparse_lookup', **kwargs):
        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        # TODO Add some asserts about input type
        assert reducer in self._supported_reducers, "Unsupported reducer: {}".\
            format(reducer)
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)

        assert input_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            model.net.NextScopedBlob(name + '_output'),
        )

        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + inner_shape
        self.weight_init = weight_init if weight_init else (
            'UniformFill', {'min': -scale, 'max': scale})

        self.w = model.net.NextScopedBlob(name + "_w")
        if schema.equal_schemas(self.input_record, IdList):
            sparse_key = self.input_record.items()
        elif schema.equal_schemas(self.input_record, IdScoreList):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None
        self.params.append(
            LayerParameter(
                parameter=self.w,
                initializer=core.CreateOperator(self.weight_init[0],
                                                [],
                                                self.w,
                                                shape=self.shape,
                                                **self.weight_init[1]
                                                ),
                optimizer=weight_optim,
                ps_param=LayerPsParam(
                    sparse_key=sparse_key,
                    average_length=avg_length
                )
            ))

        if reducer == 'PositionWeighted':
            self.pos_w = model.net.NextScopedBlob(name + "_pos_w")
            self.params.append(
                LayerParameter(
                    parameter=self.pos_w,
                    initializer=core.CreateOperator('ConstantFill',
                                                    [],
                                                    self.pos_w,
                                                    shape=[input_dim, ],
                                                    value=1.0
                                                    ),
                    optimizer=weight_optim
                ))
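
Usage note: the following is a minimal sketch of how a constructor like the ones above is typically reached through Caffe2's layer-model API rather than called directly. The model name, the categorical limit of 1000, and the 64-dimensional inner_shape are hypothetical values chosen for illustration; the sketch assumes schema, LayerModelHelper, and the SparseLookup layer registration behave as in the examples above.

import numpy as np
from caffe2.python import schema
from caffe2.python.layer_model_helper import LayerModelHelper

# Hypothetical layer model with empty dense-feature and trainer schemas.
model = LayerModelHelper('sparse_lookup_example',
                         input_feature_schema=schema.Struct(),
                         trainer_extra_schema=schema.Struct())

# An IdList-style sparse feature: a list of int64 ids whose metadata carries
# the categorical limit that get_categorical_limit() reads in __init__.
sparse_ids = schema.NewRecord(
    model.net,
    schema.List(schema.Scalar(
        np.int64, metadata=schema.Metadata(categorical_limit=1000))),
)

# Invoking the registered layer through the model helper runs __init__ with
# inner_shape=[64] and the 'Sum' reducer, creating the [1000, 64] embedding
# table 'w' and returning the layer's float32 (64,) output schema.
embedding = model.SparseLookup(sparse_ids, [64], 'Sum')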