Example #1
    def __init__(self, model, input_record, weight_optim=None,
                 name="position_weights"):
        super(PositionWeighted, self).__init__(model, name, input_record)

        assert isinstance(input_record, schema.List), "Incorrect input type"
        length_metadata = input_record.lengths.metadata
        max_length = (length_metadata.categorical_limit if length_metadata is
                      not None else None)
        if max_length is not None:
            self.shape = max_length
        else:
            self.shape = get_categorical_limit(input_record)
            logger.warning(
                '{}: categorical_limit of lengths is not available, using '
                'categorical_limit of the keys: {}'.format(
                    str(input_record.lengths()), self.shape))

        self.pos_w = self.create_param(param_name='pos_w',
                                       shape=[self.shape, ],
                                       initializer=('ConstantFill', {'value': 1.0}),
                                       optimizer=weight_optim)

        self.output_schema = schema.Struct(
            ('position_weights',
                schema.Scalar((np.float32, self.shape),
                              self.get_next_blob_reference("pos_w_gather")))
        )

        self.tags.update({Tags.HANDLE_AS_SPARSE_LAYER})
        self.tags.update({Tags.GRADIENT_FROM_PS})
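The pos_w parameter holds one learnable weight per list position, up to max_length. A rough numpy sketch of the per-position gather the layer's ops perform at run time, assuming illustrative lengths and a trained pos_w vector (the variable names here are not part of the layer's API):

import numpy as np

# Two example lists of lengths 2 and 3, and a position-weight vector
# of size max_length == 4, initialized like pos_w above.
lengths = np.array([2, 3])
pos_w = np.array([1.0, 0.9, 0.8, 0.7])

# Position of every element within its own list (what a
# LengthsRangeFill-style op would produce): [0 1 0 1 2]
positions = np.concatenate([np.arange(n) for n in lengths])

# Gather one weight per element based on its position.
position_weights = pos_w[positions]   # [1.0 0.9 1.0 0.9 0.8]
print(position_weights)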
Example #2
    def __init__(self,
                 model,
                 input_record,
                 max_score=0,
                 bucket_boundaries=None,
                 hash_buckets=True,
                 weight_optim=None,
                 name="bucket_weighted"):
        super(BucketWeighted, self).__init__(model, name, input_record)

        assert isinstance(input_record, schema.List), "Incorrect input type"
        self.bucket_boundaries = bucket_boundaries
        self.hash_buckets = hash_buckets
        if bucket_boundaries is not None:
            self.shape = len(bucket_boundaries) + 1
        elif max_score > 0:
            self.shape = max_score
        else:
            self.shape = get_categorical_limit(input_record)

        self.bucket_w = self.create_param(param_name='bucket_w',
                                          shape=[
                                              self.shape,
                                          ],
                                          initializer=('ConstantFill', {
                                              'value': 1.0
                                          }),
                                          optimizer=weight_optim)

        self.output_schema = schema.Struct(
            ('bucket_weights',
             schema.Scalar((np.float32, self.shape),
                           self.get_next_blob_reference("bucket_w_gather"))))

        self.tags.update({Tags.HANDLE_AS_SPARSE_LAYER})
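The shape fallback above (len(bucket_boundaries) + 1, then max_score, then the categorical limit of the input) is the number of distinct buckets a score can land in. A minimal numpy sketch of the bucketization implied by explicit boundaries (values are illustrative):

import numpy as np

bucket_boundaries = [1.0, 5.0, 10.0]
num_buckets = len(bucket_boundaries) + 1   # matches self.shape above

scores = np.array([0.2, 3.0, 7.5, 42.0])
# np.digitize puts scores below the first boundary in bucket 0 and
# scores at or above the last boundary in bucket len(boundaries).
bucket_ids = np.digitize(scores, bucket_boundaries)   # [0 1 2 3]

# One learnable weight per bucket, initialized to 1.0 as in bucket_w above.
bucket_w = np.ones(num_buckets)
print(bucket_w[bucket_ids])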
Example #3
    def __init__(self, model, input_record, inner_shape, reducer,
                 weight_init=None, weight_optim=None,
                 name='sparse_lookup', **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        if reducer == "PositionWeighted":
            self.external_weights = input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)
        assert input_dim is not None, "Unbounded features are not supported"

        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + inner_shape
        self.weight_init = weight_init if weight_init else (
            'UniformFill', {'min': -scale, 'max': scale})

        if schema.equal_schemas(self.input_record, IdList):
            sparse_key = self.input_record.items()
        elif schema.equal_schemas(
                self.input_record,
                IdScoreList,
                check_field_types=False):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=sparse_key,
                                       average_length=avg_length
                                   ))

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
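When no weight_init is supplied, the embedding table is filled uniformly in [-scale, scale] with scale = sqrt(1 / input_dim). A small numpy sketch of the equivalent initialization, with made-up dimensions:

import math
import numpy as np

input_dim = 1000      # categorical limit of the sparse feature
inner_shape = [64]    # embedding dimension

scale = math.sqrt(1.0 / input_dim)
shape = [input_dim] + inner_shape

# Equivalent of the default ('UniformFill', {'min': -scale, 'max': scale}).
w = np.random.uniform(low=-scale, high=scale, size=shape).astype(np.float32)
print(w.shape, float(w.min()), float(w.max()))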
Example #4
    def __init__(self, model, input_record, name='merged'):
        super(MergeIdLists, self).__init__(model, name, input_record)
        assert all(schema.equal_schemas(x, IdList) for x in input_record), \
            "Inputs to MergeIdLists should all be IdLists."

        assert all(record.items.metadata is not None
                   for record in self.input_record), \
            "Features without metadata are not supported"

        merge_dim = max(
            get_categorical_limit(record) for record in self.input_record)
        assert merge_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.NewRecord(
            model.net,
            schema.List(
                schema.Scalar(
                    np.int64,
                    blob=model.net.NextBlob(name),
                    metadata=schema.Metadata(categorical_limit=merge_dim))))
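MergeIdLists rejects inputs whose items carry no metadata, because merge_dim is the maximum categorical_limit across all inputs. A hedged sketch of building ID-list records that satisfy that check, using only the schema calls visible above (the helper name and limits are made up):

import numpy as np
from caffe2.python import schema

def make_id_list(limit):
    # An IdList-style record whose items carry an explicit categorical limit,
    # so record.items.metadata is not None.
    return schema.List(
        schema.Scalar(np.int64,
                      metadata=schema.Metadata(categorical_limit=limit)))

records = [make_id_list(1000), make_id_list(250)]

# Mirrors the merge_dim computation in the constructor above.
merge_dim = max(r.items.metadata.categorical_limit for r in records)
assert merge_dim == 1000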
Example #5
    def __init__(self, model, input_record, name='merged'):
        super(MergeIdLists, self).__init__(model, name, input_record)
        assert all(schema.equal_schemas(x, IdList) for x in input_record), \
            "Inputs to MergeIdLists should all be IdLists."

        assert all(record.items.metadata is not None
                   for record in self.input_record), \
            "Features without metadata are not supported"

        merge_dim = max(get_categorical_limit(record)
                        for record in self.input_record)
        assert merge_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.NewRecord(
            model.net, schema.List(
                schema.Scalar(
                    np.int64,
                    blob=model.net.NextBlob(name),
                    metadata=schema.Metadata(categorical_limit=merge_dim)
                )))
Example #6
    def __init__(self,
                 model,
                 input_record,
                 weight_optim=None,
                 name="position_weights"):
        super(PositionWeighted, self).__init__(model, name, input_record)

        assert isinstance(input_record, schema.List), "Incorrect input type"
        length_metadata = input_record.lengths.metadata
        max_length = (length_metadata.categorical_limit
                      if length_metadata is not None else None)
        if max_length is not None:
            self.shape = max_length
        else:
            self.shape = get_categorical_limit(input_record)
            logger.warning(
                '{}: categorical_limit of lengths is not available, using '
                'categorical_limit of the keys: {}'.format(
                    str(input_record.lengths()), self.shape))

        self.pos_w = model.net.NextScopedBlob(name + "_pos_w")
        self.params.append(
            LayerParameter(parameter=self.pos_w,
                           initializer=core.CreateOperator('ConstantFill', [],
                                                           self.pos_w,
                                                           shape=[
                                                               self.shape,
                                                           ],
                                                           value=1.0),
                           optimizer=weight_optim))

        self.output_schema = schema.Struct(
            ('position_weights',
             schema.Scalar((np.float32, self.shape),
                           model.net.NextScopedBlob(name + "_pos_w_gather"))))

        self.tags.update({Tags.HANDLE_AS_SPARSE_LAYER})
        self.tags.update({Tags.GRADIENT_FROM_PS})
Example #7
    def __init__(self,
                 model,
                 input_record,
                 weight_optim=None,
                 name="position_weights"):
        super(PositionWeighted, self).__init__(model, name, input_record)

        assert isinstance(input_record, schema.List), "Incorrect input type"
        length_metadata = input_record.lengths.metadata
        max_length = (length_metadata.categorical_limit
                      if length_metadata is not None else None)
        if max_length is not None:
            self.shape = max_length
        else:
            self.shape = get_categorical_limit(input_record)
            logger.warning(
                '{}: categorical_limit of lengths is not available, using '
                'categorical_limit of the keys: {}'.format(
                    str(input_record.lengths()), self.shape))

        self.pos_w = self.create_param(param_name='pos_w',
                                       shape=[
                                           self.shape,
                                       ],
                                       initializer=('ConstantFill', {
                                           'value': 1.0
                                       }),
                                       optimizer=weight_optim)

        self.output_schema = schema.Struct(
            ('position_weights',
             schema.Scalar((np.float32, self.shape),
                           self.get_next_blob_reference("pos_w_gather"))))

        self.tags.update({Tags.HANDLE_AS_SPARSE_LAYER})
        self.tags.update({Tags.GRADIENT_FROM_PS})
Example #8
    def __init__(self,
                 model,
                 input_record,
                 inner_shape,
                 reducer,
                 weight_init=None,
                 weight_optim=None,
                 name='sparse_lookup',
                 regularizer=None,
                 **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        if reducer == "PositionWeighted":
            assert _is_id_score_list(self.input_record), (
                "PositionWeighted only support IdScoreList, but got {} " +
                "please use PositionWeighted layer to convert IdList " +
                "to IdScoreList").format(repr(self.input_record))
            self.external_weights = input_record.values()

        elif reducer == "RecencyWeighted":
            assert _is_id_score_list(self.input_record), (
                "RecencyWeighted only supports IdScoreList.")
            self.external_weights = input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)
        assert input_dim > 0, (
            "{} should have categorical limit > 0, but got {}".format(
                get_key(input_record)(), input_dim))

        self.input_dim = input_dim
        self.shape = [input_dim] + inner_shape

        self.trainer_version = get_trainer_version_based_on_optim(weight_optim)

        default_init_op = self._get_default_init_op()

        self.weight_init = weight_init or default_init_op

        # If fp16 is used, make sure fp16 init op is used
        if self.trainer_version == "fp16":
            assert self.reducer in self._fp16_compatible_reducers, (
                "Fp16 training is enabled. The reducer specified is not supported. "
                "Got {}. Supported reducers: {}. Right now, in general, sum, mean, "
                "positional pooling are supported. Attention is not. Please check "
                "if there is fp16 trained sparse features using advanced pooling."
                .format(self.reducer, self._fp16_compatible_reducers))

            # if init op is UniformFill, we replace it directly
            if self.weight_init[0] == "UniformFill":
                self.weight_init = ("Float16UniformFill", self.weight_init[1])
            assert self.weight_init[0] in self._fp16_compatible_init_op_types, (
                "Fp16 training is enabled. Init op for weight parameter must be fp16 "
                "compatibale. Got {}. Supported ops: {}".format(
                    self.weight_init[0], self._fp16_compatible_init_op_types))

            assert regularizer is None, "Regularizer is not compatible with fp16"

        if _is_id_list(self.input_record):
            sparse_key = self.input_record.items()
        elif _is_id_score_list(self.input_record):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=sparse_key,
                                       average_length=avg_length),
                                   regularizer=regularizer)

        self.scale_bias_init = ('ConstantFill', {'value': 0.0})

        self.scale_bias = self.create_param(
            param_name='scale_bias',
            shape=[],
            initializer=self.scale_bias_init,
            optimizer=model.NoOptim,
        )

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
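The fp16 branch above only rewrites a UniformFill initializer in place and then requires the resulting op type to be fp16-compatible. A standalone sketch of that swap, with the compatible-op set assumed for illustration (the real class keeps it in _fp16_compatible_init_op_types):

# Assumed set, for illustration only.
FP16_COMPATIBLE_INIT_OPS = {"Float16UniformFill"}

def to_fp16_init(weight_init):
    """Return an (op_type, kwargs) init pair usable for fp16 training."""
    op_type, kwargs = weight_init
    if op_type == "UniformFill":
        # Same direct replacement as in __init__ above.
        op_type = "Float16UniformFill"
    assert op_type in FP16_COMPATIBLE_INIT_OPS, (
        "Init op {} is not fp16 compatible".format(op_type))
    return (op_type, kwargs)

print(to_fp16_init(('UniformFill', {'min': -0.1, 'max': 0.1})))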
Example #9
    def __init__(self,
                 model,
                 input_record,
                 inner_shape,
                 reducer,
                 weight_init=None,
                 weight_optim=None,
                 name='sparse_lookup',
                 regularizer=None,
                 **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        if reducer == "PositionWeighted":
            assert _is_id_score_list(self.input_record), (
                "PositionWeighted only support IdScoreList, but got {} " +
                "please use PositionWeighted layer to convert IdList " +
                "to IdScoreList").format(repr(self.input_record))
            self.external_weights = input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)
        assert input_dim > 0, (
            "{} should have categorical limit > 0, but got {}".format(
                get_key(input_record)(), input_dim))

        self.input_dim = input_dim
        self.shape = [input_dim] + inner_shape

        default_init_op = self._get_default_init_op()

        self.weight_init = weight_init or default_init_op

        if _is_id_list(self.input_record):
            sparse_key = self.input_record.items()
        elif _is_id_score_list(self.input_record):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=sparse_key,
                                       average_length=avg_length),
                                   regularizer=regularizer)

        self.scale_bias_init = ('ConstantFill', {'value': 0.0})

        self.scale_bias = self.create_param(
            param_name='scale_bias',
            shape=[],
            initializer=self.scale_bias_init,
            optimizer=model.NoOptim,
        )

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #10
    def __init__(self, model, input_record, inner_shape, reducer,
                 weight_init=None, weight_optim=None,
                 name='sparse_lookup', **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        if reducer == "PositionWeighted":
            assert _is_id_score_list(self.input_record), (
                "PositionWeighted only support IdScoreList, but got {} " +
                "please use PositionWeighted layer to convert IdList " +
                "to IdScoreList").format(repr(self.input_record))
            self.external_weights = input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)
        assert input_dim > 0, (
            "{} should have categorical limit > 0, but got {}".format(
                get_key(input_record)(), input_dim))

        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + inner_shape
        self.weight_init = weight_init if weight_init else (
            'UniformFill', {'min': -scale, 'max': scale})

        if _is_id_list(self.input_record):
            sparse_key = self.input_record.items()
        elif _is_id_score_list(self.input_record):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(
            param_name='w',
            shape=self.shape,
            initializer=self.weight_init,
            optimizer=weight_optim,
            ps_param=LayerPsParam(
                sparse_key=sparse_key,
                average_length=avg_length))

        self.scale_bias_init = ('ConstantFill', {'value': 0.0})

        self.scale_bias = self.create_param(
            param_name='scale_bias',
            shape=[],
            initializer=self.scale_bias_init,
            optimizer=model.NoOptim)

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #11
    def __init__(self,
                 model,
                 input_record,
                 inner_shape,
                 reducer,
                 weight_init=None,
                 weight_optim=None,
                 name='sparse_lookup',
                 regularizer=None,
                 use_external_weights=False,
                 **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        self.sparse_key = get_key(self.input_record)()
        logger.info("Setup the sparse lookup layer for " + self.sparse_key)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0} for {1}".\
            format(type(inner_shape), self.sparse_key)

        if reducer == "PositionWeighted":
            assert _is_id_score_list(self.input_record), (
                "PositionWeighted only support IdScoreList, but got {} for {}"
                + "please use PositionWeighted layer to convert IdList " +
                "to IdScoreList").format(repr(self.input_record),
                                         self.sparse_key)
            self.external_weights = self.input_record.values()

        elif reducer == "RecencyWeighted":
            assert _is_id_score_list(self.input_record), (
                "RecencyWeighted only supports IdScoreList, "
                "while the sparse feature {} is not.".format(self.sparse_key))
            self.external_weights = self.input_record.values()
        # TODO: create a new type of reducer with external weights to wrap
        # this and the above two cases since essentially their input formats
        # are the same.
        elif use_external_weights:
            assert _is_id_score_list(self.input_record), (
                "Use_external_weights only supports IdScoreList, "
                "while the sparse feature {} is not.".format(self.sparse_key))
            assert reducer in [
                "Sum", "WeightedSum"
            ], ("Use_external_weights only supports Sum reducer, "
                "while the reducer is {}.".format(reducer))
            self.external_weights = self.input_record.values()
        self.reducer = reducer
        self.use_external_weights = use_external_weights

        input_dim = get_categorical_limit(self.input_record)
        assert input_dim > 0, "{} should have categorical limit > 0, but got {}".format(
            self.sparse_key, input_dim)

        self.input_dim = input_dim
        self.shape = [input_dim] + inner_shape

        self.trainer_version = get_trainer_version_based_on_optim(weight_optim)

        default_init_op = self._get_default_init_op()

        self.weight_init = weight_init or default_init_op

        self.evicted_values = None
        if schema.equal_schemas(self.input_record,
                                IdListWithEvicted) or schema.equal_schemas(
                                    self.input_record,
                                    IdScoreListWithEvicted,
                                    check_field_types=False):
            self.evicted_values = self.input_record._evicted_values

        # If fp16 is used, make sure fp16 init op is used
        if self.trainer_version == "fp16":
            assert self.reducer in self._fp16_compatible_reducers or use_external_weights, (
                "Fp16 training is enabled. The reducer specified is not supported. "
                "Got {}. Supported reducers: {}. Right now, in general, sum, mean, "
                "positional pooling are supported. Attention is not. Please check "
                "if there is fp16 trained sparse features using advanced pooling."
                .format(self.reducer, self._fp16_compatible_reducers))

            # if init op is UniformFill, we replace it directly
            if self.weight_init[0] == "UniformFill":
                self.weight_init = ("Float16UniformFill", self.weight_init[1])
            assert self.weight_init[0] in self._fp16_compatible_init_op_types, (
                "Fp16 training is enabled. Init op for weight parameter must be fp16 "
                "compatibale. Got {}. Supported ops: {}".format(
                    self.weight_init[0], self._fp16_compatible_init_op_types))

            assert regularizer is None, "Regularizer is not compatible with fp16"

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=self.sparse_key,
                                       average_length=avg_length),
                                   regularizer=regularizer)
        if self.evicted_values:
            self.reinit_vec = self.create_param(
                param_name="reinit_vec",
                shape=inner_shape,
                initializer=self.weight_init,
                optimizer=model.NoOptim,
                regularizer=None,
            )

        self.scale_bias_init = ('ConstantFill', {'value': 0.0})

        self.scale_bias = self.create_param(
            param_name='scale_bias',
            shape=[],
            initializer=self.scale_bias_init,
            optimizer=model.NoOptim,
        )

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #12
    def __init__(self, model, input_record, inner_shape, reducer,
                 weight_init=None, weight_optim=None,
                 name='sparse_lookup', **kwargs):
        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        # TODO Add some asserts about input type
        assert reducer in self._supported_reducers, "Unsupported reducer: {}".\
            format(reducer)
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)

        assert input_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            model.net.NextScopedBlob(name + '_output'),
        )

        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + inner_shape
        self.weight_init = weight_init if weight_init else (
            'UniformFill', {'min': -scale, 'max': scale})

        self.w = model.net.NextScopedBlob(name + "_w")
        if schema.equal_schemas(self.input_record, IdList):
            sparse_key = self.input_record.items()
        elif schema.equal_schemas(self.input_record, IdScoreList):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None
        self.params.append(
            LayerParameter(
                parameter=self.w,
                initializer=core.CreateOperator(self.weight_init[0],
                                                [],
                                                self.w,
                                                shape=self.shape,
                                                **self.weight_init[1]
                                                ),
                optimizer=weight_optim,
                ps_param=LayerPsParam(
                    sparse_key=sparse_key,
                    average_length=avg_length
                )
            ))

        if reducer == 'PositionWeighted':
            self.pos_w = model.net.NextScopedBlob(name + "_pos_w")
            self.params.append(
                LayerParameter(
                    parameter=self.pos_w,
                    initializer=core.CreateOperator('ConstantFill',
                                                    [],
                                                    self.pos_w,
                                                    shape=[input_dim, ],
                                                    value=1.0
                                                    ),
                    optimizer=weight_optim
                ))