Example #1
    def add_ops(self, net):
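        # Copy the lengths blob through, pick the id blob (items for IdList,
        # keys for IdScoreList, whose values are also copied), then either
        # hash the ids with IndexHash or bucket them with Mod.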
        net.Copy(
            self.input_record.lengths(),
            self.output_schema.lengths()
        )
        if schema.equal_schemas(self.output_schema, IdList):
            input_blob = self.input_record.items()
            output_blob = self.output_schema.items()
        elif schema.equal_schemas(self.output_schema, IdScoreList):
            input_blob = self.input_record.keys()
            output_blob = self.output_schema.keys()
            net.Copy(
                self.input_record.values(),
                self.output_schema.values()
            )
        else:
            raise NotImplementedError()

        if self.use_hashing:
            net.IndexHash(
                input_blob, output_blob, seed=self.seed, modulo=self.modulo
            )
        else:
            net.Mod(
                input_blob, output_blob, divisor=self.modulo, sign_follow_divisor=True
            )
Example #2
    def add_ops(self, net):
        net.Copy(self.input_record.lengths(), self.output_schema.lengths())
        if schema.equal_schemas(self.output_schema, IdList):
            input_blob = self.input_record.items()
            output_blob = self.output_schema.items()
        elif schema.equal_schemas(self.output_schema, IdScoreList):
            input_blob = self.input_record.keys()
            output_blob = self.output_schema.keys()
            net.Copy(self.input_record.values(), self.output_schema.values())
        else:
            raise NotImplementedError()

        if self.use_hashing:
            net.IndexHash(input_blob,
                          output_blob,
                          seed=self.seed,
                          modulo=self.modulo)
        else:
            if self.use_divide_mod:
                quotient = net.Div([input_blob, self.divisor],
                                   [net.NextScopedBlob('quotient')])
                net.Mod(quotient,
                        output_blob,
                        divisor=self.modulo,
                        sign_follow_divisor=True)
            else:
                net.Mod(input_blob,
                        output_blob,
                        divisor=self.modulo,
                        sign_follow_divisor=True)
Example #3
    def add_ops(self, net):
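        # Alias the lengths (and, for IdScoreList, the values) to this
        # layer's blobs, then hash the id blobs with IndexHash.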
        if schema.equal_schemas(self.output_schema, IdList):
            input_blobs = self.input_record.items.field_blobs()
            output_blobs = self.output_schema.items.field_blobs()

            net.Alias(
                self.input_record.lengths.field_blobs(),
                self.lengths_blob.field_blobs()
            )
        elif schema.equal_schemas(self.output_schema, IdScoreList):
            input_blobs = self.input_record.keys.field_blobs()
            output_blobs = self.output_schema.keys.field_blobs()

            net.Alias(
                self.input_record.values.field_blobs(),
                self.values_blob.field_blobs()
            )
            net.Alias(
                self.input_record.lengths.field_blobs(),
                self.lengths_blob.field_blobs()
            )
        else:
            raise NotImplementedError()
        net.IndexHash(input_blobs,
                      output_blobs,
                      seed=self.seed,
                      modulo=self.modulo)
Example #4
 def add_ops(self, net):
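     # Select the pooling op from the reducer name (SparseLengths*,
     # SparseLengthsWeighted*, or SortedSegmentRange*) and run it with
     # the fp16 engine.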
     if schema.equal_schemas(self.input_record, IdList):
         if self.reducer in ['Sum', 'Mean']:
             net.__getattr__('SparseLengths' +
                             self.reducer)([
                                 self.w,
                                 self.input_record.items(),
                                 self.input_record.lengths()
                             ],
                                           self.output_schema.field_blobs(),
                                           engine='fp16')
         elif self.reducer == 'Sqrt':
             sqrt_weight = net.LengthsToWeights(
                 [self.input_record.lengths()],
                 [self.input_record.lengths() + '_sqrt'],
                 power=0.5)
             net.SparseLengthsWeightedSum([
                 self.w, sqrt_weight,
                 self.input_record.items(),
                 self.input_record.lengths()
             ],
                                          self.output_schema.field_blobs(),
                                          engine='fp16')
         else:
             table_rows = net.Gather([self.w, self.input_record.items()])
             segment_ids = net.LengthsToSegmentIds(
                 self.input_record.lengths(),
                 self.input_record.lengths() + '_sid')
             net.__getattr__('SortedSegmentRange' +
                             self.reducer)([table_rows, segment_ids],
                                           self.output_schema.field_blobs(),
                                           engine='fp16')
     elif schema.equal_schemas(self.input_record,
                               IdScoreList,
                               check_field_types=False):
         if self.reducer in ['Sum', 'Mean']:
             net.__getattr__('SparseLengthsWeighted' +
                             self.reducer)([
                                 self.w,
                                 self.input_record.values(),
                                 self.input_record.keys(),
                                 self.input_record.lengths()
                             ],
                                           self.output_schema.field_blobs(),
                                           engine='fp16')
         elif self.reducer == 'PositionWeighted':
             net.SparseLengthsWeightedSum([
                 self.w, self.external_weights,
                 self.input_record.keys(),
                 self.input_record.lengths()
             ],
                                          self.output_schema.field_blobs(),
                                          grad_on_weights=1,
                                          engine='fp16')
         else:
             raise "Only Sum, Mean is supported for IdScoreList input." +\
                 "Trying to create with {}".format(self.reducer)
     else:
         raise "Unsupported input type {0}".format(self.input_record)
Example #5
    def add_ops(self, net):
        if schema.equal_schemas(self.input_record, IdList):
            if self.reducer == 'Sum':
                net.SparseLengthsSum(
                    [
                        self.w,
                        self.input_record.items(),
                        self.input_record.lengths()
                    ],
                    self.output_schema.field_blobs(),
                    engine='fp16'
                )
            elif self.reducer == 'PositionWeighted':
                inc_seq = net.LengthsRangeFill(
                    [self.input_record.lengths()],
                    self.input_record.lengths() + '_seq'
                )
                gather_pos_w = net.Gather(
                    [self.pos_w, inc_seq], self.pos_w + '_gather')

                net.SparseLengthsWeightedSum(
                    [
                        self.w,
                        gather_pos_w,
                        self.input_record.items(),
                        self.input_record.lengths()
                    ],
                    self.output_schema.field_blobs(),
                    grad_on_weights=1,
                    engine='fp16'
                )
            else:
                table_rows = net.Gather([self.w, self.input_record.keys()])
                segment_ids = net.LengthsToSegmentIds(
                    self.input_record.lengths())
                net.__getattr__('SortedSegmentRange' + self.reducer)(
                    [table_rows, segment_ids],
                    self.output_schema.field_blobs(),
                    engine='fp16'
                )
        elif schema.equal_schemas(self.input_record, IdScoreList):
            if self.reducer == 'Sum':
                net.SparseLengthsWeightedSum(
                    [
                        self.w,
                        self.input_record.values(),
                        self.input_record.keys(),
                        self.input_record.lengths()
                    ],
                    self.output_schema.field_blobs(),
                    engine='fp16'
                )
            else:
                raise "Only Sum is supported for IdScoreList input." +\
                    "Trying to create with {}".format(self.reducer)
        else:
            raise "Unsupported input type {0}".format(self.input_record)
Example #6
 def add_ops(self, net):
     if schema.equal_schemas(self.input_record, IdList):
         self._add_ops_id_list(net)
     elif schema.equal_schemas(self.input_record,
                               IdScoreList,
                               check_field_types=False):
         self._add_ops_id_score_list(net)
     else:
         raise "Unsupported input type {0}".format(self.input_record)
Example #7
def get_key(record):
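    # Return the id field of the record: 'values' for an IdList,
    # 'values:keys' for an IdScoreList.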
    if schema.equal_schemas(record, IdList):
        key = 'values'
    elif schema.equal_schemas(record, IdScoreList, check_field_types=False):
        key = 'values:keys'
    else:
        raise NotImplementedError('Not implemented for {}'.format(record))
    assert record[key].metadata is not None, (
        "Blob {} doesn't have metadata".format(str(record[key]())))
    return record[key]
Example #8
def get_categorical_limit(record):
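    # The categorical limit lives in the metadata of the id field
    # ('items' for IdList, 'keys' for IdScoreList).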
    if schema.equal_schemas(record, IdList):
        key = 'items'
    elif schema.equal_schemas(record, IdScoreList, check_field_types=False):
        key = 'keys'
    else:
        raise NotImplementedError()
    assert record[key].metadata is not None, (
        "Blob {} doesn't have metadata".format(str(record[key]())))
    return record[key].metadata.categorical_limit
Example #9
def get_key(record):
    if schema.equal_schemas(record, IdList):
        key = 'values'
    elif schema.equal_schemas(record, IdScoreList, check_field_types=False):
        key = 'values:keys'
    else:
        raise NotImplementedError('Not implemented for {}'.format(record))
    assert record[key].metadata is not None, (
        "Blob {} doesn't have metadata".format(str(record[key]())))
    return record[key]
Example #10
    def __init__(self, model, input_record, inner_shape, reducer,
                 weight_init=None, weight_optim=None,
                 name='sparse_lookup', **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        if reducer == "PositionWeighted":
            self.external_weights = input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)
        assert input_dim is not None, "Unbounded features are not supported"

        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + inner_shape
        self.weight_init = weight_init if weight_init else (
            'UniformFill', {'min': -scale, 'max': scale})

        if schema.equal_schemas(self.input_record, IdList):
            sparse_key = self.input_record.items()
        elif schema.equal_schemas(
                self.input_record,
                IdScoreList,
                check_field_types=False):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=sparse_key,
                                       average_length=avg_length
                                   ))

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #11
    def add_ops(self, net):
        if schema.equal_schemas(self.input_record, IdList):
            if self.reducer == 'Sum':
                net.SparseLengthsSum(
                    [
                        self.w,
                        self.input_record.items(),
                        self.input_record.lengths()
                    ],
                    self.output_schema.field_blobs()
                )
            elif self.reducer == 'PositionWeighted':
                inc_seq = net.LengthsRangeFill(
                    [self.input_record.lengths()],
                    self.input_record.lengths() + '_seq'
                )
                gather_pos_w = net.Gather(
                    [self.pos_w, inc_seq], self.pos_w + '_gather')

                net.SparseLengthsWeightedSum(
                    [
                        self.w,
                        gather_pos_w,
                        self.input_record.items(),
                        self.input_record.lengths()
                    ],
                    self.output_schema.field_blobs(),
                    grad_on_weights=1
                )
            else:
                table_rows = net.Gather([self.w, self.input_record.keys()])
                segment_ids = net.LengthsToSegmentIds(
                    self.input_record.lengths())
                net.__getattr__('SortedSegmentRange' + self.reducer)(
                    [table_rows, segment_ids],
                    self.output_schema.field_blobs()
                )
        elif schema.equal_schemas(self.input_record, IdScoreList):
            if self.reducer == 'Sum':
                net.SparseLengthsWeightedSum(
                    [
                        self.w,
                        self.input_record.values(),
                        self.input_record.keys(),
                        self.input_record.lengths()
                    ],
                    self.output_schema.field_blobs()
                )
            else:
                raise "Only Sum is supported for IdScoreList input." +\
                    "Trying to create with {}".format(self.reducer)
        else:
            raise "Unsupported input type {0}".format(self.input_record)
Example #12
 def add_ops(self, net):
     if schema.equal_schemas(self.output_schema, IdList):
         input_blobs = self.input_record.items.field_blobs()
         output_blobs = self.output_schema.items.field_blobs()
     elif schema.equal_schemas(self.output_schema, IdScoreList):
         input_blobs = self.input_record.keys.field_blobs()
         output_blobs = self.output_schema.keys.field_blobs()
     else:
         raise NotImplementedError()
     net.IndexHash(input_blobs,
                   output_blobs,
                   seed=self.seed,
                   modulo=self.modulo)
Example #13
    def add_ops(self, net):
        cur_scope = get_current_scope()
        version = get_sparse_lookup_predictor_version(**cur_scope.get(
            get_sparse_lookup_predictor_version.__name__, {'version': 'fp32'}))

        if schema.equal_schemas(self.input_record, IdList):
            self._add_ops_id_list(net, version=version)
        elif schema.equal_schemas(self.input_record,
                                  IdScoreList,
                                  check_field_types=False):
            self._add_ops_id_score_list(net, version=version)
        else:
            raise "Unsupported input type {0}".format(self.input_record)
Example #14
    def __init__(self,
                 model,
                 input_record,
                 seed=0,
                 modulo=None,
                 use_hashing=True,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.use_hashing = use_hashing
        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=input_record.lengths,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=input_record.values,
                lengths_blob=input_record.lengths,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)

        # operators in this layer do not have CUDA implementation yet.
        # In addition, since the sparse feature keys that we are hashing are
        # typically on CPU originally, it makes sense to have this layer on CPU.
        self.tags.update([Tags.CPU_ONLY])
Example #15
    def add_ops(self, net):
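        # Read the requested predictor version from the current scope,
        # defaulting to fp32.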
        cur_scope = get_current_scope()
        version = get_sparse_lookup_predictor_version(
            **cur_scope.get(get_sparse_lookup_predictor_version.__name__,
                            {'version': 'fp32'}))

        if schema.equal_schemas(self.input_record, IdList):
            self._add_ops_id_list(net, version=version)
        elif schema.equal_schemas(self.input_record,
                                  IdScoreList,
                                  check_field_types=False):
            self._add_ops_id_score_list(net, version=version)
        else:
            raise "Unsupported input type {0}".format(self.input_record)
Example #16
    def __init__(self,
                 model,
                 input_record,
                 seed,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.lengths_blob = schema.Scalar(
            np.int32,
            model.net.NextScopedBlob(name + "_lengths"),
        )

        if schema.equal_schemas(input_record, IdList):
            self.modulo = self.extract_hash_size(input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, model.net.NextScopedBlob(name + "_hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=self.lengths_blob,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.values_blob = schema.Scalar(
                np.float32,
                model.net.NextScopedBlob(name + "_values"),
            )
            self.modulo = self.extract_hash_size(input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, model.net.NextScopedBlob(name + "_hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=self.values_blob,
                lengths_blob=self.lengths_blob,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"
Example #17
    def __init__(self,
                 model,
                 input_record,
                 name='select_record_by_context',
                 check_field_metas=True,
                 use_copy=False,
                 default_output_record_field=None,
                 **kwargs):
        super(SelectRecordByContext, self).__init__(model, name, input_record,
                                                    **kwargs)

        assert isinstance(input_record, schema.Struct)
        assert len(input_record) > 1

        self.use_copy = use_copy
        self.default_output_record = (
            input_record[default_output_record_field] if
            (default_output_record_field is not None) else None)
        ref_record = input_record[0]
        for record in input_record:
            assert schema.equal_schemas(record,
                                        ref_record,
                                        check_field_metas=check_field_metas)

        self.output_schema = schema.NewRecord(model.net, ref_record)
Example #18
    def __init__(
        self,
        model,
        input_record,
        name='select_record_by_context',
        check_field_metas=True,
        use_copy=False,
        default_output_record_field=None,
        **kwargs
    ):
        super(SelectRecordByContext, self).__init__(model, name, input_record,
                                                    **kwargs)

        assert isinstance(input_record, schema.Struct)
        assert len(input_record) > 1

        self.use_copy = use_copy
        self.default_output_record = (
            input_record[default_output_record_field]
            if (default_output_record_field is not None) else None
        )
        ref_record = input_record[0]
        for record in input_record:
            assert schema.equal_schemas(record, ref_record,
                                        check_field_metas=check_field_metas)

        self.output_schema = schema.NewRecord(model.net, ref_record)
Example #19
    def __init__(self,
                 model,
                 input_record,
                 seed=0,
                 modulo=None,
                 use_hashing=True,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.use_hashing = use_hashing
        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
                expected_value=input_record.items.metadata.expected_value)
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdList)
            self.output_schema.items.set_metadata(metadata)

        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
                expected_value=input_record.keys.metadata.expected_value)
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdScoreList)
            self.output_schema.keys.set_metadata(metadata)

        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)
        if input_record.lengths.metadata:
            self.output_schema.lengths.set_metadata(
                input_record.lengths.metadata)

        # operators in this layer do not have CUDA implementation yet.
        # In addition, since the sparse feature keys that we are hashing are
        # typically on CPU originally, it makes sense to have this layer on CPU.
        self.tags.update([Tags.CPU_ONLY])
Example #20
    def __init__(self, model, input_record, seed,
                 name='sparse_feature_hash', **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record, **kwargs)

        self.seed = seed
        self.lengths_blob = schema.Scalar(
            np.int32,
            self.get_next_blob_reference("lengths"),
        )

        if schema.equal_schemas(input_record, IdList):
            self.modulo = self.extract_hash_size(input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64,
                self.get_next_blob_reference("hashed_idx")
            )
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=self.lengths_blob,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.values_blob = schema.Scalar(
                np.float32,
                self.get_next_blob_reference("values"),
            )
            self.modulo = self.extract_hash_size(input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64,
                self.get_next_blob_reference("hashed_idx")
            )
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=self.values_blob,
                lengths_blob=self.lengths_blob,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"
Example #21
    def add_ops(self, net):
        if schema.equal_schemas(self.output_schema, IdList):
            input_blob = self.input_record.items()
            output_blob = self.output_schema.items()
        elif schema.equal_schemas(self.output_schema, IdScoreList):
            input_blob = self.input_record.keys()
            output_blob = self.output_schema.keys()
        else:
            raise NotImplementedError()

        if self.use_hashing:
            net.IndexHash(input_blob,
                          output_blob,
                          seed=self.seed,
                          modulo=self.modulo)
        else:
            net.Mod(input_blob, output_blob, divisor=self.modulo)
Example #22
    def __init__(self,
                 model,
                 input_record,
                 seed=0,
                 modulo=None,
                 use_hashing=True,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.use_hashing = use_hashing
        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=input_record.lengths,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=input_record.values,
                lengths_blob=input_record.lengths,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)
Example #23
    def __init__(self, model, input_record, seed=0, modulo=None,
                 use_hashing=True, name='sparse_feature_hash', **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record, **kwargs)

        self.seed = seed
        self.use_hashing = use_hashing
        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64,
                self.get_next_blob_reference("hashed_idx")
            )
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=input_record.lengths,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64,
                self.get_next_blob_reference("hashed_idx")
            )
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=input_record.values,
                lengths_blob=input_record.lengths,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)
Example #24
    def __init__(self, model, input_record, seed=0, modulo=None,
                 use_hashing=True, name='sparse_feature_hash', **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record, **kwargs)

        self.seed = seed
        self.use_hashing = use_hashing
        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
                expected_value=input_record.items.metadata.expected_value
            )
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdList)
            self.output_schema.items.set_metadata(metadata)

        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
                expected_value=input_record.keys.metadata.expected_value
            )
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdScoreList)
            self.output_schema.keys.set_metadata(metadata)

        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)
        if input_record.lengths.metadata:
            self.output_schema.lengths.set_metadata(input_record.lengths.metadata)

        # operators in this layer do not have CUDA implementation yet.
        # In addition, since the sparse feature keys that we are hashing are
        # typically on CPU originally, it makes sense to have this layer on CPU.
        self.tags.update([Tags.CPU_ONLY])
Example #25
    def __init__(self, model, input_record, name='select_record_by_context',
                 check_field_metas=True, **kwargs):
        super(SelectRecordByContext, self).__init__(model, name, input_record,
                                                    **kwargs)

        assert isinstance(input_record, schema.Struct)
        assert len(input_record) > 1

        ref_record = input_record[0]
        for record in input_record:
            assert schema.equal_schemas(record, ref_record,
                                        check_field_metas=check_field_metas)

        self.output_schema = schema.NewRecord(model.net, ref_record)
Example #27
def almost_equal_schemas(
    record,
    original_schema,
    check_field_names=True,
    check_field_types=True,
    check_field_metas=False,
):
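    # Like schema.equal_schemas, but also accepts the *WithEvicted
    # variants of IdList and IdScoreList.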
    if original_schema == IdList:
        return schema.equal_schemas(
            record,
            IdList,
            check_field_names=check_field_names,
            check_field_types=check_field_types,
            check_field_metas=check_field_metas,
        ) or schema.equal_schemas(
            record,
            IdListWithEvicted,
            check_field_names=check_field_names,
            check_field_types=check_field_types,
            check_field_metas=check_field_metas,
        )
    elif original_schema == IdScoreList:
        return schema.equal_schemas(
            record,
            IdScoreList,
            check_field_names=check_field_names,
            check_field_types=check_field_types,
            check_field_metas=check_field_metas,
        ) or schema.equal_schemas(
            record,
            IdScoreListWithEvicted,
            check_field_names=check_field_names,
            check_field_types=check_field_types,
            check_field_metas=check_field_metas,
        )
    else:
        return schema.equal_schemas(record, original_schema)
Example #28
    def __init__(self,
                 model,
                 input_record,
                 dropout_prob_train,
                 dropout_prob_eval,
                 dropout_prob_predict,
                 replacement_value,
                 name='sparse_dropout',
                 **kwargs):

        super(SparseDropoutWithReplacement,
              self).__init__(model, name, input_record, **kwargs)
        assert schema.equal_schemas(input_record,
                                    IdList), "Incorrect input type"

        self.dropout_prob_train = float(dropout_prob_train)
        self.dropout_prob_eval = float(dropout_prob_eval)
        self.dropout_prob_predict = float(dropout_prob_predict)
        self.replacement_value = int(replacement_value)
        assert (self.dropout_prob_train >= 0 and
                self.dropout_prob_train <= 1.0), \
            "Expected 0 <= dropout_prob_train <= 1, but got %s" \
            % self.dropout_prob_train
        assert (self.dropout_prob_eval >= 0 and
                self.dropout_prob_eval <= 1.0), \
            "Expected 0 <= dropout_prob_eval <= 1, but got %s" \
            % dropout_prob_eval
        assert (self.dropout_prob_predict >= 0 and
                self.dropout_prob_predict <= 1.0), \
            "Expected 0 <= dropout_prob_predict <= 1, but got %s" \
            % dropout_prob_predict
        assert(self.dropout_prob_train > 0 or
               self.dropout_prob_eval > 0 or
               self.dropout_prob_predict > 0), \
            "Ratios all set to 0.0 for train, eval and predict"

        self.output_schema = schema.NewRecord(model.net, IdList)
        if input_record.lengths.metadata:
            self.output_schema.lengths.set_metadata(
                input_record.lengths.metadata)
        if input_record.items.metadata:
            self.output_schema.items.set_metadata(input_record.items.metadata)
Example #29
    def __init__(self, model, input_record, name='merged'):
        super(MergeIdLists, self).__init__(model, name, input_record)
        assert all(schema.equal_schemas(x, IdList) for x in input_record), \
            "Inputs to MergeIdLists should all be IdLists."

        assert all(record.items.metadata is not None
                   for record in self.input_record), \
            "Features without metadata are not supported"

        merge_dim = max(get_categorical_limit(record)
                        for record in self.input_record)
        assert merge_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.NewRecord(
            model.net, schema.List(
                schema.Scalar(
                    np.int64,
                    blob=model.net.NextBlob(name),
                    metadata=schema.Metadata(categorical_limit=merge_dim)
                )))
Example #30
    def __init__(self, model, input_record, name='merged'):
        super(MergeIdLists, self).__init__(model, name, input_record)
        assert all(schema.equal_schemas(x, IdList) for x in input_record), \
            "Inputs to MergeIdLists should all be IdLists."

        assert all(record.items.metadata is not None
                   for record in self.input_record), \
            "Features without metadata are not supported"

        merge_dim = max(
            get_categorical_limit(record) for record in self.input_record)
        assert merge_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.NewRecord(
            model.net,
            schema.List(
                schema.Scalar(
                    np.int64,
                    blob=model.net.NextBlob(name),
                    metadata=schema.Metadata(categorical_limit=merge_dim))))
Example #31
    def testMergeIdListsLayer(self, num_inputs, batch_size):
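        # Build random (lengths, values) pairs, one per input IdList.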
        inputs = []
        for _ in range(num_inputs):
            lengths = np.random.randint(5, size=batch_size).astype(np.int32)
            size = lengths.sum()
            values = np.random.randint(1, 10, size=size).astype(np.int64)
            inputs.append(lengths)
            inputs.append(values)
        input_schema = schema.Tuple(*[
            schema.List(
                schema.Scalar(dtype=np.int64,
                              metadata=schema.Metadata(categorical_limit=20)))
            for _ in range(num_inputs)
        ])

        input_record = schema.NewRecord(self.model.net, input_schema)
        schema.FeedRecord(input_record, inputs)
        output_schema = self.model.MergeIdLists(input_record)
        assert schema.equal_schemas(output_schema,
                                    IdList,
                                    check_field_names=False)
Example #32
    def testMergeIdListsLayer(self, num_inputs, batch_size):
        inputs = []
        for _ in range(num_inputs):
            lengths = np.random.randint(5, size=batch_size).astype(np.int32)
            size = lengths.sum()
            values = np.random.randint(1, 10, size=size).astype(np.int64)
            inputs.append(lengths)
            inputs.append(values)
        input_schema = schema.Tuple(
            *[schema.List(
                schema.Scalar(dtype=np.int64, metadata=schema.Metadata(
                    categorical_limit=20
                ))) for _ in range(num_inputs)]
        )

        input_record = schema.NewRecord(self.model.net, input_schema)
        schema.FeedRecord(input_record, inputs)
        output_schema = self.model.MergeIdLists(input_record)
        assert schema.equal_schemas(
            output_schema, IdList,
            check_field_names=False)
Example #33
    def __init__(self, model, input_record, inner_shape, reducer,
                 weight_init=None, weight_optim=None,
                 name='sparse_lookup', **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        if reducer == "PositionWeighted":
            self.external_weights = input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)
        assert input_dim is not None, "Unbounded features are not supported"

        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + inner_shape
        self.weight_init = weight_init if weight_init else (
            'UniformFill', {'min': -scale, 'max': scale})

        if schema.equal_schemas(self.input_record, IdList):
            sparse_key = self.input_record.items()
        elif schema.equal_schemas(
                self.input_record,
                IdScoreList,
                check_field_types=False):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(
            param_name='w',
            shape=self.shape,
            initializer=self.weight_init,
            optimizer=weight_optim,
            ps_param=LayerPsParam(
                sparse_key=sparse_key,
                average_length=avg_length))

        self.scale_bias_init = ('ConstantFill', {'value': 0.0})

        self.scale_bias = self.create_param(
            param_name='scale_bias',
            shape=[],
            initializer=self.scale_bias_init,
            optimizer=model.NoOptim)

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #34
def _is_id_score_list(input_record):
    return schema.equal_schemas(input_record,
                                IdScoreList,
                                check_field_types=False)
Example #35
def _is_id_list(input_record):
    return schema.equal_schemas(input_record, IdList)
Example #36
    def __init__(self, model, input_record, inner_shape, reducer,
                 weight_init=None, weight_optim=None,
                 name='sparse_lookup', **kwargs):
        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        # TODO Add some asserts about input type
        assert reducer in self._supported_reducers, "Unsupported reducer: {}".\
            format(reducer)
        self.reducer = reducer

        input_dim = get_categorical_limit(input_record)

        assert input_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            model.net.NextScopedBlob(name + '_output'),
        )

        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + inner_shape
        self.weight_init = weight_init if weight_init else (
            'UniformFill', {'min': -scale, 'max': scale})

        self.w = model.net.NextScopedBlob(name + "_w")
        if schema.equal_schemas(self.input_record, IdList):
            sparse_key = self.input_record.items()
        elif schema.equal_schemas(self.input_record, IdScoreList):
            sparse_key = self.input_record.keys()
        else:
            raise NotImplementedError()

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None
        self.params.append(
            LayerParameter(
                parameter=self.w,
                initializer=core.CreateOperator(self.weight_init[0],
                                                [],
                                                self.w,
                                                shape=self.shape,
                                                **self.weight_init[1]
                                                ),
                optimizer=weight_optim,
                ps_param=LayerPsParam(
                    sparse_key=sparse_key,
                    average_length=avg_length
                )
            ))

        if reducer == 'PositionWeighted':
            self.pos_w = model.net.NextScopedBlob(name + "_pos_w")
            self.params.append(
                LayerParameter(
                    parameter=self.pos_w,
                    initializer=core.CreateOperator('ConstantFill',
                                                    [],
                                                    self.pos_w,
                                                    shape=[input_dim, ],
                                                    value=1.0
                                                    ),
                    optimizer=weight_optim
                ))
Example #37
    def testGatherRecord(self):
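        # Gather rows 1, 3 and 4 of a record with dense, list and
        # nested-list fields, then check each gathered blob.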
        indices = np.array([1, 3, 4], dtype=np.int32)
        dense = np.array(list(range(20)), dtype=np.float32).reshape(10, 2)
        lengths = np.array(list(range(10)), dtype=np.int32)
        items = np.array(list(range(lengths.sum())), dtype=np.int64)
        items_lengths = np.array(list(range(lengths.sum())), dtype=np.int32)
        items_items = np.array(list(range(items_lengths.sum())), dtype=np.int64)
        record = self.new_record(schema.Struct(
            ('dense', schema.Scalar(np.float32)),
            ('sparse', schema.Struct(
                ('list', schema.List(np.int64)),
                ('list_of_list', schema.List(schema.List(np.int64))),
            )),
            ('empty_struct', schema.Struct())
        ))
        indices_record = self.new_record(schema.Scalar(np.int32))
        input_record = schema.Struct(
            ('indices', indices_record),
            ('record', record),
        )
        schema.FeedRecord(
            input_record,
            [indices, dense, lengths, items, lengths, items_lengths,
             items_items])
        gathered_record = self.model.GatherRecord(input_record)
        self.assertTrue(schema.equal_schemas(gathered_record, record))

        self.run_train_net_forward_only()
        gathered_dense = workspace.FetchBlob(gathered_record.dense())
        np.testing.assert_array_equal(
            np.concatenate([dense[i:i + 1] for i in indices]), gathered_dense)
        gathered_lengths = workspace.FetchBlob(
            gathered_record.sparse.list.lengths())
        np.testing.assert_array_equal(
            np.concatenate([lengths[i:i + 1] for i in indices]),
            gathered_lengths)
        gathered_items = workspace.FetchBlob(
            gathered_record.sparse.list.items())
        offsets = lengths.cumsum() - lengths
        np.testing.assert_array_equal(
            np.concatenate([
                items[offsets[i]: offsets[i] + lengths[i]]
                for i in indices
            ]), gathered_items)

        gathered_items_lengths = workspace.FetchBlob(
            gathered_record.sparse.list_of_list.items.lengths())
        np.testing.assert_array_equal(
            np.concatenate([
                items_lengths[offsets[i]: offsets[i] + lengths[i]]
                for i in indices
            ]),
            gathered_items_lengths
        )

        nested_offsets = []
        nested_lengths = []
        nested_offset = 0
        j = 0
        for l in lengths:
            nested_offsets.append(nested_offset)
            nested_length = 0
            for _i in range(l):
                nested_offset += items_lengths[j]
                nested_length += items_lengths[j]
                j += 1
            nested_lengths.append(nested_length)

        gathered_items_items = workspace.FetchBlob(
            gathered_record.sparse.list_of_list.items.items())
        np.testing.assert_array_equal(
            np.concatenate([
                items_items[nested_offsets[i]:
                            nested_offsets[i] + nested_lengths[i]]
                for i in indices
            ]),
            gathered_items_items
        )
Example #38
 def get_fp16_compatible_parameters(self):
     if (self.reducer == 'Sum'
             and schema.equal_schemas(self.input_record, IdList)):
         return [self.w]
     return []
Example #39
def _is_id_score_list(input_record):
    return schema.equal_schemas(input_record,
                                IdScoreList,
                                check_field_types=False)
Example #40
    def __init__(self,
                 model,
                 input_record,
                 seed=0,
                 modulo=None,
                 use_hashing=True,
                 use_divide_mod=False,
                 divisor=None,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        assert use_hashing + use_divide_mod < 2, "use_hashing and use_divide_mod cannot be set true at the same time."

        if use_divide_mod:
            assert divisor >= 1, 'Unexpected divisor: {}'.format(divisor)

            self.divisor = self.create_param(
                param_name='divisor',
                shape=[1],
                initializer=('GivenTensorInt64Fill', {
                    'values': np.array([divisor])
                }),
                optimizer=model.NoOptim)

        self.seed = seed
        self.use_hashing = use_hashing
        self.use_divide_mod = use_divide_mod

        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
                expected_value=input_record.items.metadata.expected_value)
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdList)
            self.output_schema.items.set_metadata(metadata)

        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
                expected_value=input_record.keys.metadata.expected_value)
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdScoreList)
            self.output_schema.keys.set_metadata(metadata)

        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)
        if input_record.lengths.metadata:
            self.output_schema.lengths.set_metadata(
                input_record.lengths.metadata)

        # operators in this layer do not have CUDA implementation yet.
        # In addition, since the sparse feature keys that we are hashing are
        # typically on CPU originally, it makes sense to have this layer on CPU.
        self.tags.update([Tags.CPU_ONLY])
Example #41
    def __init__(self,
                 model,
                 input_record,
                 inner_shape,
                 reducer,
                 weight_init=None,
                 weight_optim=None,
                 name='sparse_lookup',
                 regularizer=None,
                 **kwargs):

        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        self.sparse_key = get_key(self.input_record)()
        logger.info("Setup the sparse lookup layer for " + self.sparse_key)

        # TODO Add some asserts about input type
        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0} for {1}".\
            format(type(inner_shape), self.sparse_key)

        if reducer == "PositionWeighted":
            assert _is_id_score_list(self.input_record), (
                "PositionWeighted only support IdScoreList, but got {} for {}"
                + "please use PositionWeighted layer to convert IdList " +
                "to IdScoreList").format(repr(self.input_record),
                                         self.sparse_key)
            self.external_weights = self.input_record.values()

        elif reducer == "RecencyWeighted":
            assert _is_id_score_list(self.input_record), (
                "RecencyWeighted only supports IdScoreList, "
                "while the sparse feature {} is not.".format(self.sparse_key))
            self.external_weights = self.input_record.values()
        self.reducer = reducer

        input_dim = get_categorical_limit(self.input_record)
        assert input_dim > 0, (
            "{} should have categorical limit > 0, but got {}".format(
                self.sparse_key, input_dim))

        self.input_dim = input_dim
        self.shape = [input_dim] + inner_shape

        self.trainer_version = get_trainer_version_based_on_optim(weight_optim)

        default_init_op = self._get_default_init_op()

        self.weight_init = weight_init or default_init_op

        self.evicted_values = None
        if schema.equal_schemas(self.input_record, IdListWithEvicted) or \
            schema.equal_schemas(self.input_record, IdScoreListWithEvicted,
                                 check_field_types=False):
            self.evicted_values = self.input_record._evicted_values

        # If fp16 is used, make sure fp16 init op is used
        if self.trainer_version == "fp16":
            assert self.reducer in self._fp16_compatible_reducers, (
                "Fp16 training is enabled. The reducer specified is not supported. "
                "Got {}. Supported reducers: {}. Right now, in general, sum, mean, "
                "positional pooling are supported. Attention is not. Please check "
                "if there is fp16 trained sparse features using advanced pooling."
                .format(self.reducer, self._fp16_compatible_reducers))

            # if init op is UniformFill, we replace it directly
            if self.weight_init[0] == "UniformFill":
                self.weight_init = ("Float16UniformFill", self.weight_init[1])
            assert self.weight_init[0] in self._fp16_compatible_init_op_types, (
                "Fp16 training is enabled. Init op for weight parameter must be fp16 "
                "compatibale. Got {}. Supported ops: {}".format(
                    self.weight_init[0], self._fp16_compatible_init_op_types))

            assert regularizer is None, "Regularizer is not compatible with fp16"

        if self.input_record.lengths.metadata:
            avg_length = self.input_record.lengths.metadata.expected_value
        else:
            avg_length = None

        self.w = self.create_param(param_name='w',
                                   shape=self.shape,
                                   initializer=self.weight_init,
                                   optimizer=weight_optim,
                                   ps_param=LayerPsParam(
                                       sparse_key=self.sparse_key,
                                       average_length=avg_length),
                                   regularizer=regularizer)
        if self.evicted_values:
            self.reinit_vec = self.create_param(
                param_name="reinit_vec",
                shape=inner_shape,
                initializer=self.weight_init,
                optimizer=model.NoOptim,
                regularizer=None,
            )

        self.scale_bias_init = ('ConstantFill', {'value': 0.0})

        self.scale_bias = self.create_param(
            param_name='scale_bias',
            shape=[],
            initializer=self.scale_bias_init,
            optimizer=model.NoOptim,
        )

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            self.get_next_blob_reference('output'),
        )
Example #42
    def testGatherRecord(self):
        indices = np.array([1, 3, 4], dtype=np.int32)
        dense = np.array(range(20), dtype=np.float32).reshape(10, 2)
        lengths = np.array(range(10), dtype=np.int32)
        items = np.array(range(lengths.sum()), dtype=np.int64)
        items_lengths = np.array(range(lengths.sum()), dtype=np.int32)
        items_items = np.array(range(items_lengths.sum()), dtype=np.int64)
        record = self.new_record(
            schema.Struct(
                ('dense', schema.Scalar(np.float32)),
                ('sparse',
                 schema.Struct(
                     ('list', schema.List(np.int64)),
                     ('list_of_list', schema.List(schema.List(np.int64))),
                 )), ('empty_struct', schema.Struct())))
        indices_record = self.new_record(schema.Scalar(np.int32))
        input_record = schema.Struct(
            ('indices', indices_record),
            ('record', record),
        )
        schema.FeedRecord(input_record, [
            indices, dense, lengths, items, lengths, items_lengths, items_items
        ])
        gathered_record = self.model.GatherRecord(input_record)
        self.assertTrue(schema.equal_schemas(gathered_record, record))

        self.run_train_net_forward_only()
        gathered_dense = workspace.FetchBlob(gathered_record.dense())
        np.testing.assert_array_equal(
            np.concatenate([dense[i:i + 1] for i in indices]), gathered_dense)
        gathered_lengths = workspace.FetchBlob(
            gathered_record.sparse.list.lengths())
        np.testing.assert_array_equal(
            np.concatenate([lengths[i:i + 1] for i in indices]),
            gathered_lengths)
        gathered_items = workspace.FetchBlob(
            gathered_record.sparse.list.items())
        offsets = lengths.cumsum() - lengths
        np.testing.assert_array_equal(
            np.concatenate(
                [items[offsets[i]:offsets[i] + lengths[i]] for i in indices]),
            gathered_items)

        gathered_items_lengths = workspace.FetchBlob(
            gathered_record.sparse.list_of_list.items.lengths())
        np.testing.assert_array_equal(
            np.concatenate([
                items_lengths[offsets[i]:offsets[i] + lengths[i]]
                for i in indices
            ]), gathered_items_lengths)

        nested_offsets = []
        nested_lengths = []
        nested_offset = 0
        j = 0
        for l in lengths:
            nested_offsets.append(nested_offset)
            nested_length = 0
            for _i in range(l):
                nested_offset += items_lengths[j]
                nested_length += items_lengths[j]
                j += 1
            nested_lengths.append(nested_length)

        gathered_items_items = workspace.FetchBlob(
            gathered_record.sparse.list_of_list.items.items())
        np.testing.assert_array_equal(
            np.concatenate([
                items_items[nested_offsets[i]:nested_offsets[i] +
                            nested_lengths[i]] for i in indices
            ]), gathered_items_items)
Example #43
def _is_id_list(input_record):
    return schema.equal_schemas(input_record, IdList)