def testMapInStructIndexing(self):
    a = schema.Map(
        schema.Scalar(dtype=np.int32),
        schema.Scalar(dtype=np.float32),
    )
    s = schema.Struct(
        ('field1', schema.Scalar(dtype=np.int32)),
        ('field2', a),
    )
    self.assertEqual(s['field2:values:keys'], a.keys)
    self.assertEqual(s['field2:values:values'], a.values)
    with self.assertRaises(KeyError):
        s['fields2:keys:non_existent']

def __init__(self, model, input_record, seed=0, modulo=None,
             use_hashing=True, name='sparse_feature_hash', **kwargs):
    super(SparseFeatureHash, self).__init__(model, name, input_record, **kwargs)

    self.seed = seed
    self.use_hashing = use_hashing
    if schema.equal_schemas(input_record, IdList):
        self.modulo = modulo or self.extract_hash_size(
            input_record.items.metadata)
        metadata = schema.Metadata(
            categorical_limit=self.modulo,
            feature_specs=input_record.items.metadata.feature_specs,
        )
        hashed_indices = schema.Scalar(
            np.int64,
            self.get_next_blob_reference("hashed_idx"))
        hashed_indices.set_metadata(metadata)
        self.output_schema = schema.List(
            values=hashed_indices,
            lengths_blob=input_record.lengths,
        )
    elif schema.equal_schemas(input_record, IdScoreList):
        self.modulo = modulo or self.extract_hash_size(
            input_record.keys.metadata)
        metadata = schema.Metadata(
            categorical_limit=self.modulo,
            feature_specs=input_record.keys.metadata.feature_specs,
        )
        hashed_indices = schema.Scalar(
            np.int64,
            self.get_next_blob_reference("hashed_idx"))
        hashed_indices.set_metadata(metadata)
        self.output_schema = schema.Map(
            keys=hashed_indices,
            values=input_record.values,
            lengths_blob=input_record.lengths,
        )
    else:
        assert False, "Input type must be one of (IdList, IdScoreList)"

    assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)

    # Operators in this layer do not have a CUDA implementation yet.
    # In addition, since the sparse feature keys that we are hashing are
    # typically on CPU originally, it makes sense to keep this layer on CPU.
    self.tags.update([Tags.CPU_ONLY])

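# Illustrative sketch (not part of the original layer code): the two record
# shapes accepted above, with metadata attached so that extract_hash_size can
# read a categorical_limit when `modulo` is not given. The limit of 1000 is a
# made-up value; only schema APIs already used in this file (List, Map,
# Metadata, set_metadata) are assumed.
import numpy as np
from caffe2.python import schema

id_list = schema.List(np.int64)                   # same shape as IdList
id_list.items.set_metadata(schema.Metadata(categorical_limit=1000))

id_score_list = schema.Map(np.int64, np.float32)  # same shape as IdScoreList
id_score_list.keys.set_metadata(schema.Metadata(categorical_limit=1000))
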
def __init__(self, model, input_record, seed,
             name='sparse_feature_hash', **kwargs):
    super(SparseFeatureHash, self).__init__(model, name, input_record, **kwargs)

    self.seed = seed
    self.lengths_blob = schema.Scalar(
        np.int32,
        model.net.NextScopedBlob(name + "_lengths"),
    )

    if schema.equal_schemas(input_record, IdList):
        self.modulo = self.extract_hash_size(input_record.items.metadata)
        metadata = schema.Metadata(
            categorical_limit=self.modulo,
            feature_specs=input_record.items.metadata.feature_specs,
        )
        hashed_indices = schema.Scalar(
            np.int64,
            model.net.NextScopedBlob(name + "_hashed_idx"))
        hashed_indices.set_metadata(metadata)
        self.output_schema = schema.List(
            values=hashed_indices,
            lengths_blob=self.lengths_blob,
        )
    elif schema.equal_schemas(input_record, IdScoreList):
        self.values_blob = schema.Scalar(
            np.float32,
            model.net.NextScopedBlob(name + "_values"),
        )
        self.modulo = self.extract_hash_size(input_record.keys.metadata)
        metadata = schema.Metadata(
            categorical_limit=self.modulo,
            feature_specs=input_record.keys.metadata.feature_specs,
        )
        hashed_indices = schema.Scalar(
            np.int64,
            model.net.NextScopedBlob(name + "_hashed_idx"))
        hashed_indices.set_metadata(metadata)
        self.output_schema = schema.Map(
            keys=hashed_indices,
            values=self.values_blob,
            lengths_blob=self.lengths_blob,
        )
    else:
        assert False, "Input type must be one of (IdList, IdScoreList)"

def testFromColumnList(self):
    st = schema.Struct(
        ('a', schema.Scalar()),
        ('b', schema.List(schema.Scalar())),
        ('c', schema.Map(schema.Scalar(), schema.Scalar())),
    )
    columns = st.field_names()
    # test that recovery works for arbitrary order
    for _ in range(10):
        some_blobs = [core.BlobReference('blob:' + x) for x in columns]
        rec = schema.from_column_list(columns, col_blobs=some_blobs)
        self.assertTrue(rec.has_blobs())
        self.assertEqual(sorted(st.field_names()), sorted(rec.field_names()))
        self.assertEqual(
            [str(blob) for blob in rec.field_blobs()],
            [str('blob:' + name) for name in rec.field_names()])
        random.shuffle(columns)

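# Illustrative sketch (not part of the original test): field_names() flattens a
# nested record into colon-separated column names, which is what
# from_column_list consumes; the test above shuffles those columns to show the
# struct can be rebuilt from any order. The struct below mirrors the one in the
# test; the expected names follow the List/Map layout exercised elsewhere here.
from caffe2.python import schema

st = schema.Struct(
    ('a', schema.Scalar()),
    ('b', schema.List(schema.Scalar())),
    ('c', schema.Map(schema.Scalar(), schema.Scalar())),
)
assert sorted(st.field_names()) == sorted([
    'a',
    'b:lengths', 'b:values',
    'c:lengths', 'c:values:keys', 'c:values:values',
])
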
def __init__(self, model, input_record, seed=0, modulo=None,
             use_hashing=True, name='sparse_feature_hash', **kwargs):
    super(SparseFeatureHash, self).__init__(model, name, input_record, **kwargs)

    self.seed = seed
    self.use_hashing = use_hashing
    if schema.equal_schemas(input_record, IdList):
        self.modulo = modulo or self.extract_hash_size(
            input_record.items.metadata)
        metadata = schema.Metadata(
            categorical_limit=self.modulo,
            feature_specs=input_record.items.metadata.feature_specs,
        )
        hashed_indices = schema.Scalar(
            np.int64,
            self.get_next_blob_reference("hashed_idx"))
        hashed_indices.set_metadata(metadata)
        self.output_schema = schema.List(
            values=hashed_indices,
            lengths_blob=input_record.lengths,
        )
    elif schema.equal_schemas(input_record, IdScoreList):
        self.modulo = modulo or self.extract_hash_size(
            input_record.keys.metadata)
        metadata = schema.Metadata(
            categorical_limit=self.modulo,
            feature_specs=input_record.keys.metadata.feature_specs,
        )
        hashed_indices = schema.Scalar(
            np.int64,
            self.get_next_blob_reference("hashed_idx"))
        hashed_indices.set_metadata(metadata)
        self.output_schema = schema.Map(
            keys=hashed_indices,
            values=input_record.values,
            lengths_blob=input_record.lengths,
        )
    else:
        assert False, "Input type must be one of (IdList, IdScoreList)"

    assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)

def create_net(self):
    net = core.Net("feature_extractor")
    init_net = core.Net("feature_extractor_init")
    missing_scalar = self.create_const(
        init_net, "MISSING_SCALAR", MISSING_VALUE
    )

    input_schema = schema.Struct((
        "float_features",
        schema.Map(
            keys=core.BlobReference("input/float_features.keys"),
            values=core.BlobReference("input/float_features.values"),
            lengths_blob=core.BlobReference("input/float_features.lengths"),
        ),
    ))
    input_record = net.set_input_record(input_schema)

    state = self.extract_float_features(
        net,
        "state",
        input_record.float_features,
        self.sorted_state_features,
        missing_scalar,
    )

    output_record = schema.Struct(("state", state))
    if self.sorted_action_features:
        action = self.extract_float_features(
            net,
            "action",
            input_record.float_features,
            self.sorted_action_features,
            missing_scalar,
        )
        output_record += schema.Struct(("action", action))

    net.set_output_record(output_record)
    return FeatureExtractorNet(net, init_net)

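# Illustrative sketch (not part of the original method): the
# "input/float_features" record above uses the usual sparse-map layout of one
# lengths entry per example plus flattened keys/values. The batch contents and
# dtypes below are made-up assumptions for illustration: a two-example batch
# where example 0 has features {1: 0.5, 3: 1.0} and example 1 has {3: 2.0}.
import numpy as np

float_features_lengths = np.array([2, 1], dtype=np.int32)             # input/float_features.lengths
float_features_keys = np.array([1, 3, 3], dtype=np.int64)             # input/float_features.keys
float_features_values = np.array([0.5, 1.0, 2.0], dtype=np.float32)   # input/float_features.values
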
## @package layers
# Module caffe2.python.layers.layers
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, schema, scope
from caffe2.python.layers.tags import TagContext

from collections import namedtuple
import numpy as np

# Some types to simplify descriptions of things traveling between ops
IdList = schema.List(np.int64)
IdScoreList = schema.Map(np.int64, np.float32)


def get_categorical_limit(record):
    if schema.equal_schemas(record, IdList):
        key = 'items'
    elif schema.equal_schemas(record, IdScoreList, check_field_types=False):
        key = 'keys'
    else:
        raise NotImplementedError()
    assert record[key].metadata is not None, (
        "Blob {} doesn't have metadata".format(str(record[key]())))
    return record[key].metadata.categorical_limit


def set_request_only(field):

def map_schema():
    return schema.Map(schema.Scalar(), schema.Scalar())

def create_net(self):
    net = core.Net("feature_extractor")
    init_net = core.Net("feature_extractor_init")
    missing_scalar = self.create_const(
        init_net, "MISSING_SCALAR", MISSING_VALUE
    )

    input_schema = schema.Struct((
        "float_features",
        schema.Map(
            keys=core.BlobReference("input/float_features.keys"),
            values=core.BlobReference("input/float_features.values"),
            lengths_blob=core.BlobReference("input/float_features.lengths"),
        ),
    ))
    input_record = net.set_input_record(input_schema)

    state = self.extract_float_features(
        net,
        "state",
        input_record.float_features,
        self.sorted_state_features,
        missing_scalar,
    )
    if self.sorted_action_features:
        action = self.extract_float_features(
            net,
            "action",
            input_record.float_features,
            self.sorted_action_features,
            missing_scalar,
        )

    if self.normalize:
        C2.set_net_and_init_net(net, init_net)
        state, _ = PreprocessorNet().normalize_dense_matrix(
            state,
            self.sorted_state_features,
            self.state_normalization_parameters,
            blobname_prefix="state",
            split_expensive_feature_groups=True,
        )
        if self.sorted_action_features:
            action, _ = PreprocessorNet().normalize_dense_matrix(
                action,
                self.sorted_action_features,
                self.action_normalization_parameters,
                blobname_prefix="action",
                split_expensive_feature_groups=True,
            )
        C2.set_net_and_init_net(None, None)

    output_record = schema.Struct(("state", state))
    if self.sorted_action_features:
        output_record += schema.Struct(("action", action))
    net.set_output_record(output_record)
    return FeatureExtractorNet(net, init_net)