def test_sequence_length_with_empty_rows(self):
  """Tests _sequence_length when some examples do not have ids."""
  vocabulary_size = 3
  sparse_input_a = sparse_tensor.SparseTensorValue(
      # example 0, ids []
      # example 1, ids [2]
      # example 2, ids [0, 1]
      # example 3, ids []
      # example 4, ids [1]
      # example 5, ids []
      indices=((1, 0), (2, 0), (2, 1), (4, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(6, 2))
  # Rows with no ids (0, 3, 5) must report length 0, not be dropped.
  expected_sequence_length_a = [0, 1, 2, 0, 1, 0]
  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  sparse_input_b = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids []
      # example 2, ids []
      # example 3, ids []
      # example 4, ids [1]
      # example 5, ids [0, 1]
      indices=((0, 0), (4, 0), (5, 0), (5, 1)),
      values=(2, 1, 0, 1),
      dense_shape=(6, 2))
  expected_sequence_length_b = [1, 0, 0, 0, 1, 2]
  categorical_column_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size)
  # Both categorical columns share one embedding table, but each column must
  # still compute sequence lengths from its own input.
  shared_embedding_columns = fc.shared_embedding_columns(
      [categorical_column_a, categorical_column_b], dimension=2)
  # _get_sequence_dense_tensor returns (dense_tensor, sequence_length);
  # only the sequence_length ([1]) is under test here.
  sequence_length_a = shared_embedding_columns[0]._get_sequence_dense_tensor(
      _LazyBuilder({
          'aaa': sparse_input_a
      }))[1]
  sequence_length_b = shared_embedding_columns[1]._get_sequence_dense_tensor(
      _LazyBuilder({
          'bbb': sparse_input_b
      }))[1]
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(
        expected_sequence_length_a, sequence_length_a.eval(session=sess))
    self.assertAllEqual(
        expected_sequence_length_b, sequence_length_b.eval(session=sess))
def test_get_sequence_dense_tensor(self):
  """Tests that an indicator column one-hot encodes ids at each timestep."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      # example 2, ids []
      # example 3, ids [1]
      indices=((0, 0), (1, 0), (1, 1), (3, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(4, 2))
  # Expected shape: [batch=4, max_seq_len=2, vocabulary_size=3]; timesteps
  # beyond a row's length (and fully empty rows) are all-zero padding.
  expected_lookups = [
      # example 0, ids [2]
      [[0., 0., 1.], [0., 0., 0.]],
      # example 1, ids [0, 1]
      [[1., 0., 0.], [0., 1., 0.]],
      # example 2, ids []
      [[0., 0., 0.], [0., 0., 0.]],
      # example 3, ids [1]
      [[0., 1., 0.], [0., 0., 0.]],
  ]
  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  indicator_column = fc.indicator_column(categorical_column)
  indicator_tensor, _ = indicator_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess))
def test_sequence_length_with_empty_rows(self):
  """Tests _sequence_length when some examples do not have ids."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids []
      # example 1, ids [2]
      # example 2, ids [0, 1]
      # example 3, ids []
      # example 4, ids [1]
      # example 5, ids []
      indices=((1, 0), (2, 0), (2, 1), (4, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(6, 2))
  # Empty rows (0, 3, 5) must map to length 0.
  expected_sequence_length = [0, 1, 2, 0, 1, 0]
  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  indicator_column = sfc._sequence_indicator_column(categorical_column)
  # _get_sequence_dense_tensor returns (dense_tensor, sequence_length).
  _, sequence_length = indicator_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_sequence_length,
                        sequence_length.eval(session=sess))
def test_sequence_length_with_empty_rows(self):
  """Tests _sequence_length when some examples do not have ids."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids []
      # example 1, ids [2]
      # example 2, ids [0, 1]
      # example 3, ids []
      # example 4, ids [1]
      # example 5, ids []
      indices=((1, 0), (2, 0), (2, 1), (4, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(6, 2))
  # Empty rows (0, 3, 5) must map to length 0.
  expected_sequence_length = [0, 1, 2, 0, 1, 0]
  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  indicator_column = fc.indicator_column(categorical_column)
  # _get_sequence_dense_tensor returns (dense_tensor, sequence_length).
  _, sequence_length = indicator_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(
        expected_sequence_length, sequence_length.eval(session=sess))
def test_get_sequence_dense_tensor(self):
  """Tests that a sequence indicator column one-hot encodes each timestep."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      # example 2, ids []
      # example 3, ids [1]
      indices=((0, 0), (1, 0), (1, 1), (3, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(4, 2))
  # Expected shape: [batch=4, max_seq_len=2, vocabulary_size=3]; padding
  # timesteps and empty rows are all zeros.
  expected_lookups = [
      # example 0, ids [2]
      [[0., 0., 1.], [0., 0., 0.]],
      # example 1, ids [0, 1]
      [[1., 0., 0.], [0., 1., 0.]],
      # example 2, ids []
      [[0., 0., 0.], [0., 0., 0.]],
      # example 3, ids [1]
      [[0., 1., 0.], [0., 0., 0.]],
  ]
  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  indicator_column = sfc._sequence_indicator_column(categorical_column)
  indicator_tensor, _ = indicator_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess))
def test_indicator_column(self):
  """Tests sequence_input_layer with two sequence indicator columns."""
  vocabulary_size_a = 3
  sparse_input_a = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  vocabulary_size_b = 2
  sparse_input_b = sparse_tensor.SparseTensorValue(
      # example 0, ids [1]
      # example 1, ids [1, 0]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 1, 0),
      dense_shape=(2, 2))
  # Columns are concatenated along the last axis: first the 3 'aaa' indicator
  # slots, then the 2 'bbb' slots (alphabetical by column name).
  expected_input_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [1, 0]
      [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
  ]
  expected_sequence_length = [1, 2]
  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size_a)
  indicator_column_a = sfc._sequence_indicator_column(
      categorical_column_a)
  categorical_column_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size_b)
  indicator_column_b = sfc._sequence_indicator_column(
      categorical_column_b)
  input_layer, sequence_length = sfc.sequence_input_layer(
      features={
          'aaa': sparse_input_a,
          'bbb': sparse_input_b,
      },
      # Test that columns are reordered alphabetically.
      feature_columns=[indicator_column_b, indicator_column_a])
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
    self.assertAllEqual(expected_sequence_length,
                        sequence_length.eval(session=sess))
def test_indicator_column(self):
  """Tests sequence_input_layer with two indicator columns."""
  vocabulary_size_a = 3
  sparse_input_a = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  vocabulary_size_b = 2
  sparse_input_b = sparse_tensor.SparseTensorValue(
      # example 0, ids [1]
      # example 1, ids [1, 0]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 1, 0),
      dense_shape=(2, 2))
  # Columns are concatenated along the last axis: first the 3 'aaa' indicator
  # slots, then the 2 'bbb' slots (alphabetical by column name).
  expected_input_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [1, 0]
      [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]],
  ]
  expected_sequence_length = [1, 2]
  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size_a)
  indicator_column_a = fc.indicator_column(categorical_column_a)
  categorical_column_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size_b)
  indicator_column_b = fc.indicator_column(categorical_column_b)
  input_layer, sequence_length = sfc.sequence_input_layer(
      features={
          'aaa': sparse_input_a,
          'bbb': sparse_input_b,
      },
      # Test that columns are reordered alphabetically.
      feature_columns=[indicator_column_b, indicator_column_a])
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
    self.assertAllEqual(
        expected_sequence_length, sequence_length.eval(session=sess))
def test_sequence_length(self):
  """Tests that shared embedding columns report per-input sequence lengths."""
  vocabulary_size = 3
  sparse_input_a = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  expected_sequence_length_a = [1, 2]
  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  sparse_input_b = sparse_tensor.SparseTensorValue(
      # example 0, ids [0, 2]
      # example 1, ids [1]
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0, 2, 1),
      dense_shape=(2, 2))
  expected_sequence_length_b = [2, 1]
  categorical_column_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size)
  # Columns share one embedding table but compute lengths independently.
  shared_embedding_columns = fc.shared_embedding_columns(
      [categorical_column_a, categorical_column_b], dimension=2)
  # _get_sequence_dense_tensor returns (dense_tensor, sequence_length).
  sequence_length_a = shared_embedding_columns[0]._get_sequence_dense_tensor(
      _LazyBuilder({
          'aaa': sparse_input_a
      }))[1]
  sequence_length_b = shared_embedding_columns[1]._get_sequence_dense_tensor(
      _LazyBuilder({
          'bbb': sparse_input_b
      }))[1]
  with monitored_session.MonitoredSession() as sess:
    sequence_length_a = sess.run(sequence_length_a)
    self.assertAllEqual(expected_sequence_length_a, sequence_length_a)
    # Sequence lengths are expected to be int64.
    self.assertEqual(np.int64, sequence_length_a.dtype)
    sequence_length_b = sess.run(sequence_length_b)
    self.assertAllEqual(expected_sequence_length_b, sequence_length_b)
    self.assertEqual(np.int64, sequence_length_b.dtype)
def test_get_sequence_dense_tensor(self):
  """Tests that a sequence embedding column looks up ids per timestep."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      # example 2, ids []
      # example 3, ids [1]
      indices=((0, 0), (1, 0), (1, 1), (3, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(4, 2))
  embedding_dimension = 2
  # Deterministic table so expected lookups can be written by hand.
  embedding_values = (
      (1., 2.),  # id 0
      (3., 5.),  # id 1
      (7., 11.)  # id 2
  )

  def _initializer(shape, dtype, partition_info):
    # Also verifies the initializer is called with the expected arguments.
    self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
    self.assertEqual(dtypes.float32, dtype)
    self.assertIsNone(partition_info)
    return embedding_values

  # Expected shape: [batch=4, max_seq_len=2, embedding_dimension=2]; padding
  # timesteps and empty rows embed to zeros.
  expected_lookups = [
      # example 0, ids [2]
      [[7., 11.], [0., 0.]],
      # example 1, ids [0, 1]
      [[1., 2.], [3., 5.]],
      # example 2, ids []
      [[0., 0.], [0., 0.]],
      # example 3, ids [1]
      [[3., 5.], [0., 0.]],
  ]
  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column = sfc._sequence_embedding_column(
      categorical_column,
      dimension=embedding_dimension,
      initializer=_initializer)
  embedding_lookup, _ = embedding_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))
  # Exactly one embedding variable should have been created.
  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  self.assertItemsEqual(('embedding_weights:0', ),
                        tuple([v.name for v in global_vars]))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
    self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess))
def test_sequence_length_with_zeros(self):
  """Rows with no entries report length 0, even between non-empty rows."""
  categorical = sfc.sequence_categorical_column_with_identity(
      'aaa', num_buckets=3)
  # Rows 0, 2 and 4 of the 5-row batch carry no ids at all.
  sparse_ids = sparse_tensor.SparseTensorValue(
      indices=((1, 0), (3, 0), (3, 1)),
      values=(1, 2, 0),
      dense_shape=(5, 2))
  length_tensor = categorical._sequence_length(
      _LazyBuilder({'aaa': sparse_ids}))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual([0, 1, 0, 2, 0], length_tensor.eval(session=sess))
def test_get_sequence_dense_tensor(self):
  """Tests that an embedding column looks up sequence ids per timestep."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      # example 2, ids []
      # example 3, ids [1]
      indices=((0, 0), (1, 0), (1, 1), (3, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(4, 2))
  embedding_dimension = 2
  # Deterministic table so expected lookups can be written by hand.
  embedding_values = (
      (1., 2.),  # id 0
      (3., 5.),  # id 1
      (7., 11.)  # id 2
  )

  def _initializer(shape, dtype, partition_info):
    # Also verifies the initializer is called with the expected arguments.
    self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
    self.assertEqual(dtypes.float32, dtype)
    self.assertIsNone(partition_info)
    return embedding_values

  # Expected shape: [batch=4, max_seq_len=2, embedding_dimension=2]; padding
  # timesteps and empty rows embed to zeros.
  expected_lookups = [
      # example 0, ids [2]
      [[7., 11.], [0., 0.]],
      # example 1, ids [0, 1]
      [[1., 2.], [3., 5.]],
      # example 2, ids []
      [[0., 0.], [0., 0.]],
      # example 3, ids [1]
      [[3., 5.], [0., 0.]],
  ]
  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column = fc.embedding_column(
      categorical_column,
      dimension=embedding_dimension,
      initializer=_initializer)
  embedding_lookup, _ = embedding_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))
  # Exactly one embedding variable should have been created.
  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  self.assertItemsEqual(
      ('embedding_weights:0',), tuple([v.name for v in global_vars]))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
    self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess))
def test_sequence_length(self):
  """_sequence_length counts the ids in each example and returns int64."""
  categorical = sfc.sequence_categorical_column_with_identity(
      'aaa', num_buckets=3)
  # example 0 has one id, example 1 has two.
  sparse_ids = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 2, 0),
      dense_shape=(2, 2))
  length_tensor = categorical._sequence_length(
      _LazyBuilder({'aaa': sparse_ids}))
  with monitored_session.MonitoredSession() as sess:
    lengths = sess.run(length_tensor)
    self.assertAllEqual([1, 2], lengths)
    self.assertEqual(np.int64, lengths.dtype)
def test_sequence_length(self):
  """Per-example id counts are returned as an int64 vector."""
  inputs = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 2, 0),
      dense_shape=(2, 2))
  column = sfc.sequence_categorical_column_with_identity(
      'aaa', num_buckets=3)
  seq_len_tensor = column._sequence_length(_LazyBuilder({'aaa': inputs}))
  with monitored_session.MonitoredSession() as sess:
    seq_len = sess.run(seq_len_tensor)
    # example 0 -> 1 id, example 1 -> 2 ids.
    self.assertAllEqual([1, 2], seq_len)
    self.assertEqual(np.int64, seq_len.dtype)
def _build_feature_columns(self):
  """Returns (context_columns, sequence_columns) used by these tests."""
  # Context features: an embedded identity column plus a raw numeric one.
  context_columns = [
      fc.embedding_column(
          fc.categorical_column_with_identity('int_ctx', num_buckets=100),
          dimension=10),
      fc.numeric_column('float_ctx'),
  ]
  # Sequence features: embedded identity and hash-bucket columns.
  sequence_columns = [
      fc.embedding_column(
          sfc.sequence_categorical_column_with_identity(
              'int_list', num_buckets=10),
          dimension=10),
      fc.embedding_column(
          sfc.sequence_categorical_column_with_hash_bucket(
              'bytes_list', hash_bucket_size=100),
          dimension=20),
  ]
  return context_columns, sequence_columns
def test_get_sparse_tensors_inputs3d(self):
  """Tests _get_sparse_tensors when the input is already 3D Tensor."""
  rank3_ids = sparse_tensor.SparseTensorValue(
      indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
      values=(1, 2, 0),
      dense_shape=(2, 2, 1))
  categorical = sfc.sequence_categorical_column_with_identity(
      'aaa', num_buckets=3)
  # The column expects rank-2 input and must fail at evaluation time.
  with self.assertRaisesRegexp(
      errors.InvalidArgumentError,
      r'Column aaa expected ID tensor of rank 2\.\s*'
      r'id_tensor shape:\s*\[2 2 1\]'):
    pair = categorical._get_sparse_tensors(_LazyBuilder({'aaa': rank3_ids}))
    with monitored_session.MonitoredSession() as sess:
      pair.id_tensor.eval(session=sess)
def test_get_sparse_tensors_inputs3d(self):
  """Tests _get_sparse_tensors when the input is already 3D Tensor."""
  categorical = sfc.sequence_categorical_column_with_identity(
      'aaa', num_buckets=3)
  rank3_ids = sparse_tensor.SparseTensorValue(
      indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
      values=(1, 2, 0),
      dense_shape=(2, 2, 1))
  # Rank-3 input violates the rank-2 contract; the error surfaces when the
  # id tensor is evaluated.
  with self.assertRaisesRegexp(
      errors.InvalidArgumentError,
      r'Column aaa expected ID tensor of rank 2\.\s*'
      r'id_tensor shape:\s*\[2 2 1\]'):
    pair = categorical._get_sparse_tensors(_LazyBuilder({'aaa': rank3_ids}))
    with monitored_session.MonitoredSession() as sess:
      pair.id_tensor.eval(session=sess)
def test_get_sparse_tensors(self):
  """2D input ids are expanded to rank 3 with a trailing size-1 dimension."""
  categorical = sfc.sequence_categorical_column_with_identity(
      'aaa', num_buckets=3)
  sparse_ids = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 2, 0),
      dense_shape=(2, 2))
  expected = sparse_tensor.SparseTensorValue(
      indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
      values=np.array((1, 2, 0), dtype=np.int64),
      dense_shape=(2, 2, 1))
  pair = categorical._get_sparse_tensors(_LazyBuilder({'aaa': sparse_ids}))
  # Identity columns carry no weights.
  self.assertIsNone(pair.weight_tensor)
  with monitored_session.MonitoredSession() as sess:
    _assert_sparse_tensor_value(
        self, expected, pair.id_tensor.eval(session=sess))
def test_indicator_column(self):
  """Tests that error is raised for sequence indicator column."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  indicator_column_a = fc.indicator_column(categorical_column_a)
  # Sequence columns are not valid in the non-sequence input_layer; the
  # error must name the offending column.
  with self.assertRaisesRegexp(
      ValueError,
      r'In indicator_column: aaa_indicator\. categorical_column must not be '
      r'of type _SequenceCategoricalColumn\.'):
    _ = fc.input_layer(features={'aaa': sparse_input},
                       feature_columns=[indicator_column_a])
def test_get_sparse_tensors(self):
  """Sequence ids come back rank-3 (trailing dim 1) with int64 values."""
  sparse_ids = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 2, 0),
      dense_shape=(2, 2))
  expected_ids = sparse_tensor.SparseTensorValue(
      indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)),
      values=np.array((1, 2, 0), dtype=np.int64),
      dense_shape=(2, 2, 1))
  categorical = sfc.sequence_categorical_column_with_identity(
      'aaa', num_buckets=3)
  pair = categorical._get_sparse_tensors(_LazyBuilder({'aaa': sparse_ids}))
  # Identity columns carry no weights.
  self.assertIsNone(pair.weight_tensor)
  with monitored_session.MonitoredSession() as sess:
    _assert_sparse_tensor_value(
        self, expected_ids, pair.id_tensor.eval(session=sess))
def test_indicator_column(self):
  """Tests that error is raised for sequence indicator column."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  indicator_column_a = fc.indicator_column(categorical_column_a)
  # Sequence columns are not valid in the non-sequence input_layer; the
  # error must name the offending column.
  with self.assertRaisesRegexp(
      ValueError,
      r'In indicator_column: aaa_indicator\. categorical_column must not be '
      r'of type _SequenceCategoricalColumn\.'):
    _ = fc.input_layer(
        features={'aaa': sparse_input},
        feature_columns=[indicator_column_a])
def test_sequence_length(self):
  """Indicator column reports per-example sequence lengths as int64."""
  sparse_ids = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  categorical = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=3)
  indicator = fc.indicator_column(categorical)
  # Only the sequence-length half of the (tensor, length) pair is tested.
  _, length_tensor = indicator._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_ids}))
  with monitored_session.MonitoredSession() as sess:
    lengths = sess.run(length_tensor)
    self.assertAllEqual([1, 2], lengths)
    self.assertEqual(np.int64, lengths.dtype)
def test_sequence_length(self):
  """Sequence indicator column reports per-example lengths as int64."""
  sparse_ids = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  categorical = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=3)
  indicator = sfc._sequence_indicator_column(categorical)
  # Only the sequence-length half of the (tensor, length) pair is tested.
  _, length_tensor = indicator._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_ids}))
  with monitored_session.MonitoredSession() as sess:
    lengths = sess.run(length_tensor)
    self.assertAllEqual([1, 2], lengths)
    self.assertEqual(np.int64, lengths.dtype)
def testMultiExamplesMultiFeatures(self):
  """Tests examples with multiple sequential feature columns.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[1, 0, 10], [0, 1, 5]],
                 [[1, 0, 2], [0, 0, 0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                      = [[0.94, -0.96], [0.72, -0.38]]
  rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                            tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                           [<ignored-padding>]]
                      = [[0.92, -0.88], [<ignored-padding>]]
  logits = [[-1*0.92 - 1*0.88 + 0.3],
            [-1*0.72 - 1*0.38 + 0.3]]
         = [[-1.5056], [-0.7962]]
  """
  base_global_step = 100
  create_checkpoint(
      # FeatureColumns are sorted alphabetically, so on_sale weights are
      # inserted before price.
      rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    # Example 0 has two timesteps, example 1 has one (second step padded).
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
        'on_sale':
            sparse_tensor.SparseTensor(
                values=[0, 1, 0],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }

  price_column = seq_fc.sequence_numeric_column('price', shape=(1, ))
  on_sale_column = fc.indicator_column(
      seq_fc.sequence_categorical_column_with_identity('on_sale',
                                                       num_buckets=2))
  sequence_feature_columns = [price_column, on_sale_column]
  context_feature_columns = []
  # Logits must be identical regardless of mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-1.5056], [-0.7962]])
def testMultiExamplesMultiFeatures(self):
  """Tests examples with multiple sequential feature columns.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[1, 0, 10], [0, 1, 5]],
                 [[1, 0, 2], [0, 0, 0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                      = [[0.94, -0.96], [0.72, -0.38]]
  rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                            tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                           [<ignored-padding>]]
                      = [[0.92, -0.88], [<ignored-padding>]]
  logits = [[-1*0.92 - 1*0.88 + 0.3],
            [-1*0.72 - 1*0.38 + 0.3]]
         = [[-1.5056], [-0.7962]]
  """
  base_global_step = 100
  create_checkpoint(
      # FeatureColumns are sorted alphabetically, so on_sale weights are
      # inserted before price.
      rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    # Example 0 has two timesteps, example 1 has one (second step padded).
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
        'on_sale':
            sparse_tensor.SparseTensor(
                values=[0, 1, 0],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }

  price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
  on_sale_column = fc.indicator_column(
      seq_fc.sequence_categorical_column_with_identity(
          'on_sale', num_buckets=2))
  sequence_feature_columns = [price_column, on_sale_column]
  context_feature_columns = []
  # Logits must be identical regardless of mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-1.5056], [-0.7962]])
def test_embedding_column(self):
  """Tests sequence_input_layer with two embedding columns."""
  vocabulary_size = 3
  sparse_input_a = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  sparse_input_b = sparse_tensor.SparseTensorValue(
      # example 0, ids [1]
      # example 1, ids [2, 0]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 2, 0),
      dense_shape=(2, 2))
  embedding_dimension_a = 2
  embedding_values_a = (
      (1., 2.),  # id 0
      (3., 4.),  # id 1
      (5., 6.)  # id 2
  )
  embedding_dimension_b = 3
  embedding_values_b = (
      (11., 12., 13.),  # id 0
      (14., 15., 16.),  # id 1
      (17., 18., 19.)  # id 2
  )

  def _get_initializer(embedding_dimension, embedding_values):
    # Returns a deterministic initializer that also checks its arguments.

    def _initializer(shape, dtype, partition_info):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    return _initializer

  # Columns concatenate along the last axis: 2 'aaa' dims then 3 'bbb' dims
  # (alphabetical by column name); padding timesteps are zeros.
  expected_input_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [2, 0]
      [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],
  ]
  expected_sequence_length = [1, 2]
  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column_a = fc.embedding_column(
      categorical_column_a,
      dimension=embedding_dimension_a,
      initializer=_get_initializer(embedding_dimension_a, embedding_values_a))
  categorical_column_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size)
  embedding_column_b = fc.embedding_column(
      categorical_column_b,
      dimension=embedding_dimension_b,
      initializer=_get_initializer(embedding_dimension_b, embedding_values_b))
  input_layer, sequence_length = sfc.sequence_input_layer(
      features={
          'aaa': sparse_input_a,
          'bbb': sparse_input_b,
      },
      # Test that columns are reordered alphabetically.
      feature_columns=[embedding_column_b, embedding_column_a])
  # One embedding variable per column, scoped under the input layer.
  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  self.assertItemsEqual(
      ('sequence_input_layer/aaa_embedding/embedding_weights:0',
       'sequence_input_layer/bbb_embedding/embedding_weights:0'),
      tuple([v.name for v in global_vars]))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess))
    self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess))
    self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
    self.assertAllEqual(
        expected_sequence_length, sequence_length.eval(session=sess))
def test_embedding_column(self):
  """Tests sequence_input_layer with two sequence embedding columns."""
  vocabulary_size = 3
  sparse_input_a = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  sparse_input_b = sparse_tensor.SparseTensorValue(
      # example 0, ids [1]
      # example 1, ids [2, 0]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 2, 0),
      dense_shape=(2, 2))
  embedding_dimension_a = 2
  embedding_values_a = (
      (1., 2.),  # id 0
      (3., 4.),  # id 1
      (5., 6.)  # id 2
  )
  embedding_dimension_b = 3
  embedding_values_b = (
      (11., 12., 13.),  # id 0
      (14., 15., 16.),  # id 1
      (17., 18., 19.)  # id 2
  )

  def _get_initializer(embedding_dimension, embedding_values):
    # Returns a deterministic initializer that also checks its arguments.

    def _initializer(shape, dtype, partition_info):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values

    return _initializer

  # Columns concatenate along the last axis: 2 'aaa' dims then 3 'bbb' dims
  # (alphabetical by column name); padding timesteps are zeros.
  expected_input_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [2, 0]
      [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],
  ]
  expected_sequence_length = [1, 2]
  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column_a = sfc._sequence_embedding_column(
      categorical_column_a,
      dimension=embedding_dimension_a,
      initializer=_get_initializer(embedding_dimension_a, embedding_values_a))
  categorical_column_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size)
  embedding_column_b = sfc._sequence_embedding_column(
      categorical_column_b,
      dimension=embedding_dimension_b,
      initializer=_get_initializer(embedding_dimension_b, embedding_values_b))
  input_layer, sequence_length = sfc.sequence_input_layer(
      features={
          'aaa': sparse_input_a,
          'bbb': sparse_input_b,
      },
      # Test that columns are reordered alphabetically.
      feature_columns=[embedding_column_b, embedding_column_a])
  # One embedding variable per column, scoped under the input layer.
  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  self.assertItemsEqual(
      ('sequence_input_layer/aaa_embedding/embedding_weights:0',
       'sequence_input_layer/bbb_embedding/embedding_weights:0'),
      tuple([v.name for v in global_vars]))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess))
    self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess))
    self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
    self.assertAllEqual(expected_sequence_length,
                        sequence_length.eval(session=sess))