def test_sequence_length_not_equal(self):
    """Checks that mismatched sequence lengths across columns raise an error."""
    # Feature 'aaa': two entries in example 0, one in example 1,
    # i.e. sequence_length = [2, 1].
    features_a = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (0, 1), (1, 0)),
        values=(0., 1., 10.),
        dense_shape=(2, 2))
    # Feature 'bbb': one entry per example, i.e. sequence_length = [1, 1].
    features_b = sparse_tensor.SparseTensorValue(
        indices=((0, 0), (1, 0)),
        values=(1., 10.),
        dense_shape=(2, 2))

    column_a = sfc.sequence_numeric_column('aaa')
    column_b = sfc.sequence_numeric_column('bbb')
    layer = ksfc.SequenceFeatures([column_a, column_b])

    # Both building the layer output and evaluating the length tensor sit
    # inside the context manager, so the error may surface at either point.
    with self.assertRaisesRegex(errors.InvalidArgumentError,
                                r'Condition x == y did not hold.*'):
        _, lengths = layer({'aaa': features_a, 'bbb': features_b})
        self.evaluate(lengths)
def test_shape_must_be_positive_integer(self):
    """Checks that non-integer and non-positive shape dimensions are rejected."""
    # (expected exception, expected message regex, offending shape)
    invalid_cases = (
        (TypeError, 'shape dimensions must be integer', [1.0]),
        (ValueError, 'shape dimensions must be greater than 0', [0]),
    )
    for expected_error, expected_message, bad_shape in invalid_cases:
        with self.assertRaisesRegex(expected_error, expected_message):
            sfc.sequence_numeric_column('aaa', shape=bad_shape)
def test_compute_output_shape(self):
    """Checks compute_output_shape and the outputs of a two-column layer."""
    price1 = sfc.sequence_numeric_column('price1', shape=2)
    price2 = sfc.sequence_numeric_column('price2')

    price1_input = sparse_tensor.SparseTensor(
        indices=[[0, 0, 0], [0, 0, 1],
                 [0, 1, 0], [0, 1, 1],
                 [1, 0, 0], [1, 0, 1],
                 [2, 0, 0], [2, 0, 1],
                 [3, 0, 0], [3, 0, 1]],
        values=[0., 1., 10., 11., 100., 101., 200., 201., 300., 301.],
        dense_shape=(4, 3, 2))
    price2_input = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0], [2, 0], [3, 0]],
        values=[10., 11., 20., 30., 40.],
        dense_shape=(4, 3))
    features = {'price1': price1_input, 'price2': price2_input}

    sequence_features = ksfc.SequenceFeatures([price1, price2])
    seq_input, seq_len = sequence_features(features)

    # Last dimension is 3: two values from price1 plus one from price2.
    output_shape = sequence_features.compute_output_shape((None, None))
    self.assertEqual(output_shape, (None, None, 3))

    self.evaluate(variables_lib.global_variables_initializer())
    self.evaluate(lookup_ops.tables_initializer())

    expected_seq_input = [
        [[0., 1., 10.], [10., 11., 11.], [0., 0., 0.]],
        [[100., 101., 20.], [0., 0., 0.], [0., 0., 0.]],
        [[200., 201., 30.], [0., 0., 0.], [0., 0., 0.]],
        [[300., 301., 40.], [0., 0., 0.], [0., 0., 0.]],
    ]
    self.assertAllClose(expected_seq_input, self.evaluate(seq_input))
    self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len))
def test_serialization_sequence_features(self):
    """Round-trips a SequenceFeatures layer through Keras layer serialization."""
    layer = ksfc.SequenceFeatures([sfc.sequence_numeric_column('rating')])
    serialized = keras.layers.serialize(layer)
    restored = keras.layers.deserialize(serialized)
    self.assertIsInstance(restored, ksfc.SequenceFeatures)
def test_get_sequence_dense_tensor(self, inputs_args, expected):
    """Compares the dense tensor of a default numeric column with `expected`.

    Args:
      inputs_args: keyword arguments used to build the SparseTensorValue input.
      expected: the value the evaluated dense tensor must equal.
    """
    column = sfc.sequence_numeric_column('aaa')
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}
    # The second element of the returned pair is not checked here.
    dense, _ = _get_sequence_dense_tensor(column, features)
    self.assertAllEqual(expected, self.evaluate(dense))
def test_get_sequence_dense_tensor_with_normalizer_fn(self):
    """Checks that normalizer_fn is applied to the sparse input."""

    def _increment_two(input_sparse_tensor):
        # Adds 2.0 at positions (0, 0) and (1, 1) of a 2x2 sparse tensor.
        offset = sparse_tensor.SparseTensor(
            ((0, 0), (1, 1)), (2.0, 2.0), (2, 2))
        return sparse_ops.sparse_add(input_sparse_tensor, offset)

    # example 0, values [[0.], [1]]
    # example 1, [[10.]]
    features = {
        'aaa': sparse_tensor.SparseTensorValue(
            indices=((0, 0), (0, 1), (1, 0)),
            values=(0., 1., 10.),
            dense_shape=(2, 2)),
    }

    # Before _increment_two:
    #   [[0.], [1.]],
    #   [[10.], [0.]],
    # After _increment_two:
    #   [[2.], [1.]],
    #   [[10.], [2.]],
    expected = [
        [[2.], [1.]],
        [[10.], [2.]],
    ]

    column = sfc.sequence_numeric_column('aaa', normalizer_fn=_increment_two)
    dense, _ = _get_sequence_dense_tensor(column, features)
    self.assertAllEqual(expected, self.evaluate(dense))
def test_defaults(self):
    """Checks the attribute defaults of a sequence numeric column."""
    column = sfc.sequence_numeric_column('aaa')

    # Key and name both echo the constructor argument.
    self.assertEqual('aaa', column.key)
    self.assertEqual('aaa', column.name)

    # Remaining attributes take their default values.
    self.assertEqual((1, ), column.shape)
    self.assertEqual(0., column.default_value)
    self.assertEqual(dtypes.float32, column.dtype)
    self.assertIsNone(column.normalizer_fn)
def test_saving_with_sequence_features(self):
    """Round-trips a model using SequenceFeatures through JSON and predicts.

    Builds a functional Keras model (SequenceFeatures -> GRU -> Dense),
    serializes it with `to_json`, reloads it with `model_from_json`, and
    checks that `predict` on the reloaded model yields `batch_size` rows.
    """
    cols = [
        sfc.sequence_numeric_column('a'),
        fc.indicator_column(
            sfc.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two'])),
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b': keras.layers.Input(
            shape=(None, 1), sparse=True, name='b', dtype='string'),
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)
    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1

    # One value per example, each at position [i, 0, 0].
    values_a = np.arange(batch_size, dtype=np.float32)
    indices_a = np.zeros((batch_size, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(batch_size)
    inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                          (batch_size, timesteps, 1))

    # `np.str` was only an alias of the builtin `str` and has been removed
    # from NumPy; the builtin yields the same unicode dtype.
    values_b = np.zeros(batch_size, dtype=str)
    indices_b = np.zeros((batch_size, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(batch_size)
    inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                          (batch_size, timesteps, 1))

    with self.cached_session():
        # Initialize tables for V1 lookup.
        if not context.executing_eagerly():
            self.evaluate(lookup_ops.tables_initializer())

        predictions = loaded_model.predict(
            {'a': inputs_a, 'b': inputs_b}, steps=1)
        self.assertLen(predictions, batch_size)
def test_static_shape_from_tensors_numeric(self, sparse_input_args,
                                           expected_shape):
    """Checks the static shape reported by the SequenceFeatures output.

    Args:
      sparse_input_args: keyword arguments used to build the
        SparseTensorValue input.
      expected_shape: the shape `get_shape()` must return for the output.
    """
    column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
    layer = ksfc.SequenceFeatures([column])
    features = {'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args)}

    dense_output, _ = layer(features)
    self.assertEqual(dense_output.get_shape(), expected_shape)
def test_get_dense_tensor_multi_dim(self, sparse_input_args,
                                    expected_dense_tensor):
    """Checks the dense tensor produced for a shape-(2, 2) numeric column.

    Args:
      sparse_input_args: keyword arguments used to build the
        SparseTensorValue input.
      expected_dense_tensor: the value the evaluated dense tensor must equal.
    """
    column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
    features = {'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args)}
    dense, _ = _get_sequence_dense_tensor(column, features)
    self.assertAllEqual(expected_dense_tensor, self.evaluate(dense))
def test_sequence_length(self, inputs_args, expected_sequence_length, shape):
    """Checks the values and dtype of the computed sequence length.

    Args:
      inputs_args: keyword arguments used to build the SparseTensorValue input.
      expected_sequence_length: the values the evaluated length must equal.
      shape: the `shape` argument passed to the numeric column.
    """
    column = sfc.sequence_numeric_column('aaa', shape=shape)
    features = {'aaa': sparse_tensor.SparseTensorValue(**inputs_args)}

    _, length_tensor = _get_sequence_dense_tensor(column, features)
    actual_length = self.evaluate(length_tensor)

    self.assertAllEqual(expected_sequence_length, actual_length)
    self.assertEqual(np.int64, actual_length.dtype)
def test_get_config(self, trainable, name):
    """Checks the dictionary returned by SequenceFeatures.get_config().

    Args:
      trainable: the `trainable` flag the layer is constructed with.
      name: the `name` the layer is constructed with.
    """
    layer = sfc.SequenceFeatures(
        [sfc.sequence_numeric_column('a')], trainable=trainable, name=name)
    layer_config = layer.get_config()

    self.assertEqual(layer_config['name'], layer.name)
    self.assertEqual(layer_config['trainable'], trainable)

    # Exactly one serialized column is expected; inspect it after the
    # length check so an empty list fails with a clear message.
    self.assertLen(layer_config['feature_columns'], 1)
    column_config = layer_config['feature_columns'][0]
    self.assertEqual(column_config['class_name'], 'SequenceNumericColumn')
    self.assertEqual(column_config['config']['shape'], (1,))
def test_from_config(self, trainable, name):
    """Checks that from_config rebuilds a layer from get_config output.

    Args:
      trainable: the `trainable` flag the original layer is constructed with.
      name: the `name` the original layer is constructed with.
    """
    original = sfc.SequenceFeatures(
        [sfc.sequence_numeric_column('a')], trainable=trainable, name=name)
    rebuilt = sfc.SequenceFeatures.from_config(original.get_config())

    self.assertEqual(rebuilt.name, original.name)
    self.assertEqual(rebuilt.trainable, trainable)

    rebuilt_columns = rebuilt._feature_columns
    self.assertLen(rebuilt_columns, 1)
    self.assertEqual(rebuilt_columns[0].name, 'a')
def test_numeric_column(self, sparse_input_args, expected_input_layer,
                        expected_sequence_length):
    """Checks both SequenceFeatures outputs for a default numeric column.

    Args:
      sparse_input_args: keyword arguments used to build the
        SparseTensorValue input.
      expected_input_layer: expected value of the first layer output.
      expected_sequence_length: expected value of the second layer output.
    """
    layer = ksfc.SequenceFeatures([sfc.sequence_numeric_column('aaa')])
    features = {'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args)}

    dense_output, lengths = layer(features)

    self.assertAllEqual(expected_input_layer, self.evaluate(dense_output))
    self.assertAllEqual(expected_sequence_length, self.evaluate(lengths))
def test_numeric_column_multi_dim(self, sparse_input_args,
                                  expected_input_layer,
                                  expected_sequence_length):
    """Checks both SequenceFeatures outputs for a shape-(2, 2) numeric column.

    Args:
      sparse_input_args: keyword arguments used to build the
        SparseTensorValue input.
      expected_input_layer: expected value of the first layer output.
      expected_sequence_length: expected value of the second layer output.
    """
    column = sfc.sequence_numeric_column('aaa', shape=(2, 2))
    layer = ksfc.SequenceFeatures([column])
    features = {'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args)}

    dense_output, lengths = layer(features)

    self.assertAllEqual(expected_input_layer, self.evaluate(dense_output))
    self.assertAllEqual(expected_sequence_length, self.evaluate(lengths))
def test_serialization(self):
    """Round-trips a sequence numeric column through feature-column serialization."""

    def _custom_fn(input_tensor):
        return input_tensor + 42

    original = sfc.sequence_numeric_column(
        key='my-key',
        shape=(2,),
        default_value=3,
        dtype=dtypes.int32,
        normalizer_fn=_custom_fn)

    serialized = serialization.serialize_feature_column(original)
    # The normalizer is looked up by function name on deserialization.
    restored = serialization.deserialize_feature_column(
        serialized, custom_objects={_custom_fn.__name__: _custom_fn})

    self.assertEqual(restored.key, 'my-key')
    self.assertEqual(restored.shape, (2,))
    self.assertEqual(restored.default_value, 3)
    self.assertEqual(restored.normalizer_fn(3), 45)

    # A plain integer is not a feature column and must be rejected.
    with self.assertRaisesRegex(ValueError,
                                'Instance: 0 is not a FeatureColumn'):
        serialization.serialize_feature_column(int())
def test_sequence_length_with_empty_rows(self):
    """Checks sequence lengths when some examples contain no values."""
    # example 0, values []
    # example 1, values [[0.], [1.]]
    # example 2, [[2.]]
    # example 3, values []
    # example 4, [[3.]]
    # example 5, values []
    features = {
        'aaa': sparse_tensor.SparseTensorValue(
            indices=((1, 0), (1, 1), (2, 0), (4, 0)),
            values=(0., 1., 2., 3.),
            dense_shape=(6, 2)),
    }
    expected_lengths = [0, 2, 1, 0, 1, 0]

    column = sfc.sequence_numeric_column('aaa')
    _, length_tensor = _get_sequence_dense_tensor(column, features)

    self.assertAllEqual(expected_lengths, self.evaluate(length_tensor))
def test_normalizer_fn_must_be_callable(self):
    """Checks that a non-callable normalizer_fn raises TypeError."""
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists in recent Python versions.
    with self.assertRaisesRegex(TypeError, 'must be a callable'):
        sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')
def test_dtype_is_convertible_to_float(self):
    """Checks that a string dtype is rejected with ValueError."""
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which no longer exists in recent Python versions.
    with self.assertRaisesRegex(ValueError,
                                'dtype must be convertible to float'):
        sfc.sequence_numeric_column('aaa', dtype=dtypes.string)
def test_parents(self):
    """Checks that `parents` of a sequence numeric column is its key."""
    expected_parents = ['my-key']
    actual_parents = sfc.sequence_numeric_column(key='my-key').parents
    self.assertEqual(actual_parents, expected_parents)
def test_shape_saved_as_tuple(self):
    """Checks that a list passed as `shape` is exposed as a tuple."""
    column = sfc.sequence_numeric_column('aaa', shape=[1, 2])
    stored_shape = column.shape
    self.assertEqual((1, 2), stored_shape)
def make_feature_config(num_players):
    """Builds the FeatureConfig describing model inputs and targets.

    Args:
      num_players: used as the `shape` of every per-player column.

    Returns:
      A FeatureConfig with context/sequence features and context/sequence
      targets populated.
    """

    def _normalized(key, dtype, **shape_kwargs):
        # Sequence numeric feature with default_value=-1 passed through
        # make_float; `shape` is forwarded only when the caller supplies it.
        return sfc.sequence_numeric_column(
            key,
            dtype=dtype,
            default_value=-1,
            normalizer_fn=make_float,
            **shape_kwargs)

    def _one_hot(key, num_buckets):
        # Indicator column over a sequence identity categorical column.
        return fc.indicator_column(
            sfc.sequence_categorical_column_with_identity(key, num_buckets))

    def _int_target(key):
        # int64 sequence target with default_value=-1 and no normalizer.
        return sfc.sequence_numeric_column(
            key, dtype=tf.int64, default_value=-1)

    context_features = [
        fc.numeric_column(
            "public_context__starting_stack_sizes",
            shape=num_players,
            dtype=tf.int64,
        ),
        fc.embedding_column(
            tf.feature_column.categorical_column_with_vocabulary_list(
                "private_context__hand_encoded", range(1326)),
            dimension=4,
        ),
    ]

    sequence_features = [
        _one_hot("last_action__action_encoded", 22),
        _one_hot("last_action__move", 5),
        # Last-action amounts: `shape` is left at the column default.
        _normalized("last_action__amount_added", tf.int64),
        _normalized("last_action__amount_added_percent_of_remaining",
                    tf.float32),
        _normalized("last_action__amount_raised", tf.int64),
        _normalized("last_action__amount_raised_percent_of_pot", tf.float32),
        # Per-player public state.
        _normalized("public_state__all_in_player_mask", tf.int64,
                    shape=num_players),
        _normalized("public_state__stack_sizes", tf.int64,
                    shape=num_players),
        _normalized("public_state__amount_to_call", tf.int64,
                    shape=num_players),
        _normalized("public_state__current_player_mask", tf.int64,
                    shape=num_players),
        # Public state columns with shape=1.
        _normalized("public_state__min_raise_amount", tf.int64, shape=1),
        _normalized("public_state__pot_size", tf.int64, shape=1),
        _normalized("public_state__street", tf.int64, shape=1),
        # Player state.
        _normalized("player_state__is_current_player", tf.int64, shape=1),
        _normalized("player_state__current_player_offset", tf.int64,
                    shape=1),
        _one_hot("player_state__current_hand_type", 9),
        _normalized("player_state__win_odds", tf.float32, shape=1),
        _normalized("player_state__win_odds_vs_better", tf.float32, shape=1),
        _normalized("player_state__win_odds_vs_tied", tf.float32, shape=1),
        _normalized("player_state__win_odds_vs_worse", tf.float32, shape=1),
        _normalized("player_state__frac_better_hands", tf.float32, shape=1),
        _normalized("player_state__frac_tied_hands", tf.float32, shape=1),
        _normalized("player_state__frac_worse_hands", tf.float32, shape=1),
    ]

    context_targets = [
        fc.numeric_column(
            "public_context__num_players", shape=1, dtype=tf.int64),
    ]

    sequence_targets = [
        _int_target("next_action__action_encoded"),
        _int_target("reward__cumulative_reward"),
        _int_target("public_state__pot_size"),
        _int_target("player_state__is_current_player"),
        _int_target("public_state__num_players_remaining"),
    ]

    return FeatureConfig(
        context_features=context_features,
        sequence_features=sequence_features,
        context_targets=context_targets,
        sequence_targets=sequence_targets,
    )