def test_serialization_sequence_features(self):
    """A SequenceFeatures layer survives a Keras serialize/deserialize round trip."""
    column = fc.sequence_numeric_column('rating')
    layer = fc.SequenceFeatures([column])
    serialized = keras.layers.serialize(layer)
    restored = keras.layers.deserialize(serialized)
    # The revived object must be a SequenceFeatures layer again.
    self.assertIsInstance(restored, fc.SequenceFeatures)
def test_saving_with_sequence_features(self):
    """Model with SequenceFeatures survives a to_json/model_from_json round trip.

    Builds a small GRU model on top of a SequenceFeatures layer (one numeric
    sequence column and one indicator-wrapped categorical sequence column),
    serializes the model to JSON, reloads it, and verifies the reloaded model
    can run predict() on sparse inputs and yields one row per batch element.
    """
    cols = [
        feature_column_lib.sequence_numeric_column('a'),
        feature_column_lib.indicator_column(
            feature_column_lib.sequence_categorical_column_with_vocabulary_list(
                'b', ['one', 'two']))
    ]
    input_layers = {
        'a': keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
        'b': keras.layers.Input(
            shape=(None, 1), sparse=True, name='b', dtype='string')
    }

    fc_layer, _ = feature_column_lib.SequenceFeatures(cols)(input_layers)
    # TODO(tibell): Figure out the right dtype and apply masking.
    # sequence_length_mask = array_ops.sequence_mask(sequence_length)
    # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)

    model = keras.models.Model(input_layers, output)
    model.compile(
        loss=keras.losses.MSE,
        optimizer='rmsprop',
        metrics=[keras.metrics.categorical_accuracy])

    config = model.to_json()
    loaded_model = model_config.model_from_json(config)

    batch_size = 10
    timesteps = 1

    # Sparse input 'a': one float value per example, all at timestep 0.
    values_a = np.arange(10, dtype=np.float32)
    indices_a = np.zeros((10, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(10)
    inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                          (batch_size, timesteps, 1))

    # Sparse input 'b': empty strings. Note: builtin `str` is used here —
    # the old `np.str` alias was removed in NumPy 1.24.
    values_b = np.zeros(10, dtype=str)
    indices_b = np.zeros((10, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(10)
    inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                          (batch_size, timesteps, 1))

    with self.cached_session():
        # Initialize tables for V1 lookup.
        if not context.executing_eagerly():
            self.evaluate(lookup_ops.tables_initializer())

        self.assertLen(
            loaded_model.predict({
                'a': inputs_a,
                'b': inputs_b
            }, steps=1), batch_size)
def test_non_v1_feature_column(self):
    """Parsing spec derived from a non-v1 sequence column plus a dense label."""
    actual_spec = self._parse_example_fn(
        feature_columns=[fc.sequence_numeric_column('a')],
        label_key='b')
    # 'a' is variable-length (a sequence); the label 'b' is a fixed scalar.
    self.assertDictEqual(
        {
            'a': parsing_ops.VarLenFeature(dtype=dtypes.float32),
            'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
        },
        actual_spec)
def test_encode_features_sequence_column(self):
    """encode_features handles both embedding and numeric sequence columns.

    Graph-mode test: builds a categorical-with-identity sequence column fed
    through an embedding column (with a deterministic initializer) and a
    numeric sequence column, encodes both via `feature_lib.encode_features`,
    then checks the embedding variable and the padded dense outputs in a
    V1 session.
    """
    with tf.Graph().as_default():
        # Inputs.
        vocabulary_size = 4
        # Sequence of ids. -1 values are ignored.
        input_seq_ids = np.array([
            [3, -1, -1],  # example 0
            [0, 1, -1],  # example 1
        ])
        # Sequence of numeric values.
        # input_seq_nums = [
        #     [1.],  # example 0.
        #     [2., 3.],  # example 1
        # ]
        input_seq_nums = tf.sparse.SparseTensor(
            indices=[[0, 0], [1, 0], [1, 1]],
            values=[1., 2., 3.],
            dense_shape=(2, 3))
        input_features = {
            "seq_ids": input_seq_ids,
            "seq_nums": input_seq_nums
        }
        # Embedding variable: one 2-d row per vocabulary id, so the expected
        # lookups below can be stated exactly.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.),  # id 2
            (9., 13.)  # id 3
        )
        # Expected sequence embeddings for input_seq_ids; -1 positions pad
        # to zeros.
        expected_seq_embed = [
            # example 0:
            [[9., 13.], [0., 0.], [0., 0.]],
            # example 1:
            [[1., 2.], [3., 5.], [0., 0.]],
        ]
        # Expected densified numeric sequence; absent positions pad to 0.
        expected_seq_nums = [
            # example 0:
            [[1.], [0.], [0.]],
            # example 1:
            [[2.], [3.], [0.]],
        ]
        # Build columns.
        seq_categorical_column = (
            feature_column.sequence_categorical_column_with_identity(
                key="seq_ids", num_buckets=vocabulary_size))
        # The lambda initializer returns the fixed table above regardless of
        # the requested shape/dtype.
        seq_embed_column = feature_column.embedding_column(
            seq_categorical_column,
            dimension=embedding_dimension,
            initializer=lambda shape, dtype, partition_info: embedding_values)
        seq_numeric_column = feature_column.sequence_numeric_column(
            "seq_nums")
        cols_to_tensors = feature_lib.encode_features(
            input_features, [seq_embed_column, seq_numeric_column],
            mode=tf.estimator.ModeKeys.EVAL)
        actual_seq_embed = cols_to_tensors[seq_embed_column]
        actual_seq_nums = cols_to_tensors[seq_numeric_column]
        # Assert embedding variable and encoded sequence features.
        # NOTE(review): assumes the embedding variable is the only (first)
        # global variable created above — confirm if more columns are added.
        global_vars = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
        embedding_var = global_vars[0]
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(tf.compat.v1.tables_initializer())
            self.assertAllEqual(embedding_values, embedding_var.eval())
            self.assertAllEqual(expected_seq_embed, actual_seq_embed)
            self.assertAllEqual(expected_seq_nums, actual_seq_nums)
def test_save_load_with_sequence_features(self):
    """Model with SequenceFeatures survives a TileDB save/load round trip.

    Builds a GRU model on a SequenceFeatures layer, saves it (including the
    optimizer) to a TileDB array, reloads it compiled, and asserts that both
    the optimizer weights and the predictions on sparse inputs are identical
    between the original and the loaded model.
    """
    cols = [
        feature_column_lib.sequence_numeric_column("a"),
        feature_column_lib.indicator_column(
            feature_column_lib.sequence_categorical_column_with_vocabulary_list(
                "b", ["one", "two"])),
    ]
    input_layers = {
        "a": keras.layers.Input(shape=(None, 1), sparse=True, name="a"),
        "b": keras.layers.Input(
            shape=(None, 1), sparse=True, name="b", dtype="string"),
    }

    fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
    x = keras.layers.GRU(32)(fc_layer)
    output = keras.layers.Dense(10)(x)
    model = keras.models.Model(input_layers, output)
    model.compile(
        loss=keras.losses.MSE,
        optimizer="rmsprop",
        metrics=[keras.metrics.categorical_accuracy],
    )

    tiledb_uri = os.path.join(self.get_temp_dir(), "model_array")
    tiledb_model_obj = TensorflowTileDB(uri=tiledb_uri)
    tiledb_model_obj.save(model=model, include_optimizer=True)
    loaded_model = tiledb_model_obj.load(compile_model=True)

    model_opt_weights = batch_get_value(getattr(model.optimizer, "weights"))
    loaded_opt_weights = batch_get_value(
        getattr(loaded_model.optimizer, "weights"))

    # Assert optimizer weights are equal
    for weight_model, weight_loaded_model in zip(model_opt_weights,
                                                 loaded_opt_weights):
        np.testing.assert_array_equal(weight_model, weight_loaded_model)

    batch_size = 10
    timesteps = 1

    # Sparse input 'a': one float value per example, all at timestep 0.
    values_a = np.arange(10, dtype=np.float32)
    indices_a = np.zeros((10, 3), dtype=np.int64)
    indices_a[:, 0] = np.arange(10)
    inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                          (batch_size, timesteps, 1))

    # Sparse input 'b': empty strings. Note: builtin `str` is used here —
    # the old `np.str` alias was removed in NumPy 1.24.
    values_b = np.zeros(10, dtype=str)
    indices_b = np.zeros((10, 3), dtype=np.int64)
    indices_b[:, 0] = np.arange(10)
    inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                          (batch_size, timesteps, 1))

    # Assert model predictions are equal
    np.testing.assert_array_equal(
        loaded_model.predict({
            "a": inputs_a,
            "b": inputs_b
        }, steps=1),
        model.predict({
            "a": inputs_a,
            "b": inputs_b
        }, steps=1),
    )