def _testExampleWeight(self, n_classes):
  def train_input_fn():
    return {
        'tokens':
            sparse_tensor.SparseTensor(
                values=['the', 'cat', 'sat', 'dog', 'barked'],
                indices=[[0, 0], [0, 1], [0, 2], [1, 0], [1, 1]],
                dense_shape=[2, 3]),
        'w': [[1], [2]],
    }, [[1], [0]]

  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  input_units = 2

  cell_units = [4, 2]
  est = rnn.RNNClassifier(
      num_units=cell_units,
      sequence_feature_columns=[embed],
      n_classes=n_classes,
      weight_column='w',
      model_dir=self._model_dir)

  # Train for a few steps, and validate final checkpoint.
  num_steps = 10
  est.train(input_fn=train_input_fn, steps=num_steps)
  self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
def testFromScratchWithCustomRNNCellFn(self):
  def train_input_fn():
    return {
        'tokens':
            sparse_tensor.SparseTensor(
                values=['the', 'cat', 'sat'],
                indices=[[0, 0], [0, 1], [0, 2]],
                dense_shape=[1, 3]),
    }, [[1]]

  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  input_units = 2
  cell_units = [4, 2]
  n_classes = 2

  def rnn_cell_fn(mode):
    del mode  # unused
    cells = [rnn_cell.BasicRNNCell(num_units=n) for n in cell_units]
    return rnn_cell.MultiRNNCell(cells)

  est = rnn.RNNClassifier(
      sequence_feature_columns=[embed],
      rnn_cell_fn=rnn_cell_fn,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # Train for a few steps, and validate final checkpoint.
  num_steps = 10
  est.train(input_fn=train_input_fn, steps=num_steps)
  self._assert_checkpoint(n_classes, input_units, cell_units, num_steps)
def test_dnn_classifier(self):
  embedding = feature_column_lib.embedding_column(
      feature_column_lib.categorical_column_with_vocabulary_list(
          'wire_cast', ['kima', 'omar', 'stringer']), 8)
  dnn = estimator_lib.DNNClassifier(
      feature_columns=[embedding], hidden_units=[3, 1])

  def train_input_fn():
    return dataset_ops.Dataset.from_tensors(({
        'wire_cast': [['omar'], ['kima']]
    }, [[0], [1]])).repeat(3)

  def eval_input_fn():
    return dataset_ops.Dataset.from_tensors(({
        'wire_cast': [['stringer'], ['kima']]
    }, [[0], [1]])).repeat(2)

  evaluator = hooks_lib.InMemoryEvaluatorHook(
      dnn, eval_input_fn, name='in-memory')
  dnn.train(train_input_fn, hooks=[evaluator])
  self.assertTrue(os.path.isdir(dnn.eval_dir('in-memory')))

  step_keyword_to_value = summary_step_keyword_to_value_mapping(
      dnn.eval_dir('in-memory'))
  final_metrics = dnn.evaluate(eval_input_fn)
  step = final_metrics[ops.GraphKeys.GLOBAL_STEP]
  for summary_tag in final_metrics:
    if summary_tag == ops.GraphKeys.GLOBAL_STEP:
      continue
    self.assertEqual(final_metrics[summary_tag],
                     step_keyword_to_value[step][summary_tag])
def _sequence_embedding_column(
    categorical_column, dimension, initializer=None, ckpt_to_load_from=None,
    tensor_name_in_ckpt=None, max_norm=None, trainable=True):
  """Returns a feature column that represents sequences of embeddings.

  Use this to convert sequence categorical data into a dense representation
  for input to a sequence NN, such as an RNN.

  Example:

  ```python
  watches = sequence_categorical_column_with_identity(
      'watches', num_buckets=1000)
  watches_embedding = _sequence_embedding_column(watches, dimension=10)
  columns = [watches_embedding]

  features = tf.parse_example(..., features=make_parse_example_spec(columns))
  input_layer, sequence_length = sequence_input_layer(features, columns)

  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
  outputs, state = tf.nn.dynamic_rnn(
      rnn_cell, inputs=input_layer, sequence_length=sequence_length)
  ```

  Args:
    categorical_column: A `_SequenceCategoricalColumn` created with a
      `sequence_categorical_column_with_*` function.
    dimension: Integer dimension of the embedding.
    initializer: Initializer function used to initialize the embeddings.
    ckpt_to_load_from: String representing checkpoint name/pattern from which
      to restore column weights. Required if `tensor_name_in_ckpt` is not
      `None`.
    tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from
      which to restore the column weights. Required if `ckpt_to_load_from` is
      not `None`.
    max_norm: If not `None`, embedding values are l2-normalized to this value.
    trainable: Whether or not the embedding is trainable. Default is True.

  Returns:
    A `_SequenceEmbeddingColumn`.

  Raises:
    ValueError: If `categorical_column` is not the right type.
  """
  if not isinstance(categorical_column, _SequenceCategoricalColumn):
    raise ValueError(
        'categorical_column must be of type _SequenceCategoricalColumn. '
        'Given (type {}): {}'.format(
            type(categorical_column), categorical_column))
  return _SequenceEmbeddingColumn(
      fc.embedding_column(
          categorical_column,
          dimension=dimension,
          initializer=initializer,
          ckpt_to_load_from=ckpt_to_load_from,
          tensor_name_in_ckpt=tensor_name_in_ckpt,
          max_norm=max_norm,
          trainable=trainable))
def _build_feature_columns(self):
  col = fc.categorical_column_with_identity('int_ctx', num_buckets=100)
  ctx_cols = [
      fc.embedding_column(col, dimension=10),
      fc.numeric_column('float_ctx')
  ]

  identity_col = sfc.sequence_categorical_column_with_identity(
      'int_list', num_buckets=10)
  bucket_col = sfc.sequence_categorical_column_with_hash_bucket(
      'bytes_list', hash_bucket_size=100)
  seq_cols = [
      fc.embedding_column(identity_col, dimension=10),
      fc.embedding_column(bucket_col, dimension=20)
  ]

  return ctx_cols, seq_cols
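The helper above keeps context columns and sequence columns in separate groups because they feed different input layers. A minimal sketch of that split, written as if inside the same test class and assuming `import tensorflow as tf` plus the `fc`/`sfc` aliases used above; the feature values are illustrative, only the keys follow the columns built by the helper:

# Sketch only: batch of 2 examples, max sequence length 2.
features = {
    'int_ctx': tf.constant([[3], [7]], dtype=tf.int64),
    'float_ctx': tf.constant([[0.5], [1.5]]),
    'int_list': tf.SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]], values=[1, 2, 3],
        dense_shape=[2, 2]),
    'bytes_list': tf.SparseTensor(
        indices=[[0, 0], [1, 0]], values=[b'a', b'b'], dense_shape=[2, 1]),
}
ctx_cols, seq_cols = self._build_feature_columns()
# Context columns yield one vector per example: shape [2, 10 + 1].
context_input = fc.input_layer(features, ctx_cols)
# Sequence columns yield one vector per timestep: shape [2, 2, 10 + 20].
sequence_input, sequence_length = sfc.sequence_input_layer(features, seq_cols)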
def test_sequence_length_with_empty_rows(self):
  """Tests _sequence_length when some examples do not have ids."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids []
      # example 1, ids [2]
      # example 2, ids [0, 1]
      # example 3, ids []
      # example 4, ids [1]
      # example 5, ids []
      indices=((1, 0), (2, 0), (2, 1), (4, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(6, 2))
  expected_sequence_length = [0, 1, 2, 0, 1, 0]

  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column = fc.embedding_column(categorical_column, dimension=2)

  _, sequence_length = embedding_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(
        expected_sequence_length, sequence_length.eval(session=sess))
def testWarmStartInputLayerEmbeddingColumn(self):
  # Create old and new vocabs for embedding column "sc_vocab".
  prev_vocab_path = self._write_vocab(
      ["apple", "banana", "guava", "orange"], "old_vocab")
  new_vocab_path = self._write_vocab(
      ["orange", "guava", "banana", "apple", "raspberry", "blueberry"],
      "new_vocab")

  # Save checkpoint from which to warm-start.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      _ = variable_scope.get_variable(
          "input_layer/sc_vocab_embedding/embedding_weights",
          initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
      self._write_checkpoint(sess)

  def _partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Partition each var into 2 equal slices.
    partitions = [1] * len(shape)
    partitions[0] = min(2, shape[0].value)
    return partitions

  # Create feature columns.
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
  emb_vocab = fc.embedding_column(
      categorical_column=sc_vocab,
      dimension=2,
      # Can't use constant_initializer with load_and_remap. In practice,
      # use a truncated normal initializer.
      initializer=init_ops.random_uniform_initializer(
          minval=0.42, maxval=0.42))
  all_deep_cols = [emb_vocab]

  # New graph, new session with warmstarting.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      cols_to_vars = {}
      with variable_scope.variable_scope("", partitioner=_partitioner):
        # Create the variables.
        fc.input_layer(
            features=self._create_dummy_inputs(),
            feature_columns=all_deep_cols,
            cols_to_vars=cols_to_vars)
      ws_settings = ws_util._WarmStartSettings(
          self.get_temp_dir(), col_to_prev_vocab={emb_vocab: prev_vocab_path})
      ws_util._warmstart_input_layer(cols_to_vars, ws_settings)
      sess.run(variables.global_variables_initializer())
      # Verify weights were correctly warmstarted. Var corresponding to
      # emb_vocab should be correctly warmstarted after vocab remapping.
      # Missing values are filled in with the EmbeddingColumn's initializer.
      self._assert_cols_to_vars(
          cols_to_vars, {
              emb_vocab: [
                  np.array([[3., 3.3], [2., 2.2], [1., 1.1]]),
                  np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]])
              ]
          }, sess)
def _sequence_embedding_column(
    categorical_column, dimension, initializer=None, ckpt_to_load_from=None,
    tensor_name_in_ckpt=None, max_norm=None, trainable=True):
  """Returns a feature column that represents sequences of embeddings.

  Use this to convert sequence categorical data into a dense representation
  for input to a sequence NN, such as an RNN.

  Example:

  ```python
  watches = sequence_categorical_column_with_identity(
      'watches', num_buckets=1000)
  watches_embedding = _sequence_embedding_column(watches, dimension=10)
  columns = [watches_embedding]

  features = tf.parse_example(..., features=make_parse_example_spec(columns))
  input_layer, sequence_length = sequence_input_layer(features, columns)

  rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size)
  outputs, state = tf.nn.dynamic_rnn(
      rnn_cell, inputs=input_layer, sequence_length=sequence_length)
  ```

  Args:
    categorical_column: A `_SequenceCategoricalColumn` created with a
      `sequence_categorical_column_with_*` function.
    dimension: Integer dimension of the embedding.
    initializer: Initializer function used to initialize the embeddings.
    ckpt_to_load_from: String representing checkpoint name/pattern from which
      to restore column weights. Required if `tensor_name_in_ckpt` is not
      `None`.
    tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from
      which to restore the column weights. Required if `ckpt_to_load_from` is
      not `None`.
    max_norm: If not `None`, embedding values are l2-normalized to this value.
    trainable: Whether or not the embedding is trainable. Default is True.

  Returns:
    A `_SequenceCategoricalToDenseColumn`.

  Raises:
    ValueError: If `categorical_column` is not the right type.
  """
  if not isinstance(categorical_column, _SequenceCategoricalColumn):
    raise ValueError(
        'categorical_column must be of type _SequenceCategoricalColumn. '
        'Given (type {}): {}'.format(
            type(categorical_column), categorical_column))
  return _SequenceCategoricalToDenseColumn(
      fc.embedding_column(
          categorical_column,
          dimension=dimension,
          initializer=initializer,
          ckpt_to_load_from=ckpt_to_load_from,
          tensor_name_in_ckpt=tensor_name_in_ckpt,
          max_norm=max_norm,
          trainable=trainable))
def testParseExampleInputFn(self):
  """Tests complete flow with input_fn constructed from parse_example."""
  n_classes = 3
  batch_size = 10
  words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

  _, examples_file = tempfile.mkstemp()
  writer = python_io.TFRecordWriter(examples_file)
  for _ in range(batch_size):
    sequence_length = random.randint(1, len(words))
    sentence = random.sample(words, sequence_length)
    label = random.randint(0, n_classes - 1)
    example = example_pb2.Example(features=feature_pb2.Features(
        feature={
            'tokens':
                feature_pb2.Feature(
                    bytes_list=feature_pb2.BytesList(value=sentence)),
            'label':
                feature_pb2.Feature(
                    int64_list=feature_pb2.Int64List(value=[label])),
        }))
    writer.write(example.SerializeToString())
  writer.close()

  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  feature_columns = [embed]
  feature_spec = parsing_utils.classifier_parse_example_spec(
      feature_columns, label_key='label', label_dtype=dtypes.int64)

  def _train_input_fn():
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec)
    return dataset.map(lambda features: (features, features.pop('label')))

  def _eval_input_fn():
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec, num_epochs=1)
    return dataset.map(lambda features: (features, features.pop('label')))

  def _predict_input_fn():
    dataset = readers.make_batched_features_dataset(
        examples_file, batch_size, feature_spec, num_epochs=1)

    def features_fn(features):
      features.pop('label')
      return features

    return dataset.map(features_fn)

  self._test_complete_flow(
      feature_columns=feature_columns,
      train_input_fn=_train_input_fn,
      eval_input_fn=_eval_input_fn,
      predict_input_fn=_predict_input_fn,
      n_classes=n_classes,
      batch_size=batch_size)
def test_warm_starting_selective_variables(self):
  """Tests selecting variables to warm-start."""
  age = feature_column.numeric_column('age')
  city = feature_column.embedding_column(
      feature_column.categorical_column_with_vocabulary_list(
          'city', vocabulary_list=['Mountain View', 'Palo Alto']),
      dimension=5)

  # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
  dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
      linear_feature_columns=[age],
      dnn_feature_columns=[city],
      dnn_hidden_units=[256, 128],
      model_dir=self._ckpt_and_vocab_dir,
      n_classes=4,
      linear_optimizer='SGD',
      dnn_optimizer='SGD')
  dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)

  # Create a second DNNLinearCombinedClassifier, warm-started from the first.
  # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
  # have accumulator values that change).
  warm_started_dnn_lc_classifier = (
      dnn_linear_combined.DNNLinearCombinedClassifier(
          linear_feature_columns=[age],
          dnn_feature_columns=[city],
          dnn_hidden_units=[256, 128],
          n_classes=4,
          linear_optimizer=gradient_descent.GradientDescentOptimizer(
              learning_rate=0.0),
          dnn_optimizer=gradient_descent.GradientDescentOptimizer(
              learning_rate=0.0),
          # The provided regular expression will only warm-start the deep
          # portion of the model.
          warm_start_from=estimator.WarmStartSettings(
              ckpt_to_initialize_from=dnn_lc_classifier.model_dir,
              vars_to_warm_start='.*(dnn).*')))
  warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)

  for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
    if 'dnn' in variable_name:
      self.assertAllClose(
          dnn_lc_classifier.get_variable_value(variable_name),
          warm_started_dnn_lc_classifier.get_variable_value(variable_name))
    elif 'linear' in variable_name:
      linear_values = warm_started_dnn_lc_classifier.get_variable_value(
          variable_name)
      # Since they're not warm-started, the linear weights will be
      # zero-initialized.
      self.assertAllClose(np.zeros_like(linear_values), linear_values)
def build_model_columns():
  week_list = fc.categorical_column_with_vocabulary_list(
      "week_list",
      vocabulary_list=['mon', 'tue', 'wed', 'thur', 'fri', 'sat', 'sun'])
  week = fc.weighted_categorical_column(week_list, 'week_weight')
  week = fc.embedding_column(week, 3)
  wide = []
  deep = [week]
  return wide, deep
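The weighted column above pairs each day in 'week_list' elementwise with a float from 'week_weight', and the embedding's default 'mean' combiner then produces a weight-averaged vector per example. A minimal sketch of the feature dict these columns expect, assuming `import tensorflow as tf` and the `fc` alias used above; the values are illustrative:

# Sketch only: two examples, two weighted days each.
features = {
    'week_list': tf.constant([['mon', 'tue'], ['sat', 'sun']]),
    'week_weight': tf.constant([[1.0, 0.5], [2.0, 0.1]]),
}
wide, deep = build_model_columns()
# In a TF1 session this also needs table and variable initializers.
dense = tf.feature_column.input_layer(features, deep)  # shape [2, 3]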
import tensorflow as tf
from tensorflow import feature_column


def test_reuse():
  data = {
      'gender': [['M'], ['G'], ['M'], ['M']],
      'user': [['A'], ['B'], ['C'], ['C']],
      'pos': [['a'], ['d'], ['f'], ['c']],
      'neg': [['c'], ['e'], ['d'], ['a']]
  }
  user_v_list = ['A', 'B', 'C', 'D']
  item_v_list = ['a', 'b', 'c', 'd', 'e', 'f']

  gender_col = feature_column.categorical_column_with_vocabulary_list(
      'gender', ['M', 'G'], dtype=tf.string)
  user_col = feature_column.categorical_column_with_vocabulary_list(
      'user', user_v_list, dtype=tf.string)
  pos_item_col = feature_column.categorical_column_with_vocabulary_list(
      'pos', item_v_list, dtype=tf.string)
  neg_item_col = feature_column.categorical_column_with_vocabulary_list(
      'neg', item_v_list, dtype=tf.string)

  gender_embedding = feature_column.embedding_column(gender_col, 2)
  user_embedding = feature_column.embedding_column(user_col, 2)
  # The positive and negative item columns share one embedding table.
  pos_embedding, neg_embedding = feature_column.shared_embedding_columns(
      [pos_item_col, neg_item_col], 3)
  columns = [gender_embedding, user_embedding, pos_embedding, neg_embedding]

  with tf.variable_scope("a") as scope:
    aa = scope.name
    ret = tf.feature_column.input_layer(data, columns)
    print(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=aa))
  with tf.variable_scope("b") as scope:
    bb = scope.name
    ret1 = tf.feature_column.input_layer(data, columns)
    print(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=bb))

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    print(sess.run(ret))
    print('------------------')
    print(sess.run(ret1))
def test_get_sequence_dense_tensor(self):
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      # example 2, ids []
      # example 3, ids [1]
      indices=((0, 0), (1, 0), (1, 1), (3, 0)),
      values=(2, 0, 1, 1),
      dense_shape=(4, 2))

  embedding_dimension = 2
  embedding_values = (
      (1., 2.),  # id 0
      (3., 5.),  # id 1
      (7., 11.)  # id 2
  )

  def _initializer(shape, dtype, partition_info):
    self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
    self.assertEqual(dtypes.float32, dtype)
    self.assertIsNone(partition_info)
    return embedding_values

  expected_lookups = [
      # example 0, ids [2]
      [[7., 11.], [0., 0.]],
      # example 1, ids [0, 1]
      [[1., 2.], [3., 5.]],
      # example 2, ids []
      [[0., 0.], [0., 0.]],
      # example 3, ids [1]
      [[3., 5.], [0., 0.]],
  ]

  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column = fc.embedding_column(
      categorical_column,
      dimension=embedding_dimension,
      initializer=_initializer)

  embedding_lookup, _ = embedding_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  self.assertItemsEqual(
      ('embedding_weights:0',), tuple([v.name for v in global_vars]))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess))
    self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess))
def testNumpyInputFn(self):
  """Tests complete flow with numpy_input_fn."""
  n_classes = 3
  batch_size = 10
  words = ['dog', 'cat', 'bird', 'the', 'a', 'sat', 'flew', 'slept']
  # Numpy only supports dense input, so all examples will have same length.
  # TODO(b/73160931): Update test when support for prepadded data exists.
  sequence_length = 3

  features = []
  for _ in range(batch_size):
    sentence = random.sample(words, sequence_length)
    features.append(sentence)

  x_data = np.array(features)
  y_data = np.random.randint(n_classes, size=batch_size)

  train_input_fn = numpy_io.numpy_input_fn(
      x={'tokens': x_data},
      y=y_data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'tokens': x_data},
      y=y_data,
      batch_size=batch_size,
      shuffle=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'tokens': x_data},
      batch_size=batch_size,
      shuffle=False)

  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  feature_columns = [embed]

  self._test_complete_flow(
      feature_columns=feature_columns,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      predict_input_fn=predict_input_fn,
      n_classes=n_classes,
      batch_size=batch_size)
def test_classifier_basic_warm_starting(self):
  """Tests correctness of DNNLinearCombinedClassifier default warm-start."""
  age = feature_column.numeric_column('age')
  city = feature_column.embedding_column(
      feature_column.categorical_column_with_vocabulary_list(
          'city', vocabulary_list=['Mountain View', 'Palo Alto']),
      dimension=5)

  # Create a DNNLinearCombinedClassifier and train to save a checkpoint.
  dnn_lc_classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
      linear_feature_columns=[age],
      dnn_feature_columns=[city],
      dnn_hidden_units=[256, 128],
      model_dir=self._ckpt_and_vocab_dir,
      n_classes=4,
      linear_optimizer='SGD',
      dnn_optimizer='SGD')
  dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)

  # Create a second DNNLinearCombinedClassifier, warm-started from the first.
  # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
  # have accumulator values that change).
  warm_started_dnn_lc_classifier = (
      dnn_linear_combined.DNNLinearCombinedClassifier(
          linear_feature_columns=[age],
          dnn_feature_columns=[city],
          dnn_hidden_units=[256, 128],
          n_classes=4,
          linear_optimizer=gradient_descent.GradientDescentOptimizer(
              learning_rate=0.0),
          dnn_optimizer=gradient_descent.GradientDescentOptimizer(
              learning_rate=0.0),
          warm_start_from=dnn_lc_classifier.model_dir))
  warm_started_dnn_lc_classifier.train(input_fn=self._input_fn, max_steps=1)

  for variable_name in warm_started_dnn_lc_classifier.get_variable_names():
    self.assertAllClose(
        dnn_lc_classifier.get_variable_value(variable_name),
        warm_started_dnn_lc_classifier.get_variable_value(variable_name))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        n_classes, batch_size):
  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  feature_columns = [embed]

  cell_units = [4, 2]
  est = rnn.RNNClassifier(
      num_units=cell_units,
      sequence_feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predicted_proba = np.array([
      x[prediction_keys.PredictionKeys.PROBABILITIES]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  # EXPORT
  feature_spec = {
      'tokens': parsing_ops.VarLenFeature(dtypes.string),
      'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
  }
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def test_sequence_length(self):
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  expected_sequence_length = [1, 2]

  categorical_column = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column = fc.embedding_column(categorical_column, dimension=2)

  _, sequence_length = embedding_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  with monitored_session.MonitoredSession() as sess:
    sequence_length = sess.run(sequence_length)
    self.assertAllEqual(expected_sequence_length, sequence_length)
    self.assertEqual(np.int64, sequence_length.dtype)
def test_embedding_column(self):
  """Tests that error is raised for sequence embedding column."""
  vocabulary_size = 3
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))

  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column_a = fc.embedding_column(categorical_column_a, dimension=2)

  with self.assertRaisesRegexp(
      ValueError,
      r'In embedding_column: aaa_embedding\. categorical_column must not be '
      r'of type _SequenceCategoricalColumn\.'):
    _ = fc.input_layer(
        features={'aaa': sparse_input},
        feature_columns=[embedding_column_a])
def testConflictingRNNCellFn(self):
  col = seq_fc.sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=10)
  embed = fc.embedding_column(col, dimension=2)
  cell_units = [4, 2]

  with self.assertRaisesRegexp(
      ValueError,
      'num_units and cell_type must not be specified when using rnn_cell_fn'):
    rnn.RNNClassifier(
        sequence_feature_columns=[embed],
        rnn_cell_fn=lambda x: x,
        num_units=cell_units)

  with self.assertRaisesRegexp(
      ValueError,
      'num_units and cell_type must not be specified when using rnn_cell_fn'):
    rnn.RNNClassifier(
        sequence_feature_columns=[embed],
        rnn_cell_fn=lambda x: x,
        cell_type='lstm')
def _testAnnotationsPresentForEstimator(self, estimator_class):
  feature_columns = [
      feature_column.numeric_column('x', shape=(1,)),
      feature_column.embedding_column(
          feature_column.categorical_column_with_vocabulary_list(
              'y', vocabulary_list=['a', 'b', 'c']),
          dimension=3)
  ]
  estimator = estimator_class(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      model_dir=self._model_dir)
  model_fn = estimator.model_fn

  graph = ops.Graph()
  with graph.as_default():
    model_fn(
        {
            'x': array_ops.constant([1.0]),
            'y': array_ops.constant(['a'])
        }, {},
        model_fn_lib.ModeKeys.PREDICT,
        config=None)

    unprocessed_features = self._getLayerAnnotationCollection(
        graph, dnn_with_layer_annotations.LayerAnnotationsCollectionNames
        .UNPROCESSED_FEATURES)
    processed_features = self._getLayerAnnotationCollection(
        graph, dnn_with_layer_annotations.LayerAnnotationsCollectionNames
        .PROCESSED_FEATURES)
    feature_columns = graph.get_collection(
        dnn_with_layer_annotations.LayerAnnotationsCollectionNames
        .FEATURE_COLUMNS)

    self.assertItemsEqual(unprocessed_features.keys(), ['x', 'y'])
    self.assertEqual(2, len(processed_features.keys()))
    self.assertEqual(2, len(feature_columns))
from tensorflow.contrib.learn import (LinearRegressor, pandas_input_fn,
                                      DNNRegressor, Experiment)
from tensorflow.python.feature_column.feature_column import \
    categorical_column_with_hash_bucket, numeric_column, \
    categorical_column_with_vocabulary_list, embedding_column, indicator_column

make = categorical_column_with_hash_bucket('make', 100)
horsepower = numeric_column('horsepower', shape=[])
cylinders = categorical_column_with_vocabulary_list(
    'num-of-cylinders', ['two', 'three', 'four', 'six', 'eight'])

###############
regressor = DNNRegressor(
    feature_columns=[
        embedding_column(make, 10), horsepower, indicator_column(cylinders)
    ],
    hidden_units=[50, 30, 10])
################
regressor = LinearRegressor(feature_columns=[make, horsepower, cylinders])

# any python generator
train_input_fn = pandas_input_fn(
    x=input_data, y=input_label, batch_size=64, shuffle=True, num_epochs=None)
regressor.train(train_input_fn, steps=10000)


def experiment_fn(run_config, hparams):  # completed in the sketch below
  regressor = DNNRegressor(..., config=run_config)
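A hedged sketch of how the truncated experiment function above might continue, using the tf.contrib.learn `Experiment` API already imported; the eval input function and step counts here are illustrative assumptions, not part of the original:

def experiment_fn(run_config, hparams):
  # Sketch: rebuild the estimator under the RunConfig the framework supplies.
  regressor = DNNRegressor(
      feature_columns=[
          embedding_column(make, 10), horsepower, indicator_column(cylinders)
      ],
      hidden_units=[50, 30, 10],
      config=run_config)
  return Experiment(
      estimator=regressor,
      train_input_fn=train_input_fn,
      eval_input_fn=train_input_fn,  # assumption: use a held-out set in practice
      train_steps=10000)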
def _complete_flow_with_mode(self, mode):
  n_classes = 3
  input_dimension = 2
  batch_size = 12

  data = np.linspace(
      0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
  x_data = data.reshape(batch_size, input_dimension)
  categorical_data = np.random.random_integers(
      0, len(x_data), size=len(x_data))
  y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))

  train_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_data, 'categories': categorical_data},
      y=y_data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_data, 'categories': categorical_data},
      y=y_data,
      batch_size=batch_size,
      shuffle=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_data, 'categories': categorical_data},
      batch_size=batch_size,
      shuffle=False)

  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,)),
      feature_column.embedding_column(
          feature_column.categorical_column_with_vocabulary_list(
              'categories',
              vocabulary_list=np.linspace(
                  0., len(x_data), len(x_data), dtype=np.int64)), 1)
  ]

  estimator = dnn.DNNClassifier(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  def optimizer_fn():
    return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)

  if not mode:  # Use the public `replicate_model_fn`.
    model_fn = replicate_model_fn.replicate_model_fn(
        estimator.model_fn,
        optimizer_fn,
        devices=['/gpu:0', '/gpu:1', '/gpu:2'])
  else:
    model_fn = replicate_model_fn._replicate_model_fn_with_mode(
        estimator.model_fn,
        optimizer_fn,
        devices=['/gpu:0', '/gpu:1', '/gpu:2'],
        mode=mode)

  estimator = estimator_lib.Estimator(
      model_fn=model_fn,
      model_dir=estimator.model_dir,
      config=estimator.config,
      params=estimator.params)

  num_steps = 10
  estimator.train(train_input_fn, steps=num_steps)

  scores = estimator.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  predicted_proba = np.array([
      x[prediction_keys.PredictionKeys.PROBABILITIES]
      for x in estimator.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                           serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def test_one_shot_prediction_head_export(self, estimator_factory):
  def _new_temp_dir():
    return os.path.join(test.get_temp_dir(), str(ops.uid()))

  model_dir = _new_temp_dir()
  categorical_column = feature_column.categorical_column_with_hash_bucket(
      key="categorical_exogenous_feature", hash_bucket_size=16)
  exogenous_feature_columns = [
      feature_column.numeric_column("2d_exogenous_feature", shape=(2,)),
      feature_column.embedding_column(
          categorical_column=categorical_column, dimension=10)
  ]
  estimator = estimator_factory(
      model_dir=model_dir,
      exogenous_feature_columns=exogenous_feature_columns,
      head_type=ts_head_lib.OneShotPredictionHead)
  train_features = {
      feature_keys.TrainEvalFeatures.TIMES:
          numpy.arange(20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES:
          numpy.tile(numpy.arange(20, dtype=numpy.float32)[:, None], [1, 5]),
      "2d_exogenous_feature":
          numpy.ones([20, 2]),
      "categorical_exogenous_feature":
          numpy.array(["strkey"] * 20)[:, None]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(train_features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  estimator.train(input_fn=train_input_fn, steps=5)
  result = estimator.evaluate(input_fn=train_input_fn, steps=1)
  self.assertIn("average_loss", result)
  self.assertNotIn(feature_keys.State.STATE_TUPLE, result)

  input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
  export_location = estimator.export_saved_model(_new_temp_dir(),
                                                 input_receiver_fn)
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                       list(signatures.signature_def.keys()))
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      six.assertCountEqual(self, [
          feature_keys.FilteringFeatures.TIMES,
          feature_keys.FilteringFeatures.VALUES, "2d_exogenous_feature",
          "categorical_exogenous_feature"
      ], predict_signature.inputs.keys())
      features = {
          feature_keys.TrainEvalFeatures.TIMES:
              numpy.tile(
                  numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
          feature_keys.TrainEvalFeatures.VALUES:
              numpy.tile(
                  numpy.arange(20, dtype=numpy.float32)[None, :, None],
                  [2, 1, 5]),
          "2d_exogenous_feature":
              numpy.ones([2, 35, 2]),
          "categorical_exogenous_feature":
              numpy.tile(
                  numpy.array(["strkey"] * 35)[None, :, None], [2, 1, 1])
      }
      feeds = {
          graph.as_graph_element(input_value.name): features[input_key]
          for input_key, input_value in predict_signature.inputs.items()
      }
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)

  # Build a parsing input function, then make a tf.Example for it to parse.
  export_location = estimator.export_saved_model(
      _new_temp_dir(),
      estimator.build_one_shot_parsing_serving_input_receiver_fn(
          filtering_length=20, prediction_length=15))
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      example = example_pb2.Example()
      times = example.features.feature[feature_keys.TrainEvalFeatures.TIMES]
      values = example.features.feature[feature_keys.TrainEvalFeatures.VALUES]
      times.int64_list.value.extend(range(35))
      for i in range(20):
        values.float_list.value.extend(
            [float(i) * 2. + feature_number for feature_number in range(5)])
      real_feature = example.features.feature["2d_exogenous_feature"]
      categorical_feature = example.features.feature[
          "categorical_exogenous_feature"]
      for i in range(35):
        real_feature.float_list.value.extend([1, 1])
        categorical_feature.bytes_list.value.append(b"strkey")
      # Serialize the tf.Example for feeding to the Session.
      examples = [example.SerializeToString()] * 2
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      ((_, input_value),) = predict_signature.inputs.items()
      feeds = {graph.as_graph_element(input_value.name): examples}
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)
def _build_feature_columns(self):
  multi_hot_feature_columns = {}
  multi_hot_feature_columns_deep = {}
  multi_category_feature_columns = {}
  continuous_feature_columns = {}
  crossed_feature_columns = []
  bucketized_feature_columns = []
  embedding_feature_columns = []

  if self._data_conf.multi_hot_columns is not None:
    for column in self._data_conf.multi_hot_columns:
      multi_hot_feature_columns[column] = \
          categorical_column_with_vocabulary_list(
              column, self._data_conf.multi_hot_columns[column],
              dtype=tf.string)
      multi_hot_feature_columns_deep[column] = indicator_column(
          multi_hot_feature_columns[column])

  if self._data_conf.multi_category_columns is not None:
    multi_category_feature_columns = {
        column: categorical_column_with_hash_bucket(
            column, hash_bucket_size=1000)
        for column in self._data_conf.multi_category_columns
    }

  if self._data_conf.continuous_columns is not None:
    continuous_feature_columns = {
        column: numeric_column(column)
        for column in self._data_conf.continuous_columns
    }

  if self._data_conf.crossed_columns is not None:
    crossed_feature_columns = [
        crossed_column(columns, hash_bucket_size=100000)
        for columns in self._data_conf.crossed_columns
    ]

  if self._data_conf.bucketized_columns is not None:
    for column, boundary in self._data_conf.bucketized_columns.items():
      bucketized_feature_columns.append(
          bucketized_column(
              continuous_feature_columns[column], boundaries=boundary))

  if len(multi_category_feature_columns) > 0:
    embedding_feature_columns = [
        embedding_column(
            column, dimension=self._model_conf.embedding_dimension)
        for column in multi_category_feature_columns.values()
    ]

  self._feature_mapping = {
      0: list(multi_hot_feature_columns.values()),
      1: list(multi_category_feature_columns.values()),
      2: list(continuous_feature_columns.values()),
      3: crossed_feature_columns,
      4: bucketized_feature_columns,
      5: embedding_feature_columns,
      6: list(multi_hot_feature_columns_deep.values())
  }
  self._build_feature_columns_for_model()
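The builder above assumes a `_data_conf` object exposing one attribute per column family. A minimal illustrative config of the shape it reads; every name and value here is a made-up assumption for the sketch:

class DataConf(object):
  """Hypothetical config consumed by _build_feature_columns."""
  multi_hot_columns = {'genres': ['action', 'comedy', 'drama']}
  multi_category_columns = ['user_id', 'item_id']
  continuous_columns = ['age', 'price']
  crossed_columns = [['user_id', 'item_id']]
  # Column name -> bucket boundaries for bucketized_column.
  bucketized_columns = {'age': [18.0, 25.0, 35.0, 50.0]}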
def test_embedding_column(self):
  vocabulary_size = 3
  sparse_input_a = sparse_tensor.SparseTensorValue(
      # example 0, ids [2]
      # example 1, ids [0, 1]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(2, 0, 1),
      dense_shape=(2, 2))
  sparse_input_b = sparse_tensor.SparseTensorValue(
      # example 0, ids [1]
      # example 1, ids [2, 0]
      indices=((0, 0), (1, 0), (1, 1)),
      values=(1, 2, 0),
      dense_shape=(2, 2))

  embedding_dimension_a = 2
  embedding_values_a = (
      (1., 2.),  # id 0
      (3., 4.),  # id 1
      (5., 6.)  # id 2
  )
  embedding_dimension_b = 3
  embedding_values_b = (
      (11., 12., 13.),  # id 0
      (14., 15., 16.),  # id 1
      (17., 18., 19.)  # id 2
  )

  def _get_initializer(embedding_dimension, embedding_values):
    def _initializer(shape, dtype, partition_info):
      self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
      self.assertEqual(dtypes.float32, dtype)
      self.assertIsNone(partition_info)
      return embedding_values
    return _initializer

  expected_input_layer = [
      # example 0, ids_a [2], ids_b [1]
      [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]],
      # example 1, ids_a [0, 1], ids_b [2, 0]
      [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],
  ]
  expected_sequence_length = [1, 2]

  categorical_column_a = sfc.sequence_categorical_column_with_identity(
      key='aaa', num_buckets=vocabulary_size)
  embedding_column_a = fc.embedding_column(
      categorical_column_a,
      dimension=embedding_dimension_a,
      initializer=_get_initializer(embedding_dimension_a, embedding_values_a))
  categorical_column_b = sfc.sequence_categorical_column_with_identity(
      key='bbb', num_buckets=vocabulary_size)
  embedding_column_b = fc.embedding_column(
      categorical_column_b,
      dimension=embedding_dimension_b,
      initializer=_get_initializer(embedding_dimension_b, embedding_values_b))

  input_layer, sequence_length = sfc.sequence_input_layer(
      features={
          'aaa': sparse_input_a,
          'bbb': sparse_input_b,
      },
      # Test that columns are reordered alphabetically.
      feature_columns=[embedding_column_b, embedding_column_a])

  global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
  self.assertItemsEqual(
      ('sequence_input_layer/aaa_embedding/embedding_weights:0',
       'sequence_input_layer/bbb_embedding/embedding_weights:0'),
      tuple([v.name for v in global_vars]))
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess))
    self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess))
    self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
    self.assertAllEqual(
        expected_sequence_length, sequence_length.eval(session=sess))
def test_one_shot_prediction_head_export(self):
  model_dir = self.get_temp_dir()
  categorical_column = feature_column.categorical_column_with_hash_bucket(
      key="categorical_exogenous_feature", hash_bucket_size=16)
  exogenous_feature_columns = [
      feature_column.numeric_column("2d_exogenous_feature", shape=(2,)),
      feature_column.embedding_column(
          categorical_column=categorical_column, dimension=10)
  ]
  estimator = ts_estimators.TimeSeriesRegressor(
      model=lstm_example._LSTMModel(
          num_features=5,
          num_units=128,
          exogenous_feature_columns=exogenous_feature_columns),
      optimizer=adam.AdamOptimizer(0.001),
      config=estimator_lib.RunConfig(tf_random_seed=4),
      state_manager=state_management.ChainingStateManager(),
      head_type=ts_head_lib.OneShotPredictionHead,
      model_dir=model_dir)
  train_features = {
      feature_keys.TrainEvalFeatures.TIMES:
          numpy.arange(20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES:
          numpy.tile(numpy.arange(20, dtype=numpy.float32)[:, None], [1, 5]),
      "2d_exogenous_feature":
          numpy.ones([20, 2]),
      "categorical_exogenous_feature":
          numpy.array(["strkey"] * 20)[:, None]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(train_features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  estimator.train(input_fn=train_input_fn, steps=5)
  input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
  export_location = estimator.export_savedmodel(self.get_temp_dir(),
                                                input_receiver_fn)
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                       list(signatures.signature_def.keys()))
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      six.assertCountEqual(self, [
          feature_keys.FilteringFeatures.TIMES,
          feature_keys.FilteringFeatures.VALUES, "2d_exogenous_feature",
          "categorical_exogenous_feature"
      ], predict_signature.inputs.keys())
      features = {
          feature_keys.TrainEvalFeatures.TIMES:
              numpy.tile(
                  numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
          feature_keys.TrainEvalFeatures.VALUES:
              numpy.tile(
                  numpy.arange(20, dtype=numpy.float32)[None, :, None],
                  [2, 1, 5]),
          "2d_exogenous_feature":
              numpy.ones([2, 35, 2]),
          "categorical_exogenous_feature":
              numpy.tile(
                  numpy.array(["strkey"] * 35)[None, :, None], [2, 1, 1])
      }
      feeds = {
          graph.as_graph_element(input_value.name): features[input_key]
          for input_key, input_value in predict_signature.inputs.items()
      }
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertAllEqual((2, 15, 5), output["mean"].shape)
def test_one_shot_prediction_head_export(self, estimator_factory):

  def _new_temp_dir():
    return os.path.join(test.get_temp_dir(), str(ops.uid()))

  model_dir = _new_temp_dir()
  categorical_column = feature_column.categorical_column_with_hash_bucket(
      key="categorical_exogenous_feature", hash_bucket_size=16)
  exogenous_feature_columns = [
      feature_column.numeric_column("2d_exogenous_feature", shape=(2,)),
      feature_column.embedding_column(
          categorical_column=categorical_column, dimension=10)
  ]
  estimator = estimator_factory(
      model_dir=model_dir,
      exogenous_feature_columns=exogenous_feature_columns,
      head_type=ts_head_lib.OneShotPredictionHead)
  train_features = {
      feature_keys.TrainEvalFeatures.TIMES:
          numpy.arange(20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES:
          numpy.tile(numpy.arange(20, dtype=numpy.float32)[:, None], [1, 5]),
      "2d_exogenous_feature":
          numpy.ones([20, 2]),
      "categorical_exogenous_feature":
          numpy.array(["strkey"] * 20)[:, None]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(train_features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  estimator.train(input_fn=train_input_fn, steps=5)
  result = estimator.evaluate(input_fn=train_input_fn, steps=1)
  self.assertIn("average_loss", result)
  self.assertNotIn(feature_keys.State.STATE_TUPLE, result)
  input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
  export_location = estimator.export_savedmodel(_new_temp_dir(),
                                                input_receiver_fn)
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                       list(signatures.signature_def.keys()))
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      six.assertCountEqual(self, [
          feature_keys.FilteringFeatures.TIMES,
          feature_keys.FilteringFeatures.VALUES, "2d_exogenous_feature",
          "categorical_exogenous_feature"
      ], predict_signature.inputs.keys())
      features = {
          feature_keys.TrainEvalFeatures.TIMES:
              numpy.tile(numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
          feature_keys.TrainEvalFeatures.VALUES:
              numpy.tile(
                  numpy.arange(20, dtype=numpy.float32)[None, :, None],
                  [2, 1, 5]),
          "2d_exogenous_feature":
              numpy.ones([2, 35, 2]),
          "categorical_exogenous_feature":
              numpy.tile(
                  numpy.array(["strkey"] * 35)[None, :, None], [2, 1, 1])
      }
      feeds = {
          graph.as_graph_element(input_value.name): features[input_key]
          for input_key, input_value in predict_signature.inputs.items()
      }
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)
  # Build a parsing input function, then make a tf.Example for it to parse.
  export_location = estimator.export_savedmodel(
      _new_temp_dir(),
      estimator.build_one_shot_parsing_serving_input_receiver_fn(
          filtering_length=20, prediction_length=15))
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      example = example_pb2.Example()
      times = example.features.feature[feature_keys.TrainEvalFeatures.TIMES]
      values = example.features.feature[feature_keys.TrainEvalFeatures.VALUES]
      times.int64_list.value.extend(range(35))
      for i in range(20):
        values.float_list.value.extend(
            [float(i) * 2. + feature_number for feature_number in range(5)])
      real_feature = example.features.feature["2d_exogenous_feature"]
      categorical_feature = example.features.feature[
          "categorical_exogenous_feature"]
      for i in range(35):
        real_feature.float_list.value.extend([1, 1])
        categorical_feature.bytes_list.value.append(b"strkey")
      # Serialize the tf.Example for feeding to the Session.
      examples = [example.SerializeToString()] * 2
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      ((_, input_value),) = predict_signature.inputs.items()
      feeds = {graph.as_graph_element(input_value.name): examples}
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)

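# A short sketch (not part of the original test; all names below are local to
# this note) of why both assertions above expect shape (2, 15, 5): the parsing
# receiver was exported with filtering_length=20 and prediction_length=15, the
# serialized example carries 35 time steps, and the model emits 5 features, so
# a batch of two serialized examples yields predictions for the final
# 35 - 20 = 15 steps of each.
def _one_shot_output_shape(batch_size, total_steps, filtering_length,
                           num_features):
  """Output shape of a one-shot head, assuming this filtering convention."""
  return (batch_size, total_steps - filtering_length, num_features)

assert _one_shot_output_shape(2, 35, 20, 5) == (2, 15, 5)
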
def testWarmStartEmbeddingColumnLinearModel(self):
  # Create old and new vocabs for embedding column "sc_vocab".
  prev_vocab_path = self._write_vocab(
      ["apple", "banana", "guava", "orange"], "old_vocab")
  new_vocab_path = self._write_vocab(
      ["orange", "guava", "banana", "apple", "raspberry", "blueberry"],
      "new_vocab")

  # Save checkpoint from which to warm-start.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      variable_scope.get_variable(
          "linear_model/sc_vocab_embedding/embedding_weights",
          initializer=[[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]])
      variable_scope.get_variable(
          "linear_model/sc_vocab_embedding/weights",
          initializer=[[0.69], [0.71]])
      self._write_checkpoint(sess)

  def _partitioner(shape, dtype):  # pylint:disable=unused-argument
    # Partition each var into 2 equal slices.
    partitions = [1] * len(shape)
    partitions[0] = min(2, shape[0].value)
    return partitions

  # Create feature columns.
  sc_vocab = fc.categorical_column_with_vocabulary_file(
      "sc_vocab", vocabulary_file=new_vocab_path, vocabulary_size=6)
  emb_vocab = fc.embedding_column(categorical_column=sc_vocab, dimension=2)
  all_deep_cols = [emb_vocab]

  # New graph, new session with warm-starting.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      cols_to_vars = {}
      with variable_scope.variable_scope("", partitioner=_partitioner):
        # Create the variables.
        fc.linear_model(
            features=self._create_dummy_inputs(),
            feature_columns=all_deep_cols,
            cols_to_vars=cols_to_vars)
      # Construct the vocab_info for the embedding weight.
      vocab_info = ws_util.VocabInfo(
          new_vocab=sc_vocab.vocabulary_file,
          new_vocab_size=sc_vocab.vocabulary_size,
          num_oov_buckets=sc_vocab.num_oov_buckets,
          old_vocab=prev_vocab_path,
          # Can't use constant_initializer with load_and_remap. In practice,
          # use a truncated normal initializer.
          backup_initializer=init_ops.random_uniform_initializer(
              minval=0.42, maxval=0.42))
      ws_util.warm_start(
          self.get_temp_dir(),
          vars_to_warm_start=".*sc_vocab.*",
          var_name_to_vocab_info={
              "linear_model/sc_vocab_embedding/embedding_weights": vocab_info
          })
      sess.run(variables.global_variables_initializer())
      # Verify weights were correctly warm-started. Var corresponding to
      # emb_vocab should be correctly warm-started after vocab remapping.
      # Missing values are filled in with the EmbeddingColumn's initializer.
      self._assert_cols_to_vars(
          cols_to_vars, {
              emb_vocab: [
                  # linear weights part 0.
                  np.array([[0.69]]),
                  # linear weights part 1.
                  np.array([[0.71]]),
                  # embedding_weights part 0.
                  np.array([[3., 3.3], [2., 2.2], [1., 1.1]]),
                  # embedding_weights part 1.
                  np.array([[0.5, 0.4], [0.42, 0.42], [0.42, 0.42]])
              ]
          }, sess)

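# A minimal sketch (hypothetical helper; not part of the warm-starting API) of
# the remapping the expected arrays above encode: each row of the new
# embedding reuses the old row for the same token when the token existed in
# the old vocab, and otherwise falls back to the backup initializer's value
# (0.42 here).
def _remap_rows(old_vocab, new_vocab, old_rows, backup_row):
  """Maps old embedding rows onto the new vocabulary order."""
  by_token = dict(zip(old_vocab, old_rows))
  return [by_token.get(token, backup_row) for token in new_vocab]

# _remap_rows(["apple", "banana", "guava", "orange"],
#             ["orange", "guava", "banana", "apple", "raspberry", "blueberry"],
#             [[0.5, 0.4], [1., 1.1], [2., 2.2], [3., 3.3]],
#             [0.42, 0.42])
# -> [[3., 3.3], [2., 2.2], [1., 1.1], [0.5, 0.4], [0.42, 0.42], [0.42, 0.42]]
# With the 2-way partitioner above, the first three rows land in
# embedding_weights part 0 and the last three in part 1, matching the test.
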
def test_complete_flow(self):
  n_classes = 3
  input_dimension = 2
  batch_size = 12
  data = np.linspace(
      0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
  x_data = data.reshape(batch_size, input_dimension)
  categorical_data = np.random.random_integers(
      0, len(x_data), size=len(x_data))
  y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
  train_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_data, 'categories': categorical_data},
      y=y_data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_data, 'categories': categorical_data},
      y=y_data,
      batch_size=batch_size,
      shuffle=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_data, 'categories': categorical_data},
      batch_size=batch_size,
      shuffle=False)
  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,)),
      feature_column.embedding_column(
          feature_column.categorical_column_with_vocabulary_list(
              'categories',
              vocabulary_list=np.linspace(
                  0., len(x_data), len(x_data), dtype=np.int64)), 1)
  ]
  estimator = dnn.DNNClassifier(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  def optimizer_fn():
    return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)

  estimator = estimator_lib.Estimator(
      model_fn=replicate_model_fn.replicate_model_fn(
          estimator.model_fn,
          optimizer_fn,
          devices=['/gpu:0', '/gpu:1', '/gpu:2']),
      model_dir=estimator.model_dir,
      config=estimator.config,
      params=estimator.params)

  num_steps = 10
  estimator.train(train_input_fn, steps=num_steps)

  scores = estimator.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  predicted_proba = np.array([
      x[prediction_keys.PredictionKeys.PROBABILITIES]
      for x in estimator.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = estimator.export_savedmodel(
      tempfile.mkdtemp(), serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))

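# A rough sketch (illustrative only; `_tower_batch_sizes` is not part of the
# replicate_model_fn API) of the even batch split the three-GPU replication
# above relies on: batch_size=12 divides cleanly across the
# ['/gpu:0', '/gpu:1', '/gpu:2'] towers as three per-tower batches of 4.
def _tower_batch_sizes(batch_size, num_towers):
  """Per-tower batch sizes, assuming an even split across towers."""
  assert batch_size % num_towers == 0, "batch must divide evenly across towers"
  return [batch_size // num_towers] * num_towers

assert _tower_batch_sizes(12, 3) == [4, 4, 4]
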
def test_one_shot_prediction_head_export(self, estimator_factory):
  model_dir = os.path.join(test.get_temp_dir(), str(ops.uid()))
  categorical_column = feature_column.categorical_column_with_hash_bucket(
      key="categorical_exogenous_feature", hash_bucket_size=16)
  exogenous_feature_columns = [
      feature_column.numeric_column("2d_exogenous_feature", shape=(2,)),
      feature_column.embedding_column(
          categorical_column=categorical_column, dimension=10)
  ]
  estimator = estimator_factory(
      model_dir=model_dir,
      exogenous_feature_columns=exogenous_feature_columns,
      head_type=ts_head_lib.OneShotPredictionHead)
  train_features = {
      feature_keys.TrainEvalFeatures.TIMES:
          numpy.arange(20, dtype=numpy.int64),
      feature_keys.TrainEvalFeatures.VALUES:
          numpy.tile(numpy.arange(20, dtype=numpy.float32)[:, None], [1, 5]),
      "2d_exogenous_feature":
          numpy.ones([20, 2]),
      "categorical_exogenous_feature":
          numpy.array(["strkey"] * 20)[:, None]
  }
  train_input_fn = input_pipeline.RandomWindowInputFn(
      input_pipeline.NumpyReader(train_features),
      shuffle_seed=2,
      num_threads=1,
      batch_size=16,
      window_size=16)
  estimator.train(input_fn=train_input_fn, steps=5)
  input_receiver_fn = estimator.build_raw_serving_input_receiver_fn()
  export_location = estimator.export_savedmodel(test.get_temp_dir(),
                                                input_receiver_fn)
  graph = ops.Graph()
  with graph.as_default():
    with session_lib.Session() as session:
      signatures = loader.load(session, [tag_constants.SERVING],
                               export_location)
      self.assertEqual([feature_keys.SavedModelLabels.PREDICT],
                       list(signatures.signature_def.keys()))
      predict_signature = signatures.signature_def[
          feature_keys.SavedModelLabels.PREDICT]
      six.assertCountEqual(self, [
          feature_keys.FilteringFeatures.TIMES,
          feature_keys.FilteringFeatures.VALUES, "2d_exogenous_feature",
          "categorical_exogenous_feature"
      ], predict_signature.inputs.keys())
      features = {
          feature_keys.TrainEvalFeatures.TIMES:
              numpy.tile(numpy.arange(35, dtype=numpy.int64)[None, :], [2, 1]),
          feature_keys.TrainEvalFeatures.VALUES:
              numpy.tile(
                  numpy.arange(20, dtype=numpy.float32)[None, :, None],
                  [2, 1, 5]),
          "2d_exogenous_feature":
              numpy.ones([2, 35, 2]),
          "categorical_exogenous_feature":
              numpy.tile(
                  numpy.array(["strkey"] * 35)[None, :, None], [2, 1, 1])
      }
      feeds = {
          graph.as_graph_element(input_value.name): features[input_key]
          for input_key, input_value in predict_signature.inputs.items()
      }
      fetches = {
          output_key: graph.as_graph_element(output_value.name)
          for output_key, output_value in predict_signature.outputs.items()
      }
      output = session.run(fetches, feed_dict=feeds)
      self.assertEqual((2, 15, 5), output["mean"].shape)