def test_sequence_length_not_equal(self): """Tests that an error is raised when sequence lengths are not equal.""" # Input a with sequence_length = [2, 1] sparse_input_a = sparse_tensor.SparseTensorValue(indices=((0, 0), (0, 1), (1, 0)), values=(0., 1., 10.), dense_shape=(2, 2)) # Input b with sequence_length = [1, 1] sparse_input_b = sparse_tensor.SparseTensorValue(indices=((0, 0), (1, 0)), values=(1., 10.), dense_shape=(2, 2)) numeric_column_a = sfc.sequence_numeric_column('aaa') numeric_column_b = sfc.sequence_numeric_column('bbb') _, sequence_length = sfc.sequence_input_layer( features={ 'aaa': sparse_input_a, 'bbb': sparse_input_b, }, feature_columns=[numeric_column_a, numeric_column_b]) with monitored_session.MonitoredSession() as sess: with self.assertRaisesRegexp( errors.InvalidArgumentError, r'\[Condition x == y did not hold element-wise:\] ' r'\[x \(sequence_input_layer/aaa/sequence_length:0\) = \] \[2 1\] ' r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]' ): sess.run(sequence_length)
def test_sequence_length_not_equal(self): """Tests that an error is raised when sequence lengths are not equal.""" # Input a with sequence_length = [2, 1] sparse_input_a = sparse_tensor.SparseTensorValue( indices=((0, 0), (0, 1), (1, 0)), values=(0., 1., 10.), dense_shape=(2, 2)) # Input b with sequence_length = [1, 1] sparse_input_b = sparse_tensor.SparseTensorValue( indices=((0, 0), (1, 0)), values=(1., 10.), dense_shape=(2, 2)) numeric_column_a = sfc.sequence_numeric_column('aaa') numeric_column_b = sfc.sequence_numeric_column('bbb') _, sequence_length = sfc.sequence_input_layer( features={ 'aaa': sparse_input_a, 'bbb': sparse_input_b, }, feature_columns=[numeric_column_a, numeric_column_b]) with monitored_session.MonitoredSession() as sess: with self.assertRaisesRegexp( errors.InvalidArgumentError, r'\[Condition x == y did not hold element-wise:\] ' r'\[x \(sequence_input_layer/aaa/sequence_length:0\) = \] \[2 1\] ' r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]'): sess.run(sequence_length)
def test_shape_must_be_positive_integer(self):
  """Verifies shape validation in sequence_numeric_column."""
  # Non-integer dimensions are rejected with a TypeError.
  with self.assertRaisesRegexp(TypeError,
                               'shape dimensions must be integer'):
    sfc.sequence_numeric_column('aaa', shape=[1.0])

  # Zero-sized dimensions are rejected with a ValueError.
  with self.assertRaisesRegexp(ValueError,
                               'shape dimensions must be greater than 0'):
    sfc.sequence_numeric_column('aaa', shape=[0])
def testMultiClassFromCheckpoint(self):
  """Trains a 3-class RNNClassifier warm-started from a fixed checkpoint."""
  initial_global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=initial_global_step,
      model_dir=self._model_dir)

  def train_input_fn():
    # Two examples, two timesteps each, with labels [[0], [1]].
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2., 7.],
                indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
    }, [[0], [1]]

  # Uses same checkpoint and examples as testMultiClassEvaluationMetrics.
  # See that test for loss calculation.
  mock_optimizer = self._mock_optimizer(expected_loss=2.662932)

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))
  ]
  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=3,
      optimizer=mock_optimizer,
      model_dir=self._model_dir)
  # The optimizer must be invoked exactly once by train().
  self.assertEqual(0, mock_optimizer.minimize.call_count)
  est.train(input_fn=train_input_fn, steps=10)
  self.assertEqual(1, mock_optimizer.minimize.call_count)
def testMultiExampleMultiDim(self):
  """Tests multiple examples and multi-dimensional logits.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[10], [5]], [[2], [7]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
                            tanh(-.2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.1*2 + .2*0 + .3*0 +.2),
                            tanh(-.2*2 - .3*0 - .4*0 +.5)]]
                        = [[0.83, -0.91], [0.38, 0.10]]
  rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
                            tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
                           [tanh(.1*7 + .2*.38 + .3*.10 +.2),
                            tanh(-.2*7 - .3*.38 - .4*.10 +.5)]]
                        = [[0.53, -0.37], [0.76, -0.78]]
  logits = [[-1*0.53 - 1*0.37 + 0.3,
             0.5*0.53 + 0.3*0.37 + 0.4,
             0.2*0.53 - 0.1*0.37 + 0.5],
            [-1*0.76 - 1*0.78 + 0.3,
             0.5*0.76 + 0.3*0.78 + 0.4,
             0.2*0.76 - 0.1*0.78 + 0.5]]
         = [[-0.6033, 0.7777, 0.5698], [-1.2473, 1.0170, 0.5745]]
  """
  base_global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2., 7.],
                indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))
  ]
  context_feature_columns = []
  # Given the fixed checkpoint, logits must be identical in every mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=3,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-0.6033, 0.7777, 0.5698],
                         [-1.2473, 1.0170, 0.5745]])
def test_get_sequence_dense_tensor_with_normalizer_fn(self):
  """Tests that normalizer_fn is applied before the input is densified."""

  def _increment_two(input_sparse_tensor):
    # Adds 2 to the entries at sparse positions (0, 0) and (1, 1).
    return sparse_ops.sparse_add(
        input_sparse_tensor,
        sparse_tensor.SparseTensor(((0, 0), (1, 1)), (2.0, 2.0), (2, 2))
    )

  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, values [[0.], [1]]
      # example 1, [[10.]]
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0., 1., 10.),
      dense_shape=(2, 2))

  # Before _increment_two:
  #   [[0.], [1.]],
  #   [[10.], [0.]],
  # After _increment_two:
  #   [[2.], [1.]],
  #   [[10.], [2.]],
  expected_dense_tensor = [
      [[2.], [1.]],
      [[10.], [2.]],
  ]
  numeric_column = sfc.sequence_numeric_column(
      'aaa', normalizer_fn=_increment_two)

  dense_tensor, _ = numeric_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(
        expected_dense_tensor, dense_tensor.eval(session=sess))
def test_numeric_column_multi_dim(self):
  """Tests sequence_input_layer for multi-dimensional numeric_column."""
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]
      # example 1, [[[10., 11.], [12., 13.]]]
      indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
               (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
      values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
      dense_shape=(2, 8))
  # The output of numeric_column._get_dense_tensor should be flattened.
  expected_input_layer = [
      [[0., 1., 2., 3.], [4., 5., 6., 7.]],
      [[10., 11., 12., 13.], [0., 0., 0., 0.]],
  ]
  expected_sequence_length = [2, 1]
  numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

  input_layer, sequence_length = sfc.sequence_input_layer(
      features={'aaa': sparse_input},
      feature_columns=[numeric_column])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
    self.assertAllEqual(
        expected_sequence_length, sequence_length.eval(session=sess))
def test_numeric_column_multi_dim(self):
  """Tests sequence_input_layer for multi-dimensional numeric_column."""
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]
      # example 1, [[[10., 11.], [12., 13.]]]
      indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
               (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)),
      values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
      dense_shape=(2, 8))
  # The output of numeric_column._get_dense_tensor should be flattened.
  expected_input_layer = [
      [[0., 1., 2., 3.], [4., 5., 6., 7.]],
      [[10., 11., 12., 13.], [0., 0., 0., 0.]],
  ]
  expected_sequence_length = [2, 1]
  numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

  input_layer, sequence_length = sfc.sequence_input_layer(
      features={'aaa': sparse_input},
      feature_columns=[numeric_column])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess))
    self.assertAllEqual(expected_sequence_length,
                        sequence_length.eval(session=sess))
def testMultiExampleMultiDim(self):
  """Tests multiple examples and multi-dimensional logits.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[10], [5]], [[2], [7]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
                            tanh(-.2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.1*2 + .2*0 + .3*0 +.2),
                            tanh(-.2*2 - .3*0 - .4*0 +.5)]]
                        = [[0.83, -0.91], [0.38, 0.10]]
  rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
                            tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
                           [tanh(.1*7 + .2*.38 + .3*.10 +.2),
                            tanh(-.2*7 - .3*.38 - .4*.10 +.5)]]
                        = [[0.53, -0.37], [0.76, -0.78]]
  logits = [[-1*0.53 - 1*0.37 + 0.3,
             0.5*0.53 + 0.3*0.37 + 0.4,
             0.2*0.53 - 0.1*0.37 + 0.5],
            [-1*0.76 - 1*0.78 + 0.3,
             0.5*0.76 + 0.3*0.78 + 0.4,
             0.2*0.76 - 0.1*0.78 + 0.5]]
         = [[-0.6033, 0.7777, 0.5698], [-1.2473, 1.0170, 0.5745]]
  """
  base_global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2., 7.],
                indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))
  ]
  context_feature_columns = []
  # Given the fixed checkpoint, logits must be identical in every mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=3,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-0.6033, 0.7777, 0.5698],
                         [-1.2473, 1.0170, 0.5745]])
def testMultiClassFromCheckpoint(self):
  """Trains a 3-class RNNClassifier warm-started from a fixed checkpoint."""
  initial_global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=initial_global_step,
      model_dir=self._model_dir)

  def train_input_fn():
    # Two examples, two timesteps each, with labels [[0], [1]].
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2., 7.],
                indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
    }, [[0], [1]]

  # Uses same checkpoint and examples as testMultiClassEvaluationMetrics.
  # See that test for loss calculation.
  mock_optimizer = self._mock_optimizer(expected_loss=1.331465)

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=3,
      optimizer=mock_optimizer,
      model_dir=self._model_dir)
  # The optimizer must be invoked exactly once by train().
  self.assertEqual(0, mock_optimizer.minimize.call_count)
  est.train(input_fn=train_input_fn, steps=10)
  self.assertEqual(1, mock_optimizer.minimize.call_count)
def test_defaults(self):
  """Checks the default attribute values of sequence_numeric_column."""
  column = sfc.sequence_numeric_column('aaa')
  # Table-driven check of every documented default.
  for attribute, expected in [
      ('key', 'aaa'),
      ('name', 'aaa'),
      ('_var_scope_name', 'aaa'),
      ('shape', (1,)),
      ('default_value', 0.),
      ('dtype', dtypes.float32),
  ]:
    self.assertEqual(expected, getattr(column, attribute))
def test_defaults(self):
  """Checks the default attribute values of sequence_numeric_column."""
  a = sfc.sequence_numeric_column('aaa')
  self.assertEqual('aaa', a.key)
  self.assertEqual('aaa', a.name)
  self.assertEqual('aaa', a._var_scope_name)
  # Default shape is a single scalar value per timestep.
  self.assertEqual((1,), a.shape)
  # Missing entries are padded with 0.
  self.assertEqual(0., a.default_value)
  self.assertEqual(dtypes.float32, a.dtype)
def testMultiExamplesWithContext(self):
  """Tests multiple examples with context features.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[10, -0.5], [5, -0.5]], [[2, 0.8], [0, 0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.1*10 - 1*.5 + .2*0 + .3*0 +.2),
                            tanh(-.2*10 - 0.9*.5 - .3*0 - .4*0 +.5)],
                           [tanh(.1*2 + 1*.8 + .2*0 + .3*0 +.2),
                            tanh(-.2*2 + .9*.8 - .3*0 - .4*0 +.5)]]
                        = [[0.60, -0.96], [0.83, 0.68]]
  rnn_output_timestep_2 = [[tanh(.1*5 - 1*.5 + .2*.60 - .3*.96 +.2),
                            tanh(-.2*5 - .9*.5 - .3*.60 + .4*.96 +.5)],
                           [<ignored-padding>]]
                        = [[0.03, -0.63], [<ignored-padding>]]
  logits = [[-1*0.03 - 1*0.63 + 0.3],
            [-1*0.83 + 1*0.68 + 0.3]]
         = [[-0.3662], [0.1414]]
  """
  base_global_step = 100
  create_checkpoint(
      # Context features weights are inserted between input and state weights.
      rnn_weights=[[.1, -.2], [1., 0.9], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
        # One context value per example, broadcast across timesteps.
        'context': [[-0.5], [0.8]],
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  context_feature_columns = [fc.numeric_column('context', shape=(1,))]
  # Given the fixed checkpoint, logits must be identical in every mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-0.3662], [0.1414]])
def testMultiExamplesWithContext(self):
  """Tests multiple examples with context features.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[10, -0.5], [5, -0.5]], [[2, 0.8], [0, 0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.1*10 - 1*.5 + .2*0 + .3*0 +.2),
                            tanh(-.2*10 - 0.9*.5 - .3*0 - .4*0 +.5)],
                           [tanh(.1*2 + 1*.8 + .2*0 + .3*0 +.2),
                            tanh(-.2*2 + .9*.8 - .3*0 - .4*0 +.5)]]
                        = [[0.60, -0.96], [0.83, 0.68]]
  rnn_output_timestep_2 = [[tanh(.1*5 - 1*.5 + .2*.60 - .3*.96 +.2),
                            tanh(-.2*5 - .9*.5 - .3*.60 + .4*.96 +.5)],
                           [<ignored-padding>]]
                        = [[0.03, -0.63], [<ignored-padding>]]
  logits = [[-1*0.03 - 1*0.63 + 0.3],
            [-1*0.83 + 1*0.68 + 0.3]]
         = [[-0.3662], [0.1414]]
  """
  base_global_step = 100
  create_checkpoint(
      # Context features weights are inserted between input and state weights.
      rnn_weights=[[.1, -.2], [1., 0.9], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
        # One context value per example, broadcast across timesteps.
        'context': [[-0.5], [0.8]],
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))
  ]
  context_feature_columns = [fc.numeric_column('context', shape=(1,))]
  # Given the fixed checkpoint, logits must be identical in every mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-0.3662], [0.1414]])
def testOneDimLogits(self, return_sequences, expected_logits):
  """Tests one-dimensional logits.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[10]], [[5]]]
  initial_state = [0, 0]
  rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
                            tanh(-.2*10 - .3*0 - .4*0 +.5)]]
                        = [[0.83, -0.91]]
  rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
                            tanh(-.2*5 - .3*.83 + .4*.91 +.5)]]
                        = [[0.53, -0.37]]
  logits_timestep_1 = [[-1*0.83 - 1*0.91 + 0.3]]
                    = [[-1.4388]]
  logits_timestep_2 = [[-1*0.53 - 1*0.37 + 0.3]]
                    = [[-0.6033]]

  Args:
    return_sequences: A boolean indicating whether to return the last output
      in the output sequence, or the full sequence.
    expected_logits: An array with expected logits result.
  """
  base_global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    # One example with two timesteps.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5.],
                indices=[[0, 0], [0, 1]],
                dense_shape=[1, 2]),
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  context_feature_columns = []
  # Given the fixed checkpoint, logits must be identical in every mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=expected_logits,
        return_sequences=return_sequences)
def testMultiExamplesDifferentLength(self):
  """Tests multiple examples with different lengths.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[10], [5]], [[2], [0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
                            tanh(-.2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.1*2 + .2*0 + .3*0 +.2),
                            tanh(-.2*2 - .3*0 - .4*0 +.5)]]
                        = [[0.83, -0.91], [0.38, 0.10]]
  rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
                            tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
                           [<ignored-padding>]]
                        = [[0.53, -0.37], [<ignored-padding>]]
  logits = [[-1*0.53 - 1*0.37 + 0.3],
            [-1*0.38 + 1*0.10 + 0.3]]
         = [[-0.6033], [0.0197]]
  """
  base_global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    # Example 0 has two timesteps; example 1 has one.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  context_feature_columns = []
  # Given the fixed checkpoint, logits must be identical in every mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-0.6033], [0.0197]])
def testMultiExamplesDifferentLength(self):
  """Tests multiple examples with different lengths.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[10], [5]], [[2], [0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.1*10 + .2*0 + .3*0 +.2),
                            tanh(-.2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.1*2 + .2*0 + .3*0 +.2),
                            tanh(-.2*2 - .3*0 - .4*0 +.5)]]
                        = [[0.83, -0.91], [0.38, 0.10]]
  rnn_output_timestep_2 = [[tanh(.1*5 + .2*.83 - .3*.91 +.2),
                            tanh(-.2*5 - .3*.83 + .4*.91 +.5)],
                           [<ignored-padding>]]
                        = [[0.53, -0.37], [<ignored-padding>]]
  logits = [[-1*0.53 - 1*0.37 + 0.3],
            [-1*0.38 + 1*0.10 + 0.3]]
         = [[-0.6033], [0.0197]]
  """
  base_global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    # Example 0 has two timesteps; example 1 has one.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  context_feature_columns = []
  # Given the fixed checkpoint, logits must be identical in every mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-0.6033], [0.0197]])
def testBinaryClassEvaluationMetrics(self):
  """Checks binary-classification eval metrics from a fixed checkpoint."""
  global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=global_step,
      model_dir=self._model_dir)

  def eval_input_fn():
    # Two examples (lengths 2 and 1), labels [[0], [1]].
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }, [[0], [1]]

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]

  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=2,
      model_dir=self._model_dir)
  eval_metrics = est.evaluate(eval_input_fn, steps=1)

  # Uses identical numbers to testMultiExamplesWithDifferentLength.
  # See that test for logits calculation.
  # logits = [[-0.603282], [0.019719]]
  # probability = exp(logits) / (1 + exp(logits)) = [[0.353593], [0.504930]]
  # loss = -label * ln(p) - (1 - label) * ln(1 - p)
  #      = [[0.436326], [0.683335]]
  expected_metrics = {
      ops.GraphKeys.GLOBAL_STEP: global_step,
      metric_keys.MetricKeys.LOSS: 1.119661,
      metric_keys.MetricKeys.LOSS_MEAN: 0.559831,
      metric_keys.MetricKeys.ACCURACY: 1.0,
      metric_keys.MetricKeys.PREDICTION_MEAN: 0.429262,
      metric_keys.MetricKeys.LABEL_MEAN: 0.5,
      metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
      # With default threshold of 0.5, the model is a perfect classifier.
      metric_keys.MetricKeys.RECALL: 1.0,
      metric_keys.MetricKeys.PRECISION: 1.0,
      # Positive example is scored above negative, so AUC = 1.0.
      metric_keys.MetricKeys.AUC: 1.0,
      metric_keys.MetricKeys.AUC_PR: 1.0,
  }
  self.assertAllClose(
      sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
def testMultiClassEvaluationMetrics(self):
  """Checks multi-class eval metrics from a fixed checkpoint."""
  global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=global_step,
      model_dir=self._model_dir)

  def eval_input_fn():
    # Two examples, two timesteps each, labels [[0], [1]].
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2., 7.],
                indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
    }, [[0], [1]]

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]

  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=3,
      model_dir=self._model_dir)
  eval_metrics = est.evaluate(eval_input_fn, steps=1)

  # Uses identical numbers to testMultiExampleMultiDim.
  # See that test for logits calculation.
  # logits = [[-0.603282, 0.777708, 0.569756],
  #           [-1.247356, 1.017018, 0.574481]]
  # logits_exp = exp(logits) / (1 + exp(logits))
  #            = [[0.547013, 2.176468, 1.767836],
  #               [0.287263, 2.764937, 1.776208]]
  # softmax_probabilities = logits_exp / logits_exp.sum()
  #                       = [[0.121793, 0.484596, 0.393611],
  #                          [0.059494, 0.572639, 0.367866]]
  # loss = -1. * log(softmax[label])
  #      = [[2.105432], [0.557500]]
  # sum_over_batch_size = (2.105432 + 0.557500)/2
  expected_metrics = {
      ops.GraphKeys.GLOBAL_STEP: global_step,
      metric_keys.MetricKeys.LOSS: 1.331465,
      metric_keys.MetricKeys.LOSS_MEAN: 1.331466,
      metric_keys.MetricKeys.ACCURACY: 0.5,
  }
  self.assertAllClose(
      sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
def testMultiClassEvaluationMetrics(self):
  """Checks multi-class eval metrics from a fixed checkpoint."""
  global_step = 100
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=global_step,
      model_dir=self._model_dir)

  def eval_input_fn():
    # Two examples, two timesteps each, labels [[0], [1]].
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2., 7.],
                indices=[[0, 0], [0, 1], [1, 0], [1, 1]],
                dense_shape=[2, 2]),
    }, [[0], [1]]

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]

  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=3,
      model_dir=self._model_dir)
  eval_metrics = est.evaluate(eval_input_fn, steps=1)

  # Uses identical numbers to testMultiExampleMultiDim.
  # See that test for logits calculation.
  # logits = [[-0.603282, 0.777708, 0.569756],
  #           [-1.247356, 1.017018, 0.574481]]
  # logits_exp = exp(logits) / (1 + exp(logits))
  #            = [[0.547013, 2.176468, 1.767836],
  #               [0.287263, 2.764937, 1.776208]]
  # softmax_probabilities = logits_exp / logits_exp.sum()
  #                       = [[0.121793, 0.484596, 0.393611],
  #                          [0.059494, 0.572639, 0.367866]]
  # loss = -1. * log(softmax[label])
  #      = [[2.105432], [0.557500]]
  # sum_over_batch_size = (2.105432 + 0.557500)/2
  expected_metrics = {
      ops.GraphKeys.GLOBAL_STEP: global_step,
      metric_keys.MetricKeys.LOSS: 1.331465,
      metric_keys.MetricKeys.LOSS_MEAN: 1.331466,
      metric_keys.MetricKeys.ACCURACY: 0.5,
  }
  self.assertAllClose(
      sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
def testBinaryClassPredictions(self):
  """Checks all binary-classification prediction keys from a checkpoint."""
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=0,
      model_dir=self._model_dir)

  def predict_input_fn():
    # One example with two timesteps.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5.],
                indices=[[0, 0], [0, 1]],
                dense_shape=[1, 2]),
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  label_vocabulary = ['class_0', 'class_1']

  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=2,
      label_vocabulary=label_vocabulary,
      model_dir=self._model_dir)
  # Uses identical numbers to testOneDimLogits.
  # See that test for logits calculation.
  # logits = [-0.603282]
  # logistic = exp(-0.6033) / (1 + exp(-0.6033)) = [0.353593]
  # probabilities = [0.646407, 0.353593]
  # class_ids = argmax(probabilities) = [0]
  predictions = next(est.predict(predict_input_fn))
  self.assertAllClose([-0.603282],
                      predictions[prediction_keys.PredictionKeys.LOGITS])
  self.assertAllClose([0.353593],
                      predictions[prediction_keys.PredictionKeys.LOGISTIC])
  self.assertAllClose(
      [0.646407, 0.353593],
      predictions[prediction_keys.PredictionKeys.PROBABILITIES])
  self.assertAllClose([0],
                      predictions[prediction_keys.PredictionKeys.CLASS_IDS])
  # Class string comes from label_vocabulary and is returned as bytes.
  self.assertEqual([b'class_0'],
                   predictions[prediction_keys.PredictionKeys.CLASSES])
def test_sequence_length_with_shape(self):
  """Tests _sequence_length with shape !=(1,).

  NOTE(review): the column below is created with the default shape (1,),
  which contradicts this summary; confirm intent (compare
  test_sequence_length, which uses shape=(3,)).
  """
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, values [[0.], [1]]
      # example 1, [[10.]]
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0., 1., 10.),
      dense_shape=(2, 2))
  expected_sequence_length = [2, 1]
  numeric_column = sfc.sequence_numeric_column('aaa')

  _, sequence_length = numeric_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(
        expected_sequence_length, sequence_length.eval(session=sess))
def test_sequence_length(self):
  """Checks sequence lengths for a shape=(3,) sequence numeric column."""
  sp_input = sparse_tensor.SparseTensorValue(
      # example 0, values [[0., 1., 2.], [3., 4., 5.]]
      # example 1, [[10., 11., 12.]]
      indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
               (1, 0), (1, 1), (1, 2)),
      values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
      dense_shape=(2, 6))
  expected = [2, 1]
  column = sfc.sequence_numeric_column('aaa', shape=(3,))

  _, seq_length = column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sp_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected, sess.run(seq_length))
def testBinaryClassPredictions(self):
  """Checks all binary-classification prediction keys from a checkpoint."""
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=0,
      model_dir=self._model_dir)

  def predict_input_fn():
    # One example with two timesteps.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5.],
                indices=[[0, 0], [0, 1]],
                dense_shape=[1, 2]),
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  label_vocabulary = ['class_0', 'class_1']

  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=2,
      label_vocabulary=label_vocabulary,
      model_dir=self._model_dir)
  # Uses identical numbers to testOneDimLogits.
  # See that test for logits calculation.
  # logits = [-0.603282]
  # logistic = exp(-0.6033) / (1 + exp(-0.6033)) = [0.353593]
  # probabilities = [0.646407, 0.353593]
  # class_ids = argmax(probabilities) = [0]
  predictions = next(est.predict(predict_input_fn))
  self.assertAllClose([-0.603282],
                      predictions[prediction_keys.PredictionKeys.LOGITS])
  self.assertAllClose([0.353593],
                      predictions[prediction_keys.PredictionKeys.LOGISTIC])
  self.assertAllClose(
      [0.646407, 0.353593],
      predictions[prediction_keys.PredictionKeys.PROBABILITIES])
  self.assertAllClose([0],
                      predictions[prediction_keys.PredictionKeys.CLASS_IDS])
  # Class string comes from label_vocabulary and is returned as bytes.
  self.assertEqual([b'class_0'],
                   predictions[prediction_keys.PredictionKeys.CLASSES])
def test_sequence_length_with_shape(self):
  """Tests _sequence_length with shape !=(1,).

  NOTE(review): the column below is created with the default shape (1,),
  which contradicts this summary; confirm intent (compare
  test_sequence_length, which uses shape=(3,)).
  """
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, values [[0.], [1]]
      # example 1, [[10.]]
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0., 1., 10.),
      dense_shape=(2, 2))
  expected_sequence_length = [2, 1]
  numeric_column = sfc.sequence_numeric_column('aaa')

  _, sequence_length = numeric_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected_sequence_length,
                        sequence_length.eval(session=sess))
def testMultiClassPredictions(self):
  """Checks all multi-class prediction keys from a fixed checkpoint."""
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=0,
      model_dir=self._model_dir)

  def predict_input_fn():
    # One example with two timesteps.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5.],
                indices=[[0, 0], [0, 1]],
                dense_shape=[1, 2]),
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  label_vocabulary = ['class_0', 'class_1', 'class_2']

  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=3,
      label_vocabulary=label_vocabulary,
      model_dir=self._model_dir)
  # Uses identical numbers to testMultiDimLogits.
  # See that test for logits calculation.
  # logits = [-0.603282, 0.777708, 0.569756]
  # logits_exp = exp(logits) = [0.547013, 2.176468, 1.767836]
  # softmax_probabilities = logits_exp / logits_exp.sum()
  #                       = [0.121793, 0.484596, 0.393611]
  # class_ids = argmax(probabilities) = [1]
  predictions = next(est.predict(predict_input_fn))
  self.assertAllClose([-0.603282, 0.777708, 0.569756],
                      predictions[prediction_keys.PredictionKeys.LOGITS])
  self.assertAllClose(
      [0.121793, 0.484596, 0.393611],
      predictions[prediction_keys.PredictionKeys.PROBABILITIES])
  self.assertAllClose([1],
                      predictions[prediction_keys.PredictionKeys.CLASS_IDS])
  # Class string comes from label_vocabulary and is returned as bytes.
  self.assertEqual([b'class_1'],
                   predictions[prediction_keys.PredictionKeys.CLASSES])
def testMultiClassPredictions(self):
  """Checks all multi-class prediction keys from a fixed checkpoint."""
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1., 0.5, 0.2], [1., -0.3, 0.1]],
      logits_biases=[0.3, 0.4, 0.5],
      global_step=0,
      model_dir=self._model_dir)

  def predict_input_fn():
    # One example with two timesteps.
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5.],
                indices=[[0, 0], [0, 1]],
                dense_shape=[1, 2]),
    }

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1,))]
  label_vocabulary = ['class_0', 'class_1', 'class_2']

  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=3,
      label_vocabulary=label_vocabulary,
      model_dir=self._model_dir)
  # Uses identical numbers to testMultiDimLogits.
  # See that test for logits calculation.
  # logits = [-0.603282, 0.777708, 0.569756]
  # logits_exp = exp(logits) = [0.547013, 2.176468, 1.767836]
  # softmax_probabilities = logits_exp / logits_exp.sum()
  #                       = [0.121793, 0.484596, 0.393611]
  # class_ids = argmax(probabilities) = [1]
  predictions = next(est.predict(predict_input_fn))
  self.assertAllClose([-0.603282, 0.777708, 0.569756],
                      predictions[prediction_keys.PredictionKeys.LOGITS])
  self.assertAllClose(
      [0.121793, 0.484596, 0.393611],
      predictions[prediction_keys.PredictionKeys.PROBABILITIES])
  self.assertAllClose([1],
                      predictions[prediction_keys.PredictionKeys.CLASS_IDS])
  # Class string comes from label_vocabulary and is returned as bytes.
  self.assertEqual([b'class_1'],
                   predictions[prediction_keys.PredictionKeys.CLASSES])
def test_sequence_length(self):
  """Checks sequence-length values and dtype for a shape=(3,) column."""
  sparse_input = sparse_tensor.SparseTensorValue(
      # example 0, values [[0., 1., 2.], [3., 4., 5.]]
      # example 1, [[10., 11., 12.]]
      indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
               (1, 0), (1, 1), (1, 2)),
      values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
      dense_shape=(2, 6))
  expected_sequence_length = [2, 1]
  numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,))

  _, sequence_length = numeric_column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_input}))

  with monitored_session.MonitoredSession() as sess:
    sequence_length = sess.run(sequence_length)
    self.assertAllEqual(expected_sequence_length, sequence_length)
    # Sequence lengths must be int64 so downstream ops can consume them.
    self.assertEqual(np.int64, sequence_length.dtype)
def test_get_sequence_dense_tensor(self):
  """Checks the padded dense tensor produced for a default-shape column."""
  sp_input = sparse_tensor.SparseTensorValue(
      # example 0, values [[0.], [1]]
      # example 1, [[10.]]
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0., 1., 10.),
      dense_shape=(2, 2))
  # The shorter second sequence is padded out with zeros.
  expected = [
      [[0.], [1.]],
      [[10.], [0.]],
  ]
  column = sfc.sequence_numeric_column('aaa')

  dense_tensor, _ = column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sp_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected, sess.run(dense_tensor))
def test_get_sequence_dense_tensor(self):
  """Values are reshaped to [batch, max_seq_len, 1] with zero padding."""
  # example 0, values [[0.], [1.]]
  # example 1, [[10.]]
  sp_input = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0., 1., 10.),
      dense_shape=(2, 2))
  want = [
      [[0.], [1.]],
      [[10.], [0.]],
  ]
  col = sfc.sequence_numeric_column('aaa')

  actual, _ = col._get_sequence_dense_tensor(_LazyBuilder({'aaa': sp_input}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want, actual.eval(session=sess))
def test_get_sequence_dense_tensor_with_shape(self):
  """Tests get_sequence_dense_tensor with shape !=(1,)."""
  # example 0, values [[0., 1., 2.], [3., 4., 5.]]
  # example 1, [[10., 11., 12.]]
  inputs = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
               (1, 0), (1, 1), (1, 2)),
      values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
      dense_shape=(2, 6))
  column = sfc.sequence_numeric_column('aaa', shape=(3,))

  dense_tensor, _ = column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': inputs}))

  # The short example is padded with a full zero step.
  expected = [
      [[0., 1., 2.], [3., 4., 5.]],
      [[10., 11., 12.], [0., 0., 0.]],
  ]
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected, dense_tensor.eval(session=sess))
def test_sequence_length_with_empty_rows(self):
  """Tests _sequence_length when some examples do not have ids."""
  # Rows 0, 3 and 5 carry no values; their lengths must come back as 0.
  # example 1 has values [[0.], [1.]]; examples 2 and 4 have one value each.
  sp = sparse_tensor.SparseTensorValue(
      indices=((1, 0), (1, 1), (2, 0), (4, 0)),
      values=(0., 1., 2., 3.),
      dense_shape=(6, 2))
  col = sfc.sequence_numeric_column('aaa')

  _, seq_len = col._get_sequence_dense_tensor(_LazyBuilder({'aaa': sp}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual([0, 2, 1, 0, 1, 0], seq_len.eval(session=sess))
def test_get_dense_tensor_multi_dim(self):
  """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
  # example 0: two steps, each of shape (2, 2); example 1: one step.
  inputs = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
               (1, 0), (1, 1), (1, 2), (1, 3)),
      values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
      dense_shape=(2, 8))
  column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

  dense_tensor, _ = column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': inputs}))

  # The short example is padded with a zero-filled (2, 2) step.
  expected = [
      [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
      [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]],
  ]
  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(expected, dense_tensor.eval(session=sess))
def test_sequence_length_with_empty_rows(self):
  """Tests _sequence_length when some examples do not have ids."""
  # example 0, values []
  # example 1, values [[0.], [1.]]
  # example 2, [[2.]]
  # example 3, values []
  # example 4, [[3.]]
  # example 5, values []
  sparse_value = sparse_tensor.SparseTensorValue(
      indices=((1, 0), (1, 1), (2, 0), (4, 0)),
      values=(0., 1., 2., 3.),
      dense_shape=(6, 2))
  want_lengths = [0, 2, 1, 0, 1, 0]
  column = sfc.sequence_numeric_column('aaa')

  _, length_tensor = column._get_sequence_dense_tensor(
      _LazyBuilder({'aaa': sparse_value}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want_lengths, length_tensor.eval(session=sess))
def test_get_sequence_dense_tensor_with_shape(self):
  """Tests get_sequence_dense_tensor with shape !=(1,)."""
  # example 0, values [[0., 1., 2.], [3., 4., 5.]]
  # example 1, [[10., 11., 12.]]
  sp_value = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5),
               (1, 0), (1, 1), (1, 2)),
      values=(0., 1., 2., 3., 4., 5., 10., 11., 12.),
      dense_shape=(2, 6))
  want = [
      [[0., 1., 2.], [3., 4., 5.]],
      [[10., 11., 12.], [0., 0., 0.]],
  ]
  col = sfc.sequence_numeric_column('aaa', shape=(3,))

  got, _ = col._get_sequence_dense_tensor(_LazyBuilder({'aaa': sp_value}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want, got.eval(session=sess))
def test_get_dense_tensor_multi_dim(self):
  """Tests get_sequence_dense_tensor for multi-dim numeric_column."""
  # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]]
  # example 1, [[[10., 11.], [12., 13.]]]
  sp_value = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7),
               (1, 0), (1, 1), (1, 2), (1, 3)),
      values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.),
      dense_shape=(2, 8))
  want = [
      [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]],
      [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]],
  ]
  col = sfc.sequence_numeric_column('aaa', shape=(2, 2))

  got, _ = col._get_sequence_dense_tensor(_LazyBuilder({'aaa': sp_value}))

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want, got.eval(session=sess))
def test_numeric_column(self):
  """sequence_input_layer yields padded values plus per-example lengths."""
  # example 0, values [[0.], [1.]]; example 1, [[10.]]
  sp = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0., 1., 10.),
      dense_shape=(2, 2))
  column = sfc.sequence_numeric_column('aaa')

  input_layer, seq_len = sfc.sequence_input_layer(
      features={'aaa': sp}, feature_columns=[column])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual([[[0.], [1.]], [[10.], [0.]]],
                        input_layer.eval(session=sess))
    self.assertAllEqual([2, 1], seq_len.eval(session=sess))
def test_numeric_column(self):
  """A numeric sequence column feeds sequence_input_layer correctly."""
  # example 0, values [[0.], [1.]]
  # example 1, [[10.]]
  sparse_value = sparse_tensor.SparseTensorValue(
      indices=((0, 0), (0, 1), (1, 0)),
      values=(0., 1., 10.),
      dense_shape=(2, 2))
  want_layer = [
      [[0.], [1.]],
      [[10.], [0.]],
  ]
  want_lengths = [2, 1]
  col = sfc.sequence_numeric_column('aaa')

  layer, lengths = sfc.sequence_input_layer(
      features={'aaa': sparse_value}, feature_columns=[col])

  with monitored_session.MonitoredSession() as sess:
    self.assertAllEqual(want_layer, layer.eval(session=sess))
    self.assertAllEqual(want_lengths, lengths.eval(session=sess))
def test_shape_saved_as_tuple(self):
  """A list-valued `shape` argument is normalized to a tuple on the column."""
  column = sfc.sequence_numeric_column('aaa', shape=[1, 2])
  self.assertEqual((1, 2), column.shape)
def testMultiExamplesMultiFeatures(self):
  """Tests examples with multiple sequential feature columns.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                        = [[0.94, -0.96], [0.72, -0.38]]
  rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                            tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                           [<ignored-padding>]]
                        = [[0.92, -0.88], [<ignored-padding>]]
  logits = [[-1*0.92 - 1*0.88 + 0.3],
            [-1*0.72 - 1*0.38 + 0.3]]
         = [[-1.5056], [-0.7962]]
  """
  base_global_step = 100
  create_checkpoint(
      # FeatureColumns are sorted alphabetically, so on_sale weights are
      # inserted before price.
      rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    # Two examples; 'price' and 'on_sale' have matching sequence
    # lengths [2, 1].
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
        'on_sale':
            sparse_tensor.SparseTensor(
                values=[0, 1, 0],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }

  price_column = seq_fc.sequence_numeric_column('price', shape=(1,))
  # 'on_sale' is a 2-bucket categorical id, one-hot encoded.
  on_sale_column = fc.indicator_column(
      seq_fc.sequence_categorical_column_with_identity(
          'on_sale', num_buckets=2))
  sequence_feature_columns = [price_column, on_sale_column]
  context_feature_columns = []

  # The same fixed checkpoint must produce the same logits in every mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-1.5056], [-0.7962]])
def make_columns():
  """Builds the feature_columns required by the estimator to link the
  Dataset and the model_fn.

  The categorical ``gci*`` features are one-hot encoded against the shared
  vocabulary file; every other feature becomes a numeric sequence column
  normalized with its own statistics from ``stats_dict``.

  :return: dict mapping feature name to its feature column, in the same
      insertion order as the original hand-written version.
  """

  def _categorical(name):
    # One-hot encode a cell identifier against the shared vocabulary file.
    return fc.indicator_column(
        fc.sequence_categorical_column_with_vocabulary_file(
            name, vocab_file, default_value="0"))

  def _numeric(name):
    # Bind `name` via a default argument so each normalizer closes over its
    # own key (avoids the classic late-binding lambda pitfall).
    return seq_fc.sequence_numeric_column(
        name,
        normalizer_fn=lambda x, key=name: normalize(x, key, stats_dict))

  columns_dict = {}
  columns_dict['gci'] = _categorical('gci')
  columns_dict['ta'] = _numeric('ta')
  columns_dict['rsrp'] = _numeric('rsrp')
  # Three (gci{i}, rsrp{i}) neighbor-cell pairs, interleaved as before.
  for i in range(3):
    columns_dict['gci%d' % i] = _categorical('gci%d' % i)
    columns_dict['rsrp%d' % i] = _numeric('rsrp%d' % i)
  columns_dict['dt'] = _numeric('dt')
  return columns_dict
def testMultiExamplesMultiFeatures(self):
  """Tests examples with multiple sequential feature columns.

  Intermediate values are rounded for ease in reading.
  input_layer = [[[1, 0, 10], [0, 1, 5]], [[1, 0, 2], [0, 0, 0]]]
  initial_state = [[0, 0], [0, 0]]
  rnn_output_timestep_1 = [[tanh(.5*1 + 1*0 + .1*10 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*10 - .3*0 - .4*0 +.5)],
                           [tanh(.5*1 + 1*0 + .1*2 + .2*0 + .3*0 +.2),
                            tanh(-.5*1 - 1*0 - .2*2 - .3*0 - .4*0 +.5)]]
                        = [[0.94, -0.96], [0.72, -0.38]]
  rnn_output_timestep_2 = [[tanh(.5*0 + 1*1 + .1*5 + .2*.94 - .3*.96 +.2),
                            tanh(-.5*0 - 1*1 - .2*5 - .3*.94 + .4*.96 +.5)],
                           [<ignored-padding>]]
                        = [[0.92, -0.88], [<ignored-padding>]]
  logits = [[-1*0.92 - 1*0.88 + 0.3],
            [-1*0.72 - 1*0.38 + 0.3]]
         = [[-1.5056], [-0.7962]]
  """
  base_global_step = 100
  create_checkpoint(
      # FeatureColumns are sorted alphabetically, so on_sale weights are
      # inserted before price.
      rnn_weights=[[.5, -.5], [1., -1.], [.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=base_global_step,
      model_dir=self._model_dir)

  def features_fn():
    # Two examples; both features have sequence lengths [2, 1].
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
        'on_sale':
            sparse_tensor.SparseTensor(
                values=[0, 1, 0],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }

  price_column = seq_fc.sequence_numeric_column('price', shape=(1, ))
  # 'on_sale' is a 2-bucket categorical id, one-hot encoded.
  on_sale_column = fc.indicator_column(
      seq_fc.sequence_categorical_column_with_identity(
          'on_sale', num_buckets=2))
  sequence_feature_columns = [price_column, on_sale_column]
  context_feature_columns = []

  # The same fixed checkpoint must produce the same logits in every mode.
  for mode in [
      model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
      model_fn.ModeKeys.PREDICT
  ]:
    self._test_logits(
        mode,
        rnn_units=[2],
        logits_dimension=1,
        features_fn=features_fn,
        sequence_feature_columns=sequence_feature_columns,
        context_feature_columns=context_feature_columns,
        expected_logits=[[-1.5056], [-0.7962]])
def test_dtype_is_convertible_to_float(self):
  """A non-float-convertible dtype raises ValueError at construction."""
  with self.assertRaisesRegexp(ValueError,
                               'dtype must be convertible to float'):
    sfc.sequence_numeric_column('aaa', dtype=dtypes.string)
def testBinaryClassEvaluationMetrics(self):
  """Checks binary-classification eval metrics against hand-computed values."""
  global_step = 100
  # Seed the checkpoint with fixed weights so the metrics are deterministic.
  create_checkpoint(
      rnn_weights=[[.1, -.2], [.2, -.3], [.3, -.4]],
      rnn_biases=[.2, .5],
      logits_weights=[[-1.], [1.]],
      logits_biases=[0.3],
      global_step=global_step,
      model_dir=self._model_dir)

  def eval_input_fn():
    # Two examples with 'price' sequence lengths [2, 1]; labels [[0], [1]].
    return {
        'price':
            sparse_tensor.SparseTensor(
                values=[10., 5., 2.],
                indices=[[0, 0], [0, 1], [1, 0]],
                dense_shape=[2, 2]),
    }, [[0], [1]]

  sequence_feature_columns = [
      seq_fc.sequence_numeric_column('price', shape=(1, ))
  ]

  est = rnn.RNNClassifier(
      num_units=[2],
      sequence_feature_columns=sequence_feature_columns,
      n_classes=2,
      model_dir=self._model_dir)
  eval_metrics = est.evaluate(eval_input_fn, steps=1)

  # Uses identical numbers to testMultiExamplesWithDifferentLength.
  # See that test for logits calculation.
  # logits = [[-0.603282], [0.019719]]
  # probability = exp(logits) / (1 + exp(logits)) = [[0.353593], [0.504930]]
  # loss = -label * ln(p) - (1 - label) * ln(1 - p)
  #      = [[0.436326], [0.683335]]
  expected_metrics = {
      ops.GraphKeys.GLOBAL_STEP: global_step,
      metric_keys.MetricKeys.LOSS: 1.119661,
      metric_keys.MetricKeys.LOSS_MEAN: 0.559831,
      metric_keys.MetricKeys.ACCURACY: 1.0,
      metric_keys.MetricKeys.PREDICTION_MEAN: 0.429262,
      metric_keys.MetricKeys.LABEL_MEAN: 0.5,
      metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
      # With default threshold of 0.5, the model is a perfect classifier.
      metric_keys.MetricKeys.RECALL: 1.0,
      metric_keys.MetricKeys.PRECISION: 1.0,
      # Positive example is scored above negative, so AUC = 1.0.
      metric_keys.MetricKeys.AUC: 1.0,
      metric_keys.MetricKeys.AUC_PR: 1.0,
  }
  self.assertAllClose(
      sorted_key_dict(expected_metrics), sorted_key_dict(eval_metrics))
def test_normalizer_fn_must_be_callable(self):
  """Passing a non-callable normalizer_fn raises TypeError at construction."""
  with self.assertRaisesRegexp(TypeError, 'must be a callable'):
    sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable')