def _test_complete_flow(
    self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
    label_dimension, prediction_length, batch_size):
  """Runs train -> evaluate -> predict -> export on a `LinearRegressor`.

  Args:
    train_input_fn: Input function handed to `train`.
    eval_input_fn: Input function handed to `evaluate`.
    predict_input_fn: Input function handed to `predict`.
    input_dimension: Length of the numeric feature column 'x'.
    label_dimension: Forwarded to the regressor as `label_dimension`; also
      the expected second dimension of the stacked predictions.
    prediction_length: Expected number of predictions produced.
    batch_size: Not used by this helper; kept so existing callers that pass
      it keep working.
  """
  feature_columns = [
      feature_column_lib.numeric_column('x', shape=(input_dimension,))
  ]
  est = linear.LinearRegressor(
      feature_columns=feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir)

  # TRAIN
  # learn y = x (per the original note; depends on the supplied input_fn)
  est.train(train_input_fn, steps=200)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

  # PREDICT
  predictions = np.array(
      [x['predictions'] for x in est.predict(predict_input_fn)])
  self.assertAllEqual((prediction_length, label_dimension), predictions.shape)

  # EXPORT
  feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_root = tempfile.mkdtemp()
  try:
    export_dir = est.export_savedmodel(export_root, serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
  finally:
    # mkdtemp() leaves deletion to the caller; remove the scratch directory
    # whether or not the export and its assertion succeeded.
    gfile.DeleteRecursively(export_root)
def testFromCheckpointMultiBatch(self):
  """Trains from a pre-written checkpoint using a two-example batch.

  The checkpoint is seeded with known weight, bias and global step. After
  training, the global step is expected to have advanced by the number of
  steps while the stored weight and bias match the seeded values.
  """
  # Seed the model directory with a checkpoint holding known values.
  seeded_weight = 10.0
  seeded_bias = 5.0
  seeded_step = 100
  with ops.Graph().as_default():
    variables.Variable([[seeded_weight]], name=_AGE_WEIGHT_NAME)
    variables.Variable([seeded_bias], name=_BIAS_NAME)
    variables.Variable(
        seeded_step, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  # logits = age * age_weight + bias
  # logits[0] = 17 * 10. + 5. = 175
  # logits[1] = 15 * 10. + 5. = 155
  # loss = sum(logits - label)^2 = (175 - 5)^2 + (155 - 3)^2 = 52004
  optimizer_mock = self._mock_optimizer(expected_loss=52004.)
  age_column = feature_column_lib.numeric_column('age')
  regressor = linear.LinearRegressor(
      feature_columns=(age_column,),
      model_dir=self._model_dir,
      optimizer=optimizer_mock)
  # Building the estimator alone must not touch the optimizer.
  self.assertEqual(0, optimizer_mock.minimize.call_count)

  def _two_example_batch():
    features = {'age': ((17,), (15,))}
    labels = ((5.,), (3.,))
    return features, labels

  # Train briefly, then check the optimizer usage and the final checkpoint.
  train_steps = 10
  regressor.train(input_fn=_two_example_batch, steps=train_steps)
  self.assertEqual(1, optimizer_mock.minimize.call_count)
  self._assert_checkpoint(
      expected_global_step=seeded_step + train_steps,
      expected_age_weight=seeded_weight,
      expected_bias=seeded_bias)
def testMultiDim(self):
  """Tests predict when all variables are multi-dimensional."""
  batch_size = 2
  label_dimension = 3
  x_dim = 4

  # shape=[x_dim, label_dimension]
  weight_values = [[1., 2., 3.], [2., 3., 4.], [3., 4., 5.], [4., 5., 6.]]
  # shape=[label_dimension]
  bias_values = [.2, .4, .6]
  with ops.Graph().as_default():
    variables.Variable(weight_values, name='linear/linear_model/x/weights')
    variables.Variable(bias_values, name=_BIAS_NAME)
    variables.Variable(100, name='global_step', dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  x_column = feature_column_lib.numeric_column('x', shape=(x_dim,))
  regressor = linear.LinearRegressor(
      feature_columns=(x_column,),
      label_dimension=label_dimension,
      model_dir=self._model_dir)

  # x shape=[batch_size, x_dim]
  x_values = np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': x_values},
      y=None,
      batch_size=batch_size,
      num_epochs=1,
      shuffle=False)
  predicted_scores = [
      result['predictions']
      for result in regressor.predict(input_fn=predict_input_fn)
  ]

  # score = x * weight + bias, shape=[batch_size, label_dimension]
  expected_scores = [[30.2, 40.4, 50.6], [70.2, 96.4, 122.6]]
  self.assertAllClose(expected_scores, predicted_scores)
def testPartitioner(self):
  """Trains one step with a custom partitioner and a variable-checking hook."""
  x_dim = 64
  partitions = 4

  def _partitioner(shape, dtype):
    del dtype  # unused; required by Fn signature.
    # Only the tensor whose leading dimension equals x_dim is split.
    if shape[0] == x_dim:
      return [partitions, 1]
    return [1]

  language_column = feature_column_lib.categorical_column_with_hash_bucket(
      'language', hash_bucket_size=x_dim)
  regressor = linear.LinearRegressor(
      feature_columns=(language_column,),
      partitioner=_partitioner,
      model_dir=self._model_dir)

  def _input_fn():
    language = sparse_tensor.SparseTensor(
        values=['english', 'spanish'],
        indices=[[0, 0], [0, 1]],
        dense_shape=[1, 2])
    return {'language': language}, [[10.]]

  partition_hook = _CheckPartitionerVarHook(
      self, _LANGUAGE_WEIGHT_NAME, x_dim, partitions)
  regressor.train(input_fn=_input_fn, steps=1, hooks=[partition_hook])
def test_evaluation_weights(self):
  """Tests evaluation with weights."""
  with ops.Graph().as_default():
    variables.Variable([[11.0]], name=_AGE_WEIGHT_NAME)
    variables.Variable([2.0], name=_BIAS_NAME)
    variables.Variable(
        100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  def _input_fn():
    ages = ((1,), (1,))
    example_weights = ((1.,), (2.,))
    labels = ((10.,), (10.,))
    return {'age': ages, 'weights': example_weights}, labels

  age_column = feature_column_lib.numeric_column('age')
  regressor = linear.LinearRegressor(
      feature_columns=(age_column,),
      weight_feature_key='weights',
      model_dir=self._model_dir)
  eval_metrics = regressor.evaluate(input_fn=_input_fn, steps=1)

  # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
  # Loss per example is 3**2 = 9.
  # Training loss is the weighted sum over batch = 1*9 + 2*9 = 27
  # Average loss is the weighted average = (1*9 + 2*9) / (1 + 2) = 9
  expected_metrics = {
      metric_keys.MetricKeys.LOSS: 27.,
      metric_keys.MetricKeys.LOSS_MEAN: 9.,
      ops.GraphKeys.GLOBAL_STEP: 100
  }
  self.assertDictEqual(expected_metrics, eval_metrics)
def testDefaultPartitionerWithMultiplePsReplicas(self):
  """Trains one step with a config reporting several ps replicas."""
  partitions = 2
  # This results in weights larger than the default partition size of 64M,
  # so partitioned weights are created (each weight uses 4 bytes).
  x_dim = 32 << 20

  class FakeRunConfig(run_config.RunConfig):
    """Run config whose `num_ps_replicas` always equals `partitions`."""

    @property
    def num_ps_replicas(self):
      return partitions

  def _input_fn():
    language = sparse_tensor.SparseTensor(
        values=['english', 'spanish'],
        indices=[[0, 0], [0, 1]],
        dense_shape=[1, 2])
    return {'language': language}, [[10.]]

  # Mock the device setter as ps is not available on test machines. The
  # patch stays active for the whole construct-and-train sequence.
  device_setter_patch = test.mock.patch.object(
      estimator,
      '_get_replica_device_setter',
      return_value=lambda _: '/cpu:0')
  with device_setter_patch:
    language_column = feature_column_lib.categorical_column_with_hash_bucket(
        'language', hash_bucket_size=x_dim)
    regressor = linear.LinearRegressor(
        feature_columns=(language_column,),
        config=FakeRunConfig(),
        model_dir=self._model_dir)

    partition_hook = _CheckPartitionerVarHook(
        self, _LANGUAGE_WEIGHT_NAME, x_dim, partitions)
    regressor.train(input_fn=_input_fn, steps=1, hooks=[partition_hook])
def testTwoFeatureColumns(self):
  """Tests predict with two feature columns."""
  with ops.Graph().as_default():
    variables.Variable([[10.]], name='linear/linear_model/x0/weights')
    variables.Variable([[20.]], name='linear/linear_model/x1/weights')
    variables.Variable([.2], name=_BIAS_NAME)
    variables.Variable(100, name='global_step', dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  columns = (
      feature_column_lib.numeric_column('x0'),
      feature_column_lib.numeric_column('x1'),
  )
  regressor = linear.LinearRegressor(
      feature_columns=columns, model_dir=self._model_dir)

  feature_values = {'x0': np.array([[2.]]), 'x1': np.array([[3.]])}
  predict_input_fn = numpy_io.numpy_input_fn(
      x=feature_values, y=None, batch_size=1, num_epochs=1, shuffle=False)
  predicted_scores = [
      result['predictions']
      for result in regressor.predict(input_fn=predict_input_fn)
  ]

  # x0 * weight0 + x1 * weight1 + bias = 2. * 10. + 3. * 20 + .2 = 80.2
  self.assertAllClose([[80.2]], predicted_scores)
def test_evaluation_for_multiple_feature_columns(self):
  """Evaluates a two-column model whose checkpoint fits the labels exactly."""
  with ops.Graph().as_default():
    variables.Variable([[10.0]], name=_AGE_WEIGHT_NAME)
    variables.Variable([[2.0]], name=_HEIGHT_WEIGHT_NAME)
    variables.Variable([5.0], name=_BIAS_NAME)
    variables.Variable(
        100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  batch_size = 2
  feature_columns = [
      feature_column_lib.numeric_column('age'),
      feature_column_lib.numeric_column('height')
  ]
  features = {'age': np.array([20, 40]), 'height': np.array([4, 8])}
  labels = np.array([[213.], [421.]])
  input_fn = numpy_io.numpy_input_fn(
      x=features,
      y=labels,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=False)

  est = linear.LinearRegressor(
      feature_columns=feature_columns, model_dir=self._model_dir)
  eval_metrics = est.evaluate(input_fn=input_fn, steps=1)

  expected_keys = (
      metric_keys.MetricKeys.LOSS,
      metric_keys.MetricKeys.LOSS_MEAN,
      ops.GraphKeys.GLOBAL_STEP,
  )
  self.assertItemsEqual(expected_keys, eval_metrics.keys())

  # Logit is [(20. * 10.0 + 4 * 2.0 + 5.0), (40. * 10.0 + 8 * 2.0 + 5.0)] =
  # [213.0, 421.0], while label is [213., 421.]. Loss = 0.
  self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
def test_evaluation_for_multi_dimensions(self):
  """Evaluates a model with a 3-wide feature and a 2-wide label."""
  x_dim = 3
  label_dim = 2
  weight_values = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]
  bias_values = [7.0, 8.0]
  with ops.Graph().as_default():
    variables.Variable(weight_values, name=_AGE_WEIGHT_NAME)
    variables.Variable(bias_values, name=_BIAS_NAME)
    variables.Variable(100, name='global_step', dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  age_column = feature_column_lib.numeric_column('age', shape=(x_dim,))
  regressor = linear.LinearRegressor(
      feature_columns=(age_column,),
      label_dimension=label_dim,
      model_dir=self._model_dir)

  features = {'age': np.array([[2., 4., 5.]])}
  labels = np.array([[46., 58.]])
  input_fn = numpy_io.numpy_input_fn(
      x=features, y=labels, batch_size=1, num_epochs=None, shuffle=False)
  eval_metrics = regressor.evaluate(input_fn=input_fn, steps=1)

  expected_keys = (
      metric_keys.MetricKeys.LOSS,
      metric_keys.MetricKeys.LOSS_MEAN,
      ops.GraphKeys.GLOBAL_STEP,
  )
  self.assertItemsEqual(expected_keys, eval_metrics.keys())

  # Logit is
  #   [2., 4., 5.] * [[1.0, 2.0],  + [7.0, 8.0] = [39, 50] + [7.0, 8.0]
  #                   [3.0, 4.0],
  #                   [5.0, 6.0]]
  # which is [46, 58], equal to the label, so the loss is 0.
  self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
def test_evaluation_batch(self):
  """Tests evaluation for batch_size==2."""
  with ops.Graph().as_default():
    variables.Variable([[11.0]], name=_AGE_WEIGHT_NAME)
    variables.Variable([2.0], name=_BIAS_NAME)
    variables.Variable(
        100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  def _two_example_input_fn():
    features = {'age': ((1,), (1,))}
    labels = ((10.,), (10.,))
    return features, labels

  age_column = feature_column_lib.numeric_column('age')
  regressor = linear.LinearRegressor(
      feature_columns=(age_column,), model_dir=self._model_dir)
  eval_metrics = regressor.evaluate(input_fn=_two_example_input_fn, steps=1)

  # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
  # Loss per example is 3**2 = 9.
  # Training loss is the sum over batch = 9 + 9 = 18
  # Average loss is the average over batch = 9
  expected_metrics = {
      metric_keys.MetricKeys.LOSS: 18.,
      metric_keys.MetricKeys.LOSS_MEAN: 9.,
      ops.GraphKeys.GLOBAL_STEP: 100
  }
  self.assertDictEqual(expected_metrics, eval_metrics)
def test_key_should_be_in_features(self):
  """Forwarding a key absent from the features raises ValueError on predict."""

  def input_fn():
    features = {'x': [[3.], [5.]], 'id': [[101], [102]]}
    labels = [[1.], [2.]]
    return features, labels

  base_estimator = linear.LinearRegressor([fc.numeric_column('x')])
  base_estimator.train(input_fn=input_fn, steps=1)

  # 'y' is not one of the feature keys returned by input_fn.
  forwarding_estimator = extenders.forward_features(base_estimator, 'y')
  with self.assertRaisesRegexp(ValueError,
                               'keys should be exist in features'):
    next(forwarding_estimator.predict(input_fn=input_fn))
def test_forward_single_key(self):
  """Forwarding 'id' adds it to predictions that previously lacked it."""

  def input_fn():
    features = {'x': [[3.], [5.]], 'id': [[101], [102]]}
    labels = [[1.], [2.]]
    return features, labels

  base_estimator = linear.LinearRegressor([fc.numeric_column('x')])
  base_estimator.train(input_fn=input_fn, steps=1)

  # Before wrapping, the first prediction has no 'id' entry.
  plain_prediction = next(base_estimator.predict(input_fn=input_fn))
  self.assertNotIn('id', plain_prediction)

  forwarding_estimator = extenders.forward_features(base_estimator, 'id')
  predictions = next(forwarding_estimator.predict(input_fn=input_fn))
  self.assertIn('id', predictions)
  self.assertEqual(101, predictions['id'])
def testFromScratchWithDefaultOptimizer(self):
  """Trains with no explicit optimizer and checks the resulting checkpoint."""
  label = 5.
  age = 17

  def _single_example_input_fn():
    return {'age': ((age,),)}, ((label,),)

  # Create LinearRegressor.
  age_column = feature_column_lib.numeric_column('age')
  regressor = linear.LinearRegressor(
      feature_columns=(age_column,), model_dir=self._model_dir)

  # Train for a few steps, and validate final checkpoint.
  train_steps = 10
  regressor.train(input_fn=_single_example_input_fn, steps=train_steps)
  self._assert_checkpoint(train_steps)
def test_forwarded_feature_should_be_a_sparse_tensor(self):
  """Expects ValueError when a sparse default is given for a feature.

  The error pattern asserted below says the feature is expected to be a
  `SparseTensor`.
  """
  input_fn = self.make_dummy_input_fn()
  base_estimator = linear.LinearRegressor([fc.numeric_column('x')])
  base_estimator.train(input_fn=input_fn, steps=1)

  sparse_defaults = {'id': 0, 'sparse_id': 0}
  forwarding_estimator = extenders.forward_features(
      base_estimator, sparse_default_values=sparse_defaults)
  with self.assertRaisesRegexp(
      ValueError, 'Feature .* is expected to be a `SparseTensor`.'):
    next(forwarding_estimator.predict(input_fn=input_fn))
def test_complete_flow(self):
  """Runs train -> evaluate -> predict -> export on numpy data where y == x.

  The same array is used as both feature 'x' and label, and the predictions
  are expected to match it within `atol=0.01` after 200 training steps. The
  SavedModel is exported into a temporary directory that is removed again
  before the test returns.
  """
  label_dimension = 2
  batch_size = 10
  feature_columns = [feature_column_lib.numeric_column('x', shape=(2,))]
  est = linear.LinearRegressor(
      feature_columns=feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir)
  data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
  data = data.reshape(batch_size, label_dimension)

  # TRAIN
  # learn y = x
  train_input_fn = numpy_io.numpy_input_fn(
      x={'x': data},
      y=data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  est.train(train_input_fn, steps=200)

  # EVALUATE
  eval_input_fn = numpy_io.numpy_input_fn(
      x={'x': data},
      y=data,
      batch_size=batch_size,
      num_epochs=1,
      shuffle=False)
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

  # PREDICT
  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': data},
      y=None,
      batch_size=batch_size,
      num_epochs=1,
      shuffle=False)
  predictions = [x['predictions'] for x in est.predict(predict_input_fn)]
  self.assertAllClose(data, predictions, atol=0.01)

  # EXPORT
  feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_root = tempfile.mkdtemp()
  try:
    export_dir = est.export_savedmodel(export_root, serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
  finally:
    # mkdtemp() leaves deletion to the caller; remove the scratch directory
    # whether or not the export and its assertion succeeded.
    gfile.DeleteRecursively(export_root)
def test_forwarded_feature_should_not_be_a_sparse_tensor(self):
  """Expects ValueError when a forwarded feature is a `SparseTensor`.

  No sparse default values are supplied, and the 'id' feature returned by the
  input function is sparse.
  """

  def input_fn():
    sparse_ids = sparse_tensor.SparseTensor(
        values=['1', '2'], indices=[[0, 0], [1, 0]], dense_shape=[2, 1])
    features = {'x': [[3.], [5.]], 'id': sparse_ids}
    labels = [[1.], [2.]]
    return features, labels

  base_estimator = linear.LinearRegressor([fc.numeric_column('x')])
  base_estimator.train(input_fn=input_fn, steps=1)

  forwarding_estimator = extenders.forward_features(base_estimator)
  with self.assertRaisesRegexp(
      ValueError, 'Forwarded feature.* should be a Tensor.'):
    next(forwarding_estimator.predict(input_fn=input_fn))
def testTrainWithOneDimLabel(self):
  """Trains with rank-1 feature and label arrays, then checks the checkpoint."""
  label_dimension = 1
  batch_size = 20
  age_column = feature_column_lib.numeric_column('age', shape=(1,))
  est = linear.LinearRegressor(
      feature_columns=[age_column],
      label_dimension=label_dimension,
      model_dir=self._model_dir)

  # Both the feature and the label are the same flat (batch_size,) array.
  flat_data = np.linspace(0., 2., batch_size, dtype=np.float32)
  self.assertEqual((batch_size,), flat_data.shape)

  train_input_fn = numpy_io.numpy_input_fn(
      x={'age': flat_data},
      y=flat_data,
      batch_size=batch_size,
      num_epochs=None,
      shuffle=True)
  est.train(train_input_fn, steps=200)
  self._assert_checkpoint(200)
def test_1d(self):
  """Tests predict when all variables are one-dimensional."""
  with ops.Graph().as_default():
    variables.Variable([[10.]], name='linear/linear_model/x/weights')
    variables.Variable([.2], name=_BIAS_NAME)
    variables.Variable(100, name='global_step', dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  x_column = feature_column_lib.numeric_column('x')
  regressor = linear.LinearRegressor(
      feature_columns=(x_column,), model_dir=self._model_dir)

  predict_input_fn = numpy_io.numpy_input_fn(
      x={'x': np.array([[2.]])},
      y=None,
      batch_size=1,
      num_epochs=1,
      shuffle=False)
  predicted_scores = [
      result['predictions']
      for result in regressor.predict(input_fn=predict_input_fn)
  ]

  # x * weight + bias = 2. * 10. + .2 = 20.2
  self.assertAllClose([[20.2]], predicted_scores)
def test_evaluation_for_simple_data(self):
  """Evaluates a single example against a checkpoint with known values."""
  with ops.Graph().as_default():
    variables.Variable([[11.0]], name=_AGE_WEIGHT_NAME)
    variables.Variable([2.0], name=_BIAS_NAME)
    variables.Variable(
        100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  def _single_example_input_fn():
    return {'age': ((1,),)}, ((10.,),)

  age_column = feature_column_lib.numeric_column('age')
  regressor = linear.LinearRegressor(
      feature_columns=(age_column,), model_dir=self._model_dir)
  eval_metrics = regressor.evaluate(
      input_fn=_single_example_input_fn, steps=1)

  # Logit is (1. * 11.0 + 2.0) = 13, while label is 10. Loss is 3**2 = 9.
  expected_metrics = {
      metric_keys.MetricKeys.LOSS: 9.,
      metric_keys.MetricKeys.LOSS_MEAN: 9.,
      ops.GraphKeys.GLOBAL_STEP: 100
  }
  self.assertDictEqual(expected_metrics, eval_metrics)
def testFromScratch(self):
  """Trains without a prior checkpoint using a mock optimizer.

  The final checkpoint is expected to hold the trained step count with weight
  and bias both equal to 0.
  """
  label = 5.
  age = 17

  # loss = (logits - label)^2 = (0 - 5.)^2 = 25.
  optimizer_mock = self._mock_optimizer(expected_loss=25.)

  # Create LinearRegressor.
  age_column = feature_column_lib.numeric_column('age')
  regressor = linear.LinearRegressor(
      feature_columns=(age_column,),
      model_dir=self._model_dir,
      optimizer=optimizer_mock)
  # Building the estimator alone must not touch the optimizer.
  self.assertEqual(0, optimizer_mock.minimize.call_count)

  def _single_example_input_fn():
    return {'age': ((age,),)}, ((label,),)

  # Train for a few steps, and validate optimizer and final checkpoint.
  train_steps = 10
  regressor.train(input_fn=_single_example_input_fn, steps=train_steps)
  self.assertEqual(1, optimizer_mock.minimize.call_count)
  self._assert_checkpoint(
      expected_global_step=train_steps,
      expected_age_weight=0.,
      expected_bias=0.)
def test_forward_keys(self):
  """Forwards two keys and checks the summed value of each in predictions.

  Expected sums are consumed in order: for each of the first two predictions,
  one value per forwarded key.
  """
  input_fn = self.make_dummy_input_fn()
  base_estimator = linear.LinearRegressor([fc.numeric_column('x')])
  base_estimator.train(input_fn=input_fn, steps=1)

  forwarded_keys = ['id', 'sparse_id']
  # The unwrapped estimator must not expose either key.
  for key in forwarded_keys:
    first_prediction = next(base_estimator.predict(input_fn=input_fn))
    self.assertNotIn(key, first_prediction)

  forwarding_estimator = extenders.forward_features(
      base_estimator, forwarded_keys, sparse_default_values={'sparse_id': 1})

  remaining_expected = iter([101, 2, 102, 5])
  predictions = forwarding_estimator.predict(input_fn=input_fn)
  for _ in range(2):
    prediction = next(predictions)
    for key in forwarded_keys:
      self.assertIn(key, prediction)
      expected_sum = next(remaining_expected)
      self.assertEqual(expected_sum, sum(prediction[key]))
def test_forward_in_exported(self):
  """Checks that a forwarded 'id' feature survives SavedModel export.

  The estimator is trained for one step, wrapped so that 'id' is forwarded,
  exported under a temporary directory, reloaded through its 'predict'
  signature and queried. The temporary directory is removed even when the
  export or an assertion fails.
  """

  def serving_input_fn():
    features_ph = {
        'x': array_ops.placeholder(dtypes.float32, [None]),
        'id': array_ops.placeholder(dtypes.int32, [None])
    }
    # Append a trailing dimension to each rank-1 placeholder.
    features = {
        key: array_ops.expand_dims(tensor, -1)
        for key, tensor in features_ph.items()
    }
    return estimator_lib.export.ServingInputReceiver(features, features_ph)

  def input_fn():
    return {'x': [[3.], [5.]], 'id': [[101], [102]]}, [[1.], [2.]]

  # create estimator
  feature_columns = [fc.numeric_column('x')]
  estimator = linear.LinearRegressor(feature_columns)
  estimator.train(input_fn=input_fn, steps=1)
  estimator = extenders.forward_features(estimator, 'id')

  tmpdir = tempfile.mkdtemp()
  try:
    # export saved model
    export_dir_base = os.path.join(
        compat.as_bytes(tmpdir), compat.as_bytes('export'))
    export_dir = estimator.export_savedmodel(export_dir_base, serving_input_fn)
    self.assertTrue(gfile.Exists(export_dir))

    # restore model
    predict_fn = from_saved_model(export_dir, signature_def_key='predict')
    predictions = predict_fn({'x': [3], 'id': [101]})

    # verify that 'id' exists in predictions
    self.assertIn('id', predictions)
    self.assertEqual(101, predictions['id'])
  finally:
    # Clean up, including when the export or an assertion above failed.
    gfile.DeleteRecursively(tmpdir)
def test_key_should_be_list_of_string(self):
  """A non-string entry in the forwarded keys raises TypeError."""
  base_estimator = linear.LinearRegressor([fc.numeric_column('x')])
  # The second key is the estimator object itself, i.e. not a string.
  invalid_keys = ['x', base_estimator]
  with self.assertRaisesRegexp(TypeError, 'should be a string'):
    extenders.forward_features(base_estimator, invalid_keys)
def _LinearRegressor(self, feature_columns):
  """Builds a `LinearRegressor` over `feature_columns`.

  Can be used for baseline.

  Args:
    feature_columns: Forwarded unchanged as the `feature_columns` argument.

  Returns:
    The newly constructed `linear_estimator.LinearRegressor`.
  """
  regressor = linear_estimator.LinearRegressor(
      feature_columns=feature_columns)
  return regressor
def _linear_regressor_fn(*args, **kwargs):
  """Constructs a `linear.LinearRegressor`, forwarding all arguments as-is."""
  regressor = linear.LinearRegressor(*args, **kwargs)
  return regressor