def test_parse_features(self):
    """Tests the various behaviours of kmeans._parse_features_if_necessary."""
    # A plain tensor should pass through unchanged.
    points_tensor = constant_op.constant(self.points)
    self.assertAllEqual(
        points_tensor,
        kmeans_lib._parse_features_if_necessary(points_tensor, None))

    # Without feature_columns, every value of a feature dict is converted
    # into a tensor.
    xy_dict = {
        'x': [[p[0]] for p in self.points],
        'y': [[p[1]] for p in self.points]
    }
    self._parse_feature_dict_helper(
        points_tensor, kmeans_lib._parse_features_if_necessary(xy_dict, None))

    # With feature_columns, only the listed keys are converted; extra keys
    # are ignored.
    dict_with_extras = {
        'foo': 'bar',
        'x': [[p[0]] for p in self.points],
        'baz': {'fizz': 'buzz'},
        'y': [[p[1]] for p in self.points]
    }
    columns = [fc.numeric_column(key='x'), fc.numeric_column(key='y')]
    self._parse_feature_dict_helper(
        points_tensor,
        kmeans_lib._parse_features_if_necessary(dict_with_extras, columns))
def test_multi_feature_column(self):
    """Checks evaluation metrics of a DNNRegressor with two feature columns."""
    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=1.
    global_step = 100
    _create_checkpoint((
        (((1., 2.), (3., 4.),), (5., 6.)),
        (((7., 8.), (9., 8.),), (7., 6.)),
        (((5.,), (4.,),), (3.,))
    ), global_step, self._model_dir)

    # Create DNNRegressor and evaluate.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('age'),
                         feature_column.numeric_column('height')),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'age': np.array(((20,), (40,))),
           'height': np.array(((4,), (8,)))},
        y=np.array(((213.,), (421.,))),
        batch_size=2,
        shuffle=False)
    self.assertAllClose({
        # TODO(ptucker): Point to tool for calculating a neural net output?
        # predictions = 7315, 13771
        # loss = ((213-7315)^2 + (421-13771)^2) / 2 = 228660896
        metric_keys.MetricKeys.LOSS: 228660896.,
        # average_loss = loss / 2 = 114330452
        metric_keys.MetricKeys.LOSS_MEAN: 114330452.,
        ops.GraphKeys.GLOBAL_STEP: global_step
    }, dnn_regressor.evaluate(input_fn=input_fn, steps=1))
def test_weight_column_should_not_be_used_as_feature(self):
    """A column key reused as both feature and weight must be rejected."""
    expected_error = 'weight_column should not be used as feature'
    with self.assertRaisesRegexp(ValueError, expected_error):
      parsing_utils.classifier_parse_example_spec(
          feature_columns=[fc.numeric_column('a')],
          label_key='b',
          weight_column=fc.numeric_column('a'))
def _get_estimator(self, train_distribute, eval_distribute,
                   remote_cluster=None):
    """Builds a DNNLinearCombinedRegressor wired with distribute configs."""
    input_dimension = LABEL_DIMENSION

    def _columns():
      # Fresh column list for each of the linear and DNN towers.
      return [feature_column.numeric_column("x", shape=(input_dimension,))]

    distribute_config = DistributeConfig(
        train_distribute=train_distribute,
        eval_distribute=eval_distribute,
        remote_cluster=remote_cluster)
    return dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=_columns(),
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=_columns(),
        label_dimension=LABEL_DIMENSION,
        model_dir=self._model_dir,
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config_lib.RunConfig(
            experimental_distribute=distribute_config))
def test_complete_flow_with_mode(self, distribution):
    """Runs train/evaluate/predict/export end-to-end under a strategy."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # Train and eval shard the global batch across the strategy's workers.
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices),
        shuffle=True)
    eval_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices),
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution, eval_distribute=distribution))

    num_steps = 10
    estimator.train(train_input_fn, steps=num_steps)

    scores = estimator.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # Export a SavedModel and verify it landed on disk.
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
def testCalibratedLatticeClassifierTraining(self):
    """Trains a calibrated lattice classifier; AUC should exceed 0.99."""
    columns = [
        feature_column_lib.numeric_column('x0'),
        feature_column_lib.numeric_column('x1'),
    ]
    model = self._CalibratedLatticeClassifier(columns)
    model.train(input_fn=self._test_data.twod_classificer_input_fn())
    metrics = model.evaluate(
        input_fn=self._test_data.twod_classificer_input_fn())
    self.assertGreater(metrics['auc'], 0.990)
def testCalibratedLinearRegressorWeightedTraining1D(self):
    """With all-zero example weights the weighted loss should vanish."""
    zero_weight_column = feature_column_lib.numeric_column('zero')
    model = self._CalibratedLinearRegressor(
        ['x'], [feature_column_lib.numeric_column('x')],
        weight_column=zero_weight_column)
    model.train(input_fn=self._test_data.oned_zero_weight_input_fn())
    metrics = model.evaluate(
        input_fn=self._test_data.oned_zero_weight_input_fn())
    # Expects almost zero since the weight values are exactly zero.
    self.assertLess(metrics['average_loss'], 1e-7)
def testBaseLinearRegressorTraining3D(self):
    """Trains on three features, one categorical with a vocabulary list."""
    columns = [
        feature_column_lib.numeric_column('x0'),
        feature_column_lib.numeric_column('x1'),
        feature_column_lib.categorical_column_with_vocabulary_list(
            'x2', ['Y', 'N']),
    ]
    self._TestRegressor(columns, self._test_data.threed_input_fn(False, 1))
def testCalibratedLatticeRegressorTraining2D(self):
    """Fits a 2-D calibrated lattice regressor to the 2-D test data."""
    columns = [
        feature_column_lib.numeric_column('x0'),
        feature_column_lib.numeric_column('x1'),
    ]
    model = self._CalibratedLatticeRegressor(['x0', 'x1'], columns)
    model.train(input_fn=self._test_data.twod_input_fn())
    metrics = model.evaluate(input_fn=self._test_data.twod_input_fn())
    self.assertLess(metrics['average_loss'], 5e-3)
def test_weight_column_as_numeric_column(self):
    """A numeric weight column lands in the parsing spec as float32."""
    parsing_spec = parsing_utils.classifier_parse_example_spec(
        feature_columns=[fc.numeric_column('a')],
        label_key='b',
        weight_column=fc.numeric_column('c'))
    # Feature 'a' and weight 'c' are float features; classifier label 'b'
    # defaults to int64.
    self.assertDictEqual(
        {
            'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
            'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
            'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
        }, parsing_spec)
def test_complete_flow_with_mode(self, distribution):
    """Runs train/evaluate/predict/export with a distribution strategy."""
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # Training shards the batch across workers; eval/predict run unsharded.
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices),
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config.RunConfig(
            train_distribute=distribution, eval_distribute=distribution))

    num_steps = 10
    estimator.train(train_input_fn, steps=num_steps)

    scores = estimator.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # Export a SavedModel and verify it landed on disk.
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
def testCalibratedLatticeRegressorWeightedTraining1D(self):
    """Zero example weights should drive the weighted loss to ~0."""
    model = self._CalibratedLatticeRegressor(
        ['x'],
        [feature_column_lib.numeric_column('x')],
        weight_column=feature_column_lib.numeric_column('zero'))
    model.train(input_fn=self._test_data.oned_zero_weight_input_fn())
    metrics = model.evaluate(
        input_fn=self._test_data.oned_zero_weight_input_fn())
    self.assertLess(metrics['average_loss'], 1e-7)
def test_weight_column_as_numeric_column(self):
    """A numeric weight column is parsed as a float32 fixed-len feature."""
    expected_spec = {
        'a': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.int64),
        'c': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
    }
    actual_spec = parsing_utils.classifier_parse_example_spec(
        feature_columns=[fc.numeric_column('a')],
        label_key='b',
        weight_column=fc.numeric_column('c'))
    self.assertDictEqual(expected_spec, actual_spec)
def test_shape_must_be_positive_integer(self):
    """numeric_column rejects non-integer and non-positive shape dims."""
    with self.assertRaisesRegexp(TypeError,
                                 'shape dimensions must be integer'):
      fc.numeric_column('aaa', shape=[1.0])

    with self.assertRaisesRegexp(ValueError,
                                 'shape dimensions must be greater than 0'):
      fc.numeric_column('aaa', shape=[0])
def testCalibratedLinearRegressorTraining2D(self):
    """2-D calibrated linear regression should reach a very low loss."""
    columns = [
        feature_column_lib.numeric_column('x0'),
        feature_column_lib.numeric_column('x1'),
    ]
    model = self._CalibratedLinearRegressor(['x0', 'x1'], columns)
    model.train(input_fn=self._test_data.twod_input_fn())
    metrics = model.evaluate(input_fn=self._test_data.twod_input_fn())
    # For the record:
    #   Loss(CalibratedLinear)=~6.9e-5
    #   Loss(LinearRegressor)=~3.3e-2
    self.assertLess(metrics['average_loss'], 1e-4)
def test_multi_feature_column_multi_dim_logits(self):
    """Tests multiple feature columns and multi-dimensional logits.

    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
    difference is that the input consists of two 1D feature columns, instead of
    one 2D feature column.
    """
    base_global_step = 100
    create_checkpoint(
        (([[.6, .5], [-.6, -.5]], [.1, -.1]),
         ([[1., .8], [-.8, -1.]], [.2, -.2]),
         ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
        base_global_step, self._model_dir)
    hidden_units = (2, 2)
    logits_dimension = 3
    # One scalar input per feature column: age=10, height=8.
    inputs = ([[10.]], [[8.]])
    expected_logits = [[-0.48, 0.48, 0.39]]

    # The mock head verifies the logits it receives in every mode.
    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      with ops.Graph().as_default():
        training_util.create_global_step()
        head = mock_head(
            self,
            hidden_units=hidden_units,
            logits_dimension=logits_dimension,
            expected_logits=expected_logits)
        estimator_spec = self._dnn_model_fn(
            features={
                'age': constant_op.constant(inputs[0]),
                'height': constant_op.constant(inputs[1])
            },
            labels=constant_op.constant([[1]]),
            mode=mode,
            head=head,
            hidden_units=hidden_units,
            feature_columns=[
                feature_column.numeric_column('age'),
                feature_column.numeric_column('height')
            ],
            optimizer=mock_optimizer(self, hidden_units))
        with monitored_session.MonitoredTrainingSession(
            checkpoint_dir=self._model_dir) as sess:
          # Run the op/tensor appropriate for the mode under test.
          if mode == model_fn.ModeKeys.TRAIN:
            sess.run(estimator_spec.train_op)
          elif mode == model_fn.ModeKeys.EVAL:
            sess.run(estimator_spec.loss)
          elif mode == model_fn.ModeKeys.PREDICT:
            sess.run(estimator_spec.predictions)
          else:
            self.fail('Invalid mode: {}'.format(mode))
def test_multi_feature_column_multi_dim_logits(self):
    """Tests multiple feature columns and multi-dimensional logits.

    All numbers are the same as test_multi_dim_input_multi_dim_logits. The only
    difference is that the input consists of two 1D feature columns, instead of
    one 2D feature column.
    """
    base_global_step = 100
    _create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    hidden_units = (2, 2)
    logits_dimension = 3
    # One scalar input per feature column: age=10, height=8.
    inputs = ([[10.]], [[8.]])
    expected_logits = [[-0.48, 0.48, 0.39]]

    # The mock head verifies the logits it receives in every mode.
    for mode in [
        model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
        model_fn.ModeKeys.PREDICT
    ]:
      with ops.Graph().as_default():
        training_util.create_global_step()
        head = _mock_head(
            self,
            hidden_units=hidden_units,
            logits_dimension=logits_dimension,
            expected_logits=expected_logits)
        estimator_spec = dnn._dnn_model_fn(
            features={
                'age': constant_op.constant(inputs[0]),
                'height': constant_op.constant(inputs[1])
            },
            labels=constant_op.constant([[1]]),
            mode=mode,
            head=head,
            hidden_units=hidden_units,
            feature_columns=[
                feature_column.numeric_column('age'),
                feature_column.numeric_column('height')
            ],
            optimizer=_mock_optimizer(self, hidden_units))
        with monitored_session.MonitoredTrainingSession(
            checkpoint_dir=self._model_dir) as sess:
          # Run the op/tensor appropriate for the mode under test.
          if mode == model_fn.ModeKeys.TRAIN:
            sess.run(estimator_spec.train_op)
          elif mode == model_fn.ModeKeys.EVAL:
            sess.run(estimator_spec.loss)
          elif mode == model_fn.ModeKeys.PREDICT:
            sess.run(estimator_spec.predictions)
          else:
            self.fail('Invalid mode: {}'.format(mode))
def testCalibratedLinearClassifierTraining(self):
    """Calibrated linear classifier should reach a high AUC on 2-D data."""
    columns = [
        feature_column_lib.numeric_column('x0'),
        feature_column_lib.numeric_column('x1'),
    ]
    model = self._CalibratedLinearClassifier(['x0', 'x1'], columns)
    model.train(input_fn=self._test_data.twod_classificer_input_fn())
    metrics = model.evaluate(
        input_fn=self._test_data.twod_classificer_input_fn())
    # For the record:
    #   auc(CalibratedLinear)=~0.999
    #   auc(LinearClassifier)=~0.481
    self.assertGreater(metrics['auc'], 0.990)
def test_shape_and_default_value_compatibility(self):
    """default_value must match the declared shape of a numeric column."""
    # Matching shapes are accepted without error.
    fc.numeric_column('aaa', shape=[2], default_value=[1, 2.])
    fc.numeric_column(
        'aaa', shape=[3, 2], default_value=[[2, 3], [1, 2], [2, 3.]])

    # Any mismatch in length or nesting is rejected.
    with self.assertRaisesRegexp(ValueError, 'The shape of default_value'):
      fc.numeric_column('aaa', shape=[2], default_value=[1, 2, 3.])
    with self.assertRaisesRegexp(ValueError, 'The shape of default_value'):
      fc.numeric_column(
          'aaa', shape=[3, 1], default_value=[[2, 3], [1, 2], [2, 3.]])
    with self.assertRaisesRegexp(ValueError, 'The shape of default_value'):
      fc.numeric_column(
          'aaa', shape=[3, 3], default_value=[[2, 3], [1, 2], [2, 3.]])
def _serving_input_receiver_fn():
    """A receiver function to be passed to export_savedmodel."""
    # Columns describing the model's time and value inputs. NOTE(review):
    # values_input_dtype, values_proto_length, filtering_length,
    # prediction_length and default_batch_size are closed over from the
    # enclosing scope.
    times_column = feature_column.numeric_column(
        key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64)
    values_column = feature_column.numeric_column(
        key=feature_keys.TrainEvalFeatures.VALUES,
        dtype=values_input_dtype,
        shape=(self._model.num_features,))
    parsed_features_no_sequence = (
        feature_column.make_parse_example_spec(
            list(self._model.exogenous_feature_columns) +
            [times_column, values_column]))
    parsed_features = {}
    for key, feature_spec in parsed_features_no_sequence.items():
      if isinstance(feature_spec, parsing_ops.FixedLenFeature):
        if key == feature_keys.TrainEvalFeatures.VALUES:
          # Values get a leading window dimension of values_proto_length.
          parsed_features[key] = feature_spec._replace(
              shape=((values_proto_length,) + feature_spec.shape))
        else:
          # Other fixed-length features span the full filtering+prediction
          # window.
          parsed_features[key] = feature_spec._replace(
              shape=((filtering_length + prediction_length,) +
                     feature_spec.shape))
      elif feature_spec.dtype == dtypes.string:
        parsed_features[key] = parsing_ops.FixedLenFeature(
            shape=(filtering_length + prediction_length,),
            dtype=dtypes.string)
      else:  # VarLenFeature
        raise ValueError("VarLenFeatures not supported, got %s for key %s" %
                         (feature_spec, key))
    # Serialized tf.Examples come in through this placeholder.
    tfexamples = array_ops.placeholder(
        shape=[default_batch_size], dtype=dtypes.string, name="input")
    features = parsing_ops.parse_example(
        serialized=tfexamples, features=parsed_features)
    # Drop the trailing singleton dimension parsed onto TIMES.
    features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze(
        features[feature_keys.TrainEvalFeatures.TIMES], axis=-1)
    # Cast values to the model dtype and keep only the filtering window.
    features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast(
        features[feature_keys.TrainEvalFeatures.VALUES],
        dtype=self._model.dtype)[:, :filtering_length]
    features.update(
        self._model_start_state_placeholders(
            batch_size_tensor=array_ops.shape(
                features[feature_keys.TrainEvalFeatures.TIMES])[0],
            static_batch_size=default_batch_size))
    return export_lib.ServingInputReceiver(features, {"examples": tfexamples})
def test_one_dim(self): """Asserts predictions for one-dimensional input and logits.""" # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1. _create_checkpoint(( ([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1.], [1.]], [.3]), ), global_step=0, model_dir=self._model_dir) # Create DNNRegressor and predict. dnn_regressor = dnn.DNNRegressor( hidden_units=(2, 2), feature_columns=(feature_column.numeric_column('x'), ), model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn(x={'x': np.array([[10.]])}, batch_size=1, shuffle=False) # Uses identical numbers as DNNModelTest.test_one_dim_logits. # See that test for calculation of logits. # logits = [[-2.08]] => predictions = [-2.08]. self.assertAllClose( { prediction_keys.PredictionKeys.PREDICTIONS: [-2.08], }, next(dnn_regressor.predict(input_fn=input_fn)))
def test_one_dim(self): """Asserts evaluation metrics for one-dimensional input and logits.""" # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1. global_step = 100 _create_checkpoint(( ([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1.], [1.]], [.3]), ), global_step, self._model_dir) # Create DNNRegressor and evaluate. dnn_regressor = dnn.DNNRegressor( hidden_units=(2, 2), feature_columns=[feature_column.numeric_column('age')], model_dir=self._model_dir) def _input_fn(): return {'age': [[10.]]}, [[1.]] # Uses identical numbers as DNNModelTest.test_one_dim_logits. # See that test for calculation of logits. # logits = [[-2.08]] => predictions = [-2.08]. # loss = (1+2.08)^2 = 9.4864 expected_loss = 9.4864 self.assertAllClose( { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: expected_loss, ops.GraphKeys.GLOBAL_STEP: global_step }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
def _test_logits(self, mode, hidden_units, logits_dimension, inputs,
                 expected_logits):
    """Tests that the expected logits are passed to mock head."""
    with ops.Graph().as_default():
      training_util.create_global_step()
      head = _mock_head(
          self,
          hidden_units=hidden_units,
          logits_dimension=logits_dimension,
          expected_logits=expected_logits)
      age_column = feature_column.numeric_column(
          'age', shape=np.array(inputs).shape[1:])
      spec = dnn._dnn_model_fn(
          features={'age': constant_op.constant(inputs)},
          labels=constant_op.constant([[1]]),
          mode=mode,
          head=head,
          hidden_units=hidden_units,
          feature_columns=[age_column],
          optimizer=_mock_optimizer(self, hidden_units))
      # Map each mode to the op/tensor that must be runnable in that mode.
      fetch_by_mode = {
          model_fn.ModeKeys.TRAIN: spec.train_op,
          model_fn.ModeKeys.EVAL: spec.loss,
          model_fn.ModeKeys.PREDICT: spec.predictions,
      }
      with monitored_session.MonitoredTrainingSession(
          checkpoint_dir=self._model_dir) as sess:
        if mode in fetch_by_mode:
          sess.run(fetch_by_mode[mode])
        else:
          self.fail('Invalid mode: {}'.format(mode))
def test_ar_lstm_regressor(self):
    """Smoke-tests LSTMAutoRegressor training/eval with an exogenous input."""
    dtype = dtypes.float32
    model_dir = tempfile.mkdtemp(dir=self.get_temp_dir())
    exogenous_feature_columns = (
        feature_column.numeric_column("exogenous"),
    )
    estimator = estimators.LSTMAutoRegressor(
        periodicities=10,
        input_window_size=10,
        output_window_size=6,
        model_dir=model_dir,
        num_features=1,
        extra_feature_columns=exogenous_feature_columns,
        num_units=10,
        config=_SeedRunConfig())
    # Simple ramp data: 20 contiguous timesteps.
    times = numpy.arange(20, dtype=numpy.int64)
    values = numpy.arange(20, dtype=dtype.as_numpy_dtype)
    exogenous = numpy.arange(20, dtype=dtype.as_numpy_dtype)
    features = {
        feature_keys.TrainEvalFeatures.TIMES: times,
        feature_keys.TrainEvalFeatures.VALUES: values,
        "exogenous": exogenous
    }
    train_input_fn = input_pipeline.RandomWindowInputFn(
        input_pipeline.NumpyReader(features),
        shuffle_seed=2,
        num_threads=1,
        batch_size=16,
        window_size=16)
    eval_input_fn = input_pipeline.RandomWindowInputFn(
        input_pipeline.NumpyReader(features),
        shuffle_seed=3,
        num_threads=1,
        batch_size=16,
        window_size=16)
    estimator.train(input_fn=train_input_fn, steps=1)
    evaluation = estimator.evaluate(input_fn=eval_input_fn, steps=1)
    # The asserted contract: loss equals average_loss and is a scalar.
    self.assertAllEqual(evaluation["loss"], evaluation["average_loss"])
    self.assertAllEqual([], evaluation["loss"].shape)
def test_dnn_and_linear_logits_are_added(self):
    """Final prediction should be the sum of the linear and DNN logits."""
    with ops.Graph().as_default():
      # Seed a checkpoint with known weights for both model towers.
      variables_lib.Variable([[1.0]], name='linear/linear_model/x/weights')
      variables_lib.Variable([2.0], name='linear/linear_model/bias_weights')
      variables_lib.Variable([[3.0]], name='dnn/hiddenlayer_0/kernel')
      variables_lib.Variable([4.0], name='dnn/hiddenlayer_0/bias')
      variables_lib.Variable([[5.0]], name='dnn/logits/kernel')
      variables_lib.Variable([6.0], name='dnn/logits/bias')
      variables_lib.Variable(1, name='global_step', dtype=dtypes.int64)
      linear_testing_utils.save_variables_to_ckpt(self._model_dir)

    x_column = feature_column.numeric_column('x')
    est = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[x_column],
        dnn_hidden_units=[1],
        dnn_feature_columns=[x_column],
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
    # linear logits = 10*1 + 2 = 12
    # dnn logits = (10*3 + 4)*5 + 6 = 176
    # logits = dnn + linear = 176 + 12 = 188
    self.assertAllClose(
        {
            prediction_keys.PredictionKeys.PREDICTIONS: [188.],
        }, next(est.predict(input_fn=input_fn)))
def test_one_dim(self):
    """Asserts predictions for one-dimensional input and logits."""
    # Checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
    dnn_testing_utils.create_checkpoint(
        (([[.6, .5]], [.1, -.1]),
         ([[1., .8], [-.8, -1.]], [.2, -.2]),
         ([[-1.], [1.]], [.3]),),
        global_step=0,
        model_dir=self._model_dir)

    dnn_classifier = dnn.DNNClassifier(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('x'),),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08] =>
    # logistic = exp(-2.08)/(1 + exp(-2.08)) = 0.11105597
    # probabilities = [1-logistic, logistic] = [0.88894403, 0.11105597]
    # class_ids = argmax(probabilities) = [0]
    predictions = next(dnn_classifier.predict(input_fn=input_fn))
    self.assertAllClose([-2.08],
                        predictions[prediction_keys.PredictionKeys.LOGITS])
    self.assertAllClose([0.11105597],
                        predictions[prediction_keys.PredictionKeys.LOGISTIC])
    self.assertAllClose(
        [0.88894403, 0.11105597],
        predictions[prediction_keys.PredictionKeys.PROBABILITIES])
    self.assertAllClose([0],
                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
    self.assertAllEqual([b'0'],
                        predictions[prediction_keys.PredictionKeys.CLASSES])
def test_one_dim(self):
    """Asserts evaluation metrics for one-dimensional input and logits."""
    global_step = 100
    dnn_testing_utils.create_checkpoint(
        (([[.6, .5]], [.1, -.1]),
         ([[1., .8], [-.8, -1.]], [.2, -.2]),
         ([[-1.], [1.]], [.3]),),
        global_step,
        self._model_dir)

    dnn_classifier = dnn.DNNClassifier(
        hidden_units=(2, 2),
        feature_columns=[feature_column.numeric_column('age')],
        model_dir=self._model_dir)

    def _input_fn():
      # batch_size = 2, one false label, and one true.
      return {'age': [[10.], [10.]]}, [[1], [0]]

    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-2.08], [-2.08]] =>
    # logistic = 1/(1 + exp(-logits)) = [[0.11105597], [0.11105597]]
    # loss = -1. * log(0.111) -1. * log(0.889) = 2.31544200
    expected_loss = 2.31544200
    self.assertAllClose(
        {
            metric_keys.MetricKeys.LOSS: expected_loss,
            metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2.,
            metric_keys.MetricKeys.ACCURACY: 0.5,
            metric_keys.MetricKeys.PREDICTION_MEAN: 0.11105597,
            metric_keys.MetricKeys.LABEL_MEAN: 0.5,
            metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5,
            # There is no good way to calculate AUC for only two data points.
            # But that is what the algorithm returns.
            metric_keys.MetricKeys.AUC: 0.5,
            metric_keys.MetricKeys.AUC_PR: 0.75,
            ops.GraphKeys.GLOBAL_STEP: global_step
        }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))
def _test_logits(self, mode, hidden_units, logits_dimension, inputs,
                 expected_logits):
    """Tests that the expected logits are passed to mock head."""
    with ops.Graph().as_default():
      training_util.create_global_step()
      verifying_head = _mock_head(
          self,
          hidden_units=hidden_units,
          logits_dimension=logits_dimension,
          expected_logits=expected_logits)
      age_column = feature_column.numeric_column(
          'age', shape=np.array(inputs).shape[1:])
      estimator_spec = dnn._dnn_model_fn(
          features={'age': constant_op.constant(inputs)},
          labels=constant_op.constant([[1]]),
          mode=mode,
          head=verifying_head,
          hidden_units=hidden_units,
          feature_columns=[age_column],
          optimizer=_mock_optimizer(self, hidden_units))
      with monitored_session.MonitoredTrainingSession(
          checkpoint_dir=self._model_dir) as session:
        # Exercise whichever op/tensor the mode under test produces.
        if mode == model_fn.ModeKeys.TRAIN:
          session.run(estimator_spec.train_op)
        elif mode == model_fn.ModeKeys.EVAL:
          session.run(estimator_spec.loss)
        elif mode == model_fn.ModeKeys.PREDICT:
          session.run(estimator_spec.predictions)
        else:
          self.fail('Invalid mode: {}'.format(mode))
def test_multi_dim(self):
    """Asserts train loss for multi-dimensional input and logits."""
    base_global_step = 100
    hidden_units = (2, 2)
    _create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), base_global_step, self._model_dir)
    input_dimension = 2
    label_dimension = 3

    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-0.48, 0.48, 0.39]]
    # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
    expected_loss = 4.3929
    mock_optimizer = _mock_optimizer(
        self, hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=hidden_units,
        feature_columns=[
            feature_column.numeric_column('age', shape=[input_dimension])
        ],
        label_dimension=label_dimension,
        optimizer=mock_optimizer,
        model_dir=self._model_dir)
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=lambda: ({'age': [[10., 8.]]}, [[1., -1., 0.5]]),
        steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      _assert_simple_summary(
          self, {
              metric_keys.MetricKeys.LOSS_MEAN:
                  expected_loss / label_dimension,
              'dnn/dnn/hiddenlayer_0_fraction_of_zero_values': 0.,
              'dnn/dnn/hiddenlayer_1_fraction_of_zero_values': 0.5,
              'dnn/dnn/logits_fraction_of_zero_values': 0.,
              metric_keys.MetricKeys.LOSS: expected_loss,
          }, summary)
    _assert_checkpoint(
        self,
        base_global_step + num_steps,
        input_units=input_dimension,
        hidden_units=hidden_units,
        output_units=label_dimension,
        model_dir=self._model_dir)
def _BuildInputs(self, x0, x1):
    """Returns input_fn, feature_names and feature_columns.

    Args:
      x0: values for feature 'x0' — a list of scalars, or a list of
        fixed-length lists for multi-dimensional examples.
      x1: values for feature 'x1', same structure as x0.

    Returns:
      Tuple (input_fn, feature_names, feature_columns) where input_fn yields
      constant float32 tensors for both features.
    """

    def _input_fn():
      return {
          'x0': array_ops.constant(x0, dtype=dtypes.float32),
          'x1': array_ops.constant(x1, dtype=dtypes.float32),
      }

    feature_names = ['x0', 'x1']
    # Infer per-example dimensionality from the first element.
    x0_dim = 1 if not isinstance(x0[0], list) else len(x0[0])
    x1_dim = 1 if not isinstance(x1[0], list) else len(x1[0])
    # Use a list (not a set) so that column order is deterministic and
    # matches feature_names, consistent with the rest of this file.
    feature_columns = [
        feature_column_lib.numeric_column(key='x0', shape=(x0_dim,)),
        feature_column_lib.numeric_column(key='x1', shape=(x1_dim,)),
    ]
    return _input_fn, feature_names, feature_columns
def _test_parsed_sequence_example(self, col_name, col_fn, col_arg, shape,
                                  values):
    """Helper function to check that each FeatureColumn parses correctly.

    Args:
      col_name: string, name to give to the feature column. Should match the
        name that the column will parse out of the features dict.
      col_fn: function used to create the feature column. For example,
        sequence_numeric_column.
      col_arg: second arg that the target feature column is expecting.
      shape: the expected dense_shape of the feature after parsing into a
        SparseTensor.
      values: the expected values at index [0, 2, 6] of the feature after
        parsing into a SparseTensor.
    """
    example = _make_sequence_example()
    # The first two columns parse context features; the last parses the
    # sequence feature under test.
    columns = [
        fc.categorical_column_with_identity('int_ctx', num_buckets=100),
        fc.numeric_column('float_ctx'),
        col_fn(col_name, col_arg)
    ]
    context, seq_features = parsing_ops.parse_single_sequence_example(
        example.SerializeToString(),
        context_features=fc.make_parse_example_spec(columns[:2]),
        sequence_features=fc.make_parse_example_spec(columns[2:]))

    with self.cached_session() as sess:
      ctx_result, seq_result = sess.run([context, seq_features])
      self.assertEqual(list(seq_result[col_name].dense_shape), shape)
      # Spot-check the sparse values at fixed indices [0, 2, 6].
      self.assertEqual(list(seq_result[col_name].values[[0, 2, 6]]), values)
      self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1])
      self.assertEqual(ctx_result['int_ctx'].values[0], 5)
      self.assertEqual(list(ctx_result['float_ctx'].shape), [1])
      self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1)
def testMultiDim(self):
    """Tests predict when all variables are multi-dimensional."""
    batch_size = 2
    label_dimension = 3
    x_dim = 4
    feature_columns = (
        feature_column_lib.numeric_column('x', shape=(x_dim,)),)
    with ops.Graph().as_default():
      # Seed a checkpoint with known weights and bias.
      variables.Variable(  # shape=[x_dim, label_dimension]
          [[1., 2., 3.], [2., 3., 4.], [3., 4., 5.], [4., 5., 6.]],
          name='linear/linear_model/x/weights')
      variables.Variable(  # shape=[label_dimension]
          [.2, .4, .6], name=BIAS_NAME)
      variables.Variable(100, name='global_step', dtype=dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    linear_regressor = self._linear_regressor_fn(
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        # x shape=[batch_size, x_dim]
        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
        y=None,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)
    predictions = linear_regressor.predict(input_fn=predict_input_fn)
    predicted_scores = list([x['predictions'] for x in predictions])
    # score = x * weight + bias, shape=[batch_size, label_dimension]
    self.assertAllClose([[30.2, 40.4, 50.6], [70.2, 96.4, 122.6]],
                        predicted_scores)
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, label_dimension, prediction_length):
    """Drives a baseline estimator through train/evaluate/predict/export."""
    feature_columns = [
        feature_column_lib.numeric_column('x', shape=(input_dimension,))
    ]
    est = _baseline_estimator_fn(
        label_dimension=label_dimension, model_dir=self._model_dir)

    # TRAIN
    # learn y = x
    est.train(train_input_fn, steps=200)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

    # PREDICT
    predictions = np.array(
        [x['predictions'] for x in est.predict(predict_input_fn)])
    self.assertAllEqual((prediction_length, label_dimension),
                        predictions.shape)

    # EXPORT
    feature_spec = feature_column_lib.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
def testFromCheckpointMultiBatch(self):
  """Resumes training from a checkpoint; checks optimizer loss and state."""
  # Create initial checkpoint.
  age_weight = 10.0
  bias = 5.0
  initial_global_step = 100
  with ops.Graph().as_default():
    variables.Variable([[age_weight]], name=AGE_WEIGHT_NAME)
    variables.Variable([bias], name=BIAS_NAME)
    variables.Variable(
        initial_global_step,
        name=ops.GraphKeys.GLOBAL_STEP,
        dtype=dtypes.int64)
    save_variables_to_ckpt(self._model_dir)

  # logits = age * age_weight + bias
  # logits[0] = 17 * 10. + 5. = 175
  # logits[1] = 15 * 10. + 5. = 155
  # loss = sum(logits - label)^2 = (175 - 5)^2 + (155 - 3)^2 = 52004
  mock_optimizer = self._mock_optimizer(expected_loss=52004.)
  linear_regressor = self._linear_regressor_fn(
      feature_columns=(feature_column_lib.numeric_column('age'), ),
      model_dir=self._model_dir,
      optimizer=mock_optimizer)
  self.assertEqual(0, mock_optimizer.minimize.call_count)

  # Train for a few steps, and validate optimizer and final checkpoint.
  num_steps = 10
  linear_regressor.train(
      input_fn=lambda: ({'age': ((17, ), (15, ))}, ((5., ), (3., ))),
      steps=num_steps)
  self.assertEqual(1, mock_optimizer.minimize.call_count)
  self._assert_checkpoint(
      expected_global_step=initial_global_step + num_steps,
      expected_age_weight=age_weight,
      expected_bias=bias)
def _test_complete_flow(
    self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
    label_dimension, batch_size):
  """Runs train -> evaluate -> predict -> export for a LinearEstimator."""
  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))]
  est = linear.LinearEstimator(
      head=head_lib.regression_head(label_dimension=label_dimension),
      feature_columns=feature_columns,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)

  # EXPORT
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def test_multi_dim(self): """Asserts evaluation metrics for multi-dimensional input and logits.""" # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3. global_step = 100 _create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), global_step, self._model_dir) label_dimension = 3 # Create DNNRegressor and evaluate. dnn_regressor = dnn.DNNRegressor( hidden_units=(2, 2), feature_columns=[feature_column.numeric_column('age', shape=[2])], label_dimension=label_dimension, model_dir=self._model_dir) def _input_fn(): return {'age': [[10., 8.]]}, [[1., -1., 0.5]] # Uses identical numbers as # DNNModelFnTest.test_multi_dim_input_multi_dim_logits. # See that test for calculation of logits. # logits = [[-0.48, 0.48, 0.39]] # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929 expected_loss = 4.3929 self.assertAllClose({ metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: expected_loss / label_dimension, ops.GraphKeys.GLOBAL_STEP: global_step }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
def test_multi_dim(self):
  """Asserts evaluation metrics for multi-dimensional input and logits."""
  global_step = 100
  # Checkpoint layout: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
  dnn_testing_utils.create_checkpoint(
      (([[.6, .5], [-.6, -.5]], [.1, -.1]),
       ([[1., .8], [-.8, -1.]], [.2, -.2]),
       ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),),
      global_step, self._model_dir)
  n_classes = 3

  dnn_classifier = dnn.DNNClassifier(
      hidden_units=(2, 2),
      feature_columns=[feature_column.numeric_column('age', shape=[2])],
      n_classes=n_classes,
      model_dir=self._model_dir)

  def _input_fn():
    # batch_size = 2, one false label, and one true.
    return {'age': [[10., 8.], [10., 8.]]}, [[1], [0]]

  # Uses identical numbers as
  # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
  # See that test for calculation of logits.
  # logits = [[-0.48, 0.48, 0.39], [-0.48, 0.48, 0.39]]
  # probabilities = exp(logits)/sum(exp(logits))
  #               = [[0.16670536, 0.43538380, 0.39791084],
  #                  [0.16670536, 0.43538380, 0.39791084]]
  # loss = -log(0.43538380) - log(0.16670536)
  expected_loss = 2.62305466
  self.assertAllClose({
      metric_keys.MetricKeys.LOSS: expected_loss,
      metric_keys.MetricKeys.LOSS_MEAN: expected_loss / 2,
      metric_keys.MetricKeys.ACCURACY: 0.5,
      ops.GraphKeys.GLOBAL_STEP: global_step
  }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1))
def test_from_scratch_validate_summary(self):
  """Trains from scratch, then checks optimizer calls, summaries, checkpoint."""
  hidden_units = (2, 2)
  mock_optimizer = _mock_optimizer(self, hidden_units=hidden_units)
  dnn_classifier = dnn.DNNClassifier(
      hidden_units=hidden_units,
      feature_columns=(feature_column.numeric_column('age'),),
      optimizer=mock_optimizer,
      model_dir=self._model_dir)
  # The optimizer must not have been invoked before training starts.
  self.assertEqual(0, mock_optimizer.minimize.call_count)

  # Train for a few steps, then validate optimizer, summaries, and
  # checkpoint.
  num_steps = 5
  summary_hook = _SummaryHook()
  dnn_classifier.train(
      input_fn=lambda: ({'age': [[10.]]}, [[1]]),
      steps=num_steps,
      hooks=(summary_hook,))
  self.assertEqual(1, mock_optimizer.minimize.call_count)
  _assert_checkpoint(
      self,
      num_steps,
      input_units=1,
      hidden_units=hidden_units,
      output_units=1,
      model_dir=self._model_dir)

  # Every recorded summary must carry both loss tags.
  recorded = summary_hook.summaries()
  self.assertEqual(num_steps, len(recorded))
  for recorded_summary in recorded:
    tags = [value.tag for value in recorded_summary.value]
    self.assertIn(metric_keys.MetricKeys.LOSS, tags)
    self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, tags)
def test_train_op_calls_both_dnn_and_linear(self):
  """Verifies one train step fires both the linear and the DNN optimizers."""
  opt = gradient_descent.GradientDescentOptimizer(1.)
  x_column = feature_column.numeric_column('x')
  input_fn = numpy_io.numpy_input_fn(
      x={'x': np.array([[0.], [1.]])},
      y=np.array([[0.], [1.]]),
      batch_size=1,
      shuffle=False)
  est = dnn_linear_combined.DNNLinearCombinedClassifier(
      linear_feature_columns=[x_column],
      # verifies linear_optimizer is used only for linear part.
      linear_optimizer=self._mock_optimizer(opt, 'linear'),
      dnn_hidden_units=(2, 2),
      dnn_feature_columns=[x_column],
      # verifies dnn_optimizer is used only for dnn part.
      dnn_optimizer=self._mock_optimizer(opt, 'dnn'),
      model_dir=self._model_dir)
  est.train(input_fn, steps=1)
  # verifies train_op fires linear minimize op
  # (100. is presumably a marker value written by self._mock_optimizer when
  #  its minimize runs — confirm against that helper's definition).
  self.assertEqual(100.,
                   checkpoint_utils.load_variable(
                       self._model_dir, 'linear_called'))
  # verifies train_op fires dnn minimize op
  self.assertEqual(100.,
                   checkpoint_utils.load_variable(
                       self._model_dir, 'dnn_called'))
def _testCheckpointCompatibleWithNonAnnotatedEstimator(
    self, train_input_fn, predict_input_fn, non_annotated_class,
    annotated_class, prediction_key, estimator_args):
  """Checks an annotated estimator can warm-start from a plain checkpoint.

  Trains a non-annotated estimator, then builds the annotated variant
  warm-started from the same model dir and asserts predictions agree.
  """
  input_dimension = 2
  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))
  ]
  estimator = non_annotated_class(
      model_dir=self._model_dir,
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      **estimator_args)
  estimator.train(train_input_fn, steps=10)
  predictions = np.array(
      [x[prediction_key] for x in estimator.predict(predict_input_fn)])

  annotated_estimator = annotated_class(
      model_dir=self._model_dir,
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      warm_start_from=self._model_dir,
      **estimator_args)
  annotated_predictions = np.array([
      x[prediction_key]
      for x in annotated_estimator.predict(predict_input_fn)
  ])

  # Element-wise comparison; msg pinpoints the first diverging index.
  self.assertAllEqual(predictions.shape, annotated_predictions.shape)
  for i, (a, b) in enumerate(
      zip(predictions.flatten(), annotated_predictions.flatten())):
    self.assertAlmostEqual(a, b, msg='index=%d' % i)
def _test_complete_flow(
    self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
    n_classes, batch_size):
  """Runs train -> evaluate -> predict -> export for a DNNClassifier."""
  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension,))]
  est = dnn.DNNClassifier(
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      n_classes=n_classes,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predicted_proba = np.array([
      x[prediction_keys.PredictionKeys.PROBABILITIES]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

  # EXPORT
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def _testCheckpointCompatibleWithNonAnnotatedEstimator(
    self, train_input_fn, predict_input_fn, non_annotated_class,
    annotated_class, prediction_key, estimator_args):
  """Checks an annotated estimator can warm-start from a plain checkpoint.

  Trains a non-annotated estimator, then builds the annotated variant
  warm-started from the same model dir and asserts predictions agree.
  """
  input_dimension = 2
  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension, ))
  ]
  estimator = non_annotated_class(
      model_dir=self._model_dir,
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      **estimator_args)
  estimator.train(train_input_fn, steps=10)
  predictions = np.array(
      [x[prediction_key] for x in estimator.predict(predict_input_fn)])

  annotated_estimator = annotated_class(
      model_dir=self._model_dir,
      hidden_units=(2, 2),
      feature_columns=feature_columns,
      warm_start_from=self._model_dir,
      **estimator_args)
  annotated_predictions = np.array([
      x[prediction_key]
      for x in annotated_estimator.predict(predict_input_fn)
  ])

  # Element-wise comparison; msg pinpoints the first diverging index.
  self.assertAllEqual(predictions.shape, annotated_predictions.shape)
  for i, (a, b) in enumerate(
      zip(predictions.flatten(), annotated_predictions.flatten())):
    self.assertAlmostEqual(a, b, msg='index=%d' % i)
def test_multi_dim_weights(self): """Tests evaluation with weights.""" # Uses same checkpoint with test_multi_dims global_step = 100 create_checkpoint((([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),), global_step, self._model_dir) n_classes = 3 dnn_classifier = self._dnn_classifier_fn( hidden_units=(2, 2), feature_columns=[feature_column.numeric_column('age', shape=[2])], n_classes=n_classes, weight_column='w', model_dir=self._model_dir) def _input_fn(): # batch_size = 2, one false label, and one true. return {'age': [[10., 8.], [10., 8.]], 'w': [[10.], [100.]]}, [[1], [0]] # Uses identical numbers as test_multi_dims # See that test for calculation of logits. # loss = -log(0.43538380)*10 - log(0.16670536)*100 expected_loss = 187.468007 metrics = dnn_classifier.evaluate(input_fn=_input_fn, steps=1) self.assertAlmostEqual( expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)
def test_multi_dim_weights(self): """Asserts evaluation metrics for multi-dimensional input and logits.""" # same checkpoint with test_multi_dim. global_step = 100 create_checkpoint((([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),), global_step, self._model_dir) label_dimension = 3 dnn_regressor = self._dnn_regressor_fn( hidden_units=(2, 2), feature_columns=[feature_column.numeric_column('age', shape=[2])], label_dimension=label_dimension, weight_column='w', model_dir=self._model_dir) def _input_fn(): return {'age': [[10., 8.]], 'w': [10.]}, [[1., -1., 0.5]] # Uses identical numbers as test_multi_dim. # See that test for calculation of logits. # loss = 4.3929*10 expected_loss = 43.929 metrics = dnn_regressor.evaluate(input_fn=_input_fn, steps=1) self.assertAlmostEqual( expected_loss, metrics[metric_keys.MetricKeys.LOSS], places=3)
def test_evaluation_weights(self):
  """Tests evaluation with weights."""
  # Build a known checkpoint so loss is exactly computable.
  with ops.Graph().as_default():
    variables.Variable([[11.0]], name=_AGE_WEIGHT_NAME)
    variables.Variable([2.0], name=_BIAS_NAME)
    variables.Variable(
        100, name=ops.GraphKeys.GLOBAL_STEP, dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  def _input_fn():
    features = {
        'age': ((1,), (1,)),
        'weights': ((1.,), (2.,))
    }
    labels = ((10.,), (10.,))
    return features, labels

  linear_regressor = linear.LinearRegressor(
      feature_columns=(feature_column_lib.numeric_column('age'),),
      weight_feature_key='weights',
      model_dir=self._model_dir)
  eval_metrics = linear_regressor.evaluate(input_fn=_input_fn, steps=1)

  # Logit is (1. * 11.0 + 2.0) = 13, while label is 10.
  # Loss per example is 3**2 = 9.
  # Training loss is the weighted sum over batch = 9 + 2*9 = 27
  # average loss is the weighted average = (9 + 2*9) / (1 + 2) = 9
  self.assertDictEqual({
      metric_keys.MetricKeys.LOSS: 27.,
      metric_keys.MetricKeys.LOSS_MEAN: 9.,
      ops.GraphKeys.GLOBAL_STEP: 100
  }, eval_metrics)
def testMultiDim(self):
  """Tests predict when all variables are multi-dimensional."""
  batch_size = 2
  label_dimension = 3
  x_dim = 4
  feature_columns = (
      feature_column_lib.numeric_column('x', shape=(x_dim,)),)
  # Build a checkpoint by hand so predictions are exactly computable.
  with ops.Graph().as_default():
    variables.Variable(  # shape=[x_dim, label_dimension]
        [[1., 2., 3.], [2., 3., 4.], [3., 4., 5.], [4., 5., 6.]],
        name='linear/linear_model/x/weights')
    variables.Variable(  # shape=[label_dimension]
        [.2, .4, .6], name=_BIAS_NAME)
    variables.Variable(100, name='global_step', dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  linear_regressor = linear.LinearRegressor(
      feature_columns=feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir)
  predict_input_fn = numpy_io.numpy_input_fn(
      # x shape=[batch_size, x_dim]
      x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
      y=None,
      batch_size=batch_size,
      num_epochs=1,
      shuffle=False)
  predictions = linear_regressor.predict(input_fn=predict_input_fn)
  predicted_scores = list([x['predictions'] for x in predictions])
  # score = x * weight + bias, shape=[batch_size, label_dimension]
  self.assertAllClose(
      [[30.2, 40.4, 50.6], [70.2, 96.4, 122.6]], predicted_scores)
def test_multi_dim(self): """Asserts evaluation metrics for multi-dimensional input and logits.""" # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3. global_step = 100 _create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), global_step, self._model_dir) label_dimension = 3 # Create DNNRegressor and evaluate. dnn_regressor = dnn.DNNRegressor( hidden_units=(2, 2), feature_columns=[feature_column.numeric_column('age', shape=[2])], label_dimension=label_dimension, model_dir=self._model_dir) def _input_fn(): return {'age': [[10., 8.]]}, [[1., -1., 0.5]] # Uses identical numbers as # DNNModelFnTest.test_multi_dim_input_multi_dim_logits. # See that test for calculation of logits. # logits = [[-0.48, 0.48, 0.39]] # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929 expected_loss = 4.3929 self.assertAllClose( { metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: expected_loss / label_dimension, ops.GraphKeys.GLOBAL_STEP: global_step }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
def testFromCheckpointMultiBatch(self):
  """Resumes training from a checkpoint; checks optimizer loss and state."""
  # Create initial checkpoint.
  age_weight = 10.0
  bias = 5.0
  initial_global_step = 100
  with ops.Graph().as_default():
    variables.Variable([[age_weight]], name=_AGE_WEIGHT_NAME)
    variables.Variable([bias], name=_BIAS_NAME)
    variables.Variable(
        initial_global_step,
        name=ops.GraphKeys.GLOBAL_STEP,
        dtype=dtypes.int64)
    _save_variables_to_ckpt(self._model_dir)

  # logits = age * age_weight + bias
  # logits[0] = 17 * 10. + 5. = 175
  # logits[1] = 15 * 10. + 5. = 155
  # loss = sum(logits - label)^2 = (175 - 5)^2 + (155 - 3)^2 = 52004
  mock_optimizer = self._mockOptimizer(expected_loss=52004.)
  linear_regressor = linear.LinearRegressor(
      feature_columns=(feature_column_lib.numeric_column('age'),),
      model_dir=self._model_dir,
      optimizer=mock_optimizer)
  self.assertEqual(0, mock_optimizer.minimize.call_count)

  # Train for a few steps, and validate optimizer and final checkpoint.
  num_steps = 10
  linear_regressor.train(
      input_fn=lambda: ({'age': ((17,), (15,))}, ((5.,), (3.,))),
      steps=num_steps)
  self.assertEqual(1, mock_optimizer.minimize.call_count)
  self._assertCheckpoint(
      expected_global_step=initial_global_step + num_steps,
      expected_age_weight=age_weight,
      expected_bias=bias)
def test_multi_dim(self): """Asserts predictions for multi-dimensional input and logits.""" # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3. _create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), 100, self._model_dir) # Create DNNRegressor and predict. dnn_regressor = dnn.DNNRegressor( hidden_units=(2, 2), feature_columns=(feature_column.numeric_column('x', shape=(2, )), ), label_dimension=3, model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn( # Inputs shape is (batch_size, num_inputs). x={'x': np.array([[10., 8.]])}, batch_size=1, shuffle=False) # Uses identical numbers as # DNNModelFnTest.test_multi_dim_input_multi_dim_logits. # See that test for calculation of logits. # logits = [[-0.48, 0.48, 0.39]] => predictions = [-0.48, 0.48, 0.39] self.assertAllClose( { prediction_keys.PredictionKeys.PREDICTIONS: [-0.48, 0.48, 0.39], }, next(dnn_regressor.predict(input_fn=input_fn)))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, label_dimension, prediction_length):
  """Runs train -> evaluate -> predict -> export for a linear regressor."""
  feature_columns = [
      feature_column_lib.numeric_column('x', shape=(input_dimension, ))
  ]
  est = self._linear_regressor_fn(
      feature_columns=feature_columns,
      label_dimension=label_dimension,
      model_dir=self._model_dir)

  # TRAIN
  # learn y = x
  est.train(train_input_fn, steps=200)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))

  # PREDICT
  predictions = np.array(
      [x['predictions'] for x in est.predict(predict_input_fn)])
  self.assertAllEqual((prediction_length, label_dimension),
                      predictions.shape)

  # EXPORT
  feature_spec = feature_column_lib.make_parse_example_spec(
      feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def test_from_scratch_validate_summary(self):
  """Trains from scratch, then checks optimizer calls, summaries, checkpoint."""
  hidden_units = (2, 2)
  mock_optimizer = _mock_optimizer(self, hidden_units=hidden_units)
  dnn_classifier = dnn.DNNClassifier(
      hidden_units=hidden_units,
      feature_columns=(feature_column.numeric_column('age'), ),
      optimizer=mock_optimizer,
      model_dir=self._model_dir)
  # The optimizer must not have been invoked before training starts.
  self.assertEqual(0, mock_optimizer.minimize.call_count)

  # Train for a few steps, then validate optimizer, summaries, and
  # checkpoint.
  num_steps = 5
  summary_hook = _SummaryHook()
  dnn_classifier.train(
      input_fn=lambda: ({'age': [[10.]]}, [[1]]),
      steps=num_steps,
      hooks=(summary_hook, ))
  self.assertEqual(1, mock_optimizer.minimize.call_count)
  _assert_checkpoint(
      self,
      num_steps,
      input_units=1,
      hidden_units=hidden_units,
      output_units=1,
      model_dir=self._model_dir)
  # Every recorded summary must carry both loss tags.
  summaries = summary_hook.summaries()
  self.assertEqual(num_steps, len(summaries))
  for summary in summaries:
    summary_keys = [v.tag for v in summary.value]
    self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)
    self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)
def get_default_feature_columns():
  """Returns numeric feature columns for the default indicator set.

  Returns:
    A list of `feature_column.numeric_column` objects, one per indicator
    key, in the fixed order below.
  """
  # The original implementation built a dict mapping each key to an empty
  # list solely to iterate the keys; a plain tuple states the intent
  # directly and guarantees a deterministic column order on all Python
  # versions.
  keys = (
      'bband_up', 'bband_mid', 'bband_low',
      'ema_short', 'ema_mid', 'ema_long',
      'sma_short', 'sma_mid', 'sma_long',
      'adx', 'cci',
      'macd', 'macd_signal', 'macd_hist',
      'rsi', 'roc',
      'stoch_rsi_k', 'stoch_rsi_d',
      'will_r', 'obv',
      # NOTE(review): 'symbol' sounds categorical but is declared numeric
      # here, matching the original behavior — confirm this is intended.
      'symbol',
  )
  return [feature_column.numeric_column(key=key) for key in keys]
def test_one_dim(self): """Asserts evaluation metrics for one-dimensional input and logits.""" # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1. global_step = 100 _create_checkpoint(( ([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1.], [1.]], [.3]), ), global_step, self._model_dir) # Create DNNRegressor and evaluate. dnn_regressor = dnn.DNNRegressor( hidden_units=(2, 2), feature_columns=[feature_column.numeric_column('age')], model_dir=self._model_dir) def _input_fn(): return {'age': [[10.]]}, [[1.]] # Uses identical numbers as DNNModelTest.test_one_dim_logits. # See that test for calculation of logits. # logits = [[-2.08]] => predictions = [-2.08]. # loss = (1+2.08)^2 = 9.4864 expected_loss = 9.4864 self.assertAllClose({ metric_keys.MetricKeys.LOSS: expected_loss, metric_keys.MetricKeys.LOSS_MEAN: expected_loss, ops.GraphKeys.GLOBAL_STEP: global_step }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
def testWarmStart_BucketizedColumn(self):
  """Tests warm-starting the weights of a bucketized column."""
  # Create feature column.
  real = fc.numeric_column("real")
  real_bucket = fc.bucketized_column(real, boundaries=[0., 1., 2., 3.])

  # Save checkpoint from which to warm-start.
  _, prev_bucket_val = self._create_prev_run_var(
      "linear_model/real_bucketized/weights",
      shape=[5, 1],
      initializer=norms())

  # Single-partition partitioner (one partition per dimension).
  partitioner = lambda shape, dtype: [1] * len(shape)

  # New graph, new session WITHOUT warmstarting.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      cols_to_vars = self._create_linear_model([real_bucket], partitioner)
      sess.run(variables.global_variables_initializer())
      # Without warmstarting, the weights should be initialized using default
      # initializer (which is init_ops.zeros_initializer).
      self._assert_cols_to_vars(cols_to_vars,
                                {real_bucket: [np.zeros([5, 1])]}, sess)

  # New graph, new session with warmstarting.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      cols_to_vars = self._create_linear_model([real_bucket], partitioner)
      ws_util._warmstart(ws_util._WarmStartSettings(
          self.get_temp_dir(), vars_to_warmstart=".*real_bucketized.*"))
      sess.run(variables.global_variables_initializer())
      # Verify weights were correctly warmstarted.
      self._assert_cols_to_vars(cols_to_vars,
                                {real_bucket: [prev_bucket_val]}, sess)
def test_multi_dim(self): """Asserts predictions for multi-dimensional input and logits.""" # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3. _create_checkpoint(( ([[.6, .5], [-.6, -.5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]), ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]), ), 100, self._model_dir) # Create DNNRegressor and predict. dnn_regressor = dnn.DNNRegressor( hidden_units=(2, 2), feature_columns=(feature_column.numeric_column('x', shape=(2,)),), label_dimension=3, model_dir=self._model_dir) input_fn = numpy_io.numpy_input_fn( # Inputs shape is (batch_size, num_inputs). x={'x': np.array([[10., 8.]])}, batch_size=1, shuffle=False) # Uses identical numbers as # DNNModelFnTest.test_multi_dim_input_multi_dim_logits. # See that test for calculation of logits. # logits = [[-0.48, 0.48, 0.39]] => predictions = [-0.48, 0.48, 0.39] self.assertAllClose({ prediction_keys.PredictionKeys.PREDICTIONS: [-0.48, 0.48, 0.39], }, next(dnn_regressor.predict(input_fn=input_fn)))
def _test_complete_flow(self, train_input_fn, eval_input_fn, predict_input_fn,
                        input_dimension, label_dimension, batch_size):
  """Runs train -> evaluate -> predict -> export for a LinearEstimator."""
  feature_columns = [
      feature_column.numeric_column('x', shape=(input_dimension, ))
  ]
  est = linear.LinearEstimator(
      head=head_lib.regression_head(label_dimension=label_dimension),
      feature_columns=feature_columns,
      model_dir=self._model_dir)

  # TRAIN
  num_steps = 10
  est.train(train_input_fn, steps=num_steps)

  # EVALUATE
  scores = est.evaluate(eval_input_fn)
  self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
  self.assertIn('loss', six.iterkeys(scores))

  # PREDICT
  predictions = np.array([
      x[prediction_keys.PredictionKeys.PREDICTIONS]
      for x in est.predict(predict_input_fn)
  ])
  self.assertAllEqual((batch_size, label_dimension), predictions.shape)

  # EXPORT
  feature_spec = feature_column.make_parse_example_spec(feature_columns)
  serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
      feature_spec)
  export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                     serving_input_receiver_fn)
  self.assertTrue(gfile.Exists(export_dir))
def setUp(self):
  """Builds one bucketized numeric column per input feature."""
  columns = set()
  for index in range(NUM_FEATURES):
    base_column = feature_column.numeric_column(
        'f_%d' % index, dtype=dtypes.float32)
    columns.add(
        feature_column.bucketized_column(base_column, BUCKET_BOUNDARIES))
  self._feature_columns = columns
def testWarmStart_BucketizedColumn(self):
  """Tests warm-starting the weights of a bucketized column."""
  # Create feature column.
  real = fc.numeric_column("real")
  real_bucket = fc.bucketized_column(real, boundaries=[0., 1., 2., 3.])

  # Save checkpoint from which to warm-start.
  _, prev_bucket_val = self._create_prev_run_var(
      "linear_model/real_bucketized/weights",
      shape=[5, 1],
      initializer=norms())

  # Single-partition partitioner (one partition per dimension).
  partitioner = lambda shape, dtype: [1] * len(shape)

  # New graph, new session WITHOUT warm-starting.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      cols_to_vars = self._create_linear_model([real_bucket], partitioner)
      sess.run(variables.global_variables_initializer())
      # Without warm-starting, the weights should be initialized using default
      # initializer (which is init_ops.zeros_initializer).
      self._assert_cols_to_vars(cols_to_vars,
                                {real_bucket: [np.zeros([5, 1])]}, sess)

  # New graph, new session with warm-starting.
  with ops.Graph().as_default() as g:
    with self.test_session(graph=g) as sess:
      cols_to_vars = self._create_linear_model([real_bucket], partitioner)
      ws_util._warm_start(
          ws_util.WarmStartSettings(
              self.get_temp_dir(), vars_to_warm_start=".*real_bucketized.*"))
      sess.run(variables.global_variables_initializer())
      # Verify weights were correctly warm-started.
      self._assert_cols_to_vars(cols_to_vars,
                                {real_bucket: [prev_bucket_val]}, sess)