Example #1
    def test_multi_dim(self):
        """Asserts evaluation metrics for multi-dimensional input and logits."""
        # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
        global_step = 100
        _create_checkpoint((
            ([[.6, .5], [-.6, -.5]], [.1, -.1]),
            ([[1., .8], [-.8, -1.]], [.2, -.2]),
            ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
        ), global_step, self._model_dir)
        label_dimension = 3

        # Create DNNRegressor and evaluate.
        dnn_regressor = dnn.DNNRegressor(
            hidden_units=(2, 2),
            feature_columns=[feature_column.numeric_column('age', shape=[2])],
            label_dimension=label_dimension,
            model_dir=self._model_dir)

        def _input_fn():
            return {'age': [[10., 8.]]}, [[1., -1., 0.5]]

        # Uses identical numbers as
        # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
        # See that test for calculation of logits.
        # logits = [[-0.48, 0.48, 0.39]]
        # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
        expected_loss = 4.3929
        self.assertAllClose(
            {
                metric_keys.MetricKeys.LOSS: expected_loss,
                metric_keys.MetricKeys.LOSS_MEAN:
                expected_loss / label_dimension,
                ops.GraphKeys.GLOBAL_STEP: global_step
            }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
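
The comment-level arithmetic above can be checked by hand. A minimal NumPy
sketch (illustrative only, not part of the test) that replays the forward
pass through the ReLU hidden layers using the checkpoint weights, then
recomputes the sum-of-squares loss:

import numpy as np

x = np.array([[10., 8.]])
labels = np.array([[1., -1., 0.5]])
# (weights, biases) per layer, exactly as written to the checkpoint above.
layers = [
    (np.array([[.6, .5], [-.6, -.5]]), np.array([.1, -.1])),
    (np.array([[1., .8], [-.8, -1.]]), np.array([.2, -.2])),
    (np.array([[-1., 1., .5], [-1., 1., .5]]), np.array([.3, -.3, .0])),
]
h = x
for w, b in layers[:-1]:
    h = np.maximum(h @ w + b, 0.)  # hidden layers use the default ReLU
w, b = layers[-1]
logits = h @ w + b                 # linear output layer
print(logits)                          # [[-0.48  0.48  0.39]]
print(np.sum((labels - logits) ** 2))  # 4.3929 == expected_loss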
Example #2
    def test_one_dim(self):
        """Asserts evaluation metrics for one-dimensional input and logits."""
        # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
        global_step = 100
        _create_checkpoint((
            ([[.6, .5]], [.1, -.1]),
            ([[1., .8], [-.8, -1.]], [.2, -.2]),
            ([[-1.], [1.]], [.3]),
        ), global_step, self._model_dir)

        # Create DNNRegressor and evaluate.
        dnn_regressor = dnn.DNNRegressor(
            hidden_units=(2, 2),
            feature_columns=[feature_column.numeric_column('age')],
            model_dir=self._model_dir)

        def _input_fn():
            return {'age': [[10.]]}, [[1.]]

        # Uses identical numbers as DNNModelTest.test_one_dim_logits.
        # See that test for calculation of logits.
        # logits = [[-2.08]] => predictions = [-2.08].
        # loss = (1+2.08)^2 = 9.4864
        expected_loss = 9.4864
        self.assertAllClose(
            {
                metric_keys.MetricKeys.LOSS: expected_loss,
                metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
                ops.GraphKeys.GLOBAL_STEP: global_step
            }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
Example #3
    def test_multi_dim(self):
        """Asserts train loss for multi-dimensional input and logits."""
        base_global_step = 100
        hidden_units = (2, 2)
        _create_checkpoint((
            ([[.6, .5], [-.6, -.5]], [.1, -.1]),
            ([[1., .8], [-.8, -1.]], [.2, -.2]),
            ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
        ), base_global_step, self._model_dir)
        input_dimension = 2
        label_dimension = 3

        # Uses identical numbers as
        # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
        # See that test for calculation of logits.
        # logits = [[-0.48, 0.48, 0.39]]
        # loss = (1+0.48)^2 + (-1-0.48)^2 + (0.5-0.39)^2 = 4.3929
        expected_loss = 4.3929
        mock_optimizer = _mock_optimizer(self,
                                         hidden_units=hidden_units,
                                         expected_loss=expected_loss)
        dnn_regressor = dnn.DNNRegressor(hidden_units=hidden_units,
                                         feature_columns=[
                                             feature_column.numeric_column(
                                                 'age',
                                                 shape=[input_dimension])
                                         ],
                                         label_dimension=label_dimension,
                                         optimizer=mock_optimizer,
                                         model_dir=self._model_dir)
        self.assertEqual(0, mock_optimizer.minimize.call_count)

        # Train for a few steps, then validate optimizer, summaries, and
        # checkpoint.
        num_steps = 5
        summary_hook = _SummaryHook()
        dnn_regressor.train(
            input_fn=lambda: ({'age': [[10., 8.]]}, [[1., -1., 0.5]]),
            steps=num_steps,
            hooks=(summary_hook,))
        self.assertEqual(1, mock_optimizer.minimize.call_count)
        summaries = summary_hook.summaries()
        self.assertEqual(num_steps, len(summaries))
        for summary in summaries:
            _assert_simple_summary(
                self, {
                    metric_keys.MetricKeys.LOSS_MEAN:
                    expected_loss / label_dimension,
                    'dnn/dnn/hiddenlayer_0_fraction_of_zero_values': 0.,
                    'dnn/dnn/hiddenlayer_1_fraction_of_zero_values': 0.5,
                    'dnn/dnn/logits_fraction_of_zero_values': 0.,
                    metric_keys.MetricKeys.LOSS: expected_loss,
                }, summary)
        _assert_checkpoint(self,
                           base_global_step + num_steps,
                           input_units=input_dimension,
                           hidden_units=hidden_units,
                           output_units=label_dimension,
                           model_dir=self._model_dir)
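
The `_mock_optimizer` helper used above is not shown in these excerpts. A
plausible minimal sketch (an assumption about its behavior, not the tests'
actual code, with the `hidden_units` bookkeeping omitted): it asserts the
loss tensor handed to `minimize` against the hand-computed `expected_loss`
and then only increments the global step, so the checkpoint weights are never
updated and `minimize.call_count` can be asserted directly.

import tensorflow as tf
from unittest import mock

def _sketch_mock_optimizer(expected_loss=None):
  """Hypothetical stand-in for the `_mock_optimizer` helper above."""
  optimizer = mock.NonCallableMock(spec=tf.train.Optimizer)

  def _minimize(loss, global_step=None):
    assert_ops = []
    if expected_loss is not None:
      # Fail the training step if the graph loss differs from the
      # hand-computed value.
      assert_ops.append(
          tf.assert_near(tf.constant(expected_loss, dtype=tf.float32), loss))
    with tf.control_dependencies(assert_ops):
      # Only advance the global step; no weights are touched, which is why
      # every training step reproduces the same loss.
      return tf.assign_add(global_step, 1).op

  optimizer.minimize = mock.MagicMock(wraps=_minimize)
  return optimizer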
Example #4
  def test_simple(self):
    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
    global_step = 100
    _create_checkpoint((
        (((1., 2.),), (3., 4.)),
        (((5., 6.), (7., 8.),), (9., 10.)),
        (((11.,), (12.,),), (13.,))
    ), global_step, self._model_dir)

    # Create DNNRegressor and evaluate.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('age'),),
        model_dir=self._model_dir)
    def _input_fn():
      return {'age': ((1,),)}, ((10.,),)
    # TODO(ptucker): Point to tool for calculating a neural net output?
    # prediction = 1778
    # loss = (10-1778)^2 = 3125824
    expected_loss = 3125824
    self.assertAllClose({
        metric_keys.MetricKeys.LOSS: expected_loss,
        metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
        ops.GraphKeys.GLOBAL_STEP: global_step
    }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
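
Several of these examples carry a TODO asking for a tool to compute the
network output by hand. Lacking one, a small helper (illustrative only;
assumes the checkpoint tuples are (weights, biases) pairs per layer, as in
`_create_checkpoint` above) reproduces the hand-computed predictions:

import numpy as np

def _dnn_output(layers, x):
  """Replays the forward pass: ReLU hidden layers, linear logits."""
  h = np.array(x, dtype=np.float64)
  for w, b in layers[:-1]:
    h = np.maximum(h @ np.array(w) + np.array(b), 0.)
  w, b = layers[-1]
  return h @ np.array(w) + np.array(b)

print(_dnn_output((
    (((1., 2.),), (3., 4.)),
    (((5., 6.), (7., 8.),), (9., 10.)),
    (((11.,), (12.,),), (13.,)),
), ((1.,),)))  # [[1778.]], so loss = (10 - 1778)^2 = 3125824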
Example #5
  def test_multi_dim(self):
    """Tests predict when all variables are multi-dimenstional."""
    # Create checkpoint: num_inputs=4, hidden_units=(2, 2), num_outputs=3.
    _create_checkpoint((
        (((1., 2.), (3., 4.), (5., 6.), (7., 8.),), (9., 8.)),
        (((7., 6.), (5., 4.),), (3., 2.)),
        (((1., 2., 3.), (4., 5., 6.),), (7., 8., 9.)),
    ), 100, self._model_dir)

    # Create DNNRegressor and predict.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('x', shape=(4,)),),
        label_dimension=3,
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        # Inputs shape is (batch_size, num_inputs).
        x={'x': np.array(((1., 2., 3., 4.), (5., 6., 7., 8.)))},
        batch_size=2,
        shuffle=False)
    # Output shape=(batch_size, num_outputs).
    self.assertAllClose((
        # TODO(ptucker): Point to tool for calculating a neural net output?
        (3275., 4660., 6045.),
        (6939., 9876., 12813.)
    ), tuple([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in dnn_regressor.predict(input_fn=input_fn)
    ]), rtol=1e-04)
Example #6
    def test_one_dim(self):
        """Asserts predictions for one-dimensional input and logits."""
        # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
        _create_checkpoint((
            ([[.6, .5]], [.1, -.1]),
            ([[1., .8], [-.8, -1.]], [.2, -.2]),
            ([[-1.], [1.]], [.3]),
        ), global_step=0, model_dir=self._model_dir)

        # Create DNNRegressor and predict.
        dnn_regressor = dnn.DNNRegressor(
            hidden_units=(2, 2),
            feature_columns=(feature_column.numeric_column('x'), ),
            model_dir=self._model_dir)
        input_fn = numpy_io.numpy_input_fn(x={'x': np.array([[10.]])},
                                           batch_size=1,
                                           shuffle=False)
        # Uses identical numbers as DNNModelTest.test_one_dim_logits.
        # See that test for calculation of logits.
        # logits = [[-2.08]] => predictions = [-2.08].
        self.assertAllClose(
            {
                prediction_keys.PredictionKeys.PREDICTIONS: [-2.08],
            }, next(dnn_regressor.predict(input_fn=input_fn)))
Example #7
  def test_multi_feature_column(self):
    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=1.
    global_step = 100
    _create_checkpoint((
        (((1., 2.), (3., 4.),), (5., 6.)),
        (((7., 8.), (9., 8.),), (7., 6.)),
        (((5.,), (4.,),), (3.,))
    ), global_step, self._model_dir)

    # Create DNNRegressor and evaluate.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('age'),
                         feature_column.numeric_column('height')),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'age': np.array(((20,), (40,))), 'height': np.array(((4,), (8,)))},
        y=np.array(((213.,), (421.,))),
        batch_size=2,
        shuffle=False)
    self.assertAllClose({
        # TODO(ptucker): Point to tool for calculating a neural net output?
        # predictions = 7315, 13771
        # loss = (213-7315)^2 + (421-13771)^2 = 228660896
        metric_keys.MetricKeys.LOSS: 228660896.,
        # average_loss = loss / 2 = 114330452
        metric_keys.MetricKeys.LOSS_MEAN: 114330452.,
        ops.GraphKeys.GLOBAL_STEP: global_step
    }, dnn_regressor.evaluate(input_fn=input_fn, steps=1))
Example #8
    def test_multi_dim(self):
        """Asserts predictions for multi-dimensional input and logits."""
        # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
        _create_checkpoint((
            ([[.6, .5], [-.6, -.5]], [.1, -.1]),
            ([[1., .8], [-.8, -1.]], [.2, -.2]),
            ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
        ), 100, self._model_dir)

        # Create DNNRegressor and predict.
        dnn_regressor = dnn.DNNRegressor(
            hidden_units=(2, 2),
            feature_columns=(feature_column.numeric_column('x',
                                                           shape=(2, )), ),
            label_dimension=3,
            model_dir=self._model_dir)
        input_fn = numpy_io.numpy_input_fn(
            # Inputs shape is (batch_size, num_inputs).
            x={'x': np.array([[10., 8.]])},
            batch_size=1,
            shuffle=False)
        # Uses identical numbers as
        # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
        # See that test for calculation of logits.
        # logits = [[-0.48, 0.48, 0.39]] => predictions = [-0.48, 0.48, 0.39]
        self.assertAllClose(
            {
                prediction_keys.PredictionKeys.PREDICTIONS:
                [-0.48, 0.48, 0.39],
            }, next(dnn_regressor.predict(input_fn=input_fn)))
Example #9
    def test_from_scratch(self):
        hidden_units = (2, 2)
        mock_optimizer = _mock_optimizer(self, hidden_units=hidden_units)
        dnn_regressor = dnn.DNNRegressor(
            hidden_units=hidden_units,
            feature_columns=(feature_column.numeric_column('age'), ),
            optimizer=mock_optimizer,
            model_dir=self._model_dir)
        self.assertEqual(0, mock_optimizer.minimize.call_count)

        # Train for a few steps, then validate optimizer, summaries, and
        # checkpoint.
        num_steps = 5
        summary_hook = _SummaryHook()
        dnn_regressor.train(
            input_fn=lambda: ({'age': ((1,),)}, ((5.,),)),
            steps=num_steps,
            hooks=(summary_hook,))
        self.assertEqual(1, mock_optimizer.minimize.call_count)
        _assert_checkpoint(self,
                           num_steps,
                           input_units=1,
                           hidden_units=hidden_units,
                           output_units=1,
                           model_dir=self._model_dir)
        summaries = summary_hook.summaries()
        self.assertEqual(num_steps, len(summaries))
        for summary in summaries:
            summary_keys = [v.tag for v in summary.value]
            self.assertIn(metric_keys.MetricKeys.LOSS, summary_keys)
            self.assertIn(metric_keys.MetricKeys.LOSS_MEAN, summary_keys)
Example #10
  def test_multi_dim(self):
    # Create checkpoint: num_inputs=3, hidden_units=(2, 2), num_outputs=2.
    global_step = 100
    _create_checkpoint((
        (((1., 2.), (3., 4.), (5., 6.),), (7., 8.)),
        (((9., 8.), (7., 6.),), (5., 4.)),
        (((3., 2.), (1., 2.),), (3., 4.)),
    ), global_step, self._model_dir)

    # Create DNNRegressor and evaluate.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('x', shape=(3,)),),
        label_dimension=2,
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array(((2., 4., 5.),))},
        y=np.array(((46., 58.),)),
        batch_size=1,
        shuffle=False)
    self.assertAllClose({
        # TODO(ptucker): Point to tool for calculating a neural net output?
        # predictions = 3198, 3094
        # loss = ((46-3198)^2 + (58-3094)^2) = 19152400
        metric_keys.MetricKeys.LOSS: 19152400,
        # average_loss = loss / 2 = 9576200
        metric_keys.MetricKeys.LOSS_MEAN: 9576200,
        ops.GraphKeys.GLOBAL_STEP: global_step
    }, dnn_regressor.evaluate(input_fn=input_fn, steps=1))
Example #11
  def test_multi_batch(self):
    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
    global_step = 100
    _create_checkpoint((
        (((1., 2.),), (3., 4.)),
        (((5., 6.), (7., 8.),), (9., 10.)),
        (((11.,), (12.,),), (13.,))
    ), global_step, self._model_dir)

    # Create DNNRegressor and evaluate.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('age'),),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'age': np.array(((1,), (2,), (3,)))},
        y=np.array(((10,), (9,), (8,))),
        batch_size=1,
        shuffle=False)
    # TODO(ptucker): Point to tool for calculating a neural net output?
    # predictions = 1778, 2251, 2724
    # loss = ((10-1778)^2 + (9-2251)^2 + (8-2724)^2) / 3 = 5176348
    expected_loss = 5176348.
    self.assertAllClose({
        metric_keys.MetricKeys.LOSS: expected_loss,
        metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
        ops.GraphKeys.GLOBAL_STEP: global_step
    }, dnn_regressor.evaluate(input_fn=input_fn, steps=3))
Example #12
  def test_multi_example(self):
    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
    global_step = 100
    _create_checkpoint((
        (((1., 2.),), (3., 4.)),
        (((5., 6.), (7., 8.),), (9., 10.)),
        (((11.,), (12.,),), (13.,))
    ), global_step, self._model_dir)

    # Create DNNRegressor and evaluate.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('age'),),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'age': np.array(((1,), (2,), (3,)))},
        y=np.array(((10,), (9,), (8,))),
        batch_size=3,
        shuffle=False)
    self.assertAllClose({
        # TODO(ptucker): Point to tool for calculating a neural net output?
        # predictions = 1778, 2251, 2724
        # loss = ((10-1778)^2 + (9-2251)^2 + (8-2724)^2) = 15529044
        metric_keys.MetricKeys.LOSS: 15529044.,
        # average_loss = loss / 3 = 5176348
        metric_keys.MetricKeys.LOSS_MEAN: 5176348.,
        ops.GraphKeys.GLOBAL_STEP: global_step
    }, dnn_regressor.evaluate(input_fn=input_fn, steps=1))
Example #13
  def test_weighted(self):
    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
    global_step = 100
    _create_checkpoint((
        (((1., 2.),), (3., 4.)),
        (((5., 6.), (7., 8.),), (9., 10.)),
        (((11.,), (12.,),), (13.,))
    ), global_step, self._model_dir)

    # Create DNNRegressor and evaluate.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('age'),),
        model_dir=self._model_dir,
        weight_feature_key='label_weight')
    def _input_fn():
      return {'age': ((1,),), 'label_weight': ((1.5,),)}, ((10.,),)
    self.assertAllClose({
        # TODO(ptucker): Point to tool for calculating a neural net output?
        # prediction = 1778
        # loss = 1.5*((10-1778)^2) = 4688736
        metric_keys.MetricKeys.LOSS: 4688736,
        # average_loss = loss / 1.5 = 3125824
        metric_keys.MetricKeys.LOSS_MEAN: 3125824,
        ops.GraphKeys.GLOBAL_STEP: global_step
    }, dnn_regressor.evaluate(input_fn=_input_fn, steps=1))
Example #14
  def _test_complete_flow(
      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
      label_dimension, batch_size):
    feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))]
    est = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    # TRAIN
    num_steps = 10
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    # EXPORT
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #15
  def test_complete_flow(self):
    label_dimension = 2
    batch_size = 10
    feature_columns = [feature_column.numeric_column('x', shape=(2,))]
    est = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    # TRAIN
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    num_steps = 200
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        shuffle=False)
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        batch_size=batch_size,
        shuffle=False)
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
    # TODO(ptucker): Deterministic test for predicted values?

    # EXPORT
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example #16
    def test_one_dim(self):
        """Asserts train loss for one-dimensional input and logits."""
        base_global_step = 100
        hidden_units = (2, 2)
        _create_checkpoint((
            ([[.6, .5]], [.1, -.1]),
            ([[1., .8], [-.8, -1.]], [.2, -.2]),
            ([[-1.], [1.]], [.3]),
        ), base_global_step, self._model_dir)

        # Uses identical numbers as DNNModelFnTest.test_one_dim_logits.
        # See that test for calculation of logits.
        # logits = [-2.08] => predictions = [-2.08]
        # loss = (1 + 2.08)^2 = 9.4864
        expected_loss = 9.4864
        mock_optimizer = _mock_optimizer(self,
                                         hidden_units=hidden_units,
                                         expected_loss=expected_loss)
        dnn_regressor = dnn.DNNRegressor(
            hidden_units=hidden_units,
            feature_columns=(feature_column.numeric_column('age'), ),
            optimizer=mock_optimizer,
            model_dir=self._model_dir)
        self.assertEqual(0, mock_optimizer.minimize.call_count)

        # Train for a few steps, then validate optimizer, summaries, and
        # checkpoint.
        num_steps = 5
        summary_hook = _SummaryHook()
        dnn_regressor.train(
            input_fn=lambda: ({'age': [[10.]]}, [[1.]]),
            steps=num_steps,
            hooks=(summary_hook,))
        self.assertEqual(1, mock_optimizer.minimize.call_count)
        summaries = summary_hook.summaries()
        self.assertEqual(num_steps, len(summaries))
        for summary in summaries:
            _assert_simple_summary(
                self, {
                    metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
                    'dnn/dnn/hiddenlayer_0_fraction_of_zero_values': 0.,
                    'dnn/dnn/hiddenlayer_1_fraction_of_zero_values': 0.5,
                    'dnn/dnn/logits_fraction_of_zero_values': 0.,
                    metric_keys.MetricKeys.LOSS: expected_loss,
                }, summary)
        _assert_checkpoint(self,
                           base_global_step + num_steps,
                           input_units=1,
                           hidden_units=hidden_units,
                           output_units=1,
                           model_dir=self._model_dir)
Example #17
  def test_from_scratch_with_default_optimizer(self):
    hidden_units = (2, 2)
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=hidden_units,
        feature_columns=(feature_column.numeric_column('age'),),
        model_dir=self._model_dir)

    # Train for a few steps, then validate final checkpoint.
    num_steps = 5
    dnn_regressor.train(
        input_fn=lambda: ({'age': ((1,),)}, ((10,),)), steps=num_steps)
    self._assert_checkpoint(
        num_steps, input_units=1, hidden_units=hidden_units, output_units=1)
Example #18
  def test_activation_fn(self):
    base_global_step = 100
    hidden_units = (2, 2)
    _create_checkpoint((
        (((1., 2.),), (3., 4.)),
        (((5., 6.), (7., 8.),), (9., 10.)),
        (((11.,), (12.,),), (13.,))
    ), base_global_step, self._model_dir)

    # Create DNNRegressor with mock optimizer.
    # TODO(ptucker): Point to tool for calculating a neural net output?
    # prediction = 36
    # loss = (10-36)^2 = 676
    expected_loss = 676.
    mock_optimizer = self._mockOptimizer(
        hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=hidden_units,
        feature_columns=(feature_column.numeric_column('age'),),
        optimizer=mock_optimizer,
        model_dir=self._model_dir,
        activation_fn=nn.tanh)
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 5
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=lambda: ({'age': ((1,),)}, ((10.,),)), steps=num_steps,
        hooks=(summary_hook,))
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    for summary in summaries:
      self._assert_simple_summary({
          metric_keys.MetricKeys.LOSS: expected_loss,
          metric_keys.MetricKeys.LOSS_MEAN: expected_loss,
          'dnn/dnn/hiddenlayer_0_activation': 0.,
          'dnn/dnn/hiddenlayer_0_fraction_of_zero_values': 0.,
          'dnn/dnn/hiddenlayer_1_activation': 0.,
          'dnn/dnn/hiddenlayer_1_fraction_of_zero_values': 0.,
          'dnn/dnn/logits_activation': 0.,
          'dnn/dnn/logits_fraction_of_zero_values': 0.,
      }, summary)
    self._assert_checkpoint(
        base_global_step + num_steps, input_units=1, hidden_units=hidden_units,
        output_units=1)
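
Because `tanh` saturates, the expected prediction of 36 is easy to
sanity-check by hand (illustrative NumPy, not part of the test): both hidden
layers output values of ~1, so the logits reduce to 11 + 12 + 13.

import numpy as np

h0 = np.tanh(np.array([1.]) @ np.array([[1., 2.]]) + np.array([3., 4.]))
h1 = np.tanh(h0 @ np.array([[5., 6.], [7., 8.]]) + np.array([9., 10.]))
logits = h1 @ np.array([[11.], [12.]]) + np.array([13.])
print(logits)                  # ~[36.]
print((10. - logits[0]) ** 2)  # ~676. == expected_loss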
Example #19
  def test_1d(self):
    """Tests predict when all variables are one-dimensional."""
    # Create checkpoint: num_inputs=1, hidden_units=(2, 2), num_outputs=1.
    _create_checkpoint((
        (((1., 2.),), (3., 4.)),
        (((5., 6.), (7., 8.),), (9., 10.)),
        (((11.,), (12.,),), (13.,))
    ), global_step=0, model_dir=self._model_dir)

    # Create DNNRegressor and predict.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('x'),),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array(((1.,),))}, batch_size=1, shuffle=False)
    # TODO(ptucker): Point to tool for calculating a neural net output?
    # prediction = 1778
    self.assertAllClose({
        prediction_keys.PredictionKeys.PREDICTIONS: (1778.,)
    }, next(dnn_regressor.predict(input_fn=input_fn)))
Example #20
  def test_two_feature_columns(self):
    """Tests predict with two feature columns."""
    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=1.
    _create_checkpoint((
        (((1., 2.), (3., 4.),), (5., 6.)),
        (((7., 8.), (9., 8.),), (7., 6.)),
        (((5.,), (4.,),), (3.,))
    ), 100, self._model_dir)

    # Create DNNRegressor and predict.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('x'),
                         feature_column.numeric_column('y')),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array((20.,)), 'y': np.array((4.,))},
        batch_size=1,
        shuffle=False)
    self.assertAllClose({
        # TODO(ptucker): Point to tool for calculating a neural net output?
        # predictions = 7315
        prediction_keys.PredictionKeys.PREDICTIONS: (7315,)
    }, next(dnn_regressor.predict(input_fn=input_fn)))
Example #21
  def test_weighted_multi_batch(self):
    # Create checkpoint: num_inputs=4, hidden_units=(2, 2), num_outputs=3.
    global_step = 100
    _create_checkpoint((
        (((1., 2.), (3., 4.), (5., 6.), (7., 8.),), (9., 8.)),
        (((7., 6.), (5., 4.),), (3., 2.)),
        (((1., 2., 3.), (4., 5., 6.),), (7., 8., 9.)),
    ), global_step, self._model_dir)

    # Create batched input.
    input_fn = numpy_io.numpy_input_fn(
        x={
            # Dimensions are (batch_size, feature_column.dimension).
            'x': np.array((
                (15., 0., 1.5, 135.2),
                (45., 45000., 1.8, 158.8),
                (21., 33000., 1.7, 207.1),
                (60., 10000., 1.6, 90.2)
            )),
            # TODO(ptucker): Add test for different weight shapes when we fix
            # head._compute_weighted_loss (currently it requires weights to be
            # same shape as labels & logits).
            'label_weights': np.array((
                (1., 1., 0.),
                (.5, 1., .1),
                (.5, 0., .9),
                (0., 0., 0.),
            ))
        },
        # Labels shape is (batch_size, num_outputs).
        y=np.array((
            (5., 2., 2.),
            (-2., 1., -4.),
            (-1., -1., -1.),
            (-4., 3., 9.),
        )),
        batch_size=1,
        shuffle=False)

    # Create DNNRegressor and evaluate.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(
            # Dimension is number of inputs.
            feature_column.numeric_column(
                'x', dtype=dtypes.int32, shape=(4,)),
        ),
        model_dir=self._model_dir,
        label_dimension=3,
        weight_feature_key='label_weights')
    self.assertAllClose({
        # TODO(ptucker): Point to tool for calculating a neural net output?
        # predictions = [
        #   [  54033.5    76909.6    99785.7]
        #   [8030393.8 11433082.4 14835771.0]
        #   [5923209.2  8433014.8 10942820.4]
        #   [1810021.6  2576969.6  3343917.6]
        # ]
        # losses = label_weights*(labels-predictions)^2 = [
        #  [  2.91907881e+09   5.91477894e+09                0]
        #  [  3.22436284e+13   1.30715350e+14   2.20100220e+13]
        #  [  1.75422095e+13                0   1.07770806e+14]
        #  [               0                0                0]
        # ]
        # total_loss = sum(losses) = 3.10290850204e+14
        # loss = total_loss / 4 = 7.7572712551e+13
        metric_keys.MetricKeys.LOSS: 7.7572712551e+13,
        # average_loss = total_loss / sum(label_weights) = 6.20581700408e+13
        metric_keys.MetricKeys.LOSS_MEAN: 6.20581700408e+13,
        ops.GraphKeys.GLOBAL_STEP: global_step
    }, dnn_regressor.evaluate(input_fn=input_fn, steps=4))
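
A quick NumPy check (illustrative, not part of the test) of the weighted-loss
arithmetic in the comments above, starting from the stated predictions:

import numpy as np

predictions = np.array([
    [54033.5, 76909.6, 99785.7],
    [8030393.8, 11433082.4, 14835771.0],
    [5923209.2, 8433014.8, 10942820.4],
    [1810021.6, 2576969.6, 3343917.6],
])
labels = np.array([
    [5., 2., 2.], [-2., 1., -4.], [-1., -1., -1.], [-4., 3., 9.],
])
weights = np.array([
    [1., 1., 0.], [.5, 1., .1], [.5, 0., .9], [0., 0., 0.],
])
total_loss = np.sum(weights * (labels - predictions) ** 2)
print(total_loss)                  # ~3.10290850204e+14
print(total_loss / 4)              # LOSS: averaged over the 4 one-example steps
print(total_loss / weights.sum())  # LOSS_MEAN: total / sum(label_weights)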
Example #22
  def test_weighted_multi_example_multi_column(self):
    hidden_units = (2, 2)
    base_global_step = 100
    _create_checkpoint((
        (((1., 2.), (3., 4.), (5., 6.), (7., 8.),), (9., 8.)),
        (((7., 6.), (5., 4.),), (3., 2.)),
        (((1., 2., 3.), (4., 5., 6.),), (7., 8., 9.)),
    ), base_global_step, self._model_dir)

    # Create DNNRegressor with mock optimizer.
    # TODO(ptucker): Point to tool for calculating a neural net output?
    # predictions = [
    #   [  54033.5    76909.6    99785.7]
    #   [8030393.8 11433082.4 14835771.0]
    #   [5923209.2  8433014.8 10942820.4]
    #   [1810021.6  2576969.6  3343917.6]
    # ]
    # loss = sum(label_weights*(labels-predictions)^2) = 3.10290850204e+14
    expected_loss = 3.10290850204e+14
    mock_optimizer = self._mockOptimizer(
        hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=hidden_units,
        feature_columns=(
            # Dimensions add up to 4 (number of inputs).
            feature_column.numeric_column(
                'x', dtype=dtypes.int32, shape=(2,)),
            feature_column.numeric_column(
                'y', dtype=dtypes.float32, shape=(2,)),
        ),
        optimizer=mock_optimizer,
        model_dir=self._model_dir,
        label_dimension=3,
        weight_feature_key='label_weights')
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Create batched inputs.
    input_fn = numpy_io.numpy_input_fn(
        # NOTE: feature columns are concatenated in alphabetic order of keys.
        x={
            # Inputs shapes are (batch_size, feature_column.dimension).
            'x': np.array((
                (15., 0.),
                (45., 45000.),
                (21., 33000.),
                (60., 10000.)
            )),
            'y': np.array((
                (1.5, 135.2),
                (1.8, 158.8),
                (1.7, 207.1),
                (1.6, 90.2)
            )),
            # TODO(ptucker): Add test for different weight shapes when we fix
            # head._compute_weighted_loss (currently it requires weights to be
            # same shape as labels & logits).
            'label_weights': np.array((
                (1., 1., 0.),
                (.5, 1., .1),
                (.5, 0., .9),
                (0., 0., 0.),
            ))
        },
        # Labels shape is (batch_size, num_outputs).
        y=np.array((
            (5., 2., 2.),
            (-2., 1., -4.),
            (-1., -1., -1.),
            (-4., 3., 9.),
        )),
        batch_size=4,
        num_epochs=None,
        shuffle=False)

    # Train for 1 step, then validate optimizer, summaries, and checkpoint.
    summary_hook = _SummaryHook()
    dnn_regressor.train(input_fn=input_fn, steps=1, hooks=(summary_hook,))
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(1, len(summaries))
    self._assert_simple_summary({
        metric_keys.MetricKeys.LOSS: expected_loss,
        # average_loss = loss / sum(label_weights) = 3.10290850204e+14 / 5.
        #              = 6.205817e+13
        metric_keys.MetricKeys.LOSS_MEAN: 6.205817e+13,
        'dnn/dnn/hiddenlayer_0_activation': 0.,
        'dnn/dnn/hiddenlayer_0_fraction_of_zero_values': 0.,
        'dnn/dnn/hiddenlayer_1_activation': 0.,
        'dnn/dnn/hiddenlayer_1_fraction_of_zero_values': 0.,
        'dnn/dnn/logits_activation': 0.,
        'dnn/dnn/logits_fraction_of_zero_values': 0.,
    }, summaries[0])
    self._assert_checkpoint(
        base_global_step + 1,
        input_units=4,  # Sum of feature column dimensions.
        hidden_units=hidden_units,
        output_units=3)  # = label_dimension

    # Train for 3 steps - we should still get the same loss since we're not
    # updating weights.
    dnn_regressor.train(input_fn=input_fn, steps=3)
    self.assertEqual(2, mock_optimizer.minimize.call_count)
    self._assert_checkpoint(
        base_global_step + 4,
        input_units=4,  # Sum of feature column dimensions.
        hidden_units=hidden_units,
        output_units=3)  # = label_dimension
Example #23
def _dnn_regressor_fn(*args, **kwargs):
  return dnn.DNNRegressor(*args, **kwargs)
Example #24
def DNNRegressorWithLayerAnnotations(  # pylint: disable=invalid-name
    hidden_units,
    feature_columns,
    model_dir=None,
    label_dimension=1,
    weight_column=None,
    optimizer='Adagrad',
    activation_fn=nn.relu,
    dropout=None,
    input_layer_partitioner=None,
    config=None,
    warm_start_from=None,
    loss_reduction=losses.Reduction.SUM,
):
  """A regressor for TensorFlow DNN models with layer annotations.

  This regressor is functionally identical to estimator.DNNRegressor as far
  as training and evaluating models are concerned. The key difference is that
  this regressor adds additional layer annotations, which can be used for
  computing Integrated Gradients.

  Integrated Gradients is a method for attributing a classifier's predictions
  to its input features (https://arxiv.org/pdf/1703.01365.pdf). Given an input
  instance, the method assigns attribution scores to individual features in
  proportion to the feature's importance to the classifier's prediction.

  See estimator.DNNRegressor for example code for training and evaluating models
  using this regressor.

  This regressor is checkpoint-compatible with estimator.DNNRegressor and
  therefore the following should work seamlessly:

  # Instantiate ordinary estimator as usual.
  estimator = tf.estimator.DNNRegressor(
    hidden_units, feature_columns, config=config, ...)

  # Train estimator, export checkpoint.
  tf.estimator.train_and_evaluate(estimator, ...)

  # Instantiate estimator with annotations with the same configuration as the
  # ordinary estimator.
  estimator_with_annotations = (
    tf.contrib.estimator.DNNRegressorWithLayerAnnotations(
      hidden_units, feature_columns, config=config, ...))

  # Call export_savedmodel with the same arguments as the ordinary estimator,
  # using the checkpoint produced for the ordinary estimator.
  estimator_with_annotations.export_saved_model(
    export_dir_base, serving_input_receiver, ...
    checkpoint_path='/path/to/ordinary/estimator/checkpoint/model.ckpt-1234')

  Args:
    hidden_units: Iterable of the number of hidden units per layer. All
      layers are fully connected. Ex. `[64, 32]` means the first layer has
      64 nodes and the second one has 32.
    feature_columns: An iterable containing all the feature columns used by the
      model. All items in the set should be instances of classes derived from
      `_FeatureColumn`.
    model_dir: Directory to save model parameters, graph, etc. This can also
      be used to load checkpoints from the directory into an estimator to
      continue training a previously saved model.
    label_dimension: Number of regression targets per example. This is the size
      of the last dimension of the labels and logits `Tensor` objects
      (typically, these have shape `[batch_size, label_dimension]`).
    weight_column: A string or a `_NumericColumn` created by
      `tf.feature_column.numeric_column` defining the feature column that
      represents weights. It is used to down-weight or boost examples during
      training, and is multiplied by the example's loss. If it is a string,
      it is used as a key to fetch the weight tensor from `features`. If it
      is a `_NumericColumn`, the raw tensor is fetched by key
      `weight_column.key`, then `weight_column.normalizer_fn` is applied to
      it to get the weight tensor.
    optimizer: An instance of `tf.Optimizer` used to train the model. Defaults
      to Adagrad optimizer.
    activation_fn: Activation function applied to each layer. If `None`, will
      use `tf.nn.relu`.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Optional. Partitioner for input layer. Defaults to
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    config: `RunConfig` object to configure the runtime settings.
    warm_start_from: A string filepath to a checkpoint to warm-start from, or a
      `WarmStartSettings` object to fully configure warm-starting.  If the
      string filepath is provided instead of a `WarmStartSettings`, then all
      weights are warm-started, and it is assumed that vocabularies and Tensor
      names are unchanged.
    loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how
      to reduce training loss over the batch. Defaults to `SUM`.

  Returns:
    DNNRegressor with layer annotations.
  """

  original = dnn.DNNRegressor(
      hidden_units=hidden_units,
      feature_columns=feature_columns,
      model_dir=model_dir,
      label_dimension=label_dimension,
      weight_column=weight_column,
      optimizer=optimizer,
      activation_fn=activation_fn,
      dropout=dropout,
      input_layer_partitioner=input_layer_partitioner,
      config=config,
      warm_start_from=warm_start_from,
      loss_reduction=loss_reduction,
  )

  def _model_fn(features, labels, mode, config):
    with _monkey_patch(
        feature_column_lib, '_internal_input_layer',
        make_input_layer_with_layer_annotations(
            feature_column_lib._internal_input_layer)):  # pylint: disable=protected-access
      return original.model_fn(features, labels, mode, config)

  return estimator.Estimator(
      model_fn=_model_fn,
      model_dir=model_dir,
      config=config,
      warm_start_from=warm_start_from)
Example #25
  def test_weighted_multi_batch(self):
    hidden_units = (2, 2)
    base_global_step = 100
    _create_checkpoint((
        (((1., 2.), (3., 4.), (5., 6.), (7., 8.),), (9., 8.)),
        (((7., 6.), (5., 4.),), (3., 2.)),
        (((1., 2., 3.), (4., 5., 6.),), (7., 8., 9.)),
    ), base_global_step, self._model_dir)

    mock_optimizer = self._mockOptimizer(hidden_units=hidden_units)
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=hidden_units,
        feature_columns=(
            # Dimension is number of inputs.
            feature_column.numeric_column(
                'x', dtype=dtypes.int32, shape=(4,)),
        ),
        optimizer=mock_optimizer,
        model_dir=self._model_dir,
        label_dimension=3,
        weight_feature_key='label_weights')
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Create batched input.
    input_fn = numpy_io.numpy_input_fn(
        x={
            # Inputs shape is (batch_size, feature_column.dimension).
            'x': np.array((
                (15., 0., 1.5, 135.2),
                (45., 45000., 1.8, 158.8),
                (21., 33000., 1.7, 207.1),
                (60., 10000., 1.6, 90.2)
            )),
            # TODO(ptucker): Add test for different weight shapes when we fix
            # head._compute_weighted_loss (currently it requires weights to be
            # same shape as labels & logits).
            'label_weights': np.array((
                (1., 1., 0.),
                (.5, 1., .1),
                (.5, 0., .9),
                (0., 0., 0.),
            ))
        },
        # Labels shape is (batch_size, num_outputs).
        y=np.array((
            (5., 2., 2.),
            (-2., 1., -4.),
            (-1., -1., -1.),
            (-4., 3., 9.),
        )),
        batch_size=1,
        shuffle=False)

    # Train for a few steps, then validate optimizer, summaries, and
    # checkpoint.
    num_steps = 4
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=input_fn, steps=num_steps, hooks=(summary_hook,))
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    # TODO(ptucker): Point to tool for calculating a neural net output?
    # predictions = [
    #   [  54033.5    76909.6    99785.7]
    #   [8030393.8 11433082.4 14835771.0]
    #   [5923209.2  8433014.8 10942820.4]
    #   [1810021.6  2576969.6  3343917.6]
    # ]
    # losses = label_weights*(labels-predictions)^2 = [
    #   [2.91907881e+09 5.91477894e+09              0]
    #   [3.22436284e+13 1.30715350e+14 2.20100220e+13]
    #   [1.75422095e+13              0 1.07770806e+14]
    #   [             0              0              0]
    # ]
    # step_losses = [sum(losses[i]) for i in 0...3]
    #             = [8833857750, 1.84969e+14, 1.2531302e+14, 0]
    expected_step_losses = (8833857750, 1.84969e+14, 1.2531302e+14, 0)
    # step_average_losses = [
    #     step_losses[i] / sum(label_weights[i]) for i in 0...3
    # ] = [4416928875, 1.1560563e+14, 8.95093e+13, 0]
    expected_step_average_losses = (4416928875, 1.1560563e+14, 8.95093e+13, 0)
    for i in range(len(summaries)):
      self._assert_simple_summary({
          metric_keys.MetricKeys.LOSS: expected_step_losses[i],
          metric_keys.MetricKeys.LOSS_MEAN: expected_step_average_losses[i],
          'dnn/dnn/hiddenlayer_0_activation': 0.,
          'dnn/dnn/hiddenlayer_0_fraction_of_zero_values': 0.,
          'dnn/dnn/hiddenlayer_1_activation': 0.,
          'dnn/dnn/hiddenlayer_1_fraction_of_zero_values': 0.,
          'dnn/dnn/logits_activation': 0.,
          'dnn/dnn/logits_fraction_of_zero_values': 0.,
      }, summaries[i])
    self._assert_checkpoint(
        base_global_step + num_steps,
        input_units=4,  # Sum of feature column dimensions.
        hidden_units=hidden_units,
        output_units=3)  # = label_dimension