Example 1
    def test_dnn_and_linear_logits_are_added(self):
        with ops.Graph().as_default():
            variables_lib.Variable([[1.0]],
                                   name='linear/linear_model/x/weights')
            variables_lib.Variable([2.0],
                                   name='linear/linear_model/bias_weights')
            variables_lib.Variable([[3.0]], name='dnn/hiddenlayer_0/kernel')
            variables_lib.Variable([4.0], name='dnn/hiddenlayer_0/bias')
            variables_lib.Variable([[5.0]], name='dnn/logits/kernel')
            variables_lib.Variable([6.0], name='dnn/logits/bias')
            variables_lib.Variable(1, name='global_step', dtype=dtypes.int64)
            linear_testing_utils.save_variables_to_ckpt(self._model_dir)

        x_column = feature_column.numeric_column('x')
        est = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=[x_column],
            dnn_hidden_units=[1],
            dnn_feature_columns=[x_column],
            model_dir=self._model_dir)
        input_fn = numpy_io.numpy_input_fn(x={'x': np.array([[10.]])},
                                           batch_size=1,
                                           shuffle=False)
        # linear logits = 10*1 + 2 = 12
        # dnn logits = (10*3 + 4)*5 + 6 = 176
        # logits = dnn + linear = 176 + 12 = 188
        self.assertAllClose(
            {
                prediction_keys.PredictionKeys.PREDICTIONS: [188.],
            }, next(est.predict(input_fn=input_fn)))
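
The expected value in the assertion follows directly from the hand-seeded checkpoint variables. A plain-Python check of the arithmetic in the comments above (the hidden layer's default ReLU activation is a no-op here, since the pre-activation value 34 is positive):

# Recompute the expected prediction from the seeded weights.
x = 10.0
linear_logits = x * 1.0 + 2.0       # linear weights=1.0, bias=2.0 -> 12
hidden = max(0.0, x * 3.0 + 4.0)    # hiddenlayer_0 kernel=3.0, bias=4.0, ReLU -> 34
dnn_logits = hidden * 5.0 + 6.0     # logits kernel=5.0, bias=6.0 -> 176
assert linear_logits + dnn_logits == 188.0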
Example 2
  def _get_estimator(self,
                     train_distribute,
                     eval_distribute,
                     remote_cluster=None):
    input_dimension = LABEL_DIMENSION
    linear_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column("x", shape=(input_dimension,))
    ]

    return dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=LABEL_DIMENSION,
        model_dir=self._model_dir,
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config_lib.RunConfig(
            experimental_distribute=DistributeConfig(
                train_distribute=train_distribute,
                eval_distribute=eval_distribute,
                remote_cluster=remote_cluster)))
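
This factory is meant to be driven with concrete distribution strategies. A hypothetical call site, assuming the TF 1.x contrib MirroredStrategy (the real test suite parameterizes over strategies and remote clusters):

# Hypothetical call site inside the test case (TF 1.x contrib API).
import tensorflow as tf

strategy = tf.contrib.distribute.MirroredStrategy()
est = self._get_estimator(train_distribute=strategy,
                          eval_distribute=strategy)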
Example 3
    def _test_complete_flow_helper(self, linear_feature_columns,
                                   dnn_feature_columns, feature_spec,
                                   train_input_fn, eval_input_fn,
                                   predict_input_fn, input_dimension,
                                   label_dimension, batch_size):
        est = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir)

        # TRAIN
        num_steps = 10
        est.train(train_input_fn, steps=num_steps)

        # EVALUATE
        scores = est.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        # PREDICT
        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in est.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        # EXPORT
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                           serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
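
The helper assumes the caller supplies matching input_fns and a feature_spec. A minimal sketch of those inputs using the public TF 1.x numpy_input_fn; the feature name, shapes, and sizes mirror the other examples on this page:

# Sketch of the inputs the helper expects (public TF 1.x API).
import numpy as np
import tensorflow as tf

batch_size, label_dimension = 10, 2
data = np.linspace(0., 2., batch_size * label_dimension,
                   dtype=np.float32).reshape(batch_size, label_dimension)
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': data}, y=data, batch_size=batch_size, num_epochs=None,
    shuffle=True)
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': data}, batch_size=batch_size, shuffle=False)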
Example 4
    def test_complete_flow_with_mode(self, distribution):
        label_dimension = 2
        input_dimension = label_dimension
        batch_size = 10
        data = np.linspace(0.,
                           2.,
                           batch_size * label_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, label_dimension)
        train_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // len(distribution.worker_devices),
            shuffle=True)
        eval_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                y=data,
                                                batch_size=batch_size,
                                                shuffle=False)
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        linear_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns
        estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir,
            # TODO(isaprykin): Work around the colocate_with error.
            dnn_optimizer=adagrad.AdagradOptimizer(0.001),
            linear_optimizer=adagrad.AdagradOptimizer(0.001),
            config=run_config.RunConfig(train_distribute=distribution))

        num_steps = 10
        estimator.train(train_input_fn, steps=num_steps)

        scores = estimator.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
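
After export, the SavedModel can be reloaded for inference. A sketch using the TF 1.x contrib predictor; the 'inputs' feed key and the single-feature Example below are assumptions based on the default regression signature that parsing serving receivers produce, not part of the test:

# Sketch: reload the export and run a prediction (TF 1.x contrib API).
import tensorflow as tf

predict_fn = tf.contrib.predictor.from_saved_model(export_dir)
example = tf.train.Example(features=tf.train.Features(feature={
    'x': tf.train.Feature(float_list=tf.train.FloatList(value=[0.0, 1.0]))
}))
print(predict_fn({'inputs': [example.SerializeToString()]}))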
Example 5
    def test_regressor_basic_warm_starting(self, fc_impl):
        """Tests correctness of DNNLinearCombinedRegressor default warm-start."""
        age = fc_impl.numeric_column('age')
        city = fc_impl.embedding_column(
            fc_impl.categorical_column_with_vocabulary_list(
                'city', vocabulary_list=['Mountain View', 'Palo Alto']),
            dimension=5)

        # Create a DNNLinearCombinedRegressor and train to save a checkpoint.
        dnn_lc_regressor = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=[age],
            dnn_feature_columns=[city],
            dnn_hidden_units=[256, 128],
            model_dir=self._ckpt_and_vocab_dir,
            linear_optimizer='SGD',
            dnn_optimizer='SGD')
        dnn_lc_regressor.train(input_fn=self._input_fn, max_steps=1)

        # Create a second DNNLinearCombinedRegressor, warm-started from the first.
        # Use a learning_rate = 0.0 optimizer to check values (use SGD so we don't
        # have accumulator values that change).
        warm_started_dnn_lc_regressor = (
            dnn_linear_combined.DNNLinearCombinedRegressor(
                linear_feature_columns=[age],
                dnn_feature_columns=[city],
                dnn_hidden_units=[256, 128],
                linear_optimizer=gradient_descent.GradientDescentOptimizer(
                    learning_rate=0.0),
                dnn_optimizer=gradient_descent.GradientDescentOptimizer(
                    learning_rate=0.0),
                warm_start_from=dnn_lc_regressor.model_dir))

        warm_started_dnn_lc_regressor.train(input_fn=self._input_fn,
                                            max_steps=1)
        for variable_name in warm_started_dnn_lc_regressor.get_variable_names():
            self.assertAllClose(
                dnn_lc_regressor.get_variable_value(variable_name),
                warm_started_dnn_lc_regressor.get_variable_value(
                    variable_name))
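
warm_start_from also accepts a tf.estimator.WarmStartSettings object when only part of the model should be warm-started. A sketch; the '.*dnn.*' regex is illustrative, not from the test:

# Sketch: warm-start only the DNN tower instead of every variable.
import tensorflow as tf

ws = tf.estimator.WarmStartSettings(
    ckpt_to_initialize_from=dnn_lc_regressor.model_dir,
    vars_to_warm_start='.*dnn.*')
# Pass warm_start_from=ws to the second regressor in place of the
# bare model_dir string used above.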
Example 6
def _linear_regressor_fn(feature_columns,
                         model_dir=None,
                         label_dimension=1,
                         weight_feature_key=None,
                         optimizer='Ftrl',
                         config=None,
                         partitioner=None):
    return dnn_linear_combined.DNNLinearCombinedRegressor(
        model_dir=model_dir,
        linear_feature_columns=feature_columns,
        linear_optimizer=optimizer,
        label_dimension=label_dimension,
        weight_feature_key=weight_feature_key,
        input_layer_partitioner=partitioner,
        config=config)
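
This adapter lets a linear-regressor test suite run against the combined estimator's linear tower. A hypothetical invocation; the column name and model directory are illustrative:

# Hypothetical call: only the linear tower is built (Ftrl by default).
columns = [feature_column.numeric_column('age')]
est = _linear_regressor_fn(feature_columns=columns,
                           model_dir='/tmp/linear_only_test')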
Example 7
def _dnn_regressor_fn(hidden_units,
                      feature_columns,
                      model_dir=None,
                      label_dimension=1,
                      weight_feature_key=None,
                      optimizer='Adagrad',
                      config=None,
                      input_layer_partitioner=None):
    return dnn_linear_combined.DNNLinearCombinedRegressor(
        model_dir=model_dir,
        dnn_hidden_units=hidden_units,
        dnn_feature_columns=feature_columns,
        dnn_optimizer=optimizer,
        label_dimension=label_dimension,
        weight_feature_key=weight_feature_key,
        input_layer_partitioner=input_layer_partitioner,
        config=config)
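
The mirror-image adapter for DNN-regressor test suites. A hypothetical invocation, again with illustrative names:

# Hypothetical call: only the DNN tower is built (Adagrad by default).
columns = [feature_column.numeric_column('age')]
est = _dnn_regressor_fn(hidden_units=(2, 2), feature_columns=columns,
                        model_dir='/tmp/dnn_only_test')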
Example 8
    def test_complete_flow_with_mode(self, distribution,
                                     use_train_and_evaluate):
        label_dimension = 2
        input_dimension = label_dimension
        batch_size = 10
        data = np.linspace(0.,
                           2.,
                           batch_size * label_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, label_dimension)
        train_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // len(distribution.worker_devices))
        eval_input_fn = self.dataset_input_fn(x={'x': data},
                                              y=data,
                                              batch_size=batch_size //
                                              len(distribution.worker_devices))
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        linear_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns
        session_config = config_pb2.ConfigProto(log_device_placement=True,
                                                allow_soft_placement=True)
        estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir,
            dnn_optimizer=adam.Adam(0.001),
            linear_optimizer=adam.Adam(0.001),
            config=run_config.RunConfig(train_distribute=distribution,
                                        eval_distribute=distribution,
                                        session_config=session_config))

        num_steps = 2
        if use_train_and_evaluate:
            scores, _ = training.train_and_evaluate(
                estimator,
                training.TrainSpec(train_input_fn, max_steps=num_steps),
                training.EvalSpec(eval_input_fn))
        else:
            estimator.train(train_input_fn, steps=num_steps)
            scores = estimator.evaluate(eval_input_fn)

        self.assertIn('loss', six.iterkeys(scores))

        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
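
Note that the test unpacks scores, _ from training.train_and_evaluate, reflecting the (metrics, export_results) tuple this estimator version returns for local execution. The public TF 1.x spelling of that branch, as an assumed equivalent:

# Public TF 1.x spelling of the train_and_evaluate branch above.
import tensorflow as tf

scores, _ = tf.estimator.train_and_evaluate(
    estimator,
    tf.estimator.TrainSpec(train_input_fn, max_steps=2),
    tf.estimator.EvalSpec(eval_input_fn))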