Example #1
    def testNumpyInputFn(self):
        a = np.arange(4) * 1.0
        b = np.arange(32, 36)
        x = {'a': a, 'b': b}
        y = np.arange(-32, -28)

        with self.cached_session() as session:
            input_fn = numpy_io.numpy_input_fn(x,
                                               y,
                                               batch_size=2,
                                               shuffle=False,
                                               num_epochs=1)
            features, target = input_fn()

            coord = tf.train.Coordinator()
            threads = tf.compat.v1.train.queue_runner.start_queue_runners(
                session, coord=coord)

            res = session.run([features, target])
            self.assertAllEqual(res[0]['a'], [0, 1])
            self.assertAllEqual(res[0]['b'], [32, 33])
            self.assertAllEqual(res[1], [-32, -31])

            session.run([features, target])
            with self.assertRaises(tf.errors.OutOfRangeError):
                session.run([features, target])

            coord.request_stop()
            coord.join(threads)
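For comparison, a minimal tf.data sketch (an aside, not part of the original test) reproduces what numpy_input_fn(x, y, batch_size=2, shuffle=False, num_epochs=1) yields for the same inputs, without queue runners:

import numpy as np
import tensorflow as tf

a = np.arange(4) * 1.0
b = np.arange(32, 36)
y = np.arange(-32, -28)

# One epoch, batches of 2, no shuffling -- mirroring the numpy_input_fn arguments.
ds = tf.data.Dataset.from_tensor_slices(({'a': a, 'b': b}, y)).repeat(1).batch(2)
for features, target in ds:  # eager iteration (TF 2.x)
    print(features['a'].numpy(), features['b'].numpy(), target.numpy())
# First batch: [0. 1.] [32 33] [-32 -31]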
Example #2
    def testNumpyInputFnWithYAsDict(self):
        a = np.arange(4) * 1.0
        b = np.arange(32, 36)
        x = {'a': a, 'b': b}
        y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)}

        with self.cached_session() as session:
            input_fn = numpy_io.numpy_input_fn(x,
                                               y,
                                               batch_size=2,
                                               shuffle=False,
                                               num_epochs=1)
            features_tensor, targets_tensor = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)

            features, targets = session.run([features_tensor, targets_tensor])
            self.assertEqual(len(features), 2)
            self.assertAllEqual(features['a'], [0, 1])
            self.assertAllEqual(features['b'], [32, 33])
            self.assertEqual(len(targets), 2)
            self.assertAllEqual(targets['y1'], [-32, -31])
            self.assertAllEqual(targets['y2'], [32, 31])

            session.run([features_tensor, targets_tensor])
            with self.assertRaises(errors.OutOfRangeError):
                session.run([features_tensor, targets_tensor])

            coord.request_stop()
            coord.join(threads)
Example #3
  def testMultiDim(self):
    """Tests predict when all variables are multi-dimenstional."""
    batch_size = 2
    label_dimension = 3
    with ops.Graph().as_default():
      variables.Variable(  # shape=[label_dimension]
          [.2, .4, .6], name=BIAS_NAME)
      variables.Variable(100, name='global_step', dtype=dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    baseline_estimator = _baseline_estimator_fn(
        label_dimension=label_dimension,
        model_dir=self._model_dir)

    predict_input_fn = numpy_io.numpy_input_fn(
        # x shape=[batch_size, x_dim]
        x={'x': np.array([[1., 2., 3., 4.], [5., 6., 7., 8.]])},
        y=None,
        batch_size=batch_size,
        num_epochs=1,
        shuffle=False)
    predictions = baseline_estimator.predict(input_fn=predict_input_fn)
    predicted_scores = [x['predictions'] for x in predictions]
    # score = bias, shape=[batch_size, label_dimension]
    self.assertAllClose([[0.2, 0.4, 0.6], [0.2, 0.4, 0.6]],
                        predicted_scores)
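The expected scores follow directly from the model being bias-only; a quick NumPy check (illustrative, not part of the test) shows that every prediction is the bias broadcast over the batch:

import numpy as np

bias = np.array([.2, .4, .6])   # the checkpointed BIAS_NAME variable
print(np.tile(bias, (2, 1)))    # batch_size=2 rows, each equal to the bias:
                                # [[0.2 0.4 0.6]
                                #  [0.2 0.4 0.6]]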
Example #4
    def testBinaryClassifierTrainInMemoryWithDataset(self):
        train_input_fn = _make_train_input_fn_dataset(is_classification=True)
        predict_input_fn = numpy_io.numpy_input_fn(x=FEATURES_DICT,
                                                   y=None,
                                                   batch_size=1,
                                                   num_epochs=1,
                                                   shuffle=False)

        est = boosted_trees.boosted_trees_classifier_train_in_memory(
            train_input_fn=train_input_fn,
            feature_columns=self._feature_columns,
            n_trees=1,
            max_depth=5)
        # It will stop after 5 steps because of the max depth and num trees.
        self._assert_checkpoint(est.model_dir,
                                global_step=5,
                                finalized_trees=1,
                                attempted_layers=5)

        # Check evaluate and predict.
        eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
        self.assertAllClose(eval_res['accuracy'], 1.0)
        predictions = list(est.predict(input_fn=predict_input_fn))
        self.assertAllClose([[0], [1], [1], [0], [0]],
                            [pred['class_ids'] for pred in predictions])
Example #5
    def testRegressorTrainInMemoryWithDataset(self):
        train_input_fn = _make_train_input_fn_dataset(is_classification=False)
        predict_input_fn = numpy_io.numpy_input_fn(x=FEATURES_DICT,
                                                   y=None,
                                                   batch_size=1,
                                                   num_epochs=1,
                                                   shuffle=False)

        est = boosted_trees.boosted_trees_regressor_train_in_memory(
            train_input_fn=train_input_fn,
            feature_columns=self._feature_columns,
            n_trees=1,
            max_depth=5)
        # It will stop after 5 steps because of the max depth and num trees.
        self._assert_checkpoint(est.model_dir,
                                global_step=5,
                                finalized_trees=1,
                                attempted_layers=5)
        # Check evaluate and predict.
        eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
        self.assertAllClose(eval_res['average_loss'], 2.478283)
        predictions = list(est.predict(input_fn=predict_input_fn))
        self.assertAllClose(
            [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
            [pred['predictions'] for pred in predictions])
Example #6
    def testBinaryClassifierTrainInMemoryAndEvalAndInferWithPrePruning(self):
        train_input_fn = _make_train_input_fn(is_classification=True)
        predict_input_fn = numpy_io.numpy_input_fn(x=FEATURES_DICT,
                                                   y=None,
                                                   batch_size=1,
                                                   num_epochs=1,
                                                   shuffle=False)

        est = boosted_trees.boosted_trees_classifier_train_in_memory(
            train_input_fn=train_input_fn,
            feature_columns=self._feature_columns,
            n_trees=1,
            max_depth=5,
            pruning_mode='pre',
            tree_complexity=0.01)
        # Training actually stops after 2*depth*n_trees steps (via a hook) because,
        # due to pre-pruning, we still could not grow 1 tree of depth 5.
        self._assert_checkpoint(est.model_dir,
                                global_step=11,
                                finalized_trees=0,
                                attempted_layers=11)

        # Check evaluate and predict.
        eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
        self.assertAllClose(eval_res['accuracy'], 1.0)
        # Validate predictions.
        predictions = list(est.predict(input_fn=predict_input_fn))
        self.assertAllClose([[0], [1], [1], [0], [0]],
                            [pred['class_ids'] for pred in predictions])
Example #7
 def testNumpyInputFnWithXIsEmptyArray(self):
     x = np.array([[], []])
     y = np.arange(4)
     with self.cached_session():
         with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'):
             failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
             failing_input_fn()
Example #8
 def test_train_op_calls_both_dnn_and_linear(self, fc_impl):
   dnn_opt = gradient_descent_v2.SGD(1.)
   linear_opt = gradient_descent_v2.SGD(1.)
   x_column = fc_impl.numeric_column('x')
   input_fn = numpy_io.numpy_input_fn(
       x={'x': np.array([[0.], [1.]])},
       y=np.array([[0.], [1.]]),
       batch_size=1,
       shuffle=False)
   est = dnn_linear_combined.DNNLinearCombinedClassifierV2(
       linear_feature_columns=[x_column],
       # verifies linear_optimizer is used only for linear part.
       linear_optimizer=linear_opt,
       dnn_hidden_units=(2, 2),
       dnn_feature_columns=[x_column],
       # verifies dnn_optimizer is used only for dnn part.
       dnn_optimizer=dnn_opt,
       model_dir=self._model_dir)
   num_steps = 1
   est.train(input_fn, steps=num_steps)
   # verifies train_op fires linear minimize op
   self.assertEqual(num_steps,
                    est.get_variable_value(linear_opt.iterations.name))
   # verifies train_op fires dnn optimizer
   self.assertEqual(num_steps, est.get_variable_value(dnn_opt.iterations.name))
Example #9
  def testBinaryClassifierTrainInMemoryWithMixedColumns(self):
    categorical = feature_column.categorical_column_with_vocabulary_list(
        key='f_0', vocabulary_list=('bad', 'good', 'ok'))
    indicator_col = feature_column.indicator_column(categorical)
    bucketized_col = feature_column.bucketized_column(
        feature_column.numeric_column('f_1', dtype=dtypes.float32),
        BUCKET_BOUNDARIES)
    numeric_col = feature_column.numeric_column('f_2', dtype=dtypes.float32)

    labels = np.array([[0], [1], [1], [1], [1]], dtype=np.float32)
    input_fn = numpy_io.numpy_input_fn(
        x={
            'f_0': np.array(['bad', 'good', 'good', 'ok', 'bad']),
            'f_1': np.array([1, 1, 1, 1, 1]),
            'f_2': np.array([12.5, 1.0, -2.001, -2.0001, -1.999]),
        },
        y=labels,
        num_epochs=None,
        batch_size=5,
        shuffle=False)
    feature_columns = [numeric_col, bucketized_col, indicator_col]

    est = boosted_trees.boosted_trees_classifier_train_in_memory(
        train_input_fn=input_fn,
        feature_columns=feature_columns,
        n_trees=1,
        max_depth=5,
        quantile_sketch_epsilon=0.33)

    self._assert_checkpoint(
        est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)

    eval_res = est.evaluate(input_fn=input_fn, steps=1)
    self.assertAllClose(eval_res['accuracy'], 1.0)
Example #10
  def testRegressorTrainInMemoryWithFloatColumns(self):
    train_input_fn = _make_train_input_fn(is_classification=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

    est = boosted_trees.boosted_trees_regressor_train_in_memory(
        train_input_fn=train_input_fn,
        feature_columns=self._numeric_feature_columns,
        n_trees=1,
        max_depth=5,
        quantile_sketch_epsilon=0.33)

    # It will stop after 5 steps because of the max depth and num trees.
    self._assert_checkpoint(
        est.model_dir,
        global_step=5,
        finalized_trees=1,
        attempted_layers=5,
        bucket_boundaries=[[-2.001, -1.999, 12.5], [-3., 0.4995, 2.],
                           [-100., 20., 102.75]])

    # Check evaluate and predict.
    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
    self.assertAllClose(eval_res['average_loss'], 2.4182191)
    predictions = list(est.predict(input_fn=predict_input_fn))
    self.assertAllClose(
        [[0.663432], [0.31798199], [0.081902], [0.75843203], [1.86384201]],
        [pred['predictions'] for pred in predictions])
Example #11
    def test_dnn_and_linear_logits_are_added(self, fc_impl):
        with ops.Graph().as_default():
            variables_lib.Variable([[1.0]],
                                   name='linear/linear_model/x/weights')
            variables_lib.Variable([2.0],
                                   name='linear/linear_model/bias_weights')
            variables_lib.Variable([[3.0]], name='dnn/hiddenlayer_0/kernel')
            variables_lib.Variable([4.0], name='dnn/hiddenlayer_0/bias')
            variables_lib.Variable([[5.0]], name='dnn/logits/kernel')
            variables_lib.Variable([6.0], name='dnn/logits/bias')
            variables_lib.Variable(1, name='global_step', dtype=dtypes.int64)
            linear_testing_utils.save_variables_to_ckpt(self._model_dir)

        x_column = fc_impl.numeric_column('x')
        est = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=[x_column],
            dnn_hidden_units=[1],
            dnn_feature_columns=[x_column],
            model_dir=self._model_dir)
        input_fn = numpy_io.numpy_input_fn(x={'x': np.array([[10.]])},
                                           batch_size=1,
                                           shuffle=False)
        # linear logits = 10*1 + 2 = 12
        # dnn logits = (10*3 + 4)*5 + 6 = 176
        # logits = dnn + linear = 176 + 12 = 188
        self.assertAllClose(
            {
                prediction_keys.PredictionKeys.PREDICTIONS: [188.],
            }, next(est.predict(input_fn=input_fn)))
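A plain-Python check (illustrative only; it assumes the default ReLU hidden activation, which is a no-op here since every pre-activation is positive) of the hand-computed logits in the comments above:

x = 10.
linear_logits = x * 1. + 2.        # weights [[1.]], bias [2.]  -> 12.0
hidden = max(x * 3. + 4., 0.)      # ReLU(10*3 + 4)             -> 34.0
dnn_logits = hidden * 5. + 6.      # kernel [[5.]], bias [6.]   -> 176.0
print(linear_logits + dnn_logits)  # 188.0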
Example #12
  def testInferEstimatorWithCenterBias(self):
    train_input_fn = _make_train_input_fn(is_classification=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

    est = boosted_trees._BoostedTreesEstimator(
        feature_columns=self._feature_columns,
        n_batches_per_layer=1,
        n_trees=1,
        max_depth=5,
        center_bias=True,
        head=self._head)

    # It will stop after 7 steps because of the max depth and num trees (5 for
    # training and 2 for bias centering).
    num_steps = 100
    # Train for a few steps, and validate final checkpoint.
    est.train(train_input_fn, steps=num_steps)
    self._assert_checkpoint(
        est.model_dir, global_step=7, finalized_trees=1, attempted_layers=5)
    # Validate predictions.
    predictions = list(est.predict(input_fn=predict_input_fn))

    self.assertAllClose(
        [[1.634501], [1.325703], [1.187431], [2.019683], [2.832683]],
        [pred['predictions'] for pred in predictions])
Example #13
  def testBinaryClassifierTrainInMemoryFloatColumns(self):
    train_input_fn = _make_train_input_fn_dataset(is_classification=True)
    predict_input_fn = numpy_io.numpy_input_fn(
        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

    est = boosted_trees.boosted_trees_classifier_train_in_memory(
        train_input_fn=train_input_fn,
        feature_columns=self._numeric_feature_columns,
        n_trees=1,
        max_depth=5,
        quantile_sketch_epsilon=0.33)

    # It will stop after 5 steps because of the max depth and num trees.
    self._assert_checkpoint(
        est.model_dir,
        global_step=5,
        finalized_trees=1,
        attempted_layers=5,
        bucket_boundaries=[[-2.001, -1.999, 12.5], [-3., 0.4995, 2.],
                           [-100., 20., 102.75]])
    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
    self.assertAllClose(eval_res['accuracy'], 1.0)
    predictions = list(est.predict(input_fn=predict_input_fn))
    self.assertAllClose([[0], [1], [1], [0], [0]],
                        [pred['class_ids'] for pred in predictions])
Example #14
    def testNumpyInputFnWithYIsNone(self):
        a = np.arange(4) * 1.0
        b = np.arange(32, 36)
        x = {'a': a, 'b': b}
        y = None

        with self.cached_session() as session:
            input_fn = numpy_io.numpy_input_fn(x,
                                               y,
                                               batch_size=2,
                                               shuffle=False,
                                               num_epochs=1)
            features_tensor = input_fn()

            coord = tf.train.Coordinator()
            threads = tf.compat.v1.train.queue_runner.start_queue_runners(
                session, coord=coord)

            feature = session.run(features_tensor)
            self.assertEqual(len(feature), 2)
            self.assertAllEqual(feature['a'], [0, 1])
            self.assertAllEqual(feature['b'], [32, 33])

            session.run([features_tensor])
            with self.assertRaises(tf.errors.OutOfRangeError):
                session.run([features_tensor])

            coord.request_stop()
            coord.join(threads)
Example #15
    def testNumpyInputFnWithVeryLargeBatchSizeAndMultipleEpochs(self):
        a = np.arange(2) * 1.0
        b = np.arange(32, 34)
        x = {'a': a, 'b': b}
        y = np.arange(-32, -30)

        with self.cached_session() as session:
            input_fn = numpy_io.numpy_input_fn(x,
                                               y,
                                               batch_size=128,
                                               shuffle=False,
                                               num_epochs=2)
            features, target = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)

            res = session.run([features, target])
            self.assertAllEqual(res[0]['a'], [0, 1, 0, 1])
            self.assertAllEqual(res[0]['b'], [32, 33, 32, 33])
            self.assertAllEqual(res[1], [-32, -31, -32, -31])

            with self.assertRaises(errors.OutOfRangeError):
                session.run([features, target])

            coord.request_stop()
            coord.join(threads)
Example #16
  def test_train_with_dense_features(self):
    feature_dict = {
        'sex': np.int64([1, 1, 1, 1, 0]),
        'cp': np.int64([0, 3, 3, 2, 1]),
        'slope': np.int64([3, 2, 0, 3, 1]),
    }
    label = np.int64([0, 1, 0, 0, 0])
    train_input_fn = numpy_io.numpy_input_fn(
        x=feature_dict, y=label, num_epochs=1, shuffle=False)
    feature_columns = list()
    input_features = dict()
    for feature_name, data_array in feature_dict.items():
      feature_columns.append(
          feature_column.indicator_column(
              feature_column.categorical_column_with_identity(
                  key=feature_name,
                  num_buckets=np.size(np.unique(data_array)))))
      input_features[feature_name] = keras.layers.Input(
          name=feature_name,
          shape=(np.size(np.unique(data_array)),),
          dtype=dtypes.int64)

    x = dense_features.DenseFeatures(feature_columns)(input_features)
    x = keras.layers.Dense(16, activation='relu')(x)
    logits = keras.layers.Dense(1, activation='linear')(x)
    model = keras.Model(inputs=input_features, outputs=logits)

    model.compile(
        optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    estimator_model = keras_lib.model_to_estimator(keras_model=model)
    estimator_model.train(input_fn=train_input_fn, steps=5)
Example #17
    def testInferEstimator(self):
        train_input_fn = _make_train_input_fn(is_classification=False)
        predict_input_fn = numpy_io.numpy_input_fn(x=FEATURES_DICT,
                                                   y=None,
                                                   batch_size=1,
                                                   num_epochs=1,
                                                   shuffle=False)

        est = boosted_trees._BoostedTreesEstimator(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            n_trees=1,
            max_depth=5,
            head=self._head)

        # It will stop after 5 steps because of the max depth and num trees.
        num_steps = 100
        # Train for a few steps, and validate final checkpoint.
        est.train(train_input_fn, steps=num_steps)
        self._assert_checkpoint(est.model_dir,
                                global_step=5,
                                finalized_trees=1,
                                attempted_layers=5)
        # Validate predictions.
        predictions = list(est.predict(input_fn=predict_input_fn))
        self.assertAllClose(
            [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
            [pred['predictions'] for pred in predictions])
Example #18
 def test_train_op_calls_both_dnn_and_linear(self, fc_impl):
     opt = gradient_descent.GradientDescentOptimizer(1.)
     x_column = fc_impl.numeric_column('x')
     input_fn = numpy_io.numpy_input_fn(x={'x': np.array([[0.], [1.]])},
                                        y=np.array([[0.], [1.]]),
                                        batch_size=1,
                                        shuffle=False)
     est = dnn_linear_combined.DNNLinearCombinedClassifier(
         linear_feature_columns=[x_column],
         # verifies linear_optimizer is used only for linear part.
         linear_optimizer=self._mock_optimizer(opt, 'linear'),
         dnn_hidden_units=(2, 2),
         dnn_feature_columns=[x_column],
         # verifies dnn_optimizer is used only for dnn part.
         dnn_optimizer=self._mock_optimizer(opt, 'dnn'),
         model_dir=self._model_dir)
     est.train(input_fn, steps=1)
     # verifies train_op fires linear minimize op
     self.assertEqual(
         100.,
         checkpoint_utils.load_variable(self._model_dir, 'linear_called'))
     # verifies train_op fires dnn minimize op
     self.assertEqual(
         100., checkpoint_utils.load_variable(self._model_dir,
                                              'dnn_called'))
Example #19
  def test_evaluation_for_multi_dimensions(self):
    label_dim = 2
    with ops.Graph().as_default():
      variables.Variable([46.0, 58.0], name=BIAS_NAME)
      variables.Variable(100, name='global_step', dtype=dtypes.int64)
      save_variables_to_ckpt(self._model_dir)

    baseline_estimator = _baseline_estimator_fn(
        label_dimension=label_dim,
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={
            'age': np.array([[2., 4., 5.]]),
        },
        y=np.array([[46., 58.]]),
        batch_size=1,
        num_epochs=None,
        shuffle=False)
    eval_metrics = baseline_estimator.evaluate(input_fn=input_fn, steps=1)

    self.assertItemsEqual(
        (metric_keys.MetricKeys.LOSS, metric_keys.MetricKeys.LOSS_MEAN,
         metric_keys.MetricKeys.PREDICTION_MEAN,
         metric_keys.MetricKeys.LABEL_MEAN, ops.GraphKeys.GLOBAL_STEP),
        eval_metrics.keys())

    # Logits equal the bias, [46, 58], which matches the labels exactly.
    self.assertAlmostEqual(0, eval_metrics[metric_keys.MetricKeys.LOSS])
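The zero loss follows because the bias-only logits match the labels exactly; a one-line NumPy check (assuming a squared-error regression head, as the assertion implies):

import numpy as np

print(np.mean((np.array([[46., 58.]]) - np.array([[46., 58.]])) ** 2))  # 0.0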
Example #20
    def testNumpyInputFnWithDifferentDimensionsOfFeatures(self):
        a = np.array([[1, 2], [3, 4]])
        b = np.array([5, 6])
        x = {'a': a, 'b': b}
        y = np.arange(-32, -30)

        with self.cached_session() as session:
            input_fn = numpy_io.numpy_input_fn(x,
                                               y,
                                               batch_size=2,
                                               shuffle=False,
                                               num_epochs=1)
            features, target = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)

            res = session.run([features, target])
            self.assertAllEqual(res[0]['a'], [[1, 2], [3, 4]])
            self.assertAllEqual(res[0]['b'], [5, 6])
            self.assertAllEqual(res[1], [-32, -31])

            coord.request_stop()
            coord.join(threads)
Example #21
    def testNumpyInputFnWithBatchSizeLargerThanDataSize(self):
        batch_size = 10
        a = np.arange(4) * 1.0
        b = np.arange(32, 36)
        x = {'a': a, 'b': b}
        y = np.arange(-32, -28)

        with self.cached_session() as session:
            input_fn = numpy_io.numpy_input_fn(x,
                                               y,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               num_epochs=1)
            features, target = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)

            res = session.run([features, target])
            self.assertAllEqual(res[0]['a'], [0, 1, 2, 3])
            self.assertAllEqual(res[0]['b'], [32, 33, 34, 35])
            self.assertAllEqual(res[1], [-32, -31, -30, -29])

            with self.assertRaises(errors.OutOfRangeError):
                session.run([features, target])

            coord.request_stop()
            coord.join(threads)
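The same truncated-batch behavior can be sketched with tf.data (an aside, not from the original test): when the batch size exceeds the data size in a single epoch, one partial batch is produced and the next read hits end-of-sequence.

import tensorflow as tf

ds = tf.data.Dataset.range(4).repeat(1).batch(10)
print([batch.numpy().tolist() for batch in ds])  # [[0, 1, 2, 3]]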
Example #22
  def _create_input_fn(self, label_dimension, batch_size):
    """Creates input_fn for integration test."""
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    return train_input_fn, eval_input_fn, predict_input_fn
Example #23
  def testNumpyInputFn(self):
    """Tests complete flow with numpy_input_fn."""
    n_classes = 3
    batch_size = 10
    words = ['dog', 'cat', 'bird', 'the', 'a', 'sat', 'flew', 'slept']
    # Numpy only supports dense input, so all examples will have the same length.
    # TODO(b/73160931): Update test when support for prepadded data exists.
    sequence_length = 3

    features = []
    for _ in range(batch_size):
      sentence = random.sample(words, sequence_length)
      features.append(sentence)

    x_data = np.array(features)
    y_data = np.random.randint(n_classes, size=batch_size)

    train_input_fn = numpy_io.numpy_input_fn(
        x={'tokens': x_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'tokens': x_data},
        y=y_data,
        batch_size=batch_size,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'tokens': x_data},
        batch_size=batch_size,
        shuffle=False)

    col = seq_fc.sequence_categorical_column_with_hash_bucket(
        'tokens', hash_bucket_size=10)
    embed = fc.embedding_column(col, dimension=2)
    feature_columns = [embed]

    self._test_complete_flow(
        feature_columns=feature_columns,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        n_classes=n_classes,
        batch_size=batch_size)
Example #24
 def testNumpyInputFnWithYIsEmptyDict(self):
     a = np.arange(4) * 1.0
     b = np.arange(32, 36)
     x = {'a': a, 'b': b}
     y = {}
     with self.cached_session():
         with self.assertRaisesRegexp(ValueError, 'y cannot be empty'):
             failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
             failing_input_fn()
Example #25
 def testNumpyInputFnWithXAsNonDict(self):
     x = list(range(32, 36))
     y = np.arange(4)
     with self.cached_session():
         with self.assertRaisesRegexp(TypeError,
                                      'x must be a dict or array'):
             failing_input_fn = numpy_io.numpy_input_fn(x,
                                                        y,
                                                        batch_size=2,
                                                        shuffle=False,
                                                        num_epochs=1)
             failing_input_fn()
Example #26
    def test_train_premade_linear_model_with_dense_features(self):
        vocab_list = ['alpha', 'beta', 'gamma']
        vocab_val = [0.4, 0.6, 0.9]
        data = np.random.choice(vocab_list, size=256)
        y = np.zeros_like(data, dtype=np.float32)
        for vocab, val in zip(vocab_list, vocab_val):
            indices = np.where(data == vocab)
            y[indices] = val + np.random.uniform(
                low=-0.01, high=0.01, size=indices[0].shape)
        cat_column = tf.feature_column.categorical_column_with_vocabulary_list(
            key='symbol', vocabulary_list=vocab_list)
        ind_column = tf.feature_column.indicator_column(cat_column)
        keras_input = keras.layers.Input(name='symbol',
                                         shape=3,
                                         dtype=tf.dtypes.string)
        feature_layer = dense_features.DenseFeatures([ind_column])
        h = feature_layer({'symbol': keras_input})
        linear_model = linear.LinearModel(units=1)
        h = linear_model(h)

        model = keras.Model(inputs=keras_input, outputs=h)
        opt = gradient_descent.SGD(0.1)
        model.compile(opt, 'mse', ['mse'])
        train_input_fn = numpy_io.numpy_input_fn(x={'symbol': data},
                                                 y=y,
                                                 num_epochs=20,
                                                 shuffle=False)
        eval_input_fn = numpy_io.numpy_input_fn(x={'symbol': data},
                                                y=y,
                                                num_epochs=20,
                                                shuffle=False)
        est = keras_lib.model_to_estimator(keras_model=model,
                                           config=self._config,
                                           checkpoint_format='saver')
        before_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
        est.train(input_fn=train_input_fn, steps=30)
        after_eval_results = est.evaluate(input_fn=eval_input_fn, steps=1)
        self.assertLess(after_eval_results['loss'],
                        before_eval_results['loss'])
        self.assertLess(after_eval_results['loss'], 0.05)
Example #27
  def testCheckpointCompatibleForRegressor(self):
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    self._testCheckpointCompatibleWithNonAnnotatedEstimator(
        train_input_fn,
        predict_input_fn,
        dnn.DNNRegressor,
        dnn_with_layer_annotations.DNNRegressorWithLayerAnnotations,
        prediction_key=prediction_keys.PredictionKeys.PREDICTIONS,
        estimator_args={'label_dimension': label_dimension})
Example #28
    def testNumpyInputFnWithBatchSizeNotDividedByDataSizeAndMultipleEpochs(
            self):
        batch_size = 2
        a = np.arange(3) * 1.0
        b = np.arange(32, 35)
        x = {'a': a, 'b': b}
        y = np.arange(-32, -29)

        with self.cached_session() as session:
            input_fn = numpy_io.numpy_input_fn(x,
                                               y,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               num_epochs=3)
            features, target = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)

            res = session.run([features, target])
            self.assertAllEqual(res[0]['a'], [0, 1])
            self.assertAllEqual(res[0]['b'], [32, 33])
            self.assertAllEqual(res[1], [-32, -31])

            res = session.run([features, target])
            self.assertAllEqual(res[0]['a'], [2, 0])
            self.assertAllEqual(res[0]['b'], [34, 32])
            self.assertAllEqual(res[1], [-30, -32])

            res = session.run([features, target])
            self.assertAllEqual(res[0]['a'], [1, 2])
            self.assertAllEqual(res[0]['b'], [33, 34])
            self.assertAllEqual(res[1], [-31, -30])

            res = session.run([features, target])
            self.assertAllEqual(res[0]['a'], [0, 1])
            self.assertAllEqual(res[0]['b'], [32, 33])
            self.assertAllEqual(res[1], [-32, -31])

            res = session.run([features, target])
            self.assertAllEqual(res[0]['a'], [2])
            self.assertAllEqual(res[0]['b'], [34])
            self.assertAllEqual(res[1], [-30])

            with self.assertRaises(errors.OutOfRangeError):
                session.run([features, target])

            coord.request_stop()
            coord.join(threads)
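The batches asserted above straddle epoch boundaries because the epochs are concatenated before batching; a tf.data sketch (an aside, assuming equivalent semantics) of the same pattern with 3 examples, 3 epochs, and batch_size=2:

import tensorflow as tf

ds = tf.data.Dataset.range(3).repeat(3).batch(2)
print([b.numpy().tolist() for b in ds])
# [[0, 1], [2, 0], [1, 2], [0, 1], [2]]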
Example #29
 def test_train_vs_eval_mode(self):
   data = self._create_data()
   train_input_fn = numpy_io.numpy_input_fn(
       x={'x': data},
       y=data,
       batch_size=self._batch_size,
       num_epochs=None,
       shuffle=True)
   eval_input_fn = numpy_io.numpy_input_fn(
       x={'x': data}, y=data, batch_size=self._batch_size, shuffle=False)
   est = self._get_estimator()
   with patch.object(
       tf.compat.v2.keras.layers.DenseFeatures, 'call',
       return_value=data) as mock_dense_features_call:
     est.train(train_input_fn, steps=10)
     est.evaluate(eval_input_fn)
   train_args, eval_args = mock_dense_features_call.call_args_list
   # DenseFeatures should have been called with training=True in train.
   _, train_training_kwarg = train_args
   self.assertTrue(train_training_kwarg['training'])
   # DenseFeatures should have been called with training=False in eval.
   _, eval_training_kwarg = eval_args
   self.assertFalse(eval_training_kwarg['training'])
Example #30
 def testNumpyInputFnWithDuplicateKeysInXAndY(self):
     a = np.arange(4) * 1.0
     b = np.arange(32, 36)
     x = {'a': a, 'b': b}
     y = {
         'y1': np.arange(-32, -28),
         'a': a,
         'y2': np.arange(32, 28, -1),
         'b': b
     }
     with self.cached_session():
         with self.assertRaisesRegexp(
                 ValueError, '2 duplicate keys are found in both x and y'):
             failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
             failing_input_fn()
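The "2 duplicate keys" in the expected error message is simply the size of the key overlap between x and y; a quick check (illustrative only):

x_keys = {'a', 'b'}
y_keys = {'y1', 'a', 'y2', 'b'}
print(len(x_keys & y_keys), sorted(x_keys & y_keys))  # 2 ['a', 'b']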