def train_and_eval():
  """Train and evaluate the model."""
  model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
  print('model directory = %s' % model_dir)

  est = build_estimator(model_dir)

  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

  train_input_fn = numpy_io.numpy_input_fn(
      x={'images': mnist.train.images},
      y=mnist.train.labels.astype(numpy.int32),
      batch_size=FLAGS.batch_size,
      num_epochs=None,
      shuffle=True)
  est.fit(input_fn=train_input_fn, steps=None)

  metric_name = 'accuracy'
  metric = {
      metric_name:
          metric_spec.MetricSpec(
              eval_metrics.get_metric(metric_name),
              prediction_key=eval_metrics.get_prediction_key(metric_name))
  }

  test_input_fn = numpy_io.numpy_input_fn(
      x={'images': mnist.test.images},
      y=mnist.test.labels.astype(numpy.int32),
      num_epochs=1,
      batch_size=FLAGS.batch_size,
      shuffle=False)

  results = est.evaluate(input_fn=test_input_fn, metrics=metric)
  for key in sorted(results):
    print('%s: %s' % (key, results[key]))
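The examples collected here follow the same basic pattern: numpy_input_fn wraps in-memory NumPy arrays into an input_fn that an Estimator can consume, typically with num_epochs=None plus shuffle=True for training and a single ordered pass (num_epochs=1, shuffle=False) for evaluation or prediction. A minimal, self-contained sketch of that pattern (hypothetical feature and label arrays; assumes the public TF 1.x alias tf.compat.v1.estimator.inputs.numpy_input_fn, which corresponds to numpy_io.numpy_input_fn used below):

import numpy as np
import tensorflow.compat.v1 as tf

# Hypothetical in-memory data: 4 examples, 2 features each.
features = {'x': np.arange(8, dtype=np.float32).reshape(4, 2)}
labels = np.array([0, 1, 1, 0], dtype=np.int32)

# Training: repeat indefinitely and shuffle; the Estimator decides when to stop.
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=features, y=labels, batch_size=2, num_epochs=None, shuffle=True)

# Evaluation/prediction: a single ordered pass over the data.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=features, y=labels, batch_size=2, num_epochs=1, shuffle=False)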
  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    batch_size = 5
    img_size = 8
    channel_size = 3
    label_size = 3
    image_data = np.zeros(
        [batch_size, img_size, img_size, channel_size], dtype=np.float32)
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': image_data},
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': image_data}, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': image_data}, shuffle=False)

    train_input_fn = self._numpy_input_fn_wrapper(train_input_fn, batch_size,
                                                  label_size)
    eval_input_fn = self._numpy_input_fn_wrapper(eval_input_fn, batch_size,
                                                 label_size)
    predict_input_fn = self._numpy_input_fn_wrapper(predict_input_fn,
                                                    batch_size, label_size)

    predict_input_fn = estimator.stargan_prediction_input_fn_wrapper(
        predict_input_fn)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        prediction_size=[batch_size, img_size, img_size, channel_size])
Example 3
def get_resource_for_simple_model(is_sequential, is_evaluate):
  model = simple_sequential_model(
  ) if is_sequential else simple_functional_model()
  if is_sequential:
    model.build()
  input_name = model.input_names[0]
  np.random.seed(_RANDOM_SEED)
  (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data(
      train_samples=_TRAIN_SIZE,
      test_samples=50,
      input_shape=_INPUT_SIZE,
      num_classes=_NUM_CLASS)
  y_train = keras.utils.to_categorical(y_train)
  y_test = keras.utils.to_categorical(y_test)

  train_input_fn = numpy_io.numpy_input_fn(
      x={input_name: x_train},
      y=y_train,
      shuffle=False,
      num_epochs=None,
      batch_size=16)

  evaluate_input_fn = numpy_io.numpy_input_fn(
      x={input_name: x_test}, y=y_test, num_epochs=1, shuffle=False)

  predict_input_fn = numpy_io.numpy_input_fn(
      x={input_name: x_test}, num_epochs=1, shuffle=False)

  inference_input_fn = evaluate_input_fn if is_evaluate else predict_input_fn

  return model, (x_train, y_train), (x_test,
                                     y_test), train_input_fn, inference_input_fn
  def test_numpy_input_fn_lrdecay(self):
    """Tests complete flow with numpy_input_fn."""
    input_dim = 4
    batch_size = 5
    data = np.zeros([batch_size, input_dim])
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        batch_size=batch_size,
        shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        prediction_size=[batch_size, input_dim],
        lr_decay=True)
Example 5
  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    n_classes = 3
    input_dimension = 2
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data},
        y=y_data,
        batch_size=batch_size,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data},
        batch_size=batch_size,
        shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=input_dimension,
        n_classes=n_classes,
        batch_size=batch_size)
  def test_numpy_input_fn(self):
    """Tests complete flow with numpy_input_fn."""
    label_dimension = 2
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        batch_size=batch_size,
        shuffle=False)

    self._test_complete_flow(
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        predict_input_fn=predict_input_fn,
        input_dimension=label_dimension,
        label_dimension=label_dimension,
        batch_size=batch_size)
  def testCheckpointCompatibleForClassifier(self):
    n_classes = 2
    input_dimension = 2
    batch_size = 10
    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    y_data = np.reshape(
        np.rint(data[:batch_size]).astype(np.int64), (batch_size, 1))
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data}, batch_size=batch_size, shuffle=False)

    self._testCheckpointCompatibleWithNonAnnotatedEstimator(
        train_input_fn,
        predict_input_fn,
        dnn.DNNClassifier,
        dnn_with_layer_annotations.DNNClassifierWithLayerAnnotations,
        prediction_key=prediction_keys.PredictionKeys.PROBABILITIES,
        estimator_args={'n_classes': n_classes})
Example 8
 def testNumpyInputFnWithNonBoolShuffle(self):
   x = np.arange(32, 36)
   y = np.arange(4)
   with self.test_session():
     with self.assertRaisesRegexp(TypeError,
                                  'shuffle must be explicitly set as boolean'):
       # Default shuffle is None.
       numpy_io.numpy_input_fn(x, y)
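For contrast, a minimal sketch of a call that satisfies this check (hypothetical arrays; numpy_io imported as in the surrounding tests; shuffle passed explicitly as a boolean):

import numpy as np

x = {'x': np.arange(32, 36)}
y = np.arange(4)
# Passing shuffle explicitly as a boolean avoids the error asserted above.
input_fn = numpy_io.numpy_input_fn(x, y, batch_size=2, num_epochs=1, shuffle=False)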
Example 9
  def test_complete_flow_with_a_simple_linear_model(self):

    def _model_fn(features, labels, mode):
      predictions = layers.dense(
          features['x'], 1, kernel_initializer=init_ops.zeros_initializer())
      export_outputs = {
          'predictions': export_output.RegressionOutput(predictions)
      }

      if mode == model_fn_lib.ModeKeys.PREDICT:
        return model_fn_lib.EstimatorSpec(
            mode, predictions=predictions, export_outputs=export_outputs)

      loss = losses.mean_squared_error(labels, predictions)
      train_op = training.GradientDescentOptimizer(learning_rate=0.5).minimize(
          loss, training.get_global_step())
      eval_metric_ops = {
          'absolute_error': metrics_lib.mean_absolute_error(
              labels, predictions)
      }

      return model_fn_lib.EstimatorSpec(
          mode,
          predictions=predictions,
          loss=loss,
          train_op=train_op,
          eval_metric_ops=eval_metric_ops,
          export_outputs=export_outputs)

    est = estimator.Estimator(model_fn=_model_fn)
    data = np.linspace(0., 1., 100, dtype=np.float32).reshape(-1, 1)

    # TRAIN
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=50, num_epochs=None, shuffle=True)
    est.train(train_input_fn, steps=200)

    # EVALUATE
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=50, num_epochs=1, shuffle=True)
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(200, scores['global_step'])
    self.assertGreater(0.1, scores['absolute_error'])

    # PREDICT
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=None, batch_size=10, num_epochs=1, shuffle=False)
    predictions = list(est.predict(predict_input_fn))
    self.assertAllClose(data, predictions, atol=0.01)

    # EXPORT
    feature_spec = {'x': parsing_ops.FixedLenFeature([1], dtypes.float32)}
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example 10
 def testNumpyInputFnWithNonBoolShuffle(self):
   x = np.arange(32, 36)
   y = np.arange(4)
   with self.cached_session():
     with self.assertRaisesRegexp(ValueError,
                                  'shuffle must be provided and explicitly '
                                  'set as boolean'):
       # Default shuffle is None.
       numpy_io.numpy_input_fn(x, y)
Example 11
def _get_regression_input_fns():
  boston = base.load_boston()
  data = boston.data.astype(np.float32)
  labels = boston.target.astype(np.int32)

  train_input_fn = numpy_io.numpy_input_fn(
      x=data, y=labels, batch_size=506, num_epochs=None, shuffle=False)

  predict_input_fn = numpy_io.numpy_input_fn(
      x=data[:1,], y=None, batch_size=1, num_epochs=1, shuffle=False)
  return train_input_fn, predict_input_fn
Example 12
def _get_classification_input_fns():
  iris = base.load_iris()
  data = iris.data.astype(np.float32)
  labels = iris.target.astype(np.int32)

  train_input_fn = numpy_io.numpy_input_fn(
      x=data, y=labels, batch_size=150, num_epochs=None, shuffle=False)

  predict_input_fn = numpy_io.numpy_input_fn(
      x=data[:1,], y=None, batch_size=1, num_epochs=1, shuffle=False)
  return train_input_fn, predict_input_fn
  def test_complete_flow_with_mode(self, distribution):
    label_dimension = 2
    input_dimension = label_dimension
    batch_size = 10
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)
    train_input_fn = self.dataset_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size // len(distribution.worker_devices),
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, y=data, batch_size=batch_size, shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data}, batch_size=batch_size, shuffle=False)

    linear_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    dnn_feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,))
    ]
    feature_columns = linear_feature_columns + dnn_feature_columns
    estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_feature_columns,
        dnn_hidden_units=(2, 2),
        dnn_feature_columns=dnn_feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir,
        # TODO(isaprykin): Work around the colocate_with error.
        dnn_optimizer=adagrad.AdagradOptimizer(0.001),
        linear_optimizer=adagrad.AdagradOptimizer(0.001),
        config=run_config.RunConfig(train_distribute=distribution))

    num_steps = 10
    estimator.train(train_input_fn, steps=num_steps)

    scores = estimator.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example 14
  def test_complete_flow(self):
    label_dimension = 2
    batch_size = 10
    feature_columns = [feature_column.numeric_column('x', shape=(2,))]
    est = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=feature_columns,
        label_dimension=label_dimension,
        model_dir=self._model_dir)
    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
    data = data.reshape(batch_size, label_dimension)

    # TRAIN
    # learn y = x
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    num_steps = 200
    est.train(train_input_fn, steps=num_steps)

    # EVALUATE
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        y=data,
        batch_size=batch_size,
        shuffle=False)
    scores = est.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    # PREDICT
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': data},
        batch_size=batch_size,
        shuffle=False)
    predictions = np.array([
        x[prediction_keys.PredictionKeys.PREDICTIONS]
        for x in est.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, label_dimension), predictions.shape)
    # TODO(ptucker): Deterministic test for predicted values?

    # EXPORT
    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
                                       serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))
Example 15
def main(*args):
    """Creates an estimator for the boston house-prices datase.

    References:
        * This dataset concerns housing values in Boston suburbs.
        It's based on the "Boston Housing Dataset" from University of California, Irvine,
        which in turn was taken from the StatLib library maintained at Carnegie Mellon University.

    Returns:
        * https://archive.ics.uci.edu/ml/datasets/Housing
    """
    # Load dataset
    boston = datasets.load_boston()
    x, y = boston.data, boston.target

    # Split dataset into train / test
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x, y, test_size=0.2, random_state=42)

    # Scale data (training set) to 0 mean and unit standard deviation.
    scaler = preprocessing.StandardScaler()
    x_train = scaler.fit_transform(x_train)

    def graph_fn(mode, features):
        x = plx.layers.FullyConnected(
            mode, num_units=32, activation='relu', dropout=0.3)(features['x'])
        x = plx.layers.FullyConnected(mode, num_units=32, activation='relu', dropout=0.3)(x)
        return plx.layers.FullyConnected(mode, num_units=1, dropout=0.3)(x)

    def model_fn(features, labels, mode):
        model = plx.models.Regressor(
            mode, graph_fn=graph_fn,
            loss_config=plx.configs.LossConfig(module='mean_squared_error'),
            optimizer_config=plx.configs.OptimizerConfig(module='sgd', learning_rate=0.01),
            summaries='all')
        return model(features, labels)

    estimator = plx.estimators.Estimator(model_fn=model_fn,
                                         model_dir="/tmp/polyaxon_logs/boston")

    estimator.train(input_fn=numpy_input_fn(
        {'x': np.asarray(x_train, dtype=np.float32)}, np.expand_dims(y_train, axis=1),
        shuffle=False, num_epochs=5000, batch_size=64))

    x_test = scaler.transform(x_test)

    estimator.evaluate(input_fn=numpy_input_fn(
        {'x': np.asarray(x_test, dtype=np.float32)}, np.expand_dims(y_test, axis=1),
        shuffle=False, num_epochs=1, batch_size=32))
Example 16
def _quantile_regression_input_fns(two_dimension=False):
  # The data generation is taken from
  # http://scikit-learn.org/stable/auto_examples/ensemble/plot_gradient_boosting_quantile.html
  np.random.seed(1)

  def f(x):
    """The function to predict."""
    return x * np.sin(x)

  def g(x):
    """The function to predict."""
    return x * np.cos(x)

  #  Training data.
  x = np.atleast_2d(np.random.uniform(0, 10.0,
                                      size=_QUANTILE_REGRESSION_SIZE)).T
  x = x.astype(np.float32)

  # Labels.
  if not two_dimension:
    y = f(x).ravel()
  else:
    y = np.column_stack((f(x).ravel(), g(x).ravel()))

  # Add random noise.
  dy = 1.5 + 1.0 * np.random.random(y.shape)
  noise = np.random.normal(0, dy)
  y += noise
  y_original = y.astype(np.float32)
  if not two_dimension:
    y = y.reshape(_QUANTILE_REGRESSION_SIZE, 1)

  train_input_fn = numpy_io.numpy_input_fn(
      x=x,
      y=y,
      batch_size=_QUANTILE_REGRESSION_SIZE,
      num_epochs=None,
      shuffle=True)

  # Test on the training data to make sure the predictions are calibrated.
  test_input_fn = numpy_io.numpy_input_fn(
      x=x,
      y=y,
      batch_size=_QUANTILE_REGRESSION_SIZE,
      num_epochs=1,
      shuffle=False)

  return train_input_fn, test_input_fn, y_original
Example 17
  def testNumpyInputFnWithYIsNone(self):
    a = np.arange(4) * 1.0
    b = np.arange(32, 36)
    x = {'a': a, 'b': b}
    y = None

    with self.test_session() as session:
      input_fn = numpy_io.numpy_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)
      features_tensor = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      feature = session.run(features_tensor)
      self.assertEqual(len(feature), 2)
      self.assertAllEqual(feature['a'], [0, 1])
      self.assertAllEqual(feature['b'], [32, 33])

      session.run([features_tensor])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features_tensor])

      coord.request_stop()
      coord.join(threads)
Example 18
  def testInferEstimator(self):
    train_input_fn = _make_train_input_fn(is_classification=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

    est = boosted_trees._BoostedTreesEstimator(
        feature_columns=self._feature_columns,
        n_batches_per_layer=1,
        n_trees=1,
        max_depth=5,
        head=self._head)

    # It will stop after 5 steps because of the max depth and num trees.
    num_steps = 100
    # Train for a few steps, and validate final checkpoint.
    est.train(train_input_fn, steps=num_steps)
    self._assert_checkpoint(est.model_dir, 6)

    predictions = list(est.predict(input_fn=predict_input_fn))
    self.assertEqual(5, len(predictions))
    self.assertAllClose([0.703549], predictions[0]['predictions'])
    self.assertAllClose([0.266539], predictions[1]['predictions'])
    self.assertAllClose([0.256479], predictions[2]['predictions'])
    self.assertAllClose([1.088732], predictions[3]['predictions'])
    self.assertAllClose([1.901732], predictions[4]['predictions'])
Example 19
  def testNumpyInputFnWithYAsDict(self):
    a = np.arange(4) * 1.0
    b = np.arange(32, 36)
    x = {'a': a, 'b': b}
    y = {'y1': np.arange(-32, -28), 'y2': np.arange(32, 28, -1)}

    with self.test_session() as session:
      input_fn = numpy_io.numpy_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)
      features_tensor, targets_tensor = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      features, targets = session.run([features_tensor, targets_tensor])
      self.assertEqual(len(features), 2)
      self.assertAllEqual(features['a'], [0, 1])
      self.assertAllEqual(features['b'], [32, 33])
      self.assertEqual(len(targets), 2)
      self.assertAllEqual(targets['y1'], [-32, -31])
      self.assertAllEqual(targets['y2'], [32, 31])

      session.run([features_tensor, targets_tensor])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features_tensor, targets_tensor])

      coord.request_stop()
      coord.join(threads)
Example 20
  def testBinaryClassifierTrainInMemoryAndEvalAndInfer(self):
    train_input_fn = _make_train_input_fn(is_classification=True)
    predict_input_fn = numpy_io.numpy_input_fn(
        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

    est = boosted_trees.boosted_trees_classifier_train_in_memory(
        train_input_fn=train_input_fn,
        feature_columns=self._feature_columns,
        n_trees=1,
        max_depth=5)
    # It will stop after 5 steps because of the max depth and num trees.
    self._assert_checkpoint(est.model_dir, 6)

    # Check eval.
    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
    self.assertAllClose(eval_res['accuracy'], 1.0)

    # Check predict that all labels are correct.
    predictions = list(est.predict(input_fn=predict_input_fn))
    self.assertEqual(5, len(predictions))
    self.assertAllClose([0], predictions[0]['class_ids'])
    self.assertAllClose([1], predictions[1]['class_ids'])
    self.assertAllClose([1], predictions[2]['class_ids'])
    self.assertAllClose([0], predictions[3]['class_ids'])
    self.assertAllClose([0], predictions[4]['class_ids'])
Example 21
  def testRegressorTrainInMemoryAndEvalAndInfer(self):
    train_input_fn = _make_train_input_fn(is_classification=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

    est = boosted_trees.boosted_trees_regressor_train_in_memory(
        train_input_fn=train_input_fn,
        feature_columns=self._feature_columns,
        n_trees=1,
        max_depth=5)
    # It will stop after 5 steps because of the max depth and num trees.
    self._assert_checkpoint(est.model_dir, 6)

    # Check eval.
    eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
    self.assertAllClose(eval_res['average_loss'], 2.2136638)

    # Validate predictions.
    predictions = list(est.predict(input_fn=predict_input_fn))
    self.assertEqual(5, len(predictions))
    self.assertAllClose([0.703549], predictions[0]['predictions'])
    self.assertAllClose([0.266539], predictions[1]['predictions'])
    self.assertAllClose([0.256479], predictions[2]['predictions'])
    self.assertAllClose([1.088732], predictions[3]['predictions'])
    self.assertAllClose([1.901732], predictions[4]['predictions'])
Example 22
  def test_multi_dim(self):
    """Asserts predictions for multi-dimensional input and logits."""
    # Create checkpoint: num_inputs=2, hidden_units=(2, 2), num_outputs=3.
    _create_checkpoint((
        ([[.6, .5], [-.6, -.5]], [.1, -.1]),
        ([[1., .8], [-.8, -1.]], [.2, -.2]),
        ([[-1., 1., .5], [-1., 1., .5]], [.3, -.3, .0]),
    ), 100, self._model_dir)

    # Create DNNRegressor and predict.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('x', shape=(2,)),),
        label_dimension=3,
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        # Inputs shape is (batch_size, num_inputs).
        x={'x': np.array([[10., 8.]])},
        batch_size=1,
        shuffle=False)
    # Uses identical numbers as
    # DNNModelFnTest.test_multi_dim_input_multi_dim_logits.
    # See that test for calculation of logits.
    # logits = [[-0.48, 0.48, 0.39]] => predictions = [-0.48, 0.48, 0.39]
    self.assertAllClose({
        prediction_keys.PredictionKeys.PREDICTIONS: [-0.48, 0.48, 0.39],
    }, next(dnn_regressor.predict(input_fn=input_fn)))
 def test_train_op_calls_both_dnn_and_linear(self):
   opt = gradient_descent.GradientDescentOptimizer(1.)
   x_column = feature_column.numeric_column('x')
   input_fn = numpy_io.numpy_input_fn(
       x={'x': np.array([[0.], [1.]])},
       y=np.array([[0.], [1.]]),
       batch_size=1,
       shuffle=False)
   est = dnn_linear_combined.DNNLinearCombinedClassifier(
       linear_feature_columns=[x_column],
       # verifies linear_optimizer is used only for linear part.
       linear_optimizer=self._mock_optimizer(opt, 'linear'),
       dnn_hidden_units=(2, 2),
       dnn_feature_columns=[x_column],
       # verifies dnn_optimizer is used only for the dnn part.
       dnn_optimizer=self._mock_optimizer(opt, 'dnn'),
       model_dir=self._model_dir)
   est.train(input_fn, steps=1)
   # verifies train_op fires linear minimize op
   self.assertEqual(100.,
                    checkpoint_utils.load_variable(
                        self._model_dir, 'linear_called'))
   # verifies train_op fires dnn minimize op
   self.assertEqual(100.,
                    checkpoint_utils.load_variable(
                        self._model_dir, 'dnn_called'))
Example 24
  def testFirstCheckpointWorksFine(self):
    """Tests that eval/pred doesn't crash with the very first checkpoint.

    The step-0 checkpoint will have only an empty ensemble, and a separate eval
    job might read from it and crash.
    This test ensures that prediction/evaluation works fine with it.
    """
    input_fn = _make_train_input_fn(is_classification=True)
    predict_input_fn = numpy_io.numpy_input_fn(
        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

    est = boosted_trees.BoostedTreesClassifier(
        feature_columns=self._feature_columns,
        n_batches_per_layer=1,
        n_trees=1,
        max_depth=5)

    class BailOutWithoutTraining(session_run_hook.SessionRunHook):

      def before_run(self, run_context):
        raise StopIteration('to bail out.')

    est.train(input_fn, steps=100,  # must stop at 0 anyway.
              hooks=[BailOutWithoutTraining()])
    self._assert_checkpoint(
        est.model_dir, global_step=0, finalized_trees=0, attempted_layers=0)
    # Empty ensemble returns 0 logits, so that all output labels are 0.
    eval_res = est.evaluate(input_fn=input_fn, steps=1)
    self.assertAllClose(eval_res['accuracy'], 0.6)
    predictions = list(est.predict(input_fn=predict_input_fn))
    self.assertAllClose([[0], [0], [0], [0], [0]],
                        [pred['class_ids'] for pred in predictions])
Example 25
  def test_one_dim(self):
    """Asserts predictions for one-dimensional input and logits."""
    dnn_testing_utils.create_checkpoint(
        (([[.6, .5]], [.1, -.1]), ([[1., .8], [-.8, -1.]], [.2, -.2]),
         ([[-1.], [1.]], [.3]),),
        global_step=0,
        model_dir=self._model_dir)

    dnn_classifier = dnn.DNNClassifier(
        hidden_units=(2, 2),
        feature_columns=(feature_column.numeric_column('x'),),
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
    # Uses identical numbers as DNNModelTest.test_one_dim_logits.
    # See that test for calculation of logits.
    # logits = [-2.08] =>
    # logistic = exp(-2.08)/(1 + exp(-2.08)) = 0.11105597
    # probabilities = [1-logistic, logistic] = [0.88894403, 0.11105597]
    # class_ids = argmax(probabilities) = [0]
    predictions = next(dnn_classifier.predict(input_fn=input_fn))
    self.assertAllClose([-2.08],
                        predictions[prediction_keys.PredictionKeys.LOGITS])
    self.assertAllClose([0.11105597],
                        predictions[prediction_keys.PredictionKeys.LOGISTIC])
    self.assertAllClose(
        [0.88894403,
         0.11105597], predictions[prediction_keys.PredictionKeys.PROBABILITIES])
    self.assertAllClose([0],
                        predictions[prediction_keys.PredictionKeys.CLASS_IDS])
    self.assertAllEqual([b'0'],
                        predictions[prediction_keys.PredictionKeys.CLASSES])
Example 26
  def test_linear_model_numpy_input_fn(self):
    price = fc.numeric_column('price')
    price_buckets = fc.bucketized_column(price, boundaries=[0., 10., 100.,])
    body_style = fc.categorical_column_with_vocabulary_list(
        'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])

    input_fn = numpy_io.numpy_input_fn(
        x={
            'price': np.array([-1., 2., 13., 104.]),
            'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']),
        },
        batch_size=2,
        shuffle=False)
    features = input_fn()
    net = fc.linear_model(features, [price_buckets, body_style])
    # self.assertEqual(1 + 3 + 5, net.shape[1])
    with self._initialized_session() as sess:
      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(sess, coord=coord)

      bias = self._get_linear_model_bias()
      price_buckets_var = self._get_linear_model_column_var(price_buckets)
      body_style_var = self._get_linear_model_column_var(body_style)

      sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]]))
      sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]]))
      sess.run(bias.assign([5.]))

      self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net))

      coord.request_stop()
      coord.join(threads)
Example 27
 def testNumpyInputFnWithXIsEmptyArray(self):
   x = np.array([[], []])
   y = np.arange(4)
   with self.test_session():
     with self.assertRaisesRegexp(ValueError, 'x cannot be an empty'):
       failing_input_fn = numpy_io.numpy_input_fn(x, y, shuffle=False)
       failing_input_fn()
  def test_dnn_and_linear_logits_are_added(self):
    with ops.Graph().as_default():
      variables_lib.Variable([[1.0]], name='linear/linear_model/x/weights')
      variables_lib.Variable([2.0], name='linear/linear_model/bias_weights')
      variables_lib.Variable([[3.0]], name='dnn/hiddenlayer_0/kernel')
      variables_lib.Variable([4.0], name='dnn/hiddenlayer_0/bias')
      variables_lib.Variable([[5.0]], name='dnn/logits/kernel')
      variables_lib.Variable([6.0], name='dnn/logits/bias')
      variables_lib.Variable(1, name='global_step', dtype=dtypes.int64)
      linear_testing_utils.save_variables_to_ckpt(self._model_dir)

    x_column = feature_column.numeric_column('x')
    est = dnn_linear_combined.DNNLinearCombinedRegressor(
        linear_feature_columns=[x_column],
        dnn_hidden_units=[1],
        dnn_feature_columns=[x_column],
        model_dir=self._model_dir)
    input_fn = numpy_io.numpy_input_fn(
        x={'x': np.array([[10.]])}, batch_size=1, shuffle=False)
    # linear logits = 10*1 + 2 = 12
    # dnn logits = (10*3 + 4)*5 + 6 = 176
    # logits = dnn + linear = 176 + 12 = 188
    self.assertAllClose(
        {
            prediction_keys.PredictionKeys.PREDICTIONS: [188.],
        },
        next(est.predict(input_fn=input_fn)))
Example 29
  def testNumpyInputFnWithBatchSizeNotDividedByDataSize(self):
    batch_size = 2
    a = np.arange(5) * 1.0
    b = np.arange(32, 37)
    x = {'a': a, 'b': b}
    y = np.arange(-32, -27)

    with self.test_session() as session:
      input_fn = numpy_io.numpy_input_fn(
          x, y, batch_size=batch_size, shuffle=False, num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], [0, 1])
      self.assertAllEqual(res[0]['b'], [32, 33])
      self.assertAllEqual(res[1], [-32, -31])

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], [2, 3])
      self.assertAllEqual(res[0]['b'], [34, 35])
      self.assertAllEqual(res[1], [-30, -29])

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], [4])
      self.assertAllEqual(res[0]['b'], [36])
      self.assertAllEqual(res[1], [-28])

      with self.assertRaises(errors.OutOfRangeError):
        session.run([features, target])

      coord.request_stop()
      coord.join(threads)
Example 30
  def testNumpyInputFn(self):
    a = np.arange(4) * 1.0
    b = np.arange(32, 36)
    x = {'a': a, 'b': b}
    y = np.arange(-32, -28)

    with self.test_session() as session:
      input_fn = numpy_io.numpy_input_fn(
          x, y, batch_size=2, shuffle=False, num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], [0, 1])
      self.assertAllEqual(res[0]['b'], [32, 33])
      self.assertAllEqual(res[1], [-32, -31])

      session.run([features, target])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features, target])

      coord.request_stop()
      coord.join(threads)
Example 31
def main(*args):
    """Creates an estimator for the boston house-prices datase.

    References:
        * This dataset concerns housing values in Boston suburbs.
        It's based on the "Boston Housing Dataset" from University of California, Irvine,
        which in turn was taken from the StatLib library maintained at Carnegie Mellon University.

    Returns:
        * https://archive.ics.uci.edu/ml/datasets/Housing
    """
    # Load dataset
    boston = datasets.load_boston()
    x, y = boston.data, boston.target

    # Split dataset into train / test
    x_train, x_test, y_train, y_test = model_selection.train_test_split(
        x, y, test_size=0.2, random_state=42)

    # Scale data (training set) to 0 mean and unit standard deviation.
    scaler = preprocessing.StandardScaler()
    x_train = scaler.fit_transform(x_train)

    def graph_fn(mode, features):
        x = plx.layers.FullyConnected(mode,
                                      num_units=32,
                                      activation='relu',
                                      dropout=0.3)(features['x'])
        x = plx.layers.FullyConnected(mode,
                                      num_units=32,
                                      activation='relu',
                                      dropout=0.3)(x)
        return plx.layers.FullyConnected(mode, num_units=1, dropout=0.3)(x)

    def model_fn(features, labels, mode):
        model = plx.models.Regressor(
            mode,
            graph_fn=graph_fn,
            loss_config=plx.configs.LossConfig(module='mean_squared_error'),
            optimizer_config=plx.configs.OptimizerConfig(module='sgd',
                                                         learning_rate=0.01),
            summaries='all')
        return model(features, labels)

    estimator = plx.estimators.Estimator(model_fn=model_fn,
                                         model_dir="/tmp/polyaxon_logs/boston")

    estimator.train(
        input_fn=numpy_input_fn({'x': np.asarray(x_train, dtype=np.float32)},
                                np.expand_dims(y_train, axis=1),
                                shuffle=False,
                                num_epochs=5000,
                                batch_size=64))

    x_test = scaler.transform(x_test)

    estimator.evaluate(
        input_fn=numpy_input_fn({'x': np.asarray(x_test, dtype=np.float32)},
                                np.expand_dims(y_test, axis=1),
                                shuffle=False,
                                num_epochs=1,
                                batch_size=32))
Example 32
    def test_complete_flow(self):
        n_classes = 3
        input_dimension = 2
        batch_size = 12

        data = np.linspace(0.,
                           n_classes - 1.,
                           batch_size * input_dimension,
                           dtype=np.float32)
        x_data = data.reshape(batch_size, input_dimension)
        y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
        train_input_fn = numpy_io.numpy_input_fn(x={'x': x_data},
                                                 y=y_data,
                                                 batch_size=batch_size,
                                                 num_epochs=None,
                                                 shuffle=True)
        eval_input_fn = numpy_io.numpy_input_fn(x={'x': x_data},
                                                y=y_data,
                                                batch_size=batch_size,
                                                shuffle=False)
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': x_data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]

        estimator = dnn.DNNClassifier(hidden_units=(2, 2),
                                      feature_columns=feature_columns,
                                      n_classes=n_classes,
                                      model_dir=self._model_dir)

        def optimizer_fn():
            return optimizers.get_optimizer_instance('Adagrad',
                                                     learning_rate=0.05)

        # TODO(isaprykin):  Switch Estimator to use allow_soft_placement=True
        # during export_savedmodel and then switch this test to replicate over
        # GPUs instead of CPUs.
        estimator = estimator_lib.Estimator(
            model_fn=replicate_model_fn.replicate_model_fn(
                estimator.model_fn,
                optimizer_fn,
                devices=['/cpu:0', '/cpu:0', '/cpu:0']),
            model_dir=estimator.model_dir,
            config=estimator.config,
            params=estimator.params)

        num_steps = 10
        estimator.train(train_input_fn, steps=num_steps)

        scores = estimator.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        predicted_proba = np.array([
            x[prediction_keys.PredictionKeys.PROBABILITIES]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example 33
  def test_weighted_multi_batch(self):
    # Create checkpoint: num_inputs=4, hidden_units=(2, 2), num_outputs=3.
    global_step = 100
    _create_checkpoint((
        (((1., 2.), (3., 4.), (5., 6.), (7., 8.),), (9., 8.)),
        (((7., 6.), (5., 4.),), (3., 2.)),
        (((1., 2., 3.), (4., 5., 6.),), (7., 8., 9.)),
    ), global_step, self._model_dir)

    # Create batched input.
    input_fn = numpy_io.numpy_input_fn(
        x={
            # Dimensions are (batch_size, feature_column.dimension).
            'x': np.array((
                (15., 0., 1.5, 135.2),
                (45., 45000., 1.8, 158.8),
                (21., 33000., 1.7, 207.1),
                (60., 10000., 1.6, 90.2)
            )),
            # TODO(ptucker): Add test for different weight shapes when we fix
            # head._compute_weighted_loss (currently it requires weights to be
            # same shape as labels & logits).
            'label_weights': np.array((
                (1., 1., 0.),
                (.5, 1., .1),
                (.5, 0., .9),
                (0., 0., 0.),
            ))
        },
        # Labels shape is (batch_size, num_outputs).
        y=np.array((
            (5., 2., 2.),
            (-2., 1., -4.),
            (-1., -1., -1.),
            (-4., 3., 9.),
        )),
        batch_size=1,
        shuffle=False)

    # Create DNNRegressor and evaluate.
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=(2, 2),
        feature_columns=(
            # Dimension is number of inputs.
            feature_column.numeric_column(
                'x', dtype=dtypes.int32, shape=(4,)),
        ),
        model_dir=self._model_dir,
        label_dimension=3,
        weight_feature_key='label_weights')
    self.assertAllClose({
        # TODO(ptucker): Point to tool for calculating a neural net output?
        # predictions = [
        #   [  54033.5    76909.6    99785.7]
        #   [8030393.8 11433082.4 14835771.0]
        #   [5923209.2  8433014.8 10942820.4]
        #   [1810021.6  2576969.6  3343917.6]
        # ]
        # losses = label_weights*(labels-predictions)^2 = [
        #  [  2.91907881e+09   5.91477894e+09                0]
        #  [  3.22436284e+13   1.30715350e+14   2.20100220e+13]
        #  [  1.75422095e+13                0   1.07770806e+14]
        #  [               0                0                0]
        # ]
        # total_loss = sum(losses) = 3.10290850204e+14
        # loss = total_loss / 4 = 7.7572712551e+13
        metric_keys.MetricKeys.LOSS: 7.7572712551e+13,
        # average_loss = total_loss / sum(label_weights) = 6.20581700408e+13
        metric_keys.MetricKeys.LOSS_MEAN: 6.20581700408e+13,
        ops.GraphKeys.GLOBAL_STEP: global_step
    }, dnn_regressor.evaluate(input_fn=input_fn, steps=4))
    def test_complete_flow_with_mode(self, distribution,
                                     use_train_and_evaluate):
        label_dimension = 2
        input_dimension = label_dimension
        batch_size = 10
        data = np.linspace(0.,
                           2.,
                           batch_size * label_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, label_dimension)
        train_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // len(distribution.worker_devices),
            shuffle=True)
        eval_input_fn = self.dataset_input_fn(x={'x': data},
                                              y=data,
                                              batch_size=batch_size //
                                              len(distribution.worker_devices),
                                              shuffle=False)
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        linear_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns
        estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir,
            # TODO(isaprykin): Work around the colocate_with error.
            dnn_optimizer=adagrad.AdagradOptimizer(0.001),
            linear_optimizer=adagrad.AdagradOptimizer(0.001),
            config=run_config.RunConfig(train_distribute=distribution,
                                        eval_distribute=distribution))

        num_steps = 10
        if use_train_and_evaluate:
            scores, _ = training.train_and_evaluate(
                estimator,
                training.TrainSpec(train_input_fn, max_steps=num_steps),
                training.EvalSpec(eval_input_fn))
        else:
            estimator.train(train_input_fn, steps=num_steps)
            scores = estimator.evaluate(eval_input_fn)

        self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', scores)

        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
    def test_complete_flow_with_mode(self, distribution,
                                     use_train_and_evaluate):
        label_dimension = 2
        input_dimension = label_dimension
        batch_size = 10
        data = np.linspace(0.,
                           2.,
                           batch_size * label_dimension,
                           dtype=np.float32)
        data = data.reshape(batch_size, label_dimension)
        train_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // distribution.num_replicas_in_sync)
        eval_input_fn = self.dataset_input_fn(
            x={'x': data},
            y=data,
            batch_size=batch_size // distribution.num_replicas_in_sync)
        predict_input_fn = numpy_io.numpy_input_fn(x={'x': data},
                                                   batch_size=batch_size,
                                                   shuffle=False)

        linear_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        dnn_feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, ))
        ]
        feature_columns = linear_feature_columns + dnn_feature_columns
        session_config = config_pb2.ConfigProto(log_device_placement=True,
                                                allow_soft_placement=True)
        estimator = dnn_linear_combined.DNNLinearCombinedRegressor(
            linear_feature_columns=linear_feature_columns,
            dnn_hidden_units=(2, 2),
            dnn_feature_columns=dnn_feature_columns,
            label_dimension=label_dimension,
            model_dir=self._model_dir,
            dnn_optimizer=adam.Adam(0.001),
            linear_optimizer=adam.Adam(0.001),
            config=run_config.RunConfig(train_distribute=distribution,
                                        eval_distribute=distribution,
                                        session_config=session_config))

        num_steps = 2
        if use_train_and_evaluate:
            scores, _ = training.train_and_evaluate(
                estimator,
                training.TrainSpec(train_input_fn, max_steps=num_steps),
                training.EvalSpec(eval_input_fn))
        else:
            estimator.train(train_input_fn, steps=num_steps)
            scores = estimator.evaluate(eval_input_fn)

        self.assertIn('loss', six.iterkeys(scores))

        predictions = np.array([
            x[prediction_keys.PredictionKeys.PREDICTIONS]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, label_dimension), predictions.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example 36
    def testCalibratedEtlMonotonicClassifierTraining(self):
        # Construct the following training pair.
        #
        # Training: (x, y)
        # ([0., 0.], 0.0)
        # ([0., 1.], 1.0)
        # ([1., 0.], 1.0)
        # ([1., 1.], 0.0)
        #
        # which is not a monotonic function. Then check the forcing monotonicity
        # resulted in the following monotonicity or not.
        # f(0, 0) <= f(0, 1), f(0, 0) <= f(1, 0), f(0, 1) <= f(1, 1),
        # f(1, 0) < = f(1, 1).
        x0 = np.array([0.0, 0.0, 1.0, 1.0])
        x1 = np.array([0.0, 1.0, 0.0, 1.0])
        x_samples = {'x0': x0, 'x1': x1}
        training_y = np.array([[False], [True], [True], [False]])

        train_input_fn = numpy_io.numpy_input_fn(x=x_samples,
                                                 y=training_y,
                                                 batch_size=4,
                                                 num_epochs=1000,
                                                 shuffle=False)
        test_input_fn = numpy_io.numpy_input_fn(x=x_samples,
                                                y=None,
                                                shuffle=False)

        # Define monotonic lattice classifier.
        feature_columns = [
            feature_column_lib.numeric_column('x0'),
            feature_column_lib.numeric_column('x1'),
        ]

        def init_fn():
            return keypoints_initialization.uniform_keypoints_for_signal(
                2, 0., 1., 0., 1.)

        hparams = tfl_hparams.CalibratedEtlHParams(num_keypoints=2,
                                                   monotonic_num_lattices=2,
                                                   monotonic_lattice_rank=2,
                                                   monotonic_lattice_size=2)
        hparams.set_param('calibration_monotonic', +1)
        hparams.set_param('lattice_monotonic', True)
        hparams.set_param('learning_rate', 0.1)

        estimator = calibrated_etl.calibrated_etl_classifier(
            feature_columns=feature_columns,
            hparams=hparams,
            keypoints_initializers_fn=init_fn)
        estimator.train(input_fn=train_input_fn)
        predictions = [
            results['logits'][0]
            for results in estimator.predict(input_fn=test_input_fn)
        ]

        self.assertEqual(len(predictions), 4)
        # Check monotonicity. Note that projection has its own precision, so we
        # add a small number.
        self.assertLess(predictions[0], predictions[1] + 1e-6)
        self.assertLess(predictions[0], predictions[2] + 1e-6)
        self.assertLess(predictions[1], predictions[3] + 1e-6)
        self.assertLess(predictions[2], predictions[3] + 1e-6)
Example 37
def input_fn(num_epochs=1):
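    # Note: X and y are module-level NumPy arrays assumed to be defined
    # elsewhere in the original example; they are not part of this snippet.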
    return numpy_input_fn({'X': X}, y,
                          shuffle=False,
                          num_epochs=num_epochs,
                          batch_size=len(X))
    def testContribEstimatorThatDFCIsInPredictions(self):
        # pylint:disable=protected-access
        head = canned_boosted_trees._create_regression_head(label_dimension=1)
        train_input_fn = _make_train_input_fn(is_classification=False)
        predict_input_fn = numpy_io.numpy_input_fn(x=FEATURES_DICT,
                                                   y=None,
                                                   batch_size=1,
                                                   num_epochs=1,
                                                   shuffle=False)

        est = boosted_trees._BoostedTreesEstimator(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            head=head,
            n_trees=1,
            max_depth=5,
            center_bias=True)
        # pylint:enable=protected-access

        num_steps = 100
        # Train for a few steps. Validate debug outputs in prediction dicts.
        est.train(train_input_fn, steps=num_steps)
        debug_predictions = est.experimental_predict_with_explanations(
            predict_input_fn)
        biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
                             for pred in debug_predictions])
        self.assertAllClose([1.8] * 5, biases)
        self.assertAllClose(({
            0: -0.070499420166015625,
            1: -0.095000028610229492,
            2: 0.0
        }, {
            0: -0.53763031959533691,
            1: 0.063333392143249512,
            2: 0.0
        }, {
            0: -0.51756942272186279,
            1: -0.095000028610229492,
            2: 0.0
        }, {
            0: 0.1563495397567749,
            1: 0.063333392143249512,
            2: 0.0
        }, {
            0: 0.96934974193572998,
            1: 0.063333392143249512,
            2: 0.0
        }), dfcs)

        # Assert sum(dfcs) + bias == predictions.
        expected_predictions = [[1.6345005], [1.32570302], [1.1874305],
                                [2.01968288], [2.83268309]]
        predictions = [[sum(dfc.values()) + bias]
                       for (dfc, bias) in zip(dfcs, biases)]
        self.assertAllClose(expected_predictions, predictions)

        # Test when user doesn't include bias or dfc in predict_keys.
        debug_predictions = est.experimental_predict_with_explanations(
            predict_input_fn, predict_keys=['predictions'])
        for prediction_dict in debug_predictions:
            self.assertTrue('bias' in prediction_dict)
            self.assertTrue('dfc' in prediction_dict)
            self.assertTrue('predictions' in prediction_dict)
            self.assertEqual(len(prediction_dict), 3)
Example 39
  def test_weighted_multi_example_multi_column(self):
    hidden_units = (2, 2)
    base_global_step = 100
    _create_checkpoint((
        (((1., 2.), (3., 4.), (5., 6.), (7., 8.),), (9., 8.)),
        (((7., 6.), (5., 4.),), (3., 2.)),
        (((1., 2., 3.), (4., 5., 6.),), (7., 8., 9.)),
    ), base_global_step, self._model_dir)

    # Create DNNRegressor with mock optimizer.
    # TODO(ptucker): Point to tool for calculating a neural net output?
    # predictions = [
    #   [  54033.5    76909.6    99785.7]
    #   [8030393.8 11433082.4 14835771.0]
    #   [5923209.2  8433014.8 10942820.4]
    #   [1810021.6  2576969.6  3343917.6]
    # ]
    # loss = sum(label_weights*(labels-predictions)^2) = 3.10290850204e+14
    expected_loss = 3.10290850204e+14
    mock_optimizer = self._mockOptimizer(
        hidden_units=hidden_units, expected_loss=expected_loss)
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=hidden_units,
        feature_columns=(
            # Dimensions add up to 4 (number of inputs).
            feature_column.numeric_column(
                'x', dtype=dtypes.int32, shape=(2,)),
            feature_column.numeric_column(
                'y', dtype=dtypes.float32, shape=(2,)),
        ),
        optimizer=mock_optimizer,
        model_dir=self._model_dir,
        label_dimension=3,
        weight_feature_key='label_weights')
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Create batched inputs.
    input_fn = numpy_io.numpy_input_fn(
        # NOTE: feature columns are concatenated in alphabetic order of keys.
        x={
            # Inputs shapes are (batch_size, feature_column.dimension).
            'x': np.array((
                (15., 0.),
                (45., 45000.),
                (21., 33000.),
                (60., 10000.)
            )),
            'y': np.array((
                (1.5, 135.2),
                (1.8, 158.8),
                (1.7, 207.1),
                (1.6, 90.2)
            )),
            # TODO(ptucker): Add test for different weight shapes when we fix
            # head._compute_weighted_loss (currently it requires weights to be
            # same shape as labels & logits).
            'label_weights': np.array((
                (1., 1., 0.),
                (.5, 1., .1),
                (.5, 0., .9),
                (0., 0., 0.),
            ))
        },
        # Labels shape is (batch_size, num_outputs).
        y=np.array((
            (5., 2., 2.),
            (-2., 1., -4.),
            (-1., -1., -1.),
            (-4., 3., 9.),
        )),
        batch_size=4,
        num_epochs=None,
        shuffle=False)

    # Train for 1 step, then validate optimizer, summaries, and checkpoint.
    summary_hook = _SummaryHook()
    dnn_regressor.train(input_fn=input_fn, steps=1, hooks=(summary_hook,))
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(1, len(summaries))
    self._assert_simple_summary({
        metric_keys.MetricKeys.LOSS: expected_loss,
        # average_loss = loss / sum(label_weights) = 3.10290850204e+14 / 5.
        #              = 6.205817e+13
        metric_keys.MetricKeys.LOSS_MEAN: 6.205817e+13,
        'dnn/dnn/hiddenlayer_0_activation': 0.,
        'dnn/dnn/hiddenlayer_0_fraction_of_zero_values': 0.,
        'dnn/dnn/hiddenlayer_1_activation': 0.,
        'dnn/dnn/hiddenlayer_1_fraction_of_zero_values': 0.,
        'dnn/dnn/logits_activation': 0.,
        'dnn/dnn/logits_fraction_of_zero_values': 0.,
    }, summaries[0])
    self._assert_checkpoint(
        base_global_step + 1,
        input_units=4,  # Sum of feature column dimensions.
        hidden_units=hidden_units,
        output_units=3)  # = label_dimension

    # Train for 3 steps - we should still get the same loss since we're not
    # updating weights.
    dnn_regressor.train(input_fn=input_fn, steps=3)
    self.assertEqual(2, mock_optimizer.minimize.call_count)
    self._assert_checkpoint(
        base_global_step + 4,
        input_units=4,  # Sum of feature column dimensions.
        hidden_units=hidden_units,
        output_units=3)  # = label_dimension
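
# A minimal standalone sketch that reproduces expected_loss and LOSS_MEAN above
# from the prediction values in the comment (numbers only, independent of the
# estimator): loss = sum(label_weights * (labels - predictions)^2) and
# average_loss = loss / sum(label_weights).
import numpy as np

predictions = np.array([[54033.5, 76909.6, 99785.7],
                        [8030393.8, 11433082.4, 14835771.0],
                        [5923209.2, 8433014.8, 10942820.4],
                        [1810021.6, 2576969.6, 3343917.6]])
labels = np.array([[5., 2., 2.], [-2., 1., -4.], [-1., -1., -1.], [-4., 3., 9.]])
label_weights = np.array([[1., 1., 0.], [.5, 1., .1], [.5, 0., .9], [0., 0., 0.]])

loss = np.sum(label_weights * (labels - predictions) ** 2)  # ~3.1029e+14
average_loss = loss / np.sum(label_weights)                 # ~6.2058e+13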
Example n. 40
  def test_weighted_multi_batch(self):
    hidden_units = (2, 2)
    base_global_step = 100
    _create_checkpoint((
        (((1., 2.), (3., 4.), (5., 6.), (7., 8.),), (9., 8.)),
        (((7., 6.), (5., 4.),), (3., 2.)),
        (((1., 2., 3.), (4., 5., 6.),), (7., 8., 9.)),
    ), base_global_step, self._model_dir)

    mock_optimizer = self._mockOptimizer(hidden_units=hidden_units)
    dnn_regressor = dnn.DNNRegressor(
        hidden_units=hidden_units,
        feature_columns=(
            # Dimension is number of inputs.
            feature_column.numeric_column(
                'x', dtype=dtypes.int32, shape=(4,)),
        ),
        optimizer=mock_optimizer,
        model_dir=self._model_dir,
        label_dimension=3,
        weight_feature_key='label_weights')
    self.assertEqual(0, mock_optimizer.minimize.call_count)

    # Create batched input.
    input_fn = numpy_io.numpy_input_fn(
        x={
            # Input shape is (batch_size, feature_column.dimension).
            'x': np.array((
                (15., 0., 1.5, 135.2),
                (45., 45000., 1.8, 158.8),
                (21., 33000., 1.7, 207.1),
                (60., 10000., 1.6, 90.2)
            )),
            # TODO(ptucker): Add test for different weight shapes when we fix
            # head._compute_weighted_loss (currently it requires weights to be
            # same shape as labels & logits).
            'label_weights': np.array((
                (1., 1., 0.),
                (.5, 1., .1),
                (.5, 0., .9),
                (0., 0., 0.),
            ))
        },
        # Labels shape is (batch_size, num_outputs).
        y=np.array((
            (5., 2., 2.),
            (-2., 1., -4.),
            (-1., -1., -1.),
            (-4., 3., 9.),
        )),
        batch_size=1,
        shuffle=False)

    # Train for 1 step, then validate optimizer, summaries, and checkpoint.
    num_steps = 4
    summary_hook = _SummaryHook()
    dnn_regressor.train(
        input_fn=input_fn, steps=num_steps, hooks=(summary_hook,))
    self.assertEqual(1, mock_optimizer.minimize.call_count)
    summaries = summary_hook.summaries()
    self.assertEqual(num_steps, len(summaries))
    # TODO(ptucker): Point to tool for calculating a neural net output?
    # predictions = [
    #   [  54033.5    76909.6    99785.7]
    #   [8030393.8 11433082.4 14835771.0]
    #   [5923209.2  8433014.8 10942820.4]
    #   [1810021.6  2576969.6  3343917.6]
    # ]
    # losses = label_weights*(labels-predictions)^2 = [
    #   [2.91907881e+09 5.91477894e+09              0]
    #   [3.22436284e+13 1.30715350e+14 2.20100220e+13]
    #   [1.75422095e+13              0 1.07770806e+14]
    #   [             0              0              0]
    # ]
    # step_losses = [sum(losses[i]) for i in 0...3]
    #             = [8833857750, 1.84969e+14, 1.2531302e+14, 0]
    expected_step_losses = (8833857750, 1.84969e+14, 1.2531302e+14, 0)
    # step_average_losses = [
    #     step_losses[i] / sum(label_weights[i]) for i in 0...3
    # ] = [4416928875, 1.1560563e+14, 8.95093e+13, 0]
    expected_step_average_losses = (4416928875, 1.1560563e+14, 8.95093e+13, 0)
    for i in range(len(summaries)):
      self._assert_simple_summary({
          metric_keys.MetricKeys.LOSS: expected_step_losses[i],
          metric_keys.MetricKeys.LOSS_MEAN: expected_step_average_losses[i],
          'dnn/dnn/hiddenlayer_0_activation': 0.,
          'dnn/dnn/hiddenlayer_0_fraction_of_zero_values': 0.,
          'dnn/dnn/hiddenlayer_1_activation': 0.,
          'dnn/dnn/hiddenlayer_1_fraction_of_zero_values': 0.,
          'dnn/dnn/logits_activation': 0.,
          'dnn/dnn/logits_fraction_of_zero_values': 0.,
      }, summaries[i])
    self._assert_checkpoint(
        base_global_step + num_steps,
        input_units=4,  # Sum of feature column dimensions.
        hidden_units=hidden_units,
        output_units=3)  # = label_dimension
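
# A similar minimal sketch for the per-step numbers above: with batch_size=1,
# each training step sees a single example, so
# step_losses[i] = sum(w[i] * (y[i] - p[i])^2) and
# step_average_losses[i] = step_losses[i] / sum(w[i]) (0 when the weights sum to 0).
import numpy as np

p = np.array([[54033.5, 76909.6, 99785.7],
              [8030393.8, 11433082.4, 14835771.0],
              [5923209.2, 8433014.8, 10942820.4],
              [1810021.6, 2576969.6, 3343917.6]])
y = np.array([[5., 2., 2.], [-2., 1., -4.], [-1., -1., -1.], [-4., 3., 9.]])
w = np.array([[1., 1., 0.], [.5, 1., .1], [.5, 0., .9], [0., 0., 0.]])

step_losses = (w * (y - p) ** 2).sum(axis=1)  # ~[8.83e+09, 1.85e+14, 1.25e+14, 0]
weight_sums = w.sum(axis=1)                   # [2., 1.6, 1.4, 0.]
step_average_losses = np.where(weight_sums > 0,
                               step_losses / np.maximum(weight_sums, 1e-12), 0.)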
Example n. 41
def create_input_data_fn(mode,
                         pipeline_config,
                         scope=None,
                         input_type=None,
                         x=None,
                         y=None):
    """Creates an input data function that can be used with estimators.
    Note that you must pass "factory functions" for both the data provider and
    featurizer to ensure that everything will be created in  the same graph.

    Args:
        mode: `str`, Specifies if this training, evaluation or prediction. See `Modes`.
        pipeline_config: the configuration to create a Pipeline instance.
        scope: `str`. scope to use for this input data block.
        input_type: `str`. The type of the input, values: `NUMPY`, `PANDAS`.
                    If `None`, will create a function based on the pipeline config.
        x: `np.ndarray` or `np.Dataframe` or `None`.
        y: `np.ndarray` or `None`.

    Returns:
        An input function that returns `(feature_batch, labels_batch)`
        tuples when called.
    """

    if input_type == 'NUMPY':
        # setup_train_data_feeder
        return numpy_input_fn(x,
                              y,
                              batch_size=pipeline_config.batch_size,
                              num_epochs=pipeline_config.num_epochs,
                              shuffle=pipeline_config.shuffle,
                              num_threads=pipeline_config.num_threads)

    if input_type == 'PANDAS':
        # setup_train_data_feeder
        return pandas_input_fn(x,
                               y,
                               batch_size=pipeline_config.batch_size,
                               num_epochs=pipeline_config.num_epochs,
                               shuffle=pipeline_config.shuffle,
                               num_threads=pipeline_config.num_threads)

    def input_fn():
        """Creates features and labels."""
        pipeline_params = pipeline_config.to_dict()
        batch_size = pipeline_params.pop('batch_size', None)
        dynamic_pad = pipeline_params.pop('dynamic_pad', None)
        capacity = pipeline_params.pop('capacity', None)
        del pipeline_params['num_threads']
        del pipeline_params['min_after_dequeue']
        allow_smaller_final_batch = pipeline_params.pop(
            'allow_smaller_final_batch', None)
        bucket_boundaries = pipeline_params.pop('bucket_boundaries', None)

        pipeline = getters.get_pipeline(mode=mode,
                                        module=pipeline_config.IDENTIFIER,
                                        **pipeline_params)

        with tf.variable_scope(scope or 'input_fn'):
            data_provider = pipeline.make_data_provider()
            features_and_labels = pipeline.read_from_data_provider(
                data_provider)
            # call pipeline processors
            features_and_labels = pipeline(features_and_labels, None)

            if bucket_boundaries:
                _, batch = tf.contrib.training.bucket_by_sequence_length(
                    input_length=features_and_labels['source_len'],
                    bucket_boundaries=bucket_boundaries,
                    tensors=features_and_labels,
                    batch_size=batch_size,
                    keep_input=features_and_labels['source_len'] >= 1,
                    dynamic_pad=dynamic_pad,
                    capacity=capacity,
                    allow_smaller_final_batch=allow_smaller_final_batch,
                    name='bucket_queue')
            else:
                batch = tf.train.batch(
                    tensors=features_and_labels,
                    enqueue_many=False,
                    batch_size=batch_size,
                    dynamic_pad=dynamic_pad,
                    capacity=capacity,
                    allow_smaller_final_batch=allow_smaller_final_batch,
                    name='batch_queue')

            # Separate features and labels
            features_batch = {k: batch[k] for k in pipeline.feature_keys}
            if set(batch.keys()).intersection(pipeline.label_keys):
                labels_batch = {k: batch[k] for k in pipeline.label_keys}
            else:
                labels_batch = None

            return features_batch, labels_batch

    return input_fn
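
# A minimal usage sketch, assuming `my_pipeline_config` is a pipeline config
# object exposing `batch_size`, `num_epochs`, `shuffle` and `num_threads`, and
# that `x_train`/`y_train` are numpy arrays (these names and the 'train' mode
# string are hypothetical):
train_input_fn = create_input_data_fn(mode='train',
                                      pipeline_config=my_pipeline_config,
                                      input_type='NUMPY',
                                      x={'images': x_train},
                                      y=y_train)
# features, labels = train_input_fn()  # or pass it directly to estimator.train()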
Example n. 42
x_train = np.reshape(mnist.train.images, (-1, 28, 28, 1))
y_train = mnist.train.labels
x_test = np.reshape(mnist.test.images, (-1, 28, 28, 1))
y_test = mnist.test.labels

# parameters
LEARNING_RATE = 0.01
BATCH_SIZE = 128
STEPS = 1000

# Input functions
x_train_dict = {'x': x_train}
train_input_fn = numpy_io.numpy_input_fn(x_train_dict,
                                         y_train,
                                         batch_size=BATCH_SIZE,
                                         shuffle=True,
                                         num_epochs=None,
                                         queue_capacity=10000,
                                         num_threads=4)

x_test_dict = {'x': x_test}
test_input_fn = numpy_io.numpy_input_fn(x_test_dict,
                                        y_test,
                                        batch_size=BATCH_SIZE,
                                        shuffle=False,
                                        num_epochs=1)

model_params = {"learning_rate": LEARNING_RATE}


# create experiment
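# A sketch of one way the experiment could be assembled from the pieces above
# (the conv-net `model_fn` is assumed to be defined elsewhere and the
# model_dir path is made up):
estimator = tf.estimator.Estimator(model_fn=model_fn,
                                   params=model_params,
                                   model_dir='/tmp/mnist_convnet_model')
experiment = tf.contrib.learn.Experiment(estimator,
                                         train_input_fn=train_input_fn,
                                         eval_input_fn=test_input_fn,
                                         train_steps=STEPS)
experiment.train_and_evaluate()
# On newer TF 1.x releases, tf.estimator.train_and_evaluate is the
# non-contrib equivalent of the Experiment API.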
Example n. 43

def graph_fn(mode, features):
    return plx.layers.SingleUnit(mode)(features['X'])


def model_fn(features, labels, mode):
    model = plx.models.Regressor(
        mode,
        graph_fn=graph_fn,
        loss_config=plx.configs.LossConfig(module='mean_squared_error'),
        optimizer_config=plx.configs.OptimizerConfig(module='sgd',
                                                     learning_rate=0.009),
        eval_metrics_config=[],
        summaries='all',
        name='regressor')
    return model(features, labels)


estimator = plx.estimators.Estimator(model_fn=model_fn,
                                     model_dir="/tmp/polyaxon_logs/linear")

estimator.train(input_fn=numpy_input_fn(
    {'X': X}, y, shuffle=False, num_epochs=10000, batch_size=len(X)))

print([
    x['results'] for x in estimator.predict(
        input_fn=numpy_input_fn({'X': X_val}, shuffle=False))
])
print(y_val)
Example n. 44
    def testCalibratedRtlMonotonicClassifierTraining(self):
        # Construct the following training/testing pair.
        #
        # Training: (x, y)
        # ([0., 0.], 0.0)
        # ([0., 1.], 1.0)
        # ([1., 0.], 1.0)
        # ([1., 1.], 0.0)
        #
        # Test: (x, y)
        # ([0., 0.], 0.0)
        # ([0., 1.], 1.0)
        # ([1., 0.], 1.0)
        # ([1., 1.], 1.0)
        #
        # Note that the training data has a noisy sample, ([1., 1.], 0.0), while
        # the test examples are generated by the logical-OR function. Therefore,
        # by enforcing increasing monotonicity on all features, we should be able
        # to do well on the test examples.
        x0 = np.array([0.0, 0.0, 1.0, 1.0])
        x1 = np.array([0.0, 1.0, 0.0, 1.0])
        x_samples = {'x0': x0, 'x1': x1}
        training_y = np.array([[False], [True], [True], [False]])
        test_y = np.array([[False], [True], [True], [True]])

        train_input_fn = numpy_io.numpy_input_fn(x=x_samples,
                                                 y=training_y,
                                                 batch_size=4,
                                                 num_epochs=1000,
                                                 shuffle=False)
        test_input_fn = numpy_io.numpy_input_fn(x=x_samples,
                                                y=test_y,
                                                shuffle=False)

        # Define monotonic lattice classifier.
        feature_columns = [
            feature_column_lib.numeric_column('x0'),
            feature_column_lib.numeric_column('x1'),
        ]

        def init_fn():
            return keypoints_initialization.uniform_keypoints_for_signal(
                2, 0., 1., 0., 1.)

        hparams = tfl_hparams.CalibratedRtlHParams(num_keypoints=2,
                                                   num_lattices=5,
                                                   lattice_rank=2)
        # Monotonic calibrated lattice.

        hparams.set_param('monotonicity', +1)
        hparams.set_param('learning_rate', 0.1)
        hparams.set_param('interpolation_type', 'hypercube')

        estimator = calibrated_rtl.calibrated_rtl_classifier(
            feature_columns=feature_columns,
            hparams=hparams,
            keypoints_initializers_fn=init_fn)

        estimator.train(input_fn=train_input_fn)
        results = estimator.evaluate(input_fn=test_input_fn)
        # We should expect 1.0 accuracy.
        self.assertGreater(results['accuracy'], 0.999)
    def _complete_flow_with_mode(self, mode):
        n_classes = 3
        input_dimension = 2
        batch_size = 12

        data = np.linspace(0.,
                           n_classes - 1.,
                           batch_size * input_dimension,
                           dtype=np.float32)
        x_data = data.reshape(batch_size, input_dimension)
        categorical_data = np.random.random_integers(0,
                                                     len(x_data),
                                                     size=len(x_data))
        y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
        train_input_fn = numpy_io.numpy_input_fn(x={
            'x': x_data,
            'categories': categorical_data
        },
                                                 y=y_data,
                                                 batch_size=batch_size,
                                                 num_epochs=None,
                                                 shuffle=True)
        eval_input_fn = numpy_io.numpy_input_fn(x={
            'x': x_data,
            'categories': categorical_data
        },
                                                y=y_data,
                                                batch_size=batch_size,
                                                shuffle=False)
        predict_input_fn = numpy_io.numpy_input_fn(x={
            'x': x_data,
            'categories': categorical_data
        },
                                                   batch_size=batch_size,
                                                   shuffle=False)

        feature_columns = [
            feature_column.numeric_column('x', shape=(input_dimension, )),
            feature_column.embedding_column(
                feature_column.categorical_column_with_vocabulary_list(
                    'categories',
                    vocabulary_list=np.linspace(0.,
                                                len(x_data),
                                                len(x_data),
                                                dtype=np.int64)), 1)
        ]

        estimator = dnn.DNNClassifier(hidden_units=(2, 2),
                                      feature_columns=feature_columns,
                                      n_classes=n_classes,
                                      model_dir=self._model_dir)

        def optimizer_fn():
            return optimizers.get_optimizer_instance('Adagrad',
                                                     learning_rate=0.05)

        estimator = estimator_lib.Estimator(
            model_fn=replicate_model_fn.replicate_model_fn(
                estimator.model_fn,
                optimizer_fn,
                devices=['/gpu:0', '/gpu:1', '/gpu:2'],
                mode=mode),
            model_dir=estimator.model_dir,
            config=estimator.config,
            params=estimator.params)

        num_steps = 10
        estimator.train(train_input_fn, steps=num_steps)

        scores = estimator.evaluate(eval_input_fn)
        self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
        self.assertIn('loss', six.iterkeys(scores))

        predicted_proba = np.array([
            x[prediction_keys.PredictionKeys.PROBABILITIES]
            for x in estimator.predict(predict_input_fn)
        ])
        self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

        feature_spec = feature_column.make_parse_example_spec(feature_columns)
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        export_dir = estimator.export_savedmodel(tempfile.mkdtemp(),
                                                 serving_input_receiver_fn)
        self.assertTrue(gfile.Exists(export_dir))
Example n. 46
tf.logging.set_verbosity(tf.logging.INFO)

X = np.linspace(-1, 1, 100)
y = 2 * X + np.random.randn(*X.shape) * 0.33

# Test data set
X_val = np.linspace(1, 1.5, 10)
y_val = 2 * X_val + np.random.randn(*X_val.shape) * 0.33


def graph_fn(mode, inputs):
    return plx.layers.SingleUnit(mode)(inputs['X'])


def model_fn(features, labels, mode):
    model = plx.experiments.RegressorModel(
        mode, graph_fn=graph_fn, loss_config=plx.configs.LossConfig(name='mean_squared_error'),
        optimizer_config=plx.configs.OptimizerConfig(name='SGD', learning_rate=0.009),
        eval_metrics_config=[],
        summaries='all', name='regressor')
    return model(features, labels)


estimator = plx.experiments.Estimator(model_fn=model_fn, model_dir="/tmp/polyaxon_logs/linear")

estimator.train(input_fn=numpy_input_fn({'X': X}, y, shuffle=False, num_epochs=10000,
                                        batch_size=len(X)))

print([x['results'] for x in estimator.predict(input_fn=numpy_input_fn({'X': X_val}, shuffle=False))])
print(y_val)