def testMakePlaceHolderTensorsForBaseFeatures(self):
    """Checks the placeholders built for a mixed set of base feature columns.

    The sparse and variable-length real valued columns are expected to map to
    `SparseTensor` placeholders. The fixed-dimension real valued columns
    (including the source column of the bucketized column) are expected to
    map to float32 placeholders of shape [None, dimension].
    """
    sparse_col = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    real_valued_col = fc.real_valued_column("real_valued_column", 5)
    vlen_real_valued_col = fc.real_valued_column(
        "vlen_real_valued_column", dimension=None)

    bucketized_col = fc.bucketized_column(
        fc.real_valued_column("real_valued_column_for_bucketization"), [0, 4])
    feature_columns = {
        sparse_col, real_valued_col, vlen_real_valued_col, bucketized_col
    }
    placeholders = (
        fc.make_place_holder_tensors_for_base_features(feature_columns))

    self.assertEqual(4, len(placeholders))
    # assertIsInstance / assertIn report the offending value on failure,
    # unlike assertTrue(isinstance(...)) or a comparison on str.find().
    self.assertIsInstance(placeholders["sparse_column"],
                          sparse_tensor_lib.SparseTensor)
    self.assertIsInstance(placeholders["vlen_real_valued_column"],
                          sparse_tensor_lib.SparseTensor)

    placeholder = placeholders["real_valued_column"]
    self.assertIn(u"Placeholder_real_valued_column", placeholder.name)
    self.assertEqual(dtypes.float32, placeholder.dtype)
    self.assertEqual([None, 5], placeholder.get_shape().as_list())

    # The bucketized column is keyed by the name of its source column.
    placeholder = placeholders["real_valued_column_for_bucketization"]
    self.assertIn(
        u"Placeholder_real_valued_column_for_bucketization", placeholder.name)
    self.assertEqual(dtypes.float32, placeholder.dtype)
    self.assertEqual([None, 1], placeholder.get_shape().as_list())
예제 #2
0
  def testRealValuedFeaturesPerfectlySeparable(self):
    """Trains an unregularized SVM on three separable real valued examples."""

    def _separable_inputs():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['feature1'] = constant_op.constant([[0.0], [1.0], [3.0]])
      features['feature2'] = constant_op.constant([[1.0], [-1.2], [1.0]])
      labels = constant_op.constant([[1], [0], [1]])
      return features, labels

    columns = [
        feature_column.real_valued_column(name)
        for name in ('feature1', 'feature2')
    ]
    classifier = svm.SVM(
        feature_columns=columns,
        example_id_column='example_id',
        l1_regularization=0.0,
        l2_regularization=0.0)
    classifier.fit(input_fn=_separable_inputs, steps=30)
    results = classifier.evaluate(input_fn=_separable_inputs, steps=1)
    loss, accuracy = results['loss'], results['accuracy']

    # Beyond being separable, the points admit weights (e.g. w1=0.0, w2=1.0)
    # that satisfy the margin inequalities (y_i * w^T * x_i >= 1), so the
    # unregularized loss is expected to be 0.0.
    self.assertAlmostEqual(loss, 0.0, places=3)
    self.assertAlmostEqual(accuracy, 1.0, places=3)
예제 #3
0
  def testBiasAndOtherColumns(self):
    """Checks SDCALinearRegressor's bias weight when other columns exist."""

    def _make_inputs():
      """Builds a dataset for checking the bias next to two other features.

      Half of the instances carry feature 'a' and the other half carry
      feature 'b'; the bias is expected to be added to every instance.
      0.4 of the 'a' instances and 0.2 of the 'b' instances are positive.
      The label patterns are ordered to look shuffled because SDCA expects
      shuffled data and converges faster with this pseudo-random ordering.

      With a centered bias the expected weights would be:
        bias: 0.3
        a: 0.1
        b: -0.1
      Until b/29339026 is resolved the bias is regularized with the same
      global value as the other columns, so the expected weights shift to:
        bias: 0.2
        a: 0.2
        b: 0.0

      Returns:
        The test dataset as a (features, labels) tuple.
      """
      num_examples = 200
      half = num_examples // 2
      repeats = half // 10
      a_label_pattern = [1, 0, 0, 1, 1, 0, 0, 0, 1, 0]
      b_label_pattern = [0, 1, 0, 0, 0, 0, 0, 0, 1, 0]

      features = {}
      features['example_id'] = constant_op.constant(
          [str(index + 1) for index in range(num_examples)])
      features['a'] = constant_op.constant([[1]] * half + [[0]] * half)
      features['b'] = constant_op.constant([[0]] * half + [[1]] * half)

      label_values = a_label_pattern * repeats + b_label_pattern * repeats
      labels = constant_op.constant([[value] for value in label_values])
      return features, labels

    # TODO(b/29339026): Change the expected results to expect a centered bias.
    expected_weights = (
        ('linear/bias_weight', 0.2),
        ('linear/a/weight', 0.2),
        ('linear/b/weight', 0.0),
    )

    with self._single_threaded_test_session():
      columns = [
          feature_column_lib.real_valued_column('a'),
          feature_column_lib.real_valued_column('b'),
      ]
      regressor = sdca_estimator.SDCALinearRegressor(
          example_id_column='example_id', feature_columns=columns)

      regressor.fit(input_fn=_make_inputs, steps=200)

      variable_names = regressor.get_variable_names()
      for name, _ in expected_weights:
        self.assertIn(name, variable_names)
      for name, expected in expected_weights:
        self.assertNear(
            regressor.get_variable_value(name)[0], expected, err=0.05)
예제 #4
0
  def testRealValuedFeaturesWithMildL1Regularization(self):
    """Tests SVM on real valued features with mild L1 regularization."""

    def _inputs():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['feature1'] = constant_op.constant([[0.5], [1.0], [1.0]])
      features['feature2'] = constant_op.constant([[1.0], [-1.0], [0.5]])
      return features, constant_op.constant([[1], [0], [1]])

    columns = [
        feature_column.real_valued_column(name)
        for name in ('feature1', 'feature2')
    ]
    classifier = svm.SVM(
        feature_columns=columns,
        example_id_column='example_id',
        l1_regularization=0.5,
        l2_regularization=1.0)
    classifier.fit(input_fn=_inputs, steps=30)
    results = classifier.evaluate(input_fn=_inputs, steps=1)
    loss, accuracy = results['loss'], results['accuracy']

    # A small L1 penalty favors even smaller weights, which yields a moderate
    # unregularized loss (bigger than the one obtained without L1
    # regularization). Since the penalty is small, every prediction should
    # still be correct, giving perfect accuracy.
    self.assertGreater(loss, 0.1)
    self.assertAlmostEqual(accuracy, 1.0, places=3)
예제 #5
0
  def testMixedFeatures(self):
    """Trains SDCALogisticClassifier on dense, bucketized, sparse and crossed
    columns with per-example weights."""

    def _weighted_inputs():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['price'] = constant_op.constant([[0.6], [0.8], [0.3]])
      features['sq_footage'] = constant_op.constant(
          [[900.0], [700.0], [600.0]])
      features['country'] = sparse_tensor.SparseTensor(
          values=['IT', 'US', 'GB'],
          indices=[[0, 0], [1, 3], [2, 1]],
          dense_shape=[3, 5])
      features['weights'] = constant_op.constant([[3.0], [1.0], [1.0]])
      labels = constant_op.constant([[1], [0], [1]])
      return features, labels

    price_col = feature_column_lib.real_valued_column('price')
    sq_footage_source = feature_column_lib.real_valued_column('sq_footage')
    sq_footage_col = feature_column_lib.bucketized_column(
        sq_footage_source, boundaries=[650.0, 800.0])
    country_col = feature_column_lib.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    # Cross of the bucketized square footage with the hashed country.
    crossed_col = feature_column_lib.crossed_column(
        [sq_footage_col, country_col], hash_bucket_size=10)

    classifier = sdca_estimator.SDCALogisticClassifier(
        example_id_column='example_id',
        feature_columns=[price_col, sq_footage_col, country_col, crossed_col],
        weight_column_name='weights')
    classifier.fit(input_fn=_weighted_inputs, steps=50)
    results = classifier.evaluate(input_fn=_weighted_inputs, steps=1)
    self.assertGreater(results['accuracy'], 0.9)
  def testRealValuedColumnDensification(self):
    """Checks how `RealValuedColumn` densifies sparse input for a DNN layer."""
    # Float column of width 1, created without a default value.
    float_column = fc.real_valued_column("sparse_real_valued1", dimension=None)
    float_input = sparse_tensor_lib.SparseTensor(
        values=[2.0, 5.0], indices=[[0, 0], [2, 0]], dense_shape=[3, 1])
    float_dense = float_column._to_dnn_input_layer(float_input)

    # Int32 column of width 2, created with a default value of -1.
    int_column = fc.real_valued_column(
        "sparse_real_valued2",
        dimension=None,
        default_value=-1,
        dtype=dtypes.int32)
    int_input = sparse_tensor_lib.SparseTensor(
        values=[2, 5, 9, 0],
        indices=[[0, 0], [1, 1], [2, 0], [2, 1]],
        dense_shape=[3, 2])
    int_dense = int_column._to_dnn_input_layer(int_input)

    with self.test_session() as sess:
      float_result, int_result = sess.run([float_dense, int_dense])
      # Missing entries are expected as 0.0 in the first output and as the
      # default value -1 in the second.
      self.assertAllEqual(float_result, [[2.0], [0.0], [5.0]])
      self.assertAllEqual(int_result, [[2, -1], [-1, 5], [9, 0]])
예제 #7
0
  def testRealValuedFeaturesWithBigL1Regularization(self):
    """Tests SVM on real valued features with large L1 regularization."""

    def input_fn():
      return {
          'example_id': constant_op.constant(['1', '2', '3']),
          'feature1': constant_op.constant([0.5, 1.0, 1.0]),
          'feature2': constant_op.constant([[1.0], [-1.0], [0.5]]),
      }, constant_op.constant([[1], [0], [1]])

    feature1 = feature_column.real_valued_column('feature1')
    feature2 = feature_column.real_valued_column('feature2')
    svm_classifier = svm.SVM(feature_columns=[feature1, feature2],
                             example_id_column='example_id',
                             l1_regularization=3.0,
                             l2_regularization=1.0)
    svm_classifier.fit(input_fn=input_fn, steps=30)
    metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
    loss = metrics['loss']
    accuracy = metrics['accuracy']

    # When the L1 regularization parameter is large, the loss due to
    # regularization outweighs the unregularized loss. The classifier then
    # favors very small weights (0 in this case), resulting in both a big
    # unregularized loss and bad accuracy.
    self.assertAlmostEqual(loss, 1.0, places=3)
    # Use a float numerator so the expected value is one third even where `/`
    # is integer division (Python 2 without `from __future__ import division`).
    self.assertAlmostEqual(accuracy, 1.0 / 3, places=3)
예제 #8
0
  def testRealValuedFeaturesWithL2Regularization(self):
    """Trains an L2-regularized SVM on three real valued examples."""

    def _inputs():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['feature1'] = constant_op.constant([0.5, 1.0, 1.0])
      features['feature2'] = constant_op.constant([1.0, -1.0, 0.5])
      labels = constant_op.constant([1, 0, 1])
      return features, labels

    columns = [
        feature_column.real_valued_column(name)
        for name in ('feature1', 'feature2')
    ]
    classifier = svm.SVM(
        feature_columns=columns,
        example_id_column='example_id',
        l1_regularization=0.0,
        l2_regularization=1.0)
    classifier.fit(input_fn=_inputs, steps=30)
    results = classifier.evaluate(input_fn=_inputs, steps=1)
    loss, accuracy = results['loss'], results['accuracy']

    # The points are separable, and without regularization the margin
    # inequalities could be satisfied as well (for instance by w1=1.0,
    # w2=5.0). Regularization picks smaller weights, which leaves a small but
    # non-zero unregularized loss; all predictions should still be correct,
    # giving perfect accuracy.
    self.assertLess(loss, 0.1)
    self.assertAlmostEqual(accuracy, 1.0, places=3)
  def testPrepareInputsForRnnBatchSize2(self):
    """Runs the prepare-inputs-for-RNN check on a batch of two sequences."""
    num_unroll = 3

    # One array per unrolled step. In this fixture each row holds the two
    # sequence feature values for that step followed by the two context
    # feature values of the same example.
    expected_inputs = [
        np.array([[11., 31., 5., 7.], [21., 41., 6., 8.]]),
        np.array([[12., 32., 5., 7.], [22., 42., 6., 8.]]),
        np.array([[13., 33., 5., 7.], [23., 43., 6., 8.]])
    ]

    seq_features = {}
    seq_features['seq_feature0'] = constant_op.constant(
        [[11., 12., 13.], [21., 22., 23.]])
    seq_features['seq_feature1'] = constant_op.constant(
        [[31., 32., 33.], [41., 42., 43.]])

    seq_columns = [
        feature_column.real_valued_column(name, dimension=1)
        for name in ('seq_feature0', 'seq_feature1')
    ]

    ctx_features = {}
    ctx_features['ctx_feature0'] = constant_op.constant([[5.], [6.]])
    ctx_features['ctx_feature1'] = constant_op.constant([[7.], [8.]])

    self._test_prepare_inputs_for_rnn(
        seq_features, ctx_features, seq_columns, num_unroll, expected_inputs)
예제 #10
0
  def testBucketizedFeatures(self):
    """Trains SDCALogisticClassifier on two bucketized real valued columns."""

    def _inputs():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['price'] = constant_op.constant([600.0, 1000.0, 400.0])
      features['sq_footage'] = constant_op.constant(
          [[1000.0], [600.0], [700.0]])
      features['weights'] = constant_op.constant([[1.0], [1.0], [1.0]])
      labels = constant_op.constant([[1], [0], [1]])
      return features, labels

    with self._single_threaded_test_session():
      price_source = feature_column_lib.real_valued_column('price')
      price_col = feature_column_lib.bucketized_column(
          price_source, boundaries=[500.0, 700.0])
      sq_footage_source = feature_column_lib.real_valued_column('sq_footage')
      sq_footage_col = feature_column_lib.bucketized_column(
          sq_footage_source, boundaries=[650.0])

      classifier = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=[price_col, sq_footage_col],
          weight_column_name='weights',
          l2_regularization=1.0)
      classifier.fit(input_fn=_inputs, steps=50)
      results = classifier.evaluate(input_fn=_inputs, steps=1)
      self.assertGreater(results['accuracy'], 0.9)
예제 #11
0
  def testCreateSequenceFeatureSpec(self):
    """Compares the generated sequence parsing spec with the expected one."""
    hashed_col = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_source = fc.sparse_column_with_hash_bucket(
        "sparse_column_for_embedding", hash_bucket_size=10)
    embedded_col = fc.embedding_column(embedding_source, dimension=4)
    keyed_col = fc.sparse_column_with_keys(
        "id_column", ["marlo", "omar", "stringer"])
    weighted_col = fc.weighted_sparse_column(keyed_col, "id_weights_column")
    fixed_col = fc.real_valued_column("real_valued_column", dimension=2)
    fixed_default_col = fc.real_valued_column(
        "real_valued_default_column", dimension=5, default_value=3.0)
    var_len_sparse_col = fc._real_valued_var_len_column(
        "real_valued_var_len_column", default_value=3.0, is_sparse=True)
    var_len_dense_col = fc._real_valued_var_len_column(
        "real_valued_var_len_dense_column", default_value=4.0, is_sparse=False)

    columns = {
        hashed_col, embedded_col, weighted_col, fixed_col,
        fixed_default_col, var_len_sparse_col, var_len_dense_col,
    }
    actual_spec = fc._create_sequence_feature_spec_for_parsing(columns)

    # One entry per input key; the weighted column contributes both its id
    # key and its weights key.
    expected_spec = {}
    expected_spec["sparse_column"] = parsing_ops.VarLenFeature(dtypes.string)
    expected_spec["sparse_column_for_embedding"] = parsing_ops.VarLenFeature(
        dtypes.string)
    expected_spec["id_column"] = parsing_ops.VarLenFeature(dtypes.string)
    expected_spec["id_weights_column"] = parsing_ops.VarLenFeature(
        dtypes.float32)
    expected_spec["real_valued_column"] = parsing_ops.FixedLenSequenceFeature(
        shape=[2], dtype=dtypes.float32, allow_missing=False)
    expected_spec["real_valued_default_column"] = (
        parsing_ops.FixedLenSequenceFeature(
            shape=[5], dtype=dtypes.float32, allow_missing=True))
    expected_spec["real_valued_var_len_column"] = parsing_ops.VarLenFeature(
        dtype=dtypes.float32)
    expected_spec["real_valued_var_len_dense_column"] = (
        parsing_ops.FixedLenSequenceFeature(
            shape=[], dtype=dtypes.float32, allow_missing=True,
            default_value=4.0))

    self.assertDictEqual(expected_spec, actual_spec)
예제 #12
0
  def testRealValuedColumnDtypes(self):
    """Checks the parsing config per dtype and the rejection of strings."""
    # Without an explicit dtype the config is expected to use float32.
    default_column = fc.real_valued_column("rvc")
    expected_default = {
        "rvc": parsing_ops.FixedLenFeature([1], dtype=dtypes.float32)
    }
    self.assertDictEqual(expected_default, default_column.config)

    int_column = fc.real_valued_column("rvc", dtype=dtypes.int32)
    expected_int = {
        "rvc": parsing_ops.FixedLenFeature([1], dtype=dtypes.int32)
    }
    self.assertDictEqual(expected_int, int_column.config)

    error_pattern = "dtype must be convertible to float"
    with self.assertRaisesRegexp(ValueError, error_pattern):
      fc.real_valued_column("rvc", dtype=dtypes.string)
예제 #13
0
  def testBiasAndOtherColumnsFabricatedCentered(self):
    """Checks SDCALinearRegressor's bias weight on centered instances."""

    def _make_inputs():
      """Builds a dataset for checking the bias next to two other features.

      Half of the instances carry feature 'a' and the other half carry
      feature 'b'; the bias is expected to be added to every instance.
      0.1 of the 'a' instances are labeled 1 and 0.1 of the 'b' instances are
      labeled -1, so the weights are expected to be:
        bias: 0.0
        a: 0.1
        b: -0.1

      Returns:
        The test dataset as a (features, labels) tuple.
      """
      num_examples = 200
      half = num_examples // 2

      features = {}
      features['example_id'] = constant_op.constant(
          [str(index + 1) for index in range(num_examples)])
      features['a'] = constant_op.constant([[1]] * half + [[0]] * half)
      features['b'] = constant_op.constant([[0]] * half + [[1]] * half)

      # Every tenth instance of each half gets the non-zero label.
      a_labels = [[1 if index % 10 == 0 else 0] for index in range(half)]
      b_labels = [[-1 if index % 10 == 0 else 0] for index in range(half)]
      labels = constant_op.constant(a_labels + b_labels)
      return features, labels

    expected_weights = (
        ('linear/bias_weight', 0.0),
        ('linear/a/weight', 0.1),
        ('linear/b/weight', -0.1),
    )

    with self._single_threaded_test_session():
      columns = [
          feature_column_lib.real_valued_column('a'),
          feature_column_lib.real_valued_column('b'),
      ]
      regressor = sdca_estimator.SDCALinearRegressor(
          example_id_column='example_id', feature_columns=columns)

      regressor.fit(input_fn=_make_inputs, steps=100)

      variable_names = regressor.get_variable_names()
      for name, _ in expected_weights:
        self.assertIn(name, variable_names)
      for name, expected in expected_weights:
        self.assertNear(
            regressor.get_variable_value(name)[0], expected, err=0.05)
예제 #14
0
  def testSparseFeatures(self):
    """Trains SDCALogisticClassifier on a dense and a hashed sparse column."""

    def _inputs():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['price'] = constant_op.constant([[0.4], [0.6], [0.3]])
      features['country'] = sparse_tensor.SparseTensor(
          values=['IT', 'US', 'GB'],
          indices=[[0, 0], [1, 3], [2, 1]],
          dense_shape=[3, 5])
      features['weights'] = constant_op.constant([[1.0], [1.0], [1.0]])
      labels = constant_op.constant([[1], [0], [1]])
      return features, labels

    price_col = feature_column_lib.real_valued_column('price')
    country_col = feature_column_lib.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    classifier = sdca_estimator.SDCALogisticClassifier(
        example_id_column='example_id',
        feature_columns=[price_col, country_col],
        weight_column_name='weights')
    classifier.fit(input_fn=_inputs, steps=50)
    results = classifier.evaluate(input_fn=_inputs, steps=1)
    self.assertGreater(results['accuracy'], 0.9)
예제 #15
0
  def testFitAndEvaluateMultiClassFullDontThrowException(self):
    """Smoke-tests a 3-class GBDT classifier with the FULL_HESSIAN strategy.

    Runs fit, evaluate, export and predict, and checks that every prediction
    dict contains a "classes" entry.
    """
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 3
    learner_config.constraints.max_tree_depth = 1
    learner_config.multi_class_strategy = (
        learner_pb2.LearnerConfig.FULL_HESSIAN)

    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    classifier = estimator.GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        n_classes=learner_config.num_classes,
        num_trees=1,
        examples_per_layer=7,
        model_dir=model_dir,
        config=config,
        center_bias=False,
        feature_columns=[contrib_feature_column.real_valued_column("x")])

    classifier.fit(input_fn=_multiclass_train_input_fn, steps=100)
    classifier.evaluate(input_fn=_eval_input_fn, steps=1)
    classifier.export(self._export_dir_base)
    result_iter = classifier.predict(input_fn=_eval_input_fn)
    for prediction_dict in result_iter:
      # assertIn shows the dict contents on failure, unlike assertTrue(x in y).
      self.assertIn("classes", prediction_dict)
예제 #16
0
  def testBiasOnly(self):
    """Tests SDCALinearRegressor has a valid bias weight."""

    def input_fn():
      """Testing the bias weight when it's the only feature present.

      All of the instances in this input only have the bias feature, and a
      1/4 of the labels are positive. This means that the expected weight for
      the bias should be close to the average prediction, i.e 0.25.

      Returns:
        Training data for the test.
      """
      num_examples = 40
      return {
          'example_id':
              constant_op.constant([str(x + 1) for x in range(num_examples)]),
          # place_holder is an empty column which is always 0 (absent), because
          # LinearClassifier requires at least one column.
          'place_holder':
              constant_op.constant([[0.0]] * num_examples),
          # Compare by value: `is 0` tests object identity, which only works
          # through an interpreter detail (small-int caching).
      }, constant_op.constant([[1 if i % 4 == 0 else 0]
                               for i in range(num_examples)])

    place_holder = feature_column_lib.real_valued_column('place_holder')
    regressor = sdca_estimator.SDCALinearRegressor(
        example_id_column='example_id', feature_columns=[place_holder])
    regressor.fit(input_fn=input_fn, steps=100)
    self.assertNear(
        regressor.get_variable_value('linear/bias_weight')[0], 0.25, err=0.1)
예제 #17
0
  def testMultiDimensionalRealValuedFeaturesWithL2Regularization(self):
    """Trains an L2-regularized SVM on one two-dimensional real feature."""

    # Same data as testRealValuedFeaturesWithL2Regularization, except that the
    # two dense features of shape [3, 1] are merged into a single dense
    # feature of shape [3, 2].
    def _inputs():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['multi_dim_feature'] = constant_op.constant(
          [[0.5, 1.0], [1.0, -1.0], [1.0, 0.5]])
      labels = constant_op.constant([[1], [0], [1]])
      return features, labels

    column = feature_column.real_valued_column(
        'multi_dim_feature', dimension=2)
    classifier = svm.SVM(
        feature_columns=[column],
        example_id_column='example_id',
        l1_regularization=0.0,
        l2_regularization=1.0)
    classifier.fit(input_fn=_inputs, steps=30)
    results = classifier.evaluate(input_fn=_inputs, steps=1)
    loss, accuracy = results['loss'], results['accuracy']
    self.assertLess(loss, 0.1)
    self.assertAlmostEqual(accuracy, 1.0, places=3)
 def _getModelFnOpsForMode(self, mode):
   """Builds the RNN model_fn and returns the ops it produces for `mode`.

   Helper for testGetRnnModelFn{Train,Eval,Infer}().
   """
   inputs_column = feature_column.real_valued_column('inputs', dimension=1)
   features = {}
   features['inputs'] = constant_op.constant([1., 2., 3.])
   labels = constant_op.constant([1., 0., 1.])
   rnn_model_fn = ssre._get_rnn_model_fn(
       cell_type='basic_rnn',
       target_column=target_column_lib.multi_class_target(n_classes=2),
       optimizer='SGD',
       num_unroll=2,
       num_units=[4],
       num_threads=1,
       queue_capacity=10,
       batch_size=1,
       # Only CLASSIFICATION yields eval metrics to test for.
       problem_type=constants.ProblemType.CLASSIFICATION,
       sequence_feature_columns=[inputs_column],
       context_feature_columns=None,
       learning_rate=0.1)
   return rnn_model_fn(features=features, labels=labels, mode=mode)
예제 #19
0
  def testLinearRegression(self):
    """Checks that two identically seeded LinearRegressors end up equal."""
    my_seed = 42
    config = run_config.RunConfig(tf_random_seed=my_seed)
    boston = base.load_boston()
    columns = [feature_column.real_valued_column('', dimension=13)]

    def _fit_seeded_regressor():
      # Each regressor is built and fit for one step inside its own fresh
      # graph, with the Python RNG and the graph seed reset to the same value.
      with ops.Graph().as_default() as graph:
        random.seed(my_seed)
        graph.seed = my_seed
        variables.create_global_step()
        regressor = linear.LinearRegressor(
            optimizer=_NULL_OPTIMIZER, feature_columns=columns, config=config)
        regressor.fit(x=boston.data, y=boston.target, steps=1)
      return regressor

    first = _fit_seeded_regressor()
    second = _fit_seeded_regressor()

    self.assertAllClose(first.weights_, second.weights_)
    self.assertAllClose(first.bias_, second.bias_)
    first_scores = list(first.predict_scores(boston.data, as_iterable=True))
    second_scores = list(second.predict_scores(boston.data, as_iterable=True))
    self.assertAllClose(first_scores, second_scores, atol=1e-05)
예제 #20
0
  def testExportMonitorRegressionSignature(self):
    """Checks that ExportMonitor exports a regression signature."""

    def _regression_signature(examples, unused_features, predictions):
      # Returns the regression signature as the default signature together
      # with a dict of named signatures that contains only that signature.
      regression = exporter.regression_signature(examples, predictions)
      return regression, {'regression': regression}

    # NOTE(review): this seeds `random`, while the data below is drawn from
    # `np.random` -- confirm which module `random` refers to in this file.
    random.seed(42)
    x = np.random.rand(1000)
    y = 2 * x + 3
    columns = [feature_column.real_valued_column('', dimension=1)]
    regressor = learn.LinearRegressor(feature_columns=columns)
    export_dir = os.path.join(tempfile.mkdtemp(), 'export')
    monitor = learn.monitors.ExportMonitor(
        every_n_steps=1,
        export_dir=export_dir,
        exports_to_keep=1,
        signature_fn=_regression_signature)
    regressor.fit(x, y, steps=10, monitors=[monitor])

    self.assertTrue(gfile.Exists(export_dir))
    # With exports_to_keep=1 only the step-10 export is expected to remain.
    initial_export = os.path.join(export_dir, '00000000', 'export')
    with self.assertRaises(errors.NotFoundError):
      saver.checkpoint_exists(initial_export)
    final_export = os.path.join(export_dir, '00000010', 'export')
    self.assertTrue(saver.checkpoint_exists(final_export))
    # Validate the signature
    meta_path = os.path.join(export_dir, '00000010', 'export.meta')
    signature = self._get_default_signature(meta_path)
    self.assertTrue(signature.HasField('regression_signature'))
예제 #21
0
 def testCrossedColumnNotSupportRealValuedColumn(self):
   """Expects crossed_column to raise TypeError for a real valued column."""
   hashed = fc.sparse_column_with_hash_bucket("bbb", hash_bucket_size=100)
   expected_message = (
       "columns must be a set of _SparseColumn, _CrossedColumn, "
       "or _BucketizedColumn instances")
   with self.assertRaisesRegexp(TypeError, expected_message):
     mixed_columns = {hashed, fc.real_valued_column("real")}
     fc.crossed_column(mixed_columns, hash_bucket_size=10000)
예제 #22
0
  def testSparseFeatures(self):
    """Trains an SVM on a dense price column and a hashed sparse column."""

    def _inputs():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['price'] = constant_op.constant([[0.8], [0.6], [0.3]])
      features['country'] = sparse_tensor.SparseTensor(
          values=['IT', 'US', 'GB'],
          indices=[[0, 0], [1, 0], [2, 0]],
          dense_shape=[3, 1])
      labels = constant_op.constant([[0], [1], [1]])
      return features, labels

    price_col = feature_column.real_valued_column('price')
    country_col = feature_column.sparse_column_with_hash_bucket(
        'country', hash_bucket_size=5)
    classifier = svm.SVM(
        feature_columns=[price_col, country_col],
        example_id_column='example_id',
        l1_regularization=0.0,
        l2_regularization=1.0)
    classifier.fit(input_fn=_inputs, steps=30)
    results = classifier.evaluate(input_fn=_inputs, steps=1)
    self.assertAlmostEqual(results['accuracy'], 1.0, places=3)
 def testClassifierConstructor(self):
   """Builds a multi-value RNN classifier and fits it for 100 steps."""
   batch_size = 16
   num_classes = 2
   num_unroll = 32
   sequence_length = 32
   num_units = 4
   learning_rate = 0.5
   steps = 100
   train_input_fn = self._get_input_fn(sequence_length, seed=1234)
   model_dir = tempfile.mkdtemp()
   # The input column width is set to the same value as num_units.
   inputs_column = feature_column.real_valued_column(
       'inputs', dimension=num_units)
   classifier = ssre.multi_value_rnn_classifier(
       num_classes,
       num_units,
       num_unroll,
       batch_size,
       [inputs_column],
       learning_rate=learning_rate,
       model_dir=model_dir,
       queue_capacity=batch_size + 2,
       seed=1234)
   classifier.fit(input_fn=train_input_fn, steps=steps)
예제 #24
0
  def testBostonDNN(self):
    """Fits a DNNRegressor on Boston data and checks its variable shapes."""
    boston = base.load_boston()
    columns = [feature_column.real_valued_column("", dimension=13)]
    regressor = dnn.DNNRegressor(
        feature_columns=columns,
        hidden_units=[10, 20, 10],
        config=run_config.RunConfig(tf_random_seed=1))
    num_rows = boston.data.shape[0]
    regressor.fit(boston.data, boston.target, steps=300, batch_size=num_rows)

    # 13 inputs -> 10 -> 20 -> 10 hidden units -> 1 logit.
    expected_weight_shapes = (
        ("dnn/hiddenlayer_0/weights", (13, 10)),
        ("dnn/hiddenlayer_1/weights", (10, 20)),
        ("dnn/hiddenlayer_2/weights", (20, 10)),
        ("dnn/logits/weights", (10, 1)),
    )
    weights = [
        regressor.get_variable_value(name)
        for name, _ in expected_weight_shapes
    ]
    for value, (_, shape) in zip(weights, expected_weight_shapes):
      self.assertEqual(value.shape, shape)

    expected_bias_shapes = (
        ("dnn/hiddenlayer_0/biases", (10,)),
        ("dnn/hiddenlayer_1/biases", (20,)),
        ("dnn/hiddenlayer_2/biases", (10,)),
        ("dnn/logits/biases", (1,)),
    )
    biases = [
        regressor.get_variable_value(name)
        for name, _ in expected_bias_shapes
    ]
    for value, (_, shape) in zip(biases, expected_bias_shapes):
      self.assertEqual(value.shape, shape)
예제 #25
0
  def testRealValuedLinearFeatures(self):
    """Checks that SDCALinearRegressor recovers known linear weights."""
    x = [[1.2, 2.0, -1.5], [-2.0, 3.0, -0.5], [1.0, -0.5, 4.0]]
    weights = [[3.0], [-1.2], [0.5]]
    # Labels are an exact linear function of the inputs.
    y = np.dot(x, weights)

    def _inputs():
      features = {}
      features['example_id'] = constant_op.constant(['1', '2', '3'])
      features['x'] = constant_op.constant(x)
      features['weights'] = constant_op.constant([[10.0], [10.0], [10.0]])
      return features, constant_op.constant(y)

    x_column = feature_column_lib.real_valued_column('x', dimension=3)
    regressor = sdca_estimator.SDCALinearRegressor(
        example_id_column='example_id',
        feature_columns=[x_column],
        weight_column_name='weights')
    regressor.fit(input_fn=_inputs, steps=20)
    results = regressor.evaluate(input_fn=_inputs, steps=1)
    self.assertLess(results['loss'], 0.01)
    self.assertIn('linear/x/weight', regressor.get_variable_names())
    learned = regressor.get_variable_value('linear/x/weight')
    expected = [row[0] for row in weights]
    self.assertAllClose(expected, learned.flatten(), rtol=0.1)
예제 #26
0
 def testRealValuedColumnDeepCopy(self):
   """Checks that a deep copy of a real valued column keeps its attributes."""
   original = fc.real_valued_column(
       "aaa", dimension=3, default_value=[1, 2, 3], dtype=dtypes.int32)
   duplicate = copy.deepcopy(original)
   # The list default is expected to be exposed as a tuple.
   expected_attributes = (
       ("name", "aaa"),
       ("dimension", 3),
       ("default_value", (1, 2, 3)),
   )
   for attribute, expected in expected_attributes:
     self.assertEqual(getattr(duplicate, attribute), expected)
예제 #27
0
def _make_experiment_fn(output_dir):
  """Creates experiment for DNNBoostedTreeCombinedRegressor.

  Args:
    output_dir: Directory handed to `_get_estimator` when building the
      estimator.

  Returns:
    A `tf.contrib.learn.Experiment` that trains and evaluates on the Boston
    housing data and carries one parsing-based export strategy.
  """
  train_split, test_split = tf.keras.datasets.boston_housing.load_data()
  x_train, y_train = train_split
  x_test, y_test = test_split

  make_numpy_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn
  # Training repeats indefinitely with shuffling; evaluation is a single
  # ordered pass over the test split.
  train_input_fn = make_numpy_input_fn(
      x={"x": x_train},
      y=y_train,
      batch_size=FLAGS.batch_size,
      num_epochs=None,
      shuffle=True)
  eval_input_fn = make_numpy_input_fn(
      x={"x": x_test}, y=y_test, num_epochs=1, shuffle=False)

  boston_column = feature_column.real_valued_column(
      "x", dimension=_BOSTON_NUM_FEATURES)
  feature_columns = [boston_column]

  parsing_spec = tf.contrib.layers.create_feature_spec_for_parsing(
      feature_columns)
  serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(
      parsing_spec)
  export_strategy = saved_model_export_utils.make_export_strategy(
      serving_input_fn)

  combined_estimator = _get_estimator(output_dir, feature_columns)
  return tf.contrib.learn.Experiment(
      estimator=combined_estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=None,
      eval_steps=FLAGS.num_eval_steps,
      eval_metrics=None,
      export_strategies=[export_strategy])
예제 #28
0
  def testTrainWithWeights(self):
    """Checks that DNNRegressor trains and evaluates with a weight column."""

    def _input_fn_train():
      # Four rows that all have x = 1: one is labelled 1 (y = x) and three are
      # labelled 0 (y = Not(x)). The y = x row carries a much larger weight,
      # so the model should fit it better than the other three.
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      ones = array_ops.ones(shape=[4, 1], dtype=dtypes.float32)
      row_weights = constant_op.constant([[100.], [3.], [2.], [2.]])
      return {'x': ones, 'w': row_weights}, labels

    def _input_fn_eval():
      # Four equally weighted rows, all following y = x.
      labels = constant_op.constant([[1.], [1.], [1.], [1.]])
      ones = array_ops.ones(shape=[4, 1], dtype=dtypes.float32)
      row_weights = constant_op.constant([[1.], [1.], [1.], [1.]])
      return {'x': ones, 'w': row_weights}, labels

    x_column = feature_column.real_valued_column('x')
    seeded_config = run_config.RunConfig(tf_random_seed=1)
    regressor = dnn.DNNRegressor(
        weight_column_name='w',
        feature_columns=[x_column],
        hidden_units=[3, 3],
        config=seeded_config)

    regressor.fit(input_fn=_input_fn_train, steps=5)
    eval_results = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
    self.assertIn('loss', eval_results)
예제 #29
0
  def testExport(self):
    """Trains a DebugClassifier for a few steps and exports it for servo."""

    def input_fn():
      age = constant_op.constant([1])
      spoken_language = sparse_tensor.SparseTensor(
          values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      labels = constant_op.constant([[1]])
      return {'age': age, 'language': spoken_language}, labels

    hashed_language = feature_column.sparse_column_with_hash_bucket(
        'language', 100)
    age_column = feature_column.real_valued_column('age')
    language_embedding = feature_column.embedding_column(
        hashed_language, dimension=1)
    # These columns are only used to parse examples at export time.
    parse_columns = [age_column, language_embedding]

    seeded_config = run_config.RunConfig(tf_random_seed=1)
    classifier = debug.DebugClassifier(config=seeded_config)
    classifier.fit(input_fn=input_fn, steps=5)

    def default_input_fn(unused_estimator, examples):
      return feature_column_ops.parse_feature_columns_from_examples(
          examples, parse_columns)

    target_dir = tempfile.mkdtemp()
    classifier.export(target_dir, input_fn=default_input_fn)
예제 #30
0
  def test_checkpoint_and_export(self):
    """Counts eval/export calls made while training with checkpoint_and_export."""
    checkpoint_dir = tempfile.mkdtemp()
    checkpoint_config = run_config_lib.RunConfig(save_checkpoints_steps=3)
    iris_column = feature_column.real_valued_column('feature', dimension=4)
    est = dnn.DNNClassifier(
        n_classes=3,
        feature_columns=[iris_column],
        hidden_units=[3, 3],
        model_dir=checkpoint_dir,
        config=checkpoint_config)

    exp_strategy = saved_model_export_utils.make_export_strategy(
        est, 'export_input', exports_to_keep=None)

    iris_input_fn = test_data.iris_input_multiclass_fn
    ex = experiment.Experiment(
        est,
        train_input_fn=iris_input_fn,
        eval_input_fn=iris_input_fn,
        export_strategies=(exp_strategy,),
        train_steps=8,
        checkpoint_and_export=True,
        eval_delay_secs=0)

    with test.mock.patch.object(ex, '_maybe_export'), \
        test.mock.patch.object(ex, '_call_evaluate'):
      ex.train_and_evaluate()
      # Eval and export are called after steps 1, 4, 7, and 8 (after training
      # is completed).
      for patched in (ex._maybe_export, ex._call_evaluate):
        self.assertEqual(patched.call_count, 4)
    def testDNNModel(self):
        """Trains and evaluates a 3-class DNN estimator on matrix input."""
        iris_column = feature_column.real_valued_column('feature',
                                                        dimension=4)
        three_class_head = head_lib._multi_class_head(n_classes=3)
        dnn_under_test = _dnn_estimator(
            three_class_head,
            feature_columns=[iris_column],
            hidden_units=[3, 3])

        dnn_under_test.fit(input_fn=_iris_input_fn, steps=1000)
        dnn_under_test.evaluate(input_fn=_iris_input_fn, steps=100)
 def testCreateFeatureSpec_RealValuedColumnWithDefaultValue(self):
     """Checks the parsing spec built for real valued columns with defaults.

     Covers scalar and list defaults, with and without an explicit
     dimension, plus a column whose dimension is None.
     """
     feature_columns = [
         fc.real_valued_column("real_valued_column1", default_value=2),
         fc.real_valued_column("real_valued_column2", 5, default_value=4),
         fc.real_valued_column("real_valued_column3", default_value=[8]),
         fc.real_valued_column(
             "real_valued_column4", 3, default_value=[1, 0, 6]),
         fc.real_valued_column(
             "real_valued_column5", dimension=None, default_value=2),
     ]
     config = fc.create_feature_spec_for_parsing(feature_columns)
     self.assertEqual(5, len(config))

     float32 = dtypes.float32
     expected_spec = {
         "real_valued_column1":
             parsing_ops.FixedLenFeature(
                 [1], dtype=float32, default_value=[2.]),
         "real_valued_column2":
             parsing_ops.FixedLenFeature(
                 [5], dtype=float32, default_value=[4., 4., 4., 4., 4.]),
         "real_valued_column3":
             parsing_ops.FixedLenFeature(
                 [1], dtype=float32, default_value=[8.]),
         "real_valued_column4":
             parsing_ops.FixedLenFeature(
                 [3], dtype=float32, default_value=[1., 0., 6.]),
         "real_valued_column5":
             parsing_ops.VarLenFeature(dtype=float32),
     }
     self.assertDictEqual(expected_spec, config)
예제 #33
0
    def testBucketizedColumnDeepCopy(self):
        """Tests that a bucketized column can be deep-copied.

        This test requires that the bucketized column also accept
        boundaries as tuples.
        """
        expected_name = "a_bucketized"
        expected_boundaries = (1., 2., 3.)

        source_column = fc.real_valued_column("a")
        # Repeated boundaries are passed in; the column is expected to
        # expose only the distinct values.
        bucketized = fc.bucketized_column(source_column,
                                          [1., 2., 2., 3., 3.])
        self.assertEqual(bucketized.name, expected_name)
        self.assertTupleEqual(bucketized.boundaries, expected_boundaries)

        bucketized_copy = copy.deepcopy(bucketized)
        self.assertEqual(bucketized_copy.name, expected_name)
        self.assertTupleEqual(bucketized_copy.boundaries,
                              expected_boundaries)
    def testFitAndEvaluateDontThrowException(self):
        """Smoke test: fit and evaluate a DNNBoostedTreeCombinedClassifier."""
        tree_config = learner_pb2.LearnerConfig()
        tree_config.num_classes = 2
        tree_config.constraints.max_tree_depth = 1
        scratch_dir = tempfile.mkdtemp()
        default_config = run_config.RunConfig()

        # The DNN and the trees each get their own column over feature "x".
        dnn_columns = [feature_column.real_valued_column("x")]
        tree_columns = [feature_column.real_valued_column("x")]
        classifier = estimator.DNNBoostedTreeCombinedClassifier(
            dnn_hidden_units=[1],
            dnn_feature_columns=dnn_columns,
            tree_learner_config=tree_config,
            num_trees=1,
            tree_examples_per_layer=3,
            n_classes=2,
            model_dir=scratch_dir,
            config=default_config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=False,
            tree_feature_columns=tree_columns)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
    def testMixedFeaturesArbitraryWeightsPartitioned(self):
        """Tests SDCALinearRegressor works with a mix of features (partitioned)."""
        def input_fn():
            features = {
                'example_id': constant_op.constant(['1', '2', '3']),
                'price': constant_op.constant([[0.6], [0.8], [0.3]]),
                'sq_footage': constant_op.constant(
                    [[900.0], [700.0], [600.0]]),
                'country': sparse_tensor.SparseTensor(
                    values=['IT', 'US', 'GB'],
                    indices=[[0, 0], [1, 3], [2, 1]],
                    dense_shape=[3, 5]),
                'weights': constant_op.constant([[3.0], [5.0], [7.0]])
            }
            labels = constant_op.constant([[1.55], [-1.25], [-3.0]])
            return features, labels

        with self._single_threaded_test_session():
            price_col = feature_column_lib.real_valued_column('price')
            footage_col = feature_column_lib.real_valued_column('sq_footage')
            footage_buckets = feature_column_lib.bucketized_column(
                footage_col, boundaries=[650.0, 800.0])
            country_col = feature_column_lib.sparse_column_with_hash_bucket(
                'country', hash_bucket_size=5)
            footage_x_country = feature_column_lib.crossed_column(
                [footage_buckets, country_col], hash_bucket_size=10)
            all_columns = [
                price_col, footage_buckets, country_col, footage_x_country
            ]
            # Model variables are split into two shards along axis 0.
            two_shards = partitioned_variables.fixed_size_partitioner(
                num_shards=2, axis=0)
            regressor = sdca_estimator.SDCALinearRegressor(
                example_id_column='example_id',
                feature_columns=all_columns,
                l2_regularization=1.0,
                weight_column_name='weights',
                partitioner=two_shards)
            regressor.fit(input_fn=input_fn, steps=20)
            metrics = regressor.evaluate(input_fn=input_fn, steps=1)
            self.assertLess(metrics['loss'], 0.05)
예제 #36
0
  def testRealValuedFeatures(self):
    """Tests SDCALogisticClassifier works with real valued features."""

    def input_fn():
      features = {
          'example_id': constant_op.constant(['1', '2']),
          'maintenance_cost': constant_op.constant([500.0, 200.0]),
          'sq_footage': constant_op.constant([[800.0], [600.0]]),
          'weights': constant_op.constant([[1.0], [1.0]])
      }
      labels = constant_op.constant([[0], [1]])
      return features, labels

    with self._single_threaded_test_session():
      dense_columns = [
          feature_column_lib.real_valued_column(feature_name)
          for feature_name in ('maintenance_cost', 'sq_footage')
      ]
      classifier = sdca_estimator.SDCALogisticClassifier(
          example_id_column='example_id',
          feature_columns=dense_columns,
          weight_column_name='weights')
      classifier.fit(input_fn=input_fn, steps=100)
      metrics = classifier.evaluate(input_fn=input_fn, steps=1)
      self.assertLess(metrics['loss'], 0.05)
    def testPartitionedMixedFeatures(self):
        """Tests SDCALogisticClassifier with a mix of features (partitioned)."""
        def input_fn():
            features = {
                'example_id': constant_op.constant(['1', '2', '3']),
                'price': constant_op.constant([[0.6], [0.8], [0.3]]),
                'sq_footage': constant_op.constant([900.0, 700.0, 600.0]),
                'country': sparse_tensor.SparseTensor(
                    values=['IT', 'US', 'GB'],
                    indices=[[0, 0], [1, 3], [2, 1]],
                    dense_shape=[3, 5]),
                'weights': constant_op.constant([[3.0], [1.0], [1.0]])
            }
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        with self._single_threaded_test_session():
            price_col = feature_column_lib.real_valued_column('price')
            footage_col = feature_column_lib.real_valued_column('sq_footage')
            footage_buckets = feature_column_lib.bucketized_column(
                footage_col, boundaries=[650.0, 800.0])
            country_col = feature_column_lib.sparse_column_with_hash_bucket(
                'country', hash_bucket_size=5)
            footage_x_country = feature_column_lib.crossed_column(
                [footage_buckets, country_col], hash_bucket_size=10)
            all_columns = [
                price_col, footage_buckets, country_col, footage_x_country
            ]
            # Model variables are split into two shards along axis 0.
            two_shards = partitioned_variables.fixed_size_partitioner(
                num_shards=2, axis=0)
            classifier = sdca_estimator.SDCALogisticClassifier(
                example_id_column='example_id',
                feature_columns=all_columns,
                weight_column_name='weights',
                partitioner=two_shards)
            classifier.fit(input_fn=input_fn, steps=50)
            metrics = classifier.evaluate(input_fn=input_fn, steps=1)
            self.assertGreater(metrics['accuracy'], 0.9)
예제 #38
0
    def testMixedFeatures(self):
        """Tests SVM classifier with a mix of features."""
        def input_fn():
            features = {
                'example_id': constant_op.constant(['1', '2', '3']),
                'price': constant_op.constant([[0.6], [0.8], [0.3]]),
                'sq_footage': constant_op.constant(
                    [[900.0], [700.0], [600.0]]),
                'country': sparse_tensor.SparseTensor(
                    values=['IT', 'US', 'GB'],
                    indices=[[0, 0], [1, 3], [2, 1]],
                    dense_shape=[3, 5]),
                'weights': constant_op.constant([[3.0], [1.0], [1.0]])
            }
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        price_col = feature_column.real_valued_column('price')
        footage_col = feature_column.real_valued_column('sq_footage')
        footage_buckets = feature_column.bucketized_column(
            footage_col, boundaries=[650.0, 800.0])
        country_col = feature_column.sparse_column_with_hash_bucket(
            'country', hash_bucket_size=5)
        footage_x_country = feature_column.crossed_column(
            [footage_buckets, country_col], hash_bucket_size=10)
        all_columns = [
            price_col, footage_buckets, country_col, footage_x_country
        ]
        svm_classifier = svm.SVM(
            feature_columns=all_columns,
            example_id_column='example_id',
            weight_column_name='weights',
            l1_regularization=0.1,
            l2_regularization=1.0)

        svm_classifier.fit(input_fn=input_fn, steps=30)
        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        self.assertAlmostEqual(metrics['accuracy'], 1.0, places=3)
 def testExportMonitor(self):
     """Checks that ExportMonitor writes exports while a regressor is fit.

     A LinearRegressor is fit for 10 steps on y = 2x + 3 with an
     ExportMonitor that fires every step and keeps two exports; the result
     is then validated through `self._assert_export`.
     """
     import os  # Local import: only needed to build the export path.

     random.seed(42)
     # The inputs are drawn from `np.random`, so seed that generator
     # explicitly to make the generated data reproducible.
     np.random.seed(42)
     x = np.random.rand(1000)
     y = 2 * x + 3
     cont_features = [feature_column.real_valued_column('', dimension=1)]
     # Join with a path separator so exports land inside the fresh temp
     # directory. Plain string concatenation produced a sibling
     # '<tmpdir>export/' path instead. The trailing '' keeps the trailing
     # separator the original string had.
     export_dir = os.path.join(tempfile.mkdtemp(), 'export', '')
     export_monitor = learn.monitors.ExportMonitor(
         every_n_steps=1,
         export_dir=export_dir,
         exports_to_keep=2,
         signature_fn=export.generic_signature_fn)
     regressor = learn.LinearRegressor(feature_columns=cont_features)
     regressor.fit(x, y, steps=10, monitors=[export_monitor])
     self._assert_export(export_monitor, export_dir, 'generic_signature')
  def testLegacyConstructor(self):
    """Exercise legacy constructor function.

    Builds an estimator through
    `dynamic_rnn_estimator.multi_value_rnn_classifier` and fits it on a
    synthetic alternating 0/1 sequence. The test makes no assertions; it only
    checks that construction and training run.
    """
    num_units = 16
    num_layers = 6
    output_keep_prob = 0.9
    input_keep_prob = 0.7
    batch_size = 11
    learning_rate = 0.1
    train_sequence_length = 21
    train_steps = 121

    def get_input_fn(batch_size, sequence_length, state_dict, starting_step=0):
      """Returns an input_fn yielding (features dict, labels) for the RNN."""

      def input_fn():
        # Alternating 0/1 pattern with one more column than sequence_length,
        # so that labels and inputs can be taken as overlapping windows.
        sequence = constant_op.constant(
            [[(starting_step + i + j) % 2 for j in range(sequence_length + 1)]
             for i in range(batch_size)],
            dtype=dtypes.int32)
        # Labels are the first `sequence_length` columns.
        labels = array_ops.slice(sequence, [0, 0],
                                 [batch_size, sequence_length])
        # Inputs are the window shifted by one column, cast to float and given
        # a trailing axis.
        inputs = array_ops.expand_dims(
            math_ops.to_float(
                array_ops.slice(sequence, [0, 1], [batch_size, sequence_length
                                                  ])), 2)
        # NOTE: input_dict aliases state_dict, so the 'inputs' entry is written
        # into the dict object the caller passed in.
        input_dict = state_dict
        input_dict['inputs'] = inputs
        return input_dict, labels

      return input_fn

    seq_columns = [feature_column.real_valued_column('inputs', dimension=1)]
    config = run_config.RunConfig(tf_random_seed=21212)

    model_dir = tempfile.mkdtemp()
    sequence_estimator = dynamic_rnn_estimator.multi_value_rnn_classifier(
        num_classes=2,
        num_units=num_units,
        num_rnn_layers=num_layers,
        input_keep_probability=input_keep_prob,
        output_keep_probability=output_keep_prob,
        sequence_feature_columns=seq_columns,
        learning_rate=learning_rate,
        config=config,
        model_dir=model_dir)

    train_input_fn = get_input_fn(
        batch_size, train_sequence_length, state_dict={})

    sequence_estimator.fit(input_fn=train_input_fn, steps=train_steps)
예제 #41
0
    def testPrepareInputsForRnnSparseAndDense(self):
        """Checks RNN input preparation for one sparse and one dense feature.

        The sparse 'wire_cast' feature is embedded with an all-ones
        initializer and a 'sum' combiner; 'seq_feature0' is a dense
        two-dimensional feature. The comparison itself is delegated to
        `self._test_prepare_inputs_for_rnn`.
        """
        num_unroll = 2
        embedding_dimension = 8
        dense_dimension = 2

        # One array per unrolled step, one row per batch element. Each row
        # holds eight values followed by the two dense 'seq_feature0' values
        # for that step. NOTE(review): the leading eight entries presumably
        # are the summed all-ones embeddings (2.0 where two ids share a
        # step) -- confirm against the helper.
        expected = [
            np.array([[1., 1., 1., 1., 1., 1., 1., 1., 111., 112.],
                      [1., 1., 1., 1., 1., 1., 1., 1., 211., 212.],
                      [1., 1., 1., 1., 1., 1., 1., 1., 311., 312.]]),
            np.array([[1., 1., 1., 1., 1., 1., 1., 1., 121., 122.],
                      [2., 2., 2., 2., 2., 2., 2., 2., 221., 222.],
                      [1., 1., 1., 1., 1., 1., 1., 1., 321., 322.]])
        ]

        # Batch element 1 has two ids at step 1 ([1, 1, 0] and [1, 1, 1]);
        # every other (batch, step) position has exactly one id.
        sequence_features = {
            'wire_cast':
            sparse_tensor.SparseTensor(indices=[[0, 0, 0], [0, 1,
                                                            0], [1, 0, 0],
                                                [1, 1, 0], [1, 1, 1],
                                                [2, 0, 0], [2, 1, 1]],
                                       values=[
                                           b'marlo', b'stringer', b'omar',
                                           b'stringer', b'marlo', b'marlo',
                                           b'omar'
                                       ],
                                       dense_shape=[3, 2, 2]),
            'seq_feature0':
            constant_op.constant([[[111., 112.], [121., 122.]],
                                  [[211., 212.], [221., 222.]],
                                  [[311., 312.], [321., 322.]]])
        }

        wire_cast = feature_column.sparse_column_with_keys(
            'wire_cast', ['marlo', 'omar', 'stringer'])
        wire_cast_embedded = feature_column.embedding_column(
            wire_cast,
            dimension=embedding_dimension,
            combiner='sum',
            initializer=init_ops.ones_initializer())
        seq_feature0_column = feature_column.real_valued_column(
            'seq_feature0', dimension=dense_dimension)

        sequence_feature_columns = [seq_feature0_column, wire_cast_embedded]

        # No context features are supplied in this scenario.
        context_features = None

        self._test_prepare_inputs_for_rnn(sequence_features, context_features,
                                          sequence_feature_columns, num_unroll,
                                          expected)
예제 #42
0
    def testRealValuedFeaturesPerfectlySeparable(self):
        """Tests SVM classifier with real valued features."""
        def input_fn():
            features = {
                'example_id': constant_op.constant(['1', '2', '3']),
                'feature1': constant_op.constant([[0.0], [1.0], [3.0]]),
                'feature2': constant_op.constant([[1.0], [-1.2], [1.0]]),
            }
            labels = constant_op.constant([[1], [0], [1]])
            return features, labels

        dense_columns = [
            feature_column.real_valued_column(feature_name)
            for feature_name in ('feature1', 'feature2')
        ]
        svm_classifier = svm.SVM(
            feature_columns=dense_columns,
            example_id_column='example_id',
            l1_regularization=0.0,
            l2_regularization=0.0)
        svm_classifier.fit(input_fn=input_fn, steps=30)
        metrics = svm_classifier.evaluate(input_fn=input_fn, steps=1)
        loss, accuracy = metrics['loss'], metrics['accuracy']
        # The points are not only separable but there exist weights (for
        # instance w1=0.0, w2=1.0) that satisfy the margin inequalities
        # (y_i* w^T*x_i >=1). The unregularized loss should therefore be 0.0.
        self.assertAlmostEqual(loss, 0.0, places=3)
        self.assertAlmostEqual(accuracy, 1.0, places=3)
예제 #43
0
 def testIrisDNN(self):
     """Trains a DNNClassifier on iris and checks weight and bias shapes."""
     iris = base.load_iris()
     iris_column = feature_column.real_valued_column("", dimension=4)
     classifier = dnn.DNNClassifier(
         feature_columns=[iris_column],
         hidden_units=[10, 20, 10],
         n_classes=3,
         config=run_config.RunConfig(tf_random_seed=1))
     classifier.fit(iris.data, iris.target, max_steps=200)

     # One weight matrix per hidden layer, plus the logits layer.
     expected_shapes = [(4, 10), (10, 20), (20, 10), (10, 3)]
     weights = classifier.weights_
     for layer_index, expected_shape in enumerate(expected_shapes):
         self.assertEqual(weights[layer_index].shape, expected_shape)

     biases = classifier.bias_
     self.assertEqual(len(biases), 4)
예제 #44
0
    def benchmarkLogisticFloatLabel(self):
        """Benchmarks a two-class DNNClassifier trained on float labels.

        Trains for 1000 steps on three examples labelled 0.8, 0.0 and 0.2,
        then reports evaluation metrics and predictions through the
        `_report_metrics` / `_report_predictions` helpers.
        """
        def _input_fn(num_epochs=None):
            """Returns (features, float labels), optionally epoch-limited."""
            features = {
                'age':
                input_lib.limit_epochs(constant_op.constant(
                    ((50, ), (20, ), (10, ))),
                                       num_epochs=num_epochs),
                'language':
                sparse_tensor.SparseTensor(values=input_lib.limit_epochs(
                    ('en', 'fr', 'zh'), num_epochs=num_epochs),
                                           indices=((0, 0), (0, 1), (2, 0)),
                                           dense_shape=(3, 2))
            }
            return features, constant_op.constant(((0.8, ), (0., ), (0.2, )),
                                                  dtype=dtypes.float32)

        lang_column = feature_column.sparse_column_with_hash_bucket(
            'language', hash_bucket_size=20)
        n_classes = 2
        classifier = dnn.DNNClassifier(
            n_classes=n_classes,
            feature_columns=(feature_column.embedding_column(lang_column,
                                                             dimension=1),
                             feature_column.real_valued_column('age')),
            hidden_units=(3, 3),
            config=run_config.RunConfig(tf_random_seed=1))
        steps = 1000
        # fit() is chained straight into evaluate(); metrics holds the
        # evaluation results.
        metrics = classifier.fit(input_fn=_input_fn,
                                 steps=steps).evaluate(input_fn=_input_fn,
                                                       steps=1)
        estimator_test_utils.assert_in_range(steps, steps + 5, 'global_step',
                                             metrics)

        # Prediction probabilities mirror the labels column, which proves that the
        # classifier learns from float input.
        self._report_metrics(metrics)
        # Predictions use a single epoch of the same input.
        self._report_predictions(
            classifier=classifier,
            input_fn=functools.partial(_input_fn, num_epochs=1),
            iters=metrics['global_step'],
            n_examples=3,
            n_classes=n_classes,
            expected_probabilities=((0.2, 0.8), (1., 0.), (0.8, 0.2)),
            expected_classes=(1, 0, 0),
            benchmark_name_override=(
                'DNNClassifierBenchmark.benchmarkLogisticFloatLabel_predictions'
            ))
    def benchmarkTensorData(self):
        """Benchmarks DNNLinearCombinedClassifier on tensor-valued iris input.

        The DNN side uses four real valued columns (one per iris feature);
        the linear side uses their bucketized versions plus a hashed dummy
        sparse column. Metrics are checked by
        `self._assertSingleClassMetrics`.
        """
        def _input_fn():
            """Returns iris features keyed '0'..'3' plus a dummy sparse one."""
            iris = test_data.prepare_iris_data_for_logistic_regression()
            features = {}
            for i in range(4):
                # The following shows how to provide the Tensor data for
                # RealValuedColumns.
                features.update({
                    str(i):
                    array_ops.reshape(
                        constant_op.constant(iris.data[:, i],
                                             dtype=dtypes.float32), (-1, 1))
                })
            # The following shows how to provide the SparseTensor data for
            # a SparseColumn.
            features['dummy_sparse_column'] = sparse_tensor.SparseTensor(
                values=('en', 'fr', 'zh'),
                indices=((0, 0), (0, 1), (60, 0)),
                dense_shape=(len(iris.target), 2))
            # Labels are reshaped into a single int32 column.
            labels = array_ops.reshape(
                constant_op.constant(iris.target, dtype=dtypes.int32), (-1, 1))
            return features, labels

        # Loaded again outside the input_fn to derive bucket boundaries.
        iris = test_data.prepare_iris_data_for_logistic_regression()
        cont_features = [
            feature_column.real_valued_column(str(i)) for i in range(4)
        ]
        # Each continuous column is bucketized with boundaries computed from
        # that feature's own data.
        linear_features = [
            feature_column.bucketized_column(
                cont_features[i],
                test_data.get_quantile_based_buckets(iris.data[:, i], 10))
            for i in range(4)
        ]
        linear_features.append(
            feature_column.sparse_column_with_hash_bucket(
                'dummy_sparse_column', hash_bucket_size=100))

        classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
            model_dir=tempfile.mkdtemp(),
            linear_feature_columns=linear_features,
            dnn_feature_columns=cont_features,
            dnn_hidden_units=(3, 3))

        # _ITERS is a module-level constant defined outside this block.
        metrics = classifier.fit(input_fn=_input_fn,
                                 steps=_ITERS).evaluate(input_fn=_input_fn,
                                                        steps=100)
        self._assertSingleClassMetrics(metrics)
  def testDNNRegression(self):
    """Checks that two identically seeded DNNRegressors end up identical.

    Two regressors are built and fit for one step in separate graphs that
    share the same seed and RunConfig; their weights, biases and predictions
    on the Boston data are then compared.
    """
    my_seed = 42
    config = run_config.RunConfig(tf_random_seed=my_seed)
    boston = base.load_boston()
    columns = [feature_column.real_valued_column('', dimension=13)]

    # NOTE(review): _NULL_OPTIMIZER is defined outside this block; presumably
    # it leaves the variables untouched so only initialization is compared --
    # confirm.
    with ops.Graph().as_default() as g1:
      random.seed(my_seed)
      g1.seed = my_seed
      variables.create_global_step()
      regressor1 = dnn.DNNRegressor(
          hidden_units=[10],
          feature_columns=columns,
          optimizer=_NULL_OPTIMIZER,
          config=config)
      regressor1.fit(x=boston.data, y=boston.target, steps=1)

    # Second regressor: same setup, built in a fresh graph.
    with ops.Graph().as_default() as g2:
      random.seed(my_seed)
      g2.seed = my_seed
      variables.create_global_step()
      regressor2 = dnn.DNNRegressor(
          hidden_units=[10],
          feature_columns=columns,
          optimizer=_NULL_OPTIMIZER,
          config=config)
      regressor2.fit(x=boston.data, y=boston.target, steps=1)

    # Compare hidden-layer and logits weights pairwise.
    weights1 = ([regressor1.get_variable_value('dnn/hiddenlayer_0/weights')] +
                [regressor1.get_variable_value('dnn/logits/weights')])
    weights2 = ([regressor2.get_variable_value('dnn/hiddenlayer_0/weights')] +
                [regressor2.get_variable_value('dnn/logits/weights')])
    for w1, w2 in zip(weights1, weights2):
      self.assertAllClose(w1, w2)

    # Compare hidden-layer and logits biases pairwise.
    biases1 = ([regressor1.get_variable_value('dnn/hiddenlayer_0/biases')] +
               [regressor1.get_variable_value('dnn/logits/biases')])
    biases2 = ([regressor2.get_variable_value('dnn/hiddenlayer_0/biases')] +
               [regressor2.get_variable_value('dnn/logits/biases')])
    for b1, b2 in zip(biases1, biases2):
      self.assertAllClose(b1, b2)
    # Predictions must agree within an absolute tolerance of 1e-05.
    self.assertAllClose(
        list(regressor1.predict(
            boston.data, as_iterable=True)),
        list(regressor2.predict(
            boston.data, as_iterable=True)),
        atol=1e-05)
  def setUp(self):
    """Creates the RNN cell, mock target column and feature columns."""
    super(DynamicRnnEstimatorTest, self).setUp()
    self.rnn_cell = rnn_cell.BasicRNNCell(self.NUM_RNN_CELL_UNITS)
    self.mock_target_column = MockTargetColumn(
        num_label_columns=self.NUM_LABEL_COLUMNS)

    # Context features: a one-hot encoded 'location'.
    location_ids = feature_column.sparse_column_with_keys(
        'location', keys=['west_side', 'east_side', 'nyc'])
    self.context_feature_columns = [
        feature_column.one_hot_column(location_ids)
    ]

    # Sequence features: an embedded 'wire_cast' and dense 'measurements'.
    cast_ids = feature_column.sparse_column_with_keys(
        'wire_cast', ['marlo', 'omar', 'stringer'])
    cast_embedding = feature_column.embedding_column(cast_ids, dimension=8)
    dense_measurements = feature_column.real_valued_column(
        'measurements', dimension=2)
    self.sequence_feature_columns = [dense_measurements, cast_embedding]
def _build_estimator_for_resource_export_test():
    """Builds a fitted Estimator and serving input_fn for export tests.

    The estimator's model_fn ignores its features and labels and works only
    with mutable hash tables, so an export has table resources to deal with.

    Returns:
      A tuple `(est, serving_input_fn)`: the Estimator after one fit step,
      and a parsing serving input function built from a single
      4-dimensional real valued column named 'feature'.
    """
    def _input_fn():
        """Returns iris data as a float32 'feature' tensor with int32 labels."""
        iris = base.load_iris()
        return {
            'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
        }, constant_op.constant(iris.target, shape=[150], dtype=dtypes.int32)

    feature_columns = [
        feature_column_lib.real_valued_column('feature', dimension=4)
    ]

    def resource_constant_model_fn(unused_features, unused_labels, mode):
        """A model_fn that loads a constant from a resource and serves it."""
        assert mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL,
                        model_fn.ModeKeys.INFER)

        # -1 is used as the tables' default value and also as the placeholder
        # returned in the second tuple slot below.
        const = constant_op.constant(-1, dtype=dtypes.int64)
        table = lookup.MutableHashTable(dtypes.string,
                                        dtypes.int64,
                                        const,
                                        name='LookupTableModel')
        if mode in (model_fn.ModeKeys.TRAIN, model_fn.ModeKeys.EVAL):
            key = constant_op.constant(['key'])
            value = constant_op.constant([42], dtype=dtypes.int64)
            train_op_1 = table.insert(key, value)
            # A second table that is only created in TRAIN/EVAL mode.
            training_state = lookup.MutableHashTable(
                dtypes.string,
                dtypes.int64,
                const,
                name='LookupTableTrainingState')
            training_op_2 = training_state.insert(key, value)
            # NOTE(review): the returned triple is presumably
            # (predictions, loss, train_op) -- confirm against Estimator.
            return const, const, control_flow_ops.group(
                train_op_1, training_op_2)
        if mode == model_fn.ModeKeys.INFER:
            key = constant_op.constant(['key'])
            prediction = table.lookup(key)
            return prediction, const, control_flow_ops.no_op()

    est = estimator.Estimator(model_fn=resource_constant_model_fn)
    est.fit(input_fn=_input_fn, steps=1)

    # feature_columns is only used here, to derive the serving parse spec.
    feature_spec = feature_column_lib.create_feature_spec_for_parsing(
        feature_columns)
    serving_input_fn = input_fn_utils.build_parsing_serving_input_fn(
        feature_spec)
    return est, serving_input_fn
예제 #49
0
    def benchmarkLogisticMatrixData(self):
        """Benchmarks DNNClassifier on the iris logistic input function."""
        iris_column = feature_column.real_valued_column('feature',
                                                        dimension=4)
        classifier = dnn.DNNClassifier(
            feature_columns=(iris_column, ),
            hidden_units=(3, 3),
            config=run_config.RunConfig(tf_random_seed=1))
        input_fn = test_data.iris_input_logistic_fn
        steps = 400

        trained = classifier.fit(input_fn=input_fn, steps=steps)
        metrics = trained.evaluate(input_fn=input_fn, steps=1)

        # (lower bound, upper bound, metric key) for each checked metric.
        expected_ranges = (
            (steps, steps + 5, 'global_step'),
            (0.9, 1.0, 'accuracy'),
            (0.0, 0.3, 'loss'),
        )
        for lower, upper, key in expected_ranges:
            estimator_test_utils.assert_in_range(lower, upper, key, metrics)

        self._report_metrics(metrics)
예제 #50
0
def get_experiment(output_dir):
    """Build a simple Experiment around a DNNRegressor.

    Cluster config can be set in the environment.

    Args:
      output_dir: forwarded to the estimator as its `model_dir`.

    Returns:
      The constructed `experiment.Experiment`.
    """
    # Building a RunConfig explicitly lets us set extra options. It is
    # optional: the default RunConfig() for Estimators picks up the cluster
    # configuration from the TF_CONFIG environment variable.
    run_cfg = RunConfig(log_device_placement=True)

    columns = [feature_column.real_valued_column('feature', dimension=4)]
    regressor = dnn.DNNRegressor(
        feature_columns=columns,
        model_dir=output_dir,
        hidden_units=[3, 3],
        config=run_cfg)

    # The same input function feeds both training and evaluation.
    iris_input_fn = test_data.iris_input_logistic_fn
    return experiment.Experiment(
        estimator=regressor,
        train_input_fn=iris_input_fn,
        eval_input_fn=iris_input_fn,
        train_steps=500000)
예제 #51
0
def _infer_real_valued_column_for_tensor(name, tensor):
    """Creates a real_valued_column for given tensor and name.

    Args:
      name: name passed to `fc.real_valued_column`; also used in error messages.
      tensor: the tensor to infer the column from. It must not be a
        `SparseTensor` and its dtype must be integer or floating.

    Returns:
      The result of `fc.real_valued_column` called with `tensor.dtype` and a
      `dimension` equal to the product of every shape entry after the first.

    Raises:
      ValueError: if `tensor` is a `SparseTensor`, or if its dtype is neither
        integer nor floating.
    """
    if isinstance(tensor, sparse_tensor_py.SparseTensor):
        # The template must be formatted explicitly: passing `name` and
        # `tensor` as extra exception args leaves the `{}` placeholders as-is.
        message = (
            'SparseTensor is not supported for auto detection. Please define '
            'corresponding FeatureColumn for tensor {} {}.')
        raise ValueError(message.format(name, tensor))

    if not (tensor.dtype.is_integer or tensor.dtype.is_floating):
        message = (
            'Non integer or non floating types are not supported for auto detection'
            '. Please define corresponding FeatureColumn for tensor {} {}.')
        raise ValueError(message.format(name, tensor))

    shape = tensor.get_shape().as_list()
    # The first shape entry is skipped; the remaining ones are multiplied
    # together into a single flat dimension.
    # NOTE(review): a `None` entry after the first would make the
    # multiplication below raise TypeError - confirm callers never pass one.
    dimension = 1
    for dim_size in shape[1:]:
        dimension *= dim_size
    return fc.real_valued_column(name, dimension=dimension, dtype=tensor.dtype)
    def benchmarkMultiClass(self):
        """Benchmarks a 3-class DNNLinearCombinedClassifier.

        The linear part uses a bucketized version of the 4-dimensional
        'feature' column and the DNN part uses the column itself. Training and
        evaluation both read `test_data.iris_input_multiclass_fn`.
        """
        iris_dataset = base.load_iris()
        dense_column = feature_column.real_valued_column('feature',
                                                         dimension=4)
        # Bucket boundaries are derived from the iris data itself.
        boundaries = test_data.get_quantile_based_buckets(
            iris_dataset.data, 10)
        bucket_column = feature_column.bucketized_column(
            dense_column, boundaries)

        classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
            n_classes=3,
            linear_feature_columns=(bucket_column, ),
            dnn_feature_columns=(dense_column, ),
            dnn_hidden_units=(3, 3))

        iris_input_fn = test_data.iris_input_multiclass_fn
        trained = classifier.fit(input_fn=iris_input_fn, steps=_ITERS)
        metrics = trained.evaluate(input_fn=iris_input_fn, steps=100)
        self._assertCommonMetrics(metrics)
예제 #53
0
    def testFitAndEvaluateDontThrowException(self):
        """Runs fit, evaluate and export on a one-tree GBDT classifier.

        There are no explicit assertions: the test passes when none of the
        three calls raises.
        """
        tree_config = learner_pb2.LearnerConfig()
        tree_config.num_classes = 2
        tree_config.constraints.max_tree_depth = 1
        checkpoint_dir = tempfile.mkdtemp()
        default_run_config = run_config.RunConfig()
        columns = [contrib_feature_column.real_valued_column("x")]

        gbdt = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=tree_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=checkpoint_dir,
            config=default_run_config,
            feature_columns=columns)

        gbdt.fit(input_fn=_train_input_fn, steps=15)
        gbdt.evaluate(input_fn=_eval_input_fn, steps=1)
        gbdt.export(self._export_dir_base)
    def benchmarkMatrixData(self):
        """Benchmarks DNNLinearCombinedClassifier on the logistic iris input.

        The linear part uses a bucketized version of the 4-dimensional
        'feature' column and the DNN part uses the column itself. The model is
        written to a fresh temporary directory.
        """
        logistic_iris = test_data.prepare_iris_data_for_logistic_regression()
        dense_column = feature_column.real_valued_column('feature',
                                                         dimension=4)
        # Bucket boundaries are derived from the prepared iris data.
        boundaries = test_data.get_quantile_based_buckets(
            logistic_iris.data, 10)
        bucket_column = feature_column.bucketized_column(
            dense_column, boundaries)

        checkpoint_dir = tempfile.mkdtemp()
        classifier = dnn_linear_combined.DNNLinearCombinedClassifier(
            model_dir=checkpoint_dir,
            linear_feature_columns=(bucket_column, ),
            dnn_feature_columns=(dense_column, ),
            dnn_hidden_units=(3, 3))

        iris_input_fn = test_data.iris_input_logistic_fn
        trained = classifier.fit(input_fn=iris_input_fn, steps=_ITERS)
        metrics = trained.evaluate(input_fn=iris_input_fn, steps=100)
        self._assertSingleClassMetrics(metrics)
예제 #55
0
    def benchmarkLogisticTensorData(self):
        """Benchmarks a DNNClassifier fed by tensors built in an input_fn.

        The features are a dense 'age' column and a sparse 'language' column
        (hashed, then embedded). After fit/evaluate the metric ranges are
        checked, and both metrics and predictions are reported.
        """

        def _input_fn(num_epochs=None):
            # Tensors are created in the same order as they appear in the
            # returned structure: age, language, then labels.
            age_values = constant_op.constant(((.8, ), (0.2, ), (.1, )))
            age = input_lib.limit_epochs(age_values, num_epochs=num_epochs)
            language_values = input_lib.limit_epochs(('en', 'fr', 'zh'),
                                                     num_epochs=num_epochs)
            language = sparse_tensor.SparseTensor(
                values=language_values,
                indices=((0, 0), (0, 1), (2, 0)),
                dense_shape=(3, 2))
            labels = constant_op.constant(((1, ), (0, ), (0, )),
                                          dtype=dtypes.int32)
            return {'age': age, 'language': language}, labels

        lang_column = feature_column.sparse_column_with_hash_bucket(
            'language', hash_bucket_size=20)
        embedded_language = feature_column.embedding_column(lang_column,
                                                            dimension=1)
        age_column = feature_column.real_valued_column('age')
        seeded_config = run_config.RunConfig(tf_random_seed=1)
        classifier = dnn.DNNClassifier(
            feature_columns=(embedded_language, age_column),
            hidden_units=(3, 3),
            config=seeded_config)

        num_train_steps = 100
        trained = classifier.fit(input_fn=_input_fn, steps=num_train_steps)
        metrics = trained.evaluate(input_fn=_input_fn, steps=1)

        # (lower bound, upper bound, metric key) checked in this order.
        expected_ranges = (
            (num_train_steps, num_train_steps + 5, 'global_step'),
            (0.9, 1.0, 'accuracy'),
            (0.0, 0.3, 'loss'),
        )
        for lower, upper, metric_key in expected_ranges:
            estimator_test_utils.assert_in_range(lower, upper, metric_key,
                                                 metrics)

        self._report_metrics(metrics)

        # Predictions are made over a single epoch of the same input.
        single_epoch_input_fn = functools.partial(_input_fn, num_epochs=1)
        self._report_predictions(
            classifier=classifier,
            input_fn=single_epoch_input_fn,
            iters=metrics['global_step'],
            n_examples=3,
            n_classes=2,
            expected_classes=(1, 0, 0),
            benchmark_name_override=(
                'DNNClassifierBenchmark.benchmarkLogisticTensorData_predictions'
            ))
예제 #56
0
    def benchmarkLogisticNpMatrixData(self):
        """Benchmarks a DNNClassifier fed through the `x`/`y` arguments.

        The data comes from
        `test_data.prepare_iris_data_for_logistic_regression()`. Only
        'global_step' and 'accuracy' are range-checked before the metrics are
        reported.
        """
        # The real-valued column is deliberately created with an empty name.
        unnamed_column = feature_column.real_valued_column('', dimension=4)
        seeded_config = run_config.RunConfig(tf_random_seed=1)
        classifier = dnn.DNNClassifier(
            feature_columns=(unnamed_column, ),
            hidden_units=(3, 3),
            config=seeded_config)

        iris = test_data.prepare_iris_data_for_logistic_regression()
        features = iris.data
        labels = iris.target
        num_train_steps = 100
        trained = classifier.fit(x=features, y=labels, steps=num_train_steps)
        metrics = trained.evaluate(x=features, y=labels, steps=1)

        estimator_test_utils.assert_in_range(
            num_train_steps, num_train_steps + 5, 'global_step', metrics)
        estimator_test_utils.assert_in_range(0.8, 1.0, 'accuracy', metrics)

        self._report_metrics(metrics)
예제 #57
0
    def testOverridesGlobalSteps(self):
        """Checks the checkpoint's global step equals the override value.

        The classifier is created with `override_global_step_value` set, fit
        for 15 steps, and the checkpoint in its model dir is then checked via
        `self._assert_checkpoint`.
        """
        overridden_step = 10000000

        tree_config = learner_pb2.LearnerConfig()
        tree_config.num_classes = 2
        tree_config.constraints.max_tree_depth = 2
        checkpoint_dir = tempfile.mkdtemp()
        default_run_config = run_config.RunConfig()
        columns = [contrib_feature_column.real_valued_column("x")]

        gbdt = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=tree_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=checkpoint_dir,
            config=default_run_config,
            feature_columns=columns,
            output_leaf_index=False,
            override_global_step_value=overridden_step)

        gbdt.fit(input_fn=_train_input_fn, steps=15)
        self._assert_checkpoint(gbdt.model_dir, global_step=overridden_step)
예제 #58
0
    def testRealValuedFeatureWithHigherDimension(self):
        """Checks SDCALogisticClassifier on one 2-dimensional dense feature."""

        def input_fn():
            # Same data as the input_fn in testRealValuedFeatures, except that
            # the two 1-dimensional dense features are merged into a single
            # 2-dimensional feature.
            example_ids = constant_op.constant(['1', '2'])
            dense_values = constant_op.constant([[500.0, 800.0],
                                                 [200.0, 600.0]])
            labels = constant_op.constant([[0], [1]])
            features = {
                'example_id': example_ids,
                'dense_feature': dense_values,
            }
            return features, labels

        two_dim_column = feature_column_lib.real_valued_column(
            'dense_feature', dimension=2)
        classifier = SDCALogisticClassifier(
            example_id_column='example_id',
            feature_columns=[two_dim_column])
        classifier.fit(input_fn=input_fn, steps=100)

        eval_metrics = classifier.evaluate(input_fn=input_fn, steps=1)
        self.assertLess(eval_metrics['loss'], 0.05)
예제 #59
0
    def benchmarkMultiClassMatrixData(self):
        """Benchmarks 3-class classification with matrix data as input.

        Fits a DNNClassifier on `test_data.iris_input_multiclass_fn`, evaluates
        for one step, range-checks 'global_step', 'accuracy' and 'loss', and
        reports the metrics.
        """
        num_train_steps = 500
        iris_input_fn = test_data.iris_input_multiclass_fn

        dense_column = feature_column.real_valued_column('feature',
                                                         dimension=4)
        seeded_config = run_config.RunConfig(tf_random_seed=1)
        classifier = dnn.DNNClassifier(
            n_classes=3,
            feature_columns=(dense_column, ),
            hidden_units=(3, 3),
            config=seeded_config)

        trained = classifier.fit(input_fn=iris_input_fn,
                                 steps=num_train_steps)
        metrics = trained.evaluate(input_fn=iris_input_fn, steps=1)

        # (lower bound, upper bound, metric key) checked in this order.
        expected_ranges = (
            (num_train_steps, num_train_steps + 5, 'global_step'),
            (0.9, 1.0, 'accuracy'),
            (0.0, 0.4, 'loss'),
        )
        for lower, upper, metric_key in expected_ranges:
            estimator_test_utils.assert_in_range(lower, upper, metric_key,
                                                 metrics)

        self._report_metrics(metrics)
예제 #60
0
    def testThatLeafIndexIsInPredictions(self):
        """Checks each prediction contains "leaf_index" and "logits" keys.

        The classifier is created with `output_leaf_index=True` and fit for 15
        steps; every dict yielded by `predict` is then inspected.
        """
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=True)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        result_iter = classifier.predict(input_fn=_eval_input_fn)
        # assertIn names the missing key and the container on failure, whereas
        # assertTrue(key in container) only reports "False is not true".
        for prediction_dict in result_iter:
            self.assertIn("leaf_index", prediction_dict)
            self.assertIn("logits", prediction_dict)