Exemple #1
0
  def test_read_higgs_data(self):
    """Tests read_higgs_data() with a bad path and with a valid data_dir.

    First checks that appending a bogus suffix to self.data_dir makes
    read_higgs_data() raise a RuntimeError whose message matches
    "Error loading data.*". Then checks the shapes of the train/eval arrays
    returned when self.data_dir is used unchanged.
    """
    # Error when a wrong data_dir is given. The result is deliberately not
    # bound to any name: the call is expected to raise before returning.
    # NOTE(review): assertRaisesRegexp is a deprecated alias of
    # assertRaisesRegex in the stdlib unittest -- confirm which spelling the
    # base test class supports before renaming.
    with self.assertRaisesRegexp(RuntimeError, "Error loading data.*"):
      train_higgs.read_higgs_data(
          self.data_dir + "non-existing-path",
          train_start=0, train_count=15, eval_start=15, eval_count=5)

    # Loading fine with the correct data_dir.
    train_data, eval_data = train_higgs.read_higgs_data(
        self.data_dir,
        train_start=0, train_count=15, eval_start=15, eval_count=5)
    # Row counts follow train_count / eval_count; both arrays have 29 columns.
    self.assertEqual((15, 29), train_data.shape)
    self.assertEqual((5, 29), eval_data.shape)
  def test_read_higgs_data(self):
    """Tests read_higgs_data() for both the failure and the success path.

    A path built by appending 'non-existing-path' to self.data_dir must
    produce a RuntimeError matching 'Error loading data.*'; the unmodified
    self.data_dir must yield a (15, 29) train array and a (5, 29) eval array.
    """
    # The same slicing arguments are used for both calls below.
    slicing = dict(train_start=0, train_count=15, eval_start=15, eval_count=5)

    # Error when a wrong data_dir is given. Nothing is assigned here because
    # the call is expected to raise instead of returning.
    # NOTE(review): assertRaisesRegexp is the deprecated spelling of
    # assertRaisesRegex in stdlib unittest -- verify what the base test class
    # provides before switching.
    with self.assertRaisesRegexp(RuntimeError, 'Error loading data.*'):
      train_higgs.read_higgs_data(
          self.data_dir + 'non-existing-path', **slicing)

    # Loading fine with the correct data_dir.
    train_data, eval_data = train_higgs.read_higgs_data(
        self.data_dir, **slicing)
    self.assertEqual((15, 29), train_data.shape)
    self.assertEqual((5, 29), eval_data.shape)
Exemple #3
0
    def test_make_inputs_from_np_arrays(self):
        """Checks what make_inputs_from_np_arrays() returns.

        Column 0 of the loaded rows is passed as the label and the remaining
        columns as features. The returned feature names, feature columns and
        the values produced by one call of the input function are verified.
        """
        num_rows = 15
        num_features = 28

        loaded_rows, _ = train_higgs.read_higgs_data(
            self.data_dir,
            train_start=0,
            train_count=num_rows,
            eval_start=15,
            eval_count=5)
        feature_block = loaded_rows[:, 1:]
        label_block = loaded_rows[:, 0:1]
        input_fn, feature_names, feature_columns = (
            train_higgs.make_inputs_from_np_arrays(
                features_np=feature_block, label_np=label_block))

        # Names are expected to run from feature_01 through feature_28.
        expected_names = [
            "feature_%02d" % index for index in range(1, num_features + 1)
        ]
        self.assertAllEqual(feature_names, expected_names)

        # Every feature must come back as a bucketized column.
        self.assertEqual(num_features, len(feature_columns))
        reference_column = tf.feature_column.bucketized_column(
            tf.feature_column.numeric_column("feature_01"),
            boundaries=[0, 1, 2])  # Placeholder boundaries, only the type matters.
        expected_column_type = type(reference_column)
        for column in feature_columns:
            self.assertIsInstance(column, expected_column_type)
            # At least 2 boundaries.
            self.assertGreaterEqual(len(column.boundaries), 2)
        # The bucketized columns must wrap source columns with matching names.
        source_names = [
            column.source_column.name for column in feature_columns
        ]
        self.assertAllEqual(feature_names, source_names)

        # Pull one element from the input function and evaluate it.
        next_element = input_fn().make_one_shot_iterator().get_next()
        feature_tensors, label_tensor = next_element
        with tf.Session() as sess:
            feature_values, _ = sess.run((feature_tensors, label_tensor))
        self.assertIsInstance(feature_values, dict)
        self.assertAllEqual(feature_names, sorted(feature_values))
        observed_shapes = [
            feature_values[name].shape for name in feature_names
        ]
        self.assertAllEqual([[num_rows, 1]] * num_features, observed_shapes)

        # Spot-check the actual values of the first and the eleventh feature.
        expected_first = [
            0.869293, 0.907542, 0.798834, 1.344384, 1.105009, 1.595839,
            0.409391, 0.933895, 1.405143, 1.176565, 0.945974, 0.739356,
            1.384097, 1.383548, 1.343652
        ]
        first_values = np.squeeze(feature_values[feature_names[0]], axis=1)
        self.assertAllClose(expected_first, first_values)

        expected_eleventh = [
            -0.653674, -0.213641, 1.540659, -0.676015, 1.020974, 0.643109,
            -1.038338, -2.653732, 0.567342, 0.534315, 0.720819, -0.481741,
            1.409523, -0.307865, 1.474605
        ]
        eleventh_values = np.squeeze(
            feature_values[feature_names[10]], axis=1)
        self.assertAllClose(expected_eleventh, eleventh_values)
Exemple #4
0
  def test_make_inputs_from_np_arrays(self):
    """Exercises make_inputs_from_np_arrays() on the first 15 data rows.

    The first column of the loaded array is used as the label and the rest as
    features. The test then verifies the returned feature names, the type and
    boundaries of the feature columns, and the evaluated feature values.
    """
    higgs_rows, _ = train_higgs.read_higgs_data(
        self.data_dir,
        train_start=0, train_count=15, eval_start=15, eval_count=5)
    made_inputs = train_higgs.make_inputs_from_np_arrays(
        features_np=higgs_rows[:, 1:], label_np=higgs_rows[:, 0:1])
    input_fn, feature_names, feature_columns = made_inputs

    # Check feature_names.
    wanted_names = ["feature_%02d" % number for number in range(1, 29)]
    self.assertAllEqual(feature_names, wanted_names)

    # Check feature columns.
    self.assertEqual(28, len(feature_columns))
    # A throwaway bucketized column is built only to obtain its type; the
    # boundaries passed here are dummies.
    numeric_source = tf.feature_column.numeric_column("feature_01")
    bucketized_type = type(
        tf.feature_column.bucketized_column(
            numeric_source, boundaries=[0, 1, 2]))
    for bucketized in feature_columns:
      self.assertIsInstance(bucketized, bucketized_type)
      # At least 2 boundaries.
      self.assertGreaterEqual(len(bucketized.boundaries), 2)
    # Tests that the source column names of the bucketized columns match.
    self.assertAllEqual(
        feature_names,
        [bucketized.source_column.name for bucketized in feature_columns])

    # Check features.
    feature_ops, label_ops = input_fn().make_one_shot_iterator().get_next()
    with tf.Session() as sess:
      evaluated, _ = sess.run((feature_ops, label_ops))
    self.assertIsInstance(evaluated, dict)
    self.assertAllEqual(feature_names, sorted(evaluated))
    shapes = [evaluated[name].shape for name in feature_names]
    self.assertAllEqual([[15, 1]] * 28, shapes)

    # Validate actual values of some features: (index into feature_names,
    # expected values), checked in the order listed.
    spot_checks = (
        (0,
         [0.869293, 0.907542, 0.798834, 1.344384, 1.105009, 1.595839,
          0.409391, 0.933895, 1.405143, 1.176565, 0.945974, 0.739356,
          1.384097, 1.383548, 1.343652]),
        (10,
         [-0.653674, -0.213641, 1.540659, -0.676015, 1.020974, 0.643109,
          -1.038338, -2.653732, 0.567342, 0.534315, 0.720819, -0.481741,
          1.409523, -0.307865, 1.474605]),
    )
    for name_index, wanted_values in spot_checks:
      actual_values = np.squeeze(evaluated[feature_names[name_index]], 1)
      self.assertAllClose(wanted_values, actual_values)