コード例 #1
0
 def test_to_instance_dicts_error(self,
                                  feature_spec,
                                  feed_dict,
                                  error_msg,
                                  error_type=ValueError):
     """Checks that unbatching `feed_dict` fails with the expected error.

     Args:
       feature_spec: Feature spec from which the schema is derived.
       feed_dict: Batch handed to `impl_helper.to_instance_dicts`.
       error_msg: Regular expression the raised error message must match.
       error_type: Exception class expected to be raised (ValueError unless
         overridden).
     """
     derived_schema = dataset_schema.from_feature_spec(feature_spec)
     expect_failure = self.assertRaisesRegexp(error_type, error_msg)
     with expect_failure:
         impl_helper.to_instance_dicts(derived_schema, feed_dict)
コード例 #2
0
    def testMakeOutputDictErrorSparse(self):
        """Checks that malformed sparse fetches are rejected with ValueError.

        Four inputs are tried in turn: a sparse value that cannot be
        represented as a VarLenFeature, a sparse value of invalid rank, a
        sparse value whose indices are out of order, and two sparse values
        whose batch dimensions disagree.
        """
        single_feature_schema = self.toSchema(
            {'a': tf.VarLenFeature(tf.string)})

        # Sparse value that cannot be represented as a VarLenFeature.
        unrepresentable = tf.SparseTensorValue(
            indices=np.array([(0, 2), (0, 4), (0, 8)]),
            values=np.array([10.0, 20.0, 30.0]),
            dense_shape=(1, 20))
        with self.assertRaisesRegexp(
                ValueError, 'cannot be decoded by ListColumnRepresentation'):
            impl_helper.to_instance_dicts(single_feature_schema,
                                          {'a': unrepresentable})

        # Sparse value of invalid rank (three index columns).
        wrong_rank = tf.SparseTensorValue(
            indices=np.array([(0, 0, 1), (0, 0, 2), (0, 0, 3)]),
            values=np.array([10.0, 20.0, 30.0]),
            dense_shape=(1, 10, 10))
        with self.assertRaisesRegexp(
                ValueError, 'cannot be decoded by ListColumnRepresentation'):
            impl_helper.to_instance_dicts(single_feature_schema,
                                          {'a': wrong_rank})

        # Sparse value whose row indices are not in ascending order.
        unordered = tf.SparseTensorValue(
            indices=np.array([(0, 2), (2, 4), (1, 8)]),
            values=np.array([10.0, 20.0, 30.0]),
            dense_shape=(3, 20))
        with self.assertRaisesRegexp(ValueError,
                                     'Encountered out-of-order sparse index'):
            impl_helper.to_instance_dicts(single_feature_schema,
                                          {'a': unordered})

        # Two sparse values whose batch dimension sizes differ (1 vs. 2).
        two_feature_schema = self.toSchema({
            'a': tf.VarLenFeature(tf.string),
            'b': tf.VarLenFeature(tf.string)
        })
        mismatched_batches = {
            'a': tf.SparseTensorValue(indices=np.array([(0, 0)]),
                                      values=np.array([10.0]),
                                      dense_shape=(1, 20)),
            'b': tf.SparseTensorValue(indices=np.array([(0, 0)]),
                                      values=np.array([10.0]),
                                      dense_shape=(2, 20)),
        }
        batch_size_pattern = (
            r'Inconsistent batch sizes: "\w" had batch dimension \d, "\w" had batch'
            r' dimension \d')
        with self.assertRaisesRegexp(ValueError, batch_size_pattern):
            impl_helper.to_instance_dicts(two_feature_schema,
                                          mismatched_batches)
コード例 #3
0
 def testMakeOutputDictErrorDense(self):
     """Checks that dense fetches with unequal batch sizes are rejected."""
     two_scalar_schema = self.toSchema({
         'a': tf.FixedLenFeature((), tf.string),
         'b': tf.FixedLenFeature((), tf.string)
     })
     # 'a' carries one row while 'b' carries two, so the batch sizes disagree.
     mismatched_batches = {'a': np.array([1]), 'b': np.array([1, 2])}
     batch_size_pattern = (
         r'Inconsistent batch sizes: "\w" had batch dimension \d, "\w" had batch'
         r' dimension \d')
     with self.assertRaisesRegexp(ValueError, batch_size_pattern):
         impl_helper.to_instance_dicts(two_scalar_schema, mismatched_batches)
コード例 #4
0
ファイル: impl.py プロジェクト: sswapnil2/transform
def _convert_and_unbatch_to_instance_dicts(batch_dict, schema,
                                           passthrough_keys):
  """Unbatches `batch_dict` and re-attaches the pass-through columns.

  Args:
    batch_dict: Dict of batched values. Entries named in `passthrough_keys`
      are removed before conversion and re-attached to each instance.
    schema: Schema forwarded to `impl_helper.to_instance_dicts`.
    passthrough_keys: Iterable of keys in `batch_dict` whose data bypasses
      the conversion.

  Returns:
    The instance dicts produced by `impl_helper.to_instance_dicts`, each
    extended with one value per pass-through key.

  Raises:
    ValueError: If a pass-through column holds more than one distinct value
      and its length differs from the number of output instances.
  """
  # Mutating PCollection elements is not allowed, so the keys are popped from
  # a shallow copy rather than from the caller's dict.
  if passthrough_keys:
    batch_dict = copy.copy(batch_dict)
  passthrough_data = {}
  for passthrough_key in passthrough_keys:
    passthrough_data[passthrough_key] = batch_dict.pop(passthrough_key)

  instances = impl_helper.to_instance_dicts(schema, batch_dict)

  for key, column in six.iteritems(passthrough_data):
    distinct_values = set(column)
    if len(distinct_values) == 1:
      # A column with a single distinct value is repeated for every output
      # instance, so the size check below only fires when the column holds
      # more than one value.
      column = (distinct_values.pop(),) * len(instances)
    if len(column) != len(instances):
      raise ValueError(
          'Cannot pass-through data when input and output batch sizes '
          'are different ({} vs. {})'.format(len(column), len(instances)))
    for instance, value in zip(instances, column):
      instance[key] = value

  return instances
コード例 #5
0
  def testToInstanceDicts(self):
    """Checks that a two-row batch dict is split into two instance dicts."""
    batch = {
        'a': [100, 200],
        'b': [10.0, 20.0],
        'c': [[40.0], [80.0]],
        'd': [[[1.0, 2.0], [3.0, 4.0]],
              [[5.0, 6.0], [7.0, 8.0]]],
        'e': [['doe', 'a', 'deer'],
              ['a', 'female', 'deer']],
        'f': ([[2, 4, 8], []],
              [[10.0, 20.0, 30.0], []])
    }
    # Expected per-row contents; 'f' is a pair compared element by element.
    expected_rows = [
        {
            'a': 100,
            'b': 10.0,
            'c': [40.0],
            'd': [[1.0, 2.0], [3.0, 4.0]],
            'e': ['doe', 'a', 'deer'],
            'f': ([2, 4, 8], [10.0, 20.0, 30.0]),
        },
        {
            'a': 200,
            'b': 20.0,
            'c': [80.0],
            'd': [[5.0, 6.0], [7.0, 8.0]],
            'e': ['a', 'female', 'deer'],
            'f': ([], []),
        },
    ]

    unbatched = impl_helper.to_instance_dicts(batch)
    self.assertEqual(2, len(unbatched))
    self.assertSetEqual(set(six.iterkeys(unbatched[0])),
                        {'a', 'b', 'c', 'd', 'e', 'f'})
    for row_index, expected in enumerate(expected_rows):
      for name in ('a', 'b', 'c', 'd', 'e'):
        self.assertAllEqual(unbatched[row_index][name], expected[name])
      self.assertEqual(len(unbatched[row_index]['f']), 2)
      self.assertAllEqual(unbatched[row_index]['f'][0], expected['f'][0])
      self.assertAllEqual(unbatched[row_index]['f'][1], expected['f'][1])
コード例 #6
0
  def testMakeOutputDictVarLen(self):
    """Checks that rows without sparse entries keep the feature's dtype.

    Each feature has entries only in rows 0 and 2 of a four-row batch, so
    rows 1 and 3 exercise the empty ndarray codepaths.
    """
    # Specifically test the empty ndarray optimization codepaths.
    schema = dataset_schema.from_feature_spec({
        'a': tf.VarLenFeature(tf.int64),
        'b': tf.VarLenFeature(tf.float32),
        'c': tf.VarLenFeature(tf.string),
    })

    # The builtin `object` replaces the `np.object` alias, which newer NumPy
    # releases no longer provide; both name the same dtype.
    fetches = {
        'a': tf.SparseTensorValue(
            indices=np.array([(0, 0), (2, 0)]),
            values=np.array([0, 1], np.int64),
            dense_shape=(4, 1)),
        'b': tf.SparseTensorValue(
            indices=np.array([(0, 0), (2, 0)]),
            values=np.array([0.5, 1.5], np.float32),
            dense_shape=(4, 1)),
        'c': tf.SparseTensorValue(
            indices=np.array([(0, 0), (2, 0)]),
            values=np.array(['hello', 'goodbye'], object),
            dense_shape=(4, 1)),
    }

    instance_dicts = impl_helper.to_instance_dicts(schema, fetches)
    self.assertEqual(4, len(instance_dicts))
    # Rows 1 and 3 have no sparse indices for any feature.
    self.assertEqual(instance_dicts[1]['a'].dtype, np.int64)
    self.assertEqual(instance_dicts[3]['a'].dtype, np.int64)
    self.assertEqual(instance_dicts[1]['b'].dtype, np.float32)
    self.assertEqual(instance_dicts[3]['b'].dtype, np.float32)
    self.assertEqual(instance_dicts[1]['c'].dtype, object)
    self.assertEqual(instance_dicts[3]['c'].dtype, object)
コード例 #7
0
    def testMakeOutputDict(self):
        """Checks unbatching of dense, var-len and sparse fetches together."""
        schema = self.toSchema({
            'a': tf.FixedLenFeature(None, tf.int64),
            'b': tf.FixedLenFeature([], tf.float32),
            'c': tf.FixedLenFeature([1], tf.float32),
            'd': tf.FixedLenFeature([2, 2], tf.float32),
            'e': tf.VarLenFeature(tf.string),
            'f': tf.SparseFeature('idx', 'val', tf.float32, 10)
        })

        fetches = {
            'a': np.array([100, 200]),
            'b': np.array([10.0, 20.0]),
            'c': np.array([[40.0], [80.0]]),
            'd': np.array([[[1.0, 2.0], [3.0, 4.0]],
                           [[5.0, 6.0], [7.0, 8.0]]]),
            'e': tf.SparseTensorValue(
                indices=np.array([(0, 0), (0, 1), (0, 2),
                                  (1, 0), (1, 1), (1, 2)]),
                values=np.array(['doe', 'a', 'deer', 'a', 'female', 'deer']),
                dense_shape=(2, 3)),
            'f': tf.SparseTensorValue(
                indices=np.array([(0, 2), (0, 4), (0, 8), (1, 4), (1, 8)]),
                values=np.array([10.0, 20.0, 30.0, 40.0, 50.0]),
                dense_shape=(2, 20))
        }

        # Expected per-row contents; 'f' is a pair compared element by
        # element.
        expected_rows = [
            {
                'a': 100,
                'b': 10.0,
                'c': [40.0],
                'd': [[1.0, 2.0], [3.0, 4.0]],
                'e': ['doe', 'a', 'deer'],
                'f': ([2, 4, 8], [10.0, 20.0, 30.0]),
            },
            {
                'a': 200,
                'b': 20.0,
                'c': [80.0],
                'd': [[5.0, 6.0], [7.0, 8.0]],
                'e': ['a', 'female', 'deer'],
                'f': ([4, 8], [40.0, 50.0]),
            },
        ]

        unbatched = impl_helper.to_instance_dicts(schema, fetches)
        self.assertEqual(2, len(unbatched))
        self.assertSetEqual(set(six.iterkeys(unbatched[0])),
                            {'a', 'b', 'c', 'd', 'e', 'f'})
        for row_index, expected in enumerate(expected_rows):
            for name in ('a', 'b', 'c', 'd', 'e'):
                self.assertAllEqual(unbatched[row_index][name],
                                    expected[name])
            self.assertEqual(len(unbatched[row_index]['f']), 2)
            self.assertAllEqual(unbatched[row_index]['f'][0],
                                expected['f'][0])
            self.assertAllEqual(unbatched[row_index]['f'][1],
                                expected['f'][1])
コード例 #8
0
 def test_to_instance_dicts(self, feature_spec, instances, record_batch,
                            feed_dict, feed_eager_tensors):
     """Checks that unbatching `feed_dict` yields `instances`.

     Args:
       feature_spec: Feature spec from which the schema is derived.
       instances: Expected result of the conversion.
       record_batch: Accepted but not used by this test.
       feed_dict: Batch to convert.
       feed_eager_tensors: When truthy, the batch is first converted with
         `_eager_tensor_from_values`; otherwise a shallow copy is fed.
     """
     del record_batch
     if feed_eager_tensors:
         test_case.skip_if_not_tf2('Tensorflow 2.x required')
     schema = schema_utils.schema_from_feature_spec(feature_spec)
     if feed_eager_tensors:
         batch = _eager_tensor_from_values(feed_dict)
     else:
         batch = copy.copy(feed_dict)
     unbatched = impl_helper.to_instance_dicts(schema, batch)
     np.testing.assert_equal(instances, unbatched)
コード例 #9
0
ファイル: impl_helper_test.py プロジェクト: Mikehem/tfx
 def test_to_instance_dicts(self, feature_spec, instances, feed_dict,
                            feed_eager_tensors):
     """Checks that unbatching `feed_dict` yields `instances`.

     Args:
       feature_spec: Feature spec from which the schema is derived.
       instances: Expected result of the conversion.
       feed_dict: Batch to convert; a shallow copy is what actually gets fed.
       feed_eager_tensors: When truthy, every value of the copy is replaced
         by a `tf.sparse.SparseTensor` (for `SparseTensorValue` inputs) or a
         `tf.constant` before conversion.
     """
     if feed_eager_tensors:
         test_case.skip_if_not_tf2('Tensorflow 2.x required')
     schema = schema_utils.schema_from_feature_spec(feature_spec)
     batch = copy.copy(feed_dict)
     if feed_eager_tensors:
         # Only existing keys are reassigned, so the dict keeps its size
         # while it is being iterated.
         for name, raw_value in six.iteritems(batch):
             is_sparse = isinstance(raw_value,
                                    tf.compat.v1.SparseTensorValue)
             batch[name] = (tf.sparse.SparseTensor.from_value(raw_value)
                            if is_sparse else tf.constant(raw_value))
     unbatched = impl_helper.to_instance_dicts(schema, batch)
     np.testing.assert_equal(instances, unbatched)
コード例 #10
0
 def test_to_instance_dicts(self, feature_spec, instances, feed_dict):
     """Checks that unbatching `feed_dict` yields `instances`.

     Args:
       feature_spec: Feature spec from which the schema is derived.
       instances: Expected result of the conversion.
       feed_dict: Batch handed to `impl_helper.to_instance_dicts`.
     """
     derived_schema = dataset_schema.from_feature_spec(feature_spec)
     unbatched = impl_helper.to_instance_dicts(derived_schema, feed_dict)
     np.testing.assert_equal(instances, unbatched)
コード例 #11
0
ファイル: impl.py プロジェクト: robertwb/transform
 def convert_and_unbatch(batch_dict):
   """Builds the output dict for `batch_dict` and unbatches it.

   The schema is read from `output_metadata`, a name defined outside this
   function.
   """
   output_dict = impl_helper.make_output_dict(output_metadata.schema,
                                              batch_dict)
   return impl_helper.to_instance_dicts(output_dict)