def test_to_instance_dicts_error(self, feature_spec, feed_dict, error_msg, error_type=ValueError):
  """Asserts that decoding `feed_dict` under `feature_spec` raises.

  Parameterized test: builds a schema from `feature_spec`, then checks that
  `impl_helper.to_instance_dicts` raises `error_type` with a message matching
  the `error_msg` regex.
  """
  parsed_schema = dataset_schema.from_feature_spec(feature_spec)
  with self.assertRaisesRegexp(error_type, error_msg):
    impl_helper.to_instance_dicts(parsed_schema, feed_dict)
def testMakeOutputDictErrorSparse(self): schema = self.toSchema({'a': tf.VarLenFeature(tf.string)}) # SparseTensor that cannot be represented as VarLenFeature. fetches = { 'a': tf.SparseTensorValue(indices=np.array([(0, 2), (0, 4), (0, 8)]), values=np.array([10.0, 20.0, 30.0]), dense_shape=(1, 20)) } with self.assertRaisesRegexp( ValueError, 'cannot be decoded by ListColumnRepresentation'): _ = impl_helper.to_instance_dicts(schema, fetches) # SparseTensor of invalid rank. fetches = { 'a': tf.SparseTensorValue(indices=np.array([(0, 0, 1), (0, 0, 2), (0, 0, 3)]), values=np.array([10.0, 20.0, 30.0]), dense_shape=(1, 10, 10)) } with self.assertRaisesRegexp( ValueError, 'cannot be decoded by ListColumnRepresentation'): _ = impl_helper.to_instance_dicts(schema, fetches) # SparseTensor with indices that are out of order. fetches = { 'a': tf.SparseTensorValue(indices=np.array([(0, 2), (2, 4), (1, 8)]), values=np.array([10.0, 20.0, 30.0]), dense_shape=(3, 20)) } with self.assertRaisesRegexp(ValueError, 'Encountered out-of-order sparse index'): _ = impl_helper.to_instance_dicts(schema, fetches) # SparseTensors with different batch dimension sizes. schema = self.toSchema({ 'a': tf.VarLenFeature(tf.string), 'b': tf.VarLenFeature(tf.string) }) fetches = { 'a': tf.SparseTensorValue(indices=np.array([(0, 0)]), values=np.array([10.0]), dense_shape=(1, 20)), 'b': tf.SparseTensorValue(indices=np.array([(0, 0)]), values=np.array([10.0]), dense_shape=(2, 20)) } with self.assertRaisesRegexp( ValueError, r'Inconsistent batch sizes: "\w" had batch dimension \d, "\w" had batch' r' dimension \d'): _ = impl_helper.to_instance_dicts(schema, fetches)
def testMakeOutputDictErrorDense(self):
  """Tests that dense tensors with mismatched batch sizes are rejected."""
  schema = self.toSchema({
      'a': tf.FixedLenFeature((), tf.string),
      'b': tf.FixedLenFeature((), tf.string)
  })
  # Tensors with different batch dimension sizes: 'a' has 1 row, 'b' has 2.
  mismatched = {'a': np.array([1]), 'b': np.array([1, 2])}
  with self.assertRaisesRegexp(
      ValueError,
      r'Inconsistent batch sizes: "\w" had batch dimension \d, "\w" had batch'
      r' dimension \d'):
    impl_helper.to_instance_dicts(schema, mismatched)
def _convert_and_unbatch_to_instance_dicts(batch_dict, schema, passthrough_keys):
  """Convert batches of ndarrays to unbatched instance dicts.

  Pass-through features (those in `passthrough_keys`) are removed from the
  batch before decoding and then re-attached to each decoded instance.

  Raises:
    ValueError: if a pass-through feature has more than one distinct value
      and its length differs from the number of decoded instances.
  """
  # Work on a shallow copy: mutating PCollection elements is not allowed.
  if passthrough_keys:
    batch_dict = copy.copy(batch_dict)
  extracted = {k: batch_dict.pop(k) for k in passthrough_keys}

  instances = impl_helper.to_instance_dicts(schema, batch_dict)
  num_instances = len(instances)

  for key, values in six.iteritems(extracted):
    distinct = set(values)
    if len(distinct) == 1:
      # Relaxing ValueError below to only trigger in case pass-through data
      # has more than one value: a single value is broadcast to all instances.
      values = (distinct.pop(),) * num_instances
    if len(values) != num_instances:
      raise ValueError(
          'Cannot pass-through data when input and output batch sizes '
          'are different ({} vs. {})'.format(len(values), num_instances))
    for instance, value in zip(instances, values):
      instance[key] = value
  return instances
def testToInstanceDicts(self):
  """Tests unbatching a batch dict into two per-instance dicts.

  NOTE(review): here `to_instance_dicts` is called with only `batch_dict`
  (no schema), unlike sibling call sites that pass `(schema, feed_dict)` —
  presumably this matches a different library version; confirm the signature.
  """
  batch_dict = {
      'a': [100, 200],                                   # scalar per instance
      'b': [10.0, 20.0],                                 # scalar per instance
      'c': [[40.0], [80.0]],                             # shape [1]
      'd': [[[1.0, 2.0], [3.0, 4.0]],
            [[5.0, 6.0], [7.0, 8.0]]],                   # shape [2, 2]
      'e': [['doe', 'a', 'deer'], ['a', 'female', 'deer']],
      # 'f' is a (indices, values) pair per batch; second instance is empty.
      'f': ([[2, 4, 8], []], [[10.0, 20.0, 30.0], []])
  }
  instance_dicts = impl_helper.to_instance_dicts(batch_dict)
  self.assertEqual(2, len(instance_dicts))
  self.assertSetEqual(set(six.iterkeys(instance_dicts[0])),
                      set(['a', 'b', 'c', 'd', 'e', 'f']))

  # First instance: row 0 of every batched feature.
  self.assertAllEqual(instance_dicts[0]['a'], 100)
  self.assertAllEqual(instance_dicts[0]['b'], 10.0)
  self.assertAllEqual(instance_dicts[0]['c'], [40.0])
  self.assertAllEqual(instance_dicts[0]['d'], [[1.0, 2.0], [3.0, 4.0]])
  self.assertAllEqual(instance_dicts[0]['e'], ['doe', 'a', 'deer'])
  self.assertEqual(len(instance_dicts[0]['f']), 2)
  self.assertAllEqual(instance_dicts[0]['f'][0], [2, 4, 8])
  self.assertAllEqual(instance_dicts[0]['f'][1], [10.0, 20.0, 30.0])

  # Second instance: row 1; 'f' decodes to empty indices/values.
  self.assertAllEqual(instance_dicts[1]['a'], 200)
  self.assertAllEqual(instance_dicts[1]['b'], 20.0)
  self.assertAllEqual(instance_dicts[1]['c'], [80.0])
  self.assertAllEqual(instance_dicts[1]['d'], [[5.0, 6.0], [7.0, 8.0]])
  self.assertAllEqual(instance_dicts[1]['e'], ['a', 'female', 'deer'])
  self.assertEqual(len(instance_dicts[1]['f']), 2)
  self.assertAllEqual(instance_dicts[1]['f'][0], [])
  self.assertAllEqual(instance_dicts[1]['f'][1], [])
def testMakeOutputDictVarLen(self):
  """Tests VarLenFeature decoding, exercising the empty-ndarray codepaths.

  Only rows 0 and 2 of the batch of 4 carry values; the assertions verify
  that the empty ndarrays produced for rows 1 and 3 still have the dtype
  declared in the feature spec.
  """
  schema = dataset_schema.from_feature_spec({
      'a': tf.VarLenFeature(tf.int64),
      'b': tf.VarLenFeature(tf.float32),
      'c': tf.VarLenFeature(tf.string),
  })
  fetches = {
      'a': tf.SparseTensorValue(
          indices=np.array([(0, 0), (2, 0)]),
          values=np.array([0, 1], np.int64),
          dense_shape=(4, 1)),
      'b': tf.SparseTensorValue(
          indices=np.array([(0, 0), (2, 0)]),
          values=np.array([0.5, 1.5], np.float32),
          dense_shape=(4, 1)),
      # Use builtin `object` for string arrays: the `np.object` alias was
      # deprecated in NumPy 1.20 and removed in 1.24.
      'c': tf.SparseTensorValue(
          indices=np.array([(0, 0), (2, 0)]),
          values=np.array(['hello', 'goodbye'], object),
          dense_shape=(4, 1)),
  }
  instance_dicts = impl_helper.to_instance_dicts(schema, fetches)
  self.assertEqual(4, len(instance_dicts))
  # Rows 1 and 3 are empty; their ndarrays must still carry the right dtype.
  self.assertEqual(instance_dicts[1]['a'].dtype, np.int64)
  self.assertEqual(instance_dicts[3]['a'].dtype, np.int64)
  self.assertEqual(instance_dicts[1]['b'].dtype, np.float32)
  self.assertEqual(instance_dicts[3]['b'].dtype, np.float32)
  self.assertEqual(instance_dicts[1]['c'].dtype, object)
  self.assertEqual(instance_dicts[3]['c'].dtype, object)
def testMakeOutputDict(self):
  """Tests decoding a full mix of feature kinds into instance dicts.

  Covers FixedLenFeature (unknown, scalar, vector, and matrix shapes),
  VarLenFeature, and SparseFeature, over a batch of two instances.
  """
  schema = self.toSchema({
      'a': tf.FixedLenFeature(None, tf.int64),
      'b': tf.FixedLenFeature([], tf.float32),
      'c': tf.FixedLenFeature([1], tf.float32),
      'd': tf.FixedLenFeature([2, 2], tf.float32),
      'e': tf.VarLenFeature(tf.string),
      'f': tf.SparseFeature('idx', 'val', tf.float32, 10)
  })
  fetches = {
      'a': np.array([100, 200]),
      'b': np.array([10.0, 20.0]),
      'c': np.array([[40.0], [80.0]]),
      'd': np.array([[[1.0, 2.0], [3.0, 4.0]],
                     [[5.0, 6.0], [7.0, 8.0]]]),
      # VarLenFeature arrives as a left-packed rank-2 SparseTensorValue.
      'e': tf.SparseTensorValue(
          indices=np.array([(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]),
          values=np.array(['doe', 'a', 'deer', 'a', 'female', 'deer']),
          dense_shape=(2, 3)),
      # SparseFeature decodes to an (indices, values) pair per instance.
      'f': tf.SparseTensorValue(
          indices=np.array([(0, 2), (0, 4), (0, 8), (1, 4), (1, 8)]),
          values=np.array([10.0, 20.0, 30.0, 40.0, 50.0]),
          dense_shape=(2, 20))
  }
  instance_dicts = impl_helper.to_instance_dicts(schema, fetches)
  self.assertEqual(2, len(instance_dicts))
  self.assertSetEqual(set(six.iterkeys(instance_dicts[0])),
                      set(['a', 'b', 'c', 'd', 'e', 'f']))

  # First instance: row 0 of each fetch.
  self.assertAllEqual(instance_dicts[0]['a'], 100)
  self.assertAllEqual(instance_dicts[0]['b'], 10.0)
  self.assertAllEqual(instance_dicts[0]['c'], [40.0])
  self.assertAllEqual(instance_dicts[0]['d'], [[1.0, 2.0], [3.0, 4.0]])
  self.assertAllEqual(instance_dicts[0]['e'], ['doe', 'a', 'deer'])
  self.assertEqual(len(instance_dicts[0]['f']), 2)
  self.assertAllEqual(instance_dicts[0]['f'][0], [2, 4, 8])
  self.assertAllEqual(instance_dicts[0]['f'][1], [10.0, 20.0, 30.0])

  # Second instance: row 1 of each fetch.
  self.assertAllEqual(instance_dicts[1]['a'], 200)
  self.assertAllEqual(instance_dicts[1]['b'], 20.0)
  self.assertAllEqual(instance_dicts[1]['c'], [80.0])
  self.assertAllEqual(instance_dicts[1]['d'], [[5.0, 6.0], [7.0, 8.0]])
  self.assertAllEqual(instance_dicts[1]['e'], ['a', 'female', 'deer'])
  self.assertEqual(len(instance_dicts[1]['f']), 2)
  self.assertAllEqual(instance_dicts[1]['f'][0], [4, 8])
  self.assertAllEqual(instance_dicts[1]['f'][1], [40.0, 50.0])
def test_to_instance_dicts(self, feature_spec, instances, record_batch, feed_dict, feed_eager_tensors):
  """Round-trips `feed_dict` through `to_instance_dicts` against `instances`.

  When `feed_eager_tensors` is set (TF2 only), the feed values are first
  converted to eager tensors; `record_batch` is unused here.
  """
  del record_batch
  if feed_eager_tensors:
    test_case.skip_if_not_tf2('Tensorflow 2.x required')
  schema = schema_utils.schema_from_feature_spec(feature_spec)
  if feed_eager_tensors:
    tensors = _eager_tensor_from_values(feed_dict)
  else:
    # Shallow-copy so the parameterized fixture is never mutated.
    tensors = copy.copy(feed_dict)
  actual = impl_helper.to_instance_dicts(schema, tensors)
  np.testing.assert_equal(instances, actual)
def test_to_instance_dicts(self, feature_spec, instances, feed_dict, feed_eager_tensors):
  """Checks that decoding `feed_dict` yields the expected `instances`.

  When `feed_eager_tensors` is set (TF2 only), every feed value is first
  converted to an eager tensor: SparseTensorValues become SparseTensors and
  everything else becomes a constant tensor.
  """
  if feed_eager_tensors:
    test_case.skip_if_not_tf2('Tensorflow 2.x required')
  schema = schema_utils.schema_from_feature_spec(feature_spec)
  # Shallow-copy so the parameterized fixture is never mutated.
  tensors = copy.copy(feed_dict)
  if feed_eager_tensors:
    for name, value in six.iteritems(tensors):
      if isinstance(value, tf.compat.v1.SparseTensorValue):
        tensors[name] = tf.sparse.SparseTensor.from_value(value)
      else:
        tensors[name] = tf.constant(value)
  np.testing.assert_equal(instances,
                          impl_helper.to_instance_dicts(schema, tensors))
def test_to_instance_dicts(self, feature_spec, instances, feed_dict):
  """Decodes `feed_dict` under `feature_spec` and compares to `instances`."""
  parsed_schema = dataset_schema.from_feature_spec(feature_spec)
  actual = impl_helper.to_instance_dicts(parsed_schema, feed_dict)
  np.testing.assert_equal(instances, actual)
def convert_and_unbatch(batch_dict):
  # Convert the batched fetches to an output dict, then unbatch it into
  # per-instance dicts.
  # NOTE(review): `output_metadata` is a free variable — presumably captured
  # from an enclosing function's scope; confirm against the surrounding code.
  return impl_helper.to_instance_dicts(
      impl_helper.make_output_dict(output_metadata.schema, batch_dict))