def create_tf_record(self):
    """Write a one-example TFRecord file and return its path.

    The example carries a JPEG-encoded random uint8 array of shape
    (4, 5, 3), the format tag 'jpeg', one box with xmin/ymin 0.0 and
    xmax/ymax 1.0, and the class label 2.

    Returns:
        Path of the written file inside `self.get_temp_dir()`.
    """
    path = os.path.join(self.get_temp_dir(), 'tfrecord')
    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    with self.test_session():
        encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()

    example = example_pb2.Example(features=feature_pb2.Features(feature={
        'image/encoded': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
        'image/format': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
        'image/object/bbox/xmin': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[0.0])),
        'image/object/bbox/xmax': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[1.0])),
        'image/object/bbox/ymin': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[0.0])),
        'image/object/bbox/ymax': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[1.0])),
        'image/object/class/label': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[2])),
    }))

    # The context manager closes the writer even when the write raises, so
    # the underlying file handle is never leaked.
    with tf.python_io.TFRecordWriter(path) as writer:
        writer.write(example.SerializeToString())
    return path
def create_tf_record(self):
    """Write a one-example TFRecord file under `$HOME/tmp` and return its path.

    The example carries a JPEG-encoded random uint8 array of shape
    (4, 5, 3), its height (4) and width (5), one box with xmin/ymin 0.0 and
    xmax/ymax 1.0, the class label 2 and a flat mask of twenty 1.0 values.

    Returns:
        Path of the written file (`$HOME/tmp/tfrecord`).

    Raises:
        KeyError: if the HOME environment variable is not set.
    """
    print('\ncreate_tf_record')
    tmp_dir = os.path.join(os.environ['HOME'], 'tmp')
    # Create-then-verify instead of check-then-create: another process may
    # create the shared directory between an isdir() test and makedirs().
    try:
        os.makedirs(tmp_dir)
    except OSError:
        if not os.path.isdir(tmp_dir):
            raise
    path = os.path.join(tmp_dir, 'tfrecord')

    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    flat_mask = (4 * 5) * [1.0]
    with self.test_session():
        encoded_jpeg = tf.image.encode_jpeg(
            tf.constant(image_tensor)).eval()

    example = example_pb2.Example(features=feature_pb2.Features(feature={
        'image/encoded': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
        'image/format': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
        'image/height': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[4])),
        'image/width': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[5])),
        'image/object/bbox/xmin': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[0.0])),
        'image/object/bbox/xmax': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[1.0])),
        'image/object/bbox/ymin': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[0.0])),
        'image/object/bbox/ymax': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[1.0])),
        'image/object/class/label': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[2])),
        'image/object/mask': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=flat_mask)),
    }))

    # The context manager closes the writer even when the write raises.
    with tf.python_io.TFRecordWriter(path=path) as writer:
        writer.write(example.SerializeToString())
    return path
def create_tf_record(self, has_additional_channels=False):
    """Write a one-example TFRecord file and return its path.

    The example carries a JPEG-encoded random uint8 array of shape
    (4, 5, 3), its height (4) and width (5), one box with xmin/ymin 0.0 and
    xmax/ymax 1.0, the class label 2 and a flat mask of twenty 1.0 values.

    Args:
        has_additional_channels: when true, the example also gets an
            'image/additional_channels/encoded' feature holding two copies
            of a JPEG-encoded random uint8 array of shape (4, 5, 1).

    Returns:
        Path of the written file inside `self.get_temp_dir()`.
    """
    path = os.path.join(self.get_temp_dir(), 'tfrecord')
    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    # Generated unconditionally so the numpy random stream is consumed the
    # same way whether or not the extra channels end up in the example.
    additional_channels_tensor = np.random.randint(
        255, size=(4, 5, 1)).astype(np.uint8)
    flat_mask = (4 * 5) * [1.0]
    with self.test_session():
        encoded_jpeg = tf.image.encode_jpeg(
            tf.constant(image_tensor)).eval()
        encoded_additional_channels_jpeg = tf.image.encode_jpeg(
            tf.constant(additional_channels_tensor)).eval()

    features = {
        'image/encoded': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
        'image/format': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
        'image/height': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[4])),
        'image/width': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[5])),
        'image/object/bbox/xmin': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[0.0])),
        'image/object/bbox/xmax': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[1.0])),
        'image/object/bbox/ymin': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[0.0])),
        'image/object/bbox/ymax': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=[1.0])),
        'image/object/class/label': feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[2])),
        'image/object/mask': feature_pb2.Feature(
            float_list=feature_pb2.FloatList(value=flat_mask)),
    }
    if has_additional_channels:
        features['image/additional_channels/encoded'] = feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(
                value=[encoded_additional_channels_jpeg] * 2))
    example = example_pb2.Example(
        features=feature_pb2.Features(feature=features))

    # The context manager closes the writer even when the write raises.
    with tf.python_io.TFRecordWriter(path) as writer:
        writer.write(example.SerializeToString())
    return path
def to_tf_example(ingradients):
    """Build an Example whose 'ingredients' feature holds the given strings.

    Args:
        ingradients: iterable of strings; each one is converted with
            `bytes(x, 'utf-8')`.

    Returns:
        An `example_pb2.Example` with a single bytes-list feature.
    """
    encoded_items = [bytes(item, 'utf-8') for item in ingradients]
    ingredients_feature = feature_pb2.Feature(
        bytes_list=feature_pb2.BytesList(value=encoded_items))
    feature_map = {'ingredients': ingredients_feature}
    return example_pb2.Example(
        features=feature_pb2.Features(feature=feature_map))
def _record(self, f, r, l):
    """Return a serialized Example with file, record, keywords and label.

    Args:
        f: value stored in the int64 feature "file".
        r: value stored in the int64 feature "record".
        l: label, converted with `compat.as_bytes` for the "label" feature.

    Returns:
        The Example serialized with `SerializeToString()`.
    """
    file_feature = feature_pb2.Feature(
        int64_list=feature_pb2.Int64List(value=[f]))
    record_feature = feature_pb2.Feature(
        int64_list=feature_pb2.Int64List(value=[r]))
    keywords_feature = feature_pb2.Feature(
        bytes_list=feature_pb2.BytesList(value=self._get_keywords(f, r)))
    label_feature = feature_pb2.Feature(
        bytes_list=feature_pb2.BytesList(value=[compat.as_bytes(l)]))

    feature_map = {
        "file": file_feature,
        "record": record_feature,
        "keywords": keywords_feature,
        "label": label_feature,
    }
    proto = example_pb2.Example(
        features=feature_pb2.Features(feature=feature_map))
    return proto.SerializeToString()
def testParseExampleInputFn(self):
    """Tests complete flow with input_fn constructed from parse_example."""
    import os  # Function-scope import: only used to close the mkstemp handle.

    n_classes = 3
    batch_size = 10
    words = [b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept']

    # mkstemp() hands back an already-open OS-level descriptor. The records
    # are written by path through TFRecordWriter, so release the descriptor
    # immediately instead of leaking it for the rest of the process.
    fd, examples_file = tempfile.mkstemp()
    os.close(fd)

    # The context manager closes the writer even if building or writing an
    # example raises.
    with python_io.TFRecordWriter(examples_file) as writer:
        for _ in range(batch_size):
            sequence_length = random.randint(1, len(words))
            sentence = random.sample(words, sequence_length)
            label = random.randint(0, n_classes - 1)
            example = example_pb2.Example(features=feature_pb2.Features(
                feature={
                    'tokens': feature_pb2.Feature(
                        bytes_list=feature_pb2.BytesList(value=sentence)),
                    'label': feature_pb2.Feature(
                        int64_list=feature_pb2.Int64List(value=[label])),
                }))
            writer.write(example.SerializeToString())

    col = seq_fc.sequence_categorical_column_with_hash_bucket(
        'tokens', hash_bucket_size=10)
    embed = fc.embedding_column(col, dimension=2)
    feature_columns = [embed]
    feature_spec = parsing_utils.classifier_parse_example_spec(
        feature_columns, label_key='label', label_dtype=dtypes.int64)

    def _train_input_fn():
        dataset = readers.make_batched_features_dataset(
            examples_file, batch_size, feature_spec)
        return dataset.map(lambda features: (features, features.pop('label')))

    def _eval_input_fn():
        dataset = readers.make_batched_features_dataset(
            examples_file, batch_size, feature_spec, num_epochs=1)
        return dataset.map(lambda features: (features, features.pop('label')))

    def _predict_input_fn():
        dataset = readers.make_batched_features_dataset(
            examples_file, batch_size, feature_spec, num_epochs=1)

        def features_fn(features):
            # Prediction input carries no label; drop it from the features.
            features.pop('label')
            return features

        return dataset.map(features_fn)

    self._test_complete_flow(
        feature_columns=feature_columns,
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        n_classes=n_classes,
        batch_size=batch_size)
def create_tf_record(self):
    """Write a one-example TFRecord file and return its path.

    The example carries a JPEG-encoded random uint8 array of shape
    (4, 5, 3), the format tag 'jpeg' and the transcript 'hello', the last
    two stored as UTF-8 bytes.

    Returns:
        Path of the written file inside `self.get_temp_dir()`.
    """
    path = os.path.join(self.get_temp_dir(), 'tfrecord')
    image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
    with self.test_session():
        encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()

    example = example_pb2.Example(features=feature_pb2.Features(feature={
        'image/encoded': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
        'image/format': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=['jpeg'.encode('utf-8')])),
        'image/transcript': feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=['hello'.encode('utf-8')])),
    }))

    # The context manager closes the writer even when the write raises, so
    # the underlying file handle is never leaked.
    with tf.python_io.TFRecordWriter(path) as writer:
        writer.write(example.SerializeToString())
    return path
def _create_feature(feature): feature_list = feature if isinstance(feature, list) else [feature] # Each feature can be exactly one kind: # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L76 feature_type = type(feature_list[0]) if feature_type == int: return feature_pb2.Feature(int64_list=feature_pb2.Int64List(value=feature_list)) elif feature_type == str: return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=feature_list)) elif feature_type == unicode: return feature_pb2.Feature(bytes_list=feature_pb2.BytesList(value=map(lambda x: str(x), feature_list))) elif feature_type == float: return feature_pb2.Feature(float_list=feature_pb2.FloatList(value=feature_list)) else: message = """Unsupported request data format: {}, {}. Valid formats: float, int, str any object that implements __iter__ or classification_pb2.ClassificationRequest""" raise ValueError(message.format(feature, type(feature)))
def test_example_proto():
    """Build, print and serialize an Example with one bytes feature.

    The example has a single 'image/encoded' feature. The serialized result
    is discarded; the call only checks that serialization succeeds.
    """
    # A bytes field rejects text strings on Python 3. A bytes literal is the
    # same type as the previous plain literal on Python 2, so nothing
    # changes there.
    image = b'aa'
    encoded_feature = feature_pb2.Feature(
        bytes_list=feature_pb2.BytesList(value=[image]))
    example = example_pb2.Example(features=feature_pb2.Features(
        feature={'image/encoded': encoded_feature}))
    print('aaa')
    print(example)
    example.SerializeToString()
def testParseExampleInputFn(self):
    """Runs the train/eval/predict flow on in-memory serialized Examples."""
    n_classes = 3
    batch_size = 10
    words = [
        b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept'
    ]

    def _random_serialized_example():
        # Random-call order (length, sample, label) mirrors the data recipe:
        # a random-length sentence of distinct words plus a random class id.
        sequence_length = random.randint(1, len(words))
        sentence = random.sample(words, sequence_length)
        label = random.randint(0, n_classes - 1)
        tokens_feature = feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=sentence))
        label_feature = feature_pb2.Feature(
            int64_list=feature_pb2.Int64List(value=[label]))
        proto = example_pb2.Example(features=feature_pb2.Features(
            feature={'tokens': tokens_feature, 'label': label_feature}))
        return proto.SerializeToString()

    serialized_examples = [
        _random_serialized_example() for _ in range(batch_size)
    ]

    feature_spec = {
        'tokens': parsing_ops.VarLenFeature(dtypes.string),
        'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
    }

    def _parse_single_epoch():
        # Shared by eval and predict: both read the examples exactly once.
        limited = input_lib.limit_epochs(serialized_examples, num_epochs=1)
        return parsing_ops.parse_example(limited, feature_spec)

    def _train_input_fn():
        parsed = parsing_ops.parse_example(serialized_examples, feature_spec)
        label_tensor = parsed.pop('label')
        return parsed, label_tensor

    def _eval_input_fn():
        parsed = _parse_single_epoch()
        label_tensor = parsed.pop('label')
        return parsed, label_tensor

    def _predict_input_fn():
        parsed = _parse_single_epoch()
        parsed.pop('label')
        return parsed, None

    self._test_complete_flow(
        train_input_fn=_train_input_fn,
        eval_input_fn=_eval_input_fn,
        predict_input_fn=_predict_input_fn,
        n_classes=n_classes,
        batch_size=batch_size)
def testDecodeExampleWithRepeatedImages(self):
    """Decodes an Example holding the same encoded image twice.

    The 'image' item is decoded with `Image(repeated=True)`. The result must
    have shape (2, 2, 3, 3) and both slices must equal the generated image.
    """
    image_format = 'png'
    image_shape = (2, 3, 3)
    image, _ = self.GenerateImage(
        image_format=image_format, image_shape=image_shape)
    tf_encoded = self._Encoder(image, image_format)
    with self.test_session():
        tf_string = tf_encoded.eval()

    repeated_image_feature = feature_pb2.Feature(
        bytes_list=feature_pb2.BytesList(value=[tf_string, tf_string]))
    feature_map = {
        'image/encoded': repeated_image_feature,
        'image/format': self._StringFeature(image_format),
    }
    example = example_pb2.Example(
        features=feature_pb2.Features(feature=feature_map))
    serialized_example = example.SerializeToString()

    with self.test_session():
        serialized_example = array_ops.reshape(serialized_example, shape=[])
        keys_to_features = {
            'image/encoded':
                parsing_ops.FixedLenFeature((2, ), dtypes.string),
            'image/format':
                parsing_ops.FixedLenFeature(
                    (), dtypes.string, default_value=image_format),
        }
        items_to_handlers = {
            'image': tfexample_decoder.Image(repeated=True)
        }
        decoder = tfexample_decoder.TFExampleDecoder(
            keys_to_features=keys_to_features,
            items_to_handlers=items_to_handlers)
        [tf_image] = decoder.decode(serialized_example, ['image'])
        output_image = tf_image.eval()

    self.assertEqual(output_image.shape, (2, 2, 3, 3))
    for copy_index in range(2):
        self.assertAllEqual(
            np.squeeze(output_image[copy_index, :, :, :]), image)
def _string_feature(value):
    """Return a bytes-list Feature holding `value` encoded as UTF-8."""
    encoded = value.encode('utf-8')
    payload = feature_pb2.BytesList(value=[encoded])
    return feature_pb2.Feature(bytes_list=payload)
def string_to_bytes(value):
    """Return a `feature_pb2.BytesList` whose only entry is `value`."""
    single_entry = [value]
    return feature_pb2.BytesList(value=single_entry)
def _BytesFeatureFromList(self, ndarray):
    """Return a bytes-list Feature from the flattened entries of `ndarray`.

    Each entry of the flattened array is encoded as UTF-8.
    """
    encoded_values = [
        item.encode('utf-8') for item in ndarray.flatten().tolist()
    ]
    bytes_list = feature_pb2.BytesList(value=encoded_values)
    return feature_pb2.Feature(bytes_list=bytes_list)
def BytesList(value):
    """Wrap a single value in a one-entry `feature_pb2.BytesList`."""
    values = [value]
    return feature_pb2.BytesList(value=values)
from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging # Helpers for creating Example objects example = example_pb2.Example feature = feature_pb2.Feature features = lambda d: feature_pb2.Features(feature=d) bytes_feature = lambda v: feature(bytes_list=feature_pb2.BytesList(value=v)) int64_feature = lambda v: feature(int64_list=feature_pb2.Int64List(value=v)) float_feature = lambda v: feature(float_list=feature_pb2.FloatList(value=v)) # Helpers for creating SequenceExample objects feature_list = lambda l: feature_pb2.FeatureList(feature=l) feature_lists = lambda d: feature_pb2.FeatureLists(feature_list=d) sequence_example = example_pb2.SequenceExample def _compare_output_to_expected(tester, dict_tensors, expected_tensors, flat_output): tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys())) i = 0 # Index into the flattened output of session.run() for k, v in sorted(dict_tensors.items()): # TODO(shivaniagrawal): flat_output is same as v.
def _bytes_feature(*values):
    """Return a bytes-list Feature with every argument encoded as UTF-8."""
    encoded = []
    for text in values:
        encoded.append(text.encode("utf-8"))
    bytes_list = feature_pb2.BytesList(value=encoded)
    return feature_pb2.Feature(bytes_list=bytes_list)
def _bytes_feature(list_of_strings):
    """Returns a Feature whose bytes_list holds the given strings / bytes."""
    bytes_list = feature_pb2.BytesList(value=list_of_strings)
    return feature_pb2.Feature(bytes_list=bytes_list)
def _BytesFeatureFromList(self, ndarray):
    """Return a bytes-list Feature from the flattened entries of `ndarray`."""
    flattened = ndarray.flatten()
    bytes_list = feature_pb2.BytesList(value=flattened.tolist())
    return feature_pb2.Feature(bytes_list=bytes_list)
def testDecodeJpegImageAndBoundingBox(self):
    """Checks that the decoder recovers the image and the bounding box.

    A random image tensor is encoded as JPEG and decoded again to obtain the
    ground-truth image. The same encoded bytes are placed in a
    SequenceExample together with one bounding box, and the example is run
    through the decoder. The decoded image must equal the ground-truth image
    and the decoded box must equal [0.0, 0.0, 1.0, 1.0].
    """
    image_tensor = np.random.randint(256, size=(256, 256, 3)).astype(np.uint8)
    encoded_jpeg = self._EncodeImage(image_tensor)
    decoded_jpeg = self._DecodeImage(encoded_jpeg)

    def _single_float_list(value):
        # One-step feature list carrying a single float.
        return feature_pb2.FeatureList(feature=[
            feature_pb2.Feature(
                float_list=feature_pb2.FloatList(value=[value])),
        ])

    image_feature_list = feature_pb2.FeatureList(feature=[
        feature_pb2.Feature(
            bytes_list=feature_pb2.BytesList(value=[encoded_jpeg])),
    ])
    feature_list_map = {
        'image/encoded': image_feature_list,
        'bbox/xmin': _single_float_list(0.0),
        'bbox/xmax': _single_float_list(1.0),
        'bbox/ymin': _single_float_list(0.0),
        'bbox/ymax': _single_float_list(1.0),
    }
    sequence_example = example_pb2.SequenceExample(
        feature_lists=feature_pb2.FeatureLists(
            feature_list=feature_list_map)).SerializeToString()

    example_decoder = tf_sequence_example_decoder.TFSequenceExampleDecoder()
    tensor_dict = example_decoder.decode(
        tf.convert_to_tensor(sequence_example))

    image_key = fields.InputDataFields.image
    boxes_key = fields.InputDataFields.groundtruth_boxes

    # Test tensor dict image dimension.
    static_image_shape = tensor_dict[image_key].get_shape().as_list()
    self.assertAllEqual(static_image_shape, [None, None, None, 3])

    with self.test_session() as sess:
        tensor_dict[image_key] = tf.squeeze(tensor_dict[image_key])
        tensor_dict[boxes_key] = tf.squeeze(tensor_dict[boxes_key])
        tensor_dict = sess.run(tensor_dict)

    # Test decoded image.
    self.assertAllEqual(decoded_jpeg, tensor_dict[image_key])
    # Test decoded bounding box.
    self.assertAllEqual([0.0, 0.0, 1.0, 1.0], tensor_dict[boxes_key])