def test_default_value(self):
    # Encode example with the previous version
    file_adapter = file_format_adapter.TFRecordExampleAdapter({
        "image/encoded": features.TensorInfo(shape=(), dtype=tf.string),
    })
    serialized_example = file_adapter._serializer.serialize_example({
        "image/encoded": "hello world",
    })

    # Decode example with the new version
    file_adapter = file_format_adapter.TFRecordExampleAdapter({
        "image/encoded": features.TensorInfo(
            shape=(), dtype=tf.string, default_value=b"some string"),
        "image/height": features.TensorInfo(
            shape=(), dtype=tf.int64, default_value=-1),
        "image/width": features.TensorInfo(
            shape=(), dtype=tf.int64, default_value=-1),
    })

    # New fields should have the default values
    ds = tf.data.Dataset.from_tensors(serialized_example)
    ds = ds.map(file_adapter._parser.parse_example)
    example = next(iter(dataset_utils.as_numpy(ds)))
    self.assertEqual(
        example, {
            "image/encoded": b"hello world",
            "image/height": -1,
            "image/width": -1,
        })
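The behaviour tested above leans on a standard TensorFlow mechanism rather than anything TFDS-specific. A minimal sketch using only tf.io APIs: when a key is absent from a serialized tf.train.Example, tf.io.parse_single_example fills it with the default_value declared in its FixedLenFeature spec.

import tensorflow as tf

# Serialize a record that only contains "image/encoded".
serialized = tf.train.Example(features=tf.train.Features(feature={
    "image/encoded": tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[b"hello world"])),
})).SerializeToString()

parsed = tf.io.parse_single_example(serialized, {
    "image/encoded": tf.io.FixedLenFeature((), tf.string, default_value=b""),
    # "image/height" is missing from the record, so it parses to -1.
    "image/height": tf.io.FixedLenFeature((), tf.int64, default_value=-1),
})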
Example 2
def features_encode_decode(features_dict, sample, as_tensor=False):
    """Runs the full pipeline: encode > write > tmp files > read > decode."""
    # Encode sample
    encoded_sample = features_dict.encode_sample(sample)

    with tmp_dir() as tmp_dir_:
        tmp_filename = os.path.join(tmp_dir_, "tmp.tfrecord")

        # Read/write the file
        file_adapter = file_format_adapter.TFRecordExampleAdapter(
            features_dict.get_serialized_features())
        file_adapter.write_from_generator(
            generator_fn=lambda: [encoded_sample],
            output_files=[tmp_filename],
        )
        dataset = file_adapter.dataset_from_filename(tmp_filename)

        # Decode the sample
        dataset = dataset.map(features_dict.decode_sample)

        if not as_tensor:  # Evaluate to numpy array
            for el in dataset_utils.iterate_over_dataset(dataset):
                return el
        else:
            if tf.executing_eagerly():
                return next(iter(dataset))
            else:
                return dataset.make_one_shot_iterator().get_next()
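A hypothetical call to this helper, assuming the library's features module is importable as features_lib and that FeaturesDict accepts plain dtypes (the dict contents are illustrative):

import tensorflow as tf
from tensorflow_datasets.core import features as features_lib

features_dict = features_lib.FeaturesDict({
    "x": tf.int64,
})
# Round-trips the sample through encode > tfrecord > decode.
decoded = features_encode_decode(features_dict, {"x": 1})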
Example 3
def features_encode_decode(features_dict, example, decoders):
    """Runs the full pipeline: encode > write > tmp files > read > decode."""
    # Encode example
    encoded_example = features_dict.encode_example(example)

    with tmp_dir() as tmp_dir_:
        tmp_filename = os.path.join(tmp_dir_, "tmp.tfrecord")

        # Read/write the file
        file_adapter = file_format_adapter.TFRecordExampleAdapter(
            features_dict.get_serialized_info())
        file_adapter.write_from_generator(
            generator=[encoded_example],
            output_files=[tmp_filename],
        )
        ds = file_adapter.dataset_from_filename(tmp_filename)

        # Decode the example
        decode_fn = functools.partial(
            features_dict.decode_example,
            decoders=decoders,
        )
        ds = ds.map(decode_fn)

        if tf.executing_eagerly():
            out_tensor = next(iter(ds))
        else:
            out_tensor = tf.compat.v1.data.make_one_shot_iterator(
                ds).get_next()
        out_numpy = dataset_utils.as_numpy(out_tensor)
        return out_tensor, out_numpy
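A hypothetical invocation of this variant, assuming features_dict contains an "image" feature and that tfds.decode.SkipDecoding is available (it keeps the raw encoded bytes instead of decoding the image):

out_tensor, out_numpy = features_encode_decode(
    features_dict,
    example,
    decoders={"image": tfds.decode.SkipDecoding()},
)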
Example 4

def assertWrongSpecs(self, specs, raise_cls, raise_msg):
    adapter = file_format_adapter.TFRecordExampleAdapter(
        {"wrong_field": specs})
    # Raise an error if an unsupported dtype is given
    with self.assertRaisesWithPredicateMatch(raise_cls, raise_msg):
        adapter._parser._build_feature_specs()
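A hypothetical use of this helper (the spec, exception class, and message below are illustrative, not taken from the library):

self.assertWrongSpecs(
    features.TensorInfo(shape=(), dtype=tf.complex64),
    raise_cls=NotImplementedError,
    raise_msg="complex64",
)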
Example 5
def features_encode_decode(features_dict, example, as_tensor=False):
    """Runs the full pipeline: encode > write > tmp files > read > decode."""
    # Encode example
    encoded_example = features_dict.encode_example(example)

    with tmp_dir() as tmp_dir_:
        tmp_filename = os.path.join(tmp_dir_, "tmp.tfrecord")

        # Read/write the file
        file_adapter = file_format_adapter.TFRecordExampleAdapter(
            features_dict.get_serialized_info())
        file_adapter.write_from_generator(
            generator=[encoded_example],
            output_files=[tmp_filename],
        )
        ds = file_adapter.dataset_from_filename(tmp_filename)

        # Decode the example
        ds = ds.map(features_dict.decode_example)

        if not as_tensor:  # Evaluate to numpy array
            for el in dataset_utils.as_numpy(ds):
                return el
        else:
            if tf.executing_eagerly():
                return next(iter(ds))
            else:
                return tf.compat.v1.data.make_one_shot_iterator(ds).get_next()
Example 6
def _encode_decode(specs, sample):
    """Runs the full pipeline: encode > write > tmp files > read > decode."""
    # Encode sample
    encoded_sample = specs.encode_sample(sample)

    # Build a unique filename to store the tfrecord
    global _encode_count
    _encode_count += 1
    tmp_filename = os.path.join(tempfile.mkdtemp(), 'tmp.tfrecord')

    # Read/write the file
    file_adapter = file_format_adapter.TFRecordExampleAdapter(
        specs.get_specs())
    file_adapter.write_from_generator(
        generator_fn=lambda: [encoded_sample],
        output_files=[tmp_filename],
    )
    dataset = file_adapter.dataset_from_filename(tmp_filename)

    # Decode the sample
    dataset = dataset.map(specs.decode_sample)

    # Return the first sample
    if tf.executing_eagerly():
        return next(iter(dataset))
    else:
        with tf.Graph().as_default():
            item = dataset.make_one_shot_iterator().get_next()
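            # device_count={'GPU': 0} pins the session to CPU, so the test
            # does not claim a GPU just to read back a single record.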
            with tf.Session(config=tf.ConfigProto(
                    device_count={'GPU': 0})) as sess:
                return sess.run(item)
Example 7
def _file_format_adapter(self):
    example_spec = {
        "input/encoded": tf.FixedLenFeature(tuple(), tf.string),
        "fine_label": tf.FixedLenFeature(tuple(), tf.int64),
        "coarse_label": tf.FixedLenFeature(tuple(), tf.int64),
    }
    return file_format_adapter.TFRecordExampleAdapter(example_spec)
Example 8

def _file_format_adapter(self):
    example_spec = {
        "x": tf.FixedLenFeature(tuple(), tf.int64),
        "y": tf.FixedLenFeature(tuple(), tf.int64),
        "z": tf.FixedLenFeature(tuple(), tf.string),
    }
    return file_format_adapter.TFRecordExampleAdapter(example_spec)
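For context, a minimal sketch of driving such an adapter directly, mirroring the write_from_generator/dataset_from_filename calls in the snippets above (the output path is illustrative, and the keyword is generator_fn or generator depending on the library version):

adapter = file_format_adapter.TFRecordExampleAdapter({
    "x": tf.FixedLenFeature(tuple(), tf.int64),
})
adapter.write_from_generator(
    generator_fn=lambda: [{"x": 1}, {"x": 2}],
    output_files=["/tmp/demo.tfrecord"],
)
ds = adapter.dataset_from_filename("/tmp/demo.tfrecord")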
Example 9

def assertFeature(self, specs, serialized_info, tests):
    """Test the TFRecordExampleAdapter encoding."""
    adapter = file_format_adapter.TFRecordExampleAdapter(specs)

    with self._subTest("serialized_info"):
        self.assertEqual(serialized_info,
                         adapter._parser._build_feature_specs())

    for i, test in enumerate(tests):
        with self._subTest(str(i)):

            if test.raise_cls is not None:
                with self.assertRaisesWithPredicateMatch(
                        test.raise_cls, test.raise_msg):
                    adapter._serializer.serialize_example(test.value)
                continue
            serialized = adapter._serializer.serialize_example(test.value)

            if test.expected_serialized is not None:
                example_proto = tf.train.Example()
                example_proto.ParseFromString(serialized)
                expected_proto = tf.train.Example(
                    features=tf.train.Features(
                        feature=test.expected_serialized))
                self.assertEqual(expected_proto, example_proto)

            example = _parse_example(serialized,
                                     adapter._parser.parse_example)

            with self._subTest("dtype"):
                out_dtypes = utils.map_nested(lambda s: s.dtype, example)
                expected_dtypes = utils.map_nested(lambda s: s.dtype, specs)
                self.assertEqual(out_dtypes, expected_dtypes)
            with self._subTest("shape"):
                # Because a (None, 3) shape matches (5, 3), check shapes with
                # tf.TensorShape.assert_is_compatible_with on each element.
                utils.map_nested(
                    lambda x: x[0].shape.assert_is_compatible_with(x[1].shape),
                    utils.zip_nested(example, specs))
            np_example = dataset_utils.as_numpy(example)
            self.assertAllEqualNested(np_example, test.expected)
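assertFeature only reads five attributes off each test item. A hypothetical container matching those fields (the real project may use a different class):

import collections

TestItem = collections.namedtuple(
    "TestItem",
    ["value", "expected", "expected_serialized", "raise_cls", "raise_msg"])

# An item that expects a successful round-trip rather than an error:
item = TestItem(
    value={"x": 1},
    expected={"x": 1},
    expected_serialized=None,
    raise_cls=None,
    raise_msg=None,
)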
Example 10
def features_encode_decode(specs_dict, sample):
    """Runs the full pipeline: encode > write > tmp files > read > decode."""
    # Encode sample
    encoded_sample = specs_dict.encode_sample(sample)

    with tmp_dir() as tmp_dir_:
        tmp_filename = os.path.join(tmp_dir_, 'tmp.tfrecord')

        # Read/write the file
        file_adapter = file_format_adapter.TFRecordExampleAdapter(
            specs_dict.get_specs())
        file_adapter.write_from_generator(
            generator_fn=lambda: [encoded_sample],
            output_files=[tmp_filename],
        )
        dataset = file_adapter.dataset_from_filename(tmp_filename)

        # Decode the sample
        dataset = dataset.map(specs_dict.decode_sample)

        for el in dataset_utils.iterate_over_dataset(dataset):
            return el