Example #1
  def testInt32WeightedSparseInt64ColumnDtypes(self):
    ids = fc.sparse_column_with_keys("ids", [42, 1, -1000], dtype=dtypes.int64)
    weighted_ids = fc.weighted_sparse_column(ids, "weights", dtype=dtypes.int32)
    self.assertDictEqual({
        "ids": parsing_ops.VarLenFeature(dtypes.int64),
        "weights": parsing_ops.VarLenFeature(dtypes.int32)
    }, weighted_ids.config)

    with self.assertRaisesRegexp(ValueError,
                                 "dtype is not convertible to float"):
      weighted_ids = fc.weighted_sparse_column(
          ids, "weights", dtype=dtypes.string)
Example #2
  def testDecodeExampleWithBranchedBackupHandler(self):
    example1 = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                'image/object/class/text':
                    self._BytesFeatureFromList(
                        np.array(['cat', 'dog', 'guinea pig'])),
                'image/object/class/label':
                    self._Int64FeatureFromList(np.array([42, 10, 900]))
            }))
    example2 = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                'image/object/class/text':
                    self._BytesFeatureFromList(
                        np.array(['cat', 'dog', 'guinea pig'])),
            }))
    example3 = example_pb2.Example(
        features=feature_pb2.Features(
            feature={
                'image/object/class/label':
                    self._Int64FeatureFromList(np.array([42, 10, 901]))
            }))
    # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
    table = lookup_ops.index_table_from_tensor(
        constant_op.constant(['dog', 'guinea pig', 'cat']))
    keys_to_features = {
        'image/object/class/text': parsing_ops.VarLenFeature(dtypes.string),
        'image/object/class/label': parsing_ops.VarLenFeature(dtypes.int64),
    }
    backup_handler = tf_example_decoder.BackupHandler(
        handler=slim_example_decoder.Tensor('image/object/class/label'),
        backup=tf_example_decoder.LookupTensor('image/object/class/text',
                                               table))
    items_to_handlers = {
        'labels': backup_handler,
    }
    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features,
                                                    items_to_handlers)
    obtained_class_ids_each_example = []
    with self.test_session() as sess:
      sess.run(lookup_ops.tables_initializer())
      for example in [example1, example2, example3]:
        serialized_example = array_ops.reshape(
            example.SerializeToString(), shape=[])
        obtained_class_ids_each_example.append(
            decoder.decode(serialized_example)[0].eval())

    self.assertAllClose([42, 10, 900], obtained_class_ids_each_example[0])
    self.assertAllClose([2, 0, 1], obtained_class_ids_each_example[1])
    self.assertAllClose([42, 10, 901], obtained_class_ids_each_example[2])
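
Note: BackupHandler prefers the primary handler's output and falls back only
when that output is empty, which is why example1 and example3 keep their
integer labels while example2 is routed through the string table. A minimal
sketch of the fallback idea (illustrative names, not the real implementation):

    def _prefer_primary(primary, backup_fn):
      # Use `primary` unless it has no elements; otherwise build the backup.
      return control_flow_ops.cond(
          math_ops.greater(array_ops.size(primary), 0),
          lambda: primary, backup_fn)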
Example #3
    def test_build_parsing_serving_input_receiver_fn(self):
        feature_spec = {
            'int_feature': parsing_ops.VarLenFeature(dtypes.int64),
            'float_feature': parsing_ops.VarLenFeature(dtypes.float32)
        }
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)
        with ops.Graph().as_default():
            serving_input_receiver = serving_input_receiver_fn()
            self.assertEqual(set(['int_feature', 'float_feature']),
                             set(serving_input_receiver.features.keys()))
            self.assertEqual(
                set(['examples']),
                set(serving_input_receiver.receiver_tensors.keys()))

            example = example_pb2.Example()
            text_format.Parse(
                "features: { "
                "  feature: { "
                "    key: 'int_feature' "
                "    value: { "
                "      int64_list: { "
                "        value: [ 21, 2, 5 ] "
                "      } "
                "    } "
                "  } "
                "  feature: { "
                "    key: 'float_feature' "
                "    value: { "
                "      float_list: { "
                "        value: [ 525.25 ] "
                "      } "
                "    } "
                "  } "
                "} ", example)

            with self.test_session() as sess:
                sparse_result = sess.run(
                    serving_input_receiver.features,
                    feed_dict={
                        serving_input_receiver.receiver_tensors['examples'].name:
                        [example.SerializeToString()]
                    })
                self.assertAllEqual([[0, 0], [0, 1], [0, 2]],
                                    sparse_result['int_feature'].indices)
                self.assertAllEqual([21, 2, 5],
                                    sparse_result['int_feature'].values)
                self.assertAllEqual([[0, 0]],
                                    sparse_result['float_feature'].indices)
                self.assertAllEqual([525.25],
                                    sparse_result['float_feature'].values)
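
Note: build_parsing_serving_input_receiver_fn is, roughly, a string
placeholder for serialized tf.Example protos fed through parse_example. A
sketch of the wiring it produces (not the actual implementation):

    serialized = array_ops.placeholder(
        dtypes.string, shape=[None], name='input_example_tensor')
    features = parsing_ops.parse_example(serialized, feature_spec)
    # receiver_tensors={'examples': serialized}, features=features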
Example #4
def _ReadExamples(filename_queue, shape, using_ctc, reader=None):
    """Builds network input tensor ops for TF Example.

  Args:
    filename_queue: Queue of filenames, from tf.train_bkp.string_input_producer
    shape:          ImageShape with the desired shape of the input.
    using_ctc:      Take the unpadded_class labels instead of padded.
    reader:         Function that returns an actual reader to read Examples from
      input files. If None, uses tf.TFRecordReader().
  Returns:
    image:   Float Tensor containing the input image scaled to [-1.28, 1.27].
    height:  Tensor int64 containing the height of the image.
    width:   Tensor int64 containing the width of the image.
    labels:  Serialized SparseTensor containing the int64 labels.
    text:    Tensor string of the utf8 truth text.
  """
    if reader:
        reader = reader()
    else:
        reader = tf.TFRecordReader()
    _, example_serialized = reader.read(filename_queue)
    example_serialized = tf.reshape(example_serialized, shape=[])
    features = tf.parse_single_example(
        example_serialized, {
            'image/encoded':
            parsing_ops.FixedLenFeature([1], dtype=tf.string,
                                        default_value=''),
            'image/text':
            parsing_ops.FixedLenFeature([1], dtype=tf.string,
                                        default_value=''),
            'image/class':
            parsing_ops.VarLenFeature(dtype=tf.int64),
            'image/unpadded_class':
            parsing_ops.VarLenFeature(dtype=tf.int64),
            'image/height':
            parsing_ops.FixedLenFeature([1], dtype=tf.int64, default_value=1),
            'image/width':
            parsing_ops.FixedLenFeature([1], dtype=tf.int64, default_value=1)
        })
    if using_ctc:
        labels = features['image/unpadded_class']
    else:
        labels = features['image/class']
    labels = tf.serialize_sparse(labels)
    image = tf.reshape(features['image/encoded'], shape=[], name='encoded')
    image = _ImageProcessing(image, shape)
    height = tf.reshape(features['image/height'], [-1])
    width = tf.reshape(features['image/width'], [-1])
    text = tf.reshape(features['image/text'], shape=[])

    return image, height, width, labels, text
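
Note: a hypothetical call site, assuming the queue-based pipeline the
docstring describes (the filename and my_image_shape are placeholders):

    queue = tf.train_bkp.string_input_producer(['train.tfrecords'])
    image, height, width, labels, text = _ReadExamples(
        queue, shape=my_image_shape, using_ctc=True)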
Example #5
  def test_to_feature_columns_and_input_fn(self):
    df = setup_test_df_3layer()
    feature_columns, input_fn = (
        estimator_utils.to_feature_columns_and_input_fn(
            df,
            base_input_keys_with_defaults={"a": 1,
                                           "b": 2,
                                           "c": 3,
                                           "d": 4},
            label_keys=["g"],
            feature_keys=["a", "b", "f"]))

    expected_feature_column_a = feature_column.DataFrameColumn(
        "a",
        learn.PredefinedSeries(
            "a",
            parsing_ops.FixedLenFeature(tensor_shape.unknown_shape(),
                                        dtypes.int32, 1)))
    expected_feature_column_b = feature_column.DataFrameColumn(
        "b",
        learn.PredefinedSeries("b", parsing_ops.VarLenFeature(dtypes.int32)))
    expected_feature_column_f = feature_column.DataFrameColumn(
        "f",
        learn.TransformedSeries([
            learn.PredefinedSeries("c",
                                   parsing_ops.FixedLenFeature(
                                       tensor_shape.unknown_shape(),
                                       dtypes.int32, 3)),
            learn.PredefinedSeries("d", parsing_ops.VarLenFeature(dtypes.int32))
        ], mocks.Mock2x2Transform("iue", "eui", "snt"), "out2"))

    expected_feature_columns = [
        expected_feature_column_a, expected_feature_column_b,
        expected_feature_column_f
    ]
    self.assertEqual(sorted(expected_feature_columns), sorted(feature_columns))

    base_features, labels = input_fn()
    expected_base_features = {
        "a": mocks.MockTensor("Tensor a", dtypes.int32),
        "b": mocks.MockSparseTensor("SparseTensor b", dtypes.int32),
        "c": mocks.MockTensor("Tensor c", dtypes.int32),
        "d": mocks.MockSparseTensor("SparseTensor d", dtypes.int32)
    }
    self.assertEqual(expected_base_features, base_features)

    expected_labels = mocks.MockTensor("Out iue", dtypes.int32)
    self.assertEqual(expected_labels, labels)

    self.assertEqual(3, len(feature_columns))
Example #6
    def test_export_savedmodel_proto_roundtrip(self):
        tmpdir = tempfile.mkdtemp()
        est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
        est.train(input_fn=dummy_input_fn, steps=1)
        feature_spec = {
            'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
            'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)
        }
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)

        # Perform the export.
        export_dir_base = os.path.join(compat.as_bytes(tmpdir),
                                       compat.as_bytes('export'))
        export_dir = est.export_savedmodel(export_dir_base,
                                           serving_input_receiver_fn)

        # Check that all the files are in the right places.
        self.assertTrue(gfile.Exists(export_dir_base))
        self.assertTrue(gfile.Exists(export_dir))
        self.assertTrue(
            gfile.Exists(
                os.path.join(compat.as_bytes(export_dir),
                             compat.as_bytes('saved_model.pb'))))
        self.assertTrue(
            gfile.Exists(
                os.path.join(compat.as_bytes(export_dir),
                             compat.as_bytes('variables'))))
        self.assertTrue(
            gfile.Exists(
                os.path.join(compat.as_bytes(export_dir),
                             compat.as_bytes('variables/variables.index'))))
        self.assertTrue(
            gfile.Exists(
                os.path.join(
                    compat.as_bytes(export_dir),
                    compat.as_bytes(
                        'variables/variables.data-00000-of-00001'))))

        # Restore, to validate that the export was well-formed.
        with ops.Graph().as_default() as graph:
            with session.Session(graph=graph) as sess:
                loader.load(sess, [tag_constants.SERVING], export_dir)
                graph_ops = [x.name for x in graph.get_operations()]
                self.assertTrue('input_example_tensor' in graph_ops)
                self.assertTrue('ParseExample/ParseExample' in graph_ops)
                self.assertTrue('weight' in graph_ops)

        # Clean up.
        gfile.DeleteRecursively(tmpdir)
Example #7
  def testCreateSequenceFeatureSpec(self):
    sparse_col = fc.sparse_column_with_hash_bucket(
        "sparse_column", hash_bucket_size=100)
    embedding_col = fc.embedding_column(
        fc.sparse_column_with_hash_bucket(
            "sparse_column_for_embedding", hash_bucket_size=10),
        dimension=4)
    sparse_id_col = fc.sparse_column_with_keys("id_column",
                                               ["marlo", "omar", "stringer"])
    weighted_id_col = fc.weighted_sparse_column(sparse_id_col,
                                                "id_weights_column")
    real_valued_col1 = fc.real_valued_column("real_valued_column", dimension=2)
    real_valued_col2 = fc.real_valued_column(
        "real_valued_default_column", dimension=5, default_value=3.0)
    real_valued_col3 = fc._real_valued_var_len_column(
        "real_valued_var_len_column", default_value=3.0, is_sparse=True)
    real_valued_col4 = fc._real_valued_var_len_column(
        "real_valued_var_len_dense_column", default_value=4.0, is_sparse=False)

    feature_columns = set([
        sparse_col, embedding_col, weighted_id_col, real_valued_col1,
        real_valued_col2, real_valued_col3, real_valued_col4
    ])

    feature_spec = fc._create_sequence_feature_spec_for_parsing(feature_columns)

    expected_feature_spec = {
        "sparse_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "sparse_column_for_embedding":
            parsing_ops.VarLenFeature(dtypes.string),
        "id_column":
            parsing_ops.VarLenFeature(dtypes.string),
        "id_weights_column":
            parsing_ops.VarLenFeature(dtypes.float32),
        "real_valued_column":
            parsing_ops.FixedLenSequenceFeature(
                shape=[2], dtype=dtypes.float32, allow_missing=False),
        "real_valued_default_column":
            parsing_ops.FixedLenSequenceFeature(
                shape=[5], dtype=dtypes.float32, allow_missing=True),
        "real_valued_var_len_column":
            parsing_ops.VarLenFeature(dtype=dtypes.float32),
        "real_valued_var_len_dense_column":
            parsing_ops.FixedLenSequenceFeature(
                shape=[], dtype=dtypes.float32, allow_missing=True,
                default_value=4.0),
    }

    self.assertDictEqual(expected_feature_spec, feature_spec)
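
Note: this spec is meant for the sequence_features argument of
parse_single_sequence_example. A sketch, where serialized is a hypothetical
SequenceExample string:

    _, sequence = parsing_ops.parse_single_sequence_example(
        serialized, sequence_features=feature_spec)
    # sequence["sparse_column"] is a SparseTensor; the FixedLenSequenceFeature
    # entries come back as dense Tensors with a leading time dimension.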
Example #8
    def testSparseColumnDtypes(self):
        sc = fc.sparse_column_with_integerized_feature("sc", 10)
        self.assertDictEqual(
            {"sc": parsing_ops.VarLenFeature(dtype=dtypes.int64)}, sc.config)

        sc = fc.sparse_column_with_integerized_feature("sc",
                                                       10,
                                                       dtype=dtypes.int32)
        self.assertDictEqual(
            {"sc": parsing_ops.VarLenFeature(dtype=dtypes.int32)}, sc.config)

        with self.assertRaisesRegexp(ValueError, "dtype must be an integer"):
            fc.sparse_column_with_integerized_feature("sc",
                                                      10,
                                                      dtype=dtypes.float32)
Example #9
    def test_scaffold_is_used_for_local_init(self):
        tmpdir = tempfile.mkdtemp()

        def _model_fn_scaffold(features, labels, mode):
            _, _ = features, labels
            my_int = variables.Variable(
                1, name='my_int', collections=[ops.GraphKeys.LOCAL_VARIABLES])
            scores = constant_op.constant([3.])
            with ops.control_dependencies([
                    variables.local_variables_initializer(),
                    data_flow_ops.tables_initializer()
            ]):
                assign_op = state_ops.assign(my_int, 12345)

            # local_init_op must be an Operation, not a Tensor.
            custom_local_init_op = control_flow_ops.group(assign_op)
            return model_fn_lib.EstimatorSpec(
                mode=mode,
                predictions=constant_op.constant([[1.]]),
                loss=constant_op.constant(0.),
                train_op=constant_op.constant(0.),
                scaffold=training.Scaffold(local_init_op=custom_local_init_op),
                export_outputs={
                    'test': export_output.ClassificationOutput(scores)
                })

        est = estimator.Estimator(model_fn=_model_fn_scaffold)
        est.train(dummy_input_fn, steps=1)
        feature_spec = {
            'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
            'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)
        }
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)

        # Perform the export.
        export_dir_base = os.path.join(compat.as_bytes(tmpdir),
                                       compat.as_bytes('export'))
        export_dir = est.export_savedmodel(export_dir_base,
                                           serving_input_receiver_fn)

        # Restore, to validate that the custom local_init_op runs.
        with ops.Graph().as_default() as graph:
            with session.Session(graph=graph) as sess:
                loader.load(sess, [tag_constants.SERVING], export_dir)
                my_int = graph.get_tensor_by_name('my_int:0')
                my_int_value = sess.run(my_int)
                self.assertEqual(12345, my_int_value)
Example #10
    def testSerializedContainingSparse(self):
        original = [
            example(features=features({"st_c": float_feature([3, 4])})),
            example(features=features({
                "st_c": float_feature([]),  # empty float list
            })),
            example(features=features({
                "st_d": feature(),  # feature with nothing in it
            })),
            example(features=features({
                "st_c": float_feature([1, 2, -1]),
                "st_d": bytes_feature([b"hi"])
            }))
        ]

        expected_outputs = [{
            "st_c": (np.array([[0], [1]], dtype=np.int64),
                     np.array([3.0, 4.0], dtype=np.float32),
                     np.array([2], dtype=np.int64)),
            "st_d": empty_sparse(bytes)
        }, {
            "st_c": empty_sparse(np.float32),
            "st_d": empty_sparse(bytes)
        }, {
            "st_c": empty_sparse(np.float32),
            "st_d": empty_sparse(bytes)
        }, {
            "st_c": (np.array([[0], [1], [2]], dtype=np.int64),
                     np.array([1.0, 2.0, -1.0], dtype=np.float32),
                     np.array([3], dtype=np.int64)),
            "st_d": (np.array([[0]], dtype=np.int64),
                     np.array(["hi"], dtype=bytes),
                     np.array([1], dtype=np.int64))
        }]

        for proto, expected_output in zip(original, expected_outputs):
            self._test(
                {
                    "serialized": ops.convert_to_tensor(
                        proto.SerializeToString()),
                    "features": {
                        "st_c": parsing_ops.VarLenFeature(dtypes.float32),
                        "st_d": parsing_ops.VarLenFeature(dtypes.string)
                    },
                }, expected_output)
Example #11
    def testEmptySerializedWithAllDefaults(self):
        sparse_name = "st_a"
        a_name = "a"
        b_name = "b"
        c_name = "c:has_a_tricky_name"
        a_default = [0, 42, 0]
        b_default = np.random.rand(3, 3).astype(bytes)
        c_default = np.random.rand(2).astype(np.float32)

        expected_st_a = (  # indices, values, shape
            np.empty((0, 2), dtype=np.int64),  # indices
            np.empty((0,), dtype=np.int64),  # values: st_a is DT_INT64
            np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0

        expected_output = {
            sparse_name: expected_st_a,
            a_name: np.array(2 * [[a_default]]),
            b_name: np.array(2 * [b_default]),
            c_name: np.array(2 * [c_default]),
        }

        self._test(
            ops.convert_to_tensor(["", ""]), {
                sparse_name:
                    parsing_ops.VarLenFeature(dtypes.int64),
                a_name:
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                b_name:
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                c_name:
                    parsing_ops.FixedLenFeature(
                        (2,), dtypes.float32, default_value=c_default),
            },
            expected_values=expected_output)
Example #12
  def testSparseColumnSingleBucket(self):
    sc = fc.sparse_column_with_integerized_feature("sc", 1)
    self.assertDictEqual(
        {"sc": parsing_ops.VarLenFeature(dtype=dtypes.int64)}, sc.config)
    self.assertEqual(1, sc._wide_embedding_lookup_arguments(None).vocab_size)
Example #13
  def testSequenceExampleWithEmptyFeatureInFeatureLists(self):
    original = sequence_example(feature_lists=feature_lists({
        "st_a":
            feature_list([
                float_feature([3.0, 4.0]),
                feature(),
                float_feature([5.0]),
            ]),
    }))

    serialized = original.SerializeToString()

    expected_st_a = (
        np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64),  # indices
        np.array([3.0, 4.0, 5.0], dtype=np.float32),  # values
        np.array([3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2

    expected_feature_list_output = {
        "st_a": expected_st_a,
    }

    self._test(
        {
            "example_name": "in1",
            "serialized": ops.convert_to_tensor(serialized),
            "sequence_features": {
                "st_a": parsing_ops.VarLenFeature(dtypes.float32),
            }
        },
        expected_feat_list_values=expected_feature_list_output)
Example #14
  def parse_examples(example_protos):
    features = {
        'target':
            parsing_ops.FixedLenFeature(
                shape=[1], dtype=dtypes.float32, default_value=0),
        'age_indices':
            parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'age_values':
            parsing_ops.VarLenFeature(dtype=dtypes.float32),
        'gender_indices':
            parsing_ops.VarLenFeature(dtype=dtypes.int64),
        'gender_values':
            parsing_ops.VarLenFeature(dtype=dtypes.float32)
    }
    return parsing_ops.parse_example(
        [e.SerializeToString() for e in example_protos], features)
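
Note: the indices/values pairs returned by parse_examples are typically
recombined into single SparseTensors; one way is TF 1.x sparse_merge (the
vocab_size below is an assumed example value):

    parsed = parse_examples(example_protos)
    age = sparse_ops.sparse_merge(
        parsed['age_indices'], parsed['age_values'], vocab_size=10)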
Example #15
    def testDecodeExampleWithBranchedLookup(self):

        example = example_pb2.Example(features=feature_pb2.Features(
            feature={
                'image/object/class/text':
                self._BytesFeatureFromList(
                    np.array(['cat', 'dog', 'guinea pig'])),
            }))
        serialized_example = example.SerializeToString()
        # 'dog' -> 0, 'guinea pig' -> 1, 'cat' -> 2
        table = lookup_ops.index_table_from_tensor(
            constant_op.constant(['dog', 'guinea pig', 'cat']))

        with self.test_session() as sess:
            sess.run(lookup_ops.tables_initializer())

            serialized_example = array_ops.reshape(serialized_example,
                                                   shape=[])

            keys_to_features = {
                'image/object/class/text':
                parsing_ops.VarLenFeature(dtypes.string),
            }

            items_to_handlers = {
                'labels':
                tf_example_decoder.LookupTensor('image/object/class/text',
                                                table),
            }

            decoder = slim_example_decoder.TFExampleDecoder(
                keys_to_features, items_to_handlers)
            obtained_class_ids = decoder.decode(serialized_example)[0].eval()

        self.assertAllClose([2, 0, 1], obtained_class_ids)
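
Note: LookupTensor behaves like the plain Tensor handler followed by a table
lookup; conceptually (parsed_text is a hypothetical dense string tensor):

    ids = table.lookup(parsed_text)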
Example #16
  def testEmptySerializedWithoutDefaultsShouldFail(self):
    input_features = {
        "st_a":
            parsing_ops.VarLenFeature(dtypes.int64),
        "a":
            parsing_ops.FixedLenFeature(
                (1, 3), dtypes.int64, default_value=[0, 42, 0]),
        "b":
            parsing_ops.FixedLenFeature(
                (3, 3),
                dtypes.string,
                default_value=np.random.rand(3, 3).astype(bytes)),
        # Feature "c" is missing a default, this gap will cause failure.
        "c":
            parsing_ops.FixedLenFeature(
                (2,), dtype=dtypes.float32),
    }

    # Edge case where the key is there but the feature value is empty
    original = example(features=features({"c": feature()}))
    self._test(
        [original.SerializeToString()],
        input_features,
        expected_err=(errors_impl.InvalidArgumentError,
                      "Feature: c \\(data type: float\\) is required"))

    # Standard case of missing key and value.
    self._test(
        ["", ""],
        input_features,
        expected_err=(errors_impl.InvalidArgumentError,
                      "Feature: c \\(data type: float\\) is required"))
Example #17
    def make_batch_feature(self,
                           filenames,
                           num_epochs,
                           batch_size,
                           reader_num_threads=1,
                           parser_num_threads=1,
                           shuffle=False,
                           shuffle_seed=None,
                           drop_final_batch=False):
        self.filenames = filenames
        self.num_epochs = num_epochs
        self.batch_size = batch_size

        return readers.make_batched_features_dataset(
            file_pattern=self.filenames,
            batch_size=self.batch_size,
            features={
                "file": parsing_ops.FixedLenFeature([], dtypes.int64),
                "record": parsing_ops.FixedLenFeature([], dtypes.int64),
                "keywords": parsing_ops.VarLenFeature(dtypes.string)
            },
            reader=core_readers.TFRecordDataset,
            num_epochs=self.num_epochs,
            shuffle=shuffle,
            shuffle_seed=shuffle_seed,
            reader_num_threads=reader_num_threads,
            parser_num_threads=parser_num_threads,
            drop_final_batch=drop_final_batch)
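
Note: a hypothetical way to consume the returned dataset (TF 1.x iterator
style):

    dataset = self.make_batch_feature(filenames, num_epochs=1, batch_size=8)
    features = dataset.make_one_shot_iterator().get_next()
    # features["keywords"] is a SparseTensor; the fixed-length keys are dense.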
Example #18
def _to_feature_spec(tensor, default_value=None):
    if isinstance(tensor, ops.SparseTensor):
        return parsing_ops.VarLenFeature(dtype=tensor.dtype)
    else:
        return parsing_ops.FixedLenFeature(shape=tensor.get_shape(),
                                           dtype=tensor.dtype,
                                           default_value=default_value)
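
Note: a sketch of deriving a parse spec from live tensors with
_to_feature_spec (tensors and serialized_batch are hypothetical):

    spec = {name: _to_feature_spec(t) for name, t in tensors.items()}
    parsed = parsing_ops.parse_example(serialized_batch, spec)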
Example #19
  def test_non_v1_feature_column(self):
    parsing_spec = self._parse_example_fn(
        feature_columns=[fc.sequence_numeric_column('a')], label_key='b')
    expected_spec = {
        'a': parsing_ops.VarLenFeature(dtype=dtypes.float32),
        'b': parsing_ops.FixedLenFeature((1,), dtype=dtypes.float32),
    }
    self.assertDictEqual(expected_spec, parsing_spec)
Example #20
  def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
    original = example(features=features({
        "c": float_feature([3, 4]),
        "val": bytes_feature([b"a", b"b"]),
        "idx": int64_feature([0, 3]),
        "st_a": float_feature([3.0, 4.0])
    }))

    serialized = original.SerializeToString()

    expected_st_a = (  # indices, values, shape
        np.array([[0], [1]], dtype=np.int64),
        np.array([3.0, 4.0], dtype=np.float32),
        np.array([2], dtype=np.int64))  # max_values = 2

    expected_sp = (  # indices, values, shape
        np.array([[0], [3]], dtype=np.int64),
        np.array(["a", "b"], dtype="|S"),
        np.array([13], dtype=np.int64))  # max_values = 13

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": [a_default],
        "b": b_default,
        "c": np.array(
            [3, 4], dtype=np.float32),
    }

    self._test(
        {
            "example_names":
                ops.convert_to_tensor("in1"),
            "serialized":
                ops.convert_to_tensor(serialized),
            "features": {
                "st_a":
                    parsing_ops.VarLenFeature(dtypes.float32),
                "sp":
                    parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
                "a":
                    parsing_ops.FixedLenFeature(
                        (1, 3), dtypes.int64, default_value=a_default),
                "b":
                    parsing_ops.FixedLenFeature(
                        (3, 3), dtypes.string, default_value=b_default),
                # Feature "c" must be provided, since it has no default_value.
                "c":
                    parsing_ops.FixedLenFeature((2,), dtypes.float32),
            }
        },
        expected_output)
Example #21
  def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
    expected_st_a = (  # indices, values, shape
        np.empty((0, 2), dtype=np.int64),  # indices
        np.empty((0,), dtype=np.int64),  # values: st_a is DT_INT64
        np.array([2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
    expected_sp = (  # indices, values, shape
        np.array([[0, 0], [0, 3], [1, 7]], dtype=np.int64),
        np.array(["a", "b", "c"], dtype="|S"),
        np.array([2, 13], dtype=np.int64))  # batch == 2, max_elems = 13

    original = [
        example(features=features({
            "c": float_feature([3, 4]),
            "val": bytes_feature([b"a", b"b"]),
            "idx": int64_feature([0, 3])
        })), example(features=features({
            "c": float_feature([1, 2]),
            "val": bytes_feature([b"c"]),
            "idx": int64_feature([7])
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    a_default = [1, 2, 3]
    b_default = np.random.rand(3, 3).astype(bytes)
    expected_output = {
        "st_a": expected_st_a,
        "sp": expected_sp,
        "a": np.array(2 * [[a_default]]),
        "b": np.array(2 * [b_default]),
        "c": np.array(
            [[3, 4], [1, 2]], dtype=np.float32),
    }

    self._test(
        ops.convert_to_tensor(serialized),
        {
            "st_a":
                parsing_ops.VarLenFeature(dtypes.int64),
            "sp":
                parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
            "a":
                parsing_ops.FixedLenFeature(
                    (1, 3), dtypes.int64, default_value=a_default),
            "b":
                parsing_ops.FixedLenFeature(
                    (3, 3), dtypes.string, default_value=b_default),
            # Feature "c" must be provided, since it has no default_value.
            "c":
                parsing_ops.FixedLenFeature((2,), dtypes.float32),
        },
        expected_values=expected_output)
Example #22
    def testDecodeExampleWithBoundingBox(self):
        num_bboxes = 10
        np_ymin = np.random.rand(num_bboxes, 1)
        np_xmin = np.random.rand(num_bboxes, 1)
        np_ymax = np.random.rand(num_bboxes, 1)
        np_xmax = np.random.rand(num_bboxes, 1)
        np_bboxes = np.hstack([np_ymin, np_xmin, np_ymax, np_xmax])

        example = example_pb2.Example(features=feature_pb2.Features(
            feature={
                'image/object/bbox/ymin': self._EncodedFloatFeature(np_ymin),
                'image/object/bbox/xmin': self._EncodedFloatFeature(np_xmin),
                'image/object/bbox/ymax': self._EncodedFloatFeature(np_ymax),
                'image/object/bbox/xmax': self._EncodedFloatFeature(np_xmax),
            }))
        serialized_example = example.SerializeToString()

        with self.test_session():
            serialized_example = array_ops.reshape(serialized_example,
                                                   shape=[])

            keys_to_features = {
                'image/object/bbox/ymin':
                parsing_ops.VarLenFeature(dtypes.float32),
                'image/object/bbox/xmin':
                parsing_ops.VarLenFeature(dtypes.float32),
                'image/object/bbox/ymax':
                parsing_ops.VarLenFeature(dtypes.float32),
                'image/object/bbox/xmax':
                parsing_ops.VarLenFeature(dtypes.float32),
            }

            items_to_handlers = {
                'object/bbox':
                tfexample_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                              'image/object/bbox/'),
            }

            decoder = tfexample_decoder.TFExampleDecoder(
                keys_to_features, items_to_handlers)
            [tf_bboxes] = decoder.decode(serialized_example, ['object/bbox'])
            bboxes = tf_bboxes.eval()

        self.assertAllClose(np_bboxes, bboxes)
Example #23
 def get_feature_spec(self):
   dtype = self.dtype
   # Convert, because example parser only supports float32, int64 and string.
   if dtype == dtypes.int32:
     dtype = dtypes.int64
   if dtype == dtypes.float64:
     dtype = dtypes.float32
   if self.is_sparse:
     return parsing_ops.VarLenFeature(dtype=dtype)
   return parsing_ops.FixedLenFeature(shape=self.shape[1:], dtype=dtype)
Example #24
  def testSerializedContainingSparse(self):
    original = [
        example(features=features({
            "st_c": float_feature([3, 4])
        })),
        example(features=features({
            "st_c": float_feature([]),  # empty float list
        })),
        example(features=features({
            "st_d": feature(),  # feature with nothing in it
        })),
        example(features=features({
            "st_c": float_feature([1, 2, -1]),
            "st_d": bytes_feature([b"hi"])
        }))
    ]

    serialized = [m.SerializeToString() for m in original]

    expected_st_c = (  # indices, values, shape
        np.array([[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64),
        np.array([3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32),
        np.array([4, 3], dtype=np.int64))  # batch == 4, max_elems = 3

    expected_st_d = (  # indices, values, shape
        np.array([[3, 0]], dtype=np.int64),
        np.array(["hi"], dtype=bytes),
        np.array([4, 1], dtype=np.int64))  # batch == 4, max_elems = 1

    expected_output = {
        "st_c": expected_st_c,
        "st_d": expected_st_d,
    }

    self._test({
        "serialized": ops.convert_to_tensor(serialized),
        "features": {
            "st_c": parsing_ops.VarLenFeature(dtypes.float32),
            "st_d": parsing_ops.VarLenFeature(dtypes.string)
        }
    }, expected_output)
Example #25
        def parse_fn(serialized):
            features = {"x": parsing_ops.VarLenFeature(dtypes.int64)}
            parsed = parsing_ops.parse_single_example(serialized, features)
            parsed = parsed["x"].values

            size = array_ops.size(parsed)
            value = math_ops.cast(parsed, dtypes.bool)
            return control_flow_ops.cond(
                size > 0, lambda: array_ops.reshape(value, []),
                lambda: array_ops.zeros([], dtypes.bool))
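
Note: parse_fn reduces each serialized Example to a scalar boolean (False when
"x" is absent or empty), so a hypothetical use is as a tf.data map function:

    dataset = dataset.map(parse_fn)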
Example #26
    def testParseExampleInputFn(self):
        """Tests complete flow with input_fn constructed from parse_example."""
        n_classes = 3
        batch_size = 10
        words = [
            b'dog', b'cat', b'bird', b'the', b'a', b'sat', b'flew', b'slept'
        ]

        serialized_examples = []
        for _ in range(batch_size):
            sequence_length = random.randint(1, len(words))
            sentence = random.sample(words, sequence_length)
            label = random.randint(0, n_classes - 1)
            example = example_pb2.Example(features=feature_pb2.Features(
                feature={
                    'tokens':
                    feature_pb2.Feature(bytes_list=feature_pb2.BytesList(
                        value=sentence)),
                    'label':
                    feature_pb2.Feature(int64_list=feature_pb2.Int64List(
                        value=[label])),
                }))
            serialized_examples.append(example.SerializeToString())

        feature_spec = {
            'tokens': parsing_ops.VarLenFeature(dtypes.string),
            'label': parsing_ops.FixedLenFeature([1], dtypes.int64),
        }

        def _train_input_fn():
            features = parsing_ops.parse_example(serialized_examples,
                                                 feature_spec)
            labels = features.pop('label')
            return features, labels

        def _eval_input_fn():
            features = parsing_ops.parse_example(
                input_lib.limit_epochs(serialized_examples, num_epochs=1),
                feature_spec)
            labels = features.pop('label')
            return features, labels

        def _predict_input_fn():
            features = parsing_ops.parse_example(
                input_lib.limit_epochs(serialized_examples, num_epochs=1),
                feature_spec)
            features.pop('label')
            return features, None

        self._test_complete_flow(train_input_fn=_train_input_fn,
                                 eval_input_fn=_eval_input_fn,
                                 predict_input_fn=_predict_input_fn,
                                 n_classes=n_classes,
                                 batch_size=batch_size)
Example #27
    def test_export_savedmodel_extra_assets(self):
        tmpdir = tempfile.mkdtemp()
        est = estimator.Estimator(model_fn=_model_fn_for_export_tests)
        est.train(input_fn=dummy_input_fn, steps=1)
        feature_spec = {
            'x': parsing_ops.VarLenFeature(dtype=dtypes.int64),
            'y': parsing_ops.VarLenFeature(dtype=dtypes.int64)
        }
        serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
            feature_spec)

        # Create a fake asset.
        extra_file_name = os.path.join(compat.as_bytes(tmpdir),
                                       compat.as_bytes('my_extra_file'))
        extra_file = gfile.GFile(extra_file_name, mode='w')
        extra_file.write(_EXTRA_FILE_CONTENT)
        extra_file.close()

        # Perform the export.
        assets_extra = {'some/sub/directory/my_extra_file': extra_file_name}
        export_dir_base = os.path.join(compat.as_bytes(tmpdir),
                                       compat.as_bytes('export'))
        export_dir = est.export_savedmodel(export_dir_base,
                                           serving_input_receiver_fn,
                                           assets_extra=assets_extra)

        # Check that the asset files are in the right places.
        expected_extra_path = os.path.join(
            compat.as_bytes(export_dir),
            compat.as_bytes('assets.extra/some/sub/directory/my_extra_file'))
        self.assertTrue(
            gfile.Exists(
                os.path.join(compat.as_bytes(export_dir),
                             compat.as_bytes('assets.extra'))))
        self.assertTrue(gfile.Exists(expected_extra_path))
        self.assertEqual(
            compat.as_bytes(_EXTRA_FILE_CONTENT),
            compat.as_bytes(gfile.GFile(expected_extra_path).read()))

        # cleanup
        gfile.DeleteRecursively(tmpdir)
Example #28
    def testDecodeExampleShapeKeyTensor(self):
        np_image = np.random.rand(2, 3, 1).astype('f')
        np_labels = np.array([[[1], [2], [3]], [[4], [5], [6]]])

        example = example_pb2.Example(features=feature_pb2.Features(
            feature={
                'image':
                self._EncodedFloatFeature(np_image),
                'image/shape':
                self._EncodedInt64Feature(np.array(np_image.shape)),
                'labels':
                self._EncodedInt64Feature(np_labels),
                'labels/shape':
                self._EncodedInt64Feature(np.array(np_labels.shape)),
            }))

        serialized_example = example.SerializeToString()

        with self.test_session():
            serialized_example = array_ops.reshape(serialized_example,
                                                   shape=[])
            keys_to_features = {
                'image': parsing_ops.VarLenFeature(dtype=dtypes.float32),
                'image/shape': parsing_ops.VarLenFeature(dtype=dtypes.int64),
                'labels': parsing_ops.VarLenFeature(dtype=dtypes.int64),
                'labels/shape': parsing_ops.VarLenFeature(dtype=dtypes.int64),
            }
            items_to_handlers = {
                'image':
                tfexample_decoder.Tensor('image', shape_keys='image/shape'),
                'labels':
                tfexample_decoder.Tensor('labels', shape_keys='labels/shape'),
            }
            decoder = tfexample_decoder.TFExampleDecoder(
                keys_to_features, items_to_handlers)
            [tf_image, tf_labels] = decoder.decode(serialized_example,
                                                   ['image', 'labels'])
            self.assertAllEqual(tf_image.eval(), np_image)
            self.assertAllEqual(tf_labels.eval(), np_labels)
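
Note: the shape_keys mechanism is roughly a reshape of the flat VarLen values
by a companion shape feature; a manual sketch (parsed is a hypothetical dict
from parse_single_example):

    flat = sparse_ops.sparse_tensor_to_dense(parsed['labels'])
    shape = sparse_ops.sparse_tensor_to_dense(parsed['labels/shape'])
    labels = array_ops.reshape(flat, shape)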
Example #29
  def testBasic(self):
    with session.Session() as sess:
      examples = array_ops.placeholder(dtypes.string, shape=[1])
      feature_to_type = {
          'x': parsing_ops.FixedLenFeature([1], dtypes.float32, 33.0),
          'y': parsing_ops.VarLenFeature(dtypes.string)
      }
      result = parsing_ops.parse_example(examples, feature_to_type)
      parse_example_op = result['x'].op
      config = extract_example_parser_configuration(parse_example_op, sess)
      expected = self.getExpectedConfig(parse_example_op.type)
      self.assertProtoEquals(expected, config)
Example #30
  def testOldStyleReader(self):
    with self.assertRaisesRegex(
        TypeError, r"The `reader` argument must return a `Dataset` object. "
        r"`tf.ReaderBase` subclasses are not supported."):
      _ = readers.make_batched_features_dataset(
          file_pattern=self.test_filenames[0], batch_size=32,
          features={
              "file": parsing_ops.FixedLenFeature([], dtypes.int64),
              "record": parsing_ops.FixedLenFeature([], dtypes.int64),
              "keywords": parsing_ops.VarLenFeature(dtypes.string),
              "label": parsing_ops.FixedLenFeature([], dtypes.string),
          },
          reader=io_ops.TFRecordReader)