def _makeDataset(self, inputter, data_file, metadata=None, dataset_size=1, shapes=None):
    """Builds a padded-batch dataset from ``inputter`` over ``data_file``.

    Args:
      inputter: The inputter under test.
      data_file: Path to the input data file.
      metadata: Optional metadata passed to ``inputter.initialize``.
      dataset_size: Expected value of ``inputter.get_dataset_size``.
      shapes: Optional dict of field name to expected static shape.

    Returns:
      A ``(next_element, transformed)`` pair: the feature dict produced by the
      dataset iterator and the output of ``inputter.transform_data``.
    """
    if metadata is not None:
        inputter.initialize(metadata)
    self.assertEqual(dataset_size, inputter.get_dataset_size(data_file))
    processed = inputter.make_dataset(data_file).map(
        lambda *arg: inputter.process(item_or_tuple(arg)))
    batched = processed.padded_batch(1, padded_shapes=data.get_padded_shapes(processed))
    iterator = batched.make_initializable_iterator()
    # Register the initializer so it runs with the table initializers.
    tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
    next_element = iterator.get_next()
    if shapes is not None:
        # Check both the training features and the serving features.
        serving_features = inputter.get_serving_input_receiver().features
        for feature_dict in (next_element, serving_features):
            self.assertNotIn("raw", feature_dict)
            for field, expected_shape in six.iteritems(shapes):
                self.assertIn(field, feature_dict)
                self.assertAllEqual(
                    expected_shape, feature_dict[field].get_shape().as_list())
    return next_element, inputter.transform_data(next_element)
def _makeDataset(self, inputter, data_file, data_config=None, dataset_size=1, shapes=None):
    """Builds a batched dataset from ``inputter`` and checks its features.

    Args:
      inputter: The inputter under test.
      data_file: Path to the input data file.
      data_config: Optional configuration passed to ``inputter.initialize``.
      dataset_size: Expected value of ``inputter.get_dataset_size``.
      shapes: Optional dict of field name to expected shape, forwarded to
        ``self._checkFeatures``.

    Returns:
      The evaluated ``(features, inputs)`` pair.
    """
    if data_config is not None:
        inputter.initialize(data_config)
    self.assertEqual(inputter.get_dataset_size(data_file), dataset_size)
    dataset = inputter.make_dataset(data_file)
    # Fix: use the builtin next() instead of the iterator's .next() method,
    # which is only a Python 2 compatibility alias and not part of the
    # Python 3 iterator protocol.
    eager_features = inputter.make_features(next(iter(dataset)), training=True)
    # Add a batch dimension so eager features match the batched pipeline below.
    eager_features = tf.nest.map_structure(lambda t: tf.expand_dims(t, 0), eager_features)
    dataset = dataset.map(
        lambda *arg: inputter.make_features(item_or_tuple(arg), training=True))
    dataset = dataset.apply(dataset_util.batch_dataset(1))
    features = next(iter(dataset))
    if shapes is not None:
        self._checkFeatures(features, shapes)
        self._checkFeatures(eager_features, shapes)
    keep = inputter.keep_for_training(features)
    self.assertIs(keep.dtype, tf.bool)
    inputs = inputter(features, training=True)
    if not isinstance(inputter, inputters.ExampleInputter):
        self._testServing(inputter)
    return self.evaluate((features, inputs))
def _makeDataset(self, inputter, data_file, metadata=None, dataset_size=1, shapes=None):
    """Builds a padded-batch dataset from ``inputter`` in TF1 or TF2 mode.

    Args:
      inputter: The inputter under test.
      data_file: Path to the input data file.
      metadata: Optional metadata passed to ``inputter.initialize``.
      dataset_size: Expected value of ``inputter.get_dataset_size``.
      shapes: Optional dict of field name to expected shape.

    Returns:
      The evaluated ``(features, inputs)`` pair.
    """
    if metadata is not None:
        inputter.initialize(metadata)
    self.assertEqual(dataset_size, inputter.get_dataset_size(data_file))
    dataset = inputter.make_dataset(data_file)
    dataset = dataset.map(lambda *arg: inputter.process(item_or_tuple(arg)))
    dataset = dataset.padded_batch(1, padded_shapes=data.get_padded_shapes(dataset))
    if compat.is_tf2():
        iterator = None
        # Fix: use the builtin next() instead of the iterator's .next() method,
        # which is only a Python 2 compatibility alias and not part of the
        # Python 3 iterator protocol.
        features = next(iter(dataset))
    else:
        iterator = dataset.make_initializable_iterator()
        features = iterator.get_next()
    if shapes is not None:
        all_features = [features]
        # Serving features only exist for source inputters in graph mode.
        if not compat.is_tf2() and not inputter.is_target:
            all_features.append(inputter.get_serving_input_receiver().features)
        for f in all_features:
            for field, shape in six.iteritems(shapes):
                self.assertIn(field, f)
                self.assertTrue(f[field].shape.is_compatible_with(shape))
    inputs = inputter.make_inputs(features, training=True)
    if not compat.is_tf2():
        # Graph mode requires explicit initialization before evaluation.
        with self.test_session() as sess:
            sess.run(tf.tables_initializer())
            sess.run(tf.global_variables_initializer())
            sess.run(iterator.initializer)
    return self.evaluate((features, inputs))
def _first_element(inputter, data_file, metadata):
    """Reads and processes the first element of the dataset built by ``inputter``.

    Args:
      inputter: The inputter under test.
      data_file: Path to the input data file.
      metadata: Metadata passed to ``inputter.initialize``.

    Returns:
      A ``(data, transformed)`` pair: the processed feature dict with a leading
      batch dimension added, and the output of ``inputter.transform_data``.
    """
    inputter.initialize(metadata)
    iterator = inputter.make_dataset(data_file).make_initializable_iterator()
    # Register the initializer so it runs with the table initializers.
    tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
    data = inputter.process(iterator.get_next())
    # Prepend a batch dimension of 1 to every feature.
    data = {key: tf.expand_dims(value, 0) for key, value in data.items()}
    return data, inputter.transform_data(data)
def _first_element(inputter, data_file, metadata):
    """Reads and processes the first element of the dataset built by ``inputter``.

    Args:
      inputter: The inputter under test.
      data_file: Path to the input data file.
      metadata: Metadata passed to ``inputter.initialize``.

    Returns:
      A ``(data, transformed)`` pair: the processed feature dict with a leading
      batch dimension added, and the output of ``inputter.transform_data``.
    """
    inputter.initialize(metadata)
    iterator = inputter.make_dataset(data_file).make_initializable_iterator()
    # Register the initializer so it runs with the table initializers.
    tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
    data = inputter.process(iterator.get_next())
    # Prepend a batch dimension of 1 to every feature.
    data = {key: tf.expand_dims(value, 0) for key, value in six.iteritems(data)}
    return data, inputter.transform_data(data)
def testWordEmbedderWithCompression(self):
    """Checks that a WordEmbedder reads tokens from a compressed data file."""
    vocab_file = self._makeTextFile("vocab.txt", ["the", "world", "hello", "■"])
    data_file = self._makeTextFile(
        "data.txt", ["hello world !", "how are you ?"], compress=True)
    embedder = text_inputter.WordEmbedder(embedding_size=10)
    embedder.initialize({"vocabulary": vocab_file})
    elements = iter(embedder.make_inference_dataset(data_file, batch_size=1))
    first_tokens = next(elements)["tokens"].numpy()[0]
    self.assertAllEqual(first_tokens, [b"hello", b"world", b"!"])
def _first_element(inputter, data_file, metadata=None):
    """Reads and processes the first element of the dataset built by ``inputter``.

    Args:
      inputter: The inputter under test.
      data_file: Path to the input data file.
      metadata: Optional metadata passed to ``inputter.initialize``.

    Returns:
      A ``(data, transformed)`` pair: the processed (unbatched) feature dict and
      the output of ``inputter.transform_data`` on the batched features.
    """
    if metadata is not None:
        inputter.initialize(metadata)
    iterator = inputter.make_dataset(data_file).make_initializable_iterator()
    # Register the initializer so it runs with the table initializers.
    tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
    data = inputter.process(iterator.get_next())

    def _batch_one(key, value):
        # Prepend a batch dimension of 1 and fix the static shape from the
        # inputter's declared padded shape for this field.
        batched = tf.expand_dims(value, 0)
        batched.set_shape([None] + inputter.padded_shapes[key])
        return batched

    data_in = {key: _batch_one(key, value) for key, value in six.iteritems(data)}
    return data, inputter.transform_data(data_in)
def _makeDataset(self, inputter, data_file, data_config=None, dataset_size=1, shapes=None):
    """Builds a batched dataset from ``inputter`` and checks its features.

    Args:
      inputter: The inputter under test.
      data_file: Path to the input data file.
      data_config: Optional configuration passed to ``inputter.initialize``.
      dataset_size: Currently unused; kept for interface compatibility with
        callers (NOTE(review): presumably a leftover from an earlier version
        that asserted the dataset size — confirm before removing).
      shapes: Optional dict of field name to expected shape, forwarded to
        ``self._checkFeatures``.

    Returns:
      The evaluated ``(features, inputs)`` pair.
    """
    if data_config is not None:
        inputter.initialize(data_config)
    dataset = inputter.make_dataset(data_file)
    dataset = dataset.map(
        lambda *arg: inputter.make_features(item_or_tuple(arg), training=True))
    dataset = dataset.apply(dataset_util.batch_dataset(1))
    # Fix: use the builtin next() instead of the iterator's .next() method,
    # which is only a Python 2 compatibility alias and not part of the
    # Python 3 iterator protocol.
    features = next(iter(dataset))
    if shapes is not None:
        self._checkFeatures(features, shapes)
    inputs = inputter(features, training=True)
    return self.evaluate((features, inputs))