def make_evaluation_dataset(self, features_file, labels_file, batch_size,
                            num_threads=1, prefetch_buffer_size=None):
    """Builds a dataset to be used for evaluation.

    Args:
      features_file: The evaluation source file.
      labels_file: The evaluation target file.
      batch_size: The batch size to use.
      num_threads: The number of elements processed in parallel.
      prefetch_buffer_size: The number of batches to prefetch asynchronously.
        If ``None``, use an automatically tuned value.

    Returns:
      A ``tf.data.Dataset``.

    See Also:
      :func:`opennmt.data.inference_pipeline`
    """
    def _make_features(*element):
        # The zipped source/target dataset yields a tuple of elements.
        return self.make_features(element=element, training=False)

    pipeline = dataset_util.inference_pipeline(
        batch_size,
        process_fn=_make_features,
        num_threads=num_threads,
        prefetch_buffer_size=prefetch_buffer_size)
    return self.make_dataset(
        [features_file, labels_file], training=False).apply(pipeline)
def make_evaluation_dataset(
    self,
    features_file,
    labels_file,
    batch_size,
    batch_type="examples",
    length_bucket_width=None,
    num_threads=1,
    prefetch_buffer_size=None,
):
    """Builds a dataset to be used for evaluation.

    Args:
      features_file: The evaluation source file.
      labels_file: The evaluation target file.
      batch_size: The batch size to use.
      batch_type: The batching strategy to use: can be "examples" or "tokens".
      length_bucket_width: The width of the length buckets to select batch
        candidates from (for efficiency). Set ``None`` to not constrain batch
        formation.
      num_threads: The number of elements processed in parallel.
      prefetch_buffer_size: The number of batches to prefetch asynchronously.
        If ``None``, use an automatically tuned value.

    Returns:
      A ``tf.data.Dataset``.

    See Also:
      :func:`opennmt.data.inference_pipeline`
    """
    # Without a target file, fall back to reading the source side only and
    # bucketing on the source length alone.
    if labels_file is None:
        data_files = features_file
        length_fn = self.get_length
    else:
        data_files = [features_file, labels_file]
        length_fn = [
            self.features_inputter.get_length,
            self.labels_inputter.get_length,
        ]

    def _make_features(*element):
        return self.make_features(
            element=misc.item_or_tuple(element), training=False)

    def _map_transform(dataset):
        return dataset.map(_make_features, num_parallel_calls=num_threads or 1)

    return self.make_dataset(data_files, training=False).apply(
        dataset_util.inference_pipeline(
            batch_size,
            batch_type=batch_type,
            transform_fns=[_map_transform],
            length_bucket_width=length_bucket_width,
            length_fn=length_fn,
            num_threads=num_threads,
            prefetch_buffer_size=prefetch_buffer_size,
        ))
def make_inference_dataset(
    self,
    features_file,
    batch_size,
    batch_type="examples",
    length_bucket_width=None,
    num_threads=1,
    prefetch_buffer_size=None,
):
    """Builds a dataset to be used for inference.

    For evaluation and training datasets, see
    :class:`opennmt.inputters.ExampleInputter`.

    Args:
      features_file: The test file.
      batch_size: The batch size to use.
      batch_type: The batching strategy to use: can be "examples" or "tokens".
      length_bucket_width: The width of the length buckets to select batch
        candidates from (for efficiency). Set ``None`` to not constrain batch
        formation.
      num_threads: The number of elements processed in parallel.
      prefetch_buffer_size: The number of batches to prefetch asynchronously.
        If ``None``, use an automatically tuned value.

    Returns:
      A ``tf.data.Dataset``.

    See Also:
      :func:`opennmt.data.inference_pipeline`
    """
    def _make_features(*element):
        features = self.make_features(
            element=misc.item_or_tuple(element), training=False)
        # Unsupervised inputters always return a (features, labels) pair;
        # only the features part is needed at inference time.
        if isinstance(features, (list, tuple)):
            features = features[0]
        return features

    def _map_transform(dataset):
        return dataset.map(_make_features, num_parallel_calls=num_threads or 1)

    return self.make_dataset(features_file, training=False).apply(
        dataset_util.inference_pipeline(
            batch_size,
            batch_type=batch_type,
            transform_fns=[_map_transform],
            length_bucket_width=length_bucket_width,
            length_fn=self.get_length,
            num_threads=num_threads,
            prefetch_buffer_size=prefetch_buffer_size,
        ))
def testReorderInferDataset(self):
    """Checks that length bucketing reorders elements and records their index."""
    lengths = [8, 2, 5, 6, 7, 1, 3, 9]
    dataset = tf.data.Dataset.from_tensor_slices(lengths)
    dataset = dataset.map(lambda length: {"length": length})
    dataset = dataset.apply(
        dataset_util.inference_pipeline(
            3, length_bucket_width=3, length_fn=lambda x: x["length"]))
    batches = list(iter(dataset))

    # Each entry is (expected lengths, expected original indices) per batch.
    expected_batches = [
        ([8, 6, 7], [0, 3, 4]),
        ([2, 1], [1, 5]),
        ([5, 3], [2, 6]),
        ([9], [7]),
    ]
    self.assertEqual(len(batches), len(expected_batches))
    for batch, (exp_lengths, exp_indices) in zip(batches, expected_batches):
        self.assertAllEqual(batch["length"], exp_lengths)
        self.assertAllEqual(batch["index"], exp_indices)
def make_evaluation_dataset(self, features_file, labels_file, batch_size,
                            num_threads=1, prefetch_buffer_size=None):
    """See :meth:`opennmt.inputters.ExampleInputter.make_evaluation_dataset`."""
    del labels_file  # Unused: this inputter reads the source side only.

    def _make_features(element):
        return self.make_features(element, training=False)

    return self.make_dataset(features_file, training=False).apply(
        dataset_util.inference_pipeline(
            batch_size,
            process_fn=_make_features,
            num_threads=num_threads,
            prefetch_buffer_size=prefetch_buffer_size))
def make_inference_dataset(self, features_file, batch_size,
                           length_bucket_width=None, num_threads=1,
                           prefetch_buffer_size=None):
    """Builds a dataset to be used for inference.

    Args:
      features_file: The test file.
      batch_size: The batch size to use.
      length_bucket_width: The width of the length buckets to select batch
        candidates from (for efficiency). Set ``None`` to not constrain batch
        formation.
      num_threads: The number of elements processed in parallel.
      prefetch_buffer_size: The number of batches to prefetch asynchronously.
        If ``None``, use an automatically tuned value.

    Returns:
      A ``tf.data.Dataset``.
    """
    def _make_features(element):
        # make_features returns a pair here; keep only the first (features)
        # element for inference.
        return self.make_features(element, training=False)[0]

    return self.make_dataset(features_file, training=False).apply(
        dataset_util.inference_pipeline(
            batch_size,
            process_fn=_make_features,
            length_bucket_width=length_bucket_width,
            length_fn=self.get_length,
            num_threads=num_threads,
            prefetch_buffer_size=prefetch_buffer_size))
def make_inference_dataset(self, features_file, batch_size,
                           length_bucket_width=None, num_threads=1,
                           prefetch_buffer_size=None):
    """Builds a dataset to be used for inference.

    For evaluation and training datasets, see
    :class:`opennmt.inputters.ExampleInputter`.

    Args:
      features_file: The test file.
      batch_size: The batch size to use.
      length_bucket_width: The width of the length buckets to select batch
        candidates from (for efficiency). Set ``None`` to not constrain batch
        formation.
      num_threads: The number of elements processed in parallel.
      prefetch_buffer_size: The number of batches to prefetch asynchronously.
        If ``None``, use an automatically tuned value.

    Returns:
      A ``tf.data.Dataset``.

    See Also:
      :func:`opennmt.data.inference_pipeline`
    """
    def _make_features(*element):
        return self.make_features(
            element=misc.item_or_tuple(element), training=False)

    def _map_transform(dataset):
        # Defaults to 4 parallel calls when num_threads is falsy.
        return dataset.map(_make_features, num_parallel_calls=num_threads or 4)

    return self.make_dataset(features_file, training=False).apply(
        dataset_util.inference_pipeline(
            batch_size,
            transform_fns=[_map_transform],
            length_bucket_width=length_bucket_width,
            length_fn=self.get_length,
            num_threads=num_threads,
            prefetch_buffer_size=prefetch_buffer_size))
def make_evaluation_dataset(self, features_file, labels_file, batch_size,
                            num_threads=1, prefetch_buffer_size=None):
    """See :meth:`opennmt.inputters.ExampleInputter.make_evaluation_dataset`."""
    del labels_file  # Unused: only the source side is read by this inputter.

    def _make_features(element):
        return self.make_features(element=element, training=False)

    def _map_transform(dataset):
        # Defaults to 4 parallel calls when num_threads is falsy.
        return dataset.map(_make_features, num_parallel_calls=num_threads or 4)

    return self.make_dataset(features_file, training=False).apply(
        dataset_util.inference_pipeline(
            batch_size,
            transform_fns=[_map_transform],
            num_threads=num_threads,
            prefetch_buffer_size=prefetch_buffer_size))
def make_inference_dataset(self, features_file, batch_size,
                           length_bucket_width=None, num_threads=1,
                           prefetch_buffer_size=None):
    """Builds a dataset to be used for inference.

    Args:
      features_file: The test file.
      batch_size: The batch size to use.
      length_bucket_width: The width of the length buckets to select batch
        candidates from (for efficiency). Set ``None`` to not constrain batch
        formation.
      num_threads: The number of elements processed in parallel.
      prefetch_buffer_size: The number of batches to prefetch asynchronously.
        If ``None``, use an automatically tuned value.

    Returns:
      A ``tf.data.Dataset``.
    """
    def _make_features(element):
        # make_features returns a pair here; keep only the first (features)
        # element for inference.
        return self.make_features(element=element, training=False)[0]

    def _map_transform(dataset):
        # Defaults to 4 parallel calls when num_threads is falsy.
        return dataset.map(_make_features, num_parallel_calls=num_threads or 4)

    return self.make_dataset(features_file, training=False).apply(
        dataset_util.inference_pipeline(
            batch_size,
            transform_fns=[_map_transform],
            length_bucket_width=length_bucket_width,
            length_fn=self.get_length,
            num_threads=num_threads,
            prefetch_buffer_size=prefetch_buffer_size))
def make_evaluation_dataset(self, features_file, labels_file, batch_size,
                            num_threads=1, prefetch_buffer_size=None):
    """Builds a dataset to be used for evaluation.

    Args:
      features_file: The evaluation source file.
      labels_file: The evaluation target file.
      batch_size: The batch size to use.
      num_threads: The number of elements processed in parallel.
      prefetch_buffer_size: The number of batches to prefetch asynchronously.
        If ``None``, use an automatically tuned value.

    Returns:
      A ``tf.data.Dataset``.

    See Also:
      :func:`opennmt.data.inference_pipeline`
    """
    # Read source and target in parallel when a target file is given.
    data_files = (
        features_file if labels_file is None else [features_file, labels_file])

    def _make_features(*element):
        return self.make_features(
            element=misc.item_or_tuple(element), training=False)

    def _map_transform(dataset):
        return dataset.map(_make_features, num_parallel_calls=num_threads or 1)

    return self.make_dataset(data_files, training=False).apply(
        dataset_util.inference_pipeline(
            batch_size,
            transform_fns=[_map_transform],
            num_threads=num_threads,
            prefetch_buffer_size=prefetch_buffer_size))