def _as_dataset(self, split=splits_lib.Split.TRAIN, decoders=None, shuffle_files=False):
    """Build the `tf.data.Dataset` for `split` and decode each example.

    Args:
      split: Split to read (named splits are resolved against `self.info.splits`).
      decoders: Optional per-feature decoder overrides, forwarded to
        `self.info.features.decode_example`.
      shuffle_files: Whether to shuffle the input files.

    Returns:
      A `tf.data.Dataset` of decoded examples.
    """
    if self.version.implements(utils.Experiment.S3):
        # S3 datasets are read directly through the tfrecords reader.
        dataset = self._tfrecords_reader.read(
            self.name, split, self.info.splits.values(), shuffle_files)
    else:
        # Resolve the (possibly named/merged) split into read instructions.
        instruction = split.get_read_instruction(self.info.splits)
        # Extract the SlicedSplitInfo objects describing splits + slices to use.
        sliced_infos = instruction.get_list_sliced_split_info()
        # Resolve them into per-file dicts:
        # {'filepath': 'path/to/data-00032-00100', 'mask': [True, False, ...]}
        file_instructions = self._slice_split_info_to_instruction_dicts(sliced_infos)
        dataset = dataset_utils.build_dataset(
            instruction_dicts=file_instructions,
            dataset_from_file_fn=self._file_format_adapter.dataset_from_filename,
            shuffle_files=shuffle_files,
        )
    # Decode the serialized examples into features, in parallel.
    decode = functools.partial(self.info.features.decode_example, decoders=decoders)
    return dataset.map(decode, num_parallel_calls=tf.data.experimental.AUTOTUNE)
def _as_dataset(self, split=Split.TRAIN, shuffle_files=None):
    """Return the `tf.data.Dataset` for `split`, preprocessed via `_preprocess`.

    Args:
      split: Split to read.
      shuffle_files: Whether to shuffle input files; when None, shuffling is
        enabled only for the training split.

    Returns:
      A `tf.data.Dataset`.
    """
    # Default: shuffle only when reading the training split.
    if shuffle_files is None:
        do_shuffle = split == Split.TRAIN
    else:
        do_shuffle = shuffle_files
    split_pattern = self._split_files(num_shards=None, split=split).filepattern
    return dataset_utils.build_dataset(
        filepattern=split_pattern,
        dataset_from_file_fn=self._file_format_adapter.dataset_from_filename,
        process_fn=self._preprocess,
        shuffle_files=do_shuffle)
def _as_dataset(self, split=Split.TRAIN, shuffle_files=None):
    """Read the split's files and decode each sample.

    Args:
      split: Split to read.
      shuffle_files: Whether to shuffle input files; when None, shuffling is
        enabled only for the training split.

    Returns:
      A `tf.data.Dataset` of decoded samples.
    """
    shuffle = shuffle_files
    if shuffle is None:
        shuffle = split == Split.TRAIN  # shuffle by default for training
    dataset = dataset_utils.build_dataset(
        filepattern=self._split_files(num_shards=None, split=split).filepattern,
        dataset_from_file_fn=self._file_format_adapter.dataset_from_filename,
        shuffle_files=shuffle)
    return dataset.map(self.info.specs.decode_sample)
def _as_dataset(self, split=splits.Split.TRAIN, shuffle_files=None):
    """Build the decoded `tf.data.Dataset` for `split`.

    Args:
      split: Split to read.
      shuffle_files: Whether to shuffle input files; when None, shuffling is
        automatically enabled only for the training split.

    Returns:
      A `tf.data.Dataset` of decoded samples.
    """
    # Shuffle by default only when reading the training split.
    do_shuffle = (split == splits.Split.TRAIN
                  if shuffle_files is None else shuffle_files)
    # Resolve the concrete shard filenames for the requested split.
    # TODO(epot): Implement synthetic splits
    split_filenames = self._build_split_filenames(
        data_dir=self._data_dir,
        split_info_list=[self.info.splits[split]],
    )
    dataset = dataset_utils.build_dataset(
        filepattern=split_filenames,
        dataset_from_file_fn=self._file_format_adapter.dataset_from_filename,
        shuffle_files=do_shuffle,
    )
    return dataset.map(self.info.specs.decode_sample)