class DaliRnntIterator(object):
    """
    Yields batches of data for RNN-T training as the tuple:
    preprocessed_signal, preprocessed_signal_length, transcript, transcript_length

    This iterator is not meant to be the entry point to the DALI processing
    pipeline. Use DataLoader instead.
    """

    def __init__(self, dali_pipelines, transcripts, tokenizer, batch_size,
                 shard_size, pipeline_type, normalize_transcripts=False):
        """
        Wrap the given DALI pipelines and pre-tokenize every transcript.

        Args:
            dali_pipelines: pipelines handed to ``DALIGenericIterator``.
            transcripts: container indexed by ``0 .. len(transcripts) - 1``.
            tokenizer: object providing ``tokenize`` (and ``charset`` when
                normalization is enabled).
            batch_size: stored on the instance as ``self.batch_size``.
            shard_size: forwarded as ``size`` for every pipeline type except 'val'.
            pipeline_type: 'val' selects the reader-driven iterator setup.
            normalize_transcripts: normalize transcripts before tokenizing.
        """
        self.normalize_transcripts = normalize_transcripts
        self.tokenizer = tokenizer
        self.batch_size = batch_size

        from nvidia.dali.plugin.pytorch import DALIGenericIterator
        from nvidia.dali.plugin.base_iterator import LastBatchPolicy

        # In the train pipeline shard_size is set to be divisible by
        # batch_size, so the PARTIAL policy is safe.
        iterator_kwargs = dict(
            dynamic_shape=True,
            auto_reset=True,
            last_batch_policy=LastBatchPolicy.PARTIAL,
        )
        if pipeline_type == 'val':
            iterator_kwargs['reader_name'] = "Reader"
        else:
            iterator_kwargs['size'] = shard_size
            iterator_kwargs['last_batch_padded'] = True

        self.dali_it = DALIGenericIterator(
            dali_pipelines, ["audio", "label", "audio_shape"],
            **iterator_kwargs)

        self.tokenize(transcripts)

    def tokenize(self, transcripts):
        """
        Tokenize all transcripts and cache them on the instance.

        Sets ``self.tr`` (object array of token tensors), ``self.t_sizes``
        (int32 tensor of token counts), the min/max token sequence lengths and
        ``self.padded_tr`` (every sequence right-padded to the longest one).
        """
        texts = [transcripts[idx] for idx in range(len(transcripts))]

        if self.normalize_transcripts:
            texts = [
                normalize_string(text, self.tokenizer.charset,
                                 punctuation_map(self.tokenizer.charset))
                for text in texts
            ]

        token_tensors = [
            torch.tensor(self.tokenizer.tokenize(text)) for text in texts
        ]

        self.tr = np.array(token_tensors, dtype=object)
        self.t_sizes = torch.tensor([len(seq) for seq in token_tensors],
                                    dtype=torch.int32)
        self._min_token_sequence_len = int(self.t_sizes.min())
        self._max_token_sequence_len = int(self.t_sizes.max())

        # Pad once up front so batches can be gathered by plain indexing.
        padded = [
            np.pad(seq, (0, self._max_token_sequence_len - size))
            for seq, size in zip(self.tr, self.t_sizes)
        ]
        self.padded_tr = np.array(padded)

    def _gen_transcripts(self, labels, normalize_transcripts: bool = True):
        """
        Generate transcripts in the format expected by the NN.

        ``labels`` holds sample indices; they are flattened and used to look
        up the pre-padded transcripts and their lengths.
        ``normalize_transcripts`` is accepted but not used here.
        """
        sample_ids = labels.flatten().numpy()
        # No per-batch padding is needed: sequences were padded in tokenize().
        return self.padded_tr[sample_ids], self.t_sizes[sample_ids]

    def __next__(self):
        batch = next(self.dali_it)[0]
        audio = batch["audio"]
        audio_shape = batch["audio_shape"][:, 1]

        if audio.shape[0] == 0:
            # An empty tensor means other GPUs got the last samples from the
            # dataset and this GPU has nothing to do; calling `__next__`
            # again raises StopIteration.
            return next(self.dali_it)

        # Crop the last axis to the largest length reported for this batch.
        audio = audio[:, :, :audio_shape.max()]

        transcripts, transcripts_lengths = self._gen_transcripts(
            batch["label"])
        return audio, audio_shape, transcripts, transcripts_lengths

    def next(self):
        return self.__next__()

    def __iter__(self):
        return self

    @property
    def min_token_sequence_len(self):
        return self._min_token_sequence_len

    @property
    def max_token_sequence_len(self):
        return self._max_token_sequence_len
class DaliJasperIterator(object):
    """
    Returns batches of data for Jasper training:
    preprocessed_signal, preprocessed_signal_length, transcript, transcript_length

    This iterator is not meant to be the entry point to the DALI processing
    pipeline. Use DataLoader instead.
    """

    def __init__(self, dali_pipelines, transcripts, symbols, batch_size,
                 reader_name, train_iterator: bool):
        """
        Wrap the given DALI pipelines in a ``DALIGenericIterator``.

        Args:
            dali_pipelines: pipelines handed to ``DALIGenericIterator``.
            transcripts: container indexed by the integer labels the
                pipeline emits.
            symbols: symbol set passed to the transcript normalization helpers.
            batch_size: stored on the instance as ``self.batch_size``.
            reader_name: name of the DALI reader that drives the iterator.
            train_iterator: accepted for interface compatibility; not used
                by this class.
        """
        self.transcripts = transcripts
        self.symbols = symbols
        self.batch_size = batch_size

        from nvidia.dali.plugin.pytorch import DALIGenericIterator
        from nvidia.dali.plugin.base_iterator import LastBatchPolicy

        # The iterator is driven by the named reader and uses the PARTIAL
        # last-batch policy.
        self.dali_it = DALIGenericIterator(
            dali_pipelines, ["audio", "label", "audio_shape"],
            reader_name=reader_name,
            dynamic_shape=True,
            auto_reset=True,
            last_batch_policy=LastBatchPolicy.PARTIAL)

    @staticmethod
    def _str2list(s: str):
        """
        Returns list of floats, that represents given string.
        '0.' denotes separator
        '1.' denotes 'a'
        '27.' denotes "'"
        Assumes, that the string is lower case.

        Any character whose code point is not above 96 (other than the
        apostrophe) is clamped to 0.
        """
        return [27. if c == "'" else max(0., ord(c) - 96.) for c in s]

    @staticmethod
    def _pad_lists(lists: list, pad_val=0):
        """
        Pads lists in place, so that all have the same size.

        Args:
            lists: list of lists; each inner list is extended in place.
            pad_val: value appended to the shorter lists.

        Returns:
            list with the original sizes of the corresponding input lists.
        """
        sizes = [len(li) for li in lists]
        max_length = max(sizes, default=0)
        for li, size in zip(lists, sizes):
            # Mutation is intentional: the caller builds a tensor from `lists`.
            li.extend([pad_val] * (max_length - size))
        return sizes

    def _gen_transcripts(self, labels, normalize_transcripts: bool = True):
        """
        Generate transcripts in format expected by NN.

        Args:
            labels: iterable of scalar tensors indexing ``self.transcripts``.
            normalize_transcripts: run ``normalize_string`` on every
                transcript before encoding it.

        Returns:
            (padded transcripts tensor, int32 lengths tensor), both moved to
            the GPU with ``.cuda()``.
        """
        texts = (self.transcripts[lab.item()] for lab in labels)
        if normalize_transcripts:
            texts = (
                normalize_string(text, self.symbols,
                                 punctuation_map(self.symbols))
                for text in texts
            )
        lists = [self._str2list(text) for text in texts]

        sizes = self._pad_lists(lists)
        return torch.tensor(lists).cuda(), torch.tensor(
            sizes, dtype=torch.int32).cuda()

    def __next__(self):
        data = self.dali_it.__next__()
        batch = data[0]
        transcripts, transcripts_lengths = self._gen_transcripts(
            batch["label"])
        return (batch["audio"], batch["audio_shape"][:, 1], transcripts,
                transcripts_lengths)

    def next(self):
        return self.__next__()

    def __iter__(self):
        return self
class DALIDataloader():
    """DataLoader for the Nvidia DALI augmentation pipeline.

    To handle non-DALI augmentations, this loader utilizes Ray to parallelize
    augmentation for every sample in a batch.
    """

    def __init__(self,
                 dataset: Type[BasicDatasetWrapper],
                 batch_size: int = 1,
                 num_thread: int = 1,
                 device_id: int = 0,
                 collate_fn: Type[Callable] = None,
                 shuffle: bool = True):
        """Initialization

        Args:
            dataset (Type[BasicDatasetWrapper]): dataset object to be adapted into DALI format
            batch_size (int): How many samples per batch to load
            num_thread (int, optional): Number of CPU threads used by the pipeline. Defaults to 1.
            device_id (int, optional): GPU id to be used for pipeline. Defaults to 0.
            collate_fn (Type[Callable], optional): merges a list of samples to form a mini-batch of Tensor(s). Defaults to None.
            shuffle (bool, optional): set to True to have the data reshuffled at every epoch. Defaults to True.

        Raises:
            RuntimeError: if 'nvidia_dali' is listed among the augmentations
                but is not the first entry.
            TypeError: if an augmentation's ``args`` is not a dictionary.
        """
        iterator = DALIIteratorWrapper(dataset,
                                       batch_size=batch_size,
                                       shuffle=shuffle,
                                       device_id=device_id)
        self.dataset = iterator.dataset
        self.image_auto_pad = self.dataset.image_auto_pad
        self.data_format = dataset.data_format
        self.preprocess_args = iterator.dataset.preprocess_args

        # Initialize DALI only augmentations
        self.augmentations_list = self.dataset.augmentations_list

        dali_augments = None
        external_augments = None
        normalize = True
        if self.dataset.stage == 'train' and self.augmentations_list is not None:
            external_augments = []
            # If DALI augmentations are used in the experiment file, they
            # must come first in the list.
            aug_module_sequence = [
                augment.module for augment in self.augmentations_list
            ]
            if ('nvidia_dali' in aug_module_sequence
                    and aug_module_sequence[0] != 'nvidia_dali'):
                raise RuntimeError(
                    'Nvidia DALI augmentation module must be in the first order of the "augmentations" list!, found {}'
                    .format(aug_module_sequence[0]))

            for augment in self.augmentations_list:
                module_name = augment.module
                module_args = augment.args
                if not isinstance(module_args, dict):
                    raise TypeError(
                        "expect augmentation module's args value to be dictionary, got %s"
                        % type(module_args))
                # Work on a shallow copy so the caller's augmentation config
                # is not polluted with the injected 'data_format' entry.
                tf_kwargs = dict(module_args)
                tf_kwargs['data_format'] = self.data_format
                augments = create_transform(module_name, **tf_kwargs)
                if module_name == 'nvidia_dali':
                    dali_augments = augments
                else:
                    external_augments.append(augments)

        # Always defined, because __next__ reads it unconditionally.
        self.external_executors = None

        # If there are any external augments. `external_augments` stays None
        # when no augmentation list was processed, so test truthiness rather
        # than calling len() on it.
        if external_augments:
            # do not apply normalization and channel format swap in DALI pipeline
            normalize = False

            # Instantiate external augments executors, one per batch slot
            ray.init(ignore_reinit_error=True)
            transforms_list_ref = ray.put(external_augments)
            data_format_ref = ray.put(self.data_format)
            preprocess_args_ref = ray.put(self.preprocess_args)
            self.external_executors = [
                ExternalAugmentsExecutor.remote(transforms_list_ref,
                                                data_format_ref,
                                                preprocess_args_ref,
                                                self.image_auto_pad)
                for _ in range(batch_size)
            ]

        pipeline = DALIExternalSourcePipeline(dataset_iterator=iterator,
                                              batch_size=batch_size,
                                              num_threads=num_thread,
                                              device_id=device_id,
                                              dali_augments=dali_augments,
                                              normalize=normalize)
        self.labels_pad_value = pipeline.labels_pad_value
        # Copy before removing 'images' so the pipeline's own layout list is
        # left untouched.
        self.original_data_layout = copy.copy(pipeline.original_data_layout)
        self.original_data_layout.remove('images')

        # Additional field to retrieve image shape
        self.output_map = pipeline.pipeline_output_data_layout
        self.dali_pytorch_loader = DALIGenericIterator(
            pipelines=[pipeline],
            output_map=self.output_map,
            size=iterator.size,
            dynamic_shape=True,
            fill_last_batch=False,
            last_batch_padded=True,
            auto_reset=True)
        self.collate_fn = collate_fn
        self.size = self.dali_pytorch_loader.size
        self.batch_size = batch_size

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next collated batch.

        Each sample is an ``(image, targets)`` pair built from the DALI
        output. When external executors exist, every sample is additionally
        sent through its Ray executor before collation.
        """
        # Vortex doesn't support multiple pipelines yet, so only the first
        # pipeline's output is used.
        output = self.dali_pytorch_loader.__next__()[0]

        # Prepare Pytorch style data loader output
        batch = [
            self._build_sample(output, i)
            for i in range(len(output['images']))
        ]

        # Apply external (non-DALI) augments, utilizing ray
        if self.external_executors:
            batch = [(image.cpu(), target) for image, target in batch]
            batch_ref = ray.put(batch)
            batch_futures = [
                self.external_executors[index].run.remote(batch_ref, index)
                for index in range(len(batch))
            ]
            batch = ray.get(batch_futures)

        if self.collate_fn is None:
            self.collate_fn = torch.utils.data._utils.collate.default_collate
        return self.collate_fn(batch)

    def _build_sample(self, output, i):
        """Assemble the ``(image, targets)`` pair for sample ``i`` of ``output``."""
        image = output['images'][i].type(torch.float32)

        # DALI still has flaws about padding an image to square; the
        # workaround is to carry the image shape from before padding.
        pre_padded_image_size = output['pre_padded_image_shape'][i].cpu(
        )[:2].type(torch.float32)
        diff_ratio = None
        if self.image_auto_pad:
            input_size = self.preprocess_args.input_size
            padded_image_size = torch.tensor([input_size, input_size
                                              ]).type(torch.float32)
            diff_ratio = pre_padded_image_size / padded_image_size
        else:
            # Crop the image back to its pre-padding extent.
            height = pre_padded_image_size[0].type(torch.int).item()
            width = pre_padded_image_size[1].type(torch.int).item()
            image = image[:, :height, :width]

        # Prepare labels array
        aug_labels = dict()
        for layout in self.original_data_layout:
            label_output = output[layout][i].numpy()

            # Remove rows holding the DALI pad value; this assumes every
            # label row has the same length along dimension 1.
            rows_with_padded_value = np.unique(
                np.where(label_output == self.labels_pad_value)[0])
            label_output = np.delete(label_output,
                                     rows_with_padded_value,
                                     axis=0)

            # Placeholder to combine all labels
            if layout == 'original_labels':
                ret_targets = label_output
            else:
                if self.image_auto_pad:
                    # Rescale coordinates using the pre-padding image shape
                    # (same DALI square-padding workaround as above).
                    label_output = self._fix_coordinates(
                        label_output, layout, diff_ratio)
                aug_labels[layout] = label_output

        # Modify labels placeholder with augmented labels
        for label_key in self.data_format:
            if label_key not in self.original_data_layout:
                continue
            label_data_format = self.data_format[label_key]
            augmented_label = aug_labels[label_key]

            # Refactor reshaped landmarks and apply asymmetric coordinates
            # fixing if needed
            if label_key == 'landmarks':
                augmented_label = self._restore_landmarks(
                    augmented_label, output, i)

            # Put back augmented labels in the placeholder array for
            # returned labels
            np.put_along_axis(
                ret_targets,
                values=augmented_label,
                axis=label_data_format['axis'],
                indices=np.array(
                    label_data_format['indices'])[np.newaxis, :])

        if list(self.data_format.keys()) == ['class_label']:
            ret_targets = ret_targets.flatten().astype('int')

        return image, torch.tensor(ret_targets)

    def _restore_landmarks(self, augmented_label, output, i):
        """Reshape landmarks to [nrof_objects, nrof_points] and undo the
        keypoint order change caused by an odd number of flips."""
        landmarks_format = self.data_format['landmarks']
        n_coords = len(landmarks_format['indices'])
        nrof_obj_landmarks = int(augmented_label.size / n_coords)
        # Reshape to shape [nrof_objects, nrof_points]
        augmented_label = augmented_label.reshape(nrof_obj_landmarks,
                                                  n_coords)

        # Coordinates sequence fixing for asymmetric landmarks
        if 'asymm_pairs' not in landmarks_format:
            return augmented_label

        # Extract flip flags from pipeline output
        flip_flags = np.array([
            output[key][i].numpy() for key in output.keys()
            if key.startswith('flip_flag_')
        ])
        flip_count = np.sum(flip_flags)

        # An even number of flips cancels out; only an odd count needs the
        # coordinates sequence flipped.
        if flip_count % 2 == 1:
            # Derived from the column count so samples with zero landmark
            # rows do not raise on indexing the first row.
            n_keypoints = n_coords // 2
            augmented_label = augmented_label.reshape((-1, n_keypoints, 2))
            # For each index keypoints pair, swap the position
            for keypoint_pair in landmarks_format['asymm_pairs']:
                keypoint_pair = np.array(keypoint_pair)
                augmented_label[:, keypoint_pair, :] = augmented_label[
                    :, keypoint_pair[::-1], :]
            # Convert back to original format
            augmented_label = augmented_label.reshape(
                (-1, n_keypoints * 2))
        return augmented_label

    def __len__(self):
        """Number of batches: ``size / batch_size`` rounded up."""
        return (self.size + self.batch_size - 1) // self.batch_size

    def _fix_coordinates(self, labels, label_key, diff_ratio):
        """Fix coordinates label after image padding which breaks the original image wh ratio

        Args:
            labels (np.ndarray): 2-D label array, modified in place. Columns
                at even indices are scaled by ``diff_ratio[1]`` and columns
                at odd indices by ``diff_ratio[0]``.
            label_key (str): layout name of the labels; not used by this method.
            diff_ratio (torch.Tensor): two-element ratio of the pre-padding
                image size to the padded image size.

        Returns:
            np.ndarray: the same ``labels`` array after scaling.
        """
        diff_ratio = diff_ratio.numpy()
        labels[:, ::2] = labels[:, ::2] * diff_ratio[1]
        labels[:, 1::2] = labels[:, 1::2] * diff_ratio[0]
        return labels