def check_copy(shape, layout, dev, dtype=np.uint8):
    """Check that the copy pipeline returns outputs identical to its inputs.

    The pipeline created by ``copy_pipe(shape, layout, dev, dtype)`` is built
    and run 10 times. For each of the module-level ``batch_size`` samples of
    every run, the output sample must report the same layout as the input
    sample and hold exactly the same data.
    """
    pipeline = copy_pipe(shape, layout, dev, dtype)
    pipeline.build()
    for _ in range(10):
        source_batch, copied_batch = pipeline.run()
        for sample_idx in range(batch_size):
            # Layout has to survive the copy untouched.
            assert copied_batch[sample_idx].layout() == source_batch[sample_idx].layout()
            np.testing.assert_array_equal(
                to_array(source_batch[sample_idx]),
                to_array(copied_batch[sample_idx]),
            )
def run_decode_fused(test_fun, path, img_type, batch, device, threads, validation_fun):
    """Run a two-output pipeline for one epoch and validate each output pair.

    ``test_fun`` is called to create the pipeline, reading data from
    ``test_data_root/path/img_type`` (``test_data_root`` is a module-level
    name). The number of iterations is the size of the "Reader" epoch divided
    by ``batch``, rounded up. Every pair of corresponding samples from the two
    outputs is converted with ``to_array`` and must satisfy ``validation_fun``.
    """
    pipeline = test_fun(
        data_path=os.path.join(test_data_root, path, img_type),
        batch_size=batch,
        num_threads=threads,
        device_id=0,
        device=device,
        prefetch_queue_depth=1,
    )
    pipeline.build()
    num_iterations = math.ceil(pipeline.epoch_size("Reader") / batch)
    for _ in range(num_iterations):
        first_output, second_output = pipeline.run()
        for first_sample, second_sample in zip(first_output, second_output):
            first_arr = to_array(first_sample)
            second_arr = to_array(second_sample)
            assert validation_fun(first_arr, second_arr)
def check_element_extract(shape, layout, element_map, dev, dtype=np.uint8):
    """Check that extracted elements match direct indexing of the input.

    The pipeline created by ``element_extract_pipe`` is built and run 10
    times. Its first output is the input batch; each following output
    corresponds, in order, to one entry of ``element_map``. For every sample,
    the j-th extracted output must have layout ``layout[1:]`` and be equal to
    the input sample indexed with ``element_map[j]`` along its first axis.
    """
    pipeline = element_extract_pipe(shape, layout, element_map, dev, dtype)
    pipeline.build()
    for _ in range(10):
        source_batch, *extracted_batches = pipeline.run()
        for sample_idx in range(batch_size):
            for out_idx, element_idx in enumerate(element_map):
                extracted = extracted_batches[out_idx][sample_idx]
                # Extraction drops the leading dimension from the layout.
                assert extracted.layout() == layout[1:]
                expected = to_array(source_batch[sample_idx])[element_idx]
                np.testing.assert_array_equal(expected, to_array(extracted))
def check_pad_last_sample(device):
    """Check that ``pad_last_batch=True`` fills a batch by repeating the last file.

    Two ``.npy`` files are created in a temporary directory and read with a
    batch size of 5. The reference batch is the two files followed by three
    more loads of the last file; two consecutive pipeline runs must both
    match that reference sample by sample.
    """
    with tempfile.TemporaryDirectory(prefix=gds_data_root) as test_data_root:
        # create files
        num_samples = 2
        batch_size = 5
        filenames = [
            os.path.join(test_data_root, "test_{:02d}.npy".format(index))
            for index in range(num_samples)
        ]
        reference_batch = []
        for filename in filenames:
            create_numpy_file(filename, (5, 2, 8), np.float32, False)
            reference_batch.append(np.load(filename))

        # Pad the reference up to a full batch with copies of the last file.
        missing = batch_size - len(reference_batch)
        for _ in range(missing):
            reference_batch.append(np.load(filenames[-1]))

        pipe = NumpyReaderPipeline(
            path=test_data_root,
            files=filenames,
            file_list=None,
            file_filter=None,
            device=device,
            batch_size=batch_size,
            num_threads=4,
            device_id=0,
            pad_last_batch=True,
        )
        pipe.build()

        for _ in range(2):
            pipe_out = pipe.run()
            for sample_idx, ref_arr in enumerate(reference_batch):
                pipe_arr = to_array(pipe_out[0][sample_idx])
                assert_array_equal(pipe_arr, ref_arr)
def _testimpl_numpy_reader_roi_empty_range(testcase_name, file_root, batch_size, ndim, dtype,
                                           device, fortran_order, file_filter="*.npy"):
    """Check that a ROI with ``roi_start == roi_end`` yields a zero extent.

    Two numpy readers with identical settings read the same files; the second
    one requests the range ``[1, 1)`` on axis 1. For every sample of a single
    run, the ROI output must have extent 0 on axis 1 and the full extent on
    every other axis.

    ``testcase_name``, ``ndim``, ``dtype`` and ``fortran_order`` are accepted
    but not used by the body.
    """

    @pipeline_def(batch_size=batch_size, device_id=0, num_threads=8)
    def pipe():
        # Both readers share these settings so they visit the same samples.
        reader_args = dict(
            device=device,
            file_root=file_root,
            file_filter=file_filter,
            shard_id=0,
            num_shards=1,
            cache_header_information=False,
            seed=1234,
        )
        full = fn.readers.numpy(**reader_args)
        empty_roi = fn.readers.numpy(roi_start=[1], roi_end=[1], roi_axes=[1], **reader_args)
        return full, empty_roi

    p = pipe()
    p.build()
    full_out, roi_out = p.run()
    for sample_idx in range(batch_size):
        full_arr = to_array(full_out[sample_idx])
        roi_arr = to_array(roi_out[sample_idx])
        for axis, full_extent in enumerate(full_arr.shape):
            expected_extent = 0 if axis == 1 else full_extent
            assert roi_arr.shape[axis] == expected_extent
def _testimpl_types_and_shapes(device, shapes, type, batch_size, num_threads, fortran_order_arg,
                               file_arg_type, cache_header_information):
    """Compare the numpy reader with ``np.load`` for various shapes and settings.

    One ``.npy`` file is created per entry of ``shapes`` in a temporary
    directory, loaded with ``np.load`` as the reference, and then read back
    through ``NumpyReaderPipeline``. The pipeline is run until ``len(shapes)``
    samples have been compared, in file-name order.

    Args:
        device: forwarded to ``NumpyReaderPipeline``.
        shapes: one array shape per file to create.
        type: element type passed to ``create_numpy_file``.
        batch_size: pipeline batch size.
        num_threads: pipeline thread count.
        fortran_order_arg: Fortran-order flag passed to ``create_numpy_file``;
            ``None`` picks ``True``/``False`` at random for each file.
        file_arg_type: how the files are handed to the reader - one of
            ``'file_list'``, ``'files'`` or ``'file_filter'``.
        cache_header_information: forwarded to ``NumpyReaderPipeline``.

    Raises:
        AssertionError: if ``file_arg_type`` is not a supported value, or if
            any sample differs from the reference.
    """
    nsamples = len(shapes)

    with tempfile.TemporaryDirectory(prefix=gds_data_root) as test_data_root:
        # setup file
        filenames = ["test_{:02d}.npy".format(i) for i in range(nsamples)]
        full_paths = [os.path.join(test_data_root, fname) for fname in filenames]
        for full_path, shape in zip(full_paths, shapes):
            fortran_order = fortran_order_arg
            if fortran_order is None:
                fortran_order = random.choice([False, True])
            create_numpy_file(full_path, shape, type, fortran_order)

        # load manually
        arrays = [np.load(filename) for filename in full_paths]

        # load with numpy reader
        file_list_arg = None
        files_arg = None
        file_filter_arg = None
        if file_arg_type == 'file_list':
            file_list_arg = os.path.join(test_data_root, "input.lst")
            with open(file_list_arg, "w") as f:
                # A single string is written; writelines() would iterate it
                # character by character.
                f.write("\n".join(filenames))
        elif file_arg_type == 'files':
            files_arg = filenames
        elif file_arg_type == "file_filter":
            file_filter_arg = "*.npy"
        else:
            # Raised explicitly so the check is not stripped under `python -O`.
            raise AssertionError("Unsupported file_arg_type: {!r}".format(file_arg_type))

        pipe = NumpyReaderPipeline(
            path=test_data_root,
            files=files_arg,
            file_list=file_list_arg,
            file_filter=file_filter_arg,
            cache_header_information=cache_header_information,
            device=device,
            batch_size=batch_size,
            num_threads=num_threads,
            device_id=0,
        )
        pipe.build()

        i = 0
        while i < nsamples:
            pipe_out = pipe.run()
            for s in range(batch_size):
                # The last batch may extend past the number of files created.
                if i == nsamples:
                    break
                pipe_arr = to_array(pipe_out[0][s])
                ref_arr = arrays[i]
                assert_array_equal(pipe_arr, ref_arr)
                i += 1
def _testimpl_numpy_reader_roi(file_root, batch_size, ndim, dtype, device, fortran_order=False,
                               file_filter="*.npy", roi_start=None, rel_roi_start=None,
                               roi_end=None, rel_roi_end=None, roi_shape=None,
                               rel_roi_shape=None, roi_axes=None, out_of_bounds_policy=None,
                               fill_value=None):
    """Check the two outputs of ``numpy_reader_roi_pipe`` against each other.

    The pipeline is created with the given ROI arguments, with
    ``default_axes`` set to all ``ndim`` axes. It is built and run once, and
    for each of the ``batch_size`` samples the first output (ROI) must equal
    the second output (sliced).

    ``dtype`` and ``fortran_order`` are accepted but not used by the body.
    """
    pipe_args = {
        "file_root": file_root,
        "file_filter": file_filter,
        "device": device,
        "roi_start": roi_start,
        "rel_roi_start": rel_roi_start,
        "roi_end": roi_end,
        "rel_roi_end": rel_roi_end,
        "roi_shape": roi_shape,
        "rel_roi_shape": rel_roi_shape,
        "roi_axes": roi_axes,
        "default_axes": list(range(ndim)),
        "out_of_bounds_policy": out_of_bounds_policy,
        "fill_value": fill_value,
        "batch_size": batch_size,
    }
    pipe = numpy_reader_roi_pipe(**pipe_args)
    pipe.build()

    roi_out, sliced_out = pipe.run()
    for sample_idx in range(batch_size):
        assert_array_equal(
            to_array(roi_out[sample_idx]),
            to_array(sliced_out[sample_idx]),
        )
def run_pipeline(pipelines, iterations, device, to_stop_iter=False):
    """Build and run one or more pipelines, collecting outputs as arrays.

    ``pipelines`` may be a single pipeline or a list of them. Every pipeline
    is built, then all of them are run ``iterations`` times inside
    ``expect_iter_end(not to_stop_iter, StopIteration)``.
    NOTE(review): presumably that helper controls whether a ``StopIteration``
    from ``run()`` is expected - confirm against its definition.

    Returns a list with one entry per completed iteration; each entry is a
    tuple with one element per pipeline, and each element is a tuple of that
    pipeline's outputs converted with ``to_array``.

    ``device`` is accepted but not used by the body.
    """
    pipeline_list = pipelines if isinstance(pipelines, list) else [pipelines]
    for pipeline in pipeline_list:
        pipeline.build()

    collected = []
    with expect_iter_end(not to_stop_iter, StopIteration):
        for _ in range(iterations):
            per_pipeline = []
            for pipeline in pipeline_list:
                # run() stays a plain statement so a StopIteration it raises
                # reaches the context manager unchanged.
                outputs = pipeline.run()
                per_pipeline.append(tuple(to_array(out) for out in outputs))
            collected.append(tuple(per_pipeline))
    return collected
def _testimpl_numpy_reader_roi_empty_axes(testcase_name, file_root, batch_size, ndim, dtype,
                                          device, fortran_order, file_filter="*.npy"):
    """Check that empty ROI arguments leave the data unchanged.

    Two numpy readers with identical settings read the same files; the second
    one is given empty ``roi_start``, ``roi_end`` and ``roi_axes`` lists. For
    every sample of a single run, both outputs must be equal.

    ``testcase_name`` is only there for visibility in the output logs;
    ``ndim``, ``dtype`` and ``fortran_order`` are accepted but not used by
    the body.
    """

    @pipeline_def(batch_size=batch_size, device_id=0, num_threads=8)
    def pipe():
        # Both readers share these settings so they visit the same samples.
        reader_args = dict(
            device=device,
            file_root=file_root,
            file_filter=file_filter,
            shard_id=0,
            num_shards=1,
            cache_header_information=False,
            seed=1234,
        )
        plain = fn.readers.numpy(**reader_args)
        with_empty_roi = fn.readers.numpy(roi_start=[], roi_end=[], roi_axes=[], **reader_args)
        return plain, with_empty_roi

    p = pipe()
    p.build()
    plain_out, roi_out = p.run()
    for sample_idx in range(batch_size):
        plain_arr = to_array(plain_out[sample_idx])
        roi_arr = to_array(roi_out[sample_idx])
        assert_array_equal(plain_arr, roi_arr)
def _testimpl_rnnt_data_pipeline(device, pad_amount=0, preemph_coeff=.97, window_size=.02,
                                 window_stride=.01, window="hann", nfeatures=64, n_fft=512,
                                 frame_splicing_stack=1, frame_splicing_subsample=1,
                                 lowfreq=0.0, highfreq=None, normalize_type='per_feature',
                                 batch_size=32):
    """Compare every stage of the RNN-T pipeline with the torch references.

    A ``FilterbankFeatures`` reference is evaluated for each of the
    module-level ``recordings``. The pipeline from ``rnnt_train_pipe`` is then
    run over ``audio_files`` and, for each sample, its eight outputs (final
    normalized features back to the raw audio) are checked against the
    ``torch_*`` reference helpers, and the final output against the
    ``FilterbankFeatures`` result.

    Most stages are checked twice: against a reference chained from the
    previous *reference* stage (looser tolerance, ``atol=1e-3``) and against a
    reference computed from the pipeline's own previous output (tighter
    tolerance, ``atol=1e-4``).

    The sample rate is the module-level ``npy_files_sr``; speed perturbation
    and silence trimming are disabled.

    Raises:
        AssertionError: if any stage differs beyond its tolerance.
    """
    sample_rate = npy_files_sr
    speed_perturb = False
    silence_trim = False

    ref_pipeline = FilterbankFeatures(
        sample_rate=sample_rate,
        window_size=window_size,
        window_stride=window_stride,
        window=window,
        normalize=normalize_type,
        n_fft=n_fft,
        pad_amount=pad_amount,
        preemph=preemph_coeff,
        nfilt=nfeatures,
        lowfreq=lowfreq,
        highfreq=highfreq,
        log=True,
        frame_splicing_stack=frame_splicing_stack,
        frame_splicing_subsample=frame_splicing_subsample,
    )
    reference_data = [
        ref_pipeline.forward(torch.tensor([recordings[i]]),
                             torch.tensor([recordings[i].shape[0]]))
        for i in range(nrecordings)
    ]

    pipe = rnnt_train_pipe(
        audio_files, sample_rate, pad_amount, preemph_coeff, window_size, window_stride, window,
        nfeatures, n_fft, frame_splicing_stack, frame_splicing_subsample, lowfreq, highfreq,
        normalize_type, speed_perturb, silence_trim, device, seed=42, batch_size=batch_size)
    pipe.build()

    nbatches = (nrecordings + batch_size - 1) // batch_size
    i = 0  # index of the recording being verified, across batches
    for _ in range(nbatches):
        dali_out = list(pipe.run())
        for s in range(batch_size):
            # The last batch may extend past the number of recordings.
            if i >= nrecordings:
                break

            (norm_log_features, log_features_spliced, log_features, mel_spec, spec,
             preemph_audio, padded_audio, audio) = [to_array(out[s]) for out in dali_out]

            ref = np.array(reference_data[i].squeeze(0))
            assert ref.shape == norm_log_features.shape, f"{ref.shape}, {norm_log_features.shape}"

            audio_ref = recordings[i]
            np.testing.assert_allclose(audio, audio_ref, atol=1e-4)

            padded_audio_ref = torch_reflect_pad(audio, pad_amount)
            np.testing.assert_equal(padded_audio, padded_audio_ref)

            preemph_audio_ref = torch_preemphasis(padded_audio_ref, preemph=preemph_coeff)
            np.testing.assert_allclose(preemph_audio, preemph_audio_ref, atol=1e-4)

            spec_ref = torch_spectrogram(
                preemph_audio_ref, sample_rate, window_size=window_size,
                window_stride=window_stride, center=True, pad_mode='reflect', window=window,
                n_fft=n_fft)
            np.testing.assert_allclose(spec, spec_ref, atol=1e-4)

            mel_spec_ref = torch_mel_fbank(spec_ref, sample_rate)
            np.testing.assert_allclose(mel_spec, mel_spec_ref, atol=1e-4)

            log_features_ref = torch_log(mel_spec_ref)
            np.testing.assert_allclose(log_features, log_features_ref, atol=1e-3)
            log_features_ref2 = torch_log(mel_spec)
            np.testing.assert_allclose(log_features, log_features_ref2, atol=1e-4)

            log_features_spliced_ref = torch_frame_splicing(
                log_features_ref, stacking=frame_splicing_stack,
                subsampling=frame_splicing_subsample)
            np.testing.assert_allclose(log_features_spliced, log_features_spliced_ref, atol=1e-3)
            log_features_spliced_ref2 = torch_frame_splicing(
                log_features, stacking=frame_splicing_stack,
                subsampling=frame_splicing_subsample)
            np.testing.assert_allclose(log_features_spliced, log_features_spliced_ref2, atol=1e-4)

            norm_log_features_ref = torch_normalize(log_features_spliced_ref, normalize_type)
            np.testing.assert_allclose(norm_log_features, norm_log_features_ref, atol=1e-3)
            norm_log_features_ref2 = torch_normalize(log_features_spliced, normalize_type)
            np.testing.assert_allclose(norm_log_features, norm_log_features_ref2, atol=1e-4)

            # Full pipeline
            np.testing.assert_allclose(norm_log_features, ref, atol=1e-3)
            i += 1
def dali_run(pipe, device):
    """Build ``pipe``, run it once and return the first sample of its first output.

    The first output is converted with ``to_array`` before indexing.
    ``device`` is accepted but not used by the body.
    """
    pipe.build()
    first_output = pipe.run()[0]
    as_array = to_array(first_output)
    return as_array[0]
def run_for_dataset(args, dataset):
    """Run the detection pipelines on one dataset and cross-check their outputs.

    One ``DetectionPipeline`` is created per device id in
    ``range(args.num_gpus)`` from ``dataset[0]`` and ``dataset[1]``. After
    ``set_iters`` is called with the "Reader" epoch size, every pipeline is
    run ``args.iters`` times. Each run's 31 outputs are converted with
    ``to_array`` and squeezed, then compared in groups: reader labels,
    cropping, resizing, normalizing, twisting, flipping and box encoding.

    On the first failing group a per-check report is printed and the process
    exits with status 1. Progress is printed every 100 iterations and "OK"
    once all iterations pass.

    Raises:
        SystemExit: with code 1 when any check fails.
    """
    # sys.exit() is used rather than the site-provided exit(), which is meant
    # for the interactive shell and is not guaranteed to exist.
    import sys

    print("Build pipeline")
    pipes = [
        DetectionPipeline(args, device_id, dataset[0], dataset[1])
        for device_id in range(args.num_gpus)
    ]
    for pipe in pipes:
        pipe.build()

    set_iters(args, pipes[0].epoch_size('Reader'))

    for iteration in range(args.iters):
        for pipe in pipes:
            (labels,
             image_ssd_crop, image_decode_crop,
             image_slice_cpu, image_slice_gpu,
             boxes_ssd_crop, boxes_random_crop,
             labels_ssd_crop, labels_random_crop,
             image_resized_cpu, image_resized_gpu,
             image_normalized_cpu, image_normalized_gpu,
             image_twisted_cpu, image_twisted_gpu,
             image_legacy_twisted_cpu, image_legacy_twisted_gpu,
             image_flipped_cpu, image_flipped_gpu,
             boxes_flipped_cpu, boxes_flipped_gpu,
             encoded_boxes_cpu, encoded_boxes_gpu,
             encoded_labels_cpu, encoded_labels_gpu,
             encoded_offset_boxes_cpu, encoded_offset_boxes_gpu,
             encoded_offset_labels_cpu, encoded_offset_labels_gpu,
             image_decode_crop_gpu, image_gpu_slice_gpu) = \
                [np.squeeze(to_array(out)) for out in pipe.run()]

            # Check reader: every label must lie in the range 1..80.
            labels_ok = ((labels > 0) & (labels <= 80)).all()

            # Check cropping ops
            decode_crop = compare(image_ssd_crop, image_decode_crop)
            slice_cpu = compare(image_ssd_crop, image_slice_cpu)
            slice_gpu = compare(image_ssd_crop, image_slice_gpu)
            decode_crop_gpu = compare(image_gpu_slice_gpu, image_decode_crop_gpu)
            image_crop = decode_crop and slice_cpu and slice_gpu and decode_crop_gpu
            boxes_crop = compare(boxes_ssd_crop, boxes_random_crop)
            labels_crop = compare(labels_ssd_crop, labels_random_crop)
            crop = image_crop and boxes_crop and labels_crop

            hsv_bc_twist = relaxed_compare(image_twisted_gpu, image_legacy_twisted_gpu, eps=4)

            # Check resizing ops
            resize = relaxed_compare(
                val_1=image_resized_cpu,
                val_2=image_resized_gpu,
                reference=resize_ref(image_ssd_crop, (300, 300)),
                border=1)

            # Check normalizing ops
            image_normalized_ref = normalize_ref(image_resized_cpu)
            normalize = compare(image_normalized_cpu, image_normalized_gpu, image_normalized_ref)

            # Check twisting ops
            twist_gpu_cpu = relaxed_compare(image_twisted_cpu, image_twisted_gpu, eps=2)
            twist = twist_gpu_cpu and hsv_bc_twist

            # Check flipping ops
            image_flipped_ref, boxes_flipped_ref = horizontal_flip_ref(
                image_resized_cpu, boxes_ssd_crop)
            image_flip = compare(image_flipped_cpu, image_flipped_gpu, image_flipped_ref)
            boxes_flip = compare(boxes_flipped_cpu, boxes_flipped_gpu, boxes_flipped_ref)
            flip = image_flip and boxes_flip

            # Check box encoding ops
            encoded_boxes = compare(encoded_boxes_cpu, encoded_boxes_gpu)
            encoded_labels = compare(encoded_labels_cpu, encoded_labels_gpu)
            encoded_boxes_offset = compare(encoded_offset_boxes_cpu, encoded_offset_boxes_gpu)
            encoded_labels_offset = compare(encoded_offset_labels_cpu, encoded_offset_labels_gpu)
            # The offset and non-offset encoders must agree on the labels.
            encoded_labels_cpu_ok = compare(encoded_labels_cpu, encoded_offset_labels_cpu)
            encoded_labels_gpu_ok = compare(encoded_labels_gpu, encoded_offset_labels_gpu)
            box_encoder = (encoded_boxes and encoded_boxes_offset and encoded_labels
                           and encoded_labels_offset and encoded_labels_cpu_ok
                           and encoded_labels_gpu_ok)

            all_ok = (labels_ok and crop and resize and normalize and twist and flip
                      and box_encoder)
            if not all_ok:
                print('Error during iteration', iteration)
                print('Labels = ', labels_ok)

                print('Crop = ', crop)
                print(' decode_crop =', decode_crop)
                print(' decode_crop_gpu =', decode_crop_gpu)
                print(' slice_cpu =', slice_cpu)
                print(' slice_gpu =', slice_gpu)
                print(' boxes_crop =', boxes_crop)
                print(' labels_crop =', labels_crop)

                print('Resize =', resize)

                print('Normalize =', normalize)

                print('Twist =', twist)
                print(' twist gpu vs cpu = ', twist_gpu_cpu)
                print(' HSV + BC vs legacy Twist = ', hsv_bc_twist)

                print('Flip =', flip)
                print(' image_flip =', image_flip)
                print(' boxes_flip =', boxes_flip)

                print('Box encoder =', box_encoder)
                print(' encoded_boxes =', encoded_boxes)
                print(' encoded_boxes_offset =', encoded_boxes_offset)
                print(' encoded_labels =', encoded_labels)
                print(' encoded_labels_offset =', encoded_labels_offset)
                print(' encoded_labels_cpu =', encoded_labels_cpu_ok)
                print(' encoded_labels_gpu =', encoded_labels_gpu_ok)

                sys.exit(1)

        if not iteration % 100:
            print("Iteration: {}/ {}".format(iteration + 1, args.iters))

    print("OK")