def check_audio_decoder_correctness(fmt, dtype):
    """Compare DALI's audio decoder output against stored ``.npy`` references.

    A file reader + audio decoder pipeline is run for a fixed number of
    iterations and every decoded sample is compared with the ``.npy`` file that
    has the same path stem as the audio file it was decoded from.

    Args:
        fmt: Audio format name. Used both as the sub-directory of
            ``db/audio`` to list and as the extension passed to ``get_files``.
        dtype: Output type forwarded to ``fn.decoders.audio``.

    Raises:
        AssertionError: If no files are found, a decoded sample has a different
            shape than its reference, or the values do not match (exactly for
            every format except ``'ogg'``, which tolerates off-by-one values
            and at most one larger deviation per sample).
    """
    batch_size = 16
    niterations = 10

    @pipeline_def(batch_size=batch_size, device_id=0, num_threads=4)
    def audio_decoder_pipe(fnames, dtype, downmix=False):
        encoded, _ = fn.readers.file(files=fnames)
        decoded, _ = fn.decoders.audio(encoded, dtype=dtype, downmix=downmix)
        return decoded

    audio_files = get_files(os.path.join('db', 'audio', fmt), fmt)
    # Fail early with a readable message instead of a ZeroDivisionError in the
    # modulo below.
    assert audio_files, f"No '{fmt}' audio files found"
    npy_files = [os.path.splitext(fpath)[0] + '.npy' for fpath in audio_files]

    pipe = audio_decoder_pipe(audio_files, dtype)
    pipe.build()

    nfiles = len(audio_files)
    for it in range(niterations):
        data = pipe.run()
        for s in range(batch_size):
            # NOTE(review): this index assumes the reader yields the files in
            # list order and wraps around at the end of the list - confirm
            # against the reader's defaults.
            sample_idx = (it * batch_size + s) % nfiles
            ref = np.load(npy_files[sample_idx])
            if ref.ndim == 1:
                # 1-D references get a trailing axis so they can be compared
                # with the 2-D decoder output.
                ref = np.expand_dims(ref, 1)
            arr = np.array(data[0][s])
            assert arr.shape == ref.shape, (
                f"Shape mismatch for {audio_files[sample_idx]}: "
                f"got {arr.shape}, expected {ref.shape}")
            if fmt == 'ogg':
                # For OGG Vorbis, we consider errors any value that is off by more than 1
                # TODO(janton): There is a bug in libsndfile that produces underflow/overflow.
                # Remove this when the bug is fixed.
                # The difference is computed in float64: in a fixed-width
                # integer dtype both the subtraction and abs() wrap around
                # (e.g. int16: 32767 - (-32768) wraps to -1), which would hide
                # exactly the overflow/underflow errors counted here.
                abs_diff = np.abs(arr.astype(np.float64) - ref.astype(np.float64))
                nerrors = np.count_nonzero(abs_diff > 1)
                assert nerrors <= 1, (
                    f"{nerrors} values differ by more than 1 "
                    f"for {audio_files[sample_idx]}")
                # TODO(janton): Uncomment this when the bug is fixed
                # np.testing.assert_allclose(arr, ref, atol=1)
            else:
                np.testing.assert_equal(arr, ref)
import nvidia.dali import nvidia.dali.ops as ops import nvidia.dali.fn as fn from nvidia.dali.pipeline import pipeline_def import nvidia.dali.types as types from test_utils import get_files, to_array import numpy as np import librosa import torch import math import random import os from nose.tools import nottest audio_files = get_files('db/audio/wav', 'wav') audio_files = [file for file in audio_files if '237-134500' in file] # Filtering librispeech samples npy_files = [os.path.splitext(fpath)[0] + '.npy' for fpath in audio_files] npy_files_sr = 16000 # From DeepLearningExamples def _convert_samples_to_float32(samples): """Convert sample type to float32. Audio sample type is usually integer or float-point. Integers will be scaled to [-1, 1] in float32. """ float32_samples = samples.astype('float32') if samples.dtype in np.sctypes['int']: bits = np.iinfo(samples.dtype).bits
from webdataset_base import generate_temp_index_file as generate_temp_wds_index import re import numpy as np from nose_utils import assert_raises import os import glob from math import ceil, sqrt import tempfile import sys import json from collections.abc import Iterable data_root = get_dali_extra_path() images_dir = os.path.join(data_root, 'db', 'single', 'jpeg') audio_files = get_files(os.path.join('db', 'audio', 'wav'), 'wav') caffe_dir = os.path.join(data_root, 'db', 'lmdb') caffe2_dir = os.path.join(data_root, 'db', 'c2lmdb') recordio_dir = os.path.join(data_root, 'db', 'recordio') tfrecord_dir = os.path.join(data_root, 'db', 'tfrecord') webdataset_dir = os.path.join(data_root, 'db', 'webdataset') coco_dir = os.path.join(data_root, 'db', 'coco', 'images') coco_annotation = os.path.join(data_root, 'db', 'coco', 'instances.json') sequence_dir = os.path.join(data_root, 'db', 'sequence', 'frames') batch_size = 2 test_data_shape = [10, 20, 3] def get_data(): out = [
setup_test_numpy_reader_cpu) from test_detection_pipeline import coco_anchors from test_utils import check_batch, get_dali_extra_path, get_files, module_functions from segmentation_test_utils import make_batch_select_masks from webdataset_base import generate_temp_index_file as generate_temp_wds_index """ Tests of coverage of eager operators. For each operator results from standard pipeline and eager version are compared across a couple of iterations. If you have added a new operator you should add a test here for an eager version of it. Also make sure you have correctly classified the operator in `dali/python/nvidia/dali/_utils/eager_utils.py` as stateless, stateful or iterator. """ data_root = get_dali_extra_path() images_dir = os.path.join(data_root, 'db', 'single', 'jpeg') audio_files = get_files(os.path.join('db', 'audio', 'wav'), 'wav') caffe_dir = os.path.join(data_root, 'db', 'lmdb') caffe2_dir = os.path.join(data_root, 'db', 'c2lmdb') recordio_dir = os.path.join(data_root, 'db', 'recordio') webdataset_dir = os.path.join(data_root, 'db', 'webdataset') coco_dir = os.path.join(data_root, 'db', 'coco', 'images') coco_annotation = os.path.join(data_root, 'db', 'coco', 'instances.json') sequence_dir = os.path.join(data_root, 'db', 'sequence', 'frames') video_files = get_files(os.path.join('db', 'video', 'vfr'), 'mp4') rng = np.random.default_rng() batch_size = 2 data_size = 10 sample_shape = [20, 20, 3]
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import librosa import numpy as np import nvidia.dali.types as types import test_utils import os import nvidia.dali.fn as fn from nvidia.dali import pipeline_def audio_files = test_utils.get_files(os.path.join('db', 'audio', 'wav'), 'wav') def trim_ref(cutoff_db, ref, frame_length, hop_length, input_data): yt, index = librosa.effects.trim(y=input_data, top_db=-cutoff_db, ref=ref, frame_length=frame_length, hop_length=hop_length) # librosa's trim function calculates power with reference to center of window, # while DALI uses beginning of window. Hence the subtraction below begin = index[0] - frame_length // 2 length = index[1] - index[0] if length != 0: length += frame_length - 1 return np.array(begin), np.array(length) @pipeline_def def nonsilent_region_pipe(cutoff_value, window_size, reference_power, reset_interval):