def read_kaldi(fd, endian='<', return_size=False):
    r"""Load one kaldi value from a binary stream, dispatching on its header.

    Up to four leading bytes are peeked from ``fd`` and then pushed back,
    so the selected reader sees the value from its first byte:

    * ``b'RIFF'``                 -> ``read_wav``
    * ``b'\0B'`` then ``b'\4'``   -> ``read_int32vector``
    * ``b'\0B'`` then other byte  -> ``read_matrix_or_vector``
    * anything else               -> ``read_ascii_mat``

    Args:
        fd (file): Binary mode file object. Cannot input string
        endian (str): Either '<' or '>'. Forwarded to the binary
            int32-vector and matrix/vector readers.
        return_size (bool): If True, return ``(array, size)`` instead of
            only ``array``.

    Returns:
        The value produced by the selected reader (for wav input the
        original code notes it is a ``Tuple[int, np.ndarray]``). When
        ``return_size`` is True, a tuple ``(array, size)`` where ``size``
        is whatever the reader reports for ``return_size=True``
        (presumably the number of bytes consumed -- confirm against the
        readers).

    Raises:
        AssertionError: If ``endian`` is not '<' or '>', or if ``fd.read``
            does not return ``binary_type`` (i.e. ``fd`` is not in binary
            mode).
    """
    assert endian in ('<', '>'), endian

    binary_flag = fd.read(4)
    assert isinstance(binary_flag, binary_type), type(binary_flag)

    if seekable(fd):
        # Rewind by the number of bytes actually read. A fixed -4 would
        # overshoot when fewer than 4 bytes remained in the stream.
        fd.seek(-len(binary_flag), 1)
    else:
        # Cannot rewind: put the peeked bytes back in front of the stream.
        fd = MultiFileDescriptor(BytesIO(binary_flag), fd)

    if binary_flag[:4] == b'RIFF':
        # array: Tuple[int, np.ndarray]
        array, size = read_wav(fd, return_size=True)

    # Load as binary
    elif binary_flag[:2] == b'\0B':
        if binary_flag[2:3] == b'\4':
            # This is int32Vector
            array, size = read_int32vector(fd, endian, return_size=True)
        else:
            array, size = read_matrix_or_vector(
                fd, endian, return_size=True)

    # Load as ascii
    else:
        array, size = read_ascii_mat(fd, return_size=True)

    if return_size:
        return array, size
    return array
def test_incorrect_header_wav(tmpdir, func):
    """Loading 'arks/incorrect_header.wav' through an scp pipe entry must
    give the same samples as reading the file directly with ``read_wav``.

    ``func`` is the scp loader under test (supplied by the test setup);
    its result is converted with ``dict`` and looked up by key.
    """
    here = os.path.dirname(__file__)
    wav_path = os.path.join(here, 'arks', 'incorrect_header.wav')

    # Reference samples, read straight from the file.
    _rate, expected = read_wav(wav_path)

    # One-entry scp whose value is a command piping the wav through sox.
    scp_path = tmpdir.mkdir('test').join('wav.scp').strpath
    with open(scp_path, 'w') as scp_file:
        scp_file.write('aaa sox {wav} -t wav - |\n'.format(wav=wav_path))

    loaded = dict(func(scp_path))
    _loaded_rate, actual = loaded['aaa']
    np.testing.assert_array_equal(expected, actual)
def test_incorrect_header_wav(tmpdir, func):
    """Compare a direct ``read_wav`` of "arks/incorrect_header.wav" with the
    same file loaded via an scp entry that pipes it through sox.

    ``func`` is the scp loader under test (supplied by the test setup).
    """
    source_wav = os.path.join(
        os.path.dirname(__file__), "arks", "incorrect_header.wav"
    )
    reference = read_wav(source_wav)
    _, reference_samples = reference

    work_dir = tmpdir.mkdir("test")
    scp_file = work_dir.join("wav.scp").strpath
    entry = "aaa sox {wav} -t wav - |\n".format(wav=source_wav)
    with open(scp_file, "w") as handle:
        handle.write(entry)

    # Each value from the loader unpacks into a 2-tuple; only the second
    # item (the samples) is checked here.
    by_key = dict(func(scp_file))
    _, piped_samples = by_key["aaa"]
    np.testing.assert_array_equal(reference_samples, piped_samples)
def read_kaldi(fd, endian="<", audio_loader="soundfile"):
    r"""Load one kaldi value from a binary stream, dispatching on its header.

    Up to ``len(b"AUDIO")`` leading bytes are peeked from ``fd`` and then
    pushed back, so the selected reader sees the value from its first byte:

    * ``b"RIFF"``   -> ``read_wav``
    * ``b"NPY"``    -> length header, then ``np.load`` on that many bytes
    * ``b"PKL"``    -> ``pickle.load`` on the rest of the stream
    * ``b"AUDIO"``  -> length header, then ``audio_loader`` on that many bytes
    * ``b"\0B"`` then ``b"\4"``  -> ``read_int32vector``
    * ``b"\0B"`` then other byte -> ``read_matrix_or_vector``
    * anything else -> ``read_ascii_mat``

    Args:
        fd (file): Binary mode file object. Cannot input string
        endian (str): Either "<" or ">". Forwarded to the binary
            int32-vector and matrix/vector readers.
        audio_loader: (Union[str, callable]): Loader for ``AUDIO`` entries.
            The string "soundfile" selects ``soundfile.read`` (imported
            lazily). A callable is invoked with a ``BytesIO`` holding the
            payload and must return a pair of an ``int`` and an
            ``np.ndarray``, in either order.

    Returns:
        The loaded value. For ``AUDIO`` entries this is always a tuple
        ``(int, np.ndarray)``, the ordering used by scipy's wav reader.

    Raises:
        AssertionError: If ``endian`` is not "<" or ">", or if ``fd.read``
            does not return ``binary_type`` (i.e. ``fd`` is not in binary
            mode).
        ValueError: If ``audio_loader`` is neither "soundfile" nor callable
            (only raised when an ``AUDIO`` entry is encountered).
        RuntimeError: If the audio loader's result is not an
            (int, ndarray) / (ndarray, int) pair.
    """
    assert endian in ("<", ">"), endian

    max_flag_length = len(b"AUDIO")
    binary_flag = fd.read(max_flag_length)
    assert isinstance(binary_flag, binary_type), type(binary_flag)

    if seekable(fd):
        # Rewind by the number of bytes actually read. A fixed
        # -max_flag_length would overshoot when the stream was shorter.
        fd.seek(-len(binary_flag), 1)
    else:
        # Cannot rewind: put the peeked bytes back in front of the stream.
        fd = MultiFileDescriptor(BytesIO(binary_flag), fd)

    if binary_flag[:4] == b"RIFF":
        # array: Tuple[int, np.ndarray]
        array = read_wav(fd)

    elif binary_flag[:3] == b"NPY":
        fd.read(3)  # skip the "NPY" marker
        length_ = _read_length_header(fd)
        buf = fd.read(length_)
        _fd = BytesIO(buf)
        array = np.load(_fd)

    elif binary_flag[:3] == b"PKL":
        fd.read(3)  # skip the "PKL" marker
        # NOTE(security): unpickling can execute arbitrary code. Only read
        # PKL entries from sources you trust.
        array = pickle.load(fd)

    elif binary_flag[:5] == b"AUDIO":
        fd.read(5)  # skip the "AUDIO" marker
        length_ = _read_length_header(fd)
        buf = fd.read(length_)
        _fd = BytesIO(buf)

        if audio_loader == "soundfile":
            import soundfile

            audio_loader = soundfile.read
        elif not callable(audio_loader):
            raise ValueError(
                "Not supported: audio_loader={}".format(audio_loader))

        x1, x2 = audio_loader(_fd)

        # array: Tuple[int, np.ndarray] according to scipy wav read
        if isinstance(x1, int) and isinstance(x2, np.ndarray):
            array = (x1, x2)
        elif isinstance(x1, np.ndarray) and isinstance(x2, int):
            # Loader returned (ndarray, int); swap into (int, ndarray).
            array = (x2, x1)
        else:
            raise RuntimeError(
                "Got unexpected type from audio_loader: ({}, {})".format(
                    type(x1), type(x2)))

    # Load as binary
    elif binary_flag[:2] == b"\0B":
        if binary_flag[2:3] == b"\4":
            # This is int32Vector
            array = read_int32vector(fd, endian)
        else:
            array = read_matrix_or_vector(fd, endian)

    # Load as ascii
    else:
        array = read_ascii_mat(fd)

    return array