Example #1
0
def read_kaldi(fd, endian='<', return_size=False):
    """Load one Kaldi object from a binary-mode file object.

    The leading bytes of the stream are peeked (and then restored) to
    choose a reader:

    * ``RIFF``: handled by ``read_wav``.
    * a NUL byte followed by ``B``: Kaldi binary data. If the third byte
      is ``0x04`` the payload is read with ``read_int32vector``,
      otherwise with ``read_matrix_or_vector``.
    * anything else: handled by ``read_ascii_mat``.

    Args:
        fd (file): Binary mode file object. Cannot input string
        endian (str): Byte order passed to the binary readers,
            either '<' or '>'.
        return_size (bool): If true, also return the size value
            reported by the selected reader (presumably the number of
            bytes consumed; confirm against the readers).

    Returns:
        The loaded object, or a tuple ``(object, size)`` when
        ``return_size`` is true.

    Raises:
        AssertionError: If ``endian`` is invalid or ``fd.read`` does not
            return bytes (e.g. ``fd`` was opened in text mode).
    """
    assert endian in ('<', '>'), endian
    binary_flag = fd.read(4)
    assert isinstance(binary_flag, binary_type), type(binary_flag)

    if seekable(fd):
        # Rewind by the number of bytes actually read. Near the end of
        # the stream read(4) may return fewer than 4 bytes, and a fixed
        # seek(-4) would then move before the original position.
        fd.seek(-len(binary_flag), 1)
    else:
        # Cannot rewind: put the peeked bytes back in front of the stream.
        fd = MultiFileDescriptor(BytesIO(binary_flag), fd)

    if binary_flag[:4] == b'RIFF':
        # array: Tuple[int, np.ndarray]
        array, size = read_wav(fd, return_size=True)

    # Load as binary
    elif binary_flag[:2] == b'\0B':
        if binary_flag[2:3] == b'\4':  # This is int32Vector
            array, size = read_int32vector(fd, endian, return_size=True)
        else:
            array, size = read_matrix_or_vector(fd, endian, return_size=True)
    # Load as ascii
    else:
        array, size = read_ascii_mat(fd, return_size=True)

    if return_size:
        return array, size
    return array
Example #2
0
def read_wav_scipy(fd, return_size=False):
    """Read WAV data from ``fd`` using ``scipy.io.wavfile.read``.

    A non-seekable ``fd`` is drained completely into an in-memory
    buffer first. For a seekable ``fd`` the position is moved to
    ``start + size`` after reading, where ``start`` is the position on
    entry.

    Args:
        fd (file): Binary mode file object positioned at the WAV data.
        return_size (bool): If true, also return the computed size.

    Returns:
        ``(rate, array)``, or ``((rate, array), size)`` when
        ``return_size`` is true.
    """
    start = None
    if seekable(fd):
        start = fd.tell()
    else:
        # scipy.io.wavfile doesn't support unseekable fd, so buffer the
        # whole remaining stream in memory.
        fd = BytesIO(fd.read())

    rate, array = wavfile.read(fd)

    # NOTE(review): 44 is presumably the canonical RIFF/WAVE header
    # length; files with extra chunks would have a different size.
    # Confirm that callers only pass such files.
    size = 44 + array.nbytes

    if start is not None:
        # Place the caller's fd just past the data that was consumed.
        fd.seek(start + size)

    wav = (rate, array)
    return (wav, size) if return_size else wav
Example #3
0
def read_kaldi(fd, endian="<", audio_loader="soundfile"):
    """Load one Kaldi object from a binary-mode file object.

    The leading bytes of the stream are peeked (and then restored) to
    choose a reader:

    * ``RIFF``: handled by ``read_wav``.
    * ``NPY``: the tag is skipped, a length header is read, and that
      many bytes are loaded with ``np.load``.
    * ``PKL``: the tag is skipped and the rest is loaded with
      ``pickle.load``.
    * ``AUDIO``: the tag is skipped, a length header is read, and that
      many bytes are decoded with ``audio_loader``.
    * a NUL byte followed by ``B``: Kaldi binary data. If the third byte
      is ``0x04`` the payload is read with ``read_int32vector``,
      otherwise with ``read_matrix_or_vector``.
    * anything else: handled by ``read_ascii_mat``.

    Args:
        fd (file): Binary mode file object. Cannot input string
        endian (str): Byte order passed to the binary readers,
            either "<" or ">".
        audio_loader: (Union[str, callable]): ``"soundfile"`` to use
            ``soundfile.read``, or a callable that takes a file-like
            object and returns a pair of an int and an ``np.ndarray``
            in either order.

    Returns:
        The loaded object. For ``AUDIO`` entries this is a tuple with
        the int first and the array second.

    Raises:
        AssertionError: If ``endian`` is invalid or ``fd.read`` does not
            return bytes (e.g. ``fd`` was opened in text mode).
        ValueError: If ``audio_loader`` is neither ``"soundfile"`` nor a
            callable.
        RuntimeError: If the audio loader returns unexpected types.
    """
    assert endian in ("<", ">"), endian

    max_flag_length = len(b"AUDIO")

    binary_flag = fd.read(max_flag_length)
    assert isinstance(binary_flag, binary_type), type(binary_flag)

    if seekable(fd):
        # Rewind by the number of bytes actually read. Near the end of
        # the stream read() may return fewer than max_flag_length bytes,
        # and a fixed-size rewind would then move before the original
        # position.
        fd.seek(-len(binary_flag), 1)
    else:
        # Cannot rewind: put the peeked bytes back in front of the stream.
        fd = MultiFileDescriptor(BytesIO(binary_flag), fd)

    if binary_flag[:4] == b"RIFF":
        # array: Tuple[int, np.ndarray]
        array = read_wav(fd)

    elif binary_flag[:3] == b"NPY":
        fd.read(3)  # skip the "NPY" tag
        length_ = _read_length_header(fd)
        buf = fd.read(length_)
        _fd = BytesIO(buf)
        array = np.load(_fd)

    elif binary_flag[:3] == b"PKL":
        fd.read(3)  # skip the "PKL" tag
        # SECURITY: unpickling can execute arbitrary code. Only read
        # PKL entries from trusted sources.
        array = pickle.load(fd)

    elif binary_flag[:5] == b"AUDIO":
        fd.read(5)  # skip the "AUDIO" tag
        length_ = _read_length_header(fd)
        buf = fd.read(length_)
        _fd = BytesIO(buf)

        if audio_loader == "soundfile":
            import soundfile

            loader = soundfile.read
        elif callable(audio_loader):
            # The documented contract allows a user-supplied loader.
            loader = audio_loader
        else:
            raise ValueError(
                "Not supported: audio_loader={}".format(audio_loader))

        x1, x2 = loader(_fd)

        # array: Tuple[int, np.ndarray] according to scipy wav read
        if isinstance(x1, int) and isinstance(x2, np.ndarray):
            array = (x1, x2)
        elif isinstance(x1, np.ndarray) and isinstance(x2, int):
            array = (x2, x1)
        else:
            raise RuntimeError(
                "Got unexpected type from audio_loader: ({}, {})".format(
                    type(x1), type(x2)))

    # Load as binary
    elif binary_flag[:2] == b"\0B":
        if binary_flag[2:3] == b"\4":  # This is int32Vector
            array = read_int32vector(fd, endian)
        else:
            array = read_matrix_or_vector(fd, endian)
    # Load as ascii
    else:
        array = read_ascii_mat(fd)

    return array