Beispiel #1
0
 def preprocess_audio(self, x, audio_fps):
     """Turn an audio tensor into a mono row vector at ``self.audio_fps``.

     ``x`` is converted with ``x.numpy()`` and mixed down via ``ap.to_mono``.
     If ``audio_fps`` differs from ``self.audio_fps`` the signal is resampled
     with ``ap.resample``. Signals shorter than ``self.audio_fps`` samples are
     padded at the end (``np.pad`` default padding) up to that length.

     Returns the signal reshaped to ``(1, -1)``.
     """
     signal = ap.to_mono(x.numpy())

     rate_mismatch = audio_fps != self.audio_fps
     if rate_mismatch:
         signal = ap.resample(signal, audio_fps, self.audio_fps)

     # Number of samples missing to reach ``self.audio_fps`` samples.
     shortfall = self.audio_fps - signal.shape[0]
     if shortfall > 0:
         signal = np.pad(signal, (0, shortfall))

     return signal.reshape((1, -1))
Beispiel #2
0
def generate_cqt(file_path, st_status):
    """Load an audio file and return the magnitude of its constant-Q transform.

    Progress messages are reported through ``st_status.text`` and a few
    diagnostics are printed. The file is loaded at its native rate
    (``sr=None``), mixed down with ``to_mono`` when the loaded array is
    two-dimensional, resampled to ``TARGET_SAMPLE_RATE`` and passed to ``cqt``.

    Returns ``np.abs`` of the ``cqt`` output.
    """
    st_status.text('Opening {}'.format(file_path))
    signal, native_rate = auto_load(file_path, sr=None)
    print('Sample Rate:', native_rate, 'shape:', signal.shape)

    # A two-dimensional array is treated as multi-channel audio.
    has_channel_axis = len(signal.shape) == 2
    if has_channel_axis:
        print('Converting to mono channel...')
        signal = to_mono(signal)

    st_status.text('Resampling to {} Hz...'.format(TARGET_SAMPLE_RATE))
    resampled = resample(
        signal, orig_sr=native_rate, target_sr=TARGET_SAMPLE_RATE)
    st_status.text('Downsampled to {} Hz, shape is now {}'.format(
        TARGET_SAMPLE_RATE, resampled.shape))

    st_status.text('Generating CQT...')
    transform = cqt(
        resampled,
        sr=TARGET_SAMPLE_RATE,
        hop_length=HOP_LENGTH,
        n_bins=TOTAL_BINS,
        bins_per_octave=BINS_PER_OCTAVE,
    )
    return np.abs(transform)
Beispiel #3
0
def generate_cqt(i, file_path, offset=0, duration=None):
    """Load (part of) an audio file and return its constant-Q magnitudes.

    ``i`` is only used as a tag in the printed progress messages and in the
    ``Timer`` labels. ``offset`` and ``duration`` are forwarded to ``load``,
    which reads the file at its native rate (``sr=None``). A two-dimensional
    result is mixed down with ``to_mono``; the signal is then resampled to
    ``TARGET_SAMPLE_RATE`` and transformed with ``cqt``.

    Returns ``np.abs`` of the ``cqt`` output.
    """
    print('[{}] Opening'.format(i), file_path)
    signal, native_rate = load(
        file_path, sr=None, offset=offset, duration=duration)
    print('[{}] Sample Rate:'.format(i), native_rate, 'shape:', signal.shape)

    # A two-dimensional array is treated as multi-channel audio.
    if len(signal.shape) == 2:
        with Timer('[{}] Converted to mono'.format(i)):
            print('[{}] Converting to mono channel...'.format(i))
            signal = to_mono(signal)

    with Timer('[{}] Resampling'.format(i)):
        print('[{}] Resampling to'.format(i), TARGET_SAMPLE_RATE, 'Hz...')
        resampled = resample(
            signal, orig_sr=native_rate, target_sr=TARGET_SAMPLE_RATE)
        print('[{}] Downsampled to'.format(i), TARGET_SAMPLE_RATE,
              'Hz shape is now', resampled.shape)

    with Timer('[{}] CQT'.format(i)):
        print('[{}] Generating CQT...'.format(i))
        transform = cqt(
            resampled,
            sr=TARGET_SAMPLE_RATE,
            hop_length=HOP_LENGTH,
            n_bins=TOTAL_BINS,
            bins_per_octave=BINS_PER_OCTAVE,
        )
        magnitudes = np.abs(transform)

    return magnitudes
Beispiel #4
0
 def to_mono(self, x):
     """
     Return a mono version of ``x`` when it is two-dimensional.

     Arrays whose shape does not have exactly two entries are handed back
     untouched. Two-dimensional arrays are transposed and passed to
     ``lb.to_mono``.
     """
     if len(x.shape) != 2:
         return x
     # presumably x is (samples, channels) and lb.to_mono wants
     # (channels, samples) -- TODO confirm against callers
     transposed = numpy.transpose(x)
     return lb.to_mono(transposed)
def downsample_mono(path, sr):
    """Read a WAV file, mix it down to mono and resample it to ``sr``.

    Parameters
    ----------
    path : str
        Location of the WAV file, passed to ``wavio.read``.
    sr : int
        Target sampling rate handed to ``resample``.

    Returns
    -------
    tuple
        ``(sr, wav)`` where ``wav`` is the resampled signal cast to
        ``np.int16``.
    """
    obj = wavio.read(path)
    wav = obj.data.astype(np.float32, order="F")
    rate = obj.rate

    if wav.ndim == 2 and wav.shape[1] > 1:
        # Any multi-channel layout (not only stereo) is mixed down; leaving
        # it 2-D would make ``resample`` work along the channel axis.
        wav = to_mono(wav.T)
    else:
        # Single-channel (or already 1-D) data only needs flattening.
        wav = to_mono(wav.reshape(-1))

    wav = resample(wav, rate, sr)
    # NOTE(review): the cast does not clip; confirm that samples stay within
    # the int16 range for the files this is used on.
    wav = wav.astype(np.int16)
    return sr, wav
Beispiel #6
0
def downsample_mono(path, sr):
    """Read a WAV file, mix it down to mono and resample it to ``sr``.

    Parameters
    ----------
    path : str
        Location of the WAV file, passed to ``wavfile.read``.
    sr : int
        Target sampling rate handed to ``resample``.

    Returns
    -------
    tuple
        ``(sr, wav)`` where ``wav`` is the resampled signal cast to
        ``np.int16``.
    """
    rate, wav = wavfile.read(path)
    wav = wav.astype(np.float32, order='F')

    # Only multi-channel data has a second axis. Checking the rank explicitly
    # (instead of catching every exception) lets real ``to_mono`` failures
    # surface rather than silently leaving the data multi-channel.
    if wav.ndim > 1:
        wav = to_mono(wav.T)

    wav = resample(wav, rate, sr)
    wav = wav.astype(np.int16)
    return sr, wav
Beispiel #7
0
def downsample_mono(path, sr):
    """Read a WAV file, mix it down to mono and resample it to ``sr``.

    Parameters
    ----------
    path : str
        Location of the WAV file, passed to ``wavio.read``.
    sr : int
        Target sampling rate handed to ``resample``.

    Returns
    -------
    tuple
        ``(sr, wav)`` where ``wav`` is the resampled signal cast to
        ``np.int16``.
    """
    obj = wavio.read(path)
    wav = obj.data.astype(np.float32, order='F')
    rate = obj.rate

    if wav.ndim > 1:
        if wav.shape[1] == 1:
            # A single channel is flattened directly, so the result is 1-D
            # no matter how ``to_mono`` treats a (1, n) input.
            wav = wav.reshape(-1)
        else:
            # (samples, channels) -> (channels, samples) before mixing down.
            wav = to_mono(wav.T)

    wav = resample(wav, rate, sr)
    wav = wav.astype(np.int16)
    return sr, wav
def downsample(path, down_sample):
    """Read a WAV file, mix it down to mono and resample it.

    Parameters
    ----------
    path : str
        Location of the WAV file, passed to ``wavfile.read``.
    down_sample : int
        Target sampling rate handed to ``resample``.

    Returns
    -------
    tuple
        ``(wave, down_sample)`` where ``wave`` is the resampled signal cast
        to ``np.int16``.
    """
    sample_rate, wave = wavfile.read(path)
    wave = wave.astype(np.float32, order='F')

    # Only multi-channel data has a second axis. An explicit rank check
    # replaces the former catch-all ``except`` so that genuine ``to_mono``
    # errors are no longer hidden.
    if wave.ndim > 1:
        wave = to_mono(wave.T)

    wave = resample(wave, sample_rate, down_sample)
    wave = wave.astype(np.int16)

    return wave, down_sample
def load_audio_file(file_path, target_sr=16000):
    """Load every WAV file in a directory as mono float32 audio.

    Parameters
    ----------
    file_path : str
        Directory whose entries (as returned by ``os.listdir``) are read
        with ``wavfile.read``.
    target_sr : int, optional
        Sampling rate every file is resampled to. Defaults to 16000.

    Returns
    -------
    list
        ``data_ap`` after one ``np.float32`` array per file has been
        appended to it.
    """
    # NOTE(review): ``data_ap`` is not defined in this function; it is
    # presumably a module-level list that accumulates across calls -- confirm.
    for name in os.listdir(file_path):
        # os.path.join works whether or not ``file_path`` ends with a
        # separator; plain string concatenation did not.
        rate, data_in = wavfile.read(os.path.join(file_path, name))
        data_in = data_in.astype(np.float32, order='F')

        # Only multi-channel data has a second axis; check it explicitly
        # instead of swallowing every exception.
        if data_in.ndim > 1:
            data_in = to_mono(data_in.T)

        data_in = resample(data_in, rate, target_sr)
        data_ap.append(data_in.astype(np.float32))
    return data_ap
def stream_to_np(bytes_io,
                 sr=22050,
                 mono=True,
                 offset=0.0,
                 duration=None,
                 dtype=np.float32,
                 res_type='kaiser_best'):
    """
    Decode an in-memory audio stream into a floating point array.

    This is a rewrite of ``librosa.load``: the file argument is replaced by a
    BytesIO-like object, and ``audioread.audio_open`` is replaced by the
    custom ``RawAudioStream`` class, because the former needs a file path.

    :param bytes_io: stream object handed to ``RawAudioStream``
    :param sr: target sampling rate; ``None`` keeps the stream's native rate
    :param mono: mix multi-channel audio down with ``to_mono``
    :param offset: start position in seconds (scaled by the native rate)
    :param duration: amount of audio to keep in seconds; ``None`` reads to
        the end
    :param dtype: dtype used when decoding frames and for the final array
    :param res_type: forwarded to ``resample``
    :return: ``(y, sr)`` -- the decoded samples and their sampling rate
    """
    y = []

    with RawAudioStream(bytes_io) as input_file:
        sr_native = input_file.samplerate
        n_channels = input_file.channels

        # Positions are counted in interleaved samples, hence the channel
        # factor.
        s_start = int(np.round(sr_native * offset)) * n_channels

        if duration is None:
            s_end = np.inf
        else:
            s_end = s_start + (int(np.round(sr_native * duration)) *
                               n_channels)

        n = 0

        for frame in input_file:
            frame = util.buf_to_float(frame, dtype=dtype)
            n_prev = n
            n = n + len(frame)

            if n < s_start:
                # offset is after the current frame
                # keep reading
                continue

            if s_end < n_prev:
                # we're off the end.  stop reading
                break

            if s_end < n:
                # the end is in this frame.  crop.
                frame = frame[:s_end - n_prev]

            if n_prev <= s_start <= n:
                # beginning is in this frame
                frame = frame[(s_start - n_prev):]

            # tack on the current frame
            y.append(frame)

        if y:
            y = np.concatenate(y)

            if n_channels > 1:
                # de-interleave into (channels, samples)
                y = y.reshape((-1, n_channels)).T
                if mono:
                    y = to_mono(y)

            if sr is not None:
                y = resample(y, sr_native, sr, res_type=res_type)

        if sr is None:
            # Report the native rate even when no samples were decoded;
            # previously an empty stream returned ``None`` here.
            sr = sr_native

        # Final cleanup for dtype and contiguity
        y = np.ascontiguousarray(y, dtype=dtype)

        return y, sr
def load_yield_chunks(path,
                      sr=22050,
                      mono=True,
                      offset=0.0,
                      duration=None,
                      dtype=np.float32,
                      res_type='kaiser_best',
                      choplenspls=0,
                      hoplenspls=0):
    """Yield fixed-size chunks of an audio file as floating point arrays.

    This is MODIFIED from librosa's own load() function, to yield chunks
    one-by-one so they never all need to be loaded into memory. The
    post-processing that librosa applies to the whole file (channel
    de-interleaving, mono mix-down, resampling, dtype cleanup) is applied to
    each decoded frame separately.

    Parameters
    ----------
    path : string
        path to the input file, opened with ``audioread.audio_open``.

    sr   : number > 0 [scalar]
        target sampling rate

        'None' uses the native sampling rate

    mono : bool
        convert signal to mono

    offset : float
        start reading after this time (in seconds)

    duration : float
        only load up to this much audio (in seconds)

    dtype : numeric type
        data type of the yielded chunks

    res_type : str
        resample type, forwarded to ``resample``

    choplenspls : int
        number of samples in each chunk to be yielded. Must be positive.

    hoplenspls : int
        number of samples the window advances between chunks. Values that
        are zero, negative or larger than ``choplenspls`` fall back to
        ``choplenspls`` (non-overlapping chunks).

    Yields
    ------
    (y, sr) : tuple
        ``y`` is an np.ndarray holding ``choplenspls`` samples along its last
        axis (shape ``(choplenspls,)``, or ``(n_channels, choplenspls)`` for
        multi-channel audio with ``mono=False``); ``sr`` is its sampling
        rate.

    Raises
    ------
    ValueError
        if ``choplenspls`` is not positive (raised on first iteration). A
        non-positive chunk length would otherwise yield empty chunks forever.
    """
    if choplenspls <= 0:
        raise ValueError(
            "choplenspls must be a positive number of samples, got %r" %
            (choplenspls, ))

    if not hoplenspls or (hoplenspls <= 0 or hoplenspls > choplenspls):
        hoplenspls = choplenspls

    # Pending samples not yet yielded; None until the first frame arrives so
    # that the buffer takes on the frame's rank (1-D mono or 2-D channels).
    y = None
    with audioread.audio_open(os.path.realpath(path)) as input_file:
        sr_native = input_file.samplerate
        n_channels = input_file.channels

        # Positions are counted in interleaved samples, hence the channel
        # factor.
        s_start = int(np.round(sr_native * offset)) * n_channels

        if duration is None:
            s_end = np.inf
        else:
            s_end = s_start + (int(np.round(sr_native * duration)) *
                               n_channels)

        n = 0

        for frame in input_file:
            frame = util.buf_to_float(frame, dtype=dtype)
            n_prev = n
            n = n + len(frame)

            if n < s_start:
                # offset is after the current frame
                # keep reading
                continue

            if s_end < n_prev:
                # we're off the end.  stop reading
                break

            if s_end < n:
                # the end is in this frame.  crop.
                frame = frame[:s_end - n_prev]

            if n_prev <= s_start <= n:
                # beginning is in this frame
                frame = frame[(s_start - n_prev):]

            # NB here we apply to one single frame, the postprocessing that librosa applies to the whole file at the end
            if n_channels > 1:
                frame = frame.reshape((-1, n_channels)).T
                if mono:
                    frame = to_mono(frame)

            if sr is not None:
                frame = resample(frame, sr_native, sr, res_type=res_type)

            else:
                sr = sr_native
            # Final cleanup for dtype and contiguity
            frame = np.ascontiguousarray(frame, dtype=dtype)

            # Samples live on the last axis, so buffering and slicing along
            # it works for both 1-D and (channels, samples) frames.
            if y is None:
                y = frame
            else:
                y = np.concatenate((y, frame), axis=-1)
            while y.shape[-1] >= choplenspls:
                yield (y[..., :choplenspls], sr)
                y = y[..., hoplenspls:]

    if y is not None and y.shape[-1] != 0:
        print(
            "WARNING: load_yield_chunks() dropped %i final samples" %
            (y.shape[-1])
        )  # TODO can the final incomplete chunk be handled elegantly within the above loop?
Beispiel #12
0
def mono_detection(sig):
    """Return ``sig`` mixed down to mono when it is two-dimensional.

    Inputs whose shape does not have exactly two entries are returned
    unchanged; two-dimensional inputs are transposed and passed to
    ``to_mono``.
    """
    if len(sig.shape) != 2:
        return sig
    return to_mono(sig.T)