Example #1
def extract_temp_channels(wav_path, temp_directory):
    """
    Extract a single channel from a stereo file to a new mono wav file

    Parameters
    ----------
    wav_path : str
        Path to stereo wav file
    temp_directory : str
        Directory to save extracted
    """
    name, ext = os.path.splitext(wav_path)
    base = os.path.basename(name)
    a_path = os.path.join(temp_directory, base + '_A.wav')
    b_path = os.path.join(temp_directory, base + '_B.wav')
    if not os.path.exists(a_path):
        with soundfile.SoundFile(wav_path, 'r') as inf:
            sr = inf.samplerate
            sound_format = inf.format
            endian = inf.endian
            subtype = inf.subtype
        stream = librosa.stream(wav_path,
                                block_length=256,
                                frame_length=2048,
                                hop_length=2048,
                                mono=False)
        with soundfile.SoundFile(a_path, 'w', samplerate=sr, channels=1, endian=endian, subtype=subtype, format=sound_format) as af, \
             soundfile.SoundFile(b_path, 'w', samplerate=sr, channels=1, endian=endian, subtype=subtype, format=sound_format) as bf:

            for s in stream:
                af.write(s[0, :])
                bf.write(s[1, :])
    return a_path, b_path
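A minimal usage sketch for the function above; the stereo path is a placeholder, and the snippet assumes `os`, `soundfile` and `librosa` are imported at module level as the function requires.

import tempfile

# placeholder stereo input; any 2-channel wav will do
stereo_wav = 'session_stereo.wav'
with tempfile.TemporaryDirectory() as tmp:
    a_wav, b_wav = extract_temp_channels(stereo_wav, tmp)
    print(a_wav, b_wav)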
Example #2
def data_stream_librosa(**kwargs):
    """data_stream_librosa

    load and stream audio data in frames, same as aubio.src does
    """
    # filename = librosa.util.example_audio_file()
    filename = kwargs.get('filename', '/home/src/QK/data/sound-arglaaa-2018-10-25/24.wav')
    frame_length = kwargs.get('frame_length', 512)
    hop_length = kwargs.get('hop_length', 512)
        
    # streaming does not provide on-the-fly resampling so we must consider the samplerate
    sr = librosa.get_samplerate(filename)
    # get duration
    # dur = librosa.get_duration(filename)
    # open the stream
    stream = librosa.stream(filename,
                            block_length=1,
                            frame_length=frame_length,
                            hop_length=hop_length,
                            mono=True
    )
    # return stream descriptor and samplerate
    return stream, sr
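A short, hedged usage sketch: the function returns the librosa block generator plus the native samplerate, so the caller iterates over mono frames. The filename and keyword values below are illustrative only.

# consume the generator returned above; 'input.wav' is a placeholder path
stream, sr = data_stream_librosa(filename='input.wav', frame_length=1024, hop_length=512)
for frame in stream:
    print(frame.shape, sr)   # each frame is a 1-D mono block
    break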
Example #3
def librosa_mix_test():
    sr = librosa.get_samplerate(fs[0])
    m = sr // SAMPLERATE

    frame_length = FRAMESPERBUFFER * m
    hop_length = FRAMESPERBUFFER * m

    strms = map(
        lambda f: librosa.stream(f,
                                 mono=False,
                                 fill_value=0.,
                                 block_length=1,
                                 frame_length=frame_length,
                                 hop_length=hop_length), fs)

    frame = [np.zeros(FRAMESPERBUFFER), np.zeros(FRAMESPERBUFFER)]
    for s in strms:
        blk = next(s)
        frame[0] += blk[0][::4]
        frame[1] += blk[1][::4]

    frame = np.column_stack((frame[0], frame[1])).ravel()
    frame *= 0x800000
    frame /= 4
    data = frame.astype(np.int32)
    _bytes = wavdecode.to24le(data)
    print(np.array(_bytes)[:20])
Example #4
    def create_stream(self,
                      key,
                      filename,
                      sampling_rate,
                      duration,
                      block_duration=5):
        file_path = os.path.join(self.data_path,
                                 f"train_audio/{key}/{filename}.wav")
        assert os.path.exists(
            file_path
        ), "The audio file you are trying to split does not exist in .wav format."

        # conversion
        block_length = 128
        samples_per_block = sampling_rate * block_duration

        frame_length = samples_per_block // block_length
        hop_length = frame_length // 5

        stream = librosa.stream(
            path=file_path,
            block_length=block_length,  # num. of frames per block
            frame_length=frame_length,  # num. of samples per frame
            hop_length=hop_length,  # num. of samples to advance between frames
            fill_value=0)
        return stream
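The comments in `create_stream` spell out what `block_length`, `frame_length` and `hop_length` mean to `librosa.stream`. A small sanity-check sketch of the resulting block size, using the same formulas with an assumed `sampling_rate` that is not taken from the snippet:

# librosa.stream blocks contain block_length frames; consecutive frames
# overlap by frame_length - hop_length samples, so a full block spans:
block_length = 128
sampling_rate = 32000          # assumed value, not from the snippet
block_duration = 5
samples_per_block = sampling_rate * block_duration
frame_length = samples_per_block // block_length   # 1250
hop_length = frame_length // 5                     # 250
samples_in_block = frame_length + (block_length - 1) * hop_length
print(frame_length, hop_length, samples_in_block)  # 1250 250 33000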
Example #5
def log_mel(filepath, out_folder, fs, N, overlap, win_type='hamming', n_mels=128, fmin=0.0, fmax=None, htk=True):
    # Load an audio file as a floating point time series

    # x, fs = librosa.core.load(filepath, sr=fs, offset=5.0)
    # x_trimmered, index = librosa.effects.trim(x)
    # chunks = librosa.effects.split(x_trimmered, frame_length=5*fs, hop_length=1*fs)

    stream = librosa.stream(filepath, block_length=1, frame_length=5*fs, hop_length=1*fs, fill_value=0)
    i = 0
    for x_chunks in stream:
        image_filename = os.path.join(dest_path, out_folder, os.path.splitext(os.path.basename(filepath))[0] + '_' + str(i))
        image_filename = image_filename.replace('_16bit', '')
        # Power spectrum
        S = np.abs(librosa.core.stft(x_chunks, n_fft=N, window=signal.get_window(win_type, N), hop_length=N-overlap, center=False)) ** 2
        # Build a Mel filter
        mel_basis = librosa.filters.mel(fs, N, n_mels, fmin, fmax, htk)
        # Filtering
        mel_filtered = np.dot(mel_basis, S)

        coefficients = librosa.core.power_to_db(mel_filtered)
        plt.figure()
        plt.imshow(coefficients)
        plt.savefig(image_filename + '.png')
        plt.close()
        i += 1

    # delta = librosa.feature.delta(mel_filtered, delta_width*2+1, order=1, axis=-1)
    # coefficients = np.concatenate((coefficients, delta))
    # add delta and delta-deltas
    # coefficients.append(librosa.feature.delta(mel_filtered, delta_width*2+1, order=1, axis=-1))
    # coefficients.append(librosa.feature.delta(mel_filtered, delta_width*2+1, order=2, axis=-1))

    return True
Example #6
def file_load_stream(wav_name, mono=False):
    try:
        sr = librosa.get_samplerate(wav_name)
        frameSize = sr
        hoplength = frameSize // 2
        stream = librosa.stream(wav_name,
                                block_length=1,
                                frame_length=frameSize,
                                hop_length=hoplength,
                                mono=mono)
    except Exception:
        logger.error("file broken or does not exist: {}".format(wav_name))
        raise
    return stream
Example #7
def load_file(filepath):

    sr = librosa.get_samplerate(filepath)

    frame_length = 1024
    hop_length = 256

    stream = librosa.stream(filepath,
                            block_length=128,
                            frame_length=frame_length,
                            hop_length=hop_length)

    for x in stream:
        yield (librosa.stft(x, n_fft=frame_length, hop_length=hop_length), sr)
Example #8
def a():
    sr = 44100
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=sr, input=True)
    # with s as stream:
    # librosa.get_samplerate(stream)
    # NOTE: librosa.stream() expects a file path (or a soundfile-readable
    # object), not a PyAudio stream, so the call below will raise an error.
    for y_block in librosa.stream(stream,
                                  block_length=256,
                                  frame_length=2048,
                                  hop_length=2048):
        m_block = librosa.feature.melspectrogram(y_block,
                                                 sr=sr,
                                                 n_fft=2048,
                                                 hop_length=2048,
                                                 center=False)
        print(type(m_block))

    stream.close()
Example #9
    def _stream_generator(self, filename, sr):
        "Used for generating Mel Spectrograms when given sample rate is an integer multiple of the original"
        assert sr % self.sample_rate == 0, f'Sample Rate {sr} is not a multiple, use block generator instead'
        mult = sr // self.sample_rate
        stream = librosa.stream(filename,
                                block_length=self.timesteps,
                                frame_length=self.n_fft * mult,
                                hop_length=self.hop_length * mult,
                                fill_value=0)

        for block in stream:
            yield librosa.feature.melspectrogram(block,
                                                 sr=sr,
                                                 n_fft=self.n_fft * mult,
                                                 hop_length=self.hop_length * mult,
                                                 n_mels=self.n_mels,
                                                 center=False)
Example #10
def find_audio_sample(source_path, template_path):
    template_sound, template_rate = librosa.load(template_path, sr=None)

    source_rate = librosa.get_samplerate(source_path)

    frame_length = len(template_sound)
    block_length = 1024
    hop_length = 128  # 512

    source_stream = librosa.stream(source_path,
                                   block_length=block_length,
                                   frame_length=frame_length,
                                   hop_length=int(hop_length))

    max_value = 0
    max_time = -1

    for i_block, block in enumerate(source_stream):
        i_frame = 0

        while i_frame * hop_length < hop_length * (block_length - 1):
            frame = block[i_frame * hop_length:i_frame * hop_length +
                          frame_length]

            if frame.shape[0] < frame_length:
                break

            curr_time = (i_block * block_length * hop_length +
                         i_frame * hop_length) / source_rate

            #print(f"Processing {format_time(curr_time)}")

            corr = abs(np.correlate(frame, template_sound)[0])

            if max_value < corr:
                max_time = curr_time
                max_value = corr

            i_frame += 1

    return max_time, max_value
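A hedged usage sketch for `find_audio_sample`; both file names are placeholders, not part of the original example.

# illustrative call; 'recording.wav' and 'template.wav' are placeholder paths
best_time, best_corr = find_audio_sample('recording.wav', 'template.wav')
if best_time >= 0:
    print(f"best match near {best_time:.2f} s (correlation {best_corr:.3g})")
else:
    print("no complete frame was long enough to compare")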
Example #11
def block_stream(filename=None,
                 y=None,
                 sr=None,
                 frame_length=2048,
                 hop_length=512,
                 segment_duration=1,
                 n_blocks=1,
                 full_frames=True):
    '''Load audio into frames. If given a filename, it will load from file in blocks.
    If y and sr are given, slice into blocks

    Arguments:
        filename (str, optional): audio file to stream from
        y, sr (np.ndarray, int, optional): preloaded audio and its samplerate
        frame_length (int): samples per frame
        hop_length (int): samples to advance between frames
        segment_duration (float): approximate duration of each block, in seconds
        n_blocks (int): number of segments grouped into one block
        full_frames (bool): if True, drop incomplete trailing blocks

    '''
    # load audio frame generator

    if filename is not None:
        if y is not None:
            raise ParameterError('Either y or filename must be equal to None')

        # get blocks from file
        duration = librosa.get_duration(filename=filename)
        orig_sr = librosa.get_samplerate(filename)
        sr = sr or orig_sr

        # see: https://librosa.github.io/librosa/_modules/librosa/core/audio.html#stream
        # block_length is in units of `frames` so reverse calculation
        block_length = max(segment_duration * orig_sr, frame_length)
        block_n_frames = librosa.core.samples_to_frames(
            block_length, frame_length, hop_length)

        n_total = duration * orig_sr / librosa.core.frames_to_samples(
            block_n_frames, frame_length, hop_length)
        n_total = int(n_total / n_blocks)

        y_blocks = librosa.stream(filename,
                                  block_length=block_n_frames * n_blocks,
                                  frame_length=frame_length,
                                  hop_length=hop_length)

        # will throw an error if audio is not valid
        y_blocks = (y for y in y_blocks
                    if librosa.util.valid_audio(y, mono=True))

        if sr != orig_sr:  # resample if we have a different sr
            y_blocks = (librosa.resample(y, orig_sr, sr) for y in y_blocks)

    else:
        if y is None or sr is None:
            raise ParameterError(
                'At least one of (y, sr) or filename must be provided')

        librosa.util.valid_audio(y, mono=True)

        # get block length, make it evenly divisible into frames (with hop)
        block_length = max(segment_duration * sr,
                           frame_length) * n_blocks  # min block size = 1 frame
        block_length = librosa.core.samples_to_frames(
            block_length, frame_length, hop_length)  # convert to even frames
        block_length = librosa.core.frames_to_samples(
            block_length, frame_length, hop_length)  # convert back

        # get frames from array
        y_blocks = librosa.util.frame(y, block_length, block_length).T

        n_total = len(y_blocks)

    if full_frames:  # drop any frames that are incomplete
        y_blocks = (y for y in y_blocks if y.size == block_length)

    return y_blocks, n_total, sr
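A hedged usage sketch of `block_stream` in file mode; `'input.wav'` is a placeholder, and `ParameterError` is assumed to be imported from `librosa.util.exceptions` elsewhere in the module.

# illustrative call: stream roughly 1-second blocks from a placeholder file
y_blocks, n_total, sr = block_stream(filename='input.wav',
                                     frame_length=2048,
                                     hop_length=512,
                                     segment_duration=1)
for i, y in enumerate(y_blocks):
    print(i, '/', n_total, y.shape)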
Example #12
# We'll generate 16 frames at a time, each frame having 2048 samples
# and 75% overlap.
#

n_fft = 2048
hop_length = 512

# fill_value pads out the last frame with zeros so that we have a
# full frame at the end of the signal, even if the signal doesn't
# divide evenly into full frames.
sr = librosa.get_samplerate(filename)

stream = librosa.stream(filename,
                        block_length=16,
                        frame_length=n_fft,
                        hop_length=hop_length,
                        mono=True,
                        fill_value=0)
#######################################################################
# For this example, we'll compute PCEN on each block, find the maximum
# response over frequency, and store the results in a list.

# Make a list to store the maximum PCEN value in each frame
pcen_blocks = []

# Initialize the PCEN filter delays to steady state
zi = None

for y_block in stream:
    # Compute the STFT (without padding, so center=False)
    D = librosa.stft(y_block, n_fft=n_fft, hop_length=hop_length, center=False)

    # Apply PCEN to the magnitude spectrum, carrying the filter delays (zi)
    # over from the previous block and keeping the final delays for the next one
    P, zi = librosa.pcen(np.abs(D), sr=sr, hop_length=hop_length,
                         zi=zi, return_zf=True)

    # Keep the maximum response over frequency for each frame
    pcen_blocks.extend(np.max(P, axis=0))
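A possible follow-up, not part of the original snippet: once the stream is exhausted, stack the per-frame values and attach timestamps so they can be plotted against time.

# illustrative post-processing of the collected PCEN values
pcen_blocks = np.asarray(pcen_blocks)
times = librosa.frames_to_time(np.arange(len(pcen_blocks)),
                               sr=sr, hop_length=hop_length)
print(times[-1], pcen_blocks.max())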
Example #13
                                             song_names[song_idx])
    except:
        print('[Invalid Song Number: {}]'.format(song_number))
        sys.exit()

    original_path, chiptune_path, piano_path = file_sets[song_idx]

    print('[fs: {}] [fs: {}] [fs: {}]'.format(
        librosa.core.get_samplerate(original_path),
        librosa.core.get_samplerate(chiptune_path),
        librosa.core.get_samplerate(piano_path)))

    fs = 44100

    original_stream = librosa.stream(original_path,
                                     block_length=1,
                                     frame_length=int(fs / 10),
                                     hop_length=int(fs / 10))
    chiptune_stream = librosa.stream(chiptune_path,
                                     block_length=1,
                                     frame_length=int(fs / 10),
                                     hop_length=int(fs / 10))
    piano_stream = librosa.stream(piano_path,
                                  block_length=1,
                                  frame_length=int(fs / 10),
                                  hop_length=int(fs / 10))

    original_buffer = np.empty((0), np.float32)
    chiptune_buffer = np.empty((0), np.float32)
    piano_buffer = np.empty((0), np.float32)

    once = True
Example #14
# We'll generate 16 frames at a time, each frame having 4096 samples
# and 50% overlap.
#

n_fft = 4096
hop_length = n_fft // 2

# fill_value pads out the last frame with zeros so that we have a
# full frame at the end of the signal, even if the signal doesn't
# divide evenly into full frames.
sr = librosa.get_samplerate(filename)

stream = librosa.stream(filename, block_length=16,
                        frame_length=n_fft,
                        hop_length=hop_length,
                        mono=True,
                        fill_value=0)
#####################################################################
# For this example, we'll compute PCEN on each block, average over
# frequency, and store the results in a list.

# Make an array to store the frequency-averaged PCEN values
pcen_blocks = []

# Initialize the PCEN filter delays to steady state
zi = None

for y_block in stream:
    # Compute the STFT (without padding, so center=False)
    D = librosa.stft(y_block, n_fft=n_fft, hop_length=hop_length,
                     center=False)

    # Apply PCEN (carrying the delays in zi) and store the frequency average
    P, zi = librosa.pcen(np.abs(D), sr=sr, hop_length=hop_length,
                         zi=zi, return_zf=True)
    pcen_blocks.extend(np.mean(P, axis=0))
Example #15
def extract_energy_bands(file):
    print("Extracting energy bands...")
    sample_rate = librosa.get_samplerate(file)
    logging.info(f'Starting analysis of {file}')

    # ================================================
    # Declare audio loading stream and related parameters
    # ================================================

    frame_length = sample_rate * 60
    hop_length = sample_rate * 5
    block_length = 5 * 4
    # Load the audio as a stream
    stream = librosa.stream(
        file,
        block_length=block_length,
        frame_length=frame_length,
        hop_length=hop_length,
    )
    logging.info(f'Sample rate: {sample_rate}')

    # ================================================
    # Define features to extract
    # ================================================

    energy_band_params = [
        {
            'sampleRate': sample_rate,
            'startCutoffFrequency': 20,
            'stopCutoffFrequency': 100
        },
        {
            'sampleRate': sample_rate,
            'startCutoffFrequency': 100,
            'stopCutoffFrequency': 200
        },
        {
            'sampleRate': sample_rate,
            'startCutoffFrequency': 200,
            'stopCutoffFrequency': 800
        },
        {
            'sampleRate': sample_rate,
            'startCutoffFrequency': 800,
            'stopCutoffFrequency': 2000
        },
        {
            'sampleRate': sample_rate,
            'startCutoffFrequency': 2000,
            'stopCutoffFrequency': 5000
        },
        {
            'sampleRate': sample_rate,
            'startCutoffFrequency': 5000,
            'stopCutoffFrequency': 8000
        },
        {
            'sampleRate': sample_rate,
            'startCutoffFrequency': 8000,
            'stopCutoffFrequency': 22050
        },
    ]

    def hz_to_bin(f, n_bins, sr):
        # the n_bins STFT rows span 0 .. sr/2, so map f linearly onto that range
        return int(np.round((f / (sr / 2)) * (n_bins - 1)))

    class EnergyBandSelf:
        ''' Feature extraction class that extracts the energy in a frequency band, given a power STFT spectrogram.'''
        def __init__(self,
                     sampleRate=44100,
                     startCutoffFrequency=20.0,
                     stopCutoffFrequency=11025.0):
            self.sr = sampleRate
            self.f_start = startCutoffFrequency
            self.f_stop = stopCutoffFrequency

        def __call__(self, S_power):
            n_bins = S_power.shape[0]
            n_start, n_stop = hz_to_bin(self.f_start, n_bins,
                                        self.sr), hz_to_bin(
                                            self.f_stop, n_bins, self.sr)
            return np.sum(S_power[n_start:n_stop, :])
            # return np.median(np.sum(S_power[n_start:n_stop, :], axis=0))

    energy_band_extractors = [
        EnergyBandSelf(**kwargs) for kwargs in energy_band_params
    ]

    # ================================================
    # Feature extraction
    # ================================================
    energy_band_features = []

    # Read the librosa docs to understand how the blocks and frames relate to each other:
    # https://librosa.org/blog/2019/07/29/stream-processing/#Blocks
    for i, y_block in enumerate(stream):
        for j, n_start in enumerate(range(0, len(y_block), hop_length)):
            if j == 0 and i % 5 == 0:
                t = (j * hop_length +
                     i * block_length * hop_length) / (60 * sample_rate)
                logging.info(f'Processing from minute {t:.0f}.')
            # Select the current audio frame
            y_frame = y_block[..., n_start:n_start + hop_length]
            # Calculate the STFT power spectrogram for this audio frame
            S = np.abs(librosa.stft(y_frame))**2
            # Calculate the energy in each of the predefined energy bands
            energy_band_features.append([e(S) for e in energy_band_extractors])
    # Back to a numpy array
    energy_band_features = np.array(energy_band_features)

    # ================================================
    # Plotting
    # ================================================

    toplot = energy_band_features / np.max(energy_band_features,
                                           axis=0)[np.newaxis, :]

    toplot = toplot / np.sum(toplot, axis=1)[:, np.newaxis]
    yhat = savgol_filter(toplot, 15, 3, axis=0)  # smooth the output a bit

    df = pd.DataFrame(data=yhat)
    return df
Example #16
def multifile_stream(filenames,
                     segment_duration,
                     frame_length,
                     hop_length,
                     sr=None):
    '''Chain multiple file audio block streams into a single stream. It uses samples
    from the next file to complete a previously incomplete block
    '''
    # TODO: need to do this for every file and add up n_total

    all_y_blocks = []
    N_total = 0  # TODO: this doesn't take into account `remainder`
    remainder = None
    for filename in filenames:
        offset = 0

        # get blocks from file
        duration = librosa.get_duration(filename=filename)
        orig_sr = librosa.get_samplerate(filename)
        sr = sr or orig_sr

        block_length = max(segment_duration * orig_sr, frame_length)
        block_n_frames = librosa.core.samples_to_frames(
            block_length, frame_length, hop_length)

        # TODO: this needs to be made a part of the generator expression
        if remainder is not None:
            offset = 1. * (block_length - len(remainder)) / orig_sr
            rem_completed, _ = librosa.load(filename,
                                            sr=orig_sr,
                                            duration=offset)
            if sr != orig_sr:  # resample if we have a different sr
                rem_completed = librosa.resample(rem_completed, orig_sr, sr)
            remainder = np.concatenate([remainder, rem_completed])

            if len(remainder) == block_length:  # there were enough samples in the new file
                yield remainder
                remainder = None  # clear
            else:  # very short file - keep accumulating so we stay robust to odd inputs
                continue

        n_total = duration * orig_sr / librosa.core.frames_to_samples(
            block_n_frames, frame_length, hop_length)
        n_total = int(n_total)  # unlike block_stream above, blocks are not grouped here
        N_total += n_total

        y_blocks = librosa.stream(filename,
                                  offset=offset,
                                  block_length=block_n_frames,
                                  frame_length=frame_length,
                                  hop_length=hop_length)

        # will throw an error if audio is not valid
        y_blocks = (y for y in y_blocks
                    if librosa.util.valid_audio(y, mono=True))

        if sr != orig_sr:  # resample if we have a different sr
            y_blocks = (librosa.resample(y, orig_sr, sr) for y in y_blocks)

        for y in y_blocks:
            yield y  # TODO: return y_blocks, n_total, sr
Example #17
prm_sound_filepath = "C:/Users/f.clement/Desktop/vod_cutter/temp_prm_audio.wav"

# Implementation based on https://stackoverflow.com/questions/52572693/find-sound-effect-inside-an-audio-file
# See also https://librosa.org/blog/2019/07/29/stream-processing/

source_sound, source_rate = librosa.load(ref_sound_filepath, sr=None)
template_sound, template_rate = librosa.load(prm_sound_filepath, sr=None)

source_rate = librosa.get_samplerate(ref_sound_filepath)

frame_length = len(template_sound)
hop_length = 128
block_length = 1024

source_stream = librosa.stream(ref_sound_filepath,
                               block_length=block_length,
                               frame_length=frame_length,
                               hop_length=int(hop_length))

xs = []
ys = []
abs_ys = []

for i_block, block in enumerate(source_stream):
    i_frame = 0

    while i_frame * hop_length < hop_length * (block_length - 1):
        frame = block[i_frame * hop_length:i_frame * hop_length + frame_length]

        if frame.shape[0] < frame_length:
            break
Example #18
lab = pd.read_csv("/home/gnlenfn/remote/lstm/emotion_classes.csv")
count = 0
mel_spec = pd.DataFrame()
for file in glob.glob(wav_loc):
    sr = librosa.get_samplerate(file)
    frame_length = 0.02
    frame_stride = 0.01
    input_nfft = int(round(sr * frame_length))
    input_stride = int(round(sr * frame_stride))
    label = np.array(
        lab.EMOTION[lab.name == file.split("/")[-1].split(".")[0]])

    stream = librosa.stream(file,
                            block_length=304,
                            frame_length=input_stride,
                            hop_length=input_stride,
                            fill_value=0)

    sub = 0
    for y_block in stream:
        sub += 1
        m_block = librosa.stft(
            y_block,
            n_fft=800,
            win_length=input_nfft,
            hop_length=input_stride,
            window='hamming',
            center=False,
            #fmax=4000
        )
Example #19
import librosa

sr = librosa.get_samplerate('D:/Vasanth/DeepLearning/SubtitleSynchronizer/videos1/GardenofEvil.flac')

# Set the frame parameters to be equivalent to the librosa defaults
# in the file's native sampling rate
frame_length = (2048 * sr) // 22050
hop_length = (512 * sr) // 22050

# Stream the data, working on 128 frames at a time
stream = librosa.stream('D:/Vasanth/DeepLearning/SubtitleSynchronizer/videos1/GardenofEvil.flac',
                        block_length=128,
                        frame_length=frame_length,
                        hop_length=hop_length)

chromas = []
for y in stream:
    chroma_block = librosa.feature.chroma_stft(y=y, sr=sr,
                                               n_fft=frame_length,
                                               hop_length=hop_length,
                                               center=False)
    chromas.append(chroma_block)
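A possible follow-up, not part of the original script: with the append fixed to store `chroma_block`, the per-block chromagrams can be stitched into one chromagram along the time axis.

import numpy as np

# each block has shape (12, n_frames); join them along time
chroma = np.concatenate(chromas, axis=1)
print(chroma.shape)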
Example #20
import math
import sys

import librosa

sys.path.append("./../")
from configs import config


for wav_file in wav_files:
    print(wav_file)
    sr = librosa.get_samplerate(wav_file)
    frame_length = int(math.pow(2, math.ceil(math.log2((sr * config.frame_size_in_ms * 0.001)))))
    hop_length = int(config.percentage_overlap * frame_length / 100)
    print(sr, frame_length, hop_length)

    stream = librosa.stream(wav_file, block_length=1, frame_length=frame_length, hop_length=hop_length)
    for frame in stream:
        print(list(frame))
        break
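The power-of-two rounding above is easy to check by hand. A small sketch, assuming for illustration that `config.frame_size_in_ms` is 25 and `config.percentage_overlap` is 50 (neither value is given in the snippet):

import math

sr = 16000                    # assumed samplerate
frame_size_in_ms = 25         # assumed config value
percentage_overlap = 50       # assumed config value

raw = sr * frame_size_in_ms * 0.001                           # 400 samples
frame_length = int(math.pow(2, math.ceil(math.log2(raw))))    # next power of two -> 512
hop_length = int(percentage_overlap * frame_length / 100)     # 256
print(frame_length, hop_length)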