Example #1
def process(file):
    # read in the file
    f, sr, enc = wavread(file)
    # compute the fourier transform & compute the window times:
    D = librosa.stft(f)
    times = librosa.frames_to_samples(np.arange(D.shape[1]))
    # compute the onset strength envelope:
    env = librosa.onset.onset_strength(y=f, sr=sr)
    assert (len(times) == len(env))
    # compute the onsets we are actually interested in, convert to samples:
    onsets = librosa.onset.onset_detect(y=f, sr=sr)
    onset_samps = librosa.frames_to_samples(onsets)
    assert (onset_samps[-1] <= len(f))
    # create a lookup table for retrieving onset strengths:
    lookup = []
    prevval = 0
    for v in onset_samps:
        for i in xrange(prevval, len(times)):
            if times[i] == v:
                lookup.append(i)
                prevval = i + 1
                break
    # create an empty audio buffer (result):
    result = np.zeros(len(f))
    # write envelope onset strength values at every onset point
    # computed by the envelope:
    for i in xrange(len(lookup)):
        result[onset_samps[i]] = env[lookup[i]]
    # write the result:
    wavwrite(result, file[:-4] + '_proc.wav', sr, enc)
    return
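A note on the lookup table in process() above: librosa.onset.onset_detect and librosa.onset.onset_strength use the same default hop length, so the frame indices returned by onset_detect already index the strength envelope directly. A minimal sketch of that shortcut (the file name is a placeholder and librosa defaults are assumed; this is not part of the original snippet):

import numpy as np
import librosa

y, sr = librosa.load('input.wav', sr=None)
env = librosa.onset.onset_strength(y=y, sr=sr)
onsets = librosa.onset.onset_detect(y=y, sr=sr)

# write each onset's strength value at its sample position, as process() does
onset_samps = librosa.frames_to_samples(onsets)
onset_samps = np.minimum(onset_samps, len(y) - 1)  # guard against the last frame
result = np.zeros(len(y))
result[onset_samps] = env[onsets]
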
def gen_hihat(all_data, fs, fps, cand):
    fps = librosa.samples_to_frames(fs, hop_length=hop_len, n_fft=win_len)
    fps = 100
    print(cand)
    proc = BeatTrackingProcessor(look_aside=0.2, fps=fps)
    act = RNNBeatProcessor()(all_data)
    beat_times = proc(act)

    song_len = librosa.samples_to_time(all_data.shape, sr=fs)[0]
    hihat = np.zeros(all_data.shape)
    idx = np.where(beat_times <= song_len)[0]
    new_beat_times = np.zeros(idx.shape)
    new_beat_times[idx] = beat_times[idx]
    beat_samples = librosa.time_to_samples(new_beat_times, sr=fs)
    start = librosa.frames_to_samples(cand[0], hop_len, n_fft=win_len)
    end = librosa.frames_to_samples(cand[-1], hop_len, n_fft=win_len)
    cand_len = end - start

    i = 3
    is_hihat = np.zeros(beat_samples.shape)
    while i < len(beat_samples):
        is_hihat[i] = 1
        i = i + 4
    for i, s in enumerate(beat_samples):
        if is_hihat[i] == 1:
            if s + cand_len > hihat.shape[0]:
                break
            hihat[s:s + cand_len] = all_data[start:end]

    return hihat, new_beat_times, beat_samples
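The while-loop above marks every fourth beat (starting from index 3) as a hi-hat position. The same mask can be built with a single slice assignment; a small sketch with placeholder beat positions (not from the original repository):

import numpy as np

beat_samples = np.arange(12) * 22050   # placeholder beat sample positions
is_hihat = np.zeros(beat_samples.shape)
is_hihat[3::4] = 1                     # beats 3, 7, 11, ... carry the hi-hat
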
def onsets_and_strength(all_onsets_strength, onsets_sorted, dly_onsets,
                        strongest_onset, strongest_onset_2, y_cut,
                        onset_strength):
    print(all_onsets_strength)
    print(onsets_sorted)
    plt.subplot(211)
    plt.vlines(librosa.frames_to_samples(dly_onsets), -1.0, 1.0, zorder=2)
    plt.vlines(librosa.frames_to_samples(strongest_onset['onset']),
               -1.0,
               1.0,
               colors='red',
               zorder=3)
    plt.vlines(librosa.frames_to_samples(strongest_onset_2['onset']),
               -1.0,
               1.0,
               colors='green',
               zorder=3)
    plt.plot(y_cut, zorder=1)
    plt.ylabel('Amplitude in Floating Point')
    plt.xlabel('Samples')
    plt.title('Onset Detection with Delay Effect')
    plt.subplot(212)
    plt.plot(onset_strength[0])
    plt.ylabel('Onset Strength')
    plt.xlabel('Frames')
    plt.show()
def strip_audio(x, frame_length=1024, hop_length=256, rms_ths=0.2):
    # compute energy
    rmse = librosa.feature.rmse(x,
                                frame_length=frame_length,
                                hop_length=hop_length)[0]
    rms_ratio = rmse / rmse.max()

    active_frames = np.nonzero(rms_ratio > rms_ths)[0]
    assert len(active_frames) > 0, "there is no voice part in the wav"

    # strip continuous active part
    s_sample = librosa.frames_to_samples(active_frames[0],
                                         hop_length=hop_length)[0]
    e_sample = librosa.frames_to_samples(active_frames[-1],
                                         hop_length=hop_length)[0]

    # plot the rmse on the wavelet of x
    # frames = range(len(energy))
    # import matplotlib.pyplot as plt
    # energy = np.array([
    # sum(abs(x[i:i+frame_length]**2))
    # for i in range(0, len(x), hop_length)
    # ])
    # t = librosa.frames_to_time(frames, sr=sr, hop_length=hop_length)
    # librosa.display.waveplot(x, sr=sr, alpha=0.4)
    # plt.plot(t, energy/energy.max(), 'r--')             # normalized for visualization
    # plt.plot(t[:len(rmse)], rmse/rmse.max(), color='g') # normalized for visualization
    # plt.legend(('Energy', 'RMSE'))

    return x[s_sample:e_sample]
Example #5
def beat_match(song1, song2, sr):
    """
    Creates two lists of length equal to the combined length of both songs. The first list is zero padded from the
    end of the first song until the end of the second song. The second list is zero padded from the beginning of the
    first song until the first beat of the last phrase of that same song. The second song is then appended to the second
    list. The lists are then added together.
    
    Input Parameters
    ------------------------
    
    song1: 1-D array containing sample points of first song
    
    song2: 1-D array containing sample points for second song
    
    sr: integer representing the rate at which the song is being sampled
    
    
    Returns
    ------------------------
    
    a 1-D array containing a synchronized mixture of both songs
    """
    print('begin beatmatch')

    tempo1, beat1 = beat_track(song1)
    tempo2, beat2 = beat_track(song2)

    beat1 = librosa.frames_to_samples(beat1)
    beat2 = librosa.frames_to_samples(beat2)

    song2 = song2[beat2[0]:]

    phrases1 = len(beat1)
    fade_start = phrases1 - 32

    fade_sample = beat1[fade_start]
    fade_out_start = fade_sample
    fade_out_end = len(song2)

    phrases2 = len(beat2)
    fade_in_start = len(song1[:fade_sample])
    fade_in_end = fade_in_start + phrases2

    song2 = fade(song2, type="in", end=beat2[32])
    zeros2 = np.zeros(len(song1[:fade_sample]), dtype=np.float32)
    list2 = np.append(zeros2, song2)
    #list2 = fade(list2, type= "in", start = fade_in_start, end = fade_in_end)

    song1 = fade(song1, type="out", start=fade_out_start)
    zeros1 = np.zeros((len(song2) - len(song1[fade_sample:])),
                      dtype=np.float32)
    list1 = np.append(song1, zeros1)
    #list1 = fade(list1, type= "out", start = fade_out_start, end = fade_out_end)

    mix = list1 + list2
    print('end beatmatch')
    return mix
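A toy sketch (not from the original repository) of the padding scheme the beat_match docstring describes: the first list is the first song padded with trailing zeros, the second list is leading zeros up to the fade point followed by the second song, and the mix is the element-wise sum of the two equal-length arrays.

import numpy as np

song1 = np.array([1.0, 1.0, 1.0, 1.0], dtype=np.float32)  # placeholder samples
song2 = np.array([0.5, 0.5, 0.5], dtype=np.float32)
fade_sample = 2                           # sample index where song2 should enter

list2 = np.append(np.zeros(fade_sample, dtype=np.float32), song2)
list1 = np.append(song1, np.zeros(len(list2) - len(song1), dtype=np.float32))
mix = list1 + list2                       # the overlapping region is summed
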
Example #7
    def restretch(self):
        if self.final_offset >= 0:
            offset_frame = librosa.frames_to_samples(self.final_offset)
            noise = np.zeros(offset_frame)
            self.restretch_data = np.concatenate((noise, self.y_2), axis=0)
        else:
            offset_frame = librosa.frames_to_samples(-self.final_offset)
            # noise = np.zeros(offset_frame)        
            self.restretch_data = self.y_2[offset_frame:]

        padding = np.zeros(len(self.y) - len(self.restretch_data))
        self.final_vocal_audio = np.concatenate((self.restretch_data,padding), axis=0)
Example #8
def feature_extract_blues(blues_track,
                          sr,
                          current_timesig,
                          onset_threshold=0.7):
    # get rhythm overlay
    hop_length = 512
    blues_harm, blues_perc = librosa.effects.hpss(blues_track,
                                                  margin=(1.0, 5.0))
    onset_env = librosa.onset.onset_strength(blues_perc,
                                             sr=sr,
                                             aggregate=np.median)
    _, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)

    times = librosa.frames_to_time(np.arange(len(onset_env)),
                                   sr=sr,
                                   hop_length=hop_length)

    prev_val = 0

    for i, b in enumerate(beats[:-3]):
        # get the corresponding onset env value
        t_b = times[b]
        on_f_b = librosa.time_to_frames([t_b], sr=sr, hop_length=hop_length)
        if librosa.util.normalize(onset_env)[on_f_b] >= prev_val:
            prev_val = librosa.util.normalize(onset_env)[on_f_b]
            keep_beat_start = b
            keep_beat_end = beats[i + 3]
            alert_start = i

    beat_start = librosa.frames_to_samples([keep_beat_start])[0]
    beat_end = librosa.frames_to_samples([keep_beat_end])[0]

    overlay_sample = blues_perc[beat_start:beat_end]

    # get beat samples
    beat_samples = librosa.frames_to_samples(beats, hop_length=hop_length)

    # get extracted subsample - using VS pipeline
    try:
        rep_samples_audio, num_seg = extract.extract_sample(blues_harm, sr, 1)
        signal_sample = rep_samples_audio[0][0]
    except:
        print "Could not extract sample from VS Pipeline, using default.."
        mdpt = int(len(blues_harm) / 2)
        signal_sample = blues_harm[mdpt:mdpt + sr]

    return {
        'overlay': overlay_sample,
        'beats': beat_samples,
        'alert': signal_sample
    }
Example #9
def slice_long_sample(y, sr, declick_samples=15, length_limit=None, fname=''):

    if length_limit and len(y) / sr > length_limit:
        y = y[0:length_limit * sr]

    onsets = rosa.onset.onset_detect(y=y, sr=sr, backtrack=True)
    onset_times = rosa.frames_to_samples(onsets)
    onset_times = np.concatenate([onset_times, [len(y)]])
    segmented = [
        y[onset_times[n]:onset_times[n + 1]]
        for n in range(len(onset_times) - 1)
    ]

    segmented = [s for s in segmented if len(s) >= declick_samples]

    if declick_samples > 1:
        declick_envelope = np.linspace(1 / declick_samples,
                                       1 - (1 / declick_samples),
                                       declick_samples)
        for i in range(len(segmented)):
            segmented[i][0:declick_samples] *= declick_envelope

    slices = []
    for i, s in enumerate(segmented):
        if not i % poll_every and i > 1:
            print(
                rf'calculating features for slice {i}/{len(segmented)} of {fname}...'
            )
        slices.append(ausl.AudioSlice(s, sr, fname))

    return slices, onset_times
Example #10
def onset(x, sr):
    # Short-time Fourier transform (for EQ, must do inverse Fourier transform after)
    X = librosa.stft(x)

    # Find the frames when onsets occur
    onset_frames = librosa.onset.onset_detect(x, sr=sr)
    print("Onset Frames = " + str(onset_frames) + "\n ")

    # Find the times, in seconds, when onsets occur in the audio signal
    onset_times = librosa.frames_to_time(onset_frames, sr=sr)
    print("Onset Times = " + str(onset_times) + "\n ")

    # Convert the onset frames into sample indices to play "BEEB" sound on it
    onset_samples = librosa.frames_to_samples(onset_frames)
    print("Onset Samples = " + str(onset_samples) + "\n ")

    # Use the "length" parameter so the click track is the same length as the original signal
    clicks = librosa.clicks(times=onset_times, length=len(x))

    # Play the click track "added to" the original signal
    sd.play(x + clicks, sr)

    # Display the waveform of the original signal
    librosa.display.waveplot(x, sr)
    plt.title("Original Signal")
    plt.show()  # Close window to resume

    return onset_frames, onset_times, onset_samples
Example #11
    def __init__(self,
                 dataset,
                 sr=22050,
                 frameSize=2048,
                 hopSize=512,
                 transform=None,
                 cacheSize=4):
        self.dataset = dataset
        self.sr = sr
        self.frameSize = frameSize
        self.hopSize = hopSize
        self.transform = transform
        self.cacheSize = cacheSize
        self.frameDt = float(frameSize) / sr

        # count frames in dataset
        nFramesList = []
        for pathPair in dataset.pathPairs:
            wavPath = pathPair.wav
            duration = librosa.get_duration(filename=wavPath)
            nSamples = librosa.time_to_samples(duration, sr=self.sr)
            nFrames = 1 + int(
                (nSamples - self.frameSize) / float(self.hopSize))
            nFramesList.append(nFrames)
            # check validation
            sStart = librosa.frames_to_samples(nFrames - 1,
                                               hop_length=self.hopSize)
            sEnd = sStart + self.frameSize
            assert (nSamples > 0) and (
                sEnd <= nSamples), f'{nFrames}:{sStart}_{sEnd}, {nSamples}'
        self.frameCumsum = np.cumsum(nFramesList)

        # FIFO cache
        self._sampleCache = deque(maxlen=cacheSize)
        self._sampleIdxCache = deque(maxlen=cacheSize)
def generate_sine_midi_note(f0_info, sr, n_duration):
    f0 = f0_info[0]
    A = remap(f0_info[1], CdB.min(), CdB.max(), 0, 1)
    duration = librosa.frames_to_time(n_duration, sr=fs, hop_length=hop_length)
    # Generate music21 note
    # round to the nearest 0.02 s for music21 compatibility
    note_duration = 0.02 * np.around(duration / 2 / 0.02)
    midi_velocity = int(round(remap(f0_info[1], CdB.min(), CdB.max(), 0, 127)))
    if f0 is None:
        try:
            note_info = Rest(type=mm.secondsToDuration(note_duration).type)
        except DurationException:
            note_info = None
        f0 = 0
    else:
        midi_note = round(librosa.hz_to_midi(f0))
        try:
            note = Note(midi_note,
                        type=mm.secondsToDuration(note_duration).type)
            note.volume.velocity = midi_velocity
            note_info = [note]
        except DurationException:
            note_info = None

    if note_info is None:
        return None

    # Generate Sinewave
    n = np.arange(librosa.frames_to_samples(n_duration, hop_length=hop_length))
    sine_wave = A * np.sin(2 * np.pi * f0 * n / float(sr))
    return [sine_wave, note_info]
Example #13
def strip(y, frame_length, hop_length=512):
    """ Removing leading silence from an audio track

    :param y: (np.ndarray) audio signal
    :param frame_length: (int)
    :param hop_length: (int)
    :return: Audio signal with leading silence removed
    """

    # compute RMSE.
    rms = librosa.feature.rms(y,
                              frame_length=frame_length,
                              hop_length=hop_length,
                              center=True)

    # identify the first frame index where RMSE exceeds a threshold.
    thresh = 0.01
    frame_index = 0
    while rms[0][frame_index] < thresh:
        frame_index += 1

    # convert units of frames to samples.
    start_sample_index = librosa.frames_to_samples(frame_index,
                                                   hop_length=hop_length)

    # return the trimmed signal.
    return y[start_sample_index:]
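A minimal usage sketch for strip() above; the file name and the soundfile dependency are assumptions, not part of the original snippet. Note that newer librosa releases expect the signal to be passed to librosa.feature.rms as a keyword argument (y=...), whereas the snippet passes it positionally as older releases allowed.

import librosa
import soundfile as sf

y, sr = librosa.load('speech.wav', sr=None)
trimmed = strip(y, frame_length=2048, hop_length=512)
sf.write('speech_trimmed.wav', trimmed, sr)
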
Example #14
def slicer(song, n_beats=16, duration=0):
    '''
    Takes in a song and its segments and computes the largest total segment in the dictionary.
    To do this it sums up each of the dictionary entries using that disgusting(tm) comprehension below.
    The segment has to be larger than the given duration in order to be considered in the sum.
    It then takes the max dictionary entry and returns the segment with the bounds.

    :param song: (Song)       | song to slice
    :param n_beats: (int)     | number of beats to keep in the returned slice
    :param duration: (float)  | min duration (in seconds)
    :return: slice (Slice)    | segmented slice
    '''
    largest_seg = max(
        song.segments.items(),
        key=lambda x: sum(
            [z[1] - z[0] for z in x[1] if z[1] - z[0] >= duration]))[1]
    max_pair = tuple(max(largest_seg, key=lambda pair: pair[1] - pair[0]))

    slice = Slice(song.path, offset=max_pair[0], duration=max_pair[1])

    perc_y = librosa.effects.percussive(slice.y)
    beat_track = beatTrack(y=perc_y, sr=song.load.sr)

    end_frame = librosa.frames_to_samples(beat_track.beats[n_beats])[0]
    slice.y = slice.y[:end_frame]

    return slice
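A toy sketch (with made-up segment data) of the selection logic the slicer docstring describes: pick the dictionary entry whose (start, end) pairs sum to the largest total duration, counting only pairs at least duration seconds long, then take that entry's single longest pair.

duration = 1.0
segments = {
    'verse':  [(0.0, 4.0), (10.0, 12.5)],   # total 6.5 s
    'chorus': [(4.0, 9.5)],                 # total 5.5 s
}
largest_seg = max(
    segments.items(),
    key=lambda x: sum(z[1] - z[0] for z in x[1] if z[1] - z[0] >= duration))[1]
max_pair = max(largest_seg, key=lambda pair: pair[1] - pair[0])
print(largest_seg, max_pair)   # the 'verse' list wins; its longest pair is (0.0, 4.0)
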
Example #15
    def generate_note(self, f0_info, n_duration, round_to_sixteenth=True):
        f0 = f0_info[0]
        a = remap(f0_info[1], self.cqt.min(), self.cqt.max(), 0, 1)
        duration = librosa.frames_to_time(n_duration, sr=self.sr, hop_length=self.hop_length)
        note_duration = 0.02 * np.around(duration / 0.02)  # round to the nearest 0.02 s for music21 compatibility
        midi_duration = second_to_quarter(duration, self.tempo)
        midi_velocity = int(round(remap(f0_info[1], self.cqt.min(), self.cqt.max(), 80, 120)))
        if round_to_sixteenth:
            midi_duration = round(midi_duration * 16) / 16
        try:
            if f0 is None:
                midi_note = None
                note_info = Rest(type=self.mm.secondsToDuration(note_duration).type)
                f0 = 0
            else:
                midi_note = round(librosa.hz_to_midi(f0))
                note = Note(librosa.midi_to_note(midi_note), type=self.mm.secondsToDuration(note_duration).type)
                note.volume.velocity = midi_velocity
                note_info = [note]
        except DurationException:
            if f0 is None:
                midi_note = None
                note_info = Rest(type='32nd')
                f0 = 0
            else:
                midi_note = round(librosa.hz_to_midi(f0))
                note = Note(librosa.midi_to_note(midi_note),
                            type='eighth')
                note.volume.velocity = midi_velocity
                note_info = [note]

        midi_info = [midi_note, midi_duration, midi_velocity]
        n = np.arange(librosa.frames_to_samples(n_duration, hop_length=self.hop_length))
        sine_wave = a * np.sin(2 * np.pi * f0 * n / float(self.sr))
        return [sine_wave, midi_info, note_info]
    def find_localmax(
            self,
            signal,
            noise_threshold=0.0,  # Range: [0.0, 1.0]. 
            jump=None,
            frame_length=1024):
        """ """
        if not librosa_available:
            print('ERROR: Error in find_localmax. Librosa not installed.')
            index_list = []
            return index_list

        # Adjust for comparable results for low sampling rates.
        if self.sampling_freq < 300000:
            frame_length = int(frame_length / 2)
        if jump is None:
            jump = int(self.sampling_freq / 1000)  # Default = 1 ms.
        y = signal.copy()
        if noise_threshold > 0.0:
            y[(np.abs(y) < noise_threshold)] = 0.0
        rmse = librosa.feature.rmse(y=y,
                                    hop_length=jump,
                                    frame_length=frame_length,
                                    center=True)
        locmax = librosa.util.localmax(rmse.T)
        maxindexlist = [index for index, a in enumerate(locmax) if a == True]
        # Original index list is related to jump length. Convert.
        index_list = librosa.frames_to_samples(maxindexlist, hop_length=jump)
        #
        return index_list
def beats_to_sample(beats, y, sr):
    """
    Aligning supposed beats to the peak of energy in the y signal

    beats:  np.ndarray
        frames index where beats are supposed to be
    y: np.ndarray
        input signal
    sr: int
        samplerate

    Returns
    y_beat: np.ndarray
        array with 1 when there is a beat
    beats_indices: np.ndarray
        array with indices of the beats
    """

    y_beat = np.zeros(y.shape)
    margin = int(0.1 * sr)
    for beat in frames_to_samples(beats):
        bs_index = beat - margin + np.argmax(
            np.abs(y[beat - margin:beat + margin]))
        y_beat[bs_index] = 1
    return y_beat, np.where(y_beat == 1)[0]
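A minimal usage sketch for beats_to_sample() above; the file name is a placeholder, and frames_to_samples is assumed to be imported from librosa at module level, as the helper's bare call suggests.

import librosa

y, sr = librosa.load('drum_loop.wav', sr=None)
_, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
y_beat, beat_indices = beats_to_sample(beat_frames, y, sr)
print(beat_indices[:5])   # sample positions snapped to nearby energy peaks
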
def get_beats_samples(y, sr):
    # onset strength envelope (note attack points)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    # beat positions (frame indices)
    _, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr, hop_length=512)
    # beat positions (sample indices)
    beat_samples = librosa.frames_to_samples(beats, hop_length=512)
    return beats, beat_samples
Example #20
def onset_detect(audio_vector, sr):
    """Returns the onsets detected of a given audio file"""
    ####### Onset and Tempo detection ##########
    onset = librosa.onset.onset_detect(y=audio_vector, sr=sr, backtrack=True)
    onset_env = librosa.onset.onset_strength(y=audio_vector, sr=sr)
    onset_sec = librosa.frames_to_time(onset, sr=sr)
    onset_frames = librosa.frames_to_samples(onset)
    return onset_frames
Example #21
    def apply_trim_offset(self, frame):
        return (
            librosa.samples_to_frames(
                librosa.frames_to_samples(frame) + self.trim_offset
            )
            if self.trim_offset
            else frame
        )
Example #22
    def __test(x, y, hop_length, n_fft):
        y_test = librosa.frames_to_samples(x, hop_length=hop_length, n_fft=n_fft)
        assert np.allclose(y, y_test)
        y = np.asanyarray(y)
        assert y.shape == y_test.shape
        assert y.ndim == y_test.ndim
Example #23
def overlap(siga, sigb, overlap_beats, sr, fade):
    # Trim leading and trailing silence
    siga = np.trim_zeros(siga)
    sigb = np.trim_zeros(sigb)

    # Get beat frames for each track and convert to track sample indices
    atempo, abeatframes = librosa.beat.beat_track(y=siga, sr=sr)
    abeats = librosa.frames_to_samples(abeatframes)
    btempo, bbeatframes = librosa.beat.beat_track(y=sigb, sr=sr)
    bbeats = librosa.frames_to_samples(bbeatframes)

    # print atempo
    # print btempo

    # print siga.shape
    # print sigb.shape

    # print abeats.shape
    # print bbeats.shape

    # If fade is specified, cross-fade both tracks into each other
    if (fade):
        print "Fading tracks"
        fadeindices = int(bbeats[overlap_beats])
        fade = np.linspace(0, 1, num=fadeindices + 1)
        for i in range(0, fade.shape[0]):
            sigb[i] *= fade[i]
            siga[siga.shape[0] - 1 - i] *= fade[i]

    # print "Fade indicies: ",int(bbeats[overlap_beats])

    # Create the output signal
    mix = np.zeros(sigb.shape[0] + siga.shape[0])
    # Prep it with the first track
    mix[:siga.shape[0]] = siga
    # The time frame of the beat where the second track should start
    startframe = abeats[abeats.shape[0] - overlap_beats + 4]
    # print "Start frames: ",startframe,startframe-bbeats[3],startframe-bbeats[3]+sigb.shape[0]
    # print "Time beyond end of a ",(startframe-bbeats[3]+sigb.shape[0]-siga.shape[0])/44100.
    # for i in range(overlap_beats):
    #     print abeats[abeats.shape[0]-overlap_beats+i]-abeats[abeats.shape[0]-overlap_beats+i-1], bbeats[i+1]-bbeats[i]
    mix[startframe - bbeats[3]:startframe - bbeats[3] + sigb.shape[0]] += sigb
    mix = np.trim_zeros(mix)
    # print "Shape of mix ",mix.shape
    return mix
Example #24
def compute_segments_librosa(Y, sr, numparts):
    myprint('Computing parts segmentation')
    bounds = librosa.segment.agglomerative(Y, numparts)
    bound_times = librosa.frames_to_time(bounds, sr=sr)
    bound_samples = librosa.frames_to_samples(bounds,
                                              hop_length=512,
                                              n_fft=2048)
    myprint('bound_samples = %s / %s' % (bound_samples.shape, bound_samples))
    return bounds, bound_times, bound_samples
Example #25
def analyze_signals(original_signal, test_signal, hop_size, start_time, end_time, sr, start_bpm, offbeat_factor):
    # Offbeat_factor is how much off the beat the person is allowed to be
    test, sr = librosa.load(test_signal, sr)
    test_normalizer = np.max(test)
    test = test/float(test_normalizer)
    original, sr = librosa.load(original_signal, sr)
    original_normalizer = np.max(original)
    original = original/float(original_normalizer)
    test_onset_env, test_beat_frames = estimated_beat(test_signal, hop_size, start_time, end_time, sr, start_bpm)
    original_onset_env, original_beat_frames = estimated_beat(original_signal, hop_size, start_time, end_time, sr, start_bpm)
    # plt.show()
    test_beats = librosa.frames_to_samples(test_beat_frames, hop_length=hop_size)
    original_beats = librosa.frames_to_samples(original_beat_frames, hop_length=hop_size)
    beat_score = calculate_rank(original_beats, test_beats, sr, sr*offbeat_factor)
    
    plt.figure(1)
    ax1 = plt.subplot(2,1,1)
    plt.plot(original, label='Signal')
    plt.vlines(original_beats, -2, 2, alpha=.5, color='r',
                linestyle='solid', linewidth=3, label='Beats')
    plt.legend(frameon=True, framealpha=0.75)
    # Limit the plot to a X-second window
    plt.xlim([start_time * sr, end_time * sr])
    plt.xticks(np.linspace(start_time, end_time, 5) * sr,
                np.linspace(start_time, end_time, 5))
    plt.xlabel('Time (s)')
    plt.tight_layout()

    ax2 = plt.subplot(2,1,2, sharex=ax1, sharey=ax1)
    plt.plot(test, label='Signal')
    plt.vlines(test_beats, -2, 2, alpha=.5, color='g',
                linestyle='solid',linewidth=3, label='Beats')
    plt.legend(frameon=True, framealpha=0.75)
    # Limit the plot to a X-second window
    plt.xlim([start_time * sr, end_time * sr])
    plt.xticks(np.linspace(start_time, end_time, 5) * sr,
                np.linspace(start_time, end_time, 5))
    plt.xlabel('Time (s)')
    plt.tight_layout()
    plt.subplots_adjust(hspace=0)
    plt.show()
    return beat_score
Example #26
def split_song(mix_in, mix_out, bpm, file_name):
    file_path = './data/mp3/' + file_name
    audio, _ = librosa.load(file_path, sr=SR)
    _, beats = librosa.beat.beat_track(y=audio, sr=SR, bpm=bpm)

    body = librosa.frames_to_samples(beats[mix_in + MIX_LEN:mix_out])
    trans_in = librosa.frames_to_samples(beats[mix_in:mix_in + MIX_LEN])
    trans_out = librosa.frames_to_samples(beats[mix_out:mix_out + MIX_LEN])

    body_audio = audio[body[0]:body[-1]]
    in_audio = audio[trans_in[0]:trans_in[-1]]
    out_audio = audio[trans_out[0]:trans_out[-1]]

    file_name = file_name.split('.')[0]
    librosa.output.write_wav('./data/chopped/body/' + file_name + '.wav',
                             body_audio, SR)
    librosa.output.write_wav('./data/chopped/trans_in/' + file_name + '.wav',
                             in_audio, SR)
    librosa.output.write_wav('./data/chopped/trans_out/' + file_name + '.wav',
                             out_audio, SR)
def splitSamples(sourceDir, outputDir, mode=None):

    print('- Splitting samples for dataset:', mode)

    source_filelist = os.listdir(sourceDir)

    for f in source_filelist:

        print('processing', f)

        outputSamples = os.path.join(outputDir, 'dataset_' + mode)

        y, sr = librosa.load(os.path.join(sourceDir, f))
        # trim silence at beginning and end
        y, index = librosa.effects.trim(y)
        # detect onsets
        o_env = librosa.onset.onset_strength(y, sr=sr, feature=librosa.cqt)
        onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)

        vectors = []
        words = []
        filenames = []

        onset_samples = list(librosa.frames_to_samples(onset_frames))
        onset_samples = np.concatenate([onset_samples, [len(y)]])
        starts = onset_samples[0:-1]
        stops = onset_samples[1:]

        analysis_folder = sourceDir
        samples_folder = os.path.join(outputSamples, f)

        try:
            os.makedirs(samples_folder)
        except:
            pass

        pbar = ProgressBar()
        for i, (start, stop) in enumerate(pbar(zip(starts, stops))):
            audio = y[start:stop]
            filename = os.path.join(samples_folder, str(i) + '.wav')
            librosa.output.write_wav(filename, audio, sr)
            vector = get_fingerprint(audio, sr=sr)
            word = basename(filename)
            vectors.append(vector)
            words.append(word)
            filenames.append(filename)
        np.savetxt(os.path.join(analysis_folder, 'vectors'),
                   vectors,
                   fmt='%.5f',
                   delimiter='\t')
        np.savetxt(os.path.join(analysis_folder, 'words'), words, fmt='%s')
        np.savetxt(os.path.join(analysis_folder, 'filenames.txt'),
                   filenames,
                   fmt='%s')
Example #28
def feature_extract_jazz(jazz_track, sr, num_segments=8, seg_thresh=3):
    # segment boundaries
    mfcc = librosa.feature.mfcc(y=jazz_track, sr=sr)
    bounds = librosa.segment.agglomerative(mfcc, num_segments)
    sample_bounds = librosa.frames_to_samples(bounds)
    sample_intervals = boundaries_to_intervals(sample_bounds)

    # clean up short segments
    del_list = []
    for i, intr in enumerate(sample_intervals):
        if intr[1] - intr[0] < seg_thresh * sr:
            del_list.append(i)
    sample_intervals = np.delete(sample_intervals, del_list, axis=0)

    # corresponding intervals
    #   TODO: determine segment key/ progression
    shift_by = []

    # extracted subsample - using VS pipeline
    jazz_harm, jazz_perc = librosa.effects.hpss(jazz_track)
    try:
        rep_samples_audio, num_seg = extract.extract_sample(jazz_harm, sr, 1)
        signal_sample = rep_samples_audio[0][0]
    except:
        print "Could not extract sample from VS Pipeline, using default.."
        mdpt = int(len(jazz_harm) / 2)
        signal_sample = jazz_harm[mdpt:mdpt + sr]

    # extract beats to overlay VS sample
    onset_env = librosa.onset.onset_strength(jazz_perc,
                                             sr=sr,
                                             aggregate=np.median)
    _, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    beat_samples = librosa.frames_to_samples(beats)

    return {
        'bounds': sample_intervals,
        'shift': shift_by,
        'alert': signal_sample,
        'beats': beat_samples
    }
def print_tf(tf, rate, file=sys.stdout):
    fprint = lambda *args, **kwargs: print(*args, file=file, **kwargs)
    fprint(tf.shape[1], end="")
    for i in range(tf.shape[1]):
        fprint(' {}'.format(librosa.frames_to_samples(i) / rate), end="")
    fprint()
    for i, freq in enumerate(librosa.fft_frequencies(sr=rate)):
        row = tf[i]
        fprint(freq, end="")
        for x in tf[i]:
            fprint(' {}'.format(np.abs(x)), end="")
        fprint()
Example #30
def test_frames_to_samples(frames, hop_length, n_fft):

    samples = librosa.frames_to_samples(frames,
                                        hop_length=hop_length,
                                        n_fft=n_fft)
    frames = np.asanyarray(frames)
    assert frames.shape == samples.shape
    assert frames.ndim == samples.ndim
    if n_fft is None:
        assert np.allclose(samples, frames * hop_length)
    else:
        assert np.allclose((samples - n_fft // 2) // hop_length, frames)
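A worked numeric check of the relation the test asserts: when n_fft is given, frames_to_samples offsets each frame's sample index by n_fft // 2 (librosa's centering convention), so the conversion is frames * hop_length + n_fft // 2.

import numpy as np
import librosa

frames = np.arange(4)
hop_length, n_fft = 512, 2048
samples = librosa.frames_to_samples(frames, hop_length=hop_length, n_fft=n_fft)
print(samples)                                        # [1024 1536 2048 2560]
assert np.all(samples == frames * hop_length + n_fft // 2)
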
Example #31
def get_downbeats(y, tempo, beat_frames, sr):
    measures = len(beat_frames) // BEATS
    beat_frames = librosa.samples_to_frames(beat_frames)
    onset_env = librosa.onset.onset_strength(y, sr=sr, aggregate=np.median)
    beat_strengths = onset_env[beat_frames]
    measure_beat_strengths = beat_strengths[:measures * BEATS].reshape(
        -1, BEATS)
    beat_pos_strength = np.sum(measure_beat_strengths, axis=0)
    downbeat_pos = np.argmax(beat_pos_strength)
    full_measure_beats = beat_frames[:measures * BEATS].reshape(-1, BEATS)
    downbeat_frames = full_measure_beats[:, downbeat_pos]
    return librosa.frames_to_samples(downbeat_frames)
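A toy sketch (with made-up strengths and BEATS = 4) of the downbeat logic above: onset strengths at the beats are grouped into one row per measure, the columns are summed, and the strongest column is taken as the downbeat position.

import numpy as np

BEATS = 4
beat_strengths = np.array([5.0, 1.0, 2.0, 1.5,
                           6.0, 0.5, 2.5, 1.0])   # two 4-beat measures
beat_pos_strength = beat_strengths.reshape(-1, BEATS).sum(axis=0)
downbeat_pos = np.argmax(beat_pos_strength)
print(downbeat_pos)   # 0: each measure starts on its strongest beat
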
Example #32
def strip(signal, frame_length=512, hop_length=256):
    # Compute RMSE.
    rmse = librosa.feature.rms(signal,
                               frame_length=frame_length,
                               hop_length=hop_length,
                               center=True)

    # Identify the first frame index where RMSE exceeds a threshold.
    thresh = 0.001
    frame_index = 0
    while rmse[0][frame_index] < thresh:
        frame_index += 1

    # Convert units of frames to samples.
    start_sample_index = librosa.frames_to_samples(frame_index,
                                                   hop_length=hop_length)

    signal = signal[start_sample_index:]
    signal = np.array(list(signal)[::-1])

    # Compute RMSE.
    rmse = librosa.feature.rms(signal,
                               frame_length=frame_length,
                               hop_length=hop_length,
                               center=True)

    # Identify the first frame index where RMSE exceeds a threshold.
    thresh = 0.001
    frame_index = 0
    while rmse[0][frame_index] < thresh:
        frame_index += 1

    # Convert units of frames to samples.
    start_sample_index = librosa.frames_to_samples(frame_index,
                                                   hop_length=hop_length)

    signal = np.array(signal[start_sample_index:])

    # Return the trimmed signal.
    return np.array(list(signal)[::-1])
Example #33
def reconstruct(features, n_fft=2048, sr=22050, hop_length=None):
    if hop_length is None:
        hop_length = n_fft // 4

    # will be a fraction shorter than the original
    wave = np.zeros(lr.frames_to_samples(features.shape[0], hop_length,
                                         n_fft)[0],
                    dtype=np.float32)

    for frame, feature_slice in enumerate(features):
        sample_start = lr.frames_to_samples(frame, hop_length, n_fft)[0]
        wave_slice = reconstruct_slice(feature_slice, n_fft, sr)
        sample_end = sample_start + len(wave_slice)

        # not too sure about this
        if len(wave[sample_start:sample_end]) < len(wave_slice):
            wave_slice = wave_slice[:len(wave[sample_start:sample_end])]

        wave[sample_start:sample_end] += wave_slice * np.hanning(
            len(wave_slice))  # do I need to scale?

    return wave
Example #34
def process_audio(filename, frame_size, mel_bands, fmax, display):
    """ act audio metadata and compute the dynamic spectrogram.

    Prepare the audio file and process it to compute the dynamic spectrograms
    block by block.

    Args:
        filename (str): Path to the audio file.
        frame_size (int): frame size for the "per block" processing.
        mel_bands (int): number of Mel bands for the static spectrogram.
        fmax (float): upper frequency bound for the Mel filterbank.
        display (boolean): display or save the plot.

    Returns:
        None

    Todo:
        - check if the samplerate of the file corresponds to the samplerate
          in the configuration file

    Note:
        According to the 2016 base line code, the frame size is 40ms with
        a hop size of 50%.

    """
    samples = librosa.frames_to_samples(frame_size)
    chan_nb, samplerate = extract_audio_data(filename)
    counter = 0

    for block in sfblocks(filename, blocksize=samples[0]):
        counter += 1
        # separate the channels to compute the spectrograms
        for chan in np.arange(chan_nb):
            if chan_nb < 2:
                y = block
            else:
                y = block[:, chan]
            # Compute the dynamic spectrogram
            dynamic_spectrogram(
                    y,
                    filename,
                    block_nb=counter,
                    display=display)
            static_spectrogram(
                    y,
                    filename,
                    counter,
                    mel_bands,
                    fmax, display=display)
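A side calculation, based on the Note in the docstring rather than on the function itself: at an assumed sample rate of 44.1 kHz, the 40 ms frame with a 50% hop from the 2016 baseline corresponds to the following sample counts.

import librosa

sr = 44100
frame_samples = librosa.time_to_samples(0.040, sr=sr)   # 1764 samples per 40 ms frame
hop_samples = frame_samples // 2                         # 882 samples at a 50% hop
print(frame_samples, hop_samples)
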
Example #35
def get_drum_wav(percussion, width=5, n=None):

    # Compute volume shaper
    percussion = librosa.util.normalize(percussion.ravel())

    v = scipy.ndimage.median_filter(percussion,
                                    width,
                                    mode='mirror')
    v = np.atleast_2d(v)

    wav = synthesize(librosa.frames_to_samples(np.arange(v.shape[-1]),
                                               hop_length=hop_length),
                     v,
                     fmin=librosa.midi_to_hz(0),
                     bins_per_octave=12,
                     wave=noise,
                     n=n)[0]

    return wav
Example #36
    def __test(times, frames, sr, hop_length, click_freq, click_duration, click, length):

        y = librosa.clicks(times=times,
                           frames=frames,
                           sr=sr,
                           hop_length=hop_length,
                           click_freq=click_freq,
                           click_duration=click_duration,
                           click=click,
                           length=length)

        if times is not None:
            nmax = librosa.time_to_samples(times, sr=sr).max()
        else:
            nmax = librosa.frames_to_samples(frames, hop_length=hop_length).max()

        if length is not None:
            assert len(y) == length
        elif click is not None:
            assert len(y) == nmax + len(click)
Example #37
def get_wav(cq, nmin=60, nmax=120, width=5, max_peaks=1, wave=None, n=None):

    # Slice down to the bass range
    cq = cq[nmin:nmax]

    # Pick peaks at each time
    mask = peakgram(librosa.logamplitude(cq**2, top_db=60, ref_power=np.max),
                    max_peaks=max_peaks)

    # Smooth in time
    mask = scipy.ndimage.median_filter(mask,
                                       size=(1, width),
                                       mode='mirror')

    # resynthesize with some magnitude compression
    wav = synthesize(librosa.frames_to_samples(np.arange(cq.shape[-1]),
                                               hop_length=hop_length),
                     mask * cq**(1./3),
                     fmin=librosa.midi_to_hz(nmin + MIDI_MIN),
                     bins_per_octave=12,
                     wave=wave,
                     n=n)[0]

    return wav
#THE KEY FUNCTION of separation *******
y_harmonic, y_percussive = librosa.effects.hpss(y)
x_harmonic, x_percussive = librosa.effects.hpss(x)

#beats
tempo_y, beats_y = librosa.beat.beat_track(y=y_percussive, sr=sr_y, trim=True)
tempo_x, beats_x = librosa.beat.beat_track(y=x_percussive, sr=sr_x, trim=True)

#time-stretch y to match the tempo of x
ym = librosa.effects.time_stretch(y, tempo_x/tempo_y)

#remeasure tempo of y_matched
ym_harmonic, ym_percussive = librosa.effects.hpss(ym)
tempo_ym, beats_ym = librosa.beat.beat_track(y=ym_percussive, sr=sr_y, trim=True)

#PHASE the tracks
#get arrays of the sample indices of the beats
beats_i_x = librosa.frames_to_samples(beats_x)
beats_i_ym = librosa.frames_to_samples(beats_ym)

#cut off the tracks at the beats
x = x[beats_i_x[1]:]
ym = ym[beats_i_ym[1]:]

#mix the matched tracks
mix = np.array([(x + y)/2 for x, y in zip(ym, x)],dtype=np.float32)

#input array must be in numpy.float32!
librosa.output.write_wav('mixes/beat_matched.wav', mix, sr_y)
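The element-wise list comprehension above can be replaced by a single vectorized operation; a sketch of the equivalent (not from the original script), truncating both signals to the shorter length just as zip() does:

n = min(len(ym), len(x))
mix = ((ym[:n] + x[:n]) / 2).astype(np.float32)
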

#LOAD OR CREATE S-MATRIX & NOVELTY VECTOR
s_matrix = init_smatrix(file_id,f,r, sample_duration)
novelty = init_novelty_vector(file_id, w, w_f, sample_duration, s_matrix)

#https://bmcfee.github.io/librosa/generated/librosa.util.peak_pick.html?
#TODO correlate to the beat, somehow
w_p = w_f/w_p_ratio
peaks = librosa.util.peak_pick(novelty, w_p, w_p, w_p, w_p, peak_window, w_p)

#cross reference beats and peaks
#peaks = cross_reference(beats, peaks, beat_threshold)
#assuming music is periodic...
peaks = filter_by_period(peaks, period_threshold,fpb)

#Sample a test segment
p_s = librosa.frames_to_samples(peaks)
if (len(p_s) > 2):
    sample = y[p_s[1]:p_s[2]]
    librosa.output.write_wav('mixes/sampled.wav', sample, sr)
    loop = np.concatenate([sample,sample,sample])
    librosa.output.write_wav('mixes/loop.wav', loop, sr)
    
#Shuffle a test segment
p_s = librosa.frames_to_samples(peaks)
if (len(p_s) >= 4):
    s1 = y[p_s[0]:p_s[1]]
    s2 = y[p_s[1]:p_s[2]]
    s3 = y[p_s[2]:p_s[3]]
    loop = np.concatenate([s3,s2,s1])
    librosa.output.write_wav('mixes/shuffle.wav', loop, sr)
    
from analysis.pitch import *
from analysis.util import *

file_id = 'all'
audio_path = 'assets/'+file_id+'.wav'

#sr = None disables resampling
y, sr = librosa.load(audio_path, sr=None, duration=40.0)

#THE KEY FUNCTION of separation *******
y_harmonic, y_percussive = librosa.effects.hpss(y)
tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr, trim=False)

#PHRASE DETECT
phrases = get_phrase_intervals(file_id,y,sr, 1.0 , 1.0, 4.0,  15, 0.13, tempo)
s_phrases = librosa.frames_to_samples(phrases)

y1 = np.array(y[s_phrases[1]:s_phrases[2]])
y2 = np.array(y[s_phrases[3]:s_phrases[4]])

#THE KEY FUNCTION of separation *******
y_harmonic, y_percussive = librosa.effects.hpss(y1)
y_harmonic_2, y_percussive_2 = librosa.effects.hpss(y2)
tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr, trim=False)

# We'll use a CQT-based chromagram here.  An STFT-based implementation also exists in chroma_cqt()
# We'll use the harmonic component to avoid pollution from transients
C = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
C2 = librosa.feature.chroma_cqt(y=y_harmonic_2, sr=sr)

pitch_sums_1 = get_pitch_sums(C)