def process(file):
    # read in the file
    f, sr, enc = wavread(file)

    # compute the Fourier transform and the window times:
    D = librosa.stft(f)
    times = librosa.frames_to_samples(np.arange(D.shape[1]))

    # compute the onset strength envelope:
    env = librosa.onset.onset_strength(y=f, sr=sr)
    assert len(times) == len(env)

    # compute the onsets we are actually interested in, convert to samples:
    onsets = librosa.onset.onset_detect(y=f, sr=sr)
    onset_samps = librosa.frames_to_samples(onsets)
    assert onset_samps[-1] <= len(f)

    # create a lookup table for retrieving onset strengths:
    lookup = []
    prevval = 0
    for v in onset_samps:
        for i in range(prevval, len(times)):
            if times[i] == v:
                lookup.append(i)
                prevval = i + 1
                break

    # create an empty audio buffer (result):
    result = np.zeros(len(f))

    # write the envelope's onset strength value at every detected onset point:
    for i in range(len(lookup)):
        result[onset_samps[i]] = env[lookup[i]]

    # write the result:
    wavwrite(result, file[:-4] + '_proc.wav', sr, enc)
    return
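# --- Sketch (not from the original source): a minimal, self-contained check of
# the frame/sample alignment that process() relies on. Assumes librosa >= 0.7
# for librosa.tone; the 440 Hz test tone is arbitrary.
import numpy as np
import librosa

sr = 22050
y = librosa.tone(440, sr=sr, duration=1.0)
D = librosa.stft(y)  # default hop_length=512
frame_samples = librosa.frames_to_samples(np.arange(D.shape[1]))
# with the default hop length, consecutive frames are 512 samples apart
assert frame_samples[1] - frame_samples[0] == 512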
def gen_hihat(all_data, fs, fps, cand):
    # fixed analysis rate for the beat tracker
    fps = 100
    print(cand)
    proc = BeatTrackingProcessor(look_aside=0.2, fps=fps)
    act = RNNBeatProcessor()(all_data)
    beat_times = proc(act)

    # keep only beats that fall within the song
    song_len = librosa.samples_to_time(all_data.shape[0], sr=fs)
    hihat = np.zeros(all_data.shape)
    idx = np.where(beat_times <= song_len)[0]
    new_beat_times = np.zeros(idx.shape)
    new_beat_times[idx] = beat_times[idx]
    beat_samples = librosa.time_to_samples(new_beat_times, sr=fs)

    # candidate hi-hat segment, converted from frames to samples
    start = librosa.frames_to_samples(cand[0], hop_length=hop_len, n_fft=win_len)
    end = librosa.frames_to_samples(cand[-1], hop_length=hop_len, n_fft=win_len)
    cand_len = end - start

    # mark every fourth beat as a hi-hat position
    i = 3
    is_hihat = np.zeros(beat_samples.shape)
    while i < len(beat_samples):
        is_hihat[i] = 1
        i = i + 4

    # paste the candidate segment at every marked beat
    for i, s in enumerate(beat_samples):
        if is_hihat[i] == 1:
            if s + cand_len > hihat.shape[0]:
                break
            hihat[s:s + cand_len] = all_data[start:end]
    return hihat, new_beat_times, beat_samples
def onsets_and_strength(all_onsets_strength, onsets_sorted, dly_onsets,
                        strongest_onset, strongest_onset_2, y_cut,
                        onset_strength):
    print(all_onsets_strength)
    print(onsets_sorted)

    plt.subplot(211)
    plt.vlines(librosa.frames_to_samples(dly_onsets), -1.0, 1.0, zorder=2)
    plt.vlines(librosa.frames_to_samples(strongest_onset['onset']),
               -1.0, 1.0, colors='red', zorder=3)
    plt.vlines(librosa.frames_to_samples(strongest_onset_2['onset']),
               -1.0, 1.0, colors='green', zorder=3)
    plt.plot(y_cut, zorder=1)
    plt.ylabel('Amplitude in Floating Point')
    plt.xlabel('Samples')
    plt.title('Onset Detection with Delay Effect')

    plt.subplot(212)
    plt.plot(onset_strength[0])
    plt.ylabel('Onset Strength')
    plt.xlabel('Frames')
    plt.show()
def strip_audio(x, frame_length=1024, hop_length=256, rms_ths=0.2):
    # compute frame-wise RMS energy (librosa.feature.rms replaces the
    # deprecated librosa.feature.rmse)
    rmse = librosa.feature.rms(y=x, frame_length=frame_length,
                               hop_length=hop_length)[0]
    rms_ratio = rmse / rmse.max()
    active_frames = np.nonzero(rms_ratio > rms_ths)[0]
    assert len(active_frames) > 0, "there is no voice part in the wav"

    # strip to the continuous active part
    s_sample = librosa.frames_to_samples(active_frames, hop_length=hop_length)[0]
    e_sample = librosa.frames_to_samples(active_frames, hop_length=hop_length)[-1]

    # optional: plot the RMS over the waveform of x
    # import matplotlib.pyplot as plt
    # energy = np.array([
    #     sum(abs(x[i:i + frame_length] ** 2))
    #     for i in range(0, len(x), hop_length)
    # ])
    # frames = range(len(energy))
    # t = librosa.frames_to_time(frames, sr=sr, hop_length=hop_length)
    # librosa.display.waveplot(x, sr=sr, alpha=0.4)
    # plt.plot(t, energy / energy.max(), 'r--')              # normalized for visualization
    # plt.plot(t[:len(rmse)], rmse / rmse.max(), color='g')  # normalized for visualization
    # plt.legend(('Energy', 'RMSE'))

    return x[s_sample:e_sample]
def beat_match(song1, song2, sr):
    """
    Creates two lists of length equal to the combined length of both songs.

    The first list is zero padded from the end of the first song until the
    end of the second song. The second list is zero padded from the beginning
    of the first song until the first beat of the last phrase of that same
    song. The second song is then appended to the second list. The lists are
    then added together.

    Input Parameters
    ------------------------
    song1: 1-D array containing sample points of the first song
    song2: 1-D array containing sample points of the second song
    sr: integer representing the rate at which the songs are sampled

    Returns
    ------------------------
    A 1-D array containing a synchronized mixture of both songs
    """
    print('begin beatmatch')
    tempo1, beat1 = beat_track(song1)
    tempo2, beat2 = beat_track(song2)
    # convert beat frames to sample indices
    beat1 = librosa.frames_to_samples(beat1)
    beat2 = librosa.frames_to_samples(beat2)
    song2 = song2[beat2[0]:]

    phrases1 = len(beat1)
    fade_start = phrases1 - 32
    fade_sample = beat1[fade_start]
    fade_out_start = fade_sample
    fade_out_end = len(song2)
    phrases2 = len(beat2)
    fade_in_start = len(song1[:fade_sample])
    fade_in_end = fade_in_start + phrases2

    song2 = fade(song2, type="in", end=beat2[32])
    zeros2 = np.zeros(len(song1[:fade_sample]), dtype=np.float32)
    list2 = np.append(zeros2, song2)
    # list2 = fade(list2, type="in", start=fade_in_start, end=fade_in_end)

    song1 = fade(song1, type="out", start=fade_out_start)
    zeros1 = np.zeros(len(song2) - len(song1[fade_sample:]), dtype=np.float32)
    list1 = np.append(song1, zeros1)
    # list1 = fade(list1, type="out", start=fade_out_start, end=fade_out_end)

    mix = list1 + list2
    print('end beatmatch')
    return mix
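# --- Hedged usage sketch for beat_match(): the file names are placeholders,
# and fade() / beat_track() are project-level helpers assumed to be defined
# elsewhere (they are not librosa functions).
import librosa
import soundfile as sf

song1, sr = librosa.load('track_a.wav', sr=44100)
song2, _ = librosa.load('track_b.wav', sr=44100)
mix = beat_match(song1, song2, sr)
sf.write('mix.wav', mix, sr)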
def restretch(self):
    if self.final_offset >= 0:
        # positive offset: pad the start with silence to delay the vocal
        offset_samples = librosa.frames_to_samples(self.final_offset)
        noise = np.zeros(offset_samples)
        self.restretch_data = np.concatenate((noise, self.y_2), axis=0)
    else:
        # negative offset: drop samples from the start instead
        offset_samples = librosa.frames_to_samples(-self.final_offset)
        self.restretch_data = self.y_2[offset_samples:]
    # pad the end so the vocal matches the length of the reference signal
    padding = np.zeros(len(self.y) - len(self.restretch_data))
    self.final_vocal_audio = np.concatenate((self.restretch_data, padding), axis=0)
def feature_extract_blues(blues_track, sr, current_timesig, onset_threshold=0.7):
    # get rhythm overlay
    hop_length = 512
    blues_harm, blues_perc = librosa.effects.hpss(blues_track, margin=(1.0, 5.0))
    onset_env = librosa.onset.onset_strength(blues_perc, sr=sr, aggregate=np.median)
    _, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    times = librosa.frames_to_time(np.arange(len(onset_env)), sr=sr,
                                   hop_length=hop_length)

    # find the strongest beat and keep a four-beat window starting there
    prev_val = 0
    for i, b in enumerate(beats[:-3]):
        # get the corresponding onset envelope value
        t_b = times[b]
        on_f_b = librosa.time_to_frames([t_b], sr=sr, hop_length=hop_length)
        if librosa.util.normalize(onset_env)[on_f_b] >= prev_val:
            prev_val = librosa.util.normalize(onset_env)[on_f_b]
            keep_beat_start = b
            keep_beat_end = beats[i + 3]
            alert_start = i
    beat_start = librosa.frames_to_samples([keep_beat_start])[0]
    beat_end = librosa.frames_to_samples([keep_beat_end])[0]
    overlay_sample = blues_perc[beat_start:beat_end]

    # get beat samples
    beat_samples = librosa.frames_to_samples(beats, hop_length=hop_length)

    # get extracted subsample - using VS pipeline
    try:
        rep_samples_audio, num_seg = extract.extract_sample(blues_harm, sr, 1)
        signal_sample = rep_samples_audio[0][0]
    except Exception:
        print("Could not extract sample from VS Pipeline, using default..")
        mdpt = int(len(blues_harm) / 2)
        signal_sample = blues_harm[mdpt:mdpt + sr]

    return {
        'overlay': overlay_sample,
        'beats': beat_samples,
        'alert': signal_sample
    }
def slice_long_sample(y, sr, declick_samples=15, length_limit=None, fname=''):
    if length_limit and len(y) / sr > length_limit:
        y = y[0:length_limit * sr]

    # segment the signal at detected onsets
    onsets = rosa.onset.onset_detect(y=y, sr=sr, backtrack=True)
    onset_times = rosa.frames_to_samples(onsets)
    onset_times = np.concatenate([onset_times, [len(y)]])
    segmented = [
        y[onset_times[n]:onset_times[n + 1]]
        for n in range(len(onset_times) - 1)
    ]
    segmented = [s for s in segmented if len(s) >= declick_samples]

    if declick_samples > 1:
        # short linear ramp at the start of each segment to avoid clicks
        declick_envelope = np.linspace(1 / declick_samples,
                                       1 - (1 / declick_samples),
                                       declick_samples)
        for i in range(len(segmented)):
            segmented[i][0:declick_samples] *= declick_envelope

    slices = []
    for i, s in enumerate(segmented):
        if not i % poll_every and i > 1:
            print(rf'calculating features for slice {i}/{len(segmented)} of {fname}...')
        slices.append(ausl.AudioSlice(s, sr, fname))
    return slices, onset_times
def onset(x, sr):
    # Short-time Fourier transform (for EQ, must do inverse Fourier transform after)
    X = librosa.stft(x)

    # Find the frames at which onsets occur
    onset_frames = librosa.onset.onset_detect(y=x, sr=sr)
    print("Onset Frames = " + str(onset_frames) + "\n ")

    # Find the times, in seconds, at which onsets occur in the audio signal
    onset_times = librosa.frames_to_time(onset_frames, sr=sr)
    print("Onset Times = " + str(onset_times) + "\n ")

    # Convert the onset frames into sample indices to play a "BEEP" sound on each
    onset_samples = librosa.frames_to_samples(onset_frames)
    print("Onset Samples = " + str(onset_samples) + "\n ")

    # Use the "length" parameter so the click track is the same length as the
    # original signal; pass sr so click times are placed correctly
    clicks = librosa.clicks(times=onset_times, sr=sr, length=len(x))

    # Play the click track added to the original signal
    sd.play(x + clicks, sr)

    # Display the waveform of the original signal
    librosa.display.waveplot(x, sr)
    plt.title("Original Signal")
    plt.show()  # Close window to resume

    return onset_frames, onset_times, onset_samples
def __init__(self, dataset, sr=22050, frameSize=2048, hopSize=512,
             transform=None, cacheSize=4):
    self.dataset = dataset
    self.sr = sr
    self.frameSize = frameSize
    self.hopSize = hopSize
    self.transform = transform
    self.cacheSize = cacheSize
    self.frameDt = float(frameSize) / sr

    # count frames in dataset
    nFramesList = []
    for pathPair in dataset.pathPairs:
        wavPath = pathPair.wav
        duration = librosa.get_duration(filename=wavPath)
        nSamples = librosa.time_to_samples(duration, sr=self.sr)
        nFrames = 1 + int((nSamples - self.frameSize) / float(self.hopSize))
        nFramesList.append(nFrames)

        # check validity: the last frame must fit inside the file
        sStart = librosa.frames_to_samples(nFrames - 1, hop_length=self.hopSize)
        sEnd = sStart + self.frameSize
        assert (nSamples > 0) and (sEnd <= nSamples), \
            f'{nFrames}:{sStart}_{sEnd}, {nSamples}'

    self.frameCumsum = np.cumsum(nFramesList)

    # FIFO cache
    self._sampleCache = deque(maxlen=cacheSize)
    self._sampleIdxCache = deque(maxlen=cacheSize)
def generate_sine_midi_note(f0_info, sr, n_duration):
    f0 = f0_info[0]
    A = remap(f0_info[1], CdB.min(), CdB.max(), 0, 1)
    duration = librosa.frames_to_time(n_duration, sr=fs, hop_length=hop_length)

    # Generate music21 note (half the duration, rounded to the nearest
    # 0.02 s for music21 compatibility)
    note_duration = 0.02 * np.around(duration / 2 / 0.02)
    midi_velocity = int(round(remap(f0_info[1], CdB.min(), CdB.max(), 0, 127)))
    if f0 is None:
        try:
            note_info = Rest(type=mm.secondsToDuration(note_duration).type)
        except DurationException:
            note_info = None
        f0 = 0
    else:
        midi_note = round(librosa.hz_to_midi(f0))
        try:
            note = Note(midi_note, type=mm.secondsToDuration(note_duration).type)
            note.volume.velocity = midi_velocity
            note_info = [note]
        except DurationException:
            note_info = None
    if note_info is None:
        return None

    # Generate sine wave matching the note's pitch and duration
    n = np.arange(librosa.frames_to_samples(n_duration, hop_length=hop_length))
    sine_wave = A * np.sin(2 * np.pi * f0 * n / float(sr))
    return [sine_wave, note_info]
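# --- The synthesis step of generate_sine_midi_note() in isolation (sketch):
# four frames of a 440 Hz sine at amplitude 0.5, assuming hop_length=512.
import numpy as np
import librosa

sr, hop_length = 22050, 512
n = np.arange(librosa.frames_to_samples(4, hop_length=hop_length))
sine_wave = 0.5 * np.sin(2 * np.pi * 440 * n / float(sr))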
def strip(y, frame_length, hop_length=512):
    """
    Remove leading silence from an audio track.

    :param y: (np.ndarray) audio signal
    :param frame_length: (int)
    :param hop_length: (int)
    :return: Audio signal with leading silence removed
    """
    # compute RMS energy per frame
    rms = librosa.feature.rms(y=y, frame_length=frame_length,
                              hop_length=hop_length, center=True)

    # identify the first frame index where RMS exceeds a threshold
    thresh = 0.01
    frame_index = 0
    while rms[0][frame_index] < thresh:
        frame_index += 1

    # convert units of frames to samples
    start_sample_index = librosa.frames_to_samples(frame_index,
                                                   hop_length=hop_length)

    # return the trimmed signal
    return y[start_sample_index:]
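# --- Hedged usage sketch for strip(): 'voice.wav' is a placeholder path, and
# frame_length=2048 is an assumed analysis window, not a value from the source.
import librosa

y, sr = librosa.load('voice.wav', sr=None)
trimmed = strip(y, frame_length=2048, hop_length=512)
print(len(y) - len(trimmed), 'leading samples removed')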
def slicer(song, n_beats=16, duration=0):
    '''
    Takes in a song and its segments and computes the largest total segment
    in the dictionary. To do this it sums up each of the dictionary entries
    using that disgusting(tm) comprehension below. A segment has to be longer
    than the given duration in order to be considered in the sum. It then
    takes the max dictionary entry and returns the slice with those bounds.

    :param song: (Song) | song to slice
    :param n_beats: (int) | number of beats to keep from the slice
    :param duration: (float) | min duration (in seconds)
    :return: slice (Slice) | segmented slice
    '''
    largest_seg = max(
        song.segments.items(),
        key=lambda x: sum(
            [z[1] - z[0] for z in x[1] if z[1] - z[0] >= duration]))[1]
    max_pair = tuple(max(largest_seg, key=lambda pair: pair[1] - pair[0]))
    slice = Slice(song.path, offset=max_pair[0], duration=max_pair[1])

    # trim the slice to n_beats beats of its percussive component
    perc_y = librosa.effects.percussive(slice.y)
    beat_track = beatTrack(y=perc_y, sr=song.load.sr)
    end_sample = librosa.frames_to_samples(beat_track.beats)[n_beats]
    slice.y = slice.y[:end_sample]
    return slice
def generate_note(self, f0_info, n_duration, round_to_sixteenth=True):
    f0 = f0_info[0]
    a = remap(f0_info[1], self.cqt.min(), self.cqt.max(), 0, 1)
    duration = librosa.frames_to_time(n_duration, sr=self.sr,
                                      hop_length=self.hop_length)
    # round to the nearest 0.02 s for music21 compatibility
    note_duration = 0.02 * np.around(duration / 0.02)
    midi_duration = second_to_quarter(duration, self.tempo)
    midi_velocity = int(round(remap(f0_info[1], self.cqt.min(),
                                    self.cqt.max(), 80, 120)))
    if round_to_sixteenth:
        midi_duration = round(midi_duration * 16) / 16
    try:
        if f0 is None:
            midi_note = None
            note_info = Rest(type=self.mm.secondsToDuration(note_duration).type)
            f0 = 0
        else:
            midi_note = round(librosa.hz_to_midi(f0))
            note = Note(librosa.midi_to_note(midi_note),
                        type=self.mm.secondsToDuration(note_duration).type)
            note.volume.velocity = midi_velocity
            note_info = [note]
    except DurationException:
        # fall back to fixed note types if music21 rejects the duration
        if f0 is None:
            midi_note = None
            note_info = Rest(type='32nd')
            f0 = 0
        else:
            midi_note = round(librosa.hz_to_midi(f0))
            note = Note(librosa.midi_to_note(midi_note), type='eighth')
            note.volume.velocity = midi_velocity
            note_info = [note]
    midi_info = [midi_note, midi_duration, midi_velocity]

    # synthesize a sine wave matching the note's pitch and duration
    n = np.arange(librosa.frames_to_samples(n_duration,
                                            hop_length=self.hop_length))
    sine_wave = a * np.sin(2 * np.pi * f0 * n / float(self.sr))
    return [sine_wave, midi_info, note_info]
def find_localmax(self,
                  signal,
                  noise_threshold=0.0,  # Range: [0.0, 1.0].
                  jump=None,
                  frame_length=1024):
    """Find local maxima of the signal's RMS envelope, as sample indices."""
    if not librosa_available:
        print('ERROR: Error in find_localmax. Librosa not installed.')
        index_list = []
        return index_list

    # Adjust for comparable results for low sampling rates.
    if self.sampling_freq < 300000:
        frame_length = int(frame_length / 2)
    if jump is None:
        jump = int(self.sampling_freq / 1000)  # Default = 1 ms.

    y = signal.copy()
    if noise_threshold > 0.0:
        y[(np.abs(y) < noise_threshold)] = 0.0

    # librosa.feature.rms replaces the deprecated librosa.feature.rmse
    rmse = librosa.feature.rms(y=y, hop_length=jump,
                               frame_length=frame_length, center=True)
    locmax = librosa.util.localmax(rmse.T)
    maxindexlist = [index for index, a in enumerate(locmax) if a]

    # Original index list is related to jump length. Convert to samples.
    index_list = librosa.frames_to_samples(maxindexlist, hop_length=jump)
    return index_list
def beats_to_sample(beats, y, sr):
    """
    Align supposed beats to the peak of energy in the y signal.

    beats: np.ndarray
        frame indices where beats are supposed to be
    y: np.ndarray
        input signal
    sr: int
        samplerate

    Returns
    -------
    y_beat: np.ndarray
        array with 1 where there is a beat
    beats_indices: np.ndarray
        array with the sample indices of the beats
    """
    y_beat = np.zeros(y.shape)
    margin = int(0.1 * sr)  # search +/- 100 ms around each nominal beat
    for beat in frames_to_samples(beats):
        bs_index = beat - margin + np.argmax(np.abs(y[beat - margin:beat + margin]))
        y_beat[bs_index] = 1
    return y_beat, np.where(y_beat == 1)[0]
def __test(x, y, hop_length, n_fft):
    y_test = librosa.frames_to_samples(x, hop_length=hop_length, n_fft=n_fft)
    assert np.allclose(y, y_test)
    y = np.asanyarray(y)
    assert y.shape == y_test.shape
    assert y.ndim == y_test.ndim
def get_beats_samples(y, sr):
    # onset strength (note attack start points)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    # beat positions (frame indices)
    _, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr,
                                       hop_length=512)
    # beat positions (sample indices)
    beat_samples = librosa.frames_to_samples(beats, hop_length=512)
    return beats, beat_samples
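# --- Usage sketch for get_beats_samples() (placeholder path): since the
# function uses hop_length=512 throughout, the returned sample indices are
# exactly the beat frames scaled by 512.
import numpy as np
import librosa

y, sr = librosa.load('song.wav')
beats, beat_samples = get_beats_samples(y, sr)
assert np.array_equal(beat_samples, beats * 512)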
def onset_detect(audio_vector, sr):
    """Returns the onsets detected in a given audio file, as sample indices."""
    # ------- Onset and tempo detection -------
    onset = librosa.onset.onset_detect(y=audio_vector, sr=sr, backtrack=True)
    onset_env = librosa.onset.onset_strength(y=audio_vector, sr=sr)
    onset_sec = librosa.frames_to_time(onset, sr=sr)
    onset_samples = librosa.frames_to_samples(onset)
    return onset_samples
def apply_trim_offset(self, frame):
    # shift a frame index by the trim offset, going through the sample domain
    return (
        librosa.samples_to_frames(
            librosa.frames_to_samples(frame) + self.trim_offset
        )
        if self.trim_offset
        else frame
    )
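# --- Worked example of the round trip apply_trim_offset() performs, shown
# outside the class (both conversions default to hop_length=512; the offset
# value is hypothetical). Because samples_to_frames floor-divides by the hop
# length, offsets smaller than one hop can vanish.
import librosa

frame, trim_offset = 10, 600  # offset in samples
shifted = librosa.samples_to_frames(
    librosa.frames_to_samples(frame) + trim_offset)
assert shifted == (10 * 512 + 600) // 512  # == 11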
def overlap(siga, sigb, overlap_beats, sr, fade):
    # Trim leading and trailing silence
    siga = np.trim_zeros(siga)
    sigb = np.trim_zeros(sigb)

    # Get beat frames for each track and convert to track sample indices
    atempo, abeatframes = librosa.beat.beat_track(y=siga, sr=sr)
    abeats = librosa.frames_to_samples(abeatframes)
    btempo, bbeatframes = librosa.beat.beat_track(y=sigb, sr=sr)
    bbeats = librosa.frames_to_samples(bbeatframes)

    # If fade is specified, cross-fade both tracks into each other
    if fade:
        print("Fading tracks")
        fadeindices = int(bbeats[overlap_beats])
        fade = np.linspace(0, 1, num=fadeindices + 1)
        for i in range(0, fade.shape[0]):
            sigb[i] *= fade[i]
            siga[siga.shape[0] - 1 - i] *= fade[i]

    # Create the output signal
    mix = np.zeros(sigb.shape[0] + siga.shape[0])
    # Prep it with the first track
    mix[:siga.shape[0]] = siga
    # The sample index of the beat where the second track should start
    startframe = abeats[abeats.shape[0] - overlap_beats + 4]
    mix[startframe - bbeats[3]:startframe - bbeats[3] + sigb.shape[0]] += sigb
    mix = np.trim_zeros(mix)
    return mix
def compute_segments_librosa(Y, sr, numparts):
    myprint('Computing parts segmentation')
    bounds = librosa.segment.agglomerative(Y, numparts)
    bound_times = librosa.frames_to_time(bounds, sr=sr)
    bound_samples = librosa.frames_to_samples(bounds, hop_length=512, n_fft=2048)
    myprint('bound_samples = %s / %s' % (bound_samples.shape, bound_samples))
    return bounds, bound_times, bound_samples
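# --- Possible input for compute_segments_librosa() (sketch, placeholder
# path): any frame-synchronous feature matrix works; a chroma matrix is a
# common choice. myprint() is a project-level helper assumed to exist.
import librosa

y, sr = librosa.load('song.wav')
Y = librosa.feature.chroma_cqt(y=y, sr=sr)
bounds, bound_times, bound_samples = compute_segments_librosa(Y, sr, numparts=8)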
def analyze_signals(original_signal, test_signal, hop_size, start_time,
                    end_time, sr, start_bpm, offbeat_factor):
    # offbeat_factor is how far off the beat the person is allowed to be
    test, sr = librosa.load(test_signal, sr)
    test_normalizer = np.max(test)
    test = test / float(test_normalizer)
    original, sr = librosa.load(original_signal, sr)
    original_normalizer = np.max(original)
    original = original / float(original_normalizer)

    test_onset_env, test_beat_frames = estimated_beat(
        test_signal, hop_size, start_time, end_time, sr, start_bpm)
    original_onset_env, original_beat_frames = estimated_beat(
        original_signal, hop_size, start_time, end_time, sr, start_bpm)

    # convert beat frames to sample indices and score the match
    test_beats = librosa.frames_to_samples(test_beat_frames, hop_length=hop_size)
    original_beats = librosa.frames_to_samples(original_beat_frames,
                                               hop_length=hop_size)
    beat_score = calculate_rank(original_beats, test_beats, sr, sr * offbeat_factor)

    plt.figure(1)
    ax1 = plt.subplot(2, 1, 1)
    plt.plot(original, label='Signal')
    plt.vlines(original_beats, -2, 2, alpha=.5, color='r',
               linestyle='solid', linewidth=3, label='Beats')
    plt.legend(frameon=True, framealpha=0.75)
    # Limit the plot to an X-second window
    plt.xlim([start_time * sr, end_time * sr])
    plt.xticks(np.linspace(start_time, end_time, 5) * sr,
               np.linspace(start_time, end_time, 5))
    plt.xlabel('Time (s)')
    plt.tight_layout()

    ax2 = plt.subplot(2, 1, 2, sharex=ax1, sharey=ax1)
    plt.plot(test, label='Signal')
    plt.vlines(test_beats, -2, 2, alpha=.5, color='g',
               linestyle='solid', linewidth=3, label='Beats')
    plt.legend(frameon=True, framealpha=0.75)
    # Limit the plot to an X-second window
    plt.xlim([start_time * sr, end_time * sr])
    plt.xticks(np.linspace(start_time, end_time, 5) * sr,
               np.linspace(start_time, end_time, 5))
    plt.xlabel('Time (s)')
    plt.tight_layout()
    plt.subplots_adjust(hspace=0)
    plt.show()

    return beat_score
def split_song(mix_in, mix_out, bpm, file_name):
    file_path = './data/mp3/' + file_name
    audio, _ = librosa.load(file_path, sr=SR)
    _, beats = librosa.beat.beat_track(y=audio, sr=SR, bpm=bpm)

    # sample indices for the body and the two transition windows
    body = librosa.frames_to_samples(beats[mix_in + MIX_LEN:mix_out])
    trans_in = librosa.frames_to_samples(beats[mix_in:mix_in + MIX_LEN])
    trans_out = librosa.frames_to_samples(beats[mix_out:mix_out + MIX_LEN])

    body_audio = audio[body[0]:body[-1]]
    in_audio = audio[trans_in[0]:trans_in[-1]]
    out_audio = audio[trans_out[0]:trans_out[-1]]

    file_name = file_name.split('.')[0]
    librosa.output.write_wav('./data/chopped/body/' + file_name + '.wav',
                             body_audio, SR)
    librosa.output.write_wav('./data/chopped/trans_in/' + file_name + '.wav',
                             in_audio, SR)
    librosa.output.write_wav('./data/chopped/trans_out/' + file_name + '.wav',
                             out_audio, SR)
def splitSamples(sourceDir, outputDir, mode=None):
    print('- Splitting samples for dataset:', mode)
    source_filelist = os.listdir(sourceDir)
    for f in source_filelist:
        print('processing', f)
        outputSamples = os.path.join(outputDir, 'dataset_' + mode)
        y, sr = librosa.load(os.path.join(sourceDir, f))
        # trim silence at beginning and end
        y, index = librosa.effects.trim(y)

        # detect onsets
        o_env = librosa.onset.onset_strength(y, sr=sr, feature=librosa.cqt)
        onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)

        vectors = []
        words = []
        filenames = []
        onset_samples = librosa.frames_to_samples(onset_frames)
        # append the end of the signal as the final segment boundary
        onset_samples = np.concatenate([onset_samples, [len(y)]])
        starts = onset_samples[0:-1]
        stops = onset_samples[1:]

        analysis_folder = sourceDir
        samples_folder = os.path.join(outputSamples, f)
        try:
            os.makedirs(samples_folder)
        except OSError:
            pass

        pbar = ProgressBar()
        for i, (start, stop) in enumerate(pbar(list(zip(starts, stops)))):
            audio = y[start:stop]
            filename = os.path.join(samples_folder, str(i) + '.wav')
            librosa.output.write_wav(filename, audio, sr)
            vector = get_fingerprint(audio, sr=sr)
            word = basename(filename)
            vectors.append(vector)
            words.append(word)
            filenames.append(filename)

        np.savetxt(os.path.join(analysis_folder, 'vectors'), vectors,
                   fmt='%.5f', delimiter='\t')
        np.savetxt(os.path.join(analysis_folder, 'words'), words, fmt='%s')
        np.savetxt(os.path.join(analysis_folder, 'filenames.txt'), filenames,
                   fmt='%s')
def feature_extract_jazz(jazz_track, sr, num_segments=8, seg_thresh=3):
    # segment boundaries
    mfcc = librosa.feature.mfcc(y=jazz_track, sr=sr)
    bounds = librosa.segment.agglomerative(mfcc, num_segments)
    sample_bounds = librosa.frames_to_samples(bounds)
    sample_intervals = boundaries_to_intervals(sample_bounds)

    # clean up segments shorter than seg_thresh seconds
    del_list = []
    for i, intr in enumerate(sample_intervals):
        if intr[1] - intr[0] < seg_thresh * sr:
            del_list.append(i)
    sample_intervals = np.delete(sample_intervals, del_list, axis=0)

    # TODO: determine segment key / progression
    shift_by = []

    # extracted subsample - using VS pipeline
    jazz_harm, jazz_perc = librosa.effects.hpss(jazz_track)
    try:
        rep_samples_audio, num_seg = extract.extract_sample(jazz_harm, sr, 1)
        signal_sample = rep_samples_audio[0][0]
    except Exception:
        print("Could not extract sample from VS Pipeline, using default..")
        mdpt = int(len(jazz_harm) / 2)
        signal_sample = jazz_harm[mdpt:mdpt + sr]

    # extract beats to overlay the VS sample
    onset_env = librosa.onset.onset_strength(jazz_perc, sr=sr, aggregate=np.median)
    _, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    beat_samples = librosa.frames_to_samples(beats)

    return {
        'bounds': sample_intervals,
        'shift': shift_by,
        'alert': signal_sample,
        'beats': beat_samples
    }
def print_tf(tf, rate, file=sys.stdout):
    def fprint(*args, **kwargs):
        print(*args, file=file, **kwargs)

    # header row: the number of frames, then each frame's time in seconds
    fprint(tf.shape[1], end="")
    for i in range(tf.shape[1]):
        fprint(' {}'.format(librosa.frames_to_samples(i) / rate), end="")
    fprint()

    # one row per frequency bin: the bin frequency, then the magnitudes
    for i, freq in enumerate(librosa.fft_frequencies(sr=rate)):
        row = tf[i]
        fprint(freq, end="")
        for x in row:
            fprint(' {}'.format(np.abs(x)), end="")
        fprint()
def test_frames_to_samples(frames, hop_length, n_fft):
    samples = librosa.frames_to_samples(frames, hop_length=hop_length,
                                        n_fft=n_fft)
    frames = np.asanyarray(frames)
    assert frames.shape == samples.shape
    assert frames.ndim == samples.ndim
    if n_fft is None:
        assert np.allclose(samples, frames * hop_length)
    else:
        assert np.allclose((samples - n_fft // 2) // hop_length, frames)
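# --- A concrete instance of the identity the test checks: with centered
# frames, frame k maps to sample k * hop_length + n_fft // 2.
import numpy as np
import librosa

frames = np.arange(4)
samples = librosa.frames_to_samples(frames, hop_length=512, n_fft=2048)
assert np.array_equal(samples, frames * 512 + 1024)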
def get_downbeats(y, tempo, beat_frames, sr):
    # beat_frames is given in samples; convert to frames so it can index the
    # onset envelope
    measures = len(beat_frames) // BEATS
    beat_frames = librosa.samples_to_frames(beat_frames)
    onset_env = librosa.onset.onset_strength(y, sr=sr, aggregate=np.median)
    beat_strengths = onset_env[beat_frames]

    # fold beats into measures of BEATS beats and pick the strongest position
    measure_beat_strengths = beat_strengths[:measures * BEATS].reshape(-1, BEATS)
    beat_pos_strength = np.sum(measure_beat_strengths, axis=0)
    downbeat_pos = np.argmax(beat_pos_strength)

    full_measure_beats = beat_frames[:measures * BEATS].reshape(-1, BEATS)
    downbeat_frames = full_measure_beats[:, downbeat_pos]
    return librosa.frames_to_samples(downbeat_frames)
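# --- Usage sketch for get_downbeats(): BEATS is a module-level constant for
# the assumed meter (4 here), and despite its name the beat_frames argument
# is expected in samples, since the function converts it with
# samples_to_frames. The path is a placeholder.
import librosa

BEATS = 4
y, sr = librosa.load('song.wav')
tempo, beat_samples = librosa.beat.beat_track(y=y, sr=sr, units='samples')
downbeat_samples = get_downbeats(y, tempo, beat_samples, sr)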
def strip(signal, frame_length=512, hop_length=256):
    # First pass: trim leading silence.
    # Compute RMS energy per frame.
    rmse = librosa.feature.rms(y=signal, frame_length=frame_length,
                               hop_length=hop_length, center=True)
    # Identify the first frame index where RMS exceeds a threshold.
    thresh = 0.001
    frame_index = 0
    while rmse[0][frame_index] < thresh:
        frame_index += 1
    # Convert units of frames to samples.
    start_sample_index = librosa.frames_to_samples(frame_index,
                                                   hop_length=hop_length)
    signal = signal[start_sample_index:]

    # Second pass: reverse the signal and repeat to trim trailing silence.
    signal = np.array(list(signal)[::-1])
    rmse = librosa.feature.rms(y=signal, frame_length=frame_length,
                               hop_length=hop_length, center=True)
    frame_index = 0
    while rmse[0][frame_index] < thresh:
        frame_index += 1
    start_sample_index = librosa.frames_to_samples(frame_index,
                                                   hop_length=hop_length)
    signal = np.array(signal[start_sample_index:])

    # Un-reverse and return the trimmed signal.
    return np.array(list(signal)[::-1])
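# --- For comparison (sketch, placeholder path): librosa.effects.trim removes
# leading and trailing silence in one call; top_db=60 here is an assumed
# threshold, not tuned to match the RMS threshold above.
import librosa

signal, sr = librosa.load('voice.wav', sr=None)
trimmed, index = librosa.effects.trim(signal, top_db=60,
                                      frame_length=512, hop_length=256)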
def reconstruct(features, n_fft=2048, sr=22050, hop_length=None):
    if hop_length is None:
        hop_length = n_fft // 4

    # the result will be a fraction shorter than the original
    wave = np.zeros(int(lr.frames_to_samples(features.shape[0],
                                             hop_length=hop_length,
                                             n_fft=n_fft)),
                    dtype=np.float32)

    for frame, feature_slice in enumerate(features):
        sample_start = int(lr.frames_to_samples(frame, hop_length=hop_length,
                                                n_fft=n_fft))
        wave_slice = reconstruct_slice(feature_slice, n_fft, sr)
        sample_end = sample_start + len(wave_slice)

        # clip the final slice so it does not run past the buffer
        # (not too sure about this)
        if len(wave[sample_start:sample_end]) < len(wave_slice):
            wave_slice = wave_slice[:len(wave[sample_start:sample_end])]

        # overlap-add with a Hann window (do I need to scale?)
        wave[sample_start:sample_end] += wave_slice * np.hanning(len(wave_slice))
    return wave
def process_audio(filename, frame_size, mel_bands, fmax, display):
    """
    Extract audio metadata and compute the dynamic spectrogram.

    Prepare the audio file and process it to compute the dynamic
    spectrograms block by block.

    Args:
        filename (str): Path to the audio file.
        frame_size (int): Frame size for the "per block" processing.
        mel_bands (int): Number of mel bands for the static spectrogram.
        fmax (int): Upper frequency bound for the static spectrogram.
        display (boolean): Display or save the plot.

    Returns:
        None

    Todo:
        - Check that the samplerate of the file corresponds to the
          samplerate in the configuration file.

    Note:
        According to the 2016 baseline code, the frame size is 40 ms with
        a hop size of 50%.
    """
    samples = librosa.frames_to_samples([frame_size])
    chan_nb, samplerate = extract_audio_data(filename)
    counter = 0
    for block in sfblocks(filename, blocksize=samples[0]):
        counter += 1
        # separate the channels to compute the spectrograms
        for chan in np.arange(chan_nb):
            if chan_nb < 2:
                y = block
            else:
                y = block[:, chan]
            # Compute the dynamic and static spectrograms
            dynamic_spectrogram(y, filename, block_nb=counter, display=display)
            static_spectrogram(y, filename, counter, mel_bands, fmax,
                               display=display)
def get_drum_wav(percussion, width=5, n=None):
    # Compute volume shaper: median-filter the normalized percussive envelope
    percussion = librosa.util.normalize(percussion.ravel())
    v = scipy.ndimage.median_filter(percussion, width, mode='mirror')
    v = np.atleast_2d(v)

    # Resynthesize noise bursts shaped by the envelope
    wav = synthesize(librosa.frames_to_samples(np.arange(v.shape[-1]),
                                               hop_length=hop_length),
                     v,
                     fmin=librosa.midi_to_hz(0),
                     bins_per_octave=12,
                     wave=noise,
                     n=n)[0]
    return wav
def __test(times, frames, sr, hop_length, click_freq, click_duration,
           click, length):
    y = librosa.clicks(times=times,
                       frames=frames,
                       sr=sr,
                       hop_length=hop_length,
                       click_freq=click_freq,
                       click_duration=click_duration,
                       click=click,
                       length=length)

    if times is not None:
        nmax = librosa.time_to_samples(times, sr=sr).max()
    else:
        nmax = librosa.frames_to_samples(frames, hop_length=hop_length).max()

    if length is not None:
        assert len(y) == length
    elif click is not None:
        assert len(y) == nmax + len(click)
def get_wav(cq, nmin=60, nmax=120, width=5, max_peaks=1, wave=None, n=None):
    # Slice down to the bass range
    cq = cq[nmin:nmax]

    # Pick peaks at each time step of the log-power spectrum
    # (power_to_db replaces the old librosa.logamplitude here)
    mask = peakgram(librosa.power_to_db(cq**2, top_db=60, ref=np.max),
                    max_peaks=max_peaks)

    # Smooth in time
    mask = scipy.ndimage.median_filter(mask, size=(1, width), mode='mirror')

    # Resynthesize with some magnitude compression
    wav = synthesize(librosa.frames_to_samples(np.arange(cq.shape[-1]),
                                               hop_length=hop_length),
                     mask * cq**(1. / 3),
                     fmin=librosa.midi_to_hz(nmin + MIDI_MIN),
                     bins_per_octave=12,
                     wave=wave,
                     n=n)[0]
    return wav
# THE KEY FUNCTION of separation *******
y_harmonic, y_percussive = librosa.effects.hpss(y)
x_harmonic, x_percussive = librosa.effects.hpss(x)

# beats
tempo_y, beats_y = librosa.beat.beat_track(y=y_percussive, sr=sr_y, trim=True)
tempo_x, beats_x = librosa.beat.beat_track(y=x_percussive, sr=sr_x, trim=True)

# adjust x to be the same tempo as y
ym = librosa.effects.time_stretch(y, tempo_x / tempo_y)

# remeasure tempo of y_matched
ym_harmonic, ym_percussive = librosa.effects.hpss(ym)
tempo_ym, beats_ym = librosa.beat.beat_track(y=ym_percussive, sr=sr_y, trim=True)

# PHASE the tracks
# get arrays of the sample indices of the beats
beats_i_x = librosa.frames_to_samples(beats_x)
beats_i_ym = librosa.frames_to_samples(beats_ym)

# cut off the tracks at the beats
x = x[beats_i_x[1]:]
ym = ym[beats_i_ym[1]:]

# mix the matched tracks (input array must be numpy.float32!)
mix = np.array([(a + b) / 2 for a, b in zip(ym, x)], dtype=np.float32)
librosa.output.write_wav('mixes/beat_matched.wav', mix, sr_y)
# LOAD OR CREATE S-MATRIX & NOVELTY VECTOR
s_matrix = init_smatrix(file_id, f, r, sample_duration)
novelty = init_novelty_vector(file_id, w, w_f, sample_duration, s_matrix)

# https://bmcfee.github.io/librosa/generated/librosa.util.peak_pick.html?
# TODO correlate to the beat, somehow
w_p = w_f / w_p_ratio
peaks = librosa.util.peak_pick(novelty, w_p, w_p, w_p, w_p, peak_window, w_p)

# cross reference beats and peaks
# peaks = cross_reference(beats, peaks, beat_threshold)

# assuming music is periodic...
peaks = filter_by_period(peaks, period_threshold, fpb)

# Sample a test segment
p_s = librosa.frames_to_samples(peaks)
if len(p_s) > 2:
    sample = y[p_s[1]:p_s[2]]
    librosa.output.write_wav('mixes/sampled.wav', sample, sr)
    loop = np.concatenate([sample, sample, sample])
    librosa.output.write_wav('mixes/loop.wav', loop, sr)

# Shuffle a test segment
p_s = librosa.frames_to_samples(peaks)
if len(p_s) >= 4:
    s1 = y[p_s[0]:p_s[1]]
    s2 = y[p_s[1]:p_s[2]]
    s3 = y[p_s[2]:p_s[3]]
    loop = np.concatenate([s3, s2, s1])
    librosa.output.write_wav('mixes/shuffle.wav', loop, sr)
def __test(x, y, hop_length, n_fft):
    y_test = librosa.frames_to_samples(x, hop_length=hop_length, n_fft=n_fft)
    assert np.allclose(y, y_test)
from analysis.pitch import *
from analysis.util import *

file_id = 'all'
audio_path = 'assets/' + file_id + '.wav'

# sr=None disables resampling
y, sr = librosa.load(audio_path, sr=None, duration=40.0)

# THE KEY FUNCTION of separation *******
y_harmonic, y_percussive = librosa.effects.hpss(y)
tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr, trim=False)

# PHRASE DETECT
phrases = get_phrase_intervals(file_id, y, sr, 1.0, 1.0, 4.0, 15, 0.13, tempo)
s_phrases = librosa.frames_to_samples(phrases)
y1 = np.array(y[s_phrases[1]:s_phrases[2]])
y2 = np.array(y[s_phrases[3]:s_phrases[4]])

# THE KEY FUNCTION of separation *******
y_harmonic, y_percussive = librosa.effects.hpss(y1)
y_harmonic_2, y_percussive_2 = librosa.effects.hpss(y2)
tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr, trim=False)

# We'll use a CQT-based chromagram here. An STFT-based implementation also
# exists in chroma_stft(). We'll use the harmonic component to avoid
# pollution from transients.
C = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
C2 = librosa.feature.chroma_cqt(y=y_harmonic_2, sr=sr)
pitch_sums_1 = get_pitch_sums(C)