def beat_match(song1, song2, sr):
    """
    Creates two arrays of equal length, spanning from the start of the first
    song to the end of the second song (which enters at the first beat of the
    first song's last 32-beat phrase). The first array holds the first song,
    zero-padded out to the end of the second song. The second array is
    zero-padded up to the first beat of the first song's last phrase, with the
    second song appended after the padding. The two arrays are then summed.

    Input Parameters
    ------------------------
    song1: 1-D array containing sample points of the first song
    song2: 1-D array containing sample points of the second song
    sr: integer sample rate at which both songs were sampled

    Returns
    ------------------------
    a 1-D array containing a synchronized mixture of both songs
    """
    print('begin beatmatch')
    tempo1, beat1 = beat_track(y=song1, sr=sr)
    tempo2, beat2 = beat_track(y=song2, sr=sr)
    # convert beat frames to sample indices
    beat1 = librosa.frames_to_samples(beat1)
    beat2 = librosa.frames_to_samples(beat2)
    # trim song2 so it starts on its first detected beat
    song2 = song2[beat2[0]:]
    phrases1 = len(beat1)
    fade_start = phrases1 - 32        # beat index where the outgoing fade begins
    fade_sample = beat1[fade_start]   # corresponding sample index in song1
    fade_out_start = fade_sample
    fade_out_end = len(song2)
    phrases2 = len(beat2)
    fade_in_start = len(song1[:fade_sample])
    fade_in_end = fade_in_start + phrases2
    # fade song2 in over roughly its first 32 beats
    song2 = fade(song2, type="in", end=beat2[32])
    zeros2 = np.zeros(len(song1[:fade_sample]), dtype=np.float32)
    list2 = np.append(zeros2, song2)
    # fade song1 out starting at its last 32-beat phrase
    song1 = fade(song1, type="out", start=fade_out_start)
    zeros1 = np.zeros(len(song2) - len(song1[fade_sample:]), dtype=np.float32)
    list1 = np.append(song1, zeros1)
    mix = list1 + list2
    print('end beatmatch')
    return mix
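# Usage sketch for beat_match above. This is illustrative, not part of the
# original module: "track_a.wav" / "track_b.wav" are placeholder paths, and a
# fade(signal, type=..., start=..., end=...) helper like the one referenced in
# beat_match is assumed to be importable from the same project.
import librosa
import soundfile as sf

song1, sr = librosa.load("track_a.wav", sr=None, mono=True)
song2, _ = librosa.load("track_b.wav", sr=sr, mono=True)
mix = beat_match(song1, song2, sr)
sf.write("beatmatched_mix.wav", mix, sr)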
def initialize_bpf(filename, filepath, only_show=False, rewrite=False):
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    timestart = time.time()
    y, sr = load(wav_filename, dtype="float32", res_type=TYPE)
    print("{LOAD TIME}:%f" % (time.time() - timestart))
    tempo, beats = beat_track(y=y, tightness=100)  # compute the main beat frames
    # compute all beat frames; tightness controls how strongly beats snap to
    # the estimated tempo, the lower the value the noisier the result
    tempo1, beats1 = beat_track(y=y, tightness=1)
    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)  # ----------- RMS envelope
    tempo = normalize_tempo(tempo)
    MAX_RMS = np.max(rms_envelope)
    AVERAGE_RMS = np.mean(rms_envelope)
    onset_all_beat = []
    frame_all_beat = []
    for beat in beats1:
        onset_all_beat.append(onset_envelope[beat])
        frame_all_beat.append(beat)
    AVERAGE_ONSET = np.mean(onset_all_beat)
    new_frames_list = []
    if not os.path.exists("dat/plt/%s.plt" % filename) or rewrite:
        print("No plt found, initializing...")
        plt_file = open("dat/plt/%s.plt" % filename, mode="w")
        plt_file.write(repr((filename, rms_envelope.T.tolist(), onset_all_beat,
                             frame_all_beat, MAX_RMS, AVERAGE_RMS, AVERAGE_ONSET)))
        plt_file.close()
    plt_file = open("dat/plt/%s.plt" % filename, mode="r")
    plt_file_content = eval(plt_file.read())
    plt_process = Process(target=plt_show, args=plt_file_content)
    plt_process.start()
    if not only_show:
        # keep only beats whose onset strength or RMS exceeds a fraction of
        # the track-wide level
        for beat in beats1:
            if onset_envelope[beat] > AVERAGE_ONSET / ONSET_DETECT_RATIO \
                    or rms_envelope.T[beat] > MAX_RMS / RMS_RATIO:
                new_frames_list.append(beat)
        print("{MAX_ONSET}:%f" % onset_envelope.max())
        new_beats_frame = np.array(new_frames_list)
        mainbeatlocation = frames_to_time(beats)
        beatlocation = frames_to_time(new_beats_frame).tolist()
        beatmain = []
        for beat in beatlocation:
            # each beat's distance to the nearest main beat, i.e. how "main"
            # this beat is
            p = abs(mainbeatlocation - beat)
            beatmain.append(p.min())
        file = open("dat/bpf/%s.bpf" % filename, mode="w")
        file.write(repr([tempo, beatlocation, beatmain, mainbeatlocation.tolist()]))
        file.close()
    if os.path.exists("dat/%s.wav" % filename):
        os.remove("dat/%s.wav" % filename)
    return "dat/bpf/%s.bpf" % filename
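# Standalone sketch of the beat-filtering rule used in initialize_bpf: keep
# only the beats whose onset strength or RMS exceeds a fraction of the
# track-wide level. "song.wav" is a placeholder path and the two ratios stand
# in for the module-level ONSET_DETECT_RATIO / RMS_RATIO constants.
import numpy as np
import librosa

y, sr = librosa.load("song.wav", mono=True)
_, beats = librosa.beat.beat_track(y=y, sr=sr, tightness=1)
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
rms_env = librosa.feature.rms(y=y)[0]
avg_onset = onset_env[beats].mean()
strong_beats = [b for b in beats
                if onset_env[b] > avg_onset / 2.0
                or rms_env[b] > rms_env.max() / 4.0]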
def generate_click(file, filename, click_freq, click_duration,
                   vol_adj_song, vol_adj_click, convert_folder):
    saveFileExt = filename.rsplit('.', 1)[1].lower()
    saveName = filename
    if saveFileExt != "wav":
        saveName = saveName.rsplit('.', 1)[0].lower() + ".wav"
    inputAudio, _, newName = convert_file(file, filename, saveName, convert_folder)
    sr = 44100
    _, beats = beat_track(y=inputAudio, sr=sr)
    clickAudio = clicks(
        frames=beats,                   # beat frames at which to place clicks
        sr=sr,                          # sample rate
        length=len(inputAudio),         # length of the song, so the click track aligns with it
        click_freq=click_freq,          # frequency of each click (in Hz)
        click_duration=click_duration   # duration of each click (in seconds)
    )
    inputAudio *= vol_adj_song
    clickAudio *= vol_adj_click
    sf.write(os.path.join(convert_folder, newName), inputAudio + clickAudio, sr)
    return newName
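# Minimal standalone variant of generate_click that skips the project-specific
# convert_file step and works directly on a local wav file; "song.wav" and the
# two volume scalars are placeholders.
import librosa
import soundfile as sf

y, sr = librosa.load("song.wav", sr=44100, mono=True)
_, beats = librosa.beat.beat_track(y=y, sr=sr)
click = librosa.clicks(frames=beats, sr=sr, length=len(y),
                       click_freq=1000.0, click_duration=0.1)
sf.write("song_with_clicks.wav", 0.8 * y + 0.5 * click, sr)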
def fingerprint_beat(audio_signal, audio_parameters):
    # Parameter(s)
    sample_rate = audio_parameters['sample_rate']
    tempo, beats = beat_track(y=audio_signal, sr=sample_rate)

    # Derive the time resolution from the estimated tempo, falling back to 3
    # if the tempo estimate is degenerate
    # time_resolution = audio_parameters['time_resolution']
    time_resolution = 3 * np.floor(tempo / 60)
    if time_resolution == 0:
        time_resolution = 3

    cqt_kernel = audio_parameters['cqt_kernel']
    neighborhood_size = audio_parameters['neighborhood_size']

    # Transform the signal into a log-scaled spectrogram using the CQT
    audio_spectrogram = spectrogram(audio_signal, sample_rate,
                                    time_resolution, cqt_kernel)

    # Convert the spectrogram to uint8 (to speed up medfilt2 in the original
    # Matlab version)
    audio_spectrogram = np.uint8(
        np.around(255 * audio_spectrogram / np.amax(audio_spectrogram)))

    # Segment the spectrogram into a binary image using an adaptive
    # thresholding method based on median filtering
    audio_fingerprint = (audio_spectrogram > ndimage.median_filter(
        audio_spectrogram, neighborhood_size, mode='reflect')).astype(float)

    return audio_fingerprint
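# Isolated sketch of the adaptive-thresholding step in fingerprint_beat: any
# non-negative spectrogram can be binarised against its local median. The
# random array and the 25x25 neighbourhood below are placeholders.
import numpy as np
from scipy import ndimage

spec = np.random.rand(128, 400).astype(np.float32)           # stand-in spectrogram
spec_u8 = np.uint8(np.around(255 * spec / np.amax(spec)))     # quantise to uint8
fingerprint = (spec_u8 > ndimage.median_filter(
    spec_u8, size=(25, 25), mode='reflect')).astype(float)    # 1 where above local median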
def get_tempo(self, just_beats=False):
    if not self.__opened:
        raise Exception('load an audio file first!')
    tempo_channels = list()
    # 1) create a 0.2-second burst representing a single beat click
    samples = np.arange(0, 0.2, 1 / self.__frame_rate)
    amp_mod = 0.2 / (np.sqrt(samples) + 0.2) - 0.2   # amplitude decay, range in [-0.2, 0.8]
    amp_mod[amp_mod < 0] = 0                         # clip the sub-zero part, range in [0, 0.8]
    # 220 Hz cosine burst with amplitude scaled to the loaded audio
    burst = np.max(self.__data) * np.cos(2 * np.pi * samples * 220) * amp_mod
    # 2) generate, per channel, a click track of beats as long as the loaded audio
    beat_channels = list()
    for i in range(self.__data.shape[1]):
        # detect beats for every single channel of the loaded audio
        # NOTE: beats is a list of times (in seconds) picked as beats for the tempo
        tempo, beats = beat_track(y=self.__data[:, i].astype(np.float32),
                                  sr=self.__frame_rate, units="time")
        beat_channels.append(beats)
        tempo_channel = np.zeros_like(self.__data[:, i])  # tempo_channel.shape = (sample number,)
        for b in beats:
            start = int(self.__frame_rate * b)
            end = min(start + burst.shape[0], tempo_channel.shape[0])
            tempo_channel[start:end] = burst[:end - start].astype(np.int16)
        tempo_channels.append(np.expand_dims(tempo_channel, axis=-1))
    return beat_channels if just_beats else tempo_channels
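# Standalone sketch of the beat-click synthesis in get_tempo: a 0.2 s, 220 Hz
# cosine burst with a decaying envelope written onto each detected beat;
# "song.wav" is a placeholder path and the result stays float rather than int16.
import numpy as np
import librosa

y, sr = librosa.load("song.wav", mono=True)
_, beat_times = librosa.beat.beat_track(y=y, sr=sr, units="time")
t = np.arange(0, 0.2, 1 / sr)
env = np.clip(0.2 / (np.sqrt(t) + 0.2) - 0.2, 0, None)        # decay in [0, 0.8]
burst = np.max(np.abs(y)) * np.cos(2 * np.pi * 220 * t) * env
click_track = np.zeros_like(y)
for b in beat_times:
    start = int(sr * b)
    end = min(start + len(burst), len(click_track))
    click_track[start:end] += burst[:end - start]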
def opn_file(self):
    # NOTE: assumes File() returns the path of the audio file to open
    qwer = File()
    self.msc = qwer
    print(self.msc)
    self.y, self.sr = librosa.core.load(self.msc)
    self.tempo, self.beats = bt.beat_track(y=self.y, sr=self.sr)
    print('loading complete')
    return qwer
def __init__(self, y, sr):
    '''
    Runs librosa's beat.beat_track() on the song and stores the result.

    :param y: (np.ndarray) | audio time series of the song
    :param sr: (int) | sampling rate of y

    Attributes set:
        tempo: (float) | estimated tempo in beats per minute
        beats: (np.ndarray) | detected beat frames
    '''
    tempo, beats = beat.beat_track(y=y, sr=sr, trim=False, start_bpm=160)
    self.tempo = tempo
    self.beats = beats
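# Usage sketch for the constructor above; "BeatInfo" is a placeholder name for
# the class it belongs to (the class name is not shown here) and "song.wav" is
# a placeholder path.
import librosa

y, sr = librosa.load("song.wav")
info = BeatInfo(y, sr)
print(info.tempo, len(info.beats))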
def track_beats(audio_fname):
    """Track beats in an audio file, cluster them by their features, and write
    the beat times and cluster labels to a JSON file under ./data/metadata."""
    try:
        # Track beats
        audio_file = MakeAudioReader(audio_fname)
        signal = audio_file.ReadSamplesFloat()
        signal = np.sum(signal, axis=0)
        beats = beat.beat_track(y=signal, sr=44100, units='time')[1]
        beat_intervals = list(zip(beats[:-1], beats[1:]))

        # Extract features for each beat interval
        analysis = [analyze(signal, beat[0]) for beat in beat_intervals]
        beat_features = [anal[0] for anal in analysis]
        powers = [anal[1] for anal in analysis]
        beat_features = np.vstack(beat_features)

        # Label beats: reduce dimensionality, then cluster
        pca = PCA(n_components=30)
        reduced_features = pca.fit_transform(beat_features)
        labels = KMeans(n_clusters=30, random_state=0).fit(reduced_features).labels_

        # (Optional) labeled per-beat audio snippets, scaled by their powers,
        # can be written to ./data/audio for inspection.

        track_data = {
            'beat_times': beats[:-1].tolist(),
            'beat_labels': labels.tolist()
        }
        out_dir = './data/metadata'
        out_file = os.path.splitext(os.path.basename(audio_fname))[0] + '.json'
        out_file = os.path.join(out_dir, out_file)
        with open(out_file, 'w') as f:
            json.dump(track_data, f)
    except Exception:
        print('Failed: ' + os.path.basename(audio_fname))
def beat_synchronous_chroma(song, sr):
    from librosa.beat import beat_track
    from librosa.feature import chroma_cqt
    from librosa.util import sync

    hop_length = 1024
    # use the same hop length for beat tracking and the chromagram so the
    # beat frames index the correct chroma columns
    tempo, beat_frames = beat_track(y=song, sr=sr, hop_length=hop_length)
    chromagram = chroma_cqt(y=song, sr=sr, hop_length=hop_length)
    return sync(chromagram, beat_frames)
def beat_synchronous_mfcc(song, sr):
    from librosa.beat import beat_track
    from librosa.feature import mfcc
    from librosa.util import sync

    hop_length = 1024
    # use the same hop length for beat tracking and the MFCCs so the beat
    # frames index the correct MFCC columns
    tempo, beat_frames = beat_track(y=song, sr=sr, hop_length=hop_length)
    mfccs = mfcc(y=song, sr=sr, hop_length=hop_length, n_mfcc=20, n_fft=4096)
    return sync(mfccs, beat_frames)
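# Example of pairing the two beat-synchronous feature helpers above;
# "song.wav" is a placeholder path. Both helpers aggregate over the same beat
# segmentation, so their outputs have matching column counts and can be
# stacked for downstream models.
import numpy as np
import librosa

song, sr = librosa.load("song.wav", mono=True)
chroma_sync = beat_synchronous_chroma(song, sr)   # shape: (12, n_segments)
mfcc_sync = beat_synchronous_mfcc(song, sr)       # shape: (20, n_segments)
features = np.vstack([chroma_sync, mfcc_sync])    # shape: (32, n_segments)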
def analyse_track(dset, index):
    """Analyse a track: extract the bpm and the distribution of notes from the
    bass line."""
    track = dset[index]
    mix = track.sum(0).mean(0)
    ref = mix.std()

    # skip leading silence
    starts = (abs(mix) >= 1e-2 * ref).float().argmax().item()
    track = track[..., starts:]

    cache = CACHE / dset.sig
    cache.mkdir(exist_ok=True, parents=True)
    cache_file = cache / f"{index}.pkl"
    cached = None
    if cache_file.exists():
        cached = try_load(cache_file)
        if cached is not None:
            tempo, events, hist_kr = cached

    if cached is None:
        # estimate tempo and beat events from the drum stem
        drums = track[0].mean(0)
        if drums.std() > 1e-2 * ref:
            tempo, events = beat_track(y=drums.numpy(), units='time', sr=SR)
        else:
            print("failed drums", drums.std(), ref)
            return None, track

        # estimate the note distribution from the loud parts of the bass stem
        bass = track[1].mean(0)
        r = rms(bass)
        peak = r.max()
        mask = r >= 0.05 * peak
        bass = bass[mask]
        if bass.std() > 1e-2 * ref:
            kr = torch.from_numpy(chroma_cqt(y=bass.numpy(), sr=SR))
            hist_kr = (kr.max(dim=0, keepdim=True)[0] == kr).float().mean(1)
        else:
            print("failed bass", bass.std(), ref)
            return None, track

    pickle.dump([tempo, events, hist_kr], open(cache_file, 'wb'))
    spec = Spec(tempo, events, hist_kr, track, index)
    return spec, None
def plt_show_solo(filename, filepath):
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    timestart = time.time()
    y, sr = load(wav_filename, dtype="float32", res_type=TYPE)
    print("{LOAD TIME}:%f" % (time.time() - timestart))
    # compute beat frames; tightness controls how strongly beats snap to the
    # estimated tempo, the lower the value the noisier the result
    tempo1, beats1 = beat_track(y=y, tightness=1)
    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)  # ----------- RMS envelope
    MAX_RMS = np.max(rms_envelope)
    AVERAGE_RMS = np.mean(rms_envelope)
    onset_all_beat = []
    frame_all_beat = []
    for beat in beats1:
        onset_all_beat.append(onset_envelope[beat])
        frame_all_beat.append(beat)
    AVERAGE_ONSET = np.mean(onset_all_beat)
    plt_show(filename, rms_envelope.T, onset_all_beat, frame_all_beat,
             MAX_RMS, AVERAGE_RMS, AVERAGE_ONSET)
def compute_beats_samp(y, sr):
    # beat positions returned as sample indices
    return beat_track(y=y, sr=sr, units='samples')

def compute_beats(y, sr):
    # beat positions returned as frame indices (librosa default)
    return beat_track(y=y, sr=sr)
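# Quick usage check for the two wrappers above; "song.wav" is a placeholder
# path. The first call returns beat positions as frame indices, the second as
# sample indices.
import librosa

y, sr = librosa.load("song.wav")
tempo, beat_frames = compute_beats(y, sr)
_, beat_samples = compute_beats_samp(y, sr)
print(tempo, beat_frames[:4], beat_samples[:4])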