def get_chords_old(_time, data, sr, filtered=False):
    """Assign one chord label to every segment delimited by ``_time``.

    The audio in ``data`` is cut at the times in ``_time`` (seconds), with the
    end of the signal (``len(data) / sr``) appended as the final boundary.
    Each slice is run through the deep-chroma chord recogniser on its own;
    when more than one chord is reported for a slice, the one with the
    longest duration is kept.

    Parameters:
        * _time: list of segment start times in seconds (must be a ``list``,
          it is concatenated with the end time).
        * data: audio samples, sliceable by sample index.
        * sr: sample rate used to convert seconds to sample indices.
        * filtered: when True, the four most frequent chord labels of the
          whole song are determined and every segment whose root (the part
          before ``:``) matches one of them is rewritten to that label.

    Returns:
        ``(chord_dictionary, chord_result)`` where ``chord_dictionary`` maps a
        chord label to the list of segment indices it was detected in (before
        any filtering) and ``chord_result`` is the per-segment label list.
    """
    from collections import Counter

    boundaries = _time + [len(data) / sr]
    segment_count = len(boundaries) - 1
    chord_result = [""] * segment_count

    # Both processors are loop-invariant, so build them exactly once.
    decode = DeepChromaChordRecognitionProcessor()
    extract_chroma = DeepChromaProcessor()

    top_chords = []
    if filtered:
        # The whole-song pass is only needed to rank chords for filtering.
        whole_song_chords = decode(extract_chroma(data))
        ranking = Counter(segment[2] for segment in whole_song_chords).most_common()
        print(ranking)
        top_chords = [label for label, _ in ranking[:4]]
        print("Filter the following 4 chords", top_chords)

    chord_dictionary = dict()
    for i in range(segment_count):
        start_frame = int(boundaries[i] * sr)
        end_frame = int(boundaries[i + 1] * sr)
        chords = decode(extract_chroma(data[start_frame:end_frame]))
        print("index", i, ":", boundaries[i], ":", boundaries[i + 1], "duration:",
              boundaries[i + 1] - boundaries[i], chords)
        if len(chords) > 1:
            # Keep the label of the longest detected chord (first one on ties).
            longest = max(chords, key=lambda segment: abs(segment[0] - segment[1]))
            chord_name = longest[2]
            print("Possible error: >1 chord detected", chord_name)
        else:
            chord_name = chords[0][2]
        chord_dictionary.setdefault(chord_name, []).append(i)
        chord_result[i] = chord_name

    if filtered:
        # Only the root is needed; taking element 0 of the split also copes
        # with labels that contain no ":" (unpacking the split raised before).
        # Later entries override earlier ones for a shared root, which is what
        # applying the replacements one top chord after another produced.
        # NOTE(review): that means the *less* frequent of two top chords with
        # the same root wins, and chord_dictionary is not re-indexed after
        # filtering -- confirm both are intended.
        root_to_chord = {}
        for most_likely_chord in top_chords:
            root_to_chord[most_likely_chord.split(":")[0]] = most_likely_chord
        for i, label in enumerate(chord_result):
            root = label.split(":")[0]
            if root in root_to_chord:
                chord_result[i] = root_to_chord[root]

    return chord_dictionary, chord_result
def initializeMadmom(fmin, fmax):
    """
    Description:
    Builds the two madmom callables used for chord recognition: a deep chroma
    processor configured with the given filterbank range, and the chord
    recognition processor that works on deep chroma.

    Parameters:
    * fmin: minimum frequency of the filterbank
    * fmax: maximum frequency of the filterbank

    Returns:
    A tuple ``(chroma_processor, chord_decoder)``.
    """
    chroma_processor = DeepChromaProcessor(
        unique_filters=True,
        fmax=fmax,
        fmin=fmin,
    )
    chord_decoder = madmom.features.chords.DeepChromaChordRecognitionProcessor()
    return (chroma_processor, chord_decoder)
def extract_deep_chroma_to_file(dataset_path, output_file):
    """Write smoothed deep-chroma features for every song of a dataset.

    Song names are read, one per line, from ``<dataset_path>/listfiles``; each
    ``<dataset_path>/<name>.mp3`` is passed through madmom's deep chroma
    processor, the transposed result is handed to the Octave function
    ``smoothDownsampleFeature`` and every item it yields is written as one
    line (formatted by ``string_for_chroma``).

    Parameters:
        * dataset_path: directory holding ``listfiles`` and the mp3 files.
        * output_file: name of the output file inside ``dataset_path``; a
          falsy value selects ``YTC.madmom_deepchromas_smoothed=octave.txt``.
    """
    output_name = output_file or 'YTC.madmom_deepchromas_smoothed=octave.txt'
    output_path = os.path.join(dataset_path, output_name)

    dcp = DeepChromaProcessor()
    octave_engine = oct2py.Oct2Py()
    try:
        octave_engine.eval('pkg load signal')
        with open(os.path.join(dataset_path, 'listfiles'), 'r') as list_of_files, \
                open(output_path, 'w') as chroma_file:
            for cover_file in list_of_files:
                cover_name = cover_file.strip()
                if not cover_name:
                    # A blank line would otherwise resolve to a bare ".mp3".
                    continue
                # Join like every other path here, so a dataset_path without a
                # trailing separator still points inside the directory.
                song_path = os.path.join(dataset_path, '%s.mp3' % cover_name)
                smoothed = octave_engine.smoothDownsampleFeature(
                    np.transpose(dcp(song_path)))
                for chroma in smoothed:
                    chroma_file.write('%s\n' % string_for_chroma(chroma))
    finally:
        # Shut the Octave session down even when a song fails to process.
        octave_engine.exit()
def __init__(self, fps, fmin=65, fmax=2100, unique_filters=True,
             models=None, sample_rate=44100, fold=None):
    """Set up a deep chroma processor and fingerprint its configuration.

    Only ``fps == 10`` is accepted. ``fps``, ``fmin``, ``fmax`` and
    ``unique_filters`` are stored on the instance; ``models`` is forwarded to
    the processor. ``sample_rate`` and ``fold`` are accepted but not used
    here. ``model_hash`` is the SHA-1 hex digest of the pickled processor.
    """
    assert fps == 10, 'Cannot handle fps different from 10 yet.'

    # Imported lazily so madmom is only required once an instance is built.
    from madmom.audio.chroma import DeepChromaProcessor
    from hashlib import sha1

    self.fps, self.fmin, self.fmax = fps, fmin, fmax
    self.unique_filters = unique_filters

    processor_options = dict(
        fmin=fmin,
        fmax=fmax,
        unique_filters=unique_filters,
        models=models,
    )
    self.dcp = DeepChromaProcessor(**processor_options)

    pickled_processor = pickle.dumps(self.dcp)
    self.model_hash = sha1(pickled_processor).hexdigest()
def get_chords_new(time, data):
    """Look up the chord that is active at each requested time.

    The whole signal is decoded once; every entry of ``time`` is then mapped
    to the detected chord segment whose start is the latest one not after
    that time (times before the first segment use the first segment).

    Parameters:
        * time: sequence of query times, in the same unit as the start times
          reported by the chord recogniser.
        * data: audio input accepted by ``DeepChromaProcessor``.

    Returns:
        ``(chord_dictionary, chord_result)`` where ``chord_result[i]`` is the
        label for ``time[i]`` and ``chord_dictionary`` maps each label to the
        list of indices ``i`` it was assigned to. If no chord was detected,
        the labels stay ``""`` and the dictionary is empty.
    """
    decode = DeepChromaChordRecognitionProcessor()
    chords = decode(DeepChromaProcessor()(data))
    chord_starts = [chord[0] for chord in chords]
    chord_labels = [chord[2] for chord in chords]
    for chord in chords:
        print(chord)

    # One slot per query time, filled in place. Appending to this pre-sized
    # list used to return 2 * len(time) entries, the first half empty.
    chord_result = [""] * len(time)
    chord_dictionary = dict()
    if not chord_labels:
        return chord_dictionary, chord_result

    for i, moment in enumerate(time):
        # bisect_right - 1 is the last segment starting at or before `moment`;
        # clamp so that an early query does not wrap around to the last chord.
        # The index must stay an index: it was previously replaced by a time
        # distance in seconds, which selected an unrelated chord.
        idx = max(bisect.bisect_right(chord_starts, moment) - 1, 0)
        label = chord_labels[idx]
        chord_result[i] = label
        chord_dictionary.setdefault(label, []).append(i)
    return chord_dictionary, chord_result
def getChords(filename):
    """
    Description:
    Estimates the chords of the audio file ``filename`` with the madmom
    backend and returns the recogniser's output, i.e. entries of the form
    [startTime endTime chordLabel] covering the audio.

    Parameters:
    * filename: path of the audio file handed to madmom

    Returns:
    The output of ``CRFChordRecognitionProcessor`` for the file.
    """
    # IMPLEMENTATION DETAILS:
    # AudioFile --> CNN features --> Conditional Random Field --> Chord labels
    # Link to documentation: https://madmom.readthedocs.io/en/latest/modules/features/chords.html

    # CNN feature extractor according to
    # Filip Korzeniowski and Gerhard Widmer, "A Fully Convolutional Deep Auditory Model
    # for Musical Chord Recognition", Proceedings of IEEE International Workshop on
    # Machine Learning for Signal Processing (MLSP), 2016
    cnnProcessor = madmom.features.chords.CNNChordFeatureProcessor()

    # Conditional Random Field decoder that turns the CNN features into chord labels.
    # madmom allows passing our own CRF model; the default one is used here.
    # This only returns maj and min chords (no sevenths/inversions etc. identified).
    crfProcessor = madmom.features.chords.CRFChordRecognitionProcessor()

    # No deep chroma processor is built here: this pipeline never consumes
    # deep chroma, and the instance created previously was never used.

    # Used to report the time taken by feature extraction and decoding
    t1 = time.time()

    # Obtaining CNN features
    features = cnnProcessor(filename)

    # Obtains chord labels as [startTime endTime chordLabel] from the CNN features
    chords = crfProcessor(features)

    print(f"Chord Estimation for {filename}\n ")
    print("Processing took %.02f seconds" % ((time.time() - t1)))
    return chords
# Output: .btmp file of music file
# Usage: python bma.py [input music] [output] [easy/medium/hard/impossible]
import time
import sys
import bmaFunctions
import numpy
from madmom.features.chords import DeepChromaChordRecognitionProcessor
from madmom.audio.chroma import DeepChromaProcessor
from madmom.features.beats import DBNBeatTrackingProcessor, RNNBeatProcessor
from madmom.features.onsets import OnsetPeakPickingProcessor, RNNOnsetProcessor

# Chord recognition: deep chroma extraction followed by chord decoding.
dcp = DeepChromaProcessor()
decode = DeepChromaChordRecognitionProcessor()
input_path = sys.argv[1]  # first command-line argument: the input music file
chroma = dcp(input_path)
chords = decode(chroma)

# Onset detection: RNN onset activations followed by peak picking.
proc = OnsetPeakPickingProcessor(
    fps=100,
    threshold=0.7,
    pre_avg=0.25,
    post_avg=0.25,
    smooth=0.01,
)
act = RNNOnsetProcessor()(input_path)
beats = proc(act)

# calculating msi
beatsArray = numpy.array(beats)
def main():
    """Print the chords madmom recognises in ``data/song.mp3``."""
    chroma_extractor = DeepChromaProcessor()
    chord_decoder = DeepChromaChordRecognitionProcessor()
    # Deep chroma first, then chord decoding on top of it.
    print(chord_decoder(chroma_extractor('data/song.mp3')))
def __init__(self):
    """Open the audio stream and run the beat/tempo analysis loop.

    Sets up PyAudio, the sample buffer, the madmom beat and chord processors
    and the beep sound loaded from ``block.wav``. With ``using_callback``
    hard-coded to True the constructor then opens a callback-driven stream
    and keeps polling the buffer for as long as the stream is active, so it
    does not return before the stream stops.

    Each time the buffer holds exactly ``RATE * 8`` samples it is copied to
    ``self.tmp`` and cleared, ``self.chroma_rec`` is started on a worker
    thread, beats are tracked on the copy and ``self.flagit`` is scheduled
    through a ``threading.Timer``.
    """
    self.pa = pyaudio.PyAudio()
    self.c_count = 0
    using_callback = True
    self.buffer = collections.deque(maxlen=self.RATE * 14)
    self.rnn = RNNBeatProcessor(online=True, nn_files=[BEATS_LSTM[0]])
    self.act_proc = DBNBeatTrackingProcessor(fps=100,
                                             min_bpm=80.0,
                                             max_bpm=180.0)
    self.dcp = DeepChromaProcessor()
    self.decode = DeepChromaChordRecognitionProcessor()
    self.start_current_time = None
    if using_callback:
        self.stream = self.pa.open(format=self.FORMAT,
                                   channels=self.CHANNELS,
                                   rate=self.RATE,
                                   input=True,
                                   output=True,
                                   frames_per_buffer=self.CHUNK,
                                   stream_callback=self.callback)
        print(self.pa.get_default_output_device_info())
        print(self.pa.get_default_input_device_info())
        self.t_start = time.time()
        beepsnd, _ = librosa.load('block.wav', sr=None)
        # tobytes() is the supported spelling of the deprecated tostring();
        # both return the same raw sample bytes.
        self.beepsnd = beepsnd.tobytes()
        self.Flag = False
        self.beep_count = 0
        while self.stream.is_active():
            # NOTE(review): the buffer is capped at RATE * 14 and the message
            # below says '14 sec', but the trigger is RATE * 8 -- confirm
            # which window length is intended.
            if len(self.buffer) == self.RATE * 8:
                print('14 sec')
                print(self.time_info)
                print(time.time() - self.t_start)
                self.tmp = np.array(self.buffer)
                self.buffer.clear()
                print(time.time() - self.t_start)
                # Chord recognition runs in parallel with beat tracking.
                chroma_thread = threading.Thread(target=self.chroma_rec,
                                                 args=())
                chroma_thread.start()
                activations = self.rnn(self.tmp)
                t_proc = time.time() - self.t_start
                print(t_proc)
                beats = self.act_proc(activations)
                # Mean inter-beat interval (seconds) converted to BPM.
                tempo = 60 / np.mean(np.diff(beats))
                print('tempo is %f' % tempo)
                print('beat is ', beats)
                t_proc = time.time() - self.t_start
                chroma_thread.join()
                print(t_proc)
                t = threading.Timer(60. / tempo - t_proc, self.flagit, ())
                t.daemon = True
                t.start()
                print(time.time() - self.t_start)
            else:
                time.sleep(0.001)
    else:
        self.stream = self.pa.open(format=self.FORMAT,
                                   channels=self.CHANNELS,
                                   rate=self.RATE,
                                   input=True,
                                   output=True,
                                   frames_per_buffer=self.CHUNK)
        self.t_start = time.time()
        self.loop()
def __init__(self):
    """Open the audio stream, estimate the tempo once and start playback.

    Sets up PyAudio, the sample buffer, the madmom beat and chord processors
    and an ``InstScheduler`` fed with the ``test_midi_folder`` MIDI folder.
    With ``using_callback`` hard-coded to True the constructor opens a
    callback-driven stream and polls the buffer until it holds exactly
    ``RATE * 14`` samples. That window is copied to ``self.tmp``,
    ``self.chroma_rec`` runs on a worker thread while beats are tracked,
    ``self.flagit`` is scheduled through a ``threading.Timer`` and the
    scheduler is started with the estimated tempo.

    After that single analysis (or once the stream is no longer active) the
    constructor idles in an endless sleep loop, so it never returns.
    """
    self.pa = pyaudio.PyAudio()
    self.c_count = 0
    using_callback = True
    self.buffer = collections.deque(maxlen=self.RATE * 14)
    self.rnn = RNNBeatProcessor(online=True, nn_files=[BEATS_LSTM[0]])
    self.act_proc = DBNBeatTrackingProcessor(fps=100,
                                             min_bpm=80.0,
                                             max_bpm=180.0)
    self.dcp = DeepChromaProcessor()
    self.decode = DeepChromaChordRecognitionProcessor()
    self.start_current_time = None
    self.beep_count = 0
    source_path = 'tool'
    style_name = 'test_midi_folder'
    self.test = InstScheduler(FoxDot.lib.Clock, source_path)
    self.test.AddMidiFolder(style_name)
    # Online random playing event determined by prosperity function
    self.test.Live_event()
    # If the meta file exists, calling this routine is not required
    self.test.set_tempo_pattern(4, 4)
    if using_callback:
        self.stream = self.pa.open(format=self.FORMAT,
                                   channels=self.CHANNELS,
                                   rate=self.RATE,
                                   input=True,
                                   output=True,
                                   frames_per_buffer=self.CHUNK,
                                   stream_callback=self.callback)
        print(self.pa.get_default_output_device_info())
        print(self.pa.get_default_input_device_info())
        self.t_start = time.time()
        beepsnd, _ = librosa.load('block.wav', sr=None)
        # tobytes() is the supported spelling of the deprecated tostring();
        # both return the same raw sample bytes.
        self.beepsnd = beepsnd.tobytes()
        self.Flag = False
        while self.stream.is_active():
            if len(self.buffer) == self.RATE * 14:
                print('14 sec')
                print(self.time_info)
                print(time.time() - self.t_start)
                self.tmp = np.array(self.buffer)
                self.buffer.clear()
                print(time.time() - self.t_start)
                # Chord recognition runs in parallel with beat tracking.
                chroma_thread = threading.Thread(target=self.chroma_rec,
                                                 args=())
                chroma_thread.start()
                activations = self.rnn(self.tmp)
                t_proc = time.time() - self.t_start
                print(t_proc)
                beats = self.act_proc(activations)
                # Mean inter-beat interval (seconds) and the matching BPM.
                beat_period = np.mean(np.diff(beats))
                tempo = 60 / beat_period
                print('tempo is %f' % tempo)
                print('beat is ', beats)
                t_proc = time.time() - self.t_start
                chroma_thread.join()
                print(t_proc)
                t = threading.Timer(60. / tempo - t_proc, self.flagit, ())
                t.daemon = True
                t.start()
                print(int(tempo))
                # Start delay: four beat periods, minus the part of the
                # 14 s window after the last detected beat, minus the time
                # elapsed since t_start.
                self.test.StartInTime(
                    beat_period * 4 - (14 - beats[-1]) - t_proc,
                    int(tempo))
                # Only one analysis pass is performed.
                break
            else:
                time.sleep(0.001)
        # Keep the constructor (and thus the calling thread) alive.
        while (1):
            time.sleep(0.01)
    else:
        self.stream = self.pa.open(format=self.FORMAT,
                                   channels=self.CHANNELS,
                                   rate=self.RATE,
                                   input=True,
                                   output=True,
                                   frames_per_buffer=self.CHUNK)
        self.t_start = time.time()
        self.loop()
def _pcp_histograms(frames):
    """Return the normalised plain and flatness-weighted sums of ``frames``.

    Every frame is added bin by bin into a 12-bin histogram; a second
    histogram adds each frame scaled by ``flatness(frame)``. Both are divided
    by their own total before being returned as lists.
    """
    hist = [0] * 12
    hist_f = [0] * 12
    for frame in frames:
        weight = flatness(frame)
        hist = [acc + value for acc, value in zip(hist, frame)]
        hist_f = [acc + value * weight for acc, value in zip(hist_f, frame)]
    total = sum(hist)
    total_f = sum(hist_f)
    return [value / total for value in hist], [value / total_f for value in hist_f]


def getPCPHistogram(filename, fs=8192, show=False):
    """Compute normalised pitch-class histograms of an audio file.

    Three chroma variants are summarised, each as a plain histogram and as a
    flatness-weighted one (suffix ``_f``):

    * ``standard`` / ``standard_f``: pitch class profile of the spectrogram;
    * ``hpss`` / ``hpss_f``: pitch class profile of the first output of the
      harmonic/percussive source separation;
    * ``deep`` / ``deep_f``: madmom deep chroma.

    The two pitch-class-profile variants are rotated by nine bins
    (``hist[i - 9]``); the deep chroma histograms are stored unrotated.

    Parameters:
        * filename: audio file to analyse.
        * fs: frame size passed to ``FramedSignal``.
        * show: when True, plot the three plain histograms side by side.

    Returns:
        dict mapping the six names above to lists of 12 values.
    """
    res = {}

    sig = Signal(filename, num_channels=1)
    fsig = FramedSignal(sig, frame_size=fs)
    stft = ShortTimeFourierTransform(fsig)
    spec = Spectrogram(stft)

    # Lists are built explicitly: on Python 3 ``map`` returns a one-shot
    # iterator, which cannot be summed and then indexed as done before.
    chroma = PitchClassProfile(spec, num_classes=12)
    hist, hist_f = _pcp_histograms(chroma)
    res['standard'] = [hist[i - 9] for i in range(12)]
    res['standard_f'] = [hist_f[i - 9] for i in range(12)]

    hpss = HarmonicPercussiveSourceSeparation()
    h, _ = hpss.process(spec)
    chroma = PitchClassProfile(h, num_classes=12)
    hist, hist_f = _pcp_histograms(chroma)
    res['hpss'] = [hist[i - 9] for i in range(12)]
    res['hpss_f'] = [hist_f[i - 9] for i in range(12)]

    dcp = DeepChromaProcessor()
    deepchroma = dcp(filename)
    res['deep'], res['deep_f'] = _pcp_histograms(deepchroma)

    if show:
        plt.subplot(131)
        plt.barh(range(12), res['standard'])
        plt.subplot(132)
        plt.barh(range(12), res['hpss'])
        plt.subplot(133)
        plt.barh(range(12), res['deep'])
        plt.show()
    return res
def setUp(self):
    """Create a fresh ``DeepChromaProcessor`` and store it on the instance."""
    processor = DeepChromaProcessor()
    self.processor = processor