def det(tf):
    # fmt: off
    sp.check_call([
        b"ffmpeg",
        b"-nostdin",
        b"-hide_banner",
        b"-v", b"fatal",
        b"-ss", b"13",
        b"-y",
        b"-i", fsenc(sys.argv[1]),
        b"-map", b"0:a:0",
        b"-ac", b"1",
        b"-ar", b"22050",
        b"-t", b"300",
        b"-f", b"f32le",
        fsenc(tf)
    ])
    # fmt: on

    with open(tf, "rb") as f:
        d = np.fromfile(f, dtype=np.float32)

    try:
        # 98% accuracy on jcore
        c = vamp.collect(d, 22050, "beatroot-vamp:beatroot")
        cl = c["list"]
    except Exception:
        # fallback; 73% accuracy
        plug = "vamp-example-plugins:fixedtempo"
        c = vamp.collect(d, 22050, plug, parameters={"maxdflen": 40})
        print(c["list"][0]["label"].split(" ")[0])
        return

    # throws if detection failed:
    bpm = float(cl[-1]["timestamp"] - cl[1]["timestamp"])
    bpm = round(60 * ((len(cl) - 1) / bpm), 2)
    print(f"{bpm:.2f}")
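# A minimal worked check of tempo-from-timestamps arithmetic (hypothetical
# beat data in the beatroot-style list-of-dicts format used above, not output
# from a real run): N beats spanning T seconds give N - 1 intervals, so
# BPM = 60 * (N - 1) / T.
def _check_bpm_arithmetic():
    cl = [{"timestamp": 0.5 * i} for i in range(11)]  # 11 beats, 0.5 s apart
    span = float(cl[-1]["timestamp"]) - float(cl[0]["timestamp"])  # 5.0 s
    bpm = 60 * ((len(cl) - 1) / span)  # 10 intervals over 5 s
    assert round(bpm, 2) == 120.0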
def process(frames):
    # assert frames == client.blocksize
    # assert len(l_f) == len(r_f)
    l_f = client.inports[0].get_array()
    r_f = client.inports[1].get_array()
    '''
    l_f = np.frombuffer(l.get_buffer(), dtype=float)
    r_f = np.frombuffer(r.get_buffer(), dtype=float)
    '''
    # mix both channels down to mono
    m_f = np.add(l_f, r_f)
    m_f = np.true_divide(m_f, 2.0)
    try:
        data = vamp.collect(m_f, client.samplerate, "vamp-libxtract:loudness")
        vector = data['vector']
        L = vector[1]
        if L > 0.0:
            data = vamp.collect(m_f, client.samplerate, "pyin:pyin")
            L = data['list']
            print(L[1]['values'])
    except Exception:
        pass
def analyzeAudio(audio_array, sample_rate, channels):
    print("Found Vamp plugins:")
    for name in vamp.list_plugins():
        print("  " + name)

    # convert audio array to the float format expected by vamp
    audio_array2 = audio_array / float(numpy.iinfo(numpy.int16).max)
    # de-interleave and keep the first channel only
    audio_array2 = audio_array2.reshape((len(audio_array2) // channels, channels))[:, 0]

    if False:  # chunked
        results = []
        for subarray in numpy.array_split(
                audio_array2,
                int(numpy.ceil(audio_array2.shape[0] / float(BUFFER_SIZE))),
                axis=0):
            if DEBUG:
                print("subarray size: " + str(subarray.shape[0]))
            # assert subarray.shape[0] == BUFFER_SIZE
            results += [vamp.collect(subarray, sample_rate, VAMP_PLUGIN)]
    else:  # all at once
        results = vamp.collect(audio_array2, sample_rate, VAMP_PLUGIN)

    if DEBUG:
        print("Vamp plugin output:")
        for result in results["list"][:15]:
            print("  %s" % result)

    results = [(float(row["timestamp"]), float(row["duration"]), int(row["values"][0]))
               for row in results["list"]]
    return results
def test_process_summary_param():
    buf = input_data(blocksize * 10)

    rdict = vamp.collect(buf, rate, plugin_key, "input-summary",
                         {"produce_output": False})
    assert "vector" in rdict
    step, results = rdict["vector"]
    assert len(results) == 0

    rdict = vamp.collect(buf, rate, plugin_key, "input-summary",
                         {"produce_output": True})
    assert "vector" in rdict
    step, results = rdict["vector"]
    assert len(results) > 0
def extract_F0_pYIN_vamp(folder, fn, sr=22050, H=221, N=2048):
    '''
    Given an audio file, use the pYIN Vamp plug-in to extract the F0 and the
    confidence of each frame being voiced.
    '''
    # load wave file
    x, fs = librosa.load(os.path.join(folder, fn), sr=sr)

    # pYIN parameters
    param = {'threshdistr': 2, 'outputunvoiced': 2, 'precisetime': 0}
    '''
    outputs:
        'voicedprob' outputs the probability of each frame being voiced
        'smoothedpitchtrack' outputs the smoothed pitch track
    parameters:
        'outputunvoiced': 0 (No), 1 (Yes, as zeros), 2 (Yes, as negative frequencies)
    '''
    pYIN_f0_output = vamp.collect(x, sr, "pyin:pyin",
                                  output='smoothedpitchtrack',
                                  parameters=param,
                                  step_size=H, block_size=N)['vector']
    pYIN_voiced_prob = vamp.collect(x, sr, "pyin:pyin",
                                    output='voicedprob',
                                    parameters=param,
                                    step_size=H, block_size=N)['vector']

    time_step = float(pYIN_f0_output[0])
    F0 = pYIN_f0_output[1]
    voiced_prob = pYIN_voiced_prob[1]
    timestamp = np.arange(start=0, stop=(len(F0) - 0.5) * time_step,
                          step=time_step)  # [:-1]
    traj = np.vstack([timestamp, F0, voiced_prob]).transpose()

    if not os.path.exists(os.path.join(folder, 'pYIN')):
        os.mkdir(os.path.join(folder, 'pYIN'))
    pd.DataFrame(traj).to_csv(os.path.join(folder, 'pYIN', fn[:-3] + 'csv'),
                              header=None)
    # print("{} F0 curve saved to {}".format(fn, os.path.join(folder, 'pYIN', fn[:-3] + 'csv')))
    return traj
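# A hypothetical usage sketch for extract_F0_pYIN_vamp (the folder and file
# names are placeholders): the returned trajectory is a (num_frames, 3) array
# of [time, f0, voiced_probability] rows, and a CSV copy is written under
# <folder>/pYIN/.
# traj = extract_F0_pYIN_vamp('audio_dir', 'example.wav', sr=22050, H=221, N=2048)
# times, f0, voiced_prob = traj[:, 0], traj[:, 1], traj[:, 2]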
def load_audio_data_from_url(self, url, audio_path):
    """
    Purpose: extract chord information (via a Vamp plugin) and beat
    information from the audio of a YouTube URL.
    Returns:
        1. list of predicted chords with their onset times
        2. list of predicted beat times
        3. name of the downloaded audio file
        4. Boolean indicating whether the Vamp plugin failed to extract beats
    """
    # If the app was interrupted on a previous run, remove any leftover
    # files to avoid youtube_dl errors.
    files = glob.glob(audio_path + "/*.mp4")
    for x in files:
        if not os.path.isdir(x):
            os.remove(x)

    ydl_opts = {
        'format': 'best',
        'outtmpl': audio_path + '/%(title)s.%(ext)s',
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    files = glob.glob(audio_path + "/*.mp4")
    audio_file_name = ''
    signal, sr = 0, 0
    for x in files:
        if not os.path.isdir(x):
            filename = os.path.splitext(x)
            audio_file_name = filename[0].split(audio_path + '/')[1]
            signal, sr = librosa.load(x)
            os.remove(x)

    # extract the beat data needed to build the score structure
    predicted_beat_list = vamp.collect(signal, sr, "beatroot-vamp:beatroot")
    isEmptyBeatList = False
    if predicted_beat_list['list'] == []:
        isEmptyBeatList = True

    # extract the chord data needed to build the score
    predicted_chord_list = vamp.collect(signal, sr, "nnls-chroma:chordino")

    chord_list = []
    for i in predicted_chord_list['list']:
        i['timestamp'] = float(i['timestamp'])
        chord_list.append(i)

    beat_list = []
    for i in predicted_beat_list['list']:
        beat_list.append(float(i['timestamp']))

    return chord_list, beat_list, audio_file_name, isEmptyBeatList
def FeatureThread():
    global currentTempo, tempoGram, predictTempo
    global chords
    rb = RingBuffer(8)
    while True:
        os.system('jack_capture_ms -d 1000 out.wav > /dev/null 2>&1')
        y, sr = librosa.load('out.wav', sr=client.samplerate)

        # track tempo and keep a histogram of recent estimates
        onset_env = librosa.onset.onset_strength(y, sr=sr)
        tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
        rb.append(int(numpy.around(tempo)))
        tempogram = numpy.array(rb.get())
        unique, counts = numpy.unique(tempogram, return_counts=True)
        max_unique_counts = numpy.asarray((unique, counts)).T
        currentTempo, tempoGram, predictTempo = \
            tempo[0], rb.get(), max_unique_counts[0][0]

        # track chords, keeping the last non-'N' (no chord) label
        vdata = vamp.collect(y, client.samplerate, "nnls-chroma:chordino")
        for entry in vdata['list']:
            c = entry['label']
            if c != 'N':
                chords = c

        os.system('rm -f out.wav')
        if stopThreads:
            break
def audio_to_pitch_melodia(wav_data, fs=44100, minfqr=55.0, maxfqr=1760.0,
                           voicing=0.2, minpeaksalience=0.0):
    # When calling mtg-melodia from code, only the defaults block=2048 and
    # step=128 can be used.
    params = dict(minfqr=minfqr, maxfqr=maxfqr, voicing=voicing,
                  minpeaksalience=minpeaksalience)
    melody = collect(wav_data, fs, 'mtg-melodia:melodia', parameters=params)
    timestep = melody['vector'][0].to_float()
    pitch = melody['vector'][1]
    # sampling starts at the 8th timestep
    starttime = timestep * 8
    # list of (time, frequency) pairs
    result = []
    for i, p in enumerate(pitch):
        result.append((starttime + i * timestep, p))
    return result
def get_hpcp(x, sr, n_bins=12, f_min=55, f_ref=440.0, min_magn=-100):
    """Compute HPCP features from raw audio using the HPCP Vamp plugin.

    Vamp, the vamp python module and the plug-in must be installed.

    Args:
        x (1d-array): audio signal, mono
        sr (int): sample rate
        n_bins (int): number of chroma bins
        f_min (float): minimum frequency
        f_ref (float): A4 tuning frequency
        min_magn (float): minimum magnitude for peak detection, in dB

    Returns:
        1d-array: time vector
        2d-array: HPCP features
    """
    plugin = 'vamp-hpcp-mtg:MTG-HPCP'
    params = {'LF': f_min, 'nbins': n_bins, 'reff0': f_ref,
              'peakMagThreshold': min_magn}
    data = vamp.collect(x, sr, plugin, parameters=params)
    vamp_hop, hpcp = data['matrix']
    t = float(vamp_hop) * (8 + np.arange(len(hpcp)))
    return t, hpcp
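# A hypothetical usage sketch for get_hpcp (the file name is a placeholder;
# librosa loading is assumed, matching the other snippets in this file):
# x, sr = librosa.load('example.wav', sr=44100, mono=True)
# t, hpcp = get_hpcp(x, sr, n_bins=12)
# hpcp has shape (num_frames, n_bins); t holds the frame times in seconds.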
def chordBeats(infile, outfile):
    print('Loading audio file...', infile)
    audio = essentia.standard.MonoLoader(filename=infile)()
    bt = BeatTrackerMultiFeature()
    beats, _ = bt(audio)
    # beats = beats[::4]
    stepsize, chroma = vamp.collect(
        audio, 44100, "nnls-chroma:nnls-chroma",
        output="chroma", step_size=2048)["matrix"]
    # to essentia convention
    # chroma = np.roll(chroma, 3, 1)
    chords = ChordsDetectionBeats(hopSize=2048)
    syms, strengths = chords(chroma, beats)
    segments = essentia_chord_utils.toMirexLab(
        0.0, len(audio) / 44100.0, beats, syms, strengths)
    with open(outfile, 'w') as content_file:
        for s in segments:
            content_file.write(str(s) + '\n')
def extract_melody(self):
    """
    Extracts melody from the audio using the Melodia vamp plugin. Uses
    arguments kept in self:

    - `self.minimum_frequency` (default: 55 Hz)
    - `self.maximum_frequency` (default: 1760 Hz)
    - `self.voicing_tolerance` (default: 0.2)
    - `self.minimum_peak_salience` (default: 0.0)

    This function sets two class members used in other parts:

    - `self.melody`: (numpy array) contains the melody in Hz for every
      timestep (0 indicates no voice).
    - `self.timestamps`: (numpy array) contains the timestamps for each
      melody note
    """
    params = {
        'minfqr': self.minimum_frequency,
        'maxfqr': self.maximum_frequency,
        'voicing': self.voicing_tolerance,
        'minpeaksalience': self.minimum_peak_salience
    }

    data = vamp.collect(self.audio_signal.audio_data, self.sample_rate,
                        "mtg-melodia:melodia", parameters=params)
    _, melody = data['vector']

    # hard-coded hop of the Melodia vamp plugin, in seconds
    hop = 128. / 44100.
    timestamps = 8 * hop + np.arange(len(melody)) * hop
    melody[melody < 0] = 0

    self.melody = melody
    self.timestamps = timestamps
def extract_melody(self):
    params = {}
    params['minfqr'] = self.minimum_frequency
    params['maxfqr'] = self.maximum_frequency
    params['voicing'] = self.voicing_tolerance
    params['minpeaksalience'] = self.minimum_peak_salience

    try:
        data = vamp.collect(self.audio_signal.audio_data, self.sample_rate,
                            "mtg-melodia:melodia", parameters=params)
    except Exception as e:
        print(
            '**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~**\n'
            '*        Are Vamp and Melodia installed correctly?             *\n'
            '* Check https://bit.ly/2DXbrAk for installation instructions! *\n'
            '**~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~**'
        )
        raise e

    _, melody = data['vector']

    # hard-coded hop of the Melodia vamp plugin, in seconds
    hop = 128. / 44100.
    timestamps = 8 * hop + np.arange(len(melody)) * hop
    melody[melody < 0] = 0

    self.melody = melody
    self.timestamps = timestamps
def extract(self, signal):
    """
    Extract the melody of the sound x, given as a numpy array.
    """
    start_time = time.time()

    if signal.ndim > 1:
        print("INFO: Input signal has more than 1 channel; the channels will be averaged.")
        signal = np.mean(signal, axis=1)

    # remove percussion if requested
    if self.separate:
        audio, percussion = librosa.effects.hpss(signal)
    else:
        audio = signal  # fall back to the unseparated signal

    # Vamp UPF melody extractor plugin
    data = vamp.collect(audio, self.fs, "mtg-melodia:melodia",
                        parameters=self.params)
    hop, melody = data['vector']
    timestamps = 8 * 128 / self.fs + np.arange(len(melody)) * (128 / self.fs)

    # Melodia returns unvoiced (= no melody) sections as negative values;
    # set them to 0:
    melody_pos = melody[:]
    melody_pos[melody <= 0] = 0

    # Finally, you might want to express the pitch sequence in cents rather
    # than in Hz. This especially makes sense if you are comparing two or
    # more pitch sequences to each other (e.g. an estimate against a reference).
    melody_cents = 1200 * np.log2(melody / 55.0)
    melody_cents[melody <= 0] = 0

    print("--- %s seconds --- for melody extraction" % (time.time() - start_time))
    return melody_cents, timestamps
def rawChromaFromAudio(audiofile, sampleRate=44100, stepSize=2048):
    mywindow = np.array([
        0.001769, 0.015848, 0.043608, 0.084265, 0.136670, 0.199341,
        0.270509, 0.348162, 0.430105, 0.514023, 0.597545, 0.678311,
        0.754038, 0.822586, 0.882019, 0.930656, 0.967124, 0.990393,
        0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803,
        0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803,
        0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803,
        0.999803, 0.999803, 0.999803, 0.999803, 0.999803, 0.999803,
        0.999650, 0.996856, 0.991283, 0.982963, 0.971942, 0.958281,
        0.942058, 0.923362, 0.902299, 0.878986, 0.853553, 0.826144,
        0.796910, 0.766016, 0.733634, 0.699946, 0.665140, 0.629410,
        0.592956, 0.555982, 0.518696, 0.481304, 0.444018, 0.407044,
        0.370590, 0.334860, 0.300054, 0.266366, 0.233984, 0.203090,
        0.173856, 0.146447, 0.121014, 0.097701, 0.076638, 0.057942,
        0.041719, 0.028058, 0.017037, 0.008717, 0.003144, 0.000350
    ])
    audio = essentia.standard.MonoLoader(filename=audiofile,
                                         sampleRate=sampleRate)()
    # estimate audio duration just for caching purposes:
    audioDuration(audiofile, sampleRate=sampleRate, audioSamples=audio)
    stepsize, semitones = vamp.collect(
        audio, sampleRate, "nnls-chroma:nnls-chroma",
        output="semitonespectrum", step_size=stepSize)["matrix"]
    # fold the windowed semitone spectrum into 12 chroma bins
    chroma = np.zeros((semitones.shape[0], 12))
    for i in range(semitones.shape[0]):
        tones = semitones[i] * mywindow
        cc = chroma[i]
        for j in range(tones.size):
            cc[j % 12] = cc[j % 12] + tones[j]
    # roll from 'A'-based to 'C'-based
    chroma = np.roll(chroma, shift=-3, axis=1)
    return chroma
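# A minimal check (hypothetical data, not part of the original code) of the
# semitone-to-chroma folding above: adding bin j into chroma bin j % 12 is
# equivalent to reshaping the 84-bin semitone spectrum to (7 octaves, 12
# pitch classes) and summing over the octave axis.
def _check_chroma_folding():
    import numpy as np
    semis = np.arange(84, dtype=float)  # one fake 84-bin semitone frame
    folded = np.zeros(12)
    for j in range(semis.size):
        folded[j % 12] += semis[j]
    assert np.allclose(folded, semis.reshape(7, 12).sum(axis=0))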
def get_melody(x, sr, f_min=55, f_max=1760, min_salience=0.0, unvoiced=True):
    """Extract the main melody from raw audio using the Melodia Vamp plugin.

    Vamp, the vamp python module and the plug-in must be installed.

    Args:
        x (np.array): audio signal, mono
        sr (int): sample rate
        f_min (float): minimum frequency
        f_max (float): maximum frequency

    Returns:
        1d-array: time vector
        1d-array: main melody (in MIDI note numbers)
    """
    plugin = 'mtg-melodia:melodia'
    params = {'minfqr': f_min, 'maxfqr': f_max, 'minpeaksalience': min_salience}
    data = vamp.collect(x, sr, plugin, parameters=params)
    vamp_hop, f0 = data['vector']

    if unvoiced:
        # keep unvoiced estimates (returned as negative frequencies)
        f0 = abs(f0)
        f0[f0 == 0] = None
    else:
        f0[f0 <= 0] = None

    hz2midi = lambda f: 69 + 12 * np.log2(abs(f) / 440)
    melody = hz2midi(f0)
    melody = melody[:, np.newaxis]
    t = float(vamp_hop) * (8 + np.arange(len(melody)))
    return t, melody
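# A minimal worked check (assuming standard A4 = 440 Hz equal-tempered
# tuning; not part of the original code) of the Hz-to-MIDI formula used
# above, midi = 69 + 12 * log2(f / 440): 440 Hz maps to MIDI 69 (A4) and
# 220 Hz, one octave lower, to MIDI 57 (A3).
def _check_hz2midi_formula():
    import numpy as np
    _hz2midi = lambda f: 69 + 12 * np.log2(abs(f) / 440)
    assert round(float(_hz2midi(440.0))) == 69
    assert round(float(_hz2midi(220.0))) == 57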
def melodia(self):
    logging.debug("Generating melody")
    df = self.df
    melodia = vamp.collect(self.y, self.sr, "mtg-melodia:melodia")
    hop, melody = melodia['vector']
    timestamps = 8 * 128 / self.sr + np.arange(len(melody)) * (128 / self.sr)
    timestamps /= 2
    melody_pos = melody[:]
    melody_pos[melody <= 0] = None
    df_melodia = pd.DataFrame(np.vstack([timestamps, melody]).T)
    # df_melodia.to_csv('/dataset/test/melodia.csv', index=False)
    df_melodia[1] = df_melodia[1].fillna(-1)
    # df_melodia = df_melodia[df_melodia[1] > 0]
    # df_melodia[2] = librosa.hz_to_note(df_melodia[1], octave=False)
    # df_melodia[[0, 0, 2]].to_csv('%s.melody_note_all.txt' % self.file, sep='\t', header=False, index=False)
    tmp_df = pd.merge_asof(df_melodia, self.df_downbeats,
                           left_on=0, right_on='beat_time')
    tmp_df = tmp_df[(tmp_df.beat_time > 0) & (tmp_df[1] > 0)]
    tmp_df['melody_note'] = tmp_df[1].map(self.write_melody_note)
    tmp_df['melody_note_ori'] = librosa.hz_to_note(tmp_df[1], octave=False)
    tmp_df = tmp_df.groupby(['beat_time']).agg(
        lambda x: scipy.stats.mode(x)[0]).reset_index(drop=False)
    tmp_df = tmp_df[['beat_time', 'melody_note', 'melody_note_ori']]
    self.df_melodia = tmp_df
def extract_notes_pYIN_vamp(x, Fs=config.fs, H=config.hopsize, N=config.framesize):
    # pYIN parameters
    param = {'threshdistr': 2, 'outputunvoiced': 2, 'precisetime': 0}

    # output options: smoothedpitchtrack, f0candidates, f0probs, voicedprob,
    # candidatesalience, notes
    pYIN_note_output = vamp.collect(x, Fs, "pyin:pyin", output='notes',
                                    parameters=param, step_size=H, block_size=N)

    # reformatting: build (start, end, note) rows, inserting rests (note 0)
    # between non-contiguous notes
    traj = np.empty((0, 3))
    for j, entry in enumerate(pYIN_note_output['list']):
        timestamp = float(entry['timestamp'])
        duration = float(entry['duration'])
        note = float(entry['values'][0])
        if j == 0:
            traj = np.vstack((traj, [0, timestamp, 0]))
        elif timestamp != traj[-1][1]:
            traj = np.vstack((traj, [traj[-1][1], timestamp, 0]))
        traj = np.vstack((traj, [timestamp, timestamp + duration, note]))
    return traj
def audio_to_midi_melodia(infile, outfile, bpm, smooth=0.25, minduration=0.1,
                          savejams=False):
    fs = 44100
    hop = 128
    data, sr = librosa.load(infile, sr=fs, mono=True)
    melody = vamp.collect(data, sr, "mtg-melodia:melodia",
                          parameters={"voicing": 0.2})
    pitch = melody['vector'][1]
    # impute missing 0's to compensate for the starting timestamp
    pitch = np.insert(pitch, 0, [0] * 8)
    midi_pitch = hz2midi(pitch)
    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)
    save_midi(outfile, notes, bpm)
    if savejams:
        jamsfile = outfile.replace(".mid", ".jams")
        track_duration = len(data) / float(fs)
        save_jams(jamsfile, notes, track_duration, os.path.basename(infile))
    print("Conversion complete.")
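# A hypothetical invocation of audio_to_midi_melodia (the paths are
# placeholders; bpm is required, the rest shows the defaults defined above):
# audio_to_midi_melodia('input.wav', 'output.mid', bpm=120,
#                       smooth=0.25, minduration=0.1, savejams=False)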
def calculateBeatsAndSemitones(infile):
    print('Loading audio file...', infile)
    proc = BeatTrackingProcessor(
        fps=100, method='comb', min_bpm=40, max_bpm=240,
        act_smooth=0.09, hist_smooth=7, alpha=0.79)
    act = RNNBeatProcessor()(infile)
    beats = proc(act).astype('float32')
    audio = essentia.standard.MonoLoader(filename=infile)()
    # bt = BeatTrackerMultiFeature()
    # beats, _ = bt(audio)
    # TODO: best parameters.
    # stepsize, chroma = vamp.collect(
    #     audio, 44100, "nnls-chroma:nnls-chroma", output="chroma", step_size=2048)["matrix"]
    stepsize, semitones = vamp.collect(
        audio, 44100, "nnls-chroma:nnls-chroma",
        output="semitonespectrum", step_size=2048)["matrix"]
    return len(audio), beats, semitones
def extract_melody(filename_wav):
    data, rate = librosa.load(
        os.path.join(utils.get_dirname_audio_warped(), filename_wav))
    data_melody = vamp.collect(data, rate, "mtg-melodia:melodia")
    return data_melody
def extractRawMelody(filename, position, around):
    data, rate = librosa.load(filename, offset=position - around,
                              duration=2 * around)
    result = vamp.collect(data, rate, "mtg-melodia:melodia")
    frameDur, melody = result['vector']
    # frameDur = frameDur.to_frame(rate)
    return melody, rate
def add_recommend_database(self, url, json_path, audio_path):
    """
    Purpose: extract chord information from the audio of a YouTube URL
    (via a Vamp plugin) and store it, together with the URL, in the JSON
    file at json_path.
    Returns: nothing
    """
    # If the app was interrupted on a previous run, remove any leftover
    # files to avoid youtube_dl errors.
    files = glob.glob(audio_path + "/*.mp4")
    for x in files:
        if not os.path.isdir(x):
            os.remove(x)

    ydl_opts = {
        'format': 'best',
        'outtmpl': audio_path + '/%(title)s.%(ext)s',
    }
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    files = glob.glob(audio_path + "/*.mp4")
    audio_file_name = ''
    signal, sr = 0, 0
    for x in files:
        if not os.path.isdir(x):
            filename = os.path.splitext(x)
            audio_file_name = filename[0].split(audio_path + '/')[1]
            signal, sr = librosa.load(x)
            os.remove(x)

    # load chord data from vamp plugin
    predicted_chord_list = vamp.collect(signal, sr, "nnls-chroma:chordino")

    # --------- reshape the extracted data into an easier-to-use form ---------
    chord_list_with_timestamp = predicted_chord_list["list"]
    chord_list = []
    for i in chord_list_with_timestamp:
        chord_list.append(i['label'])

    double_chord_list_with_url = []
    for idx in range(len(chord_list) - 1):
        if chord_list[idx] != 'N' and chord_list[idx + 1] != 'N':
            double_chord = chord_list[idx] + '-' + chord_list[idx + 1]
            double_chord_list_with_url.append(double_chord)
    double_chord_list_with_url.append(
        double_chord_list_with_url[-1].split('-')[-1] + '-' + url)
    # --------------------------------------------------------------------------

    with open(json_path, 'r', encoding='utf-8') as f:
        chord_list_dict = json.load(f)

    dict_key_music_name = audio_file_name
    if dict_key_music_name not in chord_list_dict:
        chord_list_dict[dict_key_music_name] = double_chord_list_with_url

    with open(json_path, "w", encoding='utf-8') as fp:
        json.dump(chord_list_dict, fp, indent=4, ensure_ascii=False)
def extract_melody_melodia(audio_path):
    voicing = .6
    # Comments in this function are given by the creator of Melodia.

    # This is how we load audio using Librosa
    audio_1, sr_1 = librosa.load(audio_path, sr=44100, mono=True)

    # data_1 = vamp.collect(audio_1, sr_1, "mtg-melodia:melodia")
    # vector is a tuple of two values: the hop size used for analysis and
    # the array of pitch values.
    # Note that the hop size is *always* equal to 128/44100.0 = 2.9 ms
    # hop_1, melody_1 = data_1['vector']

    # parameter values are specified by providing a dictionary to the
    # optional "parameters" parameter:
    params = {
        "minfqr": 100.0,
        "maxfqr": 1760.0,
        "voicing": voicing,
        "minpeaksalience": 0.0
    }
    data_1 = vamp.collect(audio_1, sr_1, "mtg-melodia:melodia",
                          parameters=params)
    hop_1, melody_1 = data_1['vector']

    # *** SUPER IMPORTANT SUPER IMPORTANT ***
    # For reasons internal to the vamp architecture, THE TIMESTAMP OF THE
    # FIRST VALUE IN THE MELODY ARRAY IS ALWAYS:
    #
    #     first_timestamp = 8 * hop = 8 * 128/44100.0 = 0.023219954648526078
    #
    # This means that the timestamp of the pitch value at index i (starting
    # with i=0) is given by:
    #
    #     timestamp[i] = 8 * 128/44100.0 + i * (128/44100.0)
    #
    # So, if you want to generate a timestamp array to match the pitch
    # values, you do it like this:
    timestamps_1 = 8 * 128 / 44100.0 + np.arange(len(melody_1)) * (128 / 44100.0)

    # Melodia has 4 parameters:
    # * minfqr: minimum frequency in Hertz (default 55.0)
    # * maxfqr: maximum frequency in Hertz (default 1760.0)
    # * voicing: voicing tolerance. Greater values will result in more pitch
    #   contours included in the final melody; smaller values will result in
    #   fewer pitch contours included in the final melody (default 0.2).
    # * minpeaksalience: (in Sonic Visualiser, "Monophonic Noise Filter") a
    #   hack to avoid silence turning into junk contours when analyzing
    #   monophonic recordings (e.g. solo voice with no accompaniment).
    #   Generally you want to leave this untouched (default 0.0).
    melody = melody_1.tolist()
    output = {'data': [{'value': melody, 'time': timestamps_1.tolist()}]}
    return output
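# A minimal worked check of the timestamp formula quoted above (assuming
# Melodia's fixed hop of 128 samples at 44100 Hz): the first timestamp is
# 8 * 128 / 44100.0 ~= 0.0232 s, and consecutive timestamps are one hop apart.
def _check_melodia_timestamps():
    import numpy as np
    hop_s = 128 / 44100.0
    ts = 8 * hop_s + np.arange(5) * hop_s
    assert abs(ts[0] - 0.023219954648526078) < 1e-12
    assert np.allclose(np.diff(ts), hop_s)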
def generate_melody(audio_file_path):
    # This is the audio file we'll be analyzing.
    # You can download it here:
    # http://labrosa.ee.columbia.edu/projects/melody/mirex05TrainFiles.zip
    audio_file = audio_file_path

    # This is how we load audio using Librosa
    audio, sr = librosa.load(audio_file, offset=30.0, duration=50.0,
                             sr=44100, mono=True)

    # Extracting the melody using Melodia with default parameter values
    data = vamp.collect(audio, sr, "mtg-melodia:melodia")
    # print(data)

    # vector is a tuple of two values: the hop size used for analysis and
    # the array of pitch values.
    # Note that the hop size is *always* equal to 128/44100.0 = 2.9 ms
    hop, melody = data['vector']
    # print(hop)
    # print(melody)

    # timestamps = 8 * 128/44100.0 + np.arange(len(melody)) * (128/44100.0)

    # Extracting the melody using Melodia with custom parameter values:
    # parameter values are specified by providing a dictionary to the
    # optional "parameters" parameter:
    # params = {"minfqr": 100.0, "maxfqr": 800.0, "voicing": 0.2, "minpeaksalience": 0.0}
    # data = vamp.collect(audio, sr, "mtg-melodia:melodia", parameters=params)
    # hop, melody = data['vector']

    # Melodia returns unvoiced (= no melody) sections as negative values.
    # So by default, we get:
    # plt.figure(figsize=(18, 6))
    # plt.plot(timestamps, melody)
    # plt.xlabel('Time (s)')
    # plt.ylabel('Frequency (Hz)')
    # plt.show()

    # A clearer option is to get rid of the negative values before plotting:
    # melody_pos = melody[:]
    # melody_pos[melody <= 0] = None
    # plt.figure(figsize=(18, 6))
    # plt.plot(timestamps, melody_pos)
    # plt.xlabel('Time (s)')
    # plt.ylabel('Frequency (Hz)')
    # plt.show()

    # Finally, you might want to plot the pitch sequence in cents rather
    # than in Hz. This especially makes sense if you are comparing two or
    # more pitch sequences to each other (e.g. an estimate against a reference).
    # melody_cents = 1200 * np.log2(melody / 55.0)
    # melody_cents[melody <= 0] = None
    # plt.figure(figsize=(18, 6))
    # plt.plot(timestamps, melody_cents)
    # plt.xlabel('Time (s)')
    # plt.ylabel('Frequency (cents relative to 55 Hz)')
    # plt.show()

    return melody
def audio_to_midi_melodia(infile, outfile, bpm, smooth=0.25, minduration=0.1,
                          savejams=False):
    # define analysis parameters
    fs = 44100
    hop = 128

    # load audio using librosa
    print("Loading audio...")
    data, sr = librosa.load(infile, sr=fs, mono=True)

    # extract melody using melodia vamp plugin
    print("Extracting melody f0 with MELODIA...")
    melody = vamp.collect(data, sr, "mtg-melodia:melodia",
                          parameters={"voicing": 0.2})

    # hop = melody['vector'][0]
    pitch = melody['vector'][1]

    # impute missing 0's to compensate for starting timestamp
    pitch = np.insert(pitch, 0, [0] * 8)

    # debug
    # np.asarray(pitch).dump('f0.npy')
    # print(len(pitch))

    # convert f0 to midi notes
    print("Converting Hz to MIDI notes...")
    midi_pitch = hz2midi(pitch)

    # segment sequence into individual midi notes
    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)

    # save note sequence to a midi file
    print("Saving MIDI to disk...")
    save_midi(outfile, notes, bpm)

    '''import matplotlib.pyplot as plt
    notes = np.asarray(notes)
    x = notes[:, 0]
    y = np.fmod(notes[:, 2], 12)
    plt.plot(x, y)
    #plt.xticks(np.arange(0, lim, 10))
    #plt.yticks(np.arange(0, 12))
    #plt.grid()
    plt.show()'''

    if savejams:
        print("Saving JAMS to disk...")
        jamsfile = outfile.replace(".mid", ".jams")
        track_duration = len(data) / float(fs)
        save_jams(jamsfile, notes, track_duration, os.path.basename(infile))

    print("Conversion complete.")
def test_collect_fixed_sample_rate_2():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "curve-fsr-timed")
    step, results = rdict["vector"]
    assert abs(float(step) - 0.4) < eps
    assert len(results) == 10
    for i in range(len(results)):
        assert abs(results[i] - i * 0.1) < eps
def get_bars_and_beats_qm(self):
    import vamp
    if self.metre_qm is None:
        barbeattrack_output = vamp.collect(self.signal_mono, self.fs,
                                           'qm-vamp-plugins:qm-barbeattracker')
        beat_times = np.array([float(item['timestamp'])
                               for item in barbeattrack_output['list']])
        beat_metre = np.array([int(item['label'])
                               for item in barbeattrack_output['list']])
        # list() is needed under Python 3, where zip returns an iterator
        standard_beat_times_metre = np.array(list(zip(beat_times, beat_metre)))
        self.metre_qm = standard_beat_times_metre
def test_process_summary_param_kwargs_1():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(
        plugin_key=plugin_key,
        output="input-summary",
        parameters={"produce_output": False},
        data=buf,
        sample_rate=rate
    )
    assert "vector" in rdict
    step, results = rdict["vector"]
    assert len(results) == 0
def convertToMIDI(infile, outfile, bpm=120, smooth=0, minduration=0.1,
                  savejams=False):
    # define analysis parameters
    fs = 44100
    hop = 128

    # load audio using soundfile
    print("Loading audio...")
    data, sr = soundfile.read(infile)

    # mixdown to mono if needed
    if len(data.shape) > 1 and data.shape[1] > 1:
        data = data.mean(axis=1)

    # resample to 44100 if needed
    if sr != fs:
        data = resampy.resample(data, sr, fs)
        sr = fs

    # extract melody using melodia vamp plugin
    print("Extracting melody f0 with MELODIA...")
    melody = vamp.collect(data, sr, "mtg-melodia:melodia",
                          parameters={"voicing": 10})

    # hop = melody['vector'][0]
    pitch = melody['vector'][1]

    # impute missing 0's to compensate for starting timestamp
    pitch = np.insert(pitch, 0, [0] * 8)

    '''print(pitch)
    stored = 0.0
    for i in pitch:
        if i != stored:
            print(i, end=' ')
            stored = i'''

    # debug
    # np.asarray(pitch).dump('f0.npy')
    # print(len(pitch))

    # convert f0 to midi notes
    print("Converting Hz to MIDI notes...")
    midi_pitch = hz2midi(pitch)

    # leftover debug loop (no effect):
    # stored = 0.0
    # for i in midi_pitch:
    #     if i != stored:
    #         stored = i

    # segment sequence into individual midi notes
    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)

    # save note sequence to a midi file
    print("Saving MIDI to disk...")
    save_midi(outfile, notes, bpm)
    print("Conversion complete.")

# convertToMIDI('../testSongs/ksg.wav', '../testSongs/ksg.mid', 75, smooth=0, minduration=0.06)
def test_process_summary_param_kwargs_2():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(plugin_key=plugin_key, output="input-summary",
                         data=buf, sample_rate=rate)
    assert "vector" in rdict
    step, results = rdict["vector"]
    assert len(results) > 0
def rock(audio):
    jojo = vamp.collect(audio, config.fs, "pyin:pyin",
                        step_size=config.hopsize, output="notes")
    import pdb
    pdb.set_trace()
def test_collect_grid_one_sample_per_step():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "grid-oss")
    step, results = rdict["matrix"]
    assert abs(float(step) - (1024.0 / rate)) < eps
    assert len(results) == 10
    for i in range(len(results)):
        expected = np.array([(j + i + 2.0) / 30.0 for j in range(0, 10)])
        assert (abs(results[i] - expected) < eps).all()
def audio_to_midi_melodia(infile, outfile, bpm, smooth=0.25, minduration=0.1,
                          savejams=False):
    # define analysis parameters
    fs = 44100
    hop = 128

    # load audio using soundfile
    print("Loading audio...")
    data, sr = soundfile.read(infile)

    # mixdown to mono if needed
    if len(data.shape) > 1 and data.shape[1] > 1:
        data = data.mean(axis=1)

    # resample to 44100 if needed
    if sr != fs:
        data = resampy.resample(data, sr, fs)
        sr = fs

    # extract melody using melodia vamp plugin
    print("Extracting melody f0 with MELODIA...")
    melody = vamp.collect(data, sr, "mtg-melodia:melodia",
                          parameters={"voicing": 0.2})

    # hop = melody['vector'][0]
    pitch = melody['vector'][1]

    # impute missing 0's to compensate for starting timestamp
    pitch = np.insert(pitch, 0, [0] * 8)

    # debug
    # np.asarray(pitch).dump('f0.npy')
    # print(len(pitch))

    # convert f0 to midi notes
    print("Converting Hz to MIDI notes...")
    midi_pitch = hz2midi(pitch)

    # segment sequence into individual midi notes
    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)

    # save note sequence to a midi file
    print("Saving MIDI to disk...")
    save_midi(outfile, notes, bpm)

    if savejams:
        print("Saving JAMS to disk...")
        jamsfile = os.path.splitext(outfile)[0] + ".jams"
        track_duration = len(data) / float(fs)
        save_jams(jamsfile, notes, track_duration, os.path.basename(infile))

    print("Conversion complete.")
def test_collect_variable_sample_rate():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "curve-vsr")
    results = rdict["list"]
    assert len(results) == 10
    i = 0
    for r in results:
        assert r["timestamp"] == vamp.vampyhost.RealTime("seconds", i * 0.75)
        assert abs(r["values"][0] - i * 0.1) < eps
        i = i + 1
def main():
    fpath = input('Enter full path to audio file: ')
    data, rate = librosa.load(fpath)
    plugin_choice = choose_vamp_plugin()
    # this script assumes that the University of Alicante polyphonic
    # transcription plugin is being used
    num_voices = int(input('Enter desired number of voices: '))
    plugin_output = vamp.collect(data, rate, plugin_choice,
                                 parameters={'maxpolyphony': num_voices})
    note_start_lst, dur_lst, note_lst = parse_note_transcription_output(plugin_output)
    print('note_start_lst length: %d, dur_lst length: %d, note_lst length: %d.'
          % (len(note_start_lst), len(dur_lst), len(note_lst)))
    note_df = make_note_dataframe(note_start_lst, dur_lst, note_lst)
    print(note_df)
    make_reordered_wav_file(data, rate, note_df)
def test_process_summary_param_kwargs_3():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(
        plugin_key=plugin_key,
        output="input-summary",
        data=buf,
        sample_rate=rate,
        process_timestamp_method=vamp.vampyhost.SHIFT_DATA,
    )
    assert "vector" in rdict
    step, results = rdict["vector"]
    assert len(results) > 0
def test_collect_one_sample_per_step():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "input-timestamp")
    step, results = rdict["vector"]
    assert abs(float(step) - (1024.0 / rate)) < eps
    assert len(results) == 10
    for i in range(len(results)):
        # The timestamp should be the frame number of the first frame in
        # the input buffer
        expected = i * blocksize
        actual = results[i]
        assert actual == expected
def test_process_summary_param_kwargs_fail():
    buf = input_data(blocksize * 10)
    try:
        rdict = vamp.collect(
            plugin_key=plugin_key,
            output="input-summary",
            data=buf,
            sample_rate=rate,
            process_timestamp_method=vamp.vampyhost.SHIFT_DATA,
            unknown_argument=1,
        )
    except Exception:
        # unknown kwarg
        pass
def transcribe(self, audio_path):
    if not os.path.exists(audio_path):
        raise ValueError('Invalid audio path')
    x, fs = load(audio_path, mono=True)
    notes = vamp.collect(x, fs, "qm-vamp-plugins:qm-transcription",
                         output="transcription")['list']
    # each note event f is a dict; access its fields with:
    #   onset time: f['timestamp']
    #   duration:   f['duration']
    #   MIDI notes: f['values']
    return notes
def audio_to_midi_melodia(infile, outfile, bpm, smooth=0.25, minduration=0.1,
                          savejams=False):
    # define analysis parameters
    fs = 44100
    hop = 128

    # load audio using librosa
    print("Loading audio...")
    data, sr = librosa.load(infile, sr=fs, mono=True)

    # extract melody using melodia vamp plugin
    print("Extracting melody f0 with MELODIA...")
    melody = vamp.collect(data, sr, "mtg-melodia:melodia",
                          parameters={"voicing": 0.2})

    # hop = melody['vector'][0]
    pitch = melody['vector'][1]

    # impute missing 0's to compensate for starting timestamp
    pitch = np.insert(pitch, 0, [0] * 8)

    # debug
    # np.asarray(pitch).dump('f0.npy')
    # print(len(pitch))

    # convert f0 to midi notes
    print("Converting Hz to MIDI notes...")
    midi_pitch = hz2midi(pitch)

    # segment sequence into individual midi notes
    notes = midi_to_notes(midi_pitch, fs, hop, smooth, minduration)

    # save note sequence to a midi file
    print("Saving MIDI to disk...")
    save_midi(outfile, notes, bpm)

    if savejams:
        print("Saving JAMS to disk...")
        jamsfile = outfile.replace(".mid", ".jams")
        track_duration = len(data) / float(fs)
        save_jams(jamsfile, notes, track_duration, os.path.basename(infile))

    print("Conversion complete.")
def transcription_cal(audio_filename, monoNoteOut_filename):
    sr = 44100
    loader = es.MonoLoader(filename=audio_filename, downmix='mix', sampleRate=sr)
    audio = loader()

    # original pyin
    # data = vamp.collect(audio, sr, "pyin:pyin", output='notes')

    # modified pyin
    data = vamp.collect(audio, sr, "pyinbobigram:pyin-jingju", output='notes')

    # for note in data['list']:
    #     print(note['duration'], note['timestamp'], note['values'][0])

    with open(monoNoteOut_filename, 'w') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',')
        for note in data['list']:
            csv_writer.writerow([note['timestamp'], note['values'][0],
                                 note['duration']])
def get_pc_data_aubio(y, fs):
    """
    Use the aubio note transcription method to get a melodic sequence.
    """
    data = vamp.collect(y, fs, 'vamp-aubio:aubionotes')
    freqs = [d['values'][0] for d in data['list']]
    midi = librosa.core.hz_to_midi(freqs)
    midi = [int(m) for m in np.round(midi)]
    start_t = [d['timestamp'].to_float() for d in data['list']]
    end_t = start_t[1:]
    end_t.append(y.shape[0] / float(fs))

    # to pitch class representation
    labels = ['pc' + str(coreutils.midi_note_to_pc(n)) for n in midi]

    melody_sequence = coredata.Sequence(labels=labels,
                                        start_times=start_t,
                                        end_times=end_t)
    return melody_sequence
def get_midi_data_slivet(y, fs):
    """
    Use the Silvet note transcription method to get a melodic sequence.
    """
    data = vamp.collect(y, fs, 'silvet:silvet')
    labels = []
    start_t = []
    end_t = []
    for d in data['list']:
        n = librosa.note_to_midi(d['label'])
        # pc = coreutils.midi_note_to_pc(n)
        # label = 'pc' + str(pc)
        st = d['timestamp'].to_float()
        # et = st + d['duration'].to_float()
        # labels.append(label)
        labels.append(n)
        start_t.append(st)
        # end_t.append(et)
    end_t = start_t[1:]
    # for some reason the last duration gets screwed up
    # st = start_t[-1]
    # et = st + (float(y.shape[0]) / float(fs))
    delta_t = start_t[-1] - start_t[-2]
    et = end_t[-1] + delta_t
    end_t.append(et)

    # to pitch class representation
    labels = ['pc' + str(coreutils.midi_note_to_pc(n)) for n in labels]

    melody_sequence = coredata.Sequence(labels=labels,
                                        start_times=start_t,
                                        end_times=end_t)
    return melody_sequence
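# midi_note_to_pc is an external helper (coreutils); a common definition,
# assumed here for illustration and not confirmed by this code, maps MIDI
# note numbers to pitch classes 0-11 via modulo 12:
# def midi_note_to_pc(n):
#     return n % 12  # e.g. MIDI 60 (C4) -> pc 0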
def test_collect_runs_at_all():
    buf = input_data(blocksize * 10)
    rdict = vamp.collect(buf, rate, plugin_key, "input-timestamp")
    step, results = rdict["vector"]
    assert len(results) > 0
#fragment = '../traditional_dataset/allemande/fragments/allemande_first_fragment_nicolet'
#fragment = '../traditional_dataset/allemande/fragments/allemande_third_fragment_rampal'
#fragment = '../traditional_dataset/allemande/fragments/allemande_fourth_fragment_larrieu'
#fragment = '../traditional_dataset/allemande/fragments/allemande_fifth_fragment_preston'
fragment = '../traditional_dataset/sequenza/fragments/sequenza_first_fragment_robison'

audio_file = fragment + '_mono.wav'
gt_file = fragment + '.csv'

audio, sr = librosa.load(audio_file, sr=44100, mono=True)

#%%
# parameter values are specified by providing a dictionary:
params = {"minfqr": 100.0, "maxfqr": 2350.0, "voicing": 0.9,
          "minpeaksalience": 0.0}
data = vamp.collect(audio, sr, "mtg-melodia:melodia", parameters=params)
hop, melody_melodia = data['vector']
#melody_librosa, magnitudes = librosa.piptrack(audio, sr=sr, hop_length=128)
#print(hop)
#print(melody)

import numpy as np
timestamps = 8 * 128 / 44100.0 + np.arange(len(melody_melodia)) * (128 / 44100.0)

melody_hz = copy.deepcopy(melody_melodia)
melody_hz[melody_melodia <= 0] = None

#%%
import melosynth as ms
ms.melosynth_pitch(melody_melodia, 'melosynth.wav', fs=44100,
                   nHarmonics=1, square=True, useneg=False)