def predict(flac_path, title="", model_path="./model", diff_root_only=True, max_num_chord=4):
    """Transcribe the chords of an audio file into tab-separated text.

    The tempo is estimated first so that the shortest chord allowed lasts an
    eighth note. The pre-processed audio is then run through the network on
    the CPU, and the post-processed estimation is decoded by ``_predict``
    with the seventh-chord mapping.

    Parameters
    ----------
    flac_path : str
        Path of the audio file; handed to the beat processor and to
        ``cqt_preprocess``.
    title : str
        Not used by this function; kept so existing callers keep working.
    model_path : str
        Checkpoint read with ``torch.load``; it must hold a ``"state_dict"``
        entry.
    diff_root_only : bool
        Not used by this function; kept so existing callers keep working.
    max_num_chord : int
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    str
        One line per item returned by ``_predict``: the item's first three
        fields separated by tabs and terminated by a newline (presumably
        start, end and chord label -- confirm against ``_predict``).
    """
    label_path = "chord_labels.txt"

    # Estimate the bpm of the audio
    beat_proc = RNNBeatProcessor()
    tempo_proc = TempoEstimationProcessor(min_bpm=50, max_bpm=180, fps=100)
    tempo_estimation = tempo_proc(beat_proc(flac_path))

    BPM = BPM_selector(tempo_estimation)
    sec_per_beat = 60 / BPM
    # Presumably hop length / sample rate of the feature front end -- TODO confirm
    sec_per_frame = 2048 / 16000
    # Set eighth note as the minimum duration of the chord
    min_duration = sec_per_beat / 2

    # Read chord labels file; the handle is released before the heavy work
    with open(label_path) as f:
        chord_labels = ast.literal_eval(f.read())

    # Process raw audio and add a leading batch axis
    X = cqt_preprocess(flac_path)
    X = torch.from_numpy(np.expand_dims(X, axis=0)).float().cpu()

    # Load model
    model = Net(1).cpu()
    state_dict = torch.load(model_path, map_location="cpu")["state_dict"]
    # Every key loses its first 7 characters (presumably the "module." prefix
    # added by nn.DataParallel -- confirm against how the checkpoint was saved)
    model.load_state_dict(
        OrderedDict((k[7:], v) for k, v in state_dict.items())
    )
    model.eval()

    # Estimate; no gradients are needed for inference
    with torch.no_grad():
        estimation = model(X).detach().cpu()[0][0]
    estimation = to_probability(estimation)

    # Post-processing
    estimation = dp_post_processing(estimation)

    # NOTE: a decoding with `mapping_majmin` used to be computed here as well
    # but was disabled; only the seventh-chord mapping is produced.
    predict_list_seventh = _predict(
        estimation, chord_labels[13:], sec_per_frame, min_duration, mapping_seventh
    )

    return ''.join(
        f'{chord[0]}\t{chord[1]}\t{chord[2]}\n' for chord in predict_list_seventh
    )
def getMadmomTempo(filename):
    """
    Estimate the tempo of an audio file with madmom.

    The file is passed through madmom's ``RNNBeatProcessor`` and the result
    is fed to a ``TempoEstimationProcessor`` running at 100 frames per
    second.

    :param filename: Audio file handed to ``RNNBeatProcessor``
    :return: Array of tempos sorted in decreasing order of strength
        (first column of the tempo estimator's output)
    """
    from madmom.features.beats import RNNBeatProcessor
    from madmom.features.tempo import TempoEstimationProcessor

    beat_activation = RNNBeatProcessor()(filename)
    tempo_estimator = TempoEstimationProcessor(fps=100)
    tempo_table = tempo_estimator(beat_activation)
    # Keep only the first column of the estimator's output
    return tempo_table[:, 0]
def madmom_features(self, fps=100):
    """
    Run madmom's RNN + DBN beat tracker on ``self.audio_file``.

    Madmom reports beats in seconds; they are rounded and converted to
    units of ``self.hop_length`` so they line up with the other features.
    The RNN novelty function falls out of the beat tracking (and is the
    expensive part), so it is returned too, together with a superflux
    novelty function. Both novelty functions are linearly interpolated
    onto the time grid defined by ``self.hop_length`` and ``self.fs``.

    Parameters
    ----------
    fps: int
        Frames per second used by the madmom processors

    Returns
    -------
    {
        'tempos': ndarray(n_levels, 2)
            Tempo estimates in beats per minute, along with their
            confidences
        'onsets': ndarray(n_onsets)
            Beat onsets, each expressed as a window index
        'novfn': ndarray(n_frames)
            RNN audio novelty function, resampled to increments of
            self.hop_length
        'snovfn': ndarray(n_frames)
            Superflux audio novelty function, resampled to increments
            of self.hop_length
    }
    """
    from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
    from madmom.features.tempo import TempoEstimationProcessor
    from madmom.features.onsets import SpectralOnsetProcessor
    from madmom.audio.filters import LogarithmicFilterbank

    beat_tracker = DBNBeatTrackingProcessor(fps=fps)
    tempo_estimator = TempoEstimationProcessor(fps=fps)

    # This step is the computational bottleneck
    rnn_activation = RNNBeatProcessor()(self.audio_file)
    beat_times = beat_tracker(rnn_activation)
    tempos = tempo_estimator(rnn_activation)

    # Seconds -> nearest window index
    onsets = np.round(beat_times * self.fs / float(self.hop_length)).astype(np.int64)

    # Target time grid: one sample per hop, as many frames as the librosa
    # novelty function has
    nframes = len(self.librosa_noveltyfn())
    frame_times = np.arange(nframes) * self.hop_length / float(self.fs)

    # Resample the RNN novelty function onto the hop-length grid
    rnn_times = np.arange(len(rnn_activation)) / float(fps)
    novfn = np.interp(frame_times, rnn_times, rnn_activation)

    # For good measure, also compute and resample superflux
    superflux_proc = SpectralOnsetProcessor(
        onset_method='superflux',
        fps=fps,
        filterbank=LogarithmicFilterbank,
        num_bands=24,
        log=np.log10,
    )
    superflux = superflux_proc(self.audio_file)
    superflux_times = np.arange(len(superflux)) / float(fps)
    snovfn = np.interp(frame_times, superflux_times, superflux)

    return {'tempos': tempos, 'onsets': onsets, 'novfn': novfn, 'snovfn': snovfn}
import time
from multiprocessing import Process

import numpy as np
import pydub
# `stretch_body.robot.Robot()` is used below but the visible import block did
# not import it; imported here so the script does not fail with a NameError.
import stretch_body.robot
from pydub import AudioSegment
from pydub.playback import play
from madmom.features.tempo import TempoEstimationProcessor
from madmom.features.beats import RNNBeatProcessor

# Start the robot and stow it before anything else happens
robot = stretch_body.robot.Robot()
robot.startup()
robot.stow()

# Estimate the tempo of the audio track
filename = "./audios/forest.wav"
proc = TempoEstimationProcessor(fps=100)
act = RNNBeatProcessor()(filename)
# First row, first column of the estimator output (presumably the strongest
# tempo in BPM -- confirm against the madmom documentation)
tempo = proc(act)[0][0]
# Length of four beats, in seconds if `tempo` is in beats per minute
t = 60.0 / tempo * 4

# Motion constants; presumably joint targets consumed further down the
# script -- units are not visible from here, confirm before changing
xrotate = 3.14
xtilt = 0.5
xpan = 1
xwrist = 1.5