Example #1
import ast
from collections import OrderedDict

import numpy as np
import torch
from madmom.features.beats import RNNBeatProcessor
from madmom.features.tempo import TempoEstimationProcessor

# Project-specific helpers (Net, cqt_preprocess, BPM_selector, to_probability,
# dp_post_processing, _predict, mapping_seventh) are assumed to be importable
# from the surrounding project.


def predict(flac_path,
            title="",
            model_path="./model",
            diff_root_only=True,
            max_num_chord=4):
    label_path = "chord_labels.txt"

    # Estimate the BPM of the audio
    beat_proc = RNNBeatProcessor()
    tempo_proc = TempoEstimationProcessor(min_bpm=50, max_bpm=180, fps=100)

    beat_processed = beat_proc(flac_path)
    tempo_estimation = tempo_proc(beat_processed)

    BPM = BPM_selector(tempo_estimation)
    sec_per_beat = 60 / BPM

    # 2048-sample hop at a 16 kHz sample rate
    sec_per_frame = 2048 / 16000
    # Use an eighth note as the minimum duration of a chord
    min_duration = sec_per_beat / 2

    # Read the chord labels file
    with open(label_path) as f:
        chord_labels = ast.literal_eval(f.read())

    with torch.no_grad():
        # Process raw audio into a CQT spectrogram and add a batch dimension
        X = torch.from_numpy(
            np.expand_dims(cqt_preprocess(flac_path), axis=0)).float().cpu()

        # Load the model; strip the "module." prefix that
        # torch.nn.DataParallel adds to state-dict keys
        model = Net(1).cpu()
        state_dict = torch.load(model_path, map_location="cpu")["state_dict"]
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            name = k[7:]  # drop the "module." prefix
            new_state_dict[name] = v
        model.load_state_dict(new_state_dict)
        model.eval()

        # Estimate framewise chord probabilities
        estimation = model(X).data.cpu()[0][0]
        estimation = to_probability(estimation)

        # Post-processing
        estimation = dp_post_processing(estimation)

        # predict_list_majmin = _predict(estimation, chord_labels[13:], sec_per_frame, min_duration, mapping_majmin)
        predict_list_seventh = _predict(estimation, chord_labels[13:],
                                        sec_per_frame, min_duration,
                                        mapping_seventh)

    text = ''
    for chord in predict_list_seventh:
        text += f'{chord[0]}\t{chord[1]}\t{chord[2]}\n'

    return text
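
A minimal usage sketch, assuming the project's helpers and a trained checkpoint are in place; "song.flac" is a placeholder path, not a file from the source:

if __name__ == "__main__":
    # "song.flac" is a hypothetical input file
    lab = predict("song.flac", model_path="./model")
    # each line is presumably start time, end time, and chord label, tab-separated
    print(lab)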
Example #2
def getMadmomTempo(filename):
    """
    Call Madmom Tempo Estimation
    :return: Array of tempos sorted in decreasing order of strength
    """
    from madmom.features.beats import RNNBeatProcessor
    from madmom.features.tempo import TempoEstimationProcessor
    act = RNNBeatProcessor()(filename)
    proc = TempoEstimationProcessor(fps=100)
    res = proc(act)
    # res has shape (n, 2): (tempo in BPM, relative strength), sorted by
    # decreasing strength; return only the tempo column
    return res[:, 0]
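
A quick usage sketch; "song.wav" is a placeholder filename:

tempos = getMadmomTempo("song.wav")
print(f"strongest estimate: {tempos[0]:.1f} BPM")  # first entry has the highest strength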
Example #3
File: features.py  Project: ctralie/acoss
 def madmom_features(self, fps=100):
     """
     Call Madmom's implementation of RNN + DBN beat tracking. Madmom's
     results are returned in seconds, but they are rounded and converted
     to units of hop_size so that they line up with the other features.
     The RNN novelty function is computed as a side effect (it is the
     bottleneck of the computation), so it is returned as well.
     Parameters
     ----------
     fps: int
         Frames per second in processing
     Returns
     -------
     {
         'tempos': ndarray(n_levels, 2)
             An array of tempo estimates in beats per minute,
             along with their confidences
         'onsets': ndarray(n_onsets)
             Array of onsets, where each onset indexes into a particular window
         'novfn': ndarray(n_frames)
             Evaluation of the rnn audio novelty function at each audio
             frame, in time increments equal to self.hop_length
         'snovfn': ndarray(n_frames)
             Superflux audio novelty function at each audio frame,
             in time increments equal to self.hop_length
     }
     """
     from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
     from madmom.features.tempo import TempoEstimationProcessor
     from madmom.features.onsets import SpectralOnsetProcessor
     from madmom.audio.filters import LogarithmicFilterbank
     beatproc = DBNBeatTrackingProcessor(fps=fps)
     tempoproc = TempoEstimationProcessor(fps=fps)
     novfn = RNNBeatProcessor()(self.audio_file) # This step is the computational bottleneck
     beats = beatproc(novfn)
     tempos = tempoproc(novfn)
     onsets = np.array(np.round(beats*self.fs/float(self.hop_length)), dtype=np.int64)
     # Resample the audio novelty function to correspond to the 
     # correct hop length
     nframes = len(self.librosa_noveltyfn())
     novfn = np.interp(np.arange(nframes)*self.hop_length/float(self.fs), np.arange(len(novfn))/float(fps), novfn) 
     
     # For good measure, also compute and return superflux
     sodf = SpectralOnsetProcessor(onset_method='superflux', fps=fps,
                                   filterbank=LogarithmicFilterbank,
                                   num_bands=24, log=np.log10)
     snovfn = sodf(self.audio_file)
     snovfn = np.interp(np.arange(nframes)*self.hop_length/float(self.fs), np.arange(len(snovfn))/float(fps), snovfn) 
     return {'tempos':tempos, 'onsets':onsets, 'novfn':novfn, 'snovfn':snovfn}
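
The np.interp resampling idiom used twice above can be shown in isolation; all values below are arbitrary example numbers, not taken from the project:

import numpy as np

fps_src = 100                  # frame rate of the source curve (e.g. a novelty function)
fs, hop_length = 44100, 512    # target feature stream: hop_length samples at fs Hz
nframes = 200                  # number of target frames

curve = np.random.rand(500)    # stand-in for a novelty function sampled at fps_src
t_dst = np.arange(nframes) * hop_length / float(fs)   # target frame times (seconds)
t_src = np.arange(len(curve)) / float(fps_src)        # source frame times (seconds)
resampled = np.interp(t_dst, t_src, curve)            # linear interpolation onto t_dst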
Example #4
import time
import pydub
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
from multiprocessing import Process
from madmom.features.tempo import TempoEstimationProcessor
from madmom.features.beats import RNNBeatProcessor
import stretch_body.robot

robot = stretch_body.robot.Robot()
robot.startup()

robot.stow()

filename = "./audios/forest.wav"
proc = TempoEstimationProcessor(fps=100)
act = RNNBeatProcessor()(filename)
tempo = proc(act)

tempo = tempo[0][0]    # strongest tempo estimate, in BPM
t = 60.0 / tempo * 4   # duration of one 4-beat bar, in seconds

# interonsets = np.ediff1d(onsets)
# interonsets = np.add.reduceat(interonsets, np.arange(0, len(interonsets), 8))

# print(interonsets)

xrotate = 3.14
xtilt = 0.5
xpan = 1
xwrist = 1.5
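
The script is truncated before the motion code; as a sketch only, the bar duration t computed above could pace a beat-synchronized loop. move_joints below is a hypothetical placeholder, not a stretch_body API call:

def move_joints(step):
    # Hypothetical stand-in for the real joint commands; the actual script
    # presumably drives the joints using the amplitudes xrotate/xtilt/xpan/xwrist.
    print(f"bar {step}: rotate={xrotate}, tilt={xtilt}, pan={xpan}, wrist={xwrist}")

for step in range(8):
    start = time.time()
    move_joints(step)
    # sleep off the rest of the bar so each move starts on the downbeat
    time.sleep(max(0.0, t - (time.time() - start)))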