Code Example #1
# NOTE: besides numpy / torch / madmom, this snippet relies on project-specific helpers
# (cqt_preprocess, Net, BPM_selector, to_probability, dp_post_processing, _predict,
# mapping_seventh) and on ast / collections.OrderedDict from the standard library.
def predict(flac_path,
            title="",
            model_path="./model",
            diff_root_only=True,
            max_num_chord=4):
    label_path = "chord_labels.txt"

    # Estimate the bpm of the audio
    beat_proc = RNNBeatProcessor()
    tempo_proc = TempoEstimationProcessor(min_bpm=50, max_bpm=180, fps=100)

    beat_processed = beat_proc(flac_path)
    tempo_estimation = tempo_proc(beat_processed)

    BPM = BPM_selector(tempo_estimation)
    sec_per_beat = 60 / BPM

    sec_per_frame = 2048 / 16000  # seconds per feature frame: 2048-sample hop at 16 kHz
    # set eighth note as the minimum duration of the chord
    min_duration = sec_per_beat / 2

    # Read chord labels file
    with open(label_path) as f:
        with torch.no_grad():
            chord_labels = ast.literal_eval(f.read())

            # Process raw audio
            X = cqt_preprocess(flac_path)
            X = Variable(
                torch.from_numpy(np.expand_dims(X, axis=0)).float().cpu())

            # Load model
            model = Net(1).cpu()
            state_dict = torch.load(model_path,
                                    map_location="cpu")["state_dict"]
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                # strip the "module." prefix left by nn.DataParallel checkpoints
                name = k[7:]
                new_state_dict[name] = v
            model.load_state_dict(new_state_dict)
            model.eval()

            # Estimate
            # model output is a (22, n_frames) activation map
            estimation = model(X).data.cpu()[0][0]
            estimation = to_probability(estimation)

            # Post-processing
            estimation = dp_post_processing(estimation)

            # predict_list_majmin = _predict(estimation, chord_labels[13:], sec_per_frame, min_duration, mapping_majmin)
            predict_list_seventh = _predict(estimation, chord_labels[13:],
                                            sec_per_frame, min_duration,
                                            mapping_seventh)

        text = ''
        for chord in predict_list_seventh:
            text += f'{chord[0]}\t{chord[1]}\t{chord[2]}\n'

        return text
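A minimal call sketch for the function above. The FLAC path below is a placeholder, and the function expects "chord_labels.txt" to be present in the working directory:

# hypothetical input file; the project supplies its own checkpoint at ./model
chords_tsv = predict("some_song.flac", model_path="./model")
print(chords_tsv)  # one tab-separated chord segment per line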
Code Example #2
def getMadmomTempo(filename):
    """
    Call Madmom Tempo Estimation
    :return: Array of tempos sorted in decreasing order of strength
    """
    from madmom.features.beats import RNNBeatProcessor
    from madmom.features.tempo import TempoEstimationProcessor
    act = RNNBeatProcessor()(filename)
    proc = TempoEstimationProcessor(fps=100)
    res = proc(act)
    # each row of res is (tempo [bpm], strength); return the tempo column only
    return res[:, 0]
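A short usage sketch (the file name is a placeholder); per the docstring, the strongest estimate comes first:

tempos = getMadmomTempo("song.wav")  # BPM values, sorted by decreasing strength
print(f"estimated tempo: {tempos[0]:.1f} BPM")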
Code Example #3
File: beats.py Project: EQ4/madmom
    def add_tempo_arguments(cls, parser, min_bpm=MIN_BPM, max_bpm=MAX_BPM,
                            act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH):
        """
        Add tempo related arguments to an existing parser.

        :param parser:      existing argparse parser
        :param min_bpm:     minimum tempo [bpm]
        :param max_bpm:     maximum tempo [bpm]
        :param act_smooth:  smooth the activations over N seconds
        :param hist_smooth: smooth the tempo histogram over N bins
        :return:            tempo argument parser group

        """
        # pylint: disable=arguments-differ

        # TODO: import the TempoEstimation here otherwise we have a
        #       loop. This is super ugly, but right now I can't think of a
        #       better solution...
        from madmom.features.tempo import TempoEstimationProcessor as tempo
        tempo.add_arguments(parser, method=None, min_bpm=min_bpm,
                            max_bpm=max_bpm, act_smooth=act_smooth,
                            hist_smooth=hist_smooth, alpha=None)
Code Example #4
File: features.py Project: ctralie/acoss
    def madmom_features(self, fps=100):
        """
        Call Madmom's implementation of RNN + DBN beat tracking. Madmom's
        results are returned in seconds, but they are rounded and converted
        to units of hop_size so that they line up with the features.
        The novelty function is also computed as a side effect (and is the
        bottleneck of the computation), so it is returned as well.

        Parameters
        ----------
        fps: int
            Frames per second in processing

        Returns
        -------
        {
            'tempos': ndarray(n_levels, 2)
                An array of tempo estimates in beats per minute,
                along with their confidences
            'onsets': ndarray(n_onsets)
                Array of onsets, where each onset indexes into a particular window
            'novfn': ndarray(n_frames)
                Evaluation of the RNN audio novelty function at each audio
                frame, in time increments equal to self.hop_length
            'snovfn': ndarray(n_frames)
                Superflux audio novelty function at each audio frame,
                in time increments equal to self.hop_length
        }
        """
        from madmom.features.beats import RNNBeatProcessor, DBNBeatTrackingProcessor
        from madmom.features.tempo import TempoEstimationProcessor
        from madmom.features.onsets import SpectralOnsetProcessor
        from madmom.audio.filters import LogarithmicFilterbank
        beatproc = DBNBeatTrackingProcessor(fps=fps)
        tempoproc = TempoEstimationProcessor(fps=fps)
        # this step is the computational bottleneck
        novfn = RNNBeatProcessor()(self.audio_file)
        beats = beatproc(novfn)
        tempos = tempoproc(novfn)
        onsets = np.array(np.round(beats * self.fs / float(self.hop_length)), dtype=np.int64)
        # Resample the audio novelty function to correspond to the correct hop length
        nframes = len(self.librosa_noveltyfn())
        novfn = np.interp(np.arange(nframes) * self.hop_length / float(self.fs),
                          np.arange(len(novfn)) / float(fps), novfn)

        # For good measure, also compute and return superflux
        sodf = SpectralOnsetProcessor(onset_method='superflux', fps=fps,
                                      filterbank=LogarithmicFilterbank,
                                      num_bands=24, log=np.log10)
        snovfn = sodf(self.audio_file)
        snovfn = np.interp(np.arange(nframes) * self.hop_length / float(self.fs),
                           np.arange(len(snovfn)) / float(fps), snovfn)
        return {'tempos': tempos, 'onsets': onsets, 'novfn': novfn, 'snovfn': snovfn}
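Assuming an instance of the surrounding acoss feature class (not shown here) is available as `feat`, the returned dictionary could be consumed roughly like this:

res = feat.madmom_features(fps=100)
bpm, confidence = res['tempos'][0]  # strongest tempo estimate and its confidence
beat_frames = res['onsets']         # beat positions in units of hop_length
novelty = res['novfn']              # RNN novelty function, resampled to hop_length increments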
Code Example #5
File: beats.py Project: EQ4/madmom
    def add_tempo_arguments(cls, parser, method=TEMPO_METHOD, min_bpm=MIN_BPM,
                            max_bpm=MAX_BPM, act_smooth=ACT_SMOOTH,
                            hist_smooth=HIST_SMOOTH, alpha=ALPHA):
        """
        Add tempo arguments to an existing parser.

        :param parser:      existing argparse parser
        :param method:      tempo estimation method ['comb', 'acf']
        :param min_bpm:     minimum tempo [bpm]
        :param max_bpm:     maximum tempo [bpm]
        :param act_smooth:  smooth the activations over N seconds
        :param hist_smooth: smooth the tempo histogram over N bins
        :param alpha:       scaling factor of the comb filter
        :return:            tempo argument parser group

        """
        # TODO: import the TempoEstimation here otherwise we have a
        #       loop. This is super ugly, but right now I can't think of a
        #       better solution...
        from madmom.features.tempo import TempoEstimationProcessor as Tempo
        return Tempo.add_arguments(parser, method=method, min_bpm=min_bpm,
                                   max_bpm=max_bpm, act_smooth=act_smooth,
                                   hist_smooth=hist_smooth, alpha=alpha)
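A rough sketch of how these classmethods plug into an argparse-based CLI; BeatTrackingProcessor is the class they belong to (see Code Example #8 below), assuming it is importable from madmom.features.beats:

import argparse
from madmom.features.beats import BeatTrackingProcessor

parser = argparse.ArgumentParser(description='simple beat tracker')
BeatTrackingProcessor.add_arguments(parser)        # --look_aside / --look_ahead options
BeatTrackingProcessor.add_tempo_arguments(parser)  # tempo estimation options
args = parser.parse_args()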
Code Example #6
File: beats.py Project: EQ4/madmom
    def __init__(self, look_aside=LOOK_ASIDE, look_ahead=LOOK_AHEAD, fps=None,
                 **kwargs):
        """
        Track the beats according to the previously determined (local) tempo
        by simply aligning them around the estimated position.

        :param look_aside: look this fraction of a beat interval to each side
                           of the assumed next beat position to look for the
                           most likely position of the next beat
        :param look_ahead: look N seconds in both directions to determine the
                           local tempo and align the beats accordingly

        If `look_ahead` is not set, a constant tempo throughout the whole piece
        is assumed. If `look_ahead` is set, the local tempo (in a range +/-
        look_ahead seconds around the actual position) is estimated and then
        the next beat is tracked accordingly. This procedure is repeated from
        the new position to the end of the piece.

        "Enhanced Beat Tracking with Context-Aware Neural Networks"
        Sebastian Böck and Markus Schedl
        Proceedings of the 14th International Conference on Digital Audio
        Effects (DAFx), 2011

        Instead of the auto-correlation based method for tempo estimation, it
        uses a comb filter by default. The behaviour can be controlled with
        the `tempo_method` parameter.

        """
        # import the TempoEstimation here otherwise we have a loop
        from madmom.features.tempo import TempoEstimationProcessor
        # save variables
        self.look_aside = look_aside
        self.look_ahead = look_ahead
        self.fps = fps
        # tempo estimator
        self.tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs)
Code Example #7
import time
import pydub
import numpy as np
from pydub import AudioSegment
from pydub.playback import play
from multiprocessing import Process
from madmom.features.tempo import TempoEstimationProcessor
from madmom.features.beats import RNNBeatProcessor
import stretch_body.robot  # required for the Robot() calls below (Hello Robot Stretch API)

robot = stretch_body.robot.Robot()
robot.startup()

robot.stow()

filename = "./audios/forest.wav"
proc = TempoEstimationProcessor(fps=100)
act = RNNBeatProcessor()(filename)
tempo = proc(act)

tempo = tempo[0][0]   # strongest tempo estimate [BPM]
t = 60.0 / tempo * 4  # duration of four beats in seconds

# interonsets = np.ediff1d(onsets)
# interonsets = np.add.reduceat(interonsets, np.arange(0, len(interonsets), 8))

# print(interonsets)

xrotate = 3.14
xtilt = 0.5
xpan = 1
xwrist = 1.5
Code Example #8
File: beats.py Project: EQ4/madmom
class BeatTrackingProcessor(Processor):
    """
    Class for tracking beats with a simple tempo estimation and beat aligning.

    """
    LOOK_ASIDE = 0.2
    LOOK_AHEAD = 10
    # tempo defaults
    TEMPO_METHOD = 'comb'
    MIN_BPM = 40
    MAX_BPM = 240
    ACT_SMOOTH = 0.09
    HIST_SMOOTH = 7
    ALPHA = 0.79

    def __init__(self, look_aside=LOOK_ASIDE, look_ahead=LOOK_AHEAD, fps=None,
                 **kwargs):
        """
        Track the beats according to the previously determined (local) tempo
        by simply aligning them around the estimated position.

        :param look_aside: look this fraction of a beat interval to each side
                           of the assumed next beat position to look for the
                           most likely position of the next beat
        :param look_ahead: look N seconds in both directions to determine the
                           local tempo and align the beats accordingly

        If `look_ahead` is not set, a constant tempo throughout the whole piece
        is assumed. If `look_ahead` is set, the local tempo (in a range +/-
        look_ahead seconds around the actual position) is estimated and then
        the next beat is tracked accordingly. This procedure is repeated from
        the new position to the end of the piece.

        "Enhanced Beat Tracking with Context-Aware Neural Networks"
        Sebastian Böck and Markus Schedl
        Proceedings of the 14th International Conference on Digital Audio
        Effects (DAFx), 2011

        Instead of the auto-correlation based method for tempo estimation, it
        uses a comb filter by default. The behaviour can be controlled with
        the `tempo_method` parameter.

        """
        # import the TempoEstimation here otherwise we have a loop
        from madmom.features.tempo import TempoEstimationProcessor
        # save variables
        self.look_aside = look_aside
        self.look_ahead = look_ahead
        self.fps = fps
        # tempo estimator
        self.tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs)

    def process(self, activations):
        """
        Detect the beats in the given activation function.

        :param activations: beat activation function
        :return:            detected beat positions [seconds]

        """
        # smooth activations
        act_smooth = int(self.fps * self.tempo_estimator.act_smooth)
        activations = smooth_signal(activations, act_smooth)
        # TODO: refactor interval stuff to use TempoEstimation
        # if look_ahead is not defined, assume a global tempo
        if self.look_ahead is None:
            # create an interval histogram
            histogram = self.tempo_estimator.interval_histogram(activations)
            # get the dominant interval
            interval = self.tempo_estimator.dominant_interval(histogram)
            # detect beats based on this interval
            detections = detect_beats(activations, interval, self.look_aside)
        else:
            # allow varying tempo
            look_ahead_frames = int(self.look_ahead * self.fps)
            # detect the beats
            detections = []
            pos = 0
            # TODO: make this _much_ faster!
            while pos < len(activations):
                # look N frames around the actual position
                start = pos - look_ahead_frames
                end = pos + look_ahead_frames
                if start < 0:
                    # pad with zeros
                    act = np.append(np.zeros(-start), activations[0:end])
                elif end > len(activations):
                    # append zeros accordingly
                    zeros = np.zeros(end - len(activations))
                    act = np.append(activations[start:], zeros)
                else:
                    act = activations[start:end]
                # create an interval histogram
                histogram = self.tempo_estimator.interval_histogram(act)
                # get the dominant interval
                interval = self.tempo_estimator.dominant_interval(histogram)
                # add the offset (i.e. the new detected start position)
                positions = detect_beats(act, interval, self.look_aside)
                # correct the beat positions
                positions += start
                # search the closest beat to the predicted beat position
                pos = positions[(np.abs(positions - pos)).argmin()]
                # append to the beats
                detections.append(pos)
                pos += interval

        # convert detected beats to a list of timestamps
        detections = np.array(detections) / float(self.fps)
        # remove beats with negative times and return them
        return detections[np.searchsorted(detections, 0):]
        # only return beats with a bigger inter beat interval than that of the
        # maximum allowed tempo
        # return np.append(detections[0], detections[1:][np.diff(detections) >
        #                                                (60. / max_bpm)])

    @classmethod
    def add_arguments(cls, parser, look_aside=LOOK_ASIDE,
                      look_ahead=LOOK_AHEAD):
        """
        Add beat tracking related arguments to an existing parser.

        :param parser:     existing argparse parser
        :param look_aside: look this fraction of a beat interval to each side
                           of the assumed next beat position to look for the
                           most likely position of the next beat
        :param look_ahead: look N seconds in both directions to determine the
                           local tempo and align the beats accordingly
        :return:           beat argument parser group

        Parameters are included in the group only if they are not 'None'.

        """
        # add beat detection related options to the existing parser
        g = parser.add_argument_group('beat detection arguments')
        # TODO: unify look_aside with CRFBeatDetection's interval_sigma
        if look_aside is not None:
            g.add_argument('--look_aside', action='store', type=float,
                           default=look_aside,
                           help='look this fraction of a beat interval to '
                                'each side of the assumed next beat position '
                                'to look for the most likely position of the '
                                'next beat [default=%(default).2f]')
        if look_ahead is not None:
            g.add_argument('--look_ahead', action='store', type=float,
                           default=look_ahead,
                           help='look this many seconds in both directions '
                                'to determine the local tempo and align the '
                                'beats accordingly [default=%(default).2f]')
        # return the argument group so it can be modified if needed
        return g

    @classmethod
    def add_tempo_arguments(cls, parser, method=TEMPO_METHOD, min_bpm=MIN_BPM,
                            max_bpm=MAX_BPM, act_smooth=ACT_SMOOTH,
                            hist_smooth=HIST_SMOOTH, alpha=ALPHA):
        """
        Add tempo arguments to an existing parser.

        :param parser:      existing argparse parser
        :param method:      tempo estimation method ['comb', 'acf']
        :param min_bpm:     minimum tempo [bpm]
        :param max_bpm:     maximum tempo [bpm]
        :param act_smooth:  smooth the activations over N seconds
        :param hist_smooth: smooth the tempo histogram over N bins
        :param alpha:       scaling factor of the comb filter
        :return:            tempo argument parser group

        """
        # TODO: import the TempoEstimation here otherwise we have a
        #       loop. This is super ugly, but right now I can't think of a
        #       better solution...
        from madmom.features.tempo import TempoEstimationProcessor as Tempo
        return Tempo.add_arguments(parser, method=method, min_bpm=min_bpm,
                                   max_bpm=max_bpm, act_smooth=act_smooth,
                                   hist_smooth=hist_smooth, alpha=alpha)
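A minimal end-to-end sketch using this class, following the same call pattern as the other examples (the audio file name is a placeholder):

from madmom.features.beats import RNNBeatProcessor, BeatTrackingProcessor

act = RNNBeatProcessor()("song.wav")   # beat activation function
proc = BeatTrackingProcessor(fps=100)  # fps must match the activation frame rate
beats = proc(act)                      # detected beat positions in seconds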