def CreateProcesser(fps=100):
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    # process the multi-resolution spec & diff in parallel
    # process the multi-resolution spec & diff in parallel
    multi = ParallelProcessor([])
    frame_sizes = [1024, 2048, 4096]
    num_bands = [3, 6, 12]
    for frame_size, num_bands in zip(frame_sizes, num_bands):
        frames = FramedSignalProcessor(frame_size=frame_size, fps=fps)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(num_bands=num_bands,
                                            fmin=30,
                                            fmax=17000,
                                            norm_filters=True)
        spec = LogarithmicSpectrogramProcessor(mul=1, add=1)
        diff = SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                              positive_diffs=True,
                                              stack_diffs=np.hstack)
        # process each frame size with spec and diff sequentially
        multi.append(SequentialProcessor((frames, stft, filt, spec, diff)))

    # stack the features and processes everything sequentially
    pre_processor = SequentialProcessor((sig, multi, np.hstack))
    return pre_processor
    def __init__(self, **kwargs):
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor,
                                              SpectrogramDifferenceProcessor)
        from madmom.processors import SequentialProcessor, ParallelProcessor

        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=44100)
        # process the multi-resolution spec & diff in parallel
        multi = ParallelProcessor([])
        for frame_size in [4096]:
            frames = FramedSignalProcessor(frame_size=frame_size, fps=100)
            stft = ShortTimeFourierTransformProcessor(
                window=np.hamming(frame_size))  # caching FFT window
            filt = FilteredSpectrogramProcessor(num_bands=12,
                                                fmin=30,
                                                fmax=16000,
                                                norm_filters=True)
            spec = LogarithmicSpectrogramProcessor(mul=5, add=1)
            #diff = SpectrogramDifferenceProcessor(diff_ratio=0.5, positive_diffs=True, stack_diffs=np.hstack)
            # process each frame size with spec and diff sequentially
            multi.append(SequentialProcessor((frames, stft, filt, spec)))
            #multi.append(SequentialProcessor((frames, stft, filt)))

        # stack the features and processes everything sequentially
        pre_processor = SequentialProcessor((sig, multi, np.hstack))
        super(PianoNoteProcessor, self).__init__(pre_processor)
    def __init__(self, fs, hopsize_t):
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.filters import MelFilterbank
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor)

        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=fs)
        frames = FramedSignalProcessor(frame_size=2048,
                                       hopsize=int(fs * hopsize_t))
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(filterbank=MelFilterbank,
                                            num_bands=80,
                                            fmin=27.5,
                                            fmax=16000,
                                            norm_filters=True,
                                            unique_filters=False)
        spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)

        single = SequentialProcessor([frames, stft, filt, spec])

        pre_processor = SequentialProcessor([sig, single])

        super(MadmomMelbankProcessor, self).__init__([pre_processor])
    def __init__(self, fs, hopsize_t):
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.filters import MelFilterbank
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor)
        # from madmom.features.onsets import _cnn_onset_processor_pad

        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=fs)
        # process the multi-resolution spec in parallel
        multi = ParallelProcessor([])
        for frame_size in [2048, 1024, 4096]:
            frames = FramedSignalProcessor(frame_size=frame_size, fps=100)
            stft = ShortTimeFourierTransformProcessor()  # caching FFT window
            filt = FilteredSpectrogramProcessor(
                filterbank=MelFilterbank, num_bands=80, fmin=27.5, fmax=16000,
                norm_filters=True, unique_filters=False)
            spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)
            # process each frame size with spec and diff sequentially
            multi.append(SequentialProcessor([frames, stft, filt, spec]))
        # stack the features (in depth) and pad at beginning and end
        stack = np.dstack
        # pad = _cnn_onset_processor_pad
        # pre-processes everything sequentially
        pre_processor = SequentialProcessor([sig, multi, stack])
        # instantiate a SequentialProcessor
        super(MadmomMelbank3ChannelsProcessor, self).__init__([pre_processor])
    def __init__(self, fs, hopsize_t):
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.filters import MelFilterbank
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor)
        # from madmom.features.onsets import _cnn_onset_processor_pad

        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=fs)
        # process the multi-resolution spec in parallel
        frames = FramedSignalProcessor(frame_size=2048,
                                       hopsize=int(fs * hopsize_t))
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(filterbank=MelFilterbank,
                                            num_bands=80,
                                            fmin=27.5,
                                            fmax=16000,
                                            norm_filters=True,
                                            unique_filters=False)
        spec = LogarithmicSpectrogramProcessor(log=np.log, add=EPSILON)

        # process each frame size with spec and diff sequentially
        single = SequentialProcessor([frames, stft, filt, spec])

        # pre-processes everything sequentially
        pre_processor = SequentialProcessor([sig, single])

        # instantiate a SequentialProcessor
        super(MadmomMelbankProcessor, self).__init__([pre_processor])
Exemple #6
0
def create_feature_extraction_pipeline(sr=44100,
                                       frame_sizes=[1024, 2048, 4096],
                                       fps_hz=100.):
    audio_loading = Pipeline([
        ("load_audio", FeatureExtractor(librosa.load, sr=sr, mono=True)),
        ("normalize", FeatureExtractor(librosa.util.normalize, norm=np.inf))
    ])

    sig = SignalProcessor(num_channels=1, sample_rate=sr)
    multi = ParallelProcessor([])
    for frame_size in frame_sizes:
        frames = FramedSignalProcessor(frame_size=frame_size, fps=fps_hz)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(filterbank=LogarithmicFilterbank,
                                            num_bands=12,
                                            fmin=30,
                                            fmax=17000,
                                            norm_filters=True,
                                            unique_filters=True)
        spec = LogarithmicSpectrogramProcessor(log=np.log10, mul=5, add=1)
        diff = SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                              positive_diffs=True,
                                              stack_diffs=np.hstack)
        # process each frame size with spec and diff sequentially
        multi.append(SequentialProcessor([frames, stft, filt, spec, diff]))
    feature_extractor = FeatureExtractor(
        SequentialProcessor([sig, multi, np.hstack]))

    feature_extraction_pipeline = Pipeline([("audio_loading", audio_loading),
                                            ("feature_extractor",
                                             feature_extractor)])
    return feature_extraction_pipeline
Exemple #7
0
 def __init__(self,
              sample_rate=44100,
              filter_length=8192,
              hop_length=8820,
              win_length=None,
              num_bands=24,
              fmin=65,
              fmax=2100,
              unique_filters=True):
     super(LMLFSpectrogram, self).__init__()
     self.stft = STFT(filter_length, hop_length, win_length)
     # filterbank from madmom
     fname = 'lmlf.wav'
     sf.write(fname, np.random.uniform(-1, 1, 100000), sample_rate)
     _sig = SignalProcessor(num_channels=1, sample_rate=sample_rate)
     _frames = FramedSignalProcessor(frame_size=filter_length,
                                     fps=sample_rate / hop_length)
     _stft = ShortTimeFourierTransformProcessor()  # caching FFT window
     _spec = LogarithmicFilteredSpectrogramProcessor(
         num_bands=num_bands,
         fmin=fmin,
         fmax=fmax,
         unique_filters=unique_filters)
     _spec(_stft(_frames(_sig(fname))))
     os.remove(fname)
     self.filterbank = torch.FloatTensor(np.asarray(_spec.filterbank))
Exemple #8
0
def _make_preprocessor(settings, pad):
    from madmom.audio.spectrogram import (
        LogarithmicFilteredSpectrogramProcessor,
        SpectrogramDifferenceProcessor)
    from madmom.audio.filters import LogarithmicFilterbank
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.processors import SequentialProcessor

    sig = SignalProcessor(num_channels=1, sample_rate=settings['sample_rate'])
    frames = FramedSignalProcessor(frame_size=settings['frame_size'],
                                   fps=settings['fps'])
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    spec = LogarithmicFilteredSpectrogramProcessor(
        num_channels=1,
        sample_rate=settings['sample_rate'],
        filterbank=LogarithmicFilterbank,
        frame_size=settings['frame_size'],
        fps=settings['fps'],
        num_bands=settings['num_bands'],
        fmin=settings['fmin'],
        fmax=settings['fmax'],
        norm_filters=settings['norm_filters'])
    if settings['diff']:
        if 'pad' in settings and settings['pad']:
            stack = _crnn_drum_processor_stack
        else:
            stack = np.hstack
        diff = SpectrogramDifferenceProcessor(diff_ratio=0.5,
                                              positive_diffs=True,
                                              stack_diffs=stack)
        # process input data
        if pad > 0:
            pre_processor = SequentialProcessor(
                (sig, frames, stft, spec, diff, PadProcessor(pad)))
        else:
            pre_processor = SequentialProcessor(
                (sig, frames, stft, spec, diff))

    else:
        if pad > 0:
            pre_processor = SequentialProcessor(
                (sig, frames, stft, spec, PadProcessor(pad)))
        else:
            pre_processor = SequentialProcessor((sig, frames, stft, spec))

    return pre_processor
Exemple #9
0
def build_cnn(madmom_processor_filename):
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)

    from madmom.ml.nn import NeuralNetworkEnsemble
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    frames = FramedSignalProcessor(frame_size=4096, hop_size=441 * 2)
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)

    # this is the money param! it was not whitelisted in 'canonicalize_audio_options'!
    spec = LogarithmicSpectrogramProcessor(add=1)
    # pre-processes everything sequentially
    pre_processor = SequentialProcessor([
        sig, frames, stft, filt, spec, _cnn_pad
    ])
    # process the pre-processed signal with a NN
    nn = NeuralNetworkEnsemble.load([madmom_processor_filename])
    return madmom.processors.SequentialProcessor([pre_processor, nn])
    def __init__(self, sr=44100, **kwargs):
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor)
        from madmom.ml.nn import NeuralNetworkEnsemble
        sr_ratio = 44100 / sr
        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=sr)
        frames = FramedSignalProcessor(frame_size=4096 // sr_ratio,
                                       fps=50 // sr_ratio)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)
        spec = LogarithmicSpectrogramProcessor(add=1)
        # pre-processes everything sequentially
        pre_processor = SequentialProcessor(
            (sig, frames, stft, filt, spec, _cnn_pad))
        # process the pre-processed signal with a NN
        nn = NeuralNetworkEnsemble.load(VIENNA_MODEL_PATH)
        # instantiate a SequentialProcessor
        super().__init__((pre_processor, nn))

        self.adsr = ADSRMaestro()
Exemple #11
0
    fmax = 2100

    # torch
    torch_lmlf = LMLFSpectrogram(sample_rate=sr,
                                 filter_length=filter_length,
                                 hop_length=hop_length,
                                 num_bands=num_bands,
                                 fmin=65,
                                 fmax=2100)
    lmlf = torch_lmlf(real_wave.unsqueeze(0))

    # madmom
    _sig = SignalProcessor(num_channels=1, sample_rate=sr)
    _frames = FramedSignalProcessor(frame_size=filter_length,
                                    fps=sr / hop_length)
    _stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    _spec = LogarithmicFilteredSpectrogramProcessor(num_bands=num_bands,
                                                    fmin=fmin,
                                                    fmax=fmax)
    sig = _sig(librosa.util.example_audio_file())
    frames = _frames(sig)
    stft = _stft(frames)
    spec = _spec(stft)

    diff = np.mean(np.abs(lmlf.squeeze(0).numpy() - spec))
    print('===== log-magnitude log-frequency spectrogram =====')
    print('mean difference between outputs from torch and madmom : ', diff)
    print('shape : ', lmlf.shape)

    # ! Mel-spectrogram
    n_mels = 128