Example #1
def main():
    """ music_process """

    # define parser
    p = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''
        python music_process.py ./data/Jian_chrous.wav -o ./data/jian_beat.txt
        ''')
    # version
    p.add_argument('--version',
                   action='version',
                   version='FDGame-music-process-0.1')
    # input/output options
    # p.add_argument('single')
    # p.add_argument('./data/Jian_chrous.wav')
    io_arguments_single(p)
    ActivationsProcessor.add_arguments(p)

    # signal processing arguments
    SignalProcessor.add_arguments(p, norm=False, gain=0)

    # peak picking arguments
    DBNBeatTrackingProcessor.add_arguments(p)
    NeuralNetworkEnsemble.add_arguments(p, nn_files=None)

    # parse arguments
    args = p.parse_args()

    # set immutable arguments
    args.fps = 100

    # print arguments
    if args.verbose:
        print(args)

    # input processor
    if args.load:
        # load the activations from file
        in_processor = ActivationsProcessor(mode='r', **vars(args))
    else:
        # use a RNN to predict the beats
        in_processor = RNNBeatProcessor(**vars(args))

    # output processor
    if args.save:
        # save the RNN beat activations to file
        out_processor = ActivationsProcessor(mode='w', **vars(args))
    else:
        # track the beats with a DBN and output them
        beat_processor = DBNBeatTrackingProcessor(**vars(args))
        out_processor = [beat_processor, write_beats]

    # create an IOProcessor
    processor = IOProcessor(in_processor, out_processor)

    # and call the processing function
    args.func(processor, **vars(args))
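
These listing snippets omit their imports. A minimal import block that would make Example #1 self-contained could look like the sketch below; io_arguments_single is a project-local helper not shown here, and the exact module paths may vary between madmom versions:

import argparse

from madmom.audio.signal import SignalProcessor
from madmom.features import ActivationsProcessor
from madmom.features.beats import DBNBeatTrackingProcessor, RNNBeatProcessor
from madmom.io import write_beats
from madmom.ml.nn import NeuralNetworkEnsemble
from madmom.processors import IOProcessor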
Example #2
    def __init__(self,
                 fmin=65,
                 fmax=2100,
                 unique_filters=True,
                 models=None,
                 **kwargs):
        from ..models import CHROMA_DNN
        from ..audio.signal import SignalProcessor, FramedSignalProcessor
        from ..audio.stft import ShortTimeFourierTransformProcessor
        from ..audio.spectrogram import LogarithmicFilteredSpectrogramProcessor
        from madmom.ml.nn import NeuralNetworkEnsemble
        # signal pre-processing
        sig = SignalProcessor(num_channels=1, sample_rate=44100)
        frames = FramedSignalProcessor(frame_size=8192, fps=10)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        spec = LogarithmicFilteredSpectrogramProcessor(
            num_bands=24, fmin=fmin, fmax=fmax, unique_filters=unique_filters)
        # split the spectrogram into overlapping frames
        spec_signal = SignalProcessor(sample_rate=10)
        spec_frames = FramedSignalProcessor(frame_size=15, hop_size=1, fps=10)
        # predict chroma bins with a DNN
        nn = NeuralNetworkEnsemble.load(models or CHROMA_DNN, **kwargs)
        # instantiate a SequentialProcessor
        super(DeepChromaProcessor, self).__init__([
            sig, frames, stft, spec, spec_signal, spec_frames, _dcp_flatten, nn
        ])
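
This constructor is madmom's DeepChromaProcessor (madmom.audio.chroma). A hedged usage sketch, with an illustrative audio path:

from madmom.audio.chroma import DeepChromaProcessor

dcp = DeepChromaProcessor()   # builds the full pre-processing + DNN chain above
chroma = dcp('audio.wav')     # frame-wise chroma vectors (12 bins at 10 fps)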
Example #3
def Do():
    nn = NeuralNetworkEnsemble.load(DOWNBEATS_BLSTM)

    assert len(nn.processors) == 2
    assert type(nn.processors[0]) == madmom.processors.ParallelProcessor
    assert nn.processors[1] == madmom.ml.nn.average_predictions

    # 8 identical networks
    pp = nn.processors[0]
    # for p in pp.processors:
    #     print(p)

    # each network is a three-layer bidirectional network
    network = pp.processors[0]
    assert type(network) == madmom.ml.nn.NeuralNetwork

    BiLayers = network.layers
    print('biLayers size', len(BiLayers))
    PrintLayerParam(BiLayers[0])
    PrintLayerParam(BiLayers[1])
    PrintLayerParam(BiLayers[2])
    PrintFwdLayer(BiLayers[3])
    print(type(BiLayers))
    # print(BiLayers[3])  # a feedForwardLayer
    # BiLayers.pop()
    # return

    layer = BiLayers[0]
    assert type(layer) == madmom.ml.nn.layers.BidirectionalLayer

    lstmLayer = layer.fwd_layer
    assert type(lstmLayer) == madmom.ml.nn.layers.LSTMLayer

    # Test1(lstmLayer.input_gate)
    # Test2(lstmLayer)
    # Test3(lstmLayer)
    # Test4(layer)
    Test5(network)

    # GateParameter(lstmLayer.cell)
    # GateParameter(lstmLayer.input_gate)
    # GateParameter(lstmLayer.forget_gate)
    # GateParameter(lstmLayer.output_gate)

    print(type(lstmLayer.cell.activation_fn))

    print(lstmLayer.cell.activation_fn.__name__)
    # print(lstmLayer.cell.activation_fn == )
    print(lstmLayer.input_gate.activation_fn)
    print(lstmLayer.activation_fn)
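
The PrintLayerParam, PrintFwdLayer, GateParameter and Test* helpers are not shown in this example. A plausible sketch of PrintLayerParam, assuming it merely reports sub-layer types and weight shapes (this helper is hypothetical, not madmom API):

def PrintLayerParam(layer):
    # hypothetical helper: report what a (bidirectional) layer contains
    print(type(layer).__name__)
    for attr in ('fwd_layer', 'bwd_layer', 'weights', 'bias'):
        value = getattr(layer, attr, None)
        if value is not None:
            print(' ', attr, getattr(value, 'shape', type(value).__name__))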
Example #4
    def __init__(self, fmin=65, fmax=2100, unique_filters=True, models=None,
                 **kwargs):
        from ..models import CHROMA_DNN
        from ..audio.signal import SignalProcessor, FramedSignalProcessor
        from ..audio.spectrogram import LogarithmicFilteredSpectrogramProcessor
        from madmom.ml.nn import NeuralNetworkEnsemble

        # signal pre-processing
        sig = SignalProcessor(num_channels=1, sample_rate=44100)
        frames = FramedSignalProcessor(frame_size=8192, fps=10)
        spec = LogarithmicFilteredSpectrogramProcessor(
            num_bands=24, fmin=fmin, fmax=fmax, unique_filters=unique_filters)
        # split the spectrogram into overlapping frames
        spec_frames = FramedSignalProcessor(frame_size=15, hop_size=1)

        # predict chroma bins with a DNN ensemble
        nn = NeuralNetworkEnsemble.load(models or CHROMA_DNN, **kwargs)

        super(DeepChromaProcessor, self).__init__([
            sig, frames, spec, spec_frames, _dcp_flatten, nn
        ])
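
All of the DeepChromaProcessor variants here reference _dcp_flatten without defining it. In madmom's source it is a small module-level helper (kept outside the class so the processor remains picklable), approximately:

def _dcp_flatten(fs):
    # flatten each stack of spectrogram frames into a single feature vector
    return fs.reshape(len(fs), -1)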
Example #6
def build_cnn(madmom_processor_filename):
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)

    from madmom.ml.nn import NeuralNetworkEnsemble
    from madmom.processors import SequentialProcessor
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    frames = FramedSignalProcessor(frame_size=4096, hop_size=441 * 2)
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)

    # this is the crucial parameter: it was not whitelisted in 'canonicalize_audio_options'!
    spec = LogarithmicSpectrogramProcessor(add=1)
    # pre-process everything sequentially (_cnn_pad is a padding helper
    # defined elsewhere in the project)
    pre_processor = SequentialProcessor([
        sig, frames, stft, filt, spec, _cnn_pad
    ])
    # process the pre-processed signal with a NN
    nn = NeuralNetworkEnsemble.load([madmom_processor_filename])
    return SequentialProcessor([pre_processor, nn])
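
A hedged usage sketch for build_cnn; the model filename below is illustrative, not a file shipped with madmom:

cnn = build_cnn('onsets_cnn.pkl')   # hypothetical pickled madmom model file
activations = cnn('audio.wav')      # signal -> frames -> STFT -> log spectrogram -> CNN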
Example #7
    def __init__(self, sr=44100, **kwargs):
        from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
        from madmom.audio.stft import ShortTimeFourierTransformProcessor
        from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                              LogarithmicSpectrogramProcessor)
        from madmom.ml.nn import NeuralNetworkEnsemble
        from madmom.processors import SequentialProcessor
        sr_ratio = 44100 / sr
        # define pre-processing chain
        sig = SignalProcessor(num_channels=1, sample_rate=sr)
        frames = FramedSignalProcessor(frame_size=4096 // sr_ratio,
                                       fps=50 // sr_ratio)
        stft = ShortTimeFourierTransformProcessor()  # caching FFT window
        filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)
        spec = LogarithmicSpectrogramProcessor(add=1)
        # pre-processes everything sequentially
        pre_processor = SequentialProcessor(
            (sig, frames, stft, filt, spec, _cnn_pad))
        # process the pre-processed signal with a NN (VIENNA_MODEL_PATH and
        # ADSRMaestro are defined elsewhere in the project)
        nn = NeuralNetworkEnsemble.load(VIENNA_MODEL_PATH)
        # instantiate a SequentialProcessor
        super().__init__((pre_processor, nn))

        self.adsr = ADSRMaestro()
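
One caveat in this snippet: in Python 3, 44100 / sr is a float, so frame_size and fps end up as floats even for sr=44100. An integer-safe variant, assuming sr divides 44100 evenly:

from madmom.audio.signal import FramedSignalProcessor

sr = 22050                                # example input sample rate
sr_ratio = 44100 // sr                    # integer ratio (2 here)
frames = FramedSignalProcessor(frame_size=4096 // sr_ratio,
                               fps=50 // sr_ratio)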
Example #8
def main():
    """DBNBeatTracker"""

    # define parser
    p = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''
    The DBNBeatTracker.py program detects all beats in an audio file according to
    the method described in:

    "A Multi-Model Approach to Beat Tracking Considering Heterogeneous Music
     Styles"
    Sebastian Böck, Florian Krebs and Gerhard Widmer.
    Proceedings of the 15th International Society for Music Information
    Retrieval Conference (ISMIR), 2014.

    It does not use the multi-model (Section 2.2.) and selection stage (Section
    2.3), i.e. this version corresponds to the pure DBN version of the
    algorithm for which results are given in Table 2.

    Instead of the originally proposed state space and transition model for the
    DBN, the following is used:

    "An Efficient State Space Model for Joint Tempo and Meter Tracking"
    Florian Krebs, Sebastian Böck and Gerhard Widmer.
    Proceedings of the 16th International Society for Music Information
    Retrieval Conference (ISMIR), 2015.

    This program can be run in 'single' file mode to process a single audio
    file and write the detected beats to STDOUT or the given output file.

      $ DBNBeatTracker.py single INFILE [-o OUTFILE]

    If multiple audio files should be processed, the program can also be run
    in 'batch' mode to save the detected beats to files with the given suffix.

      $ DBNBeatTracker.py batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES

    If no output directory is given, the program writes the files with the
    detected beats to the same location as the audio files.

    The 'pickle' mode can be used to store the used parameters to be able to
    exactly reproduce experiments.

    ''')

    # version
    p.add_argument('--version',
                   action='version',
                   version='DBNBeatTracker.py.2016')
    # input/output options
    io_arguments(p, output_suffix='.beats.txt', online=True)
    ActivationsProcessor.add_arguments(p)
    # signal processing arguments
    SignalProcessor.add_arguments(p, norm=False, gain=0)
    # peak picking arguments
    DBNBeatTrackingProcessor.add_arguments(p)
    NeuralNetworkEnsemble.add_arguments(p, nn_files=None)

    # parse arguments
    args = p.parse_args()

    # set immutable arguments
    args.fps = 100

    # print arguments
    if args.verbose:
        print(args)

    # input processor
    if args.load:
        # load the activations from file
        in_processor = ActivationsProcessor(mode='r', **vars(args))
    else:
        # use a RNN to predict the beats
        in_processor = RNNBeatProcessor(**vars(args))

    # output processor
    if args.save:
        # save the RNN beat activations to file
        out_processor = ActivationsProcessor(mode='w', **vars(args))
    else:
        # track the beats with a DBN
        beat_processor = DBNBeatTrackingProcessor(**vars(args))
        # output handler
        from madmom.utils import write_events as writer
        # sequentially process everything
        out_processor = [beat_processor, writer]

    # create an IOProcessor
    processor = IOProcessor(in_processor, out_processor)
    # and call the processing function
    args.func(processor, **vars(args))
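
Setting the argparse plumbing aside, the same two-stage pipeline can be driven directly from Python; a minimal sketch using the madmom processors referenced above, with an illustrative audio path:

from madmom.features.beats import DBNBeatTrackingProcessor, RNNBeatProcessor

rnn = RNNBeatProcessor()                 # RNN ensemble -> frame-wise beat activations
act = rnn('audio.wav')                   # one activation value per frame (100 fps)
dbn = DBNBeatTrackingProcessor(fps=100)  # the DBN decodes activations into beat times
beats = dbn(act)                         # numpy array of beat times in seconds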
Example #9
def main():
    """DBNBeatTracker"""

    # define parser
    p = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''
    The DBNBeatTracker program detects all beats in an audio file according to
    the method described in:

    "A Multi-Model Approach to Beat Tracking Considering Heterogeneous Music
     Styles"
    Sebastian Böck, Florian Krebs and Gerhard Widmer.
    Proceedings of the 15th International Society for Music Information
    Retrieval Conference (ISMIR), 2014.

    It does not use the multi-model (Section 2.2.) and selection stage (Section
    2.3), i.e. this version corresponds to the pure DBN version of the
    algorithm for which results are given in Table 2.

    Instead of the originally proposed state space and transition model for the
    DBN, the following is used:

    "An Efficient State Space Model for Joint Tempo and Meter Tracking"
    Florian Krebs, Sebastian Böck and Gerhard Widmer.
    Proceedings of the 16th International Society for Music Information
    Retrieval Conference (ISMIR), 2015.

    This program can be run in 'single' file mode to process a single audio
    file and write the detected beats to STDOUT or the given output file.

      $ DBNBeatTracker single INFILE [-o OUTFILE]

    If multiple audio files should be processed, the program can also be run
    in 'batch' mode to save the detected beats to files with the given suffix.

      $ DBNBeatTracker batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES

    If no output directory is given, the program writes the files with the
    detected beats to the same location as the audio files.

    The 'pickle' mode can be used to store the used parameters to be able to
    exactly reproduce experiments.

    ''')
    # version
    p.add_argument('--version',
                   action='version',
                   version='DBNBeatTracker.2016')
    # input/output options
    io_arguments(p, output_suffix='.beats.txt', online=True)
    ActivationsProcessor.add_arguments(p)
    # signal processing arguments
    SignalProcessor.add_arguments(p, norm=False, gain=0)
    # peak picking arguments
    DBNBeatTrackingProcessor.add_arguments(p)
    NeuralNetworkEnsemble.add_arguments(p, nn_files=None)

    # parse arguments
    args = p.parse_args()

    # set immutable arguments
    args.fps = 100

    # print arguments
    if args.verbose:
        print(args)
    print("The following mesxxsage shows the args :\n", args,
          "\n\n\nshows the vars(args) message:\n", vars(args))
    '''
    The resulting args namespace looks like this:

    Namespace(correct=True, fps=100, func=<function process_online at 0x00000000055E9AC8>,
    gain=0, infile=None, load=False, max_bpm=215.0, min_bpm=55.0, nn_files=None, norm=False,
    num_frames=1, num_tempi=None, num_threads=1, observation_lambda=16, online=True,
    origin='stream', outfile=<open file '<stdout>', mode 'w' at 0x0000000002DFB0C0>,
    save=False, sep=None, threshold=0, transition_lambda=100, verbose=None)
    '''

    # input processor
    if args.load:
        # load the activations from file
        in_processor = ActivationsProcessor(mode='r', **vars(args))
    else:
        # use a RNN to predict the beats
        in_processor = RNNBeatProcessor(**vars(args))

    # output processor
    if args.save:
        # save the RNN beat activations to file
        out_processor = ActivationsProcessor(mode='w', **vars(args))
    else:
        # track the beats with a DBN and output them
        beat_processor = DBNBeatTrackingProcessor(**vars(args))
        out_processor = [beat_processor, write_beats]
        print("ok")

    # create an IOProcessor
    processor = IOProcessor(in_processor, out_processor)

    # and call the processing function
    args.func(processor, **vars(args))
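
As a readability tweak for the debug output above (a suggestion, not part of the original script), vars(args) can be pretty-printed instead of relying on the raw Namespace repr:

from pprint import pprint

if args.verbose:
    pprint(vars(args))   # one parsed option per line, sorted by key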