def main(): """ music_process """ # define parser p = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=''' python music_process.py ./data/Jian_chrous.wav -o ./data/jian_beat.txt ''') # version p.add_argument('--version', action='version', version='FDGame-music-process-0.1') # input/output options # p.add_argument('single') # p.add_argument('./data/Jian_chrous.wav') io_arguments_single(p) ActivationsProcessor.add_arguments(p) # signal processing arguments SignalProcessor.add_arguments(p, norm=False, gain=0) # peak picking arguments DBNBeatTrackingProcessor.add_arguments(p) NeuralNetworkEnsemble.add_arguments(p, nn_files=None) # parse arguments args = p.parse_args() # set immutable arguments args.fps = 100 # print arguments if args.verbose: print(args) # input processor if args.load: # load the activations from file in_processor = ActivationsProcessor(mode='r', **vars(args)) else: # use a RNN to predict the beats in_processor = RNNBeatProcessor(**vars(args)) # output processor if args.save: # save the RNN beat activations to file out_processor = ActivationsProcessor(mode='w', **vars(args)) else: # track the beats with a DBN and output them beat_processor = DBNBeatTrackingProcessor(**vars(args)) out_processor = [beat_processor, write_beats] # create an IOProcessor processor = IOProcessor(in_processor, out_processor) # and call the processing function args.func(processor, **vars(args))
def __init__(self, fmin=65, fmax=2100, unique_filters=True, models=None,
             **kwargs):
    from ..models import CHROMA_DNN
    from ..audio.signal import SignalProcessor, FramedSignalProcessor
    from ..audio.stft import ShortTimeFourierTransformProcessor
    from ..audio.spectrogram import LogarithmicFilteredSpectrogramProcessor
    from madmom.ml.nn import NeuralNetworkEnsemble
    # signal pre-processing
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    frames = FramedSignalProcessor(frame_size=8192, fps=10)
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    spec = LogarithmicFilteredSpectrogramProcessor(
        num_bands=24, fmin=fmin, fmax=fmax, unique_filters=unique_filters)
    # split the spectrogram into overlapping frames
    spec_signal = SignalProcessor(sample_rate=10)
    spec_frames = FramedSignalProcessor(frame_size=15, hop_size=1, fps=10)
    # predict chroma bins with a DNN
    nn = NeuralNetworkEnsemble.load(models or CHROMA_DNN, **kwargs)
    # instantiate a SequentialProcessor
    super(DeepChromaProcessor, self).__init__([
        sig, frames, stft, spec, spec_signal, spec_frames, _dcp_flatten, nn
    ])
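# Hedged usage sketch for the processor defined above: in madmom this
# __init__ belongs to madmom.audio.chroma.DeepChromaProcessor; the audio path
# is a placeholder.
from madmom.audio.chroma import DeepChromaProcessor

dcp = DeepChromaProcessor()
chroma = dcp('./data/Jian_chrous.wav')  # one 12-bin chroma vector per frame
print(chroma.shape)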
import madmom
from madmom.ml.nn import NeuralNetworkEnsemble
from madmom.models import DOWNBEATS_BLSTM


def Do():
    # NB: PrintLayerParam, PrintFwdLayer and Test5 are debug helpers defined
    # elsewhere in this script
    nn = NeuralNetworkEnsemble.load(DOWNBEATS_BLSTM)
    assert len(nn.processors) == 2
    assert type(nn.processors[0]) == madmom.processors.ParallelProcessor
    assert nn.processors[1] == madmom.ml.nn.average_predictions
    # 8 identical networks
    pp = nn.processors[0]
    # for p in pp.processors:
    #     print(p)
    # each network is a three-layer bidirectional network
    network = pp.processors[0]
    assert type(network) == madmom.ml.nn.NeuralNetwork
    BiLayers = network.layers
    print('biLayers size', len(BiLayers))
    PrintLayerParam(BiLayers[0])
    PrintLayerParam(BiLayers[1])
    PrintLayerParam(BiLayers[2])
    PrintFwdLayer(BiLayers[3])
    print(type(BiLayers))
    # print(BiLayers[3])  # a FeedForwardLayer
    # BiLayers.pop()
    # return
    layer = BiLayers[0]
    assert type(layer) == madmom.ml.nn.layers.BidirectionalLayer
    lstmLayer = layer.fwd_layer
    assert type(lstmLayer) == madmom.ml.nn.layers.LSTMLayer
    # Test1(lstmLayer.input_gate)
    # Test2(lstmLayer)
    # Test3(lstmLayer)
    # Test4(layer)
    Test5(network)
    # GateParameter(lstmLayer.cell)
    # GateParameter(lstmLayer.input_gate)
    # GateParameter(lstmLayer.forget_gate)
    # GateParameter(lstmLayer.output_gate)
    print(type(lstmLayer.cell.activation_fn))
    print(lstmLayer.cell.activation_fn.__name__)
    # print(lstmLayer.cell.activation_fn == )
    print(lstmLayer.input_gate.activation_fn)
    print(lstmLayer.activation_fn)
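# The Print* helpers called above are not shown in this file; here is a
# minimal sketch of what a PrintLayerParam-style helper could look like. The
# gate attribute names (weights, bias, recurrent_weights) follow
# madmom.ml.nn.layers, but treat this as an assumption, not the original
# helper.
def PrintLayerParamSketch(bi_layer):
    """Print weight shapes of both directions of a BidirectionalLayer."""
    for direction, lstm in (('fwd', bi_layer.fwd_layer),
                            ('bwd', bi_layer.bwd_layer)):
        for gate_name in ('input_gate', 'forget_gate', 'cell', 'output_gate'):
            gate = getattr(lstm, gate_name)
            print(direction, gate_name,
                  'W:', gate.weights.shape,
                  'b:', gate.bias.shape,
                  'W_rec:', gate.recurrent_weights.shape)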
def __init__(self, fmin=65, fmax=2100, unique_filters=True, models=None,
             **kwargs):
    from ..models import CHROMA_DNN
    from ..audio.signal import SignalProcessor, FramedSignalProcessor
    from ..audio.spectrogram import LogarithmicFilteredSpectrogramProcessor
    from madmom.ml.nn import NeuralNetworkEnsemble
    # signal pre-processing; unlike the variant above, the explicit STFT and
    # spectrogram-signal steps are omitted
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    frames = FramedSignalProcessor(frame_size=8192, fps=10)
    spec = LogarithmicFilteredSpectrogramProcessor(
        num_bands=24, fmin=fmin, fmax=fmax, unique_filters=unique_filters)
    # split the spectrogram into overlapping frames
    spec_frames = FramedSignalProcessor(frame_size=15, hop_size=1)
    # predict chroma bins with a DNN
    nn = NeuralNetworkEnsemble.load(models or CHROMA_DNN, **kwargs)
    # chain everything into a SequentialProcessor
    super(DeepChromaProcessor, self).__init__([
        sig, frames, spec, spec_frames, _dcp_flatten, nn
    ])
def build_cnn(madmom_processor_filename):
    import madmom.processors
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)
    from madmom.ml.nn import NeuralNetworkEnsemble
    from madmom.processors import SequentialProcessor
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=44100)
    frames = FramedSignalProcessor(frame_size=4096, hop_size=441 * 2)
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)
    # this is the crucial parameter: it was not whitelisted in
    # 'canonicalize_audio_options'!
    spec = LogarithmicSpectrogramProcessor(add=1)
    # pre-process everything sequentially
    pre_processor = SequentialProcessor([
        sig, frames, stft, filt, spec, _cnn_pad
    ])
    # process the pre-processed signal with a NN
    nn = NeuralNetworkEnsemble.load([madmom_processor_filename])
    return madmom.processors.SequentialProcessor([pre_processor, nn])
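# Hypothetical usage of build_cnn: the model path below is a placeholder for
# a pickled madmom neural-network model file; the returned processor maps an
# audio file to the ensemble's frame-wise activations.
cnn = build_cnn('/path/to/model.pkl')
activations = cnn('./data/Jian_chrous.wav')
print(activations.shape)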
def __init__(self, sr=44100, **kwargs):
    from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
    from madmom.audio.stft import ShortTimeFourierTransformProcessor
    from madmom.audio.spectrogram import (FilteredSpectrogramProcessor,
                                          LogarithmicSpectrogramProcessor)
    from madmom.ml.nn import NeuralNetworkEnsemble
    # scale frame size and rate so their duration in seconds stays constant
    sr_ratio = 44100 / sr
    # define pre-processing chain
    sig = SignalProcessor(num_channels=1, sample_rate=sr)
    frames = FramedSignalProcessor(frame_size=4096 // sr_ratio,
                                   fps=50 // sr_ratio)
    stft = ShortTimeFourierTransformProcessor()  # caching FFT window
    filt = FilteredSpectrogramProcessor(num_bands=24, fmin=30, fmax=10000)
    spec = LogarithmicSpectrogramProcessor(add=1)
    # pre-process everything sequentially
    pre_processor = SequentialProcessor(
        (sig, frames, stft, filt, spec, _cnn_pad))
    # process the pre-processed signal with a NN
    nn = NeuralNetworkEnsemble.load(VIENNA_MODEL_PATH)
    # instantiate a SequentialProcessor
    super().__init__((pre_processor, nn))
    self.adsr = ADSRMaestro()
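# Quick arithmetic check of the sr_ratio scaling above (plain Python, no
# madmom calls): halving the sample rate halves both frame_size and fps, so
# the frame length and hop stay constant in seconds. Note that 44100 / sr is
# a float in Python 3, so the derived values are floats too.
for sr in (44100, 22050):
    sr_ratio = 44100 / sr
    print(sr, 'frame_size:', 4096 // sr_ratio, 'fps:', 50 // sr_ratio)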
def main(): """DBNBeatTracker""" # define parser p = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=''' The DBNBeatTracker.py program detects all beats in an audio file according to the method described in: "A Multi-Model Approach to Beat Tracking Considering Heterogeneous Music Styles" Sebastian Böck, Florian Krebs and Gerhard Widmer. Proceedings of the 15th International Society for Music Information Retrieval Conference (ISMIR), 2014. It does not use the multi-model (Section 2.2.) and selection stage (Section 2.3), i.e. this version corresponds to the pure DBN version of the algorithm for which results are given in Table 2. Instead of the originally proposed state space and transition model for the DBN, the following is used: "An Efficient State Space Model for Joint Tempo and Meter Tracking" Florian Krebs, Sebastian Böck and Gerhard Widmer. Proceedings of the 16th International Society for Music Information Retrieval Conference (ISMIR), 2015. This program can be run in 'single' file mode to process a single audio file and write the detected beats to STDOUT or the given output file. $ DBNBeatTracker.py single INFILE [-o OUTFILE] If multiple audio files should be processed, the program can also be run in 'batch' mode to save the detected beats to files with the given suffix. $ DBNBeatTracker.py batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES If no output directory is given, the program writes the files with the detected beats to the same location as the audio files. The 'pickle' mode can be used to store the used parameters to be able to exactly reproduce experiments. ''') # version p.add_argument('--version', action='version', version='DBNBeatTracker.py.2016') # input/output options io_arguments(p, output_suffix='.beats.txt', online=True) ActivationsProcessor.add_arguments(p) # signal processing arguments SignalProcessor.add_arguments(p, norm=False, gain=0) # peak picking arguments DBNBeatTrackingProcessor.add_arguments(p) NeuralNetworkEnsemble.add_arguments(p, nn_files=None) # parse arguments args = p.parse_args() # set immutable arguments args.fps = 100 # print arguments if args.verbose: print(args) # input processor if args.load: # load the activations from file in_processor = ActivationsProcessor(mode='r', **vars(args)) else: # use a RNN to predict the beats in_processor = RNNBeatProcessor(**vars(args)) # output processor if args.save: # save the RNN beat activations to file out_processor = ActivationsProcessor(mode='w', **vars(args)) else: # track the beats with a DBN beat_processor = DBNBeatTrackingProcessor(**vars(args)) # output handler from madmom.utils import write_events as writer # sequentially process everything out_processor = [beat_processor, writer] # create an IOProcessor processor = IOProcessor(in_processor, out_processor) # and call the processing function args.func(processor, **vars(args))
def main(): """DBNBeatTracker""" # define parser p = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description=''' The DBNBeatTracker program detects all beats in an audio file according to the method described in: "A Multi-Model Approach to Beat Tracking Considering Heterogeneous Music Styles" Sebastian Böck, Florian Krebs and Gerhard Widmer. Proceedings of the 15th International Society for Music Information Retrieval Conference (ISMIR), 2014. It does not use the multi-model (Section 2.2.) and selection stage (Section 2.3), i.e. this version corresponds to the pure DBN version of the algorithm for which results are given in Table 2. Instead of the originally proposed state space and transition model for the DBN, the following is used: "An Efficient State Space Model for Joint Tempo and Meter Tracking" Florian Krebs, Sebastian Böck and Gerhard Widmer. Proceedings of the 16th International Society for Music Information Retrieval Conference (ISMIR), 2015. This program can be run in 'single' file mode to process a single audio file and write the detected beats to STDOUT or the given output file. $ DBNBeatTracker single INFILE [-o OUTFILE] If multiple audio files should be processed, the program can also be run in 'batch' mode to save the detected beats to files with the given suffix. $ DBNBeatTracker batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES If no output directory is given, the program writes the files with the detected beats to the same location as the audio files. The 'pickle' mode can be used to store the used parameters to be able to exactly reproduce experiments. ''') # version p.add_argument('--version', action='version', version='DBNBeatTracker.2016') # input/output options io_arguments(p, output_suffix='.beats.txt', online=True) ActivationsProcessor.add_arguments(p) # signal processing arguments SignalProcessor.add_arguments(p, norm=False, gain=0) # peak picking arguments DBNBeatTrackingProcessor.add_arguments(p) NeuralNetworkEnsemble.add_arguments(p, nn_files=None) # parse arguments args = p.parse_args() # set immutable arguments args.fps = 100 # print arguments if args.verbose: print(args) print("The following mesxxsage shows the args :\n", args, "\n\n\nshows the vars(args) message:\n", vars(args)) ''' The args's message: Namespace(correct=True, fps=100, func=<function process_online at 0x00000000055E9AC8>, gain=0, infile=None, load=False, max_bpm=215.0, min_bpm=55.0, nn_files=None, norm=False, num_frames=1, num_tempi=None, num_threads=1, observation_lambda=16, online=True, origin='stream', outfile=<open file '<stdout>', mode 'w' at 0x0000000002DFB0C0>, save=False, sep=None, threshold=0, transition_lambda=100, verbose=None) ''' # input processor if args.load: # load the activations from file in_processor = ActivationsProcessor(mode='r', **vars(args)) else: # use a RNN to predict the beats in_processor = RNNBeatProcessor(**vars(args)) # output processor if args.save: # save the RNN beat activations to file out_processor = ActivationsProcessor(mode='w', **vars(args)) else: # track the beats with a DBN and output them beat_processor = DBNBeatTrackingProcessor(**vars(args)) out_processor = [beat_processor, write_beats] print("ok") # create an IOProcessor processor = IOProcessor(in_processor, out_processor) # and call the processing function args.func(processor, **vars(args))