def parse_args(argv):
    """Parse command-line arguments for the configuration labelled "bittner".

    Extends the parser returned by ``common.common_arguments_parser()`` with
    the options declared below, parses ``argv``, then hands the parsed
    arguments and this script's default values to
    ``common.argument_defaults``. The value that call returns is forwarded to
    ``common.name`` together with the label ``"bittner"``.

    Args:
        argv: Argument list passed unchanged to the parser's ``parse_args``.

    Returns:
        The object produced by ``parser.parse_args(argv)``, after it has been
        passed to ``common.argument_defaults`` and ``common.name``.
    """
    parser = common.common_arguments_parser()
    parser.add_argument("--spectrogram", type=str, help="Spectrogram method")
    # NOTE(review): this option carries an argparse-level default, so its
    # value is already 8 when common.argument_defaults runs; confirm that the
    # duplicate "capacity_multiplier" entry in `defaults` is intended.
    parser.add_argument("--capacity_multiplier", default=8, type=int, help="Capacity multiplier")
    parser.add_argument("--voicing", action='store_true', help="Add voicing model.")

    args = parser.parse_args(argv)

    # Context width and frame width below are both expressed in terms of this.
    hop_length = 512
    defaults = {
        "samplerate": 44100,
        "context_width": 14 * hop_length,
        "annotations_per_window": 20,
        "hop_size": 1,
        "frame_width": hop_length,
        "note_range": 72,
        "min_note": 24,
        "batch_size": 8,
        "evaluate_every": 5000,
        "evaluate_small_every": 1000,
        "annotation_smoothing": 0.177,
        "spectrogram": "hcqt",
        "capacity_multiplier": 8,
        "voicing": False,
    }
    specified_args = common.argument_defaults(args, defaults)
    common.name(args, specified_args, "bittner")

    return args
def parse_args(argv):
    """Parse command-line arguments for the configuration labelled "bittner".

    The parser obtained from ``common.common_arguments_parser()`` is extended
    with the options listed in ``option_table`` and used to parse ``argv``.
    The parsed arguments and this script's default values are then given to
    ``common.argument_defaults``, and whatever that returns is passed on to
    ``common.name`` along with the label ``"bittner"``.

    Args:
        argv: Argument list passed unchanged to the parser's ``parse_args``.

    Returns:
        The object produced by ``parse_args(argv)``, after it has been passed
        to ``common.argument_defaults`` and ``common.name``.
    """
    cli = common.common_arguments_parser()

    # One row per option: (flag, value type, help text), in registration order.
    option_table = (
        ("--capacity_multiplier", int, "Capacity multiplier"),
        ("--spectrogram", str, "Spectrogram method"),
        ("--spectrogram_top_db", float, "Spectrogram top_db"),
        ("--spectrogram_filter_scale", float, "Spectrogram filter_scale"),
        ("--spectrogram_undertone_stacking", int, "spectrogram undertone stacking"),
        ("--spectrogram_overtone_stacking", int, "spectrogram overtone stacking"),
        ("--undertone_stacking", int, "Undertone stacking in the model"),
        ("--overtone_stacking", int, "Overtone stacking in the model"),
    )
    for flag, value_type, description in option_table:
        cli.add_argument(flag, type=value_type, help=description)

    parsed = cli.parse_args(argv)

    # HOP_LENGTH is expected to be defined at module level outside this function.
    fallback_values = dict(
        samplerate=44100,
        context_width=0,
        annotations_per_window=50,
        hop_size=1,
        frame_width=HOP_LENGTH,
        note_range=72,
        min_note=24,
        evaluate_every=5000,
        evaluate_small_every=1000,
        spectrogram="cqt",
        learning_rate=0.001,
        learning_rate_decay=0.85,
        learning_rate_decay_steps=5000,
        undertone_stacking=0,
        overtone_stacking=1,
        spectrogram_undertone_stacking=1,
        spectrogram_overtone_stacking=5,
        spectrogram_top_db=80,
        spectrogram_filter_scale=1.0,
        capacity_multiplier=64,
    )

    explicitly_given = common.argument_defaults(parsed, fallback_values)
    common.name(parsed, explicitly_given, "bittner")
    return parsed
def parse_args(argv):
    """Parse command-line arguments for the "crepe" model.

    Adds the model-specific options to the parser returned by
    ``common.common_arguments_parser()``, parses ``argv``, and passes the
    result with this model's default values to ``common.argument_defaults``.
    The value returned by that call is forwarded to ``common.name`` together
    with the label ``"crepe"``.

    Args:
        argv: Argument list passed unchanged to the parser's ``parse_args``.

    Returns:
        The object produced by ``parse_args(argv)``, after it has been passed
        to ``common.argument_defaults`` and ``common.name``.
    """
    cli = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword options) pairs,
    # kept in registration order.
    model_options = (
        ("--input_normalization",
         dict(type=int, help="Enable normalizing each input example")),
        ("--capacity_multiplier",
         dict(type=int, help="Capacity multiplier of the model")),
        ("--multiresolution_convolution",
         dict(type=int,
              help="Number of different resolution of the first convolution layer")),
        ("--variable_stride",
         dict(action='store_true', help="Variable stride")),
        ("--first_layer_capacity",
         dict(type=int, help="Capacity multiplier")),
    )
    for flag, options in model_options:
        cli.add_argument(flag, **options)

    parsed = cli.parse_args(argv)

    fallback_values = dict(
        # Overrides one of the common defaults.
        context_width=978,
        input_normalization=1,
        capacity_multiplier=16,
        multiresolution_convolution=0,
        variable_stride=False,
        first_layer_capacity=1,
    )

    explicitly_given = common.argument_defaults(parsed, fallback_values)
    common.name(parsed, explicitly_given, "crepe")
    return parsed
def parse_args(argv):
    """Parse command-line arguments for the "spctrgrm" model.

    Adds the model-specific options (input, model, context, residual,
    regularization and voicing groups) to the parser returned by
    ``common.common_arguments_parser()`` and parses ``argv``. The parsed
    arguments and this model's default values are given to
    ``common.argument_defaults``; the value it returns is forwarded to
    ``common.name`` together with the label ``"spctrgrm"``.

    Args:
        argv: Argument list passed unchanged to the parser's ``parse_args``.

    Returns:
        The object produced by ``parse_args(argv)``, after it has been passed
        to ``common.argument_defaults`` and ``common.name``.
    """
    cli = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword options) pairs,
    # kept in registration order.
    model_options = (
        # input
        ("--spectrogram", dict(type=str, help="Spectrogram method")),
        ("--spectrogram_top_db", dict(type=float, help="Spectrogram top_db")),
        ("--spectrogram_filter_scale",
         dict(type=float, help="Spectrogram filter_scale")),
        ("--spectrogram_undertone_stacking",
         dict(type=int, help="spectrogram undertone stacking")),
        ("--spectrogram_overtone_stacking",
         dict(type=int, help="spectrogram overtone stacking")),
        ("--cut_context",
         dict(type=int,
              help="Cut unnecessary context, doesn't work with dilations!")),
        # model
        ("--architecture", dict(type=str, help="Model architecture")),
        ("--filters", dict(type=int, help="Filters in convolutions")),
        ("--stacks", dict(type=int, help="Stacks")),
        ("--conv_range",
         dict(type=int, help="Stack kernel size in frequency axis")),
        ("--undertone_stacking",
         dict(type=int, help="Undertone stacking in the model")),
        ("--overtone_stacking",
         dict(type=int, help="Overtone stacking in the model")),
        ("--activation",
         dict(type=str, help="Activation function for the convolution stack")),
        # context
        ("--conv_ctx",
         dict(nargs="+", type=int, help="Stack kernel sizes in time axis")),
        ("--dilations",
         dict(nargs="+", type=int, help="Dilation rate for the convolutions")),
        ("--last_conv_kernel", dict(nargs=2, type=int)),
        # residual
        ("--residual_hop",
         dict(type=int,
              help="Size of one block around which there is a residual connection")),
        ("--residual_end",
         dict(type=int, help="No residual connection in last N layers")),
        ("--residual_op",
         dict(type=str,
              help="Residual connection operation (add for ResNet, concat for DenseNet)")),
        # regularization
        ("--batchnorm", dict(type=int)),
        ("--dropout", dict(type=float)),
        ("--specaugment_prob", dict(type=float)),
        ("--specaugment_freq_mask_num", dict(type=int)),
        ("--specaugment_freq_mask_max", dict(type=int)),
        ("--specaugment_time_mask_num", dict(type=int)),
        ("--specaugment_time_mask_max", dict(type=int)),
        # voicing module
        ("--voicing", dict(type=int)),
        ("--voicing_input", dict(type=str)),
    )
    for flag, options in model_options:
        cli.add_argument(flag, **options)

    parsed = cli.parse_args(argv)

    # Context width and frame width below are both expressed in terms of this.
    hop_length = 512

    # Change some of the common defaults
    common_overrides = dict(
        samplerate=44100,
        context_width=10 * hop_length,
        annotations_per_window=5,
        hop_size=1,
        frame_width=hop_length,
        note_range=72,
        min_note=24,
        evaluate_every=5000,
        evaluate_small_every=1000,
        annotation_smoothing=0.18,
        batch_size=8,
    )
    # Model specific defaults
    model_defaults = dict(
        learning_rate_decay_steps=10000,
        learning_rate_decay=0.8,
        spectrogram="cqt",
        spectrogram_top_db=80,
        spectrogram_filter_scale=1.0,
        spectrogram_undertone_stacking=1,
        spectrogram_overtone_stacking=5,
        cut_context=1,
        architecture="deep_hcnn",
        filters=16,
        stacks=10,
        conv_range=3,
        undertone_stacking=0,
        overtone_stacking=1,
        activation="relu",
        conv_ctx=[1],
        dilations=[1],
        last_conv_kernel=[1, 1],
        residual_hop=1,
        residual_end=0,
        residual_op="add",
        batchnorm=0,
        dropout=0.3,
        specaugment_prob=0.0,
        specaugment_freq_mask_num=2,
        specaugment_freq_mask_max=27,
        specaugment_time_mask_num=1,
        specaugment_time_mask_max=5,
        voicing=0,
        voicing_input="spectrogram_salience",
    )
    # The two groups share no keys, so merging keeps the original key order.
    fallback_values = {**common_overrides, **model_defaults}

    explicitly_given = common.argument_defaults(parsed, fallback_values)
    common.name(parsed, explicitly_given, "spctrgrm")
    return parsed
def parse_args(argv):
    """Parse command-line arguments for the "mir" model.

    Adds the model-specific options (input, model, context, residual and
    regularization groups) to the parser returned by
    ``common.common_arguments_parser()`` and parses ``argv``. The parsed
    arguments and this model's default values are given to
    ``common.argument_defaults``; the value it returns is forwarded to
    ``common.name`` together with the label ``"mir"``.

    Args:
        argv: Argument list passed unchanged to the parser's ``parse_args``.

    Returns:
        The object produced by ``parse_args(argv)``, after it has been passed
        to ``common.argument_defaults`` and ``common.name``.
    """
    cli = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword options) pairs,
    # kept in registration order.
    model_options = (
        # input
        ("--spectrogram", dict(type=str, help="Spectrogram method")),
        ("--spectrogram_top_db", dict(type=float, help="Spectrogram top_db")),
        ("--spectrogram_filter_scale",
         dict(type=float, help="Spectrogram filter_scale")),
        ("--spectrogram_undertone_stacking",
         dict(type=int, help="spectrogram undertone stacking")),
        ("--spectrogram_overtone_stacking",
         dict(type=int, help="spectrogram overtone stacking")),
        ("--cut_context",
         dict(type=int,
              help="Cut unnecessary context, doesn't work with dilations!")),
        # model
        ("--architecture", dict(type=str, help="Model architecture")),
        ("--faster_hcnn", dict(type=int, help="HCNN implementation")),
        ("--use_bias", dict(type=int, help="use bias in conv2d")),
        ("--class_weighting", dict(type=int, help="use class weighting")),
        ("--filters", dict(type=int, help="Filters in convolutions")),
        ("--stacks", dict(type=int, help="Stacks")),
        ("--conv_range",
         dict(type=int, help="Stack kernel size in frequency axis")),
        ("--undertone_stacking",
         dict(type=int, help="Undertone stacking in the model")),
        ("--overtone_stacking",
         dict(type=int, help="Overtone stacking in the model")),
        ("--stacking_until",
         dict(type=int, help="Harmonic stacking in the model until Nth layer")),
        ("--activation",
         dict(type=str, help="Activation function for the convolution stack")),
        # context
        ("--conv_ctx",
         dict(nargs="+", type=int, help="Stack kernel sizes in time axis")),
        ("--dilations",
         dict(nargs="+", type=int, help="Dilation rate for the convolutions")),
        ("--last_conv_kernel", dict(nargs=2, type=int)),
        ("--last_pooling", dict(type=str)),
        # residual
        ("--residual_hop",
         dict(type=int,
              help="Size of one block around which there is a residual connection")),
        ("--residual_end",
         dict(type=int, help="No residual connection in last N layers")),
        ("--residual_op",
         dict(type=str,
              help="Residual connection operation (add for ResNet, concat for DenseNet)")),
        # regularization
        ("--batchnorm", dict(type=int)),
        ("--dropout", dict(type=float)),
    )
    for flag, options in model_options:
        cli.add_argument(flag, **options)

    parsed = cli.parse_args(argv)

    # Context width and frame width below are both expressed in terms of this.
    hop_length = 512

    # FRAME-LEVEL INSTRUMENT RECOGNITION BY TIMBRE AND PITCH
    # Change some of the common defaults
    common_overrides = dict(
        samplerate=44100,
        context_width=4 * hop_length,
        annotations_per_window=1,
        hop_size=1,
        frame_width=hop_length,
        note_range=11,
        min_note=0,
        evaluate_every=30000,
        evaluate_small_every=1000,
        annotation_smoothing=0.0,
        batch_size=32,
        bins_per_semitone=1,
        unvoiced_loss_weight=1.0,
        datasets=["musicnet_mir"],
    )
    # Model specific defaults
    model_defaults = dict(
        learning_rate_decay_steps=10000,
        learning_rate_decay=0.8,
        spectrogram="YunNingHung_cqt",
        spectrogram_top_db=110,
        spectrogram_filter_scale=1.0,
        spectrogram_undertone_stacking=1,
        spectrogram_overtone_stacking=5,
        cut_context=1,
        architecture="baseline",
        faster_hcnn=0,
        use_bias=1,
        class_weighting=1,
        filters=12,
        stacks=6,
        conv_range=3,
        undertone_stacking=1,
        overtone_stacking=3,
        stacking_until=999,
        activation="relu",
        conv_ctx=[1],
        dilations=[1],
        last_conv_kernel=[1, 72],
        last_pooling="avg",
        residual_hop=1,
        residual_end=0,
        residual_op="add",
        batchnorm=0,
        dropout=0.3,
    )
    # The two groups share no keys, so merging keeps the original key order.
    fallback_values = {**common_overrides, **model_defaults}

    explicitly_given = common.argument_defaults(parsed, fallback_values)
    common.name(parsed, explicitly_given, "mir")
    return parsed
def parse_args(argv):
    """Parse command-line arguments for the "wavenet" model.

    Adds the model-specific options to the parser returned by
    ``common.common_arguments_parser()``, parses ``argv``, and passes the
    result with this model's default values to ``common.argument_defaults``.
    The value returned by that call is forwarded to ``common.name`` together
    with the label ``"wavenet"``.

    Args:
        argv: Argument list passed unchanged to the parser's ``parse_args``.

    Returns:
        The object produced by ``parse_args(argv)``, after it has been passed
        to ``common.argument_defaults`` and ``common.name``.
    """
    cli = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword options) pairs,
    # kept in registration order.
    model_options = (
        ("--use_biases",
         dict(action='store_true', default=False,
              help="Use biases in the convolutions")),
        ("--input_normalization",
         dict(type=int, help="Enable normalizing each input example")),
        ("--initial_filter_width",
         dict(type=int, help="First conv layer filter width")),
        ("--initial_filter_padding",
         dict(type=str, help="First conv layer padding")),
        ("--filter_width",
         dict(type=int, help="Dilation stack filter width (2 or 3)")),
        ("--skip_channels", dict(type=int, help="Skip channels")),
        ("--residual_channels", dict(type=int, help="Residual channels")),
        ("--stack_number", dict(type=int, help="Number of dilated stacks")),
        ("--max_dilation", dict(type=int, help="Maximum dilation rate")),
        ("--dilation_layer_dropout",
         dict(type=float, help="Dropout in dilation layer")),
        ("--skip_layer_dropout",
         dict(type=float, help="Dropout in skip connections")),
        ("--skip", dict(type=str, help="Skip add or concat")),
        ("--postprocessing", dict(type=str, help="Postprocessing layer")),
    )
    for flag, options in model_options:
        cli.add_argument(flag, **options)

    parsed = cli.parse_args(argv)

    fallback_values = dict(
        note_range=72,
        min_note=24,
        evaluate_every=5000,
        evaluate_small_every=5000,
        batch_size=8,
        annotations_per_window=10,
        context_width=94,
        annotation_smoothing=0.18,
        input_normalization=1,
        initial_filter_width=32,
        initial_filter_padding="same",
        filter_width=3,
        skip_channels=64,
        residual_channels=32,
        stack_number=1,
        max_dilation=512,
        dilation_layer_dropout=0.0,
        skip_layer_dropout=0.0,
        skip="add",
        postprocessing="avgpool_p93_s93_Psame--conv_f256_k16_s8_Psame_arelu--conv_f256_k16_s8_Psame_arelu",
    )

    explicitly_given = common.argument_defaults(parsed, fallback_values)
    common.name(parsed, explicitly_given, "wavenet")
    return parsed
def parse_args(argv):
    """Parse command-line arguments for the "kelz" model.

    Adds the model-specific options to the parser returned by
    ``common.common_arguments_parser()``, parses ``argv``, and passes the
    result with this model's default values to ``common.argument_defaults``.
    The value returned by that call is forwarded to ``common.name`` together
    with the label ``"kelz"``.

    Args:
        argv: Argument list passed unchanged to the parser's ``parse_args``.

    Returns:
        The object produced by ``parse_args(argv)``, after it has been passed
        to ``common.argument_defaults`` and ``common.name``.
    """
    cli = common.common_arguments_parser()

    # Model specific arguments as (flag, add_argument keyword options) pairs,
    # kept in registration order.
    model_options = (
        # input
        ("--spectrogram", dict(type=str, help="Spectrogram method")),
        # model
        ("--architecture", dict(type=str, help="Model architecture")),
        ("--filters", dict(type=int, help="Filters in convolutions")),
        ("--spectrogram_undertone_stacking",
         dict(type=int, help="Undertone stacking in the spectrogram")),
        ("--spectrogram_overtone_stacking",
         dict(type=int, help="Overtone stacking in the spectrogram")),
        ("--undertone_stacking",
         dict(type=int, help="Undertone stacking in the model")),
        ("--overtone_stacking",
         dict(type=int, help="Overtone stacking in the model")),
        ("--activation",
         dict(type=str, help="Activation function for the convolution stack")),
        # regularization
        ("--dropout", dict(type=float)),
    )
    for flag, options in model_options:
        cli.add_argument(flag, **options)

    parsed = cli.parse_args(argv)

    # 1764 samples per hop; at the 44100 samplerate below that is 25 fps.
    hop_length = 441 * 4

    # Change some of the common defaults
    common_overrides = dict(
        samplerate=44100,
        context_width=2 * hop_length,
        annotations_per_window=1,
        hop_size=1,
        frame_width=hop_length,
        note_range=88,
        min_note=21,
        evaluate_every=5000,
        evaluate_small_every=1000,
        annotation_smoothing=0.0,
        batch_size=128,
        batch_size_evaluation=1024,
        bins_per_semitone=1,
        datasets=["maps"],
    )
    # Model specific defaults
    model_defaults = dict(
        learning_rate_decay_steps=10000,
        learning_rate_decay=0.8,
        spectrogram="kelz",
        architecture="allconv",
        filters=16,
        undertone_stacking=0,
        overtone_stacking=1,
        spectrogram_undertone_stacking=0,
        spectrogram_overtone_stacking=1,
        activation="relu",
        dropout=0.25,
    )
    # The two groups share no keys, so merging keeps the original key order.
    fallback_values = {**common_overrides, **model_defaults}

    explicitly_given = common.argument_defaults(parsed, fallback_values)
    common.name(parsed, explicitly_given, "kelz")
    return parsed