def samplernn_repro():
    """Sweep: SampleRNN qautomusic experiment over both quantization schemes."""
    # NOTE(review): a second `samplernn_repro` is defined later in this file and
    # shadows this one at import time — confirm which definition is intended.
    # NOTE(review): the inner prod() wraps a single flag — presumably redundant,
    # but kept as-is; verify prod's flattening semantics before simplifying.
    return prod([
        flag("experiment", ['samplernn-qautomusic']),
        prod([
            flag("dataset.quantization", ['linear', 'mu-law']),
        ]),
    ])
def s4_embedding_repro():
    """Sweep: S4 qautomusic experiment over both quantization schemes."""
    return prod([
        flag("experiment", ['s4-qautomusic']),
        prod([
            flag("dataset.quantization", ['linear', 'mu-law']),
        ]),
    ])
def samplernn_general_repro():
    """Sweep: SampleRNN qautomusic with model.reproduce disabled.

    This failed to be a good reproduction of the original model!
    """
    return prod([
        flag("experiment", ['samplernn-qautomusic']),
        flag("model.reproduce", [False]),
        prod([
            flag("dataset.quantization", ['linear', 'mu-law']),
        ]),
    ])
def samplernn_repro():
    """Sweep: SampleRNN qautomusic over both quantization schemes.

    NOTE(review): this redefines `samplernn_repro` from earlier in the file,
    shadowing the previous definition — confirm the duplication is intentional.
    """
    quantizations = prod([
        flag("dataset.quantization", ['linear', 'mu-law']),
    ])
    return prod([
        flag("experiment", ['samplernn-qautomusic']),
        quantizations,
    ])

# SampleRNN with S4
# python -m train experiment=samplernn-qautomusic model.layer=s4 wandb=null loader.batch_size=1 dataset.sample_len=131135 train.state.chunk_len=131072
# CUDA_VISIBLE_DEVICES=3 python -m train experiment=samplernn-qautomusic wandb.group=samplernn-repro-11-29 dataset.quantization=linear dataset.path=/scr-ssd/mercury/hazy/hippo/data/music_data &
# CUDA_VISIBLE_DEVICES=3 python -m train experiment=samplernn-qautomusic wandb.group=samplernn-repro-11-29 dataset.quantization=mu-law dataset.path=/scr-ssd/mercury/hazy/hippo/data/music_data &
def s4_youtube_smalltwo_16s():
    """Sweep: small 2-layer S4 on youtubemix with 262144-sample (~16s) inputs."""
    return prod([
        flag("experiment", ['s4-qautomusic']),
        flag("dataset", ['youtubemix']),
        flag("dataset.quantization", ['mu-law']),
        flag("model.expand", [2]),
        flag("model.n_layers", [2]),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [-1]),
        flag("dataset.sample_len", [262144]),
        flag("loader.batch_size", [1]),
        flag("task.metrics", [['bpb', 'accuracy', 'accuracy@3', 'accuracy@5', 'accuracy@10']]),
        # flag("task.torchmetrics", [['Accuracy@1', 'Accuracy@5', 'Accuracy@10']]),  # slows down training
    ])
def beethoven():
    """Sweep: SampleRNN on the beethoven dataset, two paired architecture variants."""
    # Paired (lzip) so each n_rnn goes with its matching frame_sizes/overlap_len.
    architecture = lzip([
        flag("model.n_rnn", [1, 2]),
        flag("model.frame_sizes", [[8, 2, 2], [16, 4]]),
        flag("train.state.overlap_len", [32, 64]),
    ])
    return prod([
        flag("experiment", ['samplernn-qautomusic']),
        flag("dataset.path", ['/home/workspace/hippo/data/beethoven/']),
        flag("dataset.sample_len", [128000]),
        flag("dataset.quantization", ['linear']),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
        architecture,
        flag("task.metrics", [['bpb', 'accuracy', 'accuracy@3', 'accuracy@5', 'accuracy@10']]),
    ])
def s4_ljspeech_smalltwo():
    """Sweep: small 2-layer S4 on LJSpeech with mu-law quantization."""
    return prod([
        flag("experiment", ['s4-ljspeech']),
        flag("dataset.quantization", ['mu-law']),
        flag("model.expand", [2]),
        flag("model.n_layers", [2]),
        flag("model.pool", [[4, 4]]),
        flag("callbacks.model_checkpoint.save_top_k", [-1]),
        flag("task.metrics", [['bpb', 'accuracy', 'accuracy@3', 'accuracy@5', 'accuracy@10']]),
        flag("loader.batch_size", [1]),
    ])
def snet_sc09_bigger3():
    """Sweep: larger 8-layer S4 on SC09 with GLU post-activation."""
    return prod([
        flag("experiment", ['s4-sc09']),
        flag("model.n_layers", [8]),
        flag("loader.batch_size", [8]),
        flag("model.expand", [2]),
        flag("model.layer.postact", ['glu']),
        flag("trainer.max_epochs", [1000]),
        flag("optimizer.lr", [0.004]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
    ])
def sc09():
    """Sweep: SampleRNN on SC09, two paired architecture variants."""
    # lzip pairs each n_rnn with its matching frame_sizes and overlap_len.
    paired_arch = lzip([
        flag("model.n_rnn", [1, 2]),
        flag("model.frame_sizes", [[8, 2, 2], [16, 4]]),
        flag("train.state.overlap_len", [32, 64]),
    ])
    return prod([
        flag("experiment", ['samplernn-qautomusic']),
        flag("dataset", ['sc09']),
        flag("dataset.quantization", ['mu-law']),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
        paired_arch,
        flag("task.metrics", [['bpb', 'accuracy', 'accuracy@3', 'accuracy@5', 'accuracy@10']]),
    ])
def youtubemix():
    """Sweep: SampleRNN on youtubemix over both quantization schemes."""
    return prod([
        flag("experiment", ['samplernn-qautomusic']),
        flag("dataset", ['youtubemix']),
        flag("dataset.quantization", ['linear', 'mu-law']),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [-1]),
        flag("task.metrics", [['bpb', 'accuracy', 'accuracy@3', 'accuracy@5', 'accuracy@10']]),
        flag("loader.batch_size", [32]),
    ])
def snet_sc09_bigger2():
    """Sweep: 8-layer S4 on SC09 with expand=3."""
    return prod([
        flag("experiment", ['s4-sc09']),
        flag("model.n_layers", [8]),
        flag("loader.batch_size", [4]),
        flag("model.expand", [3]),
        # flag("model.act_pool", ['glu']),  # does this cause nans?
        flag("trainer.max_epochs", [1000]),
        flag("optimizer.lr", [0.004]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
    ])
def s4_youtube_small():
    """Sweep: small S4 on youtubemix, crossing quantization with expand."""
    return prod([
        flag("experiment", ['s4-qautomusic']),
        flag("dataset", ['youtubemix']),
        flag("dataset.quantization", ['linear', 'mu-law']),
        flag("model.expand", [1, 2]),
        flag("model.n_layers", [2]),
        flag("trainer.max_epochs", [500]),
    ])
def snet_sc09():
    """Sweep: S4 on SC09, depth paired with batch size to fit memory."""
    depth_vs_batch = lzip([
        flag("model.n_layers", [2, 4, 8]),
        flag("loader.batch_size", [32, 16, 8]),
    ])
    return prod([
        flag("experiment", ['s4-sc09']),
        depth_vs_batch,
        flag("trainer.max_epochs", [1000]),
        flag("optimizer.lr", [0.004]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
    ])
def sc09_1():
    """Sweep: WaveNet on SC09, varying skip_channels."""
    return prod([
        flag("experiment", ['wavenet-qautomusic']),
        flag("dataset", ['sc09']),
        flag("dataset.quantization", ['mu-law']),
        flag("dataset.pad_len", [4093]),
        flag("loader.batch_size", [8]),
        flag("trainer.max_epochs", [500]),
        flag("scheduler.patience", [5]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
        flag("optimizer.lr", [0.001]),
        flag("model.skip_channels", [512, 1024]),
        flag("decoder.mode", ['ragged']),
    ])
def vc_s3_sweep_4():
    """Sweep: S3 speaker classification on VoxCeleb with a conv1d encoder, over norm type."""
    # Encoder settings are paired with batch size via lzip.
    encoder_cfg = lzip([
        flag("+encoder.kernel_size", [8]),
        flag("+encoder.stride", [8]),
        flag("+encoder.padding", [0]),
        flag("loader.batch_size", [32]),
    ])
    return prod([
        flag("pipeline", ['sc']),
        flag("dataset", ['voxceleb']),
        flag("dataset.clip_length", [3]),
        flag("model", ['s3']),
        flag("model.d_model", [128]),
        flag("model.layer.d_state", [64]),
        # flag("model.pool.pool", [4]),
        # flag("model.pool.expand", [2]),
        flag("model.n_layers", [8]),
        flag("model.prenorm", [True]),
        flag("model.dropout", [0.1]),
        flag("+encoder._name_", ['conv1d']),
        encoder_cfg,
        flag("optimizer.lr", [4e-3]),
        flag("model.norm", ['layer', 'batch']),
    ])
def vc_s3_sweep():
    """Sweep: S3 on VoxCeleb, crossing depth with learning rate."""
    return prod([
        flag("pipeline", ['sc']),
        flag("dataset", ['voxceleb']),
        flag("model", ['s3']),
        flag("model.d_model", [64]),
        flag("model.layer.d_model", [64]),
        flag("model.pool.pool", [4]),
        flag("model.pool.expand", [2]),
        flag("model.n_layers", [4, 6]),
        flag("loader.batch_size", [64]),
        flag("loader.num_workers", [16]),
        flag("model.prenorm", [True]),
        flag("model.dropout", [0.]),
        flag("optimizer.lr", [1e-2, 4e-3, 1e-3, 5e-4]),
        flag("model.norm", ['batch']),
    ])
def vc_s3_sweep_2():
    """Sweep: S3 on VoxCeleb, crossing clip length, depth, and dropout."""
    return prod([
        flag("pipeline", ['sc']),
        flag("dataset", ['voxceleb']),
        flag("dataset.clip_length", [3, 1]),
        flag("model", ['s3']),
        flag("model.d_model", [64]),
        flag("model.layer.d_state", [64]),
        flag("model.pool.pool", [4]),
        flag("model.pool.expand", [2]),
        flag("model.n_layers", [4, 2]),
        flag("loader.batch_size", [64]),
        flag("loader.num_workers", [16]),
        flag("model.prenorm", [True]),
        flag("model.dropout", [0., 0.2]),
        flag("optimizer.lr", [4e-3]),
        flag("model.norm", ['layer']),
    ])
def vc_s3_sweep_vsmall_1():
    """Sweep: very small base model on 10-class VoxCeleb, crossing data augmentation, norm, and decoder mode."""
    return prod([
        flag("pipeline", ['sc']),
        flag("dataset", ['voxceleb']),
        flag("dataset.clip_length", [3]),
        flag("dataset.num_classes", [10]),
        flag("dataset.noise", [False, True]),
        flag("dataset.self_normalize", [False, True]),
        flag("model", ['base']),
        flag("model.d_model", [32]),
        flag("model.layer.d_state", [32]),
        flag("model.pool.pool", [4]),
        flag("model.pool.expand", [2]),
        flag("model.n_layers", [4]),
        flag("model.prenorm", [True]),
        flag("model.dropout", [0.0]),
        # lzip([
        #     flag("+encoder._name_", ['conv1d']),
        #     flag("+encoder.kernel_size", [8]),
        #     flag("+encoder.stride", [8]),
        #     flag("+encoder.padding", [0]),
        #     flag("loader.batch_size", [32]),
        # ]),
        flag("optimizer.lr", [4e-3]),
        flag("optimizer.weight_decay", [0.]),
        flag("model.norm", ['layer', 'batch']),
        flag("loader.batch_size", [16]),
        flag("decoder.mode", ["pool", "last"]),
    ])
def vc_s3_layernorm_8gpu():
    """Sweep: single S3 layernorm configuration on VoxCeleb across 8 GPUs."""
    return prod([
        flag("pipeline", ['sc']),
        flag("dataset", ['voxceleb']),
        flag("model", ['s3']),
        flag("model.d_model", [64]),
        flag("model.layer.d_model", [64]),
        flag("model.pool.pool", [4]),
        flag("model.pool.expand", [2]),
        flag("model.n_layers", [4]),
        flag("loader.batch_size", [64]),
        flag("loader.num_workers", [64]),
        flag("model.prenorm", [True]),
        flag("model.dropout", [0.]),
        flag("optimizer.lr", [4e-3]),
        flag("model.norm", ['layer']),
        flag("trainer.gpus", [8]),
    ])
def s4_youtube_smalltwo_datavariations():
    """Sweep: small S4/unet/snet on youtubemix variants with different bit depths."""
    # Each dataset variant is paired (lzip) with its bits, batch size,
    # backbone, and model width.
    data_variants = lzip([
        flag("dataset", ['youtubemix', 'youtubemix', 'youtubemix-hires']),
        flag("dataset.bits", [10, 12, 8]),
        flag("loader.batch_size", [1, 1, 1]),
        flag("model", ['unet', 'unet', 'snet']),
        flag("model.d_model", [64, 32, 64]),
    ])
    return prod([
        flag("experiment", ['s4-qautomusic']),
        data_variants,
        flag("dataset.quantization", ['mu-law']),
        flag("model.expand", [2]),
        flag("model.n_layers", [2]),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [-1]),
        flag("task.metrics", [['bpb', 'accuracy', 'accuracy@3', 'accuracy@5', 'accuracy@10']]),
    ])
def youtubemix_2():
    """Sweep: SampleRNN on youtubemix with an explicit dataset path.

    NOTE(review): `youtubemix_2` is defined again later in this file (a WaveNet
    sweep), which shadows this definition at import time — confirm intent.
    """
    paired_arch = lzip([
        flag("model.n_rnn", [1, 2]),
        flag("model.frame_sizes", [[8, 2, 2], [16, 4]]),
        flag("train.state.overlap_len", [32, 64]),
    ])
    return prod([
        flag("experiment", ['samplernn-qautomusic']),
        flag("dataset", ['youtubemix']),
        flag("dataset.path", ['/home/workspace/projects/hippo/data/youtube_mix/']),
        flag("dataset.quantization", ['mu-law']),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
        paired_arch,
        flag("task.metrics", [['bpb', 'accuracy', 'accuracy@3', 'accuracy@5', 'accuracy@10']]),
        flag("loader.batch_size", [32]),
    ])
def beethoven_shorter_all_A_1():
    """Sweep: S4 snet on beethoven with trainable A, shorter samples paired with larger batches."""
    len_vs_batch = lzip([
        flag("dataset.sample_len", [64000, 32000]),
        flag("loader.batch_size", [2, 4]),
    ])
    return prod([
        flag("experiment", ['s4-qautomusic']),
        flag("dataset.path", ['/home/workspace/projects/hippo/data/beethoven/']),
        len_vs_batch,
        flag("dataset.quantization", ['linear']),
        flag("model", ['snet']),
        flag("model.expand", [2]),
        flag("model.ff", [2]),
        flag("model.pool", [[4, 4]]),
        flag("model.n_layers", [8]),
        flag("model.layer.trainable.A", [1]),
        flag("optimizer.lr", [0.004]),
        flag("scheduler.patience", [20]),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
    ])
def beethoven_8s_A_1():
    """Run this later -- doesn't run on a V100.

    Sweep: S4 snet on beethoven at 128000-sample length with all state-space
    parameters (A, B, P, Q) trainable and tied learning rates.
    """
    trainable_cfg = lzip([
        flag("dataset.sample_len", [128000]),
        flag("loader.batch_size", [1]),
        flag("model.layer.trainable.A", [1]),
        flag("model.layer.trainable.B", [1]),
        flag("model.layer.trainable.P", [1]),
        flag("model.layer.trainable.Q", [1]),
        flag("+model.layer.tied_lr", [True]),
    ])
    return prod([
        flag("experiment", ['s4-qautomusic']),
        flag("dataset.path", ['/home/workspace/projects/hippo/data/beethoven/']),
        trainable_cfg,
        flag("dataset.quantization", ['linear']),
        flag("model", ['snet']),
        flag("model.expand", [2]),
        flag("model.ff", [2]),
        flag("model.pool", [[4, 4]]),
        flag("model.n_layers", [8]),
        flag("optimizer.lr", [0.004]),
        flag("scheduler.patience", [20]),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
    ])
def youtube_ablationssm_3_resume():
    """Sweep: SSM ablation on youtubemix, resumed from a prior checkpoint."""
    ablation_cfg = lzip([
        flag("model.layer.trainable.A", [1]),
        flag("model.layer.trainable.B", [1]),
        flag("model.layer.trainable.P", [1]),
        flag("model.layer.trainable.Q", [1]),
        flag("+model.layer.tied_lr", [True]),
        flag("+model.layer.hurwitz", [True]),
    ])
    return prod([
        flag("experiment", ['s4-qautomusic']),
        flag("dataset", ['youtubemix']),
        flag("dataset.path", ['/home/workspace/hippo/data/youtube_mix/']),
        flag("dataset.quantization", ['mu-law']),
        flag("model", ['snet']),
        ablation_cfg,
        flag("model.layer.trainable.C", [1]),
        flag("model.layer.trainable.dt", [0]),
        flag("model.expand", [2]),
        flag("model.ff", [2]),
        flag("model.n_layers", [2]),
        flag("loader.batch_size", [4]),
        flag("trainer.max_epochs", [1000]),
        flag("optimizer.lr", [0.004]),
        flag("scheduler.patience", [20]),
        flag("callbacks.model_checkpoint.save_top_k", [20]),
        flag("task.metrics", [['bpb', 'accuracy', 'accuracy@3', 'accuracy@5', 'accuracy@10']]),
        flag("trainer.resume_from_checkpoint", ['/home/workspace/hippo/outputs/2022-02-02/22-47-27/checkpoints/last.ckpt']),
    ])
def youtube_isotropic_new():
    """Sweep: isotropic (no pooling, expand=0) snet on youtubemix, depth paired with sample length."""
    depth_vs_len = lzip([
        flag("model.n_layers", [4, 8]),
        flag("dataset.sample_len", [65536, 32768]),
    ])
    return prod([
        flag("experiment", ['s4-qautomusic']),
        flag("dataset", ['youtubemix']),
        flag("dataset.path", ['/home/workspace/projects/hippo/data/youtube_mix/']),
        flag("dataset.quantization", ['mu-law']),
        flag("model", ['snet']),
        flag("model.expand", [0]),
        flag("model.ff", [4]),
        flag("model.pool", [[]]),
        flag("model.d_model", [256]),
        flag("loader.batch_size", [1]),
        depth_vs_len,
        flag("model.layer.trainable.A", [True]),
        flag("model.layer.trainable.B", [True]),
        flag("model.layer.trainable.P", [True]),
        flag("model.layer.trainable.dt", [True]),
        flag("model.layer.postact", ['glu']),
        flag("model.layer.hurwitz", [True]),
        flag("model.layer.tie_state", [True]),
        flag("trainer.max_epochs", [1000]),
        flag("optimizer.lr", [0.004]),
        flag("scheduler.patience", [20]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
        flag("dataset.drop_last", [False]),
        flag("decoder.mode", ['ragged']),
    ])
def s4_youtube_snet_smalltwo_longseq():
    """Sweep: small 2-layer snet on youtubemix with very long (471040-sample) sequences."""
    return prod([
        flag("experiment", ['s4-qautomusic']),
        flag("dataset", ['youtubemix']),
        flag("dataset.quantization", ['mu-law']),
        flag("model", ['snet']),
        flag("model.expand", [2]),
        flag("model.n_layers", [2]),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [-1]),
        flag("loader.batch_size", [1]),
        flag("model.dropout", [0.1]),
        flag("dataset.sample_len", [471040]),  # 29.5s
        flag("task.metrics", [['bpb', 'accuracy', 'accuracy@3', 'accuracy@5', 'accuracy@10']]),
    ])
def beethoven_1():
    """Sweep: WaveNet on beethoven, varying skip_channels."""
    return prod([
        flag("experiment", ['wavenet-qautomusic']),
        flag("dataset.path", ['/home/workspace/hippo/data/beethoven/']),
        flag("dataset.sample_len", [128000]),
        flag("dataset.quantization", ['linear']),
        flag("dataset.pad_len", [4093]),
        flag("loader.batch_size", [1]),
        flag("trainer.max_epochs", [500]),
        flag("scheduler.patience", [5]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
        flag("optimizer.lr", [0.001]),
        flag("model.skip_channels", [512, 1024]),
        flag("decoder.mode", ['ragged']),
    ])
def beethoven_shorter_resume():
    """Sweep: S4 snet on beethoven at 16000-sample length, resumed from a checkpoint."""
    resume_cfg = lzip([
        flag("dataset.sample_len", [16000]),
        flag("loader.batch_size", [8]),
        flag(
            "trainer.resume_from_checkpoint",
            [
                '/home/workspace/projects/hippo/outputs/2022-01-22/20-19-30/checkpoints/val/loss-v9.ckpt',
            ],
        ),
    ])
    return prod([
        flag("experiment", ['s4-qautomusic']),
        flag("dataset.path", ['/home/workspace/projects/hippo/data/beethoven/']),
        resume_cfg,
        flag("dataset.quantization", ['linear']),
        flag("model", ['snet']),
        flag("model.expand", [2]),
        flag("model.ff", [2]),
        flag("model.pool", [[4, 4]]),
        flag("model.n_layers", [8]),
        flag("model.layer.trainable.A", [2]),
        flag("optimizer.lr", [0.004]),
        flag("scheduler.patience", [20]),
        flag("trainer.max_epochs", [500]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
    ])
def youtubemix_2():
    """Sweep: WaveNet on youtubemix, varying residual_channels.

    NOTE(review): this redefines `youtubemix_2` (a SampleRNN sweep earlier in
    this file) and shadows it at import time — confirm the duplication is
    intentional, or rename one of the two.
    """
    return prod([
        flag("experiment", ['wavenet-qautomusic']),
        flag("dataset", ['youtubemix']),
        flag("dataset.quantization", ['mu-law']),
        flag("dataset.pad_len", [4093]),
        flag("dataset.path", ['/home/workspace/projects/hippo/data/youtube_mix/']),
        flag("loader.batch_size", [1]),
        flag("trainer.max_epochs", [500]),
        flag("scheduler.patience", [5]),
        flag("callbacks.model_checkpoint.save_top_k", [10]),
        flag("optimizer.lr", [0.001]),
        flag("model.skip_channels", [512]),
        flag("model.residual_channels", [128, 256]),
    ])
def youtube_statespaces_repro():
    """Sweep: run the three sashimi reproduction experiments."""
    # to run
    return prod([
        flag("experiment", ['sashimi-beethoven', 'sashimi-youtubemix', 'sashimi-sc09']),
    ])