def build_model():
    # Build the teacher WaveNet from the hyper-parameters carried on `args`.
    model = Wavenet(out_channels=2,
                    num_blocks=args.num_blocks,
                    num_layers=args.num_layers,
                    residual_channels=args.residual_channels,
                    gate_channels=args.gate_channels,
                    skip_channels=args.skip_channels,
                    kernel_size=args.kernel_size,
                    cin_channels=args.cin_channels,
                    upsample_scales=[16, 16])
    print(model)
    # Count only the trainable parameters.
    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('number of teacher parameters:', n_params)
    return model
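# A minimal usage sketch for build_model() (an assumption, not part of the
# original script): the teacher network is built once and moved to the GPU
# before training. `args` is assumed to have been populated by argparse
# elsewhere in the file.
if __name__ == '__main__':
    teacher = build_model()
    if torch.cuda.is_available():
        teacher = teacher.cuda()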
device=device)

loadtr = data.DataLoader(training_set, batch_size=1, shuffle=True,
                         num_workers=0, worker_init_fn=np.random.seed)
loadval = data.DataLoader(validation_set, batch_size=1, num_workers=0)


# In[6]:


# model = Unet(skipDim, quantization_channels, residualDim, device)
model = Wavenet(field, skipDim, residualDim, dilations0, dilations1)
# model = nn.DataParallel(model)
model = model.cuda()

# The WaveNet paper reports that a categorical cross-entropy loss over the
# quantized samples works far better than MSE.
criterion = nn.CrossEntropyLoss()
# Adam with a small weight decay for training.
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
# Per-item loss tracker: 50 entries initialized to a large value (100).
maxloss = np.zeros(50) + 100


# In[7]:


start_epoch = 0
if continueTrain:  # resume training from a saved checkpoint if one exists
    if os.path.isfile(resumefile):
        print("=> loading checkpoint '{}'".format(resumefile))
        checkpoint = torch.load(resumefile)
        start_epoch = checkpoint['epoch']
        # best_prec1 = checkpoint['best_prec1']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})".format(