# Example 1
def test():
    """Run vocal-separation inference on a hard-coded audio clip.

    Loads a pretrained Unet checkpoint, applies it to a 128-frame window of
    the mix spectrogram, and writes both the predicted vocal track and the
    reconstructed mix to WAV files.  All paths and parameters are hard-coded;
    this is a demo/inspection script, not a reusable API.
    """
    vis = Visualizer(env='svs')
    model = getattr(models, 'Unet')().eval()
    #    model.cuda()
    model.load_state_dict(
        t.load('G:/Unet_svs/check/epoch_219__0724_16_57_35.pth'))
    # n_fft=1024 yields 513 frequency bins per STFT frame.
    mix_wav, _ = load("C:/Users/lenovo/Music/c.mp3", sr=8192)
    mix_wav_mag, mix_wav_phase = magphase(
        stft(mix_wav, n_fft=1024, hop_length=768))
    START = 700
    END = START + 128  # the model consumes a 128-frame window

    mix_wav_mag = mix_wav_mag[:, START:END]
    mix_wav_phase = mix_wav_phase[:, START:END]

    print(mix_wav_mag.shape)

    # Drop the DC bin (row 0) so the input is 512x128, then add batch and
    # channel dimensions -> (1, 1, 512, 128).
    gg = t.from_numpy(mix_wav_mag[1:])
    gg.unsqueeze_(0)
    gg.unsqueeze_(0)
    vis.img('a', gg)
    print(gg.shape)
    # BUG FIX: the forward pass itself must run inside no_grad(); previously
    # only the (deprecated, now redundant) Variable wrapping was inside the
    # context, so the model call still built an autograd graph.
    with t.no_grad():
        score = model(gg)  # soft mask, presumably in [0, 1] -- TODO confirm
    predict = gg.data * score.data  # masked magnitude spectrogram
    print(predict.shape)
    target_pred_mag = predict.view(512, 128).cpu().numpy()
    # Re-insert a zero DC row so the spectrogram is 513x128 again, matching
    # the phase matrix for the inverse STFT.
    target_pred_mag = np.vstack((np.zeros((128)), target_pred_mag))
    vis.img('b', t.from_numpy(target_pred_mag))
    print(target_pred_mag.shape)
    write_wav(
        'C:/Users/lenovo/Music/pred_vocal.wav',
        istft(
            target_pred_mag * mix_wav_phase
            #     (mix_wav_mag * target_pred_mag) * mix_wav_phase
            ,
            win_length=1024,
            hop_length=768),
        8192,
        norm=True)
    write_wav('C:/Users/lenovo/Music/pred_mix.wav',
              istft(mix_wav_mag * mix_wav_phase,
                    win_length=1024,
                    hop_length=768),
              8192,
              norm=True)
# Example 2
def train(**kwargs):
    """Train a DCGAN: discriminator ``NetD`` versus generator ``NetG``.

    Keyword arguments are forwarded to ``opt.parse`` and override the default
    configuration.  Checkpoints and generated sample images are written every
    ``opt.save_freq`` epochs; losses are plotted to visdom every
    ``opt.print_freq`` batches when ``opt.vis`` is enabled.
    """
    # step1: configure
    opt.parse(**kwargs)
    if opt.vis:
        vis = Visualizer(opt.env)

    # step2: data
    normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    transforms = T.Compose([
        T.Resize(opt.image_size),
        T.CenterCrop(opt.image_size),
        T.ToTensor(),
        normalize,
    ])
    # For this model the transform is identical for train and test.
    dataset = tv.datasets.ImageFolder(opt.data_path, transform=transforms)
    dataloader = DataLoader(dataset,
                            batch_size=opt.batch_size,
                            shuffle=True,
                            num_workers=opt.num_workers,
                            drop_last=True)  # real images used to train NetD

    true_labels = Variable(t.ones(opt.batch_size))
    fake_labels = Variable(t.zeros(opt.batch_size))
    # Fixed noise: reused every epoch to visually validate NetG's progress.
    fix_noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))
    # Fresh noise: re-sampled each step to train NetG/NetD.
    noises = Variable(t.randn(opt.batch_size, opt.nz, 1, 1))

    # step3: model
    netg, netd = NetG(opt), NetD(opt)
    if opt.netg_path:
        netg.load(opt.netg_path)
    if opt.netd_path:
        # BUG FIX: this line was indented with a tab character while the rest
        # of the file uses spaces -- a TabError under Python 3.
        netd.load(opt.netd_path)

    # step4: criterion and optimizer
    optimizer_g = t.optim.Adam(params=netg.parameters(), lr=opt.lrg,
                               betas=(opt.beta1, 0.999))
    optimizer_d = t.optim.Adam(params=netd.parameters(), lr=opt.lrd,
                               betas=(opt.beta1, 0.999))
    criterion = t.nn.BCELoss()

    # step: meters
    errord_meter = meter.AverageValueMeter()
    errorg_meter = meter.AverageValueMeter()

    if opt.use_gpu:
        netd.cuda()
        netg.cuda()
        criterion.cuda()
        true_labels, fake_labels = true_labels.cuda(), fake_labels.cuda()
        fix_noises, noises = fix_noises.cuda(), noises.cuda()

    # step5: train
    for epoch in range(opt.max_epoch):
        ## step5.1 train
        for ii, (data, _) in tqdm(enumerate(dataloader)):
            real_img = Variable(data)
            if opt.use_gpu:
                real_img = real_img.cuda()
            if (ii + 1) % opt.d_every == 0:
                # --- train the discriminator ---
                optimizer_d.zero_grad()
                # real images
                output = netd(real_img)
                error_d_real = criterion(output, true_labels)
                error_d_real.backward()
                # fake images
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                # detach() stops backprop into netg here: we do not need
                # gradients w.r.t. fake_img for the D step, so detaching lets
                # autograd free the generator graph and saves memory (same
                # effect as requires_grad=False, which only applies to leaf
                # tensors; netg(noises) is a non-leaf).
                fake_img = netg(noises).detach()
                fake_output = netd(fake_img)
                error_d_fake = criterion(fake_output, fake_labels)
                error_d_fake.backward()
                optimizer_d.step()

                error_d = error_d_real + error_d_fake
                errord_meter.add(error_d.data)

            if (ii + 1) % opt.g_every == 0:
                # --- train the generator ---
                optimizer_g.zero_grad()
                noises.data.copy_(t.randn(opt.batch_size, opt.nz, 1, 1))
                fake_img = netg(noises)
                output = netd(fake_img)
                # the generator wants the discriminator to label fakes real
                error_g = criterion(output, true_labels)
                error_g.backward()
                optimizer_g.step()

                errorg_meter.add(error_g.data)

            ## step5.2 validate and visualize every print_freq batches
            # Losses are plotted every few batches, not once per epoch.
            if (ii + 1) % opt.print_freq == 0 and opt.vis:
                if os.path.exists(opt.debug_file):
                    # drop into the debugger when the debug file exists
                    ipdb.set_trace()
                # batch_size*nz*1*1 -> batch_size*3*96*96; acts as a visual
                # validation of the generator on the fixed noise batch.
                fix_fake_imgs = netg(fix_noises)
                vis.img('fix_fake', fix_fake_imgs.data[:64] * 0.5 + 0.5)
                vis.img('real', real_img.data[:64] * 0.5 + 0.5)
                vis.plot(win='errord', y=errord_meter.value()[0])
                vis.plot(win='errorg', y=errorg_meter.value()[0])

        ## step5.3 validate and save the model every save_freq epochs
        # This chapter's validation has no quantitative metric, so saving a
        # sample image from the fixed noise is the only check performed.
        if (epoch + 1) % opt.save_freq == 0:
            netg.save(opt.model_save_path, 'netg_%s' % epoch)
            netd.save(opt.model_save_path, 'netd_%s' % epoch)
            fix_fake_imgs = val(netg, fix_noises)
            tv.utils.save_image(fix_fake_imgs,
                                '%s/%s.png' % (opt.img_save_path, epoch),
                                normalize=True, range=(-1, 1))
            # Per the author: the dataset is small, so the loss meters are
            # reset only every few epochs rather than after every epoch.
            errord_meter.reset()
            errorg_meter.reset()
            """