def __init__(self, in_channels, out_channels, latent_dim,
             ini_num_features=16, unet_depth=5, enc_depth=5):
    """Build the four sub-networks of the probabilistic U-Net.

    Args:
        in_channels: Channel count handed to the U-Net and the prior encoder.
        out_channels: Channel count handed to the U-Net and to ``Comb``.
        latent_dim: Latent size handed to both encoders and to ``Comb``;
            also stored as ``self.latent_dim``.
        ini_num_features: Feature-count argument shared by every sub-network.
        unet_depth: Depth argument of the U-Net.
        enc_depth: Depth argument of both encoders.
    """
    super(ProbU_Net, self).__init__()
    self.latent_dim = latent_dim

    # Sub-networks are created in a fixed order (unet, prior, posterior,
    # fcomb) so that parameter registration order stays stable.
    self.unet = U_Net(in_channels, out_channels, ini_num_features, unet_depth)

    encoder_tail = (latent_dim, ini_num_features, enc_depth)
    self.prior = GenEncoder(in_channels, *encoder_tail)

    # The posterior encoder takes one input channel more than the prior
    # (presumably the segmentation map stacked onto the image -- confirm
    # against the forward pass).
    posterior_in_channels = in_channels + 1
    self.posterior = GenEncoder(posterior_in_channels, *encoder_tail)

    fcomb_depth = 4
    self.fcomb = Comb(ini_num_features, latent_dim, out_channels,
                      depth=fcomb_depth)
class ProbU_Net(nn.Module):
    """Probabilistic U-Net built from a U-Net, two latent encoders and a combiner.

    ``forward`` caches the latent distributions and the U-Net features on the
    instance; ``loss`` then turns those cached values into the training
    objective, and ``reconstruct`` draws segmentations from the prior.
    """

    def __init__(self, in_channels, out_channels, latent_dim,
                 ini_num_features=16, unet_depth=5, enc_depth=5):
        """Create the sub-networks.

        Args:
            in_channels: Channel count of the input ``x``.
            out_channels: Channel count passed to the U-Net and the combiner.
            latent_dim: Latent size passed to both encoders and the combiner.
            ini_num_features: Feature-count argument shared by all sub-networks.
            unet_depth: Depth argument of the U-Net.
            enc_depth: Depth argument of both encoders.
        """
        super(ProbU_Net, self).__init__()
        self.latent_dim = latent_dim
        self.unet = U_Net(in_channels, out_channels, ini_num_features,
                          unet_depth)
        self.prior = GenEncoder(in_channels, latent_dim, ini_num_features,
                                enc_depth)
        # The posterior sees ``x`` concatenated with ``segx`` on dim 1 (see
        # ``forward``), hence one extra input channel.
        self.posterior = GenEncoder(in_channels + 1, latent_dim,
                                    ini_num_features, enc_depth)
        self.fcomb = Comb(ini_num_features, latent_dim, out_channels, depth=4)

    def forward(self, x, segx, train=True):
        """Encode ``x`` and cache the results on the instance.

        Sets ``self.prior_latent_space`` and ``self.unet_features``; when
        ``train`` is true it also sets ``self.post_latent_space`` from ``x``
        concatenated with ``segx`` along dim 1. Nothing is returned.

        Args:
            x: Network input.
            segx: Segmentation target; only read when ``train`` is true.
            train: Whether to evaluate the posterior encoder as well.
        """
        if train:
            self.post_latent_space = self.posterior.forward(
                torch.cat((x, segx), dim=1))
        self.prior_latent_space = self.prior.forward(x)
        self.unet_features = self.unet.forward(x)

    def loss(self, segx, beta):
        """Return ``recon_loss + beta * mean(KL(posterior || prior))``.

        Must be called after ``forward(..., train=True)``, because it reads the
        distributions and features cached there. Stores ``self.kld_loss``
        (unreduced KL), ``self.reconstruction`` and ``self.recon_loss``.

        Args:
            segx: Target passed to ``BCEWithLogitsLoss``.
            beta: Weight of the KL term.

        Returns:
            Scalar tensor with the weighted sum of both terms.
        """
        self.kld_loss = kl.kl_divergence(self.post_latent_space,
                                         self.prior_latent_space)
        posterior_sample = self.post_latent_space.rsample()
        self.reconstruction = self.fcomb.forward(self.unet_features,
                                                 posterior_sample)
        self.recon_loss = nn.BCEWithLogitsLoss()(self.reconstruction, segx)
        # The KL term (not the reconstruction term a second time) is what
        # ``beta`` weights. ``kl_divergence`` may return one value per batch
        # element, so it is averaged to keep the returned loss a scalar.
        # NOTE(review): if the encoders return un-wrapped Normal
        # distributions this also averages over latent dims -- confirm.
        return self.recon_loss + beta * self.kld_loss.mean()

    def reconstruct(self, x, num_samples=1):
        """Draw ``num_samples`` outputs for ``x`` using prior samples.

        Args:
            x: Network input.
            num_samples: Number of latent samples to decode.

        Returns:
            List with one ``fcomb`` output per sample.
        """
        prior_latent_space = self.prior.forward(x)
        # The U-Net output depends only on ``x``, so it is computed once and
        # reused for every latent sample instead of once per sample.
        unet_features = self.unet.forward(x)
        return [
            self.fcomb.forward(unet_features, prior_latent_space.rsample())
            for _ in range(num_samples)
        ]
def train():
    """Train a ``U_Net`` on the spectrogram datasets configured in ``C``.

    Builds the training and validation datasets from ``C.PATH_FFT`` and
    ``C.VAL_PATH_FFT``, wraps both in identically configured loaders, and
    hands them to a ``Trainer`` for ``C.num_epoches`` epochs.
    """
    train_dataset = SpectrogramDataset(C.PATH_FFT)
    valid_dataset = SpectrogramDataset(C.VAL_PATH_FFT)

    # Both loaders share exactly the same settings.
    loader_options = dict(
        batch_size=C.BATCH_SIZE,
        shuffle=True,
        num_workers=4,
        drop_last=True,
        pin_memory=True,
    )
    train_loader = DataLoader(train_dataset, **loader_options)
    valid_loader = DataLoader(valid_dataset, **loader_options)

    trainer = Trainer(U_Net(), C.CHECK_POINT, C.LR)
    trainer.run(train_loader, valid_loader, num_epoches=C.num_epoches)
# Run the U-Net mask model over every file in the test directory and write
# the two masked spectrograms back out as audio.
PATH_test = "test/"
audiolist = os.listdir(PATH_test)

for fname in audiolist:
    mag, phase = utils.LoadAudio(os.path.join(PATH_test, fname))
    leng = mag.shape[1]

    # song's length >= 1024frame
    # song's length = 2^n
    # Append align(leng) zero-valued columns along axis 1.
    tmp = np.zeros((mag.shape[0], align(leng)), dtype=np.float32)
    mag = np.concatenate((mag, tmp), axis=1)
    print(mag.shape)

    unet = U_Net()
    mask = utils.ComputeMask(mag, unet, "unet_model.pkl", False)
    print(mask.shape)

    # Cut the padding off again so both arrays cover the original frames only.
    mag = mag[:, :leng]
    mask = mask[:, :leng]

    utils.SaveAudio("enhanced/unet-inst-%s" % fname, mag * mask, phase)
    utils.SaveAudio("enhanced/unet-vocal-%s" % fname, mag * (1 - mask), phase)

# NOTE(review): the disabled single-file variant below is cut off in the
# visible source; the closing quotes were added so that this block parses.
"""
fname = "test/A_22_02.wav"
mag, phase = util.LoadAudio(fname)
leng = mag.shape[1]
print(mag.shape)
# song's length >= 1024frame
# song's length = 2^n
"""
args = get_args() device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') logging.info(f'Using device {device}') # Change here to adapt to your data # n_channels=3 for RGB images # n_classes is the number of probabilities you want to get per pixel # - For 1 class and background, use n_classes=1 # - For 2 classes, use n_classes=1 # - For N > 2 classes, use n_classes=N # net = UNet(n_channels=3, n_classes=1, bilinear=True) # net = AttU_Net() # net = R2U_Net() # net = R2AttU_Net() # net = NestedUNet() net = U_Net() # logging.info(f'Network:\n' # f'\t{net.n_channels} input channels\n' # f'\t{net.n_classes} output channels (classes)\n' # f'\t{"Bilinear" if net.bilinear else "Transposed conv"} upscaling') if args.load: net.load_state_dict(torch.load(args.load, map_location=device)) logging.info(f'Model loaded from {args.load}') net.to(device=device) # faster convolutions, but more memory # cudnn.benchmark = True try: train_net(net=net,
# Training mode (disabled); kept for reference.
# device = torch.device('cpu')
# if args.use_gpu:
#     device=torch.device('cuda:0')
# net=U_Net(1,2,args.blinear)
# net.initialize_weights()
# net.to(device)
# optimizer=optim.Adam(net.parameters(),lr=args.lr,weight_decay=1e-3)
# criterion=nn.CrossEntropyLoss().to(device)
#
# train(net,device,args.epoch,args.bs,train_images,train_masks,test_images,test_masks,optimizer,criterion)

# test
device = torch.device('cuda:0' if args.use_gpu else 'cpu')
net = U_Net(1, 2, args.blinear)  # must match the arguments used for training
net.to(device)
# map_location=device handles both the CPU and the GPU case, and also lets a
# checkpoint saved on a different device be loaded.
net.load_state_dict(torch.load('./best.mdl', map_location=device))

# Predict on samples taken from the dataset.
plot_examples(net, device, train_images, train_masks, 5)
plot_examples(net, device, test_images, test_masks, 5)

# Predict on an image that is not part of the dataset.
image_path = './1.png'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError('could not read image: %s' % image_path)
# cv2.imread returns channels in BGR order, so the BGR->gray conversion is
# the correct one (RGB2GRAY would swap the red and blue weights).
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = cv2.resize(image, (args.image_size, args.image_size)) / 255
plt.imshow(image)
plt.show()