Example 1

import torch
import torch.nn as nn
from torch.distributions import kl

# U_Net, GenEncoder, and Comb are defined elsewhere in this project.


class ProbU_Net(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 latent_dim,
                 ini_num_features=16,
                 unet_depth=5,
                 enc_depth=5):
        super(ProbU_Net, self).__init__()
        self.latent_dim = latent_dim

        self.unet = U_Net(in_channels, out_channels, ini_num_features,
                          unet_depth)
        self.prior = GenEncoder(in_channels, latent_dim, ini_num_features,
                                enc_depth)
        self.posterior = GenEncoder(in_channels + 1, latent_dim,
                                    ini_num_features, enc_depth)
        self.fcomb = Comb(ini_num_features, latent_dim, out_channels, depth=4)

    def forward(self, x, segx, train=True):
        # During training the posterior sees the image together with its
        # ground-truth segmentation; the prior only ever sees the image.
        if train:
            self.post_latent_space = self.posterior(
                torch.cat((x, segx), dim=1))
        self.prior_latent_space = self.prior(x)
        self.unet_features = self.unet(x)

    def loss(self, segx, beta):
        # KL divergence between the posterior and prior latent distributions,
        # reduced to a scalar so it can be combined with the BCE term.
        self.kld_loss = kl.kl_divergence(self.post_latent_space,
                                         self.prior_latent_space).mean()

        posterior_sample = self.post_latent_space.rsample()
        self.reconstruction = self.fcomb(self.unet_features,
                                         posterior_sample)
        self.recon_loss = nn.BCEWithLogitsLoss()(self.reconstruction, segx)

        # ELBO-style objective: reconstruction term plus beta-weighted KL.
        return self.recon_loss + beta * self.kld_loss

    def reconstruct(self, x, num_samples=1):
        # At test time, draw segmentations from the prior, reusing the
        # U-Net features across samples.
        prior_latent_space = self.prior(x)
        unet_features = self.unet(x)
        res = []
        for _ in range(num_samples):
            prior_sample = prior_latent_space.rsample()
            res.append(self.fcomb(unet_features, prior_sample))
        return res
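
A minimal sketch of how the class above is meant to be driven, assuming GenEncoder returns a torch.distributions object exposing rsample() (as loss() requires) and that segx is a one-channel mask, matching the in_channels + 1 posterior input. The shapes, learning rate, and beta value are illustrative, not taken from the original code.

# Hypothetical training step for ProbU_Net.
model = ProbU_Net(in_channels=1, out_channels=1, latent_dim=6)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

x = torch.randn(4, 1, 128, 128)                       # batch of images
segx = torch.randint(0, 2, (4, 1, 128, 128)).float()  # binary masks

model(x, segx, train=True)          # caches posterior, prior, U-Net features
loss = model.loss(segx, beta=10.0)  # objective built from those caches
optimizer.zero_grad()
loss.backward()
optimizer.step()

# At test time, draw several plausible segmentations from the prior.
samples = model.reconstruct(x, num_samples=4)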
Example 2

from torch.utils.data import DataLoader

# SpectrogramDataset, U_Net, Trainer, and the config module C are
# defined elsewhere in this project.


def train():
    train_dataset = SpectrogramDataset(C.PATH_FFT)
    valid_dataset = SpectrogramDataset(C.VAL_PATH_FFT)

    train_loader = DataLoader(train_dataset,
                              batch_size=C.BATCH_SIZE,
                              shuffle=True,
                              num_workers=4,
                              drop_last=True,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=C.BATCH_SIZE,
                              shuffle=True,
                              num_workers=4,
                              drop_last=True,
                              pin_memory=True)

    unet = U_Net()
    trainer = Trainer(unet, C.CHECK_POINT, C.LR)
    trainer.run(train_loader, valid_loader, num_epoches=C.num_epoches)
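
train() reads everything from a config module C that the excerpt does not show. A hypothetical sketch with placeholder values, covering exactly the attributes used above:

# C.py -- placeholder configuration; all paths and values are assumptions.
PATH_FFT = "data/fft/train"       # directory of training spectrograms
VAL_PATH_FFT = "data/fft/valid"   # directory of validation spectrograms
BATCH_SIZE = 64
CHECK_POINT = "checkpoint/"       # where Trainer stores model weights
LR = 1e-4
num_epoches = 100                 # spelled as the code references it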
Example 3

import os

import numpy as np

# utils and U_Net are defined elsewhere in this project; a plausible
# sketch of the align helper follows the example.

PATH_test = "test/"
audiolist = os.listdir(PATH_test)

# Build the model once rather than once per file.
unet = U_Net()

for fname in audiolist:
    mag, phase = utils.LoadAudio(os.path.join(PATH_test, fname))
    leng = mag.shape[1]
    # Pad the spectrogram with zeros so its length is at least 1024
    # frames and a power of two (see the align sketch after this example).
    tmp = np.zeros((mag.shape[0], align(leng)), dtype=np.float32)
    mag = np.concatenate((mag, tmp), axis=1)
    print(mag.shape)

    mask = utils.ComputeMask(mag, unet, "unet_model.pkl", False)
    print(mask.shape)
    mag = mag[:, 0:leng]
    mask = mask[:, 0:leng]

    utils.SaveAudio("enhanced/unet-inst-%s" % fname, mag * mask, phase)
    utils.SaveAudio("enhanced/unet-vocal-%s" % fname, mag * (1 - mask), phase)
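
The align helper never appears in the excerpt. Given the padding comments (at least 1024 frames, a power of two) and the fact that its result is used as the number of zero frames to append, a plausible sketch (the real helper may differ):

import numpy as np


def align(length, minimum=1024):
    """Zero-padding width that brings `length` up to the next power of
    two that is at least `minimum` frames."""
    target = 2 ** int(np.ceil(np.log2(max(length, minimum))))
    return target - length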
"""
fname = "test/A_22_02.wav"
mag, phase = util.LoadAudio(fname)
leng = mag.shape[1]
print(mag.shape)
# song's length >= 1024frame
# song's length = 2^n
Example 4

    args = get_args()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    logging.info(f'Using device {device}')

    # Change here to adapt to your data
    # n_channels=3 for RGB images
    # n_classes is the number of probabilities you want to get per pixel
    #   - For 1 class and background, use n_classes=1
    #   - For 2 classes, use n_classes=1
    #   - For N > 2 classes, use n_classes=N
    # net = UNet(n_channels=3, n_classes=1, bilinear=True)
    # net = AttU_Net()
    # net = R2U_Net()
    # net = R2AttU_Net()
    # net = NestedUNet()
    net = U_Net()
    # logging.info(f'Network:\n'
    #              f'\t{net.n_channels} input channels\n'
    #              f'\t{net.n_classes} output channels (classes)\n'
    #              f'\t{"Bilinear" if net.bilinear else "Transposed conv"} upscaling')

    if args.load:
        net.load_state_dict(torch.load(args.load, map_location=device))
        logging.info(f'Model loaded from {args.load}')

    net.to(device=device)
    # faster convolutions, but more memory
    # cudnn.benchmark = True

    try:
        train_net(net=net,
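
The excerpt is cut off inside the train_net call and never shows get_args. A minimal argparse sketch covering the one option the visible code reads (--load); the flag's exact name, short form, and default are assumptions:

import argparse


def get_args():
    parser = argparse.ArgumentParser(description='Train a U-Net variant')
    # --load is the only option the visible part of the script uses.
    parser.add_argument('--load', type=str, default=None,
                        help='path to a checkpoint to resume from')
    return parser.parse_args()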
Example 5

    # device = torch.device('cpu')
    # if args.use_gpu:
    #     device=torch.device('cuda:0')
    # net=U_Net(1,2,args.blinear)
    # net.initialize_weights()
    # net.to(device)
    # optimizer=optim.Adam(net.parameters(),lr=args.lr,weight_decay=1e-3)
    # criterion=nn.CrossEntropyLoss().to(device)
    #
    # train(net,device,args.epoch,args.bs,train_images,train_masks,test_images,test_masks,optimizer,criterion)

    # test
    device = torch.device('cpu')
    if args.use_gpu:
        device = torch.device('cuda:0')
    net = U_Net(1, 2, args.blinear)  # must match the settings used for training
    net.to(device)
    if args.use_gpu:
        net.load_state_dict(torch.load('./best.mdl'))
    else:
        net.load_state_dict(torch.load('./best.mdl', map_location='cpu'))
    # predict on samples drawn from the dataset
    plot_examples(net, device, train_images, train_masks, 5)
    plot_examples(net, device, test_images, test_masks, 5)

    # predict on an image that is not part of the dataset
    image = cv2.imread('./1.png')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # OpenCV loads images as BGR
    image = cv2.resize(image, (args.image_size, args.image_size)) / 255
    plt.imshow(image)
    plt.show()
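
The excerpt ends after displaying the preprocessed image. A hedged sketch of the prediction step that presumably follows, continuing the script above and based only on what the visible code establishes (U_Net(1, 2, ...): one input channel, two output classes); the tensor layout is an assumption:

    # Shape the grayscale image into a (batch, channel, H, W) float tensor.
    tensor = torch.from_numpy(image).float().unsqueeze(0).unsqueeze(0).to(device)

    net.eval()
    with torch.no_grad():
        logits = net(tensor)                    # (1, 2, H, W) class scores
        pred = logits.argmax(dim=1).squeeze(0)  # per-pixel class indices

    plt.imshow(pred.cpu().numpy())
    plt.show()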