Esempio n. 1
0
 def build_model(self):
     """Create the networks and one Adam optimizer per training group.

     Reads ``ns``, ``emb_size``, ``n_speakers``, ``enc_dp``, ``dis_dp`` and
     ``lr`` from ``self.hps``.

     Sets ``self.Encoder``, ``self.Decoder``, ``self.Generator``,
     ``self.SpeakerClassifier`` and ``self.PatchDiscriminator``, and the
     optimizers ``self.ae_opt`` (Encoder and Decoder parameters together),
     ``self.clf_opt``, ``self.gen_opt`` and ``self.patch_opt``.
     """
     hps = self.hps
     ns, emb_size = hps.ns, hps.emb_size
     adam_betas = (0.5, 0.9)

     def make_adam(parameters):
         # Every optimizer here shares the same learning rate and betas.
         return optim.Adam(parameters, lr=hps.lr, betas=adam_betas)

     # Each network goes through cc(), a helper defined outside this method
     # (presumably device placement -- confirm against its definition).
     self.Encoder = cc(Encoder(ns=ns, dp=hps.enc_dp))

     # Decoder and Generator are two independent Decoder instances built
     # from the same arguments.
     decoder_kwargs = dict(ns=ns, c_a=hps.n_speakers, emb_size=emb_size)
     self.Decoder = cc(Decoder(**decoder_kwargs))
     self.Generator = cc(Decoder(**decoder_kwargs))

     classifier = SpeakerClassifier(
         ns=ns, n_class=hps.n_speakers, dp=hps.dis_dp)
     self.SpeakerClassifier = cc(classifier)

     # Only the patch discriminator is wrapped in nn.DataParallel.
     patch_net = PatchDiscriminator(ns=ns, n_class=hps.n_speakers)
     self.PatchDiscriminator = cc(nn.DataParallel(patch_net))

     # The autoencoder optimizer updates Encoder and Decoder jointly.
     autoencoder_params = list(self.Encoder.parameters())
     autoencoder_params.extend(self.Decoder.parameters())
     self.ae_opt = make_adam(autoencoder_params)
     self.clf_opt = make_adam(self.SpeakerClassifier.parameters())
     self.gen_opt = make_adam(self.Generator.parameters())
     self.patch_opt = make_adam(self.PatchDiscriminator.parameters())
 def build_model(self, wavenet_mel):
     """Create the networks and optimizers for one of two feature layouts.

     Args:
         wavenet_mel: When truthy, the channel count is 80 and the patch
             discriminator's ``classify_kernel_size`` is ``(3, 4)``;
             otherwise they are 513 and ``(17, 4)``.

     Reads ``ns``, ``emb_size``, ``n_speakers``, ``enc_dp``, ``dis_dp`` and
     ``lr`` from ``self.hps``.  Sets ``self.Encoder``, ``self.Decoder``,
     ``self.Generator``, ``self.SpeakerClassifier``,
     ``self.PatchDiscriminator`` and the optimizers ``self.ae_opt``,
     ``self.clf_opt``, ``self.gen_opt`` and ``self.patch_opt``.
     """
     hps = self.hps
     ns, emb_size = hps.ns, hps.emb_size

     # The channel count is used as the Encoder's c_in and the Decoders'
     # c_out (presumably the number of spectral bins per frame -- confirm).
     if wavenet_mel:
         n_channels, classify_kernel = 80, (3, 4)
     else:
         n_channels, classify_kernel = 513, (17, 4)

     adam_betas = (0.5, 0.9)

     def make_adam(parameters):
         # Shared optimizer settings for every network group.
         return optim.Adam(parameters, lr=hps.lr, betas=adam_betas)

     # cc() is a helper defined outside this method.
     self.Encoder = cc(Encoder(c_in=n_channels, ns=ns, dp=hps.enc_dp))

     # Decoder and Generator: separate instances, identical arguments.
     decoder_kwargs = dict(
         c_out=n_channels, ns=ns, c_a=hps.n_speakers, emb_size=emb_size)
     self.Decoder = cc(Decoder(**decoder_kwargs))
     self.Generator = cc(Decoder(**decoder_kwargs))

     classifier = SpeakerClassifier(
         ns=ns, n_class=hps.n_speakers, dp=hps.dis_dp)
     self.SpeakerClassifier = cc(classifier)

     # Only the patch discriminator is wrapped in nn.DataParallel.
     patch_net = PatchDiscriminator(
         ns=ns,
         n_class=hps.n_speakers,
         classify_kernel_size=classify_kernel)
     self.PatchDiscriminator = cc(nn.DataParallel(patch_net))

     # The autoencoder optimizer updates Encoder and Decoder jointly.
     autoencoder_params = list(self.Encoder.parameters())
     autoencoder_params.extend(self.Decoder.parameters())
     self.ae_opt = make_adam(autoencoder_params)
     self.clf_opt = make_adam(self.SpeakerClassifier.parameters())
     self.gen_opt = make_adam(self.Generator.parameters())
     self.patch_opt = make_adam(self.PatchDiscriminator.parameters())
Esempio n. 3
0
 def build_model(self):
     """Create the networks, move them to CUDA if available, add optimizers.

     Reads ``ns``, ``emb_size``, ``n_speakers``, ``enc_dp``, ``dis_dp`` and
     ``lr`` from ``self.hps``.

     Sets ``self.Encoder``, ``self.Decoder``, ``self.Generator``,
     ``self.LatentDiscriminator`` and ``self.PatchDiscriminator``, and the
     optimizers ``self.ae_opt`` (Encoder and Decoder parameters together),
     ``self.gen_opt``, ``self.lat_opt`` and ``self.patch_opt``.
     """
     hps = self.hps
     ns, emb_size = hps.ns, hps.emb_size

     self.Encoder = Encoder(ns=ns, dp=hps.enc_dp)

     # Decoder and Generator: separate instances, identical arguments.
     decoder_kwargs = dict(ns=ns, c_a=hps.n_speakers, emb_size=emb_size)
     self.Decoder = Decoder(**decoder_kwargs)
     self.Generator = Decoder(**decoder_kwargs)

     self.LatentDiscriminator = LatentDiscriminator(ns=ns, dp=hps.dis_dp)
     self.PatchDiscriminator = PatchDiscriminator(
         ns=ns, n_class=hps.n_speakers)

     # Move every network to the GPU when one is present; otherwise they
     # stay where they were constructed.
     if torch.cuda.is_available():
         networks = (
             self.Encoder,
             self.Decoder,
             self.Generator,
             self.LatentDiscriminator,
             self.PatchDiscriminator,
         )
         for network in networks:
             network.cuda()

     adam_betas = (0.5, 0.9)

     def make_adam(parameters):
         # Shared optimizer settings for every network group.
         return optim.Adam(parameters, lr=hps.lr, betas=adam_betas)

     # The autoencoder optimizer updates Encoder and Decoder jointly.
     autoencoder_params = list(self.Encoder.parameters())
     autoencoder_params.extend(self.Decoder.parameters())
     self.ae_opt = make_adam(autoencoder_params)
     self.gen_opt = make_adam(self.Generator.parameters())
     self.lat_opt = make_adam(self.LatentDiscriminator.parameters())
     self.patch_opt = make_adam(self.PatchDiscriminator.parameters())
Esempio n. 4
0
    transforms.RandomHorizontalFlip(),
    transforms.RandomGrayscale(),
    transforms.ToTensor(),
])

# Training data: a dataset plus an iterator over its DataLoader.
# InfiniteSampler is defined elsewhere; presumably it never runs out of
# indices, so the iterator can be advanced without epoch handling -- confirm.
train_set = DS(args.root, train_tf)
iterator_train = iter(
    data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        sampler=InfiniteSampler(len(train_set)),
        num_workers=args.n_threads,
    )
)
print(len(train_set))

# Networks and loss modules, all placed on the selected device.
g_model = InpaintNet().to(device)
fd_model = FeaturePatchDiscriminator().to(device)
pd_model = PatchDiscriminator().to(device)
l1 = nn.L1Loss().to(device)
cons = ConsistencyLoss().to(device)

start_iter = 0

# One Adam optimizer per network, all with the same learning rate and betas
# taken from the command-line arguments.
g_optimizer = torch.optim.Adam(
    g_model.parameters(), lr=args.lr, betas=(args.b1, args.b2))
fd_optimizer = torch.optim.Adam(
    fd_model.parameters(), lr=args.lr, betas=(args.b1, args.b2))
pd_optimizer = torch.optim.Adam(
    pd_model.parameters(), lr=args.lr, betas=(args.b1, args.b2))

if args.resume:
Esempio n. 5
0
    def build_model(self):
        """Build the stage-one and stage-two networks with their optimizers.

        Stage one creates ``self.Encoder``, ``self.Decoder`` and
        ``self.SpeakerClassifier`` with ``self.ae_opt`` (Encoder and Decoder
        parameters together) and ``self.clf_opt``.  Stage two creates
        ``self.Generator`` and ``self.PatchDiscriminator`` with
        ``self.gen_opt`` and ``self.patch_opt``.

        Reads ``ns``, ``emb_size``, ``enc_dp``, ``dis_dp``, ``seg_len``,
        ``n_speakers``, ``lr`` (and ``n_target_speakers`` when
        ``self.targeted_G`` is truthy) from ``self.hps``, plus the flags
        ``self.one_hot``, ``self.binary_output``, ``self.binary_ver`` and
        ``self.targeted_G``.
        """
        hps = self.hps
        ns = hps.ns
        emb_size = hps.emb_size
        adam_betas = (0.5, 0.9)

        def make_adam(parameters):
            # Shared optimizer settings for every network group.
            return optim.Adam(parameters, lr=hps.lr, betas=adam_betas)

        # ---- stage one: networks ----
        # cc() is a helper defined outside this method.
        encoder = Encoder(
            ns=ns,
            dp=hps.enc_dp,
            emb_size=emb_size,
            seg_len=hps.seg_len,
            one_hot=self.one_hot,
            binary_output=self.binary_output,
            binary_ver=self.binary_ver)
        self.Encoder = cc(encoder)

        decoder = Decoder(
            ns=ns,
            c_in=emb_size,
            c_h=emb_size,
            c_a=hps.n_speakers,
            seg_len=hps.seg_len,
            inp_emb=self.one_hot or self.binary_output)
        self.Decoder = cc(decoder)

        # With binary_output set, the classifier input width is emb_size
        # squared; otherwise it is emb_size.
        if self.binary_output:
            classifier_c_in = emb_size * emb_size
        else:
            classifier_c_in = emb_size
        classifier = SpeakerClassifier(
            ns=ns,
            c_in=classifier_c_in,
            c_h=emb_size,
            n_class=hps.n_speakers,
            dp=hps.dis_dp,
            seg_len=hps.seg_len)
        self.SpeakerClassifier = cc(classifier)

        # ---- stage one: optimizers ----
        autoencoder_params = list(self.Encoder.parameters())
        autoencoder_params.extend(self.Decoder.parameters())
        self.ae_opt = make_adam(autoencoder_params)
        self.clf_opt = make_adam(self.SpeakerClassifier.parameters())

        # ---- stage two: networks ----
        # A targeted generator uses the target-speaker count for both the
        # Generator's c_a and the discriminator's n_class.
        if self.targeted_G:
            n_stage_two_classes = hps.n_target_speakers
        else:
            n_stage_two_classes = hps.n_speakers

        # Unlike self.Decoder, the Generator is built without seg_len and
        # inp_emb, so Decoder's own defaults apply for those.
        generator = Decoder(
            ns=ns,
            c_in=emb_size,
            c_h=emb_size,
            c_a=n_stage_two_classes)
        self.Generator = cc(generator)

        # Only the patch discriminator is wrapped in nn.DataParallel.
        patch_net = PatchDiscriminator(
            ns=ns,
            n_class=n_stage_two_classes,
            seg_len=hps.seg_len)
        self.PatchDiscriminator = cc(nn.DataParallel(patch_net))

        # ---- stage two: optimizers ----
        self.gen_opt = make_adam(self.Generator.parameters())
        self.patch_opt = make_adam(self.PatchDiscriminator.parameters())