def build_model(self):
    """Instantiate all sub-networks and one Adam optimizer per training role.

    Creates Encoder/Decoder (autoencoder pair), a Generator (a second
    Decoder instance), a SpeakerClassifier, and a DataParallel-wrapped
    PatchDiscriminator, all passed through ``cc`` (presumably a
    to-device helper — confirm against its definition).  Optimizers:
    ``ae_opt`` over Encoder+Decoder jointly, plus one each for the
    classifier, generator, and patch discriminator.
    """
    hps = self.hps
    ns = hps.ns
    emb_size = hps.emb_size

    # --- networks ---
    self.Encoder = cc(Encoder(ns=ns, dp=hps.enc_dp))
    self.Decoder = cc(Decoder(ns=ns, c_a=hps.n_speakers, emb_size=emb_size))
    # Generator shares the Decoder architecture but is trained separately.
    self.Generator = cc(Decoder(ns=ns, c_a=hps.n_speakers, emb_size=emb_size))
    self.SpeakerClassifier = cc(
        SpeakerClassifier(ns=ns, n_class=hps.n_speakers, dp=hps.dis_dp))
    self.PatchDiscriminator = cc(
        nn.DataParallel(PatchDiscriminator(ns=ns, n_class=hps.n_speakers)))

    # --- optimizers (shared betas, one lr) ---
    betas = (0.5, 0.9)
    ae_params = list(self.Encoder.parameters())
    ae_params += list(self.Decoder.parameters())
    self.ae_opt = optim.Adam(ae_params, lr=hps.lr, betas=betas)
    self.clf_opt = optim.Adam(
        self.SpeakerClassifier.parameters(), lr=hps.lr, betas=betas)
    self.gen_opt = optim.Adam(
        self.Generator.parameters(), lr=hps.lr, betas=betas)
    self.patch_opt = optim.Adam(
        self.PatchDiscriminator.parameters(), lr=hps.lr, betas=betas)
def build_model(self, wavenet_mel):
    """Build networks sized for the chosen spectral representation.

    Args:
        wavenet_mel: truthy selects 80-bin mel features (WaveNet vocoder);
            falsy selects 513-bin linear spectrograms.  The feature count
            sets Encoder input / Decoder output channels, and the patch
            discriminator's classify kernel is matched to it.
    """
    hps = self.hps
    ns = hps.ns
    emb_size = hps.emb_size

    # Feature-dependent sizes.
    if wavenet_mel:
        n_bins = 80
        patch_classify_kernel = (3, 4)
    else:
        n_bins = 513
        patch_classify_kernel = (17, 4)

    # --- networks ---
    self.Encoder = cc(Encoder(c_in=n_bins, ns=ns, dp=hps.enc_dp))
    self.Decoder = cc(
        Decoder(c_out=n_bins, ns=ns, c_a=hps.n_speakers, emb_size=emb_size))
    # Generator reuses the Decoder architecture with its own weights.
    self.Generator = cc(
        Decoder(c_out=n_bins, ns=ns, c_a=hps.n_speakers, emb_size=emb_size))
    self.SpeakerClassifier = cc(
        SpeakerClassifier(ns=ns, n_class=hps.n_speakers, dp=hps.dis_dp))
    self.PatchDiscriminator = cc(
        nn.DataParallel(
            PatchDiscriminator(
                ns=ns,
                n_class=hps.n_speakers,
                classify_kernel_size=patch_classify_kernel)))

    # --- optimizers (shared betas, one lr) ---
    betas = (0.5, 0.9)
    ae_params = list(self.Encoder.parameters())
    ae_params += list(self.Decoder.parameters())
    self.ae_opt = optim.Adam(ae_params, lr=hps.lr, betas=betas)
    self.clf_opt = optim.Adam(
        self.SpeakerClassifier.parameters(), lr=hps.lr, betas=betas)
    self.gen_opt = optim.Adam(
        self.Generator.parameters(), lr=hps.lr, betas=betas)
    self.patch_opt = optim.Adam(
        self.PatchDiscriminator.parameters(), lr=hps.lr, betas=betas)
def build_model(self):
    """Build networks, move them to GPU when available, and create optimizers.

    Creates Encoder/Decoder (autoencoder), a Generator (second Decoder
    instance), a LatentDiscriminator, and a PatchDiscriminator, then one
    Adam optimizer per training role (``ae_opt`` spans Encoder+Decoder).

    Improvement over the original: the five copy-pasted ``.cuda()`` calls
    are replaced by a single loop over the sub-networks, so adding a new
    network can't silently miss the GPU transfer.
    """
    hps = self.hps
    ns = hps.ns
    emb_size = hps.emb_size

    # --- networks ---
    self.Encoder = Encoder(ns=ns, dp=hps.enc_dp)
    self.Decoder = Decoder(ns=ns, c_a=hps.n_speakers, emb_size=emb_size)
    # Generator reuses the Decoder architecture with independent weights.
    self.Generator = Decoder(ns=ns, c_a=hps.n_speakers, emb_size=emb_size)
    self.LatentDiscriminator = LatentDiscriminator(ns=ns, dp=hps.dis_dp)
    self.PatchDiscriminator = PatchDiscriminator(ns=ns, n_class=hps.n_speakers)

    # Move every sub-network in one pass instead of repeated .cuda() calls.
    if torch.cuda.is_available():
        for net in (self.Encoder, self.Decoder, self.Generator,
                    self.LatentDiscriminator, self.PatchDiscriminator):
            net.cuda()

    # --- optimizers (shared betas, one lr) ---
    betas = (0.5, 0.9)
    ae_params = list(self.Encoder.parameters()) + list(
        self.Decoder.parameters())
    self.ae_opt = optim.Adam(ae_params, lr=hps.lr, betas=betas)
    self.gen_opt = optim.Adam(
        self.Generator.parameters(), lr=hps.lr, betas=betas)
    self.lat_opt = optim.Adam(
        self.LatentDiscriminator.parameters(), lr=hps.lr, betas=betas)
    self.patch_opt = optim.Adam(
        self.PatchDiscriminator.parameters(), lr=hps.lr, betas=betas)
transforms.RandomHorizontalFlip(), transforms.RandomGrayscale(), transforms.ToTensor(), ]) train_set = DS(args.root, train_tf) iterator_train = iter(data.DataLoader( train_set, batch_size=args.batch_size, sampler=InfiniteSampler(len(train_set)), num_workers=args.n_threads)) print(len(train_set)) g_model = InpaintNet().to(device) fd_model = FeaturePatchDiscriminator().to(device) pd_model = PatchDiscriminator().to(device) l1 = nn.L1Loss().to(device) cons = ConsistencyLoss().to(device) start_iter = 0 g_optimizer = torch.optim.Adam( g_model.parameters(), args.lr, (args.b1, args.b2)) fd_optimizer = torch.optim.Adam( fd_model.parameters(), args.lr, (args.b1, args.b2)) pd_optimizer = torch.optim.Adam( pd_model.parameters(), args.lr, (args.b1, args.b2)) if args.resume:
def build_model(self):
    """Build stage-one and stage-two networks and their optimizers.

    Stage one: Encoder/Decoder autoencoder plus a SpeakerClassifier
    (adversarial speaker removal from the latent).  Stage two: a
    Generator (Decoder architecture) and a DataParallel-wrapped
    PatchDiscriminator.  When ``self.targeted_G`` is set, stage-two
    networks are sized for ``hps.n_target_speakers`` instead of the
    full ``hps.n_speakers``.
    """
    hps = self.hps
    ns = hps.ns
    emb_size = hps.emb_size
    seg_len = hps.seg_len
    betas = (0.5, 0.9)

    # Derived sizes, hoisted out of the constructor calls for readability.
    decoder_inp_emb = self.one_hot or self.binary_output
    # Binary latents are flattened, so the classifier sees emb_size^2 inputs.
    clf_in = emb_size * emb_size if self.binary_output else emb_size
    stage2_classes = (hps.n_target_speakers if self.targeted_G
                      else hps.n_speakers)

    # --- stage one: autoencoder + speaker classifier ---
    self.Encoder = cc(
        Encoder(ns=ns,
                dp=hps.enc_dp,
                emb_size=emb_size,
                seg_len=seg_len,
                one_hot=self.one_hot,
                binary_output=self.binary_output,
                binary_ver=self.binary_ver))
    self.Decoder = cc(
        Decoder(ns=ns,
                c_in=emb_size,
                c_h=emb_size,
                c_a=hps.n_speakers,
                seg_len=seg_len,
                inp_emb=decoder_inp_emb))
    self.SpeakerClassifier = cc(
        SpeakerClassifier(ns=ns,
                          c_in=clf_in,
                          c_h=emb_size,
                          n_class=hps.n_speakers,
                          dp=hps.dis_dp,
                          seg_len=seg_len))

    # --- stage one optimizers ---
    ae_params = list(self.Encoder.parameters())
    ae_params += list(self.Decoder.parameters())
    self.ae_opt = optim.Adam(ae_params, lr=hps.lr, betas=betas)
    self.clf_opt = optim.Adam(
        self.SpeakerClassifier.parameters(), lr=hps.lr, betas=betas)

    # --- stage two: generator + patch discriminator ---
    # NOTE(review): the Generator is built without seg_len, unlike the
    # Decoder above — presumably it uses the class default; confirm.
    self.Generator = cc(
        Decoder(ns=ns,
                c_in=emb_size,
                c_h=emb_size,
                c_a=stage2_classes))
    self.PatchDiscriminator = cc(
        nn.DataParallel(
            PatchDiscriminator(ns=ns,
                               n_class=stage2_classes,
                               seg_len=seg_len)))

    # --- stage two optimizers ---
    self.gen_opt = optim.Adam(
        self.Generator.parameters(), lr=hps.lr, betas=betas)
    self.patch_opt = optim.Adam(
        self.PatchDiscriminator.parameters(), lr=hps.lr, betas=betas)