def load_models(load_path):
    # Restore the training-time arguments saved alongside the checkpoint.
    model_args = json.load(open(os.path.join(load_path, 'options.json'), 'r'))
    vars(args).update(model_args)

    # Rebuild the autoencoder and GAN modules with the restored hyperparameters.
    autoencoder = Seq2Seq(emsize=args.emsize,
                          nhidden=args.nhidden,
                          ntokens=args.ntokens,
                          nlayers=args.nlayers,
                          noise_r=args.noise_r,
                          hidden_init=args.hidden_init,
                          dropout=args.dropout,
                          gpu=args.cuda)
    gan_gen = MLP_G(ninput=args.z_size, noutput=args.nhidden, layers=args.arch_g)
    gan_disc = MLP_D(ninput=args.nhidden, noutput=1, layers=args.arch_d)

    autoencoder = autoencoder.cuda()
    gan_gen = gan_gen.cuda()
    gan_disc = gan_disc.cuda()

    # Vocabulary written at training time; args.save comes from the restored options.
    word2idx = json.load(open(os.path.join(args.save, 'vocab.json'), 'r'))
    idx2word = {v: k for k, v in word2idx.items()}

    print('Loading models from {}'.format(args.save))
    loaded = torch.load(os.path.join(args.save, "model.pt"))
    autoencoder.load_state_dict(loaded.get('ae'))
    gan_gen.load_state_dict(loaded.get('gan_g'))
    gan_disc.load_state_dict(loaded.get('gan_d'))
    return model_args, idx2word, autoencoder, gan_gen, gan_disc
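# Usage sketch (hypothetical, not part of the original script): assumes a global
# argparse namespace `args` with a `load_path` attribute pointing at a directory
# that holds the options.json / vocab.json / model.pt written during training.
model_args, idx2word, autoencoder, gan_gen, gan_disc = load_models(args.load_path)
autoencoder.eval()
gan_gen.eval()
gan_disc.eval()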
###############################################################################
# Build the models
###############################################################################
ntokens = len(corpus.dictionary.word2idx)
autoencoder = Seq2Seq2Decoder(emsize=args.emsize,
                              nhidden=args.nhidden,
                              ntokens=ntokens,
                              nlayers=args.nlayers,
                              noise_r=args.noise_r,
                              hidden_init=args.hidden_init,
                              dropout=args.dropout,
                              gpu=args.cuda)
gan_gen = MLP_G(ninput=args.z_size, noutput=args.nhidden, layers=args.arch_g)
gan_disc = MLP_D(ninput=args.nhidden, noutput=1, layers=args.arch_d)
classifier = MLP_Classify(ninput=args.nhidden, noutput=1, layers=args.arch_classify)
g_factor = None

print(autoencoder)
print(gan_gen)
print(gan_disc)
print(classifier)

optimizer_ae = optim.SGD(autoencoder.parameters(), lr=args.lr_ae)
optimizer_gan_g = optim.Adam(gan_gen.parameters(),
                             lr=args.lr_gan_g,
                             betas=(args.beta1, 0.999))
ngf = int(opt.ngf)
ndf = int(opt.ndf)
nc = 1  # by default our data has only one channel
niter = int(opt.niter)
Diters = int(opt.Diters)
workers = int(opt.workers)
lambda_ = int(opt.lambda_)
cuda = opt.cuda

# datapath = '../../../../../'
# f = h5py.File(opt.datapath + 'fields_z=' + opt.redshift + '.hdf5', 'r')
# f = f['delta_HI']

if opt.MLP:
    netG = MLP_G(s_sample, nz, nc, ngf, ngpu)
    netD = MLP_D(s_sample, nz, nc, ndf, ngpu)
else:
    netG = DCGAN_G(s_sample, nz, nc, ngf, ngpu)
    netD = DCGAN_D(s_sample, nz, nc, ndf, ngpu)

# e.g. experiments/ch128_lr0005_tanh/netD_epoch_47.pth
epoch_load = opt.epoch_st - 1
wass_loss = []
if opt.load_weights:
    netG.load_state_dict(
        torch.load(opt.experiment + 'netG_epoch_' + str(epoch_load) + '.pth'))
    netD.load_state_dict(
        torch.load(opt.experiment + 'netD_epoch_' + str(epoch_load) + '.pth'))
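# Sketch of the checkpoint writes the resume logic above expects (an assumption
# about the training loop, which is not shown in this excerpt): at the end of
# each epoch the state dicts would be saved under opt.experiment with the same
# 'net{G,D}_epoch_<n>.pth' naming that load_state_dict reads back. The helper
# name save_checkpoints is hypothetical.
def save_checkpoints(epoch):
    torch.save(netG.state_dict(),
               '{}netG_epoch_{}.pth'.format(opt.experiment, epoch))
    torch.save(netD.state_dict(),
               '{}netD_epoch_{}.pth'.format(opt.experiment, epoch))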
# Build the models
###############################################################################
autoencoders = [
    Seq2Seq(emsize=ae_args.emsize,
            nhidden=ae_args.nhidden,
            ntokens=ae_args.ntokens,
            nlayers=ae_args.nlayers,
            noise_radius=ae_args.noise_radius,
            hidden_init=ae_args.hidden_init,
            dropout=ae_args.dropout,
            gpu=args.cuda)
    for ae_args in autoencoders_args
]
gan_gens = [
    MLP_G(ninput=args.z_size, noutput=ae_args.nhidden, layers=ae_args.arch_g)
    for ae_args in autoencoders_args
]
gan_discs = [
    MLP_D(ninput=ae_args.nhidden, noutput=1, layers=ae_args.arch_d)
    for ae_args in autoencoders_args
]
print('autoencoders', autoencoders)

ae_optimizers = [
    optim.SGD(ae.parameters(), lr=args.lr_ae)
    for ae, ae_args in zip(autoencoders, autoencoders_args)
]
gan_g_optimizers = [
    optim.Adam(gan_gen.parameters(), lr=args.lr_gan_g,
               betas=(args.beta1, 0.999))
    for gan_gen in gan_gens
]
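# Sketch (an assumption, mirroring the .cuda() handling used in the other build
# scripts in this collection; the original excerpt cuts off before this step):
# move every per-model module to the GPU when CUDA is enabled.
if args.cuda:
    autoencoders = [ae.cuda() for ae in autoencoders]
    gan_gens = [g.cuda() for g in gan_gens]
    gan_discs = [d.cuda() for d in gan_discs]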
###############################################################################
# Build the models
###############################################################################
ntokens = len(corpus.dictionary.word2idx)
autoencoder = Seq2Seq2Decoder(emsize=args.emsize,
                              nhidden=args.nhidden,
                              ntokens=ntokens,
                              nlayers=args.nlayers,
                              noise_r=args.noise_r,
                              hidden_init=args.hidden_init,
                              dropout=args.dropout,
                              gpu=args.cuda)
gan_gen = MLP_G(input_dim=args.z_size, output_dim=args.nhidden, arch_layers=args.arch_g)
gan_disc = MLP_D(input_dim=args.nhidden, output_dim=1, arch_layers=args.arch_d)
classifier = MLP_Classify(ninput=args.nhidden, noutput=1, layers=args.arch_classify)
g_factor = None

print(autoencoder)
print(gan_gen)
print(gan_disc)
print(classifier)

optimizer_ae = optim.SGD(autoencoder.parameters(), lr=args.lr_ae)
optimizer_gan_g = optim.Adam(gan_gen.parameters(),
                             lr=args.lr_gan_g,
                             betas=(args.beta1, 0.999))
optimizer_gan_d = optim.Adam(gan_disc.parameters(),
                             lr=args.lr_gan_d,
                             betas=(args.beta1, 0.999))
def main():
    # Load the pretrained autoencoder and its training-time arguments.
    state_dict = torch.load(args.ae_model)
    with open(args.ae_args) as f:
        ae_args = json.load(f)
    corpus = Corpus(args.data_file,
                    args.dict_file,
                    vocab_size=ae_args['vocab_size'])
    autoencoder = Seq2Seq(emsize=ae_args['emsize'],
                          nhidden=ae_args['nhidden'],
                          ntokens=ae_args['ntokens'],
                          nlayers=ae_args['nlayers'],
                          noise_radius=ae_args['noise_radius'],
                          hidden_init=ae_args['hidden_init'],
                          dropout=ae_args['dropout'],
                          gpu=args.cuda)
    autoencoder.load_state_dict(state_dict)
    # The autoencoder is frozen; only the GAN modules are trained here.
    for param in autoencoder.parameters():
        param.requires_grad = False

    # save arguments
    with open(os.path.join(out_dir, 'args.json'), 'w') as f:
        json.dump(vars(args), f)
    log.info('[Data and AE model loaded.]')

    gan_gen = MLP_G(ninput=args.nhidden, noutput=args.nhidden, layers=args.arch_g)
    gan_disc = MLP_D(ninput=2 * args.nhidden, noutput=1, layers=args.arch_d)
    optimizer_gan_g = optim.Adam(gan_gen.parameters(),
                                 lr=args.lr_gan_g,
                                 betas=(args.beta1, 0.999))
    optimizer_gan_d = optim.Adam(gan_disc.parameters(),
                                 lr=args.lr_gan_d,
                                 betas=(args.beta1, 0.999))
    criterion_ce = nn.CrossEntropyLoss()

    if args.cuda:
        autoencoder = autoencoder.cuda()
        gan_gen = gan_gen.cuda()
        gan_disc = gan_disc.cuda()
        criterion_ce = criterion_ce.cuda()

    # WGAN convention: backward(one) / backward(mone) flips the gradient sign.
    one = to_gpu(args.cuda, torch.FloatTensor([1]))
    mone = one * -1

    train_pairs = BatchGen(corpus.get_chunks(size=2), args.batch_size)

    def train_gan_g(batch):
        gan_gen.train()
        gan_gen.zero_grad()

        source, _ = batch
        source = to_gpu(args.cuda, Variable(source))
        source_hidden = autoencoder(source, noise=False, encode_only=True)

        fake_hidden = gan_gen(source_hidden)
        errG = gan_disc(source_hidden, fake_hidden)

        # loss / backprop
        errG.backward(one)
        optimizer_gan_g.step()

        return errG

    def train_gan_d(batch):
        # clamp parameters to a cube
        for p in gan_disc.parameters():
            p.data.clamp_(-args.gan_clamp, args.gan_clamp)

        gan_disc.train()
        gan_disc.zero_grad()

        # positive samples ----------------------------
        # generate real codes
        source, target = batch
        source = to_gpu(args.cuda, Variable(source))
        target = to_gpu(args.cuda, Variable(target))

        # batch_size x nhidden
        source_hidden = autoencoder(source, noise=False, encode_only=True)
        target_hidden = autoencoder(target, noise=False, encode_only=True)

        # loss / backprop
        errD_real = gan_disc(source_hidden, target_hidden)
        errD_real.backward(one)

        # negative samples ----------------------------
        # loss / backprop
        fake_hidden = gan_gen(source_hidden)
        errD_fake = gan_disc(source_hidden.detach(), fake_hidden.detach())
        errD_fake.backward(mone)

        optimizer_gan_d.step()
        errD = -(errD_real - errD_fake)

        return errD, errD_real, errD_fake

    niter = 0
    start_time = datetime.now()
    for t in range(args.updates):
        niter += 1

        # train discriminator/critic
        for i in range(args.niters_gan_d):
            # feed a seen sample within this epoch; good for early training
            errD, errD_real, errD_fake = train_gan_d(next(train_pairs))

        # train generator
        for i in range(args.niters_gan_g):
            errG = train_gan_g(next(train_pairs))

        if niter % args.log_interval == 0:
            eta = str((datetime.now() - start_time) / (t + 1)
                      * (args.updates - t - 1)).split('.')[0]
            log.info('[{}/{}] Loss_D: {:.6f} (real: {:.6f} '
                     'fake: {:.6f}) Loss_G: {:.6f} ETA: {}'.format(
                         niter, args.updates,
                         errD.data.cpu()[0], errD_real.data.cpu()[0],
                         errD_fake.data.cpu()[0], errG.data.cpu()[0], eta))

        if niter % args.save_interval == 0:
            save_model(gan_gen, out_dir, 'gan_gen_model_{}.pt'.format(t))
            save_model(gan_disc, out_dir, 'gan_disc_model_{}.pt'.format(t))
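# Sketch of the save_model helper referenced in the loop above (hypothetical;
# the actual helper is defined elsewhere in the repository and may differ):
def save_model(model, out_dir, filename):
    # persist only the state dict, matching how the AE checkpoint is loaded in main()
    torch.save(model.state_dict(), os.path.join(out_dir, filename))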
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


from tqdm import tqdm

if model_name == 'DC':
    G = DCGAN_G(isize=img_size, nz=z_size, nc=image_chanel,
                ngf=hidden_size, ngpu=0)
    G.apply(weights_init)
    D = Sinkhorn_DCGAN_D(isize=img_size, nz=z_size, nc=image_chanel,
                         ndf=hidden_size, ngpu=0,
                         output_dimension=output_dimension)
    D.apply(weights_init)
if model_name == 'MLP':
    G = MLP_G(isize=img_size, nz=z_size, nc=image_chanel,
              ngf=hidden_size, ngpu=0)
    D = Sinkhorn_MLP_D(isize=img_size, nz=z_size, nc=image_chanel,
                       ndf=hidden_size, ngpu=0)

print(G)
print(D)

if use_cuda:
    G.cuda()
    D.cuda()

G_lr = D_lr = 5e-5
optimizers = {
    'D': torch.optim.RMSprop(D.parameters(), lr=D_lr),
    'G': torch.optim.RMSprop(G.parameters(), lr=G_lr)
}

data_iter = iter(data_loader)
errs_real = []
errs_fake = []
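# Sketch of how data_iter is typically consumed (an assumption about the
# training loop, which is not shown in this excerpt): pull one batch per step
# and restart the iterator once the DataLoader is exhausted. The helper name
# next_batch is hypothetical.
def next_batch():
    global data_iter
    try:
        batch = next(data_iter)
    except StopIteration:
        data_iter = iter(data_loader)
        batch = next(data_iter)
    return batch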
###############################################################################
ntokens = len(corpus.dictionary.word2idx)
corpus.dictionary.weights_matrix = to_gpu(args.cuda,
                                          corpus.dictionary.weights_matrix)
autoencoder = Seq2Seq2Decoder(emsize=args.emsize,
                              nhidden=args.nhidden,
                              ntokens=ntokens,
                              nlayers=args.nlayers,
                              noise_r=args.noise_r,
                              hidden_init=args.hidden_init,
                              dropout=args.dropout,
                              gpu=args.cuda,
                              weights_matrix=corpus.dictionary.weights_matrix)
gan_gen = MLP_G(ninput=args.z_size, noutput=args.nhidden, layers=args.arch_g)
gan_disc = MLP_D(ninput=args.nhidden, noutput=1, layers=args.arch_d)
classifier = MLP_Classify(ninput=args.nhidden, noutput=1, layers=args.arch_classify)
g_factor = None

print(autoencoder)
print(gan_gen)
print(gan_disc)
print(classifier)

optimizer_ae = optim.SGD(autoencoder.parameters(), lr=args.lr_ae)
optimizer_gan_g = optim.Adam(gan_gen.parameters(),
                             lr=args.lr_gan_g,
                             betas=(args.beta1, 0.999))
###############################################################################
ntokens = len(corpus.dictionary.word2idx)
autoencoder = Seq2Seq(emsize=emsize,
                      nhidden=nhidden,
                      ntokens=ntokens,
                      nlayers=nlayers,
                      noise_radius=noise_radius,
                      hidden_init=False,
                      dropout=dropout,
                      gpu=cuda)

gan_gen = MLP_G(ninput=z_size, noutput=nhidden, ncategory=ncategory, layers=arch_g)

gan_disc = MLP_D(ninput=nhidden, noutput=1, ncategory=ncategory, layers=arch_d)

optimizer_ae = optim.SGD(autoencoder.parameters(), lr=lr_ae)
optimizer_gan_g = optim.Adam(gan_gen.parameters(),
                             lr=lr_gan_g,
                             betas=(beta1, 0.999))
                                  conv_layer=args.arch_conv_filters,
                                  conv_windows=args.arch_conv_windows,
                                  conv_strides=args.arch_conv_strides,
                                  pooling_enc=args.pooling_enc,
                                  gpu=args.cuda)
else:
    autoencoder = Seq2Seq2Decoder(emsize=args.emsize,
                                  nhidden=args.nhidden,
                                  ntokens=ntokens,
                                  nlayers=args.nlayers,
                                  noise_r=args.noise_r,
                                  hidden_init=args.hidden_init,
                                  dropout=args.dropout,
                                  gpu=args.cuda)

gan_gen = MLP_G(ninput=args.z_size, noutput=args.nhidden, layers=args.arch_g)
gan_disc = MLP_D(ninput=args.nhidden, noutput=1, layers=args.arch_d)
classifier = MLP_Classify(ninput=args.nhidden, noutput=1, layers=args.arch_classify)
g_factor = None

print(autoencoder)
print(gan_gen)
print(gan_disc)
print(classifier)

if args.cuda:
    autoencoder = autoencoder.cuda()
    gan_gen = gan_gen.cuda()
    gan_disc = gan_disc.cuda()
###############################################################################
# Build the models
###############################################################################
ntokens = len(corpus.dictionary.word2idx)
autoencoder = Seq2Seq(emsize=args.emsize,
                      nhidden=args.nhidden,
                      ntokens=ntokens,
                      nlayers=args.nlayers,
                      noise_radius=args.noise_radius,
                      hidden_init=args.hidden_init,
                      dropout=args.dropout,
                      gpu=args.cuda)
gan_gen = MLP_G(ninput=args.z_size, noutput=args.nhidden, layers=args.arch_g)
gan_disc = MLP_D(ninput=args.nhidden, noutput=1, layers=args.arch_d)

print(autoencoder)
print(gan_gen)
print(gan_disc)

optimizer_ae = optim.SGD(autoencoder.parameters(), lr=args.lr_ae)
optimizer_gan_g = optim.Adam(gan_gen.parameters(),
                             lr=args.lr_gan_g,
                             betas=(args.beta1, 0.999))
optimizer_gan_d = optim.Adam(gan_disc.parameters(),
                             lr=args.lr_gan_d,
                             betas=(args.beta1, 0.999))
criterion_ce = nn.CrossEntropyLoss()
char_ae.load_state_dict(char_ae_params)
word_ae = Seq2Seq(emsize=word_args.emsize,
                  nhidden=word_args.nhidden,
                  ntokens=word_args.ntokens,
                  nlayers=word_args.nlayers,
                  noise_r=word_args.noise_r,
                  hidden_init=word_args.hidden_init,
                  dropout=word_args.dropout)
word_ae.load_state_dict(word_ae_params)

D = MLP_D(input_dim=args.nhidden, output_dim=1, arch_layers=args.arch_d)
G = MLP_G(input_dim=args.nhidden, output_dim=args.nhidden,
          noise_dim=args.z_size, arch_layers=args.arch_g)

if args.finetune_ae:
    logger.info("AE will be fine-tuned")
    # The autoencoder parameters are updated jointly with the GAN modules.
    optimizer_D = optim.Adam(list(D.parameters())
                             + list(char_ae.parameters())
                             + list(word_ae.parameters()),
                             lr=args.lr_gan_d,
                             betas=(args.beta1, 0.999))
    optimizer_G = optim.Adam(list(G.parameters())
                             + list(char_ae.parameters())
                             + list(word_ae.parameters()),
                             lr=args.lr_gan_g,
                             betas=(args.beta1, 0.999))
else:
    logger.info("AE will not be fine-tuned")
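    # Hypothetical continuation (not in the original excerpt, which ends here):
    # when the autoencoders are not fine-tuned, freeze them and optimize only
    # the GAN parameters.
    for param in list(char_ae.parameters()) + list(word_ae.parameters()):
        param.requires_grad = False
    optimizer_D = optim.Adam(D.parameters(), lr=args.lr_gan_d,
                             betas=(args.beta1, 0.999))
    optimizer_G = optim.Adam(G.parameters(), lr=args.lr_gan_g,
                             betas=(args.beta1, 0.999))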