def demo_mnist(opt, epochs=10, btsz=100, lr=0.1, beta=0.9,
               eta0=0.0005, mu=0.02, lmbd=0.99, w=None):
    """Train a linear (softmax/cross-entropy) classifier on MNIST.

    Parameters
    ----------
    opt : callable
        Optimizer. Either ``msgd``/``smd`` from the local ``opt`` module
        (minibatch interface) or a scipy-style minimizer that accepts
        ``func``/``fprime``/``args``/``maxfun``.
    epochs : int
        Passes over the training set (used as ``maxfun`` for scipy opts).
    btsz : int
        Minibatch size for the stochastic optimizers.
    lr, beta : float
        Learning rate and momentum for ``msgd``.
    eta0, mu, lmbd : float
        Hyperparameters for ``smd``.
    w : np.ndarray or None
        Warm-start weights; a fresh initialization is used when ``None``.

    Returns
    -------
    np.ndarray
        Trained weight vector (flattened weight matrix followed by biases).
    """
    from misc import load_mnist
    from losses import zero_one
    from opt import msgd, smd

    trainset, valset, testset = load_mnist()
    inputs, targets = trainset
    test_in, test_tar = testset

    di = inputs.shape[1]
    dt = np.max(targets) + 1

    # Set up weights.
    if w is None:
        if opt is smd:
            # SMD uses complex-step arithmetic, so weights must be complex.
            # ``np.complex`` was removed in NumPy 1.24 -- use the builtin.
            weights = np.zeros((di * dt + dt), dtype=complex)
            weights[:] = 0.001 * np.random.randn(di * dt + dt)
        else:
            # Zero initialization; fine for this convex problem.
            weights = 0. * np.random.randn(di * dt + dt)
            weights[-dt:] = 0.
    else:
        print("Continue with provided weights w.")
        weights = w

    print("Training starts...")
    params = dict()
    params["x0"] = weights
    if opt is msgd or opt is smd:
        params["fandprime"] = score_grad_xe
        params["nos"] = inputs.shape[0]
        params["args"] = {}
        params["batch_args"] = {"inputs": inputs, "targets": targets}
        params["epochs"] = epochs
        params["btsz"] = btsz
        # msgd
        params["beta"] = beta
        params["lr"] = lr
        # smd
        params["eta0"] = eta0
        params["mu"] = mu
        params["lmbd"] = lmbd
        params["verbose"] = True
    else:
        # opt from scipy
        params["func"] = score_xe
        params["fprime"] = grad_xe
        params["args"] = (inputs, targets)
        params["maxfun"] = epochs
        params["m"] = 50
    weights = opt(**params)[0]
    print("Training done.")

    # Fixed typo in the original message ("preformance").
    print("Test set performance:",
          zero_one(predict(weights, test_in), test_tar))
    return weights
def demo_mnist(hiddens, opt, l2=1e-6, epochs=10, lr=1e-4, beta=0.,
               btsz=128, eta0=0.0005, mu=0.02, lmbd=0.99,
               weightvar=0.01, w=None):
    """Train a one-hidden-layer MLP (tanh, cross-entropy loss) on MNIST.

    Parameters
    ----------
    hiddens : int
        Number of hidden units.
    opt : callable
        Optimizer: ``msgd``/``smd`` from ``opt`` (minibatch interface) or
        a scipy-style minimizer (e.g. l-bfgs).
    l2 : float
        L2 regularization strength, stored in the ``structure`` dict.
    epochs, lr, beta, btsz : stochastic-optimizer settings.
    eta0, mu, lmbd : SMD hyperparameters.
    weightvar : float
        Unused here; kept for interface compatibility.
    w : np.ndarray or None
        Warm-start weights; a fresh initialization is used when ``None``.

    Returns
    -------
    np.ndarray
        Trained flat parameter vector
        (W_in, b_hidden, W_out, b_out concatenated).
    """
    from misc import sigmoid, load_mnist
    from losses import xe, zero_one
    from opt import msgd, smd

    trainset, valset, testset = load_mnist()
    inputs, targets = trainset
    test_in, test_tar = testset
    di = inputs.shape[1]
    dt = np.max(targets) + 1

    structure = {}
    structure["hdim"] = hiddens
    structure["odim"] = dt
    structure["af"] = np.tanh
    structure["score"] = xe
    structure["l2"] = l2

    # Get weights initialized: small random input/output matrices,
    # zero biases (the untouched slices stay zero).
    if w is None:
        weights = np.zeros(di * hiddens + hiddens + hiddens * dt + dt)
        weights[:hiddens * di] = 0.001 * np.random.randn(di * hiddens)
        weights[hiddens * (di + 1):-dt] = 0.001 * np.random.randn(hiddens * dt)
        if opt is smd:
            # SMD uses complex-step arithmetic -> complex weights.
            # ``np.complex`` was removed in NumPy 1.24; use the builtin.
            weights = np.asarray(weights, dtype=complex)
    else:
        print("Continue with provided weights w.")
        weights = w

    print("Training starts...")
    params = dict()
    params["x0"] = weights
    params["fandprime"] = score_grad
    if opt is msgd or opt is smd:
        params["nos"] = inputs.shape[0]
        params["args"] = {"structure": structure}
        params["batch_args"] = {"inputs": inputs, "targets": targets}
        params["epochs"] = epochs
        params["btsz"] = btsz
        params["verbose"] = True
        # for msgd
        params["lr"] = lr
        params["beta"] = beta
        # for smd
        params["eta0"] = eta0
        params["mu"] = mu
        params["lmbd"] = lmbd
    else:
        params["args"] = (structure, inputs, targets)
        params["maxfun"] = epochs
        # for lbfgs
        params["m"] = 25

    weights = opt(**params)[0]
    print("Training done.")

    # Evaluate on test set
    test_perf = zero_one(predict(weights, structure, test_in), test_tar)
    print("Test set performance:", test_perf)
    return weights
def demo_mnist(hiddens, opt, l2=1e-6, epochs=10, lr=1e-4, beta=0.,
               btsz=128, eta0=0.0005, mu=0.02, lmbd=0.99,
               weightvar=0.01, w=None):
    """Train a single-hidden-layer tanh MLP on MNIST with cross-entropy.

    Parameters
    ----------
    hiddens : int
        Hidden-layer width.
    opt : callable
        ``msgd``/``smd`` from the local ``opt`` module, or a scipy-style
        minimizer (e.g. l-bfgs) accepting ``fandprime``/``args``/``maxfun``.
    l2 : float
        L2 penalty, passed via the ``structure`` dict.
    epochs, lr, beta, btsz : stochastic-optimizer settings.
    eta0, mu, lmbd : SMD hyperparameters.
    weightvar : float
        Unused; kept for interface compatibility.
    w : np.ndarray or None
        Warm-start parameter vector, or ``None`` for fresh init.

    Returns
    -------
    np.ndarray
        Trained flat parameter vector.
    """
    from misc import sigmoid, load_mnist
    from losses import xe, zero_one
    from opt import msgd, smd

    trainset, valset, testset = load_mnist()
    inputs, targets = trainset
    test_in, test_tar = testset
    di = inputs.shape[1]
    dt = np.max(targets) + 1

    structure = {}
    structure["hdim"] = hiddens
    structure["odim"] = dt
    structure["af"] = np.tanh
    structure["score"] = xe
    structure["l2"] = l2

    # Weight init: small gaussian weight matrices, zero biases
    # (the slices not written below remain zero).
    if w is None:
        weights = np.zeros(di * hiddens + hiddens + hiddens * dt + dt)
        weights[:hiddens * di] = 0.001 * np.random.randn(di * hiddens)
        weights[hiddens * (di + 1):-dt] = 0.001 * np.random.randn(hiddens * dt)
        if opt is smd:
            # SMD needs complex weights (complex-step differentiation).
            # ``np.complex`` was removed in NumPy 1.24; builtin ``complex``.
            weights = np.asarray(weights, dtype=complex)
    else:
        print("Continue with provided weights w.")
        weights = w

    print("Training starts...")
    params = dict()
    params["x0"] = weights
    params["fandprime"] = score_grad
    if opt is msgd or opt is smd:
        params["nos"] = inputs.shape[0]
        params["args"] = {"structure": structure}
        params["batch_args"] = {"inputs": inputs, "targets": targets}
        params["epochs"] = epochs
        params["btsz"] = btsz
        params["verbose"] = True
        # for msgd
        params["lr"] = lr
        params["beta"] = beta
        # for smd
        params["eta0"] = eta0
        params["mu"] = mu
        params["lmbd"] = lmbd
    else:
        params["args"] = (structure, inputs, targets)
        params["maxfun"] = epochs
        # for lbfgs
        params["m"] = 25

    weights = opt(**params)[0]
    print("Training done.")

    # Evaluate on test set
    test_perf = zero_one(predict(weights, structure, test_in), test_tar)
    print("Test set performance:", test_perf)
    return weights
def demo_mnist(opt, epochs=10, btsz=100, lr=0.1, beta=0.9,
               eta0=0.0005, mu=0.02, lmbd=0.99, w=None):
    """Train a linear softmax/cross-entropy classifier on MNIST.

    Parameters
    ----------
    opt : callable
        ``msgd``/``smd`` from the local ``opt`` module, or a scipy-style
        minimizer taking ``func``/``fprime``/``args``/``maxfun``.
    epochs : int
        Training passes (``maxfun`` for scipy optimizers).
    btsz : int
        Minibatch size for stochastic optimizers.
    lr, beta : float
        ``msgd`` learning rate and momentum.
    eta0, mu, lmbd : float
        ``smd`` hyperparameters.
    w : np.ndarray or None
        Warm-start weights; fresh initialization when ``None``.

    Returns
    -------
    np.ndarray
        Trained weight vector (weights then biases).
    """
    from misc import load_mnist
    from losses import zero_one
    from opt import msgd, smd

    trainset, valset, testset = load_mnist()
    inputs, targets = trainset
    test_in, test_tar = testset

    di = inputs.shape[1]
    dt = np.max(targets) + 1

    # Set up weights.
    if w is None:
        if opt is smd:
            # SMD needs complex initialization (complex-step arithmetic).
            # ``np.complex`` was removed in NumPy 1.24; use builtin complex.
            weights = np.zeros((di * dt + dt), dtype=complex)
            weights[:] = 0.001 * np.random.randn(di * dt + dt)
        else:
            # All-zeros start; acceptable for this convex model.
            weights = 0. * np.random.randn(di * dt + dt)
            weights[-dt:] = 0.
    else:
        print("Continue with provided weights w.")
        weights = w

    print("Training starts...")
    params = dict()
    params["x0"] = weights
    if opt is msgd or opt is smd:
        params["fandprime"] = score_grad_xe
        params["nos"] = inputs.shape[0]
        params["args"] = {}
        params["batch_args"] = {"inputs": inputs, "targets": targets}
        params["epochs"] = epochs
        params["btsz"] = btsz
        # msgd
        params["beta"] = beta
        params["lr"] = lr
        # smd
        params["eta0"] = eta0
        params["mu"] = mu
        params["lmbd"] = lmbd
        params["verbose"] = True
    else:
        # opt from scipy
        params["func"] = score_xe
        params["fprime"] = grad_xe
        params["args"] = (inputs, targets)
        params["maxfun"] = epochs
        params["m"] = 50
    weights = opt(**params)[0]
    print("Training done.")

    # Fixed typo in the original message ("preformance").
    print("Test set performance:",
          zero_one(predict(weights, test_in), test_tar))
    return weights
def train(args):
    """Train a DCGAN (generator + discriminator) on the configured dataset.

    Training scheme per minibatch:
      1. Update discriminator: maximize log(D(x)) + log(1 - D(G(z)))
         1-1. with a real image batch x
         1-2. with a fake batch G(z) (detached, so G gets no gradient)
      2. Update generator (3 steps per D step): maximize log(D(G(z)))

    Side effects: writes sample images to ``args.img_dir``, checkpoints to
    ``args.ckpt_dir``, and appends progress lines to ``save_log.txt``.

    Parameters
    ----------
    args : argparse.Namespace
        Expects: dataset, batch_s, img_s, base_fm_n, layer_n, z_dim,
        learning_rate, beta1, resume, ckpt_dir, img_dir, epochs, log_term.
    """
    # ---- data -----------------------------------------------------------
    if args.dataset.lower() == 'celeba':
        train_loader, _, _ = misc.load_celebA(args.batch_s, args.img_s)
        img_c = 3
    elif args.dataset.lower() == 'lsun':
        train_loader, val_loader, _ = misc.load_LSUN(args.batch_s, args.img_s)
        img_c = 3
    elif args.dataset.lower() == 'imagenet':
        train_loader, val_loader, _ = misc.load_imagenet(
            args.batch_s, args.img_s)
        img_c = 3
    elif args.dataset.lower() == 'mnist':
        train_loader, val_loader, _ = misc.load_mnist(args.batch_s, args.img_s)
        img_c = 1
    else:
        raise NotImplementedError

    # Feature-map counts: generator halves per layer, discriminator doubles.
    fm_gen = [
        args.base_fm_n * pow(2, args.layer_n - 1 - l)
        for l in range(args.layer_n)
    ]
    fm_disc = [args.base_fm_n * pow(2, l) for l in range(args.layer_n)]

    gen = model.Generator(args.z_dim, img_c, fm_gen).cuda()
    gen.apply(model.init_weights)
    disc = model.Discriminator(img_c, fm_disc).cuda()
    disc.apply(model.init_weights)

    criterion = nn.BCELoss()
    # Float labels: BCELoss requires float targets, and torch.full with an
    # integer fill value would otherwise produce a non-float tensor.
    label_real = 1.
    label_fake = 0.

    optim_gen = optim.Adam(gen.parameters(), lr=args.learning_rate,
                           betas=(args.beta1, 0.999))
    optim_disc = optim.Adam(disc.parameters(), lr=args.learning_rate,
                            betas=(args.beta1, 0.999))

    # FIX: initialize unconditionally -- previously start_epoch was left
    # undefined (NameError later) when args.resume pointed at a missing file.
    start_epoch = 0
    if args.resume:
        filename = args.ckpt_dir + args.resume
        if os.path.isfile(filename):
            print("==> loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            start_epoch = checkpoint['epoch'] + 1
            gen.load_state_dict(checkpoint['state_dict_gen'])
            disc.load_state_dict(checkpoint['state_dict_disc'])
            optim_gen.load_state_dict(checkpoint['optimizer_gen'])
            optim_disc.load_state_dict(checkpoint['optimizer_disc'])
            print("==> loaded checkpoint '{}' (epoch {})".format(
                filename, checkpoint['epoch']))
        else:
            print("==> no checkpoint found at '{}'".format(filename))

    # FIX: os.makedirs instead of os.system('mkdir ...') -- portable and
    # not vulnerable to shell metacharacters in the configured paths.
    os.makedirs(args.img_dir, exist_ok=True)
    os.makedirs(args.ckpt_dir, exist_ok=True)

    # ---- training loop --------------------------------------------------
    for e in range(args.epochs):
        epoch = start_epoch + e
        loss_meter_gen = AverageMeter()
        loss_meter_disc = AverageMeter()
        out_meter_disc_f = AverageMeter()
        out_meter_disc_r = AverageMeter()
        out_meter_disc_g = AverageMeter()

        for i, data in enumerate(train_loader):
            img_real, _ = data
            img_real = img_real.cuda()
            batch_s = img_real.size(0)

            # 1. Update discriminator.
            optim_disc.zero_grad()

            # 1-1. with real image x
            label_r = torch.full((batch_s, 1), label_real).cuda()
            out_disc_r = disc(img_real).view(batch_s, -1)
            error_disc_r = criterion(out_disc_r, label_r)
            error_disc_r.backward()

            # 1-2. with fake image G(z); detach so only D is updated here.
            img_fake = gen(torch.randn(batch_s, args.z_dim, 1, 1).cuda())
            label_f = torch.full((batch_s, 1), label_fake).cuda()
            out_disc_f = disc(img_fake.detach()).view(batch_s, -1)
            error_disc_f = criterion(out_disc_f, label_f)
            error_disc = error_disc_r + error_disc_f
            # Gradients from the real pass are already accumulated; this
            # second backward completes the discriminator gradient.
            error_disc_f.backward()
            optim_disc.step()

            # 2. Update generator (3 steps per discriminator step);
            #    real labels because G maximizes log(D(G(z))).
            for g_iter in range(3):
                img_fake = gen(torch.randn(batch_s, args.z_dim, 1, 1).cuda())
                out_disc_g = disc(img_fake).view(batch_s, -1)
                error_gen = criterion(out_disc_g, label_r)
                optim_gen.zero_grad()
                error_gen.backward()
                optim_gen.step()

            loss_meter_gen.update(error_gen.item(), batch_s)
            loss_meter_disc.update(error_disc.item(), batch_s)
            out_meter_disc_f.update(torch.sum(out_disc_f).item(), batch_s)
            out_meter_disc_r.update(torch.sum(out_disc_r).item(), batch_s)
            out_meter_disc_g.update(torch.sum(out_disc_g).item(), batch_s)

            if i % args.log_term == 0:
                print(
                    'epoch: %d, batch: %d \t Loss(D/G): %.4f / %.4f \t D(R/F/G): %.4f / %.4f / %.4f'
                    % (epoch, i, loss_meter_disc.avg, loss_meter_gen.avg,
                       out_meter_disc_r.avg / batch_s,
                       out_meter_disc_f.avg / batch_s,
                       out_meter_disc_g.avg / batch_s))
                # FIX: context manager so the log file is closed even if a
                # later statement raises.
                with open('save_log.txt', 'a') as fd:
                    fd.write(
                        'epoch: %d, batch: %d \t Loss(D/G): /%.4f / %.4f/ || D(R/F/G): /%.4f / %.4f / %.4f/ \n'
                        % (epoch, i, loss_meter_disc.avg, loss_meter_gen.avg,
                           out_meter_disc_r.avg, out_meter_disc_f.avg,
                           out_meter_disc_g.avg))

                misc.plot_samples_from_images(
                    img_fake, batch_s, args.img_dir,
                    'img_e{}b{}.jpg'.format(epoch, i))

                torch.save(
                    {
                        'epoch': epoch,
                        'state_dict_gen': gen.state_dict(),
                        'state_dict_disc': disc.state_dict(),
                        'optimizer_gen': optim_gen.state_dict(),
                        'optimizer_disc': optim_disc.state_dict()
                    },
                    args.ckpt_dir + 'checkpoint_e{}b{}.pt'.format(epoch, i))

                # Reset running averages after each logged checkpoint.
                loss_meter_gen = AverageMeter()
                loss_meter_disc = AverageMeter()
                out_meter_disc_f = AverageMeter()
                out_meter_disc_r = AverageMeter()
                out_meter_disc_g = AverageMeter()