def __init__(self, progress_dir, checkpoint_dir, z_dim=100, test_size=4, batch_size=100, learning_rate=0.0002, beta_1=0.5):
    # GAN trainer state: bundles the generator/discriminator, their Adam
    # optimizers and bookkeeping variables into one tf.train.Checkpoint so
    # the entire training state can be saved and restored as a unit.
    #
    # Args:
    #   progress_dir: directory where progress images are written (stored only).
    #   checkpoint_dir: directory managed by the CheckpointManager.
    #   z_dim: dimensionality of the latent noise vector.
    #   test_size: side length of the preview grid; test_size**2 fixed latent
    #     points are drawn once for comparable progress images across epochs.
    #   batch_size: training batch size (stored only; used elsewhere).
    #   learning_rate, beta_1: Adam hyper-parameters shared by both optimizers.
    self.batch_size = batch_size
    self.z_dim = z_dim
    self.test_size = test_size
    self.progress_dir = progress_dir
    # Fixed latent points so generated preview grids are comparable over time.
    self.test_points = tf.random.normal(shape=(test_size**2, z_dim))
    # epoch/val_loss are tf.Variables so they are checkpointed alongside the
    # networks and optimizers.  NOTE(review): `lr=` is the deprecated Keras
    # alias of `learning_rate=` — confirm the installed Keras still accepts it.
    self.ckpt = tf.train.Checkpoint(
        epoch=tf.Variable(1),
        val_loss=tf.Variable(np.inf),
        gan_optimizer=optimizers.Adam(lr=learning_rate, beta_1=beta_1),
        dis_optimizer=optimizers.Adam(lr=learning_rate, beta_1=beta_1),
        generator=Generator(z_dim),
        discriminator=Discriminator())
    self.ckpt_manager = tf.train.CheckpointManager(
        checkpoint=self.ckpt, directory=checkpoint_dir, max_to_keep=5)
    # Resume from the latest checkpoint if one exists.
    self.restore_checkpoint()
    self.gen_loss_fn = tf.keras.losses.BinaryCrossentropy()
    self.dis_loss_fn = tf.keras.losses.BinaryCrossentropy()
    # Running means of per-batch losses; reset/reported elsewhere.
    self.gen_metric = tf.metrics.Mean()
    self.dis_metric = tf.metrics.Mean()
def main():
    """Train a DC-GAN on the anime-faces dataset and periodically dump a
    10x10 grid of generated samples to ./images.
    """
    tf.random.set_seed(22)
    np.random.seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')

    # hyper-parameters
    z_dim = 100
    epochs = 3000000
    batch_size = 512
    learning_rate = 0.005
    training = True

    img_path = glob.glob(r'.\faces\*.jpg')
    dataset, img_shape, _ = make_anime_dataset(img_path, batch_size)
    print(dataset, img_shape)
    sample_picture = next(iter(dataset))
    print(sample_picture.shape, tf.reduce_max(sample_picture).numpy(),
          tf.reduce_min(sample_picture).numpy())
    dataset = dataset.repeat()
    ds_iter = iter(dataset)

    generator = Generator()
    generator.build(input_shape=(None, z_dim))
    discriminator = Discriminator()
    discriminator.build(input_shape=(None, 64, 64, 3))
    g_optimizer = tf.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    d_optimizer = tf.optimizers.RMSprop(learning_rate=learning_rate)

    for epoch in range(epochs):
        # latent codes drawn uniformly from [-1, 1)
        batch_z = tf.random.uniform([batch_size, z_dim], minval=-1., maxval=1.)
        batch_r = next(ds_iter)

        # discriminator step
        with tf.GradientTape() as tape:
            d_loss = d_loss_func(generator, discriminator, batch_z, batch_r, training)
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

        # generator step
        with tf.GradientTape() as tape:
            g_loss = g_loss_func(generator, discriminator, batch_z, training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        if epoch % 100 == 0:
            print('Current epoch:', epoch, 'd_loss:', d_loss, 'g_loss:', g_loss)
            # BUG FIX: sample validation noise from the same [-1, 1) range used
            # during training; the original sampled [0, 1), so preview images
            # came from a distribution the generator was never trained on.
            z = tf.random.uniform([100, z_dim], minval=-1., maxval=1.)
            g_imgs = generator(z, training=False)
            # FIX: make sure the output directory exists before saving.
            os.makedirs('images', exist_ok=True)
            save_path = os.path.join('images', 'gan-%d.png' % epoch)
            save_result(g_imgs.numpy(), 10, save_path, color_mode='P')
def build_model(self):
    """Build generator/discriminator, optionally move them to CUDA, load
    weights (pretrained or user-supplied) and create their Adam optimizers."""
    self.netG = Generator(nz=self.config.nz, ngf=self.config.ngf, nc=self.config.nc)
    self.netD = Discriminator(nz=self.config.nz, ndf=self.config.ndf, nc=self.config.nc)
    # Move the networks onto the GPU if requested.
    if self.config.cuda:
        # BUG FIX: the original read `self.net.cuda()` for both nets;
        # `self.net` does not exist, so this raised AttributeError (and would
        # have assigned the same object to both attributes even if it had).
        self.netG = self.netG.cuda()
        self.netD = self.netD.cuda()
        cudnn.benchmark = True
    # self.net.train()
    # Set eval state (use_global_stats = True for batch-norm layers).
    self.netG.eval()
    self.netD.eval()
    # Load pretrained weights, or a user-specified checkpoint if given.
    # NOTE(review): both nets load the same state-dict file in each branch —
    # confirm the checkpoint really holds weights for both architectures.
    if self.config.load == '':
        self.netG.load_state_dict(torch.load(self.config.pretrained_model))
        self.netD.load_state_dict(torch.load(self.config.pretrained_model))
    else:
        self.netG.load_state_dict(torch.load(self.config.load))
        self.netD.load_state_dict(torch.load(self.config.load))
    # Set up the optimizers.
    self.optimizerD = Adam(self.netD.parameters(), lr=self.config.lr,
                           betas=(self.config.beta1, self.config.beta2),
                           weight_decay=self.config.wd)
    self.optimizerG = Adam(self.netG.parameters(), lr=self.config.lr,
                           betas=(self.config.beta1, self.config.beta2),
                           weight_decay=self.config.wd)
    # Print the network structures.
    self.print_network(self.netG, 'Generator Structure')
    self.print_network(self.netD, 'Discriminator Structure')
train=True, transform=T.Compose([ T.Resize(IMAGE_SIZE), T.ToTensor(), T.Normalize((0.5), (1)) ]), download=True) dataloader = torch.utils.data.DataLoader(data, batch_size=args.batch_size, shuffle=True, num_workers=args.workers, drop_last=True) else: raise Exception("Not a valid dataset") G = Generator(args.num_noises, NUM_COLORS, args.depths, IMAGE_SIZE).to(device) D = Discriminator(NUM_COLORS, args.depths, IMAGE_SIZE).to(device) def init_weight(model): classname = model.__class__.__name__ if classname.find('conv') != -1: torch.nn.init.normal_(model.weight.data, 0, 0.02) G.apply(init_weight) D.apply(init_weight) criterion_d = torch.nn.BCELoss() criterion_g = torch.nn.MSELoss( ) if args.feature_matching else torch.nn.BCELoss()
def train(z_channels, c_channels, epoch_num, batch_size, lr=0.0002, beta1=0.5,
          model_path='models/dcgan_checkpoint.pth'):
    """Train a DCGAN on MNIST with instance noise and label smoothing,
    resuming from model_path when a checkpoint exists.

    Args:
        z_channels: number of latent channels fed to the generator.
        c_channels: number of image channels expected by both nets.
        epoch_num: how many epochs to run (on top of any resumed epoch).
        batch_size: mini-batch size for the MNIST loader.
        lr, beta1: Adam hyper-parameters for both optimizers.
        model_path: checkpoint file holding nets + optimizers + epoch.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        cudnn.benchmark = True
    else:
        print("***** Warning: Cuda isn't available! *****")
    loader = load_mnist(batch_size)
    generator = Generator(z_channels, c_channels).to(device)
    discriminator = Discriminator(c_channels).to(device)
    g_optimizer = optim.Adam(generator.parameters(), lr=lr, betas=(beta1, 0.999))
    d_optimizer = optim.Adam(discriminator.parameters(), lr=lr, betas=(beta1, 0.999))
    start_epoch = 0
    # Resume full training state (both nets and both optimizers) if possible.
    if os.path.exists(model_path):
        checkpoint = torch.load(model_path)
        generator.load_state_dict(checkpoint['g'])
        discriminator.load_state_dict(checkpoint['d'])
        g_optimizer.load_state_dict(checkpoint['g_optim'])
        d_optimizer.load_state_dict(checkpoint['d_optim'])
        start_epoch = checkpoint['epoch'] + 1
    criterion = nn.BCELoss().to(device)
    generator.train()
    discriminator.train()
    # std of the instance noise added to both real and fake images;
    # decayed by a factor 0.9 after every epoch.
    std = 0.1
    for epoch in range(start_epoch, start_epoch + epoch_num):
        d_loss_sum, g_loss_sum = 0, 0
        print('---- epoch: %d ----' % (epoch, ))
        for i, (real_image, number) in enumerate(loader):
            real_image = real_image.to(device)
            # Instance noise with the same shape as the image batch.
            image_noise = torch.randn(real_image.size(), device=device).normal_(0, std)

            # ---- discriminator step ----
            d_optimizer.zero_grad()
            # Noisy "real" labels centred on 0.9 (one-sided label smoothing).
            real_label = torch.randn(number.size(), device=device).normal_(0.9, 0.1)
            # In-place: real images get the instance noise added.
            real_image.add_(image_noise)
            out = discriminator(real_image)
            d_real_loss = criterion(out, real_label)
            d_real_loss.backward()
            noise_z = torch.randn((number.size(0), z_channels, 1, 1), device=device)
            fake_image = generator(noise_z)
            fake_label = torch.zeros(number.size(), device=device)
            # Same instance noise applied to the fakes (out-of-place here).
            fake_image = fake_image.add(image_noise)
            # detach: D's loss must not backprop into the generator.
            out = discriminator(fake_image.detach())
            d_fake_loss = criterion(out, fake_label)
            d_fake_loss.backward()
            d_optimizer.step()

            # ---- generator step ----
            g_optimizer.zero_grad()
            out = discriminator(fake_image)
            # Generator wants its fakes classified as (smoothed) real.
            g_loss = criterion(out, real_label)
            g_loss.backward()
            g_optimizer.step()
            d_loss_sum += d_real_loss.item() + d_fake_loss.item()
            g_loss_sum += g_loss.item()
            # if i % 10 == 0:
            #     print(d_loss, g_loss)
        print('d_loss: %f \t\t g_loss: %f' % (d_loss_sum / (i + 1), g_loss_sum / (i + 1)))
        # Anneal the instance noise.
        std *= 0.9
        if epoch % 1 == 0:
            checkpoint = {
                'g': generator.state_dict(),
                'd': discriminator.state_dict(),
                'g_optim': g_optimizer.state_dict(),
                'd_optim': d_optimizer.state_dict(),
                'epoch': epoch,
            }
            save_image(fake_image, 'out/fake_samples_epoch_%03d.png' % (epoch, ), normalize=False)
            torch.save(checkpoint, model_path)
            # Keep a per-epoch copy of the checkpoint as well.
            os.system('cp ' + model_path + ' models/model%d' % (epoch, ))
            print('saved!')
def main():
    """Train a 1-D WGAN (weight-clipping variant) on the brilliant_blue data
    and save sample plots plus both networks."""
    # load training data
    trainset = Dataset('./data/brilliant_blue')
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)

    # init netD and netG
    netD = Discriminator().to(device)
    netD.apply(weights_init)
    netG = Generator(nz).to(device)
    netG.apply(weights_init)

    # fixed noise, used for visualizing training process
    fixed_noise = torch.randn(16, nz, 1, device=device)

    # WGAN uses RMSprop (no momentum), as in the original paper
    optimizerD = optim.RMSprop(netD.parameters(), lr=lr)
    optimizerG = optim.RMSprop(netG.parameters(), lr=lr)

    for epoch in range(epoch_num):
        for step, (data, _) in enumerate(trainloader):
            # ---- critic (netD) step ----
            real_cpu = data.to(device)
            b_size = real_cpu.size(0)
            netD.zero_grad()
            noise = torch.randn(b_size, nz, 1, device=device)
            fake = netG(noise)
            # Wasserstein critic loss: maximize D(real) - D(fake)
            loss_D = -torch.mean(netD(real_cpu)) + torch.mean(netD(fake))
            loss_D.backward()
            optimizerD.step()
            # weight clipping enforces the Lipschitz constraint
            for p in netD.parameters():
                p.data.clamp_(-clip_value, clip_value)

            # ---- generator step, once every n_critic critic steps ----
            if step % n_critic == 0:
                noise = torch.randn(b_size, nz, 1, device=device)
                # CLEANUP: the original called netG.zero_grad() twice and also
                # netD.zero_grad() here; a single netG.zero_grad() suffices
                # (netD is zeroed at the start of its own step above).
                netG.zero_grad()
                fake = netG(noise)
                loss_G = -torch.mean(netD(fake))
                loss_G.backward()
                optimizerG.step()

            if step % 5 == 0:
                print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f'
                      % (epoch, epoch_num, step, len(trainloader),
                         loss_D.item(), loss_G.item()))

        # save training process
        with torch.no_grad():
            fake = netG(fixed_noise).detach().cpu()
        f, a = plt.subplots(4, 4, figsize=(8, 8))
        for i in range(4):
            for j in range(4):
                a[i][j].plot(fake[i * 4 + j].view(-1))
                a[i][j].set_xticks(())
                a[i][j].set_yticks(())
        plt.savefig('./img/wgan_epoch_%d.png' % epoch)
        plt.close()

    # save model
    torch.save(netG, './nets/wgan_netG.pkl')
    torch.save(netD, './nets/wgan_netD.pkl')
import torchvision
import torchvision.transforms as T

from dcgan import Generator

parser = argparse.ArgumentParser()
parser.add_argument("--epochs", "-e", type=int, default=10)
parser.add_argument("--num-noises", "-nn", type=int, default=100)
parser.add_argument("--num-colors", "-nc", type=int, default=3)
parser.add_argument("--depths", "-d", type=int, default=128)
parser.add_argument("--image-size", "-is", type=int, default=64)
args = parser.parse_args()

if args.image_size % 16 != 0:
    raise Exception("Size of the image must be divisible by 16")

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_PATH = "./models/g%d-mini.pt" % args.epochs
OUTPUT_PATH = "./images/"
NUM_IMAGES = 10

G = Generator(args.num_noises, args.num_colors, args.depths, args.image_size)
G.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
# FIX: switch to inference mode — left in train() mode, batch-norm layers use
# per-sample batch statistics, which corrupts single-image generation.
G.eval()

# FIX: no gradients are needed for generation; avoids building autograd graphs.
with torch.no_grad():
    for i in range(NUM_IMAGES):
        # latent noise uniform in [-1, 1)
        noise = torch.FloatTensor(args.num_noises).uniform_(-1, 1)
        fake = G(noise)
        torchvision.utils.save_image(
            fake.view(args.num_colors, args.image_size, args.image_size),
            path.join(OUTPUT_PATH, "g%d_image%d.bmp" % (args.epochs, i)))
def main():
    """Adversarial training loop: a generator conditioned on auxiliary inputs
    (input_var[1..4]) vs. a binary real/fake discriminator, trained with BCE
    on batches produced by ImageLoader."""
    # Loss function
    adversarial_loss = torch.nn.BCELoss()

    # Initialize generator and discriminator
    generator = Generator()
    discriminator = Discriminator()

    # Initialize weights
    generator.apply(weights_init_normal)
    discriminator.apply(weights_init_normal)

    # DataParallel
    generator = nn.DataParallel(generator).to(device)
    discriminator = nn.DataParallel(discriminator).to(device)

    # Dataloader
    # data preparation, loaders (ImageNet mean/std normalisation)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # cudnn.benchmark = True

    # preparing the training laoder
    # NOTE(review): transforms.Scale is the deprecated alias of Resize, and
    # CenterCrop(128) followed by RandomCrop(128) crops to the same size, so
    # the RandomCrop appears to be a no-op — confirm intent.
    train_loader = torch.utils.data.DataLoader(
        ImageLoader(
            opt.img_path,
            transforms.Compose([
                transforms.Scale(
                    128
                ),  # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(
                    128),  # we get only the center of that rescaled
                transforms.RandomCrop(
                    128),  # random crop within the center crop
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opt.data_path,
            partition='train'),
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.workers,
        pin_memory=True)
    print('Training loader prepared.')

    # preparing validation loader (no augmentation: deterministic center crop)
    val_loader = torch.utils.data.DataLoader(
        ImageLoader(
            opt.img_path,
            transforms.Compose([
                transforms.Scale(
                    128
                ),  # rescale the image keeping the original aspect ratio
                transforms.CenterCrop(
                    128),  # we get only the center of that rescaled
                transforms.ToTensor(),
                normalize,
            ]),
            data_path=opt.data_path,
            partition='val'),
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.workers,
        pin_memory=True)
    print('Validation loader prepared.')

    # Optimizers
    optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
    optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

    # ----------
    #  Training
    # ----------
    for epoch in range(opt.n_epochs):
        pbar = tqdm(total=len(train_loader))
        start_time = time.time()
        for i, data in enumerate(train_loader):
            # Move every tensor of the sample onto the device;
            # input_var[0] is the image, the rest are conditioning inputs.
            input_var = list()
            for j in range(len(data)):
                # if j>1:
                input_var.append(data[j].to(device))
            imgs = input_var[0]

            # Adversarial ground truths
            valid = np.ones((imgs.shape[0], 1))
            valid = torch.FloatTensor(valid).to(device)
            fake = np.zeros((imgs.shape[0], 1))
            fake = torch.FloatTensor(fake).to(device)

            # -----------------
            #  Train Generator
            # -----------------
            optimizer_G.zero_grad()
            # Sample noise as generator input
            z = np.random.normal(0, 1, (imgs.shape[0], opt.latent_dim))
            z = torch.FloatTensor(z).to(device)
            # Generate a batch of images
            gen_imgs = generator(z, input_var[1], input_var[2], input_var[3], input_var[4])
            # Loss measures generator's ability to fool the discriminator
            g_loss = adversarial_loss(discriminator(gen_imgs), valid)
            g_loss.backward()
            optimizer_G.step()

            # ---------------------
            #  Train Discriminator
            # ---------------------
            optimizer_D.zero_grad()
            # Measure discriminator's ability to classify real from generated samples;
            # detach() keeps D's loss from backpropagating into the generator.
            real_loss = adversarial_loss(discriminator(imgs), valid)
            fake_loss = adversarial_loss(discriminator(gen_imgs.detach()), fake)
            d_loss = (real_loss + fake_loss) / 2
            d_loss.backward()
            optimizer_D.step()
            pbar.update(1)
        pbar.close()
        # i/d_loss/g_loss here are the values from the last batch of the epoch.
        print(
            "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f] [Time Elapsed: %f]"
            % (epoch, opt.n_epochs, i, len(train_loader), d_loss.item(),
               g_loss.item(), time.time() - start_time))
        if epoch % opt.sample_interval == 0:
            save_samples(epoch, gen_imgs.data[:25])
            save_model(epoch, generator.state_dict(), discriminator.state_dict())
def main():
    """Train a DCGAN on MNIST and save a 10x10 sample grid every 100 epochs."""
    tf.random.set_seed(22)
    np.random.seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')

    # hyper parameters
    z_dim = 100
    epochs = 3000000
    batch_size = 128
    learning_rate = 0.0002
    is_training = True

    # for validation purpose
    assets_dir = './images'
    if not os.path.isdir(assets_dir):
        os.makedirs(assets_dir)
    val_block_size = 10
    val_size = val_block_size * val_block_size

    # load mnist data, scaled to [0, 1]
    (x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
    x_train = x_train.astype(np.float32) / 255.
    db = tf.data.Dataset.from_tensor_slices(x_train).shuffle(
        batch_size * 4).batch(batch_size).repeat()
    db_iter = iter(db)
    inputs_shape = [-1, 28, 28, 1]

    # create generator & discriminator
    generator = Generator()
    generator.build(input_shape=(batch_size, z_dim))
    generator.summary()
    discriminator = Discriminator()
    discriminator.build(input_shape=(batch_size, 28, 28, 1))
    discriminator.summary()

    # prepare optimizer
    d_optimizer = keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)
    g_optimizer = keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.5)

    for epoch in range(epochs):
        # no need labels
        batch_x = next(db_iter)
        # rescale images to the [-1, 1] neighborhood
        batch_x = tf.reshape(batch_x, shape=inputs_shape)
        batch_x = batch_x * 2.0 - 1.0
        batch_z = tf.random.uniform(shape=[batch_size, z_dim], minval=-1., maxval=1.)

        # discriminator step
        with tf.GradientTape() as tape:
            d_loss = d_loss_fn(generator, discriminator, batch_z, batch_x, is_training)
        grads = tape.gradient(d_loss, discriminator.trainable_variables)
        d_optimizer.apply_gradients(zip(grads, discriminator.trainable_variables))

        # generator step
        with tf.GradientTape() as tape:
            g_loss = g_loss_fn(generator, discriminator, batch_z, is_training)
        grads = tape.gradient(g_loss, generator.trainable_variables)
        g_optimizer.apply_gradients(zip(grads, generator.trainable_variables))

        if epoch % 100 == 0:
            print(epoch, 'd loss', float(d_loss), 'g loss:', float(g_loss))
            # validation results at every epoch
            val_z = np.random.uniform(-1, 1, size=(val_size, z_dim))
            fake_image = generator(val_z, training=False)
            # FIX: write into the portable assets_dir created above; the
            # original hard-coded an absolute Windows user path, making the
            # script fail on any other machine (and ignoring assets_dir).
            image_fn = os.path.join(assets_dir, 'gan-val-{:03d}.png'.format(epoch + 1))
            save_result(fake_image.numpy(), val_block_size, image_fn, color_mode='L')
def main():
    """Online SGD of a two-layer student network on inputs drawn from a deep
    generator, with labels from a frozen two-layer teacher (hidden-manifold
    style experiment).  Logs order parameters to a scenario-named .dat file."""
    # define the command line arguments
    g_help = "teacher + student activation function: 'erf' or 'relu'"
    M_help = "number of teacher hidden nodes"
    K_help = "number of student hidden nodes"
    device_help = "which device to run on: 'cuda' or 'cpu'"
    scenario_help = "Some pre-configured scenarios: rand, dcgan_rand, dcgan_cifar10, nvp_imnet32."
    steps_help = "training steps as multiples of N"
    seed_help = "random number generator seed."
    hmm_help = "have teacher act on latent representation."
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--f", default="tanh", help=g_help)
    parser.add_argument("-g", "--g", default="erf", help=g_help)
    parser.add_argument("-L", "--depth", type=int, default=4, help="generator depth")
    parser.add_argument("-D", "--D", type=int, default=100, help="latent dimension")
    parser.add_argument("-N", "--N", type=int, default=1000, help="input dimension")
    parser.add_argument("-M", "--M", type=int, default=2, help=M_help)
    parser.add_argument("-K", "--K", type=int, default=2, help=K_help)
    parser.add_argument("--scenario", help=scenario_help, default="rand")
    parser.add_argument("--device", "-d", help=device_help)
    parser.add_argument("--lr", type=float, default=0.2, help="learning rate")
    parser.add_argument("--bs", type=int, default=1, help="mini-batch size")
    parser.add_argument("--steps", type=int, default=10000, help=steps_help)
    parser.add_argument("-q", "--quiet", help="be quiet", action="store_true")
    parser.add_argument("-s", "--seed", type=int, default=0, help=seed_help)
    parser.add_argument("--hmm", action="store_true", help=hmm_help)
    parser.add_argument("--store", action="store_true", help="store initial conditions")
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    if args.device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(args.device)
    (L, D, N, M, K, lr) = (args.depth, args.D, args.N, args.M, args.K, args.lr)
    scenario = args.scenario
    # NOTE(review): this unconditionally overrides the --hmm flag — confirm
    # whether non-hmm runs are really meant to be impossible.
    args.hmm = True

    # Find the right generator for the given scenario
    loadweightsfrom = None
    scenario_desc = None
    num_gen_params = 0
    if scenario == "rand":
        # Random sign-activation generator with widths D,...,D,N.
        Ds = [args.D] * L + [N]
        f = Sign
        generator = RandomGenerator(Ds, f, batchnorm=False)
        scenario_desc = "rand_sign_L%d" % L
        generator.eval()
        generator.to(device)
    elif args.scenario in ["dcgan_rand", "dcgan_cifar10"]:
        # Pretrained DCGAN: fixed latent dim 100, output dim 3*32*32 = 3072.
        D = 100
        N = 3072
        generator = Generator(ngpu=1)
        # load weights
        loadweightsfrom = "models/%s_weights.pth" % args.scenario
        generator.load_state_dict(
            torch.load(loadweightsfrom, map_location=device))
        scenario_desc = scenario
        generator.eval()
        generator.to(device)
    elif args.scenario == "nvp_cifar10":
        # Normalising flow: invertible, so latent and data dims coincide.
        D = 3072
        N = 3072
        scenario_desc = args.scenario
        flow = torch.load("models/nvp_cifar10.model", map_location=device)
        num_gen_params = sum(p.numel() for p in flow.parameters())
        generator = flow.g
    else:
        raise ValueError("Did not recognise the scenario here, will exit now.")
    if num_gen_params == 0:
        num_gen_params = sum(p.numel() for p in generator.parameters())

    # output file + welcome message
    hmm_desc = "hmm_" if args.hmm else ""
    log_fname = "deepgen_online_%s_D%d_N%d_%s%s_M%d_K%d_lr%g_i1_s%d.dat" % (
        scenario_desc, D, N, hmm_desc, args.g, M, K, lr, args.seed,
    )
    logfile = open(log_fname, "w", buffering=1)
    welcome = "# Two-layer nets on inputs from a generator (scenario %s)\n" % scenario
    welcome += "# M=%d, K=%d, lr=%g, batch size=%d, seed=%d\n" % (
        M, K, lr, args.bs, args.seed,
    )
    welcome += "# Generator has %d parameters\n" % num_gen_params
    if loadweightsfrom is not None:
        welcome += "# generator weights from %s\n" % (loadweightsfrom)
    welcome += "# Using device:" + str(device)
    log(welcome, logfile)

    # networks and loss
    # NOTE(review): the name F is reused below for a weight matrix in the
    # "rand" branch, shadowing what appears to be torch.nn.functional here —
    # confirm the import; F.relu is only read before the reassignment.
    g = erfscaled if args.g == "erf" else F.relu
    gs = (g, identity)
    student = TwoLayer(gs, N, args.K, 1, normalise1=True, std0=1)
    student.to(device)
    # With hmm, the teacher acts on the D-dim latent; otherwise on the N-dim input.
    teacher_input_dim = D if args.hmm else N
    teacher = TwoLayer(gs, teacher_input_dim, args.M, 1, normalise1=True, std0=1)
    nn.init.constant_(teacher.fc2.weight, 1)
    teacher.freeze()
    teacher.to(device)
    B = teacher.fc1.weight.data
    A = teacher.fc2.weight.data

    # collect the parameters that are going to be optimised by SGD
    params = []
    params += [{"params": student.fc1.parameters()}]
    # If we train the last layer, ensure its learning rate scales correctly
    params += [{"params": student.fc2.parameters(), "lr": lr / N}]
    optimizer = optim.SGD(params, lr=lr)
    criterion = HalfMSELoss()
    print("# Generator, Teacher and Student: ")
    for net in [generator, teacher, student]:
        msg = "# " + str(net).replace("\n", "\n# ")
        log(msg, logfile)

    # when to print? 200 log-spaced evaluation times up to args.steps.
    end = torch.log10(torch.tensor([1.0 * args.steps])).item()
    times_to_print = list(torch.logspace(-1, end, steps=200))

    # Obtain the right covariance matrices
    Phi = None
    Omega = None
    mean_x = torch.zeros(N, device=device)
    if scenario == "rand":
        # Closed-form input covariance for the random sign generator;
        # b = E|z| for a standard Gaussian under the sign nonlinearity.
        b = math.sqrt(2 / np.pi)
        c = 1
        b2 = pow(b, 2)
        for l in range(generator.num_layers):
            # Weight matrix of the l-th linear layer (shadows functional F).
            F = generator.generator[l * 2].weight.data
            if l == 0:
                Omega = b2 * F @ F.T
                Phi = b * F
            else:
                I = torch.eye(F.shape[1]).to(device)
                Omega = b2 * F @ ((c - b2) * I + Omega) @ F.T
                Phi = b * F @ Phi
        # Sign output has unit variance, so the diagonal is exactly c.
        Omega[np.diag_indices(N)] = c
        torch.save(Omega, "models/rand_omega.pt")
        torch.save(Phi, "models/rand_phi.pt")
    elif scenario in [
            "fc_inverse", "dcgan_rand", "dcgan_cifar10", "nvp_cifar10"
    ]:
        # Precomputed covariances for the pretrained generators.
        Omega = torch.load("models/%s_omega.pt" % scenario, map_location=device)
        Phi = torch.load("models/%s_phi.pt" % scenario, map_location=device)
        mean_x = torch.load("models/%s_mean_x.pt" % scenario, map_location=device)

    # generate the test set
    test_cs, test_xs, test_ys = get_samples(
        args.scenario, args.hmm, NUM_TESTSAMPLES, D, N, generator, teacher,
        mean_x, device,
    )
    teacher_inputs = test_cs if args.hmm else test_xs
    # Teacher pre-activations on the test set.
    nus = B.mm(teacher_inputs.T) / math.sqrt(teacher_inputs.shape[1])
    msg = "# test xs: mean=%g, std=%g; test ys: std=%g" % (
        torch.mean(test_xs), torch.std(test_xs), torch.std(test_ys),
    )
    log(msg, logfile)

    # Teacher-teacher overlap matrices.
    T = 1.0 / B.shape[1] * B @ B.T
    rotation = Phi.T @ Phi
    tildeT = 1 / N * B @ rotation @ B.T

    if args.store:
        with torch.no_grad():
            # compute the exact densities of r and q
            exq = torch.zeros((K, K, N), device=device)
            exr = torch.zeros((K, M, N), device=device)
            extildet = torch.zeros((M, M, N), device=device)
            sqrtN = math.sqrt(N)
            w = student.fc1.weight.data
            v = student.fc2.weight.data
            # Eigendecomposition of the input covariance.
            # NOTE(review): torch.symeig is deprecated in newer PyTorch
            # (torch.linalg.eigh) — confirm the pinned version.
            rhos, psis = torch.symeig(Omega, eigenvectors=True)
            rhos.to(device)
            psis.to(device)
            # make sure to normalise, orient evectors according to the note
            psis = sqrtN * psis.T
            GammaB = 1.0 / sqrtN * B @ Phi.T @ psis.T
            GammaW = 1.0 / sqrtN * w @ psis.T
            for k in range(K):
                for l in range(K):
                    exq[k, l] = GammaW[k, :] * GammaW[l, :]
                for n in range(M):
                    exr[k, n] = GammaW[k, :] * GammaB[n, :]
            for n in range(M):
                for m in range(M):
                    extildet[n, m] = GammaB[n, :] * GammaB[m, :]
            root_name = log_fname[:-4]
            np.savetxt(root_name + "_T.dat", T.cpu().numpy(), delimiter=",")
            np.savetxt(root_name + "_rhos.dat", rhos.cpu().numpy(), delimiter=",")
            # NOTE(review): _T.dat is written twice (duplicate line) — confirm
            # whether a different file name was intended here.
            np.savetxt(root_name + "_T.dat", T.cpu().numpy(), delimiter=",")
            np.savetxt(root_name + "_A.dat", A[0].cpu().numpy(), delimiter=",")
            np.savetxt(root_name + "_v0.dat", v[0].cpu().numpy(), delimiter=",")
            write_density(root_name + "_q0.dat", exq)
            write_density(root_name + "_r0.dat", exr)
            write_density(root_name + "_tildet.dat", extildet)

    # Online training: time is measured in units of N samples (dt = 1/N).
    time = 0
    dt = 1 / N
    msg = eval_student(time, student, test_xs, test_ys, nus, T, tildeT, A, criterion)
    log(msg, logfile)
    while len(times_to_print) > 0:
        # get the inputs
        cs, inputs, targets = get_samples(args.scenario, args.hmm, args.bs, D,
                                          N, generator, teacher, mean_x, device)
        for i in range(args.bs):
            student.train()
            preds = student(inputs[i])
            loss = criterion(preds, targets[i])
            # TRAINING
            student.zero_grad()
            loss.backward()
            optimizer.step()
            time += dt
            # Evaluate + log whenever the next log-spaced time is reached.
            if time >= times_to_print[0].item() or time == 0:
                msg = eval_student(time, student, test_xs, test_ys, nus, T,
                                   tildeT, A, criterion)
                log(msg, logfile)
                times_to_print.pop(0)
    print("Bye-bye")
def run_dcgan(device, image_size, noise_size, batch_size, config, run_dir,
              saved_dir, run_name, num_epochs, val_dataset, train_dataset=None,
              checkpoints=None, mode='train', gpu_num=1):
    """Build a DCGAN, train it (mode='train') or just score it (mode='test'),
    pickle the FID / Inception score lists and return them.

    Returns:
        (inception_FID_scores, inception_scores) — lists accumulated per epoch
        in train mode, or single-element lists in test mode.
    """
    gan_type = 'DCGAN'  # renamed: `type` shadowed the builtin
    dcgan_generator = Generator(noise_size=noise_size, image_size=image_size).to(device)
    dcgan_discriminator = Discriminator(image_size=image_size).to(device)

    # Parallelize across GPUs for improved performance
    if device.type == 'cuda' and gpu_num > 1:
        dcgan_generator = nn.DataParallel(dcgan_generator, list(range(gpu_num)))
        dcgan_discriminator = nn.DataParallel(dcgan_discriminator, list(range(gpu_num)))

    # Print networks
    print('Discriminator')
    summary(dcgan_discriminator, (3, image_size, image_size))
    print('Generator')
    summary(dcgan_generator, (noise_size, 1, 1))

    # Optionally resume both nets from named checkpoints
    if checkpoints is not None:
        utils.load_from_checkpoint(dcgan_generator, saved_dir, checkpoints["generator"])
        utils.load_from_checkpoint(dcgan_discriminator, saved_dir, checkpoints["discriminator"])

    run_name = 'DCGAN' + '_' + run_name

    # We train the model in train phase and only calculate scores in test mode
    if mode == 'train':
        inception_FID_scores, inception_scores = train_gan(
            num_epochs, batch_size, noise_size, device, train_dataset,
            val_dataset, dcgan_generator, dcgan_discriminator, type='DCGAN',
            config=config, run_dir=run_dir, saved_dir=saved_dir,
            run_name=run_name)
    elif mode == 'test':
        inception_FID_scores = [
            calc_inception_FID_score(batch_size, device, val_dataset,
                                     dcgan_generator, gan_type, noise_size)
        ]
        inception_scores = [
            calc_inception_score(device, noise_size, dcgan_generator,
                                 eval_size=len(val_dataset))
        ]
    else:
        # FIX: an unknown mode previously fell through and crashed with a
        # confusing NameError at the return below; fail fast instead.
        raise ValueError("mode must be 'train' or 'test', got %r" % (mode,))

    # Persist the score lists, stamped with the current date/hour
    date_str = datetime.datetime.now().strftime("%m%d%Y%H")
    save_to_pickle(
        inception_FID_scores,
        os.path.join(saved_dir, 'dcgan_fid_' + run_name + date_str + ".pickle"))
    save_to_pickle(
        inception_scores,
        os.path.join(saved_dir, 'dcgan_IS_' + run_name + date_str + ".pickle"))
    return inception_FID_scores, inception_scores
def run_wsgan(device, image_size, noise_size, batch_size, config, run_dir,
              saved_dir, run_name, num_epochs, val_dataset, train_dataset=None,
              checkpoints=None, mode='train', gpu_num=1):
    """Build a Wasserstein GAN (generator + critic), train it (mode='train')
    or just score it (mode='test'), pickle the FID / Inception score lists
    and return them.

    Returns:
        (inception_FID_scores, inception_scores) — lists accumulated per epoch
        in train mode, or single-element lists in test mode.
    """
    gan_type = 'WSGAN'  # renamed: `type` shadowed the builtin
    wsgan_generator = Generator(noise_size=noise_size, image_size=image_size).to(device)
    wsgan_critic = Discriminator(image_size=image_size, as_critic=True).to(device)

    # Parallelize across GPUs for improved performance
    if ((device.type == 'cuda') and (gpu_num > 1)):
        wsgan_generator = nn.DataParallel(wsgan_generator, list(range(gpu_num)))
        wsgan_critic = nn.DataParallel(wsgan_critic, list(range(gpu_num)))

    # Print networks
    print('Critic')
    summary(wsgan_critic, (3, image_size, image_size))
    print('Generator')
    summary(wsgan_generator, (noise_size, 1, 1))

    # Optionally resume both nets from named checkpoints
    if checkpoints is not None:
        utils.load_from_checkpoint(wsgan_generator, saved_dir, checkpoints["generator"])
        utils.load_from_checkpoint(wsgan_critic, saved_dir, checkpoints["discriminator"])

    run_name = 'WSGAN' + '_' + run_name

    # Train in train mode; only compute scores in test mode
    if mode == 'train':
        inception_FID_scores, inception_scores = train_gan(
            num_epochs, batch_size, noise_size, device, train_dataset,
            val_dataset, wsgan_generator, wsgan_critic, type='WSGAN',
            config=config, run_dir=run_dir, saved_dir=saved_dir,
            run_name=run_name)
    elif mode == 'test':
        inception_FID_scores = [
            calc_inception_FID_score(batch_size, device, val_dataset,
                                     wsgan_generator, gan_type, noise_size)
        ]
        inception_scores = [
            calc_inception_score(device, noise_size, wsgan_generator,
                                 eval_size=len(val_dataset))
        ]
    else:
        # FIX: an unknown mode previously fell through and crashed with a
        # confusing NameError at the return below; fail fast instead.
        raise ValueError("mode must be 'train' or 'test', got %r" % (mode,))

    # Persist the score lists, stamped with the current date/hour
    date_str = datetime.datetime.now().strftime("%m%d%Y%H")
    save_to_pickle(
        inception_FID_scores,
        os.path.join(saved_dir, 'wsgan_fid_' + run_name + date_str + ".pickle"))
    save_to_pickle(
        inception_scores,
        os.path.join(saved_dir, 'wsgan_IS_' + run_name + date_str + ".pickle"))
    return inception_FID_scores, inception_scores
import torch from tqdm import tqdm import torch.nn as nn import engine import torch.nn.init as init # from config import * import config import torch.optim as optim import data if __name__ == "__main__": # Create the nets device = torch.device("cuda" if torch.cuda.is_available() else "cpu") dataset, dataloader = data.create_dataset() gen_net = Generator().to(device) gen_net.apply(weights_init) dis_net = Discriminator().to(device) dis_net.apply(weights_init) # Imporant. We need to add noise to images to learn properly fixed_noise = torch.randn(config.batchSize, config.nz, 1, 1, device=device) real_label = 1 fake_label = 0 criterion = nn.BCELoss() # We need 2 seperate optimizers, the Generator and the Discriminator gen_opt = optim.Adam(gen_net.parameters(), lr=config.lr,
def main():
    """Optimize DCGAN latent codes so that pairwise E-LPIPS distances between
    generated images match pairwise squared-L2 distances between author vectors.

    Side effects: writes preview grids, distance-matrix renders, survey sheets
    and checkpoints under runs/<timestamp>/, plus TensorBoard scalars.
    """
    dataSize = 32
    batchSize = 8
    elpipsBatchSize = 1
    # imageSize = 32
    imageSize = 64
    nz = 100

    # discCheckpointPath = r'E:\projects\visus\PyTorch-GAN\implementations\dcgan\checkpoints\2020_07_10_15_53_34\disc_step4800.pth'
    discCheckpointPath = r'E:\projects\visus\pytorch-examples\dcgan\out\netD_epoch_24.pth'
    genCheckpointPath = r'E:\projects\visus\pytorch-examples\dcgan\out\netG_epoch_24.pth'

    gpu = torch.device('cuda')

    # For now we normalize the vectors to have norm 1, but don't make sure
    # that the data has certain mean/std.
    pointDataset = AuthorDataset(
        jsonPath=r'E:\out\scripts\metaphor-vis\authors-all.json'
    )

    # Take top N points and precompute their pairwise target distances.
    points = np.asarray([pointDataset[i][0] for i in range(dataSize)])
    distPointsCpu = l2_sqr_dist_matrix(torch.tensor(points)).numpy()

    # The latent codes are the optimization target (one nz-vector per point).
    latents = torch.tensor(np.random.normal(0.0, 1.0, (dataSize, nz)), requires_grad=True,
                           dtype=torch.float32, device=gpu)

    # Affine map from E-LPIPS distance to point distance.
    scale = torch.tensor(2.7, requires_grad=True, dtype=torch.float32, device=gpu)  # todo Re-check!
    bias = torch.tensor(0.0, requires_grad=True, dtype=torch.float32, device=gpu)  # todo Re-check!

    lpips = models.PerceptualLoss(model='net-lin', net='vgg', use_gpu=True).to(gpu)
    config = elpips.Config()
    config.batch_size = elpipsBatchSize  # Ensemble size for ELPIPS.
    config.set_scale_levels_by_image_size(imageSize, imageSize)
    lossModel = elpips.ElpipsMetric(config, lpips).to(gpu)

    discriminator = Discriminator(3, 64, 1)
    if discCheckpointPath:
        discriminator.load_state_dict(torch.load(discCheckpointPath))
    else:
        discriminator.init_params()
    discriminator = discriminator.to(gpu)

    generator = Generator(nz=nz, ngf=64)
    if genCheckpointPath:
        generator.load_state_dict(torch.load(genCheckpointPath))
    else:
        generator.init_params()
    generator = generator.to(gpu)

    # Only the latents are optimized in this script; scale/bias have their own
    # (currently unused) optimizer, generator/discriminator stay frozen.
    optimizerScale = torch.optim.Adam([scale, bias], lr=0.001)
    optimizerLatents = torch.optim.Adam([latents], lr=5e-3, betas=(0.9, 0.999))

    fig, axes = plt.subplots(nrows=2, ncols=batchSize // 2)
    fig2 = plt.figure()
    ax2 = fig2.add_subplot(1, 1, 1)

    outPath = os.path.join('runs', datetime.datetime.today().strftime('%Y_%m_%d_%H_%M_%S'))
    os.makedirs(outPath)

    summaryWriter = SummaryWriter(outPath)

    for batchIndex in range(10000):
        # noinspection PyTypeChecker
        randomIndices = np.random.randint(0, dataSize, batchSize).tolist()  # type: List[int]

        distTarget = torch.tensor(distPointsCpu[randomIndices, :][:, randomIndices],
                                  dtype=torch.float32, device=gpu)
        latentsBatch = latents[randomIndices]

        imageBatchFake = generator(latentsBatch[:, :, None, None].float())

        # todo It's possible to compute this more efficiently, but would require re-implementing lpips.
        # For now, compute the full BSxBS matrix row-by-row to avoid memory issues.
        lossDistTotal = torch.tensor(0.0, device=gpu)
        distanceRows = []
        for iRow in range(batchSize):
            distPredFlat = lossModel(imageBatchFake[iRow].repeat(repeats=(batchSize, 1, 1, 1)).contiguous(),
                                     imageBatchFake, normalize=True)
            distPred = distPredFlat.reshape((1, batchSize))
            distanceRows.append(distPred)
            lossDist = torch.sum((distTarget[iRow] - (distPred * scale + bias)) ** 2)  # MSE
            lossDistTotal += lossDist

        lossDistTotal /= batchSize * batchSize  # Compute the mean.

        distPredFull = torch.cat(distanceRows, dim=0)

        lossLatents = lossDistTotal

        optimizerLatents.zero_grad()
        lossLatents.backward()
        optimizerLatents.step()

        if batchIndex % 100 == 0:
            msg = 'iter {} loss dist {:.3f} scale: {:.3f} bias: {:.3f}'.format(
                batchIndex, lossDistTotal.item(), scale.item(), bias.item())
            print(msg)

            summaryWriter.add_scalar('loss-dist', lossDistTotal.item(), global_step=batchIndex)

            def gpu_images_to_numpy(images):
                # [-1, 1] NCHW GPU tensors -> [0, 1] NHWC numpy arrays.
                imagesNumpy = images.cpu().data.numpy().transpose(0, 2, 3, 1)
                imagesNumpy = (imagesNumpy + 1) / 2
                return imagesNumpy

            imageBatchFakeCpu = gpu_images_to_numpy(imageBatchFake)

            for iCol, ax in enumerate(axes.flatten()[:batchSize]):
                ax.imshow(imageBatchFakeCpu[iCol])
            fig.suptitle(msg)

            with torch.no_grad():
                images = gpu_images_to_numpy(generator(latents[..., None, None]))

            authorVectorsProj = umap.UMAP(n_neighbors=min(5, dataSize), random_state=1337).fit_transform(points)
            plot_image_scatter(ax2, authorVectorsProj, images, downscaleRatio=2)

            fig.savefig(os.path.join(outPath, f'batch_{batchIndex}.png'))
            fig2.savefig(os.path.join(outPath, f'scatter_{batchIndex}.png'))
            plt.close(fig)
            plt.close(fig2)

            with torch.no_grad():
                imagesGpu = generator(latents[..., None, None])
                imageNumber = imagesGpu.shape[0]

                # Compute LPIPS distances, batch to avoid memory issues.
                bs = min(imageNumber, 8)
                assert imageNumber % bs == 0
                distPredEval = np.zeros((imagesGpu.shape[0], imagesGpu.shape[0]))
                for iA in range(imageNumber // bs):
                    startA, endA = iA * bs, (iA + 1) * bs
                    imagesA = imagesGpu[startA:endA]
                    for iB in range(imageNumber // bs):
                        startB, endB = iB * bs, (iB + 1) * bs
                        imagesB = imagesGpu[startB:endB]

                        distBatchEval = lossModel(imagesA.repeat(repeats=(bs, 1, 1, 1)).contiguous(),
                                                  imagesB.repeat_interleave(repeats=bs, dim=0).contiguous(),
                                                  normalize=True).cpu().numpy()
                        distPredEval[startA:endA, startB:endB] = distBatchEval.reshape((bs, bs))

                distPredEval = (distPredEval * scale.item() + bias.item())

                # Move to the CPU and append an alpha channel for rendering.
                images = gpu_images_to_numpy(imagesGpu)
                images = [np.concatenate([im, np.ones(im.shape[:-1] + (1,))], axis=-1) for im in images]

                distPoints = distPointsCpu
                assert np.abs(distPoints - distPoints.T).max() < 1e-5
                distPoints = np.minimum(distPoints, distPoints.T)  # Remove rounding errors, guarantee symmetry.
                config = DistanceMatrixConfig()
                config.dataRange = (0., 4.)
                _, pointIndicesSorted = render_distance_matrix(
                    os.path.join(outPath, f'dist_point_{batchIndex}.png'),
                    distPoints,
                    images,
                    config=config
                )

                # The symmetry doesn't hold for E-LPIPS, since it's stochastic.
                distPredEval = np.minimum(distPredEval, distPredEval.T)  # Remove rounding errors, guarantee symmetry.
                config = DistanceMatrixConfig()
                config.dataRange = (0., 4.)
                render_distance_matrix(
                    os.path.join(outPath, f'dist_images_{batchIndex}.png'),
                    distPredEval,
                    images,
                    config=config
                )

                config = DistanceMatrixConfig()
                config.dataRange = (0., 4.)
                render_distance_matrix(
                    os.path.join(outPath, f'dist_images_aligned_{batchIndex}.png'),
                    distPredEval,
                    images,
                    predefinedOrder=pointIndicesSorted,
                    config=config
                )

                # FIX: use dedicated figure names so the preview figure (`fig`/`axes`)
                # created before the loop isn't clobbered for subsequent iterations.
                figDist, axesDist = plt.subplots(ncols=2)
                axesDist[0].matshow(distTarget.cpu().numpy(), vmin=0, vmax=4)
                axesDist[1].matshow(distPredFull.cpu().numpy() * scale.item(), vmin=0, vmax=4)
                figDist.savefig(os.path.join(outPath, f'batch_dist_{batchIndex}.png'))
                plt.close(figDist)

                # Build a 3-row "which image is closer" survey sheet plus answer key.
                surveySize = 30
                figSurvey, axesSurvey = plt.subplots(nrows=3, ncols=surveySize, figsize=(surveySize, 3))
                assert len(images) == dataSize
                allIndices = list(range(dataSize))
                with open(os.path.join(outPath, f'survey_{batchIndex}.txt'), 'w') as file:
                    for iCol in range(surveySize):
                        triplet = random.sample(allIndices, k=3)
                        leftToMid = distPointsCpu[triplet[0], triplet[1]]
                        rightToMid = distPointsCpu[triplet[2], triplet[1]]
                        correctAnswer = 'left' if leftToMid < rightToMid else 'right'
                        file.write("{}\t{}\t{}\t{}\t{}\n".format(iCol, correctAnswer, leftToMid,
                                                                 rightToMid, str(tuple(triplet))))
                        for iRow in (0, 1, 2):
                            axesSurvey[iRow][iCol].imshow(images[triplet[iRow]])

                figSurvey.savefig(os.path.join(outPath, f'survey_{batchIndex}.png'))
                plt.close(figSurvey)

            torch.save(generator.state_dict(), os.path.join(outPath, 'gen_{}.pth'.format(batchIndex)))
            # FIX: was written to 'gen_{}.pth', silently overwriting the generator checkpoint.
            torch.save(discriminator.state_dict(), os.path.join(outPath, 'disc_{}.pth'.format(batchIndex)))

    summaryWriter.close()
def main():
    """Train a DCGAN generator so that LPIPS distances between generated images
    track squared-L2 distances between author vectors, with a GAN realness term
    (discriminator trained on CIFAR-10) keeping the images plausible.

    Side effects: writes preview/scatter images, distance-matrix renders and
    checkpoints under runs/<timestamp>/.
    """
    dataSize = 128
    batchSize = 8
    # imageSize = 32
    imageSize = 64

    # discCheckpointPath = r'E:\projects\visus\PyTorch-GAN\implementations\dcgan\checkpoints\2020_07_10_15_53_34\disc_step4800.pth'
    # discCheckpointPath = r'E:\projects\visus\pytorch-examples\dcgan\out\netD_epoch_24.pth'
    discCheckpointPath = None  # None -> discriminator trained from scratch below.

    gpu = torch.device('cuda')

    # Real-image source for the discriminator (CIFAR-10, resized, in [-1, 1]).
    imageDataset = datasets.CIFAR10(root=r'e:\data\images\cifar10', download=True,
                                    transform=transforms.Compose([
                                        transforms.Resize((imageSize, imageSize)),
                                        transforms.ToTensor(),
                                        # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                        transforms.Normalize([0.5], [0.5]),
                                    ]))

    # For now we normalize the vectors to have norm 1, but don't make sure
    # that the data has certain mean/std.
    pointDataset = AuthorDataset(
        jsonPath=r'E:\out\scripts\metaphor-vis\authors-all.json'
    )

    # InfiniteSampler lets us draw batches forever with plain next() below.
    imageLoader = DataLoader(imageDataset, batch_size=batchSize, sampler=InfiniteSampler(imageDataset))
    pointLoader = DataLoader(pointDataset, batch_size=batchSize, sampler=InfiniteSampler(pointDataset))

    # Scale mapping LPIPS distances onto point distances; learned jointly.
    scale = torch.tensor(4.0, requires_grad=True, dtype=torch.float32, device=gpu)

    lossModel = models.PerceptualLoss(model='net-lin', net='vgg', use_gpu=True).to(gpu)
    bceLoss = torch.nn.BCELoss()

    # discriminator = Discriminator(imageSize, 3)
    discriminator = Discriminator(3, 64, 1)
    if discCheckpointPath:
        discriminator.load_state_dict(torch.load(discCheckpointPath))
    else:
        discriminator.init_params()
    discriminator = discriminator.to(gpu)

    # Generator input size is the author-vector dimensionality.
    generator = Generator(nz=pointDataset[0][0].shape[0], ngf=64)
    generator.init_params()
    generator = generator.to(gpu)
    # todo init properly, if training
    # discriminator.apply(weights_init_normal)

    optimizerScale = torch.optim.Adam([scale], lr=0.001)
    optimizerGen = torch.optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
    optimizerDisc = torch.optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))

    import matplotlib.pyplot as plt
    fig, axes = plt.subplots(nrows=2 * 2, ncols=batchSize // 2)  # Top half fake, bottom half real.
    fig2 = plt.figure()
    ax2 = fig2.add_subplot(1, 1, 1)

    outPath = os.path.join('runs', datetime.datetime.today().strftime('%Y_%m_%d_%H_%M_%S'))
    os.makedirs(outPath)

    imageIter = iter(imageLoader)
    pointIter = iter(pointLoader)
    for batchIndex in range(10000):
        imageBatchReal, _ = next(imageIter)  # type: Tuple[torch.Tensor, Any]
        imageBatchReal = imageBatchReal.to(gpu)

        vectorBatch, _ = next(pointIter)
        vectorBatch = vectorBatch.to(gpu)
        distanceBatch = l2_sqr_dist_matrix(vectorBatch)  # In-batch vector distances.

        # Vectors are fed to the conv generator as Nx C x1x1 "images".
        imageBatchFake = generator(vectorBatch[:, :, None, None].float())

        # todo It's possible to compute this more efficiently, but would require re-implementing lpips.
        # Full BSxBS pairwise LPIPS via repeat/repeat_interleave cross product.
        distImages = lossModel.forward(imageBatchFake.repeat(repeats=(batchSize, 1, 1, 1)).contiguous(),
                                       imageBatchFake.repeat_interleave(repeats=batchSize, dim=0).contiguous(),
                                       normalize=True)
        distPredMat = distImages.reshape((batchSize, batchSize))

        lossDist = torch.sum((distanceBatch - distPredMat * scale) ** 2)  # MSE
        discPred = discriminator(imageBatchFake)
        lossRealness = bceLoss(discPred, torch.ones(imageBatchFake.shape[0], device=gpu))
        lossGen = lossDist + 1.0 * lossRealness

        # Generator + scale update (discriminator is only read here).
        optimizerGen.zero_grad()
        optimizerScale.zero_grad()
        lossGen.backward()
        optimizerGen.step()
        optimizerScale.step()

        # Standard discriminator update; fakes are detached so only D's weights move.
        lossDiscReal = bceLoss(discriminator(imageBatchReal), torch.ones(imageBatchReal.shape[0], device=gpu))
        lossDiscFake = bceLoss(discriminator(imageBatchFake.detach()), torch.zeros(imageBatchFake.shape[0], device=gpu))
        lossDisc = (lossDiscFake + lossDiscReal) / 2
        # lossDisc = torch.tensor(0)

        optimizerDisc.zero_grad()
        lossDisc.backward()
        optimizerDisc.step()

        # Periodic monitoring, rendering, and checkpointing.
        if batchIndex % 100 == 0:
            msg = 'iter {}, loss gen {:.3f}, loss dist {:.3f}, loss real {:.3f}, loss disc {:.3f}, scale: {:.3f}'.format(
                batchIndex, lossGen.item(), lossDist.item(), lossRealness.item(), lossDisc.item(), scale.item()
            )
            print(msg)

            def gpu_images_to_numpy(images):
                # [-1, 1] NCHW GPU tensors -> [0, 1] NHWC numpy arrays.
                imagesNumpy = images.cpu().data.numpy().transpose(0, 2, 3, 1)
                imagesNumpy = (imagesNumpy + 1) / 2
                return imagesNumpy

            imageBatchFakeCpu = gpu_images_to_numpy(imageBatchFake)
            imageBatchRealCpu = gpu_images_to_numpy(imageBatchReal)

            # Fakes in the top half of the grid, reals in the bottom half.
            for i, ax in enumerate(axes.flatten()[:batchSize]):
                ax.imshow(imageBatchFakeCpu[i])
            for i, ax in enumerate(axes.flatten()[batchSize:]):
                ax.imshow(imageBatchRealCpu[i])

            fig.suptitle(msg)

            with torch.no_grad():
                points = np.asarray([pointDataset[i][0] for i in range(200)], dtype=np.float32)
                images = gpu_images_to_numpy(generator(torch.tensor(points[..., None, None], device=gpu)))

            authorVectorsProj = umap.UMAP(n_neighbors=5, random_state=1337).fit_transform(points)
            plot_image_scatter(ax2, authorVectorsProj, images, downscaleRatio=2)

            fig.savefig(os.path.join(outPath, f'batch_{batchIndex}.png'))
            fig2.savefig(os.path.join(outPath, f'scatter_{batchIndex}.png'))
            plt.close(fig)
            plt.close(fig2)

            with torch.no_grad():
                imageNumber = 48
                points = np.asarray([pointDataset[i][0] for i in range(imageNumber)], dtype=np.float32)
                imagesGpu = generator(torch.tensor(points[..., None, None], device=gpu))

                # Compute LPIPS distances, batch to avoid memory issues.
                bs = 8
                assert imageNumber % bs == 0
                distImages = np.zeros((imagesGpu.shape[0], imagesGpu.shape[0]))
                for i in range(imageNumber // bs):
                    startA, endA = i * bs, (i + 1) * bs
                    imagesA = imagesGpu[startA:endA]
                    for j in range(imageNumber // bs):
                        startB, endB = j * bs, (j + 1) * bs
                        imagesB = imagesGpu[startB:endB]

                        distBatch = lossModel.forward(imagesA.repeat(repeats=(bs, 1, 1, 1)).contiguous(),
                                                      imagesB.repeat_interleave(repeats=bs, dim=0).contiguous(),
                                                      normalize=True).cpu().numpy()
                        distImages[startA:endA, startB:endB] = distBatch.reshape((bs, bs))

                # Move to the CPU and append an alpha channel for rendering.
                images = gpu_images_to_numpy(imagesGpu)
                images = [np.concatenate([im, np.ones(im.shape[:-1] + (1,))], axis=-1) for im in images]

                distPoints = l2_sqr_dist_matrix(torch.tensor(points, dtype=torch.double)).numpy()
                assert np.abs(distPoints - distPoints.T).max() < 1e-5
                distPoints = np.minimum(distPoints, distPoints.T)  # Remove rounding errors, guarantee symmetry.
                config = DistanceMatrixConfig()
                config.dataRange = (0., 4.)
                render_distance_matrix(os.path.join(outPath, f'dist_point_{batchIndex}.png'),
                                       distPoints, images, config)

                assert np.abs(distImages - distImages.T).max() < 1e-5
                distImages = np.minimum(distImages, distImages.T)  # Remove rounding errors, guarantee symmetry.
                config = DistanceMatrixConfig()
                config.dataRange = (0., 1.)  # NOTE: raw LPIPS range here, not scaled.
                render_distance_matrix(os.path.join(outPath, f'dist_images_{batchIndex}.png'),
                                       distImages, images, config)

            torch.save(generator.state_dict(), os.path.join(outPath, 'gen_{}.pth'.format(batchIndex)))
            torch.save(discriminator.state_dict(), os.path.join(outPath, 'disc_{}.pth'.format(batchIndex)))
# Create the dataloader dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=workers) # Decide which device we want to run on device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu") # Plot some training images #real_batch = next(iter(dataloader)) #plt.figure(figsize=(8,8)) #plt.axis("off") #plt.title("Training Images") #plt.imshow(np.transpose(vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu(),(1,2,0))) # Create the networks netG = Generator(ngpu).to(device) netD = Discriminator(ngpu).to(device) # Handle multi-gpu if desired if (device.type == 'cuda') and (ngpu > 1): netG = nn.DataParallel(netG, list(range(ngpu))) netD = nn.DataParallel(netD, list(range(ngpu))) # Apply the weights_init function to randomly initialize all weights # to mean=0, stdev=0.2. netG.apply(weights_init) netD.apply(weights_init) # Initialize BCELoss function criterion = nn.BCELoss()
i = input("Accept Image? 1: Accept, anything else: reject - ") if i == "1": print("Vector accepted") vector_set[counter, :] = gen_inp #print(vector_set) counter += 1 else: img.close() return vector_set if __name__ == "__main__": device = torch.device('cpu') model_save_path = r"./generator_mnist" gen_model = Generator() gen_model.load_state_dict(torch.load(model_save_path)) n_samples = 3 print("Selecting Vector Set 1") vector_set1 = make_vector_set(n_samples=n_samples) print("Selecting Vector Set 2") vector_set2 = make_vector_set(n_samples=n_samples) with open("saved_vectors.pkl", "wb") as f: pickle.dump((vector_set1, vector_set2), f) with open("saved_vectors.pkl", "rb") as f: vector_set1, vector_set2 = pickle.load(f) print(vector_set1.size(), vector_set2.size())
def main():
    """Train a DCGAN on MNIST with an ignite trainer, streaming losses,
    real/fake classification rates, and sample images to a Visdom dashboard.
    """
    # Parse CLI flags; `--z_dim` is added on top of the project's standard flags.
    parser = Flags()
    parser.set_arguments()
    parser.add_argument('--z_dim', type=int, default=100)
    FG = parser.parse_args()

    # Visdom dashboard; the flag report is posted as a text panel.
    vis = Visdom(port=FG.vis_port, env=FG.model)
    report = parser.report(end='<br>')
    vis.text(report, win='report f{}'.format(FG.cur_fold))

    # MNIST scaled to 64x64 crops, normalized to [-1, 1].
    transform = transforms.Compose([
        transforms.RandomResizedCrop(64, scale=(0.5, 1.0)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, ))
    ])
    trainset = datasets.MNIST(root='./mnist', train=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=FG.batch_size,
        worker_init_fn=lambda _: torch.initial_seed(),
        num_workers=5, shuffle=True, pin_memory=True, drop_last=True)
    # trainloader, _ = get_dataloader(FG.fold, FG.cur_fold, FG.data_root, FG.modality,
    #                                 labels=FG.labels, batch_size=FG.batch_size,
    #                                 balancing=FG.data_balancing)

    torch.cuda.set_device(FG.devices[0])
    device = torch.device(FG.devices[0])

    # Networks are weight-initialized and wrapped for multi-GPU.
    netG = nn.DataParallel(Generator(FG.ckpt_dir, FG.z_dim).weight_init(),
                           device_ids=FG.devices)
    netD = nn.DataParallel(Discriminator(FG.ckpt_dir).weight_init(),
                           device_ids=FG.devices)

    optimG = Adam(netG.parameters(), lr=FG.lr, amsgrad=True)
    optimD = Adam(netD.parameters(), lr=FG.lr, amsgrad=True)

    # Draws latent batches and holds the fixed noise / label tensors.
    z_sampler = ZSampler(torch.randn, (FG.batch_size, FG.z_dim, 1, 1), device=device)

    trainer = create_gan_trainer(netG, netD, optimG, optimD, F.binary_cross_entropy,
                                 z_sampler, device=device, non_blocking=True)

    # Smoothed per-iteration metrics pulled out of the trainer's output dict.
    monitoring_metrics = ['LD', 'LG', 'Dx', 'DGz1', 'DGz2']
    RunningAverage(alpha=0.98, output_transform=lambda x: x['LD']).attach(trainer, 'LD')
    RunningAverage(alpha=0.98, output_transform=lambda x: x['LG']).attach(trainer, 'LG')
    RunningAverage(alpha=0.98, output_transform=lambda x: x['Dx']).attach(trainer, 'Dx')
    RunningAverage(alpha=0.98, output_transform=lambda x: x['DGz1']).attach(
        trainer, 'DGz1')
    RunningAverage(alpha=0.98, output_transform=lambda x: x['DGz2']).attach(
        trainer, 'DGz2')

    # How often the discriminator classifies reals/fakes correctly.
    real_rate = Accuracy()
    fake_rate = Accuracy()

    # One Visdom scalar plot per metric, plus the two accuracy rates.
    trackers = dict()
    for monitoring_metric in monitoring_metrics:
        trackers[monitoring_metric] = Scalar(vis, monitoring_metric, monitoring_metric,
                                             opts=dict(
                                                 title=monitoring_metric, y_label=monitoring_metric,
                                                 xlabel='epoch', showlegend=True))
    trackers['real_rate'] = Scalar(vis, 'real_rate', 'real_rate',
                                   opts=dict(title='real_rate', y_label='real_rate',
                                             ytickmin=0, ytickmax=1, xlabel='epoch',
                                             showlegend=True))
    trackers['fake_rate'] = Scalar(vis, 'fake_rate', 'fake_rate',
                                   opts=dict(title='fake_rate', y_label='fake_rate',
                                             ytickmin=0, ytickmax=1, xlabel='epoch',
                                             showlegend=True))
    fakeshow = Image2D(vis, 'fake')
    realshow = Image2D(vis, 'real')

    @trainer.on(Events.ITERATION_COMPLETED)
    def track_logs(engine):
        # x-axis is fractional epochs (iteration / iterations-per-epoch).
        i = engine.state.iteration / len(trainloader)
        metrics = engine.state.metrics
        for key in metrics.keys():
            trackers[key](i, metrics[key])

        # Threshold discriminator outputs at 0.5 to count correct decisions.
        y_pred_real = (engine.state.output['output_real'] >= 0.5).long()
        y_pred_fake = (engine.state.output['output_fake'] < 0.5).long()
        real_rate.update((y_pred_real, z_sampler.real_label.long()))
        fake_rate.update((y_pred_fake, z_sampler.fake_label.long()))

    @trainer.on(Events.EPOCH_COMPLETED)
    def show_fake_example(engine):
        # Render fixed-noise samples and the last real batch; outputs are in
        # [-1, 1], so map back to [0, 1] for display.
        netG.eval()
        fake = netG(z_sampler.fixed_noise)
        fakeshow('fake_images', fake * 0.5 + 0.5)
        realshow('real_images', engine.state.batch[0] * 0.5 + 0.5)
        trackers['real_rate'](engine.state.epoch, real_rate.compute())
        trackers['fake_rate'](engine.state.epoch, fake_rate.compute())
        real_rate.reset()
        fake_rate.reset()

    trainer.run(trainloader, FG.num_epoch)
import torchvision import torch.nn as nn from torchvision import transforms from torchvision.utils import save_image from torch.autograd import Variable import matplotlib.pyplot as plt import pylab import numpy as np num_gpu = 1 if torch.cuda.is_available() else 0 # load the models from dcgan import Discriminator, Generator D = Discriminator(ngpu=1).eval() G = Generator(ngpu=1).eval() D = D.double() # load weights D.load_state_dict(torch.load('weights/netD_epoch_199.pth', map_location='cpu')) G.load_state_dict(torch.load('weights/netG_epoch_199.pth', map_location='cpu')) if torch.cuda.is_available(): D = D.cuda() G = G.cuda() batch_size = 25 latent_size = 100 fixed_noise = torch.randn(batch_size, latent_size, 1, 1) if torch.cuda.is_available():
# --- DCGAN-MNIST setup: resume both nets from checkpoints and prepare training ---
# NOTE(review): depends on `image_size`, `channels_img`, `batch_size`, `z_dim`,
# `features_gen`, `features_disc`, `learning_rate`, `device`, `Generator`,
# `Discriminator` and `load_model` defined earlier in the file.
disc_file = 'parameters/dcgan_mnist/discriminator.pth'
gen_file = 'parameters/dcgan_mnist/generator.pth'

# NOTE(review): this rebinds the imported `transforms` module name to a
# Compose object — later code can no longer reach the torchvision module
# through this name. Works below, but fragile; consider renaming.
transforms = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize([0.5 for _ in range(channels_img)],
                         [0.5 for _ in range(channels_img)]),
])

dataset = datasets.MNIST(root='../datasets/', train=True, transform=transforms, download=True)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
gen = Generator(z_dim, channels_img, features_gen).to(device)
disc = Discriminator(channels_img, features_disc).to(device)
# Resume both networks from their saved parameters.
load_model(disc, disc_file, device)
load_model(gen, gen_file, device)

# DCGAN-standard Adam betas (0.5, 0.999).
opt_gen = optim.Adam(gen.parameters(), lr=learning_rate, betas=(0.5, 0.999))
opt_disc = optim.Adam(disc.parameters(), lr=learning_rate, betas=(0.5, 0.999))
criterion = nn.BCELoss()

# Fixed latent batch for consistent progress snapshots in TensorBoard.
fixed_noise = torch.randn(32, z_dim, 1, 1).to(device)
writer_real = SummaryWriter('runs/dcgan_mnist/real')
writer_fake = SummaryWriter('runs/dcgan_mnist/fake')
step = 0

# Put both networks in training mode (affects BatchNorm/Dropout).
gen.train()
disc.train()
"""Load a trained DCGAN generator and display a 4x8 grid of MNIST samples."""
import torch
from torch import nn
import torchvision
import matplotlib.pyplot as plt
import pylab

from dcgan import Generator
from utils import load_model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

channels_img = 1
features_gen = 64
image_size = 64
z_dim = 100

gen_file = 'parameters/dcgan_mnist/generator.pth'
gen = Generator(z_dim, channels_img, features_gen)
load_model(gen, gen_file, device)
# FIX: the noise below is created on `device`; the model must live there too
# (no-op if load_model already moved it — confirm against its definition).
gen = gen.to(device)
# FIX: inference mode so BatchNorm/Dropout behave deterministically.
gen.eval()

with torch.no_grad():  # no gradients needed for sampling
    noise = torch.randn(32, z_dim, 1, 1).to(device)
    fake = gen(noise).reshape(-1, 1, image_size, image_size)

for idx, image in enumerate(fake):
    pylab.subplot(4, 8, idx + 1)
    # FIX: .cpu() before .numpy() — .numpy() raises on CUDA tensors.
    pylab.imshow(image[0].detach().cpu().numpy(), cmap='gray')
    pylab.axis('off')
pylab.tight_layout()
pylab.show()
image_captions = {} h = h5py.File( '/content/drive/MyDrive/Deep_Learning/projects/test_faces1000.hdf5', 'r') face_captions = {} for key in h.keys(): if h[key].shape[0] == 0: continue face_captions[key] = h[key] print(len(face_captions)) # 1. Encode text using skipthought model # model = skipthoughts.load_model() ngpu = 1 device = torch.device("cuda:0" if ( torch.cuda.is_available() and ngpu > 0) else "cpu") netG = Generator(1, args.nz + args.t_in, args.ngf, args.nc).to(device) model_name = 'netG_' + args.epoch_load + '.pth' print(model_name) netG.load_state_dict( torch.load(os.path.join(args.checkpoint_dir, model_name))) print(f"Done loading {model_name}.") if torch.cuda.is_available(): netG = netG.cuda() for key in face_captions.keys(): print(key) text_embedding = torch.FloatTensor(face_captions[key][0]) # text_embedding = torch.FloatTensor(skipthoughts.encode(model, text)[0]) # 1x4800 embedding vector # 2. Gen faces using DcGAN '''
transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ])) # Create the dataloader dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=workers) # Decide which device we want to run on device = torch.device("cuda:0" if ( torch.cuda.is_available() and ngpu > 0) else "cpu") print(device) # custom weights initialization called on netG and netD # Create the generator netG = Generator(ngpu, nz + t_in, ngf, nc).to(device) netD = Discriminator(ngpu, nc, ndf, t_in).to(device) net_g_path = '/content/drive/MyDrive/Deep_Learning/projects/dcgan/continue_train/checkpoints/netG_latest.pth' net_d_path = '/content/drive/MyDrive/Deep_Learning/projects/dcgan/continue_train/checkpoints/netD_latest.pth' netG.load_state_dict(torch.load(net_g_path)) netD.load_state_dict(torch.load(net_d_path)) print("Done loading model!") # Handle multi-gpu if desired # if (device.type == 'cuda') and (ngpu > 1): # netG = nn.DataParallel(netG, list(range(ngpu))) # netD = nn.DataParallel(netD, list(range(ngpu))) # netG.apply(weights_init) # netD.apply(weights_init) netG = netG.cuda() netD = netD.cuda()
# Network hyper-parameters taken from the parsed command-line options.
ngf = int(opt.ngf)
ndf = int(opt.ndf)
imageSize = int(opt.imageSize)


def weights_init(m):
    """DCGAN weight init: N(0, 0.02) for conv layers; N(1, 0.02) weights and
    zero bias for batch-norm layers. Applied via Module.apply on netG/netD."""
    layer_name = m.__class__.__name__
    if 'Conv' in layer_name:
        m.weight.data.normal_(0.0, 0.02)
    elif 'BatchNorm' in layer_name:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# Generator: randomly initialized, optionally resumed from --netG.
netG = Generator(ngpu, ngf, nc, imageSize, nz).to(device)
netG.apply(weights_init)
if opt.netG != '':
    netG.load_state_dict(torch.load(opt.netG))
print(netG)


def count_parameters(model):
    """Return the number of trainable parameters in `model`."""
    trainable_sizes = (p.numel() for p in model.parameters() if p.requires_grad)
    return sum(trainable_sizes)


# Discriminator: same init/resume pattern as the generator.
netD = Discriminator(ngpu, ndf, nc, imageSize).to(device)
netD.apply(weights_init)
if opt.netD != '':
    netD.load_state_dict(torch.load(opt.netD))
print(netD)
# Get the data. true_dataloader, masked_dataloader = get_celeba(params) ''' # Plot the training images. sample_batch = next(iter(dataloader)) plt.figure(figsize=(8, 8)) plt.axis("off") plt.title("Training Images") plt.imshow(np.transpose(vutils.make_grid( sample_batch[0].to(device)[ : 64], padding=2, normalize=True).cpu(), (1, 2, 0))) plt.show() ''' # Create the generator. netG = Generator(params) # Apply the weights_init() function to randomly initialize all # weights to mean=0.0, stddev=0.2 netG.apply(weights_init) netG = netG.to(device) # Print the model. print(netG) # Create the discriminator. netD = Discriminator(params) # Apply the weights_init() function to randomly initialize all # weights to mean=0.0, stddev=0.2 netD.apply(weights_init) netD = netD.to(device) # Print the model.
transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )), ]) # prepare the data train_data = datasets.MNIST(root='../input/data', train=True, download=True, transform=transform) mini_train_data, mnist_restset = torch.utils.data.random_split( train_data, [int(0.9 * len(train_data)), int(0.1 * len(train_data))]) train_loader = DataLoader(mini_train_data, batch_size=batch_size, shuffle=True) # initialize models generator = Generator(nz).to(device) discriminator = Discriminator().to(device) # initialize generator weights generator.apply(weights_init) # initialize discriminator weights discriminator.apply(weights_init) print('##### GENERATOR #####') print(generator) params = list(generator.parameters()) for i in range(13): print(params[i].size()) # conv1's .weight print('######################') print('\n##### DISCRIMINATOR #####') print(discriminator)
def main():
    """Train a DCGAN on 1-D signals ('brilliant_blue' dataset) and save
    per-epoch plots of fixed-noise samples plus the final models.
    """
    # load training data
    trainset = Dataset('./data/brilliant_blue')

    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True
    )

    # init netD and netG (weights_init gives them DCGAN-style random weights)
    netD = Discriminator().to(device)
    netD.apply(weights_init)

    netG = Generator(nz).to(device)
    netG.apply(weights_init)

    criterion = nn.BCELoss()

    # used for visualzing training process; note 3-D noise (batch, nz, 1)
    # because the generator works on 1-D signals.
    fixed_noise = torch.randn(16, nz, 1, device=device)

    real_label = 1.
    fake_label = 0.
    optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

    for epoch in range(epoch_num):
        for step, (data, _) in enumerate(trainloader):
            real_cpu = data.to(device)
            b_size = real_cpu.size(0)

            # train netD: real batch with real labels...
            label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
            netD.zero_grad()

            output = netD(real_cpu).view(-1)
            errD_real = criterion(output, label)
            errD_real.backward()
            D_x = output.mean().item()

            # ...then a fake batch with fake labels; detach() keeps the
            # generator out of this backward pass.
            noise = torch.randn(b_size, nz, 1, device=device)
            fake = netG(noise)
            label.fill_(fake_label)
            output = netD(fake.detach()).view(-1)
            errD_fake = criterion(output, label)
            errD_fake.backward()
            D_G_z1 = output.mean().item()
            errD = errD_real + errD_fake
            optimizerD.step()

            # train netG: generator wants the (updated) discriminator to call
            # its fakes real, so the same `label` tensor is reused as all-real.
            netG.zero_grad()
            label.fill_(real_label)
            output = netD(fake).view(-1)
            errG = criterion(output, label)
            errG.backward()
            D_G_z2 = output.mean().item()
            optimizerG.step()

            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, epoch_num, step, len(trainloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # save training process: 4x4 grid of fixed-noise samples as line plots.
        with torch.no_grad():
            fake = netG(fixed_noise).detach().cpu()
            f, a = plt.subplots(4, 4, figsize=(8, 8))
            for i in range(4):
                for j in range(4):
                    a[i][j].plot(fake[i * 4 + j].view(-1))
                    a[i][j].set_xticks(())
                    a[i][j].set_yticks(())
            plt.savefig('./img/dcgan_epoch_%d.png' % epoch)
            plt.close()

    # save models (entire modules, not just state_dicts — loading requires the
    # class definitions to be importable).
    torch.save(netG, './nets/dcgan_netG.pkl')
    torch.save(netD, './nets/dcgan_netD.pkl')
# step 1: data fixed_noise = torch.randn(num_img, nz, 1, 1, device=device) flag = 0 # flag = 1 if flag: z_idx = 0 single_noise = torch.randn(1, nz, 1, 1, device=device) for i in range(num_img): add_noise = single_noise add_noise = add_noise[0, z_idx, 0, 0] + i * 0.01 fixed_noise[i, ...] = add_noise # step 2: model net_g = Generator(nz=nz, ngf=ngf, nc=nc) # net_d = Discriminator(nc=nc, ndf=ndf) checkpoint = torch.load(path_checkpoint, map_location="cpu") state_dict_g = checkpoint["g_model_state_dict"] state_dict_g = remove_module(state_dict_g) net_g.load_state_dict(state_dict_g) net_g.to(device) # net_d.load_state_dict(checkpoint["d_model_state_dict"]) # net_d.to(device) # step3: inference with torch.no_grad(): fake_data = net_g(fixed_noise).detach().cpu() img_grid = vutils.make_grid(fake_data, padding=2, normalize=True).numpy() img_grid = np.transpose(img_grid, (1, 2, 0))
args = parser.parse_args() # Load the checkpoint file. for epoch in range(12, 16, 2): path = f'./model/model_360cnn_epoch_{epoch}.pth' state_dict = torch.load(path, map_location='cpu') print("Generating images for model : {}".format(epoch)) # Set the device to run on: GPU or CPU. # device = torch.device("cuda:0" if(torch.cuda.is_available()) else "cpu") device = torch.device("cpu") # Get the 'params' dictionary from the loaded state_dict. params = state_dict['params'] # Create the generator network. netG = Generator(params).to(device) # Load the trained generator weights. netG.load_state_dict(state_dict['generator']) # print(netG) # print(args.num_output) # Get latent vector Z from unit normal distribution. for i in range(30): noise = torch.randn(int(args.num_output), params['nz'], 1, 1, device=device) # Turn off gradient calculation to speed up the process. with torch.no_grad():
def main():
    """Monte-Carlo estimation of a generator's input statistics.

    Samples latents c ~ N(0, I_D), maps them through the selected generator
    to inputs x in R^N, and maintains running estimates of E[c], E[x], the
    input-input covariance Omega and the input-latent covariance Phi.
    Convergence diagnostics are appended to a line-buffered .dat log file and
    the current estimates are dumped to .pt files at every checkpoint.

    Relies on Generator, RandomGenerator, Sign, log and tqdm being defined/
    imported elsewhere in the file.
    """
    parser = argparse.ArgumentParser()
    device_help = "which device to run on: 'cuda:x' or 'cpu'"
    scenario_help = "rand: four-layer random generator. dcgan: dcgan with random weights. cifar10: pre-trained dcgan."
    checkpoint_help = "checkpoint every ... steps"
    seed_help = "random number generator seed."
    parser.add_argument("--scenario", help=scenario_help, default="dcgan")
    parser.add_argument("--device", "-d", help=device_help)
    parser.add_argument("--bs", type=int, default=4096, help="batch size.")
    # NOTE: argparse applies type= only to command-line strings, so the
    # default stays the float 1e9 unless --steps is passed explicitly.
    parser.add_argument("--steps", type=int, default=1e9, help="number of steps")
    parser.add_argument("--checkpoint", type=int, default=1000, help=checkpoint_help)
    parser.add_argument("-q", "--quiet", help="be quiet", action="store_true")
    parser.add_argument("-s", "--seed", type=int, default=0, help=seed_help)
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    if args.device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(args.device)

    # Will use chunks of data of size (batch_size, N) or (batch_size, D) etc.
    batch_size = args.bs

    # Fix: initialise here. The "rand" branch below never assigns this name,
    # so the `if loadedweightsfrom is not None` check further down raised
    # NameError for that scenario.
    loadedweightsfrom = None

    if args.scenario in ["dcgan", "cifar10"]:
        D = 100   # latent dimension
        N = 3072  # input dimension
        generator = Generator(ngpu=1)
        generator.eval()
        generator.to(device)
        # load weights
        if args.scenario == "cifar10":
            loadedweightsfrom = "models/dcgan_cifar10.pth"
        else:
            loadedweightsfrom = "models/dcgan_rand.pth"
        generator.load_state_dict(
            torch.load(loadedweightsfrom, map_location=device))
    elif args.scenario == "rand":
        # Find the right generator for the given scenario
        D = 100
        N = 3072
        L = 4
        Ds = [D] * L + [N]
        f = Sign
        generator = RandomGenerator(Ds, f, batchnorm=False)
        generator.to(device)
        generator.eval()
    elif args.scenario == "spiked":
        # Find the right generator for the given scenario
        D = 100
        N = 3072
        L = 1
        Ds = [D] * L + [N]
        f = Sign
        generator = RandomGenerator(Ds, f, batchnorm=False)
        loadedweightsfrom = "models/rand_spiked_L1.pth"
        generator.load_state_dict(
            torch.load(loadedweightsfrom, map_location=device))
        generator.to(device)
        generator.eval()
    else:
        raise ValueError("Invalid scenario given.")

    max_P = args.steps * args.bs
    log_fname = "covariance_%s_P%g_s%d.dat" % (args.scenario, max_P, args.seed)
    logfile = open(log_fname, "w", buffering=1)  # line-buffered

    welcome = "# Computing the covariance for %s\n" % args.scenario
    if loadedweightsfrom is not None:
        welcome += "# with weights from %s\n" % loadedweightsfrom
    welcome += "# batch size=%d, seed=%d\n" % (batch_size, args.seed)
    welcome += "# Using device: %s\n" % str(device)
    welcome += "# samples, diff E c, diff E x, diff Omega, diff Phi"
    log(welcome, logfile)

    # Hold the Monte Carlo estimators computed here
    variables = ["mean_c", "mean_x", "omega", "phi"]
    mc = {
        "mean_c": torch.zeros(D).to(device),    # estimate of mean of c
        "mean_x": torch.zeros(N).to(device),    # estimate of mean of x
        "omega": torch.zeros(N, N).to(device),  # input-input covariance
        "phi": torch.zeros(N, D).to(device),    # input-latent covariance
    }
    M2_omega = torch.zeros(N, N).to(device)  # running estimate of residuals
    M2_phi = torch.zeros(N, D).to(device)    # running estimate of residuals

    # Closed-form reference values, available only for the "rand" scenario.
    exact = {"mean_c": None, "mean_x": None, "omega": None, "phi": None}
    if args.scenario == "rand":
        exact["mean_c"] = torch.zeros(D).to(device)
        exact["mean_x"] = torch.zeros(N).to(device)
        b = np.sqrt(2 / np.pi)
        c = 1
        b2 = pow(b, 2)
        Phi = None
        Omega = None
        for l in range(generator.num_layers):
            F = generator.generator[l * 2].weight.data
            if l == 0:
                Omega = b2 * F @ F.T
                Phi = b * F
            else:
                Omega = (b2 * F @ (
                    (c - b2) * torch.eye(F.shape[1]).to(device) + Omega) @ F.T)
                Phi = b * F @ Phi
        Omega[np.diag_indices(N)] = c
        exact["omega"] = Omega
        exact["phi"] = Phi

    # store the values of the covariance matrices at the last checkpoint
    mc_last = dict()
    for name in variables:
        mc_last[name] = torch.zeros(mc[name].shape).to(device)

    step = -1
    with torch.no_grad():
        while step < args.steps:
            for _ in tqdm(range(args.checkpoint)):
                # slightly unusual place for the step increment; preserves the
                # usual notation when computing the current estimate of the
                # covariance outside this loop
                step += 1

                # Generate a new batch of data
                cs = torch.randn(batch_size, D).to(device)
                # add dimensions for the convolutions
                if args.scenario in ["rand", "spiked"]:
                    latent = cs
                else:
                    latent = cs.unsqueeze(-1).unsqueeze(-1)
                # pass through the generator
                xs = generator(latent).reshape(-1, N)

                # Update the estimators (batched Welford-style update).
                ########################
                # Fix: clone() -- the in-place `+=` below mutates the same
                # tensor, so without clone() mc_mean_x_old aliased the
                # *updated* mean and the old-mean cross term of the residual
                # update was lost.
                mc_mean_x_old = mc["mean_x"].clone()
                # Start with the means
                dmean_c = torch.mean(cs, axis=0) - mc["mean_c"]
                mc["mean_c"] += dmean_c / (step + 1)
                dmean_x = torch.mean(xs, axis=0) - mc["mean_x"]
                mc["mean_x"] += dmean_x / (step + 1)
                # now the residuals
                M2_omega += (xs - mc_mean_x_old).T @ (
                    xs - mc["mean_x"]) / batch_size
                M2_phi += (xs - mc_mean_x_old).T @ (cs - mc["mean_c"]) / batch_size

            mc["omega"] = M2_omega / (step + 1)
            mc["phi"] = M2_phi / (step + 1)

            # Build status message: samples seen, then per-estimator drift
            # since the last checkpoint
            status = "%g" % (step * args.bs)
            for name in variables:
                diff = torch.sqrt(torch.mean((mc[name] - mc_last[name])**2))
                status += ", %g" % diff
            # if exact expression is available, also compute the error of the
            # current estimate (relative squared error)
            for name in ["omega", "phi"]:
                if exact[name] is None:
                    status += ", nan"
                else:
                    diff = torch.sum((mc[name] - exact[name])**2) / torch.sum(
                        exact[name]**2)
                    status += ", %g" % diff
            log(status, logfile)

            # Write the estimates to files
            for name in variables:
                fname = log_fname[:-4] + ("_%s_%g.pt" % (name, step * batch_size))
                torch.save(mc[name], fname)
            for name in variables:
                mc_last[name] = mc[name].clone().detach()