def main():
    """Train a GQN model across multiple GPUs with ChainerMN.

    One MPI process per GPU; each process reads its own dataset subset per
    outer loop pass. Rank 0 handles all snapshotting/logging. Relies on the
    module-level ``args`` namespace and project globals (``gqn``, ``Model``,
    ``Optimizer``, ``HyperParameters``, ``to_gpu``, ``printr``).
    """
    # Create the snapshot directory; only "already exists" is acceptable.
    try:
        os.mkdir(args.snapshot_path)
    except FileExistsError:
        pass

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    print("device", device, "/", comm.size)
    cuda.get_device(device).use()
    xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters()
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.channels_chz = args.channels_chz
    hyperparams.generator_channels_u = args.channels_u
    hyperparams.inference_channels_map_x = args.channels_map_x
    hyperparams.pixel_n = args.pixel_n
    hyperparams.pixel_sigma_i = args.initial_pixel_sigma
    hyperparams.pixel_sigma_f = args.final_pixel_sigma
    if comm.rank == 0:
        hyperparams.save(args.snapshot_path)
        hyperparams.print()

    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    model.to_gpu()

    optimizer = Optimizer(
        model.parameters,
        communicator=comm,
        mu_i=args.initial_lr,
        mu_f=args.final_lr)
    if comm.rank == 0:
        optimizer.print()

    dataset_mean, dataset_std = dataset.load_mean_and_std()
    if comm.rank == 0:
        np.save(os.path.join(args.snapshot_path, "mean.npy"), dataset_mean)
        np.save(os.path.join(args.snapshot_path, "std.npy"), dataset_std)

    # avoid division by zero
    dataset_std += 1e-12

    # Pixel-variance annealing buffers; sigma_t is updated every step below.
    sigma_t = hyperparams.pixel_sigma_i
    pixel_var = xp.full(
        (args.batch_size, 3) + hyperparams.image_size,
        sigma_t**2,
        dtype="float32")
    pixel_ln_var = xp.full(
        (args.batch_size, 3) + hyperparams.image_size,
        math.log(sigma_t**2),
        dtype="float32")

    # Same seed on every rank so shuffles agree across processes.
    random.seed(0)
    subset_indices = list(range(len(dataset.subset_filenames)))

    current_training_step = 0
    for iteration in range(args.training_iterations):
        mean_kld = 0
        mean_nll = 0
        total_batch = 0
        subset_size_per_gpu = len(subset_indices) // comm.size
        start_time = time.time()

        for subset_loop in range(subset_size_per_gpu):
            random.shuffle(subset_indices)
            subset_index = subset_indices[comm.rank]
            subset = dataset.read(subset_index)
            iterator = gqn.data.Iterator(subset, batch_size=args.batch_size)

            for batch_index, data_indices in enumerate(iterator):
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # preprocessing
                images = (images - dataset_mean) / dataset_std

                # (batch, views, height, width, channels) ->
                # (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3))

                total_views = images.shape[1]

                # sample number of observed views and the query view
                num_views = random.choice(range(total_views))
                query_index = random.choice(range(total_views))

                if current_training_step == 0 and num_views == 0:
                    num_views = 1  # avoid OpenMPI error

                if num_views > 0:
                    r = model.compute_observation_representation(
                        images[:, :num_views], viewpoints[:, :num_views])
                else:
                    # No observations: use an all-zero representation.
                    r = xp.zeros(
                        (args.batch_size, hyperparams.channels_r) +
                        hyperparams.chrz_size,
                        dtype="float32")
                    r = chainer.Variable(r)

                query_images = images[:, query_index]
                query_viewpoints = viewpoints[:, query_index]

                # transfer to gpu
                query_images = to_gpu(query_images)
                query_viewpoints = to_gpu(query_viewpoints)

                h0_gen, c0_gen, u_0, h0_enc, c0_enc = model.generate_initial_state(
                    args.batch_size, xp)

                loss_kld = 0

                hl_enc = h0_enc
                cl_enc = c0_enc
                hl_gen = h0_gen
                cl_gen = c0_gen
                ul_enc = u_0

                xq = model.inference_downsampler.downsample(query_images)

                # Unrolled inference/generation cores; KL accumulated per step.
                for l in range(model.generation_steps):
                    inference_core = model.get_inference_core(l)
                    inference_posterior = model.get_inference_posterior(l)
                    generation_core = model.get_generation_core(l)
                    generation_piror = model.get_generation_prior(l)

                    h_next_enc, c_next_enc = inference_core.forward_onestep(
                        hl_gen, hl_enc, cl_enc, xq, query_viewpoints, r)

                    mean_z_q = inference_posterior.compute_mean_z(hl_enc)
                    ln_var_z_q = inference_posterior.compute_ln_var_z(hl_enc)
                    ze_l = cf.gaussian(mean_z_q, ln_var_z_q)

                    mean_z_p = generation_piror.compute_mean_z(hl_gen)
                    ln_var_z_p = generation_piror.compute_ln_var_z(hl_gen)

                    h_next_gen, c_next_gen, u_next_enc = generation_core.forward_onestep(
                        hl_gen, cl_gen, ul_enc, ze_l, query_viewpoints, r)

                    kld = gqn.nn.chainer.functions.gaussian_kl_divergence(
                        mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)

                    loss_kld += cf.sum(kld)

                    hl_gen = h_next_gen
                    cl_gen = c_next_gen
                    ul_enc = u_next_enc
                    hl_enc = h_next_enc
                    cl_enc = c_next_enc

                mean_x = model.generation_observation.compute_mean_x(ul_enc)
                negative_log_likelihood = gqn.nn.chainer.functions.gaussian_negative_log_likelihood(
                    query_images, mean_x, pixel_var, pixel_ln_var)
                loss_nll = cf.sum(negative_log_likelihood)

                loss_nll /= args.batch_size
                loss_kld /= args.batch_size
                loss = loss_nll + loss_kld

                model.cleargrads()
                loss.backward()
                optimizer.update(current_training_step)

                if comm.rank == 0:
                    printr(
                        "Iteration {}: Subset {} / {}: Batch {} / {} - loss: nll: {:.3f} kld: {:.3f} - lr: {:.4e} - sigma_t: {:.6f}".
                        format(iteration + 1, subset_loop * comm.size + 1,
                               len(dataset), batch_index + 1,
                               len(subset) // args.batch_size,
                               float(loss_nll.data), float(loss_kld.data),
                               optimizer.learning_rate, sigma_t))

                # Linear pixel-sigma annealing, clamped at sigma_f.
                sf = hyperparams.pixel_sigma_f
                si = hyperparams.pixel_sigma_i
                sigma_t = max(
                    sf + (si - sf) *
                    (1.0 - current_training_step / hyperparams.pixel_n), sf)

                pixel_var[...] = sigma_t**2
                pixel_ln_var[...] = math.log(sigma_t**2)

                total_batch += 1
                # Every rank processed one batch, so the global step advances
                # by the communicator size.
                current_training_step += comm.size
                mean_kld += float(loss_kld.data)
                mean_nll += float(loss_nll.data)

            if comm.rank == 0:
                model.serialize(args.snapshot_path)

        if comm.rank == 0:
            elapsed_time = time.time() - start_time
            print(
                "\033[2KIteration {} - loss: nll: {:.3f} kld: {:.3f} - lr: {:.4e} - sigma_t: {:.6f} - step: {} - elapsed_time: {:.3f} min".
                format(iteration + 1, mean_nll / total_batch,
                       mean_kld / total_batch, optimizer.learning_rate,
                       sigma_t, current_training_step, elapsed_time / 60))
            model.serialize(args.snapshot_path)
def main():
    """Train a DRAW-style model on a single fixed batch and plot progress.

    Loads every ``.npy`` file in ``args.dataset_path`` into memory, trains on
    the first ``args.batch_size`` images only (the pixel-variance buffers are
    allocated with that batch dimension, so the slice size is load-bearing),
    and refreshes a 5-panel matplotlib figure every 10 iterations. Uses the
    module-level ``args`` and project globals (``draw``, ``Model``,
    ``AdamOptimizer``, ``HyperParameters``, ``to_gpu``, ``make_uint8``).
    """
    # Create the snapshot directory; only "already exists" is acceptable.
    try:
        os.mkdir(args.snapshot_directory)
    except FileExistsError:
        pass

    images = []
    files = os.listdir(args.dataset_path)
    for filename in files:
        image = np.load(os.path.join(args.dataset_path, filename))
        # [0, 255] -> [-1, 1]
        image = image / 255 * 2.0 - 1.0
        images.append(image)
    images = np.vstack(images)
    # (N, H, W, C) -> (N, C, H, W)
    images = images.transpose((0, 3, 1, 2)).astype(np.float32)

    train_dev_split = 0.9
    num_images = images.shape[0]
    num_train_images = int(num_images * train_dev_split)
    num_dev_images = num_images - num_train_images
    # NOTE(review): trains on a single fixed batch (overfitting/debug setup);
    # num_train_images is computed but deliberately not used here, since
    # pixel_var below is allocated with batch dim == args.batch_size.
    images_train = images[:args.batch_size]
    images_dev = images[args.batch_size:]

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    hyperparams = HyperParameters()
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.layer_normalization_enabled = args.layer_normalization
    hyperparams.pixel_n = args.pixel_n
    hyperparams.chz_channels = args.chz_channels
    hyperparams.inference_channels_downsampler_x = args.channels_downsampler_x
    hyperparams.pixel_sigma_i = args.initial_pixel_sigma
    hyperparams.pixel_sigma_f = args.final_pixel_sigma
    hyperparams.chrz_size = (32, 32)
    hyperparams.save(args.snapshot_directory)
    hyperparams.print()

    model = Model(hyperparams, snapshot_directory=args.snapshot_directory)
    if using_gpu:
        model.to_gpu()

    optimizer = AdamOptimizer(
        model.parameters, lr_i=args.initial_lr, lr_f=args.final_lr)
    optimizer.print()

    # Pixel-variance annealing buffers; sigma_t is updated every iteration.
    sigma_t = hyperparams.pixel_sigma_i
    pixel_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                        sigma_t**2,
                        dtype="float32")
    pixel_ln_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                           math.log(sigma_t**2),
                           dtype="float32")
    num_pixels = images.shape[1] * images.shape[2] * images.shape[3]

    # data / reconstruction / dev-data / dev-reconstruction / generation
    figure = plt.figure(figsize=(20, 4))
    axis_1 = figure.add_subplot(1, 5, 1)
    axis_2 = figure.add_subplot(1, 5, 2)
    axis_3 = figure.add_subplot(1, 5, 3)
    axis_4 = figure.add_subplot(1, 5, 4)
    axis_5 = figure.add_subplot(1, 5, 5)

    for iteration in range(args.training_steps):
        x = to_gpu(images_train)

        loss_kld = 0
        z_t_params_array, r_final = model.generate_z_params_and_x_from_posterior(
            x)
        for params in z_t_params_array:
            mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p = params
            kld = draw.nn.functions.gaussian_kl_divergence(
                mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)
            loss_kld += cf.sum(kld)

        mean_x_enc = r_final
        negative_log_likelihood = draw.nn.functions.gaussian_negative_log_likelihood(
            x, mean_x_enc, pixel_var, pixel_ln_var)
        loss_nll = cf.sum(negative_log_likelihood)
        loss_mse = cf.mean_squared_error(mean_x_enc, x)

        loss_nll /= args.batch_size
        loss_kld /= args.batch_size
        loss = loss_nll + loss_kld
        # NOTE(review): the line below discards the KL term, so only the
        # reconstruction loss is optimized — looks like a debug leftover;
        # kept to preserve current behavior. Confirm intent.
        loss = loss_nll

        model.cleargrads()
        loss.backward()
        optimizer.update(iteration)

        # Linear pixel-sigma annealing, clamped at sigma_f.
        sf = hyperparams.pixel_sigma_f
        si = hyperparams.pixel_sigma_i
        sigma_t = max(
            sf + (si - sf) * (1.0 - iteration / hyperparams.pixel_n), sf)
        pixel_var[...] = sigma_t**2
        pixel_ln_var[...] = math.log(sigma_t**2)

        model.serialize(args.snapshot_directory)
        print(
            "\033[2KIteration {} - loss: nll_per_pixel: {:.6f} - mse: {:.6f} - kld: {:.6f} - lr: {:.4e} - sigma_t: {:.6f}"
            .format(iteration + 1,
                    float(loss_nll.data) / num_pixels, float(loss_mse.data),
                    float(loss_kld.data), optimizer.learning_rate, sigma_t))

        if iteration % 10 == 0:
            axis_1.imshow(make_uint8(x[0]))
            axis_2.imshow(make_uint8(mean_x_enc.data[0]))

            # assumes num_dev_images <= len(images_dev) — TODO confirm,
            # since images_dev was sliced with args.batch_size above.
            x_dev = images_dev[random.choice(range(num_dev_images))]
            axis_3.imshow(make_uint8(x_dev))

            with chainer.using_config("train", False), chainer.using_config(
                    "enable_backprop", False):
                x_dev = to_gpu(x_dev)[None, ...]
                _, r_final = model.generate_z_params_and_x_from_posterior(
                    x_dev)
                mean_x_enc = r_final
                axis_4.imshow(make_uint8(mean_x_enc.data[0]))

                mean_x_d = model.generate_image(batch_size=1, xp=xp)
                axis_5.imshow(make_uint8(mean_x_d[0]))

            plt.pause(0.01)
def main():
    """Visualize per-step reconstructions and generations of a trained model.

    Loads the dataset, restores hyperparameters and a GRU/LSTM model from
    ``args.snapshot_directory``, and for each held-out image plots the data,
    the canvas at each posterior step, and the canvas at each prior step.
    Uses the module-level ``args`` and project globals (``draw``, ``GRUModel``,
    ``LSTMModel``, ``HyperParameters``, ``to_gpu``, ``to_cpu``, ``make_uint8``).
    """
    # Create the snapshot directory; only "already exists" is acceptable.
    try:
        os.mkdir(args.snapshot_directory)
    except FileExistsError:
        pass

    images = []
    files = os.listdir(args.dataset_path)
    files.sort()
    for filename in files:
        image = np.load(os.path.join(args.dataset_path, filename))
        # [0, 255] -> [0, 1]
        image = image / 255
        images.append(image)
    images = np.vstack(images)
    # (N, H, W, C) -> (N, C, H, W)
    images = images.transpose((0, 3, 1, 2)).astype(np.float32)

    train_dev_split = 0.9
    num_images = images.shape[0]
    num_train_images = int(num_images * train_dev_split)
    num_dev_images = num_images - num_train_images
    images_train = images[:num_train_images]
    # BUGFIX: was images[num_dev_images:], which overlapped most of the
    # training split; the dev set is the held-out last 10%.
    images_dev = images[num_train_images:]

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_directory)
    hyperparams.print()

    if hyperparams.use_gru:
        model = GRUModel(hyperparams,
                         snapshot_directory=args.snapshot_directory)
    else:
        model = LSTMModel(hyperparams,
                          snapshot_directory=args.snapshot_directory)
    if using_gpu:
        model.to_gpu()

    dataset = draw.data.Dataset(images_dev)
    iterator = draw.data.Iterator(dataset, batch_size=1)

    # One row per generation step: data | reconstruction | generation.
    cols = hyperparams.generator_generation_steps
    figure = plt.figure(figsize=(8, 4 * cols))
    axis_1 = figure.add_subplot(cols, 3, 1)
    axis_1.set_title("Data")

    axis_rec_array = []
    for n in range(cols):
        axis_rec_array.append(figure.add_subplot(cols, 3, n * 3 + 2))
    axis_rec_array[0].set_title("Reconstruction")

    axis_gen_array = []
    for n in range(cols):
        axis_gen_array.append(figure.add_subplot(cols, 3, n * 3 + 3))
    axis_gen_array[0].set_title("Generation")

    for batch_index, data_indices in enumerate(iterator):
        with chainer.using_config("train", False), chainer.using_config(
                "enable_backprop", False):
            x = dataset[data_indices]
            x = to_gpu(x)
            axis_1.imshow(make_uint8(x[0]))

            # Intermediate canvases from the posterior; final panel shows
            # the decoded mean.
            r_t_array, x_param = model.sample_image_at_each_step_from_posterior(
                x, zero_variance=args.zero_variance,
                step_limit=args.step_limit)
            for r_t, axis in zip(r_t_array, axis_rec_array[:-1]):
                r_t = to_cpu(r_t)
                axis.imshow(make_uint8(r_t[0]))
            mu_x, ln_var_x = x_param
            mu_x = to_cpu(mu_x.data)
            axis_rec_array[-1].imshow(make_uint8(mu_x[0]))

            # Same visualization, sampling from the prior instead.
            r_t_array, x_param = model.sample_image_at_each_step_from_prior(
                batch_size=1, xp=xp)
            for r_t, axis in zip(r_t_array, axis_gen_array[:-1]):
                r_t = to_cpu(r_t)
                axis.imshow(make_uint8(r_t[0]))
            mu_x, ln_var_x = x_param
            mu_x = to_cpu(mu_x.data)
            axis_gen_array[-1].imshow(make_uint8(mu_x[0]))

            plt.pause(0.01)
def main():
    """Train a DRAW-style model across multiple GPUs with ChainerMN.

    Each rank loads a rotated, disjoint slice of the dataset files. Images
    are scaled to [0, 1) by /256 and dequantized with uniform noise; the
    reported NLL adds ``math.log(256.0)`` per pixel to compensate. Rank 0
    handles snapshotting/logging. Uses the module-level ``args`` and project
    globals (``draw``, ``GRUModel``, ``LSTMModel``, ``AdamOptimizer``,
    ``HyperParameters``, ``to_gpu``, ``printr``).
    """
    # Create the snapshot directory; only "already exists" is acceptable.
    try:
        os.mkdir(args.snapshot_directory)
    except FileExistsError:
        pass

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    cuda.get_device(device).use()
    xp = cp

    images = []
    files = os.listdir(args.dataset_path)
    files.sort()
    # Rotate the sorted file list so each rank takes a distinct slice.
    subset_size = int(math.ceil(len(files) / comm.size))
    files = deque(files)
    files.rotate(-subset_size * comm.rank)
    files = list(files)[:subset_size]
    for filename in files:
        image = np.load(os.path.join(args.dataset_path, filename))
        # [0, 255] -> [0, 1): /256 (not /255) pairs with the log(256)
        # offset in the reported NLL and the dequantization noise below.
        image = image / 256
        images.append(image)
    print(comm.rank, files)

    images = np.vstack(images)
    # (N, H, W, C) -> (N, C, H, W)
    images = images.transpose((0, 3, 1, 2)).astype(np.float32)

    train_dev_split = 0.9
    num_images = images.shape[0]
    num_train_images = int(num_images * train_dev_split)
    num_dev_images = num_images - num_train_images
    images_train = images[:num_train_images]

    # To avoid OpenMPI bug
    # multiprocessing.set_start_method("forkserver")
    # p = multiprocessing.Process(target=print, args=("", ))
    # p.start()
    # p.join()

    hyperparams = HyperParameters()
    hyperparams.chz_channels = args.chz_channels
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_share_upsampler = args.generator_share_upsampler
    hyperparams.generator_downsampler_channels = args.generator_downsampler_channels
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.inference_downsampler_channels = args.inference_downsampler_channels
    hyperparams.batch_normalization_enabled = args.enable_batch_normalization
    hyperparams.use_gru = args.use_gru
    hyperparams.no_backprop_diff_xr = args.no_backprop_diff_xr
    if comm.rank == 0:
        hyperparams.save(args.snapshot_directory)
        hyperparams.print()

    if args.use_gru:
        model = GRUModel(hyperparams,
                         snapshot_directory=args.snapshot_directory)
    else:
        model = LSTMModel(hyperparams,
                          snapshot_directory=args.snapshot_directory)
    model.to_gpu()

    optimizer = AdamOptimizer(
        model.parameters,
        lr_i=args.initial_lr,
        lr_f=args.final_lr,
        beta_1=args.adam_beta1,
        communicator=comm)
    if comm.rank == 0:
        optimizer.print()

    num_pixels = images.shape[1] * images.shape[2] * images.shape[3]

    dataset = draw.data.Dataset(images_train)
    iterator = draw.data.Iterator(dataset, batch_size=args.batch_size)

    num_updates = 0

    for iteration in range(args.training_steps):
        mean_kld = 0
        mean_nll = 0
        mean_mse = 0
        start_time = time.time()

        for batch_index, data_indices in enumerate(iterator):
            x = dataset[data_indices]
            # Uniform dequantization noise over one 1/256 quantization bin.
            x += np.random.uniform(0, 1 / 256, size=x.shape)
            x = to_gpu(x)

            z_t_param_array, x_param, r_t_array = model.sample_z_and_x_params_from_posterior(
                x)

            loss_kld = 0
            for params in z_t_param_array:
                mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p = params
                kld = draw.nn.functions.gaussian_kl_divergence(
                    mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)
                loss_kld += cf.sum(kld)

            # Auxiliary sum-of-squares term over every intermediate canvas.
            loss_sse = 0
            for r_t in r_t_array:
                loss_sse += cf.sum(cf.squared_error(r_t, x))

            mu_x, ln_var_x = x_param
            loss_nll = cf.gaussian_nll(x, mu_x, ln_var_x)

            loss_nll /= args.batch_size
            loss_kld /= args.batch_size
            loss_sse /= args.batch_size

            loss = args.loss_beta * loss_nll + loss_kld + args.loss_alpha * loss_sse

            model.cleargrads()
            loss.backward(loss_scale=optimizer.loss_scale())
            optimizer.update(num_updates, loss_value=float(loss.array))

            num_updates += 1
            mean_kld += float(loss_kld.data)
            mean_nll += float(loss_nll.data)
            mean_mse += float(loss_sse.data) / num_pixels / (
                hyperparams.generator_generation_steps - 1)

            printr(
                "Iteration {}: Batch {} / {} - loss: nll_per_pixel: {:.6f} - mse: {:.6f} - kld: {:.6f} - lr: {:.4e}"
                .format(
                    iteration + 1, batch_index + 1, len(iterator),
                    float(loss_nll.data) / num_pixels + math.log(256.0),
                    float(loss_sse.data) / num_pixels /
                    (hyperparams.generator_generation_steps - 1),
                    float(loss_kld.data), optimizer.learning_rate))

            # Periodic mid-epoch snapshot from rank 0 only.
            if comm.rank == 0 and batch_index > 0 and batch_index % 100 == 0:
                model.serialize(args.snapshot_directory)

        if comm.rank == 0:
            model.serialize(args.snapshot_directory)

        if comm.rank == 0:
            elapsed_time = time.time() - start_time
            print(
                "\r\033[2KIteration {} - loss: nll_per_pixel: {:.6f} - mse: {:.6f} - kld: {:.6f} - lr: {:.4e} - elapsed_time: {:.3f} min"
                .format(
                    iteration + 1,
                    mean_nll / len(iterator) / num_pixels + math.log(256.0),
                    mean_mse / len(iterator), mean_kld / len(iterator),
                    optimizer.learning_rate, elapsed_time / 60))
def main():
    """Train a GQN model on a single device (CPU or one GPU).

    Single-process variant of the ChainerMN trainer: iterates every dataset
    subset per iteration, optionally drives a live imgplot window showing
    query / reconstruction / generation. Uses the module-level ``args`` and
    project globals (``gqn``, ``Model``, ``Optimizer``, ``HyperParameters``,
    ``to_gpu``, ``make_uint8``, ``printr``).
    """
    # Create the snapshot directory; only "already exists" is acceptable.
    try:
        os.mkdir(args.snapshot_path)
    except FileExistsError:
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters()
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.pixel_n = args.pixel_n
    hyperparams.pixel_sigma_i = args.initial_pixel_sigma
    hyperparams.pixel_sigma_f = args.final_pixel_sigma
    hyperparams.save(args.snapshot_path)
    hyperparams.print()

    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    optimizer = Optimizer(
        model.parameters, mu_i=args.initial_lr, mu_f=args.final_lr)
    optimizer.print()

    if args.with_visualization:
        figure = gqn.imgplot.figure()
        axis1 = gqn.imgplot.image()
        axis2 = gqn.imgplot.image()
        axis3 = gqn.imgplot.image()
        figure.add(axis1, 0, 0, 1 / 3, 1)
        figure.add(axis2, 1 / 3, 0, 1 / 3, 1)
        figure.add(axis3, 2 / 3, 0, 1 / 3, 1)
        plot = gqn.imgplot.window(
            figure, (500 * 3, 500),
            "Query image / Reconstructed image / Generated image")
        plot.show()

    # Pixel-variance annealing buffers; sigma_t is updated every step below.
    sigma_t = hyperparams.pixel_sigma_i
    pixel_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                        sigma_t**2,
                        dtype="float32")
    pixel_ln_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                           math.log(sigma_t**2),
                           dtype="float32")

    dataset_mean, dataset_std = dataset.load_mean_and_std()
    np.save(os.path.join(args.snapshot_path, "mean.npy"), dataset_mean)
    np.save(os.path.join(args.snapshot_path, "std.npy"), dataset_std)

    # avoid division by zero
    dataset_std += 1e-12

    current_training_step = 0
    for iteration in range(args.training_iterations):
        mean_kld = 0
        mean_nll = 0
        total_batch = 0

        for subset_index, subset in enumerate(dataset):
            iterator = gqn.data.Iterator(subset, batch_size=args.batch_size)

            for batch_index, data_indices in enumerate(iterator):
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # preprocessing
                images = (images - dataset_mean) / dataset_std

                # (batch, views, height, width, channels) ->
                # (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3))

                total_views = images.shape[1]

                # sample number of observed views and the query view
                num_views = random.choice(range(total_views))
                query_index = random.choice(range(total_views))

                if num_views > 0:
                    r = model.compute_observation_representation(
                        images[:, :num_views], viewpoints[:, :num_views])
                else:
                    # No observations: use an all-zero representation.
                    r = xp.zeros((args.batch_size, hyperparams.channels_r) +
                                 hyperparams.chrz_size,
                                 dtype="float32")
                    r = chainer.Variable(r)

                query_images = images[:, query_index]
                query_viewpoints = viewpoints[:, query_index]

                # transfer to gpu
                query_images = to_gpu(query_images)
                query_viewpoints = to_gpu(query_viewpoints)

                h0_gen, c0_gen, u_0, h0_enc, c0_enc = model.generate_initial_state(
                    args.batch_size, xp)

                loss_kld = 0

                hl_enc = h0_enc
                cl_enc = c0_enc
                hl_gen = h0_gen
                cl_gen = c0_gen
                ul_enc = u_0

                xq = model.inference_downsampler.downsample(query_images)

                # Unrolled inference/generation cores; KL accumulated per step.
                for l in range(model.generation_steps):
                    inference_core = model.get_inference_core(l)
                    inference_posterior = model.get_inference_posterior(l)
                    generation_core = model.get_generation_core(l)
                    generation_piror = model.get_generation_prior(l)

                    h_next_enc, c_next_enc = inference_core.forward_onestep(
                        hl_gen, hl_enc, cl_enc, xq, query_viewpoints, r)

                    mean_z_q = inference_posterior.compute_mean_z(hl_enc)
                    ln_var_z_q = inference_posterior.compute_ln_var_z(hl_enc)
                    ze_l = cf.gaussian(mean_z_q, ln_var_z_q)

                    mean_z_p = generation_piror.compute_mean_z(hl_gen)
                    ln_var_z_p = generation_piror.compute_ln_var_z(hl_gen)

                    h_next_gen, c_next_gen, u_next_enc = generation_core.forward_onestep(
                        hl_gen, cl_gen, ul_enc, ze_l, query_viewpoints, r)

                    kld = gqn.nn.chainer.functions.gaussian_kl_divergence(
                        mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)

                    loss_kld += cf.sum(kld)

                    hl_gen = h_next_gen
                    cl_gen = c_next_gen
                    ul_enc = u_next_enc
                    hl_enc = h_next_enc
                    cl_enc = c_next_enc

                mean_x = model.generation_observation.compute_mean_x(ul_enc)
                negative_log_likelihood = gqn.nn.chainer.functions.gaussian_negative_log_likelihood(
                    query_images, mean_x, pixel_var, pixel_ln_var)
                loss_nll = cf.sum(negative_log_likelihood)

                loss_nll /= args.batch_size
                loss_kld /= args.batch_size
                loss = loss_nll + loss_kld

                model.cleargrads()
                loss.backward()
                optimizer.update(current_training_step)

                if args.with_visualization and plot.closed() is False:
                    axis1.update(
                        make_uint8(query_images[0], dataset_mean,
                                   dataset_std))
                    axis2.update(
                        make_uint8(mean_x.data[0], dataset_mean, dataset_std))
                    with chainer.no_backprop_mode():
                        generated_x = model.generate_image(
                            query_viewpoints[None, 0], r[None, 0], xp)
                        axis3.update(
                            make_uint8(generated_x[0], dataset_mean,
                                       dataset_std))

                printr(
                    "Iteration {}: Subset {} / {}: Batch {} / {} - loss: nll: {:.3f} kld: {:.3f} - lr: {:.4e} - sigma_t: {:.6f}"
                    .format(iteration + 1, subset_index + 1, len(dataset),
                            batch_index + 1, len(iterator),
                            float(loss_nll.data), float(loss_kld.data),
                            optimizer.learning_rate, sigma_t))

                # Linear pixel-sigma annealing, clamped at sigma_f.
                sf = hyperparams.pixel_sigma_f
                si = hyperparams.pixel_sigma_i
                sigma_t = max(
                    sf + (si - sf) *
                    (1.0 - current_training_step / hyperparams.pixel_n), sf)

                pixel_var[...] = sigma_t**2
                pixel_ln_var[...] = math.log(sigma_t**2)

                total_batch += 1
                current_training_step += 1
                mean_kld += float(loss_kld.data)
                mean_nll += float(loss_nll.data)

            model.serialize(args.snapshot_path)

        print(
            "\033[2KIteration {} - loss: nll: {:.3f} kld: {:.3f} - lr: {:.4e} - sigma_t: {:.6f} - step: {}"
            .format(iteration + 1, mean_nll / total_batch,
                    mean_kld / total_batch, optimizer.learning_rate, sigma_t,
                    current_training_step))
def main():
    """Interactive viewer: render a Shepard-Metzler scene, observe it from one
    viewpoint, then show GQN generations while the viewpoint random-walks.

    Runs forever until the window is closed. Relies on the module-level
    ``args`` namespace and project globals (``gqn``, ``Model``,
    ``HyperParameters``, ``to_gpu``, ``make_uint8``).
    """
    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    # Restore hyperparameters and trained weights from the snapshot.
    hyperparams = HyperParameters(args.snapshot_path)
    hyperparams.print()

    model = Model(hyperparams, hdf5_path=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    screen_size = hyperparams.image_size
    camera = gqn.three.PerspectiveCamera(
        eye=(3, 1, 0),
        center=(0, 0, 0),
        up=(0, 1, 0),
        fov_rad=math.pi / 2.0,
        aspect_ratio=screen_size[0] / screen_size[1],
        z_near=0.1,
        z_far=10)

    # Two side-by-side panels: observed frame (left), generated frame (right).
    figure = gqn.imgplot.figure()
    axis_observation = gqn.imgplot.image()
    axis_generation = gqn.imgplot.image()
    figure.add(axis_observation, 0, 0, 0.5, 1)
    figure.add(axis_generation, 0.5, 0, 0.5, 1)
    window = gqn.imgplot.window(figure, (1600, 800), "Viewpoint")
    window.show()

    # Reusable buffers: renderer output and single-element model inputs.
    raw_observed_images = np.zeros(screen_size + (3, ), dtype="uint32")
    renderer = gqn.three.Renderer(screen_size[0], screen_size[1])
    observed_image = xp.zeros((1, 3) + screen_size, dtype="float32")
    observed_viewpoint = xp.zeros((1, 7), dtype="float32")
    query_viewpoint = xp.zeros((1, 7), dtype="float32")

    with chainer.no_backprop_mode():
        while True:
            if window.closed():
                exit()

            # New random scene each pass (range(7, 8) always yields 7 blocks).
            scene, _ = gqn.environment.shepard_metzler.build_scene(
                num_blocks=random.choice([x for x in range(7, 8)]))
            renderer.set_scene(scene)

            # Observation viewpoint on a circle of radius 3 in the y=0 plane.
            rad = random.uniform(0, math.pi * 2)
            eye = (3.0 * math.cos(rad), 0, 3.0 * math.sin(rad))
            center = (0, 0, 0)
            yaw = gqn.math.yaw(eye, center)
            pitch = gqn.math.pitch(eye, center)
            camera.look_at(
                eye=eye,
                center=center,
                up=(0.0, 1.0, 0.0),
            )
            renderer.render(camera, raw_observed_images)

            # [0, 255] -> [-1, 1]
            observed_image[0] = to_gpu((raw_observed_images.transpose(
                (2, 0, 1)) / 255 - 0.5) * 2.0)
            axis_observation.update(make_uint8(observed_image[0]))

            # Viewpoint encoding: (x, y, z, cos/sin yaw, cos/sin pitch).
            observed_viewpoint[0] = xp.array(
                (eye[0], eye[1], eye[2], math.cos(yaw), math.sin(yaw),
                 math.cos(pitch), math.sin(pitch)),
                dtype="float32")

            # Scene representation from the single observation.
            r = model.representation_network.compute_r(observed_image,
                                                       observed_viewpoint)

            # Random-walk yaw/pitch around the observation and generate
            # frames; eye stays fixed while only the orientation drifts.
            num_samples = 50
            for _ in range(num_samples):
                if window.closed():
                    exit()
                yaw += np.random.normal(0, 0.05, size=1)[0]
                pitch += np.random.normal(0, 0.05, size=1)[0]
                query_viewpoint[0] = xp.array(
                    (eye[0], eye[1], eye[2], math.cos(yaw), math.sin(yaw),
                     math.cos(pitch), math.sin(pitch)),
                    dtype="float32")
                generated_image = model.generate_image(query_viewpoint, r, xp)
                axis_generation.update(make_uint8(generated_image[0]))