Example #1
    def encode_scene(images, viewpoints):
        # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
        images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)

        # Sample number of views
        total_views = images.shape[1]
        num_views = random.choice(range(1, total_views + 1))

        # Sample views
        observation_view_indices = list(range(total_views))
        random.shuffle(observation_view_indices)
        observation_view_indices = observation_view_indices[:num_views]

        observation_images = preprocess_images(
            images[:, observation_view_indices])

        observation_query = viewpoints[:, observation_view_indices]
        representation = model.compute_observation_representation(
            observation_images, observation_query)

        # Sample query view
        query_index = random.choice(range(total_views))
        query_images = preprocess_images(images[:, query_index])
        query_viewpoints = viewpoints[:, query_index]

        # Transfer to gpu if necessary
        query_images = to_device(query_images, gpu_device)
        query_viewpoints = to_device(query_viewpoints, gpu_device)

        return representation, query_images, query_viewpoints
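
The helper above relies on module-level `model`, `preprocess_images`, `to_device`, and `gpu_device` objects. A minimal sketch of how its outputs might be consumed, assuming the model exposes the `generate_image` method seen in the later examples:

    representation, query_images, query_viewpoints = encode_scene(images, viewpoints)
    # Hypothetical scoring of the rendered query view against the ground truth
    generated_images = model.generate_image(query_viewpoints, representation)
    mse = float(cf.mean_squared_error(query_images, generated_images).data)
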
Example #2
def main():
    os.makedirs(args.figure_directory, exist_ok=True)

    #==============================================================================
    # Utilities
    #==============================================================================
    def read_files(directory):
        filenames = []
        files = os.listdir(directory)
        for filename in files:
            if filename.endswith(".h5"):
                filenames.append(filename)
        filenames.sort()

        dataset_images = []
        dataset_viewpoints = []
        for i in range(len(filenames)):
            F = h5py.File(os.path.join(directory, filenames[i]), "r")
            tmp_images = list(F["images"])
            tmp_viewpoints = list(F["viewpoints"])

            dataset_images.extend(tmp_images)
            dataset_viewpoints.extend(tmp_viewpoints)

        dataset_images = np.array(dataset_images)
        dataset_viewpoints = np.array(dataset_viewpoints)

        dataset = list()
        for i in range(len(dataset_images)):
            item = {
                'image': dataset_images[i],
                'viewpoint': dataset_viewpoints[i]
            }
            dataset.append(item)

        return dataset

    def to_device(array):
        # This script always runs on the GPU (xp = cp below)
        return cuda.to_gpu(array)

    def fill_observations_axis(observation_images):
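        # Lay the observed frames side by side, centered within a strip wide
        # enough for total_observations_per_scene images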
        axis_observations_image = np.full(
            (3, image_shape[1], total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
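        # One full rotation (2*pi) every fps*2 frames, i.e. every two seconds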
        return t * 2 * math.pi / (fps * 2)

    def rotate_query_viewpoint(horizontal_angle_rad, camera_distance,
                               camera_position_y):
        camera_position = np.array([
            camera_distance * math.sin(horizontal_angle_rad),  # x
            camera_position_y,
            camera_distance * math.cos(horizontal_angle_rad),  # z
        ])
        center = np.array((0, camera_position_y, 0))
        camera_direction = camera_position - center
        yaw, pitch = compute_yaw_and_pitch(camera_direction)

        query_viewpoints = xp.array(
            (
                camera_position[0],
                camera_position[1],
                camera_position[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)

        return query_viewpoints

    def render(representation,
               camera_distance,
               camera_position_y,
               total_frames,
               animation_frame_array,
               rotate_camera=True):

        for t in range(0, total_frames):
            artist_array = [
                axis_observations.imshow(cv2.cvtColor(
                    make_uint8(axis_observations_image), cv2.COLOR_BGR2RGB),
                                         interpolation="none",
                                         animated=True)
            ]

            horizontal_angle_rad = compute_camera_angle_at_frame(t)
            if not rotate_camera:
                horizontal_angle_rad = compute_camera_angle_at_frame(0)

            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      camera_distance,
                                                      camera_position_y)

            generated_images = model.generate_image(query_viewpoints,
                                                    representation)[0]
            generated_images = chainer.backends.cuda.to_cpu(generated_images)
            generated_images = make_uint8(generated_images)
            generated_images = cv2.cvtColor(generated_images,
                                            cv2.COLOR_BGR2RGB)

            artist_array.append(
                axis_generation.imshow(generated_images,
                                       interpolation="none",
                                       animated=True))

            animation_frame_array.append(artist_array)

    def render_wVar(representation,
                    camera_distance,
                    camera_position_y,
                    total_frames,
                    animation_frame_array,
                    no_of_samples,
                    rotate_camera=True,
                    wVariance=True):

        for t in range(0, total_frames):
            artist_array = [
                axis_observations.imshow(cv2.cvtColor(
                    make_uint8(axis_observations_image), cv2.COLOR_BGR2RGB),
                                         interpolation="none",
                                         animated=True)
            ]

            horizontal_angle_rad = compute_camera_angle_at_frame(t)
            if not rotate_camera:
                horizontal_angle_rad = compute_camera_angle_at_frame(0)

            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      camera_distance,
                                                      camera_position_y)

            generated_images = cp.squeeze(
                cp.array(
                    model.generate_images(query_viewpoints, representation,
                                          no_of_samples)))
            var_image = cp.var(generated_images, axis=0)
            mean_image = cp.mean(generated_images, axis=0)
            mean_image = make_uint8(
                np.squeeze(chainer.backends.cuda.to_cpu(mean_image)))
            mean_image_rgb = cv2.cvtColor(mean_image, cv2.COLOR_BGR2RGB)

            var_image = chainer.backends.cuda.to_cpu(var_image)

            # Convert the per-channel variance map to grayscale (BT.601 luma weights)
            r, g, b = var_image
            gray_var_image = 0.2989 * r + 0.5870 * g + 0.1140 * b

            artist_array.append(
                axis_generation_var.imshow(gray_var_image,
                                           cmap=plt.cm.gray,
                                           interpolation="none",
                                           animated=True))

            artist_array.append(
                axis_generation_mean.imshow(mean_image_rgb,
                                            interpolation="none",
                                            animated=True))

            animation_frame_array.append(artist_array)

    # loading dataset & model
    cuda.get_device(args.gpu_device).use()
    xp = cp

    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    chainer.serializers.load_hdf5(args.snapshot_file, model)
    model.to_gpu()

    total_observations_per_scene = 4
    fps = 30

    black_color = -0.5
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    # fig.suptitle("GQN")
    axis_observations = fig.add_subplot(2, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")
    axis_generation = fig.add_subplot(2, 1, 2)
    axis_generation.axis("off")
    axis_generation.set_title("Rendered Predictions")
    axis_generation_var = fig.add_subplot(2, 2, 3)
    axis_generation_var.axis("off")
    axis_generation_var.set_title("Variance Render")
    axis_generation_mean = fig.add_subplot(2, 2, 4)
    axis_generation_mean.axis("off")
    axis_generation_mean.set_title("Mean Render")

    # iterator
    dataset = read_files(args.dataset_directory)
    file_number = 1
    with chainer.no_backprop_mode():

        iterator = chainer.iterators.SerialIterator(dataset, batch_size=1)
        for i in tqdm(range(len(iterator.dataset))):
            animation_frame_array = []
            images = np.array([iterator.dataset[i]["image"]])
            viewpoints = np.array([iterator.dataset[i]["viewpoint"]])

            camera_distance = np.mean(
                np.linalg.norm(viewpoints[:, :, :3], axis=2))
            camera_position_y = np.mean(viewpoints[:, :, 1])

            images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
            images = preprocess_images(images)

            batch_index = 0

            total_views = images.shape[1]
            random_observation_view_indices = list(range(total_views))
            random.shuffle(random_observation_view_indices)
            random_observation_view_indices = (
                random_observation_view_indices[:total_observations_per_scene])
            observed_images = images[batch_index,
                                     random_observation_view_indices]
            observed_viewpoints = viewpoints[batch_index,
                                             random_observation_view_indices]

            observed_images = to_device(observed_images)
            observed_viewpoints = to_device(observed_viewpoints)

            # Scene encoder
            representation = model.compute_observation_representation(
                observed_images[None, :1], observed_viewpoints[None, :1])

            # Update figure
            observation_index = random_observation_view_indices[0]
            observed_image = images[batch_index, observation_index]
            axis_observations_image = fill_observations_axis([observed_image])

            # Neural rendering
            # render(representation, camera_distance, camera_position_y,
            #         fps * 2, animation_frame_array)
            render_wVar(representation, camera_distance, camera_position_y,
                        fps * 2, animation_frame_array, 100)

            for n in range(total_observations_per_scene):
                observation_indices = random_observation_view_indices[:n + 1]
                axis_observations_image = fill_observations_axis(
                    images[batch_index, observation_indices])

                # Scene encoder
                representation = model.compute_observation_representation(
                    observed_images[None, :n + 1],
                    observed_viewpoints[None, :n + 1])
                # Neural rendering
                # render(representation, camera_distance, camera_position_y,
                #     fps // 2, animation_frame_array,rotate_camera=False)
                render_wVar(representation,
                            camera_distance,
                            camera_position_y,
                            fps // 2,
                            animation_frame_array,
                            100,
                            rotate_camera=False,
                            wVariance=False)

            # Scene encoder with all given observations
            representation = model.compute_observation_representation(
                observed_images[None, :total_observations_per_scene],
                observed_viewpoints[None, :total_observations_per_scene])

            # Neural rendering
            # render(representation, camera_distance, camera_position_y,
            #         fps * 6, animation_frame_array)
            render_wVar(representation, camera_distance, camera_position_y,
                        fps * 6, animation_frame_array, 100)

            anim = animation.ArtistAnimation(
                fig,
                animation_frame_array,
                interval=1000 / fps,  # ArtistAnimation expects milliseconds
                blit=True,
                repeat_delay=0)

            anim.save("{}/observations_{}.gif".format(args.figure_directory,
                                                      file_number),
                      writer="imagemagick",
                      fps=10)
            # anim.save(
            #     "{}/rooms_ring_camera_observations_{}.mp4".format(
            #         args.figure_directory, file_number),
            #     writer='ffmpeg',
            #     fps=10)

            file_number += 1
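
Several of these scripts call `compute_yaw_and_pitch` without defining it. A minimal sketch consistent with the 7-d viewpoint packing above, assuming yaw is measured around the y-axis and pitch is a negated elevation; the repository's exact sign conventions are an assumption here:

    def compute_yaw_and_pitch(camera_direction):
        # camera_direction points from the scene center toward the camera
        x, y, z = camera_direction
        norm = math.sqrt(x * x + y * y + z * z)
        yaw = math.atan2(x, z) % (2 * math.pi)  # angle around the y-axis
        pitch = -math.asin(y / norm)            # sign convention assumed
        return yaw, pitch
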
Example #3
def main():
    os.makedirs(args.snapshot_directory, exist_ok=True)

    np.random.seed(0)

    xp = np
    device_gpu = args.gpu_device
    device_cpu = -1
    using_gpu = device_gpu >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_directory)

    hyperparams = HyperParameters()
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.generator_share_upsampler = args.generator_share_upsampler
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.h_channels = args.h_channels
    hyperparams.z_channels = args.z_channels
    hyperparams.u_channels = args.u_channels
    hyperparams.image_size = (args.image_size, args.image_size)
    hyperparams.representation_channels = args.representation_channels
    hyperparams.representation_architecture = args.representation_architecture
    hyperparams.pixel_n = args.pixel_n
    hyperparams.pixel_sigma_i = args.initial_pixel_variance
    hyperparams.pixel_sigma_f = args.final_pixel_variance
    hyperparams.save(args.snapshot_directory)
    print(hyperparams)

    model = Model(hyperparams,
                  snapshot_directory=args.snapshot_directory,
                  optimized=args.optimized)
    if using_gpu:
        model.to_gpu()

    scheduler = Scheduler(sigma_start=args.initial_pixel_variance,
                          sigma_end=args.final_pixel_variance,
                          final_num_updates=args.pixel_n,
                          snapshot_directory=args.snapshot_directory)
    print(scheduler)

    optimizer = AdamOptimizer(model.parameters,
                              mu_i=args.initial_lr,
                              mu_f=args.final_lr,
                              initial_training_step=scheduler.num_updates)
    print(optimizer)

    pixel_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                        scheduler.pixel_variance**2,
                        dtype="float32")
    pixel_ln_var = xp.full((args.batch_size, 3) + hyperparams.image_size,
                           math.log(scheduler.pixel_variance**2),
                           dtype="float32")

    representation_shape = (args.batch_size,
                            hyperparams.representation_channels,
                            args.image_size // 4, args.image_size // 4)

    fig = plt.figure(figsize=(9, 3))
    axis_data = fig.add_subplot(1, 3, 1)
    axis_data.set_title("Data")
    axis_data.axis("off")
    axis_reconstruction = fig.add_subplot(1, 3, 2)
    axis_reconstruction.set_title("Reconstruction")
    axis_reconstruction.axis("off")
    axis_generation = fig.add_subplot(1, 3, 3)
    axis_generation.set_title("Generation")
    axis_generation.axis("off")

    current_training_step = 0
    for iteration in range(args.training_iterations):
        mean_kld = 0
        mean_nll = 0
        mean_mse = 0
        mean_elbo = 0
        total_num_batch = 0
        start_time = time.time()

        for subset_index, subset in enumerate(dataset):
            iterator = gqn.data.Iterator(subset, batch_size=args.batch_size)

            for batch_index, data_indices in enumerate(iterator):
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)

                total_views = images.shape[1]

                # Sample number of views
                num_views = random.choice(range(1, total_views + 1))
                observation_view_indices = list(range(total_views))
                random.shuffle(observation_view_indices)
                observation_view_indices = observation_view_indices[:num_views]

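                # num_views is at least 1 here; the zero-view branch below is
                # presumably kept for variants that sample num_views from zero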
                if num_views > 0:
                    observation_images = preprocess_images(
                        images[:, observation_view_indices])
                    observation_query = viewpoints[:, observation_view_indices]
                    representation = model.compute_observation_representation(
                        observation_images, observation_query)
                else:
                    representation = xp.zeros(representation_shape,
                                              dtype="float32")
                    representation = chainer.Variable(representation)

                # Sample query
                query_index = random.choice(range(total_views))
                query_images = preprocess_images(images[:, query_index])
                query_viewpoints = viewpoints[:, query_index]

                # Transfer to gpu if necessary
                query_images = to_device(query_images, device_gpu)
                query_viewpoints = to_device(query_viewpoints, device_gpu)

                z_t_param_array, mean_x = model.sample_z_and_x_params_from_posterior(
                    query_images, query_viewpoints, representation)

                # Compute loss
                ## KL Divergence
                loss_kld = 0
                for params in z_t_param_array:
                    mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p = params
                    kld = gqn.functions.gaussian_kl_divergence(
                        mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)
                    loss_kld += cf.sum(kld)

                ## Negative log-likelihood of generated image
                loss_nll = cf.sum(
                    gqn.functions.gaussian_negative_log_likelihood(
                        query_images, mean_x, pixel_var, pixel_ln_var))

                # Calculate the average loss value
                loss_nll = loss_nll / args.batch_size
                loss_kld = loss_kld / args.batch_size

                loss = loss_nll / scheduler.pixel_variance + loss_kld

                model.cleargrads()
                loss.backward()
                optimizer.update(current_training_step)

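                # The log(256) term presumably accounts for the 1/256 bin width
                # when treating the continuous density as an 8-bit pixel likelihood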
                loss_nll = float(loss_nll.data) + math.log(256.0)
                loss_kld = float(loss_kld.data)

                elbo = -(loss_nll + loss_kld)

                loss_mse = float(
                    cf.mean_squared_error(query_images, mean_x).data)

                printr(
                    "Iteration {}: Subset {} / {}: Batch {} / {} - elbo: {:.2f} - loss: nll: {:.2f} mse: {:.6e} kld: {:.5f} - lr: {:.4e} - pixel_variance: {:.5f} - step: {}  "
                    .format(iteration + 1,
                            subset_index + 1, len(dataset), batch_index + 1,
                            len(iterator), elbo, loss_nll, loss_mse, loss_kld,
                            optimizer.learning_rate, scheduler.pixel_variance,
                            current_training_step))

                scheduler.step(iteration, current_training_step)
                pixel_var[...] = scheduler.pixel_variance**2
                pixel_ln_var[...] = math.log(scheduler.pixel_variance**2)

                total_num_batch += 1
                current_training_step += 1
                mean_kld += loss_kld
                mean_nll += loss_nll
                mean_mse += loss_mse
                mean_elbo += elbo

            model.serialize(args.snapshot_directory)

            # Visualize
            if args.with_visualization:
                axis_data.imshow(make_uint8(query_images[0]),
                                 interpolation="none")
                axis_reconstruction.imshow(make_uint8(mean_x.data[0]),
                                           interpolation="none")

                with chainer.no_backprop_mode():
                    generated_x = model.generate_image(
                        query_viewpoints[None, 0], representation[None, 0])
                    axis_generation.imshow(make_uint8(generated_x[0]),
                                           interpolation="none")
                plt.pause(1e-8)

        elapsed_time = time.time() - start_time
        print(
            "\033[2KIteration {} - elbo: {:.2f} - loss: nll: {:.2f} mse: {:.6e} kld: {:.5f} - lr: {:.4e} - pixel_variance: {:.5f} - step: {} - time: {:.3f} min"
            .format(iteration + 1, mean_elbo / total_num_batch,
                    mean_nll / total_num_batch, mean_mse / total_num_batch,
                    mean_kld / total_num_batch, optimizer.learning_rate,
                    scheduler.pixel_variance, current_training_step,
                    elapsed_time / 60))
        model.serialize(args.snapshot_directory)
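
The `Scheduler` anneals the pixel-noise sigma from `args.initial_pixel_variance` to `args.final_pixel_variance` over `pixel_n` updates, mirroring the GQN paper's schedule. A plausible sketch of the rule (the class itself is not shown, so treat this as an assumption):

    def annealed_sigma(num_updates, sigma_i, sigma_f, pixel_n):
        # Linear interpolation from sigma_i down to sigma_f, clamped after pixel_n updates
        t = min(num_updates / pixel_n, 1.0)
        return sigma_i + (sigma_f - sigma_i) * t
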
Example #4
def main():
    os.makedirs(args.output_directory, exist_ok=True)

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    plt.style.use("dark_background")
    fig = plt.figure(figsize=(10, 5))

    axis_observation_array = []
    axis_observation_array.append(fig.add_subplot(2, 4, 1))
    axis_observation_array.append(fig.add_subplot(2, 4, 2))
    axis_observation_array.append(fig.add_subplot(2, 4, 5))
    axis_observation_array.append(fig.add_subplot(2, 4, 6))

    for axis in axis_observation_array:
        axis.axis("off")

    axis_generation_array = []
    axis_generation_array.append(fig.add_subplot(2, 4, 3))
    axis_generation_array.append(fig.add_subplot(2, 4, 4))
    axis_generation_array.append(fig.add_subplot(2, 4, 7))
    axis_generation_array.append(fig.add_subplot(2, 4, 8))

    for axis in axis_generation_array:
        axis.axis("off")

    num_views_per_scene = 4
    num_generation = 4
    total_frames_per_rotation = 24

    image_shape = (3, ) + hyperparams.image_size
    blank_image = np.full(image_shape, -0.5)
    file_number = 1

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                artist_frame_array = []

                observed_image_array = xp.zeros(
                    (num_views_per_scene, ) + image_shape, dtype=np.float32)
                observed_viewpoint_array = xp.zeros((num_views_per_scene, 7),
                                                    dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                # Generate images without observations
                r = xp.zeros(
                    (num_generation, hyperparams.representation_channels) +
                    hyperparams.chrz_size,
                    dtype=np.float32)

                angle_rad = 0
                for t in range(total_frames_per_rotation):
                    artist_array = []

                    for axis in axis_observation_array:
                        axis_image = axis.imshow(make_uint8(blank_image),
                                                 interpolation="none",
                                                 animated=True)
                        artist_array.append(axis_image)

                    query_viewpoints = rotate_query_viewpoint(
                        angle_rad, num_generation, xp)
                    generated_images = model.generate_image(
                        query_viewpoints, r)

                    for j, axis in enumerate(axis_generation_array):
                        image = make_uint8(generated_images[j])
                        axis_image = axis.imshow(image,
                                                 interpolation="none",
                                                 animated=True)
                        artist_array.append(axis_image)

                    angle_rad += 2 * math.pi / total_frames_per_rotation

                    # plt.pause(1e-8)
                    axis = axis_generation_array[-1]
                    add_annotation(axis, artist_array)
                    artist_frame_array.append(artist_array)

                # Generate images with observations
                for m in range(num_views_per_scene):
                    observed_image = images[batch_index, m]
                    observed_viewpoint = viewpoints[batch_index, m]

                    observed_image_array[m] = to_gpu(observed_image)
                    observed_viewpoint_array[m] = to_gpu(observed_viewpoint)

                    r = model.compute_observation_representation(
                        observed_image_array[None, :m + 1],
                        observed_viewpoint_array[None, :m + 1])

                    r = cf.broadcast_to(r, (num_generation, ) + r.shape[1:])

                    angle_rad = 0
                    for t in range(total_frames_per_rotation):
                        artist_array = []

                        for axis, observed_image in zip(
                                axis_observation_array, observed_image_array):
                            axis_image = axis.imshow(
                                make_uint8(observed_image),
                                interpolation="none",
                                animated=True)
                            artist_array.append(axis_image)

                        query_viewpoints = rotate_query_viewpoint(
                            angle_rad, num_generation, xp)
                        generated_images = model.generate_image(
                            query_viewpoints, r)

                        for j in range(num_generation):
                            axis = axis_generation_array[j]
                            axis_image = axis.imshow(make_uint8(
                                generated_images[j]),
                                                     interpolation="none",
                                                     animated=True)
                            artist_array.append(axis_image)

                        angle_rad += 2 * math.pi / total_frames_per_rotation
                        # plt.pause(1e-8)

                        axis = axis_generation_array[-1]
                        add_annotation(axis, artist_array)
                        artist_frame_array.append(artist_array)

                # plt.tight_layout()
                # plt.subplots_adjust(
                #     left=None,
                #     bottom=None,
                #     right=None,
                #     top=None,
                #     wspace=0,
                #     hspace=0)
                anim = animation.ArtistAnimation(fig,
                                                 artist_frame_array,
                                                 interval=1000 / 24,  # milliseconds
                                                 blit=True,
                                                 repeat_delay=0)

                anim.save("{}/rooms_ring_camera_{}.gif".format(
                    args.output_directory, file_number),
                          writer="imagemagick")
                anim.save("{}/rooms_ring_camera_{}.mp4".format(
                    args.output_directory, file_number),
                          writer="ffmpeg",
                          fps=12)
                file_number += 1
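
This example's `rotate_query_viewpoint(angle_rad, num_generation, xp)` is also not shown. A sketch consistent with the single-view variants elsewhere on this page, reusing the `compute_yaw_and_pitch` sketch above; the camera distance is a placeholder, not the repository's actual value:

    def rotate_query_viewpoint(angle_rad, num_generation, xp):
        camera_distance = 3.0  # placeholder value
        camera_position = np.array([
            camera_distance * math.sin(angle_rad),  # x
            0.0,                                    # y
            camera_distance * math.cos(angle_rad),  # z
        ])
        yaw, pitch = compute_yaw_and_pitch(camera_position)
        query = xp.array(
            (camera_position[0], camera_position[1], camera_position[2],
             math.cos(yaw), math.sin(yaw), math.cos(pitch), math.sin(pitch)),
            dtype=np.float32)
        # The same query is broadcast to all generated samples
        return xp.broadcast_to(query, (num_generation, ) + query.shape)
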
Example #5
def main():
    os.makedirs(args.figure_directory, exist_ok=True)

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    dataset = gqn.data.Dataset(args.dataset_directory)

    meter = Meter()
    assert meter.load(args.snapshot_directory)

    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    assert model.load(args.snapshot_directory, meter.epoch)

    if using_gpu:
        model.to_gpu()

    total_observations_per_scene = 4
    fps = 30

    black_color = -0.5
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==============================================================================
    # Utilities
    #==============================================================================
    def to_device(array):
        if using_gpu:
            array = cuda.to_gpu(array)
        return array

    def fill_observations_axis(observation_images):
        axis_observations_image = np.full(
            (3, image_shape[1], total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
        horizontal_angle_rad = 2 * t * math.pi / (fps * 2) + math.pi / 4
        y_rad_top = math.pi / 3
        y_rad_bottom = -math.pi / 3
        y_rad_range = y_rad_bottom - y_rad_top
        if t < fps * 1.5:
            vertical_angle_rad = y_rad_top
        elif fps * 1.5 <= t < fps * 2.5:
            interp = (t - fps * 1.5) / fps
            vertical_angle_rad = y_rad_top + interp * y_rad_range
        elif fps * 2.5 <= t < fps * 4:
            vertical_angle_rad = y_rad_bottom
        elif fps * 4.0 <= t < fps * 5:
            interp = (t - fps * 4.0) / fps
            vertical_angle_rad = y_rad_bottom - interp * y_rad_range
        else:
            vertical_angle_rad = y_rad_top
        return horizontal_angle_rad, vertical_angle_rad

    def rotate_query_viewpoint(horizontal_angle_rad, vertical_angle_rad):
        camera_direction = np.array([
            math.sin(horizontal_angle_rad),  # x
            math.sin(vertical_angle_rad),  # y
            math.cos(horizontal_angle_rad),  # z
        ])
        camera_direction = args.camera_distance * camera_direction / np.linalg.norm(
            camera_direction)
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_direction[0],
                camera_direction[1],
                camera_direction[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    # fig.suptitle("GQN")
    axis_observations = fig.add_subplot(2, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")
    axis_generation = fig.add_subplot(2, 1, 2)
    axis_generation.axis("off")
    axis_generation.set_title("neural rendering")

    #==============================================================================
    # Generating animation
    #==============================================================================
    file_number = 1
    random.seed(0)
    np.random.seed(0)

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                animation_frame_array = []

                observed_image_array = xp.full(
                    (total_observations_per_scene, ) + image_shape,
                    black_color,
                    dtype=np.float32)
                observed_viewpoint_array = xp.zeros(
                    (total_observations_per_scene, 7), dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                #------------------------------------------------------------------------------
                # Generate images with a single observation
                #------------------------------------------------------------------------------
                observation_index = 0

                # Scene encoder
                observed_image = images[batch_index, observation_index]
                observed_viewpoint = viewpoints[batch_index, observation_index]

                observed_image_array[observation_index] = to_device(
                    observed_image)
                observed_viewpoint_array[observation_index] = to_device(
                    observed_viewpoint)

                representation = model.compute_observation_representation(
                    observed_image_array[None, :observation_index + 1],
                    observed_viewpoint_array[None, :observation_index + 1])

                # Update figure
                axis_observations_image = fill_observations_axis(
                    [observed_image])

                # Rotate camera
                for t in range(fps, fps * 6):
                    artist_array = [
                        axis_observations.imshow(
                            make_uint8(axis_observations_image),
                            interpolation="none",
                            animated=True)
                    ]

                    horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(
                        t)
                    query_viewpoints = rotate_query_viewpoint(
                        horizontal_angle_rad, vertical_angle_rad)
                    generated_images = model.generate_image(
                        query_viewpoints, representation)[0]

                    artist_array.append(
                        axis_generation.imshow(make_uint8(generated_images),
                                               interpolation="none",
                                               animated=True))

                    animation_frame_array.append(artist_array)

                #------------------------------------------------------------------------------
                # Add observations
                #------------------------------------------------------------------------------
                for n in range(total_observations_per_scene):
                    axis_observations_image = fill_observations_axis(
                        images[batch_index, :n + 1])

                    # Scene encoder
                    representation = model.compute_observation_representation(
                        observed_image_array[None, :n + 1],
                        observed_viewpoint_array[None, :n + 1])

                    for t in range(fps // 2):
                        artist_array = [
                            axis_observations.imshow(
                                make_uint8(axis_observations_image),
                                interpolation="none",
                                animated=True)
                        ]

                        horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(
                            0)
                        query_viewpoints = rotate_query_viewpoint(
                            horizontal_angle_rad, vertical_angle_rad)
                        generated_images = model.generate_image(
                            query_viewpoints, representation)[0]

                        artist_array.append(
                            axis_generation.imshow(
                                make_uint8(generated_images),
                                interpolation="none",
                                animated=True))

                        animation_frame_array.append(artist_array)

                #------------------------------------------------------------------------------
                # Generate images with all observations
                #------------------------------------------------------------------------------
                # Scene encoder
                representation = model.compute_observation_representation(
                    observed_image_array[None, :total_observations_per_scene],
                    observed_viewpoint_array[None, :total_observations_per_scene])
                # Rotate camera
                for t in range(0, fps * 6):
                    artist_array = [
                        axis_observations.imshow(
                            make_uint8(axis_observations_image),
                            interpolation="none",
                            animated=True)
                    ]

                    horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(
                        t)
                    query_viewpoints = rotate_query_viewpoint(
                        horizontal_angle_rad, vertical_angle_rad)
                    generated_images = model.generate_image(
                        query_viewpoints, representation)[0]

                    artist_array.append(
                        axis_generation.imshow(make_uint8(generated_images),
                                               interpolation="none",
                                               animated=True))

                    animation_frame_array.append(artist_array)

                #------------------------------------------------------------------------------
                # Write to file
                #------------------------------------------------------------------------------
                anim = animation.ArtistAnimation(fig,
                                                 animation_frame_array,
                                                 interval=1000 / fps,  # milliseconds
                                                 blit=True,
                                                 repeat_delay=0)

                # anim.save(
                #     "{}/shepard_matzler_observations_{}.gif".format(
                #         args.figure_directory, file_number),
                #     writer="imagemagick",
                #     fps=fps)
                anim.save("{}/shepard_matzler_observations_{}.mp4".format(
                    args.figure_directory, file_number),
                          writer="ffmpeg",
                          fps=fps)

                file_number += 1
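
Every script here packs a query viewpoint into seven floats: (x, y, z, cos yaw, sin yaw, cos pitch, sin pitch). A small hypothetical helper that inverts the encoding, useful when inspecting saved viewpoints:

    def decode_viewpoint(v):
        # Recover position and angles from the 7-d encoding
        x, y, z, cos_yaw, sin_yaw, cos_pitch, sin_pitch = (float(c) for c in v)
        yaw = math.atan2(sin_yaw, cos_yaw)
        pitch = math.atan2(sin_pitch, cos_pitch)
        return (x, y, z), yaw, pitch
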
Example #6
def gqn_process():
    # load model
    my_gpu = args.gpu_device
    if my_gpu < 0:
        xp = np
    else:
        cuda.get_device(args.gpu_device).use()
        xp = cp
    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    chainer.serializers.load_hdf5(args.snapshot_file, model)
    if my_gpu > -1:
        model.to_gpu()
    chainer.print_runtime_info()

    observed_viewpoint, observed_image, offset = data_recv.get()
    observed_viewpoint = np.expand_dims(np.expand_dims(
        np.asarray(observed_viewpoint).astype(np.float32), axis=0),
                                        axis=0)
    observed_image = np.expand_dims(np.expand_dims(
        np.asarray(observed_image).astype(np.float32), axis=0),
                                    axis=0)
    offset = np.asarray(offset)

    camera_distance = np.mean(
        np.linalg.norm(observed_viewpoint[:, :, :3], axis=2))
    camera_position_y = np.mean(observed_viewpoint[:, :, 1])  # index 1 is the y-component

    observed_image = observed_image.transpose(
        (0, 1, 4, 2, 3)).astype(np.float32)
    observed_image = preprocess_images(observed_image)

    # Create the scene representation and build an uncertainty map of the
    # environment from sampled query viewpoints
    total_frames = 10
    representation = model.compute_observation_representation(
        observed_image, observed_viewpoint)

    # get predictions
    highest_var = 0.0
    no_of_samples = 20
    highest_var_vp = None
    try:
        for i in range(0, total_frames):
            horizontal_angle_rad = compute_camera_angle_at_frame(
                i, total_frames)

            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      camera_distance,
                                                      camera_position_y, xp)

            generated_images = xp.squeeze(
                xp.array(
                    model.generate_images(query_viewpoints, representation,
                                          no_of_samples)))
            var_image = xp.var(generated_images, axis=0)
            current_var = xp.mean(var_image)

            # Track the viewpoint whose predicted samples disagree the most
            if highest_var_vp is None or current_var > highest_var:
                highest_var = current_var
                highest_var_vp = query_viewpoints[0]
    except KeyboardInterrupt:
        logging.warning('interrupt')

    # Propose the next viewpoint for the end effector based on the highest
    # uncertainty found in the uncertainty map
    _x, _y, _z, _, _, _, _ = highest_var_vp

    _yaw, _pitch = compute_yaw_and_pitch([_x, _y, _z])
    next_viewpoint = [_x, _y, _z, _yaw, _pitch]
    next_viewpoint = [chainer.backends.cuda.to_cpu(x) for x in next_viewpoint]
    next_viewpoint = [float(x) for x in next_viewpoint]
    data_send.put(next_viewpoint)
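
`gqn_process` communicates through `data_recv` and `data_send`, which behave like shared queues. A hypothetical wiring under that assumption (the real producer, e.g. a robot-control loop, is not shown in this example):

    import multiprocessing as mp
    import numpy as np

    data_recv = mp.Queue()
    data_send = mp.Queue()

    worker = mp.Process(target=gqn_process)
    worker.start()

    # Dummy observation for illustration; real values would come from a camera
    viewpoint = [0.0] * 7
    image = np.zeros((64, 64, 3), dtype=np.float32)
    offset = [0.0, 0.0, 0.0]
    data_recv.put((viewpoint, image, offset))

    next_viewpoint = data_send.get()  # (x, y, z, yaw, pitch) of highest variance
    worker.join()
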
Example #7
def main():
    os.makedirs(args.figure_directory, exist_ok=True)

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    plt.style.use("dark_background")
    fig = plt.figure(figsize=(10, 5))
    fig.suptitle("GQN")
    axis_observations = fig.add_subplot(1, 2, 1)
    axis_observations.axis("off")
    axis_observations.set_title("Observations")
    axis_generation = fig.add_subplot(1, 2, 2)
    axis_generation.axis("off")
    axis_generation.set_title("Generation")

    total_observations_per_scene = 2**2
    num_observations_per_column = int(math.sqrt(total_observations_per_scene))
    num_generation = 1
    total_frames_per_rotation = 48

    black_color = -0.5
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.full(
        (3, num_observations_per_column * image_shape[1],
         num_observations_per_column * image_shape[2]),
        black_color,
        dtype=np.float32)
    file_number = 1

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                animation_frame_array = []
                axis_observations_image[...] = black_color

                observed_image_array = xp.full(
                    (total_observations_per_scene, ) + image_shape,
                    black_color,
                    dtype=np.float32)
                observed_viewpoint_array = xp.zeros(
                    (total_observations_per_scene, 7), dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                # Generate images without observations
                representation = xp.zeros(
                    (num_generation, hyperparams.representation_channels,
                     hyperparams.image_size[0] // 4,
                     hyperparams.image_size[1] // 4),
                    dtype=np.float32)

                angle_rad = 0
                for t in range(total_frames_per_rotation):
                    artist_array = [
                        axis_observations.imshow(
                            make_uint8(axis_observations_image),
                            interpolation="none",
                            animated=True)
                    ]

                    query_viewpoints = rotate_query_viewpoint(
                        angle_rad, num_generation, xp)
                    generated_image = model.generate_image_from_zero_z(
                        query_viewpoints, representation)[0]

                    artist_array.append(
                        axis_generation.imshow(make_uint8(generated_image),
                                               interpolation="none",
                                               animated=True))

                    angle_rad += 2 * math.pi / total_frames_per_rotation
                    animation_frame_array.append(artist_array)

                # Generate images with observations
                for observation_index in range(total_observations_per_scene):
                    observed_image = images[batch_index, observation_index]
                    observed_viewpoint = viewpoints[batch_index,
                                                    observation_index]

                    observed_image_array[observation_index] = to_gpu(
                        observed_image)
                    observed_viewpoint_array[observation_index] = to_gpu(
                        observed_viewpoint)

                    representation = model.compute_observation_representation(
                        observed_image_array[None, :observation_index + 1],
                        observed_viewpoint_array[None, :observation_index + 1])

                    representation = cf.broadcast_to(representation,
                                                     (num_generation, ) +
                                                     representation.shape[1:])

                    # Update figure
                    x_start = image_shape[1] * (observation_index %
                                                num_observations_per_column)
                    x_end = x_start + image_shape[1]
                    y_start = image_shape[2] * (observation_index //
                                                num_observations_per_column)
                    y_end = y_start + image_shape[2]
                    axis_observations_image[:, y_start:y_end,
                                            x_start:x_end] = observed_image

                    angle_rad = 0
                    for t in range(total_frames_per_rotation):
                        artist_array = [
                            axis_observations.imshow(
                                make_uint8(axis_observations_image),
                                interpolation="none",
                                animated=True)
                        ]

                        query_viewpoints = rotate_query_viewpoint(
                            angle_rad, num_generation, xp)
                        generated_images = model.generate_image_from_zero_z(
                            query_viewpoints, representation)[0]

                        artist_array.append(
                            axis_generation.imshow(
                                make_uint8(generated_images),
                                interpolation="none",
                                animated=True))

                        angle_rad += 2 * math.pi / total_frames_per_rotation
                        animation_frame_array.append(artist_array)

                anim = animation.ArtistAnimation(fig,
                                                 animation_frame_array,
                                                 interval=1000 / 24,  # milliseconds
                                                 blit=True,
                                                 repeat_delay=0)

                anim.save("{}/shepard_matzler_observations_{}.gif".format(
                    args.figure_directory, file_number),
                          writer="imagemagick")
                anim.save("{}/shepard_matzler_observations_{}.mp4".format(
                    args.figure_directory, file_number),
                          writer="ffmpeg",
                          fps=12)
                file_number += 1
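
The x_start/y_start arithmetic above tiles observations row by row into a square mosaic. A quick trace for the 2x2 case, assuming 64x64 images:

    for observation_index in range(4):
        x_start = 64 * (observation_index % 2)   # column offset
        y_start = 64 * (observation_index // 2)  # row offset
        print(observation_index, (y_start, x_start))
    # 0 -> (0, 0), 1 -> (0, 64), 2 -> (64, 0), 3 -> (64, 64)
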
Example #8
def main():
    os.makedirs(args.figure_directory, exist_ok=True)

    # loading dataset & model
    cuda.get_device(args.gpu_device).use()
    xp = cp

    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    chainer.serializers.load_hdf5(args.snapshot_file, model)
    model.to_gpu()

    total_observations_per_scene = 4
    fps = 30

    black_color = -0.5
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==============================================================================
    # Utilities
    #==============================================================================
    def read_files(directory):
        filenames = []
        files = os.listdir(directory)
        for filename in files:
            if filename.endswith(".h5"):
                filenames.append(filename)
        filenames.sort()
        
        dataset_images = []
        dataset_viewpoints = []
        for i in range(len(filenames)):
            F = h5py.File(os.path.join(directory, filenames[i]), "r")
            tmp_images = list(F["images"])
            tmp_viewpoints = list(F["viewpoints"])
            
            dataset_images.extend(tmp_images)
            dataset_viewpoints.extend(tmp_viewpoints)
        dataset_images = np.array(dataset_images)
        dataset_viewpoints = np.array(dataset_viewpoints)

        dataset = list()
        for i in range(len(dataset_images)):
            item = {'image': dataset_images[i], 'viewpoint': dataset_viewpoints[i]}
            dataset.append(item)
        
        return dataset
    
    def to_device(array):
        # This script always runs on the GPU (xp = cp above)
        return cuda.to_gpu(array)

    def fill_observations_axis(observation_images):
        axis_observations_image = np.full(
            (3, image_shape[1], total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
        return t * 2 * math.pi / (fps * 2)

    def rotate_query_viewpoint(horizontal_angle_rad, camera_distance,
                               camera_position_y):
        camera_position = np.array([
            camera_distance * math.sin(horizontal_angle_rad),  # x
            camera_position_y,
            camera_distance * math.cos(horizontal_angle_rad),  # z
        ])
        center = np.array((0, camera_position_y, 0))
        camera_direction = camera_position - center
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_position[0],
                camera_position[1],
                camera_position[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints

    def render(representation,
               camera_distance,
               camera_position_y,
               total_frames,
               animation_frame_array,
               rotate_camera=True):
        for t in range(0, total_frames):
            artist_array = [
                axis_observations.imshow(
                    make_uint8(axis_observations_image),
                    interpolation="none",
                    animated=True)
            ]

            horizontal_angle_rad = compute_camera_angle_at_frame(t)
            if not rotate_camera:
                horizontal_angle_rad = compute_camera_angle_at_frame(0)

            query_viewpoints = rotate_query_viewpoint(
                horizontal_angle_rad, camera_distance, camera_position_y)
            generated_images = model.generate_image(query_viewpoints,
                                                    representation)[0]

            artist_array.append(
                axis_generation.imshow(
                    make_uint8(generated_images),
                    interpolation="none",
                    animated=True))

            animation_frame_array.append(artist_array)

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    # fig.suptitle("GQN")
    axis_observations = fig.add_subplot(2, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")
    axis_generation = fig.add_subplot(2, 1, 2)
    axis_generation.axis("off")
    axis_generation.set_title("neural rendering")

    #==============================================================================
    # Generating animation
    #==============================================================================
    dataset = read_files(args.dataset_directory)
    file_number = 1
    random.seed(0)
    np.random.seed(0)

    with chainer.no_backprop_mode():
        for i in range(len(dataset)):
            animation_frame_array = []

            # shape: (batch, views, height, width, channels)
            images = np.array([dataset[i]["image"]])
            viewpoints = np.array([dataset[i]["viewpoint"]])
            camera_distance = np.mean(
                np.linalg.norm(viewpoints[:, :, :3], axis=2))
            camera_position_y = np.mean(viewpoints[:, :, 1])

            # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
            images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
            images = preprocess_images(images)

            batch_index = 0

            total_views = images.shape[1]
            random_observation_view_indices = list(range(total_views))
            random.shuffle(random_observation_view_indices)
            random_observation_view_indices = random_observation_view_indices[:total_observations_per_scene]

            #------------------------------------------------------------------------------
            # Observations
            #------------------------------------------------------------------------------
            observed_images = images[batch_index,
                                     random_observation_view_indices]
            observed_viewpoints = viewpoints[batch_index,
                                             random_observation_view_indices]

            observed_images = to_device(observed_images)
            observed_viewpoints = to_device(observed_viewpoints)

            #------------------------------------------------------------------------------
            # Generate images with a single observation
            #------------------------------------------------------------------------------
            # Scene encoder
            representation = model.compute_observation_representation(
                observed_images[None, :1], observed_viewpoints[None, :1])

            # Update figure
            observation_index = random_observation_view_indices[0]
            observed_image = images[batch_index, observation_index]
            axis_observations_image = fill_observations_axis(
                [observed_image])

            # Neural rendering
            render(representation, camera_distance, camera_position_y,
                   fps * 2, animation_frame_array)

            #------------------------------------------------------------------------------
            # Add observations
            #------------------------------------------------------------------------------
            for n in range(total_observations_per_scene):
                observation_indices = random_observation_view_indices[:n + 1]
                axis_observations_image = fill_observations_axis(
                    images[batch_index, observation_indices])

                # Scene encoder
                representation = model.compute_observation_representation(
                    observed_images[None, :n + 1],
                    observed_viewpoints[None, :n + 1])
                # Neural rendering
                render(
                    representation,
                    camera_distance,
                    camera_position_y,
                    fps // 2,
                    animation_frame_array,
                    rotate_camera=False)

            #------------------------------------------------------------------------------
            # Generate images with all observations
            #------------------------------------------------------------------------------
            # Scene encoder
            representation = model.compute_observation_representation(
                observed_images[None, :total_observations_per_scene],
                observed_viewpoints[None, :total_observations_per_scene])

            # Neural rendering
            render(representation, camera_distance, camera_position_y,
                   fps * 4, animation_frame_array)

            #------------------------------------------------------------------------------
            # Write to file
            #------------------------------------------------------------------------------
            anim = animation.ArtistAnimation(
                fig,
                animation_frame_array,
                interval=1000 / fps,  # interval is in milliseconds
                blit=True,
                repeat_delay=0)

            anim.save(
                "{}/rooms_ring_camera_observations_{}.mp4".format(
                    args.figure_directory, file_number),
                writer="ffmpeg",
                fps=fps)

            file_number += 1
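Note: rotate_query_viewpoint above relies on a compute_yaw_and_pitch helper that none of these snippets define. The following is a minimal sketch of what such a helper could look like, assuming the viewpoint convention used throughout (yaw measured in the x-z plane, pitch as elevation of the camera direction); the actual implementation in the GQN codebase may differ.

import math


def compute_yaw_and_pitch(direction):
    # Hypothetical sketch, not the original implementation.
    x, y, z = float(direction[0]), float(direction[1]), float(direction[2])
    norm = math.sqrt(x * x + y * y + z * z)
    # Yaw: rotation around the vertical axis, measured in the x-z plane.
    yaw = math.atan2(x, z)
    # Pitch: elevation of the direction vector above the horizontal plane.
    pitch = math.asin(y / norm)
    return yaw, pitch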
Example No. 9
def main():
    os.makedirs(args.figure_directory, exist_ok=True)

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    plt.style.use("dark_background")
    fig = plt.figure(figsize=(15, 5))
    fig.suptitle("GQN")
    axis_observations = fig.add_subplot(1, 3, 1)
    axis_observations.axis("off")
    axis_observations.set_title("Observations")
    axis_ground_truth = fig.add_subplot(1, 3, 2)
    axis_ground_truth.axis("off")
    axis_ground_truth.set_title("Ground Truth")
    axis_reconstruction = fig.add_subplot(1, 3, 3)
    axis_reconstruction.axis("off")
    axis_reconstruction.set_title("Reconstruction")

    total_observations_per_scene = 2**2
    num_observations_per_column = int(math.sqrt(total_observations_per_scene))

    black_color = -0.5
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.full(
        (3, num_observations_per_column * image_shape[1],
         num_observations_per_column * image_shape[2]),
        black_color,
        dtype=np.float32)
    file_number = 1

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                animation_frame_array = []
                axis_observations_image[...] = black_color

                observed_image_array = xp.full(
                    (total_observations_per_scene, ) + image_shape,
                    black_color,
                    dtype=np.float32)
                observed_viewpoint_array = xp.zeros(
                    (total_observations_per_scene, 7), dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                query_index = total_observations_per_scene
                query_image = images[batch_index, query_index]
                query_viewpoint = to_gpu(viewpoints[None, batch_index,
                                                    query_index])

                axis_ground_truth.imshow(make_uint8(query_image),
                                         interpolation="none")

                for observation_index in range(total_observations_per_scene):
                    observed_image = images[batch_index, observation_index]
                    observed_viewpoint = viewpoints[batch_index,
                                                    observation_index]

                    observed_image_array[observation_index] = to_gpu(
                        observed_image)
                    observed_viewpoint_array[observation_index] = to_gpu(
                        observed_viewpoint)

                    representation = model.compute_observation_representation(
                        observed_image_array[None, :observation_index + 1],
                        observed_viewpoint_array[None, :observation_index + 1])

                    representation = cf.broadcast_to(representation, (1, ) +
                                                     representation.shape[1:])

                    # Update figure
                    x_start = image_shape[1] * (observation_index %
                                                num_observations_per_column)
                    x_end = x_start + image_shape[1]
                    y_start = image_shape[2] * (observation_index //
                                                num_observations_per_column)
                    y_end = y_start + image_shape[2]
                    axis_observations_image[:, y_start:y_end,
                                            x_start:x_end] = observed_image

                    axis_observations.imshow(
                        make_uint8(axis_observations_image),
                        interpolation="none",
                        animated=True)

                    generated_images = model.generate_image(
                        query_viewpoint, representation)[0]

                    axis_reconstruction.imshow(make_uint8(generated_images),
                                               interpolation="none")

                    plt.pause(1)
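Note: every example in this collection imports preprocess_images and make_uint8 from elsewhere. Given that black_color = -0.5 is used as the fill value, a plausible sketch is shown below; the exact scaling used by the original codebase is an assumption here.

import numpy as np


def preprocess_images(images):
    # Assumed mapping: uint8 pixels in [0, 255] to floats in [-0.5, 0.5],
    # so that the fill value black_color = -0.5 corresponds to black.
    return images.astype(np.float32) / 255.0 - 0.5


def make_uint8(image):
    # Assumed inverse mapping back to a displayable (H, W, C) uint8 image.
    image = np.asarray(image)
    if image.ndim == 3 and image.shape[0] == 3:
        # (channels, height, width) -> (height, width, channels)
        image = image.transpose(1, 2, 0)
    return np.clip((image + 0.5) * 255.0, 0.0, 255.0).astype(np.uint8)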
Example No. 10
def main():
    os.makedirs(args.output_directory, exist_ok=True)

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    plt.style.use("dark_background")
    fig = plt.figure(figsize=(10, 5))

    axis_observation_array = []
    axis_observation_array.append(plt.subplot2grid((2, 4), (0, 0)))
    axis_observation_array.append(plt.subplot2grid((2, 4), (0, 1)))
    axis_observation_array.append(plt.subplot2grid((2, 4), (1, 0)))
    axis_observation_array.append(plt.subplot2grid((2, 4), (1, 1)))

    for axis in axis_observation_array:
        axis.axis("off")

    axis_generation = plt.subplot2grid((2, 4), (0, 2), rowspan=2, colspan=2)
    axis_generation.axis("off")

    num_views_per_scene = 4
    num_generation = 1
    total_frames_per_movement = 72

    image_shape = (3, ) + hyperparams.image_size
    blank_image = np.full(image_shape, -0.5)
    file_number = 1

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                artist_frame_array = []

                observed_image_array = xp.full(
                    (num_views_per_scene, ) + image_shape,
                    -0.5,
                    dtype=np.float32)
                observed_viewpoint_array = xp.zeros((num_views_per_scene, 7),
                                                    dtype=np.float32)

                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                # Generate images without observations
                r = xp.zeros((
                    num_generation,
                    hyperparams.representation_channels,
                ) + hyperparams.chrz_size,
                             dtype=np.float32)

                # Generate images with observations
                for m in range(num_views_per_scene):
                    observed_image = images[batch_index, m]
                    observed_viewpoint = viewpoints[batch_index, m]

                    observed_image_array[m] = to_gpu(observed_image)
                    observed_viewpoint_array[m] = to_gpu(observed_viewpoint)

                    r = model.compute_observation_representation(
                        observed_image_array[None, :m + 1],
                        observed_viewpoint_array[None, :m + 1])

                    r = cf.broadcast_to(r, (num_generation, ) + r.shape[1:])

                    grid_size = 8
                    trajectory_length = grid_size / 3

                    def render_camera_movement(eye_start, eye_end,
                                               center_start, center_end):
                        # Render one leg of the camera trajectory by linearly
                        # interpolating both the eye and the look-at point.
                        for t in range(total_frames_per_movement):
                            artist_array = []

                            for axis, observed_image in zip(
                                    axis_observation_array,
                                    observed_image_array):
                                artist_array.append(
                                    axis.imshow(make_uint8(observed_image),
                                                interpolation="none",
                                                animated=True))

                            interp = t / (total_frames_per_movement - 1)
                            eye = interpolate(eye_start, eye_end, interp)
                            center = interpolate(center_start, center_end,
                                                 interp)
                            query_viewpoints = make_query_viewpoint(
                                eye, center, num_generation, xp)
                            generated_images = model.generate_image(
                                query_viewpoints, r)

                            artist_array.append(
                                axis_generation.imshow(
                                    make_uint8(generated_images[0]),
                                    interpolation="none",
                                    animated=True))

                            add_annotation(axis_generation, artist_array)
                            artist_frame_array.append(artist_array)

                    # Three legs of the camera trajectory around the scene.
                    render_camera_movement(
                        eye_start=(-trajectory_length, -0.125,
                                   trajectory_length),
                        eye_end=(-trajectory_length, -0.125,
                                 -trajectory_length),
                        center_start=(-trajectory_length, -0.125,
                                      grid_size / 2),
                        center_end=(-trajectory_length, -0.125, 0))

                    render_camera_movement(
                        eye_start=(-trajectory_length, -0.125,
                                   -trajectory_length),
                        eye_end=(trajectory_length, -0.125,
                                 -trajectory_length),
                        center_start=(-trajectory_length, -0.125, 0),
                        center_end=(trajectory_length, -0.125, 0))

                    render_camera_movement(
                        eye_start=(trajectory_length, -0.125,
                                   -trajectory_length),
                        eye_end=(trajectory_length, -0.125,
                                 trajectory_length),
                        center_start=(trajectory_length, -0.125, 0),
                        center_end=(trajectory_length, -0.125,
                                    grid_size / 2))

                plt.tight_layout()
                plt.subplots_adjust(left=None,
                                    bottom=None,
                                    right=None,
                                    top=None,
                                    wspace=0,
                                    hspace=0)
                anim = animation.ArtistAnimation(fig,
                                                 artist_frame_array,
                                                 interval=1000 / 24,  # interval is in milliseconds
                                                 blit=True,
                                                 repeat_delay=0)

                anim.save("{}/rooms_free_camera_{}.gif".format(
                    args.output_directory, file_number),
                          writer="imagemagick")
                anim.save("{}/rooms_free_camera_{}.mp4".format(
                    args.output_directory, file_number),
                          writer="ffmpeg",
                          fps=12)
                file_number += 1
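Note: Example No. 10 additionally uses interpolate and make_query_viewpoint, which are also not shown. A sketch under the same assumptions (7-dimensional viewpoints of the form (x, y, z, cos yaw, sin yaw, cos pitch, sin pitch), plus the hypothetical compute_yaw_and_pitch sketched earlier):

import math

import numpy as np


def interpolate(start, end, ratio):
    # Linear interpolation between two 3D points; ratio runs from 0 to 1.
    return tuple(s + (e - s) * ratio for s, e in zip(start, end))


def make_query_viewpoint(eye, center, num_generation, xp):
    # Build a batch of identical query viewpoints looking from eye toward
    # center, mirroring rotate_query_viewpoint in the other examples.
    direction = np.asarray(eye, dtype=np.float32) - np.asarray(
        center, dtype=np.float32)
    yaw, pitch = compute_yaw_and_pitch(direction)
    viewpoint = xp.array(
        (eye[0], eye[1], eye[2], math.cos(yaw), math.sin(yaw),
         math.cos(pitch), math.sin(pitch)),
        dtype=np.float32)
    return xp.broadcast_to(viewpoint, (num_generation, ) + viewpoint.shape)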
Example No. 11
def main():
    os.makedirs(args.figure_directory, exist_ok=True)

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    dataset = gqn.data.Dataset(args.dataset_directory)

    meter = Meter()
    assert meter.load(args.snapshot_directory)

    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    assert model.load(args.snapshot_directory, meter.epoch)

    if using_gpu:
        model.to_gpu()

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.figure(figsize=(12, 16))

    axis_observation_1 = plt.subplot2grid((4, 3), (0, 0))
    axis_observation_2 = plt.subplot2grid((4, 3), (0, 1))
    axis_observation_3 = plt.subplot2grid((4, 3), (0, 2))

    axis_predictions = plt.subplot2grid((4, 3), (1, 0), rowspan=3, colspan=3)

    axis_observation_1.axis("off")
    axis_observation_2.axis("off")
    axis_observation_3.axis("off")
    axis_predictions.set_xticks([], [])
    axis_predictions.set_yticks([], [])

    axis_observation_1.set_title("Observation 1", fontsize=22)
    axis_observation_2.set_title("Observation 2", fontsize=22)
    axis_observation_3.set_title("Observation 3", fontsize=22)

    axis_predictions.set_title("Neural Rendering", fontsize=22)
    axis_predictions.set_xlabel("Yaw", fontsize=22)
    axis_predictions.set_ylabel("Pitch", fontsize=22)

    #==============================================================================
    # Generating images
    #==============================================================================
    num_views_per_scene = 3
    num_yaw_pitch_steps = 10
    image_width, image_height = hyperparams.image_size
    # Rows index the pitch steps, columns the yaw steps.
    prediction_images = make_uint8(
        np.full((num_yaw_pitch_steps * image_height,
                 num_yaw_pitch_steps * image_width, 3), 0))
    file_number = 1
    random.seed(0)
    np.random.seed(0)

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]
                camera_distance = np.mean(
                    np.linalg.norm(viewpoints[:, :, :3], axis=2))

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                #------------------------------------------------------------------------------
                # Observations
                #------------------------------------------------------------------------------
                total_views = images.shape[1]
                random_observation_view_indices = list(range(total_views))
                random.shuffle(random_observation_view_indices)
                random_observation_view_indices = random_observation_view_indices[:num_views_per_scene]

                observed_images = images[:, random_observation_view_indices]
                observed_viewpoints = viewpoints[:,
                                                 random_observation_view_indices]
                representation = model.compute_observation_representation(
                    observed_images, observed_viewpoints)

                axis_observation_1.imshow(
                    make_uint8(observed_images[batch_index, 0]))
                axis_observation_2.imshow(
                    make_uint8(observed_images[batch_index, 1]))
                axis_observation_3.imshow(
                    make_uint8(observed_images[batch_index, 2]))

                y_angle_rad = math.pi / 2

                for pitch_loop in range(num_yaw_pitch_steps):
                    camera_y = math.sin(y_angle_rad)
                    x_angle_rad = math.pi

                    for yaw_loop in range(num_yaw_pitch_steps):
                        camera_direction = np.array([
                            math.sin(x_angle_rad), camera_y,
                            math.cos(x_angle_rad)
                        ])
                        camera_direction = camera_distance * camera_direction / np.linalg.norm(
                            camera_direction)
                        yaw, pitch = compute_yaw_and_pitch(camera_direction)

                        query_viewpoints = xp.array(
                            (
                                camera_direction[0],
                                camera_direction[1],
                                camera_direction[2],
                                math.cos(yaw),
                                math.sin(yaw),
                                math.cos(pitch),
                                math.sin(pitch),
                            ),
                            dtype=np.float32,
                        )
                        query_viewpoints = xp.broadcast_to(
                            query_viewpoints, (1, ) + query_viewpoints.shape)

                        generated_images = model.generate_image(
                            query_viewpoints, representation)[0]

                        yi_start = pitch_loop * image_height
                        yi_end = (pitch_loop + 1) * image_height
                        xi_start = yaw_loop * image_width
                        xi_end = (yaw_loop + 1) * image_width
                        prediction_images[yi_start:yi_end,
                                          xi_start:xi_end] = make_uint8(
                                              generated_images)

                        x_angle_rad -= 2 * math.pi / num_yaw_pitch_steps
                    y_angle_rad -= math.pi / num_yaw_pitch_steps

                axis_predictions.imshow(prediction_images)

                plt.savefig("{}/shepard_metzler_predictions_{}.png".format(
                    args.figure_directory, file_number))
                file_number += 1
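Note: the nested pitch/yaw loops in Example No. 11 decrement the angles by hand. The same sweep can be written with np.linspace, which makes the covered ranges explicit. A sketch; camera_distance is a placeholder value here:

import math

import numpy as np

num_yaw_pitch_steps = 10
camera_distance = 3.0  # placeholder for illustration

# endpoint=False reproduces the manual decrements of math.pi / steps
# (pitch) and 2 * math.pi / steps (yaw) used in the loops above.
pitch_angles = np.linspace(
    math.pi / 2, -math.pi / 2, num_yaw_pitch_steps, endpoint=False)
yaw_angles = np.linspace(
    math.pi, -math.pi, num_yaw_pitch_steps, endpoint=False)

for y_angle_rad in pitch_angles:
    camera_y = math.sin(y_angle_rad)
    for x_angle_rad in yaw_angles:
        camera_direction = np.array(
            [math.sin(x_angle_rad), camera_y,
             math.cos(x_angle_rad)])
        camera_direction = (camera_distance * camera_direction /
                            np.linalg.norm(camera_direction))
        # ...render one tile for this viewpoint as in the loop above...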
Example No. 12
def main():
    ##############################################
    # To avoid OpenMPI bug
    multiprocessing.set_start_method("forkserver")
    p = multiprocessing.Process(target=print, args=("", ))
    p.start()
    p.join()
    ##############################################

    os.makedirs(args.snapshot_directory, exist_ok=True)

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    print("device", device, "/", comm.size)
    cuda.get_device(device).use()
    xp = cupy

    dataset = gqn.data.Dataset(args.dataset_directory)

    hyperparams = HyperParameters()
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.generator_share_upsampler = args.generator_share_upsampler
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.h_channels = args.h_channels
    hyperparams.z_channels = args.z_channels
    hyperparams.u_channels = args.u_channels
    hyperparams.image_size = (args.image_size, args.image_size)
    hyperparams.representation_channels = args.representation_channels
    hyperparams.representation_architecture = args.representation_architecture
    hyperparams.pixel_n = args.pixel_n
    hyperparams.pixel_sigma_i = args.initial_pixel_variance
    hyperparams.pixel_sigma_f = args.final_pixel_variance
    if comm.rank == 0:
        hyperparams.save(args.snapshot_directory)

        ##   Debug   ##
        hyperparams.save("results")
        print(hyperparams)

    model = Model(
        hyperparams,
        snapshot_directory=args.snapshot_directory,
        optimized=args.optimized)
    model.to_gpu()

    optimizer = optimizer_by_name(
        args.optimizer,
        model.parameters,
        communicator=comm,
        mu_i=args.initial_lr,
        mu_f=args.final_lr)
    if comm.rank == 0:
        print(optimizer)

    scheduler = Scheduler(
        sigma_start=args.initial_pixel_variance,
        sigma_end=args.final_pixel_variance,
        final_num_updates=args.pixel_n,
        snapshot_directory=args.snapshot_directory)
    if comm.rank == 0:
        print(scheduler)

    pixel_var = xp.full(
        (args.batch_size, 3) + hyperparams.image_size,
        scheduler.pixel_variance**2,
        dtype="float32")
    pixel_ln_var = xp.full(
        (args.batch_size, 3) + hyperparams.image_size,
        math.log(scheduler.pixel_variance**2),
        dtype="float32")

    random.seed(0)
    subset_indices = list(range(len(dataset.subset_filenames)))

    representation_shape = (args.batch_size,
                            hyperparams.representation_channels,
                            args.image_size // 4, args.image_size // 4)

    current_training_step = scheduler.num_updates
    for iteration in range(scheduler.iteration, args.training_iterations):
        mean_kld = 0
        mean_nll = 0
        mean_mse = 0
        mean_elbo = 0
        total_num_batch = 0
        subset_size_per_gpu = len(subset_indices) // comm.size
        if len(subset_indices) % comm.size != 0:
            subset_size_per_gpu += 1
        start_time = time.time()

        for subset_loop in range(subset_size_per_gpu):
            random.shuffle(subset_indices)
            subset_index = subset_indices[comm.rank]
            subset = dataset.read(subset_index)
            iterator = gqn.data.Iterator(subset, batch_size=args.batch_size)

            for batch_index, data_indices in enumerate(iterator):
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) ->  (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)

                total_views = images.shape[1]

                # Sample observations
                num_views = random.choice(range(1, total_views + 1))

                observation_view_indices = list(range(total_views))
                random.shuffle(observation_view_indices)
                observation_view_indices = observation_view_indices[:num_views]

                if num_views > 0:
                    observation_images = preprocess_images(
                        images[:, observation_view_indices])
                    observation_query = viewpoints[:, observation_view_indices]
                    representation = model.compute_observation_representation(
                        observation_images, observation_query)
                else:
                    representation = xp.zeros(
                        representation_shape, dtype="float32")
                    representation = chainer.Variable(representation)

                # Sample query
                query_index = random.choice(range(total_views))
                query_images = preprocess_images(images[:, query_index])
                query_viewpoints = viewpoints[:, query_index]

                # Transfer to gpu
                query_images = to_gpu(query_images)
                query_viewpoints = to_gpu(query_viewpoints)

                z_t_param_array, mean_x = model.sample_z_and_x_params_from_posterior(
                    query_images, query_viewpoints, representation)

                # Compute loss
                ## KL Divergence
                loss_kld = chainer.Variable(xp.zeros((), dtype=xp.float32))
                for params in z_t_param_array:
                    mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p = params
                    kld = gqn.functions.gaussian_kl_divergence(
                        mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)
                    loss_kld += cf.sum(kld)

                ## Negative log-likelihood of the generated image
                loss_nll = cf.sum(
                    gqn.functions.gaussian_negative_log_likelihood(
                        query_images, mean_x, pixel_var, pixel_ln_var))

                # Calculate the average loss value
                loss_nll = loss_nll / args.batch_size
                loss_kld = loss_kld / args.batch_size

                loss = (loss_nll / scheduler.pixel_variance) + loss_kld

                model.cleargrads()
                loss.backward()
                optimizer.update(current_training_step)

                loss_nll = float(loss_nll.data)
                loss_kld = float(loss_kld.data)

                elbo = -(loss_nll + loss_kld)

                loss_mse = float(
                    cf.mean_squared_error(query_images, mean_x).data)

                total_num_batch += 1
                current_training_step += 1
                mean_kld += loss_kld
                mean_nll += loss_nll
                mean_mse += loss_mse
                mean_elbo += elbo

                scheduler.step(iteration, current_training_step)
                pixel_var[...] = scheduler.pixel_variance**2
                pixel_ln_var[...] = math.log(scheduler.pixel_variance**2)

        if comm.rank == 0:
            elapsed_time = time.time() - start_time
            mean_elbo /= total_num_batch
            mean_nll /= total_num_batch
            mean_mse /= total_num_batch
            mean_kld /= total_num_batch
            print(
                "\033[2KIteration {} - elbo: {:.2f} - loss: nll: {:.2f} mse: {} kld: {:.6f} - lr: {:.4e} - pixel_variance: {:.5f} - step: {} - time: {:.3f} min".
                format(iteration, mean_elbo, mean_nll, mean_mse, mean_kld,
                       optimizer.learning_rate, scheduler.pixel_variance,
                       current_training_step, elapsed_time / 60))
            model.serialize(args.snapshot_directory)
            scheduler.save(args.snapshot_directory)

            ##   Debug   ##
            model.serialize("results")
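Note: gqn.functions.gaussian_kl_divergence and gaussian_negative_log_likelihood, used in the training loop above, are not included in these snippets. Both have standard closed forms for diagonal Gaussians; the sketch below shows what they plausibly compute elementwise (the library versions may differ in reduction or parameterization).

import math

import chainer.functions as cf


def gaussian_kl_divergence(mean_q, ln_var_q, mean_p, ln_var_p):
    # Closed-form KL(q || p) between diagonal Gaussians, elementwise;
    # the caller reduces the result with cf.sum, as in the loop above.
    var_q = cf.exp(ln_var_q)
    var_p = cf.exp(ln_var_p)
    return 0.5 * (ln_var_p - ln_var_q +
                  (var_q + (mean_q - mean_p)**2) / var_p - 1)


def gaussian_negative_log_likelihood(x, mean, var, ln_var):
    # Elementwise negative log-density of x under N(mean, var).
    return 0.5 * (math.log(2 * math.pi) + ln_var + (x - mean)**2 / var)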
Example No. 13
def main():
    start_time = time.time()

    writer = SummaryWriter('/GQN/chainer-gqn/tensor-log')

    os.makedirs(args.figure_directory, exist_ok=True)

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    dataset = gqn.data.Dataset(args.dataset_directory)

    meter = Meter()
    assert meter.load(args.snapshot_directory)

    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    assert model.load(args.snapshot_directory, meter.epoch)

    if using_gpu:
        model.to_gpu()

    total_observations_per_scene = 4
    fps = 30

    black_color = -0.5
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==============================================================================
    # Utilities
    #==============================================================================
    def to_device(array):
        if using_gpu:
            array = cuda.to_gpu(array)
        return array

    def fill_observations_axis(observation_images):
        axis_observations_image = np.full(
            (3, image_shape[1], total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
        horizontal_angle_rad = 2 * t * math.pi / (fps * 2) + math.pi / 4
        y_rad_top = math.pi / 3
        y_rad_bottom = -math.pi / 3
        y_rad_range = y_rad_bottom - y_rad_top
        if t < fps * 1.5:
            vertical_angle_rad = y_rad_top
        elif fps * 1.5 <= t < fps * 2.5:
            interp = (t - fps * 1.5) / fps
            vertical_angle_rad = y_rad_top + interp * y_rad_range
        elif fps * 2.5 <= t < fps * 4:
            vertical_angle_rad = y_rad_bottom
        elif fps * 4.0 <= t < fps * 5:
            interp = (t - fps * 4.0) / fps
            vertical_angle_rad = y_rad_bottom - interp * y_rad_range
        else:
            vertical_angle_rad = y_rad_top
        return horizontal_angle_rad, vertical_angle_rad

    def compute_vertical_rotation_at_frame(horizontal, vertical, t):
        # Rotate the horizontal view only; the vertical angle stays fixed.
        horizontal_angle_rad = horizontal + (t - fps) * (math.pi / 64)
        vertical_angle_rad = vertical

        return horizontal_angle_rad, vertical_angle_rad

    def rotate_query_viewpoint(horizontal_angle_rad, vertical_angle_rad,
                               camera_distance):
        camera_direction = np.array([
            math.sin(horizontal_angle_rad),  # x
            math.sin(vertical_angle_rad),  # y
            math.cos(horizontal_angle_rad),  # z
        ])

        # The direction is deliberately not normalized here (kept this way
        # for observation purposes, unlike the other examples).
        camera_direction = camera_distance * camera_direction
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_direction[0],
                camera_direction[1],
                camera_direction[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints

    def render(representation,
               camera_distance,
               obs_viewpoint,
               start_t,
               end_t,
               animation_frame_array,
               savename=None,
               rotate_camera=True):

        all_var_bg = []
        all_var = []
        all_var_z = []
        all_q_view = []

        all_c = []
        all_h = []
        all_u = []
        for t in range(start_t, end_t):
            artist_array = [
                axis_observations.imshow(make_uint8(axis_observations_image),
                                         interpolation="none",
                                         animated=True)
            ]

            # Recover the observation's horizontal and vertical angles
            # from its viewpoint vector.
            horizontal_angle_rad = np.arctan2(obs_viewpoint[0],
                                              obs_viewpoint[2])
            vertical_angle_rad = np.arcsin(obs_viewpoint[1] / camera_distance)

            horizontal_angle_rad, vertical_angle_rad = compute_vertical_rotation_at_frame(
                horizontal_angle_rad, vertical_angle_rad, t)
            if not rotate_camera:
                horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(0)

            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      vertical_angle_rad,
                                                      camera_distance)

            # obtain generated images, as well as mean and variance before gaussian
            generated_images, var_bg, latent_z, ct = model.generate_multi_image(
                query_viewpoints, representation, 100)
            logging.info("retrieved variables, time elapsed: " +
                         str(time.time() - start_time))

            generated_images = np.squeeze(generated_images)
            latent_z = np.squeeze(latent_z)
            ct = np.squeeze(ct)

            # obtain data from Chainer Variable and obtain mean
            var_bg = cp.mean(var_bg, axis=0)
            logging.info("variance of bg, time elapsed: " +
                         str(time.time() - start_time))
            var_z = cp.var(latent_z, axis=0)
            logging.info("variance of z, time elapsed: " +
                         str(time.time() - start_time))
            var_c = cp.var(ct, axis=0)

            logging.info("variance of c, time elapsed: " +
                         str(time.time() - start_time))
            # var_h = cp.var(ht,axis=0)
            # var_u = cp.var(ut,axis=0)

            # write viewpoint and image variance to file
            gen_img_var = np.var(generated_images, axis=0)
            logging.info("calculated variance of gen images, time elapsed: " +
                         str(time.time() - start_time))

            all_var_bg.append((var_bg)[None])
            all_var.append((gen_img_var)[None])
            all_var_z.append((var_z)[None])
            all_q_view.append(
                chainer.backends.cuda.to_cpu(horizontal_angle_rad)[None] *
                180 / math.pi)

            all_c.append((var_c)[None])
            logging.info("appending, time elapsed: " +
                         str(time.time() - start_time))
            # all_h.append(chainer.backends.cuda.to_cpu(var_h)[None])
            # all_u.append(chainer.backends.cuda.to_cpu(var_u)[None])

            pred_mean = cp.mean(generated_images, axis=0)

            # artist_array.append(
            #     axis_generation.imshow(
            #         make_uint8(pred_mean),
            #         interpolation="none",
            #         animated=True))

            # animation_frame_array.append(artist_array)

        all_var_bg = np.concatenate(chainer.backends.cuda.to_cpu(all_var_bg),
                                    axis=0)
        all_var = np.concatenate(chainer.backends.cuda.to_cpu(all_var), axis=0)
        all_var_z = np.concatenate(chainer.backends.cuda.to_cpu(all_var_z),
                                   axis=0)

        all_c = np.concatenate(chainer.backends.cuda.to_cpu(all_c), axis=0)
        # all_h = np.concatenate(all_h,axis=0)
        # all_u = np.concatenate(all_u,axis=0)
        logging.info("concatenating, time elapsed: " +
                     str(time.time() - start_time))

        with h5py.File(savename, "a") as f:
            f.create_dataset("variance_all_viewpoints", data=all_var)
            f.create_dataset("query_viewpoints",
                             data=np.squeeze(np.asarray(all_q_view)))
            f.create_dataset("variance_b4_gaussian", data=all_var_bg)
            f.create_dataset("variance_of_z", data=all_var_z)

            f.create_dataset("c", data=all_c)
            # f.create_dataset("h",data=all_h)
            # f.create_dataset("u",data=all_u)
        logging.info("saving, time elapsed: " + str(time.time() - start_time))

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    # fig.suptitle("GQN")
    axis_observations = fig.add_subplot(2, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")
    axis_generation = fig.add_subplot(2, 1, 2)
    axis_generation.axis("off")
    axis_generation.set_title("neural rendering")

    #==============================================================================
    # Generating animation
    #==============================================================================
    file_number = 1
    random.seed(0)
    np.random.seed(0)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s'
    )

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                animation_frame_array = []

                # shape: (batch, views, height, width, channels)
                images, viewpoints = subset[data_indices]
                camera_distance = np.mean(
                    np.linalg.norm(viewpoints[:, :, :3], axis=2))

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)
                logging.info('preprocess ' + str(time.time() - start_time))

                batch_index = 0

                total_views = images.shape[1]
                random_observation_view_indices = list(range(total_views))
                random.shuffle(random_observation_view_indices)
                random_observation_view_indices = random_observation_view_indices[:total_observations_per_scene]

                #------------------------------------------------------------------------------
                # Observations
                #------------------------------------------------------------------------------
                observed_images = images[batch_index,
                                         random_observation_view_indices]
                observed_viewpoints = viewpoints[
                    batch_index, random_observation_view_indices]

                observed_images = to_device(observed_images)
                observed_viewpoints = to_device(observed_viewpoints)

                #------------------------------------------------------------------------------
                # Generate images with a single observation
                #------------------------------------------------------------------------------
                # Scene encoder
                representation = model.compute_observation_representation(
                    observed_images[None, :1], observed_viewpoints[None, :1])

                # Update figure
                observation_index = random_observation_view_indices[0]
                observed_image = images[batch_index, observation_index]
                axis_observations_image = fill_observations_axis(
                    [observed_image])

                # save observed viewpoint
                filename = "{}/variance_{}.hdf5".format(
                    args.figure_directory, file_number)
                if os.path.exists(filename):
                    os.remove(filename)
                with h5py.File(filename, "a") as f:
                    f.create_dataset("observed_viewpoint",
                                     data=chainer.backends.cuda.to_cpu(
                                         observed_viewpoints[0]))
                    f.create_dataset(
                        "obs_viewpoint_horizontal_angle",
                        data=np.arcsin(
                            chainer.backends.cuda.to_cpu(
                                observed_viewpoints[0][0]) / camera_distance) *
                        180 / math.pi)

                logging.info('write 2 variables to hdf5 file, time elapsed: ' +
                             str(time.time() - start_time))
                obs_viewpoint = np.squeeze(observed_viewpoints[0])
                # Neural rendering
                render(representation,
                       camera_distance,
                       observed_viewpoints[0],
                       fps,
                       fps * 6,
                       animation_frame_array,
                       savename=filename)
                logging.info(
                    'write 4 other variables to hdf5 file, time elapsed: ' +
                    str(time.time() - start_time))
                #------------------------------------------------------------------------------
                # Write to file
                #------------------------------------------------------------------------------
                # anim = animation.ArtistAnimation(
                #     fig,
                #     animation_frame_array,
                #     interval=1 / fps,
                #     blit=True,
                #     repeat_delay=0)

                # anim.save(
                #     "{}/shepard_metzler_observations_{}.gif".format(
                #         args.figure_directory, file_number),
                #     writer="imagemagick",
                #     fps=fps)
                # anim.save(
                #     "{}/shepard_metzler_observations_{}.mp4".format(
                #         args.figure_directory, file_number),
                #     writer="ffmpeg",
                #     fps=2)

                if file_number == 20:
                    break
                else:
                    file_number += 1
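Note: each variance_*.hdf5 file written by Example No. 13 can be inspected offline. A minimal readback sketch using the dataset names from the create_dataset calls above (the file name is illustrative):

import h5py

with h5py.File("variance_1.hdf5", "r") as f:
    observed_viewpoint = f["observed_viewpoint"][...]
    obs_horizontal_angle = f["obs_viewpoint_horizontal_angle"][...]
    query_viewpoints = f["query_viewpoints"][...]
    image_variance = f["variance_all_viewpoints"][...]
    variance_b4_gaussian = f["variance_b4_gaussian"][...]
    variance_of_z = f["variance_of_z"][...]
    core_state_variance = f["c"][...]
    print(image_variance.shape, query_viewpoints.shape)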
Example No. 14
def main():
    os.makedirs(args.figure_directory, exist_ok=True)

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    dataset = gqn.data.Dataset(args.dataset_directory,
                               # use_ground_truth=True
                               )

    meter = Meter()
    assert meter.load(args.snapshot_directory)

    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    assert model.load(args.snapshot_directory, meter.epoch)

    if using_gpu:
        model.to_gpu()

    total_observations_per_scene = 4
    fps = 30

    black_color = -0.5
    image_shape = (3, ) + hyperparams.image_size
    axis_observations_image = np.zeros(
        (3, image_shape[1], total_observations_per_scene * image_shape[2]),
        dtype=np.float32)

    #==============================================================================
    # Utilities
    #==============================================================================
    def to_device(array):
        if using_gpu:
            array = cuda.to_gpu(array)
        return array

    def fill_observations_axis(observation_images):
        axis_observations_image = np.full(
            (3, image_shape[1], total_observations_per_scene * image_shape[2]),
            black_color,
            dtype=np.float32)
        num_current_obs = len(observation_images)
        total_obs = total_observations_per_scene
        width = image_shape[2]
        x_start = width * (total_obs - num_current_obs) // 2
        for obs_image in observation_images:
            x_end = x_start + width
            axis_observations_image[:, :, x_start:x_end] = obs_image
            x_start += width
        return axis_observations_image

    def compute_camera_angle_at_frame(t):
        horizontal_angle_rad = 2 * t * math.pi / (fps * 2) + math.pi / 4
        y_rad_top = math.pi / 3
        y_rad_bottom = -math.pi / 3
        y_rad_range = y_rad_bottom - y_rad_top
        if t < fps * 1.5:
            vertical_angle_rad = y_rad_top
        elif fps * 1.5 <= t < fps * 2.5:
            interp = (t - fps * 1.5) / fps
            vertical_angle_rad = y_rad_top + interp * y_rad_range
        elif fps * 2.5 <= t < fps * 4:
            vertical_angle_rad = y_rad_bottom
        elif fps * 4.0 <= t < fps * 5:
            interp = (t - fps * 4.0) / fps
            vertical_angle_rad = y_rad_bottom - interp * y_rad_range
        else:
            vertical_angle_rad = y_rad_top
        return horizontal_angle_rad, vertical_angle_rad

    def rotate_query_viewpoint(horizontal_angle_rad, vertical_angle_rad,
                               camera_distance):
        camera_direction = np.array([
            math.sin(horizontal_angle_rad),  # x
            math.sin(vertical_angle_rad),  # y
            math.cos(horizontal_angle_rad),  # z
        ])
        camera_direction = camera_distance * camera_direction / np.linalg.norm(
            camera_direction)
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_direction[0],
                camera_direction[1],
                camera_direction[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints
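
    # `compute_yaw_and_pitch` is imported from elsewhere in this repository and
    # is not shown here. A minimal sketch of what it is assumed to compute (the
    # yaw and pitch of a camera looking from `position` toward the origin);
    # the real helper's sign conventions may differ:
    #
    #     def compute_yaw_and_pitch(position):
    #         x, y, z = position
    #         yaw = math.atan2(x, z)
    #         pitch = -math.asin(y / np.linalg.norm(position))
    #         return yaw, pitch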


    # added/modified helpers

    def compute_horizontal_rotation_at_frame(t):
        '''Rotates the camera horizontally while keeping the elevation at zero.'''
        horizontal_angle_rad = 2 * t * math.pi / (fps * 2) + math.pi / 4
        vertical_angle_rad = 0

        return horizontal_angle_rad, vertical_angle_rad

    def get_mse_image(ground_truth, predicted):
        '''Returns the per-pixel squared error (scaled by 0.5) between the
        ground-truth and the predicted observation, as an HWC image.'''
        assert ground_truth.shape == predicted.shape

        mse_image = 0.5 * np.square(ground_truth - predicted)
        mse_image = mse_image.astype(np.float32).reshape(3, 64, 64)

        return mse_image.transpose(1, 2, 0)
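
    # Example (hypothetical shapes): with 3x64x64 inputs in [-1, 1],
    #     error_map = get_mse_image(gt_images[0], predicted_images)
    # returns a (64, 64, 3) error heat-map suitable for imshow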

    def render(representation,
               camera_distance,
               start_t,
               end_t,
               gt_images,
               gt_viewpoints,
               animation_frame_array,
               rotate_camera=True):

        gt_images = np.squeeze(gt_images)
        gt_viewpoints = cp.reshape(cp.asarray(gt_viewpoints), (-1, 1, 7))

        # Sort the ground-truth views by the x coordinate of their viewpoints
        order = cp.asnumpy(cp.argsort(cp.squeeze(gt_viewpoints)[:, 0]))
        gt_images = [gt_images[i] for i in order]
        gt_viewpoints = [gt_viewpoints[i] for i in order]
        count = 0

        # Show variance and mean images of 100 samples from the Gaussian
        for t in range(start_t, end_t):
            artist_array = [
                axis_observations.imshow(make_uint8(axis_observations_image),
                                         interpolation="none",
                                         animated=True)
            ]

            horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(t)

            if not rotate_camera:
                horizontal_angle_rad, vertical_angle_rad = compute_camera_angle_at_frame(0)
            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      vertical_angle_rad,
                                                      camera_distance)

            # shape 100x1x3x64x64, when Model is from model_testing.py
            generated_images = model.generate_image(query_viewpoints,
                                                    representation, 100)

            # generate predicted from ground truth viewpoints
            predicted_images = model.generate_image(gt_viewpoints[count],
                                                    representation, 1)

            # predicted_images = model.generate_image(query_viewpoints, representation,1)
            predicted_images = np.squeeze(predicted_images)
            image_mse = get_mse_image(gt_images[count], predicted_images)

            # Move the 100 samples to the CPU for the statistics below
            cpu_generated_images = chainer.backends.cuda.to_cpu(
                generated_images)
            generated_images = np.squeeze(cpu_generated_images)

            # `generated_images` is already a CPU array here, so reduce with NumPy
            image_mean = np.mean(generated_images, axis=0)
            image_var = np.var(generated_images, axis=0)

            # Convert the per-pixel variance map to grayscale (BT.601 luma weights)
            r, g, b = image_var
            gray_image_var = 0.2989 * r + 0.5870 * g + 0.1140 * b
            # Binarize with Otsu's threshold to highlight high-variance regions
            thresh = threshold_otsu(gray_image_var)
            var_binary = gray_image_var > thresh

            sample_image = np.squeeze(generated_images[0])

            # Step through the 15 sorted ground-truth views, advancing every 10 frames
            if count == 14:
                count = 0
            elif (t - fps) % 10 == 0:
                count += 1

            print("computed an image. Count =", count)

            artist_array.append(
                axis_generation_variance.imshow(var_binary,
                                                cmap=plt.cm.gray,
                                                interpolation="none",
                                                animated=True))
            artist_array.append(
                axis_generation_mean.imshow(make_uint8(image_mean),
                                            interpolation="none",
                                            animated=True))
            artist_array.append(
                axis_generation_sample.imshow(make_uint8(sample_image),
                                              interpolation="none",
                                              animated=True))
            artist_array.append(
                axis_generation_mse.imshow(make_uint8(image_mse),
                                           cmap='gray',
                                           interpolation="none",
                                           animated=True))

            animation_frame_array.append(artist_array)

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(6, 7))
    plt.subplots_adjust(left=0.1, right=0.95, bottom=0.1, top=0.95)
    # fig.suptitle("GQN")
    axis_observations = fig.add_subplot(3, 1, 1)
    axis_observations.axis("off")
    axis_observations.set_title("observations")

    axis_generation_mse = fig.add_subplot(3, 2, 3)
    axis_generation_mse.axis("off")
    axis_generation_mse.set_title("MSE")

    axis_generation_variance = fig.add_subplot(3, 2, 4)
    axis_generation_variance.axis("off")
    axis_generation_variance.set_title("Variance")

    axis_generation_mean = fig.add_subplot(3, 2, 5)
    axis_generation_mean.axis("off")
    axis_generation_mean.set_title("Mean")

    axis_generation_sample = fig.add_subplot(3, 2, 6)
    axis_generation_sample.axis("off")
    axis_generation_sample.set_title("Normal Rendering")

    #==============================================================================
    # Generating animation
    #==============================================================================
    file_number = 1
    random.seed(0)
    np.random.seed(0)
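    # Fix the seeds so the same observation views are sampled on every run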

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                animation_frame_array = []

                # shape: (batch, views, height, width, channels)
                images, viewpoints = subset[data_indices]
                camera_distance = np.mean(
                    np.linalg.norm(viewpoints[:, :, :3], axis=2))
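                # Mean distance of the ground-truth cameras from the origin,
                # reused as the orbit radius of the rotating query camera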

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                total_views = images.shape[1]
                random_observation_view_indices = list(range(total_views))
                random.shuffle(random_observation_view_indices)
                random_viewed_observation_indices = random_observation_view_indices[:total_observations_per_scene]
                #------------------------------------------------------------------------------
                # Ground Truth
                #------------------------------------------------------------------------------

                gt_images = images
                gt_viewpoints = viewpoints

                #------------------------------------------------------------------------------
                # Observations
                #------------------------------------------------------------------------------
                observed_images = images[batch_index,
                                         random_viewed_observation_indices]
                observed_viewpoints = viewpoints[
                    batch_index, random_viewed_observation_indices]

                observed_images = to_device(observed_images)
                observed_viewpoints = to_device(observed_viewpoints)

                #------------------------------------------------------------------------------
                # Generate images with a single observation
                #------------------------------------------------------------------------------
                # Scene encoder
                representation = model.compute_observation_representation(
                    observed_images[None, :1], observed_viewpoints[None, :1])

                # Update figure
                observation_index = random_viewed_observation_indices[0]
                observed_image = images[batch_index, observation_index]
                axis_observations_image = fill_observations_axis(
                    [observed_image])

                # Neural rendering
                render(representation, camera_distance, fps, fps * 6,
                       gt_images, gt_viewpoints, animation_frame_array)

                #------------------------------------------------------------------------------
                # Add observations
                #------------------------------------------------------------------------------
                for n in range(1, total_observations_per_scene):
                    observation_indices = random_viewed_observation_indices[:n + 1]
                    axis_observations_image = fill_observations_axis(
                        images[batch_index, observation_indices])

                    # Scene encoder
                    representation = model.compute_observation_representation(
                        observed_images[None, :n + 1],
                        observed_viewpoints[None, :n + 1])

                    # Neural rendering
                    render(representation,
                           camera_distance,
                           0,
                           fps // 2,
                           gt_images,
                           gt_viewpoints,
                           animation_frame_array,
                           rotate_camera=False)

                #------------------------------------------------------------------------------
                # Generate images with all observations
                #------------------------------------------------------------------------------
                # Scene encoder
                representation = model.compute_observation_representation(
                    observed_images[None, :total_observations_per_scene],
                    observed_viewpoints[None, :total_observations_per_scene])

                # Neural rendering
                render(representation, camera_distance, 0, fps * 6, gt_images,
                       gt_viewpoints, animation_frame_array)

                #------------------------------------------------------------------------------
                # Write to file
                #------------------------------------------------------------------------------
                anim = animation.ArtistAnimation(fig,
                                                 animation_frame_array,
                                                 interval=1000 / fps,  # milliseconds per frame
                                                 blit=True,
                                                 repeat_delay=0)

                # anim.save(
                #     "{}/shepard_metzler_observations_{}.gif".format(
                #         args.figure_directory, file_number),
                #     writer="imagemagick",
                #     fps=fps)
                anim.save("{}/shepard_metzler_observations_{}.mp4".format(
                    args.figure_directory, file_number),
                          writer="ffmpeg",
                          fps=fps)
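
                # The "ffmpeg" writer requires ffmpeg to be installed and on PATH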

                print("video saved")
                file_number += 1
Example No. 15
def main():
    os.makedirs(args.figure_directory, exist_ok=True)

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cupy

    dataset = gqn.data.Dataset(args.dataset_path)

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()

    fig = plt.figure(figsize=(12, 16))

    axis_observation_1 = plt.subplot2grid((4, 3), (0, 0))
    axis_observation_2 = plt.subplot2grid((4, 3), (0, 1))
    axis_observation_3 = plt.subplot2grid((4, 3), (0, 2))

    axis_predictions = plt.subplot2grid((4, 3), (1, 0), rowspan=3, colspan=3)

    axis_observation_1.axis("off")
    axis_observation_2.axis("off")
    axis_observation_3.axis("off")
    axis_predictions.set_xticks([])
    axis_predictions.set_yticks([])

    axis_observation_1.set_title("Observation 1", fontsize=22)
    axis_observation_2.set_title("Observation 2", fontsize=22)
    axis_observation_3.set_title("Observation 3", fontsize=22)

    axis_predictions.set_title("Neural Rendering", fontsize=22)
    axis_predictions.set_xlabel("Yaw", fontsize=22)
    axis_predictions.set_ylabel("Pitch", fontsize=22)

    num_views_per_scene = 3
    num_yaw_pitch_steps = 10
    image_width, image_height = hyperparams.image_size
    image_shape = (3, ) + hyperparams.image_size
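    # Canvas for a num_yaw_pitch_steps x num_yaw_pitch_steps grid of rendered views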
    prediction_images = make_uint8(
        np.full((num_yaw_pitch_steps * image_width,
                 num_yaw_pitch_steps * image_height, 3), 0))
    file_number = 1

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                total_views = images.shape[1]
                observation_view_indices = list(range(total_views))
                random.shuffle(observation_view_indices)
                observation_view_indices = observation_view_indices[:num_views_per_scene]

                observed_image_array = images[:, observation_view_indices]
                representation = model.compute_observation_representation(
                    observed_image_array, viewpoints[:, observation_view_indices])

                axis_observation_1.imshow(
                    make_uint8(observed_image_array[batch_index, 0]))
                axis_observation_2.imshow(
                    make_uint8(observed_image_array[batch_index, 1]))
                axis_observation_3.imshow(
                    make_uint8(observed_image_array[batch_index, 2]))

                x_angle_rad = math.pi / 2
                for pitch_loop in range(num_yaw_pitch_steps):
                    y_angle_rad = math.pi
                    for yaw_loop in range(num_yaw_pitch_steps):
                        eye_norm = 3
                        eye_y = eye_norm * math.sin(x_angle_rad)
                        # scale by eye_norm so the camera stays on a sphere of radius eye_norm
                        radius = eye_norm * math.cos(x_angle_rad)
                        eye = (radius * math.sin(y_angle_rad), eye_y,
                               radius * math.cos(y_angle_rad))
                        center = (0, 0, 0)
                        yaw = gqn.math.yaw(eye, center)
                        pitch = gqn.math.pitch(eye, center)
                        query_viewpoints = xp.array(
                            (eye[0], eye[1], eye[2], math.cos(yaw),
                             math.sin(yaw), math.cos(pitch), math.sin(pitch)),
                            dtype=np.float32)
                        query_viewpoints = xp.broadcast_to(
                            query_viewpoints, (1, ) + query_viewpoints.shape)

                        generated_images = model.generate_image(
                            query_viewpoints, representation)[0]

                        yi_start = pitch_loop * image_height
                        yi_end = (pitch_loop + 1) * image_height
                        xi_start = yaw_loop * image_width
                        xi_end = (yaw_loop + 1) * image_width
                        prediction_images[yi_start:yi_end,
                                          xi_start:xi_end] = make_uint8(
                                              generated_images)

                        y_angle_rad -= 2 * math.pi / num_yaw_pitch_steps
                    x_angle_rad -= math.pi / num_yaw_pitch_steps

                axis_predictions.imshow(prediction_images)

                plt.savefig("{}/shepard_matzler_predictions_{}.png".format(
                    args.figure_directory, file_number))
                file_number += 1
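
# `gqn.math.yaw` and `gqn.math.pitch` are library helpers whose source is not
# shown above. A minimal sketch of their assumed behaviour (angles of the ray
# from `eye` toward `center`); the real implementations may use other sign
# conventions:
#
#     def yaw(eye, center):
#         dx, dz = center[0] - eye[0], center[2] - eye[2]
#         return math.atan2(dx, dz)
#
#     def pitch(eye, center):
#         direction = [c - e for c, e in zip(center, eye)]
#         norm = math.sqrt(sum(d * d for d in direction))
#         return math.asin(direction[1] / norm)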
Example No. 16
def main():
    os.makedirs(args.figure_directory, exist_ok=True)

    #### Model ####
    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    hyperparams = HyperParameters(snapshot_directory=args.snapshot_path)
    model = Model(hyperparams, snapshot_directory=args.snapshot_path)
    if using_gpu:
        model.to_gpu()
    print(hyperparams)

    #### Renderer ####
    # Set GPU device
    rtx.set_device(args.gpu_device)

    # Initialize colors: evenly spaced hues at fixed saturation and value
    # (colorsys.hsv_to_rgb takes hue, saturation, value)
    color_array = []
    for n in range(args.num_colors):
        hue = n / (args.num_colors - 1)
        saturation = 0.9
        value = 1
        red, green, blue = colorsys.hsv_to_rgb(hue, saturation, value)
        color_array.append((red, green, blue, 1))

    screen_width = args.image_size
    screen_height = args.image_size

    # Setting up a raytracer
    rt_args = rtx.RayTracingArguments()
    rt_args.num_rays_per_pixel = 2048
    rt_args.max_bounce = 4
    rt_args.supersampling_enabled = False
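
    # 2048 rays per pixel with up to 4 bounces gives low-noise (but slow)
    # renders; supersampling is left off since frames are bilateral-filtered below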

    cuda_args = rtx.CUDAKernelLaunchArguments()
    cuda_args.num_threads = 64
    cuda_args.num_rays_per_thread = 32

    renderer = rtx.Renderer()
    render_buffer = np.zeros((screen_height, screen_width, 3),
                             dtype=np.float32)

    camera = rtx.OrthographicCamera()

    #### Figure ####
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(8, 4))
    fig.suptitle("GQN")

    axis_observation = fig.add_subplot(1, 2, 1)
    axis_observation.axis("off")
    axis_observation.set_title("Observation")

    axis_generation = fig.add_subplot(1, 2, 2)
    axis_generation.axis("off")
    axis_generation.set_title("Generation")

    for scene_index in range(1, 100):
        scene = build_scene(color_array)

        eye_scale = 3
        total_frames_per_rotation = 48
        artist_frame_array = []

        observation_viewpoint_angle_rad = 0
        for k in range(5):
            eye = tuple(p * eye_scale for p in [
                math.cos(observation_viewpoint_angle_rad),
                math.sin(observation_viewpoint_angle_rad), 0
            ])
            center = (0, 0, 0)
            camera.look_at(eye, center, up=(0, 1, 0))

            renderer.render(scene, camera, rt_args, cuda_args, render_buffer)

            # Convert to sRGB
            frame = np.power(np.clip(render_buffer, 0, 1), 1.0 / 2.2)
            frame = np.uint8(frame * 255)
            frame = cv2.bilateralFilter(frame, 3, 25, 25)
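            # Bilateral filtering suppresses residual Monte-Carlo noise while
            # preserving edges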

            observation_viewpoint_angle_rad += math.pi / 20

            yaw = gqn.math.yaw(eye, center)
            pitch = gqn.math.pitch(eye, center)
            observed_viewpoint = np.array(
                eye + (math.cos(yaw), math.sin(yaw), math.cos(pitch),
                       math.sin(pitch)),
                dtype=np.float32)
            observed_viewpoint = observed_viewpoint[None, None, ...]

            observed_image = frame.astype(np.float32)
            observed_image = preprocess_images(observed_image, add_noise=False)
            observed_image = observed_image[None, None, ...]
            observed_image = observed_image.transpose((0, 1, 4, 2, 3))

            if using_gpu:
                observed_viewpoint = to_gpu(observed_viewpoint)
                observed_image = to_gpu(observed_image)

            representation = model.compute_observation_representation(
                observed_image, observed_viewpoint)

            query_viewpoint_angle_rad = 0
            for t in range(total_frames_per_rotation):
                artist_array = []

                query_viewpoint = rotate_query_viewpoint(
                    query_viewpoint_angle_rad, 1, xp)
                # query_viewpoint = rotate_query_viewpoint(math.pi / 6, 1, xp)
                generated_image = model.generate_image(query_viewpoint,
                                                       representation)
                generated_image = make_uint8(generated_image[0])

                artist_array.append(
                    axis_observation.imshow(frame,
                                            interpolation="none",
                                            animated=True))
                artist_array.append(
                    axis_generation.imshow(generated_image, animated=True))

                query_viewpoint_angle_rad += 2 * math.pi / total_frames_per_rotation
                artist_frame_array.append(artist_array)

        # Build and save the animation once per scene, after all frames are collected
        anim = animation.ArtistAnimation(fig,
                                         artist_frame_array,
                                         interval=1000 / 24,  # milliseconds per frame
                                         blit=True,
                                         repeat_delay=0)
        anim.save("{}/shepard_matzler_uncertainty_{}.mp4".format(
            args.figure_directory, scene_index),
                  writer="ffmpeg",
                  fps=12)