예제 #1
0
    def rotate_query_viewpoint(horizontal_angle_rad, camera_distance,
                               camera_position_y):
        camera_position = np.array([
            camera_distance * math.sin(horizontal_angle_rad),  # x
            camera_position_y,
            camera_distance * math.cos(horizontal_angle_rad),  # z
        ])
        center = np.array((0, camera_position_y, 0))
        camera_direction = camera_position - center
        yaw, pitch = compute_yaw_and_pitch(camera_direction)

        query_viewpoints = xp.array(
            (
                camera_position[0],
                camera_position[1],
                camera_position[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)

        return query_viewpoints
예제 #2
0
 def rotate_query_viewpoint(horizontal_angle_rad, vertical_angle_rad,
                            camera_distance):
     camera_direction = np.array([
         math.sin(horizontal_angle_rad),  # x
         math.sin(vertical_angle_rad),  # y
         math.cos(horizontal_angle_rad),  # z
     ])
     camera_direction = camera_distance * camera_direction / np.linalg.norm(
         camera_direction)
     yaw, pitch = compute_yaw_and_pitch(camera_direction)
     query_viewpoints = xp.array(
         (
             camera_direction[0],
             camera_direction[1],
             camera_direction[2],
             math.cos(yaw),
             math.sin(yaw),
             math.cos(pitch),
             math.sin(pitch),
         ),
         dtype=np.float32,
     )
     query_viewpoints = xp.broadcast_to(query_viewpoints,
                                        (1, ) + query_viewpoints.shape)
     return query_viewpoints
    def rotate_query_viewpoint(horizontal_angle_rad, vertical_angle_rad,
                               camera_distance):
        camera_direction = np.array([
            math.sin(horizontal_angle_rad),  # x
            math.sin(vertical_angle_rad),  # y
            math.cos(horizontal_angle_rad),  # z
        ])

        # removed linalg norm for observation purposes
        camera_direction = camera_distance * camera_direction
        # ipdb.set_trace()
        yaw, pitch = compute_yaw_and_pitch(camera_direction)
        query_viewpoints = xp.array(
            (
                camera_direction[0],
                camera_direction[1],
                camera_direction[2],
                math.cos(yaw),
                math.sin(yaw),
                math.cos(pitch),
                math.sin(pitch),
            ),
            dtype=np.float32,
        )
        query_viewpoints = xp.broadcast_to(query_viewpoints,
                                           (1, ) + query_viewpoints.shape)
        return query_viewpoints
예제 #4
0
def gqn_process():
    # load model
    my_gpu = args.gpu_device
    if my_gpu < 0:
        xp = np
    else:
        cuda.get_device(args.gpu_device).use()
        xp = cp
    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    chainer.serializers.load_hdf5(args.snapshot_file, model)
    if my_gpu > -1:
        model.to_gpu()
    chainer.print_runtime_info()

    observed_viewpoint, observed_image, offset = data_recv.get()
    observed_viewpoint = np.expand_dims(np.expand_dims(
        np.asarray(observed_viewpoint).astype(np.float32), axis=0),
                                        axis=0)
    observed_image = np.expand_dims(np.expand_dims(
        np.asarray(observed_image).astype(np.float32), axis=0),
                                    axis=0)
    offset = np.asarray(offset)

    camera_distance = np.mean(
        np.linalg.norm(observed_viewpoint[:, :, :3], axis=2))
    camera_position_z = np.mean(observed_viewpoint[:, :, 1])

    observed_image = observed_image.transpose(
        (0, 1, 4, 2, 3)).astype(np.float32)
    observed_image = preprocess_images(observed_image)

    # create representation and generate uncertainty map of environment [1000 viewpoints?]
    total_frames = 10
    representation = model.compute_observation_representation(
        observed_image, observed_viewpoint)

    # get predictions
    highest_var = 0.0
    no_of_samples = 20
    highest_var_vp = 0
    try:
        for i in range(0, total_frames):
            horizontal_angle_rad = compute_camera_angle_at_frame(
                i, total_frames)

            query_viewpoints = rotate_query_viewpoint(horizontal_angle_rad,
                                                      camera_distance,
                                                      camera_position_z, xp)

            generated_images = xp.squeeze(
                xp.array(
                    model.generate_images(query_viewpoints, representation,
                                          no_of_samples)))
            var_image = xp.var(generated_images, axis=0)
            # var_image = chainer.backends.cuda.to_cpu(var_image)
            # grayscale
            # r,g,b = var_image
            # gray_var_image = 0.2989*r+0.5870*g+0.1140*b
            current_var = xp.mean(var_image)

            if highest_var == 0:
                highest_var = current_var
                highest_var_vp = query_viewpoints[0]
            elif current_var > highest_var:
                highest_var = current_var
                highest_var_vp = query_viewpoints[0]
    except KeyboardInterrupt:
        logging.warning('interrupt')

    # return next viewpoint and unit vector of end effector based on highest uncertainty found in the uncertainty map
    _x, _y, _z, _, _, _, _ = highest_var_vp

    _yaw, _pitch = compute_yaw_and_pitch([_x, _y, _z])
    next_viewpoint = [_x, _y, _z, _yaw, _pitch]
    next_viewpoint = [chainer.backends.cuda.to_cpu(x) for x in next_viewpoint]
    next_viewpoint = [float(x) for x in next_viewpoint]
    data_send.put(next_viewpoint)
예제 #5
0
def main():
    try:
        os.makedirs(args.figure_directory)
    except:
        pass

    xp = np
    using_gpu = args.gpu_device >= 0
    if using_gpu:
        cuda.get_device(args.gpu_device).use()
        xp = cp

    dataset = gqn.data.Dataset(args.dataset_directory)

    meter = Meter()
    assert meter.load(args.snapshot_directory)

    hyperparams = HyperParameters()
    assert hyperparams.load(args.snapshot_directory)

    model = Model(hyperparams)
    assert model.load(args.snapshot_directory, meter.epoch)

    if using_gpu:
        model.to_gpu()

    #==============================================================================
    # Visualization
    #==============================================================================
    plt.figure(figsize=(12, 16))

    axis_observation_1 = plt.subplot2grid((4, 3), (0, 0))
    axis_observation_2 = plt.subplot2grid((4, 3), (0, 1))
    axis_observation_3 = plt.subplot2grid((4, 3), (0, 2))

    axis_predictions = plt.subplot2grid((4, 3), (1, 0), rowspan=3, colspan=3)

    axis_observation_1.axis("off")
    axis_observation_2.axis("off")
    axis_observation_3.axis("off")
    axis_predictions.set_xticks([], [])
    axis_predictions.set_yticks([], [])

    axis_observation_1.set_title("Observation 1", fontsize=22)
    axis_observation_2.set_title("Observation 2", fontsize=22)
    axis_observation_3.set_title("Observation 3", fontsize=22)

    axis_predictions.set_title("Neural Rendering", fontsize=22)
    axis_predictions.set_xlabel("Yaw", fontsize=22)
    axis_predictions.set_ylabel("Pitch", fontsize=22)

    #==============================================================================
    # Generating images
    #==============================================================================
    num_views_per_scene = 3
    num_yaw_pitch_steps = 10
    image_width, image_height = hyperparams.image_size
    prediction_images = make_uint8(
        np.full((num_yaw_pitch_steps * image_width,
                 num_yaw_pitch_steps * image_height, 3), 0))
    file_number = 1
    random.seed(0)
    np.random.seed(0)

    with chainer.no_backprop_mode():
        for subset in dataset:
            iterator = gqn.data.Iterator(subset, batch_size=1)

            for data_indices in iterator:
                # shape: (batch, views, height, width, channels)
                # range: [-1, 1]
                images, viewpoints = subset[data_indices]
                camera_distance = np.mean(
                    np.linalg.norm(viewpoints[:, :, :3], axis=2))

                # (batch, views, height, width, channels) -> (batch, views, channels, height, width)
                images = images.transpose((0, 1, 4, 2, 3)).astype(np.float32)
                images = preprocess_images(images)

                batch_index = 0

                #------------------------------------------------------------------------------
                # Observations
                #------------------------------------------------------------------------------
                total_views = images.shape[1]
                random_observation_view_indices = list(range(total_views))
                random.shuffle(random_observation_view_indices)
                random_observation_view_indices = random_observation_view_indices[:
                                                                                  num_views_per_scene]

                observed_images = images[:, random_observation_view_indices]
                observed_viewpoints = viewpoints[:,
                                                 random_observation_view_indices]
                representation = model.compute_observation_representation(
                    observed_images, observed_viewpoints)

                axis_observation_1.imshow(
                    make_uint8(observed_images[batch_index, 0]))
                axis_observation_2.imshow(
                    make_uint8(observed_images[batch_index, 1]))
                axis_observation_3.imshow(
                    make_uint8(observed_images[batch_index, 2]))

                y_angle_rad = math.pi / 2

                for pitch_loop in range(num_yaw_pitch_steps):
                    camera_y = math.sin(y_angle_rad)
                    x_angle_rad = math.pi

                    for yaw_loop in range(num_yaw_pitch_steps):
                        camera_direction = np.array([
                            math.sin(x_angle_rad), camera_y,
                            math.cos(x_angle_rad)
                        ])
                        camera_direction = camera_distance * camera_direction / np.linalg.norm(
                            camera_direction)
                        yaw, pitch = compute_yaw_and_pitch(camera_direction)

                        query_viewpoints = xp.array(
                            (
                                camera_direction[0],
                                camera_direction[1],
                                camera_direction[2],
                                math.cos(yaw),
                                math.sin(yaw),
                                math.cos(pitch),
                                math.sin(pitch),
                            ),
                            dtype=np.float32,
                        )
                        query_viewpoints = xp.broadcast_to(
                            query_viewpoints, (1, ) + query_viewpoints.shape)

                        generated_images = model.generate_image(
                            query_viewpoints, representation)[0]

                        yi_start = pitch_loop * image_height
                        yi_end = (pitch_loop + 1) * image_height
                        xi_start = yaw_loop * image_width
                        xi_end = (yaw_loop + 1) * image_width
                        prediction_images[yi_start:yi_end,
                                          xi_start:xi_end] = make_uint8(
                                              generated_images)

                        x_angle_rad -= 2 * math.pi / num_yaw_pitch_steps
                    y_angle_rad -= math.pi / num_yaw_pitch_steps

                axis_predictions.imshow(prediction_images)

                plt.savefig("{}/shepard_metzler_predictions_{}.png".format(
                    args.figure_directory, file_number))
                file_number += 1