Example #1
    def __init__(self,
                 img_mean: ivy.Array,
                 cam_rel_mat: ivy.Array,
                 img_var: ivy.Array = None,
                 validity_mask: ivy.Array = None,
                 pose_mean: ivy.Array = None,
                 pose_cov: ivy.Array = None):
        """
        Create esm image measurement container

        :param img_mean: Camera-relative co-ordinates and image features
                            *[batch_size, timesteps, height, width, 3 + feat]*
        :type img_mean: array
        :param cam_rel_mat: The pose of the camera relative to the current agent pose, in matrix form
                            *[batch_size, timesteps, 3, 4]*
        :type cam_rel_mat: array
        :param img_var: Image depth and feature variance values, assumed all zero if None.
                        *[batch_size, timesteps, height, width, 1 + feat]*
        :type img_var: array, optional
        :param validity_mask: Validity mask, indicating which pixels should be considered. Assumed all valid if None
                                *[batch_size, timesteps, height, width, 1]*
        :type validity_mask: array, optional
        :param pose_mean: The pose of the camera relative to the current agent pose, in rotation vector pose form.
                            Inferred from cam_rel_mat if None. *[batch_size, timesteps, 6]*
        :type pose_mean: array, optional
        :param pose_cov: The covariance of the camera relative pose, in rotation vector form. Assumed all zero if None.
                            *[batch_size, timesteps, 6, 6]*
        :type pose_cov: array, optional
        """
        img_mean = _pad_to_batch_n_time_dims(img_mean, 5)
        cam_rel_mat = _pad_to_batch_n_time_dims(cam_rel_mat, 4)
        self['img_mean'] = img_mean
        self['cam_rel_mat'] = cam_rel_mat
        if img_var is None:
            img_var = ivy.zeros_like(img_mean)
        else:
            img_var = _pad_to_batch_n_time_dims(img_var, 5)
        self['img_var'] = img_var
        if validity_mask is None:
            validity_mask = ivy.ones_like(img_mean[..., 0:1])
        else:
            validity_mask = _pad_to_batch_n_time_dims(validity_mask, 5)
        self['validity_mask'] = validity_mask
        if pose_mean is None:
            pose_mean = ivy_mech.mat_pose_to_rot_vec_pose(cam_rel_mat)
        else:
            pose_mean = _pad_to_batch_n_time_dims(pose_mean, 3)
        self['pose_mean'] = pose_mean
        if pose_cov is None:
            pose_cov = ivy.tile(ivy.expand_dims(ivy.zeros_like(pose_mean), -1), (1, 1, 1, 6))
        else:
            pose_cov = _pad_to_batch_n_time_dims(pose_cov, 4)
        self['pose_cov'] = pose_cov
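
As a usage illustration, here is a minimal NumPy sketch of the defaulting logic above. The pad_to_batch_n_time_dims helper below is a hypothetical stand-in for the private _pad_to_batch_n_time_dims, not the library's implementation: missing variances default to zero, the validity mask defaults to all-ones, and inputs are padded with leading batch/time dimensions.

import numpy as np

def pad_to_batch_n_time_dims(x, target_ndim):
    # hypothetical stand-in: prepend singleton dims until x has target_ndim dims
    while x.ndim < target_ndim:
        x = x[np.newaxis]
    return x

# a single un-batched image of camera-relative coords plus one feature channel
img_mean = np.zeros((32, 32, 4), dtype='float32')        # h x w x (3 + feat)
img_mean = pad_to_batch_n_time_dims(img_mean, 5)         # 1 x 1 x h x w x (3 + feat)
img_var = np.zeros_like(img_mean)                        # default: zero variance
validity_mask = np.ones_like(img_mean[..., 0:1])         # default: all pixels valid
print(img_mean.shape, img_var.shape, validity_mask.shape)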
Example #2
def _rot_mat_to_zxz_euler(rot_mat):
    # BS x 1
    euler_angles_1 = _ivy.acos(rot_mat[..., 2, 2:3])

    gimbal_validity = _ivy.abs(rot_mat[..., 0, 2:3]) > GIMBAL_TOL

    r12 = rot_mat[..., 0, 1:2]
    r11 = rot_mat[..., 0, 0:1]
    gimbal_euler_angles_0 = _ivy.atan2(-r12, r11)
    gimbal_euler_angles_2 = _ivy.zeros_like(gimbal_euler_angles_0)

    # BS x 3
    gimbal_euler_angles = _ivy.concatenate(
        (gimbal_euler_angles_0, euler_angles_1, gimbal_euler_angles_2), -1)

    # BS x 1
    r31 = rot_mat[..., 2, 0:1]
    r32 = rot_mat[..., 2, 1:2]
    r13 = rot_mat[..., 0, 2:3]
    r23 = rot_mat[..., 1, 2:3]
    normal_euler_angles_0 = _ivy.atan2(r31, r32)
    normal_euler_angles_2 = _ivy.atan2(r13, -r23)

    # BS x 3
    normal_euler_angles = _ivy.concatenate(
        (normal_euler_angles_0, euler_angles_1, normal_euler_angles_2), -1)

    return _ivy.where(gimbal_validity, normal_euler_angles,
                      gimbal_euler_angles)
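
The non-gimbal branch can be sanity-checked with a small NumPy round trip. The composition order assumed here, R = Rz(a2) @ Rx(a1) @ Rz(a0), is one ZXZ convention consistent with the index pattern above; it is an assumption for illustration, not a statement of the library's documented convention.

import numpy as np

def rot_z(t):
    c, s = np.cos(t), np.sin(t)
    return np.array([[c, -s, 0.], [s, c, 0.], [0., 0., 1.]])

def rot_x(t):
    c, s = np.cos(t), np.sin(t)
    return np.array([[1., 0., 0.], [0., c, -s], [0., s, c]])

# target angles, away from the gimbal-lock configuration (middle angle near 0 or pi)
a0, a1, a2 = 0.3, 1.1, -0.7
R = rot_z(a2) @ rot_x(a1) @ rot_z(a0)   # assumed composition order

# same index pattern as the non-gimbal branch above
e1 = np.arccos(R[2, 2])
e0 = np.arctan2(R[2, 0], R[2, 1])
e2 = np.arctan2(R[0, 2], -R[1, 2])

assert np.allclose([e0, e1, e2], [a0, a1, a2])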
Example #3
    def __init__(self,
                 img_meas: Dict[str, ESMCamMeasurement],
                 agent_rel_mat: ivy.Array,
                 control_mean: ivy.Array = None,
                 control_cov: ivy.Array = None):
        """
        Create esm observation container

        :param img_meas: dict of ESMCamMeasurement objects, with keys for camera names.
        :type img_meas: dict of ESMCamMeasurement
        :param agent_rel_mat: The pose of the agent relative to the previous pose, in matrix form
                                *[batch_size, timesteps, 3, 4]*.
        :type agent_rel_mat: array
        :param control_mean: The pose of the agent relative to the previous pose, in rotation vector pose form.
                                Inferred from agent_rel_mat if None. *[batch_size, timesteps, 6]*
        :type control_mean: array, optional
        :param control_cov: The covariance of the agent relative pose, in rotation vector form.
                             Assumed all zero if None. *[batch_size, timesteps, 6, 6]*.
        :type control_cov: array, optional
        """
        self['img_meas'] = Container(img_meas)
        agent_rel_mat = _pad_to_batch_n_time_dims(agent_rel_mat, 4)
        self['agent_rel_mat'] = agent_rel_mat
        if control_mean is None:
            control_mean = ivy_mech.mat_pose_to_rot_vec_pose(agent_rel_mat)
        else:
            control_mean = _pad_to_batch_n_time_dims(control_mean, 3)
        self['control_mean'] = control_mean
        if control_cov is None:
            control_cov = ivy.tile(ivy.expand_dims(ivy.zeros_like(control_mean), -1), (1, 1, 1, 6))
        else:
            control_cov = _pad_to_batch_n_time_dims(control_cov, 4)
        self['control_cov'] = control_cov
Example #4
def _rot_mat_to_yzy_euler(rot_mat):
    # BS x 1
    euler_angles_1 = _ivy.acos(rot_mat[..., 1, 1:2])

    gimbal_validity = _ivy.abs(rot_mat[..., 1, 0:1]) > GIMBAL_TOL

    r31 = rot_mat[..., 2, 0:1]
    r33 = rot_mat[..., 2, 2:3]
    gimbal_euler_angles_0 = _ivy.atan2(-r31, r33)
    gimbal_euler_angles_2 = _ivy.zeros_like(gimbal_euler_angles_0)

    # BS x 3
    gimbal_euler_angles = _ivy.concatenate(
        (gimbal_euler_angles_0, euler_angles_1, gimbal_euler_angles_2), -1)

    # BS x 1
    r23 = rot_mat[..., 1, 2:3]
    r21 = rot_mat[..., 1, 0:1]
    r32 = rot_mat[..., 2, 1:2]
    r12 = rot_mat[..., 0, 1:2]
    normal_euler_angles_0 = _ivy.atan2(r23, r21)
    normal_euler_angles_2 = _ivy.atan2(r32, r12)

    # BS x 3
    normal_euler_angles = _ivy.concatenate(
        (normal_euler_angles_0, euler_angles_1, normal_euler_angles_2), -1)

    return _ivy.where(gimbal_validity, normal_euler_angles,
                      gimbal_euler_angles)
Example #5
def _rot_mat_to_zyx_euler(rot_mat):
    # BS x 1
    euler_angles_1 = _ivy.asin(rot_mat[..., 0, 2:3])

    gimbal_validity = _ivy.abs(rot_mat[..., 1, 1:2]) > GIMBAL_TOL

    r21 = rot_mat[..., 1, 0:1]
    r22 = rot_mat[..., 1, 1:2]
    gimbal_euler_angles_0 = _ivy.atan2(r21, r22)
    gimbal_euler_angles_2 = _ivy.zeros_like(gimbal_euler_angles_0)

    # BS x 3
    gimbal_euler_angles = _ivy.concatenate(
        (gimbal_euler_angles_0, euler_angles_1, gimbal_euler_angles_2), -1)

    # BS x 1
    r12 = rot_mat[..., 0, 1:2]
    r11 = rot_mat[..., 0, 0:1]
    r23 = rot_mat[..., 1, 2:3]
    r33 = rot_mat[..., 2, 2:3]
    normal_euler_angles_0 = _ivy.atan2(-r12, r11)
    normal_euler_angles_2 = _ivy.atan2(-r23, r33)

    # BS x 3
    normal_euler_angles = _ivy.concatenate(
        (normal_euler_angles_0, euler_angles_1, normal_euler_angles_2), -1)

    return _ivy.where(gimbal_validity, normal_euler_angles,
                      gimbal_euler_angles)
Example #6
def _se_to_mask(se: ivy.Array) -> ivy.Array:
    se_h, se_w = se.shape
    se_flat = ivy.reshape(se, (-1,))
    num_feats = se_h * se_w
    i_s = ivy.expand_dims(ivy.arange(num_feats, dev_str=ivy.dev_str(se)), -1)
    y_s = i_s % se_h
    x_s = i_s // se_h
    indices = ivy.concatenate((i_s, ivy.zeros_like(i_s, dtype_str='int32'), x_s, y_s), -1)
    out = ivy.scatter_nd(
        indices, ivy.cast(se_flat >= 0, ivy.dtype_str(se)), (num_feats, 1, se_h, se_w), dev_str=ivy.dev_str(se))
    return out
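
For intuition, a NumPy re-statement of the same mapping (illustration only): each cell of the structuring element becomes its own one-hot spatial kernel, and cells with a negative value yield an all-zero kernel.

import numpy as np

def se_to_mask_np(se):
    se_h, se_w = se.shape
    out = np.zeros((se_h * se_w, 1, se_h, se_w), dtype=se.dtype)
    for i, val in enumerate(se.reshape(-1)):
        # mirror the index maths above: x = i // se_h, y = i % se_h
        out[i, 0, i // se_h, i % se_h] = float(val >= 0)
    return out

se = np.array([[-1., 0., -1.],
               [ 0., 0.,  0.],
               [-1., 0., -1.]], dtype='float32')   # cross-shaped structuring element
mask = se_to_mask_np(se)
print(mask.shape)    # (9, 1, 3, 3)
print(mask.sum())    # 5.0 -> one active cell per non-negative entry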
Example #7
def _get_dummy_obs(batch_size, num_frames, num_cams, image_dims, num_feature_channels, dev_str='cpu', ones=False,
                   empty=False):

    uniform_pixel_coords =\
        ivy_vision.create_uniform_pixel_coords_image(image_dims, [batch_size, num_frames], dev_str=dev_str)

    img_meas = dict()
    for i in range(num_cams):
        validity_mask = ivy.ones([batch_size, num_frames] + image_dims + [1], dev_str=dev_str)
        if ones:
            img_mean = ivy.concatenate((uniform_pixel_coords[..., 0:2], ivy.ones(
                [batch_size, num_frames] + image_dims + [1 + num_feature_channels], dev_str=dev_str)), -1)
            img_var = ivy.ones(
                     [batch_size, num_frames] + image_dims + [3 + num_feature_channels], dev_str=dev_str)*1e-3
            pose_mean = ivy.zeros([batch_size, num_frames, 6], dev_str=dev_str)
            pose_cov = ivy.ones([batch_size, num_frames, 6, 6], dev_str=dev_str)*1e-3
        else:
            img_mean = ivy.concatenate((uniform_pixel_coords[..., 0:2], ivy.random_uniform(
                1e-3, 1, [batch_size, num_frames] + image_dims + [1 + num_feature_channels], dev_str=dev_str)), -1)
            img_var = ivy.random_uniform(
                     1e-3, 1, [batch_size, num_frames] + image_dims + [3 + num_feature_channels], dev_str=dev_str)
            pose_mean = ivy.random_uniform(1e-3, 1, [batch_size, num_frames, 6], dev_str=dev_str)
            pose_cov = ivy.random_uniform(1e-3, 1, [batch_size, num_frames, 6, 6], dev_str=dev_str)
        if empty:
            img_var = ivy.ones_like(img_var) * 1e12
            validity_mask = ivy.zeros_like(validity_mask)
        img_meas['dummy_cam_{}'.format(i)] =\
            {'img_mean': img_mean,
             'img_var': img_var,
             'validity_mask': validity_mask,
             'pose_mean': pose_mean,
             'pose_cov': pose_cov,
             'cam_rel_mat': ivy.identity(4, batch_shape=[batch_size, num_frames], dev_str=dev_str)[..., 0:3, :]}

    if ones:
        control_mean = ivy.zeros([batch_size, num_frames, 6], dev_str=dev_str)
        control_cov = ivy.ones([batch_size, num_frames, 6, 6], dev_str=dev_str)*1e-3
    else:
        control_mean = ivy.random_uniform(1e-3, 1, [batch_size, num_frames, 6], dev_str=dev_str)
        control_cov = ivy.random_uniform(1e-3, 1, [batch_size, num_frames, 6, 6], dev_str=dev_str)
    return Container({'img_meas': img_meas,
                      'control_mean': control_mean,
                      'control_cov': control_cov,
                      'agent_rel_mat': ivy.identity(4, batch_shape=[batch_size, num_frames],
                                                    dev_str=dev_str)[..., 0:3, :]})
Example #8
    def __init__(self,
                 mean: ivy.Array,
                 var: ivy.Array = None):
        """
        Create esm memory container

        :param mean: The ESM memory feature values *[batch_size, timesteps, omni_height, omni_width, 2 + feat]*
        :type mean: array
        :param var: The ESM memory feature variance values. All assumed zero if None.
                        *[batch_size, timesteps, omni_height, omni_width, feat]*
        :type var: array, optional
        """
        mean = _pad_to_batch_n_time_dims(mean, 5)
        self['mean'] = mean
        if var is None:
            var = ivy.zeros_like(mean)
        else:
            var = _pad_to_batch_n_time_dims(var, 5)
        self['var'] = var
Example #9
File: esm.py  Project: wx-b/memory
    def _convert_images_to_omni_observations(self, measurements,
                                             uniform_sphere_pixel_coords,
                                             holes_prior, batch_size,
                                             num_timesteps, num_cams,
                                             image_dims):
        """
        Convert image to omni-directional measurements

        :param measurements: perspective captured images and relative poses container
        :param uniform_sphere_pixel_coords: Uniform sphere pixel coords *[batch_size, num_timesteps, oh, ow, 3]*
        :param holes_prior: Prior for quantization holes *[batch_size, num_timesteps, oh, ow, 1+f]*
        :param batch_size: Size of batch
        :param num_timesteps: Number of frames
        :param num_cams: Number of cameras
        :param image_dims: Image dimensions
        :return: Omni-directional mean and variance images *[batch_size, n, oh, ow, 3+f]*, *[batch_size, n, oh, ow, 3+f]*
        """

        # coords from all scene cameras wrt world

        images_list = list()
        images_var_list = list()
        cam_rel_poses_list = list()
        cam_rel_poses_cov_list = list()
        cam_rel_mats_list = list()
        validity_mask_list = list()
        for key, item in measurements.to_iterator():
            if key == 'img_mean':
                # B x N x 1 x H x W x (3+f)
                images_list.append(ivy.expand_dims(item, 2))
            elif key == 'img_var':
                # B x N x 1 x H x W x (3+f)
                images_var_list.append(ivy.expand_dims(item, 2))
            elif key == 'pose_mean':
                # B x N x 1 x 6
                cam_rel_poses_list.append(ivy.expand_dims(item, 2))
            elif key == 'pose_cov':
                # B x N x 1 x 6 x 6
                cam_rel_poses_cov_list.append(ivy.expand_dims(item, 2))
            elif key == 'cam_rel_mat':
                # B x N x 1 x 3 x 4
                cam_rel_mats_list.append(ivy.expand_dims(item, 2))
            elif key == 'validity_mask':
                validity_mask_list.append(ivy.expand_dims(item, 2))
            else:
                raise Exception('Invalid image key: {}'.format(key))

        # B x N x C x H x W x (3+f)
        images = ivy.concatenate(images_list, 2)

        # B x N x C x H x W x (3+f)
        var_to_project = ivy.concatenate(images_var_list, 2)

        # B x N x C x 6
        cam_to_cam_poses = ivy.concatenate(cam_rel_poses_list, 2)

        # B x N x C x 3 x 4
        cam_to_cam_mats = ivy.concatenate(cam_rel_mats_list, 2)

        # B x N x C x 6 x 6
        cam_to_cam_pose_covs = ivy.concatenate(cam_rel_poses_cov_list, 2)

        # B x N x C x 1
        validity_masks = ivy.concatenate(validity_mask_list, 2) > 0

        # B x N x OH x OW x (3+f)
        holes_prior_var = ivy.ones(
            [batch_size, num_timesteps] + self._sphere_img_dims +
            [3 + self._feat_dim],
            dev_str=self._dev_str) * 1e12

        # reset invalid regions to prior

        # B x N x C x H x W x (3+f)
        images = ivy.where(
            validity_masks, images,
            ivy.concatenate(
                (images[..., 0:2],
                 ivy.zeros_like(images[..., 2:], dev_str=self._dev_str)), -1))

        # B x N x C x H x W x (3+f)
        var_to_project = ivy.where(
            validity_masks, var_to_project,
            ivy.ones_like(var_to_project, dev_str=self._dev_str) * 1e12)

        # B x N x OH x OW x (3+f)    # B x N x OH x OW x (3+f)
        return self._frame_to_omni_frame_projection(
            cam_to_cam_poses, cam_to_cam_mats, uniform_sphere_pixel_coords,
            images[..., 0:3], images[..., 3:], cam_to_cam_pose_covs,
            var_to_project, holes_prior, holes_prior_var, batch_size,
            num_timesteps, num_cams, image_dims)
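
The "reset invalid regions to prior" step above can be illustrated in isolation with NumPy (toy shapes, for illustration only): invalid pixels keep their first two (co-ordinate) channels, but their remaining channels are zeroed and their variance is pushed to a huge value, so the downstream probabilistic fusion effectively ignores them.

import numpy as np

# toy B x N x C x H x W x (3+f) tensors: 1 x 1 x 1 x 2 x 2 x 4
images = np.random.rand(1, 1, 1, 2, 2, 4).astype('float32')
var_to_project = np.full_like(images, 1e-3)
validity_masks = np.ones((1, 1, 1, 2, 2, 1), dtype=bool)
validity_masks[..., 1, 1, :] = False                      # mark one pixel invalid

images = np.where(validity_masks, images,
                  np.concatenate((images[..., 0:2],
                                  np.zeros_like(images[..., 2:])), -1))
var_to_project = np.where(validity_masks, var_to_project,
                          np.full_like(var_to_project, 1e12))
print(var_to_project[0, 0, 0, 1, 1])    # -> [1.e+12 1.e+12 1.e+12 1.e+12]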
Example #10
    def _forward(self, x, prev_state):
        prev_read_vector_list = prev_state[1]

        controller_input = ivy.concatenate([x] + prev_read_vector_list, axis=1)
        controller_output, controller_state = self._controller(ivy.expand_dims(controller_input, -2),
                                                               initial_state=prev_state[0])
        controller_output = controller_output[..., -1, :]

        parameters = self._controller_proj(controller_output)
        parameters = ivy.clip(parameters, -self._clip_value, self._clip_value)
        head_parameter_list = \
            ivy.split(parameters[:, :self._num_parameters_per_head * self._num_heads], self._num_heads,
                          axis=1)
        erase_add_list = ivy.split(parameters[:, self._num_parameters_per_head * self._num_heads:],
                                       2 * self._write_head_num, axis=1)

        prev_w_list = prev_state[2]
        prev_M = prev_state[4]
        w_list = []
        for i, head_parameter in enumerate(head_parameter_list):
            k = ivy.tanh(head_parameter[:, 0:self._memory_vector_dim])
            beta = ivy.softplus(head_parameter[:, self._memory_vector_dim])
            g = ivy.sigmoid(head_parameter[:, self._memory_vector_dim + 1])
            s = ivy.softmax(
                head_parameter[:, self._memory_vector_dim + 2:self._memory_vector_dim +
                                                              2 + (self._shift_range * 2 + 1)])
            gamma = ivy.softplus(head_parameter[:, -1]) + 1
            w = self._addressing(k, beta, g, s, gamma, prev_M, prev_w_list[i])
            w_list.append(w)

        # Reading (Sec 3.1)

        read_w_list = w_list[:self._read_head_num]
        if self._step == 0:
            usage_indicator = ivy.zeros_like(w_list[0])
        else:
            usage_indicator = prev_state[3] + ivy.reduce_sum(ivy.concatenate(read_w_list, 0))
        read_vector_list = []
        for i in range(self._read_head_num):
            read_vector = ivy.reduce_sum(ivy.expand_dims(read_w_list[i], axis=2) * prev_M, axis=1)
            read_vector_list.append(read_vector)

        # Writing (Sec 3.2)

        prev_write_w_list = prev_w_list[self._read_head_num:]
        w_wr_size = math.ceil(self._memory_size / 2) if self._retroactive_updates else self._memory_size
        if self._sequential_writing:
            batch_size = ivy.shape(x)[0]
            if self._step < w_wr_size:
                w_wr_list = [ivy.tile(ivy.cast(ivy.one_hot(
                    ivy.array([self._step]), w_wr_size), 'float32'),
                    (batch_size, 1))] * self._write_head_num
            else:
                batch_idxs = ivy.expand_dims(ivy.arange(batch_size, 0), -1)
                mem_idxs = ivy.expand_dims(ivy.argmax(usage_indicator[..., :w_wr_size], -1), -1)
                total_idxs = ivy.concatenate((batch_idxs, mem_idxs), -1)
                w_wr_list = [ivy.scatter_nd(total_idxs, ivy.ones((batch_size,)),
                                                (batch_size, w_wr_size))] * self._write_head_num
        else:
            w_wr_list = w_list[self._read_head_num:]
        if self._retroactive_updates:
            w_ret_list = [self._retroactive_discount * prev_write_w[..., w_wr_size:] +
                          (1 - self._retroactive_discount) * prev_write_w[..., :w_wr_size]
                          for prev_write_w in prev_write_w_list]
            w_write_list = [ivy.concatenate((w_wr, w_ret), -1) for w_wr, w_ret in zip(w_wr_list, w_ret_list)]
        else:
            w_write_list = w_wr_list
        M = prev_M
        for i in range(self._write_head_num):
            w = ivy.expand_dims(w_write_list[i], axis=2)
            if self._with_erase:
                erase_vector = ivy.expand_dims(ivy.sigmoid(erase_add_list[i * 2]), axis=1)
                M = M * (ivy.ones(ivy.shape(M)) - ivy.matmul(w, erase_vector))
            add_vector = ivy.expand_dims(ivy.tanh(erase_add_list[i * 2 + 1]), axis=1)
            M = M + ivy.matmul(w, add_vector)

        NTM_output = self._output_proj(ivy.concatenate([controller_output] + read_vector_list, axis=1))
        NTM_output = ivy.clip(NTM_output, -self._clip_value, self._clip_value)

        self._step += 1
        return NTM_output, NTMControllerState(
            controller_state=controller_state, read_vector_list=read_vector_list, w_list=w_list,
            usage_indicator=usage_indicator, M=M)
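
The _addressing call above is not shown in this example. For reference, the standard NTM content-addressing step it builds on (Graves et al., 2014, Sec 3.3.1) can be sketched in NumPy as follows; this is a generic sketch, not necessarily this repository's exact implementation.

import numpy as np

def content_addressing(k, beta, M):
    # cosine similarity between the key and every memory row, sharpened by beta
    # and normalised with a softmax
    k = k / (np.linalg.norm(k, axis=-1, keepdims=True) + 1e-8)          # B x D
    M_n = M / (np.linalg.norm(M, axis=-1, keepdims=True) + 1e-8)        # B x S x D
    sim = np.einsum('bd,bsd->bs', k, M_n)                               # B x S
    logits = beta[:, None] * sim
    e = np.exp(logits - logits.max(-1, keepdims=True))
    return e / e.sum(-1, keepdims=True)                                 # B x S

B, S, D = 2, 8, 4          # batch, memory slots, vector dim
M = np.random.rand(B, S, D)
k = np.tanh(np.random.rand(B, D))
beta = np.ones((B,)) * 5.0
w = content_addressing(k, beta, M)
assert np.allclose(w.sum(-1), 1.0)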
Example #11
def velocity_from_cam_coords_id_image_and_object_trans(cam_coords_t,
                                                       id_image,
                                                       obj_ids,
                                                       obj_trans,
                                                       delta_t,
                                                       batch_shape=None,
                                                       image_dims=None,
                                                       dev_str=None):
    """
    Compute velocity image from co-ordinate image, id image, and object transformations.

    :param cam_coords_t: Camera-centric homogeneous co-ordinates image in frame t *[batch_shape,h,w,4]*
    :type cam_coords_t: array
    :param id_image: Image containing per-pixel object ids *[batch_shape,h,w,1]*
    :type id_image: array
    :param obj_ids: Object ids *[batch_shape,num_obj,1]*
    :type obj_ids: array
    :param obj_trans: Object transformations for this frame over time *[batch_shape,num_obj,3,4]*
    :type obj_trans: array
    :param delta_t: Time difference between frame at timestep t-1 and t *[batch_shape,1]*
    :type delta_t: array
    :param batch_shape: Shape of batch. Inferred from inputs if None.
    :type batch_shape: sequence of ints, optional
    :param image_dims: Image dimensions. Inferred from inputs if None.
    :type image_dims: sequence of ints, optional
    :param dev_str: device on which to create the array 'cuda:0', 'cuda:1', 'cpu' etc. Same as x if None.
    :type dev_str: str, optional
    :return: Relative velocity image *[batch_shape,h,w,3]*
    """

    if batch_shape is None:
        batch_shape = cam_coords_t.shape[:-3]

    if image_dims is None:
        image_dims = cam_coords_t.shape[-3:-1]

    if dev_str is None:
        dev_str = _ivy.dev_str(cam_coords_t)

    # shapes as list
    batch_shape = list(batch_shape)
    image_dims = list(image_dims)

    # get co-ordinate re-projections

    # BS x H x W x 4
    cam_coords_t_all_trans, motion_mask =\
        project_cam_coords_with_object_transformations(cam_coords_t, id_image, obj_ids, obj_trans,
                                                       _ivy.identity(4, batch_shape=batch_shape)[..., 0:3, :],
                                                       batch_shape, image_dims)

    # BS x H x W x 4
    cam_coords_t_all_trans = \
        _ivy.where(motion_mask, cam_coords_t_all_trans, _ivy.zeros_like(cam_coords_t_all_trans, dev_str=dev_str))

    # compute velocities

    # BS x H x W x 3
    vel = (cam_coords_t[..., 0:3] - cam_coords_t_all_trans[..., 0:3]) / delta_t

    # prune velocities

    # BS x H x W x 3
    return _ivy.where(motion_mask, vel, _ivy.zeros_like(vel, dev_str=dev_str))
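
A tiny worked example of the velocity computation at the end (NumPy, toy values): a point that appears 0.1 m further along x in the re-projected frame than in frame t, with delta_t = 0.05 s, yields a relative velocity of -2 m/s along x, and pixels outside the motion mask are zeroed.

import numpy as np

cam_coords_t = np.zeros((2, 2, 4), dtype='float32'); cam_coords_t[..., 3] = 1.
cam_coords_t_all_trans = cam_coords_t.copy()
cam_coords_t_all_trans[..., 0] += 0.1          # re-projected points sit 0.1 m further along x
motion_mask = np.ones((2, 2, 1), dtype=bool)
motion_mask[0, 0] = False                      # e.g. a pixel with no matching object id
delta_t = 0.05

vel = (cam_coords_t[..., 0:3] - cam_coords_t_all_trans[..., 0:3]) / delta_t
vel = np.where(motion_mask, vel, np.zeros_like(vel))
print(vel[1, 1], vel[0, 0])    # -> [-2.  0.  0.] [0. 0. 0.]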
Example #12
def velocity_from_flow_cam_coords_and_cam_mats(flow_t_to_tm1,
                                               cam_coords_t,
                                               cam_coords_tm1,
                                               cam_tm1_to_t_ext_mat,
                                               delta_t,
                                               uniform_pixel_coords=None,
                                               batch_shape=None,
                                               image_dims=None,
                                               dev_str=None):
    """
    Compute relative cartesian velocity from optical flow, camera co-ordinates, and camera extrinsics.

    :param flow_t_to_tm1: Optical flow from frame t to t-1 *[batch_shape,h,w,2]*
    :type flow_t_to_tm1: array
    :param cam_coords_t: Camera-centric homogeneous co-ordinates image in frame t *[batch_shape,h,w,4]*
    :type cam_coords_t: array
    :param cam_coords_tm1: Camera-centric homogeneous co-ordinates image in frame t-1 *[batch_shape,h,w,4]*
    :type cam_coords_tm1: array
    :param cam_tm1_to_t_ext_mat: Camera t-1 to camera t extrinsic projection matrix *[batch_shape,3,4]*
    :type cam_tm1_to_t_ext_mat: array
    :param delta_t: Time difference between frame at timestep t-1 and t *[batch_shape,1]*
    :type delta_t: array
    :param uniform_pixel_coords: Homogeneous uniform (integer) pixel co-ordinate images, inferred from image_dims if None *[batch_shape,h,w,3]*
    :type uniform_pixel_coords: array, optional
    :param batch_shape: Shape of batch. Inferred from inputs if None.
    :type batch_shape: sequence of ints, optional
    :param image_dims: Image dimensions. Inferred from inputs if None.
    :type image_dims: sequence of ints, optional
    :param dev_str: device on which to create the array 'cuda:0', 'cuda:1', 'cpu' etc. Same as x if None.
    :type dev_str: str, optional
    :return: Cartesian velocity measurements relative to the camera *[batch_shape,h,w,3]*
    """

    if batch_shape is None:
        batch_shape = flow_t_to_tm1.shape[:-3]

    if image_dims is None:
        image_dims = flow_t_to_tm1.shape[-3:-1]

    # shapes as list
    batch_shape = list(batch_shape)
    image_dims = list(image_dims)

    if dev_str is None:
        dev_str = _ivy.dev_str(flow_t_to_tm1)

    if uniform_pixel_coords is None:
        uniform_pixel_coords = _ivy_svg.create_uniform_pixel_coords_image(
            image_dims, batch_shape, dev_str)

    # Interpolate cam coords from frame t-1

    # BS x H x W x 2
    warp = uniform_pixel_coords[..., 0:2] + flow_t_to_tm1

    # BS x H x W x 4
    cam_coords_tm1_interp = _ivy.image.bilinear_resample(cam_coords_tm1, warp)

    # Project to frame t

    # BS x H x W x 4
    cam_coords_t_proj = _ivy_tvg.cam_to_cam_coords(cam_coords_tm1_interp,
                                                   cam_tm1_to_t_ext_mat,
                                                   batch_shape, image_dims)

    # delta co-ordinates

    # BS x H x W x 3
    delta_cam_coords_t = (cam_coords_t - cam_coords_t_proj)[..., 0:3]

    # velocity

    # BS x H x W x 3
    vel = delta_cam_coords_t / _ivy.reshape(delta_t, batch_shape + [1] * 3)

    # Validity mask

    # BS x H x W x 1
    validity_mask = \
        _ivy.reduce_sum(_ivy.cast(warp < _ivy.array([image_dims[1], image_dims[0]], 'float32', dev_str=dev_str),
                                  'int32'), -1, keepdims=True) == 2

    # pruned

    # BS x H x W x 3,    BS x H x W x 1
    return _ivy.where(validity_mask, vel,
                      _ivy.zeros_like(vel, dev_str=dev_str)), validity_mask
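
The validity mask above keeps only pixels whose flow-warped co-ordinates still land inside the frame t-1 image (the check compares the warp against [width, height] channel-wise). A minimal NumPy version of that check, for illustration:

import numpy as np

h, w = 4, 6
ys, xs = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
uniform_pixel_coords = np.stack((xs, ys), -1).astype('float32')   # H x W x 2, ordered (x, y)
flow_t_to_tm1 = np.full((h, w, 2), 1.5, dtype='float32')          # push every pixel by +1.5 px

warp = uniform_pixel_coords + flow_t_to_tm1
validity_mask = np.sum((warp < np.array([w, h], dtype='float32')).astype('int32'),
                       -1, keepdims=True) == 2
print(validity_mask[..., 0])   # last row and last column warp out of bounds -> False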
Example #13
def main(batch_size=32,
         num_train_steps=31250,
         compile_flag=True,
         num_bits=8,
         seq_len=28,
         ctrl_output_size=100,
         memory_size=128,
         memory_vector_dim=28,
         overfit_flag=False,
         interactive=True,
         f=None):
    f = choose_random_framework() if f is None else f
    set_framework(f)

    # train config
    lr = 1e-3 if not overfit_flag else 1e-2
    batch_size = batch_size if not overfit_flag else 1
    num_train_steps = num_train_steps if not overfit_flag else 150
    max_grad_norm = 50

    # logging config
    vis_freq = 250 if not overfit_flag else 1

    # optimizer
    optimizer = ivy.Adam(lr=lr)

    # ntm
    ntm = NTM(input_dim=num_bits + 1,
              output_dim=num_bits,
              ctrl_output_size=ctrl_output_size,
              ctrl_layers=1,
              memory_size=memory_size,
              memory_vector_dim=memory_vector_dim,
              read_head_num=1,
              write_head_num=1)

    # compile loss fn
    total_seq_example = ivy.random_uniform(shape=(batch_size, 2 * seq_len + 1,
                                                  num_bits + 1))
    target_seq_example = total_seq_example[:, 0:seq_len, :-1]
    if compile_flag:
        loss_fn_maybe_compiled = ivy.compile_fn(
            lambda v, ttl_sq, trgt_sq, sq_ln: loss_fn(ntm, v, ttl_sq, trgt_sq,
                                                      sq_ln),
            dynamic=False,
            example_inputs=[
                ntm.v, total_seq_example, target_seq_example, seq_len
            ])
    else:
        loss_fn_maybe_compiled = lambda v, ttl_sq, trgt_sq, sq_ln: loss_fn(
            ntm, v, ttl_sq, trgt_sq, sq_ln)

    # init
    input_seq_m1 = ivy.cast(
        ivy.random_uniform(0., 1., (batch_size, seq_len, num_bits)) > 0.5,
        'float32')
    mw = None
    vw = None

    for i in range(num_train_steps):

        # sequence to copy
        if not overfit_flag:
            input_seq_m1 = ivy.cast(
                ivy.random_uniform(0., 1.,
                                   (batch_size, seq_len, num_bits)) > 0.5,
                'float32')
        target_seq = input_seq_m1
        input_seq = ivy.concatenate(
            (input_seq_m1, ivy.zeros((batch_size, seq_len, 1))), -1)
        eos = ivy.ones((batch_size, 1, num_bits + 1))
        output_seq = ivy.zeros_like(input_seq)
        total_seq = ivy.concatenate((input_seq, eos, output_seq), -2)

        # train step
        loss, pred_vals = train_step(loss_fn_maybe_compiled, optimizer, ntm,
                                     total_seq, target_seq, seq_len, mw, vw,
                                     ivy.array(i + 1,
                                               'float32'), max_grad_norm)

        # log
        print('step: {}, loss: {}'.format(i, ivy.to_numpy(loss).item()))

        # visualize
        if i % vis_freq == 0:
            target_to_vis = (ivy.to_numpy(target_seq[0] * 255)).astype(
                np.uint8)
            target_to_vis = np.transpose(
                cv2.resize(target_to_vis, (560, 160),
                           interpolation=cv2.INTER_NEAREST), (1, 0))

            pred_to_vis = (ivy.to_numpy(pred_vals[0] * 255)).astype(np.uint8)
            pred_to_vis = np.transpose(
                cv2.resize(pred_to_vis, (560, 160),
                           interpolation=cv2.INTER_NEAREST), (1, 0))

            img_to_vis = np.concatenate((pred_to_vis, target_to_vis), 0)
            img_to_vis = cv2.resize(img_to_vis, (1120, 640),
                                    interpolation=cv2.INTER_NEAREST)

            img_to_vis[0:60, -200:] = 0
            img_to_vis[5:55, -195:-5] = 255
            cv2.putText(img_to_vis, 'step {}'.format(i), (935, 42),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, tuple([0] * 3), 2)

            img_to_vis[0:60, 0:200] = 0
            img_to_vis[5:55, 5:195] = 255
            cv2.putText(img_to_vis, 'prediction', (7, 42),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, tuple([0] * 3), 2)

            img_to_vis[320:380, 0:130] = 0
            img_to_vis[325:375, 5:125] = 255
            cv2.putText(img_to_vis, 'target', (7, 362),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.2, tuple([0] * 3), 2)

            if interactive:
                cv2.imshow('prediction_and_target', img_to_vis)
                if overfit_flag:
                    cv2.waitKey(1)
                else:
                    cv2.waitKey(100)
                    cv2.destroyAllWindows()
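
The copy-task sequence layout built inside the training loop (input bits, an extra EOS channel, a single all-ones EOS frame, then blank frames in which the network must reproduce the target) can be summarised with a short NumPy sketch:

import numpy as np

batch_size, seq_len, num_bits = 2, 4, 8

# random binary sequence to copy; the extra channel is reserved for the EOS flag
input_seq_m1 = (np.random.uniform(size=(batch_size, seq_len, num_bits)) > 0.5).astype('float32')
target_seq = input_seq_m1

input_seq = np.concatenate((input_seq_m1, np.zeros((batch_size, seq_len, 1))), -1)
eos = np.ones((batch_size, 1, num_bits + 1))       # end-of-sequence marker frame
output_seq = np.zeros_like(input_seq)              # blank frames for the network to fill
total_seq = np.concatenate((input_seq, eos, output_seq), -2)
print(total_seq.shape)   # (batch_size, 2 * seq_len + 1, num_bits + 1) -> (2, 9, 9)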
Example #14
def quantize_to_image(pixel_coords,
                      final_image_dims,
                      feat=None,
                      feat_prior=None,
                      with_db=False,
                      pixel_coords_var=1e-3,
                      feat_var=1e-3,
                      pixel_coords_prior_var=1e12,
                      feat_prior_var=1e12,
                      var_threshold=(1e-3, 1e12),
                      uniform_pixel_coords=None,
                      batch_shape=None,
                      dev_str=None):
    """
    Quantize pixel co-ordinates with d feature channels (for depth, rgb, normals etc.), from
    images :math:`\mathbf{X}\in\mathbb{R}^{input\_images\_shape×(2+d)}`, which may have been reprojected from a host of
    different cameras (leading to non-integer pixel values), to a new quantized pixel co-ordinate image with the same
    feature channels :math:`\mathbf{X}\in\mathbb{R}^{h×w×(2+d)}`, and with integer pixel co-ordinates.
    Duplicates during the quantization are either probabilistically fused based on variance, or the minimum depth is
    chosen when using depth buffer mode.

    :param pixel_coords: Pixel co-ordinates *[batch_shape,input_size,2]*
    :type pixel_coords: array
    :param final_image_dims: Image dimensions of the final image.
    :type final_image_dims: sequence of ints
    :param feat: Features (i.e. depth, rgb, encoded), default is None. *[batch_shape,input_size,d]*
    :type feat: array, optional
    :param feat_prior: Prior feature image mean, default is None. *[batch_shape,input_size,d]*
    :type feat_prior: array or float to fill with
    :param with_db: Whether or not to use depth buffer in rendering, default is false
    :type with_db: bool, optional
    :param pixel_coords_var: Pixel coords variance *[batch_shape,input_size,2]*
    :type pixel_coords_var: array or float to fill with
    :param feat_var: Feature variance *[batch_shape,input_size,d]*
    :type feat_var: array or float to fill with
    :param pixel_coords_prior_var: Pixel coords prior variance *[batch_shape,h,w,2]*
    :type pixel_coords_prior_var: array or float to fill with
    :param feat_prior_var: Features prior variance *[batch_shape,h,w,d]*
    :type feat_prior_var: array or float to fill with
    :param var_threshold: Variance threshold, for projecting valid coords and clipping *[batch_shape,2+d,2]*
    :type var_threshold: array or sequence of floats to fill with
    :param uniform_pixel_coords: Homogeneous uniform (integer) pixel co-ordinate images, inferred from final_image_dims
                                    if None *[batch_shape,h,w,3]*
    :type uniform_pixel_coords: array, optional
    :param batch_shape: Shape of batch. Assumed no batches if None.
    :type batch_shape: sequence of ints, optional
    :param dev_str: device on which to create the array 'cuda:0', 'cuda:1', 'cpu' etc. Same as x if None.
    :type dev_str: str, optional
    :return: Quantized pixel co-ordinates image with d feature channels (for depth, rgb, normals etc.) *[batch_shape,h,w,2+d]*,
             maybe the quantized variance, *[batch_shape,h,w,2+d]*, and scatter counter image *[batch_shape,h,w,1]*
    """

    # ToDo: make variance fully optional. If not specified,
    #  then do not compute and scatter during function call for better efficiency.
    # config
    if batch_shape is None:
        batch_shape = pixel_coords.shape[:-2]

    if dev_str is None:
        dev_str = _ivy.dev_str(pixel_coords)

    if feat is None:
        d = 0
    else:
        d = feat.shape[-1]
    min_depth_diff = _ivy.array([MIN_DEPTH_DIFF], dev_str=dev_str)
    red = 'min' if with_db else 'sum'

    # shapes as list
    batch_shape = list(batch_shape)
    final_image_dims = list(final_image_dims)
    num_batch_dims = len(batch_shape)

    # variance threshold
    if isinstance(var_threshold, tuple) or isinstance(var_threshold, list):
        ones = _ivy.ones(batch_shape + [1, 2 + d, 1])
        var_threshold = _ivy.concatenate(
            (ones * var_threshold[0], ones * var_threshold[1]), -1)
    else:
        var_threshold = _ivy.reshape(var_threshold,
                                     batch_shape + [1, 2 + d, 2])

    # uniform pixel coords
    if uniform_pixel_coords is None:
        uniform_pixel_coords =\
            _ivy_svg.create_uniform_pixel_coords_image(final_image_dims, batch_shape, dev_str=dev_str)
    uniform_pixel_coords = uniform_pixel_coords[..., 0:2]

    # Extract Values #

    feat_prior = _ivy.ones_like(feat) * feat_prior if isinstance(
        feat_prior, float) else feat_prior
    pixel_coords_var = _ivy.ones_like(pixel_coords) * pixel_coords_var\
        if isinstance(pixel_coords_var, float) else pixel_coords_var
    feat_var = _ivy.ones_like(feat) * feat_var if isinstance(
        feat_var, float) else feat_var
    pixel_coords_prior_var = _ivy.ones(batch_shape + final_image_dims + [2]) * pixel_coords_prior_var\
        if isinstance(pixel_coords_prior_var, float) else pixel_coords_prior_var
    feat_prior_var = _ivy.ones(batch_shape + final_image_dims + [d]) * feat_prior_var\
        if isinstance(feat_prior_var, float) else feat_prior_var

    # Quantize #

    # BS x N x 2
    quantized_pixel_coords = _ivy.reshape(
        _ivy.cast(_ivy.round(pixel_coords), 'int32'), batch_shape + [-1, 2])

    # Combine #

    # BS x N x (2+D)
    pc_n_feat = _ivy.reshape(_ivy.concatenate((pixel_coords, feat), -1),
                             batch_shape + [-1, 2 + d])
    pc_n_feat_var = _ivy.reshape(
        _ivy.concatenate((pixel_coords_var, feat_var), -1),
        batch_shape + [-1, 2 + d])

    # BS x H x W x (2+D)
    prior = _ivy.concatenate((uniform_pixel_coords, feat_prior), -1)
    prior_var = _ivy.concatenate((pixel_coords_prior_var, feat_prior_var), -1)

    # Validity Mask #

    # BS x N x 1
    var_validity_mask = \
        _ivy.reduce_sum(_ivy.cast(pc_n_feat_var < var_threshold[..., 1], 'int32'), -1, keepdims=True) == 2+d
    bounds_validity_mask = _ivy.logical_and(
        _ivy.logical_and(quantized_pixel_coords[..., 0:1] >= 0,
                         quantized_pixel_coords[..., 1:2] >= 0),
        _ivy.logical_and(
            quantized_pixel_coords[..., 0:1] <= final_image_dims[1] - 1,
            quantized_pixel_coords[..., 1:2] <= final_image_dims[0] - 1))
    validity_mask = _ivy.logical_and(var_validity_mask, bounds_validity_mask)

    # num_valid_indices x len(BS)+2
    validity_indices = _ivy.reshape(
        _ivy.cast(_ivy.indices_where(validity_mask), 'int32'),
        [-1, num_batch_dims + 2])
    num_valid_indices = validity_indices.shape[-2]

    if num_valid_indices == 0:
        return _ivy.concatenate((uniform_pixel_coords[..., 0:2], feat_prior), -1), \
               _ivy.concatenate((pixel_coords_prior_var, feat_prior_var), -1),\
               _ivy.zeros_like(feat[..., 0:1], dev_str=dev_str)

    # Depth Based Scaling #

    mean_depth_min = None
    mean_depth_range = None
    pc_n_feat_wo_depth_range = None
    pc_n_feat_wo_depth_min = None
    var_vals_range = None
    var_vals_min = None

    if with_db:

        # BS x N x 1
        mean_depth = pc_n_feat[..., 2:3]

        # BS x 1 x 1
        mean_depth_min = _ivy.reduce_min(mean_depth, -2, keepdims=True)
        mean_depth_max = _ivy.reduce_max(mean_depth, -2, keepdims=True)
        mean_depth_range = mean_depth_max - mean_depth_min

        # BS x N x 1
        scaled_depth = (mean_depth - mean_depth_min) / (
            mean_depth_range * min_depth_diff + MIN_DENOMINATOR)

        if d == 1:

            # BS x 1 x 1+D
            pc_n_feat_wo_depth_min = _ivy.zeros(batch_shape + [1, 0],
                                                dev_str=dev_str)
            pc_n_feat_wo_depth_range = _ivy.ones(batch_shape + [1, 0],
                                                 dev_str=dev_str)

        else:
            # feat without depth

            # BS x N x 1+D
            pc_n_feat_wo_depth = _ivy.concatenate(
                (pc_n_feat[..., 0:2], pc_n_feat[..., 3:]), -1)

            # find the min and max of each value

            # BS x 1 x 1+D
            pc_n_feat_wo_depth_max = _ivy.reduce_max(
                pc_n_feat_wo_depth, -2, keepdims=True) + 1
            pc_n_feat_wo_depth_min = _ivy.reduce_min(
                pc_n_feat_wo_depth, -2, keepdims=True) - 1
            pc_n_feat_wo_depth_range = pc_n_feat_wo_depth_max - pc_n_feat_wo_depth_min

            # BS x N x 1+D
            normed_pc_n_feat_wo_depth = (pc_n_feat_wo_depth - pc_n_feat_wo_depth_min) / \
                                        (pc_n_feat_wo_depth_range + MIN_DENOMINATOR)

            # combine with scaled depth

            # BS x N x 1+D
            pc_n_feat_wo_depth_scaled = normed_pc_n_feat_wo_depth + scaled_depth

            # BS x N x (2+D)
            pc_n_feat = _ivy.concatenate(
                (pc_n_feat_wo_depth_scaled[..., 0:2], mean_depth,
                 pc_n_feat_wo_depth_scaled[..., 2:]), -1)

        # scale variance

        # BS x 1 x (2+D)
        var_vals_max = _ivy.reduce_max(pc_n_feat_var, -2, keepdims=True) + 1
        var_vals_min = _ivy.reduce_min(pc_n_feat_var, -2, keepdims=True) - 1
        var_vals_range = var_vals_max - var_vals_min

        # BS x N x (2+D)
        normed_var_vals = (pc_n_feat_var - var_vals_min) / (var_vals_range +
                                                            MIN_DENOMINATOR)
        pc_n_feat_var = normed_var_vals + scaled_depth

        # ready for later reversal with full image dimensions

        # BS x 1 x 1 x D
        var_vals_min = _ivy.expand_dims(var_vals_min, -2)
        var_vals_range = _ivy.expand_dims(var_vals_range, -2)

    # Validity Pruning #

    # num_valid_indices x (2+D)
    pc_n_feat = _ivy.gather_nd(pc_n_feat,
                               validity_indices[..., 0:num_batch_dims + 1])
    pc_n_feat_var = _ivy.gather_nd(pc_n_feat_var,
                                   validity_indices[..., 0:num_batch_dims + 1])

    # num_valid_indices x 2
    quantized_pixel_coords = _ivy.gather_nd(
        quantized_pixel_coords, validity_indices[..., 0:num_batch_dims + 1])

    if with_db:
        means_to_scatter = pc_n_feat
        vars_to_scatter = pc_n_feat_var
    else:
        # num_valid_indices x (2+D)
        vars_to_scatter = 1 / (pc_n_feat_var + MIN_DENOMINATOR)
        means_to_scatter = pc_n_feat * vars_to_scatter

    # Scatter #

    # num_valid_indices x 1
    counter = _ivy.ones_like(pc_n_feat[..., 0:1], dev_str=dev_str)
    if with_db:
        counter *= -1

    # num_valid_indices x 2(2+D)+1
    values_to_scatter = _ivy.concatenate(
        (means_to_scatter, vars_to_scatter, counter), -1)

    # num_valid_indices x (num_batch_dims + 2)
    all_indices = _ivy.flip(quantized_pixel_coords, -1)
    if num_batch_dims > 0:
        all_indices = _ivy.concatenate(
            (validity_indices[..., :-2], all_indices), -1)

    # BS x H x W x (2(2+D) + 1)
    quantized_img = _ivy.scatter_nd(
        _ivy.reshape(all_indices, [-1, num_batch_dims + 2]),
        _ivy.reshape(values_to_scatter, [-1, 2 * (2 + d) + 1]),
        batch_shape + final_image_dims + [2 * (2 + d) + 1],
        reduction='replace' if _ivy.backend == 'mxnd' else red)

    # BS x H x W x 1
    quantized_counter = quantized_img[..., -1:]
    if with_db:
        invalidity_mask = quantized_counter != -1
    else:
        invalidity_mask = quantized_counter == 0

    if with_db:
        # BS x H x W x (2+D)
        quantized_mean_scaled = quantized_img[..., 0:2 + d]
        quantized_var_scaled = quantized_img[..., 2 + d:2 * (2 + d)]

        # BS x H x W x 1
        quantized_depth_mean = quantized_mean_scaled[..., 2:3]

        # BS x 1 x 1 x 1
        mean_depth_min = _ivy.expand_dims(mean_depth_min, -2)
        mean_depth_range = _ivy.expand_dims(mean_depth_range, -2)

        # BS x 1 x 1 x (1+D)
        pc_n_feat_wo_depth_min = _ivy.expand_dims(pc_n_feat_wo_depth_min, -2)
        pc_n_feat_wo_depth_range = _ivy.expand_dims(pc_n_feat_wo_depth_range,
                                                    -2)

        # BS x 1 x 1 x (2+D) x 2
        var_threshold = _ivy.expand_dims(var_threshold, -3)

        # BS x H x W x (1+D)
        quantized_mean_wo_depth_scaled = _ivy.concatenate(
            (quantized_mean_scaled[..., 0:2], quantized_mean_scaled[..., 3:]),
            -1)
        quantized_mean_wo_depth_normed = quantized_mean_wo_depth_scaled - (quantized_depth_mean - mean_depth_min) / \
                                         (mean_depth_range * min_depth_diff + MIN_DENOMINATOR)
        quantized_mean_wo_depth = quantized_mean_wo_depth_normed * pc_n_feat_wo_depth_range + pc_n_feat_wo_depth_min
        prior_wo_depth = _ivy.concatenate((prior[..., 0:2], prior[..., 3:]),
                                          -1)
        quantized_mean_wo_depth = _ivy.where(invalidity_mask, prior_wo_depth,
                                             quantized_mean_wo_depth)

        # BS x H x W x (2+D)
        quantized_mean = _ivy.concatenate(
            (quantized_mean_wo_depth[..., 0:2], quantized_depth_mean,
             quantized_mean_wo_depth[..., 2:]), -1)

        # BS x H x W x (2+D)
        quantized_var_normed = quantized_var_scaled - (quantized_depth_mean - mean_depth_min) / \
                               (mean_depth_range * min_depth_diff + MIN_DENOMINATOR)
        quantized_var = _ivy.maximum(
            quantized_var_normed * var_vals_range + var_vals_min,
            var_threshold[..., 0])
        quantized_var = _ivy.where(invalidity_mask, prior_var, quantized_var)
    else:
        # BS x H x W x (2+D)
        quantized_sum_mean_x_recip_var = quantized_img[..., 0:2 + d]
        quantized_var_wo_increase = _ivy.where(
            invalidity_mask, prior_var,
            (1 / (quantized_img[..., 2 + d:2 * (2 + d)] + MIN_DENOMINATOR)))
        quantized_var = _ivy.maximum(
            quantized_var_wo_increase * quantized_counter,
            _ivy.expand_dims(var_threshold[..., 0], -2))
        quantized_var = _ivy.where(invalidity_mask, prior_var, quantized_var)
        quantized_mean = _ivy.where(
            invalidity_mask, prior,
            quantized_var_wo_increase * quantized_sum_mean_x_recip_var)

    # BS x H x W x (2+D)    BS x H x W x (2+D)     BS x H x W x 1
    return quantized_mean, quantized_var, quantized_counter
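
In the non-depth-buffer branch above, duplicates that fall on the same quantized pixel are fused by inverse-variance weighting: the scatter accumulates sum(mean/var) and sum(1/var) per pixel, and the fused mean and variance are recovered afterwards (the actual function additionally rescales the variance by the per-pixel counter and clips it against var_threshold). The core of that fusion in NumPy:

import numpy as np

# two measurements landing on the same quantized pixel
means = np.array([1.0, 3.0])
vars_ = np.array([0.1, 0.4])

recip_vars = 1.0 / vars_
fused_mean = np.sum(means * recip_vars) / np.sum(recip_vars)
fused_var = 1.0 / np.sum(recip_vars)
print(fused_mean, fused_var)   # 1.4 0.08 -> the lower-variance measurement dominates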
Example #15
def main(interactive=True, f=None):

    global INTERACTIVE
    INTERACTIVE = interactive

    # Framework Setup #
    # ----------------#

    # choose random framework
    f = choose_random_framework() if f is None else f
    set_framework(f)

    # Camera Geometry #
    # ----------------#

    # intrinsics

    # common intrinsic params
    img_dims = [512, 512]
    pp_offsets = ivy.array([dim / 2 - 0.5 for dim in img_dims], 'float32')
    cam_persp_angles = ivy.array([60 * np.pi / 180] * 2, 'float32')

    # ivy cam intrinsics container
    intrinsics = ivy_vision.persp_angles_and_pp_offsets_to_intrinsics_object(
        cam_persp_angles, pp_offsets, img_dims)

    # extrinsics

    # 3 x 4
    cam1_inv_ext_mat = ivy.array(np.load(data_dir + '/cam1_inv_ext_mat.npy'),
                                 'float32')
    cam2_inv_ext_mat = ivy.array(np.load(data_dir + '/cam2_inv_ext_mat.npy'),
                                 'float32')

    # full geometry

    # ivy cam geometry container
    cam1_geom = ivy_vision.inv_ext_mat_and_intrinsics_to_cam_geometry_object(
        cam1_inv_ext_mat, intrinsics)
    cam2_geom = ivy_vision.inv_ext_mat_and_intrinsics_to_cam_geometry_object(
        cam2_inv_ext_mat, intrinsics)
    cam_geoms = [cam1_geom, cam2_geom]

    # Camera Geometry Check #
    # ----------------------#

    # assert camera geometry shapes

    for cam_geom in cam_geoms:

        assert cam_geom.intrinsics.focal_lengths.shape == (2, )
        assert cam_geom.intrinsics.persp_angles.shape == (2, )
        assert cam_geom.intrinsics.pp_offsets.shape == (2, )
        assert cam_geom.intrinsics.calib_mats.shape == (3, 3)
        assert cam_geom.intrinsics.inv_calib_mats.shape == (3, 3)

        assert cam_geom.extrinsics.cam_centers.shape == (3, 1)
        assert cam_geom.extrinsics.Rs.shape == (3, 3)
        assert cam_geom.extrinsics.inv_Rs.shape == (3, 3)
        assert cam_geom.extrinsics.ext_mats_homo.shape == (4, 4)
        assert cam_geom.extrinsics.inv_ext_mats_homo.shape == (4, 4)

        assert cam_geom.full_mats_homo.shape == (4, 4)
        assert cam_geom.inv_full_mats_homo.shape == (4, 4)

    # Image Data #
    # -----------#

    # load images

    # h x w x 3
    color1 = ivy.array(
        cv2.imread(data_dir + '/rgb1.png').astype(np.float32) / 255)
    color2 = ivy.array(
        cv2.imread(data_dir + '/rgb2.png').astype(np.float32) / 255)

    # h x w x 1
    depth1 = ivy.array(
        np.reshape(
            np.frombuffer(
                cv2.imread(data_dir + '/depth1.png', -1).tobytes(),
                np.float32), img_dims + [1]))
    depth2 = ivy.array(
        np.reshape(
            np.frombuffer(
                cv2.imread(data_dir + '/depth2.png', -1).tobytes(),
                np.float32), img_dims + [1]))

    # depth scaled pixel coords

    # h x w x 3
    u_pix_coords = ivy_vision.create_uniform_pixel_coords_image(img_dims)
    ds_pixel_coords1 = u_pix_coords * depth1
    ds_pixel_coords2 = u_pix_coords * depth2

    # depth limits
    depth_min = ivy.reduce_min(ivy.concatenate((depth1, depth2), 0))
    depth_max = ivy.reduce_max(ivy.concatenate((depth1, depth2), 0))
    depth_limits = [depth_min, depth_max]

    # show images
    show_rgb_and_depth_images(color1, color2, depth1, depth2, depth_limits)

    # Flow and Depth Triangulation #
    # -----------------------------#

    # required mat formats
    cam1to2_full_mat_homo = ivy.matmul(cam2_geom.full_mats_homo,
                                       cam1_geom.inv_full_mats_homo)
    cam1to2_full_mat = cam1to2_full_mat_homo[..., 0:3, :]
    full_mats_homo = ivy.concatenate(
        (ivy.expand_dims(cam1_geom.full_mats_homo,
                         0), ivy.expand_dims(cam2_geom.full_mats_homo, 0)), 0)
    full_mats = full_mats_homo[..., 0:3, :]

    # flow
    flow1to2 = ivy_vision.flow_from_depth_and_cam_mats(ds_pixel_coords1,
                                                       cam1to2_full_mat)

    # depth again
    depth1_from_flow = ivy_vision.depth_from_flow_and_cam_mats(
        flow1to2, full_mats)

    # show images
    show_flow_and_depth_images(depth1, flow1to2, depth1_from_flow,
                               depth_limits)

    # Inverse Warping #
    # ----------------#

    # inverse warp rendering
    warp = u_pix_coords[..., 0:2] + flow1to2
    color2_warp_to_f1 = ivy.reshape(ivy.bilinear_resample(color2, warp),
                                    color1.shape)

    # projected depth scaled pixel coords 2
    ds_pixel_coords1_wrt_f2 = ivy_vision.ds_pixel_to_ds_pixel_coords(
        ds_pixel_coords1, cam1to2_full_mat)

    # projected depth 2
    depth1_wrt_f2 = ds_pixel_coords1_wrt_f2[..., -1:]

    # inverse warp depth
    depth2_warp_to_f1 = ivy.reshape(ivy.bilinear_resample(depth2, warp),
                                    depth1.shape)

    # depth validity
    depth_validity = ivy.abs(depth1_wrt_f2 - depth2_warp_to_f1) < 0.01

    # inverse warp rendering with mask
    color2_warp_to_f1_masked = ivy.where(depth_validity, color2_warp_to_f1,
                                         ivy.zeros_like(color2_warp_to_f1))

    # show images
    show_inverse_warped_images(depth1_wrt_f2, depth2_warp_to_f1,
                               depth_validity, color1, color2_warp_to_f1,
                               color2_warp_to_f1_masked, depth_limits)

    # Forward Warping #
    # ----------------#

    # forward warp rendering
    ds_pixel_coords1_proj = ivy_vision.ds_pixel_to_ds_pixel_coords(
        ds_pixel_coords2,
        ivy.inv(cam1to2_full_mat_homo)[..., 0:3, :])
    depth1_proj = ds_pixel_coords1_proj[..., -1:]
    ds_pixel_coords1_proj = ds_pixel_coords1_proj[..., 0:2] / depth1_proj
    features_to_render = ivy.concatenate((depth1_proj, color2), -1)

    # without depth buffer
    f1_forward_warp_no_db, _, _ = ivy_vision.quantize_to_image(
        ivy.reshape(ds_pixel_coords1_proj, (-1, 2)),
        img_dims,
        ivy.reshape(features_to_render, (-1, 4)),
        ivy.zeros_like(features_to_render),
        with_db=False)

    # with depth buffer
    f1_forward_warp_w_db, _, _ = ivy_vision.quantize_to_image(
        ivy.reshape(ds_pixel_coords1_proj, (-1, 2)),
        img_dims,
        ivy.reshape(features_to_render, (-1, 4)),
        ivy.zeros_like(features_to_render),
        with_db=False if ivy.get_framework() == 'mxnd' else True)

    # show images
    show_forward_warped_images(depth1, color1, f1_forward_warp_no_db,
                               f1_forward_warp_w_db, depth_limits)

    # message
    print('End of Run Through Demo!')
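
The inverse-warp step above resamples frame-2 values at u_pix_coords[..., 0:2] + flow1to2. A per-pixel NumPy bilinear sample, a stand-in for ivy.bilinear_resample purely to illustrate the interpolation (not its actual implementation):

import numpy as np

def bilinear_sample(img, x, y):
    # sample img (H x W x C) at one fractional (x, y) location
    x0, y0 = int(np.floor(x)), int(np.floor(y))
    x1, y1 = min(x0 + 1, img.shape[1] - 1), min(y0 + 1, img.shape[0] - 1)
    wx, wy = x - x0, y - y0
    top = (1 - wx) * img[y0, x0] + wx * img[y0, x1]
    bot = (1 - wx) * img[y1, x0] + wx * img[y1, x1]
    return (1 - wy) * top + wy * bot

img = np.arange(16, dtype='float32').reshape(4, 4, 1)
print(bilinear_sample(img, 1.5, 2.5))   # -> [11.5]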
Example #16
def main():

    # LSTM #
    # -----#

    # using the Ivy LSTM memory module, dual stacked, in a PyTorch model

    class TorchModelWithLSTM(torch.nn.Module):
        def __init__(self, channels_in, channels_out):
            torch.nn.Module.__init__(self)
            self._linear = torch.nn.Linear(channels_in, 64)
            self._lstm = ivy_mem.LSTM(64, channels_out, 2, return_state=False)
            self._assign_variables()

        def _assign_variables(self):
            self._lstm.v.map(lambda x, kc: self.register_parameter(
                name=kc, param=torch.nn.Parameter(x)))
            self._lstm.v = self._lstm.v.map(lambda x, kc: self._parameters[kc])

        def forward(self, x):
            x = self._linear(x)
            return self._lstm(x)

    # create model
    in_channels = 32
    out_channels = 8
    ivy.set_framework('torch')
    model = TorchModelWithLSTM(in_channels, out_channels)

    # define inputs
    batch_shape = [1, 2]
    timesteps = 3
    input_shape = batch_shape + [timesteps, in_channels]
    input_seq = torch.rand(batch_shape + [timesteps, in_channels])

    # call model and test output
    output_seq = model(input_seq)
    assert input_seq.shape[:-1] == output_seq.shape[:-1]
    assert input_seq.shape[-1] == in_channels
    assert output_seq.shape[-1] == out_channels

    # define loss function
    target = torch.zeros_like(output_seq)

    def loss_fn():
        pred = model(input_seq)
        return torch.sum((pred - target)**2)

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

    # train model
    print('\ntraining dummy PyTorch LSTM model...\n')
    for i in range(10):
        optimizer.zero_grad()
        loss = loss_fn()
        loss.backward()
        optimizer.step()
        print('step {}, loss = {}'.format(i, loss))
    print('\ndummy PyTorch LSTM model trained!\n')
    ivy.unset_framework()

    # NTM #
    # ----#

    # using the Ivy NTM memory module in a TensorFlow model

    class TfModelWithNTM(tf.keras.Model):
        def __init__(self, channels_in, channels_out):
            tf.keras.Model.__init__(self)
            self._linear = tf.keras.layers.Dense(64)
            memory_size = 4
            memory_vector_dim = 1
            self._ntm = ivy_mem.NTM(input_dim=64,
                                    output_dim=channels_out,
                                    ctrl_output_size=channels_out,
                                    ctrl_layers=1,
                                    memory_size=memory_size,
                                    memory_vector_dim=memory_vector_dim,
                                    read_head_num=1,
                                    write_head_num=1)
            self._assign_variables()

        def _assign_variables(self):
            self._ntm.v.map(
                lambda x, kc: self.add_weight(name=kc, shape=x.shape))
            self.set_weights(
                [ivy.to_numpy(v) for k, v in self._ntm.v.to_iterator()])
            self.trainable_weights_dict = dict()
            for weight in self.trainable_weights:
                self.trainable_weights_dict[weight.name] = weight
            self._ntm.v = self._ntm.v.map(
                lambda x, kc: self.trainable_weights_dict[kc + ':0'])

        def call(self, x, **kwargs):
            x = self._linear(x)
            return self._ntm(x)

    # create model
    in_channels = 32
    out_channels = 8
    ivy.set_framework('tensorflow')
    model = TfModelWithNTM(in_channels, out_channels)

    # define inputs
    batch_shape = [1, 2]
    timesteps = 3
    input_shape = batch_shape + [timesteps, in_channels]
    input_seq = tf.random.uniform(batch_shape + [timesteps, in_channels])

    # call model and test output
    output_seq = model(input_seq)
    assert input_seq.shape[:-1] == output_seq.shape[:-1]
    assert input_seq.shape[-1] == in_channels
    assert output_seq.shape[-1] == out_channels

    # define loss function
    target = tf.zeros_like(output_seq)

    def loss_fn():
        pred = model(input_seq)
        return tf.reduce_sum((pred - target)**2)

    # define optimizer
    optimizer = tf.keras.optimizers.Adam(1e-2)

    # train model
    print('\ntraining dummy TensorFlow NTM model...\n')
    for i in range(10):
        with tf.GradientTape() as tape:
            loss = loss_fn()
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        print('step {}, loss = {}'.format(i, loss))
    print('\ndummy TensorFlow NTM model trained!\n')
    ivy.unset_framework()

    # ESM #
    # ----#

    # using the Ivy ESM memory module in a pure-Ivy model, with a JAX backend
    # ToDo: add pre-ESM conv layers to this demo

    class IvyModelWithESM(ivy.Module):
        def __init__(self, channels_in, channels_out):
            self._channels_in = channels_in
            self._esm = ivy_mem.ESM(omni_image_dims=(16, 32))
            self._linear = ivy_mem.Linear(channels_in, channels_out)
            ivy.Module.__init__(self, 'cpu')

        def _forward(self, obs):
            mem = self._esm(obs)
            x = ivy.reshape(mem.mean, (-1, self._channels_in))
            return self._linear(x)

    # create model
    in_channels = 32
    out_channels = 8
    ivy.set_framework('torch')
    model = IvyModelWithESM(in_channels, out_channels)

    # input config
    batch_size = 1
    image_dims = [5, 5]
    num_timesteps = 2
    num_feature_channels = 3

    # create image of pixel co-ordinates
    uniform_pixel_coords =\
        ivy_vision.create_uniform_pixel_coords_image(image_dims, [batch_size, num_timesteps])

    # define camera measurement
    depths = ivy.random_uniform(shape=[batch_size, num_timesteps] +
                                image_dims + [1])
    ds_pixel_coords = ivy_vision.depth_to_ds_pixel_coords(depths)
    inv_calib_mats = ivy.random_uniform(
        shape=[batch_size, num_timesteps, 3, 3])
    cam_coords = ivy_vision.ds_pixel_to_cam_coords(ds_pixel_coords,
                                                   inv_calib_mats)[..., 0:3]
    features = ivy.random_uniform(shape=[batch_size, num_timesteps] +
                                  image_dims + [num_feature_channels])
    img_mean = ivy.concatenate((cam_coords, features), -1)
    cam_rel_mat = ivy.identity(4, batch_shape=[batch_size,
                                               num_timesteps])[..., 0:3, :]

    # place these into an ESM camera measurement container
    esm_cam_meas = ESMCamMeasurement(img_mean=img_mean,
                                     cam_rel_mat=cam_rel_mat)

    # define agent pose transformation
    agent_rel_mat = ivy.identity(4, batch_shape=[batch_size,
                                                 num_timesteps])[..., 0:3, :]

    # collect together into an ESM observation container
    esm_obs = ESMObservation(img_meas={'camera_0': esm_cam_meas},
                             agent_rel_mat=agent_rel_mat)

    # call model and test output
    output = model(esm_obs)
    assert output.shape[-1] == out_channels

    # define loss function
    target = ivy.zeros_like(output)

    def loss_fn(v):
        pred = model(esm_obs, v=v)
        return ivy.reduce_mean((pred - target)**2)

    # optimizer
    optimizer = ivy.SGD(lr=1e-4)

    # train model
    print('\ntraining dummy Ivy ESM model...\n')
    for i in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, model.v)
        model.v = optimizer.step(model.v, grads)
        print('step {}, loss = {}'.format(i, ivy.to_numpy(loss).item()))
    print('\ndummy Ivy ESM model trained!\n')
    ivy.unset_framework()

    # message
    print('End of Run Through Demo!')