def __init__(self,
             img_mean: ivy.Array,
             cam_rel_mat: ivy.Array,
             img_var: ivy.Array = None,
             validity_mask: ivy.Array = None,
             pose_mean: ivy.Array = None,
             pose_cov: ivy.Array = None):
    """
    Create esm image measurement container

    :param img_mean: Camera-relative co-ordinates and image features
                     *[batch_size, timesteps, height, width, 3 + feat]*
    :type img_mean: array
    :param cam_rel_mat: The pose of the camera relative to the current agent pose, in matrix form
                        *[batch_size, timesteps, 3, 4]*
    :type cam_rel_mat: array
    :param img_var: Image depth and feature variance values, assumed all zero if None.
                    *[batch_size, timesteps, height, width, 1 + feat]*
    :type img_var: array, optional
    :param validity_mask: Validity mask, for which pixels should be considered. Assumed all valid if None.
                          *[batch_size, timesteps, height, width, 1]*
    :type validity_mask: array, optional
    :param pose_mean: The pose of the camera relative to the current agent pose, in rotation vector pose form.
                      Inferred from cam_rel_mat if None. *[batch_size, timesteps, 6]*
    :type pose_mean: array, optional
    :param pose_cov: The covariance of the camera relative pose, in rotation vector form.
                     Assumed all zero if None. *[batch_size, timesteps, 6, 6]*
    :type pose_cov: array, optional
    """
    img_mean = _pad_to_batch_n_time_dims(img_mean, 5)
    cam_rel_mat = _pad_to_batch_n_time_dims(cam_rel_mat, 4)
    self['img_mean'] = img_mean
    self['cam_rel_mat'] = cam_rel_mat
    if img_var is None:
        img_var = ivy.zeros_like(img_mean)
    else:
        img_var = _pad_to_batch_n_time_dims(img_var, 5)
    self['img_var'] = img_var
    if validity_mask is None:
        validity_mask = ivy.ones_like(img_mean[..., 0:1])
    else:
        validity_mask = _pad_to_batch_n_time_dims(validity_mask, 5)
    self['validity_mask'] = validity_mask
    if pose_mean is None:
        pose_mean = ivy_mech.mat_pose_to_rot_vec_pose(cam_rel_mat)
    else:
        pose_mean = _pad_to_batch_n_time_dims(pose_mean, 3)
    self['pose_mean'] = pose_mean
    if pose_cov is None:
        pose_cov = ivy.tile(ivy.expand_dims(ivy.zeros_like(pose_mean), -1), (1, 1, 1, 6))
    else:
        pose_cov = _pad_to_batch_n_time_dims(pose_cov, 4)
    self['pose_cov'] = pose_cov
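# A minimal usage sketch: random values stand in for real camera data. With only
# the two required arguments, pose_mean is inferred from cam_rel_mat, img_var
# defaults to zeros, and the validity mask defaults to all-valid.
batch_size, timesteps, h, w, feat = 1, 2, 5, 5, 3
example_img_mean = ivy.random_uniform(shape=[batch_size, timesteps, h, w, 3 + feat])
example_cam_rel_mat = ivy.identity(4, batch_shape=[batch_size, timesteps])[..., 0:3, :]
example_cam_meas = ESMCamMeasurement(img_mean=example_img_mean, cam_rel_mat=example_cam_rel_mat)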
def _rot_mat_to_zxz_euler(rot_mat):
    # BS x 1
    euler_angles_1 = _ivy.acos(rot_mat[..., 2, 2:3])
    gimbal_validity = _ivy.abs(rot_mat[..., 0, 2:3]) > GIMBAL_TOL

    r12 = rot_mat[..., 0, 1:2]
    r11 = rot_mat[..., 0, 0:1]
    gimbal_euler_angles_0 = _ivy.atan2(-r12, r11)
    gimbal_euler_angles_2 = _ivy.zeros_like(gimbal_euler_angles_0)

    # BS x 3
    gimbal_euler_angles = _ivy.concatenate(
        (gimbal_euler_angles_0, euler_angles_1, gimbal_euler_angles_2), -1)

    # BS x 1
    r31 = rot_mat[..., 2, 0:1]
    r32 = rot_mat[..., 2, 1:2]
    r13 = rot_mat[..., 0, 2:3]
    r23 = rot_mat[..., 1, 2:3]
    normal_euler_angles_0 = _ivy.atan2(r31, r32)
    normal_euler_angles_2 = _ivy.atan2(r13, -r23)

    # BS x 3
    normal_euler_angles = _ivy.concatenate(
        (normal_euler_angles_0, euler_angles_1, normal_euler_angles_2), -1)

    return _ivy.where(gimbal_validity, normal_euler_angles, gimbal_euler_angles)
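# A quick numpy cross-check of the element formulas above, as a sketch; it assumes
# the extrinsic z-x-z convention R = Rz(a2) @ Rx(a1) @ Rz(a0) that those formulas
# imply. The yzy and zyx variants below follow the same extraction pattern.
import numpy as np

def _rz(t):
    c, s = np.cos(t), np.sin(t)
    return np.array([[c, -s, 0.], [s, c, 0.], [0., 0., 1.]])

def _rx(t):
    c, s = np.cos(t), np.sin(t)
    return np.array([[1., 0., 0.], [0., c, -s], [0., s, c]])

a0, a1, a2 = 0.3, 0.7, -0.4
R = _rz(a2) @ _rx(a1) @ _rz(a0)
assert np.isclose(np.arccos(R[2, 2]), a1)              # euler_angles_1
assert np.isclose(np.arctan2(R[2, 0], R[2, 1]), a0)    # normal_euler_angles_0
assert np.isclose(np.arctan2(R[0, 2], -R[1, 2]), a2)   # normal_euler_angles_2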
def __init__(self,
             img_meas: Dict[str, ESMCamMeasurement],
             agent_rel_mat: ivy.Array,
             control_mean: ivy.Array = None,
             control_cov: ivy.Array = None):
    """
    Create esm observation container

    :param img_meas: dict of ESMCamMeasurement objects, with keys for camera names.
    :type img_meas: Ivy container
    :param agent_rel_mat: The pose of the agent relative to the previous pose, in matrix form
                          *[batch_size, timesteps, 3, 4]*
    :type agent_rel_mat: array
    :param control_mean: The pose of the agent relative to the previous pose, in rotation vector pose form.
                         Inferred from agent_rel_mat if None. *[batch_size, timesteps, 6]*
    :type control_mean: array, optional
    :param control_cov: The covariance of the agent relative pose, in rotation vector form.
                        Assumed all zero if None. *[batch_size, timesteps, 6, 6]*
    :type control_cov: array, optional
    """
    self['img_meas'] = Container(img_meas)
    agent_rel_mat = _pad_to_batch_n_time_dims(agent_rel_mat, 4)
    self['agent_rel_mat'] = agent_rel_mat
    if control_mean is None:
        control_mean = ivy_mech.mat_pose_to_rot_vec_pose(agent_rel_mat)
    else:
        control_mean = _pad_to_batch_n_time_dims(control_mean, 3)
    self['control_mean'] = control_mean
    if control_cov is None:
        control_cov = ivy.tile(ivy.expand_dims(ivy.zeros_like(control_mean), -1), (1, 1, 1, 6))
    else:
        control_cov = _pad_to_batch_n_time_dims(control_cov, 4)
    self['control_cov'] = control_cov
def _rot_mat_to_yzy_euler(rot_mat):
    # BS x 1
    euler_angles_1 = _ivy.acos(rot_mat[..., 1, 1:2])
    gimbal_validity = _ivy.abs(rot_mat[..., 1, 0:1]) > GIMBAL_TOL

    r31 = rot_mat[..., 2, 0:1]
    r33 = rot_mat[..., 2, 2:3]
    gimbal_euler_angles_0 = _ivy.atan2(-r31, r33)
    gimbal_euler_angles_2 = _ivy.zeros_like(gimbal_euler_angles_0)

    # BS x 3
    gimbal_euler_angles = _ivy.concatenate(
        (gimbal_euler_angles_0, euler_angles_1, gimbal_euler_angles_2), -1)

    # BS x 1
    r23 = rot_mat[..., 1, 2:3]
    r21 = rot_mat[..., 1, 0:1]
    r32 = rot_mat[..., 2, 1:2]
    r12 = rot_mat[..., 0, 1:2]
    normal_euler_angles_0 = _ivy.atan2(r23, r21)
    normal_euler_angles_2 = _ivy.atan2(r32, r12)

    # BS x 3
    normal_euler_angles = _ivy.concatenate(
        (normal_euler_angles_0, euler_angles_1, normal_euler_angles_2), -1)

    return _ivy.where(gimbal_validity, normal_euler_angles, gimbal_euler_angles)
def _rot_mat_to_zyx_euler(rot_mat):
    # BS x 1
    euler_angles_1 = _ivy.asin(rot_mat[..., 0, 2:3])
    gimbal_validity = _ivy.abs(rot_mat[..., 1, 1:2]) > GIMBAL_TOL

    r21 = rot_mat[..., 1, 0:1]
    r22 = rot_mat[..., 1, 1:2]
    gimbal_euler_angles_0 = _ivy.atan2(r21, r22)
    gimbal_euler_angles_2 = _ivy.zeros_like(gimbal_euler_angles_0)

    # BS x 3
    gimbal_euler_angles = _ivy.concatenate(
        (gimbal_euler_angles_0, euler_angles_1, gimbal_euler_angles_2), -1)

    # BS x 1
    r12 = rot_mat[..., 0, 1:2]
    r11 = rot_mat[..., 0, 0:1]
    r23 = rot_mat[..., 1, 2:3]
    r33 = rot_mat[..., 2, 2:3]
    normal_euler_angles_0 = _ivy.atan2(-r12, r11)
    normal_euler_angles_2 = _ivy.atan2(-r23, r33)

    # BS x 3
    normal_euler_angles = _ivy.concatenate(
        (normal_euler_angles_0, euler_angles_1, normal_euler_angles_2), -1)

    return _ivy.where(gimbal_validity, normal_euler_angles, gimbal_euler_angles)
def _se_to_mask(se: ivy.Array) -> ivy.Array:
    # convert a structuring element into a stack of one-hot convolution kernels,
    # one output channel per SE cell, with cell membership marked by entries >= 0
    se_h, se_w = se.shape
    se_flat = ivy.reshape(se, (-1,))
    num_feats = se_h * se_w

    # one scatter index per SE cell: (channel, 0, x, y)
    i_s = ivy.expand_dims(ivy.arange(num_feats, dev_str=ivy.dev_str(se)), -1)
    y_s = i_s % se_h
    x_s = i_s // se_h
    indices = ivy.concatenate((i_s, ivy.zeros_like(i_s, dtype_str='int32'), x_s, y_s), -1)

    out = ivy.scatter_nd(indices, ivy.cast(se_flat >= 0, ivy.dtype_str(se)),
                         (num_feats, 1, se_h, se_w), dev_str=ivy.dev_str(se))
    return out
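# A brief usage sketch: a 3x3 cross-shaped structuring element (entries >= 0 mark
# membership, negative entries are ignored) becomes one one-hot kernel per SE
# cell, suitable for convolution-based morphology.
example_se = ivy.array([[-1., 0., -1.],
                        [0., 0., 0.],
                        [-1., 0., -1.]])
example_mask = _se_to_mask(example_se)
assert example_mask.shape == (9, 1, 3, 3)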
def _get_dummy_obs(batch_size, num_frames, num_cams, image_dims, num_feature_channels, dev_str='cpu',
                   ones=False, empty=False):

    uniform_pixel_coords = \
        ivy_vision.create_uniform_pixel_coords_image(image_dims, [batch_size, num_frames], dev_str=dev_str)

    img_meas = dict()
    for i in range(num_cams):
        validity_mask = ivy.ones([batch_size, num_frames] + image_dims + [1], dev_str=dev_str)
        if ones:
            img_mean = ivy.concatenate(
                (uniform_pixel_coords[..., 0:2],
                 ivy.ones([batch_size, num_frames] + image_dims + [1 + num_feature_channels],
                          dev_str=dev_str)), -1)
            img_var = ivy.ones(
                [batch_size, num_frames] + image_dims + [3 + num_feature_channels], dev_str=dev_str) * 1e-3
            pose_mean = ivy.zeros([batch_size, num_frames, 6], dev_str=dev_str)
            pose_cov = ivy.ones([batch_size, num_frames, 6, 6], dev_str=dev_str) * 1e-3
        else:
            img_mean = ivy.concatenate(
                (uniform_pixel_coords[..., 0:2],
                 ivy.random_uniform(1e-3, 1, [batch_size, num_frames] + image_dims +
                                    [1 + num_feature_channels], dev_str=dev_str)), -1)
            img_var = ivy.random_uniform(
                1e-3, 1, [batch_size, num_frames] + image_dims + [3 + num_feature_channels],
                dev_str=dev_str)
            pose_mean = ivy.random_uniform(1e-3, 1, [batch_size, num_frames, 6], dev_str=dev_str)
            pose_cov = ivy.random_uniform(1e-3, 1, [batch_size, num_frames, 6, 6], dev_str=dev_str)
        if empty:
            img_var = ivy.ones_like(img_var) * 1e12
            validity_mask = ivy.zeros_like(validity_mask)
        img_meas['dummy_cam_{}'.format(i)] = \
            {'img_mean': img_mean,
             'img_var': img_var,
             'validity_mask': validity_mask,
             'pose_mean': pose_mean,
             'pose_cov': pose_cov,
             'cam_rel_mat': ivy.identity(4, batch_shape=[batch_size, num_frames],
                                         dev_str=dev_str)[..., 0:3, :]}

    if ones:
        control_mean = ivy.zeros([batch_size, num_frames, 6], dev_str=dev_str)
        control_cov = ivy.ones([batch_size, num_frames, 6, 6], dev_str=dev_str) * 1e-3
    else:
        control_mean = ivy.random_uniform(1e-3, 1, [batch_size, num_frames, 6], dev_str=dev_str)
        control_cov = ivy.random_uniform(1e-3, 1, [batch_size, num_frames, 6, 6], dev_str=dev_str)

    return Container({'img_meas': img_meas,
                      'control_mean': control_mean,
                      'control_cov': control_cov,
                      'agent_rel_mat': ivy.identity(4, batch_shape=[batch_size, num_frames],
                                                    dev_str=dev_str)[..., 0:3, :]})
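# A brief usage sketch: build a deterministic dummy observation (ones=True) and
# check the agent pose matrix shape.
dummy_obs = _get_dummy_obs(batch_size=1, num_frames=2, num_cams=1, image_dims=[4, 4],
                           num_feature_channels=3, ones=True)
assert dummy_obs['agent_rel_mat'].shape == (1, 2, 3, 4)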
def __init__(self, mean: ivy.Array, var: ivy.Array = None):
    """
    Create esm memory container

    :param mean: The ESM memory feature values
                 *[batch_size, timesteps, omni_height, omni_width, 2 + feat]*
    :type mean: array
    :param var: The ESM memory feature variance values. All assumed zero if None.
                *[batch_size, timesteps, omni_height, omni_width, feat]*
    :type var: array, optional
    """
    mean = _pad_to_batch_n_time_dims(mean, 5)
    self['mean'] = mean
    if var is None:
        var = ivy.zeros_like(mean)
    else:
        var = _pad_to_batch_n_time_dims(var, 5)
    self['var'] = var
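# A minimal usage sketch, assuming the container class is named ESMMemory as
# elsewhere in this library: a random memory mean, with var defaulting to zeros.
example_mean = ivy.random_uniform(shape=[1, 1, 16, 32, 2 + 3])
example_memory = ESMMemory(mean=example_mean)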
def _convert_images_to_omni_observations(self, measurements, uniform_sphere_pixel_coords, holes_prior,
                                         batch_size, num_timesteps, num_cams, image_dims):
    """
    Convert image to omni-directional measurements

    :param measurements: perspective captured images and relative poses container
    :param uniform_sphere_pixel_coords: Uniform sphere pixel coords *[batch_size, num_timesteps, oh, ow, 3]*
    :param holes_prior: Prior for quantization holes *[batch_size, num_timesteps, oh, ow, 1+f]*
    :param batch_size: Size of batch
    :param num_timesteps: Number of frames
    :param num_cams: Number of cameras
    :param image_dims: Image dimensions
    :return: Omni-directional measurement means and variances
             *[batch_size, n, oh, ow, 3+f]*, *[batch_size, n, oh, ow, 3+f]*
    """

    # coords from all scene cameras wrt world
    images_list = list()
    images_var_list = list()
    cam_rel_poses_list = list()
    cam_rel_poses_cov_list = list()
    cam_rel_mats_list = list()
    validity_mask_list = list()
    for key, item in measurements.to_iterator():
        if key == 'img_mean':
            # B x N x 1 x H x W x (3+f)
            images_list.append(ivy.expand_dims(item, 2))
        elif key == 'img_var':
            # B x N x 1 x H x W x (3+f)
            images_var_list.append(ivy.expand_dims(item, 2))
        elif key == 'pose_mean':
            # B x N x 1 x 6
            cam_rel_poses_list.append(ivy.expand_dims(item, 2))
        elif key == 'pose_cov':
            # B x N x 1 x 6 x 6
            cam_rel_poses_cov_list.append(ivy.expand_dims(item, 2))
        elif key == 'cam_rel_mat':
            # B x N x 1 x 3 x 4
            cam_rel_mats_list.append(ivy.expand_dims(item, 2))
        elif key == 'validity_mask':
            validity_mask_list.append(ivy.expand_dims(item, 2))
        else:
            raise Exception('Invalid image key: {}'.format(key))

    # B x N x C x H x W x (3+f)
    images = ivy.concatenate(images_list, 2)

    # B x N x C x H x W x (3+f)
    var_to_project = ivy.concatenate(images_var_list, 2)

    # B x N x C x 6
    cam_to_cam_poses = ivy.concatenate(cam_rel_poses_list, 2)

    # B x N x C x 3 x 4
    cam_to_cam_mats = ivy.concatenate(cam_rel_mats_list, 2)

    # B x N x C x 6 x 6
    cam_to_cam_pose_covs = ivy.concatenate(cam_rel_poses_cov_list, 2)

    # B x N x C x 1
    validity_masks = ivy.concatenate(validity_mask_list, 2) > 0

    # B x N x OH x OW x (3+f)
    holes_prior_var = ivy.ones([batch_size, num_timesteps] + self._sphere_img_dims +
                               [3 + self._feat_dim], dev_str=self._dev_str) * 1e12

    # reset invalid regions to prior

    # B x N x C x H x W x (3+f)
    images = ivy.where(validity_masks, images,
                       ivy.concatenate((images[..., 0:2],
                                        ivy.zeros_like(images[..., 2:], dev_str=self._dev_str)), -1))

    # B x N x C x H x W x (3+f)
    var_to_project = ivy.where(validity_masks, var_to_project,
                               ivy.ones_like(var_to_project, dev_str=self._dev_str) * 1e12)

    # B x N x OH x OW x (3+f)    B x N x OH x OW x (3+f)
    return self._frame_to_omni_frame_projection(
        cam_to_cam_poses, cam_to_cam_mats, uniform_sphere_pixel_coords, images[..., 0:3],
        images[..., 3:], cam_to_cam_pose_covs, var_to_project, holes_prior, holes_prior_var,
        batch_size, num_timesteps, num_cams, image_dims)
def _forward(self, x, prev_state):
    prev_read_vector_list = prev_state[1]

    controller_input = ivy.concatenate([x] + prev_read_vector_list, axis=1)
    controller_output, controller_state = self._controller(
        ivy.expand_dims(controller_input, -2), initial_state=prev_state[0])
    controller_output = controller_output[..., -1, :]

    parameters = self._controller_proj(controller_output)
    parameters = ivy.clip(parameters, -self._clip_value, self._clip_value)
    head_parameter_list = \
        ivy.split(parameters[:, :self._num_parameters_per_head * self._num_heads], self._num_heads,
                  axis=1)
    erase_add_list = ivy.split(parameters[:, self._num_parameters_per_head * self._num_heads:],
                               2 * self._write_head_num, axis=1)

    prev_w_list = prev_state[2]
    prev_M = prev_state[4]
    w_list = []
    for i, head_parameter in enumerate(head_parameter_list):
        k = ivy.tanh(head_parameter[:, 0:self._memory_vector_dim])
        beta = ivy.softplus(head_parameter[:, self._memory_vector_dim])
        g = ivy.sigmoid(head_parameter[:, self._memory_vector_dim + 1])
        s = ivy.softmax(
            head_parameter[:, self._memory_vector_dim + 2:
                              self._memory_vector_dim + 2 + (self._shift_range * 2 + 1)])
        gamma = ivy.softplus(head_parameter[:, -1]) + 1
        w = self._addressing(k, beta, g, s, gamma, prev_M, prev_w_list[i])
        w_list.append(w)

    # Reading (Sec 3.1)
    read_w_list = w_list[:self._read_head_num]
    if self._step == 0:
        usage_indicator = ivy.zeros_like(w_list[0])
    else:
        usage_indicator = prev_state[3] + ivy.reduce_sum(ivy.concatenate(read_w_list, 0))
    read_vector_list = []
    for i in range(self._read_head_num):
        read_vector = ivy.reduce_sum(ivy.expand_dims(read_w_list[i], axis=2) * prev_M, axis=1)
        read_vector_list.append(read_vector)

    # Writing (Sec 3.2)
    prev_write_w_list = prev_w_list[self._read_head_num:]
    w_wr_size = math.ceil(self._memory_size / 2) if self._retroactive_updates else self._memory_size
    if self._sequential_writing:
        batch_size = ivy.shape(x)[0]
        if self._step < w_wr_size:
            w_wr_list = [ivy.tile(ivy.cast(ivy.one_hot(
                ivy.array([self._step]), w_wr_size), 'float32'),
                (batch_size, 1))] * self._write_head_num
        else:
            batch_idxs = ivy.expand_dims(ivy.arange(batch_size, 0), -1)
            mem_idxs = ivy.expand_dims(ivy.argmax(usage_indicator[..., :w_wr_size], -1), -1)
            total_idxs = ivy.concatenate((batch_idxs, mem_idxs), -1)
            w_wr_list = [ivy.scatter_nd(total_idxs, ivy.ones((batch_size,)),
                                        (batch_size, w_wr_size))] * self._write_head_num
    else:
        w_wr_list = w_list[self._read_head_num:]
    if self._retroactive_updates:
        w_ret_list = [self._retroactive_discount * prev_write_w[..., w_wr_size:] +
                      (1 - self._retroactive_discount) * prev_write_w[..., :w_wr_size]
                      for prev_write_w in prev_write_w_list]
        w_write_list = [ivy.concatenate((w_wr, w_ret), -1)
                        for w_wr, w_ret in zip(w_wr_list, w_ret_list)]
    else:
        w_write_list = w_wr_list
    M = prev_M
    for i in range(self._write_head_num):
        w = ivy.expand_dims(w_write_list[i], axis=2)
        if self._with_erase:
            erase_vector = ivy.expand_dims(ivy.sigmoid(erase_add_list[i * 2]), axis=1)
            M = M * ivy.ones(ivy.shape(M)) - ivy.matmul(w, erase_vector)
        add_vector = ivy.expand_dims(ivy.tanh(erase_add_list[i * 2 + 1]), axis=1)
        M = M + ivy.matmul(w, add_vector)

    NTM_output = self._output_proj(ivy.concatenate([controller_output] + read_vector_list, axis=1))
    NTM_output = ivy.clip(NTM_output, -self._clip_value, self._clip_value)

    self._step += 1
    return NTM_output, NTMControllerState(
        controller_state=controller_state, read_vector_list=read_vector_list, w_list=w_list,
        usage_indicator=usage_indicator, M=M)
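# A hedged sketch of the content-based addressing step (Graves et al. 2014,
# Sec 3.3.1) which self._addressing above is assumed to build on: cosine
# similarity between the key k and each memory row, sharpened by beta and
# normalised with a softmax. Names here are illustrative, not the module's
# actual internals.
def _content_addressing_sketch(k, beta, M):
    # k: [batch, mem_dim], beta: [batch, 1], M: [batch, mem_size, mem_dim]
    k = ivy.expand_dims(k, axis=1)
    inner = ivy.reduce_sum(k * M, axis=2)
    k_norm = ivy.reduce_sum(k ** 2, axis=2) ** 0.5
    m_norm = ivy.reduce_sum(M ** 2, axis=2) ** 0.5
    cos_sim = inner / (k_norm * m_norm + 1e-8)
    # [batch, mem_size]
    return ivy.softmax(beta * cos_sim)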
def velocity_from_cam_coords_id_image_and_object_trans(cam_coords_t, id_image, obj_ids, obj_trans,
                                                       delta_t, batch_shape=None, image_dims=None,
                                                       dev_str=None):
    """
    Compute velocity image from co-ordinate image, id image, and object transformations.

    :param cam_coords_t: Camera-centric homogeneous co-ordinates image in frame t *[batch_shape,h,w,4]*
    :type cam_coords_t: array
    :param id_image: Image containing per-pixel object ids *[batch_shape,h,w,1]*
    :type id_image: array
    :param obj_ids: Object ids *[batch_shape,num_obj,1]*
    :type obj_ids: array
    :param obj_trans: Object transformations for this frame over time *[batch_shape,num_obj,3,4]*
    :type obj_trans: array
    :param delta_t: Time difference between frame at timestep t-1 and t *[batch_shape,1]*
    :type delta_t: array
    :param batch_shape: Shape of batch. Inferred from inputs if None.
    :type batch_shape: sequence of ints, optional
    :param image_dims: Image dimensions. Inferred from inputs if None.
    :type image_dims: sequence of ints, optional
    :param dev_str: device on which to create the array 'cuda:0', 'cuda:1', 'cpu' etc.
                    Same as the input arrays if None.
    :type dev_str: str, optional
    :return: Relative velocity image *[batch_shape,h,w,3]*
    """

    if batch_shape is None:
        batch_shape = cam_coords_t.shape[:-3]

    if image_dims is None:
        image_dims = cam_coords_t.shape[-3:-1]

    if dev_str is None:
        dev_str = _ivy.dev_str(cam_coords_t)

    # shapes as list
    batch_shape = list(batch_shape)
    image_dims = list(image_dims)

    # get co-ordinate re-projections

    # BS x H x W x 4
    cam_coords_t_all_trans, motion_mask = \
        project_cam_coords_with_object_transformations(
            cam_coords_t, id_image, obj_ids, obj_trans,
            _ivy.identity(4, batch_shape=batch_shape)[..., 0:3, :], batch_shape, image_dims)

    # BS x H x W x 4
    cam_coords_t_all_trans = \
        _ivy.where(motion_mask, cam_coords_t_all_trans,
                   _ivy.zeros_like(cam_coords_t_all_trans, dev_str=dev_str))

    # compute velocities

    # BS x H x W x 3
    vel = (cam_coords_t[..., 0:3] - cam_coords_t_all_trans[..., 0:3]) / delta_t

    # prune velocities

    # BS x H x W x 3
    return _ivy.where(motion_mask, vel, _ivy.zeros_like(vel, dev_str=dev_str))
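# A hedged usage sketch with dummy inputs; shapes follow the docstring, and
# identity object transformations should give zero velocity everywhere.
example_cam_coords_t = _ivy.concatenate(
    (_ivy.random_uniform(shape=[1, 32, 32, 3]), _ivy.ones([1, 32, 32, 1])), -1)
example_id_image = _ivy.zeros([1, 32, 32, 1])
example_obj_ids = _ivy.zeros([1, 1, 1])
example_obj_trans = _ivy.identity(4, batch_shape=[1, 1])[..., 0:3, :]
example_delta_t = _ivy.ones([1, 1]) * 0.05
example_vel = velocity_from_cam_coords_id_image_and_object_trans(
    example_cam_coords_t, example_id_image, example_obj_ids, example_obj_trans, example_delta_t)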
def velocity_from_flow_cam_coords_and_cam_mats(flow_t_to_tm1, cam_coords_t, cam_coords_tm1,
                                               cam_tm1_to_t_ext_mat, delta_t,
                                               uniform_pixel_coords=None, batch_shape=None,
                                               image_dims=None, dev_str=None):
    """
    Compute relative cartesian velocity from optical flow, camera co-ordinates, and camera extrinsics.

    :param flow_t_to_tm1: Optical flow from frame t to t-1 *[batch_shape,h,w,2]*
    :type flow_t_to_tm1: array
    :param cam_coords_t: Camera-centric homogeneous co-ordinates image in frame t *[batch_shape,h,w,4]*
    :type cam_coords_t: array
    :param cam_coords_tm1: Camera-centric homogeneous co-ordinates image in frame t-1 *[batch_shape,h,w,4]*
    :type cam_coords_tm1: array
    :param cam_tm1_to_t_ext_mat: Camera t-1 to camera t extrinsic projection matrix *[batch_shape,3,4]*
    :type cam_tm1_to_t_ext_mat: array
    :param delta_t: Time difference between frame at timestep t-1 and t *[batch_shape,1]*
    :type delta_t: array
    :param uniform_pixel_coords: Homogeneous uniform (integer) pixel co-ordinate images,
                                 inferred from image_dims if None *[batch_shape,h,w,3]*
    :type uniform_pixel_coords: array, optional
    :param batch_shape: Shape of batch. Inferred from inputs if None.
    :type batch_shape: sequence of ints, optional
    :param image_dims: Image dimensions. Inferred from inputs if None.
    :type image_dims: sequence of ints, optional
    :param dev_str: device on which to create the array 'cuda:0', 'cuda:1', 'cpu' etc.
                    Same as the input arrays if None.
    :type dev_str: str, optional
    :return: Cartesian velocity measurements relative to the camera *[batch_shape,h,w,3]*
    """

    if batch_shape is None:
        batch_shape = flow_t_to_tm1.shape[:-3]

    if image_dims is None:
        image_dims = flow_t_to_tm1.shape[-3:-1]

    # shapes as list
    batch_shape = list(batch_shape)
    image_dims = list(image_dims)

    if dev_str is None:
        dev_str = _ivy.dev_str(flow_t_to_tm1)

    if uniform_pixel_coords is None:
        uniform_pixel_coords = _ivy_svg.create_uniform_pixel_coords_image(image_dims, batch_shape,
                                                                          dev_str)

    # Interpolate cam coords from frame t-1

    # BS x H x W x 2
    warp = uniform_pixel_coords[..., 0:2] + flow_t_to_tm1

    # BS x H x W x 4
    cam_coords_tm1_interp = _ivy.image.bilinear_resample(cam_coords_tm1, warp)

    # Project to frame t

    # BS x H x W x 4
    cam_coords_t_proj = _ivy_tvg.cam_to_cam_coords(cam_coords_tm1_interp, cam_tm1_to_t_ext_mat,
                                                   batch_shape, image_dims)

    # delta co-ordinates

    # BS x H x W x 3
    delta_cam_coords_t = (cam_coords_t - cam_coords_t_proj)[..., 0:3]

    # velocity

    # BS x H x W x 3
    vel = delta_cam_coords_t / _ivy.reshape(delta_t, batch_shape + [1] * 3)

    # Validity mask

    # BS x H x W x 1
    validity_mask = \
        _ivy.reduce_sum(_ivy.cast(warp < _ivy.array([image_dims[1], image_dims[0]], 'float32',
                                                    dev_str=dev_str), 'int32'), -1, keepdims=True) == 2

    # pruned

    # BS x H x W x 3,    BS x H x W x 1
    return _ivy.where(validity_mask, vel, _ivy.zeros_like(vel, dev_str=dev_str)), validity_mask
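# A hedged usage sketch: zero flow, identity extrinsics and a static scene
# should give (near-)zero velocity, with validity from the warp bounds check.
example_flow = _ivy.zeros([1, 32, 32, 2])
example_cam_coords = _ivy.concatenate(
    (_ivy.random_uniform(shape=[1, 32, 32, 3]), _ivy.ones([1, 32, 32, 1])), -1)
example_ext_mat = _ivy.identity(4, batch_shape=[1])[..., 0:3, :]
example_delta_t = _ivy.ones([1, 1]) * 0.05
example_vel, example_validity = velocity_from_flow_cam_coords_and_cam_mats(
    example_flow, example_cam_coords, example_cam_coords, example_ext_mat, example_delta_t)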
def main(batch_size=32, num_train_steps=31250, compile_flag=True, num_bits=8, seq_len=28,
         ctrl_output_size=100, memory_size=128, memory_vector_dim=28, overfit_flag=False,
         interactive=True, f=None):
    f = choose_random_framework() if f is None else f
    set_framework(f)

    # train config
    lr = 1e-3 if not overfit_flag else 1e-2
    batch_size = batch_size if not overfit_flag else 1
    num_train_steps = num_train_steps if not overfit_flag else 150
    max_grad_norm = 50

    # logging config
    vis_freq = 250 if not overfit_flag else 1

    # optimizer
    optimizer = ivy.Adam(lr=lr)

    # ntm
    ntm = NTM(input_dim=num_bits + 1, output_dim=num_bits, ctrl_output_size=ctrl_output_size,
              ctrl_layers=1, memory_size=memory_size, memory_vector_dim=memory_vector_dim,
              read_head_num=1, write_head_num=1)

    # compile loss fn
    total_seq_example = ivy.random_uniform(shape=(batch_size, 2 * seq_len + 1, num_bits + 1))
    target_seq_example = total_seq_example[:, 0:seq_len, :-1]
    if compile_flag:
        loss_fn_maybe_compiled = ivy.compile_fn(
            lambda v, ttl_sq, trgt_sq, sq_ln: loss_fn(ntm, v, ttl_sq, trgt_sq, sq_ln),
            dynamic=False,
            example_inputs=[ntm.v, total_seq_example, target_seq_example, seq_len])
    else:
        loss_fn_maybe_compiled = lambda v, ttl_sq, trgt_sq, sq_ln: loss_fn(
            ntm, v, ttl_sq, trgt_sq, sq_ln)

    # init
    input_seq_m1 = ivy.cast(
        ivy.random_uniform(0., 1., (batch_size, seq_len, num_bits)) > 0.5, 'float32')
    mw = None
    vw = None

    for i in range(num_train_steps):

        # sequence to copy
        if not overfit_flag:
            input_seq_m1 = ivy.cast(
                ivy.random_uniform(0., 1., (batch_size, seq_len, num_bits)) > 0.5, 'float32')
        target_seq = input_seq_m1
        input_seq = ivy.concatenate((input_seq_m1, ivy.zeros((batch_size, seq_len, 1))), -1)
        eos = ivy.ones((batch_size, 1, num_bits + 1))
        output_seq = ivy.zeros_like(input_seq)
        total_seq = ivy.concatenate((input_seq, eos, output_seq), -2)

        # train step
        loss, pred_vals = train_step(loss_fn_maybe_compiled, optimizer, ntm, total_seq, target_seq,
                                     seq_len, mw, vw, ivy.array(i + 1, 'float32'), max_grad_norm)

        # log
        print('step: {}, loss: {}'.format(i, ivy.to_numpy(loss).item()))

        # visualize
        if i % vis_freq == 0:
            target_to_vis = (ivy.to_numpy(target_seq[0] * 255)).astype(np.uint8)
            target_to_vis = np.transpose(
                cv2.resize(target_to_vis, (560, 160), interpolation=cv2.INTER_NEAREST), (1, 0))

            pred_to_vis = (ivy.to_numpy(pred_vals[0] * 255)).astype(np.uint8)
            pred_to_vis = np.transpose(
                cv2.resize(pred_to_vis, (560, 160), interpolation=cv2.INTER_NEAREST), (1, 0))

            img_to_vis = np.concatenate((pred_to_vis, target_to_vis), 0)
            img_to_vis = cv2.resize(img_to_vis, (1120, 640), interpolation=cv2.INTER_NEAREST)

            img_to_vis[0:60, -200:] = 0
            img_to_vis[5:55, -195:-5] = 255
            cv2.putText(img_to_vis, 'step {}'.format(i), (935, 42), cv2.FONT_HERSHEY_SIMPLEX, 1.2,
                        tuple([0] * 3), 2)

            img_to_vis[0:60, 0:200] = 0
            img_to_vis[5:55, 5:195] = 255
            cv2.putText(img_to_vis, 'prediction', (7, 42), cv2.FONT_HERSHEY_SIMPLEX, 1.2,
                        tuple([0] * 3), 2)

            img_to_vis[320:380, 0:130] = 0
            img_to_vis[325:375, 5:125] = 255
            cv2.putText(img_to_vis, 'target', (7, 362), cv2.FONT_HERSHEY_SIMPLEX, 1.2,
                        tuple([0] * 3), 2)

            if interactive:
                cv2.imshow('prediction_and_target', img_to_vis)
                if overfit_flag:
                    cv2.waitKey(1)
                else:
                    cv2.waitKey(100)

    cv2.destroyAllWindows()
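# A minimal numpy illustration of the copy-task sequence layout built above:
# [input bits + flag channel | end-of-sequence flag row | zeros while the model
# writes its answer], giving 2 * seq_len + 1 steps in total.
import numpy as np

example_seq_len, example_num_bits = 3, 4
example_inp = (np.random.uniform(size=(example_seq_len, example_num_bits)) > 0.5).astype('float32')
example_inp_flagged = np.concatenate(
    (example_inp, np.zeros((example_seq_len, 1), 'float32')), -1)
example_eos = np.ones((1, example_num_bits + 1), 'float32')
example_out_placeholder = np.zeros_like(example_inp_flagged)
example_total_seq = np.concatenate((example_inp_flagged, example_eos, example_out_placeholder), 0)
assert example_total_seq.shape == (2 * example_seq_len + 1, example_num_bits + 1)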
def quantize_to_image(pixel_coords, final_image_dims, feat=None, feat_prior=None, with_db=False,
                      pixel_coords_var=1e-3, feat_var=1e-3, pixel_coords_prior_var=1e12,
                      feat_prior_var=1e12, var_threshold=(1e-3, 1e12), uniform_pixel_coords=None,
                      batch_shape=None, dev_str=None):
    """
    Quantize pixel co-ordinates with d feature channels (for depth, rgb, normals etc.), from images
    :math:`\mathbf{X}\in\mathbb{R}^{input\_images\_shape×(2+d)}`, which may have been reprojected
    from a host of different cameras (leading to non-integer pixel values), to a new quantized pixel
    co-ordinate image with the same feature channels :math:`\mathbf{X}\in\mathbb{R}^{h×w×(2+d)}`, and
    with integer pixel co-ordinates. Duplicates during the quantization are either probabilistically
    fused based on variance, or the minimum depth is chosen when using depth buffer mode.

    :param pixel_coords: Pixel co-ordinates *[batch_shape,input_size,2]*
    :type pixel_coords: array
    :param final_image_dims: Image dimensions of the final image.
    :type final_image_dims: sequence of ints
    :param feat: Features (i.e. depth, rgb, encoded), default is None. *[batch_shape,input_size,d]*
    :type feat: array, optional
    :param feat_prior: Prior feature image mean, default is None. *[batch_shape,h,w,d]*
    :type feat_prior: array or float to fill with
    :param with_db: Whether or not to use depth buffer in rendering, default is false
    :type with_db: bool, optional
    :param pixel_coords_var: Pixel coords variance *[batch_shape,input_size,2]*
    :type pixel_coords_var: array or float to fill with
    :param feat_var: Feature variance *[batch_shape,input_size,d]*
    :type feat_var: array or float to fill with
    :param pixel_coords_prior_var: Pixel coords prior variance *[batch_shape,h,w,2]*
    :type pixel_coords_prior_var: array or float to fill with
    :param feat_prior_var: Features prior variance *[batch_shape,h,w,d]*
    :type feat_prior_var: array or float to fill with
    :param var_threshold: Variance threshold, for projecting valid coords and clipping
                          *[batch_shape,2+d,2]*
    :type var_threshold: array or sequence of floats to fill with
    :param uniform_pixel_coords: Homogeneous uniform (integer) pixel co-ordinate images,
                                 inferred from final_image_dims if None *[batch_shape,h,w,3]*
    :type uniform_pixel_coords: array, optional
    :param batch_shape: Shape of batch. Assumed no batches if None.
    :type batch_shape: sequence of ints, optional
    :param dev_str: device on which to create the array 'cuda:0', 'cuda:1', 'cpu' etc.
                    Same as the input arrays if None.
    :type dev_str: str, optional
    :return: Quantized pixel co-ordinates image with d feature channels (for depth, rgb, normals etc.)
             *[batch_shape,h,w,2+d]*, maybe the quantized variance *[batch_shape,h,w,2+d]*, and
             scatter counter image *[batch_shape,h,w,1]*
    """
    # ToDo: make variance fully optional. If not specified,
    #  then do not compute and scatter during function call for better efficiency.

    # config
    if batch_shape is None:
        batch_shape = pixel_coords.shape[:-2]

    if dev_str is None:
        dev_str = _ivy.dev_str(pixel_coords)

    if feat is None:
        d = 0
    else:
        d = feat.shape[-1]
    min_depth_diff = _ivy.array([MIN_DEPTH_DIFF], dev_str=dev_str)
    red = 'min' if with_db else 'sum'

    # shapes as list
    batch_shape = list(batch_shape)
    final_image_dims = list(final_image_dims)
    num_batch_dims = len(batch_shape)

    # variance threshold
    if isinstance(var_threshold, tuple) or isinstance(var_threshold, list):
        ones = _ivy.ones(batch_shape + [1, 2 + d, 1])
        var_threshold = _ivy.concatenate(
            (ones * var_threshold[0], ones * var_threshold[1]), -1)
    else:
        var_threshold = _ivy.reshape(var_threshold, batch_shape + [1, 2 + d, 2])

    # uniform pixel coords
    if uniform_pixel_coords is None:
        uniform_pixel_coords = \
            _ivy_svg.create_uniform_pixel_coords_image(final_image_dims, batch_shape,
                                                       dev_str=dev_str)
    uniform_pixel_coords = uniform_pixel_coords[..., 0:2]

    # Extract Values #

    feat_prior = _ivy.ones_like(feat) * feat_prior if isinstance(feat_prior, float) else feat_prior
    pixel_coords_var = _ivy.ones_like(pixel_coords) * pixel_coords_var \
        if isinstance(pixel_coords_var, float) else pixel_coords_var
    feat_var = _ivy.ones_like(feat) * feat_var if isinstance(feat_var, float) else feat_var
    pixel_coords_prior_var = _ivy.ones(batch_shape + final_image_dims + [2]) * pixel_coords_prior_var \
        if isinstance(pixel_coords_prior_var, float) else pixel_coords_prior_var
    feat_prior_var = _ivy.ones(batch_shape + final_image_dims + [d]) * feat_prior_var \
        if isinstance(feat_prior_var, float) else feat_prior_var

    # Quantize #

    # BS x N x 2
    quantized_pixel_coords = _ivy.reshape(_ivy.cast(_ivy.round(pixel_coords), 'int32'),
                                          batch_shape + [-1, 2])

    # Combine #

    # BS x N x (2+D)
    pc_n_feat = _ivy.reshape(_ivy.concatenate((pixel_coords, feat), -1), batch_shape + [-1, 2 + d])
    pc_n_feat_var = _ivy.reshape(_ivy.concatenate((pixel_coords_var, feat_var), -1),
                                 batch_shape + [-1, 2 + d])

    # BS x H x W x (2+D)
    prior = _ivy.concatenate((uniform_pixel_coords, feat_prior), -1)
    prior_var = _ivy.concatenate((pixel_coords_prior_var, feat_prior_var), -1)

    # Validity Mask #

    # BS x N x 1
    var_validity_mask = \
        _ivy.reduce_sum(_ivy.cast(pc_n_feat_var < var_threshold[..., 1], 'int32'), -1,
                        keepdims=True) == 2 + d
    bounds_validity_mask = _ivy.logical_and(
        _ivy.logical_and(quantized_pixel_coords[..., 0:1] >= 0,
                         quantized_pixel_coords[..., 1:2] >= 0),
        _ivy.logical_and(quantized_pixel_coords[..., 0:1] <= final_image_dims[1] - 1,
                         quantized_pixel_coords[..., 1:2] <= final_image_dims[0] - 1))
    validity_mask = _ivy.logical_and(var_validity_mask, bounds_validity_mask)

    # num_valid_indices x len(BS)+2
    validity_indices = _ivy.reshape(_ivy.cast(_ivy.indices_where(validity_mask), 'int32'),
                                    [-1, num_batch_dims + 2])
    num_valid_indices = validity_indices.shape[-2]

    if num_valid_indices == 0:
        return _ivy.concatenate((uniform_pixel_coords[..., 0:2], feat_prior), -1), \
               _ivy.concatenate((pixel_coords_prior_var, feat_prior_var), -1), \
               _ivy.zeros_like(feat[..., 0:1], dev_str=dev_str)

    # Depth Based Scaling #
    mean_depth_min = None
    mean_depth_range = None
    pc_n_feat_wo_depth_range = None
    pc_n_feat_wo_depth_min = None
    var_vals_range = None
    var_vals_min = None
    if with_db:
        # BS x N x 1
        mean_depth = pc_n_feat[..., 2:3]

        # BS x 1 x 1
        mean_depth_min = _ivy.reduce_min(mean_depth, -2, keepdims=True)
        mean_depth_max = _ivy.reduce_max(mean_depth, -2, keepdims=True)
        mean_depth_range = mean_depth_max - mean_depth_min

        # BS x N x 1
        scaled_depth = (mean_depth - mean_depth_min) / \
                       (mean_depth_range * min_depth_diff + MIN_DENOMINATOR)

        if d == 1:
            # BS x 1 x 1+D
            pc_n_feat_wo_depth_min = _ivy.zeros(batch_shape + [1, 0], dev_str=dev_str)
            pc_n_feat_wo_depth_range = _ivy.ones(batch_shape + [1, 0], dev_str=dev_str)
        else:
            # feat without depth

            # BS x N x 1+D
            pc_n_feat_wo_depth = _ivy.concatenate((pc_n_feat[..., 0:2], pc_n_feat[..., 3:]), -1)

            # find the min and max of each value

            # BS x 1 x 1+D
            pc_n_feat_wo_depth_max = _ivy.reduce_max(pc_n_feat_wo_depth, -2, keepdims=True) + 1
            pc_n_feat_wo_depth_min = _ivy.reduce_min(pc_n_feat_wo_depth, -2, keepdims=True) - 1
            pc_n_feat_wo_depth_range = pc_n_feat_wo_depth_max - pc_n_feat_wo_depth_min

            # BS x N x 1+D
            normed_pc_n_feat_wo_depth = (pc_n_feat_wo_depth - pc_n_feat_wo_depth_min) / \
                                        (pc_n_feat_wo_depth_range + MIN_DENOMINATOR)

            # combine with scaled depth

            # BS x N x 1+D
            pc_n_feat_wo_depth_scaled = normed_pc_n_feat_wo_depth + scaled_depth

            # BS x N x (2+D)
            pc_n_feat = _ivy.concatenate((pc_n_feat_wo_depth_scaled[..., 0:2], mean_depth,
                                          pc_n_feat_wo_depth_scaled[..., 2:]), -1)

        # scale variance

        # BS x 1 x (2+D)
        var_vals_max = _ivy.reduce_max(pc_n_feat_var, -2, keepdims=True) + 1
        var_vals_min = _ivy.reduce_min(pc_n_feat_var, -2, keepdims=True) - 1
        var_vals_range = var_vals_max - var_vals_min

        # BS x N x (2+D)
        normed_var_vals = (pc_n_feat_var - var_vals_min) / (var_vals_range + MIN_DENOMINATOR)
        pc_n_feat_var = normed_var_vals + scaled_depth

        # ready for later reversal with full image dimensions

        # BS x 1 x 1 x D
        var_vals_min = _ivy.expand_dims(var_vals_min, -2)
        var_vals_range = _ivy.expand_dims(var_vals_range, -2)

    # Validity Pruning #

    # num_valid_indices x (2+D)
    pc_n_feat = _ivy.gather_nd(pc_n_feat, validity_indices[..., 0:num_batch_dims + 1])
    pc_n_feat_var = _ivy.gather_nd(pc_n_feat_var, validity_indices[..., 0:num_batch_dims + 1])

    # num_valid_indices x 2
    quantized_pixel_coords = _ivy.gather_nd(quantized_pixel_coords,
                                            validity_indices[..., 0:num_batch_dims + 1])

    if with_db:
        means_to_scatter = pc_n_feat
        vars_to_scatter = pc_n_feat_var
    else:
        # num_valid_indices x (2+D)
        vars_to_scatter = 1 / (pc_n_feat_var + MIN_DENOMINATOR)
        means_to_scatter = pc_n_feat * vars_to_scatter

    # Scatter #

    # num_valid_indices x 1
    counter = _ivy.ones_like(pc_n_feat[..., 0:1], dev_str=dev_str)
    if with_db:
        counter *= -1

    # num_valid_indices x 2(2+D)+1
    values_to_scatter = _ivy.concatenate((means_to_scatter, vars_to_scatter, counter), -1)

    # num_valid_indices x (num_batch_dims + 2)
    all_indices = _ivy.flip(quantized_pixel_coords, -1)
    if num_batch_dims > 0:
        all_indices = _ivy.concatenate((validity_indices[..., :-2], all_indices), -1)

    # BS x H x W x (2(2+D) + 1)
    quantized_img = _ivy.scatter_nd(
        _ivy.reshape(all_indices, [-1, num_batch_dims + 2]),
        _ivy.reshape(values_to_scatter, [-1, 2 * (2 + d) + 1]),
        batch_shape + final_image_dims + [2 * (2 + d) + 1],
        reduction='replace' if _ivy.backend == 'mxnd' else red)

    # BS x H x W x 1
    quantized_counter = quantized_img[..., -1:]
    if with_db:
        invalidity_mask = quantized_counter != -1
    else:
        invalidity_mask = quantized_counter == 0

    if with_db:
        # BS x H x W x (2+D)
        quantized_mean_scaled = quantized_img[..., 0:2 + d]
        quantized_var_scaled = quantized_img[..., 2 + d:2 * (2 + d)]

        # BS x H x W x 1
        quantized_depth_mean = quantized_mean_scaled[..., 2:3]

        # BS x 1 x 1 x 1
        mean_depth_min = _ivy.expand_dims(mean_depth_min, -2)
        mean_depth_range = _ivy.expand_dims(mean_depth_range, -2)

        # BS x 1 x 1 x (1+D)
        pc_n_feat_wo_depth_min = _ivy.expand_dims(pc_n_feat_wo_depth_min, -2)
        pc_n_feat_wo_depth_range = _ivy.expand_dims(pc_n_feat_wo_depth_range, -2)

        # BS x 1 x 1 x (2+D) x 2
        var_threshold = _ivy.expand_dims(var_threshold, -3)

        # BS x H x W x (1+D)
        quantized_mean_wo_depth_scaled = _ivy.concatenate(
            (quantized_mean_scaled[..., 0:2], quantized_mean_scaled[..., 3:]), -1)
        quantized_mean_wo_depth_normed = quantized_mean_wo_depth_scaled - \
            (quantized_depth_mean - mean_depth_min) / \
            (mean_depth_range * min_depth_diff + MIN_DENOMINATOR)
        quantized_mean_wo_depth = quantized_mean_wo_depth_normed * pc_n_feat_wo_depth_range + \
            pc_n_feat_wo_depth_min
        prior_wo_depth = _ivy.concatenate((prior[..., 0:2], prior[..., 3:]), -1)
        quantized_mean_wo_depth = _ivy.where(invalidity_mask, prior_wo_depth,
                                             quantized_mean_wo_depth)

        # BS x H x W x (2+D)
        quantized_mean = _ivy.concatenate((quantized_mean_wo_depth[..., 0:2], quantized_depth_mean,
                                           quantized_mean_wo_depth[..., 2:]), -1)

        # BS x H x W x (2+D)
        quantized_var_normed = quantized_var_scaled - \
            (quantized_depth_mean - mean_depth_min) / \
            (mean_depth_range * min_depth_diff + MIN_DENOMINATOR)
        quantized_var = _ivy.maximum(quantized_var_normed * var_vals_range + var_vals_min,
                                     var_threshold[..., 0])
        quantized_var = _ivy.where(invalidity_mask, prior_var, quantized_var)
    else:
        # BS x H x W x (2+D)
        quantized_sum_mean_x_recip_var = quantized_img[..., 0:2 + d]
        quantized_var_wo_increase = _ivy.where(
            invalidity_mask, prior_var,
            (1 / (quantized_img[..., 2 + d:2 * (2 + d)] + MIN_DENOMINATOR)))
        quantized_var = _ivy.maximum(quantized_var_wo_increase * quantized_counter,
                                     _ivy.expand_dims(var_threshold[..., 0], -2))
        quantized_var = _ivy.where(invalidity_mask, prior_var, quantized_var)
        quantized_mean = _ivy.where(invalidity_mask, prior,
                                    quantized_var_wo_increase * quantized_sum_mean_x_recip_var)

    # BS x H x W x (2+D)    BS x H x W x (2+D)    BS x H x W x 1
    return quantized_mean, quantized_var, quantized_counter
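# A small numpy illustration of the inverse-variance fusion used above when
# with_db=False: duplicates landing on one pixel are scatter-summed as mean/var
# and 1/var, and the fused mean is their ratio (the code then also scales the
# fused variance by the hit counter).
import numpy as np

dup_means = np.array([1.0, 3.0])    # two duplicate hits on one pixel
dup_vars = np.array([0.1, 0.4])
fused_var = 1.0 / np.sum(1.0 / dup_vars)
fused_mean = fused_var * np.sum(dup_means / dup_vars)
assert np.isclose(fused_mean, 1.4)  # the lower-variance hit dominates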
def main(interactive=True, f=None):
    global INTERACTIVE
    INTERACTIVE = interactive

    # Framework Setup #
    # ----------------#

    # choose random framework
    f = choose_random_framework() if f is None else f
    set_framework(f)

    # Camera Geometry #
    # ----------------#

    # intrinsics

    # common intrinsic params
    img_dims = [512, 512]
    pp_offsets = ivy.array([dim / 2 - 0.5 for dim in img_dims], 'float32')
    cam_persp_angles = ivy.array([60 * np.pi / 180] * 2, 'float32')

    # ivy cam intrinsics container
    intrinsics = ivy_vision.persp_angles_and_pp_offsets_to_intrinsics_object(
        cam_persp_angles, pp_offsets, img_dims)

    # extrinsics

    # 3 x 4
    cam1_inv_ext_mat = ivy.array(np.load(data_dir + '/cam1_inv_ext_mat.npy'), 'float32')
    cam2_inv_ext_mat = ivy.array(np.load(data_dir + '/cam2_inv_ext_mat.npy'), 'float32')

    # full geometry

    # ivy cam geometry container
    cam1_geom = ivy_vision.inv_ext_mat_and_intrinsics_to_cam_geometry_object(
        cam1_inv_ext_mat, intrinsics)
    cam2_geom = ivy_vision.inv_ext_mat_and_intrinsics_to_cam_geometry_object(
        cam2_inv_ext_mat, intrinsics)
    cam_geoms = [cam1_geom, cam2_geom]

    # Camera Geometry Check #
    # ----------------------#

    # assert camera geometry shapes
    for cam_geom in cam_geoms:
        assert cam_geom.intrinsics.focal_lengths.shape == (2,)
        assert cam_geom.intrinsics.persp_angles.shape == (2,)
        assert cam_geom.intrinsics.pp_offsets.shape == (2,)
        assert cam_geom.intrinsics.calib_mats.shape == (3, 3)
        assert cam_geom.intrinsics.inv_calib_mats.shape == (3, 3)
        assert cam_geom.extrinsics.cam_centers.shape == (3, 1)
        assert cam_geom.extrinsics.Rs.shape == (3, 3)
        assert cam_geom.extrinsics.inv_Rs.shape == (3, 3)
        assert cam_geom.extrinsics.ext_mats_homo.shape == (4, 4)
        assert cam_geom.extrinsics.inv_ext_mats_homo.shape == (4, 4)
        assert cam_geom.full_mats_homo.shape == (4, 4)
        assert cam_geom.inv_full_mats_homo.shape == (4, 4)

    # Image Data #
    # -----------#

    # load images

    # h x w x 3
    color1 = ivy.array(cv2.imread(data_dir + '/rgb1.png').astype(np.float32) / 255)
    color2 = ivy.array(cv2.imread(data_dir + '/rgb2.png').astype(np.float32) / 255)

    # h x w x 1
    depth1 = ivy.array(np.reshape(np.frombuffer(
        cv2.imread(data_dir + '/depth1.png', -1).tobytes(), np.float32), img_dims + [1]))
    depth2 = ivy.array(np.reshape(np.frombuffer(
        cv2.imread(data_dir + '/depth2.png', -1).tobytes(), np.float32), img_dims + [1]))

    # depth scaled pixel coords

    # h x w x 3
    u_pix_coords = ivy_vision.create_uniform_pixel_coords_image(img_dims)
    ds_pixel_coords1 = u_pix_coords * depth1
    ds_pixel_coords2 = u_pix_coords * depth2

    # depth limits
    depth_min = ivy.reduce_min(ivy.concatenate((depth1, depth2), 0))
    depth_max = ivy.reduce_max(ivy.concatenate((depth1, depth2), 0))
    depth_limits = [depth_min, depth_max]

    # show images
    show_rgb_and_depth_images(color1, color2, depth1, depth2, depth_limits)

    # Flow and Depth Triangulation #
    # -----------------------------#

    # required mat formats
    cam1to2_full_mat_homo = ivy.matmul(cam2_geom.full_mats_homo, cam1_geom.inv_full_mats_homo)
    cam1to2_full_mat = cam1to2_full_mat_homo[..., 0:3, :]
    full_mats_homo = ivy.concatenate((ivy.expand_dims(cam1_geom.full_mats_homo, 0),
                                      ivy.expand_dims(cam2_geom.full_mats_homo, 0)), 0)
    full_mats = full_mats_homo[..., 0:3, :]

    # flow
    flow1to2 = ivy_vision.flow_from_depth_and_cam_mats(ds_pixel_coords1, cam1to2_full_mat)

    # depth again
    depth1_from_flow = ivy_vision.depth_from_flow_and_cam_mats(flow1to2, full_mats)

    # show images
    show_flow_and_depth_images(depth1, flow1to2, depth1_from_flow, depth_limits)

    # Inverse Warping #
    # ----------------#

    # inverse warp rendering
    warp = u_pix_coords[..., 0:2] + flow1to2
    color2_warp_to_f1 = ivy.reshape(ivy.bilinear_resample(color2, warp), color1.shape)

    # projected depth scaled pixel coords 2
    ds_pixel_coords1_wrt_f2 = ivy_vision.ds_pixel_to_ds_pixel_coords(ds_pixel_coords1,
                                                                     cam1to2_full_mat)

    # projected depth 2
    depth1_wrt_f2 = ds_pixel_coords1_wrt_f2[..., -1:]

    # inverse warp depth
    depth2_warp_to_f1 = ivy.reshape(ivy.bilinear_resample(depth2, warp), depth1.shape)

    # depth validity
    depth_validity = ivy.abs(depth1_wrt_f2 - depth2_warp_to_f1) < 0.01

    # inverse warp rendering with mask
    color2_warp_to_f1_masked = ivy.where(depth_validity, color2_warp_to_f1,
                                         ivy.zeros_like(color2_warp_to_f1))

    # show images
    show_inverse_warped_images(depth1_wrt_f2, depth2_warp_to_f1, depth_validity, color1,
                               color2_warp_to_f1, color2_warp_to_f1_masked, depth_limits)

    # Forward Warping #
    # ----------------#

    # forward warp rendering
    ds_pixel_coords1_proj = ivy_vision.ds_pixel_to_ds_pixel_coords(
        ds_pixel_coords2, ivy.inv(cam1to2_full_mat_homo)[..., 0:3, :])
    depth1_proj = ds_pixel_coords1_proj[..., -1:]
    ds_pixel_coords1_proj = ds_pixel_coords1_proj[..., 0:2] / depth1_proj
    features_to_render = ivy.concatenate((depth1_proj, color2), -1)

    # without depth buffer
    f1_forward_warp_no_db, _, _ = ivy_vision.quantize_to_image(
        ivy.reshape(ds_pixel_coords1_proj, (-1, 2)), img_dims,
        ivy.reshape(features_to_render, (-1, 4)), ivy.zeros_like(features_to_render),
        with_db=False)

    # with depth buffer
    f1_forward_warp_w_db, _, _ = ivy_vision.quantize_to_image(
        ivy.reshape(ds_pixel_coords1_proj, (-1, 2)), img_dims,
        ivy.reshape(features_to_render, (-1, 4)), ivy.zeros_like(features_to_render),
        with_db=False if ivy.get_framework() == 'mxnd' else True)

    # show images
    show_forward_warped_images(depth1, color1, f1_forward_warp_no_db, f1_forward_warp_w_db,
                               depth_limits)

    # message
    print('End of Run Through Demo!')
def main():

    # LSTM #
    # -----#

    # using the Ivy LSTM memory module, dual stacked, in a PyTorch model

    class TorchModelWithLSTM(torch.nn.Module):

        def __init__(self, channels_in, channels_out):
            torch.nn.Module.__init__(self)
            self._linear = torch.nn.Linear(channels_in, 64)
            self._lstm = ivy_mem.LSTM(64, channels_out, 2, return_state=False)
            self._assign_variables()

        def _assign_variables(self):
            self._lstm.v.map(lambda x, kc: self.register_parameter(
                name=kc, param=torch.nn.Parameter(x)))
            self._lstm.v = self._lstm.v.map(lambda x, kc: self._parameters[kc])

        def forward(self, x):
            x = self._linear(x)
            return self._lstm(x)

    # create model
    in_channels = 32
    out_channels = 8
    ivy.set_framework('torch')
    model = TorchModelWithLSTM(in_channels, out_channels)

    # define inputs
    batch_shape = [1, 2]
    timesteps = 3
    input_shape = batch_shape + [timesteps, in_channels]
    input_seq = torch.rand(batch_shape + [timesteps, in_channels])

    # call model and test output
    output_seq = model(input_seq)
    assert input_seq.shape[:-1] == output_seq.shape[:-1]
    assert input_seq.shape[-1] == in_channels
    assert output_seq.shape[-1] == out_channels

    # define loss function
    target = torch.zeros_like(output_seq)

    def loss_fn():
        pred = model(input_seq)
        return torch.sum((pred - target) ** 2)

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

    # train model
    print('\ntraining dummy PyTorch LSTM model...\n')
    for i in range(10):
        # clear gradients accumulated on the previous step
        optimizer.zero_grad()
        loss = loss_fn()
        loss.backward()
        optimizer.step()
        print('step {}, loss = {}'.format(i, loss))
    print('\ndummy PyTorch LSTM model trained!\n')
    ivy.unset_framework()

    # NTM #
    # ----#

    # using the Ivy NTM memory module in a TensorFlow model

    class TfModelWithNTM(tf.keras.Model):

        def __init__(self, channels_in, channels_out):
            tf.keras.Model.__init__(self)
            self._linear = tf.keras.layers.Dense(64)
            memory_size = 4
            memory_vector_dim = 1
            self._ntm = ivy_mem.NTM(
                input_dim=64, output_dim=channels_out, ctrl_output_size=channels_out,
                ctrl_layers=1, memory_size=memory_size, memory_vector_dim=memory_vector_dim,
                read_head_num=1, write_head_num=1)
            self._assign_variables()

        def _assign_variables(self):
            self._ntm.v.map(lambda x, kc: self.add_weight(name=kc, shape=x.shape))
            self.set_weights([ivy.to_numpy(v) for k, v in self._ntm.v.to_iterator()])
            self.trainable_weights_dict = dict()
            for weight in self.trainable_weights:
                self.trainable_weights_dict[weight.name] = weight
            self._ntm.v = self._ntm.v.map(lambda x, kc: self.trainable_weights_dict[kc + ':0'])

        def call(self, x, **kwargs):
            x = self._linear(x)
            return self._ntm(x)

    # create model
    in_channels = 32
    out_channels = 8
    ivy.set_framework('tensorflow')
    model = TfModelWithNTM(in_channels, out_channels)

    # define inputs
    batch_shape = [1, 2]
    timesteps = 3
    input_shape = batch_shape + [timesteps, in_channels]
    input_seq = tf.random.uniform(batch_shape + [timesteps, in_channels])

    # call model and test output
    output_seq = model(input_seq)
    assert input_seq.shape[:-1] == output_seq.shape[:-1]
    assert input_seq.shape[-1] == in_channels
    assert output_seq.shape[-1] == out_channels

    # define loss function
    target = tf.zeros_like(output_seq)

    def loss_fn():
        pred = model(input_seq)
        return tf.reduce_sum((pred - target) ** 2)

    # define optimizer
    optimizer = tf.keras.optimizers.Adam(1e-2)

    # train model
    print('\ntraining dummy TensorFlow NTM model...\n')
    for i in range(10):
        with tf.GradientTape() as tape:
            loss = loss_fn()
        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        print('step {}, loss = {}'.format(i, loss))
    print('\ndummy TensorFlow NTM model trained!\n')
    ivy.unset_framework()

    # ESM #
    # ----#

    # using the Ivy ESM memory module in a pure-Ivy model, with a JAX backend
    # ToDo: add pre-ESM conv layers to this demo

    class IvyModelWithESM(ivy.Module):

        def __init__(self, channels_in, channels_out):
            self._channels_in = channels_in
            self._esm = ivy_mem.ESM(omni_image_dims=(16, 32))
            self._linear = ivy_mem.Linear(channels_in, channels_out)
            ivy.Module.__init__(self, 'cpu')

        def _forward(self, obs):
            mem = self._esm(obs)
            x = ivy.reshape(mem.mean, (-1, self._channels_in))
            return self._linear(x)

    # create model
    in_channels = 32
    out_channels = 8
    ivy.set_framework('torch')
    model = IvyModelWithESM(in_channels, out_channels)

    # input config
    batch_size = 1
    image_dims = [5, 5]
    num_timesteps = 2
    num_feature_channels = 3

    # create image of pixel co-ordinates
    uniform_pixel_coords = \
        ivy_vision.create_uniform_pixel_coords_image(image_dims, [batch_size, num_timesteps])

    # define camera measurement
    depths = ivy.random_uniform(shape=[batch_size, num_timesteps] + image_dims + [1])
    ds_pixel_coords = ivy_vision.depth_to_ds_pixel_coords(depths)
    inv_calib_mats = ivy.random_uniform(shape=[batch_size, num_timesteps, 3, 3])
    cam_coords = ivy_vision.ds_pixel_to_cam_coords(ds_pixel_coords, inv_calib_mats)[..., 0:3]
    features = ivy.random_uniform(shape=[batch_size, num_timesteps] + image_dims +
                                  [num_feature_channels])
    img_mean = ivy.concatenate((cam_coords, features), -1)
    cam_rel_mat = ivy.identity(4, batch_shape=[batch_size, num_timesteps])[..., 0:3, :]

    # place these into an ESM camera measurement container
    esm_cam_meas = ESMCamMeasurement(img_mean=img_mean, cam_rel_mat=cam_rel_mat)

    # define agent pose transformation
    agent_rel_mat = ivy.identity(4, batch_shape=[batch_size, num_timesteps])[..., 0:3, :]

    # collect together into an ESM observation container
    esm_obs = ESMObservation(img_meas={'camera_0': esm_cam_meas}, agent_rel_mat=agent_rel_mat)

    # call model and test output
    output = model(esm_obs)
    assert output.shape[-1] == out_channels

    # define loss function
    target = ivy.zeros_like(output)

    def loss_fn(v):
        pred = model(esm_obs, v=v)
        return ivy.reduce_mean((pred - target) ** 2)

    # optimizer
    optimizer = ivy.SGD(lr=1e-4)

    # train model
    print('\ntraining dummy Ivy ESM model...\n')
    for i in range(10):
        loss, grads = ivy.execute_with_gradients(loss_fn, model.v)
        model.v = optimizer.step(model.v, grads)
        print('step {}, loss = {}'.format(i, ivy.to_numpy(loss).item()))
    print('\ndummy Ivy ESM model trained!\n')
    ivy.unset_framework()

    # message
    print('End of Run Through Demo!')