def visualize(self, parameters={}):
    """Send this sample's images and response maps to the ImageVisualizer.

    Every window title is ``parameters.get('title', '')`` followed by a
    fixed suffix.

    Args:
        parameters: mapping of display options. ``'mode'`` (defaults to
            ``'train'``) selects which set of images is shown and
            ``'title'`` (defaults to ``''``) is the title prefix.
    """
    def as_response_map(raw_map):
        # Response maps are squashed with a sigmoid before being displayed.
        return PTImage.from_2d_wh_torch(F.sigmoid(raw_map).data)

    in_train_mode = parameters.get('mode', 'train') == 'train'
    if not in_train_mode:
        frame = PTImage.from_cwh_torch(self.data[0])
        frame_xcor = as_response_map(self.output[0])
        panels = [
            (frame, ' : Frame'),
            (frame_xcor, ' : Frame xcor'),
        ]
    else:
        positive = PTImage.from_cwh_torch(self.data[0])
        negative = PTImage.from_cwh_torch(self.data[1])
        anchor = PTImage.from_cwh_torch(self.output[0])
        positive_map = as_response_map(self.output[1])
        negative_map = as_response_map(self.output[2])
        positive_target = PTImage.from_2d_wh_torch(self.target[0])
        negative_target = PTImage.from_2d_wh_torch(self.target[1])
        # Listed in display order, which differs from construction order.
        panels = [
            (anchor, ' : anchor'),
            (positive, ' : pos_frame'),
            (negative, ' : neg_frame'),
            (positive_target, ' : pos_target'),
            (negative_target, ' : neg_target'),
            (positive_map, ' : pos_res'),
            (negative_map, ' : neg_res'),
        ]

    for picture, suffix in panels:
        ImageVisualizer().set_image(
            picture, parameters.get('title', '') + suffix)
def visualize(self, parameters={}):
    """Show the input image plus one target/output mask image per channel.

    Masks are not blended over the input; each channel of the target and
    of the last entry of ``self.output[0]`` is displayed as its own image,
    labelled with ``self.class_lookup[channel]``.

    Args:
        parameters: mapping of display options; ``'title'`` (defaults to
            ``''``) is prepended to every window title.
    """
    def show(picture, suffix):
        ImageVisualizer().set_image(
            picture, parameters.get('title', '') + suffix)

    show(PTImage.from_cwh_torch(self.data[0]), ' : Input')

    # TODO: draw the mask layers on top of the data with transparency.
    target_masks = self.target[0]
    output_masks = self.output[0][-1]

    # For now every channel gets its own pair of windows.
    for channel in range(target_masks.size(0)):
        target_picture = PTImage.from_cwh_torch(
            target_masks[channel, :, :].unsqueeze(0))
        output_picture = PTImage.from_cwh_torch(
            output_masks[channel, :, :].unsqueeze(0))
        show(target_picture,
             ' : Target-{}'.format(self.class_lookup[channel]))
        show(output_picture,
             ' : LOutput-{}'.format(self.class_lookup[channel]))
def visualize(self, parameters={}):
    """Show the target image next to the model output(s).

    ``self.output[0]`` may be a single image or a list of images; a list
    is shown as numbered windows ``Output00``, ``Output01``, ...

    Args:
        parameters: mapping of display options; ``'title'`` (defaults to
            ``''``) is prepended to every window title.
    """
    prediction = self.output[0]
    is_sequence = isinstance(prediction, list)
    target_picture = PTImage.from_cwh_torch(self.target[0])

    if not is_sequence:
        # Single-image output: one target window and one output window.
        output_picture = PTImage.from_cwh_torch(prediction)
        ImageVisualizer().set_image(
            target_picture, parameters.get('title', '') + ' : Target')
        ImageVisualizer().set_image(
            output_picture, parameters.get('title', '') + ' : Output')
        return

    # Sequence output: the target once, then every element in order.
    ImageVisualizer().set_image(
        target_picture, parameters.get('title', '') + ' : Target')
    for step, frame in enumerate(prediction):
        step_picture = PTImage.from_cwh_torch(frame)
        ImageVisualizer().set_image(
            step_picture,
            parameters.get('title', '') + ' : Output{:02d}'.format(step))
def visualize(self, parameters={}):
    """Draw target and predicted objects on the input image and display it.

    Target rows whose first column is greater than -1 are drawn in
    ``(255, 0, 0)``; post-processed predictions are drawn with
    ``color=None``. A group is skipped when it has no rows.

    Args:
        parameters: mapping of display options; ``'title'`` (defaults to
            ``''``) is prepended to the window title.
    """
    source_image = PTImage.from_cwh_torch(self.data[0])
    hwc_bytes = source_image.to_order_and_class(
        Ordering.HWC, ValueClass.BYTE0255)
    # Work on a copy so the drawing does not touch the source image data.
    canvas = hwc_bytes.get_data().copy()

    # Per the original note: Nx4 boxes and an N-element class tensor.
    raw_boxes, raw_classes = self.output[1:]
    predicted_boxes, predicted_classes = \
        MultiObjectDetector.post_process_boxes(
            self.data[0], raw_boxes, raw_classes, len(self.class_lookup))

    # Targets: column 0 is the class, the remaining columns are the box;
    # rows with a class of -1 or lower are dropped.
    all_targets = self.target[0]
    is_real = all_targets[:, 0] > -1
    kept_targets = all_targets[is_real].reshape(-1, all_targets.shape[1])
    target_boxes = kept_targets[:, 1:]
    target_classes = kept_targets[:, 0]

    overlays = (
        (target_boxes, target_classes, (255, 0, 0)),
        (predicted_boxes, predicted_classes, None),
    )
    for boxes, classes, color in overlays:
        if boxes.shape[0] > 0:
            draw_objects_on_np_image(
                canvas,
                self.__convert_to_objects(boxes, classes),
                color=color)

    ImageVisualizer().set_image(
        PTImage(canvas), parameters.get('title', '') + ' : Output')
def visualize(self, parameters={}):
    """Display every image of the sequence followed by every depth map.

    Images are taken along the first axis of ``self.data[0]`` and depth
    maps along the first axis of ``self.output[2]``.

    Args:
        parameters: mapping of display options; ``'title'`` (defaults to
            ``''``) is prepended to every window title.
    """
    def show(picture, suffix):
        ImageVisualizer().set_image(
            picture, parameters.get('title', '') + suffix)

    sequence = self.data[0]
    for index in range(sequence.shape[0]):
        show(PTImage.from_cwh_torch(sequence[index]),
             ' : Image {}'.format(index))

    depth_maps = self.output[2]
    for index in range(depth_maps.shape[0]):
        show(PTImage.from_2d_wh_torch(depth_maps[index]),
             ' : DepthMap {}'.format(index))
def forward(self, x):
    """Run the recurrent attention loop and return the mask canvases.

    For each of ``self.timesteps`` steps the hidden state of
    ``self.att_rnn`` is decoded into attention parameters, a glimpse is read
    from ``x``, encoded, fed back into the RNN, decoded into a partial mask
    and written additively onto a running canvas.

    Args:
        x: 4-D tensor whose ``size()`` unpacks as
            ``(batch, channels, height, width)``.

    Returns:
        A list of ``self.timesteps + 1`` tensors: the sigmoid of the
        all-zero initial canvas followed by the sigmoid of the accumulated
        canvas after every step.

    Side effects:
        Sends the first glimpse of the batch to ``ImageVisualizer`` at every
        step, and advances / resets the hidden state of ``self.att_rnn``.
    """
    batch_size, chans, height, width = x.size()
    on_gpu = x.is_cuda

    # The hidden-state size is tied to the flattened CNN feature size, so a
    # dummy glimpse is pushed through the encoder and the RNN once before the
    # state is reset. Zeros are used instead of uninitialised memory so this
    # warm-up pass is deterministic.
    dummy_glimpse = torch.zeros(
        batch_size, chans, self.attn_grid_size, self.attn_grid_size)
    if on_gpu:
        dummy_glimpse = dummy_glimpse.cuda()
    dummy_feature_map = self.encoder.forward(dummy_glimpse)
    # view(batch_size, -1) rather than nelement() / batch_size: true division
    # yields a float, which Tensor.view() rejects.
    self.att_rnn.forward(dummy_feature_map.view(batch_size, -1))
    self.att_rnn.reset_hidden_state(batch_size, on_gpu)

    blank_canvas = torch.zeros(batch_size, self.num_classes, height, width)
    if on_gpu:
        blank_canvas = blank_canvas.cuda()
    outputs = [blank_canvas]
    self.init_weights(self.att_rnn.get_hidden_state())

    for t in range(self.timesteps):
        # 1) decode hidden state to generate gaussian attention parameters
        state = self.att_rnn.get_hidden_state()
        gauss_attn_params = torch.tanh(
            F.linear(state, self.att_decoder_weights))

        # 2) extract glimpse
        glimpse = self.attn_reader.forward(
            x, gauss_attn_params, self.attn_grid_size)

        # visualize first glimpse in batch for all t
        torch_glimpses = torch.chunk(glimpse, batch_size, dim=0)
        ImageVisualizer().set_image(
            PTImage.from_cwh_torch(torch_glimpses[0].squeeze().data),
            'zGlimpse {}'.format(t))

        # 3) use conv stack or resnet to extract features
        feature_map = self.encoder.forward(glimpse)
        # Encoder output dims without the last entry, reversed, with the
        # glimpse size appended; passed to the decoder below.
        conv_output_dims = self.encoder.get_output_dims()[:-1][::-1]
        conv_output_dims.append(glimpse.size())

        # 4) update hidden state
        # think about this connection a bit more
        self.att_rnn.forward(feature_map.view(batch_size, -1))

        # 5) use deconv network to get partial masks
        partial_mask = self.decoder.forward(feature_map, conv_output_dims)

        # 6) write masks additively to mask canvas
        partial_canvas = self.attn_writer.forward(
            partial_mask, gauss_attn_params, (height, width))
        outputs.append(torch.add(outputs[-1], partial_canvas))

    # return the sigmoided versions
    return [torch.sigmoid(canvas) for canvas in outputs]
def process_single_batch(original_images, ego_motion_vectors, disp_maps,
                         calib_frames, batch_number=0, mask_loss_factor=0.1):
    """Warp each frame towards the next one and accumulate the photometric loss.

    Steps:
      1. Convert every frame to camera coordinates with ``image_to_cam``.
      2. Turn every ego-motion vector into a transform matrix with
         ``six_dof_vec_to_matrix``.
      3. For each consecutive pair (i, i + 1): transform frame i's camera
         coordinates, re-project them with the homogeneous calibration matrix
         via ``cam_to_image`` and compare the masked warped image with the
         masked frame i + 1 (smooth-L1 plus ``(1 - ssim) / 2``).

    Args:
        original_images: tensor indexed by frame along its first axis.
        ego_motion_vectors: per-transition motion vectors; entries
            ``0 .. num_frames - 2`` are used.
        disp_maps: per-frame maps passed to ``image_to_cam``.
        calib_frames: per-frame calibration matrices; ``shape[0]`` defines the
            number of frames. Each matrix must be 3x3 for the concatenation
            into a 4x4 homogeneous matrix to succeed.
        batch_number: index used only in visualizer titles and logger keys.
        mask_loss_factor: currently unused, because the mask-loss term is
            disabled (see the note in the loop).

    Returns:
        A tuple ``(loss, out_images)`` where ``loss`` is the sum of the
        reconstruction, SSIM and (always zero) mask losses, and ``out_images``
        is the list of warped images, one per frame transition.

    Side effects:
        Writes statistics to ``Logger`` and sends the warped images and masks
        to ``ImageVisualizer``.
    """
    num_frames = calib_frames.shape[0]
    tensor_opts = dict(dtype=original_images.dtype,
                       device=original_images.device)

    Logger().set('loss_component.disp_maps_mean',
                 disp_maps.data.mean().item())
    Logger().set('loss_component.disp_maps_min', disp_maps.data.min().item())
    Logger().set('loss_component.disp_maps_max', disp_maps.data.max().item())
    Logger().set('loss_component.ego_motion_vectors[0]',
                 np.array2string(ego_motion_vectors[0].detach().cpu().numpy()))

    # step 1) Use inverse cam_matrix and depth to convert
    # frame 1,2,3 into camera coordinates
    cam_coords = [
        image_to_cam(original_images[i], disp_maps[i], calib_frames[i])
        for i in range(num_frames)
    ]

    # step 2) Generate transformation matrix from ego_motion_vectors
    transforms = [
        six_dof_vec_to_matrix(ego_motion_vectors[i])
        for i in range(num_frames - 1)
    ]

    # step 3) Transform Frame i (cam_coords) -> Frame i+1(cam_coords)
    # Then construct a new 2D image using new projection matrix
    total_re_loss = torch.zeros([], **tensor_opts)
    total_ssim_loss = torch.zeros([], **tensor_opts)
    total_mask_loss = torch.zeros([], **tensor_opts)

    # Loop-invariant pieces that extend a 3x3 calibration matrix to 4x4:
    # a zero column on the right and the row [0, 0, 0, 1] at the bottom.
    # torch.cat copies its inputs, so sharing them across iterations is safe.
    intrin_filler_right = torch.zeros(3, 1, **tensor_opts)
    intrin_filler_bottom = torch.zeros(1, 4, **tensor_opts)
    intrin_filler_bottom[0, 3] = 1

    out_images = []
    for i in range(num_frames - 1):
        # augment cam coords with row of 1's to 4D vecs
        ones_row = torch.ones_like(cam_coords[i][:1])
        augmented_vecs = torch.cat((cam_coords[i], ones_row), dim=0)
        cur_frame_coords = torch.matmul(transforms[i], augmented_vecs)

        hom_calib = torch.cat((calib_frames[i], intrin_filler_right), dim=1)
        hom_calib = torch.cat((hom_calib, intrin_filler_bottom), dim=0)

        warped_image, mask = cam_to_image(hom_calib, cur_frame_coords,
                                          original_images[i])
        out_images.append(warped_image)

        ImageVisualizer().set_image(
            PTImage.from_cwh_torch(warped_image),
            'warped_image {}-{}'.format(batch_number, i))
        ImageVisualizer().set_image(
            PTImage.from_2d_wh_torch(mask),
            'mask {}-{}'.format(batch_number, i))
        Logger().set('loss_component.mask_mean.{}-{}'.format(batch_number, i),
                     mask.mean().data.item())

        # compare warped_image to next real image; both sides are multiplied
        # by the mask so that masked-out pixels do not contribute to the loss
        masked_warp_image = warped_image.unsqueeze(0) * mask
        masked_gt_image = original_images[i + 1].unsqueeze(0) * mask
        re_loss = F.smooth_l1_loss(masked_warp_image, masked_gt_image,
                                   reduction='none')

        # NOTE: a loss term preventing the mask from going to 0
        # (mask_loss_factor * smooth_l1(mask, ones)) is currently disabled,
        # so total_mask_loss stays at zero.
        total_re_loss += re_loss.mean()
        total_ssim_loss += old_div(
            (1 - ssim(masked_warp_image, masked_gt_image)), 2)

    Logger().set('loss_component.mask_loss.{}'.format(batch_number),
                 total_mask_loss.data.item())
    Logger().set('loss_component.batch_re_loss.{}'.format(batch_number),
                 total_re_loss.data.item())
    Logger().set('loss_component.batch_ssim_loss.{}'.format(batch_number),
                 total_ssim_loss.data.item())
    return total_re_loss + total_ssim_loss + total_mask_loss, out_images