Beispiel #1
0
    def visualize(self, parameters={}):
        """Send this sample's images to the ``ImageVisualizer``.

        In ``'train'`` mode (also used when ``parameters`` has no ``'mode'``
        key) seven images are shown: the anchor from ``self.output[0]``, the
        two frames from ``self.data[0:2]``, the two maps from
        ``self.target[0:2]`` and the sigmoid of ``self.output[1]`` and
        ``self.output[2]``. In any other mode only ``self.data[0]`` and the
        sigmoid of ``self.output[0]`` are shown.

        Args:
            parameters: dict-like options. ``'mode'`` selects the branch and
                ``'title'`` (default ``''``) is prepended to every image name.
        """
        title = parameters.get('title', '')

        if parameters.get('mode', 'train') != 'train':
            frame = PTImage.from_cwh_torch(self.data[0])
            frame_xcor = PTImage.from_2d_wh_torch(
                F.sigmoid(self.output[0]).data)
            named_images = [
                (frame, ' : Frame'),
                (frame_xcor, ' : Frame xcor'),
            ]
        else:
            # Convert everything first, then display, so nothing is shown
            # if one of the conversions fails.
            pos_frame = PTImage.from_cwh_torch(self.data[0])
            neg_frame = PTImage.from_cwh_torch(self.data[1])
            anchor = PTImage.from_cwh_torch(self.output[0])
            pos_response = PTImage.from_2d_wh_torch(
                F.sigmoid(self.output[1]).data)
            neg_response = PTImage.from_2d_wh_torch(
                F.sigmoid(self.output[2]).data)
            pos_target = PTImage.from_2d_wh_torch(self.target[0])
            neg_target = PTImage.from_2d_wh_torch(self.target[1])
            named_images = [
                (anchor, ' : anchor'),
                (pos_frame, ' : pos_frame'),
                (neg_frame, ' : neg_frame'),
                (pos_target, ' : pos_target'),
                (neg_target, ' : neg_target'),
                (pos_response, ' : pos_res'),
                (neg_response, ' : neg_res'),
            ]

        for image, suffix in named_images:
            ImageVisualizer().set_image(image, title + suffix)
Beispiel #2
0
 def visualize(self,parameters={}):
     """Show the input image and one target/output mask image per channel.

     ``self.target[0]`` supplies the target masks and the last element of
     ``self.output[0]`` supplies the predicted masks; both are indexed
     channel by channel along their first axis. Each channel is labelled
     with the matching entry of ``self.class_lookup``.

     Args:
         parameters: dict-like options; ``'title'`` (default ``''``) is
             prepended to every image name.
     """
     input_image = PTImage.from_cwh_torch(self.data[0])
     title = parameters.get('title', '')
     ImageVisualizer().set_image(input_image, title + ' : Input')

     # The masks are not blended onto the input yet; every channel is
     # drawn as its own image for now.
     target_masks = self.target[0]
     output_masks = self.output[0][-1]
     num_channels = target_masks.size(0)
     for channel in range(num_channels):
         # unsqueeze(0) adds a leading axis of size 1 before the conversion.
         target_image = PTImage.from_cwh_torch(
             target_masks[channel, :, :].unsqueeze(0))
         output_image = PTImage.from_cwh_torch(
             output_masks[channel, :, :].unsqueeze(0))
         label = self.class_lookup[channel]
         ImageVisualizer().set_image(
             target_image, title + ' : Target-{}'.format(label))
         ImageVisualizer().set_image(
             output_image, title + ' : LOutput-{}'.format(label))
Beispiel #3
0
    def visualize(self,parameters={}):
        """Show the target image next to the output image(s).

        ``self.output[0]`` may be a single image or a ``list`` of images.
        For a list, every element is shown as ``Output00``, ``Output01``, ...;
        otherwise a single ``Output`` image is shown. The target from
        ``self.target[0]`` is always shown as ``Target``.

        Args:
            parameters: dict-like options; ``'title'`` (default ``''``) is
                prepended to every image name.
        """
        prediction = self.output[0]
        is_sequence = isinstance(prediction, list)

        target_image = PTImage.from_cwh_torch(self.target[0])
        title = parameters.get('title', '')

        if not is_sequence:
            output_image = PTImage.from_cwh_torch(prediction)
            ImageVisualizer().set_image(target_image, title + ' : Target')
            ImageVisualizer().set_image(output_image, title + ' : Output')
            return

        ImageVisualizer().set_image(target_image, title + ' : Target')
        for step, step_output in enumerate(prediction):
            ImageVisualizer().set_image(
                PTImage.from_cwh_torch(step_output),
                title + ' : Output{:02d}'.format(step))
    def visualize(self, parameters={}):
        """Draw target and predicted boxes on the input image and show it.

        The input ``self.data[0]`` is converted to an HWC byte image and
        copied so it can be drawn on. Predictions come from
        ``self.output[1:]`` after ``MultiObjectDetector.post_process_boxes``.
        Targets come from ``self.target[0]``, where column 0 holds the class
        and the remaining columns the box; rows whose class is not greater
        than -1 are ignored.

        Args:
            parameters: dict-like options; ``'title'`` (default ``''``) is
                prepended to the image name.
        """
        source_image = PTImage.from_cwh_torch(self.data[0])
        as_hwc_bytes = source_image.to_order_and_class(
            Ordering.HWC, ValueClass.BYTE0255)
        canvas = as_hwc_bytes.get_data().copy()

        raw_boxes, raw_classes = self.output[1:]
        num_classes = len(self.class_lookup)
        pred_boxes, pred_classes = MultiObjectDetector.post_process_boxes(
            self.data[0], raw_boxes, raw_classes, num_classes)

        # Keep only the target rows that carry a real class (> -1).
        targets = self.target[0]
        is_real = targets[:, 0] > -1
        kept_targets = targets[is_real].reshape(-1, targets.shape[1])
        gt_classes = kept_targets[:, 0]
        gt_boxes = kept_targets[:, 1:]

        # Targets are drawn with the fixed colour (255, 0, 0).
        if gt_boxes.shape[0] > 0:
            gt_objects = self.__convert_to_objects(gt_boxes, gt_classes)
            draw_objects_on_np_image(canvas, gt_objects, color=(255, 0, 0))

        # Predictions pass color=None to the drawing helper.
        if pred_boxes.shape[0] > 0:
            pred_objects = self.__convert_to_objects(pred_boxes, pred_classes)
            draw_objects_on_np_image(canvas, pred_objects, color=None)

        ImageVisualizer().set_image(
            PTImage(canvas), parameters.get('title', '') + ' : Output')
 def visualize(self, parameters={}):
     """Show every image of the input sequence and every output map.

     ``self.data[0]`` and ``self.output[2]`` are both walked along their
     first axis. Entries of the former are shown as ``Image <i>``, entries
     of the latter as ``DepthMap <i>``.

     Args:
         parameters: dict-like options; ``'title'`` (default ``''``) is
             prepended to every image name.
     """
     def show(image, suffix):
         # The title is looked up on every call, so nothing is read from
         # ``parameters`` when there is nothing to display.
         ImageVisualizer().set_image(
             image, parameters.get('title', '') + suffix)

     frames = self.data[0]
     for index in range(frames.shape[0]):
         show(PTImage.from_cwh_torch(frames[index]),
              ' : Image {}'.format(index))

     depth_maps = self.output[2]
     for index in range(depth_maps.shape[0]):
         show(PTImage.from_2d_wh_torch(depth_maps[index]),
              ' : DepthMap {}'.format(index))
Beispiel #6
0
    def forward(self, x):
        """Run the recurrent attention loop and return the mask canvases.

        For ``self.timesteps`` steps the method decodes attention parameters
        from the RNN hidden state, reads a glimpse of ``x``, encodes it,
        updates the RNN, decodes a partial mask and adds it onto the running
        canvas. The first glimpse of the batch is also sent to the
        ``ImageVisualizer`` at every step.

        Args:
            x: 4-D input tensor; its size is unpacked as
                ``(batch_size, chans, height, width)``.

        Returns:
            A list of ``self.timesteps + 1`` tensors holding the sigmoid of
            the canvas after each step. Element 0 is the sigmoid of the
            all-zero initial canvas of shape
            ``(batch_size, self.num_classes, height, width)``.
        """
        batch_size, chans, height, width = x.size()

        # The hidden-state size is tied to the CNN feature size, so an
        # uninitialised dummy glimpse is pushed through the encoder and the
        # RNN once before the hidden state is reset.
        dummy_glimpse = torch.Tensor(
            batch_size, chans, self.attn_grid_size, self.attn_grid_size)
        if x.is_cuda:
            dummy_glimpse = dummy_glimpse.cuda()
        dummy_feature_map = self.encoder.forward(dummy_glimpse)
        # Floor division keeps the size an int; with "/" Python 3 yields a
        # float, which Tensor.view rejects.
        self.att_rnn.forward(dummy_feature_map.view(
            batch_size, dummy_feature_map.nelement() // batch_size))
        self.att_rnn.reset_hidden_state(batch_size, x.data.is_cuda)

        # The canvas history starts with an all-zero canvas.
        init_tensor = torch.zeros(batch_size, self.num_classes, height, width)
        if x.data.is_cuda:
            init_tensor = init_tensor.cuda()
        outputs = [init_tensor]

        self.init_weights(self.att_rnn.get_hidden_state())

        for t in range(self.timesteps):
            # 1) decode hidden state to generate gaussian attention parameters
            state = self.att_rnn.get_hidden_state()
            gauss_attn_params = F.tanh(
                F.linear(state, self.att_decoder_weights))

            # 2) extract glimpse
            glimpse = self.attn_reader.forward(
                x, gauss_attn_params, self.attn_grid_size)

            # visualize first glimpse in batch for all t
            torch_glimpses = torch.chunk(glimpse, batch_size, dim=0)
            ImageVisualizer().set_image(
                PTImage.from_cwh_torch(torch_glimpses[0].squeeze().data),
                'zGlimpse {}'.format(t))

            # 3) use conv stack or resnet to extract features
            feature_map = self.encoder.forward(glimpse)
            # Encoder output dims without the last entry, reversed, followed
            # by the glimpse size; the decoder receives this list.
            conv_output_dims = self.encoder.get_output_dims()[:-1][::-1]
            conv_output_dims.append(glimpse.size())

            # 4) update hidden state # think about this connection a bit more
            self.att_rnn.forward(feature_map.view(
                batch_size, feature_map.nelement() // batch_size))

            # 5) use deconv network to get partial masks
            partial_mask = self.decoder.forward(feature_map, conv_output_dims)

            # 6) write masks additively to mask canvas
            partial_canvas = self.attn_writer.forward(
                partial_mask, gauss_attn_params, (height, width))
            outputs.append(torch.add(outputs[-1], partial_canvas))

        # return the sigmoided versions
        return [F.sigmoid(canvas) for canvas in outputs]
Beispiel #7
0
def process_single_batch(original_images,
                         ego_motion_vectors,
                         disp_maps,
                         calib_frames,
                         batch_number=0,
                         mask_loss_factor=0.1):
    """Warp each frame with its ego-motion transform and score the result.

    Every frame is lifted to camera coordinates with ``image_to_cam``. For
    each frame ``i`` except the last, those coordinates are transformed by
    the matrix built from ``ego_motion_vectors[i]`` and projected back with
    ``cam_to_image``. The warped image is compared against
    ``original_images[i + 1]`` under the returned mask, using a smooth-L1
    term and an ``(1 - ssim) / 2`` term. Warped images and masks are sent to
    the ``ImageVisualizer`` and loss components to the ``Logger``.

    Args:
        original_images: tensor indexed by frame; its dtype and device are
            used for all tensors created here.
        ego_motion_vectors: indexed by frame; entry ``i`` is turned into a
            transform by ``six_dof_vec_to_matrix``.
        disp_maps: tensor indexed by frame, passed to ``image_to_cam``.
        calib_frames: tensor whose first dimension is the number of frames.
            Each entry is padded with one column and one row below, so it is
            expected to be 3x3.
        batch_number: only used to label visualiser and logger entries.
        mask_loss_factor: currently unused, because the mask-regularisation
            term is disabled.

    Returns:
        A tuple ``(loss, out_images)``. ``loss`` is the sum of the
        reconstruction, SSIM and (always zero) mask totals; ``out_images``
        is the list of warped images, one per consecutive frame pair.
    """
    num_frames = calib_frames.shape[0]
    Logger().set('loss_component.disp_maps_mean', disp_maps.data.mean().item())
    Logger().set('loss_component.disp_maps_min', disp_maps.data.min().item())
    Logger().set('loss_component.disp_maps_max', disp_maps.data.max().item())
    Logger().set('loss_component.ego_motion_vectors[0]',
                 np.array2string(ego_motion_vectors[0].detach().cpu().numpy()))

    # step 1) Use inverse cam_matrix and depth to convert
    # every frame into camera coordinates
    cam_coords = [
        image_to_cam(original_images[i], disp_maps[i], calib_frames[i])
        for i in range(num_frames)
    ]

    # step 2) Generate transformation matrix from ego_motion_vectors
    transforms = [
        six_dof_vec_to_matrix(ego_motion_vectors[i])
        for i in range(num_frames - 1)
    ]

    dtype = original_images.dtype
    device = original_images.device

    # Padding that extends each calibration matrix to 4x4: a zero column on
    # the right and a (0, 0, 0, 1) row at the bottom. These are the same for
    # every frame, so they are built once; torch.cat copies them on use.
    intrin_filler_right = torch.zeros(3, 1, dtype=dtype, device=device)
    intrin_filler_bottom = torch.zeros(1, 4, dtype=dtype, device=device)
    intrin_filler_bottom[0, 3] = 1

    # step 3) Transform Frame i (cam_coords) -> Frame i+1(cam_coords)
    # Then construct a new 2D image using new projection matrix
    total_re_loss = torch.zeros([], dtype=dtype, device=device)
    total_ssim_loss = torch.zeros([], dtype=dtype, device=device)
    # The term that would keep the mask from collapsing to 0
    # (mask_loss_factor * smooth_l1(mask, ones)) is disabled, so this
    # total stays at zero.
    total_mask_loss = torch.zeros([], dtype=dtype, device=device)

    out_images = []
    for i in range(num_frames - 1):
        # augment cam coords with row of 1's to 4D vecs
        ones_row = torch.ones_like(cam_coords[i])[0, :].unsqueeze(0)
        augmented_vecs = torch.cat((cam_coords[i], ones_row), dim=0)
        cur_frame_coords = torch.matmul(transforms[i], augmented_vecs)

        hom_calib = torch.cat((calib_frames[i], intrin_filler_right), dim=1)
        hom_calib = torch.cat((hom_calib, intrin_filler_bottom), dim=0)
        warped_image, mask = cam_to_image(hom_calib, cur_frame_coords,
                                          original_images[i])
        out_images.append(warped_image)

        ImageVisualizer().set_image(
            PTImage.from_cwh_torch(warped_image),
            'warped_image {}-{}'.format(batch_number, i))
        ImageVisualizer().set_image(
            PTImage.from_2d_wh_torch(mask),
            'mask {}-{}'.format(batch_number, i))
        Logger().set('loss_component.mask_mean.{}-{}'.format(batch_number, i),
                     mask.mean().data.item())

        # compare warped_image to next real image;
        # multiplying by the mask keeps masked-out pixels out of the loss
        masked_warp_image = warped_image.unsqueeze(0) * mask
        masked_gt_image = original_images[i + 1].unsqueeze(0) * mask
        re_loss = F.smooth_l1_loss(masked_warp_image,
                                   masked_gt_image,
                                   reduction='none')
        total_re_loss += re_loss.mean()
        total_ssim_loss += old_div(
            (1 - ssim(masked_warp_image, masked_gt_image)), 2)

    Logger().set('loss_component.mask_loss.{}'.format(batch_number),
                 total_mask_loss.data.item())
    Logger().set('loss_component.batch_re_loss.{}'.format(batch_number),
                 total_re_loss.data.item())
    Logger().set('loss_component.batch_ssim_loss.{}'.format(batch_number),
                 total_ssim_loss.data.item())
    return total_re_loss + total_ssim_loss + total_mask_loss, out_images