Exemple #1
0
class Frame:
    """An image together with the objects annotated on it.

    Attributes:
        image_path: value forwarded to ``PTImage`` as ``pil_image_path``.
        objects: list of annotated objects. ``visualize`` reads ``box``,
            ``unique_id`` and ``obj_type`` from each entry.
        image: the ``PTImage`` backing this frame.
    """

    def __init__(self, image_path='', objects=None):
        """Create a frame from an image path and an optional object list.

        Args:
            image_path: path handed to ``PTImage(pil_image_path=...)``.
            objects: list of objects for this frame. The list is stored by
                reference (not copied). ``None`` means "no objects".
        """
        self.image_path = image_path
        # A None sentinel gives every frame its own empty list; a literal
        # ``[]`` default would be one list shared by all default-built frames.
        self.objects = [] if objects is None else objects
        self.image = PTImage(pil_image_path=self.image_path, persist=False)

    @classmethod
    def from_image_and_objects(cls, ptimage, objects=None):
        """Build a frame around an already constructed image.

        Args:
            ptimage: image object that replaces the one created by
                ``__init__`` from the empty path.
            objects: list of objects for this frame, or ``None`` for none.

        Returns:
            The new frame, with ``image`` set to ``ptimage``.
        """
        frame = cls('', objects)
        frame.image = ptimage
        return frame

    def get_objects(self):
        """Return the list of objects attached to this frame."""
        return self.objects

    def show_raw_image(self):
        """Show the underlying image without any object overlays."""
        # NOTE(review): 'frame image' is passed positionally, whereas
        # ``visualize`` below calls the same method with keywords -- confirm
        # that the first positional parameter is the intended one.
        self.image.visualize('frame image')

    def visualize(self, axes=None, display=False, title='Frame Visualization'):
        """Draw the image with an outlined box and a label for every object.

        Args:
            axes: forwarded to ``self.image.visualize``; the axes returned by
                that call are the ones drawn on.
            display: when true, block on ``plt.show`` and then close the
                figure.
            title: forwarded to ``self.image.visualize``.
        """
        axes = self.image.visualize(axes=axes, title=title, display=False)
        for obj in self.objects:
            box = obj.box
            edges = box.edges()
            outline = patches.Rectangle(box.xy_min(), edges[0], edges[1],
                                        linewidth=1, edgecolor='r',
                                        facecolor='none')
            axes.add_patch(outline)
            # Label: "<unique_id> <obj_type> [rounded box coordinates]".
            coord_string = str([int(round(x)) for x in box.to_single_array()])
            label = str(obj.unique_id) + ' ' + str(obj.obj_type) + ' ' + coord_string
            axes.text(box.xmin, box.ymin, label,
                      color='white', fontsize=12,
                      bbox={'facecolor': 'red', 'alpha': 0.5, 'pad': 2})
        if display:
            plt.show(block=True)
            plt.close()
    def visualize(self, parameters={}):
        """Draw target and predicted objects on the input image and publish it.

        Args:
            parameters: dict-like; only the ``'title'`` key is read (default
                ``''``) and used as a prefix for the visualizer title.
        """
        # Work on a copy of the input converted to HWC / BYTE0255.
        source_image = PTImage.from_cwh_torch(self.data[0])
        hwc_image = source_image.to_order_and_class(Ordering.HWC,
                                                    ValueClass.BYTE0255)
        canvas = hwc_image.get_data().copy()

        # Predictions: everything after the first output entry.
        # (original note: Nx4 boxes and an N-element class tensor)
        boxes, classes = self.output[1:]
        pred_boxes, pred_classes = MultiObjectDetector.post_process_boxes(
            self.data[0], boxes, classes, len(self.class_lookup))

        # Targets: keep only rows whose first column is greater than -1;
        # column 0 is used as the class and the remaining columns as the box.
        targets = self.target[0]
        keep_rows = targets[:, 0] > -1
        kept_targets = targets[keep_rows].reshape(-1, targets.shape[1])
        gt_boxes = kept_targets[:, 1:]
        gt_classes = kept_targets[:, 0]

        if gt_boxes.shape[0] > 0:
            gt_objects = self.__convert_to_objects(gt_boxes, gt_classes)
            draw_objects_on_np_image(canvas, gt_objects, color=(255, 0, 0))
        if pred_boxes.shape[0] > 0:
            pred_objects = self.__convert_to_objects(pred_boxes, pred_classes)
            draw_objects_on_np_image(canvas, pred_objects, color=None)

        ImageVisualizer().set_image(
            PTImage(canvas), parameters.get('title', '') + ' : Output')
Exemple #3
0
 def visualize(self,parameters={}):
     """Publish the input image and one target/output mask image per channel.

     Args:
         parameters: dict-like; only ``'title'`` is read (default ``''``) and
             used as a prefix for every visualizer title.
     """
     input_image = PTImage.from_cwh_torch(self.data[0])
     title_prefix = parameters.get('title','')
     ImageVisualizer().set_image(input_image, title_prefix + ' : Input')

     target_masks = self.target[0]
     # Only the last entry of output[0] is visualized.
     output_masks = self.output[0][-1]

     # One separate image per mask channel, titled with the class name.
     for channel in range(target_masks.size(0)):
         target_image = PTImage.from_cwh_torch(
             target_masks[channel,:,:].unsqueeze(0))
         output_image = PTImage.from_cwh_torch(
             output_masks[channel,:,:].unsqueeze(0))
         class_name = self.class_lookup[channel]
         ImageVisualizer().set_image(
             target_image, title_prefix + ' : Target-{}'.format(class_name))
         ImageVisualizer().set_image(
             output_image, title_prefix + ' : LOutput-{}'.format(class_name))
Exemple #4
0
    def apply_to_image(self, image, _output_size):
        """Resample ``image`` through ``self.inverse`` onto a new image.

        Every output pixel position is mapped through the (axis-swapped)
        inverse transform to a position in the input, and the input is
        interpolated there with ``map_coordinates``.

        Args:
            image: source image; must report ``Ordering.HWC``. A 2-D data
                array is expanded to three identical channels first.
            _output_size: two-element sequence giving the extents of the
                first two axes of the output array; values are cast to int.

        Returns:
            A new ``PTImage`` in HWC ordering with the same value class as
            ``image`` and the requested size.

        Raises:
            AssertionError: if the ordering is not HWC, or the image data is
                not 3-dimensional after the grayscale expansion.
        """
        output_size = [int(_output_size[0]), int(_output_size[1])]

        # Swap the first two rows/columns of the linear part, and the two
        # translation entries, so the matrix acts on array-index order.
        # NOTE(review): presumably self.inverse is stored in (x, y) order
        # while the sampling grid below is (row, col) -- confirm.
        inverse_transform = self.inverse.copy()
        inverse_transform[0:2, 0:2] = self.inverse[1::-1, 1::-1]
        inverse_transform[0:2, 2] = self.inverse[1::-1, 2]

        assert image.ordering == Ordering.HWC, 'Ordering must be HWC to apply the affine transform!'
        img_data = image.get_data()
        # A single-channel (2-D) image is duplicated into three channels.
        if len(img_data.shape) == 2:
            img_data = np.stack((img_data, ) * 3, axis=2)
        # The message previously referenced an undefined name, which turned a
        # failed check into a NameError instead of this AssertionError.
        assert len(img_data.shape) == 3, \
            'Input image must have 3 channels! found {}'.format(img_data.shape)

        newimage = PTImage(data=np.empty(
            [output_size[0], output_size[1], img_data.shape[2]],
            dtype=image.vc['dtype']),
                           ordering=Ordering.HWC,
                           vc=image.vc)
        # Written in place below; relies on get_data() exposing the backing
        # array rather than a copy.
        newimage_data = newimage.get_data()

        # scipy's affine_transform takes a 2x2 matrix plus a separate offset,
        # so the transform and the interpolation are done as two explicit
        # steps with map_coordinates instead.

        # 1) homogeneous coordinates of every output pixel, one row each
        px, py = np.mgrid[0:output_size[0]:1, 0:output_size[1]:1]
        points = np.c_[px.ravel(), py.ravel()]
        points_aug = np.concatenate((points, np.ones((points.shape[0], 1))),
                                    axis=1)

        # 2) map them back to the input positions to sample at
        inv_points = np.dot(inverse_transform, points_aug.T)

        # 3) interpolate each channel of the input at those positions
        sample_coords = inv_points[0:2, :]
        for channel in range(img_data.shape[2]):
            newimage_data[:, :, channel] = map_coordinates(
                img_data[:, :, channel],
                sample_coords,
                order=self.interp_order).reshape(output_size)

        return newimage
 def visualize(self, parameters={}):
     """Publish every image of the input sequence and every depth map.

     Args:
         parameters: dict-like; only ``'title'`` is read (default ``''``) and
             used as a prefix for every visualizer title.
     """
     # Input images: one entry per index along the first axis of data[0].
     frames = self.data[0]
     for idx in range(frames.shape[0]):
         frame_image = PTImage.from_cwh_torch(frames[idx])
         ImageVisualizer().set_image(
             frame_image,
             parameters.get('title', '') + ' : Image {}'.format(idx))

     # Depth maps: one entry per index along the first axis of output[2].
     depth_maps = self.output[2]
     for idx in range(depth_maps.shape[0]):
         depth_image = PTImage.from_2d_wh_torch(depth_maps[idx])
         ImageVisualizer().set_image(
             depth_image,
             parameters.get('title', '') + ' : DepthMap {}'.format(idx))
Exemple #6
0
    def visualize(self,parameters={}):
        """Publish the target image next to the output image(s).

        ``self.output[0]`` may be a single image or a list of images; a list
        is published as ``Output00``, ``Output01``, ... in order.

        Args:
            parameters: dict-like; only ``'title'`` is read (default ``''``)
                and used as a prefix for every visualizer title.
        """
        prediction = self.output[0]
        target_image = PTImage.from_cwh_torch(self.target[0])

        if not isinstance(prediction, list):
            # Single output image.
            output_image = PTImage.from_cwh_torch(prediction)
            ImageVisualizer().set_image(
                target_image, parameters.get('title','') + ' : Target')
            ImageVisualizer().set_image(
                output_image, parameters.get('title','') + ' : Output')
            return

        # Sequence of output images.
        ImageVisualizer().set_image(
            target_image, parameters.get('title','') + ' : Target')
        for step, step_output in enumerate(prediction):
            step_image = PTImage.from_cwh_torch(step_output)
            ImageVisualizer().set_image(
                step_image,
                parameters.get('title','') + ' : Output{:02d}'.format(step))
Exemple #7
0
    def visualize(self, parameters={}):
        """Publish the images for the current mode to the visualizer.

        In ``'train'`` mode (the default) seven images are published: the
        anchor, the two data images, the two targets, and the sigmoid of the
        two response outputs. In any other mode only the frame and the
        sigmoid of ``output[0]`` are published.

        Args:
            parameters: dict-like; ``'mode'`` (default ``'train'``) selects
                the branch and ``'title'`` (default ``''``) prefixes every
                visualizer title.
        """
        if parameters.get('mode', 'train') != 'train':
            frame_image = PTImage.from_cwh_torch(self.data[0])
            xcor_image = PTImage.from_2d_wh_torch(
                F.sigmoid(self.output[0]).data)
            panels = [
                (frame_image, ' : Frame'),
                (xcor_image, ' : Frame xcor'),
            ]
        else:
            pos_image = PTImage.from_cwh_torch(self.data[0])
            neg_image = PTImage.from_cwh_torch(self.data[1])
            anchor_image = PTImage.from_cwh_torch(self.output[0])
            pos_response = PTImage.from_2d_wh_torch(
                F.sigmoid(self.output[1]).data)
            neg_response = PTImage.from_2d_wh_torch(
                F.sigmoid(self.output[2]).data)
            pos_target = PTImage.from_2d_wh_torch(self.target[0])
            neg_target = PTImage.from_2d_wh_torch(self.target[1])
            # Listed in the order they are published.
            panels = [
                (anchor_image, ' : anchor'),
                (pos_image, ' : pos_frame'),
                (neg_image, ' : neg_frame'),
                (pos_target, ' : pos_target'),
                (neg_target, ' : neg_target'),
                (pos_response, ' : pos_res'),
                (neg_response, ' : neg_res'),
            ]

        for panel_image, suffix in panels:
            ImageVisualizer().set_image(
                panel_image,
                parameters.get('title', '') + suffix)
Exemple #8
0
 def __getitem__(self, index):
     """Return the dataset entry at ``index`` as a ``Frame``.

     Each label becomes an ``Object`` with a box built from its ``'bbox'``,
     the category name looked up through ``self.coco.loadCats``, and a list
     of polygons built from its ``'segmentation'``.

     Segmentations that are missing, empty, or not a list produce no
     polygons (the original author's note: RLE-encoded masks are
     deliberately ignored rather than converted back to polygons).
     """
     image, labels = self.dataset[index]
     ptimage = PTImage.from_numpy_array(np.asarray(image))

     objects = []
     for annotation in labels:
         box = Box.from_xywh(annotation['bbox'])
         category = self.coco.loadCats([annotation['category_id']])[0]
         segmentation = annotation.get('segmentation')
         if segmentation and isinstance(segmentation, list):
             # Each segment is a flat coordinate list, reshaped to rows of 2.
             polys = [
                 Polygon(
                     np.array(seg).reshape((int(old_div(len(seg), 2)), 2)))
                 for seg in segmentation
             ]
         else:
             polys = []
         objects.append(
             Object(box, obj_type=category['name'], polygons=polys))

     return Frame.from_image_and_objects(ptimage, objects)
Exemple #9
0
    def __getitem__(self,index):
        """Return the dataset entry at ``index`` as a single-object ``Frame``.

        The image data gets an extra trailing axis (``axis=2``) and the frame
        carries one ``Object`` whose box runs from ``(0, 0)`` to
        ``(pil_img.size[0], pil_img.size[1])``. The label from the dataset
        entry is not used.
        """
        pil_img, _ = self.dataset[index]

        # NOTE(review): the original comment says the image is assumed to be
        # 2-D here; nothing in this method checks it.
        pixels = np.expand_dims(np.asarray(pil_img), axis=2)
        ptimage = PTImage.from_numpy_array(pixels)

        # One object spanning the whole image.
        full_frame_box = Box(0, 0, pil_img.size[0], pil_img.size[1])
        return Frame.from_image_and_objects(ptimage, [Object(full_frame_box)])
Exemple #10
0
    def forward(self, x):
        """Run the recurrent attention loop and return the mask canvases.

        At every timestep the hidden state is decoded into attention
        parameters, a glimpse is read from ``x``, encoded, fed to the RNN,
        decoded into a partial mask and written additively onto the previous
        canvas.

        Args:
            x: 4-D input; ``x.size()`` is unpacked as
                ``(batch_size, chans, height, width)``.

        Returns:
            A list of ``self.timesteps + 1`` tensors: the sigmoid of the
            all-zero initial canvas followed by the sigmoid of the cumulative
            canvas after each timestep.
        """
        batch_size, chans, height, width = x.size()

        # Original author's note: the hidden state size is tied to the CNN
        # feature size, so a dummy glimpse is pushed through the encoder and
        # the RNN once before the hidden state is reset.
        # Zeros instead of uninitialised memory keep this pass deterministic.
        dummy_glimpse = torch.zeros(batch_size, chans,
                                    self.attn_grid_size, self.attn_grid_size)
        if x.is_cuda:
            dummy_glimpse = dummy_glimpse.cuda()
        dummy_feature_map = self.encoder.forward(dummy_glimpse)
        # Floor division: view() needs integer sizes, and `/` yields a float
        # on Python 3.
        self.att_rnn.forward(
            dummy_feature_map.view(
                batch_size, dummy_feature_map.nelement() // batch_size))
        self.att_rnn.reset_hidden_state(batch_size, x.data.is_cuda)

        # The canvas starts as zeros; each timestep appends a new cumulative
        # canvas.
        init_tensor = torch.zeros(batch_size, self.num_classes, height, width)
        if x.data.is_cuda:
            init_tensor = init_tensor.cuda()
        outputs = [init_tensor]

        self.init_weights(self.att_rnn.get_hidden_state())

        for t in range(self.timesteps):
            # 1) decode hidden state to generate gaussian attention parameters
            state = self.att_rnn.get_hidden_state()
            gauss_attn_params = torch.tanh(
                F.linear(state, self.att_decoder_weights))

            # 2) extract glimpse
            glimpse = self.attn_reader.forward(x, gauss_attn_params,
                                               self.attn_grid_size)

            # visualize the first glimpse in the batch at every timestep
            torch_glimpses = torch.chunk(glimpse, batch_size, dim=0)
            ImageVisualizer().set_image(
                PTImage.from_cwh_torch(torch_glimpses[0].squeeze().data),
                'zGlimpse {}'.format(t))

            # 3) use conv stack or resnet to extract features
            feature_map = self.encoder.forward(glimpse)
            # Encoder output dims without the last entry, reversed, followed
            # by the glimpse size; handed to the decoder below.
            conv_output_dims = self.encoder.get_output_dims()[:-1][::-1]
            conv_output_dims.append(glimpse.size())

            # 4) update hidden state
            self.att_rnn.forward(
                feature_map.view(batch_size,
                                 feature_map.nelement() // batch_size))

            # 5) use deconv network to get partial masks
            partial_mask = self.decoder.forward(feature_map, conv_output_dims)

            # 6) write masks additively to mask canvas
            partial_canvas = self.attn_writer.forward(
                partial_mask, gauss_attn_params, (height, width))
            outputs.append(torch.add(outputs[-1], partial_canvas))

        # return the sigmoided versions
        return [torch.sigmoid(canvas) for canvas in outputs]
Exemple #11
0
def process_single_batch(original_images,
                         ego_motion_vectors,
                         disp_maps,
                         calib_frames,
                         batch_number=0,
                         mask_loss_factor=0.1):
    """Warp each frame towards the next one and accumulate the losses.

    For every consecutive frame pair the camera coordinates of frame ``i``
    are transformed with the matrix built from ``ego_motion_vectors[i]``,
    projected back to an image with the padded calibration matrix, and
    compared with frame ``i + 1`` under the returned mask.

    Args:
        original_images: tensor indexed per frame; its dtype and device are
            used for every tensor created here.
        ego_motion_vectors: indexed per frame pair; each entry is turned into
            a transform by ``six_dof_vec_to_matrix``.
        disp_maps: tensor indexed per frame, passed to ``image_to_cam``.
        calib_frames: per-frame calibration matrices; ``shape[0]`` is the
            number of frames. Each entry is padded with one column and one
            row, so it has to be 3x3.
        batch_number: only used in visualizer titles and logger keys.
        mask_loss_factor: currently unused; the mask loss term is disabled,
            so the mask loss stays zero.

    Returns:
        ``(loss, out_images)``: the sum of the reconstruction, SSIM and mask
        losses as a 0-dim tensor, and the list of warped images (one per
        frame pair).
    """
    num_frames = calib_frames.shape[0]
    dtype = original_images.dtype
    device = original_images.device

    Logger().set('loss_component.disp_maps_mean', disp_maps.data.mean().item())
    Logger().set('loss_component.disp_maps_min', disp_maps.data.min().item())
    Logger().set('loss_component.disp_maps_max', disp_maps.data.max().item())
    Logger().set('loss_component.ego_motion_vectors[0]',
                 np.array2string(ego_motion_vectors[0].detach().cpu().numpy()))

    # step 1) Use inverse cam_matrix and depth to convert every frame into
    # camera coordinates
    cam_coords = [
        image_to_cam(original_images[i], disp_maps[i], calib_frames[i])
        for i in range(num_frames)
    ]

    # step 2) Generate one transformation matrix per consecutive frame pair
    transforms = [
        six_dof_vec_to_matrix(ego_motion_vectors[i])
        for i in range(num_frames - 1)
    ]

    # step 3) Transform Frame i (cam_coords) -> Frame i+1 (cam_coords),
    # then construct a new 2D image using the new projection matrix
    total_re_loss = torch.zeros([], dtype=dtype, device=device)
    total_ssim_loss = torch.zeros([], dtype=dtype, device=device)
    # NOTE: never incremented while the mask loss term is disabled.
    total_mask_loss = torch.zeros([], dtype=dtype, device=device)

    # Padding that extends a 3x3 calibration matrix to 4x4: a zero column on
    # the right and a [0, 0, 0, 1] row at the bottom. It does not depend on
    # the frame, so it is built once; torch.cat copies it on every use.
    intrin_filler_right = torch.zeros(3, dtype=dtype,
                                      device=device).unsqueeze(1)
    intrin_filler_bottom = torch.zeros(4, dtype=dtype,
                                       device=device).unsqueeze(0)
    intrin_filler_bottom[0, 3] = 1

    out_images = []
    for i in range(num_frames - 1):
        # augment cam coords with a row of 1's (homogeneous coordinates)
        ones_row = torch.ones_like(cam_coords[i])[0, :].unsqueeze(0)
        augmented_vecs = torch.cat((cam_coords[i], ones_row), dim=0)
        cur_frame_coords = torch.matmul(transforms[i], augmented_vecs)

        hom_calib = torch.cat((calib_frames[i], intrin_filler_right), dim=1)
        hom_calib = torch.cat((hom_calib, intrin_filler_bottom), dim=0)
        warped_image, mask = cam_to_image(hom_calib, cur_frame_coords,
                                          original_images[i])
        out_images.append(warped_image)

        ImageVisualizer().set_image(
            PTImage.from_cwh_torch(warped_image),
            'warped_image {}-{}'.format(batch_number, i))
        ImageVisualizer().set_image(
            PTImage.from_2d_wh_torch(mask),
            'mask {}-{}'.format(batch_number, i))
        Logger().set('loss_component.mask_mean.{}-{}'.format(batch_number, i),
                     mask.mean().data.item())

        # compare warped_image to the next real image; both sides are
        # multiplied by the mask so masked-out pixels do not contribute
        masked_warp_image = warped_image.unsqueeze(0) * mask
        masked_gt_image = original_images[i + 1].unsqueeze(0) * mask
        re_loss = F.smooth_l1_loss(masked_warp_image,
                                   masked_gt_image,
                                   reduction='none')
        total_re_loss += re_loss.mean()
        total_ssim_loss += old_div(
            (1 - ssim(masked_warp_image, masked_gt_image)), 2)

    Logger().set('loss_component.mask_loss.{}'.format(batch_number),
                 total_mask_loss.data.item())
    Logger().set('loss_component.batch_re_loss.{}'.format(batch_number),
                 total_re_loss.data.item())
    Logger().set('loss_component.batch_ssim_loss.{}'.format(batch_number),
                 total_ssim_loss.data.item())
    return total_re_loss + total_ssim_loss + total_mask_loss, out_images
Exemple #12
0
 def __init__(self,image_path='',objects=None):
     """Create a frame from an image path and an optional object list.

     Args:
         image_path: path handed to ``PTImage(pil_image_path=...)``.
         objects: list of objects for this frame. The list is stored by
             reference (not copied). ``None`` means "no objects".
     """
     self.image_path = image_path
     # A None sentinel gives every instance its own empty list; a literal
     # ``[]`` default would be one list shared by all default-built frames.
     self.objects = [] if objects is None else objects
     self.image = PTImage(pil_image_path=self.image_path,persist=False)
Exemple #13
0
 def __init__(self, image_path='', objs=[], calib_mat=None):
     """Create a frame with its own copy of the objects and a calibration.

     Args:
         image_path: path handed to ``PTImage(pil_image_path=...)``.
         objs: objects for this frame; deep-copied, so neither the caller's
             list nor the default list is ever shared with the instance.
         calib_mat: calibration matrix, stored as given (the original
             comment calls it a 4x3 calibration matrix -- TODO confirm).
     """
     self.image_path = image_path
     self.calib_mat = calib_mat
     self.objects = copy.deepcopy(objs)
     self.image = PTImage(pil_image_path=image_path, persist=False)
Exemple #14
0
 def __init__(self, image_path='', objs=[]):
     """Create a frame with its own copy of the given objects.

     Args:
         image_path: path handed to ``PTImage(pil_image_path=...)``.
         objs: objects for this frame; deep-copied, so neither the caller's
             list nor the default list is ever shared with the instance.
     """
     self.image_path = image_path
     owned_objects = copy.deepcopy(objs)
     self.objects = owned_objects
     self.image = PTImage(pil_image_path=image_path, persist=False)