Example #1
0
    def feature(self, im: np.ndarray, target_pos, target_sz, avg_chans=None):
        """Crop the template patch from a frame and run it through the model.

        Parameters
        ----------
        im : np.ndarray
            initial frame
        target_pos :
            target position (x, y)
        target_sz :
            target size (w, h)
        avg_chans : optional
            channel mean values forwarded to ``get_crop``. When None, they
            are computed as the mean of ``im`` over its first two axes
            (the original documentation describes them as (B, G, R)).

        Returns
        -------
        tuple
            ``(features, im_z_crop, avg_chans)``: the model output for the
            template crop (phase ``phase_init``), the crop itself, and the
            channel means that were actually used.
        """
        if avg_chans is None:
            avg_chans = np.mean(im, axis=(0, 1))

        z_size = self._hyper_params['z_size']
        context_amount = self._hyper_params['context_amount']

        # The second value returned by get_crop is not needed here.
        im_z_crop, _ = get_crop(
            im,
            target_pos,
            target_sz,
            z_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )
        phase = self._hyper_params['phase_init']
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            features = self._model(imarray_to_tensor(im_z_crop).to(
                self.device),
                                   phase=phase)

        return features, im_z_crop, avg_chans
Example #2
0
    def track(self,
              im_x,
              target_pos,
              target_sz,
              features,
              update_state=False,
              **kwargs):
        """Track the target in one frame against a memory of templates.

        The search crop is passed through the model once per template
        (``mem_len`` templates in total). The search-image feature maps
        returned by the first call (``extra['c_x']`` and ``extra['r_x']``)
        are reused for the remaining templates. The score of the first
        template is then blended with the mean score of the others using
        ``mem_coef``; boxes are always taken from the first template.

        Parameters
        ----------
        im_x :
            current frame
        target_pos :
            current target position; passed to ``get_crop`` and to the
            post-processing helpers
        target_sz :
            current target size; passed to ``get_crop`` and to the
            post-processing helpers
        features :
            indexable collection with at least ``mem_len`` entries; entry
            ``i`` is unpacked into positional arguments of the model
        update_state : bool, optional
            when True, also record ``score``, ``pscore``, ``all_box``,
            ``cls`` and ``ctr`` in ``self._state``
        **kwargs :
            ``avg_chans`` may be supplied to override
            ``self._state['avg_chans']``

        Returns
        -------
        tuple
            ``(new_target_pos, new_target_sz)`` as returned by
            ``_restrict_box``
        """
        if 'avg_chans' in kwargs:
            avg_chans = kwargs['avg_chans']
        else:
            avg_chans = self._state['avg_chans']

        z_size = self._hyper_params['z_size']
        x_size = self._hyper_params['x_size']
        context_amount = self._hyper_params['context_amount']
        phase_track = self._hyper_params['phase_track']
        im_x_crop, scale_x = get_crop(
            im_x,
            target_pos,
            target_sz,
            z_size,
            x_size=x_size,
            avg_chans=avg_chans,
            context_amount=context_amount,
            func_get_subwindow=get_subwindow_tracking,
        )

        # process batch of templates
        mem_len = self._hyper_params['mem_len']
        score_list = []
        box_list = []
        fms_x = None  # search-image feature maps, filled by the first pass
        for ith in range(mem_len):
            with torch.no_grad():
                if fms_x is None:
                    # first template: the model consumes the image crop
                    score, box, cls, ctr, extra = self._model(
                        imarray_to_tensor(im_x_crop).to(self.device),
                        *(features[ith]),
                        phase=phase_track)
                    fms_x = [extra['c_x'], extra['r_x']]
                else:
                    # later templates: reuse the cached search features
                    score, box, cls, ctr, extra = self._model(
                        *(features[ith]),
                        fms_x[0],
                        fms_x[1],
                        phase=phase_track)
            box = tensor_to_numpy(box[0])
            score = tensor_to_numpy(score[0])[:, 0]
            # cls / ctr keep the values of the last template processed;
            # they are only stored when update_state is set.
            cls = tensor_to_numpy(cls[0])[:, 0]
            ctr = tensor_to_numpy(ctr[0])[:, 0]
            box_list.append(box)
            score_list.append(score)

        # fusion
        if mem_len > 1:
            mem_coef = self._hyper_params['mem_coef']
            score = score_list[0] * (1 - mem_coef) + \
                np.stack(score_list[1:], axis=0).mean(axis=0) * mem_coef
        else:
            # single template
            score = score_list[0]
        box = box_list[0]
        box_wh = xyxy2cxywh(box)

        # score post-processing
        best_pscore_id, pscore, penalty = self._postprocess_score(
            score, box_wh, target_sz, scale_x)
        # box post-processing
        new_target_pos, new_target_sz = self._postprocess_box(
            best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
            x_size, penalty)

        # NOTE(review): in debug mode `box` is rebound to the output of
        # _cvt_box_crop2frame, so the values stored below under
        # 'bbox_pred_in_crop' and 'all_box' come from that converted array.
        # Confirm this is intended.
        if self.debug:
            box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

        # restrict new_target_pos & new_target_sz
        new_target_pos, new_target_sz = self._restrict_box(
            new_target_pos, new_target_sz)

        # record basic mid-level info
        self._state['x_crop'] = im_x_crop
        # `np.int` was an alias of the builtin `int` and has been removed
        # from NumPy; the builtin yields the same dtype.
        bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
        self._state['bbox_pred_in_crop'] = bbox_pred_in_crop
        # record optional mid-level info
        if update_state:
            self._state['score'] = score
            self._state['pscore'] = pscore
            self._state['all_box'] = box
            self._state['cls'] = cls
            self._state['ctr'] = ctr

        return new_target_pos, new_target_sz
Example #3
0
    def track(self,
              im_x,
              target_pos,
              target_sz,
              features,
              update_state=False,
              **kwargs):
        """Run one tracking step on a frame with a single template.

        Crops the search region, records the crop parameters under
        ``self._state['crop_info']``, runs the model in the
        ``phase_track`` phase and post-processes scores and boxes.
        Unlike a restricted tracker step, the resulting position and size
        are returned without passing through ``_restrict_box``.

        Parameters
        ----------
        im_x :
            current frame
        target_pos :
            current target position; passed to ``get_crop`` and to the
            post-processing helpers
        target_sz :
            current target size; passed to ``get_crop`` and to the
            post-processing helpers
        features :
            template features, unpacked into positional model arguments
        update_state : bool, optional
            when True, also record ``score``, ``pscore``, ``all_box``,
            ``cls`` and ``ctr`` in ``self._state``
        **kwargs :
            ``avg_chans`` may be supplied to override
            ``self._state['avg_chans']``

        Returns
        -------
        tuple
            ``(new_target_pos, new_target_sz)`` from ``_postprocess_box``
        """
        avg_chans = (kwargs['avg_chans']
                     if 'avg_chans' in kwargs else self._state['avg_chans'])

        template_size = self._hyper_params['z_size']
        search_size = self._hyper_params['x_size']
        context = self._hyper_params['context_amount']
        track_phase = self._hyper_params['phase_track']

        crop_options = dict(
            x_size=search_size,
            avg_chans=avg_chans,
            context_amount=context,
            func_get_subwindow=get_subwindow_tracking,
        )
        search_crop, crop_scale = get_crop(im_x, target_pos, target_sz,
                                           template_size, **crop_options)

        # Remember how this crop was produced.
        self._state["crop_info"] = dict(
            target_pos=target_pos,
            target_sz=target_sz,
            scale_x=crop_scale,
            avg_chans=avg_chans,
        )

        # Any model outputs beyond the first four are ignored.
        with torch.no_grad():
            raw_score, raw_box, raw_cls, raw_ctr, *_ = self._model(
                imarray_to_tensor(search_crop).to(self.device),
                *features,
                phase=track_phase)

        boxes = tensor_to_numpy(raw_box[0])
        scores = tensor_to_numpy(raw_score[0])[:, 0]
        cls_out = tensor_to_numpy(raw_cls[0])
        ctr_out = tensor_to_numpy(raw_ctr[0])
        boxes_cxywh = xyxy2cxywh(boxes)

        # Score post-processing, then box post-processing.
        best_id, pscore, penalty = self._postprocess_score(
            scores, boxes_cxywh, target_sz, crop_scale)
        new_target_pos, new_target_sz = self._postprocess_box(
            best_id, scores, boxes_cxywh, target_pos, target_sz, crop_scale,
            search_size, penalty)

        # In debug mode the recorded boxes are the converted ones.
        if self.debug:
            boxes = self._cvt_box_crop2frame(boxes_cxywh, target_pos,
                                             search_size, crop_scale)

        # Basic mid-level info. The best box is stored un-rounded.
        # NOTE(review): the same box is written under both the
        # "in_crop" and the "in_frame" key -- confirm this is intended.
        best_box = boxes[best_id]
        self._state['x_crop'] = search_crop
        self._state['bbox_pred_in_crop'] = best_box
        self._state['bbox_pred_in_frame'] = best_box

        # Optional mid-level info.
        if update_state:
            optional_info = (
                ('score', scores),
                ('pscore', pscore),
                ('all_box', boxes),
                ('cls', cls_out),
                ('ctr', ctr_out),
            )
            for key, value in optional_info:
                self._state[key] = value

        return new_target_pos, new_target_sz
Example #4
0
    def joint_segmentation(self, im_x, target_pos, target_sz, corr_feature,
                           gml_feature, **kwargs):
        r"""
        Segment the current frame for VOS.
        crop image => segmentation => state update

        Besides returning the masks, this writes to ``self._state``:
        ``scale_x``, ``filtered_image``, ``conf_score``,
        ``mask_in_full_image`` and, when the ``save_patch`` hyper-parameter
        is set, ``patch_prediction``.

        :param im_x: current image
        :param target_pos: target position (x, y)
        :param target_sz: target size (w, h)
        :param corr_feature: correlated feature produced by siamese encoder
        :param gml_feature: global feature produced by global modeling loop
        :param kwargs: may carry ``avg_chans`` to override
            ``self._state['avg_chans']``
        :return: pred_mask  mask prediction in the patch of saliency image
        :return: pred_mask_b binary mask prediction in the patch of saliency image
        """

        if 'avg_chans' in kwargs:
            avg_chans = kwargs['avg_chans']
        else:
            avg_chans = self._state['avg_chans']

        # crop image for saliency encoder
        saliency_image, scale_seg = get_crop(
            im_x,
            target_pos,
            target_sz,
            z_size=self._hyper_params["z_size"],
            output_size=self._hyper_params["saliency_image_size"],
            x_size=self._hyper_params["saliency_image_field"],
            avg_chans=avg_chans,
            context_amount=self._hyper_params["context_amount"],
            func_get_subwindow=get_subwindow_tracking,
        )
        self._state["scale_x"] = scale_seg

        # mask prediction
        # Inference only: run the segmenter without building an autograd
        # graph.
        with torch.no_grad():
            pred_mask = self._segmenter(
                imarray_to_tensor(saliency_image).to(self.device),
                corr_feature,
                gml_feature,
                phase='segment')[0]  # original note: tensor(1,1,257,257)

        # move the channel axis last; original note: np (257,257,1)
        pred_mask = tensor_to_numpy(pred_mask[0]).transpose((1, 2, 0))

        # post processing: two thresholded copies of the prediction
        mask_filter = (pred_mask >
                       self._hyper_params['mask_filter_thresh']).astype(
                           np.uint8)
        pred_mask_b = (pred_mask >
                       self._hyper_params['mask_pred_thresh']).astype(np.uint8)

        if self._hyper_params['save_patch']:
            # blend the crop 50/50 with the filter mask painted into
            # channel 0
            mask_red = np.zeros_like(saliency_image)
            mask_red[:, :, 0] = mask_filter[:, :, 0] * 255
            masked_image = saliency_image * 0.5 + mask_red * 0.5
            self._state['patch_prediction'] = masked_image

        # zero out everything outside the filter mask, then resize
        filtered_image = saliency_image * mask_filter
        filtered_image = cv2.resize(filtered_image,
                                    (self._hyper_params["GMP_image_size"],
                                     self._hyper_params["GMP_image_size"]))
        self._state['filtered_image'] = filtered_image

        # confidence: mean predicted value over the binary mask, or 0 when
        # the binary mask is empty
        if pred_mask_b.sum() > 0:
            conf_score = (pred_mask * pred_mask_b).sum() / pred_mask_b.sum()
        else:
            conf_score = 0
        self._state['conf_score'] = conf_score

        mask_in_full_image = self._mask_back(
            pred_mask,
            size=self._hyper_params["saliency_image_size"],
            region=self._hyper_params["saliency_image_field"])
        self._state['mask_in_full_image'] = mask_in_full_image
        # blank the stored full-image mask when the tracker score is below
        # the failure threshold
        if self._tracker.get_track_score(
        ) < self._hyper_params["track_failed_score_th"]:
            self._state['mask_in_full_image'] *= 0
        return pred_mask, pred_mask_b
Example #5
0
    def init(self, im, state, init_mask):
        """
        Set up the whole pipeline on the first frame:
        tracker init => global modeling loop init

        Stores in ``self._state``: ``avg_chans``, ``state``, ``im_h``,
        ``im_w``, ``filtered_image``, the three feature entries
        (``seg_init_feature``, ``seg_global_feature``, ``gml_feature``)
        and ``conf_score``.

        :param im: init frame
        :param state: bbox in xywh format
        :param init_mask: binary mask of target object in shape (h,w)
        """

        # ---------- tracker (SiamFC++) ----------
        self._tracker.init(im, state)
        avg_chans = self._tracker.get_avg_chans()
        self._state['avg_chans'] = avg_chans

        # The xywh box is converted to cxywh and split into
        # position (first two values) and size (last two values).
        cxywh = xywh2cxywh(state)
        target_pos = cxywh[:2]
        target_sz = cxywh[2:]
        self._state['state'] = (target_pos, target_sz)
        self._state['im_h'] = im.shape[0]
        self._state['im_w'] = im.shape[1]

        # ---------- global modeling loop ----------
        # The image and the mask are cropped with identical geometry.
        crop_kwargs = dict(
            z_size=self._hyper_params["z_size"],
            x_size=self._hyper_params["GMP_image_size"],
            context_amount=self._hyper_params["context_amount"],
            func_get_subwindow=get_subwindow_tracking,
        )
        init_image, _ = get_crop(im,
                                 target_pos,
                                 target_sz,
                                 avg_chans=avg_chans,
                                 **crop_kwargs)

        # Replicate the mask to three channels so it can be cropped like an
        # image; a zeroed avg_chans is passed for the mask crop.
        mask_3ch = np.stack([init_mask] * 3, -1).astype(np.uint8)
        mask_crop_3ch, _ = get_crop(mask_3ch,
                                    target_pos,
                                    target_sz,
                                    avg_chans=avg_chans * 0,
                                    **crop_kwargs)

        # Binarise one channel of the cropped mask and restore a trailing
        # channel axis (original note: shape (129,129,1)).
        above_thresh = (mask_crop_3ch[:, :, 0] >
                        self._hyper_params['mask_filter_thresh'])
        binary_mask = above_thresh.astype(np.uint8)[:, :, np.newaxis]

        # original note: shape (129,129,3)
        filtered_image = binary_mask * init_image
        self._state['filtered_image'] = filtered_image

        with torch.no_grad():
            segmenter_out = self._segmenter(
                imarray_to_tensor(filtered_image).to(self.device),
                phase='global_feature')
            deep_feature = segmenter_out[0]

        # All three feature slots start from the same initial feature
        # (original note: shape (1,256,5,5)).
        for key in ('seg_init_feature', 'seg_global_feature', 'gml_feature'):
            self._state[key] = deep_feature
        self._state['conf_score'] = 1