Example #1
    def extract_transformed(self, im, dp, pos, scale, image_sz, transforms):
        """Extract features from a set of transformed image samples.
        args:
            im: Image.
            dp: Depth image.
            pos: Center position for extraction.
            scale: Image scale to extract features from.
            image_sz: Size to resize the image samples to before extraction.
            transforms: A set of image transforms to apply.
        """

        # Get image patch
        im_patch, dp_patch, _ = sample_patch(im, dp, pos, scale * image_sz,
                                             image_sz)

        # Apply transforms
        im_patches = torch.cat([T(im_patch) for T in transforms])
        dp_patches = torch.cat([T(dp_patch) for T in transforms])

        # Compute features
        feature_map = TensorList([
            f.get_feature(im_patches, dp_patches) for f in self.features
        ]).unroll()

        return feature_map
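
Below is a minimal usage sketch (not part of the original source) for the RGB-D variant above; the `extractor` instance, the tensor shapes, and the stand-in transforms are illustrative assumptions.

    # Hypothetical call site (sketch only): extract features from an RGB-D frame.
    # Shapes follow the usual 1 x C x H x W layout expected by pytracking-style code.
    import torch

    im = torch.rand(1, 3, 480, 640) * 255        # colour image
    dp = torch.rand(1, 1, 480, 640) * 10.0       # depth map
    pos = torch.tensor([240.0, 320.0])           # patch centre (y, x)
    scale = 1.0
    image_sz = torch.tensor([288.0, 288.0])      # size of the extracted patch

    # Stand-in transforms: identity and a horizontal flip
    transforms = [lambda x: x, lambda x: x.flip(-1)]

    # extractor is a placeholder for an instance of the feature-extractor class above
    feature_map = extractor.extract_transformed(im, dp, pos, scale, image_sz, transforms)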
Example #2
    def generate_init_samples(self, im: torch.Tensor, target_pos, sample_scale) -> TensorList:
        # Compute augmentation size
        aug_expansion_factor = getattr(self.params, 'augmentation_expansion_factor', None)
        aug_expansion_sz = self.img_sample_sz.clone()
        aug_output_sz = None
        if aug_expansion_factor is not None and aug_expansion_factor != 1:
            aug_expansion_sz = (self.img_sample_sz * aug_expansion_factor).long()
            aug_expansion_sz += (aug_expansion_sz - self.img_sample_sz.long()) % 2
            aug_expansion_sz = aug_expansion_sz.float()
            aug_output_sz = self.img_sample_sz.long().tolist()

        # Random shift operator
        get_rand_shift = lambda: None

        # Create transformations
        self.transforms = [augmentation.Identity(aug_output_sz)]
        if 'shift' in self.params.augmentation_method:
            self.transforms.extend([augmentation.Translation(shift, aug_output_sz) for shift in self.params.augmentation_method['shift']])
        if 'relativeshift' in self.params.augmentation_method:
            get_absolute = lambda shift: (torch.Tensor(shift) * self.img_sample_sz/2).long().tolist()
            self.transforms.extend([augmentation.Translation(get_absolute(shift), aug_output_sz) for shift in self.params.augmentation_method['relativeshift']])
        if 'fliplr' in self.params.augmentation_method and self.params.augmentation_method['fliplr']:
            self.transforms.append(augmentation.FlipHorizontal(aug_output_sz, get_rand_shift()))
        if 'blur' in self.params.augmentation_method:
            self.transforms.extend([augmentation.Blur(sigma, aug_output_sz, get_rand_shift()) for sigma in self.params.augmentation_method['blur']])
        if 'scale' in self.params.augmentation_method:
            self.transforms.extend([augmentation.Scale(scale_factor, aug_output_sz, get_rand_shift()) for scale_factor in self.params.augmentation_method['scale']])
        if 'rotate' in self.params.augmentation_method:
            self.transforms.extend([augmentation.Rotate(angle, aug_output_sz, get_rand_shift()) for angle in self.params.augmentation_method['rotate']])

        init_sample = sample_patch(im, target_pos, sample_scale*aug_expansion_sz, aug_expansion_sz)
        init_samples = torch.cat([T(init_sample) for T in self.transforms])
        if not self.params.augmentation:
            init_samples = init_samples[0:1,...]
        return init_samples
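
For reference, a sketch of the kind of configuration that drives the method above. The keys mirror the dictionary lookups in generate_init_samples; the concrete values are made up for illustration and are not taken from the original settings file.

    # Illustrative parameter settings (sketch only).
    from types import SimpleNamespace

    params = SimpleNamespace()
    params.augmentation = True
    params.augmentation_expansion_factor = 2
    params.augmentation_method = {
        'fliplr': True,
        'shift': [(8, 8), (-8, 8), (8, -8), (-8, -8)],
        'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6, -0.6)],
        'blur': [(2, 0.2), (0.2, 2), (3, 1), (1, 3), (2, 2)],
        'scale': [0.95, 1.05],
        'rotate': [10, -10, 45, -45],
    }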
Example #3
    def extract(self, im, pos, scales, image_sz, return_patches=False):
        """Extract features.
        args:
            im: Image.
            pos: Center position for extraction.
            scales: Image scales to extract features from.
            image_sz: Size to resize the image samples to before extraction.
        """
        if isinstance(scales, (int, float)):
            scales = [scales]

        # Get image patches
        patch_iter, coord_iter = zip(*(sample_patch(im, pos, s*image_sz, image_sz, mode=self.patch_mode) for s in scales))
        im_patches = torch.cat(list(patch_iter))
        patch_coords = torch.cat(list(coord_iter))

        # im_patches = torch.cat([sample_patch(im, pos, s*image_sz, image_sz) for s in scales])

        # Compute features
        feature_map = TensorList([f.get_feature(im_patches) for f in self.features]).unroll()

        if return_patches:
            return feature_map, patch_coords, im_patches
        else:
            return feature_map, patch_coords
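
A minimal call-site sketch for the multi-scale extract above; the `extractor` instance and the concrete numbers are assumptions.

    # Hypothetical call site (sketch): multi-scale feature extraction.
    import torch

    im = torch.rand(1, 3, 480, 640) * 255
    pos = torch.tensor([240.0, 320.0])           # search-region centre (y, x)
    scales = [0.96, 1.0, 1.04]                   # small scale pyramid
    image_sz = torch.tensor([256.0, 256.0])

    # extractor is a placeholder for an instance of the class above
    feature_map, patch_coords = extractor.extract(im, pos, scales, image_sz)
    # feature_map: TensorList with one entry per registered feature module
    # patch_coords: image coordinates of each extracted patch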
Example #4
    def extract_transformed(self, im, pos, scale, image_sz, transforms):
        """Extract features from a set of transformed image samples.
        args:
            im: Image.
            pos: Center position for extraction.
            scale: Image scale to extract features from.
            image_sz: Size to resize the image samples to before extraction.
            transforms: A set of image transforms to apply.
        """

        # Get image patch
        im_patch = sample_patch(im, pos, scale * image_sz, image_sz)

        # Apply transforms
        im_patches = torch.cat([T(im_patch) for T in transforms])

        # import cv2
        # import numpy as np
        # cv2.namedWindow('Patch', cv2.WINDOW_AUTOSIZE)
        # for p in im_patches:
        #     p_ = p.permute(1, 2, 0).cpu().numpy()
        #     cv2.imshow('Patch', p_.astype(np.uint8))
        #     cv2.waitKey(0)

        # Compute features
        feature_map = TensorList(
            [f.get_feature(im_patches) for f in self.features]).unroll()

        return feature_map
Example #5
    def extract_transformed(self,
                            im,
                            pos,
                            scale,
                            image_sz,
                            transforms,
                            debug_save_name=None):
        """Extract features from a set of transformed image samples.
        args:
            im: Image.
            pos: Center position for extraction.
            scale: Image scale to extract features from.
            image_sz: Size to resize the image samples to before extraction.
            transforms: A set of image transforms to apply.
        """

        # Get image patch
        im_patch = sample_patch(im, pos, scale * image_sz, image_sz)

        # Apply transforms
        with fluid.dygraph.guard(fluid.CPUPlace()):
            im_patches = np.stack([T(im_patch) for T in transforms])

        if debug_save_name is not None:
            np.save(debug_save_name, im_patches)

        im_patches = np.transpose(im_patches, (0, 3, 1, 2))

        # Compute features
        feature_map = TensorList(
            [f.get_feature(im_patches) for f in self.features]).unroll()

        return feature_map
Example #6
    def extract(self, im, pos, scales, image_sz, debug_save_name=None):
        """Extract features.
        args:
            im: Image.
            pos: Center position for extraction.
            scales: Image scales to extract features from.
            image_sz: Size to resize the image samples to before extraction.
        """
        if isinstance(scales, (int, float)):
            scales = [scales]

        # Get image patches
        with fluid.dygraph.guard(fluid.CPUPlace()):
            im_patches = np.stack([
                sample_patch(im, pos, s * image_sz, image_sz) for s in scales
            ])

        if debug_save_name is not None:
            np.save(debug_save_name, im_patches)

        im_patches = np.transpose(im_patches, (0, 3, 1, 2))

        # Compute features
        feature_map = TensorList(
            [f.get_feature(im_patches) for f in self.features]).unroll()

        return feature_map
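
The debug_save_name argument above simply dumps the stacked patches with np.save before the channel transpose, so they can be inspected offline. A sketch with an illustrative path; the inputs and the `extractor` instance are placeholders:

    # Sketch only: dump the patches and reload them for inspection.
    import numpy as np

    feature_map = extractor.extract(im, pos, scales, image_sz,
                                    debug_save_name='/tmp/patches.npy')
    patches = np.load('/tmp/patches.npy')    # stacked as (num_scales, H, W, C)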
Example #7
    def extract(self, im, pos, scales, image_sz):
        if isinstance(scales, (int, float)):
            scales = [scales]

        # Get image patches
        im_patches = torch.cat(
            [sample_patch(im, pos, s * image_sz, image_sz) for s in scales])

        # For single-channel (grayscale) images, return the raw patches directly
        if im.shape[1] == 1:
            return im_patches

        # Compute features
        feature_map = torch.cat(TensorList(
            [f.get_feature(im_patches) for f in self.features]).unroll(),
                                dim=1)

        return feature_map
Example #8
    def extract(self, im, pos, scales, image_sz):
        if isinstance(scales, (int, float)):
            scales = [scales]

        # Get image patches
        im_patches = np.stack(
            [sample_patch(im, pos, s * image_sz, image_sz) for s in scales])
        im_patches = np.transpose(im_patches, (0, 3, 1, 2))

        # Compute features
        feature_map = layers.concat(TensorList(
            [f.get_feature(im_patches) for f in self.features]).unroll(),
                                    axis=1)

        return feature_map
Example #9
    def extract(self, im, pos, scales, image_sz):
        """Extract features.
        args:
            im: Image.
            pos: Center position for extraction.
            scales: Image scales to extract features from.
            image_sz: Size to resize the image samples to before extraction.
        """
        if isinstance(scales, (int, float)):
            scales = [scales]
        # print(image_sz,scales)
        # Get image patches
        im_patches = torch.cat([sample_patch(im, pos, s*image_sz, image_sz) for s in scales])

        # Compute features
        feature_map = TensorList([f.get_feature(im_patches) for f in self.features]).unroll()

        return feature_map
Example #10
    def extract_v2(self, im, pos, scales, image_sz):
        """Extract img_patch/features.
        args:
            im: Image.
            pos: Center position for extraction.
            scales: Image scales to extract features from.
            image_sz: Size to resize the image samples to before extraction.
        """
        if isinstance(scales, (int, float)):
            scales = [scales]

        # Get image patches
        im_patches = torch.cat(
            [sample_patch(im, pos, s * image_sz, image_sz) for s in scales])

        im_patches_np = im_patches[0:1].squeeze().permute(
            1, 2, 0).numpy().astype(np.uint8)
        #cv2.imwrite('test.jpg', im_patches_np)

        # Compute features
        feature_map = TensorList(
            [f.get_feature(im_patches) for f in self.features]).unroll()

        return im_patches_np, feature_map
Example #11
    def track(self, image, info: dict = None) -> dict:
        self.debug_info = {}

        self.frame_num += 1
        self.debug_info['frame_num'] = self.frame_num

        # Obtain the merged segmentation prediction for the previous frames. This is used to update the target model
        # and determine the search region for the current frame
        if self.object_id is None:
            prev_segmentation_prob_im = info['previous_output'][
                'segmentation_raw']
        else:
            prev_segmentation_prob_im = info['previous_output'][
                'segmentation_raw'][self.object_id]

        prev_segmentation_prob_im = torch.from_numpy(
            prev_segmentation_prob_im).unsqueeze(0).unsqueeze(0).float()

        # ********************************************************************************** #
        # ------- Update the target model using merged masks from the previous frame ------- #
        # ********************************************************************************** #
        if self.frame_num > 2:
            # Crop the segmentation mask for the previous search area
            if self.params.get('update_target_model', True):
                prev_segmentation_prob_crop, _ = sample_patch(
                    prev_segmentation_prob_im,
                    self.prev_pos,
                    self.prev_scale * self.img_sample_sz,
                    self.img_sample_sz,
                    mode=self.params.get('border_mode', 'replicate'),
                    max_scale_change=self.params.get('patch_max_scale_change'),
                    is_mask=True)

                # Update the target model
                self.update_target_model(self.prev_test_x,
                                         prev_segmentation_prob_crop.clone())

        # ****************************************************************************************** #
        # -------- Estimate target box using the merged segmentation mask from prev. frame --------- #
        # --- The estimated target box is used to obtain the search region for the current frame --- #
        # ****************************************************************************************** #
        self.pos, self.target_sz = self.get_target_state(
            prev_segmentation_prob_im.squeeze())

        new_target_scale = torch.sqrt(self.target_sz.prod() /
                                      self.base_target_sz.prod())

        if self.params.get('max_scale_change') is not None:
            # Do not allow drastic scale change, as this might be caused due to occlusions or incorrect mask
            # prediction
            new_target_scale = self.clip_scale_change(new_target_scale)

        # Update target size and scale using the filtered target size
        self.target_scale = new_target_scale
        self.target_sz = self.base_target_sz * self.target_scale

        # ********************************************************************** #
        # ---------- Predict segmentation mask for the current frame ----------- #
        # ********************************************************************** #

        # Convert image
        im = numpy_to_torch(image)

        # Extract backbone features
        backbone_feat, sample_coords, im_patches = self.extract_backbone_features(
            im, self.get_centered_sample_pos(), self.target_scale,
            self.img_sample_sz)

        # Save the search region information as it is needed to merge the segmentation masks for the next frame update
        self.prev_pos = self.get_centered_sample_pos()
        self.prev_scale = self.target_scale

        # Extract features input to the target model
        test_x = self.get_target_model_features(backbone_feat)

        # Location of sample
        sample_pos, sample_scale = self.get_sample_location(sample_coords)

        # Predict the segmentation mask. Note: These are raw scores, before the sigmoid
        segmentation_scores = self.segment_target(test_x, backbone_feat)

        self.prev_test_x = test_x

        # Get the segmentation scores for the full image.
        # Regions outside the search region are assigned low scores (-100)
        segmentation_scores_im = self.convert_scores_crop_to_image(
            segmentation_scores, im, sample_scale, sample_pos)

        segmentation_mask_im = (segmentation_scores_im >
                                0.0).float()  # Binary segmentation mask
        segmentation_prob_im = torch.sigmoid(
            segmentation_scores_im
        )  # Probability of being target at each pixel

        # ************************************************************************ #
        # ---------- Output estimated segmentation mask and target box ----------- #
        # ************************************************************************ #

        # Get target box from the predicted segmentation
        pred_pos, pred_target_sz = self.get_target_state(
            segmentation_prob_im.squeeze())
        new_state = torch.cat(
            (pred_pos[[1, 0]] - (pred_target_sz[[1, 0]] - 1) / 2,
             pred_target_sz[[1, 0]]))
        output_state = new_state.tolist()

        if self.object_id is None:
            # In single object mode, no merge called. Hence return the probabilities
            segmentation_output = segmentation_prob_im
        else:
            # In multi-object mode, return raw scores
            segmentation_output = segmentation_scores_im

        segmentation_mask_im = segmentation_mask_im.view(
            *segmentation_mask_im.shape[-2:]).cpu().numpy()
        segmentation_output = segmentation_output.cpu().numpy()

        if self.visdom is not None:
            self.visdom.register(segmentation_scores_im, 'heatmap', 2,
                                 'Seg Scores' + self.id_str)
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')

        out = {
            'segmentation': segmentation_mask_im,
            'target_bbox': output_state,
            'segmentation_raw': segmentation_output
        }
        return out
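
A sketch of the per-frame driver loop implied by the method above (single-object mode): the 'segmentation_raw' entry of each output is fed back as info['previous_output'] on the next frame. The `tracker` and `frames` variables and the initial raw map are assumptions about the surrounding code, not part of the original source.

    # Hypothetical driver loop (sketch): feed each frame's raw segmentation back in.
    prev_output = {'segmentation_raw': init_segmentation_raw}   # produced at initialization
    for frame in frames:                                        # H x W x 3 numpy images
        out = tracker.track(frame, info={'previous_output': prev_output})
        prev_output = {'segmentation_raw': out['segmentation_raw']}
        bbox = out['target_bbox']                               # [x, y, w, h]
        mask = out['segmentation']                              # binary H x W mask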
Example #12
    def track(self, image):
        self.frame_num += 1

        # For debugging and display only
        if self.params.output_image:
            image_show = image.copy()

        # Initialization
        hard_flag = False
        
        # Convert to tensor and move to GPU
        image_cuda = self.numpy_to_tensor_gpu(image)

        # ------- LOCALIZATION ------- #
        sample_pos = self.target_pos.clone()
        sample_scale = self.sample_scale.clone()
        target_sample_sz = self.target_sample_sz.clone()

        # Sample and extract backbone features
        test_sample = sample_patch(image_cuda, sample_pos, sample_scale*self.img_sample_sz, self.img_sample_sz)
        test_backbone_features = self.params.model.extract_backbone_features(test_sample)

        # Extract locator features and calculate the localization score
        test_locator_proposals = self.get_locator_proposals(target_sample_sz)
        test_locator_features = self.params.model.extract_locator_features(test_backbone_features, test_locator_proposals).squeeze()
        test_locator_score = torch.mm(test_locator_features, self.locator_model)

        # Window output and find argmax
        if getattr(self.params, 'window_output', False):
            test_locator_score = test_locator_score * self.output_window
        max_score, max_id = torch.max(test_locator_score, dim=0)
        max_score, max_id = max_score.item(), max_id.item()

        # When target is found
        if max_score > self.params.target_not_found:
            # Update target position
            self.target_pos[1] += (self.locator_proposals_xc[max_id].item() - self.img_sample_sz[1]*0.5) * sample_scale  # x
            self.target_pos[0] += (self.locator_proposals_yc[max_id].item() - self.img_sample_sz[0]*0.5) * sample_scale  # y

            # ------- REFINEMENT ------- # 
            # Extract iou backbone features and refine target box
            test_iou_backbone_features = self.params.model.extract_iou_features(test_backbone_features) 
            new_target_box = self.refine_target_box(self.target_pos, self.target_sz, sample_pos, sample_scale, test_iou_backbone_features)

            # Update target box
            if new_target_box is not None:
                self.target_pos = sample_pos + (new_target_box[:2] + new_target_box[2:]/2 - (self.img_sample_sz - 1) / 2).flip((0,)) * sample_scale
                self.target_sz = self.params.scale_damp * self.target_sz + (1 - self.params.scale_damp) * new_target_box[2:].flip((0,)) * sample_scale

            self.target_sz = torch.min(self.target_sz, self.initial_target_sz*self.max_scale_factor)
            self.target_sz = torch.max(self.target_sz, self.initial_target_sz*self.min_scale_factor)

            # Update the sampling message
            self.search_area = torch.prod(self.target_sz * self.params.search_padding)
            self.sample_scale = torch.sqrt(self.search_area / self.params.img_sample_area)
            self.target_sample_sz = self.target_sz / self.sample_scale

            # ------- UPDATE FEATURE MODEL ------- #
            train_sample = sample_patch(image_cuda, self.target_pos, self.sample_scale*self.img_sample_sz, self.img_sample_sz)
            train_backbone_features = self.params.model.extract_backbone_features(train_sample)

            # Extract locator features
            train_locator_proposals = self.get_locator_proposals(self.target_sample_sz)
            train_locator_features = self.params.model.extract_locator_features(train_backbone_features, train_locator_proposals).squeeze()

            # Hard negative mining and adaptive learning rate
            learning_rate = self.params.learning_rate
            if self.params.hard_negative_mining:
                train_locator_score = torch.mm(train_locator_features, self.locator_model)
                max_score = train_locator_score.max()
                train_locator_score = train_locator_score * self.hard_negative_region_mask
                if (train_locator_score.max() > self.params.hard_negative_threshold*max_score) and (train_locator_score.max() > self.params.target_not_found):
                    hard_flag = True
                    learning_rate = self.params.hard_negative_learning_rate

            # Update locator model
            self.locator_XTX = (1 - learning_rate) * self.locator_XTX + learning_rate * torch.mm(train_locator_features.t(), train_locator_features)
            self.locator_XTY = (1 - learning_rate) * self.locator_XTY + learning_rate * torch.mm(train_locator_features.t(), self.locator_labels)

            # Adjust weight of initial frame
            self.current_initial_frame_weight = (1 - learning_rate) * self.current_initial_frame_weight
            if self.current_initial_frame_weight < self.params.init_samples_minimum_weight:
                diff = self.params.init_samples_minimum_weight - self.current_initial_frame_weight
                coff = diff / (1 - self.current_initial_frame_weight)
                self.locator_XTX = (1 - coff) * self.locator_XTX + coff * self.locator_XTX_initial
                self.locator_XTY = (1 - coff) * self.locator_XTY + coff * self.locator_XTY_initial
                self.current_initial_frame_weight = self.params.init_samples_minimum_weight

        # ------- TRAIN ------- #
        if (self.frame_num % self.params.train_skipping == 0) or (hard_flag):
            self.locator_model = self.train_locator_model(self.locator_XTX+self.locator_regularization, self.locator_XTY, self.locator_model)

        # ------- RETURN ------- #
        # Return new state
        new_state = torch.cat((self.target_pos[[1,0]] - self.target_sz[[1,0]]*0.5, self.target_sz[[1,0]]))
        new_state[0], new_state[1] = new_state[0].clamp(0), new_state[1].clamp(0)
        new_state[2] = new_state[2].clamp(0, self.IMG_WIDTH -new_state[0])
        new_state[3] = new_state[3].clamp(0, self.IMG_HEIGHT-new_state[1])

        # Output result image
        if self.params.output_image:
            self.output_result_image(image_show, new_state)

        return new_state.tolist()
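
The update above maintains running estimates of X^T X and X^T Y for the locator features, so train_locator_model amounts to a regularized least-squares solve. The stand-alone solver below is a sketch of that idea, not the original implementation, which may use an iterative, warm-started solver (note that the original call also passes the previous model).

    # Sketch (assumption): a direct regularized least-squares solve,
    #   w = (X^T X + reg)^(-1) X^T Y,
    # in place of the original (possibly iterative) solver.
    import torch

    def solve_locator_model(XTX_reg, XTY):
        # XTX_reg: (D, D) regularized Gram matrix, XTY: (D, 1) feature/label correlation
        return torch.linalg.solve(XTX_reg, XTY)

    # locator_model = solve_locator_model(locator_XTX + locator_regularization, locator_XTY)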
Example #13
	def track(self, image):
		self.frame_num += 1

		# For debug display only
		#image_show = image.copy()
		
		# Convert to tensor and move to GPU
		image_cuda = self.numpy_to_tensor_gpu(image)

		# ------- LOCALIZATION ------- #
		sample_pos = self.target_pos.clone()
		sample_scale = self.sample_scale.clone()
		target_sample_sz = self.target_sample_sz.clone()

		# sample and extract features
		test_sample = sample_patch(image_cuda, sample_pos, sample_scale*self.img_sample_sz, self.img_sample_sz)
		test_locator_proposals = self.get_locator_proposals(target_sample_sz)
		self.params.model.extract(test_sample, test_locator_proposals)

		# calculate the localization score
		test_locator_score = torch.mm(self.params.model.locator_features, self.locator_model)
		if getattr(self.params, 'window_output', False):
			test_locator_score = test_locator_score * self.output_window
		max_score, max_id = torch.max(test_locator_score, dim=0)
		max_score, max_id = max_score.item(), max_id.item()

		# when target not found
		if max_score < self.params.target_not_found_threshold:
			# maintain the original target position and size
			new_state = torch.cat((self.target_pos[[1,0]] - (self.target_sz[[1,0]]-1)/2, self.target_sz[[1,0]]))
			# Output result image
			#self.output_result_image(image_show, new_state)
			return new_state.tolist()

		# update the target position
		self.target_pos[0] = self.target_pos[0] + (self.proposals_yc[max_id].item() - self.img_sample_sz[1]*0.5) * sample_scale
		self.target_pos[1] = self.target_pos[1] + (self.proposals_xc[max_id].item() - self.img_sample_sz[0]*0.5) * sample_scale

		# refine the target position and size by IoUNet
		new_pos, new_target_sz = self.refine_target_box(self.target_pos, self.target_sz, sample_pos, sample_scale)

		# bound the target size
		if new_target_sz is not None:
			new_target_sz = torch.min(new_target_sz, self.initial_target_sz*self.max_scale_factor)
			new_target_sz = torch.max(new_target_sz, self.initial_target_sz*self.min_scale_factor)

		# update the target and sampling message
		if new_pos is not None:
			self.target_pos = new_pos.clone()
			self.target_sz = new_target_sz.clone()
			self.search_area = torch.prod(self.target_sz * self.params.search_area_scale)
			self.sample_scale = torch.sqrt(self.search_area / self.params.img_sample_area)
			self.target_sample_sz = self.target_sz / self.sample_scale

		# Return new state
		new_state = torch.cat((self.target_pos[[1,0]] - (self.target_sz[[1,0]]-1)/2, self.target_sz[[1,0]]))

		# Output result image
		#self.output_result_image(image_show, new_state)

		# ------- UPDATE MODEL ------- #
		train_sample = sample_patch(image_cuda, self.target_pos, self.sample_scale*self.img_sample_sz, self.img_sample_sz)
		train_locator_proposals = self.get_locator_proposals(self.target_sample_sz)
		self.params.model.extract(train_sample, train_locator_proposals, only_locator=True)

		hard_flag = False
		if self.params.hard_negative_mining:
			train_locator_score = torch.mm(self.params.model.locator_features, self.locator_model)
			train_locator_score = train_locator_score * self.hard_negative_region_mask
			max_score, _ = torch.max(train_locator_score, dim=0)
			if max_score > self.params.hard_negative_threshold:
				hard_flag = True

		if hard_flag:
			learning_rate = self.params.hard_negative_learning_rate
		else:
			learning_rate = self.params.learning_rate

		self.locator_features_model = (1 - learning_rate) * self.locator_features_model + learning_rate * self.params.model.locator_features
		self.current_initial_frame_weight = (1 - learning_rate) * self.current_initial_frame_weight

		if self.current_initial_frame_weight < self.params.init_samples_minimum_weight:
			diff = self.params.init_samples_minimum_weight - self.current_initial_frame_weight
			coff = diff / (1 - self.current_initial_frame_weight)
			self.locator_features_model = (1 - coff) * self.locator_features_model + coff * self.initial_locator_features
			self.current_initial_frame_weight = self.params.init_samples_minimum_weight

		if (self.frame_num % self.params.train_skipping == 0) or (hard_flag):
			self.locator_model = self.train_locator_model(self.locator_features_model, self.locator_model)

		return new_state.tolist()