def initialize(self, image, info: dict) -> dict:
    # Initialize some stuff
    self.frame_num = 1
    if not self.params.has('device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize network
    self.initialize_features()

    # The DiMP network
    self.net = self.params.net

    # Time initialization
    tic = time.time()

    state = info['init_bbox']

    # Estimate the target depth and extract the corresponding depth layer
    target_depth = get_target_depth(image, state)
    print('target depth :', target_depth)
    target_layer = get_layered_image_by_depth(image, target_depth)
    self.layer_id = int(target_depth // 2000)  # 2 m depth bins
    print('layer id :', self.layer_id)

    # Convert image
    # im = numpy_to_torch(image)  # HxWx6 -> 6 x H x W
    im = numpy_to_torch(target_layer)

    # Get target position and size
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2,
                             state[0] + (state[2] - 1) / 2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Get object id
    self.object_id = info.get('object_ids', [None])[0]
    self.id_str = '' if self.object_id is None else ' {}'.format(self.object_id)

    # Set sizes
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    sz = self.params.image_sample_size
    sz = torch.Tensor([sz, sz] if isinstance(sz, int) else sz)
    if self.params.get('use_image_aspect_ratio', False):
        sz = self.image_sz * sz.prod().sqrt() / self.image_sz.prod().sqrt()
        stride = self.params.get('feature_stride', 32)
        sz = torch.round(sz / stride) * stride
    self.img_sample_sz = sz
    self.img_support_sz = self.img_sample_sz

    # Set search area
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    self.target_scale = math.sqrt(search_area) / self.img_sample_sz.prod().sqrt()

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Setup scale factors
    if not self.params.has('scale_factors'):
        self.params.scale_factors = torch.ones(1)
    elif isinstance(self.params.scale_factors, (list, tuple)):
        self.params.scale_factors = torch.Tensor(self.params.scale_factors)

    # Setup scale bounds
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform sample
    init_backbone_feat = self.generate_init_samples(im)

    # Initialize classifier
    self.init_classifier(init_backbone_feat)

    # Initialize IoUNet
    if self.params.get('use_iou_net', True):
        self.init_iou_net(init_backbone_feat)

    out = {'time': time.time() - tic}
    return out
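# get_target_depth() and get_layered_image_by_depth() are imported from
# elsewhere in the repo. Minimal sketches of their assumed behavior, inferred
# only from how they are called here: a robust depth estimate inside the box,
# and a colormapped depth band around that estimate (the same recipe as the
# per-layer loop in track() below). The names, the +/- 1 m band, and the
# median statistic are assumptions, not the repo's actual implementation.
def _sketch_get_target_depth(dp, bbox):
    # bbox = [x, y, w, h]; median of the valid (non-zero) depths in the box
    x, y, w, h = [int(v) for v in bbox]
    region = dp[y:y + h, x:x + w]
    valid = region[region > 0]
    return float(np.median(valid)) if valid.size > 0 else 0.0

def _sketch_get_layered_image_by_depth(dp, target_depth):
    # Keep a band around target_depth (millimeters), scale it to [0, 255],
    # and render it as a JET colormap.
    lower = max(0.0, target_depth - 1000.0)
    upper = target_depth + 1000.0
    layer = np.clip((dp.astype(np.float32) - lower) / (upper - lower), 0, 1)
    return cv2.applyColorMap(np.asarray(layer * 255, dtype=np.uint8),
                             cv2.COLORMAP_JET)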
def track(self, image, info: dict = None) -> dict:
    self.debug_info = {}

    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # Track on each depth layer around the previous one.
    # The 2 m layers are 0-2 m, 2-4 m, 4-6 m, 6-8 m, 8-10 m, and 10 m-inf.
    max_score = 0
    flag = 'not_found'
    new_pos = [-1, -1, -1, -1]
    scale_ind = None
    backbone_feat = None
    test_x = None
    sample_pos = None
    s = None
    sample_coords = None
    target_dist = 0
    final_layer = image

    start = max(0, self.layer_id - 1)
    end = min(11, self.layer_id + 2)
    for z_dist in range(start, end):
        if z_dist == 10:
            lower = 10000  # 10 meters
            upper = np.max(image)
        else:
            lower = z_dist * 1000
            upper = (z_dist + 2) * 1000

        # Cut out the depth band and render it as a colormap. Working in
        # float avoids unsigned underflow when `lower` is subtracted from the
        # zeroed out-of-band pixels; the clip maps them back to 0.
        layer = image.astype(np.float32)
        layer[layer > upper] = 0
        layer[layer < lower] = 0
        layer = np.clip((layer - lower) / (upper - lower), 0, 1)
        layer = np.asarray(layer * 255, dtype=np.uint8)
        layer = cv2.applyColorMap(layer, cv2.COLORMAP_JET)
        layer = numpy_to_torch(layer)

        # Extract backbone features
        backbone_feat_layer, sample_coords_layer, im_patches_layer = self.extract_backbone_features(
            layer, self.get_centered_sample_pos(),
            self.target_scale * self.params.scale_factors, self.img_sample_sz)

        # Extract classification features
        test_x_layer = self.get_classification_features(backbone_feat_layer)

        # Location of sample
        sample_pos_layer, sample_scales_layer = self.get_sample_location(sample_coords_layer)

        # Compute classification scores
        scores_raw_layer = self.classify_target(test_x_layer)

        # Localize the target
        translation_vec_layer, scale_ind_layer, s_layer, flag_layer = self.localize_target(
            scores_raw_layer, sample_pos_layer, sample_scales_layer)
        # Song: depth cues could be added here
        new_pos_layer = sample_pos_layer[scale_ind_layer, :] + translation_vec_layer

        score_map_layer = s_layer[scale_ind_layer, ...]
        max_score_layer = torch.max(score_map_layer).item()

        # Keep the best-scoring layer
        if flag_layer != 'not_found' and max_score_layer > max_score:
            flag = flag_layer
            max_score = max_score_layer
            new_pos = new_pos_layer
            scale_ind = scale_ind_layer
            sample_pos = sample_pos_layer
            backbone_feat = backbone_feat_layer
            test_x = test_x_layer
            sample_scales = sample_scales_layer
            s = s_layer
            target_dist = z_dist
            sample_coords = sample_coords_layer
            final_layer = layer

    # if max_score > 0.8:
    #     self.layer_id = target_dist
    print('Choose %d meter ...' % target_dist, flag, max_score)

    # Update position and scale
    if flag != 'not_found':
        if self.params.get('use_iou_net', True):
            update_scale_flag = self.params.get('update_scale_when_uncertain', True) or flag != 'uncertain'
            if self.params.get('use_classifier', True):
                self.update_state(new_pos)
            self.refine_target_box(backbone_feat, sample_pos[scale_ind, :],
                                   sample_scales[scale_ind], scale_ind, update_scale_flag)
        elif self.params.get('use_classifier', True):
            self.update_state(new_pos, sample_scales[scale_ind])

    # ------- UPDATE ------- #
    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    learning_rate = self.params.get('hard_negative_learning_rate', None) if hard_negative else None

    if update_flag and self.params.get('update_classifier', False):
        # Get train sample
        train_x = test_x[scale_ind:scale_ind + 1, ...]

        # Create target_box and label for spatial sample
        target_box = self.get_iounet_box(self.pos, self.target_sz,
                                         sample_pos[scale_ind, :], sample_scales[scale_ind])

        # Update the classifier model
        self.update_classifier(train_x, target_box, learning_rate, s[scale_ind, ...])

    # Set the pos of the tracker to the IoUNet pos
    if self.params.get('use_iou_net', True) and flag != 'not_found' and hasattr(self, 'pos_iounet'):
        self.pos = self.pos_iounet.clone()

    if flag != 'not_found':
        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()

        # Visualize and set debug info
        self.search_area_box = torch.cat(
            (sample_coords[scale_ind, [1, 0]],
             sample_coords[scale_ind, [3, 2]] - sample_coords[scale_ind, [1, 0]] - 1))
        self.debug_info['flag' + self.id_str] = flag
        self.debug_info['max_score' + self.id_str] = max_score

        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map' + self.id_str)
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))
    else:
        max_score = 0
        final_layer = image

    # Compute output bounding box
    new_state = torch.cat(
        (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2, self.target_sz[[1, 0]]))

    if self.params.get('output_not_found_box', False) and flag == 'not_found':
        output_state = [-1, -1, -1, -1]
    else:
        output_state = new_state.tolist()

    # Re-estimate the target depth layer for the next frame
    target_depth = get_target_depth(image, output_state)
    self.layer_id = int(target_depth // 2000)

    out = {
        'target_bbox': output_state,
        'confidence': max_score,
        'image': torch_to_numpy(final_layer)
    }
    return out
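# Worked example of the layer arithmetic used above: depth in millimeters is
# bucketed via layer_id = depth // 2000, and track() scans the 2 m windows
# z_dist = layer_id - 1 .. layer_id + 1 (bin 10 covering everything past
# 10 m). A small self-contained sketch of that windowing; only the arithmetic
# comes from the code above, the function name is ours.
def _sketch_scan_window(target_depth_mm):
    layer_id = int(target_depth_mm // 2000)
    start = max(0, layer_id - 1)
    end = min(11, layer_id + 2)
    return layer_id, list(range(start, end))

# e.g. _sketch_scan_window(500)  -> (0, [0, 1])      scan 0-2 m and 1-3 m
#      _sketch_scan_window(3500) -> (1, [0, 1, 2])   scan 0-2 m, 1-3 m, 2-4 m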
def _get_frame(self, seq_path, frame_id, bbox=None):
    '''
    Returns the frame in the representation selected by self.dtype, e.g. a
    colormap computed from the depth image, or a stacked
    [depth, depth, depth] image.
    '''
    color_path, depth_path = self._get_frame_path(seq_path, frame_id)

    rgb = cv2.imread(color_path)
    rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)

    dp = cv2.imread(depth_path, -1)
    max_depth = min(np.max(dp), 10000)
    dp[dp > max_depth] = max_depth

    if self.dtype == 'color':
        img = rgb
    elif self.dtype == 'R':
        img = rgb[:, :, 0]
        img = cv2.merge((img, img, img))
    elif self.dtype == 'G':
        img = rgb[:, :, 1]
        img = cv2.merge((img, img, img))
    elif self.dtype == 'B':
        img = rgb[:, :, 2]
        img = cv2.merge((img, img, img))
    elif self.dtype == 'RColormap':
        R = rgb[:, :, 0]
        R = cv2.normalize(R, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        R = np.asarray(R, dtype=np.uint8)
        img = cv2.applyColorMap(R, cv2.COLORMAP_JET)
    elif self.dtype == 'GColormap':
        G = rgb[:, :, 1]
        G = cv2.normalize(G, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        G = np.asarray(G, dtype=np.uint8)
        img = cv2.applyColorMap(G, cv2.COLORMAP_JET)
    elif self.dtype == 'BColormap':
        B = rgb[:, :, 2]
        B = cv2.normalize(B, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        B = np.asarray(B, dtype=np.uint8)
        img = cv2.applyColorMap(B, cv2.COLORMAP_JET)
    elif self.dtype == 'rgbcolormap':
        colormap = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        colormap = np.asarray(colormap, dtype=np.uint8)
        colormap = cv2.applyColorMap(colormap, cv2.COLORMAP_JET)
        img = cv2.merge((rgb, colormap))  # H x W x 6
    elif self.dtype in ['centered_colormap', 'centered_norm_depth', 'centered_raw_depth']:
        if bbox is None:
            print('Error !!! %s requires a bbox' % self.dtype)
            return None
        target_depth = get_target_depth(dp, bbox)
        img = get_layered_image_by_depth(dp, target_depth, dtype=self.dtype)
    elif self.dtype == 'colormap':
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        img = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
    elif self.dtype == 'colormap_norm_depth':
        # Colormap + normalized depth
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
        r, g, b = cv2.split(colormap)
        img = cv2.merge((r, g, b, dp))
    elif self.dtype == 'colormap_raw_depth':
        # Colormap + raw depth
        raw_dp = dp
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
        r, g, b = cv2.split(colormap)
        # img = cv2.merge((r, g, b, dp))
        img = np.stack((r, g, b, raw_dp), axis=2)
    elif self.dtype == 'raw_depth':
        # No normalization here !!!
        img = cv2.merge((dp, dp, dp))
    elif self.dtype == 'normalized_depth':
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        img = cv2.merge((dp, dp, dp))  # H x W x 3
    elif self.dtype == 'rgbd':
        r, g, b = cv2.split(rgb)
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        img = cv2.merge((r, g, b, dp))
    elif self.dtype == 'hha':
        hha_path = os.path.join(seq_path, 'hha')
        if not os.path.isdir(hha_path):
            os.mkdir(hha_path)
        hha_img = os.path.join(hha_path, '{:08}.png'.format(frame_id + 1))  # frames start from 1
        if not os.path.isfile(hha_img):
            dp = dp / 1000  # meters
            img = getHHA(dp, dp)
            cv2.imwrite(hha_img, img)
        else:
            img = cv2.imread(hha_img)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    elif self.dtype == 'sigmoid':
        img = sigmoid(dp)
    else:
        print('no such dtype : %s' % self.dtype)
        img = None

    return img
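# The 'sigmoid' dtype above passes raw depth through sigmoid(), defined
# elsewhere in the repo. A minimal sketch assuming the standard elementwise
# logistic function; note that _read_image()'s 'sigmoid_depth' branch below
# divides the depth by 1000 first, so applying it to raw millimeter values as
# above saturates to ~1 almost everywhere.
def _sketch_sigmoid(x):
    x = np.asarray(x, dtype=np.float32)
    return 1.0 / (1.0 + np.exp(-x))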
def _read_image(self, image_file: str, dtype='colormap', bbox=None):
    if dtype == 'color':
        im = cv.imread(image_file)
        return cv.cvtColor(im, cv.COLOR_BGR2RGB)
    elif dtype == 'R':
        im = cv.imread(image_file)
        im = cv.cvtColor(im, cv.COLOR_BGR2RGB)
        im = im[:, :, 0]
        im = cv.merge((im, im, im))
        return im
    elif dtype == 'G':
        im = cv.imread(image_file)
        im = cv.cvtColor(im, cv.COLOR_BGR2RGB)
        im = im[:, :, 1]
        im = cv.merge((im, im, im))
        return im
    elif dtype == 'B':
        im = cv.imread(image_file)
        im = cv.cvtColor(im, cv.COLOR_BGR2RGB)
        im = im[:, :, 2]
        im = cv.merge((im, im, im))
        return im
    elif dtype == 'RColormap':
        rgb = cv.imread(image_file)
        rgb = cv.cvtColor(rgb, cv.COLOR_BGR2RGB)
        R = rgb[:, :, 0]
        R = cv.normalize(R, None, alpha=0, beta=255, norm_type=cv.NORM_MINMAX)
        R = np.asarray(R, dtype=np.uint8)
        return cv.applyColorMap(R, cv.COLORMAP_JET)
    elif dtype == 'GColormap':
        rgb = cv.imread(image_file)
        rgb = cv.cvtColor(rgb, cv.COLOR_BGR2RGB)
        G = rgb[:, :, 1]
        G = cv.normalize(G, None, alpha=0, beta=255, norm_type=cv.NORM_MINMAX)
        G = np.asarray(G, dtype=np.uint8)
        return cv.applyColorMap(G, cv.COLORMAP_JET)
    elif dtype == 'BColormap':
        rgb = cv.imread(image_file)
        rgb = cv.cvtColor(rgb, cv.COLOR_BGR2RGB)
        B = rgb[:, :, 2]
        B = cv.normalize(B, None, alpha=0, beta=255, norm_type=cv.NORM_MINMAX)
        B = np.asarray(B, dtype=np.uint8)
        return cv.applyColorMap(B, cv.COLORMAP_JET)
    elif dtype == 'rgbcolormap':
        # Here image_file is a dict with 'color' and 'depth' paths
        color_image = cv.imread(image_file['color'])
        color = cv.cvtColor(color_image, cv.COLOR_BGR2RGB)

        depth_image = cv.imread(image_file['depth'], -1)
        depth_image = cv.normalize(depth_image, None, alpha=0, beta=255,
                                   norm_type=cv.NORM_MINMAX, dtype=cv.CV_32F)
        depth_image = np.asarray(depth_image, dtype=np.uint8)
        depth_image = cv.applyColorMap(depth_image, cv.COLORMAP_JET)

        # Merge the converted RGB image (not the BGR one) with the colormap
        return cv.merge((color, depth_image))
    else:
        depth_image_file = image_file
        dp = cv.imread(depth_image_file, -1)
        dp[dp > 10000] = 10000

        if dtype == 'colormap':
            img = cv.normalize(dp, None, alpha=0, beta=255,
                               norm_type=cv.NORM_MINMAX, dtype=cv.CV_32F)
            img = np.asarray(img, dtype=np.uint8)
            img = cv.applyColorMap(img, cv.COLORMAP_JET)
        elif dtype == 'normalized_depth':
            img = cv.normalize(dp, None, alpha=0, beta=255,
                               norm_type=cv.NORM_MINMAX, dtype=cv.CV_32F)
            img = np.asarray(img, dtype=np.uint8)
            img = cv.merge((img, img, img))
        elif dtype == 'raw_depth':
            img = dp
        elif dtype in ['centered_colormap', 'centered_normalized_depth', 'centered_raw_depth']:
            if bbox is None:
                print('centered colormap requires a bbox !!!')
                return None
            target_depth = get_target_depth(dp, bbox)
            img = get_layered_image_by_depth(dp, target_depth, dtype=dtype)
        elif dtype == 'hha':
            dp = dp / 1000  # meters
            img = getHHA(dp, dp)
        elif dtype == 'sigmoid':
            img = sigmoid(dp)
        elif dtype == 'sigmoid_depth':
            img = {'sigmoid': sigmoid(dp / 1000), 'depth': dp}
        elif dtype == 'colormap_depth':
            colormap = cv.normalize(dp, None, alpha=0, beta=255,
                                    norm_type=cv.NORM_MINMAX, dtype=cv.CV_32F)
            colormap = np.asarray(colormap, dtype=np.uint8)
            colormap = cv.applyColorMap(colormap, cv.COLORMAP_JET)
            img = {'colormap': colormap, 'depth': dp}
        else:
            print('No such dtype !!!')
            img = None

        return img
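# Usage sketch for _read_image(): the 'rgbcolormap' dtype expects a dict of
# file paths while the depth-only dtypes take a single path, and
# 'colormap_depth' / 'sigmoid_depth' return dicts rather than arrays. The
# paths and the `reader` object below are placeholders.
def _sketch_read_rgbd(reader, color_path, depth_path):
    # H x W x 6: RGB channels stacked with a JET depth colormap
    img6 = reader._read_image({'color': color_path, 'depth': depth_path},
                              dtype='rgbcolormap')
    # dict holding the JET colormap and the raw depth side by side
    pair = reader._read_image(depth_path, dtype='colormap_depth')
    return img6, pair['colormap'], pair['depth']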
def _get_frames(self, seq_id, depth_threshold=None, bbox=None):
    '''
    dtype:
        - raw_depth
        - norm_depth
        - centered_raw_depth
        - centered_norm_depth
        - colormap
        - centered_colormap
    '''
    rgb_path = self.coco_set.loadImgs(
        [self.coco_set.anns[self.sequence_list[seq_id]]['image_id']])[0]['file_name']
    depth_path = rgb_path[:-4] + '.png'

    rgb = cv2.imread(os.path.join(self.img_path, 'color', rgb_path))
    rgb = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)

    # Prefer the segmented depth map if it exists
    if os.path.isfile(os.path.join(self.img_path, 'segDepth', depth_path)):
        dp = cv2.imread(os.path.join(self.img_path, 'segDepth', depth_path), -1)
    else:
        dp = cv2.imread(os.path.join(self.img_path, 'depth', depth_path), -1)

    max_depth = min(np.max(dp), 10000)
    dp[dp > max_depth] = max_depth

    if self.dtype in ['centered_colormap', 'centered_raw_depth', 'centered_norm_depth']:
        if bbox is None:
            print('Error !!! centered_colormap requires a bbox')
            return None
        # bbox is repeated, so take the first one
        target_depth = get_target_depth(dp, bbox[0])
        img = get_layered_image_by_depth(dp, target_depth, dtype=self.dtype)
    elif self.dtype == 'R':
        img = rgb[:, :, 0]
        img = cv2.merge((img, img, img))
    elif self.dtype == 'G':
        img = rgb[:, :, 1]
        img = cv2.merge((img, img, img))
    elif self.dtype == 'B':
        img = rgb[:, :, 2]
        img = cv2.merge((img, img, img))
    elif self.dtype == 'RColormap':
        R = rgb[:, :, 0]
        R = cv2.normalize(R, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        R = np.asarray(R, dtype=np.uint8)
        img = cv2.applyColorMap(R, cv2.COLORMAP_JET)
    elif self.dtype == 'GColormap':
        G = rgb[:, :, 1]
        G = cv2.normalize(G, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        G = np.asarray(G, dtype=np.uint8)
        img = cv2.applyColorMap(G, cv2.COLORMAP_JET)
    elif self.dtype == 'BColormap':
        B = rgb[:, :, 2]
        B = cv2.normalize(B, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        B = np.asarray(B, dtype=np.uint8)
        img = cv2.applyColorMap(B, cv2.COLORMAP_JET)
    elif self.dtype == 'colormap':
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        img = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
    elif self.dtype == 'colormap_norm_depth':
        # Colormap + normalized depth
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
        r, g, b = cv2.split(colormap)
        img = cv2.merge((r, g, b, dp))
    elif self.dtype == 'colormap_raw_depth':
        # Colormap + raw depth
        raw_dp = dp
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
        r, g, b = cv2.split(colormap)
        img = np.stack((r, g, b, raw_dp), axis=2)
    elif self.dtype == 'normalized_depth':
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        img = cv2.merge((dp, dp, dp))  # H x W x 3
    elif self.dtype == 'raw_depth':
        img = np.stack((dp, dp, dp), axis=2)
    elif self.dtype == 'color':
        img = rgb
    elif self.dtype == 'hha':
        hha_path = os.path.join(self.img_path, 'hha')
        if not os.path.isdir(hha_path):
            os.mkdir(hha_path)
        hha_img = os.path.join(hha_path, depth_path)
        if not os.path.isfile(hha_img):
            dp = dp / 1000  # meters
            img = getHHA(dp, dp)
            cv2.imwrite(hha_img, img)
        else:
            img = cv2.imread(hha_img)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    elif self.dtype == 'sigmoid':
        img = sigmoid(dp)
    elif self.dtype == 'rgbcolormap':
        dp = cv2.normalize(dp, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX)
        dp = np.asarray(dp, dtype=np.uint8)
        colormap = cv2.applyColorMap(dp, cv2.COLORMAP_JET)
        img = cv2.merge((rgb, colormap))  # H x W x 6
    else:
        print('no such dtype : %s' % self.dtype)
        img = None

    return img
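# Usage sketch for _get_frames(): the representation is selected through
# self.dtype, and the centered_* dtypes need the (repeated) bbox list so the
# target depth can be estimated. `dataset`, `seq_id`, and `bbox` are
# placeholders for the repo's dataset object and annotations.
def _sketch_load_training_frames(dataset, seq_id, bbox):
    dataset.dtype = 'rgbcolormap'
    img6 = dataset._get_frames(seq_id)               # H x W x 6: RGB + colormap
    dataset.dtype = 'centered_colormap'
    img3 = dataset._get_frames(seq_id, bbox=[bbox])  # depth layer around the target
    return img6, img3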