def show_image(self, im, plot_name=None, ax=None):
    """Display an image on a cached debug figure or on a caller-supplied axes.

    Args:
        im: Image to show. A ``torch.Tensor`` is first converted with
            ``torch_to_numpy``; anything else is handed to ``imshow`` as is.
        plot_name: Title of the plot. When ``ax`` is None it also keys the
            ``debug_fig_<name>`` / ``debug_ax_<name>`` attributes that cache
            the figure and axes on ``self`` between calls. ``None`` is
            treated as an empty name.
        ax: Optional axes to draw on. When None, a figure/axes pair is
            created on first use and re-used afterwards.
    """
    if isinstance(im, torch.Tensor):
        im = torch_to_numpy(im)

    # The declared default is None; string concatenation below needs a str.
    title = '' if plot_name is None else plot_name

    if ax is None:
        fig_attr = 'debug_fig_' + title
        ax_attr = 'debug_ax_' + title
        if not hasattr(self, fig_attr):
            fig, ax = plt.subplots(1)
            setattr(self, fig_attr, fig)
            setattr(self, ax_attr, ax)
            plt.tight_layout()
        else:
            fig = getattr(self, fig_attr, None)
            ax = getattr(self, ax_attr, None)
    else:
        # A caller-supplied axes previously left `fig` unbound, so the final
        # draw_figure(fig) raised. Redraw the figure that owns the axes.
        fig = ax.figure

    ax.cla()
    ax.imshow(im)
    ax.set_axis_off()
    ax.axis('equal')
    ax.set_title(title)
    draw_figure(fig)
def __call__(self, image, is_mask=False):
    """Rotate ``image`` by ``self.angle`` (radians) about its centre.

    Args:
        image: ``torch.Tensor`` or numpy array. A tensor is converted to
            numpy, rotated, converted back and passed through
            ``self.crop_to_output``; a numpy array is rotated and returned
            directly.
        is_mask: When True, nearest-neighbour interpolation is used so that
            label values are not blended; otherwise bilinear interpolation
            (the ``cv.warpAffine`` default) is used.

    Returns:
        The rotated image, of the same container type as the input.
    """
    if isinstance(image, torch.Tensor):
        # Forward is_mask: it used to be dropped on this recursive call.
        rotated = self(torch_to_numpy(image), is_mask)
        return self.crop_to_output(numpy_to_torch(rotated))

    # warpAffine works in (x, y) = (column, row) coordinates, so the centre
    # has to be built from (width, height), not from shape[:2] = (rows, cols).
    # For square inputs both orders give the same point.
    centre = (np.expand_dims(np.array(image.shape[1::-1]), 1) - 1) / 2
    cos_a = math.cos(self.angle)
    sin_a = math.sin(self.angle)
    R = np.array([[cos_a, sin_a],
                  [-sin_a, cos_a]])
    # Translation chosen so that `centre` is the fixed point of the map.
    H = np.concatenate([R, centre - R @ centre], 1)

    interpolation = cv.INTER_NEAREST if is_mask else cv.INTER_LINEAR
    return cv.warpAffine(image, H, image.shape[1::-1], flags=interpolation,
                         borderMode=cv.BORDER_REPLICATE)
def __call__(self, image, is_mask=False):
    """Apply the affine transform described by ``self.roll_values``.

    Args:
        image: ``torch.Tensor`` or numpy array. Tensors are converted with
            ``torch_to_numpy`` before warping and back with
            ``numpy_to_torch`` afterwards.
        is_mask: When True the warp uses nearest-neighbour interpolation,
            otherwise bilinear interpolation.

    Returns:
        ``self.crop_to_output`` applied to the warped image. The warp output
        is larger than the input by ``2 * self.pad_amount`` in each spatial
        dimension.
    """
    input_tensor = torch.is_tensor(image)
    if input_tensor:
        image = torch_to_numpy(image)

    do_flip, theta, shear_values, scale_factors = self.roll_values
    t_mat = self._construct_t_mat(image.shape[:2], do_flip, theta, shear_values, scale_factors)
    # (width, height), as expected by cv.warpAffine.
    output_sz = (image.shape[1] + 2*self.pad_amount, image.shape[0] + 2*self.pad_amount)

    interpolation = cv.INTER_NEAREST if is_mask else cv.INTER_LINEAR
    image_t = cv.warpAffine(image, t_mat, output_sz, flags=interpolation,
                            borderMode=self.border_flag)

    if is_mask:
        # Restore the input's trailing (channel) dimensions on the warped
        # mask. The spatial size must be the padded output size: reshaping to
        # image.shape, as before, fails whenever pad_amount is non-zero.
        image_t = image_t.reshape((output_sz[1], output_sz[0]) + tuple(image.shape[2:]))

    if input_tensor:
        image_t = numpy_to_torch(image_t)

    return self.crop_to_output(image_t)
def __call__(self, image, is_mask=False):
    """Warp ``image`` with the fixed affine matrix ``self.transform_matrix``.

    Args:
        image: ``torch.Tensor`` or numpy array. A tensor is converted to
            numpy, warped, converted back and passed through
            ``self.crop_to_output``; a numpy array is warped and returned
            directly.
        is_mask: When True, nearest-neighbour interpolation is used so that
            label values are not blended; otherwise bilinear interpolation
            (the ``cv.warpAffine`` default) is used.

    Returns:
        The warped image, of the same container type as the input.
    """
    if isinstance(image, torch.Tensor):
        # Forward is_mask: it used to be dropped on this recursive call.
        warped = self(torch_to_numpy(image), is_mask)
        return self.crop_to_output(numpy_to_torch(warped))

    interpolation = cv.INTER_NEAREST if is_mask else cv.INTER_LINEAR
    # image.shape[1::-1] is (width, height), the dsize order OpenCV expects.
    return cv.warpAffine(image, self.transform_matrix, image.shape[1::-1],
                         flags=interpolation, borderMode=cv.BORDER_REPLICATE)
def track(self, image, info: dict = None) -> dict:
    """Track the target in one depth frame by searching nearby depth layers.

    The frame is sliced into overlapping depth layers (layer ``z`` covers
    ``[z * 1000, (z + 2) * 1000]``; layer 10 covers everything from 10000
    up to the frame maximum). Only the layers around ``self.layer_id`` are
    searched. Each layer is colour-mapped, run through the tracker, and the
    layer with the highest classification score is used to update the state.

    Args:
        image: Depth frame as a numpy array. The thresholds are multiples of
            1000 and the original comment calls 10000 "10 meter", so values
            are presumably millimetres -- confirm against the data loader.
        info: Unused.

    Returns:
        dict with keys ``'target_bbox'`` (list of four numbers), ``'confidence'``
        (maximum score, 0 when not found) and ``'image'`` (the colour-mapped
        layer that was selected, or the input frame when nothing was found).
    """
    self.debug_info = {}
    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # Diagnostics used to be printed unconditionally on every frame.
    verbose = self.params.get('debug', 0) >= 1

    # Best result over all searched layers.
    max_score = 0
    flag = 'not_found'
    new_pos = [-1, -1, -1, -1]
    scale_ind = None
    backbone_feat = None
    test_x = None
    sample_pos = None
    sample_scales = None
    s = None
    sample_coords = None
    target_dist = 0
    final_layer = image

    if verbose:
        print(np.max(image))

    # Search the layer of the previous frame and its neighbours. The extra
    # clamping keeps the range non-empty for out-of-range layer ids; for ids
    # that already produced a non-empty range the bounds are unchanged.
    start = min(max(0, self.layer_id - 1), 10)
    end = max(min(11, self.layer_id + 2), start + 1)

    for z_dist in range(start, end):
        if z_dist == 10:
            lower = 10000  # 10 meter
            # Keep the range strictly positive when nothing lies beyond
            # `lower`; otherwise the normalisation divides by zero or by a
            # negative number.
            upper = max(float(np.max(image)), lower + 1.0)
        else:
            lower = z_dist * 1000
            upper = (z_dist + 2) * 1000

        if verbose:
            print(lower, upper)

        layer = self._render_depth_layer(image, lower, upper)

        # Extract backbone features
        backbone_feat_layer, sample_coords_layer, _ = self.extract_backbone_features(
            layer, self.get_centered_sample_pos(),
            self.target_scale * self.params.scale_factors, self.img_sample_sz)

        # Extract classification features
        test_x_layer = self.get_classification_features(backbone_feat_layer)

        # Location of sample
        sample_pos_layer, sample_scales_layer = self.get_sample_location(sample_coords_layer)

        # Compute classification scores
        scores_raw_layer = self.classify_target(test_x_layer)

        # Localize the target
        translation_vec_layer, scale_ind_layer, s_layer, flag_layer = self.localize_target(
            scores_raw_layer, sample_pos_layer, sample_scales_layer)

        new_pos_layer = sample_pos_layer[scale_ind_layer, :] + translation_vec_layer
        score_map_layer = s_layer[scale_ind_layer, ...]
        max_score_layer = torch.max(score_map_layer).item()

        # Keep the layer with the strictly highest score among those found.
        if flag_layer != 'not_found' and max_score_layer > max_score:
            flag = flag_layer
            max_score = max_score_layer
            new_pos = new_pos_layer
            scale_ind = scale_ind_layer
            sample_pos = sample_pos_layer
            backbone_feat = backbone_feat_layer
            test_x = test_x_layer
            sample_scales = sample_scales_layer
            s = s_layer
            target_dist = z_dist
            sample_coords = sample_coords_layer
            final_layer = layer

    if verbose:
        print('Choose %d meter ... ' % target_dist, flag, max_score)

    # Update position and scale
    if flag != 'not_found':
        if self.params.get('use_iou_net', True):
            update_scale_flag = self.params.get('update_scale_when_uncertain', True) or flag != 'uncertain'
            if self.params.get('use_classifier', True):
                self.update_state(new_pos)
            self.refine_target_box(backbone_feat, sample_pos[scale_ind, :],
                                   sample_scales[scale_ind], scale_ind, update_scale_flag)
        elif self.params.get('use_classifier', True):
            self.update_state(new_pos, sample_scales[scale_ind])

    # ------- UPDATE ------- #

    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    learning_rate = self.params.get('hard_negative_learning_rate', None) if hard_negative else None

    if update_flag and self.params.get('update_classifier', False):
        # Get train sample
        train_x = test_x[scale_ind:scale_ind + 1, ...]

        # Create target_box and label for spatial sample
        target_box = self.get_iounet_box(self.pos, self.target_sz,
                                         sample_pos[scale_ind, :], sample_scales[scale_ind])

        # Update the classifier model
        self.update_classifier(train_x, target_box, learning_rate, s[scale_ind, ...])

    # Set the pos of the tracker to iounet pos
    if self.params.get('use_iou_net', True) and flag != 'not_found' and hasattr(self, 'pos_iounet'):
        self.pos = self.pos_iounet.clone()

    if flag != 'not_found':
        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()

        # Visualize and set debug info
        self.search_area_box = torch.cat(
            (sample_coords[scale_ind, [1, 0]],
             sample_coords[scale_ind, [3, 2]] - sample_coords[scale_ind, [1, 0]] - 1))
        self.debug_info['flag' + self.id_str] = flag
        self.debug_info['max_score' + self.id_str] = max_score
        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2, 'Score Map' + self.id_str)
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))
    else:
        max_score = 0
        final_layer = image

    # Compute output bounding box
    new_state = torch.cat((self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
                           self.target_sz[[1, 0]]))

    if self.params.get('output_not_found_box', False) and flag == 'not_found':
        output_state = [-1, -1, -1, -1]
    else:
        output_state = new_state.tolist()

    target_depth = get_target_depth(image, output_state)
    # NOTE(review): layers above are indexed in steps of 1000 but the id is
    # derived with a divisor of 2000, so the searched layers may not contain
    # the measured depth -- confirm the intended unit before changing.
    self.layer_id = int(target_depth // 2000)

    # When nothing was found `final_layer` is the raw numpy frame, which must
    # not be pushed through the tensor-to-numpy conversion.
    if torch.is_tensor(final_layer):
        out_image = torch_to_numpy(final_layer)
    else:
        out_image = final_layer

    out = {
        'target_bbox': output_state,
        'confidence': max_score,
        'image': out_image
    }
    return out

def _render_depth_layer(self, image, lower, upper):
    """Colour-map the pixels of ``image`` lying in ``[lower, upper]`` and return them as a tensor."""
    in_range = (image >= lower) & (image <= upper)
    # Normalise only the in-range pixels to [0, 1]; everything else stays 0.
    # Subtracting `lower` from zeroed pixels, as before, produced negative
    # values whose cast to uint8 is not well defined.
    normalised = np.zeros(image.shape, dtype=np.float64)
    normalised[in_range] = (image[in_range].astype(np.float64) - lower) / (upper - lower)
    layer = np.asarray(normalised * 255, dtype=np.uint8)
    layer = cv2.applyColorMap(layer, cv2.COLORMAP_JET)
    return numpy_to_torch(layer)