def visualize_iou_pred(self, iou_features, center_box):
    """Display IoU-predictor scores for boxes perturbed around ``center_box``.

    Two grids of candidate boxes are built in the relative parametrisation
    returned by ``bbutils.rect_to_rel``: one offsets box coordinates 0 and 1
    (shown as 'Position scores'), the other offsets coordinates 2 and 3
    (shown as 'Size scores').  Both grids are scored with
    ``self.net.bb_regressor.predict_iou``; the exponentiated predictions are
    drawn with ``show_tensor`` in figures 21 and 22.

    Args:
        iou_features: Passed unchanged to ``predict_iou``.
        center_box: Tensor with four elements; viewed as (1, 1, 4).  Its last
            two entries serve as the normalisation size.
    """
    box = center_box.view(1, 1, 4)
    norm_sz = box[..., 2:].clone()
    box_rel = bbutils.rect_to_rel(box, norm_sz)

    # Offsets span [-1, 1] for coordinates 0/1 and [-log 3, log 3] for
    # coordinates 2/3; both are sampled every 0.01.
    pos_step = 0.01
    sz_step = 0.01
    pos_limit = 1.0
    sz_limit = math.log(3.0)
    pos_offsets = torch.arange(-pos_limit, pos_limit + pos_step, step=pos_step)
    sz_offsets = torch.arange(-sz_limit, sz_limit + sz_step, step=sz_step)

    def offset_grid(values, coord, along_dim0):
        """Return a zero tensor with ``values`` written into box entry ``coord``."""
        count = values.numel()
        if along_dim0:
            grid = torch.zeros(count, 1, 4)
            grid[:, 0, coord] = values
        else:
            grid = torch.zeros(1, count, 4)
            grid[0, :, coord] = values
        return grid

    def perturbed_boxes(first, second):
        """Broadcast both offset grids onto the centre box and flatten."""
        rel = (box_rel + first) + second
        rect = bbutils.rel_to_rect(rel, norm_sz)
        return rect.view(1, -1, 4).to(self.params.device)

    pos_boxes = perturbed_boxes(offset_grid(pos_offsets, 0, False),
                                offset_grid(pos_offsets, 1, True))
    sz_boxes = perturbed_boxes(offset_grid(sz_offsets, 2, False),
                               offset_grid(sz_offsets, 3, True))

    predictor = self.net.bb_regressor
    pos_scores = predictor.predict_iou(self.iou_modulation, iou_features, pos_boxes).exp()
    sz_scores = predictor.predict_iou(self.iou_modulation, iou_features, sz_boxes).exp()

    show_tensor(pos_scores.view(pos_offsets.numel(), -1), title='Position scores', fig_num=21)
    show_tensor(sz_scores.view(sz_offsets.numel(), -1), title='Size scores', fig_num=22)
def track(self, image):
    """Localise the target in ``image`` and update the correlation filter.

    Steps: extract a Fourier-domain sample around ``self.pos`` at every scale
    in ``self.params.scale_factors``, apply the filter, move the state to the
    best response, store the (shifted) sample of the winning scale in memory
    and periodically re-run the filter optimizer.

    Args:
        image: Input frame, converted with ``numpy_to_torch``.

    Returns:
        list: ``[pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2, sz[1],
        sz[0]]`` built from ``self.pos`` and ``self.target_sz``
        (presumably x, y, w, h -- confirm against the caller).
    """
    self.frame_num += 1

    # Convert image
    im = numpy_to_torch(image)

    # ------- LOCALIZATION ------- #

    # Get sample
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    test_xf = self.extract_fourier_sample(im, self.pos, sample_scales,
                                          self.img_sample_sz)

    # Compute scores
    sf = self.apply_filter(test_xf)
    translation_vec, scale_ind, s = self.localize_target(sf)
    scale_change_factor = self.params.scale_factors[scale_ind]

    # Update position and scale
    self.update_state(sample_pos + translation_vec,
                      self.target_scale * scale_change_factor)
    self.predict_target_box(sample_pos, sample_scales[scale_ind], scale_ind)

    if self.params.debug >= 2:
        show_tensor(s[scale_ind, ...], 5)
    if self.params.debug >= 3:
        for i, hf in enumerate(self.filter):
            show_tensor(fourier.sample_fs(hf).abs().mean(1), 6 + i)

    # ------- UPDATE ------- #

    # Get train sample (keep only the winning scale)
    train_xf = TensorList(
        [xf[scale_ind:scale_ind + 1, ...] for xf in test_xf])

    # Shift the sample to compensate for the difference between the rounded
    # sampling position and the updated target position.
    shift_samp = 2 * math.pi * (self.pos - sample_pos) / (
        sample_scales[scale_ind] * self.img_support_sz)
    train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

    # Update memory
    self.update_memory(train_xf)

    # Train filter every `train_skipping` frames.  Written as
    # (frame_num - 1) % k == 0 rather than frame_num % k == 1: the two are
    # identical for k > 1, but the latter is never true for k == 1, which
    # silently disabled training when every frame was requested.
    if (self.frame_num - 1) % self.params.train_skipping == 0:
        self.filter_optimizer.run(self.params.CG_iter, train_xf)
        self.symmetrize_filter()

    # Return new state
    new_state = torch.cat(
        (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
         self.target_sz[[1, 0]]))
    return new_state.tolist()
def track(self, image) -> dict:
    """Track one frame, gating model updates with a metric-feature LOF test.

    After the usual localisation (filter response, optional IoU-based box
    refinement), a feature of the current target box is computed with
    ``get_target_feature`` and scored by ``lof``.  Once ``frame_num`` exceeds
    ``params.train_skipping`` the sample is stored / trained on only when the
    LOF test reports success.

    Args:
        image: Input frame, converted with ``numpy_to_torch``.

    Returns:
        dict: ``{'target_bbox': [...]}`` where the list is
        ``[pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2, sz[1], sz[0]]``.
    """
    self.frame_num += 1
    self.debug_info = {'frame_num': self.frame_num}

    # Convert image
    frame = numpy_to_torch(image)
    self.im = frame  # For debugging only

    # ------- LOCALIZATION ------- #

    # Sample around the rounded position at every scale factor.
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    test_x = self.extract_processed_sample(frame, self.pos, sample_scales,
                                           self.img_sample_sz)

    # Compute scores
    raw_scores = self.apply_filter(test_x)
    translation_vec, scale_ind, s, flag = self.localize_target(raw_scores)
    target_found = flag != 'not_found'

    # Update position and scale
    if target_found:
        if self.use_iou_net:
            update_scale_flag = (
                getattr(self.params, 'update_scale_when_uncertain', True)
                or flag != 'uncertain')
            if getattr(self.params, 'use_classifier', True):
                self.update_state(sample_pos + translation_vec)
            self.refine_target_box(sample_pos, sample_scales[scale_ind],
                                   scale_ind, update_scale_flag)
        elif getattr(self.params, 'use_classifier', True):
            self.update_state(sample_pos + translation_vec,
                              sample_scales[scale_ind])

    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()
    self.debug_info['max_score'] = max_score
    self.debug_info['flag'] = flag

    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5,
                    title='Max score = {:.2f}'.format(max_score))

    # ------- METRIC / LOF CHECK ------- #

    # Use the IoU-refined centre when one was produced for this frame.
    if self.use_iou_net and target_found:
        centre = self.pos_iounet.clone()
    else:
        centre = self.pos
    swapped_sz = self.target_sz[[1, 0]]
    state_tmp = torch.cat((centre[[1, 0]] - (swapped_sz - 1) / 2, swapped_sz))
    state_tmp = state_tmp.numpy()

    with torch.no_grad():
        current_target_metric_feature0 = get_target_feature(
            self.metric_model, state_tmp, np.array(image))
    current_target_metric_feature = (
        current_target_metric_feature0.cpu().detach().numpy())

    lof_score, success = lof(current_target_metric_feature, self.clf, k=5,
                             thresh=self.lof_thresh)

    # During the first `train_skipping` frames the threshold is a running
    # mean of the observed LOF scores scaled by `lof_rate`.
    if self.frame_num <= self.params.train_skipping:
        self.lof_thresh = (self.lof_thresh * (self.frame_num - 2)
                           + lof_score * self.params.lof_rate) / (
                               self.frame_num - 1)
        if self.frame_num == self.params.train_skipping:
            print(self.frame_num, lof_score, self.lof_thresh)

    # ------- UPDATE ------- #

    # Check flags and set learning rate if hard negative
    update_flag = flag not in ['not_found', 'uncertain']
    if self.frame_num > self.params.train_skipping:
        update_flag = update_flag and success
    hard_negative = (flag == 'hard_negative')
    if hard_negative:
        learning_rate = self.params.hard_negative_learning_rate
    else:
        learning_rate = None

    if update_flag:
        # Get train sample
        train_x = TensorList(
            [x[scale_ind:scale_ind + 1, ...] for x in test_x])

        # Create label for sample
        train_y = self.get_label_function(sample_pos,
                                          sample_scales[scale_ind])

        # Update memory
        self.update_memory(train_x, train_y, learning_rate)

    # Train filter
    if hard_negative:
        self.filter_optimizer.run(self.params.hard_negative_CG_iter)
    elif (self.frame_num - 1) % self.params.train_skipping == 0:
        self.filter_optimizer.run(self.params.CG_iter)

    # Set the pos of the tracker to iounet pos
    if self.use_iou_net and target_found:
        self.pos = self.pos_iounet.clone()

    # Return new state
    out_sz = self.target_sz[[1, 0]]
    new_state = torch.cat((self.pos[[1, 0]] - (out_sz - 1) / 2, out_sz))
    return {'target_bbox': new_state.tolist()}
def track(self, image) -> dict:
    """Track one frame with the classifier (+ optional IoU refinement).

    Backbone features are extracted around ``get_centered_sample_pos()``,
    classified, and the best response is turned into a new position.  The
    classifier is updated when ``params.update_classifier`` is set and the
    localisation flag is neither 'not_found' nor 'uncertain'.

    Args:
        image: Input frame, converted with ``numpy_to_torch``.

    Returns:
        dict: ``{'target_bbox': box}``; ``box`` is ``[0, 0, 0, 0]`` when the
        flag is 'not_found', otherwise
        ``[pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2, sz[1], sz[0]]``.
    """
    self.debug_info = {}
    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # Convert image
    frame = numpy_to_torch(image)

    # ------- LOCALIZATION ------- #

    # Extract backbone features
    backbone_feat, sample_coords = self.extract_backbone_features(
        frame, self.get_centered_sample_pos(),
        self.target_scale * self.params.scale_factors,
        self.img_sample_sz)

    # Extract classification features
    test_x = self.get_classification_features(backbone_feat)

    # Location of sample
    sample_pos, sample_scales = self.get_sample_location(sample_coords)

    # Compute classification scores
    raw_scores = self.classify_target(test_x)

    # Localize the target
    translation_vec, scale_ind, s, flag = self.localize_target(
        raw_scores, sample_scales)
    new_pos = sample_pos[scale_ind, :] + translation_vec
    self.debug_info['flag'] = flag

    target_found = flag != 'not_found'

    # Update position and scale
    if target_found:
        if getattr(self.params, 'use_iou_net', True):
            update_scale_flag = (
                getattr(self.params, 'update_scale_when_uncertain', True)
                or flag != 'uncertain')
            if getattr(self.params, 'use_classifier', True):
                self.update_state(new_pos)
            self.refine_target_box(backbone_feat, sample_pos[scale_ind, :],
                                   sample_scales[scale_ind], scale_ind,
                                   update_scale_flag)
        elif getattr(self.params, 'use_classifier', True):
            self.update_state(new_pos, sample_scales[scale_ind])

    # ------- UPDATE ------- #

    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    if hard_negative:
        learning_rate = getattr(self.params, 'hard_negative_learning_rate',
                                None)
    else:
        learning_rate = None

    if getattr(self.params, 'update_classifier', False) and update_flag:
        # Get train sample
        train_x = test_x[scale_ind:scale_ind + 1, ...]

        # Create target_box and label for spatial sample
        target_box = self.get_iounet_box(self.pos, self.target_sz,
                                         sample_pos[scale_ind, :],
                                         sample_scales[scale_ind])

        # Update the classifier model
        self.update_classifier(train_x, target_box, learning_rate,
                               s[scale_ind, ...])

    # Set the pos of the tracker to iounet pos
    if (getattr(self.params, 'use_iou_net', True) and target_found
            and hasattr(self, 'pos_iounet')):
        self.pos = self.pos_iounet.clone()

    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()
    self.debug_info['max_score'] = max_score

    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5,
                    title='Max score = {:.2f}'.format(max_score))

    # Compute output bounding box
    out_sz = self.target_sz[[1, 0]]
    new_state = torch.cat((self.pos[[1, 0]] - (out_sz - 1) / 2, out_sz))

    if target_found:
        return {'target_bbox': new_state.tolist()}
    # e.g. occluded or out of view
    return {'target_bbox': [0, 0, 0, 0]}
def track(self, image, info: dict = None) -> dict:
    """Track one frame: localise, optionally refine the box, update the filter.

    Args:
        image: Input frame, converted with ``numpy_to_torch``.
        info: Accepted for interface compatibility; not read by this method.

    Returns:
        dict: ``{'target_bbox': [...]}`` where the list is
        ``[pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2, sz[1], sz[0]]``.
    """
    self.frame_num += 1
    self.debug_info = {'frame_num': self.frame_num}

    # Convert image
    frame = numpy_to_torch(image)
    self.im = frame  # For debugging only

    # ------- LOCALIZATION ------- #

    # Get sample
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    test_x = self.extract_processed_sample(frame, self.pos, sample_scales,
                                           self.img_sample_sz)

    # Compute scores
    raw_scores = self.apply_filter(test_x)
    translation_vec, scale_ind, s, flag = self.localize_target(raw_scores)
    target_found = flag != 'not_found'

    # Update position and scale
    if target_found:
        if self.use_iou_net:
            update_scale_flag = (
                self.params.get('update_scale_when_uncertain', True)
                or flag != 'uncertain')
            if self.params.get('use_classifier', True):
                self.update_state(sample_pos + translation_vec)
            self.refine_target_box(sample_pos, sample_scales[scale_ind],
                                   scale_ind, update_scale_flag)
        elif self.params.get('use_classifier', True):
            self.update_state(sample_pos + translation_vec,
                              sample_scales[scale_ind])

    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()
    self.debug_info['max_score'] = max_score
    self.debug_info['flag'] = flag

    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5,
                    title='Max score = {:.2f}'.format(max_score))

    # ------- UPDATE ------- #

    # Check flags and set learning rate if hard negative
    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    if hard_negative:
        learning_rate = self.params.hard_negative_learning_rate
    else:
        learning_rate = None

    if update_flag:
        # Keep only the sample of the winning scale.
        train_x = TensorList(
            [x[scale_ind:scale_ind + 1, ...] for x in test_x])

        # Create label for sample
        train_y = self.get_label_function(sample_pos,
                                          sample_scales[scale_ind])

        # Update memory
        self.update_memory(train_x, train_y, learning_rate)

    # Train filter
    if hard_negative:
        self.filter_optimizer.run(self.params.hard_negative_CG_iter)
    elif (self.frame_num - 1) % self.params.train_skipping == 0:
        self.filter_optimizer.run(self.params.CG_iter)

    # Set the pos of the tracker to iounet pos
    if self.use_iou_net and target_found:
        self.pos = self.pos_iounet.clone()

    # Return new state
    out_sz = self.target_sz[[1, 0]]
    new_state = torch.cat((self.pos[[1, 0]] - (out_sz - 1) / 2, out_sz))
    return {'target_bbox': new_state.tolist()}
def track(self, image, info: dict = None) -> dict:
    """Track one frame by searching several depth slabs of a depth image.

    ``image`` is sliced into value ranges ("layers") around ``self.layer_id``
    (1000 units apart, 2000 units thick; layer 10 covers 10000 up to the
    image maximum -- presumably millimetres, confirm against the data
    loader).  Each slab is normalised to 0..255, colour-mapped with
    ``cv2.COLORMAP_JET`` and run through the classifier; the slab with the
    highest score whose flag is not 'not_found' is used for the state and
    model update.

    Fix relative to the previous implementation: pixels outside the slab
    used to be zeroed *before* subtracting ``lower``, so they became negative
    (or wrapped for unsigned depth) and then wrapped again in the ``uint8``
    cast.  They are now masked to 0 after normalisation, and an empty or
    inverted range (``upper <= lower``) yields an all-zero slab instead of a
    division by zero.  Per-frame ``print`` output is emitted only when
    ``params.debug >= 1``.

    Args:
        image: Depth frame as a NumPy array.
        info: Accepted for interface compatibility; not read by this method.

    Returns:
        dict with keys 'target_bbox' (list of four numbers, or
        ``[-1, -1, -1, -1]`` when ``params.output_not_found_box`` is set and
        nothing was found), 'confidence' (best score, 0 when not found) and
        'image' (``torch_to_numpy`` of the selected layer).
    """
    self.debug_info = {}
    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # State of the best layer seen so far.
    max_score = 0
    flag = 'not_found'
    new_pos = [-1, -1, -1, -1]
    scale_ind = None
    backbone_feat = None
    test_x = None
    sample_pos = None
    sample_scales = None
    s = None
    sample_coords = None
    target_dist = 0
    final_layer = image

    verbose = self.params.debug >= 1
    image_max = np.max(image)
    if verbose:
        print(image_max)

    # Search the layer used last time and its neighbours.
    start = max(0, self.layer_id - 1)
    end = min(11, self.layer_id + 2)
    for z_dist in range(start, end):
        if z_dist == 10:
            lower = 10000
            upper = image_max
        else:
            lower = z_dist * 1000
            upper = (z_dist + 2) * 1000

        # NaN pixels compare False and are therefore treated as out of range.
        in_range = (image >= lower) & (image <= upper)
        if verbose:
            print(lower, upper,
                  np.median(np.nonzero(np.where(in_range, image, 0))))

        # Normalise in float64 so unsigned depth values cannot wrap.
        span = float(upper) - float(lower)
        if span > 0:
            layer = (image.astype(np.float64) - float(lower)) / span
        else:
            layer = np.zeros(np.shape(image), dtype=np.float64)
        layer = np.where(in_range, layer, 0.0)

        layer = np.asarray(layer * 255, dtype=np.uint8)
        layer = cv2.applyColorMap(layer, cv2.COLORMAP_JET)
        layer = numpy_to_torch(layer)

        # Extract backbone features
        backbone_feat_layer, sample_coords_layer, im_patches_layer = \
            self.extract_backbone_features(
                layer, self.get_centered_sample_pos(),
                self.target_scale * self.params.scale_factors,
                self.img_sample_sz)

        # Extract classification features
        test_x_layer = self.get_classification_features(backbone_feat_layer)

        # Location of sample
        sample_pos_layer, sample_scales_layer = self.get_sample_location(
            sample_coords_layer)

        # Compute classification scores
        scores_raw_layer = self.classify_target(test_x_layer)

        # Localize the target
        translation_vec_layer, scale_ind_layer, s_layer, flag_layer = \
            self.localize_target(scores_raw_layer, sample_pos_layer,
                                 sample_scales_layer)
        new_pos_layer = (sample_pos_layer[scale_ind_layer, :]
                         + translation_vec_layer)

        score_map_layer = s_layer[scale_ind_layer, ...]
        max_score_layer = torch.max(score_map_layer).item()

        # Keep the best-scoring layer in which the target was found.
        if flag_layer != 'not_found' and max_score_layer > max_score:
            flag = flag_layer
            max_score = max_score_layer
            new_pos = new_pos_layer
            scale_ind = scale_ind_layer
            sample_pos = sample_pos_layer
            backbone_feat = backbone_feat_layer
            test_x = test_x_layer
            sample_scales = sample_scales_layer
            s = s_layer
            target_dist = z_dist
            sample_coords = sample_coords_layer
            final_layer = layer

    if verbose:
        print('Choose %d meter ... ' % target_dist, flag, max_score)

    # Update position and scale
    if flag != 'not_found':
        if self.params.get('use_iou_net', True):
            update_scale_flag = self.params.get(
                'update_scale_when_uncertain', True) or flag != 'uncertain'
            if self.params.get('use_classifier', True):
                self.update_state(new_pos)
            self.refine_target_box(backbone_feat, sample_pos[scale_ind, :],
                                   sample_scales[scale_ind], scale_ind,
                                   update_scale_flag)
        elif self.params.get('use_classifier', True):
            self.update_state(new_pos, sample_scales[scale_ind])

    # ------- UPDATE ------- #

    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    learning_rate = self.params.get('hard_negative_learning_rate',
                                    None) if hard_negative else None

    if update_flag and self.params.get('update_classifier', False):
        # Get train sample
        train_x = test_x[scale_ind:scale_ind + 1, ...]

        # Create target_box and label for spatial sample
        target_box = self.get_iounet_box(self.pos, self.target_sz,
                                         sample_pos[scale_ind, :],
                                         sample_scales[scale_ind])

        # Update the classifier model
        self.update_classifier(train_x, target_box, learning_rate,
                               s[scale_ind, ...])

    # Set the pos of the tracker to iounet pos
    if self.params.get('use_iou_net', True) and flag != 'not_found' \
            and hasattr(self, 'pos_iounet'):
        self.pos = self.pos_iounet.clone()

    if flag != 'not_found':
        score_map = s[scale_ind, ...]
        max_score = torch.max(score_map).item()

        # Visualize and set debug info
        self.search_area_box = torch.cat(
            (sample_coords[scale_ind, [1, 0]],
             sample_coords[scale_ind, [3, 2]]
             - sample_coords[scale_ind, [1, 0]] - 1))
        self.debug_info['flag' + self.id_str] = flag
        self.debug_info['max_score' + self.id_str] = max_score
        if self.visdom is not None:
            self.visdom.register(score_map, 'heatmap', 2,
                                 'Score Map' + self.id_str)
            self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
        elif self.params.debug >= 2:
            show_tensor(score_map, 5,
                        title='Max score = {:.2f}'.format(max_score))
    else:
        max_score = 0
        # NOTE(review): `image` is the raw NumPy input here, yet it is passed
        # to torch_to_numpy below -- confirm that helper accepts ndarrays.
        final_layer = image

    # Compute output bounding box
    new_state = torch.cat(
        (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
         self.target_sz[[1, 0]]))

    if self.params.get('output_not_found_box', False) \
            and flag == 'not_found':
        output_state = [-1, -1, -1, -1]
    else:
        output_state = new_state.tolist()

    # Pick the layer to centre the next search on from the target's depth.
    target_depth = get_target_depth(image, output_state)
    self.layer_id = int(target_depth // 2000)

    out = {
        'target_bbox': output_state,
        'confidence': max_score,
        'image': torch_to_numpy(final_layer)
    }
    return out
def track(self, image):
    """Track one frame and keep the winning response map in ``self.response``.

    Args:
        image: Input frame, converted with ``numpy_to_torch``.

    Returns:
        list: ``[pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2, sz[1],
        sz[0]]`` built from ``self.pos`` and ``self.target_sz``.
    """
    self.frame_num += 1

    # Convert image
    frame = numpy_to_torch(image)
    self.im = frame  # For debugging only

    # ------- LOCALIZATION ------- #

    # Get sample
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    test_x = self.extract_processed_sample(frame, self.pos, sample_scales,
                                           self.img_sample_sz)

    # Compute scores
    raw_scores = self.apply_filter(test_x)
    translation_vec, scale_ind, s, flag = self.localize_target(raw_scores)
    target_found = flag != 'not_found'

    # Response of the selected scale, stored for use outside this method
    # (the original comment mentions adjusting the search region).
    self.response = s[scale_ind]

    # Update position and scale
    if target_found:
        if self.use_iou_net:
            update_scale_flag = (
                getattr(self.params, 'update_scale_when_uncertain', True)
                or flag != 'uncertain')
            if getattr(self.params, 'use_classifier', True):
                self.update_state(sample_pos + translation_vec)
            self.refine_target_box(sample_pos, sample_scales[scale_ind],
                                   scale_ind, update_scale_flag)
        elif getattr(self.params, 'use_classifier', True):
            self.update_state(sample_pos + translation_vec,
                              sample_scales[scale_ind])

    if self.params.debug >= 2:
        peak = torch.max(s[scale_ind, ...]).item()
        show_tensor(s[scale_ind, ...], 5,
                    title='Max score = {:.2f}'.format(peak))

    # ------- UPDATE ------- #

    # Check flags and set learning rate if hard negative
    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    if hard_negative:
        learning_rate = self.params.hard_negative_learning_rate
    else:
        learning_rate = None

    if update_flag:
        # Keep only the sample of the winning scale.
        train_x = TensorList(
            [x[scale_ind:scale_ind + 1, ...] for x in test_x])

        # Create label for sample
        train_y = self.get_label_function(sample_pos,
                                          sample_scales[scale_ind])

        # Update memory
        self.update_memory(train_x, train_y, learning_rate)

    # Train filter
    if hard_negative:
        self.filter_optimizer.run(self.params.hard_negative_CG_iter)
    elif (self.frame_num - 1) % self.params.train_skipping == 0:
        self.filter_optimizer.run(self.params.CG_iter)

    # Set the pos of the tracker to iounet pos
    if self.use_iou_net and target_found:
        self.pos = self.pos_iounet.clone()

    # Return new state
    out_sz = self.target_sz[[1, 0]]
    new_state = torch.cat((self.pos[[1, 0]] - (out_sz - 1) / 2, out_sz))
    return new_state.tolist()
def track(self, image, info: dict = None) -> dict:
    """Track one frame and report the peak response as confidence.

    Args:
        image: Input frame, converted with ``numpy_to_torch``.
        info: Not read by this method (an earlier experiment took a depth map
            from it; that path is disabled, so only ``image`` is used).

    Returns:
        dict: ``{'target_bbox': [...], 'confidence': max_score}`` where the
        box is ``[pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2, sz[1],
        sz[0]]`` and ``max_score`` is the maximum of the selected score map.
    """
    self.frame_num += 1
    self.debug_info = {'frame_num': self.frame_num}

    # Convert image
    frame = numpy_to_torch(image)
    self.im = frame  # For debugging only

    # ------- LOCALIZATION ------- #

    # Get sample
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    test_x = self.extract_processed_sample(frame, self.pos, sample_scales,
                                           self.img_sample_sz)

    # Compute scores
    raw_scores = self.apply_filter(test_x)
    translation_vec, scale_ind, s, flag = self.localize_target(raw_scores)
    target_found = flag != 'not_found'

    # Update position and scale
    if target_found:
        if self.use_iou_net:
            update_scale_flag = (
                self.params.get('update_scale_when_uncertain', True)
                or flag != 'uncertain')
            if self.params.get('use_classifier', True):
                self.update_state(sample_pos + translation_vec)
            self.refine_target_box(sample_pos, sample_scales[scale_ind],
                                   scale_ind, update_scale_flag)
        elif self.params.get('use_classifier', True):
            self.update_state(sample_pos + translation_vec,
                              sample_scales[scale_ind])

    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()
    self.debug_info['max_score'] = max_score
    self.debug_info['flag'] = flag

    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5,
                    title='Max score = {:.2f}'.format(max_score))

    # ------- UPDATE ------- #

    # Check flags and set learning rate if hard negative
    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    if hard_negative:
        learning_rate = self.params.hard_negative_learning_rate
    else:
        learning_rate = None

    if update_flag:
        # Keep only the sample of the winning scale.
        train_x = TensorList(
            [x[scale_ind:scale_ind + 1, ...] for x in test_x])

        # Create label for sample
        train_y = self.get_label_function(sample_pos,
                                          sample_scales[scale_ind])

        # Update memory
        self.update_memory(train_x, train_y, learning_rate)

    # Train filter
    if hard_negative:
        self.filter_optimizer.run(self.params.hard_negative_CG_iter)
    elif (self.frame_num - 1) % self.params.train_skipping == 0:
        self.filter_optimizer.run(self.params.CG_iter)

    # Set the pos of the tracker to iounet pos
    if self.use_iou_net and target_found:
        self.pos = self.pos_iounet.clone()

    # Return new state together with the peak score as confidence.
    out_sz = self.target_sz[[1, 0]]
    new_state = torch.cat((self.pos[[1, 0]] - (out_sz - 1) / 2, out_sz))
    return {
        'target_bbox': new_state.tolist(),
        'confidence': max_score
    }
def track(self, image) -> dict:
    """Track one frame; update the filter in batches gated by a LOF test.

    Every frame the shifted training sample and a metric feature of the
    current box are buffered (``self.train_xf``,
    ``self.current_target_metric_feature``).  When
    ``frame_num % train_skipping == 1`` the buffered batch is scored with
    ``lof``; accepted samples are added to memory, the optimizer is run on
    the last accepted one, and both buffers are cleared.

    Change relative to the previous implementation: the similarity scan over
    ``self.target_features_all`` now stops at the first stored feature closer
    than ``self.similar``.  The old trailing ``continue`` was a no-op, so the
    scan kept computing norms after the sample had already been rejected;
    the resulting ``success`` values are unchanged.

    Args:
        image: Input frame, converted with ``numpy_to_torch``.

    Returns:
        dict: ``{'target_bbox': [...]}`` where the list is
        ``[pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2, sz[1], sz[0]]``.
    """
    self.debug_info = {}
    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # Convert image
    im = numpy_to_torch(image)

    # ------- LOCALIZATION ------- #

    # Get sample
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    test_xf = self.extract_fourier_sample(im, self.pos, sample_scales,
                                          self.img_sample_sz)

    # Compute scores
    sf = self.apply_filter(test_xf)
    translation_vec, scale_ind, s = self.localize_target(sf)
    scale_change_factor = self.params.scale_factors[scale_ind]

    # Update position and scale
    self.update_state(sample_pos + translation_vec,
                      self.target_scale * scale_change_factor)

    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()
    self.debug_info['max_score'] = max_score

    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5,
                    title='Max score = {:.2f}'.format(max_score))

    # ------- METRIC FEATURE ------- #

    # Buffer the metric feature of the box at the updated state.
    state_tmp = torch.cat(
        (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
         self.target_sz[[1, 0]]))
    state_tmp = state_tmp.numpy()
    with torch.no_grad():
        self.current_target_metric_feature.append(
            get_target_feature(self.metric_model, state_tmp,
                               np.array(image)).cpu().detach().numpy())

    # ------- UPDATE ------- #

    # Get train sample
    train_xf = TensorList(
        [xf[scale_ind:scale_ind + 1, ...] for xf in test_xf])

    # Shift the sample
    shift_samp = 2 * math.pi * (self.pos - sample_pos) / (
        sample_scales[scale_ind] * self.img_support_sz)
    train_xf = fourier.shift_fs(train_xf, shift=shift_samp)
    self.train_xf.append(train_xf)

    if self.frame_num == 1:
        # First frame: always store the sample and train.
        self.update_memory(train_xf)
        self.filter_optimizer.run(self.params.CG_iter, train_xf)
        self.symmetrize_filter()
    elif self.frame_num % self.params.train_skipping == 1:
        warm_up = self.frame_num <= self.params.train_skipping + 1

        current_target_metric_feature = np.array(
            self.current_target_metric_feature).squeeze()
        # NOTE(review): hard-coded .cuda() -- this path requires a GPU.
        current_target_metric_feature0 = torch.from_numpy(
            current_target_metric_feature).cuda()

        lof_predict, success = lof(current_target_metric_feature, self.clf,
                                   k=5, thresh=self.lof_thresh)
        last_id = -1

        # The first batch calibrates the threshold from its own LOF scores.
        if warm_up:
            self.lof_thresh = lof_predict.mean() * self.params.lof_rate
            print('lof_thresh:', self.lof_thresh)

        for ii, buffered_xf in enumerate(self.train_xf):
            feature = current_target_metric_feature0[ii].reshape([1, 1024])

            # After warm-up, reject samples too close to a stored feature
            # (most recently stored features are checked first).
            if not warm_up and success[ii]:
                for stored in reversed(self.target_features_all):
                    dist = torch.norm(stored - feature, 2, dim=1).view(-1)
                    if dist < self.similar:
                        success[ii] = 0
                        break

            if warm_up or success[ii]:
                self.target_features_all.append(feature)
                last_id = ii
                self.update_memory(buffered_xf)

        # Train on the last accepted sample of the batch, if any.
        if last_id > -1:
            self.filter_optimizer.run(self.params.CG_iter,
                                      self.train_xf[last_id])
            self.symmetrize_filter()

        # Start a new batch.
        self.current_target_metric_feature = []
        self.train_xf = []

    # Return new state
    new_state = torch.cat(
        (self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2,
         self.target_sz[[1, 0]]))
    out = {'target_bbox': new_state.tolist()}
    return out
def track(self, image, info: dict = None) -> dict:
    """Track one frame with the classifier (+ optional IoU refinement).

    Args:
        image: Input frame, converted with ``numpy_to_torch``.
        info: Accepted for interface compatibility; not read by this method.

    Returns:
        dict: ``{'target_bbox': box}``.  ``box`` is ``[-1, -1, -1, -1]`` when
        ``params.output_not_found_box`` is set and the flag is 'not_found';
        otherwise ``[pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2,
        sz[1], sz[0]]``.
    """
    self.frame_num += 1
    self.debug_info = {'frame_num': self.frame_num}

    # Convert image
    frame = numpy_to_torch(image)

    # ------- LOCALIZATION ------- #

    # Extract backbone features
    backbone_feat, sample_coords, im_patches = self.extract_backbone_features(
        frame, self.get_centered_sample_pos(),
        self.target_scale * self.params.scale_factors, self.img_sample_sz)

    # Extract classification features
    test_x = self.get_classification_features(backbone_feat)

    # Location of sample
    sample_pos, sample_scales = self.get_sample_location(sample_coords)

    # Compute classification scores
    raw_scores = self.classify_target(test_x)

    # Localize the target
    translation_vec, scale_ind, s, flag = self.localize_target(
        raw_scores, sample_pos, sample_scales)
    new_pos = sample_pos[scale_ind, :] + translation_vec
    target_found = flag != 'not_found'

    # Update position and scale
    if target_found:
        if self.params.get('use_iou_net', True):
            update_scale_flag = (
                self.params.get('update_scale_when_uncertain', True)
                or flag != 'uncertain')
            if self.params.get('use_classifier', True):
                self.update_state(new_pos)
            self.refine_target_box(backbone_feat, sample_pos[scale_ind, :],
                                   sample_scales[scale_ind], scale_ind,
                                   update_scale_flag)
        elif self.params.get('use_classifier', True):
            self.update_state(new_pos, sample_scales[scale_ind])

    # ------- UPDATE ------- #

    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    if hard_negative:
        learning_rate = self.params.get('hard_negative_learning_rate', None)
    else:
        learning_rate = None

    if update_flag and self.params.get('update_classifier', False):
        # Get train sample
        train_x = test_x[scale_ind:scale_ind + 1, ...]

        # Create target_box and label for spatial sample
        target_box = self.get_iounet_box(self.pos, self.target_sz,
                                         sample_pos[scale_ind, :],
                                         sample_scales[scale_ind])

        # Update the classifier model
        self.update_classifier(train_x, target_box, learning_rate,
                               s[scale_ind, ...])

    # Set the pos of the tracker to iounet pos
    if (self.params.get('use_iou_net', True) and target_found
            and hasattr(self, 'pos_iounet')):
        self.pos = self.pos_iounet.clone()

    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()

    # Visualize and set debug info
    corner = sample_coords[scale_ind, [1, 0]]
    self.search_area_box = torch.cat(
        (corner, sample_coords[scale_ind, [3, 2]] - corner - 1))
    self.debug_info['flag' + self.id_str] = flag
    self.debug_info['max_score' + self.id_str] = max_score
    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2,
                             'Score Map' + self.id_str)
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5,
                    title='Max score = {:.2f}'.format(max_score))

    # Compute output bounding box
    out_sz = self.target_sz[[1, 0]]
    new_state = torch.cat((self.pos[[1, 0]] - (out_sz - 1) / 2, out_sz))

    if self.params.get('output_not_found_box', False) and not target_found:
        output_state = [-1, -1, -1, -1]
    else:
        output_state = new_state.tolist()

    return {'target_bbox': output_state}
def track(self, image1, image2):
    """Track one frame from two input images.

    Samples are extracted from both images at the same position and scales;
    the per-layer features are concatenated along dimension 1 before the
    filter is applied.

    Args:
        image1: First input image, converted with ``numpy_to_torch``.
        image2: Second input image, converted with ``numpy_to_torch``.

    Returns:
        list: ``[pos[1] - (sz[1] - 1) / 2, pos[0] - (sz[0] - 1) / 2, sz[1],
        sz[0]]`` built from ``self.pos`` and ``self.target_sz``.
    """
    self.frame_num += 1

    # Convert images
    frame_a = numpy_to_torch(image1)
    frame_b = numpy_to_torch(image2)

    # ------- LOCALIZATION ------- #

    # Get samples
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    feats_a = self.extract_sample(frame_a, self.pos, sample_scales,
                                  self.img_sample_sz)
    feats_b = self.extract_sample(frame_b, self.pos, sample_scales,
                                  self.img_sample_sz)
    test_x = TensorList(
        [torch.cat((fa, fb), 1) for fa, fb in zip(feats_a, feats_b)])

    # Compute scores
    raw_scores = self.apply_filter(test_x)
    translation_vec, scale_ind, s, flag = self.localize_target(raw_scores)
    target_found = flag != 'not_found'

    # Update position and scale
    if target_found:
        if self.use_iou_net:
            update_scale_flag = (
                getattr(self.params, 'update_scale_when_uncertain', True)
                or flag != 'uncertain')
            if getattr(self.params, 'use_classifier', True):
                self.update_state(sample_pos + translation_vec)
            self.refine_target_box(sample_pos, sample_scales[scale_ind],
                                   scale_ind, update_scale_flag)
        elif getattr(self.params, 'use_classifier', True):
            self.update_state(sample_pos + translation_vec,
                              sample_scales[scale_ind])

    if self.params.debug >= 2:
        peak = torch.max(s[scale_ind, ...]).item()
        show_tensor(s[scale_ind, ...], 5,
                    title='Max score = {:.2f}'.format(peak))

    # ------- UPDATE ------- #

    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    if hard_negative:
        learning_rate = getattr(self.params, 'hard_negative_learning_rate',
                                None)
    else:
        learning_rate = None

    if getattr(self.params, 'update_classifier', False) and update_flag:
        # Keep only the sample of the winning scale.
        train_x = TensorList(
            [x[scale_ind:scale_ind + 1, ...] for x in test_x])

        # Create target_box and label for spatial sample
        target_box = self.get_iounet_box(self.pos, self.target_sz,
                                         sample_pos,
                                         sample_scales[scale_ind])
        train_y = self.get_label_function(sample_pos,
                                          sample_scales[scale_ind])
        train_y = train_y.to(self.params.device)

        # Update the classifier model
        self.update_classifier(train_x, train_y, target_box, learning_rate,
                               s[scale_ind, ...])

    # Set the pos of the tracker to iounet pos
    if self.use_iou_net and target_found and hasattr(self, 'pos_iounet'):
        self.pos = self.pos_iounet.clone()

    # Return new state
    out_sz = self.target_sz[[1, 0]]
    new_state = torch.cat((self.pos[[1, 0]] - (out_sz - 1) / 2, out_sz))
    return new_state.tolist()
def track(self, image, info: dict = None) -> dict:
    """Run one tracking step on ``image`` using the Fourier-domain filter.

    The method extracts a Fourier sample at the current state, applies the
    filter, localizes the target, updates the tracker state, stores the
    (shifted) sample in memory and periodically re-runs the filter optimizer.

    Args:
        image: Input image (handed to ``numpy_to_torch``).
        info: Optional per-frame information. It is not read by this method.

    Returns:
        A dict with the single key ``'target_bbox'`` holding a list of four
        numbers: the entries of ``self.pos`` in swapped order minus half of
        (``self.target_sz`` - 1), followed by the entries of
        ``self.target_sz`` in swapped order.
        NOTE(review): presumably an [x, y, w, h] box - confirm with callers.
    """
    self.debug_info = {}

    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # Convert image
    im = numpy_to_torch(image)

    # ------- LOCALIZATION ------- #

    # Get sample
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    test_xf = self.extract_fourier_sample(im, self.pos, sample_scales, self.img_sample_sz)

    # Compute scores
    sf = self.apply_filter(test_xf)
    translation_vec, scale_ind, s = self.localize_target(sf)
    scale_change_factor = self.params.scale_factors[scale_ind]

    # Update position and scale
    self.update_state(sample_pos + translation_vec, self.target_scale * scale_change_factor)

    # Publish the score map of the selected scale
    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()
    self.debug_info['max_score'] = max_score

    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))

    # ------- UPDATE ------- #

    # Get train sample (the slice belonging to the selected scale)
    train_xf = TensorList([xf[scale_ind:scale_ind + 1, ...] for xf in test_xf])

    # Shift the sample by the offset between the updated position and the
    # rounded position the sample was extracted at
    shift_samp = 2 * math.pi * (self.pos - sample_pos) / (sample_scales[scale_ind] * self.img_support_sz)
    train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

    # Update memory
    self.update_memory(train_xf)

    # Train filter whenever frame_num is congruent to 1 modulo train_skipping.
    # Written as (frame_num - 1) % k == 0 rather than frame_num % k == 1:
    # both agree for k > 1, but the latter is never true for k == 1, which
    # would silently disable training instead of training on every frame.
    if (self.frame_num - 1) % self.params.train_skipping == 0:
        self.filter_optimizer.run(self.params.CG_iter, train_xf)
        self.symmetrize_filter()

    # Return new state
    new_state = torch.cat((self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2, self.target_sz[[1, 0]]))

    return {'target_bbox': new_state.tolist()}
def track(self, image):
    """Run one tracking step on ``image`` with the transformer-based classifier.

    Backbone and classification features are extracted, passed through
    ``self.transformer_decoder`` and scored; the tracker state is then
    updated, and, on confident frames, the classifier and the transformer
    memory (``self.x_clf``, ``self.transformer_label``,
    ``self.transformer_memory``) are refreshed.

    Args:
        image: Input image (handed to ``numpy_to_torch``).

    Returns:
        A list of four numbers: the entries of ``self.pos`` in swapped order
        minus half of (``self.target_sz`` - 1), followed by the entries of
        ``self.target_sz`` in swapped order. If the ``output_not_found_box``
        parameter is set and the target was not found, ``[-1, -1, -1, -1]``
        is returned instead.
        NOTE(review): presumably an [x, y, w, h] box - confirm with callers.
    """
    self.debug_info = {}

    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # Convert image
    im = numpy_to_torch(image)

    # ------- LOCALIZATION ------- #

    # Extract backbone features (the third return value is not needed here)
    backbone_feat, sample_coords, _ = self.extract_backbone_features(
        im, self.get_centered_sample_pos(),
        self.target_scale * self.params.scale_factors,
        self.img_sample_sz)

    # Extract classification features; only the second decoder output is used
    x_clf = self.get_classification_features(backbone_feat)
    _, test_x = self.transformer_decoder(x_clf)

    # Location of sample
    sample_pos, sample_scales = self.get_sample_location(sample_coords)

    # Compute classification scores
    scores_raw = self.classify_target(test_x)

    # Localize the target
    translation_vec, scale_ind, s, flag = self.localize_target(scores_raw, sample_pos, sample_scales)
    new_pos = sample_pos[scale_ind, :] + translation_vec

    # Update position and scale
    if flag != 'not_found':
        if self.params.get('use_iou_net', True):
            update_scale_flag = self.params.get('update_scale_when_uncertain', True) or flag != 'uncertain'
            if self.params.get('use_classifier', True):
                self.update_state(new_pos)
            self.refine_target_box(backbone_feat, sample_pos[scale_ind, :], sample_scales[scale_ind],
                                   scale_ind, update_scale_flag)
        elif self.params.get('use_classifier', True):
            self.update_state(new_pos, sample_scales[scale_ind])

    # ------- UPDATE ------- #

    update_flag = flag not in ['not_found', 'uncertain']
    hard_negative = (flag == 'hard_negative')
    learning_rate = self.params.get('hard_negative_learning_rate', None) if hard_negative else None

    if update_flag and self.params.get('update_classifier', False):
        # Get train sample
        train_x = test_x[scale_ind:scale_ind + 1, ...]

        # Create target_box and label for spatial sample
        target_box = self.get_iounet_box(self.pos, self.target_sz, sample_pos[scale_ind, :],
                                         sample_scales[scale_ind])

        # Update the classifier model
        self.update_classifier(train_x, target_box, learning_rate, s[scale_ind, ...])

        # Update Transformer memory
        if (self.frame_num - 1) % self.params.transformer_skipping == 0:
            cur_tf_label = prutils.gaussian_label_function(
                target_box.cpu().view(-1, 4), 0.1, self.net.classifier.filter_size,
                self.feature_sz, self.img_sample_sz, end_pad_if_even=False)

            # Put the new label on the device of the stored labels instead of
            # hard-coding CUDA, so the concatenation below cannot mix devices.
            new_label = cur_tf_label.unsqueeze(1).to(self.transformer_label.device)

            # New entries are prepended; once the memory holds
            # transformer_memory_size entries the last one is dropped.
            memory_full = self.x_clf.shape[0] >= self.params.transformer_memory_size
            kept_labels = self.transformer_label[:-1, ...] if memory_full else self.transformer_label
            kept_feats = self.x_clf[:-1, ...] if memory_full else self.x_clf

            self.transformer_label = torch.cat([new_label, kept_labels], dim=0)
            self.x_clf = torch.cat([x_clf, kept_feats], dim=0)

            # Re-encode the refreshed memory
            self.transformer_memory, _ = self.net.classifier.transformer.encoder(
                self.x_clf.unsqueeze(1), pos=None)

    # Set the pos of the tracker to iounet pos
    if self.params.get('use_iou_net', True) and flag != 'not_found' and hasattr(self, 'pos_iounet'):
        self.pos = self.pos_iounet.clone()

    score_map = s[scale_ind, ...]

    # Visualize and set debug info
    self.search_area_box = torch.cat(
        (sample_coords[scale_ind, [1, 0]],
         sample_coords[scale_ind, [3, 2]] - sample_coords[scale_ind, [1, 0]] - 1))

    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map' + self.id_str)
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        max_score = torch.max(score_map).item()
        show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))

    # Compute output bounding box
    new_state = torch.cat((self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2, self.target_sz[[1, 0]]))

    # Honour 'output_not_found_box': the sentinel box used to be computed but
    # never returned. With the option unset (default) the result is unchanged.
    if self.params.get('output_not_found_box', False) and flag == 'not_found':
        output_state = [-1, -1, -1, -1]
    else:
        output_state = new_state.tolist()

    return output_state
def track_updater(self, image) -> tuple:
    """Run the localization part of a tracking step and expose its internals.

    The sample is extracted and scored, the tracker state is updated, and the
    intermediate quantities are returned so that a caller can use them
    (no classifier update is performed in this method).

    Args:
        image: Input image (handed to ``numpy_to_torch``).

    Returns:
        An 8-tuple ``(bbox, score_map, test_x, scale_ind, sample_pos,
        sample_scales, flag, s)`` where ``bbox`` is a list of four numbers
        built from ``self.pos`` and ``self.target_sz`` (swapped entry order),
        ``score_map`` is ``s[scale_ind]`` converted to a NumPy array, and the
        remaining items are the values computed during localization.
        NOTE(review): ``bbox`` is presumably [x, y, w, h] - confirm with callers.
    """
    self.debug_info = {}

    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # Convert image
    im = numpy_to_torch(image)
    self.im = im  # For debugging only

    # ------- LOCALIZATION ------- #

    # Get sample
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    test_x = self.extract_processed_sample(im, self.pos, sample_scales, self.img_sample_sz)

    # Compute scores
    scores_raw = self.apply_filter(test_x)
    translation_vec, scale_ind, s, flag = self.localize_target(scores_raw)

    # Update position and scale
    if flag != 'not_found':
        if self.use_iou_net:
            update_scale_flag = getattr(self.params, 'update_scale_when_uncertain', True) or flag != 'uncertain'
            if getattr(self.params, 'use_classifier', True):
                self.update_state(sample_pos + translation_vec)
            self.refine_target_box(sample_pos, sample_scales[scale_ind], scale_ind, update_scale_flag)
        elif getattr(self.params, 'use_classifier', True):
            self.update_state(sample_pos + translation_vec, sample_scales[scale_ind])

    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()
    self.debug_info['max_score'] = max_score
    self.debug_info['flag'] = flag

    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))

    # ------- UPDATE ------- #

    # Set the pos of the tracker to iounet pos. Guarded with hasattr so that a
    # not-yet-assigned pos_iounet leaves self.pos untouched instead of raising
    # AttributeError.
    if self.use_iou_net and flag != 'not_found' and hasattr(self, 'pos_iounet'):
        self.pos = self.pos_iounet.clone()

    # Return new state together with the localization internals
    new_state = torch.cat((self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2, self.target_sz[[1, 0]]))

    return (new_state.tolist(), score_map.cpu().data.numpy(), test_x, scale_ind,
            sample_pos, sample_scales, flag, s)
def track_updater(self, image) -> tuple:
    """Run the localization part of a tracking step and expose its internals.

    Backbone and classification features are extracted and scored, the
    tracker state is updated, and the intermediate quantities are returned so
    that a caller can use them (no classifier update is performed in this
    method).

    Args:
        image: Input image (handed to ``numpy_to_torch``).

    Returns:
        An 8-tuple ``(bbox, score_map, test_x, scale_ind, sample_pos,
        sample_scales, flag, s)`` where ``bbox`` is a list of four numbers
        built from ``self.pos`` and ``self.target_sz`` (swapped entry order),
        ``score_map`` is ``s[scale_ind]`` converted to a NumPy array, and the
        remaining items are the values computed during localization.
        NOTE(review): ``bbox`` is presumably [x, y, w, h] - confirm with callers.
    """
    self.debug_info = {}

    self.frame_num += 1
    self.debug_info['frame_num'] = self.frame_num

    # Convert image
    im = numpy_to_torch(image)

    # ------- LOCALIZATION ------- #

    # Extract backbone features
    backbone_feat, sample_coords = self.extract_backbone_features(
        im, self.get_centered_sample_pos(),
        self.target_scale * self.params.scale_factors,
        self.img_sample_sz)

    # Extract classification features
    test_x = self.get_classification_features(backbone_feat)

    # Location of sample
    sample_pos, sample_scales = self.get_sample_location(sample_coords)

    # Compute classification scores
    scores_raw = self.classify_target(test_x)

    # Localize the target
    translation_vec, scale_ind, s, flag = self.localize_target(scores_raw, sample_scales)
    new_pos = sample_pos[scale_ind, :] + translation_vec

    self.debug_info['flag'] = flag

    # Update position and scale
    if flag != 'not_found':
        if getattr(self.params, 'use_iou_net', True):
            update_scale_flag = getattr(self.params, 'update_scale_when_uncertain', True) or flag != 'uncertain'
            if getattr(self.params, 'use_classifier', True):
                self.update_state(new_pos)
            self.refine_target_box(backbone_feat, sample_pos[scale_ind, :], sample_scales[scale_ind],
                                   scale_ind, update_scale_flag)
        elif getattr(self.params, 'use_classifier', True):
            self.update_state(new_pos, sample_scales[scale_ind])

    # ------- UPDATE ------- #

    # Set the pos of the tracker to iounet pos
    if getattr(self.params, 'use_iou_net', True) and flag != 'not_found' and hasattr(self, 'pos_iounet'):
        self.pos = self.pos_iounet.clone()

    score_map = s[scale_ind, ...]
    max_score = torch.max(score_map).item()
    self.debug_info['max_score'] = max_score

    if self.visdom is not None:
        self.visdom.register(score_map, 'heatmap', 2, 'Score Map')
        self.visdom.register(self.debug_info, 'info_dict', 1, 'Status')
    elif self.params.debug >= 2:
        show_tensor(score_map, 5, title='Max score = {:.2f}'.format(max_score))

    # Compute output bounding box
    new_state = torch.cat((self.pos[[1, 0]] - (self.target_sz[[1, 0]] - 1) / 2, self.target_sz[[1, 0]]))

    return (new_state.tolist(), score_map.cpu().data.numpy(), test_x, scale_ind,
            sample_pos, sample_scales, flag, s)