def track_feature(self, im, target_pos, target_sz, avg_chans=None):
    r"""
    Compute the target (template) feature used by the tracker.

    :param im: image frame
    :param target_pos: target position (x, y)
    :param target_sz: target size (w, h)
    :param avg_chans: channel mean values; computed from ``im`` when None
    :return f_z: first output of ``self._tracker`` on the template crop
    :return im_z_crop: cropped patch of target image
    :return avg_chans: channel average that was used for the crop
    """
    if avg_chans is None:
        # Mean over the two leading axes of the frame, one value per channel.
        avg_chans = np.mean(im, axis=(0, 1))

    hps = self._hyper_params
    im_z_crop, _ = get_crop(
        im,
        target_pos,
        target_sz,
        hps['z_size'],
        avg_chans=avg_chans,
        context_amount=hps['context_amount'],
        func_get_subwindow=get_subwindow_tracking,
    )

    template = imarray_to_tensor(im_z_crop).to(self.device)
    # Inference only: no autograd graph is needed for the template pass.
    with torch.no_grad():
        outputs = self._tracker(template, phase=hps['phase_init'])
    f_z = outputs[0]
    return f_z, im_z_crop, avg_chans
def feature(self, im, target_pos, target_sz, avg_chans=None):
    r"""
    Extract the template feature from a frame.

    :param im: initial frame
    :param target_pos: target position (x, y)
    :param target_sz: target size (w, h)
    :param avg_chans: channel mean values; computed from ``im`` when None
    :return: tuple ``(features, im_z_crop, avg_chans)`` -- the output of
        ``self.model`` on the template crop, the crop itself, and the channel
        means used for the crop
    """
    if avg_chans is None:
        # Mean over the two leading axes of the frame, one value per channel.
        avg_chans = np.mean(im, axis=(0, 1))

    hps = self._hyper_params
    im_z_crop, _ = get_crop(
        im,
        target_pos,
        target_sz,
        hps['z_size'],
        avg_chans=avg_chans,
        context_amount=hps['context_amount'],
        func_get_subwindow=get_subwindow_tracking,
    )

    template = imarray_to_tensor(im_z_crop).to(self.device)
    # Inference only: no autograd graph is needed for the template pass.
    with torch.no_grad():
        features = self.model(template, phase=hps['phase_init'])
    return features, im_z_crop, avg_chans
def init(self, im, state):
    r"""Initialize tracker and its online classifier

    Internal target state representation:
        self._state['state'] = (target_pos, target_sz)

    Arguments
    ---------
    im : np.array
        initial frame image
    state
        target state on initial frame (bbox in case of SOT), format: xywh
    """
    self.frame_num = 1
    self.temp_max = 0

    # The bbox arrives in xywh format; split the converted box into
    # position (first two entries) and size (remaining entries).
    cxywh = xywh2cxywh(state)
    target_pos = cxywh[:2]
    target_sz = cxywh[2:]
    self._state['im_h'] = im.shape[0]
    self._state['im_w'] = im.shape[1]

    # Template feature extraction (this class's ``feature`` returns four values).
    features, im_z_crop, avg_chans, im_z_crop_t = self.feature(
        im, target_pos, target_sz)

    hps = self._hyper_params
    score_size = hps['score_size']
    if hps['windowing'] == 'cosine':
        window = np.outer(np.hanning(score_size),
                          np.hanning(score_size)).reshape(-1)
    else:
        # 'uniform' and any unrecognised setting both yield an all-ones window.
        # NOTE(review): unlike the flattened cosine window this one stays 2-D
        # -- confirm that the consumers of self._state['window'] accept it.
        window = np.ones((score_size, score_size))

    self._state['z_crop'] = im_z_crop
    self._state['z0_crop'] = im_z_crop_t
    with torch.no_grad():
        self._model.instance(im_z_crop_t)
    self._state['avg_chans'] = avg_chans
    self._state['features'] = features
    self._state['window'] = window
    self._state['state'] = (target_pos, target_sz)

    # Online classifier initialisation on a crop twice the regular search size.
    init_im_crop, _ = get_crop(
        im,
        target_pos,
        target_sz,
        hps['z_size'],
        x_size=hps['x_size'] * 2,
        avg_chans=avg_chans,
        context_amount=hps['context_amount'],
        func_get_subwindow=get_subwindow_tracking,
    )
    self.online_classifier.initialize(imarray_to_tensor(init_im_crop), state)
def init(state, im_x, total_num):
    """Register new targets and extract a template feature for each of them.

    Uses names from the enclosing scope (``target_pos``, ``target_sz``,
    ``tracking_index``, ``im_z_crops``, ``lost``, ``features``, ``Model``,
    ``index``, ``z_size``, ``avg_chans``, ``context_amount``, ``phase``).

    :param state: sequence of target states; entries ``[:2]`` are used as the
        position and ``[2:4]`` as the size of each new target
    :param im_x: frame on which the new targets are initialised
    :param total_num: number of targets registered so far, used to build ids
    """
    device = torch.device("cuda")
    for i, target_state in enumerate(state):
        pos = target_state[:2]
        sz = target_state[2:4]
        target_pos.append(pos)
        target_sz.append(sz)
        tracking_index.append(index * 100 + total_num + i)

        # Crop around the target that was just added. Indexing the shared
        # lists with ``i`` (as before) picked up older targets whenever the
        # lists were already non-empty.
        im_z_crop, _ = get_crop(im_x,
                                pos,
                                sz,
                                z_size,
                                avg_chans=avg_chans,
                                context_amount=context_amount,
                                func_get_subwindow=get_subwindow_tracking)
        im_z_crops.append(im_z_crop)

        # HWC image -> 1xCxHxW float32 tensor on the GPU.
        array = torch.from_numpy(
            np.ascontiguousarray(
                im_z_crop.transpose(2, 0, 1)[np.newaxis, ...],
                np.float32)).to(device)
        lost.append(0)
        with torch.no_grad():
            features.append(Model(array, phase=phase))
def feature(self, im: np.array, target_pos, target_sz, avg_chans=None):
    """Extract the template feature from a frame.

    Parameters
    ----------
    im : np.array
        initial frame
    target_pos :
        target position (x, y)
    target_sz :
        target size (w, h)
    avg_chans : optional
        channel mean values, (B, G, R); computed from ``im`` when None

    Returns
    -------
    tuple
        ``(features, im_z_crop, avg_chans)``: the output of ``self._model`` on
        the template crop, the crop itself, and the channel means used for it.
    """
    if avg_chans is None:
        # Mean over the two leading axes of the frame, one value per channel.
        avg_chans = np.mean(im, axis=(0, 1))

    hps = self._hyper_params
    im_z_crop, _ = get_crop(
        im,
        target_pos,
        target_sz,
        hps['z_size'],
        avg_chans=avg_chans,
        context_amount=hps['context_amount'],
        func_get_subwindow=get_subwindow_tracking,
    )

    template = imarray_to_tensor(im_z_crop).to(self.device)
    # Inference only: no autograd graph is needed for the template pass.
    with torch.no_grad():
        features = self._model(template, phase=hps['phase_init'])
    return features, im_z_crop, avg_chans
def track(self,
          im_x,
          target_pos,
          target_sz,
          features,
          update_state=False,
          **kwargs):
    r"""
    Run one tracking step on a search frame, with lost-target handling.

    While ``self._state["lost_target"]`` is set, the search crop uses
    ``lost_search_size`` instead of the regular ``x_size``.

    :param im_x: current (search) frame
    :param target_pos: target position (x, y) the search crop is centred on
    :param target_sz: target size (w, h)
    :param features: template features, unpacked positionally into the model call
    :param update_state: when True, also record score / pscore / boxes / cls /
        ctr in ``self._state`` and refresh the ``lost_target`` flag
    :param kwargs: optional ``avg_chans`` overriding ``self._state['avg_chans']``
    :return: (new_target_pos, new_target_sz)
    """
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    if self._state["lost_target"]:
        x_size = self._hyper_params["lost_search_size"]
    context_amount = self._hyper_params['context_amount']
    phase_track = self._hyper_params['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )
    self._state["scale_x"] = deepcopy(scale_x)
    with torch.no_grad():
        score, box, cls, ctr, extra = self._model(
            imarray_to_tensor(im_x_crop).to(self.device),
            *features,
            phase=phase_track)
    if self._hyper_params["corr_fea_output"]:
        self._state["corr_fea"] = extra["corr_fea"]

    box = tensor_to_numpy(box[0])
    score = tensor_to_numpy(score[0])[:, 0]
    cls = tensor_to_numpy(cls[0])
    ctr = tensor_to_numpy(ctr[0])
    box_wh = xyxy2cxywh(box)

    # score post-processing
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, box_wh, target_sz, scale_x)
    # box post-processing
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # restrict new_target_pos & new_target_sz
    new_target_pos, new_target_sz = self._restrict_box(
        new_target_pos, new_target_sz)

    # record basic mid-level info
    self._state['x_crop'] = im_x_crop
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the exact
    # type it used to alias, so the resulting dtype is unchanged.
    bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
    self._state['bbox_pred_in_crop'] = bbox_pred_in_crop

    # record optional mid-level info
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore[best_pscore_id]
        self._state['all_box'] = box
        self._state['cls'] = cls
        self._state['ctr'] = ctr
        # Two thresholds: the flag only flips when the score clearly leaves
        # the band between them.
        if pscore[best_pscore_id] > self._hyper_params["lost_score_th_high"]:
            self._state["lost_target"] = False
        elif pscore[best_pscore_id] < self._hyper_params["lost_score_th_low"]:
            self._state["lost_target"] = True
            logger.info("lost target")

    return new_target_pos, new_target_sz
def track(self,
          im_x,
          target_pos,
          target_sz,
          features,
          update_state=False,
          **kwargs):
    r"""
    Run one tracking step using a memory of several templates.

    The model is evaluated once per template (``mem_len`` of them). The search
    features returned by the first evaluation (``extra['c_x']`` and
    ``extra['r_x']``) are reused for the remaining templates. With more than
    one template the final score is
    ``(1 - mem_coef) * score[0] + mem_coef * mean(score[1:])``; the boxes
    always come from the first template.

    :param im_x: current (search) frame
    :param target_pos: target position (x, y) the search crop is centred on
    :param target_sz: target size (w, h)
    :param features: indexable collection of per-template features; entry
        ``ith`` is unpacked positionally into the model call
    :param update_state: when True, also record score / pscore / boxes / cls /
        ctr in ``self._state``
    :param kwargs: optional ``avg_chans`` overriding ``self._state['avg_chans']``
    :return: (new_target_pos, new_target_sz)
    """
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    context_amount = self._hyper_params['context_amount']
    phase_track = self._hyper_params['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )

    # process batch of templates
    score_list = []
    box_list = []
    cls_list = []
    ctr_list = []
    fms_x = None
    for ith in range(self._hyper_params['mem_len']):
        if fms_x is None:
            # First template: run on the image crop and keep the search
            # features so later templates can skip that computation.
            with torch.no_grad():
                score, box, cls, ctr, extra = self._model(
                    imarray_to_tensor(im_x_crop).to(self.device),
                    *(features[ith]),
                    phase=phase_track)
            fms_x = [extra['c_x'], extra['r_x']]
        else:
            with torch.no_grad():
                score, box, cls, ctr, extra = self._model(*(features[ith]),
                                                          fms_x[0],
                                                          fms_x[1],
                                                          phase=phase_track)
        box = tensor_to_numpy(box[0])
        score = tensor_to_numpy(score[0])[:, 0]
        cls = tensor_to_numpy(cls[0])[:, 0]
        ctr = tensor_to_numpy(ctr[0])[:, 0]
        # append to list
        box_list.append(box)
        score_list.append(score)
        cls_list.append(cls)
        ctr_list.append(ctr)

    # fusion
    if self._hyper_params['mem_len'] > 1:
        score = score_list[0] * (1 - self._hyper_params['mem_coef']) + \
            np.stack(score_list[1:], axis=0).mean(axis=0) * self._hyper_params['mem_coef']
    else:
        # single template
        score = score_list[0]
    box = box_list[0]
    box_wh = xyxy2cxywh(box)

    # score post-processing
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, box_wh, target_sz, scale_x)
    # box post-processing
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # restrict new_target_pos & new_target_sz
    new_target_pos, new_target_sz = self._restrict_box(
        new_target_pos, new_target_sz)

    # record basic mid-level info
    self._state['x_crop'] = im_x_crop
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the exact
    # type it used to alias, so the resulting dtype is unchanged.
    bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
    self._state['bbox_pred_in_crop'] = bbox_pred_in_crop

    # record optional mid-level info
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore
        self._state['all_box'] = box
        # ``cls`` / ``ctr`` hold the values of the last template evaluated.
        self._state['cls'] = cls
        self._state['ctr'] = ctr

    return new_target_pos, new_target_sz
def multiprocessing_update(task, task_cfg, index, im, dataqueue, resultqueue):
    """Worker loop: track a dynamic set of targets on frames pulled from a queue.

    Each message taken from ``dataqueue`` is an ``(im_x, state, delete)``
    triple:

    * non-empty ``state``: the listed targets are registered on ``im_x`` and
      the loop waits for the next message (nothing is put on ``resultqueue``);
    * otherwise the targets whose id appears in ``delete`` are dropped, every
      remaining target is tracked on ``im_x`` and ``[result, tracking_index]``
      is put on ``resultqueue``.

    The loop has no exit condition. ``hyper_params`` and the
    ``postprocess_*`` / ``restrict_box`` helpers are taken from module scope.

    :param task: forwarded to ``model_builder.build_model``
    :param task_cfg: configuration; its ``model`` attribute is forwarded to
        ``model_builder.build_model``
    :param index: worker index; target ids are ``index * 100 + running count``
    :param im: reference image providing channel means and image height/width
    :param dataqueue: queue delivering ``(im_x, state, delete)`` triples
    :param resultqueue: queue receiving ``[result, tracking_index]``
    """
    device = torch.device("cuda")
    # build model
    Model = model_builder.build_model(task, task_cfg.model).to(device)
    Model.eval()

    # Parallel per-target lists; the same position refers to the same target.
    target_pos = []
    target_sz = []
    im_z_crops = []
    lost = []
    features = []
    tracking_index = []
    total_num = 0

    avg_chans = np.mean(im, axis=(0, 1))
    im_h, im_w = im.shape[0], im.shape[1]
    z_size = hyper_params['z_size']
    x_size = hyper_params['x_size']
    context_amount = hyper_params['context_amount']
    phase = hyper_params['phase_init']
    phase_track = hyper_params['phase_track']
    score_size = (
        hyper_params['x_size'] - hyper_params['z_size']
    ) // hyper_params['total_stride'] + 1 - hyper_params['num_conv3x3'] * 2
    if hyper_params['windowing'] == 'cosine':
        window = np.outer(np.hanning(score_size), np.hanning(score_size))
        window = window.reshape(-1)
    else:
        # 'uniform' and any unrecognised setting both yield an all-ones window.
        # NOTE(review): unlike the flattened cosine window this one stays 2-D
        # -- confirm that postprocess_score accepts it.
        window = np.ones((score_size, score_size))

    def to_tensor(crop):
        # HWC image -> 1xCxHxW float32 tensor on the GPU.
        return torch.from_numpy(
            np.ascontiguousarray(crop.transpose(2, 0, 1)[np.newaxis, ...],
                                 np.float32)).to(device)

    def init(state, im_x, total_num):
        # Register each new target and extract its template feature.
        for i, target_state in enumerate(state):
            pos = target_state[:2]
            sz = target_state[2:4]
            target_pos.append(pos)
            target_sz.append(sz)
            tracking_index.append(index * 100 + total_num + i)
            # Crop around the target that was just added. Indexing the shared
            # lists with ``i`` (as before) picked up older targets whenever
            # the lists were already non-empty.
            im_z_crop, _ = get_crop(im_x,
                                    pos,
                                    sz,
                                    z_size,
                                    avg_chans=avg_chans,
                                    context_amount=context_amount,
                                    func_get_subwindow=get_subwindow_tracking)
            im_z_crops.append(im_z_crop)
            array = to_tensor(im_z_crop)
            lost.append(0)
            with torch.no_grad():
                features.append(Model(array, phase=phase))

    def delete_node(j):
        # Drop target ``j`` from every parallel list.
        try:
            del target_pos[j]
            del target_sz[j]
            del features[j]
            del tracking_index[j]
            del lost[j]
            # Keep the stored template crops aligned with the other lists;
            # previously they were never removed and accumulated forever.
            del im_z_crops[j]
        except Exception as error:
            print("delete error", error)

    while True:
        try:
            im_x, state, delete = dataqueue.get()
        except Exception as error:
            print(error)
            continue

        if len(state) > 0:
            # Registration message: add the targets, produce no result.
            init(state, im_x, total_num)
            total_num += len(state)
            continue

        if len(delete) > 0:
            for i in delete:
                if i in tracking_index:
                    node = tracking_index.index(i)
                    delete_node(node)

        result = []
        im = im_x.copy()
        del im_x, state, delete
        for i in range(len(features)):
            im_x_crop, scale_x = get_crop(
                im,
                target_pos[i],
                target_sz[i],
                z_size,
                x_size=x_size,
                avg_chans=avg_chans,
                context_amount=context_amount,
                func_get_subwindow=get_subwindow_tracking)
            array = to_tensor(im_x_crop)
            with torch.no_grad():
                score, box, cls, ctr, *args = Model(array,
                                                    *features[i],
                                                    phase=phase_track)

            box = tensor_to_numpy(box[0])
            score = tensor_to_numpy(score[0])[:, 0]
            cls = tensor_to_numpy(cls[0])
            ctr = tensor_to_numpy(ctr[0])
            box_wh = xyxy2cxywh(box)

            # Low peak score: count a miss and keep the previous state.
            # NOTE(review): no rect is appended for this target, so ``result``
            # can be shorter than ``tracking_index`` -- confirm the consumer
            # copes with that.
            if score.max() < 0.2:
                lost[i] += 1
                continue
            elif lost[i] > 0:
                lost[i] -= 1

            best_pscore_id, pscore, penalty = postprocess_score(
                score, box_wh, target_sz[i], scale_x, window)
            # box post-processing
            new_target_pos, new_target_sz = postprocess_box(
                best_pscore_id, score, box_wh, target_pos[i], target_sz[i],
                scale_x, x_size, penalty)
            new_target_pos, new_target_sz = restrict_box(
                im_h, im_w, new_target_pos, new_target_sz)

            # save underlying state
            target_pos[i], target_sz[i] = new_target_pos, new_target_sz

            # return rect format
            track_rect = cxywh2xywh(
                np.concatenate([target_pos[i], target_sz[i]], axis=-1))
            result.append(track_rect)

        # Remove every target that has been missed more than 10 times. The
        # list used to be reset inside the loop, so only the last expired
        # target was removed. Delete from the back so earlier indices stay
        # valid.
        expired = [i for i in range(len(features)) if lost[i] > 10]
        for i in reversed(expired):
            delete_node(i)

        resultqueue.put([result, tracking_index])
def track(self,
          im_x,
          target_pos,
          target_sz,
          features,
          update_state=False,
          **kwargs):
    r"""
    Run one tracking step and keep the crop parameters for later use.

    :param im_x: current (search) frame
    :param target_pos: target position (x, y) the search crop is centred on
    :param target_sz: target size (w, h)
    :param features: template features, unpacked positionally into the model call
    :param update_state: when True, also record score / pscore / boxes / cls /
        ctr in ``self._state``
    :param kwargs: optional ``avg_chans`` overriding ``self._state['avg_chans']``
    :return: (new_target_pos, new_target_sz); this variant does not pass them
        through ``_restrict_box``
    """
    avg_chans = (kwargs['avg_chans']
                 if 'avg_chans' in kwargs else self._state['avg_chans'])

    hps = self._hyper_params
    z_size = hps['z_size']
    x_size = hps['x_size']
    context_amount = hps['context_amount']
    phase_track = hps['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )

    # Everything that went into producing the search crop.
    self._state["crop_info"] = {
        'target_pos': target_pos,
        'target_sz': target_sz,
        'scale_x': scale_x,
        'avg_chans': avg_chans,
    }

    search = imarray_to_tensor(im_x_crop).to(self.device)
    with torch.no_grad():
        score, box, cls, ctr, *_ = self._model(search,
                                               *features,
                                               phase=phase_track)

    box = tensor_to_numpy(box[0])
    score = tensor_to_numpy(score[0])[:, 0]
    cls = tensor_to_numpy(cls[0])
    ctr = tensor_to_numpy(ctr[0])
    box_wh = xyxy2cxywh(box)

    # Score first, then the box that belongs to the best score.
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, box_wh, target_sz, scale_x)
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # Basic mid-level info. The best box is stored unrounded and the same
    # value is written under both keys.
    best_box = box[best_pscore_id]
    self._state['x_crop'] = im_x_crop
    self._state['bbox_pred_in_crop'] = best_box
    self._state['bbox_pred_in_frame'] = best_box

    # Optional mid-level info.
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore
        self._state['all_box'] = box
        self._state['cls'] = cls
        self._state['ctr'] = ctr

    return new_target_pos, new_target_sz
def joint_segmentation(self, im_x, target_pos, target_sz, corr_feature,
                       gml_feature, **kwargs):
    r"""
    Segment the current frame for VOS:
    crop image => segmentation => state update

    Besides returning the masks, this writes ``scale_x``, ``filtered_image``,
    ``conf_score`` and ``mask_in_full_image`` (plus ``patch_prediction`` when
    ``save_patch`` is enabled) into ``self._state``.

    :param im_x: current image
    :param target_pos: target position (x, y)
    :param target_sz: target size (w, h)
    :param corr_feature: correlated feature produced by siamese encoder
    :param gml_feature: global feature produced by global modeling loop
    :param kwargs: optional ``avg_chans`` overriding ``self._state['avg_chans']``
    :return: pred_mask mask prediction in the patch of saliency image
    :return: pred_mask_b binary mask prediction in the patch of saliency image
    """
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    # crop image for saliency encoder
    saliency_image, scale_seg = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size=self._hyper_params["z_size"],
        output_size=self._hyper_params["saliency_image_size"],
        x_size=self._hyper_params["saliency_image_field"],
        avg_chans=avg_chans,
        context_amount=self._hyper_params["context_amount"],
        func_get_subwindow=get_subwindow_tracking,
    )
    self._state["scale_x"] = scale_seg

    # mask prediction
    # Inference only: run without autograd so no graph is kept alive per
    # frame, matching how ``init`` calls the segmenter.
    with torch.no_grad():
        pred_mask = self._segmenter(
            imarray_to_tensor(saliency_image).to(self.device),
            corr_feature,
            gml_feature,
            phase='segment')[0]  # original note: tensor(1,1,257,257)

    pred_mask = tensor_to_numpy(pred_mask[0]).transpose(
        (1, 2, 0))  # original note: np (257,257,1)

    # post processing
    mask_filter = (pred_mask >
                   self._hyper_params['mask_filter_thresh']).astype(np.uint8)
    pred_mask_b = (pred_mask >
                   self._hyper_params['mask_pred_thresh']).astype(np.uint8)

    if self._hyper_params['save_patch']:
        # Blend the filter mask, painted into channel 0, over the crop.
        mask_red = np.zeros_like(saliency_image)
        mask_red[:, :, 0] = mask_filter[:, :, 0] * 255
        masked_image = saliency_image * 0.5 + mask_red * 0.5
        self._state['patch_prediction'] = masked_image

    filtered_image = saliency_image * mask_filter
    filtered_image = cv2.resize(filtered_image,
                                (self._hyper_params["GMP_image_size"],
                                 self._hyper_params["GMP_image_size"]))
    self._state['filtered_image'] = filtered_image

    # Confidence = mean predicted value inside the binary mask (0 if empty).
    if pred_mask_b.sum() > 0:
        conf_score = (pred_mask * pred_mask_b).sum() / pred_mask_b.sum()
    else:
        conf_score = 0
    self._state['conf_score'] = conf_score

    mask_in_full_image = self._mask_back(
        pred_mask,
        size=self._hyper_params["saliency_image_size"],
        region=self._hyper_params["saliency_image_field"])
    self._state['mask_in_full_image'] = mask_in_full_image

    # A track score below the failure threshold blanks the full-image mask.
    if self._tracker.get_track_score(
    ) < self._hyper_params["track_failed_score_th"]:
        self._state['mask_in_full_image'] *= 0
    return pred_mask, pred_mask_b
def init(self, im, state, init_mask):
    """
    Set up the whole pipeline: tracker init => global modeling loop init.

    :param im: init frame
    :param state: bbox in xywh format
    :param init_mask: binary mask of target object in shape (h,w)
    """
    # ---------- tracker ----------
    self._tracker.init(im, state)
    avg_chans = self._tracker.get_avg_chans()
    self._state['avg_chans'] = avg_chans

    # The xywh bbox is converted and split into position / size.
    cxywh = xywh2cxywh(state)
    target_pos = cxywh[:2]
    target_sz = cxywh[2:]
    self._state['state'] = (target_pos, target_sz)
    self._state['im_h'] = im.shape[0]
    self._state['im_w'] = im.shape[1]

    # ---------- global modeling loop ----------
    hps = self._hyper_params
    init_image, _ = get_crop(
        im,
        target_pos,
        target_sz,
        z_size=hps["z_size"],
        x_size=hps["GMP_image_size"],
        avg_chans=avg_chans,
        context_amount=hps["context_amount"],
        func_get_subwindow=get_subwindow_tracking,
    )

    # The mask is replicated to three channels so it can go through the same
    # cropping routine as the image; the padding value is zero.
    init_mask_c3 = np.stack([init_mask] * 3, -1).astype(np.uint8)
    init_mask_crop_c3, _ = get_crop(
        init_mask_c3,
        target_pos,
        target_sz,
        z_size=hps["z_size"],
        x_size=hps["GMP_image_size"],
        avg_chans=avg_chans * 0,
        context_amount=hps["context_amount"],
        func_get_subwindow=get_subwindow_tracking,
    )

    # Back to one channel, binarised, with a trailing axis for broadcasting.
    mask_plane = init_mask_crop_c3[:, :, 0]
    mask_binary = (mask_plane > hps['mask_filter_thresh']).astype(np.uint8)
    init_mask_crop = np.expand_dims(mask_binary, axis=-1)
    filtered_image = init_mask_crop * init_image
    self._state['filtered_image'] = filtered_image

    segmenter_input = imarray_to_tensor(filtered_image).to(self.device)
    with torch.no_grad():
        deep_feature = self._segmenter(segmenter_input,
                                       phase='global_feature')[0]

    # The same initial feature seeds all three feature slots.
    self._state['seg_init_feature'] = deep_feature
    self._state['seg_global_feature'] = deep_feature
    self._state['gml_feature'] = deep_feature
    self._state['conf_score'] = 1
def track(self,
          im_x,
          target_pos,
          target_sz,
          features,
          update_state=False,
          **kwargs):
    r"""
    Run one tracking step that combines the offline model score with the
    confidence map of the online classifier.

    Side effects: updates ``self.lost_count``, several ``self._state`` entries
    and the online classifier (via ``self.online_classifier.update``). When
    ``debug_show`` is enabled it opens OpenCV windows and blocks on a key.

    :param im_x: current (search) frame
    :param target_pos: target position (x, y) the search crop is centred on
    :param target_sz: target size (w, h)
    :param features: template features, unpacked positionally into the model call
    :param update_state: when True, also record score / pscore / boxes / cls /
        ctr in ``self._state``
    :param kwargs: optional ``avg_chans`` overriding ``self._state['avg_chans']``
    :return: (new_target_pos, new_target_sz)
    """
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    context_amount = self._hyper_params['context_amount']
    phase_track = self._hyper_params['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )
    self._state["scale_x"] = deepcopy(scale_x)
    with torch.no_grad():
        score, box, cls, ctr, extra = self._model(
            imarray_to_tensor(im_x_crop).to(self.device),
            *features,
            phase=phase_track)
    if self._hyper_params["corr_fea_output"]:
        self._state["corr_fea"] = extra["corr_fea"]

    box = tensor_to_numpy(box[0])
    score = tensor_to_numpy(score[0])[:, 0]
    cls = tensor_to_numpy(cls[0])
    ctr = tensor_to_numpy(ctr[0])

    def normalize(score):
        # Min-max scaling to [0, 1].
        # NOTE(review): a constant map makes the denominator zero -- confirm
        # whether that case can occur.
        score = (score - np.min(score)) / (np.max(score) - np.min(score))
        return score

    # Online classifier pass; its flag drives the lost counter.
    flag, s = self.online_classifier.track()
    if flag == 'not_found':
        self.lost_count += 1
    else:
        self.lost_count = 0

    confidence = s.detach().cpu().numpy()
    # Centre-crop the online map to the offline score size.
    offset = (confidence.shape[0] - self._hyper_params["score_size"]) // 2
    if offset > 0:
        # Guarded because ``[0:-0]`` would select nothing when the two maps
        # already have the same size.
        confidence = confidence[offset:-offset, offset:-offset]
    confidence = normalize(confidence).flatten()

    box_wh = xyxy2cxywh(box)

    # score post-processing
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, confidence, box_wh, target_sz, scale_x)

    if self._hyper_params["debug_show"]:
        bbox_in_crop = box[best_pscore_id, :]
        bbox_in_crop = tuple(map(int, bbox_in_crop))
        show_im_patch = im_x_crop.copy()
        cv2.rectangle(show_im_patch, bbox_in_crop[:2], bbox_in_crop[2:],
                      (0, 255, 0), 2)
        cv2.imshow("pred in crop", show_im_patch)
        # offline score
        score_mat = score.reshape(self._hyper_params["score_size"],
                                  self._hyper_params["score_size"])
        score_mat = (255 * score_mat).astype(np.uint8)
        score_map = cv2.applyColorMap(score_mat, cv2.COLORMAP_JET)
        cv2.imshow("offline score", score_map)
        # online score
        score_mat = confidence.reshape(self._hyper_params["score_size"],
                                       self._hyper_params["score_size"])
        score_mat = (255 * score_mat).astype(np.uint8)
        score_map = cv2.applyColorMap(score_mat, cv2.COLORMAP_JET)
        cv2.imshow("online score", score_map)
        cv2.waitKey()

    # box post-processing
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # restrict new_target_pos & new_target_sz
    new_target_pos, new_target_sz = self._restrict_box(
        new_target_pos, new_target_sz)

    # record basic mid-level info
    self._state['x_crop'] = im_x_crop
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the exact
    # type it used to alias, so the resulting dtype is unchanged.
    bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
    self._state['bbox_pred_in_crop'] = bbox_pred_in_crop

    self.online_classifier.update(
        np.concatenate([new_target_pos, new_target_sz], axis=0),
        self.scale_z, flag)

    # record optional mid-level info
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore[best_pscore_id]
        self._state['all_box'] = box
        self._state['cls'] = cls
        self._state['ctr'] = ctr

    return new_target_pos, new_target_sz
def track4vos(self,
              im_x,
              target_pos,
              target_sz,
              f_z,
              update_state=False,
              **kwargs):
    r"""
    similarity encoder with regression head
    returns regressed bbox and correlation feature

    :param im_x: current frame
    :param target_pos: target position (x, y)
    :param target_sz: target size (w, h)
    :param f_z: target feature
    :param update_state: when True, also record score / pscore / boxes / cls /
        ctr in ``self._state``
    :param kwargs: optional ``avg_chans`` overriding ``self._state['avg_chans']``
    :return new_target_pos, new_target_sz, corr_feature
    """
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    context_amount = self._hyper_params['context_amount']
    phase_track = self._hyper_params['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )
    with torch.no_grad():
        score, box, cls, ctr, corr_feature = self._tracker(
            imarray_to_tensor(im_x_crop).to(self.device),
            f_z,
            phase=phase_track)

    box = tensor_to_numpy(box[0])
    score = tensor_to_numpy(score[0])[:, 0]
    cls = tensor_to_numpy(cls[0])
    ctr = tensor_to_numpy(ctr[0])
    box_wh = xyxy2cxywh(box)

    # score post-processing
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, box_wh, target_sz, scale_x)
    # box post-processing
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # restrict new_target_pos & new_target_sz
    new_target_pos, new_target_sz = self._restrict_box(
        new_target_pos, new_target_sz)

    # record basic mid-level info
    self._state['x_crop'] = im_x_crop
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the exact
    # type it used to alias, so the resulting dtype is unchanged.
    bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
    self._state['bbox_pred_in_crop'] = bbox_pred_in_crop
    # The state the search crop was built from (i.e. the incoming one).
    self._state['current_state'] = (target_pos, target_sz)
    self._state['scale_x'] = scale_x

    # record optional mid-level info
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore
        self._state['all_box'] = box
        self._state['cls'] = cls
        self._state['ctr'] = ctr

    return new_target_pos, new_target_sz, corr_feature
def feature(self, im: np.array, target_pos, target_sz, avg_chans=None):
    """Extract template features from an adversarially perturbed template crop.

    The template crop is perturbed by two FGSM steps (``fgsm_attack`` with
    step 0.05). Each step uses the gradient of
    ``loss1 + loss2 + loss3`` -- computed from the outputs of
    ``self._model.update`` on the template and search crops -- with respect to
    the template. The model is then run on the perturbed template.

    Parameters
    ----------
    im : np.array
        initial frame
    target_pos :
        target position (x, y)
    target_sz :
        target size (w, h)
    avg_chans : optional
        channel mean values, (B, G, R); computed from ``im`` when None

    Returns
    -------
    tuple
        ``(features, im_z_crop, avg_chans)``; ``features`` comes from the
        perturbed template while ``im_z_crop`` is the unperturbed crop.
    """
    if avg_chans is None:
        avg_chans = np.mean(im, axis=(0, 1))

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    context_amount = self._hyper_params['context_amount']

    im_z_crop, _, _ = get_crop(
        im,
        target_pos,
        target_sz,
        z_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
        params=self._hyper_params
    )
    im_x_crop, _, label = get_crop(
        im,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
        params=self._hyper_params
    )
    # The third return value bundles the three training targets.
    label, ctr_res_final, gt_boxes_res_final = label

    phase = self._hyper_params['phase_init']
    loop_num = 2
    im_z = imarray_to_tensor(im_z_crop).to(self.device)
    # The search crop does not change between iterations, so it is converted
    # and moved to the device once instead of once per step.
    im_x = imarray_to_tensor(im_x_crop).to(self.device)
    for i in range(loop_num):
        im_z.requires_grad = True
        score, ctr, reg = self._model.update(im_z, im_x)
        if i == 0:
            # Targets are reshaped / moved to the device only once; later
            # iterations reuse the tensors.
            feat_len = score.shape[1]
            label_raw = label.reshape(1, feat_len, 1)
            label = torch.from_numpy(label_raw).to(self.device)
            ctr_res_final = ctr_res_final.reshape(1, feat_len, 1)
            ctr_res_final = torch.from_numpy(ctr_res_final).to(self.device)
            gt_boxes_res_final = torch.from_numpy(gt_boxes_res_final).to(
                self.device)
            gt_boxes_res_final = gt_boxes_res_final.unsqueeze(0)

        loss1, _ = self.loss1(score, label)
        loss2, _ = self.loss2(ctr, ctr_res_final)
        loss3, _ = self.loss3(reg, gt_boxes_res_final, label)
        loss = loss1 + loss2 + loss3
        print(loss.item())

        self._model.zero_grad()
        # Calculate gradients of model in backward pass
        loss.backward()
        # Collect datagrad
        data_grad = im_z.grad.data
        # Call FGSM Attack
        perturbed_data = fgsm_attack(im_z, 0.05, data_grad)
        # Continue from the perturbed values, detached from the old graph.
        im_z = perturbed_data.data

    # Re-initialize: recompute the template features from the perturbed crop.
    with torch.no_grad():
        features = self._model(im_z, phase=phase)

    return features, im_z_crop, avg_chans