def _transform_bbox_from_crop_to_frame(self, bbox_in_crop, crop_info=None):
    r"""Transform bbox from crop to frame,
    based on the latest detection setting (cropping position / cropping scale).

    Arguments
    ---------
    bbox_in_crop:
        bboxes on crop that will be transformed into bboxes on frame;
        object able to be reshaped into (-1, 4), xyxy
    crop_info: Dict
        dictionary containing cropping information; the transform is
        performed based on crop_info
            target_pos: cropping position
            target_sz: target size based on which cropping range was calculated
            scale_x: cropping scale, length on crop / length on frame

    Returns
    -------
    np.array
        bboxes on frame, (N, 4)
    """
    if crop_info is None:
        crop_info = self._state["crop_info"]
    target_pos = crop_info["target_pos"]
    target_sz = crop_info["target_sz"]
    scale_x = crop_info["scale_x"]
    x_size = self._hyper_params["x_size"]

    bbox_in_crop = np.array(bbox_in_crop).reshape(-1, 4)
    pred_in_crop = xyxy2cxywh(bbox_in_crop)
    pred_in_crop = pred_in_crop / np.float32(scale_x)

    lr = 1.0  # no EMA smoothing, size directly determined by prediction
    res_x = pred_in_crop[..., 0] + target_pos[0] - (x_size // 2) / scale_x
    res_y = pred_in_crop[..., 1] + target_pos[1] - (x_size // 2) / scale_x
    res_w = target_sz[0] * (1 - lr) + pred_in_crop[..., 2] * lr
    res_h = target_sz[1] * (1 - lr) + pred_in_crop[..., 3] * lr

    bbox_in_frame = cxywh2xyxy(
        np.stack([res_x, res_y, res_w, res_h], axis=1))
    return bbox_in_frame
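# A minimal, self-contained numpy sketch (not part of the original file) checking the
# crop -> frame arithmetic above with lr = 1.0. The helpers _xyxy2cxywh / _cxywh2xyxy
# below are illustrative stand-ins for the repository's xyxy2cxywh / cxywh2xyxy.
import numpy as np


def _xyxy2cxywh(b):
    b = np.asarray(b, dtype=np.float32).reshape(-1, 4)
    return np.stack([(b[:, 0] + b[:, 2]) / 2, (b[:, 1] + b[:, 3]) / 2,
                     b[:, 2] - b[:, 0], b[:, 3] - b[:, 1]], axis=1)


def _cxywh2xyxy(b):
    b = np.asarray(b, dtype=np.float32).reshape(-1, 4)
    return np.stack([b[:, 0] - b[:, 2] / 2, b[:, 1] - b[:, 3] / 2,
                     b[:, 0] + b[:, 2] / 2, b[:, 1] + b[:, 3] / 2], axis=1)


def crop_to_frame(bbox_in_crop, target_pos, scale_x, x_size):
    # same mapping as _transform_bbox_from_crop_to_frame: shift the crop center
    # (x_size // 2) back to target_pos and undo the crop scale
    p = _xyxy2cxywh(bbox_in_crop) / np.float32(scale_x)
    cx = p[:, 0] + target_pos[0] - (x_size // 2) / scale_x
    cy = p[:, 1] + target_pos[1] - (x_size // 2) / scale_x
    return _cxywh2xyxy(np.stack([cx, cy, p[:, 2], p[:, 3]], axis=1))


# a 20x20 box at the center of a 303-pixel crop (scale 0.5) maps back to a 40x40 box
# centered at the cropping position (200, 120) -> [[180. 100. 220. 140.]]
print(crop_to_frame([[141, 141, 161, 161]], target_pos=(200.0, 120.0),
                    scale_x=0.5, x_size=303))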
def track(self,
          im_x,
          target_pos,
          target_sz,
          features,
          update_state=False,
          **kwargs):
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    if self._state["lost_target"]:
        x_size = self._hyper_params["lost_search_size"]
    context_amount = self._hyper_params['context_amount']
    phase_track = self._hyper_params['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )
    self._state["scale_x"] = deepcopy(scale_x)
    with torch.no_grad():
        score, box, cls, ctr, extra = self._model(
            imarray_to_tensor(im_x_crop).to(self.device),
            *features,
            phase=phase_track)
    if self._hyper_params["corr_fea_output"]:
        self._state["corr_fea"] = extra["corr_fea"]

    box = tensor_to_numpy(box[0])
    score = tensor_to_numpy(score[0])[:, 0]
    cls = tensor_to_numpy(cls[0])
    ctr = tensor_to_numpy(ctr[0])
    box_wh = xyxy2cxywh(box)

    # score post-processing
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, box_wh, target_sz, scale_x)
    # box post-processing
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # restrict new_target_pos & new_target_sz
    new_target_pos, new_target_sz = self._restrict_box(
        new_target_pos, new_target_sz)

    # record basic mid-level info
    self._state['x_crop'] = im_x_crop
    bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
    self._state['bbox_pred_in_crop'] = bbox_pred_in_crop

    # record optional mid-level info
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore[best_pscore_id]
        self._state['all_box'] = box
        self._state['cls'] = cls
        self._state['ctr'] = ctr

    # update lost-target flag with hysteresis thresholds
    if pscore[best_pscore_id] > self._hyper_params["lost_score_th_high"]:
        self._state["lost_target"] = False
    elif pscore[best_pscore_id] < self._hyper_params["lost_score_th_low"]:
        self._state["lost_target"] = True
        logger.info("lost target")

    return new_target_pos, new_target_sz
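# Hedged sketch (names and threshold values are placeholders, not from the original
# file): the lost-target handling above is a two-threshold hysteresis on the best
# penalized score.
def update_lost_flag(lost, best_pscore, th_high=0.5, th_low=0.2):
    """Standalone version of the flag update; the real thresholds come from
    hyper_params['lost_score_th_high'] / hyper_params['lost_score_th_low']."""
    if best_pscore > th_high:
        return False   # confident again -> track with the normal x_size
    if best_pscore < th_low:
        return True    # confidence collapsed -> next crop uses lost_search_size
    return lost        # in the dead band -> keep the previous state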
def track(self,
          im_x,
          target_pos,
          target_sz,
          features,
          update_state=False,
          **kwargs):
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    context_amount = self._hyper_params['context_amount']
    phase_track = self._hyper_params['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )

    # process batch of templates
    score_list = []
    box_list = []
    cls_list = []
    ctr_list = []
    fms_x = None
    for ith in range(self._hyper_params['mem_len']):
        if fms_x is None:
            # first template: also extract search-region features
            with torch.no_grad():
                score, box, cls, ctr, extra = self._model(
                    imarray_to_tensor(im_x_crop).to(self.device),
                    *(features[ith]),
                    phase=phase_track)
            fms_x = [extra['c_x'], extra['r_x']]
        else:
            # remaining templates: reuse cached search-region features
            with torch.no_grad():
                score, box, cls, ctr, extra = self._model(
                    *(features[ith]), fms_x[0], fms_x[1], phase=phase_track)
        box = tensor_to_numpy(box[0])
        score = tensor_to_numpy(score[0])[:, 0]
        cls = tensor_to_numpy(cls[0])[:, 0]
        ctr = tensor_to_numpy(ctr[0])[:, 0]
        # append to list
        box_list.append(box)
        score_list.append(score)
        cls_list.append(cls)
        ctr_list.append(ctr)

    # fusion: blend the initial template with the mean of memorized templates
    if self._hyper_params['mem_len'] > 1:
        score = score_list[0] * (1 - self._hyper_params['mem_coef']) + \
            np.stack(score_list[1:], axis=0).mean(axis=0) * \
            self._hyper_params['mem_coef']
    else:
        # single template
        score = score_list[0]
    box = box_list[0]
    box_wh = xyxy2cxywh(box)

    # score post-processing
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, box_wh, target_sz, scale_x)
    # box post-processing
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # restrict new_target_pos & new_target_sz
    new_target_pos, new_target_sz = self._restrict_box(
        new_target_pos, new_target_sz)

    # record basic mid-level info
    self._state['x_crop'] = im_x_crop
    bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
    self._state['bbox_pred_in_crop'] = bbox_pred_in_crop

    # record optional mid-level info
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore
        self._state['all_box'] = box
        self._state['cls'] = cls
        self._state['ctr'] = ctr

    return new_target_pos, new_target_sz
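# Illustrative sketch (not from the original file): the multi-template fusion above
# is a convex blend of the initial template's score map with the mean score map of
# the memorized templates, weighted by mem_coef.
import numpy as np


def fuse_scores(score_list, mem_coef):
    # score_list[0]: initial template; score_list[1:]: memorized templates
    if len(score_list) == 1:
        return score_list[0]
    return score_list[0] * (1 - mem_coef) + \
        np.stack(score_list[1:], axis=0).mean(axis=0) * mem_coef


scores = [np.array([0.9, 0.1]), np.array([0.5, 0.3]), np.array([0.7, 0.5])]
print(fuse_scores(scores, mem_coef=0.5))  # -> [0.75 0.25]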
def multiprocessing_update(task, task_cfg, index, im, dataqueue, resultqueue):
    # build model
    model = model_builder.build_model(task, task_cfg.model).to(
        torch.device("cuda"))
    model.eval()
    target_pos = []
    target_sz = []
    im_z_crops = []
    lost = []
    features = []
    tracking_index = []
    total_num = 0
    avg_chans = np.mean(im, axis=(0, 1))
    im_h, im_w = im.shape[0], im.shape[1]
    z_size = hyper_params['z_size']
    x_size = hyper_params['x_size']
    context_amount = hyper_params['context_amount']
    phase = hyper_params['phase_init']
    phase_track = hyper_params['phase_track']
    score_size = (
        hyper_params['x_size'] - hyper_params['z_size']
    ) // hyper_params['total_stride'] + 1 - hyper_params['num_conv3x3'] * 2
    if hyper_params['windowing'] == 'cosine':
        window = np.outer(np.hanning(score_size), np.hanning(score_size))
        window = window.reshape(-1)
    elif hyper_params['windowing'] == 'uniform':
        window = np.ones((score_size, score_size))
    else:
        window = np.ones((score_size, score_size))

    def init(state, im_x, total_num):
        # register new objects; use local pos/sz so later registrations do not
        # index into entries appended by earlier calls
        for i in range(len(state)):
            pos = state[i][:2]
            sz = state[i][2:4]
            target_pos.append(pos)
            target_sz.append(sz)
            tracking_index.append(index * 100 + total_num + i)
            im_z_crop, _ = get_crop(im_x,
                                    pos,
                                    sz,
                                    z_size,
                                    avg_chans=avg_chans,
                                    context_amount=context_amount,
                                    func_get_subwindow=get_subwindow_tracking)
            im_z_crops.append(im_z_crop)
            array = torch.from_numpy(
                np.ascontiguousarray(
                    im_z_crop.transpose(2, 0, 1)[np.newaxis, ...],
                    np.float32)).to(torch.device("cuda"))
            lost.append(0)
            with torch.no_grad():
                features.append(model(array, phase=phase))

    def delete_node(j):
        try:
            del target_pos[j]
            del target_sz[j]
            del features[j]
            del tracking_index[j]
            del lost[j]
        except Exception as error:
            print("delete error", error)

    while True:
        try:
            im_x, state, delete = dataqueue.get()
        except Exception as error:
            print(error)
            continue

        # register newly appeared objects and wait for the next frame
        if len(state) > 0:
            init(state, im_x, total_num)
            total_num += len(state)
            continue

        # delete objects requested by the parent process
        if len(delete) > 0:
            for i in delete:
                if i in tracking_index:
                    node = tracking_index.index(i)
                    delete_node(node)

        result = []
        im = im_x.copy()
        del im_x, state, delete
        for i in range(len(features)):
            im_x_crop, scale_x = get_crop(
                im,
                target_pos[i],
                target_sz[i],
                z_size,
                x_size=x_size,
                avg_chans=avg_chans,
                context_amount=context_amount,
                func_get_subwindow=get_subwindow_tracking)
            array = torch.from_numpy(
                np.ascontiguousarray(
                    im_x_crop.transpose(2, 0, 1)[np.newaxis, ...],
                    np.float32)).to(torch.device("cuda"))
            with torch.no_grad():
                score, box, cls, ctr, *args = model(array,
                                                    *features[i],
                                                    phase=phase_track)
            box = tensor_to_numpy(box[0])
            score = tensor_to_numpy(score[0])[:, 0]
            cls = tensor_to_numpy(cls[0])
            ctr = tensor_to_numpy(ctr[0])
            box_wh = xyxy2cxywh(box)
            # lost target: no result for this object on this frame
            if score.max() < 0.2:
                lost[i] += 1
                continue
            elif lost[i] > 0:
                lost[i] -= 1
            # score post-processing
            best_pscore_id, pscore, penalty = postprocess_score(
                score, box_wh, target_sz[i], scale_x, window)
            # box post-processing
            new_target_pos, new_target_sz = postprocess_box(
                best_pscore_id, score, box_wh, target_pos[i], target_sz[i],
                scale_x, x_size, penalty)
            new_target_pos, new_target_sz = restrict_box(
                im_h, im_w, new_target_pos, new_target_sz)
            # save underlying state
            target_pos[i], target_sz[i] = new_target_pos, new_target_sz
            # return rect format
            track_rect = cxywh2xywh(
                np.concatenate([target_pos[i], target_sz[i]], axis=-1))
            result.append(track_rect)

        # drop objects lost for more than 10 consecutive frames;
        # delete from the back so the remaining indices stay valid
        delete_list = [i for i in range(len(features)) if lost[i] > 10]
        for i in reversed(delete_list):
            delete_node(i)
        resultqueue.put([result, tracking_index])
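# Hedged sketch of how a parent process might drive the worker above (this driver is
# not part of the original file; run_parent, frames, and init_boxes are placeholder
# names). The worker expects (im_x, state, delete) tuples on dataqueue and answers
# tracking-only requests with [result, tracking_index] on resultqueue; a request that
# registers new objects (non-empty state) gets no reply.
import torch.multiprocessing as mp


def run_parent(task, task_cfg, first_frame, frames, init_boxes):
    ctx = mp.get_context("spawn")  # CUDA workers require the spawn start method
    dataqueue, resultqueue = ctx.Queue(), ctx.Queue()
    worker = ctx.Process(target=multiprocessing_update,
                         args=(task, task_cfg, 0, first_frame, dataqueue,
                               resultqueue))
    worker.start()
    # register the initial objects (xywh boxes); no deletions, no reply expected
    dataqueue.put((first_frame, init_boxes, []))
    for frame in frames:
        dataqueue.put((frame, [], []))  # track only, no new objects
        rects, ids = resultqueue.get()  # xywh rects plus the current tracking ids
        yield rects, ids
    worker.terminate()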
def track(self,
          im_x,
          target_pos,
          target_sz,
          features,
          update_state=False,
          **kwargs):
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    context_amount = self._hyper_params['context_amount']
    phase_track = self._hyper_params['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )
    # store crop information
    self._state["crop_info"] = dict(
        target_pos=target_pos,
        target_sz=target_sz,
        scale_x=scale_x,
        avg_chans=avg_chans,
    )
    with torch.no_grad():
        score, box, cls, ctr, *args = self._model(
            imarray_to_tensor(im_x_crop).to(self.device),
            *features,
            phase=phase_track)
    box = tensor_to_numpy(box[0])
    score = tensor_to_numpy(score[0])[:, 0]
    cls = tensor_to_numpy(cls[0])
    ctr = tensor_to_numpy(ctr[0])
    box_wh = xyxy2cxywh(box)

    # score post-processing
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, box_wh, target_sz, scale_x)
    # box post-processing
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # restrict new_target_pos & new_target_sz
    # new_target_pos, new_target_sz = self._restrict_box(
    #     new_target_pos, new_target_sz)

    # record basic mid-level info
    self._state['x_crop'] = im_x_crop
    # bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(np.int)
    bbox_pred_in_crop = box[best_pscore_id]
    self._state['bbox_pred_in_crop'] = bbox_pred_in_crop
    self._state['bbox_pred_in_frame'] = bbox_pred_in_crop

    # record optional mid-level info
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore
        self._state['all_box'] = box
        self._state['cls'] = cls
        self._state['ctr'] = ctr

    return new_target_pos, new_target_sz
def track(self,
          im_x,
          target_pos,
          target_sz,
          features,
          update_state=False,
          **kwargs):
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    context_amount = self._hyper_params['context_amount']
    phase_track = self._hyper_params['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )
    self._state["scale_x"] = deepcopy(scale_x)
    with torch.no_grad():
        score, box, cls, ctr, extra = self._model(
            imarray_to_tensor(im_x_crop).to(self.device),
            *features,
            phase=phase_track)
    if self._hyper_params["corr_fea_output"]:
        self._state["corr_fea"] = extra["corr_fea"]

    box = tensor_to_numpy(box[0])
    score = tensor_to_numpy(score[0])[:, 0]
    cls = tensor_to_numpy(cls[0])
    ctr = tensor_to_numpy(ctr[0])

    def normalize(score):
        score = (score - np.min(score)) / (np.max(score) - np.min(score))
        return score

    # online classifier confidence
    flag, s = self.online_classifier.track()
    if flag == 'not_found':
        self.lost_count += 1
    else:
        self.lost_count = 0

    confidence = s.detach().cpu().numpy()
    offset = (confidence.shape[0] - self._hyper_params["score_size"]) // 2
    confidence = confidence[offset:-offset, offset:-offset]
    confidence = normalize(confidence).flatten()

    box_wh = xyxy2cxywh(box)
    # score post-processing
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, confidence, box_wh, target_sz, scale_x)

    if self._hyper_params["debug_show"]:
        bbox_in_crop = box[best_pscore_id, :]
        bbox_in_crop = tuple(map(int, bbox_in_crop))
        show_im_patch = im_x_crop.copy()
        cv2.rectangle(show_im_patch, bbox_in_crop[:2], bbox_in_crop[2:],
                      (0, 255, 0), 2)
        cv2.imshow("pred in crop", show_im_patch)
        # offline score
        score_mat = score.reshape(self._hyper_params["score_size"],
                                  self._hyper_params["score_size"])
        score_mat = (255 * score_mat).astype(np.uint8)
        score_map = cv2.applyColorMap(score_mat, cv2.COLORMAP_JET)
        cv2.imshow("offline score", score_map)
        # online score
        score_mat = confidence.reshape(self._hyper_params["score_size"],
                                       self._hyper_params["score_size"])
        score_mat = (255 * score_mat).astype(np.uint8)
        score_map = cv2.applyColorMap(score_mat, cv2.COLORMAP_JET)
        cv2.imshow("online score", score_map)
        cv2.waitKey()

    # box post-processing
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # restrict new_target_pos & new_target_sz
    new_target_pos, new_target_sz = self._restrict_box(
        new_target_pos, new_target_sz)

    # record basic mid-level info
    self._state['x_crop'] = im_x_crop
    bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
    self._state['bbox_pred_in_crop'] = bbox_pred_in_crop

    # update the online classifier with the new state
    self.online_classifier.update(
        np.concatenate([new_target_pos, new_target_sz], axis=0),
        self.scale_z, flag)

    # record optional mid-level info
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore[best_pscore_id]
        self._state['all_box'] = box
        self._state['cls'] = cls
        self._state['ctr'] = ctr

    return new_target_pos, new_target_sz
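# Hedged sketch (illustrative, names assumed): the online confidence map above is
# center-cropped to the offline score_size, min-max normalized, and flattened before
# being fused with the offline score in _postprocess_score.
import numpy as np


def align_online_confidence(conf_2d, score_size):
    offset = (conf_2d.shape[0] - score_size) // 2
    if offset > 0:
        conf_2d = conf_2d[offset:-offset, offset:-offset]
    conf_2d = (conf_2d - conf_2d.min()) / (conf_2d.max() - conf_2d.min())
    return conf_2d.flatten()


conf = np.random.rand(21, 21).astype(np.float32)
print(align_online_confidence(conf, score_size=17).shape)  # -> (289,)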
def track4vos(self,
              im_x,
              target_pos,
              target_sz,
              f_z,
              update_state=False,
              **kwargs):
    r"""Similarity encoder with regression head;
    returns the regressed bbox and the correlation feature.

    :param im_x: current frame
    :param target_pos: target position (x, y)
    :param target_sz: target size (w, h)
    :param f_z: target feature
    :return: new_target_pos, new_target_sz, corr_feature
    """
    if 'avg_chans' in kwargs:
        avg_chans = kwargs['avg_chans']
    else:
        avg_chans = self._state['avg_chans']

    z_size = self._hyper_params['z_size']
    x_size = self._hyper_params['x_size']
    context_amount = self._hyper_params['context_amount']
    phase_track = self._hyper_params['phase_track']
    im_x_crop, scale_x = get_crop(
        im_x,
        target_pos,
        target_sz,
        z_size,
        x_size=x_size,
        avg_chans=avg_chans,
        context_amount=context_amount,
        func_get_subwindow=get_subwindow_tracking,
    )
    with torch.no_grad():
        score, box, cls, ctr, corr_feature = self._tracker(
            imarray_to_tensor(im_x_crop).to(self.device),
            f_z,
            phase=phase_track)

    box = tensor_to_numpy(box[0])
    score = tensor_to_numpy(score[0])[:, 0]
    cls = tensor_to_numpy(cls[0])
    ctr = tensor_to_numpy(ctr[0])
    box_wh = xyxy2cxywh(box)

    # score post-processing
    best_pscore_id, pscore, penalty = self._postprocess_score(
        score, box_wh, target_sz, scale_x)
    # box post-processing
    new_target_pos, new_target_sz = self._postprocess_box(
        best_pscore_id, score, box_wh, target_pos, target_sz, scale_x,
        x_size, penalty)

    if self.debug:
        box = self._cvt_box_crop2frame(box_wh, target_pos, x_size, scale_x)

    # restrict new_target_pos & new_target_sz
    new_target_pos, new_target_sz = self._restrict_box(
        new_target_pos, new_target_sz)

    # record basic mid-level info
    self._state['x_crop'] = im_x_crop
    bbox_pred_in_crop = np.rint(box[best_pscore_id]).astype(int)
    self._state['bbox_pred_in_crop'] = bbox_pred_in_crop
    self._state['current_state'] = (target_pos, target_sz)
    self._state['scale_x'] = scale_x

    # record optional mid-level info
    if update_state:
        self._state['score'] = score
        self._state['pscore'] = pscore
        self._state['all_box'] = box
        self._state['cls'] = cls
        self._state['ctr'] = ctr

    return new_target_pos, new_target_sz, corr_feature