def coarse_location(self, hp_score_up, p_score_up, scale_score, lrtbs):
    """
    Keep only the part of ``p_score_up`` that lies in a window around the
    peak of ``hp_score_up``; everything outside the window is zeroed.

    The peak is mapped back to a score-map cell by dividing by
    ``scale_score``. The four values stored in ``lrtbs`` at that cell (used
    here as left, top, right, bottom) are clipped to
    ``[REGION_S, REGION_L] * EXEMPLAR_SIZE``, limited by the distance from the
    peak to the matching border of the upsampled map, and halved to give the
    window extents.

    args:
        hp_score_up(np.ndarray): 2-D score map whose argmax selects the peak
        p_score_up(np.ndarray): score map to mask, indexed with the peak
            coordinates of hp_score_up (same shape assumed, not checked)
        scale_score(float): divisor that converts upsampled coordinates
            into score-map cell indices
        lrtbs(np.ndarray): indexed as [row, col, side], side in 0..3
    return:
        np.ndarray: p_score_up multiplied by the 0/1 window mask
    """
    upsize = (cfg.TRACK.SCORE_SIZE - 1) * cfg.TRACK.STRIDE + 1
    max_r_up_hp, max_c_up_hp = np.unravel_index(hp_score_up.argmax(),
                                                hp_score_up.shape)
    max_r = int(round(max_r_up_hp / scale_score))
    max_c = int(round(max_c_up_hp / scale_score))
    # The cell is used as an index into lrtbs, so the largest valid value is
    # SCORE_SIZE - 1 (assumes bbox_clip's upper bound is inclusive and lrtbs
    # has SCORE_SIZE rows/columns -- TODO confirm). Clipping to SCORE_SIZE
    # would let an out-of-range index through.
    max_r = bbox_clip(max_r, 0, cfg.TRACK.SCORE_SIZE - 1)
    max_c = bbox_clip(max_c, 0, cfg.TRACK.SCORE_SIZE - 1)
    bbox_region = lrtbs[max_r, max_c, :]
    min_bbox = int(cfg.TRACK.REGION_S * cfg.TRACK.EXEMPLAR_SIZE)
    max_bbox = int(cfg.TRACK.REGION_L * cfg.TRACK.EXEMPLAR_SIZE)
    # Each extent is half of the clipped distance, and never more than half
    # of the room left between the peak and that border.
    l_region = int(
        min(max_c_up_hp, bbox_clip(bbox_region[0], min_bbox, max_bbox)) / 2.0)
    t_region = int(
        min(max_r_up_hp, bbox_clip(bbox_region[1], min_bbox, max_bbox)) / 2.0)
    r_region = int(
        min(upsize - max_c_up_hp,
            bbox_clip(bbox_region[2], min_bbox, max_bbox)) / 2.0)
    b_region = int(
        min(upsize - max_r_up_hp,
            bbox_clip(bbox_region[3], min_bbox, max_bbox)) / 2.0)
    mask = np.zeros_like(p_score_up)
    mask[max_r_up_hp - t_region:max_r_up_hp + b_region + 1,
         max_c_up_hp - l_region:max_c_up_hp + r_region + 1] = 1
    return p_score_up * mask
def coarse_location(self, coarseSco, fineSco, scale_score, lrtbs):
    """
    Zero ``fineSco`` everywhere except a window around the peak of
    ``coarseSco``.

    args:
        coarseSco(np.ndarray): 2-D score map whose argmax gives the peak
        fineSco(np.ndarray): score map to mask, indexed with the peak
            coordinates of coarseSco
        scale_score(float): divisor mapping peak coordinates to the cell
            indices used to look up lrtbs
        lrtbs(np.ndarray): indexed as [row, col, side]; sides 0..3 are used
            as left, top, right, bottom
    return:
        np.ndarray: fineSco multiplied by the 0/1 window mask
    """
    track_cfg = cfg.TRACK
    last_cell = track_cfg.SCORE_SIZE - 1
    upsize = last_cell * track_cfg.STRIDE + 1

    # Peak in the upsampled map and the score-map cell it falls into.
    peak_row, peak_col = np.unravel_index(coarseSco.argmax(), coarseSco.shape)
    cell_row = int(round(peak_row / scale_score))
    cell_col = int(round(peak_col / scale_score))
    cell_row = bbox_clip(cell_row, 0, last_cell)
    cell_col = bbox_clip(cell_col, 0, last_cell)

    distances = lrtbs[cell_row, cell_col, :]
    lower = int(track_cfg.REGION_S * track_cfg.EXEMPLAR_SIZE)
    upper = int(track_cfg.REGION_L * track_cfg.EXEMPLAR_SIZE)

    # Room available on each side, in the same left/top/right/bottom order
    # as the values read from lrtbs.
    room = (peak_col, peak_row, upsize - peak_col, upsize - peak_row)
    left, top, right, bottom = [
        int(min(limit, bbox_clip(distances[side], lower, upper)) / 2)
        for side, limit in enumerate(room)
    ]

    window = np.zeros_like(fineSco)
    window[peak_row - top:peak_row + bottom + 1,
           peak_col - left:peak_col + right + 1] = 1
    return fineSco * window
def track(self, img):
    """
    Run one tracking step on ``img`` and update ``self.center_pos``,
    ``self.size`` and ``self.scale_z``.

    args:
        img(np.ndarray): BGR image
    return:
        dict: {'bbox': [x, y, width, height]} where x and y are
            cx - width / 2 and cy - height / 2
    """
    track_cfg = cfg.TRACK

    # Size of the search region derived from the current target size.
    context = track_cfg.CONTEXT_AMOUNT * np.sum(self.size)
    s_z = np.sqrt((self.size[0] + context) * (self.size[1] + context))
    self.scale_z = track_cfg.EXEMPLAR_SIZE / s_z
    s_x = s_z * (track_cfg.INSTANCE_SIZE / track_cfg.EXEMPLAR_SIZE)
    x_crop = self.get_subwindow(img, self.center_pos,
                                track_cfg.INSTANCE_SIZE,
                                round(s_x), self.channel_average)

    # Network outputs.
    outputs = self.model.track(x_crop)
    cls_map = self._convert_cls(outputs['cls']).squeeze()
    cen_map = self._convert_cen(outputs['cen']).squeeze()
    loc = outputs['loc'].data.cpu().numpy().squeeze()

    upsize = (track_cfg.SCORE_SIZE - 1) * track_cfg.STRIDE + 1

    # Penalised score, optionally blended with self.window.
    p_score = self.cal_penalty(loc, track_cfg.PENALTY_K) * cls_map * cen_map
    hp_score = (
        p_score * (1 - track_cfg.WINDOW_INFLUENCE)
        + self.window * track_cfg.WINDOW_INFLUENCE
        if track_cfg.hanming else p_score
    )

    def upsample(score_map):
        # Bicubic resize to the (upsize, upsize) grid.
        return cv2.resize(score_map, (upsize, upsize),
                          interpolation=cv2.INTER_CUBIC)

    hp_score_up = upsample(hp_score)
    p_score_up = upsample(p_score)
    cls_up = upsample(cls_map)
    loc_hwc = np.transpose(loc, (1, 2, 0))
    loc_up = upsample(loc_hwc)

    scale_score = upsize / (track_cfg.SCORE_SIZE - 1)

    # get center
    row_up, col_up, new_cx, new_cy = self.getCenter(
        hp_score_up, p_score_up, scale_score, loc_hwc)

    # get w h
    pred = loc_up[row_up, col_up]
    ave_w = (pred[0] + pred[2]) / self.scale_z
    ave_h = (pred[1] + pred[3]) / self.scale_z

    # Learning rate from the size/ratio change penalty and the cls score.
    s_c = self.change(
        self.sz(ave_w, ave_h)
        / self.sz(self.size[0] * self.scale_z, self.size[1] * self.scale_z))
    r_c = self.change((self.size[0] / self.size[1]) / (ave_w / ave_h))
    size_penalty = np.exp(-(r_c * s_c - 1) * track_cfg.PENALTY_K)
    lr = size_penalty * cls_up[row_up, col_up] * track_cfg.LR

    new_width = lr * ave_w + (1 - lr) * self.size[0]
    new_height = lr * ave_h + (1 - lr) * self.size[1]

    # clip boundary
    img_h = img.shape[0]
    img_w = img.shape[1]
    cx = bbox_clip(new_cx, 0, img_w)
    cy = bbox_clip(new_cy, 0, img_h)
    width = bbox_clip(new_width, 0, img_w)
    height = bbox_clip(new_height, 0, img_h)

    # update state
    self.center_pos = np.array([cx, cy])
    self.size = np.array([width, height])

    return {
        'bbox': [cx - width / 2, cy - height / 2, width, height],
    }