def init(self, imgs, bbox):
    """Initialise the tracker state for a batch of first frames.

    Args:
        imgs (np.ndarray): batch of BGR images.
        bbox (np.ndarray): batch of (x, y, w, h) bounding boxes, one per image.

    Side effects:
        Sets self.center_pos, self.size, self.channel_average and self.cnt,
        and feeds the exemplar crops to self.model.template().
    """
    bbox = get_min_max_bbox(bbox)
    bbox = cxy_wh_2_rect(bbox)
    # NOTE(review): centres here use w/2.0 while the sibling helpers in this
    # file use (w - 1)/2.0 — confirm whether the half-pixel offset is intended.
    self.center_pos = np.array([bbox[:, 0] + (bbox[:, 2]) / 2.0,
                                bbox[:, 1] + (bbox[:, 3]) / 2.0])
    self.center_pos = self.center_pos.transpose()  # -> (batch, 2)
    self.size = np.array([bbox[:, 2], bbox[:, 3]])
    self.size = self.size.transpose()  # -> (batch, 2)
    # Exemplar crop size: pad the target by CONTEXT_AMOUNT of its perimeter
    # and take the square root of the padded area for a square window.
    w_z = self.size[:, 0] + CONTEXT_AMOUNT * np.sum(self.size, 1)
    h_z = self.size[:, 1] + CONTEXT_AMOUNT * np.sum(self.size, 1)
    s_z = np.round(np.sqrt(w_z * h_z))
    # Per-image channel means, used to pad crops that fall outside the frame.
    self.channel_average = np.array(
        [np.mean(img, axis=(0, 1)) for img in imgs])
    z_crop = torch.cat([
        self.get_subwindow(img, self.center_pos[i], EXEMPLAR_SIZE,
                           s_z[i], self.channel_average[i], ind=0)
        for i, img in enumerate(imgs)
    ])
    self.model.template(z_crop)
    self.cnt = 0
def visualise_transformed_data_point(x, y):
    """Visualise one transformed training sample for debugging.

    Args:
        x: sequence of (template image, instance image, template bbox);
           the bbox is a flat array convertible by get_min_max_bbox.
        y: quadrilateral (8 values) for the target in the instance crop.

    Returns:
        np.ndarray: the template crop and instance crop, each with its box
        drawn, concatenated horizontally.
    """
    img_t = x[0]
    img_i = x[1]
    bbox_t = x[2][np.newaxis, :]
    bbox_t = get_min_max_bbox(bbox_t)
    bbox_t = cxy_wh_2_rect(bbox_t)
    center_pos = np.array([
        bbox_t[0, 0] + (bbox_t[0, 2] - 1) / 2.0,
        bbox_t[0, 1] + (bbox_t[0, 3] - 1) / 2.0
    ])
    size = np.array([bbox_t[0, 2], bbox_t[0, 3]])
    # Crop size: pad the target by CONTEXT_AMOUNT of its perimeter and take
    # the square root of the padded area, then scale up to the search size.
    w_z = size[0] + CONTEXT_AMOUNT * np.sum(size)
    h_z = size[1] + CONTEXT_AMOUNT * np.sum(size)
    s_z = np.round(np.sqrt(w_z * h_z))
    s_x = np.round(s_z * (INSTANCE_SIZE / EXEMPLAR_SIZE))
    # Channel means pad any crop region that falls outside the frame.
    channel_average = np.mean(img_t, axis=(0, 1))
    img_t = get_subwindow(img_t, center_pos, INSTANCE_SIZE, s_x,
                          channel_average)
    # The template box is an EXEMPLAR_SIZE square centred inside the
    # INSTANCE_SIZE crop (inclusive coordinates).
    sz = EXEMPLAR_SIZE
    sx = INSTANCE_SIZE
    centre = np.array([(sx / 2.0), (sx / 2.0)])
    xmin = centre[0] - (sz / 2.0)
    xmax = centre[0] + (sz / 2.0)
    t_quad = np.array([xmin, xmax, xmin, xmin, xmax, xmin, xmax, xmax])
    img_i = get_subwindow(img_i, center_pos, INSTANCE_SIZE, s_x,
                          channel_average)
    i_quad = y
    imgt_box = draw_bbox(img_t, t_quad)
    imgi_box = draw_bbox(img_i, i_quad)
    vis = np.concatenate((imgt_box, imgi_box), axis=1)
    return vis
def transform_to_gt(x, y):
    """Map a crop-space prediction back to an image-space quadrilateral.

    Args:
        x: sequence of (template image, instance image, template bbox);
           the bbox is a flat array convertible by get_min_max_bbox.
        y: predicted box in instance-crop coordinates, convertible by
           get_min_max_bbox.

    Returns:
        np.ndarray: quadrilateral (8 values) in original image coordinates.
    """
    img_i = x[1]
    bbox_t = x[2][np.newaxis, :]
    bbox_t = get_min_max_bbox(bbox_t)
    bbox_t = cxy_wh_2_rect(bbox_t)
    y = get_min_max_bbox(y[np.newaxis, :])[0]
    center_pos = np.array([
        bbox_t[0, 0] + (bbox_t[0, 2] - 1) / 2.0,
        bbox_t[0, 1] + (bbox_t[0, 3] - 1) / 2.0
    ])
    size = np.array([bbox_t[0, 2], bbox_t[0, 3]])
    # Crop size: pad the target by CONTEXT_AMOUNT of its perimeter and take
    # the square root of the padded area; scale_z converts crop->image units.
    w_z = size[0] + CONTEXT_AMOUNT * np.sum(size)
    h_z = size[1] + CONTEXT_AMOUNT * np.sum(size)
    s_z = np.round(np.sqrt(w_z * h_z))
    scale_z = EXEMPLAR_SIZE / s_z
    # Recentre the prediction on the crop centre, express the size as a
    # delta from the exemplar size, then undo the crop scaling.
    y[0] -= (INSTANCE_SIZE / 2)
    y[1] -= (INSTANCE_SIZE / 2)
    y[2] -= (EXEMPLAR_SIZE)
    y[3] -= (EXEMPLAR_SIZE)
    y = y / scale_z
    cx = y[0] + center_pos[0]
    cy = y[1] + center_pos[1]
    # Exponentially smooth the size update with rate TRANSITION_LR.
    width = size[0] * (1 - TRANSITION_LR) + (size[0] + y[2]) * TRANSITION_LR
    height = size[1] * (1 - TRANSITION_LR) + (size[1] + y[3]) * TRANSITION_LR
    cx, cy, width, height = _bbox_clip(cx, cy, width, height,
                                       img_i.shape[:2])
    bbox = np.array([cx - width / 2, cy - height / 2, width, height])
    quad_num = get_region_from_corner(bbox[np.newaxis, :])[0]
    return quad_num