def get_modified_target(self, x, bbox_gt):
    """
    Express a ground-truth box in the coordinates of the rescaled crop.

    The reference box ``x[2]`` is subtracted from the ground truth, the
    difference is scaled by ``NEW_EXEMPLAR_SIZE / s_z`` (``s_z`` being the
    context-padded side length of the reference box) and the crop offsets
    are added back before converting with ``get_region_from_center``.

    args:
        x: sample whose element ``x[2]`` is the reference bbox
        bbox_gt(np.ndarray): ground-truth bbox for the same sample
    return:
        first row of the ``get_region_from_center`` result; shape (8,)
    """
    gt_row = bbox_gt[np.newaxis, :]
    ref_row = x[2][np.newaxis, :]
    target = get_min_max_bbox(gt_row)
    ref = get_min_max_bbox(ref_row)

    # Columns 2 and 3 of the reference box are used as its extent.
    extent = np.array([ref[:, 2], ref[:, 3]]).T
    context = CONTEXT_AMOUNT * np.sum(extent, 1)
    crop_side = np.sqrt((extent[:, 0] + context) * (extent[:, 1] + context))
    zoom = NEW_EXEMPLAR_SIZE / crop_side

    # Offset relative to the reference box, then move into crop units.
    target -= ref
    target = target * zoom
    target[:, :2] += NEW_INSTANCE_SIZE / 2
    target[:, 2:4] += NEW_EXEMPLAR_SIZE

    return get_region_from_center(target)[0]
def init(self, imgs, bbox):
    """
    Initialise the tracker state and hand the template crops to the model.

    Stores ``center_pos``, ``size`` and ``channel_average`` (one row per
    image), resets ``cnt`` to 0 and calls ``self.model.template`` with the
    concatenated exemplar crops.

    args:
        imgs(np.ndarray): batch of BGR images
        bbox: batch of (x, y, w, h) boxes, one per image
    """
    rect = cxy_wh_2_rect(get_min_max_bbox(bbox))
    left, top = rect[:, 0], rect[:, 1]
    box_w, box_h = rect[:, 2], rect[:, 3]

    self.center_pos = np.array([left + box_w / 2.0, top + box_h / 2.0]).T
    self.size = np.array([box_w, box_h]).T

    # Side length of the context-padded square crop, rounded per image.
    context = CONTEXT_AMOUNT * np.sum(self.size, 1)
    crop_side = np.round(
        np.sqrt((self.size[:, 0] + context) * (self.size[:, 1] + context)))

    # Per-image mean over the first two axes; passed to get_subwindow below.
    self.channel_average = np.array(
        [np.mean(frame, axis=(0, 1)) for frame in imgs])

    crops = [
        self.get_subwindow(frame, self.center_pos[k], EXEMPLAR_SIZE,
                           crop_side[k], self.channel_average[k], ind=0)
        for k, frame in enumerate(imgs)
    ]
    self.model.template(torch.cat(crops))
    self.cnt = 0
def transform_to_gt(x, y):
    """
    Map a box given in crop coordinates back to image coordinates.

    Removes the ``INSTANCE_SIZE / 2`` and ``EXEMPLAR_SIZE`` offsets from
    ``y``, divides by ``EXEMPLAR_SIZE / s_z`` (``s_z`` being the rounded,
    context-padded side length of the template box ``x[2]``), re-centres
    the result on the template box and clips it to the shape of ``x[1]``.

    args:
        x: sample; ``x[1]`` is the image used for clipping and ``x[2]`` is
            the template bbox
        y(np.ndarray): box to convert, in crop coordinates
    return:
        first row of ``get_region_from_corner`` for the clipped box
    """
    img_i = x[1]
    bbox_t = cxy_wh_2_rect(get_min_max_bbox(x[2][np.newaxis, :]))
    y = get_min_max_bbox(y[np.newaxis, :])[0]

    center_pos = np.array([
        bbox_t[0, 0] + (bbox_t[0, 2] - 1) / 2.0,
        bbox_t[0, 1] + (bbox_t[0, 3] - 1) / 2.0,
    ])
    size = np.array([bbox_t[0, 2], bbox_t[0, 3]])

    # Side length of the context-padded square crop around the template.
    context = CONTEXT_AMOUNT * np.sum(size)
    w_z = size[0] + context
    h_z = size[1] + context
    s_z = np.round(np.sqrt(w_z * h_z))
    scale_z = EXEMPLAR_SIZE / s_z

    # Strip the crop offsets, then convert crop units to image units.
    y[0] -= (INSTANCE_SIZE / 2)
    y[1] -= (INSTANCE_SIZE / 2)
    y[2] -= (EXEMPLAR_SIZE)
    y[3] -= (EXEMPLAR_SIZE)
    y = y / scale_z

    cx = y[0] + center_pos[0]
    cy = y[1] + center_pos[1]

    # Blend the template size with the predicted size using TRANSITION_LR.
    width = size[0] * (1 - TRANSITION_LR) + (size[0] + y[2]) * TRANSITION_LR
    height = size[1] * (1 - TRANSITION_LR) + (size[1] + y[3]) * TRANSITION_LR

    cx, cy, width, height = _bbox_clip(cx, cy, width, height,
                                       img_i.shape[:2])

    bbox = np.array([cx - width / 2, cy - height / 2, width, height])
    return get_region_from_corner(bbox[np.newaxis, :])[0]
def get_modified_target(self, x, bbox):
    """
    Express a box in the coordinates of the search crop.

    The reference box ``x[2]`` is subtracted from ``bbox``, the difference
    is scaled by ``EXEMPLAR_SIZE / s_z`` (``s_z`` being the context-padded
    side length of the reference box) and the crop offsets are added back
    before converting with ``get_region_from_center``.

    args:
        x: sample whose element ``x[2]`` is the reference bbox
        bbox(np.ndarray): box to convert
    return:
        first row of the ``get_region_from_center`` result
    """
    bbox_row = bbox[np.newaxis, :]
    ref_row = x[2][np.newaxis, :]

    ref = get_min_max_bbox(ref_row)
    dims = np.array([ref[:, 2], ref[:, 3]]).transpose()
    pad = CONTEXT_AMOUNT * np.sum(dims, 1)
    zoom = EXEMPLAR_SIZE / np.sqrt((dims[:, 0] + pad) * (dims[:, 1] + pad))

    # Offset relative to the reference box, then move into crop units.
    shifted = get_min_max_bbox(bbox_row)
    shifted -= ref
    shifted = shifted * zoom
    shifted[:, :2] += INSTANCE_SIZE / 2
    shifted[:, 2:4] += EXEMPLAR_SIZE

    return get_region_from_center(shifted)[0]
def visualise_transformed_data_point(x, y):
    """
    Build a side-by-side debug image for one transformed sample.

    Both images of the sample are cropped around the template box centre
    with the same window (``INSTANCE_SIZE`` output, ``s_x`` source side
    length). A fixed centred square is drawn on the first crop and ``y``
    on the second.

    args:
        x: sample; ``x[0]`` and ``x[1]`` are the two images and ``x[2]`` is
            the template bbox
        y: quadrilateral drawn on the second crop (passed to ``draw_bbox``)
    return:
        the two annotated crops concatenated along axis 1
    """
    img_t = x[0]
    img_i = x[1]
    bbox_t = cxy_wh_2_rect(get_min_max_bbox(x[2][np.newaxis, :]))

    center_pos = np.array([
        bbox_t[0, 0] + (bbox_t[0, 2] - 1) / 2.0,
        bbox_t[0, 1] + (bbox_t[0, 3] - 1) / 2.0,
    ])
    size = np.array([bbox_t[0, 2], bbox_t[0, 3]])

    # Side length of the context-padded square around the template box,
    # scaled up from exemplar to instance size.
    context = CONTEXT_AMOUNT * np.sum(size)
    w_z = size[0] + context
    h_z = size[1] + context
    s_z = np.round(np.sqrt(w_z * h_z))
    s_x = np.round(s_z * (INSTANCE_SIZE / EXEMPLAR_SIZE))

    # The first image's channel mean is used for both crops.
    channel_average = np.mean(img_t, axis=(0, 1))

    img_t = get_subwindow(img_t, center_pos, INSTANCE_SIZE, s_x,
                          channel_average)

    # Square of side EXEMPLAR_SIZE centred in the INSTANCE_SIZE crop; being
    # square, the same min/max pair serves every coordinate.
    centre = INSTANCE_SIZE / 2.0
    xmin = centre - (EXEMPLAR_SIZE / 2.0)
    xmax = centre + (EXEMPLAR_SIZE / 2.0)
    t_quad = np.array([xmin, xmax, xmin, xmin, xmax, xmin, xmax, xmax])

    img_i = get_subwindow(img_i, center_pos, INSTANCE_SIZE, s_x,
                          channel_average)
    i_quad = y

    imgt_box = draw_bbox(img_t, t_quad)
    imgi_box = draw_bbox(img_i, i_quad)
    return np.concatenate((imgt_box, imgi_box), axis=1)
def track(self, imgs):
    """
    Run the model on search crops of ``imgs`` and decode its box outputs.

    Every entry of ``outputs[0]`` is decoded against the tracker state as
    it was on entry; ``center_pos`` and ``size`` are overwritten only with
    the result decoded from the last entry. ``cnt`` is incremented once
    per call.

    args:
        imgs(np.ndarray): batch of BGR images
    return:
        tuple: ``(bbox_rescaled,) + outputs[1:] + (x_crop, bbox_lkt)``
            where ``bbox_rescaled`` is a list with one
            ``get_region_from_corner`` result per entry of ``outputs[0]``,
            ``x_crop`` is the first search crop converted by
            ``img_to_numpy`` and ``bbox_lkt`` is the list of raw
            ``outputs[0]`` entries converted by ``tensor_to_numpy``
    """
    context = CONTEXT_AMOUNT * np.sum(self.size, 1)
    w_z = self.size[:, 0] + context
    h_z = self.size[:, 1] + context
    s_z = np.sqrt(w_z * h_z)
    scale_z = EXEMPLAR_SIZE / s_z
    # Round once for the whole batch rather than once per image.
    s_x = np.round(s_z * (INSTANCE_SIZE / EXEMPLAR_SIZE))

    x_crop = torch.cat([
        self.get_subwindow(img, self.center_pos[i], INSTANCE_SIZE, s_x[i],
                           self.channel_average[i], ind=1)
        for i, img in enumerate(imgs)
    ])
    self.cnt += 1

    outputs = self.model(x_crop)
    x_crop = img_to_numpy(x_crop[0])

    # Image shapes are the same for every prediction, so build them once.
    shapes = np.array([img.shape[:2] for img in imgs])

    predictions = outputs[0]
    last = len(predictions) - 1
    bbox_lkt = []
    bbox_rescaled = []
    for i, prediction in enumerate(predictions):
        bbox1 = tensor_to_numpy(prediction)
        bbox_lkt.append(bbox1)

        # Strip the crop offsets, then convert crop units to image units.
        bbox = get_min_max_bbox(bbox1)
        bbox[:, 0] -= (INSTANCE_SIZE / 2)
        bbox[:, 1] -= (INSTANCE_SIZE / 2)
        bbox[:, 2] -= (EXEMPLAR_SIZE)
        bbox[:, 3] -= (EXEMPLAR_SIZE)
        bbox = bbox / scale_z[:, np.newaxis]

        cx = bbox[:, 0] + self.center_pos[:, 0]
        cy = bbox[:, 1] + self.center_pos[:, 1]

        # Blend the previous size with the predicted size.
        width = self.size[:, 0] * (1 - TRANSITION_LR) + (
            self.size[:, 0] + bbox[:, 2]) * TRANSITION_LR
        height = self.size[:, 1] * (1 - TRANSITION_LR) + (
            self.size[:, 1] + bbox[:, 3]) * TRANSITION_LR

        cx, cy, width, height = self._bbox_clip(cx, cy, width, height,
                                                shapes)

        bbox = np.array([cx - width / 2, cy - height / 2,
                         width, height]).transpose()
        bbox_rescaled.append(get_region_from_corner(bbox))

        # Only the final prediction updates the tracker state.
        if i == last:
            self.center_pos = np.array([cx, cy]).transpose()
            self.size = np.array([width, height]).transpose()

    return (bbox_rescaled, ) + outputs[1:] + (x_crop, bbox_lkt)