def track(self, image):
    """Locate the stored template inside a search window around the target.

    The frame is converted to grayscale, a square window of side
    ``self.window`` centred on ``self.position`` is clipped to the image,
    and ``self.template`` is matched inside it with normalised
    cross-correlation (``cv2.TM_CCOEFF_NORMED``).

    Args:
        image: Current frame; converted with ``cv2.COLOR_RGB2GRAY``.

    Returns:
        vot.Rectangle: ``(x, y, width, height)`` of the best match. When the
        clipped window is smaller than the template the previous estimate is
        returned unchanged.
    """
    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    half_window = float(self.window) / 2
    left = int(max(round(self.position[0] - half_window), 0))
    top = int(max(round(self.position[1] - half_window), 0))
    right = int(min(round(self.position[0] + half_window), image.shape[1] - 1))
    bottom = int(min(round(self.position[1] + half_window), image.shape[0] - 1))

    template_height, template_width = self.template.shape[0], self.template.shape[1]
    if right - left < template_width or bottom - top < template_height:
        # The window is too small to match in. self.position is the target
        # centre (see the update below), so the top-left corner is the centre
        # MINUS half the size; the previous code added it.
        return vot.Rectangle(self.position[0] - self.size[0] / 2,
                             self.position[1] - self.size[1] / 2,
                             self.size[0], self.size[1])

    cut = image[top:bottom, left:right]
    matches = cv2.matchTemplate(cut, self.template, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(matches)

    # max_loc is the top-left corner of the best match inside the window.
    match_x = left + max_loc[0]
    match_y = top + max_loc[1]
    self.position = (match_x + float(self.size[0]) / 2,
                     match_y + float(self.size[1]) / 2)
    return vot.Rectangle(match_x, match_y, self.size[0], self.size[1])
def track(self, image):
    """Locate the stored template inside a search window around the target.

    A square window of side ``self.window`` centred on ``self.position`` is
    clipped to the image and ``self.template`` is matched inside it with
    normalised cross-correlation (``cv2.TM_CCOEFF_NORMED``).

    Args:
        image: Current frame, used as-is (no colour conversion is done here).

    Returns:
        vot.Rectangle: ``(x, y, width, height)`` of the best match. When the
        clipped window is smaller than the template the previous estimate is
        returned unchanged.
    """
    half_window = float(self.window) / 2
    left = max(round(self.position[0] - half_window), 0)
    top = max(round(self.position[1] - half_window), 0)
    right = min(round(self.position[0] + half_window), image.shape[1] - 1)
    bottom = min(round(self.position[1] + half_window), image.shape[0] - 1)

    template_height, template_width = self.template.shape[0], self.template.shape[1]
    if right - left < template_width or bottom - top < template_height:
        # The window is too small to match in. self.position is the target
        # centre (see the update below), so the top-left corner is the centre
        # MINUS half the size; the previous code added it.
        return vot.Rectangle(self.position[0] - self.size[0] / 2,
                             self.position[1] - self.size[1] / 2,
                             self.size[0], self.size[1])

    # Bounds may be floats on interpreters where round() returns a float,
    # so cast only at the point of slicing.
    cut = image[int(top):int(bottom), int(left):int(right)]
    matches = cv2.matchTemplate(cut, self.template, cv2.TM_CCOEFF_NORMED)
    _, _, _, max_loc = cv2.minMaxLoc(matches)

    # max_loc is the top-left corner of the best match inside the window.
    match_x = left + max_loc[0]
    match_y = top + max_loc[1]
    self.position = (match_x + float(self.size[0]) / 2,
                     match_y + float(self.size[1]) / 2)
    return vot.Rectangle(match_x, match_y, self.size[0], self.size[1])
def track(self, image): newg = cv2.cvtColor(image, cv2.cv.CV_BGR2GRAY) # self.bb = [left, top, self.region.width, self.region.height] print(self.bb) if self.bb[0] < 0 or self.bb[1] < 0 or self.bb[1] >= image.shape[ 1] or self.bb[3] >= image.shape[0]: newbb, shift = fbtrack(self.oldg, newg, self.bb, 12, 12, 3, 12) self.bb = newbb self.oldg = newg self.position = (self.bb[0] + self.size[0] / 2, self.bb[1] + self.size[1] / 2) self.window = max(self.bb[2] - self.bb[0], self.bb[3] - self.bb[1]) * 2 left = int(max(round(self.position[0] - float(self.window) / 2), 0)) top = int(max(round(self.position[1] - float(self.window) / 2), 0)) right = int( min(round(self.position[0] + float(self.window) / 2), image.shape[1] - 1)) bottom = int( min(round(self.position[1] + float(self.window) / 2), image.shape[0] - 1)) if self.bb[0] < 0 or self.bb[1] < 0 or self.bb[ 1] >= image.shape[1] - 1 or self.bb[3] >= image.shape[0] - 1: print("NOTER JE PRISLO") return vot.Rectangle(1, 1, self.size[0], self.size[1]) return vot.Rectangle(self.bb[0], self.bb[1], self.bb[2] - self.bb[0], self.bb[3] - self.bb[1])
def track(tracker, processor, frame, position, size):
    """Run one tracker step on ``frame`` and return the predicted box.

    Args:
        tracker: Object whose ``forward([frames], boxes)`` yields the prediction.
        processor: Object whose ``preprocess(frames, boxes)`` prepares both inputs.
        frame: Current image; its first two shape entries are (height, width).
        position: Previous box as ``(x, y, w, h)``.
        size: Target size passed to ``SCPM.imresize`` and
            ``Preprocess.rescalePosition``.

    Returns:
        vot.Rectangle: The prediction converted from corner form
        ``(x, y, x1, y1)`` back to ``(x, y, width, height)``.
    """
    left, top, width, height = position
    corners = NP.array([left, top, left + width, top + height])
    # Two leading singleton axes turn the 4-vector into shape (1, 1, 4).
    boxes = NP.expand_dims(NP.expand_dims(corners, axis=0), axis=1)

    # imageSize must be (width, height)
    source_size = frame.shape[:2][::-1]
    resized = SCPM.imresize(frame, size)
    frames = NP.expand_dims(NP.expand_dims(resized, axis=0), axis=1)

    boxes = Preprocess.scalePosition(boxes, source_size)
    boxes = Preprocess.rescalePosition(boxes, size)
    frames, boxes = processor.preprocess(frames, boxes)
    prediction = tracker.forward([frames], boxes[:, 0, :])

    x0, y0, x1, y1 = prediction[0, 0, :]
    #logging.info("Tracker prediction: [%s, %s, %s, %s]", x, y, x1, y1)
    return vot.Rectangle(x0, y0, x1 - x0, y1 - y0)
def track(self, image):
    """Store the grayscale frame in ``self.gray`` and return ``self.region``.

    NOTE(review): the first ``return self.region`` below is unconditional, so
    the Farneback optical-flow update that follows it never runs and
    ``self.bbox`` / ``self.prevgray`` are never updated. This looks like a
    debugging short-circuit - confirm whether it is intentional. The original
    indentation was lost; the nesting of the unreachable part is a
    reconstruction.
    """
    # Box corners from self.bbox, which is used as (x, y, width, height).
    p1 = (int(self.bbox[0]), int(self.bbox[1]))
    p2 = (int(self.bbox[0] + self.bbox[2]), int(self.bbox[1] + self.bbox[3]))
    vis = image.copy()
    self.gray = cv2.cvtColor(vis, cv2.COLOR_BGR2GRAY)
    h, w, _ = vis.shape
    flow = np.zeros((h, w, 1), np.float32)
    return self.region
    # ---- everything below is unreachable ----
    flow = cv2.calcOpticalFlowFarneback(self.prevgray, self.gray, flow, 0.5,
                                        5, 15, 3, 5, 1,
                                        cv2.OPTFLOW_FARNEBACK_GAUSSIAN)
    # Flow restricted to the current box; channel 0 is read as x, channel 1 as y.
    newflow = flow[int(p1[1]):int(p2[1]), int(p1[0]):int(p2[0]), :]
    fx = newflow[..., 0]
    fy = newflow[..., 1]
    xavg = np.average(fx)
    yavg = np.average(fy)
    xsum = 0
    count = 0
    return self.region
    # Average only the x-flow values of magnitude >= 2 that point in the
    # direction of the mean flow.
    for x in np.nditer(fx):
        if xavg < 0:
            if x <= -2.0:
                xsum += x
                count += 1
        else:
            if x >= 2.0:
                xsum += x
                count += 1
    if count > 0:
        xsum /= count
    deltax = xsum
    ysum = 0
    count = 0
    # Same selection for the y component.
    for y in np.nditer(fy):
        if yavg < 0:
            if y <= -2.0:
                ysum += y
                count += 1
        else:
            if y >= 2.0:
                ysum += y
                count += 1
    if count > 0:
        ysum /= count
    deltay = ysum
    # Shift the box by the averaged flow; the size is kept.
    self.bbox = (int(self.bbox[0] + deltax), int(self.bbox[1] + deltay),
                 int(self.bbox[2]), int(self.bbox[3]))
    p1 = (int(self.bbox[0]), int(self.bbox[1]))
    p2 = (int(self.bbox[0] + self.bbox[2]), int(self.bbox[1] + self.bbox[3]))
    self.prevgray = self.gray
    return vot.Rectangle(int(self.bbox[0]), int(self.bbox[1]),
                         int(self.bbox[2]), int(self.bbox[3]))
def track(self, image):
    """Estimate translation and scale for one frame, then update both filters.

    Steps, in order:
      1. Correlate HOG features of the patch at ``self.pos`` with the
         translation filter (``self.x_num`` / ``self.x_den``) and move
         ``self.pos`` to the response peak.
      2. Correlate the scale sample with the scale filter (``self.s_num`` /
         ``self.s_den``), update ``self.currentScaleFactor`` and clamp it to
         ``[self.min_scale_factor, self.max_scale_factor]``.
      3. Re-extract features at the new position/scale and blend them into
         both filters with weight ``self.interp_factor``.

    ``self.pos`` is stored as ``[row, column]``; the returned rectangle swaps
    the order to ``(x, y, width, height)``.

    Args:
        image: Current frame, passed through to ``utils.get_subwindow`` and
            ``utils.get_scale_subwindow``.

    Returns:
        vot.Rectangle: Box of size ``self.target_size`` centred on ``self.pos``.
    """
    # --- translation estimation ---
    test_patch = utils.get_subwindow(image, self.pos, self.sz,
                                     scale_factor=self.currentScaleFactor)
    hog_feature_t = pyhog.features_pedro(test_patch / 255., 1)
    # np.pad is the public API; the np.lib.pad alias used previously is not
    # available in NumPy 2.x.
    hog_feature_t = np.pad(hog_feature_t, ((1, 1), (1, 1), (0, 0)), 'edge')
    xt = np.multiply(hog_feature_t, self.cos_window[:, :, None])
    xtf = np.fft.fft2(xt, axes=(0, 1))
    response = np.real(np.fft.ifft2(
        np.divide(np.sum(np.multiply(self.x_num, xtf), axis=2),
                  (self.x_den + self.lamda))))

    # Peak offset from the response centre, scaled back to image pixels.
    v_centre, h_centre = np.unravel_index(response.argmax(), response.shape)
    vert_delta = (v_centre - response.shape[0] / 2) * self.currentScaleFactor
    horiz_delta = (h_centre - response.shape[1] / 2) * self.currentScaleFactor
    self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]

    # --- scale estimation ---
    st = utils.get_scale_subwindow(image, self.pos, self.base_target_size,
                                   self.currentScaleFactor * self.scaleSizeFactors,
                                   self.scale_window, self.scale_model_sz)
    stf = np.fft.fftn(st, axes=[0])
    scale_response = np.real(np.fft.ifftn(
        np.sum(np.divide(np.multiply(self.s_num, stf),
                         (self.s_den[:, None] + self.lamda_scale)), axis=1)))
    recovered_scale = np.argmax(scale_response)
    self.currentScaleFactor = self.currentScaleFactor * self.scaleFactors[recovered_scale]
    if self.currentScaleFactor < self.min_scale_factor:
        self.currentScaleFactor = self.min_scale_factor
    elif self.currentScaleFactor > self.max_scale_factor:
        self.currentScaleFactor = self.max_scale_factor

    # --- model update at the new position and scale ---
    update_patch = utils.get_subwindow(image, self.pos, self.sz,
                                       scale_factor=self.currentScaleFactor)
    hog_feature_l = pyhog.features_pedro(update_patch / 255., 1)
    hog_feature_l = np.pad(hog_feature_l, ((1, 1), (1, 1), (0, 0)), 'edge')
    xl = np.multiply(hog_feature_l, self.cos_window[:, :, None])
    xlf = np.fft.fft2(xl, axes=(0, 1))
    new_x_num = np.multiply(self.yf[:, :, None], np.conj(xlf))
    new_x_den = np.real(np.sum(np.multiply(xlf, np.conj(xlf)), axis=2))

    sl = utils.get_scale_subwindow(image, self.pos, self.base_target_size,
                                   self.currentScaleFactor * self.scaleSizeFactors,
                                   self.scale_window, self.scale_model_sz)
    slf = np.fft.fftn(sl, axes=[0])
    new_s_num = np.multiply(self.ysf[:, None], np.conj(slf))
    new_s_den = np.real(np.sum(np.multiply(slf, np.conj(slf)), axis=1))

    # Running average of numerators and denominators.
    keep = 1 - self.interp_factor
    self.x_num = keep * self.x_num + self.interp_factor * new_x_num
    self.x_den = keep * self.x_den + self.interp_factor * new_x_den
    self.s_num = keep * self.s_num + self.interp_factor * new_s_num
    self.s_den = keep * self.s_den + self.interp_factor * new_s_den

    self.target_size = self.base_target_size * self.currentScaleFactor
    return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                         self.pos[0] - self.target_size[0] / 2,
                         self.target_size[1],
                         self.target_size[0])
def track(self, image):
    """Advance the wrapped tracker by one frame.

    Returns:
        tuple: ``(vot.Rectangle, confidence)`` where the rectangle is built
        from the first four entries of the box returned by
        ``self.tracker.update`` and confidence is 0.5 when the update
        reported success, 0.05 otherwise.
    """
    succeeded, box = self.tracker.update(image)
    confidence = 0.5 if succeeded else 0.05
    x, y, width, height = box[0], box[1], box[2], box[3]
    return vot.Rectangle(x, y, width, height), confidence
def track(self, image, i):
    """Read a frame, run the wrapped tracker and return its box.

    NOTE(review): the frame is loaded from the free name ``imagefile``, not
    from the ``image`` argument (which is ignored, as is ``i``). Presumably
    ``imagefile`` is a module-level global set by the caller - confirm.

    Returns:
        vot.Rectangle: Built from the first four entries of the tracker result.
    """
    frame = self._tracker._read_image(imagefile)
    rect = self._tracker.track(frame)
    print("res_rect", rect)
    # The integer copy is only used for the debug print.
    rect_as_int = np.array(rect).astype(int)
    print("tracked_bb ", rect_as_int)
    x, y, width, height = rect[0], rect[1], rect[2], rect[3]
    return vot.Rectangle(x, y, width, height)
def track(self, image):
    """Track the target with mean shift on a hue back-projection.

    A square window of side ``self.window`` centred on ``self.position`` is
    clipped to the image, converted to HSV and back-projected through
    ``self.roi_hist``; ``cv2.meanShift`` then refines the previous box
    ``self.bb`` inside that window.

    ``self.size`` is ``(width, height)`` and ``self.position`` is the box
    centre; both are rewritten from the mean-shift result.

    Args:
        image: Current frame; converted with ``cv2.COLOR_BGR2HSV``.

    Returns:
        vot.Rectangle: ``(x, y, width, height)`` of the new box, or the
        previous estimate when the clipped window is smaller than the target.
    """
    half_window = float(self.window) / 2
    left = int(max(round(self.position[0] - half_window), 0))
    top = int(max(round(self.position[1] - half_window), 0))
    right = int(min(round(self.position[0] + half_window), image.shape[1] - 1))
    bottom = int(min(round(self.position[1] + half_window), image.shape[0] - 1))

    # self.size is (width, height): compare the horizontal extent with the
    # width and the vertical extent with the height (these were swapped).
    if right - left < self.size[0] or bottom - top < self.size[1]:
        # self.position is the centre, so the top-left corner is the centre
        # MINUS half the size; the previous code added it.
        return vot.Rectangle(self.position[0] - self.size[0] / 2,
                             self.position[1] - self.size[1] / 2,
                             self.size[0], self.size[1])

    img = image[top:bottom, left:right]
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    dst = cv2.calcBackProject([hsv], [0], self.roi_hist, [0, 180], 1)

    # meanShift works in window coordinates, so shift the previous box by
    # the window origin.
    start_window = (int(self.bb[0] - left), int(self.bb[1] - top),
                    int(self.size[0]), int(self.size[1]))
    ret, track_window = cv2.meanShift(dst, start_window, self.term_crit)

    # Back to image coordinates.
    box_x = left + track_window[0]
    box_y = top + track_window[1]
    box_w, box_h = track_window[2], track_window[3]

    self.position = (box_x + int(box_w / 2), box_y + int(box_h / 2))
    self.bb = [box_x, box_y, box_w, box_h]
    self.size = (box_w, box_h)
    return vot.Rectangle(box_x, box_y, box_w, box_h)
def run_vot_exp(tracker_name, para_name, vis=False):
    """Run the STARK long-term tracker under the VOT protocol.

    The tracker is initialised on the first frame with the region supplied by
    the VOT handle, then every following frame is tracked and the box and
    confidence are reported back to the handle.

    Args:
        tracker_name: Forwarded to ``stark_vot20_lt``.
        para_name: Forwarded to ``stark_vot20_lt``; also the sub-directory
            name for visualisations.
        vis: When true, each raw frame and a copy with the predicted box
            drawn on it are written below the debug directory.
    """
    torch.set_num_threads(1)
    save_root = os.path.join(
        '/data/sda/v-yanbi/iccv21/LittleBoy/vot20_lt_debug', para_name)
    if vis:
        # exist_ok avoids the check-then-create race when several tracker
        # processes start at the same time.
        os.makedirs(save_root, exist_ok=True)

    tracker = stark_vot20_lt(tracker_name=tracker_name, para_name=para_name)
    handle = vot.VOT("rectangle")
    selection = handle.region()
    imagefile = handle.frame()
    init_box = [selection.x, selection.y, selection.width, selection.height]
    if not imagefile:
        sys.exit(0)

    if vis:
        # One directory per sequence, one sub-directory per run.
        seq_name = imagefile.split('/')[-3]
        save_v_dir = os.path.join(save_root, seq_name)
        cur_time = int(time.time() % 10000)
        save_dir = os.path.join(save_v_dir, str(cur_time))
        os.makedirs(save_dir, exist_ok=True)

    image = cv2.cvtColor(cv2.imread(imagefile), cv2.COLOR_BGR2RGB)  # Right
    tracker.initialize(image, init_box)

    while True:
        imagefile = handle.frame()
        if not imagefile:
            break
        image = cv2.cvtColor(cv2.imread(imagefile), cv2.COLOR_BGR2RGB)  # Right
        b1, conf = tracker.track(image)
        x1, y1, w, h = b1
        handle.report(vot.Rectangle(x1, y1, w, h), conf)

        if vis:
            # original image
            image_ori = image[:, :, ::-1].copy()  # RGB --> BGR
            image_name = imagefile.split('/')[-1]
            cv2.imwrite(os.path.join(save_dir, image_name), image_ori)

            # tracker box
            image_b = image_ori.copy()
            cv2.rectangle(image_b, (int(b1[0]), int(b1[1])),
                          (int(b1[0] + b1[2]), int(b1[1] + b1[3])),
                          (0, 0, 255), 2)
            # Build the name from the real extension. The previous
            # replace('.jpg', ...) left non-jpg names unchanged, so the box
            # image overwrote the raw frame just written above.
            stem, ext = os.path.splitext(image_name)
            image_b_name = stem + '_bbox' + ext
            cv2.imwrite(os.path.join(save_dir, image_b_name), image_b)
def __init__(self, image, mask):
    """Initialise the tracker from the first frame and a target mask.

    The mask is reduced to a rectangle by ``self._rect_from_mask``; the image
    patch under that rectangle (clipped to the image) becomes the template.

    Sets:
        self.window: Twice the longer side of the rectangle.
        self.template: Image crop under the clipped rectangle.
        self.position: Centre of the (unclipped) rectangle.
        self.size: ``(width, height)`` of the rectangle.
    """
    rect = self._rect_from_mask(mask)
    region = vot.Rectangle(rect[0], rect[1], rect[2], rect[3])

    # Clip the rectangle to the image before cropping the template.
    x_start = max(region.x, 0)
    y_start = max(region.y, 0)
    x_stop = min(region.x + region.width, image.shape[1] - 1)
    y_stop = min(region.y + region.height, image.shape[0] - 1)
    rows = slice(int(y_start), int(y_stop))
    cols = slice(int(x_start), int(x_stop))

    self.window = max(region.width, region.height) * 2
    self.template = image[rows, cols]
    self.position = (region.x + region.width / 2,
                     region.y + region.height / 2)
    self.size = (region.width, region.height)
def track(self, image):
    """Pick the best scale by correlation peak and move to the response peak.

    For every factor in ``self.scale_factors`` a patch is cropped at
    ``self.scaling * factor``, pushed through ``model`` and correlated with
    the learned filter (``self.x_num`` / ``self.x_den``). The scale whose
    response map has the highest peak VALUE wins; ties keep the earliest
    factor.

    Args:
        image: Current frame, passed to ``utils.get_subwindow``.

    Returns:
        vot.Rectangle: ``(x, y, width, height)`` at the selected scale.

    Raises:
        ValueError: If ``self.scale_factors`` is empty.
    """
    best_peak = None
    response_final = None
    scale_factor_final = None

    for scale_factor in self.scale_factors:
        test = utils.get_subwindow(image, self.pos, self.sz,
                                   self.scaling * scale_factor)
        test = transform.resize(test, (224, 224))
        test = (test - imgMean) / imgStd
        test = np.transpose(test, (2, 0, 1))
        feature = model(
            Variable(torch.from_numpy(test[None, :, :, :]).float()))
        feature = feature.data[0].numpy().transpose((1, 2, 0))
        # Resample the feature map to the cosine window's spatial size.
        xt = ndimage.zoom(
            feature,
            (float(self.cos_window.shape[0]) / feature.shape[0],
             float(self.cos_window.shape[1]) / feature.shape[1], 1),
            order=1)
        xt = np.multiply(xt, self.cos_window[:, :, None])
        xtf = np.fft.fft2(xt, axes=(0, 1))
        response = np.real(
            np.fft.ifft2(
                np.divide(np.sum(np.multiply(self.x_num, xtf), axis=2),
                          (self.x_den + self.lamda))))

        # Compare peak values. The previous code compared response.argmax(),
        # i.e. the flat INDEX of the peak, which says nothing about match
        # quality (and it shadowed the builtin ``max``).
        peak = response.max()
        if best_peak is None or peak > best_peak:
            best_peak = peak
            response_final = response
            scale_factor_final = scale_factor

    if response_final is None:
        raise ValueError("self.scale_factors is empty; no scale to evaluate")

    self.scaling *= scale_factor_final
    v_centre, h_centre = np.unravel_index(response_final.argmax(),
                                          response_final.shape)
    # Peak offset from the map centre, converted from cells to image pixels.
    vert_delta = (v_centre - response_final.shape[0] / 2) * self.scaling * self.cell_size
    horiz_delta = (h_centre - response_final.shape[1] / 2) * self.scaling * self.cell_size
    # NOTE(review): self.pos is overwritten with the value after subtracting
    # half the scaled target size, yet it is passed unchanged to
    # get_subwindow on the next call. If get_subwindow expects a centre this
    # drifts every frame - confirm against utils.get_subwindow. Behaviour is
    # kept as it was.
    self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta] - \
        self.target_size * self.scaling / 2.
    return vot.Rectangle(self.pos[1], self.pos[0],
                         self.target_size[1] * self.scaling,
                         self.target_size[0] * self.scaling)
def track(self, image):
    """Estimate the target translation for one frame and update the filter.

    HOG features of the patch at ``self.pos`` are correlated with the learned
    filter (``self.x_num`` / ``self.x_den``); ``self.pos`` moves to the
    response peak, then features re-extracted at the new position are blended
    into the filter with weight ``self.interp_factor``. No scale estimation
    is done: ``self.target_size`` is reset to ``self.base_target_size``.

    ``self.pos`` is stored as ``[row, column]``; the returned rectangle swaps
    the order to ``(x, y, width, height)``.

    Args:
        image: Current frame, passed to ``utils.get_subwindow``.

    Returns:
        vot.Rectangle: Box of size ``self.target_size`` centred on ``self.pos``.
    """
    # ---------------------------------------track--------------------------------- #
    test_patch = utils.get_subwindow(image, self.pos, self.sz)
    hog_feature_t = pyhog.features_pedro(test_patch / 255., 1)
    # np.pad is the public API; the np.lib.pad alias used previously is not
    # available in NumPy 2.x.
    hog_feature_t = np.pad(hog_feature_t, ((1, 1), (1, 1), (0, 0)), 'edge')
    xt = np.multiply(hog_feature_t, self.cos_window[:, :, None])
    xtf = np.fft.fft2(xt, axes=(0, 1))
    # Compute the response, summing directly over the feature channels.
    response = np.real(
        np.fft.ifft2(
            np.divide(np.sum(np.multiply(self.x_num, xtf), axis=2),
                      (self.x_den + self.lamda))))
    # Locate the response maximum.
    v_centre, h_centre = np.unravel_index(response.argmax(), response.shape)
    vert_delta = v_centre - response.shape[0] / 2
    horiz_delta = h_centre - response.shape[1] / 2
    # New position.
    self.pos = [self.pos[0] + vert_delta, self.pos[1] + horiz_delta]

    # ---------------------------------------update--------------------------------- #
    update_patch = utils.get_subwindow(image, self.pos, self.sz)
    hog_feature_l = pyhog.features_pedro(update_patch / 255., 1)
    hog_feature_l = np.pad(hog_feature_l, ((1, 1), (1, 1), (0, 0)), 'edge')
    xl = np.multiply(hog_feature_l, self.cos_window[:, :, None])
    xlf = np.fft.fft2(xl, axes=(0, 1))
    # New numerator/denominator for the position filter.
    new_x_num = np.multiply(self.yf[:, :, None], np.conj(xlf))
    new_x_den = np.real(np.sum(np.multiply(xlf, np.conj(xlf)), axis=2))
    # Filter learning: running average with rate interp_factor.
    keep = 1 - self.interp_factor
    self.x_num = keep * self.x_num + self.interp_factor * new_x_num
    self.x_den = keep * self.x_den + self.interp_factor * new_x_den

    self.target_size = self.base_target_size
    return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                         self.pos[0] - self.target_size[0] / 2,
                         self.target_size[1], self.target_size[0])
def track(self, image):
    """Update position and scale for one frame and refresh the filters.

    Delegates to the module-level helpers ``tracking``, ``scale_variation``,
    ``update_position_filter`` and ``update_scale_filter``. The scale factor
    is clamped to ``[self.min_scale_factor, self.max_scale_factor]``.

    NOTE(review): the original indentation was lost. Only the position-filter
    update is placed under ``if self.update_flag == True`` here; whether the
    scale-filter update belongs in that branch too must be confirmed against
    the original file.

    Returns:
        vot.Rectangle: Box of size ``self.final_size`` centred on ``self.pos``
        (``self.pos`` is used as ``[row, column]``).
    """
    # tracking(image,target_position,window_size,num,den,cos_window,scalefactor)
    self.pos, self.update_flag = tracking(image, self.pos, self.sz,
                                          self.resnet_num, self.resnet_den,
                                          self.cos_window,
                                          self.current_scale_factor,
                                          self.update_flag, self.cell_size,
                                          self.lam)
    # scale_variation(image,target_position,target_size,scale_num,scale_den,scale_factor,ScaleFactors,scale_window,model_size)
    self.current_scale_factor = scale_variation(
        image, self.pos, self.target_size, self.s_num, self.s_den,
        self.current_scale_factor, self.scaleFactors, self.scale_window,
        self.scale_model_sz, self.lam)
    # Clamp the scale factor to its allowed range.
    if self.current_scale_factor < self.min_scale_factor:
        self.current_scale_factor = self.min_scale_factor
    elif self.current_scale_factor > self.max_scale_factor:
        self.current_scale_factor = self.max_scale_factor
    # update
    # update_position_filter(image, target_position, window_size, scale_factor, position_yf, position_cos_window,
    # position_num, position_den, update_rate)
    if self.update_flag == True:
        self.resnet_num, self.resnet_den = update_position_filter(
            image, self.pos, self.sz, self.current_scale_factor, self.yf,
            self.cos_window, self.resnet_num, self.resnet_den,
            self.interp_factor)
    # update_scale_filter(image,target_position,target_size,scale_num,scale_den,scale_factor,ScaleFactors,scale_window,model_size,scale_ysf,update_rate)
    self.s_num, self.s_den = update_scale_filter(
        image, self.pos, self.target_size, self.s_num, self.s_den,
        self.current_scale_factor, self.scaleFactors, self.scale_window,
        self.scale_model_sz, self.ysf, self.interp_factor_scale)
    self.final_size = self.target_size * self.current_scale_factor
    return vot.Rectangle(self.pos[1] - self.final_size[1] / 2,
                         self.pos[0] - self.final_size[0] / 2,
                         self.final_size[1], self.final_size[0])
def track(self, image):
    """Move ``self.pos`` to the peak of the kernel correlation response.

    The patch at ``self.pos`` is mean-subtracted and windowed, a Gaussian
    kernel against the learned model (``self.xf`` / ``self.x``) is computed
    with ``utils.dense_gauss_kernel``, and the response is obtained by
    multiplying its spectrum with ``self.alphaf``.

    Args:
        image: Current frame, passed to ``utils.get_subwindow``.

    Returns:
        vot.Rectangle: Box of size ``self.target_size`` centred on the new
        ``self.pos`` (stored as ``[row, column]``).
    """
    patch = utils.get_subwindow(image, self.pos, self.patch_size)
    centred = patch - patch.mean()
    z = np.multiply(centred, self.cos_window[:, :, None])
    zf = np.fft.fft2(z, axes=(0, 1))

    kernel = utils.dense_gauss_kernel(self.feature_bandwidth_sigma,
                                      self.xf, self.x, zf, z)
    kernel_f = np.fft.fft2(kernel, axes=(0, 1))
    response = np.real(np.fft.ifft2(np.multiply(self.alphaf, kernel_f)))

    # Max position in response map, as an offset from the map centre.
    peak_row, peak_col = np.unravel_index(response.argmax(), response.shape)
    row_shift = peak_row - response.shape[0] / 2
    col_shift = peak_col - response.shape[1] / 2

    # Predicted position
    self.pos = [self.pos[0] + row_shift, self.pos[1] + col_shift]

    height, width = self.target_size[0], self.target_size[1]
    return vot.Rectangle(self.pos[1] - width / 2,
                         self.pos[0] - height / 2,
                         width,
                         height)
def track(self, image):
    """Track the target for one frame and return its box and a confidence.

    The search window around ``self.target`` is cropped, resized and passed
    through ``self.model``; the zero-padded feature map is turned into a
    similarity map, optionally damped by a shifted Gaussian, and
    ``self.compute_target`` picks the new target in feature coordinates.

    ``confidence`` is the mean of the recent strengths divided by the current
    strength, so SMALLER values mean the current detection is strong relative
    to recent history.

    NOTE(review): the original indentation was lost; the nesting of the
    branches below is a reconstruction - confirm against the original file.

    Returns:
        tuple: ``(vot.Rectangle, confidence)``.
    """
    # Target in feature-map coordinates.
    ft_target = self.target / self.stride
    anchors = generate_anchors(ft_target)
    search_window = generate_search_window(np.shape(image), self.target,
                                           self.window_scale).astype(int)
    im_input = image[search_window[0]:search_window[2],
                     search_window[1]:search_window[3], :]
    # NOTE(review): self.scale_factor is assigned a 2-tuple at the end of
    # this method; a plain tuple times a tuple raises TypeError, so
    # presumably it is reset to an array or scalar elsewhere - confirm.
    shape_in = np.round(np.shape(im_input)[:2] *
                        self.scale_factor).astype(int)
    im_input = cv2.resize(im_input,
                          dsize=(shape_in[1], shape_in[0]),
                          interpolation=cv2.INTER_CUBIC)
    im_input = np.expand_dims(im_input, axis=0)
    features = np.squeeze(self.model.predict(im_input), axis=0)
    ft_shape = np.shape(features)
    # Zero-pad the feature map.
    # NOTE(review): the pad offset uses FLAGS.template_size while the padded
    # shape uses self.template_size - presumably equal; confirm.
    pad = FLAGS.template_size // 2
    pad_shape = np.array([
        ft_shape[0] + self.template_size, ft_shape[1] + self.template_size,
        ft_shape[2]
    ])
    ft_pad = np.zeros(pad_shape)
    ft_pad[pad:ft_shape[0] + pad, pad:ft_shape[1] + pad, :] = features
    features = ft_pad
    sim_map = self.compute_distance_map(features)
    sim_map_mean = sim_map.mean()
    # orig_sim_map is only referenced by the commented-out train() call below.
    orig_sim_map = sim_map.copy()
    if self.use_gauss:
        # Weight the similarity map with a Gaussian shifted by d.
        gauss_filter = gauss_kernel(np.shape(sim_map), FLAGS.gauss_sigma)
        d = np.round(
            (to_yxhw(ft_target)[:2] - to_yxhw(search_window // 8)[:2]) *
            self.scale_factor).astype(int)
        gauss_filter = shift(gauss_filter, d, cval=0)
        sim_map = np.multiply(gauss_filter, sim_map)
    new_target, max_score, max_slice = self.compute_target(
        anchors, sim_map, features, ft_target)
    strength = max_score**2 / sim_map_mean
    # Keep the most recent strengths, newest first, capped in length.
    self.scores.insert(0, strength)
    if len(self.scores) > self.strength_queue_length:
        self.scores.pop()
    confidence = np.mean(self.scores) / strength
    if confidence > FLAGS.bad_detection_thresh:
        # Weak detection: keep the old size, take only the new centre, widen
        # the search window and disable the Gaussian for the next frame.
        t_target = to_yxhw(ft_target)
        t_target[:2] = to_yxhw(new_target)[:2]
        ft_target = to_y1x1y2x2(t_target)
        ft_target += np.tile(search_window[:2] // 8, 2)
        self.window_scale = FLAGS.search_window_scale * 2
        self.use_gauss = False
    else:
        ft_target = new_target
        ft_target += np.tile(search_window[:2] // 8, 2)
        self.window_scale = FLAGS.search_window_scale
        self.use_gauss = True
    # Blend the matched slice into the template on strong detections, and
    # only when the slice has the full template size.
    if confidence < FLAGS.good_detection_thresh and np.shape(
            max_slice)[:2] == (self.template_size, self.template_size):
        self.template = self.template * (
            1 - FLAGS.update_alpha) + max_slice * FLAGS.update_alpha
    # if confidence < FLAGS.good_detection_thresh / 2:
    #     train(frame, ft_target, orig_sim_map, search_window, epochs=FLAGS.tuning_epochs, learning_rate=FLAGS.tuning_learning_rate)
    # Back to image coordinates.
    self.target = np.multiply(ft_target, self.stride)
    target_w = self.target[3] - self.target[1]
    target_h = self.target[2] - self.target[0]
    # Shrink width and height by a padding amount before reporting.
    pad_amount = (target_w + target_h) / (self.padding_divider + 2)
    self.target = to_yxhw(self.target)
    self.target[2] -= pad_amount
    self.target[3] -= pad_amount
    self.scale_factor = (self.template_size / target_h / self.stride,
                         FLAGS.template_size / target_w / self.stride)
    return vot.Rectangle(self.target[1], self.target[0],
                         int(self.target[3]),
                         int(self.target[2])), confidence
def track(self, image):
    """Report the stored target without looking at ``image``.

    Returns:
        vot.Rectangle: Box of size ``self.size`` centred on ``self.position``.
    """
    centre_x, centre_y = self.position[0], self.position[1]
    width, height = self.size[0], self.size[1]
    return vot.Rectangle(centre_x - width / 2, centre_y - height / 2,
                         width, height)
def track(self, image, i):
    """Run one MDNet-style tracking step with distractor suppression.

    Candidate boxes are sampled around ``self.target_bbox`` and scored by the
    model; the mean of the five best becomes the new target. On success the
    box is refined by the regressor and new positive/negative features are
    collected; the model is then fine-tuned on failure (short-term update) or
    every ``opts['long_interval']`` frames (long-term update).

    NOTE(review): the original indentation was lost; the nesting below
    (in particular what belongs to ``if success`` and ``if len(ds_idx) > 0``)
    is a reconstruction - confirm against the original file.

    Args:
        image: Current frame.
        i: Frame index, used only to schedule the long-term update.

    Returns:
        vot.Rectangle: The latest entry of ``self.result_bb``.
    """
    # Estimate target bbox
    # Note: this overwrites the shared opts['n_samples'] entry on every call.
    opts['n_samples'] = 512
    samples = gen_samples(self.sample_generator, self.target_bbox,
                          opts['n_samples'])
    sample_scores = forward_samples(self.model,
                                    image,
                                    samples,
                                    out_layer='fc6')
    top_scores, top_idx = sample_scores[:, 1].topk(5)
    top_idx = top_idx.cpu().numpy()
    target_score = top_scores.mean()
    self.target_bbox = samples[top_idx].mean(axis=0)
    success = target_score > opts['success_thr']
    # Expand search area at failure
    if success:
        self.sample_generator.set_trans_f(opts['trans_f'])
    else:
        self.sample_generator.set_trans_f(opts['trans_f_expand'])
    # Bbox regression
    if success:
        bbreg_samples = samples[top_idx]
        bbreg_feats = forward_samples(self.model, image, bbreg_samples)
        bbreg_samples = self.bbreg.predict(bbreg_feats, bbreg_samples)
        self.bbreg_bbox = bbreg_samples.mean(axis=0)
    else:
        # This assigns a local, not self.bbreg_bbox; the failure branch just
        # below sets self.bbreg_bbox anyway, so the local is never read.
        bbreg_bbox = self.target_bbox
    # Copy previous result at failure
    if not success:
        self.target_bbox = self.result[-1]
        self.bbreg_bbox = self.result_bb[-1]
    # Save result
    self.result.append(self.target_bbox)
    self.result_bb.append(self.bbreg_bbox)
    # Data collect
    if success:
        # Draw pos/neg samples
        pos_examples = gen_samples(self.pos_generator, self.target_bbox,
                                   opts['n_pos_update'],
                                   opts['overlap_pos_update'])
        # Fall back to copies of the target box when no positive was drawn.
        if len(pos_examples) == 0:
            pos_examples = np.tile(self.target_bbox[None, :],
                                   (opts['n_pos_init'], 1))
        neg_examples = gen_samples(self.neg_generator, self.target_bbox,
                                   opts['n_neg_update'],
                                   opts['overlap_neg_update'])
        # Extract pos/neg features
        pos_feats = forward_samples(self.model, image, pos_examples)
        neg_feats = forward_samples(self.model, image, neg_examples)
        self.pos_feats_all.append(pos_feats)
        self.neg_feats_all.append(neg_feats)
        # Drop the oldest entry once a feature buffer exceeds its limit.
        if len(self.pos_feats_all) > opts['n_frames_long']:
            del self.pos_feats_all[0]
        if len(self.neg_feats_all) > opts['n_frames_short']:
            del self.neg_feats_all[0]
        print('====================================')
        print('Distractor suppression!')
        print('====================================')
        # Samples that score above zero are treated as distractors.
        ds_samples = gen_samples(self.ds_generator, self.target_bbox,
                                 opts['n_samples'])
        ds_sample_scores = forward_samples(self.model,
                                           image,
                                           ds_samples,
                                           out_layer='fc6')
        ds_idx = ds_sample_scores[:, 1].gt(0.0).nonzero().cpu().numpy()
        if len(ds_idx) > 0:
            print('Distractor suppression!')
            #ipdb.set_trace()
            # Draw boxes around every distractor and use them as negatives.
            for ds_i, ds_id in enumerate(ds_idx):
                if ds_i == 0:
                    ds_neg_examples = gen_samples(
                        self.pos_generator, ds_samples[ds_id[0]],
                        opts['n_pos_update'], opts['overlap_pos_update'])
                else:
                    ds_neg_examples = np.concatenate(
                        (ds_neg_examples,
                         gen_samples(self.pos_generator,
                                     ds_samples[ds_id[0]],
                                     opts['n_pos_update'],
                                     opts['overlap_pos_update'])),
                        axis=0)
            ds_neg_feats = forward_samples(self.model, image,
                                           ds_neg_examples)
            self.neg_feats_all.append(ds_neg_feats)
            nframes = min(opts['n_frames_short'], len(self.pos_feats_all))
            pos_data = torch.stack(self.pos_feats_all[-nframes:],
                                   0).view(-1, self.feat_dim)
            neg_data = stackList(self.neg_feats_all).view(
                -1, self.feat_dim)
            train(self.model, self.criterion, self.update_optimizer,
                  pos_data, neg_data, opts['maxiter_update'])
    # Short term update
    if not success:
        nframes = min(opts['n_frames_short'], len(self.pos_feats_all))
        pos_data = stackList(self.pos_feats_all[-nframes:])
        neg_data = stackList(self.neg_feats_all)
        train(self.model, self.criterion, self.update_optimizer, pos_data,
              neg_data, opts['maxiter_update'])
    # Long term update
    elif i % opts['long_interval'] == 0:
        pos_data = stackList(self.pos_feats_all)
        neg_data = stackList(self.neg_feats_all)
        train(self.model, self.criterion, self.update_optimizer, pos_data,
              neg_data, opts['maxiter_update'])
    return vot.Rectangle(self.result_bb[-1][0], self.result_bb[-1][1],
                         self.result_bb[-1][2], self.result_bb[-1][3])
def track(self, image):
    """Estimate translation and scale from multi-layer CNN features.

    Steps, in order:
      1. Sum the per-layer correlation responses (weighted by
         ``layerweights``) and move ``self.pos`` to the peak.
      2. Evaluate the scale filter and clamp ``self.current_scale_factor`` to
         ``[self.min_scale_factor, self.max_scale_factor]``.
      3. Re-extract features at the new state and blend them into the
         per-layer translation filters and the scale filter with weight
         ``self.interp_factor``.

    Args:
        image: Current frame, passed to ``utils.get_subwindow`` and
            ``utils.get_scale_subwindow``.

    Returns:
        vot.Rectangle: Box of size ``self.final_size`` centred on ``self.pos``
        (stored as ``[row, column]``).
    """

    def run_network(patch):
        # Resize, normalise, move channels first, and run the model on the GPU.
        patch = transform.resize(patch, (224, 224))
        patch = (patch - imgMean) / imgStd
        patch = np.transpose(patch, (2, 0, 1))
        return model(
            Variable(torch.from_numpy(patch[None, :, :, :]).float()).cuda())

    def windowed_spectrum(layer_output):
        # Channels-last feature map, resampled to the cosine window's
        # spatial size, windowed, then transformed.
        feature = layer_output.data[0].cpu().numpy().transpose((1, 2, 0))
        zoom_factors = (float(self.cos_window.shape[0]) / feature.shape[0],
                        float(self.cos_window.shape[1]) / feature.shape[1],
                        1)
        resampled = ndimage.zoom(feature, zoom_factors, order=1)
        windowed = np.multiply(resampled, self.cos_window[:, :, None])
        return np.fft.fft2(windowed, axes=(0, 1))

    # --- translation estimation ---
    search_patch = utils.get_subwindow(image, self.pos, self.sz,
                                       self.current_scale_factor)
    layer_outputs = run_network(search_patch)
    for i in range(numlayers):
        xtf = windowed_spectrum(layer_outputs[i])
        numerator = np.sum(np.multiply(self.x_num[i], xtf), axis=2)
        denominator = self.x_den[i] + self.lamda
        response = np.real(
            np.fft.ifft2(np.divide(numerator, denominator))) * layerweights[i]
        if i == 0:
            response_final = response
        else:
            response_final = np.add(response_final, response)

    peak_row, peak_col = np.unravel_index(response_final.argmax(),
                                          response_final.shape)
    # Peak offset from the map centre, converted from cells to image pixels.
    row_shift = (peak_row - response_final.shape[0] /
                 2) * self.current_scale_factor * self.cell_size
    col_shift = (peak_col - response_final.shape[1] /
                 2) * self.current_scale_factor * self.cell_size
    self.pos = [self.pos[0] + row_shift, self.pos[1] + col_shift]

    # --- scale estimation ---
    scale_sample = utils.get_scale_subwindow(
        image, self.pos, self.target_size,
        self.current_scale_factor * self.scaleFactors, self.scale_window,
        self.scale_model_sz)
    scale_sample_f = np.fft.fftn(scale_sample, axes=[0])
    scale_ratio = np.divide(np.multiply(self.s_num, scale_sample_f),
                            (self.s_den[:, None] + self.lamda))
    scale_response = np.real(np.fft.ifftn(np.sum(scale_ratio, axis=1)))
    best_scale = np.argmax(scale_response)
    self.current_scale_factor = self.current_scale_factor * self.scaleFactors[
        best_scale]
    if self.current_scale_factor < self.min_scale_factor:
        self.current_scale_factor = self.min_scale_factor
    elif self.current_scale_factor > self.max_scale_factor:
        self.current_scale_factor = self.max_scale_factor

    # --- model update ---
    keep = 1 - self.interp_factor
    model_patch = utils.get_subwindow(
        image, self.pos, self.sz, scale_factor=self.current_scale_factor)
    layer_outputs = run_network(model_patch)
    for i in range(numlayers):
        xlf = windowed_spectrum(layer_outputs[i])
        fresh_num = np.multiply(self.yf[:, :, None], np.conj(xlf))
        fresh_den = np.real(np.sum(np.multiply(xlf, np.conj(xlf)), axis=2))
        self.x_num[i] = keep * self.x_num[i] + self.interp_factor * fresh_num
        self.x_den[i] = keep * self.x_den[i] + self.interp_factor * fresh_den

    scale_model = utils.get_scale_subwindow(
        image, self.pos, self.target_size,
        self.current_scale_factor * self.scaleFactors, self.scale_window,
        self.scale_model_sz)
    scale_model_f = np.fft.fftn(scale_model, axes=[0])
    fresh_s_num = np.multiply(self.ysf[:, None], np.conj(scale_model_f))
    fresh_s_den = np.real(
        np.sum(np.multiply(scale_model_f, np.conj(scale_model_f)), axis=1))
    self.s_num = keep * self.s_num + self.interp_factor * fresh_s_num
    self.s_den = keep * self.s_den + self.interp_factor * fresh_s_den

    self.final_size = self.target_size * self.current_scale_factor
    return vot.Rectangle(self.pos[1] - self.final_size[1] / 2,
                         self.pos[0] - self.final_size[0] / 2,
                         self.final_size[1], self.final_size[0])
# Extract pos/neg features pos_feats = forward_samples(model, image, pos_examples) neg_feats = forward_samples(model, image, neg_examples) pos_feats_all.append(pos_feats) neg_feats_all.append(neg_feats) if len(pos_feats_all) > opts['n_frames_long']: del pos_feats_all[0] if len(neg_feats_all) > opts['n_frames_short']: del neg_feats_all[0] # Short term update if not success: nframes = min(opts['n_frames_short'], len(pos_feats_all)) pos_data = stackList(pos_feats_all[-nframes:]) neg_data = stackList(neg_feats_all) train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update']) # Long term update elif i % opts['long_interval'] == 0: pos_data = stackList(pos_feats_all) neg_data = stackList(neg_feats_all) train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update']) region = vot.Rectangle(target_bbox[0] + 1, target_bbox[1] + 1, target_bbox[2], target_bbox[3]) handle.report(region)
def track(self, imagepath):
    """Track one frame by fusing Siamese and colour score maps.

    Scores for each candidate scale are computed by ``CalcSiamScores`` and
    ``CalcColorScores``, normalised, blended 0.9 / 0.1, upsampled to
    257 x 257 and penalised for scale change. The best scale updates the
    running sizes, the peak of its (window-penalised) map gives the new
    position, and the templates are refreshed with a rolling average when
    ``self.hp.z_lr > 0``.

    NOTE(review): the original indentation was lost; the extent of the
    ``if`` / ``with`` blocks near the end is a reconstruction - confirm
    against the original file.

    Args:
        imagepath: Passed to ``CalcSiamScores`` / ``CalcColorScores`` and fed
            as the colour graph's filename placeholder.

    Returns:
        tuple: ``(vot.Rectangle, confidence)``.
    """
    #Calculate the scaled params, scales are calculated in __init__ method.
    scaled_exemplar = self.z_sz * self.scale_factors
    scaled_search_area = self.x_sz * self.scale_factors
    scaled_target_w = self.target_w * self.scale_factors
    scaled_target_h = self.target_h * self.scale_factors
    #Calculate Siamese scores wrt template
    image_, scores_, scores_original_, templates_x_, templates_z_ = self.CalcSiamScores(
        imagepath, scaled_search_area)
    self.siam_ret['image_'] = image_
    # self.siam_ret['scores_'] = self.NormScoreVector(np.squeeze(scores_))
    self.siam_ret['scores_original'] = self.NormScoreVector(
        np.squeeze(scores_original_))
    self.siam_ret['scores'] = np.squeeze(scores_)
    # self.siam_ret['scores_original'] = np.squeeze(scores_original_)
    self.siam_ret['templates_x_'] = templates_x_
    self.siam_ret['templates_z_'] = templates_z_
    # Calculate colour scores with respect to the template.
    scores_ = self.CalcColorScores(imagepath, scaled_search_area)
    self.color_ret['scores_'] = self.NormScoreVector(np.squeeze(scores_))
    #Calculate weighted average of the scores
    alpha = 0.9
    scores_ = alpha * self.siam_ret['scores_original'] + (
        1.0 - alpha) * self.color_ret['scores_']
    # cv2.resize is applied with the leading axis moved last, then the axis
    # is moved back to the front.
    scores_ = np.moveaxis(scores_, 0, -1)
    scores_ = cv2.resize(scores_,
                         dsize=(257, 257),
                         interpolation=cv2.INTER_CUBIC)
    scores_ = np.moveaxis(scores_, 2, 0)
    # penalize change of scale
    # (indices 0 and 2 are penalised; presumably index 1 is the unchanged
    # scale of three candidates - confirm)
    scores_[0, :, :] = self.hp.scale_penalty * scores_[0, :, :]
    scores_[2, :, :] = self.hp.scale_penalty * scores_[2, :, :]
    # find scale with highest peak (after penalty)
    new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))
    # update scaled sizes
    self.x_sz = (
        1 - self.hp.scale_lr
    ) * self.x_sz + self.hp.scale_lr * scaled_search_area[new_scale_id]
    self.target_w = (
        1 - self.hp.scale_lr
    ) * self.target_w + self.hp.scale_lr * scaled_target_w[new_scale_id]
    self.target_h = (
        1 - self.hp.scale_lr
    ) * self.target_h + self.hp.scale_lr * scaled_target_h[new_scale_id]
    # select response with new_scale_id
    score_ = scores_[new_scale_id, :, :]
    # Shift to a zero minimum and normalise to unit sum.
    score_ = score_ - np.min(score_)
    score_ = score_ / np.sum(score_)
    # apply displacement penalty
    score_ = (1 - self.hp.window_influence
              ) * score_ + self.hp.window_influence * self.penalty
    # Calculate the new center location and confidence
    self.pos_x, self.pos_y, confidence = self._update_target_position(
        self.pos_x, self.pos_y, score_, self.final_score_sz,
        self.design.tot_stride, self.design.search_sz, self.hp.response_up,
        self.x_sz)
    # update the target representation with a rolling average
    if self.hp.z_lr > 0:
        with self.graph_siam.as_default():
            #Update siam tracker template
            new_templates_z_ = self.siam_sess.run(
                [self.siam_params['templates_z']],
                feed_dict={
                    siam.pos_x_ph: self.pos_x,
                    siam.pos_y_ph: self.pos_y,
                    siam.z_sz_ph: self.z_sz,
                    self.siam_params['image']: image_
                })
            self.siam_ret['templates_z_'] = (1 - self.hp.z_lr) * np.asarray(
                self.siam_ret['templates_z_']) + self.hp.z_lr * np.asarray(
                    new_templates_z_)
        #Update color tracker template
        with self.graph_exemplar.as_default():
            # z_crops_ is fetched but not used afterwards.
            new_templates_z_, z_crops_ = self.session_exemplar.run(
                [
                    self.color_params['features_z'],
                    self.color_params['z_crops']
                ],
                feed_dict={
                    self.exemplar_ph['filename_ph']: imagepath,
                    self.exemplar_ph['pos_x_ph']: self.pos_x,
                    self.exemplar_ph['pos_y_ph']: self.pos_y,
                    self.exemplar_ph['z_sz_ph']: self.z_sz
                })
            self.color_ret['templates_z_'] = (1 - self.hp.z_lr) * np.asarray(
                self.color_ret['templates_z_']) + self.hp.z_lr * np.asarray(
                    new_templates_z_)
    # update template patch size
    self.z_sz = (
        1 - self.hp.scale_lr
    ) * self.z_sz + self.hp.scale_lr * scaled_exemplar[new_scale_id]
    # convert <cx,cy,w,h> to <x,y,w,h> and save output
    return vot.Rectangle(self.pos_x - self.target_w / 2,
                         self.pos_y - self.target_h / 2, self.target_w,
                         self.target_h), confidence
def track(self, image):
    """Run one tracking step: local detection, verification, global search.

    Stages, in order:
      1. Detect candidates in a search region cropped around ``last_gt``
         and pick the best-scoring one that overlaps ``last_gt``.
      2. Verify it by squared feature distance to ``template_feat``; if
         that fails, verify every sufficiently confident top-``k`` box.
      3. If the chosen box is still below ``Object_thres_low``, retry via
         ``center_search`` with several box sizes.
      4. If that also fails, rank patches from ``gen_search_patch_Hao``
         with ``branch_search`` and re-run the detector on the best ones.
      5. Update ``last_gt`` / ``target_w`` / ``target_h`` and derive a
         confidence value.

    Boxes are stored as ``[y1, x1, y2, x2]`` (see the axis used when
    clipping and the order used in the returned rectangle).

    Args:
        image: Current frame; indexed as ``image[rows, cols]`` and
            cropped/resized with ``cv2``.

    Returns:
        ``(region, confidence_score)`` where ``region`` is a
        ``vot.Rectangle`` when ``self.vot`` is truthy, otherwise a NumPy
        array ``[x, y, width, height]``.  ``confidence_score`` may be
        ``np.nan`` when both detector score and verification fail.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source -- confirm against the upstream implementation.
    self.i += 1
    cur_ori_img = image
    cur_img_array, win_loc, scale \
        = crop_search_region(cur_ori_img, self.last_gt, 300, mean_rgb=128)
    detection_box_ori, scores = self.sess.run(
        [self.pre_box_tensor, self.scores_tensor],
        feed_dict={
            self.input_cur_image: cur_img_array,
            self.initConstantOp: self.init_feature_maps
        })
    # Rescale and offset the boxes with the crop's scale / window location.
    detection_box_ori[:, 0] = detection_box_ori[:, 0] * scale[0] + win_loc[0]
    detection_box_ori[:, 1] = detection_box_ori[:, 1] * scale[1] + win_loc[1]
    detection_box_ori[:, 2] = detection_box_ori[:, 2] * scale[0] + win_loc[0]
    detection_box_ori[:, 3] = detection_box_ori[:, 3] * scale[1] + win_loc[1]

    # IoU between each of the top-k candidates and the previous box.
    A_candis = (
        (detection_box_ori[:self.k, 3] - detection_box_ori[:self.k, 1]) *
        (detection_box_ori[:self.k, 2] - detection_box_ori[:self.k, 0]))
    A_lastgt = ((self.last_gt[3] - self.last_gt[1]) *
                (self.last_gt[2] - self.last_gt[0]))
    x1 = np.maximum(detection_box_ori[:self.k, 1], self.last_gt[1])
    y1 = np.maximum(detection_box_ori[:self.k, 0], self.last_gt[0])
    x2 = np.minimum(detection_box_ori[:self.k, 3], self.last_gt[3])
    y2 = np.minimum(detection_box_ori[:self.k, 2], self.last_gt[2])
    inter = np.maximum((x2 - x1), 0) * np.maximum((y2 - y1), 0)
    IOU = inter / (A_candis + A_lastgt - inter)

    ID = np.arange(self.k)
    threshold = 0.4
    I_mask = IOU > threshold
    ID_iou = ID[I_mask]
    if np.sum(I_mask) > 0:
        # Highest detector score among candidates that overlap last_gt.
        best_idx = ID_iou[np.argmax(scores[0, :self.k][I_mask])]
    else:
        best_idx = 0

    # search_box1 is a view: clipping also clips the row in detection_box_ori.
    search_box1 = detection_box_ori[best_idx]
    search_box1[0] = np.clip(search_box1[0], 0, cur_ori_img.shape[0] - 1)
    search_box1[2] = np.clip(search_box1[2], 0, cur_ori_img.shape[0] - 1)
    search_box1[1] = np.clip(search_box1[1], 0, cur_ori_img.shape[1] - 1)
    search_box1[3] = np.clip(search_box1[3], 0, cur_ori_img.shape[1] - 1)
    if (int(search_box1[0]) == int(search_box1[2])
            or int(search_box1[1]) == int(search_box1[3])):
        # Degenerate (empty) crop: treat as a verification failure.
        dist_min = self.LargeDist
    else:
        unscaled_win = image[int(search_box1[0]):int(search_box1[2]),
                             int(search_box1[1]):int(search_box1[3])]
        win = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
        win -= self.mean
        win_input = win[np.newaxis, :]
        candidate_feat = self.sess.run(
            self.V_feat_op, feed_dict={self.V_image_op: win_input})
        dist_min = np.sum(np.square(self.template_feat - candidate_feat))

    if dist_min > self.V_thres:
        # Verification failed: examine all top-k candidates.
        search_box1 = detection_box_ori[:self.k]
        search_box = np.zeros_like(search_box1)
        # x1 y1 x2 y2
        search_box[:, 0] = search_box1[:, 1]
        search_box[:, 1] = search_box1[:, 0]
        search_box[:, 2] = search_box1[:, 3]
        search_box[:, 3] = search_box1[:, 2]
        # x y w h
        search_box[:, 2] = search_box[:, 2] - search_box[:, 0]
        search_box[:, 3] = search_box[:, 3] - search_box[:, 1]
        search_box[:, 2] = np.maximum(search_box[:, 2], 3)
        search_box[:, 3] = np.maximum(search_box[:, 3], 3)
        search_box[:, 0] = np.maximum(search_box[:, 0], 0)
        search_box[:, 1] = np.maximum(search_box[:, 1], 0)
        search_box[:, 0] = np.minimum(
            search_box[:, 0], cur_ori_img.shape[1] - search_box[:, 2] - 1)
        search_box[:, 1] = np.minimum(
            search_box[:, 1], cur_ori_img.shape[0] - search_box[:, 3] - 1)
        if scores[0, 0] > self.Object_thres_low:
            O_mask = (scores[0, :self.k] > self.Object_thres_low)
            ID_obj = ID[O_mask]
            num_object = int(np.sum(O_mask))
            win_input = np.zeros((num_object, 128, 128, 3))
            starty = search_box[O_mask, 1]
            startx = search_box[O_mask, 0]
            endy = search_box[O_mask, 3] + search_box[O_mask, 1]
            endx = search_box[O_mask, 2] + search_box[O_mask, 0]
            for i in range(num_object):
                unscaled_win = image[int(starty[i]):int(endy[i]),
                                     int(startx[i]):int(endx[i])]
                win_input[i] = cv2.resize(unscaled_win,
                                          (128, 128)).astype(np.float64)
            win_input = win_input - self.mean.reshape((1, 1, 1, 3))
            candidate_feats = self.sess.run(
                self.V_feat_op, feed_dict={self.V_image_op: win_input})
            dists = np.sum(np.square(self.template_feat - candidate_feats),
                           axis=-1)
            # dists1: same distances, but candidates that do not overlap
            # last_gt (IOU < threshold) are pushed to LargeDist.
            dists1 = dists.copy()
            for i in range(num_object):
                if ID_obj[i] not in ID_iou:
                    dists1[i] = self.LargeDist
            if np.min(dists1) < self.V_thres:
                best_idx = ID_obj[np.argmin(dists1)]
                dist_min = np.min(dists1)
            elif np.min(dists) < self.V_thres:
                best_idx = ID_obj[np.argmin(dists)]
                dist_min = np.min(dists)
            else:
                dist_min = self.LargeDist
    detection_box = detection_box_ori[best_idx]

    if scores[0, best_idx] < self.Object_thres_low:
        # Retry with center_search: last box size first, then the initial
        # size, half of it, and twice it, while verification keeps failing.
        scores, best_idx, detection_box, dist_min \
            = self.center_search(
                cur_ori_img, (self.last_gt[2] - self.last_gt[0]),
                (self.last_gt[3] - self.last_gt[1]), scores, best_idx,
                detection_box, dist_min)
        if dist_min > self.V_thres:
            scores, best_idx, detection_box, dist_min \
                = self.center_search(
                    cur_ori_img, self.first_h, self.first_w, scores,
                    best_idx, detection_box, dist_min)
        if dist_min > self.V_thres:
            scores, best_idx, detection_box, dist_min \
                = self.center_search(
                    cur_ori_img, self.first_h / 2.0, self.first_w / 2.0,
                    scores, best_idx, detection_box, dist_min)
        if dist_min > self.V_thres:
            scores, best_idx, detection_box, dist_min \
                = self.center_search(
                    cur_ori_img, self.first_h * 2.0, self.first_w * 2.0,
                    scores, best_idx, detection_box, dist_min)

    if scores[0, best_idx] < self.Object_thres_low:
        # ---- global search: rank patches with branch_search ----
        softmax_test_, pos_i = gen_search_patch_Hao(
            cur_ori_img, self.first_w, self.first_h)
        softmax_test = softmax_test_.astype(np.float32)
        # Parenthesised form works on both Python 2 and Python 3.
        print('global')
        batch_sz = 64
        if softmax_test.shape[0] <= batch_sz:
            kk = softmax_test
            cls_out = self.branch_search.predict(
                [self.z_feat.repeat(kk.shape[0], axis=0), kk]).reshape(-1)
        elif softmax_test.shape[0] > batch_sz:
            cls_out_list = []
            # Floor division: range() needs an int on Python 3.
            for_i = softmax_test.shape[0] // batch_sz
            for jj in range(for_i):
                kk = softmax_test[batch_sz * jj:batch_sz * (jj + 1)]
                cls_out_list.append(
                    self.branch_search.predict(
                        [self.z_feat.repeat(kk.shape[0], axis=0),
                         kk]).reshape(-1))
            if softmax_test.shape[0] % batch_sz != 0:
                # Remainder batch after the last full one.
                kk = softmax_test[batch_sz * (jj + 1):]
                cls_out_list.append(
                    self.branch_search.predict(
                        [self.z_feat.repeat(kk.shape[0], axis=0),
                         kk]).reshape(-1))
            cls_out = np.concatenate(cls_out_list)
        search_rank = np.argsort(-cls_out)
        pos_i = pos_i[search_rank]
        cls_out = cls_out[search_rank]
        # ---- end of patch ranking ----

        # Note: this permanently lowers self.SEARCH_K when fewer patches exist.
        self.SEARCH_K = np.minimum(pos_i.shape[0], self.SEARCH_K)
        if self.SEARCH_K > 1:
            search_num = self.SEARCH_K - 1
        else:
            search_num = 1
        detection_box1_all = np.zeros([search_num, 4])
        scores1_all = np.zeros([1, search_num])
        for s_i in range(search_num):
            search_gt = pos_i[s_i]
            cur_img_array1, win_loc1, scale1 \
                = crop_search_region(cur_ori_img, search_gt, 300, mean_rgb=128)
            detection_box1, scores1 = self.sess.run(
                [self.pre_box_tensor, self.scores_tensor],
                feed_dict={
                    self.input_cur_image: cur_img_array1,
                    self.initConstantOp: self.init_feature_maps
                })
            # Only the first detection of each region is kept.
            detection_box1[0, 0] = detection_box1[0, 0] * scale1[0] + win_loc1[0]
            detection_box1[0, 1] = detection_box1[0, 1] * scale1[1] + win_loc1[1]
            detection_box1[0, 2] = detection_box1[0, 2] * scale1[0] + win_loc1[0]
            detection_box1[0, 3] = detection_box1[0, 3] * scale1[1] + win_loc1[1]
            scores1_all[0, s_i] = scores1[0, 0]
            detection_box1_all[s_i] = detection_box1[0].copy()
        rank_idx = np.argsort(-scores1_all).reshape(-1)
        scores1 = scores1_all[:, rank_idx]
        detection_box1 = detection_box1_all[rank_idx, :]

        if scores1[0, 0] > self.Object_thres_high:
            detection_box_ori = detection_box1.copy()
            search_box1 = detection_box_ori[0]
            search_box1[0] = np.clip(search_box1[0], 0,
                                     cur_ori_img.shape[0] - 1)
            search_box1[2] = np.clip(search_box1[2], 0,
                                     cur_ori_img.shape[0] - 1)
            search_box1[1] = np.clip(search_box1[1], 0,
                                     cur_ori_img.shape[1] - 1)
            search_box1[3] = np.clip(search_box1[3], 0,
                                     cur_ori_img.shape[1] - 1)
            if (int(search_box1[0]) == int(search_box1[2])
                    or int(search_box1[1]) == int(search_box1[3])):
                dist_min = self.LargeDist
            else:
                # [y1, x1, y2, x2] -> [x, y, w, h]
                search_box1 = [
                    search_box1[1], search_box1[0],
                    search_box1[3] - search_box1[1],
                    search_box1[2] - search_box1[0]
                ]
                search_box1 = np.reshape(search_box1, (4, ))
                unscaled_win = image[
                    int(search_box1[1]):int(search_box1[3] + search_box1[1]),
                    int(search_box1[0]):int(search_box1[2] + search_box1[0])]
                win = cv2.resize(unscaled_win, (128, 128)).astype(np.float64)
                win -= self.mean
                win_input = win[np.newaxis, :]
                candidate_feat = self.sess.run(
                    self.V_feat_op, feed_dict={self.V_image_op: win_input})
                dist_min = np.sum(
                    np.square(self.template_feat - candidate_feat))
            if dist_min < self.global_V_thres:
                scores = scores1.copy()
                best_idx = 0
                detection_box = detection_box_ori[best_idx]
            elif dist_min > self.global_V_thres and self.SEARCH_K - search_num > 0:
                # One more attempt with the next ranked patch.
                search_gt = pos_i[search_num]
                cur_img_array1, win_loc1, scale1 \
                    = crop_search_region(cur_ori_img, search_gt, 300,
                                         mean_rgb=128)
                detection_box1, scores1 = self.sess.run(
                    [self.pre_box_tensor, self.scores_tensor],
                    feed_dict={
                        self.input_cur_image: cur_img_array1,
                        self.initConstantOp: self.init_feature_maps
                    })
                detection_box1[0, 0] = detection_box1[0, 0] * scale1[0] + win_loc1[0]
                detection_box1[0, 1] = detection_box1[0, 1] * scale1[1] + win_loc1[1]
                detection_box1[0, 2] = detection_box1[0, 2] * scale1[0] + win_loc1[0]
                detection_box1[0, 3] = detection_box1[0, 3] * scale1[1] + win_loc1[1]
                detection_box_ori = detection_box1.copy()
                search_box1 = detection_box_ori[0]
                search_box1[0] = np.clip(search_box1[0], 0,
                                         cur_ori_img.shape[0] - 1)
                search_box1[2] = np.clip(search_box1[2], 0,
                                         cur_ori_img.shape[0] - 1)
                search_box1[1] = np.clip(search_box1[1], 0,
                                         cur_ori_img.shape[1] - 1)
                search_box1[3] = np.clip(search_box1[3], 0,
                                         cur_ori_img.shape[1] - 1)
                if (int(search_box1[0]) == int(search_box1[2])
                        or int(search_box1[1]) == int(search_box1[3])):
                    dist_min = self.LargeDist
                else:
                    search_box1 = [
                        search_box1[1], search_box1[0],
                        search_box1[3] - search_box1[1],
                        search_box1[2] - search_box1[0]
                    ]
                    search_box1 = np.reshape(search_box1, (4, ))
                    unscaled_win = image[
                        int(search_box1[1]):int(search_box1[3] + search_box1[1]),
                        int(search_box1[0]):int(search_box1[2] + search_box1[0])]
                    win = cv2.resize(unscaled_win,
                                     (128, 128)).astype(np.float64)
                    win -= self.mean
                    win_input = win[np.newaxis, :]
                    candidate_feat = self.sess.run(
                        self.V_feat_op,
                        feed_dict={self.V_image_op: win_input})
                    dist_min = np.sum(
                        np.square(self.template_feat - candidate_feat))
                if dist_min < self.global_V_thres:
                    scores = scores1.copy()
                    best_idx = 0
                    detection_box = detection_box_ori[best_idx]

    if scores[0, best_idx] < self.Object_thres_low:
        # Low score: move to the detected centre but keep the previous size.
        x_c = (detection_box[3] + detection_box[1]) / 2.0
        y_c = (detection_box[0] + detection_box[2]) / 2.0
        w1 = self.last_gt[3] - self.last_gt[1]
        h1 = self.last_gt[2] - self.last_gt[0]
        x1 = x_c - w1 / 2.0
        y1 = y_c - h1 / 2.0
        x2 = x_c + w1 / 2.0
        y2 = y_c + h1 / 2.0
        self.last_gt = np.float32([y1, x1, y2, x2])
    else:
        self.last_gt = detection_box
        self.target_w = detection_box[3] - detection_box[1]
        self.target_h = detection_box[2] - detection_box[0]

    # Push the box back inside the frame, keeping its size.
    if self.last_gt[0] < 0:
        self.last_gt[0] = 0
        self.last_gt[2] = self.target_h
    if self.last_gt[1] < 0:
        self.last_gt[1] = 0
        self.last_gt[3] = self.target_w
    if self.last_gt[2] > cur_ori_img.shape[0]:
        self.last_gt[2] = cur_ori_img.shape[0] - 1
        self.last_gt[0] = cur_ori_img.shape[0] - 1 - self.target_h
    if self.last_gt[3] > cur_ori_img.shape[1]:
        self.last_gt[3] = cur_ori_img.shape[1] - 1
        self.last_gt[1] = cur_ori_img.shape[1] - 1 - self.target_w
    self.target_w = (self.last_gt[3] - self.last_gt[1])
    self.target_h = (self.last_gt[2] - self.last_gt[0])
    width = self.last_gt[3] - self.last_gt[1]
    height = self.last_gt[2] - self.last_gt[0]

    if self.dis:
        show_res(image, np.array(self.last_gt, dtype=np.int32), '2',
                 score=scores[0, best_idx], score_max=dist_min)

    if (scores[0, best_idx] > self.Object_thres_high
            and dist_min < self.V_thres):
        confidence_score = 0.99
    elif (scores[0, best_idx] < self.Object_thres_low
          and dist_min > self.V_thres):
        confidence_score = np.nan
    elif dist_min < self.EXTREM:
        confidence_score = 0.99
    else:
        confidence_score = scores[0, best_idx]

    if self.vot:
        return vot.Rectangle(float(self.last_gt[1]), float(self.last_gt[0]),
                             float(width), float(height)), confidence_score
    else:
        return np.array([
            float(self.last_gt[1]),
            float(self.last_gt[0]),
            float(width),
            float(height)
        ]), confidence_score
def track(self, img):
    """Track one frame with iterative box actions plus online fine-tuning.

    The box is moved by repeatedly applying the action predicted by the
    network until a stop action, a revisited box, a low-confidence
    prediction, or the step limit.  On failure a re-detection over sampled
    boxes is attempted; on success positive/negative samples of the frame
    are stored; periodically (or on failure) the fully connected layers are
    retrained from the stored samples.

    Relies on module-level ``params``, ``sess``, ``nodes``, ``tutil``,
    ``variables``, ``reset`` and ``gt``.

    Args:
        img: Current frame; ``img.shape[:2]`` gives ``(height, width)``.

    Returns:
        Tuple ``(vot.Rectangle, 0.99)`` built from ``self.curr_bbox``.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source -- confirm against the upstream implementation.
    params['height'], params['width'] = img.shape[:2]
    curr_bbox_old = self.curr_bbox
    self.move_counter = 0
    target_score = 0
    num_action_step_max = 20
    visited_boxes = np.zeros([num_action_step_max, 4])
    step_scores = np.zeros([num_action_step_max, 1])
    self.is_negative = False
    prev_score = -9999
    self.this_actions = np.zeros([params['num_show_actions'], 1])
    action_history_oh_old = self.action_history_oh

    while self.move_counter < num_action_step_max:
        visited_boxes[self.move_counter] = self.curr_bbox
        step_scores[self.move_counter] = prev_score

        # Rebuild the one-hot action history; codes >= 11 leave a zero row.
        self.action_history_oh *= 0
        recent_actions = self.action_history[:params['num_action_history']]
        for i, act in enumerate(recent_actions):
            if act < 11:
                self.action_history_oh[i, int(act)] = 1

        pred, pred_score = sess.run(
            [nodes['action'], nodes['soft_conf']],
            feed_dict={
                nodes['image']: [img],
                nodes['cropped']: 1.0,
                nodes['full_training']: 1.0,
                nodes['boxes_ind']: np.array([0]),
                nodes['boxes']:
                tutil.refine_box(np.expand_dims(self.curr_bbox, 0), params),
                nodes['action_hist']:
                self.action_history_oh.reshape(1, -1)
            })
        curr_score = pred_score[0, 1]
        max_action = np.argmax(pred[0])

        if curr_score < params['failedThre']:
            # Confidence too low: flag failure and record code 12.
            self.is_negative = True
            curr_score = prev_score
            self.action_history[1:] = self.action_history[:-1]
            self.action_history[0] = 12
            self.cont_negatives += 1
            break

        self.curr_bbox = tutil.do_action(self.curr_bbox, max_action, params)

        # A box (after rounding) that was already visited forces a stop.
        revisited = np.sum(
            np.equal(np.round(visited_boxes), np.round(self.curr_bbox)),
            1) == 4
        if np.any(revisited) and max_action != params['stop_action']:
            max_action = params['stop_action']

        self.action_history[1:] = self.action_history[:-1]
        self.action_history[0] = max_action
        target_score = curr_score
        if max_action == params['stop_action']:
            break
        self.move_counter += 1
        prev_score = curr_score

    frame_key = '%d' % self.f

    #%% Tracking Fail --> Re-detection
    if self.f > 0 and self.is_negative:
        self.total_pos_data[frame_key] = np.zeros([0, 3, 3, 512])
        self.total_neg_data[frame_key] = np.zeros([0, 3, 3, 512])
        self.total_pos_action_labels[frame_key] = np.zeros([0, 11])
        self.total_pos_examples[frame_key] = np.zeros([0, 4])
        self.total_neg_examples[frame_key] = np.zeros([0, 4])

        # Sample around the box from before this frame's moves; the spread
        # grows with the number of consecutive failures (capped at 1.5).
        samples_redet = tutil.gen_samples(
            'gaussian', curr_bbox_old, params['redet_samples'], params,
            min(1.5, 0.6 * 1.15**self.cont_negatives),
            params['finetune_scale_factor'])
        n_samples = samples_redet.shape[0]
        red_score_pred = sess.run(
            nodes['soft_conf'],
            feed_dict={
                nodes['image']: [img],
                nodes['cropped']: 1.0,
                nodes['full_training']: 1.0,
                nodes['boxes_ind']: np.array([0] * n_samples),
                nodes['boxes']: tutil.refine_box(samples_redet, params),
                nodes['action_hist']:
                np.vstack([self.action_history_oh.reshape(1, -1)] * n_samples),
                nodes['is_training']: 0.0
            })
        # Ascending by score (sample index breaks ties); keep the last five.
        idx = np.lexsort(
            (np.array(range(params['redet_samples'])), red_score_pred[:, 1]))
        top5 = idx[-5:]
        target_score = np.mean(red_score_pred[top5, 1])
        if target_score > curr_score:
            self.curr_bbox = np.mean(samples_redet[top5, :], 0)
        self.move_counter += params['redet_samples']

    #%% Tracking Success --> generate samples
    if self.f > 0 and (not self.is_negative
                       or target_score > params['successThre']):
        self.cont_negatives = 0

        # Positives: gaussian samples with enough overlap, randomly thinned.
        self.pos_examples = tutil.gen_samples(
            'gaussian', self.curr_bbox, params['pos_on'] * 2, params,
            params['finetune_trans'], params['finetune_scale_factor'])
        self.r = tutil.overlap_ratio(self.pos_examples, self.curr_bbox)
        self.pos_examples = self.pos_examples[np.where(
            self.r > params['pos_thr_on'])]
        n_pos = self.pos_examples.shape[0]
        self.pos_examples = self.pos_examples[np.random.choice(
            n_pos, min(params['pos_on'], n_pos), replace=False)]

        # Negatives: uniform samples with little overlap, randomly thinned.
        self.neg_examples = tutil.gen_samples('uniform', self.curr_bbox,
                                              params['neg_on'] * 2, params,
                                              2, 5)
        self.r = tutil.overlap_ratio(self.neg_examples, self.curr_bbox)
        self.neg_examples = self.neg_examples[np.where(
            self.r < params['neg_thr_on'])]
        n_neg = self.neg_examples.shape[0]
        self.neg_examples = self.neg_examples[np.random.choice(
            n_neg, min(params['neg_on'], n_neg), replace=False)]

        self.examples = np.vstack((self.pos_examples, self.neg_examples))
        self.feat_conv = tutil.get_conv_feature(
            sess,
            nodes['conv_feat'],
            feed_dict={
                nodes['cropped']: 1.0,
                nodes['boxes_ind']: np.array([0] * self.examples.shape[0]),
                nodes['image']: [img],
                nodes['boxes']: tutil.refine_box(self.examples, params)
            })
        n_pos = self.pos_examples.shape[0]
        self.total_pos_data[frame_key] = self.feat_conv[:n_pos]
        self.total_neg_data[frame_key] = self.feat_conv[n_pos:]
        self.pos_action_labels = tutil.gen_action_labels(
            params, self.pos_examples, self.curr_bbox)
        self.total_pos_action_labels[frame_key] = self.pos_action_labels
        self.total_pos_examples[frame_key] = self.pos_examples
        self.total_neg_examples[frame_key] = self.neg_examples
        self.frame_window.append(self.f)

        # Drop stored samples of frames falling out of the windows.
        # NOTE(review): frame_window[-frame_long] equals
        # frame_window[len - frame_long], i.e. the first frame of the slice
        # used for training below -- confirm this off-by-one is intended.
        if len(self.frame_window) > params['frame_long']:
            long_key = '%d' % self.frame_window[-params['frame_long']]
            self.total_pos_data[long_key] = np.zeros([0, 3, 3, 512])
            self.total_pos_action_labels[long_key] = np.zeros([0, 11])
            self.total_pos_examples[long_key] = np.zeros([0, 4])
        if len(self.frame_window) > params['frame_short']:
            short_key = '%d' % self.frame_window[-params['frame_short']]
            self.total_neg_data[short_key] = np.zeros([0, 3, 3, 512])
            self.total_neg_examples[short_key] = np.zeros([0, 4])

    #%% Do online-training
    periodic = (self.f + 1) % params['iterval'] == 0
    if periodic or self.is_negative:
        # Periodic updates use the long window, failure updates the short one.
        pos_span = params['frame_long'] if periodic else params['frame_short']
        f_st = max(0, len(self.frame_window) - pos_span)
        pos_frames = self.frame_window[f_st:]
        self.pos_data = np.vstack(
            [self.total_pos_data['%d' % wind] for wind in pos_frames])
        self.pos_action_labels = np.vstack(
            [self.total_pos_action_labels['%d' % wind] for wind in pos_frames])

        f_st = max(0, len(self.frame_window) - params['frame_short'])
        self.neg_data = np.vstack([
            self.total_neg_data['%d' % wind]
            for wind in self.frame_window[f_st:]
        ])
        self.feat_conv = np.vstack((self.pos_data, self.neg_data))

        _ = sess.run(variables, feed_dict={reset: 1.0})
        iteration = params['iter_on']
        tutil.train_fc(sess, nodes, self.feat_conv, self.pos_action_labels,
                       iteration, params, params['on_learning_rate'])

    self.full_history.append(self.curr_bbox)
    self.full_gt.append(gt)
    self.total_moves += self.move_counter

    # Draw the result on a copy of the frame (the copy is not used further).
    frame = np.copy(img)
    top_left = (int(self.curr_bbox[0]), int(self.curr_bbox[1]))
    bottom_right = (int(self.curr_bbox[0] + self.curr_bbox[2]),
                    int(self.curr_bbox[1] + self.curr_bbox[3]))
    frame = cv2.rectangle(frame, top_left, bottom_right, [255, 0, 0], 2)

    self.f += 1
    max_val = .99
    return vot.Rectangle(self.curr_bbox[0], self.curr_bbox[1],
                         self.curr_bbox[2], self.curr_bbox[3]), max_val
def tracking(self, image):
    """Track one frame with the local tracker, a verifier and re-detection.

    The local tracker is re-centred on ``last_gt`` and run; its result is
    scored by ``pymdnet_eval``.  A positive score with a ``'normal'`` flag
    accepts the result (optionally refined by ``siammask_track``).
    Otherwise candidates from ``Global_Track_eval`` are scored, and the best
    positive one replaces the state once the miss counter has reached 5.

    ``last_gt`` is stored as ``[y1, x1, y2, x2]`` (see how the returned
    rectangle is assembled).

    Args:
        image: Current frame; converted RGB->BGR before it is passed to
            ``siammask_track`` / ``show_res``.

    Returns:
        Tuple ``(vot.Rectangle, confidence_score)`` with the confidence
        clipped to ``[0, 1]``.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source -- confirm against the upstream implementation.
    self.i += 1
    mask = None
    candidate_bboxes = None

    # Re-centre the local tracker on the last known box.
    local = self.local_Tracker
    local.pos = torch.FloatTensor(
        [(self.last_gt[0] + self.last_gt[2] - 1) / 2,
         (self.last_gt[1] + self.last_gt[3] - 1) / 2])
    local.target_sz = torch.FloatTensor(
        [(self.last_gt[2] - self.last_gt[0]),
         (self.last_gt[3] - self.last_gt[1])])

    tic = time.time()
    (local_state, self.score_map, update,
     local_score, dis, flag) = self.local_track(image)
    md_score = self.pymdnet_eval(image,
                                 np.array(local_state).reshape([-1, 4]))[0]
    self.score_max = md_score

    if md_score > 0 and flag == 'normal':
        self.flag = 'found'
        if self.p.use_mask:
            # Hand position/size to the mask tracker with the axes reversed,
            # then read the refined state back the same way.
            self.siamstate['target_pos'] = local.pos.numpy()[::-1]
            self.siamstate['target_sz'] = local.target_sz.numpy()[::-1]
            siamscore, mask = self.siammask_track(
                cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
            self.local_Tracker.pos = torch.FloatTensor(
                self.siamstate['target_pos'][::-1].copy())
            self.local_Tracker.target_sz = torch.FloatTensor(
                self.siamstate['target_sz'][::-1].copy())
            corner = (self.local_Tracker.pos[[1, 0]]
                      - (self.local_Tracker.target_sz[[1, 0]] - 1) / 2)
            extent = self.local_Tracker.target_sz[[1, 0]]
            local_state = torch.cat((corner, extent)).data.cpu().numpy()
        # <x, y, w, h> -> [y1, x1, y2, x2]
        self.last_gt = np.array([
            local_state[1], local_state[0],
            local_state[1] + local_state[3],
            local_state[0] + local_state[2]
        ])
    elif md_score < 0 or flag == 'not_found':
        self.count += 1
        self.flag = 'not_found'
        candidate_bboxes = self.Global_Track_eval(image, 10)
        candidate_scores = self.pymdnet_eval(image, candidate_bboxes)
        max_id = np.argmax(candidate_scores)
        if candidate_scores[max_id] > 0:
            redet_bboxes = candidate_bboxes[max_id]
            if self.count >= 5:
                # Enough consecutive misses: jump to the re-detected box.
                self.last_gt = np.array([
                    redet_bboxes[1], redet_bboxes[0],
                    redet_bboxes[1] + redet_bboxes[3],
                    redet_bboxes[2] + redet_bboxes[0]
                ])
                self.local_Tracker.pos = torch.FloatTensor(
                    [(self.last_gt[0] + self.last_gt[2] - 1) / 2,
                     (self.last_gt[1] + self.last_gt[3] - 1) / 2])
                self.local_Tracker.target_sz = torch.FloatTensor(
                    [(self.last_gt[2] - self.last_gt[0]),
                     (self.last_gt[3] - self.last_gt[1])])
                self.score_max = candidate_scores[max_id]
                self.count = 0

    if update:
        self.collect_samples_pymdnet(image)
    self.pymdnet_long_term_update()

    width = self.last_gt[3] - self.last_gt[1]
    height = self.last_gt[2] - self.last_gt[0]
    toc = time.time() - tic
    print(toc)

    # Mean of the local score and an arctan-squashed verifier score,
    # clipped to [0, 1].
    verifier_term = np.arctan(0.2 * self.score_max) / math.pi + 0.5
    confidence_score = np.clip((local_score + verifier_term) / 2, 0, 1)

    if self.p.visualization:
        show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                 np.array(self.last_gt, dtype=np.int32),
                 '2',
                 update=update,
                 can_bboxes=candidate_bboxes,
                 frame_id=self.i,
                 tracker_score=md_score,
                 mask=mask)

    region = vot.Rectangle(float(self.last_gt[1]), float(self.last_gt[0]),
                           float(width), float(height))
    return region, confidence_score
def track(self, image_path):
    """Advance the Siamese tracker by one frame.

    Runs the network on three scaled search areas, picks the scale with the
    strongest (penalised) peak, updates the running size estimates, locates
    the new centre on the windowed response, and refreshes the template
    with a rolling average when ``hp.z_lr`` is positive.

    Args:
        image_path: Value fed to the graph's ``filename`` placeholder.

    Returns:
        Tuple ``(vot.Rectangle, confidence)``; the rectangle is
        ``<x, y, w, h>`` and ``confidence`` comes from
        ``_update_target_position``.
    """
    # Per-scale candidate sizes (the scale factors are set up in __init__).
    factors = self.scale_factors
    exemplar_sizes = self.z_sz * factors
    search_sizes = self.x_sz * factors
    widths = self.target_w * factors
    heights = self.target_h * factors

    # Run the network on the three search areas.
    with self.sess.as_default():
        fetches = [
            self.image, self.scores, self.scores_original, self.templates_x,
            self.templates_z
        ]
        feeds = {
            siam.pos_x_ph: self.pos_x,
            siam.pos_y_ph: self.pos_y,
            siam.x_sz0_ph: search_sizes[0],
            siam.x_sz1_ph: search_sizes[1],
            siam.x_sz2_ph: search_sizes[2],
            self.templates_z: np.squeeze(self.templates_z_),
            self.filename: image_path,
        }
        (image_, scores_, scores_original_,
         self.templates_x_, self.templates_z_) = self.sess.run(
             fetches, feed_dict=feeds, **self.run_opts)
    scores_ = np.squeeze(scores_)

    # Penalise a change of scale (first and third entries of the scale axis).
    for scale_idx in (0, 2):
        scores_[scale_idx, :, :] = (self.hp.scale_penalty
                                    * scores_[scale_idx, :, :])

    # Scale whose response has the highest peak after the penalty.
    new_scale_id = np.argmax(np.amax(scores_, axis=(1, 2)))

    # Blend the running sizes toward the winning scale.
    scale_lr = self.hp.scale_lr
    keep = 1 - scale_lr
    self.x_sz = keep * self.x_sz + scale_lr * search_sizes[new_scale_id]
    self.target_w = keep * self.target_w + scale_lr * widths[new_scale_id]
    self.target_h = keep * self.target_h + scale_lr * heights[new_scale_id]

    # Shift the chosen response to start at zero and make it sum to one.
    response = scores_[new_scale_id, :, :]
    response = response - np.min(response)
    response = response / np.sum(response)

    # Displacement penalty: mix in the precomputed window.
    influence = self.hp.window_influence
    response = (1 - influence) * response + influence * self.penalty

    # New centre location and confidence.
    self.pos_x, self.pos_y, confidence = self._update_target_position(
        self.pos_x, self.pos_y, response, self.final_score_sz,
        self.design.tot_stride, self.design.search_sz, self.hp.response_up,
        self.x_sz)

    # Rolling-average update of the target representation.
    if self.hp.z_lr > 0:
        fresh_templates_z = self.sess.run(
            [self.templates_z],
            feed_dict={
                siam.pos_x_ph: self.pos_x,
                siam.pos_y_ph: self.pos_y,
                siam.z_sz_ph: self.z_sz,
                self.image: image_
            })
        self.templates_z_ = (
            (1 - self.hp.z_lr) * np.asarray(self.templates_z_)
            + self.hp.z_lr * np.asarray(fresh_templates_z))

    # Template patch size follows the winning scale as well.
    self.z_sz = keep * self.z_sz + scale_lr * exemplar_sizes[new_scale_id]

    # Convert <cx, cy, w, h> to <x, y, w, h>.
    region = vot.Rectangle(self.pos_x - self.target_w / 2,
                           self.pos_y - self.target_h / 2,
                           self.target_w, self.target_h)
    return region, confidence
def track(self, image):
    """Locate the target with a bank of per-angle correlation filters.

    A feature map is extracted around ``self.pos`` and correlated with the
    filters for the current angle index and its two neighbours (modulo 24).
    The angle with the strongest peak becomes the new ``angle_index`` and
    its peak offset from the response centre (in cells) moves ``self.pos``.

    Side effects: prints timing/debug output, shows two OpenCV windows and
    appends to the module-level files ``f1`` and ``f2``.

    Args:
        image: Current frame passed to ``utils.get_subwindow``.

    Returns:
        ``vot.Rectangle`` built from ``self.pos`` (row, column centre) and
        ``self.target_size`` (height, width).
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source -- confirm against the upstream implementation.
    print(self.cell_size)
    start3 = time.time()
    test_crop = utils.get_subwindow(image, self.pos, self.patch_size)
    cv2.imshow('hahaha', test_crop)
    hog_feature_t = utils.get_feature_map(
        test_crop, self.feature_list, self.num_feature_ch,
        self.patch_size_cell, self.w2c, self.cell_size,
        self.projection_matrix)
    z = np.multiply(hog_feature_t, self.cos_window[:, :, None])
    print(z.shape)
    zf = np.fft.fft2(z, axes=(0, 1))

    # Current angle and its two neighbours, wrapped into [0, 24).
    angle_index_series = (np.array(
        (self.angle_index - 1, self.angle_index, self.angle_index + 1))
                          + 24) % 24
    # Builtin int instead of np.int, which was removed in NumPy 1.24.
    map_rows = int(self.patch_size_cell[0])
    map_cols = int(self.patch_size_cell[1])
    response_map_series = np.zeros((24, map_rows, map_cols))
    end3 = time.time()
    print('生成检测用时:' + str(end3 - start3))

    start4 = time.time()
    for j, i in enumerate(angle_index_series):
        k_test = utils.dense_gauss_kernel(self.feature_bandwidth_sigma,
                                          self.xf_sequence[i, :, :, :],
                                          self.x_sequence[i, :, :, :], zf, z)
        kf_test = np.fft.fft2(k_test, axes=(0, 1))
        alphaf_test = self.filter_sequence[i, :, :]
        response = np.real(np.fft.ifft2(np.multiply(alphaf_test, kf_test)))
        response_map_series[i, :, :] = response
        # Peak value and peak location for the j-th candidate angle.
        self.response_series[j] = np.max(response)
        self.v_centre[j], self.h_centre[j] = np.unravel_index(
            response.argmax(), response.shape)

    print('response_series')
    f2.write(str(np.max(self.response_series)) + '\n')
    # First candidate that attains the maximum peak.
    max_response_index = np.where(
        self.response_series == np.max(self.response_series))[0][0]
    print(self.response_series)
    v = self.v_centre[max_response_index]
    h = self.h_centre[max_response_index]
    self.angle_index = angle_index_series[max_response_index]

    response4show = np.reshape(response_map_series[self.angle_index, :, :],
                               (map_rows, map_cols))
    cv2.imshow('response', response4show)
    print(self.angle_index)
    f1.write(str(self.angle_index) + '\n')
    end4 = time.time()
    print('三个滤波器用时:' + str(end4 - start4))

    # Peak offset from the response centre, in cells; `response` is the map
    # of the last evaluated angle and is only used for its shape here.
    vert_delta = v - response.shape[0] / 2
    horiz_delta = h - response.shape[1] / 2
    self.pos = [
        self.pos[0] + vert_delta * self.cell_size,
        self.pos[1] + horiz_delta * self.cell_size
    ]
    return vot.Rectangle(self.pos[1] - self.target_size[1] / 2,
                         self.pos[0] - self.target_size[0] / 2,
                         self.target_size[1], self.target_size[0])
# VOT driver: read the initial polygon, initialise DRNet on the first frame,
# then report one rectangle per remaining frame.
handle = vot.VOT("polygon")
selection = handle.region()
selection = selection.points

# Flatten the polygon points into [x0, y0, x1, y1, ...].
gt_bbox = []
for point in selection:
    gt_bbox.extend((point.x, point.y))

if len(gt_bbox) == 4:
    # Four values are treated as <x, y, w, h> and expanded to four corners.
    left, top = gt_bbox[0], gt_bbox[1]
    right = gt_bbox[0] + gt_bbox[2] - 1
    bottom = gt_bbox[1] + gt_bbox[3] - 1
    gt_bbox = [left, top, left, bottom, right, bottom, right, top]

cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
gt_bbox = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h]

imagefile = handle.frame()
if not imagefile:
    sys.exit(0)
image = cv.cvtColor(cv.imread(imagefile), cv.COLOR_BGR2RGB)

params = parameters()
tracker = DRNet(params)
tracker.initialize(image, list(gt_bbox))

while True:
    imagefile = handle.frame()
    if not imagefile:
        # No more frames: the sequence is over.
        break
    image = cv.cvtColor(cv.imread(imagefile), cv.COLOR_BGR2RGB)
    region = tracker.track(image)
    region = vot.Rectangle(region[0], region[1], region[2], region[3])
    # The confidence reported to the toolkit is a fixed 0.9.
    handle.report(region, 0.9)
def track(self, image):
    """Delegate to the wrapped tracker and repackage its result.

    Args:
        image: Frame forwarded unchanged to ``self._tracker.track``.

    Returns:
        ``vot.Rectangle`` built from the ``x``, ``y``, ``w`` and ``h``
        attributes of the wrapped tracker's result.
    """
    rect = self._tracker.track(image)
    x, y, w, h = rect.x, rect.y, rect.w, rect.h
    return vot.Rectangle(x, y, w, h)
# Then get the initialization region
# and the first image
# *****************************************
handle = vot.VOT("rectangle")
selection = handle.region()

# Process the first frame.  Bail out BEFORE building the tracker: previously
# SiamVGGTracker was constructed from a possibly empty `imagefile` and the
# emptiness check only ran afterwards.
imagefile = handle.frame()
if not imagefile:
    sys.exit(0)
tracker = SiamVGGTracker(imagefile, selection)

while True:
    # *****************************************
    # VOT: Call frame method to get path of the
    #      current image frame. If the result is
    #      null, the sequence is over.
    # *****************************************
    imagefile = handle.frame()
    if not imagefile:
        break
    region, confidence = tracker.track(imagefile)
    region = vot.Rectangle(region.x, region.y, region.width, region.height)
    # *****************************************
    # VOT: Report the position of the object
    #      every frame using report method.
    # *****************************************
    handle.report(region, confidence)
def track(self, image_curr, tracknet, velocity, sess):
    """Estimate the target box in ``image_curr`` and return it as a ``vot.Rectangle``.

    The previous tight box is cropped out of the previous image to form the
    target patch, and the current prior box is cropped out of ``image_curr``
    to form the search patch. Both patches are preprocessed, given a leading
    batch axis and fed through ``tracknet`` via ``sess.run``;
    ``calculate_box`` converts the two network outputs into candidate boxes.

    :param image_curr: current frame (the original comment notes BGR, the
        training format)
    :param tracknet: object exposing the ``image``/``target`` feed keys and
        the ``re_fc4_image``/``fc4_adj`` fetches
    :param velocity: not used by this method
    :param sess: object whose ``run(fetches, feed_dict=...)`` evaluates the
        network (presumably a TensorFlow session -- confirm with the caller)
    :returns: ``vot.Rectangle(x1, y1, x2 - x1, y2 - y1)`` for the new
        estimate, or for the unchanged prior box when no candidate is found
    """
    target_crop, _, _, _ = cropPadImage(self.bbox_prev_tight, self.image_prev)
    search_crop, search_location, edge_spacing_x, edge_spacing_y = cropPadImage(
        self.bbox_curr_prior_tight, image_curr)

    # Preprocess each patch and add the batch axis expected by the feed.
    search_input = np.expand_dims(self.preprocess(search_crop), axis=0)
    target_input = np.expand_dims(self.preprocess(target_crop), axis=0)

    fetches = [tracknet.re_fc4_image, tracknet.fc4_adj]
    feed = {
        tracknet.image: search_input,
        tracknet.target: target_input,
    }
    re_fc4_image, fc4_adj = sess.run(fetches, feed_dict=feed)

    candidates, _object_bool, objectness = calculate_box(re_fc4_image, fc4_adj)
    print('objectness_s is: ', objectness)

    # NOTE: an experimental "trick method" variant (smoothed displacement
    # applied to the prior box when no object was detected) used to sit here
    # as commented-out code; it was never executed and has been dropped.
    if len(candidates) == 0:
        # No candidate: keep the prior box exactly as it is and report it.
        estimate = self.bbox_curr_prior_tight
    else:
        # Only the first candidate is used (original note: NMS result;
        # checking all boxes is still a TODO).
        best = candidates[0]
        estimate = BoundingBox(best[0], best[1], best[2], best[3])

        # In-place correction from search-patch coordinates back to the
        # full image.
        estimate.unscale(search_crop)
        estimate.uncenter(image_curr, search_location, edge_spacing_x,
                          edge_spacing_y)

        # Only the prior for the next call is updated; image_prev and
        # bbox_prev_tight are left untouched.
        self.bbox_curr_prior_tight = estimate

    return vot.Rectangle(estimate.x1,
                         estimate.y1,
                         estimate.x2 - estimate.x1,
                         estimate.y2 - estimate.y1)