def init(self, first_frame, bbox):
    """Build the target template from the first frame.

    Args:
        first_frame: colour image; asserted to be 3-D with 3 channels.
        bbox: initial target box ``(x0, y0, w, h)``; cast to int64.

    When the box area is at least ``100**2`` the frame and the box are
    halved and ``self.resize`` is set to True. ``self.resize`` is not
    touched otherwise, so its default must be set elsewhere.

    Sets ``x_crop_siz``, ``_center``, ``w``, ``h``, ``x_window_size``,
    ``x_cos_window``, ``search_size``, ``x_gaus``, ``target_sz`` and the
    CUDA template tensor ``x1``.
    """
    assert len(first_frame.shape) == 3 and first_frame.shape[2] == 3
    bbox = np.array(bbox).astype(np.int64)
    x0, y0, w, h = tuple(bbox)
    if w * h >= 100**2:
        # Large target: work at half resolution from here on.
        self.resize = True
        x0, y0, w, h = x0 / 2, y0 / 2, w / 2, h / 2
        first_frame = cv2.resize(first_frame, dsize=None, fx=0.5,
                                 fy=0.5).astype(np.uint8)

    # Padded crop size in pixels (also used for visualisation).
    self.x_crop_siz = (int(np.floor(w * (1 + self.x_padding))),
                       int(np.floor(h * (1 + self.x_padding))))
    self._center = (x0 + w / 2, y0 + h / 2)
    self.w, self.h = w, h
    # Same padded size expressed in feature cells.
    self.x_window_size = (
        int(np.floor(w * (1 + self.x_padding))) // self.cell_size,
        int(np.floor(h * (1 + self.x_padding))) // self.cell_size)
    self.x_cos_window = cos_window(self.x_window_size)
    # Seven relative scales between 0.985 and 1.015.
    self.search_size = np.linspace(0.985, 1.015, 7)
    s = np.sqrt(w * h) * self.output_sigma_factor / self.cell_size
    self.x_gaus = gaussian2d_labels(self.x_window_size, s)
    self.target_sz = (w, h)

    patch = cv2.getRectSubPix(first_frame, self.x_crop_siz, self._center)
    patch = cv2.resize(patch, dsize=self.x_crop_siz)
    hc_features = self.get_features(patch, self.cell_size)
    hc_features = hc_features * self.x_cos_window[:, :, None]
    self.x1 = torch.from_numpy(hc_features.astype(np.float32)).cuda()

    # Visualisation is optional; update() also treats self.vis as possibly
    # None, so guard the call instead of crashing when no visualiser is set.
    if self.vis is not None:
        self.vis.image(self.x1.permute(2, 0, 1)[0:3, :, :], win='template')
def init(self, first_frame, bbox):
    """Set up the translation filter and the scale estimator.

    Args:
        first_frame: first image of the sequence; converted to float32.
        bbox: initial target box ``(x, y, w, h)``; cast to int64.

    Depending on ``self.scale_type`` ('normal' or 'LP') a
    DSSTScaleEstimator or an LPScaleEstimator is created and initialised.
    For 'normal' the minimum and maximum scale factors are also derived.
    """
    frame = first_frame.astype(np.float32)
    left, top, width, height = tuple(np.array(bbox).astype(np.int64))

    self._center = (left + width / 2, top + height / 2)
    self.w, self.h = width, height
    self.crop_size = (int(width * (1 + self.padding)),
                      int(height * (1 + self.padding)))
    self.base_target_size = (self.w, self.h)
    self.target_sz = (self.w, self.h)
    self._window = cos_window(self.crop_size)

    # Desired response and the location of its peak.
    label_sigma = np.sqrt(self.w * self.h) * self.output_sigma_factor
    self.y = gaussian2d_labels(self.crop_size, label_sigma)
    peak_flat_index = np.argmax(self.y, axis=None)
    self._init_response_center = np.unravel_index(peak_flat_index,
                                                  self.y.shape)
    self.yf = fft2(self.y)

    # Initial translation model: numerator and denominator of the filter.
    self.current_scale_factor = 1.
    sample = self.get_translation_sample(frame, self._center,
                                         self.crop_size,
                                         self.current_scale_factor,
                                         self._window)
    self.xlf = fft2(sample)
    conj_xlf = np.conj(self.xlf)
    self.hf_den = np.sum(self.xlf * conj_xlf, axis=2)
    self.hf_num = self.yf[:, :, None] * np.conj(self.xlf)

    if self.scale_type == 'normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz,
                                                  config=self.scale_config)
        self.scale_estimator.init(frame, self._center,
                                  self.base_target_size,
                                  self.current_scale_factor)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step

        crop_dims = np.array(([self.crop_size[0], self.crop_size[1]]))
        min_exponent = np.ceil(
            np.log(np.max(5 / crop_dims)) / np.log(self._scale_step))
        self._min_scale_factor = self._scale_step**min_exponent

        target_dims_hw = np.array(
            [self.base_target_size[1], self.base_target_size[0]])
        max_exponent = np.floor(
            np.log(np.min(frame.shape[:2] / target_dims_hw)) /
            np.log(self._scale_step))
        self._max_scale_factor = self._scale_step**max_exponent
    elif self.scale_type == 'LP':
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(frame, self._center,
                                  self.base_target_size,
                                  self.current_scale_factor)
def init(self, first_frame, bbox):
    """Train the initial model on a grayscale patch twice the target size.

    Args:
        first_frame: 2-D image, or a 3-channel image that is converted
            with ``cv2.COLOR_BGR2GRAY``.
        bbox: initial target box ``(x, y, w, h)``; cast to int64.
    """
    if first_frame.ndim == 3:
        assert first_frame.shape[2] == 3
        first_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
    gray = first_frame.astype(np.float32)

    left, top, w, h = tuple(np.array(bbox).astype(np.int64))
    self._center = (left + w / 2, top + h / 2)
    self.w, self.h = w, h

    # The search window is twice the target in each dimension.
    padded_size = (int(round(2 * w)), int(round(2 * h)))
    self._window = cos_window(padded_size)
    self.crop_size = padded_size

    # Scale the patch to [-0.5, 0.5] before applying the window.
    patch = cv2.getRectSubPix(gray, padded_size, self._center) / 255 - 0.5
    self.x = patch * self._window

    label_sigma = np.sqrt(w * h) / 16
    self.y = gaussian2d_labels(padded_size, label_sigma)
    self._init_response_center = np.unravel_index(
        np.argmax(self.y, axis=None), self.y.shape)
    self.alphaf = self._training(self.x, self.y)
def init(self, first_frame, bbox):
    """Initialise the kernelised model from the first frame.

    Args:
        first_frame: first image of the sequence.
        bbox: initial target box ``(x, y, w, h)``; cast to int64.

    Stores the numerator and denominator of the dual coefficients
    (``alphaf_num`` and ``alphaf_den``) separately.
    """
    left, top, w, h = tuple(np.array(bbox).astype(np.int64))
    self._center = (left + w / 2, top + h / 2)
    self.w, self.h = w, h

    padded_size = (int(w * (1 + self.padding)),
                   int(h * (1 + self.padding)))
    self._window = cos_window(padded_size)
    # crop_size is read back from the window array as (shape[1], shape[0]).
    window_shape = self._window.shape
    self.crop_size = (window_shape[1], window_shape[0])

    label_sigma = np.sqrt(w * h) * self.output_sigma_factor
    self.y = gaussian2d_labels(self.crop_size, label_sigma)
    self.yf = fft2(self.y)
    self._init_response_center = np.unravel_index(
        np.argmax(self.y, axis=None), self.y.shape)

    self.x = self.get_sub_window(first_frame, self._center, self.crop_size)
    self.x = self._window[:, :, None] * self.x

    # Kernel auto-correlation of the template in the Fourier domain.
    kernel_f = fft2(self._dgk(self.x, self.x))
    self.alphaf_num = (self.yf) * kernel_f
    self.alphaf_den = kernel_f * (kernel_f + self.lambda_)
def init(self, first_frame, bbox):
    """Initialise the multi-scale kernel model from the first frame.

    Args:
        first_frame: colour image; asserted to be 3-D with 3 channels.
        bbox: initial target box ``(x0, y0, w, h)``; cast to int64.

    When the box area is at least ``100**2`` the frame and the box are
    halved and ``self.resize`` is set to True.
    """
    assert len(first_frame.shape) == 3 and first_frame.shape[2] == 3
    x0, y0, w, h = tuple(np.array(bbox).astype(np.int64))

    if w * h >= 100**2:
        self.resize = True
        x0, y0, w, h = x0 / 2, y0 / 2, w / 2, h / 2
        half_frame = cv2.resize(first_frame, dsize=None, fx=0.5, fy=0.5)
        first_frame = half_frame.astype(np.uint8)

    padded_w = int(np.floor(w * (1 + self.padding)))
    padded_h = int(np.floor(h * (1 + self.padding)))
    self.crop_size = (padded_w, padded_h)  # for vis
    self._center = (x0 + w / 2, y0 + h / 2)
    self.w, self.h = w, h
    self.window_size = (padded_w // self.cell_size,
                        padded_h // self.cell_size)
    self._window = cos_window(self.window_size)

    label_sigma = np.sqrt(w * h) * self.output_sigma_factor / self.cell_size
    self.yf = fft2(gaussian2d_rolled_labels(self.window_size, label_sigma))
    self.search_size = np.linspace(0.985, 1.015, 7)
    self.target_sz = (w, h)

    # Disabled alternative kept from the original source (affine warp):
    #param0=[self._center[0],self._center[1],1,
    #        0,1/(self.crop_size[1]/self.crop_size[0]),
    #        0]
    #param0=self.affparam2mat(param0)
    #patch=self.warpimg(first_frame.astype(np.float32),param0,self.crop_size).astype(np.uint8)
    patch = cv2.getRectSubPix(first_frame, self.crop_size, self._center)
    patch = cv2.resize(patch, dsize=self.crop_size)

    features = self.get_features(patch, self.cell_size)
    windowed = features * self._window[:, :, None]
    xf = fft2(windowed)
    kf = self._kernel_correlation(xf, xf, kernel=self.kernel)
    self.model_alphaf = self.yf / (kf + self.lambda_)
    self.model_xf = xf
def init(self, first_frame, bbox):
    """Initialise the filter numerator/denominator from warped templates.

    Args:
        first_frame: 2-D image, or a 3-channel image that is converted
            with ``cv2.COLOR_BGR2GRAY``; then scaled by 1/255 as float32.
        bbox: initial target box ``(x, y, w, h)``; used as given (no
            integer cast) for the centre, rounded for the patch size.

    ``_Ai`` and ``_Bi`` are accumulated over eight random warps of the
    initial patch produced by ``self._rand_warp``.
    """
    if first_frame.ndim != 2:
        assert first_frame.shape[2] == 3
        first_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
    gray = first_frame.astype(np.float32) / 255

    left, top, w, h = tuple(bbox)
    self._center = (left + w / 2, top + h / 2)
    self.w, self.h = w, h

    patch_size = (int(round(w)), int(round(h)))
    self.cos_window = cos_window(patch_size)
    self._fi = cv2.getRectSubPix(gray, patch_size, self._center)
    self._G = np.fft.fft2(gaussian2d_labels(patch_size, self.sigma))
    self.crop_size = patch_size

    # Local aliases refer to the same arrays as the attributes, so the
    # in-place additions below update self._Ai / self._Bi directly.
    numerator = self._Ai = np.zeros_like(self._G)
    denominator = self._Bi = np.zeros_like(self._G)
    warps_done = 0
    while warps_done < 8:
        warped = self._rand_warp(self._fi)
        warped_f = np.fft.fft2(self._preprocessing(warped, self.cos_window))
        numerator += self._G * np.conj(warped_f)
        denominator += warped_f * np.conj(warped_f)
        warps_done += 1
def init(self, first_frame, bbox):
    """Train the initial model for the configured feature type.

    Args:
        first_frame: colour image; asserted to be 3-D with 3 channels.
            Converted to grayscale when ``self.features == 'gray'``.
        bbox: initial target box ``(x0, y0, w, h)``; cast to int64.

    Raises:
        NotImplementedError: if ``self.features`` is not one of
            'gray', 'color', 'hog' or 'cn'.
    """
    assert len(first_frame.shape) == 3 and first_frame.shape[2] == 3
    if self.features == 'gray':
        first_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
    bbox = np.array(bbox).astype(np.int64)
    x0, y0, w, h = tuple(bbox)

    # Padded crop size in pixels (for visualisation) and in feature cells.
    self.crop_size = (int(np.floor(w * (1 + self.padding))),
                      int(np.floor(h * (1 + self.padding))))  # for vis
    self._center = (np.floor(x0 + w / 2), np.floor(y0 + h / 2))
    self.w, self.h = w, h
    self.window_size = (
        int(np.floor(w * (1 + self.padding))) // self.cell_size,
        int(np.floor(h * (1 + self.padding))) // self.cell_size)
    self._window = cos_window(self.window_size)

    s = np.sqrt(w * h) * self.output_sigma_factor / self.cell_size
    self.yf = fft2(gaussian2d_rolled_labels(self.window_size, s))

    # Pixel size the crop is resized to before cell-based feature extraction.
    resized_size = (self.window_size[0] * self.cell_size,
                    self.window_size[1] * self.cell_size)

    if self.features == 'gray' or self.features == 'color':
        first_frame = first_frame.astype(np.float32) / 255
        x = self._crop(first_frame, self._center, (w, h))
        x = x - np.mean(x)
    elif self.features == 'hog':
        x = self._crop(first_frame, self._center, (w, h))
        x = cv2.resize(x, resized_size)
        x = extract_hog_feature(x, cell_size=self.cell_size)
    elif self.features == 'cn':
        # Crop the target window first, exactly like the 'hog' branch.
        # Previously the whole frame was resized, so the model was trained
        # on the entire image instead of the target region.
        x = self._crop(first_frame, self._center, (w, h))
        x = cv2.resize(x, resized_size)
        x = extract_cn_feature(x, self.cell_size)
    else:
        raise NotImplementedError

    self.xf = fft2(self._get_windowed(x, self._window))
    self.init_response_center = (0, 0)
    self.alphaf = self._training(self.xf, self.yf)
def init(self, im, pos, base_target_sz, current_scale_factor):
    """Build the log-polar scale model from the first frame.

    Args:
        im: image to sample from.
        pos: centre passed to ``cv2.getRectSubPix``.
        base_target_sz: ``(w, h)`` of the target.
        current_scale_factor: scale by which the sampled region is
            multiplied (and by which ``scale_sz`` is divided).

    Reads ``self.scale_sz_window``, which must already be set.
    """
    target_w, target_h = base_target_sz
    margin = (target_w + target_h) / 2.5
    self.scale_sz = ((target_w + margin) / current_scale_factor,
                     (target_h + margin) / current_scale_factor)
    self.scale_sz0 = self.scale_sz

    self.cos_window_scale = cos_window(
        (self.scale_sz_window[0], self.scale_sz_window[1]))
    dim0 = self.cos_window_scale.shape[0]
    dim1 = self.cos_window_scale.shape[1]
    # Magnitude parameter handed to cv2.logPolar below.
    self.mag = dim0 / np.log(np.sqrt((dim0**2 + dim1**2) / 4))

    # scale lp
    sample_size = (int(np.floor(current_scale_factor * self.scale_sz[0])),
                   int(np.floor(current_scale_factor * self.scale_sz[1])))
    region = cv2.getRectSubPix(im, sample_size, pos)
    region = cv2.resize(region, self.scale_sz_window)

    polar_center = ((region.shape[1] - 1) / 2, (region.shape[0] - 1) / 2)
    log_polar = cv2.logPolar(region.astype(np.float32),
                             polar_center,
                             self.mag,
                             flags=cv2.INTER_LINEAR + cv2.WARP_FILL_OUTLIERS)
    self.model_patchLp = extract_hog_feature(log_polar, cell_size=4)
def init(self, first_frame, bbox):
    """Initialise the HOG + colour-name model and its PCA projections.

    Args:
        first_frame: first image of the sequence.
        bbox: initial target box ``(x0, y0, w, h)``; cast to int64.

    The target is rescaled by ``self.sc`` when its area exceeds
    ``self.translation_model_max_area``. A scale estimator is created
    according to ``self.scale_type`` ('normal' or 'LP').
    """

    def _leading_basis(features, num_dims):
        # Left singular vectors of the channel covariance, truncated.
        flat = features.reshape((-1, features.shape[2]))
        basis, _, _ = np.linalg.svd(flat.T.dot(flat))
        return basis[:, :num_dims]

    x0, y0, w, h = tuple(np.array(bbox).astype(np.int64))
    self.target_sz = (w, h)
    self._center = (int(x0 + w / 2), int(y0 + h / 2))

    area = w * h
    self.sc = (np.sqrt(area / self.translation_model_max_area)
               if area > self.translation_model_max_area else 1.)
    self.base_target_sz = (w / self.sc, h / self.sc)
    base_w, base_h = self.base_target_sz

    self.win_sz = (int(np.floor(base_w * (1 + self.padding))),
                   int(np.floor(base_h * (1 + self.padding))))
    output_sigma = (np.sqrt(base_w * base_h) * self.output_sigma_factor /
                    self.cell_size)
    use_sz = (int(np.floor(self.win_sz[0] / self.cell_size)),
              int(np.floor(self.win_sz[1] / self.cell_size)))
    self.yf = fft2(0.5 * gaussian2d_rolled_labels(use_sz, sigma=output_sigma))
    self.interp_sz = (use_sz[0] * self.cell_size, use_sz[1] * self.cell_size)
    self._window = cos_window(use_sz)

    if self.scale_type == 'normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz,
                                                  config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz, self.sc)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step

        window_dims = np.array(([self.win_sz[0], self.win_sz[1]]))
        min_exponent = np.ceil(
            np.log(np.max(5 / window_dims)) / np.log(self._scale_step))
        self._min_scale_factor = self._scale_step ** min_exponent

        target_dims_hw = np.array(
            [self.base_target_sz[1], self.base_target_sz[0]])
        max_exponent = np.floor(
            np.log(np.min(first_frame.shape[:2] / target_dims_hw)) /
            np.log(self._scale_step))
        self._max_scale_factor = self._scale_step ** max_exponent
    elif self.scale_type == 'LP':
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz, self.sc)

    # Use the colour-sequence parameter set.
    self.cn_sigma = self.cn_sigma_color
    self.hog_sigma = self.hog_sigma_color
    self.lr_hog = self.lr_hog_color
    self.lr_cn = self.lr_cn_color
    self.modnum = self.gap
    self.is_gray = False

    patch = cv2.getRectSubPix(first_frame, self.win_sz,
                              self._center).astype(np.uint8)
    self.z_hog, self.z_cn = self.get_features(patch, cell_size=self.cell_size)

    self.projection_matrix_cn = _leading_basis(self.z_cn,
                                               self.num_compressed_dim_cn)
    self.projection_matrix_hog = _leading_basis(self.z_hog,
                                                self.num_compressed_dim_hog)
    self.z_cn2, self.z_hog2 = self.feature_projection(
        self.z_cn, self.z_hog, self.projection_matrix_cn,
        self.projection_matrix_hog, self._window)

    self.frame_index = 1
    self.d = self.train_model()
def update(self, current_frame):
    """Locate the target in ``current_frame`` and refresh the U/V transforms.

    Args:
        current_frame: next image of the sequence; halved first when
            ``self.resize`` is truthy.

    Returns:
        ``(bbox, max_score)`` where ``bbox`` is ``[x, y, w, h]`` (doubled
        when ``self.resize is True``) and ``max_score`` is
        ``response.max()``.

    NOTE(review): block nesting was reconstructed from a flattened source;
    in particular confirm that the visualisation calls belong inside the
    scale loop.
    """
    if self.resize:
        current_frame = cv2.resize(current_frame, dsize=None, fx=0.5,
                                   fy=0.5).astype(np.uint8)
    response = None
    # Conduct V transformation over target template:
    if self.V is not None:
        x_ = Trans(self.x1, self.V, self.lr_v)
    else:
        # First call: V is computed from the template against itself and the
        # untransformed template is used for matching.
        self.V = CalTrans(self.x1, self.x1, self.lambda_v)
        x_ = self.x1
    # NOTE(review): this is a NumPy float array in (target_sz[1],
    # target_sz[0]) order and is passed to cv2.resize below as the output
    # size - confirm cv2 accepts it and that the order is intended.
    self.z_crop_siz = np.round((self.target_sz[1] * (1 + self.z_padding),
                                self.target_sz[0] * (1 + self.z_padding)))
    # Evaluate the correlation response at every candidate scale.
    for i in range(len(self.search_size)):
        tmp_sz = (self.target_sz[0] * (1 + self.z_padding) *
                  self.search_size[i],
                  self.target_sz[1] * (1 + self.z_padding) *
                  self.search_size[i])
        patch = cv2.getRectSubPix(
            current_frame,
            (int(np.round(tmp_sz[0])), int(np.round(tmp_sz[1]))),
            self._center)
        patch = cv2.resize(patch, self.z_crop_siz)
        hc_features = self.get_features(patch, self.cell_size)
        self.z_cos_window = cos_window(np.round(tmp_sz) // self.cell_size)
        hc_features = hc_features * self.z_cos_window[:, :, None]
        z = torch.from_numpy(hc_features.astype(np.float32)).cuda()
        # Conduct U transformation over search region:
        if self.U is not None:
            # U can only be applied when its leading dimensions match z.
            if z.size() != self.U.size()[:-1]:
                raise NotImplementedError
            z_ = Trans(z, self.U, self.lr_u)
        else:
            z_ = z
        padding = [
            int(np.ceil((self.x_window_size[1] - 1) / 2)),
            int(np.ceil((self.x_window_size[0] - 1) / 2))
        ]
        # Correlate the search features with the template via conv2d and
        # stack the per-scale responses along a third axis.
        if response is None:
            response = F.conv2d(z_.permute(2, 0, 1).unsqueeze(0),
                                x_.permute(2, 0, 1).unsqueeze(0),
                                padding=padding).squeeze(0).squeeze(0)
            response = response[:, :, np.newaxis]
        else:
            response_ = F.conv2d(z_.permute(2, 0, 1).unsqueeze(0),
                                 x_.permute(2, 0, 1).unsqueeze(0),
                                 padding=padding).squeeze(0).squeeze(0)
            # NOTE(review): response/response_ come from F.conv2d on CUDA
            # tensors; confirm np.concatenate works on them here.
            response = np.concatenate(
                (response, response_[:, :, np.newaxis]), axis=2)
        if self.vis is not None:
            self.vis.image(x_.permute(2, 0, 1)[0:3, :, :],
                           win='updated_template')
            self.vis.image(z.permute(2, 0, 1)[0:3, :, :],
                           win='search region')
            self.vis.image(z_.permute(2, 0, 1)[0:3, :, :],
                           win='updated search region')
    # Peak of the stacked response gives the displacement and the scale index.
    delta_y, delta_x, sz_id = np.unravel_index(
        np.argmax(response, axis=None), response.shape)
    self.sz_id = sz_id
    # NOTE(review): self.tmp_sz is not assigned anywhere in this method
    # (only the local tmp_sz is) - verify it is set elsewhere.
    if delta_y + 1 > self.tmp_sz[1] / 2:
        delta_y = delta_y - self.tmp_sz[1]
    if delta_x + 1 > self.tmp_sz[0] / 2:
        delta_x = delta_x - self.tmp_sz[0]
    # Rescale the target by the winning scale factor.
    self.target_sz = (self.target_sz[0] * self.search_size[self.sz_id],
                      self.target_sz[1] * self.search_size[self.sz_id])
    tmp_sz = (self.target_sz[0] * (1 + self.x_padding),
              self.target_sz[1] * (1 + self.x_padding))
    current_size_factor = tmp_sz[0] / self.x_crop_siz[0]
    # Convert the displacement from feature cells to image pixels.
    x, y = self._center
    x += current_size_factor * self.cell_size * delta_x
    y += current_size_factor * self.cell_size * delta_y
    self._center = (x, y)
    # Re-extract the template at the new position and recompute V against
    # the first-frame template self.x1.
    patch = cv2.getRectSubPix(
        current_frame,
        (int(np.round(tmp_sz[0])), int(np.round(tmp_sz[1]))),
        self._center)
    patch = cv2.resize(patch, self.x_crop_siz)
    hc_features = self.get_features(patch, self.cell_size)
    new_x = self.x_cos_window[:, :, None] * hc_features
    new_x_ = torch.from_numpy(new_x.astype(np.float32)).cuda()
    self.V = CalTrans(self.x1, new_x_, self.lambda_v)
    # new_z
    tmp_sz = (self.target_sz[0] * (1 + self.z_padding),
              self.target_sz[1] * (1 + self.z_padding))
    self.z_window_size = (int(np.round(tmp_sz[0])) // self.cell_size,
                          int(np.round(tmp_sz[1])) // self.cell_size)
    self.z_cos_window = cos_window(self.z_window_size)
    # NOTE(review): floor division (//) here, whereas init() uses true
    # division (/) for the same sigma formula - confirm which is intended.
    s = np.sqrt(
        self.target_sz[0] *
        self.target_sz[1]) * self.output_sigma_factor // self.cell_size
    self.z_gaus = gaussian2d_labels(self.z_window_size, s)
    patch = cv2.getRectSubPix(
        current_frame,
        (int(np.round(tmp_sz[0])), int(np.round(tmp_sz[1]))),
        self._center)
    hc_features = self.get_features(patch, self.cell_size)
    # NOTE(review): the search-region features are multiplied by
    # x_cos_window although z_cos_window was just recomputed - confirm.
    hc_features = hc_features * self.x_cos_window[:, :, None]
    new_z = torch.from_numpy(hc_features.astype(np.float32)).cuda()
    # NOTE(review): np.multiply is applied to a CUDA tensor (new_z) and the
    # result is fed to torch.from_numpy - confirm this runs as intended.
    new_z_ = np.multiply(
        np.repeat(self.z_gaus[:, :, np.newaxis], new_z.shape[2], axis=2),
        new_z)
    new_z_ = torch.from_numpy(new_z_).cuda()
    self.U = CalTrans(new_z, new_z_, self.lambda_u)
    bbox = [(self._center[0] - self.target_sz[0] / 2),
            (self._center[1] - self.target_sz[1] / 2), self.target_sz[0],
            self.target_sz[1]]
    # Map the box back to full resolution when tracking at half size.
    if self.resize is True:
        bbox = [ele * 2 for ele in bbox]
    max_score = response.max()
    return bbox, max_score
def init(self, first_frame, bbox):
    """Initialise histograms, the correlation filters of every expert and
    the per-expert bookkeeping lists from the first frame.

    Args:
        first_frame: first image of the sequence; converted to float32.
        bbox: initial target box ``(x, y, w, h)``; cast to int64.

    Increments ``self.frame_idx``, so that attribute must already exist.
    """
    self.frame_idx += 1
    first_frame = first_frame.astype(np.float32)
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h
    self.crop_size = (int(w * (1 + self.padding)),
                      int(h * (1 + self.padding)))
    self.target_sz = (self.w, self.h)
    self.bin_mapping = self.get_bin_mapping(self.n_bins)

    # Background area: target plus the average dimension on each axis.
    # Foreground area: target shrunk by inner_padding * average dimension.
    avg_dim = (w + h) / 2
    self.bg_area = (round(w + avg_dim), round(h + avg_dim))
    self.fg_area = (int(round(w - avg_dim * self.inner_padding)),
                    int(round(h - avg_dim * self.inner_padding)))
    # Clamp the background area to the frame size minus one.
    self.bg_area = (int(min(self.bg_area[0], first_frame.shape[1] - 1)),
                    int(min(self.bg_area[1], first_frame.shape[0] - 1)))
    # Make (bg_area - target_sz) and (bg_area - fg_area) even on both axes.
    self.bg_area = (self.bg_area[0] -
                    (self.bg_area[0] - self.target_sz[0]) % 2,
                    self.bg_area[1] -
                    (self.bg_area[1] - self.target_sz[1]) % 2)
    self.fg_area = (self.fg_area[0] +
                    (self.bg_area[0] - self.fg_area[0]) % 2,
                    self.fg_area[1] +
                    (self.bg_area[1] - self.fg_area[1]) % 2)
    # Resize factor that brings the background area to self.fixed_area pixels.
    self.area_resize_factor = np.sqrt(self.fixed_area /
                                      (self.bg_area[0] * self.bg_area[1]))
    self.norm_bg_area = (round(self.bg_area[0] * self.area_resize_factor),
                         round(self.bg_area[1] * self.area_resize_factor))
    # Size of the correlation response in feature cells.
    self.cf_response_size = (int(
        np.floor(self.norm_bg_area[0] / self.cell_size)), int(
            np.floor(self.norm_bg_area[1] / self.cell_size)))
    norm_target_sz_w = 0.75 * self.norm_bg_area[
        0] - 0.25 * self.norm_bg_area[1]
    norm_target_sz_h = 0.75 * self.norm_bg_area[
        1] - 0.25 * self.norm_bg_area[0]
    self.norm_target_sz = (round(norm_target_sz_w),
                           round(norm_target_sz_h))
    norm_pad = (int(np.floor(
        (self.norm_bg_area[0] - norm_target_sz_w) / 2)),
                int(np.floor(
                    (self.norm_bg_area[1] - norm_target_sz_h) / 2)))
    # Square search delta of odd side length limited by the smaller padding.
    radius = min(norm_pad[0], norm_pad[1])
    self.norm_delta_area = (2 * radius + 1, 2 * radius + 1)
    self.norm_pwp_search_area = (self.norm_target_sz[0] +
                                 self.norm_delta_area[0] - 1,
                                 self.norm_target_sz[1] +
                                 self.norm_delta_area[1] - 1)

    # Build the initial background/foreground histograms; new_pwp_model is
    # True only for this first call.
    patch_padded = self.get_sub_window(first_frame, self._center,
                                       self.norm_bg_area, self.bg_area)
    self.new_pwp_model = True
    self.bg_hist, self.fg_hist = self.update_hist_model(
        self.new_pwp_model,
        patch_padded,
        self.bg_area,
        self.fg_area,
        self.target_sz,
        self.norm_bg_area,
        self.n_bins,
    )
    self.new_pwp_model = False

    # Desired correlation response and its Fourier transform.
    self._window = cos_window(self.cf_response_size)
    output_sigma = np.sqrt(
        self.norm_target_sz[0] *
        self.norm_target_sz[1]) * self.output_sigma_factor / self.cell_size
    self.y = gaussian2d_rolled_labels_staple(self.cf_response_size,
                                             output_sigma)
    self._init_response_center = np.unravel_index(
        np.argmax(self.y, axis=None), self.y.shape)
    # print(self._init_response_center)
    self.yf = fft2(self.y)

    if self.scale_adaptation is True:
        self.scale_factor = 1
        self.base_target_sz = self.target_sz
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz, self.scale_factor)

    # Extract windowed features and hand each expert its channel subset.
    im_patch_bg = self.get_sub_window(first_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.cell_size)
    xt = self._window[:, :, None] * xt
    xt_cn, xt_hog1, xt_hog2 = self.split_features(xt)
    # Experts 0-2 use a single feature group, 3-5 use pairs, 6 uses all.
    self.experts[0].xt = xt_cn
    self.experts[1].xt = xt_hog1
    self.experts[2].xt = xt_hog2
    self.experts[3].xt = np.concatenate((xt_hog1, xt_cn), axis=2)
    self.experts[4].xt = np.concatenate((xt_hog2, xt_cn), axis=2)
    self.experts[5].xt = np.concatenate((xt_hog1, xt_hog2), axis=2)
    self.experts[6].xt = xt
    # Filter numerator/denominator per expert, normalised by response area.
    for i in range(self.expert_num):
        xtf = fft2(self.experts[i].xt)
        self.experts[i].hf_den = np.conj(xtf) * xtf / (
            self.cf_response_size[0] * self.cf_response_size[1])
        self.experts[i].hf_num = np.conj(self.yf)[:, :, None] * xtf / (
            self.cf_response_size[0] * self.cf_response_size[1])
    self.rect_position_padded = None
    self.avg_dim = avg_dim
    # Seed every expert's history with the initial box and default scores.
    for i in range(self.expert_num):
        self.experts[i].rect_positions.append([
            self._center[0] - self.target_sz[0] / 2,
            self._center[1] - self.target_sz[1] / 2, self.target_sz[0],
            self.target_sz[1]
        ])
        self.experts[i].rob_scores.append(1)
        self.experts[i].smoothes.append(0)
        self.experts[i].smooth_scores.append(1)
        self.experts[i].centers.append([self._center[0], self._center[1]])
def init(self, first_frame, bbox):
    """Initialise the regularised filter and the scale estimator.

    Args:
        first_frame: first image of the sequence.
        bbox: initial target box ``(x0, y0, w, h)``; cast to int64.

    Raises:
        ValueError: if ``self.search_area_shape`` is not 'proportional',
            'square' or 'fix_padding'.
    """
    bbox = np.array(bbox).astype(np.int64)
    x0, y0, w, h = tuple(bbox)
    self._center = (int(x0 + w / 2), int(y0 + h / 2))
    self.target_sz = (w, h)

    # Choose the resize factor so the search area stays within the
    # configured min/max sample sizes (1 when already inside the range).
    search_area = self.target_sz[
        0] * self.search_area_scale * self.target_sz[
            1] * self.search_area_scale
    self.sc = np.clip(
        1,
        a_min=np.sqrt(search_area / self.max_image_sample_size),
        a_max=np.sqrt(search_area / self.min_image_sample_size))
    self.base_target_sz = (self.target_sz[0] / self.sc,
                           self.target_sz[1] / self.sc)

    if self.search_area_shape == 'proportional':
        self.crop_size = (int(self.base_target_sz[0] *
                              self.search_area_scale),
                          int(self.base_target_sz[1] *
                              self.search_area_scale))
    elif self.search_area_shape == 'square':
        # Local name chosen so the bbox width `w` is not shadowed.
        side = int(
            np.sqrt(self.base_target_sz[0] * self.base_target_sz[1]) *
            self.search_area_scale)
        self.crop_size = (side, side)
    elif self.search_area_shape == 'fix_padding':
        tmp = int(np.sqrt(self.base_target_sz[0] * self.search_area_scale +
                          (self.base_target_sz[1] -
                           self.base_target_sz[0]) / 4)) + \
            (self.base_target_sz[0] + self.base_target_sz[1]) / 2
        self.crop_size = (self.base_target_sz[0] + tmp,
                          self.base_target_sz[1] + tmp)
    else:
        raise ValueError

    # Label sigma is derived from the target size in feature cells, so BOTH
    # dimensions are divided by cell_size. The height was previously
    # multiplied by cell_size, which inflated sigma.
    output_sigma = np.sqrt(
        np.floor(self.base_target_sz[0] / self.cell_size) *
        np.floor(self.base_target_sz[1] / self.cell_size)) * \
        self.output_sigma_factor

    # Snap the crop size to a multiple of the cell size.
    self.crop_size = (int(
        round(self.crop_size[0] / self.cell_size) * self.cell_size),
                      int(
                          round(self.crop_size[1] / self.cell_size) *
                          self.cell_size))
    self.feature_map_sz = (self.crop_size[0] // self.cell_size,
                           self.crop_size[1] // self.cell_size)
    y = gaussian2d_rolled_labels(self.feature_map_sz, output_sigma)
    self.cosine_window = (cos_window((y.shape[1], y.shape[0])))
    self.yf = fft2(y)

    reg_scale = (int(
        np.floor(self.base_target_sz[0] / self.feature_downsample_ratio)),
                 int(
                     np.floor(self.base_target_sz[1] /
                              self.feature_downsample_ratio)))
    use_sz = self.feature_map_sz
    #self.reg_window=self.create_reg_window(reg_scale,use_sz,self.p,self.reg_window_max,
    #                                       self.reg_window_min,self.alpha,self.beta)
    self.reg_window = self.create_reg_window_const(reg_scale, use_sz,
                                                   self.reg_window_max,
                                                   self.reg_window_min)

    # Integer index vectors rotated so that index 0 comes first.
    self.ky = np.roll(
        np.arange(-int(np.floor((self.feature_map_sz[1] - 1) / 2)),
                  int(np.ceil((self.feature_map_sz[1] - 1) / 2 + 1))),
        -int(np.floor((self.feature_map_sz[1] - 1) / 2)))
    self.kx = np.roll(
        np.arange(-int(np.floor((self.feature_map_sz[0] - 1) / 2)),
                  int(np.ceil((self.feature_map_sz[0] - 1) / 2 + 1))),
        -int(np.floor((self.feature_map_sz[0] - 1) / 2)))

    # scale
    scale_exp = np.arange(
        -int(np.floor((self.number_of_scales - 1) / 2)),
        int(np.ceil((self.number_of_scales - 1) / 2) + 1))
    self.scale_factors = self.scale_step**scale_exp

    if self.number_of_scales > 0:
        self._min_scale_factor = self.scale_step**np.ceil(
            np.log(
                np.max(5 / np.array(
                    ([self.crop_size[0], self.crop_size[1]])))) /
            np.log(self.scale_step))
        self._max_scale_factor = self.scale_step**np.floor(
            np.log(
                np.min(first_frame.shape[:2] / np.array(
                    [self.base_target_sz[1], self.base_target_sz[0]]))) /
            np.log(self.scale_step))
    #print(self._min_scale_factor)
    #print(self._max_scale_factor)

    if self.scale_type == 'normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz,
                                                  config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz, self.sc)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step
        # Limits recomputed with the estimator's own scale step.
        self._min_scale_factor = self._scale_step**np.ceil(
            np.log(
                np.max(5 / np.array(
                    ([self.crop_size[0], self.crop_size[1]])))) /
            np.log(self._scale_step))
        self._max_scale_factor = self._scale_step**np.floor(
            np.log(
                np.min(first_frame.shape[:2] / np.array(
                    [self.base_target_sz[1], self.base_target_sz[0]]))) /
            np.log(self._scale_step))
    elif self.scale_type == 'LP':
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz, self.sc)

    # Learn the first filter from the windowed hand-crafted features.
    patch = self.get_sub_window(
        first_frame,
        self._center,
        model_sz=self.crop_size,
        scaled_sz=(int(np.round(self.crop_size[0] * self.sc)),
                   int(np.round(self.crop_size[1] * self.sc))))
    xl_hc = self.extrac_hc_feature(patch, self.cell_size)
    xlf_hc = fft2(xl_hc * self.cosine_window[:, :, None])
    f_pre_f_hc = np.zeros_like(xlf_hc)
    mu_hc = 0
    self.f_pre_f_hc = self.ADMM(xlf_hc, f_pre_f_hc, mu_hc)
def init(self, first_frame, region):
    """Initialise the tracker from an axis-aligned box or a 4-corner polygon.

    Args:
        first_frame: colour image; asserted to be 3-D with 3 channels.
        region: either ``(x0, y0, w, h)`` or eight values read as four
            ``(x, y)`` corners; cast to int64.

    Raises:
        ValueError: if ``region`` has neither 4 nor 8 elements.
    """
    #file = h5py.File('../lib/w2crs.mat', 'r')
    #self.w2c = file['w2crs']
    # Validate the shape BEFORE indexing channels; previously the channel
    # comparison ran first, so a bad shape failed there and the assertion
    # could never report it.
    assert len(first_frame.shape) == 3 and first_frame.shape[2] == 3
    # Colour histograms are disabled when channels 0 and 1 are identical.
    self.use_color_hist = not (np.all(
        first_frame[:, :, 0] == first_frame[:, :, 1]))
    region = np.array(region).astype(np.int64)
    if len(region) == 4:
        x0, y0, w, h = tuple(region)
        rot = 0
        self._center = (x0 + w / 2, y0 + h / 2)
        target_sz = (w, h)
    elif len(region) == 8:
        corners = region.reshape((4, 2))
        pos = np.mean(corners, axis=0)
        pos = pos.T
        # Target size is the mean length of opposite polygon edges.
        dist12 = np.sqrt(np.sum((corners[1, :] - corners[2, :])**2))
        dist03 = np.sqrt(np.sum((corners[0, :] - corners[3, :])**2))
        dist10 = np.sqrt(np.sum((corners[1, :] - corners[0, :])**2))
        dist23 = np.sqrt(np.sum((corners[2, :] - corners[3, :])**2))
        target_sz = ((dist10 + dist23) / 2, (dist12 + dist03) / 2)
        self._center = (pos[0], pos[1])
        # Rotation is the mean angle of two polygon edges against A,
        # expressed in units of pi/2 and signed by the product of the
        # edge vector's components.
        A = np.array([0., -1.])
        B = np.array([region[4] - region[2], region[3] - region[5]])
        rot1 = np.arccos(
            A.dot(B) /
            (np.linalg.norm(A) * np.linalg.norm(B))) * 2 / np.pi
        if np.prod(B) < 0:
            rot1 = -rot1
        C = np.array([region[6] - region[0], region[1] - region[7]])
        rot2 = np.arccos(
            A.dot(C) /
            (np.linalg.norm(A) * np.linalg.norm(C))) * 2 / np.pi
        if np.prod(C) < 0:
            rot2 = -rot2
        rot = (rot1 + rot2) / 2
    else:
        raise ValueError()

    self.bin_mapping = self.get_bin_mapping(self.nbin)
    self.window_sz = (int(np.floor(target_sz[0] * (1 + self.padding))),
                      int(np.floor(target_sz[1] * (1 + self.padding))))
    # Resize factor keeping the window area inside the configured range.
    search_area = self.window_sz[0] * self.window_sz[1]
    self.sc = search_area / np.clip(search_area,
                                    a_min=self.min_image_sample_size,
                                    a_max=self.max_image_sample_size)
    self.window_sz0 = (int(np.round(self.window_sz[0] / self.sc)),
                       int(np.round(self.window_sz[1] / self.sc)))
    # Snap the model window to whole feature cells, then re-derive the
    # per-axis scale and the cell size from the snapped window.
    feature_sz = (self.window_sz0[0] // self.cell_size,
                  self.window_sz0[1] // self.cell_size)
    self.window_sz0 = (feature_sz[0] * self.cell_size,
                       feature_sz[1] * self.cell_size)
    self.sc = (self.window_sz[0] / self.window_sz0[0],
               self.window_sz[1] / self.window_sz0[1])
    self.cell_size = int(np.round((self.window_sz0[0] / feature_sz[0])))
    self.rot = rot

    # Search window: model window enlarged by a quarter of (w + h).
    self.avg_dim = (self.window_sz[0] + self.window_sz[1]) / 4
    self.window_sz_search = (int(np.floor(self.window_sz[0] +
                                          self.avg_dim)),
                             int(np.floor(self.window_sz[1] +
                                          self.avg_dim)))
    self.window_sz_search0 = (int(
        np.floor(self.window_sz_search[0] / self.sc[0])), int(
            np.floor(self.window_sz_search[1] / self.sc[1])))
    cell_size_search = self.cell_size
    feature_sz0 = (int(
        np.floor(self.window_sz_search0[0] / cell_size_search)), int(
            np.floor(self.window_sz_search0[1] / cell_size_search)))
    # Keep the size difference between search and model features even.
    residual = (feature_sz0[0] - feature_sz[0],
                feature_sz0[1] - feature_sz[1])
    feature_sz0 = (feature_sz0[0] + residual[0] % 2,
                   feature_sz0[1] + residual[1] % 2)
    self.window_sz_search0 = (feature_sz0[0] * cell_size_search,
                              feature_sz0[1] * cell_size_search)
    self.sc = (self.window_sz_search[0] / self.window_sz_search0[0],
               self.window_sz_search[1] / self.window_sz_search0[1])
    self.target_sz0 = (int(np.round(target_sz[0] / self.sc[0])),
                       int(np.round(target_sz[1] / self.sc[1])))

    self.output_sigma = np.sqrt(
        target_sz[0] *
        target_sz[1]) * self.output_sigma_factor / self.cell_size
    self.y = gaussian2d_rolled_labels_staple(
        (int(np.round(self.window_sz0[0] / self.cell_size)),
         int(np.round(self.window_sz0[1] / self.cell_size))),
        self.output_sigma)
    self.yf = fft2(self.y)
    self.cos_window = cos_window((self.y.shape[1], self.y.shape[0]))
    self.cos_window_search = cos_window(
        (int(np.floor(self.window_sz_search0[0] / cell_size_search)),
         int(np.floor(self.window_sz_search0[1] / cell_size_search))))

    # scale settings
    avg_dim = (target_sz[0] + target_sz[1]) / 2.5
    self.scale_sz = ((target_sz[0] + avg_dim) / self.sc[0],
                     (target_sz[1] + avg_dim) / self.sc[1])
    self.scale_sz0 = self.scale_sz
    self.cos_window_scale = cos_window(
        (self.scale_sz_window[0] // self.cell_size,
         self.scale_sz_window[1] // self.cell_size))
    self.mag = self.scale_sz_window[1] / np.log(
        np.sqrt(
            (self.scale_sz_window[0]**2 + self.scale_sz_window[1]**2) /
            4))
    self.cell_size = cell_size_search

    # Train the initial models with unit scale and zero rotation.
    tmp_sc = 1.
    tmp_rot = 0.
    self.logupdate(1, first_frame, self._center, tmp_sc, tmp_rot)

    # Keep the centre inside the image.
    x, y = self._center
    x = np.clip(x, a_min=0, a_max=first_frame.shape[1] - 1)
    y = np.clip(y, a_min=0, a_max=first_frame.shape[0] - 1)
    self._center = (x, y)
def update(self, current_frame, vis=False):
    """Localise the target in ``current_frame`` and update the colour models.

    Args:
        current_frame: next image of the sequence (BGR is assumed by the
            ``cv2.COLOR_BGR2*`` conversions used below).
        vis: when truthy, the search-window probability map is stored in
            ``self.score``.

    Returns:
        ``[x, y, w, h]`` of the target in original image coordinates.

    NOTE(review): block nesting was reconstructed from a flattened source.
    """
    # Work on a rescaled copy of the frame in the configured colour space.
    img_preprocessed = cv2.resize(current_frame,
                                  None,
                                  fx=self._scale_factor,
                                  fy=self._scale_factor)
    if self.config.color_space == 'lab':
        img = cv2.cvtColor(img_preprocessed, cv2.COLOR_BGR2Lab)
    elif self.config.color_space == 'hsv':
        img = cv2.cvtColor(img_preprocessed, cv2.COLOR_BGR2HSV)
        # Stretch channel 0 by 256/180.
        img[:, :, 0] = (img[:, :, 0] * 256 / 180)
        img = img.astype(np.uint8)
    else:
        img = img_preprocessed

    # Start from the last known state, optionally shifted by a motion
    # prediction computed from the position history.
    prev_pos = self.target_pos_history[-1]
    prev_sz = self.target_sz_history[-1]
    if self.config.motion_estimation_history_size > 0:
        prev_pos = prev_pos + get_motion_prediciton(
            self.target_pos_history,
            self.config.motion_estimation_history_size)
    target_pos = (prev_pos[0] * self._scale_factor,
                  prev_pos[1] * self._scale_factor)
    target_sz = (prev_sz[0] * self._scale_factor,
                 prev_sz[1] * self._scale_factor)

    # Search window: target padded by a fraction of its larger side.
    search_sz_w = int(
        np.floor(target_sz[0] + self.config.search_win_padding *
                 max(target_sz[0], target_sz[1])))
    search_sz_h = int(
        np.floor(target_sz[1] + self.config.search_win_padding *
                 max(target_sz[0], target_sz[1])))
    search_sz = (search_sz_w, search_sz_h)
    search_rect = pos2rect(target_pos, search_sz)
    self.crop_size = (search_rect[2], search_rect[3])
    search_win, padded_search_win = get_subwindow_masked(
        img, target_pos, search_sz)

    #Apply probability LUT
    pm_search = get_foreground_prob(search_win, self.prob_lut_,
                                    self.bin_mapping)
    if self.config.distractor_aware is True:
        # Average the object/background and object/distractor maps.
        pm_search_dist = get_foreground_prob(search_win,
                                             self._prob_lut_distractor,
                                             self.bin_mapping)
        pm_search = (pm_search + pm_search_dist) / 2
    pm_search = pm_search * padded_search_win

    # Candidate rectangles with their vote and distance scores.
    window = cos_window(search_sz)
    hypotheses, vote_scores, dist_scores = get_nms_rects(
        pm_search, target_sz, self.config.nms_scale,
        self.config.nms_overlap, self.config.nms_score_factor, window,
        self.config.nms_include_center_vote)
    candidate_centers = []
    candidate_scores = []
    for i in range(len(hypotheses)):
        candidate_centers.append((hypotheses[i][0] + hypotheses[i][2] / 2,
                                  hypotheses[i][1] + hypotheses[i][3] / 2))
        candidate_scores.append(vote_scores[i] * dist_scores[i])
    # The best-scoring candidate becomes the new target position (still in
    # search-window coordinates at this point).
    best_candidate = np.argmax(np.array(candidate_scores))
    target_pos = candidate_centers[best_candidate]

    # Every other hypothesis is treated as a distractor; record its overlap
    # with the chosen target rectangle.
    distractors = []
    distractor_overlap = []
    if len(hypotheses) > 1:
        target_rect = pos2rect(target_pos, target_sz,
                               (pm_search.shape[1], pm_search.shape[0]))
        for i in range(len(hypotheses)):
            if i != best_candidate:
                distractors.append(hypotheses[i])
                distractor_overlap.append(
                    cal_iou(target_rect, distractors[-1]))
    if vis:
        self.score = pm_search

    # Target position in (rescaled) image coordinates.
    target_pos_img = (target_pos[0] + search_rect[0],
                      target_pos[1] + search_rect[1])
    if self.config.prob_lut_update_rate > 0:
        # Surrounding region and the object rectangle relative to it.
        surr_sz = (int(self.config.surr_win_factor * target_sz[0]),
                   int(self.config.surr_win_factor * target_sz[1]))
        surr_rect = pos2rect(target_pos_img, surr_sz,
                             (img.shape[1], img.shape[0]))
        obj_rect_surr = pos2rect(target_pos_img, target_sz,
                                 (img.shape[1], img.shape[0]))
        obj_rect_surr = (obj_rect_surr[0] - surr_rect[0],
                         obj_rect_surr[1] - surr_rect[1], obj_rect_surr[2],
                         obj_rect_surr[3])
        surr_win = get_sub_window(img, target_pos_img, surr_sz)
        prob_lut_bg, _ = get_foreground_background_probs(
            surr_win, obj_rect_surr, self.config.num_bins)
        if self.config.distractor_aware is True:
            # Blend the distractor LUT towards a fresh estimate; fall back
            # to the background LUT unless more than one distractor exists.
            if len(distractors) > 1:
                obj_rect = pos2rect(
                    target_pos, target_sz,
                    (search_win.shape[1], search_win.shape[0]))
                prob_lut_dist = get_foreground_distractor_probs(
                    search_win, obj_rect, distractors,
                    self.config.num_bins)
                self._prob_lut_distractor=(1-self.config.prob_lut_update_rate)*self._prob_lut_distractor+\
                    self.config.prob_lut_update_rate*prob_lut_dist
            else:
                self._prob_lut_distractor=(1-self.config.prob_lut_update_rate)*self._prob_lut_distractor+\
                    self.config.prob_lut_update_rate*prob_lut_bg
            # Only update the object/background LUT when no distractor
            # overlaps the target by 0.1 IoU or more.
            if len(distractors) == 0 or np.max(distractor_overlap) < 0.1:
                self.prob_lut_ = (
                    1 - self.config.prob_lut_update_rate
                ) * self.prob_lut_ + self.config.prob_lut_update_rate * prob_lut_bg
            prob_map = get_foreground_prob(surr_win, self.prob_lut_,
                                           self.bin_mapping)
            dist_map = get_foreground_prob(surr_win,
                                           self._prob_lut_distractor,
                                           self.bin_mapping)
            prob_map = 0.5 * prob_map + 0.5 * dist_map
        else:
            self.prob_lut_ = (
                1 - self.config.prob_lut_update_rate
            ) * self.prob_lut_ + self.config.prob_lut_update_rate * prob_lut_bg
            prob_map = get_foreground_prob(surr_win, self.prob_lut_,
                                           self.bin_mapping)
        self.adaptive_threshold_ = get_adaptive_threshold(
            prob_map, obj_rect_surr)

    # Convert back to original-resolution coordinates and record the state.
    target_pos = (target_pos[0] + search_rect[0],
                  target_pos[1] + search_rect[1])
    target_pos_original = (target_pos[0] / self._scale_factor,
                           target_pos[1] / self._scale_factor)
    target_sz_original = (target_sz[0] / self._scale_factor,
                          target_sz[1] / self._scale_factor)
    self.target_pos_history.append(target_pos_original)
    self.target_sz_history.append(target_sz_original)
    # Scale factor for the next frame: the ratio of the configured diagonal
    # to the target-size norm, rounded to one decimal and capped at 1.
    self._scale_factor = min(
        1,
        round(10 * self.config.img_scale_target_diagonal /
              cv2.norm(target_sz_original)) / 10)
    return [
        target_pos_original[0] - target_sz_original[0] / 2,
        target_pos_original[1] - target_sz_original[1] / 2,
        target_sz_original[0], target_sz_original[1]
    ]
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and the target box.

    Builds the regression label (``self.yf``), the cosine window, the scale
    estimator, a spatial mask (segmentation based, or a centred rectangle as
    fallback), and from these the initial filter ``self.H`` and the
    per-channel weights ``self.chann_w``.

    Args:
        first_frame: First frame; indexed as ``[:, :, channel]`` and passed to
            ``cv2.cvtColor(..., cv2.COLOR_BGR2HSV)`` when
            ``self.segcolor_space == 'hsv'``.
        bbox: Target box ``(x, y, w, h)``; values are cast to ``np.int64``.

    Raises:
        ValueError: If segmentation is enabled and ``self.segcolor_space`` is
            neither ``'bgr'`` nor ``'hsv'``.
    """
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self.init_mask = np.ones((h, w), dtype=np.uint8)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h
    # When the first two channels are identical, segmentation is disabled.
    if np.all(first_frame[:, :, 0] == first_frame[:, :, 1]):
        self.use_segmentation = False
    # change 400 to 300
    # for larger cell_size
    self.cell_size = int(min(4, max(1, w * h / 300)))
    self.base_target_sz = (w, h)
    self.target_sz = self.base_target_sz

    # Square template: the padded width and height are averaged into one side.
    template_size = (int(w + self.padding * np.sqrt(w * h)),
                     int(h + self.padding * np.sqrt(w * h)))
    template_size = (template_size[0] + template_size[1]) // 2
    self.template_size = (template_size, template_size)

    # Shrink (never enlarge) so the rescaled template area is at most 200**2.
    self.rescale_ratio = np.sqrt(
        (200 ** 2) / (self.template_size[0] * self.template_size[1]))
    self.rescale_ratio = np.clip(self.rescale_ratio, a_min=None, a_max=1)
    self.rescale_template_size = (int(self.rescale_ratio * self.template_size[0]),
                                  int(self.rescale_ratio * self.template_size[1]))

    self.yf = fft2(gaussian2d_rolled_labels(
        (int(self.rescale_template_size[0] / self.cell_size),
         int(self.rescale_template_size[1] / self.cell_size)),
        self.y_sigma))
    self._window = cos_window((self.yf.shape[1], self.yf.shape[0]))
    self.crop_size = self.rescale_template_size
    self.current_scale_factor = 1.

    if self.scale_type == 'normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz,
                                                  config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz,
                                  self.current_scale_factor)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step
        self._min_scale_factor = self._scale_step ** np.ceil(
            np.log(np.max(5 / np.array(([self.crop_size[0], self.crop_size[1]]))))
            / np.log(self._scale_step))
        self._max_scale_factor = self._scale_step ** np.floor(
            np.log(np.min(first_frame.shape[:2] / np.array(
                [self.base_target_sz[1], self.base_target_sz[0]])))
            / np.log(self._scale_step))
    elif self.scale_type == 'LP':
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz,
                                  self.current_scale_factor)

    # create dummy mask (approximation for segmentation)
    # size of the object in feature space
    obj_sz = (int(self.rescale_ratio * (self.base_target_sz[0] / self.cell_size)),
              int(self.rescale_ratio * (self.base_target_sz[1] / self.cell_size)))
    x0 = int((self.yf.shape[1] - obj_sz[0]) / 2)
    y0 = int((self.yf.shape[0] - obj_sz[1]) / 2)
    x1 = x0 + obj_sz[0]
    y1 = y0 + obj_sz[1]
    self.target_dummy_mask = np.zeros_like(self.yf, dtype=np.uint8)
    self.target_dummy_mask[y0:y1, x0:x1] = 1
    self.target_dummy_area = np.sum(self.target_dummy_mask)

    if self.use_segmentation:
        if self.segcolor_space == 'bgr':
            seg_img = first_frame
        elif self.segcolor_space == 'hsv':
            seg_img = cv2.cvtColor(first_frame, cv2.COLOR_BGR2HSV)
            # Stretch the first (hue) channel from a 0..180 range to 0..255.
            seg_img[:, :, 0] = (seg_img[:, :, 0].astype(np.float32) / 180 * 255)
            seg_img = seg_img.astype(np.uint8)
        else:
            raise ValueError(
                'unsupported segcolor_space: {!r}'.format(self.segcolor_space))
        hist_fg = Histogram(3, self.nbins)
        hist_bg = Histogram(3, self.nbins)
        self.extract_histograms(seg_img, bbox, hist_fg, hist_bg)
        mask = self.segment_region(seg_img, self._center, self.template_size,
                                   self.base_target_sz,
                                   self.current_scale_factor,
                                   hist_fg, hist_bg)
        self.hist_bg_p_bins = hist_bg.p_bins
        self.hist_fg_p_bins = hist_fg.p_bins

        # Keep only the part of the segmentation inside the centred bbox.
        init_mask_padded = np.zeros_like(mask)
        pm_x0 = int(np.floor(mask.shape[1] / 2 - bbox[2] / 2))
        pm_y0 = int(np.floor(mask.shape[0] / 2 - bbox[3] / 2))
        # Clamp the slice starts at 0: a negative start (bbox larger than the
        # mask) would be interpreted relative to the END of the axis and mark
        # only a thin strip instead of the clipped box.
        init_mask_padded[max(pm_y0, 0):pm_y0 + bbox[3],
                         max(pm_x0, 0):pm_x0 + bbox[2]] = 1
        mask = mask * init_mask_padded
        mask = cv2.resize(mask, (self.yf.shape[1], self.yf.shape[0]))
        if self.mask_normal(mask, self.target_dummy_area) is True:
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3),
                                               anchor=(1, 1))
            mask = cv2.dilate(mask, kernel)
        else:
            # Segmentation rejected by mask_normal: use the rectangle instead.
            mask = self.target_dummy_mask
    else:
        mask = self.target_dummy_mask

    # extract features
    f = self.get_csr_features(first_frame, self._center,
                              self.current_scale_factor, self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    # create filters using segmentation mask
    self.H = self.create_csr_filter(f, self.yf, mask)
    # Channel weights: peak response of each channel, normalised to sum to 1.
    response = np.real(ifft2(fft2(f) * np.conj(self.H)))
    chann_w = np.max(
        response.reshape(response.shape[0] * response.shape[1], -1), axis=0)
    self.chann_w = chann_w / np.sum(chann_w)
def init(self, first_frame, bbox):
    """Prepare search geometry, labels, scale handling and the first filter.

    Args:
        first_frame: First frame; only ``shape[0]``/``shape[1]`` are read here,
            the array itself is handed to ``get_sub_window`` and the scale
            estimator.
        bbox: Target box ``(x, y, w, h)``; values are cast to ``np.int64``.

    Raises:
        ValueError: If ``self.search_area_shape`` is not one of
            ``'proportional'``, ``'square'`` or ``'fix_padding'``.
    """
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h

    area_scale = self.search_area_scale
    max_area = self.filter_max_area

    def search_area_for(ratio):
        # Scaled search region area, measured in cells of size ``ratio``.
        return (self.w / ratio * area_scale) * (self.h / ratio * area_scale)

    self.feature_ratio = self.cell_size
    self.search_area = search_area_for(self.feature_ratio)

    # Small targets: pick a smaller cell size and recompute the search area.
    cell_budget = self.cell_selection_thresh * max_area
    if self.search_area < cell_budget:
        shrunk = int(np.ceil(np.sqrt(
            self.w * area_scale / cell_budget * self.h * area_scale / cell_budget)))
        self.cell_size = int(min(self.feature_ratio, max(1, shrunk)))
        self.feature_ratio = self.cell_size
        self.search_area = search_area_for(self.feature_ratio)

    # Large targets: start with a scale factor > 1 so the area fits max_area.
    self.current_scale_factor = (np.sqrt(self.search_area / max_area)
                                 if self.search_area > max_area else 1.)

    base_w = self.w / self.current_scale_factor
    base_h = self.h / self.current_scale_factor
    self.base_target_sz = (base_w, base_h)
    self.target_sz = self.base_target_sz

    area_shape = self.search_area_shape
    if area_shape == 'proportional':
        crop = (int(base_w * area_scale), int(base_h * area_scale))
    elif area_shape == 'square':
        side = int(np.sqrt(base_w * base_h) * area_scale)
        crop = (side, side)
    elif area_shape == 'fix_padding':
        # NOTE(review): this padding formula looks unusual for a constant
        # padding rule -- confirm against the reference implementation.
        pad = int(np.sqrt(base_w * area_scale + (base_h - base_w) / 4)) + \
            (base_w + base_h) / 2
        crop = (base_w + pad, base_h + pad)
    else:
        raise ValueError

    # Snap the crop to a whole number of feature cells.
    ratio = self.feature_ratio
    self.crop_size = (int(round(crop[0] / ratio) * ratio),
                      int(round(crop[1] / ratio) * ratio))
    self.feature_map_sz = (self.crop_size[0] // ratio,
                           self.crop_size[1] // ratio)

    output_sigma = np.sqrt(
        np.floor(base_w / ratio) * np.floor(base_h / ratio)
    ) * self.output_sigma_factor
    labels = gaussian2d_rolled_labels(self.feature_map_sz, output_sigma)
    self.yf = fft2(labels)

    map_w, map_h = self.feature_map_sz[0], self.feature_map_sz[1]
    if self.interpolate_response == 1:
        self.interp_sz = (map_w * ratio, map_h * ratio)
    else:
        self.interp_sz = (map_w, map_h)

    self._window = cos_window(self.feature_map_sz)

    n_scales = self.number_of_scales
    if n_scales > 0:
        half_span = (n_scales - 1) / 2
        scale_exp = np.arange(-int(np.floor(half_span)),
                              int(np.ceil(half_span)) + 1)
        self.scale_factors = self.scale_step ** scale_exp
        log_step = np.log(self.scale_step)
        self.min_scale_factor = self.scale_step ** (np.ceil(
            np.log(max(5 / self.crop_size[0], 5 / self.crop_size[1])) / log_step))
        self.max_scale_factor = self.scale_step ** (np.floor(
            np.log(min(first_frame.shape[0] / self.base_target_sz[1],
                       first_frame.shape[1] / self.base_target_sz[0])) / log_step))

    if self.interpolate_response >= 3:
        def rolled_index(n):
            # Index run from -floor((n-1)/2) upward, rolled so 0 comes first.
            half = int(np.floor((n - 1) / 2))
            return np.roll(np.arange(-half, int(np.ceil((n - 1) / 2 + 1))), -half)

        self.ky = rolled_index(self.feature_map_sz[1])
        self.kx = rolled_index(self.feature_map_sz[0]).T

    self.small_filter_sz = (int(np.floor(self.base_target_sz[0] / ratio)),
                            int(np.floor(self.base_target_sz[1] / ratio)))

    self.scale_estimator = LPScaleEstimator(self.target_sz,
                                            config=self.scale_config)
    self.scale_estimator.init(first_frame, self._center, self.base_target_sz,
                              self.current_scale_factor)

    # Crop at the current scale, extract features and solve for the filter.
    scaled_crop = (int(np.round(self.crop_size[0] * self.current_scale_factor)),
                   int(np.round(self.crop_size[1] * self.current_scale_factor)))
    pixels = self.get_sub_window(first_frame, self._center,
                                 model_sz=self.crop_size,
                                 scaled_sz=scaled_crop)
    feature = self.extract_hc_feture(pixels, cell_size=self.feature_ratio)
    self.model_xf = fft2(self._window[:, :, None] * feature)
    self.g_f = self.ADMM(self.model_xf)
def init(self, first_frame, bbox):
    """Build the template tensor ``self.x1`` from the first frame.

    Computes the crop size, the template (``x``) and search (``z``) window
    sizes, their cosine windows and Gaussian labels, then extracts the
    configured features from the padded target patch and stores the windowed
    result on the GPU.

    Args:
        first_frame: First frame; must be 3-dimensional with 3 channels.
        bbox: Target box ``(x0, y0, w, h)``; values are cast to ``np.int64``.

    Raises:
        AssertionError: If ``first_frame`` is not an ``H x W x 3`` array.
        NotImplementedError: If ``self.features`` is not one of ``'gray'``,
            ``'color'``, ``'hog'``, ``'cn'`` or ``'sfres50'``.
    """
    assert len(first_frame.shape) == 3 and first_frame.shape[2] == 3
    self.U = None
    self.V = None
    if self.features == 'gray':
        first_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
    bbox = np.array(bbox).astype(np.int64)
    x0, y0, w, h = tuple(bbox)

    def window_size(padding):
        # Size of the padded target in feature cells: rounded up (as floats)
        # for 'sfres50', floor-divided for every other feature type.
        padded_w = int(np.floor(w * (1 + padding)))
        padded_h = int(np.floor(h * (1 + padding)))
        if self.features == 'sfres50':
            return (np.ceil(padded_w / self.cell_size),
                    np.ceil(padded_h / self.cell_size))
        return (padded_w // self.cell_size, padded_h // self.cell_size)

    self.crop_size = (int(np.floor(w * (1 + self.x_padding))),
                      int(np.floor(h * (1 + self.x_padding))))  # for vis
    self._center = (np.floor(x0 + w / 2), np.floor(y0 + h / 2))
    self.w, self.h = w, h

    self.x_window_size = window_size(self.x_padding)
    self.x_cos_window = cos_window(self.x_window_size)
    self.z_window_size = window_size(self.z_padding)
    self.z_cos_window = cos_window(self.z_window_size)

    s = np.sqrt(w * h) * self.output_sigma_factor / self.cell_size
    self.x_gaus = gaussian2d_labels(self.x_window_size, s)
    self.z_gaus = gaussian2d_labels(self.z_window_size, s)

    if self.features in ('gray', 'color'):
        first_frame = first_frame.astype(np.float32) / 255
        x = self._crop(first_frame, self._center, (w, h), self.x_padding)
        x = x - np.mean(x)
    elif self.features == 'hog':
        x = self._crop(first_frame, self._center, (w, h), self.x_padding)
        x = cv2.resize(x, (self.x_window_size[0] * self.cell_size,
                           self.x_window_size[1] * self.cell_size))
        x = extract_hog_feature(x, cell_size=self.cell_size)
    elif self.features == 'cn':
        # Crop the padded target first, like every other feature branch;
        # previously the whole frame was resized and used as the template.
        x = self._crop(first_frame, self._center, (w, h), self.x_padding)
        x = cv2.resize(x, (self.x_window_size[0] * self.cell_size,
                           self.x_window_size[1] * self.cell_size))
        x = extract_cn_feature(x, self.cell_size)
    elif self.features == 'sfres50':
        x = self._crop(first_frame, self._center, (w, h), self.x_padding)
        desired_sz = (int((self.x_window_size[0] + 1) * self.cell_size),
                      int((self.x_window_size[1] + 1) * self.cell_size))
        x = cv2.resize(x, desired_sz)
        x = extract_sfres50_feature(self.model, x, self.cell_size)
    else:
        raise NotImplementedError

    self.init_response_center = (0, 0)
    x = self._get_windowed(x, self.x_cos_window)
    # NOTE: .cuda() means this step needs a CUDA-capable torch build/device.
    self.x1 = torch.from_numpy(x.astype(np.float32)).cuda()
    # if self.vis is not None: self.vis.image(self.x1.permute(2,0,1)[0:3,:,:],win='template')
def init(self, first_frame, bbox):
    """Initialise colour histograms, the correlation filter and the scale model.

    Args:
        first_frame: First frame; converted to ``np.float32``. ``shape[1]`` is
            used as the width bound and ``shape[0]`` as the height bound.
        bbox: Target box ``(x, y, w, h)``; values are cast to ``np.int64``.
    """
    first_frame = first_frame.astype(np.float32)
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h
    self.crop_size = (int(w * (1 + self.padding)), int(h * (1 + self.padding)))
    self.target_sz = (self.w, self.h)
    self.bin_mapping = self.get_bin_mapping(self.n_bins)

    # Background / foreground areas around the target, in image pixels.
    avg_dim = (w + h) / 2
    bg_w = round(w + avg_dim)
    bg_h = round(h + avg_dim)
    fg_w = int(round(w - avg_dim * self.inner_padding))
    fg_h = int(round(h - avg_dim * self.inner_padding))
    # Keep the background area inside the frame.
    bg_w = int(min(bg_w, first_frame.shape[1] - 1))
    bg_h = int(min(bg_h, first_frame.shape[0] - 1))
    # Give bg the same parity as the target, then fg the same parity as bg.
    bg_w = bg_w - (bg_w - self.target_sz[0]) % 2
    bg_h = bg_h - (bg_h - self.target_sz[1]) % 2
    fg_w = fg_w + (bg_w - fg_w) % 2
    fg_h = fg_h + (bg_h - fg_h) % 2
    self.bg_area = (bg_w, bg_h)
    self.fg_area = (fg_w, fg_h)

    # Normalised geometry: background rescaled to roughly ``fixed_area``.
    self.area_resize_factor = np.sqrt(self.fixed_area / (bg_w * bg_h))
    norm_bg_w = round(bg_w * self.area_resize_factor)
    norm_bg_h = round(bg_h * self.area_resize_factor)
    self.norm_bg_area = (norm_bg_w, norm_bg_h)
    self.cf_response_size = (int(np.floor(norm_bg_w / self.hog_cell_size)),
                             int(np.floor(norm_bg_h / self.hog_cell_size)))
    norm_target_sz_w = 0.75 * norm_bg_w - 0.25 * norm_bg_h
    norm_target_sz_h = 0.75 * norm_bg_h - 0.25 * norm_bg_w
    self.norm_target_sz = (round(norm_target_sz_w), round(norm_target_sz_h))
    norm_pad = (int(np.floor((norm_bg_w - norm_target_sz_w) / 2)),
                int(np.floor((norm_bg_h - norm_target_sz_h) / 2)))
    radius = min(norm_pad[0], norm_pad[1])
    delta = 2 * radius + 1
    self.norm_delta_area = (delta, delta)
    self.norm_pwp_search_area = (
        self.norm_target_sz[0] + self.norm_delta_area[0] - 1,
        self.norm_target_sz[1] + self.norm_delta_area[1] - 1)

    # Histogram model, built from the padded patch around the target.
    patch_padded = self.get_sub_window(first_frame, self._center,
                                       self.norm_bg_area, self.bg_area)
    self.new_pwp_model = True
    self.bg_hist, self.fg_hist = self.update_hist_model(
        self.new_pwp_model, patch_padded, self.bg_area, self.fg_area,
        self.target_sz, self.norm_bg_area, self.n_bins)
    self.new_pwp_model = False

    # Cosine window and regression labels for the correlation filter.
    self._window = cos_window(self.cf_response_size)
    output_sigma = np.sqrt(
        self.norm_target_sz[0] * self.norm_target_sz[1]
    ) * self.output_sigma_factor / self.hog_cell_size
    self.y = gaussian2d_rolled_labels_staple(self.cf_response_size,
                                             output_sigma)
    self._init_response_center = np.unravel_index(
        np.argmax(self.y, axis=None), self.y.shape)
    self.yf = fft2(self.y)

    if self.use_ca:
        # w,h format
        tgt_w, tgt_h = self.target_sz[0], self.target_sz[1]
        self.offset = [[0, -tgt_h], [-tgt_w, 0], [0, tgt_h], [tgt_w, 0]]

    if self.scale_adaptation is True:
        self.scale_factor = 1
        self.base_target_sz = self.target_sz
        n_scales = self.num_scales
        self.scale_sigma = np.sqrt(n_scales) * self.scale_sigma_factor
        centred = np.arange(1, n_scales + 1) - np.ceil(n_scales / 2)
        scale_labels = np.exp(-0.5 * (centred ** 2) / (self.scale_sigma ** 2))
        self.ysf = np.fft.fft(scale_labels)
        if n_scales % 2 == 0:
            # Even count: take a window one longer and drop its first sample.
            self.scale_window = np.hanning(n_scales + 1)[1:]
        else:
            self.scale_window = np.hanning(n_scales)
        steps = np.arange(1, n_scales + 1)
        self.scale_factors = self.scale_step ** (np.ceil(n_scales / 2) - steps)

        # Shrink the scale model when the target exceeds the area limit.
        self.scale_model_factor = 1.
        if (self.w * self.h) > self.scale_model_max_area:
            self.scale_model_factor = np.sqrt(
                self.scale_model_max_area / (self.w * self.h))
        self.scale_model_sz = (int(np.floor(self.w * self.scale_model_factor)),
                               int(np.floor(self.h * self.scale_model_factor)))
        self.current_scale_factor = 1.

        log_step = np.log(self.scale_step)
        self.min_scale_factor = self.scale_step ** (int(np.ceil(
            np.log(max(5 / self.crop_size[0], 5 / self.crop_size[1]))
            / log_step)))
        self.max_scale_factor = self.scale_step ** (int(np.floor(
            (np.log(min(first_frame.shape[1] / self.w,
                        first_frame.shape[0] / self.h)) / log_step))))

    # Initial correlation filter from the windowed feature map of the target.
    im_patch_bg = self.get_sub_window(first_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.hog_cell_size)
    xt = self._window[:, :, None] * xt
    xtf = fft2(xt)
    if self.use_ca:
        # Accumulate the spectral energy of the four offset patches.
        sum_kfn = np.zeros_like(xtf)
        for dx, dy in self.offset:
            im_patch_bgn = self.get_sub_window(
                first_frame, (self._center[0] + dx, self._center[1] + dy),
                self.norm_bg_area, self.bg_area)
            xtn = self.get_feature_map(im_patch_bgn, self.hog_cell_size)
            xtn = self._window[:, :, None] * xtn
            xtfn = fft2(xtn)
            sum_kfn += np.conj(xtfn) * xtfn
        self.hf_num = self.yf[:, :, None] * np.conj(xtf)
        self.hf_den = np.conj(xtf) * xtf + self.lambda_ + self.lambda_2 * sum_kfn
    else:
        n_cells = self.cf_response_size[0] * self.cf_response_size[1]
        self.hf_num = np.conj(self.yf)[:, :, None] * xtf / n_cells
        self.hf_den = np.conj(xtf) * xtf / n_cells

    if self.scale_adaptation is True:
        im_patch_scale = self.get_scale_subwindow(
            first_frame, self._center, self.base_target_sz,
            self.scale_factor * self.scale_factors, self.scale_window,
            self.scale_model_sz, self.hog_scale_cell_size)
        xsf = np.fft.fft(im_patch_scale, axis=1)
        self.sf_den = np.sum(xsf * np.conj(xsf), axis=0)
        self.sf_num = self.ysf * np.conj(xsf)
    self.rect_position_padded = None