def init(self, first_frame, bbox):
    """Build the target template from the first frame of a sequence.

    Args:
        first_frame: Image array with three dimensions and three channels
            (enforced by the assertion below).
        bbox: Target box as ``(x0, y0, w, h)``; the values are truncated
            to integers before use.

    Side effects:
        Sets the geometry attributes (``_center``, ``w``, ``h``,
        ``target_sz``, ``x_crop_siz``, ``x_window_size``), the cosine
        window and Gaussian label (``x_cos_window``, ``x_gaus``), the
        scale search factors (``search_size``) and the windowed template
        features ``x1`` (a CUDA tensor).  ``resize``, ``U`` and ``V`` are
        reset on every call so that state from a previously tracked
        target cannot leak into the new one.
    """
    assert len(first_frame.shape) == 3 and first_frame.shape[2] == 3
    bbox = np.array(bbox).astype(np.int64)
    x0, y0, w, h = tuple(bbox)

    # Always assign the flag (as a plain Python bool): previously it was only
    # ever set to True, so re-initialising on a small target after a large
    # one kept working at half resolution with a full-resolution box.
    self.resize = bool(w * h >= 100**2)
    # Drop transforms learned for an earlier target; they were computed
    # against a different template and may not even match its size.
    self.U = None
    self.V = None

    if self.resize:
        # Large targets are processed at half resolution.
        x0, y0, w, h = x0 / 2, y0 / 2, w / 2, h / 2
        first_frame = cv2.resize(first_frame, dsize=None, fx=0.5,
                                 fy=0.5).astype(np.uint8)

    padded_w = int(np.floor(w * (1 + self.x_padding)))
    padded_h = int(np.floor(h * (1 + self.x_padding)))
    self.x_crop_siz = (padded_w, padded_h)  # for vis
    self._center = (x0 + w / 2, y0 + h / 2)
    self.w, self.h = w, h

    # Template size expressed in feature cells.
    self.x_window_size = (padded_w // self.cell_size,
                          padded_h // self.cell_size)
    self.x_cos_window = cos_window(self.x_window_size)
    self.search_size = np.linspace(0.985, 1.015, 7)

    s = np.sqrt(w * h) * self.output_sigma_factor / self.cell_size
    self.x_gaus = gaussian2d_labels(self.x_window_size, s)
    self.target_sz = (w, h)

    patch = cv2.getRectSubPix(first_frame, self.x_crop_siz, self._center)
    patch = cv2.resize(patch, dsize=self.x_crop_siz)
    hc_features = self.get_features(patch, self.cell_size)
    hc_features = hc_features * self.x_cos_window[:, :, None]
    self.x1 = torch.from_numpy(hc_features.astype(np.float32)).cuda()
    # if self.vis is not None:
    #     self.vis.image(self.x1.permute(2, 0, 1)[0:3, :, :], win='template')
def init(self, first_frame, bbox):
    """Set up the translation filter and the scale estimator.

    Args:
        first_frame: First image of the sequence; converted to float32.
        bbox: Target box ``(x, y, w, h)``; truncated to integers.

    Side effects:
        Stores the target geometry, the cosine window, the label ``y`` and
        its transform ``yf``, the transformed first sample ``xlf`` with the
        derived ``hf_num`` / ``hf_den`` terms, and, depending on
        ``self.scale_type`` (``'normal'`` or ``'LP'``), an initialised
        ``scale_estimator``.  For ``'normal'`` the scale step, scale count
        and the min / max scale factors are stored as well.
    """
    first_frame = first_frame.astype(np.float32)
    left, top, width, height = tuple(np.array(bbox).astype(np.int64))

    self._center = (left + width / 2, top + height / 2)
    self.w, self.h = width, height
    self.crop_size = (int(width * (1 + self.padding)),
                      int(height * (1 + self.padding)))
    self.base_target_size = (self.w, self.h)
    self.target_sz = (self.w, self.h)

    self._window = cos_window(self.crop_size)
    label_sigma = np.sqrt(self.w * self.h) * self.output_sigma_factor
    self.y = gaussian2d_labels(self.crop_size, label_sigma)
    peak_flat_index = np.argmax(self.y, axis=None)
    self._init_response_center = np.unravel_index(peak_flat_index,
                                                  self.y.shape)
    self.yf = fft2(self.y)

    self.current_scale_factor = 1.
    sample = self.get_translation_sample(first_frame, self._center,
                                         self.crop_size,
                                         self.current_scale_factor,
                                         self._window)
    self.xlf = fft2(sample)
    self.hf_den = np.sum(self.xlf * np.conj(self.xlf), axis=2)
    self.hf_num = self.yf[:, :, None] * np.conj(self.xlf)

    if self.scale_type == 'normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz,
                                                  config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_size,
                                  self.current_scale_factor)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step

        # Scale limits are expressed as integer powers of the scale step.
        crop_dims = np.array(([self.crop_size[0], self.crop_size[1]]))
        min_exponent = np.ceil(
            np.log(np.max(5 / crop_dims)) / np.log(self._scale_step))
        self._min_scale_factor = self._scale_step**min_exponent

        target_dims = np.array(
            [self.base_target_size[1], self.base_target_size[0]])
        max_exponent = np.floor(
            np.log(np.min(first_frame.shape[:2] / target_dims)) /
            np.log(self._scale_step))
        self._max_scale_factor = self._scale_step**max_exponent
    elif self.scale_type == 'LP':
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_size,
                                  self.current_scale_factor)
def init(self, first_frame, bbox):
    """Train the initial model on a grayscale patch around the target.

    Args:
        first_frame: Either a single-channel image or a 3-channel image;
            the latter is converted with ``cv2.COLOR_BGR2GRAY``.
        bbox: Target box ``(x, y, w, h)``; truncated to integers.

    Side effects:
        Stores the target centre and size, a cosine window and label map
        covering twice the target size, the windowed training patch ``x``
        and the model ``alphaf`` returned by ``self._training``.
    """
    if len(first_frame.shape) == 3:
        assert first_frame.shape[2] == 3
        first_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
    first_frame = first_frame.astype(np.float32)

    left, top, w, h = tuple(np.array(bbox).astype(np.int64))
    self._center = (left + w / 2, top + h / 2)
    self.w, self.h = w, h

    # The search window is twice the target size in each dimension.
    window_size = (int(round(2 * w)), int(round(2 * h)))
    self._window = cos_window(window_size)
    self.crop_size = window_size

    # Map pixel values to [-0.5, 0.5] before applying the window.
    patch = cv2.getRectSubPix(first_frame, window_size, self._center) / 255 - 0.5
    self.x = patch * self._window

    s = np.sqrt(w * h) / 16
    self.y = gaussian2d_labels(window_size, s)
    peak_flat_index = np.argmax(self.y, axis=None)
    self._init_response_center = np.unravel_index(peak_flat_index, self.y.shape)
    self.alphaf = self._training(self.x, self.y)
def init(self, first_frame, bbox):
    """Initialise the filter terms from the first frame.

    Args:
        first_frame: First image of the sequence, passed unchanged to
            ``self.get_sub_window``.
        bbox: Target box ``(x, y, w, h)``; truncated to integers.

    Side effects:
        Stores the target centre and size, the cosine window, the label
        ``y`` with its transform ``yf``, the windowed sample ``x`` and the
        two model terms ``alphaf_num`` and ``alphaf_den``.
    """
    left, top, width, height = tuple(np.array(bbox).astype(np.int64))
    self._center = (left + width / 2, top + height / 2)
    self.w, self.h = width, height

    padded_size = (int(width * (1 + self.padding)),
                   int(height * (1 + self.padding)))
    self._window = cos_window(padded_size)
    # crop_size is read back from the window: (shape[1], shape[0]).
    window_shape = self._window.shape
    self.crop_size = (window_shape[1], window_shape[0])

    label_sigma = np.sqrt(width * height) * self.output_sigma_factor
    self.y = gaussian2d_labels(self.crop_size, label_sigma)
    self.yf = fft2(self.y)
    peak_flat_index = np.argmax(self.y, axis=None)
    self._init_response_center = np.unravel_index(peak_flat_index,
                                                  self.y.shape)

    sample = self.get_sub_window(first_frame, self._center, self.crop_size)
    self.x = self._window[:, :, None] * sample

    kf = fft2(self._dgk(self.x, self.x))
    self.alphaf_num = (self.yf) * kf
    self.alphaf_den = kf * (kf + self.lambda_)
def init(self, first_frame, bbox):
    """Accumulate the initial filter terms from warped copies of the target.

    Args:
        first_frame: 2-D image, or a 3-channel image that is converted with
            ``cv2.COLOR_BGR2GRAY``.  Pixel values are divided by 255.
        bbox: Target box ``(x, y, w, h)``; used as given for the centre and
            for ``self.w`` / ``self.h``, and rounded to integers for the
            patch size.

    Side effects:
        Stores the target centre and size, the cosine window, the target
        patch ``_fi``, the transformed label ``_G`` and the accumulators
        ``_Ai`` and ``_Bi`` summed over eight ``self._rand_warp`` samples.
    """
    if len(first_frame.shape) != 2:
        assert first_frame.shape[2] == 3
        first_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
    first_frame = first_frame.astype(np.float32) / 255

    left, top, box_w, box_h = tuple(bbox)
    self._center = (left + box_w / 2, top + box_h / 2)
    # The stored size keeps the caller's values; only the patch is rounded.
    self.w, self.h = box_w, box_h

    patch_size = (int(round(box_w)), int(round(box_h)))
    self.cos_window = cos_window(patch_size)
    self._fi = cv2.getRectSubPix(first_frame, patch_size, self._center)
    self._G = np.fft.fft2(gaussian2d_labels(patch_size, self.sigma))
    self.crop_size = patch_size

    self._Ai = np.zeros_like(self._G)
    self._Bi = np.zeros_like(self._G)
    for _ in range(8):
        warped = self._rand_warp(self._fi)
        spectrum = np.fft.fft2(self._preprocessing(warped, self.cos_window))
        spectrum_conj = np.conj(spectrum)
        self._Ai += self._G * spectrum_conj
        self._Bi += spectrum * spectrum_conj
def init(self, first_frame, bbox):
    """Build the target template for the configured feature type.

    Args:
        first_frame: Image array with three dimensions and three channels
            (enforced by the assertion below).
        bbox: Target box ``(x0, y0, w, h)``; truncated to integers.

    Side effects:
        Resets ``U`` and ``V`` to ``None`` and stores the target geometry,
        the template / search window sizes in feature cells with their
        cosine windows and Gaussian labels, ``init_response_center`` and
        the windowed template features ``x1`` (a CUDA tensor).

    Raises:
        NotImplementedError: If ``self.features`` is not one of ``'gray'``,
            ``'color'``, ``'hog'``, ``'cn'`` or ``'sfres50'``.
    """
    assert len(first_frame.shape) == 3 and first_frame.shape[2] == 3
    self.U = None
    self.V = None
    if self.features == 'gray':
        first_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

    bbox = np.array(bbox).astype(np.int64)
    x0, y0, w, h = tuple(bbox)
    self.crop_size = (int(np.floor(w * (1 + self.x_padding))),
                      int(np.floor(h * (1 + self.x_padding))))  # for vis
    self._center = (np.floor(x0 + w / 2), np.floor(y0 + h / 2))
    self.w, self.h = w, h

    def grid_size(padding):
        # Size of the padded target measured in feature cells.  'sfres50'
        # rounds up (yielding floats); every other feature floors.
        padded_w = int(np.floor(w * (1 + padding)))
        padded_h = int(np.floor(h * (1 + padding)))
        if self.features == 'sfres50':
            return (np.ceil(padded_w / self.cell_size),
                    np.ceil(padded_h / self.cell_size))
        return (padded_w // self.cell_size, padded_h // self.cell_size)

    self.x_window_size = grid_size(self.x_padding)
    self.x_cos_window = cos_window(self.x_window_size)
    self.z_window_size = grid_size(self.z_padding)
    self.z_cos_window = cos_window(self.z_window_size)

    s = np.sqrt(w * h) * self.output_sigma_factor / self.cell_size
    self.x_gaus = gaussian2d_labels(self.x_window_size, s)
    self.z_gaus = gaussian2d_labels(self.z_window_size, s)

    if self.features in ('gray', 'color'):
        first_frame = first_frame.astype(np.float32) / 255
        x = self._crop(first_frame, self._center, (w, h), self.x_padding)
        x = x - np.mean(x)
    elif self.features == 'hog':
        x = self._crop(first_frame, self._center, (w, h), self.x_padding)
        x = cv2.resize(x, (self.x_window_size[0] * self.cell_size,
                           self.x_window_size[1] * self.cell_size))
        x = extract_hog_feature(x, cell_size=self.cell_size)
    elif self.features == 'cn':
        # Crop the padded target first, like every other branch.  The
        # previous code resized the whole frame, so the template described
        # the full image instead of the target.
        x = self._crop(first_frame, self._center, (w, h), self.x_padding)
        x = cv2.resize(x, (self.x_window_size[0] * self.cell_size,
                           self.x_window_size[1] * self.cell_size))
        x = extract_cn_feature(x, self.cell_size)
    elif self.features == 'sfres50':
        x = self._crop(first_frame, self._center, (w, h), self.x_padding)
        # The network input is one cell larger than the window per axis.
        desired_sz = (int((self.x_window_size[0] + 1) * self.cell_size),
                      int((self.x_window_size[1] + 1) * self.cell_size))
        x = cv2.resize(x, desired_sz)
        x = extract_sfres50_feature(self.model, x, self.cell_size)
    else:
        raise NotImplementedError

    self.init_response_center = (0, 0)
    x = self._get_windowed(x, self.x_cos_window)
    self.x1 = torch.from_numpy(x.astype(np.float32)).cuda()
    # if self.vis is not None:
    #     self.vis.image(self.x1.permute(2,0,1)[0:3,:,:],win='template')
def update(self, current_frame):
    """Locate the target in ``current_frame`` and refresh ``U`` and ``V``.

    For each factor in ``self.search_size`` a patch around ``self._center``
    is cropped, converted to features and correlated with the (optionally
    ``V``-transformed) template ``self.x1`` via ``F.conv2d``.  The arg-max
    over position and scale updates ``self._center`` and ``self.target_sz``;
    afterwards ``self.V`` and ``self.U`` are recomputed with ``CalTrans``.

    Args:
        current_frame: Image to search in; halved in size first when
            ``self.resize`` is set.

    Returns:
        ``(bbox, max_score)`` where ``bbox`` is ``[x, y, w, h]`` built from
        the updated centre and target size (doubled when ``self.resize`` is
        ``True``) and ``max_score`` is ``response.max()``.

    Raises:
        NotImplementedError: If ``self.U`` exists and its leading dimensions
            differ from the size of the search features.
    """
    if self.resize:
        current_frame = cv2.resize(current_frame, dsize=None, fx=0.5, fy=0.5).astype(np.uint8)
    response = None
    # Conduct V transformation over target template:
    if self.V is not None:
        x_ = Trans(self.x1, self.V, self.lr_v)
    else:
        # No transform yet: learn one mapping the template onto itself and
        # use the raw template for this frame.
        self.V = CalTrans(self.x1, self.x1, self.lambda_v)
        x_ = self.x1
    # NOTE(review): np.round(...) yields a float array ordered
    # (target_sz[1], target_sz[0]) and is passed as the size argument of
    # cv2.resize below, whereas tmp_sz is ordered (index 0, index 1) --
    # confirm both the type and the order are accepted / intended.
    self.z_crop_siz = np.round((self.target_sz[1] * (1 + self.z_padding), self.target_sz[0] * (1 + self.z_padding)))
    for i in range(len(self.search_size)):
        # Padded search size at the i-th scale factor.
        tmp_sz = (self.target_sz[0] * (1 + self.z_padding) * self.search_size[i],
                  self.target_sz[1] * (1 + self.z_padding) * self.search_size[i])
        patch = cv2.getRectSubPix(
            current_frame,
            (int(np.round(tmp_sz[0])), int(np.round(tmp_sz[1]))),
            self._center)
        patch = cv2.resize(patch, self.z_crop_siz)
        hc_features = self.get_features(patch, self.cell_size)
        # NOTE(review): the window size follows the per-scale tmp_sz while
        # the patch was resized to the fixed z_crop_siz -- verify the
        # shapes agree for every scale.
        self.z_cos_window = cos_window(np.round(tmp_sz) // self.cell_size)
        hc_features = hc_features * self.z_cos_window[:, :, None]
        z = torch.from_numpy(hc_features.astype(np.float32)).cuda()
        # Conduct U transformation over search region:
        if self.U is not None:
            if z.size() != self.U.size()[:-1]:
                raise NotImplementedError
            z_ = Trans(z, self.U, self.lr_u)
        else:
            z_ = z
        padding = [
            int(np.ceil((self.x_window_size[1] - 1) / 2)),
            int(np.ceil((self.x_window_size[0] - 1) / 2))
        ]
        # Responses for the individual scales are stacked along axis 2.
        if response is None:
            response = F.conv2d(z_.permute(2, 0, 1).unsqueeze(0),
                                x_.permute(2, 0, 1).unsqueeze(0),
                                padding=padding).squeeze(0).squeeze(0)
            response = response[:, :, np.newaxis]
        else:
            response_ = F.conv2d(z_.permute(2, 0, 1).unsqueeze(0),
                                 x_.permute(2, 0, 1).unsqueeze(0),
                                 padding=padding).squeeze(0).squeeze(0)
            # NOTE(review): both operands derive from tensors created with
            # .cuda(); confirm np.concatenate (and np.argmax below) can
            # consume them.
            response = np.concatenate(
                (response, response_[:, :, np.newaxis]), axis=2)
    # NOTE(review): block nesting was reconstructed from collapsed source;
    # this visualisation is placed after the scale loop (showing the last
    # scale's z / z_) -- confirm against the original layout.
    if self.vis is not None:
        self.vis.image(x_.permute(2, 0, 1)[0:3, :, :], win='updated_template')
        self.vis.image(z.permute(2, 0, 1)[0:3, :, :], win='search region')
        self.vis.image(z_.permute(2, 0, 1)[0:3, :, :], win='updated search region')
    delta_y, delta_x, sz_id = np.unravel_index(
        np.argmax(response, axis=None), response.shape)
    self.sz_id = sz_id
    # Peaks past the half-way point are treated as negative offsets.
    # NOTE(review): self.tmp_sz is not assigned anywhere in this method
    # (only the local tmp_sz is) -- confirm it is defined elsewhere.
    if delta_y + 1 > self.tmp_sz[1] / 2:
        delta_y = delta_y - self.tmp_sz[1]
    if delta_x + 1 > self.tmp_sz[0] / 2:
        delta_x = delta_x - self.tmp_sz[0]
    # Rescale the target by the winning scale factor.
    self.target_sz = (self.target_sz[0] * self.search_size[self.sz_id],
                      self.target_sz[1] * self.search_size[self.sz_id])
    tmp_sz = (self.target_sz[0] * (1 + self.x_padding),
              self.target_sz[1] * (1 + self.x_padding))
    # Convert the offset from feature cells back to frame pixels.
    current_size_factor = tmp_sz[0] / self.x_crop_siz[0]
    x, y = self._center
    x += current_size_factor * self.cell_size * delta_x
    y += current_size_factor * self.cell_size * delta_y
    self._center = (x, y)
    # Re-extract the template at the new position and relearn V from it.
    patch = cv2.getRectSubPix(
        current_frame,
        (int(np.round(tmp_sz[0])), int(np.round(tmp_sz[1]))),
        self._center)
    patch = cv2.resize(patch, self.x_crop_siz)
    hc_features = self.get_features(patch, self.cell_size)
    new_x = self.x_cos_window[:, :, None] * hc_features
    new_x_ = torch.from_numpy(new_x.astype(np.float32)).cuda()
    self.V = CalTrans(self.x1, new_x_, self.lambda_v)
    # new_z: search-region sample at the new position, used to relearn U.
    tmp_sz = (self.target_sz[0] * (1 + self.z_padding),
              self.target_sz[1] * (1 + self.z_padding))
    self.z_window_size = (int(np.round(tmp_sz[0])) // self.cell_size,
                          int(np.round(tmp_sz[1])) // self.cell_size)
    self.z_cos_window = cos_window(self.z_window_size)
    # NOTE(review): floor division (//) by cell_size here; confirm that a
    # floored sigma (possibly 0) is intended rather than true division.
    s = np.sqrt(
        self.target_sz[0] * self.target_sz[1]) * self.output_sigma_factor // self.cell_size
    self.z_gaus = gaussian2d_labels(self.z_window_size, s)
    patch = cv2.getRectSubPix(
        current_frame,
        (int(np.round(tmp_sz[0])), int(np.round(tmp_sz[1]))),
        self._center)
    hc_features = self.get_features(patch, self.cell_size)
    # NOTE(review): multiplies by x_cos_window although z_cos_window was
    # recomputed just above for this patch size -- confirm which is meant.
    hc_features = hc_features * self.x_cos_window[:, :, None]
    new_z = torch.from_numpy(hc_features.astype(np.float32)).cuda()
    # Weight every feature channel by the Gaussian label.
    # NOTE(review): new_z was created with .cuda(); confirm np.multiply and
    # the following torch.from_numpy accept it.
    new_z_ = np.multiply(
        np.repeat(self.z_gaus[:, :, np.newaxis], new_z.shape[2], axis=2),
        new_z)
    new_z_ = torch.from_numpy(new_z_).cuda()
    self.U = CalTrans(new_z, new_z_, self.lambda_u)
    # Box as [x, y, w, h] with (x, y) = centre minus half the target size.
    bbox = [(self._center[0] - self.target_sz[0] / 2),
            (self._center[1] - self.target_sz[1] / 2), self.target_sz[0],
            self.target_sz[1]]
    if self.resize is True:
        # Undo the half-resolution processing for the reported box.
        bbox = [ele * 2 for ele in bbox]
    max_score = response.max()
    return bbox, max_score