def update(self, current_frame, vis=False):
    """Locate the target in ``current_frame`` and refresh the model.

    The method first correlates the stored template ``self.x`` with a
    windowed patch taken at the previous centre, moves the centre by the
    offset between the response peak and ``self._init_response_center``,
    and then blends a newly extracted patch into the numerator,
    denominator and template using ``self.interp_factor``.

    :param current_frame: frame handed to ``self.get_sub_window``
    :param vis: when exactly ``True`` the response map is kept in
        ``self.score``
    :return: ``[left, top, w, h]`` built from the new centre and the
        stored ``self.w`` / ``self.h``
    """
    lr = self.interp_factor
    keep = 1 - lr

    # --- detection -------------------------------------------------------
    probe = self.get_sub_window(current_frame, self._center, self.crop_size)
    probe = self._window[:, :, None] * probe
    probe_kf = fft2(self._dgk(self.x, probe))
    responses = np.real(
        ifft2(self.alphaf_num * probe_kf.conj() / (self.alphaf_den)))
    if vis is True:
        self.score = responses

    peak = np.unravel_index(np.argmax(responses, axis=None),
                            responses.shape)
    shift_y = self._init_response_center[0] - peak[0]
    shift_x = self._init_response_center[1] - peak[1]
    prev_x, prev_y = self._center
    self._center = (prev_x - shift_x, prev_y - shift_y)

    # --- model update ----------------------------------------------------
    sample = self.get_sub_window(current_frame, self._center, self.crop_size)
    sample = sample * self._window[:, :, None]
    sample_kf = fft2(self._dgk(sample, sample))
    fresh_num = self.yf * sample_kf
    fresh_den = sample_kf * (sample_kf + self.lambda_)

    self.alphaf_num = keep * self.alphaf_num + lr * fresh_num
    self.alphaf_den = keep * self.alphaf_den + lr * fresh_den
    self.x = keep * self.x + lr * sample

    centre_x, centre_y = self._center
    return [centre_x - self.w / 2, centre_y - self.h / 2, self.w, self.h]
def _dgk(self, x1, x2):
    """Evaluate a Gaussian kernel between ``x1`` and all shifts of ``x2``.

    The cross term comes from the inverse FFT of ``fft2(x1) * conj(fft2(x2))``
    (centred with ``np.fft.fftshift``); the squared norms of both inputs
    are added and the clipped distance is scaled by ``self.sigma`` and the
    number of elements in ``x1``.

    :param x1: first array
    :param x2: second array
    :return: kernel values, same shape as the correlation of the inputs
    """
    cross = np.fft.fftshift(ifft2(fft2(x1) * np.conj(fft2(x2))))

    flat1 = x1.flatten()
    flat2 = x2.flatten()
    energy = np.dot(flat1.conj(), flat1) + np.dot(flat2.conj(), flat2)

    dist = energy - 2 * cross
    # Negative distances are clamped to zero before the exponential.
    clipped = np.clip(dist, a_min=0, a_max=None)
    return np.exp(-1 / self.sigma**2 * clipped / np.size(x1))
def create_csr_filter(self, img, Y, P):
    """Build a masked correlation filter by augmented-Lagrangian iterations.

    :param img: image patch (already normalized); indexed as a 3-D array
    :param Y: Gaussian-shaped labels (the peak must be at the top-left
        corner); broadcast over the last axis of ``img``
    :param P: padding mask, broadcast over the last axis of ``img``
    :return: the filter ``H`` in the Fourier domain
    """
    penalty = 5
    penalty_growth = 3
    penalty_cap = 20
    n_steps = 4
    reg = penalty / 100

    mask = P[:, :, None]
    img_f = fft2(img)
    cross_spec = img_f * np.conj(Y)[:, :, None]
    auto_spec = img_f * np.conj(img_f)

    # Initial filter: closed-form solution, masked in the spatial domain.
    H = fft2(ifft2(cross_spec / (auto_spec + reg)) * mask)
    # Lagrange multiplier starts at zero.
    multiplier = np.zeros_like(H)

    for step in range(1, n_steps + 1):
        G = (cross_spec + penalty * H - multiplier) / (auto_spec + penalty)
        H = fft2(
            np.real(mask * ifft2(penalty * G + multiplier) / (penalty + reg)))
        # The multiplier and penalty are not updated after the last step.
        if step == n_steps:
            break
        multiplier += penalty * (G - H)
        penalty = min(penalty_cap, penalty_growth * penalty)

    return H
def update(self, current_frame, vis=False):
    """Track the target into ``current_frame`` and update the filter.

    Steps: evaluate the translation filter on a sample taken at the
    previous centre, shift the centre by the scaled peak offset, ask
    ``self.scale_estimator`` for a new scale factor (clipped when
    ``self.scale_type == 'normal'``), then blend a fresh sample into
    ``self.hf_num`` / ``self.hf_den`` with ``self.interp_factor``.

    :param current_frame: frame handed to the sampling helpers
    :param vis: when exactly ``True`` the response map and crop size are
        stored in ``self.score`` / ``self.win_sz``
    :return: ``[left, top, width, height]`` of the scaled target
    """
    lr = self.interp_factor
    keep = 1 - lr

    # --- translation detection -------------------------------------------
    probe = self.get_translation_sample(current_frame, self._center,
                                        self.crop_size,
                                        self.current_scale_factor,
                                        self._window)
    probe_f = fft2(probe)
    response = np.real(
        ifft2(
            np.sum(self.hf_num * probe_f, axis=2) /
            (self.hf_den + self.lambda_)))
    if vis is True:
        self.score = response
        self.win_sz = self.crop_size

    peak = np.unravel_index(np.argmax(response, axis=None), response.shape)
    shift_y = (peak[0] -
               self._init_response_center[0]) * self.current_scale_factor
    shift_x = (peak[1] -
               self._init_response_center[1]) * self.current_scale_factor
    prev_x, prev_y = self._center
    self._center = (prev_x + shift_x, prev_y + shift_y)

    # --- scale estimation ------------------------------------------------
    self.current_scale_factor = self.scale_estimator.update(
        current_frame, self._center, self.base_target_size,
        self.current_scale_factor)
    if self.scale_type == 'normal':
        self.current_scale_factor = np.clip(self.current_scale_factor,
                                            a_min=self._min_scale_factor,
                                            a_max=self._max_scale_factor)

    # --- translation filter update ---------------------------------------
    sample = self.get_translation_sample(current_frame, self._center,
                                         self.crop_size,
                                         self.current_scale_factor,
                                         self._window)
    sample_f = fft2(sample)
    fresh_num = self.yf[:, :, None] * np.conj(sample_f)
    fresh_den = np.sum(sample_f * np.conj(sample_f), axis=2)
    self.hf_den = keep * self.hf_den + lr * fresh_den
    self.hf_num = keep * self.hf_num + lr * fresh_num

    base_w, base_h = self.base_target_size[0], self.base_target_size[1]
    self.target_sz = (base_w * self.current_scale_factor,
                      base_h * self.current_scale_factor)
    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2,
        self.target_sz[0],
        self.target_sz[1],
    ]
def _dgk(self, x1, x2):
    """Evaluate a Gaussian kernel between ``x1`` and all shifts of ``x2``.

    The cross-correlation is computed in the Fourier domain and summed
    over the third axis (a 2-D input is treated as a single channel).

    :param x1: first array (2-D or 3-D)
    :param x2: second array, same shape as ``x1``
    :return: real 2-D kernel map
    """
    flat1 = x1.ravel()
    flat2 = x2.ravel()
    norm1 = flat1.dot(flat1)
    norm2 = flat2.dot(flat2)

    spectrum = fft2(x1) * np.conj(fft2(x2))
    if spectrum.ndim == 2:
        # Promote a single-channel product so the channel sum below works.
        spectrum = spectrum[:, :, np.newaxis]
    cross = np.real(ifft2(np.sum(spectrum, axis=2)))

    dist = norm1 + norm2 - 2 * cross
    clipped = np.clip(dist, a_min=0, a_max=None)
    return np.exp(-1 / self.sigma**2 * clipped / np.size(x1))
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and a box.

    Sets the centre, crop size, cosine window and Gaussian labels, learns
    the initial translation filter (``self.hf_num`` / ``self.hf_den``) and
    creates the scale estimator selected by ``self.scale_type``.

    :param first_frame: first frame; converted to ``float32``
    :param bbox: ``(x, y, w, h)``; truncated to ``int64``
    """
    frame = first_frame.astype(np.float32)
    x, y, w, h = tuple(np.array(bbox).astype(np.int64))

    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h
    grow = 1 + self.padding
    self.crop_size = (int(w * grow), int(h * grow))
    self.base_target_size = (self.w, self.h)
    self.target_sz = (self.w, self.h)

    self._window = cos_window(self.crop_size)
    output_sigma = np.sqrt(self.w * self.h) * self.output_sigma_factor
    self.y = gaussian2d_labels(self.crop_size, output_sigma)
    self._init_response_center = np.unravel_index(
        np.argmax(self.y, axis=None), self.y.shape)
    self.yf = fft2(self.y)

    # Initial translation filter.
    self.current_scale_factor = 1.
    sample = self.get_translation_sample(frame, self._center,
                                         self.crop_size,
                                         self.current_scale_factor,
                                         self._window)
    self.xlf = fft2(sample)
    self.hf_den = np.sum(self.xlf * np.conj(self.xlf), axis=2)
    self.hf_num = self.yf[:, :, None] * np.conj(self.xlf)

    if self.scale_type == 'normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz,
                                                  config=self.scale_config)
        self.scale_estimator.init(frame, self._center,
                                  self.base_target_size,
                                  self.current_scale_factor)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step

        # Scale limits are expressed as integer powers of the scale step.
        log_step = np.log(self._scale_step)
        lower = np.max(
            5 / np.array(([self.crop_size[0], self.crop_size[1]])))
        self._min_scale_factor = self._scale_step**np.ceil(
            np.log(lower) / log_step)
        upper = np.min(frame.shape[:2] / np.array(
            [self.base_target_size[1], self.base_target_size[0]]))
        self._max_scale_factor = self._scale_step**np.floor(
            np.log(upper) / log_step)
    elif self.scale_type == 'LP':
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(frame, self._center,
                                  self.base_target_size,
                                  self.current_scale_factor)
def ADMM(self, xf):
    """Run ``self.admm_iterations`` ADMM steps and return the filter ``g_f``.

    Each step solves for ``g_f`` in closed form, projects the spatial
    filter onto the window returned by ``self.get_subwindow_no_window``
    (everything outside is zero), and updates the multiplier ``l_f`` and
    the penalty ``mu``.

    :param xf: Fourier-domain features, indexed as a 3-D array
    :return: ``g_f`` with the same shape as ``xf``
    """
    g_f = np.zeros_like(xf)
    h_f = np.zeros_like(g_f)
    l_f = np.zeros_like(g_f)

    mu = 1
    beta = 10
    mumax = 10000
    T = self.feature_map_sz[0] * self.feature_map_sz[1]
    S_xx = np.sum(np.conj(xf) * xf, 2)

    # Terms that do not change between iterations.
    label_term = self.yf[:, :, None] * xf
    energy_label_term = xf * ((S_xx * self.yf)[:, :, None])
    window_centre = (int(self.feature_map_sz[0] / 2),
                     int(self.feature_map_sz[1] / 2))

    step = 1
    while step <= self.admm_iterations:
        B = S_xx + (T * mu)
        S_lx = np.sum(np.conj(xf) * l_f, axis=2)
        S_hx = np.sum(np.conj(xf) * h_f, axis=2)

        part0 = (1 / (T * mu) * label_term) - ((1 / mu) * l_f) + h_f
        part1 = 1 / (T * mu) * energy_label_term
        part2 = 1 / mu * (xf * (S_lx[:, :, None]))
        part3 = xf * S_hx[:, :, None]

        # solve for g
        g_f = part0 - (part1 - part2 + part3) / B[:, :, None]

        # solve for h
        h = (T / ((mu * T) + self.admm_lambda)) * ifft2(mu * g_f + l_f)
        xs, ys, h = self.get_subwindow_no_window(h, window_centre,
                                                 self.small_filter_sz)
        padded = np.zeros(
            (self.feature_map_sz[1], self.feature_map_sz[0], h.shape[2]),
            dtype=np.complex64)
        padded[ys, xs, :] = h
        h_f = fft2(padded)

        # multiplier and penalty update
        l_f = l_f + (mu * (g_f - h_f))
        mu = min(beta * mu, mumax)
        step += 1

    return g_f
def ADMM(self, xlf, f_pre_f, mu):
    """Run ``self.admm_max_iterations`` ADMM steps and return ``f_f``.

    Each step solves for ``f_f`` in closed form (including the term that
    ties it to ``f_pre_f`` with weight ``mu``), obtains ``g_f`` from
    ``self.argmin_g`` using ``self.reg_window``, and updates the
    multiplier ``h_f`` and the penalty ``gamma``.

    :param xlf: Fourier-domain features, indexed as a 3-D array
    :param f_pre_f: previous filter, same shape as ``xlf``
    :param mu: weight of the ``f_pre_f`` term
    :return: ``f_f`` with the same shape as ``xlf``
    """
    model_xf = xlf
    f_f = np.zeros_like(model_xf)
    g_f = np.zeros_like(f_f)
    h_f = np.zeros_like(f_f)

    gamma = self.init_penalty_factor
    gamma_max = self.max_penalty_factor
    gamma_scale_step = self.penalty_scale_step
    T = self.feature_map_sz[0] * self.feature_map_sz[1]

    S_xx = np.sum(np.conj(model_xf) * model_xf, axis=2)
    Sf_pre_f = np.sum(np.conj(model_xf) * f_pre_f, axis=2)
    Sfx_pre_f = model_xf * Sf_pre_f[:, :, None]

    step = 1
    while step <= self.admm_max_iterations:
        rho = gamma + mu
        B = S_xx + T * rho
        Sgx_f = np.sum(np.conj(model_xf) * g_f, axis=2)
        Shx_f = np.sum(np.conj(model_xf) * h_f, axis=2)

        part0 = (1 / (T * rho) * (self.yf[:, :, None] * model_xf)) \
            - ((1 / rho) * h_f) \
            + (gamma / rho) * g_f \
            + (mu / rho) * f_pre_f
        part1 = 1 / (T * rho) * (model_xf * ((S_xx * self.yf)[:, :, None]))
        part2 = mu / rho * Sfx_pre_f
        part3 = 1 / rho * (model_xf * (Shx_f[:, :, None]))
        part4 = gamma / rho * (model_xf * Sgx_f[:, :, None])

        f_f = part0 - (part1 + part2 - part3 + part4) / B[:, :, None]
        g_f = fft2(
            self.argmin_g(self.reg_window, gamma,
                          (ifft2(gamma * (f_f + h_f)))))
        h_f = h_f + (gamma * (f_f - g_f))
        gamma = min(gamma_scale_step * gamma, gamma_max)
        step += 1

    return f_f
def train_model(self):
    """Alternately solve for the filter and the two kernel weights.

    Starting from weights ``[0.5, 0.5]`` the loop recomputes the
    numerator/denominator for the ``cn`` and ``hog`` kernels, blends them
    with the stored ones (except when ``self.frame_index == 1``), calls
    ``self.trainD`` for new weights and stops once both ``alpha`` and the
    weights change by at most 3 % (relative L1), or after 100 rounds, in
    which case the weights fall back to ``[0.5, 0.5]``.

    :return: the kernel weights ``d``
    """
    weights = [0.5, 0.5]
    dim = self.z_cn2.shape[2]
    kf_cn = fft2(
        self.dense_gauss_kernel(self.z_cn2, self.z_cn2, self.cn_sigma))
    kf_hog = fft2(
        self.dense_gauss_kernel(self.z_hog2, self.z_hog2, self.hog_sigma))

    rounds = 0
    converged = False
    lambda1 = 0.01
    threshold = 0.03
    prev_weights = weights

    while not converged:
        w_cn, w_hog = weights[0], weights[1]
        fresh_num_cn = self.yf * w_cn * kf_cn
        fresh_num_hog = self.yf * w_hog * kf_hog
        fresh_den_cn = w_cn * kf_cn * (w_cn * np.conj(kf_cn) + lambda1)
        fresh_den_hog = w_hog * kf_hog * (w_hog * np.conj(kf_hog) + lambda1)

        if self.frame_index == 1:
            # First frame: there is no stored model to blend with.
            num_cn, num_hog = fresh_num_cn, fresh_num_hog
            den_cn, den_hog = fresh_den_cn, fresh_den_hog
        else:
            num_cn = (1 - self.lr_cn) * self.alphaf_num1 \
                + self.lr_cn * fresh_num_cn
            num_hog = (1 - self.lr_hog) * self.alphaf_num2 \
                + self.lr_hog * fresh_num_hog
            den_cn = (1 - self.lr_cn) * self.alphaf_den1 \
                + self.lr_cn * fresh_den_cn
            den_hog = (1 - self.lr_hog) * self.alphaf_den2 \
                + self.lr_hog * fresh_den_hog

        self.alphaf_num = num_cn + num_hog
        self.alphaf_den = den_cn + den_hog
        self.alphaf = self.alphaf_num / self.alphaf_den
        alpha = ifft2(self.alphaf)
        weights = self.trainD(kf_cn, kf_hog, self.alphaf, alpha, lambda1,
                              dim)

        rounds += 1
        if rounds > 1:
            alpha_change = np.abs(alpha - prev_alpha)
            weight_change = np.abs(
                np.array(weights) - np.array(prev_weights))
            alpha_ok = (np.sum(alpha_change) <=
                        threshold * np.sum(np.abs(prev_alpha)))
            if alpha_ok and np.sum(
                    np.array(weight_change)) <= threshold * np.sum(
                        np.abs(np.array(prev_weights))):
                converged = True
        prev_alpha = alpha
        prev_weights = weights

        if rounds >= 100:
            # Iteration budget exhausted: fall back to equal weights.
            weights = [0.5, 0.5]
            break

    self.alphaf_num1 = num_cn
    self.alphaf_num2 = num_hog
    self.alphaf_den1 = den_cn
    self.alphaf_den2 = den_hog
    return weights
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and a box.

    Builds the cosine window and Gaussian labels for the padded crop,
    extracts the windowed template ``self.x`` and computes the initial
    ``self.alphaf_num`` / ``self.alphaf_den`` from its auto-kernel.

    :param first_frame: frame handed to ``self.get_sub_window``
    :param bbox: ``(x, y, w, h)``; truncated to ``int64``
    """
    x, y, w, h = tuple(np.array(bbox).astype(np.int64))
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h

    grow = 1 + self.padding
    self._window = cos_window((int(w * grow), int(h * grow)))
    # crop_size is (width, height); the window array is (rows, cols).
    rows, cols = self._window.shape[0], self._window.shape[1]
    self.crop_size = (cols, rows)

    sigma = np.sqrt(w * h) * self.output_sigma_factor
    self.y = gaussian2d_labels(self.crop_size, sigma)
    self.yf = fft2(self.y)
    self._init_response_center = np.unravel_index(
        np.argmax(self.y, axis=None), self.y.shape)

    patch = self.get_sub_window(first_frame, self._center, self.crop_size)
    self.x = self._window[:, :, None] * patch

    auto_kf = fft2(self._dgk(self.x, self.x))
    self.alphaf_num = (self.yf) * auto_kf
    self.alphaf_den = auto_kf * (auto_kf + self.lambda_)
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and a box.

    Targets whose area is at least ``100**2`` pixels are tracked at half
    resolution (``self.resize`` is set and the frame and box are halved).
    The method then builds the window and labels on the cell grid and
    learns the initial model (``self.model_alphaf`` / ``self.model_xf``).

    :param first_frame: 3-channel image
    :param bbox: ``(x0, y0, w, h)``; truncated to ``int64``
    """
    assert len(first_frame.shape) == 3 and first_frame.shape[2] == 3
    x0, y0, w, h = tuple(np.array(bbox).astype(np.int64))

    if w * h >= 100**2:
        self.resize = True
        x0, y0, w, h = x0 / 2, y0 / 2, w / 2, h / 2
        first_frame = cv2.resize(first_frame, dsize=None, fx=0.5,
                                 fy=0.5).astype(np.uint8)

    padded_w = int(np.floor(w * (1 + self.padding)))
    padded_h = int(np.floor(h * (1 + self.padding)))
    self.crop_size = (padded_w, padded_h)  # for vis
    self._center = (x0 + w / 2, y0 + h / 2)
    self.w, self.h = w, h

    self.window_size = (padded_w // self.cell_size,
                        padded_h // self.cell_size)
    self._window = cos_window(self.window_size)

    sigma = np.sqrt(w * h) * self.output_sigma_factor / self.cell_size
    self.yf = fft2(gaussian2d_rolled_labels(self.window_size, sigma))

    self.search_size = np.linspace(0.985, 1.015, 7)
    self.target_sz = (w, h)

    patch = cv2.getRectSubPix(first_frame, self.crop_size, self._center)
    patch = cv2.resize(patch, dsize=self.crop_size)
    features = self.get_features(patch, self.cell_size)
    features = features * self._window[:, :, None]

    feat_f = fft2(features)
    auto_kf = self._kernel_correlation(feat_f, feat_f, kernel=self.kernel)
    self.model_alphaf = self.yf / (auto_kf + self.lambda_)
    self.model_xf = feat_f
def _kernel_correlation(self, xf, yf, kernel='gaussian'):
    """Return the kernel correlation of two Fourier-domain feature maps.

    :param xf: first feature map (3-D, summed over axis 2)
    :param yf: second feature map, same shape as ``xf``
    :param kernel: ``'gaussian'`` or ``'linear'``
    :return: 2-D kernel correlation in the Fourier domain
    :raises NotImplementedError: for any other ``kernel`` value
    """
    if kernel == 'linear':
        return np.sum(xf * np.conj(yf), axis=2) / np.size(xf)

    if kernel != 'gaussian':
        raise NotImplementedError

    n_spatial = xf.shape[0] * xf.shape[1]
    flat_x = xf.flatten()
    flat_y = yf.flatten()
    xx = (np.dot(flat_x.conj().T, flat_x) / n_spatial)
    yy = (np.dot(flat_y.conj().T, flat_y) / n_spatial)

    cross_f = xf * np.conj(yf)
    xy = np.sum(np.real(ifft2(cross_f)), axis=2)

    dist = np.clip(xx + yy - 2 * xy, a_min=0, a_max=None)
    return fft2(np.exp(-1 / self.sigma ** 2 * dist / np.size(xf)))
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and a box.

    Builds the cosine window and rolled Gaussian labels on the cell grid,
    extracts the feature type selected by ``self.features`` from the
    patch around the target and trains the initial ``self.alphaf``.

    Fix: the ``'cn'`` branch used to resize the *whole* first frame
    instead of the patch around the target; it now crops with
    ``self._crop`` first, exactly like the ``'hog'`` branch.

    :param first_frame: 3-channel image (converted to gray when
        ``self.features == 'gray'``)
    :param bbox: ``(x0, y0, w, h)``; truncated to ``int64``
    :raises NotImplementedError: when ``self.features`` is not one of
        ``'gray'``, ``'color'``, ``'hog'``, ``'cn'``
    """
    assert len(first_frame.shape) == 3 and first_frame.shape[2] == 3
    if self.features == 'gray':
        first_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

    bbox = np.array(bbox).astype(np.int64)
    x0, y0, w, h = tuple(bbox)

    padded_w = int(np.floor(w * (1 + self.padding)))
    padded_h = int(np.floor(h * (1 + self.padding)))
    self.crop_size = (padded_w, padded_h)  # for vis
    self._center = (np.floor(x0 + w / 2), np.floor(y0 + h / 2))
    self.w, self.h = w, h

    self.window_size = (padded_w // self.cell_size,
                        padded_h // self.cell_size)
    self._window = cos_window(self.window_size)

    s = np.sqrt(w * h) * self.output_sigma_factor / self.cell_size
    self.yf = fft2(gaussian2d_rolled_labels(self.window_size, s))

    # Pixel size that maps onto the cell grid for cell-based features.
    feature_px = (self.window_size[0] * self.cell_size,
                  self.window_size[1] * self.cell_size)

    if self.features == 'gray' or self.features == 'color':
        first_frame = first_frame.astype(np.float32) / 255
        x = self._crop(first_frame, self._center, (w, h))
        x = x - np.mean(x)
    elif self.features == 'hog':
        x = self._crop(first_frame, self._center, (w, h))
        x = cv2.resize(x, feature_px)
        x = extract_hog_feature(x, cell_size=self.cell_size)
    elif self.features == 'cn':
        # Crop the target patch before resizing (previously the full
        # frame was resized, so the model was not trained on the target).
        x = self._crop(first_frame, self._center, (w, h))
        x = cv2.resize(x, feature_px)
        x = extract_cn_feature(x, self.cell_size)
    else:
        raise NotImplementedError

    self.xf = fft2(self._get_windowed(x, self._window))
    self.init_response_center = (0, 0)
    self.alphaf = self._training(self.xf, self.yf)
def phase_correlation(src1, src2):
    """Estimate the shift between two multi-channel arrays.

    The normalised cross-power spectrum is summed over axis 2, brought
    back to the spatial domain and centred; the peak is then refined with
    a weighted centroid over its 3x3 neighbourhood (indices clamped to
    the array bounds) and expressed relative to the array centre.

    :param src1: first array (3-D, summed over axis 2)
    :param src2: second array, same shape as ``src1``
    :return: ``(ptx, pty, mscore)`` - column shift, row shift and the
        maximum of the correlation surface
    """
    eps = 2e-16
    spec1 = fft2(src1)
    spec2 = fft2(src2)

    cross = spec2 * np.conj(spec1)
    magnitude = np.sqrt(cross * np.conj(cross)) + eps
    surface = np.real(ifft2(np.sum(cross / magnitude, axis=2)))
    surface = np.fft.fftshift(surface, axes=(0, 1))

    mscore = np.max(surface)
    peak_row, peak_col = np.unravel_index(np.argmax(surface, axis=None),
                                          surface.shape)

    # 3x3 neighbourhood around the peak, clamped at the borders.
    half = 1
    rows = np.arange(peak_row - half, peak_row + half + 1).astype(np.int64)
    cols = np.arange(peak_col - half, peak_col + half + 1).astype(np.int64)
    rows = np.clip(rows, a_min=0, a_max=surface.shape[0] - 1)
    cols = np.clip(cols, a_min=0, a_max=surface.shape[1] - 1)

    weights = surface[rows, :][:, cols]
    total = np.sum(weights) + eps
    pty = np.sum(np.sum(weights, axis=1) * rows) / total
    ptx = np.sum(np.sum(weights, axis=0) * cols) / total

    pty = pty - (src1.shape[0]) // 2
    ptx = ptx - (src1.shape[1]) // 2
    return ptx, pty, mscore
def update(self, current_frame, vis=False):
    """Track the target into ``current_frame`` with an ensemble of experts.

    Per call the method: (1) computes a colour-likelihood response and a
    correlation-filter response for each of ``self.expert_num`` experts
    and merges them with ``self.merge_factor``; (2) picks the centre of
    the expert with the best robustness score (expert 6 during the first
    ``self.period - 1`` frames); (3) derives the learning rates from the
    product of mean robustness score and mean PSR; (4) optionally updates
    the scale and the derived areas; (5) updates every expert's filter and,
    when its learning rate is non-zero, the colour histograms.

    :param current_frame: frame handed to the sampling helpers
    :param vis: when exactly ``True`` the selected response is stored in
        ``self.score``
    :return: ``([left, top, width, height], final_score)``
    """
    self.frame_idx += 1
    # Patch for the correlation filter and patch for the colour model.
    im_patch_cf = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    pwp_search_area = (round(self.norm_pwp_search_area[0] /
                             self.area_resize_factor),
                       round(self.norm_pwp_search_area[1] /
                             self.area_resize_factor))
    im_patch_pwp = self.get_sub_window(current_frame, self._center,
                                       self.norm_pwp_search_area,
                                       pwp_search_area)
    likelihood_map = self.get_colour_map(im_patch_pwp, self.bg_hist,
                                         self.fg_hist, self.bin_mapping)
    # NaNs in the likelihood map are treated as zero likelihood.
    likelihood_map[np.isnan(likelihood_map)] = 0.
    self.norm_target_sz = (int(self.norm_target_sz[0]),
                           int(self.norm_target_sz[1]))
    response_pwp = get_center_likelihood(likelihood_map, self.norm_target_sz)

    # Windowed features, split into the per-expert feature combinations.
    xt = self.get_feature_map(im_patch_cf, self.cell_size)
    xt = self._window[:, :, None] * xt
    xt_cn, xt_hog1, xt_hog2 = self.split_features(xt)
    self.experts[0].xt = xt_cn
    self.experts[1].xt = xt_hog1
    self.experts[2].xt = xt_hog2
    self.experts[3].xt = np.concatenate((xt_hog1, xt_cn), axis=2)
    self.experts[4].xt = np.concatenate((xt_hog2, xt_cn), axis=2)
    self.experts[5].xt = np.concatenate((xt_hog1, xt_hog2), axis=2)
    self.experts[6].xt = xt

    # Centre of the (odd-sized) response map.
    center = ((self.norm_delta_area[0] - 1) / 2,
              (self.norm_delta_area[1] - 1) / 2)
    for i in range(self.expert_num):
        xtf = fft2(self.experts[i].xt)
        hf = self.experts[i].hf_num / (np.sum(
            self.experts[i].hf_den, axis=2) + self.lambda_)[:, :, None]
        response_cf = np.real(ifft2(np.sum(np.conj(hf) * xtf, axis=2)))
        response_sz = (self.floor_odd(self.norm_delta_area[0] /
                                      self.cell_size),
                       self.floor_odd(self.norm_delta_area[1] /
                                      self.cell_size))
        # NOTE(review): cv2.INTER_NEAREST is passed as the third positional
        # argument; confirm against the cv2.resize signature that this slot
        # is the interpolation flag and not ``dst``.
        response_cf = cv2.resize(
            crop_filter_response(response_cf, response_sz),
            self.norm_delta_area, cv2.INTER_NEAREST)
        response_cf[np.isnan(response_cf)] = 0.

        # Merge filter response and colour response.
        self.experts[i].response = (
            1 - self.merge_factor
        ) * response_cf + self.merge_factor * response_pwp
        row, col = np.unravel_index(
            np.argmax(self.experts[i].response, axis=None),
            self.experts[i].response.shape)
        # Peak offset from the map centre, converted by the resize factor.
        dy = (row - center[1]) / self.area_resize_factor
        dx = (col - center[0]) / self.area_resize_factor
        self.experts[i].pos = (self._center[0] + dx, self._center[1] + dy)
        cx, cy, w, h = self.experts[i].pos[0], self.experts[i].pos[
            1], self.target_sz[0], self.target_sz[1]
        self.experts[i].rect_positions.append(
            [cx - w / 2, cy - h / 2, w, h])
        self.experts[i].centers.append([cx, cy])
        # Distance to this expert's previous centre and the derived score.
        pre_center = self.experts[i].centers[self.frame_idx - 1]
        smooth = np.sqrt((cx - pre_center[0])**2 + (cy - pre_center[1])**2)
        self.experts[i].smoothes.append(smooth)
        self.experts[i].smooth_scores.append(
            np.exp(-smooth**2 / (2 * self.avg_dim**2)))

    if self.frame_idx >= self.period - 1:
        # Enough history: select the expert with the best robustness score.
        for i in range(self.expert_num):
            self.experts[i].rob_scores.append(
                self.robustness_eva(self.experts, i, self.frame_idx,
                                    self.period, self.weight,
                                    self.expert_num))
            self.id_ensemble[i] = self.experts[i].rob_scores[
                self.frame_idx]
        self.mean_score.append(
            np.sum(np.array(self.id_ensemble)) / self.expert_num)
        idx = np.argmax(np.array(self.id_ensemble))
        self._center = self.experts[idx].pos
        self.response = self.experts[idx].response
    else:
        # Not enough history yet: use expert 6 (it holds all features).
        for i in range(self.expert_num):
            self.experts[i].rob_scores.append(1)
        self._center = self.experts[6].pos
        self.response = self.experts[6].response
        self.mean_score.append(0)
    if vis is True:
        self.score = self.response

    # adaptive update
    score1 = self.cal_psr(self.experts[0].response)
    score2 = self.cal_psr(self.experts[1].response)
    score3 = self.cal_psr(self.experts[2].response)
    self.psr_score.append((score1 + score2 + score3) / 3)
    # This branch assigns frame_idx to itself and therefore has no effect.
    if self.frame_idx == len(self.psr_score):
        self.frame_idx = self.frame_idx

    if self.frame_idx >= self.period - 1:
        final_score = self.mean_score[self.frame_idx] * self.psr_score[
            self.frame_idx]
        # Average of (mean score * PSR) over frames period-1 .. frame_idx.
        ave_score = np.sum(
            np.array(self.mean_score)[self.period -
                                      1:self.frame_idx + 1] *
            np.array(self.psr_score[self.period - 1:self.frame_idx + 1])
        ) / (self.frame_idx + 1 - self.period + 1)
        threshold = self.update_thresh * ave_score
        if final_score > threshold:
            self.learning_rate_pwp = self.config.interp_factor_pwp
            self.learning_rate_cf = self.config.interp_factor_cf
        else:
            # Low score: freeze the colour model, damp the filter update.
            self.learning_rate_pwp = 0
            self.learning_rate_cf = (
                final_score / threshold)**3 * self.config.interp_factor_cf
    else:
        final_score = self.mean_score[self.frame_idx] * self.psr_score[
            self.frame_idx]

    if self.scale_adaptation:
        self.scale_factor = self.scale_estimator.update(
            current_frame, self._center, self.base_target_sz,
            self.scale_factor)
        self.target_sz = (round(self.base_target_sz[0] * self.scale_factor),
                          round(self.base_target_sz[1] * self.scale_factor))
        avg_dim = (self.target_sz[0] + self.target_sz[1]) / 2
        bg_area = (round(self.target_sz[0] + avg_dim),
                   round(self.target_sz[1] + avg_dim))
        fg_area = (round(self.target_sz[0] - avg_dim * self.inner_padding),
                   round(self.target_sz[1] - avg_dim * self.inner_padding))
        bg_area = (min(bg_area[0], current_frame.shape[1] - 1),
                   min(bg_area[1], current_frame.shape[0] - 1))
        # Make (bg_area - target_sz) and (bg_area - fg_area) even.
        self.bg_area = (bg_area[0] - (bg_area[0] - self.target_sz[0]) % 2,
                        bg_area[1] - (bg_area[1] - self.target_sz[1]) % 2)
        self.fg_area = (fg_area[0] + (self.bg_area[0] - fg_area[0]) % 2,
                        fg_area[1] + (self.bg_area[1] - fg_area[1]) % 2)
        self.area_resize_factor = np.sqrt(
            self.fixed_area / (self.bg_area[0] * self.bg_area[1]))

    # Re-extract features at the new centre for the filter update.
    im_patch_bg = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.cell_size)
    xt = self._window[:, :, None] * xt
    xt_cn, xt_hog1, xt_hog2 = self.split_features(xt)
    self.experts[0].xt = xt_cn
    self.experts[1].xt = xt_hog1
    self.experts[2].xt = xt_hog2
    self.experts[3].xt = np.concatenate((xt_hog1, xt_cn), axis=2)
    self.experts[4].xt = np.concatenate((xt_hog2, xt_cn), axis=2)
    self.experts[5].xt = np.concatenate((xt_hog1, xt_hog2), axis=2)
    self.experts[6].xt = xt
    for i in range(self.expert_num):
        xtf = fft2(self.experts[i].xt)
        hf_den = np.conj(xtf) * xtf / (self.cf_response_size[0] *
                                       self.cf_response_size[1])
        hf_num = np.conj(self.yf)[:, :, None] * xtf / (
            self.cf_response_size[0] * self.cf_response_size[1])
        # Running average of denominator and numerator.
        self.experts[i].hf_den = (
            1 - self.learning_rate_cf
        ) * self.experts[i].hf_den + self.learning_rate_cf * hf_den
        self.experts[i].hf_num = (
            1 - self.learning_rate_cf
        ) * self.experts[i].hf_num + self.learning_rate_cf * hf_num

    if self.learning_rate_pwp != 0:
        im_patch_bg = self.get_sub_window(current_frame, self._center,
                                          self.norm_bg_area, self.bg_area)
        self.bg_hist, self.fg_hist = self.update_hist_model(
            self.new_pwp_model, im_patch_bg, self.bg_area, self.fg_area,
            self.target_sz, self.norm_bg_area, self.n_bins)
    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2, self.target_sz[0],
        self.target_sz[1]
    ], final_score
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and a box.

    Derives the background/foreground areas and their normalised
    versions, builds the initial colour histograms, the cosine window and
    the labels, optionally creates the scale estimator, trains the initial
    filter of every expert and seeds each expert's per-frame history.

    :param first_frame: first frame; converted to ``float32``
    :param bbox: ``(x, y, w, h)``; truncated to ``int64``
    """
    self.frame_idx += 1
    first_frame = first_frame.astype(np.float32)
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h
    self.crop_size = (int(w * (1 + self.padding)),
                      int(h * (1 + self.padding)))
    self.target_sz = (self.w, self.h)
    self.bin_mapping = self.get_bin_mapping(self.n_bins)

    # Background area: target plus its average dimension; foreground area:
    # target shrunk by inner_padding times the average dimension.
    avg_dim = (w + h) / 2
    self.bg_area = (round(w + avg_dim), round(h + avg_dim))
    self.fg_area = (int(round(w - avg_dim * self.inner_padding)),
                    int(round(h - avg_dim * self.inner_padding)))
    # Keep the background area smaller than the frame.
    self.bg_area = (int(min(self.bg_area[0], first_frame.shape[1] - 1)),
                    int(min(self.bg_area[1], first_frame.shape[0] - 1)))
    # Make (bg_area - target_sz) and (bg_area - fg_area) even.
    self.bg_area = (self.bg_area[0] -
                    (self.bg_area[0] - self.target_sz[0]) % 2,
                    self.bg_area[1] -
                    (self.bg_area[1] - self.target_sz[1]) % 2)
    self.fg_area = (self.fg_area[0] +
                    (self.bg_area[0] - self.fg_area[0]) % 2,
                    self.fg_area[1] +
                    (self.bg_area[1] - self.fg_area[1]) % 2)
    # Factor that rescales the background area to self.fixed_area pixels.
    self.area_resize_factor = np.sqrt(self.fixed_area /
                                      (self.bg_area[0] * self.bg_area[1]))
    self.norm_bg_area = (round(self.bg_area[0] * self.area_resize_factor),
                         round(self.bg_area[1] * self.area_resize_factor))
    self.cf_response_size = (int(
        np.floor(self.norm_bg_area[0] / self.cell_size)), int(
            np.floor(self.norm_bg_area[1] / self.cell_size)))
    norm_target_sz_w = 0.75 * self.norm_bg_area[
        0] - 0.25 * self.norm_bg_area[1]
    norm_target_sz_h = 0.75 * self.norm_bg_area[
        1] - 0.25 * self.norm_bg_area[0]
    self.norm_target_sz = (round(norm_target_sz_w),
                           round(norm_target_sz_h))
    norm_pad = (int(np.floor(
        (self.norm_bg_area[0] - norm_target_sz_w) / 2)),
                int(np.floor(
                    (self.norm_bg_area[1] - norm_target_sz_h) / 2)))
    radius = min(norm_pad[0], norm_pad[1])
    # Square, odd-sized search range for the merged response.
    self.norm_delta_area = (2 * radius + 1, 2 * radius + 1)
    self.norm_pwp_search_area = (self.norm_target_sz[0] +
                                 self.norm_delta_area[0] - 1,
                                 self.norm_target_sz[1] +
                                 self.norm_delta_area[1] - 1)

    # Initial colour histograms (new_pwp_model is True only for this call).
    patch_padded = self.get_sub_window(first_frame, self._center,
                                       self.norm_bg_area, self.bg_area)
    self.new_pwp_model = True
    self.bg_hist, self.fg_hist = self.update_hist_model(
        self.new_pwp_model,
        patch_padded,
        self.bg_area,
        self.fg_area,
        self.target_sz,
        self.norm_bg_area,
        self.n_bins,
    )
    self.new_pwp_model = False

    self._window = cos_window(self.cf_response_size)
    output_sigma = np.sqrt(
        self.norm_target_sz[0] *
        self.norm_target_sz[1]) * self.output_sigma_factor / self.cell_size
    self.y = gaussian2d_rolled_labels_staple(self.cf_response_size,
                                             output_sigma)
    self._init_response_center = np.unravel_index(
        np.argmax(self.y, axis=None), self.y.shape)
    self.yf = fft2(self.y)

    if self.scale_adaptation is True:
        self.scale_factor = 1
        self.base_target_sz = self.target_sz
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz, self.scale_factor)

    # Windowed features, split into the per-expert feature combinations.
    im_patch_bg = self.get_sub_window(first_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.cell_size)
    xt = self._window[:, :, None] * xt
    xt_cn, xt_hog1, xt_hog2 = self.split_features(xt)
    self.experts[0].xt = xt_cn
    self.experts[1].xt = xt_hog1
    self.experts[2].xt = xt_hog2
    self.experts[3].xt = np.concatenate((xt_hog1, xt_cn), axis=2)
    self.experts[4].xt = np.concatenate((xt_hog2, xt_cn), axis=2)
    self.experts[5].xt = np.concatenate((xt_hog1, xt_hog2), axis=2)
    self.experts[6].xt = xt
    # Initial filter numerator/denominator of every expert.
    for i in range(self.expert_num):
        xtf = fft2(self.experts[i].xt)
        self.experts[i].hf_den = np.conj(xtf) * xtf / (
            self.cf_response_size[0] * self.cf_response_size[1])
        self.experts[i].hf_num = np.conj(self.yf)[:, :, None] * xtf / (
            self.cf_response_size[0] * self.cf_response_size[1])
    self.rect_position_padded = None
    self.avg_dim = avg_dim
    # Seed the per-expert history lists with the first-frame values.
    for i in range(self.expert_num):
        self.experts[i].rect_positions.append([
            self._center[0] - self.target_sz[0] / 2,
            self._center[1] - self.target_sz[1] / 2, self.target_sz[0],
            self.target_sz[1]
        ])
        self.experts[i].rob_scores.append(1)
        self.experts[i].smoothes.append(0)
        self.experts[i].smooth_scores.append(1)
        self.experts[i].centers.append([self._center[0], self._center[1]])
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and a box.

    Chooses the sampling scale ``self.sc`` and the crop size for the
    configured ``self.search_area_shape``, builds labels, cosine window,
    regularisation window and interpolation frequency vectors, creates the
    scale estimator selected by ``self.scale_type`` and learns the initial
    filter ``self.f_pre_f_hc`` with :meth:`ADMM`.

    Fix: the label bandwidth ``output_sigma`` used
    ``floor(base_target_sz[1] * cell_size)`` for the height term while the
    width term divides by ``cell_size``; both terms now divide, so the
    bandwidth is measured in feature cells along both axes.

    :param first_frame: first frame (only ``shape[:2]`` is read here, the
        rest is handed to helpers)
    :param bbox: ``(x0, y0, w, h)``; truncated to ``int64``
    :raises ValueError: when ``self.search_area_shape`` is not one of
        ``'proportional'``, ``'square'``, ``'fix_padding'``
    """
    bbox = np.array(bbox).astype(np.int64)
    x0, y0, w, h = tuple(bbox)
    self._center = (int(x0 + w / 2), int(y0 + h / 2))
    self.target_sz = (w, h)

    # Sampling scale: 1 unless the search area falls outside the
    # [min_image_sample_size, max_image_sample_size] range.
    search_area = self.target_sz[
        0] * self.search_area_scale * self.target_sz[
            1] * self.search_area_scale
    self.sc = np.clip(
        1,
        a_min=np.sqrt(search_area / self.max_image_sample_size),
        a_max=np.sqrt(search_area / self.min_image_sample_size))
    self.base_target_sz = (self.target_sz[0] / self.sc,
                           self.target_sz[1] / self.sc)
    base_w, base_h = self.base_target_sz

    if self.search_area_shape == 'proportional':
        self.crop_size = (int(base_w * self.search_area_scale),
                          int(base_h * self.search_area_scale))
    elif self.search_area_shape == 'square':
        side = int(np.sqrt(base_w * base_h) * self.search_area_scale)
        self.crop_size = (side, side)
    elif self.search_area_shape == 'fix_padding':
        tmp = int(
            np.sqrt(base_w * self.search_area_scale +
                    (base_h - base_w) / 4)) + (base_w + base_h) / 2
        self.crop_size = (base_w + tmp, base_h + tmp)
    else:
        raise ValueError

    # Label bandwidth in feature cells (both axes divided by cell_size).
    output_sigma = np.sqrt(
        np.floor(base_w / self.cell_size) *
        np.floor(base_h / self.cell_size)) * self.output_sigma_factor

    # Snap the crop size to a multiple of the cell size.
    self.crop_size = (int(
        round(self.crop_size[0] / self.cell_size) * self.cell_size), int(
            round(self.crop_size[1] / self.cell_size) * self.cell_size))
    self.feature_map_sz = (self.crop_size[0] // self.cell_size,
                           self.crop_size[1] // self.cell_size)
    y = gaussian2d_rolled_labels(self.feature_map_sz, output_sigma)
    self.cosine_window = (cos_window((y.shape[1], y.shape[0])))
    self.yf = fft2(y)

    reg_scale = (int(np.floor(base_w / self.feature_downsample_ratio)),
                 int(np.floor(base_h / self.feature_downsample_ratio)))
    use_sz = self.feature_map_sz
    self.reg_window = self.create_reg_window_const(reg_scale, use_sz,
                                                   self.reg_window_max,
                                                   self.reg_window_min)

    # Integer index vectors rolled so that index 0 comes first.
    self.ky = np.roll(
        np.arange(-int(np.floor((self.feature_map_sz[1] - 1) / 2)),
                  int(np.ceil((self.feature_map_sz[1] - 1) / 2 + 1))),
        -int(np.floor((self.feature_map_sz[1] - 1) / 2)))
    self.kx = np.roll(
        np.arange(-int(np.floor((self.feature_map_sz[0] - 1) / 2)),
                  int(np.ceil((self.feature_map_sz[0] - 1) / 2 + 1))),
        -int(np.floor((self.feature_map_sz[0] - 1) / 2)))

    # scale
    scale_exp = np.arange(
        -int(np.floor((self.number_of_scales - 1) / 2)),
        int(np.ceil((self.number_of_scales - 1) / 2) + 1))
    self.scale_factors = self.scale_step**scale_exp

    if self.number_of_scales > 0:
        self._min_scale_factor = self.scale_step**np.ceil(
            np.log(
                np.max(5 / np.array(
                    ([self.crop_size[0], self.crop_size[1]])))) /
            np.log(self.scale_step))
        self._max_scale_factor = self.scale_step**np.floor(
            np.log(
                np.min(first_frame.shape[:2] / np.array(
                    [self.base_target_sz[1], self.base_target_sz[0]]))) /
            np.log(self.scale_step))

    if self.scale_type == 'normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz,
                                                  config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz, self.sc)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step
        # The limits are recomputed with the estimator's own scale step.
        self._min_scale_factor = self._scale_step**np.ceil(
            np.log(
                np.max(5 / np.array(
                    ([self.crop_size[0], self.crop_size[1]])))) /
            np.log(self._scale_step))
        self._max_scale_factor = self._scale_step**np.floor(
            np.log(
                np.min(first_frame.shape[:2] / np.array(
                    [self.base_target_sz[1], self.base_target_sz[0]]))) /
            np.log(self._scale_step))
    elif self.scale_type == 'LP':
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz, self.sc)

    # Learn the initial filter; mu = 0 disables the term that ties the
    # solution to the (all-zero) previous filter.
    patch = self.get_sub_window(
        first_frame,
        self._center,
        model_sz=self.crop_size,
        scaled_sz=(int(np.round(self.crop_size[0] * self.sc)),
                   int(np.round(self.crop_size[1] * self.sc))))
    xl_hc = self.extrac_hc_feature(patch, self.cell_size)
    xlf_hc = fft2(xl_hc * self.cosine_window[:, :, None])
    f_pre_f_hc = np.zeros_like(xlf_hc)
    mu_hc = 0
    self.f_pre_f_hc = self.ADMM(xlf_hc, f_pre_f_hc, mu_hc)
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and a box.

    Chooses the cell size and the initial scale factor from the search
    area, derives crop size and feature-map size for the configured
    ``self.search_area_shape``, builds labels and cosine window, creates
    the scale estimator and learns the initial filter ``self.g_f`` with
    :meth:`ADMM`.

    :param first_frame: first frame (only ``shape`` is read here, the rest
        is handed to helpers)
    :param bbox: ``(x, y, w, h)``; truncated to ``int64``
    :raises ValueError: when ``self.search_area_shape`` is not one of
        ``'proportional'``, ``'square'``, ``'fix_padding'``
    """
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h
    self.feature_ratio = self.cell_size
    # Search area measured in feature cells.
    self.search_area=(self.w/self.feature_ratio*self.search_area_scale)*\
                     (self.h/self.feature_ratio*self.search_area_scale)

    if self.search_area < self.cell_selection_thresh * self.filter_max_area:
        # Small targets: pick a smaller cell size, then recompute the area.
        # NOTE(review): the divisor (cell_selection_thresh *
        # filter_max_area) is applied to both the width factor and the
        # height factor, i.e. twice - confirm this is intended.
        self.cell_size=int(min(self.feature_ratio,max(1,int(np.ceil(np.sqrt(
            self.w*self.search_area_scale/(self.cell_selection_thresh*self.filter_max_area)*\
            self.h*self.search_area_scale/(self.cell_selection_thresh*self.filter_max_area)
        ))))))
        self.feature_ratio = self.cell_size
        self.search_area = (self.w / self.feature_ratio * self.search_area_scale) * \
                           (self.h / self.feature_ratio * self.search_area_scale)

    # Down-scale the sample when the search area exceeds filter_max_area.
    if self.search_area > self.filter_max_area:
        self.current_scale_factor = np.sqrt(self.search_area /
                                            self.filter_max_area)
    else:
        self.current_scale_factor = 1.

    self.base_target_sz = (self.w / self.current_scale_factor,
                           self.h / self.current_scale_factor)
    self.target_sz = self.base_target_sz
    if self.search_area_shape == 'proportional':
        self.crop_size = (int(self.base_target_sz[0] *
                              self.search_area_scale),
                          int(self.base_target_sz[1] *
                              self.search_area_scale))
    elif self.search_area_shape == 'square':
        w = int(
            np.sqrt(self.base_target_sz[0] * self.base_target_sz[1]) *
            self.search_area_scale)
        self.crop_size = (w, w)
    elif self.search_area_shape == 'fix_padding':
        tmp=int(np.sqrt(self.base_target_sz[0]*self.search_area_scale+(self.base_target_sz[1]-self.base_target_sz[0])/4))+\
            (self.base_target_sz[0]+self.base_target_sz[1])/2
        self.crop_size = (self.base_target_sz[0] + tmp,
                          self.base_target_sz[1] + tmp)
    else:
        raise ValueError

    # Snap the crop size to a multiple of the feature ratio.
    self.crop_size = (int(
        round(self.crop_size[0] / self.feature_ratio) * self.feature_ratio),
                      int(
                          round(self.crop_size[1] / self.feature_ratio) *
                          self.feature_ratio))
    self.feature_map_sz = (self.crop_size[0] // self.feature_ratio,
                           self.crop_size[1] // self.feature_ratio)
    # Label bandwidth derived from the target size in feature cells.
    output_sigma = np.sqrt(
        np.floor(self.base_target_sz[0] / self.feature_ratio) *
        np.floor(self.base_target_sz[1] /
                 self.feature_ratio)) * self.output_sigma_factor
    y = gaussian2d_rolled_labels(self.feature_map_sz, output_sigma)
    self.yf = fft2(y)

    if self.interpolate_response == 1:
        self.interp_sz = (self.feature_map_sz[0] * self.feature_ratio,
                          self.feature_map_sz[1] * self.feature_ratio)
    else:
        self.interp_sz = (self.feature_map_sz[0], self.feature_map_sz[1])
    self._window = cos_window(self.feature_map_sz)

    if self.number_of_scales > 0:
        scale_exp = np.arange(
            -int(np.floor((self.number_of_scales - 1) / 2)),
            int(np.ceil((self.number_of_scales - 1) / 2)) + 1)
        self.scale_factors = self.scale_step**scale_exp
        # Scale limits expressed as integer powers of the scale step.
        self.min_scale_factor = self.scale_step**(np.ceil(
            np.log(max(5 / self.crop_size[0], 5 / self.crop_size[1])) /
            np.log(self.scale_step)))
        self.max_scale_factor = self.scale_step**(np.floor(
            np.log(
                min(first_frame.shape[0] / self.base_target_sz[1],
                    first_frame.shape[1] / self.base_target_sz[0])) /
            np.log(self.scale_step)))

    if self.interpolate_response >= 3:
        # Integer index vectors rolled so that index 0 comes first.
        self.ky = np.roll(
            np.arange(-int(np.floor((self.feature_map_sz[1] - 1) / 2)),
                      int(np.ceil((self.feature_map_sz[1] - 1) / 2 + 1))),
            -int(np.floor((self.feature_map_sz[1] - 1) / 2)))
        self.kx = np.roll(
            np.arange(-int(np.floor((self.feature_map_sz[0] - 1) / 2)),
                      int(np.ceil((self.feature_map_sz[0] - 1) / 2 + 1))),
            -int(np.floor((self.feature_map_sz[0] - 1) / 2))).T

    # Size of the spatial window used inside ADMM.
    self.small_filter_sz = (int(
        np.floor(self.base_target_sz[0] / self.feature_ratio)), int(
            np.floor(self.base_target_sz[1] / self.feature_ratio)))

    self.scale_estimator = LPScaleEstimator(self.target_sz,
                                            config=self.scale_config)
    self.scale_estimator.init(first_frame, self._center,
                              self.base_target_sz,
                              self.current_scale_factor)

    # Learn the initial filter from the windowed first-frame features.
    pixels = self.get_sub_window(
        first_frame,
        self._center,
        model_sz=self.crop_size,
        scaled_sz=(int(
            np.round(self.crop_size[0] * self.current_scale_factor)),
                   int(
                       np.round(self.crop_size[1] *
                                self.current_scale_factor))))
    feature = self.extract_hc_feture(pixels, cell_size=self.feature_ratio)
    self.model_xf = fft2(self._window[:, :, None] * feature)
    self.g_f = self.ADMM(self.model_xf)
def tracking(self, img, pos, polish):
    """
    Detect the target around ``pos`` and measure its scale/rotation change.

    A sub-window is sampled at the position from the last frame, converted to
    the Fourier domain and correlated with the stored model.  The correlation
    response is blended with a colour response (all zeros when
    ``self.use_color_hist`` is false), the peak gives the new position, and a
    log-polar patch taken at that position is compared with
    ``self.model_patchLp`` through ``self.estimate_scale``.

    :param img: current frame
    :param pos: (x, y) centre estimate to search around
    :param polish: values > 0 select ``window_sz0``/``cos_window``; otherwise
        ``window_sz_search0``/``cos_window_search`` are used and the filter
        is padded to the search-window size
    :return: ``(pos, tmp_sc, tmp_rot, cscore, sscore)`` where ``cscore`` is
        ``PSR(response, 0.1)`` and ``tmp_sc``, ``tmp_rot``, ``sscore`` come
        from ``self.estimate_scale`` (``tmp_sc`` clipped to [0.6, 1.4],
        ``tmp_rot`` zeroed when outside [-1, 1])
    """
    large_num = 0
    if polish > large_num:
        w_sz0 = self.window_sz0
        c_w = self.cos_window
    else:
        w_sz0 = self.window_sz_search0
        c_w = self.cos_window_search

    if self.is_rotation:
        patch = self.get_affine_subwindow(img, pos, self.sc, self.rot, w_sz0)
    else:
        sz_s = (int(np.floor(self.sc[0] * w_sz0[0])),
                int(np.floor(self.sc[1] * w_sz0[1])))
        patchO = cv2.getRectSubPix(img, sz_s, pos)
        # The flag must be passed by keyword: the third positional parameter
        # of cv2.resize is ``dst``, not ``interpolation``.  This now matches
        # the keyword call used when the model patch is sampled in logupdate.
        patch = cv2.resize(patchO, w_sz0, interpolation=cv2.INTER_CUBIC)

    z = self.get_features(patch, self.cell_size)
    z = z * c_w[:, :, None]
    zf = fft2(z)
    ssz = (zf.shape[1], zf.shape[0], zf.shape[2])

    # calculate response of the classifier at all shifts
    wf = np.conj(self.model_xf) * self.model_alphaf[:, :, None] / np.size(
        self.model_xf)
    if polish <= large_num:
        # bring the filter to the spatial domain, pad it to the search
        # window size and transform it back
        w = pad(np.real(ifft2(wf)), (ssz[1], ssz[0]))
        wf = fft2(w)

    tmp_sz = ssz
    # compute convolution for each feature block in the Fourier domain
    # use general compute here for easy extension in future
    rff = np.sum(wf * zf, axis=2)
    rff_real = cv2.resize(rff.real, (tmp_sz[0], tmp_sz[1]),
                          interpolation=cv2.INTER_NEAREST)
    rff_imag = cv2.resize(rff.imag, (tmp_sz[0], tmp_sz[1]),
                          interpolation=cv2.INTER_NEAREST)
    rff = rff_real + 1.j * rff_imag
    response_cf = np.real(ifft2(rff))
    response_cf = crop_filter_response(
        response_cf, (response_cf.shape[1], response_cf.shape[0]))

    response_color = np.zeros_like(response_cf)
    if self.use_color_hist:
        object_likelihood = self.get_colour_map(patch, self.pl, self.pi,
                                                self.bin_mapping)
        response_color = get_center_likelihood(object_likelihood,
                                               self.target_sz0)
        response_color = cv2.resize(
            response_color, (response_cf.shape[1], response_cf.shape[0]),
            interpolation=cv2.INTER_CUBIC)

    # adaptive merge factor
    if self.adaptive_merge_factor is True:
        cf_conf = confidence_cf_apce(response_cf)
        adaptive_merge_factor = self.merge_factor * self.theta + (
            1 - self.theta) * (1 - cf_conf)
        response = (1 - adaptive_merge_factor) * response_cf \
            + adaptive_merge_factor * response_color
    else:
        response = (1 - self.merge_factor) * response_cf \
            + self.merge_factor * response_color

    if self.vis is True:
        self.score = response
        self.crop_size = self.window_sz

    # sub-pixel search
    pty, ptx = np.unravel_index(np.argmax(response, axis=None),
                                response.shape)
    if self.is_subpixel:
        # response-weighted centroid of a (2*slobe+1)^2 neighbourhood of
        # the peak; indices are clipped to the response borders
        slobe = 2
        idy = np.arange(pty - slobe, pty + slobe + 1)
        idx = np.arange(ptx - slobe, ptx + slobe + 1)
        idy = np.clip(idy, a_min=0, a_max=response.shape[0] - 1)
        idx = np.clip(idx, a_min=0, a_max=response.shape[1] - 1)
        weight_patch = response[idy, :][:, idx]
        s = np.sum(weight_patch) + 2e-16  # guard against division by zero
        pty = np.sum(np.sum(weight_patch, axis=1) * idy) / s
        ptx = np.sum(np.sum(weight_patch, axis=0) * idx) / s
    cscore = PSR(response, 0.1)

    # update the translation status
    dy = pty - (response.shape[0]) // 2
    dx = ptx - (response.shape[1]) // 2
    scale_patch_sz = (int(np.floor(self.sc[0] * self.scale_sz[0])),
                      int(np.floor(self.sc[1] * self.scale_sz[1])))
    if self.is_rotation:
        sn, cs = np.sin(self.rot), np.cos(self.rot)
        pp = np.array([[self.sc[1] * cs, -self.sc[0] * sn],
                       [self.sc[1] * sn, self.sc[0] * cs]])
        x, y = pos
        delta = self.cell_size * np.array([[dy, dx]]).dot(pp)
        x += delta[0, 1]
        y += delta[0, 0]
        pos = (x, y)
        patchL = self.get_affine_subwindow(img, pos, [1., 1.], self.rot,
                                           scale_patch_sz)
    else:
        x, y = pos
        pos = (x + self.sc[0] * self.cell_size * dx,
               y + self.sc[1] * self.cell_size * dy)
        patchL = cv2.getRectSubPix(img, scale_patch_sz, pos)

    # NOTE(review): the flag below is also passed positionally (i.e. as
    # ``dst``).  It is deliberately left unchanged here because the stored
    # template ``self.model_patchLp`` is built in logupdate with the very
    # same call; the two must be switched to ``interpolation=`` together.
    patchL = cv2.resize(patchL, self.scale_sz_window, cv2.INTER_CUBIC)
    patchLp = cv2.logPolar(patchL.astype(np.float32),
                           (patchL.shape[1] // 2, patchL.shape[0] // 2),
                           self.mag,
                           flags=cv2.INTER_LINEAR + cv2.WARP_FILL_OUTLIERS)
    patchLp = extract_hog_feature(patchLp, self.cell_size)
    #patchLp = patchLp * self.cos_window_scale[:, :, None]

    tmp_sc, tmp_rot, sscore = self.estimate_scale(self.model_patchLp,
                                                  patchLp, self.mag)
    tmp_sc = np.clip(tmp_sc, a_min=0.6, a_max=1.4)
    if tmp_rot > 1 or tmp_rot < -1:
        tmp_rot = 0
    return pos, tmp_sc, tmp_rot, cscore, sscore
def update(self, current_frame, vis=False):
    """
    Track the target into ``current_frame`` and refresh the filter ``g_f``.

    Features are extracted at ``number_of_scales`` candidate scales, correlated
    with ``g_f`` in the Fourier domain, and the response peak (found by
    arg-max or by ``resp_newton`` when ``interpolate_response == 4``) yields
    the displacement and the winning scale.  The model ``model_xf`` is then
    blended with features from the new location and ``g_f`` is recomputed by
    ``self.ADMM``.

    :param current_frame: image to search in
    :param vis: when ``True`` the centred response of the winning scale is
        stored in ``self.score``
    :return: ``([x, y, w, h], -1.0)``
    :raises ValueError: for ``interpolate_response == 3`` or any value
        outside 0..4
    """
    # Stack the per-scale feature maps along a new 4th axis.
    x = None
    for scale_ind in range(self.number_of_scales):
        current_scale = self.current_scale_factor * self.scale_factors[
            scale_ind]
        scaled_sz = (int(round(self.crop_size[0] * current_scale)),
                     int(round(self.crop_size[1] * current_scale)))
        sub_window = self.get_sub_window(current_frame,
                                         self._center,
                                         model_sz=self.crop_size,
                                         scaled_sz=scaled_sz)
        feature = self.extract_hc_feture(
            sub_window, self.cell_size)[:, :, :, np.newaxis]
        x = feature if x is None else np.concatenate((x, feature), axis=3)

    xtf = fft2(x * self._window[:, :, None, None])
    responsef = np.sum(np.conj(self.g_f)[:, :, :, None] * xtf, axis=2)

    mode = self.interpolate_response
    if mode == 2:
        self.interp_sz = (int(self.yf.shape[1] * self.feature_ratio *
                              self.current_scale_factor),
                          int(self.yf.shape[0] * self.feature_ratio *
                              self.current_scale_factor))
    responsef_padded = resize_dft2(responsef, self.interp_sz)
    response = np.real(ifft2(responsef_padded))

    def store_centred_score(scale_index):
        # circularly shift the chosen response slice by half its size
        score = response[:, :, scale_index]
        score = np.roll(score, int(np.floor(score.shape[0] / 2)), axis=0)
        self.score = np.roll(score, int(np.floor(score.shape[1] / 2)),
                             axis=1)

    if mode == 3:
        raise ValueError
    if mode == 4:
        disp_row, disp_col, sind = resp_newton(response, responsef_padded,
                                               self.newton_iterations,
                                               self.ky, self.kx,
                                               self.feature_map_sz)
        if vis is True:
            store_centred_score(sind)
    else:
        row, col, sind = np.unravel_index(np.argmax(response, axis=None),
                                          response.shape)
        if vis is True:
            store_centred_score(sind)
        # map the wrapped peak index to a signed displacement
        disp_row = (row + int(np.floor(self.interp_sz[1] - 1) / 2)
                    ) % self.interp_sz[1] - int(
                        np.floor((self.interp_sz[1] - 1) / 2))
        disp_col = (col + int(np.floor(self.interp_sz[0] - 1) / 2)
                    ) % self.interp_sz[0] - int(
                        np.floor((self.interp_sz[0] - 1) / 2))

    # displacement -> pixel conversion depends on the interpolation mode
    if mode in (0, 3, 4):
        factor = self.feature_ratio * self.current_scale_factor \
            * self.scale_factors[sind]
    elif mode == 1:
        factor = self.current_scale_factor * self.scale_factors[sind]
    elif mode == 2:
        factor = self.scale_factors[sind]
    else:
        raise ValueError
    dx = int(np.round(disp_col * factor))
    dy = int(np.round(disp_row * factor))

    # apply the winning scale, clamp, then let the scale estimator refine it
    # (the estimator is called with the centre from before this update)
    new_scale = self.current_scale_factor * self.scale_factors[sind]
    new_scale = max(new_scale, self.min_scale_factor)
    self.current_scale_factor = min(new_scale, self.max_scale_factor)
    self.current_scale_factor = self.scale_estimator.update(
        current_frame, self._center, self.base_target_sz,
        self.current_scale_factor)

    self._center = (self._center[0] + dx, self._center[1] + dy)

    # model update at the new position and scale
    model_scaled_sz = (
        int(round(self.crop_size[0] * self.current_scale_factor)),
        int(round(self.crop_size[1] * self.current_scale_factor)))
    pixels = self.get_sub_window(current_frame,
                                 self._center,
                                 model_sz=self.crop_size,
                                 scaled_sz=model_scaled_sz)
    feature = self.extract_hc_feture(pixels, cell_size=self.cell_size)
    xf = fft2(feature * self._window[:, :, None])
    self.model_xf = (
        1 - self.interp_factor) * self.model_xf + self.interp_factor * xf
    self.g_f = self.ADMM(self.model_xf)

    target_sz = (self.target_sz[0] * self.current_scale_factor,
                 self.target_sz[1] * self.current_scale_factor)
    box = [
        self._center[0] - target_sz[0] / 2,
        self._center[1] - target_sz[1] / 2,
        target_sz[0],
        target_sz[1],
    ]
    return box, -1.0
def logupdate(self, init, img, pos, tmp_sc, tmp_rot):
    """
    Apply a scale/rotation change and (re)learn the models at ``pos``.

    Updates ``self.sc``, ``self.rot`` and the derived window sizes, samples a
    patch at ``pos``, and computes the correlation-filter coefficients, the
    log-polar HOG scale template and, when ``self.use_color_hist`` is set,
    the colour histograms.  With ``init == 1`` these become the models;
    otherwise they are blended into the existing models by linear
    interpolation.

    :param init: 1 on the first frame, anything else afterwards
    :param img: current frame
    :param pos: (x, y) target centre
    :param tmp_sc: multiplicative scale change (reset to 1 if the scaled
        window would become smaller than 10 in summed side length)
    :param tmp_rot: rotation increment added to ``self.rot``
    """
    scaled_extent = np.floor(
        self.sc[0] * tmp_sc * self.window_sz0[0]) + np.floor(
            self.sc[1] * tmp_sc * self.window_sz0[1])
    if scaled_extent < 10:
        tmp_sc = 1.
    self.sc = (self.sc[0] * tmp_sc, self.sc[1] * tmp_sc)
    self.rot = self.rot + tmp_rot
    self.window_sz = (int(np.floor(self.sc[0] * self.window_sz0[0])),
                      int(np.floor(self.sc[1] * self.window_sz0[1])))
    self.window_sz_search = (
        int(np.floor(self.sc[0] * self.window_sz_search0[0])),
        int(np.floor(self.sc[1] * self.window_sz_search0[1])))

    # compute the current CF model
    # sampling the image
    if self.is_rotation:
        patch = self.get_affine_subwindow(img, pos, self.sc, self.rot,
                                          self.window_sz0)
    else:
        raw_patch = cv2.getRectSubPix(img, self.window_sz, pos)
        patch = cv2.resize(raw_patch,
                           self.window_sz0,
                           interpolation=cv2.INTER_CUBIC)
    x = self.get_features(patch, self.cell_size) * self.cos_window[:, :, None]
    xf = fft2(x)
    kf = self._kernel_correlation(xf, xf, self.kernel_type)
    alphaf = self.yf / (kf + self.lambda_)

    # patch used for the scale/rotation template
    scale_patch_sz = (int(np.floor(self.sc[0] * self.scale_sz[0])),
                      int(np.floor(self.sc[1] * self.scale_sz[1])))
    if self.is_rotation:
        # here is not similarity transformation
        patchL = self.get_affine_subwindow(img, pos, [1., 1.], self.rot,
                                           scale_patch_sz)
    else:
        patchL = cv2.getRectSubPix(img, scale_patch_sz, pos)
    # NOTE(review): cv2.INTER_CUBIC is passed positionally here, which is
    # the ``dst`` slot of cv2.resize; kept as-is to preserve behaviour.
    patchL = cv2.resize(patchL, self.scale_sz_window, cv2.INTER_CUBIC)

    # get logpolar space and apply feature extraction
    patchLp = cv2.logPolar(patchL.astype(np.float32),
                           (patchL.shape[1] // 2, patchL.shape[0] // 2),
                           self.mag,
                           flags=cv2.INTER_LINEAR + cv2.WARP_FILL_OUTLIERS)
    patchLp = extract_hog_feature(patchLp, self.cell_size)

    # updating color histogram probabilities
    sz = (patch.shape[1], patch.shape[0])
    if self.use_color_hist:
        pos_in = ((sz[0]) / 2 - 1, (sz[1]) / 2 - 1)
        lab_patch = patch
        inner_sz = (int(round(sz[0] * self.inter_patch_rate)),
                    int(round(sz[1] * self.inter_patch_rate)))
        inter_patch = cv2.getRectSubPix(lab_patch.astype(np.uint8), inner_sz,
                                        pos_in)
        self.interp_patch = inter_patch
        pl = self.get_color_space_hist(lab_patch, self.nbin)
        pi = self.get_color_space_hist(inter_patch, self.nbin)

    interp_factor_scale = self.learning_rate_scale
    if init == 1:
        # first frame: take the freshly computed quantities as the model
        self.model_alphaf = alphaf
        self.model_xf = xf
        self.model_patchLp = patchLp
        if self.use_color_hist:
            self.pl = pl
            self.pi = pi
        return

    # later frames: exponential moving average of every model component
    keep = 1 - self.interp_factor
    self.model_alphaf = keep * self.model_alphaf + self.interp_factor * alphaf
    self.model_xf = keep * self.model_xf + self.interp_factor * xf
    self.model_patchLp = (
        1 - interp_factor_scale
    ) * self.model_patchLp + interp_factor_scale * patchLp
    if self.use_color_hist:
        keep_color = 1 - self.color_update_rate
        self.pi = keep_color * self.pi + self.color_update_rate * pi
        self.pl = keep_color * self.pl + self.color_update_rate * pl
def update(self, current_frame):
    """
    Track the target into ``current_frame`` over a pool of candidate sizes.

    For every factor in ``self.search_size`` a patch of the correspondingly
    scaled padded target size is cropped, resized to ``self.crop_size`` and
    correlated with the model.  The global maximum over all responses selects
    both the displacement and the size factor; the model is then updated at
    the new position by linear interpolation.

    :param current_frame: image to search in; it is downscaled by 0.5 when
        ``self.resize`` is truthy
    :return: ``(bbox, max_score)`` with ``bbox = [x, y, w, h]`` expressed in
        the coordinates of the frame that was passed in
    """
    if self.resize:
        current_frame = cv2.resize(current_frame, dsize=None, fx=0.5,
                                   fy=0.5).astype(np.uint8)

    # one response map per candidate size, stacked along the last axis
    responses = []
    for size_factor in self.search_size:
        tmp_sz = (self.target_sz[0] * (1 + self.padding) * size_factor,
                  self.target_sz[1] * (1 + self.padding) * size_factor)
        patch = cv2.getRectSubPix(
            current_frame,
            (int(np.round(tmp_sz[0])), int(np.round(tmp_sz[1]))),
            self._center)
        patch = cv2.resize(patch, self.crop_size)
        hc_features = self.get_features(patch, self.cell_size)
        hc_features = hc_features * self._window[:, :, None]
        zf = fft2(hc_features)
        kzf = self._kernel_correlation(zf, self.model_xf, kernel=self.kernel)
        responses.append(np.real(ifft2(self.model_alphaf * kzf)))
    response = np.stack(responses, axis=2)

    delta_y, delta_x, sz_id = np.unravel_index(
        np.argmax(response, axis=None), response.shape)
    self.sz_id = sz_id

    # the response is circular: indices past the middle are negative shifts
    if delta_y + 1 > self.window_size[1] / 2:
        delta_y = delta_y - self.window_size[1]
    if delta_x + 1 > self.window_size[0] / 2:
        delta_x = delta_x - self.window_size[0]

    self.target_sz = (self.target_sz[0] * self.search_size[self.sz_id],
                      self.target_sz[1] * self.search_size[self.sz_id])
    tmp_sz = (self.target_sz[0] * (1 + self.padding),
              self.target_sz[1] * (1 + self.padding))
    current_size_factor = tmp_sz[0] / self.crop_size[0]
    x, y = self._center
    x += current_size_factor * self.cell_size * delta_x
    y += current_size_factor * self.cell_size * delta_y
    self._center = (x, y)

    # re-sample at the new position/size and update the model
    patch = cv2.getRectSubPix(
        current_frame,
        (int(np.round(tmp_sz[0])), int(np.round(tmp_sz[1]))),
        self._center)
    patch = cv2.resize(patch, self.crop_size)
    hc_features = self.get_features(patch, self.cell_size)
    hc_features = self._window[:, :, None] * hc_features
    xf = fft2(hc_features)
    kf = self._kernel_correlation(xf, xf, kernel=self.kernel)
    alphaf = self.yf / (kf + self.lambda_)
    self.model_alphaf = (
        1 - self.interp_factor
    ) * self.model_alphaf + self.interp_factor * alphaf
    self.model_xf = (
        1 - self.interp_factor) * self.model_xf + self.interp_factor * xf

    bbox = [(self._center[0] - self.target_sz[0] / 2),
            (self._center[1] - self.target_sz[1] / 2),
            self.target_sz[0],
            self.target_sz[1]]
    # Use the same truthiness test as the downscale above; the former
    # ``is True`` check skipped the rescale for flags such as 1 or
    # numpy.bool_ and returned a box in half-resolution coordinates.
    if self.resize:
        bbox = [ele * 2 for ele in bbox]
    max_score = response.max()
    return bbox, max_score
def init(self,first_frame,bbox):
    """
    Initialise the tracker state from the first frame and its bounding box.

    Sets the target geometry, template/rescale sizes, Gaussian label spectrum
    ``yf``, cosine window and (depending on ``self.scale_type``) a scale
    estimator; builds a foreground mask (from colour segmentation when
    ``self.use_segmentation`` is on, otherwise a centred rectangle); and
    finally learns the filter ``self.H`` and the per-channel weights
    ``self.chann_w``.

    :param first_frame: first image of the sequence (indexed as H x W x C)
    :param bbox: initial box ``(x, y, w, h)``; cast to int64
    :raises ValueError: if segmentation is enabled and
        ``self.segcolor_space`` is neither ``'bgr'`` nor ``'hsv'``
    """
    bbox=np.array(bbox).astype(np.int64)
    x,y,w,h=tuple(bbox)
    self.init_mask=np.ones((h,w),dtype=np.uint8)
    self._center=(x+w/2,y+h/2)
    self.w,self.h=w,h
    # channels 0 and 1 identical everywhere -> segmentation is switched off
    # (presumably a grayscale image stored in 3 channels)
    if np.all(first_frame[:,:,0]==first_frame[:,:,1]):
        self.use_segmentation=False
    # change 400 to 300
    # for larger cell_size
    self.cell_size=int(min(4,max(1,w*h/300)))
    self.base_target_sz=(w,h)
    self.target_sz=self.base_target_sz
    # square template: mean of the two padded side lengths
    template_size=(int(w+self.padding*np.sqrt(w*h)),int(h+self.padding*np.sqrt(w*h)))
    template_size=(template_size[0]+template_size[1])//2
    self.template_size=(template_size,template_size)
    # shrink (never enlarge, ratio is clipped to <= 1) towards an area of 200**2
    self.rescale_ratio=np.sqrt((200**2)/(self.template_size[0]*self.template_size[1]))
    self.rescale_ratio=np.clip(self.rescale_ratio,a_min=None,a_max=1)
    self.rescale_template_size=(int(self.rescale_ratio*self.template_size[0]),
                                int(self.rescale_ratio*self.template_size[1]))
    self.yf=fft2(gaussian2d_rolled_labels((int(self.rescale_template_size[0]/self.cell_size),
                                           int(self.rescale_template_size[1]/self.cell_size)),
                                          self.y_sigma))
    self._window=cos_window((self.yf.shape[1],self.yf.shape[0]))
    self.crop_size=self.rescale_template_size
    self.current_scale_factor = 1.
    # scale estimator; the min/max scale limits are only set for 'normal'
    if self.scale_type=='normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz, config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center, self.base_target_sz, self.current_scale_factor)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step
        self._min_scale_factor = self._scale_step ** np.ceil(
            np.log(np.max(5 / np.array(([self.crop_size[0], self.crop_size[1]])))) / np.log(self._scale_step))
        self._max_scale_factor = self._scale_step ** np.floor(np.log(np.min(
            first_frame.shape[:2] / np.array([self.base_target_sz[1], self.base_target_sz[0]]))) / np.log(
            self._scale_step))
    elif self.scale_type=='LP':
        self.scale_estimator=LPScaleEstimator(self.target_sz,config=self.scale_config)
        self.scale_estimator.init(first_frame,self._center,self.base_target_sz,self.current_scale_factor)
    # create dummy mask (approximation for segmentation)
    # size of the object in feature space
    obj_sz=(int(self.rescale_ratio*(self.base_target_sz[0]/self.cell_size)),
            int(self.rescale_ratio*(self.base_target_sz[1]/self.cell_size)))
    x0=int((self.yf.shape[1]-obj_sz[0])/2)
    y0=int((self.yf.shape[0]-obj_sz[1])/2)
    x1=x0+obj_sz[0]
    y1=y0+obj_sz[1]
    self.target_dummy_mask=np.zeros_like(self.yf,dtype=np.uint8)
    self.target_dummy_mask[y0:y1,x0:x1]=1
    self.target_dummy_area=np.sum(self.target_dummy_mask)
    if self.use_segmentation:
        if self.segcolor_space=='bgr':
            seg_img=first_frame
        elif self.segcolor_space=='hsv':
            seg_img=cv2.cvtColor(first_frame,cv2.COLOR_BGR2HSV)
            # rescale channel 0 from a 0..180 range to 0..255
            seg_img[:, :, 0] = (seg_img[:, :, 0].astype(np.float32)/180*255)
            seg_img = seg_img.astype(np.uint8)
        else:
            raise ValueError
        hist_fg=Histogram(3,self.nbins)
        hist_bg=Histogram(3,self.nbins)
        self.extract_histograms(seg_img,bbox,hist_fg,hist_bg)
        mask=self.segment_region(seg_img,self._center,self.template_size,self.base_target_sz,self.current_scale_factor,
                                 hist_fg,hist_bg)
        self.hist_bg_p_bins=hist_bg.p_bins
        self.hist_fg_p_bins=hist_fg.p_bins
        # restrict the segmentation to a bbox-sized rectangle centred in the mask
        init_mask_padded=np.zeros_like(mask)
        pm_x0=int(np.floor(mask.shape[1]/2-bbox[2]/2))
        pm_y0=int(np.floor(mask.shape[0]/2-bbox[3]/2))
        init_mask_padded[pm_y0:pm_y0+bbox[3],pm_x0:pm_x0+bbox[2]]=1
        mask=mask*init_mask_padded
        mask=cv2.resize(mask,(self.yf.shape[1],self.yf.shape[0]))
        # keep the segmentation only if mask_normal accepts it; otherwise
        # fall back to the rectangular dummy mask
        if self.mask_normal(mask,self.target_dummy_area) is True:
            kernel=cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3),anchor=(1,1))
            mask=cv2.dilate(mask,kernel)
        else:
            mask=self.target_dummy_mask
    else:
        mask=self.target_dummy_mask
    # extract features
    f=self.get_csr_features(first_frame,self._center,self.current_scale_factor,
                            self.template_size,self.rescale_template_size,self.cell_size)
    f=f*self._window[:,:,None]
    # create filters using segmentation mask
    self.H=self.create_csr_filter(f,self.yf,mask)
    # channel weights: per-channel maximum of the filter response,
    # normalised to sum to one
    response=np.real(ifft2(fft2(f)*np.conj(self.H)))
    chann_w=np.max(response.reshape(response.shape[0]*response.shape[1],-1),axis=0)
    self.chann_w=chann_w/np.sum(chann_w)
def init(self, first_frame, bbox):
    """
    Initialise the tracker state from the first frame and its bounding box.

    Derives the background/foreground areas and their normalised sizes,
    builds the initial colour histograms, the cosine window and the Gaussian
    labels, learns the correlation-filter numerator/denominator
    (``hf_num``/``hf_den``) and, when ``self.scale_adaptation`` is ``True``,
    the 1-D scale filter (``sf_num``/``sf_den``).

    :param first_frame: first image of the sequence; converted to float32
    :param bbox: initial box ``(x, y, w, h)``; cast to int64
    """
    first_frame = first_frame.astype(np.float32)
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h
    self.crop_size = (int(w * (1 + self.padding)),
                      int(h * (1 + self.padding)))
    self.target_sz = (self.w, self.h)
    self.bin_mapping = self.get_bin_mapping(self.n_bins)

    # background area: target plus the average dimension; foreground area:
    # target shrunk by inner_padding * average dimension
    avg_dim = (w + h) / 2
    self.bg_area = (round(w + avg_dim), round(h + avg_dim))
    self.fg_area = (int(round(w - avg_dim * self.inner_padding)),
                    int(round(h - avg_dim * self.inner_padding)))
    # keep the background area inside the frame
    self.bg_area = (int(min(self.bg_area[0], first_frame.shape[1] - 1)),
                    int(min(self.bg_area[1], first_frame.shape[0] - 1)))
    # make (bg_area - target_sz) and (bg_area - fg_area) even in each axis
    self.bg_area = (self.bg_area[0] - (self.bg_area[0] - self.target_sz[0]) % 2,
                    self.bg_area[1] - (self.bg_area[1] - self.target_sz[1]) % 2)
    self.fg_area = (self.fg_area[0] + (self.bg_area[0] - self.fg_area[0]) % 2,
                    self.fg_area[1] + (self.bg_area[1] - self.fg_area[1]) % 2)
    # factor that maps the background area to an area of self.fixed_area
    self.area_resize_factor = np.sqrt(self.fixed_area /
                                      (self.bg_area[0] * self.bg_area[1]))
    self.norm_bg_area = (round(self.bg_area[0] * self.area_resize_factor),
                         round(self.bg_area[1] * self.area_resize_factor))
    self.cf_response_size = (int(
        np.floor(self.norm_bg_area[0] / self.hog_cell_size)), int(
            np.floor(self.norm_bg_area[1] / self.hog_cell_size)))
    norm_target_sz_w = 0.75 * self.norm_bg_area[
        0] - 0.25 * self.norm_bg_area[1]
    norm_target_sz_h = 0.75 * self.norm_bg_area[
        1] - 0.25 * self.norm_bg_area[0]
    self.norm_target_sz = (round(norm_target_sz_w), round(norm_target_sz_h))
    norm_pad = (int(np.floor(
        (self.norm_bg_area[0] - norm_target_sz_w) / 2)), int(np.floor(
            (self.norm_bg_area[1] - norm_target_sz_h) / 2)))
    radius = min(norm_pad[0], norm_pad[1])
    # square search region of odd side length 2 * radius + 1
    self.norm_delta_area = (2 * radius + 1, 2 * radius + 1)
    self.norm_pwp_search_area = (self.norm_target_sz[0] +
                                 self.norm_delta_area[0] - 1,
                                 self.norm_target_sz[1] +
                                 self.norm_delta_area[1] - 1)

    # initial colour histograms; new_pwp_model is True only for this call
    patch_padded = self.get_sub_window(first_frame, self._center,
                                       self.norm_bg_area, self.bg_area)
    self.new_pwp_model = True
    self.bg_hist, self.fg_hist = self.update_hist_model(
        self.new_pwp_model, patch_padded, self.bg_area, self.fg_area,
        self.target_sz, self.norm_bg_area, self.n_bins, )
    self.new_pwp_model = False

    self._window = cos_window(self.cf_response_size)
    output_sigma = np.sqrt(
        self.norm_target_sz[0] * self.norm_target_sz[1]
    ) * self.output_sigma_factor / self.hog_cell_size
    self.y = gaussian2d_rolled_labels_staple(self.cf_response_size,
                                             output_sigma)
    self._init_response_center = np.unravel_index(
        np.argmax(self.y, axis=None), self.y.shape)
    self.yf = fft2(self.y)

    if self.use_ca:
        # w,h format
        # four offsets, one target size up/left/down/right of the centre
        self.offset = [[0, -self.target_sz[1]], [-self.target_sz[0], 0],
                       [0, self.target_sz[1]], [self.target_sz[0], 0]]

    if self.scale_adaptation is True:
        self.scale_factor = 1
        self.base_target_sz = self.target_sz
        self.scale_sigma = np.sqrt(
            self.num_scales) * self.scale_sigma_factor
        # Gaussian labels over the scale index, centred on the middle scale
        ss = np.arange(1, self.num_scales + 1) - np.ceil(
            self.num_scales / 2)
        ys = np.exp(-0.5 * (ss**2) / (self.scale_sigma**2))
        self.ysf = np.fft.fft(ys)
        # Hann window of length num_scales (first sample dropped when even)
        if self.num_scales % 2 == 0:
            scale_window = np.hanning(self.num_scales + 1)
            self.scale_window = scale_window[1:]
        else:
            self.scale_window = np.hanning(self.num_scales)
        ss = np.arange(1, self.num_scales + 1)
        self.scale_factors = self.scale_step**(
            np.ceil(self.num_scales / 2) - ss)
        # shrink the scale model when the target exceeds the maximum area
        self.scale_model_factor = 1.
        if (self.w * self.h) > self.scale_model_max_area:
            self.scale_model_factor = np.sqrt(self.scale_model_max_area /
                                              (self.w * self.h))
        self.scale_model_sz = (int(
            np.floor(self.w * self.scale_model_factor)), int(
                np.floor(self.h * self.scale_model_factor)))
        self.current_scale_factor = 1.
        # scale limits expressed as integer powers of scale_step
        self.min_scale_factor = self.scale_step**(int(
            np.ceil(
                np.log(max(5 / self.crop_size[0], 5 / self.crop_size[1])) /
                np.log(self.scale_step))))
        self.max_scale_factor = self.scale_step**(int(
            np.floor((np.log(
                min(first_frame.shape[1] / self.w,
                    first_frame.shape[0] / self.h)) /
                      np.log(self.scale_step)))))

    # learn the translation filter from the windowed feature map
    im_patch_bg = self.get_sub_window(first_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.hog_cell_size)
    xt = self._window[:, :, None] * xt
    xtf = fft2(xt)
    if self.use_ca:
        # accumulate the spectra energy of the four offset patches and add
        # it, weighted by lambda_2, to the denominator
        sum_kfn = np.zeros_like(xtf)
        for j in range(len(self.offset)):
            im_patch_bgn = self.get_sub_window(
                first_frame, (self._center[0] + self.offset[j][0],
                              self._center[1] + self.offset[j][1]),
                self.norm_bg_area, self.bg_area)
            xtn = self.get_feature_map(im_patch_bgn, self.hog_cell_size)
            xtn = self._window[:, :, None] * xtn
            xtfn = fft2(xtn)
            sum_kfn += np.conj(xtfn) * xtfn
        self.hf_num = self.yf[:, :, None] * np.conj(xtf)
        self.hf_den = np.conj(
            xtf) * xtf + self.lambda_ + self.lambda_2 * sum_kfn
    else:
        self.hf_num = np.conj(self.yf)[:, :, None] * xtf / (
            self.cf_response_size[0] * self.cf_response_size[1])
        self.hf_den = np.conj(xtf) * xtf / (self.cf_response_size[0] *
                                            self.cf_response_size[1])

    if self.scale_adaptation is True:
        # learn the 1-D scale filter
        im_patch_scale = self.get_scale_subwindow(
            first_frame, self._center, self.base_target_sz,
            self.scale_factor * self.scale_factors, self.scale_window,
            self.scale_model_sz, self.hog_scale_cell_size)
        xsf = np.fft.fft(im_patch_scale, axis=1)
        self.sf_den = np.sum(xsf * np.conj(xsf), axis=0)
        self.sf_num = self.ysf * np.conj(xsf)
    self.rect_position_padded = None
def update(self, current_frame, vis=False):
    """
    Track the target into ``current_frame`` and update all models.

    The correlation-filter response and the colour-histogram response are
    merged with ``self.merge_factor``; the peak of the merged map moves
    ``self._center``.  With ``self.scale_adaptation`` the scale filter then
    selects a new scale and the area bookkeeping is recomputed.  Finally the
    filter, the histograms and (optionally) the scale filter are updated.

    :param current_frame: image to search in
    :param vis: when ``True`` the merged response is stored in ``self.score``
    :return: ``[x, y, w, h]`` of the target
    """
    im_patch_cf = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    pwp_search_area = (round(self.norm_pwp_search_area[0] /
                             self.area_resize_factor),
                       round(self.norm_pwp_search_area[1] /
                             self.area_resize_factor))
    im_patch_pwp = self.get_sub_window(current_frame, self._center,
                                       self.norm_pwp_search_area,
                                       pwp_search_area)
    xt = self.get_feature_map(im_patch_cf, self.hog_cell_size)
    xt_windowed = self._window[:, :, None] * xt
    xtf = fft2(xt_windowed)

    # filter = numerator / denominator; lambda_ is added here only without
    # use_ca (with use_ca it is already part of the stored denominator)
    if self.use_ca is False:
        if self.den_per_channel:
            hf = self.hf_num / (self.hf_den + self.lambda_)
        else:
            hf = self.hf_num / (np.sum(self.hf_den, axis=2) +
                                self.lambda_)[:, :, None]
    else:
        if self.den_per_channel:
            hf = self.hf_num / self.hf_den
        else:
            hf = self.hf_num / (np.sum(self.hf_den, axis=2)[:, :, None])

    # the two variants store the numerator with opposite conjugation
    if self.use_ca is False:
        response_cf = np.real(ifft2(np.sum(np.conj(hf) * xtf, axis=2)))
    else:
        response_cf = np.real(ifft2(np.sum(hf * xtf, axis=2)))

    response_sz = (self.floor_odd(self.norm_delta_area[0] /
                                  self.hog_cell_size),
                   self.floor_odd(self.norm_delta_area[1] /
                                  self.hog_cell_size))
    response_cf = crop_filter_response(response_cf, response_sz)
    if self.hog_cell_size > 1:
        # NOTE(review): both branches are identical, and cv2.INTER_NEAREST
        # is passed positionally, i.e. in the ``dst`` slot of cv2.resize
        # rather than as ``interpolation`` -- confirm the intended mode.
        if self.use_ca is True:
            #response_cf = self.mex_resize(response_cf, self.norm_delta_area)
            response_cf = cv2.resize(response_cf, self.norm_delta_area,
                                     cv2.INTER_NEAREST)
        else:
            response_cf = cv2.resize(response_cf, self.norm_delta_area,
                                     cv2.INTER_NEAREST)

    likelihood_map = self.get_colour_map(im_patch_pwp, self.bg_hist,
                                         self.fg_hist, self.bin_mapping)
    # NaNs in either response are replaced by zero before merging
    likelihood_map[np.isnan(likelihood_map)] = 0.
    response_cf[np.isnan(response_cf)] = 0.
    self.norm_target_sz = (int(self.norm_target_sz[0]),
                           int(self.norm_target_sz[1]))
    response_pwp = get_center_likelihood(likelihood_map,
                                         self.norm_target_sz)
    response = (1 - self.merge_factor
                ) * response_cf + self.merge_factor * response_pwp
    if vis is True:
        self.score = response

    # peak offset from the map centre, converted back to frame pixels
    curr = np.unravel_index(np.argmax(response, axis=None), response.shape)
    center = ((self.norm_delta_area[0] - 1) / 2,
              (self.norm_delta_area[1] - 1) / 2)
    dy = (curr[0] - center[1]) / self.area_resize_factor
    dx = (curr[1] - center[0]) / self.area_resize_factor
    x_c, y_c = self._center
    x_c += dx
    y_c += dy
    self._center = (x_c, y_c)

    if self.scale_adaptation:
        # scale search at the new position
        im_patch_scale = self.get_scale_subwindow(
            current_frame, self._center, self.base_target_sz,
            self.scale_factor * self.scale_factors, self.scale_window,
            self.scale_model_sz, self.hog_scale_cell_size)
        xsf = np.fft.fft(im_patch_scale, axis=1)
        scale_response = np.real(
            np.fft.ifft(
                np.sum(self.sf_num * xsf, axis=0) /
                (self.sf_den + self.lambda_)))
        recovered_scale = np.argmax(scale_response)
        self.scale_factor = self.scale_factor * self.scale_factors[
            recovered_scale]
        self.scale_factor = np.clip(self.scale_factor,
                                    a_min=self.min_scale_factor,
                                    a_max=self.max_scale_factor)
        # recompute target size and the bg/fg areas for the new scale
        # (same construction as in init, based on the new target_sz)
        self.target_sz = (round(self.base_target_sz[0] * self.scale_factor),
                          round(self.base_target_sz[1] * self.scale_factor))
        avg_dim = (self.target_sz[0] + self.target_sz[1]) / 2
        bg_area = (round(self.target_sz[0] + avg_dim),
                   round(self.target_sz[1] + avg_dim))
        fg_area = (round(self.target_sz[0] - avg_dim * self.inner_padding),
                   round(self.target_sz[1] - avg_dim * self.inner_padding))
        bg_area = (min(bg_area[0], current_frame.shape[1] - 1),
                   min(bg_area[1], current_frame.shape[0] - 1))
        self.bg_area = (bg_area[0] - (bg_area[0] - self.target_sz[0]) % 2,
                        bg_area[1] - (bg_area[1] - self.target_sz[1]) % 2)
        self.fg_area = (fg_area[0] + (self.bg_area[0] - fg_area[0]) % 2,
                        fg_area[1] + (self.bg_area[1] - fg_area[1]) % 2)
        self.area_resize_factor = np.sqrt(
            self.fixed_area / (self.bg_area[0] * self.bg_area[1]))

    # model update: new numerator/denominator at the new position
    im_patch_bg = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.hog_cell_size)
    xt = self._window[:, :, None] * xt
    xtf = fft2(xt)
    if self.use_ca:
        sum_kfn = np.zeros_like(xtf)
        for j in range(len(self.offset)):
            im_patch_bgn = self.get_sub_window(
                current_frame, (self._center[0] + self.offset[j][0],
                                self._center[1] + self.offset[j][1]),
                self.norm_bg_area, self.bg_area)
            xtn = self.get_feature_map(im_patch_bgn, self.hog_cell_size)
            xtn = self._window[:, :, None] * xtn
            xtfn = fft2(xtn)
            sum_kfn += np.conj(xtfn) * xtfn
        new_hf_num = self.yf[:, :, None] * np.conj(xtf)
        new_hf_den = np.conj(
            xtf) * xtf + self.lambda_ + self.lambda_2 * sum_kfn
    else:
        new_hf_num = np.conj(self.yf)[:, :, None] * xtf / (
            self.cf_response_size[0] * self.cf_response_size[1])
        new_hf_den = (np.conj(xtf) * xtf) / (self.cf_response_size[0] *
                                             self.cf_response_size[1])

    # linear interpolation of the stored filter with the new estimate
    self.hf_den = (1 - self.interp_factor_cf
                   ) * self.hf_den + self.interp_factor_cf * new_hf_den
    self.hf_num = (1 - self.interp_factor_cf
                   ) * self.hf_num + self.interp_factor_cf * new_hf_num
    self.bg_hist, self.fg_hist = self.update_hist_model(
        self.new_pwp_model, im_patch_bg, self.bg_area, self.fg_area,
        self.target_sz, self.norm_bg_area, self.n_bins)

    if self.scale_adaptation:
        # scale filter update with its own learning rate
        im_patch_scale = self.get_scale_subwindow(
            current_frame, self._center, self.base_target_sz,
            self.scale_factor * self.scale_factors, self.scale_window,
            self.scale_model_sz, self.hog_scale_cell_size)
        xsf = np.fft.fft(im_patch_scale, axis=1)
        new_sf_num = self.ysf * np.conj(xsf)
        new_sf_den = np.sum(xsf * np.conj(xsf), axis=0)
        self.sf_den = (
            1 - self.interp_factor_scale
        ) * self.sf_den + self.interp_factor_scale * new_sf_den
        self.sf_num = (
            1 - self.interp_factor_scale
        ) * self.sf_num + self.interp_factor_scale * new_sf_num

    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2, self.target_sz[0],
        self.target_sz[1]
    ]
def _training(self, x, y):
    """
    Compute the filter coefficients in the Fourier domain.

    :param x: training sample, passed twice to ``self._dgk`` to obtain its
        auto-correlation kernel
    :param y: desired response (spatial domain)
    :return: ``fft2(y) / (fft2(k) + self.lambda_)``
    """
    kernel = self._dgk(x, x)
    return fft2(y) / (fft2(kernel) + self.lambda_)
def _detection(self, alphaf, x, z):
    """
    Evaluate the learned filter on a new sample.

    :param alphaf: filter coefficients in the Fourier domain
    :param x: model sample
    :param z: candidate sample
    :return: real part of ``ifft2(alphaf * fft2(self._dgk(x, z)))``
    """
    kernel_f = fft2(self._dgk(x, z))
    return np.real(ifft2(alphaf * kernel_f))
def update(self, current_frame, vis=False):
    """
    Track the target into ``current_frame`` and refresh the filter.

    Detection is repeated up to ``self.refinement_iterations`` times, or
    until the centre moves by no more than 1e-2 in both axes.  Each pass
    extracts features at every scale in ``self.sc * self.scale_factors``,
    correlates them with ``self.f_pre_f_hc`` and refines the peak with
    ``resp_newton``.  Afterwards the filter is re-learned by ``self.ADMM``
    from features at the final position.

    :param current_frame: 3-channel image
    :param vis: when ``True`` the centred response of the winning scale is
        stored in ``self.score``
    :return: ``([x, y, w, h], -1.0)``
    """
    assert len(current_frame.shape) == 3 and current_frame.shape[2] == 3

    def rounded_center():
        return (int(np.round(self._center[0])),
                int(np.round(self._center[1])))

    old_pos = (np.inf, np.inf)
    iteration = 1
    while iteration <= self.refinement_iterations and (
            np.abs(old_pos[0] - self._center[0]) > 1e-2
            or np.abs(old_pos[1] - self._center[1]) > 1e-2):
        sample_scales = self.sc * self.scale_factors
        sample_pos = rounded_center()

        # stack the per-scale feature maps along a new 4th axis
        xt_hc = None
        for scale in sample_scales:
            scaled_sz = (int(round(self.crop_size[0] * scale)),
                         int(round(self.crop_size[1] * scale)))
            sub_window = self.get_sub_window(current_frame,
                                             sample_pos,
                                             model_sz=self.crop_size,
                                             scaled_sz=scaled_sz)
            hc_features = self.extrac_hc_feature(
                sub_window, self.cell_size)[:, :, :, np.newaxis]
            xt_hc = hc_features if xt_hc is None else np.concatenate(
                (xt_hc, hc_features), axis=3)

        xtf_hc = fft2(xt_hc * self.cosine_window[:, :, None, None])
        responsef = np.sum(np.conj(self.f_pre_f_hc)[:, :, :, None] * xtf_hc,
                           axis=2)
        response = np.real(ifft2(responsef))
        disp_row, disp_col, sind = resp_newton(response, responsef,
                                               self.newton_iterations,
                                               self.ky, self.kx,
                                               self.feature_map_sz)

        if vis is True:
            # circularly shift the winning response slice by half its size
            score = response[:, :, sind].astype(np.float32)
            score = np.roll(score, int(np.floor(score.shape[0] / 2)), axis=0)
            self.score = np.roll(score, int(np.floor(score.shape[1] / 2)),
                                 axis=1)

        # displacement in feature cells -> frame pixels at the chosen scale
        dx = disp_col * self.cell_size * self.sc * self.scale_factors[sind]
        dy = disp_row * self.cell_size * self.sc * self.scale_factors[sind]
        scale_change_factor = self.scale_factors[sind]

        old_pos = self._center
        self._center = (sample_pos[0] + dx, sample_pos[1] + dy)

        self.sc = self.sc * scale_change_factor
        self.sc = np.clip(self.sc, self._min_scale_factor,
                          self._max_scale_factor)
        self.sc = self.scale_estimator.update(current_frame, self._center,
                                              self.base_target_sz, self.sc)
        if self.scale_type == 'normal':
            self.sc = np.clip(self.sc,
                              a_min=self._min_scale_factor,
                              a_max=self._max_scale_factor)
        iteration += 1

    # re-learn the filter at the final position and scale
    sample_pos = rounded_center()
    patch = self.get_sub_window(
        current_frame,
        sample_pos,
        model_sz=self.crop_size,
        scaled_sz=(int(np.round(self.crop_size[0] * self.sc)),
                   int(np.round(self.crop_size[1] * self.sc))))
    xl_hc = self.extrac_hc_feature(patch, self.cell_size)
    xlf_hc = fft2(xl_hc * self.cosine_window[:, :, None])
    mu = self.temporal_regularization_factor
    self.f_pre_f_hc = self.ADMM(xlf_hc, self.f_pre_f_hc, mu)

    target_sz = (self.base_target_sz[0] * self.sc,
                 self.base_target_sz[1] * self.sc)
    box = [(self._center[0] - (target_sz[0]) / 2),
           (self._center[1] - (target_sz[1]) / 2),
           target_sz[0],
           target_sz[1]]
    return box, -1.0
def update(self, current_frame, vis=False):
    """
    detect the target in a new frame, then retrain on the new position

    The patch around ``self._center`` is turned into features according to
    ``self.features`` (``'gray'``, ``'color'``, ``'hog'`` or ``'cn'``),
    windowed, transformed with ``fft2`` and passed to ``self._detection``.
    The arg-max of the response, read as a wrapped (circular) offset and
    multiplied by ``self.cell_size``, moves the centre. A second patch is
    then extracted the same way at the new centre, and ``self.alphaf`` and
    ``self.xf`` are blended with the newly trained values using
    ``self.interp_factor``.

    :param current_frame: image with exactly three dimensions and three
        channels on the last axis (enforced by the assert below)
    :param vis: when exactly ``True``, the response map is stored, rolled by
        half its size on both axes, in ``self.score``
    :return: tuple ``([x, y, w, h], max_score)``; ``x, y`` is the new centre
        minus half of ``self.w`` / ``self.h`` and ``max_score`` is the
        maximum of the detection response
    :raises NotImplementedError: if ``self.features`` is not one of the four
        supported names
    """
    assert len(current_frame.shape) == 3 and current_frame.shape[2] == 3

    feature_type = self.features
    if feature_type not in ('color', 'gray', 'hog', 'cn'):
        raise NotImplementedError

    if feature_type == 'gray':
        current_frame = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY)
    raw_pixels = feature_type in ('color', 'gray')
    if raw_pixels:
        current_frame = current_frame.astype(np.float32) / 255

    def extract_features():
        # One extraction path for both the detection and the training patch.
        # It reads self._center at call time, so the second call sees the
        # updated centre.
        patch = self._crop(current_frame, self._center, (self.w, self.h))
        if raw_pixels:
            # Previously only the detection patch was mean-subtracted, so
            # the template was blended with patches normalised differently
            # from the samples it is later compared against.
            return patch - np.mean(patch)
        patch = cv2.resize(patch,
                           (self.window_size[0] * self.cell_size,
                            self.window_size[1] * self.cell_size))
        if feature_type == 'hog':
            return extract_hog_feature(patch, cell_size=self.cell_size)
        return extract_cn_feature(patch, cell_size=self.cell_size)

    # --- detection ---------------------------------------------------------
    zf = fft2(self._get_windowed(extract_features(), self._window))
    responses = self._detection(self.alphaf, self.xf, zf, kernel=self.kernel)
    if vis is True:
        score = responses
        score = np.roll(score, int(np.floor(score.shape[0] / 2)), axis=0)
        score = np.roll(score, int(np.floor(score.shape[1] / 2)), axis=1)
        self.score = score

    peak = np.unravel_index(np.argmax(responses, axis=None), responses.shape)
    # Peaks past the middle of the window are read as negative offsets.
    if peak[0] + 1 > self.window_size[1] / 2:
        dy = peak[0] - self.window_size[1]
    else:
        dy = peak[0]
    if peak[1] + 1 > self.window_size[0] / 2:
        dx = peak[1] - self.window_size[0]
    else:
        dx = peak[1]
    dy, dx = dy * self.cell_size, dx * self.cell_size

    x_c, y_c = self._center
    self._center = (np.floor(x_c + dx), np.floor(y_c + dy))

    # --- model update ------------------------------------------------------
    new_xf = fft2(self._get_windowed(extract_features(), self._window))
    alphaf_new = self._training(new_xf, self.yf, kernel=self.kernel)
    self.alphaf = (self.interp_factor * alphaf_new
                   + (1 - self.interp_factor) * self.alphaf)
    self.xf = (self.interp_factor * new_xf
               + (1 - self.interp_factor) * self.xf)

    max_score = responses.max()
    return [(self._center[0] - self.w / 2),
            (self._center[1] - self.h / 2),
            self.w,
            self.h], max_score
def update(self, current_frame, vis=False):
    """
    localise the target, update scale, rebuild the mask and blend the filter

    Steps, in order:

    1. correlate the windowed features with ``self.H`` (per channel and
       weighted by ``self.chann_w`` when channel weights are enabled);
    2. refine the response arg-max with ``self.subpixel_peak`` and move
       ``self._center``;
    3. update ``self.current_scale_factor`` through ``self.scale_estimator``;
    4. when ``self.use_segmentation`` is set, blend the colour histograms
       and build a new mask, otherwise use ``self.target_dummy_mask``;
    5. build a new filter with ``self.create_csr_filter`` and blend it into
       ``self.H`` (and ``self.chann_w``) with the learning rates.

    :param current_frame: image passed to the feature extractor, the scale
        estimator and (optionally) the segmentation
    :param vis: when exactly ``True``, the response map is stored, rolled by
        half its size on both axes, in ``self.score``
    :return: ``[x, y, w, h]`` where ``x, y`` is the rounded centre minus
        half of ``self.target_sz``
    :raises ValueError: if segmentation is enabled and
        ``self.segcolor_space`` is neither ``'bgr'`` nor ``'hsv'``
    """
    # Evaluate the flag once. The original mixed `is True` with plain
    # truthiness, so a truthy value that is not the `True` singleton skipped
    # the per-channel stage and then hit an undefined `channel_discr`.
    use_weights = bool(self.use_channel_weights)

    f = self.get_csr_features(current_frame, self._center,
                              self.current_scale_factor, self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    if use_weights:
        response_chann = np.real(ifft2(fft2(f) * np.conj(self.H)))
        response = np.sum(response_chann * self.chann_w[None, None, :],
                          axis=2)
    else:
        response = np.real(ifft2(np.sum(fft2(f) * np.conj(self.H), axis=2)))

    if vis is True:
        score = response
        score = np.roll(score, int(np.floor(score.shape[0] / 2)), axis=0)
        score = np.roll(score, int(np.floor(score.shape[1] / 2)), axis=1)
        self.score = score

    curr = np.unravel_index(np.argmax(response, axis=None), response.shape)

    if use_weights:
        # Imported once per call (not once per channel) and only when
        # needed, from the package's public namespace.
        from skimage.feature import peak_local_max

        n_channels = response_chann.shape[2]
        channel_discr = np.ones(n_channels)
        for i in range(n_channels):
            norm_response = self.normalize_img(response_chann[:, :, i])
            peak_locs = peak_local_max(norm_response, min_distance=5)
            if len(peak_locs) < 2:
                # Fewer than two peaks: keep the default weight of 1.
                continue
            peak_vals = norm_response[peak_locs[:, 0], peak_locs[:, 1]]
            max_val, second_max_val = sorted(peak_vals, reverse=True)[:2]
            # 1 - (second peak / top peak), floored at 0.5.
            channel_discr[i] = max(
                0.5, 1 - (second_max_val / (max_val + 1e-10)))

    # Three-point neighbourhoods around the peak, wrapping at the borders.
    n_rows, n_cols = response.shape[0], response.shape[1]
    v_neighbors = response[[(curr[0] - 1) % n_rows,
                            (curr[0]) % n_rows,
                            (curr[0] + 1) % n_rows], curr[1]]
    h_neighbors = response[curr[0], [(curr[1] - 1) % n_cols,
                                     (curr[1]) % n_cols,
                                     (curr[1] + 1) % n_cols]]
    row = curr[0] + self.subpixel_peak(v_neighbors)
    col = curr[1] + self.subpixel_peak(h_neighbors)
    # Peaks past the middle of the map are read as negative offsets.
    if row + 1 > n_rows / 2:
        row = row - n_rows
    if col + 1 > n_cols / 2:
        col = col - n_cols

    # displacement
    cell_to_frame = (self.current_scale_factor * self.cell_size
                     * (1 / self.rescale_ratio))
    dx = cell_to_frame * col
    dy = cell_to_frame * row
    self._center = (self._center[0] + dx, self._center[1] + dy)

    self.current_scale_factor = self.scale_estimator.update(
        current_frame, self._center, self.base_target_sz,
        self.current_scale_factor)
    if self.scale_type == 'normal':
        self.current_scale_factor = np.clip(self.current_scale_factor,
                                            a_min=self._min_scale_factor,
                                            a_max=self._max_scale_factor)
    self.target_sz = (self.current_scale_factor * self.base_target_sz[0],
                      self.current_scale_factor * self.base_target_sz[1])
    region = [np.round(self._center[0] - self.target_sz[0] / 2),
              np.round(self._center[1] - self.target_sz[1] / 2),
              self.target_sz[0],
              self.target_sz[1]]

    if self.use_segmentation:
        if self.segcolor_space == 'bgr':
            seg_img = current_frame
        elif self.segcolor_space == 'hsv':
            seg_img = cv2.cvtColor(current_frame, cv2.COLOR_BGR2HSV)
            # Stretch channel 0 by 255/180 (OpenCV stores 8-bit hue in
            # 0..179).
            seg_img[:, :, 0] = (
                seg_img[:, :, 0].astype(np.float32) / 180 * 255)
            seg_img = seg_img.astype(np.uint8)
        else:
            raise ValueError

        hist_fg = Histogram(3, self.nbins)
        hist_bg = Histogram(3, self.nbins)
        self.extract_histograms(seg_img, region, hist_fg, hist_bg)
        # Running average of the foreground / background histograms.
        self.hist_fg_p_bins = ((1 - self.hist_lr) * self.hist_fg_p_bins
                               + self.hist_lr * hist_fg.p_bins)
        self.hist_bg_p_bins = ((1 - self.hist_lr) * self.hist_bg_p_bins
                               + self.hist_lr * hist_bg.p_bins)
        hist_fg.p_bins = self.hist_fg_p_bins
        hist_bg.p_bins = self.hist_bg_p_bins

        mask = self.segment_region(seg_img, self._center, self.template_size,
                                   self.base_target_sz,
                                   self.current_scale_factor, hist_fg,
                                   hist_bg)
        # Zero the mask outside a centred rectangle of the target's size.
        init_mask_padded = np.zeros_like(mask)
        pm_x0 = int(np.floor(mask.shape[1] / 2 - region[2] / 2))
        pm_y0 = int(np.floor(mask.shape[0] / 2 - region[3] / 2))
        init_mask_padded[pm_y0:pm_y0 + int(np.round(region[3])),
                         pm_x0:pm_x0 + int(np.round(region[2]))] = 1
        mask = mask * init_mask_padded
        mask = cv2.resize(mask, (self.yf.shape[1], self.yf.shape[0]))
        if self.mask_normal(mask, self.target_dummy_area) is True:
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3),
                                               anchor=(1, 1))
            mask = cv2.dilate(mask, kernel)
        else:
            # Mask rejected by mask_normal: fall back to the dummy mask.
            mask = self.target_dummy_mask
    else:
        mask = self.target_dummy_mask

    # Re-extract features at the updated centre and scale for training.
    f = self.get_csr_features(current_frame, self._center,
                              self.current_scale_factor, self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    H_new = self.create_csr_filter(f, self.yf, mask)

    if use_weights:
        response = np.real(ifft2(fft2(f) * np.conj(H_new)))
        # Per-channel response maximum times the discrimination factor.
        chann_w = np.max(
            response.reshape(response.shape[0] * response.shape[1], -1),
            axis=0) * channel_discr
        chann_w = chann_w / np.sum(chann_w)
        self.chann_w = ((1 - self.channels_weight_lr) * self.chann_w
                        + self.channels_weight_lr * chann_w)
        self.chann_w = self.chann_w / np.sum(self.chann_w)

    self.H = (1 - self.interp_factor) * self.H + self.interp_factor * H_new
    return region