def trainD(self, kf_cn, kf_hog, alphaf, alpha, lambda1, dim):
    """Re-estimate the two kernel combination weights.

    For each kernel spectrum (``kf_cn`` and ``kf_hog``) a ratio of two
    running scalars is maintained: a numerator (inner product of
    ``2 * ifft2(self.yf) - lambda1 * alpha`` with the kernel response) and a
    denominator (twice the squared norm of the kernel response).

    :param kf_cn: spectrum of the first kernel
    :param kf_hog: spectrum of the second kernel
    :param alphaf: dual coefficients in the Fourier domain
    :param alpha: dual coefficients in the spatial domain
    :param lambda1: weight applied to ``alpha`` in the target term
    :param dim: accepted for interface compatibility; not used here
    :return: ``[d_cn, d_hog]``, the real parts of numerator / denominator

    Side effects: stores the running scalars in ``self.d_num1``,
    ``self.d_num2``, ``self.d_den1`` and ``self.d_den2``.
    """
    resp_cn = ifft2(np.conj(kf_cn) * alphaf).flatten()
    resp_hog = ifft2(np.conj(kf_hog) * alphaf).flatten()
    target = (2 * ifft2(self.yf) - lambda1 * alpha).flatten().conj()

    num_cn = target.dot(resp_cn)
    num_hog = target.dot(resp_hog)
    den_cn = 2 * (resp_cn.conj().dot(resp_cn))
    den_hog = 2 * (resp_hog.conj().dot(resp_hog))

    # From the second frame on, blend the fresh statistics into the stored
    # ones with the per-feature learning rates.
    if self.frame_index != 1:
        num_cn = (1 - self.lr_cn) * self.d_num1 + self.lr_cn * num_cn
        num_hog = (1 - self.lr_hog) * self.d_num2 + self.lr_hog * num_hog
        den_cn = (1 - self.lr_cn) * self.d_den1 + self.lr_cn * den_cn
        den_hog = (1 - self.lr_hog) * self.d_den2 + self.lr_hog * den_hog

    weights = [np.real(num_cn / den_cn), np.real(num_hog / den_hog)]

    self.d_num1, self.d_num2 = num_cn, num_hog
    self.d_den1, self.d_den2 = den_cn, den_hog
    return weights
def create_csr_filter(self, img, Y, P):
    """
    Build a masked correlation filter with a fixed number of
    Augmented Lagrangian iterations.

    :param img: image patch (already normalized)
    :param Y: gaussian shaped labels (note that the peak must be at the top-left corner)
    :param P: padding mask
    :return: filter
    """
    penalty = 5
    penalty_growth = 3
    penalty_cap = 20
    n_steps = 4
    lambda_ = penalty / 100

    F = fft2(img)
    Sxy = F * np.conj(Y)[:, :, None]
    Sxx = F * np.conj(F)
    mask = P[:, :, None]

    # Initial guess: closed-form filter, masked in the spatial domain.
    H = fft2(ifft2(Sxy / (Sxx + lambda_)) * mask)
    # Lagrange multiplier starts at zero.
    L = np.zeros_like(H)

    for step in range(1, n_steps + 1):
        G = (Sxy + penalty * H - L) / (Sxx + penalty)
        H = fft2(np.real(mask * ifft2(penalty * G + L) / (penalty + lambda_)))
        # The last pass only refreshes G and H; the multiplier and penalty
        # are left untouched.
        if step == n_steps:
            break
        L += penalty * (G - H)
        penalty = min(penalty_cap, penalty_growth * penalty)
    return H
def ADMM(self, xlf, f_pre_f, mu):
    """Solve for the filter spectrum with an iterative ADMM scheme.

    :param xlf: Fourier-domain training sample, channels on axis 2
    :param f_pre_f: filter spectrum from the previous call
    :param mu: weight of the term that ties the result to ``f_pre_f``
    :return: the updated filter spectrum ``f_f``

    Reads ``self.yf``, ``self.reg_window``, ``self.feature_map_sz`` and the
    penalty settings (``init_penalty_factor``, ``max_penalty_factor``,
    ``penalty_scale_step``, ``admm_max_iterations``).
    """
    model_xf = xlf
    f_f = np.zeros_like(model_xf)
    g_f = np.zeros_like(f_f)
    h_f = np.zeros_like(f_f)

    gamma = self.init_penalty_factor
    gamma_max = self.max_penalty_factor
    gamma_scale_step = self.penalty_scale_step
    T = self.feature_map_sz[0] * self.feature_map_sz[1]

    x_conj = np.conj(model_xf)
    S_xx = np.sum(x_conj * model_xf, axis=2)
    Sf_pre_f = np.sum(x_conj * f_pre_f, axis=2)
    Sfx_pre_f = model_xf * Sf_pre_f[:, :, None]

    step = 1
    while step <= self.admm_max_iterations:
        rho = gamma + mu
        B = S_xx + T * rho
        Sgx_f = np.sum(x_conj * g_f, axis=2)
        Shx_f = np.sum(x_conj * h_f, axis=2)

        # Sub-problem f: a direct term minus a per-pixel correction scaled by 1/B.
        direct = (1 / (T * rho) * (self.yf[:, :, None] * model_xf)) \
            - ((1 / rho) * h_f) \
            + (gamma / rho) * g_f \
            + (mu / rho) * f_pre_f
        from_labels = 1 / (T * rho) * (model_xf * ((S_xx * self.yf)[:, :, None]))
        from_previous = mu / rho * Sfx_pre_f
        from_multiplier = 1 / rho * (model_xf * (Shx_f[:, :, None]))
        from_auxiliary = gamma / rho * (model_xf * Sgx_f[:, :, None])
        correction = from_labels + from_previous - from_multiplier + from_auxiliary
        f_f = direct - correction / B[:, :, None]

        # Sub-problem g is delegated to argmin_g in the spatial domain.
        g_f = fft2(
            self.argmin_g(self.reg_window, gamma,
                          (ifft2(gamma * (f_f + h_f)))))
        # Multiplier update, then grow the penalty up to its cap.
        h_f = h_f + (gamma * (f_f - g_f))
        gamma = min(gamma_scale_step * gamma, gamma_max)
        step += 1
    return f_f
def update(self, current_frame, vis=False):
    """Locate the target in ``current_frame`` and update the model.

    :param current_frame: image passed on to ``get_sub_window``
    :param vis: when exactly ``True``, the response map is kept in ``self.score``
    :return: ``[x, y, w, h]`` built from the new centre and ``self.w`` / ``self.h``

    Side effects: updates ``self._center``, ``self.alphaf_num``,
    ``self.alphaf_den`` and ``self.x``.
    """
    window = self._window[:, :, None]
    lr = self.interp_factor

    # --- detection ---------------------------------------------------
    z = self.get_sub_window(current_frame, self._center, self.crop_size)
    z = window * z
    kf = fft2(self._dgk(self.x, z))
    responses = np.real(
        ifft2(self.alphaf_num * kf.conj() / (self.alphaf_den)))
    if vis is True:
        self.score = responses

    peak = np.unravel_index(np.argmax(responses, axis=None), responses.shape)
    dy = self._init_response_center[0] - peak[0]
    dx = self._init_response_center[1] - peak[1]
    x_c, y_c = self._center
    self._center = (x_c - dx, y_c - dy)

    # --- model update at the new position ------------------------------
    new_x = self.get_sub_window(current_frame, self._center, self.crop_size)
    new_x = new_x * window
    kf = fft2(self._dgk(new_x, new_x))
    new_alphaf_num = self.yf * kf
    new_alphaf_den = kf * (kf + self.lambda_)

    self.alphaf_num = (1 - lr) * self.alphaf_num + lr * new_alphaf_num
    self.alphaf_den = (1 - lr) * self.alphaf_den + lr * new_alphaf_den
    self.x = (1 - lr) * self.x + lr * new_x

    left = self._center[0] - self.w / 2
    top = self._center[1] - self.h / 2
    return [left, top, self.w, self.h]
def ADMM(self, xf):
    """Solve for the filter spectrum with an iterative ADMM scheme.

    Each iteration computes ``g_f`` in the Fourier domain, then builds the
    auxiliary variable ``h`` in the spatial domain by cropping it with
    ``get_subwindow_no_window`` to ``self.small_filter_sz`` and writing the
    crop back into a zero array of the full feature-map size.

    :param xf: Fourier-domain model sample, channels on axis 2
    :return: ``g_f`` after ``self.admm_iterations`` iterations
    """
    g_f = np.zeros_like(xf)
    h_f = np.zeros_like(g_f)
    l_f = np.zeros_like(g_f)

    mu, beta, mumax = 1, 10, 10000
    T = self.feature_map_sz[0] * self.feature_map_sz[1]
    x_conj = np.conj(xf)
    S_xx = np.sum(x_conj * xf, 2)

    step = 1
    while step <= self.admm_iterations:
        B = S_xx + (T * mu)
        S_lx = np.sum(x_conj * l_f, axis=2)
        S_hx = np.sum(x_conj * h_f, axis=2)

        direct = (1 / (T * mu) * (self.yf[:, :, None] * xf)) \
            - ((1 / mu) * l_f) + h_f
        from_labels = 1 / (T * mu) * (xf * ((S_xx * self.yf)[:, :, None]))
        from_multiplier = 1 / mu * (xf * (S_lx[:, :, None]))
        from_auxiliary = xf * S_hx[:, :, None]

        # solve for g
        g_f = direct - (from_labels - from_multiplier + from_auxiliary) / B[:, :, None]

        # solve for h
        h = (T / ((mu * T) + self.admm_lambda)) * ifft2(mu * g_f + l_f)
        centre = (int(self.feature_map_sz[0] / 2),
                  int(self.feature_map_sz[1] / 2))
        xs, ys, h = self.get_subwindow_no_window(h, centre,
                                                 self.small_filter_sz)
        padded = np.zeros(
            (self.feature_map_sz[1], self.feature_map_sz[0], h.shape[2]),
            dtype=np.complex64)
        padded[ys, xs, :] = h
        h_f = fft2(padded)

        # multiplier update, then grow the penalty up to its cap
        l_f = l_f + (mu * (g_f - h_f))
        mu = min(beta * mu, mumax)
        step += 1
    return g_f
def _dense_gauss_kernel(self, x1, x2):
    """Evaluate a Gaussian kernel map between ``x1`` and ``x2``.

    The cross term is ``fftshift(ifft2(fft2(x1) * conj(fft2(x2))))``; the
    squared distance is ``<x1, x1> + <x2, x2> - 2 * cross``, clipped at zero
    and divided by the number of elements of ``x1`` before the exponential
    with bandwidth ``self.sigma``.

    :param x1: first patch
    :param x2: second patch
    :return: kernel map ``k``
    """
    cross = np.fft.fftshift(ifft2(fft2(x1) * np.conj(fft2(x2))))
    flat1 = x1.flatten()
    flat2 = x2.flatten()
    energy = np.dot(flat1.conj(), flat1) + np.dot(flat2.conj(), flat2)
    dist = energy - 2 * cross
    clipped = np.clip(dist, a_min=0, a_max=None)
    return np.exp(-1 / self.sigma**2 * clipped / np.size(x1))
def update_weights(self, features, channel_discr, H_new):
    """Blend new per-channel weights into ``self.chann_w``.

    The new weight of a channel is the maximum of its response to ``H_new``
    multiplied by ``channel_discr``, normalised to sum to one. The result is
    mixed with the stored weights using ``self.channels_weight_lr`` and
    normalised again.

    :param features: feature patch, channels on the last axis
    :param channel_discr: per-channel multiplier
    :param H_new: filter spectrum, same layout as ``fft2(features)``
    """
    response = np.real(ifft2(fft2(features) * np.conj(H_new)))
    n_positions = response.shape[0] * response.shape[1]
    per_channel_peak = np.max(response.reshape(n_positions, -1), axis=0)

    fresh = per_channel_peak * channel_discr
    fresh = fresh / np.sum(fresh)

    lr = self.channels_weight_lr
    self.chann_w = (1 - lr) * self.chann_w + lr * fresh
    self.chann_w = self.chann_w / np.sum(self.chann_w)
def train_model(self):
    """Alternate between solving for the dual coefficients and the kernel weights.

    Each round rebuilds the numerator/denominator of ``self.alphaf`` from the
    current weights ``d``, then asks ``trainD`` for new weights. The loop
    ends when both the coefficients and the weights change by at most 3 %
    (relative L1 change) or after 100 rounds, in which case the weights fall
    back to ``[0.5, 0.5]``.

    :return: the final kernel weights ``d``

    Side effects: updates ``self.alphaf_num``, ``self.alphaf_den``,
    ``self.alphaf`` and the per-kernel running terms ``self.alphaf_num1``,
    ``self.alphaf_num2``, ``self.alphaf_den1``, ``self.alphaf_den2``.
    """
    d = [0.5, 0.5]
    dim = self.z_cn2.shape[2]
    kf_cn = fft2(
        self.dense_gauss_kernel(self.z_cn2, self.z_cn2, self.cn_sigma))
    kf_hog = fft2(
        self.dense_gauss_kernel(self.z_hog2, self.z_hog2, self.hog_sigma))

    lambda1 = 0.01
    threshold = 0.03
    max_rounds = 100

    rounds = 0
    converged = False
    predD = d
    prev_alpha = None
    while not converged:
        num_cn = self.yf * d[0] * kf_cn
        num_hog = self.yf * d[1] * kf_hog
        den_cn = d[0] * kf_cn * (d[0] * np.conj(kf_cn) + lambda1)
        den_hog = d[1] * kf_hog * (d[1] * np.conj(kf_hog) + lambda1)

        # After the first frame, blend with the stored per-kernel terms.
        if self.frame_index != 1:
            num_cn = (1 - self.lr_cn) * self.alphaf_num1 + self.lr_cn * num_cn
            num_hog = (1 - self.lr_hog) * self.alphaf_num2 + self.lr_hog * num_hog
            den_cn = (1 - self.lr_cn) * self.alphaf_den1 + self.lr_cn * den_cn
            den_hog = (1 - self.lr_hog) * self.alphaf_den2 + self.lr_hog * den_hog

        self.alphaf_num = num_cn + num_hog
        self.alphaf_den = den_cn + den_hog
        self.alphaf = self.alphaf_num / self.alphaf_den
        alpha = ifft2(self.alphaf)
        d = self.trainD(kf_cn, kf_hog, self.alphaf, alpha, lambda1, dim)

        rounds += 1
        if rounds > 1:
            delta_alpha = np.abs(alpha - prev_alpha)
            deltaD = np.abs(np.array(d) - np.array(predD))
            alpha_settled = np.sum(delta_alpha) <= threshold * np.sum(
                np.abs(prev_alpha))
            if alpha_settled and np.sum(np.array(deltaD)) <= threshold * np.sum(
                    np.abs(np.array(predD))):
                converged = True
        prev_alpha = alpha
        predD = d
        if rounds >= max_rounds:
            # No convergence within the budget: fall back to equal weights.
            d = [0.5, 0.5]
            break

    self.alphaf_num1 = num_cn
    self.alphaf_num2 = num_hog
    self.alphaf_den1 = den_cn
    self.alphaf_den2 = den_hog
    return d
def _dgk(self, x1, x2):
    """Evaluate a Gaussian kernel map between ``x1`` and ``x2``.

    The cross spectrum is summed over axis 2 (a 2-D input is first given a
    singleton third axis), transformed back, and combined with the squared
    norms of both inputs into a distance that is clipped at zero and divided
    by the number of elements of ``x1``.

    :param x1: first patch
    :param x2: second patch
    :return: real-valued kernel map
    """
    flat1 = x1.flatten()
    flat2 = x2.flatten()
    norm1 = (flat1.T).dot(flat1)
    norm2 = (flat2.T).dot(flat2)

    cross_spectrum = fft2(x1) * np.conj(fft2(x2))
    if len(cross_spectrum.shape) == 2:
        cross_spectrum = cross_spectrum[:, :, np.newaxis]
    cross = np.real(ifft2(np.sum(cross_spectrum, axis=2)))

    dist = norm1 + norm2 - 2 * cross
    clipped = np.clip(dist, a_min=0, a_max=None)
    return np.exp(-1 / self.sigma ** 2 * clipped / np.size(x1))
def update(self, current_frame, vis=False):
    """Track the target into ``current_frame`` and refresh the translation filter.

    Steps: evaluate the translation filter, move ``self._center`` to the
    response peak, let ``self.scale_estimator`` update the scale factor,
    then blend a new sample into ``self.hf_num`` / ``self.hf_den``.

    :param current_frame: image handed to the sampling helpers
    :param vis: when exactly ``True``, keep the response map in ``self.score``
    :return: ``[x, y, w, h]`` built from the centre and ``self.target_sz``
    """
    xt = self.get_translation_sample(current_frame, self._center,
                                     self.crop_size,
                                     self.current_scale_factor,
                                     self._window)
    xtf = fft2(xt)
    response = np.real(
        ifft2(
            np.sum(self.hf_num * xtf, axis=2) /
            (self.hf_den + self.lambda_)))  # response computation
    if vis is True:
        self.score = response
        # NOTE(review): the original indentation was lost; win_sz is assumed
        # to belong to the visualisation branch - confirm.
        self.win_sz = self.crop_size
    curr = np.unravel_index(np.argmax(response, axis=None),
                            response.shape)  # position of the maximum response
    # Offset of the peak from the reference position, scaled by the current
    # scale factor.
    dy = (curr[0] -
          self._init_response_center[0]) * self.current_scale_factor
    dx = (curr[1] -
          self._init_response_center[1]) * self.current_scale_factor
    x_c, y_c = self._center
    x_c += dx
    y_c += dy
    self._center = (x_c, y_c)
    self.current_scale_factor = self.scale_estimator.update(
        current_frame, self._center, self.base_target_size,
        self.current_scale_factor
    )  # run the multi-scale estimation independently and update the scale factor of the maximum response
    if self.scale_type == 'normal':
        # Only the 'normal' estimator has its result clamped here.
        self.current_scale_factor = np.clip(self.current_scale_factor,
                                            a_min=self._min_scale_factor,
                                            a_max=self._max_scale_factor)
    # Re-sample at the new position/scale and update the filter terms.
    xl = self.get_translation_sample(current_frame, self._center,
                                     self.crop_size,
                                     self.current_scale_factor,
                                     self._window)
    xlf = fft2(xl)
    new_hf_num = self.yf[:, :, None] * np.conj(xlf)
    new_hf_den = np.sum(xlf * np.conj(xlf), axis=2)
    self.hf_den = (1 - self.interp_factor
                   ) * self.hf_den + self.interp_factor * new_hf_den
    self.hf_num = (1 - self.interp_factor
                   ) * self.hf_num + self.interp_factor * new_hf_num
    self.target_sz = (self.base_target_size[0] * self.current_scale_factor,
                      self.base_target_size[1] * self.current_scale_factor)
    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2, self.target_sz[0],
        self.target_sz[1]
    ]
def _kernel_correlation(self, xf, yf, kernel='gaussian'):
    """Compute the kernel correlation of two Fourier-domain samples.

    :param xf: first spectrum, channels on axis 2
    :param yf: second spectrum, same layout
    :param kernel: ``'gaussian'`` or ``'linear'``
    :return: the kernel correlation in the Fourier domain
    :raises NotImplementedError: for any other ``kernel`` value
    """
    if kernel == 'linear':
        return np.sum(xf * np.conj(yf), axis=2) / np.size(xf)
    if kernel != 'gaussian':
        raise NotImplementedError

    # Squared norms are computed on the spectra and divided by the number of
    # spatial positions.
    n_positions = xf.shape[0] * xf.shape[1]
    flat_x = xf.flatten()
    flat_y = yf.flatten()
    xx = (np.dot(flat_x.conj().T, flat_x) / n_positions)
    yy = (np.dot(flat_y.conj().T, flat_y) / n_positions)

    cross = np.sum(np.real(ifft2(xf * np.conj(yf))), axis=2)
    dist = np.clip(xx + yy - 2 * cross, a_min=0, a_max=None)
    return fft2(np.exp(-1 / self.sigma ** 2 * dist / np.size(xf)))
def get_response(self, features, vis=False):
    """Correlate ``features`` with the filter ``self.H``.

    :param features: feature patch, channels on axis 2
    :param vis: when truthy, store a half-size-rolled copy of the response
        in ``self.score``
    :return: ``(response, channel_discr)``. ``channel_discr`` holds one value
        per channel in ``[0.5, 1]``: ``1 - second_peak / first_peak`` of the
        normalised channel response, floored at 0.5, or 1 when fewer than two
        local peaks are found. When ``self.use_channel_weights`` is false it
        is all ones; that path previously failed with ``UnboundLocalError``
        because the variable was never assigned.
    """
    spectrum = fft2(features) * np.conj(self.H)

    if self.use_channel_weights:
        response_chann = np.real(ifft2(spectrum))
        response = np.sum(response_chann * self.chann_w[None, None, :],
                          axis=2)
        channel_discr = np.ones((response_chann.shape[2]))
    else:
        response = np.real(ifft2(np.sum(spectrum, axis=2)))
        # Neutral weights, so the return value is always defined.
        channel_discr = np.ones((features.shape[2]))

    if vis:
        # Roll by half the size along both axes before storing the map.
        score = np.roll(response, int(np.floor(response.shape[0] / 2)),
                        axis=0)
        self.score = np.roll(score, int(np.floor(score.shape[1] / 2)),
                             axis=1)

    if self.use_channel_weights:
        for i in range(response_chann.shape[2]):
            norm_response = self.normalize_img(response_chann[:, :, i])
            peak_locs = peak_local_max(norm_response, min_distance=5)
            if len(peak_locs) < 2:
                # Fewer than two peaks: keep the neutral value 1.
                continue
            peak_vals = sorted(
                norm_response[peak_locs[:, 0], peak_locs[:, 1]],
                reverse=True)
            max_val, second_max_val = peak_vals[0], peak_vals[1]
            channel_discr[i] = max(
                0.5, 1 - (second_max_val / (max_val + 1e-10)))
    return response, channel_discr
def phase_correlation(src1, src2):
    """Estimate the shift between two multi-channel patches by phase correlation.

    The magnitude-normalised cross spectrum is summed over axis 2, transformed
    back and ``fftshift``-ed. The peak is refined with a weighted centroid
    over its 3x3 neighbourhood (indices clipped to the map) and reported
    relative to ``src1.shape // 2``.

    :param src1: first patch, channels on axis 2
    :param src2: second patch, same shape
    :return: ``(ptx, pty, mscore)`` - x offset, y offset and peak value
    """
    spec1 = fft2(src1)
    spec2 = fft2(src2)
    cross = spec2 * np.conj(spec1)
    magnitude = np.sqrt(cross * np.conj(cross)) + 2e-16

    surface = np.real(ifft2(np.sum(cross / magnitude, axis=2)))
    surface = np.fft.fftshift(surface, axes=(0, 1))

    mscore = np.max(surface)
    peak_row, peak_col = np.unravel_index(np.argmax(surface, axis=None),
                                          surface.shape)

    half_y = half_x = 1
    rows = np.arange(peak_row - half_y, peak_row + half_y + 1).astype(np.int64)
    cols = np.arange(peak_col - half_x, peak_col + half_x + 1).astype(np.int64)
    rows = np.clip(rows, a_min=0, a_max=surface.shape[0] - 1)
    cols = np.clip(cols, a_min=0, a_max=surface.shape[1] - 1)

    patch = surface[rows, :][:, cols]
    total = np.sum(patch) + 2e-16
    pty = np.sum(np.sum(patch, axis=1) * rows) / total
    ptx = np.sum(np.sum(patch, axis=0) * cols) / total

    pty = pty - (src1.shape[0]) // 2
    ptx = ptx - (src1.shape[1]) // 2
    return ptx, pty, mscore
def update(self, current_frame, vis=False):
    """Track the target into ``current_frame`` and retrain the filter.

    A refinement loop samples features at every scale in
    ``self.sc * self.scale_factors``, correlates them with the stored filter
    ``self.f_pre_f_hc``, lets ``resp_newton`` pick displacement and scale
    index, and moves ``self._center``. The loop stops after
    ``self.refinement_iterations`` passes or once the centre moved by at
    most 1e-2 on both axes. The filter is then re-solved with ``self.ADMM``
    on a sample taken at the final position.

    :param current_frame: 3-channel image (asserted)
    :param vis: when exactly ``True``, keep the response of the chosen scale
        in ``self.score``
    :return: ``[x, y, w, h]`` built from the centre and
        ``self.base_target_sz * self.sc``

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    assert len(current_frame.shape) == 3 and current_frame.shape[2] == 3
    # Start "infinitely far" so the first loop test always passes.
    old_pos = (np.inf, np.inf)
    iter = 1
    while iter <= self.refinement_iterations and (
            np.abs(old_pos[0] - self._center[0]) > 1e-2
            or np.abs(old_pos[1] - self._center[1]) > 1e-2):

        sample_scales = self.sc * self.scale_factors
        xt_hc = None
        sample_pos = (int(np.round(self._center[0])),
                      int(np.round(self._center[1])))
        # Stack the features of every scale along a new fourth axis.
        for scale in sample_scales:
            sub_window = self.get_sub_window(
                current_frame,
                sample_pos,
                model_sz=self.crop_size,
                scaled_sz=(int(round(self.crop_size[0] * scale)),
                           int(round(self.crop_size[1] * scale))))
            hc_features = self.extrac_hc_feature(
                sub_window, self.cell_size)[:, :, :, np.newaxis]
            if xt_hc is None:
                xt_hc = hc_features
            else:
                xt_hc = np.concatenate((xt_hc, hc_features), axis=3)
        xtw_hc = xt_hc * self.cosine_window[:, :, None, None]
        xtf_hc = fft2(xtw_hc)
        # Sum over the channel axis: one response spectrum per scale.
        responsef_hc = np.sum(np.conj(self.f_pre_f_hc)[:, :, :, None] *
                              xtf_hc,
                              axis=2)
        responsef = responsef_hc
        response = np.real(ifft2(responsef))
        disp_row, disp_col, sind = resp_newton(response, responsef,
                                               self.newton_iterations,
                                               self.ky, self.kx,
                                               self.feature_map_sz)
        # Disabled alternative: plain arg-max instead of resp_newton.
        #row, col, sind = np.unravel_index(np.argmax(response, axis=None), response.shape)
        #disp_row = (row+ int(np.floor(self.feature_map_sz[1] - 1) / 2)) % self.feature_map_sz[1] - int(
        #    np.floor((self.feature_map_sz[1] - 1) / 2))
        #disp_col = (col + int(np.floor(self.feature_map_sz[0] - 1) / 2)) % self.feature_map_sz[0] - int(
        #    np.floor((self.feature_map_sz[0] - 1) / 2))

        if vis is True:
            self.score = response[:, :, sind].astype(np.float32)
            # Roll by half the size along both axes.
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[0] / 2)),
                                 axis=0)
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[1] / 2)),
                                 axis=1)

        # Displacement is scaled by cell size, current scale and the scale
        # factor of the selected scale index.
        dx, dy = (disp_col * self.cell_size * self.sc *
                  self.scale_factors[sind]), (disp_row * self.cell_size *
                                              self.sc *
                                              self.scale_factors[sind])
        scale_change_factor = self.scale_factors[sind]
        old_pos = self._center
        self._center = (sample_pos[0] + dx, sample_pos[1] + dy)
        self.sc = self.sc * scale_change_factor
        self.sc = np.clip(self.sc, self._min_scale_factor,
                          self._max_scale_factor)

        self.sc = self.scale_estimator.update(current_frame, self._center,
                                              self.base_target_sz, self.sc)
        if self.scale_type == 'normal':
            self.sc = np.clip(self.sc,
                              a_min=self._min_scale_factor,
                              a_max=self._max_scale_factor)
        iter += 1

    # Retrain on a sample taken at the final position and scale.
    sample_pos = (int(np.round(self._center[0])),
                  int(np.round(self._center[1])))
    patch = self.get_sub_window(
        current_frame,
        sample_pos,
        model_sz=self.crop_size,
        scaled_sz=(int(np.round(self.crop_size[0] * self.sc)),
                   int(np.round(self.crop_size[1] * self.sc))))
    xl_hc = self.extrac_hc_feature(patch, self.cell_size)
    xlw_hc = xl_hc * self.cosine_window[:, :, None]
    xlf_hc = fft2(xlw_hc)
    mu = self.temporal_regularization_factor
    self.f_pre_f_hc = self.ADMM(xlf_hc, self.f_pre_f_hc, mu)
    target_sz = (self.base_target_sz[0] * self.sc,
                 self.base_target_sz[1] * self.sc)
    return [(self._center[0] - (target_sz[0]) / 2),
            (self._center[1] - (target_sz[1]) / 2), target_sz[0],
            target_sz[1]]
def dense_gauss_kernel(self, x1, x2, sigma):
    """Evaluate a Gaussian kernel map between two multi-channel patches.

    The cross spectrum is summed over axis 2 and transformed back; the
    distance ``<x1, x1> + <x2, x2> - 2 * cross`` is divided by its own number
    of elements before the exponential. No clipping or real-part extraction
    is applied.

    :param x1: first patch, channels on axis 2
    :param x2: second patch, same layout
    :param sigma: kernel bandwidth
    :return: kernel map ``k``
    """
    cross = ifft2(np.sum(fft2(x1) * np.conj(fft2(x2)), axis=2))
    flat1 = x1.flatten()
    flat2 = x2.flatten()
    energy = flat1.conj().T.dot(flat1) + flat2.conj().T.dot(flat2)
    dist = energy - 2 * cross
    return np.exp(-1 / sigma**2 * dist / np.size(dist))
def update(self, current_frame, vis=False):
    """Track the target into ``current_frame`` and update filter, scale and mask.

    Steps: correlate features with ``self.H`` and move the centre to the
    sub-pixel peak; estimate the scale change from a log-polar HOG patch;
    optionally re-segment the target to obtain a mask; build a new filter
    with ``create_csr_filter`` and blend it into ``self.H``.

    :param current_frame: image handed to the feature / OpenCV helpers
    :param vis: when exactly ``True``, keep the rolled response in ``self.score``
    :return: ``region`` = ``[x, y, w, h]`` with rounded ``x`` and ``y``

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    f = self.get_csr_features(current_frame, self._center, self.sc,
                              self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    if self.use_channel_weights is True:
        # Per-channel responses, combined with the stored channel weights.
        response_chann = np.real(ifft2(fft2(f) * np.conj(self.H)))
        response = np.sum(response_chann * self.chann_w[None, None, :],
                          axis=2)
    else:
        response = np.real(ifft2(np.sum(fft2(f) * np.conj(self.H), axis=2)))
    if vis is True:
        self.score = response
        self.score = np.roll(self.score,
                             int(np.floor(self.score.shape[0] / 2)),
                             axis=0)
        self.score = np.roll(self.score,
                             int(np.floor(self.score.shape[1] / 2)),
                             axis=1)
    curr = np.unravel_index(np.argmax(response, axis=None), response.shape)
    if self.use_channel_weights is True:
        # Per channel: 1 - second_peak / first_peak, floored at 0.5;
        # channels with fewer than two local peaks keep the value 1.
        channel_discr = np.ones((response_chann.shape[2]))
        for i in range(response_chann.shape[2]):
            norm_response = self.normalize_img(response_chann[:, :, i])
            from skimage.feature.peak import peak_local_max
            peak_locs = peak_local_max(norm_response, min_distance=5)
            if len(peak_locs) < 2:
                continue
            vals = reversed(
                sorted(norm_response[peak_locs[:, 0], peak_locs[:, 1]]))
            second_max_val = None
            max_val = None
            # Take the two largest peak values.
            for index, val in enumerate(vals):
                if index == 0:
                    max_val = val
                elif index == 1:
                    second_max_val = val
                else:
                    break
            channel_discr[i] = max(
                0.5, 1 - (second_max_val / (max_val + 1e-10)))
    # Neighbours of the peak (with wrap-around) for sub-pixel refinement.
    v_neighbors = response[[(curr[0] - 1) % response.shape[0],
                            (curr[0]) % response.shape[0],
                            (curr[0] + 1) % response.shape[0]], curr[1]]
    h_neighbors = response[curr[0],
                           [(curr[1] - 1) % response.shape[1],
                            (curr[1]) % response.shape[1],
                            (curr[1] + 1) % response.shape[1]]]
    row = curr[0] + self.subpixel_peak(v_neighbors)
    col = curr[1] + self.subpixel_peak(h_neighbors)
    # Positions past the middle of the map are treated as negative offsets.
    if row + 1 > response.shape[0] / 2:
        row = row - response.shape[0]
    if col + 1 > response.shape[1] / 2:
        col = col - response.shape[1]
    # displacement
    dx = self.sc * self.cell_size * (1 / self.rescale_ratio) * col
    dy = self.sc * self.cell_size * (1 / self.rescale_ratio) * row
    self._center = (self._center[0] + dx, self._center[1] + dy)

    # Scale estimation on a log-polar HOG patch around the new centre.
    patchL = cv2.getRectSubPix(current_frame,
                               (int(np.floor(self.sc * self.scale_sz[0])),
                                int(np.floor(self.sc * self.scale_sz[1]))),
                               self._center)
    patchL = cv2.resize(patchL, self.scale_sz_window)
    # convert into logpolar
    patchLp = cv2.logPolar(patchL.astype(np.float32),
                           ((patchL.shape[1] - 1) / 2,
                            (patchL.shape[0] - 1) / 2),
                           self.mag,
                           flags=cv2.INTER_LINEAR + cv2.WARP_FILL_OUTLIERS)
    patchLp = extract_hog_feature(patchLp, cell_size=4)
    tmp_sc, _, _ = self.estimate_scale(self.model_patchLp, patchLp, self.mag)
    # Limit the per-frame scale change to [0.6, 1.4].
    tmp_sc = np.clip(tmp_sc, a_min=0.6, a_max=1.4)
    self.sc = self.sc * tmp_sc
    self.model_patchLp = (
        1 - self.learning_rate_scale
    ) * self.model_patchLp + self.learning_rate_scale * patchLp

    self.target_sz = (self.sc * self.base_target_sz[0],
                      self.sc * self.base_target_sz[1])
    region = [
        np.round(self._center[0] - self.target_sz[0] / 2),
        np.round(self._center[1] - self.target_sz[1] / 2),
        self.target_sz[0], self.target_sz[1]
    ]
    if self.use_segmentation:
        if self.segcolor_space == 'bgr':
            seg_img = current_frame
        elif self.segcolor_space == 'hsv':
            seg_img = cv2.cvtColor(current_frame, cv2.COLOR_BGR2HSV)
            # Rescale channel 0 by 255/180.
            seg_img[:, :, 0] = (seg_img[:, :, 0].astype(np.float32) / 180 *
                                255)
            seg_img = seg_img.astype(np.uint8)
        else:
            raise ValueError
        hist_fg = Histogram(3, self.nbins)
        hist_bg = Histogram(3, self.nbins)
        self.extract_histograms(seg_img, region, hist_fg, hist_bg)
        # Blend the new histograms into the stored ones, then segment with
        # the blended versions.
        self.hist_fg_p_bins = (
            1 - self.hist_lr
        ) * self.hist_fg_p_bins + self.hist_lr * hist_fg.p_bins
        self.hist_bg_p_bins = (
            1 - self.hist_lr
        ) * self.hist_bg_p_bins + self.hist_lr * hist_bg.p_bins
        hist_fg.p_bins = self.hist_fg_p_bins
        hist_bg.p_bins = self.hist_bg_p_bins
        mask = self.segment_region(seg_img, self._center,
                                   self.template_size, self.base_target_sz,
                                   self.sc, hist_fg, hist_bg)
        # Keep only the part of the mask inside a centred box of the
        # region's size.
        init_mask_padded = np.zeros_like(mask)
        pm_x0 = int(np.floor(mask.shape[1] / 2 - region[2] / 2))
        pm_y0 = int(np.floor(mask.shape[0] / 2 - region[3] / 2))
        init_mask_padded[pm_y0:pm_y0 + int(np.round(region[3])),
                         pm_x0:pm_x0 + int(np.round(region[2]))] = 1
        mask = mask * init_mask_padded
        mask = cv2.resize(mask, (self.yf.shape[1], self.yf.shape[0]))
        if self.mask_normal(mask, self.target_dummy_area) is True:
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3),
                                               anchor=(1, 1))
            mask = cv2.dilate(mask, kernel)
        else:
            # Fall back to the dummy mask when mask_normal rejects the mask.
            mask = self.target_dummy_mask
            pass
    else:
        mask = self.target_dummy_mask
    #cv2.imshow('Mask', (mask * 255).astype(np.uint8))
    #cv2.waitKey(1)

    # Train a new filter at the updated position and scale.
    f = self.get_csr_features(current_frame, self._center, self.sc,
                              self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    H_new = self.create_csr_filter(f, self.yf, mask)
    if self.use_channel_weights:
        # New channel weights: peak response per channel times the
        # discrimination value, normalised and blended with the old weights.
        response = np.real(ifft2(fft2(f) * np.conj(H_new)))
        chann_w = np.max(response.reshape(
            response.shape[0] * response.shape[1], -1),
                         axis=0) * channel_discr
        chann_w = chann_w / np.sum(chann_w)
        self.chann_w = (
            1 - self.channels_weight_lr
        ) * self.chann_w + self.channels_weight_lr * chann_w
        self.chann_w = self.chann_w / np.sum(self.chann_w)
    self.H = (1 - self.interp_factor) * self.H + self.interp_factor * H_new
    return region
def update(self, current_frame, vis=False):
    """Track the target into ``current_frame`` and retrain the filter.

    Features are sampled at ``self.number_of_scales`` scales and correlated
    with ``self.g_f``. The response is resized in the Fourier domain with
    ``resize_dft2``, and the displacement is found either with
    ``resp_newton`` (``interpolate_response == 4``) or by a plain arg-max.
    The centre and scale are updated, the model spectrum is blended with a
    new sample and the filter is re-solved with ``self.ADMM``.

    :param current_frame: image handed to ``get_sub_window``
    :param vis: when exactly ``True``, keep the rolled response of the chosen
        scale in ``self.score``
    :return: ``[x, y, w, h]`` from the centre and
        ``self.target_sz * self.current_scale_factor``
    :raises ValueError: when ``self.interpolate_response`` is 3, or is not
        one of 0-4

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    x = None
    # Stack the features of every scale along a new fourth axis.
    for scale_ind in range(self.number_of_scales):
        current_scale = self.current_scale_factor * self.scale_factors[
            scale_ind]
        sub_window = self.get_sub_window(
            current_frame,
            self._center,
            model_sz=self.crop_size,
            scaled_sz=(int(round(self.crop_size[0] * current_scale)),
                       int(round(self.crop_size[1] * current_scale))))
        feature = self.extract_hc_feture(sub_window,
                                         self.cell_size)[:, :, :,
                                                         np.newaxis]
        if x is None:
            x = feature
        else:
            x = np.concatenate((x, feature), axis=3)
    xtf = fft2(x * self._window[:, :, None, None])
    # Sum over the channel axis: one response spectrum per scale.
    responsef = np.sum(np.conj(self.g_f)[:, :, :, None] * xtf, axis=2)

    if self.interpolate_response == 2:
        # Mode 2 recomputes the interpolation size from the current scale.
        self.interp_sz = (int(self.yf.shape[1] * self.feature_ratio *
                              self.current_scale_factor),
                          int(self.yf.shape[0] * self.feature_ratio *
                              self.current_scale_factor))
    responsef_padded = resize_dft2(responsef, self.interp_sz)
    response = np.real(ifft2(responsef_padded))
    if self.interpolate_response == 3:
        raise ValueError
    elif self.interpolate_response == 4:
        disp_row, disp_col, sind = resp_newton(response, responsef_padded,
                                               self.newton_iterations,
                                               self.ky, self.kx,
                                               self.feature_map_sz)
        if vis is True:
            self.score = response[:, :, sind]
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[0] / 2)),
                                 axis=0)
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[1] / 2)),
                                 axis=1)
    else:
        row, col, sind = np.unravel_index(np.argmax(response, axis=None),
                                          response.shape)
        if vis is True:
            self.score = response[:, :, sind]
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[0] / 2)),
                                 axis=0)
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[1] / 2)),
                                 axis=1)
        # Map the arg-max index to a signed displacement (wrap-around).
        disp_row = (row + int(np.floor(self.interp_sz[1] - 1) / 2)
                    ) % self.interp_sz[1] - int(
                        np.floor((self.interp_sz[1] - 1) / 2))
        disp_col = (col + int(np.floor(self.interp_sz[0] - 1) / 2)
                    ) % self.interp_sz[0] - int(
                        np.floor((self.interp_sz[0] - 1) / 2))

    # Conversion factor from response-map units to the displacement applied
    # to the centre; it depends on the interpolation mode.
    if self.interpolate_response == 0 or self.interpolate_response == 3 or self.interpolate_response == 4:
        factor = self.feature_ratio * self.current_scale_factor * self.scale_factors[
            sind]
    elif self.interpolate_response == 1:
        factor = self.current_scale_factor * self.scale_factors[sind]
    elif self.interpolate_response == 2:
        factor = self.scale_factors[sind]
    else:
        raise ValueError
    dx, dy = int(np.round(disp_col * factor)), int(
        np.round(disp_row * factor))
    self.current_scale_factor = self.current_scale_factor * self.scale_factors[
        sind]
    # Clamp to [min_scale_factor, max_scale_factor] before the estimator runs.
    self.current_scale_factor = max(self.current_scale_factor,
                                    self.min_scale_factor)
    self.current_scale_factor = min(self.current_scale_factor,
                                    self.max_scale_factor)
    self.current_scale_factor = self.scale_estimator.update(
        current_frame, self._center, self.base_target_sz,
        self.current_scale_factor)

    self._center = (self._center[0] + dx, self._center[1] + dy)

    # Blend a new sample into the model spectrum and re-solve the filter.
    pixels = self.get_sub_window(
        current_frame,
        self._center,
        model_sz=self.crop_size,
        scaled_sz=(int(round(self.crop_size[0] *
                             self.current_scale_factor)),
                   int(round(self.crop_size[1] *
                             self.current_scale_factor))))
    feature = self.extract_hc_feture(pixels, cell_size=self.cell_size)
    #feature=cv2.resize(pixels,self.feature_map_sz)/255-0.5
    xf = fft2(feature * self._window[:, :, None])
    self.model_xf = (
        1 - self.interp_factor) * self.model_xf + self.interp_factor * xf
    self.g_f = self.ADMM(self.model_xf)
    target_sz = (self.target_sz[0] * self.current_scale_factor,
                 self.target_sz[1] * self.current_scale_factor)
    return [
        self._center[0] - target_sz[0] / 2,
        self._center[1] - target_sz[1] / 2, target_sz[0], target_sz[1]
    ]
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and a bounding box.

    Derives cell size and template sizes from the box, builds the label
    spectrum ``self.yf`` and the window, prepares a rectangular dummy mask,
    optionally segments the target for a tighter mask, trains the first
    filter ``self.H`` with its channel weights, and stores the log-polar HOG
    patch used later for scale estimation.

    :param first_frame: first image; indexed as ``[:, :, c]``
    :param bbox: ``(x, y, w, h)``; cast to ``int64``
    :raises ValueError: when segmentation is enabled and
        ``self.segcolor_space`` is neither ``'bgr'`` nor ``'hsv'``

    NOTE(review): reads ``self.sc`` and ``self.scale_sz_window``, which are
    not assigned in this method - presumably set by the constructor.
    Block nesting was reconstructed from a source whose indentation was
    lost - confirm against the original file.
    """
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self.init_mask = np.ones((h, w), dtype=np.uint8)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h
    # Channels 0 and 1 identical everywhere: switch segmentation off.
    if np.all(first_frame[:, :, 0] == first_frame[:, :, 1]):
        self.use_segmentation = False
    # change 400 to 300
    # for larger cell_size
    self.cell_size = int(min(4, max(1, w * h / 300)))
    self.base_target_sz = (w, h)

    # Square template: mean of the two padded side lengths.
    template_size = (int(w + self.padding * np.sqrt(w * h)),
                     int(h + self.padding * np.sqrt(w * h)))
    template_size = (template_size[0] + template_size[1]) // 2
    self.template_size = (template_size, template_size)

    # Shrink factor that brings the template area down to 200**2; never
    # larger than 1.
    self.rescale_ratio = np.sqrt(
        (200**2) / (self.template_size[0] * self.template_size[1]))
    self.rescale_ratio = np.clip(self.rescale_ratio, a_min=None, a_max=1)

    self.rescale_template_size = (int(self.rescale_ratio *
                                      self.template_size[0]),
                                  int(self.rescale_ratio *
                                      self.template_size[1]))
    self.yf = fft2(
        gaussian2d_rolled_labels(
            (int(self.rescale_template_size[0] / self.cell_size),
             int(self.rescale_template_size[1] / self.cell_size)),
            self.y_sigma))

    self._window = cos_window((self.yf.shape[1], self.yf.shape[0]))
    self.crop_size = self.rescale_template_size

    # create dummy mask (approximation for segmentation)
    # size of the object in feature space
    obj_sz = (int(self.rescale_ratio *
                  (self.base_target_sz[0] / self.cell_size)),
              int(self.rescale_ratio *
                  (self.base_target_sz[1] / self.cell_size)))
    x0 = int((self.yf.shape[1] - obj_sz[0]) / 2)
    y0 = int((self.yf.shape[0] - obj_sz[1]) / 2)
    x1 = x0 + obj_sz[0]
    y1 = y0 + obj_sz[1]
    self.target_dummy_mask = np.zeros_like(self.yf, dtype=np.uint8)
    self.target_dummy_mask[y0:y1, x0:x1] = 1
    self.target_dummy_area = np.sum(self.target_dummy_mask)

    if self.use_segmentation:
        if self.segcolor_space == 'bgr':
            seg_img = first_frame
        elif self.segcolor_space == 'hsv':
            seg_img = cv2.cvtColor(first_frame, cv2.COLOR_BGR2HSV)
            # Rescale channel 0 by 255/180.
            seg_img[:, :, 0] = (seg_img[:, :, 0].astype(np.float32) / 180 *
                                255)
            seg_img = seg_img.astype(np.uint8)
        else:
            raise ValueError
        hist_fg = Histogram(3, self.nbins)
        hist_bg = Histogram(3, self.nbins)
        self.extract_histograms(seg_img, bbox, hist_fg, hist_bg)

        mask = self.segment_region(seg_img, self._center,
                                   self.template_size, self.base_target_sz,
                                   self.sc, hist_fg, hist_bg)
        self.hist_bg_p_bins = hist_bg.p_bins
        self.hist_fg_p_bins = hist_fg.p_bins

        # Keep only the part of the mask inside a centred box of bbox size.
        init_mask_padded = np.zeros_like(mask)
        pm_x0 = int(np.floor(mask.shape[1] / 2 - bbox[2] / 2))
        pm_y0 = int(np.floor(mask.shape[0] / 2 - bbox[3] / 2))
        init_mask_padded[pm_y0:pm_y0 + bbox[3], pm_x0:pm_x0 + bbox[2]] = 1
        mask = mask * init_mask_padded
        mask = cv2.resize(mask, (self.yf.shape[1], self.yf.shape[0]))
        if self.mask_normal(mask, self.target_dummy_area) is True:
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3),
                                               anchor=(1, 1))
            mask = cv2.dilate(mask, kernel)
        else:
            # Fall back to the dummy mask when mask_normal rejects the mask.
            mask = self.target_dummy_mask
    else:
        mask = self.target_dummy_mask

    # extract features
    f = self.get_csr_features(first_frame, self._center, self.sc,
                              self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    # create filters using segmentation mask
    self.H = self.create_csr_filter(f, self.yf, mask)
    # Initial channel weights: normalised per-channel peak response.
    response = np.real(ifft2(fft2(f) * np.conj(self.H)))
    chann_w = np.max(response.reshape(
        response.shape[0] * response.shape[1], -1),
                     axis=0)
    self.chann_w = chann_w / np.sum(chann_w)

    # Scale model: patch size is the box enlarged by (w + h) / 2.5.
    avg_dim = (w + h) / 2.5
    self.scale_sz = ((w + avg_dim) / self.sc, (h + avg_dim) / self.sc)
    self.scale_sz0 = self.scale_sz
    self.cos_window_scale = cos_window(
        (self.scale_sz_window[0], self.scale_sz_window[1]))
    self.mag = self.cos_window_scale.shape[0] / np.log(
        np.sqrt((self.cos_window_scale.shape[0]**2 +
                 self.cos_window_scale.shape[1]**2) / 4))

    # scale lp
    patchL = cv2.getRectSubPix(first_frame,
                               (int(np.floor(self.sc * self.scale_sz[0])),
                                int(np.floor(self.sc * self.scale_sz[1]))),
                               self._center)
    patchL = cv2.resize(patchL, self.scale_sz_window)
    patchLp = cv2.logPolar(patchL.astype(np.float32),
                           ((patchL.shape[1] - 1) / 2,
                            (patchL.shape[0] - 1) / 2),
                           self.mag,
                           flags=cv2.INTER_LINEAR + cv2.WARP_FILL_OUTLIERS)
    self.model_patchLp = extract_hog_feature(patchLp, cell_size=4)
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame and a bounding box.

    Derives cell size and template sizes from the box, builds the label
    spectrum ``self.yf`` and the window, creates the scale estimator selected
    by ``self.scale_type``, prepares a dummy mask (optionally refined by
    segmentation), trains the first filter ``self.H`` with its channel
    weights and, when ``self.params['top_channels']`` is set, drops the
    lowest-weighted channels and retrains.

    :param first_frame: first image; indexed as ``[:, :, c]``
    :param bbox: ``(x, y, w, h)``; cast to ``int64``
    :raises ValueError: when segmentation is enabled and
        ``self.segcolor_space`` is neither ``'bgr'`` nor ``'hsv'``

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost - confirm against the original file.
    """
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self.init_mask = np.ones((h, w), dtype=np.uint8)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h
    # Channels 0 and 1 identical everywhere: switch segmentation off.
    if np.all(first_frame[:, :, 0] == first_frame[:, :, 1]):
        self.use_segmentation = False
    # change 400 to 300
    # for larger cell_size
    self.cell_size = int(min(4, max(1, w * h / 300)))
    self.base_target_sz = (w, h)
    self.target_sz = self.base_target_sz

    # Square template: mean of the two padded side lengths.
    template_size = (int(w + self.padding * np.sqrt(w * h)),
                     int(h + self.padding * np.sqrt(w * h)))
    template_size = (template_size[0] + template_size[1]) // 2
    self.template_size = (template_size, template_size)

    # Shrink factor that brings the template area down to
    # params['template_size']**2; never larger than 1.
    self.rescale_ratio = np.sqrt(
        (self.params['template_size']**2) /
        (self.template_size[0] * self.template_size[1]))
    self.rescale_ratio = np.clip(self.rescale_ratio, a_min=None, a_max=1)

    self.rescale_template_size = (int(self.rescale_ratio *
                                      self.template_size[0]),
                                  int(self.rescale_ratio *
                                      self.template_size[1]))
    self.yf = fft2(
        gaussian2d_rolled_labels(
            (int(self.rescale_template_size[0] / self.cell_size),
             int(self.rescale_template_size[1] / self.cell_size)),
            self.y_sigma))

    self._window = cos_window((self.yf.shape[1], self.yf.shape[0]))
    self.crop_size = self.rescale_template_size
    self.current_scale_factor = 1.

    if self.scale_type == 'normal':
        self.scale_estimator = DSSTScaleEstimator(self.target_sz,
                                                  config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz,
                                  self.current_scale_factor)
        self._num_scales = self.scale_estimator.num_scales
        self._scale_step = self.scale_estimator.scale_step

        # Scale limits are whole powers of the scale step: the lower one
        # from 5 / crop size, the upper one from frame size / target size.
        self._min_scale_factor = self._scale_step**np.ceil(
            np.log(
                np.max(5 / np.array(
                    ([self.crop_size[0], self.crop_size[1]])))) /
            np.log(self._scale_step))
        self._max_scale_factor = self._scale_step**np.floor(
            np.log(
                np.min(first_frame.shape[:2] / np.array(
                    [self.base_target_sz[1], self.base_target_sz[0]]))) /
            np.log(self._scale_step))
    elif self.scale_type == 'LP':
        self.scale_estimator = LPScaleEstimator(self.target_sz,
                                                config=self.scale_config)
        self.scale_estimator.init(first_frame, self._center,
                                  self.base_target_sz,
                                  self.current_scale_factor)

    # create dummy mask (approximation for segmentation)
    # size of the object in feature space
    obj_sz = (int(self.rescale_ratio *
                  (self.base_target_sz[0] / self.cell_size)),
              int(self.rescale_ratio *
                  (self.base_target_sz[1] / self.cell_size)))
    x0 = int((self.yf.shape[1] - obj_sz[0]) / 2)
    y0 = int((self.yf.shape[0] - obj_sz[1]) / 2)
    x1 = x0 + obj_sz[0]
    y1 = y0 + obj_sz[1]
    self.target_dummy_mask = np.zeros_like(self.yf, dtype=np.uint8)
    self.target_dummy_mask[y0:y1, x0:x1] = 1
    self.target_dummy_area = np.sum(self.target_dummy_mask)

    if self.use_segmentation:
        if self.segcolor_space == 'bgr':
            seg_img = first_frame
        elif self.segcolor_space == 'hsv':
            seg_img = cv2.cvtColor(first_frame, cv2.COLOR_BGR2HSV)
            # Rescale channel 0 by 255/180.
            seg_img[:, :, 0] = (seg_img[:, :, 0].astype(np.float32) / 180 *
                                255)
            seg_img = seg_img.astype(np.uint8)
        else:
            raise ValueError
        hist_fg = Histogram(3, self.nbins)
        hist_bg = Histogram(3, self.nbins)
        self.extract_histograms(seg_img, bbox, hist_fg, hist_bg)

        mask = self.segment_region(seg_img, self._center,
                                   self.template_size, self.base_target_sz,
                                   self.current_scale_factor, hist_fg,
                                   hist_bg)
        self.hist_bg_p_bins = hist_bg.p_bins
        self.hist_fg_p_bins = hist_fg.p_bins

        # Keep only the part of the mask inside a centred box of bbox size.
        init_mask_padded = np.zeros_like(mask)
        pm_x0 = int(np.floor(mask.shape[1] / 2 - bbox[2] / 2))
        pm_y0 = int(np.floor(mask.shape[0] / 2 - bbox[3] / 2))
        init_mask_padded[pm_y0:pm_y0 + bbox[3], pm_x0:pm_x0 + bbox[2]] = 1
        mask = mask * init_mask_padded
        mask = cv2.resize(mask, (self.yf.shape[1], self.yf.shape[0]))
        if self.mask_normal(mask, self.target_dummy_area) is True:
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3),
                                               anchor=(1, 1))
            mask = cv2.dilate(mask, kernel)
        else:
            # Fall back to the dummy mask when mask_normal rejects the mask.
            mask = self.target_dummy_mask
    else:
        mask = self.target_dummy_mask

    # extract features
    f = get_csr_features(first_frame, self._center,
                         self.current_scale_factor, self.template_size,
                         self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]
    # create filters using segmentation mask
    self.H = self.create_csr_filter(f, self.yf, mask)
    # Initial channel weights: normalised per-channel peak response.
    response = np.real(ifft2(fft2(f) * np.conj(self.H)))
    chann_w = np.max(response.reshape(
        response.shape[0] * response.shape[1], -1),
                     axis=0)
    self.chann_w = chann_w / np.sum(chann_w)

    # New: irrelevant channels!
    self.irrelevant_channels = []
    top_channels = self.params['top_channels']
    if top_channels:
        # Every channel ranked below the top_channels highest peak responses
        # is marked irrelevant and removed from the features.
        for chan_i, _ in sorted(enumerate(chann_w),
                                reverse=True,
                                key=lambda x: x[1])[top_channels:]:
            self.irrelevant_channels.append(chan_i)
        # NOTE(review): the retraining below is assumed to sit inside this
        # branch; with no channels removed it would reproduce the same H.
        f = np.delete(f, self.irrelevant_channels, 2)
        # create filters using segmentation mask
        self.H = self.create_csr_filter(f, self.yf, mask)
        response = np.real(ifft2(fft2(f) * np.conj(self.H)))
        chann_w = np.max(response.reshape(
            response.shape[0] * response.shape[1], -1),
                         axis=0)
        self.chann_w = chann_w / np.sum(chann_w)
def _detection(self, alphaf, x, z): k = self._dense_gauss_kernel(x, z) responses = np.real(ifft2(alphaf * fft2(k))) return responses
def update(self, current_frame, vis=False):
    """Locate the target in ``current_frame`` and refresh both filters.

    The translation filter is evaluated first and ``self._center`` is moved
    to the peak of its response.  The scale filter is then evaluated at the
    new centre, and ``self.current_scale_factor`` is multiplied by the
    selected entry of ``self.scale_factors`` and clipped to
    ``[self.min_scale_factor, self.max_scale_factor]``.  Finally both
    filters are re-estimated at the new state and blended into the stored
    numerators/denominators with ``self.interp_factor``.

    :param current_frame: frame handed to the sample-extraction helpers
    :param vis: when ``True`` the translation response is kept in
        ``self.score``
    :return: ``[x, y, w, h]`` where ``(x, y)`` is the centre minus half of
        ``self.target_sz``
    """
    # --- translation: correlate the stored filter with a fresh sample ---
    sample_f = fft2(
        self.get_translation_sample(current_frame, self._center,
                                    self.crop_size,
                                    self.current_scale_factor, self._window))
    response = np.real(
        ifft2(np.sum(self.hf_num * sample_f, axis=2) /
              (self.hf_den + self.lambda_)))
    if vis is True:
        self.score = response

    peak = np.unravel_index(np.argmax(response, axis=None), response.shape)
    # Peak offset from the reference position, scaled by the current factor.
    shift_y = (peak[0] -
               self._init_response_center[0]) * self.current_scale_factor
    shift_x = (peak[1] -
               self._init_response_center[1]) * self.current_scale_factor
    center_x, center_y = self._center
    self._center = (center_x + shift_x, center_y + shift_y)

    # --- scale: 1-D correlation along axis 1 of the scale sample ---
    scale_f = np.fft.fft(self.get_scale_sample(current_frame, self._center),
                         axis=1)
    scale_response = np.real(
        np.fft.ifft(
            np.sum(self.sf_num * scale_f, axis=0) /
            (self.sf_den + self.lambda_)))
    best_scale = np.argmax(scale_response)
    self.current_scale_factor = np.clip(
        self.current_scale_factor * self.scale_factors[best_scale],
        a_min=self.min_scale_factor,
        a_max=self.max_scale_factor)

    # --- model update: re-sample at the new centre and scale ---
    model_f = fft2(
        self.get_translation_sample(current_frame, self._center,
                                    self.crop_size,
                                    self.current_scale_factor, self._window))
    fresh_hf_num = self.yf[:, :, None] * np.conj(model_f)
    fresh_hf_den = np.sum(model_f * np.conj(model_f), axis=2)

    model_scale_f = np.fft.fft(
        self.get_scale_sample(current_frame, self._center), axis=1)
    fresh_sf_num = self.ysf * np.conj(model_scale_f)
    fresh_sf_den = np.sum(model_scale_f * np.conj(model_scale_f), axis=0)

    rate = self.interp_factor
    self.hf_den = (1 - rate) * self.hf_den + rate * fresh_hf_den
    self.hf_num = (1 - rate) * self.hf_num + rate * fresh_hf_num
    self.sf_den = (1 - rate) * self.sf_den + rate * fresh_sf_den
    self.sf_num = (1 - rate) * self.sf_num + rate * fresh_sf_num

    self.target_sz = (self.base_target_size[0] * self.current_scale_factor,
                      self.base_target_size[1] * self.current_scale_factor)
    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2,
        self.target_sz[0],
        self.target_sz[1],
    ]
def tracking(self, img, pos, polish):
    """Estimate the new position, scale change and rotation change.

    A sub-window is extracted at ``pos`` from ``img``, converted to
    features and correlated with the stored model in the Fourier domain.
    The correlation response is optionally merged with a colour-histogram
    response, the peak gives the translation, and a log-polar patch taken
    at the updated position is passed to ``self.estimate_scale``.

    :param img: frame to search in
    :param pos: ``(x, y)`` position to extract the sub-window around
    :param polish: selects the window set; values ``> 0`` use
        ``self.window_sz0`` / ``self.cos_window``, other values use
        ``self.window_sz_search0`` / ``self.cos_window_search`` and pad the
        spatial filter to the size of the sample features
    :return: ``(pos, tmp_sc, tmp_rot, cscore, sscore)``: updated position,
        scale factor clipped to ``[0.6, 1.4]``, rotation (reset to 0 when
        outside ``[-1, 1]``), ``PSR`` of the merged response, and the score
        returned by ``self.estimate_scale``
    """
    large_num = 0
    if polish > large_num:
        w_sz0 = self.window_sz0
        c_w = self.cos_window
    else:
        w_sz0 = self.window_sz_search0
        c_w = self.cos_window_search

    if self.is_rotation:
        patch = self.get_affine_subwindow(img, pos, self.sc, self.rot, w_sz0)
    else:
        sz_s = (int(np.floor(self.sc[0] * w_sz0[0])),
                int(np.floor(self.sc[1] * w_sz0[1])))
        patchO = cv2.getRectSubPix(img, sz_s, pos)
        # The flag must be passed by keyword: the third positional
        # parameter of cv2.resize is ``dst``, not ``interpolation``.
        patch = cv2.resize(patchO, w_sz0, interpolation=cv2.INTER_CUBIC)

    z = self.get_features(patch, self.cell_size)
    z = z * c_w[:, :, None]
    zf = fft2(z)
    ssz = (zf.shape[1], zf.shape[0], zf.shape[2])

    # calculate response of the classifier at all shifts
    wf = np.conj(self.model_xf) * self.model_alphaf[:, :, None] / np.size(
        self.model_xf)
    if polish <= large_num:
        # Search window differs from the model window: pad the spatial
        # filter to the sample size before going back to Fourier space.
        w = pad(np.real(ifft2(wf)), (ssz[1], ssz[0]))
        wf = fft2(w)

    tmp_sz = ssz
    # compute convolution for each feature block in the Fourier domain
    # use general compute here for easy extension in future
    rff = np.sum(wf * zf, axis=2)
    rff_real = cv2.resize(rff.real, (tmp_sz[0], tmp_sz[1]),
                          interpolation=cv2.INTER_NEAREST)
    rff_imag = cv2.resize(rff.imag, (tmp_sz[0], tmp_sz[1]),
                          interpolation=cv2.INTER_NEAREST)
    rff = rff_real + 1.j * rff_imag
    response_cf = np.real(ifft2(rff))
    response_cf = crop_filter_response(
        response_cf, (response_cf.shape[1], response_cf.shape[0]))

    response_color = np.zeros_like(response_cf)
    if self.use_color_hist:
        object_likelihood = self.get_colour_map(patch, self.pl, self.pi,
                                                self.bin_mapping)
        response_color = get_center_likelihood(object_likelihood,
                                               self.target_sz0)
        response_color = cv2.resize(
            response_color, (response_cf.shape[1], response_cf.shape[0]),
            interpolation=cv2.INTER_CUBIC)

    # adaptive merge factor
    if self.adaptive_merge_factor is True:
        cf_conf = confidence_cf_apce(response_cf)
        adaptive_merge_factor = self.merge_factor * self.theta + (
            1 - self.theta) * (1 - cf_conf)
        response = (1 - adaptive_merge_factor) * response_cf \
            + adaptive_merge_factor * response_color
    else:
        response = (1 - self.merge_factor) * response_cf \
            + self.merge_factor * response_color

    if self.vis is True:
        self.score = response
        self.crop_size = self.window_sz

    # sub-pixel search
    pty, ptx = np.unravel_index(np.argmax(response, axis=None),
                                response.shape)
    if self.is_subpixel:
        # Response-weighted centroid over a (2 * slobe + 1)^2 neighbourhood
        # of the peak, with indices clamped to the response borders.
        slobe = 2
        idy = np.arange(pty - slobe, pty + slobe + 1)
        idx = np.arange(ptx - slobe, ptx + slobe + 1)
        idy = np.clip(idy, a_min=0, a_max=response.shape[0] - 1)
        idx = np.clip(idx, a_min=0, a_max=response.shape[1] - 1)
        weight_patch = response[idy, :][:, idx]
        s = np.sum(weight_patch) + 2e-16
        pty = np.sum(np.sum(weight_patch, axis=1) * idy) / s
        ptx = np.sum(np.sum(weight_patch, axis=0) * idx) / s
    cscore = PSR(response, 0.1)

    # update the translation status
    dy = pty - (response.shape[0]) // 2
    dx = ptx - (response.shape[1]) // 2
    scale_patch_sz = (int(np.floor(self.sc[0] * self.scale_sz[0])),
                      int(np.floor(self.sc[1] * self.scale_sz[1])))
    if self.is_rotation:
        sn, cs = np.sin(self.rot), np.cos(self.rot)
        pp = np.array([[self.sc[1] * cs, -self.sc[0] * sn],
                       [self.sc[1] * sn, self.sc[0] * cs]])
        x, y = pos
        delta = self.cell_size * np.array([[dy, dx]]).dot(pp)
        x += delta[0, 1]
        y += delta[0, 0]
        pos = (x, y)
        patchL = self.get_affine_subwindow(img, pos, [1., 1.], self.rot,
                                           scale_patch_sz)
    else:
        x, y = pos
        pos = (x + self.sc[0] * self.cell_size * dx,
               y + self.sc[1] * self.cell_size * dy)
        patchL = cv2.getRectSubPix(img, scale_patch_sz, pos)

    patchL = cv2.resize(patchL, self.scale_sz_window,
                        interpolation=cv2.INTER_CUBIC)
    patchLp = cv2.logPolar(patchL.astype(np.float32),
                           (patchL.shape[1] // 2, patchL.shape[0] // 2),
                           self.mag,
                           flags=cv2.INTER_LINEAR + cv2.WARP_FILL_OUTLIERS)
    patchLp = extract_hog_feature(patchLp, self.cell_size)

    tmp_sc, tmp_rot, sscore = self.estimate_scale(self.model_patchLp,
                                                  patchLp, self.mag)
    tmp_sc = np.clip(tmp_sc, a_min=0.6, a_max=1.4)
    if tmp_rot > 1 or tmp_rot < -1:
        tmp_rot = 0
    return pos, tmp_sc, tmp_rot, cscore, sscore
def _detection(self, alphaf, xf, zf, kernel='gaussian'): kzf = self._kernel_correlation(zf, xf, kernel) responses = np.real(ifft2(alphaf * kzf)) return responses
def update(self, current_frame, vis=False):
    """Track the target in ``current_frame`` with the expert ensemble.

    Every expert correlates its own feature subset with its filter, merges
    the result with the shared colour-histogram response and proposes a
    position.  Once ``self.frame_idx >= self.period - 1`` the expert with
    the highest robustness score sets ``self._center``; before that,
    expert 6 is used.  The learning rates are then adapted from the
    ensemble score, the optional scale estimator is run, and the filters
    (and, when ``self.learning_rate_pwp`` is non-zero, the colour
    histograms) are updated.

    :param current_frame: frame to track in
    :param vis: when ``True`` the selected response is kept in
        ``self.score``
    :return: ``[x, y, w, h]`` where ``(x, y)`` is the centre minus half of
        ``self.target_sz``
    """

    def assign_expert_features(features):
        # Split the windowed feature map and hand each expert its subset.
        cn, hog1, hog2 = self.split_features(features)
        self.experts[0].xt = cn
        self.experts[1].xt = hog1
        self.experts[2].xt = hog2
        self.experts[3].xt = np.concatenate((hog1, cn), axis=2)
        self.experts[4].xt = np.concatenate((hog2, cn), axis=2)
        self.experts[5].xt = np.concatenate((hog1, hog2), axis=2)
        self.experts[6].xt = features

    self.frame_idx += 1

    # --- inputs shared by all experts ---
    im_patch_cf = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    pwp_search_area = (
        round(self.norm_pwp_search_area[0] / self.area_resize_factor),
        round(self.norm_pwp_search_area[1] / self.area_resize_factor))
    im_patch_pwp = self.get_sub_window(current_frame, self._center,
                                       self.norm_pwp_search_area,
                                       pwp_search_area)
    likelihood_map = self.get_colour_map(im_patch_pwp, self.bg_hist,
                                         self.fg_hist, self.bin_mapping)
    likelihood_map[np.isnan(likelihood_map)] = 0.
    self.norm_target_sz = (int(self.norm_target_sz[0]),
                           int(self.norm_target_sz[1]))
    response_pwp = get_center_likelihood(likelihood_map, self.norm_target_sz)

    xt = self.get_feature_map(im_patch_cf, self.cell_size)
    xt = self._window[:, :, None] * xt
    assign_expert_features(xt)

    center = ((self.norm_delta_area[0] - 1) / 2,
              (self.norm_delta_area[1] - 1) / 2)
    # Identical for every expert, so computed once outside the loop.
    response_sz = (self.floor_odd(self.norm_delta_area[0] / self.cell_size),
                   self.floor_odd(self.norm_delta_area[1] / self.cell_size))

    # --- per-expert detection ---
    for i in range(self.expert_num):
        expert = self.experts[i]
        xtf = fft2(expert.xt)
        hf = expert.hf_num / (np.sum(expert.hf_den, axis=2) +
                              self.lambda_)[:, :, None]
        response_cf = np.real(ifft2(np.sum(np.conj(hf) * xtf, axis=2)))
        # The flag must be passed by keyword: the third positional
        # parameter of cv2.resize is ``dst``, not ``interpolation``.
        response_cf = cv2.resize(
            crop_filter_response(response_cf, response_sz),
            self.norm_delta_area,
            interpolation=cv2.INTER_NEAREST)
        response_cf[np.isnan(response_cf)] = 0.
        expert.response = (1 - self.merge_factor) * response_cf \
            + self.merge_factor * response_pwp

        row, col = np.unravel_index(np.argmax(expert.response, axis=None),
                                    expert.response.shape)
        dy = (row - center[1]) / self.area_resize_factor
        dx = (col - center[0]) / self.area_resize_factor
        expert.pos = (self._center[0] + dx, self._center[1] + dy)

        cx, cy = expert.pos[0], expert.pos[1]
        w, h = self.target_sz[0], self.target_sz[1]
        expert.rect_positions.append([cx - w / 2, cy - h / 2, w, h])
        expert.centers.append([cx, cy])
        # Distance to this expert's centre stored for the previous frame.
        pre_center = expert.centers[self.frame_idx - 1]
        smooth = np.sqrt((cx - pre_center[0]) ** 2 +
                         (cy - pre_center[1]) ** 2)
        expert.smoothes.append(smooth)
        expert.smooth_scores.append(
            np.exp(-smooth ** 2 / (2 * self.avg_dim ** 2)))

    # --- expert selection ---
    if self.frame_idx >= self.period - 1:
        for i in range(self.expert_num):
            self.experts[i].rob_scores.append(
                self.robustness_eva(self.experts, i, self.frame_idx,
                                    self.period, self.weight,
                                    self.expert_num))
            self.id_ensemble[i] = self.experts[i].rob_scores[self.frame_idx]
        self.mean_score.append(
            np.sum(np.array(self.id_ensemble)) / self.expert_num)
        idx = np.argmax(np.array(self.id_ensemble))
        self._center = self.experts[idx].pos
        self.response = self.experts[idx].response
    else:
        for i in range(self.expert_num):
            self.experts[i].rob_scores.append(1)
        self._center = self.experts[6].pos
        self.response = self.experts[6].response
        self.mean_score.append(0)

    if vis is True:
        self.score = self.response

    # --- adaptive update ---
    score1 = self.cal_psr(self.experts[0].response)
    score2 = self.cal_psr(self.experts[1].response)
    score3 = self.cal_psr(self.experts[2].response)
    self.psr_score.append((score1 + score2 + score3) / 3)

    if self.frame_idx >= self.period - 1:
        final_score = self.mean_score[self.frame_idx] \
            * self.psr_score[self.frame_idx]
        # Mean of mean_score * psr_score over frames period-1 .. frame_idx.
        ave_score = np.sum(
            np.array(self.mean_score)[self.period - 1:self.frame_idx + 1] *
            np.array(self.psr_score[self.period - 1:self.frame_idx + 1])) / (
                self.frame_idx + 1 - self.period + 1)
        threshold = self.update_thresh * ave_score
        if final_score > threshold:
            self.learning_rate_pwp = self.config.interp_factor_pwp
            self.learning_rate_cf = self.config.interp_factor_cf
        else:
            # Low-confidence frame: freeze the histogram model and shrink
            # the filter learning rate.
            self.learning_rate_pwp = 0
            self.learning_rate_cf = (final_score / threshold) ** 3 \
                * self.config.interp_factor_cf

    # --- scale estimation and derived areas ---
    if self.scale_adaptation:
        self.scale_factor = self.scale_estimator.update(
            current_frame, self._center, self.base_target_sz,
            self.scale_factor)
        self.target_sz = (round(self.base_target_sz[0] * self.scale_factor),
                          round(self.base_target_sz[1] * self.scale_factor))
        avg_dim = (self.target_sz[0] + self.target_sz[1]) / 2
        bg_area = (round(self.target_sz[0] + avg_dim),
                   round(self.target_sz[1] + avg_dim))
        fg_area = (round(self.target_sz[0] - avg_dim * self.inner_padding),
                   round(self.target_sz[1] - avg_dim * self.inner_padding))
        bg_area = (min(bg_area[0], current_frame.shape[1] - 1),
                   min(bg_area[1], current_frame.shape[0] - 1))
        # Parity adjustments between the target, background and
        # foreground sizes.
        self.bg_area = (bg_area[0] - (bg_area[0] - self.target_sz[0]) % 2,
                        bg_area[1] - (bg_area[1] - self.target_sz[1]) % 2)
        self.fg_area = (fg_area[0] + (self.bg_area[0] - fg_area[0]) % 2,
                        fg_area[1] + (self.bg_area[1] - fg_area[1]) % 2)
        self.area_resize_factor = np.sqrt(
            self.fixed_area / (self.bg_area[0] * self.bg_area[1]))

    # --- filter update at the new centre ---
    im_patch_bg = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.cell_size)
    xt = self._window[:, :, None] * xt
    assign_expert_features(xt)

    cf_area = self.cf_response_size[0] * self.cf_response_size[1]
    for i in range(self.expert_num):
        expert = self.experts[i]
        xtf = fft2(expert.xt)
        hf_den = np.conj(xtf) * xtf / cf_area
        hf_num = np.conj(self.yf)[:, :, None] * xtf / cf_area
        expert.hf_den = (1 - self.learning_rate_cf) * expert.hf_den \
            + self.learning_rate_cf * hf_den
        expert.hf_num = (1 - self.learning_rate_cf) * expert.hf_num \
            + self.learning_rate_cf * hf_num

    if self.learning_rate_pwp != 0:
        im_patch_bg = self.get_sub_window(current_frame, self._center,
                                          self.norm_bg_area, self.bg_area)
        self.bg_hist, self.fg_hist = self.update_hist_model(
            self.new_pwp_model, im_patch_bg, self.bg_area, self.fg_area,
            self.target_sz, self.norm_bg_area, self.n_bins)

    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2,
        self.target_sz[0],
        self.target_sz[1],
    ]
def init(self, first_frame, bbox):
    """Initialise the tracker state from the first frame.

    Derives the template geometry and cell size from ``bbox``, builds the
    label and window functions, sets up the scale-search parameters,
    builds a spatial mask (from colour segmentation when enabled, otherwise
    a rectangular dummy mask), learns the masked filter ``self.H`` with its
    channel weights, and creates the scale filter.

    :param first_frame: first image of the sequence, indexed as
        ``[row, col, channel]``
    :param bbox: ``(x, y, w, h)`` of the target; values are cast to int64
    :raises ValueError: if segmentation is enabled and
        ``self.segcolor_space`` is neither ``'bgr'`` nor ``'hsv'``
    """
    bbox = np.array(bbox).astype(np.int64)
    x, y, w, h = tuple(bbox)
    self.init_mask = np.ones((h, w), dtype=np.uint8)
    self._center = (x + w / 2, y + h / 2)
    self.w, self.h = w, h

    # Identical first two channels: segmentation is switched off.
    if np.all(first_frame[:, :, 0] == first_frame[:, :, 1]):
        self.use_segmentation = False

    # change 400 to 300
    # for larger cell_size
    self.cell_size = int(min(4, max(1, w * h / 300)))
    self.base_target_sz = (w, h)

    # Square template whose side is the mean of the padded width/height.
    padded_w = int(w + self.padding * np.sqrt(w * h))
    padded_h = int(h + self.padding * np.sqrt(w * h))
    side = (padded_w + padded_h) // 2
    self.template_size = (side, side)

    # Ratio that brings the template area down to 200 * 200, capped at 1.
    ratio = np.sqrt(
        (200**2) / (self.template_size[0] * self.template_size[1]))
    self.rescale_ratio = np.clip(ratio, a_min=None, a_max=1)
    self.rescale_template_size = (
        int(self.rescale_ratio * self.template_size[0]),
        int(self.rescale_ratio * self.template_size[1]))

    label_sz = (int(self.rescale_template_size[0] / self.cell_size),
                int(self.rescale_template_size[1] / self.cell_size))
    self.yf = fft2(gaussian2d_rolled_labels(label_sz, self.y_sigma))
    self._window = cos_window((self.yf.shape[1], self.yf.shape[0]))
    self.crop_size = self.rescale_template_size

    # --- scale search setup (the same as DSST) ---
    self.scale_sigma = np.sqrt(self.n_scales) * self.scale_sigma_factor
    centred_idx = np.arange(1, self.n_scales + 1) - np.ceil(
        self.n_scales / 2)
    scale_labels = np.exp(-0.5 * (centred_idx**2) / (self.scale_sigma**2))
    self.ysf = np.fft.fft(scale_labels)
    if self.n_scales % 2 == 0:
        # Even count: take one extra Hann sample and drop the first.
        self.scale_window = np.hanning(self.n_scales + 1)[1:]
    else:
        self.scale_window = np.hanning(self.n_scales)
    scale_idx = np.arange(1, self.n_scales + 1)
    self.scale_factors = self.scale_step**(np.ceil(self.n_scales / 2) -
                                           scale_idx)

    template_area = self.template_size[0] * self.template_size[1]
    self.scale_model_factor = 1.
    if (self.scale_model_factor**2 * template_area) > \
            self.scale_model_max_area:
        self.scale_model_factor = np.sqrt(self.scale_model_max_area /
                                          template_area)
    self.scale_model_sz = (
        int(np.floor(self.template_size[0] * self.scale_model_factor)),
        int(np.floor(self.template_size[1] * self.scale_model_factor)))

    self.current_scale_factor = 1.
    log_step = np.log(self.scale_step)
    min_exponent = np.ceil(
        np.log(max(5 / self.template_size[0], 5 / self.template_size[1])) /
        log_step)
    self.min_scale_factor = self.scale_step**(int(min_exponent))
    max_exponent = np.floor(
        (np.log(
            min(first_frame.shape[1] / self.base_target_sz[0],
                first_frame.shape[0] / self.base_target_sz[1])) / log_step))
    self.max_scale_factor = self.scale_step**(int(max_exponent))

    # --- dummy mask (approximation for segmentation) ---
    # size of the object in feature space
    obj_sz = (int(self.rescale_ratio *
                  (self.base_target_sz[0] / self.cell_size)),
              int(self.rescale_ratio *
                  (self.base_target_sz[1] / self.cell_size)))
    left = int((self.yf.shape[1] - obj_sz[0]) / 2)
    top = int((self.yf.shape[0] - obj_sz[1]) / 2)
    right = left + obj_sz[0]
    bottom = top + obj_sz[1]
    self.target_dummy_mask = np.zeros_like(self.yf, dtype=np.uint8)
    self.target_dummy_mask[top:bottom, left:right] = 1
    self.target_dummy_area = np.sum(self.target_dummy_mask)

    # --- segmentation mask ---
    if not self.use_segmentation:
        mask = self.target_dummy_mask
    else:
        if self.segcolor_space == 'bgr':
            seg_img = first_frame
        elif self.segcolor_space == 'hsv':
            seg_img = cv2.cvtColor(first_frame, cv2.COLOR_BGR2HSV)
            # Rescale channel 0 by 255 / 180.
            seg_img[:, :, 0] = (seg_img[:, :, 0].astype(np.float32) / 180 *
                                255)
            seg_img = seg_img.astype(np.uint8)
        else:
            raise ValueError

        hist_fg = Histogram(3, self.nbins)
        hist_bg = Histogram(3, self.nbins)
        self.extract_histograms(seg_img, bbox, hist_fg, hist_bg)
        mask = self.segment_region(seg_img, self._center, self.template_size,
                                   self.base_target_sz,
                                   self.current_scale_factor, hist_fg,
                                   hist_bg)
        self.hist_bg_p_bins = hist_bg.p_bins
        self.hist_fg_p_bins = hist_fg.p_bins

        # Keep only mask pixels inside a centred box of the bbox size.
        init_mask_padded = np.zeros_like(mask)
        pm_x0 = int(np.floor(mask.shape[1] / 2 - bbox[2] / 2))
        pm_y0 = int(np.floor(mask.shape[0] / 2 - bbox[3] / 2))
        init_mask_padded[pm_y0:pm_y0 + bbox[3], pm_x0:pm_x0 + bbox[2]] = 1
        mask = mask * init_mask_padded
        mask = cv2.resize(mask, (self.yf.shape[1], self.yf.shape[0]))

        if self.mask_normal(mask, self.target_dummy_area) is True:
            kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3),
                                               anchor=(1, 1))
            mask = cv2.dilate(mask, kernel)
        else:
            mask = self.target_dummy_mask

    # --- extract features ---
    f = self.get_csr_features(first_frame, self._center,
                              self.current_scale_factor, self.template_size,
                              self.rescale_template_size, self.cell_size)
    f = f * self._window[:, :, None]

    # --- create filters using segmentation mask ---
    self.H = self.create_csr_filter(f, self.yf, mask)
    response = np.real(ifft2(fft2(f) * np.conj(self.H)))
    # Channel weight: per-channel maximum response, normalised to sum 1.
    flat_response = response.reshape(response.shape[0] * response.shape[1],
                                     -1)
    chann_w = np.max(flat_response, axis=0)
    self.chann_w = chann_w / np.sum(chann_w)

    # --- make a scale search model as well ---
    xs = self.get_scale_sample(first_frame, self._center,
                               self.base_target_sz,
                               self.current_scale_factor * self.scale_factors,
                               self.scale_window, self.scale_model_sz)
    xsf = np.fft.fft(xs, axis=1)
    self.sf_num = self.ysf * np.conj(xsf)
    self.sf_den = np.sum(xsf * np.conj(xsf), axis=0)
def update(self, current_frame, vis=False):
    """Track the target in ``current_frame`` and update all models.

    The centre is refined for at most ``self.refinement_iterations``
    iterations (stopping early once an iteration leaves it unchanged) by
    correlating the two-kernel model, weighted by ``self.d``, with features
    extracted at the current centre and scale.  When
    ``self.num_of_scales > 0`` the scale filter is evaluated and updated
    with QR-derived projection bases.  The CN/HOG templates and their PCA
    projection matrices are then refreshed, and ``self.train_model()`` runs
    every ``self.modnum`` frames.

    :param current_frame: frame to track in
    :param vis: when ``True`` the centred response is kept in ``self.score``
    :return: ``[x, y, w, h]`` where ``(x, y)`` is the centre minus half of
        the scaled base target size
    :raises ValueError: if ``self.interpolate_response`` is not 0, 1 or 2
    """
    self.frame_index += 1

    # --- translation: iterative refinement of the centre ---
    old_pos = (np.inf, np.inf)
    iteration = 1
    while iteration <= self.refinement_iterations and np.any(
            np.array(old_pos) != np.array(self._center)):
        patch = cv2.getRectSubPix(
            current_frame,
            (int(self.base_target_sz[0] * self.sc * (1 + self.padding)),
             int(self.base_target_sz[1] * self.sc * (1 + self.padding))),
            self._center)
        patch = cv2.resize(patch, self.win_sz).astype(np.uint8)
        xo_hog, xo_cn = self.get_features(patch, self.cell_size)
        xo_cn2, xo_hog2 = self.feature_projection(
            xo_cn, xo_hog, self.projection_matrix_cn,
            self.projection_matrix_hog, self._window)

        detect_k_cn = self.dense_gauss_kernel(self.z_cn2, xo_cn2,
                                              self.cn_sigma)
        detect_k_hog = self.dense_gauss_kernel(self.z_hog2, xo_hog2,
                                               self.hog_sigma)
        kf = fft2(self.d[0] * detect_k_cn + self.d[1] * detect_k_hog)
        responsef = self.alphaf * np.conj(kf)

        if self.interpolate_response > 0:
            if self.interpolate_response == 2:
                # NOTE(review): this mode only recomputes interp_sz; the
                # response itself is not resized here -- confirm intended.
                self.interp_sz = (
                    int(self.yf.shape[1] * self.cell_size * self.sc),
                    int(self.yf.shape[0] * self.cell_size * self.sc))
            else:
                responsef = self.resize_dft2(responsef, self.interp_sz)
        response = np.real(ifft2(responsef))

        if vis is True:
            # Roll by half the size along both axes for display.
            self.score = response
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[0] / 2)),
                                 axis=0)
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[1] / 2)),
                                 axis=1)

        row, col = np.unravel_index(np.argmax(response, axis=None),
                                    response.shape)
        # Map the wrapped peak index to a signed displacement.
        half_rows = np.floor((self.interp_sz[1] - 1) / 2)
        half_cols = np.floor((self.interp_sz[0] - 1) / 2)
        disp_row = np.mod(row + half_rows, self.interp_sz[1]) - half_rows
        disp_col = np.mod(col + half_cols, self.interp_sz[0]) - half_cols

        if self.interpolate_response == 0:
            translation_vec = list(
                np.array([disp_row, disp_col]) * self.cell_size * self.sc)
        elif self.interpolate_response == 1:
            translation_vec = list(np.array([disp_row, disp_col]) * self.sc)
        elif self.interpolate_response == 2:
            translation_vec = [disp_row, disp_col]
        else:
            raise ValueError(
                "unsupported interpolate_response: %r" %
                (self.interpolate_response,))

        old_pos = self._center
        self._center = (old_pos[0] + translation_vec[1],
                        old_pos[1] + translation_vec[0])
        iteration += 1

    # --- scale: detection followed by scale-model update ---
    if self.num_of_scales > 0:
        xs = self.get_scale_subwindow(current_frame, self._center)
        xs = self.scale_window * self.scale_basis.dot(xs)
        xsf = np.fft.fft(xs, axis=1)
        scale_responsef = np.sum(self.sf_num * xsf, axis=0) / (
            self.sf_den + self.lambda_)
        interp_scale_response = np.real(
            np.fft.ifft(
                self.resize_dft(scale_responsef,
                                self.num_of_interp_scales)))
        recovered_scale = np.argmax(interp_scale_response)
        self.sc = self.sc * self.interp_scale_factors[recovered_scale]
        self.sc = np.clip(self.sc,
                          a_min=self.min_scale_factor,
                          a_max=self.max_scale_factor)

        new_xs = self.get_scale_subwindow(current_frame, self._center)
        self.s_num = (1 - self.interp_factor) * self.s_num \
            + self.interp_factor * new_xs
        # Q factors of the running template and of the new sample serve as
        # (transposed) projection bases.
        self.scale_basis, _ = np.linalg.qr(self.s_num)
        self.scale_basis_den, _ = np.linalg.qr(new_xs)
        self.scale_basis = self.scale_basis.T
        self.scale_basis_den = self.scale_basis_den.T

        sf_proj = np.fft.fft(self.scale_window *
                             self.scale_basis.dot(self.s_num),
                             axis=1)
        self.sf_num = self.ysf * np.conj(sf_proj)
        new_xs = self.scale_window * self.scale_basis_den.dot(new_xs)
        xsf = np.fft.fft(new_xs, axis=1)
        new_sf_den = np.sum(xsf * np.conj(xsf), axis=0)
        self.sf_den = (1 - self.interp_factor) * self.sf_den \
            + self.interp_factor * new_sf_den

    # --- appearance templates and PCA projections ---
    patch = cv2.getRectSubPix(
        current_frame,
        (int(self.base_target_sz[0] * self.sc * (1 + self.padding)),
         int(self.base_target_sz[1] * self.sc * (1 + self.padding))),
        self._center)
    patch = cv2.resize(patch, self.win_sz).astype(np.uint8)
    xo_hog, xo_cn = self.get_features(patch, self.cell_size)
    self.z_hog = (1 - self.lr_hog) * self.z_hog + self.lr_hog * xo_hog
    self.z_cn = (1 - self.lr_cn) * self.z_cn + self.lr_cn * xo_cn

    # Leading left-singular vectors of X^T X, one row of X per position.
    data_matrix_cn = self.z_cn.reshape((-1, self.z_cn.shape[2]))
    pca_basis_cn, _, _ = np.linalg.svd(data_matrix_cn.T.dot(data_matrix_cn))
    self.projection_matrix_cn = \
        pca_basis_cn[:, :self.num_compressed_dim_cn]

    data_matrix_hog = self.z_hog.reshape((-1, self.z_hog.shape[2]))
    pca_basis_hog, _, _ = np.linalg.svd(
        data_matrix_hog.T.dot(data_matrix_hog))
    self.projection_matrix_hog = \
        pca_basis_hog[:, :self.num_compressed_dim_hog]

    self.z_cn2, self.z_hog2 = self.feature_projection(
        self.z_cn, self.z_hog, self.projection_matrix_cn,
        self.projection_matrix_hog, self._window)

    if self.frame_index % self.modnum == 0:
        self.train_model()

    target_sz = ((self.base_target_sz[0] * self.sc),
                 (self.base_target_sz[1] * self.sc))
    return [(self._center[0] - target_sz[0] / 2),
            (self._center[1] - target_sz[1] / 2), target_sz[0],
            target_sz[1]]
def _detection(self, alphaf, x, z): k = self._dgk(x, z) responses = np.real(ifft2(alphaf * fft2(k))) return responses
def update(self, current_frame, vis=False):
    """Track the target in ``current_frame`` and update all models.

    The centre is refined for at most ``self.refinement_iterations``
    iterations (stopping early once an iteration leaves it unchanged) by
    correlating the two-kernel model, weighted by ``self.d``, with features
    extracted at the current centre and scale.  The scale change is then
    estimated from a log-polar patch and multiplied into ``self.sc``.  The
    CN/HOG templates and their PCA projection matrices are refreshed, and
    ``self.train_model()`` runs every ``self.modnum`` frames.

    :param current_frame: frame to track in
    :param vis: when ``True`` the centred response is kept in ``self.score``
    :return: ``[x, y, w, h]`` where ``(x, y)`` is the centre minus half of
        the scaled base target size
    :raises ValueError: if ``self.interpolate_response`` is not 0, 1 or 2
    """
    self.frame_index += 1

    # --- translation: iterative refinement of the centre ---
    old_pos = (np.inf, np.inf)
    iteration = 1
    while iteration <= self.refinement_iterations and np.any(
            np.array(old_pos) != np.array(self._center)):
        patch = cv2.getRectSubPix(
            current_frame,
            (int(self.base_target_sz[0] * self.sc * (1 + self.padding)),
             int(self.base_target_sz[1] * self.sc * (1 + self.padding))),
            self._center)
        patch = cv2.resize(patch, self.win_sz).astype(np.uint8)
        xo_hog, xo_cn = self.get_features(patch, self.cell_size)
        xo_cn2, xo_hog2 = self.feature_projection(
            xo_cn, xo_hog, self.projection_matrix_cn,
            self.projection_matrix_hog, self._window)

        detect_k_cn = self.dense_gauss_kernel(self.z_cn2, xo_cn2,
                                              self.cn_sigma)
        detect_k_hog = self.dense_gauss_kernel(self.z_hog2, xo_hog2,
                                               self.hog_sigma)
        kf = fft2(self.d[0] * detect_k_cn + self.d[1] * detect_k_hog)
        responsef = self.alphaf * np.conj(kf)

        if self.interpolate_response > 0:
            if self.interpolate_response == 2:
                # NOTE(review): this mode only recomputes interp_sz; the
                # response itself is not resized here -- confirm intended.
                self.interp_sz = (
                    int(self.yf.shape[1] * self.cell_size * self.sc),
                    int(self.yf.shape[0] * self.cell_size * self.sc))
            else:
                responsef = self.resize_dft2(responsef, self.interp_sz)
        response = np.real(ifft2(responsef))

        if vis is True:
            # Roll by half the size along both axes for display.
            self.score = response
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[0] / 2)),
                                 axis=0)
            self.score = np.roll(self.score,
                                 int(np.floor(self.score.shape[1] / 2)),
                                 axis=1)

        row, col = np.unravel_index(np.argmax(response, axis=None),
                                    response.shape)
        # Map the wrapped peak index to a signed displacement.
        half_rows = np.floor((self.interp_sz[1] - 1) / 2)
        half_cols = np.floor((self.interp_sz[0] - 1) / 2)
        disp_row = np.mod(row + half_rows, self.interp_sz[1]) - half_rows
        disp_col = np.mod(col + half_cols, self.interp_sz[0]) - half_cols

        if self.interpolate_response == 0:
            translation_vec = list(
                np.array([disp_row, disp_col]) * self.cell_size * self.sc)
        elif self.interpolate_response == 1:
            translation_vec = list(np.array([disp_row, disp_col]) * self.sc)
        elif self.interpolate_response == 2:
            translation_vec = [disp_row, disp_col]
        else:
            raise ValueError(
                "unsupported interpolate_response: %r" %
                (self.interpolate_response,))

        old_pos = self._center
        self._center = (old_pos[0] + translation_vec[1],
                        old_pos[1] + translation_vec[0])
        iteration += 1

    # --- scale: log-polar estimate ---
    patchL = cv2.getRectSubPix(current_frame,
                               (int(np.floor(self.sc * self.scale_sz[0])),
                                int(np.floor(self.sc * self.scale_sz[1]))),
                               self._center)
    patchL = cv2.resize(patchL, self.scale_sz_window)
    # convert into logpolar
    patchLp = cv2.logPolar(patchL.astype(np.float32),
                           ((patchL.shape[1] - 1) / 2,
                            (patchL.shape[0] - 1) / 2),
                           self.mag,
                           flags=cv2.INTER_LINEAR + cv2.WARP_FILL_OUTLIERS)
    patchLp = extract_hog_feature(patchLp, cell_size=4)
    tmp_sc, _, _ = self.estimate_scale(self.model_patchLp, patchLp, self.mag)
    # Limit the per-frame scale change.
    tmp_sc = np.clip(tmp_sc, a_min=0.6, a_max=1.4)
    self.sc = self.sc * tmp_sc
    self.model_patchLp = (1 - self.learning_rate_scale) * self.model_patchLp \
        + self.learning_rate_scale * patchLp

    # --- appearance templates and PCA projections ---
    patch = cv2.getRectSubPix(
        current_frame,
        (int(self.base_target_sz[0] * self.sc * (1 + self.padding)),
         int(self.base_target_sz[1] * self.sc * (1 + self.padding))),
        self._center)
    patch = cv2.resize(patch, self.win_sz).astype(np.uint8)
    xo_hog, xo_cn = self.get_features(patch, self.cell_size)
    self.z_hog = (1 - self.lr_hog) * self.z_hog + self.lr_hog * xo_hog
    self.z_cn = (1 - self.lr_cn) * self.z_cn + self.lr_cn * xo_cn

    # Leading left-singular vectors of X^T X, one row of X per position.
    data_matrix_cn = self.z_cn.reshape((-1, self.z_cn.shape[2]))
    pca_basis_cn, _, _ = np.linalg.svd(data_matrix_cn.T.dot(data_matrix_cn))
    self.projection_matrix_cn = \
        pca_basis_cn[:, :self.num_compressed_dim_cn]

    data_matrix_hog = self.z_hog.reshape((-1, self.z_hog.shape[2]))
    pca_basis_hog, _, _ = np.linalg.svd(
        data_matrix_hog.T.dot(data_matrix_hog))
    self.projection_matrix_hog = \
        pca_basis_hog[:, :self.num_compressed_dim_hog]

    self.z_cn2, self.z_hog2 = self.feature_projection(
        self.z_cn, self.z_hog, self.projection_matrix_cn,
        self.projection_matrix_hog, self._window)

    if self.frame_index % self.modnum == 0:
        self.train_model()

    target_sz = ((self.base_target_sz[0] * self.sc),
                 (self.base_target_sz[1] * self.sc))
    return [(self._center[0] - target_sz[0] / 2),
            (self._center[1] - target_sz[1] / 2), target_sz[0],
            target_sz[1]]
def update(self, current_frame, vis=False):
    """Track the target in ``current_frame`` and update all models.

    The correlation-filter response and the colour-histogram response are
    merged with ``self.merge_factor`` and the peak of the merged map moves
    ``self._center``.  With ``self.scale_adaptation`` enabled, the scale
    filter is evaluated and the target/background/foreground areas are
    recomputed.  The correlation filter, the colour histograms and (if
    enabled) the scale filter are then updated at the new state.

    When ``self.use_ca`` is set, patches shifted by each entry of
    ``self.offset`` contribute to the filter denominator.

    :param current_frame: frame to track in
    :param vis: when ``True`` the merged response is kept in ``self.score``
    :return: ``[x, y, w, h]`` where ``(x, y)`` is the centre minus half of
        ``self.target_sz``
    """
    # --- sample extraction ---
    im_patch_cf = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    pwp_search_area = (
        round(self.norm_pwp_search_area[0] / self.area_resize_factor),
        round(self.norm_pwp_search_area[1] / self.area_resize_factor))
    im_patch_pwp = self.get_sub_window(current_frame, self._center,
                                       self.norm_pwp_search_area,
                                       pwp_search_area)

    # --- correlation-filter response ---
    xt = self.get_feature_map(im_patch_cf, self.hog_cell_size)
    xt_windowed = self._window[:, :, None] * xt
    xtf = fft2(xt_windowed)
    if self.use_ca is False:
        if self.den_per_channel:
            hf = self.hf_num / (self.hf_den + self.lambda_)
        else:
            hf = self.hf_num / (np.sum(self.hf_den, axis=2) +
                                self.lambda_)[:, :, None]
        response_cf = np.real(ifft2(np.sum(np.conj(hf) * xtf, axis=2)))
    else:
        # lambda_ is already folded into the stored denominator in this
        # mode (see the model update further down).
        if self.den_per_channel:
            hf = self.hf_num / self.hf_den
        else:
            hf = self.hf_num / (np.sum(self.hf_den, axis=2)[:, :, None])
        response_cf = np.real(ifft2(np.sum(hf * xtf, axis=2)))

    response_sz = (
        self.floor_odd(self.norm_delta_area[0] / self.hog_cell_size),
        self.floor_odd(self.norm_delta_area[1] / self.hog_cell_size))
    response_cf = crop_filter_response(response_cf, response_sz)
    if self.hog_cell_size > 1:
        # The flag must be passed by keyword: the third positional
        # parameter of cv2.resize is ``dst``, not ``interpolation``.
        response_cf = cv2.resize(response_cf, self.norm_delta_area,
                                 interpolation=cv2.INTER_NEAREST)

    # --- colour-histogram response and merge ---
    likelihood_map = self.get_colour_map(im_patch_pwp, self.bg_hist,
                                         self.fg_hist, self.bin_mapping)
    likelihood_map[np.isnan(likelihood_map)] = 0.
    response_cf[np.isnan(response_cf)] = 0.
    self.norm_target_sz = (int(self.norm_target_sz[0]),
                           int(self.norm_target_sz[1]))
    response_pwp = get_center_likelihood(likelihood_map, self.norm_target_sz)
    response = (1 - self.merge_factor) * response_cf \
        + self.merge_factor * response_pwp
    if vis is True:
        self.score = response

    curr = np.unravel_index(np.argmax(response, axis=None), response.shape)
    center = ((self.norm_delta_area[0] - 1) / 2,
              (self.norm_delta_area[1] - 1) / 2)
    dy = (curr[0] - center[1]) / self.area_resize_factor
    dx = (curr[1] - center[0]) / self.area_resize_factor
    x_c, y_c = self._center
    self._center = (x_c + dx, y_c + dy)

    # --- scale detection and derived areas ---
    if self.scale_adaptation:
        im_patch_scale = self.get_scale_subwindow(
            current_frame, self._center, self.base_target_sz,
            self.scale_factor * self.scale_factors, self.scale_window,
            self.scale_model_sz, self.hog_scale_cell_size)
        xsf = np.fft.fft(im_patch_scale, axis=1)
        scale_response = np.real(
            np.fft.ifft(
                np.sum(self.sf_num * xsf, axis=0) /
                (self.sf_den + self.lambda_)))
        recovered_scale = np.argmax(scale_response)
        self.scale_factor = self.scale_factor \
            * self.scale_factors[recovered_scale]
        self.scale_factor = np.clip(self.scale_factor,
                                    a_min=self.min_scale_factor,
                                    a_max=self.max_scale_factor)

        self.target_sz = (round(self.base_target_sz[0] * self.scale_factor),
                          round(self.base_target_sz[1] * self.scale_factor))
        avg_dim = (self.target_sz[0] + self.target_sz[1]) / 2
        bg_area = (round(self.target_sz[0] + avg_dim),
                   round(self.target_sz[1] + avg_dim))
        fg_area = (round(self.target_sz[0] - avg_dim * self.inner_padding),
                   round(self.target_sz[1] - avg_dim * self.inner_padding))
        bg_area = (min(bg_area[0], current_frame.shape[1] - 1),
                   min(bg_area[1], current_frame.shape[0] - 1))
        # Parity adjustments between the target, background and
        # foreground sizes.
        self.bg_area = (bg_area[0] - (bg_area[0] - self.target_sz[0]) % 2,
                        bg_area[1] - (bg_area[1] - self.target_sz[1]) % 2)
        self.fg_area = (fg_area[0] + (self.bg_area[0] - fg_area[0]) % 2,
                        fg_area[1] + (self.bg_area[1] - fg_area[1]) % 2)
        self.area_resize_factor = np.sqrt(
            self.fixed_area / (self.bg_area[0] * self.bg_area[1]))

    # --- correlation-filter update ---
    im_patch_bg = self.get_sub_window(current_frame, self._center,
                                      self.norm_bg_area, self.bg_area)
    xt = self.get_feature_map(im_patch_bg, self.hog_cell_size)
    xt = self._window[:, :, None] * xt
    xtf = fft2(xt)

    if self.use_ca:
        # Accumulate the spectra of the shifted patches.
        sum_kfn = np.zeros_like(xtf)
        for offset in self.offset:
            im_patch_bgn = self.get_sub_window(
                current_frame,
                (self._center[0] + offset[0], self._center[1] + offset[1]),
                self.norm_bg_area, self.bg_area)
            xtn = self.get_feature_map(im_patch_bgn, self.hog_cell_size)
            xtn = self._window[:, :, None] * xtn
            xtfn = fft2(xtn)
            sum_kfn += np.conj(xtfn) * xtfn
        new_hf_num = self.yf[:, :, None] * np.conj(xtf)
        new_hf_den = np.conj(xtf) * xtf + self.lambda_ \
            + self.lambda_2 * sum_kfn
    else:
        cf_area = self.cf_response_size[0] * self.cf_response_size[1]
        new_hf_num = np.conj(self.yf)[:, :, None] * xtf / cf_area
        new_hf_den = (np.conj(xtf) * xtf) / cf_area

    self.hf_den = (1 - self.interp_factor_cf) * self.hf_den \
        + self.interp_factor_cf * new_hf_den
    self.hf_num = (1 - self.interp_factor_cf) * self.hf_num \
        + self.interp_factor_cf * new_hf_num

    # --- colour-histogram update ---
    self.bg_hist, self.fg_hist = self.update_hist_model(
        self.new_pwp_model, im_patch_bg, self.bg_area, self.fg_area,
        self.target_sz, self.norm_bg_area, self.n_bins)

    # --- scale-filter update ---
    if self.scale_adaptation:
        im_patch_scale = self.get_scale_subwindow(
            current_frame, self._center, self.base_target_sz,
            self.scale_factor * self.scale_factors, self.scale_window,
            self.scale_model_sz, self.hog_scale_cell_size)
        xsf = np.fft.fft(im_patch_scale, axis=1)
        new_sf_num = self.ysf * np.conj(xsf)
        new_sf_den = np.sum(xsf * np.conj(xsf), axis=0)
        self.sf_den = (1 - self.interp_factor_scale) * self.sf_den \
            + self.interp_factor_scale * new_sf_den
        self.sf_num = (1 - self.interp_factor_scale) * self.sf_num \
            + self.interp_factor_scale * new_sf_num

    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2,
        self.target_sz[0],
        self.target_sz[1],
    ]
def update(self, current_frame, vis=False):
    """Track the target in ``current_frame`` and update the models.

    The translation filter is correlated with a windowed feature map taken
    at the current centre and scale, and ``self._center`` is moved to the
    response peak.  New filter terms are computed at the new centre, the
    scale change is estimated from a log-polar patch and multiplied into
    ``self.sc``, and the filter terms and the log-polar template are
    blended into the stored model.

    :param current_frame: frame to track in
    :param vis: when ``True`` the translation response is kept in
        ``self.score``
    :return: ``[x, y, w, h]`` where ``(x, y)`` is the centre minus half of
        ``self.target_sz``
    """
    # --- translation detection ---
    search_patch = self.get_sub_window(current_frame, self._center,
                                       self.crop_size, self.sc)
    search_feat = self.get_feature_map(search_patch)
    search_f = fft2(search_feat * self._window[:, :, None])
    response = np.real(
        ifft2(np.sum(self.hf_num * search_f, axis=2) /
              (self.hf_den + self.lambda_)))
    if vis is True:
        self.score = response

    peak = np.unravel_index(np.argmax(response, axis=None), response.shape)
    shift_y = (peak[0] - self._init_response_center[0]) * self.sc
    shift_x = (peak[1] - self._init_response_center[1]) * self.sc
    center_x, center_y = self._center
    self._center = (center_x + shift_x, center_y + shift_y)

    # --- new filter terms at the new centre (scale not yet updated) ---
    model_patch = self.get_sub_window(current_frame, self._center,
                                      self.crop_size, self.sc)
    model_feat = self.get_feature_map(model_patch)
    model_f = fft2(model_feat * self._window[:, :, None])
    fresh_hf_num = self.yf[:, :, None] * np.conj(model_f)
    fresh_hf_den = np.sum(model_f * np.conj(model_f), axis=2)

    # --- scale estimate from a log-polar patch ---
    lp_size = (int(np.floor(self.sc * self.scale_sz[0])),
               int(np.floor(self.sc * self.scale_sz[1])))
    patchL = cv2.getRectSubPix(current_frame, lp_size, self._center)
    patchL = cv2.resize(patchL, self.scale_sz_window)
    # convert into logpolar
    lp_center = ((patchL.shape[1] - 1) / 2, (patchL.shape[0] - 1) / 2)
    patchLp = cv2.logPolar(patchL.astype(np.float32),
                           lp_center,
                           self.mag,
                           flags=cv2.INTER_LINEAR + cv2.WARP_FILL_OUTLIERS)
    patchLp = extract_hog_feature(patchLp, cell_size=4)
    scale_change, _, _ = self.estimate_scale(self.model_patchLp, patchLp,
                                             self.mag)
    # Limit the per-frame scale change.
    scale_change = np.clip(scale_change, a_min=0.6, a_max=1.4)
    self.sc = self.sc * scale_change

    # --- model update ---
    rate = self.interp_factor
    self.hf_den = (1 - rate) * self.hf_den + rate * fresh_hf_den
    self.hf_num = (1 - rate) * self.hf_num + rate * fresh_hf_num
    lp_rate = self.learning_rate_scale
    self.model_patchLp = (1 - lp_rate) * self.model_patchLp \
        + lp_rate * patchLp

    self.target_sz = (self.base_target_size[0] * self.sc,
                      self.base_target_size[1] * self.sc)
    return [
        self._center[0] - self.target_sz[0] / 2,
        self._center[1] - self.target_sz[1] / 2,
        self.target_sz[0],
        self.target_sz[1],
    ]