def localize_target(self, sf: TensorList):
    """Fuse the per-feature score responses and locate their maximum.

    The fusion is chosen by ``self.params.score_fusion_strategy``:

    * ``'sum'``: ``sf`` is summed with ``fourier.sum_fs`` and sampled on a
      grid of size ``self.output_sz``.
    * ``'weightedsum'``: as above, after multiplying ``sf`` by the
      ``translation_weight`` feature parameter.
    * ``'transcale'``: every scale is sampled at its own size
      (``output_sz * scale_factor``, rounded), zero-padded back to
      ``output_sz`` and blended with the mean over all scales using the
      ``scale_weight`` and ``translation_weight`` feature parameters.

    Args:
        sf: Score responses, one entry per feature (presumably Fourier
            coefficients, given the ``fourier`` helpers used; confirm
            against the caller).

    Returns:
        A tuple ``(translation_vec, scale_ind, scores)`` where
        ``translation_vec`` is the displacement of the best scale multiplied
        by ``img_support_sz / output_sz`` and ``target_scale`` (and by the
        selected scale factor for the two sum strategies), ``scale_ind`` is
        the index of the highest peak along dim 0, and ``scores`` is the
        fused score map.

    Raises:
        ValueError: If the fusion strategy is none of the three above.
    """
    strategy = self.params.score_fusion_strategy
    is_sum_strategy = strategy in ['sum', 'weightedsum']

    if strategy == 'transcale':
        scale_weight = self.fparams.attribute('scale_weight')
        trans_weight = self.fparams.attribute('translation_weight')

        # One (rows, cols) sampling size per scale factor.
        per_scale_sz = torch.round(
            self.output_sz.view(1, -1) * self.params.scale_factors.view(-1, 1))

        scores = 0
        for feat_sf, a, b in zip(sf, scale_weight, trans_weight):
            # NOTE(review): a shift of pi presumably moves the origin to the
            # grid centre, which is why the displacement below subtracts
            # output_sz / 2 without wrapping -- confirm against shift_fs.
            feat_sf = fourier.shift_fs(feat_sf, math.pi * torch.ones(2))

            padded_scales = []
            for scale_idx, sz in enumerate(per_scale_sz):
                margin = (self.output_sz - sz) / 2
                sampled = fourier.sample_fs(
                    feat_sf[scale_idx:scale_idx + 1, ...], sz)
                margin_rows = margin[0].item()
                margin_cols = margin[1].item()
                # F.pad expects the last dimension first: (left, right,
                # top, bottom). floor/ceil split an odd margin unevenly.
                padding = (math.floor(margin_cols), math.ceil(margin_cols),
                           math.floor(margin_rows), math.ceil(margin_rows))
                padded_scales.append(F.pad(sampled, padding))

            stacked = torch.cat(padded_scales)
            scores = (scores
                      + (b - a) * stacked.mean(dim=0, keepdim=True)
                      + a * stacked)
    elif strategy == 'sum':
        scores = fourier.sample_fs(fourier.sum_fs(sf), self.output_sz)
    elif strategy == 'weightedsum':
        weight = self.fparams.attribute('translation_weight')
        scores = fourier.sample_fs(
            fourier.sum_fs(weight * sf), self.output_sz)
    else:
        raise ValueError('Unknown score fusion strategy.')

    # Peak of every scale, then the scale holding the highest peak.
    peak_score, peak_pos = dcf.max2d(scores)
    scale_ind = torch.max(peak_score, dim=0)[1]
    peak_pos = peak_pos.float().cpu()

    # Convert the peak index into a displacement in the base scale.
    half_sz = self.output_sz / 2
    if is_sum_strategy:
        # Wrap indices into the centred range [-output_sz/2, output_sz/2).
        disp = (peak_pos + half_sz) % self.output_sz - half_sz
    else:
        disp = peak_pos - half_sz

    # Scale the displacement by the support/output size ratio and the
    # current target scale.
    translation_vec = disp[scale_ind, ...].view(-1) * (
        self.img_support_sz / self.output_sz) * self.target_scale
    if is_sum_strategy:
        translation_vec *= self.params.scale_factors[scale_ind]

    return translation_vec, scale_ind, scores
def localize_and_update_target(self, sf: TensorList, i):
    """Locate the score peak and move tracked point ``i`` onto it.

    Only the ``'weightedsum'`` fusion strategy is supported here. ``sf`` is
    multiplied by the ``translation_weight`` feature parameter, summed with
    ``fourier.sum_fs`` and sampled on a grid of size ``self.output_sz``.

    Args:
        sf: Score responses, one entry per feature.
        i: Index into ``self.points`` of the point to update.

    Returns:
        A tuple ``(point, max_score, scores)``: the rounded, clamped new
        value of ``self.points[i]``, the peak score returned by
        ``dcf.max2d``, and the sampled score map.

    Raises:
        ValueError: If ``self.params.score_fusion_strategy`` is not
            ``'weightedsum'``.
    """
    # Guard clause: everything below is only valid for 'weightedsum'. The
    # previous per-strategy displacement branches ('sum' / 'transcale')
    # could never be reached past this check and have been removed.
    if self.params.score_fusion_strategy != 'weightedsum':
        raise ValueError('Unknown score fusion strategy.')

    weight = self.fparams.attribute('translation_weight')
    sf = fourier.sum_fs(weight * sf)
    scores = fourier.sample_fs(sf, self.output_sz)

    # Get maximum
    max_score, max_disp = dcf.max2d(scores)
    max_disp = max_disp.float().cpu()

    # Wrap the peak index into the centred range
    # [-output_sz/2, output_sz/2) to obtain a signed displacement.
    half_sz = self.output_sz / 2
    disp = (max_disp + half_sz) % self.output_sz - half_sz

    # Rescale the displacement by the support/output size ratio.
    translation_vec = disp.view(-1) * (self.img_support_sz / self.output_sz)

    # Update the point, clamped to the range
    # [inside_offset, image_sz - inside_offset]. inside_offset is negative
    # (-0.3 * target_sz), so the point may leave the image by that margin;
    # presumably this keeps about inside_ratio of the target visible.
    new_pos = self.mid_point.round() + translation_vec
    inside_ratio = 0.2
    inside_offset = (inside_ratio - 0.5) * self.target_sz
    upper_bound = self.image_sz - inside_offset
    self.points[i] = torch.max(torch.min(new_pos, upper_bound), inside_offset)

    return self.points[i].round(), max_score, scores
def track(self, image):
    """Process one frame: localize the target, then update the model.

    Steps, in order: extract a sample at the current position over all
    scale factors, apply the filter, localize the target, update position
    and scale, call ``predict_target_box``, store the (shifted) sample of
    the selected scale in memory and, on training frames, run the filter
    optimizer.

    Args:
        image: The current frame; converted with ``numpy_to_torch``.

    Returns:
        A flat list built from ``self.pos`` and ``self.target_sz`` with
        their two components swapped: ``pos[[1, 0]] - (target_sz[[1, 0]] -
        1) / 2`` followed by ``target_sz[[1, 0]]`` (presumably
        ``[x, y, width, height]`` -- confirm against the caller).
    """
    self.frame_num += 1

    # Convert image
    im = numpy_to_torch(image)

    # ------- LOCALIZATION ------- #

    # The sample is extracted at self.pos; the rounded position is kept so
    # that the sub-pixel difference can be compensated for further down.
    sample_pos = self.pos.round()
    sample_scales = self.target_scale * self.params.scale_factors
    test_xf = self.extract_fourier_sample(im, self.pos, sample_scales,
                                          self.img_sample_sz)

    # Compute scores
    sf = self.apply_filter(test_xf)
    translation_vec, scale_ind, s = self.localize_target(sf)
    scale_change_factor = self.params.scale_factors[scale_ind]

    # Update position and scale
    self.update_state(sample_pos + translation_vec,
                      self.target_scale * scale_change_factor)
    self.predict_target_box(sample_pos, sample_scales[scale_ind], scale_ind)

    if self.params.debug >= 2:
        show_tensor(s[scale_ind, ...], 5)
    if self.params.debug >= 3:
        for i, hf in enumerate(self.filter):
            show_tensor(fourier.sample_fs(hf).abs().mean(1), 6 + i)

    # ------- UPDATE ------- #

    # Training sample: the slice of every feature at the selected scale.
    train_xf = TensorList(
        [xf[scale_ind:scale_ind + 1, ...] for xf in test_xf])

    # Shift the sample by the difference between the updated position and
    # the rounded sampling position, expressed as a phase.
    shift_samp = 2 * math.pi * (self.pos - sample_pos) / (
        sample_scales[scale_ind] * self.img_support_sz)
    train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

    # Update memory
    self.update_memory(train_xf)

    # Train the filter every `train_skipping` frames. Written as
    # (frame_num - 1) % k == 0 rather than frame_num % k == 1: the two are
    # identical for k > 1, but the latter is never true for k == 1 and
    # would silently disable training altogether.
    if (self.frame_num - 1) % self.params.train_skipping == 0:
        self.filter_optimizer.run(self.params.CG_iter, train_xf)
        self.symmetrize_filter()

    # Return new state
    swapped_sz = self.target_sz[[1, 0]]
    new_state = torch.cat(
        (self.pos[[1, 0]] - (swapped_sz - 1) / 2, swapped_sz))

    return new_state.tolist()
def localize_target(self, scores_raw):
    """Fuse the raw score maps and locate the target.

    The fusion is chosen by ``self.params.score_fusion_strategy``:

    * ``'weightedsum'``: ``scores_raw`` is multiplied by the
      ``translation_weight`` feature parameter, transformed with
      ``fourier.cfft2``, shifted per feature according to the kernel size
      parity, summed and sampled on a grid of size ``self.output_sz``.
    * ``'default'``: the single entry of ``scores_raw`` is used as is.

    Args:
        scores_raw: Raw score maps, one entry per feature.

    Returns:
        Whatever ``self.localize_advanced(scores)`` returns when
        ``params.advanced_localization`` is truthy. Otherwise a tuple
        ``(translation_vec, scale_ind, scores, None)``; when
        ``params.debug >= 2`` the returned ``scores`` has its two halves
        swapped along both of the last two dimensions for visualization.

    Raises:
        NotImplementedError: For the ``'default'`` strategy with more than
            one score map, or when an output window is set and
            ``params.perform_hn_without_windowing`` is not truthy.
        ValueError: If the fusion strategy is unknown.
    """
    strategy = self.params.score_fusion_strategy

    if strategy == 'weightedsum':
        weight = self.fparams.attribute('translation_weight')
        scores_raw = weight * scores_raw
        sf_weighted = fourier.cfft2(scores_raw) / (
            scores_raw.size(2) * scores_raw.size(3))
        for i, (sz, ksz) in enumerate(
                zip(self.feature_sz, self.kernel_size)):
            # The shift depends on whether each kernel dimension is odd.
            parity = torch.Tensor([ksz[0] % 2, ksz[1] % 2])
            sf_weighted[i] = fourier.shift_fs(
                sf_weighted[i], math.pi * (1 - parity / sz))

        scores_fs = fourier.sum_fs(sf_weighted)
        scores = fourier.sample_fs(scores_fs, self.output_sz)
    elif strategy == 'default':
        if len(scores_raw) > 1:
            raise NotImplementedError('Not implemented')
        scores = scores_raw[0]
        ksz = self.kernel_size[0]
        offset = torch.Tensor([ksz[0] % 2, ksz[1] % 2]) / 2
    else:
        raise ValueError('Unknown score fusion strategy.')

    # Output windowing is not implemented in this method. The in-place
    # multiplication that used to follow this raise could never execute
    # and has been removed.
    windowing_requested = (
        self.output_window is not None
        and not getattr(self.params, 'perform_hn_without_windowing', False))
    if windowing_requested:
        raise NotImplementedError(
            'Output windowing is not implemented for this localization.')

    if getattr(self.params, 'advanced_localization', False):
        return self.localize_advanced(scores)

    # Peak of every scale, then the scale holding the highest peak.
    max_score, max_disp = dcf.max2d(scores)
    _, scale_ind = torch.max(max_score, dim=0)
    max_disp = max_disp.float().cpu()

    # Convert to displacements in the base scale
    if strategy == 'default':
        disp = max_disp + offset
    else:
        # Wrap indices into the centred range [-output_sz/2, output_sz/2).
        half_sz = self.output_sz / 2
        disp = (max_disp + half_sz) % self.output_sz - half_sz

    # Compute translation vector and scale change factor
    translation_vec = disp[scale_ind, ...].view(-1) * (
        self.img_support_sz / self.output_sz) * self.target_scale
    translation_vec *= self.params.scale_factors[scale_ind]

    # Swap the halves of the score map for visualization purposes
    if self.params.debug >= 2:
        rows, cols = scores.shape[-2:]
        scores = torch.cat(
            [scores[..., rows // 2:, :], scores[..., :rows // 2, :]], -2)
        scores = torch.cat(
            [scores[..., :, cols // 2:], scores[..., :, :cols // 2]], -1)

    return translation_vec, scale_ind, scores, None
def localize_target(self, scores_raw):
    """Fuse the raw score maps in the Fourier domain and locate the target.

    ``scores_raw`` is transformed with ``fourier.cfft2``, shifted per
    feature according to the kernel size parity, summed and sampled on a
    grid of size ``self.output_sz``. Progress is printed when the
    module-level ``Debug`` flag is truthy.

    Args:
        scores_raw: Raw score maps, one entry per feature.

    Returns:
        Whatever ``self.localize_advanced(scores)`` returns when
        ``params.advanced_localization`` is truthy. Otherwise a tuple
        ``(translation_vec, scale_ind, scores, None)``; when
        ``params.debug >= 2`` the returned ``scores`` has its two halves
        swapped along both of the last two dimensions for visualization.
    """
    # NOTE(review): the translation weight is fetched and printed but never
    # applied to scores_raw; the original author remarked that the weight
    # is of little use. Confirm this is intentional.
    weight = self.fparams.attribute('translation_weight', 1.0)
    if Debug:
        print("weight : ", weight)

    # Transform and normalize by the number of spatial samples.
    spectrum = fourier.cfft2(scores_raw)
    sf_weighted = spectrum / (scores_raw.size(2) * scores_raw.size(3))

    # Shift every feature in the Fourier domain; the shift depends on
    # whether each kernel dimension is odd.
    for idx, (feat_sz, kernel_sz) in enumerate(
            zip(self.feature_sz, self.kernel_size)):
        parity = torch.Tensor([kernel_sz[0] % 2, kernel_sz[1] % 2])
        sf_weighted[idx] = fourier.shift_fs(
            sf_weighted[idx], math.pi * (1 - parity / feat_sz))

    # Sum the Fourier series expansions of all features.
    scores_fs = fourier.sum_fs(sf_weighted)
    if Debug:
        print("scores_fs : ", scores_fs)

    # Sample the summed Fourier series on the output grid.
    scores = fourier.sample_fs(scores_fs, self.output_sz)
    if Debug:
        print("scores: ", scores)

    skip_window = getattr(self.params, 'perform_hn_without_windowing', False)
    if self.output_window is not None and not skip_window:
        scores *= self.output_window

    if getattr(self.params, 'advanced_localization', False):
        if Debug:
            print("advanced: ")
        return self.localize_advanced(scores)

    # Peak of every scale, then the scale holding the highest peak.
    peak_score, peak_pos = dcf.max2d(scores)
    scale_ind = torch.max(peak_score, dim=0)[1]
    peak_pos = peak_pos.float().cpu()

    # Wrap the peak index into the centred range
    # [-output_sz/2, output_sz/2) to get a displacement in the base scale.
    half_sz = self.output_sz / 2
    disp = (peak_pos + half_sz) % self.output_sz - half_sz

    # Compute translation vector and scale change factor
    translation_vec = disp[scale_ind, ...].view(-1) * (
        self.img_support_sz / self.output_sz) * self.target_scale
    translation_vec *= self.params.scale_factors[scale_ind]

    # Swap the halves of the score map for visualization purposes
    if self.params.debug >= 2:
        rows, cols = scores.shape[-2:]
        scores = torch.cat(
            [scores[..., rows // 2:, :], scores[..., :rows // 2, :]], -2)
        scores = torch.cat(
            [scores[..., :, cols // 2:], scores[..., :, :cols // 2]], -1)

    return translation_vec, scale_ind, scores, None