class TrackerConfig(object):
    """Default DCFNet tracking hyper-parameters and precomputed tensors.

    Every value is a class attribute computed once, when the class body is
    executed.  The last two attributes call ``.cuda()``, so a CUDA device is
    needed at class-definition time.
    """
    # These are the default hyper-params for DCFNet
    # OTB2013 / AUC(0.665)
    feature_path = 'param.pth'
    crop_sz = 125

    lambda0 = 1e-4
    padding = 2
    output_sigma_factor = 0.1
    interp_factor = 0.01
    num_scale = 3
    scale_step = 1.0275
    # Floor division keeps the exponents centred on zero ([-1, 0, 1] for three
    # scales), so the middle candidate is the unchanged scale.  True division
    # (`/` on Python 3) would give [-1.5, -0.5, 0.5] and no identity scale.
    scale_factor = scale_step**(np.arange(num_scale) - num_scale // 2)
    min_scale_factor = 0.2
    max_scale_factor = 5
    scale_penalty = 0.9925
    # Penalty grows with the distance from the centre scale; the centre scale
    # itself gets a penalty of exactly 1.
    scale_penalties = scale_penalty**(np.abs(
        (np.arange(num_scale) - num_scale // 2)))

    net_input_size = [crop_sz, crop_sz]
    # Per-channel mean values, reshaped to (3, 1, 1) float32.
    net_average_image = np.array([104, 117,
                                  123]).reshape(-1, 1, 1).astype(np.float32)
    output_sigma = crop_sz / (1 + padding) * output_sigma_factor
    y = gaussian_shaped_labels(output_sigma, net_input_size)
    # NOTE(review): torch.rfft only exists in older PyTorch releases; moving to
    # torch.fft.rfftn changes the result layout (complex tensor instead of a
    # trailing real/imag dimension), so consumers of `yf` must change with it.
    yf = torch.rfft(torch.Tensor(y).view(1, 1, crop_sz, crop_sz).cuda(),
                    signal_ndim=2)
    # 2-D Hann window: outer product of two 1-D Hann windows of length crop_sz.
    cos_window = torch.Tensor(
        np.outer(np.hanning(crop_sz), np.hanning(crop_sz))).cuda()
class TrackerConfig(object):
    """Default DCFNet tracking hyper-parameters and precomputed tensors.

    Every value is a class attribute computed once, when the class body is
    executed.  The tensors are moved with ``.to(device)``, so a module-level
    ``device`` must already exist when this class is defined.
    """
    # These are the default hyper-params for DCFNet
    # OTB2013 / AUC(0.635)
    crop_sz = 125

    lambda0 = 1e-4
    padding = 2
    output_sigma_factor = 0.1
    interp_factor = 0.01
    num_scale = 3
    scale_step = 1.0275
    # Floor division keeps the exponents centred on zero ([-1, 0, 1] for three
    # scales), so the middle candidate is the unchanged scale.  True division
    # (`/` on Python 3) would give [-1.5, -0.5, 0.5] and no identity scale.
    scale_factor = scale_step**(np.arange(num_scale) - num_scale // 2)
    min_scale_factor = 0.2
    max_scale_factor = 5
    scale_penalty = 0.9925
    # Penalty grows with the distance from the centre scale; the centre scale
    # itself gets a penalty of exactly 1.
    scale_penalties = scale_penalty**(np.abs(
        (np.arange(num_scale) - num_scale // 2)))

    net_input_size = [crop_sz, crop_sz]
    # Per-channel mean values, reshaped to (3, 1, 1) float32.
    net_average_image = np.array([104, 117,
                                  123]).reshape(-1, 1, 1).astype(np.float32)
    output_sigma = crop_sz / (1 + padding) * output_sigma_factor
    y = gaussian_shaped_labels(output_sigma, net_input_size)
    # rfft keeps only half of the spectrum along the last transformed
    # dimension, because the rest follows from conjugate symmetry.
    # NOTE(review): torch.rfft only exists in older PyTorch releases; moving to
    # torch.fft.rfftn changes the result layout (complex tensor instead of a
    # trailing real/imag dimension), so consumers of `yf` must change with it.
    yf = torch.rfft(torch.Tensor(y).view(1, 1, crop_sz, crop_sz).to(device),
                    signal_ndim=2)
    # 2-D Hann window: outer product of two 1-D Hann windows of length crop_sz.
    cos_window = torch.Tensor(
        np.outer(np.hanning(crop_sz), np.hanning(crop_sz))).to(device)
# Scratch code comparing the legacy `torch.rfft` call with `fft.rfftn`.
# NOTE(review): the original indentation of this fragment is not visible, so
# the statements are laid out flat; confirm the real nesting before relying
# on it.

# Absolute element-wise difference between `ca` and `b` (both are defined
# before this excerpt).
u = ca - b
v = u.abs()
h = torch.histc(v)  # NOTE(review): `h` is never read in the visible code
import matplotlib.pyplot as plt
# Histogram of the absolute differences: 500 bins, log-scaled counts.
plt.hist(v.flatten().numpy(), bins=500, log=True)
plt.show()
# NOTE(review): exit() stops the interpreter here; if the statements below
# share this scope they are unreachable (looks like a debugging leftover).
exit()

# fft_comparison()
##############################################
lambda0 = 1e-4
# 4.166666666666667 == 125 / (1 + 2) * 0.1
y = util.gaussian_shaped_labels(4.166666666666667,
                                [121, 121]).astype(np.float32)
# Two random CPU tensors of shape (42, 32, 121, 121).
x = torch.rand((42, 32, 121, 121))
z = torch.rand((42, 32, 121, 121))
# Label reshaped to (1, 1, 121, 121) and moved to the GPU.
fft_label_view = torch.Tensor(y).view(1, 1, 121, 121).cuda()
# Same label transformed with both APIs.  The torch.rfft result is repeated
# over five dimensions, the fft.rfftn result over four; each is tiled 42
# times along the first dimension.
label_old = torch.rfft(fft_label_view,
                       signal_ndim=2).repeat(42, 1, 1, 1,
                                             1).cuda(non_blocking=True)
label_new = fft.rfftn(fft_label_view,
                      dim=[-2, -1]).repeat(42, 1, 1,
                                           1).cuda(non_blocking=True)
##############################################
# Transforms of the random inputs over the last two dimensions, again with
# both APIs for `z`.
zfnew = fft.rfftn(z, dim=[-2, -1])
zfold = torch.rfft(z, signal_ndim=2)
xfnew = fft.rfftn(x, dim=[-2, -1])
def __init__(self,
             args,
             gpu_num: int,
             train_loader,
             val_loader,
             crop_sz=125,
             output_sz=121,
             lambda0=1e-4,
             padding=2.0,
             output_sigma_factor=0.1):
    """Build labels, model, loss, optimiser, LR schedule and checkpointing.

    Args:
        args: Option object.  Reads ``batch_size``, ``lr``, ``momentum``,
            ``weight_decay``, ``epochs``, ``start_epoch``, ``resume``,
            ``save``, ``input_sz`` and ``padding``.  ``start_epoch`` is
            overwritten when a checkpoint is loaded.
        gpu_num: Number of GPUs.  Multiplies the batch size and, when
            greater than one, wraps the model in ``DataParallel``.
        train_loader: Stored unchanged as ``self.train_loader``.
        val_loader: Stored unchanged as ``self.val_loader``.
        crop_sz: Stored, and used to derive the Gaussian label sigma.
        output_sz: Side length of the square label map.
        lambda0: Forwarded to ``DCFNet``.
        padding: Stored, and used to derive the Gaussian label sigma.
        output_sigma_factor: Multiplier in the label sigma formula.
    """
    # --- plain attributes -------------------------------------------------
    self.args = args
    self.gpu_num = gpu_num
    self.train_loader = train_loader
    self.val_loader = val_loader
    self.crop_sz = crop_sz
    self.output_sz = output_sz
    self.lambda0 = lambda0
    self.padding = padding
    self.batch_size = args.batch_size * gpu_num
    self.best_loss = 1e6

    # --- regression labels ------------------------------------------------
    label_sigma = crop_sz / (1 + padding) * output_sigma_factor
    label_np = util.gaussian_shaped_labels(
        label_sigma, [self.output_sz, self.output_sz])
    # Spatial label on the GPU; (121, 121) with the default output_sz.
    self.y = torch.tensor(label_np.astype(np.float32)).cuda()
    # Frequency-domain label over the last two dimensions.  Assuming `fft`
    # is torch.fft this is a complex tensor, (1, 1, 121, 61) by default.
    label_4d = self.y.view(1, 1, self.output_sz, self.output_sz)
    self.yf = fft.rfftn(label_4d, dim=[-2, -1])
    # Untouched copy of the spatial label.
    self.initial_y = self.y.clone()
    # Frequency-domain label tiled once per sample in the full batch.
    self.label = self.yf.repeat(self.batch_size, 1, 1, 1)

    # --- model and loss ---------------------------------------------------
    self.model = DCFNet(lambda0=self.lambda0).cuda()
    print('GPU NUM: {:2d}'.format(gpu_num))
    if gpu_num > 1:
        device_ids = list(range(gpu_num))
        self.model = torch.nn.DataParallel(self.model, device_ids).cuda()
    self.criterion = nn.MSELoss(reduction='sum').cuda()

    # --- optimiser and learning-rate schedule -----------------------------
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay)
    decay = util.compute_lr_gamma(args.lr, 1e-5, args.epochs)
    self.lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
        self.optimizer, gamma=decay)
    # Fast-forward the schedule to the requested starting epoch.  This uses
    # the value of args.start_epoch from before any checkpoint is loaded.
    for _ in range(args.start_epoch):
        self.lr_scheduler.step()

    # Spatial training target: the label with two leading singleton
    # dimensions, tiled over the full batch.
    self.target = self.y[None, None].repeat(self.batch_size, 1, 1, 1)

    # --- optional resume from a checkpoint --------------------------------
    if args.resume and isfile(args.resume):
        print(f"=> loading checkpoint '{args.resume}'")
        checkpoint = torch.load(args.resume)
        self.args.start_epoch = checkpoint['epoch']
        self.best_loss = checkpoint['best_loss']
        self.model.load_state_dict(checkpoint['state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer'])
        print(
            f"=> loaded checkpoint '{args.resume}' (epoch {checkpoint['epoch']})"
        )
    elif args.resume:
        print(f"=> no checkpoint found at '{args.resume}'")

    cudnn.benchmark = True

    # --- checkpoint output ------------------------------------------------
    # NOTE(review): the directory name is built from args.input_sz and
    # args.padding, not from the crop_sz / padding arguments above.
    save_root = args.save or config.checkpoint_root
    run_dir = os.path.join(save_root,
                           f'crop_{args.input_sz:d}_{args.padding:1.1f}')
    self.checkpoint_saver = util.CheckpointSaver(save_path=run_dir,
                                                 verbose=True)
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.autograd.profiler as profiler

import util

if __name__ == '__main__':
    # Profiling harness for util.create_fake_y on a few synthetic responses.
    n = 5  # number of synthetic response maps
    s = 121  # side length of each square map
    y = torch.as_tensor(util.gaussian_shaped_labels(4, (s, s)))
    # All-zero responses with a single peak of height 100 in each map.
    response = torch.zeros((n, 1, s, s))
    response[0, 0, 60, 60] = 100  # centre of the 121x121 map
    response[1, 0, 0, 60] = 100  # first row, centre column
    response[2, 0, 60, 0] = 100  # centre row, first column
    response[3, 0, 30, 100] = 100
    response[4, 0, 80, 90] = 100
    # Profile one call, recording CUDA timings, input shapes, memory usage
    # and Python stacks.
    with profiler.profile(use_cuda=True,
                          record_shapes=True,
                          profile_memory=True,
                          with_stack=True) as p:
        # with profiler.record_function('model_inference'):
        fake_y = util.create_fake_y(y, response)
    # Print all rows, grouped by the top five stack frames and sorted by
    # self CUDA time.
    print(
        p.key_averages(group_by_stack_n=5).table(
            sort_by="self_cuda_time_total", row_limit=-1))
    # 2 x n grid of axes.  NOTE(review): the plotting code that would use
    # `fig` and `ax` is not visible in this excerpt.
    fig, ax = plt.subplots(2, n)