def init_classifier(self, init_backbone_feat_rgb, init_backbone_feat_d):
    """Initialise the RGB and depth target classifiers on the first frame.

    Extracts classification features for both modalities, optionally appends
    dropout-augmented copies of the first sample, derives the feature, kernel
    and output sizes, builds the optional output window and finally obtains
    one target filter per modality from ``classifier.get_filter``.

    Args:
        init_backbone_feat_rgb: Backbone features of the initial RGB samples.
        init_backbone_feat_d: Backbone features of the initial depth samples.
    """
    params = self.params

    # Classification features for both modalities
    feat_rgb, feat_d = self.get_classification_features(init_backbone_feat_rgb, init_backbone_feat_d)

    # Overwrite some classifier parameters (only the RGB feature dimension is passed on)
    self._overwrite_classifier_params(feature_dim=feat_rgb.shape[-3])

    # Dropout augmentation happens here because it needs the classification features
    if 'dropout' in params.augmentation and params.get('use_augmentation', True):
        num, prob = params.augmentation['dropout']
        self.transforms.extend(self.transforms[:1] * num)
        dropped_rgb = F.dropout2d(feat_rgb[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)
        feat_rgb = torch.cat([feat_rgb, dropped_rgb])
        dropped_d = F.dropout2d(feat_d[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)
        feat_d = torch.cat([feat_d, dropped_d])

    # Feature size and the sizes derived from it
    self.feature_sz = torch.Tensor(list(feat_rgb.shape[-2:]))
    ksz = self.net_rgb.classifier.filter_size
    if isinstance(ksz, (int, float)):
        ksz = [ksz, ksz]
    self.kernel_size = torch.Tensor(ksz)
    self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

    # Output window (left as None unless 'window_output' is enabled)
    self.output_window = None
    if params.get('window_output', False):
        full_sz = self.output_sz.long()
        if params.get('use_clipped_window', False):
            effective_sz = (self.output_sz * params.effective_search_area / params.search_area_scale).long()
            window = dcf.hann2d_clipped(full_sz, effective_sz, centered=True)
        else:
            window = dcf.hann2d(full_sz, centered=True)
        self.output_window = window.to(params.device).squeeze(0)

    # Target boxes for the different augmentations
    target_boxes = self.init_target_boxes()

    # Losses are only computed when debugging is enabled
    plot_loss = params.debug > 0
    num_iter = params.get('net_opt_iter', None)

    # Run the model prediction module of each network to get the target filters
    with torch.no_grad():
        self.target_filter_rgb, _, losses_rgb = self.net_rgb.classifier.get_filter(
            feat_rgb, target_boxes, num_iter=num_iter, compute_losses=plot_loss)
        self.target_filter_d, _, losses_d = self.net_d.classifier.get_filter(
            feat_d, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

    # Sample memory is only needed when the classifier is updated online
    if params.get('update_classifier', True):
        self.init_memory(TensorList([feat_rgb]), TensorList([feat_d]))

    if not plot_loss:
        return

    if isinstance(losses_rgb, dict):
        losses_rgb = losses_rgb['train']
        losses_d = losses_d['train']
    self.losses_rgb = torch.cat(losses_rgb)
    self.losses_d = torch.cat(losses_d)

    if self.visdom is not None:
        self.visdom.register((self.losses_rgb, torch.arange(self.losses_rgb.numel())),
                             'lineplot', 3, 'Training Loss_RGB' + self.id_str)
        self.visdom.register((self.losses_d, torch.arange(self.losses_d.numel())),
                             'lineplot', 3, 'Training Loss_D' + self.id_str)
    elif params.debug >= 3:
        plot_graph(self.losses_rgb, 10, title='Training Loss_RGB' + self.id_str)
        plot_graph(self.losses_d, 10, title='Training Loss_D' + self.id_str)
def init_classifier(self, init_backbone_feat):
    """Initialise the target classifier from the first-frame backbone features.

    Extracts classification features, optionally appends dropout-augmented
    copies of the first sample, derives the feature, kernel and output sizes,
    builds the optional output window and obtains the initial target filter
    from ``self.net.classifier.get_filter``.

    Args:
        init_backbone_feat: Backbone features of the initial training samples.
    """
    # Get classification features
    x = self.get_classification_features(init_backbone_feat)

    # Add the dropout augmentation here, since it requires extraction of the classification features
    if 'dropout' in self.params.augmentation and getattr(self.params, 'use_augmentation', True):
        num, prob = self.params.augmentation['dropout']
        self.transforms.extend(self.transforms[:1] * num)
        x = torch.cat([x, F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])

    # Set feature size and other related sizes
    self.feature_sz = torch.Tensor(list(x.shape[-2:]))
    ksz = self.net.classifier.filter_size
    self.kernel_size = torch.Tensor([ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
    self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

    # Construct output window
    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            # Both branches window the same score map, so the clipped window is
            # built centred like the plain one below. The effective size is
            # truncated to an integer tensor, like the full size argument.
            effective_sz = (self.output_sz * self.params.effective_search_area
                            / self.params.search_area_scale).long()
            self.output_window = dcf.hann2d_clipped(self.output_sz.long(), effective_sz,
                                                    centered=True).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Get target boxes for the different augmentations
    target_boxes = self.init_target_boxes()

    # Losses are only requested from the optimizer when debugging
    plot_loss = self.params.debug > 0
    num_iter = getattr(self.params, 'net_opt_iter', None)

    # Get target filter by running the discriminative model prediction module
    with torch.no_grad():
        self.target_filter, _, losses = self.net.classifier.get_filter(
            x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

    # Init memory
    if getattr(self.params, 'update_classifier', True):
        self.init_memory(TensorList([x]))

    if plot_loss:
        if isinstance(losses, dict):
            losses = losses['train']
        self.losses = torch.stack(losses)
        if self.visdom is not None:
            self.visdom.register((self.losses, torch.arange(self.losses.numel())),
                                 'lineplot', 3, 'Training Loss')
        elif self.params.debug >= 3:
            plot_graph(self.losses, 10, title='Training loss')
def __init__(self, training_samples: TensorList, y: TensorList, filter_reg: torch.Tensor,
             sample_weights: TensorList, response_activation, size, device='cuda'):
    """Store the problem data and precompute the pooling layers and output window.

    Args:
        training_samples: Training samples of the problem.
        y: Labels belonging to the training samples.
        filter_reg: Filter regularization weight(s).
        sample_weights: Weights of the training samples.
        response_activation: Callable applied to the response scores.
        size: Output size; must support indexing and ``.long()``
            (presumably a 2-element tensor -- TODO confirm against callers).
        device: Device the output window is moved to. Defaults to ``'cuda'``,
            which was previously hard-coded.
    """
    self.training_samples = training_samples
    self.y = y
    self.filter_reg = filter_reg
    self.sample_weights = sample_weights
    self.response_activation = response_activation
    self.size = size

    # Row- and column-wise adaptive max pooling. Both use size[0].
    # NOTE(review): pool6 may have been meant to use size[1] for non-square sizes -- confirm.
    self.pool6 = torch.nn.AdaptiveMaxPool2d((1, self.size[0]))
    self.pool7 = torch.nn.AdaptiveMaxPool2d((self.size[0], 1))

    self.output_sz = self.size
    self.device = device
    self.output_window = dcf.hann2d(self.output_sz.long(), centered=False).to(self.device)
def setting_adaptive_search_region_using_speed(self, im):
    """Re-initialise the search-region scale and derived sizes for the next frame.

    Recomputes ``target_scale``, the base target size, the image sample /
    support / feature / output sizes, the scale bounds and the output window
    on ``self.atom`` from its current ``target_sz``.

    Args:
        im: Current image; only its size (after ``numpy_to_torch``) is used,
            to set the scale bounds.
    """
    atom = self.atom
    aparams = atom.params

    # Bring the search area into [min_image_sample_size, max_image_sample_size]
    search_area = torch.prod(atom.target_sz * aparams.search_area_scale).item()
    if search_area > aparams.max_image_sample_size:
        atom.target_scale = math.sqrt(search_area / aparams.max_image_sample_size)
    elif search_area < aparams.min_image_sample_size:
        atom.target_scale = math.sqrt(search_area / aparams.min_image_sample_size)
    else:
        atom.target_scale = 1.0

    # Target size in base scale
    atom.base_target_sz = atom.target_sz / atom.target_scale

    # Sample size: square by default, or proportional to the target ('initrect')
    max_stride = max(aparams.features.stride())
    scaled_target = atom.base_target_sz * aparams.search_area_scale
    if getattr(aparams, 'search_area_shape', 'square') == 'square':
        atom.img_sample_sz = torch.round(torch.sqrt(torch.prod(scaled_target))) * torch.ones(2)
    elif aparams.search_area_shape == 'initrect':
        # Non-square search area
        atom.img_sample_sz = torch.round(scaled_target)
    else:
        raise ValueError('Unknown search area shape')

    # Adjust the sample size relative to the feature stride
    parity_offset = 0 if aparams.feature_size_odd else max_stride
    atom.img_sample_sz += max_stride - (atom.img_sample_sz + parity_offset) % (2 * max_stride)

    # Set sizes
    atom.img_support_sz = atom.img_sample_sz
    atom.feature_sz = aparams.features.size(atom.img_sample_sz)
    atom.output_sz = aparams.score_upsample_factor * atom.img_support_sz  # Interpolated size of the output
    atom.iou_img_sample_sz = atom.img_sample_sz

    # Setup scale bounds
    im = numpy_to_torch(im)
    atom.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    atom.min_scale_factor = torch.max(10 / atom.base_target_sz)
    atom.max_scale_factor = torch.min(atom.image_sz / atom.base_target_sz)

    # Output window
    # NOTE(review): this part reads self.params while everything above uses
    # self.atom.params -- confirm that this is intended.
    own_params = self.params
    atom.output_window = None
    if getattr(own_params, 'window_output', False):
        full_sz = atom.output_sz.long()
        if getattr(own_params, 'use_clipped_window', False):
            effective_sz = atom.output_sz.long() * own_params.effective_search_area / own_params.search_area_scale
            window = dcf.hann2d_clipped(full_sz, effective_sz, centered=False)
        else:
            window = dcf.hann2d(full_sz, centered=False)
        atom.output_window = window.to(own_params.device)
def init_learning(self):
    """Set up the feature window, filter regularization and activation functions.

    Reads ``projection_activation`` and ``response_activation`` from
    ``self.params`` (default ``'none'``). Each may be a plain name
    (``'none'``, ``'relu'``, ``'elu'``) or a ``(name, parameter)`` tuple,
    which is required for ``'mlu'``.

    Raises:
        ValueError: If an activation name is unknown, or ``'mlu'`` is given
            without its parameter.
    """

    def build_activation(spec):
        """Return the activation callable described by ``spec``."""
        # act_param is local to each call, so the 'mlu' closure of one
        # activation can no longer be rebound when the other one is parsed.
        if isinstance(spec, tuple):
            name, act_param = spec
        else:
            name, act_param = spec, None

        if name == 'none':
            return lambda x: x
        if name == 'relu':
            return torch.nn.ReLU(inplace=True)
        if name == 'elu':
            return torch.nn.ELU(inplace=True)
        if name == 'mlu':
            if act_param is None:
                raise ValueError("Activation 'mlu' requires a parameter: use ('mlu', value)")
            return lambda x: F.elu(F.leaky_relu(x, 1 / act_param), act_param)
        raise ValueError('Unknown activation')

    # Get window function
    self.feature_window = TensorList([dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

    # Filter regularization
    self.filter_reg = self.fparams.attribute('filter_reg')

    # Activation function after the projection matrix (phi_1 in the paper)
    self.projection_activation = build_activation(getattr(self.params, 'projection_activation', 'none'))

    # Activation function after the output scores (phi_2 in the paper)
    self.response_activation = build_activation(getattr(self.params, 'response_activation', 'none'))
def initialize(self, image, info: dict) -> dict:
    """Initialise the tracker and its metric-network verifier on the first frame.

    Loads the metric model, extracts the target's metric feature, fits the LOF
    model on features of positive samples drawn around the initial box, and
    then sets up search area, sizes, output window, IoU-Net, projection
    matrix, sample memory and the initial optimization.

    Args:
        image: First frame; indexed as ``image.shape[0..2]`` and treated as
            colour when ``image.shape[2] == 3``.
        info: Dict whose ``'init_bbox'`` entry is the initial box, a 4-element
            sequence (presumably ``[x, y, w, h]`` -- the centre is taken as
            ``state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2``).

    Returns:
        ``{'time': seconds}``, timed from just after the metric-network setup.
    """
    state = info['init_bbox']

    # Initialize some stuff
    self.frame_num = 1
    if not hasattr(self.params, 'device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize features
    self.initialize_features()

    # Metric network
    self.metric_model = model_load(self.params.metric_model_path)

    # Warm-up forward pass on random input; the result is discarded
    with torch.no_grad():
        tmp = np.random.rand(5, 3, 107, 107)
        tmp = torch.Tensor(tmp)
        tmp = (Variable(tmp)).type(torch.FloatTensor).cuda()
        tmp = self.metric_model(tmp)

    self.target_metric_feature = get_target_feature(self.metric_model, np.array(state), np.array(image))

    # Draw positive samples around the initial box, relaxing the IoU bound
    # until at least one sample is produced.
    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    pos_generator = SampleGenerator('gaussian', np.array([image.shape[1], image.shape[0]]), 0.1, 1.3)
    gt_pos_examples = pos_generator(np.array(state).astype(int), 20, [0.7, 1])
    gt_iou = 0.7
    while gt_pos_examples.shape[0] == 0:
        gt_iou = gt_iou - 0.1
        gt_pos_examples = pos_generator(np.array(state).astype(int), 20, [gt_iou, 1])
    print('gt-iou:', gt_iou)

    with torch.no_grad():
        gt_pos_features0 = get_anchor_feature(self.metric_model, np.array(image), gt_pos_examples)
        gt_pos_features = gt_pos_features0.cpu().detach().numpy()

    # Fit the LOF model on the positive features
    self.clf = lof_fit(gt_pos_features, k=5)
    self.lof_thresh = 0
    self.target_features_all = []
    self.target_features_all.append(self.target_metric_feature)

    # Check if image is color
    self.params.features.set_is_color(image.shape[2] == 3)

    # Get feature specific params
    self.fparams = self.params.features.get_fparams('feature_params')

    tic = time.time()

    # Get position and size
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Set search area
    self.target_scale = 1.0
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    if search_area > self.params.max_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.max_image_sample_size)
    elif search_area < self.params.min_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.min_image_sample_size)

    # Check if IoUNet is used
    self.use_iou_net = getattr(self.params, 'use_iou_net', True)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Use odd square search area and set sizes
    feat_max_stride = max(self.params.features.stride())
    if getattr(self.params, 'search_area_shape', 'square') == 'square':
        self.img_sample_sz = torch.round(
            torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
    elif self.params.search_area_shape == 'initrect':
        self.img_sample_sz = torch.round(self.base_target_sz * self.params.search_area_scale)
    else:
        raise ValueError('Unknown search area shape')
    if self.params.feature_size_odd:
        self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)
    else:
        self.img_sample_sz += feat_max_stride - (self.img_sample_sz + feat_max_stride) % (2 * feat_max_stride)

    # Set sizes
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
    self.kernel_size = self.fparams.attribute('kernel_size')
    self.iou_img_sample_sz = self.img_sample_sz

    # Optimization options
    self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
    if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
        self.params.direction_forget_factor = 0
    else:
        self.params.direction_forget_factor = (
            1 - max(self.params.precond_learning_rate)) ** self.params.CG_forgetting_rate

    # Output window
    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=False).to(self.params.device)

    # Initialize some learning things
    self.init_learning()

    # Convert image
    im = numpy_to_torch(image)
    self.im = im  # For debugging only

    # Setup scale bounds
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform sample
    x = self.generate_init_samples(im)

    # Initialize iounet
    if self.use_iou_net:
        self.init_iou_net()

    # Initialize projection matrix
    self.init_projection_matrix(x)

    # Transform to get the training sample
    train_x = self.preprocess_sample(x)

    # Generate label function
    init_y = self.init_label_function(train_x)

    # Init memory
    self.init_memory(train_x)

    # Init optimizer and do initial optimization
    self.init_optimization(train_x, init_y)

    self.pos_iounet = self.pos.clone()

    out = {'time': time.time() - tic}
    return out
def initialize(self, image, info: dict) -> dict:
    """Initialise the tracker on the first frame.

    Sets up features, search area and sizes, the optional output window,
    learning parameters, IoU-Net, projection matrix, sample memory and runs
    the initial optimization.

    Args:
        image: First frame; treated as colour when ``image.shape[2] == 3``.
        info: Dict whose ``'init_bbox'`` entry is the initial box, a 4-element
            sequence (presumably ``[x, y, w, h]`` -- TODO confirm).

    Returns:
        ``{'time': seconds}`` with the time spent after feature setup.
    """
    params = self.params
    box = info['init_bbox']

    # Basic state
    self.frame_num = 1
    if not params.has('device'):
        params.device = 'cuda' if params.use_gpu else 'cpu'

    # Feature extractor setup
    self.initialize_features()
    params.features.set_is_color(image.shape[2] == 3)
    self.fparams = params.features.get_fparams('feature_params')

    tic = time.time()

    # Target centre and size, read from the initial box
    self.pos = torch.Tensor([box[1] + (box[3] - 1) / 2, box[0] + (box[2] - 1) / 2])
    self.target_sz = torch.Tensor([box[3], box[2]])

    # Scale that brings the search area into the allowed sample-size range
    search_area = torch.prod(self.target_sz * params.search_area_scale).item()
    if search_area > params.max_image_sample_size:
        self.target_scale = math.sqrt(search_area / params.max_image_sample_size)
    elif search_area < params.min_image_sample_size:
        self.target_scale = math.sqrt(search_area / params.min_image_sample_size)
    else:
        self.target_scale = 1.0

    # Whether IoU-Net is used
    self.use_iou_net = params.get('use_iou_net', True)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Sample size: square by default, or proportional to the target ('initrect')
    max_stride = max(params.features.stride())
    scaled_target = self.base_target_sz * params.search_area_scale
    if params.get('search_area_shape', 'square') == 'square':
        self.img_sample_sz = torch.round(torch.sqrt(torch.prod(scaled_target))) * torch.ones(2)
    elif params.search_area_shape == 'initrect':
        self.img_sample_sz = torch.round(scaled_target)
    else:
        raise ValueError('Unknown search area shape')

    # Adjust the sample size relative to the feature stride
    parity_offset = 0 if params.feature_size_odd else max_stride
    self.img_sample_sz += max_stride - (self.img_sample_sz + parity_offset) % (2 * max_stride)

    # Derived sizes
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = params.features.size(self.img_sample_sz)
    self.output_sz = params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
    self.kernel_size = self.fparams.attribute('kernel_size')
    self.iou_img_sample_sz = self.img_sample_sz

    # Optimization options
    params.precond_learning_rate = self.fparams.attribute('learning_rate')
    forgetting_rate = params.CG_forgetting_rate
    if forgetting_rate is None or max(params.precond_learning_rate) >= 1:
        params.direction_forget_factor = 0
    else:
        params.direction_forget_factor = (1 - max(params.precond_learning_rate)) ** forgetting_rate

    # Output window (left as None unless 'window_output' is enabled)
    self.output_window = None
    if params.get('window_output', False):
        full_sz = self.output_sz.long()
        if params.get('use_clipped_window', False):
            effective_sz = self.output_sz.long() * params.effective_search_area / params.search_area_scale
            window = dcf.hann2d_clipped(full_sz, effective_sz, centered=False)
        else:
            window = dcf.hann2d(full_sz, centered=False)
        self.output_window = window.to(params.device)

    # Learning-related setup
    self.init_learning()

    # Convert image
    im = numpy_to_torch(image)
    self.im = im  # For debugging only

    # Scale bounds
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform the initial samples
    x = self.generate_init_samples(im)

    if self.use_iou_net:
        self.init_iou_net()

    # Projection matrix, training sample, labels, memory and initial optimization
    self.init_projection_matrix(x)
    train_x = self.preprocess_sample(x)
    init_y = self.init_label_function(train_x)
    self.init_memory(train_x)
    self.init_optimization(train_x, init_y)

    self.pos_iounet = self.pos.clone()

    return {'time': time.time() - tic}
def initialize(self, image, info: dict) -> dict:
    """Initialise the ECO-style tracker and its metric-network verifier.

    Seeds the random number generators, loads the metric model, computes the
    similarity threshold and the LOF model from positive samples around the
    initial box, then builds windows, interpolation / regularization filters,
    labels, the projection matrix and sample memory, and runs the joint and
    filter optimizations.

    Args:
        image: First frame; treated as colour when ``image.shape[2] == 3``.
        info: Dict whose ``'init_bbox'`` entry is the initial box, a 4-element
            sequence (presumably ``[x, y, w, h]`` -- TODO confirm).

    NOTE(review): the signature is annotated ``-> dict`` but no value is
    returned; this is kept as in the original.
    """
    # Fixed seeds and deterministic cuDNN
    initSeed = 1
    torch.manual_seed(initSeed)
    torch.cuda.manual_seed(initSeed)
    torch.cuda.manual_seed_all(initSeed)
    np.random.seed(initSeed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    os.environ['PYTHONHASHSEED'] = str(initSeed)

    state = info['init_bbox']

    # Initialize some stuff
    self.frame_num = 1
    if not hasattr(self.params, 'device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize features
    self.initialize_features()

    # Metric network
    self.metric_model = model_load(self.params.metric_model_path)

    # Warm-up forward pass on random input; the result is discarded
    with torch.no_grad():
        tmp = np.random.rand(5, 3, 107, 107)
        tmp = torch.Tensor(tmp)
        tmp = (Variable(tmp)).type(torch.FloatTensor).cuda()
        tmp = self.metric_model(tmp)

    self.target_metric_feature = get_target_feature(self.metric_model, np.array(state), np.array(image))

    # Draw positive samples around the initial box, relaxing the IoU bound
    # until at least one sample is produced.
    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    pos_generator = SampleGenerator('gaussian', np.array([image.shape[1], image.shape[0]]), 0.1, 1.3)
    gt_pos_examples = pos_generator(np.array(state).astype(int), 20, [0.7, 1])
    gt_iou = 0.7
    while gt_pos_examples.shape[0] == 0:
        gt_iou = gt_iou - 0.1
        gt_pos_examples = pos_generator(np.array(state).astype(int), 20, [gt_iou, 1])

    # Similarity threshold: mean distance between the positive features and
    # the target feature, scaled by sim_rate. No gradients are needed here.
    with torch.no_grad():
        gt_pos_features0 = get_anchor_feature(self.metric_model, np.array(image), gt_pos_examples)
        gt_pos_features = gt_pos_features0.cpu().detach().numpy()
        target_metric_feature = self.target_metric_feature.repeat(gt_pos_features.shape[0], 1)
        pos_all = torch.norm(gt_pos_features0 - target_metric_feature, 2, dim=1).view(-1)
        self.similar = pos_all.mean() * self.params.sim_rate
    print('similarThresh', self.similar)

    self.target_features_all = []
    self.target_features_all.append(self.target_metric_feature)
    self.clf = lof_fit(gt_pos_features, k=5)

    # Check if image is color
    self.params.features.set_is_color(image.shape[2] == 3)

    # Get feature specific params
    self.fparams = self.params.features.get_fparams('feature_params')

    # Get position and size
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Set search area
    self.target_scale = 1.0
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    if search_area > self.params.max_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.max_image_sample_size)
    elif search_area < self.params.min_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.min_image_sample_size)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Use odd square search area and set sizes
    feat_max_stride = max(self.params.features.stride())
    self.img_sample_sz = torch.round(
        torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
    self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)

    # Set other sizes (corresponds to ECO code)
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
    self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
    self.compressed_dim = self.fparams.attribute('compressed_dim')

    # Number of filters
    self.num_filters = len(self.filter_sz)

    # Get window function
    self.window = TensorList([dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

    # Get interpolation function
    self.interp_fs = TensorList([
        dcf.get_interp_fourier(sz, self.params.interpolation_method,
                               self.params.interpolation_bicubic_a,
                               self.params.interpolation_centering,
                               self.params.interpolation_windowing,
                               self.params.device)
        for sz in self.filter_sz
    ])

    # Get regularization filter
    self.reg_filter = TensorList([
        dcf.get_reg_filter(self.img_support_sz, self.base_target_sz, fparams).to(self.params.device)
        for fparams in self.fparams
    ])
    self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

    # Get label function
    output_sigma_factor = self.fparams.attribute('output_sigma_factor')
    sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(self.base_target_sz.prod()) * output_sigma_factor
    self.yf = TensorList([
        dcf.label_function(sz, sig).to(self.params.device)
        for sz, sig in zip(self.filter_sz, sigma)
    ])

    # Optimization options
    self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
    if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
        self.params.direction_forget_factor = 0
    else:
        self.params.direction_forget_factor = (
            1 - max(self.params.precond_learning_rate)) ** self.params.CG_forgetting_rate

    # Convert image
    im = numpy_to_torch(image)

    # Setup bounds
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform sample
    x = self.generate_init_samples(im)

    # Initialize projection matrix from the leading singular vectors of the
    # covariance of the mean-subtracted features
    x_mat = TensorList([e.permute(1, 0, 2, 3).reshape(e.shape[1], -1).clone() for e in x])
    x_mat -= x_mat.mean(dim=1, keepdim=True)
    cov_x = x_mat @ x_mat.t()
    self.projection_matrix = TensorList([
        torch.svd(C)[0][:, :cdim].clone()
        for C, cdim in zip(cov_x, self.compressed_dim)
    ])

    # Transform to get the training sample
    train_xf = self.preprocess_sample(x)

    # Shift the samples back
    if 'shift' in self.params.augmentation:
        for xf in train_xf:
            if xf.shape[0] == 1:
                continue
            for i, shift in enumerate(self.params.augmentation['shift']):
                shift_samp = 2 * math.pi * torch.Tensor(shift) / self.img_support_sz
                xf[1 + i:2 + i, ...] = fourier.shift_fs(xf[1 + i:2 + i, ...], shift=shift_samp)

    # Shift sample
    shift_samp = 2 * math.pi * (self.pos - self.pos.round()) / (self.target_scale * self.img_support_sz)
    train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

    # Initialize first-frame training samples
    num_init_samples = train_xf.size(0)
    self.init_sample_weights = TensorList([xf.new_ones(1) / xf.shape[0] for xf in train_xf])
    self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

    # Sample counters and weights
    self.num_stored_samples = num_init_samples
    self.previous_replace_ind = [None] * len(self.num_stored_samples)
    self.sample_weights = TensorList([xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
    for sw, init_sw, num in zip(self.sample_weights, self.init_sample_weights, num_init_samples):
        sw[:num] = init_sw

    # Initialize memory
    self.training_samples = TensorList([
        xf.new_zeros(xf.shape[2], xf.shape[3], self.params.sample_memory_size, cdim, 2)
        for xf, cdim in zip(train_xf, self.compressed_dim)
    ])

    # Initialize filter
    self.filter = TensorList([
        xf.new_zeros(1, cdim, xf.shape[2], xf.shape[3], 2)
        for xf, cdim in zip(train_xf, self.compressed_dim)
    ])

    # Do joint optimization
    self.joint_problem = FactorizedConvProblem(self.init_training_samples, self.yf, self.reg_filter,
                                               self.projection_matrix, self.params,
                                               self.init_sample_weights)
    joint_var = self.filter.concat(self.projection_matrix)
    self.joint_optimizer = GaussNewtonCG(self.joint_problem, joint_var,
                                         debug=(self.params.debug >= 1), visdom=self.visdom)

    if self.params.update_projection_matrix:
        self.joint_optimizer.run(self.params.init_CG_iter // self.params.init_GN_iter,
                                 self.params.init_GN_iter)

    # Re-project samples with the new projection matrix
    compressed_samples = complex.mtimes(self.init_training_samples, self.projection_matrix)
    for train_samp, init_samp in zip(self.training_samples, compressed_samples):
        train_samp[:, :, :init_samp.shape[2], :, :] = init_samp

    # Initialize optimizer
    self.filter_optimizer = FilterOptim(self.params, self.reg_energy)
    self.filter_optimizer.register(self.filter, self.training_samples, self.yf,
                                   self.sample_weights, self.reg_filter)
    self.filter_optimizer.sample_energy = self.joint_problem.sample_energy
    self.filter_optimizer.residuals = self.joint_optimizer.residuals.clone()

    if not self.params.update_projection_matrix:
        self.filter_optimizer.run(self.params.init_CG_iter)

    # Post optimization
    self.filter_optimizer.run(self.params.post_init_CG_iter)

    self.symmetrize_filter()

    # Metric-network / LOF state
    self.current_target_metric_feature = []
    self.train_xf = []
    self.lof_thresh = self.params.lof_rate
def initialize(self, image1, image2, state, *args, **kwargs):
    """Initialise the two-image tracker on the first frame.

    Sets up features, the search area for a fixed ``image_sample_size``, the
    sizes and the optional output window, extracts initial samples from both
    images, concatenates them along dimension 1 and initialises the
    classifier and, if enabled, IoU-Net. The elapsed time is added to
    ``self.time``.

    Args:
        image1: First input image; its size defines the scale bounds.
        image2: Second input image, processed in the same way as ``image1``.
        state: Initial box, a 4-element sequence (presumably ``[x, y, w, h]``
            -- TODO confirm).
        *args: Unused.
        **kwargs: Unused.
    """
    # Initialize some stuff
    self.frame_num = 1
    if not hasattr(self.params, 'device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize features
    self.initialize_features()

    # Check if image is color
    # NOTE(review): the second call overrides the first, so only image2
    # decides the colour flag -- confirm this is intended.
    self.params.features.set_is_color(image1.shape[2] == 3)
    self.params.features.set_is_color(image2.shape[2] == 3)

    # Get feature specific params
    self.fparams = self.params.features.get_fparams('feature_params')

    self.time = 0
    tic = time.time()

    # Get position and size
    self.pos = torch.Tensor([state[1] + (state[3] - 1) / 2, state[0] + (state[2] - 1) / 2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Set search area
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    self.target_scale = math.sqrt(search_area) / self.params.image_sample_size

    # Check if IoUNet is used
    self.use_iou_net = getattr(self.params, 'use_iou_net', True)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Set sizes
    self.img_sample_sz = torch.Tensor([self.params.image_sample_size, self.params.image_sample_size])
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    if getattr(self.params, 'score_upsample_factor', None) is None:
        self.output_sz = self.feature_sz[0]
    else:
        self.output_sz = self.params.score_upsample_factor * self.img_support_sz  # Interpolated size of the output
    self.kernel_size = self.fparams.attribute('kernel_size')

    self.iou_img_sample_sz = self.img_sample_sz

    self.params.score_fusion_strategy = getattr(self.params, 'score_fusion_strategy', 'default')

    # Output window
    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            # Both branches window the same score map, so the clipped window is
            # built centred like the plain one below. The effective size is
            # truncated to an integer tensor, like the full size argument.
            effective_sz = (self.output_sz * self.params.effective_search_area
                            / self.params.search_area_scale).long()
            self.output_window = dcf.hann2d_clipped(self.output_sz.long(), effective_sz,
                                                    centered=True).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Convert images
    im1 = numpy_to_torch(image1)
    im2 = numpy_to_torch(image2)

    # Setup bounds (taken from the first image)
    self.image_sz = torch.Tensor([im1.shape[2], im1.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform samples, then concatenate both images along dim 1
    x1 = self.generate_init_samples(im1)
    x2 = self.generate_init_samples(im2)
    x = TensorList([torch.cat((v, i), 1) for v, i in zip(x1, x2)])

    self.init_classifier(x)

    if self.use_iou_net:
        self.init_iou_net()

    self.time += time.time() - tic
def initialize(self, image, state, *args, **kwargs):
    """Initialize the tracker on the first frame.

    Sets up sizes, windows, interpolation/regularization/label functions,
    builds the initial projection matrix from the first-frame samples,
    fills the sample memory and runs the initial filter optimization.

    Args:
        image: Input image. ``image.shape[2] == 3`` is used as the color
            test before the image is converted with ``numpy_to_torch``.
        state: Indexable with at least four entries. ``state[0]`` and
            ``state[1]`` are used as offsets, ``state[2]`` and ``state[3]``
            as extents (presumably ``[x, y, w, h]`` -- confirm with caller).
        *args: Unused.
        **kwargs: Unused.
    """

    # Initialize some stuff
    self.frame_num = 1
    if not hasattr(self.params, 'device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Initialize features
    self.initialize_features()

    # Check if image is color
    self.params.features.set_is_color(image.shape[2] == 3)

    # Get feature specific params
    self.fparams = self.params.features.get_fparams('feature_params')

    # Get position and size
    self.pos = torch.Tensor([state[1] + (state[3] - 1)/2, state[0] + (state[2] - 1)/2])
    self.target_sz = torch.Tensor([state[3], state[2]])

    # Set search area
    # target_scale stays 1.0 unless the scaled target area falls outside
    # [min_image_sample_size, max_image_sample_size]; then it is chosen so
    # that the area at base scale equals the violated bound.
    self.target_scale = 1.0
    search_area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    if search_area > self.params.max_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.max_image_sample_size)
    elif search_area < self.params.min_image_sample_size:
        self.target_scale = math.sqrt(search_area / self.params.min_image_sample_size)

    # Target size in base scale
    self.base_target_sz = self.target_sz / self.target_scale

    # Use odd square search area and set sizes
    feat_max_stride = max(self.params.features.stride())
    self.img_sample_sz = torch.round(torch.sqrt(torch.prod(self.base_target_sz * self.params.search_area_scale))) * torch.ones(2)
    # Snap the side length to an odd multiple of the largest feature stride
    self.img_sample_sz += feat_max_stride - self.img_sample_sz % (2 * feat_max_stride)

    # Set other sizes (corresponds to ECO code)
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    # Adds 1 to even feature sizes so that the filter size is odd
    self.filter_sz = self.feature_sz + (self.feature_sz + 1) % 2
    self.output_sz = self.params.score_upsample_factor * self.img_support_sz    # Interpolated size of the output
    self.compressed_dim = self.fparams.attribute('compressed_dim')

    # Number of filters
    self.num_filters = len(self.filter_sz)

    # Get window function
    self.window = TensorList([dcf.hann2d(sz).to(self.params.device) for sz in self.feature_sz])

    # Get interpolation function
    self.interp_fs = TensorList([dcf.get_interp_fourier(sz, self.params.interpolation_method, self.params.interpolation_bicubic_a, self.params.interpolation_centering, self.params.interpolation_windowing, self.params.device) for sz in self.filter_sz])

    # Get regularization filter
    self.reg_filter = TensorList([dcf.get_reg_filter(self.img_support_sz, self.base_target_sz, fparams).to(self.params.device) for fparams in self.fparams])
    self.reg_energy = self.reg_filter.view(-1) @ self.reg_filter.view(-1)

    # Get label function
    output_sigma_factor = self.fparams.attribute('output_sigma_factor')
    sigma = (self.filter_sz / self.img_support_sz) * torch.sqrt(self.base_target_sz.prod()) * output_sigma_factor
    self.yf = TensorList([dcf.label_function(sz, sig).to(self.params.device) for sz, sig in zip(self.filter_sz, sigma)])

    # Optimization options
    self.params.precond_learning_rate = self.fparams.attribute('learning_rate')
    if self.params.CG_forgetting_rate is None or max(self.params.precond_learning_rate) >= 1:
        self.params.direction_forget_factor = 0
    else:
        self.params.direction_forget_factor = (1 - max(self.params.precond_learning_rate))**self.params.CG_forgetting_rate

    # Convert image
    im = numpy_to_torch(image)

    # Setup bounds
    self.image_sz = torch.Tensor([im.shape[2], im.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Extract and transform sample
    x = self.generate_init_samples(im)

    # Initialize projection matrix
    # Each sample block is flattened to (e.shape[1], -1), mean-centred along
    # dim 1, and the leading `cdim` left singular vectors of its covariance
    # are kept.
    x_mat = TensorList([e.permute(1,0,2,3).reshape(e.shape[1], -1).clone() for e in x])
    x_mat -= x_mat.mean(dim=1, keepdim=True)
    cov_x = x_mat @ x_mat.t()
    self.projection_matrix = TensorList([torch.svd(C)[0][:,:cdim].clone() for C, cdim in zip(cov_x, self.compressed_dim)])

    # Transform to get the training sample
    train_xf = self.preprocess_sample(x)

    # Shift the samples back
    # Entries 1.. of each sample tensor are paired, in order, with the
    # configured 'shift' augmentations; tensors with a single sample are
    # skipped.
    if 'shift' in self.params.augmentation:
        for xf in train_xf:
            if xf.shape[0] == 1:
                continue
            for i, shift in enumerate(self.params.augmentation['shift']):
                shift_samp = 2 * math.pi * torch.Tensor(shift) / self.img_support_sz
                xf[1+i:2+i,...] = fourier.shift_fs(xf[1+i:2+i,...], shift=shift_samp)

    # Shift sample
    # Uses the sub-pixel remainder of the target position (pos - round(pos))
    shift_samp = 2*math.pi * (self.pos - self.pos.round()) / (self.target_scale * self.img_support_sz)
    train_xf = fourier.shift_fs(train_xf, shift=shift_samp)

    # Initialize first-frame training samples
    num_init_samples = train_xf.size(0)
    self.init_sample_weights = TensorList([xf.new_ones(1) / xf.shape[0] for xf in train_xf])
    self.init_training_samples = train_xf.permute(2, 3, 0, 1, 4)

    # Sample counters and weights
    self.num_stored_samples = num_init_samples
    self.previous_replace_ind = [None]*len(self.num_stored_samples)
    self.sample_weights = TensorList([xf.new_zeros(self.params.sample_memory_size) for xf in train_xf])
    for sw, init_sw, num in zip(self.sample_weights, self.init_sample_weights, num_init_samples):
        sw[:num] = init_sw

    # Initialize memory
    self.training_samples = TensorList(
        [xf.new_zeros(xf.shape[2], xf.shape[3], self.params.sample_memory_size, cdim, 2) for xf, cdim in zip(train_xf, self.compressed_dim)])

    # Initialize filter
    self.filter = TensorList(
        [xf.new_zeros(1, cdim, xf.shape[2], xf.shape[3], 2) for xf, cdim in zip(train_xf, self.compressed_dim)])

    # Do joint optimization
    self.joint_problem = FactorizedConvProblem(self.init_training_samples, self.yf, self.reg_filter, self.projection_matrix, self.params, self.init_sample_weights)
    joint_var = self.filter.concat(self.projection_matrix)
    self.joint_optimizer = GaussNewtonCG(self.joint_problem, joint_var, debug=(self.params.debug>=3))

    if self.params.update_projection_matrix:
        self.joint_optimizer.run(self.params.init_CG_iter // self.params.init_GN_iter, self.params.init_GN_iter)

    # Re-project samples with the new projection matrix
    compressed_samples = complex.mtimes(self.init_training_samples, self.projection_matrix)
    for train_samp, init_samp in zip(self.training_samples, compressed_samples):
        train_samp[:,:,:init_samp.shape[2],:,:] = init_samp

    # Initialize optimizer
    # The filter optimizer reuses the sample energy and residuals of the
    # joint problem/optimizer.
    self.filter_optimizer = FilterOptim(self.params, self.reg_energy)
    self.filter_optimizer.register(self.filter, self.training_samples, self.yf, self.sample_weights, self.reg_filter)
    self.filter_optimizer.sample_energy = self.joint_problem.sample_energy
    self.filter_optimizer.residuals = self.joint_optimizer.residuals.clone()

    # Without the joint run above, spend the init CG iterations here instead
    if not self.params.update_projection_matrix:
        self.filter_optimizer.run(self.params.init_CG_iter)

    # Post optimization
    self.filter_optimizer.run(self.params.post_init_CG_iter)

    self.symmetrize_filter()
def init_classifier(self, init_backbone_feat):
    """Initialize the classifier (target filter) from first-frame features.

    Extracts classification features, optionally appends dropout-augmented
    copies of the first sample, sets the feature/kernel/output sizes and the
    optional output window, runs the transformer encoder/decoder over the
    samples, and finally computes the target filter with
    ``self.net.classifier.get_filter``.

    Args:
        init_backbone_feat: Backbone features of the initial samples, passed
            unchanged to ``self.get_classification_features``.

    Side effects:
        Sets ``feature_sz``, ``kernel_size``, ``output_sz``,
        ``output_window``, ``transformer_label``, ``x_clf``,
        ``transformer_memory`` and ``target_filter`` on ``self``; may extend
        ``self.transforms`` and call ``self.init_memory``.
    """
    # Get classification features
    x = self.get_classification_features(init_backbone_feat)

    # Overwrite some parameters in the classifier. (These are not generally changed)
    self._overwrite_classifier_params(feature_dim=x.shape[-3])

    # Add the dropout augmentation here, since it requires extraction of the classification features
    if 'dropout' in self.params.augmentation and self.params.get('use_augmentation', True):
        num, prob = self.params.augmentation['dropout']
        self.transforms.extend(self.transforms[:1] * num)
        x = torch.cat([x, F.dropout2d(x[0:1, ...].expand(num, -1, -1, -1), p=prob, training=True)])

    # Set feature size and other related sizes
    self.feature_sz = torch.Tensor(list(x.shape[-2:]))
    ksz = self.net.classifier.filter_size
    self.kernel_size = torch.Tensor([ksz, ksz] if isinstance(ksz, (int, float)) else ksz)
    # Adds 1 to the output size along dimensions where the kernel size is even
    self.output_sz = self.feature_sz + (self.kernel_size + 1) % 2

    # Construct output window
    self.output_window = None
    if self.params.get('window_output', False):
        if self.params.get('use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                (self.output_sz * self.params.effective_search_area / self.params.search_area_scale).long(),
                centered=True).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Get target boxes for the different augmentations
    target_boxes = self.init_target_boxes()

    # Set number of iterations
    plot_loss = self.params.debug > 0
    num_iter = self.params.get('net_opt_iter', None)

    # Mask in Transformer
    transformer_label = prutils.gaussian_label_function(
        target_boxes.cpu().view(-1, 4), 0.1, self.net.classifier.filter_size,
        self.feature_sz, self.img_sample_sz, end_pad_if_even=False)
    # Keep the label on the same device as the features it is combined with
    # in the decoder, instead of unconditionally moving it to CUDA.
    self.transformer_label = transformer_label.unsqueeze(1).to(x.device)

    self.x_clf = x
    self.transformer_memory, _ = self.net.classifier.transformer.encoder(self.x_clf.unsqueeze(1), pos=None)

    # Decode every sample against the shared memory; gather the per-sample
    # outputs and concatenate them once.
    decoder = self.net.classifier.transformer.decoder
    encoded_feats = [
        decoder(x[i, ...].unsqueeze(0).unsqueeze(0),
                memory=self.transformer_memory,
                pos=self.transformer_label,
                query_pos=None)[1]
        for i in range(x.shape[0])
    ]
    x = torch.cat(encoded_feats, 0).contiguous()

    # Get target filter by running the discriminative model prediction module
    with torch.no_grad():
        self.target_filter, _, _ = self.net.classifier.get_filter(
            x, target_boxes, num_iter=num_iter, compute_losses=plot_loss)

    # Init memory
    if self.params.get('update_classifier', True):
        self.init_memory(TensorList([x]))
def initialize(self, image, state, gt, *args, **kwargs):
    """Initialize the tracker on the first frame, picking the feature layer
    from the ground-truth extent.

    Args:
        image: Input image. ``image.shape[2] == 3`` is used as the color
            test before the image is converted with ``numpy_to_torch``.
        state: Indexable with at least four entries. ``state[0]`` and
            ``state[1]`` are used as offsets, ``state[2]`` and ``state[3]``
            as extents (presumably ``[x, y, w, h]`` -- confirm with caller).
        gt: Ground-truth annotation. With 8 entries the extents are taken as
            ``gt[2] - gt[0]`` and ``gt[7] - gt[1]``; otherwise ``gt[2]`` and
            ``gt[3]`` are used directly.
        *args: Unused.
        **kwargs: Unused.

    Raises:
        ValueError: If ``params.search_area_shape`` is neither ``'square'``
            nor ``'initrect'``.
    """
    # Extent of the ground-truth annotation
    if len(gt) == 8:
        gt_w, gt_h = gt[2] - gt[0], gt[7] - gt[1]
    else:
        gt_w, gt_h = gt[2], gt[3]

    self.frame_num = 1
    if not hasattr(self.params, 'device'):
        self.params.device = 'cuda' if self.params.use_gpu else 'cpu'

    # Extents below 25 in both directions select 'layer2' (28x28),
    # everything else 'layer3' (14x14). feature_sz is overwritten further
    # down once the sample size is known.
    if gt_w < 25 and gt_h < 25:
        grid, layer = 28., 'layer2'
    else:
        grid, layer = 14., 'layer3'
    self.feature_sz = TensorList([torch.Tensor([grid, grid])])
    self.output_layer = TensorList([layer])

    self.initialize_features(self.output_layer)

    self.params.features.set_is_color(image.shape[2] == 3)

    self.fparams = self.params.features.get_fparams('feature_params')

    self.time = 0
    started = time.time()

    # Position and (possibly shrunk) target size
    height, width = state[3], state[2]
    self.pos = torch.Tensor([state[1] + (height - 1) / 2, state[0] + (width - 1) / 2])
    if height > 50 or width > 50:
        # Large targets are shrunk by 1/8 along state[3] and 1/4 along state[2]
        self.target_sz = torch.Tensor([height - height / 8, width - width / 4])
    else:
        self.target_sz = torch.Tensor([height, width])

    # Scale so the search area stays within the configured sample-size bounds
    area = torch.prod(self.target_sz * self.params.search_area_scale).item()
    self.target_scale = 1.0
    if area > self.params.max_image_sample_size:
        self.target_scale = math.sqrt(area / self.params.max_image_sample_size)
    elif area < self.params.min_image_sample_size:
        self.target_scale = math.sqrt(area / self.params.min_image_sample_size)

    self.use_iou_net = getattr(self.params, 'use_iou_net', True)

    self.base_target_sz = self.target_sz / self.target_scale

    # Search-area shape and stride alignment
    stride = max(self.params.features.stride())
    scaled_target = self.base_target_sz * self.params.search_area_scale
    area_shape = getattr(self.params, 'search_area_shape', 'square')
    if area_shape == 'square':
        self.img_sample_sz = torch.round(torch.sqrt(torch.prod(scaled_target))) * torch.ones(2)
    elif area_shape == 'initrect':
        self.img_sample_sz = torch.round(scaled_target)
    else:
        raise ValueError('Unknown search area shape')

    if self.params.feature_size_odd:
        remainder = self.img_sample_sz % (2 * stride)
    else:
        remainder = (self.img_sample_sz + stride) % (2 * stride)
    self.img_sample_sz += stride - remainder

    # Derived sizes; img_support_sz and iou_img_sample_sz alias img_sample_sz
    self.img_support_sz = self.img_sample_sz
    self.feature_sz = self.params.features.size(self.img_sample_sz)
    # Interpolated size of the output
    self.output_sz = self.params.score_upsample_factor * self.img_support_sz
    self.kernel_size = self.fparams.attribute('kernel_size')
    self.iou_img_sample_sz = self.img_sample_sz

    # Optimization options
    learning_rates = self.fparams.attribute('learning_rate')
    self.params.precond_learning_rate = learning_rates
    forgetting_rate = self.params.CG_forgetting_rate
    if forgetting_rate is None or max(learning_rates) >= 1:
        self.params.direction_forget_factor = 0
    else:
        self.params.direction_forget_factor = (1 - max(learning_rates)) ** forgetting_rate

    # Optional window applied to the output
    self.output_window = None
    if getattr(self.params, 'window_output', False):
        window_sz = self.output_sz.long()
        if getattr(self.params, 'use_clipped_window', False):
            effective_sz = window_sz * self.params.effective_search_area / self.params.search_area_scale
            window = dcf.hann2d_clipped(window_sz, effective_sz, centered=False)
        else:
            window = dcf.hann2d(window_sz, centered=False)
        self.output_window = window.to(self.params.device)

    self.init_learning()

    # Convert image
    frame = numpy_to_torch(image)
    self.im = frame  # For debugging only

    # Scale bounds
    self.image_sz = torch.Tensor([frame.shape[2], frame.shape[3]])
    self.min_scale_factor = torch.max(10 / self.base_target_sz)
    self.max_scale_factor = torch.min(self.image_sz / self.base_target_sz)

    # Initial samples
    samples = self.generate_init_samples(frame)

    if self.use_iou_net:
        self.init_iou_net()

    # Projection matrix, training samples, labels, memory and optimizer
    self.init_projection_matrix(samples)
    train_samples = self.preprocess_sample(samples)
    labels = self.init_label_function(train_samples)
    self.init_memory(train_samples)
    self.init_optimization(train_samples, labels)

    self.pos_iounet = self.pos.clone()
    self.time += time.time() - started

    self.pool1 = torch.nn.AdaptiveMaxPool2d((1, 224))
    self.pool2 = torch.nn.AdaptiveMaxPool2d((224, 1))
def init_classifier_and_regressor(self, init_backbone_feat):
    """Initialize the two classifier filters and the regression filter.

    Builds the feature pyramid from the initial backbone features, optionally
    appends dropout-augmented copies of the first sample, generates the
    ``classifier_72`` / ``classifier_18`` target filters and the
    ``regressor_72`` filter, sets the feature/kernel/output sizes and the
    optional output window, and initializes the sample memory.

    Args:
        init_backbone_feat: Mapping of backbone features. ``'layer1'`` and
            ``'layer2'`` are read here; the whole mapping is also passed to
            ``self.net.get_backbone_clf_feat`` and
            ``self.get_classification_features``.

    Side effects:
        Sets ``target_filter_72``, ``target_filter_18``,
        ``target_reg_filter_72``, their ``init_*`` counterparts, the
        ``*_sz_18`` / ``*_sz_72`` size attributes, ``output_sz`` and
        ``output_window`` on ``self``; may extend ``self.transforms`` and
        call ``self.init_memory``.
    """
    # Get classification features
    x = self.net.get_backbone_clf_feat(init_backbone_feat)
    train_feat_18_cls = self.get_classification_features(init_backbone_feat)
    with torch.no_grad():
        train_feat_18 = self.net.pyramid_first_conv(x=None, x_backbone=x)
        train_feat_36 = self.net.pyramid_36(train_feat_18, init_backbone_feat['layer2'])
        train_feat_72 = self.net.pyramid_72(train_feat_36, init_backbone_feat['layer1'])
        train_feat_72_cls = self.net.classifier_72.extract_classification_feat(
            train_feat_72.view(-1, *train_feat_72.shape[-3:]))
        train_feat_72_reg = self.net.regressor_72.extract_regression_feat(
            feat_36=train_feat_36.view(-1, *train_feat_36.shape[-3:]),
            feat_72=train_feat_72.view(-1, *train_feat_72.shape[-3:]))

    # Add the dropout augmentation here, since it requires extraction of the classification features
    if 'dropout' in self.params.augmentation and getattr(self.params, 'use_augmentation', True):
        num, prob = self.params.augmentation['dropout']
        self.transforms.extend(self.transforms[:1] * num)
        train_feat_18_cls = torch.cat([train_feat_18_cls,
                                       F.dropout2d(train_feat_18_cls[0:1, ...].expand(num, -1, -1, -1),
                                                   p=prob, training=True)])
        train_feat_72_cls = torch.cat([train_feat_72_cls,
                                       F.dropout2d(train_feat_72_cls[0:1, ...].expand(num, -1, -1, -1),
                                                   p=prob, training=True)])
        train_feat_72_reg = torch.cat([train_feat_72_reg,
                                       F.dropout2d(train_feat_72_reg[0:1, ...].expand(num, -1, -1, -1),
                                                   p=prob, training=True)])

    # Get target boxes for the different augmentations
    target_boxes = self.init_target_boxes()

    # Set number of iterations (None when the parameter is not configured)
    num_iter = getattr(self.params, 'net_opt_iter', None)
    num_iter_72 = getattr(self.params, 'net_opt_iter_72', None)
    reg_num_iter = getattr(self.params, 'reg_net_opt_iter', None)

    # Get target filters by running the classifier and regressor model generators
    with torch.no_grad():
        self.target_filter_72, _, _ = self.net.classifier_72.get_filter(
            train_feat_72_cls, target_boxes, num_iter=num_iter_72)
        self.target_filter_18, _, _ = self.net.classifier_18.get_filter(
            train_feat_18_cls, target_boxes, num_iter=num_iter)

        # Initial regression filter from the first (un-augmented) sample only
        target_feat_36 = train_feat_36.view(-1, *train_feat_36.shape[-3:])[0].unsqueeze(0)
        target_feat_72 = train_feat_72.view(-1, *train_feat_72.shape[-3:])[0].unsqueeze(0)
        target_bb = target_boxes[0].unsqueeze(0).clone()
        init_reg_filter = self.net.regressor_72.generate_init_filter(target_feat_36, target_feat_72, target_bb)

        # Refine the regression filter on all training samples unless the
        # iteration count is explicitly non-positive. An unset count (None)
        # is forwarded as-is, like the classifier counts above, rather than
        # being compared with 0 (which raises TypeError).
        # NOTE(review): assumes generate_filter_optimizer accepts
        # num_iter=None as "use its default" -- confirm.
        if reg_num_iter is None or reg_num_iter > 0:
            self.target_reg_filter_72, _, _ = self.net.regressor_72.generate_filter_optimizer(
                init_reg_filter, train_feat_72_reg, target_boxes.view(-1, 4).clone(), num_iter=reg_num_iter)
        else:
            self.target_reg_filter_72 = init_reg_filter

    # Keep references to the initial classifier and regressor models
    self.init_target_filter_72 = self.target_filter_72
    self.init_target_filter_18 = self.target_filter_18
    self.init_reg_filter = init_reg_filter

    # Set feature size and other related sizes
    self.feature_sz_18 = torch.Tensor(list(x.shape[-2:]))
    ksz_18 = self.net.classifier_18.filter_size
    self.kernel_size_18 = torch.Tensor([ksz_18, ksz_18] if isinstance(ksz_18, (int, float)) else ksz_18)
    self.output_sz_18 = self.feature_sz_18 + (self.kernel_size_18 + 1) % 2

    self.feature_sz_72 = torch.Tensor(list(train_feat_72.shape[-2:]))
    ksz_72 = self.net.classifier_72.filter_size
    self.kernel_size_72 = torch.Tensor([ksz_72, ksz_72] if isinstance(ksz_72, (int, float)) else ksz_72)
    self.output_sz_72 = self.feature_sz_72 + (self.kernel_size_72 + 1) % 2

    self.output_sz = torch.Tensor([72, 72])

    # Construct output window
    self.output_window = None
    if getattr(self.params, 'window_output', False):
        if getattr(self.params, 'use_clipped_window', False):
            self.output_window = dcf.hann2d_clipped(
                self.output_sz.long(),
                self.output_sz.long() * self.params.effective_search_area / self.params.search_area_scale,
                centered=False).to(self.params.device)
        else:
            self.output_window = dcf.hann2d(self.output_sz.long(), centered=True).to(self.params.device)
        self.output_window = self.output_window.squeeze(0)

    # Init memory
    if getattr(self.params, 'update_classifier_and_regressor', True):
        self.init_memory(TensorList([train_feat_72_cls]),
                         TensorList([train_feat_18_cls]),
                         TensorList([train_feat_72_reg]))