def visualize_feature(self, features=None, stage='conv4_1', srch_window_size=None, subwindow=None, feature_weights=None, balance_weights=None):
    """Visualize the (optionally target-aware-selected) features of the first frame.

    Sums the chosen feature tensor over channels, normalizes it to [0, 255],
    renders it as a JET heatmap and blends it over the search subwindow with
    cv2.imshow. Blocks until a key is pressed.

    Args:
        features: list of feature tensors, one per conv stage
                  (presumably NCHW tensors from conv4_1/conv4_3 — confirm with caller).
        stage: 'conv4_1', 'conv4_3' or 'all' (concatenate both stages).
        srch_window_size: (width, height) target size for cv2.resize of the heatmap.
        subwindow: search-window image as a CHW torch tensor, zero-centered
                   (128 is added back before display).
        feature_weights, balance_weights: channel-selection weights; when either
            is None the raw features are visualized instead.
    """
    assert (stage == 'conv4_1' or stage == 'conv4_3' or stage == 'all'), 'For now, TADT only support for conv4_1 and conv4_3'
    # Map the stage name onto the index into `features`; 'all' keeps the string
    # and is handled by the `stage == 'all'` branches below.
    if stage == 'conv4_1':
        stage = 0
    elif stage == 'conv4_3':
        stage = 1
    if feature_weights is None or balance_weights is None:
        # No selection weights available: visualize the raw feature maps.
        if stage == 'all':
            feature = torch.cat(features, dim=1)
        else:
            feature = features[stage]
    else:
        # Apply target-aware feature selection before visualizing.
        if stage == 'all':
            feature = features_selection(features, feature_weights, balance_weights, mode='reduction')
        else:
            feature = features[stage]
            feature_weight = feature_weights[stage]
            # NOTE(review): single-stage path uses `feature_selection` (singular)
            # vs `features_selection` above — presumably two distinct helpers; confirm.
            feature = feature_selection(feature, feature_weight, mode='reduction')
    # Collapse channels, then min-max normalize into the 8-bit range.
    heatmap = torch.sum(feature, dim=1)
    max_value = torch.max(heatmap)
    min_value = torch.min(heatmap)
    heatmap = (heatmap - min_value) / (max_value - min_value) * 255
    # CHW torch tensor -> HWC uint8 numpy image for OpenCV.
    heatmap = heatmap.cpu().numpy().astype(np.uint8).transpose(1, 2, 0)
    heatmap = cv2.resize(heatmap, srch_window_size, interpolation=cv2.INTER_LINEAR)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
    #subwindow = subwindow.numpy().astype(np.uint8).transpose(1,2,0)
    # Convert the torch tensor back to an OpenCV image; +128 undoes the
    # zero-centering applied when the subwindow was prepared.
    subwindow = subwindow.cpu().numpy().astype(np.uint8).transpose(1, 2, 0) + 128
    # Overlay: 60% subwindow, 40% heatmap. Blocks until a key press.
    cv2.imshow('heatmap', cv2.addWeighted(cv2.cvtColor(subwindow, cv2.COLOR_BGR2RGB), 0.6, heatmap, 0.4, 0.0))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def initialize_tadt(self, img_path, target_loc, visualize=False):
    """Initialize the tracker from the first frame.

    Loads and rescales the first frame, computes the search window around the
    initial target box, extracts backbone features, learns the target-aware
    channel weights (taf_model) and stores the selected exemplar features used
    by `tracking` for all subsequent frames.

    Args:
        img_path: path of the first frame image.
        target_loc: initial bounding box [x1, y1, width, height] (1-indexed).
        visualize: forwarded to get_subwindow_feature for debug display.
    """
    #------------sequence parameters initialization----------------------------
    img = default_image_loader(img_path)  # numpy.ndarray [height, width, channel]
    self.target_location = target_loc
    # Geometric mean of width*height — scalar "size" of the target.
    origin_target_size = math.sqrt(self.target_location[2] * self.target_location[3])
    origin_target_location = self.target_location  # keep the unscaled box for reporting
    origin_image_size = img.shape[0:2][::-1]  # [width, height]
    # Rescale the whole sequence so the target size lies in [MIN_SIZE, MAX_SIZE].
    # NOTE(review): self.rescale is only assigned in these two branches —
    # presumably it defaults to 1 in __init__; confirm.
    if origin_target_size > self.config.MODEL.MAX_SIZE:
        self.rescale = self.config.MODEL.MAX_SIZE / origin_target_size
    elif origin_target_size < self.config.MODEL.MIN_SIZE:
        self.rescale = self.config.MODEL.MIN_SIZE / origin_target_size
    #----------------scale image (cv2, numpy.ndarray)---------------
    image = cv2.resize(img, tuple((np.ceil(np.array(origin_image_size) * self.rescale)).astype(int)), interpolation=cv2.INTER_LINEAR)
    #------scaled target location, position and size [x1, y1, width, height]------
    # Subtracting [1,1,0,0] converts the 1-indexed corner to 0-indexed.
    self.target_location = round_python2(np.array(self.target_location) * self.rescale) - np.array([1, 1, 0, 0])  #0-index
    target_size = self.target_location[2:4]  # [width, height]
    image_size = image.shape[0:2]  # [height, width]
    search_size, ratio = cal_window_size(self.config.MODEL.MAX_SIZE, image_size, self.config.MODEL.SCALE_NUM, self.config.MODEL.TOTAL_STRIDE)
    self.input_size = np.array([search_size, search_size])  # square network input
    #------------First frame processing--------------------
    self.srch_window_location = cal_srch_window_location(self.target_location, search_size)
    features = get_subwindow_feature(self.model, image, self.srch_window_location, self.input_size, visualize=visualize)
    #------------------------for visualize feature-----------
    # Hard-coded debug switch: keeps the first-frame features/subwindow on self
    # so they can be visualized later. Set to False to skip.
    visualize_feature = True
    if visualize_feature:
        self.features = features
        self.subwindow = get_subwindow(self.srch_window_location, image, self.input_size, visualize=False)
    #----------- crop the target exemplar from the feature map------------------
    # target_size[::-1] converts [width, height] -> [height, width].
    patch_features, patch_locations = generate_patch_feature(target_size[::-1], self.srch_window_location, features)
    self.feature_pad = 2
    self.b_feature_pad = int(self.feature_pad / 2)  # half-pad used when scaling features
    self.filter_sizes = [torch.tensor(feature.shape).numpy() for feature in patch_features]
    #-------------compute the indices of target-aware features----------------
    self.feature_weights, self.balance_weights = taf_model(features, self.filter_sizes, self.device)
    #-------------select the target-aware features---------------------------
    self.exemplar_features = features_selection(patch_features, self.feature_weights, self.balance_weights, mode='reduction')
    #self.exemplar_features = fuse_feature(patch_features)
    #------------visualization------------------------------------------------
    if self.display:
        self.prepare_visualize()
        self.visualization(img, origin_target_location, 0)
    # Report the original (unscaled) first-frame box as the first result.
    self.results.append(origin_target_location)
def tracking(self, img_path, frame, visualize=False):
    """Track the target in one new frame.

    Extracts features at three scales around the previous search window,
    correlates them with the stored exemplar via the siamese model, picks the
    best scale/position from the Hann-weighted response maps, and updates
    `self.target_location`, `self.srch_window_location` and `self.results`.

    Args:
        img_path: path of the current frame image.
        frame: frame index (used only for visualization).
        visualize: forwarded to get_subwindow_feature for debug display.
    """
    #-------------read image and rescale the image-----------------------------
    img = default_image_loader(img_path)  # numpy.ndarray [height, width, channel]
    image = cv2.resize(img, tuple((np.ceil(np.array(img.shape[0:2][::-1]) * self.rescale)).astype(int)), interpolation=cv2.INTER_LINEAR)
    tic = cv2.getTickCount()  # start timing this frame
    #-------------get multi-scale feature--------------------------------------
    features = get_subwindow_feature(self.model, image, self.srch_window_location, self.input_size, visualize=visualize)
    feature_size = (torch.tensor(features[0].shape)).numpy().astype(int)[-2:]  # spatial [H, W]
    #selected_features = fuse_feature(features)
    selected_features = features_selection(features, self.feature_weights, self.balance_weights, mode='reduction')
    # Build a 3-scale pyramid: slightly enlarged (_1) and shrunk (_3) copies,
    # cropped/padded back to the original spatial size so they can be stacked.
    selected_features_1 = resize_tensor(selected_features, tuple(feature_size + self.feature_pad))
    selected_features_3 = resize_tensor(selected_features, tuple(feature_size - self.feature_pad))
    # Center-crop the enlarged features back to feature_size.
    selected_features_1 = selected_features_1[:, :, self.b_feature_pad:feature_size[0] + self.b_feature_pad, self.b_feature_pad:feature_size[1] + self.b_feature_pad]
    # Zero-pad the shrunk features back to feature_size.
    selected_features_3 = torch.nn.functional.pad(selected_features_3, (self.b_feature_pad, self.b_feature_pad, self.b_feature_pad, self.b_feature_pad))
    # Batch dim 0 now indexes scale: [enlarged, original, shrunk].
    scaled_features = torch.cat((selected_features_1, selected_features, selected_features_3), dim=0)
    #-------------get response map-----------------------------------------------
    response_map = self.siamese_model(scaled_features, self.exemplar_features).to('cpu')
    # Upsample the response to search-window resolution.
    scaled_response_map = torch.squeeze(resize_tensor(response_map, tuple(self.srch_window_location[-2:].astype(int)), mode='bicubic', align_corners=True))
    # Hann window penalizes large displacements from the window center.
    hann_window = generate_2d_window('hann', tuple(self.srch_window_location[-2:].astype(int)), scaled_response_map.shape[0])
    scaled_response_maps = scaled_response_map + hann_window
    #-------------find max-response----------------------------------------------
    scale_ind = calculate_scale(scaled_response_maps, self.config.MODEL.SCALE_WEIGHTS)
    response_map = scaled_response_maps[scale_ind, :, :].numpy()
    max_h, max_w = np.where(response_map == np.max(response_map))
    # If the maximum is not unique, keep only the first hit.
    if len(max_h) > 1:
        max_h = np.array([max_h[0], ])
    if len(max_w) > 1:
        max_w = np.array([max_w[0], ])
    #-------------update tracking state and save tracking result----------------------------------------
    # Center-based box [cx, cy, w, h] from the corner-based previous location.
    target_loc_center = np.append(self.target_location[0:2] + (self.target_location[2:4]) / 2, self.target_location[2:4])
    # Shift the center by the peak's offset from the window center, then apply
    # the chosen scale factor to both position offset and size.
    target_loc_center[0:2] = target_loc_center[0:2] + (np.append(max_w, max_h) - (self.srch_window_location[2:4] / 2 - 1)) * self.config.MODEL.SCALES[scale_ind]
    target_loc_center[2:4] = target_loc_center[2:4] * self.config.MODEL.SCALES[scale_ind]
    #print('target_loc_center in current frame:',target_loc_center)
    # Back to corner-based [x1, y1, w, h].
    self.target_location = np.append(target_loc_center[0:2] - (target_loc_center[2:4]) / 2, target_loc_center[2:4])
    #print('target_location in current frame:', target_location)
    # Re-center and re-scale the search window for the next frame.
    self.srch_window_location[2:4] = (round_python2(self.srch_window_location[2:4] * self.config.MODEL.SCALES[scale_ind]))
    self.srch_window_location[0:2] = target_loc_center[0:2] - (self.srch_window_location[2:4]) / 2
    # Map the box back to original-image coordinates (undo self.rescale),
    # keeping 0-indexed convention.
    tracking_bbox = (self.target_location + np.array([1, 1, 0, 0])) / self.rescale - np.array([1, 1, 0, 0])  #tracking_bbox: 0-index
    self.results.append(tracking_bbox)
    self.toc += cv2.getTickCount() - tic  # accumulate per-frame time
    if self.display:
        self.visualization(img, tracking_bbox.astype(int), frame)
def initialize_tadt(self, img_path, target_loc, visualize=False):
    """Initialize the tracker from the first frame (experimental variant).

    Unlike the other `initialize_tadt`, this variant takes TWO boxes in
    `target_loc` (index 0 = the target, index 1 = a second location), learns
    the channel weights from the difference of their features via
    `taf_model_diff`, and additionally stores a global-average-pooled copy of
    the exemplar features. Several hard-coded debug switches at the end drive
    optional visualizations.

    Args:
        img_path: path of the first frame image.
        target_loc: sequence of at least two boxes [x1, y1, width, height];
                    target_loc[0] is the tracked target.
        visualize: accepted for interface compatibility; not forwarded here.
    """
    #------------sequence parameters initialization----------------------------
    img = default_image_loader(img_path)  # numpy.ndarray [height, width, channel]
    self.target_location = target_loc[0]
    # Geometric mean of width*height (sqrt of box area).
    origin_target_size = math.sqrt(self.target_location[2] * self.target_location[3])
    origin_image_size = img.shape[0:2][::-1]  # [width, height]
    # NOTE(review): self.rescale only assigned in these branches — presumably
    # defaulted elsewhere (e.g. __init__); confirm.
    if origin_target_size > self.config.MODEL.MAX_SIZE:
        self.rescale = self.config.MODEL.MAX_SIZE / origin_target_size
    elif origin_target_size < self.config.MODEL.MIN_SIZE:
        self.rescale = self.config.MODEL.MIN_SIZE / origin_target_size
    #----------------scale image (cv2, numpy.ndarray)---------------
    image = cv2.resize(img, tuple((np.ceil(np.array(origin_image_size) * self.rescale)).astype(int)), interpolation=cv2.INTER_LINEAR)
    #----------------shift image---------------
    # Translate the frame by (+1, +1) pixel; used below to extract features of
    # a shifted copy (features3).
    shift = np.float32([[1, 0, 1], [0, 1, 1]])
    shifted_img = cv2.warpAffine(image, shift, (image.shape[1], image.shape[0]))
    #------scaled target location, position and size [x1, y1, width, height]------
    self.target_location = round_python2(np.array(self.target_location) * self.rescale) - np.array([1, 1, 0, 0])  #0-index
    target_size = self.target_location[2:4]  # [width, height]
    image_size = image.shape[0:2]  # [height, width]
    # NOTE(review): scale_num is hard-coded to 2 here instead of
    # self.config.MODEL.SCALE_NUM — presumably intentional for this variant; confirm.
    search_size, ratio = cal_window_size(self.config.MODEL.MAX_SIZE, image_size, 2, self.config.MODEL.TOTAL_STRIDE)
    self.input_size = np.array([search_size, search_size])
    #------------First frame processing--------------------
    self.srch_window_location = cal_srch_window_location(self.target_location, search_size)
    # Second search window around the second provided box (target_loc[1]).
    self.srch_window_location2 = cal_srch_window_location(round_python2(np.array(target_loc[1]) * self.rescale) - np.array([1, 1, 0, 0]), search_size)
    features = get_subwindow_feature(self.model, image, self.srch_window_location2, self.input_size)  # two tensors, one per conv layer
    features2 = get_subwindow_feature(self.model, image, self.srch_window_location, self.input_size)  # two tensors, one per conv layer
    # NOTE(review): features3 (shifted image) is computed but never used below —
    # possibly a leftover experiment.
    features3 = get_subwindow_feature(self.model, shifted_img, self.srch_window_location, self.input_size)  # two tensors, one per conv layer
    #----------- crop the target exemplar from the feature map------------------
    patch_features, patch_locations = generate_patch_feature(target_size[::-1], self.srch_window_location, features)
    self.feature_pad = 2
    self.b_feature_pad = int(self.feature_pad / 2)
    self.filter_sizes = [torch.tensor(feature.shape).numpy() for feature in patch_features]
    #-------------compute the indices of target-aware features----------------
    #self.feature_weights, self.balance_weights = taf_model([features, features2], self.filter_sizes, self.device)
    # Difference-based channel weighting between the two feature sets.
    self.feature_weights, self.balance_weights = taf_model_diff([features, features2], 1, self.device)
    #-------------select the target-aware features---------------------------
    self.exemplar_features = features_selection(patch_features, self.feature_weights, self.balance_weights, mode='reduction')
    #self.exemplar_features = fuse_feature(patch_features)
    #-------------calculate global average pooling of exemplar features-------------------
    # Pool over the full spatial extent (H, W) -> one value per channel.
    kernel_size = self.exemplar_features[0].shape[1:3]
    self.exemplar_features_gap = nn.AvgPool2d(kernel_size)(self.exemplar_features[0])
    #------------visualization------------------------------------------------
    if self.display:
        self.prepare_visualize()
        self.visualization(img, target_loc[0], 0)
    self.results.append(target_loc[0])
    #------------------------to visualize what is inside subwindow-----------
    display_subwindow = False  # hard-coded debug switch
    if display_subwindow:
        subwindow = get_subwindow(self.srch_window_location, image, self.input_size, visualize=display_subwindow)
    #------------------------to visualize heatmap on full frame or subwindow-----------
    vis_heatmap_full_frame = False  # hard-coded debug switch
    vis_heatmap_subwindow = False  # hard-coded debug switch
    if vis_heatmap_full_frame or vis_heatmap_subwindow:
        if vis_heatmap_full_frame:
            subwindow, track_features = get_frame_features(self.model, img)
        elif vis_heatmap_subwindow:
            subwindow = get_subwindow(self.srch_window_location, image, self.input_size, visualize=False)
            track_features = features
        srch_window_size = (subwindow.shape[2], subwindow.shape[1])  # (width, height)
        self.visualize_feature(features=track_features, stage='conv4_1', srch_window_size=srch_window_size, subwindow=subwindow, feature_weights=self.feature_weights, balance_weights=self.balance_weights)
    #------------------------to visualize convolution between feature maps and exemplar-----------
    # NOTE(review): enabled by default — blocks on cv2.waitKey inside
    # visualize_conv; disable for unattended runs.
    vis_conv_feature_map = True
    if vis_conv_feature_map:
        subwindow, track_features = get_frame_features(self.model, img)
        self.visualize_conv(features=track_features, stage='conv4_3', maps_num=0, exemplar_features=patch_features, feature_weights=self.feature_weights, balance_weights=self.balance_weights)
def visualize_conv(self, features=None, stage='conv4_1', maps_num=0, exemplar_features=None, feature_weights=None, balance_weights=None):
    """Visualize the correlation between frame features and the target exemplar.

    Selects the requested stage's features and exemplar (optionally filtered by
    the target-aware weights), cross-correlates them with conv2d, and builds
    channel-summed uint8 maps. The actual display calls are commented out, so
    currently this only computes the maps and blocks on cv2.waitKey.

    Args:
        features: list of frame feature tensors, one per conv stage.
        stage: 'conv4_1', 'conv4_3' or 'all' (concatenate both stages).
        maps_num: unused at present (kept for interface compatibility).
        exemplar_features: list of exemplar (target patch) feature tensors.
        feature_weights, balance_weights: channel-selection weights; when either
            is None the raw features are used.
    """
    assert (stage == 'conv4_1' or stage == 'conv4_3' or stage == 'all'), 'For now, TADT only support for conv4_1 and conv4_3'
    # Map the stage name onto the list index; 'all' keeps the string.
    if stage == 'conv4_1':
        stage = 0
    elif stage == 'conv4_3':
        stage = 1
    if feature_weights is None or balance_weights is None:
        # No selection weights: use raw features and exemplar.
        if stage == 'all':
            feature = torch.cat(features, dim=1)
            exemplar = torch.cat(exemplar_features, dim=1)
        else:
            feature = features[stage]
            exemplar = exemplar_features[stage]
    else:
        # Apply target-aware selection to both frame and exemplar features.
        if stage == 'all':
            feature = features_selection(features, feature_weights, balance_weights, mode='reduction')
            exemplar = features_selection(exemplar_features, feature_weights, balance_weights, mode='reduction')
        else:
            feature = features[stage]
            feature_weight = feature_weights[stage]
            feature = feature_selection(feature, feature_weight, mode='reduction')
            exemplar = exemplar_features[stage]
            exemplar = feature_selection(exemplar, feature_weight, mode='reduction')
    # Cross-correlate frame features with the exemplar (exemplar as kernel).
    # NOTE(review): Variable is deprecated in modern PyTorch and
    # .view(x.shape) is a no-op — tensors could be passed directly.
    convolution = nn.functional.conv2d(Variable(feature.view(feature.shape)), Variable(exemplar.view(exemplar.shape)))
    convolution = torch.sum(convolution, dim=1)
    # Torch tensor -> HWC uint8 numpy image for OpenCV.
    convolution = convolution.cpu().numpy().astype(np.uint8).transpose(1, 2, 0)
    feature_map = torch.sum(feature, dim=1)
    feature_map = feature_map.cpu().numpy().astype(np.uint8).transpose(1, 2, 0)
    exemplar_map = torch.sum(exemplar, dim=1)
    exemplar_map = exemplar_map.cpu().numpy().astype(np.uint8).transpose(1, 2, 0)
    # Alternative template-matching visualization, kept for reference:
    #match_template = cv2.matchTemplate(
    #    feature[:,1,:,:].cpu().numpy().astype(np.uint8).transpose(1,2,0),
    #    exemplar[:,1,:,:].cpu().numpy().astype(np.uint8).transpose(1,2,0),
    #    3)  #3 = cv.TM_CCORR_NORMED
    #cv2.imshow('exemplar', exemplar_map)
    #cv2.imshow('feature_map', feature_map)
    #cv2.imshow('convolution', match_template)
    #cv2.imwrite('./feature_map_conv4_3_layer_359.jpg', feature_map)
    # Per-channel template matching dump, kept for reference:
    #for i in range(exemplar.shape[1]):
    #    match_template = cv2.matchTemplate(
    #        feature[:,i,:,:].cpu().numpy().astype(np.uint8).transpose(1,2,0),
    #        exemplar[:,i,:,:].cpu().numpy().astype(np.uint8).transpose(1,2,0),
    #        3)  #3 = cv.TM_CCORR_NORMED
    #    cv2.imwrite('./match_templates/match_template_' + str(i) + '.jpg', match_template)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def tracking(self, img_path, frame, visualize=False):
    """Track the target in one new frame (affinity-experiment variant).

    Extracts features at three scales around the previous search window,
    correlates them with the stored exemplar via the siamese model, picks the
    best scale/position from the Hann-weighted response maps, updates
    `self.target_location` / `self.srch_window_location` / `self.results`,
    and additionally stores a global-average-pooled copy of the current
    frame's selected features in `self.selected_features_gap`.

    Args:
        img_path: path of the current frame image.
        frame: frame index (used only for visualization).
        visualize: accepted for interface compatibility; not forwarded here.
    """
    #-------------read image and rescale the image-----------------------------
    img = default_image_loader(img_path)  # numpy.ndarray [height, width, channel]
    image = cv2.resize(img, tuple((np.ceil(np.array(img.shape[0:2][::-1]) * self.rescale)).astype(int)), interpolation=cv2.INTER_LINEAR)
    tic = cv2.getTickCount()  # start timing this frame
    #-------------get multi-scale feature--------------------------------------
    features = get_subwindow_feature(self.model, image, self.srch_window_location, self.input_size)
    feature_size = (torch.tensor(features[0].shape)).numpy().astype(int)[-2:]  # spatial [H, W]
    #selected_features = fuse_feature(features)
    #-------------select the target-aware features of the new frame-------------
    selected_features = features_selection(features, self.feature_weights, self.balance_weights, mode='reduction')
    # Build a 3-scale pyramid: enlarged (_1) and shrunk (_3) copies,
    # cropped/padded back to feature_size so they can be stacked on dim 0.
    selected_features_1 = resize_tensor(selected_features, tuple(feature_size + self.feature_pad))
    selected_features_3 = resize_tensor(selected_features, tuple(feature_size - self.feature_pad))
    # Center-crop the enlarged copy back to feature_size.
    selected_features_1 = selected_features_1[:, :, self.b_feature_pad:feature_size[0] + self.b_feature_pad, self.b_feature_pad:feature_size[1] + self.b_feature_pad]
    # Zero-pad the shrunk copy back to feature_size.
    selected_features_3 = torch.nn.functional.pad(selected_features_3, (self.b_feature_pad, self.b_feature_pad, self.b_feature_pad, self.b_feature_pad))
    scaled_features = torch.cat((selected_features_1, selected_features, selected_features_3), dim=0)
    #-------------get response map (correlation of features with exemplar)------
    response_map = self.siamese_model(scaled_features, self.exemplar_features).to('cpu')
    # Upsample the response to search-window resolution.
    scaled_response_map = torch.squeeze(resize_tensor(response_map, tuple(self.srch_window_location[-2:].astype(int)), mode='bicubic', align_corners=True))
    # Hann window penalizes large displacements from the window center.
    hann_window = generate_2d_window('hann', tuple(self.srch_window_location[-2:].astype(int)), scaled_response_map.shape[0])
    scaled_response_maps = scaled_response_map + hann_window
    #-------------select the best scale-----------------------------------------
    scale_ind = calculate_scale(scaled_response_maps, self.config.MODEL.SCALE_WEIGHTS)
    # NOTE: an ROI/affinity-matrix experiment (global-average-pooled ROI
    # features compared against the exemplar GAP) previously lived here; it was
    # removed as dead commented-out code. See version control history.
    #-------------find max-response----------------------------------------------
    response_map = scaled_response_maps[scale_ind, :, :].numpy()
    max_h, max_w = np.where(response_map == np.max(response_map))
    # If the maximum is not unique, keep only the first hit.
    if len(max_h) > 1:
        max_h = np.array([max_h[0], ])
    if len(max_w) > 1:
        max_w = np.array([max_w[0], ])
    #-------------update tracking state and save tracking result-----------------
    # Center-based box [cx, cy, w, h] from the corner-based previous location.
    target_loc_center = np.append(self.target_location[0:2] + (self.target_location[2:4]) / 2, self.target_location[2:4])
    # Shift the center by the peak's offset from the window center, then apply
    # the chosen scale factor to both the offset and the size.
    target_loc_center[0:2] = target_loc_center[0:2] + (np.append(max_w, max_h) - (self.srch_window_location[2:4] / 2 - 1)) * self.config.MODEL.SCALES[scale_ind]
    target_loc_center[2:4] = target_loc_center[2:4] * self.config.MODEL.SCALES[scale_ind]
    # Back to corner-based [x1, y1, w, h].
    self.target_location = np.append(target_loc_center[0:2] - (target_loc_center[2:4]) / 2, target_loc_center[2:4])
    # Re-center and re-scale the search window for the next frame.
    self.srch_window_location[2:4] = (round_python2(self.srch_window_location[2:4] * self.config.MODEL.SCALES[scale_ind]))
    self.srch_window_location[0:2] = target_loc_center[0:2] - (self.srch_window_location[2:4]) / 2
    # Map the box back to original-image coordinates (undo self.rescale),
    # keeping 0-indexed convention.
    tracking_bbox = (self.target_location + np.array([1, 1, 0, 0])) / self.rescale - np.array([1, 1, 0, 0])  #tracking_bbox: 0-index
    self.results.append(tracking_bbox)
    #-------------global average pooling of the new frame's features-------------
    # BUG FIX: was `shape[1:2]`, which yields a single dimension and gives
    # nn.AvgPool2d a 1-element kernel size; use the full spatial extent (H, W)
    # as done for the exemplar GAP in initialize_tadt.
    kernel_size = selected_features[0].shape[1:3]
    self.selected_features_gap = nn.AvgPool2d(kernel_size)(selected_features[0])
    self.toc += cv2.getTickCount() - tic  # accumulate per-frame time
    if self.display:
        self.visualization(img, tracking_bbox.astype(int), frame)