def tracking(self, image):
    """Track one frame and return ([x, y, w, h], score_map, iou, score_max, dis).

    Advances the internal frame counter, runs the local tracker, and scores
    the result against the stored groundtruth row for this frame.
    """
    self.i += 1
    local_state1, self.score_map, update, score_max, dis = self.local_track(image)

    # IoU bookkeeping against the annotation for this frame:
    #   -1 -> degenerate groundtruth (width or height below 3 px)
    #    0 -> groundtruth row contains NaN (absent annotation)
    gt_row = self.groundtruth[self.i]
    if gt_row[2] < 3 or gt_row[3] < 3:
        iou = -1
    elif any(np.isnan(gt_row)):
        iou = 0
    else:
        iou = compute_iou(gt_row, local_state1)

    # self.last_gt is stored as [y1, x1, y2, x2]; convert to width/height.
    width = self.last_gt[3] - self.last_gt[1]
    height = self.last_gt[2] - self.last_gt[0]

    if self.p.visualization:
        show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                 np.array(self.last_gt, dtype=np.int32),
                 '2',
                 groundtruth=self.groundtruth,
                 update=update,
                 frame_id=self.i,
                 score=max(self.score_map.flatten()))

    result_box = [float(self.last_gt[1]), float(self.last_gt[0]),
                  float(width), float(height)]
    return result_box, self.score_map, iou, score_max, dis
def eval_tracking(Dataset, video_spe=None, save=False, p=None):
    """Evaluate the tracker over a dataset and optionally save per-frame boxes.

    Args:
        Dataset: one of 'otb', 'lasot', 'uav123' — selects the data root and
            the per-sequence layout.
        video_spe: optional single sequence to run; when None, every
            non-.txt entry under the data root is evaluated.
        save: when True, write the tracked boxes to ``result_save_path``.
        p: parameter object; must provide ``save_path``, ``base_tracker``
            and ``visualization``.
    """
    if Dataset == 'otb':
        data_dir = '/media/zj/4T/Dataset/OTB-100'
    elif Dataset == "lasot":
        # LaSOT nests sequences under their category (e.g. 'airplane-1' -> 'airplane/').
        data_dir = '/media/zj/4T/Dataset/LaSOT/dataset/images'
        tmp = video_spe.split('-')
        data_dir = os.path.join(data_dir, tmp[0])
    elif Dataset == 'uav123':
        data_dir = '/media/zj/4T/Dataset/UAV123/Dataset_UAV123/UAV123'

    if video_spe is not None:
        sequence_list = [video_spe]
    else:
        sequence_list = os.listdir(data_dir)
        sequence_list.sort()
        sequence_list = [
            title for title in sequence_list if not title.endswith("txt")
        ]

    base_save_path = p.save_path
    for seq_id, video in enumerate(sequence_list):
        if Dataset == "otb" or Dataset == "uav123":
            # NOTE(review): this branch indexes `video` like a dict
            # ('path', 'nz', ...), but `sequence_list` above only ever holds
            # strings — presumably these datasets are driven through a
            # sequence-info list built elsewhere; confirm against callers.
            sequence_path = video['path']
            nz = video['nz']
            ext = video['ext']
            start_frame = video['startFrame']
            end_frame = video['endFrame']
            init_omit = 0
            if 'initOmit' in video:
                init_omit = video['initOmit']
            image_list = [
                '{base_path}/{sequence_path}/{frame:0{nz}}.{ext}'.format(
                    base_path=data_dir,
                    sequence_path=sequence_path,
                    frame=frame_num,
                    nz=nz,
                    ext=ext)
                for frame_num in range(start_frame + init_omit, end_frame + 1)
            ]
            anno_path = '{}/{}'.format(data_dir, video['anno_path'])
            # Annotations may be whitespace- or comma-separated; a delimiter
            # mismatch raises ValueError, so retry with ','.
            try:
                groundtruth = np.loadtxt(str(anno_path), dtype=np.float64)
            except ValueError:
                groundtruth = np.loadtxt(str(anno_path),
                                         delimiter=',',
                                         dtype=np.float64)
            result_save_path = os.path.join(base_save_path,
                                            video['name'] + '.txt')
            image_dir = image_list[0]
        elif Dataset == "lasot":
            sequence_dir = data_dir + '/' + video + '/img/'
            gt_dir = data_dir + '/' + video + '/groundtruth.txt'
            image_list = os.listdir(sequence_dir)
            image_list.sort()
            image_list = [
                im for im in image_list
                if im.endswith("jpg") or im.endswith("jpeg")
            ]
            # Same delimiter fallback as above, comma-first for LaSOT.
            try:
                groundtruth = np.loadtxt(gt_dir, delimiter=',')
            except ValueError:
                groundtruth = np.loadtxt(gt_dir)
            result_save_path = os.path.join(base_save_path, video + '.txt')
            image_dir = sequence_dir + image_list[0]

        # Skip sequences that already have a saved result.
        if os.path.exists(result_save_path):
            continue

        region = Region(groundtruth[0, 0], groundtruth[0, 1],
                        groundtruth[0, 2], groundtruth[0, 3])
        image = Image.open(image_dir).convert('RGB')
        tracker = metric_tracker(image, region, video=video, p=p)

        num_frames = len(image_list)
        bBoxes = np.zeros((num_frames, 4))
        bBoxes2 = np.zeros((num_frames, 4))
        bBoxes[0, :] = groundtruth[0, :]
        bBoxes2[0, :] = groundtruth[0, :]

        for im_id in range(1, len(image_list)):
            if Dataset == 'lasot':
                imagefile = sequence_dir + image_list[im_id]
            else:
                imagefile = image_list[im_id]
            image = Image.open(imagefile).convert('RGB')
            if p.base_tracker == 'pymdnet':
                region = tracker.pymdnet_track(image)
            elif p.base_tracker == 'metricnet':
                region = tracker.metricnet_track(image)
            if p.visualization:
                show_res(cv.cvtColor(np.array(image), cv.COLOR_RGB2BGR),
                         np.array(region).astype('int16'), '1')
            bBoxes[im_id, :] = region

        fps = tracker.i / tracker.spf_total
        print('fps', fps)
        if save:
            np.savetxt(result_save_path, bBoxes, fmt="%.6f,%.6f,%.6f,%.6f")
def tracking(self, image):
    """Long-term tracking step: local track, MDNet verification, and
    global redetection when the target is judged lost.

    Returns ([x, y, w, h], score_map, 0, confidence_score, 0).
    """
    self.i += 1
    mask = None
    candidate_bboxes = None

    # Seed the local tracker from last_gt ([y1, x1, y2, x2]) as
    # center position (y, x) and size (h, w).
    self.local_Tracker.pos = torch.FloatTensor([
        (self.last_gt[0] + self.last_gt[2] - 1) / 2,
        (self.last_gt[1] + self.last_gt[3] - 1) / 2
    ])
    self.local_Tracker.target_sz = torch.FloatTensor([
        self.last_gt[2] - self.last_gt[0],
        self.last_gt[3] - self.last_gt[1]
    ])

    tic = time.time()
    (local_state, self.score_map, update, local_score, dis, flag,
     update_score) = self.local_track(image)

    # Verify the locally tracked box with the online MDNet classifier.
    md_score = self.pymdnet_eval(
        image, np.array(local_state).reshape([-1, 4]))[0]
    self.score_max = md_score

    if md_score > 0 and flag == 'normal':
        self.flag = 'found'
        if self.p.use_mask:
            # SiamMask refinement works in (x, y) order, hence the [::-1] flips.
            self.siamstate['target_pos'] = self.local_Tracker.pos.numpy()[::-1]
            self.siamstate['target_sz'] = \
                self.local_Tracker.target_sz.numpy()[::-1]
            siamscore, mask = self.siammask_track(
                cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
            self.local_Tracker.pos = torch.FloatTensor(
                self.siamstate['target_pos'][::-1].copy())
            self.local_Tracker.target_sz = torch.FloatTensor(
                self.siamstate['target_sz'][::-1].copy())
            # Rebuild local_state as [x1, y1, w, h] from the refined center/size.
            local_state = torch.cat(
                (self.local_Tracker.pos[[1, 0]] -
                 (self.local_Tracker.target_sz[[1, 0]] - 1) / 2,
                 self.local_Tracker.target_sz[[1, 0]])).data.cpu().numpy()
        self.last_gt = np.array([
            local_state[1], local_state[0],
            local_state[1] + local_state[3],
            local_state[0] + local_state[2]
        ])
    elif md_score < 0 or flag == 'not_found':
        # Target lost: ask the global tracker for candidates and re-verify.
        self.count += 1
        self.flag = 'not_found'
        candidate_bboxes = self.Global_Track_eval(image, 10)
        candidate_scores = self.pymdnet_eval(image, candidate_bboxes)
        max_id = np.argmax(candidate_scores)
        if candidate_scores[max_id] > 0:
            redet_bboxes = candidate_bboxes[max_id]
            # Only commit to the redetection after 5 consecutive misses.
            if self.count >= 5:
                self.last_gt = np.array([
                    redet_bboxes[1], redet_bboxes[0],
                    redet_bboxes[1] + redet_bboxes[3],
                    redet_bboxes[2] + redet_bboxes[0]
                ])
                self.local_Tracker.pos = torch.FloatTensor([
                    (self.last_gt[0] + self.last_gt[2] - 1) / 2,
                    (self.last_gt[1] + self.last_gt[3] - 1) / 2
                ])
                self.local_Tracker.target_sz = torch.FloatTensor([
                    self.last_gt[2] - self.last_gt[0],
                    self.last_gt[3] - self.last_gt[1]
                ])
                self.score_max = candidate_scores[max_id]
                self.count = 0

    if update:
        self.collect_samples_pymdnet(image)
    self.pymdnet_long_term_update()

    width = self.last_gt[3] - self.last_gt[1]
    height = self.last_gt[2] - self.last_gt[0]
    toc = time.time() - tic
    print(toc)

    # Fuse the local-tracker score with the (squashed) MDNet score into [0, 1].
    confidence_score = np.clip(
        (local_score + np.arctan(0.2 * self.score_max) / math.pi + 0.5) / 2,
        0, 1)

    if self.p.visualization:
        show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                 np.array(self.last_gt, dtype=np.int32),
                 '2',
                 groundtruth=self.groundtruth,
                 update=update_score,
                 can_bboxes=candidate_bboxes,
                 frame_id=self.i,
                 tracker_score=md_score,
                 mask=mask)

    return [
        float(self.last_gt[1]),
        float(self.last_gt[0]),
        float(width),
        float(height)
    ], self.score_map, 0, confidence_score, 0