Example #1
    def tracking(self, image):
        self.i += 1
        # Run the local tracker on the new frame.
        local_state1, self.score_map, update, score_max, dis = self.local_track(
            image)
        # Flag degenerate ground-truth boxes (width or height under 3 px)
        # and frames whose annotation is missing (NaN).
        gt_err = self.groundtruth[self.i, 2] < 3 or self.groundtruth[self.i, 3] < 3
        gt_nan = any(np.isnan(self.groundtruth[self.i]))
        if gt_err:
            iou = -1
        elif gt_nan:
            iou = 0
        else:
            iou = compute_iou(self.groundtruth[self.i], local_state1)

        # self.last_gt is stored as [ymin, xmin, ymax, xmax]
        width = self.last_gt[3] - self.last_gt[1]
        height = self.last_gt[2] - self.last_gt[0]

        if self.p.visualization:
            show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                     np.array(self.last_gt, dtype=np.int32),
                     '2',
                     groundtruth=self.groundtruth,
                     update=update,
                     frame_id=self.i,
                     score=max(self.score_map.flatten()))

        return [
            float(self.last_gt[1]),
            float(self.last_gt[0]),
            float(width),
            float(height)
        ], self.score_map, iou, score_max, dis
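A minimal sketch of the compute_iou helper this method calls, assuming both
boxes follow the OTB-style [x, y, w, h] convention used by the ground-truth
files (this body is a hypothetical reconstruction, not the repository's
implementation):

import numpy as np

def compute_iou(box_a, box_b):
    # Hypothetical reconstruction; assumes [x, y, w, h] boxes.
    ax2, ay2 = box_a[0] + box_a[2], box_a[1] + box_a[3]
    bx2, by2 = box_b[0] + box_b[2], box_b[1] + box_b[3]
    inter_w = max(0.0, min(ax2, bx2) - max(box_a[0], box_b[0]))
    inter_h = max(0.0, min(ay2, by2) - max(box_a[1], box_b[1]))
    inter = inter_w * inter_h
    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - inter
    return inter / union if union > 0 else 0.0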
Example #2
def eval_tracking(Dataset, video_spe=None, save=False, p=None):
    if Dataset == 'otb':
        data_dir = '/media/zj/4T/Dataset/OTB-100'
    elif Dataset == 'lasot':
        # LaSOT stores sequences under a per-class directory,
        # e.g. 'airplane-1' lives in 'airplane/'.
        data_dir = '/media/zj/4T/Dataset/LaSOT/dataset/images'
        tmp = video_spe.split('-')
        data_dir = os.path.join(data_dir, tmp[0])
    elif Dataset == 'uav123':
        data_dir = '/media/zj/4T/Dataset/UAV123/Dataset_UAV123/UAV123'
    else:
        raise ValueError('unknown dataset: {}'.format(Dataset))

    if video_spe is not None:
        sequence_list = [video_spe]
    else:
        sequence_list = os.listdir(data_dir)
        sequence_list.sort()
        sequence_list = [
            title for title in sequence_list if not title.endswith("txt")
        ]
    base_save_path = p.save_path
    for seq_id, video in enumerate(sequence_list):
        if Dataset in ('otb', 'uav123'):
            sequence_path = video['path']
            nz = video['nz']
            ext = video['ext']
            start_frame = video['startFrame']
            end_frame = video['endFrame']

            init_omit = 0
            if 'initOmit' in video:
                init_omit = video['initOmit']

            image_list = [
                '{base_path}/{sequence_path}/{frame:0{nz}}.{ext}'.format(
                    base_path=data_dir,
                    sequence_path=sequence_path,
                    frame=frame_num,
                    nz=nz,
                    ext=ext)
                for frame_num in range(start_frame + init_omit, end_frame + 1)
            ]

            anno_path = '{}/{}'.format(data_dir, video['anno_path'])

            try:
                groundtruth = np.loadtxt(str(anno_path), dtype=np.float64)
            except ValueError:
                # some annotation files are comma-separated
                groundtruth = np.loadtxt(str(anno_path),
                                         delimiter=',',
                                         dtype=np.float64)
            result_save_path = os.path.join(base_save_path,
                                            video['name'] + '.txt')
            image_dir = image_list[0]
        elif Dataset == "lasot":
            sequence_dir = data_dir + '/' + video + '/img/'
            gt_dir = data_dir + '/' + video + '/groundtruth.txt'
            image_list = os.listdir(sequence_dir)
            image_list.sort()
            image_list = [
                im for im in image_list
                if im.endswith("jpg") or im.endswith("jpeg")
            ]
            try:
                groundtruth = np.loadtxt(gt_dir, delimiter=',')
            except ValueError:
                groundtruth = np.loadtxt(gt_dir)
            result_save_path = os.path.join(base_save_path, video + '.txt')
            image_dir = sequence_dir + image_list[0]

        # Skip sequences whose results were already saved.
        if os.path.exists(result_save_path):
            continue

        region = Region(groundtruth[0, 0], groundtruth[0, 1],
                        groundtruth[0, 2], groundtruth[0, 3])

        # image = cv.cvtColor(cv.imread(image_dir), cv.COLOR_BGR2RGB)
        image = Image.open(image_dir).convert('RGB')
        tracker = metric_tracker(image, region, video=video, p=p)
        num_frames = len(image_list)
        bBoxes = np.zeros((num_frames, 4))
        bBoxes[0, :] = groundtruth[0, :]
        for im_id in range(1, len(image_list)):
            if Dataset == 'lasot':
                imagefile = sequence_dir + image_list[im_id]
            else:
                imagefile = image_list[im_id]
            # image = cv.cvtColor(cv.imread(imagefile), cv.COLOR_BGR2RGB)
            image = Image.open(imagefile).convert('RGB')
            if p.base_tracker == 'pymdnet':
                region = tracker.pymdnet_track(image)
            elif p.base_tracker == 'metricnet':
                region = tracker.metricnet_track(image)
            if p.visualization:
                show_res(cv.cvtColor(np.array(image), cv.COLOR_RGB2BGR),
                         np.array(region).astype('int16'), '1')
            bBoxes[im_id, :] = region
        fps = tracker.i / tracker.spf_total
        print('fps: %.2f' % fps)
        if save:
            np.savetxt(result_save_path, bBoxes, fmt="%.6f,%.6f,%.6f,%.6f")
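eval_tracking builds the initial Region from the first ground-truth row but
imports the class from elsewhere in the repository. A minimal stand-in
sketch, assuming the four fields are x, y, width, height in the OTB/LaSOT
annotation order (the field names are guesses, not the repo's API):

from collections import namedtuple

# Hypothetical stand-in for the tracker's Region type.
Region = namedtuple('Region', ['x', 'y', 'width', 'height'])

# Usage mirroring the call above:
# region = Region(groundtruth[0, 0], groundtruth[0, 1],
#                 groundtruth[0, 2], groundtruth[0, 3])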
Example #3
    def tracking(self, image):
        self.i += 1
        mask = None
        candidate_bboxes = None
        # Seed the local tracker from the previous frame's box.
        # self.last_gt is [ymin, xmin, ymax, xmax]; the local tracker
        # expects a (y, x) centre and a (h, w) size.
        self.local_Tracker.pos = torch.FloatTensor([
            (self.last_gt[0] + self.last_gt[2] - 1) / 2,
            (self.last_gt[1] + self.last_gt[3] - 1) / 2
        ])
        self.local_Tracker.target_sz = torch.FloatTensor([
            (self.last_gt[2] - self.last_gt[0]),
            (self.last_gt[3] - self.last_gt[1])
        ])
        tic = time.time()
        local_state, self.score_map, update, local_score, dis, flag, update_score = self.local_track(
            image)

        # Verify the local tracker's box with the online MD-Net classifier.
        md_score = self.pymdnet_eval(image,
                                     np.array(local_state).reshape([-1, 4]))[0]
        self.score_max = md_score

        if md_score > 0 and flag == 'normal':
            self.flag = 'found'
            if self.p.use_mask:
                # SiamMask state uses (x, y) / (w, h) order, so the (y, x)
                # tensors are flipped on the way in and out.
                self.siamstate['target_pos'] = self.local_Tracker.pos.numpy()[::-1]
                self.siamstate['target_sz'] = self.local_Tracker.target_sz.numpy()[::-1]
                siamscore, mask = self.siammask_track(
                    cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
                self.local_Tracker.pos = torch.FloatTensor(
                    self.siamstate['target_pos'][::-1].copy())
                self.local_Tracker.target_sz = torch.FloatTensor(
                    self.siamstate['target_sz'][::-1].copy())
                local_state = torch.cat(
                    (self.local_Tracker.pos[[1, 0]] -
                     (self.local_Tracker.target_sz[[1, 0]] - 1) / 2,
                     self.local_Tracker.target_sz[[1, 0]])).data.cpu().numpy()
            # Convert [x, y, w, h] back to [ymin, xmin, ymax, xmax].
            self.last_gt = np.array([
                local_state[1], local_state[0],
                local_state[1] + local_state[3],
                local_state[0] + local_state[2]
            ])
        elif md_score < 0 or flag == 'not_found':
            self.count += 1
            self.flag = 'not_found'
            # Target lost: ask the global tracker for 10 candidate boxes and
            # re-score them with the classifier.
            candidate_bboxes = self.Global_Track_eval(image, 10)
            candidate_scores = self.pymdnet_eval(image, candidate_bboxes)
            max_id = np.argmax(candidate_scores)
            if candidate_scores[max_id] > 0:
                redet_bboxes = candidate_bboxes[max_id]
                # Only commit the re-detection once the target has been lost
                # for at least 5 frames.
                if self.count >= 5:
                    self.last_gt = np.array([
                        redet_bboxes[1], redet_bboxes[0],
                        redet_bboxes[1] + redet_bboxes[3],
                        redet_bboxes[0] + redet_bboxes[2]
                    ])
                    self.local_Tracker.pos = torch.FloatTensor([
                        (self.last_gt[0] + self.last_gt[2] - 1) / 2,
                        (self.last_gt[1] + self.last_gt[3] - 1) / 2
                    ])
                    self.local_Tracker.target_sz = torch.FloatTensor([
                        (self.last_gt[2] - self.last_gt[0]),
                        (self.last_gt[3] - self.last_gt[1])
                    ])
                    self.score_max = candidate_scores[max_id]
                    self.count = 0
        # Collect online samples and run the classifier's long-term update.
        if update:
            self.collect_samples_pymdnet(image)

        self.pymdnet_long_term_update()

        width = self.last_gt[3] - self.last_gt[1]
        height = self.last_gt[2] - self.last_gt[0]
        toc = time.time() - tic
        print('frame time: %.3fs' % toc)
        # Fuse the local tracker score with the arctan-squashed classifier
        # score into a confidence value in [0, 1].
        confidence_score = np.clip(
            (local_score + np.arctan(0.2 * self.score_max) / math.pi + 0.5) /
            2, 0, 1)
        if self.p.visualization:
            show_res(cv2.cvtColor(image, cv2.COLOR_RGB2BGR),
                     np.array(self.last_gt, dtype=np.int32),
                     '2',
                     groundtruth=self.groundtruth,
                     update=update_score,
                     can_bboxes=candidate_bboxes,
                     frame_id=self.i,
                     tracker_score=md_score,
                     mask=mask)

        return [
            float(self.last_gt[1]),
            float(self.last_gt[0]),
            float(width),
            float(height)
        ], self.score_map, 0, confidence_score, 0
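The confidence fusion at the end of the method is the only place the local
score and the classifier score interact. The sketch below restates the
formula above as a standalone function (the name fuse_confidence is ours,
not the repository's):

import math
import numpy as np

def fuse_confidence(local_score, md_score):
    # arctan(0.2 * s) / pi maps the unbounded classifier score into
    # (-0.5, 0.5); adding 0.5 shifts it to (0, 1), and the average with
    # local_score is clipped to [0, 1].
    return float(np.clip(
        (local_score + np.arctan(0.2 * md_score) / math.pi + 0.5) / 2, 0, 1))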