Ejemplo n.º 1
0
def _best_output_index(outputs):
    """Return the index of the entry in *outputs* with the highest score.

    Each entry is a tracker result dict carrying a ``'best_score'`` key.
    Ties are resolved in favour of the later entry. Returns -1 when
    *outputs* is empty.
    """
    best_index = -1
    best_score = None
    for index, output in enumerate(outputs):
        score = output['best_score']
        if best_score is None or score >= best_score:
            best_score = score
            best_index = index
    return best_index


def _read_gt_bbox(gt_file):
    """Read the next ground-truth line and parse it into a list of ints.

    The line is expected to hold comma-separated integers. Returns None
    when the file is exhausted or the line is blank, so the caller can skip
    the evaluation instead of failing on ``int('')``.
    """
    line = gt_file.readline()
    if not line.strip():
        return None
    return [int(value) for value in line.split(',')]


def main():
    """Run the interactive tracking demo over per-frame candidate images.

    The first frame is used to let the user select the target ROI. For every
    following frame, the tracker is run on a set of candidate images
    (``focal``, a sequence of image paths yielded by ``get_frames``) and the
    result with the highest ``'best_score'`` is kept:

    * the first tracked frame evaluates every candidate image;
    * later frames only evaluate a small window of candidates around the
      previously selected index.

    The selected bounding box is drawn on the frame and the frame is written
    to ``data/result2``. When ``args.gt_on`` is set, the box is compared with
    the next line of the ground-truth file; when ``args.record`` is set, the
    IoU / centre-distance values and the frame are saved as well.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device :", device)

    gt_on = args.gt_on  # whether to measure accuracy against ground truth
    record = args.record  # whether to save the IoU/precision values and images

    # create model
    model = ModelBuilder()

    # load model (weights are mapped to CPU first, then moved to `device`)
    checkpoint = torch.load("pretrained_model/model.pth",
                            map_location=lambda storage, loc: storage.cpu())

    model.load_state_dict(checkpoint)
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    video_name = args.video_name.split('/')[-1].split('.')[0]
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    first_frame = True
    first_time = True  # True until one full scan over all candidates is done
    current_target = -1  # index into `focal` selected on the previous frame
    frame_num = 0

    # The ground-truth file is only needed (and only opened) when gt_on is
    # set. It is read once per tracked frame, i.e. its first line is paired
    # with the second frame of the sequence.
    gt_file = open('ground_truth/Non_video4_GT.txt', 'r') if gt_on else None
    try:
        for frame, focal in get_frames(args.video_name, args.type,
                                       args.img2d_ref, args.start_num,
                                       args.last_num):
            frame_num += 1
            if first_frame:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                except Exception:
                    # ROI selection failed: leave quietly, as before.
                    raise SystemExit
                tracker.init(frame, init_rect)
                first_frame = False
                continue

            if first_time:
                # Full scan: evaluate every candidate image.
                window_start = 0
                candidate_paths = list(focal)
            else:
                # Windowed scan: 3 candidates before and 2 after the previous
                # selection. The window is clamped to the valid index range so
                # that it neither wraps around through negative indices nor
                # runs past the end of `focal`.
                window_start = max(current_target - 3, 0)
                window_stop = min(current_target + 3, len(focal))
                candidate_paths = [
                    focal[i] for i in range(window_start, window_stop)
                ]

            # NOTE(review): track() is called once per candidate; if it
            # updates internal tracker state, the state after this loop
            # reflects the last candidate rather than the best one - confirm.
            outputs = [
                tracker.track(cv2.imread(path)) for path in candidate_paths
            ]
            if not outputs:
                # No candidate images for this frame: nothing to evaluate.
                continue

            max_index = _best_output_index(outputs)
            # Convert the window-relative index back to an index into `focal`.
            current_target = window_start + max_index
            first_time = False
            best = outputs[max_index]

            # Project helper; receives the first two and the remaining bbox
            # components (presumably position and size - verify).
            ground_truth(best['bbox'][:2], best['bbox'][2:])

            bbox = list(map(int, best['bbox']))

            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 0, 255),
                          3)
            # Saved before the ground-truth box is drawn, so this image only
            # contains the predicted box.
            save_path = os.path.join('data/result2',
                                     '{:03d}.jpg'.format(frame_num))
            cv2.imwrite(save_path, frame)

            # ground truth
            bbox_label = _read_gt_bbox(gt_file) if gt_on else None
            if bbox_label is not None:
                iou = IOU(bbox, bbox_label)

                # Centre of the ground-truth box.
                labelx = bbox_label[0] + (bbox_label[2] / 2)
                labely = bbox_label[1] + (bbox_label[3] / 2)

                # Euclidean distance between predicted and labelled centres.
                pre = ((best['cx'] - labelx)**2 +
                       (best['cy'] - labely)**2)**0.5

                if record:
                    with open('ground_truth/result_iou.txt', 'a') as result_iou:
                        result_iou.write(str(iou) + ',')

                    with open('ground_truth/result_pre.txt', 'a') as result_pre:
                        result_pre.write(str(pre) + ',')

                cv2.rectangle(frame, (bbox_label[0], bbox_label[1]),
                              (bbox_label[0] + bbox_label[2],
                               bbox_label[1] + bbox_label[3]), (255, 255, 255),
                              3)

            cv2.imshow(video_name, frame)

            if record:
                save_image(frame_num, frame)
            cv2.waitKey(40)
    finally:
        if gt_file is not None:
            gt_file.close()
Ejemplo n.º 2
0
def _read_gt_bbox(gt_file):
    """Read the next ground-truth line and parse it into a list of ints.

    The line is expected to hold comma-separated integers. Returns None
    when the file is exhausted or the line is blank, so the caller can skip
    the evaluation instead of failing on ``int('')``.
    """
    line = gt_file.readline()
    if not line.strip():
        return None
    return [int(value) for value in line.split(',')]


def main():
    """Run the interactive single-image tracking demo.

    The first frame yielded by ``get_frames`` is used to let the user select
    the target ROI; every following frame is passed to the tracker and the
    resulting bounding box is drawn and displayed. When ``args.gt_on`` is
    set, the box is compared with the next line of the ground-truth file
    (IoU and centre distance) and the labelled box is drawn too; when
    ``args.record`` is set, those values and the frame are saved.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device :", device)

    gt_on = args.gt_on  # whether to measure accuracy against ground truth

    # create model
    model = ModelBuilder()

    # load model (weights are mapped to CPU first, then moved to `device`)
    checkpoint = torch.load("pretrained_model/model.pth",
                            map_location=lambda storage, loc: storage.cpu())

    model.load_state_dict(checkpoint)
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    video_name = args.video_name.split('/')[-1].split('.')[0]
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    frame_num = 0

    # The ground-truth file is only needed (and only opened) when gt_on is
    # set. It is read once per tracked frame, i.e. its first line is paired
    # with the second frame of the sequence.
    gt_file = open('ground_truth/Non_video4_GT.txt', 'r') if gt_on else None
    try:
        for frame in get_frames(args.video_name, args.type, args.img2d_ref,
                                args.start_num, args.last_num):
            frame_num += 1
            if first_frame:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                except Exception:
                    # ROI selection failed: leave quietly, as before.
                    raise SystemExit
                tracker.init(frame, init_rect)
                first_frame = False
                continue

            outputs = tracker.track(frame)
            bbox = list(map(int, outputs['bbox']))

            # ---- ground truth ----
            bbox_label = _read_gt_bbox(gt_file) if gt_on else None
            if bbox_label is not None:
                # Centre of the ground-truth box.
                labelx = bbox_label[0] + (bbox_label[2] / 2)
                labely = bbox_label[1] + (bbox_label[3] / 2)

                iou = IOU(bbox, bbox_label)
                # Euclidean distance between predicted and labelled centres.
                pre = ((outputs['cx'] - labelx)**2 +
                       (outputs['cy'] - labely)**2) ** 0.5

                if args.record:
                    with open('ground_truth/result_iou.txt', 'a') as result_iou:
                        result_iou.write(str(iou) + ',')

                    with open('ground_truth/result_pre.txt', 'a') as result_pre:
                        result_pre.write(str(pre) + ',')

                # Labelled box in white.
                cv2.rectangle(frame, (bbox_label[0], bbox_label[1]),
                              (bbox_label[0]+bbox_label[2],
                               bbox_label[1]+bbox_label[3]),
                              (255, 255, 255), 3)
            # ----------------------

            # Predicted box in red (BGR).
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0]+bbox[2], bbox[1]+bbox[3]),
                          (0, 0, 255), 3)
            cv2.imshow(video_name, frame)
            if args.record:
                save_image(frame_num, frame)
            cv2.waitKey(40)
    finally:
        if gt_file is not None:
            gt_file.close()
Ejemplo n.º 3
0
def _read_gt_bbox(gt_file):
    """Read the next ground-truth line and parse it into a list of ints.

    The line is expected to hold comma-separated integers. Returns None
    when the file is exhausted or the line is blank, so the caller can skip
    the evaluation instead of failing on ``int('')``.
    """
    line = gt_file.readline()
    if not line.strip():
        return None
    return [int(value) for value in line.split(',')]


def main():
    """Run the tracking demo that picks one candidate image per frame.

    The first frame yielded by ``get_frames`` is used to let the user select
    the target ROI. For every following frame, ``tracker.get_cls(focal)``
    chooses an index into ``focal`` (a sequence of image paths), the tracker
    is run on that image, and the predicted box is compared with the next
    line of the ground-truth file. The centre distance is appended to
    ``ground_truth/result_cls.txt``, and the annotated frame is shown and
    written to ``data/result``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device :", device)

    # create model
    model = ModelBuilder()

    # load model (weights are mapped to CPU first, then moved to `device`)
    checkpoint = torch.load("pretrained_model/model.pth",
                            map_location=lambda storage, loc: storage.cpu())

    model.load_state_dict(checkpoint)
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    root = "test"
    video_name = root.split('/')[-1].split('.')[0]
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)

    frame_num = 0

    # The ground-truth file is read once per tracked frame, i.e. its first
    # line is paired with the second frame of the sequence.
    with open('ground_truth/new_record.txt', 'r') as gt_file:
        for frame, focal in get_frames(root):
            frame_num += 1
            if first_frame:
                try:
                    init_rect = cv2.selectROI(video_name, frame, False, False)
                except Exception:
                    # ROI selection failed: leave quietly, as before.
                    raise SystemExit
                tracker.init(frame, init_rect)
                first_frame = False
                continue

            # Full-range method: the candidate index is chosen over the whole
            # `focal` sequence on every frame. (A windowed alternative that
            # only searched around the previous index was disabled in the
            # original code and is not reproduced here.)
            current_target = tracker.get_cls(focal)

            print("Focal Image Index: ", current_target)

            output = tracker.track(cv2.imread(focal[current_target]))

            bbox = list(map(int, output['bbox']))

            left_top = (bbox[0], bbox[1])
            right_bottom = (bbox[0] + bbox[2], bbox[1] + bbox[3])

            # ground truth
            distance = None
            bbox_label = _read_gt_bbox(gt_file)
            if bbox_label is not None:
                left_top_label = (bbox_label[0], bbox_label[1])
                right_bottom_label = (bbox_label[0] + bbox_label[2],
                                      bbox_label[1] + bbox_label[3])

                center = ((left_top[0] + right_bottom[0]) / 2,
                          (left_top[1] + right_bottom[1]) / 2)
                center_label = (
                    (left_top_label[0] + right_bottom_label[0]) / 2,
                    (left_top_label[1] + right_bottom_label[1]) / 2)

                # Euclidean distance between predicted and labelled centres.
                distance = ((center[0] - center_label[0])**2 +
                            (center[1] - center_label[1])**2)**0.5

                with open('ground_truth/result_cls.txt', 'a') as result_cls:
                    result_cls.write(str(distance) + ',')

            cv2.rectangle(frame, left_top, right_bottom, (0, 255, 0), 3)
            # NOTE(review): start_num is not defined inside this function;
            # presumably a module-level value - verify.
            cv2.putText(frame, str(current_target + start_num), (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255))
            if distance is not None:
                cv2.putText(frame, str(distance), (30, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255))
            cv2.imshow(video_name, frame)

            # save the annotated output image
            save_path = os.path.join('data/result',
                                     '{:03d}.jpg'.format(frame_num))
            cv2.imwrite(save_path, frame)

            cv2.waitKey(40)