import os

import cv2
import torch

# Project-local imports (paths assumed, PySOT-style; adjust to this repo's layout).
# get_frames, IOU, ground_truth, save_image, and the parsed CLI namespace `args`
# are defined elsewhere in the repo.
from pysot.models.model_builder import ModelBuilder
from pysot.tracker.tracker_builder import build_tracker


# Variant 1: focal-stack search — score every focal plane on the first pass,
# then only a window of planes around the previous best index.
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device :", device)

    # ground truth
    gt_on = args.gt_on                                   # whether to measure IoU accuracy
    f = open('ground_truth/Non_video4_GT.txt', 'r')      # GT file
    record = args.record                                 # whether to record IoU accuracy and save images

    # create model
    model = ModelBuilder()

    # load model
    checkpoint = torch.load("pretrained_model/model.pth",
                            map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(checkpoint)
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    video_name = args.video_name.split('/')[-1].split('.')[0]
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    frame_num = 0
    first_time = True
    current_target = -1

    for frame, focal in get_frames(args.video_name, args.type, args.img2d_ref,
                                   args.start_num, args.last_num):
        frame_num += 1
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            max_index = -1
            max_val = 0
            if first_time:
                # First pass: track on every focal-plane image and keep the
                # index with the highest confidence score.
                outputs = [tracker.track(cv2.imread(fp)) for fp in focal]
                for i in range(len(outputs)):
                    if outputs[i]['best_score'] >= max_val:
                        max_val = outputs[i]['best_score']
                        max_index = i
                first_time = False
                current_target = max_index
            else:
                # Later passes: only search a window of focal planes around the
                # previous best index. Bounds are clamped so a negative index
                # never wraps to the end of the focal stack; the index update
                # below is equivalent to the original offset arithmetic.
                lo = max(current_target - 3, 0)
                hi = min(current_target + 3, len(focal))
                outputs = [tracker.track(cv2.imread(focal[i]))
                           for i in range(lo, hi)]
                for i in range(len(outputs)):
                    if outputs[i]['best_score'] >= max_val:
                        max_val = outputs[i]['best_score']
                        max_index = i
                current_target = lo + max_index

            # Log the predicted box via the project's ground_truth helper
            # (not shown in this section).
            ground_truth(outputs[max_index]['bbox'][:2],
                         outputs[max_index]['bbox'][2:])

            bbox = list(map(int, outputs[max_index]['bbox']))
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 0, 255), 3)
            save_path = os.path.join('data/result2', '{:03d}.jpg'.format(frame_num))
            cv2.imwrite(save_path, frame)

            # ground truth
            if gt_on:
                line = f.readline()
                bbox_label = list(map(int, line.split(',')))
                iou = IOU(bbox, bbox_label)
                labelx = bbox_label[0] + bbox_label[2] / 2
                labely = bbox_label[1] + bbox_label[3] / 2
                # Center distance (precision) between prediction and label.
                pre = ((outputs[max_index]['cx'] - labelx) ** 2 +
                       (outputs[max_index]['cy'] - labely) ** 2) ** 0.5
                if record:
                    with open('ground_truth/result_iou.txt', 'a') as result_iou:
                        result_iou.write(str(iou) + ',')
                    with open('ground_truth/result_pre.txt', 'a') as result_pre:
                        result_pre.write(str(pre) + ',')
                cv2.rectangle(frame, (bbox_label[0], bbox_label[1]),
                              (bbox_label[0] + bbox_label[2],
                               bbox_label[1] + bbox_label[3]),
                              (255, 255, 255), 3)

            cv2.imshow(video_name, frame)
            if record:
                save_image(frame_num, frame)
            cv2.waitKey(40)
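# `IOU` is called above but not defined in this section. A minimal sketch,
# assuming both boxes are [x, y, w, h] in pixels (the names and exact
# formulation here are illustrative, not necessarily the repo's own):
def IOU(box_a, box_b):
    # Convert [x, y, w, h] to corner coordinates.
    ax1, ay1, ax2, ay2 = box_a[0], box_a[1], box_a[0] + box_a[2], box_a[1] + box_a[3]
    bx1, by1, bx2, by2 = box_b[0], box_b[1], box_b[0] + box_b[2], box_b[1] + box_b[3]
    # Intersection rectangle, clamped to zero when the boxes do not overlap.
    iw = max(0, min(ax2, bx2) - max(ax1, bx1))
    ih = max(0, min(ay2, by2) - max(ay1, by1))
    inter = iw * ih
    union = box_a[2] * box_a[3] + box_b[2] * box_b[3] - inter
    return inter / union if union > 0 else 0.0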
# Variant 2: plain 2D tracking on the reference frame (no focal-stack search).
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device :", device)

    # ground truth
    gt_on = args.gt_on                                   # whether to measure IoU accuracy
    f = open('ground_truth/Non_video4_GT.txt', 'r')      # GT file

    # create model
    model = ModelBuilder()

    # load model
    checkpoint = torch.load("pretrained_model/model.pth",
                            map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(checkpoint)
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    video_name = args.video_name.split('/')[-1].split('.')[0]
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    frame_num = 0

    for frame in get_frames(args.video_name, args.type, args.img2d_ref,
                            args.start_num, args.last_num):
        frame_num += 1
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            outputs = tracker.track(frame)
            bbox = list(map(int, outputs['bbox']))

            #### ground truth ####
            if gt_on:
                line = f.readline()
                bbox_label = list(map(int, line.split(',')))
                labelx = bbox_label[0] + bbox_label[2] / 2
                labely = bbox_label[1] + bbox_label[3] / 2
                iou = IOU(bbox, bbox_label)
                # Center distance (precision) between prediction and label.
                pre = ((outputs['cx'] - labelx) ** 2 +
                       (outputs['cy'] - labely) ** 2) ** 0.5
                if args.record:
                    with open('ground_truth/result_iou.txt', 'a') as result_iou:
                        result_iou.write(str(iou) + ',')
                    with open('ground_truth/result_pre.txt', 'a') as result_pre:
                        result_pre.write(str(pre) + ',')
                cv2.rectangle(frame, (bbox_label[0], bbox_label[1]),
                              (bbox_label[0] + bbox_label[2],
                               bbox_label[1] + bbox_label[3]),
                              (255, 255, 255), 3)
            #### ----------------- ####

            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                          (0, 0, 255), 3)
            cv2.imshow(video_name, frame)
            if args.record:
                save_image(frame_num, frame)
            cv2.waitKey(40)
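# `save_image` is referenced above but not shown. A plausible sketch, assuming
# it mirrors the cv2.imwrite pattern used in the first variant; the output
# directory here is a guess, not confirmed by the source:
def save_image(frame_num, frame):
    save_path = os.path.join('data/result', '{:03d}.jpg'.format(frame_num))
    cv2.imwrite(save_path, frame)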
# Variant 3: focal-plane selection via the tracker's classification head
# (get_cls), with a windowed alternative kept as commented-out reference.
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print("device :", device)

    # ground truth
    f = open('ground_truth/new_record.txt', 'r')

    # create model
    model = ModelBuilder()

    # load model
    checkpoint = torch.load("pretrained_model/model.pth",
                            map_location=lambda storage, loc: storage.cpu())
    model.load_state_dict(checkpoint)
    model.eval().to(device)

    # build tracker
    tracker = build_tracker(model)

    first_frame = True
    root = "test"
    video_name = root.split('/')[-1].split('.')[0]
    cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
    frame_num = 0
    first_time = True        # used only by the windowed method below
    current_target = -1

    for frame, focal in get_frames(root):
        frame_num += 1
        if first_frame:
            try:
                init_rect = cv2.selectROI(video_name, frame, False, False)
            except Exception:
                exit()
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            # Full-range method: classify over every focal plane.
            max_index = tracker.get_cls(focal)
            current_target = max_index

            # Windowed method (kept for reference):
            # if first_time:
            #     max_index = tracker.get_cls(focal)
            #     current_target = max_index
            #     first_time = False
            # else:
            #     max_index = tracker.get_cls(
            #         focal[current_target - 3:current_target + 3])
            #     if max_index > 3:
            #         current_target = current_target + abs(3 - max_index)
            #     elif max_index < 3:
            #         current_target = current_target - abs(3 - max_index)

            print("Focal Image Index: ", current_target)
            output = tracker.track(cv2.imread(focal[current_target]))
            bbox = list(map(int, output['bbox']))

            # ground truth
            line = f.readline()
            bbox_label = list(map(int, line.split(',')))
            left_top_label = (bbox_label[0], bbox_label[1])
            right_bottom_label = (bbox_label[0] + bbox_label[2],
                                  bbox_label[1] + bbox_label[3])
            left_top = (bbox[0], bbox[1])
            right_bottom = (bbox[0] + bbox[2], bbox[1] + bbox[3])
            center = ((left_top[0] + right_bottom[0]) / 2,
                      (left_top[1] + right_bottom[1]) / 2)
            center_label = ((left_top_label[0] + right_bottom_label[0]) / 2,
                            (left_top_label[1] + right_bottom_label[1]) / 2)
            # Center distance (precision) between prediction and label.
            distance = ((center[0] - center_label[0]) ** 2 +
                        (center[1] - center_label[1]) ** 2) ** 0.5

            with open('ground_truth/result_cls.txt', 'a') as result_cls:
                result_cls.write(str(distance) + ',')

            cv2.rectangle(frame, left_top, right_bottom, (0, 255, 0), 3)
            # `start_num` (index of the first focal plane) is assumed to be
            # defined at module level in this script.
            cv2.putText(frame, str(current_target + start_num), (30, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255))
            cv2.putText(frame, str(distance), (30, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255))
            cv2.imshow(video_name, frame)

            # Save the output image.
            save_path = os.path.join('data/result', '{:03d}.jpg'.format(frame_num))
            cv2.imwrite(save_path, frame)
            cv2.waitKey(40)
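# Each variant above presumably lives in its own script in the repo (three
# `main` definitions cannot coexist in one module), so each script would end
# with the usual entry-point guard, shown once here:
if __name__ == '__main__':
    main()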