def load_dataset(type, thing_classes, set_config):
    frames_path = '/mnt/gpid08/users/ian.riera/AICity_data/frames'
    gt_path = '/mnt/gpid08/users/ian.riera/AICity_data/train/S03/c010/ai_challenge_s03_c010-full_annotation.xml'
    gt = read_annotations(gt_path, grouped=True, use_parked=True)

    num_frames = 2141
    frame_list = np.arange(0, num_frames)

    # Strategy B: K-fold cross-validation on the first 25% of the frames
    train_val_frames = frame_list[0:int(len(frame_list) * 0.25)]
    train_folds, val_folds = create_splits(train_val_frames, 3)
    train_fold = train_folds[set_config]
    val_fold = val_folds[set_config]

    dataset_dicts = []
    for frame_id in train_val_frames:
        if (frame_id in train_fold and type == 'train') or (frame_id in val_fold and type != 'train'):
            record = {}
            filename = os.path.join(frames_path, str(frame_id) + '.png')
            record["file_name"] = filename
            record["image_id"] = filename
            record["height"] = 1080
            record["width"] = 1920
            record["annotations"] = parse_annotation(gt[frame_id])
            dataset_dicts.append(record)
    return dataset_dicts
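# create_splits is not shown in this excerpt; below is a minimal sketch of what it
# could look like for the 3-fold Strategy B split above, assuming it returns parallel
# lists of train/validation index sets. The name comes from the call above; the body
# is a guess, and the real implementation may shuffle or split differently.
def create_splits_sketch(frames, k):
    """Sketch only: fold i validates on contiguous chunk i and trains on the rest."""
    folds = np.array_split(frames, k)
    train_folds, val_folds = [], []
    for i in range(k):
        val_folds.append(set(folds[i]))
        train_folds.append(set(np.concatenate([f for j, f in enumerate(folds) if j != i])))
    return train_folds, val_folds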
def task1_2(paths, ovthresh=0.5):
    # Evaluate the provided detections against the ground truth at the given IoU
    # threshold (the parameter was previously named 'ap' and then overwritten by
    # the computed AP, which was confusing)
    gt = read_annotations(paths['gt'], grouped=True, use_parked=True)
    det = read_detections(paths['det'])

    rec, prec, ap = voc_eval(det, gt, ovthresh, use_confidence=True)
    print('AP:', ap)
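# Example call (the concrete paths are placeholders, not taken from this repo):
# paths = {
#     'gt': 'AICity_data/train/S03/c010/ai_challenge_s03_c010-full_annotation.xml',
#     'det': 'AICity_data/train/S03/c010/det/det_mask_rcnn.txt',
# }
# task1_2(paths, ovthresh=0.5)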
def task1_1(paths, show, noise_params):
    gt = read_annotations(paths['gt'], grouped=False, use_parked=True)
    det = read_detections(paths['det'], grouped=True)
    grouped_gt = group_by_frame(gt)

    # np.random.seed(10)  # uncomment to make the added noise reproducible

    cap = cv2.VideoCapture(paths['video'])
    # cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id)  # to start from frame #frame_id
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    if noise_params['add']:
        noisy_gt = add_noise(gt, noise_params, num_frames)
        grouped_noisy_gt = group_by_frame(noisy_gt)

    for frame_id in range(num_frames):
        _, frame = cap.read()

        if show['gt']:
            frame = draw_boxes(frame, grouped_gt[frame_id], color='g')
        if show['det']:
            frame = draw_boxes(frame, det[frame_id], color='b', det=True)
        if show['noisy']:
            frame = draw_boxes(frame, grouped_noisy_gt[frame_id], color='r')

        cv2.imshow('frame', frame)
        if cv2.waitKey() == 113:  # press q to quit
            break

    cv2.destroyAllWindows()
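# add_noise is defined elsewhere; given the noise_params keys used here and in
# task2 below ('drop', 'std', 'position', ...), here is a minimal sketch of the
# kind of perturbation it applies. Everything in this sketch is an assumption,
# not this repo's actual code.
def add_noise_sketch(boxes, params):
    """Illustrative only: randomly drop boxes and jitter [xtl, ytl, xbr, ybr]
    coordinates with Gaussian noise of std params['std'] pixels."""
    noisy = []
    for box in boxes:
        if np.random.rand() < params['drop']:
            continue  # simulate a missed annotation
        jitter = np.random.normal(0, params['std'], size=4) if params['position'] else np.zeros(4)
        noisy.append([c + j for c, j in zip(box, jitter)])
    return noisy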
def load_dataset(type, thing_classes):
    frames_path = '/mnt/gpid08/users/ian.riera/AICity_data/frames'
    gt_path = '/mnt/gpid08/users/ian.riera/AICity_data/train/S03/c010/ai_challenge_s03_c010-full_annotation.xml'
    gt = read_annotations(gt_path, grouped=True, use_parked=True)

    num_frames = 2141
    train_val_split = 0.67
    # train_val_split = 1.0

    # First 25% of the video, split into train (67%) and val (33%)
    if type == 'train':
        init_frame = 0
        last_frame = int(train_val_split * int(num_frames * 0.25))
    elif type == 'val' or type == 'test':
        init_frame = int(train_val_split * int(num_frames * 0.25))
        last_frame = int(num_frames * 0.25)
    # elif type == 'test':
    #     init_frame = int(num_frames * 0.25)
    #     last_frame = num_frames

    dataset_dicts = []
    for frame_id in range(init_frame, last_frame):
        record = {}
        filename = os.path.join(frames_path, str(frame_id) + '.png')
        record["file_name"] = filename
        record["image_id"] = filename
        record["height"] = 1080
        record["width"] = 1920
        record["annotations"] = parse_annotation(gt[frame_id])
        dataset_dicts.append(record)
    return dataset_dicts
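# The record layout (file_name, image_id, height, width, annotations) matches
# Detectron2's standard dataset dicts, so registration would plausibly look like
# the following. The dataset name 'aicity_*' and the class list are placeholders.
from detectron2.data import DatasetCatalog, MetadataCatalog

thing_classes = ['car']
for split in ['train', 'val']:
    # default-arg lambda captures the current value of split, not the loop variable
    DatasetCatalog.register('aicity_' + split, lambda s=split: load_dataset(s, thing_classes))
    MetadataCatalog.get('aicity_' + split).set(thing_classes=thing_classes)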
                  box[2] - box[0]) + ',' + str(box[3] - box[1]) + ',' + str(scores[idx].item()) + ',-1,-1,-1\n'
    with open(det_path, 'a+') as f:
        f.write(det)

    # gt = read_annotations(os.path.join(args.data_path, args.seq, 'ai_challenge_s03_c010-full_annotation.xml'), grouped=True, use_parked=True)
    gt = read_detections(params['gt_path'], grouped=True)
    gt = fill_gt(gt, num_frames)
    det = read_detections(det_path, grouped=False)

    rec, prec, ap = voc_eval(det, gt, 0.5, use_confidence=True)
    print('AP: ', ap)

    if args.show_boxes:
        gt = read_annotations(params['gt_path'], grouped=True, use_parked=True)
        det = read_detections(det_path, grouped=True)

        vidcap = cv2.VideoCapture(params['video_path'])
        # vidcap.set(cv2.CAP_PROP_POS_FRAMES, initial_frame)  # to start from frame #frame_id
        num_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

        for frame_id in range(num_frames):
            _, frame = vidcap.read()
            if frame_id >= 1755 and frame_id <= 1835:
                frame = draw_boxes(frame, gt[frame_id], color='g')
                frame = draw_boxes(frame, det[frame_id], color='b', det=True)

                cv2.imshow('frame', frame)
                if cv2.waitKey() == 113:  # press q to quit
                    break
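# The detection string assembled above follows the MOTChallenge text format:
#   <frame>,<id>,<bb_left>,<bb_top>,<bb_width>,<bb_height>,<conf>,<x>,<y>,<z>
# with -1 for unknown fields; an illustrative (made-up) line:
#   1,-1,882.3,148.1,242.0,160.5,0.97,-1,-1,-1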
def eval(vidcap, frame_size, mean, std, params):
    gt = read_annotations(params['gt_path'], grouped=True, use_parked=False)

    init_frame = int(vidcap.get(cv2.CAP_PROP_POS_FRAMES))
    frame_id = init_frame

    detections = []
    annotations = {}

    # binary region-of-interest mask; loaded once instead of on every frame
    roi = cv2.imread(params['roi_path'], cv2.IMREAD_GRAYSCALE) / 255

    for t in tqdm(range(params['num_frames_eval'])):
        _, frame = vidcap.read()
        frame = cv2.cvtColor(frame, color_space[params['color_space']][0])

        # Keep only the channels relevant to the chosen color space
        if params['color_space'] == 'H':
            H, S, V = np.split(frame, 3, axis=2)
            frame = np.squeeze(H)
        if params['color_space'] == 'L':
            L, A, B = np.split(frame, 3, axis=2)
            frame = np.squeeze(L)
        if params['color_space'] == 'CbCr':
            Y, Cb, Cr = np.split(frame, 3, axis=2)
            frame = np.dstack((Cb, Cr))

        segmentation, mean, std = bg_est_method[params['bg_est']](frame, frame_size, mean, std, params)
        segmentation = segmentation * roi
        segmentation = postprocess_fg(segmentation)

        if params['save_results'] and frame_id >= 1169 and frame_id < 1229:
            cv2.imwrite(params['results_path'] + f"seg_{str(frame_id)}_pp_{str(params['alpha'])}.bmp",
                        segmentation.astype(int))

        det_bboxes = fg_bboxes(segmentation, frame_id, params)
        detections += det_bboxes

        gt_bboxes = []
        if frame_id in gt:
            gt_bboxes = gt[frame_id]
        annotations[frame_id] = gt_bboxes

        if params['show_boxes']:
            seg = cv2.cvtColor(segmentation.astype(np.uint8), cv2.COLOR_GRAY2RGB)
            seg_boxes = draw_boxes(image=seg, boxes=det_bboxes, color='r', linewidth=3)
            seg_boxes = draw_boxes(image=seg_boxes, boxes=gt_bboxes, color='g', linewidth=3)

            cv2.imshow("Segmentation mask with detected boxes and gt", seg_boxes)
            if cv2.waitKey() == 113:  # press q to quit
                break

        frame_id += 1

    detections = temporal_filter(group_by_frame(detections), init=init_frame, end=frame_id)
    rec, prec, ap = voc_evaluation.voc_eval(detections, annotations, ovthresh=0.5, use_confidence=False)
    return ap
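# bg_est_method dispatches on params['bg_est']; for reference, here is a minimal
# sketch of one such method, a per-pixel adaptive Gaussian model. The exact
# thresholding rule and the update rate 'rho' are assumptions, not this repo's formula.
def adaptive_gaussian_bg_sketch(frame, frame_size, mean, std, params):
    """Sketch: mark pixels far from the running mean as foreground, then update
    mean/std only on background pixels (adaptive modelling)."""
    frame = frame.astype(np.float64)
    fg = np.abs(frame - mean) >= params['alpha'] * (std + 2)
    bg = ~fg
    rho = params.get('rho', 0.01)
    mean[bg] = rho * frame[bg] + (1 - rho) * mean[bg]
    std[bg] = np.sqrt(rho * (frame[bg] - mean[bg]) ** 2 + (1 - rho) * std[bg] ** 2)
    return fg.astype(np.uint8), mean, std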
def eval_tracking_MaximumOverlap(vidcap, test_len, params, opticalFlow=None):
    print("Evaluating Tracking")
    gt = read_annotations(params["gt_path"], grouped=True, use_parked=True)
    det = read_detections(params["det_path"], grouped=True, confidenceThr=0.4)

    frame_id = int(vidcap.get(cv2.CAP_PROP_POS_FRAMES))
    first_frame_id = frame_id
    print(frame_id)

    detections = []
    annotations = {}
    list_positions = {}
    center_seen_last5frames = {}
    id_seen_last5frames = {}

    tracking = Tracking()
    det_bboxes_old = -1
    old_frame = None

    # Accumulator that is updated on every frame to compute the tracking metrics
    accumulator = mm.MOTAccumulator(auto_id=True)

    # note: train_len is not a parameter; it must be defined in the enclosing scope
    for t in tqdm(range((train_len + test_len) - first_frame_id)):
        _, frame = vidcap.read()

        flow_aux = np.zeros(shape=(frame.shape[0], frame.shape[1], 2))
        if params['use_optical_flow'] and old_frame is not None:
            # Estimate the optical flow of each previous detection and store it on the box
            for d in det_bboxes_old:
                flow = computeOpticalFlow(old_frame, frame, d, option=params['optical_flow_option'])
                d.flow = [flow[0], -flow[1]]
                flow_aux[int(d.ytl):int(d.ybr), int(d.xtl):int(d.xbr), :] = flow

            plot_flow(old_frame[:, :, [2, 1, 0]], flow_aux, step=16,
                      fname='/home/oscar/workspace/master/modules/m6/project/mcv-m6-2021-team4/W4/OF_BB/'
                            + f"tracking_{str(frame_id)}_IoU.png")

        det_bboxes = det[frame_id]
        det_bboxes = tracking.set_frame_ids(det_bboxes, det_bboxes_old)
        detections += det_bboxes

        id_seen = []
        gt_bboxes = []
        if frame_id in gt:
            gt_bboxes = gt[frame_id]
        annotations[frame_id] = gt_bboxes

        objs = [bbox.center for bbox in gt_bboxes]
        hyps = [bbox.center for bbox in det_bboxes]

        for object_bb in det_bboxes:
            if object_bb.id in list(list_positions.keys()):
                if t < 5:
                    id_seen_last5frames[object_bb.id] = object_bb.id
                    center_seen_last5frames[object_bb.id] = object_bb.center
                list_positions[object_bb.id].append([int(x) for x in object_bb.center])
            else:
                if t < 5:
                    id_seen_last5frames[object_bb.id] = object_bb.id
                    center_seen_last5frames[object_bb.id] = object_bb.center
                id_seen.append(object_bb)
                list_positions[object_bb.id] = [[int(x) for x in object_bb.center]]

        # To detect parked cars: if a "new" track starts very close to a center seen
        # in the first 5 frames, reuse that id instead of creating a new one
        for bbox in id_seen:
            for idx in list(id_seen_last5frames.keys()):
                if idx != bbox.id:
                    center = [center_seen_last5frames[idx]]
                    mse = (np.square(np.subtract(np.array(center),
                                                 np.array([int(x) for x in bbox.center])))).mean()
                    if mse < 300:
                        setattr(bbox, 'id', idx)

        accumulator.update(
            [bbox.id for bbox in gt_bboxes],   # ground truth objects in this frame
            [bbox.id for bbox in det_bboxes],  # detector hypotheses in this frame
            mm.distances.norm2squared_matrix(objs, hyps)  # pairwise squared distances gt vs hypotheses
        )

        if params['show_boxes']:
            drawed_frame_aux = draw_boxes(image=frame, boxes=det_bboxes, color='r', linewidth=3,
                                          det=False, boxIds=True, tracker=list_positions)
            drawed_frame = deepcopy(drawed_frame_aux)
            if det_bboxes_old != -1:
                drawed_frame = draw_boxes_old(image=drawed_frame, boxes=det_bboxes_old, color='r',
                                              linewidth=3, det=False, boxIds=True, tracker=list_positions)

            cv2.rectangle(drawed_frame, (10, 2), (120, 20), (255, 255, 255), -1)
            cv2.putText(drawed_frame, str(vidcap.get(cv2.CAP_PROP_POS_FRAMES)), (15, 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
            cv2.imshow('Frame', drawed_frame)
            cv2.waitKey(30)
            cv2.imwrite(params['results_path'] + f"tracking_{str(frame_id)}_IoU.jpg",
                        drawed_frame.astype(int))

            drawed_frame2 = deepcopy(drawed_frame_aux)
            if det_bboxes_old != -1:
                drawed_frame2 = draw_boxes_old(image=drawed_frame2, boxes=det_bboxes_old, color='r',
                                               linewidth=3, det=False, boxIds=True,
                                               tracker=list_positions, shifted=True)

            cv2.rectangle(drawed_frame2, (10, 2), (120, 20), (255, 255, 255), -1)
            cv2.putText(drawed_frame2, str(vidcap.get(cv2.CAP_PROP_POS_FRAMES)), (15, 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
            cv2.imshow('Frame', drawed_frame2)
            cv2.waitKey(30)
            cv2.imwrite('./W4/OF_shifted_BB/' + f"tracking_{str(frame_id)}_IoU.jpg",
                        drawed_frame2.astype(int))

        if params['save_results'] and frame_id >= (355 + 535) and frame_id < (410 + 535):
            drawed_frame = frame
            drawed_frame = draw_boxes(image=drawed_frame, boxes=det_bboxes, color='r', linewidth=3,
                                      det=False, boxIds=True, tracker=list_positions)
            if det_bboxes_old != -1:
                drawed_frame = draw_boxes_old(image=drawed_frame, boxes=det_bboxes_old, color='r',
                                              linewidth=3, det=False, boxIds=True, tracker=list_positions)

            cv2.rectangle(drawed_frame, (10, 2), (120, 20), (255, 255, 255), -1)
            cv2.putText(drawed_frame, str(vidcap.get(cv2.CAP_PROP_POS_FRAMES)), (15, 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
            cv2.imwrite(params['results_path'] + f"tracking_{str(frame_id)}_IoU.jpg",
                        drawed_frame.astype(int))

        frame_id += 1
        old_frame = frame
        det_bboxes_old = det_bboxes

    mh = mm.metrics.create()
    summary = mh.compute(accumulator, metrics=['precision', 'recall', 'idp', 'idr', 'idf1'], name='acc')
    print(summary)
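# Tracking.set_frame_ids is where the maximum-overlap association happens; below is a
# minimal sketch of that step. It assumes an iou(a, b) helper and boxes with a mutable
# 'id' attribute; both are assumptions about code not shown in this excerpt.
def set_frame_ids_sketch(det_bboxes, det_bboxes_old, iou_thr=0.5):
    """Sketch: give each detection the id of the previous-frame box it overlaps
    most; if no overlap reaches iou_thr, start a new track id."""
    next_id = 0
    if det_bboxes_old != -1:
        next_id = max((b.id for b in det_bboxes_old), default=-1) + 1
    for det in det_bboxes:
        best_iou, best_id = 0.0, None
        if det_bboxes_old != -1:
            for old in det_bboxes_old:
                overlap = iou(det, old)
                if overlap > best_iou:
                    best_iou, best_id = overlap, old.id
        if best_id is not None and best_iou >= iou_thr:
            det.id = best_id
        else:
            det.id = next_id
            next_id += 1
    return det_bboxes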
def task2(gt_path, det_path, video_path, results_path):
    plot_frames_path = os.path.join(results_path, 'plot_frames/')
    video_frames_path = os.path.join(results_path, 'video_frames/')
    print(plot_frames_path)

    # If the folders don't exist, create them
    os.makedirs(plot_frames_path, exist_ok=True)
    os.makedirs(video_frames_path, exist_ok=True)

    show_det = True
    show_noisy = False

    gt = read_annotations(gt_path, grouped=False, use_parked=True)
    det = read_detections(det_path, grouped=True)
    grouped_gt = group_by_frame(gt)

    noise_params = {
        'add': False,
        'drop': 0.0,
        'generate_close': 0.0,
        'generate_random': 0.0,
        'type': 'specific',  # options: 'specific', 'gaussian', None
        'std': 40,  # pixels
        'position': False,
        'size': True,
        'keep_ratio': True
    }

    if noise_params['add']:
        noisy_gt = add_noise(gt, noise_params)
        grouped_noisy_gt = group_by_frame(noisy_gt)

    cap = cv2.VideoCapture(video_path)
    # cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id)  # to start from frame #frame_id
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    iou_list = {}
    for frame_id in range(20):
        _, frame = cap.read()

        frame = draw_boxes(frame, grouped_gt[frame_id], color='g')
        if show_det:
            frame = draw_boxes(frame, det[frame_id], color='b', det=True)
            frame_iou = mean_iou(det[frame_id], grouped_gt[frame_id], sort=True)
        if show_noisy:
            frame = draw_boxes(frame, grouped_noisy_gt[frame_id], color='r')
            frame_iou = mean_iou(grouped_noisy_gt[frame_id], grouped_gt[frame_id])

        iou_list[frame_id] = frame_iou
        plot = plot_iou(iou_list, num_frames)
        '''
        if show:
            fig.show()
            cv2.imshow('frame', frame)
            if cv2.waitKey() == 113:  # press q to quit
                break
        '''
        imageio.imwrite(video_frames_path + '{}.png'.format(frame_id), frame)
        plot.savefig(plot_frames_path + 'iou_{}.png'.format(frame_id))
        plt.close(plot)

    save_gif(plot_frames_path, results_path + 'iou.gif')
    save_gif(video_frames_path, results_path + 'bbox.gif')
    # cv2.destroyAllWindows()
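# mean_iou and plot_iou live elsewhere in the repo; the per-pair IoU underneath is
# standard. A self-contained version for [xtl, ytl, xbr, ybr] boxes (the coordinate
# convention is an assumption):
def bbox_iou(a, b):
    """Intersection over union of two [xtl, ytl, xbr, ybr] boxes."""
    iw = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
    ih = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = iw * ih
    union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter
    return inter / union if union > 0 else 0.0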
        filtered_detections.append(det)
    return filtered_detections


if __name__ == '__main__':
    args = parse_args()

    det_path = 'results/task1_1/' + args.model + '/detections.txt'
    if args.detect:
        det_path = model_detect[args.model](args.video_path)

    if args.visualize:
        gt = read_annotations(args.gt_path, grouped=True, use_parked=True)
        det = read_detections(det_path, grouped=False)
        det = group_by_frame(filter_by_conf(det, conf_thr=args.min_conf))

        vidcap = cv2.VideoCapture(args.video_path)
        # vidcap.set(cv2.CAP_PROP_POS_FRAMES, initial_frame)  # to start from frame #frame_id
        num_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))

        for frame_id in range(num_frames):
            _, frame = vidcap.read()
            if frame_id >= 1755 and frame_id <= 1835:
                frame = draw_boxes(frame, gt[frame_id], color='g')
                frame = draw_boxes(frame, det[frame_id], color='b', det=True)