def task11():
    # Read gt file
    path = 'ai_challenge_s03_c010-full_annotation.xml'
    reader = ReadData(path)
    gt, num_iter = reader.getGTfromXML()
    sortedFrames, sortedBBOX = reader.bboxInFrame(gt)
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    summary = []
    addNoise = True
    addDrop = False

    if addNoise:
        stdPixels = np.linspace(0, 100, 11)
        for n in range(len(stdPixels)):
            std = int(stdPixels[n])
            predictedBBOX = []
            for i in range(len(sortedBBOX)):
                noisyBBOX = sortedBBOX[i] + np.random.normal(0, std, 4)  # mean 0, std = stdPixels[n]
                predictedBBOX.append(noisyBBOX)
            predictionsInfo = reader.joinBBOXfromFrame(sortedFrames, predictedBBOX, isGT=False)
            rec, prec, ap, meanIoU = ap_score(gtInfo, predictionsInfo,
                                              num_bboxes=len(sortedFrames), ovthresh=0.5)
            print('Noise std:', std)
            print('mAP:', np.mean(ap))
            print('Mean IoU:', meanIoU)
            gtInfo = reader.resetGT(gtInfo)
            summary.append({"std": std, "prec": np.mean(prec), "rec": np.mean(rec),
                            "iou": meanIoU, "mAP": np.mean(ap)})

    if addDrop:
        dropThr = np.linspace(0, 0.9, 11)
        for n in range(len(dropThr)):
            predictedBBOX = []
            for i in range(len(sortedBBOX)):
                drop_box = np.random.rand() < dropThr[n]
                if not drop_box:
                    predictedBBOX.append(sortedBBOX[i])
                else:
                    predictedBBOX.append(None)
            predictionsInfo = reader.joinBBOXfromFrame(sortedFrames, predictedBBOX, isGT=False)
            rec, prec, ap, meanIoU = ap_score(gtInfo, predictionsInfo,
                                              num_bboxes=len(sortedFrames), ovthresh=0.5)
            print('Thr:', dropThr[n])
            print('mAP:', np.mean(ap))
            print('Mean IoU:', meanIoU)
            gtInfo = reader.resetGT(gtInfo)
            summary.append({"dropThr": dropThr[n], "prec": np.mean(prec), "rec": np.mean(rec),
                            "iou": meanIoU, "mAP": np.mean(ap)})

    plots = True
    if plots and addNoise:
        graph = PlotCreator()
        graph.plotCurve(datax=[d['std'] for d in summary], datay=[d['iou'] for d in summary],
                        labelx='Noise std (pixels)', labely='IoU')
        graph.plotCurve(datax=[d['std'] for d in summary], datay=[d['mAP'] for d in summary],
                        labelx='Noise std (pixels)', labely='mAP')
    elif plots and addDrop:
        graph = PlotCreator()
        graph.plotCurve(datax=[d['dropThr'] for d in summary], datay=[d['iou'] for d in summary],
                        labelx='Drop Thr', labely='IoU')
        graph.plotCurve(datax=[d['dropThr'] for d in summary], datay=[d['mAP'] for d in summary],
                        labelx='Drop Thr', labely='mAP')
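
# ap_score() and the meanIoU it reports rely on an iou() helper defined
# elsewhere in the repo. A minimal sketch of IoU for two [x1, y1, x2, y2]
# boxes, assuming that convention (the actual helper may differ):
def iou_sketch(boxA, boxB):
    # intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    inter = max(0.0, xB - xA) * max(0.0, yB - yA)
    # union = sum of areas minus intersection
    areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    union = areaA + areaB - inter
    return inter / union if union > 0 else 0.0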
def get_aicity_dataset(frame_idx_list):
    path = '/home/group09/code/week6/datasets/AICity_data/train/S03/c010/ai_challenge_s03_c010-full_annotation.xml'
    video_path = '/home/group09/code/week6/datasets/AICity_data/train/S03/c010/vdo.avi'
    reader = ReadData(path)
    gt, num_iter = reader.getGTfromXML()
    sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt, 0, 2141)
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    dataset_dicts = []
    directory = '/home/group09/code/week6/datasets/AICity_data/AICity_frames'
    for frame_idx in tqdm(frame_idx_list):
        filename = str(frame_idx).zfill(4) + '.png'
        record = {}
        im_path = os.path.join(directory, filename)
        im = cv2.imread(im_path)
        height, width = im.shape[:2]

        record["file_name"] = im_path
        record["image_id"] = str(frame_idx).zfill(4)
        record["height"] = height
        record["width"] = width

        objs = []
        for [x1, y1, x2, y2] in gtInfo[frame_idx]['bbox']:  # for every bbox in a frame's gt
            obj = {
                "type": 'Car',
                "bbox": [x1, y1, x2, y2],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": 0,
            }
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts
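
# Usage sketch: get_aicity_dataset() returns detectron2-style dicts, so it can
# be registered with detectron2's DatasetCatalog before training/evaluation.
# The split name "aicity_train" and the frame range are illustrative
# assumptions, not values taken from this repo:
from detectron2.data import DatasetCatalog, MetadataCatalog

def register_aicity_split_sketch():
    train_idx = list(range(0, 535))  # e.g. the first ~25% of the 2141 frames
    DatasetCatalog.register("aicity_train", lambda: get_aicity_dataset(train_idx))
    MetadataCatalog.get("aicity_train").set(thing_classes=["Car"])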
def task3():
    algorithms = ['KNN', 'MOG2', 'CNT', 'GMG', 'GSOC', 'MOG']  # 'LSBP' excluded
    for i in range(len(algorithms)):
        # Read gt file
        path = "ai_challenge_s03_c010-full_annotation.xml"
        reader = ReadData(path)
        model = BgsModel(path='/home/mar/Desktop/M6/Lab1/AICity_data/train/S03/c010/vdo.avi',
                         color_space='gray', alg=algorithms[i])
        vidLen = model.retVidLen()

        gt, num_iter = reader.getGTfromXML()
        gt = reader.preprocessGT(gt)
        sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt, int(vidLen * 0.25))
        gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

        predictionsInfo, num_bboxes = model.foreground_extraction(showVid=True, gt=gtInfo,
                                                                  use_postprocessing=True)

        # Uncomment if the original video with bboxes is needed:
        # video_with_bbox("D:\\MCV\\M6\\AICity_data\\train\\S03\\c010\\vdo.avi", algorithms[i],
        #                 gtInfo, predictionsInfo, int(vidLen * 0.25), int(vidLen))

        rec, prec, ap, meanIoU = ap_score(gtInfo, predictionsInfo,
                                          num_bboxes=num_bboxes, ovthresh=0.5)
        print('Method:', algorithms[i])
        print('mAP:', ap)
        print('Mean IoU:', meanIoU)
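
# BgsModel is defined elsewhere; it presumably wraps OpenCV's background
# subtractors. A sketch of how each algorithm name could map to an OpenCV
# factory (KNN and MOG2 live in cv2 itself; the rest need opencv-contrib's
# cv2.bgsegm module). This is an assumed mapping, not BgsModel's actual code:
def create_subtractor_sketch(alg):
    factories = {
        'KNN': cv2.createBackgroundSubtractorKNN,
        'MOG2': cv2.createBackgroundSubtractorMOG2,
        'CNT': cv2.bgsegm.createBackgroundSubtractorCNT,
        'GMG': cv2.bgsegm.createBackgroundSubtractorGMG,
        'GSOC': cv2.bgsegm.createBackgroundSubtractorGSOC,
        'MOG': cv2.bgsegm.createBackgroundSubtractorMOG,
        'LSBP': cv2.bgsegm.createBackgroundSubtractorLSBP,
    }
    return factories[alg]()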
def task2():
    # Read gt file
    path = 'ai_challenge_s03_c010-full_annotation.xml'
    reader = ReadData(path)

    # Model the mean and variance of each pixel over the first 25% of the video
    gaussModel = GaussianModel(path='/home/mar/Desktop/M6/Lab1/AICity_data/train/S03/c010/vdo.avi',
                               color_space='gray')
    vidLen = gaussModel.retVidLen()

    # Load gt for plot
    gt, num_iter = reader.getGTfromXML()
    gt = reader.preprocessGT(gt)
    sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt, int(vidLen * 0.25))
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    # Separate foreground from background
    alpha = [3, 4, 5, 6]
    rho_values = [0.001, 0.01, 0.1, 0.5]
    configurations = []
    ap_results = []
    iou_results = []
    for a in alpha:
        for r in rho_values:
            # Compute mean and std
            gaussModel.mean_std_Welford()
            print('Alpha:', a)
            predictionsInfo, num_bboxes = gaussModel.foreground_extraction_task2(
                showVid=True, gt=gtInfo, alpha=a, rho=r, adaptive=True)
            gtInfo = reader.resetGT(gtInfo)
            rec, prec, ap, meanIoU = ap_score(gtInfo, predictionsInfo,
                                              num_bboxes=num_bboxes, ovthresh=0.5)
            configurations.append([a, r])
            ap_results.append(ap)
            iou_results.append(meanIoU)
            print("")
            print("Alpha:", a)
            print("Rho:", r)
            print('mAP:', ap)
            print('Mean IoU:', meanIoU)

    with open('ap_results_2.pkl', 'wb') as handle:
        pickle.dump(ap_results, handle)
    with open('iou_results_2.pkl', 'wb') as handle:
        pickle.dump(iou_results, handle)
    with open('configurations_2.pkl', 'wb') as handle:
        pickle.dump(configurations, handle)
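
# foreground_extraction_task2() lives in GaussianModel; a sketch of the
# per-pixel adaptive rule it presumably applies: classify a pixel as
# foreground when it deviates more than alpha standard deviations from the
# background mean, then update mean/variance of background pixels with
# learning rate rho. The alpha * (std + 2) threshold is an assumption based
# on the usual recipe for this task; the model may use a different one:
def adaptive_gaussian_step_sketch(frame, mean, std, alpha, rho):
    frame = frame.astype(np.float64)
    foreground = np.abs(frame - mean) >= alpha * (std + 2)
    bg = ~foreground
    # running update of the background model (adaptive case)
    mean[bg] = rho * frame[bg] + (1 - rho) * mean[bg]
    var = std ** 2
    var[bg] = rho * (frame[bg] - mean[bg]) ** 2 + (1 - rho) * var[bg]
    return foreground, mean, np.sqrt(var)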
def task11_12():
    # Read gt file
    path = 'ai_challenge_s03_c010-full_annotation.xml'
    reader = ReadData(path)

    # Model the mean and variance of each pixel over the first 25% of the video
    gaussModel = GaussianModel(path='/home/mar/Desktop/M6/Lab1/AICity_data/train/S03/c010/vdo.avi',
                               color_space='gray')
    vidLen = gaussModel.retVidLen()

    # Load gt for plot
    gt, num_iter = reader.getGTfromXML()
    gt = reader.preprocessGT(gt)
    sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt, int(vidLen * 0.25))
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    # Compute mean and std
    gaussModel.mean_std_Welford()

    # Separate foreground from background
    alpha = [6.25]
    summary = []
    for a in alpha:
        print('Alpha:', a)
        predictionsInfo, num_bboxes = gaussModel.foreground_extraction(
            showVid=True, gt=gtInfo, alpha=a, noiseRemoval=True)

        # -------------------------------- TASK 1.2 --------------------------------
        gtInfo = reader.resetGT(gtInfo)
        rec, prec, ap, meanIoU = ap_score(gtInfo, predictionsInfo,
                                          num_bboxes=num_bboxes, ovthresh=0.5)
        print('mAP:', ap)
        print('Mean IoU:', meanIoU)
        summary.append({"alpha": a, "iou": meanIoU, "mAP": np.mean(ap)})

    plots = False
    if plots:
        graph = PlotCreator()
        graph.plotCurve(datax=[d['alpha'] for d in summary], datay=[d['iou'] for d in summary],
                        labelx='Alpha', labely='IoU', name='iouAlpha')
        graph.plotCurve(datax=[d['alpha'] for d in summary], datay=[d['mAP'] for d in summary],
                        labelx='Alpha', labely='mAP', name='mAPAlpha')
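
# mean_std_Welford() is implemented inside GaussianModel; a sketch of
# Welford's online algorithm for a per-pixel running mean/std over a frame
# iterable (hypothetical standalone version: single pass, O(1) extra memory):
def welford_mean_std_sketch(frames):
    mean = None
    M2 = None
    n = 0
    for f in frames:
        f = f.astype(np.float64)
        if mean is None:
            mean = np.zeros_like(f)
            M2 = np.zeros_like(f)
        n += 1
        delta = f - mean
        mean += delta / n
        M2 += delta * (f - mean)  # second factor uses the updated mean
    return mean, np.sqrt(M2 / n)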
def task12():
    path = 'ai_challenge_s03_c010-full_annotation.xml'
    pred_paths = ['AICity_data/train/S03/c010/det/det_mask_rcnn.txt',
                  'AICity_data/train/S03/c010/det/det_ssd512.txt',
                  'AICity_data/train/S03/c010/det/det_yolo3.txt']
    pred_nets = ['mask_rcnn', 'det_ssd512', 'det_yolo3']

    for pc in range(len(pred_nets)):
        reader = ReadData(path)
        gt, num_iter = reader.getGTfromXML()
        sortedFrames_gt, sortedBBOX_gt = reader.bboxInFrame(gt)
        gtInfo = reader.joinBBOXfromFrame(sortedFrames_gt, sortedBBOX_gt, isGT=True)

        reader = ReadData(pred_paths[pc])
        pred, _ = reader.getPredfromTXT()
        sortedFrames_pred, sortedBBOX_pred, sortedScore_pred = reader.bboxInFrame_Score(pred)
        predictionsInfo = reader.joinBBOXfromFrame_Score(sortedFrames_pred, sortedBBOX_pred,
                                                         sortedScore_pred, isGT=False)

        rec, prec, ap, meanIoU = VOC_ap_score(gtInfo, predictionsInfo,
                                              num_bboxes=len(sortedFrames_gt), ovthresh=0.5)
        print('Inference with', pred_nets[pc])
        print('mAP:', np.mean(ap))
        print('meanIoU:', np.mean(meanIoU))
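
# VOC_ap_score() is defined elsewhere; a sketch of the all-points Pascal VOC
# average precision computed from precision/recall arrays, assuming the
# helper follows the standard VOC recipe:
def voc_ap_sketch(rec, prec):
    mrec = np.concatenate(([0.0], rec, [1.0]))
    mpre = np.concatenate(([0.0], prec, [0.0]))
    # make precision monotonically decreasing
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = max(mpre[i - 1], mpre[i])
    # sum the areas of the rectangles under the precision/recall curve
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])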
def task1():
    # Paths
    det_path = 'aic19-track1-mtmc-train/train/S03/c011/det/det_yolo3.txt'
    # det_path = 'retinanet_S03/prediction_results_retina_S03_c010.pkl'
    video_path = 'aic19-track1-mtmc-train/train/S03/c011/vdo.avi'
    gt_path = 'aic19-track1-mtmc-train/train/S03/c011/gt/gt.txt'

    # Parameters
    threshold = 0.5      # minimum iou to keep the tracking between consecutive frames
    kill_time = 90       # number of frames without a match before closing a track
    presence = 20        # minimum number of frames a track must exist
    movement = 150       # minimum distance a car must travel overall (start to end)
    small_movement = 10  # minimum distance a car must travel every 10 frames
    min_size = 1000000   # minimum bbox area to be considered a detection

    # Flags
    video = False
    showVid = False
    showGT = True
    showDET = True
    compute_score = True
    save_tracks = True
    optical_flow = False

    # Read the detections from file
    frame_bboxes = []
    new_frame_bboxes = []
    if det_path[-3:] == 'pkl':
        # Get the bboxes (read from our pickles)
        with open(det_path, "rb") as openfile:
            while True:
                try:
                    frame_bboxes.append(pickle.load(openfile))
                except EOFError:
                    break
        frame_bboxes = frame_bboxes[0]

        # convert the data to the desired format
        for frame_index in range(frame_bboxes[0][-1] + 1):
            position_index = [j for j, k in enumerate(frame_bboxes[0]) if k == frame_index]
            if len(position_index) == 0:
                new_frame_bboxes.append([])
            else:
                aux_list = []
                for superindex in position_index:
                    aux_bbox = frame_bboxes[3][superindex].cpu().numpy()
                    aux_list.append(aux_bbox)
                new_frame_bboxes.append(aux_list)

    elif det_path[-3:] == 'txt':
        # Load detections (read from the dataset)
        readerDET = ReadData(det_path)
        frame_bboxes = readerDET.getDETfromTXT()[0]

        # convert the data to the desired format
        for frame_index in range(frame_bboxes[-1][0] + 1):
            position_index = [j for j, k in enumerate(frame_bboxes) if k[0] == frame_index]
            if len(position_index) == 0:
                new_frame_bboxes.append([])
            else:
                aux_list = []
                for superindex in position_index:
                    aux_bbox = np.array(frame_bboxes[superindex][3:7])
                    aux_list.append(aux_bbox)
                new_frame_bboxes.append(aux_list)

    # Once the detections are loaded we can start the tracking
    cap = cv2.VideoCapture(video_path)
    previous_frame = cv2.cvtColor(cap.read()[1], cv2.COLOR_BGR2GRAY)
    bbox_per_frame = []
    id_per_frame = []
    frame = new_frame_bboxes[0]  # load the bboxes of the first frame

    # Since we compare the current frame against the next one, we loop over range - 1
    for Nframe in trange(len(new_frame_bboxes) - 1, desc="Tracking"):
        next_frame = new_frame_bboxes[Nframe + 1]
        current_frame = cv2.cvtColor(cap.read()[1], cv2.COLOR_BGR2GRAY)

        if optical_flow:
            # apply optical flow to improve the bounding box and get a better iou
            # with the following frame; predict the flow with block matching
            blockSize = 16
            searchArea = 96
            quantStep = 16
            method = 'cv2.TM_CCORR_NORMED'
            predicted_flow = compute_block_matching(previous_frame, current_frame, 'backward',
                                                    searchArea, blockSize, method, quantStep)

        # assign a new ID to each unassigned bbox
        for i in range(len(frame)):
            new_bbox = frame[i]
            area = new_bbox[2] * new_bbox[3]
            if area < min_size:
                continue
            # append the bbox to the list
            bbox_per_id = [list(new_bbox)]
            bbox_per_frame.append(bbox_per_id)
            # append the id to the list
            index_per_id = [Nframe]
            id_per_frame.append(index_per_id)

        # for each track, compute the iou against every detection of the next frame
        for id in range(len(bbox_per_frame)):
            length = len(bbox_per_frame[id])
            bbox_per_id = bbox_per_frame[id]  # bboxes of a track
            bbox1 = bbox_per_id[length - 1]   # last bbox stored of the track
            index_per_id = id_per_frame[id]   # list of frames where the track appears

            if optical_flow:
                vectorU = predicted_flow[int(bbox1[1]):int(bbox1[3]), int(bbox1[0]):int(bbox1[2]), 0]
                vectorV = predicted_flow[int(bbox1[1]):int(bbox1[3]), int(bbox1[0]):int(bbox1[2]), 1]
                dx = vectorU.mean()
                dy = vectorV.mean()
                # apply the movement to the bbox
                new_bbox1 = list(np.zeros(4))
                new_bbox1[0] = bbox1[0] + dx
                new_bbox1[2] = bbox1[2] + dx
                new_bbox1[1] = bbox1[1] + dy
                new_bbox1[3] = bbox1[3] + dy

            # don't do anything if the track is closed
            if index_per_id[-1] == -1:
                continue

            # stop tracking if there are no more bboxes left in the next frame
            if len(next_frame) == 0:
                # kill_time control: number of frames without tracking this object
                not_in_scene = Nframe - index_per_id[-1]
                if not_in_scene > kill_time:
                    # if it surpasses the kill_time, close the track by adding a -1
                    index_per_id.append(-1)
                break

            # get the list of ious, one per detection of the next frame
            iou_list = []
            for detections in range(len(next_frame)):
                bbox2 = next_frame[detections]  # detection of the next frame
                area = bbox2[2] * bbox2[3]
                if area < min_size:
                    iou_list.append(0)  # fake a low iou
                    continue
                if optical_flow:
                    iou_list.append(iou(np.array(new_bbox1), bbox2))
                else:
                    iou_list.append(iou(np.array(bbox1), bbox2))

            # assign the bbox to the closest track
            best_iou = max(iou_list)
            # if the max iou is lower than the threshold, assume there is no correspondence
            if best_iou > threshold:
                best_detection = [j for j, k in enumerate(iou_list) if k == best_iou]
                best_detection = best_detection[0]
                # append the matched bbox of the next frame to the track
                bbox_per_id.append(list(next_frame[best_detection]))
                index_per_id.append(Nframe + 1)
                # delete the detection from the list to speed up the following comparisons
                del next_frame[best_detection]
            else:
                # kill_time control: number of frames without tracking this object
                not_in_scene = Nframe - index_per_id[-1]
                if not_in_scene > kill_time:
                    # if it surpasses the kill_time, close the track by adding a -1
                    index_per_id.append(-1)

        frame = next_frame              # the next frame becomes the current one
        previous_frame = current_frame  # update the frame for the next iteration

    # Post-processing
    for track in trange(len(bbox_per_frame), desc="Post-processing"):
        # delete the tracks that do not exist for the minimum required time
        if len(bbox_per_frame[track]) < presence:
            bbox_per_frame[track] = None
            id_per_frame[track] = None
        else:
            # delete the tracks of parked cars
            bbox1 = bbox_per_frame[track][0]
            bbox2 = bbox_per_frame[track][-1]
            centerBbox1 = centroid(bbox1)
            centerBbox2 = centroid(bbox2)
            dist = euclid_dist(centerBbox1, centerBbox2)  # euclidean distance
            # delete the track if the bbox did not move the required minimum distance
            if dist < movement:
                bbox_per_frame[track] = None
                id_per_frame[track] = None
            # otherwise cut the detections while the car has not moved yet
            else:
                # check the movement every 10 frames (1 second)
                for frame_jump in range(10, len(bbox_per_frame[track]), 10):
                    bbox1 = bbox_per_frame[track][frame_jump - 10]
                    bbox2 = bbox_per_frame[track][frame_jump]
                    centerBbox1 = centroid(bbox1)
                    centerBbox2 = centroid(bbox2)
                    dist = euclid_dist(centerBbox1, centerBbox2)
                    if dist > small_movement:  # the car starts moving
                        bbox_per_frame[track] = bbox_per_frame[track][frame_jump - 10:]
                        id_per_frame[track] = id_per_frame[track][frame_jump - 10:]
                        break

    # delete the Nones
    bbox_per_frame = [i for i in bbox_per_frame if i]
    id_per_frame = [i for i in id_per_frame if i]

    if video:
        # Generate colors for each track
        id_colors = []
        for i in range(len(id_per_frame)):
            color = list(np.random.choice(range(256), size=3))
            id_colors.append(color)

        # Load gt for plot
        reader = ReadData(gt_path)
        gt, num_iter = reader.getGTfromTXT()

        # Define the codec and create the VideoWriter object
        vidCapture = cv2.VideoCapture(video_path)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('task.avi', fourcc, 8.0, (1920, 1080))

        # for each frame, draw rectangles for the detected bboxes
        for i in trange(len(new_frame_bboxes), desc="Video"):
            vidCapture.set(cv2.CAP_PROP_POS_FRAMES, i)
            im = vidCapture.read()[1]

            # draw gt
            GTdet_in_frame = [j for j, k in enumerate(gt) if k[0] == i]
            if showGT:
                for gtDet in GTdet_in_frame:
                    bbox = gt[gtDet][3:7]
                    id = gt[gtDet][1]
                    cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                                  (255, 0, 0), 2)
                    cv2.putText(im, 'ID: ' + str(id) + ' (GT)', (int(bbox[0]), int(bbox[1]) - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)

            # draw detections
            if showDET:
                for id in range(len(id_per_frame)):
                    ids = id_per_frame[id]
                    if i in ids:
                        id_index = ids.index(i)
                        bbox = bbox_per_frame[id][id_index]
                        color = id_colors[id]
                        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                                      (int(color[0]), int(color[1]), int(color[2])), 2)
                        cv2.putText(im, 'ID: ' + str(id), (int(bbox[0]), int(bbox[1]) - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                    (int(color[0]), int(color[1]), int(color[2])), 2)

            if showVid:
                cv2.imshow('Video', im)
            out.write(im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        vidCapture.release()
        out.release()
        cv2.destroyAllWindows()

    if save_tracks:
        filename = 'tracks_c10.pkl'
        dictionary = {}
        dictionary['id'] = list(range(len(bbox_per_frame)))
        dictionary['frame'] = id_per_frame
        dictionary['box'] = bbox_per_frame
        outfile = open(filename, 'wb')
        pickle.dump(dictionary, outfile)
        outfile.close()

    if compute_score:
        # Load gt for the score
        reader = ReadData(gt_path)
        gt, num_iter = reader.getGTfromTXT()

        # init the accumulator
        acc = mm.MOTAccumulator(auto_id=True)

        # Loop over all frames
        for Nframe in trange(len(new_frame_bboxes), desc="Score"):
            # get the ids of the ground-truth tracks at this frame
            gt_list = [item[1] for item in gt if item[0] == Nframe]
            gt_list = np.unique(gt_list)

            # get the ids of the detected tracks at this frame
            pred_list = []
            for ID in range(len(id_per_frame)):
                aux = np.where(np.array(id_per_frame[ID]) == Nframe)[0]
                if len(aux) > 0:
                    pred_list.append(int(ID))

            # compute the distance for each pair
            distances = []
            for i in range(len(gt_list)):
                dist = []
                # get the ground-truth bbox
                bboxGT = gt_list[i]
                bboxGT = [item[3:7] for item in gt if (item[0] == Nframe and item[1] == bboxGT)]
                bboxGT = list(bboxGT[0])
                # compute the GT centroid
                centerGT = centroid(bboxGT)
                for j in range(len(pred_list)):
                    # get the predicted bbox
                    bboxPR = pred_list[j]
                    aux_id = id_per_frame[bboxPR].index(Nframe)
                    bboxPR = bbox_per_frame[bboxPR][aux_id]
                    # compute the PR centroid
                    centerPR = centroid(bboxPR)
                    d = euclid_dist(centerGT, centerPR)  # euclidean distance
                    dist.append(d)
                distances.append(dist)

            # update the accumulator
            acc.update(gt_list, pred_list, distances)

        # Compute and show the final metric results
        mh = mm.metrics.create()
        summary = mh.compute(acc, metrics=['idf1', 'idp', 'idr', 'precision', 'recall'], name='ACC:')
        strsummary = mm.io.render_summary(summary,
                                          formatters={'idf1': '{:.2%}'.format,
                                                      'idp': '{:.2%}'.format,
                                                      'idr': '{:.2%}'.format,
                                                      'precision': '{:.2%}'.format,
                                                      'recall': '{:.2%}'.format},
                                          namemap={'idf1': 'IDF1', 'idp': 'IDP', 'idr': 'IDR',
                                                   'precision': 'Prec', 'recall': 'Rec'})
        print(strsummary)
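
# centroid() and euclid_dist() come from elsewhere in the repo; minimal
# sketches matching how they are called above, assuming [x1, y1, x2, y2]
# boxes as used by the drawing code (the real helpers may also normalize
# other box formats first):
def centroid_sketch(bbox):
    # center point of the box
    return ((bbox[0] + bbox[2]) / 2.0, (bbox[1] + bbox[3]) / 2.0)

def euclid_dist_sketch(p1, p2):
    return np.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)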
def task22():
    pkl_path = "boxesScores.pkl"
    video_path = 'AICity_data/train/S03/c010/vdo.avi'
    gt_path = 'ai_challenge_s03_c010-full_annotation.xml'
    video = True
    showVid = True

    # Get the bboxes
    frame_bboxes = []
    with open(pkl_path, "rb") as openfile:
        while True:
            try:
                frame_bboxes.append(pickle.load(openfile))
            except EOFError:
                break
    frame_bboxes = frame_bboxes[0]

    # Get the GT
    reader = ReadData(gt_path)
    gt, num_iter = reader.getGTfromXML()

    out = []
    out_id = []
    out_bbox = []
    total_time = 0.0
    total_frames = 0

    # create an instance of the SORT tracker
    mot_tracker = Sort()

    # init the accumulator
    acc = mm.MOTAccumulator(auto_id=True)

    # Loop over all frames
    for Nframe in trange(len(frame_bboxes), desc="Tracking and Score"):
        # prepare the detection format to update the tracker (Kalman filter)
        trans_dets = []
        for bbox in frame_bboxes[Nframe]:
            # convert from x, y, w, h to x1, y1, x2, y2
            dets = bbox[0]
            x1 = dets[0]
            y1 = dets[1]
            x2 = dets[2] + x1
            y2 = dets[3] + y1
            dets = np.array([x1, y1, x2, y2, bbox[1]])
            trans_dets.append(dets)
        total_frames += 1

        # mot tracker
        start_time = time.time()
        trackers = mot_tracker.update(np.array(trans_dets))
        cycle_time = time.time() - start_time
        total_time += cycle_time
        out.append(trackers)

        # IDF1 evaluation
        # get the ground-truth centroids
        id_gt = [item[1] for item in gt if item[0] == Nframe]
        id_gt = np.unique(id_gt)
        gt_list = [item[3:7] for item in gt if item[0] == Nframe]
        gt_centroids = [centroid(item) for item in gt_list]

        # get the prediction centroids
        pr_list = trackers[:, 0:4]
        out_bbox.append(pr_list)
        id_pr = trackers[:, 4]
        out_id.append(id_pr)
        pr_centroids = [centroid([item[0], item[1], item[2] - item[0], item[3] - item[1]])
                        for item in pr_list]

        # Compute the euclidean distance for each pair
        distances = []
        for i in range(len(gt_list)):
            dist = []
            centerGT = gt_centroids[i]
            for j in range(len(pr_list)):
                centerPR = pr_centroids[j]
                d = euclid_dist(centerGT, centerPR)
                dist.append(d)
            distances.append(dist)

        # update the accumulator
        acc.update(id_gt, id_pr, distances)

    print("Total Tracking took: %.3f for %d frames or %.1f FPS" %
          (total_time, total_frames, total_frames / total_time))

    # Compute and show the final metric results
    mh = mm.metrics.create()
    summary = mh.compute(acc, metrics=['idf1'], name='IDF1:')
    strsummary = mm.io.render_summary(summary,
                                      formatters={'idf1': '{:.2%}'.format},
                                      namemap={'idf1': 'idf1'})
    print(strsummary)

    if video:
        # Generate colors for each track
        id_colors = []
        max_id = max([max(list(k)) for k in out_id])
        for i in range(int(max_id) + 1):
            color = list(np.random.choice(range(256), size=3))
            id_colors.append(color)

        # Define the codec and create the VideoWriter object
        # (named out_video so it does not clobber the tracker output list)
        vidCapture = cv2.VideoCapture(video_path)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out_video = cv2.VideoWriter('task2_2.avi', fourcc, 10.0, (1920, 1080))

        # for each frame, draw rectangles for the detected bboxes
        for i in trange(len(frame_bboxes), desc="Video"):
            vidCapture.set(cv2.CAP_PROP_POS_FRAMES, i)
            im = vidCapture.read()[1]
            for id in range(len(out_id[i])):
                id_index = int(out_id[i][id])
                bbox = out_bbox[i][id]
                color = id_colors[id_index]
                # SORT returns x1, y1, x2, y2, so both corners are absolute coordinates
                cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                              (int(color[0]), int(color[1]), int(color[2])), 2)
                cv2.putText(im, 'ID: ' + str(id_index), (int(bbox[0]), int(bbox[1]) - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                            (int(color[0]), int(color[1]), int(color[2])), 2)
            if showVid:
                cv2.imshow('Video', im)
            out_video.write(im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        vidCapture.release()
        out_video.release()
        cv2.destroyAllWindows()
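
# The SORT tracker (Bewley et al.) consumes one (N, 5) array of detections
# per frame and returns an (M, 5) array of confirmed tracks; a usage sketch
# of the contract assumed by task22() above:
def sort_update_sketch(mot_tracker, dets):
    # dets: np.ndarray of shape (N, 5), rows are [x1, y1, x2, y2, score]
    tracks = mot_tracker.update(np.array(dets))
    boxes = tracks[:, 0:4]  # rows are [x1, y1, x2, y2]
    ids = tracks[:, 4]      # track id assigned by SORT
    return boxes, ids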
def task21():
    pkl_path = "boxesScores.pkl"
    video_path = 'AICity_data/train/S03/c010/vdo.avi'
    gt_path = 'ai_challenge_s03_c010-full_annotation.xml'
    threshold = 0.5  # minimum iou to keep the tracking between consecutive frames
    kill_time = 90   # number of frames without a match before closing a track
    video = True
    showVid = True
    compute_score = True

    # Get the bboxes
    frame_bboxes = []
    with open(pkl_path, "rb") as openfile:
        while True:
            try:
                frame_bboxes.append(pickle.load(openfile))
            except EOFError:
                break
    frame_bboxes = frame_bboxes[0]

    # convert the data to the desired format
    aux_frame_boxes = []
    for frame_b in frame_bboxes:
        auxiliar, _ = zip(*frame_b)
        aux_frame_boxes.append(list(auxiliar))
    frame_bboxes = aux_frame_boxes

    # Once the detections are loaded we can start the tracking
    bbox_per_frame = []
    id_per_frame = []
    frame = frame_bboxes[0]  # load the bboxes of the first frame

    # Since we compare the current frame against the next one, we loop over range - 1
    for Nframe in trange(len(frame_bboxes) - 1, desc="Tracking"):
        next_frame = frame_bboxes[Nframe + 1]

        # assign a new ID to each unassigned bbox
        for i in range(len(frame)):
            new_bbox = frame[i]
            # append the bbox to the list
            bbox_per_id = [list(new_bbox)]
            bbox_per_frame.append(bbox_per_id)
            # append the id to the list
            index_per_id = [Nframe]
            id_per_frame.append(index_per_id)

        # for each track, compute the iou against every detection of the next frame
        for id in range(len(bbox_per_frame)):
            length = len(bbox_per_frame[id])
            bbox_per_id = bbox_per_frame[id]  # bboxes of a track
            bbox1 = bbox_per_id[length - 1]   # last bbox stored of the track
            index_per_id = id_per_frame[id]   # list of frames where the track appears

            # don't do anything if the track is closed
            if index_per_id[-1] == -1:
                continue

            # stop tracking if there are no more bboxes left in the next frame
            if len(next_frame) == 0:
                # kill_time control: number of frames without tracking this object
                not_in_scene = Nframe - index_per_id[-1]
                if not_in_scene > kill_time:
                    # if it surpasses the kill_time, close the track by adding a -1
                    index_per_id.append(-1)
                break

            # get the list of ious, one per detection of the next frame
            iou_list = []
            for detections in range(len(next_frame)):
                bbox2 = next_frame[detections]  # detection of the next frame
                iou_list.append(iou(np.array(bbox1), bbox2))

            # assign the bbox to the closest track
            best_iou = max(iou_list)
            # if the max iou is lower than the threshold, assume there is no correspondence
            if best_iou > threshold:
                best_detection = [j for j, k in enumerate(iou_list) if k == best_iou]
                best_detection = best_detection[0]
                # append the matched bbox of the next frame to the track
                bbox_per_id.append(list(next_frame[best_detection]))
                index_per_id.append(Nframe + 1)
                # delete the detection from the list to speed up the following comparisons
                del next_frame[best_detection]
            else:
                # kill_time control: number of frames without tracking this object
                not_in_scene = Nframe - index_per_id[-1]
                if not_in_scene > kill_time:
                    # if it surpasses the kill_time, close the track by adding a -1
                    index_per_id.append(-1)

        frame = next_frame  # the next frame becomes the current one

    if video:
        # Generate colors for each track
        id_colors = []
        for i in range(len(id_per_frame)):
            color = list(np.random.choice(range(256), size=3))
            id_colors.append(color)

        # Define the codec and create the VideoWriter object
        vidCapture = cv2.VideoCapture(video_path)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('task2_1.avi', fourcc, 10.0, (1920, 1080))
        # for each frame, draw rectangles for the detected bboxes
        for i in trange(len(frame_bboxes), desc="Video"):
            vidCapture.set(cv2.CAP_PROP_POS_FRAMES, i)
            im = vidCapture.read()[1]
            for id in range(len(id_per_frame)):
                ids = id_per_frame[id]
                if i in ids:
                    id_index = ids.index(i)
                    bbox = bbox_per_frame[id][id_index]
                    color = id_colors[id]
                    cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                                  (int(color[0]), int(color[1]), int(color[2])), 2)
                    cv2.putText(im, 'ID: ' + str(id), (int(bbox[0]), int(bbox[1]) - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                (int(color[0]), int(color[1]), int(color[2])), 2)
            if showVid:
                cv2.imshow('Video', im)
            out.write(im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        vidCapture.release()
        out.release()
        cv2.destroyAllWindows()

    if compute_score:
        # Load gt for the score
        reader = ReadData(gt_path)
        gt, num_iter = reader.getGTfromXML()

        # init the accumulator
        acc = mm.MOTAccumulator(auto_id=True)

        # Loop over all frames
        for Nframe in trange(len(frame_bboxes), desc="Score"):
            # get the ids of the ground-truth tracks at this frame
            gt_list = [item[1] for item in gt if item[0] == Nframe]
            gt_list = np.unique(gt_list)

            # get the ids of the detected tracks at this frame
            pred_list = []
            for ID in range(len(id_per_frame)):
                aux = np.where(np.array(id_per_frame[ID]) == Nframe)[0]
                if len(aux) > 0:
                    pred_list.append(int(ID))

            # compute the distance for each pair
            distances = []
            for i in range(len(gt_list)):
                dist = []
                # get the ground-truth bbox
                bboxGT = gt_list[i]
                bboxGT = [item[3:7] for item in gt if (item[0] == Nframe and item[1] == bboxGT)]
                bboxGT = list(bboxGT[0])
                # compute the GT centroid
                centerGT = centroid(bboxGT)
                for j in range(len(pred_list)):
                    # get the predicted bbox
                    bboxPR = pred_list[j]
                    aux_id = id_per_frame[bboxPR].index(Nframe)
                    bboxPR = bbox_per_frame[bboxPR][aux_id]
                    # compute the PR centroid
                    centerPR = centroid(bboxPR)
                    d = euclid_dist(centerGT, centerPR)  # euclidean distance
                    dist.append(d)
                distances.append(dist)

            # update the accumulator
            acc.update(gt_list, pred_list, distances)

        # Compute and show the final metric results
        mh = mm.metrics.create()
        summary = mh.compute(acc, metrics=['idf1'], name='IDF1:')
        strsummary = mm.io.render_summary(summary,
                                          formatters={'idf1': '{:.2%}'.format},
                                          namemap={'idf1': 'idf1'})
        print(strsummary)
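
# motmetrics expects distances[i][j] to be the cost between gt i and
# prediction j, with np.nan marking pairs that must never be matched. The
# loops above always provide a finite distance; a sketch of an optional
# variant that caps the centroid distance so far-away pairs cannot be
# associated (the 200-pixel cap is an illustrative assumption, not a repo
# value):
def capped_distances_sketch(gt_centers, pr_centers, max_dist=200.0):
    distances = []
    for cg in gt_centers:
        row = []
        for cp in pr_centers:
            d = euclid_dist(cg, cp)
            row.append(d if d < max_dist else np.nan)  # nan = impossible pair
        distances.append(row)
    return distances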
def torchModel(model_name, video_path, xml_path, init_frame, end_frame):
    reader = ReadData(xml_path)
    gt, num_iter = reader.getGTfromXML()
    vid = VideoModel(path=video_path, color_space='gray')
    vidLen = vid.retVidLen()

    sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt, init_frame, end_frame)
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    if model_name == 'maskRCNN':
        model = detection.maskrcnn_resnet50_fpn(pretrained=True)

    vidCapture = cv2.VideoCapture(video_path)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()

    with torch.no_grad():
        thr = [0.3]
        tensor = transforms.ToTensor()
        count = 0
        for minConf in thr:
            labels = []
            boxes = []
            scores = []
            frames = []
            for fr in tqdm(range(init_frame, end_frame)):
                vidCapture.set(cv2.CAP_PROP_POS_FRAMES, fr)
                im = vidCapture.read()[1]
                x = [tensor(im).to(device)]
                bbox_pred = model(x)[0]
                ordered_bbox = list(zip(bbox_pred['labels'], bbox_pred['scores'],
                                        bbox_pred['boxes']))

                # keep the car bboxes above the confidence threshold
                car_bbox = []
                for pred in ordered_bbox:
                    if pred[0] == 3 and pred[1] > minConf:
                        car_bbox.append(pred)

                for box in car_bbox:
                    labels.append('car')
                    scores.append(box[1])
                    boxes.append(box[2])
                    frames.append(fr)
                count += 1

            predictionsInfo = reader.fixFormat(frames, boxes, labels, scores, False)
            gtInfo = reader.resetGT(gtInfo)
            rec, prec, ap, meanIoU, meanIoUF = ap_score(gtInfo, predictionsInfo,
                                                        num_bboxes=len(boxes), ovthresh=0.5)

    showVid = True
    if showVid:
        graph = PlotCreator()
        graph.plotVid(init_frame, end_frame, vidCapture, gtInfo, predictionsInfo)

    return rec, prec, ap, meanIoU, meanIoUF
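
# torchvision's detection models return labels in the 91-entry COCO taxonomy,
# where id 3 is 'car'; that is why the filter above keeps pred[0] == 3.
# Reference sketch of the first entries of that mapping:
COCO_LABELS_HEAD = ['__background__', 'person', 'bicycle', 'car', 'motorcycle']
# e.g. COCO_LABELS_HEAD[3] == 'car'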
def task2():
    path = 'ai_challenge_s03_c010-full_annotation.xml'
    video_path = 'AICity_data/train/S03/c010/vdo.avi'

    # Networks mask_rcnn, det_ssd512 and det_yolo3 (task 1.2)
    pred_paths = ['AICity_data/train/S03/c010/det/det_mask_rcnn.txt',
                  'AICity_data/train/S03/c010/det/det_ssd512.txt',
                  'AICity_data/train/S03/c010/det/det_yolo3.txt']

    # Import GT
    reader = ReadData(path)
    gt, num_iter = reader.getGTfromXML()
    sortedFrames_gt, sortedBBOX_gt = reader.bboxInFrame(gt)
    gtInfo = reader.joinBBOXfromFrame(sortedFrames_gt, sortedBBOX_gt, isGT=True)

    # Noisy GT (task 1.1)
    stdPixels = 10.0
    predictedBBOX = []
    for i in range(len(sortedFrames_gt)):
        noisyBBOX = sortedBBOX_gt[i] + np.random.normal(0, stdPixels, 4)  # mean 0, std = stdPixels
        predictedBBOX.append(noisyBBOX)
    predictionsInfo = reader.joinBBOXfromFrame(sortedFrames_gt, predictedBBOX, isGT=False)
    rec, prec, ap, meanIoU = ap_score(gtInfo, predictionsInfo,
                                      num_bboxes=len(sortedFrames_gt), ovthresh=0.5)
    video_with_bbox(video_path, 'noisyGT', gtInfo, predictionsInfo)
    meanIoUvideoplot('noisyGT', meanIoU)

    pred_nets = ['mask_rcnn', 'det_ssd512', 'det_yolo3']
    for pc in range(len(pred_nets)):
        reader = ReadData(pred_paths[pc])
        pred, _ = reader.getPredfromTXT()
        sortedFrames_pred, sortedBBOX_pred, sortedScore_pred = reader.bboxInFrame_Score(pred)
        predictionsInfo = reader.joinBBOXfromFrame_Score(sortedFrames_pred, sortedBBOX_pred,
                                                         sortedScore_pred, isGT=False)
        rec, prec, ap, meanIoU = VOC_ap_score(gtInfo, predictionsInfo,
                                              num_bboxes=len(sortedFrames_gt), ovthresh=0.5)
        video_with_bbox(video_path, pred_nets[pc], gtInfo, predictionsInfo)
        meanIoUvideoplot(pred_nets[pc], meanIoU)
def detectronModels(model_name, video_path, xml_path, init_frame, end_frame):
    # set up the configuration for the desired model
    cfg = get_cfg()
    if model_name == 'retinaNet':
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_3x.yaml"))
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_50_FPN_3x.yaml")
    elif model_name == 'fasterRCNN':
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")

    # Inference
    cap = cv2.VideoCapture(video_path)
    if model_name == 'retinaNet':
        cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.3
    else:
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
    predictor = DefaultPredictor(cfg)

    frames = []
    scores = []
    labels = []
    bboxes = []
    boxesScore_pkl = []
    for frame_idx in tqdm(range(init_frame, end_frame)):
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        im = cap.read()[1]
        output = predictor(im)
        # keep only the car detections
        car_instances = output["instances"][output["instances"].pred_classes == 2].to("cpu")
        box_pkl = []
        for idx in range(len(car_instances.pred_boxes)):
            bbox = car_instances.pred_boxes[idx]
            score = car_instances.scores[idx]
            bboxes.append(bbox.tensor[0])
            frames.append(frame_idx)
            scores.append(score)
            labels.append('Car')
            box_pkl.append([bbox.tensor[0].cpu().numpy(), score.cpu().numpy()])
        boxesScore_pkl.append(box_pkl)

    with open('boxesScores.pkl', 'wb') as f:
        pickle.dump(boxesScore_pkl, f)
    prediction_results = [frames, scores, labels, bboxes]
    with open('prediction_results_retina.pkl', 'wb') as f:
        pickle.dump(prediction_results, f)

    reader = ReadData(xml_path)
    predictionsInfo = reader.fixFormat(frames, bboxes, labels, scores, False)
    gt, num_iter = reader.getGTfromXML()
    sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt=gt, initFrame=init_frame,
                                                           endFrame=end_frame - 1)
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)
    gtInfo = reader.resetGT(gtInfo)
    rec, prec, ap, meanIoU, meanIoUF = ap_score(gtInfo, predictionsInfo,
                                                num_bboxes=len(bboxes), ovthresh=0.5)

    showVid = True
    if showVid:
        graph = PlotCreator()
        graph.plotVid(init_frame, end_frame, cap, gtInfo, predictionsInfo)

    return rec, prec, ap, meanIoU, meanIoUF
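
# detectron2 uses 0-indexed contiguous COCO classes (person=0, bicycle=1,
# car=2), hence the pred_classes == 2 filter above. The boxesScores.pkl file
# written here is what task21()/task22() later load; a minimal reading sketch
# (the file holds a single pickled list with one entry of [bbox_xyxy, score]
# pairs per frame):
def load_boxes_scores_sketch(pkl_path='boxesScores.pkl'):
    with open(pkl_path, 'rb') as f:
        per_frame = pickle.load(f)
    return per_frame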