Example #1
def task11():
    # Read gt file
    path = 'ai_challenge_s03_c010-full_annotation.xml'
    reader = ReadData(path)
    gt, num_iter = reader.getGTfromXML()
    sortedFrames, sortedBBOX = reader.bboxInFrame(gt)
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    summary = []
    addNoise = True
    addDrop = False
    if addNoise:
        stdPixels = np.linspace(0, 100, 11)
        for n in range(len(stdPixels)):
            std = int(stdPixels[n])
            predictedBBOX = []
            for i in range(len(sortedBBOX)):
                noisyBBOX = sortedBBOX[i] + np.random.normal(0, std, 4)  # zero-mean Gaussian noise with the current std
                predictedBBOX.append(noisyBBOX)
            predictionsInfo = reader.joinBBOXfromFrame(sortedFrames, predictedBBOX, isGT=False)
            rec, prec, ap, meanIoU = ap_score(gtInfo, predictionsInfo, num_bboxes=len(sortedFrames), ovthresh=0.5)

            print('Noise std:', std)
            print('mAP:', np.mean(ap))
            print('Mean IOU:', meanIoU)
            gtInfo = reader.resetGT(gtInfo)
            summary.append({"std": std, "prec": np.mean(prec), "rec": np.mean(rec), "iou": meanIoU, "mAP": np.mean(ap)})

    if addDrop:
        dropThr = np.linspace(0, 0.9, 11)
        for n in range(len(dropThr)):
            predictedBBOX = []
            for i in range(len(sortedBBOX)):
                drop_box = np.random.rand() < dropThr[n]
                if not drop_box:
                    predictedBBOX.append(sortedBBOX[i])
                else:
                    predictedBBOX.append(None)
            predictionsInfo = reader.joinBBOXfromFrame(sortedFrames, predictedBBOX, isGT=False)

            rec, prec, ap, meanIoU = ap_score(gtInfo, predictionsInfo, num_bboxes=len(sortedFrames), ovthresh=0.5)
            print('Thr:', dropThr[n])
            print('mAP:', np.mean(ap))
            print('Mean IOU:', meanIoU)
            gtInfo = reader.resetGT(gtInfo)
            summary.append(
                {"dropThr": dropThr[n], "prec": np.mean(prec), "rec": np.mean(rec), "iou": meanIoU, "mAP": np.mean(ap)})


    plots = True
    if plots and addNoise:
        graph = PlotCreator()
        graph.plotCurve(datax=[d['std'] for d in summary], datay=[d['iou'] for d in summary], labelx='Noise std (pixels)', labely='IoU')
        graph.plotCurve(datax=[d['std'] for d in summary], datay=[d['mAP'] for d in summary], labelx='Noise std (pixels)', labely='mAP')

    elif plots and addDrop:
        graph = PlotCreator()
        graph.plotCurve(datax=[d['dropThr'] for d in summary], datay=[d['iou'] for d in summary], labelx='Drop Thr', labely='IoU')
        graph.plotCurve(datax=[d['dropThr'] for d in summary], datay=[d['mAP'] for d in summary], labelx='Drop Thr', labely='mAP')
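
The scores above hinge on intersection-over-union between ground-truth and predicted boxes. ap_score is defined elsewhere in this repo; a minimal sketch of the pairwise IoU it presumably builds on, assuming [x1, y1, x2, y2] boxes:

import numpy as np

def iou(boxA, boxB):
    # corners of the intersection rectangle
    xA, yA = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    xB, yB = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])
    inter = max(0.0, xB - xA) * max(0.0, yB - yA)
    areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    union = areaA + areaB - inter
    return inter / union if union > 0 else 0.0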
Example #2
    def get_aicity_dataset(frame_idx_list):
        path = '/home/group09/code/week6/datasets/AICity_data/train/S03/c010/ai_challenge_s03_c010-full_annotation.xml'
        video_path = '/home/group09/code/week6/datasets/AICity_data/train/S03/c010/vdo.avi'

        reader = ReadData(path)
        gt, num_iter = reader.getGTfromXML()

        sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt, 0, 2141)
        gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

        dataset_dicts = []
        directory = '/home/group09/code/week6/datasets/AICity_data/AICity_frames'
        for frame_idx in tqdm(frame_idx_list):
            filename = str(frame_idx).zfill(4) + '.png'
            record = {}
            im_path = os.path.join(directory, filename)
            im = cv2.imread(im_path)
            print(filename)
            height, width = im.shape[:2]

            record["file_name"] = im_path
            record["image_id"] = str(frame_idx).zfill(4)
            record["height"] = height
            record["width"] = width

            objs = []
            for [x1, y1, x2, y2] in gtInfo[frame_idx]['bbox']:  # for every bbox in a frame's gt
                obj = {
                    "type": 'Car',
                    "bbox": [x1, y1, x2, y2],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "category_id": 0
                }

                objs.append(obj)

            record["annotations"] = objs
            dataset_dicts.append(record)

        return dataset_dicts
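
Functions in this form are meant to be registered with detectron2's dataset catalogs. A hedged sketch of that registration; the dataset name "aicity_train" and the frame split are placeholders:

from detectron2.data import DatasetCatalog, MetadataCatalog

train_idx = list(range(535))  # hypothetical split: roughly the first 25% of the 2141 frames
DatasetCatalog.register("aicity_train", lambda: get_aicity_dataset(train_idx))
MetadataCatalog.get("aicity_train").set(thing_classes=["Car"])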
Example #3
def task3():

    algorithms = ['KNN', 'MOG2', 'CNT', 'GMG', 'GSOC', 'MOG']  # 'LSBP' omitted

    for i in range(len(algorithms)):
        # Read gt file
        path = "ai_challenge_s03_c010-full_annotation.xml"
        reader = ReadData(path)

        model = BgsModel(
            path='/home/mar/Desktop/M6/Lab1/AICity_data/train/S03/c010/vdo.avi',
            color_space='gray',
            alg=algorithms[i])
        vidLen = model.retVidLen()

        gt, num_iter = reader.getGTfromXML()
        gt = reader.preprocessGT(gt)
        sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(
            gt, int(vidLen * 0.25))
        gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

        predictionsInfo, num_bboxes = model.foreground_extraction(
            showVid=True, gt=gtInfo, use_postprocessing=True)

        # Uncomment if original video with bboxes is needed:
        # video_with_bbox("D:\MCV\M6\AICity_data\train\S03\c010\vdo.avi", algorithms[i], gtInfo, predictionsInfo, int(vidLen*0.25), int(vidLen))
        rec, prec, ap, meanIoU = ap_score(gtInfo,
                                          predictionsInfo,
                                          num_bboxes=num_bboxes,
                                          ovthresh=0.5)
        print('Method: ', algorithms[i])
        print('mAP:', ap)
        print('Mean IoU:', meanIoU)
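
BgsModel wraps OpenCV's background-subtraction implementations. A minimal sketch of how such a wrapper could map each algorithm name to its factory (create_subtractor is a hypothetical helper; the bgsegm variants require the opencv-contrib-python package):

import cv2

def create_subtractor(alg):
    factory = {
        'KNN': cv2.createBackgroundSubtractorKNN,
        'MOG2': cv2.createBackgroundSubtractorMOG2,
        'CNT': cv2.bgsegm.createBackgroundSubtractorCNT,
        'GMG': cv2.bgsegm.createBackgroundSubtractorGMG,
        'GSOC': cv2.bgsegm.createBackgroundSubtractorGSOC,
        'MOG': cv2.bgsegm.createBackgroundSubtractorMOG,
        'LSBP': cv2.bgsegm.createBackgroundSubtractorLSBP,
    }
    return factory[alg]()

# per-frame usage: mask = subtractor.apply(gray_frame)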
Example #4
def task2():
    # Read gt file
    path = 'ai_challenge_s03_c010-full_annotation.xml'
    reader = ReadData(path)

    # Compute the mean and variance for each of the pixels along the 25% of the video
    gaussModel = GaussianModel(
        path='/home/mar/Desktop/M6/Lab1/AICity_data/train/S03/c010/vdo.avi',
        color_space='gray')
    vidLen = gaussModel.retVidLen()

    # Load gt for plot
    gt, num_iter = reader.getGTfromXML()
    gt = reader.preprocessGT(gt)
    sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(
        gt, int(vidLen * 0.25))
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    # Separate foreground from background
    alpha = [3, 4, 5, 6]
    rho_values = [0.001, 0.01, 0.1, 0.5]
    configurations = []
    ap_results = []
    iou_results = []
    for a in alpha:
        for r in rho_values:
            # Compute mean and std
            gaussModel.mean_std_Welford()

            print('Alpha:', a)
            predictionsInfo, num_bboxes = gaussModel.foreground_extraction_task2(
                showVid=True, gt=gtInfo, alpha=a, rho=r, adaptive=True)

            gtInfo = reader.resetGT(gtInfo)
            rec, prec, ap, meanIoU = ap_score(gtInfo,
                                              predictionsInfo,
                                              num_bboxes=num_bboxes,
                                              ovthresh=0.5)
            configurations.append([a, r])
            ap_results.append(ap)
            iou_results.append(meanIoU)
            print("")
            print("Alpha: ", a)
            print("Rho: ", r)
            print('mAP:', ap)
            print('Mean IoU:', meanIoU)

    with open('ap_results_2.pkl', 'wb') as handle:
        pickle.dump(ap_results, handle)
    with open('iou_results_2".pkl', 'wb') as handle:
        pickle.dump(iou_results, handle)
    with open('configurations_2.pkl', 'wb') as handle:
        pickle.dump(configurations, handle)
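
mean_std_Welford suggests the per-pixel background statistics are computed with Welford's online algorithm, which needs only a single pass over the training frames and no frame buffer. A sketch under that assumption:

import numpy as np

def welford(frames):
    # frames: non-empty iterable of equally-shaped grayscale frames
    mean, M2, n = None, None, 0
    for frame in frames:
        frame = frame.astype(np.float64)
        if mean is None:
            mean = np.zeros_like(frame)
            M2 = np.zeros_like(frame)
        n += 1
        delta = frame - mean
        mean += delta / n
        M2 += delta * (frame - mean)
    return mean, np.sqrt(M2 / n)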
Example #5
def task11_12():
    # Read gt file
    path = 'ai_challenge_s03_c010-full_annotation.xml'
    reader = ReadData(path)

    # Compute the mean and variance for each of the pixels along the 25% of the video
    gaussModel = GaussianModel(
        path='/home/mar/Desktop/M6/Lab1/AICity_data/train/S03/c010/vdo.avi',
        color_space='gray')
    vidLen = gaussModel.retVidLen()

    # Load gt for plot
    gt, num_iter = reader.getGTfromXML()
    gt = reader.preprocessGT(gt)
    sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(
        gt, int(vidLen * 0.25))
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    # Compute mean and std
    gaussModel.mean_std_Welford()
    # Separate foreground from background
    alpha = [6.25]
    summary = []
    for a in alpha:
        print('Alpha:', a)
        predictionsInfo, num_bboxes = gaussModel.foreground_extraction(
            showVid=True, gt=gtInfo, alpha=a, noiseRemoval=True)

        # --------------------------------------TASK 1.2-------------------------------------------------
        gtInfo = reader.resetGT(gtInfo)
        rec, prec, ap, meanIoU = ap_score(gtInfo,
                                          predictionsInfo,
                                          num_bboxes=num_bboxes,
                                          ovthresh=0.5)
        print('mAP:', ap)
        print('Mean IoU:', meanIoU)
        summary.append({"alpha": a, "iou": meanIoU, "mAP": np.mean(ap)})

    plots = False
    if plots:
        graph = PlotCreator()
        graph.plotCurve(datax=[d['alpha'] for d in summary],
                        datay=[d['iou'] for d in summary],
                        labelx='Alpha',
                        labely='IoU',
                        name='iouAlpha')
        graph.plotCurve(datax=[d['alpha'] for d in summary],
                        datay=[d['mAP'] for d in summary],
                        labelx='Alpha',
                        labely='mAP',
                        name='mAPAlpha')
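
A hedged sketch of the per-pixel rule foreground_extraction presumably applies: a pixel is foreground when it deviates from the modelled mean by more than alpha standard deviations (the +2 offset keeps the threshold sane where the variance is near zero), and in the adaptive variant (Example #4) the background model is refreshed at rate rho:

import numpy as np

def segment(frame, mean, std, alpha):
    # boolean foreground mask
    return np.abs(frame - mean) >= alpha * (std + 2)

def adaptive_update(frame, mean, std, fg, rho):
    # refresh the Gaussian model only on background pixels
    bg = ~fg
    mean[bg] = rho * frame[bg] + (1 - rho) * mean[bg]
    std[bg] = np.sqrt(rho * (frame[bg] - mean[bg]) ** 2 + (1 - rho) * std[bg] ** 2)
    return mean, std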
Example #6
def task12():
    path = 'ai_challenge_s03_c010-full_annotation.xml'

    pred_paths = ['AICity_data/train/S03/c010/det/det_mask_rcnn.txt',
                  'AICity_data/train/S03/c010/det/det_ssd512.txt',
                  'AICity_data/train/S03/c010/det/det_yolo3.txt']

    pred_nets = ['mask_rcnn', 'det_ssd512', 'det_yolo3']

    for pc in range(len(pred_nets)):
        reader = ReadData(path)
        gt, num_iter = reader.getGTfromXML()
        sortedFrames_gt, sortedBBOX_gt = reader.bboxInFrame(gt)
        gtInfo = reader.joinBBOXfromFrame(sortedFrames_gt, sortedBBOX_gt, isGT=True)

        reader = ReadData(pred_paths[pc])
        pred, _ = reader.getPredfromTXT()
        sortedFrames_pred, sortedBBOX_pred, sortedScore_pred = reader.bboxInFrame_Score(pred)
        predictionsInfo = reader.joinBBOXfromFrame_Score(sortedFrames_pred, sortedBBOX_pred, sortedScore_pred, isGT=False)

        rec, prec, ap, meanIoU = VOC_ap_score(gtInfo, predictionsInfo, num_bboxes=len(sortedFrames_gt), ovthresh=0.5)
        print('Inference with', pred_nets[pc])
        print('mAP:', np.mean(ap))
        print('meanIoU:', np.mean(meanIoU))
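
VOC_ap_score suggests Pascal VOC-style average precision. A minimal sketch of the classic 11-point interpolation it may use, given the recall/precision arrays:

import numpy as np

def voc_ap_11point(rec, prec):
    ap = 0.0
    for t in np.arange(0.0, 1.1, 0.1):
        mask = rec >= t
        p = np.max(prec[mask]) if np.any(mask) else 0.0
        ap += p / 11.0
    return ap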
Example #7
def task1():
    # Paths
    det_path = 'aic19-track1-mtmc-train/train/S03/c011/det/det_yolo3.txt'  # 'retinanet_S03/prediction_results_retina_S03_c010.pkl'
    video_path = 'aic19-track1-mtmc-train/train/S03/c011/vdo.avi'
    gt_path = 'aic19-track1-mtmc-train/train/S03/c011/gt/gt.txt'

    # Parameters
    threshold = 0.5  # minimum IoU to link detections across consecutive frames
    kill_time = 90  # number of frames without a match before a track is closed
    presence = 20  # minimum number of frames a track must exist
    movement = 150  # minimum distance (start to end) a car must travel to be kept
    small_movement = 10  # minimum distance required every 10 frames once the car is moving
    min_size = 1000000  # minimum bbox area for a detection to be kept

    # Flags
    video = False
    showVid = False
    showGT = True
    showDET = True
    compute_score = True
    save_tracks = True
    optical_flow = False

    # Read the detections from files
    frame_bboxes = []
    new_frame_bboxes = []
    if det_path[-3:] == 'pkl':
        # Get the bboxes (Read from our pickles)
        with (open(det_path, "rb")) as openfile:
            while True:
                try:
                    frame_bboxes.append(pickle.load(openfile))
                except EOFError:
                    break
        frame_bboxes = frame_bboxes[0]
        # correct the data to the desired format
        for frame_index in range(frame_bboxes[0][-1] + 1):
            position_index = [j for j, k in enumerate(frame_bboxes[0]) if k == frame_index]
            if len(position_index) == 0:
                new_frame_bboxes.append([])
            else:
                aux_list = []
                for superindex in position_index:
                    aux_bbox = frame_bboxes[3][superindex].cpu().numpy()
                    aux_list.append(aux_bbox)
                new_frame_bboxes.append(aux_list)
    elif det_path[-3:] == 'txt':
        # Load detections (Read from the dataset)
        readerDET = ReadData(det_path)
        frame_bboxes = readerDET.getDETfromTXT()[0]
        # transform det
        for frame_index in range(frame_bboxes[-1][0] + 1):
            position_index = [j for j, k in enumerate(frame_bboxes) if k[0] == frame_index]
            if len(position_index) == 0:
                new_frame_bboxes.append([])
            else:
                aux_list = []
                for superindex in position_index:
                    aux_bbox = np.array(frame_bboxes[superindex][3:7])
                    aux_list.append(aux_bbox)
                new_frame_bboxes.append(aux_list)

    # Once we have done the detection we can start with the tracking
    cap = cv2.VideoCapture(video_path)
    previous_frame = cv2.cvtColor(cap.read()[1], cv2.COLOR_BGR2GRAY)
    bbox_per_frame = []
    id_per_frame = []
    frame = new_frame_bboxes[0]  # load the bbox for the first frame
    # Since we compare each frame with the next one, we loop over len - 1 frames
    for Nframe in trange(len(new_frame_bboxes) - 1, desc="Tracking"):
        next_frame = new_frame_bboxes[Nframe + 1]
        current_frame = cv2.cvtColor(cap.read()[1], cv2.COLOR_BGR2GRAY)

        if optical_flow:
            # apply optical flow to improve the bounding box and get better iou with the following frame
            # predict flow with block matching
            blockSize = 16
            searchArea = 96
            quantStep = 16
            method = 'cv2.TM_CCORR_NORMED'
            predicted_flow = compute_block_matching(previous_frame, current_frame, 'backward', searchArea, blockSize,
                                                    method, quantStep)

        # assign a new ID to each unassigned bbox
        for i in range(len(frame)):
            new_bbox = frame[i]
            area = new_bbox[2] * new_bbox[3]
            if area < min_size:
                continue

            # append the bbox to the list
            bbox_per_id = []
            bbox_per_id.append(list(new_bbox))
            bbox_per_frame.append(bbox_per_id)
            # append the id to the list
            index_per_id = []
            index_per_id.append(Nframe)
            id_per_frame.append(index_per_id)

        # loop over every track and compute its IoU with each detection in the next frame
        for id in range(len(bbox_per_frame)):
            length = len(bbox_per_frame[id])
            bbox_per_id = bbox_per_frame[id]  # bboxes of a track
            bbox1 = bbox_per_id[length - 1]  # last bbox stored of the track
            index_per_id = id_per_frame[id]  # list of frames where the track appears

            if optical_flow:
                vectorU = predicted_flow[int(bbox1[1]):int(bbox1[3]), int(bbox1[0]):int(bbox1[2]), 0]
                vectorV = predicted_flow[int(bbox1[1]):int(bbox1[3]), int(bbox1[0]):int(bbox1[2]), 1]
                dx = vectorU.mean()
                dy = vectorV.mean()
                # apply the estimated motion to the bbox
                new_bbox1 = list(np.zeros(4))
                new_bbox1[0] = bbox1[0] + dx
                new_bbox1[2] = bbox1[2] + dx
                new_bbox1[1] = bbox1[1] + dy
                new_bbox1[3] = bbox1[3] + dy

            # don't do anything if the track is closed
            if index_per_id[-1] == -1:
                continue

            # get the list of ious, one with each detection of the next frame
            iou_list = []
            for detections in range(len(next_frame)):
                bbox2 = next_frame[detections]  # detection of the next frame
                area = bbox2[2] * bbox2[3]
                if area < min_size:
                    iou_list.append(0)  # we fake a low iou
                    continue
                if optical_flow:
                    iou_list.append(iou(np.array(new_bbox1), bbox2))
                else:
                    iou_list.append(iou(np.array(bbox1), bbox2))

            # break the loop if there are no more bboxes in the frame to track
            if len(next_frame) == 0:
                # kill_time control
                not_in_scene = Nframe - index_per_id[-1]  # frames since this object was last tracked
                if not_in_scene > kill_time:  # past the kill_time, close the track by appending -1
                    index_per_id.append(-1)
                break

            # assign the bbox to the closest track
            best_iou = max(iou_list)
            # if the max IoU is below the threshold, we assume there is no correspondence
            if best_iou > threshold:
                best_detection = [j for j, k in enumerate(iou_list) if k == best_iou]
                best_detection = best_detection[0]

                # append to the list the bbox of the next frame
                bbox_per_id.append(list(next_frame[best_detection]))
                index_per_id.append(Nframe + 1)

                # we delete the detection from the list in order to speed up the following comparisons
                del next_frame[best_detection]
            else:
                # kill_time control
                not_in_scene = Nframe - index_per_id[-1]  # frames since this object was last tracked
                if not_in_scene > kill_time:  # past the kill_time, close the track by appending -1
                    index_per_id.append(-1)

        frame = next_frame  # the next frame will be the current
        previous_frame = current_frame  # update the frame for next iteration

    # Post-processing
    for track in trange(len(bbox_per_frame), desc="Post-processing"):
        # discard tracks that do not persist for the minimum required time
        if len(bbox_per_frame[track]) < presence:
            bbox_per_frame[track] = None
            id_per_frame[track] = None
        else:
            # delete the tracks of the parked cars
            bbox1 = bbox_per_frame[track][0]
            bbox2 = bbox_per_frame[track][-1]
            centerBbox1 = centroid(bbox1)
            centerBbox2 = centroid(bbox2)
            dist = euclid_dist(centerBbox1, centerBbox2)  # euclidean distance
            # delete if the bbox did not move a required minimum distance
            if dist < movement:
                bbox_per_frame[track] = None
                id_per_frame[track] = None
            # trim the detections recorded before the car starts moving
            else:
                # check the movement each 10 frames, 1 second
                for frame_jump in range(10, len(bbox_per_frame[track]), 10):
                    bbox1 = bbox_per_frame[track][frame_jump - 10]
                    bbox2 = bbox_per_frame[track][frame_jump]
                    centerBbox1 = centroid(bbox1)
                    centerBbox2 = centroid(bbox2)
                    dist = euclid_dist(centerBbox1, centerBbox2)
                    if dist > small_movement:  # car starts moving
                        bbox_per_frame[track] = bbox_per_frame[track][frame_jump - 10:]
                        id_per_frame[track] = id_per_frame[track][frame_jump - 10:]
                        break
    # delete the Nones
    bbox_per_frame = [i for i in bbox_per_frame if i]
    id_per_frame = [i for i in id_per_frame if i]

    if video:
        # Generate colors for each track
        id_colors = []
        for i in range(len(id_per_frame)):
            color = list(np.random.choice(range(256), size=3))
            id_colors.append(color)

        # Load gt for plot
        reader = ReadData(gt_path)
        gt, num_iter = reader.getGTfromTXT()

        # Define the codec and create VideoWriter object
        vidCapture = cv2.VideoCapture(video_path)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('task.avi', fourcc, 8.0, (1920, 1080))
        # for each frame draw rectangles to the detected bboxes
        for i in trange(len(new_frame_bboxes), desc="Video"):
            vidCapture.set(cv2.CAP_PROP_POS_FRAMES, i)
            im = vidCapture.read()[1]
            # draw gt
            GTdet_in_frame = [j for j, k in enumerate(gt) if k[0] == i]
            if showGT:
                for gtDet in GTdet_in_frame:
                    bbox = gt[gtDet][3:7]
                    id = gt[gtDet][1]
                    cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
                    cv2.putText(im, 'ID: ' + str(id) + ' (GT)', (int(bbox[0]), int(bbox[1]) - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
            # draw detections
            if showDET:
                for id in range(len(id_per_frame)):
                    ids = id_per_frame[id]
                    if i in ids:
                        id_index = ids.index(i)
                        bbox = bbox_per_frame[id][id_index]
                        color = id_colors[id]
                        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),
                                      (int(color[0]), int(color[1]), int(color[2])), 2)
                        cv2.putText(im, 'ID: ' + str(id), (int(bbox[0]), int(bbox[1]) - 10),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (int(color[0]), int(color[1]), int(color[2])), 2)
            if showVid:
                cv2.imshow('Video', im)
            out.write(im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        vidCapture.release()
        out.release()
        cv2.destroyAllWindows()

    if save_tracks:
        filename = 'tracks_c10.pkl'
        dictionary = {}
        dictionary['id'] = list(range(len(bbox_per_frame)))
        dictionary['frame'] = id_per_frame
        dictionary['box'] = bbox_per_frame
        outfile = open(filename, 'wb')
        pickle.dump(dictionary, outfile)
        outfile.close()

    if compute_score:
        # Load gt for plot
        reader = ReadData(gt_path)
        gt, num_iter = reader.getGTfromTXT()

        # init accumulator
        acc = mm.MOTAccumulator(auto_id=True)

        # Loop for all frames
        for Nframe in trange(len(new_frame_bboxes), desc="Score"):

            # get the ids of the tracks from the ground truth at this frame
            gt_list = [item[1] for item in gt if item[0] == Nframe]
            gt_list = np.unique(gt_list)

            # get the ids of the detected tracks at this frame
            pred_list = []
            for ID in range(len(id_per_frame)):
                aux = np.where(np.array(id_per_frame[ID]) == Nframe)[0]
                if len(aux) > 0:
                    pred_list.append(int(ID))

            # compute the distance for each pair
            distances = []
            for i in range(len(gt_list)):
                dist = []
                # compute the ground truth bbox
                bboxGT = gt_list[i]
                bboxGT = [item[3:7] for item in gt if (item[0] == Nframe and item[1] == bboxGT)]
                bboxGT = list(bboxGT[0])
                # compute centroid GT
                centerGT = centroid(bboxGT)
                for j in range(len(pred_list)):
                    # compute the predicted bbox
                    bboxPR = pred_list[j]
                    aux_id = id_per_frame[bboxPR].index(Nframe)
                    bboxPR = bbox_per_frame[bboxPR][aux_id]
                    # compute centroid PR
                    centerPR = centroid(bboxPR)
                    d = euclid_dist(centerGT, centerPR)  # euclidean distance
                    dist.append(d)
                distances.append(dist)

            # update the accumulator
            acc.update(gt_list, pred_list, distances)

        # Compute and show the final metric results
        mh = mm.metrics.create()
        summary = mh.compute(acc, metrics=['idf1', 'idp', 'idr', 'precision', 'recall'], name='ACC:')
        strsummary = mm.io.render_summary(summary, formatters={'idf1': '{:.2%}'.format, 'idp': '{:.2%}'.format,
                                                               'idr': '{:.2%}'.format, 'precision': '{:.2%}'.format,
                                                               'recall': '{:.2%}'.format}, namemap={'idf1': 'IDF1',
                                                                                                    'idp': 'IDP',
                                                                                                    'idr': 'IDR',
                                                                                                    'precision': 'Prec',
                                                                                                    'recall': 'Rec'})
        print(strsummary)
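
The post-processing and scoring above rely on two small geometry helpers defined elsewhere. Hypothetical versions, assuming [x, y, w, h] boxes (which matches the conversion done before calling centroid in the SORT example below):

import numpy as np

def centroid(bbox):
    # center of an [x, y, w, h] box
    return (bbox[0] + bbox[2] / 2.0, bbox[1] + bbox[3] / 2.0)

def euclid_dist(p, q):
    return float(np.hypot(p[0] - q[0], p[1] - q[1]))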
Example #8
def task22():

    pkl_path = "boxesScores.pkl"
    video_path = 'AICity_data/train/S03/c010/vdo.avi'
    gt_path = 'ai_challenge_s03_c010-full_annotation.xml'
    video = True
    showVid = True

    # Get the bboxes
    frame_bboxes = []
    with (open(pkl_path, "rb")) as openfile:
        while True:
            try:
                frame_bboxes.append(pickle.load(openfile))
            except EOFError:
                break
    frame_bboxes = frame_bboxes[0]

    # Get the GT
    reader = ReadData(gt_path)
    gt, num_iter = reader.getGTfromXML()

    out = []
    out_id = []
    out_bbox = []
    total_time = 0.0
    total_frames = 0

    # create instance of the SORT tracker
    mot_tracker = Sort()
    # init accumulator
    acc = mm.MOTAccumulator(auto_id=True)

    # Loop for all frames
    for Nframe in trange(len(frame_bboxes), desc="Tracking and Score"):

        # prepare the detection format to update the tracker (Kalman filter)
        trans_dets = []
        for bbox in frame_bboxes[Nframe]:
            # convert from x,y,w,h to x1,y1,x2,y2
            dets = bbox[0]
            x1 = dets[0]
            y1 = dets[1]
            x2 = dets[2] + x1
            y2 = dets[3] + y1
            dets = np.array([x1, y1, x2, y2, bbox[1]])
            trans_dets.append(dets)
        total_frames += 1

        # mot tracker
        start_time = time.time()
        trackers = mot_tracker.update(np.array(trans_dets))
        cycle_time = time.time() - start_time
        total_time += cycle_time

        out.append(trackers)

        # IDF1 evaluation
        # get ground truth centroids
        id_gt = [item[1] for item in gt if item[0] == Nframe]
        id_gt = np.unique(id_gt)
        gt_list = [item[3:7] for item in gt if item[0] == Nframe]
        gt_centroids = [centroid(item) for item in gt_list]

        # get predictions centroids
        pr_list = trackers[:, 0:4]
        out_bbox.append(pr_list)
        id_pr = trackers[:, 4]
        out_id.append(id_pr)
        pr_centroids = [
            centroid([item[0], item[1], item[2] - item[0], item[3] - item[1]])
            for item in pr_list
        ]

        # Compute euclidean distance for each pair
        distances = []
        for i in range(len(gt_list)):
            dist = []
            centerGT = gt_centroids[i]
            for j in range(len(pr_list)):
                centerPR = pr_centroids[j]
                d = euclid_dist(centerGT, centerPR)
                dist.append(d)
            distances.append(dist)

        # update the accumulator
        acc.update(id_gt, id_pr, distances)

    print("Total Tracking took: %.3f for %d frames or %.1f FPS" %
          (total_time, total_frames, total_frames / total_time))

    # Compute and show the final metric results
    mh = mm.metrics.create()
    summary = mh.compute(acc, metrics=['idf1'], name='IDF1:')
    strsummary = mm.io.render_summary(summary,
                                      formatters={'idf1': '{:.2%}'.format},
                                      namemap={'idf1': 'idf1'})
    print(strsummary)

    if video:
        # Generate colors for each track
        id_colors = []
        max_id = max([max(list(k)) for k in out_id])
        for i in range(int(max_id) + 1):
            color = list(np.random.choice(range(256), size=3))
            id_colors.append(color)

        # Define the codec and create VideoWriter object
        vidCapture = cv2.VideoCapture(video_path)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('task2_2.avi', fourcc, 10.0, (1920, 1080))
        # for each frame draw rectangles to the detected bboxes
        for i in trange(len(frame_bboxes), desc="Video"):
            vidCapture.set(cv2.CAP_PROP_POS_FRAMES, i)
            im = vidCapture.read()[1]
            for id in range(len(out_id[i])):
                id_index = int(out_id[i][id])
                bbox = out_bbox[i][id]
                color = id_colors[id_index]
                cv2.rectangle(im, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),  # SORT boxes are already [x1, y1, x2, y2]
                              (int(color[0]), int(color[1]), int(color[2])), 2)
                cv2.putText(im, 'ID: ' + str(id_index),
                            (int(bbox[0]), int(bbox[1]) - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                            (int(color[0]), int(color[1]), int(color[2])), 2)
            if showVid:
                cv2.imshow('Video', im)
            out.write(im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        vidCapture.release()
        out.release()
        cv2.destroyAllWindows()
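
The Sort tracker used here is assumed to be the reference implementation by Bewley et al.: update() takes an (N, 5) array of [x1, y1, x2, y2, score] detections per frame and returns rows of [x1, y1, x2, y2, track_id]. A minimal usage sketch:

import numpy as np
from sort import Sort  # assumed module providing the SORT reference implementation

tracker = Sort()
dets = np.array([[100., 120., 180., 200., 0.92],
                 [300., 310., 360., 390., 0.81]])
tracks = tracker.update(dets)
for x1, y1, x2, y2, track_id in tracks:
    print(int(track_id), x1, y1, x2, y2)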
Example #9
def task21():

    pkl_path = "boxesScores.pkl"
    video_path = 'AICity_data/train/S03/c010/vdo.avi'
    gt_path = 'ai_challenge_s03_c010-full_annotation.xml'
    threshold = 0.5  # minimum IoU to link detections across consecutive frames
    kill_time = 90  # number of frames without a match before a track is closed
    video = True
    showVid = True
    compute_score = True

    # Get the bboxes
    frame_bboxes = []
    with (open(pkl_path, "rb")) as openfile:
        while True:
            try:
                frame_bboxes.append(pickle.load(openfile))
            except EOFError:
                break
    frame_bboxes = frame_bboxes[0]
    # correct the data to the desired format
    aux_frame_boxes = []
    for frame_b in frame_bboxes:
        auxiliar, _ = zip(*frame_b)
        aux_frame_boxes.append(list(auxiliar))
    frame_bboxes = aux_frame_boxes

    # Once we have done the detection we can start with the tracking
    bbox_per_frame = []
    id_per_frame = []
    frame = frame_bboxes[0]  # load the bbox for the first frame
    # Since we compare each frame with the next one, we loop over len - 1 frames
    for Nframe in trange(len(frame_bboxes) - 1, desc="Tracking"):
        next_frame = frame_bboxes[Nframe + 1]

        # assign a new ID to each unassigned bbox
        for i in range(len(frame)):
            new_bbox = frame[i]

            # append the bbox to the list
            bbox_per_id = []
            bbox_per_id.append(list(new_bbox))
            bbox_per_frame.append(bbox_per_id)
            # append the id to the list
            index_per_id = []
            index_per_id.append(Nframe)
            id_per_frame.append(index_per_id)

        # loop over every track and compute its IoU with each detection in the next frame
        for id in range(len(bbox_per_frame)):
            length = len(bbox_per_frame[id])
            bbox_per_id = bbox_per_frame[id]  # bboxes of a track
            bbox1 = bbox_per_id[length - 1]  # last bbox stored of the track
            index_per_id = id_per_frame[
                id]  # list of frames where the track appears

            # don't do anything if the track is closed
            if index_per_id[-1] == -1:
                continue

            # get the list of ious, one with each detection of the next frame
            iou_list = []
            for detections in range(len(next_frame)):
                bbox2 = next_frame[detections]  # detection of the next frame
                iou_list.append(iou(np.array(bbox1), bbox2))

            # break the loop if there are no more bboxes in the frame to track
            if len(next_frame) == 0:
                # kill_time control
                not_in_scene = Nframe - index_per_id[-1]  # frames since last tracked
                if not_in_scene > kill_time:  # past the kill_time, close the track by appending -1
                    index_per_id.append(-1)
                break

            # assign the bbox to the closest track
            best_iou = max(iou_list)
            # if the max IoU is below the threshold, we assume there is no correspondence
            if best_iou > threshold:
                best_detection = [
                    j for j, k in enumerate(iou_list) if k == best_iou
                ]
                best_detection = best_detection[0]

                # append to the list the bbox of the next frame
                bbox_per_id.append(list(next_frame[best_detection]))
                index_per_id.append(Nframe + 1)

                # we delete the detection from the list in order to speed up the following comparisons
                del next_frame[best_detection]
            else:
                # kill_time control
                not_in_scene = Nframe - index_per_id[-1]  # frames since last tracked
                if not_in_scene > kill_time:  # past the kill_time, close the track by appending -1
                    index_per_id.append(-1)

        frame = next_frame  # the next frame will be the current

    if video:
        # Generate colors for each track
        id_colors = []
        for i in range(len(id_per_frame)):
            color = list(np.random.choice(range(256), size=3))
            id_colors.append(color)

        # Define the codec and create VideoWriter object
        vidCapture = cv2.VideoCapture(video_path)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter('task2_1.avi', fourcc, 10.0, (1920, 1080))
        # for each frame draw rectangles to the detected bboxes
        for i in trange(len(frame_bboxes), desc="Video"):
            vidCapture.set(cv2.CAP_PROP_POS_FRAMES, i)
            im = vidCapture.read()[1]
            for id in range(len(id_per_frame)):
                ids = id_per_frame[id]
                if i in ids:
                    id_index = ids.index(i)
                    bbox = bbox_per_frame[id][id_index]
                    color = id_colors[id]
                    cv2.rectangle(
                        im, (int(bbox[0]), int(bbox[1])),
                        (int(bbox[2]), int(bbox[3])),
                        (int(color[0]), int(color[1]), int(color[2])), 2)
                    cv2.putText(im, 'ID: ' + str(id),
                                (int(bbox[0]), int(bbox[1]) - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                (int(color[0]), int(color[1]), int(color[2])),
                                2)
            if showVid:
                cv2.imshow('Video', im)
            out.write(im)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        vidCapture.release()
        out.release()
        cv2.destroyAllWindows()

    if compute_score:
        # Load gt for plot
        reader = ReadData(gt_path)
        gt, num_iter = reader.getGTfromXML()

        # init accumulator
        acc = mm.MOTAccumulator(auto_id=True)

        # Loop for all frames
        for Nframe in trange(len(frame_bboxes), desc="Score"):

            # get the ids of the tracks from the ground truth at this frame
            gt_list = [item[1] for item in gt if item[0] == Nframe]
            gt_list = np.unique(gt_list)

            # get the ids of the detected tracks at this frame
            pred_list = []
            for ID in range(len(id_per_frame)):
                aux = np.where(np.array(id_per_frame[ID]) == Nframe)[0]
                if len(aux) > 0:
                    pred_list.append(int(ID))

            # compute the distance for each pair
            distances = []
            for i in range(len(gt_list)):
                dist = []
                # compute the ground truth bbox
                bboxGT = gt_list[i]
                bboxGT = [
                    item[3:7] for item in gt
                    if (item[0] == Nframe and item[1] == bboxGT)
                ]
                bboxGT = list(bboxGT[0])
                # compute centroid GT
                centerGT = centroid(bboxGT)
                for j in range(len(pred_list)):
                    # compute the predicted bbox
                    bboxPR = pred_list[j]
                    aux_id = id_per_frame[bboxPR].index(Nframe)
                    bboxPR = bbox_per_frame[bboxPR][aux_id]
                    # compute centroid PR
                    centerPR = centroid(bboxPR)
                    d = euclid_dist(centerGT, centerPR)  # euclidean distance
                    dist.append(d)
                distances.append(dist)

            # update the accumulator
            acc.update(gt_list, pred_list, distances)

        # Compute and show the final metric results
        mh = mm.metrics.create()
        summary = mh.compute(acc, metrics=['idf1'], name='IDF1:')
        strsummary = mm.io.render_summary(summary,
                                          formatters={'idf1': '{:.2%}'.format},
                                          namemap={'idf1': 'idf1'})
        print(strsummary)
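
The manual distance loops above can also be expressed with motmetrics' vectorized helper; a small sketch (the centroid arrays are made-up values):

import numpy as np
import motmetrics as mm

gt_centers = np.array([[10.0, 20.0], [40.0, 50.0]])
pr_centers = np.array([[12.0, 21.0]])
# pairs farther apart than sqrt(max_d2) become NaN, i.e. no match is allowed
dists = mm.distances.norm2squared_matrix(gt_centers, pr_centers, max_d2=200.0)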
Example #10
def torchModel(model_name, video_path, xml_path, init_frame, end_frame):

    reader = ReadData(xml_path)
    gt, num_iter = reader.getGTfromXML()
    vid = VideoModel(path=video_path, color_space='gray')
    vidLen = vid.retVidLen()
    sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(
        gt, init_frame, end_frame)
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    if model_name == 'maskRCNN':
        model = detection.maskrcnn_resnet50_fpn(pretrained=True)

    vidCapture = cv2.VideoCapture(video_path)

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()

    with torch.no_grad():
        thr = [0.3]
        tensor = transforms.ToTensor()
        count = 0
        for minConf in thr:
            labels = []
            boxes = []
            scores = []
            frames = []
            for fr in tqdm(range(init_frame, end_frame)):

                vidCapture.set(cv2.CAP_PROP_POS_FRAMES, fr)
                im = vidCapture.read()[1]

                x = [tensor(im).to(device)]
                bbox_pred = model(x)[0]

                ordered_bbox = list(
                    zip(bbox_pred['labels'], bbox_pred['scores'],
                        bbox_pred['boxes']))
                # get car bboxes
                car_bbox = []
                for pred in ordered_bbox:
                    if pred[0] == 3 and pred[1] > minConf:
                        car_bbox.append(pred)

                for box in car_bbox:
                    labels.append('car')
                    scores.append(box[1])
                    boxes.append(box[2])
                    frames.append(fr)

                    count += 1

            predictionsInfo = reader.fixFormat(frames, boxes, labels, scores,
                                               False)
            gtInfo = reader.resetGT(gtInfo)
            rec, prec, ap, meanIoU, meanIoUF = ap_score(gtInfo,
                                                        predictionsInfo,
                                                        num_bboxes=len(boxes),
                                                        ovthresh=0.5)

        showVid = True

        if showVid:
            graph = PlotCreator()
            graph.plotVid(init_frame, end_frame, vidCapture, gtInfo,
                          predictionsInfo)

    return rec, prec, ap, meanIoU, meanIoUF
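
Since torchvision detection models return dicts of tensors, the per-prediction loop above can also be written with boolean masks (label 3 is 'car' in the COCO ordering these models use). A hedged sketch with a hypothetical helper name:

def filter_cars(bbox_pred, min_conf=0.3):
    # bbox_pred: one output dict of model(x), as in the loop above
    keep = (bbox_pred['labels'] == 3) & (bbox_pred['scores'] > min_conf)
    return bbox_pred['boxes'][keep], bbox_pred['scores'][keep]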
Example #11
def task2():
    path = 'ai_challenge_s03_c010-full_annotation.xml'
    video_path = 'AICity_data/train/S03/c010/vdo.avi'
    # Networks mask_rcnn, det_ssd512 and det_yolo3 (task 1.2)
    pred_paths = ['AICity_data/train/S03/c010/det/det_mask_rcnn.txt',
                  'AICity_data/train/S03/c010/det/det_ssd512.txt',
                  'AICity_data/train/S03/c010/det/det_yolo3.txt']

    # Import GT
    reader = ReadData(path)
    gt, num_iter = reader.getGTfromXML()
    sortedFrames_gt, sortedBBOX_gt = reader.bboxInFrame(gt)
    gtInfo = reader.joinBBOXfromFrame(sortedFrames_gt, sortedBBOX_gt, isGT=True)

    # Noisy GT (task 1.1)
    stdPixels = 10.0
    predictedBBOX = []
    for i in range(len(sortedFrames_gt)):
        noisyBBOX = sortedBBOX_gt[i] + np.random.normal(0, stdPixels, 4)  # mean 0, std = stdPixels
        predictedBBOX.append(noisyBBOX)

    predictionsInfo = reader.joinBBOXfromFrame(sortedFrames_gt, predictedBBOX, isGT=False)
    rec, prec, ap, meanIoU = ap_score(gtInfo, predictionsInfo, num_bboxes=len(sortedFrames_gt), ovthresh=0.5)

    video_with_bbox(video_path, 'noisyGT', gtInfo, predictionsInfo)
    meanIoUvideoplot('noisyGT', meanIoU)

    pred_nets = ['mask_rcnn', 'det_ssd512', 'det_yolo3']

    for pc in range(len(pred_nets)):
        reader = ReadData(pred_paths[pc])
        pred, _ = reader.getPredfromTXT()
        sortedFrames_pred, sortedBBOX_pred, sortedScore_pred = reader.bboxInFrame_Score(pred)
        predictionsInfo = reader.joinBBOXfromFrame_Score(sortedFrames_pred, sortedBBOX_pred, sortedScore_pred, isGT=False)

        rec, prec, ap, meanIoU = VOC_ap_score(gtInfo, predictionsInfo, num_bboxes=len(sortedFrames_gt), ovthresh=0.5)

        video_with_bbox(video_path, pred_nets[pc], gtInfo, predictionsInfo)
        meanIoUvideoplot(pred_nets[pc], meanIoU)
Example #12
def detectronModels(model_name, video_path, xml_path, init_frame, end_frame):
    # set up configuration
    cfg = get_cfg()
    # Adding the configuration to a desired model
    if model_name == 'retinaNet':
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_50_FPN_3x.yaml"))
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_50_FPN_3x.yaml")
    elif model_name == 'fasterRCNN':
        cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")

    ############### INFERENCE
    cap = cv2.VideoCapture(video_path)

    if model_name == 'retinaNet':
        cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.3
    else:
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3
    predictor = DefaultPredictor(cfg)

    frames = []
    scores = []
    labels = []
    bboxes = []
    boxesScore_pkl = []

    for frame_idx in tqdm(range(init_frame, end_frame)):
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        im = cap.read()[1]
        output = predictor(im)

        car_instances = output["instances"][output["instances"].pred_classes == 2].to("cpu")  # COCO class 2 = 'car'
        box_pkl = []
        for idx in range(len(car_instances.pred_boxes)):
            bbox = car_instances.pred_boxes[idx]
            score = car_instances.scores[idx]

            bboxes.append(bbox.tensor[0])
            frames.append(frame_idx)
            scores.append(score)
            labels.append('Car')

            box_pkl.append([bbox.tensor[0].cpu().numpy(), score.cpu().numpy()])
        boxesScore_pkl.append(box_pkl)

    with open('boxesScores.pkl', 'wb') as f:
        pickle.dump(boxesScore_pkl, f)

    prediction_results = [frames, scores, labels, bboxes]
    with open('prediction_results_retina.pkl', 'wb') as f:
        pickle.dump(prediction_results, f)

    reader = ReadData(xml_path)
    predictionsInfo = reader.fixFormat(frames, bboxes, labels, scores, False)
    gt, num_iter = reader.getGTfromXML()
    sortedFrames, sortedBBOX, numBBOX = reader.bboxInFrame(gt=gt, initFrame=init_frame, endFrame=end_frame - 1)
    gtInfo = reader.joinBBOXfromFrame(sortedFrames, sortedBBOX, isGT=True)

    gtInfo = reader.resetGT(gtInfo)
    rec, prec, ap, meanIoU, meanIoUF = ap_score(gtInfo, predictionsInfo, num_bboxes=len(bboxes), ovthresh=0.5)

    showVid = True
    if showVid:
        graph = PlotCreator()
        graph.plotVid(init_frame, end_frame, cap, gtInfo, predictionsInfo)

    return rec, prec, ap, meanIoU, meanIoUF
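
The pred_classes == 2 filter above relies on the COCO class ordering; one way to double-check that assumption is to inspect the metadata detectron2 registers for its built-in COCO datasets:

from detectron2.data import MetadataCatalog

print(MetadataCatalog.get("coco_2017_val").thing_classes[2])  # -> 'car'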