Example #1
import os
import pickle

import numpy as np

# NOTE: project helpers (get_dataset, load_frame_detections, iou2d, pr_to_ap)
# are assumed to be imported from the surrounding codebase.

def frameAP(opt, print_info=True):
    redo = opt.redo
    th = opt.th
    split = 'val'
    model_name = opt.model_name
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)

    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    vlist = dataset._test_videos[opt.split - 1]
    # load per-frame detections
    frame_detections_file = os.path.join(inference_dirname,
                                         'frame_detections.pkl')
    if os.path.isfile(frame_detections_file) and not redo:
        with open(frame_detections_file, 'rb') as fid:
            alldets = pickle.load(fid)
    else:
        alldets = load_frame_detections(opt, dataset, opt.K, vlist,
                                        inference_dirname)
        try:
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid, protocol=4)
        except OverflowError:
            # caching to disk failed; the detections are still available in memory
            print(
                "OverflowError: cannot serialize a bytes object larger than 4 GiB"
            )

    results = {}
    # compute AP for each class
    for ilabel, label in enumerate(dataset.labels):
        # detections of this class
        detections = alldets[alldets[:, 2] == ilabel, :]

        # load ground-truth of this class
        gt = {}
        for iv, v in enumerate(vlist):
            tubes = dataset._gttubes[v]

            if ilabel not in tubes:
                continue

            for tube in tubes[ilabel]:
                for i in range(tube.shape[0]):
                    k = (iv, int(tube[i, 0]))
                    if k not in gt:
                        gt[k] = []
                    gt[k].append(tube[i, 1:5].tolist())

        for k in gt:
            gt[k] = np.array(gt[k])

        # pr will be an array containing precision-recall values
        pr = np.empty((detections.shape[0] + 1, 2),
                      dtype=np.float32)  # precision,recall
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        fn = sum([g.shape[0] for g in gt.values()])  # false negatives
        fp = 0  # false positives
        tp = 0  # true positives

        for i, j in enumerate(np.argsort(-detections[:, 3])):
            k = (int(detections[j, 0]), int(detections[j, 1]))
            box = detections[j, 4:8]
            ispositive = False

            if k in gt:
                ious = iou2d(gt[k], box)
                amax = np.argmax(ious)

                if ious[amax] >= th:
                    ispositive = True
                    gt[k] = np.delete(gt[k], amax, 0)

                    if gt[k].size == 0:
                        del gt[k]

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1

            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        results[label] = pr

    # display results
    ap = 100 * np.array([pr_to_ap(results[label]) for label in dataset.labels])
    frameap_result = np.mean(ap)
    if print_info:
        with open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+') as log_file:
            log_file.write('\nTask_{} frameAP_{}\n'.format(model_name, th))
            print('Task_{} frameAP_{}\n'.format(model_name, th))
            log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", frameap_result))
        print("{:20s} {:8.2f}".format("mAP", frameap_result))

    return frameap_result
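
The helper pr_to_ap used above is not shown on this page. Below is a minimal sketch of what it could look like, assuming it integrates the precision-recall array built by frameAP (column 0 = precision, column 1 = recall) with the trapezoidal rule; the actual project helper may differ.

import numpy as np

def pr_to_ap(pr):
    """Average precision from an array of (precision, recall) rows.

    Assumes pr[:, 0] is precision and pr[:, 1] is recall, with the first row
    initialized to (1.0, 0.0) as in frameAP above.
    """
    recall_diff = pr[1:, 1] - pr[:-1, 1]        # increase in recall at each step
    precision_sum = pr[1:, 0] + pr[:-1, 0]      # adjacent precision values
    return np.sum(recall_diff * precision_sum * 0.5)  # trapezoidal integration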
Example #2
import copy
import os
import pickle

import numpy as np

# NOTE: project helpers (get_dataset, load_frame_detections,
# load_frame_detections_stream, iou2d, pr_to_ap) are assumed to be imported
# from the surrounding codebase.

def frameAP(opt, print_info=True):
    redo = opt.redo
    th = opt.th
    split = 'val'
    model_name = opt.model_name
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)

    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)

    # ORIG
    vlist = dataset._test_videos[opt.split - 1]
    #vlist = dataset._train_videos[opt.split - 1]
    '''
    # ADDED: to analyze a specific class
    tar_class = 'wave'
    vlist_filt = []
    for vv in range(len(vlist)):
        cls_name, clip_name = vlist[vv].split('/')
        if cls_name == tar_class:
            vlist_filt.append(vlist[vv])
    vlist = vlist_filt
    '''

    # load per-frame detections
    frame_detections_file = os.path.join(inference_dirname,
                                         'frame_detections.pkl')
    if os.path.isfile(frame_detections_file) and not redo:
        print('load previous linking results...')
        print('if you want to reproduce it, please add --redo')
        with open(frame_detections_file, 'rb') as fid:
            alldets = pickle.load(fid)
    else:

        if opt.inference_mode == 'stream':
            alldets = load_frame_detections_stream(opt, dataset, opt.K, vlist,
                                                   inference_dirname)
        else:
            alldets = load_frame_detections(opt, dataset, opt.K, vlist,
                                            inference_dirname)
        try:
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid, protocol=4)
        except OverflowError:
            # caching to disk failed; the detections are still available in memory
            print(
                "OverflowError: cannot serialize a bytes object larger than 4 GiB"
            )

    results = {}
    # compute AP for each class
    for ilabel, label in enumerate(dataset.labels):  # e.g.: 0, 'brush_hair'
        # detections of this class
        detections = alldets[alldets[:, 2] == ilabel, :]

        # load ground-truth of this class
        gt = {}
        for iv, v in enumerate(vlist):
            tubes = dataset._gttubes[v]

            if ilabel not in tubes:
                continue

            for tube in tubes[ilabel]:
                for i in range(tube.shape[0]):  # for each frame
                    k = (iv, int(tube[i, 0]))  # video id, frame id
                    if k not in gt:  # if not yet added to gt
                        gt[k] = []
                    gt[k].append(tube[i, 1:5].tolist())

        for k in gt:
            gt[k] = np.array(gt[k])

        # added: record of the original gt of a class (it won't be deleted or modified during evaluation)
        if opt.evaluation_mode == 'trimmed':
            gt_past = copy.deepcopy(gt)
            gt_keys_list = list(gt.keys())
            gt_vid = []
            for vv in gt_keys_list:
                if vv[0] in gt_vid:
                    continue
                gt_vid.append(vv[0])

        # pr will be an array containing precision-recall values
        #pr = np.empty((detections.shape[0] + 1, 2), dtype=np.float32)  # precision,recall
        pr = -1 * np.ones(
            (detections.shape[0] + 1, 2), dtype=np.float32)  # precision,recall
        pr[0, 0] = 1.0
        pr[0, 1] = 0.0
        fn = sum(
            [g.shape[0] for g in gt.values()]
        )  # false negatives # ALPHA: == number of frames (each frame has exactly 1 action instance)
        fp = 0  # false positives
        tp = 0  # true positives
        '''
        # Below may not be needed now if detection is conducted on all frames
        # ADDED: remove potential fn (when not evaluating all frames)?
        # Confirmed: can still be used when evaluating the whole set (at least for JHMDB)
        if opt.dataset == 'hmdb':
            num_tp = 0
            prev_k = (-1, -1)
            for ii, jj in enumerate(detections): 
                k = (int(detections[ii, 0]), int(detections[ii, 1])) # (video id, frame id)
                if k in gt and k != prev_k:
                    num_tp += 1
                prev_k = k
            fn = num_tp
        '''
        # j: index of the detection, ordered from highest to lowest score
        for i, j in enumerate(np.argsort(-detections[:, 3])):
            k = (int(detections[j, 0]), int(detections[j, 1]))  # (video id, frame id)
            box = detections[j, 4:8]
            ispositive = False

            if k in gt:
                ious = iou2d(gt[k], box)
                amax = np.argmax(ious)

                if ious[amax] >= th:
                    ispositive = True
                    gt[k] = np.delete(gt[k], amax, 0)

                    if gt[k].size == 0:
                        del gt[k]

            # trimmed evaluation (useful for untrimmed videos such as ucf24):
            # skip a detection whose frame is not in the original gt list even
            # though its video id is annotated for this class

            if opt.evaluation_mode == 'trimmed':
                if k[0] in gt_vid and not (k in gt_past):
                    continue

            if ispositive:
                tp += 1
                fn -= 1
            else:
                fp += 1

            # ADDED: to avoid division by zero error; is it needed?
            if tp + fp == 0 or tp + fn == 0:
                continue

            pr[i + 1, 0] = float(tp) / float(tp + fp)
            pr[i + 1, 1] = float(tp) / float(tp + fn)

        pr_trimmed = pr[pr[:, 0] != -1]
        results[label] = pr_trimmed

    # display results
    ap = 100 * np.array([pr_to_ap(results[label]) for label in dataset.labels])

    # ADDED: display individual class performance
    frameap_percls = {}
    for cl, cls_name in enumerate(dataset.labels):
        frameap_percls[cls_name] = ap[cl]

    for key, value in frameap_percls.items():
        print(key, ':', value)

    frameap_result = np.mean(ap)
    if print_info:
        with open(os.path.join(opt.root_dir, 'result', opt.exp_id), 'a+') as log_file:
            log_file.write('\nTask_{} frameAP_{}\n'.format(model_name, th))
            print('Task_{} frameAP_{}\n'.format(model_name, th))
            log_file.write("\n{:20s} {:8.2f}\n\n".format("mAP", frameap_result))
        print("{:20s} {:8.2f}".format("mAP", frameap_result))

    return frameap_result
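
iou2d is another project helper assumed by these examples. Below is a minimal sketch of the expected behaviour, assuming it returns the IoU between each row of an (N, 4) box array and a single (x1, y1, x2, y2) box; the real implementation may, for instance, use inclusive pixel coordinates (+1 on widths and heights).

import numpy as np

def iou2d(boxes, box):
    """IoU between every box in `boxes` (N x 4) and a single `box` (4,)."""
    boxes = np.atleast_2d(boxes)
    # intersection rectangle between each box and `box`
    x1 = np.maximum(boxes[:, 0], box[0])
    y1 = np.maximum(boxes[:, 1], box[1])
    x2 = np.minimum(boxes[:, 2], box[2])
    y2 = np.minimum(boxes[:, 3], box[3])
    inter = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
    area_boxes = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    area_box = (box[2] - box[0]) * (box[3] - box[1])
    return inter / (area_boxes + area_box - inter)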
Example #3
import os
import pickle
import sys
from copy import deepcopy

import numpy as np

# NOTE: project helpers (get_dataset, load_frame_detections, iou2d, pr_to_ap,
# nms_tubelets, VisualizationDataset) are assumed to be imported from the
# surrounding codebase.

def frameAP_error(opt, redo=False):
    th = opt.th
    split = 'val'
    Dataset = get_dataset(opt.dataset)
    dataset = Dataset(opt, split)
    inference_dirname = opt.inference_dir
    print('inference_dirname is ', inference_dirname)
    print('threshold is ', th)
    eval_file = os.path.join(inference_dirname,
                             "frameAP{:g}ErrorAnalysis.pkl".format(th))

    if os.path.isfile(eval_file) and not redo:
        with open(eval_file, 'rb') as fid:
            res = pickle.load(fid)
    else:
        vlist = dataset._test_videos[opt.split - 1]
        # load per-frame detections
        frame_detections_file = os.path.join(inference_dirname,
                                             'frame_detections.pkl')
        if os.path.isfile(frame_detections_file) and not redo:
            print('load frameAP pre-result')
            with open(frame_detections_file, 'rb') as fid:
                alldets = pickle.load(fid)
        else:
            alldets = load_frame_detections(opt, dataset, opt.K, vlist,
                                            inference_dirname)
            with open(frame_detections_file, 'wb') as fid:
                pickle.dump(alldets, fid)
        res = {}
        # alldets: list of numpy array with <video_index> <frame_index> <ilabel> <score> <x1> <y1> <x2> <y2>
        # compute AP for each class
        print(len(dataset.labels))
        for ilabel, label in enumerate(dataset.labels):
            # detections of this class
            detections = alldets[alldets[:, 2] == ilabel, :]

            gt = {}
            othergt = {}
            labellist = {}

            # iv,v : 0 Basketball/v_Basketball_g01_c01
            for iv, v in enumerate(vlist):
                # tubes: dict {ilabel: (list of)<frame number> <x1> <y1> <x2> <y2>}
                tubes = dataset._gttubes[v]
                # labellist[iv]: label list for v
                labellist[iv] = tubes.keys()

                for il in tubes:
                    # tube: list of <frame number> <x1> <y1> <x2> <y2>
                    for tube in tubes[il]:
                        for i in range(tube.shape[0]):
                            # k: (video_index, frame_index)
                            k = (iv, int(tube[i, 0]))
                            if il == ilabel:
                                if k not in gt:
                                    gt[k] = []
                                gt[k].append(tube[i, 1:5].tolist())
                            else:
                                if k not in othergt:
                                    othergt[k] = []
                                othergt[k].append(tube[i, 1:5].tolist())

            for k in gt:
                gt[k] = np.array(gt[k])
            for k in othergt:
                othergt[k] = np.array(othergt[k])

            dupgt = deepcopy(gt)

            # pr will be an array containing precision-recall values and 4 types of errors:
            # localization, classification, timing, others
            pr = np.empty((detections.shape[0] + 1, 6),
                          dtype=np.float32)  # precision, recall, EL, EC, ET, EO
            pr[0, 0] = 1.0
            pr[0, 1:] = 0.0

            fn = sum([g.shape[0] for g in gt.values()])  # false negatives
            fp = 0  # false positives
            tp = 0  # true positives
            EL = 0  # localization errors
            EC = 0  # classification error: overlap >= th with an object of another class
            EO = 0  # other errors
            ET = 0  # timing error: the video contains the action but not at this frame

            for i, j in enumerate(np.argsort(-detections[:, 3])):
                k = (int(detections[j, 0]), int(detections[j, 1]))
                box = detections[j, 4:8]
                ispositive = False

                if k in dupgt:
                    if k in gt:
                        ious = iou2d(gt[k], box)
                        amax = np.argmax(ious)
                    if k in gt and ious[amax] >= th:
                        ispositive = True
                        gt[k] = np.delete(gt[k], amax, 0)
                        if gt[k].size == 0:
                            del gt[k]
                    else:
                        EL += 1

                elif k in othergt:
                    ious = iou2d(othergt[k], box)
                    if np.max(ious) >= th:
                        EC += 1
                    else:
                        EO += 1
                elif ilabel in labellist[k[0]]:
                    ET += 1
                else:
                    EO += 1
                if ispositive:
                    tp += 1
                    fn -= 1
                else:
                    fp += 1

                pr[i + 1, 0] = float(tp) / float(tp + fp)  # precision
                pr[i + 1, 1] = float(tp) / float(tp + fn)  # recall
                pr[i + 1, 2] = float(EL) / float(tp + fp)
                pr[i + 1, 3] = float(EC) / float(tp + fp)
                pr[i + 1, 4] = float(ET) / float(tp + fp)
                pr[i + 1, 5] = float(EO) / float(tp + fp)

            res[label] = pr

        # save results
        with open(eval_file, 'wb') as fid:
            pickle.dump(res, fid)

    # display results
    AP = 100 * np.array(
        [pr_to_ap(res[label][:, [0, 1]]) for label in dataset.labels])
    othersap = [
        100 *
        np.array([pr_to_ap(res[label][:, [j, 1]]) for label in dataset.labels])
        for j in range(2, 6)
    ]

    EL = othersap[0]
    EC = othersap[1]
    ET = othersap[2]
    EO = othersap[3]
    # missed detections = 1 - recall
    EM = 100 - 100 * np.array([res[label][-1, 1] for label in dataset.labels])

    LIST = [AP, EL, EC, ET, EO, EM]

    print('Error Analysis')

    print("")
    print("{:20s} {:8s} {:8s} {:8s} {:8s} {:8s} {:8s}".format(
        'label', '   AP   ', '  Loc.  ', '  Cls.  ', '  Time  ', ' Other ',
        ' missed '))
    print("")
    for il, label in enumerate(dataset.labels):
        print("{:20s} ".format(label) +
              " ".join(["{:8.2f}".format(L[il]) for L in LIST]))

    print("")
    print("{:20s} ".format("mean") +
          " ".join(["{:8.2f}".format(np.mean(L)) for L in LIST]))
    print("")
def build_tubes(opt):
    print('inference finished, start building tubes!', flush=True)
    K = opt.K
    dataset = VisualizationDataset(opt)

    outfile = os.path.join(opt.inference_dir, "tubes.pkl")

    RES = {}
    # load detected tubelets
    VDets = {}
    for startframe in range(1, dataset._nframes + 2 - K):
        resname = os.path.join(opt.inference_dir,
                               "{:0>5}.pkl".format(startframe))
        if not os.path.isfile(resname):
            print("ERROR: Missing extracted tubelets " + resname, flush=True)
            sys.exit()

        with open(resname, 'rb') as fid:
            VDets[startframe] = pickle.load(fid)
    for ilabel in range(opt.num_classes):
        FINISHED_TUBES = []
        CURRENT_TUBES = []  # each tube is a list of (frame, tubelet) tuples

        # calculate average scores of tubelets in tubes

        def tubescore(tt):
            return np.mean(np.array([tt[i][1][-1] for i in range(len(tt))]))

        for frame in range(1, dataset._nframes + 2 - K):
            # load boxes of the new frame and do nms while keeping Nkeep highest scored
            ltubelets = VDets[frame][ilabel + 1]  # Nx(4K+1): (x1 y1 x2 y2)*K boxes followed by the ilabel score

            ltubelets = nms_tubelets(ltubelets, 0.6, top_k=10)

            # just start new tubes
            if frame == 1:
                for i in range(ltubelets.shape[0]):
                    CURRENT_TUBES.append([(1, ltubelets[i, :])])
                continue

            # sort current tubes according to average score
            avgscore = [tubescore(t) for t in CURRENT_TUBES]
            argsort = np.argsort(-np.array(avgscore))
            CURRENT_TUBES = [CURRENT_TUBES[i] for i in argsort]
            # loop over tubes
            finished = []
            for it, t in enumerate(CURRENT_TUBES):
                # compute ious between the last box of t and ltubelets
                last_frame, last_tubelet = t[-1]
                ious = []
                offset = frame - last_frame
                if offset < K:
                    nov = K - offset
                    ious = sum([
                        iou2d(
                            ltubelets[:, 4 * iov:4 * iov + 4],
                            last_tubelet[4 * (iov + offset):4 *
                                         (iov + offset + 1)])
                        for iov in range(nov)
                    ]) / float(nov)
                else:
                    ious = iou2d(ltubelets[:, :4],
                                 last_tubelet[4 * K - 4:4 * K])

                valid = np.where(ious >= 0.5)[0]

                if valid.size > 0:
                    # take the one with maximum score
                    idx = valid[np.argmax(ltubelets[valid, -1])]
                    CURRENT_TUBES[it].append((frame, ltubelets[idx, :]))
                    ltubelets = np.delete(ltubelets, idx, axis=0)
                else:
                    if offset >= opt.K:
                        finished.append(it)

            # move finished tubes out of the active list
            for it in finished[::-1]:  # iterate in reverse so deletion indices stay valid
                FINISHED_TUBES.append(CURRENT_TUBES[it][:])
                del CURRENT_TUBES[it]

            # start new tubes
            for i in range(ltubelets.shape[0]):
                CURRENT_TUBES.append([(frame, ltubelets[i, :])])

        # tubes still active after the last frame are kept as well
        FINISHED_TUBES += CURRENT_TUBES

        # build real tubes
        output = []
        for t in FINISHED_TUBES:
            score = tubescore(t)

            # discard tubes with a very low average score
            if score < 0.005:
                continue

            beginframe = t[0][0]
            endframe = t[-1][0] + K - 1
            length = endframe + 1 - beginframe

            # discard tubes with short duration
            if length < 15:
                continue

            # build the final tube by averaging overlapping tubelets
            out = np.zeros((length, 6), dtype=np.float32)
            out[:, 0] = np.arange(beginframe, endframe + 1)
            n_per_frame = np.zeros((length, 1), dtype=np.int32)
            for i in range(len(t)):
                frame, box = t[i]
                for k in range(K):
                    out[frame - beginframe + k, 1:5] += box[4 * k:4 * k + 4]
                    out[frame - beginframe + k,
                        -1] += box[-1]  # single frame confidence
                    n_per_frame[frame - beginframe + k, 0] += 1
            out[:, 1:] /= n_per_frame
            output.append([out, score])
            # out: [num_frames, (frame idx, x1, y1, x2, y2, score)]

        RES[ilabel] = output
        # RES: {ilabel: [[out (length x 6), score], ...]}
    os.system("rm -rf " + opt.inference_dir + "/*.pkl")  # remove the intermediate per-frame pickles
    with open(outfile, 'wb') as fid:
        pickle.dump(RES, fid)
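
build_tubes leaves a single tubes.pkl in opt.inference_dir, keyed by class index and holding [tube array, score] pairs as built in RES above. Below is a small, hypothetical inspection helper (summarize_tubes is not part of the codebase) to sanity-check that file.

import os
import pickle

def summarize_tubes(inference_dir):
    """Print, per class index, how many tubes were kept and their score/length ranges."""
    with open(os.path.join(inference_dir, "tubes.pkl"), 'rb') as fid:
        res = pickle.load(fid)
    for ilabel, tubes in res.items():
        if not tubes:
            continue
        scores = [score for _, score in tubes]
        lengths = [out.shape[0] for out, _ in tubes]
        print("class {}: {} tubes, scores {:.3f}-{:.3f}, lengths {}-{} frames".format(
            ilabel, len(tubes), min(scores), max(scores), min(lengths), max(lengths)))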