def _compute_distance_matrix(prev_json_data, prev_boxes, prev_poses,
                             cur_json_data, cur_boxes, cur_poses,
                             cost_types, cost_weights):
    assert(len(cost_types) == len(cost_weights))
    all_Cs = []
    for cost_type, cost_weight in zip(cost_types, cost_weights):
        if cost_weight == 0:
            continue
        if cost_type == 'bbox_overlap':
            all_Cs.append((1 - _compute_pairwise_iou(prev_boxes, cur_boxes)))
        elif cost_type == 'cnn-cosdist':
            all_Cs.append(_compute_pairwise_deep_cosine_dist(
                img_utils.get_image_path(prev_json_data), prev_boxes,
                img_utils.get_image_path(cur_json_data), cur_boxes))
        elif cost_type == 'pose-pck':
            all_Cs.append(_compute_pairwise_kps_pck_distance(prev_poses, cur_poses))
        elif cost_type == 'pose-oks':
            all_Cs.append(_compute_pairwise_kps_oks_distance(prev_poses, prev_boxes, cur_poses, cur_boxes))
        else:
            raise NotImplementedError('Unknown cost type {}'.format(cost_type))
        # print ('cost_weight', cost_weight)
        # print ('all_Cs', all_Cs)
        all_Cs[-1] *= cost_weight
        # print ('before sum', all_Cs)
    return np.sum(np.stack(all_Cs, axis=0), axis=0)
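# A minimal sketch of what the _compute_pairwise_iou helper used for the
# 'bbox_overlap' cost above could look like, assuming boxes are (N, >=4)
# arrays of [x1, y1, x2, y2, ...] in pixel coordinates; the project's actual
# helper may be vectorized and differ in details.
import numpy as np

def _compute_pairwise_iou_sketch(boxes_a, boxes_b):
    """Return a (len(boxes_a), len(boxes_b)) matrix of IoU values."""
    ious = np.zeros((boxes_a.shape[0], boxes_b.shape[0]), dtype=np.float64)
    for i, a in enumerate(boxes_a):
        area_a = max(0.0, a[2] - a[0]) * max(0.0, a[3] - a[1])
        for j, b in enumerate(boxes_b):
            area_b = max(0.0, b[2] - b[0]) * max(0.0, b[3] - b[1])
            iw = min(a[2], b[2]) - max(a[0], b[0])
            ih = min(a[3], b[3]) - max(a[1], b[1])
            inter = max(0.0, iw) * max(0.0, ih)
            union = area_a + area_b - inter
            ious[i, j] = inter / union if union > 0 else 0.0
    return ious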
def _compute_distance_matrix(
    prev_json_data,
    prev_boxes,
    prev_poses,
    cur_json_data,
    cur_boxes,
    cur_poses,
    cost_types,
    cost_weights,
):
    assert (len(cost_weights) == len(cost_types))
    all_Cs = []
    for cost_type, cost_weight in zip(cost_types, cost_weights):
        if cost_weight == 0:
            continue
        if cost_type == 'bbox-overlap':
            all_Cs.append((1 - _compute_pairwise_iou(prev_boxes, cur_boxes)))
        elif cost_type == 'cnn-cosdist':
            all_Cs.append(
                _compute_pairwise_deep_cosine_dist(
                    img_utils.get_image_path(prev_json_data), prev_boxes,
                    img_utils.get_image_path(cur_json_data), cur_boxes))
        elif cost_type == 'pose-pck':
            kps_names = cur_json_data['dataset'].person_cat_info['keypoints']
            all_Cs.append(
                _compute_pairwise_kpt_distance(prev_poses, cur_poses,
                                               kps_names))
        else:
            raise NotImplementedError('Unknown cost type {}'.format(cost_type))
        all_Cs[-1] *= cost_weight
    return np.sum(np.stack(all_Cs, axis=0), axis=0)
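# The 'cnn-cosdist' cost relies on _compute_pairwise_deep_cosine_dist, which
# crops each box from its frame, runs a CNN over the crops, and compares the
# resulting features. The feature extraction is project-specific; below is a
# hedged sketch of only the distance part, assuming feats_prev and feats_cur
# are (N, D) and (M, D) feature matrices (names are illustrative).
import numpy as np

def _pairwise_cosine_dist_sketch(feats_prev, feats_cur, eps=1e-8):
    """Return an (N, M) matrix of cosine distances (1 - cosine similarity)."""
    a = feats_prev / (np.linalg.norm(feats_prev, axis=1, keepdims=True) + eps)
    b = feats_cur / (np.linalg.norm(feats_cur, axis=1, keepdims=True) + eps)
    return 1.0 - a.dot(b.T)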
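# The 'pose-pck' cost compares keypoints with a PCK-style metric. A hedged
# sketch, assuming each pose is an array whose first two rows hold the x and y
# keypoint coordinates, and that the distance is the fraction of keypoints
# falling outside a pixel threshold; the project's helper likely normalizes by
# person size instead of using a fixed pixel threshold.
import numpy as np

def _pairwise_pck_dist_sketch(prev_poses, cur_poses, pix_thresh=20.0):
    """Return a (len(prev_poses), len(cur_poses)) matrix of PCK distances."""
    dist = np.zeros((len(prev_poses), len(cur_poses)))
    for i, prev_pose in enumerate(prev_poses):
        for j, cur_pose in enumerate(cur_poses):
            d = np.linalg.norm(prev_pose[:2, :] - cur_pose[:2, :], axis=0)
            dist[i, j] = np.mean(d > pix_thresh)  # fraction of "incorrect" kps
    return dist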
def compute_matches_tracks(json_data, dets, lstm_model):
    # Consider all consecutive frames, and match the boxes
    num_imgs = len(json_data)
    all_tracks = [[]] * len(json_data)
    # First split the images into videos
    all_video_roidb = []
    video_entries = []
    for img_id in range(num_imgs):
        if img_id == 0 or _is_same_video(json_data[img_id - 1],
                                         json_data[img_id]):
            video_entries.append((json_data[img_id], img_id))
        else:
            all_video_roidb.append(
                sorted(video_entries,
                       key=lambda x: img_utils.get_image_path(x[0])))
            video_entries = [(json_data[img_id], img_id)]
    if len(video_entries) > 0:
        all_video_roidb.append(video_entries)
    # Make sure I got everything
    assert (len(json_data) == len(gen_utils.flatten_list(all_video_roidb)))
    logger.info('Computing tracks for {} videos.'.format(len(all_video_roidb)))
    for vid_id in tqdm(range(len(all_video_roidb)), desc='Tracks compute'):
        if cfg.TRACKING.LSTM_TEST.LSTM_TRACKING_ON:
            tracks = _compute_tracks_video_lstm(all_video_roidb[vid_id], dets,
                                                lstm_model)
        else:
            tracks = _compute_tracks_video(all_video_roidb[vid_id], dets)
        if cfg.TRACKING.FLOW_SMOOTHING_ON:
            tracks = _smooth_pose_video(all_video_roidb[vid_id], dets, tracks)
        # resort and assign
        for i, (_, det_id) in enumerate(all_video_roidb[vid_id]):
            all_tracks[det_id] = tracks[i]
            if cfg.TRACKING.DEBUG.DUMMY_TRACKS:
                # Replace with random track IDs
                all_tracks[det_id] = [
                    np.random.randint(FIRST_TRACK_ID, MAX_TRACK_IDS + 1)
                    for _ in tracks[i]
                ]
    dets['all_tracks'] = [[], all_tracks]
    _summarize_track_stats(all_tracks, json_data)
    return dets
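# Note: dets['all_tracks'] = [[], all_tracks] above presumably mirrors the
# per-class layout of dets['all_boxes'] / dets['all_keyps'], where index 1
# holds the person class (see _run_posetrack_eval below, which reads
# dets['all_tracks'][1][i]); index 0 (the background class) stays empty.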
def _known_shot_change(video_json_data, frame_id):
    if not cfg.TRACKING.DEBUG.UPPER_BOUND_3_SHOTS:
        # Only use it in debugging mode, to get an upper bound
        return False
    if frame_id == 0:
        return True
    # read the CSV
    import csv
    D = {}
    with open('/path/to/shot_boundaries_val.csv', 'r') as fin:
        reader = csv.reader(fin)
        for row in reader:
            D[row[0]] = [int(el) for el in row[1].strip().split(',') if len(el) > 0]
    vname = osp.dirname(img_utils.get_image_path(video_json_data[frame_id][0]))
    vname = osp.basename(osp.dirname(vname)) + '/' + osp.basename(vname)
    assert(vname in D)
    # frame_id + 1 since frame_ids are 0-indexed, but the labels were made 1-indexed
    if frame_id + 1 in D[vname]:
        return True
    else:
        return False
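# From the parsing above, each row of the shot-boundaries CSV (the path shown
# is a placeholder) is assumed to hold the "<parent_dir>/<video_dir>" key in
# column 0 and a quoted, comma-separated list of 1-indexed shot-change frame
# numbers in column 1, e.g. with purely illustrative values:
#   val/some_video_0001,"1,87,243"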
def _is_same_video(json1, json2):
    return (osp.dirname(img_utils.get_image_path(json1)) == osp.dirname(
        img_utils.get_image_path(json2)))
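# _compute_matches (used below but not shown here) matches current boxes to
# previous boxes through the weighted cost matrix. A minimal sketch of what
# its Hungarian branch could look like, assuming the cost matrix has shape
# (num_prev, num_cur) and the result maps each current box to a previous index
# (-1 when unmatched); names and the optional threshold are illustrative.
import numpy as np
from scipy.optimize import linear_sum_assignment

def _hungarian_matches_sketch(cost_matrix, cost_threshold=None):
    matches = -np.ones((cost_matrix.shape[1], ), dtype=np.int32)
    prev_inds, cur_inds = linear_sum_assignment(cost_matrix)
    for p, c in zip(prev_inds, cur_inds):
        if cost_threshold is None or cost_matrix[p, c] <= cost_threshold:
            matches[c] = p
    return matches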
def _compute_tracks_video(video_json_data, dets):
    nframes = len(video_json_data)
    video_tracks = []
    next_track_id = FIRST_TRACK_ID
    for frame_id in range(nframes):
        frame_tracks = []
        # each element is (roidb entry, idx in the dets/original roidb)
        frame_data, det_id = video_json_data[frame_id]
        cur_boxes = _get_boxes(dets, det_id)
        cur_poses = _get_poses(dets, det_id)
        if (frame_id == 0 or _known_shot_change(video_json_data, frame_id)) \
                and not cfg.TRACKING.DEBUG.UPPER_BOUND:
            matches = -np.ones((cur_boxes.shape[0], ))
        else:
            cur_frame_data = frame_data
            if cfg.TRACKING.DEBUG.UPPER_BOUND:
                prev_boxes = frame_data['boxes']
                prev_poses = [el for el in frame_data['gt_keypoints']]
                prev_frame_data = {
                    'image': img_utils.get_image_path(cur_frame_data)
                }
            else:
                prev_boxes = _get_boxes(dets, video_json_data[frame_id - 1][1])
                prev_poses = _get_poses(dets, video_json_data[frame_id - 1][1])
                # [0] selects the roidb entry, dropping the index into the dets structure
                prev_frame_data = video_json_data[frame_id - 1][0]
            matches = _compute_matches(
                prev_frame_data,
                cur_frame_data,
                prev_boxes,
                cur_boxes,
                prev_poses,
                cur_poses,
                cost_types=cfg.TRACKING.DISTANCE_METRICS,
                cost_weights=cfg.TRACKING.DISTANCE_METRIC_WTS,
                bipart_match_algo=cfg.TRACKING.BIPARTITE_MATCHING_ALGO)
        if cfg.TRACKING.DEBUG.UPPER_BOUND:
            prev_tracks = frame_data['tracks'].reshape((-1)).tolist()
            matched = np.where(np.array(matches) != -1)[0]
            # Remove things unmatched
            matches = matches[matched]
            new_boxes = _get_boxes(dets, det_id)[matched]
            # This didn't help; it only made the pose score go down
            # new_boxes[:, -1] = 1.0  # make the detections 100% confidence
            new_poses = [_get_poses(dets, det_id)[el] for el in matched]
            if cfg.TRACKING.DEBUG.UPPER_BOUND_2_GT_KPS:
                # Set the points to be GT points
                new_boxes[:, :4] = frame_data['boxes'][matches]
                for match_id in range(matches.shape[0]):
                    if cfg.TRACKING.DEBUG.UPPER_BOUND_2_GT_KPS_ONLY_CONF:
                        dims_to_replace = np.array(2)
                    else:
                        dims_to_replace = np.arange(3)
                    new_poses[match_id][dims_to_replace, :] = frame_data[
                        'gt_keypoints'][matches[match_id]][dims_to_replace, :]
            _set_boxes(new_boxes, dets, det_id)
            _set_poses(new_poses, dets, det_id)
        else:
            prev_tracks = video_tracks[frame_id - 1] if frame_id > 0 else None
        if cfg.TRACKING.DEBUG.UPPER_BOUND_5_GT_KPS_ONLY:
            gt_prev_boxes = frame_data['boxes']
            gt_prev_poses = [el for el in frame_data['gt_keypoints']]
            gt_prev_frame_data = {
                'image': img_utils.get_image_path(frame_data)
            }
            matches_gt = _compute_matches(gt_prev_frame_data,
                                          frame_data,
                                          gt_prev_boxes,
                                          cur_boxes,
                                          gt_prev_poses,
                                          cur_poses,
                                          cost_types=('bbox-overlap', ),
                                          cost_weights=(1.0, ),
                                          bipart_match_algo='hungarian')
            # replace the predicted poses
            for match_id in range(matches_gt.shape[0]):
                if matches_gt[match_id] == -1:
                    continue
                cur_poses[match_id][:3, :] = gt_prev_poses[
                    matches_gt[match_id]][:3, :]
        for m in matches:
            if m == -1:  # didn't match to any
                frame_tracks.append(next_track_id)
                next_track_id += 1
                if next_track_id >= MAX_TRACK_IDS:
                    logger.warning('Exceeded max track ids ({}) for {}'.format(
                        MAX_TRACK_IDS, frame_data['image']))
                    next_track_id %= MAX_TRACK_IDS
            else:
                frame_tracks.append(prev_tracks[m])
        video_tracks.append(frame_tracks)
    return video_tracks
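# Hedged usage sketch: compute_matches_tracks takes the roidb-style json_data
# list and the detections dict (the same structure read back in
# _run_posetrack_eval below, with 'all_boxes' / 'all_keyps'), plus an LSTM
# model that is only used when cfg.TRACKING.LSTM_TEST.LSTM_TRACKING_ON is set;
# it fills in dets['all_tracks'] and returns the same dets dict, e.g.:
#   dets = compute_matches_tracks(json_data, dets, lstm_model=None)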
def _run_posetrack_eval(roidb, det_file, dataset, output_dir):
    with open(det_file, 'rb') as fin:
        dets = pkl.load(fin)
    assert len(roidb) == len(dets['all_boxes'][1]), \
        'Mismatch {} vs {}'.format(len(roidb), len(dets['all_boxes'][1]))
    gen_utils.mkdir_p(output_dir)
    out_filenames = video2filenames(dataset.annotation_directory)
    out_data = {}  # each video to all predictions
    eval_tracking = False
    if 'all_tracks' in dets:
        eval_tracking = True
    for i, entry in enumerate(roidb):
        image_name = get_image_path(entry)[len(dataset.image_directory):]
        video_name = osp.dirname(image_name)
        frame_num = int(osp.basename(image_name).split('.')[0])
        boxes = dets['all_boxes'][1][i]
        kps = dets['all_keyps'][1][i]
        if eval_tracking:  # i.e., there is an 'all_tracks' entry in dets
            tracks = dets['all_tracks'][1][i]
        else:
            tracks = [1] * len(kps)
        data_el = {
            'image': image_name,
            'imagenum': [frame_num],
            'annorect': _convert_data_to_annorect_struct(boxes, kps, tracks),
        }
        if video_name in out_data:
            out_data[video_name].append(data_el)
        else:
            out_data[video_name] = [data_el]

    logger.info('Saving the JSON files to {}'.format(output_dir))
    # clear out the previous predictions, if any
    gen_utils.run_cmd('rm -r {}/*'.format(output_dir), print_cmd=False)
    for vname in tqdm(out_data.keys(), desc='Writing JSON files for eval'):
        vdata = out_data[vname]
        outfpath = osp.join(output_dir, out_filenames['images' + vname])
        # outfpath = osp.join(output_dir, out_filenames[osp.join('images', vname)])
        with open(outfpath, 'w') as fout:
            json.dump({'annolist': vdata}, fout, indent=4)

    logger.info('Wrote all predictions in JSON to {}'.format(output_dir))
    logger.info('Running dataset level evaluation...')
    st_time = time.time()
    #### jianbo: added this line and commented out the following two lines
    score_ap, score_mot, apAll, preAll, recAll, mota = _run_eval(
        dataset.annotation_directory, output_dir, eval_tracking)
    #     logger.info(_run_eval(dataset.annotation_directory, output_dir, eval_tracking))
    #     logger.info('...Done in {}'.format(time.time() - st_time))
    #####
    # TODO(rgirdhar): Do this better
    if cfg.EVAL.EVAL_MPII_PER_VIDEO:  # run the evaluation per-video
        res = []
        logger.info('Running per-video evaluation...')
        st_time = time.time()
        pervid_outpath = osp.join(
            osp.dirname(osp.normpath(output_dir)),
            osp.basename(det_file) + '_per_video_scores.txt')
        # Earlier I used multiprocessing to compute the predictions in parallel,
        # but now the eval code itself uses multiprocessing, so it cannot also be
        # used here (else it errors out because daemon processes cannot spawn
        # children). Hence the number of processes is set to 0.
        res = map(
            partial(_run_eval_single_video,
                    out_filenames=out_filenames,
                    output_dir=output_dir,
                    dataset=dataset,
                    eval_tracking=eval_tracking), out_data.keys())
        logger.info('...Done in {} seconds'.format(time.time() - st_time))
        res = sorted(res, key=lambda x: x[1])  # sort on score
        logger.info('Writing per-video scores to {}'.format(pervid_outpath))
        with open(pervid_outpath, 'w') as fout:
            for el in res:
                fout.write('{} {} {}\n'.format(el[0], el[1], el[2]))
    return score_ap, score_mot, apAll, preAll, recAll, mota
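# For reference, each per-video JSON written above has the shape
# {'annolist': [data_el, ...]}, where every data_el carries the image path
# (relative to the dataset image directory), the frame number, and the
# annorect structure produced by _convert_data_to_annorect_struct (not shown
# here). A truncated example with illustrative values:
#   {"annolist": [{"image": "val/some_video_0001/000001.jpg",
#                  "imagenum": [1],
#                  "annorect": "..."}]}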
def _run_posetrack_eval(roidb, det_file, dataset, output_dir):
    with open(det_file, 'rb') as fin:
        dets = pkl.load(fin)
    assert len(roidb) == len(dets['all_boxes'][1]), \
        'Mismatch {} vs {}'.format(len(roidb), len(dets['all_boxes'][1]))
    gen_utils.mkdir_p(output_dir)
    out_filenames = video2filenames(dataset.annotation_directory)
    out_data = {}  # each video to all predictions
    eval_tracking = False
    if 'all_tracks' in dets:
        eval_tracking = True
    for i, entry in enumerate(roidb):
        image_name = get_image_path(entry)[len(dataset.image_directory):]
        video_name = osp.dirname(image_name)
        frame_num = int(osp.basename(image_name).split('.')[0])
        boxes = dets['all_boxes'][1][i]
        kps = dets['all_keyps'][1][i]
        if eval_tracking:  # i.e., there is an 'all_tracks' entry in dets
            tracks = dets['all_tracks'][1][i]
        else:
            tracks = [1] * len(kps)
        data_el = {
            'image': image_name,
            'imagenum': [frame_num],
            'annorect': _convert_data_to_annorect_struct(boxes, kps, tracks),
        }
        if video_name in out_data:
            out_data[video_name].append(data_el)
        else:
            out_data[video_name] = [data_el]

    logger.info('Saving the JSON files to {}'.format(output_dir))
    # clear out the previous predictions, if any
    gen_utils.run_cmd('rm -r {}/*'.format(output_dir), print_cmd=False)
    for vname in tqdm(out_data.keys(), desc='Writing JSON files for eval'):
        vdata = out_data[vname]
        outfpath = osp.join(
            output_dir, out_filenames[osp.join('images', vname)])
        with open(outfpath, 'w') as fout:
            json.dump({'annolist': vdata}, fout)
    logger.info('Wrote all predictions in JSON to {}'.format(output_dir))
    logger.info('Running dataset level evaluation...')
    st_time = time.time()
    logger.info(_run_eval(dataset.annotation_directory, output_dir, eval_tracking))
    logger.info('...Done in {}'.format(time.time() - st_time))
    # TODO(rgirdhar): Do this better
    if cfg.EVAL.EVAL_MPII_PER_VIDEO:  # run the evaluation per-video
        res = []
        logger.info('Running per-video evaluation...')
        st_time = time.time()
        pervid_outpath = osp.join(
            osp.dirname(osp.normpath(output_dir)),
            osp.basename(det_file) + '_per_video_scores.txt')
        # Earlier I used multiprocessing to compute the predictions in parallel,
        # but now the eval code itself uses multiprocessing, so it cannot also be
        # used here (else it errors out because daemon processes cannot spawn
        # children). Hence the number of processes is set to 0.
        res = map(partial(
            _run_eval_single_video,
            out_filenames=out_filenames,
            output_dir=output_dir,
            dataset=dataset,
            eval_tracking=eval_tracking), out_data.keys())
        logger.info('...Done in {} seconds'.format(time.time() - st_time))
        res = sorted(res, key=lambda x: x[1])  # sort on score
        logger.info('Writing per-video scores to {}'.format(pervid_outpath))
        with open(pervid_outpath, 'w') as fout:
            for el in res:
                fout.write('{} {} {}\n'.format(el[0], el[1], el[2]))