Example #1
0
def roi_propagation(vid_proto,
                    box_proto,
                    net,
                    det_fun=im_detect,
                    scheme='max',
                    length=None,
                    sample_rate=1,
                    offset=0,
                    cls_indices=None,
                    keep_feat=False,
                    batch_size=1024):
    """Propagate box proposals through a video by per-frame detection.

    Tracks are seeded from ``box_proto``.  For every frame of ``vid_proto``
    the ROIs that are active on that frame are run through the detector,
    merged into one predicted box per ROI by ``score_guided_box_merge`` and
    written back into the tracks by ``_update_track``.

    Args:
        vid_proto: video protocol dict; ``'video'`` and ``'frames'`` are read
            and every frame entry must provide ``'frame'``.
        box_proto: box protocol used to seed the tracks.
        net: network object handed to the detector.
        det_fun: detection callable forwarded to ``_batch_im_detect``.
        scheme: merge scheme forwarded to ``score_guided_box_merge``.
        length: track length; a falsy value means "use the last frame id".
        sample_rate: forwarded to ``_box_proto_to_track``.
        offset: forwarded to ``_box_proto_to_track``.
        cls_indices: optional class indices.  When given, boxes and scores
            are restricted to these classes and the scores are re-normalised
            so that each ROI's kept scores sum to one.
        keep_feat: when false the detector features are discarded.
        batch_size: forwarded to ``_batch_im_detect``.

    Returns:
        dict with the keys ``'video'``, ``'method'`` (``'roi_propagation'``)
        and ``'tracks'``.

    Raises:
        IndexError: if ``vid_proto['frames']`` is empty.
    """
    track_proto = {
        'video': vid_proto['video'],
        'method': 'roi_propagation',
    }
    max_frame = vid_proto['frames'][-1]['frame']
    if not length:
        length = max_frame
    tracks = _box_proto_to_track(box_proto, max_frame, length, sample_rate,
                                 offset)

    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        im = imread(frame_path_at(vid_proto, frame_id))

        # ROIs of all tracks that are alive on the current frame.
        rois, track_index = _cur_rois(tracks, frame_id)
        # len() rather than truthiness: rois may be an array.
        if len(rois) == 0:
            continue

        timer = Timer()
        timer.tic()

        # Original note: scores are n x c, boxes are n x (c x 4).
        scores, boxes, features = _batch_im_detect(net, im, rois, det_fun,
                                                   batch_size)

        if not keep_feat:
            features = None
        if cls_indices is not None:
            boxes = boxes[:, cls_indices, :]
            scores = scores[:, cls_indices]
            # Re-normalise over the kept classes only.
            scores = scores / np.sum(scores, axis=1, keepdims=True)

        # Collapse the per-class boxes into one prediction per ROI.
        pred_boxes = score_guided_box_merge(scores, boxes, scheme)

        _update_track(tracks, boxes, pred_boxes, scores, features, track_index,
                      frame_id)
        timer.toc()
        # .format() must be applied to the string, not to print()'s result,
        # so this works both as a print statement and as a print function.
        print('Frame {}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(frame_id, timer.total_time,
                                             len(rois)))
    track_proto['tracks'] = tracks
    return track_proto
Example #2
0
def show_tracks(vid_proto, track_proto):
    """Step through a video, drawing each track's box on every frame.

    One frame is shown per key press in the ``'tracks'`` window.  Pressing
    ``q`` closes all windows and exits the process.
    """
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        img = imread(frame_path_at(vid_proto, frame_id))
        boxes = []
        for tracklet in track_proto['tracks']:
            boxes.append(track_box_at_frame(tracklet, frame_id))
        tracked = add_bbox(img, boxes, None, None, 2)
        cv2.imshow('tracks', tracked)
        # Any key other than 'q' advances to the next frame.
        if cv2.waitKey(0) != ord('q'):
            continue
        cv2.destroyAllWindows()
        sys.exit(0)
    cv2.destroyAllWindows()
Example #3
0
def show_track_res(track_res, vid_proto):
    """Display the ROIs stored in per-frame tracking results.

    Iteration stops at the first result whose ``'frame'`` is ``-1``.
    Pressing ``q`` closes all windows and exits the process; any other key
    advances to the next result.
    """
    cv2.namedWindow('tracks')
    for frame_res in track_res:
        frame = frame_res['frame']
        # -1 marks a result slot that was never filled in.
        if frame == -1:
            break
        img = imread(frame_path_at(vid_proto, frame))
        roi_list = frame_res['roi'].tolist()
        tracked = add_bbox(img, roi_list, None, None, 2)
        cv2.imshow('tracks', tracked)
        if cv2.waitKey(0) != ord('q'):
            continue
        cv2.destroyAllWindows()
        sys.exit(0)
    cv2.destroyAllWindows()
Example #4
0
def track_propagation(vid_proto,
                      track_proto,
                      net,
                      det_fun=im_detect,
                      cls_indices=None,
                      keep_feat=False,
                      batch_size=1024):
    """Re-score existing tracks by running the detector on their boxes.

    For every frame of ``vid_proto`` the ROIs of the tracks present on that
    frame are run through the detector and the resulting scores, boxes and
    (optionally) features are stored via ``_update_track_scores_boxes``.

    Args:
        vid_proto: video protocol dict; ``'video'`` and ``'frames'`` are read
            and every frame entry must provide ``'frame'``.
        track_proto: track protocol dict; its ``'tracks'`` list is used.
        net: network object handed to the detector.
        det_fun: detection callable forwarded to ``_batch_im_detect``.
        cls_indices: optional class indices.  When given, scores are
            restricted to these classes and re-normalised per ROI.  Boxes
            are left untouched.
        keep_feat: when false the detector features are discarded.
        batch_size: forwarded to ``_batch_im_detect``.

    Returns:
        dict with the keys ``'video'``, ``'method'``
        (``'track_propagation'``) and ``'tracks'``.
    """
    new_track_proto = {
        'video': vid_proto['video'],
        'method': 'track_propagation',
    }
    # NOTE(review): this is a shallow copy, so the track objects themselves
    # are shared with track_proto; in-place updates made by
    # _update_track_scores_boxes are presumably visible through the input
    # as well.  Confirm whether that is intended.
    tracks = copy.copy(track_proto['tracks'])

    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        im = imread(frame_path_at(vid_proto, frame_id))

        # ROIs of all tracks that are alive on the current frame.
        rois, track_index = _cur_rois(tracks, frame_id)
        # len() rather than truthiness: rois may be an array.
        if len(rois) == 0:
            continue

        timer = Timer()
        timer.tic()

        # Original note: scores are n x c, boxes are n x (c x 4).
        scores, boxes, features = _batch_im_detect(net, im, rois, det_fun,
                                                   batch_size)

        if not keep_feat:
            features = None
        if cls_indices is not None:
            scores = scores[:, cls_indices]
            # Re-normalise over the kept classes only.
            scores = scores / np.sum(scores, axis=1, keepdims=True)

        _update_track_scores_boxes(tracks, scores, boxes, features,
                                   track_index, frame_id)
        timer.toc()
        # .format() must be applied to the string, not to print()'s result,
        # so this works both as a print statement and as a print function.
        print('Frame {}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(frame_id, timer.total_time,
                                             len(rois)))
    new_track_proto['tracks'] = tracks
    return new_track_proto
Example #5
0
def naive_box_regression(net_rpn,
                         net_no_rpn,
                         vid_proto,
                         scheme='max',
                         class_idx=None):
    """Generate tubelet proposals from the region proposals of the first frame.

    The first frame is run through ``net_rpn`` with no input boxes; every
    later frame is run through ``net_no_rpn`` using the boxes predicted on
    the previous frame.  The predicted boxes are appended to the tracks on
    every frame.

    Args:
        net_rpn: network used on the first frame.
        net_no_rpn: network used on all following frames.
        vid_proto: video protocol dict; ``'video'`` and ``'frames'`` are read
            and every frame entry must provide ``'frame'``.
        scheme: how per-class regressions are reduced to one box per
            proposal: ``'mean'`` averages over classes, ``'max'`` takes the
            highest scoring non-background class, anything else selects
            ``class_idx``.  The first frame always uses the mean.
        class_idx: class index used when ``scheme`` is neither ``'mean'``
            nor ``'max'``.

    Returns:
        dict with the keys ``'video'``, ``'method'``
        (``'naive_box_regression'``) and ``'tracks'``.
    """
    track_proto = {
        'video': vid_proto['video'],
        'method': 'naive_box_regression',
    }
    tracks = []
    pred_boxes = None

    for idx, frame in enumerate(vid_proto['frames'], start=1):
        is_first = idx == 1
        im = imread(frame_path_at(vid_proto, frame['frame']))

        timer = Timer()
        timer.tic()
        # Only the first frame generates proposals; later frames regress
        # from the previous predictions.
        detector = net_rpn if is_first else net_no_rpn
        scores, boxes = im_detect(detector, im, pred_boxes)

        boxes = boxes.reshape((boxes.shape[0], -1, 4))
        # Strings are compared with ==; identity ("is") only matched by
        # accident of interning.
        if scheme == 'mean' or is_first:
            # use mean regressions as predictions
            pred_boxes = np.mean(boxes, axis=1)
        elif scheme == 'max':
            # use the regressions of the class with the maximum probability
            # excluding __background__ class
            max_cls = scores[:, 1:].argmax(axis=1) + 1
            pred_boxes = boxes[np.arange(len(boxes)), max_cls, :]
        else:
            # use class specific regression as predictions
            pred_boxes = boxes[:, class_idx, :]
        _append_boxes(tracks, frame['frame'], pred_boxes, scores)
        timer.toc()
        # .format() must be applied to the string, not to print()'s result,
        # so this works both as a print statement and as a print function.
        print('Detection took {:.3f}s for '
              '{:d} object proposals'.format(timer.total_time,
                                             boxes.shape[0]))
    track_proto['tracks'] = tracks
    return track_proto
Example #6
0
    # Fragment of a command-line script: the parser and the earlier options
    # (vid_file, det_root, cls, save_dir, ...) are defined above this excerpt.
    parser.add_argument('--top_k', default=10, type=int)
    # --no_nms clears args.nms; NMS is on by default.
    parser.add_argument('--no_nms', action='store_false', dest='nms')
    parser.set_defaults(nms=True)
    args = parser.parse_args()

    vid_proto = proto_load(args.vid_file)
    if args.save_dir and not os.path.isdir(args.save_dir):
        os.mkdir(args.save_dir)
    # Index of the requested class in the detection arrays.
    cls_index = imagenet_vdet_class_idx[args.cls]

    for frame in vid_proto['frames']:
        # One .mat detection file per frame, named after the frame's image
        # path without its extension.
        det_file = os.path.join(
            args.det_root, "{}.mat".format(os.path.splitext(frame['path'])[0]))
        det = sio.loadmat(det_file)
        frame_idx = frame['frame']
        img = imread(frame_path_at(vid_proto, frame_idx))
        # Keep only the boxes and scores of the selected class.
        boxes = det['boxes'][:, cls_index, :].astype('single')
        scores = det['zs'][:, cls_index].astype('single')
        if args.nms:
            # nms() receives the boxes with the score appended as a last column.
            keep = nms(np.hstack((boxes, scores[:, np.newaxis])), 0.3)
        else:
            keep = range(len(boxes))
        kept_boxes = [boxes[i, :] for i in keep]
        kept_scores = [scores[i] for i in keep]
        # Indices of the kept detections, highest score first.
        top_idx = np.argsort(np.asarray(kept_scores))[::-1]
        # Take at most args.top_k of the kept detections.
        top_boxes = [kept_boxes[top_idx[i]] for i in \
            xrange(min(args.top_k, len(kept_boxes)))]
        top_scores = [kept_scores[top_idx[i]] for i in \
            xrange(min(args.top_k, len(kept_boxes)))]
        det_img = add_bbox(img, top_boxes, top_scores)
        cv2.imshow('detection', det_img)
Example #7
0
            # 	fids.remove(int(line[0]))
            # 	if len(fids) == 0:
            # 		print ("get all fids")
            # 		break
    # Fragment of a larger routine: ret, vid and annot are defined above
    # this excerpt.
    print(len(ret))

    # Match the ground truth with the detections for each video and show
    # the ground truth.

    # Remove images left over from a previous run.
    # NOTE(review): this shells out with a glob relative to the current
    # working directory; confirm the script is always run from the
    # expected location.
    os.system("rm saveImgs/*")

    for frame_index, frame in enumerate(vid['frames']):
        # print (frame)
        # print (vid['root_path'])
        imgpath = frame_path_at(vid, frame['frame'])
        imgbasename = os.path.basename(imgpath)
        # Output image keeps the source file name, under saveImgs/.
        imgsavepath = os.path.join('saveImgs/', imgbasename)
        img = cv2.imread(imgpath)

        # Load the ground-truth bounding box and class of every annotated
        # track on this frame.
        # print (annot['annotations'])
        boxes = [
            track_box_at_frame(tracklet, frame['frame'])
            for tracklet in [anno['track'] for anno in annot['annotations']]
        ]
        classes = [
            track_class_at_frame(tracklet, frame['frame'])
            for tracklet in [anno['track'] for anno in annot['annotations']]
        ]
        # print (boxes)
Example #8
0
            # Fragment: this excerpt starts inside a per-class loop (index j)
            # whose header, together with inds, cls_scores and track_ids, is
            # above the visible code.
            # Columns j*4 .. j*4+3 hold the box of class j.
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, 0.3, force_cpu=True)
            # Remember which track and which class survived NMS.
            for keep_id in keep:
                kept_track_ids.append(track_ids[inds[keep_id]])
                kept_class.append(j)

    # One display colour per kept (track, class) pair.
    colors = unique_colors(len(kept_track_ids))
    kept_tracks = [tracks[i] for i in kept_track_ids]
    idx = 0
    # The loop's exit condition and the update of idx are below the
    # visible excerpt.
    while True:
        frame = vid_proto['frames'][idx]
        frame_id = frame['frame']
        print "Frame id: {}".format(frame_id)
        img = imread(frame_path_at(vid_proto, frame['frame']))
        # Per-frame lists describing what is drawn on this frame.
        boxes = []
        scores = []
        show_track_ids = []
        cur_colors = []
        cur_classes = []
        for track_id, (class_id,
                       track) in enumerate(zip(kept_class, kept_tracks)):
            if frame_id in track['frame']:
                # Select this frame's row via a boolean mask on the track's
                # frame ids, then pick the entry of the kept class.
                boxes.append(track[args.box_key][track['frame'] == frame_id][
                    0, class_id, :].tolist())
                scores.append(track[args.score_key][track['frame'] == frame_id]
                              [0, class_id].tolist())
                cur_colors.append(colors[track_id])
                cur_classes.append(imagenet_vdet_classes[class_id])
                show_track_ids.append(track_id)
    # Fragment of a command-line script: args is parsed above this excerpt
    # and the use of dets / bbox continues below it.
    vid_proto = proto_load(args.vid_file)
    score_proto = proto_load(args.score_file)
    # Each line of the image-set file is split on whitespace into a
    # (key, value) pair; below it is looked up by image name.
    with open(args.image_set_file) as f:
        image_set = dict([line.strip().split() for line in f.readlines()])
    vid_name = vid_proto['video']
    # The score file must belong to the same video.
    assert vid_name == score_proto['video']

    # Map each frame id to "<video name>/<frame path without extension>".
    frame_to_image_name = {}
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        frame_to_image_name[frame_id] = os.path.join(
            vid_name, os.path.splitext(frame['path'])[0])

    # Image shape is read from frame 1 and used for every frame.
    height, width = imread(frame_path_at(vid_proto, 1)).shape[:2]

    dets = []
    for tubelet in score_proto['tubelets']:
        # Refuse score files that contain ground-truth tubelets.
        if tubelet['gt'] == 1:
            raise ValueError('Dangerous: Score file contains gt tracks!')
        class_index = tubelet['class_index']
        for box in tubelet['boxes']:
            frame_idx = box['frame']
            image_name = frame_to_image_name[frame_idx]
            # From here on frame_idx is the image-set value for this image,
            # not the frame id inside the video.
            frame_idx = image_set[image_name]
            # Clip the box to the image: coordinates below 0 become 0 ...
            # NOTE(review): the item assignments below need map() to return
            # a list, which is Python 2 behaviour.
            bbox = map(lambda x:max(x,0), box['bbox'])
            # ... entries 0 and 2 are capped at width - 1, entries 1 and 3
            # at height - 1.
            bbox[0] = min(width - 1, bbox[0])
            bbox[2] = min(width - 1, bbox[2])
            bbox[1] = min(height - 1, bbox[1])
            bbox[3] = min(height - 1, bbox[3])
Example #10
0
    # Fragment of a command-line script: args and vid_proto are defined
    # above this excerpt and the use of dets / bbox continues below it.
    score_proto = proto_load(args.score_file)
    # Each line of the image-set file is split on whitespace into a
    # (key, value) pair; below it is looked up by image name.
    with open(args.image_set_file) as f:
        image_set = dict([line.strip().split() for line in f.readlines()])
    vid_name = vid_proto['video']
    # The score file must belong to the same video.
    assert vid_name == score_proto['video']

    # Map each frame id to "<video name>/<frame path without extension>".
    frame_to_image_name = {}
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        frame_to_image_name[frame_id] = os.path.join(
            vid_name,
            os.path.splitext(frame['path'])[0])

    # Image shape is read from frame 1 and used for every frame.
    height, width = imread(frame_path_at(vid_proto, 1)).shape[:2]

    dets = []
    for tubelet in score_proto['tubelets']:
        # Refuse score files that contain ground-truth tubelets.
        if tubelet['gt'] == 1:
            raise ValueError('Dangerous: Score file contains gt tracks!')
        class_index = tubelet['class_index']
        for box in tubelet['boxes']:
            frame_idx = box['frame']
            image_name = frame_to_image_name[frame_idx]
            # From here on frame_idx is the image-set value for this image,
            # not the frame id inside the video.
            frame_idx = image_set[image_name]
            # Clip the box to the image: coordinates below 0 become 0 ...
            # NOTE(review): the item assignments below need map() to return
            # a list, which is Python 2 behaviour.
            bbox = map(lambda x: max(x, 0), box['bbox'])
            # ... entries 0 and 2 are capped at width - 1, entries 1 and 3
            # at height - 1.
            bbox[0] = min(width - 1, bbox[0])
            bbox[2] = min(width - 1, bbox[2])
            bbox[1] = min(height - 1, bbox[1])
            bbox[3] = min(height - 1, bbox[3])
Example #11
0
def sequence_roi_propagation(vid_proto,
                             box_proto,
                             net,
                             det_fun=sequence_im_detect,
                             window=2,
                             scheme='max',
                             length=None,
                             sample_rate=1,
                             offset=0,
                             keep_feat=False,
                             batch_size=1024):
    """Propagate box proposals through a video one frame window at a time.

    Tracks are seeded from ``box_proto``.  ``_sequence_frames`` yields
    groups of frames; for each group the ROIs active on its first frame are
    run through the sequence detector, which regresses a box for every
    later frame of the group.  The results are written back into the tracks
    under the keys ``'bbox'``, ``'roi'`` and (optionally) ``'feature'``.

    Args:
        vid_proto: video protocol dict; ``'video'`` and ``'frames'`` are read.
        box_proto: box protocol used to seed the tracks.
        net: network object handed to the detector.
        det_fun: detection callable forwarded to
            ``_batch_sequence_im_detect``.
        window: forwarded to ``_sequence_frames``.
        scheme: accepted for signature parity with ``roi_propagation``; not
            used by this function.
        length: track length; a falsy value means "use the last frame id".
        sample_rate: forwarded to ``_box_proto_to_track``.
        offset: forwarded to ``_box_proto_to_track``.
        keep_feat: when true, per-frame features are stored in the tracks.
        batch_size: forwarded to ``_batch_sequence_im_detect``.

    Returns:
        dict with the keys ``'video'``, ``'method'``
        (``'sequence_roi_propagation'``) and ``'tracks'``.

    Raises:
        IndexError: if ``vid_proto['frames']`` is empty.
    """
    track_proto = {
        'video': vid_proto['video'],
        'method': 'sequence_roi_propagation',
    }
    max_frame = vid_proto['frames'][-1]['frame']
    if not length:
        length = max_frame
    tracks = _box_proto_to_track(box_proto, max_frame, length, sample_rate,
                                 offset)

    # Distinct starting frames of all tracks, in ascending order.
    track_anchors = sorted(set(track[0]['frame'] for track in tracks))
    sequence_frames = _sequence_frames(vid_proto, window, track_anchors,
                                       length)
    for frames in sequence_frames:
        frame_ids = [frame['frame'] for frame in frames]
        # A real list is required: len(images) is used below, and map()
        # only returns a list on Python 2.
        images = [imread(frame_path_at(vid_proto, frame_id))
                  for frame_id in frame_ids]

        # ROIs of all tracks that are alive on the first frame of the group.
        rois, track_index = _cur_rois(tracks, frame_ids[0])
        # len() rather than truthiness: rois may be an array.
        if len(rois) == 0:
            continue

        timer = Timer()
        timer.tic()

        # Original note: scores are n x 2, boxes are n x ((len-1) x 4),
        # features are n x (len x f).
        scores, boxes, features = _batch_sequence_im_detect(
            net, images, rois, det_fun, batch_size)

        boxes = boxes.reshape((len(rois), len(images) - 1, 4))
        if keep_feat:
            features = features.reshape((len(rois), len(images), -1))
        else:
            features = None

        prev_id = -1
        for i, frame_id in enumerate(frame_ids):
            # Stop when encountering duplicate frames.
            if frame_id == prev_id:
                break
            prev_id = frame_id
            if i == 0:
                # The first frame keeps the input ROIs as its boxes.
                _update_track_by_key(tracks, 'bbox', rois, track_index,
                                     frame_id)
            else:
                # Minus 1 because boxes[:, 0] corresponds to the second
                # frame.  tolist() is called once per key so the two
                # entries do not share list objects.
                _update_track_by_key(tracks, 'bbox',
                                     boxes[:, i - 1, :].tolist(),
                                     track_index, frame_id)
                _update_track_by_key(tracks, 'roi',
                                     boxes[:, i - 1, :].tolist(),
                                     track_index, frame_id)
            if keep_feat:
                _update_track_by_key(tracks, 'feature',
                                     features[:, i, :].tolist(), track_index,
                                     frame_id)
        timer.toc()
        # .format() must be applied to the string, not to print()'s result,
        # so this works both as a print statement and as a print function.
        print('Frame {}-{}: Detection took {:.3f}s for '
              '{:d} object proposals'.format(frame_ids[0], frame_ids[-1],
                                             timer.total_time, len(rois)))
    track_proto['tracks'] = tracks
    return track_proto
Example #12
0
def roi_train_propagation(vid_proto,
                          box_proto,
                          net,
                          det_fun=im_detect,
                          cls_indices=None,
                          scheme='weighted',
                          num_tracks=16,
                          length=20,
                          fg_ratio=None,
                          batch_size=16):
    """Propagate a random sample of boxes for ``length`` frames.

    A start frame is drawn at random among the frames that have at least
    ``num_tracks`` boxes in ``box_proto``.  ``num_tracks`` boxes are sampled
    on it and then propagated frame by frame: the boxes predicted on one
    frame become the ROIs of the next.

    Args:
        vid_proto: video protocol dict; must describe the same video as
            ``box_proto``.
        box_proto: box protocol providing the candidate boxes.
        net: network object handed to the detector.
        det_fun: detection callable forwarded to ``_batch_im_detect``.
        cls_indices: optional class indices.  When given, boxes and scores
            are restricted to these classes and the scores are re-normalised
            per ROI.
        scheme: merge scheme forwarded to ``score_guided_box_merge``.
        num_tracks: number of boxes to sample and track.
        length: number of frames to propagate over.
        fg_ratio: forwarded to ``_sample_boxes``.
        batch_size: forwarded to ``_batch_im_detect``.

    Returns:
        list of ``length`` dicts.  Processed entries hold ``'frame'``,
        ``'roi'``, ``'bbox'`` and ``'feat'``; entries that were never
        reached keep ``'frame' == -1``.

    Raises:
        ValueError: if no frame has at least ``num_tracks`` boxes.
    """
    assert vid_proto['video'] == box_proto['video']
    # Collect the boxes of every frame that has enough of them.
    all_boxes = {}
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        boxes = boxes_at_frame(box_proto, frame_id)
        if len(boxes) >= num_tracks:
            all_boxes[frame_id] = boxes

    # Check explicitly instead of wrapping random.choice in a bare except,
    # which would also swallow unrelated errors.
    if not all_boxes:
        raise ValueError('{} has not valid frames for tracking.'.format(
            vid_proto['video']))
    # list(...) because random.choice needs an indexable sequence.
    st_frame = random.choice(list(all_boxes))
    st_boxes = _sample_boxes(all_boxes[st_frame], num_tracks, fg_ratio)

    results = [{'frame': -1} for _ in range(length)]
    anchor = 0
    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        if frame_id < st_frame:
            continue
        if anchor >= length:
            break

        res = results[anchor]
        res['frame'] = frame_id
        if anchor == 0:
            # The first processed frame starts from the sampled boxes; later
            # frames receive their ROIs from the previous iteration.
            res['roi'] = np.asarray([st_box['bbox'] for st_box in st_boxes])

        im = imread(frame_path_at(vid_proto, frame_id))

        rois = res['roi']
        assert rois.shape[0] == num_tracks

        timer = Timer()
        timer.tic()

        # Original note: scores are n x c, boxes are n x (c x 4),
        # features are n * c.
        scores, boxes, features = _batch_im_detect(net, im, rois, det_fun,
                                                   batch_size)

        if cls_indices is not None:
            boxes = boxes[:, cls_indices, :]
            scores = scores[:, cls_indices]
            # Re-normalise over the kept classes only.
            scores = scores / np.sum(scores, axis=1, keepdims=True)

        # Collapse the per-class boxes into one prediction per ROI.
        pred_boxes = score_guided_box_merge(scores, boxes, scheme)

        res['bbox'] = boxes
        res['feat'] = features
        if anchor + 1 < length:
            results[anchor + 1]['roi'] = pred_boxes
        anchor += 1
    return results
Example #13
0
import cv2

if __name__ == '__main__':
    # Show (or save) every frame of a video with its annotated boxes drawn.
    parser = argparse.ArgumentParser()
    for positional in ('vid_file', 'annot_file'):
        parser.add_argument(positional)
    parser.add_argument('--save_dir', default=None)
    args = parser.parse_args()

    vid_proto = proto_load(args.vid_file)
    annot_proto = proto_load(args.annot_file)

    annotations = annot_proto['annotations']
    colors = unique_colors(len(annotations))

    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        img = imread(frame_path_at(vid_proto, frame_id))
        # One box per annotated track on this frame.
        boxes = []
        for annot in annotations:
            boxes.append(track_box_at_frame(annot['track'], frame_id))
        tracked = add_bbox(img, boxes, None, 10)

        if not args.save_dir:
            # Interactive mode: one frame per key press, 'q' quits.
            cv2.imshow('tracks', tracked)
            quit_requested = cv2.waitKey(0) == ord('q')
            cv2.destroyAllWindows()
            if quit_requested:
                sys.exit(0)
            continue

        # Save mode: frames are written as zero-padded JPEG files.
        if not os.path.isdir(args.save_dir):
            os.makedirs(args.save_dir)
        out_name = "{:04d}.jpg".format(frame_id)
        imwrite(os.path.join(args.save_dir, out_name), tracked)
Example #14
0
    # Fragment of a command-line script: the parser and the earlier options
    # (vid_file, bound, ...) are defined above this excerpt, and the use of
    # the computed flow continues below it.
    parser.add_argument('--merge', dest='merge', action='store_true')
    parser.add_argument('--debug', dest='visual_debug', action='store_true')
    parser.set_defaults(merge=False, visual_debug=False)
    args = parser.parse_args()

    # Images are rescaled to this width before computing optical flow.
    norm_width = 500.
    # Flow values in [-bound, bound] are mapped onto [0, 255] below.
    bound = args.bound

    vid_proto = proto_load(args.vid_file)
    # The trailing comma is the Python 2 print-statement idiom that
    # suppresses the newline.
    print("Processing {}: {} files... ".format(args.vid_file,
                                               len(vid_proto['frames']))),
    sys.stdout.flush()
    tic = time.time()
    # Walk over consecutive frame pairs (t, t + 1).
    for frame1, frame2 in zip(vid_proto['frames'][:-1],
                              vid_proto['frames'][1:]):
        img_path = frame_path_at(vid_proto, frame1['frame'])
        img1 = cvReadGrayImg(img_path)
        img2 = cvReadGrayImg(frame_path_at(vid_proto, frame2['frame']))
        h, w = img1.shape
        # Scale factor that brings the image width to norm_width.
        fxy = norm_width / w
        # normalize image size
        # NOTE(review): the positional arguments after the two images look
        # like the OpenCV 2.4 order (pyr_scale, levels, winsize, iterations,
        # poly_n, poly_sigma, flags) — confirm against the installed OpenCV.
        flow = cv2.calcOpticalFlowFarneback(
            cv2.resize(img1, None, fx=fxy, fy=fxy),
            cv2.resize(img2, None, fx=fxy, fy=fxy), 0.5, 3, 15, 3, 7, 1.5, 0)
        # map optical flow back
        flow = flow / fxy
        # normalization
        flow = np.round((flow + bound) / (2. * bound) * 255.)
        # Clamp to the valid 8-bit range.
        flow[flow < 0] = 0
        flow[flow > 255] = 255
        # Resize the flow field back to the original image size.
        flow = cv2.resize(flow, (w, h))
sys.path.insert(0, osp.join(this_dir, '../../external/'))
from vdetlib.utils.protocol import proto_load, frame_path_at, annots_at_frame
import shutil

if __name__ == '__main__':
    # Copy every annotated frame into one sub-folder per object class.
    parser = argparse.ArgumentParser()
    for arg_name in ('vid_proto', 'annot_proto', 'save_dir'):
        parser.add_argument(arg_name)
    args = parser.parse_args()

    vid_proto = proto_load(args.vid_proto)
    annot_proto = proto_load(args.annot_proto)
    if not osp.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    for frame in vid_proto['frames']:
        frame_id = frame['frame']
        image_path = frame_path_at(vid_proto, frame_id)
        annots = annots_at_frame(annot_proto, frame_id)
        cls_idx = [annot['class_index'] for annot in annots]
        # Each class present on the frame receives one copy of the image.
        uniq_cls = set(cls_idx)
        for cls in uniq_cls:
            cls_folder = "{:02d}".format(cls)
            save_dir = osp.join(args.save_dir, cls_folder)
            if not osp.isdir(save_dir):
                os.makedirs(save_dir)
            # File name joins the last two components of the source path.
            flat_name = '_'.join(image_path.split('/')[-2:])
            save_path = osp.join(save_dir, flat_name)
            shutil.copyfile(image_path, save_path)
    # Fragment of a command-line script: the parser and its options
    # (annot_list, vid_dir, save_dir) are defined above this excerpt.
    args = parser.parse_args()

    # One bucket per class, skipping the first entry of
    # imagenet_vdet_classes.
    manual_boxes = [{"class": cls, "boxes": []} for cls in imagenet_vdet_classes[1:]]
    # args.annot_list names a file holding one annotation file path per line.
    for annot_file in [line.strip() for line in open(args.annot_list)]:
        print "Processing {}".format(annot_file)
        with open(annot_file) as f:
            annot = json.load(f)
        # The matching video protocol is "<vid_dir>/<video name>.vid".
        vid_file = os.path.join(args.vid_dir, annot['video']+'.vid')
        assert os.path.isfile(vid_file)
        with open(vid_file) as f:
            vid_proto = json.load(f)
        assert vid_proto['video'] == annot['video']
        # Flatten all tracks into a single list of per-frame box records.
        frames = [frame for track in annot['annotations'] for frame in track['track']]
        for frame in frames:
            cls_idx = frame['class_index']
            # Bucket cls_idx - 1 must carry the same class name as the record.
            assert manual_boxes[cls_idx-1]['class'] == frame['class']
            if frame['generated'] == 1:
                # skip generated boxes
                continue
            # manually labeled boxes
            frame_path = frame_path_at(vid_proto, frame['frame'])
            # Stored as [image path, followed by the bbox values].
            manual_boxes[cls_idx-1]['boxes'].append(
                [frame_path] + frame['bbox'])

    # Write one tab-separated list file per class.
    for count in manual_boxes:
        save_file = os.path.join(args.save_dir, count['class']+'_manual_box_list.txt')
        with open(save_file, 'w') as f:
            print "Writing to {}".format(save_file)
            for box in count['boxes']:
                f.write('\t'.join(map(str,box))+'\n')