Example #1
        def _process_single_anno(anno):
            if anno['VideoType'] == 'neg_frames':
                _process_neg_frames(anno)
                return

            if Enable_Time_Log:
                t1 = time.time()

            te = timestamp_to_ms(anno['Time'])
            ts = te - conf_dict['ob_window_len'] * conf_dict['interval']
            frames_dir = os.path.join(
                conf_dict['pos_video_dir'], anno['VideoID'])

            if conf_dict['use_frames_first'] and os.path.isdir(frames_dir):
                frame_ids = read_frames_dir(
                    frames_dir, max(0.0, ts), te, return_id=True)
                frame_ids = sample_frames(frame_ids, conf_dict['ob_window_len'])
                frames = read_frames_dir_with_fids(frames_dir, frame_ids)
                ctx_frames = read_frames_dir(frames_dir, 0.0, te, return_id=True)
            else:
                video_file = os.path.join(
                    conf_dict['pos_video_dir'],
                    '{}.mp4'.format(anno['VideoID']))
                frames = clip_video_to_frames(video_file, max(0.0, ts), te)
                frames = sample_frames(frames, conf_dict['ob_window_len'])
                ctx_frames = clip_video_to_frames(video_file, 0.0, te)

            anno_copy = dict(anno)  # shallow copy; leave the source anno intact
            anno_copy['Frames'] = [pickle.dumps(img) for img in frames]
            # Index of the last context frame (frames from 0.0 up to te)
            anno_copy['FrameIDs'] = [len(ctx_frames) - 1]
            data_queue.put(anno_copy)

            if Enable_Time_Log:
                t2 = time.time()
                print('CV2 reader takes {:.3f}s'.format(t2 - t1))
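
Both helpers above recur throughout these examples but are never defined here. The sketch below is a minimal reconstruction inferred only from the call sites (clip_video_to_frames takes a path plus start/end times in milliseconds, with None meaning "until the end"; sample_frames uniformly subsamples a list down to ob_window_len items); it is an assumption, not the library's actual implementation.

import cv2
import numpy as np

def clip_video_to_frames(video_file, start_ms, end_ms):
    """Decode the frames whose timestamps fall in [start_ms, end_ms).

    Sketch only: end_ms=None reads to the end of the video.
    """
    cap = cv2.VideoCapture(video_file)
    cap.set(cv2.CAP_PROP_POS_MSEC, start_ms)
    frames = []
    while True:
        pos_ms = cap.get(cv2.CAP_PROP_POS_MSEC)
        success, frame = cap.read()
        if not success or (end_ms is not None and pos_ms >= end_ms):
            break
        frames.append(frame)
    cap.release()
    return frames

def sample_frames(items, num):
    """Uniformly subsample a list down to num items (assumed behavior)."""
    if len(items) <= num:
        return items
    indices = np.linspace(0, len(items) - 1, num).astype(int)
    return [items[i] for i in indices]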
Example #2
    def setup_class(self):
        self.roi_feat_resolution = 5
        self.scene_sensor = SceneSensor(
            YOLOv4_MODEL,
            gpu=0,
            img_shape=[3, 416, 416],
            roi_feat_resolution=self.roi_feat_resolution,
            algorithm='yolov4')
        self.frames = clip_video_to_frames(R2_VIDEO, 0., None)
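
Later examples (#6 and #7) show how such a SceneSensor fixture is consumed. The snippet below restates that call pattern for reference; the per-instance dict keys ('bbox', 'fm') are inferred from Example #7, not from documentation.

    # Usage pattern mirroring Examples #6 and #7 (keys are assumptions):
    instances_lst = self.scene_sensor.get_instances_with_feats(
        self.frames[:1], get_full_fm=False)  # one instance list per frame
    for inst in instances_lst[0]:
        x1, y1, x2, y2 = inst['bbox']  # detection box in pixel coordinates
        roi_feat = inst['fm']          # per-instance ROI feature map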
Example #3
def test_image_encoder():
    patch_n = 2
    frames = clip_video_to_frames(VIDEO, 3001., 4000.)
    image_encoder = create_box_encoder(
        Encoder_on_MARS_Dataset, batch_size=patch_n)

    h, w, _ = frames[0].shape
    boxes = [[int(w*0.1), int(h*0.1), int(w*0.5), int(h*0.5)]] * patch_n

    features = image_encoder(frames[0], boxes)
    assert len(features) == len(boxes)
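
create_box_encoder here appears to mirror the appearance-descriptor API popularized by the deep_sort tracker: given a frozen re-ID model (Encoder_on_MARS_Dataset presumably points at one trained on the MARS person re-identification dataset), it returns a callable that crops each box from the frame, batches the crops, and emits one feature vector per box, which is exactly what the final assertion checks.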
Example #4
def test_clip_video_to_frames():
    ffmpeg_clip = 'data/clip_by_ffmpeg.mp4'
    # NOTE: a stream-copied ffmpeg clip may contain extra appended frames,
    # so the loop below only compares the first len(frames) frames
    os.system('ffmpeg -ss 1 -i %s -c copy -t 2 -y %s' %
              (VIDEO, ffmpeg_clip))

    if not os.path.exists(ffmpeg_clip):
        # ffmpeg unavailable or the command failed; nothing to compare
        return

    frames = clip_video_to_frames(VIDEO, 1000., 3000.)
    cap = cv2.VideoCapture(ffmpeg_clip)
    success, i = True, 0
    while success and i < len(frames):
        success, frame = cap.read()
        if not success:
            # Stop before asserting on a failed read (frame would be None)
            break
        assert np.all(frame == frames[i])
        i += 1

    cap.release()
Example #5
def test_save_as_gif():
    frames = clip_video_to_frames(VIDEO, 1000., 3000.)
    gif_file = 'data/test_save_as_gif.gif'
    save_as_gif(frames, gif_file)
    assert os.path.exists(gif_file)
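
save_as_gif is exercised but not shown. A minimal sketch, assuming imageio is installed and that frames arrive in OpenCV's BGR channel order; the real implementation may differ.

import cv2
import imageio

def save_as_gif(frames, gif_file, fps=10):
    """Write BGR frames as an animated GIF (sketch, not the original)."""
    rgb_frames = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in frames]
    imageio.mimsave(gif_file, rgb_frames, fps=fps)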
Example #6
        def _process_single_anno(anno):
            if anno['VideoType'] == 'neg_frames':
                _process_neg_frames(anno)
                return

            te = timestamp_to_ms(anno['Time'])
            ts = te - conf_dict['ob_window_len'] * conf_dict['interval']
            frames_dir = os.path.join(video_dir, anno['VideoID'])
            if conf_dict['use_frames_first'] and os.path.isdir(frames_dir):
                # Read images
                try:
                    frames = read_frames_dir(frames_dir, max(0.0, ts), te)
                    frames = sample_frames(frames, conf_dict['ob_window_len'])
                    ctx_frames = read_frames_dir(frames_dir, 0.0, te)
                except Exception:
                    warnings.warn('OpenCV IO error. Reading {}'.format(
                        frames_dir))
                    return

                h, w, _ = frames[0].shape
                # Snap frames back to a canonical resolution for their
                # aspect ratio (4:3 -> 640x480, 16:9 -> 1280x720)
                if h / w == 480 / 640:
                    frames = [cv2.resize(i, (640, 480)) for i in frames]
                elif h / w == 720 / 1280:
                    frames = [cv2.resize(i, (1280, 720)) for i in frames]
            else:
                # Read video
                video_file = os.path.join(
                    video_dir, '{}.mp4'.format(anno['VideoID']))
                frames = clip_video_to_frames(video_file, max(0.0, ts), te)
                frames = sample_frames(frames, conf_dict['ob_window_len'])
                ctx_frames = clip_video_to_frames(video_file, 0.0, te)

            track_states_file = os.path.join(
                video_dir, '{}_states.pkl'.format(anno['VideoID']))
            with open(track_states_file, 'rb') as f:
                track_states = pickle.load(f)
            last_frame_tracks = track_states[len(ctx_frames)-1][0]
            obj_ids = anno['ID'].split(',') if anno['ID'] != '' else []
            # All annotated objects must still be tracked in the last frame
            check_passed = all(idx in last_frame_tracks for idx in obj_ids)
            if not check_passed:
                warnings.warn(
                    'Failed to process annotation: {}\n'.format(anno))
                return

            if conf_dict['augment']:
                # Retry augmentation until every annotated object still
                # matches a detection (IoU > 0.5) in the augmented last frame
                while True:
                    aug_frames = video_aug(frames)
                    instances = scene_sensor.get_instances(aug_frames[-1:])[0]
                    iou_lst = [max_iou(last_frame_tracks[idx], instances)
                               for idx in obj_ids]
                    if len(iou_lst) == 0 or min(iou_lst) > 0.5:
                        break
                frames = aug_frames

            instances_lst = scene_sensor.get_instances_with_feats(
                frames, get_full_fm=False)
            success, data = convert_instances_lst_to_data(
                instances_lst, conf_dict['tokens_per_frame'],
                last_frame_tracks, obj_ids, anno['WAE_id'],
                conf_dict['inst_crop_shape'], conf_dict['inst_fm_shape'],
                conf_dict['inst_pos_dim'], conf_dict['inst_cls_dim'],
                conf_dict['visual_token_dim'])
            if success:
                data_queue.put(data)
            else:
                warnings.warn(
                    'Failed to process annotation: {}\n'.format(anno))
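
max_iou is central to Examples #6 and #7 but never defined here. From its two call patterns (a plain best-overlap score in #6, and return_id=True in #7 yielding an (iou, index) pair with -1 when nothing matches), a plausible reconstruction:

def iou(box_a, box_b):
    """Intersection-over-union of two [x1, y1, x2, y2] boxes."""
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-9)

def max_iou(track_box, instances, return_id=False):
    """Best IoU between a track's box and all detected instances (sketch)."""
    best_iou, best_id = 0.0, -1
    for i, inst in enumerate(instances):
        score = iou(track_box, inst['bbox'])
        if score > best_iou:
            best_iou, best_id = score, i
    return (best_iou, best_id) if return_id else best_iou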
Example #7
    def _process_single_anno(self, idx, anno, txt, data_dir):
        if not hasattr(self, "scene_sensor"):
            self.scene_sensor = SceneSensor(
                self.yolov4_model_dir,
                gpu=self.gpu,
                img_shape=[3, 416, 416],
                roi_feat_resolution=self.roi_feat_resolution,
                algorithm='yolov4')

        # Read annos and data
        track_states_file = os.path.join(
            self.video_tracking_dir, '{}_states.pkl'.format(anno['VideoID']))
        with open(track_states_file, 'rb') as f:
            track_states = pickle.load(f)

        video_file = os.path.join(
            self.video_tracking_dir, '{}.mp4'.format(anno['VideoID']))
        frames = clip_video_to_frames(video_file, 0.0, None)

        # Extract frames
        related_frames, related_tracks = [], []
        for frame, (tracks, bboxes) in zip(frames, track_states):
            if anno['ID'] not in tracks:
                continue

            related_frames.append(frame)
            related_tracks.append(tracks[anno['ID']])

        instances_lst = self.scene_sensor.get_instances_with_feats(
            related_frames, get_full_fm=False)

        for frame, instances, track in zip(
                related_frames, instances_lst, related_tracks):
            _, inst_id = max_iou(track, instances, return_id=True)
            if inst_id == -1:
                warnings.warn(
                    'Cannot find corresponding instance for track in '
                    'anno: {}\n'.format(anno))
                continue

            x1, y1, x2, y2 = instances[inst_id]['bbox']
            cv2.imwrite(os.path.join(data_dir, '{}.jpg'.format(idx)),
                        frame[int(y1):int(y2), int(x1):int(x2)])
            np.save(os.path.join(data_dir, '{}.npy'.format(idx)),
                    instances[inst_id]['fm'])
            with open(txt, 'a') as f:
                if anno['Salutation'] == 'man':
                    tree_mask, cls0, cls1, cls2 = '100', 0, -1, -1
                elif anno['Salutation'] == 'woman':
                    tree_mask, cls0, cls1, cls2 = '100', 1, -1, -1
                elif anno['Salutation'] == 'young_boy':
                    tree_mask, cls0, cls1, cls2 = '110', 0, 0, -1
                elif anno['Salutation'] == 'uncle':
                    tree_mask, cls0, cls1, cls2 = '110', 0, 1, -1
                elif anno['Salutation'] == 'young_girl':
                    tree_mask, cls0, cls1, cls2 = '101', 1, -1, 0
                elif anno['Salutation'] == 'aunt':
                    tree_mask, cls0, cls1, cls2 = '101', 1, -1, 1
                else:
                    # Guard against unhandled salutations, which would
                    # otherwise leave tree_mask undefined below
                    warnings.warn(
                        'Unknown salutation in anno: {}\n'.format(anno))
                    continue

                f.write('{} {} {} {} {}\n'.format(
                    idx, tree_mask, cls0, cls1, cls2))

            idx += 1

        return idx
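
The salutation branch above encodes a small label tree (gender at the root, an age bucket underneath). A lookup table is equivalent to the chain shown and keeps the unknown-key guard in one place; this is a refactoring sketch using only the values from the example:

# (tree_mask, cls0, cls1, cls2) per salutation, as in the chain above
SALUTATION_LABELS = {
    'man':        ('100', 0, -1, -1),
    'woman':      ('100', 1, -1, -1),
    'young_boy':  ('110', 0, 0, -1),
    'uncle':      ('110', 0, 1, -1),
    'young_girl': ('101', 1, -1, 0),
    'aunt':       ('101', 1, -1, 1),
}

labels = SALUTATION_LABELS.get(anno['Salutation'])
if labels is None:
    warnings.warn('Unknown salutation in anno: {}\n'.format(anno))
else:
    tree_mask, cls0, cls1, cls2 = labels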
Example #8
    def setup_class(self):
        self.scene_sensor = SceneSensor(YOLOv3_MODEL,
                                        gpu=0,
                                        algorithm='yolov3')
        self.frames = clip_video_to_frames(VIDEO, 3001., 4000.)