def _process_single_anno(anno):
    if anno['VideoType'] == 'neg_frames':
        _process_neg_frames(anno)
        return

    if Enable_Time_Log:
        t1 = time.time()

    # Observation window: `ob_window_len` frames spaced `interval` ms apart,
    # ending at the annotated timestamp.
    te = timestamp_to_ms(anno['Time'])
    ts = te - conf_dict['ob_window_len'] * conf_dict['interval']

    frames_dir = os.path.join(conf_dict['pos_video_dir'], anno['VideoID'])
    if conf_dict['use_frames_first'] and os.path.isdir(frames_dir):
        frame_ids = read_frames_dir(
            frames_dir, max(0.0, ts), te, return_id=True)
        frame_ids = sample_frames(frame_ids, conf_dict['ob_window_len'])
        frames = read_frames_dir_with_fids(frames_dir, frame_ids)
        ctx_frames = read_frames_dir(frames_dir, 0.0, te, return_id=True)
    else:
        video_file = os.path.join(
            conf_dict['pos_video_dir'], '{}.mp4'.format(anno['VideoID']))
        frames = clip_video_to_frames(video_file, max(0.0, ts), te)
        frames = sample_frames(frames, conf_dict['ob_window_len'])
        ctx_frames = clip_video_to_frames(video_file, 0.0, te)

    anno_copy = dict(anno)
    anno_copy['Frames'] = [pickle.dumps(img) for img in frames]
    # Index of the annotated (last) frame within the full context.
    anno_copy['FrameIDs'] = [len(ctx_frames) - 1]
    data_queue.put(anno_copy)

    if Enable_Time_Log:
        t2 = time.time()
        print('CV2 reader takes {:.3f}s'.format(t2 - t1))
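# A minimal sketch of the uniform subsampling that `sample_frames` is assumed
# to perform above: pick `n` evenly spaced items so a clip of arbitrary length
# fits the fixed observation window. Illustrative only; the real helper may
# pad short clips or sample differently.
def _uniform_sample_sketch(items, n):
    """Return `n` evenly spaced elements of `items` (hypothetical helper)."""
    if len(items) <= n:
        return list(items)
    step = len(items) / float(n)
    return [items[int(i * step)] for i in range(n)]

# e.g. _uniform_sample_sketch(list(range(10)), 4) -> [0, 2, 5, 7]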
def setup_class(self):
    self.roi_feat_resolution = 5
    self.scene_sensor = SceneSensor(
        YOLOv4_MODEL,
        gpu=0,
        img_shape=[3, 416, 416],
        roi_feat_resolution=self.roi_feat_resolution,
        algorithm='yolov4')
    self.frames = clip_video_to_frames(R2_VIDEO, 0., None)
def test_image_encoder():
    patch_n = 2
    frames = clip_video_to_frames(VIDEO, 3001., 4000.)
    image_encoder = create_box_encoder(
        Encoder_on_MARS_Dataset, batch_size=patch_n)
    h, w, _ = frames[0].shape
    boxes = [[int(w * 0.1), int(h * 0.1), int(w * 0.5), int(h * 0.5)]] * patch_n
    features = image_encoder(frames[0], boxes)
    assert len(features) == len(boxes)
def test_clip_video_to_frames():
    ffmpeg_clip = 'data/clip_by_ffmpeg.mp4'
    try:
        # NOTE: ffmpeg may append extra copied frames at the end of the
        # clip, so we only compare up to the frames we extracted ourselves.
        os.system('ffmpeg -ss 1 -i %s -c copy -t 2 -y %s'
                  % (VIDEO, ffmpeg_clip))
    except Exception:
        pass
    if not os.path.exists(ffmpeg_clip):
        # ffmpeg is unavailable; nothing to compare against.
        return

    frames = clip_video_to_frames(VIDEO, 1000., 3000.)
    cap = cv2.VideoCapture(ffmpeg_clip)
    i = 0
    while i < len(frames):
        success, frame = cap.read()
        assert success and np.all(frame == frames[i])
        i += 1
    cap.release()
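# A minimal sketch of how `clip_video_to_frames` is assumed to map a
# millisecond range onto decoded frames (an illustration, not the actual
# implementation): decode sequentially and keep frames whose timestamp,
# derived from the container FPS, falls inside [start_ms, end_ms).
def _clip_video_sketch(video_file, start_ms, end_ms):
    cap = cv2.VideoCapture(video_file)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frames, i = [], 0
    while True:
        success, frame = cap.read()
        if not success:
            break
        t_ms = i * 1000.0 / fps
        if t_ms >= start_ms and (end_ms is None or t_ms < end_ms):
            frames.append(frame)
        i += 1
    cap.release()
    return frames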
def test_save_as_gif():
    frames = clip_video_to_frames(VIDEO, 1000., 3000.)
    gif_file = 'data/test_save_as_gif.gif'
    save_as_gif(frames, gif_file)
    assert os.path.exists(gif_file)
def _process_single_anno(anno):
    if anno['VideoType'] == 'neg_frames':
        _process_neg_frames(anno)
        return

    # Observation window of `ob_window_len` frames ending at the
    # annotated timestamp.
    te = timestamp_to_ms(anno['Time'])
    ts = te - conf_dict['ob_window_len'] * conf_dict['interval']

    frames_dir = os.path.join(video_dir, anno['VideoID'])
    if conf_dict['use_frames_first'] and os.path.isdir(frames_dir):
        # Read pre-extracted images
        try:
            frames = read_frames_dir(frames_dir, max(0.0, ts), te)
            frames = sample_frames(frames, conf_dict['ob_window_len'])
            ctx_frames = read_frames_dir(frames_dir, 0.0, te)
        except Exception:
            warnings.warn('OpenCV IO error. Reading {}'.format(frames_dir))
            return
        # Resize to the canonical resolution matching the aspect ratio.
        h, w, _ = frames[0].shape
        if h / w == 480 / 640:
            frames = [cv2.resize(i, (640, 480)) for i in frames]
        elif h / w == 720 / 1280:
            frames = [cv2.resize(i, (1280, 720)) for i in frames]
    else:
        # Read video
        video_file = os.path.join(
            video_dir, '{}.mp4'.format(anno['VideoID']))
        frames = clip_video_to_frames(video_file, max(0.0, ts), te)
        frames = sample_frames(frames, conf_dict['ob_window_len'])
        ctx_frames = clip_video_to_frames(video_file, 0.0, te)

    track_states_file = os.path.join(
        video_dir, '{}_states.pkl'.format(anno['VideoID']))
    with open(track_states_file, 'rb') as f:
        track_states = pickle.load(f)
    last_frame_tracks = track_states[len(ctx_frames) - 1][0]

    # Every annotated object must still be tracked in the last frame.
    obj_ids = anno['ID'].split(',') if anno['ID'] != '' else []
    if not all(idx in last_frame_tracks for idx in obj_ids):
        warnings.warn('Failed to process annotation: {}\n'.format(anno))
        return

    if conf_dict['augment']:
        # Re-augment until the annotated objects remain detectable
        # (IoU > 0.5) in the augmented last frame, then adopt the clip.
        while True:
            aug_frames = video_aug(frames)
            instances = scene_sensor.get_instances(aug_frames[-1:])[0]
            iou_lst = [max_iou(last_frame_tracks[idx], instances)
                       for idx in obj_ids]
            if len(iou_lst) == 0 or min(iou_lst) > 0.5:
                break
        frames = aug_frames

    instances_lst = scene_sensor.get_instances_with_feats(
        frames, get_full_fm=False)
    success, data = convert_instances_lst_to_data(
        instances_lst, conf_dict['tokens_per_frame'], last_frame_tracks,
        obj_ids, anno['WAE_id'], conf_dict['inst_crop_shape'],
        conf_dict['inst_fm_shape'], conf_dict['inst_pos_dim'],
        conf_dict['inst_cls_dim'], conf_dict['visual_token_dim'])
    if success:
        data_queue.put(data)
    else:
        warnings.warn('Failed to process annotation: {}\n'.format(anno))
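# A minimal sketch of the IoU matching used above, assuming `max_iou` takes a
# track bbox and a list of detected instances (each carrying an [x1, y1, x2,
# y2] 'bbox') and returns the best overlap, optionally with the index of the
# best-matching instance (-1 when none overlaps). Illustrative only; the real
# helper lives elsewhere in the repo.
def _max_iou_sketch(bbox, instances, return_id=False):
    def iou(a, b):
        ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
        ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
        area_a = (a[2] - a[0]) * (a[3] - a[1])
        area_b = (b[2] - b[0]) * (b[3] - b[1])
        return inter / (area_a + area_b - inter + 1e-9)

    best_iou, best_id = 0.0, -1
    for i, inst in enumerate(instances):
        v = iou(bbox, inst['bbox'])
        if v > best_iou:
            best_iou, best_id = v, i
    return (best_iou, best_id) if return_id else best_iou

# e.g. _max_iou_sketch([0, 0, 10, 10], [{'bbox': [5, 5, 15, 15]}],
#                      return_id=True) -> (~0.143, 0)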
def _process_single_anno(self, idx, anno, txt, data_dir):
    if not hasattr(self, "scene_sensor"):
        self.scene_sensor = SceneSensor(
            self.yolov4_model_dir,
            gpu=self.gpu,
            img_shape=[3, 416, 416],
            roi_feat_resolution=self.roi_feat_resolution,
            algorithm='yolov4')

    # Read annos and data
    track_states_file = os.path.join(
        self.video_tracking_dir, '{}_states.pkl'.format(anno['VideoID']))
    with open(track_states_file, 'rb') as f:
        track_states = pickle.load(f)
    video_file = os.path.join(
        self.video_tracking_dir, '{}.mp4'.format(anno['VideoID']))
    frames = clip_video_to_frames(video_file, 0.0, None)

    # Extract the frames in which the annotated track is present
    related_frames, related_tracks = [], []
    for frame, (tracks, bboxes) in zip(frames, track_states):
        if anno['ID'] not in tracks:
            continue
        related_frames.append(frame)
        related_tracks.append(tracks[anno['ID']])

    instances_lst = self.scene_sensor.get_instances_with_feats(
        related_frames, get_full_fm=False)

    # Salutation -> (tree_mask, cls0, cls1, cls2); -1 marks an unused label.
    salutation_labels = {
        'man': ('100', 0, -1, -1),
        'woman': ('100', 1, -1, -1),
        'young_boy': ('110', 0, 0, -1),
        'uncle': ('110', 0, 1, -1),
        'young_girl': ('101', 1, -1, 0),
        'aunt': ('101', 1, -1, 1),
    }
    if anno['Salutation'] not in salutation_labels:
        warnings.warn('Unknown salutation in anno: {}\n'.format(anno))
        return idx
    tree_mask, cls0, cls1, cls2 = salutation_labels[anno['Salutation']]

    for frame, instances, track in zip(
            related_frames, instances_lst, related_tracks):
        _, inst_id = max_iou(track, instances, return_id=True)
        if inst_id == -1:
            warnings.warn(
                'Cannot find corresponding instance for track in '
                'anno: {}\n'.format(anno))
            continue
        # Save the instance crop, its feature map, and one label line.
        x1, y1, x2, y2 = instances[inst_id]['bbox']
        cv2.imwrite(os.path.join(data_dir, '{}.jpg'.format(idx)),
                    frame[int(y1):int(y2), int(x1):int(x2)])
        np.save(os.path.join(data_dir, '{}.npy'.format(idx)),
                instances[inst_id]['fm'])
        with open(txt, 'a') as f:
            f.write('{} {} {} {} {}\n'.format(
                idx, tree_mask, cls0, cls1, cls2))
        idx += 1
    return idx
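# Sketch of how the three-digit `tree_mask` above appears to be read (an
# assumption inferred from the mapping, not documented in the source): the
# first digit marks the always-active gender root (cls0: 0=male, 1=female),
# the second marks the male age node (cls1: 0=young_boy, 1=uncle), and the
# third marks the female age node (cls2: 0=young_girl, 1=aunt); inactive
# nodes carry the placeholder -1.
def _decode_salutation_sketch(tree_mask, cls0, cls1, cls2):
    """Invert the label encoding (hypothetical helper for illustration)."""
    if tree_mask == '110':
        return 'young_boy' if cls1 == 0 else 'uncle'
    if tree_mask == '101':
        return 'young_girl' if cls2 == 0 else 'aunt'
    return 'man' if cls0 == 0 else 'woman'

# e.g. _decode_salutation_sketch('110', 0, 1, -1) -> 'uncle'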
def setup_class(self):
    self.scene_sensor = SceneSensor(
        YOLOv3_MODEL, gpu=0, algorithm='yolov3')
    self.frames = clip_video_to_frames(VIDEO, 3001., 4000.)