class TestYOLOv4SceneSensor(object):
    """Smoke tests for ``SceneSensor`` configured with the 'yolov4' algorithm.

    The sensor and the decoded frames of ``R2_VIDEO`` are created once in
    ``setup_class`` and shared by every test method.
    """

    def setup_class(self):
        """Create the sensor under test and decode the demo video."""
        self.roi_feat_resolution = 5
        self.scene_sensor = SceneSensor(
            YOLOv4_MODEL,
            gpu=0,
            img_shape=[3, 416, 416],
            roi_feat_resolution=self.roi_feat_resolution,
            algorithm='yolov4')
        self.frames = clip_video_to_frames(R2_VIDEO, 0., None)

    def test_get_instances(self, export=True):
        """Check one instance list is returned per input frame.

        Args:
            export: When true, additionally render the detections onto the
                frames and write them to 'data/scene_yolo4_demo.mp4'.
        """
        instances_lst = self.scene_sensor.get_instances(self.frames)
        assert len(instances_lst) == len(self.frames)

        if not export:
            return

        # Hard-coded output geometry (original note: "read from VIDEO");
        # presumably matches the properties of R2_VIDEO -- TODO confirm.
        h, w, fps = 720, 1280, 24.
        video_writer = VideoWriter(
            'data/scene_yolo4_demo.mp4', (w, h), fps)
        try:
            for frame, instances in zip(self.frames, instances_lst):
                bboxes = np.array([i['bbox'] for i in instances])
                labels = [i['category'] for i in instances]
                frame_draw = draw_bboxes(frame, bboxes, labels=labels)
                video_writer.add_frame(frame_draw)
        finally:
            # Always release the writer, even if drawing/encoding fails,
            # so the output file handle is not leaked.
            video_writer.close()

    def test_get_instances_with_feats(self):
        """Check ROI feature maps have the configured spatial resolution."""
        instances_lst, fm_lst = self.scene_sensor.get_instances_with_feats(
            self.frames, get_full_fm=True)

        # NOTE: relies on the first frame containing at least one detection.
        _, h, w = instances_lst[0][0]['fm'].shape
        assert h == w == self.roi_feat_resolution
        assert len(instances_lst) == len(fm_lst) == len(self.frames)
def worker_func(in_queue, out_queue, msg_queue, conf_dict):
    """Run a detection worker loop until a 'stop' message arrives.

    Each iteration first polls ``msg_queue`` (non-blocking) for the string
    'stop', then waits up to 5 seconds for an annotation dict on
    ``in_queue``. Annotations without a 'Cache' key have their pickled
    'Frames' decoded, run through the scene sensor, and replaced by an
    'Instances' entry; every received annotation is then forwarded to
    ``out_queue``.

    Args:
        in_queue: Queue yielding annotation dicts (or ``None``, which is
            ignored).
        out_queue: Queue receiving the processed annotation dicts.
        msg_queue: Control queue; the message 'stop' ends the loop.
        conf_dict: Configuration providing 'yolov4_model_dir', 'gpu' and
            'roi_feat_resolution'.
    """
    detector = SceneSensor(
        conf_dict['yolov4_model_dir'],
        gpu=conf_dict['gpu'],
        img_shape=[3, 416, 416],
        roi_feat_resolution=conf_dict['roi_feat_resolution'],
        algorithm='yolov4')

    while True:
        # Control messages are checked first and never block the loop.
        try:
            command = msg_queue.get_nowait()
        except Empty:
            command = ''
        if command == 'stop':
            return

        # The bounded wait lets the loop re-check the control queue
        # regularly when no work is available.
        try:
            anno = in_queue.get(timeout=5)
        except Empty:
            continue
        if anno is None:
            continue

        if Enable_Time_Log:
            started = time.time()

        if 'Cache' not in anno:
            decoded = []
            for blob in anno['Frames']:
                decoded.append(pickle.loads(blob))
            anno['Instances'] = detector.get_instances_with_feats(
                decoded, get_full_fm=False)
            # Drop the raw frames before forwarding to save memory.
            del anno['Frames']
        out_queue.put(anno)

        if Enable_Time_Log:
            finished = time.time()
            print('Detector takes {:.3f}s'.format(finished - started))
class SalutationClsDataset(object):
    """Build a salutation classification dataset from tracked videos.

    Annotations are read from ``anno_dir`` (one JSON object per line),
    split into train/test sets by video, and ``build_dataset`` then writes
    one cropped image, one ROI feature map and one label line per matched
    frame.

    Args:
        video_tracking_dir: Directory holding '<VideoID>.mp4' videos and
            '<VideoID>_states.pkl' tracking results.
        anno_dir: Directory of annotation files; the video id is taken
            from the first two '_'-separated parts of each file name.
        yolov4_model_dir: Model directory passed to ``SceneSensor``.
        roi_feat_resolution: ROI feature resolution passed to
            ``SceneSensor``.
        gpu: GPU id passed to ``SceneSensor``.
    """

    # Salutation -> (tree_mask, cls0, cls1, cls2) as written to the label
    # file. A class value of -1 appears wherever the matching mask digit
    # is '0'. Presumably the mask selects which classifier heads apply to
    # the sample -- TODO confirm against the training code.
    _SALUTATION_LABELS = {
        'man': ('100', 0, -1, -1),
        'woman': ('100', 1, -1, -1),
        'young_boy': ('110', 0, 0, -1),
        'uncle': ('110', 0, 1, -1),
        'young_girl': ('101', 1, -1, 0),
        'aunt': ('101', 1, -1, 1),
    }

    def __init__(self, video_tracking_dir, anno_dir, yolov4_model_dir,
                 roi_feat_resolution=5, gpu=0):
        self.video_tracking_dir = video_tracking_dir
        self.anno_dir = anno_dir
        self.yolov4_model_dir = yolov4_model_dir
        self.roi_feat_resolution = roi_feat_resolution
        self.gpu = gpu

        self._collect_annotations()
        self._split_train_test_sets(test_percentage=0.2)

    def _collect_annotations(self):
        """Load every annotation whose 'Salutation' is not 'null'.

        Populates ``self.annos``; each kept annotation gains a 'VideoID'
        key derived from its file name.
        """
        self.annos = []
        # Sorted so the annotation order does not depend on the OS.
        for anno_file in sorted(os.listdir(self.anno_dir)):
            video_id = '_'.join(anno_file.split('_')[:2])
            print(video_id)
            with open(os.path.join(self.anno_dir, anno_file), 'r') as f:
                for line in f:
                    # Tolerate blank lines (e.g. a trailing newline).
                    if not line.strip():
                        continue
                    anno = json.loads(line)
                    anno['VideoID'] = video_id
                    if anno['Salutation'] != 'null':
                        self.annos.append(anno)

    def _split_train_test_sets(self, test_percentage=0.2):
        """Split annotations into train/test sets at video granularity.

        All annotations of one video land in the same set. The training
        annotations are shuffled afterwards.

        Args:
            test_percentage: Fraction of videos assigned to the test set
                (rounded down).
        """
        # Copy from XiaoduHiDataloaderv2
        # Sorted once so that, for a given numpy random state, the split
        # is reproducible regardless of set iteration order.
        videos = sorted({anno['VideoID'] for anno in self.annos})
        num_test = int(len(videos) * test_percentage)
        ids = np.arange(len(videos))
        np.random.shuffle(ids)
        test_videos = {videos[i] for i in ids[:num_test]}

        self.test_annos, self.train_annos = [], []
        for anno in self.annos:
            if anno['VideoID'] in test_videos:
                self.test_annos.append(anno)
            else:
                self.train_annos.append(anno)

        ids = np.arange(len(self.train_annos))
        np.random.shuffle(ids)
        self.train_annos = [self.train_annos[i] for i in ids]

    def _process_single_anno(self, idx, anno, txt, data_dir):
        """Export all samples belonging to one annotation.

        For every frame in which the annotated track is present and can
        be matched to a detected instance, writes '<idx>.jpg' (instance
        crop) and '<idx>.npy' (instance feature map) into ``data_dir``
        and appends '<idx> <tree_mask> <cls0> <cls1> <cls2>' to ``txt``.

        Args:
            idx: Sample id to use for the first exported sample.
            anno: Annotation dict with 'VideoID', 'ID' and 'Salutation'.
            txt: Path of the label file to append to.
            data_dir: Directory receiving the image and feature files.

        Returns:
            The next unused sample id.
        """
        # Resolve the label before doing any work: an unknown salutation
        # would otherwise fail only after files were already written.
        label = self._SALUTATION_LABELS.get(anno['Salutation'])
        if label is None:
            warnings.warn(
                'Skipping anno with unknown salutation {!r}: {}\n'.format(
                    anno['Salutation'], anno))
            return idx
        tree_mask, cls0, cls1, cls2 = label

        # The sensor is created lazily, on first use.
        if not hasattr(self, "scene_sensor"):
            self.scene_sensor = SceneSensor(
                self.yolov4_model_dir,
                gpu=self.gpu,
                img_shape=[3, 416, 416],
                roi_feat_resolution=self.roi_feat_resolution,
                algorithm='yolov4')

        # Read annos and data
        track_states_file = os.path.join(
            self.video_tracking_dir,
            '{}_states.pkl'.format(anno['VideoID']))
        # NOTE(review): pickle.load must only be used on trusted files.
        with open(track_states_file, 'rb') as f:
            track_states = pickle.load(f)

        video_file = os.path.join(
            self.video_tracking_dir, '{}.mp4'.format(anno['VideoID']))
        frames = clip_video_to_frames(video_file, 0.0, None)

        # Extract frames in which the annotated track id is present.
        related_frames, related_tracks = [], []
        for frame, (tracks, _) in zip(frames, track_states):
            if anno['ID'] not in tracks:
                continue
            related_frames.append(frame)
            related_tracks.append(tracks[anno['ID']])

        # Nothing to detect on: avoid calling the sensor with no frames.
        if not related_frames:
            return idx

        instances_lst = self.scene_sensor.get_instances_with_feats(
            related_frames, get_full_fm=False)

        with open(txt, 'a') as f:
            for frame, instances, track in zip(
                    related_frames, instances_lst, related_tracks):
                _, inst_id = max_iou(track, instances, return_id=True)
                if inst_id == -1:
                    warnings.warn(
                        'Cannot find corresponding instance for track in '
                        'anno: {}\n'.format(anno))
                    continue

                x1, y1, x2, y2 = instances[inst_id]['bbox']
                cv2.imwrite(os.path.join(data_dir, '{}.jpg'.format(idx)),
                            frame[int(y1):int(y2), int(x1):int(x2)])
                np.save(os.path.join(data_dir, '{}.npy'.format(idx)),
                        instances[inst_id]['fm'])
                f.write('{} {} {} {} {}\n'.format(
                    idx, tree_mask, cls0, cls1, cls2))
                idx += 1

        return idx

    def build_dataset(self, output_dir):
        """Write the train and test splits below ``output_dir``.

        Creates 'train/' and 'test/' sample directories plus 'train.txt'
        and 'test.txt' label files. Existing label files are emptied
        first, because sample ids restart at 0 for each split and old
        lines would no longer match the rewritten sample files.

        Args:
            output_dir: Root directory of the generated dataset.
        """
        train_dir = os.path.join(output_dir, 'train')
        test_dir = os.path.join(output_dir, 'test')
        train_txt = os.path.join(output_dir, 'train.txt')
        test_txt = os.path.join(output_dir, 'test.txt')

        for split_dir in (train_dir, test_dir):
            if not os.path.exists(split_dir):
                os.makedirs(split_dir)

        splits = (
            (test_txt, test_dir, self.test_annos),
            (train_txt, train_dir, self.train_annos),
        )
        for txt, data_dir, annos in splits:
            print('Generating {}'.format(txt))
            # Start from an empty label file; samples are appended below.
            with open(txt, 'w'):
                pass
            idx = 0
            for anno in annos:
                idx = self._process_single_anno(idx, anno, txt, data_dir)
                print(idx)