class TestYOLOv4SceneSensor(object):
    """Tests for SceneSensor with the YOLOv4 detection backend."""

    def setup_class(self):
        # One sensor + one decoded frame sequence shared by every test.
        self.roi_feat_resolution = 5
        self.scene_sensor = SceneSensor(
            YOLOv4_MODEL,
            gpu=0,
            img_shape=[3, 416, 416],
            roi_feat_resolution=self.roi_feat_resolution,
            algorithm='yolov4')
        self.frames = clip_video_to_frames(R2_VIDEO, 0., None)

    def test_get_instances(self, export=True):
        """One instance list per input frame; optionally render a demo clip."""
        per_frame_instances = self.scene_sensor.get_instances(self.frames)
        assert len(per_frame_instances) == len(self.frames)
        if not export:
            return
        h, w, fps = 720, 1280, 24.  # read from VIDEO
        writer = VideoWriter('data/scene_yolo4_demo.mp4', (w, h), fps)
        for frame, detections in zip(self.frames, per_frame_instances):
            boxes = np.array([det['bbox'] for det in detections])
            names = [det['category'] for det in detections]
            writer.add_frame(draw_bboxes(frame, boxes, labels=names))
        writer.close()

    def test_get_instances_with_feats(self):
        """ROI feature maps come back square at the configured resolution."""
        per_frame_instances, full_fms = \
            self.scene_sensor.get_instances_with_feats(
                self.frames, get_full_fm=True)
        _, fm_h, fm_w = per_frame_instances[0][0]['fm'].shape
        assert fm_h == fm_w == self.roi_feat_resolution
        assert len(per_frame_instances) == len(full_fms) == len(self.frames)
class TestYOLOv3SceneSensor(object):
    """Tests for SceneSensor with the YOLOv3 detection backend."""

    def setup_class(self):
        # Sensor plus a one-second slice of VIDEO, shared by every test.
        self.scene_sensor = SceneSensor(YOLOv3_MODEL, gpu=0,
                                        algorithm='yolov3')
        self.frames = clip_video_to_frames(VIDEO, 3001., 4000.)

    def test_get_instances(self, export=True):
        """One instance list per input frame; optionally render a demo clip."""
        per_frame_instances = self.scene_sensor.get_instances(self.frames)
        assert len(per_frame_instances) == len(self.frames)
        if not export:
            return
        h, w, fps = 480, 640, 24.  # read from VIDEO
        writer = VideoWriter('data/scene_yolo_demo.mp4', (w, h), fps)
        for frame, detections in zip(self.frames, per_frame_instances):
            boxes = np.array([det['bbox'] for det in detections])
            names = [det['category'] for det in detections]
            writer.add_frame(draw_bboxes(frame, boxes, labels=names))
        writer.close()

    def test_get_feature_map(self):
        """One backbone feature map per input frame."""
        feature_maps = self.scene_sensor.get_feature_map(self.frames)
        assert len(feature_maps) == len(self.frames)
def run_worker(tasks, gpu_id, encoder_model, yolov4_model, output_dir,
               max_cosine_distance, resume):
    """Detect and track people in a batch of videos on a single GPU.

    For each video in ``tasks`` this runs YOLOv4 person detection plus a
    re-id-feature tracker, writes an annotated ``<task>_track.mp4`` (h264)
    and a ``<task>_states.pkl`` containing per-frame
    ``(track_log, det_log)`` tuples into ``output_dir``.

    Args:
        tasks: iterable of ``.mp4`` file paths to process, in order.
        gpu_id: CUDA device for this worker (int or str).
        encoder_model: path of the appearance (re-id) encoder model.
        yolov4_model: path of the YOLOv4 detector model.
        output_dir: directory receiving rendered videos and pickled logs.
        max_cosine_distance: matching threshold of the cosine re-id metric.
        resume: task id to resume from — every earlier task is skipped;
            ``None`` processes all tasks.
    """
    # Must be set before the CUDA-touching imports below. Environment
    # values must be strings, so coerce in case an int gpu id was passed.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_id)
    from perception.tracker.re_id import create_box_encoder, \
        NearestNeighborDistanceMetric
    from perception.tracker.tracker import Tracker, Detection
    from perception.scene.eval import SceneSensor

    encoder = create_box_encoder(encoder_model, batch_size=8)
    metric = NearestNeighborDistanceMetric('cosine', max_cosine_distance, None)
    tracker = Tracker(metric)
    # gpu=0 is intentional: CUDA_VISIBLE_DEVICES already restricts this
    # process to the single requested physical device.
    detector = SceneSensor(yolov4_model, gpu=0, img_shape=[3, 416, 416],
                           algorithm='yolov4')

    for video_file in tasks:
        task_id = os.path.basename(video_file)[:-len('.mp4')]
        # Skip already-finished tasks until the resume point is reached.
        if resume is not None:
            if resume != task_id:
                continue
            else:
                resume = None

        clip = VideoFileClip(video_file)
        track_video = os.path.join(output_dir, '{}_track.mp4'.format(task_id))
        video_writer = VideoWriter(track_video, (clip.w, clip.h), clip.fps)
        tracker_logs = []
        for frame in clip.iter_frames():
            frame = frame[:, :, ::-1]  # RGB -> BGR channel order
            instances = detector.get_instances(frame)[0]
            boxes = [ins['bbox'] for ins in instances]
            features = encoder(frame, boxes)
            detections = [
                Detection(ins, feat)
                for ins, feat in zip(instances, features)
            ]

            tracker.predict()
            tracker.update(detections)

            # NOTE: https://github.com/opencv/opencv/issues/14866
            # cv2 drawing needs a real writable array, not the reversed
            # view above. Copy once per frame (the original copied once
            # per drawn box inside both loops below).
            frame = np.array(frame)

            track_log = dict()
            for track in tracker.tracks:
                # Only draw/log tracks that are confirmed and were matched
                # in the most recent update.
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                track_log[str(track.track_id)] = bbox
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 255, 255),
                              2)
                cv2.putText(frame, str(track.track_id),
                            (int(bbox[0]), int(bbox[1] + 23)), 0, 5e-3 * 100,
                            (0, 255, 0), 2)

            det_log = []
            for det in detections:
                # Only person detections are logged and drawn.
                if str(det.cls) != 'person':
                    continue
                bbox = det.to_tlbr()
                score = "%.2f" % round(det.confidence * 100, 2) + "%"
                det_log.append(bbox)
                cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
                cv2.putText(frame, score, (int(bbox[0]), int(bbox[3])), 0,
                            5e-3 * 100, (0, 255, 0), 2)

            tracker_logs.append((track_log, det_log))
            video_writer.add_frame(frame)

        video_writer.close()
        convert_to_h264(track_video)
        print('Saved {}'.format(track_video))

        tracker_logs_file = os.path.join(output_dir,
                                         '{}_states.pkl'.format(task_id))
        with open(tracker_logs_file, 'wb') as f:
            pickle.dump(tracker_logs, f)
        print('Saved {}'.format(tracker_logs_file))

        # Fresh track ids/state for the next video.
        tracker.reset()