def test_cvt2frames(self):
    """Check that ``cvt2frames`` writes the expected image files.

    Three runs share one temporary directory: default arguments, progress
    display disabled, and a custom start index / filename template limited
    to 20 frames.
    """
    out_dir = tempfile.mkdtemp()

    def expect_and_remove(pattern, indices):
        # Every expected frame must exist; it is deleted right after the
        # check so the following run starts from an empty directory.
        assert osp.isdir(out_dir)
        for idx in indices:
            frame_file = pattern.format(out_dir, idx)
            assert osp.isfile(frame_file)
            os.remove(frame_file)

    reader = mmcv.VideoReader(self.video_path)
    reader.cvt2frames(out_dir)
    expect_and_remove('{}/{:06d}.jpg', range(self.num_frames))

    reader = mmcv.VideoReader(self.video_path)
    reader.cvt2frames(out_dir, show_progress=False)
    expect_and_remove('{}/{:06d}.jpg', range(self.num_frames))

    reader = mmcv.VideoReader(self.video_path)
    reader.cvt2frames(
        out_dir,
        file_start=100,
        filename_tmpl='{:03d}.JPEG',
        start=100,
        max_num=20)
    expect_and_remove('{}/{:03d}.JPEG', range(100, 120))

    # Fails if anything unexpected is still left in the directory.
    os.removedirs(out_dir)
def test_frames2video(self):
    """Round-trip a video through frame images and back into an ``.avi``."""
    source = mmcv.VideoReader(self.video_path)
    frame_dir = tempfile.mkdtemp()
    source.cvt2frames(frame_dir)
    assert osp.isdir(frame_dir)

    frame_files = [
        '{}/{:06d}.jpg'.format(frame_dir, idx)
        for idx in range(self.num_frames)
    ]
    for frame_file in frame_files:
        assert osp.isfile(frame_file)

    out_filename = osp.join(tempfile.gettempdir(), 'mmcv_test.avi')

    # Default arguments: the test expects 30 fps and every frame.
    mmcv.frames2video(frame_dir, out_filename)
    rebuilt = mmcv.VideoReader(out_filename)
    assert rebuilt.fps == 30
    assert len(rebuilt) == self.num_frames

    # Explicit fps and a start/end window of 40 frames.
    mmcv.frames2video(
        frame_dir,
        out_filename,
        fps=25,
        start=10,
        end=50,
        show_progress=False)
    rebuilt = mmcv.VideoReader(out_filename)
    assert rebuilt.fps == 25
    assert len(rebuilt) == 40

    # Clean up the dumped frames, their directory and the video.
    for frame_file in frame_files:
        os.remove(frame_file)
    os.removedirs(frame_dir)
    os.remove(out_filename)
def get_injected_cfg(cfg_data):
    """Build a task configuration from injected settings.

    A deep copy of ``TaskCfg`` is filled in with the video path, the path of
    the matching ``.json`` file and, when the corresponding keys are present
    in ``cfg_data``, one mask per monitoring area.

    Args:
        cfg_data: Mapping with a mandatory ``'filename'`` entry and optional
            ``'parking_monitoring_area'``, ``'lane_monitoring_area'`` (which
            requires ``'lane_no_allow_cars'``) and
            ``'person_monitoring_area'`` entries.

    Returns:
        The populated copy of ``TaskCfg``.

    Raises:
        RuntimeError: If ``'filename'`` is missing, the video or its json
            file does not exist, or a lane area is given without
            ``'lane_no_allow_cars'``.
    """
    if 'filename' not in cfg_data:
        raise RuntimeError('注入数据必须指定filename参数,以确定处理的数据。')

    filename = cfg_data['filename']
    filepath = osp.join(DataConfig.VIDEO_DIR, filename)
    if not osp.exists(filepath):
        raise RuntimeError('文件夹{}中不存在名字为{}的视频或者视频源头'.format(
            DataConfig.VIDEO_DIR, filename))

    taskCfg = deepcopy(TaskCfg)
    head = taskCfg['head']
    head['filename'] = filepath

    # The json file shares the part of the file name before the first dot.
    jsonname = filename.split('.')[0] + '.json'
    jsonpath = osp.join(DataConfig.JSON_DIR, jsonname)
    if not osp.exists(jsonpath):
        raise RuntimeError('文件夹{}中不存在名字为{}的json'.format(
            DataConfig.JSON_DIR, jsonname))
    head['json_filename'] = jsonpath

    stages = taskCfg['backbones'][0]
    stages[2]['is_process'] = True

    def polygons_for(key):
        # One int32 point array per polygon listed under ``key``.
        return [np.array(points, dtype=np.int32) for points in cfg_data[key]]

    def blank_mask():
        # All-ones array shaped like the first channel of frame 10.
        return np.ones_like(mmcv.VideoReader(filepath)[10][:, :, 0])

    if 'parking_monitoring_area' in cfg_data:
        parking_polygons = polygons_for('parking_monitoring_area')
        # Polygon interiors are filled with 0, the rest stays 1.
        parking_mask = cv2.fillPoly(blank_mask(), parking_polygons, 0)
        stages[3]['monitoring_area'] = parking_mask
        stages[3]['is_process'] = False

    if 'lane_monitoring_area' in cfg_data:
        if 'lane_no_allow_cars' not in cfg_data:
            raise RuntimeError('如果已经提供车道检测区域,请也提供禁止出现车辆信息')
        stages[4]['is_process'] = False
        lane_no_allow_cars = cfg_data['lane_no_allow_cars']
        lane_polygons = polygons_for('lane_monitoring_area')
        lane_mask = blank_mask()
        # Each lane polygon is filled with the integer value of the matching
        # key of ``lane_no_allow_cars`` (paired by position).
        for lane_area, no_allow_flag in zip(lane_polygons,
                                            lane_no_allow_cars.keys()):
            lane_mask = cv2.fillPoly(lane_mask, [lane_area],
                                     int(no_allow_flag))
        stages[4]['monitoring_area'] = lane_mask
        stages[4]['no_allow_car'] = lane_no_allow_cars

    if 'person_monitoring_area' in cfg_data:
        person_polygons = polygons_for('person_monitoring_area')
        # NOTE(review): filling an all-ones mask with 1 leaves it all ones.
        person_mask = cv2.fillPoly(blank_mask(), person_polygons, 1)
        stages[5]['monitoring_area'] = person_mask
        stages[5]['is_process'] = True

    return taskCfg
def main():
    """Run a detector on every frame of a video and show and/or save it.

    Command-line arguments come from ``parse_args()``. At least one of
    ``--out`` (write the annotated video) or ``--show`` (display it) must be
    given. The output video reuses the fps and frame size of the input.
    """
    args = parse_args()
    assert args.out or args.show, \
        ('Please specify at least one operation (save/show the '
         'video) with the argument "--out" or "--show"')

    model = init_detector(args.config, args.checkpoint, device=args.device)

    video_reader = mmcv.VideoReader(args.video)
    video_writer = None
    if args.out:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(
            args.out, fourcc, video_reader.fps,
            (video_reader.width, video_reader.height))

    # try/finally: the writer must be released (and windows closed) even when
    # inference fails part-way, otherwise the output file is left unfinalized.
    try:
        for frame in mmcv.track_iter_progress(video_reader):
            result = inference_detector(model, frame)
            frame = model.show_result(frame, result, score_thr=args.score_thr)
            if args.show:
                cv2.namedWindow('video', 0)
                mmcv.imshow(frame, 'video', args.wait_time)
            if args.out:
                video_writer.write(frame)
    finally:
        if video_writer:
            video_writer.release()
        cv2.destroyAllWindows()
def load_frame(video_path: (Path, str),
               frame_id: int = -1,
               rgb: bool = True) -> np.ndarray:
    """Read a single frame from an mp4 video.

    Args:
        video_path: Path to the video to read. Its suffix must be ``.mp4``
            (compared case-insensitively).
        frame_id: Index of the frame to read. If negative, a random frame is
            returned.
        rgb: Whether the frame should be converted from BGR to RGB.

    Returns:
        The frame at the requested index.

    Raises:
        ValueError: If the suffix is not ``.mp4``, or a random frame is
            requested from a video that reports zero frames.
    """
    # Case-insensitive so that e.g. "clip.MP4" is accepted as well.
    if Path(video_path).suffix.lower() != ".mp4":
        raise ValueError(
            f"Work only with the mp4 video files, but got {video_path}")

    video = mmcv.VideoReader(str(video_path))

    if frame_id < 0:
        num_frames = len(video)
        if num_frames == 0:
            # random.randint(0, -1) would raise a far less helpful ValueError.
            raise ValueError(f"Video {video_path} contains no frames")
        frame_id = random.randint(0, num_frames - 1)

    frame = video.get_frame(frame_id)

    if rgb:
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    return frame
def dump_frames(vid_item):
    """Write every third frame of a video as a resized JPEG.

    Frames are resized with ``mmcv.imresize(frame, (480, 320))`` and saved as
    ``img_00001.jpg``, ``img_00002.jpg``, ... inside
    ``<args.out_dir>/<video name>``. Decoding stops early at the first frame
    that comes back as ``None``.

    Args:
        vid_item: ``(full_path, vid_path, vid_id)`` triple. ``full_path`` is
            opened as the video; the part of ``vid_path`` before its first
            dot names the output folder; ``vid_id`` is not used here.

    Returns:
        Always ``True``.
    """
    full_path, vid_path, vid_id = vid_item
    vid_name = vid_path.split('.')[0]
    out_full_path = osp.join(args.out_dir, vid_name)
    # An existing folder is fine; any other failure is no longer swallowed.
    os.makedirs(out_full_path, exist_ok=True)

    vr = mmcv.VideoReader(full_path)
    num_frames = len(vr)
    frame_step = 3  # keep one frame out of every `frame_step`

    for i in range(num_frames):
        # Read the frame once and reuse it for the None check and the resize.
        frame = vr[i]
        if frame is None:
            print('[Warning] length inconsistent!'
                  'Early stop with {} out of {} frames'.format(
                      i + 1, num_frames))
            break
        if i % frame_step == 0:
            img = mmcv.imresize(frame, (480, 320), return_scale=False)
            mmcv.imwrite(
                img, '{}/img_{:05d}.jpg'.format(out_full_path,
                                                (i // frame_step) + 1))

    # Reports the frame count of the source video, not the number written.
    print('{} done with {} frames'.format(vid_name, num_frames))
    sys.stdout.flush()
    return True
def process(self):
    """Detect faces in ``self.path`` and save an annotated copy.

    * ``self.type == 'image'``: boxes are drawn on the image, which is saved
      as ``output.jpg``.
    * ``self.type == 'video'``: boxes are drawn on every frame, frames are
      resized to 640x360 and written to ``video_tracked.mp4`` at 25 fps.
    * Any other type prints an error message.

    ``self.mtcnn.detect`` may return ``None`` instead of boxes (the video
    branch already relied on this); both branches now tolerate it.
    """
    box_style = dict(outline=(255, 0, 0), width=6)

    if self.type == 'image':
        print('[+] Processing Image')
        img = Image.open(self.path)
        boxes, _ = self.mtcnn.detect(img)
        frame_draw = img.copy()
        draw = ImageDraw.Draw(frame_draw)
        # Previously iterating over a None result raised TypeError.
        if boxes is not None:
            for box in boxes:
                draw.rectangle(box.tolist(), **box_style)
        frame_draw.save('output.jpg')

    elif self.type == 'video':
        print('[+] Processing Video')
        frames_tracked = []
        video = mmcv.VideoReader(self.path)
        # Frames are converted one at a time instead of materialising a
        # second full list of PIL images first.
        for i, raw_frame in enumerate(video):
            frame = Image.fromarray(cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB))
            print('\rTracking frame: {}'.format(i + 1), end='')
            boxes, _ = self.mtcnn.detect(frame)
            frame_draw = frame.copy()
            draw = ImageDraw.Draw(frame_draw)
            if boxes is not None:
                for box in boxes:
                    draw.rectangle(box.tolist(), **box_style)
            frames_tracked.append(
                frame_draw.resize((640, 360), Image.BILINEAR))

        if not frames_tracked:
            # Nothing was decoded; avoid an IndexError on frames_tracked[0].
            print('Error: No frames found in video')
            return

        dim = frames_tracked[0].size
        fourcc = cv2.VideoWriter_fourcc(*'FMP4')
        video_tracked = cv2.VideoWriter('video_tracked.mp4', fourcc, 25.0, dim)
        try:
            for frame in frames_tracked:
                video_tracked.write(
                    cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
        finally:
            video_tracked.release()

    else:
        print('Error: Invalid Input Type')
def main(input_file):
    """Crop every detected face of a video into ``<input stem>_faces/``.

    A marker file ``<input stem>.maga`` is written once the video has been
    processed; if that marker already exists the function returns
    immediately.

    Args:
        input_file: Path of the video to scan.
    """
    basename = os.path.splitext(input_file)[0]
    done_marker = f"{basename}.maga"
    if os.path.exists(done_marker):
        return

    output_folder = f"{basename}_faces"
    os.makedirs(output_folder, exist_ok=True)
    # Only the final path component may be used in file names: joining
    # ``output_folder`` with a name that still carries a directory part
    # nests (relative path) or discards (absolute path) the output folder.
    file_stem = os.path.basename(basename)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Running on device: {device}")
    mtcnn = MTCNN(keep_all=True, device=device)
    video = mmcv.VideoReader(input_file)

    face_count = 0
    for frame in tqdm.tqdm(video, position=0):
        frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        # Detect faces; ``boxes`` is None when nothing was found.
        boxes, _ = mtcnn.detect(frame)
        if boxes is None:
            continue
        for box in boxes:
            # First four values of the box are the crop rectangle.
            face = frame.crop(box.tolist()[:4])
            output_path = os.path.join(
                output_folder, f"{file_stem}-{face_count:08d}.jpg")
            face.save(output_path)
            face_count += 1

    with open(done_marker, "w") as f:
        f.write("MAGA!")
def gen_merl_frames_label(video_dir, label_dir, output_dir, map_dict):
    """Write one per-frame label file for every video in ``video_dir``.

    For each video the matching ``*label.mat`` file is loaded and its
    ``'tlabs'`` entry is expanded into one label per frame. Frames not
    covered by any action get ``map_dict['0']``. The labels are written, one
    per line, to ``<output_dir>/<video prefix>.txt``.

    Args:
        video_dir: Directory containing the videos.
        label_dir: Directory containing the ``.mat`` label files.
        output_dir: Directory the ``.txt`` files are written to.
        map_dict: Maps the string class index (``'0'``, ``'1'``, ...) to the
            label text written to the file.

    Returns:
        ``output_dir``.
    """
    for video in os.listdir(video_dir):
        video_path = os.path.join(video_dir, video)
        vid = mmcv.VideoReader(video_path)
        label_nums = len(vid)

        label_file = video.split("crop")[0] + 'label.mat'
        label_path = os.path.join(label_dir, label_file)
        labels = loadmat(label_path)
        # One entry per action class — presumably shape (5, 1); TODO confirm.
        tlabs = np.array(labels['tlabs'])

        label_dict = {}
        for class_idx, class_segments in enumerate(tlabs):
            for action in class_segments[0]:
                # ``action`` holds an inclusive [first, last] frame range;
                # frame numbers are used as 1-based string keys.
                for index in range(action[0], action[1] + 1):
                    label_dict[str(index)] = map_dict[str(class_idx + 1)]

        # Frames without any action fall back to the '0' label.
        for row in range(label_nums):
            frame_key = str(row + 1)
            if frame_key not in label_dict:
                label_dict[frame_key] = map_dict['0']
        print(len(label_dict))

        out_path = os.path.join(output_dir, video.split("_crop")[0] + '.txt')
        with open(out_path, 'w') as f:
            for x in range(label_nums):
                f.write(label_dict[str(x + 1)] + '\n')
    return output_dir
def __call__(self, results):
    """Decode the frames listed in ``results['frame_inds']``.

    Args:
        results (dict): Must provide ``'filename'`` and ``'frame_inds'``
            (an array; it is squeezed when it is not 1-D).
            ``'total_frames'`` is only read for the diagnostic printed on an
            out-of-range index.

    Returns:
        dict | None: ``results`` extended with ``'img_group'`` (list of
        decoded frames) and ``'ori_shape'`` (shape of the first frame), or
        ``None`` when decoding failed for any reason.
    """
    if results['frame_inds'].ndim != 1:
        results['frame_inds'] = np.squeeze(results['frame_inds'])

    try:
        container = mmcv.VideoReader(results['filename'])
        img_group = list()

        for frame_ind in results['frame_inds']:
            # Read each frame once. The previous version read it a second
            # time outside this try, so the handler below could never run.
            try:
                cur_frame = container[frame_ind]
            except IndexError:
                print(results['filename'], frame_ind,
                      results['total_frames'])
                raise
            # last frame may be None in OpenCV
            while cur_frame is None:
                frame_ind -= 1
                cur_frame = container[frame_ind]
            img_group.append(cur_frame)

        results['img_group'] = img_group
        results['ori_shape'] = img_group[0].shape
    except Exception as e:
        # Best effort: report the failure and let the caller skip the sample.
        print("Failed to decode {} with exception: {}".format(
            results['filename'], e))
        return None

    return results
def inference(detection_cfg,
              estimation_cfg,
              recognition_cfg,
              video_file,
              gpus=1,
              worker_per_gpu=1,
              save_dir=None):
    """Run pose inference on a video and build a normalised joint sequence.

    The joint sequence ``seq`` has shape ``(1, 3, num_results, 17, 1)``:
    channel 0 and 1 are joint coordinates divided by the two entries of the
    video resolution, channel 2 is the joint score. ``seq`` and the
    recognizer are not returned; they are only available from the
    interactive shell opened at the end.

    Returns:
        The list returned by ``pose_inference``.
    """
    recognizer = init_recognizer(recognition_cfg, 0)
    resolution = mmcv.VideoReader(video_file).resolution
    results = pose_inference(detection_cfg, estimation_cfg, video_file, gpus,
                             worker_per_gpu)

    seq = np.zeros((1, 3, len(results), 17, 1))
    for frame_idx, frame_result in enumerate(results):
        # Entries without joint predictions stay all-zero.
        if frame_result['joint_preds'] is None:
            continue
        joints = frame_result['joint_preds']
        seq[0, 0, frame_idx, :, 0] = joints[0, :, 0] / resolution[0]
        seq[0, 1, frame_idx, :, 0] = joints[0, :, 1] / resolution[1]
        seq[0, 2, frame_idx, :, 0] = frame_result['joint_scores'][0, :, 0]

    # NOTE(review): drops into an interactive IPython shell and blocks until
    # it is closed — looks like a debugging aid; confirm before shipping.
    import IPython
    IPython.embed()

    return results
def __call__(self, results):
    """Open the video with ``mmcv.VideoReader`` and record it in ``results``.

    With the ``'disk'`` backend the file named by ``results['filename']`` is
    opened directly. Otherwise its bytes are fetched through the file client
    and written to ``tmp_<thread id>.mp4`` inside ``self.tmp_folder`` first.

    Args:
        results (dict): The resulting dict to be modified and passed
            to the next transform in pipeline.

    Returns:
        dict: ``results`` with ``'new_path'``, ``'video_reader'`` and
        ``'total_frames'`` added.
    """
    reads_from_disk = self.io_backend == 'disk'
    if not reads_from_disk:
        # The file client is created on first use.
        if self.file_client is None:
            self.file_client = FileClient(self.io_backend, **self.kwargs)

        # One temp file per thread: files of the same thread share a path.
        thread_id = get_thread_id()
        new_path = osp.join(self.tmp_folder, f'tmp_{thread_id}.mp4')
        with open(new_path, 'wb') as tmp_file:
            tmp_file.write(self.file_client.get(results['filename']))
    else:
        new_path = results['filename']

    reader = mmcv.VideoReader(new_path)
    results['new_path'] = new_path
    results['video_reader'] = reader
    results['total_frames'] = len(reader)
    return results
def _video2img2lmark(v_path, fa):
    """Run ``get3DLmarks`` over the frames of one video.

    The video is opened with ``mmcv.VideoReader`` and the reader itself is
    handed to ``get3DLmarks`` in place of a list of frames.

    Args:
        v_path: Path of the video file; also forwarded to ``get3DLmarks``.
        fa: Forwarded unchanged to ``get3DLmarks`` — presumably the landmark
            detector; verify against the caller.
    """
    frame_list = mmcv.VideoReader(v_path)
    get3DLmarks(frame_list, v_path, fa)
def inference(
        detection_cfg,
        skeleton_cfg,
        dataset_cfg,
        gpus=1,
        worker_per_gpu=1,
):
    """Estimate poses for every frame of a video with worker processes.

    Frames are distributed round-robin over ``gpus * worker_per_gpu`` spawned
    ``worker`` processes, and their results are collected from a queue. When
    ``skeleton_cfg.data_cfg.save_video`` is set, the rendered frames in
    ``<save_dir>/<video name>.img`` are assembled into
    ``<save_dir>/<video name>`` afterwards.

    Args:
        detection_cfg: Detector settings (``checkpoint_file`` is read here).
        skeleton_cfg: Pose estimator settings (``data_cfg`` and
            ``checkpoint_file`` are read here).
        dataset_cfg: Provides ``video_file``, the path of the input video.
        gpus: Number of GPUs to spread workers over.
        worker_per_gpu: Number of worker processes per GPU.
    """
    # get frame num
    video_file = dataset_cfg.video_file
    # Strip surrounding whitespace and a trailing slash before taking the
    # last path component. The previous ``strip('/n')`` removed the
    # characters '/' and 'n' from both ends, mangling e.g. "ntu.mp4".
    video_name = video_file.strip().rstrip('/').split('/')[-1]
    video_frames = mmcv.VideoReader(video_file)
    num_frames = len(video_frames)
    del video_frames

    data_cfg = skeleton_cfg.data_cfg
    if data_cfg.save_video:
        data_cfg.img_dir = os.path.join(data_cfg.save_dir,
                                        '{}.img'.format(video_name))
        # Start from an empty frame directory on every run.
        if os.path.exists(data_cfg.img_dir):
            import shutil
            shutil.rmtree(data_cfg.img_dir)
        os.makedirs(data_cfg.img_dir)

    # cache model checkpoints
    cache_checkpoint(detection_cfg.checkpoint_file)
    cache_checkpoint(skeleton_cfg.checkpoint_file)

    # multiprocess settings
    context = mp.get_context('spawn')
    result_queue = context.Queue(num_frames)
    procs = []
    num_workers = gpus * worker_per_gpu
    for w in range(num_workers):
        # Worker w handles frames w, w + num_workers, w + 2 * num_workers, ...
        shred_list = list(range(w, num_frames, num_workers))
        p = context.Process(target=worker,
                            args=(video_file, shred_list, detection_cfg,
                                  skeleton_cfg, data_cfg, w % gpus,
                                  result_queue))
        p.start()
        procs.append(p)

    all_result = []
    print('\nPose estimation start:')
    prog_bar = ProgressBar(num_frames)
    # Exactly one queue item is expected per frame.
    for _ in range(num_frames):
        all_result.append(result_queue.get())
        prog_bar.update()
    for p in procs:
        p.join()

    if len(all_result) == num_frames and data_cfg.save_video:
        print('\n\nGenerate video:')
        video_path = os.path.join(data_cfg.save_dir, video_name)
        mmcv.frames2video(data_cfg.img_dir,
                          video_path,
                          filename_tmpl='{:01d}.png')
        print('Video was saved to {}'.format(video_path))

    # NOTE(review): opens an interactive IPython shell and blocks until it is
    # closed; ``all_result`` is only reachable from there because nothing is
    # returned — looks like a debugging aid, confirm before shipping.
    import IPython
    IPython.embed()
def main(input_file):
    """Write a copy of a video with red boxes drawn around detected faces.

    The result is saved next to the input as ``<input stem>_faces.mp4``.
    Frames are written as soon as they are annotated, so memory use no longer
    grows with the length of the video. The output uses the frame rate
    reported by the reader (30 fps is only a fallback when none is reported).
    A video without frames produces no output file.

    Args:
        input_file: Path of the video to annotate.

    Raises:
        FileExistsError: If the output file already exists.
    """
    base_out = os.path.splitext(input_file)[0]
    output_file = f"{base_out}_faces.mp4"
    if os.path.exists(output_file):
        raise FileExistsError(f"Exists: {output_file}")

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Running on device: {device}")

    mtcnn = MTCNN(keep_all=True, device=device)
    video = mmcv.VideoReader(input_file)
    fps = video.fps or 30.0

    video_tracked = None
    finished = False
    try:
        for frame in tqdm.tqdm(video, position=0):
            frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Detect faces; ``boxes`` is None when nothing was found.
            boxes, _ = mtcnn.detect(frame)

            # Draw faces
            frame_draw = frame.copy()
            draw = ImageDraw.Draw(frame_draw)
            if boxes is not None:
                for box in boxes:
                    draw.rectangle(box.tolist(), outline=(255, 0, 0), width=6)

            # The writer is created lazily because the frame size is only
            # known once the first frame has been decoded.
            if video_tracked is None:
                fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                video_tracked = cv2.VideoWriter(output_file, fourcc, fps,
                                                frame_draw.size)
            video_tracked.write(
                cv2.cvtColor(np.array(frame_draw), cv2.COLOR_RGB2BGR))
        finished = True
    finally:
        if video_tracked is not None:
            video_tracked.release()
            # Do not leave a partial file behind: it would make the next run
            # fail the "Exists" check above.
            if not finished and os.path.exists(output_file):
                os.remove(output_file)
def main(config_file, checkpoint_file):
    """Run the detector on ``IMG_0248.MOV`` and print per-frame box lists.

    For every frame the person and object boxes returned by ``show_result``
    are de-duplicated and printed together with their counts.

    Args:
        config_file: Detector config file.
        checkpoint_file: Detector checkpoint file.
    """
    # build the model from a config file and a checkpoint file
    model = init_detector(config_file, checkpoint_file, device='cuda:0')

    # https://github.com/open-mmlab/mmcv/blob/master/mmcv/video/io.py
    video = mmcv.VideoReader('IMG_0248.MOV')

    # Bookkeeping values; none of them is read in the code visible here.
    i = num_person = num_object = pre_frame_object = flag = 0

    for frame in video:
        result = inference_detector(model, frame)
        # bbox result
        detected = show_result(frame, result, model.CLASSES, wait_time=2)
        (person_bboxes, object_bboxes) = detected

        # set() removes duplicate entries (their order is not preserved).
        person_bboxes = list(set(person_bboxes))
        object_bboxes = list(set(object_bboxes))
        n_person = len(person_bboxes)
        n_object = len(object_bboxes)

        print('person:', person_bboxes)
        print('len_person :', n_person)
        print('object:', object_bboxes)
        print('len_object :', n_object)
def cut_video(video_dir, save_dir):
    """Dump the frames of every video in ``video_dir`` into ``save_dir``.

    Frames are written as ``<video name without extension>_<index>.png``
    with a six-digit, zero-padded index.

    Args:
        video_dir: Directory whose entries are all opened as videos.
        save_dir: Directory receiving the frames of all videos.
    """
    for video_name in os.listdir(video_dir):
        # splitext instead of ``[:-4]`` so extensions of any length
        # (".mpeg", ".mov", none at all) are removed correctly.
        stem = os.path.splitext(video_name)[0]
        video = mmcv.VideoReader(os.path.join(video_dir, video_name))
        video.cvt2frames(save_dir, filename_tmpl=stem + '_{:06d}.png')
def test_slice(self):
    """Slicing a ``VideoReader`` must return the expected frames.

    Frames are identified by their rounded mean pixel value.
    """
    reader = mmcv.VideoReader(self.video_path)

    def rounded_means(frames):
        return [int(round(frame.mean())) for frame in frames]

    # Negative and positive bounds address the same two frames.
    assert rounded_means(reader[-105:-103]) == [94, 205]
    assert rounded_means(reader[63:65]) == [94, 205]

    # A negative step yields the frames in reverse order.
    assert rounded_means(reader[64:62:-1]) == [205, 94]

    # Open-ended slices at the start and at the end of the video.
    assert rounded_means(reader[:5]) == [94] * 5
    assert rounded_means(reader[165:]) == [0] * 3
    assert rounded_means(reader[-3:]) == [0] * 3
def worker(video_file, index, detection_cfg, skeleton_cfg, skeleon_data_cfg,
           device, result_queue):
    """Detect persons and estimate their poses for a subset of frames.

    For every frame index in ``index`` one dict with the keys
    ``'frame_index'``, ``'position_preds'`` and ``'position_maxvals'`` is put
    on ``result_queue``; the last two are ``None`` when no person box passed
    the filter. When ``skeleon_data_cfg.save_video`` is set, the frame is
    also written to ``<img_dir>/<idx>.png``.

    Args:
        video_file: Path of the video to read frames from.
        index: Iterable of frame indices this worker is responsible for.
        detection_cfg: Detector settings (``model_cfg``, ``checkpoint_file``,
            ``bbox_thre``).
        skeleton_cfg: Pose estimator settings (``model_cfg``,
            ``checkpoint_file``).
        skeleon_data_cfg: Data settings (``save_video``, ``img_dir``), also
            passed to the skeleton pre-processing.
        device: Value exported as ``CUDA_VISIBLE_DEVICES`` for this process.
        result_queue: Queue receiving one result dict per frame.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(device)
    video_frames = mmcv.VideoReader(video_file)

    # load model: both networks are built on the CPU first ...
    detection_model = init_detector(
        detection_cfg.model_cfg,
        get_mmskeleton_url(detection_cfg.checkpoint_file),
        device='cpu')
    skeleton_model = init_twodimestimator(skeleton_cfg.model_cfg,
                                          skeleton_cfg.checkpoint_file,
                                          device='cpu')
    # ... and moved to the GPU afterwards.
    detection_model = detection_model.cuda()
    skeleton_model = skeleton_model.cuda()

    for idx in index:
        image = video_frames[idx]
        draw_image = image.copy()
        bbox_result = inference_detector(detection_model, image)
        person_bbox, labels = VideoDemo.bbox_filter(bbox_result,
                                                    detection_cfg.bbox_thre)

        if len(person_bbox) == 0:
            # Nobody in the frame: optionally store the untouched frame.
            preds, maxvals = None, None
            if skeleon_data_cfg.save_video:
                frame_png = os.path.join(skeleon_data_cfg.img_dir,
                                         '{}.png'.format(idx))
                mmcv.imwrite(image, frame_png)
        else:
            # The channel order is reversed before pre-processing.
            person, meta = VideoDemo.skeleton_preprocess(
                image[:, :, ::-1], person_bbox, skeleon_data_cfg)
            preds, maxvals = inference_twodimestimator(
                skeleton_model, person.cuda(), meta, True)
            results = VideoDemo.skeleton_postprocess(preds, maxvals, meta)
            if skeleon_data_cfg.save_video:
                frame_png = os.path.join(skeleon_data_cfg.img_dir,
                                         '{}.png'.format(idx))
                mmcv.imshow_det_bboxes(draw_image,
                                       person_bbox,
                                       labels,
                                       detection_model.CLASSES,
                                       score_thr=detection_cfg.bbox_thre,
                                       show=False,
                                       wait_time=0)
                save(image, draw_image, results, frame_png)

        result_queue.put({
            'frame_index': idx,
            'position_preds': preds,
            'position_maxvals': maxvals,
        })
def main():
    """Build ``video_result.jpg`` from pixels sampled across a video.

    One row of pixels (row ``r`` of a fixed crop) is sampled from the frames
    of ``eggroll_color.mp4``. For every column the samples collected over
    time are stacked vertically in the output image. How often a column is
    sampled depends on a per-column length derived from ``oval_func``.
    """
    # Setup input.
    video = mmcv.VideoReader('eggroll_color.mp4')
    frame_size = len(video)  # number of frames, not a pixel size
    #frame=video[0]
    print(frame_size)
    #cv2.imwrite('video_result.jpg', frame[30:368,128:548,])
    # Init: constants describing the crop [30:368, 128:548] used below.
    pi = 3.1415
    left = 128  # not used below
    right = 548  # not used below
    l = 360 - 128
    s = 548 - 360
    r = int((368 - 30) / 2)  # middle row of the crop
    # NOTE(review): oval_func is defined elsewhere; get_y(i) is treated as a
    # radius below — confirm.
    correlations = oval_func(r, s, l)
    h_out = int(338 * pi)
    w_out = int(420 * pi)
    # Get egg shape: per-column length 2 * pi * get_y(i), plus one empty
    # sample list per colour channel and column.
    lengths = {}
    values_r = {}
    values_g = {}
    values_b = {}
    max_length = 0
    for i in range(s + l + 1):
        length = int(2 * 3.1415 * correlations.get_y(i))
        lengths[i] = length
        values_r[i] = []
        values_g[i] = []
        values_b[i] = []
        if length > max_length:
            max_length = length
    print(lengths)
    # Get values from frames.
    result = numpy.zeros((h_out, w_out, 3), numpy.uint8)
    value = {}  # not used below
    for j in range(frame_size):
        frame = video[j]
        target = frame[30:368, 128:548, ]
        for i in range(s + l):
            # Column i uses the length stored at the mirrored index s + l - i.
            temp = s + l - i
            # Float modulo: true roughly once every
            # max_length / (lengths[temp] + 1) frames, so columns with a
            # smaller length collect fewer samples.
            if j % (max_length / (lengths[temp] + 1)) < 1:
                # NOTE(review): channels 0/1/2 are presumably B/G/R for
                # frames read through OpenCV; they are written back in the
                # same order, so the naming does not affect the output.
                values_r[i].append(target[r, i, 0])
                values_g[i].append(target[r, i, 1])
                values_b[i].append(target[r, i, 2])
    print(values_r)
    # Generate output: the samples of column i become a vertical strip that
    # starts at row ``offset``.
    # print(h_out)
    # print(w_out)
    for i in range(1, s + l):
        line_r = values_r[i]
        line_g = values_g[i]
        line_b = values_b[i]
        offset = int((max_length - lengths[s + l - i]) / 4)
        for j in range(len(line_r)):
            result[j + offset, i, 0] = line_r[j]
            result[j + offset, i, 1] = line_g[j]
            result[j + offset, i, 2] = line_b[j]
    cv2.imwrite('video_result.jpg', result)
def test_position(self):
    """``position`` must track sequential reads and random access."""
    reader = mmcv.VideoReader(self.video_path)
    assert reader.position == 0

    # Ten sequential reads advance the position to 10.
    reads_left = 10
    while reads_left:
        reader.read()
        reads_left -= 1
    assert reader.position == 10

    # Fetching frame 99 leaves the position just behind it.
    reader.get_frame(99)
    assert reader.position == 100
def test_cut_concat_video(self):
    """Cut a video into two parts and concatenate them again."""
    tmp_dir = tempfile.gettempdir()
    part1_file, part2_file, out_file = (
        osp.join(tmp_dir, name)
        for name in ('.mmcv_test1.mp4', '.mmcv_test2.mp4', '.mmcv_test.mp4'))

    # Cut at 3: the first part is expected to hold 75 frames.
    mmcv.cut_video(self.video_path, part1_file, end=3, vcodec='h264')
    mmcv.cut_video(self.video_path, part2_file, start=3, vcodec='h264')
    first_part = mmcv.VideoReader(part1_file)
    second_part = mmcv.VideoReader(part2_file)
    assert len(first_part) == 75
    assert len(second_part) == self.num_frames - 75

    # Joining both parts must give back the original frame count.
    mmcv.concat_video([part1_file, part2_file], out_file)
    joined = mmcv.VideoReader(out_file)
    assert len(joined) == self.num_frames

    for leftover in (part1_file, part2_file, out_file):
        os.remove(leftover)
def dumpPairFromVideo(path):
    """Save frames 6100..7000 (1-based) of a video as JPEG files.

    Files are written to ``./tmp/input_<n>.jpg`` where ``n`` is the 1-based
    frame number. Only the requested range is decoded, and the range is
    clipped to the length of the video, so shorter videos no longer raise
    ``IndexError``.

    Args:
        path: Path of the video file.
    """
    video = mmcv.VideoReader(path)
    first_index = 6100 - 1  # 0-based index of frame number 6100
    stop_index = min(7000, len(video))  # exclusive; frame number 7000 is last
    for index in range(first_index, stop_index):
        frame = video[index]
        if frame is None:
            # The reader could not decode this frame; nothing more to save.
            break
        cv.imwrite('./tmp/input_%d.jpg' % (index + 1), frame)
def test_resize_video(self):
    """``resize_video`` must produce the expected output resolution."""
    out_file = osp.join(tempfile.gettempdir(), '.mmcv_test.mp4')

    # (extra positional args, keyword args, expected resolution)
    cases = [
        # explicit target size
        (((200, 100), ), dict(quiet=True), (200, 100)),
        # single scaling ratio
        ((), dict(ratio=2), (294 * 2, 240 * 2)),
        # target size while keeping the aspect ratio
        (((1000, 480), ), dict(keep_ar=True), (294 * 2, 240 * 2)),
        # per-axis ratio together with keep_ar
        ((), dict(ratio=(2, 1.5), keep_ar=True), (294 * 2, 360)),
    ]
    for extra_args, extra_kwargs, expected in cases:
        mmcv.resize_video(self.video_path, out_file, *extra_args,
                          **extra_kwargs)
        resized = mmcv.VideoReader(out_file)
        assert resized.resolution == expected
        os.remove(out_file)
def test_load(self):
    """A freshly opened reader must expose the video's basic properties."""
    import cv2

    reader = mmcv.VideoReader(self.video_path)

    # Geometry and timing of the test video.
    assert (reader.width, reader.height) == (294, 240)
    assert reader.fps == 25

    # Both ways of asking for the frame count must agree.
    for reported in (reader.frame_cnt, len(reader)):
        assert reported == self.num_frames

    # The underlying capture object is open and is an OpenCV capture.
    assert reader.opened
    assert isinstance(reader.vcap, type(cv2.VideoCapture()))
def embeddings(self, path):
    """Collect face embeddings from a video, grouped by nearest known person.

    Every detected face is embedded with ``self.resnet`` and assigned to the
    entry of ``self.embeddings_initial`` with the smallest squared Euclidean
    distance. The first frame of the video is skipped. Frames in which the
    detector returns ``None`` are skipped instead of raising ``TypeError``.

    Args:
        path: Path of the video file.

    Returns:
        dict: Maps each person index in ``range(self.total_people)`` to the
        list of embeddings (NumPy arrays) assigned to that person.
    """
    video = mmcv.VideoReader(path)
    frames = [
        Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        for frame in video[1:]
    ]

    a = {person: [] for person in range(self.total_people)}

    for frame in frames:
        bouding_box, prob = self.mtcnn.detect(frame)
        if bouding_box is None:
            # No face in this frame.
            continue

        for box in bouding_box:
            x1, y1, x2, y2 = box
            # Normalise the corners so (x1, y1) is top-left.
            if x1 > x2:
                x1, x2 = x2, x1
            if y1 > y2:
                y1, y2 = y2, y1

            cropped_tensors = extract_face(frame, (x1, y1, x2, y2)).to(
                self.device).view(-1, 3, 160, 160)
            emb = self.resnet(cropped_tensors)
            emb = emb.detach()
            if self.device.type == "cuda":
                emb = emb.cpu()
            emb = emb.numpy()

            # Nearest reference embedding by squared L2 distance, see
            # https://github.com/cmusatyalab/openface/blob/master/demos/compare.py
            # https://cmusatyalab.github.io/openface/demo-2-comparison/
            # NOTE(review): idx stays -1 if embeddings_initial is empty, which
            # makes the lookup below raise KeyError.
            idx = -1
            min_dist = 10**9
            for i, e in enumerate(self.embeddings_initial):
                d = (emb - e).reshape(512)
                dist = np.dot(d, d)
                if dist < min_dist:
                    idx = i
                    min_dist = dist
            a[idx].append(emb)

            # testing for face tracking: show the crop in a window named
            # after the matched person index
            crop = frame.crop((x1, y1, x2, y2))
            crop = cv2.cvtColor(np.array(crop), cv2.COLOR_RGB2BGR)
            cv2.imshow(str(idx), crop)
            cv2.waitKey(1)

    print(len(a[0]))
    return a
def process_video(model,
                  input_path,
                  output_path,
                  require_fps,
                  hat_color,
                  person_color,
                  fourcc='mp4v'):
    """Run detection on a video and write the annotated result.

    Every input frame is written to the output, so a ``require_fps`` below
    the source fps changes playback speed rather than dropping frames.

    Args:
        model: The detection model to use.
        input_path (str): Path of the input video file.
        output_path (str): Path of the annotated output video.
        require_fps (int | None): Fps of the output video. ``None`` or a
            value above the source fps falls back to the source fps.
        hat_color (str): Colour for helmet boxes. Currently not used; the
            colours passed to ``get_result`` are fixed to green/red.
        person_color (str): Colour for head boxes. Currently not used.
        fourcc (str): Four-character codec code for the OpenCV writer.
    """
    video = mmcv.VideoReader(input_path)
    # Initialise the head tracker.
    psn_tracker = Tracker()
    resolution = (video.width, video.height)
    video_fps = video.fps

    if require_fps is None:
        require_fps = video_fps
    else:
        # Never write faster than the source.
        require_fps = min(require_fps, video_fps)

    vwriter = cv2.VideoWriter(output_path, VideoWriter_fourcc(*fourcc),
                              require_fps, resolution)
    # The writer was never released before; without release() the container
    # may not be finalised. try/finally also covers failures mid-video.
    try:
        for frame in tqdm(video):
            # bboxs: (hat_bbox, person_bbox)
            st = time.time()
            bboxs = inference_detector(model, frame)
            et = time.time()
            Loger.info('探测耗时{0}'.format(et - st))

            frame_result = get_result(frame,
                                      bboxs,
                                      class_names=model.CLASSES,
                                      auto_thickness=True,
                                      color_dist={
                                          'hat': 'green',
                                          'person': 'red'
                                      })

            # person_bboxs: (N, 5); only boxes scoring above 0.5 are tracked.
            person_bboxs = bboxs[1]
            person_bboxs = person_bboxs[person_bboxs[:, 4] > 0.5]
            person_bboxs = np.expand_dims(person_bboxs, 0)
            # The returned tracks are not used here; the call is kept because
            # it receives the tracker object.
            person_bboxs_tracks = track(person_bboxs, psn_tracker)[0]

            vwriter.write(frame_result)
    finally:
        vwriter.release()
    print('process finshed')
def _read_video(self, path):
    """Load a whole video into memory.

    Args:
        path: Path of the video file.

    Returns:
        dict: ``height``, ``width``, ``fps`` and ``num_frames`` as ints, and
        ``video`` holding all frames stacked along a new first axis.
    """
    reader = mmcv.VideoReader(path)
    sample = {
        'height': int(reader.height),
        'width': int(reader.width),
        'fps': int(reader.fps),
        'num_frames': int(reader.frame_cnt),
        'video': [],
    }
    # Sequential reads, one per frame reported by the reader.
    frames = [reader.read() for _ in range(reader.frame_cnt)]
    sample['video'] = np.stack(frames, axis=0)
    return sample
def main():
    """Run a detector over a video, keep key frames and rebuild the video.

    Annotated frames are written to ``./results/frame``. Every
    ``len(video) // 13``-th frame is also copied to
    ``./key_frame/<video name>``. The frames are then assembled into
    ``./results/<video file name>`` and the frame directory is removed.
    """
    parser = ArgumentParser()
    parser.add_argument('--video', help='video file')
    parser.add_argument('--config', help='Config file')
    parser.add_argument('--checkpoint', help='Checkpoint file')
    parser.add_argument('--device',
                        default='cuda:0',
                        help='Device used for inference')
    parser.add_argument('--score-thr',
                        type=float,
                        default=0.3,
                        help='bbox score threshold')
    parser.add_argument('--theme',
                        type=str,
                        default='white',
                        help='themes to show detect results')
    args = parser.parse_args()

    # build the model from a config file and a checkpoint file
    model = init_detector(args.config, args.checkpoint, device=args.device)

    # test a video and show the results
    video = mmcv.VideoReader(args.video)
    video_file_name = args.video.split('/')[-1]

    result_root = "./results"
    check_path_exist(result_root)
    frame_dir = osp.join(result_root, 'frame')
    check_path_exist(frame_dir)

    # At least 1: videos shorter than 13 frames used to yield an interval of
    # 0 and a ZeroDivisionError in the modulo below.
    interal = max(1, len(video) // 13)

    key_frame_path = "./key_frame"
    check_path_exist(key_frame_path)
    key_frame_dir = osp.join(key_frame_path, video_file_name.split('.')[0])
    check_path_exist(key_frame_dir)

    frame_id = 0
    for frame in tqdm(video, ncols=64):
        frame_name = '{:06d}.jpg'.format(frame_id)
        frame_file = osp.join(frame_dir, frame_name)

        result = inference_detector(model, frame)
        model.show_result(frame,
                          result,
                          score_thr=args.score_thr,
                          out_file=frame_file)
        # Copy every `interal`-th annotated frame as a key frame.
        if (frame_id + 1) % interal == 0:
            shutil.copyfile(src=frame_file,
                            dst=osp.join(key_frame_dir, frame_name))
        frame_id += 1

    output_video_path = osp.join(result_root, video_file_name)
    mmcv.frames2video(frame_dir,
                      output_video_path,
                      fourcc='mp4v',
                      filename_tmpl='{:06d}.jpg')
    # The individual frames are only an intermediate product.
    shutil.rmtree(frame_dir)
def test_load(self):
    """Readers opened from a file and from a URL expose the right metadata."""
    import cv2

    def check_reader(reader, width, height, fps, frame_cnt):
        assert reader.width == width
        assert reader.height == height
        assert reader.fps == fps
        assert reader.frame_cnt == frame_cnt
        assert len(reader) == frame_cnt
        assert reader.opened
        assert isinstance(reader.vcap, type(cv2.VideoCapture()))

    # read from video file
    check_reader(
        mmcv.VideoReader(self.video_path),
        width=294,
        height=240,
        fps=25,
        frame_cnt=self.num_frames)

    # read from video url
    check_reader(
        mmcv.VideoReader(self.video_url),
        width=320,
        height=240,
        fps=15,
        frame_cnt=1889)