def main():
    """Recognize the action in one video (or rawframe directory) and report it.

    The recognizer is built from ``args.config`` (with ``args.cfg_options``
    merged in) and ``args.checkpoint``.  Every label/score pair returned by
    ``inference_recognizer`` is printed and, when ``args.out_filename`` is
    given, the first label is rendered onto the output via ``get_output``.
    """
    args = parse_args()

    # Resolve the requested device and load the (possibly overridden) config.
    run_device = torch.device(args.device)
    cfg = Config.fromfile(args.config)
    cfg.merge_from_dict(args.cfg_options)

    # Build the recognizer from the config and a checkpoint file/url.
    model = init_recognizer(
        cfg, args.checkpoint, device=run_device, use_frames=args.use_frames)

    # Set to e.g. ('backbone', ) to also get that layer's feature back.
    output_layer_names = None

    # Run on a single video or on the rawframes of a single video.
    if not output_layer_names:
        results = inference_recognizer(
            model, args.video, args.label, use_frames=args.use_frames)
    else:
        results, returned_feature = inference_recognizer(
            model,
            args.video,
            args.label,
            use_frames=args.use_frames,
            outputs=output_layer_names)

    print('The top-5 labels with corresponding scores are:')
    for entry in results:
        print(f'{entry[0]}: ', entry[1])

    # Nothing to render when no output file was requested.
    if args.out_filename is None:
        return

    if args.target_resolution is None:
        args.target_resolution = (None, None)
    else:
        # A value of -1 in either of the first two slots is passed on as None.
        for axis in (0, 1):
            if args.target_resolution[axis] == -1:
                args.target_resolution[axis] = None
        args.target_resolution = tuple(args.target_resolution)

    get_output(
        args.video,
        args.out_filename,
        results[0][0],
        fps=args.fps,
        font_size=args.font_size,
        font_color=args.font_color,
        target_resolution=args.target_resolution,
        resize_algorithm=args.resize_algorithm,
        use_frames=args.use_frames)
def test_frames_inference_recognizer():
    """Exercise ``inference_recognizer`` on rawframe input (RGB and flow)."""
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    rgb_model = init_recognizer(frame_config_file, None, device)
    flow_model = init_recognizer(flow_frame_config_file, None, device)

    # A path that doesn't exist must be rejected.
    with pytest.raises(RuntimeError):
        inference_recognizer(rgb_model, 'missing_path')

    # Use CenterCrop instead of multi-crop transforms to reduce memory in
    # order to pass CI; applied to the RGB model first, then the flow model.
    for recognizer in (rgb_model, flow_model):
        for step in recognizer.cfg.data.test.pipeline:
            if step['type'] in ('TenCrop', 'ThreeCrop'):
                step['type'] = 'CenterCrop'
                step['crop_size'] = 224

    # Plain inference: five entries with non-increasing scores.
    top5_label = inference_recognizer(rgb_model, frames_path)
    scores = [entry[1] for entry in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)

    # Whole-module features returned as numpy arrays.
    _, feat = inference_recognizer(
        flow_model,
        frames_path,
        outputs=('backbone', 'cls_head'),
        as_tensor=False)
    assert isinstance(feat, dict)
    assert 'backbone' in feat and 'cls_head' in feat
    assert isinstance(feat['backbone'], np.ndarray)
    assert isinstance(feat['cls_head'], np.ndarray)
    assert feat['backbone'].shape == (25, 2048, 7, 7)
    assert feat['cls_head'].shape == (1, 400)

    # Features of nested layers, returned as tensors by default.
    _, feat = inference_recognizer(
        rgb_model,
        frames_path,
        outputs=('backbone.layer3', 'backbone.layer3.1.conv1'))
    assert 'backbone.layer3.1.conv1' in feat and 'backbone.layer3' in feat
    assert isinstance(feat['backbone.layer3.1.conv1'], torch.Tensor)
    assert isinstance(feat['backbone.layer3'], torch.Tensor)
    assert feat['backbone.layer3'].size() == (25, 1024, 14, 14)
    assert feat['backbone.layer3.1.conv1'].size() == (25, 256, 14, 14)
def main():
    """Recognize the action in one video (or rawframe directory) and report it.

    Prints every label/score pair returned by ``inference_recognizer`` and,
    when ``args.out_filename`` is given, renders the first label onto the
    output via ``get_output``.
    """
    args = parse_args()

    # Build the recognizer from a config file and checkpoint file on the
    # requested device.
    model = init_recognizer(
        args.config,
        args.checkpoint,
        device=torch.device(args.device),
        use_frames=args.use_frames)

    # Run on a single video or on the rawframes of a single video.
    results = inference_recognizer(
        model, args.video, args.label, use_frames=args.use_frames)

    print('The top-5 labels with corresponding scores are:')
    for entry in results:
        print(f'{entry[0]}: ', entry[1])

    # Nothing to render when no output file was requested.
    if args.out_filename is None:
        return

    get_output(
        args.video,
        args.out_filename,
        results[0][0],
        font_size=args.font_size,
        font_color=args.font_color,
        resize_algorithm=args.resize_algorithm,
        use_frames=args.use_frames)
def test_inference_recognizer():
    """Exercise ``inference_recognizer`` error handling and top-5 output."""
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = init_recognizer(video_config_file, None, device)

    # The video path doesn't exist.
    with pytest.raises(RuntimeError):
        inference_recognizer(model, 'missing.mp4', label_path)

    # ``video_path`` must be consistent with ``use_frames``: a video file
    # cannot be read as rawframes ...
    with pytest.raises(RuntimeError):
        inference_recognizer(model, video_path, label_path, use_frames=True)

    # ... and a directory cannot be read as a video.
    with pytest.raises(RuntimeError):
        inference_recognizer(model, 'demo/', label_path)

    # Use CenterCrop to reduce memory in order to pass CI.
    for step in model.cfg.data.test.pipeline:
        if step['type'] == 'TenCrop':
            step['type'] = 'CenterCrop'

    # Five entries are expected, with non-increasing scores.
    top5_label = inference_recognizer(model, video_path, label_path)
    scores = [entry[1] for entry in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)
def main():
    """Recognize the action in a single video and print the returned scores."""
    args = parse_args()

    # Build the recognizer from a config file and checkpoint file on the
    # requested device.
    model = init_recognizer(
        args.config, args.checkpoint, device=torch.device(args.device))

    # Run inference on the single video.
    results = inference_recognizer(model, args.video, args.label)

    print('The top-5 labels with corresponding scores are:')
    for entry in results:
        print(f'{entry[0]}: ', entry[1])
def rgb_based_action_recognition(args):
    """Run the RGB recognizer on ``args.video`` and return its first label.

    Args:
        args: object providing ``rgb_config``, ``rgb_checkpoint``, ``device``,
            ``video`` and ``label_map``.

    Returns:
        The first field of the first entry returned by
        ``inference_recognizer`` (presumably the top-1 action label).
    """
    cfg = mmcv.Config.fromfile(args.rgb_config)
    # Clear the backbone's ``pretrained`` entry; the weights are loaded from
    # the checkpoint below.
    cfg.model.backbone.pretrained = None

    recognizer = build_recognizer(cfg.model, test_cfg=cfg.get('test_cfg'))
    load_checkpoint(recognizer, args.rgb_checkpoint, map_location='cpu')

    # Attach the config to the model before running inference with it.
    recognizer.cfg = cfg
    recognizer.to(args.device)
    recognizer.eval()

    predictions = inference_recognizer(recognizer, args.video, args.label_map)
    return predictions[0][0]
def Predict(self, video_path=None):
    """Run the stored recognizer on ``video_path`` and report its results.

    The model and the class-list file are read from
    ``self.system_dict["local"]``.  Each result is printed as it is collected.

    Args:
        video_path: input handed to ``inference_recognizer``.

    Returns:
        tuple: ``(classes, scores)`` lists in the order the recognizer
        returned them; every score is the raw value divided by 100.
    """
    local_state = self.system_dict["local"]
    results = inference_recognizer(local_state["model"], video_path,
                                   local_state["class_list_file"])

    # Collect and show the results.
    classes = []
    scores = []
    for entry in results:
        scaled_score = entry[1] / 100
        classes.append(entry[0])
        scores.append(scaled_score)
        print(f'{entry[0]}: ', scaled_score)
    return classes, scores
def test_inference_recognizer():
    """Check that ``inference_recognizer`` yields five sorted scores."""
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = init_recognizer(config_file, None, device)

    # Use CenterCrop to reduce memory in order to pass CI.
    for step in model.cfg.data.test.pipeline:
        if step['type'] == 'TenCrop':
            step['type'] = 'CenterCrop'

    # Five entries are expected, with non-increasing scores.
    top5_label = inference_recognizer(model, video_path, label_path)
    scores = [entry[1] for entry in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)
def main():
    """Recognize the action in a pre-extracted audio feature file.

    Builds the recognizer from ``args.config`` (with ``args.cfg_options``
    merged in) and ``args.checkpoint``, runs it on ``args.audio`` and prints
    each result with its class index replaced by the matching line of the
    ``args.label`` file.

    Raises:
        NotImplementedError: if ``args.audio`` is not a ``.npy`` file.
    """
    args = parse_args()
    device = torch.device(args.device)
    cfg = Config.fromfile(args.config)
    cfg.merge_from_dict(args.cfg_options)
    model = init_recognizer(cfg, args.checkpoint, device=device)

    if not args.audio.endswith('.npy'):
        raise NotImplementedError('Demo works on extracted audio features')
    results = inference_recognizer(model, args.audio)

    # Read the label names inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(args.label) as label_file:
        labels = [line.strip() for line in label_file]

    # The first field of each result is used as an index into the label list.
    results = [(labels[k[0]], k[1]) for k in results]

    print('Scores:')
    for result in results:
        print(f'{result[0]}: ', result[1])
import torch

from mmaction.apis import inference_recognizer, init_recognizer

# Config describing the recognizer used for the demo.
config_file = 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'  # noqa: E501

# Use 'cuda:0' for GPU inference, or 'cpu' to run without one.
device = torch.device('cuda:0')

# No checkpoint is passed here, only the config and the device.
model = init_recognizer(config_file, device=device)

# Run inference on the demo video with the Kinetics-400 label map.
inference_recognizer(model, 'demo/demo.mp4', 'demo/label_map_k400.txt')
def test_video_inference_recognizer():
    """Exercise ``inference_recognizer`` on a video file, with features."""
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = init_recognizer(video_config_file, None, device)

    # The video path doesn't exist.
    with pytest.raises(RuntimeError):
        inference_recognizer(model, 'missing.mp4')

    # Use CenterCrop to reduce memory in order to pass CI.
    for step in model.cfg.data.test.pipeline:
        if step['type'] in ('TenCrop', 'ThreeCrop'):
            step['type'] = 'CenterCrop'

    # Plain inference: five entries with non-increasing scores.
    top5_label = inference_recognizer(model, video_path)
    scores = [entry[1] for entry in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)

    # Whole-module features returned as numpy arrays.
    _, feat = inference_recognizer(
        model,
        video_path,
        outputs=('backbone', 'cls_head'),
        as_tensor=False)
    assert isinstance(feat, dict)
    assert 'backbone' in feat and 'cls_head' in feat
    assert isinstance(feat['backbone'], np.ndarray)
    assert isinstance(feat['cls_head'], np.ndarray)
    assert feat['backbone'].shape == (25, 2048, 7, 7)
    assert feat['cls_head'].shape == (1, 400)

    # Features of nested layers, returned as tensors by default.
    _, feat = inference_recognizer(
        model,
        video_path,
        outputs=('backbone.layer3', 'backbone.layer3.1.conv1'))
    assert 'backbone.layer3.1.conv1' in feat and 'backbone.layer3' in feat
    assert isinstance(feat['backbone.layer3.1.conv1'], torch.Tensor)
    assert isinstance(feat['backbone.layer3'], torch.Tensor)
    assert feat['backbone.layer3'].size() == (25, 1024, 14, 14)
    assert feat['backbone.layer3.1.conv1'].size() == (25, 256, 14, 14)

    # SlowFast: the backbone feature is a tuple of two tensors.
    cfg_file = 'configs/recognition/slowfast/slowfast_r50_video_inference_4x16x1_256e_kinetics400_rgb.py'  # noqa: E501
    sf_model = init_recognizer(cfg_file, None, device)
    for step in sf_model.cfg.data.test.pipeline:
        # Changes to reduce memory in order to pass CI
        if step['type'] in ('TenCrop', 'ThreeCrop'):
            step['type'] = 'CenterCrop'
        if step['type'] == 'SampleFrames':
            step['num_clips'] = 1

    _, feat = inference_recognizer(
        sf_model, video_path, outputs=('backbone', 'cls_head'))
    assert isinstance(feat, dict) and isinstance(feat['backbone'], tuple)
    assert 'backbone' in feat and 'cls_head' in feat
    assert len(feat['backbone']) == 2
    assert isinstance(feat['backbone'][0], torch.Tensor)
    assert isinstance(feat['backbone'][1], torch.Tensor)
    assert feat['backbone'][0].size() == (1, 2048, 4, 8, 8)
    assert feat['backbone'][1].size() == (1, 256, 32, 8, 8)
    assert feat['cls_head'].size() == (1, 400)
def _normalize_target_resolution(target_resolution):
    """Turn the CLI ``target_resolution`` value into a tuple.

    ``None`` becomes ``(None, None)``; otherwise a ``-1`` in either of the
    first two positions is replaced by ``None``.
    """
    if target_resolution is None:
        return (None, None)
    resolution = list(target_resolution)
    for axis in (0, 1):
        if resolution[axis] == -1:
            resolution[axis] = None
    return tuple(resolution)


def _format_label_overlay(results):
    """Build the multi-line ``label: score`` text drawn onto the output."""
    return '\n'.join(
        result[0] + ': {:.2g}'.format(result[1]) for result in results)


def _print_results(results):
    """Print every label/score pair returned by the recognizer."""
    print('The top-5 labels with corresponding scores are:')
    for result in results:
        print(f'{result[0]}: ', result[1])


def _probe_video_length(video):
    """Return the duration of ``video`` in whole seconds, or ``None``.

    The value is parsed from the ``Duration: HH:MM:SS.xx,`` line that
    ``ffmpeg -i`` writes to stderr.  ffmpeg's exit status is ignored on
    purpose: it exits non-zero because no output file is given.
    """
    import re
    import subprocess

    probe = subprocess.run(['ffmpeg', '-i', video],
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE,
                           universal_newlines=True)
    match = re.search(r'Duration: (\d{2}):(\d{2}):(\d{2})\.\d+,',
                      probe.stderr)
    if match is None:
        return None
    hours, minutes, seconds = (int(group) for group in match.groups())
    return hours * 3600 + minutes * 60 + seconds


def _recognize_whole_video(args, model):
    """Run the recognizer once on ``args.video`` and optionally render it."""
    # e.g. use ('backbone', ) to return backbone feature
    output_layer_names = None

    # test a single video or rawframes of a single video
    if output_layer_names:
        results, returned_feature = inference_recognizer(
            model,
            args.video,
            args.label,
            use_frames=args.use_frames,
            outputs=output_layer_names)
    else:
        results = inference_recognizer(
            model, args.video, args.label, use_frames=args.use_frames)

    _print_results(results)

    if args.out_filename is None:
        return

    args.target_resolution = _normalize_target_resolution(
        args.target_resolution)
    get_output(
        args.video,
        args.out_filename,
        _format_label_overlay(results),
        fps=args.fps,
        font_size=args.font_size,
        font_color=args.font_color,
        target_resolution=args.target_resolution,
        resize_algorithm=args.resize_algorithm,
        use_frames=args.use_frames)


def _recognize_in_segments(args, model):
    """Split ``args.video`` with ffmpeg and recognize each segment.

    Every segment is annotated via ``get_output``, the annotated segments are
    concatenated into ``args.out_filename``, and the per-segment ``cls_head``
    outputs are saved next to it as a ``.png`` plot and a ``.csv`` table.
    """
    import math
    import os
    import shutil
    import subprocess

    import matplotlib.pyplot as plt
    import pandas as pd

    if args.split_time <= 0:
        print("Split length can't be 0")
        raise SystemExit
    # The concatenated video, the plot and the CSV are all named after
    # ``out_filename``; fail before any expensive work when it is missing.
    if args.out_filename is None:
        print("An output filename is required when splitting the video.")
        raise SystemExit

    video_length = _probe_video_length(args.video)
    if video_length is None:
        print("Can't determine video length.")
        raise SystemExit
    print("Video length in seconds: {}".format(video_length))

    if math.ceil(video_length / args.split_time) <= 1:
        print("Video length is less than the target split length.")
        raise SystemExit

    # splitext also copes with file names that have no extension.
    fname_base, ext = os.path.splitext(os.path.basename(args.video))
    tmp_path = os.path.join(os.path.dirname(args.video), 'tmpdir')
    os.makedirs(tmp_path, exist_ok=True)

    try:
        # Cut the input into segments with the ffmpeg segment muxer.  The
        # command is an argument list so that paths containing spaces or
        # shell metacharacters are passed through unchanged.
        segment_list = os.path.join(tmp_path, 'list_gen.txt')
        cmd = [
            'ffmpeg', '-i', args.video, '-map', '0', '-c', 'copy',
            '-flags', '+global_header', '-f', 'segment',
            '-segment_time', str(args.split_time), '-y',
            '-segment_list', segment_list,
            '-segment_format_options', 'movflags=+faststart',
            '-reset_timestamps', '1',
            os.path.join(tmp_path, fname_base) + '-%02d' + ext,
        ]
        print("About to run: {}".format(' '.join(cmd)))
        subprocess.check_call(cmd, universal_newlines=True)

        # ffmpeg lists the files it produced, one per line.
        with open(segment_list, 'r') as list_file:
            segment_files = [
                os.path.join(tmp_path, line.replace('\n', ''))
                for line in list_file
            ]

        with open(args.label, 'r') as f:
            label = [line.strip() for line in f]
        # One row per segment, one column per label.
        list_df = pd.DataFrame(
            columns=label, index=range(len(segment_files)))

        # The resolution does not depend on the segment; normalize it once.
        args.target_resolution = _normalize_target_resolution(
            args.target_resolution)

        for i, video_block in enumerate(segment_files):
            video_block_out = os.path.join(
                os.path.dirname(video_block),
                'out_' + os.path.basename(video_block))

            # Always request the classification head output; it fills the
            # per-segment table that is plotted below.
            results, returned_feature = inference_recognizer(
                model,
                video_block,
                args.label,
                use_frames=args.use_frames,
                outputs=('cls_head', ))
            ret_feature = returned_feature['cls_head'].cpu().detach().numpy()
            list_df.iloc[i, :] = ret_feature[0, :len(label)]

            _print_results(results)
            get_output(
                video_path=video_block,
                out_filename=video_block_out,
                label=_format_label_overlay(results),
                fps=args.fps,
                font_size=args.font_size,
                font_color=args.font_color,
                target_resolution=args.target_resolution,
                resize_algorithm=args.resize_algorithm,
                use_frames=args.use_frames)

        # Concatenate the annotated segments into the final output file.
        concat_list = os.path.join(tmp_path, 'list.txt')
        with open(concat_list, 'w') as tmp_file:
            for video_block in segment_files:
                tmp_file.write("file " + 'out_' +
                               os.path.basename(video_block) + "\n")
        cmd = [
            'ffmpeg', '-f', 'concat', '-i', concat_list, '-c', 'copy', '-y',
            args.out_filename
        ]
        print("About to run: {}".format(' '.join(cmd)))
        subprocess.check_call(cmd, universal_newlines=True)
    finally:
        # Remove the scratch directory even when a step above failed.
        shutil.rmtree(tmp_path, ignore_errors=True)

    # Plot and save the per-segment scores next to the output video.
    plt.figure()
    list_df.plot(y=label)
    fig_outdir = os.path.dirname(args.out_filename)
    fig_outname = os.path.basename(args.out_filename).rsplit(".", 1)[0]
    plt.savefig(os.path.join(fig_outdir, fig_outname + '.png'))
    plt.close('all')
    list_df.to_csv(
        os.path.join(fig_outdir, fig_outname + '.csv'), index=False)


def main():
    """Recognize actions in a video, as a whole or split into segments.

    Without ``args.split_time`` the whole input is recognized once.  With
    ``args.split_time`` the video is cut into segments of that many seconds
    and each segment is recognized and annotated separately.
    """
    args = parse_args()
    # assign the desired device.
    device = torch.device(args.device)
    # build the recognizer from a config file and checkpoint file/url
    model = init_recognizer(
        args.config,
        args.checkpoint,
        device=device,
        use_frames=args.use_frames)

    if args.split_time is None:
        _recognize_whole_video(args, model)
    else:
        _recognize_in_segments(args, model)
# Per-class tallies of correct predictions.  Only ``normal_cnt`` and
# ``kicking_cnt`` are updated in the code visible here.
kicking_cnt = 0
punching_cnt = 0
normal_cnt = 0
# NOTE(review): ``iter`` and ``dir`` shadow Python builtins; ``dir`` is
# defined outside this fragment and is presumably a list of file-name lists.
iter = 0
for i in dir:
    # Restart the per-list counter so the printed ratio is per directory.
    iter = 0
    # print(i)
    for j in i:
        # print(f"Now {i} dir and {iter} frames",end=" ")
        iter += 1
        # The current list is identified by comparing it with the known
        # per-class file lists.
        if i == normal_file_list:
            print(f"Now normal dir and {iter} frames", end=" ")
            results = inference_recognizer(model, normal_path + "/" + j, label)
            # print(results[0][0])
            # Only the first field of the first result is compared with the
            # expected class name.
            if results[0][0] == 'normal':
                normal_cnt += 1
                # Running count followed by the correct/processed ratio.
                print(f'correct : {normal_cnt} {normal_cnt/iter}')
            else:
                print(f"NOPE : {normal_cnt} {normal_cnt/iter}")
        elif i == kicking_file_list:
            print(f"Now kick dir and {iter} frames", end=" ")
            results = inference_recognizer(model, kicking_path + "/" + j, label)
            # print(results[0][0])
            # Unlike the 'normal' branch, nothing is printed on a miss here.
            if results[0][0] == 'kick':
                kicking_cnt += 1
                print(f'correct : {kicking_cnt} {kicking_cnt/iter}')
parser.add_argument(
    '--device', type=str, default='cuda:0', help='CPU/CUDA device option')
parser.add_argument('--video', help='video file/url')
parser.add_argument('--labels', help='dataset labels')
args = parser.parse_args()

# Model config file, and the checkpoint (download it from the model zoo and
# put it in `checkpoints/`).
config_file, checkpoint_file = args.config, args.checkpoint

# Desired device, e.g. 'cuda:0' or 'cpu'.
device = torch.device(args.device)

# Build the model from the config file and the checkpoint file.
model = init_recognizer(config_file, checkpoint_file, device=device)

# Test a single video with the given label file.
video, labels = args.video, args.labels
results = inference_recognizer(model, video, labels)

# Show the results.
print('The top-5 labels with corresponding scores are:')
for result in results:
    print(f'{result[0]}: ', result[1])
def main():
    """Recognize a skeleton-based action in a video and write an annotated copy.

    Frames are extracted from ``args.video``, people are detected and their
    poses estimated per frame, and the stacked keypoints are handed to the
    recognizer.  The predicted label is drawn onto pose-visualized frames,
    which are written to ``args.out_filename``.  The directory holding the
    extracted frames is removed afterwards, also when a step fails.
    """
    args = parse_args()

    frame_paths, original_frames = frame_extraction(args.video,
                                                    args.short_side)
    # The extracted frames are only needed while this function runs; clean
    # them up in ``finally`` so a failing step does not leave them on disk.
    tmp_frame_dir = osp.dirname(frame_paths[0])
    try:
        num_frame = len(frame_paths)
        h, w, _ = original_frames[0].shape

        # Build the recognizer from the (possibly overridden) config.
        config = mmcv.Config.fromfile(args.config)
        config.merge_from_dict(args.cfg_options)
        model = init_recognizer(config, args.checkpoint, args.device)

        # Load label_map; the context manager closes the file promptly.
        with open(args.label_map) as label_file:
            label_map = [line.strip() for line in label_file]

        # Get Human detection results, then the poses of the detections.
        det_results = detection_inference(args, frame_paths)
        torch.cuda.empty_cache()
        pose_results = pose_inference(args, frame_paths, det_results)
        torch.cuda.empty_cache()

        fake_anno = dict(
            frame_dir='',
            label=-1,
            img_shape=(h, w),
            original_shape=(h, w),
            start_index=0,
            modality='Pose',
            total_frames=num_frame)

        # The arrays are sized for the frame with the most poses; slots of
        # people missing in a frame stay zero.
        num_person = max(len(poses) for poses in pose_results)
        # Current PoseC3D models are trained on COCO-keypoints (17 keypoints)
        num_keypoint = 17
        keypoint = np.zeros((num_person, num_frame, num_keypoint, 2),
                            dtype=np.float16)
        keypoint_score = np.zeros((num_person, num_frame, num_keypoint),
                                  dtype=np.float16)
        for i, poses in enumerate(pose_results):
            for j, pose in enumerate(poses):
                pose = pose['keypoints']
                # First two columns go to ``keypoint``, the third to
                # ``keypoint_score``.
                keypoint[j, i] = pose[:, :2]
                keypoint_score[j, i] = pose[:, 2]
        fake_anno['keypoint'] = keypoint
        fake_anno['keypoint_score'] = keypoint_score

        results = inference_recognizer(model, fake_anno)
        # The first field of the first result indexes into the label map.
        action_label = label_map[results[0][0]]

        # Draw the poses and the predicted label onto every frame.
        pose_model = init_pose_model(args.pose_config, args.pose_checkpoint,
                                     args.device)
        vis_frames = [
            vis_pose_result(pose_model, frame_paths[i], pose_results[i])
            for i in range(num_frame)
        ]
        for frame in vis_frames:
            cv2.putText(frame, action_label, (10, 30), FONTFACE, FONTSCALE,
                        FONTCOLOR, THICKNESS, LINETYPE)

        # The channel axis is reversed before the frames go to moviepy.
        vid = mpy.ImageSequenceClip([x[:, :, ::-1] for x in vis_frames],
                                    fps=24)
        vid.write_videofile(args.out_filename, remove_temp=True)
    finally:
        shutil.rmtree(tmp_frame_dir)
def inference(self, data, *args, **kwargs):
    """Run ``inference_recognizer`` with ``self.model`` on each item of ``data``.

    Extra positional and keyword arguments are accepted but not used.

    Returns:
        list: one recognizer result per item, in input order.
    """
    results = []
    for item in data:
        results.append(inference_recognizer(self.model, item))
    return results
import subprocess
import time

config = "../configs/recognition/slowfast/custom.py"
checkpoint = "../data_center/fight_assault/BinaryDataTree/tanos_lr_improve_checkpoints/epoch_70.pth"
model = init_recognizer(
    config,
    checkpoint,
    device=device,
)
label = "../demo/custom_map.txt"

# Poll the receive directory forever and classify every clip that shows up.
while True:
    if not os.listdir(receive_path):
        # Nothing received yet: back off briefly instead of spinning a CPU
        # core in a tight loop.
        time.sleep(0.1)
        continue

    # Move everything that arrived into the working directory.  The shell is
    # needed here only for the `*` glob; no file name is interpolated.
    os.system(f"mv {receive_path}* {path_dir}")
    file_list = os.listdir(path_dir)
    file_list.sort()  # sort by name (intended as chronological order)

    # NOTE(review): only clips classified as abnormal are moved out of
    # `path_dir` below, so every other clip is classified again on each pass
    # of the outer loop - confirm whether that is intended.
    for i in file_list:
        results = inference_recognizer(model, path_dir + i, label)
        if results[0][0] == "abnormal" and results[0][1] > 0.86:
            # Violence detected: the clip has to be sent to the db, and since
            # it is now handled it is removed from the working directory.
            # An argument list keeps a received file name from being
            # interpreted by a shell.
            subprocess.run(["mv", path_dir + i, db])
            data = StrConverter(i)
            data["cam_num"] = 0
            print(data)
            # Notify the web server; a failed notification must not stop the
            # monitoring loop, but Ctrl-C / SystemExit must still get through
            # (a bare `except:` would swallow them).
            try:
                requests.post(server_url, data=json.dumps(data))
                print("post")
            except Exception as err:
                print("error", repr(err))
            print(f"moved to db and abnormal score : {results[0][1]}")
import argparse
import os

import torch

from mmaction.apis import inference_recognizer, init_recognizer

parser = argparse.ArgumentParser(description="parsing...")
parser.add_argument(
    "--root",
    type=str,
    default="/home/administrator/Z/Algorithms/mmaction2/",
    help="mmaction2 root")
args = parser.parse_args()

# Join with the root exactly once: joining the already-joined path a second
# time duplicates the root whenever ``--root`` is a relative path.
config_file = os.path.join(
    args.root,
    'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py')  # noqa: E501
# download the checkpoint from model zoo and put it in `checkpoints/`
checkpoint_file = os.path.join(
    args.root,
    'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth')

# assign the desired device.
device = 'cuda:0'  # or 'cpu'
# ``torch`` is imported at the top of this script; without that import this
# line raises NameError.
device = torch.device(device)

# build the model from a config file and a checkpoint file
model = init_recognizer(
    config_file, checkpoint_file, device=device, use_frames=True)

# test a single rawframe directory and show the result:
video = os.path.join(args.root, 'data/kinetics400/rawframes_video/...')
labels = os.path.join(args.root, 'demo/label_map_k400.txt')
results = inference_recognizer(model, video, labels, use_frames=True)

# show the results
print('The top-5 labels with corresponding scores are:')
for result in results:
    print(f'{result[0]}: ', result[1])