def __init__(self, display_widget=None):
    """Load the pose model, start the CSI camera and register ROS endpoints.

    Args:
        display_widget: Optional display target. When it is ``None`` a
            matplotlib image window is opened on the current camera frame.
    """
    self.display_widget = display_widget

    # Pose description file -> topology consumed by the object parser.
    with open(self.HUMAN_POSE, 'r') as pose_file:
        pose_description = json.load(pose_file)
    pose_topology = trt_pose.coco.coco_category_to_topology(pose_description)

    # Restore the TensorRT module from its saved state dict.
    self.model_trt = TRTModule()
    optimized_state = torch.load(self.OPTIMIZED_MODEL)
    self.model_trt.load_state_dict(optimized_state)

    self.parse_objects = ParseObjects(pose_topology)
    self.keypoint_coordinates = KeypointCoordinates(
        pose_description["keypoints"])

    self.camera = CSICamera(width=self.WIDTH,
                            height=self.HEIGHT,
                            capture_fps=30)
    self.camera.running = True

    if self.display_widget is None:
        # No widget supplied: show frames in an interactive pyplot window.
        self.display = plt.imshow(self.camera.value)
        plt.ion()
        plt.show()

    # ROS stuff
    rospy.Service('get_keypoint', GetKeypoint, self.__handle_get_keypoint)
    self.image_pub = rospy.Publisher("image", Image)
    self.bridge = CvBridge()
def __init__(self):
    """Build the resnet18 pose model and load its TensorRT-optimised form.

    If the optimised state dict is not on disk yet, the PyTorch weights are
    converted with torch2trt and saved before being loaded.
    """
    with open('/trt_pose/tasks/human_pose/human_pose.json', 'r') as pose_file:
        pose_description = json.load(pose_file)
    self._topology = trt_pose.coco.coco_category_to_topology(pose_description)

    self._MODEL_WEIGHTS = '/trt_pose/tasks/human_pose/resnet18_baseline_att_224x224_A_epoch_249.pth'
    self._OPTIMIZED_MODEL = '/trt_pose/tasks/human_pose/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    self._num_parts = len(pose_description['keypoints'])
    self._num_links = len(pose_description['skeleton'])

    print('BreathRateDetector: using resnet model')
    network = trt_pose.models.resnet18_baseline_att(self._num_parts,
                                                    2 * self._num_links)
    self._model = network.cuda().eval()

    self._WIDTH = 224
    self._HEIGHT = 224
    # Dummy input used as the example tensor for the TensorRT conversion.
    self._data = torch.zeros((1, 3, self._HEIGHT, self._WIDTH)).cuda()

    if not os.path.exists(self._OPTIMIZED_MODEL):
        print('BreathRateDetector: -- Converting TensorRT models. This may takes several minutes...')
        self._model.load_state_dict(torch.load(self._MODEL_WEIGHTS))
        converted = torch2trt.torch2trt(self._model, [self._data],
                                        fp16_mode=True,
                                        max_workspace_size=1 << 25)
        torch.save(converted.state_dict(), self._OPTIMIZED_MODEL)
        print('BreathRateDetector: -- Conversion complete --')

    print('BreathRateDetector: loading TRT model.')
    self._model_trt = TRTModule()
    self._model_trt.load_state_dict(torch.load(self._OPTIMIZED_MODEL))

    # Per-channel normalisation constants kept on the GPU.
    self._mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self._std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self._device = torch.device('cuda')

    self._parse_objects = ParseObjects(self._topology)
    self._draw_objects = DrawObjects(self._topology)
def ETRI_Initialization(path):
    """Load the pose topology and the four TensorRT engines stored in *path*.

    Args:
        path: Directory containing the ``*_TRT.pth`` / ``*_trt_2.pth`` files.

    Returns:
        Tuple ``(topology, parse_objects, model_skeleton, model_trt_ba,
        model_trt_ha, model_trt_hp)``.
    """
    # Load & Init for Skeletons
    with open('./utils/human_pose.json', 'r') as pose_file:
        pose_description = json.load(pose_file)
    topology = trt_pose.coco.coco_category_to_topology(pose_description)
    parse_objects = ParseObjects(topology)

    def _load_engine(banner, filename):
        # Announce the stage, then restore one TRTModule from its state dict.
        print(banner)
        engine = TRTModule()
        engine.load_state_dict(torch.load(os.path.join(path, filename)))
        return engine

    model_skeleton = _load_engine(
        "trtPose start",
        'resnet18_baseline_att_224x224_A_epoch_249_trt_2.pth')
    model_trt_ba = _load_engine("body action start", 'bodyaction_TRT.pth')
    model_trt_ha = _load_engine("hand action start", 'handaction_jc_c_TRT.pth')
    model_trt_hp = _load_engine("headpose start", 'headpose_TRT.pth')

    return (topology, parse_objects, model_skeleton, model_trt_ba,
            model_trt_ha, model_trt_hp)
def __init__(self, modelFile, taskDescFile, csv=0, csvPath='.'):
    """Load the task description and a TensorRT pose model.

    The base model constructor is chosen from the model file name. A
    TensorRT state dict named ``<model basename>_trt.pth`` is loaded if it
    exists (relative to the working directory); otherwise the PyTorch
    weights are converted with torch2trt and saved under that name.

    Args:
        modelFile: Path to the PyTorch weights file.
        taskDescFile: Path to the JSON task (pose) description.
        csv: CSV logging is initialised when this is greater than 0.
        csvPath: Location passed to the CSV initialiser.

    Raises:
        PoseCaptureDescError: The task description could not be opened.
        PoseCaptureModelError: The model name is not recognised or has no
            matching constructor in ``trt_pose.models``.
        PoseCaptureCsvError: CSV initialisation failed with ``OSError``.
    """
    # Load the task description
    try:
        with open(taskDescFile, 'r') as f:
            human_pose = json.load(f)
    except OSError as err:
        raise PoseCaptureDescError from err
    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])

    # Load the base model
    fbase = os.path.basename(modelFile)
    func, self.inWidth, self.inHeight = \
        PoseCaptureModel.getModelFuncName(fbase)
    if func is None:
        logging.fatal('Invalid model name: %s' % (fbase))
        logging.fatal('Model name should be (.+_.+_att)_(\\d+)x(\\d+)_')
        raise PoseCaptureModelError('Invalid model name: %s' % (fbase))
    if not hasattr(trt_pose.models, func):
        logging.fatal('Could not find base model function: %s' % (func))
        raise PoseCaptureModelError(
            'Could not find base model function: %s' % (func))
    # Resolve the constructor by attribute lookup instead of eval(): the
    # name comes from a file name, so it must never be executed as code.
    model_factory = getattr(trt_pose.models, func)
    trtFile = os.path.splitext(fbase)[0] + '_trt.pth'
    logging.info('Loading base model from %s' % ('trt_pose.models.' + func))
    model = model_factory(num_parts, 2 * num_links).cuda().eval()

    if os.path.exists(trtFile):
        logging.info('Loading model from TensorRT plan file ...')
        model_trt = TRTModule()
        model_trt.load_state_dict(torch.load(trtFile))
    else:
        logging.info('Optimizing model for TensorRT ...')
        model.load_state_dict(torch.load(modelFile))
        data = torch.zeros((1, 3, self.inHeight, self.inWidth)).cuda()
        model_trt = torch2trt.torch2trt(
            model, [data], fp16_mode=True, max_workspace_size=1 << 25)
        torch.save(model_trt.state_dict(), trtFile)

    # Per-channel normalisation constants kept on the GPU.
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')

    self.parse_objects = ParseObjects(topology)
    self.draw_objects = DrawObjects(topology)
    self.model_trt = model_trt
    self.num_parts = num_parts

    self.csv = csv
    self.count = 0
    if self.csv > 0:
        try:
            self._initCsv(human_pose['keypoints'], csvPath)
        except OSError as err:
            raise PoseCaptureCsvError from err
def main():
    """Run TensorRT pose estimation on camera frames and record the result.

    Processes up to 500 frames, or until the capture closes or a read fails.
    Capture and writer resources are released even if processing raises.
    """
    parser = argparse.ArgumentParser(description="TensorRT pose estimation")
    parser.add_argument("--model", type=str, default="resnet")
    args = parser.parse_args()

    # Load the annotation file and create a topology tensor
    with open("human_pose.json", "r") as f:
        human_pose = json.load(f)

    # Create a topology tensor (intermediate DS that describes part linkages)
    topology = trt_pose.coco.coco_category_to_topology(human_pose)

    # Construct and load the model
    model = Model(pose_annotations=human_pose)
    model.load_model(args.model)
    model.load_weights()
    model.get_optimized()
    model.log_fps()

    # Set up the camera
    camera = Camera(width=WIDTH, height=HEIGHT)
    camera.capture_video("MP4V", "/tmp/output.mp4")  # OPTIONAL
    assert camera.cap is not None, "Camera Open Error"

    # Set up callable class used to parse the objects from the neural network
    parse_objects = ParseObjects(topology)  # from trt_pose.parse_objects

    # Execute while the camera is open and we haven't reached the time limit
    count = 0
    time_limit = 500
    try:
        while camera.cap.isOpened() and count < time_limit:
            t = time.time()
            succeeded, image = camera.cap.read()
            if not succeeded:
                print("Camera read Error")
                break

            resized_img = cv2.resize(image, dsize=(WIDTH, HEIGHT),
                                     interpolation=cv2.INTER_AREA)
            preprocessed = preprocess(resized_img)
            counts, objects, peaks = model.execute_neural_net(
                data=preprocessed, parser=parse_objects)
            drawn = draw(resized_img, counts, objects, peaks, t)
            if camera.out:
                # NOTE(review): `camera.out` is called directly here; if it is
                # a cv2.VideoWriter this presumably should be
                # `camera.out.write(drawn)` - confirm against the Camera class.
                camera.out(drawn)
            count += 1
    finally:
        # Clean up resources. The writer is optional (see the guard in the
        # loop), so only release it when it exists.
        cv2.destroyAllWindows()
        if camera.out:
            camera.out.release()
        camera.cap.release()
def main(args):
    """Run pose estimation on every frame delivered by ``run_pipeline``.

    Args:
        args: Parsed options providing ``frame_rate``, ``source_height``,
            ``source_width`` and ``binning_level``.
    """
    with open("human_pose.json", "r") as pose_file:
        pose_description = json.load(pose_file)
    topology = trt_pose.coco.coco_category_to_topology(pose_description)

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    model = get_model(pose_description, device)
    print(f"Running inference on device: {device}")

    tv_transforms = torchvision.transforms
    preprocess = tv_transforms.Compose([
        tv_transforms.ToPILImage(),
        tv_transforms.Resize((HEIGHT, WIDTH)),
        tv_transforms.ToTensor(),
        tv_transforms.Normalize([0.485, 0.456, 0.406],
                                [0.229, 0.224, 0.225]),
    ])

    parse_objects = ParseObjects(topology)
    draw_objects = DrawObjects(topology)

    def user_callback(image_data):
        # Time preprocessing + inference, then draw detections onto the frame.
        started = time.monotonic()
        batch = preprocess(image_data).unsqueeze(0)
        cmap, paf = model(batch.to(device))
        cmap = cmap.detach().cpu()
        paf = paf.detach().cpu()
        inference_time_ms = (time.monotonic() - started) * 1000
        print(f"Inference time: {inference_time_ms:.2f}ms")

        counts, objects, peaks = parse_objects(cmap, paf)
        draw_objects(image_data, counts, objects, peaks)
        return image_data

    run_pipeline(
        user_callback,
        src_frame_rate=args.frame_rate,
        src_height=args.source_height,
        src_width=args.source_width,
        binning_level=args.binning_level,
    )
def pose_estimation_worker(keypoint_queue: Queue, run: Value,
                           done_loading: Value,
                           camera_class: Type[CameraBase], draw: bool):
    """Capture frames, estimate poses and push keypoints onto a queue.

    Args:
        keypoint_queue: Queue receiving the keypoints extracted per frame.
        run: Shared flag; the loop continues while ``run.value`` is truthy.
        done_loading: Shared flag set to 1 once model and camera are ready.
        camera_class: Camera type instantiated with no arguments.
        draw: When true, detections are drawn on each frame.

    A ``KeyboardInterrupt`` ends the worker quietly. The CUDA context and the
    camera are released on every exit path, but only if they were created.
    """
    ctx = None
    camera = None
    try:
        cuda.init()
        device = cuda.Device(0)  # enter your Gpu id here
        ctx = device.make_context()

        # Load human pose definition and topology.
        with open('config/human_pose.json', 'r') as f:
            human_pose = json.load(f)
        topology = trt_pose.coco.coco_category_to_topology(human_pose)
        keypoint_names = human_pose['keypoints']

        # Load trt pose model.
        logging.info('Loading the pose model...')
        resnet = ResNet()

        # Define mean and std for image preprocessing.
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])

        # Create object parser.
        parse_objects = ParseObjects(topology)

        # Create a camera instance.
        camera = camera_class()

        # Create a keypoint drawer if required.
        if draw:
            keypoint_drawer = KeypointDrawer(topology)

        with done_loading.get_lock():
            done_loading.value = 1

        while run.value:
            image = camera.get_frame()
            data = preprocess_image(image, mean, std)
            cmap, paf = resnet(data)
            # Add a leading axis before handing the maps to the parser.
            cmap, paf = torch.Tensor(cmap[None, ...]), torch.Tensor(paf[None, ...])
            counts, objects, peaks = parse_objects(cmap, paf)
            keypoints = extract_keypoints(objects, peaks, keypoint_names)
            keypoint_queue.put(keypoints)
            if draw:
                keypoint_drawer.draw(image, objects, peaks)
    except KeyboardInterrupt:
        pass
    finally:
        # Guarded cleanup: an interrupt during start-up must not turn into a
        # NameError, and other exceptions must not leak the context/camera.
        if ctx is not None:
            ctx.pop()
        if camera is not None:
            camera.release()
def __init__(
    self,
    model_path='./models/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
):
    """Load the pose description and the TensorRT model at *model_path*.

    Args:
        model_path: Path to the saved TRTModule state dict.
    """
    self.model_path = model_path

    with open('./models/human_pose.json', 'r') as pose_file:
        self.human_pose = json.load(pose_file)
    self.topology = trt_pose.coco.coco_category_to_topology(self.human_pose)

    self.model_trt = TRTModule()
    saved_state = torch.load(self.model_path)
    self.model_trt.load_state_dict(saved_state)

    # Per-channel normalisation constants kept on the GPU.
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')

    self.parse_objects = ParseObjects(self.topology)
    self.get_keypoints = GetKeypoints(self.topology)
def __init__(self):
    """Locate the model files in the ``drone_ai`` package and load the model.

    The optimised model is created via ``__create_optimodel`` when its file
    does not exist yet, then loaded into a TRTModule.
    """
    self.goal = 0.0  # [angle]

    print("Getting Path to package...")
    package_root = rospkg.RosPack().get_path('drone_ai')
    self.follow_people_configfiles_path = (
        package_root + "/scripts/helpers/trtpose/models")

    print("We get the human pose json file that described the human pose")
    models_dir = (rospkg.RosPack().get_path('drone_ai') +
                  "/scripts/helpers/trtpose/models/")
    humanPose_file_path = os.path.join(models_dir, 'human_pose.json')

    print("Opening json file")
    with open(humanPose_file_path, 'r') as pose_file:
        self.human_pose = json.load(pose_file)

    print("Creating topology")
    self.topology = trt_pose.coco.coco_category_to_topology(self.human_pose)

    self.WIDTH = 640
    self.HEIGHT = 480

    optimized_name = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    optimized_model_weights_path = os.path.join(
        self.follow_people_configfiles_path, optimized_name)

    # Build the optimised model first if it has never been generated.
    if not os.path.exists(optimized_model_weights_path):
        self.__create_optimodel(optimized_model_weights_path)

    print("Load the saved model using Torchtrt")
    self.model_trt = TRTModule()
    saved_state = torch.load(optimized_model_weights_path)
    self.model_trt.load_state_dict(saved_state)

    print("Define the Image processing variables")
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device("cuda")

    print(
        "Classes to parse the object of the NeuralNetwork and draw on the image"
    )
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)
def load_params(base_dir, human_pose_json, model_name):
    """Resolve model files and build the pose parser for *model_name*.

    Args:
        base_dir: Directory holding the pose JSON and the weight files.
        human_pose_json: File name of the pose description inside *base_dir*.
        model_name: ``'resnet18'`` or ``'densenet121'``.

    Returns:
        Tuple ``(num_parts, num_links, model_weights, parse_objects,
        topology)``.

    Raises:
        ValueError: *model_name* is not one of the supported models.
    """
    weights_by_model = {
        'resnet18': 'resnet18_baseline_att_224x224_A_epoch_249.pth',
        'densenet121': 'densenet121_baseline_att_256x256_B_epoch_160.pth',
    }
    # Fail early with a clear message; previously an unknown name surfaced
    # later as an UnboundLocalError.
    if model_name not in weights_by_model:
        raise ValueError(
            'Unsupported model_name %r; expected one of %s'
            % (model_name, sorted(weights_by_model)))

    hp_json_file = os.path.join(base_dir, human_pose_json)
    model_weights = os.path.join(base_dir, weights_by_model[model_name])

    with open(hp_json_file, 'r') as f:
        human_pose = json.load(f)

    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    num_parts = len(human_pose['keypoints'])  # Name of the body part
    num_links = len(human_pose['skeleton'])  # Need to know
    parse_objects = ParseObjects(topology)
    return num_parts, num_links, model_weights, parse_objects, topology
def __init__(self,
             torch_model='resnet18_baseline_att',
             input_shape=(224, 224),
             dtype=torch.float32,
             device=torch.device('cuda'),
             torch2trt_kwargs=None):
    """Load (or download, convert and cache) a TensorRT pose model.

    Args:
        torch_model: Key into ``MODEL_URLS`` and ``trt_pose.models.MODELS``;
            also the prefix of the cached file names.
        input_shape: Trailing dimensions of the example input used for the
            conversion.
        dtype: Data type of the normalisation tensors and example input.
        device: Device the tensors and model are placed on.
        torch2trt_kwargs: Keyword arguments forwarded to ``torch2trt``.
            Defaults to ``{'max_workspace_size': 1 << 25,
            'fp16_mode': True}``.

    Raises:
        subprocess.CalledProcessError: The weight download failed.
    """
    if torch2trt_kwargs is None:
        # Built per call so no dict is shared between instances.
        torch2trt_kwargs = {'max_workspace_size': 1 << 25, 'fp16_mode': True}

    self.dtype = dtype
    self.device = device
    self.input_shape = input_shape
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).to(device).type(dtype)
    self.std = torch.Tensor([0.229, 0.224, 0.225]).to(device).type(dtype)

    model_path = torch_model + '_trt.pth'

    if not os.path.exists(model_path):
        # download model; check_call surfaces a failed download here rather
        # than as a confusing torch.load error on a missing/partial file.
        download_path = torch_model + '_torch.pth'
        subprocess.check_call(
            ['wget', MODEL_URLS[torch_model], '-O', download_path])

        # load downloaded model
        model = trt_pose.models.MODELS[torch_model](
            len(COCO_CATEGORY['keypoints']),
            len(COCO_CATEGORY['skeleton']) * 2).eval().to(device)
        model.load_state_dict(torch.load(download_path))

        # optimize with TensorRT
        data = torch.randn((1, 3) + input_shape).to(device).type(dtype)
        self.model = torch2trt(model, [data], **torch2trt_kwargs)
        torch.save(self.model.state_dict(), model_path)
    else:
        # load model
        self.model = TRTModule()
        self.model.load_state_dict(torch.load(model_path))

    self._topology = trt_pose.coco.coco_category_to_topology(COCO_CATEGORY)
    # NOTE(review): the parser uses the module-level TOPOLOGY rather than
    # self._topology computed above - presumably equivalent; confirm.
    self.parse_objects = ParseObjects(TOPOLOGY)
def __init__(self, imshow=True):
    """Load the model, start the camera and open the animated result window.

    Args:
        imshow: Stored on the instance; flags whether pose estimation
            results are displayed.
    """
    self.imshow = imshow  # display results of pose estimation

    # Torch settings
    self.device = torch.device('cuda')
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()

    # load_model() runs before the parser/drawer are built because they
    # read self.topology.
    self.load_model()

    # Setup camera and visuals
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)
    self.setup_camera()

    # Visualization
    first_frame = self.execute({'new': self.camera.value})
    self.im = plt.imshow(first_frame)
    self.ani = FuncAnimation(plt.gcf(), self.update, interval=200)
    canvas = plt.gcf().canvas
    self.cid = canvas.mpl_connect("key_press_event", self.close)
    self.running = True
    plt.show()
def __init__(self):
    """Load the pose topology and the optimised resnet18 TensorRT model."""
    with open('human_pose.json', 'r') as pose_file:
        pose_description = json.load(pose_file)
    pose_topology = trt_pose.coco.coco_category_to_topology(pose_description)

    print('------ model = resnet--------')
    optimized_path = './models/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    self.WIDTH = 224
    self.HEIGHT = 224

    print('Loading model')
    self.model_trt = TRTModule()
    saved_state = torch.load(optimized_path)
    self.model_trt.load_state_dict(saved_state)
    print('model was loaded')

    # Per-channel normalisation constants kept on the GPU.
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')

    self.parse_objects = ParseObjects(pose_topology)
def __init__(self,
             size,
             model_path,
             min_leg_joints,
             min_total_joints,
             include_head=True,
             **kwargs):
    """Configure the pose estimator and load its model.

    Args:
        size: Input size, either a single value (used for both height and
            width) or a ``(height, width)`` tuple/list.
        model_path: Model file path, or a tuple/list of path components
            that are joined.
        min_leg_joints: Stored as ``self.min_leg_joints``.
        min_total_joints: Stored as ``self.min_total_joints``.
        include_head: Stored as ``self.include_head``.
        **kwargs: Attribute overrides applied on top of ``self._params``.
    """
    # Defaults first, then caller overrides; explicit arguments are assigned
    # afterwards so they take precedence.
    self.__dict__.update(self._params)
    self.__dict__.update(kwargs)

    self.min_total_joints = min_total_joints
    self.min_leg_joints = min_leg_joints
    self.include_head = include_head

    if isinstance(size, (tuple, list)):
        self.height, self.width = size
    else:
        self.height, self.width = (size, size)

    if isinstance(model_path, (tuple, list)):
        model_path = os.path.join(*model_path)
    self.model_path = model_path

    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    self.device = torch.device(device_name)

    # load humanpose json data
    self.topology = coco.coco_category_to_topology(POSE_META)
    self.parse_objects = ParseObjects(self.topology,
                                      cmap_threshold=self.cmap_threshold,
                                      link_threshold=self.link_threshold)

    # load is_trt model
    if self.model_path.endswith('.trt'):
        self.model = self._load_trt_model(self.model_path)
    else:
        self.model = self._load_torch_model(self.model_path,
                                            backbone=self.backbone)

    # transformer
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    self.transforms = transforms.Compose([to_tensor, normalize])
def run(self):
    """Process up to 500 frames from ``self.cap`` and show the results.

    The loop stops early when the capture closes, a read fails, or 'q' is
    pressed. Windows, the video writer and the capture are released on
    every exit path.
    """
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    # NOTE(review): no frame is ever written to out_video in this method -
    # presumably `out_video.write(...)` is missing; confirm the intended
    # frame and size before adding it.
    out_video = cv2.VideoWriter('/tmp/output.mp4', fourcc,
                                self.cap.get(cv2.CAP_PROP_FPS), (640, 480))
    count = 0
    try:
        while self.cap.isOpened() and count < 500:
            t = time.time()
            ret_val, dst = self.cap.read()
            if not ret_val:
                print("Camera read Error")
                break

            img = cv2.resize(dst, dsize=(WIDTH, HEIGHT),
                             interpolation=cv2.INTER_AREA)
            img = PE.execute(img, dst, t)
            cv2.imshow("result", img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            count += 1
    finally:
        cv2.destroyAllWindows()
        out_video.release()
        # Release the capture this object owns (was a bare `cap`, which is
        # not defined in this method).
        self.cap.release()
def __init__(self):
    """Load the pose topology and TensorRT model, logging each stage."""

    def _announce(message):
        # Log the message and echo it to the console with a coloured tag.
        logging.info(message)
        print("[", colored("INFO", 'green', attrs=['bold']), "   ] " + message)

    with open('modules/ai_module/techniques/pose/trt_pose/human_pose.json', 'r') as pose_file:
        pose_description = json.load(pose_file)

    self.topology = trt_pose.coco.coco_category_to_topology(pose_description)
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)

    optimized_path = 'modules/ai_module/techniques/pose/trt_pose/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    self.model_trt = TRTModule()
    self.model_trt.load_state_dict(torch.load(optimized_path))
    _announce("POSE Model loaded")

    # Per-channel normalisation constants kept on the GPU.
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')
    _announce("Setting CUDA backend")
def __init__(self, net, image_dimensions, network_dimensions,
             weights_filename, optimize):
    """Build the pose network and obtain its TensorRT-optimised version.

    Args:
        net: Callable building the network from ``(num_parts,
            2 * num_links)``.
        image_dimensions: Stored as ``self.image_dimensions``.
        network_dimensions: Network input size; used for the resize
            transform and the example conversion input.
        weights_filename: Path to the PyTorch weights; the optimised file
            is ``<weights_filename>.trt``.
        optimize: Force re-optimisation even if the optimised file exists.
    """
    with open('models/human_pose.json', 'r') as pose_file:
        self.human_pose = json.load(pose_file)

    self.topology = coco_category_to_topology(self.human_pose)
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)

    self.num_parts = len(self.human_pose['keypoints'])
    self.num_links = len(self.human_pose['skeleton'])

    self.weights_filename = weights_filename
    self.weights_filename_opt = '{0}.trt'.format(weights_filename)

    self.model = net(self.num_parts, 2 * self.num_links).cuda().eval()
    self.image_dimensions = image_dimensions
    self.network_dimensions = network_dimensions

    must_convert = optimize or not os.path.exists(self.weights_filename_opt)
    if must_convert:
        logger.info("Optimizing network for trt")
        self.model.load_state_dict(torch.load(self.weights_filename))
        example_input = torch.zeros((1, 3, *self.network_dimensions)).cuda()
        self.model_trt = torch2trt.torch2trt(self.model, [example_input],
                                             fp16_mode=True)
        torch.save(self.model_trt.state_dict(), self.weights_filename_opt)
    else:
        self.model_trt = TRTModule()
        saved_state = torch.load(self.weights_filename_opt)
        self.model_trt.load_state_dict(saved_state)

    # Per-channel normalisation constants kept on the GPU.
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')

    # Resize followed directly by the TensorRT model (no Normalize step).
    self.preprocess = transforms.Compose([
        transforms.Resize(self.network_dimensions),
        self.model_trt,
    ])
def init():
    """Populate the module-level pose-estimation globals.

    Sets ``topology``, ``WIDTH``, ``HEIGHT``, ``model_trt``, ``mean``,
    ``std``, ``device``, ``parse_objects`` and ``draw_objects``.
    """
    global topology, WIDTH, HEIGHT, model_trt
    global mean, std, device, parse_objects, draw_objects

    import torch2trt
    from torch2trt import TRTModule

    with open('./models/human_pose.json', 'r') as pose_file:
        pose_description = json.load(pose_file)
    topology = coco_category_to_topology(pose_description)

    WIDTH = 256
    HEIGHT = 256

    optimized_path = Path('./models/densenet121_baseline_att_256x256_B_epoch_160_trt.pth')
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(optimized_path))
    print('loaded model')

    # Per-channel normalisation constants kept on the GPU.
    mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    device = torch.device('cuda')

    parse_objects = ParseObjects(topology)
    draw_objects = DrawObjects(topology)
def __init__(self):
    """Create the resnet18 pose model and load its TensorRT version.

    When the optimised file is missing, the PyTorch weights are converted
    with torch2trt and saved; the optimised file is then loaded from disk
    in either case.
    """
    print('------ model = resnet--------')
    self.MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
    self.OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'

    with open('human_pose.json', 'r') as pose_file:
        pose_description = json.load(pose_file)
    part_count = len(pose_description['keypoints'])
    link_count = len(pose_description['skeleton'])

    network = trt_pose.models.resnet18_baseline_att(part_count,
                                                    2 * link_count)
    self.model = network.cuda().eval()

    self.WIDTH = 224
    self.HEIGHT = 224
    example_input = torch.zeros((1, 3, self.HEIGHT, self.WIDTH)).cuda()

    if not os.path.exists(self.OPTIMIZED_MODEL):
        self.model.load_state_dict(torch.load(self.MODEL_WEIGHTS))
        self.model_trt = torch2trt.torch2trt(self.model, [example_input],
                                             fp16_mode=True,
                                             max_workspace_size=1 << 25)
        torch.save(self.model_trt.state_dict(), self.OPTIMIZED_MODEL)

    self.topology = trt_pose.coco.coco_category_to_topology(pose_description)

    # Always (re)load from the saved optimised state dict.
    self.model_trt = TRTModule()
    self.model_trt.load_state_dict(torch.load(self.OPTIMIZED_MODEL))

    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)
def __init__(self, model_folder):
    """Load the pose model stored in *model_folder*.

    Args:
        model_folder: Directory containing ``human_pose.json``, the resnet18
            weights and (once generated) the optimised model file.
    """
    pose_json_path = os.path.join(model_folder, 'human_pose.json')
    with open(pose_json_path, 'r') as pose_file:
        self._human_pose = json.load(pose_file)
    self._topology = build_topology(self._human_pose)

    part_count = len(self._human_pose['keypoints'])
    link_count = len(self._human_pose['skeleton'])
    network = trt_pose.models.resnet18_baseline_att(part_count,
                                                    2 * link_count)
    model = network.cuda().eval()

    weights_path = os.path.join(
        model_folder, 'resnet18_baseline_att_224x224_A_epoch_249.pth')
    optimized_model_path = os.path.join(model_folder,
                                        self._OPTIMIZED_MODEL_NAME)

    # Convert once; later runs reuse the saved optimised model.
    if not os.path.exists(optimized_model_path):
        print("Converting Torch OpenPose model to TFRT")
        model.load_state_dict(torch.load(weights_path))
        example_input = torch.zeros(
            (1, 3, self._IMAGE_HEIGHT, self._IMAGE_WIDTH)).cuda()
        converted = torch2trt.torch2trt(model, [example_input],
                                        fp16_mode=True,
                                        max_workspace_size=1 << 25)
        torch.save(converted.state_dict(), optimized_model_path)

    print("Loading TFRT OpenPose model")
    self._model_trt = TRTModule()
    self._model_trt.load_state_dict(torch.load(optimized_model_path))

    # The parser takes the 'topology' entry; the drawer takes the whole
    # object returned by build_topology.
    self._parse_objects = ParseObjects(self._topology['topology'])
    self._draw_objects = DrawObjects(self._topology)
    self._human_detector = HumanDetection()
# Build the base network and load its PyTorch weights.
model = models.resnet18_baseline_att(num_parts, 2 * num_links)
model = model.cuda().eval()
model.load_state_dict(torch.load(MODEL_WEIGHTS))

# Optimization with tensorRT
# NOTE: optimization is device specific
# data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()
# model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)
# torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)

# Load optimized model
print("Loading optimized model")
model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

# Setup camera and visuals
parse_objects = ParseObjects(topology)
draw_objects = DrawObjects(topology)
camera = USBCamera(width=WIDTH, height=HEIGHT, capture_device=1)
camera.running = True

# Attach observer to act on each new frame received
# camera.observe(execute, names='value')

# Show the first processed frame, then refresh it every 200 ms; a key press
# is routed to close().
im = plt.imshow(execute({'new': camera.value}))
ani = FuncAnimation(plt.gcf(), update, interval=200)
cid = plt.gcf().canvas.mpl_connect("key_press_event", close)
plt.show()
# Load the hand-pose weights and convert the network with torch2trt.
MODEL_WEIGHTS = 'hand_pose_resnet18_att_244_244.pth'
model.load_state_dict(torch.load(MODEL_WEIGHTS))

import torch2trt

model_trt = torch2trt.torch2trt(model, [data],
                                fp16_mode=True,
                                max_workspace_size=1 << 25)

# Save the optimised model, then reload it from disk into a TRTModule.
OPTIMIZED_MODEL = 'hand_pose_resnet18_att_244_244_trt.pth'
torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)

model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

parse_objects = ParseObjects(topology,
                             cmap_threshold=0.15,
                             link_threshold=0.15)
draw_objects = DrawObjects(topology)

# Per-channel normalisation constants kept on the GPU.
mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
device = torch.device('cuda')

# Gesture classifier: feature scaling followed by an RBF-kernel SVM.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf'))

# The name `preprocessdata` is rebound from the class to its instance here.
preprocessdata = preprocessdata(topology, num_parts)

svm_train = False
if svm_train:
    clf, predicted = preprocessdata.trainsvm(clf, joints_train, joints_test,
                                             labels_train, hand.labels_test)
def evaluate(self, model, topology):
    """Run *model* over the dataset images and print COCO keypoint metrics.

    Detections are written to ``trt_pose_results.json`` and scored with
    ``pycocotools``. Returns ``None`` early, without writing the file, when
    no person was detected. The first entry of ``self.imgIds`` is skipped.

    Args:
        model: Callable mapping an image batch to ``(cmap, paf)``.
        topology: Topology passed to ``ParseObjects``.
    """
    self.parse_objects = ParseObjects(topology,
                                      cmap_threshold=0.1,
                                      link_threshold=0.1,
                                      cmap_window=5,
                                      line_integral_samples=7,
                                      max_num_parts=100,
                                      max_num_objects=100)

    results = []

    for n, imgId in enumerate(self.imgIds[1:]):

        # read image
        img = self.cocoGt.imgs[imgId]
        img_path = os.path.join(self.images_dir, img['file_name'])
        image = PIL.Image.open(img_path).convert('RGB')

        ar = 1.0
        if self.keep_aspect_ratio:
            ar = float(image.width) / float(image.height)

        quad = get_quad(0.0, (0, 0), 1.0, aspect_ratio=ar)
        image = transform_image(image, self.image_shape, quad)

        data = self.transform(image).cuda()[None, ...]

        cmap, paf = model(data)
        cmap, paf = cmap.cpu(), paf.cpu()

        batch_counts, batch_objects, batch_peaks = self.parse_objects(cmap, paf)
        object_counts = int(batch_counts[0])
        objects = batch_objects[0]
        peaks = batch_peaks[0]

        for i in range(object_counts):
            person = objects[i]
            score = 0.0
            kps = [0] * (17 * 3)

            for j in range(17):
                k = person[j]
                if k < 0:
                    # Negative index: this keypoint was not found.
                    continue

                peak = peaks[j][k]
                # Undo the aspect-ratio correction along the padded axis.
                if ar > 1.0:  # w > h w/h
                    x = peak[1]
                    y = (peak[0] - 0.5) * ar + 0.5
                else:
                    x = (peak[1] - 0.5) / ar + 0.5
                    y = peak[0]

                x = round(float(img['width'] * x))
                y = round(float(img['height'] * y))

                score += 1.0
                base = j * 3
                kps[base + 0] = x
                kps[base + 1] = y
                kps[base + 2] = 2

            results.append({
                'image_id': imgId,
                'category_id': 1,
                'keypoints': kps,
                'score': score / 17.0
            })

        if n % 100 == 0:
            print('%d / %d' % (n, len(self.imgIds)))

    if not results:
        return

    with open('trt_pose_results.json', 'w') as f:
        json.dump(results, f)

    cocoDt = self.cocoGt.loadRes('trt_pose_results.json')
    cocoEval = pycocotools.cocoeval.COCOeval(self.cocoGt, cocoDt,
                                             'keypoints')
    cocoEval.params.imgIds = self.imgIds
    cocoEval.params.catIds = [1]
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
def main():
    """Load (or build) the TensorRT pose model, benchmark it and run capture.

    Prints the throughput of 50 forward passes, then processes up to 200
    frames from ``cap`` and shows them in the 'test' window.
    """
    parser = argparse.ArgumentParser(
        description='TensorRT pose estimation run')
    parser.add_argument('--model',
                        type=str,
                        default='resnet',
                        help='resnet or densenet')
    args = parser.parse_args()

    with open('human_pose.json', 'r') as pose_file:
        human_pose = json.load(pose_file)

    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])

    if 'resnet' in args.model:
        print('------ model = resnet--------')
        MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
        OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
        model = trt_pose.models.resnet18_baseline_att(
            num_parts, 2 * num_links).cuda().eval()
        WIDTH = HEIGHT = 224
    else:
        print('------ model = densenet--------')
        MODEL_WEIGHTS = 'densenet121_baseline_att_256x256_B_epoch_160.pth'
        OPTIMIZED_MODEL = 'densenet121_baseline_att_256x256_B_epoch_160_trt.pth'
        model = trt_pose.models.densenet121_baseline_att(
            num_parts, 2 * num_links).cuda().eval()
        WIDTH = HEIGHT = 256

    data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

    # Convert and cache the optimised model the first time only.
    if not os.path.exists(OPTIMIZED_MODEL):
        model.load_state_dict(torch.load(MODEL_WEIGHTS))
        converted = torch2trt.torch2trt(model, [data],
                                        fp16_mode=True,
                                        max_workspace_size=1 << 25)
        torch.save(converted.state_dict(), OPTIMIZED_MODEL)

    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

    # Throughput benchmark: 50 forward passes on the dummy input.
    t0 = time.time()
    torch.cuda.current_stream().synchronize()
    for _ in range(50):
        y = model_trt(data)
    torch.cuda.current_stream().synchronize()
    t1 = time.time()
    print(50.0 / (t1 - t0))

    # These locals are not read again inside this function.
    mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    device = torch.device('cuda')
    X_compress = 640.0 / WIDTH * 1.0
    Y_compress = 480.0 / HEIGHT * 1.0
    parse_objects = ParseObjects(topology)
    rep = 0

    count = 0
    while cap.isOpened() and count < 200:
        t = time.time()
        ret_val, dst = cap.read()
        if not ret_val:
            print("Camera read Error")
            break

        img = cv2.resize(dst,
                         dsize=(WIDTH, HEIGHT),
                         interpolation=cv2.INTER_AREA)
        cv2.imshow('test', execute(img, dst, t))
        cv2.waitKey(1)
        count += 1
def __init__(self, root):
    """Set up game state, the pose model, the camera and the Tk widgets.

    Args:
        root: Tk root window that hosts the GUI.
    """
    # Context variable declarations and loading
    self.running = False
    self.WIDTH = 224
    self.HEIGHT = 224
    self.thresh = 127
    self.round = 0
    self.minimum_joints = 4
    self.path = './images/'
    self.mdelay_sec = 10
    self.mtick = self.mdelay_sec
    self.mask = None
    # Flag to show calibration pose over camera feed
    self.calibrate = True
    self.calibration_pose = cv2.imread('./images/cal_pose.jpg',
                                       cv2.IMREAD_COLOR)

    # Loading model and model data
    with open('./tasks/human_pose/human_pose.json', 'r') as pose_file:
        pose_description = json.load(pose_file)

    self.topology = trt_pose.coco.coco_category_to_topology(pose_description)
    self.num_parts = len(pose_description['keypoints'])
    self.num_links = len(pose_description['skeleton'])

    self.data = torch.zeros((1, 3, self.HEIGHT, self.WIDTH)).cuda()

    self.OPTIMIZED_MODEL = './tasks/human_pose/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    self.model_trt = TRTModule()
    self.model_trt.load_state_dict(torch.load(self.OPTIMIZED_MODEL))

    # Per-channel normalisation constants kept on the GPU.
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')

    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)

    # Start camera
    camera_options = dict(width=self.WIDTH,
                          height=self.HEIGHT,
                          capture_fps=30)
    if USBCam:
        self.camera = USBCamera(**camera_options)
    else:
        self.camera = CSICamera(**camera_options)

    self.frame = Tk.Frame(root)
    self.root = root

    # Create editable title
    self.titleVar = Tk.StringVar()
    self.title = Tk.Label(root, textvariable=self.titleVar,
                          font="Verdana 36")
    self.titleVar.set("Pose Estimation Game")
    self.title.pack(side=Tk.TOP)
    self.frame.pack(side=Tk.LEFT, fill=Tk.BOTH, expand=1)

    # Create image capture figure
    # Set as Frame with three possible images (live feed, mask/pose to make,
    # image captured)
    # Image row
    self.im_row = Tk.Frame(self.frame)
    self.feed_label = Tk.Label(self.im_row)
    self.feed_label.pack(side=Tk.LEFT)
    self.mask_label = Tk.Label(self.im_row)
    self.pose_label = Tk.Label(self.im_row)

    # Create editable description label
    self.desTextVar = "Please select an option from the right"
    self.desText = Tk.Label(self.frame, text=self.desTextVar,
                            font="Verdana 12")

    # Create Combobox for selection (Steps are currently in comments)
    # Grab maps from repository
    # Parse map names to develop choices
    # group map names into array
    self.levels = []
    difficulty_choices = ["Easy", "Medium", "Hard"]

    # Put map names in combo box
    self.ddVar = Tk.StringVar()
    self.ddVar.set('Select a Choice')
    self.dropDown = Tk.OptionMenu(self.frame, self.ddVar,
                                  *difficulty_choices)

    # Run levels_select whenever the dropdown variable is written, i.e. when
    # an option is selected.
    self.ddVar.trace('w', self.levels_select)

    # Create initial button panel
    self.buttonPanel = ButtonPanel(root)

    self.im_row.pack()
    self.desText.pack()
    self.buttonPanel.pack()

    # Schedule the first camera_loop call 10 ms from now.
    self.root.after(10, self.camera_loop)
    MainGUI.updateToTitle(self)
def __init__(self, *args, **kwargs):
    """Create the object parser for the module-level ``TOPOLOGY``.

    Args:
        *args: Extra positional arguments forwarded to ``ParseObjects``.
        **kwargs: Extra keyword arguments forwarded to ``ParseObjects``.
    """
    parser = ParseObjects(TOPOLOGY, *args, **kwargs)
    self.parse_objects = parser
def main():
    """Run yolov5 + pose estimation on a webcam or a recorded video.

    Every processed frame is saved to ``imgs/NNNN.jpg``; the frame count,
    elapsed time and average FPS are printed at the end.
    """
    liveDemo = True

    if liveDemo:
        # Set up USB camera (assuming device 0)
        cam = USBCamera(width=1920, height=1080, capture_device=0)

    # Set up model for yolov5s
    yolo = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
    device = torch.device('cuda')
    yolo.to(device)

    # Set up topology, model, and classes for ResNet
    with open('human_pose.json', 'r') as pose_file:
        pose_description = json.load(pose_file)
    topology = trt_pose.coco.coco_category_to_topology(pose_description)

    resnet = TRTModule()
    resnet.load_state_dict(torch.load('resnet_trt.pth'))

    parseObjects = ParseObjects(topology)
    drawObjects = DrawObjects(topology)

    # Basic analytics
    imageCount = 0
    t = time()

    if liveDemo:
        # Live demo on webcam: continue until interrupt
        try:
            while True:
                # Grab a frame and reverse the channel axis (BGR to RGB)
                img = cam.read()[:, :, ::-1]
                print(f'got frame {imageCount}')

                # Process with yolo and resnet
                result, empty = processFrame(img, yolo, resnet, parseObjects,
                                             drawObjects)

                # Save file
                cv2.imwrite(f'imgs/{imageCount:04}.jpg', result)
                imageCount += 1
        except KeyboardInterrupt:
            print('Keyboard interrupt!')
        finally:
            t = time() - t
    else:
        # Recorded video
        cap = cv2.VideoCapture('example_video.mpg')

        # Continue until video is done
        ret, frame = cap.read()
        while ret:
            # Process and save image
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result, empty = processFrame(img, yolo, resnet, parseObjects,
                                         drawObjects)
            cv2.imwrite(f'imgs/{imageCount:04}.jpg', result)

            # Try to grab next frame
            ret, frame = cap.read()
            imageCount += 1

        t = time() - t
        cap.release()

    print(
        f'Ending. Processed {imageCount} images in {t}s, average FPS of {imageCount/t}'
    )
def execute(change):
    """Process one camera frame and advance the workout state machine.

    The frame is read from ``change['new']``. Depending on the module-level
    flags, the call does one of three things:

    * ``countdown``: show the seconds counter (or 'Start Workout!') and
      tick it down once per second until it reaches -1.
    * ``session_running``: run pose estimation, update the squat/standing
      state and rep counter, display and buffer the annotated frame, and
      every ``max_delta`` seconds ask the API whether the user is still
      lifting. When they are not, the buffered frames are written to the
      video writers, the workout is posted, both videos are uploaded to S3
      and all session state is reset.
    * otherwise: prompt for a QR code and try to decode one every fifth
      frame; a payload that parses to a JSON object starts the countdown.

    Args:
        change: Mapping whose ``'new'`` entry is the current frame.
            NOTE(review): looks like a traitlets-style observer payload -
            confirm against the camera registration code.
    """
    global frame_num
    global topology
    global model_trt
    global repcount
    global states
    global session_running
    global json_data
    global next_delta
    global image_list
    global image_nopts_list
    global countdown
    global count_seconds

    image = change['new']

    # countdown in between getting qr code and starting workout
    if countdown:
        resized = cv2.resize(image, (1920, 1080),
                             interpolation=cv2.INTER_AREA)
        if time.time() > next_delta:
            next_delta = time.time() + 1.0
            count_seconds = count_seconds - 1
        if count_seconds == -1:
            countdown = False

        # Only the headline differs between the last second and the rest.
        if count_seconds == 0:
            overlay = cv2.putText(resized, 'Start Workout!',
                                  org=(800, 540), fontFace=1, fontScale=5,
                                  color=(255, 255, 255), thickness=5)
        else:
            overlay = cv2.putText(resized, str(count_seconds),
                                  org=(940, 540), fontFace=1, fontScale=8,
                                  color=(255, 255, 255), thickness=5)
        next_overlay = cv2.putText(
            overlay, f"{json_data['username']}, get in position!",
            org=(7, 800), fontFace=2, fontScale=2,
            color=(255, 255, 255), thickness=2)
        cv2.imshow('deeplift', next_overlay)
        cv2.waitKey(1)

    elif session_running:
        parse_objects = ParseObjects(topology)
        draw_objects = DrawObjects(topology)
        frame_num = frame_num + 1

        data = preprocess(image)
        cmap, paf = model_trt(data)
        cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
        counts, objects, peaks = parse_objects(cmap, paf)

        # Keep the first entry of every item in the first peaks element.
        keypoints = [keypoint[0] for keypoint in peaks[0]]
        keypoints = print_to_file(keypoints, dump=False)
        analytics = depth_test(keypoints)

        # update states; currState is left untouched when the pose is
        # neither a squat nor standing
        states["prevState"] = states["currState"]
        if analytics['is_squat']:
            states["currState"] = "squat"
            color = (0, 255, 0)
        elif analytics['is_standing']:
            states["currState"] = "standing"
            color = (0, 0, 255)
        else:
            color = (0, 255, 255)

        # A rep is counted on the squat -> standing transition.
        if (states["prevState"] == "squat"
                and states["currState"] == "standing"):
            repcount = repcount + 1

        print(
            f"REPCOUNT: {repcount} Squat: {analytics['is_squat']} KP: {keypoints['left_hip'][0]},{keypoints['left_knee'][0]},{keypoints['right_hip'][0]},{keypoints['right_knee'][0]}",
            end='\r')

        # Copy the frame before anything is drawn on it, for the
        # "no points" video.
        blank_image = copy.deepcopy(image)
        draw_objects(image, counts, objects, peaks, color)
        overlay = cv2.putText(image, f"REPCOUNT: {repcount}",
                              org=(15, 50), fontFace=1, fontScale=4,
                              color=(255, 255, 255), thickness=4)
        next_overlay = cv2.putText(
            overlay,
            f"Exercise: {json_data['exerciseName']}, Weight: {json_data['weight']}",
            org=(7, 800), fontFace=1, fontScale=2,
            color=(255, 255, 255), thickness=2)
        resized = cv2.resize(next_overlay, (1920, 1080),
                             interpolation=cv2.INTER_AREA)
        cv2.imshow('deeplift', resized)
        cv2.waitKey(1)

        image_list.append(next_overlay)
        image_nopts_list.append(blank_image)

        # check if we need to close the session and upload video data
        if time.time() > next_delta:
            # Build the URL with explicit '/' separators; os.path.join is
            # meant for filesystem paths and is platform-dependent.
            url = (f"{api_url.rstrip('/')}/users/"
                   f"{json_data['username']}/lifting")
            # NOTE(review): verify=False disables TLS certificate checks.
            response = requests.get(url, verify=False)
            data = json.loads(response.text)
            next_delta = time.time() + max_delta

            if not data['currentlyLifting']:
                # Tell the user we are uploading before the slow part.
                resized = cv2.resize(image, (1920, 1080),
                                     interpolation=cv2.INTER_AREA)
                next_overlay = cv2.putText(resized, "Uploading ...",
                                           org=(400, 50), fontFace=1,
                                           fontScale=3,
                                           color=(255, 255, 255),
                                           thickness=3)
                cv2.imshow('deeplift', next_overlay)
                cv2.waitKey(1)

                # write to video files
                for annotated, plain in zip(image_list, image_nopts_list):
                    out.write(annotated)
                    out_nopts.write(plain)
                # NOTE(review): the writers are released here and not
                # re-created in this function - confirm a later session
                # gets fresh writers elsewhere.
                out.release()
                out_nopts.release()

                # update json with reps
                json_data["userName"] = json_data["username"]
                json_data["reps"] = repcount
                json_data["difficulty"] = data["difficulty"]

                # create workout
                url = api_url + '/workouts'
                headers = {
                    "Content-Type": "application/json",
                    "Authorization": "Bearer " + bearer_token
                }
                paths_response = requests.post(url,
                                               data=json.dumps(json_data),
                                               headers=headers,
                                               verify=False)
                print(paths_response.text)
                paths_text = json.loads(paths_response.text)

                # take workout response and upload video to respective
                # s3 bucket
                s3 = boto3.resource('s3')
                s3.meta.client.upload_file('output.mp4',
                                           'videos-bucket-0001',
                                           paths_text['video_with_path'],
                                           ExtraArgs={
                                               'ContentType': 'octet-stream',
                                               'ACL': 'public-read'
                                           })
                s3.meta.client.upload_file('output_no_pts.mp4',
                                           'videos-bucket-0001',
                                           paths_text['video_without_path'],
                                           ExtraArgs={
                                               'ContentType': 'octet-stream',
                                               'ACL': 'public-read'
                                           })

                # redo loop, reset variables
                image_list = []
                image_nopts_list = []
                countdown = False
                count_seconds = 11
                frame_num = 0
                repcount = 0
                states = dict({
                    "prevState": None,
                    "currState": None,
                    "prePrevState": None
                })
                session_running = False
                json_data = {}

    # QR Scanning Mode
    else:
        resized = cv2.resize(image, (1920, 1080),
                             interpolation=cv2.INTER_AREA)
        overlay = cv2.putText(resized, "Please display DeepLift QR Code",
                              org=(15, 50), fontFace=1, fontScale=4,
                              color=(255, 255, 255), thickness=4)
        cv2.imshow('deeplift', overlay)
        cv2.waitKey(1)

        # Only attempt a decode on every fifth frame.
        if frame_num % 5 == 0:
            data = read_qr_code(image)
            if len(data) > 0:
                print("QR Recognized!")
                try:
                    parsed = json.loads(data)
                except (ValueError, TypeError):
                    # Not JSON at all (JSONDecodeError is a ValueError).
                    parsed = None
                # The session code indexes json_data by key, so anything
                # other than a JSON object is treated as an invalid code.
                if isinstance(parsed, dict):
                    json_data = parsed
                    print(data)
                    session_running = True
                    countdown = True
                    frame_num = 0
                else:
                    print("INVALID QR: Retry with QR code generated from app")
        frame_num = frame_num + 1