def __init__(self):
    with open('/trt_pose/tasks/human_pose/human_pose.json', 'r') as f:
        human_pose = json.load(f)
    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    self._topology = topology
    self._MODEL_WEIGHTS = '/trt_pose/tasks/human_pose/resnet18_baseline_att_224x224_A_epoch_249.pth'
    self._OPTIMIZED_MODEL = '/trt_pose/tasks/human_pose/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    self._num_parts = len(human_pose['keypoints'])
    self._num_links = len(human_pose['skeleton'])
    print('BreathRateDetector: using resnet model')
    self._model = trt_pose.models.resnet18_baseline_att(self._num_parts, 2 * self._num_links).cuda().eval()
    self._WIDTH = 224
    self._HEIGHT = 224
    self._data = torch.zeros((1, 3, self._HEIGHT, self._WIDTH)).cuda()
    # Convert the PyTorch weights to a TensorRT engine once and cache it on disk.
    if not os.path.exists(self._OPTIMIZED_MODEL):
        print('BreathRateDetector: -- Converting TensorRT models. This may take several minutes...')
        self._model.load_state_dict(torch.load(self._MODEL_WEIGHTS))
        model_trt = torch2trt.torch2trt(self._model, [self._data], fp16_mode=True, max_workspace_size=1 << 25)
        torch.save(model_trt.state_dict(), self._OPTIMIZED_MODEL)
        print('BreathRateDetector: -- Conversion complete --')
    print('BreathRateDetector: loading TRT model.')
    self._model_trt = TRTModule()
    self._model_trt.load_state_dict(torch.load(self._OPTIMIZED_MODEL))
    self._mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self._std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self._device = torch.device('cuda')
    self._parse_objects = ParseObjects(topology)
    self._draw_objects = DrawObjects(topology)
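
# Hedged companion sketch (not in the original class): how the fields set up in
# __init__ above are typically used for a single inference pass in trt_pose
# demos. The method name `_execute` and the cv2/torchvision imports are
# assumptions; the normalization and cmap/paf parsing follow standard trt_pose usage.
def _execute(self, image):
    # Resize the BGR frame to the network input size and normalize with
    # the ImageNet statistics stored in __init__.
    img = cv2.resize(image, (self._WIDTH, self._HEIGHT))
    data = torchvision.transforms.functional.to_tensor(img).to(self._device)
    data.sub_(self._mean[:, None, None]).div_(self._std[:, None, None])
    # Run the TensorRT engine; it returns part-confidence maps and part-affinity fields.
    cmap, paf = self._model_trt(data[None, ...])
    cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
    counts, objects, peaks = self._parse_objects(cmap, paf)
    self._draw_objects(image, counts, objects, peaks)
    return image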
def main(args):
    with open("human_pose.json", "r") as f:
        human_pose = json.load(f)
    topology = trt_pose.coco.coco_category_to_topology(human_pose)

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    model = get_model(human_pose, device)
    print(f"Running inference on device: {device}")

    preprocess = torchvision.transforms.Compose([
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.Resize((HEIGHT, WIDTH)),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225]),
    ])

    parse_objects = ParseObjects(topology)
    draw_objects = DrawObjects(topology)

    def user_callback(image_data):
        start_time = time.monotonic()
        tensor_image = preprocess(image_data)
        tensor_image = tensor_image.unsqueeze(0)
        cmap, paf = model(tensor_image.to(device))
        cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
        inference_time_ms = (time.monotonic() - start_time) * 1000
        print(f"Inference time: {inference_time_ms:.2f}ms")
        counts, objects, peaks = parse_objects(cmap, paf)  # cmap_threshold=0.15, link_threshold=0.15
        draw_objects(image_data, counts, objects, peaks)
        return image_data

    run_pipeline(
        user_callback,
        src_frame_rate=args.frame_rate,
        src_height=args.source_height,
        src_width=args.source_width,
        binning_level=args.binning_level,
    )
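
# The `get_model` helper called in main() is not shown in the original; this is
# a hedged reconstruction. The resnet18_baseline_att architecture and the
# epoch-249 checkpoint filename match the stock trt_pose release, but the exact
# helper body is an assumption.
def get_model(human_pose, device):
    num_parts = len(human_pose["keypoints"])
    num_links = len(human_pose["skeleton"])
    model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links)
    model.load_state_dict(
        torch.load("resnet18_baseline_att_224x224_A_epoch_249.pth", map_location=device))
    return model.to(device).eval()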
def __init__(self):
    self.goal = 0.0  # [angle]

    print("Getting path to package...")
    self.follow_people_configfiles_path = rospkg.RosPack().get_path(
        'drone_ai') + "/scripts/helpers/trtpose/models"

    print("Locating the JSON file that describes the human pose task")
    humanPose_file_path = os.path.join(
        rospkg.RosPack().get_path('drone_ai') + "/scripts/helpers/trtpose/models/",
        'human_pose.json')

    print("Opening json file")
    with open(humanPose_file_path, 'r') as f:
        self.human_pose = json.load(f)

    print("Creating topology")
    self.topology = trt_pose.coco.coco_category_to_topology(self.human_pose)
    # print("Topology====>", self.topology)

    self.WIDTH = 640
    self.HEIGHT = 480

    OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    # OPTIMIZED_MODEL = 'densenet121_baseline_att_256x256_B_epoch_160_trt.pth'
    optimized_model_weights_path = os.path.join(
        self.follow_people_configfiles_path, OPTIMIZED_MODEL)
    if not os.path.exists(optimized_model_weights_path):
        self.__create_optimodel(optimized_model_weights_path)

    print("Loading the saved model with torch2trt's TRTModule")
    self.model_trt = TRTModule()
    self.model_trt.load_state_dict(torch.load(optimized_model_weights_path))

    print("Defining the image processing variables")
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device("cuda")

    print("Creating the classes that parse the network output and draw on the image")
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)
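
# __create_optimodel is called above but not defined in this snippet; here is a
# hedged sketch of what it plausibly does, mirroring the torch2trt conversion
# used elsewhere in this section. The weight filename and the 224x224 input
# size are assumptions tied to the resnet18 checkpoint.
def __create_optimodel(self, optimized_model_weights_path):
    MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
    num_parts = len(self.human_pose['keypoints'])
    num_links = len(self.human_pose['skeleton'])
    model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
    model.load_state_dict(torch.load(
        os.path.join(self.follow_people_configfiles_path, MODEL_WEIGHTS)))
    # The resnet checkpoint was trained at 224x224, independent of the camera resolution.
    data = torch.zeros((1, 3, 224, 224)).cuda()
    model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1 << 25)
    torch.save(model_trt.state_dict(), optimized_model_weights_path)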
def __init__(self, imshow=True):
    self.imshow = imshow  # display results of pose estimation

    # Torch settings
    self.device = torch.device('cuda')
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.load_model()

    # Setup camera and visuals
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)
    self.setup_camera()

    # Visualization
    self.im = plt.imshow(self.execute({'new': self.camera.value}))
    self.ani = FuncAnimation(plt.gcf(), self.update, interval=200)
    self.cid = plt.gcf().canvas.mpl_connect("key_press_event", self.close)
    self.running = True
    plt.show()
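
# The constructor above wires FuncAnimation to self.update and the key handler
# to self.close, but neither is shown. These are hedged sketches of the two
# callbacks; only the read-redraw-quit pattern is asserted, the bodies are assumptions.
def update(self, frame):
    # Re-run pose estimation on the latest camera frame and refresh the plot.
    self.im.set_data(self.execute({'new': self.camera.value}))
    return [self.im]

def close(self, event):
    # Stop the loop and close the window on 'q'.
    if event.key == 'q':
        self.running = False
        plt.close(plt.gcf())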
def __init__(self):
    with open('modules/ai_module/techniques/pose/trt_pose/human_pose.json', 'r') as f:
        human_pose = json.load(f)
    self.topology = trt_pose.coco.coco_category_to_topology(human_pose)
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)

    OPTIMIZED_MODEL = 'modules/ai_module/techniques/pose/trt_pose/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    self.model_trt = TRTModule()
    self.model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))
    log = "POSE Model loaded"
    logging.info(log)
    print("[", colored("INFO", 'green', attrs=['bold']), " ] " + log)

    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')
    log = "Setting CUDA backend"
    logging.info(log)
    print("[", colored("INFO", 'green', attrs=['bold']), " ] " + log)
def run(self):
    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
    out_video = cv2.VideoWriter('/tmp/output.mp4', fourcc,
                                self.cap.get(cv2.CAP_PROP_FPS), (640, 480))
    # Build the parsers once, outside the loop, rather than on every frame.
    parse_objects = ParseObjects(topology)
    draw_objects = DrawObjects(topology)
    count = 0
    while self.cap.isOpened() and count < 500:
        t = time.time()
        ret_val, dst = self.cap.read()
        if not ret_val:
            print("Camera read Error")
            break
        img = cv2.resize(dst, dsize=(WIDTH, HEIGHT), interpolation=cv2.INTER_AREA)
        img = PE.execute(img, dst, t)
        out_video.write(img)  # assumes PE.execute returns the annotated 640x480 frame
        cv2.imshow("result", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        count += 1
    cv2.destroyAllWindows()
    out_video.release()
    self.cap.release()  # was `cap.release()`, which referenced an undefined name
def __init__(self, net, image_dimensions, network_dimensions, weights_filename, optimize):
    with open('models/human_pose.json', 'r') as f:
        self.human_pose = json.load(f)
    self.topology = coco_category_to_topology(self.human_pose)
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)

    self.num_parts = len(self.human_pose['keypoints'])
    self.num_links = len(self.human_pose['skeleton'])

    self.weights_filename = weights_filename
    self.weights_filename_opt = '{0}.trt'.format(weights_filename)
    self.model = net(self.num_parts, 2 * self.num_links).cuda().eval()

    self.image_dimensions = image_dimensions
    self.network_dimensions = network_dimensions
    # data = torch.zeros((1, 3, self.height, self.width)).cuda()

    if optimize or not os.path.exists(self.weights_filename_opt):
        logger.info("Optimizing network for trt")
        self.model.load_state_dict(torch.load(self.weights_filename))
        self.model_trt = torch2trt.torch2trt(
            self.model,
            [torch.zeros((1, 3, *self.network_dimensions)).cuda()],
            fp16_mode=True)
        torch.save(self.model_trt.state_dict(), self.weights_filename_opt)
    else:
        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(torch.load(self.weights_filename_opt))

    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')

    self.preprocess = transforms.Compose([
        transforms.Resize(self.network_dimensions),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        self.model_trt
    ])
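
# Hedged usage sketch for the class above (its name is not shown in the
# original, so `PoseEstimator` is a placeholder). The constructor arguments
# match __init__; the weight path is an assumption.
estimator = PoseEstimator(
    net=trt_pose.models.resnet18_baseline_att,
    image_dimensions=(480, 640),
    network_dimensions=(224, 224),
    weights_filename='models/resnet18_baseline_att_224x224_A_epoch_249.pth',
    optimize=False)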
def init():
    import torch2trt
    from torch2trt import TRTModule

    with open('./models/human_pose.json', 'r') as f:
        human_pose = json.load(f)

    global topology
    topology = coco_category_to_topology(human_pose)

    global WIDTH, HEIGHT
    WIDTH = 256
    HEIGHT = 256
    # data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

    OPTIMIZED_MODEL = Path('./models/densenet121_baseline_att_256x256_B_epoch_160_trt.pth')
    global model_trt
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))
    print('loaded model')

    global mean, std, device
    mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    device = torch.device('cuda')

    global parse_objects, draw_objects
    parse_objects = ParseObjects(topology)
    draw_objects = DrawObjects(topology)
def __init__(self):
    print('------ model = resnet --------')
    self.MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
    self.OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'

    with open('human_pose.json', 'r') as f:
        human_pose = json.load(f)
    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])

    self.model = trt_pose.models.resnet18_baseline_att(
        num_parts, 2 * num_links).cuda().eval()
    self.WIDTH = 224
    self.HEIGHT = 224
    data = torch.zeros((1, 3, self.HEIGHT, self.WIDTH)).cuda()

    # Convert and cache the TensorRT engine on first run.
    if not os.path.exists(self.OPTIMIZED_MODEL):
        self.model.load_state_dict(torch.load(self.MODEL_WEIGHTS))
        self.model_trt = torch2trt.torch2trt(self.model, [data], fp16_mode=True,
                                             max_workspace_size=1 << 25)
        torch.save(self.model_trt.state_dict(), self.OPTIMIZED_MODEL)

    self.topology = trt_pose.coco.coco_category_to_topology(human_pose)
    self.model_trt = TRTModule()
    self.model_trt.load_state_dict(torch.load(self.OPTIMIZED_MODEL))
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)
model.load_state_dict(torch.load(MODEL_WEIGHTS))

# Optimization with tensorRT
# NOTE: optimization is device specific
# data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()
# model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)
# torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)

# Load optimized model
print("Loading optimized model")
model_trt = TRTModule()
model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

# Setup camera and visuals
parse_objects = ParseObjects(topology)
draw_objects = DrawObjects(topology)
camera = USBCamera(width=WIDTH, height=HEIGHT, capture_device=1)
camera.running = True

# Attach observer to act on each new frame received
# camera.observe(execute, names='value')
im = plt.imshow(execute({'new': camera.value}))
ani = FuncAnimation(plt.gcf(), update, interval=200)
cid = plt.gcf().canvas.mpl_connect("key_press_event", close)
plt.show()
def main():
    liveDemo = True

    if liveDemo:
        # Set up USB camera (assuming device 0)
        cam = USBCamera(width=1920, height=1080, capture_device=0)

    # Set up model for yolov5s
    yolo = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
    device = torch.device('cuda')
    yolo.to(device)

    # Set up topology, model, and classes for ResNet
    with open('human_pose.json', 'r') as f:
        human_pose = json.load(f)
    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    resnet = TRTModule()
    resnet.load_state_dict(torch.load('resnet_trt.pth'))
    parseObjects = ParseObjects(topology)
    drawObjects = DrawObjects(topology)

    # Basic analytics
    imageCount = 0
    t = time()

    # Live demo on webcam
    if liveDemo:
        # Continue until interrupt
        try:
            while True:
                # Grab a frame and convert BGR to RGB
                img = cam.read()[:, :, ::-1]
                print(f'got frame {imageCount}')
                # Process with yolo and resnet
                result, empty = processFrame(img, yolo, resnet, parseObjects, drawObjects)
                # Save file
                cv2.imwrite(f'imgs/{imageCount:04}.jpg', result)
                imageCount += 1
        except KeyboardInterrupt:
            print('Keyboard interrupt!')
        finally:
            t = time() - t
    # Recorded video
    else:
        cap = cv2.VideoCapture('example_video.mpg')
        # Grab a frame
        ret, frame = cap.read()
        # Continue until video is done
        while ret:
            # Process and save image
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            result, empty = processFrame(img, yolo, resnet, parseObjects, drawObjects)
            cv2.imwrite(f'imgs/{imageCount:04}.jpg', result)
            # Try to grab next frame
            ret, frame = cap.read()
            imageCount += 1
        t = time() - t
        cap.release()

    print(f'Ending. Processed {imageCount} images in {t}s, average FPS of {imageCount/t}')
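
# processFrame is used above but not defined in this snippet; this is a hedged
# sketch of one plausible implementation: YOLOv5 checks whether a person is in
# frame, then the trt_pose ResNet estimates and draws the skeleton. The meaning
# of the second return value (`empty`) and the numpy/torchvision imports are assumptions.
def processFrame(img, yolo, resnet, parseObjects, drawObjects):
    # YOLOv5 hub models accept RGB numpy arrays; class 0 is 'person' in COCO.
    detections = yolo(img).xyxy[0]
    empty = not any(int(row[5]) == 0 for row in detections.tolist())
    # Standard trt_pose preprocessing at the network's 224x224 input size.
    mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()[:, None, None]
    std = torch.Tensor([0.229, 0.224, 0.225]).cuda()[:, None, None]
    data = torchvision.transforms.functional.to_tensor(
        cv2.resize(img, (224, 224))).cuda()
    data = (data - mean) / std
    cmap, paf = resnet(data[None, ...])
    counts, objects, peaks = parseObjects(cmap.detach().cpu(), paf.detach().cpu())
    # Convert back to a contiguous BGR image for drawing and cv2.imwrite.
    out = np.ascontiguousarray(img[:, :, ::-1])
    drawObjects(out, counts, objects, peaks)
    return out, empty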
def __init__(self, root):
    # Context variable declarations and loading
    self.running = False
    self.WIDTH = 224
    self.HEIGHT = 224
    self.thresh = 127
    self.round = 0
    self.minimum_joints = 4
    self.path = './images/'
    self.mdelay_sec = 10
    self.mtick = self.mdelay_sec
    self.mask = None
    self.calibrate = True  # Flag to show calibration pose over camera feed
    self.calibration_pose = cv2.imread('./images/cal_pose.jpg', cv2.IMREAD_COLOR)

    # Loading model and model data
    with open('./tasks/human_pose/human_pose.json', 'r') as f:
        human_pose = json.load(f)
    self.topology = trt_pose.coco.coco_category_to_topology(human_pose)
    self.num_parts = len(human_pose['keypoints'])
    self.num_links = len(human_pose['skeleton'])
    self.data = torch.zeros((1, 3, self.HEIGHT, self.WIDTH)).cuda()
    self.OPTIMIZED_MODEL = './tasks/human_pose/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    self.model_trt = TRTModule()
    self.model_trt.load_state_dict(torch.load(self.OPTIMIZED_MODEL))
    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')
    self.parse_objects = ParseObjects(self.topology)
    self.draw_objects = DrawObjects(self.topology)

    # Start camera
    if USBCam:
        self.camera = USBCamera(width=self.WIDTH, height=self.HEIGHT, capture_fps=30)
    else:
        self.camera = CSICamera(width=self.WIDTH, height=self.HEIGHT, capture_fps=30)

    self.frame = Tk.Frame(root)
    self.root = root

    # Create editable title
    self.titleVar = Tk.StringVar()
    self.title = Tk.Label(root, textvariable=self.titleVar, font="Verdana 36")
    self.titleVar.set("Pose Estimation Game")
    self.title.pack(side=Tk.TOP)
    self.frame.pack(side=Tk.LEFT, fill=Tk.BOTH, expand=1)

    # Create image capture figure: a Frame with three possible images
    # (live feed, mask/pose to make, image captured)
    self.im_row = Tk.Frame(self.frame)
    self.feed_label = Tk.Label(self.im_row)
    self.feed_label.pack(side=Tk.LEFT)
    self.mask_label = Tk.Label(self.im_row)
    self.pose_label = Tk.Label(self.im_row)

    # Create editable description label
    self.desTextVar = "Please select an option from the right"
    self.desText = Tk.Label(self.frame, text=self.desTextVar, font="Verdana 12")

    # Create combobox for selection (steps are currently in comments):
    # grab maps from repository, parse map names to develop choices,
    # group map names into an array
    self.levels = []
    choices = ["Easy", "Medium", "Hard"]
    # Put map names in the combo box
    self.ddVar = Tk.StringVar()
    self.ddVar.set('Select a Choice')
    self.dropDown = Tk.OptionMenu(self.frame, self.ddVar, *choices)
    # This binds a callback that loads all images for a level upon
    # selection of an option in the dropdown menu
    self.ddVar.trace('w', self.levels_select)

    # Create initial button panel
    self.buttonPanel = ButtonPanel(root)

    self.im_row.pack()
    self.desText.pack()
    self.buttonPanel.pack()
    self.root.after(10, self.camera_loop)
    MainGUI.updateToTitle(self)
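
# camera_loop is scheduled above via root.after but not shown; this hedged
# sketch only illustrates the read-overlay-reschedule pattern a Tkinter video
# feed needs. The PIL/ImageTk imports and the overlay weighting are assumptions.
def camera_loop(self):
    frame = self.camera.read()  # BGR frame from the jetcam camera
    if self.calibrate and self.calibration_pose is not None:
        # Blend the calibration pose over the live feed
        overlay = cv2.resize(self.calibration_pose, (self.WIDTH, self.HEIGHT))
        frame = cv2.addWeighted(frame, 0.7, overlay, 0.3, 0)
    img = ImageTk.PhotoImage(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
    self.feed_label.configure(image=img)
    self.feed_label.image = img  # keep a reference so Tk does not garbage-collect it
    self.root.after(10, self.camera_loop)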