def build_tensorrt(trt_file, model, size, device, recompile=False, fp16=True):
    """Return a TensorRT module for ``model``, reusing ``trt_file`` when allowed.

    Args:
        trt_file: Path of the serialized TensorRT state dict to load or write.
        model: Module handed to ``torch2trt`` when a build is required.
        size: Two-element sequence; ``size[1]`` and ``size[0]`` become the
            third and fourth dimensions of the sample input (presumably
            ``(width, height)`` for an NCHW tensor -- TODO confirm).
        device: Device the sample input is moved to.
        recompile: When true, ignore an existing ``trt_file`` and rebuild.
        fp16: Forwarded to ``torch2trt`` as ``fp16_mode``.

    Returns:
        The loaded ``TRTModule`` or the freshly converted module.
    """
    from torch2trt import torch2trt, TRTModule
    import tensorrt as trt

    height, width = int(size[1]), int(size[0])
    sample = torch.ones(1, 3, height, width).to(device)

    reuse_cached = path.isfile(trt_file) and not recompile
    if not reuse_cached:
        print("Compiling with tensorRT...")
        built = torch2trt(
            model,
            [sample],
            max_workspace_size=1 << 27,
            fp16_mode=fp16,
            log_level=trt.Logger.INFO,
            strict_type_constraints=True,
            max_batch_size=1,
        )
        torch.save(built.state_dict(), trt_file)
        return built

    print("Found TensorRT model file, loading...")
    cached = TRTModule()
    cached.load_state_dict(torch.load(trt_file))
    # One forward pass on the sample input before handing the module back
    # (presumably a warm-up / sanity check). Any failure here propagates to
    # the caller; there is no fallback to recompiling.
    cached(sample)
    return cached
class AntiSpoofPredict(Detection):
    """Anti-spoofing predictor that runs inference through a torch2trt module.

    If ``trt_spoof.pth`` exists in the working directory it is loaded as a
    ``TRTModule`` and the PyTorch weights are never read. Otherwise the
    PyTorch model is loaded from ``weights_path`` and converted to TensorRT on
    the first call to :meth:`predict`, after which the converted state dict is
    written to ``trt_spoof.pth``.
    """

    def __init__(self, device_id, weights_path):
        """Select the device and load either the cached TRT or PyTorch model.

        Args:
            device_id: CUDA device index; ignored when CUDA is unavailable.
            weights_path: Path of the PyTorch checkpoint. Its base name is
                parsed by ``parse_model_name`` to pick the architecture.
        """
        super(AntiSpoofPredict, self).__init__()
        self.device = torch.device(
            "cuda:{}".format(device_id) if torch.cuda.is_available() else "cpu")
        self.model_trt = None
        # Always defined, so the attribute exists even when the cached
        # TensorRT module is used and no PyTorch model is built.
        self.model = None
        self._load_model(weights_path)

    def _load_model(self, model_path):
        """Load the cached TensorRT module, or else the PyTorch model.

        Args:
            model_path: Path of the PyTorch checkpoint (unused when the
                cached ``trt_spoof.pth`` is present).
        """
        if os.path.isfile('trt_spoof.pth'):
            self.model_trt = TRTModule()
            self.model_trt.load_state_dict(torch.load('trt_spoof.pth'))
            return None

        # define model
        model_name = os.path.basename(model_path)
        h_input, w_input, model_type, _ = parse_model_name(model_name)
        self.kernel_size = get_kernel(h_input, w_input)
        self.model = MODEL_MAPPING[model_type](
            conv6_kernel=self.kernel_size).to(self.device)

        # load model weight
        state_dict = torch.load(model_path, map_location=self.device)
        first_layer_name = next(iter(state_dict))
        if 'module.' in first_layer_name:
            # Drop the first len('module.') == 7 characters from every key
            # (presumably a checkpoint saved from a DataParallel wrapper).
            from collections import OrderedDict
            new_state_dict = OrderedDict()
            for key, value in state_dict.items():
                new_state_dict[key[7:]] = value
            self.model.load_state_dict(new_state_dict)
        else:
            self.model.load_state_dict(state_dict)
        self.model.eval()
        return None

    def predict(self, img):
        """Run the TensorRT model on one image and return softmax scores.

        Args:
            img: Input accepted by ``trans.ToTensor()``.

        Returns:
            numpy array holding the softmax of the model output.
        """
        test_transform = trans.Compose([
            trans.ToTensor(),
        ])
        img = test_transform(img)
        img = img.unsqueeze(0).to(self.device)
        if self.model_trt is None:
            # First call without a cached engine: convert using this input as
            # the example, persist the result and release the PyTorch model.
            self.model_trt = torch2trt(self.model, [img], fp16_mode=True)
            torch.save(self.model_trt.state_dict(), 'trt_spoof.pth')
            self.model = None
        with torch.no_grad():
            result = self.model_trt(img)
            # Explicit dim: the implicit choice is deprecated and warns on
            # every call. dim=1 is what the implicit rule picks for a
            # (batch, classes) output -- assumed shape, TODO confirm.
            result = F.softmax(result, dim=1).cpu().numpy()
        return result
def __init__(self, modelFile, taskDescFile, csv=0, csvPath='.'):
    """Load the task description and prepare the TensorRT pose model.

    Args:
        modelFile: Path of the base model weights. Its base name is passed to
            ``PoseCaptureModel.getModelFuncName`` to obtain the model
            function name and the input width/height.
        taskDescFile: Path of a JSON file providing ``'keypoints'`` and
            ``'skeleton'`` entries.
        csv: When greater than 0, CSV output is initialised through
            ``self._initCsv``.
        csvPath: Passed to ``self._initCsv``.

    Raises:
        PoseCaptureDescError: ``taskDescFile`` could not be opened.
        PoseCaptureModelError: The model file name is not recognised, or the
            named function does not exist in ``trt_pose.models``.
        PoseCaptureCsvError: CSV initialisation raised ``OSError``.
    """
    # Load the task description
    try:
        with open(taskDescFile, 'r') as f:
            human_pose = json.load(f)
    except OSError as err:
        raise PoseCaptureDescError from err

    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])

    # Load the base model
    fbase = os.path.basename(modelFile)
    func, self.inWidth, self.inHeight = \
        PoseCaptureModel.getModelFuncName(fbase)
    if func is None:
        logging.fatal('Invalid model name: %s' % (fbase))
        logging.fatal('Model name should be (.+_.+_att)_(\\d+)x(\\d+)_')
        raise PoseCaptureModelError('Invalid model name: %s' % (fbase))
    if not hasattr(trt_pose.models, func):
        logging.fatal('Could not find base model function: %s' % (func))
        raise PoseCaptureModelError(
            'Could not find base model function: %s' % (func))

    # Resolve the factory by attribute lookup on the module checked above
    # rather than eval()-ing a string assembled from the file name.
    model_factory = getattr(trt_pose.models, func)
    func = 'trt_pose.models.' + func
    # The cached TensorRT file is named after the model's base name only, so
    # it is looked up in (and written to) the current working directory.
    trtFile = os.path.splitext(fbase)[0] + '_trt.pth'
    logging.info('Loading base model from %s' % (func))
    # NOTE(review): the base model is instantiated even when the cached
    # TensorRT file exists and the base model is then unused.
    model = model_factory(num_parts, 2 * num_links).cuda().eval()

    if os.path.exists(trtFile):
        logging.info('Loading model from TensorRT plan file ...')
        model_trt = TRTModule()
        model_trt.load_state_dict(torch.load(trtFile))
    else:
        logging.info('Optimizing model for TensorRT ...')
        model.load_state_dict(torch.load(modelFile))
        data = torch.zeros((1, 3, self.inHeight, self.inWidth)).cuda()
        model_trt = torch2trt.torch2trt(
            model, [data], fp16_mode=True, max_workspace_size=1 << 25)
        torch.save(model_trt.state_dict(), trtFile)

    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')
    self.parse_objects = ParseObjects(topology)
    self.draw_objects = DrawObjects(topology)
    self.model_trt = model_trt
    self.num_parts = num_parts

    self.csv = csv
    self.count = 0
    if self.csv > 0:
        try:
            self._initCsv(human_pose['keypoints'], csvPath)
        except OSError as err:
            raise PoseCaptureCsvError from err
def process_tftrt(self, input_model, output_infer_model):
    """Return a TensorRT module, converting ``input_model`` only if needed.

    Args:
        input_model: Passed to ``load_pytorch_saved_model`` when no converted
            file exists yet.
        output_infer_model: Path of the converted state dict; loaded when it
            exists, written after a fresh conversion otherwise.

    Returns:
        The loaded ``TRTModule`` or the newly converted module.
    """
    if not os.path.exists(output_infer_model):
        # No converted file yet: load the pretrained model and convert it,
        # using an all-ones 1x3x224x224 CUDA tensor as the example input.
        source_model = load_pytorch_saved_model(input_model)
        sample = torch.ones((1, 3, 224, 224)).cuda()
        converted = torch2trt(source_model, [sample])
        torch.save(converted.state_dict(), output_infer_model)
        return converted

    logging.info("resnet50_pytorch_trt.pth is exist")
    cached = TRTModule()
    cached.load_state_dict(torch.load(output_infer_model))
    return cached
class BackendTensorRT:
    """Inference backend that serves a model through torch2trt."""

    # ``args.calib_algo`` value -> attribute name on ``trt.CalibrationAlgoType``.
    _CALIB_ALGO_NAMES = {
        1: 'ENTROPY_CALIBRATION',
        2: 'ENTROPY_CALIBRATION_2',
        3: 'MINMAX_CALIBRATION',
    }

    def __init__(self):
        self.model = None

    def version(self):
        """Return the PyTorch version string."""
        return torch.__version__

    def name(self):
        """Return the backend identifier."""
        return "pytorch-tensorrt-ofa"

    def load(self, args, ds=None):
        """Load a cached TensorRT module or build one with INT8 calibration.

        The cache file lives under ``pretrained/<args.model>/`` and its name
        encodes ``args.batch_size``, ``args.image_size`` and
        ``args.chip_name``. It is reused unless ``args.force_build`` is set.

        Args:
            args: Options object; reads ``batch_size``, ``image_size``,
                ``chip_name``, ``model``, ``force_build`` and, when building,
                ``calib_algo`` and ``calib_batch_size``.
            ds: Dataset providing ``get_calibration_set()``; only used when
                the module has to be built.

        Returns:
            ``self``.

        Raises:
            ValueError: A build is required and ``args.calib_algo`` is not
                1, 2 or 3.
        """
        prefix = 'bs%d_is%d_%s_' % (args.batch_size, args.image_size,
                                    args.chip_name)
        lib_name = 'pretrained/' + args.model + '/' + prefix + 'torch2trt.pth'
        if os.path.exists(lib_name) and not args.force_build:
            self.model = TRTModule()
            self.model.load_state_dict(torch.load(lib_name))
            self.model.eval()
        else:
            # Validate before loading the network and calibration data, so an
            # unsupported value fails immediately with a clear message
            # instead of an UnboundLocalError after the expensive setup.
            algo_name = self._CALIB_ALGO_NAMES.get(args.calib_algo)
            if algo_name is None:
                raise ValueError(
                    'Unsupported calib_algo %r; expected one of %s'
                    % (args.calib_algo, sorted(self._CALIB_ALGO_NAMES)))

            net, _ = load_model(args)
            net = net.cuda()
            net.eval()
            input_data = torch.FloatTensor(
                np.array(ds.get_calibration_set(), np.float32)).cuda()
            calib_algo = getattr(trt.CalibrationAlgoType, algo_name)
            # Workspace size: 2**33 bytes when the chip name contains 'T4',
            # 2**34 otherwise.
            size = 1 << (33 if 'T4' in args.chip_name else 34)
            self.model = torch2trt(
                net, [input_data],
                max_batch_size=args.batch_size,
                fp16_mode=True,
                max_workspace_size=size,
                int8_mode=True,
                int8_calib_algorithm=calib_algo,
                int8_calib_batch_size=args.calib_batch_size)
            torch.save(self.model.state_dict(), lib_name)
        log.info('model is ready')
        return self

    def predict(self, image):
        """Return the index of the maximum output along dimension 1."""
        with torch.no_grad():
            output = self.model(image)
            _, output = output.max(1)
        return output
# Channel count used for the example input tensor below.
FRAME_CHANNELS = 3
# When true, the control loop records a start timestamp on each iteration.
SHOW_LOGS = False

# Model: reuse the serialized TensorRT module at MODEL_PATH_TRT when it
# exists; otherwise load AutopilotModel from MODEL_PATH, convert it with
# torch2trt in fp16 mode and save the converted state dict for next time.
if os.path.isfile(MODEL_PATH_TRT):
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(MODEL_PATH_TRT))
else:
    model = AutopilotModel(pretrained=False)
    model.load_from_path(MODEL_PATH)
    model.eval()
    # Example input for the conversion: an all-ones CUDA tensor of shape
    # (1, FRAME_CHANNELS, FRAME_SIZE, FRAME_SIZE).
    # NOTE(review): `model` is not moved to CUDA here -- confirm that
    # load_from_path() takes care of device placement.
    x = torch.ones((1, FRAME_CHANNELS, FRAME_SIZE, FRAME_SIZE)).cuda()
    model_trt = torch2trt(model, [x], fp16_mode=True)
    torch.save(model_trt.state_dict(), MODEL_PATH_TRT)

# NOTE: the handler(s) for this `try` and the rest of the loop body lie
# outside this excerpt.
try:
    # Car: apply the configured throttle gain and steering offset.
    car = NvidiaRacecar()
    car.throttle_gain = THROTTLE_GAIN
    car.steering_offset = STEERING_OFFSET

    # Camera
    camera = CSICamera(width=CAMERA_WIDTH, height=CAMERA_HEIGHT)

    # Control Loop
    while True:
        if SHOW_LOGS:
            start_time = time.time()
class TrtPose():
    """Human-pose detector backed by a torch2trt-optimised trt_pose model.

    On construction the optimised model is loaded from the ``drone_ai``
    package's ``scripts/helpers/trtpose/models`` directory; if the optimised
    file is missing it is generated first from the plain PyTorch weights.
    """

    def __init__(self):
        self.goal = 0.0  # [angle] last yaw angle returned by calcYawAngle

        print("Getting Path to package...")
        self.follow_people_configfiles_path = rospkg.RosPack().get_path(
            'drone_ai') + "/scripts/helpers/trtpose/models"

        print("We get the human pose json file that described the human pose")
        humanPose_file_path = os.path.join(
            rospkg.RosPack().get_path('drone_ai') +
            "/scripts/helpers/trtpose/models/", 'human_pose.json')

        print("Opening json file")
        with open(humanPose_file_path, 'r') as f:
            self.human_pose = json.load(f)

        print("Creating topology")
        self.topology = trt_pose.coco.coco_category_to_topology(
            self.human_pose)

        # Size of the example input used when the optimised model is built.
        self.WIDTH = 640
        self.HEIGHT = 480

        OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
        optimized_model_weights_path = os.path.join(
            self.follow_people_configfiles_path, OPTIMIZED_MODEL)

        if not os.path.exists(optimized_model_weights_path):
            self.__create_optimodel(optimized_model_weights_path)

        print("Load the saved model using Torchtrt")
        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(
            torch.load(optimized_model_weights_path))

        print("Define the Image processing variables")
        self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
        self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
        self.device = torch.device("cuda")

        print(
            "Classes to parse the object of the NeuralNetwork and draw on the image"
        )
        self.parse_objects = ParseObjects(self.topology)
        self.draw_objects = DrawObjects(self.topology)

    def __create_optimodel(self, optimized_model_weights_path):
        """Convert the PyTorch weights with torch2trt and save the result.

        Args:
            optimized_model_weights_path: Destination of the converted
                state dict.
        """
        rospy.loginfo("** No optimised model found. **")
        num_parts = len(self.human_pose['keypoints'])
        num_links = len(self.human_pose['skeleton'])

        rospy.loginfo("Creating Model")
        model = trt_pose.models.resnet18_baseline_att(
            num_parts, 2 * num_links).cuda().eval()

        rospy.loginfo(
            "Load the weights from the eight files predownloaded to this package"
        )
        MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
        model_weights_path = os.path.join(self.follow_people_configfiles_path,
                                          MODEL_WEIGHTS)

        rospy.loginfo("Load state dict")
        model.load_state_dict(torch.load(model_weights_path))

        rospy.loginfo("Creating empty data")
        data = torch.zeros((1, 3, self.HEIGHT, self.WIDTH)).cuda()

        rospy.loginfo(
            "Use tortchtrt to go from Torch to TensorRT to generate an optimised model"
        )
        self.model_trt = torch2trt.torch2trt(model, [data],
                                             fp16_mode=True,
                                             max_workspace_size=1 << 25)

        rospy.loginfo("Saving new optimodel")
        torch.save(self.model_trt.state_dict(), optimized_model_weights_path)

    def __preprocess(self, image):
        """Convert a BGR image to a normalised CUDA tensor with a batch axis."""
        self.device = torch.device("cuda")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = PIL.Image.fromarray(image)
        image = transforms.functional.to_tensor(image).to(self.device)
        # In-place per-channel normalisation with self.mean / self.std.
        image.sub_(self.mean[:, None, None]).div_(self.std[:, None, None])
        return image[None, ...]

    def detect(self, image):
        """Run the pose model on ``image``.

        Returns:
            ``(counts, objects, peaks, topology)`` where the first three come
            from ``self.parse_objects`` applied to the model outputs.
        """
        data = self.__preprocess(image)
        cmap, paf = self.model_trt(data)
        cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
        counts, objects, peaks = self.parse_objects(cmap, paf)
        return counts, objects, peaks, self.topology

    def calcYawAngle(self, position):
        """Return the yaw angle in degrees towards ``position``.

        The angle is ``atan((320 - x) / (480 - y))`` with ``x = position[0]``
        and ``y = position[1]`` (presumably pixel coordinates in a 640x480
        frame -- TODO confirm). The result is also stored in ``self.goal``.

        Args:
            position: Indexable with at least two numeric entries.

        Returns:
            Angle in degrees. When ``position[1] == 480`` the limit value is
            returned (90.0, -90.0, or 0.0 when ``position[0] == 320``)
            instead of dividing by zero.
        """
        dx = float(320 - position[0])
        dy = 480 - position[1]
        if dy == 0:
            # atan(dx / dy) as dy -> 0+; avoids ZeroDivisionError.
            if dx > 0:
                yaw_angle = 90.0
            elif dx < 0:
                yaw_angle = -90.0
            else:
                yaw_angle = 0.0
        else:
            yaw_angle = degrees(atan(dx / dy))
        self.goal = yaw_angle
        return yaw_angle
class Matching(torch.nn.Module):
    """ Image Matching Frontend (SuperPoint + SuperGlue)

    SuperGlue is executed through a torch2trt module built for a fixed
    number of keypoints: inputs are zero-padded to ``_MAX_KEYPOINTS`` columns
    before the call and the padding is stripped from the outputs afterwards.
    """

    # File the converted SuperGlue module is saved to / loaded from.
    _TRT_STATE_FILE = (
        'superglue_trt_fixed_input_part_every_kpt_encoder_every_gnn_outputs.pth')
    # Fixed keypoint count the TensorRT module is built for.
    _MAX_KEYPOINTS = 350

    def __init__(self, config=None):
        """Build SuperPoint / SuperGlue and optionally load the TRT module.

        Args:
            config: Optional dict; its ``'superpoint'`` and ``'superglue'``
                entries (default ``{}``) configure the two sub-models.
        """
        super().__init__()
        config = {} if config is None else config
        self.superpoint = SuperPoint(config.get('superpoint', {}))
        self.superglue = SuperGlue(config.get('superglue', {}))

        self.model_sg_trt = None
        # Truthy: convert SuperGlue with torch2trt on the first forward()
        # call and save it. Falsy: load the previously saved module now.
        self.convert_save_trt_model = 1
        if not self.convert_save_trt_model:
            self.model_sg_trt = TRTModule()
            self.model_sg_trt.load_state_dict(torch.load(self._TRT_STATE_FILE))

        self.bin_score = torch.nn.Parameter(torch.tensor(2.3457).cuda())

    @staticmethod
    def _pad_keypoint_axis(tensor, pad):
        """Append ``pad`` zero columns along dimension 2 of a 3-D tensor."""
        filler = tensor.new_zeros((tensor.shape[0], tensor.shape[1], pad))
        return torch.cat((tensor, filler), 2)

    def forward(self, data):
        """ Run SuperPoint (optionally) and SuperGlue
        SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input
        Args:
          data: dictionary with minimal keys: ['image0', 'image1']
        Returns:
          dict with the SuperPoint predictions (if computed) plus
          'matches0', 'matches1', 'matching_scores0', 'matching_scores1'.
        Raises:
          RuntimeError: an image has more than ``_MAX_KEYPOINTS`` keypoints.
        """
        pred = {}

        # Extract SuperPoint (keypoints, scores, descriptors) if not provided
        if 'keypoints0' not in data:
            pred0 = self.superpoint({'image': data['image0']})
            pred = {**pred, **{k + '0': v for k, v in pred0.items()}}
        if 'keypoints1' not in data:
            pred1 = self.superpoint({'image': data['image1']})
            pred = {**pred, **{k + '1': v for k, v in pred1.items()}}

        t1 = time.time()

        # Batch all features
        # We should either have i) one image per batch, or
        # ii) the same number of local features for all images in the batch.
        data = {**data, **pred}
        for k in data:
            if isinstance(data[k], (list, tuple)):
                data[k] = torch.stack(data[k])

        # Run SuperGlue on a pair of keypoints and descriptors.
        desc0, desc1 = data['descriptors0'], data['descriptors1']
        kpts0, kpts1 = data['keypoints0'], data['keypoints1']

        # Keypoint normalization.
        kpts0 = normalize_keypoints(kpts0, data['image0'].shape)
        kpts1 = normalize_keypoints(kpts1, data['image1'].shape)

        # Stack keypoint coordinates and scores along dimension 1.
        kpts_scores0 = torch.cat(
            [kpts0.transpose(1, 2), data['scores0'].unsqueeze(1)], dim=1)
        kpts_scores1 = torch.cat(
            [kpts1.transpose(1, 2), data['scores1'].unsqueeze(1)], dim=1)

        # Zero-pad every input to the fixed keypoint count.
        len0 = self._MAX_KEYPOINTS - desc0.shape[2]
        len1 = self._MAX_KEYPOINTS - desc1.shape[2]
        if len0 < 0 or len1 < 0:
            raise RuntimeError(
                'Too many keypoints for the fixed-size TensorRT SuperGlue '
                'module: got %d and %d, maximum is %d'
                % (desc0.shape[2], desc1.shape[2], self._MAX_KEYPOINTS))
        desc0 = self._pad_keypoint_axis(desc0, len0)
        desc1 = self._pad_keypoint_axis(desc1, len1)
        kpts_scores0 = self._pad_keypoint_axis(kpts_scores0, len0)
        kpts_scores1 = self._pad_keypoint_axis(kpts_scores1, len1)

        if self.model_sg_trt is None:
            # Convert once and reuse: the padded inputs always have the same
            # shape, so there is nothing to gain from rebuilding (and
            # re-saving) the module on every call.
            # NOTE: the original author's comments record that an earlier
            # export ('..._kpt_encoder_gnn_4outputs.pth') gave results that
            # differed from the PyTorch model.
            self.model_sg_trt = torch2trt(
                self.superglue, [kpts_scores0, desc0, kpts_scores1, desc1])
            torch.save(self.model_sg_trt.state_dict(), self._TRT_STATE_FILE)

        # The matching descriptors are the last two outputs of the module.
        output_trt = self.model_sg_trt(kpts_scores0, desc0, kpts_scores1, desc1)
        mdesc0, mdesc1 = output_trt[-2], output_trt[-1]

        # Strip the padding columns again.
        if len0 > 0:
            mdesc0 = mdesc0[:, :, :-len0]
        if len1 > 0:
            mdesc1 = mdesc1[:, :, :-len1]

        # Compute matching descriptor distance.
        scores = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1)
        scores = scores / 256 ** .5

        # Run the optimal transport.
        # (iters is hard-coded in place of config['sinkhorn_iterations'].)
        scores = log_optimal_transport(scores, self.bin_score, iters=20)

        # Get the matches with score above "match_threshold".
        # (threshold is hard-coded to 0.0 in place of config['match_threshold'].)
        max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1)
        indices0, indices1 = max0.indices, max1.indices
        mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0)
        mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1)
        zero = scores.new_tensor(0)
        mscores0 = torch.where(mutual0, max0.values.exp(), zero)
        mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero)
        valid0 = mutual0 & (mscores0 > 0.0)
        valid1 = mutual1 & valid0.gather(1, indices1)
        indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1))
        indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1))

        ret = {
            'matches0': indices0,  # use -1 for invalid match
            'matches1': indices1,  # use -1 for invalid match
            'matching_scores0': mscores0,
            'matching_scores1': mscores1,
        }
        pred = {**pred, **ret}

        # Calls per second for everything after SuperPoint.
        print(1 / (time.time() - t1))
        return pred
# Build the backbone on the target device; switch it to half precision when
# requested.
model_backbone = Darknet_Backbone(opt.model_def, img_size=opt.img_size).to(device)
if opt.half:
    model_backbone = model_backbone.half()

# .pth checkpoints and darknet .weights files are loaded differently.
if opt.weights_path.rpartition(".")[2] == "pth":
    # load a .pth file
    model_backbone.load_state_dict(torch.load(opt.weights_path))
else:
    # load a .weights file
    model_backbone.load_darknet_weights(opt.weights_path)
model_backbone.eval()

# Example input: the values of x do not matter, it only tells the converter
# what input shape to generate for (3 = RGB channels).
x = torch.rand(size=(opt.batch_size, 3, opt.img_size[0], opt.img_size[1])).to(device)
if opt.half:
    x = x.half()
    # Model conversion starts here. fp16_mode=True enables half precision;
    # according to the original author the int8 option in the torch2trt
    # source has not been fully debugged.
    model_trt = torch2trt(model_backbone, [x], fp16_mode=True)
else:
    model_trt = torch2trt(model_backbone, [x])

# Serialize and save the converted model.
torch.save(model_trt.state_dict(), "weights/{}".format(opt.model_save_name))