def __init__(self):
    """Parse CLI options, load the TensorRT pose engine and open camera 0.

    Reads ``human_pose.json`` from the current working directory to build
    the keypoint topology, selects the resnet or densenet configuration
    from ``--model``, loads the matching ``*_trt.pth`` state dict into
    ``self.model_trt`` and requests a 640x480 frame size on
    ``cv2.VideoCapture(0)``.
    """
    self.parser = argparse.ArgumentParser(description='TensorRT pose estimation run')
    self.parser.add_argument('--model', type=str, default='resnet', help='resnet or densenet')
    # Fix: the parser is stored on ``self``; a bare ``parser`` name is not
    # defined inside this method.
    self.args = self.parser.parse_args()

    with open('human_pose.json', 'r') as f:
        human_pose = json.load(f)

    self.topology = trt_pose.coco.coco_category_to_topology(human_pose)

    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])

    # Fix: read the parsed options from ``self.args`` (bare ``args`` is undefined here).
    if 'resnet' in self.args.model:
        print('------ model = resnet--------')
        MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
        OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
        model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
        WIDTH = 224
        HEIGHT = 224
    else:
        print('------ model = densenet--------')
        MODEL_WEIGHTS = 'densenet121_baseline_att_256x256_B_epoch_160.pth'
        OPTIMIZED_MODEL = 'densenet121_baseline_att_256x256_B_epoch_160_trt.pth'
        model = trt_pose.models.densenet121_baseline_att(num_parts, 2 * num_links).cuda().eval()
        WIDTH = 256
        HEIGHT = 256

    # NOTE(review): ``model``, ``data`` and ``MODEL_WEIGHTS`` are not used
    # again in the visible code; they are kept so the side effects of the
    # original constructor are unchanged.
    data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

    self.model_trt = TRTModule()
    self.model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

    mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    # Keep the input size and normalisation constants reachable from other
    # methods instead of dropping them when the constructor returns.
    self.width = WIDTH
    self.height = HEIGHT
    self.mean = mean
    self.std = std

    self.cap = cv2.VideoCapture(0)
    self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
def _load_model(self, model_path):
    """Populate ``self.model_trt`` from cache or ``self.model`` from weights.

    If ``trt_spoof.pth`` exists in the working directory it is loaded into
    a ``TRTModule`` and nothing else happens. Otherwise the architecture is
    derived from the file name of ``model_path`` via ``parse_model_name``,
    built on ``self.device``, filled from the checkpoint and put in eval
    mode.

    Args:
        model_path: path to the PyTorch checkpoint; its base name is parsed
            for the input size and model type.

    Returns:
        None.
    """
    # Prefer an already converted TensorRT module when one is cached.
    if os.path.isfile('trt_spoof.pth'):
        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(torch.load('trt_spoof.pth'))
        return None

    # define model
    model_name = os.path.basename(model_path)
    h_input, w_input, model_type, _ = parse_model_name(model_name)
    self.kernel_size = get_kernel(h_input, w_input,)
    self.model = MODEL_MAPPING[model_type](conv6_kernel=self.kernel_size).to(self.device)

    # load model weight
    state_dict = torch.load(model_path, map_location=self.device)

    # Fix: strip the 'module.' prefix only from keys that really start with
    # it. The previous code tested ``find('module.') >= 0`` on the first key
    # and then cut 7 characters from *every* key, which corrupts names when
    # the substring appears elsewhere or only some keys carry the prefix.
    from collections import OrderedDict
    prefix = 'module.'
    cleaned_state_dict = OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in state_dict.items()
    )
    self.model.load_state_dict(cleaned_state_dict)
    self.model.eval()
    return None
def load_trt_model(model_path):
    """Create a ``TRTModule`` and fill it from the state dict at ``model_path``.

    Args:
        model_path: file passed to ``torch.load``.

    Returns:
        The populated ``TRTModule`` instance.
    """
    # Function-scope import: torch2trt is only required when this loader runs.
    from torch2trt import TRTModule

    print("Loading TensorRT optimized model")
    trt_module = TRTModule()
    saved_state = torch.load(model_path)
    trt_module.load_state_dict(saved_state)
    return trt_module
def load(self, args, ds=None):
    """Load a cached torch2trt module or build, calibrate and cache one.

    The cache file is ``pretrained/<args.model>/bs<B>_is<S>_<chip>_torch2trt.pth``.
    It is reused when it exists and ``args.force_build`` is false; otherwise
    the network from ``load_model(args)`` is converted with INT8 calibration
    and the result is saved to that path.

    Args:
        args: options object; reads ``batch_size``, ``image_size``,
            ``chip_name``, ``model``, ``force_build``, ``calib_algo`` and
            ``calib_batch_size``.
        ds: dataset exposing ``get_calibration_set()``; required only when
            the module has to be built.

    Returns:
        ``self``, with ``self.model`` set.

    Raises:
        ValueError: if a build is needed but ``ds`` is ``None`` or
            ``args.calib_algo`` is not 1, 2 or 3.
    """
    prefix = 'bs%d_is%d_%s_' % (args.batch_size, args.image_size, args.chip_name)
    lib_name = 'pretrained/' + args.model + '/' + prefix + 'torch2trt.pth'

    if os.path.exists(lib_name) and not args.force_build:
        self.model = TRTModule()
        self.model.load_state_dict(torch.load(lib_name))
        self.model.eval()
    else:
        # Map the numeric option to the attribute name on
        # trt.CalibrationAlgoType; looked up lazily so validation does not
        # depend on TensorRT being importable.
        calib_algo_names = {
            1: 'ENTROPY_CALIBRATION',
            2: 'ENTROPY_CALIBRATION_2',
            3: 'MINMAX_CALIBRATION',
        }
        # Fix: validate inputs before the expensive model load. Previously an
        # unknown calib_algo left ``calib_algo`` unbound (UnboundLocalError)
        # and ``ds=None`` failed with AttributeError inside the build.
        if ds is None:
            raise ValueError('a calibration dataset (ds) is required to build the TensorRT model')
        if args.calib_algo not in calib_algo_names:
            raise ValueError('unsupported calib_algo %r; expected 1, 2 or 3' % (args.calib_algo,))

        net, _ = load_model(args)
        net = net.cuda()
        net.eval()

        input_data = torch.FloatTensor(
            np.array(ds.get_calibration_set(), np.float32)).cuda()
        calib_algo = getattr(trt.CalibrationAlgoType, calib_algo_names[args.calib_algo])

        # Workspace: 2**33 bytes when the chip name contains 'T4', else 2**34.
        size = 1 << (33 if 'T4' in args.chip_name else 34)
        self.model = torch2trt(
            net, [input_data],
            max_batch_size=args.batch_size,
            fp16_mode=True,
            max_workspace_size=size,
            int8_mode=True,
            int8_calib_algorithm=calib_algo,
            int8_calib_batch_size=args.calib_batch_size)
        torch.save(self.model.state_dict(), lib_name)

    log.info('model is ready')
    return self
class ResDownS(nn.Module):
    """1x1 conv + batch-norm projection with optional TensorRT variants.

    ``downsample_15`` and ``downsample_31`` start as aliases of the PyTorch
    ``downsample`` module; ``init_trt`` replaces them with TensorRT modules
    built for 15x15 and 31x31 inputs.
    """

    def __init__(self, inplane, outplane):
        super(ResDownS, self).__init__()
        projection = nn.Sequential(
            nn.Conv2d(inplane, outplane, kernel_size=1, bias=False),
            nn.BatchNorm2d(outplane),
        )
        self.downsample = projection
        # Until init_trt runs, both size-specific entries share the same module.
        self.downsample_15 = self.downsample
        self.downsample_31 = self.downsample

    def init_trt(self, fp16_mode, trt_weights_path):
        """Build and save, or load, the TensorRT modules for both input sizes.

        Only the presence of the 15x15 file decides between the two paths.
        """
        file_15 = trt_weights_path+'/downsample_15_trt.pth'
        file_31 = trt_weights_path+'/downsample_31_trt.pth'

        if path.exists(file_15):
            self.downsample_15 = TRTModule()
            self.downsample_15.load_state_dict(torch.load(file_15))
            self.downsample_31 = TRTModule()
            self.downsample_31.load_state_dict(torch.load(file_31))
            return

        sample_15 = torch.ones((1, 1024, 15, 15)).cuda()
        sample_31 = torch.ones((1, 1024, 31, 31)).cuda()
        self.downsample_15 = torch2trt(self.downsample, [sample_15], fp16_mode=fp16_mode)
        self.downsample_31 = torch2trt(self.downsample, [sample_31], fp16_mode=fp16_mode)
        torch.save(self.downsample_15.state_dict(), file_15)
        torch.save(self.downsample_31.state_dict(), file_31)

    def forward(self, x):
        """Project ``x``; outputs whose dim 3 is below 20 are cropped by 4 per side."""
        last_dim = x.shape[-1]
        if last_dim == 15:
            x = self.downsample_15(x)
        elif last_dim == 31:
            x = self.downsample_31(x)
        else:
            x = self.downsample(x)

        if x.size(3) < 20:
            lo, hi = 4, -4
            x = x[:, :, lo:hi, lo:hi]
        return x
def __init__(
    self,
    model,
    exp,
    cls_names=COCO_CLASSES,
    trt_file=None,
    decoder=None,
    device="cpu",
):
    """Store the model and experiment settings, optionally swapping in a TRT model.

    Args:
        model: PyTorch model used for inference (replaced when ``trt_file`` is given).
        exp: experiment object; ``num_classes``, ``test_conf``, ``nmsthre``
            and ``test_size`` are read from it.
        cls_names: class names kept on the instance.
        trt_file: optional path to a saved ``TRTModule`` state dict.
        decoder: optional decoder kept on the instance.
        device: device identifier kept on the instance.
    """
    self.model = model
    self.decoder = decoder
    self.cls_names = cls_names
    self.device = device

    # Thresholds and sizes come from the experiment description.
    self.num_classes = exp.num_classes
    self.confthre = exp.test_conf
    self.nmsthre = exp.nmsthre
    self.test_size = exp.test_size

    if trt_file is not None:
        from torch2trt import TRTModule

        trt_module = TRTModule()
        trt_module.load_state_dict(torch.load(trt_file))

        # One forward pass through the original model on a dummy CUDA input
        # is made before it is replaced by the TensorRT module.
        dummy = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
        self.model(dummy)
        self.model = trt_module

    self.rgb_means = (0.485, 0.456, 0.406)
    self.std = (0.229, 0.224, 0.225)
def export_siamfcpp_track_fea_trt(task_cfg, parsed_args):
    """Export phase "freeze_track_fea" (basemodel/c_x/r_x) to a TensorRT model.

    Builds the "track" model, converts it with torch2trt on a random
    1x3x303x303 CUDA input, saves the converted state dict to
    ``parsed_args.output + "_track_fea.trt"``, reloads it and checks that
    the first two outputs match the PyTorch outputs within tolerance.
    """
    tracker = model_builder.build("track", task_cfg.model)
    tracker.eval().cuda()
    tracker.phase = "freeze_track_fea"

    search_im = torch.randn(1, 3, 303, 303).cuda()
    reference = tracker(search_im)

    output_path = parsed_args.output + "_track_fea.trt"
    logger.info("start cvt pytorch model")
    converted = torch2trt(tracker, [search_im])
    torch.save(converted.state_dict(), output_path)
    logger.info("save trt model to {}".format(output_path))

    # Reload from disk so the comparison covers the saved artefact.
    reloaded = TRTModule()
    reloaded.load_state_dict(torch.load(output_path))
    trt_outs = reloaded(search_im)

    for out_idx in (0, 1):
        np.testing.assert_allclose(to_numpy(reference[out_idx]),
                                   to_numpy(trt_outs[out_idx]),
                                   rtol=1e-03,
                                   atol=1e-05)
    logger.info("test accuracy ok")
def init_inference():
    """Build the global ``model`` selected by ``args.model`` and load its weights.

    With ``args.trt_module`` set, either converts the PyTorch model to
    TensorRT and exits (``args.trt_conversion``) or loads a saved
    ``TRTModule`` from ``args.trt_model``. Otherwise the PyTorch weights
    from ``args.pretrained_model`` are loaded.

    Raises:
        NotImplementedError: for an unknown ``args.model``.
    """
    global model
    global device

    arch = args.model
    if arch == 'resnet18':
        model = models.resnet18()
        model.fc = torch.nn.Linear(512, 3)
    elif arch == 'samplenet':
        model = SampleNet()
    elif arch == 'simplenet':
        model = SimpleNet()
    else:
        raise NotImplementedError()
    model.eval()

    # Plain PyTorch path.
    if not args.trt_module:
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.to(device)
        return

    from torch2trt import TRTModule

    if args.trt_conversion:
        # One-off conversion: write the TensorRT state dict and stop the process.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.cuda()
        sample = torch.ones((1, 3, 240, 320)).cuda()
        from torch2trt import torch2trt
        converted = torch2trt(model, [sample], max_batch_size=100, fp16_mode=True)
        torch.save(converted.state_dict(), args.trt_model)
        exit()

    trt_module = TRTModule()
    trt_module.load_state_dict(torch.load(args.trt_model))
    model = trt_module.to(device)
def __init__(
    self,
    model,
    exp,
    cls_names=COCO_CLASSES,
    trt_file=None,
    decoder=None,
    device="cpu",
    fp16=False,
    legacy=False,
):
    """Store the model, experiment settings and preprocessing transform.

    Args:
        model: PyTorch model used for inference (replaced when ``trt_file`` is given).
        exp: experiment object; ``num_classes``, ``test_conf``, ``nmsthre``
            and ``test_size`` are read from it.
        cls_names: class names kept on the instance.
        trt_file: optional path to a saved ``TRTModule`` state dict.
        decoder: optional decoder kept on the instance.
        device: device identifier kept on the instance.
        fp16: flag kept on the instance.
        legacy: forwarded to ``ValTransform``.
    """
    self.model = model
    self.decoder = decoder
    self.cls_names = cls_names
    self.device = device
    self.fp16 = fp16

    # Thresholds and sizes come from the experiment description.
    self.num_classes = exp.num_classes
    self.confthre = exp.test_conf
    self.nmsthre = exp.nmsthre
    self.test_size = exp.test_size

    self.preproc = ValTransform(legacy=legacy)

    if trt_file is None:
        return

    from torch2trt import TRTModule

    trt_module = TRTModule()
    trt_module.load_state_dict(torch.load(trt_file))

    # One forward pass through the original model on a dummy CUDA input is
    # made before it is replaced by the TensorRT module.
    dummy = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
    self.model(dummy)
    self.model = trt_module
def process_images(images: list, trt: bool):
    """Classify each image, stamp the predicted label on it and save it.

    Args:
        images: image objects; each must provide ``height``, ``width``,
            ``filename`` and ``save``.
        trt: when true, load the TensorRT module from ``alexnet_trt.pth``;
            otherwise use a pretrained ``alexnet`` on CUDA.

    Prints model-load time, processing time and CUDA memory statistics.
    """
    started = time.time()
    if trt:
        # 'alexnet_trt.pth' must already exist. It can be produced once by
        # converting alexnet(pretrained=True).eval().cuda() with torch2trt on
        # a torch.ones((1, 3, 224, 224)).cuda() input and saving state_dict().
        model = TRTModule()
        model.load_state_dict(torch.load('alexnet_trt.pth'))
    else:
        model = alexnet(pretrained=True).eval().cuda()
    print("Model load time {}".format(time.time() - started))

    started = time.time()
    for image in images:
        index = classify_image(image, model)
        output_text = ': '.join((str(index), classes[index]))

        # White 20 px banner along the bottom edge, label drawn on top of it.
        canvas = ImageDraw.Draw(image)
        banner_box = (0, image.height - 20, image.width, image.height)
        canvas.rectangle(banner_box, fill=(255, 255, 255))
        canvas.text((50, image.height - 15), output_text, (0, 0, 0),
                    font=ImageFont.load_default())

        base_name = image.filename.rsplit('/', 1)[-1]
        image.save('./output/' + base_name)

    print("Image(s) processing time {}".format(time.time() - started))
    print('Memory allocated: ' + str(torch.cuda.memory_allocated()))
    print('Max memory allocated: ' + str(torch.cuda.max_memory_allocated()))
def __init__(self):
    """Load the pose topology and the TensorRT resnet18 pose model.

    If the optimized ``*_trt.pth`` file is missing, the PyTorch weights are
    converted with torch2trt (fp16) and saved first; the optimized file is
    then loaded into ``self._model_trt``.
    """
    with open('/trt_pose/tasks/human_pose/human_pose.json', 'r') as f:
        human_pose = json.load(f)
    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    self._topology = topology

    self._MODEL_WEIGHTS = '/trt_pose/tasks/human_pose/resnet18_baseline_att_224x224_A_epoch_249.pth'
    self._OPTIMIZED_MODEL = '/trt_pose/tasks/human_pose/resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
    self._num_parts = len(human_pose['keypoints'])
    self._num_links = len(human_pose['skeleton'])

    print('BreathRateDetector: using resnet model')
    self._model = trt_pose.models.resnet18_baseline_att(
        self._num_parts, 2 * self._num_links).cuda().eval()
    self._WIDTH = 224
    self._HEIGHT = 224
    self._data = torch.zeros((1, 3, self._HEIGHT, self._WIDTH)).cuda()

    # Convert once and cache; later runs only load the optimized file.
    if not os.path.exists(self._OPTIMIZED_MODEL):
        print('BreathRateDetector: -- Converting TensorRT models. This may takes several minutes...')
        self._model.load_state_dict(torch.load(self._MODEL_WEIGHTS))
        converted = torch2trt.torch2trt(
            self._model, [self._data], fp16_mode=True, max_workspace_size=1<<25)
        torch.save(converted.state_dict(), self._OPTIMIZED_MODEL)
        print('BreathRateDetector: -- Conversion complete --')

    print('BreathRateDetector: loading TRT model.')
    self._model_trt = TRTModule()
    self._model_trt.load_state_dict(torch.load(self._OPTIMIZED_MODEL))

    self._mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self._std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self._device = torch.device('cuda')

    self._parse_objects = ParseObjects(topology)
    self._draw_objects = DrawObjects(topology)
def __init__(self, opt):
    """Create the two model variants, load weights and optionally convert to TensorRT.

    Args:
        opt: options object; reads ``backbone``, ``heads``, ``head_conv``,
            ``load_model``, ``tensorrt``, ``mean``, ``std``, ``num_classes``
            and ``test_scales``; ``opt.device`` is overwritten with CUDA.
    """
    opt.device = torch.device('cuda')
    print('Creating model...')

    # Two instances are created, differing only in the last create_model
    # argument. Original author's note: TensorRT does not support multiple
    # outputs and ConvTranspose2d, so the model needs to be split.
    trt_candidate = create_model(opt.backbone, opt.heads, opt.head_conv, True)
    torch_model = create_model(opt.backbone, opt.heads, opt.head_conv, False)
    self.model_trt = trt_candidate
    self.model = torch_model

    self.model_trt = load_model(self.model_trt, opt.load_model)
    self.model = load_model(self.model, opt.load_model)

    self.model_trt = self.model_trt.to(opt.device)
    self.model_trt.eval()
    self.model = self.model.to(opt.device)
    self.model.eval()

    # Transform model from torch to tensorrt
    if opt.tensorrt:
        sample = torch.ones((1, 3, 512, 512)).cuda()
        self.model_trt = torch2trt(self.model_trt, [sample])
        # Round-trip through 'temp.pth' and reload into a fresh TRTModule.
        torch.save(self.model_trt.state_dict(), 'temp.pth')
        from torch2trt import TRTModule
        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(torch.load('temp.pth'))

    self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3)
    self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3)
    self.max_per_image = 100
    self.num_classes = opt.num_classes
    self.scales = opt.test_scales
    self.opt = opt
    self.pause = True
def build_tensorrt(trt_file, model, size, device, recompile=False, fp16=True):
    """Return a TensorRT module for ``model``, loading ``trt_file`` when possible.

    Args:
        trt_file: path of the saved ``TRTModule`` state dict.
        model: PyTorch model to convert when no usable file is found.
        size: indexable pair; ``size[0]`` is used as the last input
            dimension and ``size[1]`` as the one before it.
        device: device for the sample input tensor.
        recompile: when true, ignore an existing ``trt_file`` and rebuild.
        fp16: forwarded to torch2trt as ``fp16_mode``.

    Returns:
        The loaded or freshly built TensorRT module.
    """
    from torch2trt import torch2trt, TRTModule
    import tensorrt as trt

    sample = torch.ones(1, 3, int(size[1]), int(size[0])).to(device)

    use_cached = path.isfile(trt_file) and not recompile
    if use_cached:
        print("Found TensorRT model file, loading...")
        cached = TRTModule()
        weights = torch.load(trt_file)
        cached.load_state_dict(weights)
        # Run once on the sample input before handing the module back.
        cached(sample)
        return cached

    print("Compiling with tensorRT...")
    built = torch2trt(
        model,
        [sample],
        max_workspace_size=1 << 27,
        fp16_mode=fp16,
        log_level=trt.Logger.INFO,
        strict_type_constraints=True,
        max_batch_size=1,
    )
    torch.save(built.state_dict(), trt_file)
    return built
def init_inference():
    """Build the global DQN ``model`` and load PyTorch or TensorRT weights.

    With ``args.trt_module`` set, either converts the PyTorch model to
    TensorRT and exits (``args.trt_conversion``) or loads a saved
    ``TRTModule`` from ``args.trt_model``. Otherwise the PyTorch weights
    from ``args.pretrained_model`` are loaded.
    """
    global model
    global device

    model = DQN(120, 320, DISCRETIZATION)
    model.eval()

    # Plain PyTorch path.
    if not args.trt_module:
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.to(device)
        return

    from torch2trt import TRTModule

    if args.trt_conversion:
        # One-off conversion: write the TensorRT state dict and stop the process.
        model.load_state_dict(torch.load(args.pretrained_model))
        model = model.cuda()
        sample = torch.ones((1, 3, 120, 320)).cuda()
        from torch2trt import torch2trt
        converted = torch2trt(model, [sample], max_batch_size=100, fp16_mode=True)
        torch.save(converted.state_dict(), args.trt_model)
        exit()

    trt_module = TRTModule()
    trt_module.load_state_dict(torch.load(args.trt_model))
    model = trt_module.to(device)
def __init__(self, display_widget=None):
    """Load the TensorRT pose model, start the camera and register ROS endpoints.

    Args:
        display_widget: optional display target; when ``None`` a matplotlib
            image is opened in interactive mode instead.
    """
    self.display_widget = display_widget

    with open(self.HUMAN_POSE, 'r') as f:
        human_pose = json.load(f)
    topology = trt_pose.coco.coco_category_to_topology(human_pose)

    trt_module = TRTModule()
    trt_module.load_state_dict(torch.load(self.OPTIMIZED_MODEL))
    self.model_trt = trt_module

    self.parse_objects = ParseObjects(topology)
    self.keypoint_coordinates = KeypointCoordinates(human_pose["keypoints"])

    self.camera = CSICamera(width=self.WIDTH, height=self.HEIGHT, capture_fps=30)
    self.camera.running = True

    if self.display_widget is None:
        self.display = plt.imshow(self.camera.value)
        plt.ion()
        plt.show()

    # ROS stuff
    service = rospy.Service('get_keypoint', GetKeypoint, self.__handle_get_keypoint)
    self.image_pub = rospy.Publisher("image", Image)
    self.bridge = CvBridge()
def __init__(self, model, exp, trt_file=None, decoder=None, device=torch.device("cpu"), fp16=False):
    """Store the model and experiment settings, optionally swapping in a TRT model.

    Args:
        model: PyTorch model used for inference (replaced when ``trt_file`` is given).
        exp: experiment object; ``num_classes``, ``test_conf``, ``nmsthre``
            and ``test_size`` are read from it.
        trt_file: optional path to a saved ``TRTModule`` state dict.
        decoder: optional decoder kept on the instance.
        device: torch device kept on the instance and used for the dummy input.
        fp16: flag kept on the instance.
    """
    self.model = model
    self.decoder = decoder
    self.device = device
    self.fp16 = fp16

    # Thresholds and sizes come from the experiment description.
    self.num_classes = exp.num_classes
    self.confthre = exp.test_conf
    self.nmsthre = exp.nmsthre
    self.test_size = exp.test_size

    if trt_file is not None:
        from torch2trt import TRTModule

        trt_module = TRTModule()
        trt_module.load_state_dict(torch.load(trt_file))

        # One forward pass through the original model on a dummy input is
        # made before it is replaced by the TensorRT module.
        dummy_shape = (1, 3, exp.test_size[0], exp.test_size[1])
        dummy = torch.ones(dummy_shape, device=device)
        self.model(dummy)
        self.model = trt_module

    self.rgb_means = (0.485, 0.456, 0.406)
    self.std = (0.229, 0.224, 0.225)
def __init__(self, modelFile, taskDescFile, csv=0, csvPath='.'):
    """Load the task description and a TensorRT pose model.

    The base model constructor is chosen from the file name of
    ``modelFile``. If ``<basename>_trt.pth`` exists in the working
    directory it is loaded; otherwise the PyTorch weights are converted
    with torch2trt (fp16) and the result is saved under that name.

    Args:
        modelFile: path to the PyTorch weights; its base name encodes the
            model function and input size.
        taskDescFile: path to the JSON task description.
        csv: when greater than 0, CSV output is initialised.
        csvPath: directory passed to ``_initCsv``.

    Raises:
        PoseCaptureDescError: the task description could not be opened.
        PoseCaptureModelError: the model name is invalid or unknown.
        PoseCaptureCsvError: CSV initialisation failed with ``OSError``.
    """
    # Load the task description
    try:
        with open(taskDescFile, 'r') as f:
            human_pose = json.load(f)
    except OSError as err:
        raise PoseCaptureDescError from err
    topology = trt_pose.coco.coco_category_to_topology(human_pose)
    num_parts = len(human_pose['keypoints'])
    num_links = len(human_pose['skeleton'])

    # Load the base model
    fbase = os.path.basename(modelFile)
    func, self.inWidth, self.inHeight = \
        PoseCaptureModel.getModelFuncName(fbase)
    if func is None:
        logging.fatal('Invalid model name: %s' % (fbase))
        logging.fatal('Model name should be (.+_.+_att)_(\\d+)x(\\d+)_')
        raise PoseCaptureModelError('Invalid model name: %s' % (fbase))
    if not hasattr(trt_pose.models, func):
        logging.fatal('Could not find base model function: %s' % (func))
        raise PoseCaptureModelError(
            'Could not find base model function: %s' % (func))

    # Fix: resolve the constructor with getattr instead of eval() on a
    # string built from the file name; eval would execute arbitrary
    # expressions if the name parser ever let one through.
    model_factory = getattr(trt_pose.models, func)
    func = 'trt_pose.models.' + func
    trtFile = os.path.splitext(fbase)[0] + '_trt.pth'
    logging.info('Loading base model from %s' % (func))
    model = model_factory(num_parts, 2 * num_links).cuda().eval()

    if os.path.exists(trtFile):
        logging.info('Loading model from TensorRT plan file ...')
        model_trt = TRTModule()
        model_trt.load_state_dict(torch.load(trtFile))
    else:
        logging.info('Optimizing model for TensorRT ...')
        model.load_state_dict(torch.load(modelFile))
        data = torch.zeros((1, 3, self.inHeight, self.inWidth)).cuda()
        model_trt = torch2trt.torch2trt(
            model, [data], fp16_mode=True, max_workspace_size=1<<25)
        # Cache the converted module so later runs skip the conversion.
        torch.save(model_trt.state_dict(), trtFile)

    self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
    self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
    self.device = torch.device('cuda')

    self.parse_objects = ParseObjects(topology)
    self.draw_objects = DrawObjects(topology)
    self.model_trt = model_trt
    self.num_parts = num_parts

    self.csv = csv
    self.count = 0
    if self.csv > 0:
        try:
            self._initCsv(human_pose['keypoints'], csvPath)
        except OSError as err:
            raise PoseCaptureCsvError from err
class AntiSpoofPredict(Detection):
    """Anti-spoofing classifier that runs through a torch2trt module.

    A cached ``trt_spoof.pth`` in the working directory is used when
    present; otherwise the PyTorch model is loaded and converted to
    TensorRT on the first ``predict`` call.
    """

    def __init__(self, device_id, weights_path):
        """Select the device and load either the cached TRT module or the PyTorch model."""
        super(AntiSpoofPredict, self).__init__()
        self.device = torch.device("cuda:{}".format(device_id)
                                   if torch.cuda.is_available() else "cpu")
        # Always define both attributes so later code can test them safely.
        self.model = None
        self.model_trt = None
        self._load_model(weights_path)

    def _load_model(self, model_path):
        """Populate ``self.model_trt`` from cache or ``self.model`` from weights.

        Args:
            model_path: path to the PyTorch checkpoint; its base name is
                parsed for the input size and model type.

        Returns:
            None.
        """
        # Prefer an already converted TensorRT module when one is cached.
        if os.path.isfile('trt_spoof.pth'):
            self.model_trt = TRTModule()
            self.model_trt.load_state_dict(torch.load('trt_spoof.pth'))
            return None

        # define model
        model_name = os.path.basename(model_path)
        h_input, w_input, model_type, _ = parse_model_name(model_name)
        self.kernel_size = get_kernel(h_input, w_input,)
        self.model = MODEL_MAPPING[model_type](conv6_kernel=self.kernel_size).to(self.device)

        # load model weight
        state_dict = torch.load(model_path, map_location=self.device)

        # Fix: strip the 'module.' prefix only from keys that really start
        # with it. The previous code tested ``find('module.') >= 0`` on the
        # first key and then cut 7 characters from *every* key.
        from collections import OrderedDict
        prefix = 'module.'
        cleaned_state_dict = OrderedDict(
            (key[len(prefix):] if key.startswith(prefix) else key, value)
            for key, value in state_dict.items()
        )
        self.model.load_state_dict(cleaned_state_dict)
        self.model.eval()
        return None

    def predict(self, img):
        """Return softmax scores for one image as a NumPy array.

        On the first call without a cached TRT module, the PyTorch model is
        converted with torch2trt (fp16) using this image as the sample
        input, saved to ``trt_spoof.pth`` and then released.
        """
        test_transform = trans.Compose([
            trans.ToTensor(),
        ])
        img = test_transform(img)
        img = img.unsqueeze(0).to(self.device)

        if self.model_trt is None:
            self.model_trt = torch2trt(self.model, [img], fp16_mode=True)
            torch.save(self.model_trt.state_dict(), 'trt_spoof.pth')
            self.model = None

        with torch.no_grad():
            result = self.model_trt(img)
            # NOTE(review): softmax is called without ``dim``; confirm the
            # output rank and pass the dimension explicitly.
            result = F.softmax(result).cpu().numpy()
        return result
class ResDown(MultiStageFeature):
    """ResNet-50 feature extractor followed by a ``ResDownS`` projection.

    ``features_127`` and ``features_255`` start as aliases of ``features``;
    ``init_trt`` replaces them with TensorRT modules built for 127x127 and
    255x255 inputs.
    """

    def __init__(self, pretrain=False):
        super(ResDown, self).__init__()
        self.features = resnet50(layer3=True, layer4=False)
        # Until init_trt runs, both size-specific entries share the same module.
        self.features_127 = self.features
        self.features_255 = self.features
        if pretrain:
            load_pretrain(self.features, 'resnet.model')

        self.downsample = ResDownS(1024, 256)

        self.layers = [self.downsample, self.features.layer2, self.features.layer3]
        self.train_nums = [1, 3]
        self.change_point = [0, 0.5]

        self.unfix(0.0)

    def init_trt(self, fp16_mode, trt_weights_path):
        """Build and save, or load, TensorRT feature extractors, then init the downsampler.

        Only the presence of the 127x127 file decides between the two paths.
        """
        file_127 = trt_weights_path+'/features_127_trt.pth'
        file_255 = trt_weights_path+'/features_255_trt.pth'

        if path.exists(file_127):
            self.features_127 = TRTModule()
            self.features_255 = TRTModule()
            self.features_127.load_state_dict(torch.load(file_127))
            self.features_255.load_state_dict(torch.load(file_255))
        else:
            sample_127 = torch.ones((1, 3, 127, 127)).cuda()
            sample_255 = torch.ones((1, 3, 255, 255)).cuda()
            self.features_127 = torch2trt(self.features, [sample_127], fp16_mode=fp16_mode)
            self.features_255 = torch2trt(self.features, [sample_255], fp16_mode=fp16_mode)
            torch.save(self.features_127.state_dict(), file_127)
            torch.save(self.features_255.state_dict(), file_255)

        self.downsample.init_trt(fp16_mode, trt_weights_path)

    def param_groups(self, start_lr, feature_mult=1):
        """Return optimizer parameter groups for the trainable parameters.

        The downsampler uses ``start_lr * feature_mult``; the feature
        extractor uses a tenth of that.
        """
        lr = start_lr * feature_mult

        def _params(module, mult=1):
            trainable = [p for p in module.parameters() if p.requires_grad]
            if not trainable:
                return []
            return [{'params': trainable, 'lr': lr * mult}]

        groups = []
        groups += _params(self.downsample)
        groups += _params(self.features, 0.1)
        return groups

    def forward(self, x):
        """Run ``features_127`` and project its last output."""
        stage_outputs = self.features_127(x)
        return self.downsample(stage_outputs[-1])

    def forward_all(self, x):
        """Run ``features_255``; return all its outputs and the projected last one."""
        stage_outputs = self.features_255(x)
        projected = self.downsample(stage_outputs[-1])
        return stage_outputs, projected
class run:
    """Camera loop around a TensorRT pose-estimation model.

    The constructor parses ``--model``, loads the matching optimized
    ``*_trt.pth`` engine and opens camera 0; ``run`` reads up to 500 frames
    and displays the processed result.
    """

    def __init__(self):
        """Parse CLI options, load the TensorRT engine and open camera 0."""
        self.parser = argparse.ArgumentParser(description='TensorRT pose estimation run')
        self.parser.add_argument('--model', type=str, default='resnet', help='resnet or densenet')
        # Fix: the parser is stored on ``self``; a bare ``parser`` name is
        # not defined inside this method.
        self.args = self.parser.parse_args()

        with open('human_pose.json', 'r') as f:
            human_pose = json.load(f)

        self.topology = trt_pose.coco.coco_category_to_topology(human_pose)

        num_parts = len(human_pose['keypoints'])
        num_links = len(human_pose['skeleton'])

        # Fix: read the parsed options from ``self.args`` (bare ``args`` is undefined here).
        if 'resnet' in self.args.model:
            print('------ model = resnet--------')
            MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
            OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
            model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
            WIDTH = 224
            HEIGHT = 224
        else:
            print('------ model = densenet--------')
            MODEL_WEIGHTS = 'densenet121_baseline_att_256x256_B_epoch_160.pth'
            OPTIMIZED_MODEL = 'densenet121_baseline_att_256x256_B_epoch_160_trt.pth'
            model = trt_pose.models.densenet121_baseline_att(num_parts, 2 * num_links).cuda().eval()
            WIDTH = 256
            HEIGHT = 256

        # NOTE(review): ``model``, ``data`` and ``MODEL_WEIGHTS`` are not
        # used again in the visible code; kept so constructor side effects
        # are unchanged.
        data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()

        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))

        # Fix: keep the input size and normalisation constants on the
        # instance; ``run`` needs the size and they were previously locals
        # that vanished when the constructor returned.
        self.width = WIDTH
        self.height = HEIGHT
        self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
        self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()

        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    def run(self):
        """Read up to 500 camera frames, process and display each one.

        Stops early when a frame cannot be read or when 'q' is pressed.
        The capture device, the video writer and the OpenCV windows are
        released even if processing raises.
        """
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        # NOTE(review): nothing is written to ``out_video`` in the visible
        # code, so the output file receives no frames. Confirm intent.
        out_video = cv2.VideoWriter('/tmp/output.mp4', fourcc, self.cap.get(cv2.CAP_PROP_FPS), (640, 480))

        # Fix: use the topology stored by the constructor (bare ``topology``
        # was undefined) and build the helpers once instead of per frame.
        parse_objects = ParseObjects(self.topology)
        draw_objects = DrawObjects(self.topology)

        count = 0
        try:
            while self.cap.isOpened() and count < 500:
                t = time.time()
                ret_val, dst = self.cap.read()
                if not ret_val:
                    print("Camera read Error")
                    break

                # Fix: the model input size lives on ``self``; bare
                # ``WIDTH`` / ``HEIGHT`` were undefined in this method.
                img = cv2.resize(dst, dsize=(self.width, self.height), interpolation=cv2.INTER_AREA)
                # NOTE(review): ``PE`` is not defined in the visible code;
                # presumably a module-level pose executor. Verify.
                img = PE.execute(img, dst, t)

                cv2.imshow("result", img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                count += 1
        finally:
            cv2.destroyAllWindows()
            out_video.release()
            # Fix: release the capture owned by this instance (bare ``cap`` was undefined).
            self.cap.release()
def set_model(self, model):
    """Load a ``TRTModule`` from ``model`` and create a zeroed ``Twist`` message.

    Args:
        model: path to the saved TensorRT state dict; stored as ``self.model_file``.
    """
    self.model_file = model

    trt_module = TRTModule()
    self.model = trt_module
    self.model.load_state_dict(torch.load(self.model_file))

    # Start from a stationary command: every angular and linear component is 0.
    self.twist_msg = Twist()
    for vector in (self.twist_msg.angular, self.twist_msg.linear):
        for axis in ('x', 'y', 'z'):
            setattr(vector, axis, 0)
def __init__(self, config=None):
    """Read the ``ARCFACE_R50`` settings and load the TensorRT model.

    Args:
        config: mapping-like configuration; when ``None``, ``config.ini``
            is read with ``configparser``.
    """
    if config is None:
        self.config = configparser.ConfigParser()
        self.config.read("config.ini")
    else:
        self.config = config

    self.model_path = self.config["ARCFACE_R50"]['Model_path']
    # The numeric settings are converted with int().
    self.img_size = int(self.config["ARCFACE_R50"]['Img_size'])
    self.batch_size = int(self.config["ARCFACE_R50"]['Batch_size'])
    self.feat_size = int(self.config["ARCFACE_R50"]['Feat_size'])

    self.model = TRTModule()
    saved_state = torch.load(self.model_path)
    self.model.load_state_dict(saved_state)
def __init__(self, trt_checkpoint_path: str, img_size: Tuple[int, int] = (128, 256),
             max_batch_size: int = 8, **kwargs):
    """Load a TensorRT checkpoint and set up the input transform.

    Args:
        trt_checkpoint_path: path to the saved ``TRTModule`` state dict.
        img_size: stored as ``self.size``.
        max_batch_size: stored as ``self.max_batch_size``.
        **kwargs: forwarded to the parent constructor.
    """
    super().__init__(**kwargs)

    trt_module = TRTModule()
    trt_module.load_state_dict(torch.load(trt_checkpoint_path))
    self.model_trt = trt_module
    self.model_trt = self.model_trt.cuda().eval()

    self.size = img_size
    self.max_batch_size = max_batch_size

    # Tensor conversion followed by per-channel normalisation.
    to_tensor = T.ToTensor()
    normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    self.transform = T.Compose([to_tensor, normalize])
def load_model():
    """Load AlexNet and its TensorRT counterpart from the working directory.

    Returns:
        ``(model, model_trt)``: the PyTorch model loaded from
        ``alexnet.pth`` and the ``TRTModule`` loaded from ``alexnet_trt.pth``.
    """
    model_log = log('Load {} ... '.format('alexnet & tensorrt'))

    torch_model = alexnet().eval().cuda()
    torch_model.load_state_dict(torch.load('alexnet.pth'))

    trt_module = TRTModule()
    trt_module.load_state_dict(torch.load('alexnet_trt.pth'))

    model_log.end()
    return (torch_model, trt_module)
def init_trt(self, fp16_mode, trt_weights_path):
    """Build and save, or load, the TensorRT downsample modules.

    Only the presence of the 15x15 file decides between the two paths.

    Args:
        fp16_mode: forwarded to torch2trt when building.
        trt_weights_path: directory holding the ``*_trt.pth`` files.
    """
    file_15 = trt_weights_path+'/downsample_15_trt.pth'
    file_31 = trt_weights_path+'/downsample_31_trt.pth'

    if path.exists(file_15):
        self.downsample_15 = TRTModule()
        self.downsample_15.load_state_dict(torch.load(file_15))
        self.downsample_31 = TRTModule()
        self.downsample_31.load_state_dict(torch.load(file_31))
        return

    # No cached modules: convert the PyTorch downsampler for both input sizes.
    sample_15 = torch.ones((1, 1024, 15, 15)).cuda()
    sample_31 = torch.ones((1, 1024, 31, 31)).cuda()
    self.downsample_15 = torch2trt(self.downsample, [sample_15], fp16_mode=fp16_mode)
    self.downsample_31 = torch2trt(self.downsample, [sample_31], fp16_mode=fp16_mode)
    torch.save(self.downsample_15.state_dict(), file_15)
    torch.save(self.downsample_31.state_dict(), file_31)
def __init__(self, model_name, model_path):
    """Pick the device, then load the TensorRT module from ``model_path``.

    Args:
        model_name: stored as ``self.name``.
        model_path: path to the saved ``TRTModule`` state dict.
    """
    # 1. set device
    has_gpu = torch.cuda.device_count() > 0
    self.device = 'cuda:0' if has_gpu else 'cpu'
    if not has_gpu:
        # Only logged; loading is still attempted afterwards.
        logger.error('TensorRT not working with CPU')
    logger.warning('Torch device {}.'.format(self.device))

    self.name = model_name
    self.model = TRTModule()
    logger.info("Start loading TensorRT module, it's slow")
    saved_state = torch.load(model_path)
    self.model.load_state_dict(saved_state)
def demo_with_torch2trt(trt_file_path, data_root):
    """Run a saved torch2trt lane model on one image ten times and draw the lanes.

    Args:
        trt_file_path: path to the saved ``TRTModule`` state dict.
        data_root: path handed to ``cv2.imread``; despite the name it is
            read as a single image file.

    Each iteration prints the inference time, shows the annotated image in a
    "result" window and writes it to ``demo_using_torch2trt.jpg``.
    """
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(trt_file_path))
    row_anchor = tusimple_row_anchor
    img_w, img_h = 1280, 720
    # NOTE(review): ``img_transforms`` is built but not used below; the image
    # goes through ``preprocessing`` instead.
    img_transforms = transforms.Compose([
        transforms.Resize((288, 800)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    for i in range(10):
        # Allow the user to abort with 'q' between iterations.
        key = cv2.waitKey(1)
        if key == ord("q"):
            break
        img_ori = cv2.imread(data_root)
        img = preprocessing(img_ori)
        img = img.unsqueeze(0)
        img = img.cuda()
        t1 = time.time()
        with torch.no_grad():
            out = model_trt(img)
        # Spacing between the 100 sample columns across an 800-wide input.
        col_sample = np.linspace(0, 800 - 1, 100)
        col_sample_w = col_sample[1] - col_sample[0]
        out_j = out[0].data.cpu().numpy()
        t2 = time.time()
        print("Inference time = %.3f ms" % ((t2 - t1) * 1000))
        # Reverse the second axis of the output.
        out_j = out_j[:, ::-1, :]
        # Softmax over all but the last entry of axis 0, then take the
        # expected index (1..100) as the location.
        prob = scipy.special.softmax(out_j[:-1, :, :], axis=0)
        idx = np.arange(100) + 1
        idx = idx.reshape(-1, 1, 1)
        loc = np.sum(prob * idx, axis=0)
        out_j = np.argmax(out_j, axis=0)
        # Where the argmax is index 100 the location is zeroed.
        # NOTE(review): presumably index 100 is the "no lane" class; confirm.
        loc[out_j == 100] = 0
        out_j = loc
        # NOTE: this inner loop reuses the name ``i`` of the outer loop.
        for i in range(out_j.shape[1]):
            # Draw a column only when it has more than two non-zero locations.
            if np.sum(out_j[:, i] != 0) > 2:
                for k in range(out_j.shape[0]):
                    if out_j[k, i] > 0:
                        # Scale from the 800x288 model grid to the img_w x img_h frame.
                        ppp = (int(out_j[k, i] * col_sample_w * img_w / 800) - 1, int(img_h * (row_anchor[56 - 1 - k] / 288)) - 1)
                        cv2.circle(img_ori, ppp, img_w // 300, (0, 255, 0), 2)
        cv2.imshow("result", img_ori)
        cv2.imwrite("demo_using_torch2trt.jpg", img_ori)
    cv2.destroyAllWindows()
def process_tftrt(self, input_model, output_infer_model):
    """Return a TensorRT module, loading it from disk or converting and saving it.

    Args:
        input_model: passed to ``load_pytorch_saved_model`` when a
            conversion is needed.
        output_infer_model: path of the saved ``TRTModule`` state dict.

    Returns:
        The loaded or freshly converted TensorRT module.
    """
    if not os.path.exists(output_infer_model):
        # load pretrained model
        resnet50_model = load_pytorch_saved_model(input_model)
        # convert to TensorRT feeding sample data as input
        sample = torch.ones((1, 3, 224, 224)).cuda()
        converted = torch2trt(resnet50_model, [sample])
        # Cache the result so the next call takes the load path.
        torch.save(converted.state_dict(), output_infer_model)
        return converted

    logging.info("resnet50_pytorch_trt.pth is exist")
    cached = TRTModule()
    cached.load_state_dict(torch.load(output_infer_model))
    return cached
def __init__(self, config=None):
    """Build the SuperPoint and SuperGlue sub-models and optional TRT module.

    Args:
        config: optional mapping with ``'superpoint'`` and ``'superglue'``
            sub-configurations; missing entries default to empty dicts.
    """
    super().__init__()
    # Fix: avoid a mutable default argument shared between calls; ``None``
    # means "no configuration" and behaves like the former ``{}``.
    if config is None:
        config = {}
    self.superpoint = SuperPoint(config.get('superpoint', {}))
    self.superglue = SuperGlue(config.get('superglue', {}))

    # 1 selects "convert and save" mode: the pre-built TensorRT SuperGlue
    # module below is only loaded when this flag is falsy.
    self.convert_save_trt_model = 1
    if not self.convert_save_trt_model:
        self.model_sg_trt = TRTModule()
        self.model_sg_trt.load_state_dict(
            torch.load('superglue_trt_fixed_input_part_every_kpt_encoder_every_gnn_outputs.pth'))

    self.bin_score = torch.nn.Parameter(torch.tensor(2.3457).cuda())
def init_trt(self, fp16_mode, trt_weights_path):
    """Build and save, or load, TensorRT feature extractors, then init the downsampler.

    Only the presence of the 127x127 file decides between the two paths.

    Args:
        fp16_mode: forwarded to torch2trt and to ``self.downsample.init_trt``.
        trt_weights_path: directory holding the ``*_trt.pth`` files.
    """
    file_127 = trt_weights_path+'/features_127_trt.pth'
    file_255 = trt_weights_path+'/features_255_trt.pth'

    if path.exists(file_127):
        self.features_127 = TRTModule()
        self.features_255 = TRTModule()
        self.features_127.load_state_dict(torch.load(file_127))
        self.features_255.load_state_dict(torch.load(file_255))
    else:
        # No cached modules: convert the PyTorch backbone for both input sizes.
        sample_127 = torch.ones((1, 3, 127, 127)).cuda()
        sample_255 = torch.ones((1, 3, 255, 255)).cuda()
        self.features_127 = torch2trt(self.features, [sample_127], fp16_mode=fp16_mode)
        self.features_255 = torch2trt(self.features, [sample_255], fp16_mode=fp16_mode)
        torch.save(self.features_127.state_dict(), file_127)
        torch.save(self.features_255.state_dict(), file_255)

    self.downsample.init_trt(fp16_mode, trt_weights_path)