Ejemplo n.º 1
0
def build_tensorrt(trt_file, model, size, device, recompile=False, fp16=True):
    """Return a TensorRT version of ``model``, reusing ``trt_file`` when possible.

    Args:
        trt_file: Path the converted module's state dict is loaded from or
            saved to.
        model: PyTorch module handed to ``torch2trt`` when a build is needed.
        size: Indexable pair; ``size[0]`` becomes the last dimension of the
            example input and ``size[1]`` the one before it.
        device: Device the example input tensor is moved to.
        recompile: When true, ignore an existing ``trt_file`` and rebuild.
        fp16: Forwarded to ``torch2trt`` as ``fp16_mode``.

    Returns:
        The loaded ``TRTModule`` or the freshly converted module.
    """
    from torch2trt import torch2trt, TRTModule
    import tensorrt as trt

    # Example input of ones; it is used both for conversion and for the
    # single call made on a freshly loaded module.
    rows = int(size[1])
    cols = int(size[0])
    example = torch.ones(1, 3, rows, cols).to(device)

    if recompile or not path.isfile(trt_file):
        print("Compiling with tensorRT...")
        build_options = dict(
            max_workspace_size=1 << 27,
            fp16_mode=fp16,
            log_level=trt.Logger.INFO,
            strict_type_constraints=True,
            max_batch_size=1,
        )
        converted = torch2trt(model, [example], **build_options)
        torch.save(converted.state_dict(), trt_file)
        return converted

    print("Found TensorRT model file, loading...")
    cached = TRTModule()
    cached.load_state_dict(torch.load(trt_file))

    # Run the module once on the example input before handing it back.
    cached(example)
    return cached
Ejemplo n.º 2
0
class AntiSpoofPredict(Detection):
    """Anti-spoofing classifier that runs through a torch2trt-converted module.

    If the cache file ``trt_spoof.pth`` exists it is loaded directly;
    otherwise the PyTorch model is loaded and converted on the first
    ``predict`` call, and the converted module is written to that file.
    """

    # Cache file for the converted module, relative to the working directory.
    _TRT_CACHE_PATH = 'trt_spoof.pth'

    def __init__(self, device_id, weights_path):
        """Select the device and load either the TRT cache or the PyTorch model.

        Args:
            device_id: CUDA device index, used only when CUDA is available.
            weights_path: Path to the PyTorch checkpoint; its file name is
                parsed by ``parse_model_name`` when no TRT cache exists.
        """
        super(AntiSpoofPredict, self).__init__()
        self.device = torch.device(
            "cuda:{}".format(device_id) if torch.cuda.is_available() else "cpu")
        # Defined up front so both attributes exist whichever load path runs.
        self.model = None
        self.model_trt = None
        self._load_model(weights_path)

    def _load_model(self, model_path):
        """Populate ``self.model_trt`` from the cache, else ``self.model``.

        Args:
            model_path: Path to the PyTorch checkpoint.
        """
        if os.path.isfile(self._TRT_CACHE_PATH):
            self.model_trt = TRTModule()
            self.model_trt.load_state_dict(torch.load(self._TRT_CACHE_PATH))
            return None

        # define model
        model_name = os.path.basename(model_path)
        h_input, w_input, model_type, _ = parse_model_name(model_name)
        self.kernel_size = get_kernel(
            h_input,
            w_input,
        )
        self.model = MODEL_MAPPING[model_type](
            conv6_kernel=self.kernel_size).to(self.device)

        # load model weight
        state_dict = torch.load(model_path, map_location=self.device)
        prefix = 'module.'
        first_layer_name = next(iter(state_dict), '')
        if first_layer_name.startswith(prefix):
            # Keys carry a leading 'module.' (presumably saved from a
            # DataParallel wrapper); strip it only where it is really a prefix.
            from collections import OrderedDict
            state_dict = OrderedDict(
                (key[len(prefix):] if key.startswith(prefix) else key, value)
                for key, value in state_dict.items())
        self.model.load_state_dict(state_dict)
        self.model.eval()
        return None

    def predict(self, img):
        """Classify one image and return softmax scores as a NumPy array.

        On the first call without a cached TRT module, the PyTorch model is
        converted with ``img`` as the example input, saved to the cache file,
        and the PyTorch model reference is dropped.

        Args:
            img: Image accepted by ``trans.ToTensor()``.

        Returns:
            NumPy array of softmax scores computed over dimension 1.
        """
        test_transform = trans.Compose([
            trans.ToTensor(),
        ])
        img = test_transform(img)
        img = img.unsqueeze(0).to(self.device)
        if self.model_trt is None:
            self.model_trt = torch2trt(self.model, [img], fp16_mode=True)
            torch.save(self.model_trt.state_dict(), self._TRT_CACHE_PATH)
            self.model = None
        with torch.no_grad():
            result = self.model_trt(img)
            # Explicit dim avoids the deprecated implicit-dim softmax; dim=1
            # is what the implicit rule picks for a 2-D (batch, classes)
            # output. NOTE(review): assumes the model output is 2-D - confirm.
            result = F.softmax(result, dim=1).cpu().numpy()
        return result
Ejemplo n.º 3
0
    def __init__(self, modelFile, taskDescFile, csv=0, csvPath='.'):
        """Load the pose task description and a TensorRT-converted pose model.

        A converted model is looked up under the base name of ``modelFile``
        with ``_trt.pth`` appended, relative to the working directory. When
        it is missing, the base model is built, converted with torch2trt and
        saved under that name.

        Args:
            modelFile: Path to the PyTorch weights; the file name selects the
                model function and the input width/height.
            taskDescFile: Path to the JSON task description (must provide
                the 'keypoints' and 'skeleton' entries).
            csv: CSV output is initialised when this is greater than 0.
            csvPath: Passed to ``_initCsv`` when CSV output is enabled.

        Raises:
            PoseCaptureDescError: The task description could not be opened.
            PoseCaptureModelError: The model name is invalid or names an
                unknown ``trt_pose.models`` function.
            PoseCaptureCsvError: CSV initialisation failed with ``OSError``.
        """
        # Load the task description
        try:
            with open(taskDescFile, 'r') as f:
                human_pose = json.load(f)
        except OSError as err:
            raise PoseCaptureDescError from err
        topology = trt_pose.coco.coco_category_to_topology(human_pose)
        num_parts = len(human_pose['keypoints'])
        num_links = len(human_pose['skeleton'])

        # Resolve the base model function from the file name
        fbase = os.path.basename(modelFile)
        func, self.inWidth, self.inHeight = \
            PoseCaptureModel.getModelFuncName(fbase)
        if func is None:
            logging.fatal('Invalid model name: %s' % (fbase))
            logging.fatal('Model name should be (.+_.+_att)_(\\d+)x(\\d+)_')
            raise PoseCaptureModelError('Invalid model name: %s' % (fbase))
        if not hasattr(trt_pose.models, func):
            logging.fatal('Could not find base model function: %s' % (func))
            raise PoseCaptureModelError(
                'Could not find base model function: %s' % (func))
        # Plain attribute lookup instead of eval() on a constructed string.
        model_factory = getattr(trt_pose.models, func)
        qualified_name = 'trt_pose.models.' + func
        trtFile = os.path.splitext(fbase)[0] + '_trt.pth'

        if os.path.exists(trtFile):
            logging.info('Loading model from TensorRT plan file ...')
            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trtFile))
        else:
            # The base model is only needed for conversion, so it is built
            # here rather than unconditionally.
            logging.info('Loading base model from %s' % (qualified_name))
            model = model_factory(num_parts, 2 * num_links).cuda().eval()
            logging.info('Optimizing model for TensorRT ...')
            model.load_state_dict(torch.load(modelFile))
            data = torch.zeros((1, 3, self.inHeight, self.inWidth)).cuda()
            model_trt = torch2trt.torch2trt(
                model, [data], fp16_mode=True, max_workspace_size=1 << 25)
            torch.save(model_trt.state_dict(), trtFile)

        # Per-channel constants kept on the GPU.
        # NOTE(review): presumably input normalisation mean/std - confirm
        # against the code that consumes them.
        self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
        self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
        self.device = torch.device('cuda')

        self.parse_objects = ParseObjects(topology)
        self.draw_objects = DrawObjects(topology)
        self.model_trt = model_trt
        self.num_parts = num_parts
        self.csv = csv
        self.count = 0

        if self.csv > 0:
            try:
                self._initCsv(human_pose['keypoints'], csvPath)
            except OSError as err:
                raise PoseCaptureCsvError from err
Ejemplo n.º 4
0
    def process_tftrt(self, input_model, output_infer_model):
        """Return a torch2trt module, loading it from disk when already built.

        Args:
            input_model: Passed to ``load_pytorch_saved_model`` when a
                conversion is needed.
            output_infer_model: Path the converted module's state dict is
                loaded from if it exists, otherwise saved to.

        Returns:
            The loaded ``TRTModule`` or the freshly converted module.
        """
        if os.path.exists(output_infer_model):
            # Report the path actually checked instead of a fixed file name.
            logging.info("%s already exists", output_infer_model)
            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(output_infer_model))
            return model_trt

        # load pretrained model
        # NOTE(review): nothing here moves the model to CUDA or puts it in
        # eval mode - confirm load_pytorch_saved_model does both.
        resnet50_model = load_pytorch_saved_model(input_model)
        # convert to TensorRT feeding sample data as input
        x = torch.ones((1, 3, 224, 224)).cuda()
        model_trt = torch2trt(resnet50_model, [x])

        # save so the next call can skip the conversion
        torch.save(model_trt.state_dict(), output_infer_model)
        return model_trt
Ejemplo n.º 5
0
class BackendTensorRT:
  """Inference backend that runs a model as a torch2trt-converted module.

  ``load`` either restores a previously saved module or builds one in
  int8 + fp16 mode from a calibration set and saves it for later runs.
  """

  # Integer selector from ``args.calib_algo`` -> attribute name on
  # ``trt.CalibrationAlgoType``. Names rather than enum members are stored so
  # the selector can be validated before anything expensive runs.
  _CALIB_ALGO_NAMES = {
      1: 'ENTROPY_CALIBRATION',
      2: 'ENTROPY_CALIBRATION_2',
      3: 'MINMAX_CALIBRATION',
  }

  def __init__(self):
    self.model = None

  def version(self):
    """Return the installed torch version."""
    return torch.__version__

  def name(self):
    """Return the identifier of this backend."""
    return "pytorch-tensorrt-ofa"

  def load(self, args, ds=None):
    """Load the cached TensorRT module or build and cache a new one.

    Args:
      args: Namespace providing ``batch_size``, ``image_size``, ``chip_name``,
        ``model`` and ``force_build``; the build path additionally reads
        ``calib_algo`` and ``calib_batch_size``.
      ds: Object exposing ``get_calibration_set()``; required on the build
        path only.

    Returns:
      This backend instance.

    Raises:
      ValueError: On the build path, when ``args.calib_algo`` is not 1, 2 or
        3, or when ``ds`` is None.
    """
    prefix = 'bs%d_is%d_%s_' % (args.batch_size, args.image_size, args.chip_name)
    lib_name = 'pretrained/' + args.model + '/' + prefix + 'torch2trt.pth'
    if os.path.exists(lib_name) and not args.force_build:
      self.model = TRTModule()
      self.model.load_state_dict(torch.load(lib_name))
      self.model.eval()
    else:
      # Validate first: an unknown selector used to surface as an unbound
      # local after the model had already been loaded onto the GPU.
      algo_name = self._CALIB_ALGO_NAMES.get(args.calib_algo)
      if algo_name is None:
        raise ValueError(
            'unsupported calib_algo %r; expected one of %s'
            % (args.calib_algo, sorted(self._CALIB_ALGO_NAMES)))
      if ds is None:
        raise ValueError(
            'a dataset providing get_calibration_set() is required to build '
            'the TensorRT model')

      net, _ = load_model(args)
      net = net.cuda()
      net.eval()
      input_data = torch.FloatTensor(
          np.array(ds.get_calibration_set(), np.float32)).cuda()
      calib_algo = getattr(trt.CalibrationAlgoType, algo_name)
      # Workspace is 2**33 bytes when the chip name contains 'T4', 2**34
      # otherwise.
      size = 1 << (33 if 'T4' in args.chip_name else 34)
      self.model = torch2trt(
          net, [input_data],
          max_batch_size=args.batch_size,
          fp16_mode=True,
          max_workspace_size=size,
          int8_mode=True,
          int8_calib_algorithm=calib_algo,
          int8_calib_batch_size=args.calib_batch_size)
      torch.save(self.model.state_dict(), lib_name)
    log.info('model is ready')
    return self

  def predict(self, image):
    """Run the model and return the index of the maximum along dimension 1."""
    with torch.no_grad():
      output = self.model(image)
      _, output = output.max(1)
    return output
Ejemplo n.º 6
0
# Channel count of the example input used for the TensorRT conversion below.
FRAME_CHANNELS = 3

# When true, the control loop records a start time on each iteration.
SHOW_LOGS = False

# Model
# Reuse a previously converted TensorRT module when its file exists;
# otherwise convert the PyTorch model once and save the result.
if os.path.isfile(MODEL_PATH_TRT):
    model_trt = TRTModule()
    model_trt.load_state_dict(torch.load(MODEL_PATH_TRT))
else:
    # NOTE(review): the model is never moved to CUDA here although the
    # example input is - confirm AutopilotModel / load_from_path handles it.
    model = AutopilotModel(pretrained=False)
    model.load_from_path(MODEL_PATH)
    model.eval()

    # Example input of ones; it fixes the input shape used for conversion.
    x = torch.ones((1, FRAME_CHANNELS, FRAME_SIZE, FRAME_SIZE)).cuda()
    model_trt = torch2trt(model, [x], fp16_mode=True)
    torch.save(model_trt.state_dict(), MODEL_PATH_TRT)

try:
    # Car
    car = NvidiaRacecar()
    car.throttle_gain = THROTTLE_GAIN
    car.steering_offset = STEERING_OFFSET

    # Camera
    camera = CSICamera(width=CAMERA_WIDTH, height=CAMERA_HEIGHT)

    # Control Loop
    while True:
        if SHOW_LOGS:
            start_time = time.time()
Ejemplo n.º 7
0
class TrtPose():
    """Human-pose detector backed by a TensorRT-converted trt_pose model.

    The converted weights are looked up in the ``drone_ai`` package's models
    directory and built with torch2trt the first time they are missing.
    """

    def __init__(self):
        """Load the pose topology and the converted model, building it if absent."""
        self.goal = 0.0  # [angle] last value returned by calcYawAngle

        print("Getting Path to package...")
        self.follow_people_configfiles_path = rospkg.RosPack().get_path(
            'drone_ai') + "/scripts/helpers/trtpose/models"

        print("We get the human pose json file that described the human pose")
        # Same directory as above, so the stored path is reused.
        humanPose_file_path = os.path.join(
            self.follow_people_configfiles_path, 'human_pose.json')

        print("Opening json file")
        with open(humanPose_file_path, 'r') as f:
            self.human_pose = json.load(f)

        print("Creating topology")
        self.topology = trt_pose.coco.coco_category_to_topology(
            self.human_pose)

        # Size of the example input used when the model is converted.
        self.WIDTH = 640
        self.HEIGHT = 480

        OPTIMIZED_MODEL = 'resnet18_baseline_att_224x224_A_epoch_249_trt.pth'
        optimized_model_weights_path = os.path.join(
            self.follow_people_configfiles_path, OPTIMIZED_MODEL)

        if not os.path.exists(optimized_model_weights_path):
            self.__create_optimodel(optimized_model_weights_path)

        print("Load the saved model using Torchtrt")
        self.model_trt = TRTModule()
        self.model_trt.load_state_dict(
            torch.load(optimized_model_weights_path))

        print("Define the Image processing variables")
        # Per-channel values subtracted from / divided into the image tensor
        # in __preprocess.
        self.mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()
        self.std = torch.Tensor([0.229, 0.224, 0.225]).cuda()
        self.device = torch.device("cuda")

        print(
            "Classes to parse the object of the NeuralNetwork and draw on the image"
        )
        self.parse_objects = ParseObjects(self.topology)
        self.draw_objects = DrawObjects(self.topology)

    def __create_optimodel(self, optimized_model_weights_path):
        """Convert the resnet18 pose model with torch2trt and save it.

        Args:
            optimized_model_weights_path: Destination of the converted
                module's state dict.
        """
        rospy.loginfo("** No optimised model found. **")
        num_parts = len(self.human_pose['keypoints'])
        num_links = len(self.human_pose['skeleton'])

        rospy.loginfo("Creating Model")
        model = trt_pose.models.resnet18_baseline_att(
            num_parts, 2 * num_links).cuda().eval()
        rospy.loginfo(
            "Load the weights from the eight files predownloaded to this package"
        )
        MODEL_WEIGHTS = 'resnet18_baseline_att_224x224_A_epoch_249.pth'
        model_weights_path = os.path.join(self.follow_people_configfiles_path,
                                          MODEL_WEIGHTS)

        rospy.loginfo("Load state dict")
        model.load_state_dict(torch.load(model_weights_path))

        rospy.loginfo("Creating empty data")
        # NOTE(review): the example input is HEIGHT x WIDTH (480 x 640) while
        # the weights file name says 224x224 - confirm this is intended.
        data = torch.zeros((1, 3, self.HEIGHT, self.WIDTH)).cuda()

        rospy.loginfo(
            "Use tortchtrt to go from Torch to TensorRT to generate an optimised model"
        )
        self.model_trt = torch2trt.torch2trt(model, [data],
                                             fp16_mode=True,
                                             max_workspace_size=1 << 25)

        rospy.loginfo("Saving new optimodel")
        torch.save(self.model_trt.state_dict(), optimized_model_weights_path)

    def __preprocess(self, image):
        """Turn a BGR image array into a normalised 1-image batch on the GPU."""
        self.device = torch.device("cuda")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = PIL.Image.fromarray(image)
        image = transforms.functional.to_tensor(image).to(self.device)
        # In-place (x - mean) / std, broadcast over the two trailing dims.
        image.sub_(self.mean[:, None, None]).div_(self.std[:, None, None])
        return image[None, ...]

    def detect(self, image):
        """Run the pose model on one image.

        Args:
            image: BGR image array (it is converted with COLOR_BGR2RGB).

        Returns:
            Tuple ``(counts, objects, peaks, topology)`` where the first
            three come from ``ParseObjects``.
        """
        data = self.__preprocess(image)
        cmap, paf = self.model_trt(data)
        cmap, paf = cmap.detach().cpu(), paf.detach().cpu()
        counts, objects, peaks = self.parse_objects(cmap, paf)

        return counts, objects, peaks, self.topology

    def calcYawAngle(self, position):
        """Return the angle, in degrees, of ``position`` relative to (320, 480).

        The result is ``degrees(atan((320 - x) / (480 - y)))`` and is also
        stored in ``self.goal``. When ``y == 480`` the quotient is undefined;
        the limit for ``y`` approaching 480 from below is returned instead
        (90, -90, or 0 when ``x == 320``) rather than raising
        ``ZeroDivisionError``.

        Args:
            position: Indexable ``(x, y)`` pair.
        """
        dx = float(320 - position[0])
        dy = 480 - position[1]
        if dy == 0:
            if dx > 0:
                yaw_angle = 90.0
            elif dx < 0:
                yaw_angle = -90.0
            else:
                yaw_angle = 0.0
        else:
            yaw_angle = degrees(atan(dx / dy))
        self.goal = yaw_angle
        return yaw_angle
class Matching(torch.nn.Module):
    """ Image Matching Frontend (SuperPoint + SuperGlue)

    SuperGlue is executed through a torch2trt-converted module. The
    ``convert_save_trt_model`` flag selects the mode:

    * truthy: SuperGlue is converted inside ``forward`` and the converted
      module's state dict is saved to ``_TRT_WEIGHTS_FILE``;
    * falsy: the converted module is loaded from that file in ``__init__``.
    """

    # File the converted SuperGlue module is saved to / loaded from.
    # An earlier variant exposing four outputs was noted by the author to give
    # different results after reloading, hence this "every outputs" variant.
    _TRT_WEIGHTS_FILE = (
        'superglue_trt_fixed_input_part_every_kpt_encoder_every_gnn_outputs.pth')

    # Keypoint count every input is zero-padded to, giving a fixed input shape.
    _MAX_KEYPOINTS = 350

    def __init__(self, config=None):
        """Build SuperPoint and SuperGlue from their ``config`` sub-dicts.

        Args:
            config: Optional dict; the 'superpoint' and 'superglue' entries
                are passed to the respective model (each defaults to ``{}``).
        """
        super().__init__()
        # None instead of a shared mutable ``{}`` default.
        config = {} if config is None else config
        self.superpoint = SuperPoint(config.get('superpoint', {}))
        self.superglue = SuperGlue(config.get('superglue', {}))
        self.convert_save_trt_model = 1
        if not self.convert_save_trt_model:
            self.model_sg_trt = TRTModule()
            self.model_sg_trt.load_state_dict(
                torch.load(self._TRT_WEIGHTS_FILE))

        self.bin_score = torch.nn.Parameter(torch.tensor(2.3457).cuda())

    def forward(self, data):
        """ Run SuperPoint (optionally) and SuperGlue
        SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input
        Args:
          data: dictionary with minimal keys: ['image0', 'image1']
        Returns:
          The SuperPoint predictions (when computed) merged with
          'matches0', 'matches1', 'matching_scores0', 'matching_scores1';
          unmatched keypoints get index -1.
        """
        pred = {}

        # Extract SuperPoint (keypoints, scores, descriptors) if not provided
        if 'keypoints0' not in data:
            pred0 = self.superpoint({'image': data['image0']})
            pred = {**pred, **{k + '0': v for k, v in pred0.items()}}
        if 'keypoints1' not in data:
            pred1 = self.superpoint({'image': data['image1']})
            pred = {**pred, **{k + '1': v for k, v in pred1.items()}}
        t1 = time.time()
        # Batch all features
        # We should either have i) one image per batch, or
        # ii) the same number of local features for all images in the batch.
        data = {**data, **pred}

        for k in data:
            if isinstance(data[k], (list, tuple)):
                data[k] = torch.stack(data[k])

        # Inputs for SuperGlue: descriptors plus keypoints with their scores.
        desc0, desc1 = data['descriptors0'], data['descriptors1']
        kpts0, kpts1 = data['keypoints0'], data['keypoints1']

        # Keypoint normalization.
        kpts0 = normalize_keypoints(kpts0, data['image0'].shape)
        kpts1 = normalize_keypoints(kpts1, data['image1'].shape)

        # Stack keypoint coordinates and scores along dim 1.
        kpts_scores0 = torch.cat(
            [kpts0.transpose(1, 2), data['scores0'].unsqueeze(1)], dim=1)
        kpts_scores1 = torch.cat(
            [kpts1.transpose(1, 2), data['scores1'].unsqueeze(1)], dim=1)

        # Zero-pad both images to the fixed keypoint count.
        # NOTE(review): more than _MAX_KEYPOINTS keypoints makes len0/len1
        # negative and torch.zeros raises - confirm upstream caps the count.
        max_count = self._MAX_KEYPOINTS
        len0 = max_count - desc0.shape[2]
        len1 = max_count - desc1.shape[2]
        desc0 = torch.cat((desc0, torch.zeros(1, 256, len0).cuda()), 2)
        desc1 = torch.cat((desc1, torch.zeros(1, 256, len1).cuda()), 2)
        kpts_scores0 = torch.cat((kpts_scores0, torch.zeros(1, 3, len0).cuda()), 2)
        kpts_scores1 = torch.cat((kpts_scores1, torch.zeros(1, 3, len1).cuda()), 2)

        if self.convert_save_trt_model:
            # NOTE(review): this converts and saves on every forward call
            # while the flag is set - confirm that is intended.
            self.model_sg_trt = torch2trt(
                self.superglue, [kpts_scores0, desc0, kpts_scores1, desc1])
            torch.save(self.model_sg_trt.state_dict(), self._TRT_WEIGHTS_FILE)

        # The matching descriptors are the last two outputs of the module.
        output_trt = self.model_sg_trt(kpts_scores0, desc0, kpts_scores1, desc1)
        mdesc0, mdesc1 = output_trt[-2], output_trt[-1]

        # Drop the padded columns again.
        if len0 > 0:
            mdesc0 = mdesc0[:, :, :-len0]
        if len1 > 0:
            mdesc1 = mdesc1[:, :, :-len1]
        # Compute matching descriptor distance.
        scores = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1)
        scores = scores / 256 ** .5

        # Run the optimal transport.
        scores = log_optimal_transport(
            scores, self.bin_score,
            iters=20)  # self.config['sinkhorn_iterations'])

        # Get the matches with score above "match_threshold".
        max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1)
        indices0, indices1 = max0.indices, max1.indices
        mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0)
        mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1)
        zero = scores.new_tensor(0)
        mscores0 = torch.where(mutual0, max0.values.exp(), zero)
        mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero)
        valid0 = mutual0 & (mscores0 > 0.0)  # self.config['match_threshold'])
        valid1 = mutual1 & valid0.gather(1, indices1)
        indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1))
        indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1))

        ret = {
            'matches0': indices0,  # use -1 for invalid match
            'matches1': indices1,  # use -1 for invalid match
            'matching_scores0': mscores0,
            'matching_scores1': mscores1,
        }
        pred = {**pred, **ret}
        # Matching-stage throughput in calls per second; skipped when the
        # timer did not advance, which would otherwise divide by zero.
        elapsed = time.time() - t1
        if elapsed > 0:
            print(1 / elapsed)
        return pred
    if opt.half:
        # Load the model (half-precision variant)
        model_backbone = Darknet_Backbone(
            opt.model_def, img_size=opt.img_size).to(device).half()
    else:
        model_backbone = Darknet_Backbone(opt.model_def,
                                          img_size=opt.img_size).to(device)

    # .pth checkpoints and darknet .weights files are loaded differently
    if opt.weights_path.split(".")[-1] == "pth":
        pass
        # load a .pth file
        model_backbone.load_state_dict(torch.load(opt.weights_path))
    else:
        # load a .weights file
        model_backbone.load_darknet_weights(opt.weights_path)
    model_backbone.eval()

    if opt.half:
        # Set the shape: the contents of x do not matter, it only tells the
        # converter what input shape to build for; 3 = the three RGB channels
        x = torch.rand(size=(opt.batch_size, 3, opt.img_size[0],
                             opt.img_size[1])).to(device).half()
        # Model conversion starts here. fp16_mode=True enables half precision;
        # according to the original author, the int8 option in the source has
        # not been debugged properly
        model_trt = torch2trt(model_backbone, [x], fp16_mode=True)
    else:
        x = torch.rand(size=(opt.batch_size, 3, opt.img_size[0],
                             opt.img_size[1])).to(device)
        model_trt = torch2trt(model_backbone, [x])
    # Serialize and save the converted model
    torch.save(model_trt.state_dict(),
               "weights/{}".format(opt.model_save_name))