class Perception(object):
    """Segment an RGB-D frame and pick the segment that best matches an anchor.

    Two networks are loaded at construction time: an instance-segmentation
    model (weights from ``19.pth``) and a Siamese network (weights from
    ``siamese.pt``). Both are placed on the module-level ``device`` and
    switched to evaluation mode.

    Images are NumPy arrays indexed ``[row, col, channel]``. The mask
    handling is hard-wired to 480 x 640 frames.
    """

    # Frame resolution (rows, cols) that every mask is reshaped to.
    _FRAME_SHAPE = (480, 640)
    # Shape both Siamese inputs are reshaped to before inference.
    _SIAMESE_INPUT_SHAPE = (1, 3, 120, 120)
    # Per-channel normalisation constants applied to the colour image.
    _IMG_MEAN = (0.485, 0.456, 0.406)
    _IMG_STD = (0.229, 0.224, 0.225)

    def __init__(self):
        """Load both networks and their weights onto ``device``."""
        # map_location lets checkpoints saved on another device be loaded
        # onto whichever device this process actually runs on.
        self.sdmrcnn_model = get_model_instance_segmentation(2).to(
            device, dtype=torch.float)
        self.sdmrcnn_model.load_state_dict(
            torch.load('19.pth', map_location=device))
        self.sdmrcnn_model.eval()

        # Use the same device as the segmentation model instead of an
        # unconditional .cuda(), so both models always live together.
        self.siamese_model = SiameseNetwork().to(device)
        self.siamese_model.load_state_dict(
            torch.load('siamese.pt', map_location=device))
        self.siamese_model.eval()

    def segmentation(self, raw_rgb, raw_depth):
        """Run instance segmentation on one RGB-D frame.

        Args:
            raw_rgb: colour image, ``(H, W, C)`` with at least three
                channels; the first three are used in reversed order
                (presumably BGR input -- confirm against the camera driver).
            raw_depth: two-dimensional depth image ``(H, W)``.

        Returns:
            ``numpy.ndarray`` of shape ``(N, 480, 640)`` holding the mask
            values the model predicted for each of the ``N`` detections.
        """
        # Swap the first and third colour channels; fancy indexing copies,
        # so the caller's array is never modified.
        color_img = raw_rgb[:, :, [2, 1, 0]].astype(np.float64) / 255.
        color_img = ((color_img - np.asarray(self._IMG_MEAN))
                     / np.asarray(self._IMG_STD))

        # np.float was removed in NumPy 1.24; float64 is the same dtype.
        depth_img = raw_depth.astype(np.float64)
        rows, cols = depth_img.shape
        depth_img = depth_img.reshape(rows, cols, 1)

        # NOTE(review): only the first two normalised colour channels are
        # kept and the (unscaled) depth becomes the third channel --
        # presumably this mirrors how the model was trained; confirm.
        img = np.concatenate((color_img[:, :, 0:2], depth_img), axis=2)
        test_input = [
            torch.from_numpy(np.transpose(img, [2, 0, 1])).to(
                device, dtype=torch.float)
        ]

        # Pure inference: skip autograd bookkeeping.
        with torch.no_grad():
            output = self.sdmrcnn_model(test_input)

        mask_list = output[0]['masks'].cpu().detach().numpy()
        return np.reshape(mask_list, (len(mask_list),) + self._FRAME_SHAPE)

    def classification(self, masks, raw_rgb, anchor_img):
        """Return the mask whose masked image scores highest against the anchor.

        Args:
            masks: detections as returned by :meth:`segmentation`; reshaped
                to ``(N, 480, 640)``.
            raw_rgb: colour frame the masks refer to.
            anchor_img: reference image with ``3 * 120 * 120`` elements.

        Returns:
            ``numpy.ndarray`` of shape ``(480, 640)``: the selected mask.

        Raises:
            ValueError: if ``masks`` is empty, because there is nothing to
                choose from.
        """
        # NOTE(review): both Siamese inputs are brought to (1, 3, 120, 120)
        # with a plain reshape, not a transpose; if they arrive as (H, W, C)
        # this interleaves channels. Left as-is because the trained weights
        # may depend on it -- confirm before changing.
        anchor_arr = np.reshape(anchor_img, self._SIAMESE_INPUT_SHAPE)

        masks = np.reshape(masks, (len(masks),) + self._FRAME_SHAPE)
        print('Number of objects detected: %d' % len(masks))
        if len(masks) == 0:
            # np.argmax on an empty score list would raise an opaque
            # ValueError; fail early with an explicit message instead.
            raise ValueError(
                'No objects detected: cannot select a mask from an empty set.')

        # The anchor is the same for every candidate, so convert it once.
        anchor = torch.from_numpy(anchor_arr).to(device, dtype=torch.float)

        scores = []
        with torch.no_grad():
            for mask in masks:
                # Black out everything outside the candidate's mask.
                candidate = np.copy(raw_rgb)
                candidate[mask < 0.5] = 0
                candidate = center_crop_150(pad(candidate))
                candidate = torch.from_numpy(
                    np.reshape(candidate, self._SIAMESE_INPUT_SHAPE)
                ).to(device, dtype=torch.float)

                output = self.siamese_model(anchor, candidate)
                scores.append(output.detach().cpu().numpy()[0])

        best = np.argmax(np.asarray(scores))
        return np.reshape(masks[best], self._FRAME_SHAPE)
# Command-line options: the checkpoint to load and where to write the export.
parser.add_argument(
    '-c',
    '--checkpoint',
    type=str,
    help="Path of model checkpoint to be used for inference.",
    required=True
)
# NOTE: the help text mentions TensorRT, but the file written to this path
# below is produced by torch.onnx.export, i.e. it is an ONNX model.
parser.add_argument(
    '-o',
    '--out_path',
    type=str,
    help="Path for saving tensorrt model.",
    required=True
)
args = parser.parse_args()

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# map_location keeps the CPU fallback usable: without it a checkpoint that
# was saved from a GPU fails to deserialize on a machine without CUDA.
checkpoint = torch.load(args.checkpoint, map_location=device)

# The checkpoint stores both the backbone identifier and the weights.
model = SiameseNetwork(backbone=checkpoint['backbone'])
model.to(device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Two random 1x3x224x224 tensors serve as the example input pair for export.
# NOTE(review): only one input name is supplied for two inputs -- confirm
# that consumers of the exported model expect this naming.
example_inputs = (
    torch.rand(1, 3, 224, 224).to(device),
    torch.rand(1, 3, 224, 224).to(device),
)
torch.onnx.export(
    model,
    example_inputs,
    args.out_path,
    input_names=['input'],
    output_names=['output'],
    export_params=True,
)

# Reload the exported file and let the ONNX checker validate it.
onnx_model = onnx.load(args.out_path)
onnx.checker.check_model(onnx_model)
print('Note: assume input image resolution is 180 x 320 (h x w)') data_loader = CameraDataset(pivot_images, positive_images, batch_size, -1, data_transform, is_train=False) print('load {} batch edge images'.format(len(data_loader))) # 2: load network branch = BranchNetwork() net = SiameseNetwork(branch) if os.path.isfile(model_name): checkpoint = torch.load(model_name, map_location=lambda storage, loc: storage) net.load_state_dict(checkpoint['state_dict']) print('load model file from {}.'.format(model_name)) else: print('Error: file not found at {}'.format(model_name)) sys.exit() # 3: setup computation device device = 'cpu' if torch.cuda.is_available(): device = torch.device('cuda:{}'.format(cuda_id)) net = net.to(device) cudnn.benchmark = True print('computation device: {}'.format(device)) features = [] with torch.no_grad():