def predict_img(img_path):
    """Run the pretrained network on a single image and plot the prediction.

    The image is read with OpenCV, scaled, center-cropped and transformed by
    the ``image_utils`` helpers, pushed through ``ResnetUnetHybrid`` and the
    result is handed to ``image_utils.show_img_and_pred``.

    Args:
        img_path: Path of the image file to read.

    Raises:
        OSError: If ``cv2.imread`` cannot read an image from ``img_path``.
    """
    # switch to CUDA device if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load model
    print('Loading model...')
    model = ResnetUnetHybrid.load_pretrained(device=device)
    model.eval()

    # load image; cv2.imread reports failure by returning None rather than
    # raising, so check explicitly instead of failing on the slice below
    img = cv2.imread(img_path)
    if img is None:
        raise OSError('Failed to read image: {}'.format(img_path))
    img = img[..., ::-1]  # reverse the channel axis (OpenCV decodes to BGR)
    img = image_utils.scale_image(img)
    img = image_utils.center_crop(img)
    inp = image_utils.img_transform(img)
    inp = inp[None, :, :, :].to(device)  # add a leading batch axis

    # inference; gradients are never needed here, so skip autograd tracking
    print('Running the image through the network...')
    with torch.no_grad():
        output = model(inp)

    # transform and plot the results
    output = output.detach().cpu()[0].numpy()
    image_utils.show_img_and_pred(img, output)
def run_vid(input_path):
    """Run the network on the frames of a video and display the predictions.

    Every frame is pre-processed with the ``image_utils`` helpers, pushed
    through ``ResnetUnetHybrid`` and shown side by side with its prediction
    in an OpenCV window. Pressing ``q`` stops the loop early. The frame count,
    elapsed time and frames per second are printed at the end.

    Args:
        input_path: Source passed to ``cv2.VideoCapture``.

    Returns:
        ``-1`` if the video could not be opened, otherwise ``None``.
    """
    # switch to CUDA device if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load model
    print('Loading model...')
    model = ResnetUnetHybrid.load_pretrained(device=device)
    model.eval()

    # start running the video
    print('Inferencing video frames...')
    start = time.time()
    capture = cv2.VideoCapture(input_path)

    if not capture.isOpened():
        print('ERROR: Failed to open video.')
        capture.release()
        return -1

    frame_cnt = 0
    try:
        # gradients are never needed for inference, so skip autograd tracking
        with torch.no_grad():
            while True:
                success, frame = capture.read()

                # stop when finished, or when interrupted by the user
                if not success:
                    print('Finished.')
                    break

                # mask to the low byte: some platforms set higher bits in
                # the value returned by waitKey
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    print('Interrupted by user.')
                    break

                frame_cnt += 1

                # pre-process frame
                frame = frame[..., ::-1]  # reverse channel axis for the model
                frame = image_utils.scale_image(frame)
                frame = image_utils.center_crop(frame)
                inp = image_utils.img_transform(frame)
                inp = inp[None, :, :, :].to(device)  # add a leading batch axis

                # inference
                pred = model(inp)

                # post-process prediction
                pred = pred.detach().cpu()[0].numpy()
                pred = image_utils.depth_to_grayscale(pred)

                # concatenate the input frame (channel order reversed back
                # for OpenCV) with the prediction and display
                cv2.imshow('video', np.concatenate((frame[..., ::-1], pred), axis=1))

        elapsed = time.time() - start
        print('\n{} frames evaluated in {:.3f}s'.format(int(frame_cnt), elapsed))
        # guard against a zero interval from a coarse clock
        fps = frame_cnt / elapsed if elapsed > 0 else 0.0
        print('{:.2f} FPS'.format(fps))
    finally:
        # always give back the capture handle and windows, even on errors
        capture.release()
        cv2.destroyAllWindows()
def predict_img(img_path, focal_len):
    """Given an image, create a 3D model of the environment, based on depth
    estimation and semantic segmentation.

    Two pretrained ``ResnetUnetHybrid`` models are loaded (``output_type``
    ``'depth'`` and ``'seg'``). Both outputs are bilinearly resized to
    320x320, the segmentation output is passed through a softmax over
    dimension 1, and everything is handed to ``image_utils.create_plots``.

    Args:
        img_path: Path of the image file to read.
        focal_len: Forwarded unchanged to ``image_utils.create_plots``.

    Raises:
        OSError: If ``cv2.imread`` cannot read an image from ``img_path``.
    """
    # switch to GPU if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load models
    print('Loading models...')
    model_de = ResnetUnetHybrid.load_pretrained(output_type='depth', device=device)
    model_seg = ResnetUnetHybrid.load_pretrained(output_type='seg', device=device)
    model_de.eval()
    model_seg.eval()

    # load image; cv2.imread reports failure by returning None rather than
    # raising, so check explicitly instead of failing on the slice below
    img = cv2.imread(img_path)
    if img is None:
        raise OSError('Failed to read image: {}'.format(img_path))
    img = img[..., ::-1]  # reverse the channel axis (OpenCV decodes to BGR)
    img = image_utils.scale_image(img)
    img = image_utils.center_crop(img)
    inp = image_utils.img_transform(img)
    inp = inp[None, :, :, :].to(device)  # add a leading batch axis

    print('Plotting...')

    # gradients are never needed for inference, so skip autograd tracking
    with torch.no_grad():
        output_de = model_de(inp)
        output_seg = model_seg(inp)

        # up-sample outputs
        output_de = F.interpolate(output_de, size=(320, 320), mode='bilinear',
                                  align_corners=True)
        output_seg = F.interpolate(output_seg, size=(320, 320), mode='bilinear',
                                   align_corners=True)

        # use softmax on the segmentation output
        output_seg = F.softmax(output_seg, dim=1)

    # plot the results
    output_de = output_de.detach().cpu()[0].numpy()
    output_seg = output_seg.detach().cpu()[0].numpy()
    image_utils.create_plots(img, output_de, output_seg, focal_len,
                             uncertainty_threshold=0.9, apply_depth_mask=True)
def predict(self, img):
    """Run ``self.model`` on one image and return a 2-D prediction array.

    The image is scaled, center-cropped and transformed with the
    ``image_utils`` helpers before being sent to ``self.device``. A log
    message is emitted only on the first call.

    Args:
        img: Image accepted by ``image_utils.scale_image``.

    Returns:
        NumPy array taken from index 0 of the first two axes of the model
        output (assumes a (batch, channel, height, width) layout — TODO
        confirm against the model).
    """
    # local import: this block did not previously reference ``torch`` directly
    import torch

    img = image_utils.scale_image(img)
    img = image_utils.center_crop(img)
    inp = image_utils.img_transform(img)
    inp = inp[None, :, :, :].to(self.device)  # add a leading batch axis

    # inference
    if not self.predict_called:
        # only announce the first forward pass to avoid flooding the log
        rospy.loginfo('Running the image through the network...')
        self.predict_called = True

    # gradients are never needed here; tracking them on every call only
    # costs memory and time
    with torch.no_grad():
        output = self.model(inp)

    # transform and output the results
    output = output.detach().cpu()[0].numpy()
    # same view as np.transpose(output, (1, 2, 0))[:, :, 0]
    return output[0]
# NOTE(review): the next three statements are the tail of a function whose
# `def` line is not visible here; they convert `img` to a tensor, normalize it
# with the listed per-channel mean/std and return the result.
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    img = data_transform(img)
    return img

# ---- experiment script: pre-process one image and load an ONNX network ----
#img_path = "example_data/test_img0.jpg"
# NOTE(review): "buikd2.jpg" looks like a typo — confirm the file name on disk
img_path = "buikd2.jpg"
# same constants as the Normalize call above; only referenced by the disabled
# block further down
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
# the channel reversal is commented out, so `org` keeps OpenCV's channel order
org = cv2.imread(img_path)#[..., ::-1]
# presumably a notebook display helper defined or imported elsewhere — confirm
cv2_imshow(org)
img = scale_image(org)
img = center_crop(img)
# NOTE(review): the two calls above are unqualified while this one goes through
# image_utils — confirm both refer to the same helpers
img = image_utils.img_transform(img)
# convert the transformed tensor to a NumPy array
img = img.detach().numpy()
print(img.shape, img.dtype)
#inp = inp[None, :, :, :].to(device)
# the string literal below is disabled code (manual normalization attempt);
# it is evaluated as an expression statement and has no effect
"""
omg = 2.0 * (org.astype(np.float32)/255.0) - 1.0
omg = scale_image(omg)
omg = center_crop(omg)
omg -= mean
omg /= std
#img = omg.transpose(2,0,1)
#cv2_imshow(omg)
print(img.shape, img.dtype)
"""
# load the exported ONNX model through OpenCV's dnn module
net = cv2.dnn.readNet("bottle.onnx")
def compute_errors():
    """Run all test images through the model and print the error metrics.

    Test image and label paths come from ``collect_test_files()``. Each image
    is scaled by 0.55, center-cropped and transformed, then the prediction is
    bilinearly resized to 466x582 and compared with the label cropped by 7
    rows and 29 columns on each side. Printed metrics: mean absolute relative
    error, RMSE, mean log10 error and the delta1/delta2/delta3 accuracies.

    Raises:
        ValueError: If the numbers of test images and labels differ.
        OSError: If a test image cannot be read.
    """
    # switch to CUDA device if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load model
    print('Loading model...')
    model = ResnetUnetHybrid.load_pretrained(device=device)
    model.eval()

    test_img_paths, test_label_paths = collect_test_files()
    num_samples = len(test_img_paths)
    if num_samples != len(test_label_paths):
        raise ValueError('Got {} test images but {} labels.'.format(
            num_samples, len(test_label_paths)))
    if num_samples == 0:
        # nothing to evaluate; the metrics below would only produce NaNs
        print('No test files found.')
        return

    # size the buffers from the actual number of samples: a fixed count
    # either overflows or leaves all-zero slices that poison the metrics
    preds = np.zeros((466, 582, num_samples), dtype=np.float32)
    labels = np.zeros((466, 582, num_samples), dtype=np.float32)

    print('\nRunning evaluation:')
    # gradients are never needed for evaluation, so skip autograd tracking
    with torch.no_grad():
        for idx, (img_path, label_path) in enumerate(zip(test_img_paths, test_label_paths)):
            sys.stdout.write('\r{} / {}'.format(idx+1, num_samples))
            sys.stdout.flush()

            # load image; cv2.imread returns None instead of raising
            img = cv2.imread(img_path)
            if img is None:
                raise OSError('Failed to read image: {}'.format(img_path))
            img = img[..., ::-1]  # reverse the channel axis (OpenCV decodes to BGR)

            # resize and center crop to input size
            img = image_utils.scale_image(img, 0.55)
            img = image_utils.center_crop(img)
            img = image_utils.img_transform(img)
            img = img[None, :, :, :].to(device)  # add a leading batch axis

            # inference
            pred = model(img)

            # up-sampling
            pred = F.interpolate(pred, size=(466, 582), mode='bilinear', align_corners=False)
            pred = pred.detach().cpu().numpy()

            # load label
            label = np.load(label_path)
            # center crop to output size
            label = label[7:label.shape[0]-7, 29:label.shape[1]-29]

            # store the label and the corresponding prediction
            labels[:, :, idx] = label
            preds[:, :, idx] = pred[0, 0, :, :]

    # calculating errors
    rel_error = np.mean(np.abs(preds - labels)/labels)
    print('\nMean Absolute Relative Error: {:.6f}'.format(rel_error))

    rmse = np.sqrt(np.mean((preds - labels)**2))
    print('Root Mean Squared Error: {:.6f}'.format(rmse))

    log10 = np.mean(np.abs(np.log10(preds) - np.log10(labels)))
    print('Mean Log10 Error: {:.6f}'.format(log10))

    # threshold accuracies: share of pixels whose max ratio is below 1.25**k
    acc = np.maximum(preds/labels, labels/preds)
    delta1 = np.mean(acc < 1.25)
    print('Delta1: {:.6f}'.format(delta1))

    delta2 = np.mean(acc < 1.25**2)
    print('Delta2: {:.6f}'.format(delta2))

    delta3 = np.mean(acc < 1.25**3)
    print('Delta3: {:.6f}'.format(delta3))