예제 #1
0
def predict_img(img_path):
    """Run a single image through the pre-trained network and show the result.

    The image is read with OpenCV, its channel order is reversed, and it is
    scaled, center-cropped and transformed by the ``image_utils`` helpers
    before one forward pass. The cropped input image and the first element of
    the output batch are handed to ``image_utils.show_img_and_pred``.

    Args:
        img_path: Path of the image file to load.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``img_path``.
    """
    # switch to CUDA device if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load model
    print('Loading model...')
    model = ResnetUnetHybrid.load_pretrained(device=device)
    model.eval()

    # load image; cv2.imread signals failure by returning None, not by raising
    raw = cv2.imread(img_path)
    if raw is None:
        raise FileNotFoundError('Could not read image: {}'.format(img_path))
    img = raw[..., ::-1]  # reverse the channel axis (OpenCV loads BGR)
    img = image_utils.scale_image(img)
    img = image_utils.center_crop(img)
    inp = image_utils.img_transform(img)
    inp = inp[None, :, :, :].to(device)  # add a leading batch axis

    # inference; gradients are never needed here, so skip autograd bookkeeping
    print('Running the image through the network...')
    with torch.no_grad():
        output = model(inp)

    # transform and plot the results
    output = output.detach().cpu()[0].numpy()
    image_utils.show_img_and_pred(img, output)
예제 #2
0
def run_vid(input_path):
    """Run every frame of a video through the network and display the results.

    Each frame is pre-processed with the ``image_utils`` helpers, passed
    through the pre-trained model, and shown side by side with its prediction
    in an OpenCV window named ``'video'``. Processing stops when the video
    ends or when the user presses ``q``. Afterwards the number of processed
    frames, the elapsed time and the resulting frame rate are printed.

    Args:
        input_path: Source passed to ``cv2.VideoCapture``.

    Returns:
        ``-1`` if the video could not be opened, otherwise ``None``.
    """
    # switch to CUDA device if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load model
    print('Loading model...')
    model = ResnetUnetHybrid.load_pretrained(device=device)
    model.eval()

    # start running the video
    print('Inferencing video frames...')
    start = time.time()
    capture = cv2.VideoCapture(input_path)
    frame_cnt = 0

    if not capture.isOpened():
        print('ERROR: Failed to open video.')
        capture.release()
        return -1

    try:
        # gradients are never needed for inference; skip autograd bookkeeping
        with torch.no_grad():
            while True:
                success, frame = capture.read()
                # stop when finished, or when interrupted by the user
                if not success:
                    print('Finished.')
                    break
                if cv2.waitKey(1) == ord('q'):
                    print('Interrupted by user.')
                    break

                frame_cnt += 1

                # pre-process frame (channel axis reversed for the model)
                frame = frame[..., ::-1]
                frame = image_utils.scale_image(frame)
                frame = image_utils.center_crop(frame)
                inp = image_utils.img_transform(frame)
                inp = inp[None, :, :, :].to(device)

                # inference
                pred = model(inp)

                # post-process prediction
                pred = pred.detach().cpu()[0].numpy()
                pred = image_utils.depth_to_grayscale(pred)

                # concatenate the input frame (channel order reversed back
                # for OpenCV) with the prediction and display
                cv2.imshow('video',
                           np.concatenate((frame[..., ::-1], pred), axis=1))

        elapsed = time.time() - start
        print('\n{} frames evaluated in {:.3f}s'.format(frame_cnt, elapsed))
        # guard the division in case the clock did not advance
        fps = frame_cnt / elapsed if elapsed > 0 else 0.0
        print('{:.2f} FPS'.format(fps))
    finally:
        # always free the capture and close the window, even on errors
        capture.release()
        cv2.destroyAllWindows()
def predict_img(img_path, focal_len):
    """Given an image, create a 3D model of the environment, based on depth
    estimation and semantic segmentation.

    Two pre-trained ``ResnetUnetHybrid`` models are loaded (``output_type``
    ``'depth'`` and ``'seg'``). Both receive the same pre-processed input,
    their outputs are bilinearly resized to 320x320, softmax is applied over
    dimension 1 of the segmentation output, and the results are passed to
    ``image_utils.create_plots``.

    Args:
        img_path: Path of the image file to load.
        focal_len: Forwarded unchanged to ``image_utils.create_plots``.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``img_path``.
    """
    # switch to GPU if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load models
    print('Loading models...')
    model_de = ResnetUnetHybrid.load_pretrained(output_type='depth',
                                                device=device)
    model_seg = ResnetUnetHybrid.load_pretrained(output_type='seg',
                                                 device=device)
    model_de.eval()
    model_seg.eval()

    # load image; cv2.imread signals failure by returning None, not by raising
    raw = cv2.imread(img_path)
    if raw is None:
        raise FileNotFoundError('Could not read image: {}'.format(img_path))
    img = raw[..., ::-1]  # reverse the channel axis (OpenCV loads BGR)
    img = image_utils.scale_image(img)
    img = image_utils.center_crop(img)
    inp = image_utils.img_transform(img)
    inp = inp[None, :, :, :].to(device)  # add a leading batch axis

    print('Plotting...')
    # gradients are never needed for inference; skip autograd bookkeeping
    with torch.no_grad():
        output_de = model_de(inp)
        output_seg = model_seg(inp)

        # up-sample outputs
        output_de = F.interpolate(output_de,
                                  size=(320, 320),
                                  mode='bilinear',
                                  align_corners=True)
        output_seg = F.interpolate(output_seg,
                                   size=(320, 320),
                                   mode='bilinear',
                                   align_corners=True)

        # use softmax on the segmentation output
        output_seg = F.softmax(output_seg, dim=1)

    # plot the results (first element of each batch, as NumPy arrays)
    output_de = output_de.detach().cpu()[0].numpy()
    output_seg = output_seg.detach().cpu()[0].numpy()
    image_utils.create_plots(img,
                             output_de,
                             output_seg,
                             focal_len,
                             uncertainty_threshold=0.9,
                             apply_depth_mask=True)
예제 #4
0
    def predict(self, img):
        """Run one image through ``self.model`` and return a 2-D prediction.

        The image is scaled, center-cropped and transformed with the
        ``image_utils`` helpers, given a leading batch axis and moved to
        ``self.device``. The first element of the output batch is converted
        to a NumPy array and its first channel is returned.
        """
        resized = image_utils.scale_image(img)
        cropped = image_utils.center_crop(resized)
        batch = image_utils.img_transform(cropped)[None, :, :, :]
        batch = batch.to(self.device)

        # announce inference only once, on the very first call
        if not self.predict_called:
            rospy.loginfo('Running the image through the network...')
            self.predict_called = True

        # forward pass, then pull the first batch element back as NumPy
        result = self.model(batch).cpu()[0].data.numpy()

        # move axis 0 to the end and keep only its first slice
        channels_last = np.transpose(result, (1, 2, 0))
        return channels_last[:, :, 0]
예제 #5
0
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    img = data_transform(img)
    return img
 
# Script section: load one image, pre-process it into a NumPy array and load
# an ONNX network with OpenCV's DNN module.

# Alternative input image, currently disabled:
#img_path = "example_data/test_img0.jpg"
img_path = "buikd2.jpg"
# Per-channel normalisation constants. Within this section they are only
# referenced by the disabled string block further down.
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
# The channel reversal is commented out, so the image keeps the channel order
# that cv2.imread produces (BGR).
# NOTE(review): cv2.imread returns None for an unreadable path - confirm the
# file exists before relying on `org`.
org = cv2.imread(img_path)#[..., ::-1]
# Display the loaded image (cv2_imshow is presumably a notebook display
# helper - TODO confirm where it is imported from).
cv2_imshow(org)
# scale_image / center_crop are called unqualified here, while img_transform
# is taken from image_utils.
img = scale_image(org)
img = center_crop(img)
img = image_utils.img_transform(img)
# Convert to a NumPy array (assumes img_transform returns a tensor-like object
# that provides detach() - TODO confirm).
img = img.detach().numpy()
print(img.shape, img.dtype)
# Disabled: add a batch axis and move the input to the device.
#inp = inp[None, :, :, :].to(device)
# The string literal below is not executed as code; it holds an alternative,
# manual pre-processing path that has been switched off.
"""
omg = 2.0 * (org.astype(np.float32)/255.0) - 1.0
omg = scale_image(omg)
omg = center_crop(omg)
omg -= mean
omg /= std
#img = omg.transpose(2,0,1)
#cv2_imshow(omg)
print(img.shape, img.dtype)
"""

# Load the network stored in "bottle.onnx" through OpenCV's DNN module.
net = cv2.dnn.readNet("bottle.onnx")
예제 #6
0
def compute_errors():
    """Download the test files, run all the test images through the model, and evaluate.

    Test image and label paths come from ``collect_test_files()``. Every
    image is pre-processed, passed through the pre-trained model, and the
    prediction is bilinearly resized to 466x582. The matching label is
    center-cropped by 7 rows and 29 columns on each side, which must also
    yield 466x582 for the assignment into the label store to succeed.

    Printed metrics: mean absolute relative error, root mean squared error,
    mean log10 error, and the delta accuracies for thresholds 1.25, 1.25**2
    and 1.25**3.

    Raises:
        FileNotFoundError: If OpenCV cannot read one of the test images.
    """
    # switch to CUDA device if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load model
    print('Loading model...')
    model = ResnetUnetHybrid.load_pretrained(device=device)
    model.eval()

    test_img_paths, test_label_paths = collect_test_files()

    # Size the stores from the actual number of test images instead of a
    # hard-coded count: too few slots would raise IndexError, and unused
    # all-zero label slots would cause divisions by zero in the metrics.
    out_height, out_width = 466, 582
    num_samples = len(test_img_paths)
    preds = np.zeros((out_height, out_width, num_samples), dtype=np.float32)
    labels = np.zeros((out_height, out_width, num_samples), dtype=np.float32)

    print('\nRunning evaluation:')
    # gradients are never needed for evaluation; skip autograd bookkeeping
    with torch.no_grad():
        for idx, (img_path, label_path) in enumerate(zip(test_img_paths, test_label_paths)):
            sys.stdout.write('\r{} / {}'.format(idx+1, num_samples))
            sys.stdout.flush()

            # load image; cv2.imread returns None on failure instead of raising
            raw = cv2.imread(img_path)
            if raw is None:
                raise FileNotFoundError(
                    'Could not read image: {}'.format(img_path))
            img = raw[..., ::-1]  # reverse the channel axis (OpenCV loads BGR)

            # resize and center crop to input size
            img = image_utils.scale_image(img, 0.55)
            img = image_utils.center_crop(img)
            img = image_utils.img_transform(img)
            img = img[None, :, :, :].to(device)

            # inference
            pred = model(img)

            # up-sampling
            pred = F.interpolate(pred, size=(out_height, out_width),
                                 mode='bilinear', align_corners=False)
            pred = pred.detach().cpu().numpy()

            # load label
            label = np.load(label_path)
            # center crop to output size
            label = label[7:label.shape[0]-7, 29:label.shape[1]-29]

            # store the label and the corresponding prediction
            labels[:, :, idx] = label
            preds[:, :, idx] = pred[0, 0, :, :]

    # calculating errors
    rel_error = np.mean(np.abs(preds - labels)/labels)
    print('\nMean Absolute Relative Error: {:.6f}'.format(rel_error))

    rmse = np.sqrt(np.mean((preds - labels)**2))
    print('Root Mean Squared Error: {:.6f}'.format(rmse))

    log10 = np.mean(np.abs(np.log10(preds) - np.log10(labels)))
    print('Mean Log10 Error: {:.6f}'.format(log10))

    # per-pixel max(pred/label, label/pred), thresholded for the delta metrics
    acc = np.maximum(preds/labels, labels/preds)
    delta1 = np.mean(acc < 1.25)
    print('Delta1: {:.6f}'.format(delta1))

    delta2 = np.mean(acc < 1.25**2)
    print('Delta2: {:.6f}'.format(delta2))

    delta3 = np.mean(acc < 1.25**3)
    print('Delta3: {:.6f}'.format(delta3))