Esempio n. 1
0
def predict_img(img_path):
    """Run a single image through the pre-trained network and display the result.

    The image is read with OpenCV, its channel order is reversed (OpenCV loads
    images as BGR), and it is then passed through ``image_utils.scale_image``,
    ``image_utils.center_crop`` and ``image_utils.img_transform`` before being fed
    to the model. The cropped image and the prediction are handed to
    ``image_utils.show_img_and_pred``.

    :param img_path: path of the image file to load.
    :raises OSError: if OpenCV cannot read the image at ``img_path``.
    """
    # switch to CUDA device if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load model
    print('Loading model...')
    model = ResnetUnetHybrid.load_pretrained(device=device)
    model.eval()

    # load image; cv2.imread signals failure by returning None instead of raising
    img = cv2.imread(img_path)
    if img is None:
        raise OSError('Failed to read image: {}'.format(img_path))
    img = img[..., ::-1]
    img = image_utils.scale_image(img)
    img = image_utils.center_crop(img)
    inp = image_utils.img_transform(img)
    # add a leading batch axis and move the input to the selected device
    inp = inp[None, :, :, :].to(device)

    # inference; gradients are not needed, so skip autograd bookkeeping
    print('Running the image through the network...')
    with torch.no_grad():
        output = model(inp)

    # transform and plot the results
    output = output.cpu()[0].numpy()
    image_utils.show_img_and_pred(img, output)
Esempio n. 2
0
def run_vid(input_path):
    """Run every frame of a video through the network and display the predictions.

    Each frame is pre-processed with ``image_utils.scale_image``,
    ``image_utils.center_crop`` and ``image_utils.img_transform``, passed through
    the pre-trained model, and shown side by side with its prediction in an OpenCV
    window. Processing stops when the video ends or when the user presses ``q``.
    Frame count, elapsed time and FPS are printed at the end.

    :param input_path: source passed to ``cv2.VideoCapture``.
    :return: ``-1`` if the video could not be opened, otherwise ``None``.
    """
    # switch to CUDA device if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load model
    print('Loading model...')
    model = ResnetUnetHybrid.load_pretrained(device=device)
    model.eval()

    # start running the video
    print('Inferencing video frames...')
    start = time.time()
    capture = cv2.VideoCapture(input_path)
    frame_cnt = 0

    if not capture.isOpened():
        print('ERROR: Failed to open video.')
        capture.release()
        return -1

    # try/finally guarantees the capture and the window are released even if
    # pre-processing or inference raises part-way through the video
    try:
        # gradients are never needed here; disabling them avoids building an
        # autograd graph for every frame
        with torch.no_grad():
            while True:
                success, frame = capture.read()
                # stop when finished, or when interrupted by the user
                if not success:
                    print('Finished.')
                    break
                if cv2.waitKey(1) == ord('q'):
                    print('Interrupted by user.')
                    break

                frame_cnt += 1

                # pre-process frame (reverse channel order: OpenCV delivers BGR)
                frame = frame[..., ::-1]
                frame = image_utils.scale_image(frame)
                frame = image_utils.center_crop(frame)
                inp = image_utils.img_transform(frame)
                inp = inp[None, :, :, :].to(device)

                # inference
                pred = model(inp)

                # post-process prediction
                pred = pred.cpu()[0].numpy()
                pred = image_utils.depth_to_grayscale(pred)

                # concatenate the input frame (back in OpenCV channel order)
                # with the prediction and display
                cv2.imshow('video', np.concatenate((frame[..., ::-1], pred), axis=1))

        elapsed = time.time() - start
        print('\n{} frames evaluated in {:.3f}s'.format(frame_cnt, elapsed))
        # guard against a zero interval on coarse clocks
        if elapsed > 0:
            print('{:.2f} FPS'.format(frame_cnt / elapsed))
    finally:
        capture.release()
        cv2.destroyAllWindows()
    def predict_dataset(self, save_path, dataset_filepath, model, batch_size):
        """
        Predicts semantic labels for all images of the specified dataset and saves results to disk.

        Each image is written as ``<index>.jpg``, where the index is the position of the image in
        the TFRecordDataset. An existing output directory with the same timestamp is removed first.

        :param save_path: The target directory. A sub-directory will be created from the current date and time.
        :param dataset_filepath: The filename of the TFRecordDataset to use for prediction.
        :param model: An instance of FCN Model. Its ``logits``, ``keep_prob``, ``inputs`` and
            ``image_shape`` attributes are used.
        :param batch_size: The number of images per batch.
        :return: None
        :raises ValueError: If ``dataset_filepath`` does not exist.
        :raises RuntimeError: If no default TensorFlow session is active.
        """
        if not os.path.exists(dataset_filepath):
            raise ValueError('File not found: {}'.format(dataset_filepath))

        sess = tf.compat.v1.get_default_session()
        if sess is None:
            raise RuntimeError('predict_dataset requires an active default TensorFlow session')

        # Make the folder to save the predictions.
        # The timestamp is formatted on its own (e.g. 2020-01-31T12:34H56) and only then joined to
        # save_path, so a colon inside save_path (such as a Windows drive letter) cannot corrupt it.
        timestamp = datetime.now().strftime('%Y-%m-%dT%H:%MH%S')
        output_path = os.path.join(save_path, timestamp)
        if os.path.exists(output_path):
            shutil.rmtree(output_path)
        print('Saving predictions to ' + output_path)
        os.makedirs(output_path)

        # Load the dataset and make an iterator
        dataset = tf.data.TFRecordDataset(dataset_filepath)
        dataset = dataset.map(self.parse_record)
        dataset = dataset.batch(batch_size)
        iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
        next_sample = iterator.get_next()

        # Build the softmax op once: creating it inside the loop would add a new node to the graph
        # for every batch.
        softmax_op = tf.nn.softmax(model.logits)
        n_pixels = np.prod(model.image_shape)

        idx = 0  # The image name is its index in the TFRecordDataset
        while True:
            # Only fetching the next batch can signal the end of the dataset.
            try:
                im_batch, _, shape_batch = sess.run(next_sample)
            except tf.errors.OutOfRangeError:
                break

            # Make an array from a tuple of 3 lists each with `batch_size` elements
            shape_batch = np.swapaxes(np.asarray(shape_batch), 0, 1)
            # Numpy vector of length BATCH_SIZE * N_PIXELS * N_CLASSES, reshaped per image below
            im_softmax = sess.run(softmax_op, {model.keep_prob: 1.0,
                                               model.inputs: im_batch})
            im_softmax = im_softmax.reshape((len(im_batch), n_pixels, self.n_classes + 1))

            for image, probs, shape in zip(im_batch, im_softmax, shape_batch):
                # Predict pixel class and reshape back to the image layout.
                im_pred = np.argmax(probs, axis=1).reshape(model.image_shape)
                im_pred = labels2colors(im_pred, self.cmap)
                im_masked = center_crop(apply_mask(image, im_pred), shape[:2])
                imwrite(os.path.join(output_path, str(idx) + '.jpg'), im_masked)
                idx += 1
def predict_img(img_path, focal_len):
    """Given an image, create a 3D model of the environment, based on depth estimation and semantic segmentation.

    Two pre-trained ``ResnetUnetHybrid`` models are loaded (``output_type='depth'`` and
    ``output_type='seg'``). Both outputs are bilinearly up-sampled to 320x320, the
    segmentation output is turned into per-class probabilities with a softmax over
    dimension 1, and everything is handed to ``image_utils.create_plots``.

    :param img_path: path of the image file to load.
    :param focal_len: forwarded unchanged to ``image_utils.create_plots``
        (presumably the camera focal length - confirm against that helper).
    :raises OSError: if OpenCV cannot read the image at ``img_path``.
    """
    # switch to GPU if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load models
    print('Loading models...')
    model_de = ResnetUnetHybrid.load_pretrained(output_type='depth',
                                                device=device)
    model_seg = ResnetUnetHybrid.load_pretrained(output_type='seg',
                                                 device=device)
    model_de.eval()
    model_seg.eval()

    # load image; cv2.imread signals failure by returning None instead of raising
    img = cv2.imread(img_path)
    if img is None:
        raise OSError('Failed to read image: {}'.format(img_path))
    img = img[..., ::-1]
    img = image_utils.scale_image(img)
    img = image_utils.center_crop(img)
    inp = image_utils.img_transform(img)
    inp = inp[None, :, :, :].to(device)

    print('Plotting...')
    # inference only: no gradients are needed for the forward passes or the
    # post-processing, so autograd is disabled for the whole section
    with torch.no_grad():
        output_de = model_de(inp)
        output_seg = model_seg(inp)

        # up-sample outputs
        output_de = F.interpolate(output_de,
                                  size=(320, 320),
                                  mode='bilinear',
                                  align_corners=True)
        output_seg = F.interpolate(output_seg,
                                   size=(320, 320),
                                   mode='bilinear',
                                   align_corners=True)

        # use softmax on the segmentation output
        output_seg = F.softmax(output_seg, dim=1)

    # plot the results
    output_de = output_de.cpu()[0].numpy()
    output_seg = output_seg.cpu()[0].numpy()
    image_utils.create_plots(img,
                             output_de,
                             output_seg,
                             focal_len,
                             uncertainty_threshold=0.9,
                             apply_depth_mask=True)
Esempio n. 5
0
    def predict(self, img):
        """Run one image through ``self.model`` and return a 2-D prediction map.

        The image is pre-processed with ``image_utils.scale_image``,
        ``image_utils.center_crop`` and ``image_utils.img_transform``. A log message
        is emitted on the first call only.

        :param img: input image, forwarded to ``image_utils.scale_image``.
        :return: NumPy array holding index 0 along the leading axis of the first
            (and only) batch element of the model output.
        """
        # Function-scope import: the rest of this method does not reference torch by name.
        import torch

        img = image_utils.scale_image(img)
        img = image_utils.center_crop(img)
        inp = image_utils.img_transform(img)
        inp = inp[None, :, :, :].to(self.device)

        # inference
        if not self.predict_called:
            rospy.loginfo('Running the image through the network...')
            self.predict_called = True
        # This method only does forward passes, so skip autograd bookkeeping.
        with torch.no_grad():
            output = self.model(inp)

        # transform and output the results
        output = output.cpu()[0].numpy()
        # Equivalent to moving the leading axis last and taking [:, :, 0].
        return output[0]
Esempio n. 6
0
import torch  # required for torch.no_grad() below

img_path = "example_data/test_img0.jpg"
#img_path = "/content/DSC_0000007.jpg"

# switch to CUDA device if possible
#device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device = "cpu" # for onnx export
print('Use GPU: {}'.format(str(device) != 'cpu'))
# load model
print('Loading model...')
model = ResnetUnetHybrid.load_pretrained(device=device)
model.eval()

# load image; cv2.imread returns None (rather than raising) when the file cannot be read
img = cv2.imread(img_path)
if img is None:
    raise OSError('Failed to read image: {}'.format(img_path))
img = img[..., ::-1]  # reverse channel order (OpenCV loads BGR)
img = image_utils.scale_image(img)
img = image_utils.center_crop(img)
inp = image_utils.img_transform(img)
inp = inp[None, :, :, :].to(device)  # add a leading batch axis
print(inp.shape)
# inference; gradients are not needed, so skip autograd bookkeeping
print('Running the image through the network...')
with torch.no_grad():
    output = model(inp)

# transform and plot the results
depth = output.cpu()[0].numpy()

def depth_to_grayscale(depth, max_dist=10.0):
    """Transform a prediction into a grayscale 8-bit image.

    NOTE(review): only the clamping and normalisation steps are visible here. The
    snippet ends without an 8-bit conversion or a ``return`` statement, so it appears
    to be truncated; as shown, the function returns ``None``. Confirm against the
    full source before relying on it.

    :param depth: 3-D array whose first axis is moved to the last position
        (presumably a channels-first depth map - TODO confirm).
    :param max_dist: values above this are clamped to it, and the result is divided
        by it (presumably a distance limit in the prediction's units - TODO confirm).
    """
    # move the leading axis to the end: (a, b, c) -> (b, c, a)
    depth = np.transpose(depth, (1, 2, 0))
    # clamp in place; for a NumPy array the transpose above is a view, so this
    # also writes into the caller's array
    depth[depth > max_dist] = max_dist
    # rescale so that max_dist maps to 1.0 (this creates a new array)
    depth = depth / max_dist
Esempio n. 7
0
def compute_errors():
    """Run all the test images through the model, and evaluate.

    Test image and label paths come from ``collect_test_files()`` (presumably
    downloading them if needed - confirm against that helper). Every prediction is
    bilinearly up-sampled to 466x582 and compared with the label cropped by 7 rows
    and 29 columns on each side. The function prints the mean absolute relative
    error, RMSE, mean log10 error and the delta1/2/3 accuracies (thresholds 1.25,
    1.25**2, 1.25**3).

    :raises OSError: if OpenCV cannot read one of the test images.
    """
    # switch to CUDA device if possible
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Use GPU: {}'.format(str(device) != 'cpu'))

    # load model
    print('Loading model...')
    model = ResnetUnetHybrid.load_pretrained(device=device)
    model.eval()

    test_img_paths, test_label_paths = collect_test_files()

    # Size the buffers from the actual number of test files. A fixed sample count
    # would overflow with more files and leave zero-filled slices (and therefore
    # divisions by zero in the metrics) with fewer.
    n_samples = len(test_img_paths)
    preds = np.zeros((466, 582, n_samples), dtype=np.float32)
    labels = np.zeros((466, 582, n_samples), dtype=np.float32)

    print('\nRunning evaluation:')
    # evaluation only: disable autograd for all forward passes
    with torch.no_grad():
        for idx, (img_path, label_path) in enumerate(zip(test_img_paths, test_label_paths)):
            sys.stdout.write('\r{} / {}'.format(idx+1, n_samples))
            sys.stdout.flush()

            # load image; cv2.imread returns None instead of raising on failure
            img = cv2.imread(img_path)
            if img is None:
                raise OSError('Failed to read image: {}'.format(img_path))
            img = img[..., ::-1]

            # resize and center crop to input size
            img = image_utils.scale_image(img, 0.55)
            img = image_utils.center_crop(img)
            img = image_utils.img_transform(img)
            img = img[None, :, :, :].to(device)

            # inference
            pred = model(img)

            # up-sampling
            pred = F.interpolate(pred, size=(466, 582), mode='bilinear', align_corners=False)
            pred = pred.cpu().numpy()

            # load label
            label = np.load(label_path)
            # center crop to output size
            label = label[7:label.shape[0]-7, 29:label.shape[1]-29]

            # store the label and the corresponding prediction
            labels[:, :, idx] = label
            preds[:, :, idx] = pred[0, 0, :, :]

    # calculating errors
    rel_error = np.mean(np.abs(preds - labels)/labels)
    print('\nMean Absolute Relative Error: {:.6f}'.format(rel_error))

    rmse = np.sqrt(np.mean((preds - labels)**2))
    print('Root Mean Squared Error: {:.6f}'.format(rmse))

    log10 = np.mean(np.abs(np.log10(preds) - np.log10(labels)))
    print('Mean Log10 Error: {:.6f}'.format(log10))

    # per-pixel ratio between prediction and label, taken in whichever
    # direction is >= 1
    acc = np.maximum(preds/labels, labels/preds)
    delta1 = np.mean(acc < 1.25)
    print('Delta1: {:.6f}'.format(delta1))

    delta2 = np.mean(acc < 1.25**2)
    print('Delta2: {:.6f}'.format(delta2))

    delta3 = np.mean(acc < 1.25**3)
    print('Delta3: {:.6f}'.format(delta3))