Example #1
import random

import tensorflow as tf
from numpy.testing import assert_array_equal

# Assumed imports: these examples appear to come from the nobrainer package.
from nobrainer import io, tfrecord


def test_write_read_float_labels(csv_of_volumes, tmp_path):  # noqa: F811
    # `csv_of_volumes` is a pytest fixture providing a CSV of volume paths.
    files = io.read_csv(csv_of_volumes, skip_header=False)
    files = [(x, random.random()) for x, _ in files]
    filename_template = str(tmp_path / "data-{shard:03d}.tfrecords")
    examples_per_shard = 12
    tfrecord.write(
        files,
        filename_template=filename_template,
        examples_per_shard=examples_per_shard,
        processes=1,
    )

    paths = sorted(tmp_path.glob("data-*.tfrecords"))
    assert len(paths) == 9
    assert (tmp_path / "data-008.tfrecords").is_file()

    dset = tf.data.TFRecordDataset(list(map(str, paths)),
                                   compression_type="GZIP")
    dset = dset.map(
        tfrecord.parse_example_fn(volume_shape=(8, 8, 8), scalar_label=True))

    for ref, test in zip(files, dset):
        x, y = ref
        x = io.read_volume(x)
        assert_array_equal(x, test[0])
        assert_array_equal(y, test[1])
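The shard count asserted above follows from the writer packing `examples_per_shard` examples into each file, so N examples produce ceil(N / 12) shards. A minimal sketch of that arithmetic, assuming (hypothetically) that the `csv_of_volumes` fixture lists 100 volumes, which is consistent with the asserted 9 shards:

import math

# Hypothetical count: the csv_of_volumes fixture is assumed to list 100 volumes.
n_examples = 100
examples_per_shard = 12

# Each shard holds up to `examples_per_shard` examples, so the writer emits
# ceil(n / examples_per_shard) files: data-000 through data-008.
n_shards = math.ceil(n_examples / examples_per_shard)
assert n_shards == 9
print(f"data-{n_shards - 1:03d}.tfrecords")  # -> data-008.tfrecords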
Example #2
import copy
import random

import numpy as np
import tensorflow as tf  # tf.logging below is TF1-era API

# read_volume, _check_shapes_equal, and iterblocks_3d are helpers defined
# elsewhere in the same module.


def itervolumes(filepaths,
                block_shape,
                x_dtype,
                y_dtype,
                strides=(1, 1, 1),
                shuffle=False,
                normalizer=None):
    """Yield tuples of numpy arrays `(features, labels)` from a list of
    filepaths to neuroimaging files.
    """
    filepaths = copy.deepcopy(filepaths)

    if shuffle:
        random.shuffle(filepaths)

    for idx, (features_fp, labels_fp) in enumerate(filepaths):
        try:
            features = read_volume(features_fp, dtype=x_dtype)
            labels = read_volume(labels_fp, dtype=y_dtype)
        except Exception:
            tf.logging.fatal(
                "Error reading at least one input file: {} {}".format(
                    features_fp, labels_fp))
            raise

        if normalizer is not None:
            features, labels = normalizer(features, labels)

        _check_shapes_equal(features, labels)
        feature_gen = iterblocks_3d(arr=features,
                                    kernel_size=block_shape,
                                    strides=strides)
        label_gen = iterblocks_3d(arr=labels,
                                  kernel_size=block_shape,
                                  strides=strides)

        for ff, ll in zip(feature_gen, label_gen):
            yield ff[..., np.newaxis], ll
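`iterblocks_3d` is not shown here; per the docstring in the next example, strides equal to the kernel size yield non-overlapping blocks. A minimal sketch of that block-iteration idea, illustrative only and not nobrainer's actual implementation:

import numpy as np

def iter_blocks_3d_sketch(arr, kernel_size, strides):
    """Yield 3D blocks of shape `kernel_size`, stepping by `strides`.

    Illustrative sketch; the real iterblocks_3d may differ in details
    such as padding and edge handling.
    """
    kx, ky, kz = kernel_size
    sx, sy, sz = strides
    for i in range(0, arr.shape[0] - kx + 1, sx):
        for j in range(0, arr.shape[1] - ky + 1, sy):
            for k in range(0, arr.shape[2] - kz + 1, sz):
                yield arr[i:i + kx, j:j + ky, k:k + kz]

volume = np.arange(8 ** 3).reshape(8, 8, 8)
blocks = list(iter_blocks_3d_sketch(volume, kernel_size=(4, 4, 4),
                                    strides=(4, 4, 4)))
assert len(blocks) == 8  # an (8, 8, 8) volume splits into eight (4, 4, 4) blocks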
Example #3
import copy
import random

import numpy as np
import tensorflow as tf  # tf.logging below is TF1-era API

# read_volume and iterblocks_3d are helpers defined elsewhere in the module.


def itervolumes(filepaths,
                block_shape,
                x_dtype=np.float32,
                y_dtype=np.int32,
                strides=None,
                shuffle=False,
                normalizer=None):
    """Yield tuples of numpy arrays `(features, labels)` from a list of
    filepaths to neuroimaging files.

    Args:
        filepaths: nested list of tuples, where each tuple has length two. The
            first item in each tuple is the path to the volume of features
            (e.g., an anatomical scan), and the second item is the path to the
            volume of labels (e.g., FreeSurfer's aparc+aseg.mgz).
        block_shape: tuple of len 3 or None, the shape of blocks to take from
            the features and labels. This is useful if a full volume cannot fit
            into GPU memory. If `block_shape` is `None`, full volumes are
            yielded. Use `(None, None, None)` if yielding full volumes and
            volumes have different shapes.
        x_dtype: dtype object or string, data type of features.
        y_dtype: dtype object or string, data type of labels.
        strides: tuple or None, strides to take between blocks. If None,
            strides will be equal to `block_shape`, which will generate
            non-overlapping blocks.
        shuffle: bool, if true, shuffle the list of filepaths. Pairs of
            `(features, labels)` filepaths are maintained.
        normalizer: callable, function that accepts two arrays (`features` and
            `labels`) and returns two arrays (`features` and `labels`).

    Yields:
        Tuple of `(features, labels)`. If `block_shape` is a tuple of integers,
        the shape of `features` is `(*block_shape, 1)`, and the shape of
        `labels` is `block_shape`. If `block_shape` is `None` or
        `(None, None, None)`, the shape of `features` is `(*volume_shape, 1)`,
        and the shape of `labels` is `volume_shape`.
    """
    filepaths = copy.deepcopy(filepaths)

    # Common error is to pass the CSV filepath as `filepaths` argument.
    if isinstance(filepaths, str):
        raise ValueError("`filepaths` must be a nested sequence of filepaths.")

    if any(len(i) != 2 for i in filepaths):
        raise ValueError("Found sequence with len != 2 in `filepaths`.")

    if shuffle:
        random.shuffle(filepaths)

    for idx, (features_fp, labels_fp) in enumerate(filepaths):
        try:
            features = read_volume(features_fp, dtype=x_dtype)
            labels = read_volume(labels_fp, dtype=y_dtype)
        except Exception:
            tf.logging.fatal(
                "Error reading at least one input file: {} {}".format(
                    features_fp, labels_fp))
            raise

        if normalizer is not None:
            features, labels = normalizer(features, labels)

        if features.shape != labels.shape:
            raise ValueError(
                "Shape of features ({}) is not equal to shape of labels ({}).".
                format(features.shape, labels.shape))

        # Yield full volumes.
        if block_shape is None or block_shape == (None, None, None):
            yield (features[..., np.newaxis].astype(x_dtype),
                   labels.astype(y_dtype))

        # Yield blocks of volumes.
        else:
            feature_gen = iterblocks_3d(arr=features,
                                        kernel_size=block_shape,
                                        strides=strides)
            label_gen = iterblocks_3d(arr=labels,
                                      kernel_size=block_shape,
                                      strides=strides)

            for ff, ll in zip(feature_gen, label_gen):
                # TEMP: skipping pairs whose labels are all zero is disabled
                # because it makes the number of samples dynamic, which
                # prevents running train_and_evaluate.
                # if not ll.any():
                #     continue

                # Add channel axis (channel last).
                yield ff[..., np.newaxis].astype(x_dtype), ll.astype(y_dtype)
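A typical call, per the docstring above, might look like the following sketch; the file paths are hypothetical placeholders:

# Hypothetical file pairs; replace with real (features, labels) paths.
filepaths = [
    ("sub-01_t1.nii.gz", "sub-01_aparc+aseg.mgz"),
    ("sub-02_t1.nii.gz", "sub-02_aparc+aseg.mgz"),
]

# Non-overlapping (32, 32, 32) blocks; strides=None defaults to block_shape.
gen = itervolumes(filepaths, block_shape=(32, 32, 32), shuffle=True)
features, labels = next(gen)
print(features.shape)  # (32, 32, 32, 1) -- channel axis appended
print(labels.shape)    # (32, 32, 32)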
Example #4
from pathlib import Path

import nibabel as nib
import numpy as np

# read_volume, read_mapping, replace, get_dice_for_images, _predict,
# normalize_zero_one, and DT_X are helpers/constants defined elsewhere in
# the same module.


def validate_from_filepath(
    filepath,
    predictor,
    block_shape,
    n_classes,
    mapping_y,
    return_variance=False,
    return_entropy=False,
    return_array_from_images=False,
    n_samples=1,
    normalizer=normalize_zero_one,
    batch_size=4,
    dtype=DT_X,
):
    """Computes dice for a prediction compared to a ground truth image.

    Args:
        filepath: tuple, tuple of paths to an existing neuroimaging volume
            (index 0) and its ground truth (index 1).
        predictor: TensorFlow Predictor object, predictor from a previously
            trained model.
        block_shape: tuple of len 3, shape of blocks on which to predict.
        n_classes: int, number of classes the model is trained to output.
        mapping_y: path-like, path to the CSV mapping file per the command
            line argument.
        return_variance: bool, if True, also return the running population
            variance along with the mean. Note that if `n_samples` is less
            than or equal to 1, the variance is not returned; `None` is
            returned instead.
        return_entropy: bool, if True, also return the running entropy along
            with the mean.
        return_array_from_images: bool, if True and the input is an image,
            filepath, or list of filepaths, return arrays of
            [mean, variance, entropy] instead of images. If the input is an
            array, an array is returned regardless of this flag.
        n_samples: int, number of samples to draw. If 1, return the single
            prediction value.
        normalizer: callable, function that accepts an ndarray and returns an
            ndarray. Called before separating volume into blocks.
        batch_size: int, number of sub-volumes per batch for prediction.
        dtype: str or dtype object, dtype of features.

    Returns:
        Tuple of `(outputs, dice)`, where `outputs` contains
        `nibabel.spatialimages.SpatialImage` objects or arrays of predictions
        of the mean, variance (optional), and entropy (optional), and `dice`
        is the Dice score of the prediction against the ground truth.
    """
    if not Path(filepath[0]).is_file():
        raise FileNotFoundError("could not find file {}".format(filepath[0]))
    img = nib.load(filepath[0])
    y = read_volume(filepath[1], dtype=np.int32)

    outputs = _predict(inputs=img,
                       predictor=predictor,
                       block_shape=block_shape,
                       return_variance=return_variance,
                       return_entropy=return_entropy,
                       return_array_from_images=return_array_from_images,
                       n_samples=n_samples,
                       normalizer=normalizer,
                       batch_size=batch_size)
    prediction_image = outputs[0].get_data()
    y = replace(y, read_mapping(mapping_y))
    dice = get_dice_for_images(prediction_image, y, n_classes)
    return outputs, dice
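A usage sketch under the assumption that `predictor` is a TF1-style `Predictor` loaded from a SavedModel; all paths and the class count are hypothetical placeholders:

import tensorflow as tf  # TF1-era API, matching the tf.logging usage above

# Hypothetical paths: the SavedModel directory, volume/label files, and
# mapping CSV are placeholders.
predictor = tf.contrib.predictor.from_saved_model("path/to/saved_model")
outputs, dice = validate_from_filepath(
    filepath=("t1.nii.gz", "aparc+aseg.mgz"),
    predictor=predictor,
    block_shape=(32, 32, 32),
    n_classes=50,  # hypothetical class count
    mapping_y="mapping.csv",
    n_samples=10,
    return_variance=True,
    return_entropy=True,
)
print(dice)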