Example #1
0
def test_from_blocks():
    """Check that `from_blocks` undoes `to_blocks` for cubic volumes."""
    # Each case pairs a volume shape/content with the block shape used to
    # split it; the round trip must reproduce the volume exactly.
    cases = (
        (lambda: np.ones((256, 256, 256)), (128, 128, 128)),
        (lambda: np.arange(12**3).reshape(12, 12, 12), (4, 4, 4)),
    )
    for make_volume, block_shape in cases:
        original = make_volume()
        split = to_blocks(original, block_shape)
        restored = from_blocks(split, original.shape)
        assert_array_equal(original, restored)
Example #2
0
def test_to_blocks():
    """Check `volume.to_blocks` on a 2x2x2 cube for valid and invalid block shapes."""
    cube = np.arange(8).reshape(2, 2, 2)

    # With unit blocks, each voxel ends up in its own (1, 1, 1) block.
    unit_blocks = volume.to_blocks(cube, (1, 1, 1)).numpy()
    assert_array_equal(unit_blocks, np.arange(8).reshape(8, 1, 1, 1))

    # A block covering the whole cube yields one block equal to the cube.
    whole_block = volume.to_blocks(cube, (2, 2, 2)).numpy()
    assert_array_equal(whole_block, cube[np.newaxis])

    # A block shape that does not fit the cube is expected to be rejected.
    with pytest.raises(tf.errors.InvalidArgumentError):
        volume.to_blocks(cube, (3, 3, 3))
Example #3
0
def test_to_blocks():
    """Check the output shape and content of `to_blocks` for several volumes."""
    # Cubic volume split into eight equal cubes.
    cubic = np.ones((20, 20, 20))
    assert to_blocks(cubic, (10, 10, 10)).shape == (8, 10, 10, 10)

    # Unit blocks: each value of the flat range becomes its own block.
    flat = np.arange(2**3)
    unit_blocks = to_blocks(flat.reshape(2, 2, 2), (1, 1, 1))
    assert_array_equal(unit_blocks, flat.reshape(-1, 1, 1, 1))

    # Non-cubic volume with a non-cubic block shape.
    slab = np.ones((256, 256, 200))
    assert to_blocks(slab, (128, 128, 100)).shape == (8, 128, 128, 100)
Example #4
0
def predict_from_array(inputs,
                       predictor,
                       block_shape,
                       normalizer=normalize_zero_one,
                       batch_size=4):
    """Return a block-wise prediction for an ndarray of features.

    The (optionally normalized) volume is split into blocks of `block_shape`,
    the predictor is called on batches of blocks, and the per-block outputs
    are reassembled into an array with the shape of `inputs`.

    Args:
        inputs: ndarray, array of features.
        predictor: TensorFlow Predictor object, predictor from previously
            trained model. It is called with a dict `{'volume': batch}` and
            the value under `_INFERENCE_CLASSES_KEY` of its result is used.
        block_shape: tuple of len 3, shape of blocks on which to predict.
        normalizer: callable, function that accepts an ndarray and returns an
            ndarray. Called before separating volume into blocks. If falsy
            (e.g. `None`), the inputs are used without normalization.
        batch_size: int, number of sub-volumes per batch for prediction.

    Returns:
        ndarray of predictions.
    """
    # Fall back to the raw inputs when no normalizer is given; previously
    # `features` was left undefined in that case.
    features = normalizer(inputs) if normalizer else inputs
    features = to_blocks(features, block_shape=block_shape)
    outputs = np.zeros_like(features)
    features = features[..., None]  # Add a dimension for single channel.

    # Predict per block to reduce memory consumption.
    n_blocks = features.shape[0]
    n_batches = math.ceil(n_blocks / batch_size)
    progbar = tf.keras.utils.Progbar(n_batches)
    progbar.update(0)
    for j in range(0, n_blocks, batch_size):
        batch = features[j:j + batch_size]
        outputs[j:j + batch_size] = predictor(
            {'volume': batch})[_INFERENCE_CLASSES_KEY]
        progbar.add(1)

    return from_blocks(outputs, output_shape=inputs.shape)
Example #5
0
def get_dataset(
    file_pattern,
    n_classes,
    batch_size,
    volume_shape,
    scalar_label=False,
    block_shape=None,
    n_epochs=None,
    mapping=None,
    augment=False,
    shuffle_buffer_size=None,
    num_parallel_calls=AUTOTUNE,
):
    """Return `tf.data.Dataset` that preprocesses data for training or prediction.

    Labels are preprocessed for binary or multiclass segmentation according to
    `n_classes`. When `scalar_label` is `True`, the label preprocessing
    (binarization, mapping, one-hot encoding) is skipped entirely.

    The pipeline steps are applied in this order: read TFRecords, standardize
    features, optionally separate into blocks, optionally augment, preprocess
    labels, add a channel dimension to the features, prefetch, batch,
    optionally shuffle, and repeat.

    Parameters
    ----------
    file_pattern: str, expression that can be globbed to get TFRecords files
        for this dataset. For example 'data/training_*.tfrecords'.
    n_classes: int, number of classes to segment. Values of 1 and 2 indicate
        binary segmentation (foreground vs background), and values greater than
        2 indicate multiclass segmentation.
    batch_size: int, number of elements per batch. If None, the dataset is not
        batched.
    volume_shape: tuple of length 3, the shape of every volume in the TFRecords
        files. Every volume must have the same shape.
    scalar_label: boolean, if `True`, labels are scalars.
    block_shape: tuple of length 3, the shape of the non-overlapping sub-volumes
        to take from the full volumes. If None, do not separate the full volumes
        into sub-volumes. Separating into non-overlapping sub-volumes is useful
        (sometimes even necessary) to overcome memory limitations depending on
        the number of model parameters.
    n_epochs: int, number of epochs for the dataset to repeat. If None, the
        dataset will be repeated indefinitely.
    mapping: dict, mapping to replace label values. Values equal to a key in
        the mapping are replaced with the corresponding values in the mapping.
        Values not in `mapping.keys()` are replaced with zeros. Only used when
        `n_classes > 2` and `scalar_label` is `False`.
    augment: boolean, if true, apply random rigid transformations to the
        features and labels. NOTE(review): this was documented as being applied
        to the full volumes, but in this function the augmentation step runs
        after the optional separation into blocks -- confirm which is intended.
    shuffle_buffer_size: int, size of the shuffle buffer. If this is not
        None, then the list of files found by 'file_pattern' is also shuffled
        at every iteration. NOTE(review): the shuffle is applied after
        batching, so the shuffled elements are batches rather than single
        volumes -- confirm this is intended.
    num_parallel_calls: int, number of parallel calls to make for data loading
        and processing.

    Returns
    -------
    `tf.data.Dataset` of features and labels. If block_shape is not None, the
    shape of features is `(batch_size, *block_shape, 1)` and the shape of labels
    is `(batch_size, *block_shape, n_classes)`. If block_shape is None, then
    the shape of features is `(batch_size, *volume_shape, 1)` and the shape of
    labels is `(batch_size, *volume_shape, n_classes)`. If `scalar_label` is
    `True`, the shape of labels is always `(batch_size,)`.

    Raises
    ------
    ValueError: if no files match `file_pattern`, or if `scalar_label` is
        `False` and `n_classes` is less than 1.
    """

    files = glob.glob(file_pattern)
    if not files:
        raise ValueError(
            "no files found for pattern '{}'".format(file_pattern))

    # Create dataset of all TFRecord files. After this point, the dataset will
    # have two values per iteration: (feature, label).
    # File shuffling is enabled whenever a (non-zero) shuffle buffer size is
    # given. Only the first matched file is inspected to decide the
    # `compressed` flag, so all files are assumed to share that property.
    shuffle = bool(shuffle_buffer_size)
    compressed = _is_gzipped(files[0])
    dataset = tfrecord_dataset(
        file_pattern=file_pattern,
        volume_shape=volume_shape,
        shuffle=shuffle,
        scalar_label=scalar_label,
        compressed=compressed,
        num_parallel_calls=num_parallel_calls,
    )

    # Standard-score the features.
    dataset = dataset.map(lambda x, y: (standardize(x), y))

    # Separate into blocks, if requested.
    if block_shape is not None:
        if not scalar_label:
            # Volumetric labels are split into blocks the same way as features.
            dataset = dataset.map(
                lambda x, y:
                (to_blocks(x, block_shape), to_blocks(y, block_shape)),
                num_parallel_calls=num_parallel_calls,
            )
            # This step is necessary because separating into blocks adds a dimension.
            dataset = dataset.unbatch()
        if scalar_label:

            def _f(x, y):
                # Split only the features; the scalar label is repeated once
                # per block so that each block keeps its label after unbatch.
                x = to_blocks(x, block_shape)
                n_blocks = x.shape[0]
                y = tf.repeat(y, n_blocks)
                return (x, y)

            dataset = dataset.map(_f, num_parallel_calls=num_parallel_calls)
            # This step is necessary because separating into blocks adds a dimension.
            dataset = dataset.unbatch()

    # Augment examples if requested. Each example is transformed only when a
    # uniform random draw exceeds 0.5; otherwise it passes through unchanged.
    if augment:
        if not scalar_label:
            dataset = dataset.map(
                lambda x, y: tf.cond(
                    tf.random.uniform((1, )) > 0.5,
                    true_fn=lambda: apply_random_transform(x, y),
                    false_fn=lambda: (x, y),
                ),
                num_parallel_calls=num_parallel_calls,
            )
        else:
            dataset = dataset.map(
                lambda x, y: tf.cond(
                    tf.random.uniform((1, )) > 0.5,
                    true_fn=lambda: apply_random_transform_scalar_labels(x, y),
                    false_fn=lambda: (x, y),
                ),
                num_parallel_calls=num_parallel_calls,
            )

    # Binarize or replace labels according to mapping.
    if not scalar_label:
        if n_classes < 1:
            raise ValueError("n_classes must be > 0.")
        elif n_classes == 1:
            # Single-channel binary labels: binarize, then add a channel axis.
            dataset = dataset.map(lambda x, y:
                                  (x, tf.expand_dims(binarize(y), -1)))
        elif n_classes == 2:
            # Two-channel binary labels: binarize, then one-hot encode.
            dataset = dataset.map(lambda x, y:
                                  (x, tf.one_hot(binarize(y), n_classes)))
        elif n_classes > 2:
            # Multiclass labels: optionally remap values, then one-hot encode.
            if mapping is not None:
                dataset = dataset.map(lambda x, y:
                                      (x, replace(y, mapping=mapping)))
            dataset = dataset.map(lambda x, y: (x, tf.one_hot(y, n_classes)))

    # Add grayscale channel to features.
    # TODO: in the future, multi-channel features should be supported.
    dataset = dataset.map(lambda x, y: (tf.expand_dims(x, -1), y))

    # Prefetch data to overlap data production with data consumption. The
    # TensorFlow documentation suggests prefetching `batch_size` elements.
    dataset = dataset.prefetch(buffer_size=batch_size)

    # Batch the dataset, so each iteration gives `batch_size` elements. We drop
    # the remainder so that when training on multiple GPUs, the batch will
    # always be evenly divisible by the number of GPUs. Otherwise, the last
    # batch might have fewer than `batch_size` elements and will cause errors.
    if batch_size is not None:
        dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)

    # Optionally shuffle. We also optionally shuffle the list of files.
    # The TensorFlow documentation recommends shuffling and then repeating.
    if shuffle_buffer_size:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)

    # Repeat the dataset for n_epochs. If n_epochs is None, then repeat
    # indefinitely. If n_epochs is 1, then the dataset will only be iterated
    # through once.
    dataset = dataset.repeat(n_epochs)

    return dataset
Example #6
0
 def _f(x, y):
     # Split the features into blocks and repeat the label once per block so
     # that every block is paired with a copy of the original label.
     blocks = to_blocks(x, block_shape)
     labels = tf.repeat(y, blocks.shape[0])
     return blocks, labels
Example #7
0
def test_from_blocks():
    """Check that `volume.from_blocks` inverts `volume.to_blocks` on a 4x4x4 array."""
    original = np.arange(64).reshape(4, 4, 4)
    split = volume.to_blocks(original, (2, 2, 2))
    restored = volume.from_blocks(split, original.shape)
    assert_array_equal(restored, original)
Example #8
0
def predict_from_array(inputs,
                       predictor,
                       block_shape,
                       return_variance=False,
                       return_entropy=False,
                       return_array_from_images=False,
                       n_samples=1,
                       normalizer=None,
                       batch_size=4):
    """Return block-wise predictions, and optional uncertainty maps, for an array.

    The (optionally normalized) volume is split into blocks of `block_shape`.
    For each batch of blocks the predictor is called `n_samples` times and a
    running mean and running sum of squared deviations of the returned
    'probabilities' are maintained. The per-block results are reassembled into
    arrays with the shape of `inputs`.

    Args:
        inputs: ndarray, array of features.
        predictor: TensorFlow Predictor object, predictor from previously
            trained model. It is called with a dict `{'volume': batch}` and
            must return a mapping with a 'probabilities' entry.
        block_shape: tuple of len 3, shape of blocks on which to predict.
        return_variance: Boolean. If True and `n_samples` is greater than 1,
            the population variance of the sampled probabilities (summed over
            the last axis) is included in the result. If `n_samples` is
            smaller than or equal to 1, the variance is omitted.
        return_entropy: Boolean. If True, the entropy of the mean
            probabilities is included in the result.
        return_array_from_images: Boolean. Accepted for interface
            compatibility; it is not used by this function, which always
            returns arrays.
        n_samples: int, number of times the predictor is sampled per batch. If
            1, the single prediction is used as the mean.
        normalizer: callable, function that accepts an ndarray and returns an
            ndarray. Called before separating volume into blocks. If falsy,
            the inputs are used as they are.
        batch_size: int, number of sub-volumes per batch for prediction.

    Returns:
        Tuple of ndarrays, each with the shape of `inputs`. The first element
        is the argmax over the last axis of the mean probabilities. It is
        followed by the variance (if `return_variance` and `n_samples > 1`)
        and then the entropy (if `return_entropy`). The result is a tuple even
        when it only holds the class predictions.
    """

    print("Normalizer being used {n}".format(n = normalizer))
    if normalizer:
        features = normalizer(inputs)
        print(features.mean())
        print(features.std())
    else:
        features = inputs
    features = to_blocks(features, block_shape=block_shape)
    means = np.zeros_like(features)
    # Variance and entropy are real-valued. Allocating them with the dtype of
    # integer features would silently truncate them, so fall back to a float
    # dtype unless the features are already floating point.
    if np.issubdtype(means.dtype, np.floating):
        stats_dtype = means.dtype
    else:
        stats_dtype = np.float32
    variances = np.zeros(means.shape, dtype=stats_dtype)
    entropies = np.zeros(means.shape, dtype=stats_dtype)

    features = features[..., None]  # Add a dimension for single channel.

    # Predict per block to reduce memory consumption.
    n_blocks = features.shape[0]
    n_batches = math.ceil(n_blocks / batch_size)
    progbar = tf.keras.utils.Progbar(n_batches)
    progbar.update(0)
    for j in range(0, n_blocks, batch_size):
        batch = {'volume': features[j:j + batch_size]}

        # The first sample initializes the running mean; the running sum of
        # squared deviations starts at zero.
        curr_mean = predictor(batch)['probabilities']
        sq_dev = np.zeros_like(curr_mean)

        # Remaining samples update the running statistics incrementally
        # (Welford-style update); `n + 1` is the number of samples seen so far.
        for n in range(1, n_samples):
            sample = predictor(batch)['probabilities']
            prev_mean = curr_mean
            curr_mean = prev_mean + (sample - prev_mean) / float(n + 1)
            sq_dev = sq_dev + np.multiply(prev_mean - sample,
                                          curr_mean - sample)

        progbar.add(1)
        means[j:j + batch_size] = np.argmax(curr_mean, axis=-1)  # max mean
        variances[j:j + batch_size] = np.sum(sq_dev / n_samples, axis=-1)
        # Small constant avoids log(0) for zero probabilities.
        entropies[j:j + batch_size] = -np.sum(
            np.multiply(np.log(curr_mean + 1e-7), curr_mean), axis=-1)

    total_means = from_blocks(means, output_shape=inputs.shape)
    total_variance = from_blocks(variances, output_shape=inputs.shape)
    total_entropy = from_blocks(entropies, output_shape=inputs.shape)

    # Assemble the result; callers always receive a tuple.
    results = [total_means]
    if n_samples > 1 and return_variance:
        results.append(total_variance)
    if return_entropy:
        results.append(total_entropy)
    return tuple(results)