def test_from_blocks():
    data = np.ones((256, 256, 256))
    blocks = to_blocks(data, (128, 128, 128))
    assert_array_equal(data, from_blocks(blocks, (256, 256, 256)))

    data = np.arange(12**3).reshape(12, 12, 12)
    blocks = to_blocks(data, (4, 4, 4))
    assert_array_equal(data, from_blocks(blocks, (12, 12, 12)))
def test_to_blocks():
    x = np.arange(8).reshape(2, 2, 2)

    outputs = volume.to_blocks(x, (1, 1, 1)).numpy()
    expected = np.array(
        [[[[0]]], [[[1]]], [[[2]]], [[[3]]], [[[4]]], [[[5]]], [[[6]]], [[[7]]]]
    )
    assert_array_equal(outputs, expected)

    outputs = volume.to_blocks(x, (2, 2, 2)).numpy()
    assert_array_equal(outputs, x[None])

    # A block shape that does not evenly divide the volume shape is an error.
    with pytest.raises(tf.errors.InvalidArgumentError):
        volume.to_blocks(x, (3, 3, 3))
def test_to_blocks():
    shape = (20, 20, 20)
    data = np.ones(shape)
    blocks = to_blocks(data, (10, 10, 10))
    assert blocks.shape == (8, 10, 10, 10)

    data = np.arange(2**3)
    blocks = to_blocks(data.reshape(2, 2, 2), (1, 1, 1))
    reference = data[..., None, None, None]
    assert_array_equal(blocks, reference)

    shape = (256, 256, 200)
    data = np.ones(shape)
    blocks = to_blocks(data, (128, 128, 100))
    assert blocks.shape == (8, 128, 128, 100)
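# Illustrative check (not part of the test suite above): the leading dimension
# returned by to_blocks is the product of volume_shape[i] // block_shape[i]
# over the three axes, e.g. splitting (256, 256, 200) into (128, 128, 100)
# blocks yields 2 * 2 * 2 = 8 blocks.
import numpy as np

volume_shape = (256, 256, 200)
block_shape = (128, 128, 100)
n_blocks = int(np.prod([v // b for v, b in zip(volume_shape, block_shape)]))
assert n_blocks == 8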
def predict_from_array(inputs,
                       predictor,
                       block_shape,
                       normalizer=normalize_zero_one,
                       batch_size=4):
    """Return a prediction given an ndarray of features.

    Args:
        inputs: ndarray, array of features.
        predictor: TensorFlow Predictor object, predictor from previously
            trained model.
        block_shape: tuple of len 3, shape of blocks on which to predict.
        normalizer: callable, function that accepts an ndarray and returns an
            ndarray. Called before separating volume into blocks.
        batch_size: int, number of sub-volumes per batch for prediction.

    Returns:
        ndarray of predictions.
    """
    if normalizer:
        features = normalizer(inputs)
    else:
        features = inputs
    features = to_blocks(features, block_shape=block_shape)
    outputs = np.zeros_like(features)
    features = features[..., None]  # Add a dimension for single channel.

    # Predict per block to reduce memory consumption.
    n_blocks = features.shape[0]
    n_batches = math.ceil(n_blocks / batch_size)
    progbar = tf.keras.utils.Progbar(n_batches)
    progbar.update(0)
    for j in range(0, n_blocks, batch_size):
        outputs[j:j + batch_size] = predictor(
            {'volume': features[j:j + batch_size]})[_INFERENCE_CLASSES_KEY]
        progbar.add(1)

    return from_blocks(outputs, output_shape=inputs.shape)
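# Hedged usage sketch (not from the source): assumes a SavedModel exported by
# the training pipeline and loaded with the TF 1.x tf.contrib.predictor API,
# and a T1-weighted volume loaded with nibabel. Paths are placeholders.
import nibabel as nib
import tensorflow as tf

predictor = tf.contrib.predictor.from_saved_model("path/to/saved_model")
volume = nib.load("path/to/T1.nii.gz").get_fdata()
labels = predict_from_array(volume, predictor, block_shape=(128, 128, 128))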
def get_dataset(
    file_pattern,
    n_classes,
    batch_size,
    volume_shape,
    scalar_label=False,
    block_shape=None,
    n_epochs=None,
    mapping=None,
    augment=False,
    shuffle_buffer_size=None,
    num_parallel_calls=AUTOTUNE,
):
    """Return `tf.data.Dataset` that preprocesses data for training or prediction.

    Labels are preprocessed for binary or multiclass segmentation according to
    `n_classes`.

    Parameters
    ----------
    file_pattern: str, expression that can be globbed to get TFRecords files
        for this dataset. For example 'data/training_*.tfrecords'.
    n_classes: int, number of classes to segment. Values of 1 and 2 indicate
        binary segmentation (foreground vs background), and values greater
        than 2 indicate multiclass segmentation.
    batch_size: int, number of elements per batch.
    volume_shape: tuple of length 3, the shape of every volume in the
        TFRecords files. Every volume must have the same shape.
    scalar_label: boolean, if `True`, labels are scalars.
    block_shape: tuple of length 3, the shape of the non-overlapping
        sub-volumes to take from the full volumes. If None, do not separate
        the full volumes into sub-volumes. Separating into non-overlapping
        sub-volumes is useful (sometimes even necessary) to overcome memory
        limitations depending on the number of model parameters.
    n_epochs: int, number of epochs for the dataset to repeat. If None, the
        dataset will be repeated indefinitely.
    mapping: dict, mapping to replace label values. Values equal to a key in
        the mapping are replaced with the corresponding values in the mapping.
        Values not in `mapping.keys()` are replaced with zeros.
    augment: boolean, if true, apply random rigid transformations to the
        features and labels. The rigid transformations are applied to the full
        volumes.
    shuffle_buffer_size: int, buffer of full volumes to shuffle. If this is
        not None, then the list of files found by 'file_pattern' is also
        shuffled at every iteration.
    num_parallel_calls: int, number of parallel calls to make for data loading
        and processing.

    Returns
    -------
    `tf.data.Dataset` of features and labels. If block_shape is not None, the
    shape of features is `(batch_size, *block_shape, 1)` and the shape of
    labels is `(batch_size, *block_shape, n_classes)`. If block_shape is None,
    then the shape of features is `(batch_size, *volume_shape, 1)` and the
    shape of labels is `(batch_size, *volume_shape, n_classes)`. If
    `scalar_label` is `True`, the shape of labels is always `(batch_size,)`.
    """
    files = glob.glob(file_pattern)
    if not files:
        raise ValueError(
            "no files found for pattern '{}'".format(file_pattern))

    # Create dataset of all TFRecord files. After this point, the dataset will
    # have two values per iteration: (feature, label).
    shuffle = bool(shuffle_buffer_size)
    compressed = _is_gzipped(files[0])
    dataset = tfrecord_dataset(
        file_pattern=file_pattern,
        volume_shape=volume_shape,
        shuffle=shuffle,
        scalar_label=scalar_label,
        compressed=compressed,
        num_parallel_calls=num_parallel_calls,
    )

    # Standard-score the features.
    dataset = dataset.map(lambda x, y: (standardize(x), y))

    # Separate into blocks, if requested.
    if block_shape is not None:
        if not scalar_label:
            dataset = dataset.map(
                lambda x, y: (to_blocks(x, block_shape), to_blocks(y, block_shape)),
                num_parallel_calls=num_parallel_calls,
            )
            # This step is necessary because separating into blocks adds a
            # dimension.
            dataset = dataset.unbatch()
        if scalar_label:
            def _f(x, y):
                x = to_blocks(x, block_shape)
                n_blocks = x.shape[0]
                y = tf.repeat(y, n_blocks)
                return (x, y)

            dataset = dataset.map(_f, num_parallel_calls=num_parallel_calls)
            # This step is necessary because separating into blocks adds a
            # dimension.
            dataset = dataset.unbatch()

    # Augment examples if requested.
    if augment:
        if not scalar_label:
            dataset = dataset.map(
                lambda x, y: tf.cond(
                    tf.random.uniform((1,)) > 0.5,
                    true_fn=lambda: apply_random_transform(x, y),
                    false_fn=lambda: (x, y),
                ),
                num_parallel_calls=num_parallel_calls,
            )
        else:
            dataset = dataset.map(
                lambda x, y: tf.cond(
                    tf.random.uniform((1,)) > 0.5,
                    true_fn=lambda: apply_random_transform_scalar_labels(x, y),
                    false_fn=lambda: (x, y),
                ),
                num_parallel_calls=num_parallel_calls,
            )

    # Binarize or replace labels according to mapping.
    if not scalar_label:
        if n_classes < 1:
            raise ValueError("n_classes must be > 0.")
        elif n_classes == 1:
            dataset = dataset.map(
                lambda x, y: (x, tf.expand_dims(binarize(y), -1)))
        elif n_classes == 2:
            dataset = dataset.map(
                lambda x, y: (x, tf.one_hot(binarize(y), n_classes)))
        elif n_classes > 2:
            if mapping is not None:
                dataset = dataset.map(
                    lambda x, y: (x, replace(y, mapping=mapping)))
            dataset = dataset.map(lambda x, y: (x, tf.one_hot(y, n_classes)))

    # Add grayscale channel to features.
    # TODO: in the future, multi-channel features should be supported.
    dataset = dataset.map(lambda x, y: (tf.expand_dims(x, -1), y))

    # Prefetch data to overlap data production with data consumption. The
    # TensorFlow documentation suggests prefetching `batch_size` elements.
    dataset = dataset.prefetch(buffer_size=batch_size)

    # Batch the dataset, so each iteration gives `batch_size` elements. We drop
    # the remainder so that when training on multiple GPUs, the batch will
    # always be evenly divisible by the number of GPUs. Otherwise, the last
    # batch might have fewer than `batch_size` elements and will cause errors.
    if batch_size is not None:
        dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)

    # Optionally shuffle. We also optionally shuffle the list of files.
    # The TensorFlow documentation recommends shuffling and then repeating.
    if shuffle_buffer_size:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)

    # Repeat the dataset for n_epochs. If n_epochs is None, then repeat
    # indefinitely. If n_epochs is 1, then the dataset will only be iterated
    # through once.
    dataset = dataset.repeat(n_epochs)

    return dataset
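# Hedged usage sketch (not from the source): builds a training dataset of
# 128^3 blocks from 256^3 volumes for binary segmentation. The file pattern,
# shapes, and buffer sizes are illustrative and assume TFRecords written by
# this package's conversion utilities.
dataset = get_dataset(
    file_pattern="data/training_*.tfrecords",
    n_classes=2,
    batch_size=2,
    volume_shape=(256, 256, 256),
    block_shape=(128, 128, 128),
    augment=True,
    shuffle_buffer_size=10,
)
x, y = next(iter(dataset))
# x.shape == (2, 128, 128, 128, 1); y.shape == (2, 128, 128, 128, 2)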
def test_from_blocks():
    x = np.arange(64).reshape(4, 4, 4)
    block_shape = (2, 2, 2)
    outputs = volume.from_blocks(volume.to_blocks(x, block_shape), x.shape)
    assert_array_equal(outputs, x)
def predict_from_array(inputs,
                       predictor,
                       block_shape,
                       return_variance=False,
                       return_entropy=False,
                       return_array_from_images=False,
                       n_samples=1,
                       normalizer=None,
                       batch_size=4):
    """Return a prediction given an ndarray of features.

    Args:
        inputs: ndarray, array of features.
        predictor: TensorFlow Predictor object, predictor from previously
            trained model.
        block_shape: tuple of len 3, shape of blocks on which to predict.
        return_variance: Boolean. If True, return the running population
            variance along with the mean. Note that if `n_samples` is less
            than or equal to 1, the variance is not returned.
        return_entropy: Boolean. If True, return the running entropy along
            with the mean.
        return_array_from_images: Boolean. If True and the given input is an
            image, filepath, or list of filepaths, return arrays of
            [mean, variance, entropy] instead of images of them. If the input
            is an array, an array is returned regardless of this flag.
        n_samples: int, number of predictions to sample per block. If 1, only
            a single prediction is made.
        normalizer: callable, function that accepts an ndarray and returns an
            ndarray. Called before separating volume into blocks.
        batch_size: int, number of sub-volumes per batch for prediction.

    Returns:
        ndarray of mean predictions, optionally followed by the variance and
        entropy ndarrays.
    """
    print("Normalizer being used {n}".format(n=normalizer))
    if normalizer:
        features = normalizer(inputs)
        print(features.mean())
        print(features.std())
    else:
        features = inputs
    features = to_blocks(features, block_shape=block_shape)

    means = np.zeros_like(features)
    variances = np.zeros_like(features)
    entropies = np.zeros_like(features)
    features = features[..., None]  # Add a dimension for single channel.

    # Predict per block to reduce memory consumption.
    n_blocks = features.shape[0]
    n_batches = math.ceil(n_blocks / batch_size)
    progbar = tf.keras.utils.Progbar(n_batches)
    progbar.update(0)
    for j in range(0, n_blocks, batch_size):
        new_prediction = predictor({'volume': features[j:j + batch_size]})
        prev_mean = np.zeros_like(new_prediction['probabilities'])
        curr_mean = new_prediction['probabilities']
        M = np.zeros_like(new_prediction['probabilities'])

        # Accumulate a running mean and unnormalized variance over the sampled
        # predictions (Welford's online algorithm).
        for n in range(1, n_samples):
            new_prediction = predictor({'volume': features[j:j + batch_size]})
            prev_mean = curr_mean
            curr_mean = prev_mean + (
                new_prediction['probabilities'] - prev_mean) / float(n + 1)
            M = M + np.multiply(prev_mean - new_prediction['probabilities'],
                                curr_mean - new_prediction['probabilities'])

        progbar.add(1)
        # Class with the highest mean probability.
        means[j:j + batch_size] = np.argmax(curr_mean, axis=-1)
        # Population variance, summed over classes.
        variances[j:j + batch_size] = np.sum(M / n_samples, axis=-1)
        # Predictive entropy of the mean probabilities.
        entropies[j:j + batch_size] = -np.sum(
            np.multiply(np.log(curr_mean + 1e-7), curr_mean), axis=-1)

    total_means = from_blocks(means, output_shape=inputs.shape)
    total_variance = from_blocks(variances, output_shape=inputs.shape)
    total_entropy = from_blocks(entropies, output_shape=inputs.shape)

    mean_var_voxels = np.mean(total_variance)
    std_var_voxels = np.std(total_variance)

    include_variance = (n_samples > 1) and return_variance
    if include_variance:
        if return_entropy:
            return total_means, total_variance, total_entropy
        else:
            return total_means, total_variance
    else:
        if return_entropy:
            return total_means, total_entropy
        else:
            return total_means,
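# Illustrative check (not from the source) of the running mean/variance update
# used in the sampling loop above, i.e. Welford's online algorithm. The sample
# values and shapes are arbitrary.
import numpy as np

samples = np.random.rand(10, 4)   # 10 Monte Carlo draws over 4 voxels.
mean = samples[0].copy()
m2 = np.zeros_like(mean)
for n, x in enumerate(samples[1:], start=1):
    prev_mean = mean
    mean = prev_mean + (x - prev_mean) / (n + 1)
    m2 = m2 + (prev_mean - x) * (mean - x)
variance = m2 / len(samples)      # Population variance, as in the loop above.
np.testing.assert_allclose(mean, samples.mean(axis=0), rtol=1e-6)
np.testing.assert_allclose(variance, samples.var(axis=0), rtol=1e-6)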