def test_read_mapping(tmpdir):
    rows = [["orig", "new"], ["0", "1"], ["1", "2"], ["2", "3"]]
    filepath = tmpdir.join("mapping.csv")
    save_csv(rows=rows, filepath=str(filepath))
    mapping = read_mapping(str(filepath), header=True)
    assert mapping == {0: 1, 1: 2, 2: 3}

def test_read_mapping():
    with tempfile.NamedTemporaryFile() as f:
        f.write('orig,new\n0,1\n20,10\n40,15'.encode())
        f.seek(0)
        assert {0: 1, 20: 10, 40: 15} == io.read_mapping(
            f.name, skip_header=True)
        # Header is non-integer.
        with pytest.raises(ValueError):
            io.read_mapping(f.name, skip_header=False)

    with tempfile.NamedTemporaryFile() as f:
        f.write('orig,new\n0,1\n20,10\n40'.encode())
        f.seek(0)
        # Last row only has one value.
        with pytest.raises(ValueError):
            io.read_mapping(f.name, skip_header=False)

    with tempfile.NamedTemporaryFile() as f:
        f.write('origFnew\n0F1\n20F10\n40F15'.encode())
        f.seek(0)
        assert {0: 1, 20: 10, 40: 15} == io.read_mapping(
            f.name, skip_header=True, delimiter='F')

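# A minimal sketch of a `read_mapping` implementation consistent with the
# tests above, assuming the real function lives in the `io` module. The name
# `_read_mapping_sketch` is hypothetical and for illustration only: it reads a
# two-column delimited file into a dict of ints and raises ValueError on
# non-integer values or malformed rows, matching the behavior the tests check.
import csv


def _read_mapping_sketch(filepath, skip_header=True, delimiter=','):
    mapping = {}
    with open(filepath, newline='') as fp:
        reader = csv.reader(fp, delimiter=delimiter)
        if skip_header:
            next(reader)
        for row in reader:
            if len(row) != 2:
                raise ValueError(
                    "expected two values per row, got {}".format(len(row)))
            # int() raises ValueError on non-integer values (e.g., a header
            # row that was not skipped).
            mapping[int(row[0])] = int(row[1])
    return mapping
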
def train(params):
    """Train estimator."""
    if params['aparcaseg_mapping']:
        tf.logging.info(
            "Reading mapping file: {}".format(params['aparcaseg_mapping']))
        mapping = read_mapping(params['aparcaseg_mapping'])
    else:
        mapping = None

    def normalizer_aparcaseg(features, labels):
        return (
            normalize_zero_one(features),
            preprocess_aparcaseg(labels, mapping))

    def normalizer_brainmask(features, labels):
        return (
            normalize_zero_one(features),
            binarize(labels, threshold=0))

    if params['aparcaseg_mapping']:
        normalizer = normalizer_aparcaseg
    elif params['brainmask']:
        normalizer = normalizer_brainmask
    else:
        normalizer = None

    list_of_filepaths = read_csv(params['csv'])

    def generator_builder():
        """Return a generator that yields blocks of volumes."""
        return iter_volumes(
            list_of_filepaths=list_of_filepaths,
            vol_shape=params['vol_shape'],
            block_shape=params['block_shape'],
            x_dtype=_DT_X_NP,
            y_dtype=_DT_Y_NP,
            strides=params['strides'],
            shuffle=True,
            normalizer=normalizer)

    _output_shapes = (
        (*params['block_shape'], 1),
        params['block_shape'])

    input_fn = input_fn_builder(
        generator=generator_builder,
        output_types=(_DT_X_TF, _DT_Y_TF),
        output_shapes=_output_shapes,
        num_epochs=params['n_epochs'],
        batch_size=params['batch_size'],
        # TODO(kaczmarj): add multi-gpu support for training on volumes.
        # multi_gpu=params['multi_gpu'],
        # examples_per_epoch=examples_per_epoch,
    )

    runconfig = tf.estimator.RunConfig(
        save_summary_steps=25,
        save_checkpoints_steps=500,
        keep_checkpoint_max=100)

    model = nobrainer.models.get_estimator(params['model'])(
        n_classes=params['n_classes'],
        optimizer=params['optimizer'],
        learning_rate=params['learning_rate'],
        model_dir=params['model_dir'],
        config=runconfig,
        multi_gpu=params['multi_gpu'])

    # Setup for training and periodic evaluation.
    if params['eval_csv'] is not None:
        eval_list_of_filepaths = read_csv(params['eval_csv'])
        gen = nobrainer.util.iter_volumes(
            list_of_filepaths=eval_list_of_filepaths,
            x_dtype=_DT_X_NP,
            y_dtype=_DT_Y_NP,
            vol_shape=params['vol_shape'],
            block_shape=params['block_shape'],
            strides=params['strides'],
            shuffle=False,
            normalizer=normalizer)

        def _get_eval_features_labels():
            _features = []
            _labels = []
            for _f, _l in gen:
                _features.append(_f)
                _labels.append(_l)
            return np.stack(_features), np.stack(_labels)

        tf.logging.info("Loading evaluation data")
        _eval_features, _eval_labels = _get_eval_features_labels()

        eval_input_fn = tf.estimator.inputs.numpy_input_fn(
            x=_eval_features,
            y=_eval_labels,
            batch_size=2,
            num_epochs=1,
            shuffle=False)

        _monitors = [
            tf.contrib.learn.monitors.ValidationMonitor(
                input_fn=eval_input_fn,
                every_n_steps=2000,
                early_stopping_metric=None,
                early_stopping_rounds=None)]
        hooks = tf.contrib.learn.monitors.replace_monitors_with_hooks(
            _monitors, model)

    # Training without evaluation.
    else:
        hooks = None

    model.train(input_fn=input_fn, hooks=hooks)

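# The `train` function above expects a flat `params` dict, typically built
# from command-line arguments. The keys below are taken from the lookups in
# the function body; the values (model name, paths, shapes) are hypothetical
# and for illustration only.
example_params = {
    'csv': 'features_labels.csv',
    'eval_csv': None,                   # or a CSV of evaluation filepaths
    'aparcaseg_mapping': 'mapping.csv',
    'brainmask': False,
    'model': 'highres3dnet',            # hypothetical model name
    'model_dir': 'model_checkpoints',
    'n_classes': 7,
    'optimizer': 'Adam',
    'learning_rate': 0.001,
    'batch_size': 2,
    'n_epochs': 1,
    'vol_shape': (256, 256, 256),
    'block_shape': (64, 64, 64),
    'strides': (64, 64, 64),
    'multi_gpu': False,
}
# train(example_params)
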
def train(params):
    """Train estimator."""
    model_config = tf.estimator.RunConfig(
        save_summary_steps=params['save_summary_steps'],
        save_checkpoints_steps=params['save_checkpoints_steps'],
        keep_checkpoint_max=params['keep_checkpoint_max'])

    model = get_estimator(params['model'])(
        n_classes=params['n_classes'],
        optimizer=params['optimizer'],
        learning_rate=params['learning_rate'],
        model_dir=params['model_dir'],
        config=model_config,
        multi_gpu=params['multi_gpu'],
        **params['model_opts'])

    label_mapping = None
    if params['label_mapping']:
        tf.logging.info(
            "Reading mapping file: {}".format(params['label_mapping']))
        label_mapping = read_mapping(params['label_mapping'])

    filepaths = read_csv(params['csv'])

    volume_data_generator = VolumeDataGenerator(
        samplewise_minmax=params['samplewise_minmax'],
        samplewise_zscore=params['samplewise_zscore'],
        samplewise_center=params['samplewise_center'],
        samplewise_std_normalization=params['samplewise_std_normalization'],
        flip=params['flip'],
        rescale=params['rescale'],
        rotate=params['rotate'],
        gaussian=params['gaussian'],
        reduce_contrast=params['reduce_contrast'],
        salt_and_pepper=params['salt_and_pepper'],
        brightness_range=params['brightness_range'],
        shift_range=params['shift_range'],
        zoom_range=params['zoom_range'],
        binarize_y=params['binarize'],
        mapping_y=label_mapping)

    if params['eval_csv']:
        eval_filepaths = read_csv(params['eval_csv'])
        # Evaluation data gets label preprocessing but no augmentation.
        eval_volume_data_generator = VolumeDataGenerator(
            binarize_y=params['binarize'],
            mapping_y=label_mapping)
    else:
        eval_filepaths = None
        eval_volume_data_generator = None

    _train(
        model=model,
        volume_data_generator=volume_data_generator,
        filepaths=filepaths,
        volume_shape=params['volume_shape'],
        block_shape=params['block_shape'],
        strides=params['strides'],
        x_dtype='float32',
        y_dtype='int32',
        shuffle=True,
        batch_size=params['batch_size'],
        n_epochs=params['n_epochs'],
        prefetch=params['prefetch'],
        multi_gpu=params['multi_gpu'],
        eval_volume_data_generator=eval_volume_data_generator,
        eval_filepaths=eval_filepaths)

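# This variant drives preprocessing and augmentation through
# `VolumeDataGenerator`. A hypothetical `params` fragment covering the
# augmentation-related keys the function reads; the values and their types
# are illustrative assumptions, not the library's documented defaults.
example_augmentation_params = {
    'samplewise_minmax': True,          # scale each volume to [0, 1]
    'samplewise_zscore': False,
    'samplewise_center': False,
    'samplewise_std_normalization': False,
    'flip': True,                       # random flips
    'rescale': False,
    'rotate': True,                     # small random rotations
    'gaussian': False,                  # additive Gaussian noise
    'reduce_contrast': False,
    'salt_and_pepper': False,
    'brightness_range': 0.,
    'shift_range': 0.,
    'zoom_range': 0.,
    'binarize': False,
    'label_mapping': None,
}
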
def validate_from_filepath(filepath,
                           predictor,
                           block_shape,
                           n_classes,
                           mapping_y,
                           return_variance=False,
                           return_entropy=False,
                           return_array_from_images=False,
                           n_samples=1,
                           normalizer=normalize_zero_one,
                           batch_size=4,
                           dtype=DT_X):
    """Compute Dice for a prediction compared to a ground truth image.

    Args:
        filepath: tuple, paths to an existing neuroimaging volume (index 0)
            and its ground truth (index 1).
        predictor: TensorFlow Predictor object, predictor from a previously
            trained model.
        block_shape: tuple of length 3, shape of blocks on which to predict.
        n_classes: int, number of classes the model is trained to output.
        mapping_y: path-like, path to the CSV mapping file per the command
            line argument.
        return_variance: bool, if true, return the running population
            variance along with the mean. Note that if n_samples is less
            than or equal to 1, None is returned instead of the variance.
        return_entropy: bool, if true, return the running entropy along
            with the mean.
        return_array_from_images: bool, if true and the given input is an
            image, filepath, or filepaths, return arrays of
            [mean, variance, entropy] instead of images of them. If the
            input is an array, an array is returned regardless of this
            flag.
        n_samples: int, number of predictions to sample. If 1, a single
            prediction is returned.
        normalizer: callable, function that accepts an ndarray and returns
            an ndarray. Called before separating the volume into blocks.
        batch_size: int, number of sub-volumes per batch for prediction.
        dtype: str or dtype object, dtype of features.

    Returns:
        Tuple of the outputs (`nibabel.spatialimages.SpatialImage` or
        arrays of the mean, variance (optional), and entropy (optional)
        predictions) and the Dice scores.
    """
    if not Path(filepath[0]).is_file():
        raise FileNotFoundError("could not find file {}".format(filepath[0]))
    img = nib.load(filepath[0])
    y = read_volume(filepath[1], dtype=np.int32)

    outputs = _predict(
        inputs=img,
        predictor=predictor,
        block_shape=block_shape,
        return_variance=return_variance,
        return_entropy=return_entropy,
        return_array_from_images=return_array_from_images,
        n_samples=n_samples,
        normalizer=normalizer,
        batch_size=batch_size)
    prediction_image = outputs[0].get_data()
    y = replace(y, read_mapping(mapping_y))
    dice = get_dice_for_images(prediction_image, y, n_classes)
    return outputs, dice

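# A hypothetical call to `validate_from_filepath`, assuming a predictor
# restored with `tf.contrib.predictor.from_saved_model` (TensorFlow 1.x) and
# a mapping CSV like the one exercised in the tests above. All paths and
# values are illustrative.
#
# predictor = tf.contrib.predictor.from_saved_model('saved_model_dir')
# outputs, dice = validate_from_filepath(
#     filepath=('sub-01_T1w.nii.gz', 'sub-01_aparcaseg.nii.gz'),
#     predictor=predictor,
#     block_shape=(64, 64, 64),
#     n_classes=7,
#     mapping_y='mapping.csv',
#     n_samples=1,
#     batch_size=4)
# print('Dice:', dice)
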
def train(params):
    """Train estimator."""
    x_dataset = params['xdset']
    y_dataset = params['ydset']
    tf.logging.info(
        "Using features dataset {x} and labels dataset {y}".format(
            x=x_dataset, y=y_dataset))

    with h5py.File(params['hdf5path'], mode='r') as fp:
        examples_per_epoch = fp[x_dataset].shape[0]
        assert examples_per_epoch == fp[y_dataset].shape[0]

    if params['aparcaseg_mapping']:
        tf.logging.info(
            "Reading mapping file: {}".format(params['aparcaseg_mapping']))
        mapping = read_mapping(params['aparcaseg_mapping'])
    else:
        mapping = None

    def normalizer_aparcaseg(features, labels):
        return features, preprocess_aparcaseg(labels, mapping)

    def normalizer_brainmask(features, labels):
        return features, binarize(labels, threshold=0)

    if params['aparcaseg_mapping']:
        normalizer = normalizer_aparcaseg
    elif params['brainmask']:
        normalizer = normalizer_brainmask
    else:
        normalizer = None

    def generator_builder():
        """Return a generator that yields blocks from the HDF5 file."""
        return iter_hdf5(
            filepath=params['hdf5path'],
            x_dataset=x_dataset,
            y_dataset=y_dataset,
            x_dtype=_DT_X_NP,
            y_dtype=_DT_Y_NP,
            shuffle=False,
            normalizer=normalizer)

    _output_shapes = ((*params['block_shape'], 1), params['block_shape'])

    input_fn = input_fn_builder(
        generator=generator_builder,
        output_types=(_DT_X_TF, _DT_Y_TF),
        output_shapes=_output_shapes,
        num_epochs=params['n_epochs'],
        multi_gpu=params['multi_gpu'],
        examples_per_epoch=examples_per_epoch,
        batch_size=params['batch_size'])

    runconfig = tf.estimator.RunConfig(
        save_summary_steps=25,
        save_checkpoints_steps=100,
        keep_checkpoint_max=100)

    model = nobrainer.models.get_estimator(params['model'])(
        n_classes=params['n_classes'],
        optimizer=params['optimizer'],
        learning_rate=params['learning_rate'],
        model_dir=params['model_dir'],
        config=runconfig,
        multi_gpu=params['multi_gpu'])

    model.train(input_fn=input_fn)
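
# This variant reads pre-blocked features and labels from HDF5 datasets
# rather than from volume filepaths. A hypothetical `params` dict matching
# the lookups above; the file and dataset names are illustrative assumptions.
example_hdf5_params = {
    'hdf5path': 'features_labels.h5',
    'xdset': '/features',
    'ydset': '/labels',
    'aparcaseg_mapping': None,
    'brainmask': True,                  # binarize labels for brain masking
    'model': 'meshnet',                 # hypothetical model name
    'model_dir': 'model_checkpoints',
    'n_classes': 2,
    'optimizer': 'Adam',
    'learning_rate': 0.001,
    'batch_size': 8,
    'n_epochs': 5,
    'block_shape': (64, 64, 64),
    'multi_gpu': False,
}
# train(example_hdf5_params)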