Example #1
def test_save_read_csv(tmpdir):
    rows = [["col1", "col2"], ["foo", "bar"], ["foo", "bar"], ["foo", "bar"],
            ["foo", "bar"], ["foo", "bar"]]

    filepath = tmpdir.join("file.csv")
    save_csv(rows=rows, filepath=str(filepath))
    rows_withoutheader = read_csv(filepath=str(filepath), header=True)
    rows_withheader = read_csv(filepath=str(filepath), header=False)

    assert rows == rows_withheader
    assert rows[1:] == rows_withoutheader
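Note: the assertions pin down the header semantics here: header=True consumes the first row, header=False returns every row. A minimal sketch of a save_csv/read_csv pair that would satisfy this test (the real implementations may differ):

import csv

def save_csv(rows, filepath):
    # Write each row (a list of strings) as one CSV line.
    with open(filepath, "w", newline="") as f:
        csv.writer(f).writerows(rows)

def read_csv(filepath, header=False):
    # Return all rows as lists; drop the first row when header=True.
    with open(filepath, newline="") as f:
        rows = [list(row) for row in csv.reader(f)]
    return rows[1:] if header else rows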
Example #2
def test_convert(csv_of_volumes, tmp_path):
    files = io.read_csv(csv_of_volumes, skip_header=False)
    tfrecords_template = str(tmp_path / 'data-{shard:03d}.tfrecords')
    volumes_per_shard = 12
    io.convert(files,
               tfrecords_template=tfrecords_template,
               volumes_per_shard=volumes_per_shard,
               num_parallel_calls=1)

    paths = list(tmp_path.glob('data-*.tfrecords'))
    paths = sorted(paths)
    assert len(paths) == 9
    assert (tmp_path / 'data-008.tfrecords').is_file()

    dset = tf.data.TFRecordDataset(list(map(str, paths)),
                                   compression_type='GZIP')
    dset = dset.map(
        io.get_parse_fn(volume_shape=(8, 8, 8), include_affines=True))

    for ref, test in zip(files, dset):
        x, y = ref
        x, x_aff = io.read_volume(x, return_affine=True)
        y, y_aff = io.read_volume(y, return_affine=True)
        assert np.array_equal(x, test[0])
        assert np.array_equal(y, test[1])
        assert np.array_equal(x_aff, test[2])
        assert np.array_equal(y_aff, test[3])

    with pytest.raises(ValueError):
        io.convert(files, tfrecords_template="data/foobar-{}.tfrecords")
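Note on the shard count: with volumes_per_shard=12, the asserted 9 shards (data-000 through data-008) imply the csv_of_volumes fixture yields between 97 and 108 file pairs; a 100-pair fixture, for instance, gives

import math
assert math.ceil(100 / 12) == 9  # 8 full shards plus one partial shard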
Example #3
def test_write_read_float_labels(csv_of_volumes, tmp_path):  # noqa: F811
    files = io.read_csv(csv_of_volumes, skip_header=False)
    files = [(x, random.random()) for x, _ in files]
    filename_template = str(tmp_path / "data-{shard:03d}.tfrecords")
    examples_per_shard = 12
    tfrecord.write(
        files,
        filename_template=filename_template,
        examples_per_shard=examples_per_shard,
        processes=1,
    )

    paths = list(tmp_path.glob("data-*.tfrecords"))
    paths = sorted(paths)
    assert len(paths) == 9
    assert (tmp_path / "data-008.tfrecords").is_file()

    dset = tf.data.TFRecordDataset(list(map(str, paths)),
                                   compression_type="GZIP")
    dset = dset.map(
        tfrecord.parse_example_fn(volume_shape=(8, 8, 8), scalar_label=True))

    for ref, test in zip(files, dset):
        x, y = ref
        x = io.read_volume(x)
        assert_array_equal(x, test[0])
        assert_array_equal(y, test[1])
Example #4
def test_write_read_volume_labels_all_processes(csv_of_volumes,
                                                tmp_path):  # noqa: F811
    files = io.read_csv(csv_of_volumes, skip_header=False)
    filename_template = str(tmp_path / "data-{shard:03d}.tfrecords")
    examples_per_shard = 12
    tfrecord.write(
        files,
        filename_template=filename_template,
        examples_per_shard=examples_per_shard,
        processes=None,
    )

    paths = list(tmp_path.glob("data-*.tfrecords"))
    paths = sorted(paths)
    assert len(paths) == 9
    assert (tmp_path / "data-008.tfrecords").is_file()

    dset = tf.data.TFRecordDataset(list(map(str, paths)),
                                   compression_type="GZIP")
    dset = dset.map(
        tfrecord.parse_example_fn(volume_shape=(8, 8, 8), scalar_label=False))

    for ref, test in zip(files, dset):
        x, y = ref
        x, y = io.read_volume(x), io.read_volume(y)
        assert_array_equal(x, test[0])
        assert_array_equal(y, test[1])

    with pytest.raises(ValueError):
        tfrecord.write(files,
                       filename_template="data/foobar-{}.tfrecords",
                       examples_per_shard=4)
Example #5
def test_get_data():
    csv_path = nbutils.get_data()
    assert Path(csv_path).is_file()

    files = read_csv(csv_path)
    assert len(files) == 10
    assert all(len(r) == 2 for r in files)
    for x, y in files:
        assert Path(x).is_file()
        assert Path(y).is_file()
Example #6
def test_read_csv():
    with tempfile.NamedTemporaryFile() as f:
        f.write("foo,bar\nbaz,boo".encode())
        f.seek(0)
        assert [("foo", "bar"),
                ("baz", "boo")] == io.read_csv(f.name, skip_header=False)

    with tempfile.NamedTemporaryFile() as f:
        f.write("foo,bar\nbaz,boo".encode())
        f.seek(0)
        assert [("baz", "boo")] == io.read_csv(f.name, skip_header=True)

    with tempfile.NamedTemporaryFile() as f:
        f.write("foo,bar\nbaz,boo".encode())
        f.seek(0)
        assert [("baz", "boo")] == io.read_csv(f.name)

    with tempfile.NamedTemporaryFile() as f:
        f.write("foo|bar\nbaz|boo".encode())
        f.seek(0)
        assert [("baz", "boo")] == io.read_csv(f.name, delimiter="|")
Example #7
def test_read_csv():
    with tempfile.NamedTemporaryFile() as f:
        f.write('foo,bar\nbaz,boo'.encode())
        f.seek(0)
        assert [('foo', 'bar'),
                ('baz', 'boo')] == io.read_csv(f.name, skip_header=False)

    with tempfile.NamedTemporaryFile() as f:
        f.write('foo,bar\nbaz,boo'.encode())
        f.seek(0)
        assert [('baz', 'boo')] == io.read_csv(f.name, skip_header=True)

    with tempfile.NamedTemporaryFile() as f:
        f.write('foo,bar\nbaz,boo'.encode())
        f.seek(0)
        assert [('baz', 'boo')] == io.read_csv(f.name)

    with tempfile.NamedTemporaryFile() as f:
        f.write('foo|bar\nbaz|boo'.encode())
        f.seek(0)
        assert [('baz', 'boo')] == io.read_csv(f.name, delimiter='|')
Example #8
def test_cli(csv_of_volumes):
    model_dir = "/tmp/tmpmodeldir"
    cmd = """train
--n-classes=2
--model=highres3dnet
--model-dir={model_dir}
--optimizer=Adam
--learning-rate=0.001
--batch-size=2
--prefetch=1
--volume-shape 8 8 8
--block-shape 8 8 8
--strides 8 8 8
--csv={filepath}
--binarize
--flip
--rotate
--gaussian
--reduce-contrast
--salt-and-pepper
    """
    cmd = cmd.replace('\n', ' ').format(model_dir=model_dir,
                                        filepath=csv_of_volumes).split()
    main(args=cmd)
    assert Path(model_dir).is_dir()

    save_dir = "/tmp/tmpmodeldir/savedmodel"
    cmd = """save
--model=highres3dnet
--model-dir={model_dir}
--n-classes=2
--block-shape 8 8 8
{save_dir}
    """
    cmd = cmd.replace('\n', ' ').format(model_dir=model_dir,
                                        save_dir=save_dir).split()
    main(args=cmd)
    assert Path(save_dir).is_dir()

    save_dir = next(Path(save_dir).glob('**/saved_model.pb'))
    input_ = read_csv(csv_of_volumes)[0][0]
    output = "/tmp/output.nii.gz"
    cmd = """predict
--block-shape 8 8 8
--model={save_dir}
{input}
{output}
    """
    cmd = cmd.replace('\n', ' ').format(save_dir=save_dir,
                                        input=input_,
                                        output=output).split()
    main(cmd)
    read_volume(output)
Example #9
def test_read_volume(csv_of_volumes):
    filepath = read_csv(csv_of_volumes)[0][0]
    volume = read_volume(filepath, dtype='float32', return_affine=False)
    assert volume.sum()
    assert volume.shape == (8, 8, 8)
    assert volume.dtype == np.float32

    volume, affine = read_volume(filepath, dtype='int32', return_affine=True)
    assert volume.sum()
    assert volume.shape == (8, 8, 8)
    assert volume.dtype == np.int32
    assert affine.shape == (4, 4)
    assert affine.sum() == 4
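The affine assertions (shape (4, 4), sum 4, i.e. an identity affine) suggest the volumes are loaded through a nibabel-style interface. A minimal sketch of read_volume with this signature, assuming a nibabel-backed implementation:

import nibabel as nib

def read_volume(filepath, dtype=None, return_affine=False):
    # Load the image, optionally cast the data, optionally return the affine.
    img = nib.load(filepath)
    data = img.get_fdata()
    if dtype is not None:
        data = data.astype(dtype)
    return (data, img.affine) if return_affine else data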
Example #10
def test_verify_features_scalar_labels(csv_of_volumes):  # noqa: F811
    files = io.read_csv(csv_of_volumes, skip_header=False)
    # Int labels.
    files = [(x, 0) for (x, _) in files]
    invalid = io.verify_features_labels(files,
                                        volume_shape=(8, 8, 8),
                                        num_parallel_calls=1)
    assert not invalid
    invalid = io.verify_features_labels(files,
                                        volume_shape=(12, 12, 8),
                                        num_parallel_calls=1)
    assert all(invalid)
    # Float labels.
    files = [(x, 1.0) for (x, _) in files]
    invalid = io.verify_features_labels(files,
                                        volume_shape=(8, 8, 8),
                                        num_parallel_calls=1)
    assert not invalid
    invalid = io.verify_features_labels(files,
                                        volume_shape=(12, 12, 8),
                                        num_parallel_calls=1)
    assert all(invalid)
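verify_features_labels returns the pairs that failed verification, so an empty result means every pair passed. A typical pattern before conversion would be to drop the failures (an illustrative sketch; compare the CLI behavior in Example #17):

invalid = io.verify_features_labels(files,
                                    volume_shape=(8, 8, 8),
                                    num_parallel_calls=1)
if invalid:
    # Keep only the pairs that verified cleanly.
    bad = set(invalid)
    files = [pair for pair in files if pair not in bad]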
Example #11
def validate(params):
    normalizer = None
    sm = params["samplewise_minmax"]
    sz = params["samplewise_zscore"]
    if sm and sz:
        raise Exception("Normalizer cannot be both minmax and zscore")
    if sm:
        normalizer = normalize_zero_one
    if sz:
        normalizer = zscore
    print(params['model'])
    validate_from_filepaths(
        filepaths=read_csv(params['csv']),
        predictor=params['model'],
        block_shape=params['block_shape'],
        n_classes=params['n_classes'],
        mapping_y=params['label_mapping'],
        output_path=params['output_path'],
        return_variance=params['return_variance'],
        return_entropy=params['return_entropy'],
        return_array_from_images=params['return_array_from_images'],
        n_samples=params['n_samples'],
        normalizer=normalizer,
        batch_size=params['batch_size'])
Example #12
def test_convert_scalar_float_labels(tmp_path):
    runner = CliRunner()
    with runner.isolated_filesystem():
        csvpath = get_data(str(tmp_path))
        # Make labels scalars.
        data = [(x, 1.0) for (x, _) in read_csv(csvpath)]
        csvpath = tmp_path.with_suffix(".new.csv")
        with open(csvpath, "w", newline="") as myfile:
            wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            wr.writerows(data)
        tfrecords_template = Path("data/shard-{shard:03d}.tfrecords")
        tfrecords_template.parent.mkdir(exist_ok=True)
        args = """\
    convert --csv={} --tfrecords-template={} --volume-shape 256 256 256
        --examples-per-shard=2 --to-ras --no-verify-volumes
    """.format(csvpath, tfrecords_template)
        result = runner.invoke(climain.cli, args.split())
        assert result.exit_code == 0
        assert Path("data/shard-000.tfrecords").is_file()
        assert Path("data/shard-001.tfrecords").is_file()
        assert Path("data/shard-002.tfrecords").is_file()
        assert Path("data/shard-003.tfrecords").is_file()
        assert Path("data/shard-004.tfrecords").is_file()
        assert not Path("data/shard-005.tfrecords").is_file()
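A cross-check on the expected shards, assuming get_data here returns the same 10 file pairs as in Example #5: with --examples-per-shard=2, 10 pairs fill exactly 10 / 2 = 5 shards (shard-000 through shard-004), which is why shard-005 must not exist.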
Example #13
def train(params):
    """Train estimator."""
    if params['aparcaseg_mapping']:
        tf.logging.info(
            "Reading mapping file: {}".format(params['aparcaseg_mapping']))
        mapping = read_mapping(params['aparcaseg_mapping'])
    else:
        mapping = None

    def normalizer_aparcaseg(features, labels):
        return (
            normalize_zero_one(features),
            preprocess_aparcaseg(labels, mapping))

    def normalizer_brainmask(features, labels):
        return (
            normalize_zero_one(features),
            binarize(labels, threshold=0))

    if params['aparcaseg_mapping'] is not None:
        normalizer = normalizer_aparcaseg
    elif params['brainmask']:
        normalizer = normalizer_brainmask
    else:
        normalizer = None

    list_of_filepaths = read_csv(params['csv'])

    def generator_builder():
        """Return a function that returns a generator."""
        return iter_volumes(
            list_of_filepaths=list_of_filepaths,
            vol_shape=params['vol_shape'],
            block_shape=params['block_shape'],
            x_dtype=_DT_X_NP,
            y_dtype=_DT_Y_NP,
            strides=params['strides'],
            shuffle=True,
            normalizer=normalizer)

    _output_shapes = (
        (*params['block_shape'], 1),
        params['block_shape'])

    input_fn = input_fn_builder(
        generator=generator_builder,
        output_types=(_DT_X_TF, _DT_Y_TF),
        output_shapes=_output_shapes,
        num_epochs=params['n_epochs'],
        batch_size=params['batch_size'],
        # TODO(kaczmarj): add multi-gpu support for training on volumes.
        # multi_gpu=params['multi_gpu'],
        # examples_per_epoch=examples_per_epoch,
    )

    runconfig = tf.estimator.RunConfig(
        save_summary_steps=25,
        save_checkpoints_steps=500,
        keep_checkpoint_max=100)

    model = nobrainer.models.get_estimator(params['model'])(
        n_classes=params['n_classes'],
        optimizer=params['optimizer'],
        learning_rate=params['learning_rate'],
        model_dir=params['model_dir'],
        config=runconfig,
        multi_gpu=params['multi_gpu'])

    # Setup for training and periodic evaluation.
    if params['eval_csv'] is not None:
        eval_list_of_filepaths = read_csv(params['eval_csv'])
        gen = nobrainer.util.iter_volumes(
            list_of_filepaths=eval_list_of_filepaths,
            x_dtype=_DT_X_NP,
            y_dtype=_DT_Y_NP,
            vol_shape=params['vol_shape'],
            block_shape=params['block_shape'],
            strides=params['strides'],
            shuffle=False,
            normalizer=normalizer)

        def _get_eval_features_labels():
            _features = []
            _labels = []
            for _f, _l in gen:
                _features.append(_f)
                _labels.append(_l)
            return np.stack(_features), np.stack(_labels)

        tf.logging.info("Loading evaluation data")
        _eval_features, _eval_labels = _get_eval_features_labels()

        eval_input_fn = tf.estimator.inputs.numpy_input_fn(
            x=_eval_features, y=_eval_labels, batch_size=2, num_epochs=1,
            shuffle=False)

        _monitors = [
            tf.contrib.learn.monitors.ValidationMonitor(
                input_fn=eval_input_fn, every_n_steps=2000,
                early_stopping_metric=None, early_stopping_rounds=None)]
        hooks = tf.contrib.learn.monitors.replace_monitors_with_hooks(
            _monitors, model)

    # Training without evaluation.
    else:
        hooks = None

    model.train(input_fn=input_fn, hooks=hooks)
Example #14
def train(params):

    model_config = tf.estimator.RunConfig(
        save_summary_steps=params['save_summary_steps'],
        save_checkpoints_steps=params['save_checkpoints_steps'],
        keep_checkpoint_max=params['keep_checkpoint_max'])

    model = get_estimator(params['model'])(
        n_classes=params['n_classes'],
        optimizer=params['optimizer'],
        learning_rate=params['learning_rate'],
        model_dir=params['model_dir'],
        config=model_config,
        multi_gpu=params['multi_gpu'],
        **params['model_opts'])

    label_mapping = None
    if params['label_mapping']:
        tf.logging.info(
            "Reading mapping file: {}".format(params['label_mapping']))
        label_mapping = read_mapping(params['label_mapping'])

    filepaths = read_csv(params['csv'])

    volume_data_generator = VolumeDataGenerator(
        samplewise_minmax=params['samplewise_minmax'],
        samplewise_zscore=params['samplewise_zscore'],
        samplewise_center=params['samplewise_center'],
        samplewise_std_normalization=params['samplewise_std_normalization'],
        flip=params['flip'],
        rescale=params['rescale'],
        rotate=params['rotate'],
        gaussian=params['gaussian'],
        reduce_contrast=params['reduce_contrast'],
        salt_and_pepper=params['salt_and_pepper'],
        brightness_range=params['brightness_range'],
        shift_range=params['shift_range'],
        zoom_range=params['zoom_range'],
        binarize_y=params['binarize'],
        mapping_y=label_mapping)

    if params['eval_csv']:
        eval_filepaths = read_csv(params['eval_csv'])
        eval_volume_data_generator = VolumeDataGenerator(
            binarize_y=params['binarize'],
            mapping_y=label_mapping)
    else:
        eval_filepaths = None
        eval_volume_data_generator = None

    _train(
        model=model,
        volume_data_generator=volume_data_generator,
        filepaths=filepaths,
        volume_shape=params['volume_shape'],
        block_shape=params['block_shape'],
        strides=params['strides'],
        x_dtype='float32',
        y_dtype='int32',
        shuffle=True,
        batch_size=params['batch_size'],
        n_epochs=params['n_epochs'],
        prefetch=params['prefetch'],
        multi_gpu=params['multi_gpu'],
        eval_volume_data_generator=eval_volume_data_generator,
        eval_filepaths=eval_filepaths)
Example #15
    time_zero = time.time()
    namespace = parse_args(sys.argv[1:])
    params = vars(namespace)

    if params['verbose'] >= 1:
        logger.setLevel(logging.DEBUG)
    elif params['verbose'] == 0:
        logger.setLevel(logging.INFO)

    if os.path.isdir(params['input']):
        logger.info("Assuming SUBJECTS_DIR was passed in. Findings file pairs")
        list_of_files = get_list_of_t1_aparcaseg(params['input'])

    elif os.path.isfile(params['input']):
        logger.info("Reading CSV")
        list_of_files = read_csv(params['input'])
    else:
        raise ValueError(
            "Input must be the path to an existing FreeSurfer SUBJECTS_DIR or"
            " to an existing CSV file.")

    logger.info("Found {} pairs of volumes".format(len(list_of_files)))
    logger.info("User requested chunk size of {}".format(params['chunksize']))
    logger.info("Will iterate over {} set(s) of block shape(s)".format(
        len(params['block_shape'])))

    if params['save_filepaths'] is not None:
        _df = pd.DataFrame(list_of_files)
        _df.columns = ["features", "labels"]
        logger.info(
            "Saving CSV of filepaths found by this script to {}".format(
Example #16
def test_verify_features_nonscalar_labels(csv_of_volumes):  # noqa: F811
    files = io.read_csv(csv_of_volumes, skip_header=False)
    invalid = io.verify_features_labels(files,
                                        volume_shape=(8, 8, 8),
                                        num_parallel_calls=1)
    assert not invalid
Example #17
def convert(
    csv,
    preprocess_path,
    tfrecords_template,
    volume_shape,
    examples_per_shard,
    num_parallel_calls,
    verbose,
):
    """Preprocess MRI volumes and convert to Tfrecords.

    NOTE: Volumes will all be the same shape after preprocessing.
    """

    volume_filepaths = read_csv(csv)

    num_parallel_calls = None if num_parallel_calls == -1 else num_parallel_calls
    if num_parallel_calls is None:
        # Get number of processes allocated to the current process.
        # Note the difference from `os.cpu_count()`.
        num_parallel_calls = len(os.sched_getaffinity(0))

    invalid_pairs = verify_features_labels(
        volume_filepaths,
        check_labels_int=True,
        num_parallel_calls=num_parallel_calls,
        verbose=verbose,
    )

    # UNCOMMENT the following when https://github.com/neuronets/nobrainer/pull/125
    # is merged
    # if not invalid_pairs:
    #     click.echo(click.style("Passed verification.", fg="green"))
    # else:
    #     click.echo(click.style("Failed verification.", fg="red"))
    #     for pair in invalid_pairs:
    #         click.echo(pair[0])
    #         click.echo(pair[1])
    #     sys.exit(-1)

    ppaths = preprocess_parallel(
        volume_filepaths,
        conform_volume_to=volume_shape,
        num_parallel_calls=num_parallel_calls,
        save_path=preprocess_path,
    )

    invalid_pairs = verify_features_labels(
        ppaths,
        volume_shape=volume_shape,
        check_labels_int=True,
        num_parallel_calls=num_parallel_calls,
        verbose=verbose,
    )

    if not invalid_pairs:
        click.echo()
    else:
        click.echo(click.style("Failed post preprocessing re-verification.", fg="red"))
        click.echo(
            f"Oops! This is embarrasing. Looks like our preprocessing"
            " script shit the bed. Found {len(invalid_pairs)} invalid"
            " pairs of volumes. These files might not all have shape "
            " {volume_shape} or the labels might not be scalar values"
            " Please report this issue on                            "
            " https://github.com/poldracklab/nondefaced-detector     "
        )

        for pair in invalid_pairs:
            click.echo(pair[0])
            click.echo(pair[1])
        sys.exit(-1)

    # TODO: Convert to tfrecords
    os.makedirs(os.path.dirname(tfrecords_template), exist_ok=True)

    _write_tfrecord(
        features_labels=ppaths,
        filename_template=tfrecords_template,
        examples_per_shard=examples_per_shard,
        processes=num_parallel_calls,
        verbose=verbose,
    )

    click.echo(click.style("Finished conversion to TFRecords.", fg="green"))
Example #18
def test_verify_features_labels(csv_of_volumes):
    files = io.read_csv(csv_of_volumes, skip_header=False)
    io.verify_features_labels(files,
                              volume_shape=(8, 8, 8),
                              num_parallel_calls=1)