Example #1
def _get_record_path(dataset_path: str, subject: str, overwrite: bool):
    """Returns the .tfrecords path for `subject`, removing any existing file when `overwrite` is set."""
    path = os.path.join(dataset_path, "{}.tfrecords".format(subject))
    if os.path.exists(path):
        if overwrite:
            _util.rm(path)
        else:
            _util.ensure_path_free(path)
    return path
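
Both this helper and the next rely on project-internal `_util` path utilities that are not shown in these examples. A minimal sketch of what they plausibly do, assuming `rm` deletes a file or directory tree and `ensure_path_free` raises when the path is already occupied (with `empty_ok` tolerating an existing empty directory, as later examples suggest):

import os
import shutil

def rm(path: str) -> None:
    # Remove a single file or an entire directory tree.
    if os.path.isdir(path):
        shutil.rmtree(path)
    else:
        os.remove(path)

def ensure_path_free(path: str, empty_ok: bool = False) -> None:
    # Abort early when the target path is taken; with empty_ok, an
    # existing empty directory is acceptable.
    if not os.path.exists(path):
        return
    if empty_ok and os.path.isdir(path) and not os.listdir(path):
        return
    raise FileExistsError("Path already exists: {}".format(path))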
Example #2
def _get_dataset_path(dataset: str, overwrite: bool):
    """Creates and returns a fresh directory for `dataset`, removing any existing one when `overwrite` is set."""
    path = _util.get_rel_datasets_path(dataset)
    if os.path.exists(path):
        if overwrite:
            _util.rm(path)
        else:
            _util.ensure_path_free(path)

    _logger.info("Creating {}".format(path))
    _util.mkdir(path)
    return path
Example #3
def downsample(dataset: str, shape: List[int], partial=False):
    """

    :param dataset:
    :param shape:
    :return:
    """
    assert isinstance(dataset, str) and len(dataset)
    assert isinstance(shape, list) and all(isinstance(s, int)
                                           for s in shape) and len(shape) == 3
    assert isinstance(partial, bool)
    tf.enable_eager_execution()

    dataset_path = _util.get_rel_datasets_path(dataset)
    _util.ensure_dir(dataset_path)

    data = _dataset.get_dataset(dataset_path, 1, 8, partial=partial)

    resized_dataset = "{}_resized".format(dataset)
    resized_path = _util.get_rel_datasets_path(resized_dataset)
    _util.ensure_path_free(resized_path)
    _util.mkdir(resized_path)

    iterator = data.make_one_shot_iterator()
    records = _dataset.get_records(dataset_path, partial)

    for record in tqdm(records):
        record = record.replace(dataset, resized_dataset)
        sample = iterator.next()

        scan = sample[0][0].numpy().squeeze()
        # show_scan(scan, "Original")

        crop = crop_image(scan, 1e-5)
        # show_scan(crop, "Crop")

        # Per-axis zoom factor: target size / cropped size.
        factors = [s / d for d, s in zip(crop.shape, shape)]
        resized = ndimage.zoom(crop, zoom=factors, order=4)
        # show_scan(resized, "Resized")

        _dataset.write_record(record, resized, sample[0][1].numpy().squeeze(),
                              sample[1].numpy())

    _dataset.save_shape(resized_path, shape)
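
`crop_image` is called above but not defined in these examples. A minimal sketch, under the assumption that it trims away borders whose intensities fall at or below the given tolerance (here 1e-5):

import numpy as np

def crop_image(scan: np.ndarray, tol: float) -> np.ndarray:
    # Keep only the bounding box of voxels with intensity above tol.
    mask = scan > tol
    if not mask.any():
        return scan
    slices = tuple(
        slice(idx.min(), idx.max() + 1) for idx in np.nonzero(mask))
    return scan[slices]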
Example #4
def _standardize_dataset(dataset_path, dataset, mean, std):
    """Writes a standardized ((scan - mean) / std) copy of every record to a new dataset."""
    data = _dataset.get_dataset(dataset_path,
                                partial=True).make_one_shot_iterator()
    records = _dataset.get_records(dataset_path, partial=True)

    standardized_name = _get_standardized_name(dataset)
    standardized_path = dataset_path.replace(dataset, standardized_name)
    _util.ensure_path_free(standardized_path, empty_ok=True)
    _util.mkdir(standardized_path)
    for record in tqdm(records):
        record = record.replace(dataset, standardized_name)
        sample = data.next()

        scan = sample[0][0]
        # show_scan(scan.numpy().squeeze(), "Original")

        standardized = (scan - mean) / std
        # show_scan(standardized.numpy().squeeze(), "Standardized")

        _dataset.write_record(record,
                              standardized.numpy().squeeze(),
                              sample[0][1].numpy().squeeze(),
                              sample[1].numpy())
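
The `mean` and `std` passed in here must be estimated beforehand, typically over the training split. A minimal sketch of one way to do that with running sums (the name `_compute_moments` is hypothetical, not part of this codebase):

import numpy as np

def _compute_moments(scans):
    # Accumulate running sums so the whole dataset never has to sit
    # in memory at once.
    count, total, total_sq = 0, 0.0, 0.0
    for scan in scans:
        arr = np.asarray(scan, dtype=np.float64)
        count += arr.size
        total += arr.sum()
        total_sq += np.square(arr).sum()
    mean = total / count
    std = np.sqrt(total_sq / count - mean ** 2)
    return mean, std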
Example #5
def train(dataset: str,
          epochs: int,
          batch_size: int,
          buffer_size: int,
          lr: float,
          l2_reg=0.,
          tv_reg=0.,
          ssim_loss=0.,
          sobel_loss=0.):
    """
    Trains an Autoencoder using the specified parameters.

    :param dataset: Existing dataset over which to train. Must contain train, dev, {mean,std}.pickle, shape.json
    :param epochs: Number of iterations over training data before termination.
    :param batch_size: Number of training samples per batch.
    :param buffer_size: Number of batches to prefetch.
    :param lr: Adam optimization initial learning rate.
    :param l2_reg: L2 regularization coefficient for kernel weights.
    :param tv_reg: Total Variation regularization coefficient for data.
    :param ssim_loss: SSIM regularization coefficient for data.
    :param sobel_loss: L2 regularization coefficient for data Sobel difference.
    """
    assert isinstance(dataset, str) and len(dataset)
    assert isinstance(epochs, int) and epochs > 0
    assert isinstance(batch_size, int) and batch_size > 0
    assert isinstance(buffer_size, int) and buffer_size > 0
    assert isinstance(lr, float) and lr > 0
    assert isinstance(l2_reg, float) and l2_reg >= 0
    assert isinstance(tv_reg, float) and tv_reg >= 0
    assert isinstance(ssim_loss, float) and ssim_loss >= 0
    assert isinstance(sobel_loss, float) and sobel_loss >= 0

    # Load and ensure required paths.
    weights_path = _util.get_weights_path_by_param(model="autoencoder",
                                                   dataset=dataset,
                                                   epochs=epochs,
                                                   batch_size=batch_size,
                                                   lr=lr,
                                                   l2_reg=l2_reg,
                                                   tv_reg=tv_reg,
                                                   ssim_loss=ssim_loss,
                                                   sobel_loss=sobel_loss)
    log_path = os.path.join(weights_path, "logs")
    _util.ensure_path_free(log_path, empty_ok=True)
    _util.mkdir(log_path)
    dataset_path = _util.get_rel_datasets_path(dataset)
    _util.ensure_dir(dataset_path)

    # Load model and input shape.
    shape = _dataset.load_shape(dataset_path)
    mean = _dataset.load_mean(dataset_path)
    std = _dataset.load_std(dataset_path)
    model = Autoencoder(l2_reg)

    # Create input/output placeholders.
    inp = tf.image.per_image_standardization(
        tf.placeholder(tf.float32, shape=[None, *shape]))
    out = model.call(inp)

    # Initialize loss functions. Note: the scalar regularization coefficients
    # passed in are rebound here to the corresponding loss tensors.
    total_loss, l2_loss, l2_reg, tv_reg, ssim_loss, sobel_loss = \
        _get_losses(inp, out, batch_size, model.losses, l2_reg, tv_reg, ssim_loss, sobel_loss)
    # Configure training operation.
    train_op = _get_train_op(total_loss, lr)

    # Load datasets
    train_dataset = (_dataset.get_dataset(
        os.path.join(dataset_path, "train"), partial=True).map(
            _only_cropped_scan).batch(batch_size).prefetch(buffer_size))
    dev_dataset = (_dataset.get_dataset(
        os.path.join(dataset_path, "dev"), partial=True).map(
            _only_cropped_scan).batch(batch_size).prefetch(buffer_size))

    # Setup logging and weight saving.
    _tboard.configure(log_path, flush_secs=2)
    saver = tf.train.Saver()

    # Initialize training loop variables.
    best_dev_loss, dev_loss = np.inf, np.inf
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        _logger.info("Counting datasets...")
        train_batches = dataset_iter_len(
            sess,
            train_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tTrain samples: {}".format(train_batches))
        dev_batches = dataset_iter_len(
            sess,
            dev_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tDev samples: {}".format(dev_batches))

        # Per-batch losses are scaled so that summing them over an epoch
        # yields an epoch average.
        train_loss = total_loss / train_batches
        dev_loss = total_loss / dev_batches

        # Re-create the training dataset; the one-shot iterator above was
        # exhausted while counting batches.
        train_dataset = (_dataset.get_dataset(
            os.path.join(dataset_path, "train"), partial=True).map(
                _only_cropped_scan).batch(batch_size).prefetch(buffer_size))

        for epoch in tqdm(range(epochs)):
            train_iter = train_dataset.make_one_shot_iterator().get_next()

            losses = defaultdict(float)
            for _ in range(train_batches):
                sample = sess.run(train_iter)
                _, _train_loss, _l2_loss, _l2_reg, _tv_reg, _ssim_loss, _sobel_loss = \
                    sess.run(
                        [train_op, train_loss, l2_loss, l2_reg, tv_reg, ssim_loss, sobel_loss],
                        feed_dict={inp: sample})
                losses["train/loss/total"] += _train_loss
                losses["train/loss/l2_loss"] += _l2_loss
                losses["train/reg/l2"] += _l2_reg
                losses["train/reg/tv"] += _tv_reg
                losses["train/loss/ssim"] += _ssim_loss
                losses["train/loss/sobel"] += _sobel_loss

            # Increment before doing anything else to avoid zero-indexed epochs.
            epoch += 1

            # Log training losses to tensorboard.
            for name, val in losses.items():
                _tboard.log_value(name, val, step=epoch)
            _logger.info("Epoch {}: train loss {}".format(
                epoch, losses["train/loss/total"]))

            # Compute dev metrics every 2 epochs.
            if epoch < 2 or epoch % 2 == 0:
                losses.clear()

                # Compute and log dev loss
                _dev_loss, _l2_loss, _l2_reg, _tv_reg, _ssim_loss, _sobel_loss = \
                    _get_dev_loss(sess, inp, dev_dataset, dev_batches, dev_loss, l2_loss, l2_reg, tv_reg, ssim_loss, sobel_loss)

                # Log dev losses to tensorboard.
                _logger.info("Epoch {}: dev loss {}".format(epoch, _dev_loss))

                _tboard.log_value("dev/loss/total", _dev_loss, step=epoch)
                _tboard.log_value("dev/loss/l2_loss", _l2_loss, step=epoch)
                _tboard.log_value("dev/reg/l2", _l2_reg, step=epoch)
                _tboard.log_value("dev/reg/tv", _tv_reg, step=epoch)
                _tboard.log_value("dev/loss/ssim", _ssim_loss, step=epoch)
                _tboard.log_value("dev/loss/sobel", _sobel_loss, step=epoch)

                # Save best model.
                if _dev_loss < best_dev_loss:
                    save_path = saver.save(
                        sess,
                        os.path.join(weights_path, "{}.ckpt".format(epoch)))
                    _logger.info(
                        "Saved new best model to {}".format(save_path))
                    best_dev_loss = _dev_loss

                # Plot some reconstruction images
                _logger.info("Generating reconstruction plots...")
                _log_reconstruction_imgs("eval", sess, train_dataset, inp, out,
                                         epoch, mean, std)
                _log_reconstruction_imgs("train", sess, train_dataset, inp,
                                         out, epoch, mean, std)
Example #6
def _get_log_path(weights: str) -> str:
    log_path = _util.get_rel_log_path(weights)
    _util.ensure_path_free(log_path, empty_ok=True)
    _util.mkdir(log_path)
    return log_path
Example #7
def _get_weights_path(weights: str) -> str:
    weights_path = _util.get_rel_weights_path(weights)
    _util.ensure_path_free(weights_path, empty_ok=True)
    _util.mkdir(weights_path)
    return weights_path