# Shared imports used by the functions below.
import functools
import os
from collections import defaultdict
from hashlib import md5

import numpy as np
import tensorflow as tf
from tqdm import tqdm


def get_dataset_regress(dataset_path: str, batch_size: int, buffer_size: int,
                        indices, shuffle=False, partial=False):
    assert isinstance(batch_size, int) and batch_size > 0
    assert isinstance(buffer_size, int) and buffer_size > 0
    assert indices is not None and len(indices) > 0

    # Keep only the records selected by `indices` (e.g. one side of a train/test split).
    records = np.array(get_records(dataset_path, partial))
    records = list(records[indices])
    shape = load_shape(dataset_path)

    dataset = (tf.data.TFRecordDataset(records)
               .map(functools.partial(_decode, shape)))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(records))
    return dataset.batch(batch_size).prefetch(buffer_size)
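# Usage sketch for get_dataset_regress (hypothetical path and record count,
# not from the source): split the records 90/10 by index, then build a
# pipeline over each side of the split.
n_records = 1000  # assumed; in practice use len(get_records(path, partial))
test_idx = np.random.choice(n_records, n_records // 10, replace=False)
train_idx = np.setdiff1d(np.arange(n_records), test_idx)

train_ds = get_dataset_regress("data/my_dataset", batch_size=32,
                               buffer_size=4, indices=train_idx)
test_ds = get_dataset_regress("data/my_dataset", batch_size=32,
                              buffer_size=4, indices=test_idx)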
def standardize(dataset: str):
    """
    Standardizes a dataset by the voxelwise mean and global standard deviation
    computed over its train split, saving both alongside the dataset shape.

    :param dataset: Name of an existing dataset containing a train split.
    :return: None. Writes shape, mean, and std to the standardized dataset path.
    """
    assert isinstance(dataset, str) and len(dataset)

    tf.enable_eager_execution()

    train_path = _util.get_rel_datasets_path(dataset, "train")
    _util.ensure_dir(train_path)

    dataset_path = _util.get_rel_datasets_path(dataset)
    standardized_name = _get_standardized_name(dataset)
    standardized_path = _util.get_rel_datasets_path(standardized_name)
    # _util.ensure_path_free(standardized_path, empty_ok=True)
    # _util.mkdir(standardized_path)

    train_data = _dataset.get_dataset(train_path, partial=True)
    train_iter = train_data.repeat().make_one_shot_iterator()
    train_records = _dataset.get_records(train_path, partial=True)

    # First pass: compute the voxelwise sample mean over the train split.
    total = train_iter.next()[0][0]
    for _ in tqdm(train_records[1:]):
        sample = train_iter.next()
        total += sample[0][0]
    mean = total / len(train_records)

    # Second pass: accumulate squared deviations from the mean.
    total = tf.square(train_iter.next()[0][0] - mean)
    for _ in tqdm(train_records[1:]):
        sample = train_iter.next()
        scan = sample[0][0]
        total += tf.square(scan - mean)
    # Average over samples as well as voxels before taking the square root.
    std = tf.sqrt(tf.reduce_mean(total / len(train_records)))

    _standardize_dataset(train_path, dataset, mean, std)
    _standardize_dataset(_util.get_rel_datasets_path(dataset, "dev"), dataset, mean, std)
    _standardize_dataset(_util.get_rel_datasets_path(dataset, "test"), dataset, mean, std)

    _dataset.save_shape(standardized_path, _dataset.load_shape(dataset_path))
    _dataset.save_mean(standardized_path, mean.numpy())
    _dataset.save_std(standardized_path, std.numpy())
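# For reference, the two-pass streaming computation above matches this
# in-memory NumPy sketch (hypothetical `scans` array of shape [N, *volume]):
scans = np.random.rand(8, 16, 16, 16).astype(np.float32)  # assumed shape

mean = scans.mean(axis=0)                    # voxelwise mean volume
std = np.sqrt(np.mean((scans - mean) ** 2))  # single global scalar std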
def predict(dataset: str, encoder_weights: str, model: str, model_weights: str):
    """
    Assess regression model performance.

    :param dataset: Name of dataset over which to test.
    :param encoder_weights: Path to trained encoder weights.
    :param model: Model type to use for regression.
    :param model_weights: Path to trained regression weights.
    """
    assert isinstance(dataset, str) and len(dataset)
    assert isinstance(encoder_weights, str) and len(encoder_weights)
    assert isinstance(model, str) and len(model)
    assert isinstance(model_weights, str) and len(model_weights)

    model = _get_model(model)

    if not os.path.isabs(encoder_weights):
        encoder_weights = _util.get_rel_weights_path(encoder_weights)
    _util.ensure_dir(os.path.dirname(encoder_weights))

    if not os.path.isabs(model_weights):
        model_weights = _util.get_rel_weights_path(model_weights)
    _util.ensure_dir(os.path.dirname(model_weights))

    test_dataset = _dataset.get_dataset_by_name(
        os.path.join(dataset, "test"), partial=True).batch(1)
    shape = _dataset.load_shape(_util.get_rel_datasets_path(dataset))

    label = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    features = tf.placeholder(dtype=tf.float32, shape=[None, len(_hcp.FEATURES)])

    config = tf.ConfigProto(device_count={'GPU': 0})
    with tf.Session(config=config) as sess:
        # Define input, output, and intermediate operation.
        encoder, (scan, code) = _tt_utils.load_encoder(sess, encoder_weights, 1, shape)

        _logger.info("Counting dataset...")
        test_batches = _tt_utils.dataset_iter_len(
            sess, test_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tTest samples: {}".format(test_batches))

        model(model_weights, sess, encoder, scan, features, code, label,
              test_dataset, test_batches)
def train(dataset: str, epochs: int, batch_size: int, buffer_size: int, lr: float,
          l2_reg=0., tv_reg=0., ssim_loss=0., sobel_loss=0.):
    """
    Trains an Autoencoder using the specified parameters.

    :param dataset: Existing dataset over which to train.
        Must contain train, dev, {mean,std}.pickle, shape.json
    :param epochs: Number of iterations over training data before termination.
    :param batch_size: Number of training samples per batch.
    :param buffer_size: Number of batches to prefetch.
    :param lr: Adam optimization initial learning rate.
    :param l2_reg: L2 regularization coefficient for kernel weights.
    :param tv_reg: Total Variation regularization coefficient for data.
    :param ssim_loss: SSIM regularization coefficient for data.
    :param sobel_loss: L2 regularization coefficient for data Sobel difference.
    """
    assert isinstance(dataset, str) and len(dataset)
    assert isinstance(epochs, int) and epochs > 0
    assert isinstance(batch_size, int) and batch_size > 0
    assert isinstance(buffer_size, int) and buffer_size > 0
    assert isinstance(lr, float) and lr > 0
    assert isinstance(l2_reg, float) and l2_reg >= 0
    assert isinstance(tv_reg, float) and tv_reg >= 0
    assert isinstance(ssim_loss, float) and ssim_loss >= 0
    assert isinstance(sobel_loss, float) and sobel_loss >= 0

    # Load and ensure required paths.
    weights_path = _util.get_weights_path_by_param(
        model="autoencoder",
        dataset=dataset,
        epochs=epochs,
        batch_size=batch_size,
        lr=lr,
        l2_reg=l2_reg,
        tv_reg=tv_reg,
        ssim_loss=ssim_loss,
        sobel_loss=sobel_loss)
    log_path = os.path.join(weights_path, "logs")
    _util.ensure_path_free(log_path, empty_ok=True)
    _util.mkdir(log_path)

    dataset_path = _util.get_rel_datasets_path(dataset)
    _util.ensure_dir(dataset_path)

    # Load model and input shape.
    shape = _dataset.load_shape(dataset_path)
    mean = _dataset.load_mean(dataset_path)
    std = _dataset.load_std(dataset_path)
    model = Autoencoder(l2_reg)

    # Create input/output placeholders.
    inp = tf.image.per_image_standardization(
        tf.placeholder(tf.float32, shape=[None, *shape]))
    out = model.call(inp)

    # Initialize loss functions.
    total_loss, l2_loss, l2_reg, tv_reg, ssim_loss, sobel_loss = \
        _get_losses(inp, out, batch_size, model.losses,
                    l2_reg, tv_reg, ssim_loss, sobel_loss)

    # Configure training operation.
    train_op = _get_train_op(total_loss, lr)

    # Load datasets.
    train_dataset = (_dataset.get_dataset(
        os.path.join(dataset_path, "train"), partial=True)
        .map(_only_cropped_scan).batch(batch_size).prefetch(buffer_size))
    dev_dataset = (_dataset.get_dataset(
        os.path.join(dataset_path, "dev"), partial=True)
        .map(_only_cropped_scan).batch(batch_size).prefetch(buffer_size))

    # Setup logging and weight saving.
    _tboard.configure(log_path, flush_secs=2)
    saver = tf.train.Saver()

    # Initialize training loop variables.
    best_dev_loss, dev_loss = np.inf, np.inf

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        _logger.info("Counting datasets...")
        train_batches = dataset_iter_len(
            sess, train_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tTrain samples: {}".format(train_batches))
        dev_batches = dataset_iter_len(
            sess, dev_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tDev samples: {}".format(dev_batches))

        train_loss = total_loss / train_batches
        dev_loss = total_loss / dev_batches

        # Re-create the train dataset: counting exhausted its one-shot iterator.
        train_dataset = (_dataset.get_dataset(
            os.path.join(dataset_path, "train"), partial=True)
            .map(_only_cropped_scan).batch(batch_size).prefetch(buffer_size))

        for epoch in tqdm(range(epochs)):
            train_iter = train_dataset.make_one_shot_iterator().get_next()

            losses = defaultdict(float)
            for _ in range(train_batches):
                sample = sess.run(train_iter)
                _, _train_loss, _l2_loss, _l2_reg, _tv_reg, _ssim_loss, _sobel_loss = \
                    sess.run(
                        [train_op, train_loss, l2_loss, l2_reg, tv_reg, ssim_loss, sobel_loss],
                        feed_dict={inp: sample})
                losses["train/loss/total"] += _train_loss
                losses["train/loss/l2_loss"] += _l2_loss
                losses["train/reg/l2"] += _l2_reg
                losses["train/reg/tv"] += _tv_reg
                losses["train/loss/ssim"] += _ssim_loss
                losses["train/loss/sobel"] += _sobel_loss

            # Increment before doing anything else to avoid zero-indexed epochs.
            epoch += 1

            # Log training losses to tensorboard.
            for name, val in losses.items():
                _tboard.log_value(name, val, step=epoch)
            _logger.info("Epoch {}: train loss {}".format(
                epoch, losses["train/loss/total"]))

            # Compute dev metrics every 2 epochs.
            if epoch < 2 or epoch % 2 == 0:
                losses.clear()

                # Compute and log dev loss.
                _dev_loss, _l2_loss, _l2_reg, _tv_reg, _ssim_loss, _sobel_loss = \
                    _get_dev_loss(sess, inp, dev_dataset, dev_batches, dev_loss,
                                  l2_loss, l2_reg, tv_reg, ssim_loss, sobel_loss)

                # Log dev losses to tensorboard.
                _logger.info("Epoch {}: dev loss {}".format(epoch, _dev_loss))
                _tboard.log_value("dev/loss/total", _dev_loss, step=epoch)
                _tboard.log_value("dev/loss/l2_loss", _l2_loss, step=epoch)
                _tboard.log_value("dev/reg/l2", _l2_reg, step=epoch)
                _tboard.log_value("dev/reg/tv", _tv_reg, step=epoch)
                _tboard.log_value("dev/loss/ssim", _ssim_loss, step=epoch)
                _tboard.log_value("dev/loss/sobel", _sobel_loss, step=epoch)

                # Save best model.
                if _dev_loss < best_dev_loss:
                    save_path = saver.save(
                        sess, os.path.join(weights_path, "{}.ckpt".format(epoch)))
                    _logger.info("Saved new best model to {}".format(save_path))
                    best_dev_loss = _dev_loss

                # Plot some reconstruction images.
                _log_reconstruction_imgs("eval", sess, dev_dataset, inp, out,
                                         epoch, mean, std)
                _log_reconstruction_imgs("train", sess, train_dataset, inp, out,
                                         epoch, mean, std)
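# The batch-counting helpers used throughout (dataset_iter_len, _iter_len) are
# not shown in this section; a minimal sketch, assuming they simply exhaust a
# one-shot iterator and count how many batches it yields:
def dataset_iter_len(sess, next_op):
    """Count batches by evaluating `next_op` until the dataset is exhausted."""
    count = 0
    while True:
        try:
            sess.run(next_op)
            count += 1
        except tf.errors.OutOfRangeError:
            return count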
def regression(dataset: str, batch_size: int, encoder_weights: str, lr: float,
               epsilon: float, model: str):
    """
    Creates and trains a regression model with variable batch size.

    :param dataset: Name of dataset over which to train.
    :param batch_size: Number of training samples per batch.
    :param encoder_weights: Path to trained encoder weights.
    :param lr: Model learning rate.
    :param epsilon: Cutoff for training termination.
    :param model: Model type to use for regression.
    """
    assert isinstance(dataset, str) and len(dataset)
    assert isinstance(batch_size, int) and batch_size > 0
    assert isinstance(encoder_weights, str) and len(encoder_weights)
    assert isinstance(lr, float) and lr > 0
    assert isinstance(epsilon, float) and epsilon > 0
    assert isinstance(model, str) and len(model)

    model_name = model
    model = _get_model(model)

    if not os.path.isabs(encoder_weights):
        encoder_weights = _util.get_rel_weights_path(encoder_weights)
    _util.ensure_dir(os.path.dirname(encoder_weights))

    # Note: these are weights for THIS model.
    weights_path = _util.get_weights_path_by_param(
        model=model_name,
        dataset=dataset,
        encoder=md5(encoder_weights.encode("ascii")).hexdigest(),
        lr=lr,
        batch_size=batch_size,
        epsilon=epsilon,
    )
    log_path = os.path.join(weights_path, "logs")

    train_dataset = _dataset.get_dataset_by_name(
        os.path.join(dataset, "train"), partial=True).batch(
        batch_size, drop_remainder=True)
    dev_dataset = _dataset.get_dataset_by_name(
        os.path.join(dataset, "dev"), partial=True).batch(
        batch_size, drop_remainder=True)
    shape = _dataset.load_shape(_util.get_rel_datasets_path(dataset))

    label = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    features = tf.placeholder(dtype=tf.float32, shape=[None, len(_hcp.FEATURES)])

    _tboard.configure(log_path, flush_secs=2)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Define input, output, and intermediate operation.
        encoder, (scan, code) = _tt_utils.load_encoder(sess, encoder_weights,
                                                       batch_size, shape)

        _logger.info("Counting datasets...")
        train_batches = _tt_utils.dataset_iter_len(
            sess, train_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tTrain samples: {}".format(train_batches))
        dev_batches = _tt_utils.dataset_iter_len(
            sess, dev_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tDev samples: {}".format(dev_batches))

        model(sess, encoder, scan, code, features, label, epsilon,
              train_dataset, train_batches, dev_dataset, dev_batches,
              lr, weights_path)
def train(dataset: str, weights: str, epochs=1000, batch_size=64, grad_norm=1000,
          buffer_size=8, lr=1e-3, l2_reg=1e-1, tv_reg=1e-2, partial=False):
    """
    Trains an Autoencoder using the specified parameters.

    :param dataset: Existing dataset over which to train.
    :param weights: Name of the weights directory for checkpoints and logs.
    :param epochs: Number of iterations over training data before termination.
    :param batch_size: Number of training samples per batch.
    :param grad_norm: Global norm at which to clip gradients; 0 disables clipping.
    :param buffer_size: Number of batches to prefetch.
    :param lr: Adam optimization initial learning rate.
    :param l2_reg: L2 regularization coefficient for kernel weights.
    :param tv_reg: Total Variation regularization coefficient for data.
    :param partial: Whether to load only partially-available records.
    :return: None
    """
    assert isinstance(dataset, str) and len(dataset)
    assert isinstance(weights, str) and len(weights)
    assert isinstance(epochs, int) and epochs > 0
    assert isinstance(batch_size, int) and batch_size > 0
    assert isinstance(grad_norm, int) and grad_norm >= 0
    assert isinstance(buffer_size, int) and buffer_size > 0
    assert isinstance(lr, float) and lr > 0
    assert isinstance(l2_reg, float) and l2_reg >= 0
    assert isinstance(tv_reg, float) and tv_reg >= 0
    assert isinstance(partial, bool)

    # Load and ensure required paths.
    weights_path = _get_weights_path(weights)
    log_path = _get_log_path(weights)

    dataset_path = _util.get_rel_datasets_path(dataset)
    _util.ensure_dir(dataset_path)

    # Load model and input shape.
    shape = load_shape(dataset_path)
    model = Autoencoder(l2_reg)

    # Create input/output placeholders.
    inp = tf.placeholder(tf.float32, shape=[None, *shape])
    out = model.call(inp)

    # Initialize loss: L2 reconstruction, kernel regularization, and TV.
    loss = tf.nn.l2_loss(inp - out)
    if l2_reg > 0:
        loss += tf.add_n(model.losses)
    if tv_reg > 0:
        loss += tv_reg * tf.reduce_sum(total_variation_5d(tf.expand_dims(out, 4)))

    # Configure training operation.
    train_op = _get_train_op(loss, lr, grad_norm)

    # Load datasets.
    train_dataset = _get_dataset(os.path.join(dataset_path, "train"),
                                 batch_size, buffer_size, partial)
    dev_dataset = _get_dataset(os.path.join(dataset_path, "dev"),
                               batch_size, buffer_size, partial)

    # Setup logging and weight saving.
    _tboard.configure(log_path, flush_secs=5)
    saver = tf.train.Saver()

    # Initialize training loop variables.
    best_dev_loss, dev_loss = np.inf, np.inf

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        _logger.info("Counting datasets...")
        train_batches = _iter_len(
            sess, train_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tTrain samples: {}".format(train_batches))
        dev_batches = _iter_len(
            sess, dev_dataset.make_one_shot_iterator().get_next())
        _logger.info("\tDev samples: {}".format(dev_batches))

        for epoch in tqdm(range(epochs)):
            train_iter = train_dataset.make_one_shot_iterator().get_next()

            train_loss = 0
            for _ in range(train_batches):
                _, new_train_loss = sess.run(
                    [train_op, loss], feed_dict={inp: sess.run(train_iter)})
                train_loss += new_train_loss

            # Increment before doing anything else to avoid zero-indexed epochs.
            epoch += 1
            _tboard.log_value("epoch", epoch, step=epoch)

            train_loss /= train_batches * batch_size
            _logger.info("Epoch {}: train {}".format(epoch, train_loss))
            _tboard.log_value("train loss", train_loss, step=epoch)

            if epoch % 20 == 0:
                # Compute and log dev loss.
                new_dev_loss = _get_dev_loss(sess, inp, dev_dataset, dev_batches,
                                             batch_size, loss)
                _logger.info("Epoch {}: dev {} diff {}".format(
                    epoch, new_dev_loss, dev_loss - new_dev_loss))
                dev_loss = new_dev_loss

                if dev_loss < best_dev_loss:
                    save_path = saver.save(
                        sess, os.path.join(weights_path, "{}.ckpt".format(epoch)))
                    _logger.info("Saved new best model to {}".format(save_path))
                    best_dev_loss = new_dev_loss

                # Plot some reconstruction images.
                _log_reconstruction_imgs("eval", sess, dev_dataset, inp, out,
                                         epoch, weights_path)
                _log_reconstruction_imgs("train", sess, train_dataset, inp, out,
                                         epoch, weights_path)
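# The train-op helper is not shown in this section. A minimal sketch of what
# _get_train_op(loss, lr, grad_norm) might look like, assuming Adam with
# optional global-norm gradient clipping (an assumption inferred from the
# grad_norm parameter, not confirmed by the source):
def _get_train_op(loss, lr, grad_norm=0):
    """Adam step; clips gradients to `grad_norm` (global norm) when non-zero."""
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    if grad_norm:
        grads, variables = zip(*optimizer.compute_gradients(loss))
        grads, _ = tf.clip_by_global_norm(grads, grad_norm)
        return optimizer.apply_gradients(zip(grads, variables))
    return optimizer.minimize(loss)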
# NOTE: the original definition line (and function name) is not shown in this
# section; the signature below is reconstructed from the asserts that follow.
def train_svm(dataset: str, batch_size: int, test_size: int, buffer_size: int,
              lr: float, eps: float, partial=False):
    """
    Trains a linear SVM regressor given a dataset, the train and test
    batch size, the learning rate, and the threshold for acceptable error.
    """
    # Make sure that all necessary variables are in the necessary forms.
    assert isinstance(dataset, str) and len(dataset)
    assert isinstance(batch_size, int) and batch_size > 0
    assert isinstance(test_size, int) and test_size > 0
    assert isinstance(buffer_size, int) and buffer_size > 0
    assert isinstance(lr, float) and lr > 0
    assert isinstance(eps, float) and eps > 0
    assert isinstance(partial, bool)

    # Get dataset path.
    dataset_path = _util.get_rel_datasets_path(dataset)
    _util.ensure_dir(dataset_path)
    shape = load_shape(dataset_path)

    # Subset data into randomly shuffled train and test sets.
    # For now, hardcode the number of files for the train/test split:
    # 110 of 1096 records is a 10% validation split.
    testset = np.random.choice(1096, 110, replace=False)
    test_set = get_dataset_regress(dataset_path, test_size, buffer_size,
                                   testset, partial=partial)
    trainset = np.random.permutation(np.delete(np.arange(1096), testset))
    train_set = get_dataset_regress(dataset_path, batch_size, buffer_size,
                                    trainset, partial=partial)

    # Define SVM input variables.
    feat = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    label = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    w = tf.Variable(tf.random_normal(shape=[1, 1]))
    b = tf.Variable(tf.random_normal(shape=[1, 1]))

    # Define output.
    svm_out = tf.add(tf.matmul(feat, w), b)
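    # The snippet ends at the model output. A natural continuation (a sketch,
    # not from the source) would define the epsilon-insensitive SVM regression
    # loss and a gradient-descent step using the `eps` and `lr` parameters:
    # errors smaller than `eps` incur no penalty, larger errors are penalized
    # linearly, and an L2 term on `w` keeps the regressor flat.
    svm_loss = tf.reduce_mean(
        tf.maximum(0., tf.abs(svm_out - label) - eps)) + tf.nn.l2_loss(w)
    svm_train_op = tf.train.GradientDescentOptimizer(lr).minimize(svm_loss)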