Example #1
def run_training(data):
    # TODO: read in data
    training_data = data[0]
    labels = data[1]
    # test_data = 

    with tf.Graph().as_default():
        input_pl, labels_pl = network.placeholder_inputs(BATCH_SIZE)
        logits = network.feedforward(input_pl,
                                     NUM_HIDDEN1,
                                     NUM_HIDDEN2)

        loss = network.loss(logits, labels_pl)

        train_op = network.training(loss, ETA)

        eval_correct = network.evaluation(logits, labels_pl)
        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Run the Op to initialize the variables.
        init = tf.initialize_all_variables()
        sess.run(init)

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        for epoch in xrange(EPOCHS):
            start_time = time.time()

            training_size = len(training_data)
            # Slice the inputs and labels into mini-batches of BATCH_SIZE.
            batches = [(training_data[k:k + BATCH_SIZE], labels[k:k + BATCH_SIZE])
                       for k in xrange(0, training_size, BATCH_SIZE)]
            for batch_data, batch_labels in batches:
                feed_dict = fill_dict(batch_data,
                                      batch_labels,
                                      input_pl,
                                      labels_pl)
                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time

            # Write a summary every 10 epochs.
            if epoch % 10 == 0:
                print 'Epoch %d: loss = %.2f (%.3f sec)' % (epoch, loss_value, duration)

                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, epoch)
                summary_writer.flush()
        print 'Evaluate with the validation set...'
        validate(sess,
                 eval_correct,
                 input_pl,
                 labels_pl,
                 test_data)
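Example #1 calls a fill_dict helper that is not part of the listing above. Below is a minimal sketch of what that helper is assumed to do (map one mini-batch of inputs and labels onto the two placeholders); only the call signature is taken from the snippet, the body is an assumption:

def fill_dict(batch_data, batch_labels, input_pl, labels_pl):
    # Map the numpy batch arrays onto the graph placeholders for sess.run().
    return {input_pl: batch_data, labels_pl: batch_labels}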
Example #2
def main(_):
    # Read the expert rollouts from disk.
    observations, actions = load_data(ARGS.rollouts_file)
    print("observations shape = " + str(observations.shape))
    print("actions shape = " + str(actions.shape))
    observation_length = observations.shape[1]
    action_length = actions.shape[1]

    assert (observations.shape[0] == actions.shape[0])
    assert (observations.shape[0] % ARGS.batch_size == 0)

    # Assemble the network.
    opl = tf.placeholder(tf.float32,
                         shape=(None, observation_length),
                         name="observations")
    apl = tf.placeholder(tf.float32,
                         shape=(None, action_length),
                         name="actions")
    logits = network.inference(opl, observation_length, ARGS.hidden1,
                               ARGS.hidden2, action_length)
    errors, loss = network.loss(logits, apl)
    global_step, train_op = network.training(loss, ARGS.learning_rate)

    # Initialize the network.
    init = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=10)
    sess = tf.Session()

    if os.path.exists(ARGS.checkpoint_dir):
        saver.restore(sess, tf.train.latest_checkpoint(ARGS.checkpoint_dir))
    else:
        sess.run(init)

    # Train the network.
    num_batches = observations.shape[0] // ARGS.batch_size
    for step in range(ARGS.training_steps):
        i = step % num_batches
        if i == 0:
            p = np.random.permutation(observations.shape[0])
            observations = observations[p]
            actions = actions[p]
        start = int(i * ARGS.batch_size)
        stop = int((i + 1) * ARGS.batch_size)
        feed_dict = {opl: observations[start:stop], apl: actions[start:stop]}
        _, loss_value, step_value = sess.run([train_op, loss, global_step],
                                             feed_dict=feed_dict)
        if step % 100 == 0:
            basename = os.path.basename(ARGS.checkpoint_dir)
            checkpoint_file = os.path.join(ARGS.checkpoint_dir, basename)
            saver.save(sess, checkpoint_file, global_step=step_value)
            loss_value = sess.run(loss,
                                  feed_dict={
                                      opl: observations,
                                      apl: actions
                                  })
            msg = "step {}; loss = {}".format(step_value, loss_value)
            print(msg)
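Examples #2 and #15 unpack two values from network.loss(logits, apl): a per-example error tensor and a scalar training loss. The project's actual network module is not shown on this page; purely as an illustration, a regression-style loss with that return shape might look like the sketch below (the names and the squared-error choice are assumptions):

import tensorflow as tf

def loss(logits, actions):
    # Per-example squared error between predicted and target actions.
    errors = tf.reduce_sum(tf.square(logits - actions), axis=1)
    # Scalar training loss: the mean error over the batch.
    loss = tf.reduce_mean(errors, name='loss')
    return errors, loss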
Example #3
def train():
    """Train network for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for network.
        images, labels = network.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = network.inference(images)

        # Calculate loss.
        loss = network.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = network.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        #summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        #summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
        #                                        graph_def=sess.graph_def)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_input_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
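Examples #3, #4, #5, #7 and #9 expect network.loss(logits, labels) to return a single scalar, in the style of the TensorFlow CIFAR-10 tutorial. As an illustration only (not the actual network module behind these snippets), such a loss could combine softmax cross-entropy with any weight-decay terms already collected on the graph:

import tensorflow as tf

def loss(logits, labels):
    # Sparse softmax cross-entropy between the logits and the integer labels.
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # Total loss = cross-entropy plus any regularization terms in 'losses'.
    return tf.add_n(tf.get_collection('losses'), name='total_loss')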
Example #4
def train():
  """Train network for a number of steps."""
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Get images and labels for network.
    images, labels = network.distorted_inputs()

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = network.inference(images)

    # Calculate loss.
    loss = network.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = network.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.all_variables())

    # Build the summary operation based on the TF collection of Summaries.
    #summary_op = tf.merge_all_summaries()

    # Build an initialization operation to run below.
    init = tf.initialize_all_variables()

    # Start running operations on the Graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    #summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
    #                                        graph_def=sess.graph_def)

    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_input_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))
Example #5
def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        images, labels = network.distorted_inputs()

        logits = network.inference(images)

        loss = network.loss(logits, labels)

        train_op = network.train(loss, global_step)

        saver = tf.train.Saver(tf.all_variables())

        summary_op = tf.merge_all_summaries()

        init = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_input_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
Example #6
def train_all(epochs_count=EPOCHS_COUNT, batch_size=BATCH_SIZE):
	model.train()
	for i in range(epochs_count):
		training_loss = 0
		for j in range(batches_count):
			network_output = model(get_batch_features_tensor(j).to(device))
			optimizer.zero_grad()
			loss_value = network.loss(network_output, get_batch_labels_tensor(j))
			training_loss += loss_value.item()
			loss_value.backward()
			optimizer.step()
		training_loss /= batches_count
		print("Epoch number:", i + 1)
		print("Training loss:", training_loss)
		validate()
		print("---------------------")
Example #7
def train():

  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
    # Get the image data and the corresponding label batch.
    float_image, label = tfrecord.train_data_read(tfrecord_path=FLAGS.train_data)
    images, labels = tfrecord.create_batch(float_image,label,count_num=FLAGS.train_num)

    logits = network.inference(images)
    # Compute the loss.
    loss = network.loss(logits, labels)
    # Build the training op.
    train_op = network.train(loss, global_step)
    # Saver for writing model checkpoints.
    saver = tf.train.Saver(tf.global_variables())
    # Merge all summary ops.
    summary_op = tf.summary.merge_all()
    # Initialize all variables.
    init = tf.initialize_all_variables()
    # Start running operations on the graph.
    sess = tf.Session(config=tf.ConfigProto(
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)
    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)
    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time
      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)
        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))
      if step % 50 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)
      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
Example #8
    parameter_table = [["Initial parameters", parameter_path],
                       ["Ranking loss", ranking_loss], ["SPP", spp], ["Pooling", args.pooling],
                       ['Experiment', experiment_name],
                       ['Embedding dim', embedding_dim], ['Batch size', batch_size_trn],
                       ['Initial LR', initial_lr], ['Momentum', momentum_coeff],
                       ['LR Step size', step_size], ['LR Step factor', step_factor],
                       ['Total Steps', total_steps]]

    training_images = inputs(args.training_db, batch_size_trn, None, True, augment_training_data)
    test_images = inputs(args.validation_db, batch_size_val, None, False)
    net_data = np.load(parameter_path).item()
    var_dict = nw.get_variable_dict(net_data)
    with tf.variable_scope("ranker") as scope:
        feature_vec = nw.build_alexconvnet(training_images, var_dict, embedding_dim, spp, args.pooling)
        L, p = nw.loss(feature_vec, nw.build_loss_matrix(batch_size_trn), ranking_loss)
        scope.reuse_variables()
        val_feature_vec = nw.build_alexconvnet(test_images, var_dict, embedding_dim, spp, args.pooling)
        L_val, p_val = nw.loss(val_feature_vec, nw.build_loss_matrix(batch_size_val), ranking_loss)

    lr = tf.Variable(initial_lr)
    opt = tf.train.AdamOptimizer()
    grads = opt.compute_gradients(L)

    apply_grad_op = opt.apply_gradients(grads)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
Example #9
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        # Force input pipelines to CPU:0 to avoid operations sometimes ending up on the GPU and resulting in a slowdown.
        with tf.device('/cpu:0'):
            images, labels = network.distorted_inputs()

        # Build a Graph that computes the logits predictions from the inference model.
        logits = network.inference(images)
        # print(logits.get_shape())
        # print(labels.get_shape())
        # os.system('pause')

        # Calculate loss.
        loss = network.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and updates the model parameters.
        train_op = network.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime"""
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = (
                        "%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)"
                    )
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            print(
                '******************************** Successfully created session'
            )
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
Example #10
def main():
    print "initial model generator"
    with tf.Graph().as_default():
        train_sets = dataset.get_datasets(main_path, EPIWidth, disp_precision,
                                          'train')
        test_sets = dataset.get_datasets(main_path, EPIWidth, disp_precision,
                                         'test')

        global_step = tf.Variable(0, trainable=False)

        images_placeholder_v = tf.placeholder(tf.float32,
                                              shape=(None, 9, EPIWidth, 1))
        images_placeholder_u = tf.placeholder(tf.float32,
                                              shape=(None, 9, EPIWidth, 1))
        labels_placeholder = tf.placeholder(tf.int32, shape=None)
        prop_placeholder = tf.placeholder('float')
        phase_train = tf.placeholder(tf.bool, name='phase_train')

        logits = network.inference_ds(images_placeholder_u,
                                      images_placeholder_v, prop_placeholder,
                                      phase_train, disp_precision)

        logits_softmax = network.softmax(logits)

        loss = network.loss(logits_softmax, labels_placeholder)

        train_op = network.training(loss, 1e-4, global_step)

        eval = network.evaluation(logits_softmax)

        summary = tf.summary.merge_all()

        saver = tf.train.Saver(tf.global_variables())

        gpu_option = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)

        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_option))

        summary_writer = tf.summary.FileWriter(summary_path, sess.graph)

        sess.run(tf.global_variables_initializer())

        ckpt = tf.train.get_checkpoint_state(checkpoint_path)
        if ckpt:
            # saver.restore(sess, checkpoint_path + '/model.ckpt')  # restore a result trained on a different platform
            # saver.restore(sess, ckpt.model_checkpoint_path)  # restore the locally trained result
            print("restore from checkpoint!")
        else:
            print("no checkpoint found!")

        start_time = time.time()

        step = 0

        while not train_sets.complete:
            feed_dict = fill_feed_dict(train_sets, images_placeholder_u,
                                       images_placeholder_v,
                                       labels_placeholder, prop_placeholder,
                                       phase_train, 'train')
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time
            if step % 1000 == 0:
                print('Step:%d: loss = %.2f (%.3f sec)' %
                      (step, loss_value, duration))
                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

            if step % 25000 == 24999:
                print('test Data Eval:')
                do_eval_true(sess, eval, logits_softmax, images_placeholder_u,
                             images_placeholder_v, prop_placeholder,
                             phase_train, test_sets)

            if step % 50000 == 49999:
                saver.save(sess,
                           checkpoint_path + '/model.ckpt',
                           global_step=step)

            # Advance the step counter so the periodic logging, evaluation and
            # checkpointing above actually trigger.
            step += 1
Example #11
LEARNING_RATE = float(os.environ.get('LEARNING_RATE') or 0.1)
RESTORE = (os.environ.get('RESTORE') or '') == 'true'

learning_rate_value = LEARNING_RATE
session_config = tf.ConfigProto(log_device_placement=True)
session_config.gpu_options.allow_growth = True
# this is required if we want to use the GPU as the device.
# see: https://github.com/tensorflow/tensorflow/issues/2292
session_config.allow_soft_placement = True

if __name__ == "__main__":

    with tf.Graph().as_default() as g, tf.device(USE_DEVICE):
        # inference()
        input, deep_features = network.inference()
        labels, logits, cross_entropy = network.loss(deep_features)
        centroid_loss, centroids, spread = network.center_loss(
            deep_features, labels)

        # combine the two losses
        _lambda = tf.placeholder(dtype=tf.float32)
        total_loss = cross_entropy + _lambda / 2. * centroid_loss

        learning_rate, train, global_step = network.training(total_loss)
        eval = network.evaluation(logits, labels)

        init = tf.initialize_all_variables()

        with tf.Session(config=session_config) as sess, \
                h5py.File(DUMP_FILE, 'a', libver='latest', swmr=True) as h5_file:
            # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
Example #12
def run_training():
    """Train network for a number of epochs."""
    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            # Input data, pin to CPU because rest of pipeline is CPU-only
            with tf.device('/cpu:0'):
                input_data = tf.constant(training_data)
                input_labels = tf.constant(training_labels)

            input, label = tf.train.slice_input_producer(
                [input_data, input_labels], num_epochs=FLAGS.num_epochs)
            label = tf.cast(label, tf.int32)
            input, labels = tf.train.batch([input, label],
                                           batch_size=FLAGS.batch_size)

            # Build a Graph that computes predictions from the inference model.
            logits = network.inference(input, FLAGS.hidden1, FLAGS.hidden2)

            # Add to the Graph the Ops for loss calculation.
            loss = network.loss(logits, labels)

            # Add to the Graph the Ops that calculate and apply gradients.
            train_op = network.training(loss, FLAGS.learning_rate)

            # Add the Op to compare the logits to the labels during evaluation.
            eval_correct = network.evaluation(logits, labels)

            # Build the summary operation based on the TF collection of Summaries.
            summary_op = tf.summary.merge_all()

            # Create a saver for writing training checkpoints.
            saver = tf.train.Saver()

            # Create the op for initializing variables.
            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            # Create a session for running Ops on the Graph.
            sess = tf.Session()

            # Run the Op to initialize the variables.
            sess.run(init_op)

            # Instantiate a SummaryWriter to output summaries and the Graph.
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                    sess.graph)

            # Start input enqueue threads.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            # And then after everything is built, start the training loop.
    for ep in xrange(FLAGS.num_epochs):
        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time
            # Write the summaries and print an overview fairly often.
            if loss_value <= 0.00001:
                print(
                    'Loss value: %.4f, done training for %d epochs, %d steps.'
                    % (loss_value, ep, ep * FLAGS.max_steps + step))
                return
            if step % 100 == 0:
                # Print status to stdout.
                print('Epochs %d: loss = %.4f (%.3f sec)' %
                      (ep, loss_value, duration))
                # Update the events file.
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save a checkpoint periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                print('Saving')
                saver.save(sess, FLAGS.train_dir, global_step=step)
Example #13
def main():

    ########################################## USER INPUT ##############################################################

    # Training parameters:
    if len(sys.argv) >= 8:
        IMAGE_NAME = sys.argv[1]  # IMAGE_NAME = '1'
        NETWORK_NAME = sys.argv[2]  # 'unet', 'deep_decoder'
        LOSS_NAME = sys.argv[3]  # 'mse', 'l1', 'mse_l1', 'mse_with_tv_reg', 'mse_with_edge_reg'
        OPTIMIZER_TYPE = sys.argv[4]  # 'sgd', 'adam'
        LEARNING_RATE = float(sys.argv[5])
        NUM_ITERATIONS = int(sys.argv[6])
        ITERATIONS_TO_SAVE = int(sys.argv[7])

        if len(sys.argv) == 11:
            w_h = float(sys.argv[8])
            w_v = float(sys.argv[9])
            w_mse = float(sys.argv[10])
        else:
            w_h = None
            w_v = None
            w_mse = None

    else:
        print('Not enough input parameters.')
        return

    ####################################################################################################################

    # Load images:
    RAW_FILENAME = os.path.join('Raw', '{}_Raw Image.tif'.format(IMAGE_NAME))
    AVERAGED_FILENAME = os.path.join(
        'Averaged', '{}_Averaged Image.tif'.format(IMAGE_NAME))

    try:
        input_image = hf.get_training_image(RAW_FILENAME)
    except Exception:
        print("Error loading {}".format(RAW_FILENAME))
        return
    try:
        ground_truth = hf.get_training_image(AVERAGED_FILENAME)
    except Exception:
        print("Error loading {}".format(AVERAGED_FILENAME))
        return

    # Validate settings:
    VALID_NETWORK_NAMES = ["unet", "deep_decoder"]
    VALID_OPTIMIZER_TYPES = ["sgd", "adam"]
    VALID_LOSS_NAMES = [
        "mse", "l1", "mse_l1", "mse_with_tv_reg", "mse_with_edge_reg"
    ]

    if not (NETWORK_NAME in VALID_NETWORK_NAMES):
        print("Error: {} network does not exist.".format(NETWORK_NAME))
        return
    if not (OPTIMIZER_TYPE in VALID_OPTIMIZER_TYPES):
        print("Error: {} optimizer does not exist.".format(OPTIMIZER_TYPE))
        return
    if not (LOSS_NAME in VALID_LOSS_NAMES):
        print("Error: {} loss does not exist.".format(LOSS_NAME))
        return

    # Create folder to save results:
    SAVE_FOLDER = os.path.join('./results', IMAGE_NAME)
    count = 0
    CHECK_FOLDER = SAVE_FOLDER
    while os.path.exists(CHECK_FOLDER):
        count += 1
        CHECK_FOLDER = '{}({})'.format(SAVE_FOLDER, count)
    SAVE_FOLDER = CHECK_FOLDER
    os.mkdir(SAVE_FOLDER)

    WRITE_FILENAME = os.path.join(SAVE_FOLDER, 'metrics.txt')
    with open(WRITE_FILENAME, 'a') as wf:
        wf.write(
            'PARAMETERS\nNetwork: {}\nLoss: {}\nOptimizer: {}\nLearning rate: {}\nNumber of iterations: {}'
            .format(NETWORK_NAME, LOSS_NAME, OPTIMIZER_TYPE, LEARNING_RATE,
                    NUM_ITERATIONS))
        wf.write('\n\nw_h: {}\nw_v: {}\nw_mse: {}'.format(w_h, w_v, w_mse))
        wf.write('\n\nIteration\tLoss\tSNR\tCNR\tSSIM')

    # Get input noise:
    if NETWORK_NAME == "unet":
        input_noise = hf.get_noise_matrix(input_image.shape[1],
                                          input_image.shape[2], 32)
    elif NETWORK_NAME == "deep_decoder":
        input_noise = hf.get_noise_matrix(input_image.shape[1] // (2**4),
                                          input_image.shape[2] // (2**4), 64)

    # Save inputs:
    save_filename = os.path.join(SAVE_FOLDER, 'input_image.tif')
    imsave(save_filename, input_image[0, :, :, 0], cmap='gray')

    save_filename = os.path.join(SAVE_FOLDER, 'ground_truth.tif')
    imsave(save_filename, ground_truth[0, :, :, 0], cmap='gray')

    # Calculate initial metrics:
    snr_i = hf.calculate_metrics(ground_truth, input_image, 'snr', IMAGE_NAME)
    cnr_i = hf.calculate_metrics(ground_truth, input_image, 'cnr', IMAGE_NAME)
    ssim_i = hf.calculate_metrics(ground_truth, input_image, 'ssim',
                                  IMAGE_NAME)
    with open(WRITE_FILENAME, 'a') as wf:
        wf.write('\ninput_image\tN/A\t{}\t{}\t{}'.format(snr_i, cnr_i, ssim_i))

    # Placeholders:
    z = tf.placeholder(tf.float32, shape=[1, None, None,
                                          input_noise.shape[3]])  # input noise
    x = tf.placeholder(tf.float32, shape=[1, None, None, 1])  # input image

    # Network:
    y = network.inference(NETWORK_NAME,
                          z,
                          height=input_noise.shape[1],
                          width=input_noise.shape[2],
                          channels=input_noise.shape[3])
    if LOSS_NAME == "mse_with_edge_reg" or LOSS_NAME == "mse_with_tv_reg":
        loss, mse, edge_h, edge_v = network.loss(y, x, LOSS_NAME, w_h, w_v,
                                                 w_mse)
    else:
        loss = network.loss(y, x, LOSS_NAME)

    # Update moving mean and variance for batch normalization (if required):
    if NETWORK_NAME == "deep_decoder":
        update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Create different optimizers here:
    if OPTIMIZER_TYPE == "sgd":
        train_op = tf.train.GradientDescentOptimizer(
            learning_rate=LEARNING_RATE).minimize(loss)
    elif OPTIMIZER_TYPE == "adam":
        train_op = tf.train.AdamOptimizer(
            learning_rate=LEARNING_RATE).minimize(loss)

    # Start session:
    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())

        # Keep track of metrics:
        track_iter = []
        track_loss = []
        track_snr = []
        track_cnr = []
        track_ssim = []

        for i in range(NUM_ITERATIONS + 1):

            if NETWORK_NAME == "unet":
                if LOSS_NAME == "mse_with_edge_reg" or LOSS_NAME == "mse_with_tv_reg":
                    _, output_image, loss_i, mse_i, edge_h_i, edge_v_i = sess.run(
                        [train_op, y, loss, mse, edge_h, edge_v],
                        feed_dict={
                            z: input_noise,
                            x: input_image
                        })
                else:
                    _, output_image, loss_i = sess.run([train_op, y, loss],
                                                       feed_dict={
                                                           z: input_noise,
                                                           x: input_image
                                                       })
            elif NETWORK_NAME == "deep_decoder":
                if LOSS_NAME == "mse_with_edge_reg" or LOSS_NAME == "mse_with_tv_reg":
                    _, _, output_image, loss_i, mse_i, edge_h_i, edge_v_i = sess.run(
                        [update_op, train_op, y, loss, mse, edge_h, edge_v],
                        feed_dict={
                            z: input_noise,
                            x: input_image
                        })
                else:
                    _, _, output_image, loss_i = sess.run(
                        [update_op, train_op, y, loss],
                        feed_dict={
                            z: input_noise,
                            x: input_image
                        })

            if i % ITERATIONS_TO_SAVE == 0:

                # Save image:
                save_filename = os.path.join(SAVE_FOLDER,
                                             'iteration_{}.tif'.format(i))
                imsave(save_filename, output_image[0, :, :, 0], cmap='gray')

                # Calculate metrics:
                snr_i = hf.calculate_metrics(ground_truth, output_image, 'snr',
                                             IMAGE_NAME)
                cnr_i = hf.calculate_metrics(ground_truth, output_image, 'cnr',
                                             IMAGE_NAME)
                ssim_i = hf.calculate_metrics(ground_truth, output_image,
                                              'ssim', IMAGE_NAME)
                with open(WRITE_FILENAME, 'a') as wf:
                    wf.write('\n{}\t{}\t{}\t{}\t{}'.format(
                        i, loss_i, snr_i, cnr_i, ssim_i))

                # Display:
                if LOSS_NAME == "mse_with_edge_reg" or LOSS_NAME == "mse_with_tv_reg":
                    print(
                        'Iteration {}/{}\t| Loss: {}\tSNR: {}\tCNR: {}\tSSIM: {}\tMSE: {}\tEdge_h: {}\tEdge_v: {}'
                        .format(i, NUM_ITERATIONS, loss_i, snr_i, cnr_i,
                                ssim_i, mse_i, edge_h_i, edge_v_i))
                else:
                    print(
                        'Iteration {}/{}\t| Loss: {}\tSNR: {}\tCNR: {}\tSSIM: {}'
                        .format(i, NUM_ITERATIONS, loss_i, snr_i, cnr_i,
                                ssim_i))

                # Track:
                track_iter.append(i)
                track_loss.append(loss_i)
                track_snr.append(snr_i)
                track_cnr.append(cnr_i)
                track_ssim.append(ssim_i)

        # Plot:
        hf.plot_metrics(track_iter, track_loss, 'loss',
                        os.path.join(SAVE_FOLDER, 'loss.tif'))
        hf.plot_metrics(track_iter, track_snr, 'snr',
                        os.path.join(SAVE_FOLDER, 'snr.tif'))
        hf.plot_metrics(track_iter, track_cnr, 'cnr',
                        os.path.join(SAVE_FOLDER, 'cnr.tif'))
        hf.plot_metrics(track_iter, track_ssim, 'ssim',
                        os.path.join(SAVE_FOLDER, 'ssim.tif'))

    print('Completed.')
Example #14
sys.path.append(os.path.abspath(os.path.join(
    os.path.dirname(__file__),
    os.path.pardir,
    'tracker')))
import network

# Set up the TensorFlow graph.
tf.Graph().as_default()
batchSize = 1
delta = 1
imagePlaceholder = tf.placeholder(tf.float32, shape=(batchSize * delta * 2, 227, 227, 3))
labelsPlaceholder = tf.placeholder(tf.float32, shape=(batchSize * delta, 4))
learningRate = tf.placeholder(tf.float32)
tfOutputs = network.inference(imagePlaceholder, num_unrolls=delta, train=True)
tfLossFull, tfLoss = network.loss(tfOutputs, labelsPlaceholder)
train_op = network.training(tfLossFull, learningRate)
summary = tf.summary.merge_all()
init = tf.global_variables_initializer()
saver = tf.train.Saver()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
summary_writer = tf.summary.FileWriter('logs/train/caffe_copy', sess.graph)
ops = []
with sess.as_default():
    sess.run(init)

    import caffe
    caffe.set_mode_cpu()
    # Load caffe net
Example #15
def main():
    args = get_parser().parse_args()
    observation_length = 17
    action_length = 6

    # Read the expert rollouts from disk.
    observations, actions = load_data(args.rollouts_file)
    print("observations shape = " + str(observations.shape))
    print("actions shape = " + str(actions.shape))

    # Make sure our files exist!
    assert (os.path.exists(os.path.dirname(os.path.abspath(args.stats_file))))

    # Load the expert.
    print("Loading and building expert policy.")
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print("Expert policy loaded and built.")

    # Assemble the network.
    opl = tf.placeholder(tf.float32,
                         shape=(None, observation_length),
                         name="observations")
    apl = tf.placeholder(tf.float32,
                         shape=(None, action_length),
                         name="actions")
    logits = network.inference(opl, observation_length, args.hidden1,
                               args.hidden2, action_length)
    errors, loss = network.loss(logits, apl)
    global_step, train_op = network.training(loss, args.learning_rate)

    with tf.Session() as sess:
        # Initialize the network.
        tf_util.initialize()
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(args.checkpoint_dir))

        env = gym.make("Walker2d-v1")
        max_steps = env.spec.timestep_limit

        avg_returns = []
        stddev_returns = []
        observations = list(observations)
        actions = list(actions)

        for iteration in range(args.num_iterations):
            obs = np.array(observations)
            acts = np.array(actions)
            assert (obs.shape[0] == acts.shape[0])

            # Train the network.
            if iteration != 0:
                num_batches = int(obs.shape[0] / args.batch_size)
                for step in range(args.training_steps):
                    i = step % num_batches
                    if i == 0:
                        p = np.random.permutation(obs.shape[0])
                        obs = obs[p]
                        acts = acts[p]
                    start = int(i * args.batch_size)
                    stop = int((i + 1) * args.batch_size)
                    feed_dict = {opl: obs[start:stop], apl: acts[start:stop]}
                    _, loss_value, step_value = sess.run(
                        [train_op, loss, global_step], feed_dict=feed_dict)
                    if step % 100 == 0:
                        loss_value = sess.run(loss,
                                              feed_dict={
                                                  opl: obs,
                                                  apl: acts
                                              })
                        msg = "Iteration {}; step {}; loss = {}".format(
                            iteration, step_value, loss_value)
                        print(msg)

            # Generate new rollouts.
            rewards = []
            for i in range(args.num_rollouts):
                print("Iteration {}; rollout {}".format(iteration, i))
                obs = env.reset()
                done = False
                steps = 0
                totalr = 0
                while not done:
                    expert_action = policy_fn(obs[None, :])
                    observations.append(obs)
                    actions.append(expert_action[0])

                    action = sess.run(logits, feed_dict={opl: obs[None, :]})
                    obs, r, done, _ = env.step(action)
                    totalr += r
                    steps += 1
                    if steps >= max_steps:
                        break
                rewards.append(totalr)

            print("Iteration {}; average return {}".format(
                iteration, np.mean(rewards)))
            print("Iteration {}; stddev return {}".format(
                iteration, np.std(rewards)))
            avg_returns.append(np.mean(rewards))
            stddev_returns.append(np.std(rewards))

            with open(args.stats_file, "w") as f:
                stats = {
                    "mean_return": avg_returns,
                    "stddev_returns": stddev_returns
                }
                json.dump(stats, f, indent=4)
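Examples #2 and #15 both call a load_data(rollouts_file) helper that is not part of this listing. A minimal sketch, assuming the rollouts were saved as a pickled dict with 'observations' and 'actions' arrays (the file format and key names are assumptions):

import pickle
import numpy as np

def load_data(rollouts_file):
    # Read the pickled expert rollouts from disk.
    with open(rollouts_file, 'rb') as f:
        data = pickle.load(f)
    observations = np.array(data['observations'])
    # Flatten the expert actions to shape (N, action_length).
    actions = np.array(data['actions']).reshape(observations.shape[0], -1)
    return observations, actions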
Example #16
                       ['Batch size', batch_size_trn],
                       ['Initial LR',
                        initial_lr], ['Momentum', momentum_coeff],
                       ['LR Step size', step_size],
                       ['LR Step factor', step_factor],
                       ['Total Steps', total_steps]]

    training_images = inputs(args.training_db, batch_size_trn, None, True,
                             augment_training_data)
    test_images = inputs(args.validation_db, batch_size_val, None, False)
    net_data = np.load(parameter_path).item()
    var_dict = nw.get_variable_dict(net_data)
    with tf.variable_scope("ranker") as scope:
        feature_vec = nw.build_alexconvnet(training_images, var_dict,
                                           embedding_dim, spp, args.pooling)
        L, p = nw.loss(feature_vec, nw.build_loss_matrix(batch_size_trn),
                       ranking_loss)
        scope.reuse_variables()
        val_feature_vec = nw.build_alexconvnet(test_images, var_dict,
                                               embedding_dim, spp,
                                               args.pooling)
        L_val, p_val = nw.loss(val_feature_vec,
                               nw.build_loss_matrix(batch_size_val),
                               ranking_loss)

    lr = tf.Variable(initial_lr)
    opt = tf.train.AdamOptimizer()
    grads = opt.compute_gradients(L)

    apply_grad_op = opt.apply_gradients(grads)

    init = tf.global_variables_initializer()