Example #1
def train_ops():
    # Get training parameters
    data_dir = FLAGS.data_dir
    batch_size = FLAGS.batch_size
    learning_rate = FLAGS.learning_rate
    # Create global step counter
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Instantiate async producers for images and labels
    images, labels = data.train_inputs(data_dir=data_dir)

    # Instantiate the model
    model = select.by_name(FLAGS.model)

    # Create a 'virtual' graph node based on images that represents the input
    # node to be used for graph retrieval
    inputs = tf.identity(images, 'inputs')

    # Build a Graph that computes the logits predictions from the
    # inference model
    logits = model.inference(inputs)

    # In the same way, create a 'virtual' node for outputs
    outputs = tf.identity(logits, 'predictions')

    # Calculate loss
    loss = model.loss(logits, labels)

    # Evaluate training accuracy
    accuracy = model.accuracy(logits, labels)

    # Attach scalar summaries to the total loss and the batch accuracy
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('batch accuracy', accuracy)
    # Note that for debugging purposes, we could also track other losses
    #for l in tf.get_collection('losses'):
    #    tf.summary.scalar(l.op.name, l)

    # Build a graph that applies gradient descent to update model parameters
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    sgd_op = optimizer.minimize(loss, global_step=global_step)

    # Build yet another graph to evaluate moving averages of variables after
    # each step: these smoothed parameters will be loaded instead of the raw
    # trained values during evaluation
    variable_averages = \
        tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # For batch normalization, we also need to update some variables
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Group the SGD step, the moving-average update and the batch-norm
    # updates into a single training op
    with tf.control_dependencies([sgd_op, variables_averages_op] + update_ops):
        train_op = tf.no_op(name='train')

    # Build another graph to provide training summary information
    summary_op = tf.summary.merge_all()

    return (train_op, loss, accuracy, summary_op)
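The function above only builds the graph; a minimal sketch of how its ops could be driven is shown below. It assumes the surrounding module already defines FLAGS, data and select as in the example, and the step count, log directory and print frequency are illustrative choices, not part of the original code.

def run_training(max_steps=1000, log_dir='/tmp/cifar10_train'):
    # Illustrative driver for train_ops(); not part of the original module
    train_op, loss, accuracy, summary_op = train_ops()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Start the async input producers created by data.train_inputs()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        writer = tf.summary.FileWriter(log_dir, sess.graph)
        try:
            for step in range(max_steps):
                _, loss_val, acc_val, summary = sess.run(
                    [train_op, loss, accuracy, summary_op])
                if step % 100 == 0:
                    print('step %d: loss = %.4f, batch accuracy = %.4f'
                          % (step, loss_val, acc_val))
                    writer.add_summary(summary, step)
        finally:
            coord.request_stop()
            coord.join(threads)
            writer.close()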
Example #2
def save_weights():
    """Saves CIFAR10 weights"""
    FLAGS.resume = True  # Get saved weights, not new ones
    print(FLAGS.save_dir)
    run_dir = get_run_dir(FLAGS.save_dir, FLAGS.model)
    print('run_dir', run_dir)
    checkpoint_dir = os.path.join(run_dir, 'train')

    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        images, labels = data.train_inputs(data_dir=FLAGS.data_dir)
        model = select.by_name(FLAGS.model, FLAGS, training=True)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = model.inference(images)
        print('Multiplicative depth', model.mult_depth())

        saver = tf.train.Saver()

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                # Restores from checkpoint
                saver.restore(sess, ckpt.model_checkpoint_path)
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
            else:
                # ckpt may be None here, so avoid dereferencing it
                print('### ERROR: no checkpoint file found ###')
                print('ckpt_dir', checkpoint_dir)
                print('ckpt', ckpt)
                return

            # Save variables
            for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
                weight = (sess.run([var]))[0].flatten().tolist()
                filename = model._name_to_filename(var.name)
                dir_name = filename.rsplit('/', 1)[0]
                os.makedirs(dir_name, exist_ok=True)

                print("saving", filename)
                np.savetxt(str(filename), weight)
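The weights written by save_weights() are flat text files produced by np.savetxt, one per trainable variable. A minimal sketch of reading one back is shown below; the file path and the target shape are purely illustrative, since the actual layout is determined by model._name_to_filename().

import numpy as np

def load_flat_weights(filename, shape=None):
    # Load a weight vector saved with np.savetxt and optionally restore
    # its original tensor shape
    weights = np.loadtxt(filename)
    if shape is not None:
        weights = weights.reshape(shape)
    return weights

# Hypothetical usage: a conv kernel saved as a flat vector, reshaped to 4-D
# kernel = load_flat_weights('weights/conv1/kernel.txt', shape=(5, 5, 3, 64))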
Example #3
def train_ops():
    # Get training parameters
    data_dir = FLAGS.data_dir
    batch_size = FLAGS.batch_size

    # Create global step counter
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # Instantiate async producers for images and labels
    images, labels = data.train_inputs(data_dir=data_dir)

    # Instantiate the model
    model = select.by_name(FLAGS.model, FLAGS, training=True)

    # Create a 'virtual' graph node based on images that represents the input
    # node to be used for graph retrieval
    inputs = tf.identity(images, 'XXX')

    # Build a Graph that computes the logits predictions from the
    # inference model
    logits = model.inference(inputs)
    print('Multiplicative depth', model.mult_depth())

    # In the same way, create a 'virtual' node for outputs
    outputs = tf.identity(logits, 'YYY')

    # Calculate loss
    loss = model.loss(logits, labels)

    # Evaluate training accuracy
    accuracy = model.accuracy(logits, labels)

    # Attach scalar summaries to the total loss and the batch accuracy
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('batch accuracy', accuracy)
    # For debugging purposes, also track the individual losses
    for l in tf.get_collection('losses'):
        tf.summary.scalar(l.op.name, l)

    learning_rate = 0.1
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)

    # Clip gradients to [-0.25, 0.25]
    if FLAGS.clip_grads:
        print("Clipping gradients to [-0.25, 0.25]")
        gvs = optimizer.compute_gradients(loss)
        capped_gvs = []
        for grad, var in gvs:
            if grad is None:
                continue
            capped_gvs.append((tf.clip_by_value(grad, -0.25, 0.25), var))
        sgd_op = optimizer.apply_gradients(capped_gvs, global_step=global_step)
    else:
        print("Not clipping gradients")
        sgd_op = optimizer.minimize(loss, global_step=global_step)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Group the SGD step and the batch-norm update ops into a single training op
    with tf.control_dependencies([sgd_op] + update_ops):
        train_op = tf.no_op(name='train')

    return (train_op, loss, accuracy)
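The distinguishing point of this variant is the clip-then-apply pattern: compute_gradients() returns (gradient, variable) pairs, each gradient is clipped with tf.clip_by_value, and the pairs are handed back to apply_gradients(). A self-contained sketch of the same pattern on a toy least-squares problem is shown below; the data, variable names and hyper-parameters are illustrative only.

import tensorflow as tf

def clipped_sgd_demo(clip_value=0.25, learning_rate=0.1, steps=100):
    x = tf.constant([[1.0], [2.0], [3.0]])
    y = tf.constant([[2.0], [4.0], [6.0]])
    w = tf.Variable([[0.0]], name='w')
    loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    # Clip each gradient before applying it, skipping variables with no
    # gradient, as in the FLAGS.clip_grads branch above
    gvs = optimizer.compute_gradients(loss)
    capped_gvs = [(tf.clip_by_value(grad, -clip_value, clip_value), var)
                  for grad, var in gvs if grad is not None]
    train_op = optimizer.apply_gradients(capped_gvs)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for _ in range(steps):
            sess.run(train_op)
        print('w =', sess.run(w))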