Example 1
def main(unused_argv):
    assert FLAGS.input_file_pattern, "--input_file_pattern is required"
    assert FLAGS.train_dir, "--train_dir is required"

    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = FLAGS.input_file_pattern
    model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
    training_config = configuration.TrainingConfig()

    # Create training directory.
    train_dir = FLAGS.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    # Build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = show_and_tell_model.ShowAndTellModel(
            model_config, mode="train", train_inception=FLAGS.train_inception)
        model.build()

        # Set up the learning rate.
        learning_rate_decay_fn = None
        if FLAGS.train_inception:
            learning_rate = tf.constant(
                training_config.train_inception_learning_rate)
        else:
            learning_rate = tf.constant(training_config.initial_learning_rate)
            if training_config.learning_rate_decay_factor > 0:
                num_batches_per_epoch = (
                    training_config.num_examples_per_epoch /
                    model_config.batch_size)
                decay_steps = int(num_batches_per_epoch *
                                  training_config.num_epochs_per_decay)

                def _learning_rate_decay_fn(learning_rate, global_step):
                    return tf.train.exponential_decay(
                        learning_rate,
                        global_step,
                        decay_steps=decay_steps,
                        decay_rate=training_config.learning_rate_decay_factor,
                        staircase=True)

                learning_rate_decay_fn = _learning_rate_decay_fn

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

    # Run training.
    tf.contrib.slim.learning.train(train_op,
                                   train_dir,
                                   log_every_n_steps=FLAGS.log_every_n_steps,
                                   graph=g,
                                   global_step=model.global_step,
                                   number_of_steps=FLAGS.number_of_steps,
                                   init_fn=model.init_fn,
                                   saver=saver)
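
The schedule built above relies on tf.train.exponential_decay with staircase=True, which holds the rate constant inside each decay period and multiplies it by the decay factor every decay_steps steps. A minimal plain-Python sketch of the same arithmetic (the numeric values are illustrative assumptions, not taken from the example's configuration):

def staircase_exponential_decay(initial_lr, global_step, decay_steps, decay_rate):
    # Same formula tf.train.exponential_decay applies when staircase=True:
    # the exponent is floored, so the rate drops in discrete jumps.
    return initial_lr * decay_rate ** (global_step // decay_steps)

# Illustrative values only (not the example's actual configuration):
decay_steps = int((586363 / 32) * 8.0)  # num_batches_per_epoch * num_epochs_per_decay
for step in (0, decay_steps - 1, decay_steps, 3 * decay_steps):
    print(step, staircase_exponential_decay(2.0, step, decay_steps, 0.5))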
Example 2
           '--eval_dir={eval_dir} ' \
           '--eval_interval_secs={eval_interval_secs}  ' \
           '--num_eval_examples={num_eval_examples}  ' \
           '--min_global_step={min_global_step} ' \
           '--CNN_name={CNN_name} ' \
           '--batch_size={batch_size}'

if __name__ == '__main__':
    FLAGS, unparsed = parse_args()
    print('current working dir [{0}]'.format(os.getcwd()))
    w_d = os.path.dirname(os.path.abspath(__file__))
    print('change working dir to [{0}]'.format(w_d))
    os.chdir(w_d)

    model_config = configuration.ModelConfig()
    training_config = configuration.TrainingConfig()
    training_config.update_data_params(FLAGS.dataset_name)

    step_per_epoch = training_config.num_examples_per_epoch // model_config.batch_size
    epoch_num = FLAGS.number_of_steps // step_per_epoch
    print("Number of examples per epoch is",
          training_config.num_examples_per_epoch)
    print("Number of step per epoch is", step_per_epoch)
    print("To run", FLAGS.number_of_steps, "steps,run epoch number is",
          epoch_num)

    if FLAGS.pretrained_model_checkpoint_file:
        ckpt = ' --inception_checkpoint_file=' + FLAGS.pretrained_model_checkpoint_file
    else:
        ckpt = ''
    for i in range(epoch_num):
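
The snippet above is cut off: it opens in the middle of a shell-command template and stops at the start of the per-epoch loop. As a rough, hypothetical sketch of how such a template is usually filled with str.format and handed to the shell (the script name, the use of os.system, and every value below are placeholders; the original launcher and loop body are not shown):

import os

# Hypothetical sketch only: the script name and all values are placeholders.
eval_cmd = 'python evaluate.py ' \
           '--eval_dir={eval_dir} ' \
           '--eval_interval_secs={eval_interval_secs} ' \
           '--num_eval_examples={num_eval_examples} ' \
           '--min_global_step={min_global_step} ' \
           '--CNN_name={CNN_name} ' \
           '--batch_size={batch_size}'

os.system(eval_cmd.format(eval_dir='/tmp/eval',
                          eval_interval_secs=600,
                          num_eval_examples=1000,
                          min_global_step=100,
                          CNN_name='InceptionV3',
                          batch_size=32))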
Example 3
def main(unused_argv):
    assert FLAGS.input_file_pattern, "--input_file_pattern is required"
    assert FLAGS.train_dir, "--train_dir is required"

    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = FLAGS.input_file_pattern
    model_config.image_keys = [model_config.image_feature_name]

    # Make sure we have the right batch size.
    if FLAGS.train_inception:
        assert FLAGS.batch_size == 8
    else:
        assert FLAGS.batch_size == 32
    if FLAGS.two_input_queues:
        FLAGS.batch_size = int(FLAGS.batch_size / 2)
    model_config.batch_size = FLAGS.batch_size

    # Set up additional inputs if training with blocked images or two input queues.
    if FLAGS.blocked_image:
        assert FLAGS.blocked_input_file_pattern, "--blocked_input_file_pattern is required if you would like to train with blocked images"
        model_config.blocked_input_file_pattern = FLAGS.blocked_input_file_pattern
        model_config.image_keys.append(model_config.blocked_image_feature_name)
    if FLAGS.two_input_queues:
        assert FLAGS.input_file_pattern2, "--input_file_pattern2 is required if you would like to train with two input queues"
        model_config.blocked_input_file_pattern = FLAGS.input_file_pattern2
        model_config.image_keys.append(model_config.image_feature_name)
    model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file
    training_config = configuration.TrainingConfig()

    # Create training directory.
    train_dir = FLAGS.train_dir
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    print('graph')
    # Convert FLAGS to a plain dict and build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        if not isinstance(
                FLAGS.__flags['init_from'], str
        ):  # TensorFlow changes the FLAGS container between releases; in newer releases each entry is a Flag object, so unwrap it into a plain dict of values. *sigh*
            flag_dict = {}
            for key in FLAGS.__flags.keys():
                flag_dict[key] = FLAGS.__flags[key].value
        else:
            flag_dict = FLAGS.__flags

        model = show_and_tell_model.ShowAndTellModel(
            model_config,
            mode="train",
            train_inception=FLAGS.train_inception,
            flags=flag_dict
        )  # pass all the flags through, since threading them individually gets unwieldy
        model.build()

        # Set up the learning rate.
        learning_rate_decay_fn = None
        if FLAGS.train_inception:
            learning_rate = tf.constant(
                training_config.train_inception_learning_rate)
        else:
            learning_rate = tf.constant(training_config.initial_learning_rate)
            if training_config.learning_rate_decay_factor > 0:
                num_batches_per_epoch = (
                    training_config.num_examples_per_epoch /
                    model_config.batch_size)
                decay_steps = int(num_batches_per_epoch *
                                  training_config.num_epochs_per_decay)

                def _learning_rate_decay_fn(learning_rate, global_step):
                    return tf.train.exponential_decay(
                        learning_rate,
                        global_step,
                        decay_steps=decay_steps,
                        decay_rate=training_config.learning_rate_decay_factor,
                        staircase=True)

                learning_rate_decay_fn = _learning_rate_decay_fn

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        if FLAGS.init_from:
            inception_restore = model.init_fn

            def restore_full_model(sess):
                print("restoring full model")
                inception_restore(sess)
                saver.restore(sess, FLAGS.init_from)

            model.init_fn = restore_full_model
    print('train')
    # Run training.
    if FLAGS.debug:
        tf.contrib.slim.learning.train(
            train_op,
            train_dir,
            log_every_n_steps=FLAGS.log_every_n_steps,
            graph=g,
            global_step=model.global_step,
            number_of_steps=FLAGS.number_of_steps,
            init_fn=model.init_fn,
            saver=saver,
            session_wrapper=tf_debug.LocalCLIDebugWrapperSession)
    else:
        tf.contrib.slim.learning.train(
            train_op,
            train_dir,
            log_every_n_steps=FLAGS.log_every_n_steps,
            graph=g,
            global_step=model.global_step,
            number_of_steps=FLAGS.number_of_steps,
            init_fn=model.init_fn,
            saver=saver)
def train(number_of_steps):
    model_config = configuration.ModelConfig()
    model_config.input_file_pattern = FLAGS.input_file_pattern
    training_config = configuration.TrainingConfig()
    model_config.inception_checkpoint_file = FLAGS.inception_checkpoint_file

    # Create training directory.
    train_dir = FLAGS.train_dir
    # if not tf.gfile.IsDirectory(train_dir):
    #   tf.logging.info("Creating training directory: %s", train_dir)
    #   tf.gfile.MakeDirs(train_dir)

    # Build the TensorFlow graph.
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = show_and_tell_model.ShowAndTellModel(
            model_config, mode="train", train_inception=FLAGS.train_inception)
        model.build()

        # Set up the learning rate.
        learning_rate_decay_fn = None
        if FLAGS.train_inception:
            print(
                "The inception weights are fine-tuned together with weights in the LSTM units and word embeddings."
            )
            learning_rate = tf.constant(
                training_config.train_inception_learning_rate)
        else:
            print(
                "The inception weights are frozen. Only weights in the LSTMs and word embeddings are randomly "
                "initialized and trained.")
            learning_rate = tf.constant(training_config.initial_learning_rate)
            if training_config.learning_rate_decay_factor > 0:
                num_batches_per_epoch = (
                    training_config.num_examples_per_epoch /
                    model_config.batch_size)
                decay_steps = int(num_batches_per_epoch *
                                  training_config.num_epochs_per_decay)

                def _learning_rate_decay_fn(learning_rate, global_step):
                    return tf.train.exponential_decay(
                        learning_rate,
                        global_step,
                        decay_steps=decay_steps,
                        decay_rate=training_config.learning_rate_decay_factor,
                        staircase=True)

                learning_rate_decay_fn = _learning_rate_decay_fn

        # Set up the training ops.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.total_loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        saver = tf.train.Saver(keep_checkpoint_every_n_hours=0.5)
        # saver = tf.train.Saver(max_to_keep=training_config.max_checkpoints_to_keep)

    # Run training.
    tf.contrib.slim.learning.train(train_op,
                                   train_dir,
                                   log_every_n_steps=FLAGS.log_every_n_steps,
                                   graph=g,
                                   global_step=model.global_step,
                                   number_of_steps=number_of_steps,
                                   init_fn=model.init_fn,
                                   saver=saver)
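
Example 3 restores a full checkpoint on top of the Inception weights by wrapping model.init_fn. The same composition pattern, reduced to its essentials (the names and objects below are placeholders for illustration):

def compose_init_fns(restore_inception, full_saver, checkpoint_path):
    # Returns an init_fn that first restores the Inception weights and then
    # overwrites every variable covered by the full-model checkpoint.
    def init_fn(sess):
        restore_inception(sess)
        full_saver.restore(sess, checkpoint_path)
    return init_fn

# Usage (mirrors the FLAGS.init_from branch in Example 3):
# model.init_fn = compose_init_fns(model.init_fn, saver, FLAGS.init_from)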