Example #1
0
def evaluate_model(config):
    """ Train the model using the passed in config """
    ###########################################################
    # Generate the model
    ###########################################################
    outputs = create_generator(config, input_utils.get_data_shape(config.dataset))

    ###########################################################
    # Setup the evaluation metrics and summaries
    ###########################################################
    # Generate the canvases that lead to the final output image
    summaries = []
    summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))
    with tf.name_scope('canvases'):
        for step, canvas in enumerate(outputs):
            canvas = input_utils.reshape_images(canvas, config.dataset)
            tiled_images = image_utils.tile_images(canvas)
            summaries.append(tf.summary.image('step{0}'.format(step), tiled_images))

    summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin evaluation
    ###########################################################
    checkpoint_path = FLAGS.checkpoint_path
    if tf.gfile.IsDirectory(checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
    eval_ops = tf.group(*outputs)
    hooks = [
        training.SummaryAtEndHook(FLAGS.log_dir, summary_op),
        training.StopAfterNEvalsHook(FLAGS.count)]

    training.evaluate_once(checkpoint_path, hooks=hooks, eval_ops=eval_ops)
Example #2
0
def evaluate_model(config):
    """ Train the model using the passed in config """
    ###########################################################
    # Create the input pipeline
    ###########################################################
    with tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(config.datadir,
                                          config.dataset,
                                          config.datasubset,
                                          num_folds=config.fold_count,
                                          fold=config.fold,
                                          holdout=True)

        init_op, init_feed_dict, image = input_utils.get_data(
            config.dataset,
            dataset,
            config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        images = tf.train.batch([image],
                                config.batch_size,
                                num_threads=config.num_preprocessing_threads,
                                capacity=5 * config.batch_size)

    ###########################################################
    # Generate the model
    ###########################################################
    outputs = create_model(config, images, dataset)

    ###########################################################
    # Setup the evaluation metrics and summaries
    ###########################################################
    summaries = []
    metrics_map = {}
    for loss in tf.losses.get_losses():
        metrics_map[loss.op.name] = metrics.streaming_mean(loss)

    for metric in tf.get_collection(graph_utils.GraphKeys.METRICS):
        metrics_map[metric.op.name] = metrics.streaming_mean(metric)

    total_loss = tf.losses.get_total_loss()
    metrics_map[total_loss.op.name] = metrics.streaming_mean(total_loss)
    names_to_values, names_to_updates = metrics.aggregate_metric_map(
        metrics_map)

    # Create summaries of the metrics and print them to the screen
    for name, value in names_to_values.iteritems():
        summary = tf.summary.scalar(name, value, collections=[])
        summaries.append(tf.Print(summary, [value], name))

    summaries.extend(layers.summarize_collection(tf.GraphKeys.MODEL_VARIABLES))
    summaries.extend(layers.summarize_collection(
        graph_utils.GraphKeys.METRICS))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS))

    images = input_utils.reshape_images(images, config.dataset)
    tiled_images = image_utils.tile_images(images)
    summaries.append(tf.summary.image('input_batch', tiled_images))

    # Generate the canvases that lead to the final output image
    with tf.name_scope('canvases'):
        for step, canvas in enumerate(outputs):
            canvas = input_utils.reshape_images(canvas, config.dataset)
            tiled_images = image_utils.tile_images(canvas)
            summaries.append(
                tf.summary.image('step{0}'.format(step), tiled_images))

    summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin evaluation
    ###########################################################
    checkpoint_path = FLAGS.checkpoint_path
    eval_ops = tf.group(*names_to_updates.values())
    hooks = [
        training.SummaryAtEndHook(log_dir=FLAGS.log_dir,
                                  summary_op=summary_op),
        training.StopAfterNEvalsHook(
            math.ceil(dataset.num_samples / float(config.batch_size)))
    ]

    eval_kwargs = {}
    eval_fn = training.evaluate_repeatedly
    if FLAGS.once:
        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
        eval_fn = training.evaluate_once
    else:
        assert tf.gfile.IsDirectory(checkpoint_path), (
            'checkpoint path must be a directory when using loop evaluation')

    eval_fn(checkpoint_path, hooks=hooks, eval_ops=eval_ops, **eval_kwargs)
Example #3
0
File: train.py Project: ml-lab/glas
def train_model(config):
    """ Train the model using the passed in config """
    training_devices = [
        graph_utils.device_fn(device)
        for device in graph_utils.collect_devices({'GPU': FLAGS.num_gpus})
    ]
    assert training_devices, 'Found no training devices!'

    ###########################################################
    # Create the input pipeline
    ###########################################################
    with tf.device('/cpu:0'), tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(config.datadir, config.dataset,
                                          'train')

        init_op, init_feed_dict, image = input_utils.get_data(
            config.dataset,
            dataset,
            config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        inputs_queue = input_utils.batch_images(
            image,
            config.batch_size,
            num_threads=config.num_preprocessing_threads,
            num_devices=len(training_devices))

    ###########################################################
    # Generate the model
    ###########################################################
    towers = graph_utils.create_towers(create_training_model, training_devices,
                                       config, inputs_queue, dataset)
    assert towers, 'No training towers were created!'

    ###########################################################
    # Setup the training objectives
    ###########################################################
    with tf.name_scope('training'):
        with tf.device('/cpu:0'):
            learning_rate_decay_step = config.learning_rate_decay_step / len(
                towers)
            learning_rate = tf.maximum(exponential_decay(
                config.batch_size, learning_rate_decay_step,
                config.learning_rate, config.learning_rate_decay, dataset),
                                       config.learning_rate_min,
                                       name='learning_rate')
            tf.add_to_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS,
                                 learning_rate)

            optimizer = tf.train.AdamOptimizer(learning_rate)

        # Calculate gradients and total loss
        tower_klds, tower_losses, grads_and_vars = graph_utils.optimize_towers(
            optimizer, towers, clip_norm=config.clip)
        total_kld = tf.add_n(tower_klds,
                             name='total_kld') if tower_klds else None
        total_loss = tf.add_n(tower_losses, name='total_loss')

        # Gather update ops from the first tower (for updating batch_norm for example)
        global_step = framework.get_or_create_global_step()
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       towers[0].scope)
        update_ops.append(
            optimizer.apply_gradients(grads_and_vars, global_step=global_step))

        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_op = tf.identity(total_loss, name='train_op')

    ###########################################################
    # Collect summaries
    ###########################################################
    with tf.device('/cpu:0'):
        summaries = []
        summaries.extend(learning.add_gradients_summaries(grads_and_vars))
        summaries.extend(
            layers.summarize_collection(tf.GraphKeys.MODEL_VARIABLES))
        summaries.extend(
            layers.summarize_collection(graph_utils.GraphKeys.METRICS))
        summaries.extend(
            layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))
        summaries.extend(
            layers.summarize_collection(
                graph_utils.GraphKeys.TRAINING_PARAMETERS))

        images = input_utils.reshape_images(inputs_queue.dequeue(),
                                            config.dataset)
        tiled_images = image_utils.tile_images(images)
        summaries.append(tf.summary.image('input_batch', tiled_images))

        # Generate the canvases that lead to the final output image
        with tf.name_scope('canvases'):
            for step, canvas in enumerate(towers[0].outputs):
                canvas = input_utils.reshape_images(canvas, config.dataset)
                tiled_images = image_utils.tile_images(canvas)
                summaries.append(
                    tf.summary.image('step{0}'.format(step), tiled_images))

        with tf.name_scope('losses'):
            if total_kld is not None:
                summaries.append(tf.summary.scalar('total_kld', total_kld))
            summaries.append(tf.summary.scalar('total_loss', total_loss))

            for loss in tower_losses:
                summaries.append(tf.summary.scalar(loss.op.name, loss))

            for loss in losses.get_losses():
                summaries.append(tf.summary.scalar(loss.op.name, loss))

        summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin training
    ###########################################################
    init_op = tf.group(tf.global_variables_initializer(), init_op)
    session_config = tf.ConfigProto(
        allow_soft_placement=False,
        log_device_placement=FLAGS.log_device_placement)

    prefetch_queue_buffer = 2 * len(training_devices)
    number_of_steps = int(
        int(dataset.num_samples / config.batch_size) / len(training_devices))
    number_of_steps = number_of_steps * config.num_epochs - prefetch_queue_buffer

    tf.logging.info('Running %s steps', number_of_steps)
    learning.train(train_op,
                   FLAGS.log_dir,
                   session_config=session_config,
                   global_step=global_step,
                   number_of_steps=number_of_steps,
                   init_op=init_op,
                   init_feed_dict=init_feed_dict,
                   save_interval_secs=config.checkpoint_frequency,
                   summary_op=summary_op,
                   save_summaries_secs=config.summary_frequency,
                   trace_every_n_steps=config.trace_frequency
                   if config.trace_frequency > 0 else None)