Example #1
def evaluate_model(config):
    """ Train the model using the passed in config """
    ###########################################################
    # Generate the model
    ###########################################################
    outputs = create_generator(config, input_utils.get_data_shape(config.dataset))

    ###########################################################
    # Setup the evaluation metrics and summaries
    ###########################################################
    # Generate the canvases that lead to the final output image
    summaries = []
    summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))
    with tf.name_scope('canvases'):
        for step, canvas in enumerate(outputs):
            canvas = input_utils.reshape_images(canvas, config.dataset)
            tiled_images = image_utils.tile_images(canvas)
            summaries.append(tf.summary.image('step{0}'.format(step), tiled_images))

    summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin evaluation
    ###########################################################
    checkpoint_path = FLAGS.checkpoint_path
    if tf.gfile.IsDirectory(checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
    eval_ops = tf.group(*outputs)
    hooks = [
        training.SummaryAtEndHook(log_dir=FLAGS.log_dir, summary_op=summary_op),
        training.StopAfterNEvalsHook(FLAGS.count)]

    training.evaluate_once(checkpoint_path, hooks=hooks, eval_ops=eval_ops)
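The snippet above assumes module-level FLAGS exposing checkpoint_path, log_dir, and count. A minimal sketch of how those flags might be declared with tf.app.flags (the flag names come from the snippet; the defaults are illustrative only):

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('checkpoint_path', '/tmp/train_logs',
                           'Checkpoint file or directory to evaluate.')
tf.app.flags.DEFINE_string('log_dir', '/tmp/eval_logs',
                           'Directory where evaluation summaries are written.')
tf.app.flags.DEFINE_integer('count', 1,
                            'Number of evaluation batches to run.')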
Example #2
    def graph_rewrite_fn():
        """Function to quantize weights and activation of the default graph."""
        if (graph_rewriter_config.quantization.weight_bits != 8
                or graph_rewriter_config.quantization.activation_bits != 8):
            raise ValueError('Only 8bit quantization is supported')

        graph = tf.get_default_graph()

        # Insert custom quant ops.
        if quant_overrides_config is not None:
            input_to_ops_map = input_to_ops.InputToOps(graph)
            for q in quant_overrides_config.quant_configs:
                producer = graph.get_operation_by_name(q.op_name)
                if producer is None:
                    raise ValueError('Op name does not exist in graph.')
                context = _get_context_from_op(producer)
                consumers = input_to_ops_map.ConsumerOperations(producer)
                if q.fixed_range:
                    _insert_fixed_quant_op(
                        context,
                        q.quant_op_name,
                        producer,
                        consumers,
                        init_min=q.min,
                        init_max=q.max,
                        quant_delay=q.delay if is_training else 0)
                else:
                    raise ValueError('Learned ranges are not yet supported.')

        # Quantize the graph by inserting quantize ops for weights and activations
        if is_training:
            contrib_quantize.experimental_create_training_graph(
                input_graph=graph,
                quant_delay=graph_rewriter_config.quantization.delay,
                freeze_bn_delay=graph_rewriter_config.quantization.delay)
        else:
            contrib_quantize.experimental_create_eval_graph(
                input_graph=graph,
                quant_delay=graph_rewriter_config.quantization.delay
                if not is_export else 0)

        contrib_layers.summarize_collection('quant_vars')
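graph_rewrite_fn is a closure over graph_rewriter_config, quant_overrides_config, is_training, and is_export; the enclosing builder function is not shown in this snippet. A hypothetical usage sketch, assuming a build() wrapper that constructs and returns this closure (the wrapper name and signature are assumptions, not taken from the snippet):

# Build the model graph first, then apply the rewrite so the quantization
# ops are inserted into the already-constructed default graph.
graph_rewrite_fn = build(graph_rewriter_config,
                         quant_overrides_config=None,
                         is_training=True)
graph_rewrite_fn()  # mutates tf.get_default_graph() in place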
Example #3
def evaluate_model(config):
    """ Train the model using the passed in config """
    ###########################################################
    # Create the input pipeline
    ###########################################################
    with tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(config.datadir,
                                          config.dataset,
                                          config.datasubset,
                                          num_folds=config.fold_count,
                                          fold=config.fold,
                                          holdout=True)

        init_op, init_feed_dict, image = input_utils.get_data(
            config.dataset,
            dataset,
            config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        images = tf.train.batch([image],
                                config.batch_size,
                                num_threads=config.num_preprocessing_threads,
                                capacity=5 * config.batch_size)

    ###########################################################
    # Generate the model
    ###########################################################
    outputs = create_model(config, images, dataset)

    ###########################################################
    # Setup the evaluation metrics and summaries
    ###########################################################
    summaries = []
    metrics_map = {}
    for loss in tf.losses.get_losses():
        metrics_map[loss.op.name] = metrics.streaming_mean(loss)

    for metric in tf.get_collection(graph_utils.GraphKeys.METRICS):
        metrics_map[metric.op.name] = metrics.streaming_mean(metric)

    total_loss = tf.losses.get_total_loss()
    metrics_map[total_loss.op.name] = metrics.streaming_mean(total_loss)
    names_to_values, names_to_updates = metrics.aggregate_metric_map(
        metrics_map)

    # Create summaries of the metrics and print them to the screen
    for name, value in names_to_values.items():
        summary = tf.summary.scalar(name, value, collections=[])
        summaries.append(tf.Print(summary, [value], name))

    summaries.extend(layers.summarize_collection(tf.GraphKeys.MODEL_VARIABLES))
    summaries.extend(layers.summarize_collection(
        graph_utils.GraphKeys.METRICS))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS))

    images = input_utils.reshape_images(images, config.dataset)
    tiled_images = image_utils.tile_images(images)
    summaries.append(tf.summary.image('input_batch', tiled_images))

    # Generate the canvases that lead to the final output image
    with tf.name_scope('canvases'):
        for step, canvas in enumerate(outputs):
            canvas = input_utils.reshape_images(canvas, config.dataset)
            tiled_images = image_utils.tile_images(canvas)
            summaries.append(
                tf.summary.image('step{0}'.format(step), tiled_images))

    summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin evaluation
    ###########################################################
    checkpoint_path = FLAGS.checkpoint_path
    eval_ops = tf.group(*names_to_updates.values())
    hooks = [
        training.SummaryAtEndHook(log_dir=FLAGS.log_dir,
                                  summary_op=summary_op),
        training.StopAfterNEvalsHook(
            math.ceil(dataset.num_samples / float(config.batch_size)))
    ]

    eval_kwargs = {}
    eval_fn = training.evaluate_repeatedly
    if FLAGS.once:
        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
        eval_fn = training.evaluate_once
    else:
        assert tf.gfile.IsDirectory(checkpoint_path), (
            'checkpoint path must be a directory when using loop evaluation')

    eval_fn(checkpoint_path, hooks=hooks, eval_ops=eval_ops, **eval_kwargs)
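The metrics above follow the tf.contrib.metrics value/update pattern: each streaming metric returns a value tensor plus an update op, the update ops are grouped into eval_ops and run once per batch, and the value tensors are read at the end for the summaries. A minimal sketch of that pattern (TF 1.x contrib API; the loss tensor here is a stand-in):

import tensorflow as tf
from tensorflow.contrib import metrics as contrib_metrics

loss = tf.constant(0.5)  # stand-in for a real loss tensor
names_to_values, names_to_updates = contrib_metrics.aggregate_metric_map({
    'mean_loss': contrib_metrics.streaming_mean(loss),
})
eval_ops = tf.group(*names_to_updates.values())  # run once per evaluated batch
mean_loss = names_to_values['mean_loss']         # read after all updates have run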
Example #4
File: train.py Project: ravimeda/glas
def train_model(config):
    """ Train the model using the passed in config """
    training_devices = [
        graph_utils.device_fn(device)
        for device in graph_utils.collect_devices({'GPU': FLAGS.num_gpus})]
    assert training_devices, 'Found no training devices!'

    ###########################################################
    # Create the input pipeline
    ###########################################################
    with tf.device('/cpu:0'), tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(
            config.datadir, config.dataset, 'train',
            num_folds=config.fold_count, fold=config.fold, holdout=False)

        init_op, init_feed_dict, image = input_utils.get_data(
            config.dataset, dataset, config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        inputs_queue = input_utils.batch_images(
            image, config.batch_size,
            num_threads=config.num_preprocessing_threads,
            num_devices=len(training_devices))

    ###########################################################
    # Generate the model
    ###########################################################
    towers = graph_utils.create_towers(
        create_training_model, training_devices, config, inputs_queue, dataset)
    assert towers, 'No training towers were created!'

    ###########################################################
    # Setup the training objectives
    ###########################################################
    with tf.name_scope('training'):
        with tf.device('/cpu:0'):
            learning_rate_decay_step = config.learning_rate_decay_step / len(towers)
            learning_rate = tf.maximum(
                exponential_decay(
                    config.batch_size, learning_rate_decay_step,
                    config.learning_rate, config.learning_rate_decay, dataset),
                config.learning_rate_min, name='learning_rate')
            tf.add_to_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS, learning_rate)

            optimizer = tf.train.AdamOptimizer(learning_rate)

        # Calculate gradients and total loss
        tower_klds, tower_losses, grads_and_vars = graph_utils.optimize_towers(
            optimizer, towers, clip_norm=config.clip)
        total_kld = tf.add_n(tower_klds, name='total_kld') if tower_klds else None
        total_loss = tf.add_n(tower_losses, name='total_loss')

        # Gather update ops from the first tower (for updating batch_norm for example)
        global_step = framework.get_or_create_global_step()
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, towers[0].scope)
        update_ops.append(optimizer.apply_gradients(grads_and_vars, global_step=global_step))

        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_op = tf.identity(total_loss, name='train_op')

    ###########################################################
    # Collect summaries
    ###########################################################
    with tf.device('/cpu:0'):
        summaries = []
        summaries.extend(learning.add_gradients_summaries(grads_and_vars))
        summaries.extend(layers.summarize_collection(tf.GraphKeys.MODEL_VARIABLES))
        summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.METRICS))
        summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.RNN_OUTPUTS))
        summaries.extend(layers.summarize_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS))

        with tf.name_scope('losses'):
            if total_kld is not None:
                summaries.append(tf.summary.scalar('total_kld', total_kld))
            summaries.append(tf.summary.scalar('total_loss', total_loss))

            for loss in tower_losses:
                summaries.append(tf.summary.scalar(loss.op.name, loss))

            for loss in tf.losses.get_losses():
                summaries.append(tf.summary.scalar(loss.op.name, loss))

        summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin training
    ###########################################################
    global_init_op = tf.global_variables_initializer()
    init_op = global_init_op if init_op is None else tf.group(global_init_op, init_op)
    session_config = tf.ConfigProto(
        allow_soft_placement=False,
        log_device_placement=FLAGS.log_device_placement)

    prefetch_queue_buffer = 2 * len(training_devices)
    number_of_steps = int(int(dataset.num_samples / config.batch_size) / len(training_devices))
    number_of_steps = number_of_steps * config.num_epochs - prefetch_queue_buffer

    tf.logging.info('Running %s steps', number_of_steps)
    learning.train(
        train_op, FLAGS.log_dir, session_config=session_config,
        global_step=global_step, number_of_steps=number_of_steps,
        init_op=init_op, init_feed_dict=init_feed_dict,
        save_interval_secs=config.checkpoint_frequency,
        summary_op=summary_op, save_summaries_secs=config.summary_frequency,
        trace_every_n_steps=config.trace_frequency if config.trace_frequency > 0 else None)
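The step count above divides the per-epoch batches across the training devices and then subtracts the prefetch buffer so the input queue can drain before training stops. A worked example with illustrative numbers (not taken from the original project):

# 60000 samples, batch size 128, 2 GPUs, 10 epochs
num_samples, batch_size, num_gpus, num_epochs = 60000, 128, 2, 10
prefetch_queue_buffer = 2 * num_gpus                              # 4
steps_per_epoch = int(int(num_samples / batch_size) / num_gpus)   # int(468 / 2) = 234
number_of_steps = steps_per_epoch * num_epochs - prefetch_queue_buffer  # 2340 - 4 = 2336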
Example #5
def evaluate_model(config):
    """ Train the model using the passed in config """
    ###########################################################
    # Create the input pipeline
    ###########################################################
    with tf.name_scope('input_pipeline'):
        dataset = input_utils.get_dataset(config.datadir, config.dataset,
                                          config.datasubset)

        init_op, init_feed_dict, image, label = input_utils.get_data(
            config.dataset,
            dataset,
            config.batch_size,
            num_epochs=config.num_epochs,
            num_readers=config.num_readers)

        images, labels = tf.train.batch(
            [image, label],
            config.batch_size,
            num_threads=config.num_preprocessing_threads,
            capacity=5 * config.batch_size)

    ###########################################################
    # Generate the model
    ###########################################################
    outputs = create_model(config, images, dataset)
    tfprof.model_analyzer.print_model_analysis(tf.get_default_graph())

    ###########################################################
    # Setup the evaluation metrics and summaries
    ###########################################################
    summaries = []
    metrics_map = {}
    for metric in tf.get_collection(graph_utils.GraphKeys.METRICS):
        metrics_map[metric.op.name] = metrics.streaming_mean(metric)

    predictions = tf.argmax(outputs, 1)
    metrics_map['accuracy'] = metrics.streaming_accuracy(predictions, labels)
    metrics_map['recall_5'] = metrics.streaming_sparse_recall_at_k(
        outputs, tf.expand_dims(labels, 1), 5)

    names_to_values, names_to_updates = metrics.aggregate_metric_map(
        metrics_map)

    # Create summaries of the metrics and print them to the screen
    for name, value in names_to_values.items():
        summary = tf.summary.scalar(name, value, collections=[])
        summaries.append(tf.Print(summary, [value], name))

    summaries.extend(layers.summarize_collection(
        graph_utils.GraphKeys.METRICS))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.QUANTIZED_VARIABLES))
    summaries.extend(
        layers.summarize_collection(graph_utils.GraphKeys.TRAINING_PARAMETERS))

    tiled_images = image_utils.tile_images(images)
    summaries.append(tf.summary.image('input_batch', tiled_images))

    summary_op = tf.summary.merge(summaries, name='summaries')

    ###########################################################
    # Begin evaluation
    ###########################################################
    checkpoint_path = FLAGS.checkpoint_path
    eval_ops = tf.group(*names_to_updates.values())
    scaffold = tf.train.Scaffold(init_op, init_feed_dict)
    hooks = [
        training.SummaryAtEndHook(log_dir=FLAGS.log_dir, summary_op=summary_op),
        training.StopAfterNEvalsHook(
            math.ceil(dataset.num_samples / float(config.batch_size)))
    ]

    eval_kwargs = {}
    eval_fn = training.evaluate_repeatedly
    if FLAGS.once:
        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
        eval_fn = training.evaluate_once
    else:
        assert tf.gfile.IsDirectory(checkpoint_path), (
            'checkpoint path must be a directory when using loop evaluation')

        # On TensorFlow master, fd87896 fixes this; for now just set a very large number
        eval_kwargs['max_number_of_evaluations'] = sys.maxsize

    eval_fn(checkpoint_path,
            scaffold=scaffold,
            hooks=hooks,
            eval_ops=eval_ops,
            **eval_kwargs)
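When FLAGS.once is false, the loop mode re-evaluates whenever a new checkpoint appears in the directory. A minimal sketch of that mode using tf.contrib.training.evaluate_repeatedly (TF 1.x; the checkpoint directory is illustrative):

from tensorflow.contrib import training as contrib_training

contrib_training.evaluate_repeatedly(
    checkpoint_dir='/tmp/train_logs',    # watched for new checkpoints
    eval_ops=eval_ops,
    hooks=hooks,
    eval_interval_secs=60,               # poll interval between checks
    max_number_of_evaluations=None)      # None means run until interrupted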