Example #1
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        is_training=True)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name,
        is_training=True, flipLR=FLAGS.flipLR, random_scale=FLAGS.random_scale)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

    ####################
    # Define the model #
    ####################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], labels,
            label_smoothing=FLAGS.label_smoothing, weights=0.4,
            scope='aux_loss')
      slim.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    if FLAGS.quantize_delay >= 0:
      tf.contrib.quantize.create_training_graph(
          quant_delay=FLAGS.quantize_delay)
      
    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # Compute the total loss and gather the gradients across all clones.
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None)
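Example #1 is essentially the stock TF-Slim train_image_classifier.py main(): it builds a DeploymentConfig, a dataset provider, a prefetch queue, per-clone losses, and finally hands a train_tensor to slim.learning.train. All configuration comes from tf.app.flags; the flag definitions are not part of this snippet, so the lines below are only a hedged sketch of how a few of the flags used above are typically declared in TF 1.x. The defaults and help strings here are assumptions, not this project's actual values.

# Assumed flag declarations (not from the original file); shown only to make
# the snippet above self-contained.
import tensorflow as tf

tf.app.flags.DEFINE_string('dataset_dir', None,
                           'Directory containing the TFRecord dataset files.')
tf.app.flags.DEFINE_string('train_dir', '/tmp/train_logs',
                           'Directory for checkpoints and summaries.')
tf.app.flags.DEFINE_string('model_name', 'vgg_16',
                           'Name of the architecture to train.')
tf.app.flags.DEFINE_integer('num_clones', 1,
                            'Number of model clones to deploy.')
tf.app.flags.DEFINE_integer('batch_size', 32,
                            'Number of samples in each training batch.')
tf.app.flags.DEFINE_float('weight_decay', 0.00004,
                          'Weight decay on the model weights.')
tf.app.flags.DEFINE_boolean('flipLR', False,
                            'Whether to randomly flip training images left/right.')

FLAGS = tf.app.flags.FLAGS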
Example #2
def main(_):
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
    
    dataset_val = dataset_biasCNN.get_dataset(
        FLAGS.dataset_name, 'validation', FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################
    
    if FLAGS.weights_initializer is None:
      weights_initializer = None
      # default value will be defined in argscope, it is xavier_initializer
    elif FLAGS.weights_initializer == 'zeros':
      weights_initializer = tf.zeros_initializer()
    elif FLAGS.weights_initializer == 'ones':
      weights_initializer = tf.ones_initializer()
    elif FLAGS.weights_initializer == 'trunc_normal':
      weights_initializer = tf.truncated_normal_initializer()
    elif FLAGS.weights_initializer == 'xavier':
      weights_initializer = initializers.xavier_initializer()
    elif FLAGS.weights_initializer == 'var_scaling':
      weights_initializer = initializers.variance_scaling_initializer()
    else:
      raise ValueError('weight initializer not found')
      
    if FLAGS.biases_initializer is None:
      biases_initializer = None
      # default value will be defined in argscope, it is zeros_initializer
    elif FLAGS.biases_initializer == 'zeros':
      biases_initializer = tf.zeros_initializer()
    elif FLAGS.biases_initializer == 'ones':
      biases_initializer = tf.ones_initializer()
    elif FLAGS.biases_initializer == 'trunc_normal':
      biases_initializer = tf.truncated_normal_initializer()
    elif FLAGS.biases_initializer == 'xavier':
      biases_initializer = initializers.xavier_initializer()
    elif FLAGS.biases_initializer == 'var_scaling':
      biases_initializer = initializers.variance_scaling_initializer()
    else:
      raise ValueError('biases initializer not found')
    
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        weights_initializer=weights_initializer,
        biases_initializer=biases_initializer,
        is_training=True)

    network_fn_val = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weights_initializer=weights_initializer,
        biases_initializer=biases_initializer,
        is_training=False)
    
    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name, is_training=True, flipLR=FLAGS.flipLR,
        random_scale=FLAGS.random_scale, is_windowed=FLAGS.is_windowed)

    image_preprocessing_fn_val = preprocessing_biasCNN.get_preprocessing(
        preprocessing_name, is_training=False, flipLR=FLAGS.flipLR,
        random_scale=FLAGS.random_scale, is_windowed=FLAGS.is_windowed)
    
    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)
      
      ############################################
      # Create a provider for the validation set #
      ############################################
      provider_val = slim.dataset_data_provider.DatasetDataProvider(
          dataset_val,
          shuffle=True,
          common_queue_capacity=2 * FLAGS.batch_size_val,
          common_queue_min=FLAGS.batch_size_val)
      [image_val, label_val] = provider_val.get(['image', 'label'])
      label_val -= FLAGS.labels_offset
      
      eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size
  
      image_val = image_preprocessing_fn_val(image_val, eval_image_size, eval_image_size)
  
      images_val, labels_val = tf.train.batch(
          [image_val, label_val],
          batch_size=FLAGS.batch_size_val,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size_val)
      labels_val_onehot = slim.one_hot_encoding(
          labels_val, dataset.num_classes - FLAGS.labels_offset)
      
    ###############################
    # Define the model (training) #
    ###############################
    
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      
      with tf.variable_scope('my_scope'):
        logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], labels,
            label_smoothing=FLAGS.label_smoothing, weights=0.4,
            scope='aux_loss')
        
      tf.losses.softmax_cross_entropy(
          labels, logits, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      # Add an image of the activations at each conv layer; this is a good way
      # to double-check that the rotated images look rotated to our eyes.
      if 'conv' in end_point:
        dims = x.get_shape()
        for ii in range(5):
          summaries.add(tf.summary.image(
              'image_out/' + end_point + '/image_' + str(ii),
              tf.slice(x, [ii, 0, 0, 0], [1, dims[1], dims[2], 1])))
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    if FLAGS.quantize_delay >= 0:
      tf.contrib.quantize.create_training_graph(
          quant_delay=FLAGS.quantize_delay)
      
    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # Compute the total loss and gather the gradients across all clones.
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))
 
    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    #################################
    # Define the model (validation) #
    #################################
    
    # get the validation set logits (predictions)
    with tf.variable_scope('my_scope', reuse=True):
      logits_val, _ = network_fn_val(images_val)
        
    predictions_val = tf.argmax(logits_val, 1)
    
    # Define loss on validation set, add a summary
    tf.losses.softmax_cross_entropy(
        labels_val_onehot, logits_val, label_smoothing=FLAGS.label_smoothing,
        weights=1.0, loss_collection='eval_losses')
    
    for loss in tf.get_collection('eval_losses'):
      summaries.add(tf.summary.scalar('eval_losses/%s' % loss.op.name, loss))
      
    # Define the validation set metrics: 
    # Will define each metric twice as separate operation. 
    # One set will be made resettable, the other set will be streaming.
    with tf.name_scope('eval_metrics'):
      eval_acc_value, eval_acc_op = tf.metrics.accuracy(
          predictions=predictions_val, labels=labels_val)
      eval_recall_5_value, eval_recall_5_op = slim.metrics.streaming_recall_at_k(
          predictions=logits_val, labels=labels_val, k=5)
      # add these values as summaries for tensorboard
      summaries.add(tf.summary.scalar('eval_recall_5', eval_recall_5_value))
      summaries.add(tf.summary.scalar('eval_acc', eval_acc_value))

    with tf.name_scope('eval_metrics_streaming'):
      eval_acc_streaming_value, eval_acc_streaming_op = tf.metrics.accuracy(
          predictions=predictions_val, labels=labels_val)
      eval_recall_5_streaming_value, eval_recall_5_streaming_op = \
          slim.metrics.streaming_recall_at_k(
              predictions=logits_val, labels=labels_val, k=5)
      # add these values as summaries for tensorboard
      summaries.add(tf.summary.scalar('eval_recall_5_streaming',
                                      eval_recall_5_streaming_value))
      summaries.add(tf.summary.scalar('eval_acc_streaming',
                                      eval_acc_streaming_value))
    
    # Also add summaries of all the local variables used to compute the eval
    # metrics.
    for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES, 'eval_metrics'):
      summaries.add(tf.summary.scalar(metric.op.name, metric))
    for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES,
                                    'eval_metrics_streaming'):
      summaries.add(tf.summary.scalar(metric.op.name, metric))

    # gather up all the variables that are used to compute eval metrics
    stream_vars = [i for i in tf.local_variables()
                   if i.name.split('/')[0] == 'eval_metrics']
    # make an operation that'll let us re-initialize just these vars.
    reset_op = tf.variables_initializer(stream_vars)
   
    # make an operation that'll let us run evaluation (all metrics)
    eval_op = [eval_acc_op, eval_recall_5_op,
               eval_acc_streaming_op, eval_recall_5_streaming_op]
    
    # Gather validation summaries
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))
    
    # Merge all summaries together (this includes training summaries too).
    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # Create a non-default saver so we don't delete all the old checkpoints.
    my_saver = tf_saver.Saver(
        max_to_keep=FLAGS.max_checkpoints_to_keep,
        keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours)
    
    # Create a non-default dictionary of options for train_step_fn
    # This is a hack that lets us pass everything we need to run evaluation, into the training loop function
    with ops.name_scope('train_step'):
      train_step_kwargs = {}

      if FLAGS.max_number_of_steps:
        should_stop_op = math_ops.greater_equal(global_step,
                                                FLAGS.max_number_of_steps)
      else:
        should_stop_op = constant_op.constant(False)
      train_step_kwargs['should_stop'] = should_stop_op
      if FLAGS.log_every_n_steps > 0:
        train_step_kwargs['should_log'] = math_ops.equal(
            math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)
      train_step_kwargs['should_val'] = math_ops.equal(
          math_ops.mod(global_step, FLAGS.val_every_n_steps), 0)
      train_step_kwargs['should_reset_eval_metrics'] = math_ops.equal(
          math_ops.mod(global_step,
                       tf.to_int64(math_ops.multiply(
                           FLAGS.reset_eval_metrics_every_n_vals,
                           FLAGS.val_every_n_steps))), 0)
      train_step_kwargs['eval_op'] = eval_op
      train_step_kwargs['reset_op'] = reset_op

  
    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None,
        saver=my_saver, 
        train_step_fn=learning_biasCNN.train_step_fn,
        train_step_kwargs=train_step_kwargs)
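Example #2 differs from Example #1 mainly in that it builds a validation branch (reusing the training weights via the 'my_scope' variable scope) and passes a custom training-step hook, learning_biasCNN.train_step_fn, together with the extra train_step_kwargs entries should_val, should_reset_eval_metrics, eval_op and reset_op. That module is not shown here, so the following is only a hypothetical sketch of what such a hook could look like; it assumes slim = tf.contrib.slim, follows the train_step(sess, train_op, global_step, train_step_kwargs) signature that slim.learning.train expects, and is not the author's actual implementation.

# Hypothetical sketch of a train_step_fn compatible with the kwargs built
# above (the real learning_biasCNN.train_step_fn may differ).
def train_step_fn(sess, train_op, global_step, train_step_kwargs):
  # Let slim's built-in step handle the loss plus should_log / should_stop.
  total_loss, should_stop = slim.learning.train_step(
      sess, train_op, global_step, train_step_kwargs)

  # Periodically reset the resettable metric variables, then run the eval ops
  # on one validation batch.
  if sess.run(train_step_kwargs['should_reset_eval_metrics']):
    sess.run(train_step_kwargs['reset_op'])
  if sess.run(train_step_kwargs['should_val']):
    sess.run(train_step_kwargs['eval_op'])

  return total_loss, should_stop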
Example #3
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    num_batches = FLAGS.num_batches

    #  for bb in np.arange(0,num_batches):
    for bb in [0]:

        batch_name = 'batch' + str(bb)

        #      tf.app.flags.DEFINE_string(
        #              'dataset_split_name',batch_name, 'The name of the train/test split.')

        tf.logging.set_verbosity(tf.logging.INFO)

        with tf.Graph().as_default():
            tf_global_step = slim.get_or_create_global_step()

            ######################
            # Select the dataset #
            ######################
            dataset = dataset_biasCNN.get_dataset(
                FLAGS.dataset_name,
                batch_name,
                FLAGS.dataset_dir,
                num_classes=FLAGS.num_classes)

            ####################
            # Select the model #
            ####################
            network_fn = nets_factory.get_network_fn(
                FLAGS.model_name,
                num_classes=(dataset.num_classes - FLAGS.labels_offset),
                is_training=False)

            ##############################################################
            # Create a dataset provider that loads data from the dataset #
            ##############################################################
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=1,
                shuffle=False,
                common_queue_capacity=2 * FLAGS.batch_size,
                common_queue_min=FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            #####################################
            # Select the preprocessing function #
            #####################################
            preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
            image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
                preprocessing_name,
                is_training=False,
                flipLR=False,
                random_scale=False,
                is_windowed=FLAGS.is_windowed)

            eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image, eval_image_size,
                                           eval_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)

            #    ims_orig = tf.identity(images);
            #    labels_orig = tf.identity(labels);

            ####################
            # Define the model #
            ####################
            logits, end_pts = network_fn(images)

            if FLAGS.moving_average_decay:
                variable_averages = tf.train.ExponentialMovingAverage(
                    FLAGS.moving_average_decay, tf_global_step)
                variables_to_restore = variable_averages.variables_to_restore(
                    slim.get_model_variables())
                variables_to_restore[tf_global_step.op.name] = tf_global_step
            else:
                if FLAGS.append_scope_string:
                        # If I've specified a string for the name of the scope
                        # in the checkpoint file, append it here so we can
                        # match up the layer names.
                    variables_to_restore_orig = slim.get_variables_to_restore()
                    variables_to_restore = {}
                    for var in variables_to_restore_orig:
                        curr_name = var.op.name
                        if 'global_step' not in curr_name:
                            new_name = FLAGS.append_scope_string + '/' + curr_name
                        else:
                            new_name = curr_name
                        variables_to_restore[new_name] = var
                else:
                    variables_to_restore = slim.get_variables_to_restore()

            predictions = tf.argmax(logits, 1)
            labels = tf.squeeze(labels)

            # Define the metrics:
            names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
                {
                    'Accuracy':
                    slim.metrics.streaming_accuracy(predictions, labels),
                    'Recall_5':
                    slim.metrics.streaming_recall_at_k(logits, labels, 5),
                })

            # Gather initial summaries.
            summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

            # Add summaries for first layer to visualize the image, double check whether rotations are CW or CCW

            keylist = list(end_pts.keys())

            x = end_pts[keylist[0]]
            dims = x.get_shape()
            for ii in range(dims[0]):

                summaries.add(
                    tf.summary.image(
                        'image_out/' + keylist[0] + '/label_' + str(ii),
                        tf.slice(x, [ii, 0, 0, 0], [1, dims[1], dims[2], 1])))
                summaries.add(
                    tf.summary.scalar('image_label/label_' + str(ii),
                                      labels[ii]))

            # Print the summaries to screen.
            for name, value in names_to_values.items():
                summary_name = 'eval/%s' % name
                op = tf.summary.scalar(summary_name, value, collections=[])
                op = tf.Print(op, [value], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # Gather the eval/ summaries added above, then merge all summaries
            # together (this includes training summaries too).
            summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))
            summary_op = tf.summary.merge(list(summaries), name='summary_op')

            # TODO(sguada) use num_epochs=1
            if FLAGS.max_num_batches:
                num_batches = FLAGS.max_num_batches
            else:
                # This ensures that we make a single pass over all of the data.
                num_batches = math.ceil(dataset.num_samples /
                                        float(FLAGS.batch_size))

            if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
                checkpoint_path = tf.train.latest_checkpoint(
                    FLAGS.checkpoint_path)
            else:
                checkpoint_path = FLAGS.checkpoint_path

            tf.logging.info('Evaluating %s' % checkpoint_path)

            out = slim.evaluation.evaluate_once(
                master=FLAGS.master,
                checkpoint_path=checkpoint_path,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=list(names_to_updates.values()),
                summary_op=summary_op,
                final_op={
                    'logits': logits,
                    'end_pts': end_pts,
                    'images': images,
                    'labels': labels,
                    'predictions': predictions
                },
                variables_to_restore=variables_to_restore)
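The else-branch above remaps every variable name by prepending FLAGS.append_scope_string before restoring, so that the names in this evaluation graph line up with the names stored in the checkpoint (which were created under the 'my_scope' variable scope during training). If the remapping is wrong, evaluate_once fails with a "not found in checkpoint" error. A quick way to check what names the checkpoint actually contains is the standard TF 1.x checkpoint reader; the helper below is an assumed convenience, not part of the original script.

# Assumed helper (not in the original script): print the variable names stored
# in a checkpoint so the append_scope_string remapping can be verified.
import tensorflow as tf

def list_checkpoint_variables(checkpoint_path):
    reader = tf.train.NewCheckpointReader(checkpoint_path)
    # get_variable_to_shape_map() maps variable name -> shape for every tensor
    # saved in the checkpoint.
    for name, shape in sorted(reader.get_variable_to_shape_map().items()):
        print(name, shape)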
Example #4
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_biasCNN.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        dataset_val = dataset_biasCNN.get_dataset(FLAGS.dataset_name,
                                                  'validation',
                                                  FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        network_fn_val = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
            preprocessing_name,
            is_training=True,
            flipLR=FLAGS.flipLR,
            random_scale=FLAGS.random_scale,
            is_windowed=FLAGS.is_windowed)

        image_preprocessing_fn_val = preprocessing_biasCNN.get_preprocessing(
            preprocessing_name,
            is_training=False,
            flipLR=FLAGS.flipLR,
            random_scale=FLAGS.random_scale,
            is_windowed=FLAGS.is_windowed)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

            ############################################
            # Create a provider for the validation set #
            ############################################
            provider_val = slim.dataset_data_provider.DatasetDataProvider(
                dataset_val,
                shuffle=True,
                common_queue_capacity=2 * FLAGS.batch_size_val,
                common_queue_min=FLAGS.batch_size_val)
            [image_val, label_val] = provider_val.get(['image', 'label'])
            label_val -= FLAGS.labels_offset

            eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

            image_val = image_preprocessing_fn_val(image_val, eval_image_size,
                                                   eval_image_size)

            images_val, labels_val = tf.train.batch(
                [image_val, label_val],
                batch_size=FLAGS.batch_size_val,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size_val)

        ###############################
        # Define the model (training) #
        ###############################

        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()

            with tf.variable_scope('my_scope'):
                logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(
                tf.summary.scalar('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        if FLAGS.quantize_delay >= 0:
            tf.contrib.quantize.create_training_graph(
                quant_delay=FLAGS.quantize_delay)

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        # Compute the total loss and gather the gradients across all clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        #################################
        # Define the model (validation) #
        #################################

        with tf.variable_scope('my_scope', reuse=True):
            logits_val, _ = network_fn_val(images_val)

        predictions_val = tf.argmax(logits_val, 1)
        labels_val = tf.squeeze(labels_val)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions_val, labels_val),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits_val, labels_val, 5)
        })

        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Gather validation summaries
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # Create a non-default saver so we don't delete all the old checkpoints.
        my_saver = tf_saver.Saver(
            max_to_keep=FLAGS.max_checkpoints_to_keep,
            keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
        )

        # Create a non-default dictionary of options for train_step_fn
        # This is a hack that lets us pass everything we need to run evaluation, into the training loop function
        from tensorflow.python.framework import ops
        from tensorflow.python.framework import constant_op
        from tensorflow.python.ops import math_ops

        with ops.name_scope('train_step'):
            train_step_kwargs = {}

            if FLAGS.max_number_of_steps:
                should_stop_op = math_ops.greater_equal(
                    global_step, FLAGS.max_number_of_steps)
            else:
                should_stop_op = constant_op.constant(False)
            train_step_kwargs['should_stop'] = should_stop_op
            if FLAGS.log_every_n_steps > 0:
                train_step_kwargs['should_log'] = math_ops.equal(
                    math_ops.mod(global_step, FLAGS.log_every_n_steps), 0)
            train_step_kwargs['should_val'] = math_ops.equal(
                math_ops.mod(global_step, FLAGS.val_every_n_steps), 0)
            train_step_kwargs['eval_op'] = list(names_to_updates.values())


        # assert(FLAGS.max_number_of_steps==100000)
        # Debug: print the should_stop op before kicking off training.
        print(should_stop_op)
        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None,
            saver=my_saver,
            train_step_fn=learning_biasCNN.train_step_fn,
            train_step_kwargs=train_step_kwargs)
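Both training scripts initialize weights from a pre-trained checkpoint through init_fn=_get_init_fn(), which is not included in these snippets. In the stock TF-Slim train_image_classifier.py this helper is built around slim.assign_from_checkpoint_fn; the condensed sketch below follows that pattern, but the flag names (checkpoint_path, checkpoint_exclude_scopes, ignore_missing_vars) are assumptions about this project's version rather than a verbatim copy of it.

# Condensed, assumed sketch of a slim-style _get_init_fn().
def _get_init_fn():
    if FLAGS.checkpoint_path is None:
        return None

    # Optionally skip restoring some scopes (e.g. the final logits layer).
    exclusions = []
    if FLAGS.checkpoint_exclude_scopes:
        exclusions = [s.strip()
                      for s in FLAGS.checkpoint_exclude_scopes.split(',')]

    variables_to_restore = [
        var for var in slim.get_model_variables()
        if not any(var.op.name.startswith(ex) for ex in exclusions)
    ]

    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    else:
        checkpoint_path = FLAGS.checkpoint_path

    return slim.assign_from_checkpoint_fn(
        checkpoint_path, variables_to_restore,
        ignore_missing_vars=FLAGS.ignore_missing_vars)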
Example #5
def main(_):
  if not FLAGS.dataset_dir:
      raise ValueError('You must supply the dataset directory with --dataset_dir')

  num_batches = FLAGS.num_batches

  for bb in np.arange(0, num_batches):

      batch_name = 'batch' + str(bb)

      tf.logging.set_verbosity(tf.logging.INFO)
  
      with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()
    
        ######################
        # Select the dataset #
        ######################
        dataset = dataset_biasCNN.get_dataset(
            FLAGS.dataset_name, batch_name, FLAGS.dataset_dir, num_classes=FLAGS.num_classes)
    
        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)
    
        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=1,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset
    
    
        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing(
            preprocessing_name, is_training=False, flipLR=False,
            random_scale=False, is_windowed=FLAGS.is_windowed)
    
        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size
    
        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)
    
        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, end_pts = network_fn(images)

        if FLAGS.moving_average_decay:
          variable_averages = tf.train.ExponentialMovingAverage(
              FLAGS.moving_average_decay, tf_global_step)
          variables_to_restore = variable_averages.variables_to_restore(
              slim.get_model_variables())
          variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
          if FLAGS.append_scope_string:
            # If I've specified a string for the name of the scope in the
            # checkpoint file, append it here so we can match up the layer
            # names.
            variables_to_restore_orig = slim.get_variables_to_restore()
            variables_to_restore = {}
            for var in variables_to_restore_orig:
              curr_name = var.op.name
              if 'global_step' not in curr_name:
                new_name = FLAGS.append_scope_string + '/' + curr_name
              else:
                new_name = curr_name
              variables_to_restore[new_name] = var
          else:
            variables_to_restore = slim.get_variables_to_restore()
    
        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)
    
        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5': slim.metrics.streaming_recall_at_k(
                logits, labels, 5),
        })
    
        # Print the summaries to screen.
        for name, value in names_to_values.items():
          summary_name = 'eval/%s' % name
          op = tf.summary.scalar(summary_name, value, collections=[])
          op = tf.Print(op, [value], summary_name)
          tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
    
        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
          num_batches = FLAGS.max_num_batches
        else:
          # This ensures that we make a single pass over all of the data.
          num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size))
    
        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
          checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
          checkpoint_path = FLAGS.checkpoint_path
    
        tf.logging.info('Evaluating %s' % checkpoint_path)
    
        out = slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            final_op={'logits': logits, 'end_pts': end_pts, 'images': images,
                      'labels': labels, 'predictions': predictions},
            variables_to_restore=variables_to_restore)
           
    
        end_pts = out['end_pts']

        keylist = list(end_pts.keys())

        # Save the activations from every layer to a .npy file for this batch.
        for kk in range(np.size(keylist)):
            keystr = keylist[kk].replace('/', '_')
            fn2save = FLAGS.eval_dir + '/' + batch_name + '_' + keystr + '.npy'
            np.save(fn2save, end_pts[keylist[kk]])
            
        logits = out['logits']

        labels = out['labels']

        predictions = out['predictions']
    
        fn2save = FLAGS.eval_dir + '/' + batch_name + '_logits.npy'
        np.save(fn2save, logits)

        fn2save = FLAGS.eval_dir + '/' + batch_name + '_labels_orig.npy'
        np.save(fn2save, labels)
          
        fn2save = FLAGS.eval_dir + '/' + batch_name + '_labels_predicted.npy'
        np.save(fn2save, predictions)
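Example #5 writes each layer's activations, plus the logits, original labels, and predicted labels, to per-batch .npy files under FLAGS.eval_dir. Below is a small, assumed sketch of how those files can be loaded back for offline analysis; the directory path is a placeholder, and the file names follow the pattern generated by the loop above.

# Assumed offline loader for the files written by the evaluation loop above.
import os
import numpy as np

def load_batch_outputs(eval_dir, batch_name):
    logits = np.load(os.path.join(eval_dir, batch_name + '_logits.npy'))
    labels = np.load(os.path.join(eval_dir, batch_name + '_labels_orig.npy'))
    preds = np.load(os.path.join(eval_dir, batch_name + '_labels_predicted.npy'))
    return logits, labels, preds

logits, labels, preds = load_batch_outputs('/path/to/eval_dir', 'batch0')
print('top-1 accuracy: %.3f' % np.mean(preds == labels))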