Example 1
def imagenet_input(is_training):
    """Data reader for imagenet.

  Reads in imagenet data and performs pre-processing on the images.

  Args:
     is_training: bool specifying if train or validation dataset is needed.
  Returns:
     A batch of images and labels.
  """
    if is_training:
        dataset = dataset_factory.get_dataset('imagenet', 'train',
                                              FLAGS.dataset_dir)
    else:
        dataset = dataset_factory.get_dataset('imagenet', 'validation',
                                              FLAGS.dataset_dir)

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=is_training,
        common_queue_capacity=2 * FLAGS.batch_size,
        common_queue_min=FLAGS.batch_size)
    [image, label] = provider.get(['image', 'label'])

    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        'mobilenet_v1', is_training=is_training)

    image = image_preprocessing_fn(image, FLAGS.image_size, FLAGS.image_size)

    images, labels = tf.train.batch(tensors=[image, label],
                                    batch_size=FLAGS.batch_size,
                                    num_threads=4,
                                    capacity=5 * FLAGS.batch_size)
    return images, labels
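A minimal sketch of how such a queue-based reader is consumed, assuming imagenet_input above is importable and the relevant FLAGS are set (the session and queue-runner setup below is not part of the original example):

import tensorflow as tf

images, labels = imagenet_input(is_training=True)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    # DatasetDataProvider and tf.train.batch register queue runners; nothing is
    # dequeued until they are started.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        batch_images, batch_labels = sess.run([images, labels])
        print(batch_images.shape, batch_labels.shape)
    finally:
        coord.request_stop()
        coord.join(threads)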
Example 2
def provide_data(split_name,
                 batch_size,
                 dataset_dir,
                 dataset_name='imagenet',
                 num_readers=1,
                 num_threads=1,
                 patch_size=128):
    """Provides batches of image data for compression.

  Args:
    split_name: Either 'train' or 'validation'.
    batch_size: The number of images in each batch.
    dataset_dir: The directory where the data can be found. If `None`, use
      default.
    dataset_name: Name of the dataset.
    num_readers: Number of dataset readers.
    num_threads: Number of prefetching threads.
    patch_size: Size of the patch to extract from the image.

  Returns:
    images: A `Tensor` of size [batch_size, patch_size, patch_size, channels]
  """
    randomize = split_name == 'train'
    dataset = datasets.get_dataset(dataset_name,
                                   split_name,
                                   dataset_dir=dataset_dir)
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=num_readers,
        common_queue_capacity=5 * batch_size,
        common_queue_min=batch_size,
        shuffle=randomize)
    [image] = provider.get(['image'])

    # Sample a patch of fixed size.
    patch = tf.image.resize_image_with_crop_or_pad(image, patch_size,
                                                   patch_size)
    patch.shape.assert_is_compatible_with([patch_size, patch_size, 3])

    # Preprocess the images. Make the range lie in a strictly smaller range than
    # [-1, 1], so that network outputs aren't forced to the extreme ranges.
    patch = (tf.to_float(patch) - 128.0) / 142.0

    if randomize:
        image_batch = tf.train.shuffle_batch([patch],
                                             batch_size=batch_size,
                                             num_threads=num_threads,
                                             capacity=5 * batch_size,
                                             min_after_dequeue=batch_size)
    else:
        image_batch = tf.train.batch(
            [patch],
            batch_size=batch_size,
            num_threads=1,  # no threads so it's deterministic
            capacity=5 * batch_size)

    return image_batch
Example 3
def provide_data(batch_size,
                 dataset_dir,
                 dataset_name='cifar10',
                 split_name='train',
                 one_hot=True):
    """Provides batches of CIFAR data.

  Args:
    batch_size: The number of images in each batch.
    dataset_dir: The directory where the CIFAR10 data can be found. If `None`,
      use default.
    dataset_name: Name of the dataset.
    split_name: Should be either 'train' or 'test'.
    one_hot: Output one hot vector instead of int32 label.

  Returns:
    images: A `Tensor` of size [batch_size, 32, 32, 3]. Output pixel values are
      in [-1, 1].
    labels: Either (1) one_hot_labels if `one_hot` is `True`
            A `Tensor` of size [batch_size, num_classes], where each row has a
            single element set to one and the rest set to zeros.
            Or (2) labels if `one_hot` is `False`
            A `Tensor` of size [batch_size], holding the labels as integers.
    num_samples: The number of total samples in the dataset.
    num_classes: The number of classes in the dataset.

  Raises:
    ValueError: if the split_name is not either 'train' or 'test'.
  """
    dataset = datasets.get_dataset(dataset_name,
                                   split_name,
                                   dataset_dir=dataset_dir)
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        common_queue_capacity=5 * batch_size,
        common_queue_min=batch_size,
        shuffle=(split_name == 'train'))
    [image, label] = provider.get(['image', 'label'])

    # Preprocess the images.
    image = (tf.to_float(image) - 128.0) / 128.0

    # Creates a QueueRunner for the pre-fetching operation.
    images, labels = tf.train.batch([image, label],
                                    batch_size=batch_size,
                                    num_threads=32,
                                    capacity=5 * batch_size)

    labels = tf.reshape(labels, [-1])

    if one_hot:
        labels = tf.one_hot(labels, dataset.num_classes)

    return images, labels, dataset.num_samples, dataset.num_classes
Example 4
def provide_data(split_name, batch_size, dataset_dir,
                 dataset_name='imagenet', num_readers=1, num_threads=1,
                 patch_size=128):
  """Provides batches of image data for compression.

  Args:
    split_name: Either 'train' or 'validation'.
    batch_size: The number of images in each batch.
    dataset_dir: The directory where the data can be found. If `None`, use
      default.
    dataset_name: Name of the dataset.
    num_readers: Number of dataset readers.
    num_threads: Number of prefetching threads.
    patch_size: Size of the patch to extract from the image.

  Returns:
    images: A `Tensor` of size [batch_size, patch_size, patch_size, channels]
  """
  randomize = split_name == 'train'
  dataset = datasets.get_dataset(
      dataset_name, split_name, dataset_dir=dataset_dir)
  provider = slim.dataset_data_provider.DatasetDataProvider(
      dataset,
      num_readers=num_readers,
      common_queue_capacity=5 * batch_size,
      common_queue_min=batch_size,
      shuffle=randomize)
  [image] = provider.get(['image'])

  # Sample a patch of fixed size.
  patch = tf.image.resize_image_with_crop_or_pad(image, patch_size, patch_size)
  patch.shape.assert_is_compatible_with([patch_size, patch_size, 3])

  # Preprocess the images. Make the range lie in a strictly smaller range than
  # [-1, 1], so that network outputs aren't forced to the extreme ranges.
  patch = (tf.to_float(patch) - 128.0) / 142.0

  if randomize:
    image_batch = tf.train.shuffle_batch(
        [patch],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=5 * batch_size,
        min_after_dequeue=batch_size)
  else:
    image_batch = tf.train.batch(
        [patch],
        batch_size=batch_size,
        num_threads=1,  # no threads so it's deterministic
        capacity=5 * batch_size)

  return image_batch
Example 5
def provide_data(batch_size, dataset_dir, dataset_name='cifar10',
                 split_name='train', one_hot=True):
  """Provides batches of CIFAR data.

  Args:
    batch_size: The number of images in each batch.
    dataset_dir: The directory where the CIFAR10 data can be found. If `None`,
      use default.
    dataset_name: Name of the dataset.
    split_name: Should be either 'train' or 'test'.
    one_hot: Output one hot vector instead of int32 label.

  Returns:
    images: A `Tensor` of size [batch_size, 32, 32, 3]. Output pixel values are
      in [-1, 1].
    labels: Either (1) one_hot_labels if `one_hot` is `True`
            A `Tensor` of size [batch_size, num_classes], where each row has a
            single element set to one and the rest set to zeros.
            Or (2) labels if `one_hot` is `False`
            A `Tensor` of size [batch_size], holding the labels as integers.
    num_samples: The number of total samples in the dataset.
    num_classes: The number of classes in the dataset.

  Raises:
    ValueError: if the split_name is not either 'train' or 'test'.
  """
  dataset = datasets.get_dataset(
      dataset_name, split_name, dataset_dir=dataset_dir)
  provider = slim.dataset_data_provider.DatasetDataProvider(
      dataset,
      common_queue_capacity=5 * batch_size,
      common_queue_min=batch_size,
      shuffle=(split_name == 'train'))
  [image, label] = provider.get(['image', 'label'])

  # Preprocess the images.
  image = (tf.to_float(image) - 128.0) / 128.0

  # Creates a QueueRunner for the pre-fetching operation.
  images, labels = tf.train.batch(
      [image, label],
      batch_size=batch_size,
      num_threads=32,
      capacity=5 * batch_size)

  labels = tf.reshape(labels, [-1])

  if one_hot:
    labels = tf.one_hot(labels, dataset.num_classes)

  return images, labels, dataset.num_samples, dataset.num_classes
Example 6
    def test_dataset_factory(self):
        train_set = factory.get_dataset('bot', 'train', BOT_PROTOBUF_DIR)
        validation_set = factory.get_dataset('bot', 'validation',
                                             BOT_PROTOBUF_DIR)

        self.assertTrue(train_set)
        self.assertTrue(type(train_set) is tf_slim.dataset.Dataset)
        self.assertEqual(train_set.num_classes, 5)
        self.assertEqual(train_set.num_samples, 3320)

        self.assertTrue(validation_set)
        self.assertTrue(type(validation_set) is tf_slim.dataset.Dataset)
        self.assertEqual(validation_set.num_classes, 5)
        self.assertEqual(validation_set.num_samples, 350)

        bmw_models_bot_id = 'bmw_models'
        bmw_model_protobuf = dirs.get_protobuf_dir(bmw_models_bot_id)
        train_set = factory.get_dataset('bot', 'train', bmw_model_protobuf)
        validation_set = factory.get_dataset('bot', 'validation',
                                             bmw_model_protobuf)
        exp_num_classes = utils.get_number_of_classes_by_labels(
            bmw_model_protobuf)

        exp_train_set_size = utils.get_split_size(bmw_models_bot_id, 'train')
        exp_val_set_size = utils.get_split_size(bmw_models_bot_id,
                                                'validation')

        self.assertTrue(train_set)
        self.assertTrue(type(train_set) is tf_slim.dataset.Dataset)
        self.assertEqual(train_set.num_classes, exp_num_classes)
        self.assertEqual(train_set.num_samples, exp_train_set_size)

        self.assertTrue(validation_set)
        self.assertTrue(type(validation_set) is tf_slim.dataset.Dataset)
        self.assertEqual(validation_set.num_classes, exp_num_classes)
        self.assertEqual(validation_set.num_samples, exp_val_set_size)
Example 7
def provide_data(split_name,
                 batch_size,
                 dataset_dir,
                 num_readers=1,
                 num_threads=1):
    """Provides batches of MNIST digits.

  Args:
    split_name: Either 'train' or 'test'.
    batch_size: The number of images in each batch.
    dataset_dir: The directory where the MNIST data can be found.
    num_readers: Number of dataset readers.
    num_threads: Number of prefetching threads.

  Returns:
    images: A `Tensor` of size [batch_size, 28, 28, 1]
    one_hot_labels: A `Tensor` of size [batch_size, mnist.NUM_CLASSES], where
      each row has a single element set to one and the rest set to zeros.
    num_samples: The number of total samples in the dataset.

  Raises:
    ValueError: If `split_name` is not either 'train' or 'test'.
  """
    dataset = datasets.get_dataset('mnist',
                                   split_name,
                                   dataset_dir=dataset_dir)
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=num_readers,
        common_queue_capacity=2 * batch_size,
        common_queue_min=batch_size,
        shuffle=(split_name == 'train'))
    [image, label] = provider.get(['image', 'label'])

    # Preprocess the images.
    image = (tf.to_float(image)) / 255.0
    # image = 2.0 * image - 1.0
    # image = tf.reshape(image, [-1])

    # Creates a QueueRunner for the pre-fetching operation.
    images, labels = tf.train.batch([image, label],
                                    batch_size=batch_size,
                                    num_threads=num_threads,
                                    capacity=5 * batch_size)

    one_hot_labels = tf.one_hot(labels, dataset.num_classes)
    return images, one_hot_labels, dataset.num_samples
Example 8
def provide_data(dataset_name='cifar10',
                 split_name='train',
                 dataset_dir=None,
                 batch_size=32,
                 shuffle=True,
                 num_threads=1,
                 patch_height=32,
                 patch_width=32,
                 colors=3):
  """Provides a batch of image data from predefined dataset.

  Args:
    dataset_name: A string of dataset name. Defaults to 'cifar10'.
    split_name: Either 'train' or 'validation'. Defaults to 'train'.
    dataset_dir: The directory where the data can be found. If `None`, use
      default.
    batch_size: The number of images in each minibatch. Defaults to 32.
    shuffle: Whether to shuffle the read images. Defaults to True.
    num_threads: Number of prefetching threads. Defaults to 1.
    patch_height: A Python integer. The read images height. Defaults to 32.
    patch_width: A Python integer. The read images width. Defaults to 32.
    colors: Number of channels. Defaults to 3.

  Returns:
    A float `Tensor` with shape [batch_size, patch_height, patch_width, colors]
    representing a batch of images.
  """
  dataset = datasets.get_dataset(
      dataset_name, split_name, dataset_dir=dataset_dir)
  provider = tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
      dataset,
      num_readers=1,
      common_queue_capacity=5 * batch_size,
      common_queue_min=batch_size,
      shuffle=shuffle)
  return batch_images(
      image=normalize_image(provider.get(['image'])[0]),
      patch_height=patch_height,
      patch_width=patch_width,
      colors=colors,
      batch_size=batch_size,
      shuffle=shuffle,
      num_threads=num_threads)
Example 9
def provide_data(dataset_name='cifar10',
                 split_name='train',
                 dataset_dir=None,
                 batch_size=32,
                 shuffle=True,
                 num_threads=1,
                 patch_height=32,
                 patch_width=32,
                 colors=3):
  """Provides a batch of image data from predefined dataset.

  Args:
    dataset_name: A string of dataset name. Defaults to 'cifar10'.
    split_name: Either 'train' or 'validation'. Defaults to 'train'.
    dataset_dir: The directory where the data can be found. If `None`, use
      default.
    batch_size: The number of images in each minibatch. Defaults to 32.
    shuffle: Whether to shuffle the read images. Defaults to True.
    num_threads: Number of prefetching threads. Defaults to 1.
    patch_height: A Python integer. The read images height. Defaults to 32.
    patch_width: A Python integer. The read images width. Defaults to 32.
    colors: Number of channels. Defaults to 3.

  Returns:
    A float `Tensor` with shape [batch_size, patch_height, patch_width, colors]
    representing a batch of images.
  """
  dataset = datasets.get_dataset(
      dataset_name, split_name, dataset_dir=dataset_dir)
  provider = tf.contrib.slim.dataset_data_provider.DatasetDataProvider(
      dataset,
      num_readers=1,
      common_queue_capacity=5 * batch_size,
      common_queue_min=batch_size,
      shuffle=shuffle)
  return batch_images(
      image=normalize_image(provider.get(['image'])[0]),
      patch_height=patch_height,
      patch_width=patch_width,
      colors=colors,
      batch_size=batch_size,
      shuffle=shuffle,
      num_threads=num_threads)
Example 10
def provide_data(split_name, batch_size, dataset_dir, num_readers=1,
                 num_threads=1):
  """Provides batches of MNIST digits.

  Args:
    split_name: Either 'train' or 'test'.
    batch_size: The number of images in each batch.
    dataset_dir: The directory where the MNIST data can be found.
    num_readers: Number of dataset readers.
    num_threads: Number of prefetching threads.

  Returns:
    images: A `Tensor` of size [batch_size, 28, 28, 1]
    one_hot_labels: A `Tensor` of size [batch_size, mnist.NUM_CLASSES], where
      each row has a single element set to one and the rest set to zeros.
    num_samples: The number of total samples in the dataset.

  Raises:
    ValueError: If `split_name` is not either 'train' or 'test'.
  """
  dataset = datasets.get_dataset('mnist', split_name, dataset_dir=dataset_dir)
  provider = slim.dataset_data_provider.DatasetDataProvider(
      dataset,
      num_readers=num_readers,
      common_queue_capacity=2 * batch_size,
      common_queue_min=batch_size,
      shuffle=(split_name == 'train'))
  [image, label] = provider.get(['image', 'label'])

  # Preprocess the images.
  image = (tf.to_float(image) - 128.0) / 128.0

  # Creates a QueueRunner for the pre-fetching operation.
  images, labels = tf.train.batch(
      [image, label],
      batch_size=batch_size,
      num_threads=num_threads,
      capacity=5 * batch_size)

  one_hot_labels = tf.one_hot(labels, dataset.num_classes)
  return images, one_hot_labels, dataset.num_samples
Example 11
def main(_):
    if not FLAGS.output_file:
        raise ValueError(
            'You must supply the path to save to with --output_file')
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default() as graph:
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name, 'train',
                                              FLAGS.dataset_dir)
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=FLAGS.is_training)
        image_size = FLAGS.image_size or network_fn.default_image_size
        placeholder = tf.placeholder(
            name='input',
            dtype=tf.float32,
            shape=[FLAGS.batch_size, image_size, image_size, 3])
        network_fn(placeholder)
        graph_def = graph.as_graph_def()
        with gfile.GFile(FLAGS.output_file, 'wb') as f:
            f.write(graph_def.SerializeToString())
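main() above only serializes the inference GraphDef. As a rough sketch (not part of the original script), the exported file could be read back and re-imported as follows, assuming the same FLAGS.output_file path:

import tensorflow as tf
from tensorflow.python.platform import gfile

graph_def = tf.GraphDef()
with gfile.GFile(FLAGS.output_file, 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    # With name='' the placeholder exported above stays reachable as 'input:0'
    # instead of the default 'import/input:0'.
    tf.import_graph_def(graph_def, name='')
    input_tensor = graph.get_tensor_by_name('input:0')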
Example 12
def eval_model(candidate, N, F, save_dir, model_name):
  print("eval model")
  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    tf_global_step = slim.get_or_create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, 'test', FLAGS.dataset_dir)

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name, candidate, N, F,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        is_training=False)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=False,
        common_queue_capacity=2 * FLAGS.batch_size,
        common_queue_min=FLAGS.batch_size)
    [image, label] = provider.get(['image', 'label'])
    label -= FLAGS.labels_offset

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=False)

    eval_image_size = network_fn.default_image_size

    image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

    FLAGS.batch_size = 100
    images, labels = tf.train.batch(
        [image, label],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)

    ####################
    # Define the model #
    ####################
    logits, _ = network_fn(images)

    if FLAGS.moving_average_decay:
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    predictions = tf.argmax(logits, 1)
    labels = tf.squeeze(labels)

    # Define the metrics:
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
       # 'Recall_5': slim.metrics.streaming_recall_at_k(
       #     logits, labels, 5),
    })

    # Print the summaries to screen.
    for name, value in names_to_values.items():
      summary_name = 'eval/%s' % name
      op = tf.summary.scalar(summary_name, value, collections=[])
      op = tf.Print(op, [value], summary_name)
      tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

    # TODO(sguada) use num_epochs=1
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    else:
      # This ensures that we make a single pass over all of the data.
      num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size))

    FLAGS.checkpoint_path = FLAGS.train_dir
    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path

    tf.logging.info('Evaluating %s' % checkpoint_path)

    final_op = [names_to_values['Accuracy']]  # top-1 accuracy to return
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #time.sleep(60)
    pl.start()
    start_time = time.time()
    a = slim.evaluation.evaluate_once(
        master=FLAGS.master,
        checkpoint_path=checkpoint_path,
        logdir=FLAGS.eval_dir,
        session_config=config,
        num_evals=num_batches,
        eval_op=list(names_to_updates.values()),
        final_op=final_op,
        variables_to_restore=variables_to_restore)
    duration = time.time() - start_time
    pl.stop()
    
    data_list = pl.getDataTrace(nodeName='module/gpu', valType='power')
    with open(os.path.join(save_dir, model_name + '_data_list_final_{}_{}.pkl'.format(N, F)), 'wb') as f:
        pickle.dump(data_list, f)

    power_list = data_list[1]
    time_list = data_list[0]

    start, end = get_start_end(power_list)
    integration_time = time_list[end] - time_list[start]
    integration_energy = integrate_power(power_list, time_list, start, end)

    return integration_time, integration_energy
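eval_model() above depends on get_start_end and integrate_power, whose definitions are not shown. The following is only a plausible sketch of such helpers; the threshold heuristic and trapezoidal integration are assumptions, not the original implementation:

import numpy as np

def get_start_end(power_list, threshold_frac=0.1):
    """Hypothetical helper: indices bracketing the region of elevated power draw."""
    power = np.asarray(power_list, dtype=np.float64)
    threshold = power.min() + threshold_frac * (power.max() - power.min())
    above = np.where(power > threshold)[0]
    if above.size == 0:
        return 0, len(power) - 1
    return int(above[0]), int(above[-1])

def integrate_power(power_list, time_list, start, end):
    """Hypothetical helper: trapezoidal integral of power [W] over time [s], i.e. energy [J]."""
    return float(np.trapz(power_list[start:end + 1], time_list[start:end + 1]))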
Example 13
def train_model(candidate, N, F):
  print("train model")
  print(FLAGS.dataset_name)
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    #######################
    # Config model_deploy #
    #######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

    ######################
    # Select the network #
    ######################

    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        candidate, N, F,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        weight_decay=FLAGS.weight_decay,
        is_training=True)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      [image, label] = provider.get(['image', 'label'])
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      image = image_preprocessing_fn(image, train_image_size, train_image_size)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

    ####################
    # Define the model #
    ####################
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      logits, end_points = network_fn(images)

      #############################
      # Specify the loss function #
      #############################
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], labels,
            label_smoothing=FLAGS.label_smoothing, weights=0.4,
            scope='aux_loss')
      slim.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    end_points = clones[0].outputs
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.summary.histogram('activations/' + end_point, x))
      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          total_num_replicas=FLAGS.worker_replicas,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()

    # Compute the total loss and the gradients across all clones.
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # Create gradient updates.
    grad_updates = optimizer.apply_gradients(clones_gradients,
                                             global_step=global_step)
    update_ops.append(grad_updates)

    update_op = tf.group(*update_ops)
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')


    ###########################
    # Kicks off the training. #
    ###########################
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None)
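train_model() above relies on _configure_learning_rate, _configure_optimizer, _get_variables_to_train and _get_init_fn, which are not reproduced here. A sketch of _get_variables_to_train in the usual slim train_image_classifier.py style (assumed, since the actual helper is not shown):

def _get_variables_to_train():
    """Returns the variables to optimize, filtered by FLAGS.trainable_scopes."""
    if FLAGS.trainable_scopes is None:
        return tf.trainable_variables()
    scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
    variables_to_train = []
    for scope in scopes:
        # Collect the trainable variables under each requested scope prefix.
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        variables_to_train.extend(variables)
    return variables_to_train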
Example 14
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples /
                                    float(FLAGS.batch_size))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
Example 15
def run_transfer_learning(root_model_dir, bot_model_dir, protobuf_dir, model_name='inception_v4',
                          dataset_split_name='train',
                          dataset_name='bot',
                          checkpoint_exclude_scopes=None,
                          trainable_scopes=None,
                          max_train_time_sec=None,
                          max_number_of_steps=None,
                          log_every_n_steps=None,
                          save_summaries_secs=None,
                          optimization_params=None):
    """
    Starts the transfer learning of a model in a tensorflow session
    :param root_model_dir: Directory containing the root model's pretrained checkpoint files
    :param bot_model_dir: Directory where the transfer learned model's checkpoint files are written to
    :param protobuf_dir: Directory for the dataset factory to load the bot's training data from
    :param model_name: name of the network model for the net factory to provide the correct network and preprocessing fn
    :param dataset_split_name: 'train' or 'validation'
    :param dataset_name: triggers the dataset factory to load a bot dataset
    :param checkpoint_exclude_scopes: Layers to exclude when restoring the models variables
    :param trainable_scopes: Layers to train from the restored model
    :param max_train_time_sec: time budget in seconds after which training is stopped
    :param max_number_of_steps: maximum number of steps to run
    :param log_every_n_steps: write a log after every nth optimization step
    :param save_summaries_secs: save summaries to disk every n seconds
    :param optimization_params: parameters for the optimization
    :return: 
    """
    if not optimization_params:
        optimization_params = OPTIMIZATION_PARAMS

    if not max_number_of_steps:
        max_number_of_steps = _MAX_NUMBER_OF_STEPS

    if not checkpoint_exclude_scopes:
        checkpoint_exclude_scopes = _CHECKPOINT_EXCLUDE_SCOPES

    if not trainable_scopes:
        trainable_scopes = _TRAINABLE_SCOPES

    if not max_train_time_sec:
        max_train_time_sec = _MAX_TRAIN_TIME_SECONDS

    if not log_every_n_steps:
        log_every_n_steps = _LOG_EVERY_N_STEPS

    if not save_summaries_secs:
        save_summaries_secs = _SAVE_SUMMARRIES_SECS

    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=_NUM_CLONES,
            clone_on_cpu=_CLONE_ON_CPU,
            replica_id=_TASK,
            num_replicas=_WORKER_REPLICAS,
            num_ps_tasks=_NUM_PS_TASKS)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            dataset_name, dataset_split_name, protobuf_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            model_name,
            num_classes=(dataset.num_classes - _LABELS_OFFSET),
            weight_decay=OPTIMIZATION_PARAMS['weight_decay'],
            is_training=True,
            dropout_keep_prob=OPTIMIZATION_PARAMS['dropout_keep_prob'])

        #####################################
        # Select the preprocessing function #
        #####################################
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            model_name,
            is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=_NUM_READERS,
                common_queue_capacity=20 * _BATCH_SIZE,
                common_queue_min=10 * _BATCH_SIZE)
            [image, label] = provider.get(['image', 'label'])
            label -= _LABELS_OFFSET

            train_image_size = network_fn.default_image_size

            image = image_preprocessing_fn(image, train_image_size, train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=_BATCH_SIZE,
                num_threads=_NUM_PREPROCESSING_THREADS,
                capacity=5 * _BATCH_SIZE)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - _LABELS_OFFSET)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                tf.losses.softmax_cross_entropy(
                    logits=end_points['AuxLogits'], onehot_labels=labels,
                    label_smoothing=_LABEL_SMOOTHING, weights=0.4, scope='aux_loss')
            tf.losses.softmax_cross_entropy(
                logits=logits, onehot_labels=labels,
                label_smoothing=_LABEL_SMOOTHING, weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if OPTIMIZATION_PARAMS['moving_average_decay']:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                OPTIMIZATION_PARAMS['moving_average_decay'], global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if _SYNC_REPLICAS:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=_REPLICAS_TO_AGGREGATE,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(_TASK, tf.int32, shape=()),
                total_num_replicas=_WORKER_REPLICAS)
        elif OPTIMIZATION_PARAMS['moving_average_decay']:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train(trainable_scopes)

        # Compute the total loss and the gradients across all clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=bot_model_dir,
            train_step_fn=train_step,  # Manually added a custom train step to stop after max_time
            train_step_kwargs=_train_step_kwargs(logdir=bot_model_dir, max_train_time_seconds=max_train_time_sec),
            master=_MASTER,
            is_chief=(_TASK == 0),
            init_fn=_get_init_fn(root_model_dir, bot_model_dir, checkpoint_exclude_scopes),
            summary_op=summary_op,
            # number_of_steps=max_number_of_steps,
            log_every_n_steps=log_every_n_steps,
            save_summaries_secs=save_summaries_secs,
            save_interval_secs=_SAVE_INTERNAL_SECS,
            sync_optimizer=optimizer if _SYNC_REPLICAS else None)
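The train_step/_train_step_kwargs pair passed to slim.learning.train above is custom (it stops training after max_train_time_sec) and its definition is not shown. A plausible sketch that wraps the default slim.learning.train_step and requests a stop once the wall-clock budget is spent:

import time
import tensorflow as tf
import tensorflow.contrib.slim as slim

def _train_step_kwargs(logdir, max_train_time_seconds):
    # Extra keys are ignored by the default slim train_step, so the time budget
    # can be threaded through this dict (assumed layout, not the original code).
    return {
        'logdir': logdir,
        'start_time': time.time(),
        'max_train_time_seconds': max_train_time_seconds,
        'should_log': tf.constant(True),
    }

def train_step(sess, train_op, global_step, train_step_kwargs):
    """Runs one optimization step and stops once the time budget is exhausted."""
    total_loss, should_stop = slim.learning.train_step(
        sess, train_op, global_step, train_step_kwargs)
    elapsed = time.time() - train_step_kwargs['start_time']
    if elapsed > train_step_kwargs['max_train_time_seconds']:
        tf.logging.info('Stopping training after %.0f seconds.', elapsed)
        should_stop = True
    return total_loss, should_stop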
Example 16
def eval(bot_id,
         bot_suffix='',
         setting_id=None,
         validation_setting=2,
         dataset_split='validation',
         dataset_name='bot',
         model_name='inception_v4',
         preprocessing=None,
         moving_average_decay=None,
         tf_master=''):
    full_id = bot_id + bot_suffix
    if setting_id:
        protobuf_dir = dirs.get_transfer_proto_dir(bot_id, validation_setting)
        model_dir = dirs.get_transfer_model_dir(full_id, setting_id)
    else:
        protobuf_dir = dirs.get_protobuf_dir(bot_id)
        model_dir = dirs.get_model_data_dir(full_id)

    _check_dir(protobuf_dir)
    _check_dir(model_dir)

    print("READING FROM %s AND %s" % (protobuf_dir, model_dir))

    performance_data_dir = dirs.get_performance_data_dir(bot_id)
    #    if os.listdir(performance_data_dir):
    #        raise ValueError('%s is not empty' % performance_data_dir)

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(dataset_name, dataset_split,
                                              protobuf_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            model_name,
            num_classes=(dataset.num_classes - LABELS_OFFSET),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * BATCH_SIZE,
            common_queue_min=BATCH_SIZE)
        [image, label] = provider.get(['image', 'label'])
        label -= LABELS_OFFSET

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = preprocessing or model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = EVAL_IMAGE_SIZE or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch([image, label],
                                        batch_size=BATCH_SIZE,
                                        num_threads=NUM_THREADS,
                                        capacity=5 * BATCH_SIZE)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
            slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
            slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if MAX_NUM_BATCHES:
            num_batches = MAX_NUM_BATCHES
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(dataset.num_samples / float(BATCH_SIZE))

        if tf.gfile.IsDirectory(model_dir):
            checkpoint_path = tf.train.latest_checkpoint(model_dir)
        else:
            checkpoint_path = model_dir

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=tf_master,
            checkpoint_path=checkpoint_path,
            logdir=performance_data_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)