Example #1
def build_pathnet_eval_graph(task_names, batch_size, num_classes_for_tasks,
                             router_fn):
    """Constructs the PathNet eval graph.

    Args:
      task_names: (list of strings) names of tasks.
      batch_size: (int) batch size to use.
      num_classes_for_tasks: (list of ints) number of classes for each task.
      router_fn: function that, given a single argument `num_components`,
        returns a router (see routers in `pathnet/pathnet_lib.py`) for a layer
        containing `num_components` components.

    Returns:
      A tuple of (`p_inputs`, `p_task_id`, `out_logits`). `p_inputs` and
      `p_task_id` are placeholders for the input image and the scalar task id,
      respectively. `out_logits` is the final network output (classification
      logits).
    """
    num_tasks = len(task_names)

    # PathNet layers

    keras_layers = models.get_keras_layers_for_omniglot_experiment()

    pathnet_layers = models.build_model_from_keras_layers(
        _OMNIGLOT_INPUT_SHAPE, num_tasks, keras_layers, router_fn)

    # Task-specific linear heads

    pathnet_layers.append(
        utils.create_layer_with_task_specific_linear_heads(
            num_classes_for_tasks))

    # Output components

    pathnet_layers.append(
        create_uniform_layer(
            num_components=num_tasks,
            component_fn=lambda: pn_components.ModelHeadComponent(
                loss_fn=loss_fn),
            combiner_fn=pn.SelectCombiner,
            router_fn=lambda: None))

    pathnet = pn.PathNet(pathnet_layers,
                         tf.contrib.training.HParams(batch_size=batch_size))

    p_inputs, _, p_task_id, _, out_logits = utils.build_pathnet_graph(
        pathnet, _OMNIGLOT_INPUT_SHAPE, training=False)

    return p_inputs, p_task_id, out_logits
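
A minimal, hypothetical usage sketch for the eval graph (the task names, class
counts, and `make_router` helper below are assumptions; any router from
`pathnet/pathnet_lib.py` can be supplied):

# Assumes TF1-style sessions, matching the `tf.contrib` usage above, and that
# `make_router` wraps one of the routers from pathnet/pathnet_lib.py.
p_inputs, p_task_id, out_logits = build_pathnet_eval_graph(
    task_names=['alphabet_a', 'alphabet_b'],
    batch_size=16,
    num_classes_for_tasks=[20, 20],
    router_fn=make_router)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # `images` is one batch shaped [batch_size] + _OMNIGLOT_INPUT_SHAPE.
    logits = sess.run(out_logits,
                      feed_dict={p_inputs: images, p_task_id: 0})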
Example #2
def construct_pathnet_and_run_mnist_experiment(task_names, task_data,
                                               num_classes_for_tasks,
                                               router_fn):
    """Runs the MNIST experiment.

    Args:
      task_names: (list of strings) names of tasks.
      task_data: (list of dicts) list of dictionaries, one per task. Each
        dictionary should map the strings 'train' and 'test' to
        `tf.data.Dataset`s for training and testing, respectively.
      num_classes_for_tasks: (list of ints) number of classes for each task.
      router_fn: function that, given a single argument `num_components`,
        returns a router (see routers in `pathnet/pathnet_lib.py`) for a layer
        containing `num_components` components.
    """
    num_tasks = len(task_names)

    input_data_shape = [28, 28, 1]
    batch_size = 16

    for task_id in range(num_tasks):
        task_data[task_id] = data.batch_all(task_data[task_id], batch_size)

    # Train each task for 10 epochs; MNIST has 60000 training images.
    n_epochs = 10

    training_hparams = tf.contrib.training.HParams(
        num_steps=n_epochs * 60000 // batch_size,
        batch_size=batch_size,
        learning_rate=0.005)

    routers = []

    def get_router(num_components):
        routers.append(router_fn(num_components))
        return routers[-1]

    # PathNet layers

    keras_layers = models.get_keras_layers_for_mnist_experiment(
        num_components=num_tasks)

    pathnet_layers = models.build_model_from_keras_layers(
        input_data_shape, num_tasks, keras_layers, get_router)

    # Task-specific linear heads

    pathnet_layers.append(
        utils.create_layer_with_task_specific_linear_heads(
            num_classes_for_tasks))

    # Output components to compute task loss

    auxiliary_loss_fn = utils.create_auxiliary_loss_function(
        routers=routers,
        # Hard-coded total number of components across all routed layers in
        # this model (Example #3 below computes this count dynamically).
        num_total_components=12,
        num_total_steps=training_hparams.num_steps * num_tasks,
        budget=FLAGS.budget,
        budget_penalty=FLAGS.budget_penalty,
        entropy_penalty=FLAGS.entropy_penalty,
        entropy_penalty_alpha=FLAGS.entropy_penalty_alpha)

    def component_fn():
        return pn_components.ModelHeadComponent(
            loss_fn=loss_fn, auxiliary_loss_fn=auxiliary_loss_fn)

    pathnet_layers.append(
        create_uniform_layer(num_components=num_tasks,
                             component_fn=component_fn,
                             combiner_fn=pn.SelectCombiner,
                             router_fn=lambda: None))

    utils.run_pathnet_training_and_evaluation(
        task_names=task_names,
        task_data=task_data,
        input_data_shape=input_data_shape,
        training_hparams=training_hparams,
        components_layers=pathnet_layers,
        evaluate_on=['train', 'test'],
        summary_dir=FLAGS.logdir)
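
A hypothetical call sketch for this experiment (the dataset variables and
`make_router` are stand-ins, not part of the snippet):

# Two MNIST-derived tasks; `train_ds_a` etc. are assumed to be
# `tf.data.Dataset`s of (image, label) pairs with 28x28x1 images.
task_names = ['digits_0_4', 'digits_5_9']
task_data = [
    {'train': train_ds_a, 'test': test_ds_a},
    {'train': train_ds_b, 'test': test_ds_b},
]

construct_pathnet_and_run_mnist_experiment(
    task_names=task_names,
    task_data=task_data,
    num_classes_for_tasks=[5, 5],
    router_fn=make_router)  # any router from pathnet/pathnet_lib.py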
Example #3
def construct_pathnet(
    num_steps_per_task, task_names, num_classes_for_tasks, router_fn):
  """Constructs the PathNet layers for the Omniglot experiment.

  Args:
    num_steps_per_task: (int) number of training steps that will be performed
      per task. This function does not run any training; the number of steps
      is used to determine how some auxiliary losses are annealed over time.
    task_names: (list of strings) names of tasks.
    num_classes_for_tasks: (list of ints) number of classes for each task.
    router_fn: function that, given a single argument `num_components`, returns
      a router (see routers in `pathnet/pathnet_lib.py`) for a layer containing
      `num_components` components.

  Returns:
    A list of `pn.ComponentsLayer`s - layers that make up the PathNet model.
  """
  num_tasks = len(task_names)

  routers = []
  num_total_components = 0

  def get_router(num_components):
    nonlocal num_total_components

    routers.append(router_fn(num_components))
    num_total_components += num_components

    return routers[-1]

  # PathNet layers

  keras_layers = models.get_keras_layers_for_omniglot_experiment()

  pathnet_layers = models.build_model_from_keras_layers(
      _OMNIGLOT_INPUT_SHAPE, num_tasks, keras_layers, get_router)

  # Task-specific linear heads

  pathnet_layers.append(
      utils.create_layer_with_task_specific_linear_heads(num_classes_for_tasks))

  # Output components to compute task loss

  auxiliary_loss_fn = utils.create_auxiliary_loss_function(
      routers=routers,
      num_total_components=num_total_components,
      num_total_steps=num_steps_per_task * num_tasks,
      l2_penalty=FLAGS.l2_penalty,
      budget=FLAGS.budget,
      budget_penalty=FLAGS.budget_penalty,
      entropy_penalty=FLAGS.entropy_penalty,
      entropy_penalty_alpha=FLAGS.entropy_penalty_alpha)

  def component_fn():
    return pn_components.ModelHeadComponent(
        loss_fn=loss_fn, auxiliary_loss_fn=auxiliary_loss_fn)

  pathnet_layers.append(create_uniform_layer(
      num_components=num_tasks,
      component_fn=component_fn,
      combiner_fn=pn.SelectCombiner,
      router_fn=lambda: None))

  return pathnet_layers
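
Unlike the MNIST example, this function only builds the layer stack; the
caller wraps it into a `pn.PathNet` (as in Example #1) or hands it to the
training helper. A sketch under the same hypothetical names as above:

# Build the Omniglot layer stack and wrap it into a PathNet.
num_steps_per_task = 2000  # illustrative value
pathnet_layers = construct_pathnet(
    num_steps_per_task=num_steps_per_task,
    task_names=['alphabet_a', 'alphabet_b'],
    num_classes_for_tasks=[20, 20],
    router_fn=make_router)

pathnet = pn.PathNet(
    pathnet_layers, tf.contrib.training.HParams(batch_size=16))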