def build_pathnet_eval_graph(
        task_names, batch_size, num_classes_for_tasks, router_fn):
    """Builds the PathNet graph used for evaluation (`training=False`).

    The model is assembled from the Omniglot keras layers, followed by a layer
    of task-specific linear heads and a final layer of model-head components
    (one per task) combined with `pn.SelectCombiner`.

    Args:
      task_names: (list of strings) names of tasks; only its length is used
        here, to determine the number of tasks.
      batch_size: (int) batch size stored in the hyperparameters given to
        `pn.PathNet`.
      num_classes_for_tasks: (list of ints) number of classes for each task.
      router_fn: function that, given a single argument `num_components`,
        returns a router (see routers in `pathnet/pathnet_lib.py`) for a layer
        containing `num_components` components.

    Returns:
      A tuple of (`p_inputs`, `p_task_id`, `out_logits`). `p_inputs` and
      `p_task_id` are placeholders for input image and scalar task id,
      respectively. `out_logits` are the final network output (classification
      logits).
    """
    task_count = len(task_names)

    def make_head_component():
        # No auxiliary loss is attached in the eval graph; `loss_fn` is
        # resolved from the enclosing (module) scope.
        return pn_components.ModelHeadComponent(loss_fn=loss_fn)

    def no_router():
        # The output layer is built without a router.
        return None

    # Routed layers built from the Omniglot keras layers.
    omniglot_keras_layers = models.get_keras_layers_for_omniglot_experiment()
    layers = models.build_model_from_keras_layers(
        _OMNIGLOT_INPUT_SHAPE, task_count, omniglot_keras_layers, router_fn)

    # One linear head per task.
    linear_heads = utils.create_layer_with_task_specific_linear_heads(
        num_classes_for_tasks)
    layers.append(linear_heads)

    # Final layer: one model-head component per task.
    output_layer = create_uniform_layer(
        num_components=task_count,
        component_fn=make_head_component,
        combiner_fn=pn.SelectCombiner,
        router_fn=no_router)
    layers.append(output_layer)

    pathnet = pn.PathNet(
        layers, tf.contrib.training.HParams(batch_size=batch_size))

    # `build_pathnet_graph` yields five values; the second and fourth are not
    # needed by callers of this function.
    graph_outputs = utils.build_pathnet_graph(
        pathnet, _OMNIGLOT_INPUT_SHAPE, training=False)
    p_inputs, _, p_task_id, _, out_logits = graph_outputs

    return p_inputs, p_task_id, out_logits
def construct_pathnet_and_run_mnist_experiment(
        task_names, task_data, num_classes_for_tasks, router_fn):
    """Builds a PathNet for the MNIST tasks, then trains and evaluates it.

    Note that `task_data` is modified in place: each entry is replaced by the
    result of `data.batch_all` applied to it with a batch size of 16.

    Args:
      task_names: (list of strings) names of tasks.
      task_data: (list of dicts) list of dictionaries, one per task. Each
        dictionary should map strings 'train' and 'test' into
        `tf.data.Dataset`s for training and testing, respectively.
      num_classes_for_tasks: (list of ints) number of classes for each task.
      router_fn: function that, given a single argument `num_components`,
        returns a router (see routers in `pathnet/pathnet_lib.py`) for a layer
        containing `num_components` components.
    """
    task_count = len(task_names)
    image_shape = [28, 28, 1]
    batch_size = 16

    # Replace every task's datasets with their batched versions (in place).
    for idx in range(task_count):
        task_data[idx] = data.batch_all(task_data[idx], batch_size)

    # Each task is trained for 10 epochs.
    epochs = 10
    examples_per_epoch = 60000  # presumably the MNIST training-set size
    steps_per_task = epochs * examples_per_epoch // batch_size
    training_hparams = tf.contrib.training.HParams(
        num_steps=steps_per_task,
        batch_size=batch_size,
        learning_rate=0.005)

    # Every router handed out while building the model is remembered so that
    # the auxiliary loss can be given the full list afterwards.
    created_routers = []

    def recording_router_fn(num_components):
        router = router_fn(num_components)
        created_routers.append(router)
        return router

    # Routed layers built from the MNIST keras layers.
    mnist_keras_layers = models.get_keras_layers_for_mnist_experiment(
        num_components=task_count)
    layers = models.build_model_from_keras_layers(
        image_shape, task_count, mnist_keras_layers, recording_router_fn)

    # One linear head per task.
    linear_heads = utils.create_layer_with_task_specific_linear_heads(
        num_classes_for_tasks)
    layers.append(linear_heads)

    # NOTE(review): `num_total_components` is hard-coded to 12 here rather than
    # derived from the routers that were actually created - confirm it matches
    # the model for every supported number of tasks.
    auxiliary_loss_fn = utils.create_auxiliary_loss_function(
        routers=created_routers,
        num_total_components=12,
        num_total_steps=training_hparams.num_steps * task_count,
        budget=FLAGS.budget,
        budget_penalty=FLAGS.budget_penalty,
        entropy_penalty=FLAGS.entropy_penalty,
        entropy_penalty_alpha=FLAGS.entropy_penalty_alpha)

    def make_head_component():
        # `loss_fn` is resolved from the enclosing (module) scope.
        return pn_components.ModelHeadComponent(
            loss_fn=loss_fn, auxiliary_loss_fn=auxiliary_loss_fn)

    def no_router():
        # The output layer is built without a router.
        return None

    # Final layer: one model-head component per task, computing the task loss.
    output_layer = create_uniform_layer(
        num_components=task_count,
        component_fn=make_head_component,
        combiner_fn=pn.SelectCombiner,
        router_fn=no_router)
    layers.append(output_layer)

    utils.run_pathnet_training_and_evaluation(
        task_names=task_names,
        task_data=task_data,
        input_data_shape=image_shape,
        training_hparams=training_hparams,
        components_layers=layers,
        evaluate_on=['train', 'test'],
        summary_dir=FLAGS.logdir)
def construct_pathnet(
        num_steps_per_task, task_names, num_classes_for_tasks, router_fn):
    """Builds the list of PathNet layers for the Omniglot experiment.

    This function only constructs the model; it does not run any training or
    evaluation.

    Args:
      num_steps_per_task: (int) number of training steps that will be performed
        per task. This function does not run any training; the number of steps
        is used to determine how some auxiliary losses are annealed over time.
      task_names: (list of strings) names of tasks.
      num_classes_for_tasks: (list of ints) number of classes for each task.
      router_fn: function that, given a single argument `num_components`,
        returns a router (see routers in `pathnet/pathnet_lib.py`) for a layer
        containing `num_components` components.

    Returns:
      A list of `pn.ComponentsLayer`s - layers that make up the PathNet model.
    """
    task_count = len(task_names)

    # Every router handed out while building the model is remembered, together
    # with the number of components it was requested for.
    created_routers = []
    component_counts = []

    def recording_router_fn(num_components):
        router = router_fn(num_components)
        created_routers.append(router)
        component_counts.append(num_components)
        return router

    # Routed layers built from the Omniglot keras layers.
    omniglot_keras_layers = models.get_keras_layers_for_omniglot_experiment()
    layers = models.build_model_from_keras_layers(
        _OMNIGLOT_INPUT_SHAPE,
        task_count,
        omniglot_keras_layers,
        recording_router_fn)

    # One linear head per task.
    linear_heads = utils.create_layer_with_task_specific_linear_heads(
        num_classes_for_tasks)
    layers.append(linear_heads)

    # The auxiliary loss receives the total number of components across all
    # routers created so far.
    auxiliary_loss_fn = utils.create_auxiliary_loss_function(
        routers=created_routers,
        num_total_components=sum(component_counts),
        num_total_steps=num_steps_per_task * task_count,
        l2_penalty=FLAGS.l2_penalty,
        budget=FLAGS.budget,
        budget_penalty=FLAGS.budget_penalty,
        entropy_penalty=FLAGS.entropy_penalty,
        entropy_penalty_alpha=FLAGS.entropy_penalty_alpha)

    def make_head_component():
        # `loss_fn` is resolved from the enclosing (module) scope.
        return pn_components.ModelHeadComponent(
            loss_fn=loss_fn, auxiliary_loss_fn=auxiliary_loss_fn)

    def no_router():
        # The output layer is built without a router.
        return None

    # Final layer: one model-head component per task, computing the task loss.
    output_layer = create_uniform_layer(
        num_components=task_count,
        component_fn=make_head_component,
        combiner_fn=pn.SelectCombiner,
        router_fn=no_router)
    layers.append(output_layer)

    return layers