Esempio n. 1
0
 def _build(self, attribute_value):
     """Embed a continuous attribute value with an MLP followed by LayerNorm.

     Histogram summaries are recorded for both the incoming value and the
     resulting embedding.

     Args:
         attribute_value: tensor of attribute values; cast to float32 before
             being fed to the network.

     Returns:
         The embedding produced by the MLP + LayerNorm stack.
     """
     tf.summary.histogram('cont_attribute_value_histogram', attribute_value)

     # Three layers, each `self._attr_embedding_dim` wide.
     mlp = snt.nets.MLP([self._attr_embedding_dim] * 3,
                        activate_final=True,
                        use_dropout=True)
     embedder = snt.Sequential([mlp, snt.LayerNorm()])

     as_float = tf.cast(attribute_value, dtype=tf.float32)
     embedding = embedder(as_float)

     tf.summary.histogram('cont_embedding_histogram', embedding)
     return embedding
Esempio n. 2
0
def make_ensemble(num_actions: int,
                  num_ensemble: int = 20,
                  num_hidden_layers: int = 2,
                  num_units: int = 50,
                  prior_scale: float = 3.) -> Sequence[snt.Module]:
    """Build an ensemble of networks, each paired with its own prior network.

    Args:
        num_actions: size of the final MLP layer.
        num_ensemble: number of ensemble members to create.
        num_hidden_layers: number of hidden layers in every MLP.
        num_units: width of every hidden layer.
        prior_scale: forwarded to `NetworkWithPrior` for each member.

    Returns:
        A list with `num_ensemble` `NetworkWithPrior` instances.
    """
    layer_sizes = [num_units] * num_hidden_layers + [num_actions]

    def _flat_mlp():
        # Every call creates fresh modules, so no parameters are shared.
        return snt.Sequential([snt.Flatten(), snt.nets.MLP(layer_sizes)])

    return [
        NetworkWithPrior(_flat_mlp(), _flat_mlp(), prior_scale)
        for _ in range(num_ensemble)
    ]
Esempio n. 3
0
 def _build(self, inputs):
     """Decode `inputs` into game output, score and game-over predictions.

     The inputs go through `util.concat_features` and an MLP configured from
     the hyper-parameters; the resulting hidden tensor feeds all three heads.

     Returns:
         A tuple `(game_output, score, game_over)`.
     """
     hp = self._hparams
     trunk = snt.Sequential([
         util.concat_features,
         util.make_mlp(hp, hp.obs_decoder_fc_hidden_layers, activate_final=True),
     ])
     hidden = trunk(inputs)

     game_output = self._build_game_output(hidden)
     score = self._build_score(hidden)
     game_over = self._build_game_over(hidden)
     return (game_output, score, game_over)
Esempio n. 4
0
def run(bsuite_id: Text) -> Text:
  """Trains a DQN agent on one bsuite environment and returns its id.

  The environment is created with `bsuite.load_and_record`, so results are
  recorded according to the `save_path`, `logging_mode` and `overwrite` flags.

  Args:
    bsuite_id: identifier of the bsuite environment to load.

  Returns:
    The same `bsuite_id` that was passed in.
  """
  env = bsuite.load_and_record(
      bsuite_id=bsuite_id,
      save_path=FLAGS.save_path,
      logging_mode=FLAGS.logging_mode,
      overwrite=FLAGS.overwrite,
  )

  hidden_units = [FLAGS.num_units] * FLAGS.num_hidden_layers

  def _q_network():
    # Flatten the observation, then an MLP with one output per action.
    return snt.Sequential([
        snt.Flatten(),
        snt.nets.MLP(hidden_units + [env.action_spec().num_values]),
    ])

  # Online and target networks share an architecture but not parameters.
  online_network = _q_network()
  target_network = _q_network()

  agent = dqn.DQNTF2(
      action_spec=env.action_spec(),
      online_network=online_network,
      target_network=target_network,
      batch_size=FLAGS.batch_size,
      discount=FLAGS.discount,
      replay_capacity=FLAGS.replay_capacity,
      min_replay_size=FLAGS.min_replay_size,
      sgd_period=FLAGS.sgd_period,
      target_update_period=FLAGS.target_update_period,
      optimizer=snt.optimizers.Adam(learning_rate=FLAGS.learning_rate),
      epsilon=FLAGS.epsilon,
      seed=FLAGS.seed,
  )

  # Fall back to the environment's own episode count when the flag is unset.
  num_episodes = FLAGS.num_episodes or env.bsuite_num_episodes  # pytype: disable=attribute-error
  experiment.run(
      agent=agent,
      environment=env,
      num_episodes=num_episodes,
      verbose=FLAGS.verbose)

  return bsuite_id
Esempio n. 5
0
    def _build(self, inpt):
        """Map `inpt` to two 4-wide parameter groups.

        The input is batch-flattened and passed through an MLP with 8 outputs.
        The first four outputs are returned unchanged; the last four are
        shifted by the `scale_offset` variable.

        Returns:
            A tuple `(params[..., :4], params[..., 4:] + scale_offset)`.
        """
        net = snt.Sequential([
            snt.BatchFlatten(),
            MLP(self._n_hidden, n_out=8),
        ])
        params = net(inpt)

        scale_offset = tf.get_variable('scale_offset',
                                       initializer=self._scale_offset)

        leading = params[..., :4]
        trailing = params[..., 4:] + scale_offset
        return leading, trailing
Esempio n. 6
0
def make_value_func_dm_control(
    value_layer_sizes: str = '512,512,256',
    adversarial_layer_sizes: str = '512,512,256',
) -> Tuple[snt.Module, snt.Module]:
    """Create the value and adversarial critic networks.

    Args:
        value_layer_sizes: comma-separated hidden layer sizes of the value
            network.
        adversarial_layer_sizes: comma-separated hidden layer sizes of the
            adversarial network.

    Returns:
        A tuple `(value_function, adversarial_function)`; both are
        `CriticMultiplexer -> LayerNormMLP -> Linear(1)` stacks.
    """
    def _critic(sizes_csv):
        sizes = [int(size) for size in sizes_csv.split(',')]
        return snt.Sequential([
            networks.CriticMultiplexer(),
            networks.LayerNormMLP(sizes, activate_final=True),
            snt.Linear(1),
        ])

    value_function = _critic(value_layer_sizes)
    adversarial_function = _critic(adversarial_layer_sizes)
    return value_function, adversarial_function
Esempio n. 7
0
def make_conv_model():
    """Return a conv net -> batch flatten -> 256-unit MLP stack.

    The convolutional part has two layers with 32 output channels each,
    3x3 kernels, stride 1 and 'VALID' padding.
    """
    conv_stack = snt.nets.ConvNet2D(
        output_channels=[32, 32],
        kernel_shapes=[3, 3],
        strides=[1, 1],
        paddings=['VALID', 'VALID'],
        activate_final=True,
    )
    flatten = snt.BatchFlatten()
    dense = snt.nets.MLP([256], activate_final=True)
    return snt.Sequential([conv_stack, flatten, dense])
Esempio n. 8
0
def _make_network(num_actions: int,
                  torso_layers: Sequence[int] = (5,),
                  head_layers: Sequence[int] = (5,)):
    """Build a torso MLP followed by a duelling MLP head.

    Args:
        num_actions: number of actions passed to the duelling head.
        torso_layers: layer sizes of the torso MLP.
        head_layers: hidden layer sizes of the duelling head.

    Returns:
        A `snt.Sequential` chaining the torso and the duelling head.
    """
    # The defaults are immutable tuples so they cannot be shared and mutated
    # across calls; fresh lists are handed to the modules.
    torso_sizes = list(torso_layers)
    head_sizes = list(head_layers)

    network = snt.Sequential([
        # Torso MLP.
        snt.nets.MLP(torso_sizes, activate_final=True),
        # Dueling MLP head.
        networks.DuellingMLP(num_actions=num_actions, hidden_sizes=head_sizes)
    ])
    return network
Esempio n. 9
0
  def testWarning(self):
    """get_variables() on a Sequential logs a warning and returns ()."""
    layers = [snt.Linear(output_size=23), snt.Linear(output_size=42)]
    seq = snt.Sequential(layers)
    seq(tf.placeholder(dtype=tf.float32, shape=[2, 3]))

    with mock.patch.object(tf.logging, "warning") as warning_mock:
      variables = seq.get_variables()
      self.assertEqual((), variables)
      self.assertTrue(warning_mock.called)

      # Positional arguments of the recorded warning call; the first one is
      # the message.
      message = warning_mock.call_args[0][0]
      self.assertIn("will always return an empty tuple", message)
Esempio n. 10
0
  def testCopiesModules(self):
    """Sequential must keep its own copy of the layer list it is given."""
    layers = [snt.Linear(output_size=200), tf.tanh, snt.Linear(output_size=10)]
    net = snt.Sequential(layers)

    # Corrupt the caller-side list after construction.
    layers[1] = "i'm a string, not a module"

    # If Sequential had kept a reference to `layers` instead of a copy,
    # connecting it would now raise a TypeError.
    inputs = tf.placeholder(tf.float32, [23, 42])
    net(inputs)
Esempio n. 11
0
  def evaluator(
      self,
      variable_source: acme.VariableSource,
      counter: counting.Counter,
  ):
    """Builds the environment loop used for evaluation.

    Args:
      variable_source: source handed to the variable client, from which the
        policy variables are fetched.
      counter: counter that is wrapped in a new `counting.Counter` together
        with the 'evaluator' label.

    Returns:
      An `acme.EnvironmentLoop` running the evaluation actor.
    """
    spec = self._environment_spec
    action_spec = spec.actions
    observation_spec = spec.observations

    # Environment and networks the evaluator acts with.
    eval_environment = self._environment_factory(True)
    nets = self._network_factory(action_spec)

    # Fall back to the identity when no observation network is provided.
    obs_net = nets.get('observation', tf.identity)

    # Observation network -> policy -> StochasticMeanHead.
    eval_policy = snt.Sequential([
        obs_net,
        nets['policy'],
        networks.StochasticMeanHead(),
    ])

    # Variables must exist before they can be handed to the variable client.
    tf2_utils.create_variables(eval_policy, [observation_spec])
    client = tf2_variable_utils.VariableClient(
        variable_source,
        {'policy': eval_policy.variables},
        update_period=self._variable_update_period)

    # Block on a first update so the loop never runs with freshly
    # initialised (random) weights.
    client.update_and_wait()

    actor = actors.FeedForwardActor(
        policy_network=eval_policy, variable_client=client)

    # Counter, logger and optional observers for the loop.
    counter = counting.Counter(counter, 'evaluator')
    eval_logger = loggers.make_default_logger(
        'evaluator', time_delta=self._log_every, steps_key='evaluator_steps')
    if self._make_observers:
      observers = self._make_observers()
    else:
      observers = ()

    return acme.EnvironmentLoop(
        eval_environment,
        actor,
        counter,
        eval_logger,
        observers=observers)
Esempio n. 12
0
 def _build(self, inputs):
     """Upsample `inputs` by 2x, then apply two conv + leaky-ReLU stages.

     The input is treated as `[batch, height, width, channels]`, which is
     the layout `tf.image.resize_nearest_neighbor` documents for its input
     and whose `size` argument is `[new_height, new_width]`. The static
     height and width must therefore be known.

     Args:
         inputs: 4-D image tensor.

     Returns:
         The output of the resize + convolution stack.
     """
     # Dim 1 is height and dim 2 is width; the previous unpacking swapped
     # them, which transposed the target size for non-square inputs.
     _, height, width, _ = inputs.get_shape().as_list()
     new_size = [2 * height, 2 * width]

     net = snt.Sequential([
         lambda images: tf.image.resize_nearest_neighbor(images, new_size),
         snt.Conv2D(self._num_filters, 3, padding=snt.SAME),
         tf.nn.leaky_relu,
         snt.Conv2D(self._num_filters, 3, padding=snt.SAME),
         tf.nn.leaky_relu,
     ])
     return net(inputs)
 def __init__(self, num_hidden, num_layers, num_output, name='mlp_ode'):
     """Set up an MLP with tanh hidden layers and a linear output layer.

     Args:
         num_hidden: width of every hidden layer.
         num_layers: total number of linear layers; `num_layers - 1` hidden
             layers are followed by one output layer.
         num_output: width of the output layer.
         name: module name passed to the parent constructor.
     """
     super(MLP_ODE, self).__init__(name=name)
     self._num_hidden = num_hidden
     self._num_output = num_output
     self._num_layers = num_layers
     self._modules = []
     for _ in range(self._num_layers - 1):
         self._modules.append(snt.Linear(self._num_hidden))
         self._modules.append(tf.math.tanh)
     # The output projection and the Sequential are created once, after the
     # loop: inside the loop an output layer was inserted after every hidden
     # layer and `_model` stayed undefined when `num_layers <= 1`.
     self._modules.append(snt.Linear(self._num_output))
     self._model = snt.Sequential(self._modules)
Esempio n. 14
0
def make_value_func_bsuite(environment_spec: EnvironmentSpec,
                           layer_sizes: str = '50,50',
                           ) -> snt.Module:
    """Create a critic network for a discrete-action bsuite environment.

    Args:
        environment_spec: spec whose `actions.num_values` sets the one-hot
            depth used for actions.
        layer_sizes: comma-separated hidden layer sizes of the MLP.

    Returns:
        A `CriticMultiplexer -> MLP -> Linear(1)` stack.
    """
    hidden_sizes = [int(size) for size in layer_sizes.split(',')]

    # Actions are one-hot encoded inside the multiplexer.
    num_actions = environment_spec.actions.num_values
    one_hot_actions = functools.partial(tf.one_hot, depth=num_actions)

    multiplexer = networks.CriticMultiplexer(action_network=one_hot_actions)
    mlp = snt.nets.MLP(hidden_sizes, activate_final=True)
    head = snt.Linear(1)
    return snt.Sequential([multiplexer, mlp, head])
Esempio n. 15
0
    def __init__(self, environment_spec: EnvironmentSpec,
                 layer_sizes: Sequence[int]):
        """Create the critic stack stored in `self._net`.

        Args:
            environment_spec: spec whose `actions.num_values` sets the
                one-hot depth used for actions.
            layer_sizes: hidden layer sizes of the MLP.
        """
        super(ValueFunction, self).__init__()

        # Actions are one-hot encoded inside the multiplexer.
        num_actions = environment_spec.actions.num_values
        one_hot_actions = functools.partial(tf.one_hot, depth=num_actions)

        multiplexer = networks.CriticMultiplexer(action_network=one_hot_actions)
        mlp = snt.nets.MLP(layer_sizes, activate_final=True)
        head = snt.Linear(1)
        self._net = snt.Sequential([multiplexer, mlp, head])
Esempio n. 16
0
def make_mlp_model(latent_size, num_layers):
    """Create a fresh MLP followed by LayerNorm.

    Every call builds new modules, so parameters are never shared between
    the models returned by this function.

    Args:
        latent_size: width of every MLP layer.
        num_layers: number of MLP layers.

    Returns:
        A Sonnet module containing the MLP and the LayerNorm.
    """
    layer_sizes = [latent_size] * num_layers
    mlp = snt.nets.MLP(layer_sizes, activate_final=True)
    norm = snt.LayerNorm()
    return snt.Sequential([mlp, norm])
Esempio n. 17
0
def residual_block(main_channel, residual_hiddens):
    """Build a 3x3 conv -> ReLU -> 1x1 conv -> ReLU stack.

    Args:
        main_channel: output channels of the second (1x1) convolution.
        residual_hiddens: output channels of the first (3x3) convolution.

    Returns:
        A `snt.Sequential` with the four stages; adding the result back to
        the block input is left to the caller.
    """
    unit_stride = (1, 1)
    hidden_conv = snt.Conv2D(output_channels=residual_hiddens,
                             kernel_shape=(3, 3),
                             stride=unit_stride)
    projection = snt.Conv2D(output_channels=main_channel,
                            kernel_shape=(1, 1),
                            stride=unit_stride)
    return snt.Sequential([hidden_conv, tf.nn.relu, projection, tf.nn.relu])
Esempio n. 18
0
def upscale_conv(num_blocks=5, init_filter_num=256, **network_kwargs):
    """Build a conv stem followed by a chain of upscale blocks.

    The stem is two `init_filter_num`-channel convolutions (kernel 4, then
    kernel 3), each followed by a leaky ReLU. It is followed by `num_blocks`
    `UpscaleBlock`s whose filter count starts at `init_filter_num` and is
    halved after every block.

    Args:
        num_blocks: number of `UpscaleBlock`s to append.
        init_filter_num: channel count of the stem and of the first block.
        **network_kwargs: accepted for interface compatibility; not used.

    Returns:
        A `snt.Sequential` over the stem and the upscale blocks.
    """
    blocklist = [
        snt.Conv2D(init_filter_num, 4, padding=snt.SAME),
        tf.nn.leaky_relu,
        snt.Conv2D(init_filter_num, 3, padding=snt.SAME),
        tf.nn.leaky_relu,
    ]
    num_filters = init_filter_num
    for _ in range(num_blocks):
        blocklist.append(UpscaleBlock(num_filters))
        # Floor division keeps the channel count an integer; `/=` yields a
        # float on Python 3 (e.g. 128.0), which is not a valid channel count.
        num_filters //= 2
    return snt.Sequential(blocklist)