Code Example #1
File: agent_test.py Project: luisimagiire/acme
def make_networks(
    action_spec: types.NestedSpec,
    policy_layer_sizes: Sequence[int] = (10, 10),
    critic_layer_sizes: Sequence[int] = (10, 10),
    vmin: float = -150.,
    vmax: float = 150.,
    num_atoms: int = 51,
) -> Dict[str, snt.Module]:
    """Creates networks used by the agent."""

    num_dimensions = np.prod(action_spec.shape, dtype=int)
    policy_layer_sizes = list(policy_layer_sizes) + [num_dimensions]

    policy_network = snt.Sequential(
        [networks.LayerNormMLP(policy_layer_sizes), tf.tanh])
    critic_network = snt.Sequential([
        networks.CriticMultiplexer(critic_network=networks.LayerNormMLP(
            critic_layer_sizes, activate_final=True)),
        networks.DiscreteValuedHead(vmin, vmax, num_atoms)
    ])

    return {
        'policy': policy_network,
        'critic': critic_network,
    }
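
The dictionary returned above is the shape that Acme's feed-forward agents expect. As a minimal usage sketch, assuming Acme's spec classes and its variable-creation helper (none of which appear in the snippet itself), the networks could be built and initialized like this:

import numpy as np
from acme import specs
from acme.tf import utils as tf2_utils

# Hypothetical specs standing in for a real environment's; the shapes are
# illustrative only.
obs_spec = specs.Array(shape=(8,), dtype=np.float32)
act_spec = specs.BoundedArray(
    shape=(2,), dtype=np.float32, minimum=-1.0, maximum=1.0)

nets = make_networks(act_spec)

# Trace each Sonnet module once with the spec shapes to create its variables.
tf2_utils.create_variables(nets['policy'], [obs_spec])
tf2_utils.create_variables(nets['critic'], [obs_spec, act_spec])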
Code Example #2
File: utils.py Project: peternara/google-research-NN
def make_d4pg_networks(
    action_spec,
    policy_layer_sizes=(256, 256, 256),
    critic_layer_sizes=(512, 512, 256),
    vmin=-150.,
    vmax=150.,
    num_atoms=201):
  """Creates networks used by the d4pg agent."""

  num_dimensions = np.prod(action_spec.shape, dtype=int)
  policy_layer_sizes = list(policy_layer_sizes) + [int(num_dimensions)]

  policy_network = snt.Sequential([
      networks.LayerNormMLP(policy_layer_sizes),
      networks.TanhToSpec(action_spec)
  ])

  # The multiplexer concatenates the (maybe transformed) observations/actions.
  critic_network = snt.Sequential([
      networks.CriticMultiplexer(
          critic_network=networks.LayerNormMLP(
              critic_layer_sizes, activate_final=True)),
      networks.DiscreteValuedHead(vmin, vmax, num_atoms)
  ])

  return {
      'policy': policy_network,
      'critic': critic_network,
      'observation': tf2_utils.batch_concat,
  }
Code Example #3
def make_networks(
    action_spec: specs.BoundedArray,
    policy_layer_sizes: Sequence[int] = (50, ),
    critic_layer_sizes: Sequence[int] = (50, ),
    vmin: float = -150.,
    vmax: float = 150.,
    num_atoms: int = 51,
):
    """Creates networks used by the agent."""

    num_dimensions = np.prod(action_spec.shape, dtype=int)

    policy_network = snt.Sequential([
        networks.LayerNormMLP(policy_layer_sizes, activate_final=True),
        networks.MultivariateNormalDiagHead(num_dimensions,
                                            tanh_mean=True,
                                            init_scale=0.3,
                                            fixed_scale=True,
                                            use_tfd_independent=False)
    ])

    # The multiplexer concatenates the (maybe transformed) observations/actions.
    critic_network = networks.CriticMultiplexer(
        critic_network=networks.LayerNormMLP(critic_layer_sizes,
                                             activate_final=True),
        action_network=networks.ClipToSpec(action_spec))
    critic_network = snt.Sequential(
        [critic_network,
         networks.DiscreteValuedHead(vmin, vmax, num_atoms)])

    return {
        'policy': policy_network,
        'critic': critic_network,
        'observation': tf2_utils.batch_concat,
    }
Code Example #4
def make_networks(
    action_spec: specs.Array,
    policy_layer_sizes: Sequence[int] = (300, 200),
    critic_layer_sizes: Sequence[int] = (400, 300),
) -> Dict[str, snt.Module]:
  """Creates networks used by the agent."""

  num_dimensions = np.prod(action_spec.shape, dtype=int)
  critic_layer_sizes = list(critic_layer_sizes)

  policy_network = snt.Sequential([
      networks.LayerNormMLP(policy_layer_sizes),
      networks.MultivariateNormalDiagHead(num_dimensions),
  ])
  # The multiplexer concatenates the (maybe transformed) observations/actions.
  critic_network = snt.Sequential([
      networks.CriticMultiplexer(
          critic_network=networks.LayerNormMLP(critic_layer_sizes)),
      networks.DiscreteValuedHead(0., 1., 10),
  ])

  return {
      'policy': policy_network,
      'critic': critic_network,
  }
Code Example #5
def make_networks(
    action_spec: specs.BoundedArray,
    policy_layer_sizes: Sequence[int] = (256, 256, 256),
    critic_layer_sizes: Sequence[int] = (512, 512, 256),
    vmin: float = -150.,
    vmax: float = 150.,
    num_atoms: int = 51,
) -> Dict[str, Union[snt.Module, Callable[[tf.Tensor], tf.Tensor]]]:
    """Creates networks used by the agent."""

    num_dimensions = np.prod(action_spec.shape, dtype=int)

    policy_network = snt.Sequential([
        networks.LayerNormMLP(policy_layer_sizes, activate_final=True),
        networks.NearZeroInitializedLinear(num_dimensions),
        networks.TanhToSpec(action_spec)
    ])
    # The multiplexer concatenates the (maybe transformed) observations/actions.
    critic_network = snt.Sequential([
        networks.CriticMultiplexer(),
        networks.LayerNormMLP(critic_layer_sizes, activate_final=True),
        networks.DiscreteValuedHead(vmin, vmax, num_atoms),
    ])

    return {
        'policy': policy_network,
        'critic': critic_network,
        'observation': tf2_utils.batch_concat,
    }
Code Example #6
def make_dmpo_networks(
    action_spec,
    policy_layer_sizes = (300, 200),
    critic_layer_sizes = (400, 300),
    vmin = -150.,
    vmax = 150.,
    num_atoms = 51,
):
  """Creates networks used by the agent."""

  num_dimensions = np.prod(action_spec.shape, dtype=int)

  policy_network = snt.Sequential([
      networks.LayerNormMLP(policy_layer_sizes),
      networks.MultivariateNormalDiagHead(num_dimensions)
  ])
  # The multiplexer concatenates the (maybe transformed) observations/actions.
  critic_network = networks.CriticMultiplexer(
      critic_network=networks.LayerNormMLP(critic_layer_sizes),
      action_network=networks.ClipToSpec(action_spec))
  critic_network = snt.Sequential(
      [critic_network,
       networks.DiscreteValuedHead(vmin, vmax, num_atoms)])

  return {
      'policy': policy_network,
      'critic': critic_network,
      'observation': tf_utils.batch_concat,
  }
Code Example #7
def make_networks(
    action_spec: specs.BoundedArray,
    num_critic_heads: int,
    policy_layer_sizes: Sequence[int] = (50, ),
    critic_layer_sizes: Sequence[int] = (50, ),
    num_layers_shared: int = 1,
    distributional_critic: bool = True,
    vmin: float = -150.,
    vmax: float = 150.,
    num_atoms: int = 51,
):
    """Creates networks used by the agent."""

    num_dimensions = np.prod(action_spec.shape, dtype=int)

    policy_network = snt.Sequential([
        networks.LayerNormMLP(policy_layer_sizes, activate_final=True),
        networks.MultivariateNormalDiagHead(num_dimensions,
                                            tanh_mean=False,
                                            init_scale=0.69)
    ])

    if not distributional_critic:
        critic_layer_sizes = list(critic_layer_sizes) + [1]

    if not num_layers_shared:
        # No layers are shared
        critic_network_base = None
    else:
        critic_network_base = networks.LayerNormMLP(
            critic_layer_sizes[:num_layers_shared], activate_final=True)
    critic_network_heads = [
        snt.nets.MLP(critic_layer_sizes,
                     activation=tf.nn.elu,
                     activate_final=False) for _ in range(num_critic_heads)
    ]
    if distributional_critic:
        critic_network_heads = [
            snt.Sequential(
                [c, networks.DiscreteValuedHead(vmin, vmax, num_atoms)])
            for c in critic_network_heads
        ]
    # The multiplexer concatenates the (maybe transformed) observations/actions.
    critic_network = snt.Sequential([
        networks.CriticMultiplexer(
            critic_network=critic_network_base,
            action_network=networks.ClipToSpec(action_spec)),
        networks.Multihead(network_heads=critic_network_heads),
    ])

    return {
        'policy': policy_network,
        'critic': critic_network,
        'observation': tf2_utils.batch_concat,
    }
Code Example #8
File: lp_dmpo_pixels_drqv2.py Project: deepmind/acme
def make_networks(
    action_spec: specs.BoundedArray,
    policy_layer_sizes: Sequence[int] = (50, 1024, 1024),
    critic_layer_sizes: Sequence[int] = (50, 1024, 1024),
    vmin: float = -150.,
    vmax: float = 150.,
    num_atoms: int = 51,
) -> Dict[str, snt.Module]:
    """Creates networks used by the agent."""

    num_dimensions = np.prod(action_spec.shape, dtype=int)

    policy_network = snt.Sequential([
        networks.LayerNormMLP(policy_layer_sizes,
                              w_init=snt.initializers.Orthogonal(),
                              activation=tf.nn.relu,
                              activate_final=True),
        networks.MultivariateNormalDiagHead(
            num_dimensions,
            tanh_mean=False,
            init_scale=1.0,
            fixed_scale=False,
            use_tfd_independent=True,
            w_init=snt.initializers.Orthogonal())
    ])

    # The multiplexer concatenates the (maybe transformed) observations/actions.
    critic_network = networks.CriticMultiplexer(
        observation_network=snt.Sequential([
            snt.Linear(critic_layer_sizes[0],
                       w_init=snt.initializers.Orthogonal()),
            snt.LayerNorm(axis=slice(1, None),
                          create_scale=True,
                          create_offset=True), tf.nn.tanh
        ]),
        critic_network=snt.nets.MLP(critic_layer_sizes[1:],
                                    w_init=snt.initializers.Orthogonal(),
                                    activation=tf.nn.relu,
                                    activate_final=True),
        action_network=networks.ClipToSpec(action_spec))
    critic_network = snt.Sequential([
        critic_network,
        networks.DiscreteValuedHead(vmin,
                                    vmax,
                                    num_atoms,
                                    w_init=snt.initializers.Orthogonal())
    ])
    observation_network = networks.DrQTorso()

    return {
        'policy': policy_network,
        'critic': critic_network,
        'observation': observation_network,
    }
Code Example #9
File: d4pg.py Project: CPS-TUWien/racing_dreamer
def make_d4pg_agent(env_spec: specs.EnvironmentSpec, logger: Logger, checkpoint_path: str, hyperparams: Dict):
    params = DEFAULT_PARAMS.copy()
    params.update(hyperparams)
    action_size = np.prod(env_spec.actions.shape, dtype=int).item()
    policy_network = snt.Sequential([
        networks.LayerNormMLP(layer_sizes=[*params.pop('policy_layers'), action_size]),
        networks.NearZeroInitializedLinear(output_size=action_size),
        networks.TanhToSpec(env_spec.actions),
    ])

    critic_network = snt.Sequential([
        networks.CriticMultiplexer(
            critic_network=networks.LayerNormMLP(layer_sizes=[*params.pop('critic_layers'), 1])
        ),
        networks.DiscreteValuedHead(vmin=-100.0, vmax=100.0, num_atoms=params.pop('atoms'))
    ])

    observation_network = tf.identity

    # Make sure observation network is a Sonnet Module.
    observation_network = tf2_utils.to_sonnet_module(observation_network)

    actor = FeedForwardActor(policy_network=snt.Sequential([
        observation_network,
        policy_network
    ]))

    # Create optimizers.
    policy_optimizer = Adam(params.pop('policy_lr'))
    critic_optimizer = Adam(params.pop('critic_lr'))

    # The learner updates the parameters (and initializes them).
    agent = D4PG(
        environment_spec=env_spec,
        policy_network=policy_network,
        critic_network=critic_network,
        observation_network=observation_network,
        policy_optimizer=policy_optimizer,
        critic_optimizer=critic_optimizer,
        logger=logger,
        checkpoint_path=checkpoint_path,
        **params
    )
    agent.eval_actor = actor
    return agent
Code Example #10
def make_value_func_dm_control(
    distributional: bool = True,
    layer_sizes: str = '512,512,256',
    vmin: float = 0.,
    vmax: float = 100.,
    num_atoms: int = 21,
) -> snt.Module:
    layer_sizes = list(map(int, layer_sizes.split(',')))
    if distributional:
        head = networks.DiscreteValuedHead(vmin, vmax, num_atoms)
    else:
        head = snt.Linear(1)
    value_function = snt.Sequential([
        networks.CriticMultiplexer(),
        networks.LayerNormMLP(layer_sizes, activate_final=True), head
    ])
    return value_function
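
Because Sonnet's Sequential forwards extra positional arguments to its first layer, the value function above can be called directly on an observation/action pair. A short sketch with hypothetical tensor shapes:

import tensorflow as tf

# Illustrative batch of observations and actions; the shapes are made up.
observations = tf.zeros([4, 24], dtype=tf.float32)
actions = tf.zeros([4, 6], dtype=tf.float32)

value_fn = make_value_func_dm_control(distributional=True)
# The CriticMultiplexer (the first layer) receives both arguments and
# concatenates them before the MLP and the distributional head.
q_dist = value_fn(observations, actions)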
Code Example #11
def make_dmpo_networks(
    action_spec,
    policy_layer_sizes=(256, 256, 256),
    critic_layer_sizes=(512, 512, 256),
    vmin=-150.,
    vmax=150.,
    num_atoms=51,
    policy_init_std=1e-9,
    obs_network=None,
    binary_grip_action=False):
  """Creates networks used by the agent."""

  num_dimensions = np.prod(action_spec.shape, dtype=int)
  if policy_layer_sizes:
    policy_network = snt.Sequential([
        networks.LayerNormMLP([int(l) for l in policy_layer_sizes]),
        networks.MultivariateNormalDiagHead(
            num_dimensions,
            init_scale=policy_init_std,
            min_scale=1e-10)
    ])
  else:
    # Useful when initializing from a trained BC network.
    policy_network = snt.Sequential([
        ArmPolicyNormalDiagHead(
            binary_grip_action=binary_grip_action,
            num_dimensions=num_dimensions,
            init_scale=policy_init_std,
            min_scale=1e-10)
    ])
  # The multiplexer concatenates the (maybe transformed) observations/actions.
  critic_network = networks.CriticMultiplexer(
      critic_network=networks.LayerNormMLP(critic_layer_sizes),
      action_network=networks.ClipToSpec(action_spec))
  critic_network = snt.Sequential(
      [critic_network,
       networks.DiscreteValuedHead(vmin, vmax, num_atoms)])
  if obs_network is None:
    obs_network = tf_utils.batch_concat

  return {
      'policy': policy_network,
      'critic': critic_network,
      'observation': obs_network,
  }
Code Example #12
def make_feed_forward_networks(
    action_spec: specs.BoundedArray,
    z_spec: specs.BoundedArray,
    policy_layer_sizes: Tuple[int, ...] = (256, 256),
    critic_layer_sizes: Tuple[int, ...] = (256, 256),
    discriminator_layer_sizes: Tuple[int, ...] = (256, 256),
    hierarchical_controller_layer_sizes: Tuple[int, ...] = (256, 256),
    vmin: float = -150.,  # Minimum value for the Critic distribution.
    vmax: float = 150.,  # Maximum value for the Critic distribution.
    num_atoms: int = 51,  # Number of atoms for the discrete value distribution.
) -> Dict[str, types.TensorTransformation]:
    num_dimensions = np.prod(action_spec.shape, dtype=int)
    z_dim = np.prod(z_spec.shape, dtype=int)

    observation_network = tf2_utils.batch_concat

    policy_network = snt.Sequential([
        networks.LayerNormMLP(policy_layer_sizes),
        networks.MultivariateNormalDiagHead(num_dimensions)
    ])

    critic_multiplexer = networks.CriticMultiplexer(
        critic_network=networks.LayerNormMLP(critic_layer_sizes),
        action_network=networks.ClipToSpec(action_spec))

    critic_network = snt.Sequential([
        critic_multiplexer,
        networks.DiscreteValuedHead(vmin, vmax, num_atoms),
    ])

    # The discriminator in DIAYN uses the same architecture as the critic.
    discriminator_network = networks.LayerNormMLP(discriminator_layer_sizes +
                                                  (z_dim, ))

    hierarchical_controller_network = networks.LayerNormMLP(
        hierarchical_controller_layer_sizes + (z_dim, ))

    return {
        'policy': policy_network,
        'critic': critic_network,
        'observation': observation_network,
        'discriminator': discriminator_network,
        'hierarchical_controller': hierarchical_controller_network,
    }
Code Example #13
def make_value_func_bsuite(
    environment_spec: EnvironmentSpec,
    distributional: bool = True,
    layer_sizes: str = '50,50',
    vmin: float = 0.,
    vmax: float = 100.,
    num_atoms: int = 21,
) -> snt.Module:
    layer_sizes = list(map(int, layer_sizes.split(',')))
    action_network = functools.partial(
        tf.one_hot, depth=environment_spec.actions.num_values)
    if distributional:
        head = networks.DiscreteValuedHead(vmin, vmax, num_atoms)
    else:
        head = snt.Linear(1)
    value_function = snt.Sequential([
        networks.CriticMultiplexer(action_network=action_network),
        snt.nets.MLP(layer_sizes, activate_final=True), head
    ])
    return value_function
Code Example #14
def make_networks(action_spec: specs.BoundedArray):
    """Simple networks for testing.."""

    num_dimensions = np.prod(action_spec.shape, dtype=int)

    policy_network = snt.Sequential([
        networks.LayerNormMLP([50], activate_final=True),
        networks.NearZeroInitializedLinear(num_dimensions),
        networks.TanhToSpec(action_spec)
    ])
    # The multiplexer concatenates the (maybe transformed) observations/actions.
    critic_network = snt.Sequential([
        networks.CriticMultiplexer(
            critic_network=networks.LayerNormMLP([50], activate_final=True)),
        networks.DiscreteValuedHead(-1., 1., 10)
    ])

    return {
        'policy': policy_network,
        'critic': critic_network,
        'observation': tf2_utils.batch_concat,
    }
Code Example #15
def make_networks(
    action_spec: specs.BoundedArray,
    policy_layer_sizes: Sequence[int] = (256, 256, 256),
    critic_layer_sizes: Sequence[int] = (512, 512, 256),
    vmin: float = -150.,
    vmax: float = 150.,
    num_atoms: int = 51,
) -> Dict[str, types.TensorTransformation]:
  """Creates networks used by the agent."""

  # Get total number of action dimensions from action spec.
  num_dimensions = np.prod(action_spec.shape, dtype=int)

  # Create the shared observation network; here simply a state-less operation.
  observation_network = tf2_utils.batch_concat

  # Create the policy network.
  policy_network = snt.Sequential([
      networks.LayerNormMLP(policy_layer_sizes),
      networks.MultivariateNormalDiagHead(num_dimensions)
  ])

  # The multiplexer concatenates the (maybe transformed) observations/actions.
  multiplexer = networks.CriticMultiplexer(
      critic_network=networks.LayerNormMLP(critic_layer_sizes),
      action_network=networks.ClipToSpec(action_spec))

  # Create the critic network.
  critic_network = snt.Sequential([
      multiplexer,
      networks.DiscreteValuedHead(vmin, vmax, num_atoms),
  ])

  return {
      'policy': policy_network,
      'critic': critic_network,
      'observation': observation_network,
  }
Code Example #16
File: acmed4pg.py Project: Abinavraj5427/ddpgTest
# Create the shared observation network; here simply a state-less operation.
observation_network = tf2_utils.batch_concat

# Create the deterministic policy network.
policy_network = snt.Sequential([
    networks.LayerNormMLP((256, 256, 256), activate_final=True),
    networks.NearZeroInitializedLinear(num_dimensions),
    networks.TanhToSpec(environment_spec.actions),
])

# Create the distributional critic network.
critic_network = snt.Sequential([
    # The multiplexer concatenates the observations/actions.
    networks.CriticMultiplexer(),
    networks.LayerNormMLP((512, 512, 256), activate_final=True),
    networks.DiscreteValuedHead(vmin=-150., vmax=150., num_atoms=51),
])

# Create a logger for the agent and environment loop.
agent_logger = loggers.TerminalLogger(label='agent', time_delta=10.)
env_loop_logger = loggers.TerminalLogger(label='env_loop', time_delta=10.)

# Create the D4PG agent.
agent = d4pg.D4PG(environment_spec=environment_spec,
                  policy_network=policy_network,
                  critic_network=critic_network,
                  observation_network=observation_network,
                  sigma=1.0,
                  logger=agent_logger,
                  checkpoint=False)
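
The env_loop_logger created above goes unused in the snippet; presumably it is meant for an environment loop. A minimal wiring sketch, assuming the dm_env environment that environment_spec came from is available as environment (it is not shown in the snippet):

from acme import environment_loop

# Run the agent/environment interaction and log through env_loop_logger.
loop = environment_loop.EnvironmentLoop(
    environment, agent, logger=env_loop_logger)
loop.run(num_episodes=100)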