Example #1
def setup_logging(top_logdir):
    """Initialize CSV and TensorBoard loggers."""
    logdir = env_logger = agent_logger = summary_writer = summary_dir = None
    if top_logdir is not None:
        job_id = set_job_id()
        logdir = os.path.join(top_logdir, job_id)
        summary_dir = os.path.join(top_logdir, 'tb', job_id)
        summary_writer = tf.summary.create_file_writer(summary_dir)
        env_logger = loggers.CSVLogger(logdir, 'env_loop')
        agent_logger = loggers.CSVLogger(logdir, 'learner')

    return logdir, env_logger, agent_logger, summary_writer, summary_dir
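
The loggers returned by setup_logging can then be driven from a training loop; a minimal sketch, assuming a writable log directory and using illustrative metric names:

# Hypothetical usage of the objects returned above; the path, metric names,
# and step values are placeholders, not part of the original example.
logdir, env_logger, agent_logger, summary_writer, _ = setup_logging('/tmp/exp')
if env_logger is not None:
    env_logger.write({'episode': 0, 'episode_return': 0.0})  # appends one CSV row
if summary_writer is not None:
    with summary_writer.as_default():
        tf.summary.scalar('episode_return', 0.0, step=0)  # TensorBoard scalar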
Example #2
    def make_logger(
        self,
        to_terminal: bool,
        to_csv: bool,
        to_tensorboard: bool,
        time_delta: float,
        print_fn: Callable[[str], None],
        external_logger: Optional[base.Logger],
        **external_logger_kwargs: Any,
    ) -> loggers.Logger:
        """Build a Mava logger.

        Args:
            label: Name to give to the logger.
            directory: base directory for the  logging of the experiment.
            to_terminal: to print the logs in the terminal.
            to_csv: to save the logs in a csv file.
            to_tensorboard: to write the logs tf-events.
            time_delta: minimum elapsed time (in seconds) between logging events.
            print_fn: function to call which acts like print.
            external_logger: optional external logger.
            external_logger_kwargs: optional external logger params.
        Returns:
            A logger (pipe) object that responds to logger.write(some_dict).
        """
        logger = []

        if to_terminal:
            logger += [
                loggers.TerminalLogger(label=self._label, print_fn=print_fn)
            ]

        if to_csv:
            logger += [
                loggers.CSVLogger(directory_or_file=self._path("csv"),
                                  label=self._label)
            ]

        if to_tensorboard:
            logger += [
                TFSummaryLogger(logdir=self._path("tensorboard"),
                                label=self._label)
            ]

        if external_logger:
            logger += [
                external_logger(
                    label=self._label,
                    **external_logger_kwargs,
                )
            ]

        if logger:
            logger = loggers.Dispatcher(logger)
            logger = loggers.NoneFilter(logger)
            logger = loggers.TimeFilter(logger, time_delta)
        else:
            logger = loggers.NoOpLogger()

        return logger
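
A minimal sketch of calling make_logger, assuming logger_factory is an instance of the factory class above; the argument values are illustrative only:

# Hypothetical usage; `logger_factory` and the argument values are assumptions.
logger = logger_factory.make_logger(
    to_terminal=True,
    to_csv=True,
    to_tensorboard=False,
    time_delta=10.0,
    print_fn=print,
    external_logger=None,
)
logger.write({'step': 0, 'episode_return': 0.0})  # fanned out to each configured logger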
Example #3
def train_and_evaluate(distance_fn, rng):
    """Train a policy on the learned distance function and evaluate task success.

  Args:
    distance_fn: function mapping a (state, goal)-pair to a state embedding and
        a distance estimate used for policy learning.
    rng: random key used to initialize evaluation actor.
  """
    goal_image = load_goal_image(FLAGS.robot_data_path)
    logdir = FLAGS.logdir
    video_dir = paths.process_path(logdir, 'videos')
    print('Writing videos to', video_dir)
    counter = counting.Counter()
    eval_counter = counting.Counter(counter, prefix='eval', time_delta=0.0)
    # Include training episodes and steps and walltime in the first eval logs.
    counter.increment(episodes=0, steps=0, walltime=0)

    environment = make_environment(
        task=FLAGS.task,
        end_on_success=FLAGS.end_on_success,
        max_episode_steps=FLAGS.max_episode_steps,
        distance_fn=distance_fn,
        goal_image=goal_image,
        baseline_distance=FLAGS.baseline_distance,
        logdir=video_dir,
        counter=counter,
        record_every=FLAGS.record_episodes_frequency,
        num_episodes_to_record=FLAGS.num_episodes_to_record)
    environment_spec = specs.make_environment_spec(environment)
    print('Environment spec')
    print(environment_spec)
    agent_networks = sac.make_networks(environment_spec)

    config = sac.SACConfig(
        target_entropy=sac.target_entropy_from_env_spec(environment_spec),
        num_sgd_steps_per_step=FLAGS.num_sgd_steps_per_step,
        min_replay_size=FLAGS.min_replay_size)
    agent = deprecated_sac.SAC(environment_spec,
                               agent_networks,
                               config=config,
                               counter=counter,
                               seed=FLAGS.seed)

    env_logger = loggers.CSVLogger(logdir, 'env_loop', flush_every=5)
    eval_env_logger = loggers.CSVLogger(logdir, 'eval_env_loop', flush_every=1)
    train_loop = acme.EnvironmentLoop(environment,
                                      agent,
                                      label='train_loop',
                                      logger=env_logger,
                                      counter=counter)

    eval_actor = agent.builder.make_actor(random_key=rng,
                                          policy=sac.apply_policy_and_sample(
                                              agent_networks, eval_mode=True),
                                          environment_spec=environment_spec,
                                          variable_source=agent)

    eval_video_dir = paths.process_path(logdir, 'eval_videos')
    print('Writing eval videos to', eval_video_dir)
    if FLAGS.baseline_distance_from_goal_to_goal:
        state = goal_image
        if distance_fn.history_length > 1:
            state = np.stack([goal_image] * distance_fn.history_length,
                             axis=-1)
        unused_embeddings, baseline_distance = distance_fn(state, goal_image)
        print('Baseline prediction', baseline_distance)
    else:
        baseline_distance = FLAGS.baseline_distance
    eval_env = make_environment(task=FLAGS.task,
                                end_on_success=False,
                                max_episode_steps=FLAGS.max_episode_steps,
                                distance_fn=distance_fn,
                                goal_image=goal_image,
                                eval_mode=True,
                                logdir=eval_video_dir,
                                counter=eval_counter,
                                record_every=FLAGS.num_eval_episodes,
                                num_episodes_to_record=FLAGS.num_eval_episodes,
                                baseline_distance=baseline_distance)

    eval_loop = acme.EnvironmentLoop(eval_env,
                                     eval_actor,
                                     label='eval_loop',
                                     logger=eval_env_logger,
                                     counter=eval_counter)

    assert FLAGS.num_steps % FLAGS.eval_every == 0
    for _ in range(FLAGS.num_steps // FLAGS.eval_every):
        eval_loop.run(num_episodes=FLAGS.num_eval_episodes)
        train_loop.run(num_steps=FLAGS.eval_every)
    eval_loop.run(num_episodes=FLAGS.num_eval_episodes)
Example #4
# Commented-out alternative network (the standard Atari convolutional torso):
"""
def _make_network(num_dim):
  return snt.Sequential([
      snt.Conv2D(32, [8, 8], [4, 4]),
      tf.nn.relu,
      snt.Conv2D(64, [4, 4], [2, 2]),
      tf.nn.relu,
      snt.Conv2D(64, [3, 3], [1, 1]),
      tf.nn.relu,
      snt.Flatten(),
      snt.nets.MLP([50, 50, num_dim]),
  ])
"""
# network = _make_network(num_dimensions)

agent_logger_dqn = loggers.TerminalLogger(label='agent', time_delta=1.)
env_loop_logger_dqn = loggers.TerminalLogger(label='env_loop', time_delta=1.)
csv_logger_for_agent_dqn = loggers.CSVLogger(
    "/content/gdrive/MyDrive/ResultsDQN",
    label="agent")  # Write agent CSV logs to Google Drive mounted in Colab.
csv_logger_for_env_dqn = loggers.CSVLogger(
    "/content/gdrive/MyDrive/ResultsDQN",
    label="env")  # Write env-loop CSV logs to Google Drive mounted in Colab.

agent = AnotherDQN(environment_spec=environment_spec,
                   network=network,
                   logger=csv_logger_for_agent_dqn,
                   epsilon=0.05)

# Create a loop connecting this agent to the environment created above.
env_loop = EnvironmentLoop2(
    environment, agent,
    logger=csv_logger_for_env_dqn)  # csv_logger_for_env does not work for the env loop
# Comment out up to here when not using DQN.
Example #5
def main(_):
    # Create an environment, grab the spec.
    environment = utils.make_environment(task=FLAGS.env_name)
    aqua_config = config.AquademConfig()
    spec = specs.make_environment_spec(environment)
    discretized_spec = aquadem_builder.discretize_spec(spec,
                                                       aqua_config.num_actions)

    # Create AQuaDem builder.
    loss_fn = dqn.losses.MunchausenQLearning(max_abs_reward=100.)
    dqn_config = dqn.DQNConfig(min_replay_size=1000,
                               n_step=3,
                               num_sgd_steps_per_step=8,
                               learning_rate=1e-4,
                               samples_per_insert=256)
    rl_agent = dqn.DQNBuilder(config=dqn_config, loss_fn=loss_fn)
    make_demonstrations = utils.get_make_demonstrations_fn(
        FLAGS.env_name, FLAGS.num_demonstrations, FLAGS.seed)
    builder = aquadem_builder.AquademBuilder(
        rl_agent=rl_agent,
        config=aqua_config,
        make_demonstrations=make_demonstrations)

    # Create networks.
    q_network = aquadem_networks.make_q_network(spec=discretized_spec)
    dqn_networks = dqn.DQNNetworks(
        policy_network=networks_lib.non_stochastic_network_to_typed(q_network))
    networks = aquadem_networks.make_action_candidates_network(
        spec=spec,
        num_actions=aqua_config.num_actions,
        discrete_rl_networks=dqn_networks)
    exploration_epsilon = 0.01
    discrete_policy = dqn.default_behavior_policy(dqn_networks,
                                                  exploration_epsilon)
    behavior_policy = aquadem_builder.get_aquadem_policy(
        discrete_policy, networks)

    # Create the environment loop used for training.
    agent = local_layout.LocalLayout(seed=FLAGS.seed,
                                     environment_spec=spec,
                                     builder=builder,
                                     networks=networks,
                                     policy_network=behavior_policy,
                                     batch_size=dqn_config.batch_size *
                                     dqn_config.num_sgd_steps_per_step)

    train_logger = loggers.CSVLogger(FLAGS.workdir, label='train')
    train_loop = acme.EnvironmentLoop(environment, agent, logger=train_logger)

    # Create the evaluation actor and loop.
    eval_policy = dqn.default_behavior_policy(dqn_networks, 0.)
    eval_policy = aquadem_builder.get_aquadem_policy(eval_policy, networks)
    eval_actor = builder.make_actor(random_key=jax.random.PRNGKey(FLAGS.seed),
                                    policy=eval_policy,
                                    environment_spec=spec,
                                    variable_source=agent)
    eval_env = utils.make_environment(task=FLAGS.env_name, evaluation=True)

    eval_logger = loggers.CSVLogger(FLAGS.workdir, label='eval')
    eval_loop = acme.EnvironmentLoop(eval_env, eval_actor, logger=eval_logger)

    assert FLAGS.num_steps % FLAGS.eval_every == 0
    for _ in range(FLAGS.num_steps // FLAGS.eval_every):
        eval_loop.run(num_episodes=10)
        train_loop.run(num_steps=FLAGS.eval_every)
    eval_loop.run(num_episodes=10)
Example #6
    def initial_state(self, batch_size: int, **kwargs):
        return self._net.initial_state(batch_size)

    def unroll(self, inputs, state, sequence_length):
        return snt.static_unroll(self._net, inputs, state, sequence_length)


a = SimpleNetwork(num_dimensions)
#from acme.tf.networks.atari import  R2D2AtariNetwork
#a = R2D2AtariNetwork(num_dimensions)

agent_logger_r2d2 = loggers.TerminalLogger(label='agent', time_delta=5.)
env_loop_logger_r2d2 = loggers.TerminalLogger(label='env_loop', time_delta=5.)

csv_logger_for_agent_r2d2 = loggers.CSVLogger(
    "/content/gdrive/MyDrive/ResultsR2D2/",
    label="agent")  # Write agent CSV logs to Google Drive mounted in Colab.
csv_logger_for_env_r2d2 = loggers.CSVLogger(
    "/content/gdrive/MyDrive/ResultsR2D2/", label="env",
    time_delta=1.)  # Write env-loop CSV logs to Google Drive mounted in Colab.

agent = R2D22(environment_spec=environment_spec,
              network=a,
              store_lstm_state=True,
              burn_in_length=2,
              trace_length=6,
              replay_period=4,
              checkpoint=False,
              logger=csv_logger_for_agent_r2d2)

env_loop = EnvironmentLoop2(environment, agent, logger=csv_logger_for_env_r2d2)
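
As in the earlier examples, the loop would then be run for some episode budget; a minimal sketch, assuming EnvironmentLoop2 exposes the same run() interface as acme.EnvironmentLoop:

# Assumed episode budget; env-loop results are appended to the CSV by
# csv_logger_for_env_r2d2 as the loop runs.
env_loop.run(num_episodes=500)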