import os

import tensorflow as tf
from acme.utils import loggers


def setup_logging(top_logdir):
  """Initialize CSV and TensorBoard loggers."""
  logdir = env_logger = agent_logger = summary_writer = summary_dir = None
  if top_logdir is not None:
    job_id = set_job_id()  # Project-specific helper defined elsewhere.
    logdir = os.path.join(top_logdir, job_id)
    summary_dir = os.path.join(top_logdir, 'tb', job_id)
    summary_writer = tf.summary.create_file_writer(summary_dir)
    env_logger = loggers.CSVLogger(logdir, 'env_loop')
    agent_logger = loggers.CSVLogger(logdir, 'learner')
  return logdir, env_logger, agent_logger, summary_writer, summary_dir
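
# A minimal usage sketch, assuming `setup_logging` is called from a training
# script; the directory, metric names, and step counter below are illustrative
# only, not part of the original code.
logdir, env_logger, agent_logger, summary_writer, summary_dir = setup_logging(
    '/tmp/experiments')
if env_logger is not None:
  env_logger.write({'episode_return': 0.0, 'episode_length': 0})
if summary_writer is not None:
  with summary_writer.as_default():
    tf.summary.scalar('episode_return', 0.0, step=0)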
def make_logger(
    self,
    to_terminal: bool,
    to_csv: bool,
    to_tensorboard: bool,
    time_delta: float,
    print_fn: Callable[[str], None],
    external_logger: Optional[base.Logger],
    **external_logger_kwargs: Any,
) -> loggers.Logger:
  """Build a Mava logger.

  Args:
    to_terminal: whether to print the logs to the terminal.
    to_csv: whether to save the logs to a CSV file.
    to_tensorboard: whether to write the logs as tf-events.
    time_delta: minimum elapsed time (in seconds) between logging events.
    print_fn: function to call which acts like print.
    external_logger: optional external logger.
    external_logger_kwargs: optional external logger params.

  Returns:
    A logger (pipe) object that responds to logger.write(some_dict).
  """
  logger = []
  if to_terminal:
    logger += [loggers.TerminalLogger(label=self._label, print_fn=print_fn)]
  if to_csv:
    logger += [
        loggers.CSVLogger(directory_or_file=self._path("csv"), label=self._label)
    ]
  if to_tensorboard:
    logger += [
        TFSummaryLogger(logdir=self._path("tensorboard"), label=self._label)
    ]
  if external_logger:
    logger += [
        external_logger(label=self._label, **external_logger_kwargs)
    ]
  if logger:
    logger = loggers.Dispatcher(logger)
    logger = loggers.NoneFilter(logger)
    logger = loggers.TimeFilter(logger, time_delta)
  else:
    logger = loggers.NoOpLogger()
  return logger
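
# A short usage sketch, assuming the factory above is a method on a logger
# utility object (here called `logger_factory`); the object name and the metric
# dictionary are illustrative assumptions.
logger = logger_factory.make_logger(
    to_terminal=True,
    to_csv=True,
    to_tensorboard=False,
    time_delta=10.0,
    print_fn=print,
    external_logger=None,
)
# The resulting pipe drops None values, rate-limits writes to one event every
# `time_delta` seconds, and fans each record out to every configured backend.
logger.write({'episode_return': 12.3, 'episode_length': 200})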
def train_and_evaluate(distance_fn, rng):
  """Train a policy on the learned distance function and evaluate task success.

  Args:
    distance_fn: function mapping a (state, goal) pair to a state embedding
      and a distance estimate used for policy learning.
    rng: random key used to initialize the evaluation actor.
  """
  goal_image = load_goal_image(FLAGS.robot_data_path)
  logdir = FLAGS.logdir
  video_dir = paths.process_path(logdir, 'videos')
  print('Writing videos to', video_dir)
  counter = counting.Counter()
  eval_counter = counting.Counter(counter, prefix='eval', time_delta=0.0)
  # Include training episodes, steps, and walltime in the first eval logs.
  counter.increment(episodes=0, steps=0, walltime=0)

  environment = make_environment(
      task=FLAGS.task,
      end_on_success=FLAGS.end_on_success,
      max_episode_steps=FLAGS.max_episode_steps,
      distance_fn=distance_fn,
      goal_image=goal_image,
      baseline_distance=FLAGS.baseline_distance,
      logdir=video_dir,
      counter=counter,
      record_every=FLAGS.record_episodes_frequency,
      num_episodes_to_record=FLAGS.num_episodes_to_record)
  environment_spec = specs.make_environment_spec(environment)
  print('Environment spec')
  print(environment_spec)

  agent_networks = sac.make_networks(environment_spec)
  config = sac.SACConfig(
      target_entropy=sac.target_entropy_from_env_spec(environment_spec),
      num_sgd_steps_per_step=FLAGS.num_sgd_steps_per_step,
      min_replay_size=FLAGS.min_replay_size)
  agent = deprecated_sac.SAC(
      environment_spec, agent_networks, config=config, counter=counter,
      seed=FLAGS.seed)

  env_logger = loggers.CSVLogger(logdir, 'env_loop', flush_every=5)
  eval_env_logger = loggers.CSVLogger(logdir, 'eval_env_loop', flush_every=1)
  train_loop = acme.EnvironmentLoop(
      environment, agent, label='train_loop', logger=env_logger,
      counter=counter)

  eval_actor = agent.builder.make_actor(
      random_key=rng,
      policy=sac.apply_policy_and_sample(agent_networks, eval_mode=True),
      environment_spec=environment_spec,
      variable_source=agent)
  eval_video_dir = paths.process_path(logdir, 'eval_videos')
  print('Writing eval videos to', eval_video_dir)

  if FLAGS.baseline_distance_from_goal_to_goal:
    state = goal_image
    if distance_fn.history_length > 1:
      state = np.stack([goal_image] * distance_fn.history_length, axis=-1)
    unused_embeddings, baseline_distance = distance_fn(state, goal_image)
    print('Baseline prediction', baseline_distance)
  else:
    baseline_distance = FLAGS.baseline_distance

  eval_env = make_environment(
      task=FLAGS.task,
      end_on_success=False,
      max_episode_steps=FLAGS.max_episode_steps,
      distance_fn=distance_fn,
      goal_image=goal_image,
      eval_mode=True,
      logdir=eval_video_dir,
      counter=eval_counter,
      record_every=FLAGS.num_eval_episodes,
      num_episodes_to_record=FLAGS.num_eval_episodes,
      baseline_distance=baseline_distance)
  eval_loop = acme.EnvironmentLoop(
      eval_env, eval_actor, label='eval_loop', logger=eval_env_logger,
      counter=eval_counter)

  assert FLAGS.num_steps % FLAGS.eval_every == 0
  for _ in range(FLAGS.num_steps // FLAGS.eval_every):
    eval_loop.run(num_episodes=FLAGS.num_eval_episodes)
    train_loop.run(num_steps=FLAGS.eval_every)
  eval_loop.run(num_episodes=FLAGS.num_eval_episodes)
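
# A post-hoc analysis sketch, assuming the CSV loggers above wrote their files
# somewhere under FLAGS.logdir (the exact sub-path depends on the Acme version
# and its add_uid behaviour); the glob pattern and pandas usage are assumptions.
import glob
import os

import pandas as pd

for csv_path in sorted(glob.glob(os.path.join(FLAGS.logdir, '**', '*.csv'),
                                 recursive=True)):
  df = pd.read_csv(csv_path)
  print(csv_path, '->', len(df), 'rows, columns:', list(df.columns))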
    snt.Conv2D(32, [8, 8], [4, 4]),
    tf.nn.relu,
    snt.Conv2D(64, [4, 4], [2, 2]),
    tf.nn.relu,
    snt.Conv2D(64, [3, 3], [1, 1]),
    tf.nn.relu,
    snt.Flatten(),
    snt.nets.MLP([50, 50, num_dim]),
  ])
"""
# network = _make_network(num_dimensions)

agent_logger_dqn = loggers.TerminalLogger(label='agent', time_delta=1.)
env_loop_logger_dqn = loggers.TerminalLogger(label='env_loop', time_delta=1.)

# Log the CSV files directly to Colab (Google Drive).
csv_logger_for_agent_dqn = loggers.CSVLogger(
    "/content/gdrive/MyDrive/ResultsDQN", label="agent")
csv_logger_for_env_dqn = loggers.CSVLogger(
    "/content/gdrive/MyDrive/ResultsDQN", label="env")

agent = AnotherDQN(
    environment_spec=environment_spec,
    network=network,
    logger=csv_logger_for_agent_dqn,
    epsilon=0.05)

# Create a loop connecting this agent to the environment created above.
# Note: csv_logger_for_env does not work for the environment loop here.
env_loop = EnvironmentLoop2(environment, agent, logger=csv_logger_for_env_dqn)
# Comment out up to here when not running DQN.
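
# A minimal reconstruction of the `_make_network` helper referenced in the
# commented-out line above, assuming it simply wraps the listed Sonnet layers in
# an `snt.Sequential`; the helper's name and signature come from the comment,
# not from a visible definition.
import sonnet as snt
import tensorflow as tf


def _make_network(num_dim: int) -> snt.Module:
  """Atari-style convolutional torso followed by a small MLP head."""
  return snt.Sequential([
      snt.Conv2D(32, [8, 8], [4, 4]),
      tf.nn.relu,
      snt.Conv2D(64, [4, 4], [2, 2]),
      tf.nn.relu,
      snt.Conv2D(64, [3, 3], [1, 1]),
      tf.nn.relu,
      snt.Flatten(),
      snt.nets.MLP([50, 50, num_dim]),
  ])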
def main(_):
  # Create an environment and grab its spec.
  environment = utils.make_environment(task=FLAGS.env_name)
  aqua_config = config.AquademConfig()
  spec = specs.make_environment_spec(environment)
  discretized_spec = aquadem_builder.discretize_spec(spec,
                                                     aqua_config.num_actions)

  # Create the AQuaDem builder.
  loss_fn = dqn.losses.MunchausenQLearning(max_abs_reward=100.)
  dqn_config = dqn.DQNConfig(
      min_replay_size=1000,
      n_step=3,
      num_sgd_steps_per_step=8,
      learning_rate=1e-4,
      samples_per_insert=256)
  rl_agent = dqn.DQNBuilder(config=dqn_config, loss_fn=loss_fn)
  make_demonstrations = utils.get_make_demonstrations_fn(
      FLAGS.env_name, FLAGS.num_demonstrations, FLAGS.seed)
  builder = aquadem_builder.AquademBuilder(
      rl_agent=rl_agent,
      config=aqua_config,
      make_demonstrations=make_demonstrations)

  # Create networks.
  q_network = aquadem_networks.make_q_network(spec=discretized_spec)
  dqn_networks = dqn.DQNNetworks(
      policy_network=networks_lib.non_stochastic_network_to_typed(q_network))
  networks = aquadem_networks.make_action_candidates_network(
      spec=spec,
      num_actions=aqua_config.num_actions,
      discrete_rl_networks=dqn_networks)
  exploration_epsilon = 0.01
  discrete_policy = dqn.default_behavior_policy(dqn_networks,
                                                exploration_epsilon)
  behavior_policy = aquadem_builder.get_aquadem_policy(discrete_policy,
                                                       networks)

  # Create the environment loop used for training.
  agent = local_layout.LocalLayout(
      seed=FLAGS.seed,
      environment_spec=spec,
      builder=builder,
      networks=networks,
      policy_network=behavior_policy,
      batch_size=dqn_config.batch_size * dqn_config.num_sgd_steps_per_step)
  train_logger = loggers.CSVLogger(FLAGS.workdir, label='train')
  train_loop = acme.EnvironmentLoop(environment, agent, logger=train_logger)

  # Create the evaluation actor and loop.
  eval_policy = dqn.default_behavior_policy(dqn_networks, 0.)
  eval_policy = aquadem_builder.get_aquadem_policy(eval_policy, networks)
  eval_actor = builder.make_actor(
      random_key=jax.random.PRNGKey(FLAGS.seed),
      policy=eval_policy,
      environment_spec=spec,
      variable_source=agent)
  eval_env = utils.make_environment(task=FLAGS.env_name, evaluation=True)
  eval_logger = loggers.CSVLogger(FLAGS.workdir, label='eval')
  eval_loop = acme.EnvironmentLoop(eval_env, eval_actor, logger=eval_logger)

  assert FLAGS.num_steps % FLAGS.eval_every == 0
  for _ in range(FLAGS.num_steps // FLAGS.eval_every):
    eval_loop.run(num_episodes=10)
    train_loop.run(num_steps=FLAGS.eval_every)
  eval_loop.run(num_episodes=10)
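
# A sketch of the absl-py flag definitions `main` relies on, assuming a standard
# `app.run(main)` entry point; the default values are placeholders, not the
# original script's settings.
from absl import app
from absl import flags

flags.DEFINE_string('env_name', 'door-human-v0', 'Environment name.')
flags.DEFINE_integer('num_demonstrations', 25, 'Number of demonstration episodes.')
flags.DEFINE_integer('seed', 0, 'Random seed.')
flags.DEFINE_string('workdir', '/tmp/aquadem', 'Directory for the CSV loggers.')
flags.DEFINE_integer('num_steps', 1_000_000, 'Total number of training steps.')
flags.DEFINE_integer('eval_every', 50_000, 'Training steps between evaluations.')
FLAGS = flags.FLAGS

if __name__ == '__main__':
  app.run(main)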
  def initial_state(self, batch_size: int, **kwargs):
    return self._net.initial_state(batch_size)

  def unroll(self, inputs, state, sequence_length):
    return snt.static_unroll(self._net, inputs, state, sequence_length)


a = SimpleNetwork(num_dimensions)
# from acme.tf.networks.atari import R2D2AtariNetwork
# a = R2D2AtariNetwork(num_dimensions)

agent_logger_r2d2 = loggers.TerminalLogger(label='agent', time_delta=5.)
env_loop_logger_r2d2 = loggers.TerminalLogger(label='env_loop', time_delta=5.)

# Log the CSV files directly to Colab (Google Drive).
csv_logger_for_agent_r2d2 = loggers.CSVLogger(
    "/content/gdrive/MyDrive/ResultsR2D2/", label="agent")
csv_logger_for_env_r2d2 = loggers.CSVLogger(
    "/content/gdrive/MyDrive/ResultsR2D2/", label="env", time_delta=1.)

agent = R2D22(
    environment_spec=environment_spec,
    network=a,
    store_lstm_state=True,
    burn_in_length=2,
    trace_length=6,
    replay_period=4,
    checkpoint=False,
    logger=csv_logger_for_agent_r2d2)

env_loop = EnvironmentLoop2(environment, agent, logger=csv_logger_for_env_r2d2)
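
# A short driving sketch, assuming EnvironmentLoop2 mirrors acme.EnvironmentLoop's
# run() interface; the episode count is arbitrary.
env_loop.run(num_episodes=500)

# Close the CSV loggers so any buffered rows are flushed to Google Drive.
csv_logger_for_agent_r2d2.close()
csv_logger_for_env_r2d2.close()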