def main(_: Any) -> None:
    """Build and launch a recurrent MADQN system on the debugging environment.

    Wires up environment, network, and logger factories from command-line
    flags, assembles the distributed program, and launches it locally with
    the GPU hidden from all processes except the trainer.
    """
    # Environment factory bound to the flags chosen on the command line.
    env_factory = functools.partial(
        debugging_utils.make_environment,
        env_name=FLAGS.env_name,
        action_space=FLAGS.action_space,
    )

    # Recurrent network factory. NOTE(review): `archecture_type` (sic) is the
    # keyword actually exposed by the library, so the misspelling must stay.
    net_factory = lp_utils.partial_kwargs(
        madqn.make_default_networks,
        archecture_type=ArchitectureType.recurrent,
    )

    # Checkpointer appends "Checkpoints" to checkpoint_dir.
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Emit logs at most once every `seconds_between_logs` seconds.
    seconds_between_logs = 10
    log_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=seconds_between_logs,
    )

    # Assemble the distributed program.
    program = madqn.MADQN(
        environment_factory=env_factory,
        network_factory=net_factory,
        logger_factory=log_factory,
        num_executors=1,
        exploration_scheduler_fn=LinearExplorationScheduler,
        epsilon_min=0.05,
        epsilon_decay=5e-4,
        optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        checkpoint_subpath=checkpoint_dir,
        trainer_fn=madqn.training.MADQNRecurrentTrainer,
        executor_fn=madqn.execution.MADQNRecurrentExecutor,
        batch_size=32,
    ).build()

    # Ensure only the trainer sees the GPU; everything else runs on CPU.
    no_gpu = {"CUDA_VISIBLE_DEVICES": str(-1)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=no_gpu),
        "executor": PythonProcess(env=no_gpu),
    }

    # Launch locally, multiplexing processes into the current terminal.
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
def main(_: Any) -> None:
    """Build and launch a VDN system on a StarCraft II (SMAC) map.

    Configures factories from flags, builds the distributed VDN program
    with a recording evaluation loop, and launches it locally on CPU for
    every process except the trainer.
    """
    # Environment factory for the SMAC map selected via flags.
    env_factory = functools.partial(
        smac_utils.make_environment,
        map_name=FLAGS.map_name,
    )

    # Default VDN networks with two 64-unit policy layers.
    net_factory = lp_utils.partial_kwargs(
        vdn.make_default_networks,
        policy_networks_layer_sizes=[64, 64],
    )

    # Checkpointer appends "Checkpoints" to checkpoint_dir.
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Emit logs at most once every `seconds_between_logs` seconds.
    seconds_between_logs = 10
    log_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=seconds_between_logs,
    )

    # Assemble the distributed program; the eval loop records a video
    # every 100 episodes into the checkpoint directory.
    program = vdn.VDN(
        environment_factory=env_factory,
        network_factory=net_factory,
        logger_factory=log_factory,
        num_executors=1,
        exploration_scheduler_fn=LinearExplorationScheduler,
        epsilon_min=0.05,
        epsilon_decay=1e-5,
        optimizer=snt.optimizers.SGD(learning_rate=1e-2),
        checkpoint_subpath=checkpoint_dir,
        batch_size=512,
        executor_variable_update_period=100,
        target_update_period=200,
        max_gradient_norm=10.0,
        eval_loop_fn=MonitorParallelEnvironmentLoop,
        eval_loop_fn_kwargs={
            "path": checkpoint_dir,
            "record_every": 100,
        },
    ).build()

    # Ensure only the trainer sees the GPU; everything else runs on CPU.
    no_gpu = {"CUDA_VISIBLE_DEVICES": str(-1)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=no_gpu),
        "executor": PythonProcess(env=no_gpu),
    }

    # Launch locally, multiplexing processes into the current terminal.
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
def main(_: Any) -> None:
    """Build and launch a networked-critic MADDPG system on the debugging env.

    Uses per-agent (non-shared) weights and a networked Q-value critic wired
    by `custom_connected_network_spec`, then launches the distributed program
    locally with the GPU reserved for the trainer.
    """
    # Environment factory bound to the flags chosen on the command line.
    env_factory = functools.partial(
        debugging_utils.make_environment,
        env_name=FLAGS.env_name,
        action_space=FLAGS.action_space,
    )

    # Per-agent networks: weights are NOT shared across agents here.
    net_factory = lp_utils.partial_kwargs(
        maddpg.make_default_networks,
        shared_weights=False,
    )

    # Checkpointer appends "Checkpoints" to checkpoint_dir.
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Emit logs at most once every `seconds_between_logs` seconds.
    seconds_between_logs = 10
    log_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=seconds_between_logs,
    )

    # Assemble the distributed program with the networked critic
    # architecture and its custom agent-connection topology.
    program = maddpg.MADDPG(
        environment_factory=env_factory,
        network_factory=net_factory,
        logger_factory=log_factory,
        num_executors=1,
        policy_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        critic_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        checkpoint_subpath=checkpoint_dir,
        max_gradient_norm=40.0,
        trainer_fn=maddpg.MADDPGNetworkedTrainer,
        architecture=architectures.NetworkedQValueCritic,
        connection_spec=custom_connected_network_spec,
        shared_weights=False,
    ).build()

    # Ensure only the trainer sees the GPU; everything else runs on CPU.
    no_gpu = {"CUDA_VISIBLE_DEVICES": str(-1)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=no_gpu),
        "executor": PythonProcess(env=no_gpu),
    }

    # Launch locally, multiplexing processes into the current terminal.
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
def main(_: Any) -> None:
    """Build and launch a recurrent MADDPG system on a PettingZoo environment.

    Selects the PettingZoo environment class and name from flags, uses
    recurrent networks with a decentralised recurrent trainer/executor pair,
    and launches locally with only the trainer on GPU.
    """
    # Environment factory for the PettingZoo class/name chosen via flags.
    env_factory = functools.partial(
        pettingzoo_utils.make_environment,
        env_class=FLAGS.env_class,
        env_name=FLAGS.env_name,
    )

    # Recurrent network factory. NOTE(review): `archecture_type` (sic) is the
    # keyword actually exposed by the library, so the misspelling must stay.
    net_factory = lp_utils.partial_kwargs(
        maddpg.make_default_networks,
        archecture_type=ArchitectureType.recurrent,
    )

    # Checkpointer appends "Checkpoints" to checkpoint_dir.
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Emit logs at most once every `seconds_between_logs` seconds.
    seconds_between_logs = 10
    log_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=seconds_between_logs,
    )

    # Assemble the distributed program with the recurrent trainer/executor.
    program = maddpg.MADDPG(
        environment_factory=env_factory,
        network_factory=net_factory,
        logger_factory=log_factory,
        num_executors=1,
        policy_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        critic_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        checkpoint_subpath=checkpoint_dir,
        max_gradient_norm=40.0,
        trainer_fn=maddpg.training.MADDPGDecentralisedRecurrentTrainer,
        executor_fn=maddpg.execution.MADDPGRecurrentExecutor,
        batch_size=32,
    ).build()

    # Ensure only the trainer sees the GPU; everything else runs on CPU.
    no_gpu = {"CUDA_VISIBLE_DEVICES": str(-1)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=no_gpu),
        "executor": PythonProcess(env=no_gpu),
    }

    # Launch locally, multiplexing processes into the current terminal.
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
def main(_: Any) -> None:
    """Build and launch a recurrent, state-based MAD4PG system on RoboCup.

    Uses a state-based Q-value critic with recurrent networks, a configurable
    number of executors, and launches locally with the GPU hidden from all
    processes except the trainer.
    """
    # RoboCup environment factory (no extra arguments needed).
    env_factory = lp_utils.partial_kwargs(robocup_utils.make_environment)

    # Recurrent network factory. NOTE(review): `archecture_type` (sic) is the
    # keyword actually exposed by the library, so the misspelling must stay.
    net_factory = lp_utils.partial_kwargs(
        mad4pg.make_default_networks,
        archecture_type=ArchitectureType.recurrent,
    )

    # Checkpointer appends "Checkpoints" to checkpoint_dir.
    checkpoint_dir = f"{FLAGS.base_dir}/{FLAGS.mava_id}"

    # Emit logs at most once every `seconds_between_logs` seconds.
    seconds_between_logs = 10
    log_factory = functools.partial(
        logger_utils.make_logger,
        directory=FLAGS.base_dir,
        to_terminal=True,
        to_tensorboard=True,
        time_stamp=FLAGS.mava_id,
        time_delta=seconds_between_logs,
    )

    # Assemble the distributed program. `samples_per_insert=None` disables
    # the replay insert/sample rate limiter.
    # NOTE(review): batch_size=265 looks like a typo for 256, but it is kept
    # as-is pending confirmation against the intended configuration.
    program = mad4pg.MAD4PG(
        architecture=StateBasedQValueCritic,
        environment_factory=env_factory,
        network_factory=net_factory,
        logger_factory=log_factory,
        num_executors=int(FLAGS.num_executors),
        samples_per_insert=None,
        trainer_fn=MAD4PGStateBasedRecurrentTrainer,
        executor_fn=MAD4PGRecurrentExecutor,
        shared_weights=True,
        checkpoint_subpath=checkpoint_dir,
        batch_size=265,
    ).build()

    # Ensure only the trainer sees the GPU; everything else runs on CPU.
    no_gpu = {"CUDA_VISIBLE_DEVICES": str(-1)}
    local_resources = {
        "trainer": [],
        "evaluator": PythonProcess(env=no_gpu),
        "executor": PythonProcess(env=no_gpu),
    }

    # Launch locally, multiplexing processes into the current terminal.
    lp.launch(
        program,
        lp.LaunchType.LOCAL_MULTI_PROCESSING,
        terminal="current_terminal",
        local_resources=local_resources,
    )
def test_maddpg_on_debugging_env(self) -> None:
    """Smoke-test MADDPG on the simple-spread debugging environment.

    Builds the distributed program, detaches the trainer node so it can be
    stepped manually, launches everything else in test mode, and drives the
    trainer for a handful of steps to confirm nothing crashes.
    """
    # Continuous-action simple-spread debugging environment.
    env_factory = functools.partial(
        debugging_utils.make_environment,
        env_name="simple_spread",
        action_space="continuous",
    )

    # Test networks.
    net_factory = lp_utils.partial_kwargs(make_networks)

    # Small replay settings keep the test fast; checkpointing is disabled.
    system = maddpg.MADDPG(
        environment_factory=env_factory,
        network_factory=net_factory,
        num_executors=2,
        batch_size=32,
        min_replay_size=32,
        max_replay_size=1000,
        policy_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        critic_optimizer=snt.optimizers.Adam(learning_rate=1e-4),
        checkpoint=False,
    )
    program = system.build()

    # Detach the single trainer node so we can step it by hand below.
    (trainer_node,) = program.groups["trainer"]
    trainer_node.disable_run()

    # Run every process on CPU only.
    no_gpu = {"CUDA_VISIBLE_DEVICES": str(-1)}
    local_resources = {
        "trainer": PythonProcess(env=no_gpu),
        "evaluator": PythonProcess(env=no_gpu),
        "executor": PythonProcess(env=no_gpu),
    }
    lp.launch(
        program,
        launch_type="test_mt",
        local_resources=local_resources,
    )

    # Step the trainer a few times; any crash fails the test.
    trainer: mava.Trainer = trainer_node.create_handle().dereference()
    for _ in range(5):
        trainer.step()