def register_carla_model():
    """Register ``CarlaModel`` with the ``ModelCatalog`` under the key "carla"."""
    model_key = "carla"
    ModelCatalog.register_custom_model(model_key, CarlaModel)
def setUp(self):
    """Start Ray, then register the Keras and Torch models used by the tests."""
    ray.init()

    # Registration order matches the original: Keras first, then Torch.
    custom_models = (
        ("keras_model", MyKerasModel),
        ("torch_model", MyTorchModel),
    )
    for model_name, model_cls in custom_models:
        ModelCatalog.register_custom_model(model_name, model_cls)
parser = argparse.ArgumentParser() parser.add_argument("--run", type=str, default="PPO") parser.add_argument("--framework", choices=["tf2", "tf", "tfe", "torch"], default="tf") parser.add_argument("--as-test", action="store_true") parser.add_argument("--stop-iters", type=int, default=50) parser.add_argument("--stop-timesteps", type=int, default=200000) parser.add_argument("--stop-reward", type=float, default=150.0) if __name__ == "__main__": args = parser.parse_args() ray.init(num_cpus=3) ModelCatalog.register_custom_model( "frame_stack_model", FrameStackingCartPoleModel if args.framework != "torch" else TorchFrameStackingCartPoleModel) tune.register_env("stateless_cartpole", lambda c: StatelessCartPole()) config = { "env": "stateless_cartpole", "model": { "vf_share_layers": True, "custom_model": "frame_stack_model", "custom_model_config": { "num_frames": 16, }, # To compare against a simple LSTM: # "use_lstm": True, # "lstm_use_prev_action": True,
# Store last batch size for value_function output. self._last_batch_size = obs.shape[0] # Return 2x the obs (and empty states). # This will further be sent through an automatically provided # LSTM head (b/c we are setting use_lstm=True below). return obs * 2.0, [] def value_function(self): return torch.from_numpy(np.zeros(shape=(self._last_batch_size,))) if __name__ == "__main__": ray.init() # Register the above custom model. ModelCatalog.register_custom_model("my_torch_model", MyCustomModel) # Create the Trainer. trainer = ppo.PPOTrainer( env="CartPole-v0", config={ "framework": "torch", "model": { # Auto-wrap the custom(!) model with an LSTM. "use_lstm": True, # To further customize the LSTM auto-wrapper. "lstm_cell_size": 64, # Specify our custom model from above. "custom_model": "my_torch_model", # Extra kwargs to be passed to your model's c'tor. "custom_model_config": {},
filters_4x4 = [[32, [2, 2], 2], [256, [2, 2], 2]] filters_6x6 = [[32, [2, 2], 2], [256, [2, 2], 2]] filters_10x10 = [[32, [2, 2], 2], [256, [2, 2], 2]] if len(shape) == 3 and shape[:2] == [84, 84]: return filters_84x84 elif len(shape) == 3 and shape[:2] == [42, 42]: return filters_42x42 elif len(shape) == 3 and shape[:2] == [3, 4]: return filters_3x4 elif len(shape) == 3 and shape[:2] == [4, 4]: return filters_4x4 elif len(shape) == 3 and shape[:2] == [6, 6]: return filters_6x6 elif len(shape) == 3 and shape[:2] == [10, 10]: return filters_10x10 elif len(shape) == 1: # Don't use a cnn in this case return [] else: raise ValueError( "No default configuration for obs shape {}".format(shape) + ", you must specify `conv_filters` manually as a model option" ", or add it as a default to the _get_filter_config function.") ModelCatalog.register_custom_model(SPATIAL_STRATEGO_Q_MODEL, SpatialStrategoQModel)
# Create the Model Class with a deep network using keras:
class ResNet(Model):
    """Residual Network model, as used in the IMPALA paper."""

    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Build the network and return ``(output, last_layer)``.

        Args:
            input_dict: Mapping holding the observation tensor under "obs".
            num_outputs: Number of units of the final (linear) dense layer.
            options: Model options; not read by this implementation.

        Returns:
            Tuple of the linear output layer and the 256-unit hidden layer
            that feeds it.
        """
        conv_kernel = 3  # Kernel size shared by all convolution layers
        pool_region = 3  # Pooling-region size shared by all pooling layers

        obs_tensor = input_dict["obs"]
        # Shape without the first dimension (presumably the batch axis).
        obs_shape = obs_tensor.get_shape().as_list()[1:]

        features = Input(shape=obs_shape, tensor=obs_tensor)
        # Three stacked convolutional blocks with 16, 32 and 32 filters.
        for n_filters in (16, 32, 32):
            features = convolutional_block(n_filters, features, conv_kernel,
                                           pool_region)

        flat = Flatten()(features)
        hidden = Activation('relu')(flat)
        hidden = Dense(256, activation='relu')(hidden)
        logits = Dense(num_outputs, activation=None)(hidden)
        return logits, hidden


# Register models
MODELS = {"ResNet": ResNet}
for key, model_cls in MODELS.items():
    ModelCatalog.register_custom_model(key, model_cls)
# self.num_outputs, shape)) if not isinstance(state, list): raise ValueError("State output is not a list: {}".format(state)) self._last_output = outputs return outputs, state @override(ModelV2) def get_initial_state(self): if self.use_lstm: return [ np.zeros(self._lstm_state_shape, np.float32), np.zeros(self._lstm_state_shape, np.float32) ] else: return [] def save_config_to_json(self, save_file_path): with open(save_file_path, 'w') as fp: json.dump(self.model_config, fp) # Verify that dictionary is recoverable from json with open(save_file_path, 'r') as fp: saved = json.load(fp) for key, orig_val in self.model_config.items(): assert np.all(saved[key] == orig_val) ModelCatalog.register_custom_model(SAC_SPATIAL_RNN_STRATEGO_MODEL, SACSpatialRNNStrategoModel)
def test_sac_compilation(self):
    """Tests whether an SACTrainer can be built with all frameworks.

    For every framework yielded by ``framework_iterator`` and each of the
    test environments, a trainer is built, trained for ``num_iterations``
    iterations and asked for a single action. For the tf / CartPole-v0
    combination the trainer is additionally checkpointed and restored into
    a second trainer to verify that the replay buffer contents are stored
    along with the checkpoint.
    """
    config = sac.DEFAULT_CONFIG.copy()
    # `DEFAULT_CONFIG.copy()` is shallow: nested dicts are still shared with
    # the module-level defaults. Copy every nested dict that is mutated
    # below so this test does not leak its settings into other users of
    # `sac.DEFAULT_CONFIG`.
    config["Q_model"] = sac.DEFAULT_CONFIG["Q_model"].copy()
    config["replay_buffer_config"] = dict(
        sac.DEFAULT_CONFIG["replay_buffer_config"])
    config["num_workers"] = 0  # Run locally.
    config["n_step"] = 3
    config["twin_q"] = True
    config["replay_buffer_config"]["learning_starts"] = 0
    config["rollout_fragment_length"] = 10
    config["train_batch_size"] = 10
    # If we use default buffer size (1e6), the buffer will take up
    # 169.445 GB memory, which is beyond travis-ci's current (Mar 19, 2021)
    # available system memory (8.34816 GB).
    config["replay_buffer_config"]["capacity"] = 40000
    # Test with saved replay buffer.
    config["store_buffer_in_checkpoints"] = True
    num_iterations = 1

    ModelCatalog.register_custom_model("batch_norm", KerasBatchNormModel)
    ModelCatalog.register_custom_model("batch_norm_torch",
                                       TorchBatchNormModel)

    image_space = Box(-1.0, 1.0, shape=(84, 84, 3))
    simple_space = Box(-1.0, 1.0, shape=(3, ))

    tune.register_env(
        "random_dict_env",
        lambda _: RandomEnv({
            "observation_space": Dict({
                "a": simple_space,
                "b": Discrete(2),
                "c": image_space,
            }),
            "action_space": Box(-1.0, 1.0, shape=(1, )),
        }),
    )
    tune.register_env(
        "random_tuple_env",
        lambda _: RandomEnv({
            "observation_space": Tuple(
                [simple_space, Discrete(2), image_space]),
            "action_space": Box(-1.0, 1.0, shape=(1, )),
        }),
    )

    for fw in framework_iterator(config, with_eager_tracing=True):
        # Test for different env types (discrete w/ and w/o image, + cont).
        for env in [
                "random_dict_env",
                "random_tuple_env",
                # "MsPacmanNoFrameskip-v4",
                "CartPole-v0",
        ]:
            print("Env={}".format(env))
            # Test making the Q-model a custom one for CartPole, otherwise,
            # use the default model.
            config["Q_model"]["custom_model"] = (
                "batch_norm{}".format("_torch" if fw == "torch" else "")
                if env == "CartPole-v0" else None)
            trainer = sac.SACTrainer(config=config, env=env)
            for _ in range(num_iterations):
                results = trainer.train()
                check_train_results(results)
                print(results)
            check_compute_single_action(trainer)

            # Test, whether the replay buffer is saved along with
            # a checkpoint (no point in doing it for all frameworks since
            # this is framework agnostic).
            if fw == "tf" and env == "CartPole-v0":
                checkpoint = trainer.save()
                new_trainer = sac.SACTrainer(config, env=env)
                new_trainer.restore(checkpoint)
                # Get some data from the buffer and compare.
                data = trainer.local_replay_buffer.replay_buffers[
                    "default_policy"]._storage[:42 + 42]
                new_data = new_trainer.local_replay_buffer.replay_buffers[
                    "default_policy"]._storage[:42 + 42]
                check(data, new_data)
                new_trainer.stop()

            trainer.stop()
activation_fn=activation, scope="fc{}".format(i), ) i += 1 output = slim.fully_connected( last_layer, num_outputs, weights_initializer=normc_initializer(0.01), activation_fn=None, scope="fc_out", ) return output, last_layer ModelCatalog.register_custom_model("PommermanModel1", PommermanModel) class BaseLineAgent(BaseAgent): def act(self, obs, action_space): pass class NoDoAgent(BaseAgent): def act(self, obs, action_space): return 0 class SuicidalAgent(BaseAgent): def act(self, obs, action_space): return 5
def value_function(self): return self.action_param_model.value_function() if __name__ == "__main__": ray.init() register_env( "ExternalHearts", #lambda _: HeartsEnv() lambda _: ExternalHearts(HeartsEnv(), episodes=1000) ) ModelCatalog.register_custom_model("ParametricActionsModel", ParametricActionsModel) ppo_config = {"timesteps_per_iteration": 1000, "model": {"custom_model": "ParametricActionsModel", "use_lstm": True, "max_seq_len": HAND_SIZE, "lstm_use_prev_action_reward": True}, "num_workers": 0} other_config = {"timesteps_per_iteration": 1000, "model": {"custom_model": "ParametricActionsModel", "use_lstm": True, "max_seq_len": HAND_SIZE, "lstm_use_prev_action_reward": True} }
def test_sac_compilation(self):
    """Tests whether SAC can be built with all frameworks.

    Builds a SAC algorithm for each framework / test-env combination,
    trains it for ``num_iterations`` iterations and computes a single
    action. For tf on CartPole-v0 it also saves a checkpoint, restores it
    into a second instance and compares the leading replay buffer entries.
    """
    config = sac.SACConfig()
    config = config.training(
        n_step=3,
        twin_q=True,
        replay_buffer_config={
            "learning_starts": 0,
            "capacity": 40000
        },
        store_buffer_in_checkpoints=True,
        train_batch_size=10,
    )
    config = config.rollouts(
        num_rollout_workers=0, rollout_fragment_length=10)
    num_iterations = 1

    ModelCatalog.register_custom_model("batch_norm", KerasBatchNormModel)
    ModelCatalog.register_custom_model("batch_norm_torch",
                                       TorchBatchNormModel)

    image_space = Box(-1.0, 1.0, shape=(84, 84, 3))
    simple_space = Box(-1.0, 1.0, shape=(3, ))

    def make_dict_env(_):
        # Random env with a Dict observation space (vector, discrete, image).
        return RandomEnv({
            "observation_space": Dict({
                "a": simple_space,
                "b": Discrete(2),
                "c": image_space,
            }),
            "action_space": Box(-1.0, 1.0, shape=(1, )),
        })

    def make_tuple_env(_):
        # Same components as above, arranged as a Tuple observation space.
        return RandomEnv({
            "observation_space": Tuple(
                [simple_space, Discrete(2), image_space]),
            "action_space": Box(-1.0, 1.0, shape=(1, )),
        })

    tune.register_env("random_dict_env", make_dict_env)
    tune.register_env("random_tuple_env", make_tuple_env)

    # Different env types (discrete w/ and w/o image, + cont).
    env_names = (
        "random_dict_env",
        "random_tuple_env",
        # "MsPacmanNoFrameskip-v4",
        "CartPole-v0",
    )

    def leading_samples(algo):
        # First 84 stored items of the default policy's replay buffer.
        buffers = algo.local_replay_buffer.replay_buffers
        return buffers["default_policy"]._storage[:42 + 42]

    for fw in framework_iterator(config, with_eager_tracing=True):
        for env_name in env_names:
            print("Env={}".format(env_name))
            is_cartpole = env_name == "CartPole-v0"

            # Only CartPole gets a custom Q-model; every other env uses
            # the default model.
            if is_cartpole:
                custom_q_model = "batch_norm{}".format(
                    "_torch" if fw == "torch" else "")
            else:
                custom_q_model = None
            config.q_model_config["custom_model"] = custom_q_model

            trainer = config.build(env=env_name)
            for _ in range(num_iterations):
                results = trainer.train()
                check_train_results(results)
                print(results)
            check_compute_single_action(trainer)

            # The replay buffer should be saved along with a checkpoint.
            # This is framework agnostic, so checking one framework is
            # enough.
            if fw == "tf" and is_cartpole:
                checkpoint = trainer.save()
                new_trainer = sac.SAC(config, env=env_name)
                new_trainer.restore(checkpoint)
                # Get some data from both buffers and compare.
                data = leading_samples(trainer)
                new_data = leading_samples(new_trainer)
                check(data, new_data)
                new_trainer.stop()

            trainer.stop()
from rllib_models.rllib_tesp import RLlibTESP from envs.point_env import PointEnv from envs.mujoco.ant import AntEnv from envs.reset_wrapper import ResetWrapper logger = logging.getLogger("ray.rllib.agents") logger.setLevel(logging.DEBUG) # ray.init() ray.init(redis_address="192.168.12.39:32222") env_cls = AntEnv model_cls = RLlibTESP register_env(env_cls.__name__, lambda env_config: ResetWrapper(env_cls(), env_config)) # register_env("PointEnv", lambda env_config: PointEnv(env_config)) ModelCatalog.register_custom_model(model_cls.__name__, model_cls) config = { # "num_workers": 20, "model": { "custom_model": model_cls.__name__, "custom_options": { "rnn_units": 256, "rnn_output_units": 16, "mlp_hidden_units": [512, 512], "vf_share_layers": False, "linear_baseline": True } # "squash_to_range": True, # "free_log_std": True }
def __init__(self, parameters):
    """Store the run parameters and prepare the training setup.

    Selects the torch device, registers ``AgentNetwork`` as the custom
    model "agent_network", and builds the training configuration and the
    stopping conditions from ``parameters``.

    Args:
        parameters: Object exposing the attributes read below (``use_gpu``,
            ``model_dir``, ``learning_rate``, ``batch_size``,
            ``replay_buffer_size``, ``num_workers``, ``max_num_iterations``,
            ``max_num_timeteps``, ``target_episode_reward``).
    """
    self.params = parameters

    # Use the GPU only when it is both requested and actually available.
    gpu_requested = self.params.use_gpu
    self.use_gpu = gpu_requested and torch.cuda.is_available()
    device_name = "cuda:0" if self.use_gpu else "cpu"
    self.device = torch.device(device_name)

    # Register model
    ModelCatalog.register_custom_model("agent_network", AgentNetwork)

    # The model and the environment are configured with the same frame
    # shape and stack depth.
    frame_shape = (40, 60, 3)
    stacked_frames = 4
    model_section = {
        "custom_model": "agent_network",
        "custom_options": {
            "shape": frame_shape,
            "num_stack": stacked_frames
        },
    }
    env_section = {"shape": frame_shape, "num_stack": stacked_frames}
    store_callbacks = partial(
        TorchModelStoreCallbacks,
        model_path=self.params.model_dir / "trained_model.pt",
    )

    # Training configuration
    self.config = {
        "model": model_section,
        "env": custom_env_name,
        "env_config": env_section,
        "callbacks": store_callbacks,
        "double_q": True,
        "dueling": True,
        "noisy": True,
        "n_step": 10,
        "lr": self.params.learning_rate,
        "train_batch_size": self.params.batch_size,
        "buffer_size": self.params.replay_buffer_size,
        "prioritized_replay": True,
        "num_workers": self.params.num_workers,
        "num_gpus": self.use_gpu,
        "use_pytorch": True,
        "log_level": logging.INFO,
    }

    # Stopping conditions
    # NOTE: `max_num_timeteps` (sic) is the attribute name read from the
    # parameters object; it is kept as spelled.
    self.stop_conditions = {
        "training_iteration": self.params.max_num_iterations,
        "timesteps_total": self.params.max_num_timeteps,
        "episode_reward_min": self.params.target_episode_reward,
    }
import ray from ray.tune.registry import register_trainable, register_env from ray.tune import run_experiments, grid_search from ray.rllib.models.catalog import ModelCatalog from maml import MAMLAgent from point_env import PointEnv from reset_wrapper import ResetWrapper from fcnet import FullyConnectedNetwork register_trainable("MAML", MAMLAgent) env_cls = PointEnv register_env(env_cls.__name__, lambda env_config: ResetWrapper(env_cls(), env_config)) ModelCatalog.register_custom_model("maml_mlp", FullyConnectedNetwork) # ray.init() ray.init(redis_address="localhost:32222") config = { "random_seed": grid_search([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), "inner_lr": grid_search([0.01]), "inner_grad_clip": grid_search([10.0, 20.0, 30.0, 40.0]), "clip_param": grid_search([0.1, 0.2, 0.3]), "vf_loss_coeff": grid_search([0.01, 0.02, 0.05, 0.1, 0.2]), "vf_clip_param": grid_search([5.0, 10.0, 15.0, 20.0]), "model": { "custom_model": "maml_mlp", "fcnet_hiddens": [100, 100], "fcnet_activation": "tanh",
import argparse from ray import tune from ray.rllib.contrib.alpha_zero.models.custom_torch_models import DenseModel from ray.rllib.contrib.alpha_zero.environments.cartpole import CartPole from ray.rllib.models.catalog import ModelCatalog if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--num-workers", default=6, type=int) parser.add_argument("--training-iteration", default=10000, type=int) args = parser.parse_args() ModelCatalog.register_custom_model("dense_model", DenseModel) tune.run( "contrib/AlphaZero", stop={"training_iteration": args.training_iteration}, max_failures=0, config={ "env": CartPole, "num_workers": args.num_workers, "sample_batch_size": 50, "train_batch_size": 500, "sgd_minibatch_size": 64, "lr": 1e-4, "num_sgd_iter": 1, "mcts_config": { "puct_coefficient": 1.5,
filters_4x4 = [[32, [2, 2], 2], [256, [2, 2], 2]] filters_6x6 = [[32, [2, 2], 2], [256, [2, 2], 2]] filters_10x10 = [[32, [2, 2], 2], [256, [2, 2], 2]] if len(shape) == 3 and shape[:2] == [84, 84]: return filters_84x84 elif len(shape) == 3 and shape[:2] == [42, 42]: return filters_42x42 elif len(shape) == 3 and shape[:2] == [3, 4]: return filters_3x4 elif len(shape) == 3 and shape[:2] == [4, 4]: return filters_4x4 elif len(shape) == 3 and shape[:2] == [6, 6]: return filters_6x6 elif len(shape) == 3 and shape[:2] == [10, 10]: return filters_10x10 elif len(shape) == 1: # Don't use a cnn in this case return [] else: raise ValueError( "No default configuration for obs shape {}".format(shape) + ", you must specify `conv_filters` manually as a model option" ", or add it as a default to the _get_filter_config function.") ModelCatalog.register_custom_model(SAC_STRATEGO_MODEL, SpatialStrategoModel)
from ray.rllib.evaluation.sample_batch import DEFAULT_POLICY_ID
from ray.rllib.evaluation.metrics import summarize_episodes
from ray.tune.logger import pretty_print

from models.rllib_mlp import RLlibMLP
from envs.point_env import PointEnv
from envs.reset_wrapper import ResetWrapper

# Enable debug-level output for the "ray.rllib.agents.maml" logger.
logger = logging.getLogger("ray.rllib.agents.maml")
logger.setLevel(logging.DEBUG)

ray.init()

# Register the environment under its class name. Every created instance is
# wrapped in ResetWrapper, which also receives the env_config.
env_cls = PointEnv
register_env(env_cls.__name__,
             lambda env_config: ResetWrapper(env_cls(), env_config))
# register_env("PointEnv", lambda env_config: PointEnv(env_config))

# Make RLlibMLP selectable as the custom model "maml_mlp" (referenced in the
# config below).
ModelCatalog.register_custom_model("maml_mlp", RLlibMLP)

# Agent configuration: one worker and the custom "maml_mlp" model.
config = {
    "num_workers": 1,
    "model": {
        "custom_model": "maml_mlp",
        "fcnet_hiddens": [100, 100],
        "fcnet_activation": "tanh",
        "custom_options": {
            "vf_share_layers": True
        },
        # "squash_to_range": True,
        # "free_log_std": True
    }
}
def register_carla_model():
    """Register ``CarlaModel`` with the ``ModelCatalog`` under the key "carla"."""
    model_key = "carla"
    ModelCatalog.register_custom_model(model_key, CarlaModel)
def register():
    """Register the delayed-action and human-action models with the ModelCatalog."""
    # Registration order matches the original: delayed first, then human.
    registrations = (
        ("delayed_action", DelayedActionModel),
        ("human_action", HumanActionModel),
    )
    for model_name, model_cls in registrations:
        ModelCatalog.register_custom_model(model_name, model_cls)