Example #1
def register_carla_model():
    ModelCatalog.register_custom_model("carla", CarlaModel)
Example #2
    def setUp(self):
        ray.init()
        ModelCatalog.register_custom_model("keras_model", MyKerasModel)
        ModelCatalog.register_custom_model("torch_model", MyTorchModel)
Example #3
parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="PPO")
parser.add_argument("--framework",
                    choices=["tf2", "tf", "tfe", "torch"],
                    default="tf")
parser.add_argument("--as-test", action="store_true")
parser.add_argument("--stop-iters", type=int, default=50)
parser.add_argument("--stop-timesteps", type=int, default=200000)
parser.add_argument("--stop-reward", type=float, default=150.0)

if __name__ == "__main__":
    args = parser.parse_args()
    ray.init(num_cpus=3)

    ModelCatalog.register_custom_model(
        "frame_stack_model", FrameStackingCartPoleModel
        if args.framework != "torch" else TorchFrameStackingCartPoleModel)
    tune.register_env("stateless_cartpole", lambda c: StatelessCartPole())

    config = {
        "env": "stateless_cartpole",
        "model": {
            "vf_share_layers": True,
            "custom_model": "frame_stack_model",
            "custom_model_config": {
                "num_frames": 16,
            },

            # To compare against a simple LSTM:
            # "use_lstm": True,
            # "lstm_use_prev_action": True,
Example #4
        # Store last batch size for value_function output.
        self._last_batch_size = obs.shape[0]
        # Return 2x the obs (and empty states).
        # This will further be sent through an automatically provided
        # LSTM head (b/c we are setting use_lstm=True below).
        return obs * 2.0, []

    def value_function(self):
        return torch.from_numpy(np.zeros(shape=(self._last_batch_size,)))


if __name__ == "__main__":
    ray.init()

    # Register the above custom model.
    ModelCatalog.register_custom_model("my_torch_model", MyCustomModel)

    # Create the Trainer.
    trainer = ppo.PPOTrainer(
        env="CartPole-v0",
        config={
            "framework": "torch",
            "model": {
                # Auto-wrap the custom(!) model with an LSTM.
                "use_lstm": True,
                # To further customize the LSTM auto-wrapper.
                "lstm_cell_size": 64,
                # Specify our custom model from above.
                "custom_model": "my_torch_model",
                # Extra kwargs to be passed to your model's c'tor.
                "custom_model_config": {},
Example #5
    filters_4x4 = [[32, [2, 2], 2], [256, [2, 2], 2]]

    filters_6x6 = [[32, [2, 2], 2], [256, [2, 2], 2]]

    filters_10x10 = [[32, [2, 2], 2], [256, [2, 2], 2]]

    if len(shape) == 3 and shape[:2] == [84, 84]:
        return filters_84x84
    elif len(shape) == 3 and shape[:2] == [42, 42]:
        return filters_42x42
    elif len(shape) == 3 and shape[:2] == [3, 4]:
        return filters_3x4
    elif len(shape) == 3 and shape[:2] == [4, 4]:
        return filters_4x4
    elif len(shape) == 3 and shape[:2] == [6, 6]:
        return filters_6x6
    elif len(shape) == 3 and shape[:2] == [10, 10]:
        return filters_10x10
    elif len(shape) == 1:
        # Don't use a cnn in this case
        return []
    else:
        raise ValueError(
            "No default configuration for obs shape {}".format(shape) +
            ", you must specify `conv_filters` manually as a model option"
            ", or add it as a default to the _get_filter_config function.")


ModelCatalog.register_custom_model(SPATIAL_STRATEGO_Q_MODEL,
                                   SpatialStrategoQModel)
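For observation shapes without a default, the error above asks for a manual conv_filters entry. A hedged sketch of what that looks like in a model config (the filter values are made up for illustration, not taken from the original project):

# Hypothetical example: supplying conv_filters manually for an observation
# shape that has no default. Each entry is
# [num_out_channels, [kernel_width, kernel_height], stride].
config = {
    "model": {
        "custom_model": SPATIAL_STRATEGO_Q_MODEL,
        "conv_filters": [
            [32, [3, 3], 2],
            [64, [3, 3], 2],
            [256, [3, 3], 1],
        ],
    },
}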
Example #6

# Create the Model Class with a deep network using keras:
class ResNet(Model):
    """Residual Network model, as used in IMPALA paper"""
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Builds and returns the output and last layer of the network."""

        kernel_size = 3  # Size of the kernel for the convolution layers
        pool_size = 3  # Size of the pooling region for the pooling layers
        image_shape = input_dict["obs"].get_shape().as_list()[1:]

        embed_input = Input(shape=image_shape, tensor=input_dict["obs"])
        layer1 = convolutional_block(16, embed_input, kernel_size, pool_size)
        layer2 = convolutional_block(32, layer1, kernel_size, pool_size)
        layer3 = convolutional_block(32, layer2, kernel_size, pool_size)

        layer4 = Flatten()(layer3)
        layer5 = Activation('relu')(layer4)
        layer5 = Dense(256, activation='relu')(layer5)
        output = Dense(num_outputs, activation=None)(layer5)

        return output, layer5


# Register models
MODELS = {"ResNet": ResNet}

for key in MODELS:
    ModelCatalog.register_custom_model(key, MODELS[key])
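The convolutional_block helper used above is not shown in this snippet. A plausible reconstruction that matches its call signature (imports assume tf.keras, and the exact layer arrangement in the original may differ; this follows the IMPALA-style residual block the docstring refers to):

from tensorflow.keras.layers import Activation, Add, Conv2D, MaxPool2D


def convolutional_block(filters, inputs, kernel_size, pool_size):
    """Hypothetical reconstruction: conv + pooling followed by a residual branch."""
    x = Conv2D(filters, kernel_size, padding="same")(inputs)
    x = MaxPool2D(pool_size=pool_size, strides=2, padding="same")(x)
    shortcut = x
    y = Activation("relu")(x)
    y = Conv2D(filters, kernel_size, padding="same")(y)
    y = Activation("relu")(y)
    y = Conv2D(filters, kernel_size, padding="same")(y)
    return Add()([shortcut, y])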
Example #7
        #                 self.num_outputs, shape))
        if not isinstance(state, list):
            raise ValueError("State output is not a list: {}".format(state))

        self._last_output = outputs
        return outputs, state

    @override(ModelV2)
    def get_initial_state(self):
        if self.use_lstm:
            return [
                np.zeros(self._lstm_state_shape, np.float32),
                np.zeros(self._lstm_state_shape, np.float32)
            ]
        else:
            return []

    def save_config_to_json(self, save_file_path):
        with open(save_file_path, 'w') as fp:
            json.dump(self.model_config, fp)

        # Verify that dictionary is recoverable from json
        with open(save_file_path, 'r') as fp:
            saved = json.load(fp)
        for key, orig_val in self.model_config.items():
            assert np.all(saved[key] == orig_val)


ModelCatalog.register_custom_model(SAC_SPATIAL_RNN_STRATEGO_MODEL,
                                   SACSpatialRNNStrategoModel)
Example #8
    def test_sac_compilation(self):
        """Tests whether an SACTrainer can be built with all frameworks."""
        config = sac.DEFAULT_CONFIG.copy()
        config["Q_model"] = sac.DEFAULT_CONFIG["Q_model"].copy()
        config["num_workers"] = 0  # Run locally.
        config["n_step"] = 3
        config["twin_q"] = True
        config["replay_buffer_config"]["learning_starts"] = 0
        config["rollout_fragment_length"] = 10
        config["train_batch_size"] = 10
        # If we use default buffer size (1e6), the buffer will take up
        # 169.445 GB memory, which is beyond travis-ci's current (Mar 19, 2021)
        # available system memory (8.34816 GB).
        config["replay_buffer_config"]["capacity"] = 40000
        # Test with saved replay buffer.
        config["store_buffer_in_checkpoints"] = True
        num_iterations = 1

        ModelCatalog.register_custom_model("batch_norm", KerasBatchNormModel)
        ModelCatalog.register_custom_model("batch_norm_torch",
                                           TorchBatchNormModel)

        image_space = Box(-1.0, 1.0, shape=(84, 84, 3))
        simple_space = Box(-1.0, 1.0, shape=(3, ))

        tune.register_env(
            "random_dict_env",
            lambda _: RandomEnv({
                "observation_space":
                Dict({
                    "a": simple_space,
                    "b": Discrete(2),
                    "c": image_space,
                }),
                "action_space":
                Box(-1.0, 1.0, shape=(1, )),
            }),
        )
        tune.register_env(
            "random_tuple_env",
            lambda _: RandomEnv({
                "observation_space":
                Tuple([simple_space, Discrete(2), image_space]),
                "action_space":
                Box(-1.0, 1.0, shape=(1, )),
            }),
        )

        for fw in framework_iterator(config, with_eager_tracing=True):
            # Test for different env types (discrete w/ and w/o image, + cont).
            for env in [
                    "random_dict_env",
                    "random_tuple_env",
                    # "MsPacmanNoFrameskip-v4",
                    "CartPole-v0",
            ]:
                print("Env={}".format(env))
                # Test making the Q-model a custom one for CartPole, otherwise,
                # use the default model.
                config["Q_model"]["custom_model"] = (
                    "batch_norm{}".format("_torch" if fw == "torch" else "")
                    if env == "CartPole-v0" else None)
                trainer = sac.SACTrainer(config=config, env=env)
                for i in range(num_iterations):
                    results = trainer.train()
                    check_train_results(results)
                    print(results)
                check_compute_single_action(trainer)

                # Test whether the replay buffer is saved along with
                # a checkpoint (no point in doing it for all frameworks since
                # this is framework agnostic).
                if fw == "tf" and env == "CartPole-v0":
                    checkpoint = trainer.save()
                    new_trainer = sac.SACTrainer(config, env=env)
                    new_trainer.restore(checkpoint)
                    # Get some data from the buffer and compare.
                    data = trainer.local_replay_buffer.replay_buffers[
                        "default_policy"]._storage[:42 + 42]
                    new_data = new_trainer.local_replay_buffer.replay_buffers[
                        "default_policy"]._storage[:42 + 42]
                    check(data, new_data)
                    new_trainer.stop()

                trainer.stop()
Example #9
                    activation_fn=activation,
                    scope="fc{}".format(i),
                )
                i += 1
            output = slim.fully_connected(
                last_layer,
                num_outputs,
                weights_initializer=normc_initializer(0.01),
                activation_fn=None,
                scope="fc_out",
            )

        return output, last_layer


ModelCatalog.register_custom_model("PommermanModel1", PommermanModel)


class BaseLineAgent(BaseAgent):
    def act(self, obs, action_space):
        pass


class NoDoAgent(BaseAgent):
    def act(self, obs, action_space):
        return 0


class SuicidalAgent(BaseAgent):
    def act(self, obs, action_space):
        return 5
Example #10
    def value_function(self):
        return self.action_param_model.value_function()



if __name__ == "__main__":
    ray.init()

    register_env(
        "ExternalHearts",
        #lambda _: HeartsEnv()
        lambda _: ExternalHearts(HeartsEnv(), episodes=1000)
    )

    ModelCatalog.register_custom_model("ParametricActionsModel", ParametricActionsModel)

    ppo_config = {"timesteps_per_iteration": 1000,
                  "model": {"custom_model": "ParametricActionsModel",
                            "use_lstm": True,
                            "max_seq_len": HAND_SIZE,
                            "lstm_use_prev_action_reward": True},
                  "num_workers": 0}

    other_config = {"timesteps_per_iteration": 1000,
                  "model": {"custom_model": "ParametricActionsModel",
                            "use_lstm": True,
                            "max_seq_len": HAND_SIZE,
                            "lstm_use_prev_action_reward": True}
                    }
Example #11
    def test_sac_compilation(self):
        """Tests whether SAC can be built with all frameworks."""
        config = (sac.SACConfig().training(
            n_step=3,
            twin_q=True,
            replay_buffer_config={
                "learning_starts": 0,
                "capacity": 40000
            },
            store_buffer_in_checkpoints=True,
            train_batch_size=10,
        ).rollouts(num_rollout_workers=0, rollout_fragment_length=10))
        num_iterations = 1

        ModelCatalog.register_custom_model("batch_norm", KerasBatchNormModel)
        ModelCatalog.register_custom_model("batch_norm_torch",
                                           TorchBatchNormModel)

        image_space = Box(-1.0, 1.0, shape=(84, 84, 3))
        simple_space = Box(-1.0, 1.0, shape=(3, ))

        tune.register_env(
            "random_dict_env",
            lambda _: RandomEnv({
                "observation_space":
                Dict({
                    "a": simple_space,
                    "b": Discrete(2),
                    "c": image_space,
                }),
                "action_space":
                Box(-1.0, 1.0, shape=(1, )),
            }),
        )
        tune.register_env(
            "random_tuple_env",
            lambda _: RandomEnv({
                "observation_space":
                Tuple([simple_space, Discrete(2), image_space]),
                "action_space":
                Box(-1.0, 1.0, shape=(1, )),
            }),
        )

        for fw in framework_iterator(config, with_eager_tracing=True):
            # Test for different env types (discrete w/ and w/o image, + cont).
            for env in [
                    "random_dict_env",
                    "random_tuple_env",
                    # "MsPacmanNoFrameskip-v4",
                    "CartPole-v0",
            ]:
                print("Env={}".format(env))
                # Test making the Q-model a custom one for CartPole, otherwise,
                # use the default model.
                config.q_model_config["custom_model"] = (
                    "batch_norm{}".format("_torch" if fw == "torch" else "")
                    if env == "CartPole-v0" else None)
                trainer = config.build(env=env)
                for i in range(num_iterations):
                    results = trainer.train()
                    check_train_results(results)
                    print(results)
                check_compute_single_action(trainer)

                # Test whether the replay buffer is saved along with
                # a checkpoint (no point in doing it for all frameworks since
                # this is framework agnostic).
                if fw == "tf" and env == "CartPole-v0":
                    checkpoint = trainer.save()
                    new_trainer = sac.SAC(config, env=env)
                    new_trainer.restore(checkpoint)
                    # Get some data from the buffer and compare.
                    data = trainer.local_replay_buffer.replay_buffers[
                        "default_policy"]._storage[:42 + 42]
                    new_data = new_trainer.local_replay_buffer.replay_buffers[
                        "default_policy"]._storage[:42 + 42]
                    check(data, new_data)
                    new_trainer.stop()

                trainer.stop()
Example #12
    from rllib_models.rllib_tesp import RLlibTESP
    from envs.point_env import PointEnv
    from envs.mujoco.ant import AntEnv
    from envs.reset_wrapper import ResetWrapper

    logger = logging.getLogger("ray.rllib.agents")
    logger.setLevel(logging.DEBUG)

    # ray.init()
    ray.init(redis_address="192.168.12.39:32222")
    env_cls = AntEnv
    model_cls = RLlibTESP
    register_env(env_cls.__name__,
                 lambda env_config: ResetWrapper(env_cls(), env_config))
    # register_env("PointEnv", lambda env_config: PointEnv(env_config))
    ModelCatalog.register_custom_model(model_cls.__name__, model_cls)

    config = {
        # "num_workers": 20,
        "model": {
            "custom_model": model_cls.__name__,
            "custom_options": {
                "rnn_units": 256,
                "rnn_output_units": 16,
                "mlp_hidden_units": [512, 512],
                "vf_share_layers": False,
                "linear_baseline": True
            }
            # "squash_to_range": True,
            # "free_log_std": True
        }
Example #13
    def __init__(self, parameters):
        self.params = parameters

        # Checking for GPU
        self.use_gpu = self.params.use_gpu and torch.cuda.is_available()
        self.device = torch.device("cuda:0" if self.use_gpu else "cpu")

        # Register model
        ModelCatalog.register_custom_model("agent_network", AgentNetwork)

        # Training configuration
        self.config = {
            "model": {
                "custom_model": "agent_network",
                "custom_options": {
                    "shape": (40, 60, 3),
                    "num_stack": 4
                },
            },
            "env":
            custom_env_name,
            "env_config": {
                "shape": (40, 60, 3),
                "num_stack": 4
            },
            "callbacks":
            partial(
                TorchModelStoreCallbacks,
                model_path=self.params.model_dir / "trained_model.pt",
            ),
            "double_q":
            True,
            "dueling":
            True,
            "noisy":
            True,
            "n_step":
            10,
            "lr":
            self.params.learning_rate,
            "train_batch_size":
            self.params.batch_size,
            "buffer_size":
            self.params.replay_buffer_size,
            "prioritized_replay":
            True,
            "num_workers":
            self.params.num_workers,
            "num_gpus":
            self.use_gpu,
            "use_pytorch":
            True,
            "log_level":
            logging.INFO,
        }

        # Stopping conditions
        self.stop_conditions = {
            "training_iteration": self.params.max_num_iterations,
            "timesteps_total": self.params.max_num_timeteps,
            "episode_reward_min": self.params.target_episode_reward,
        }
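The snippet ends with the constructor. A hedged sketch of how config and stop_conditions might then be handed to Tune from such a class (the "DQN" trainable name is an assumption based on the double_q/dueling/noisy keys above, not something the original shows):

    def train(self):
        """Hypothetical run method, not part of the original snippet."""
        from ray import tune

        return tune.run(
            "DQN",  # assumed from the DQN-style config keys above
            config=self.config,
            stop=self.stop_conditions,
            checkpoint_at_end=True,
        )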
Example #14
import ray
from ray.tune.registry import register_trainable, register_env
from ray.tune import run_experiments, grid_search
from ray.rllib.models.catalog import ModelCatalog

from maml import MAMLAgent
from point_env import PointEnv
from reset_wrapper import ResetWrapper
from fcnet import FullyConnectedNetwork

register_trainable("MAML", MAMLAgent)
env_cls = PointEnv
register_env(env_cls.__name__,
             lambda env_config: ResetWrapper(env_cls(), env_config))
ModelCatalog.register_custom_model("maml_mlp", FullyConnectedNetwork)

# ray.init()
ray.init(redis_address="localhost:32222")

config = {
    "random_seed": grid_search([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
    "inner_lr": grid_search([0.01]),
    "inner_grad_clip": grid_search([10.0, 20.0, 30.0, 40.0]),
    "clip_param": grid_search([0.1, 0.2, 0.3]),
    "vf_loss_coeff": grid_search([0.01, 0.02, 0.05, 0.1, 0.2]),
    "vf_clip_param": grid_search([5.0, 10.0, 15.0, 20.0]),
    "model": {
        "custom_model": "maml_mlp",
        "fcnet_hiddens": [100, 100],
        "fcnet_activation": "tanh",
Example #15
import argparse

from ray import tune

from ray.rllib.contrib.alpha_zero.models.custom_torch_models import DenseModel
from ray.rllib.contrib.alpha_zero.environments.cartpole import CartPole
from ray.rllib.models.catalog import ModelCatalog

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-workers", default=6, type=int)
    parser.add_argument("--training-iteration", default=10000, type=int)
    args = parser.parse_args()

    ModelCatalog.register_custom_model("dense_model", DenseModel)

    tune.run(
        "contrib/AlphaZero",
        stop={"training_iteration": args.training_iteration},
        max_failures=0,
        config={
            "env": CartPole,
            "num_workers": args.num_workers,
            "sample_batch_size": 50,
            "train_batch_size": 500,
            "sgd_minibatch_size": 64,
            "lr": 1e-4,
            "num_sgd_iter": 1,
            "mcts_config": {
                "puct_coefficient": 1.5,
Example #16
    filters_4x4 = [[32, [2, 2], 2], [256, [2, 2], 2]]

    filters_6x6 = [[32, [2, 2], 2], [256, [2, 2], 2]]

    filters_10x10 = [[32, [2, 2], 2], [256, [2, 2], 2]]

    if len(shape) == 3 and shape[:2] == [84, 84]:
        return filters_84x84
    elif len(shape) == 3 and shape[:2] == [42, 42]:
        return filters_42x42
    elif len(shape) == 3 and shape[:2] == [3, 4]:
        return filters_3x4
    elif len(shape) == 3 and shape[:2] == [4, 4]:
        return filters_4x4
    elif len(shape) == 3 and shape[:2] == [6, 6]:
        return filters_6x6
    elif len(shape) == 3 and shape[:2] == [10, 10]:
        return filters_10x10
    elif len(shape) == 1:
        # Don't use a cnn in this case
        return []
    else:
        raise ValueError(
            "No default configuration for obs shape {}".format(shape) +
            ", you must specify `conv_filters` manually as a model option"
            ", or add it as a default to the _get_filter_config function.")


ModelCatalog.register_custom_model(SAC_STRATEGO_MODEL, SpatialStrategoModel)
Example #17
    from ray.rllib.evaluation.sample_batch import DEFAULT_POLICY_ID
    from ray.rllib.evaluation.metrics import summarize_episodes
    from ray.tune.logger import pretty_print
    from models.rllib_mlp import RLlibMLP
    from envs.point_env import PointEnv
    from envs.reset_wrapper import ResetWrapper

    logger = logging.getLogger("ray.rllib.agents.maml")
    logger.setLevel(logging.DEBUG)

    ray.init()
    env_cls = PointEnv
    register_env(env_cls.__name__,
                 lambda env_config: ResetWrapper(env_cls(), env_config))
    # register_env("PointEnv", lambda env_config: PointEnv(env_config))
    ModelCatalog.register_custom_model("maml_mlp", RLlibMLP)

    config = {
        "num_workers": 1,
        "model": {
            "custom_model": "maml_mlp",
            "fcnet_hiddens": [100, 100],
            "fcnet_activation": "tanh",
            "custom_options": {
                "vf_share_layers": True
            },
            # "squash_to_range": True,
            # "free_log_std": True
        }
    }
Example #18
def register_carla_model():
    ModelCatalog.register_custom_model("carla", CarlaModel)
Example #19
def register():
    ModelCatalog.register_custom_model("delayed_action", DelayedActionModel)
    ModelCatalog.register_custom_model("human_action", HumanActionModel)