def test_conv2d_default_stacks(self):
    """Test that default conv2d stacks exist for image observation spaces.

    For each 3D shape listed below and each framework yielded by
    `framework_iterator()`, a model is requested from
    `ModelCatalog.get_model_v2` using a copy of `MODEL_DEFAULTS`. The
    returned model must be a `VisionNetwork` or `TorchVision`, and a
    forward pass on a single sampled observation must yield an output of
    shape `(1, num_outputs)`.
    """
    # Used for the action space size, the requested number of model
    # outputs and the expected width of the output.
    num_outputs = 2
    action_space = gym.spaces.Discrete(num_outputs)

    shapes = [
        (480, 640, 3),
        (240, 320, 3),
        (96, 96, 3),
        (84, 84, 3),
        (42, 42, 3),
        (10, 10, 3),
    ]
    for shape in shapes:
        print(f"shape={shape}")
        obs_space = gym.spaces.Box(-1.0, 1.0, shape=shape)
        for fw in framework_iterator():
            model = ModelCatalog.get_model_v2(
                obs_space,
                action_space,
                num_outputs,
                MODEL_DEFAULTS.copy(),
                framework=fw,
            )
            # assertIsInstance reports the actual type on failure.
            self.assertIsInstance(model, (VisionNetwork, TorchVision))

            # Indexing with `[None]` prepends an axis of size 1, so the
            # model receives a batch holding one observation.
            obs_batch = obs_space.sample()[None]
            if fw == "torch":
                obs_batch = torch.from_numpy(obs_batch)
            output, _ = model({"obs": obs_batch})

            # B x [action logits]
            # assertEqual reports the actual shape on failure.
            self.assertEqual(output.shape, (1, num_outputs))
            print("ok")
Esempio n. 2
0
 def __init__(self, obs_space, options=None):
     """Initialize the instance from an observation space and options.

     Args:
         obs_space: Observation space. It is first passed to
             `legacy_patch_shapes` and then stored on the instance.
         options: Options dict handed to `_init_shape`. When falsy
             (`None` or empty), a copy of `MODEL_DEFAULTS` is used.
     """
     legacy_patch_shapes(obs_space)
     self._obs_space = obs_space
     if not options:
         # Imported at call time rather than at module level
         # (presumably to avoid a circular import -- TODO confirm).
         from ray.rllib.models.catalog import MODEL_DEFAULTS
         self._options = MODEL_DEFAULTS.copy()
     else:
         self._options = options
     self.shape = self._init_shape(obs_space, self._options)
     # Total number of elements in `self.shape`. `np.prod` replaces the
     # alias `np.product`, which was removed in NumPy 2.0.
     self._size = int(np.prod(self.shape))
     self._i = 0
Esempio n. 3
0
    def __init__(self, obs_space: gym.Space, options: dict = None):
        """Initialize the instance from an observation space and options.

        Args:
            obs_space: Observation space. It is first passed to
                `_legacy_patch_shapes` and then stored on the instance.
            options: Options dict handed to `_init_shape`. When falsy
                (`None` or empty), a copy of `MODEL_DEFAULTS` is used.
        """
        _legacy_patch_shapes(obs_space)
        self._obs_space = obs_space
        if not options:
            # Imported at call time rather than at module level
            # (presumably to avoid a circular import -- TODO confirm).
            from ray.rllib.models.catalog import MODEL_DEFAULTS

            self._options = MODEL_DEFAULTS.copy()
        else:
            self._options = options
        self.shape = self._init_shape(obs_space, self._options)
        # Total number of elements in `self.shape`. `np.prod` replaces the
        # alias `np.product`, which was removed in NumPy 2.0.
        self._size = int(np.prod(self.shape))
        self._i = 0
        # One sample drawn from the space and kept on the instance
        # (presumably as a reference for later type checks -- TODO confirm).
        self._obs_for_type_matching = self._obs_space.sample()
        "is_training": False
    }
    config["model"] = model_config

    # trainer = a3c.A3CTrainer(env="pom", config=config)
    trainer = ppo.appo.APPOTrainer(env="pom", config = config)
    trainer.restore(model_path)
    for i in range(500):
        env.render()
        actions = trainer.compute_action(obs)
        print(actions)
        obs, reward, done, _ = env.step(actions)
        if done:
            break
        time.sleep(0.5)

    env.render()
    time.sleep(10)


if __name__ == "__main__":
    # Script entry point: start Ray, register the custom model and the
    # environment, build the module-level `model_config`, then call the
    # function named by the first command-line argument.

    # ray.init(memory=11*1024*1024*1024, object_store_memory=5*1024*1024*1024)
    ray.init()
    ModelCatalog.register_custom_model("my_model", TFCNN)
    register_env("pom", env_creator)

    model_config = MODEL_DEFAULTS.copy()
    model_config["custom_model"] = "my_model"
    # NOTE(review): custom_options is set to an empty string here; confirm
    # that the registered model does not expect a dict.
    model_config["custom_options"] = ""

    # The first CLI argument names the entry point; default is "game_eval".
    entrypoint = next(iter(sys.argv[1:]), "game_eval")
    # At module scope `locals()` and `globals()` are the same dict, so this
    # looks the name up exactly where the original did, but fails with a
    # readable message instead of a bare KeyError / TypeError.
    target = globals().get(entrypoint)
    if not callable(target):
        raise SystemExit(
            f"Unknown entrypoint {entrypoint!r}: expected the name of a "
            "callable defined in this module."
        )
    target()