def testMultiAgent(self):
    """Round-trip multi-agent I/O test.

    First records multi-agent experiences to ``self.test_dir`` via the
    ``output`` config, then starts a second trainer that reads them back
    with ``input`` + simulation-based input evaluation, and waits until a
    non-NaN ``episode_reward_mean`` shows up in the results.
    """
    register_env("multi_cartpole", lambda _: MultiCartpole(10))
    single_env = gym.make("CartPole-v0")

    def gen_policy():
        # Every agent in MultiCartpole shares the single-env spaces.
        obs_space = single_env.observation_space
        act_space = single_env.action_space
        return (PGPolicyGraph, obs_space, act_space, {})

    def make_multiagent_conf():
        # Build a fresh dict per trainer so neither config object can
        # mutate the other's.
        return {
            "policy_graphs": {
                name: gen_policy()
                for name in ["policy_1", "policy_2"]
            },
            "policy_mapping_fn": (
                lambda agent_id: random.choice(
                    ["policy_1", "policy_2"])),
        }

    # Phase 1: write experiences to disk.
    writer = PGAgent(
        env="multi_cartpole",
        config={
            "num_workers": 0,
            "output": self.test_dir,
            "multiagent": make_multiagent_conf(),
        })
    writer.train()
    self.assertEqual(len(os.listdir(self.test_dir)), 1)
    writer.stop()

    # Phase 2: replay the recorded experiences with simulation-based
    # input evaluation and wait for reward stats to appear.
    reader = PGAgent(
        env="multi_cartpole",
        config={
            "num_workers": 0,
            "input": self.test_dir,
            "input_evaluation": ["simulation"],
            "train_batch_size": 2000,
            "multiagent": make_multiagent_conf(),
        })
    for _ in range(50):
        result = reader.train()
        if not np.isnan(result["episode_reward_mean"]):
            return  # simulation ok
        time.sleep(0.1)
    assert False, "did not see any simulation results"
def testRolloutDictSpace(self):
    """Checks save/restore with a nested dict observation space.

    Trains briefly, snapshots the agent, then verifies that a restored
    agent can both continue training and run a rollout.
    """
    register_env("nested", lambda _: NestedDictEnv())

    # Train briefly and take a checkpoint.
    trainer = PGAgent(env="nested")
    trainer.train()
    checkpoint = trainer.save()
    trainer.stop()

    # A restored agent must support further training ...
    restored = PGAgent(env="nested")
    restored.restore(checkpoint)
    restored.train()

    # ... and evaluation via rollout.
    rollout(restored, "nested", 100)
def testMultiAgent(self):
    """Round-trip multi-agent I/O test.

    Records multi-agent experiences to ``self.test_dir``, then reads
    them back with simulation-based input evaluation and waits for a
    non-NaN ``episode_reward_mean`` to appear in the train results.
    """
    register_env("multi_cartpole", lambda _: MultiCartpole(10))
    single_env = gym.make("CartPole-v0")

    def gen_policy():
        # Every agent in MultiCartpole shares the single-env spaces.
        obs_space = single_env.observation_space
        act_space = single_env.action_space
        return (PGPolicyGraph, obs_space, act_space, {})

    # Phase 1: write experiences to disk.
    pg = PGAgent(
        env="multi_cartpole",
        config={
            "num_workers": 0,
            "output": self.test_dir,
            "multiagent": {
                "policy_graphs": {
                    "policy_1": gen_policy(),
                    "policy_2": gen_policy(),
                },
                "policy_mapping_fn": (
                    lambda agent_id: random.choice(
                        ["policy_1", "policy_2"])),
            },
        })
    pg.train()
    self.assertEqual(len(os.listdir(self.test_dir)), 1)
    pg.stop()

    # Phase 2: replay the recorded experiences.
    pg = PGAgent(
        env="multi_cartpole",
        config={
            "num_workers": 0,
            "input": self.test_dir,
            # Fix: "input_evaluation" takes a list of evaluation
            # methods; the bare string "simulation" disagreed with the
            # sibling test in this file, which passes ["simulation"].
            "input_evaluation": ["simulation"],
            "train_batch_size": 2000,
            "multiagent": {
                "policy_graphs": {
                    "policy_1": gen_policy(),
                    "policy_2": gen_policy(),
                },
                "policy_mapping_fn": (
                    lambda agent_id: random.choice(
                        ["policy_1", "policy_2"])),
            },
        })
    for _ in range(50):
        result = pg.train()
        if not np.isnan(result["episode_reward_mean"]):
            return  # simulation ok
        time.sleep(0.1)
    assert False, "did not see any simulation results"