Code Example #1
File: test_io.py Project: robertnishihara/ray
    def testAgentInputPostprocessingEnabled(self):
        self.writeOutputs(self.test_dir)

        # Rewrite the files to drop advantages and value_targets for testing
        for path in glob.glob(self.test_dir + "/*.json"):
            out = []
            for line in open(path).readlines():
                data = json.loads(line)
                del data["advantages"]
                del data["value_targets"]
                out.append(data)
            with open(path, "w") as f:
                for data in out:
                    f.write(json.dumps(data))

        agent = PGAgent(
            env="CartPole-v0",
            config={
                "input": self.test_dir,
                "input_evaluation": [],
                "postprocess_inputs": True,  # adds back 'advantages'
            })

        result = agent.train()
        self.assertEqual(result["timesteps_total"], 250)  # read from input
        self.assertTrue(np.isnan(result["episode_reward_mean"]))
Code Example #2
File: test_io.py Project: zofuthan/ray
    def testAgentInputPostprocessingEnabled(self):
        self.writeOutputs(self.test_dir)

        # Rewrite the files to drop advantages and value_targets for testing
        for path in glob.glob(self.test_dir + "/*.json"):
            out = []
            for line in open(path).readlines():
                data = json.loads(line)
                del data["advantages"]
                del data["value_targets"]
                out.append(data)
            with open(path, "w") as f:
                for data in out:
                    f.write(json.dumps(data))

        agent = PGAgent(
            env="CartPole-v0",
            config={
                "input": self.test_dir,
                "input_evaluation": None,
                "postprocess_inputs": True,  # adds back 'advantages'
            })

        result = agent.train()
        self.assertEqual(result["timesteps_total"], 250)  # read from input
        self.assertTrue(np.isnan(result["episode_reward_mean"]))
Code Example #3
File: test_nested_spaces.py Project: zdpau/ray-1
    def doTestNestedTuple(self, make_env):
        ModelCatalog.register_custom_model("composite2", TupleSpyModel)
        register_env("nested2", make_env)
        pg = PGAgent(env="nested2",
                     config={
                         "num_workers": 0,
                         "sample_batch_size": 5,
                         "train_batch_size": 5,
                         "model": {
                             "custom_model": "composite2",
                         },
                     })
        pg.train()

        # Check that the model sees the correct reconstructed observations
        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "t_spy_in_{}".format(i)))
            pos_i = TUPLE_SAMPLES[i][0].tolist()
            cam_i = TUPLE_SAMPLES[i][1][0].tolist()
            task_i = one_hot(TUPLE_SAMPLES[i][2], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)
Code Example #4
    def doTestNestedTuple(self, make_env):
        ModelCatalog.register_custom_model("composite2", TupleSpyModel)
        register_env("nested2", make_env)
        pg = PGAgent(
            env="nested2",
            config={
                "num_workers": 0,
                "sample_batch_size": 5,
                "train_batch_size": 5,
                "model": {
                    "custom_model": "composite2",
                },
            })
        pg.train()

        # Check that the model sees the correct reconstructed observations
        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "t_spy_in_{}".format(i)))
            pos_i = TUPLE_SAMPLES[i][0].tolist()
            cam_i = TUPLE_SAMPLES[i][1][0].tolist()
            task_i = one_hot(TUPLE_SAMPLES[i][2], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)
Code Example #5
    def doTestNestedDict(self, make_env, test_lstm=False):
        ModelCatalog.register_custom_model("composite", DictSpyModel)
        register_env("nested", make_env)
        pg = PGAgent(
            env="nested",
            config={
                "num_workers": 0,
                "sample_batch_size": 5,
                "train_batch_size": 5,
                "model": {
                    "custom_model": "composite",
                    "use_lstm": test_lstm,
                },
            })
        pg.train()

        # Check that the model sees the correct reconstructed observations
        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "d_spy_in_{}".format(i)))
            pos_i = DICT_SAMPLES[i]["sensors"]["position"].tolist()
            cam_i = DICT_SAMPLES[i]["sensors"]["front_cam"][0].tolist()
            task_i = one_hot(
                DICT_SAMPLES[i]["inner_state"]["job_status"]["task"], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)
Code Example #6
    def doTestNestedDict(self, make_env, test_lstm=False):
        ModelCatalog.register_custom_model("composite", DictSpyModel)
        register_env("nested", make_env)
        pg = PGAgent(env="nested",
                     config={
                         "num_workers": 0,
                         "sample_batch_size": 5,
                         "model": {
                             "custom_model": "composite",
                             "use_lstm": test_lstm,
                         },
                     })
        pg.train()

        # Check that the model sees the correct reconstructed observations
        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "d_spy_in_{}".format(i)))
            pos_i = DICT_SAMPLES[i]["sensors"]["position"].tolist()
            cam_i = DICT_SAMPLES[i]["sensors"]["front_cam"][0].tolist()
            task_i = one_hot(
                DICT_SAMPLES[i]["inner_state"]["job_status"]["task"], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)
Code Example #7
 def writeOutputs(self, output):
     agent = PGAgent(env="CartPole-v0",
                     config={
                         "output": output,
                         "sample_batch_size": 250,
                     })
     agent.train()
     return agent
Code Example #8
File: test_io.py Project: robertnishihara/ray
 def writeOutputs(self, output):
     agent = PGAgent(
         env="CartPole-v0",
         config={
             "output": output,
             "sample_batch_size": 250,
         })
     agent.train()
     return agent
Code Example #9
 def testAgentInputDir(self):
     self.writeOutputs(self.test_dir)
     agent = PGAgent(env="CartPole-v0",
                     config={
                         "input": self.test_dir,
                         "input_evaluation": None,
                     })
     result = agent.train()
     self.assertEqual(result["timesteps_total"], 250)  # read from input
     self.assertTrue(np.isnan(result["episode_reward_mean"]))
Code Example #10
File: test_external_env.py Project: anke522/ray-1
 def testTrainCartpole(self):
     register_env("test", lambda _: SimpleServing(gym.make("CartPole-v0")))
     pg = PGAgent(env="test", config={"num_workers": 0})
     for i in range(100):
         result = pg.train()
         print("Iteration {}, reward {}, timesteps {}".format(
             i, result["episode_reward_mean"], result["timesteps_total"]))
         if result["episode_reward_mean"] >= 100:
             return
     raise Exception("failed to improve reward")
Code Example #11
 def testTrainCartpole(self):
     register_env("test", lambda _: SimpleServing(gym.make("CartPole-v0")))
     pg = PGAgent(env="test", config={"num_workers": 0})
     for i in range(100):
         result = pg.train()
         print("Iteration {}, reward {}, timesteps {}".format(
             i, result["episode_reward_mean"], result["timesteps_total"]))
         if result["episode_reward_mean"] >= 100:
             return
     raise Exception("failed to improve reward")
Code Example #12
File: test_nested_spaces.py Project: zdpau/ray-1
    def testMultiAgentComplexSpaces(self):
        ModelCatalog.register_custom_model("dict_spy", DictSpyModel)
        ModelCatalog.register_custom_model("tuple_spy", TupleSpyModel)
        register_env("nested_ma", lambda _: NestedMultiAgentEnv())
        act_space = spaces.Discrete(2)
        pg = PGAgent(env="nested_ma",
                     config={
                         "num_workers": 0,
                         "sample_batch_size": 5,
                         "train_batch_size": 5,
                         "multiagent": {
                             "policy_graphs": {
                                 "tuple_policy":
                                 (PGPolicyGraph, TUPLE_SPACE, act_space, {
                                     "model": {
                                         "custom_model": "tuple_spy"
                                     }
                                 }),
                                 "dict_policy":
                                 (PGPolicyGraph, DICT_SPACE, act_space, {
                                     "model": {
                                         "custom_model": "dict_spy"
                                     }
                                 }),
                             },
                             "policy_mapping_fn": lambda a: {
                                 "tuple_agent": "tuple_policy",
                                 "dict_agent": "dict_policy"
                             }[a],
                         },
                     })
        pg.train()

        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "d_spy_in_{}".format(i)))
            pos_i = DICT_SAMPLES[i]["sensors"]["position"].tolist()
            cam_i = DICT_SAMPLES[i]["sensors"]["front_cam"][0].tolist()
            task_i = one_hot(
                DICT_SAMPLES[i]["inner_state"]["job_status"]["task"], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)

        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "t_spy_in_{}".format(i)))
            pos_i = TUPLE_SAMPLES[i][0].tolist()
            cam_i = TUPLE_SAMPLES[i][1][0].tolist()
            task_i = one_hot(TUPLE_SAMPLES[i][2], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)
Code Example #13
 def testAgentInputList(self):
     self.writeOutputs(self.test_dir)
     agent = PGAgent(env="CartPole-v0",
                     config={
                         "input": glob.glob(self.test_dir + "/*.json"),
                         "input_evaluation": None,
                         "sample_batch_size": 99,
                     })
     result = agent.train()
     self.assertEqual(result["timesteps_total"], 250)  # read from input
     self.assertTrue(np.isnan(result["episode_reward_mean"]))
Code Example #14
 def testTrainMultiCartpoleSinglePolicy(self):
     n = 10
     register_env("multi_cartpole", lambda _: MultiCartpole(n))
     pg = PGAgent(env="multi_cartpole", config={"num_workers": 0})
     for i in range(100):
         result = pg.train()
         print("Iteration {}, reward {}, timesteps {}".format(
             i, result["episode_reward_mean"], result["timesteps_total"]))
         if result["episode_reward_mean"] >= 50 * n:
             return
     raise Exception("failed to improve reward")
Code Example #15
File: test_io.py Project: robertnishihara/ray
 def testAgentInputDir(self):
     self.writeOutputs(self.test_dir)
     agent = PGAgent(
         env="CartPole-v0",
         config={
             "input": self.test_dir,
             "input_evaluation": [],
         })
     result = agent.train()
     self.assertEqual(result["timesteps_total"], 250)  # read from input
     self.assertTrue(np.isnan(result["episode_reward_mean"]))
Code Example #16
 def testTrainMultiCartpoleSinglePolicy(self):
     n = 10
     register_env("multi_cartpole", lambda _: MultiCartpole(n))
     pg = PGAgent(env="multi_cartpole", config={"num_workers": 0})
     for i in range(100):
         result = pg.train()
         print("Iteration {}, reward {}, timesteps {}".format(
             i, result["episode_reward_mean"], result["timesteps_total"]))
         if result["episode_reward_mean"] >= 50 * n:
             return
     raise Exception("failed to improve reward")
Code Example #17
File: test_io.py Project: robertnishihara/ray
 def testAgentInputList(self):
     self.writeOutputs(self.test_dir)
     agent = PGAgent(
         env="CartPole-v0",
         config={
             "input": glob.glob(self.test_dir + "/*.json"),
             "input_evaluation": [],
             "sample_batch_size": 99,
         })
     result = agent.train()
     self.assertEqual(result["timesteps_total"], 250)  # read from input
     self.assertTrue(np.isnan(result["episode_reward_mean"]))
Code Example #18
 def testAgentInputDict(self):
     self.writeOutputs(self.test_dir)
     agent = PGAgent(env="CartPole-v0",
                     config={
                         "input": {
                             self.test_dir: 0.1,
                             "sampler": 0.9,
                         },
                         "train_batch_size": 2000,
                         "input_evaluation": None,
                     })
     result = agent.train()
     self.assertTrue(not np.isnan(result["episode_reward_mean"]))
Code Example #19
 def testAgentInputEvalSim(self):
     self.writeOutputs(self.test_dir)
     agent = PGAgent(env="CartPole-v0",
                     config={
                         "input": self.test_dir,
                         "input_evaluation": "simulation",
                     })
     for _ in range(50):
         result = agent.train()
         if not np.isnan(result["episode_reward_mean"]):
             return  # simulation ok
         time.sleep(0.1)
     assert False, "did not see any simulation results"
Code Example #20
File: test_io.py Project: robertnishihara/ray
 def testAgentInputDict(self):
     self.writeOutputs(self.test_dir)
     agent = PGAgent(
         env="CartPole-v0",
         config={
             "input": {
                 self.test_dir: 0.1,
                 "sampler": 0.9,
             },
             "train_batch_size": 2000,
             "input_evaluation": [],
         })
     result = agent.train()
     self.assertTrue(not np.isnan(result["episode_reward_mean"]))
Code Example #21
File: test_io.py Project: robertnishihara/ray
 def testAgentInputEvalSim(self):
     self.writeOutputs(self.test_dir)
     agent = PGAgent(
         env="CartPole-v0",
         config={
             "input": self.test_dir,
             "input_evaluation": ["simulation"],
         })
     for _ in range(50):
         result = agent.train()
         if not np.isnan(result["episode_reward_mean"]):
             return  # simulation ok
         time.sleep(0.1)
     assert False, "did not see any simulation results"
Code Example #22
    def testMultiAgentComplexSpaces(self):
        ModelCatalog.register_custom_model("dict_spy", DictSpyModel)
        ModelCatalog.register_custom_model("tuple_spy", TupleSpyModel)
        register_env("nested_ma", lambda _: NestedMultiAgentEnv())
        act_space = spaces.Discrete(2)
        pg = PGAgent(
            env="nested_ma",
            config={
                "num_workers": 0,
                "sample_batch_size": 5,
                "train_batch_size": 5,
                "multiagent": {
                    "policy_graphs": {
                        "tuple_policy": (
                            PGPolicyGraph, TUPLE_SPACE, act_space,
                            {"model": {"custom_model": "tuple_spy"}}),
                        "dict_policy": (
                            PGPolicyGraph, DICT_SPACE, act_space,
                            {"model": {"custom_model": "dict_spy"}}),
                    },
                    "policy_mapping_fn": lambda a: {
                        "tuple_agent": "tuple_policy",
                        "dict_agent": "dict_policy"}[a],
                },
            })
        pg.train()

        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "d_spy_in_{}".format(i)))
            pos_i = DICT_SAMPLES[i]["sensors"]["position"].tolist()
            cam_i = DICT_SAMPLES[i]["sensors"]["front_cam"][0].tolist()
            task_i = one_hot(
                DICT_SAMPLES[i]["inner_state"]["job_status"]["task"], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)

        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "t_spy_in_{}".format(i)))
            pos_i = TUPLE_SAMPLES[i][0].tolist()
            cam_i = TUPLE_SAMPLES[i][1][0].tolist()
            task_i = one_hot(TUPLE_SAMPLES[i][2], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)
Code Example #23
 def testInvalidModel(self):
     ModelCatalog.register_custom_model("invalid", InvalidModel)
     self.assertRaises(ValueError, lambda: PGAgent(
         env="CartPole-v0", config={
             "model": {
                 "custom_model": "invalid",
             },
         }))
Code Example #24
 def testQueryEvaluators(self):
     register_env("test", lambda _: gym.make("CartPole-v0"))
     pg = PGAgent(
         env="test", config={"num_workers": 2, "sample_batch_size": 5})
     results = pg.optimizer.foreach_evaluator(lambda ev: ev.batch_steps)
     results2 = pg.optimizer.foreach_evaluator_with_index(
         lambda ev, i: (i, ev.batch_steps))
     self.assertEqual(results, [5, 5, 5])
     self.assertEqual(results2, [(0, 5), (1, 5), (2, 5)])
Code Example #25
File: test_io.py Project: zofuthan/ray
    def testMultiAgent(self):
        register_env("multi_cartpole", lambda _: MultiCartpole(10))
        single_env = gym.make("CartPole-v0")

        def gen_policy():
            obs_space = single_env.observation_space
            act_space = single_env.action_space
            return (PGPolicyGraph, obs_space, act_space, {})

        pg = PGAgent(
            env="multi_cartpole",
            config={
                "num_workers": 0,
                "output": self.test_dir,
                "multiagent": {
                    "policy_graphs": {
                        "policy_1": gen_policy(),
                        "policy_2": gen_policy(),
                    },
                    "policy_mapping_fn":
                    (lambda agent_id: random.choice(["policy_1", "policy_2"])),
                },
            })
        pg.train()
        self.assertEqual(len(os.listdir(self.test_dir)), 1)

        pg.stop()
        pg = PGAgent(
            env="multi_cartpole",
            config={
                "num_workers": 0,
                "input": self.test_dir,
                "input_evaluation": "simulation",
                "train_batch_size": 2000,
                "multiagent": {
                    "policy_graphs": {
                        "policy_1": gen_policy(),
                        "policy_2": gen_policy(),
                    },
                    "policy_mapping_fn":
                    (lambda agent_id: random.choice(["policy_1", "policy_2"])),
                },
            })
        for _ in range(50):
            result = pg.train()
            if not np.isnan(result["episode_reward_mean"]):
                return  # simulation ok
            time.sleep(0.1)
        assert False, "did not see any simulation results"
Code Example #26
 def testInvalidModel2(self):
     ModelCatalog.register_custom_model("invalid2", InvalidModel2)
     self.assertRaisesRegexp(
         ValueError, "Expected output.*",
         lambda: PGAgent(env="CartPole-v0",
                         config={
                             "model": {
                                 "custom_model": "invalid2",
                             },
                         }))
Code Example #27
 def testCallbacks(self):
     counts = Counter()
     pg = PGAgent(env="CartPole-v0",
                  config={
                      "num_workers": 0,
                      "sample_batch_size": 50,
                      "callbacks": {
                          "on_episode_start":
                          lambda x: counts.update({"start": 1}),
                          "on_episode_step":
                          lambda x: counts.update({"step": 1}),
                          "on_episode_end":
                          lambda x: counts.update({"end": 1}),
                          "on_sample_end":
                          lambda x: counts.update({"sample": 1}),
                      },
                  })
     pg.train()
     pg.train()
     pg.train()
     pg.train()
     self.assertEqual(counts["sample"], 4)
     self.assertGreater(counts["start"], 0)
     self.assertGreater(counts["end"], 0)
     self.assertGreater(counts["step"], 200)
     self.assertLess(counts["step"], 400)
Code Example #28
    def testTrainMultiCartpoleMultiPolicy(self):
        n = 10
        register_env("multi_cartpole", lambda _: MultiCartpole(n))
        single_env = gym.make("CartPole-v0")

        def gen_policy():
            config = {
                "gamma": random.choice([0.5, 0.8, 0.9, 0.95, 0.99]),
                "n_step": random.choice([1, 2, 3, 4, 5]),
            }
            obs_space = single_env.observation_space
            act_space = single_env.action_space
            return (PGPolicyGraph, obs_space, act_space, config)

        pg = PGAgent(
            env="multi_cartpole",
            config={
                "num_workers": 0,
                "multiagent": {
                    "policy_graphs": {
                        "policy_1": gen_policy(),
                        "policy_2": gen_policy(),
                    },
                    "policy_mapping_fn": lambda agent_id: "policy_1",
                },
            })

        # Just check that it runs without crashing
        for i in range(10):
            result = pg.train()
            print("Iteration {}, reward {}, timesteps {}".format(
                i, result["episode_reward_mean"], result["timesteps_total"]))
        self.assertTrue(
            pg.compute_action([0, 0, 0, 0], policy_id="policy_1") in [0, 1])
        self.assertTrue(
            pg.compute_action([0, 0, 0, 0], policy_id="policy_2") in [0, 1])
        self.assertRaises(
            KeyError,
            lambda: pg.compute_action([0, 0, 0, 0], policy_id="policy_3"))
Code Example #29
 def testCallbacks(self):
     counts = Counter()
     pg = PGAgent(
         env="CartPole-v0", config={
             "num_workers": 0,
             "sample_batch_size": 50,
             "train_batch_size": 50,
             "callbacks": {
                 "on_episode_start": lambda x: counts.update({"start": 1}),
                 "on_episode_step": lambda x: counts.update({"step": 1}),
                 "on_episode_end": lambda x: counts.update({"end": 1}),
                 "on_sample_end": lambda x: counts.update({"sample": 1}),
             },
         })
     pg.train()
     pg.train()
     pg.train()
     pg.train()
     self.assertEqual(counts["sample"], 4)
     self.assertGreater(counts["start"], 0)
     self.assertGreater(counts["end"], 0)
     self.assertGreater(counts["step"], 200)
     self.assertLess(counts["step"], 400)
Code Example #30
File: test_io.py Project: robertnishihara/ray
    def testMultiAgent(self):
        register_env("multi_cartpole", lambda _: MultiCartpole(10))
        single_env = gym.make("CartPole-v0")

        def gen_policy():
            obs_space = single_env.observation_space
            act_space = single_env.action_space
            return (PGPolicyGraph, obs_space, act_space, {})

        pg = PGAgent(
            env="multi_cartpole",
            config={
                "num_workers": 0,
                "output": self.test_dir,
                "multiagent": {
                    "policy_graphs": {
                        "policy_1": gen_policy(),
                        "policy_2": gen_policy(),
                    },
                    "policy_mapping_fn": (
                        lambda agent_id: random.choice(
                            ["policy_1", "policy_2"])),
                },
            })
        pg.train()
        self.assertEqual(len(os.listdir(self.test_dir)), 1)

        pg.stop()
        pg = PGAgent(
            env="multi_cartpole",
            config={
                "num_workers": 0,
                "input": self.test_dir,
                "input_evaluation": ["simulation"],
                "train_batch_size": 2000,
                "multiagent": {
                    "policy_graphs": {
                        "policy_1": gen_policy(),
                        "policy_2": gen_policy(),
                    },
                    "policy_mapping_fn": (
                        lambda agent_id: random.choice(
                            ["policy_1", "policy_2"])),
                },
            })
        for _ in range(50):
            result = pg.train()
            if not np.isnan(result["episode_reward_mean"]):
                return  # simulation ok
            time.sleep(0.1)
        assert False, "did not see any simulation results"
Code Example #31
 def testQueryEvaluators(self):
     register_env("test", lambda _: gym.make("CartPole-v0"))
     pg = PGAgent(env="test",
                  config={
                      "num_workers": 2,
                      "sample_batch_size": 5,
                      "num_envs_per_worker": 2,
                  })
     results = pg.optimizer.foreach_evaluator(
         lambda ev: ev.sample_batch_size)
     results2 = pg.optimizer.foreach_evaluator_with_index(
         lambda ev, i: (i, ev.sample_batch_size))
     results3 = pg.optimizer.foreach_evaluator(
         lambda ev: ev.foreach_env(lambda env: 1))
     self.assertEqual(results, [10, 10, 10])
     self.assertEqual(results2, [(0, 10), (1, 10), (2, 10)])
     self.assertEqual(results3, [[1, 1], [1, 1], [1, 1]])
Code Example #32
    def testTrainMultiCartpoleMultiPolicy(self):
        n = 10
        register_env("multi_cartpole", lambda _: MultiCartpole(n))
        single_env = gym.make("CartPole-v0")

        def gen_policy():
            config = {
                "gamma": random.choice([0.5, 0.8, 0.9, 0.95, 0.99]),
                "n_step": random.choice([1, 2, 3, 4, 5]),
            }
            obs_space = single_env.observation_space
            act_space = single_env.action_space
            return (None, obs_space, act_space, config)

        pg = PGAgent(
            env="multi_cartpole",
            config={
                "num_workers": 0,
                "multiagent": {
                    "policy_graphs": {
                        "policy_1": gen_policy(),
                        "policy_2": gen_policy(),
                    },
                    "policy_mapping_fn": lambda agent_id: "policy_1",
                },
            })

        # Just check that it runs without crashing
        for i in range(10):
            result = pg.train()
            print("Iteration {}, reward {}, timesteps {}".format(
                i, result["episode_reward_mean"], result["timesteps_total"]))
        self.assertTrue(
            pg.compute_action([0, 0, 0, 0], policy_id="policy_1") in [0, 1])
        self.assertTrue(
            pg.compute_action([0, 0, 0, 0], policy_id="policy_2") in [0, 1])
        self.assertRaises(
            KeyError,
            lambda: pg.compute_action([0, 0, 0, 0], policy_id="policy_3"))
Code Example #33
 def testNoStepOnInit(self):
     register_env("fail", lambda _: FailOnStepEnv())
     pg = PGAgent(env="fail", config={"num_workers": 1})
     self.assertRaises(Exception, lambda: pg.train())
Code Example #34
    # initialize trainer - since all agents use the same policy graph, one trainer is fine here;
    # otherwise we'd need one trainer per policy graph used for training
    trainer = PGAgent(env='Pomme_v0',
                      config={
                          'multiagent': {
                              'policy_graphs': {
                                  'agent_0':
                                  (PGPolicyGraph, obs_space, act_space, {
                                      "gamma": 0.85
                                  }),
                                  'agent_1':
                                  (PGPolicyGraph, obs_space, act_space, {
                                      "gamma": 0.90
                                  }),
                                  'agent_2':
                                  (PGPolicyGraph, obs_space, act_space, {
                                      "gamma": 0.95
                                  }),
                                  'agent_3':
                                  (PGPolicyGraph, obs_space, act_space, {
                                      "gamma": 0.99
                                  }),
                              },
                              'policy_mapping_fn': lambda agent_id: agent_id
                          },
                          'model': {
                              'custom_preprocessor': 'Featurize_Preprocessor'
                          },
                          'env_config': env_config
                      })
    print('\nTrainer Config:\n', trainer.config, '\n')
Code Example #35
    def testRolloutDictSpace(self):
        register_env("nested", lambda _: NestedDictEnv())
        agent = PGAgent(env="nested")
        agent.train()
        path = agent.save()
        agent.stop()

        # Test train works on restore
        agent2 = PGAgent(env="nested")
        agent2.restore(path)
        agent2.train()

        # Test rollout works on restore
        rollout(agent2, "nested", 100)
Code Example #36
                "num_workers": 0,
                # Configure the agent to run short iterations for debugging
                "exploration_fraction": 0.01,
                "learning_starts": 100,
                "timesteps_per_iteration": 200,
                "env_config": {
                    "observation_size": args.observation_size,
                    "action_size": args.action_size,
                },
            })
    elif args.run == "PG":
        agent = PGAgent(
            env="srv",
            config={
                "num_workers": 0,
                "env_config": {
                    "observation_size": args.observation_size,
                    "action_size": args.action_size,
                },
            })

    # Attempt to restore from checkpoint if possible.
    if os.path.exists(args.checkpoint_file):
        checkpoint_file = open(args.checkpoint_file).read()
        print("Restoring from checkpoint path", checkpoint_file)
        agent.restore(checkpoint_file)

    # Serving and training loop
    while True:
        print(pretty_print(agent.train()))
        checkpoint_file = agent.save()
Code Example #37
File: starcraft_env.py Project: zofuthan/ray
        "num_workers": 4,
        "model": {
            "custom_model": "mask_model",
        },
        "env_config": {
            "pymarl_path": path_to_pymarl
        }
    }
    if args.run.lower() == "qmix":

        def grouped_sc2(cfg):
            env = SC2MultiAgentEnv(cfg)
            agent_list = list(range(env._starcraft_env.n_agents))
            grouping = {
                "group_1": agent_list,
            }
            obs_space = Tuple([env.observation_space for i in agent_list])
            act_space = Tuple([env.action_space for i in agent_list])
            return env.with_agent_groups(
                grouping, obs_space=obs_space, act_space=act_space)

        register_env("grouped_starcraft", grouped_sc2)
        agent = QMixAgent(env="grouped_starcraft", config=agent_cfg)
    elif args.run.lower() == "pg":
        agent = PGAgent(env="starcraft", config=agent_cfg)
    elif args.run.lower() == "ppo":
        agent_cfg.update({"vf_share_layers": True})
        agent = PPOAgent(env="starcraft", config=agent_cfg)
    for i in range(args.num_iters):
        print(pretty_print(agent.train()))
Code Example #38
File: test_nested_spaces.py Project: zdpau/ray-1
    def testRolloutDictSpace(self):
        register_env("nested", lambda _: NestedDictEnv())
        agent = PGAgent(env="nested")
        agent.train()
        path = agent.save()
        agent.stop()

        # Test train works on restore
        agent2 = PGAgent(env="nested")
        agent2.restore(path)
        agent2.train()

        # Test rollout works on restore
        rollout(agent2, "nested", 100)
Code Example #39
 def testNoStepOnInit(self):
     register_env("fail", lambda _: FailOnStepEnv())
     pg = PGAgent(env="fail", config={"num_workers": 1})
     self.assertRaises(Exception, lambda: pg.train())