Example #1
def gen_config(**kwargs):
    scenario_path = Path(kwargs["scenario"]).absolute()
    agent_missions_count = Scenario.discover_agent_missions_count(
        scenario_path)
    if agent_missions_count == 0:
        agent_ids = ["default_policy"]
    else:
        agent_ids = [f"AGENT-{i}" for i in range(agent_missions_count)]

    config = load_config(kwargs["config_file"],
                         mode=kwargs.get("mode", "training"))
    agents = {agent_id: AgentSpec(**config["agent"]) for agent_id in agent_ids}

    config["env_config"].update({
        "seed": 42,
        "scenarios": [str(scenario_path)],
        "headless": kwargs["headless"],
        "agent_specs": agents,
    })

    obs_space, act_space = config["policy"][1:3]
    tune_config = config["run"]["config"]

    if kwargs["paradigm"] == "centralized":
        config["env_config"].update({
            "obs_space":
            gym.spaces.Tuple([obs_space] * agent_missions_count),
            "act_space":
            gym.spaces.Tuple([act_space] * agent_missions_count),
            "groups": {
                "group": agent_ids
            },
        })
        tune_config.update(config["policy"][-1])
    else:
        policies = {}
        for k in agents:
            policies[k] = config["policy"][:-1] + ({
                **config["policy"][-1], "agent_id":
                k
            }, )
        tune_config.update({
            "multiagent": {
                "policies": policies,
                "policy_mapping_fn": lambda agent_id: agent_id,
            }
        })

    return config
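A minimal sketch of how gen_config might be called, assuming the keyword names used above; the scenario and config paths are placeholders, not from the original:

if __name__ == "__main__":
    # Hypothetical invocation; point these at a real scenario and config file.
    config = gen_config(
        scenario="scenarios/loop",
        config_file="configs/ppo.yaml",
        headless=True,
        paradigm="decentralized",
        mode="training",
    )
    print(sorted(config["run"]["config"].keys()))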
Example #2
def main(
    scenario,
    config_file,
    checkpoint,
    num_steps=1000,
    num_episodes=10,
    paradigm="decentralized",
    headless=False,
):
    scenario_path = Path(scenario).absolute()
    agent_missions_count = Scenario.discover_agent_missions_count(scenario_path)
    if agent_missions_count == 0:
        agent_ids = ["default_policy"]
    else:
        agent_ids = [f"AGENT-{i}" for i in range(agent_missions_count)]

    config = load_config(config_file, mode="evaluate")
    agents = {
        agent_id: AgentSpec(
            **config["agent"], interface=AgentInterface(**config["interface"])
        )
        for agent_id in agent_ids
    }

    config["env_config"].update(
        {
            "seed": 42,
            "scenarios": [str(scenario_path)],
            "headless": headless,
            "agent_specs": agents,
        }
    )

    obs_space, act_space = config["policy"][1:3]
    tune_config = config["run"]["config"]

    if paradigm == "centralized":
        config["env_config"].update(
            {
                "obs_space": gym.spaces.Tuple([obs_space] * agent_missions_count),
                "act_space": gym.spaces.Tuple([act_space] * agent_missions_count),
                "groups": {"group": agent_ids},
            }
        )
        tune_config.update(config["policy"][-1])
    else:
        policies = {}
        for k in agents:
            policies[k] = config["policy"][:-1] + (
                {**config["policy"][-1], "agent_id": k},
            )
        tune_config.update(
            {
                "multiagent": {
                    "policies": policies,
                    "policy_mapping_fn": lambda agent_id: agent_id,
                }
            }
        )

    ray.init()
    trainer_cls = config["trainer"]
    trainer_config = {"env_config": config["env_config"]}
    if paradigm != "centralized":
        trainer_config.update({"multiagent": tune_config["multiagent"]})
    else:
        trainer_config.update({"model": tune_config["model"]})

    trainer = trainer_cls(env=tune_config["env"], config=trainer_config)

    trainer.restore(checkpoint)
    rollout(trainer, None, num_steps, num_episodes)
    trainer.stop()
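For reference, a hedged sketch of a command-line wrapper around main; the argparse options and defaults are assumptions, not part of the original (which may use a different CLI library):

if __name__ == "__main__":
    import argparse

    # Hypothetical CLI; argument names mirror main()'s signature.
    parser = argparse.ArgumentParser(description="Roll out a trained checkpoint.")
    parser.add_argument("scenario")
    parser.add_argument("config_file")
    parser.add_argument("checkpoint")
    parser.add_argument("--num-steps", type=int, default=1000)
    parser.add_argument("--num-episodes", type=int, default=10)
    parser.add_argument("--paradigm", default="decentralized")
    parser.add_argument("--headless", action="store_true")
    args = parser.parse_args()

    main(
        args.scenario,
        args.config_file,
        args.checkpoint,
        num_steps=args.num_steps,
        num_episodes=args.num_episodes,
        paradigm=args.paradigm,
        headless=args.headless,
    )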
Example #3
    def __init__(self, load_path, algorithm, policy_name, yaml_path):
        load_path = str(load_path)
        if algorithm == "ppo":
            from ray.rllib.agents.ppo.ppo_tf_policy import PPOTFPolicy as LoadPolicy
        elif algorithm in "a2c":
            from ray.rllib.agents.a3c.a3c_tf_policy import A3CTFPolicy as LoadPolicy
            from ray.rllib.agents.a3c import DEFAULT_CONFIG
        elif algorithm == "pg":
            from ray.rllib.agents.pg.pg_tf_policy import PGTFPolicy as LoadPolicy
        elif algorithm == "dqn":
            from ray.rllib.agents.dqn import DQNTFPolicy as LoadPolicy
        elif algorithm == "maac":
            from benchmark.agents.maac.tf_policy import CA2CTFPolicy as LoadPolicy
            from benchmark.agents.maac.tf_policy import DEFAULT_CONFIG
        elif algorithm == "maddpg":
            from benchmark.agents.maddpg.tf_policy import MADDPG2TFPolicy as LoadPolicy
            from benchmark.agents.maddpg.tf_policy import DEFAULT_CONFIG
        elif algorithm == "mfac":
            from benchmark.agents.mfac.tf_policy import MFACTFPolicy as LoadPolicy
            from benchmark.agents.mfac.tf_policy import DEFAULT_CONFIG
        elif algorithm == "networked_pg":
            from benchmark.agents.networked_pg.tf_policy import (
                NetworkedPG as LoadPolicy,
            )
            from benchmark.agents.networked_pg.tf_policy import (
                PG_DEFAULT_CONFIG as DEFAULT_CONFIG,
            )
        else:
            raise ValueError(f"Unsupported algorithm: {algorithm}")

        yaml_path = BASE_DIR / yaml_path
        load_path = BASE_DIR / f"log/results/run/{load_path}"

        config = load_config(yaml_path)
        observation_space = config["policy"][1]
        action_space = config["policy"][2]
        pconfig = DEFAULT_CONFIG

        pconfig["model"].update(config["policy"][-1].get("model", {}))
        pconfig["agent_id"] = policy_name

        self._prep = ModelCatalog.get_preprocessor_for_space(observation_space)
        self._sess = tf.Session(graph=tf.get_default_graph())

        with tf.name_scope(policy_name):
            # Observation space needs to be flattened before passed to the policy
            flat_obs_space = self._prep.observation_space
            policy = LoadPolicy(flat_obs_space, action_space, pconfig)
            self._sess.run(tf.global_variables_initializer())
            objs = pickle.load(open(load_path, "rb"))
            objs = pickle.loads(objs["worker"])
            state = objs["state"]
            weights = state[policy_name]
            policy.set_weights(weights)

        # for op in tf.get_default_graph().get_operations():
        #     print(str(op.name))

        # These tensor names were found by inspecting the trained model
        if algorithm == "ppo":
            # CRUCIAL FOR SAFETY:
            #   We use Tensor("split") instead of Tensor("add") to force
            #   PPO to be deterministic.
            self._input_node = self._sess.graph.get_tensor_by_name(
                f"{policy_name}/observation:0"
            )
            self._output_node = self._sess.graph.get_tensor_by_name(
                f"{policy_name}/split:0"
            )
        elif algorithm == "dqn":
            self._input_node = self._sess.graph.get_tensor_by_name(
                f"{policy_name}/observations:0"
            )
            self._output_node = tf.argmax(
                self._sess.graph.get_tensor_by_name(
                    f"{policy_name}/value_out/BiasAdd:0"
                ),
                axis=1,
            )
        elif algorithm == "maac":
            self._input_node = self._sess.graph.get_tensor_by_name(
                f"{policy_name}/policy-inputs:0"
            )
            self._output_node = tf.argmax(
                self._sess.graph.get_tensor_by_name(
                    f"{policy_name}/logits_out/BiasAdd:0"
                ),
                axis=1,
            )
        elif algorithm == "maddpg":
            self._input_node = self._sess.graph.get_tensor_by_name(
                f"{policy_name}/obs_2:0"
            )
            self._output_node = tf.argmax(
                self._sess.graph.get_tensor_by_name(
                    f"{policy_name}/actor/AGENT_2_actor_RelaxedOneHotCategorical_1/sample/AGENT_2_actor_exp/forward/Exp:0"
                )
            )
        else:
            self._input_node = self._sess.graph.get_tensor_by_name(
                f"{policy_name}/observations:0"
            )
            self._output_node = tf.argmax(
                self._sess.graph.get_tensor_by_name(f"{policy_name}/fc_out/BiasAdd:0"),
                axis=1,
            )
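Only __init__ is shown above; a hedged sketch of an act method that could sit next to it in the same class (the method name and the batch-of-one handling are assumptions):

    def act(self, obs):
        # Flatten the raw observation with the preprocessor created in __init__,
        # then run the graph from the input placeholder to the selected output
        # node. A batch of one observation is assumed.
        obs = self._prep.transform(obs)
        action = self._sess.run(
            self._output_node, feed_dict={self._input_node: [obs]}
        )
        return action[0]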
Example #4
def main(
    scenario,
    config_file,
    log_dir,
    restore_path=None,
    num_workers=1,
    horizon=1000,
    paradigm="decentralized",
    headless=False,
    cluster=False,
):
    if cluster:
        # ray.init(address="auto", redis_password="******",memory=500*1024*1024)
        ray.init(memory=500 * 1024 * 1024)
        print("--------------- Ray startup ------------\n{}".format(
            ray.state.cluster_resources()))
    scenario_path = Path(scenario).absolute()
    agent_missions_count = Scenario.discover_agent_missions_count(
        scenario_path)
    if agent_missions_count == 0:
        agent_ids = ["default_policy"]
    else:
        agent_ids = [f"AGENT-{i}" for i in range(agent_missions_count)]

    config = load_config(config_file)
    agents = {
        agent_id: AgentSpec(**config["agent"],
                            interface=AgentInterface(**config["interface"]))
        for agent_id in agent_ids
    }

    config["env_config"].update({
        "seed": 42,
        "scenarios": [str(scenario_path)],
        "headless": headless,
        "agent_specs": agents,
    })

    obs_space, act_space = config["policy"][1:3]
    tune_config = config["run"]["config"]

    if paradigm == "centralized":
        config["env_config"].update({
            "obs_space":
            Tuple([obs_space] * agent_missions_count),
            "act_space":
            Tuple([act_space] * agent_missions_count),
            "groups": {
                "group": agent_ids
            },
        })
        tune_config.update(config["policy"][-1])
    else:
        policies = {}

        for k in agents:
            policies[k] = config["policy"][:-1] + ({
                **config["policy"][-1], "agent_id":
                k
            }, )

        tune_config.update({
            "multiagent": {
                "policies": policies,
                "policy_mapping_fn": lambda agent_id: agent_id,
            },
        })

    tune_config.update({
        "env_config": config["env_config"],
        "callbacks": SimpleCallbacks,
        "num_workers": num_workers,
        "horizon": horizon,
    })

    experiment_name = EXPERIMENT_NAME.format(
        scenario=scenario_path.stem,
        n_agent=len(agents),
    )

    log_dir = Path(log_dir).expanduser().absolute() / RUN_NAME
    log_dir.mkdir(parents=True, exist_ok=True)

    if restore_path is not None:
        restore_path = Path(restore_path).expanduser()
        print(f"Loading model from {restore_path}")

    # run experiments
    config["run"].update({
        "run_or_experiment": config["trainer"],
        "name": experiment_name,
        "local_dir": str(log_dir),
        "restore": restore_path,
    })
    analysis = tune.run(**config["run"])

    print(analysis.dataframe().head())
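A minimal sketch of launching training through main; the scenario, config, and log-dir paths are placeholders, not from the original:

if __name__ == "__main__":
    # Hypothetical invocation; point these at a real scenario and benchmark config.
    main(
        scenario="scenarios/intersections/4lane",
        config_file="configs/ppo.yaml",
        log_dir="~/ray_results",
        num_workers=4,
        horizon=1000,
        paradigm="decentralized",
        headless=True,
        cluster=False,
    )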