Code Example #1
def launch(config):
    config = convert_dottable(config)
    # Step 1: Initialize a CIM environment that uses a toy dataset.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    action_space = list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions))

    # Step 2: Create state, action and experience shapers. Exploration is handled by the
    # parameter scheduler created in Step 4.
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=action_space)
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # Step 3: Create agents and an agent manager.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN_INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )

    # Step 4: Create an actor and a learner to start the training process.
    scheduler = TwoPhaseLinearParameterScheduler(config.main_loop.max_episode, **config.main_loop.exploration)
    actor = SimpleActor(env, agent_manager)
    learner = SimpleLearner(
        agent_manager, actor, scheduler,
        logger=Logger("cim_learner", format_=LogFormat.simple, auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
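
A note on the config that launch() above consumes: its shape can be inferred from the attribute accesses in the snippet (config.env.*, config.agents.algorithm.*, config.main_loop.*). The following is a minimal sketch of such a config with placeholder values, not the project's actual config.yml.

# Hedged sketch only: the keys below mirror the attribute accesses in launch() above,
# and every value is an illustrative placeholder rather than the real config.yml.
raw_config = {
    "env": {
        "scenario": "cim",
        "topology": "toy.4p_ssdd_l0.0",   # placeholder CIM topology name
        "durations": 1120,                # placeholder episode length in ticks
        "state_shaping": {},              # kwargs for CIMStateShaper
        "experience_shaping": {},         # kwargs for TruncatedExperienceShaper
    },
    "agents": {"algorithm": {"num_actions": 21}},            # 21 evenly spaced actions in [-1.0, 1.0]
    "main_loop": {"max_episode": 100, "exploration": {}},    # exploration kwargs for the scheduler
}

launch(raw_config)   # launch() converts the dict into a dottable config itself
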
Code Example #2
def launch(config):
    config = convert_dottable(config)
    env = Env(config.env.scenario,
              config.env.topology,
              durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    config["agents"]["input_dim"] = CIMStateShaper(
        **config.env.state_shaping).dim
    agent_manager = POAgentManager(name="cim_learner",
                                   mode=AgentManagerMode.TRAIN,
                                   agent_dict=create_po_agents(
                                       agent_id_list, config.agents))

    proxy_params = {
        "group_name": os.environ["GROUP"],
        "expected_peers": {
            "actor": int(os.environ["NUM_ACTORS"])
        },
        "redis_address": ("localhost", 6379)
    }

    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(
            proxy_params=proxy_params,
            experience_collecting_func=merge_experiences_with_trajectory_boundaries
        ),
        scheduler=Scheduler(config.main_loop.max_episode),
        logger=Logger("cim_learner", auto_timestamp=False))
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
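
Because this launcher reads its group name and actor count from the GROUP and NUM_ACTORS environment variables and expects Redis on localhost:6379, one plausible way to drive it is sketched below; the values are placeholders, and the actor processes must join the same group through the same Redis instance.

# Hedged sketch: driving the launcher above through its environment variables.
import os

os.environ["GROUP"] = "cim_po_group"   # must match the group name used by the actor processes
os.environ["NUM_ACTORS"] = "2"         # the learner waits for this many actors to report back

launch(config)   # `config` is the training config, loaded elsewhere (e.g. from config.yml)
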
Code Example #3
File: actor.py  Project: yumiaoGitHub/maro
    def as_worker(self, group: str, proxy_options=None, log_dir: str = getcwd()):
        """Executes an event loop where roll-outs are performed on demand from a remote learner.

        Args:
            group (str): Identifier of the group to which the actor belongs. It must be the same group name
                assigned to the learner (and decision clients, if any).
            proxy_options (dict): Keyword parameters for the internal ``Proxy`` instance. See ``Proxy`` class
                for details. Defaults to None.
            log_dir (str): Directory used by the internal ``Logger`` to dump log files. Defaults to the
                current working directory.
        """
        if proxy_options is None:
            proxy_options = {}
        proxy = Proxy(group, "actor", {"learner": 1}, **proxy_options)
        logger = Logger(proxy.name, dump_folder=log_dir)
        for msg in proxy.receive():
            if msg.tag == MessageTag.EXIT:
                logger.info("Exiting...")
                proxy.close()
                sys.exit(0)
            elif msg.tag == MessageTag.ROLLOUT:
                ep = msg.payload[PayloadKey.ROLLOUT_INDEX]
                logger.info(f"Rolling out ({ep})...")
                metrics, rollout_data = self.roll_out(
                    ep,
                    training=msg.payload[PayloadKey.TRAINING],
                    model_by_agent=msg.payload[PayloadKey.MODEL],
                    exploration_params=msg.payload[PayloadKey.EXPLORATION_PARAMS]
                )
                if rollout_data is None:
                    logger.info(f"Roll-out {ep} aborted")
                else:
                    logger.info(f"Roll-out {ep} finished")
                    rollout_finish_msg = Message(
                        MessageTag.FINISHED,
                        proxy.name,
                        proxy.peers_name["learner"][0],
                        payload={
                            PayloadKey.ROLLOUT_INDEX: ep,
                            PayloadKey.METRICS: metrics,
                            PayloadKey.DETAILS: rollout_data
                        }
                    )
                    proxy.isend(rollout_finish_msg)
                self.env.reset()
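
A worker process typically constructs an actor elsewhere and then hands control to as_worker(), which blocks in proxy.receive() until an EXIT message arrives. The sketch below shows that pattern; build_actor() is a hypothetical placeholder, and the redis_address option simply mirrors the learner-side examples.

# Hedged sketch of an actor-side entry point for the as_worker() loop above.
import os

if __name__ == "__main__":
    actor = build_actor()   # placeholder: construct the Actor (env, agent, shapers) however your project does
    actor.as_worker(
        group=os.environ["GROUP"],                             # must match the learner's group name
        proxy_options={"redis_address": ("localhost", 6379)}   # assumed option, mirroring the learner examples
    )
    # as_worker() only returns via sys.exit(0) after receiving EXIT from the learner.
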
Code Example #4
def __init__(self,
             group_name: str,
             num_actors: int,
             update_trigger: str = None,
             proxy_options: dict = None,
             log_dir: str = getcwd()):
    self.agent = None
    peers = {"actor": num_actors}
    if proxy_options is None:
        proxy_options = {}
    self._proxy = Proxy(group_name, "learner", peers, **proxy_options)
    self._actors = self._proxy.peers_name["actor"]  # remote actor IDs
    self._registry_table = RegisterTable(self._proxy.peers_name)
    if update_trigger is None:
        update_trigger = len(self._actors)
    self._registry_table.register_event_handler(
        f"actor:{MessageTag.FINISHED.value}:{update_trigger}",
        self._on_rollout_finish)
    self.logger = Logger("ACTOR_PROXY", dump_folder=log_dir)
Code Example #5
def __init__(self,
             actor: Union[Actor, ActorProxy],
             agent: Union[AbsAgent, MultiAgentWrapper] = None,
             log_dir: str = getcwd()):
    super().__init__()
    if isinstance(actor, ActorProxy):
        assert agent, "agent cannot be None when the actor is a proxy."
        self.agent = agent
    else:
        # The agent passed to __init__ is ignored in this case
        self.agent = actor.agent
    self.actor = actor
    self.logger = Logger("LEARNER", dump_folder=log_dir)
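
The branch above means the agent can come from two places: with an ActorProxy (remote roll-outs) the learner must be handed the agent explicitly, while with a local Actor the actor already owns it and any agent argument is ignored. A hedged sketch of both call patterns, assuming the enclosing class is named Learner and that the actors and the agent are built elsewhere:

# Hedged sketch of the two ways the __init__ above may be called. `Learner` is the
# assumed name of the enclosing class; local_actor, remote_actor_proxy and my_agent
# are placeholders constructed elsewhere.

# Local roll-outs: the Actor instance already carries its agent, so none is passed.
learner = Learner(actor=local_actor)

# Remote roll-outs: the ActorProxy holds no agent, so the learner must receive one.
learner = Learner(actor=remote_actor_proxy, agent=my_agent)
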
Code Example #6
File: dist_learner.py  Project: yourmoonlight/maro
def launch(config, distributed_config):
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)
    env = Env(config.env.scenario,
              config.env.topology,
              durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]

    config["agents"]["algorithm"]["input_dim"] = CIMStateShaper(
        **config.env.state_shaping).dim
    agent_manager = DQNAgentManager(name="cim_learner",
                                    mode=AgentManagerMode.TRAIN,
                                    agent_dict=create_dqn_agents(
                                        agent_id_list, config.agents))

    proxy_params = {
        "group_name": os.environ["GROUP"] if "GROUP" in os.environ else distributed_config.group,
        "expected_peers": {
            "actor": int(
                os.environ["NUM_ACTORS"] if "NUM_ACTORS" in os.environ else distributed_config.num_actors
            )
        },
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }

    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(
            proxy_params=proxy_params,
            experience_collecting_func=concat_experiences_by_agent),
        scheduler=TwoPhaseLinearParameterScheduler(
            config.main_loop.max_episode, **config.main_loop.exploration),
        logger=Logger("cim_learner", auto_timestamp=False))
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
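
This launcher falls back to distributed_config when the GROUP / NUM_ACTORS environment variables are absent. Judging from the fields accessed above, the distributed config needs at least the shape sketched below; all values are placeholders.

# Hedged sketch of a distributed_config accepted by launch() above; only the fields
# actually read in the snippet are listed.
distributed_config = {
    "group": "cim_dqn_group",      # fallback for the GROUP environment variable
    "num_actors": 2,               # fallback for the NUM_ACTORS environment variable
    "redis": {
        "hostname": "localhost",
        "port": 6379,
    },
}

launch(config, distributed_config)   # `config` is the main training config, loaded elsewhere
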
Code Example #7
File: launcher.py  Project: yumiaoGitHub/maro
from ilp_agent import IlpAgent

os.environ['LOG_LEVEL'] = 'CRITICAL'
FILE_PATH = os.path.split(os.path.realpath(__file__))[0]
CONFIG_PATH = os.path.join(FILE_PATH, "config.yml")
with io.open(CONFIG_PATH, "r") as in_file:
    raw_config = yaml.safe_load(in_file)
    config = convert_dottable(raw_config)

LOG_PATH = os.path.join(FILE_PATH, "log", config.experiment_name)
if not os.path.exists(LOG_PATH):
    os.makedirs(LOG_PATH)
simulation_logger = Logger(tag="simulation",
                           format_=LogFormat.none,
                           dump_folder=LOG_PATH,
                           dump_mode="w",
                           auto_timestamp=False)
ilp_logger = Logger(tag="ilp",
                    format_=LogFormat.none,
                    dump_folder=LOG_PATH,
                    dump_mode="w",
                    auto_timestamp=False)

if __name__ == "__main__":
    start_time = timeit.default_timer()

    env = Env(scenario=config.env.scenario,
              topology=config.env.topology,
              start_tick=config.env.start_tick,
              durations=config.env.durations,
Code Example #8
File: launcher.py  Project: zhangruiskyline/maro
    config_path = os.path.join(real_path, "config.yml")
    config = load_config(config_path)

    # Generate log path.
    date_str = datetime.datetime.now().strftime("%Y%m%d")
    time_str = datetime.datetime.now().strftime("%H%M%S.%f")
    subfolder_name = f"{config.env.param.topology}_{time_str}"

    # Log path.
    config.log.path = os.path.join(config.log.path, date_str, subfolder_name)
    if not os.path.exists(config.log.path):
        os.makedirs(config.log.path)

    simulation_logger = Logger(tag="simulation",
                               dump_folder=config.log.path,
                               dump_mode="w",
                               auto_timestamp=False)

    # Create a demo environment to retrieve environment information.
    simulation_logger.info(
        "Approximating the experience quantity of each agent...")
    demo_env = Env(**config.env.param)
    config.env.exp_per_ep = decision_cnt_analysis(demo_env,
                                                  pv=True,
                                                  buffer_size=8)
    simulation_logger.info(config.env.exp_per_ep)

    # Add some buffer to prevent overlapping.
    config.env.return_scaler, tot_order_amount = return_scaler(
        demo_env, tick=config.env.param.durations, gamma=config.training.gamma)
    simulation_logger.info(
Code Example #9
import os

from maro.simulator import Env
from maro.utils import Logger

from components.agent_manager import DQNAgentManager
from components.config import config
from components.state_shaper import CIMStateShaper


if __name__ == "__main__":
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    state_shaper = CIMStateShaper(**config.state_shaping)
    exploration_config = {"epsilon_range_dict": {"_all_": config.exploration.epsilon_range},
                          "split_point_dict": {"_all_": config.exploration.split_point},
                          "with_cache": config.exploration.with_cache
                          }
    explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config)
    agent_manager = DQNAgentManager(name="cim_remote_learner", agent_id_list=agent_id_list, mode=AgentMode.TRAIN,
                                    state_shaper=state_shaper, explorer=explorer)

    proxy_params = {"group_name": config.distributed.group_name,
                    "expected_peers": config.distributed.learner.peer,
                    "redis_address": (config.distributed.redis.host_name, config.distributed.redis.port)
                    }
    learner = SimpleLearner(trainable_agents=agent_manager,
                            actor=ActorProxy(proxy_params=proxy_params),
                            logger=Logger("distributed_cim_learner", auto_timestamp=False))
    learner.train(total_episodes=config.general.total_training_episodes)
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
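
Here proxy_params is filled from config.distributed; from the attributes accessed, that section needs roughly the shape sketched below, with placeholder values.

# Hedged sketch of the config.distributed section read above; the nesting follows the
# attribute accesses in the snippet and every value is a placeholder.
distributed_section = {
    "group_name": "distributed_cim",
    "learner": {
        "peer": {"actor": 2},      # expected_peers: how many actor processes the learner waits for
    },
    "redis": {"host_name": "localhost", "port": 6379},
}
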
Code Example #10
            **config.experience_shaping.k_step)

    exploration_config = {
        "epsilon_range_dict": {
            "_all_": config.exploration.epsilon_range
        },
        "split_point_dict": {
            "_all_": config.exploration.split_point
        },
        "with_cache": config.exploration.with_cache
    }
    explorer = TwoPhaseLinearExplorer(agent_id_list,
                                      config.general.total_training_episodes,
                                      **exploration_config)
    agent_manager = DQNAgentManager(name="cim_learner",
                                    mode=AgentMode.TRAIN_INFERENCE,
                                    agent_id_list=agent_id_list,
                                    state_shaper=state_shaper,
                                    action_shaper=action_shaper,
                                    experience_shaper=experience_shaper,
                                    explorer=explorer)
    learner = SimpleLearner(trainable_agents=agent_manager,
                            actor=SimpleActor(env=env,
                                              inference_agents=agent_manager),
                            logger=Logger("single_host_cim_learner",
                                          auto_timestamp=False))

    learner.train(total_episodes=config.general.total_training_episodes)
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
Code Example #11
            "_all_": config.exploration.split_point
        },
        "with_cache": config.exploration.with_cache
    }
    explorer = TwoPhaseLinearExplorer(agent_id_list,
                                      config.general.total_training_episodes,
                                      **exploration_config)

    # Step 3: create an agent manager.
    agent_manager = DQNAgentManager(name="cim_learner",
                                    mode=AgentMode.TRAIN_INFERENCE,
                                    agent_id_list=agent_id_list,
                                    state_shaper=state_shaper,
                                    action_shaper=action_shaper,
                                    experience_shaper=experience_shaper,
                                    explorer=explorer)

    # Step 4: Create an actor and a learner to start the training process.
    actor = SimpleActor(env=env, inference_agents=agent_manager)
    learner = SimpleLearner(
        trainable_agents=agent_manager,
        actor=actor,
        logger=Logger(tag="single_host_cim_learner",
                      dump_folder=os.path.join(
                          os.path.split(os.path.realpath(__file__))[0], "log"),
                      auto_timestamp=False))

    learner.train(total_episodes=config.general.total_training_episodes)
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
Code Example #12
class ActorProxy(object):
    """Actor proxy that manages a set of remote actors.

    Args:
        group_name (str): Identifier of the group to which the actor belongs. It must be the same group name
            assigned to the actors (and roll-out clients, if any).
        num_actors (int): Expected number of actors in the group identified by ``group_name``.
        update_trigger (str): Number or percentage of ``MessageTag.FINISHED`` messages required to trigger
            learner updates, i.e., model training. Defaults to None, in which case the number of actors is used.
        proxy_options (dict): Keyword parameters for the internal ``Proxy`` instance. See ``Proxy`` class
            for details. Defaults to None.
        log_dir (str): Directory used by the internal ``Logger`` to dump log files. Defaults to the current
            working directory.
    """
    def __init__(self,
                 group_name: str,
                 num_actors: int,
                 update_trigger: str = None,
                 proxy_options: dict = None,
                 log_dir: str = getcwd()):
        self.agent = None
        peers = {"actor": num_actors}
        if proxy_options is None:
            proxy_options = {}
        self._proxy = Proxy(group_name, "learner", peers, **proxy_options)
        self._actors = self._proxy.peers_name["actor"]  # remote actor IDs
        self._registry_table = RegisterTable(self._proxy.peers_name)
        if update_trigger is None:
            update_trigger = len(self._actors)
        self._registry_table.register_event_handler(
            f"actor:{MessageTag.FINISHED.value}:{update_trigger}",
            self._on_rollout_finish)
        self.logger = Logger("ACTOR_PROXY", dump_folder=log_dir)

    def roll_out(self,
                 index: int,
                 training: bool = True,
                 model_by_agent: dict = None,
                 exploration_params=None):
        """Collect roll-out data from remote actors.

        Args:
            index (int): Index of the roll-out request.
            training (bool): If true, the roll-out request is for training purposes.
            model_by_agent (dict): Models to be broadcast to remote actors for inference. Defaults to None.
            exploration_params: Exploration parameters to be used by the remote roll-out actors. Defaults to None.
        """
        payload = {
            PayloadKey.ROLLOUT_INDEX: index,
            PayloadKey.TRAINING: training,
            PayloadKey.MODEL: model_by_agent,
            PayloadKey.EXPLORATION_PARAMS: exploration_params
        }
        self._proxy.iscatter(MessageTag.ROLLOUT, SessionType.TASK,
                             [(actor, payload) for actor in self._actors])
        self.logger.info(
            f"Sent roll-out requests to {self._actors} for ep-{index}")

        # Receive roll-out results from remote actors
        for msg in self._proxy.receive():
            if msg.payload[PayloadKey.ROLLOUT_INDEX] != index:
                self.logger.info(
                    f"Ignore a message of type {msg.tag} with ep {msg.payload[PayloadKey.ROLLOUT_INDEX]} "
                    f"(expected {index} or greater)")
                continue
            if msg.tag == MessageTag.FINISHED:
                # If enough update messages have been received, call update() and break out of the loop to start
                # the next episode.
                result = self._registry_table.push(msg)
                if result:
                    env_metrics, details = result[0]
                    break

        return env_metrics, details

    def _on_rollout_finish(self, messages: List[Message]):
        metrics = {
            msg.source: msg.payload[PayloadKey.METRICS]
            for msg in messages
        }
        details = {
            msg.source: msg.payload[PayloadKey.DETAILS]
            for msg in messages
        }
        return metrics, details

    def terminate(self):
        """Tell the remote actors to exit."""
        self._proxy.ibroadcast(component_type="actor",
                               tag=MessageTag.EXIT,
                               session_type=SessionType.NOTIFICATION)
        self.logger.info("Exiting...")
        self._proxy.close()
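
Putting the pieces together, a learner-side driver for this ActorProxy might look like the sketch below; the group name, actor count, Redis address and the way models are refreshed between episodes are all placeholders.

# Hedged usage sketch for the ActorProxy defined above. All concrete values are
# placeholders; training on the collected `details` is application-specific.
actor_proxy = ActorProxy(
    group_name="cim_group",
    num_actors=2,
    proxy_options={"redis_address": ("localhost", 6379)}   # assumed option, as in the launcher examples
)

for ep in range(10):
    # Broadcast the current models and wait until `update_trigger` actors report back.
    metrics, details = actor_proxy.roll_out(ep, training=True, model_by_agent=current_models)
    # ... train on `details` and refresh `current_models` here (not shown) ...

actor_proxy.terminate()   # broadcast EXIT so the remote actors shut down
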