def zero_play(**args):
    """Drive an Env episode to completion with zero-quantity actions and return its snapshot list."""
    env = Env(**args)
    _, pending_decision, done = env.step(None)
    while not done:
        # A "do nothing" policy: always move 0 containers for the pending decision.
        noop = Action(pending_decision.vessel_idx, pending_decision.port_idx, 0)
        _, pending_decision, done = env.step(noop)
    return env.snapshot_list
def _init_env(self, backend_name: str) -> None:
    """Build a fresh 200-tick CIM Env on the requested backend and cache its business engine."""
    os.environ["DEFAULT_BACKEND_NAME"] = backend_name
    env_options = {"enable-dump-snapshot": tempfile.gettempdir()}
    self._env = Env(
        scenario="cim",
        topology=self._reload_topology,
        start_tick=0,
        durations=200,
        options=env_options,
    )
    self._business_engine = self._env.business_engine
def setUp(self):
    """Run a short toy VM-scheduling episode to the end, always allocating to the first valid PM."""
    env = Env(
        scenario="vm_scheduling",
        topology="tests/data/vm_scheduling/azure.2019.toy",
        start_tick=0,
        durations=5,
        snapshot_resolution=1,
    )
    metrics, decision_event, is_done = env.step(None)
    while not is_done:
        # Greedy placement: pick the first physical machine in the valid list.
        self.metrics, decision_event, is_done = env.step(
            AllocateAction(vm_id=decision_event.vm_id, pm_id=decision_event.valid_pms[0])
        )
def launch(config, distributed_config):
    """Start a distributed CIM actor worker that serves rollouts to a remote learner."""
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)

    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_ids = [str(agent_id) for agent_id in env.agent_idx_list]

    # Shapers translate between raw env observations/actions and agent-side representations.
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    num_actions = config.agents.algorithm.num_actions
    action_shaper = CIMActionShaper(action_space=list(np.linspace(-1.0, 1.0, num_actions)))
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_actor",
        mode=AgentManagerMode.INFERENCE,
        agent_dict=create_dqn_agents(agent_ids, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper,
    )

    # The GROUP environment variable (if set) overrides the configured group name.
    proxy_params = {
        "group_name": os.environ.get("GROUP", distributed_config.group),
        "expected_peers": {"learner": 1},
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15,
    }
    worker = ActorWorker(
        local_actor=SimpleActor(env=env, agent_manager=agent_manager),
        proxy_params=proxy_params,
    )
    worker.launch()
def launch(config):
    """Run the distributed policy-optimization learner side for the CIM scenario."""
    config = convert_dottable(config)

    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_ids = [str(agent_id) for agent_id in env.agent_idx_list]

    # The state shaper is only constructed here to size the agents' input layer.
    config["agents"]["input_dim"] = CIMStateShaper(**config.env.state_shaping).dim
    agent_manager = POAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN,
        agent_dict=create_po_agents(agent_ids, config.agents),
    )

    proxy_params = {
        "group_name": os.environ["GROUP"],
        "expected_peers": {"actor": int(os.environ["NUM_ACTORS"])},
        "redis_address": ("localhost", 6379),
    }
    remote_actor = ActorProxy(
        proxy_params=proxy_params,
        experience_collecting_func=merge_experiences_with_trajectory_boundaries,
    )
    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=remote_actor,
        scheduler=Scheduler(config.main_loop.max_episode),
        logger=Logger("cim_learner", auto_timestamp=False),
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
def launch(config):
    """Single-process DQN training entry point for the CIM scenario."""
    config = convert_dottable(config)

    # Step 1: Initialize a CIM environment for using a toy dataset.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_ids = [str(agent_id) for agent_id in env.agent_idx_list]
    action_space = list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions))

    # Step 2: Create state, action and experience shapers. We also need to create an explorer
    # here due to the greedy nature of the DQN algorithm.
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=action_space)
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # Step 3: Create agents and an agent manager.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN_INFERENCE,
        agent_dict=create_dqn_agents(agent_ids, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper,
    )

    # Step 4: Create an actor and a learner to start the training process.
    scheduler = TwoPhaseLinearParameterScheduler(
        config.main_loop.max_episode, **config.main_loop.exploration)
    learner = SimpleLearner(
        agent_manager,
        SimpleActor(env, agent_manager),
        scheduler,
        logger=Logger("cim_learner", format_=LogFormat.simple, auto_timestamp=False),
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
def cim_dqn_actor():
    """Launch a rollout worker that runs DQN agents against the CIM environment."""
    env = Env(**training_config["env"])
    agents = MultiAgentWrapper({name: get_dqn_agent() for name in env.agent_idx_list})
    rollout_actor = Actor(env, agents, CIMTrajectoryForDQN, trajectory_kwargs=common_config)
    rollout_actor.as_worker(training_config["group"], log_dir=log_dir)
def run(self):
    """Initialize environment and process commands.

    Builds an Env from the stored constructor args, then serves commands
    received over the pipe ("step", "reset", "query", "tick", "frame_index",
    "is_done", "stop") until "stop" arrives.
    """
    metrics = None
    decision_event = None
    is_done = False
    env = Env(*self._args, **self._kwargs)
    while True:
        cmd, content = self._pipe.recv()
        if cmd == "step":
            if is_done:
                # Skip if the current episode is already complete.
                self._pipe.send((None, None, True, env.frame_index))
            else:
                metrics, decision_event, is_done = env.step(content)
                # BUGFIX: reply with the same 4-tuple shape as the already-done
                # branch above. The original sent only (metrics, decision_event),
                # so the receiver could not unpack a consistent message.
                self._pipe.send((metrics, decision_event, is_done, env.frame_index))
        elif cmd == "reset":
            env.reset()
            metrics = None
            decision_event = None
            is_done = False
            self._pipe.send(None)
        elif cmd == "query":
            # content is (node_name, snapshot slice args) for the snapshot list.
            node_name, args = content
            states = env.snapshot_list[node_name][args]
            self._pipe.send(states)
        elif cmd == "tick":
            self._pipe.send(env.tick)
        elif cmd == "frame_index":
            self._pipe.send(env.frame_index)
        elif cmd == "is_done":
            self._pipe.send(is_done)
        elif cmd == "stop":
            self._pipe.send(None)
            break
def cim_dqn_learner():
    """Launch the off-policy DQN learner that coordinates the remote actor pool."""
    env = Env(**training_config["env"])
    agents = MultiAgentWrapper({name: get_dqn_agent() for name in env.agent_idx_list})
    scheduler = TwoPhaseLinearParameterScheduler(
        training_config["max_episode"], **training_config["exploration"])
    actor_proxy = ActorProxy(
        training_config["group"],
        training_config["num_actors"],
        update_trigger=training_config["learner_update_trigger"],
    )
    OffPolicyLearner(actor_proxy, scheduler, agents, **training_config["training"]).run()
def launch(config, distributed_config):
    """Run the distributed DQN learner side for the CIM scenario."""
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)

    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_ids = [str(agent_id) for agent_id in env.agent_idx_list]

    # The state shaper is built only to size the DQN input layer.
    config["agents"]["algorithm"]["input_dim"] = CIMStateShaper(**config.env.state_shaping).dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN,
        agent_dict=create_dqn_agents(agent_ids, config.agents),
    )

    # Environment variables take precedence over the distributed config file.
    group_name = os.environ.get("GROUP", distributed_config.group)
    num_actors = int(os.environ.get("NUM_ACTORS", distributed_config.num_actors))
    proxy_params = {
        "group_name": group_name,
        "expected_peers": {"actor": num_actors},
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15,
    }

    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(
            proxy_params=proxy_params,
            experience_collecting_func=concat_experiences_by_agent,
        ),
        scheduler=TwoPhaseLinearParameterScheduler(
            config.main_loop.max_episode, **config.main_loop.exploration),
        logger=Logger("cim_learner", auto_timestamp=False),
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
def test_cim():
    """Benchmark the average wall-clock time of a full no-action CIM episode.

    Runs `eps` complete episodes of the toy 5-port topology with a None
    (do-nothing) action every step and prints the mean episode duration.
    """
    eps = 4
    env = Env("cim", "toy.5p_ssddd_l0.0", durations=MAX_TICK)
    start_time = time()
    for _ in range(eps):
        _, _, is_done = env.step(None)
        while not is_done:
            _, _, is_done = env.step(None)
        env.reset()
    end_time = time()
    # Fixed typo in the report message: "toplogy" -> "topology".
    print(f"cim 5p topology with {MAX_TICK} total time cost: {(end_time - start_time)/eps}")
format_=LogFormat.none, dump_folder=LOG_PATH, dump_mode="w", auto_timestamp=False) ilp_logger = Logger(tag="ilp", format_=LogFormat.none, dump_folder=LOG_PATH, dump_mode="w", auto_timestamp=False) if __name__ == "__main__": start_time = timeit.default_timer() env = Env(scenario=config.env.scenario, topology=config.env.topology, start_tick=config.env.start_tick, durations=config.env.durations, snapshot_resolution=config.env.resolution) shutil.copy(os.path.join(env._business_engine._config_path, "config.yml"), os.path.join(LOG_PATH, "BEconfig.yml")) shutil.copy(CONFIG_PATH, os.path.join(LOG_PATH, "config.yml")) if config.env.seed is not None: env.set_seed(config.env.seed) metrics: object = None decision_event: DecisionPayload = None is_done: bool = False action: Action = None metrics, decision_event, is_done = env.step(None)
# Copyright (c) Microsoft Corporation. # Licensed under the MIT license. import numpy as np from maro.simulator import Env from maro.rl import AgentMode, SimpleActor, ActorWorker, KStepExperienceShaper, TwoPhaseLinearExplorer from config import config from state_shaper import CIMStateShaper from action_shaper import CIMActionShaper from experience_shaper import TruncatedExperienceShaper from agent_manager import DQNAgentManager if __name__ == "__main__": env = Env(config.env.scenario, config.env.topology, durations=config.env.durations) agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list] state_shaper = CIMStateShaper(**config.state_shaping) action_shaper = CIMActionShaper(action_space=list( np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions))) if config.experience_shaping.type == "truncated": experience_shaper = TruncatedExperienceShaper( **config.experience_shaping.truncated) else: experience_shaper = KStepExperienceShaper( reward_func=lambda mt: 1 - mt["container_shortage"] / mt[ "order_requirements"], **config.experience_shaping.k_step) exploration_config = {
auto_event_mode = False start_tick = 0 durations = 100 max_ep = 2 opts = dict() """ enable-dump-snapshot parameter means business_engine needs dump snapshot data before reset. If you leave value to empty string, it will dump to current folder. For getting dump data, please uncomment below line and specify dump destination folder. """ # opts['enable-dump-snapshot'] = '' env = Env(scenario="citi_bike", topology="toy.4s_4t", start_tick=start_tick, durations=durations, snapshot_resolution=60, options=opts) print(env.summary) for ep in range(max_ep): metrics = None decision_evt: DecisionEvent = None is_done = False action = None while not is_done: metrics, decision_evt, is_done = env.step(action) # It will be None at the end.
from maro.simulator import Env

# Run one complete "do nothing" episode of the toy 5-port CIM scenario
# and report the resulting environment metrics.
env = Env(scenario="cim", topology="toy.5p_ssddd_l0.0", start_tick=0, durations=100)

is_done = False
while not is_done:
    metrics, decision_event, is_done = env.step(None)

print(f"environment metrics: {env.metrics}")
class TestCimScenarios(unittest.TestCase):
    """Regression tests for the CIM scenario: topology loading, vessel movement,
    order generation, seeding behavior, order export, and early discharge.

    Many expected values below are hard-coded "golden" numbers captured under the
    default random seed of the bundled topology; they verify determinism rather
    than derive the values independently.
    """

    def __init__(self, *args, **kwargs):
        super(TestCimScenarios, self).__init__(*args, **kwargs)
        # Raw topology YAML is kept so tests can compare engine state against it.
        with open(os.path.join(TOPOLOGY_PATH_CONFIG, "config.yml"), "r") as input_stream:
            self._raw_topology = yaml.safe_load(input_stream)

        self._env: Optional[Env] = None
        self._reload_topology: str = TOPOLOGY_PATH_CONFIG
        self._business_engine: Optional[CimBusinessEngine] = None

        random.clear()

    def _init_env(self, backend_name: str) -> None:
        """Create a fresh 200-tick CIM Env on the given backend and cache its business engine."""
        os.environ["DEFAULT_BACKEND_NAME"] = backend_name
        self._env = Env(
            scenario="cim", topology=self._reload_topology, start_tick=0, durations=200,
            options={"enable-dump-snapshot": tempfile.gettempdir()})
        self._business_engine = self._env.business_engine

    def test_load_from_config(self) -> None:
        """Verify engine state right after loading matches the raw topology YAML."""
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            #########################################################
            if len(self._business_engine.configs) > 0:  # Env will not have `configs` if loaded from dump/real.
                self.assertTrue(compare_dictionary(self._business_engine.configs, self._raw_topology))

            self.assertEqual(len(getattr(self._business_engine.frame, "ports")), 22)
            self.assertEqual(self._business_engine._data_cntr.port_number, 22)
            self.assertEqual(len(getattr(self._business_engine.frame, "vessels")), 46)
            self.assertEqual(self._business_engine._data_cntr.vessel_number, 46)
            self.assertEqual(len(self._business_engine.snapshots), 0)

            #########################################################
            # Vessel: every vessel setting must mirror its YAML entry.
            vessels: List[VesselSetting] = self._business_engine._data_cntr.vessels
            for i, vessel in enumerate(vessels):
                vessel_config = self._raw_topology["vessels"][vessel.name]
                self.assertEqual(vessel.index, i)
                self.assertEqual(vessel.capacity, vessel_config["capacity"])
                self.assertEqual(vessel.parking_duration, vessel_config["parking"]["duration"])
                self.assertEqual(vessel.parking_noise, vessel_config["parking"]["noise"])
                self.assertEqual(vessel.start_port_name, vessel_config["route"]["initial_port_name"])
                self.assertEqual(vessel.route_name, vessel_config["route"]["route_name"])
                self.assertEqual(vessel.sailing_noise, vessel_config["sailing"]["noise"])
                self.assertEqual(vessel.sailing_speed, vessel_config["sailing"]["speed"])

            # Node-mapping names must resolve back to the same vessel objects.
            for name, idx in self._business_engine.get_node_mapping()["vessels"].items():
                self.assertEqual(vessels[idx].name, name)

            #########################################################
            # Port: every port setting must mirror its YAML entry.
            ports: List[PortSetting] = self._business_engine._data_cntr.ports
            port_names = [port.name for port in ports]
            for i, port in enumerate(ports):
                assert isinstance(port, SyntheticPortSetting)
                port_config = self._raw_topology["ports"][port.name]
                self.assertEqual(port.index, i)
                self.assertEqual(port.capacity, port_config["capacity"])
                self.assertEqual(port.empty_return_buffer.noise, port_config["empty_return"]["noise"])
                self.assertEqual(port.full_return_buffer.noise, port_config["full_return"]["noise"])
                self.assertEqual(
                    port.source_proportion.noise,
                    port_config["order_distribution"]["source"]["noise"])
                for target in port.target_proportions:
                    self.assertEqual(
                        target.noise,
                        port_config["order_distribution"]["targets"][port_names[target.index]]["noise"])

            for name, idx in self._business_engine.get_node_mapping()["ports"].items():
                self.assertEqual(ports[idx].name, name)

    def test_load_from_real(self) -> None:
        """Loading from real bin/csv data must reproduce fixed first-step port stats,
        and a keep-seed reset must reproduce them again."""
        for topology in [TOPOLOGY_PATH_REAL_BIN, TOPOLOGY_PATH_REAL_CSV]:
            self._reload_topology = topology
            for backend_name in backends_to_test:
                self._init_env(backend_name)

                for i, port in enumerate(self._business_engine._ports):
                    self.assertEqual(port.booking, 0)
                    self.assertEqual(port.shortage, 0)

                # Golden (booking, shortage, empty) triples for the 4 ports after one step.
                hard_coded_truth = [556, 0, 20751], [1042, 0, 17320], [0, 0, 25000], [0, 0, 25000]

                self._env.step(action=None)
                for i, port in enumerate(self._business_engine._ports):
                    self.assertEqual(port.booking, hard_coded_truth[i][0])
                    self.assertEqual(port.shortage, hard_coded_truth[i][1])
                    self.assertEqual(port.empty, hard_coded_truth[i][2])

                # Re-running with the same seed must give identical results.
                self._env.reset(keep_seed=True)
                self._env.step(action=None)
                for i, port in enumerate(self._business_engine._ports):
                    self.assertEqual(port.booking, hard_coded_truth[i][0])
                    self.assertEqual(port.shortage, hard_coded_truth[i][1])
                    self.assertEqual(port.empty, hard_coded_truth[i][2])

        # Restore the default topology for subsequent tests.
        self._reload_topology = TOPOLOGY_PATH_CONFIG

    def test_dump_and_load(self) -> None:
        """An Env reloaded from dumped data must behave exactly like the original."""
        dump_from_config(os.path.join(TOPOLOGY_PATH_CONFIG, "config.yml"), TOPOLOGY_PATH_DUMP, 200)

        self._reload_topology = TOPOLOGY_PATH_DUMP

        # The reloaded Env should have same behaviors
        self.test_load_from_config()
        self.test_vessel_movement()
        self.test_order_state()
        self.test_order_export()
        self.test_early_discharge()

        self._reload_topology = TOPOLOGY_PATH_CONFIG

    def test_vessel_movement(self) -> None:
        """Verify vessel periods, route-following stop sequences, and location indices over time."""
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            # Golden per-vessel period values under the default seed.
            hard_coded_period = [
                67, 75, 84, 67, 53, 58, 51, 58, 61, 49, 164, 182, 146, 164, 182, 146, 90, 98,
                79, 95, 104, 84, 87, 97, 78, 154, 169, 136, 154, 169, 94, 105, 117, 94, 189,
                210, 167, 189, 210, 167, 141, 158, 125, 141, 158, 125
            ]
            self.assertListEqual(self._business_engine._data_cntr.vessel_period, hard_coded_period)

            ports: List[PortSetting] = self._business_engine._data_cntr.ports
            port_names: List[str] = [port.name for port in ports]
            vessel_stops: VesselStopsWrapper = self._business_engine._data_cntr.vessel_stops
            vessels: List[VesselSetting] = self._business_engine._data_cntr.vessels

            # Test invalid argument
            self.assertIsNone(vessel_stops[None])

            #########################################################
            # Each vessel's first stop must be its configured start port.
            for i, vessel in enumerate(vessels):
                start_port_index = port_names.index(vessel.start_port_name)
                self.assertEqual(vessel_stops[i, 0].port_idx, start_port_index)

            #########################################################
            # The stop sequence must cycle through the route, offset by the start port.
            for i, vessel in enumerate(vessels):
                stop_port_indices = [stop.port_idx for stop in vessel_stops[i]]
                raw_route = self._raw_topology["routes"][vessel.route_name]
                route_stop_names = [stop["port_name"] for stop in raw_route]
                route_stop_indices = [port_names.index(name) for name in route_stop_names]
                start_offset = route_stop_indices.index(port_names.index(vessel.start_port_name))
                for j, stop_port_index in enumerate(stop_port_indices):
                    self.assertEqual(
                        stop_port_index,
                        route_stop_indices[(j + start_offset) % len(route_stop_indices)])

            #########################################################
            # STEP: beginning
            for i, vessel in enumerate(self._business_engine._vessels):
                self.assertEqual(vessel.idx, i)
                self.assertEqual(vessel.next_loc_idx, 0)
                self.assertEqual(vessel.last_loc_idx, 0)

            #########################################################
            self._env.step(action=None)
            self.assertEqual(self._env.tick, 5)  # Vessel 35 will trigger the first arrival event at tick 5
            for i, vessel in enumerate(self._business_engine._vessels):
                if i == 35:
                    self.assertEqual(vessel.next_loc_idx, 1)
                    self.assertEqual(vessel.last_loc_idx, 1)
                else:
                    self.assertEqual(vessel.next_loc_idx, 1)
                    self.assertEqual(vessel.last_loc_idx, 0)

            #########################################################
            self._env.step(action=None)
            self.assertEqual(self._env.tick, 6)  # Vessel 27 will trigger the second arrival event at tick 6
            for i, vessel in enumerate(self._business_engine._vessels):
                if i == 27:  # Vessel 27 just arrives
                    self.assertEqual(vessel.next_loc_idx, 1)
                    self.assertEqual(vessel.last_loc_idx, 1)
                elif i == 35:  # Vessel 35 has already departed
                    self.assertEqual(vessel.next_loc_idx, 2)
                    self.assertEqual(vessel.last_loc_idx, 1)
                else:
                    self.assertEqual(vessel.next_loc_idx, 1)
                    self.assertEqual(vessel.last_loc_idx, 0)

            #########################################################
            # Run to tick 100, then cross-check each vessel's location indices
            # against its precomputed stop timetable.
            while self._env.tick < 100:
                self._env.step(action=None)
            self.assertEqual(self._env.tick, 100)
            for i, vessel in enumerate(self._business_engine._vessels):
                expected_next_loc_idx = expected_last_loc_idx = -1
                for j, stop in enumerate(vessel_stops[i]):
                    if stop.arrival_tick == self._env.tick:
                        # Exactly arriving now: both indices point at this stop.
                        expected_next_loc_idx = expected_last_loc_idx = j
                        break
                    if stop.arrival_tick > self._env.tick:
                        # In transit: between the previous stop and this one.
                        expected_next_loc_idx = j
                        expected_last_loc_idx = j - 1
                        break
                self.assertEqual(vessel.next_loc_idx, expected_next_loc_idx)
                self.assertEqual(vessel.last_loc_idx, expected_last_loc_idx)

    def test_order_state(self) -> None:
        """Initial port inventories must follow the configured proportions; first-step
        bookings/shortages/empties must match golden values under the default seed."""
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            for i, port in enumerate(self._business_engine._ports):
                total_containers = self._raw_topology['total_containers']
                initial_container_proportion = \
                    self._raw_topology['ports'][port.name]['initial_container_proportion']

                self.assertEqual(port.booking, 0)
                self.assertEqual(port.shortage, 0)
                self.assertEqual(port.empty, int(total_containers * initial_container_proportion))

            #########################################################
            self._env.step(action=None)
            self.assertEqual(self._env.tick, 5)

            hard_coded_truth = [  # Should get same results under default random seed
                [223, 0, 14726], [16, 0, 916], [18, 0, 917], [89, 0, 5516], [84, 0, 4613],
                [72, 0, 4603], [26, 0, 1374], [24, 0, 1378], [48, 0, 2756], [54, 0, 2760],
                [26, 0, 1379], [99, 0, 5534], [137, 0, 7340], [19, 0, 912], [13, 0, 925],
                [107, 0, 6429], [136, 0, 9164], [64, 0, 3680], [24, 0, 1377], [31, 0, 1840],
                [109, 0, 6454], [131, 0, 7351]
            ]
            for i, port in enumerate(self._business_engine._ports):
                self.assertEqual(port.booking, hard_coded_truth[i][0])
                self.assertEqual(port.shortage, hard_coded_truth[i][1])
                self.assertEqual(port.empty, hard_coded_truth[i][2])

    def test_keep_seed(self) -> None:
        """reset(keep_seed=True) must reproduce the episode exactly;
        reset(keep_seed=False) must diverge (in stop timing and port stats)."""
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            vessel_stops_1: List[List[Stop]] = self._business_engine._data_cntr.vessel_stops
            self._env.step(action=None)
            port_info_1 = [
                (port.booking, port.shortage, port.empty)
                for port in self._business_engine._ports]

            self._env.reset(keep_seed=True)
            vessel_stops_2: List[List[Stop]] = self._business_engine._data_cntr.vessel_stops
            self._env.step(action=None)
            port_info_2 = [
                (port.booking, port.shortage, port.empty)
                for port in self._business_engine._ports]

            self._env.reset(keep_seed=False)
            vessel_stops_3: List[List[Stop]] = self._business_engine._data_cntr.vessel_stops
            self._env.step(action=None)
            port_info_3 = [
                (port.booking, port.shortage, port.empty)
                for port in self._business_engine._ports]

            # Vessel
            for i in range(self._business_engine._data_cntr.vessel_number):
                # 1 and 2 should be totally equal
                self.assertListEqual(vessel_stops_1[i], vessel_stops_2[i])

                # 1 and 3 should have difference
                flag = True
                for stop1, stop3 in zip(vessel_stops_1[i], vessel_stops_3[i]):
                    # Structure (ports visited) stays equal; only timing may differ.
                    self.assertListEqual(
                        [stop1.index, stop1.port_idx, stop1.vessel_idx],
                        [stop3.index, stop3.port_idx, stop3.vessel_idx])
                    if (stop1.arrival_tick, stop1.leave_tick) != (stop3.arrival_tick, stop3.leave_tick):
                        flag = False
                self.assertFalse(flag)

            # Port
            self.assertListEqual(port_info_1, port_info_2)
            self.assertFalse(
                all(port1 == port3 for port1, port3 in zip(port_info_1, port_info_3)))

    def test_order_export(self) -> None:
        """order.tick, order.src_port_idx, order.dest_port_idx, order.quantity"""
        Order = namedtuple("Order", ["tick", "src_port_idx", "dest_port_idx", "quantity"])

        # When disabled, dump() must produce no file; when enabled, the CSV must
        # contain the added orders in insertion order.
        for enabled in [False, True]:
            exporter = PortOrderExporter(enabled)

            for i in range(5):
                exporter.add(Order(0, 0, 1, i + 1))

            out_folder = tempfile.gettempdir()
            if os.path.exists(f"{out_folder}/orders.csv"):
                os.remove(f"{out_folder}/orders.csv")

            exporter.dump(out_folder)

            if enabled:
                with open(f"{out_folder}/orders.csv") as fp:
                    reader = csv.DictReader(fp)
                    row = 0
                    for line in reader:
                        self.assertEqual(row + 1, int(line["quantity"]))
                        row += 1
            else:  # Should done nothing
                self.assertFalse(os.path.exists(f"{out_folder}/orders.csv"))

    def test_early_discharge(self) -> None:
        """Overloading a vessel must trigger early discharge; track vessel 35's
        (full, empty, early_discharge) history against golden values, then verify
        the event payload schema and agent index list."""
        for backend_name in backends_to_test:
            self._init_env(backend_name)

            metric, decision_event, is_done = self._env.step(None)
            assert isinstance(decision_event, DecisionEvent)
            self.assertEqual(decision_event.action_scope.load, 1240)
            self.assertEqual(decision_event.action_scope.discharge, 0)
            self.assertEqual(decision_event.early_discharge, 0)

            decision_event = pickle.loads(pickle.dumps(decision_event))  # Test serialization

            # Load more than the scope allows (1201 > 1240's complement) to force
            # early discharge downstream; also issue a small discharge.
            load_action = Action(
                vessel_idx=decision_event.vessel_idx, port_idx=decision_event.port_idx,
                quantity=1201, action_type=ActionType.LOAD)
            discharge_action = Action(
                vessel_idx=decision_event.vessel_idx, port_idx=decision_event.port_idx,
                quantity=1, action_type=ActionType.DISCHARGE)
            metric, decision_event, is_done = self._env.step([load_action, discharge_action])

            history = []
            while not is_done:
                metric, decision_event, is_done = self._env.step(None)
                assert decision_event is None or isinstance(decision_event, DecisionEvent)
                if decision_event is not None and decision_event.vessel_idx == 35:
                    v = self._business_engine._vessels[35]
                    history.append((v.full, v.empty, v.early_discharge))
            hard_coded_benchmark = [
                (465, 838, 362), (756, 547, 291), (1261, 42, 505), (1303, 0, 42),
                (1303, 0, 0), (1303, 0, 0), (803, 0, 0)
            ]
            self.assertListEqual(history, hard_coded_benchmark)

            # Expected event payload field names, keyed by event type.
            payload_detail_benchmark = {
                'ORDER': ['tick', 'src_port_idx', 'dest_port_idx', 'quantity'],
                'RETURN_FULL': ['src_port_idx', 'dest_port_idx', 'quantity'],
                'VESSEL_ARRIVAL': ['port_idx', 'vessel_idx'],
                'LOAD_FULL': ['port_idx', 'vessel_idx'],
                'DISCHARGE_FULL': ['vessel_idx', 'port_idx', 'from_port_idx', 'quantity'],
                'PENDING_DECISION': [
                    'tick', 'port_idx', 'vessel_idx', 'snapshot_list', 'action_scope',
                    'early_discharge'
                ],
                'LOAD_EMPTY': ['port_idx', 'vessel_idx', 'action_type', 'quantity'],
                'DISCHARGE_EMPTY': ['port_idx', 'vessel_idx', 'action_type', 'quantity'],
                'VESSEL_DEPARTURE': ['port_idx', 'vessel_idx'],
                'RETURN_EMPTY': ['port_idx', 'quantity']
            }
            self.assertTrue(
                compare_dictionary(
                    self._business_engine.get_event_payload_detail(),
                    payload_detail_benchmark))
            port_number = self._business_engine._data_cntr.port_number
            self.assertListEqual(
                self._business_engine.get_agent_idx_list(), list(range(port_number)))
if __name__ == "__main__": start_tick = 0 durations = 100 # 100 days opts = dict() """ enable-dump-snapshot parameter means business_engine needs dump snapshot data before reset. If you leave value to empty string, it will dump to current folder. For getting dump data, please uncomment below line and specify dump destination folder. """ opts['enable-dump-snapshot'] = 'YOUR_FOLDER_NAME' # Initialize an environment with a specific scenario, related topology. env = Env(scenario="cim", topology="global_trade.22p_l0.1", start_tick=start_tick, durations=durations, options=opts) # To reset environmental data before starting a new experiment. env.reset() # Query environment summary, which includes business instances, intra-instance attributes, etc. print(env.summary) for ep in range(2): # Gym-like step function. metrics, decision_event, is_done = env.step(None) while not is_done: past_week_ticks = [ x for x in range(max(decision_event.tick - 7, 0), decision_event.tick)
subfolder_name = f"{config.env.param.topology}_{time_str}" # Log path. config.log.path = os.path.join(config.log.path, date_str, subfolder_name) if not os.path.exists(config.log.path): os.makedirs(config.log.path) simulation_logger = Logger(tag="simulation", dump_folder=config.log.path, dump_mode="w", auto_timestamp=False) # Create a demo environment to retrieve environment information. simulation_logger.info( "Approximating the experience quantity of each agent...") demo_env = Env(**config.env.param) config.env.exp_per_ep = decision_cnt_analysis(demo_env, pv=True, buffer_size=8) simulation_logger.info(config.env.exp_per_ep) # Add some buffer to prevent overlapping. config.env.return_scaler, tot_order_amount = return_scaler( demo_env, tick=config.env.param.durations, gamma=config.training.gamma) simulation_logger.info( f"Return value will be scaled down by the factor {config.env.return_scaler}" ) save_config(config, os.path.join(config.log.path, "config.yml")) save_code("examples/cim/gnn", config.log.path)
raw_config = yaml.safe_load(in_file) config = convert_dottable(raw_config) # Overwrite the config. if args.topology is not None: config.env.topology = args.topology if args.seed is not None: config.env.seed = args.seed if args.peep: PEEP_AND_USE_REAL_DATA = True # Init an environment for Citi Bike. env = Env( scenario=config.env.scenario, topology=config.env.topology, start_tick=config.env.start_tick, durations=config.env.durations, snapshot_resolution=config.env.resolution, ) # For debug only, used to peep the BE to get the real future data. if PEEP_AND_USE_REAL_DATA: ENV = env TRIP_PICKER = BinaryReader(env.configs["trip_data"]).items_tick_picker( start_time_offset=config.env.start_tick, end_time_offset=(config.env.start_tick + config.env.durations), time_unit="m" ) if config.env.seed is not None: env.set_seed(config.env.seed)
def single_player_worker(index, config, exp_idx_mapping, pipe, action_io, exp_output):
    """The A2C worker function to collect experience.

    Args:
        index (int): The process index counted from 0.
        config (dict): It is a dottable dictionary that stores the configuration of the simulation,
            state_shaper and postprocessing shaper.
        exp_idx_mapping (dict): The key is agent code and the value is the starting index where the
            experience is stored in the experience batch.
        pipe (Pipe): The pipe instance for communication with the main process.
        action_io (SharedStructure): The shared memory to hold the state information that the main
            process uses to generate an action.
        exp_output (SharedStructure): The shared memory to transfer the experience list to the main
            process.
    """
    env = Env(**config.env.param)
    fix_seed(env, config.env.seed)
    static_code_list, dynamic_code_list = list(env.summary["node_mapping"]["ports"].values()), \
        list(env.summary["node_mapping"]["vessels"].values())

    # Create gnn_state_shaper without consuming any resources.
    gnn_state_shaper = GNNStateShaper(
        static_code_list, dynamic_code_list, config.env.param.durations, config.model.feature,
        tick_buffer=config.model.tick_buffer, max_value=env.configs["total_containers"])
    gnn_state_shaper.compute_static_graph_structure(env)

    action_io_np = action_io.structuralize()

    action_shaper = DiscreteActionShaper(config.model.action_dim)
    exp_shaper = ExperienceShaper(
        static_code_list, dynamic_code_list, config.env.param.durations, gnn_state_shaper,
        scale_factor=config.env.return_scaler, time_slot=config.training.td_steps,
        discount_factor=config.training.gamma, idx=index,
        shared_storage=exp_output.structuralize(), exp_idx_mapping=exp_idx_mapping)

    i = 0
    # Each "reset" message from the main process starts one rollout episode.
    while pipe.recv() == "reset":
        env.reset()
        r, decision_event, is_done = env.step(None)
        j = 0
        logs = []
        while not is_done:
            model_input = gnn_state_shaper(decision_event, env.snapshot_list)
            # Publish this process' state slice to shared memory, then ask the
            # main process for an action ("features" -> model_action reply).
            action_io_np["v"][:, index] = model_input["v"]
            action_io_np["p"][:, index] = model_input["p"]
            action_io_np["vo"][index] = model_input["vo"]
            action_io_np["po"][index] = model_input["po"]
            action_io_np["vedge"][index] = model_input["vedge"]
            action_io_np["pedge"][index] = model_input["pedge"]
            action_io_np["ppedge"][index] = model_input["ppedge"]
            action_io_np["mask"][index] = model_input["mask"]
            action_io_np["pid"][index] = decision_event.port_idx
            action_io_np["vid"][index] = decision_event.vessel_idx
            pipe.send("features")
            model_action = pipe.recv()
            env_action = action_shaper(decision_event, model_action)
            exp_shaper.record(
                decision_event=decision_event, model_action=model_action, model_input=model_input)
            logs.append([
                index, decision_event.tick, decision_event.port_idx, decision_event.vessel_idx,
                model_action, env_action, decision_event.action_scope.load,
                decision_event.action_scope.discharge])
            action = Action(decision_event.vessel_idx, decision_event.port_idx, env_action)
            r, decision_event, is_done = env.step(action)
            j += 1
        action_io_np["sh"][index] = compute_shortage(
            env.snapshot_list, config.env.param.durations, static_code_list)
        i += 1
        pipe.send("done")
        gnn_state_shaper.end_ep_callback(env.snapshot_list)
        # Organize and synchronize exp to shared memory.
        exp_shaper(env.snapshot_list)
        exp_shaper.reset()
        # BUGFIX: `np.float` was a deprecated alias for the builtin `float` and was
        # removed in NumPy 1.24; using the builtin keeps the same float64 dtype.
        logs = np.array(logs, dtype=float)
        pipe.send(logs)
agent_id = list(state.keys())[0] data = training_data.setdefault(agent_id, {"args": [[] for _ in range(4)]}) data["args"][0].append(state[agent_id]) # state data["args"][1].append(action[agent_id][0]) # action data["args"][2].append(action[agent_id][1]) # log_p data["args"][3].append(self.get_offline_reward(event)) # reward for agent_id in training_data: training_data[agent_id]["args"] = [ np.asarray(vals, dtype=np.float32 if i == 3 else None) for i, vals in enumerate(training_data[agent_id]["args"]) ] return training_data # Single-threaded launcher if __name__ == "__main__": set_seeds(1024) # for reproducibility env = Env(**training_config["env"]) agent = MultiAgentWrapper( {name: get_ac_agent() for name in env.agent_idx_list}) actor = Actor(env, agent, CIMTrajectoryForAC, trajectory_kwargs=common_config) # local actor learner = OnPolicyLearner(actor, training_config["max_episode"]) learner.run()
from examples.ecr.rl_formulations.common.reward_shaper import ECRRewardShaper from examples.ecr.rl_formulations.common.explorer import TwoPhaseLinearExplorer, exploration_config with io.open("../config.yml", "r") as in_file: raw_config = yaml.safe_load(in_file) cf = convert_dottable(raw_config) if cf.rl.modeling == "dqn": from examples.ecr.rl_formulations.dqn_agent_manager import DQNAgentManager, num_actions agent_manager_cls = DQNAgentManager action_space = list(np.linspace(-1.0, 1.0, num_actions)) else: # TODO: enc_gat agent_manager class raise ValueError(f"Unsupported RL algorithm: {cf.rl.modeling}") if __name__ == "__main__": env = Env(cf.env.scenario, cf.env.topology, durations=cf.env.durations) agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list] state_shaper = ECRStateShaper(**cf.state_shaping) action_shaper = ECRActionShaper(action_space=action_space) if cf.reward_shaping.type == "truncated": reward_shaper = ECRRewardShaper(agent_id_list=agent_id_list, **cf.reward_shaping.truncated) else: reward_shaper = KStepRewardShaper(reward_func=lambda mt: mt["perf"], **cf.reward_shaping.k_step) explorer = TwoPhaseLinearExplorer(agent_id_list, cf.rl.total_training_episodes, **exploration_config) agent_manager = agent_manager_cls(name="ecr_learner", mode=AgentMode.TRAIN_INFERENCE, agent_id_list=agent_id_list,
os.environ["MARO_STREAMIT_EXPERIMENT_NAME"] = "experiment_example" from random import seed, randint from maro.simulator import Env from maro.simulator.scenarios.cim.common import Action, ActionScope, ActionType from maro.streamit import streamit if __name__ == "__main__": seed(0) NUM_EPISODE = 2 with streamit: # Initialize an environment with a specific scenario, related topology. env = Env(scenario="cim", topology="global_trade.22p_l0.1", start_tick=0, durations=100) # To reset environmental data before starting a new experiment. env.reset() for ep in range(NUM_EPISODE): # Tell streamit we are in a new episode. streamit.episode(ep) # Gym-like step function. metrics, decision_event, is_done = env.step(None) while not is_done: action_scope = decision_event.action_scope to_discharge = action_scope.discharge > 0 and randint(0, 1) > 0
# Copyright (c) Microsoft Corporation. # Licensed under the MIT license. from maro.simulator import Env from maro.simulator.scenarios.cim.common import Action start_tick = 0 durations = 100 # 100 days # Initialize an environment with a specific scenario, related topology. env = Env(scenario="cim", topology="toy.5p_ssddd_l0.0", start_tick=start_tick, durations=durations) # Query environment summary, which includes business instances, intra-instance attributes, etc. print(env.summary) for ep in range(2): # Gym-like step function metrics, decision_event, is_done = env.step(None) while not is_done: past_week_ticks = [ x for x in range(decision_event.tick - 7, decision_event.tick) ] decision_port_idx = decision_event.port_idx intr_port_infos = ["booking", "empty", "shortage"] # Query the decision port booking, empty container inventory, shortage information in the past week past_week_info = env.snapshot_list["ports"][