def __init__(self, normalize, resource_type):
    # shared parameters
    self.neighbors_size = 8
    self.T = 25
    self.max_steps = 1000
    self.n_signal = 4
    self.resource_type = resource_type
    if self.resource_type != 'all':
        self.n_agent = 3
        self.n_actions = 2
        self.n_episode = 4000
        self.max_u = 1 / 3
        self.n_neighbors = 2
    else:
        self.n_agent = 4
        self.n_actions = 5
        self.n_episode = 10000
        self.max_u = 0.25
        self.n_neighbors = 3
    self.input_size = 13
    self.nD = self.n_agent
    self.GAMMA = 0.98
    self.fileresults = open('learning.data', "w")
    self.normalize = normalize
    self.compute_neighbors = False
    if normalize:
        self.obs_rms = [
            RunningMeanStd(shape=self.input_size)
            for _ in range(self.n_agent)
        ]
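# --- Illustrative sketch (not part of the original file) --------------------
# RunningMeanStd is used by every config in this excerpt but is defined
# elsewhere in the project. The class below is a hedged, minimal stand-in that
# matches only the interface visible here (constructed with shape=..., applied
# via obs_filter(obs) in the data-center Env further down): a Welford-style
# running mean/variance normalizer. The project's real implementation may
# differ.
import numpy as np


class RunningMeanStd:
    def __init__(self, shape, epsilon=1e-4):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = epsilon

    def update(self, x):
        # Batched Welford update of the running mean and variance.
        x = np.atleast_2d(x)
        batch_mean, batch_var, batch_count = x.mean(axis=0), x.var(axis=0), x.shape[0]
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count
        new_mean = self.mean + delta * batch_count / tot_count
        m2 = (self.var * self.count + batch_var * batch_count
              + delta ** 2 * self.count * batch_count / tot_count)
        self.mean, self.var, self.count = new_mean, m2 / tot_count, tot_count

    def obs_filter(self, obs):
        # Update the statistics with the new observation, then return it normalized.
        self.update(obs)
        return (obs - self.mean) / np.sqrt(self.var + 1e-8)
# -----------------------------------------------------------------------------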
def __init__(self, normalize):
    # shared parameters
    self.env = gym.make('Pendulum-v0')
    self.T = 200
    self.max_steps = 200
    self.n_signal = 4
    self.n_agent = 1
    self.n_actions = self.env.action_space.shape[0]
    self.n_episode = 1000
    self.max_u = None
    self.n_neighbors = 2
    self.input_size = self.env.observation_space.shape[0]
    self.nD = self.n_agent
    self.GAMMA = 0.9
    self.fileresults = open('learning.data', "w")
    self.normalize = normalize
    self.compute_neighbors = False
    self.neighbors_size = 2  # maximum number of neighbors
    self.compute_neighbors_last = np.array([[0], [1]])
    self.compute_neighbors_last_index = [
        list(range(len(self.compute_neighbors_last[i])))
        for i in range(self.n_agent)
    ]
    if normalize:
        self.obs_rms = [
            RunningMeanStd(shape=self.input_size)
            for _ in range(self.n_agent)
        ]
def __init__(self, normalize, T=50, one_hot_encoding=True):
    # shared parameters
    self.neighbors_size = 24
    self.T = T
    self.max_steps = 10000
    self.n_signal = 4
    self.n_agent = 5
    self.nD = self.n_agent
    self.n_actions = 5
    self.n_episode = 10000
    self.max_u = 0.003
    self.n_neighbors = 4
    if one_hot_encoding:
        self.input_size = 80  # 25*3 + 3 + 2
    else:
        self.input_size = 30  # 25 + 3 + 2
    self.GAMMA = 0.98
    self.n_resource = 8
    self.one_hot_encoding = one_hot_encoding
    self.fileresults = open('learning.data', "w")
    self.fileresults2 = open('learningru.data', "w")
    self.normalize = normalize
    self.compute_neighbors = False
    if normalize:
        self.obs_rms = [
            RunningMeanStd(shape=self.input_size)
            for _ in range(self.n_agent)
        ]
    self.requirement = [[2, 1, 0], [1, 0, 1], [0, 1, 1], [1, 1, 0], [0, 1, 2]]
def __init__(self, normalize, resource_type, obs3neighbors):
    # shared parameters
    self.n_neighbors = 3
    self.neighbors_size = self.n_neighbors
    self.T = 50
    self.max_steps = 1000
    self.n_actions = 5
    self.n_signal = 4
    self.n_agent = 10
    self.nD = self.n_agent
    self.n_resource = 3
    self.n_episode = 10000
    self.max_u = 0.15
    self.GAMMA = 0.98
    if obs3neighbors:
        self.input_size = 6 + self.n_neighbors * 4
    else:
        self.input_size = 6
    if resource_type != 'all':
        self.input_size += 1
    if resource_type == 'rr':
        self.n_episode = 20000
    self.fileresults = open('learning.data', "w")
    self.normalize = normalize
    self.resource_type = resource_type
    self.obs3neighbors = obs3neighbors
    self.compute_neighbors = self.obs3neighbors
    self.compute_neighbors_last = np.zeros(
        (self.n_agent, self.n_neighbors), dtype=int)
    self.compute_neighbors_last_index = [
        [i for i in range(self.n_neighbors)] for _ in range(self.n_agent)
    ]
    if normalize:
        self.obs_rms = [
            RunningMeanStd(shape=self.input_size)
            for _ in range(self.n_agent)
        ]
class Env:

    def __init__(self, normalize_inputs, T, gamma, more_obs, average_rewards):
        if os.geteuid() != 0:
            exit("You need to have root privileges to run this script.\n"
                 "Please try again, this time using 'sudo'. Exiting.")
        clean()
        os.system("systemctl start ovsdb-server.service")
        os.system("systemctl start ovs-vswitchd.service")
        self.T = T
        self.average_rewards = average_rewards
        self.normalize_inputs = normalize_inputs
        self.GAMMA = gamma
        self.max_steps = 10000000
        self.n_agent = 16
        self.nD = self.n_agent
        self.n_signal = 4
        self.n_episode = 300  # for around 3M transitions
        self.max_u = None
        self.input_size = 96
        self.n_actions = 1  # number of action dimensions per agent
        self.conf = DEFAULT_CONF
        # self.conf.update(conf)
        if more_obs:
            self.conf.update(
                {"state_model": ["backlog", "d_backlog", "olimit", "drops"]})
            self.input_size = 336
        # Init one-to-one mapped variables
        self.net_man = None
        self.state_man = None
        self.traffic_gen = None
        self.bw_ctrl = None
        self.sampler = None
        self.input_file = None
        self.terminated = False
        self.reward = RawValue('d', 0)
        # set the id of this environment
        self.short_id = dc_utils.generate_id()
        if self.conf["parallel_envs"]:
            self.conf["topo_conf"]["id"] = self.short_id
        # initialize the topology
        self.topo = TopoFactory.create(self.conf["topo"],
                                       self.conf["topo_conf"])
        # Save the configuration we have, id does not matter here
        dc_utils.dump_json(path=self.conf["output_dir"],
                           name="env_config",
                           data=self.conf)
        dc_utils.dump_json(path=self.conf["output_dir"],
                           name="topo_config",
                           data=self.topo.conf)
        # set the dimensions of the state matrix
        self._set_gym_matrices()
        # Set the active traffic matrix
        self._set_traffic_matrix(self.conf["tf_index"],
                                 self.conf["input_dir"], self.topo)
        # each unique id has its own sub folder
        if self.conf["parallel_envs"]:
            self.conf["output_dir"] += f"/{self.short_id}"
        # check if the directory we are going to work with exists
        dc_utils.check_dir(self.conf["output_dir"])
        # handle unexpected exit scenarios gracefully
        atexit.register(self.close)
        self.compute_neighbors = False
        self.neighbors_size = 4  # maximum number of neighbors
        self.compute_neighbors_last = np.array(
            [[1, 2, 3], [0, 2, 3], [0, 1, 3], [0, 1, 2],
             [5, 6, 7], [4, 6, 7], [4, 5, 7], [4, 5, 6],
             [9, 10, 11], [8, 10, 11], [8, 9, 11], [8, 9, 10],
             [13, 14, 15], [12, 14, 15], [12, 13, 15], [12, 13, 14]])
        self.compute_neighbors_last_index = [
            list(range(len(self.compute_neighbors_last[i])))
            for i in range(self.n_agent)
        ]
        if normalize_inputs:
            self.obs_rms = RunningMeanStd(shape=self.input_size)
        self.fileresults = open('learning.data', "w")

    def _set_gym_matrices(self):
        # set the action space
        num_actions = self.topo.get_num_hosts()
        min_bw = 10000.0 / float(self.topo.conf["max_capacity"])
        self.action_min = np.empty(num_actions)
        self.action_min.fill(min_bw)
        self.action_max = np.empty(num_actions)
        self.action_max.fill(1.0)
        # self.action_space = spaces.Box(
        #     low=action_min, high=action_max, dtype=np.float32)
        # Initialize the action arrays shared with the control manager.
        # Qdiscs do not go beyond the uint32 rate limit, which is about 4 Gbps.
        tx_rate = RawArray(ctypes.c_uint32, num_actions)
        self.tx_rate = dc_utils.shmem_to_nparray(tx_rate, np.float32)
        active_rate = RawArray(ctypes.c_uint32, num_actions)
        self.active_rate = dc_utils.shmem_to_nparray(active_rate, np.float32)
        log.info("%s Setting action space", self.short_id)
        log.info("from %s", self.action_min)
        log.info("to %s", self.action_max)
        # set the observation space
        num_ports = self.topo.get_num_sw_ports()
        num_features = len(self.conf["state_model"])
        if self.conf["collect_flows"]:
            num_features += num_actions * 2
        obs_min = np.empty(num_ports * num_features + num_actions)
        obs_min.fill(-np.inf)
        obs_max = np.empty(num_ports * num_features + num_actions)
        obs_max.fill(np.inf)
        # self.observation_space = spaces.Box(
        #     low=obs_min, high=obs_max, dtype=np.float64)

    def __del__(self):
        self.fileresults.close()
        clean()

    def toggle_compute_neighbors(self):
        pass

    def neighbors(self):
        return (self.compute_neighbors_last,
                self.compute_neighbors_last_index)

    def _set_traffic_matrix(self, index, input_dir, topo):
        traffic_file = topo.get_traffic_pattern(index)
        self.input_file = f"{input_dir}/{topo.get_name()}/{traffic_file}"

    def _start_managers(self):
        # actually generate a topology if it does not exist yet
        if not self.net_man:
            log.info("%s Starting network manager...", self.short_id)
            self.net_man = NetworkManager(self.topo,
                                          self.conf["agent"].lower())
        # in a similar way, start a traffic generator
        if not self.traffic_gen:
            log.info("%s Starting traffic generator...", self.short_id)
            self.traffic_gen = TrafficGen(self.net_man,
                                          self.conf["transport"],
                                          self.conf["output_dir"])
        # Init the state manager
        if not self.state_man:
            self.state_man = StateManager(self.conf, self.net_man,
                                          self.conf["stats_dict"])
        # Init the state sampler
        # if not self.sampler:
        #     stats = self.state_man.get_stats()
        #     self.sampler = StatsSampler(stats, self.tx_rate,
        #                                 self.reward, self.conf["output_dir"])
        #     self.sampler.start()
        # the bandwidth controller is reinitialized with every new network
        if not self.bw_ctrl:
            host_map = self.net_man.host_ctrl_map
            self.bw_ctrl = BandwidthController(host_map, self.tx_rate,
                                               self.active_rate,
                                               self.topo.max_bps)
            self.bw_ctrl.start()

    def _start_env(self):
        log.info("%s Starting environment...", self.short_id)
        # Launch all managers (if they are not active already).
        # This lazy initialization ensures that the environment object can be
        # created without initializing the virtual network.
        self._start_managers()
        # Finally, start the traffic
        self.traffic_gen.start(self.input_file)

    def _stop_env(self):
        log.info("%s Stopping environment...", self.short_id)
        if self.traffic_gen:
            log.info("%s Stopping traffic", self.short_id)
            self.traffic_gen.stop()
        log.info("%s Done with stopping.", self.short_id)

    def reset(self):
        self.rinfo = np.array([0.] * self.n_agent)
        self.step_count = 0
        self._stop_env()
        self._start_env()
        return self._get_obs()

    def _get_obs(self):
        observations = self.state_man.observe()
        # Retrieve the bandwidth enforced by bandwidth control
        observations.extend(self.active_rate)
        observations = np.array(observations)
        if self.normalize_inputs:
            observations = self.obs_rms.obs_filter(np.array(observations))
        observations = list(observations)
        observations = [observations] * self.n_agent
        return observations

    def close(self):
        if self.terminated:
            return
        self.terminated = True
        log.info("%s Closing environment...", self.short_id)
        if self.state_man:
            log.info("%s Stopping all state collectors...", self.short_id)
            self.state_man.close()
            self.state_man = None
        if self.bw_ctrl:
            log.info("%s Shutting down bandwidth control...", self.short_id)
            self.bw_ctrl.close()
            self.bw_ctrl = None
        if self.sampler:
            log.info("%s Shutting down data sampling.", self.short_id)
            self.sampler.close()
            self.sampler = None
        if self.traffic_gen:
            log.info("%s Shutting down generators...", self.short_id)
            self.traffic_gen.close()
            self.traffic_gen = None
        if self.net_man:
            log.info("%s Stopping network.", self.short_id)
            self.net_man.stop_network()
            self.net_man = None
        log.info("%s Done with destroying myself.", self.short_id)

    def compute_rewards(self, action):
        return self.state_man.get_reward(action)

    def step(self, action):
        # Assume each action component is in [0, 1]
        action = np.array(action)[:, 0]
        action = self.action_min + action * (self.action_max - self.action_min)
        # Truncate actions to legal values
        # action = np.clip(action, self.action_min, self.action_max)
        # Retrieve observation and reward
        obs = self._get_obs()
        rewards = self.compute_rewards(action)
        self.reward.value = rewards.sum()
        self.rinfo += rewards
        self.step_count += 1
        # Update the array with the bandwidth control
        self.tx_rate[:] = action
        # The environment is finished when the traffic generators have stopped
        done = not self.traffic_gen.check_if_traffic_alive()
        return obs, rewards, done

    def _handle_interrupt(self, signum, frame):
        log.warning("%s \nEnvironment: Caught interrupt", self.short_id)
        # pass the bound method, do not call it
        atexit.unregister(self.close)
        self.close()
        sys.exit(1)

    def end_episode(self):
        if self.average_rewards:
            self.rinfo = self.rinfo / float(self.step_count)
        self.fileresults.write(
            ','.join(self.rinfo.flatten().astype('str')) + '\n')
        self.fileresults.flush()

    def render(self):
        pass
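# --- Illustrative sketch (not part of the original file) --------------------
# Hedged usage example for the Env class above, using only the constructor
# signature and the reset()/step()/end_episode()/close() interface defined in
# this file (numpy is assumed to be imported as np at module level, and the
# default topology is assumed to have one host per agent). Random actions are
# drawn in [0, 1] because step() rescales them into [action_min, action_max].
if __name__ == "__main__":
    env = Env(normalize_inputs=False, T=50, gamma=0.99,
              more_obs=False, average_rewards=True)
    obs = env.reset()
    done = False
    while not done:
        # one action dimension per agent (n_actions == 1 for this environment)
        actions = np.random.uniform(0.0, 1.0,
                                    size=(env.n_agent, env.n_actions))
        obs, rewards, done = env.step(actions)
    env.end_episode()
    env.close()
# -----------------------------------------------------------------------------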
i_episode = 0
# One meta (controller) policy and value network per agent; the meta networks
# take the agent observation plus 2 extra scalar features (matching the
# shape=2 normalizers created below).
meta_Pi = []
meta_V = []
for i in range(n_agent):
    meta_Pi.append(
        PPOPolicyNetwork(num_features=env.input_size + 2,
                         num_actions=n_signal,
                         layer_size=128,
                         epsilon=0.1,
                         learning_rate=lr_actor))
    meta_V.append(
        ValueNetwork(num_features=env.input_size + 2,
                     hidden_size=128,
                     learning_rate=0.001))

# One sub-policy and value network per agent and per signal.
Pi = [[] for _ in range(n_agent)]
V = [[] for _ in range(n_agent)]
for i in range(n_agent):
    for j in range(n_signal):
        Pi[i].append(
            PPOPolicyNetwork(num_features=env.input_size,
                             num_actions=n_actions,
                             layer_size=256,
                             epsilon=0.1,
                             learning_rate=lr_actor))
        V[i].append(
            ValueNetwork(num_features=env.input_size,
                         hidden_size=256,
                         learning_rate=0.001))

if normalize_inputs:
    meta_obs_rms = [RunningMeanStd(shape=2) for _ in range(n_agent)]

while i_episode < n_episode:
    i_episode += 1
    avg = [0] * n_agent
    u_bar = [0] * n_agent
    utili = [0] * n_agent
    u = [[] for _ in range(n_agent)]
    ep_actions = [[] for _ in range(n_agent)]
    ep_rewards = [[] for _ in range(n_agent)]
    ep_states = [[] for _ in range(n_agent)]
    meta_z = [[] for _ in range(n_agent)]
    meta_rewards = [[] for _ in range(n_agent)]
def __init__(self,
             normalize_inputs,
             T,
             doublereward,
             max_depart_delay=100000,
             time_to_teleport=-1,
             time_to_load_vehicles=0,
             yellow_time=3,
             min_green=5,
             max_green=50):
    net_file = os.path.dirname(__file__) + '/../data/3x3Grid2lanes.net.xml'
    # route_file = os.path.dirname(__file__) + '/../data/routes14000.rou.xml'
    route_file = os.path.dirname(
        __file__) + '/../data/routes3x3.harder.rou.xml'
    use_gui = False
    time_to_load_vehicles = 200
    num_seconds = 5000 + time_to_load_vehicles
    delta_time = 10  # with delta_time at 5, the same action sometimes has to be taken twice
    # two-phase solution:
    # phases = [
    #     traci.trafficlight.Phase(35, "GGGgrrrrGGGgrrrr"),
    #     traci.trafficlight.Phase(2, "YYYYrrrrYYYYrrrr"),
    #     traci.trafficlight.Phase(35, "rrrrGGGgrrrrGGGg"),
    #     traci.trafficlight.Phase(2, "rrrrYYYYrrrrYYYY"),
    # ]
    # four-phase solution:
    phases = [
        traci.trafficlight.Phase(300, "GGGrrrrrGGGrrrrr"),
        traci.trafficlight.Phase(3, "yyyrrrrryyyrrrrr"),
        traci.trafficlight.Phase(300, "rrrGrrrrrrrGrrrr"),
        traci.trafficlight.Phase(3, "rrryrrrrrrryrrrr"),
        traci.trafficlight.Phase(300, "rrrrGGGrrrrrGGGr"),
        traci.trafficlight.Phase(3, "rrrryyyrrrrryyyr"),
        traci.trafficlight.Phase(300, "rrrrrrrGrrrrrrrG"),
        traci.trafficlight.Phase(3, "rrrrrrryrrrrrrry")
    ]
    self._net = net_file
    self._route = route_file
    self.use_gui = use_gui
    self.doublereward = doublereward
    if self.use_gui:
        self._sumo_binary = sumolib.checkBinary('sumo-gui')
    else:
        self._sumo_binary = sumolib.checkBinary('sumo')
    # start SUMO once, only to retrieve network information
    traci.start([sumolib.checkBinary('sumo'), '-n', self._net])
    self.ts_ids = traci.trafficlight.getIDList()
    self.lanes_per_ts = len(
        set(traci.trafficlight.getControlledLanes(self.ts_ids[0])))
    self.traffic_signals = dict()
    self.phases = phases
    # Number of green phases == number of phases (green + yellow) divided by 2
    self.num_green_phases = len(phases) // 2
    self.vehicles = dict()
    self.last_measure = {}  # used by the reward function to remember the last measure
    self.last_measure2 = []
    self.last_reward = [0. for _ in range(len(self.ts_ids))]
    self.sim_max_time = num_seconds
    # number of simulation seconds run in reset() before learning starts
    self.time_to_load_vehicles = time_to_load_vehicles
    self.delta_time = delta_time  # seconds on SUMO at each step
    self.max_depart_delay = max_depart_delay  # max wait time to insert a vehicle
    self.time_to_teleport = time_to_teleport
    self.min_green = min_green
    self.max_green = max_green
    self.yellow_time = yellow_time
    self.run = 0
    # Original:
    # self.observation_space = spaces.Box(
    #     low=np.zeros(self.num_green_phases + 1 + 2 * self.lanes_per_ts),
    #     high=np.ones(self.num_green_phases + 1 + 2 * self.lanes_per_ts))
    self.input_size = self.num_green_phases + 1 + 2 * self.lanes_per_ts
    self.n_actions = self.num_green_phases
    self.T = T
    self.max_steps = int((num_seconds - time_to_load_vehicles) / delta_time)
    self.n_agent = len(self.ts_ids)
    self.GAMMA = 0.99
    self.n_signal = 4
    self.n_episode = 2000
    self.nD = self.n_agent
    if doublereward:
        self.nD = self.n_agent * 2
    self.max_u = None
    self.normalize_inputs = normalize_inputs
    self.compute_neighbors = False
    self.neighbors_size = 4  # maximum number of neighbors
    self.compute_neighbors_last = np.array([[1, 3], [0, 2], [1, 5],
                                            [0, 4, 6], [1, 3, 5, 7], [2, 4, 8],
                                            [3, 7], [6, 8], [5, 7]])
    self.compute_neighbors_last_index = [
        list(range(len(self.compute_neighbors_last[i])))
        for i in range(self.n_agent)
    ]
    if normalize_inputs:
        self.obs_rms = [
            RunningMeanStd(shape=self.input_size)
            for _ in range(self.n_agent)
        ]
    self.fileresults = open('learning.data', "w")
    traci.close()