def ConvertToCSV(env):
    # collect the location of the emission file
    dir_path = env.sim_params.emission_path
    emission_filename = \
        "{0}-emission.xml".format(env.network.name)
    emission_path = os.path.join(dir_path, emission_filename)

    # convert the emission file into a csv
    emission_to_csv(emission_path)
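# For reference, a minimal sketch of what emission_to_csv does, assuming
# SUMO's standard emission-output schema (<timestep> elements wrapping
# <vehicle> elements whose attributes become columns); the real Flow
# utility may differ in details such as column naming.
import csv
import xml.etree.ElementTree as ElementTree

def emission_to_csv_sketch(emission_path, output_path=None):
    """Convert a SUMO emission XML file into a csv file (sketch)."""
    output_path = output_path or emission_path[:-4] + ".csv"
    root = ElementTree.parse(emission_path).getroot()

    rows = []
    for timestep in root.findall("timestep"):
        for vehicle in timestep.findall("vehicle"):
            row = {"time": timestep.attrib["time"]}
            row.update(vehicle.attrib)
            rows.append(row)

    if rows:
        with open(output_path, "w", newline="") as outfile:
            writer = csv.DictWriter(outfile, fieldnames=sorted(rows[0]),
                                    restval="", extrasaction="ignore")
            writer.writeheader()
            writer.writerows(rows)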
def test_emission_to_csv(self):
    # current path
    current_path = os.path.realpath(__file__).rsplit("/", 1)[0]

    # run the emission_to_csv function on a small emission file
    emission_to_csv(current_path + "/test_files/test-emission.xml")

    # import the generated csv file and its headers
    dict1 = []
    filename = current_path + "/test_files/test-emission.csv"
    with open(filename, "r") as infile:
        reader = csv.reader(infile)
        headers = next(reader)
        for row in reader:
            dict1.append(dict())
            for i, key in enumerate(headers):
                dict1[-1][key] = row[i]

    # check the names of the headers
    expected_headers = \
        ['time', 'CO', 'y', 'CO2', 'electricity', 'type', 'id', 'eclass',
         'waiting', 'NOx', 'fuel', 'HC', 'x', 'route', 'relative_position',
         'noise', 'angle', 'PMx', 'speed', 'edge_id', 'lane_number']

    self.assertCountEqual(headers, expected_headers)

    # check the number of rows of the generated csv file
    # Note that rl vehicles are missing their final (reset) values, which
    # does not affect this test
    self.assertEqual(len(dict1), 104)
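# The header-then-rows loop above can be written more compactly with
# csv.DictReader; an equivalent reading helper:
import csv

def read_csv_rows(filename):
    """Return the csv rows as a list of dicts keyed by the header row."""
    with open(filename, "r") as infile:
        return list(csv.DictReader(infile))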
def visualizer_rllab(args):
    """Visualizer for rllab experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    # extract the flow environment
    data = joblib.load(args.file)
    policy = data['policy']
    env = data['env']

    # FIXME(ev, ak) only one of these should be needed
    # unwrapped_env = env._wrapped_env._wrapped_env.env.unwrapped
    # unwrapped_env = env.wrapped_env.env.env.unwrapped
    # if this doesn't work, try the one above it
    unwrapped_env = env._wrapped_env.env.unwrapped

    # Set sumo to make a video
    sim_params = unwrapped_env.sim_params
    sim_params.emission_path = './test_time_rollout/' if args.gen_emission \
        else None
    if args.no_render:
        sim_params.render = False
    else:
        sim_params.render = True
    unwrapped_env.restart_simulation(
        sim_params=sim_params, render=sim_params.render)

    # Load data into arrays
    rew = []
    for j in range(args.num_rollouts):
        # run a single rollout of the experiment
        path = rollout(env=env, agent=policy)

        # collect the rewards from the rollout
        new_rewards = path['rewards']

        # print the cumulative reward of the most recent rollout
        print('Round {}, return: {}'.format(j, sum(new_rewards)))
        rew.append(sum(new_rewards))

    # print the average cumulative reward across rollouts
    print('Average, std return: {}, {}'.format(np.mean(rew), np.std(rew)))

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(
            unwrapped_env.scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        emission_to_csv(emission_path)
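# A minimal driver sketch for this visualizer; create_parser is the helper
# referenced in the docstring and is assumed to define args.file,
# args.num_rollouts, args.gen_emission and args.no_render.
if __name__ == '__main__':
    parser = create_parser()
    args = parser.parse_args()
    visualizer_rllab(args)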
def run(self, num_runs, num_steps, rl_actions=None, convert_to_csv=False):
    """Run the given scenario for a set number of runs and steps per run.

    Parameters
    ----------
    num_runs : int
        number of runs the experiment should perform
    num_steps : int
        number of steps to be performed in each run of the experiment
    rl_actions : list or numpy ndarray, optional
        actions to be performed by rl vehicles in the network (if there
        are any)
    convert_to_csv : bool
        Specifies whether to convert the emission file created by sumo
        into a csv file
    """
    if rl_actions is None:
        rl_actions = []

    rets = []
    for i in range(num_runs):
        logging.info("Iter #" + str(i))
        ret = 0
        self.env.reset()
        for j in range(num_steps):
            state, reward, done, _ = self.env.step(rl_actions)
            ret += reward
            if done:
                break
        rets.append(ret)
        print("Round {0}, return: {1}".format(i, ret))

    print("Average Return", np.mean(rets))

    self.env.terminate()

    if convert_to_csv:
        # collect the location of the emission file
        dir_path = self.env.sumo_params.emission_path
        emission_filename = \
            "{0}-emission.xml".format(self.env.scenario.name)
        emission_path = \
            "{0}/{1}".format(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)
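# Hypothetical usage, assuming this method belongs to Flow's Experiment
# wrapper and `env` is a pre-built environment whose simulation parameters
# carry an emission_path:
exp = Experiment(env)
exp.run(num_runs=2, num_steps=1500, convert_to_csv=True)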
def visualizer_rllib(args): """Visualizer for RLlib experiments. This function takes args (see function create_parser below for more detailed information on what information can be fed to this visualizer), and renders the experiment associated with it. """ result_dir = args.result_dir if args.result_dir[-1] != '/' \ else args.result_dir[:-1] config = get_rllib_config(result_dir) # check if we have a multiagent environment but in a # backwards compatible way if config.get('multiagent', {}).get('policies', None): multiagent = True pkl = get_rllib_pkl(result_dir) config['multiagent'] = pkl['multiagent'] else: multiagent = False # Run on only one cpu for rendering purposes config['num_workers'] = 0 flow_params = get_flow_params(config) # hack for old pkl files # TODO(ev) remove eventually sim_params = flow_params['sim'] setattr(sim_params, 'num_clients', 1) # for hacks for old pkl files TODO: remove eventually if not hasattr(sim_params, 'use_ballistic'): sim_params.use_ballistic = False # Determine agent and checkpoint config_run = config['env_config']['run'] if 'run' in config['env_config'] \ else None if args.run and config_run: if args.run != config_run: print('visualizer_rllib.py: error: run argument ' + '\'{}\' passed in '.format(args.run) + 'differs from the one stored in params.json ' + '\'{}\''.format(config_run)) sys.exit(1) if args.run: agent_cls = get_agent_class(args.run) elif config_run: agent_cls = get_agent_class(config_run) else: print('visualizer_rllib.py: error: could not find flow parameter ' '\'run\' in params.json, ' 'add argument --run to provide the algorithm or model used ' 'to train the results\n e.g. ' 'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO') sys.exit(1) sim_params.restart_instance = True dir_path = os.path.dirname(os.path.realpath(__file__)) emission_path = '{0}/test_time_rollout/'.format(dir_path) sim_params.emission_path = emission_path if args.gen_emission else None # pick your rendering mode if args.render_mode == 'sumo_web3d': sim_params.num_clients = 2 sim_params.render = False elif args.render_mode == 'drgb': sim_params.render = 'drgb' sim_params.pxpm = 4 elif args.render_mode == 'sumo_gui': sim_params.render = False # will be set to True below elif args.render_mode == 'no_render': sim_params.render = False if args.save_render: if args.render_mode != 'sumo_gui': sim_params.render = 'drgb' sim_params.pxpm = 4 sim_params.save_render = True # Create and register a gym+rllib env create_env, env_name = make_create_env(params=flow_params, version=0) register_env(env_name, create_env) # check if the environment is a single or multiagent environment, and # get the right address accordingly # single_agent_envs = [env for env in dir(flow.envs) # if not env.startswith('__')] # if flow_params['env_name'] in single_agent_envs: # env_loc = 'flow.envs' # else: # env_loc = 'flow.envs.multiagent' # Start the environment with the gui turned on and a path for the # emission file env_params = flow_params['env'] env_params.restart_instance = False if args.evaluate: env_params.evaluate = True # lower the horizon if testing if args.horizon: config['horizon'] = args.horizon env_params.horizon = args.horizon # create the agent that will be used to compute the actions agent = agent_cls(env=env_name, config=config) checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num agent.restore(checkpoint) if hasattr(agent, "local_evaluator") and \ os.environ.get("TEST_FLAG") != 'True': env = 
agent.local_evaluator.env else: env = gym.make(env_name) if args.render_mode == 'sumo_gui': env.sim_params.render = True # set to True after initializing agent and env if multiagent: rets = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func for key in config['multiagent']['policies'].keys(): rets[key] = [] else: rets = [] if config['model']['use_lstm']: use_lstm = True if multiagent: state_init = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func size = config['model']['lstm_cell_size'] for key in config['multiagent']['policies'].keys(): state_init[key] = [ np.zeros(size, np.float32), np.zeros(size, np.float32) ] else: state_init = [ np.zeros(config['model']['lstm_cell_size'], np.float32), np.zeros(config['model']['lstm_cell_size'], np.float32) ] else: use_lstm = False # if restart_instance, don't restart here because env.reset will restart later if not sim_params.restart_instance: env.restart_simulation(sim_params=sim_params, render=sim_params.render) # Simulate and collect metrics final_outflows = [] final_inflows = [] mean_speed = [] std_speed = [] #bmil edit acc_rollout = [] power = 0 for i in range(args.num_rollouts): vel = [] # bmil list for collecting data timerange = [] vel_dict = defaultdict(list) rl_acc = [] state = env.reset() if multiagent: ret = {key: [0] for key in rets.keys()} else: ret = 0 for _ in range(env_params.horizon): vehicles = env.unwrapped.k.vehicle # speeds = vehicles.get_speed(vehicles.get_ids()) ids = vehicles.get_ids() speeds = vehicles.get_speed(ids) # BMIL EDIT FOR COLLECTING DATA OF ACCELERATION AND VELOCITY rl = vehicles.get_rl_ids()[0] act = vehicles.get_realized_accel(rl) rl_acc.append(act or 0) timerange.append(vehicles.get_timestep(ids[-1]) / 100000) # only include non-empty speeds if speeds: vel.append(np.mean(speeds)) # bmil edit for veh_id, speed in zip(ids, speeds): vel_dict[veh_id].append(speed) if vehicles.get_timestep(ids[0]) >= 100000: M = 1200 # mass of average sized vehicle (kg) g = 9.81 # gravitational acceleration (m/s^2) Cr = 0.005 # rolling resistance coefficient Ca = 0.3 # aerodynamic drag coefficient rho = 1.225 # air density (kg/m^3) A = 2.6 # vehicle cross sectional area (m^2) speed = vehicles.get_speed(veh_id) prev_speed = vehicles.get_previous_speed(veh_id) accel = abs(speed - prev_speed) / env.sim_step power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed**3 if multiagent: action = {} for agent_id in state.keys(): if use_lstm: action[agent_id], state_init[agent_id], logits = \ agent.compute_action( state[agent_id], state=state_init[agent_id], policy_id=policy_map_fn(agent_id)) else: action[agent_id] = agent.compute_action( state[agent_id], policy_id=policy_map_fn(agent_id)) else: action = agent.compute_action(state) # BMIL EDIT FOR COLLECTING DATA FROM 100000 TO 375000 # Because 0 - 75000 steps are warm up and 75000 - 100000 steps are process of stabilizing if vehicles.get_timestep(rl) >= 100000: acc_rollout.append(act) state, reward, done, _ = env.step(action) if multiagent: for actor, rew in reward.items(): ret[policy_map_fn(actor)][0] += rew else: ret += reward if multiagent and done['__all__']: break if not multiagent and done: break if multiagent: for key in rets.keys(): rets[key].append(ret[key]) else: rets.append(ret) outflow = vehicles.get_outflow_rate(500) final_outflows.append(outflow) inflow = vehicles.get_inflow_rate(500) final_inflows.append(inflow) if np.all(np.array(final_inflows) > 1e-5): 
throughput_efficiency = [ x / y for x, y in zip(final_outflows, final_inflows) ] else: throughput_efficiency = [0] * len(final_inflows) mean_speed.append(np.mean(vel)) std_speed.append(np.std(vel)) if multiagent: for agent_id, rew in rets.items(): print('Round {}, Return: {} for agent {}'.format( i, ret, agent_id)) else: print('Round {}, Return: {}'.format(i, ret)) # BMIL EDIT FOR PLOT DATA veh = list(vel_dict.keys()) plt.subplot(3, 1, 1) plt.title('Results') for v in veh[:-1]: plt.plot(timerange, vel_dict[v]) plt.xlabel('timestep(s)') plt.ylabel('speed(m/s)') plt.legend(veh[:-1], fontsize=9) plt.grid(True) # plt.show() plt.subplot(3, 1, 2) plt.plot(timerange, vel_dict[veh[-1]], color='r') plt.xlabel('timestep(s)') plt.ylabel('speed(m/s)') plt.legend(['lc'] + veh[-1:]) plt.grid(True) plt.subplot(3, 1, 3) plt.plot(timerange, rl_acc, color='b') plt.xlabel('timestep(s)') plt.ylabel('acceleration(m/s^2)') plt.grid(True) # BASE_DIR = '/home/bmil/BMIL_FLOW_CODE/Graph/' # plt.savefig(f'{BASE_DIR}{"__".join(name)}', dpi=400) plt.show() # BMIL EDIT FOR COMPUTING ACCELERATION's MEAN AND VAR acc_rollout1 = [accarr for accarr in acc_rollout] mean_acc_rollout = [np.mean(acc_rollout1)] variance_acc_rollout = [np.var(acc_rollout1)] print('==== Summary of results ====') print("Return:") print(mean_speed) if multiagent: for agent_id, rew in rets.items(): print('For agent', agent_id) print(rew) print('Average, std return: {}, {} for agent {}'.format( np.mean(rew), np.std(rew), agent_id)) else: print(rets) print('Average, std: {}, {}'.format(np.mean(rets), np.std(rets))) print("\nSpeed, mean (m/s):") print(mean_speed) print('Average, std: {}, {}'.format(np.mean(mean_speed), np.std(mean_speed))) print("\nSpeed, std (m/s):") print(std_speed) print('Average, std: {}, {}'.format(np.mean(std_speed), np.std(std_speed))) # BMIL Edit FOR PRINT ACCEL's MEAN AND VAR print("\nAccel, mean (m/s^2):") print(mean_acc_rollout) print("\nAccel, var (m/s^2):") print(variance_acc_rollout) print("\nTotal Power Consumption (kgᐧm^2/s^3):") print(power) # Compute arrival rate of vehicles in the last 500 sec of the run print("\nOutflows (veh/hr):") print(final_outflows) print('Average, std: {}, {}'.format(np.mean(final_outflows), np.std(final_outflows))) # Compute departure rate of vehicles in the last 500 sec of the run print("Inflows (veh/hr):") print(final_inflows) print('Average, std: {}, {}'.format(np.mean(final_inflows), np.std(final_inflows))) # Compute throughput efficiency in the last 500 sec of the print("Throughput efficiency (veh/hr):") print(throughput_efficiency) print('Average, std: {}, {}'.format(np.mean(throughput_efficiency), np.std(throughput_efficiency))) # terminate the environment env.unwrapped.terminate() # if prompted, convert the emission file into a csv file if args.gen_emission: time.sleep(0.1) dir_path = os.path.dirname(os.path.realpath(__file__)) emission_filename = '{0}-emission.xml'.format(env.network.name) emission_path = \ '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename) # convert the emission file into a csv file emission_to_csv(emission_path) # print the location of the emission csv file emission_path_csv = emission_path[:-4] + ".csv" print("\nGenerated emission file at " + emission_path_csv) # delete the .xml version of the emission file os.remove(emission_path)
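# The inline power computation in the rollout loop above implements a
# standard longitudinal vehicle-dynamics model. Factored out with its
# constants named (values are the ones hard-coded above):
def instantaneous_power(speed, accel,
                        mass=1200.0,   # average vehicle mass (kg)
                        g=9.81,        # gravitational acceleration (m/s^2)
                        c_roll=0.005,  # rolling resistance coefficient
                        c_drag=0.3,    # aerodynamic drag coefficient
                        rho=1.225,     # air density (kg/m^3)
                        area=2.6):     # frontal cross section (m^2)
    """Power (W) spent accelerating and overcoming rolling/air drag."""
    return (mass * speed * accel
            + mass * g * c_roll * speed
            + 0.5 * rho * area * c_drag * speed ** 3)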
def run(self, num_runs, num_steps, rl_actions=None, convert_to_csv=False):
    """Run the given scenario for a set number of runs and steps per run.

    Parameters
    ----------
    num_runs : int
        number of runs the experiment should perform
    num_steps : int
        number of steps to be performed in each run of the experiment
    rl_actions : method, optional
        maps states to actions to be performed by the RL agents (if
        there are any)
    convert_to_csv : bool
        Specifies whether to convert the emission file created by sumo
        into a csv file

    Returns
    -------
    info_dict : dict
        contains returns, average speed per step
    """
    info_dict = {}
    if rl_actions is None:
        def rl_actions(*_):
            return None

    rets = []
    mean_rets = []
    ret_lists = []
    vels = []
    mean_vels = []
    std_vels = []
    for i in range(num_runs):
        vel = np.zeros(num_steps)
        logging.info("Iter #" + str(i))
        ret = 0
        ret_list = []
        state = self.env.reset()
        for j in range(num_steps):
            state, reward, done, _ = self.env.step(rl_actions(state))
            vel[j] = np.mean(
                self.env.k.vehicle.get_speed(self.env.k.vehicle.get_ids()))
            ret += reward
            ret_list.append(reward)
            if done:
                break
        rets.append(ret)
        vels.append(vel)
        mean_rets.append(np.mean(ret_list))
        ret_lists.append(ret_list)
        mean_vels.append(np.mean(vel))
        std_vels.append(np.std(vel))
        print("Round {0}, return: {1}".format(i, ret))

    info_dict["returns"] = rets
    info_dict["velocities"] = vels
    info_dict["mean_returns"] = mean_rets
    info_dict["per_step_returns"] = ret_lists

    print("Average, std return: {}, {}".format(np.mean(rets),
                                               np.std(rets)))
    print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                              np.std(mean_vels)))
    self.env.terminate()

    if convert_to_csv:
        # wait a short period of time to ensure the xml file is readable
        time.sleep(0.1)

        # collect the location of the emission file
        dir_path = self.env.sim_params.emission_path
        emission_filename = \
            "{0}-emission.xml".format(self.env.scenario.name)
        emission_path = os.path.join(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)

    return info_dict
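# A short sketch of consuming the returned info_dict (keys as populated
# above; numpy assumed imported as np, as elsewhere in this module):
info = exp.run(num_runs=3, num_steps=1500)
for run_idx, (ret, vel) in enumerate(zip(info["returns"],
                                         info["velocities"])):
    print("run {}: return={:.2f}, mean speed={:.2f} m/s".format(
        run_idx, ret, np.mean(vel)))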
def visualizer_rllib(args): """Visualizer for RLlib experiments. This function takes args (see function create_parser below for more detailed information on what information can be fed to this visualizer), and renders the experiment associated with it. """ result_dir = args.result_dir if args.result_dir[-1] != '/' \ else args.result_dir[:-1] config = get_rllib_config(result_dir) # check if we have a multiagent environment but in a # backwards compatible way if config.get('multiagent', {}).get('policies', None): multiagent = True pkl = get_rllib_pkl(result_dir) config['multiagent'] = pkl['multiagent'] else: multiagent = False # Run on only one cpu for rendering purposes config['num_workers'] = 0 flow_params = get_flow_params(config) # hack for old pkl files # TODO(ev) remove eventually sim_params = flow_params['sim'] setattr(sim_params, 'num_clients', 1) # for hacks for old pkl files TODO: remove eventually if not hasattr(sim_params, 'use_ballistic'): sim_params.use_ballistic = False # Determine agent and checkpoint config_run = config['env_config']['run'] if 'run' in config['env_config'] \ else None if args.run and config_run: if args.run != config_run: print('visualizer_rllib.py: error: run argument ' + '\'{}\' passed in '.format(args.run) + 'differs from the one stored in params.json ' + '\'{}\''.format(config_run)) sys.exit(1) if args.run: agent_cls = get_agent_class(args.run) elif config_run: agent_cls = get_agent_class(config_run) else: print('visualizer_rllib.py: error: could not find flow parameter ' '\'run\' in params.json, ' 'add argument --run to provide the algorithm or model used ' 'to train the results\n e.g. ' 'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO') sys.exit(1) sim_params.restart_instance = True dir_path = os.path.dirname(os.path.realpath(__file__)) emission_path = '{0}/test_time_rollout/'.format(dir_path) sim_params.emission_path = emission_path if args.gen_emission else None # pick your rendering mode if args.render_mode == 'sumo_web3d': sim_params.num_clients = 2 sim_params.render = False elif args.render_mode == 'drgb': sim_params.render = 'drgb' sim_params.pxpm = 4 elif args.render_mode == 'sumo_gui': sim_params.render = False # will be set to True below elif args.render_mode == 'no_render': sim_params.render = False if args.save_render: if args.render_mode != 'sumo_gui': sim_params.render = 'drgb' sim_params.pxpm = 4 sim_params.save_render = True # Create and register a gym+rllib env create_env, env_name = make_create_env(params=flow_params, version=0) register_env(env_name, create_env) # check if the environment is a single or multiagent environment, and # get the right address accordingly # single_agent_envs = [env for env in dir(flow.envs) # if not env.startswith('__')] # if flow_params['env_name'] in single_agent_envs: # env_loc = 'flow.envs' # else: # env_loc = 'flow.envs.multiagent' # Start the environment with the gui turned on and a path for the # emission file env_params = flow_params['env'] env_params.restart_instance = False if args.evaluate: env_params.evaluate = True # lower the horizon if testing if args.horizon: config['horizon'] = args.horizon env_params.horizon = args.horizon # create the agent that will be used to compute the actions agent = agent_cls(env=env_name, config=config) checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num agent.restore(checkpoint) if hasattr(agent, "local_evaluator") and \ os.environ.get("TEST_FLAG") != 'True': env = 
agent.local_evaluator.env else: env = gym.make(env_name) if args.render_mode == 'sumo_gui': env.sim_params.render = True # set to True after initializing agent and env if multiagent: rets = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func for key in config['multiagent']['policies'].keys(): rets[key] = [] else: rets = [] if config['model']['use_lstm']: use_lstm = True if multiagent: state_init = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func size = config['model']['lstm_cell_size'] for key in config['multiagent']['policies'].keys(): state_init[key] = [ np.zeros(size, np.float32), np.zeros(size, np.float32) ] else: state_init = [ np.zeros(config['model']['lstm_cell_size'], np.float32), np.zeros(config['model']['lstm_cell_size'], np.float32) ] else: use_lstm = False # if restart_instance, don't restart here because env.reset will restart later if not sim_params.restart_instance: env.restart_simulation(sim_params=sim_params, render=sim_params.render) # Simulate and collect metrics final_outflows = [] final_inflows = [] mean_speed = [] std_speed = [] state_translation = { "inflow": 0, "slowing": 1, "parking": 2, "parked": 3, "outflow": 4 } custom_outputs = { k: np.zeros(env_params.horizon * args.num_rollouts) for k in ["num_rollout", "t", "speed", "position", "state", "reward"] } for i in range(args.num_rollouts): vel = [] state = env.reset() if multiagent: ret = {key: [0] for key in rets.keys()} else: ret = 0 for j in range(env_params.horizon): vehicles = env.unwrapped.k.vehicle speeds = vehicles.get_speed(vehicles.get_ids()) # only include non-empty speeds if speeds: vel.append(np.mean(speeds)) if multiagent: action = {} for agent_id in state.keys(): if use_lstm: action[agent_id], state_init[agent_id], logits = \ agent.compute_action( state[agent_id], state=state_init[agent_id], policy_id=policy_map_fn(agent_id)) else: action[agent_id] = agent.compute_action( state[agent_id], policy_id=policy_map_fn(agent_id)) else: action = agent.compute_action(state) state, reward, done, _ = env.step(action) if multiagent: for actor, rew in reward.items(): ret[policy_map_fn(actor)][0] += rew else: ret += reward if multiagent and done['__all__']: break if not multiagent and done: break #"num_rollout", "t", "speed", "position", "state", "reward"]} m = i * env_params.horizon + j custom_outputs["num_rollout"][m] = i custom_outputs["t"][m] = j custom_outputs["reward"][m] = reward veh_id = vehicles.get_ids()[0] custom_outputs["speed"][m] = vehicles.get_speed(veh_id) custom_outputs["position"][m] = vehicles.get_global_position( veh_id, env) custom_outputs["state"][m] = state_translation.get( vehicles.get_state(veh_id), -1) if multiagent: for key in rets.keys(): rets[key].append(ret[key]) else: rets.append(ret) outflow = vehicles.get_outflow_rate(500) final_outflows.append(outflow) inflow = vehicles.get_inflow_rate(500) final_inflows.append(inflow) if np.all(np.array(final_inflows) > 1e-5): throughput_efficiency = [ x / y for x, y in zip(final_outflows, final_inflows) ] else: throughput_efficiency = [0] * len(final_inflows) mean_speed.append(np.mean(vel)) std_speed.append(np.std(vel)) if multiagent: for agent_id, rew in rets.items(): print('Round {}, Return: {} for agent {}'.format( i, ret, agent_id)) else: print('Round {}, Return: {}'.format(i, ret)) print('==== Summary of results ====') print("Return:") print(mean_speed) if multiagent: for agent_id, rew in rets.items(): print('For agent', agent_id) print(rew) 
print('Average, std return: {}, {} for agent {}'.format( np.mean(rew), np.std(rew), agent_id)) else: print(rets) print('Average, std: {}, {}'.format(np.mean(rets), np.std(rets))) print("\nSpeed, mean (m/s):") print(mean_speed) print('Average, std: {}, {}'.format(np.mean(mean_speed), np.std(mean_speed))) print("\nSpeed, std (m/s):") print(std_speed) print('Average, std: {}, {}'.format(np.mean(std_speed), np.std(std_speed))) # Compute arrival rate of vehicles in the last 500 sec of the run print("\nOutflows (veh/hr):") print(final_outflows) print('Average, std: {}, {}'.format(np.mean(final_outflows), np.std(final_outflows))) # Compute departure rate of vehicles in the last 500 sec of the run print("Inflows (veh/hr):") print(final_inflows) print('Average, std: {}, {}'.format(np.mean(final_inflows), np.std(final_inflows))) # Compute throughput efficiency in the last 500 sec of the print("Throughput efficiency (veh/hr):") print(throughput_efficiency) print('Average, std: {}, {}'.format(np.mean(throughput_efficiency), np.std(throughput_efficiency))) df = pd.DataFrame(custom_outputs) df.to_csv("output.csv") # terminate the environment env.unwrapped.terminate() # if prompted, convert the emission file into a csv file if args.gen_emission: time.sleep(0.1) dir_path = os.path.dirname(os.path.realpath(__file__)) emission_filename = '{0}-emission.xml'.format(env.network.name) emission_path = \ '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename) # convert the emission file into a csv file emission_to_csv(emission_path) # print the location of the emission csv file emission_path_csv = emission_path[:-4] + ".csv" print("\nGenerated emission file at " + emission_path_csv) # delete the .xml version of the emission file os.remove(emission_path)
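# Since the loop above dumps its per-step metrics to output.csv, a single
# rollout can be inspected offline; a small sketch using the column names
# written above:
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("output.csv")
rollout0 = df[df["num_rollout"] == 0]
plt.plot(rollout0["t"], rollout0["speed"])
plt.xlabel("step")
plt.ylabel("speed (m/s)")
plt.show()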
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    # config = get_rllib_config(result_dir + '/..')
    # pkl = get_rllib_pkl(result_dir + '/..')
    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sumo_params = flow_params['sumo']
    setattr(sumo_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sumo_params.restart_instance = False
    sumo_params.emission_path = './test_time_rollout/'

    # pick your rendering mode
    if args.render_mode == 'sumo-web3d':
        sumo_params.num_clients = 2
        sumo_params.render = False
    elif args.render_mode == 'drgb':
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
    elif args.render_mode == 'sumo-gui':
        sumo_params.render = False
    elif args.render_mode == 'no-render':
        sumo_params.render = False
    if args.save_render:
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
        sumo_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])

    scenario = scenario_class(name=exp_tag,
                              vehicles=vehicles,
                              net_params=net_params,
                              initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_class(env_params=env_params,
                  sumo_params=sumo_params,
                  scenario=scenario))

    import matplotlib.pyplot as plt
    from matplotlib import cm
    from matplotlib.ticker import LinearLocator, FormatStrFormatter

    fig = plt.figure()
    h = np.linspace(0, 60, 100)
    Deltav = np.linspace(-6, 12, 100)
    Headway, DELTAV = np.meshgrid(h, Deltav)

    # fix v=20m/s
    xn, yn = Headway.shape
    geta = np.array(Headway)
    for xk in range(xn):
        for yk in range(yn):
            # input state:
            # Headway[xk, yk]
            # DELTAV[xk, yk]
            geta[xk, yk] = agent.compute_action(
                np.array(
                    [3.8 / 30, DELTAV[xk, yk] / 30, Headway[xk, yk] / 260]))

    surf = plt.contourf(DELTAV, Headway, geta, 20, cmap=cm.coolwarm)
    plt.colorbar()
    # C = plt.contour(DELTAV, Headway, geta, 20, colors='black')
    # plt.clabel(C, inline=True, fontsize=10)
    plt.show()

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
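# The sweep above hard-codes its state normalization (3.8/30, /30, /260).
# A hedged refactoring sketch that names those assumed bounds; numpy is
# assumed imported as np, as elsewhere in this file:
MAX_SPEED = 30.0     # m/s, assumed normalizer for speed and relative speed
MAX_HEADWAY = 260.0  # m, assumed normalizer for headway
EGO_SPEED = 3.8      # m/s, the fixed ego speed used in the contour sweep

def policy_response(agent, delta_v, headway):
    """Query the restored policy at one (speed, delta-v, headway) point."""
    obs = np.array([EGO_SPEED / MAX_SPEED,
                    delta_v / MAX_SPEED,
                    headway / MAX_HEADWAY])
    return agent.compute_action(obs)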
def visualizer_rllib(args): """Visualizer for RLlib experiments. This function takes args (see function create_parser below for more detailed information on what information can be fed to this visualizer), and renders the experiment associated with it. """ result_dir = args.result_dir if args.result_dir[-1] != '/' \ else args.result_dir[:-1] config = get_rllib_config(result_dir) # check if we have a multiagent environment but in a # backwards compatible way if config.get('multiagent', {}).get('policies', None): multiagent = True pkl = get_rllib_pkl(result_dir) config['multiagent'] = pkl['multiagent'] else: multiagent = False # Run on only one cpu for rendering purposes config['num_workers'] = 0 flow_params = get_flow_params(config) # hack for old pkl files # TODO(ev) remove eventually sim_params = flow_params['sim'] setattr(sim_params, 'num_clients', 1) # Determine agent and checkpoint config_run = config['env_config']['run'] if 'run' in config['env_config'] \ else None if args.run and config_run: if args.run != config_run: print('visualizer_rllib.py: error: run argument ' + '\'{}\' passed in '.format(args.run) + 'differs from the one stored in params.json ' + '\'{}\''.format(config_run)) sys.exit(1) if args.run: agent_cls = get_agent_class(args.run) elif config_run: agent_cls = get_agent_class(config_run) else: print('visualizer_rllib.py: error: could not find flow parameter ' '\'run\' in params.json, ' 'add argument --run to provide the algorithm or model used ' 'to train the results\n e.g. ' 'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO') sys.exit(1) sim_params.restart_instance = True dir_path = os.path.dirname(os.path.realpath(__file__)) emission_path = '{0}/test_time_rollout/'.format(dir_path) sim_params.emission_path = emission_path if args.gen_emission else None # pick your rendering mode if args.render_mode == 'sumo_web3d': sim_params.num_clients = 2 sim_params.render = False elif args.render_mode == 'drgb': sim_params.render = 'drgb' sim_params.pxpm = 4 elif args.render_mode == 'sumo_gui': sim_params.render = True print('NOTE: With render mode {}, an extra instance of the SUMO GUI ' 'will display before the GUI for visualizing the result. 
Click ' 'the green Play arrow to continue.'.format(args.render_mode)) elif args.render_mode == 'no_render': sim_params.render = False if args.save_render: sim_params.render = 'drgb' sim_params.pxpm = 4 sim_params.save_render = True # Create and register a gym+rllib env create_env, env_name = make_create_env(params=flow_params, version=0) register_env(env_name, create_env) # check if the environment is a single or multiagent environment, and # get the right address accordingly # single_agent_envs = [env for env in dir(flow.envs) # if not env.startswith('__')] # if flow_params['env_name'] in single_agent_envs: # env_loc = 'flow.envs' # else: # env_loc = 'flow.envs.multiagent' # Start the environment with the gui turned on and a path for the # emission file env_params = flow_params['env'] env_params.restart_instance = False if args.evaluate: env_params.evaluate = True # lower the horizon if testing if args.horizon: config['horizon'] = args.horizon env_params.horizon = args.horizon # create the agent that will be used to compute the actions agent = agent_cls(env=env_name, config=config) checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num agent.restore(checkpoint) if hasattr(agent, "local_evaluator") and \ os.environ.get("TEST_FLAG") != 'True': env = agent.local_evaluator.env else: env = gym.make(env_name) if multiagent: rets = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func for key in config['multiagent']['policies'].keys(): rets[key] = [] else: rets = [] if config['model']['use_lstm']: use_lstm = True if multiagent: state_init = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func size = config['model']['lstm_cell_size'] for key in config['multiagent']['policies'].keys(): state_init[key] = [np.zeros(size, np.float32), np.zeros(size, np.float32)] else: state_init = [ np.zeros(config['model']['lstm_cell_size'], np.float32), np.zeros(config['model']['lstm_cell_size'], np.float32) ] else: use_lstm = False env.restart_simulation( sim_params=sim_params, render=sim_params.render) # Simulate and collect metrics final_outflows = [] final_inflows = [] mean_speed = [] std_speed = [] for i in range(args.num_rollouts): vel = [] state = env.reset() if multiagent: ret = {key: [0] for key in rets.keys()} else: ret = 0 for _ in range(env_params.horizon): vehicles = env.unwrapped.k.vehicle vel.append(np.mean(vehicles.get_speed(vehicles.get_ids()))) if multiagent: action = {} for agent_id in state.keys(): if use_lstm: action[agent_id], state_init[agent_id], logits = \ agent.compute_action( state[agent_id], state=state_init[agent_id], policy_id=policy_map_fn(agent_id)) else: action[agent_id] = agent.compute_action( state[agent_id], policy_id=policy_map_fn(agent_id)) else: action = agent.compute_action(state) state, reward, done, _ = env.step(action) if multiagent: for actor, rew in reward.items(): ret[policy_map_fn(actor)][0] += rew else: ret += reward if multiagent and done['__all__']: break if not multiagent and done: break if multiagent: for key in rets.keys(): rets[key].append(ret[key]) else: rets.append(ret) outflow = vehicles.get_outflow_rate(500) final_outflows.append(outflow) inflow = vehicles.get_inflow_rate(500) final_inflows.append(inflow) if np.all(np.array(final_inflows) > 1e-5): throughput_efficiency = [x / y for x, y in zip(final_outflows, final_inflows)] else: throughput_efficiency = [0] * len(final_inflows) 
mean_speed.append(np.mean(vel)) std_speed.append(np.std(vel)) if multiagent: for agent_id, rew in rets.items(): print('Round {}, Return: {} for agent {}'.format( i, ret, agent_id)) else: print('Round {}, Return: {}'.format(i, ret)) print('==== Summary of results ====') print("Return:") print(mean_speed) if multiagent: for agent_id, rew in rets.items(): print('For agent', agent_id) print(rew) print('Average, std return: {}, {} for agent {}'.format( np.mean(rew), np.std(rew), agent_id)) else: print(rets) print('Average, std: {}, {}'.format( np.mean(rets), np.std(rets))) print("\nSpeed, mean (m/s):") print(mean_speed) print('Average, std: {}, {}'.format(np.mean(mean_speed), np.std( mean_speed))) print("\nSpeed, std (m/s):") print(std_speed) print('Average, std: {}, {}'.format(np.mean(std_speed), np.std( std_speed))) # Compute arrival rate of vehicles in the last 500 sec of the run print("\nOutflows (veh/hr):") print(final_outflows) print('Average, std: {}, {}'.format(np.mean(final_outflows), np.std(final_outflows))) # Compute departure rate of vehicles in the last 500 sec of the run print("Inflows (veh/hr):") print(final_inflows) print('Average, std: {}, {}'.format(np.mean(final_inflows), np.std(final_inflows))) # Compute throughput efficiency in the last 500 sec of the print("Throughput efficiency (veh/hr):") print(throughput_efficiency) print('Average, std: {}, {}'.format(np.mean(throughput_efficiency), np.std(throughput_efficiency))) # terminate the environment env.unwrapped.terminate() # if prompted, convert the emission file into a csv file if args.gen_emission: time.sleep(0.1) dir_path = os.path.dirname(os.path.realpath(__file__)) emission_filename = '{0}-emission.xml'.format(env.network.name) emission_path = \ '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename) # convert the emission file into a csv file emission_to_csv(emission_path) # print the location of the emission csv file emission_path_csv = emission_path[:-4] + ".csv" print("\nGenerated emission file at " + emission_path_csv) # delete the .xml version of the emission file os.remove(emission_path) # if we wanted to save the render, here we create the movie if args.save_render: dirs = os.listdir(os.path.expanduser('~')+'/flow_rendering') # Ignore hidden files dirs = [d for d in dirs if d[0] != '.'] dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S")) recent_dir = dirs[-1] # create the movie movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir save_dir = os.path.expanduser('~') + '/flow_movies' if not os.path.exists(save_dir): os.mkdir(save_dir) os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png" os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4" os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/" os.system(os_cmd)
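# The movie-stitching step above shells out via os.system. An equivalent
# sketch using subprocess.run, which handles paths with spaces and raises
# on ffmpeg failure instead of failing silently:
import os
import shutil
import subprocess

def frames_to_movie(movie_dir, save_dir, name):
    """Stitch frame_%06d.png files in movie_dir into save_dir/name.mp4."""
    out = os.path.join(movie_dir, name + ".mp4")
    subprocess.run(
        ["ffmpeg", "-i", os.path.join(movie_dir, "frame_%06d.png"),
         "-pix_fmt", "yuv420p", out],
        check=True)
    shutil.copy(out, save_dir)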
def run(self, num_runs, num_steps, rl_actions=None, convert_to_csv=False):
    """Run the given scenario for a set number of runs and steps per run.

    Parameters
    ----------
    num_runs : int
        number of runs the experiment should perform
    num_steps : int
        number of steps to be performed in each run of the experiment
    rl_actions : method, optional
        maps states to actions to be performed by the RL agents (if
        there are any)
    convert_to_csv : bool
        Specifies whether to convert the emission file created by sumo
        into a csv file

    Returns
    -------
    info_dict : dict
        contains returns, average speed per step
    """
    # raise an error if convert_to_csv is set to True but no emission
    # file will be generated, to avoid getting an error at the end of the
    # simulation
    if convert_to_csv and self.env.sim_params.emission_path is None:
        raise ValueError(
            'The experiment was run with convert_to_csv set '
            'to True, but no emission file will be generated. If you wish '
            'to generate an emission file, you should set the parameter '
            'emission_path in the simulation parameters (SumoParams or '
            'AimsunParams) to the path of the folder where emissions '
            'output should be generated. If you do not wish to generate '
            'emissions, set the convert_to_csv parameter to False.')

    info_dict = {}
    if rl_actions is None:
        def rl_actions(*_):
            return None

    rets = []
    mean_rets = []
    ret_lists = []
    vels = []
    mean_vels = []
    std_vels = []
    outflows = []
    for i in range(num_runs):
        vel = np.zeros(num_steps)
        logging.info("Iter #" + str(i))
        ret = 0
        ret_list = []
        state = self.env.reset()
        for j in range(num_steps):
            state, reward, done, _ = self.env.step(rl_actions(state))
            vel[j] = np.mean(
                self.env.k.vehicle.get_speed(self.env.k.vehicle.get_ids()))
            ret += reward
            ret_list.append(reward)
            if done:
                break
        rets.append(ret)
        vels.append(vel)
        mean_rets.append(np.mean(ret_list))
        ret_lists.append(ret_list)
        mean_vels.append(np.mean(vel))
        std_vels.append(np.std(vel))
        outflows.append(self.env.k.vehicle.get_outflow_rate(500))
        print("Round {0}, return: {1}".format(i, ret))

    info_dict["returns"] = rets
    info_dict["velocities"] = vels
    info_dict["mean_returns"] = mean_rets
    info_dict["per_step_returns"] = ret_lists
    info_dict["mean_outflows"] = np.mean(outflows)

    print("Average, std return: {}, {}".format(
        np.mean(rets), np.std(rets)))
    print("Average, std speed: {}, {}".format(
        np.mean(mean_vels), np.std(mean_vels)))
    self.env.terminate()

    if convert_to_csv:
        # wait a short period of time to ensure the xml file is readable
        time.sleep(0.1)

        # collect the location of the emission file
        dir_path = self.env.sim_params.emission_path
        emission_filename = \
            "{0}-emission.xml".format(self.env.scenario.name)
        emission_path = os.path.join(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)

        # Delete the .xml version of the emission file.
        os.remove(emission_path)

    return info_dict
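# Given the guard above, callers must set emission_path before requesting
# csv conversion. A hedged usage sketch (the './data' directory is
# illustrative):
from flow.core.params import SumoParams

sim_params = SumoParams(sim_step=0.1, emission_path="./data")
# ... build the environment with sim_params, then:
info = exp.run(num_runs=1, num_steps=3000, convert_to_csv=True)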
def visualizer_rllib(args, seed=None): """Visualizer for RLlib experiments. This function takes args (see function create_parser below for more detailed information on what information can be fed to this visualizer), and renders the experiment associated with it. """ result_dir = args.result_dir if args.result_dir[-1] != '/' \ else args.result_dir[:-1] config = get_rllib_config(result_dir) # check if we have a multiagent environment but in a # backwards compatible way if config.get('multiagent', {}).get('policies', None): multiagent = True pkl = get_rllib_pkl(result_dir) config['multiagent'] = pkl['multiagent'] else: multiagent = False config['callbacks'] = MyCallbacks # Run on only one cpu for rendering purposes config['num_workers'] = 0 flow_params = get_flow_params(config) #flow_params['env'].additional_params["use_seeds"]=args.use_seeds # print(args.use_seeds) seed_tmp = None if seed: with open(seed, 'rb') as f: seed_tmp = pickle.load(f) config['seed'] = int(seed_tmp['rllib_seed']) elif args.use_seeds: with open(args.use_seeds, 'rb') as f: seed_tmp = pickle.load(f) config['seed'] = int(seed_tmp['rllib_seed']) # hack for old pkl files # TODO(ev) remove eventually sim_params = flow_params['sim'] setattr(sim_params, 'num_clients', 1) if seed_tmp: #setattr(sim_params, 'seed', seed_tmp['sumo_seed']) sim_params.seed = int(int(seed_tmp['sumo_seed']) / 10**6) print(sim_params.seed) #import IPython #IPython.embed() # Determine agent and checkpoint config_run = config['env_config']['run'] if 'run' in config['env_config'] \ else None if args.run and config_run: if args.run != config_run: print('visualizer_rllib.py: error: run argument ' + '\'{}\' passed in '.format(args.run) + 'differs from the one stored in params.json ' + '\'{}\''.format(config_run)) sys.exit(1) # Merge with `evaluation_config`. evaluation_config = copy.deepcopy(config.get("evaluation_config", {})) config = merge_dicts(config, evaluation_config) if args.run: agent_cls = get_trainable_cls(args.run) elif config_run: agent_cls = get_trainable_cls(config_run) else: print('visualizer_rllib.py: error: could not find flow parameter ' '\'run\' in params.json, ' 'add argument --run to provide the algorithm or model used ' 'to train the results\n e.g. ' 'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO') sys.exit(1) sim_params.restart_instance = True dir_path = os.path.dirname(os.path.realpath(__file__)) emission_path = '{0}/test_time_rollout/'.format(dir_path) sim_params.emission_path = emission_path if args.gen_emission else None # pick your rendering mode if args.render_mode == 'sumo_web3d': sim_params.num_clients = 2 sim_params.render = False elif args.render_mode == 'drgb': sim_params.render = 'drgb' sim_params.pxpm = 4 elif args.render_mode == 'sumo_gui': sim_params.render = True print('NOTE: With render mode {}, an extra instance of the SUMO GUI ' 'will display before the GUI for visualizing the result. 
Click ' 'the green Play arrow to continue.'.format(args.render_mode)) elif args.render_mode == 'no_render': sim_params.render = False if args.save_render: sim_params.render = 'drgb' sim_params.pxpm = 4 sim_params.save_render = True #if seed is not None: # print(seed) # flow_params["env"].additional_params["use_seeds"] = seed # input() #else: # flow_params["env"].additional_params["use_seeds"] = args.use_seeds if args.horizon: config['horizon'] = args.horizon flow_params['env'].horizon = args.horizon # Create and register a gym+rllib env register_time = time.time() create_env, env_name = make_create_env(params=flow_params, version=0, seeds_file=seed) register_env(env_name, create_env) register_time = time.time() - register_time print("Register Time:", register_time) # check if the environment is a single or multiagent environment, and # get the right address accordingly # single_agent_envs = [env for env in dir(flow.envs) # if not env.startswith('__')] # if flow_params['env_name'] in single_agent_envs: # env_loc = 'flow.envs' # else: # env_loc = 'flow.envs.multiagent' # Start the environment with the gui turned on and a path for the # emission file env_params = flow_params['env'] env_params.restart_instance = True #False if args.evaluate: env_params.evaluate = True # lower the horizon if testing if args.horizon: config['horizon'] = args.horizon env_params.horizon = args.horizon # create the agent that will be used to compute the actions agent = agent_cls(env=env_name, config=config) checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num agent.restore(checkpoint) create_time = time.time() if hasattr(agent, "local_evaluator") and \ os.environ.get("TEST_FLAG") != 'True': env = agent.local_evaluator.env else: env = gym.make(env_name) create_time = time.time() - create_time print("Create time:", create_time) if multiagent: rets = {} # map the agent id to its policy print(config['multiagent']['policy_mapping_fn']) policy_map_fn = config['multiagent']['policy_mapping_fn'] #.func for key in config['multiagent']['policies'].keys(): rets[key] = [] else: rets = [] if config['model']['use_lstm']: use_lstm = True if multiagent: state_init = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func size = config['model']['lstm_cell_size'] for key in config['multiagent']['policies'].keys(): state_init[key] = [ np.zeros(size, np.float32), np.zeros(size, np.float32) ] else: state_init = [ np.zeros(config['model']['lstm_cell_size'], np.float32), np.zeros(config['model']['lstm_cell_size'], np.float32) ] else: use_lstm = False restart_time = time.time() env.restart_simulation(sim_params=sim_params, render=sim_params.render) restart_time = time.time() - restart_time print("Restart Time:", restart_time) # Simulate and collect metrics final_outflows = [] final_inflows = [] mean_speed = [] std_speed = [] if PRINT_TO_SCREEN: pp = pprint.PrettyPrinter(indent=2) print("config ") pp.pprint(config) print("flow_params ") pp.pprint(flow_params) if REALTIME_PLOTS: # prepare plots # You probably won't need this if you're embedding things in a tkinter plot... 
plt.ion() fig = plt.figure() axA = fig.add_subplot(331) axA.set_title("Actions") axR = fig.add_subplot(332) axR.set_title("Rewards") axS = fig.add_subplot(333) axS.set_title("States") axS0 = fig.add_subplot(334) axS0.set_title("S0") axS1 = fig.add_subplot(335) axS1.set_title("S1") axS2 = fig.add_subplot(336) axS2.set_title("S2") axA_hist = fig.add_subplot(337) axA_hist.set_title("Actions") axR_hist = fig.add_subplot(338) axR_hist.set_title("Rewards") axS_hist = fig.add_subplot(339) axS_hist.set_title("States") axS.set_ylim((-2, 3)) axA.set_ylim((-5, 5)) axR.set_ylim((-1, 1)) initialized_plot = False # record for visualization purposes actions = [] rewards = [] states = [] times = [] WARMUP = args.warmup run_time = time.time() for i in range(args.num_rollouts): vel = [] time_to_exit = 0 state = env.reset() if multiagent: ret = {key: [0] for key in rets.keys()} else: ret = 0 for _ in range(env_params.horizon): time_to_exit += 1 vehicles = env.unwrapped.k.vehicle if np.mean(vehicles.get_speed(vehicles.get_ids())) > 0: vel.append(np.mean(vehicles.get_speed(vehicles.get_ids()))) #vel.append(np.mean(vehicles.get_speed(vehicles.get_ids()))) if multiagent: action = {} for agent_id in state.keys(): if use_lstm: action[agent_id], state_init[agent_id], logits = \ agent.compute_action( state[agent_id], state=state_init[agent_id], policy_id=policy_map_fn(agent_id)) else: action[agent_id] = agent.compute_action( state[agent_id], policy_id=policy_map_fn(agent_id)) else: action = agent.compute_action(state) state, reward, done, _ = env.step(action) if SUMMARY_PLOTS: # record for visualization purposes actions.append(action) rewards.append(reward) states.append(state) if PRINT_TO_SCREEN: print("action") pp.pprint(action) print("reward") pp.pprint(reward) print("state") pp.pprint(state) print("after step ") if REALTIME_PLOTS: # Update plots. 
if not initialized_plot: # initialize lineA, = axA.plot( [0] * len(action), 'g^' ) # Returns a tuple of line objects, thus the comma lineR, = axR.plot( 0, 'bs' ) # Returns a tuple of line objects, thus the comma lineS, = axS.plot( [0] * len(state), 'r+' ) # Returns a tuple of line objects, thus the comma initialized_plot = True lineA.set_ydata(action) lineR.set_ydata(reward) lineS.set_ydata(state) fig.canvas.draw() fig.canvas.flush_events() if multiagent: for actor, rew in reward.items(): ret[policy_map_fn(actor)][0] += rew else: ret += reward if multiagent and done['__all__']: break if not multiagent and done: break if args.use_delay > 0: if vehicles.get_num_arrived() >= args.use_delay: break if multiagent: for key in rets.keys(): rets[key].append(ret[key]) else: rets.append(ret) outflow = vehicles.get_outflow_rate(5000) final_outflows.append(outflow) inflow = vehicles.get_inflow_rate(5000) final_inflows.append(inflow) times.append(time_to_exit) if np.all(np.array(final_inflows) > 1e-5): throughput_efficiency = [ x / y for x, y in zip(final_outflows, final_inflows) ] else: throughput_efficiency = [0] * len(final_inflows) mean_speed.append(np.mean(vel)) std_speed.append(np.std(vel)) if multiagent: for agent_id, rew in rets.items(): print('Round {}, Return: {} for agent {}'.format( i, ret, agent_id)) else: print('Round {}, Return: {}'.format(i, ret)) run_time = time.time() - run_time print('==== Summary of results ====') print("Run Time: ", run_time) print("Return:") env.close() return_reward = 0 if multiagent: for agent_id, rew in rets.items(): print('For agent', agent_id) print(rew) print('Average, std return: {}, {} for agent {}'.format( np.mean(rew), np.std(rew), agent_id)) return_reward = np.mean(rew) else: print(rets) print('Average, std: {:.2f}, {:.5f}'.format(np.mean(rets), np.std(rets))) return_reward = np.mean(rets) print("\nSpeed, mean (m/s):") print(mean_speed) print('Average, std: {:.2f}, {:.5f}'.format(np.mean(mean_speed), np.std(mean_speed))) print("\nSpeed, std (m/s):") print(std_speed) print('Average, std: {:.2f}, {:.5f}'.format(np.mean(std_speed), np.std(std_speed))) # Compute arrival rate of vehicles in the last 500 sec of the run print("\nOutflows (veh/hr):") print(final_outflows) print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_outflows), np.std(final_outflows))) # Compute departure rate of vehicles in the last 500 sec of the run print("Inflows (veh/hr):") print(final_inflows) print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_inflows), np.std(final_inflows))) # Compute throughput efficiency in the last 500 sec of the print("Throughput efficiency (veh/hr):") print(throughput_efficiency) print('Average, std: {:.2f}, {:.5f}'.format(np.mean(throughput_efficiency), np.std(throughput_efficiency))) print("Time Delay") print(times) print("Time for certain number of vehicles to exit {:.2f},{:.5f}".format( (np.mean(times)), np.std(times))) if args.output: np.savetxt(args.output, [ return_reward, mean_speed, std_speed, final_inflows, final_outflows, times ]) if SUMMARY_PLOTS: generateHtmlplots(actions, rewards, states) # terminate the environment env.unwrapped.terminate() env.terminate() # Deleting the env in order to remove sumo process del env del evaluation_config # if prompted, convert the emission file into a csv file if args.gen_emission: time.sleep(0.1) dir_path = os.path.dirname(os.path.realpath(__file__)) emission_filename = '{0}-emission.xml'.format(env.network.name) emission_path = \ '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename) # 
convert the emission file into a csv file emission_to_csv(emission_path) # print the location of the emission csv file emission_path_csv = emission_path[:-4] + ".csv" print("\nGenerated emission file at " + emission_path_csv) # delete the .xml version of the emission file os.remove(emission_path) # if we wanted to save the render, here we create the movie if args.save_render: dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering') # Ignore hidden files dirs = [d for d in dirs if d[0] != '.'] dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S")) recent_dir = dirs[-1] # create the movie movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir save_dir = os.path.expanduser('~') + '/flow_movies' if not os.path.exists(save_dir): os.mkdir(save_dir) os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png" os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4" os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/" os.system(os_cmd) return return_reward, mean_speed, final_inflows, final_outflows
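# Because this variant returns (return_reward, mean_speed, final_inflows,
# final_outflows), it can be called programmatically, e.g. to sweep over
# checkpoints. A hedged sketch; make_args is a hypothetical stand-in for
# however the caller builds the argparse namespace:
results = {}
for ckpt in ['500', '1000', '2000']:  # illustrative checkpoint numbers
    args = make_args(checkpoint_num=ckpt)  # hypothetical helper
    reward, speed, inflows, outflows = visualizer_rllib(args)
    results[ckpt] = reward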
        vehicles = env.vehicles
        vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
        action = agent.compute_action(state)
        state, reward, done, _ = env.step(action)
        ret += reward
        if done:
            break
    rets.append(ret)
    outflow = vehicles.get_outflow_rate(500)
    final_outflows.append(outflow)
    mean_speed.append(np.mean(vel))
    print("Round {}, Return: {}".format(i, ret))

print("Average, std return: {}, {}".format(np.mean(rets), np.std(rets)))
print("Average, std speed: {}, {}".format(np.mean(mean_speed),
                                          np.std(mean_speed)))
print("Average, std outflow: {}, {}".format(np.mean(final_outflows),
                                            np.std(final_outflows)))

# terminate the environment
env.terminate()

# if prompted, convert the emission file into a csv file
if args.emission_to_csv:
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_filename = "{0}-emission.xml".format(scenario.name)
    emission_path = \
        "{0}/test_time_rollout/{1}".format(dir_path, emission_filename)
    emission_to_csv(emission_path)
def visualizer_rllab(args):
    # extract the flow environment
    data = joblib.load(args.file)
    policy = data['policy']
    env = data['env']

    # FIXME(ev, ak) only one of these should be needed
    # unwrapped_env = env._wrapped_env._wrapped_env.env.unwrapped
    # unwrapped_env = env.wrapped_env.env.env.unwrapped
    # if this doesn't work, try the one above it
    unwrapped_env = env._wrapped_env.env.unwrapped

    # Recreate experiment params
    tot_cars = unwrapped_env.vehicles.num_vehicles
    rl_cars = unwrapped_env.vehicles.num_rl_vehicles
    max_path_length = int(env.horizon)
    flat_obs = env._wrapped_env.observation_space.flat_dim
    obs_vars = unwrapped_env.obs_var_labels or []
    num_obs_var = flat_obs / tot_cars

    # Set sumo to make a video
    sumo_params = unwrapped_env.sumo_params
    sumo_params.emission_path = './test_time_rollout/'
    if args.no_render:
        sumo_params.render = False
    else:
        sumo_params.render = True
    unwrapped_env.restart_sumo(sumo_params=sumo_params,
                               render=sumo_params.render)

    # Load data into arrays
    all_obs = np.zeros((args.num_rollouts, max_path_length, flat_obs))
    all_rewards = np.zeros((args.num_rollouts, max_path_length))
    rew = []
    for j in range(args.num_rollouts):
        # run a single rollout of the experiment
        path = rollout(env=env, agent=policy)

        # collect the observations and rewards from the rollout
        new_obs = path['observations']
        all_obs[j, :new_obs.shape[0], :new_obs.shape[1]] = new_obs
        new_rewards = path['rewards']
        all_rewards[j, :len(new_rewards)] = new_rewards

        # print the cumulative reward of the most recent rollout
        print('Round {}, return: {}'.format(j, sum(new_rewards)))
        rew.append(sum(new_rewards))

    # print the average cumulative reward across rollouts
    print('Average, std return: {}, {}'.format(np.mean(rew), np.std(rew)))

    # ensure that a "plots" folder exists in the directory, and if not,
    # create one (skipped while testing; note the default must be falsy,
    # otherwise the folder is never created)
    if not os.path.exists('plots') and not os.environ.get('TEST_FLAG', False):
        os.makedirs('plots')

    # create an array of time
    sim_step = unwrapped_env.sumo_params.sim_step
    t = np.arange(max_path_length) * sim_step

    for obs_var_idx in range(int(num_obs_var)):
        if len(obs_vars) < obs_var_idx + 1:
            obs_var = 'Observation {0}'.format(obs_var_idx)
        else:
            obs_var = obs_vars[obs_var_idx]

        # plot mean value for observation for each vehicle across rollouts
        plt.figure()
        for car in range(tot_cars):
            center = np.mean(all_obs[:, :, tot_cars * obs_var_idx + car],
                             axis=0)
            plt.plot(range(max_path_length), center, lw=2.0,
                     label='Veh {}'.format(car))
        plt.ylabel(obs_var, fontsize=15)
        plt.xlabel('time (s)', fontsize=15)
        plt.title('{2}, Autonomous Penetration: {0}/{1}'.format(
            rl_cars, tot_cars, obs_var), fontsize=16)
        plt.legend(loc=0)

        # save the plot in the "plots" directory unless we're testing
        if not os.environ.get('TEST_FLAG', False):
            plt.savefig('plots/{0}_{1}.png'.format(args.plotname, obs_var),
                        bbox_inches='tight')

        # plot mean values for the observations across all vehicles and all
        # rollouts
        car_mean = np.mean(np.mean(
            all_obs[:, :, tot_cars * obs_var_idx:tot_cars *
                    (obs_var_idx + 1)], axis=0), axis=1)
        plt.figure()
        plt.plot(t, car_mean)
        plt.ylabel(obs_var, fontsize=15)
        plt.xlabel('time (s)', fontsize=15)
        plt.title('Mean {2}, Autonomous Penetration: {0}/{1}'.format(
            rl_cars, tot_cars, obs_var), fontsize=16)

        # save the plot in the "plots" directory unless we're testing
        if not os.environ.get('TEST_FLAG', False):
            plt.savefig('plots/{0}_{1}_mean.png'.format(
                args.plotname, obs_var), bbox_inches='tight')

    # Make a figure for the mean rewards over the course of the rollout
    mean_reward = np.mean(all_rewards, axis=0)
    plt.figure()
    plt.plot(t, mean_reward, lw=2.0)
    plt.ylabel('reward', fontsize=15)
    plt.xlabel('time (s)', fontsize=15)
    plt.title('Reward, Autonomous Penetration: {0}/{1}'.format(
        rl_cars, tot_cars), fontsize=16)

    # save the rewards plot in the "plots" directory unless we're testing
    if not os.environ.get('TEST_FLAG', False):
        plt.savefig('plots/{0}_reward.png'.format(args.plotname),
                    bbox_inches='tight')

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(
            unwrapped_env.scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)
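
# --- Hedged sketch, not part of the original file: a minimal version of the
# argument parser that visualizer_rllab above expects. The field names
# (file, num_rollouts, plotname, no_render, emission_to_csv) are inferred
# from the attributes accessed in the function; the defaults are assumptions.
import argparse


def create_parser_sketch():
    parser = argparse.ArgumentParser(description='Visualize rllab results.')
    # path to the pickled result file produced by rllab
    parser.add_argument('file', type=str)
    # number of rollouts to average over
    parser.add_argument('--num_rollouts', type=int, default=1)
    # prefix used for the files saved in the "plots" directory
    parser.add_argument('--plotname', type=str, default='traffic_plot')
    # disable rendering during the rollouts
    parser.add_argument('--no_render', action='store_true')
    # convert the generated emission xml into a csv at the end
    parser.add_argument('--emission_to_csv', action='store_true')
    return parser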
def run_eval(self, num_runs, num_steps, run, saveLogs, train,
             rl_actions=None, convert_to_csv=False, load_path=None):
    """
    Run the given scenario for a set number of runs and steps per run.

    Parameters
    ----------
    num_runs: int
        number of runs the experiment should perform
    num_steps: int
        number of steps to be performed in each run of the experiment
    train: bool
        Defines whether this is a training or an evaluation experiment
    run: int
        The number of the current experiment
    saveLogs: SaveLogs object
        The instance of the package used to save the logs of the simulation
    rl_actions: method, optional
        maps states to actions to be performed by the RL agents (if
        there are any)
    convert_to_csv: bool
        Specifies whether to convert the emission file created by sumo
        into a csv file
    load_path: string
        Path to the model that should be loaded into the neural network
        Default: None

    Returns
    -------
    info_dict: dict
        contains returns, average speed per step
    """
    if rl_actions is None:
        def rl_actions(*_):
            return None

    #1. Initialize the information variables
    info_dict = {}
    rets = []
    mean_rets = []
    ret_lists = []
    vels = []
    mean_vels = []
    std_vels = []
    performance = []
    collisions = []

    #2. Set the reinforcement learning parameters
    action_set = self.env.getActionSet()
    print('LOAD PATH -- run:', load_path)
    time.sleep(2)
    agent = Agent(action_set, train=False, load_path=load_path)
    target_update_counter = 0

    #3. Run the experiment for a set number of simulations (runs)
    for i in range(num_runs):
        #1. initialize the environment
        vel = np.zeros(num_steps)
        logging.info("Iter #" + str(i))
        ret = 0
        ret_list = []
        vehicles = self.env.vehicles
        collision_check = 5
        obs = self.get_screen(self.env.reset())
        self.env.reset_params()
        state = np.stack([obs for _ in range(4)], axis=0)

        #2. Perform one simulation
        for j in range(num_steps):
            print('(episode, step) = ', i, ',', j)

            #1. Select and perform an action (the method rl_action is
            # responsible for selecting the action to be taken)
            action, Q_value = agent.select_action(self.concatenate(
                state, agent), train=False)
            if Q_value is not None:
                saveLogs.save_Q_value(Q_value, run)
            obs, reward, done, _ = self.env.step(action_set[action[0]])

            #2. Convert the observation to a pytorch observation
            obs = self.get_screen(obs)
            reward = torch.tensor([reward], device=agent.device)

            #3. Observe new state
            if not (self.env.arrived or self.env.crashed):
                next_state = []
                next_state.append(obs)
                next_state.append(deepcopy(state[0]))
                next_state.append(deepcopy(state[1]))
                next_state.append(deepcopy(state[2]))
            else:
                next_state = None

            #4. Store the transition in memory
            agent.memory.push(self.concatenate(state, agent), action,
                              self.concatenate(next_state, agent), reward)

            #5. Move to the next state
            state = next_state

            #6. Flow code
            vel[j] = np.mean(vehicles.get_speed(vehicles.get_ids()))
            ret += reward
            ret_list.append(reward)

            #7. Decide if the simulation gets to an end
            if done or self.env.arrived or self.env.crashed:
                agent.episode_durations.append(j + 1)
                if self.env.crashed:
                    saveLogs.add_crash()
                    print('Crash')
                    collision_check = 1
                elif self.env.arrived:
                    saveLogs.add_arrive()
                    print('all vehicles arrived at the destination')
                break

        #3. Store information from the simulation
        saveLogs.add_simulation_time(time=j)
        performance.append(j)
        collisions.append(1 if self.env.crashed else 0)

        #4. flow code
        rets.append(ret)
        vels.append(vel)
        mean_rets.append(np.mean(ret_list))
        ret_lists.append(ret_list)
        mean_vels.append(np.mean(vel))
        std_vels.append(np.std(vel))

        #5. save rewards
        #if i % Config.SAVE_REWARDS_FREQUENCY == 0:
        saveLogs.save_reward(rets, run, i)
        saveLogs.save_average_reward(ret)
        saveLogs.save_collision(collision_check, run)
        saveLogs.save_time(j, run)

    #4. Store the logs of the simulation
    info_dict["returns"] = np.array(rets.copy())
    info_dict["velocities"] = vels
    info_dict["mean_returns"] = mean_rets
    info_dict["per_step_returns"] = ret_lists
    info_dict["performance"] = np.array(performance.copy())
    info_dict["collisions"] = np.array(collisions.copy())

    print("Average, std return: {}, {}".format(np.mean(rets), np.std(rets)))
    print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                              np.std(std_vels)))
    self.env.terminate()

    if convert_to_csv:
        # collect the location of the emission file
        dir_path = self.env.sumo_params.emission_path
        emission_filename = \
            "{0}-emission.xml".format(self.env.scenario.name)
        emission_path = \
            "{0}/{1}".format(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)

    return info_dict
def generate_emission_csv(emission_path, emission_name):
    """Generate a csv from an emission xml file.

    The emission xml must already exist at the given path before this
    function is called.
    """
    xml_path = os.path.join(emission_path, (emission_name + ".xml"))
    emission_to_csv(xml_path)
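
# --- Usage sketch for generate_emission_csv (hypothetical paths): the helper
# only joins the directory and the emission name before delegating to
# emission_to_csv, so the xml file must already have been written by sumo.
# generate_emission_csv("./test_time_rollout", "myexp-emission")
# -> reads ./test_time_rollout/myexp-emission.xml and writes the csv next to it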
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)
    name = result_dir.split("/")[-2:]

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # another hack for old pkl files; TODO: remove eventually
    if not hasattr(sim_params, 'use_ballistic'):
        sim_params.use_ballistic = False

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument ' +
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json ' +
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # will be set to True below
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        if args.render_mode != 'sumo_gui':
            sim_params.render = 'drgb'
            sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False

    if args.evaluate:
        env_params.evaluate = True  # FIXME: this does not work

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
os.environ.get("TEST_FLAG") != 'True': env = agent.local_evaluator.env else: env = gym.make(env_name) if args.render_mode == 'sumo_gui': env.sim_params.render = True # set to True after initializing agent and env if multiagent: rets = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func for key in config['multiagent']['policies'].keys(): rets[key] = [] else: rets = [] if config['model']['use_lstm']: use_lstm = True if multiagent: state_init = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func size = config['model']['lstm_cell_size'] for key in config['multiagent']['policies'].keys(): state_init[key] = [np.zeros(size, np.float32), np.zeros(size, np.float32)] else: state_init = [ np.zeros(config['model']['lstm_cell_size'], np.float32), np.zeros(config['model']['lstm_cell_size'], np.float32) ] else: use_lstm = False # if restart_instance, don't restart here because env.reset will restart later if not sim_params.restart_instance: env.restart_simulation(sim_params=sim_params, render=sim_params.render) # Simulate and collect metrics final_outflows = [] final_inflows = [] mean_speed = [] std_speed = [] rl_speed = [] # store rl controlled vehicle's speed log2_stack = defaultdict(list) # This dict stores log2 data during rollouts if args.evaluate: env.unwrapped.env_params.evaluate = True # To cover bug for i in range(args.num_rollouts): vel = [] vel_dict = defaultdict(list) timerange = [] state = env.reset() if multiagent: ret = {key: [0] for key in rets.keys()} else: ret = 0 for _ in range(env_params.horizon): vehicles = env.unwrapped.k.vehicle ids = vehicles.get_ids() rls = vehicles.get_rl_ids() speeds = vehicles.get_speed(ids) timerange.append(vehicles.get_timestep(ids[-1]) / 10000) # only include non-empty speeds if speeds: vel.append(np.mean(speeds)) for veh_id, speed in zip(ids, speeds): vel_dict[veh_id].append(speed) if multiagent: action = {} for agent_id in state.keys(): if use_lstm: action[agent_id], state_init[agent_id], logits = \ agent.compute_action( state[agent_id], state=state_init[agent_id], policy_id=policy_map_fn(agent_id)) else: action[agent_id] = agent.compute_action( state[agent_id], policy_id=policy_map_fn(agent_id)) else: action = agent.compute_action(state) state, reward, done, _ = env.step(action) if multiagent: for actor, rew in reward.items(): ret[policy_map_fn(actor)][0] += rew else: ret += reward if multiagent and done['__all__']: break if not multiagent and done: break if multiagent: for key in rets.keys(): rets[key].append(ret[key]) else: rets.append(ret) outflow = vehicles.get_outflow_rate(500) final_outflows.append(outflow) inflow = vehicles.get_inflow_rate(500) final_inflows.append(inflow) if np.all(np.array(final_inflows) > 1e-5): throughput_efficiency = [x / y for x, y in zip(final_outflows, final_inflows)] else: throughput_efficiency = [0] * len(final_inflows) mean_speed.append(np.mean(vel)) std_speed.append(np.std(vel)) if multiagent: for agent_id, rew in rets.items(): print('Round {}, Return: {} for agent {}'.format( i, ret, agent_id)) else: print('Round {}, Return: {}'.format(i, ret)) log2 = env.unwrapped.log2 for k in log2: log2_stack[k].append(log2[k]) # plot non-rl's speed and rl's speed graph if i == args.num_rollouts - 1 and args.render_mode != "no_render": veh = list(vel_dict.keys()) plt.subplot(2, 1, 1) plt.title('/'.join(name)) for v in veh[:-1]: plt.plot(timerange, vel_dict[v]) plt.xlabel('timestep(s)') plt.ylabel('speed(m/s)') plt.legend(veh[:-1]) 
            plt.grid(True)
            # plt.show()
            plt.subplot(2, 1, 2)
            plt.plot(timerange, vel_dict[veh[-1]])
            plt.xlabel('timestep(s)')
            plt.ylabel('speed(m/s)')
            plt.legend(veh[-1:])
            plt.grid(True)
            plt.show()
            rl_speed = [np.mean(vel_dict[rl])
                        for rl in vehicles.get_rl_ids()]

    for k in log2_stack:
        log2_stack[k] = np.mean(log2_stack[k]).round(3)

    # export the log2_stack; note the timestamp must not be named `time`,
    # otherwise it shadows the time module used by time.sleep below
    from time import strftime
    timestamp = strftime('%Y-%m-%d')
    flow_autonomous_home = os.path.expanduser('~/log/')
    with open(flow_autonomous_home + 'log.csv', 'a') as f:
        keys = ['\"' + str(k) + '\"' for k in log2_stack.keys()]
        values = ['\"' + str(v) + '\"' for v in log2_stack.values()]
        # f.write('time,name,'+','.join(keys)+'\n')
        f.write(f'{timestamp},{name[0]},{",".join(values)}\n')

    print('==== Summary of results ====')
    print("Return:")
    print(mean_speed)
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(
            np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('')
    # bmil edit
    rls = vehicles.get_rl_ids()
    for idx in range(len(rls)):
        print(f'{rls[idx]} Speed, mean (m/s): {rl_speed[idx]}')
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))

    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed),
                                        np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)
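
# --- Hedged follow-up sketch, not part of the original visualizer: read back
# the ~/log/log.csv rows appended above. Each row holds the date, the
# experiment name, and the quoted log2 summary values; there is no header row
# unless the commented-out f.write above was ever enabled.
import csv
import os


def read_summary_log(path=os.path.expanduser('~/log/log.csv')):
    """Yield (date, name, values) tuples from the appended summary log."""
    with open(path) as f:
        for row in csv.reader(f):
            # first two columns are the date and experiment name; the rest
            # are the rounded log2 statistics in insertion order
            yield row[0], row[1], row[2:]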
def run(self, num_runs, rl_actions=None, convert_to_csv=False): """Run the given network for a set number of runs. Parameters ---------- num_runs : int number of runs the experiment should perform rl_actions : method, optional maps states to actions to be performed by the RL agents (if there are any) convert_to_csv : bool Specifies whether to convert the emission file created by sumo into a csv file Returns ------- info_dict : dict < str, Any > contains returns, average speed per step """ num_steps = self.env.env_params.horizon # raise an error if convert_to_csv is set to True but no emission # file will be generated, to avoid getting an error at the end of the # simulation if convert_to_csv and self.env.sim_params.emission_path is None: raise ValueError( 'The experiment was run with convert_to_csv set ' 'to True, but no emission file will be generated. If you wish ' 'to generate an emission file, you should set the parameter ' 'emission_path in the simulation parameters (SumoParams or ' 'AimsunParams) to the path of the folder where emissions ' 'output should be generated. If you do not wish to generate ' 'emissions, set the convert_to_csv parameter to False.') # used to store info_dict = { "returns": [], "velocities": [], "outflows": [], } info_dict.update({key: [] for key in self.custom_callables.keys()}) if rl_actions is None: def rl_actions(*_): return None # time profiling information t = time.time() times = [] meanSpeeds = [] for i in range(num_runs): ret = 0 vel = [] custom_vals = {key: [] for key in self.custom_callables.keys()} state = self.env.reset() for j in range(num_steps): t0 = time.time() state, reward, done, _ = self.env.step(rl_actions(state)) t1 = time.time() times.append(1 / (t1 - t0)) # Compute the velocity speeds and cumulative returns. veh_ids = self.env.k.vehicle.get_ids() vel.append(np.mean(self.env.k.vehicle.get_speed(veh_ids))) ret += reward ids = self.env.k.vehicle.get_ids() speeds = self.env.k.vehicle.get_speed(ids) #Only count speeds of cars in edge prior to the 'construction site' targetSpeeds = [] for veh_id in ids: edge = self.env.k.vehicle.get_edge(veh_id) if edge == "edge3" or edge == "edge4": speed = self.env.k.vehicle.get_speed(veh_id) if abs(speed) > 10000: continue targetSpeeds.append(speed) if (len(targetSpeeds) == 0): meanSpeeds.append(0) else: meanSpeeds.append(np.mean(targetSpeeds)) # Compute the results for the custom callables. for (key, lambda_func) in self.custom_callables.items(): custom_vals[key].append(lambda_func(self.env)) if done: break # Store the information from the run in info_dict. outflow = self.env.k.vehicle.get_outflow_rate(int(500)) info_dict["returns"].append(ret) info_dict["velocities"].append(np.mean(vel)) info_dict["outflows"].append(outflow) for key in custom_vals.keys(): info_dict[key].append(np.mean(custom_vals[key])) print("Round {0}, return: {1}".format(i, ret)) # Print the averages/std for all variables in the info_dict. 
for key in info_dict.keys(): print("Average, std {}: {}, {}".format(key, np.mean(info_dict[key]), np.std(info_dict[key]))) print("Total time:", time.time() - t) print("steps/second:", np.mean(times)) self.env.terminate() if convert_to_csv and self.env.simulator == "traci": # wait a short period of time to ensure the xml file is readable time.sleep(0.1) # collect the location of the emission file dir_path = self.env.sim_params.emission_path emission_filename = \ "{0}-emission.xml".format(self.env.network.name) emission_path = os.path.join(dir_path, emission_filename) # convert the emission file into a csv emission_to_csv(emission_path) # Delete the .xml version of the emission file. os.remove(emission_path) print(np.mean(meanSpeeds)) meanSpeeds = np.asarray(meanSpeeds) np.savetxt("meanSpeeds_sim.csv", meanSpeeds, delimiter=",") return info_dict
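
# --- Hedged sketch, not part of the original class: reload the per-step mean
# speeds that run() saved via np.savetxt above; assumes the file was written
# to the current working directory.
import os
import numpy as np

if os.path.exists("meanSpeeds_sim.csv"):
    # ndmin=1 keeps a 1-D array even if only a single step was recorded
    speeds = np.loadtxt("meanSpeeds_sim.csv", delimiter=",", ndmin=1)
    print("steps recorded:", speeds.shape[0])
    print("overall mean speed on the target edges (m/s):", speeds.mean())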
def run_train_eval(self, num_runs, num_steps, run, saveLogs, train,
                   rl_actions=None, convert_to_csv=False, load_path=None):
    """
    Run the given scenario for a set number of runs and steps per run.

    Parameters
    ----------
    num_runs: int
        number of runs the experiment should perform
    num_steps: int
        number of steps to be performed in each run of the experiment
    train: bool
        Defines whether this is a training or an evaluation experiment
    run: int
        The number of the current experiment
    saveLogs: SaveLogs object
        The instance of the package used to save the logs of the simulation
    rl_actions: method, optional
        maps states to actions to be performed by the RL agents (if
        there are any)
    convert_to_csv: bool
        Specifies whether to convert the emission file created by sumo
        into a csv file
    load_path: string
        Path to the model that should be loaded into the neural network
        Default: None

    Returns
    -------
    info_dict: dict
        contains returns, average speed per step
    """
    if rl_actions is None:
        def rl_actions(*_):
            return None

    #1. Initialize the information variables
    info_dict = {}
    rets = []
    mean_rets = []
    ret_lists = []
    vels = []
    mean_vels = []
    std_vels = []
    performance = []
    collisions = []
    q_values = []
    losses = []

    #2. Set the reinforcement learning parameters
    action_set = self.env.getActionSet()
    agent = Agent(action_set, train=True, load_path=load_path)
    target_update_counter = 0

    #3. Initialize the variables that decide when to store the best network
    got_it = 0  # How many times the agent reaches the end of the street
    max_ret = -200
    evaluate_counter = Config.EVALUATE_AMMOUNT
    best_net_state_dict = None

    #4. Run the experiment for a set number of simulations (runs)
    train_simul = 0
    total_simul = 0
    while train_simul < num_runs:
        total_simul += 1

        #1. initialize the environment
        vel = np.zeros(num_steps)
        logging.info("Iter #" + str(total_simul))
        ret = 0
        ret_list = []
        vehicles = self.env.vehicles
        collision_check = 0
        obs = self.get_screen(self.env.reset())
        self.env.reset_params()
        state = np.stack([obs for _ in range(4)], axis=0)
        #state = torch.from_numpy(obs).to(agent.device).unsqueeze(0)
        if evaluate_counter == Config.EVALUATE_AMMOUNT:
            train_simul += 1

        #2. Perform one simulation
        for j in range(num_steps):
            print('(episode, step) = ', total_simul, ',', j)

            #1. Select and perform an action (the method rl_action is
            # responsible for selecting the action to be taken)
            if evaluate_counter == Config.EVALUATE_AMMOUNT:
                print('------------ IT IS EQUAL')
                agent.policy_net.train()
                train = True
            action, Q_value, uncertainty = agent.select_action(
                self.concatenate(state, agent), train)
            if Q_value is not None and train:
                saveLogs.save_Q_value(Q_value, run)
                q_values.append(Q_value)
                saveLogs.save_uncertainty(uncertainty, run)
            obs, reward, done, _ = self.env.step(action_set[action])

            #2. Convert the observation to a pytorch observation
            obs = self.get_screen(obs)
            reward = torch.tensor([reward], device=agent.device)
            #state = torch.from_numpy(obs).to(agent.device).unsqueeze(0)

            #3. Observe new state
            if not (self.env.arrived or self.env.crashed):
                next_state = []
                next_state.append(obs)
                next_state.append(deepcopy(state[0]))
                next_state.append(deepcopy(state[1]))
                next_state.append(deepcopy(state[2]))
                #next_state = deepcopy(state)
            else:
                next_state = None

            #4. Store the transition in memory
            if train and evaluate_counter == Config.EVALUATE_AMMOUNT:
                agent.append_sample(self.concatenate(state, agent), action,
                                    self.concatenate(next_state, agent),
                                    reward)
                #agent.append_sample(state, action, next_state, reward)

            #5. Move to the next state
            state = next_state

            #6. Flow code
            vel[j] = np.mean(vehicles.get_speed(vehicles.get_ids()))
            ret += reward
            ret_list.append(reward)

            #7. Perform one step of the optimization (on the target
            # network) if in training mode
            if train and evaluate_counter == Config.EVALUATE_AMMOUNT:
                print('----- ENTERED THE OPTIMIZATION')
                loss = agent.optimize_model()
                saveLogs.save_loss(loss, run)
                losses.append(loss)
                agent.policy_net.eval()
                train = False
                target_update_counter += 1

                #8. update target network
                if target_update_counter % Config.TARGET_UPDATE == 0:
                    target_update_counter = 0
                    agent.target_net.load_state_dict(
                        agent.policy_net.state_dict())
                    print('update target network ok...')

            #9. Decide if the simulation gets to an end
            if done or self.env.arrived or self.env.crashed:
                agent.episode_durations.append(j + 1)
                if self.env.crashed:
                    saveLogs.add_crash()
                    print('Crash')
                    collision_check = 1
                elif self.env.arrived:
                    saveLogs.add_arrive()
                    print('all vehicles arrived at the destination')
                break

        #3. Decide if the current model of the neural network will be stored
        if self.env.arrived:
            got_it += 1
        else:
            got_it = 0
        print('got_it:', got_it)

        if evaluate_counter == Config.EVALUATE_AMMOUNT:
            print('-------- EVALUATE A: ', evaluate_counter)

            #4. Store information from the simulation
            saveLogs.add_simulation_time(time=j)
            performance.append(j)
            collisions.append(1 if self.env.crashed else 0)

            #5. flow code
            rets.append(ret)
            vels.append(vel)
            mean_rets.append(np.mean(ret_list))
            ret_lists.append(ret_list)
            mean_vels.append(np.mean(vel))
            std_vels.append(np.std(vel))

            #6. save rewards
            #if i % Config.SAVE_REWARDS_FREQUENCY == 0:
            saveLogs.save_reward(ret, run, train_simul)
            saveLogs.save_average_reward(ret)
            saveLogs.save_collision(collision_check, run)
            saveLogs.save_time(j, run)
            evaluate_counter = 0
            got_it = 0
        else:
            print('-------- EVALUATE B: ', evaluate_counter)
            evaluate_counter += 1
            if evaluate_counter == Config.EVALUATE_AMMOUNT and \
                    got_it == Config.EVALUATE_AMMOUNT and ret > max_ret:
                print('------ ENTERED TO SAVE')
                max_ret = ret
                saveLogs.save_model(agent.policy_net, agent.optimizer,
                                    10101010, train_simul * j)
                best_net_state_dict = agent.policy_net.state_dict()
                print('got:', got_it)

    #5. Store the logs of the simulation
    #a. save the final model of the neural network
    saveLogs.save_model(agent.policy_net, agent.optimizer, run,
                        train_simul * j)

    #b. store the data statistics of the simulation
    info_dict["returns"] = np.array(rets.copy())
    info_dict["velocities"] = vels
    info_dict["mean_returns"] = mean_rets
    info_dict["per_step_returns"] = ret_lists
    info_dict["performance"] = np.array(performance.copy())
    info_dict["collisions"] = np.array(collisions.copy())
    info_dict["loss"] = np.array(losses.copy())
    info_dict["q_values"] = np.array(q_values.copy())

    print("Average, std return: {}, {}".format(np.mean(rets), np.std(rets)))
    print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                              np.std(std_vels)))
    self.env.terminate()

    if convert_to_csv:
        # collect the location of the emission file
        dir_path = self.env.sumo_params.emission_path
        emission_filename = \
            "{0}-emission.xml".format(self.env.scenario.name)
        emission_path = \
            "{0}/{1}".format(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)

    return info_dict
def visualizer_rllib(args): """Visualizer for RLlib experiments. This function takes args (see function create_parser below for more detailed information on what information can be fed to this visualizer), and renders the experiment associated with it. """ result_dir = args.result_dir if args.result_dir[-1] != '/' \ else args.result_dir[:-1] config = get_rllib_config(result_dir) # check if we have a multiagent environment but in a # backwards compatible way if config.get('multiagent', {}).get('policies', None): multiagent = True pkl = get_rllib_pkl(result_dir) config['multiagent'] = pkl['multiagent'] else: multiagent = False # Run on only one cpu for rendering purposes config['num_workers'] = 0 flow_params = get_flow_params(config) # hack for old pkl files # TODO(ev) remove eventually sim_params = flow_params['sim'] setattr(sim_params, 'num_clients', 1) # Determine agent and checkpoint config_run = config['env_config']['run'] if 'run' in config['env_config'] \ else None if args.run and config_run: if args.run != config_run: print('visualizer_rllib.py: error: run argument ' + '\'{}\' passed in '.format(args.run) + 'differs from the one stored in params.json ' + '\'{}\''.format(config_run)) sys.exit(1) if args.run: agent_cls = get_agent_class(args.run) elif config_run: agent_cls = get_agent_class(config_run) else: print('visualizer_rllib.py: error: could not find flow parameter ' '\'run\' in params.json, ' 'add argument --run to provide the algorithm or model used ' 'to train the results\n e.g. ' 'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO') sys.exit(1) sim_params.restart_instance = True # specify emission file path dir_path = os.path.dirname(os.path.realpath(__file__)) emission_path = '{0}/test_time_rollout/'.format(dir_path) sim_params.emission_path = emission_path if args.gen_emission else None # pick your rendering mode if args.render_mode == 'sumo_web3d': sim_params.num_clients = 2 sim_params.render = False elif args.render_mode == 'drgb': sim_params.render = 'drgb' sim_params.pxpm = 4 elif args.render_mode == 'sumo_gui': sim_params.render = False # this will be set to true after creating agent and gym print('NOTE: With render mode {}, an extra instance of the SUMO GUI ' 'will display before the GUI for visualizing the result. 
Click ' 'the green Play arrow to continue.'.format(args.render_mode)) elif args.render_mode == 'no_render': sim_params.render = False if args.save_render: sim_params.render = 'drgb' sim_params.pxpm = 4 sim_params.save_render = True # Create and register a gym+rllib env create_env, env_name = make_create_env(params=flow_params, version=0) register_env(env_name, create_env) # Start the environment with the gui turned on and a path for the # emission file env_params = flow_params['env'] env_params.restart_instance = False if args.evaluate: env_params.evaluate = True # lower the horizon if testing if args.horizon: config['horizon'] = args.horizon env_params.horizon = args.horizon # create the agent that will be used to compute the actions agent = agent_cls(env=env_name, config=config) checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num agent.restore(checkpoint) if hasattr(agent, "local_evaluator") and \ os.environ.get("TEST_FLAG") != 'True': env = agent.local_evaluator.env else: env = gym.make(env_name) if args.render_mode == 'sumo_gui': env.sim_params.render = True # set to true after initializing agent and env # if restart_instance, don't restart here because env.reset will restart later if not sim_params.restart_instance: env.restart_simulation(sim_params=sim_params) use_lstm = config['model'].get('use_lstm', False) if use_lstm: state_size = config['model']['lstm_cell_size'] lstm_state = [np.zeros(state_size), np.zeros(state_size)] if multiagent: lstm_state = { key: deepcopy(lstm_state) for key in config['multiagent']['policies'].keys() } rewards = [] if multiagent: rewards = defaultdict(list) policy_map_fn = config['multiagent']['policy_mapping_fn'].func # Simulate and collect metrics final_outflows = [] final_inflows = [] mean_speed = [] std_speed = [] for i in range(args.num_rollouts): obs = env.reset() kv = env.k.vehicle rollout_speeds = [] rollout_reward = 0 if multiagent: rollout_reward = defaultdict(int) for _ in range(env_params.horizon): rollout_speeds.append(np.mean(kv.get_speed(kv.get_ids()))) if multiagent: action = {} for agent_id in obs.keys(): if use_lstm: action[agent_id], obs[ agent_id], logits = agent.compute_action( obs[agent_id], obs=lstm_state[agent_id], policy_id=policy_map_fn(agent_id)) else: action[agent_id] = agent.compute_action( obs[agent_id], policy_id=policy_map_fn(agent_id)) else: action = agent.compute_action(obs) obs, reward, done, _ = env.step(action) if multiagent: done = done['__all__'] for agent_id, agent_reward in reward.items(): rollout_reward[policy_map_fn(agent_id)] += agent_reward else: rollout_reward += reward if done: break if multiagent: for agent_id, reward in rollout_reward.items(): rewards[agent_id].append(reward) print('rollout %s, agent %s reward: %.5g' % (i, agent_id, reward)) else: rewards.append(rollout_reward) print('rollout %s, reward: %.5g' % (i, rollout_reward)) mean_speed.append(np.nanmean(rollout_speeds)) std_speed.append(np.nanstd(rollout_speeds)) # Compute rate of inflow / outflow in the last 500 steps final_outflows.append(kv.get_outflow_rate(500)) final_inflows.append(kv.get_inflow_rate(500)) print( '\n==== Summary of results: mean (std) [rollout1, rollout2, ...] 
====')
    mean, std = np.mean, np.std
    if multiagent:
        for agent_id, agent_rewards in rewards.items():
            print('agent %s rewards: %.4g (%.4g) %s' % (
                agent_id, mean(agent_rewards), std(agent_rewards),
                agent_rewards))
    else:
        print('rewards: %.4g (%.4g) %s' % (
            mean(rewards), std(rewards), rewards))

    print('mean speeds (m/s): %.4g (%.4g) %s' % (
        mean(mean_speed), std(mean_speed), mean_speed))
    print('std speeds: %.4g (%.4g) %s' % (
        mean(std_speed), std(std_speed), std_speed))
    print('inflows (veh/hr): %.4g (%.4g) %s' % (
        mean(final_inflows), std(final_inflows), final_inflows))
    print('outflows (veh/hr): %.4g (%.4g) %s' % (
        mean(final_outflows), std(final_outflows), final_outflows))

    # Compute throughput efficiency in the last 500 sec of the run
    throughput = [o / i for o, i in zip(final_outflows, final_inflows)]
    print('throughput efficiency: %.4g (%.4g) %s' % (
        mean(throughput), std(throughput), throughput))

    # terminate the environment
    env.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date,
                                                     "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
def run_eval(self, num_runs, num_steps, run, saveLogs, attack, epsilon,
             rl_actions=None, convert_to_csv=False, load_path=None):
    #1. Initialize the information variables
    info_dict = {}
    rets = []
    mean_rets = []
    ret_lists = []
    vels = []
    mean_vels = []
    std_vels = []
    performance = []
    collisions = []
    losses = []
    q_values = []
    attack_number = 0
    attack_detected = 0

    #2. Set the reinforcement learning parameters
    action_set = self.env.getActionSet()
    agent = Agent(action_set, train=False, load_path=load_path)
    target_update_counter = 0

    #3. Run the experiment for a set number of simulations (runs)
    for i in range(num_runs):
        #1. initialize the environment
        vel = np.zeros(num_steps)
        logging.info("Iter #" + str(i))
        ret = 0
        ret_list = []
        vehicles = self.env.vehicles
        collision_check = 0
        obs = self.get_screen(self.env.reset())
        self.env.reset_params()
        state = np.stack([obs for _ in range(4)], axis=0)

        #2. Perform one simulation
        for j in range(num_steps):
            print('(episode, step) = ', i, ',', j)
            state_conc = self.concatenate(state, agent)

            #0. Attack the images
            is_attack = False
            if attack:
                random.seed()
                random_number = random.random()
                if random_number < Config.ATTACK_PROBABILITY:
                    is_attack = True
                    state_conc = fgsm_attack(state_conc, epsilon, agent,
                                             i * j, saveLogs)
                    #state = fgsm_attack(state, epsilon, agent, i*j,
                    #                    saveLogs)
                detected, uncertainty, confidence = check_attack(
                    agent, state_conc)
                if is_attack:
                    saveLogs.save_uncertainty_attack(uncertainty, run)
                else:
                    saveLogs.save_uncertainty_no_attack(uncertainty, run)
                detection_information(attack, detected, saveLogs)
                saveLogs.save_uncertainty(uncertainty, run)
                print('detected: ', detected)

            #1. Select and perform an action (the method rl_action is
            # responsible for selecting the action to be taken)
            #action, Q_value, uncertainty = agent.select_action(
            #    state_conc, train=False)
            action, Q_value = agent.select_action(state_conc, train=False)
            print('action, Q-value:', action, Q_value)
            if Q_value is not None:
                saveLogs.save_Q_value(Q_value, run)
                q_values.append(Q_value)
            obs, reward, done, _ = self.env.step(action_set[action])
            sc_name2 = os.getcwd() + "/image_simulation/screenshot" + str(
                i * j) + ".png"
            self.env.traci_connection.gui.screenshot("View #0", sc_name2)

            #2. Convert the observation to a pytorch observation
            obs = self.get_screen(obs)
            reward = torch.tensor([reward], device=agent.device)

            #3. Observe new state
            if not (self.env.arrived or self.env.crashed):
                next_state = []
                next_state.append(obs)
                next_state.append(deepcopy(state[0]))
                next_state.append(deepcopy(state[1]))
                next_state.append(deepcopy(state[2]))
                #next_state = deepcopy(state)
            else:
                next_state = None

            #4. Store the transition in memory
            agent.append_sample(self.concatenate(state, agent), action,
                                self.concatenate(next_state, agent), reward)
            #agent.append_sample(state, action, next_state, reward)

            #5. Move to the next state
            state = next_state

            #6. Flow code
            vel[j] = np.mean(vehicles.get_speed(vehicles.get_ids()))
            ret += reward
            ret_list.append(reward)

            #7. Decide if the simulation gets to an end
            if done or self.env.arrived or self.env.crashed:
                agent.episode_durations.append(j + 1)
                if self.env.crashed:
                    saveLogs.add_crash()
                    print('Crash')
                    collision_check = 1
                elif self.env.arrived:
                    saveLogs.add_arrive()
                    print('all vehicles arrived at the destination')
                break

        #3. Store information from the simulation
        saveLogs.add_simulation_time(time=j)
        performance.append(j)
        collisions.append(1 if self.env.crashed else 0)

        #4. flow code
        rets.append(ret)
        vels.append(vel)
        mean_rets.append(np.mean(ret_list))
        ret_lists.append(ret_list)
        mean_vels.append(np.mean(vel))
        std_vels.append(np.std(vel))

        #5. save rewards
        #if i % Config.SAVE_REWARDS_FREQUENCY == 0:
        saveLogs.save_reward(ret, run, i)
        saveLogs.save_average_reward(ret)
        saveLogs.save_collision(collision_check, run)
        saveLogs.save_time(j, run)

    #4. Store the logs of the simulation
    info_dict["returns"] = np.array(rets.copy())
    info_dict["velocities"] = vels
    info_dict["mean_returns"] = mean_rets
    info_dict["per_step_returns"] = ret_lists
    info_dict["performance"] = np.array(performance.copy())
    info_dict["collisions"] = np.array(collisions.copy())
    info_dict["loss"] = None
    info_dict["q_values"] = np.array(q_values.copy())

    print("Average, std return: {}, {}".format(np.mean(rets), np.std(rets)))
    print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                              np.std(std_vels)))
    self.env.terminate()

    if convert_to_csv:
        # collect the location of the emission file
        dir_path = self.env.sumo_params.emission_path
        emission_filename = \
            "{0}-emission.xml".format(self.env.scenario.name)
        emission_path = \
            "{0}/{1}".format(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)

    return info_dict
def visualizer_rllib(args): """Visualizer for RLlib experiments. This function takes args (see function create_parser below for more detailed information on what information can be fed to this visualizer), and renders the experiment associated with it. """ result_dir = args.result_dir if args.result_dir[-1] != '/' \ else args.result_dir[:-1] config = get_rllib_pkl(result_dir) # Run on only one cpu for rendering purposes config['num_workers'] = 0 flow_params = get_flow_params(config) # Determine agent and checkpoint config_run = config['env_config']['run'] if 'run' in config['env_config'] \ else None if args.run and config_run: if args.run != config_run: print('visualizer_rllib.py: error: run argument ' + '\'{}\' passed in '.format(args.run) + 'differs from the one stored in params.json ' + '\'{}\''.format(config_run)) sys.exit(1) if args.run: agent_cls = get_agent_class(args.run) elif config_run: agent_cls = get_agent_class(config_run) else: print('visualizer_rllib.py: error: could not find flow parameter ' '\'run\' in params.json, ' 'add argument --run to provide the algorithm or model used ' 'to train the results\n e.g. ' 'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO') sys.exit(1) sim_params = flow_params['sim'] sim_params.restart_instance = True dir_path = os.path.dirname(os.path.realpath(__file__)) emission_path = '{0}/test_time_rollout/'.format(dir_path) sim_params.emission_path = emission_path if args.gen_emission else None # pick your rendering mode if args.render_mode == 'sumo_web3d': sim_params.num_clients = 2 sim_params.render = False elif args.render_mode == 'drgb': sim_params.render = 'drgb' sim_params.pxpm = 4 elif args.render_mode == 'sumo_gui': sim_params.render = True print('NOTE: With render mode {}, an extra instance of the SUMO GUI ' 'will display before the GUI for visualizing the result. 
Click ' 'the green Play arrow to continue.'.format(args.render_mode)) elif args.render_mode == 'no_render': sim_params.render = False if args.save_render: sim_params.render = 'drgb' sim_params.pxpm = 4 sim_params.save_render = True # Start the environment with the gui turned on and a path for the # emission file env_params = flow_params['env'] sim_params.restart_instance = False if args.evaluate: env_params.evaluate = True # lower the horizon if testing if args.horizon: config['horizon'] = args.horizon env_params.horizon = args.horizon # Create and register a gym+rllib env create_env, env_name = make_create_env(params=flow_params, version=0) register_env(env_name, create_env) # create the agent that will be used to compute the actions agent = agent_cls(env=env_name, config=config) checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num agent.restore(checkpoint) # Simulate and collect metrics final_outflows = [] final_inflows = [] mean_speed = [] std_speed = [] policy_agent_mapping = default_policy_agent_mapping if hasattr(agent, "workers"): env = agent.workers.local_worker().env multiagent = isinstance(env, MultiAgentEnv) if agent.workers.local_worker().multiagent: policy_agent_mapping = agent.config["multiagent"][ "policy_mapping_fn"] policy_map = agent.workers.local_worker().policy_map state_init = {p: m.get_initial_state() for p, m in policy_map.items()} use_lstm = {p: len(s) > 0 for p, s in state_init.items()} action_init = { p: m.action_space.sample() for p, m in policy_map.items() } else: env = gym.make(env_name) multiagent = False use_lstm = {DEFAULT_POLICY_ID: False} steps = 0 for i in range(args.num_rollouts): vel = [] mapping_cache = {} # in case policy_agent_mapping is stochastic reward_dict = {} obs = env.reset() agent_states = DefaultMapping( lambda agent_id: state_init[mapping_cache[agent_id]]) prev_actions = DefaultMapping( lambda agent_id: action_init[mapping_cache[agent_id]]) prev_rewards = collections.defaultdict(lambda: 0.) 
        done = False
        reward_total = 0.0
        while not done and steps < (env_params.horizon or steps + 1):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
            action_dict = {}
            for agent_id, a_obs in multi_obs.items():
                if a_obs is not None:
                    policy_id = mapping_cache.setdefault(
                        agent_id, policy_agent_mapping(agent_id))
                    p_use_lstm = use_lstm[policy_id]
                    if p_use_lstm:
                        a_action, p_state, _ = agent.compute_action(
                            a_obs,
                            state=agent_states[agent_id],
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                        agent_states[agent_id] = p_state
                    else:
                        a_action = agent.compute_action(
                            a_obs,
                            prev_action=prev_actions[agent_id],
                            prev_reward=prev_rewards[agent_id],
                            policy_id=policy_id)
                    a_action = _flatten_action(a_action)  # tuple actions
                    action_dict[agent_id] = a_action
                    prev_actions[agent_id] = a_action
            action = action_dict
            action = action if multiagent else action[_DUMMY_AGENT_ID]
            next_obs, reward, done, _ = env.step(action)
            if multiagent:
                for agent_id, r in reward.items():
                    prev_rewards[agent_id] = r
            else:
                prev_rewards[_DUMMY_AGENT_ID] = reward
            if multiagent:
                done = done["__all__"]
                reward_total += sum(reward.values())
            else:
                reward_total += reward
            steps += 1
            obs = next_obs

        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        print("Episode reward", reward_total)

    print('==== Summary of results ====')
    print(mean_speed)
    # if multiagent:
    #     for agent_id, rew in rets.items():
    #         print('For agent', agent_id)
    #         print(rew)
    #         print('Average, std return: {}, {} for agent {}'.format(
    #             np.mean(rew), np.std(rew), agent_id))
    # else:
    #     print(rets)
    #     print('Average, std: {}, {}'.format(
    #         np.mean(rets), np.std(rets)))
    print("\nSpeed, mean (m/s): {}".format(mean_speed))
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s): {}".format(std_speed))
    print('Average, std: {}, {}'.format(np.mean(std_speed),
                                        np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr): {}".format(final_outflows))
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr): {}".format(final_inflows))
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr): {}".format(throughput_efficiency))
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S")) recent_dir = dirs[-1] # create the movie movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir save_dir = os.path.expanduser('~') + '/flow_movies' if not os.path.exists(save_dir): os.mkdir(save_dir) os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png" os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4" os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/" os.system(os_cmd)
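
# --- Alternative sketch for the movie step above (an assumption, not the
# original implementation): the same ffmpeg encode and copy expressed with
# subprocess.run instead of a concatenated shell string, which avoids quoting
# problems if the directory names ever contain spaces.
import os
import shutil
import subprocess


def encode_movie(movie_dir, save_dir, name):
    """Encode movie_dir/frame_%06d.png into an mp4 and copy it to save_dir."""
    out_path = os.path.join(movie_dir, name + ".mp4")
    subprocess.run(
        ["ffmpeg", "-i", os.path.join(movie_dir, "frame_%06d.png"),
         "-pix_fmt", "yuv420p", out_path],
        check=True)
    shutil.copy(out_path, save_dir)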
def visualizer_rllib(args): result_dir = args.result_dir if args.result_dir[-1] != '/' \ else args.result_dir[:-1] config = get_rllib_config(result_dir) # TODO(ev) backwards compatibility hack try: pkl = get_rllib_pkl(result_dir) except Exception: pass # check if we have a multiagent scenario but in a # backwards compatible way if config.get('multiagent', {}).get('policy_graphs', {}): multiagent = True config['multiagent'] = pkl['multiagent'] else: multiagent = False # Run on only one cpu for rendering purposes config['num_workers'] = 0 flow_params = get_flow_params(config) # hack for old pkl files # TODO(ev) remove eventually sim_params = flow_params['sim'] setattr(sim_params, 'num_clients', 1) # Determine agent and checkpoint config_run = config['env_config']['run'] if 'run' in config['env_config'] \ else None if args.run and config_run: if args.run != config_run: print('visualizer_rllib.py: error: run argument ' + '\'{}\' passed in '.format(args.run) + 'differs from the one stored in params.json ' + '\'{}\''.format(config_run)) sys.exit(1) if args.run: agent_cls = get_agent_class(args.run) elif config_run: agent_cls = get_agent_class(config_run) else: print('visualizer_rllib.py: error: could not find flow parameter ' '\'run\' in params.json, ' 'add argument --run to provide the algorithm or model used ' 'to train the results\n e.g. ' 'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO') sys.exit(1) sim_params.restart_instance = False dir_path = os.path.dirname(os.path.realpath(__file__)) emission_path = '{0}/test_time_rollout/'.format(dir_path) sim_params.emission_path = emission_path if args.gen_emission else None # pick your rendering mode if args.render_mode == 'sumo_web3d': sim_params.num_clients = 2 sim_params.render = False elif args.render_mode == 'drgb': sim_params.render = 'drgb' sim_params.pxpm = 4 elif args.render_mode == 'sumo_gui': sim_params.render = True elif args.render_mode == 'no_render': sim_params.render = False if args.save_render: sim_params.render = 'drgb' sim_params.pxpm = 4 sim_params.save_render = True # Create and register a gym+rllib env create_env, env_name = make_create_env(params=flow_params, version=0) register_env(env_name, create_env) # check if the environment is a single or multiagent environment, and # get the right address accordingly # single_agent_envs = [env for env in dir(flow.envs) # if not env.startswith('__')] # if flow_params['env_name'] in single_agent_envs: # env_loc = 'flow.envs' # else: # env_loc = 'flow.multiagent_envs' # Start the environment with the gui turned on and a path for the # emission file env_params = flow_params['env'] env_params.restart_instance = False if args.evaluate: env_params.evaluate = True # lower the horizon if testing if args.horizon: config['horizon'] = args.horizon env_params.horizon = args.horizon # create the agent that will be used to compute the actions agent = agent_cls(env=env_name, config=config) checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num agent.restore(checkpoint) if hasattr(agent, "local_evaluator") and \ os.environ.get("TEST_FLAG") != 'True': env = agent.local_evaluator.env else: env = gym.make(env_name) if multiagent: rets = {} # map the agent id to its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func for key in config['multiagent']['policy_graphs'].keys(): rets[key] = [] else: rets = [] if config['model']['use_lstm']: use_lstm = True if multiagent: state_init = {} # map the agent id to 
its policy policy_map_fn = config['multiagent']['policy_mapping_fn'].func size = config['model']['lstm_cell_size'] for key in config['multiagent']['policy_graphs'].keys(): state_init[key] = [ np.zeros(size, np.float32), np.zeros(size, np.float32) ] else: state_init = [ np.zeros(config['model']['lstm_cell_size'], np.float32), np.zeros(config['model']['lstm_cell_size'], np.float32) ] else: use_lstm = False env.restart_simulation(sim_params=sim_params, render=sim_params.render) final_outflows = [] mean_speed = [] for i in range(args.num_rollouts): vel = [] state = env.reset() if multiagent: ret = {key: [0] for key in rets.keys()} else: ret = 0 for _ in range(env_params.horizon): vehicles = env.unwrapped.k.vehicle vel.append(np.mean(vehicles.get_speed(vehicles.get_ids()))) if multiagent: action = {} for agent_id in state.keys(): if use_lstm: action[agent_id], state_init[agent_id], logits = \ agent.compute_action( state[agent_id], state=state_init[agent_id], policy_id=policy_map_fn(agent_id)) else: action[agent_id] = agent.compute_action( state[agent_id], policy_id=policy_map_fn(agent_id)) else: action = agent.compute_action(state) state, reward, done, _ = env.step(action) if multiagent: for actor, rew in reward.items(): ret[policy_map_fn(actor)][0] += rew else: ret += reward if multiagent and done['__all__']: break if not multiagent and done: break if multiagent: for key in rets.keys(): rets[key].append(ret[key]) else: rets.append(ret) outflow = vehicles.get_outflow_rate(500) final_outflows.append(outflow) mean_speed.append(np.mean(vel)) if multiagent: for agent_id, rew in rets.items(): print('Round {}, Return: {} for agent {}'.format( i, ret, agent_id)) else: print('Round {}, Return: {}'.format(i, ret)) if multiagent: for agent_id, rew in rets.items(): print('Average, std return: {}, {} for agent {}'.format( np.mean(rew), np.std(rew), agent_id)) else: print('Average, std return: {}, {}'.format(np.mean(rets), np.std(rets))) print('Average, std speed: {}, {}'.format(np.mean(mean_speed), np.std(mean_speed))) print('Average, std outflow: {}, {}'.format(np.mean(final_outflows), np.std(final_outflows))) # terminate the environment env.unwrapped.terminate() # if prompted, convert the emission file into a csv file if args.gen_emission: time.sleep(0.1) dir_path = os.path.dirname(os.path.realpath(__file__)) emission_filename = '{0}-emission.xml'.format(env.scenario.name) emission_path = \ '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename) emission_to_csv(emission_path) # if we wanted to save the render, here we create the movie if args.save_render: dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering') dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S")) recent_dir = dirs[-1] # create the movie movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir save_dir = os.path.expanduser('~') + '/flow_movies' if not os.path.exists(save_dir): os.mkdir(save_dir) os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png" os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4" os_cmd += "&& cp " + dirs[-1] + ".mp4 " + save_dir + "/" os.system(os_cmd)
def run(self, num_runs, num_steps, rl_actions=None, convert_to_csv=False):
    """
    Run the given scenario for a set number of runs and a set number of
    steps per run.

    Parameters
    ----------
    num_runs: int
        number of runs the experiment should perform
    num_steps: int
        number of steps to be performed in each run of the experiment
    rl_actions: list or numpy ndarray, optional
        actions to be performed by rl vehicles in the network (if there
        are any)
    convert_to_csv: bool
        Specifies whether to convert the emission file created by sumo
        into a csv file

    Returns
    -------
    info_dict: dict
        contains returns, average speed per step
    """
    info_dict = {}
    if rl_actions is None:
        rl_actions = []

    rets = []
    mean_rets = []
    ret_lists = []
    vels = []
    mean_vels = []
    std_vels = []
    for i in range(num_runs):
        vel = np.zeros(num_steps)
        logging.info("Iter #" + str(i))
        ret = 0
        ret_list = []
        vehicles = self.env.vehicles
        self.env.reset()
        for j in range(num_steps):
            state, reward, done, _ = self.env.step(rl_actions)
            vel[j] = np.mean(vehicles.get_speed(vehicles.get_ids()))
            ret += reward
            ret_list.append(reward)
            if done:
                break
        rets.append(ret)
        vels.append(vel)
        mean_rets.append(np.mean(ret_list))
        ret_lists.append(ret_list)
        mean_vels.append(np.mean(vel))
        std_vels.append(np.std(vel))
        print("Round {0}, return: {1}".format(i, ret))

    info_dict["returns"] = rets
    info_dict["velocities"] = vels
    info_dict["mean_returns"] = mean_rets
    info_dict["per_step_returns"] = ret_lists

    print("Average, std return: {}, {}".format(np.mean(rets), np.std(rets)))
    print("Average, std speed: {}, {}".format(np.mean(mean_vels),
                                              np.std(std_vels)))
    self.env.terminate()

    if convert_to_csv:
        # collect the location of the emission file
        dir_path = self.env.sumo_params.emission_path
        emission_filename = \
            "{0}-emission.xml".format(self.env.scenario.name)
        emission_path = \
            "{0}/{1}".format(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)

    return info_dict
def run(self, num_runs, num_steps, rl_actions=None,
        output_to_terminal=True, convert_to_csv=False):
    """Run the given network for a set number of runs and steps per run.

    Parameters
    ----------
    num_runs : int
        number of runs the experiment should perform
    num_steps : int
        number of steps to be performed in each run of the experiment
    rl_actions : method, optional
        maps states to actions to be performed by the RL agents (if
        there are any)
    output_to_terminal : bool
        specifies whether to print the results of each run to the
        terminal
    convert_to_csv : bool
        Specifies whether to convert the emission file created by sumo
        into a csv file

    Returns
    -------
    info_dict : dict
        contains returns, average speed per step
    """
    # raise an error if convert_to_csv is set to True but no emission
    # file will be generated, to avoid getting an error at the end of the
    # simulation
    if convert_to_csv and self.env.sim_params.emission_path is None:
        raise ValueError(
            'The experiment was run with convert_to_csv set '
            'to True, but no emission file will be generated. If you wish '
            'to generate an emission file, you should set the parameter '
            'emission_path in the simulation parameters (SumoParams or '
            'AimsunParams) to the path of the folder where emissions '
            'output should be generated. If you do not wish to generate '
            'emissions, set the convert_to_csv parameter to False.')

    info_dict = {}
    if rl_actions is None:
        def rl_actions(*_):
            return None

    # collecting experiment results, ret = return
    # reward
    overall_return_all_runs = []
    mean_return_all_runs = []
    per_step_return_all_runs = []
    # speed
    per_step_speed_all_runs = []
    mean_speed_over_all_runs = []
    std_speed_over_all_runs = []
    # throughput
    inflow_over_all_runs = []
    outflow_over_all_runs = []

    # for each run
    for i in range(num_runs):
        logging.info("Run #" + str(i + 1))

        state = self.env.reset()

        # reward
        overall_return_one_run = 0
        per_step_return_one_run = []
        # speed
        per_step_speed_one_run = np.zeros(num_steps)

        # for each step
        for j in range(num_steps):
            # get the states, rewards, etc
            state, reward, done, _ = self.env.step(rl_actions(state))

            # store the returns
            overall_return_one_run += reward
            per_step_return_one_run.append(reward)

            # store the average speed of all vehicles at this step
            per_step_speed_one_run[j] = np.mean(
                self.env.k.vehicle.get_speed(self.env.k.vehicle.get_ids()))

            if done:
                break

        # reward
        overall_return_all_runs.append(overall_return_one_run)
        mean_return_all_runs.append(np.mean(per_step_return_one_run))
        per_step_return_all_runs.append(per_step_return_one_run)

        # speed
        per_step_speed_all_runs.append(per_step_speed_one_run)
        mean_speed_over_all_runs.append(np.mean(per_step_speed_one_run))
        std_speed_over_all_runs.append(np.std(per_step_speed_one_run))

        # collect the inflows (the number of vehicles entering the
        # network) and outflows (the number of vehicles leaving the
        # network) over the past 500 seconds; if the simulation is
        # shorter than 500 seconds, this collects them over the whole
        # simulation
        inflow_over_all_runs.append(
            self.env.k.vehicle.get_inflow_rate(int(500)))
        outflow_over_all_runs.append(
            self.env.k.vehicle.get_outflow_rate(int(500)))

        if output_to_terminal:
            print("Round {0} -- Return: {1}".format(
                i + 1, overall_return_one_run))

    # compute the throughput efficiency (outflow / inflow), guarding
    # against division by zero when there is no inflow
    if np.all(np.array(inflow_over_all_runs) > 1e-5):
        throughput_over_all_runs = [
            x / y for x, y in zip(outflow_over_all_runs,
                                  inflow_over_all_runs)
        ]
    else:
        throughput_over_all_runs = [0] * len(inflow_over_all_runs)

    info_dict["overall_return_all_runs"] = overall_return_all_runs
    info_dict["mean_return_all_runs"] = mean_return_all_runs
    info_dict["per_step_return_all_runs"] = per_step_return_all_runs
    info_dict["per_step_speed_all_runs"] = per_step_speed_all_runs

    info_dict["mean_ret_all"] = np.mean(overall_return_all_runs)
    info_dict["std_ret_all"] = np.std(overall_return_all_runs)

    info_dict["mean_inflows"] = np.mean(inflow_over_all_runs)
    info_dict["mean_outflows"] = np.mean(outflow_over_all_runs)

    info_dict["max_spd_all"] = np.max(mean_speed_over_all_runs)
    info_dict["min_spd_all"] = np.min(mean_speed_over_all_runs)
    info_dict["mean_spd_all"] = np.mean(mean_speed_over_all_runs)
    info_dict["std_spd_all"] = np.std(mean_speed_over_all_runs)

    info_dict["max_tpt_all"] = np.max(throughput_over_all_runs)
    info_dict["min_tpt_all"] = np.min(throughput_over_all_runs)
    info_dict["mean_tpt_all"] = np.mean(throughput_over_all_runs)
    info_dict["std_tpt_all"] = np.std(throughput_over_all_runs)

    if output_to_terminal:
        print("Return: {} (avg), {} (std)".format(
            info_dict["mean_ret_all"], info_dict["std_ret_all"]))
        print("Speed (m/s): {} (avg), {} (std)".format(
            info_dict["mean_spd_all"], info_dict["std_spd_all"]))
        print("Throughput (veh/hr): {} (avg), {} (std)".format(
            info_dict["mean_tpt_all"], info_dict["std_tpt_all"]))

    self.env.terminate()

    if convert_to_csv:
        # wait a short period of time to ensure the xml file is readable
        time.sleep(0.1)

        # collect the location of the emission file
        dir_path = self.env.sim_params.emission_path
        emission_filename = "{0}-emission.xml".format(
            self.env.network.name)
        emission_path = os.path.join(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)

        # Delete the .xml version of the emission file.
        os.remove(emission_path)

    return info_dict
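# A minimal sketch of a state-to-action mapping for the newer run()
# signature above, assuming a single RL vehicle that accepts a
# one-dimensional acceleration command and an Experiment wrapper `exp`
# (both illustrative assumptions, not confirmed by the source):
#
# def my_rl_actions(state):
#     # brake gently when the (assumed) first state entry is large,
#     # otherwise accelerate
#     return [-1.0] if state is not None and state[0] > 5.0 else [0.5]
#
# info = exp.run(num_runs=1, num_steps=1000, rl_actions=my_rl_actions,
#                convert_to_csv=True)  # requires emission_path to be set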
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    Loads a checkpointed agent and replays it on the experiment it was
    trained on.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 1

    flow_params = get_flow_params(config)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json '
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])

    scenario = scenario_class(name=exp_tag,
                              vehicles=vehicles,
                              net_params=net_params,
                              initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    if args.evaluate:
        env_params.evaluate = True
    sumo_params = flow_params['sumo']
    sumo_params.render = not args.no_render
    sumo_params.emission_path = './test_time_rollout/'

    env = ModelCatalog.get_preprocessor_as_wrapper(
        env_class(env_params=env_params,
                  sumo_params=sumo_params,
                  scenario=scenario))

    # Run the environment in the presence of the pre-trained RL agent for the
    # requested number of time steps / rollouts
    rets = []
    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.vehicles
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            ret += reward
            if done:
                break
        rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        print('Round {}, Return: {}'.format(i, ret))
    print('Average, std return: {}, {}'.format(np.mean(rets), np.std(rets)))
    print('Average, std speed: {}, {}'.format(
        np.mean(mean_speed), np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(
        np.mean(final_outflows), np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)
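# A sketch of the argument parser this visualizer appears to expect, inferred
# from the attributes accessed above (result_dir, checkpoint_num, run,
# num_rollouts, no_render, evaluate, emission_to_csv); the flag names and
# defaults are assumptions, not the project's actual create_parser:
#
# import argparse
#
# def create_parser():
#     parser = argparse.ArgumentParser()
#     parser.add_argument('result_dir', type=str)
#     parser.add_argument('checkpoint_num', type=str)
#     parser.add_argument('--run', type=str, default=None)
#     parser.add_argument('--num_rollouts', type=int, default=1)
#     parser.add_argument('--no_render', action='store_true')
#     parser.add_argument('--evaluate', action='store_true')
#     parser.add_argument('--emission_to_csv', action='store_true')
#     return parser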
def visualizer_rllib(args):
    """Visualizer for RLlib experiments, with multiagent support."""
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pkl = None

    # check if we have a multiagent scenario, in a backwards-compatible way
    if pkl is not None and \
            config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sumo_params = flow_params['sumo']
    setattr(sumo_params, 'num_clients', 1)

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(
        params=flow_params, version=0, render=False)
    register_env(env_name, create_env)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json '
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sumo_params.restart_instance = False
    sumo_params.emission_path = './test_time_rollout/'

    # pick your rendering mode
    if args.render_mode == 'sumo-web3d':
        sumo_params.num_clients = 2
        sumo_params.render = False
    elif args.render_mode == 'drgb':
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
    elif args.render_mode == 'sumo-gui':
        sumo_params.render = True
    elif args.render_mode == 'no-render':
        sumo_params.render = False
    if args.save_render:
        sumo_params.render = 'drgb'
        sumo_params.pxpm = 4
        sumo_params.save_render = True

    # Recreate the scenario from the pickled parameters
    exp_tag = flow_params['exp_tag']
    net_params = flow_params['net']
    vehicles = flow_params['veh']
    initial_config = flow_params['initial']
    module = __import__('flow.scenarios', fromlist=[flow_params['scenario']])
    scenario_class = getattr(module, flow_params['scenario'])

    scenario = scenario_class(
        name=exp_tag,
        vehicles=vehicles,
        net_params=net_params,
        initial_config=initial_config)

    # Start the environment with the gui turned on and a path for the
    # emission file
    module = __import__('flow.envs', fromlist=[flow_params['env_name']])
    env_class = getattr(module, flow_params['env_name'])
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    env = ModelCatalog.get_preprocessor_as_wrapper(env_class(
        env_params=env_params, sumo_params=sumo_params, scenario=scenario))

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policy_graphs'].keys():
            rets[key] = []
    else:
        rets = []
    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.vehicles
            # store the average speed of all vehicles at this step
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    action[agent_id] = agent.compute_action(
                        state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        print('Round {}, Return: {}'.format(i, ret))
    if multiagent:
        for agent_id, rew in rets.items():
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print('Average, std return: {}, {}'.format(
            np.mean(rets), np.std(rets)))
    print('Average, std speed: {}, {}'.format(
        np.mean(mean_speed), np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(
        np.mean(final_outflows), np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.emission_to_csv:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        emission_to_csv(emission_path)

    # if we wanted to save the render, here we would create the movie
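# The `.func` access in the multiagent branch above suggests the mapping
# function was stored wrapped (older Ray versions wrapped callables with
# tune.function). A minimal sketch of what the pickled multiagent config
# might contain; the policy name, observation space, and action space
# below are assumed placeholders:
#
# def policy_mapping_fn(agent_id):
#     # map every agent to a single shared policy (illustrative choice)
#     return 'av_policy'
#
# config['multiagent'] = {
#     'policy_graphs': {'av_policy': (None, obs_space, act_space, {})},
#     'policy_mapping_fn': tune.function(policy_mapping_fn),
# }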
def main(args):
    """Evaluate a trained policy by replaying rollouts from a checkpoint."""
    flags = parse_options(args)

    # get the hyperparameters
    env_name, policy, hp, seed = get_hyperparameters_from_dir(flags.dir_name)
    hp['render'] = not flags.no_render  # to visualize the policy

    # create the algorithm object. We will be using the eval environment in
    # this object to perform the rollout.
    alg = OffPolicyRLAlgorithm(policy=policy, env=env_name, **hp)

    # setup the seed value
    if not flags.random_seed:
        random.seed(seed)
        np.random.seed(seed)
        tf.compat.v1.set_random_seed(seed)

    # get the checkpoint number
    if flags.ckpt_num is None:
        filenames = os.listdir(os.path.join(flags.dir_name, "checkpoints"))
        metafiles = [f[:-5] for f in filenames if f[-5:] == ".meta"]
        metanum = [int(f.split("-")[-1]) for f in metafiles]
        ckpt_num = max(metanum)
    else:
        ckpt_num = flags.ckpt_num

    # location to the checkpoint
    ckpt = os.path.join(flags.dir_name, "checkpoints/itr-{}".format(ckpt_num))

    # restore the previous checkpoint
    alg.saver = tf.compat.v1.train.Saver(alg.trainable_vars)
    alg.load(ckpt)

    # some variables that will be needed when replaying the rollout
    policy = alg.policy_tf
    env = alg.sampler.env

    # Perform the evaluation procedure.
    episode_rewards = []

    # Add an emission path to Flow environments.
    if env_name in FLOW_ENV_NAMES:
        sim_params = deepcopy(env.wrapped_env.sim_params)
        sim_params.emission_path = "./flow_results"
        env.wrapped_env.restart_simulation(
            sim_params, render=not flags.no_render)

    for episode_num in range(flags.num_rollouts):
        # Run a rollout.
        obs = env.reset()
        total_reward = 0
        while True:
            context = [env.current_context] \
                if hasattr(env, "current_context") else None
            action = policy.get_action(
                np.asarray([obs]),
                context=context,
                apply_noise=False,
                random_actions=False,
            )
            obs, reward, done, _ = env.step(action[0])
            if not flags.no_render:
                env.render()
            total_reward += reward
            if done:
                break

        # Print total returns from a given episode.
        episode_rewards.append(total_reward)
        print("Round {}, return: {}".format(episode_num, total_reward))

    # Print total statistics.
    print("Average, std return: {}, {}".format(
        np.mean(episode_rewards), np.std(episode_rewards)))

    if env_name in FLOW_ENV_NAMES:
        # wait a short period of time to ensure the xml file is readable
        time.sleep(0.1)

        # collect the location of the emission file
        dir_path = env.wrapped_env.sim_params.emission_path
        emission_filename = "{0}-emission.xml".format(
            env.wrapped_env.network.name)
        emission_path = os.path.join(dir_path, emission_filename)

        # convert the emission file into a csv
        emission_to_csv(emission_path)

        # Delete the .xml version of the emission file.
        os.remove(emission_path)
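# A conventional entry point for a script like this (a sketch; the original
# file's actual invocation is not shown in this excerpt):
#
# if __name__ == "__main__":
#     import sys
#     main(sys.argv[1:])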