def create_tf_serving_model(self, algorithm=None, env_string=None, config=None):
    self.register_env_creator()
    cls = get_agent_class(algorithm)
    config["monitor"] = False
    config["num_workers"] = 1
    config["num_gpus"] = 0
    agent = cls(env=env_string, config=config)
    checkpoint = os.path.join(MODEL_OUTPUT_DIR, "checkpoint")
    agent.restore(checkpoint)
    export_tf_serving(agent, MODEL_OUTPUT_DIR)
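# A minimal sketch of what the export_tf_serving helper could do; this is an
# assumption, not the original implementation. RLlib trainers expose
# export_policy_model, which writes a TensorFlow SavedModel that TF Serving
# can load directly.
import os

def export_tf_serving_sketch(agent, output_dir):
    """Hypothetical stand-in for export_tf_serving using RLlib's export API."""
    # TF Serving expects numbered version subdirectories under the model dir.
    export_dir = os.path.join(output_dir, "1")
    agent.export_policy_model(export_dir)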
def run(args, parser):
    def create_environment(env_config):
        # This import must happen inside the method so that worker processes
        # import this code.
        import roboschool
        return gym.make(args.env)

    if not args.config:
        # Load configuration from file.
        config_dir = os.path.dirname(args.checkpoint)
        # params.json is saved in the model directory during ray training
        # by default.
        config_path = os.path.join(config_dir, "params.json")
        with open(config_path) as f:
            args.config = json.load(f)

    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()
    register_env(args.env, create_environment)
    cls = get_agent_class(args.algorithm)
    config = args.config
    config["monitor"] = False
    config["num_workers"] = 1
    config["num_gpus"] = 0
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_episodes = int(args.evaluate_episodes)

    if args.algorithm == "DQN":
        env = gym.make(args.env)
        env = wrap_dqn(env, args.config.get("model", {}))
    else:
        env = ModelCatalog.get_preprocessor_as_wrapper(gym.make(args.env))
    env = wrappers.Monitor(env, OUTPUT_DIR, force=True,
                           video_callable=lambda episode_id: True)

    all_rewards = []
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))
    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))
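# Hedged usage sketch: one way this evaluation entry point might be wired to
# an argparse parser. The flag names mirror the attributes used above
# (checkpoint, algorithm, env, config, evaluate_episodes), but the parser
# itself is an assumption, not the original script's parser.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("checkpoint", help="path to the ray checkpoint to evaluate")
    parser.add_argument("--algorithm", default="PPO")
    parser.add_argument("--env", default=None)
    parser.add_argument("--config", default=None)
    parser.add_argument("--evaluate_episodes", default=10)
    run(parser.parse_args(), parser)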
def setup_exps_rllib(flow_params,
                     n_cpus,
                     n_rollouts,
                     policy_graphs=None,
                     policy_mapping_fn=None,
                     policies_to_train=None,
                     flags=None):
    from ray import tune
    from ray.tune.registry import register_env
    try:
        from ray.rllib.agents.agent import get_agent_class
    except ImportError:
        from ray.rllib.agents.registry import get_agent_class
    import torch

    horizon = flow_params['env'].horizon

    from ray.rllib.agents.ddpg.ddpg import DEFAULT_CONFIG
    alg_run = "DDPG"
    agent_cls = get_agent_class(alg_run)
    config = deepcopy(agent_cls._default_config)

    config["num_workers"] = 1

    # model
    config['n_step'] = 1
    config['actor_hiddens'] = [64, 64]
    config['actor_lr'] = 0.0001  # as in the DDPG article
    config['critic_lr'] = 0.0001
    config['critic_hiddens'] = [64, 64]
    config['gamma'] = 0.99
    config['model']['fcnet_hiddens'] = [64, 64]
    config['lr'] = 1e-5

    # exploration
    config['exploration_config']['final_scale'] = 0.05
    config['exploration_config']['scale_timesteps'] = 1500000
    config['exploration_config']['ou_base_scale'] = 0.1
    config['exploration_config']['ou_theta'] = 0.15
    config['exploration_config']['ou_sigma'] = 0.2

    # optimization
    config['tau'] = 0.001
    config['l2_reg'] = 1e-6
    config['train_batch_size'] = 64
    config['learning_starts'] = 3000

    # evaluation
    # config['evaluation_interval'] = 5
    config['buffer_size'] = 300000  # 3e5
    config['timesteps_per_iteration'] = 3000
    config['prioritized_replay'] = False

    # common config
    config['framework'] = 'torch'
    config['callbacks'] = {
        "on_episode_end": None,
        "on_episode_start": None,
        "on_episode_step": None,
        "on_postprocess_traj": None,
        "on_sample_end": None,
        "on_train_result": None
    }
    # config["opt_type"] = "adam"  # for IMPALA and APPO; the default is SGD.
    # TrainOneStep calls SGD; the execution_plan function can include a policy
    # update step.

    print("cuda is available: ", torch.cuda.is_available())
    print('Beginning training.')
    print("==========================================")
    print("running algorithm: ", alg_run)  # "Framework: ", "torch"

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    # multiagent configuration
    if policy_graphs is not None:
        print("policy_graphs", policy_graphs)
        config['multiagent'].update({'policies': policy_graphs})
    if policy_mapping_fn is not None:
        config['multiagent'].update(
            {'policy_mapping_fn': tune.function(policy_mapping_fn)})
    if policies_to_train is not None:
        config['multiagent'].update({'policies_to_train': policies_to_train})

    create_env, gym_name = make_create_env(params=flow_params)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
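# Hedged usage sketch (assumed, in the style of Flow's training scripts): feed
# the returned triple into Ray Tune. The experiment tag "ddpg_example" and the
# stopping criterion are illustrative placeholders, not values from the
# original code.
import ray
from ray import tune

alg_run, gym_name, config = setup_exps_rllib(flow_params, n_cpus=1,
                                             n_rollouts=1)
ray.init(num_cpus=2)
tune.run_experiments({
    "ddpg_example": {
        "run": alg_run,
        "env": gym_name,
        "config": config,
        "checkpoint_freq": 20,
        "max_failures": 999,
        "stop": {"training_iteration": 200},
    }
})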
upload_dir = args.upload_dir

# Import the benchmark and fetch its flow_params
benchmark = __import__(
    "flow.benchmarks.%s" % benchmark_name, fromlist=["flow_params"])
flow_params = benchmark.flow_params

# get the env name and a creator for the environment
create_env, env_name = make_create_env(params=flow_params, version=0)

alg_run = "ARS"

# initialize a ray instance
ray.init(redirect_output=True)

agent_cls = get_agent_class(alg_run)
config = agent_cls._default_config.copy()
config["num_workers"] = min(num_cpus, num_rollouts)
if socket.gethostname() == 'matt-desktop':
    config["num_gpus"] = 1
config["num_rollouts"] = num_rollouts
config["rollouts_used"] = num_rollouts
# config["sgd_stepsize"] = grid_search([.01, .02])
# config["noise_stdev"] = grid_search([.01, .02])
# optimal hyperparameters:
config["sgd_stepsize"] = 0.2
config["noise_stdev"] = 0.2
config['eval_prob'] = 0.05
config['observation_filter'] = "NoFilter"

# save the flow params for replay
if __name__ == "__main__":
    args = parser.parse_args()
    alg = args.alg.upper()
    start = time.time()
    print("stress test starting")
    ray.init(redirect_output=False)
    flow_params["env"].horizon = 1
    horizon = flow_params["env"].horizon
    create_env, env_name = make_create_env(params=flow_params, version=0)

    from ray.rllib.agents.agent import get_agent_class

    if alg == 'ARS':
        agent_cls = get_agent_class(alg)
        config = agent_cls._default_config.copy()
        config["num_workers"] = N_CPUS
        config["num_deltas"] = N_CPUS
        config["deltas_used"] = N_CPUS
    elif alg == 'PPO':
        agent_cls = get_agent_class(alg)
        config = agent_cls._default_config.copy()
        config["num_workers"] = N_CPUS
        config["timesteps_per_batch"] = horizon * N_ROLLOUTS
        config["vf_loss_coeff"] = 1.0
        config["kl_target"] = 0.02
        config["use_gae"] = True
        config["horizon"] = 1
        config["clip_param"] = 0.2
        config["num_sgd_iter"] = 1
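    # Hedged sketch of how such a stress test might finish (assumed; the
    # original continuation is not shown here): register the env, build the
    # agent, run a single training iteration, and report wall-clock time
    # using `start` from above.
    from ray.tune.registry import register_env
    register_env(env_name, create_env)
    agent = agent_cls(env=env_name, config=config)
    agent.train()
    print("stress test took {:.2f} seconds".format(time.time() - start))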
def setup_exps_PPO(flow_params):
    """Experiment setup with PPO using RLlib.

    Parameters
    ----------
    flow_params : dict
        dictionary of flow parameters

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = min(N_CPUS, N_ROLLOUTS)
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['lr'] = tune.grid_search([1e-5, 1e-4, 1e-3])
    config['horizon'] = HORIZON
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['observation_filter'] = 'NoFilter'

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(env_name, create_env)

    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        return (PPOPolicyGraph, obs_space, act_space, {})

    # Set up PPO with a single policy graph shared by all agents
    policy_graphs = {'av': gen_policy()}

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policy_graphs': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config
def setup_exps_rllib(flow_params,
                     n_cpus,
                     n_rollouts,
                     policy_graphs=None,
                     policy_mapping_fn=None,
                     policies_to_train=None):
    """Return the relevant components of an RLlib experiment.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters (see flow/utils/registry.py)
    n_cpus : int
        number of CPUs to run the experiment over
    n_rollouts : int
        number of rollouts per training iteration
    policy_graphs : dict, optional
        TODO
    policy_mapping_fn : function, optional
        TODO
    policies_to_train : list of str, optional
        TODO

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    horizon = flow_params['env'].horizon

    alg_run = "DDPG"
    agent_cls = get_agent_class(alg_run)
    config = deepcopy(agent_cls._default_config)

    # Tricks from TD3
    config["twin_q"] = True
    config["policy_delay"] = 2

    config["num_workers"] = n_cpus
    config["train_batch_size"] = horizon * n_rollouts
    config["horizon"] = horizon
    config["log_level"] = "DEBUG"
    config["ignore_worker_failures"] = True
    config["use_local_critic"] = False

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    # multiagent configuration
    if policy_graphs is not None:
        print("policy_graphs", policy_graphs)
        config['multiagent'].update({'policies': policy_graphs})
    if policy_mapping_fn is not None:
        config['multiagent'].update(
            {'policy_mapping_fn': tune.function(policy_mapping_fn)})
    if policies_to_train is not None:
        config['multiagent'].update({'policies_to_train': policies_to_train})

    create_env, gym_name = make_create_env(params=flow_params)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
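# Illustrative (assumed) values for the optional multiagent arguments above,
# since their docstring entries are left as TODO. The policy id 'av' and the
# helper name are hypothetical.
def example_multiagent_args(test_env):
    """Build (policy_graphs, policy_mapping_fn, policies_to_train) inputs."""
    policy_graphs = {
        # policy id -> (policy class, obs space, act space, extra config);
        # None asks RLlib to use the algorithm's default policy class.
        'av': (None, test_env.observation_space, test_env.action_space, {}),
    }

    def policy_mapping_fn(agent_id):
        return 'av'  # map every agent to the single shared policy

    policies_to_train = ['av']
    return policy_graphs, policy_mapping_fn, policies_to_train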
def visualizer_rllib(args):
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)
    # TODO(ev) backwards compatibility hack; guard pkl so a failed load does
    # not leave it undefined below
    pkl = None
    try:
        pkl = get_rllib_pkl(result_dir)
    except Exception:
        pass

    # check if we have a multiagent scenario but in a
    # backwards compatible way
    if pkl is not None and \
            config.get('multiagent', {}).get('policy_graphs', {}):
        multiagent = True
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json '
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = False
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]
    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.multiagent_envs'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policy_graphs'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policy_graphs'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    final_outflows = []
    mean_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        mean_speed.append(np.mean(vel))
        if multiagent:
            for agent_id in rets.keys():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret[agent_id], agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    if multiagent:
        for agent_id, rew in rets.items():
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print('Average, std return: {}, {}'.format(np.mean(rets),
                                                   np.std(rets)))
    print('Average, std speed: {}, {}'.format(np.mean(mean_speed),
                                              np.std(mean_speed)))
    print('Average, std outflow: {}, {}'.format(np.mean(final_outflows),
                                                np.std(final_outflows)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.scenario.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        emission_to_csv(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += " && cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
def setup_exps(flow_params):
    """Create the relevant components of a multiagent RLlib experiment.

    Parameters
    ----------
    flow_params : dict
        input flow-parameters

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['lr'] = tune.grid_search([1e-5])
    config['horizon'] = HORIZON
    config['clip_actions'] = False
    config['observation_filter'] = 'NoFilter'

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # register as rllib env
    register_env(env_name, create_env)

    # multiagent configuration
    temp_env = create_env()
    policy_graphs = {
        'av': (PPOPolicyGraph, temp_env.observation_space,
               temp_env.action_space, {})
    }

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policy_graphs': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json '
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. '
              'Click the green Play arrow to continue.'.format(
                  args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]
    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # -1 means use the most recent checkpoint in the result directory
    if args.checkpoint_num == '-1':
        checks = os.listdir(args.result_dir)
        checks = [elem for elem in checks if "check" in elem]
        checks = [elem.split("_")[1] for elem in checks]
        checks = [int(elem) for elem in checks]
        args.checkpoint_num = str(max(checks))

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [np.zeros(size, np.float32),
                                   np.zeros(size, np.float32)]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    env.restart_simulation(
        sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [x / y for x, y in
                                     zip(final_outflows, final_inflows)]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id in rets.keys():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret[agent_id], agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    print('==== Summary of results ====')
    print("Return:")
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(
            np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {}, {}'.format(np.mean(mean_speed),
                                        np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed),
                                        np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        # convert the emission file into a csv file
        emission_to_csv(emission_path)
        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += " && cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
def setup_exps(flow_params):
    """Create the relevant components of a multiagent RLlib experiment.

    Parameters
    ----------
    flow_params : dict
        input flow-parameters

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['sgd_minibatch_size'] = 4096
    # config['simple_optimizer'] = True
    config['gamma'] = 0.998  # discount rate
    config['model'].update({'fcnet_hiddens': [100, 50, 25]})
    # config['lr'] = tune.grid_search([5e-4, 1e-4])
    config['lr_schedule'] = [[0, 5e-4], [1000000, 1e-4], [4000000, 1e-5],
                             [8000000, 1e-6]]
    config['horizon'] = HORIZON
    config['clip_actions'] = False
    config['observation_filter'] = 'NoFilter'
    config["use_gae"] = True
    config["lambda"] = 0.95
    config["shuffle_sequences"] = True
    config["vf_clip_param"] = 1e8
    config["num_sgd_iter"] = 10
    # config["kl_target"] = 0.003
    config["kl_coeff"] = 0.01
    config["entropy_coeff"] = 0.001
    config["clip_param"] = 0.2
    config["grad_clip"] = None
    config["use_critic"] = True
    config["vf_share_layers"] = True
    config["vf_loss_coeff"] = 0.5

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # register as rllib env
    register_env(env_name, create_env)

    # multiagent configuration
    temp_env = create_env()
    policy_graphs = {
        'av': (PPOTFPolicy, temp_env.observation_space,
               temp_env.action_space, {})
    }

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policies': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config
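# The lr_schedule above is interpreted by RLlib as a piecewise schedule:
# linear interpolation between (timestep, value) breakpoints, holding the
# last value afterwards. A small illustrative re-implementation of that
# interpolation (not RLlib's code):
def lr_at(schedule, t):
    """Linearly interpolate a [[timestep, value], ...] schedule at step t."""
    for (t0, v0), (t1, v1) in zip(schedule, schedule[1:]):
        if t < t1:
            frac = (t - t0) / float(t1 - t0)
            return v0 + frac * (v1 - v0)
    return schedule[-1][1]  # past the last breakpoint, hold the final value

schedule = [[0, 5e-4], [1000000, 1e-4], [4000000, 1e-5], [8000000, 1e-6]]
# halfway between the first two breakpoints: 3e-4
assert abs(lr_at(schedule, 500000) - 3e-4) < 1e-12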
def setup_exps_rllib(flow_params,
                     n_cpus,
                     n_rollouts,
                     policy_graphs=None,
                     policy_mapping_fn=None,
                     policies_to_train=None):
    """Return the relevant components of an RLlib experiment.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters (see flow/utils/registry.py)
    n_cpus : int
        number of CPUs to run the experiment over
    n_rollouts : int
        number of rollouts per training iteration
    policy_graphs : dict, optional
        TODO
    policy_mapping_fn : function, optional
        TODO
    policies_to_train : list of str, optional
        TODO

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    from ray import tune
    from ray.tune.registry import register_env
    try:
        from ray.rllib.agents.agent import get_agent_class
    except ImportError:
        from ray.rllib.agents.registry import get_agent_class

    # global self._iteration
    horizon = flow_params['env'].horizon

    # **** Gilbert modified source code ****
    alg_run = "DQN"
    agent_cls = get_agent_class(alg_run)
    config = deepcopy(agent_cls._default_config)
    config["num_workers"] = n_cpus
    config["train_batch_size"] = horizon * n_rollouts
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [256]})
    # config["model"].update({"fcnet_activation": "relu"})
    config["horizon"] = horizon
    config["exploration_fraction"] = 0.5
    # config["lr"] = tune.grid_search([0.0001, 0.001, 0.1])
    config["lr"] = 0.001
    # PPO-specific params
    # config["use_gae"] = True
    # config["lambda"] = 0.97
    # config["kl_target"] = 0.02
    # config["num_sgd_iter"] = 10
    # **** Gilbert modified source code above ****

    # save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    # multiagent configuration
    if policy_graphs is not None:
        print("policy_graphs", policy_graphs)
        config['multiagent'].update({'policies': policy_graphs})
    if policy_mapping_fn is not None:
        config['multiagent'].update(
            {'policy_mapping_fn': tune.function(policy_mapping_fn)})
    if policies_to_train is not None:
        config['multiagent'].update({'policies_to_train': policies_to_train})

    create_env, gym_name = make_create_env(params=flow_params)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json '
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. '
              'Click the green Play arrow to continue.'.format(
                  args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    flow_params["env"].additional_params["use_seeds"] = args.use_seeds

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]
    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []

    if PRINT_TO_SCREEN:
        pp = pprint.PrettyPrinter(indent=2)
        print("config ")
        pp.pprint(config)
        print("flow_params ")
        pp.pprint(flow_params)

    if REALTIME_PLOTS:
        # prepare plots
        # You probably won't need this if you're embedding things in a
        # tkinter plot...
        plt.ion()
        fig = plt.figure()
        axA = fig.add_subplot(331)
        axA.set_title("Actions")
        axR = fig.add_subplot(332)
        axR.set_title("Rewards")
        axS = fig.add_subplot(333)
        axS.set_title("States")
        axS0 = fig.add_subplot(334)
        axS0.set_title("S0")
        axS1 = fig.add_subplot(335)
        axS1.set_title("S1")
        axS2 = fig.add_subplot(336)
        axS2.set_title("S2")
        axA_hist = fig.add_subplot(337)
        axA_hist.set_title("Actions")
        axR_hist = fig.add_subplot(338)
        axR_hist.set_title("Rewards")
        axS_hist = fig.add_subplot(339)
        axS_hist.set_title("States")
        axS.set_ylim((-2, 3))
        axA.set_ylim((-5, 5))
        axR.set_ylim((-1, 1))
        initialized_plot = False

    # record for visualization purposes
    actions = []
    rewards = []
    states = []
    times = []
    WARMUP = args.warmup

    for i in range(args.num_rollouts):
        vel = []
        time_to_exit = 0
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            time_to_exit += 1
            vehicles = env.unwrapped.k.vehicle
            if np.mean(vehicles.get_speed(vehicles.get_ids())) > 0:
                vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            # vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)

            if SUMMARY_PLOTS:
                # record for visualization purposes
                actions.append(action)
                rewards.append(reward)
                states.append(state)
            if PRINT_TO_SCREEN:
                print("action")
                pp.pprint(action)
                print("reward")
                pp.pprint(reward)
                print("state")
                pp.pprint(state)
                print("after step ")
            if REALTIME_PLOTS:
                # Update plots.
                if not initialized_plot:  # initialize
                    lineA, = axA.plot(
                        [0] * len(action), 'g^'
                    )  # Returns a tuple of line objects, thus the comma
                    lineR, = axR.plot(
                        0, 'bs'
                    )  # Returns a tuple of line objects, thus the comma
                    lineS, = axS.plot(
                        [0] * len(state), 'r+'
                    )  # Returns a tuple of line objects, thus the comma
                    initialized_plot = True
                lineA.set_ydata(action)
                lineR.set_ydata(reward)
                lineS.set_ydata(state)
                fig.canvas.draw()
                fig.canvas.flush_events()

            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break
            if args.use_delay > 0:
                if vehicles.get_num_arrived() >= args.use_delay:
                    break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(300)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(300)
        final_inflows.append(inflow)
        times.append(time_to_exit)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [
                x / y for x, y in zip(final_outflows, final_inflows)
            ]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id in rets.keys():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret[agent_id], agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    print('==== Summary of results ====')
    print("Return:")
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {:.2f}, {:.5f}'.format(np.mean(rets),
                                                    np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(mean_speed),
                                                np.std(mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(std_speed),
                                                np.std(std_speed)))

    # Compute arrival rate of vehicles in the last 300 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_outflows),
                                                np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 300 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {:.2f}, {:.5f}'.format(np.mean(final_inflows),
                                                np.std(final_inflows)))
    # Compute throughput efficiency in the last 300 sec of the run
    print("Throughput efficiency (veh/hr):")
    print(throughput_efficiency)
    print('Average, std: {:.2f}, {:.5f}'.format(
        np.mean(throughput_efficiency), np.std(throughput_efficiency)))
    print("Time Delay")
    print(times)
    print("Time for certain number of vehicles to exit {:.2f}, {:.5f}".format(
        np.mean(times), np.std(times)))

    if args.output:
        np.savetxt(args.output,
                   [mean_speed, std_speed, final_inflows, final_outflows,
                    times])
    if SUMMARY_PLOTS:
        generateHtmlplots(actions, rewards, states)

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)
        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)
        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)
        # convert the emission file into a csv file
        emission_to_csv(emission_path)
        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)
        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~') + '/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += " && cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
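# A hedged alternative to the os.system call in the movie step above: build
# the same ffmpeg-and-copy pipeline with subprocess.run and shutil, which
# avoids shell-quoting problems when paths contain spaces. This is a
# suggested sketch, not the original implementation.
import os
import shutil
import subprocess

def save_movie_sketch(movie_dir, save_dir, name):
    """Encode frame_%06d.png under movie_dir into <name>.mp4 and copy it."""
    out_file = os.path.join(movie_dir, name + ".mp4")
    subprocess.run(
        ["ffmpeg", "-i", "frame_%06d.png", "-pix_fmt", "yuv420p", out_file],
        cwd=movie_dir, check=True)
    shutil.copy(out_file, save_dir)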
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for more
    detailed information on what information can be fed to this visualizer),
    and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  '\'{}\' passed in '.format(args.run) +
                  'differs from the one stored in params.json '
                  '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/emission/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None
    sim_params.summary_path = emission_path if args.gen_emission else None
    sim_params.tripinfo_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = False  # will be set to True below
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        if args.render_mode != 'sumo_gui':
            sim_params.render = 'drgb'
            sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]
    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if args.render_mode == 'sumo_gui':
        # set to True after initializing agent and env
        env.sim_params.render = True

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [
                    np.zeros(size, np.float32),
                    np.zeros(size, np.float32)
                ]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    # if restart_instance, don't restart here because env.reset will restart
    # later
    if not sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params,
                               render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                                state[agent_id],
                                state=state_init[agent_id],
                                policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id],
                            policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))

    # terminate the environment
    env.unwrapped.terminate()

    emission_location = os.path.join(emission_path, env.network.name)
    return emission_location
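# Hedged usage sketch for this variant, which returns the emission location
# rather than printing summary statistics. create_parser is referenced in the
# docstring above; ray must be initialized before the agent is created.
import ray

if __name__ == '__main__':
    parser = create_parser()
    args = parser.parse_args()
    ray.init(num_cpus=1)
    emission_location = visualizer_rllib(args)
    print('emission files written to', emission_location)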
create_env, env_name = make_create_env(
    params=flow_params, version=0, render=False)
register_env(env_name, create_env)

# Determine agent and checkpoint
config_run = config['env_config']['run'] if 'run' in config['env_config'] \
    else None
if args.run and config_run:
    if args.run != config_run:
        print("visualizer_rllib.py: error: run argument "
              "\"{}\" passed in ".format(args.run) +
              "differs from the one stored in params.json "
              "\"{}\"".format(config_run))
        sys.exit(1)
if args.run:
    agent_cls = get_agent_class(args.run)
elif config_run:
    agent_cls = get_agent_class(config_run)
else:
    print("visualizer_rllib.py: error: could not find flow parameter "
          "\"run\" in params.json, "
          "add argument --run to provide the algorithm or model used "
          "to train the results\n e.g. "
          "python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO")
    sys.exit(1)

agent = agent_cls(env=env_name, config=config)
checkpoint = result_dir + '/checkpoint-' + args.checkpoint_num
agent._restore(checkpoint)

# Recreate the scenario from the pickled parameters
exp_tag = flow_params["exp_tag"]