def simulate_without_rl(flags, module):
    """Simulate an experiment in the absence of RL control."""
    flow_params = getattr(module, flags.exp_config).flow_params

    if hasattr(getattr(module, flags.exp_config), "custom_callables"):
        callables = getattr(module, flags.exp_config).custom_callables
    else:
        callables = None

    flow_params['sim'].render = not flags.no_render
    flow_params['simulator'] = 'traci'

    # Specify an emission path if emission files are meant to be generated.
    if flags.gen_emission:
        flow_params['sim'].emission_path = "./data"

        # Dump the flow params alongside the emission files so the run can
        # be reproduced later.
        fp_ = flow_params['exp_tag']
        dir_ = flow_params['sim'].emission_path
        with open(os.path.join(dir_, "{}.json".format(fp_)), 'w') as outfile:
            json.dump(flow_params, outfile,
                      cls=FlowParamsEncoder, sort_keys=True, indent=4)

    # Set the simulation horizon (number of steps per run).
    flow_params['env'].horizon = 3000

    # Create the experiment object and run it for the specified number of
    # runs.
    exp = Experiment(flow_params, callables)
    exp.run(flags.num_runs, convert_to_csv=flags.gen_emission)
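
# Usage sketch for `simulate_without_rl` (hypothetical: the repository's real
# CLI lives elsewhere). The flag names mirror the attribute accesses above;
# the config module path and this driver function are assumptions, shown only
# to illustrate how the pieces fit together.
def _example_simulate_driver():
    """Minimal, illustrative driver for simulate_without_rl."""
    import argparse
    import importlib

    parser = argparse.ArgumentParser()
    parser.add_argument('exp_config', type=str,
                        help='name of the experiment config attribute to load')
    parser.add_argument('--no_render', action='store_true')
    parser.add_argument('--gen_emission', action='store_true')
    parser.add_argument('--num_runs', type=int, default=1)
    flags = parser.parse_args()

    # Hypothetical module path; adjust to wherever the configs live.
    module = importlib.import_module('exp_configs.non_rl')
    simulate_without_rl(flags, module)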
def train_stable_baselines3(submodule, flags):
    """Train policies using the PPO algorithm in stable-baselines3."""
    from stable_baselines3.common.vec_env import DummyVecEnv
    from stable_baselines3 import PPO
    import torch

    start_time = timeit.default_timer()
    flow_params = submodule.flow_params

    # Path to the saved files.
    exp_tag = flow_params['exp_tag']
    result_name = '{}/{}'.format(exp_tag, strftime("%Y-%m-%d-%H:%M:%S"))

    # Perform training.
    print("CUDA is available: ", torch.cuda.is_available())
    print('Beginning training.')
    print("==========================================")
    model = run_model_stablebaseline3(flow_params, flags.num_cpus,
                                      flags.rollout_size, flags.num_steps)

    # Save the trained model to a desired folder, then reload it below to
    # demonstrate loading.
    print('Saving the trained model!')
    path = os.path.realpath(os.path.expanduser('~/baseline_results'))
    ensure_dir(path)
    save_path = os.path.join(path, result_name)
    model.save(save_path)

    # Measure the total run time (useful when comparing GPU and CPU runs).
    stop_time = timeit.default_timer()
    run_time = stop_time - start_time

    # Dump the flow params so the experiment can be replayed later.
    with open(os.path.join(path, result_name) + '.json', 'w') as outfile:
        json.dump(flow_params, outfile,
                  cls=FlowParamsEncoder, sort_keys=True, indent=4)

    # Replay the result by loading the model. Note that `load` is a
    # classmethod in stable-baselines3 and returns a new model, so the
    # result must be reassigned.
    print('Loading the trained model and testing it out!')
    model = PPO.load(save_path)
    flow_params = get_flow_params(os.path.join(path, result_name) + '.json')
    flow_params['sim'].render = False
    flow_params['env'].horizon = 1500  # 150 seconds of simulation
    env = env_constructor(params=flow_params, version=0)()

    # The algorithms require a vectorized environment to run.
    eval_env = DummyVecEnv([lambda: env])
    obs = eval_env.reset()
    reward = 0
    for _ in range(flow_params['env'].horizon):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        reward += rewards
    print("--------------------------------------------------------")

    # Render a final run of the trained policy.
    flow_params['sim'].render = True
    simulation = Experiment(flow_params)
    simulation.run(num_runs=1)

    print('the final reward is {}'.format(reward))
    print("total run_time:", run_time, "s")
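
# `run_model_stablebaseline3` is called above but defined elsewhere in this
# module. The sketch below shows one plausible shape for it, assuming SB3's
# PPO with an MlpPolicy and Flow's `env_constructor` factory; it is an
# illustration under those assumptions, not the repository's actual
# implementation.
def _run_model_stablebaseline3_sketch(flow_params, num_cpus, rollout_size,
                                      num_steps):
    """Illustrative PPO training loop for stable-baselines3."""
    from stable_baselines3 import PPO
    from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv

    if num_cpus == 1:
        # A single copy of the environment suffices without parallelism.
        env = env_constructor(params=flow_params, version=0)()
        env = DummyVecEnv([lambda: env])
    else:
        # One environment copy per worker process; `env_constructor`
        # returns a factory callable, as the call sites above suggest.
        env = SubprocVecEnv([
            env_constructor(params=flow_params, version=i)
            for i in range(num_cpus)
        ])

    model = PPO('MlpPolicy', env, verbose=1, n_steps=rollout_size)
    model.learn(total_timesteps=num_steps)
    return model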
def train_rllib(submodule, flags):
    """Train policies using the PPO algorithm in RLlib."""
    import ray
    from ray.tune import run_experiments

    flow_params = submodule.flow_params
    n_cpus = submodule.N_CPUS
    n_rollouts = submodule.N_ROLLOUTS
    policy_graphs = getattr(submodule, "POLICY_GRAPHS", None)
    policy_mapping_fn = getattr(submodule, "policy_mapping_fn", None)
    policies_to_train = getattr(submodule, "policies_to_train", None)

    alg_run, gym_name, config = setup_exps_rllib(
        flow_params, n_cpus, n_rollouts,
        policy_graphs, policy_mapping_fn, policies_to_train, flags)

    ray.init(num_cpus=n_cpus + 1, object_store_memory=200 * 1024 * 1024)

    exp_config = {
        "run": alg_run,
        "env": gym_name,
        "config": {
            **config
        },
        "checkpoint_freq": 20,
        "checkpoint_at_end": True,
        "max_failures": 999,
        "stop": {
            "training_iteration": flags.num_steps,
        },
    }
    print(exp_config["config"]["framework"])

    # Restore from a checkpoint if one was provided.
    if flags.checkpoint_path is not None:
        exp_config['restore'] = flags.checkpoint_path
    run_experiments({flow_params["exp_tag"]: exp_config})

    # Visualize the resulting policy with a single rendered run.
    simulation = Experiment(flow_params)
    simulation.run(num_runs=1)
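
# `setup_exps_rllib` is called above but defined elsewhere in this module.
# The sketch below outlines one plausible version, assuming an older Ray API
# (`get_agent_class` and trainer `_default_config`) and Flow's
# `make_create_env` helper. The multiagent policy arguments accepted at the
# call site are omitted for brevity, and the config values are illustrative.
def _setup_exps_rllib_sketch(flow_params, n_cpus, n_rollouts):
    """Illustrative RLlib experiment setup for PPO."""
    from copy import deepcopy
    from ray.rllib.agents.registry import get_agent_class
    from ray.tune.registry import register_env
    from flow.utils.registry import make_create_env

    alg_run = "PPO"
    horizon = flow_params['env'].horizon

    # Start from RLlib's default PPO config and scale the training batch to
    # the requested number of rollouts per iteration.
    config = deepcopy(get_agent_class(alg_run)._default_config)
    config["num_workers"] = n_cpus
    config["horizon"] = horizon
    config["train_batch_size"] = horizon * n_rollouts
    config["framework"] = "torch"

    # Serialize the flow params so rollouts can be replayed later.
    config['env_config']['flow_params'] = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['run'] = alg_run

    # Register the Flow environment with Gym/RLlib under a unique name.
    create_env, gym_name = make_create_env(params=flow_params, version=0)
    register_env(gym_name, create_env)
    return alg_run, gym_name, config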