Example #1
import json
import os

from flow.core.experiment import Experiment
from flow.utils.rllib import FlowParamsEncoder


def simulate_without_rl(flags, module):
    """Simulate an experiment without reinforcement-learning agents."""
    config = getattr(module, flags.exp_config)
    flow_params = config.flow_params

    # Fall back to None when the config defines no custom callables.
    callables = getattr(config, "custom_callables", None)

    flow_params['sim'].render = not flags.no_render
    flow_params['simulator'] = 'traci'

    # Specify an emission path if emission data is meant to be generated.
    if flags.gen_emission:
        flow_params['sim'].emission_path = "./data"

        # Dump the flow params to a JSON file in the emission directory.
        fp_ = flow_params['exp_tag']
        dir_ = flow_params['sim'].emission_path
        with open(os.path.join(dir_, "{}.json".format(fp_)), 'w') as outfile:
            json.dump(flow_params,
                      outfile,
                      cls=FlowParamsEncoder,
                      sort_keys=True,
                      indent=4)

    # Cap each rollout at 3000 simulation steps.
    flow_params['env'].horizon = 3000

    # Create the experiment object and run for the specified number of rollouts.
    exp = Experiment(flow_params, callables)
    exp.run(flags.num_runs, convert_to_csv=flags.gen_emission)
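
The flags argument is just a namespace of command-line options; simulate_without_rl reads exp_config, no_render, gen_emission, and num_runs from it. A minimal driver sketch, assuming a hypothetical exp_configs module and config name (both are placeholders, not part of the original snippet):

from types import SimpleNamespace

import exp_configs  # hypothetical module with one attribute per experiment config

flags = SimpleNamespace(
    exp_config="ring",   # hypothetical config name; must be an attribute of exp_configs
    no_render=False,
    gen_emission=False,
    num_runs=1,
)
simulate_without_rl(flags, exp_configs)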
Example #2
import json
import os
import timeit
from time import strftime

from flow.core.experiment import Experiment
from flow.core.util import ensure_dir
from flow.utils.registry import env_constructor
from flow.utils.rllib import FlowParamsEncoder, get_flow_params


def train_stable_baselines3(submodule, flags):
    """Train policies using the PPO algorithm in stable-baselines3."""
    from stable_baselines3.common.vec_env import DummyVecEnv
    from stable_baselines3 import PPO
    import torch

    start_time = timeit.default_timer()
    flow_params = submodule.flow_params
    # Path to the saved files
    exp_tag = flow_params['exp_tag']
    result_name = '{}/{}'.format(exp_tag, strftime("%Y-%m-%d-%H:%M:%S"))

    # Perform training.
    print("cuda is available: ", torch.cuda.is_available())
    print('Beginning training.')
    print("==========================================")
    model = run_model_stablebaseline3(flow_params, flags.num_cpus,
                                      flags.rollout_size, flags.num_steps)

    # Save the trained model so it can be reloaded for evaluation below.
    print('Saving the trained model!')
    path = os.path.realpath(os.path.expanduser('~/baseline_results'))
    ensure_dir(path)
    save_path = os.path.join(path, result_name)
    model.save(save_path)
    # Record total training time (useful when comparing GPU and CPU runs).
    stop_time = timeit.default_timer()
    run_time = stop_time - start_time

    # Dump the flow params next to the saved model.
    with open(os.path.join(path, result_name) + '.json', 'w') as outfile:
        json.dump(flow_params,
                  outfile,
                  cls=FlowParamsEncoder,
                  sort_keys=True,
                  indent=4)

    # Replay the result by loading the trained model.
    print('Loading the trained model and testing it out!')
    # load() is a classmethod in stable-baselines3: it returns a new model
    # rather than mutating this one, so rebind the name.
    model = PPO.load(save_path)
    flow_params = get_flow_params(os.path.join(path, result_name) + '.json')

    flow_params['sim'].render = False
    flow_params['env'].horizon = 1500  # 150 seconds of operation at a 0.1 s sim step
    env = env_constructor(params=flow_params, version=0)()
    # The algorithms require a vectorized environment to run
    eval_env = DummyVecEnv([lambda: env])
    obs = eval_env.reset()
    reward = 0
    for _ in range(flow_params['env'].horizon):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        reward += rewards
    print("--------------------------------------------------------")
    flow_params['sim'].render = True
    simulation = Experiment(flow_params)
    simulation.run(num_runs=1)
    print('the final reward is {}'.format(reward))
    print("total run_time:", run_time, "s")
Example #3
from flow.core.experiment import Experiment

# setup_exps_rllib is defined alongside this function in the same training script.


def train_rllib(submodule, flags):
    """Train policies using the PPO algorithm in RLlib."""
    import ray
    from ray.tune import run_experiments

    flow_params = submodule.flow_params
    n_cpus = submodule.N_CPUS
    n_rollouts = submodule.N_ROLLOUTS
    policy_graphs = getattr(submodule, "POLICY_GRAPHS", None)
    policy_mapping_fn = getattr(submodule, "policy_mapping_fn", None)
    policies_to_train = getattr(submodule, "policies_to_train", None)

    alg_run, gym_name, config = setup_exps_rllib(flow_params, n_cpus,
                                                 n_rollouts, policy_graphs,
                                                 policy_mapping_fn,
                                                 policies_to_train, flags)

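    # One extra CPU for the driver process; cap Ray's object store at 200 MB.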
    ray.init(num_cpus=n_cpus + 1, object_store_memory=200 * 1024 * 1024)
    exp_config = {
        "run": alg_run,
        "env": gym_name,
        "config": {
            **config
        },
        "checkpoint_freq": 20,
        "checkpoint_at_end": True,
        "max_failures": 999,
        "stop": {
            "training_iteration": flags.num_steps,
        },
    }
    print(exp_config["config"]["framework"])
    if flags.checkpoint_path is not None:
        exp_config['restore'] = flags.checkpoint_path
    run_experiments({flow_params["exp_tag"]: exp_config})
    simulation = Experiment(flow_params)
    simulation.run(num_runs=1)
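
POLICY_GRAPHS, policy_mapping_fn, and policies_to_train are only present for multi-agent configs; single-agent submodules omit them and the getattr calls above fall back to None. A sketch of what a multi-agent config module might define, with placeholder observation and action spaces (the attribute names follow the lookups above; the contents are illustrative, not taken from the original):

import numpy as np
from gym.spaces import Box

obs_space = Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32)
act_space = Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)

# RLlib's multi-agent spec: {policy_id: (policy_class, obs_space, act_space, config)}.
# None as the policy class falls back to the algorithm's default policy.
POLICY_GRAPHS = {"av": (None, obs_space, act_space, {})}


def policy_mapping_fn(agent_id):
    # Map every agent to the single shared policy.
    return "av"


policies_to_train = ["av"]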