Example no. 1
def run_model_stablebaseline(flow_params,
                             num_cpus=1,
                             rollout_size=50,
                             num_steps=50):
    """Run the model for num_steps if provided.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters
    num_cpus : int
        number of CPUs used during training
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps
    The total rollout length is rollout_size.

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([
            env_constructor(params=flow_params, version=i)
            for i in range(num_cpus)
        ])

    train_model = PPO2('MlpPolicy', env, verbose=1, n_steps=rollout_size)
    train_model.learn(total_timesteps=num_steps)
    return train_model
Example no. 2
def run_model_stablebaseline3(flow_params,
                              num_cpus=1,
                              rollout_size=5,
                              num_steps=5):
    """Run the model with stable-baselines3 for num_steps if provided."""
    from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
    from stable_baselines3 import PPO
    from stable_baselines3.ppo import MlpPolicy
    import torch.nn as nn

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([
            env_constructor(params=flow_params, version=i)
            for i in range(num_cpus)
        ])

    train_model = PPO(MlpPolicy,
                      env=env,
                      verbose=1,
                      n_epochs=rollout_size,
                      tensorboard_log="./PPO_tensorboard/",
                      device="cuda")  # cpu, gpu selection
    # automatically select gpu
    train_model.learn(total_timesteps=num_steps * rollout_size)  #
    return train_model
Example no. 3
def run_model(num_cpus=1, rollout_size=50, num_steps=50):
    """Run the model for num_steps if provided. The total rollout length is rollout_size."""
    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        env = DummyVecEnv([lambda: constructor])  # The algorithms require a vectorized environment to run
    else:
        env = SubprocVecEnv([env_constructor(params=flow_params, version=i) for i in range(num_cpus)])

    model = PPO2('MlpPolicy', env, verbose=1, n_steps=rollout_size)
    model.learn(total_timesteps=num_steps)
    return model
Example no. 4
def run_model(params, rollout_size=50, num_steps=50):
    """Perform the training operation.

    Parameters
    ----------
    params : dict
        flow-specific parameters (see flow/utils/registry.py)
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    constructor = env_constructor(params, version=0)()
    env = DummyVecEnv([lambda: constructor])

    model = TRPO(
        'MlpPolicy',
        env,
        verbose=2,
        timesteps_per_batch=rollout_size,
        gamma=0.999,
        policy_kwargs={
            "net_arch": [100, 50, 25]
        },
    )
    model.learn(total_timesteps=num_steps)

    return model
Example no. 5
def run_model(num_cpus=1, rollout_size=50, num_steps=50, use_inflows=False):
    """Run the model for num_steps if provided. The total rollout length is rollout_size."""
    initial_config, net_params = setup_exps(use_inflows)
    # add the new parameters to flow_params
    flow_params['initial'] = initial_config
    flow_params['net'] = net_params

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        env = DummyVecEnv([lambda: constructor])  # The algorithms require a vectorized environment to run
    else:
        env = SubprocVecEnv([env_constructor(params=flow_params, version=i) for i in range(num_cpus)])

    model = PPO2('MlpPolicy', env, verbose=1, n_steps=rollout_size)
    model.learn(total_timesteps=num_steps)
    return model
Example no. 6
def train_stable_baselines(submodule, flags):
    """Train policies using the PPO algorithm in stable-baselines."""
    from stable_baselines3.common.vec_env import DummyVecEnv

    flow_params = submodule.flow_params
    # Path to the saved files
    exp_tag = flow_params['exp_tag']
    result_name = '{}/{}'.format(exp_tag, strftime("%Y-%m-%d-%H:%M:%S"))

    # Perform training.
    start_time = timeit.default_timer()
    # print training information
    print("=========================================")
    print('Beginning training.')
    print('Algorithm :', flags.algorithm)
    model = run_model_stablebaseline(flow_params, flags.num_cpus,
                                     flags.rollout_size, flags.num_steps,
                                     flags.algorithm, flags.exp_config)

    stop_time = timeit.default_timer()
    run_time = stop_time - start_time
    print("Training is Finished")
    print("total runtime: ", run_time)
    # Save the model to a desired folder and then delete it to demonstrate
    # loading.
    print('Saving the trained model!')
    path = os.path.realpath(os.path.expanduser('~/baseline_results'))
    ensure_dir(path)
    save_path = os.path.join(path, result_name)
    model.save(save_path)

    # dump the flow params
    with open(os.path.join(path, result_name) + '.json', 'w') as outfile:
        json.dump(flow_params,
                  outfile,
                  cls=FlowParamsEncoder,
                  sort_keys=True,
                  indent=4)

    # Replay the result by loading the model
    print('Loading the trained model and testing it out!')
    if flags.exp_config.lower() == "ppo":
        from stable_baselines3 import PPO
        model = PPO.load(save_path)
    elif flags.exp_config.lower() == "ddpg":
        from stable_baselines3 import DDPG
        model = DDPG.load(save_path)
    flow_params = get_flow_params(os.path.join(path, result_name) + '.json')
    flow_params['sim'].render = True
    env = env_constructor(params=flow_params, version=0)()
    # The algorithms require a vectorized environment to run
    eval_env = DummyVecEnv([lambda: env])
    obs = eval_env.reset()
    reward = 0
    for _ in range(flow_params['env'].horizon):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        reward += rewards
    print('the final reward is {}'.format(reward))
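The flags argument used by train_stable_baselines above is assumed to come from a command-line parser. The sketch below is a hypothetical minimal parser (not the original project's) whose attribute names simply mirror the ones these examples read from flags: exp_config, algorithm, num_cpus, rollout_size, and num_steps.

import argparse


def parse_args_sketch():
    """Hypothetical minimal parser exposing the attributes read from `flags`."""
    parser = argparse.ArgumentParser(
        description='Train a policy on a Flow environment with stable-baselines.')
    parser.add_argument('exp_config', type=str,
                        help='name of the experiment configuration module')
    parser.add_argument('--algorithm', type=str, default='PPO',
                        help='RL algorithm to use, e.g. PPO or DDPG')
    parser.add_argument('--num_cpus', type=int, default=1,
                        help='number of CPUs used during training')
    parser.add_argument('--rollout_size', type=int, default=50,
                        help='length of a single rollout')
    parser.add_argument('--num_steps', type=int, default=50,
                        help='total number of training steps')
    return parser.parse_args()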
Example no. 7
def train_stable_baselines3(submodule, flags):
    """Train policies using the PPO algorithm in stable-baselines3."""
    from stable_baselines3.common.vec_env import DummyVecEnv
    from stable_baselines3 import PPO
    import torch
    start_time = timeit.default_timer()
    flow_params = submodule.flow_params
    # Path to the saved files
    exp_tag = flow_params['exp_tag']
    result_name = '{}/{}'.format(exp_tag, strftime("%Y-%m-%d-%H:%M:%S"))

    # Perform training.
    print("cuda is available: ", torch.cuda.is_available())
    print('Beginning training.')
    print("==========================================")
    model = run_model_stablebaseline3(flow_params, flags.num_cpus,
                                      flags.rollout_size, flags.num_steps)

    # Save the model to a desired folder and then delete it to demonstrate
    # loading.
    print('Saving the trained model!')
    path = os.path.realpath(os.path.expanduser('~/baseline_results'))
    ensure_dir(path)
    save_path = os.path.join(path, result_name)
    model.save(save_path)
    # dump the flow params
    # measure runtime here to compare GPU vs. CPU training
    stop_time = timeit.default_timer()
    run_time = stop_time - start_time
    with open(os.path.join(path, result_name) + '.json', 'w') as outfile:
        json.dump(flow_params,
                  outfile,
                  cls=FlowParamsEncoder,
                  sort_keys=True,
                  indent=4)

    # Replay the result by loading the model
    print('Loading the trained model and testing it out!')
    model = PPO.load(save_path)
    flow_params = get_flow_params(os.path.join(path, result_name) + '.json')

    flow_params['sim'].render = False
    flow_params['env'].horizon = 1500  # 150 seconds of operation
    env = env_constructor(params=flow_params, version=0)()
    # The algorithms require a vectorized environment to run
    eval_env = DummyVecEnv([lambda: env])
    obs = eval_env.reset()
    reward = 0
    for _ in range(flow_params['env'].horizon):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        reward += rewards
    print("--------------------------------------------------------")
    flow_params['sim'].render = True
    simulation = Experiment(flow_params)
    simulation.run(num_runs=1)
    print('the final reward is {}'.format(reward))
    print("total run_time:", run_time, "s")
Example no. 8
def run_model_stablebaseline(flow_params: Dict,
                             num_cpus: int = 1,
                             rollout_size: int = 50,
                             num_steps: int = 50):
    """
        Run the model with stable_baselines for num_steps if provided.

        Parameters
        ----------
        flow_params : dict
            flow-specific parameters
        num_cpus : int
            number of CPUs used during training
        rollout_size : int
            length of a single rollout
        num_steps : int
            total number of training steps
        The total rollout length is rollout_size.

        Returns
        -------
        stable_baselines.*
            the trained model
    """
    from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
    from stable_baselines import PPO2

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([
            env_constructor(params=flow_params, version=i)
            for i in range(num_cpus)
        ])

    print_box('Initialising MlpPolicy in Stable Baselines')
    train_model = PPO2('MlpPolicy',
                       env,
                       verbose=1,
                       n_steps=rollout_size,
                       tensorboard_log='delete_ppo')
    train_model.learn(total_timesteps=num_steps)
    return train_model
Example no. 9
def run_model_stablebaseline(flow_params,
                             num_cpus=1,
                             rollout_size=50,
                             num_steps=50):
    """Run the model for num_steps if provided.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters
    num_cpus : int
        number of CPUs used during training
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps
    The total rollout length is rollout_size.

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
    from stable_baselines import DDPG
    from stable_baselines.ddpg.policies import MlpPolicy
    from stable_baselines.common.noise import NormalActionNoise, \
        OrnsteinUhlenbeckActionNoise, AdaptiveParamNoiseSpec
    import numpy as np

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([env_constructor(params=flow_params, version=i)
                             for i in range(num_cpus)])

    # construct the exploration noise objects required by DDPG
    # (parameter-space noise is left disabled here)
    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(
        mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

    train_model = DDPG('MlpPolicy',
                       env,
                       verbose=1,
                       param_noise=param_noise,
                       action_noise=action_noise,
                       tensorboard_log="./DDPG_cartpole_tensorboard/")
    train_model.learn(total_timesteps=num_steps)
    return train_model
Example no. 10
def train_stable_baselines(submodule, flags):
    """Train policies using the PPO algorithm in stable-baselines."""
    from stable_baselines.common.vec_env import DummyVecEnv
    from stable_baselines import PPO2

    flow_params = submodule.flow_params
    # Path to the saved files
    exp_tag = flow_params['exp_tag']
    exp_folder_name = os.path.join(os.getcwd(), exp_tag)
    if not os.path.exists(exp_folder_name):
        os.makedirs(exp_folder_name)
    result_name = '{}/{}'.format(exp_tag, strftime("%Y-%m-%d-%H:%M:%S"))

    # Perform training.
    print_box('Beginning training.')
    model = run_model_stablebaseline(flow_params, flags.num_cpus,
                                     flags.rollout_size, flags.num_steps)

    # Save the model to a desired folder and then delete it to demonstrate
    # loading.
    # NOTE changed file saving HERE
    print_box('Saving the trained model!')
    path = os.getcwd()
    save_path = os.path.join(path, result_name)
    model.save(save_path)

    # dump the flow params
    with open(os.path.join(path, result_name) + '.json', 'w') as outfile:
        json.dump(flow_params,
                  outfile,
                  cls=FlowParamsEncoder,
                  sort_keys=True,
                  indent=4)

    # Replay the result by loading the model
    print_box('Loading the trained model and testing it out!')
    model = PPO2.load(save_path)
    flow_params = get_flow_params(os.path.join(path, result_name) + '.json')
    flow_params['sim'].render = False
    env = env_constructor(params=flow_params, version=0)()
    # The algorithms require a vectorized environment to run
    eval_env = DummyVecEnv([lambda: env])
    obs = eval_env.reset()
    reward = 0
    for _ in range(flow_params['env'].horizon):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        reward += rewards
    print('The final reward is {}'.format(reward))
Example no. 11
def play_results(path, result_name):
    """Load a trained DQN model and replay it in the environment."""
    print('Loading the trained model and testing it out!')
    save_path = os.path.join(path, result_name)
    model = DQN.load(save_path)
    flow_params = get_flow_params(os.path.join(path, result_name) + '.json')
    flow_params['sim'].render = True
    env_con = env_constructor(params=flow_params, version=0)()
    # The algorithms require a vectorized environment to run
    eval_env = DummyVecEnv([lambda: env_con])
    obs = eval_env.reset()
    reward = 0
    for _ in range(flow_params['env'].horizon):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        reward += rewards
    print('the final reward is {}'.format(reward))
Example no. 12
def train_stable_baselines(submodule, flags):
    """Train policies using the PPO algorithm in stable-baselines."""
    from stable_baselines.common.vec_env import DummyVecEnv
    from stable_baselines import DDPG
    flow_params = submodule.flow_params
    # Path to the saved files
    exp_tag = flow_params['exp_tag']
    result_name = '{}/{}'.format(exp_tag, strftime("%Y-%m-%d-%H:%M:%S"))

    # Perform training.
    print('Beginning training.')
    model = run_model_stablebaseline(
        flow_params, flags.num_cpus, flags.rollout_size, flags.num_steps)

    # Save the model to a desired folder and then delete it to demonstrate
    # loading.
    print('Saving the trained model!')
    path = os.path.realpath(os.path.expanduser('~/baseline_results'))
    ensure_dir(path)
    save_path = os.path.join(path, result_name)
    model.save(save_path)

    # dump the flow params
    with open(os.path.join(path, result_name) + '.json', 'w') as outfile:
        json.dump(flow_params, outfile,
                  cls=FlowParamsEncoder, sort_keys=True, indent=4)

    # Replay the result by loading the model
    print('Loading the trained model and testing it out!')
    model = DDPG.load(save_path)
    flow_params = get_flow_params(os.path.join(path, result_name) + '.json')
    flow_params['sim'].render = True
    env = env_constructor(params=flow_params, version=0)()
    
    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

    # The algorithms require a vectorized environment to run
    eval_env = DummyVecEnv([lambda: env])
    obs = eval_env.reset()
    reward = 0
    for _ in range(flow_params['env'].horizon):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        reward += rewards
    print('the final reward is {}'.format(reward))
Example no. 13
    print('Saving the trained model!')
    model.save(save_path)
    # dump the flow params
    with open(os.path.join(path, args.result_name) + '.json', 'w') as outfile:
        json.dump(flow_params,
                  outfile,
                  cls=FlowParamsEncoder,
                  sort_keys=True,
                  indent=4)
    del model
    del flow_params

    # Replay the result by loading the model
    print('Loading the trained model and testing it out!')
    model = PPO2.load(save_path)
    flow_params = get_flow_params(
        os.path.join(path, args.result_name) + '.json')
    flow_params['sim'].render = True
    env_con = env_constructor(params=flow_params, version=0)()
    env = DummyVecEnv([
        lambda: env_con
    ])  # The algorithms require a vectorized environment to run
    obs = env.reset()
    reward = 0
    for i in range(flow_params['env'].horizon):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        reward += rewards
    print('the final reward is {}'.format(reward))
Example no. 14
def run_model_stablebaseline(flow_params,
                             num_cpus=1,
                             rollout_size=50,
                             num_steps=50,
                             algorithm="ppo",
                             exp_config=None):
    """Run the model for num_steps if provided.
    Parameters
    ----------
    flow_params : dict
        flow-specific parameters
    num_cpus : int
        number of CPUs used during training
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps
    The total rollout length is rollout_size.
    Returns
    -------
    stable_baselines.*
        the trained model
    """
    from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([
            env_constructor(params=flow_params, version=i)
            for i in range(num_cpus)
        ])
    if algorithm == "PPO":
        from stable_baselines3 import PPO
        train_model = PPO('MlpPolicy', env, verbose=1, n_steps=rollout_size)
        train_model.learn(total_timesteps=num_steps)
        print("Learning Process is Done.")
        return train_model

    elif algorithm == "DDPG":
        from stable_baselines3 import DDPG
        from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise
        import numpy as np
        if exp_config == 'singleagent_figure_eight':
            train_model = DDPG(
                'MlpPolicy',
                env,
                verbose=1,
                n_episodes_rollout=rollout_size,
                learning_starts=3000,
                learning_rate=0.0001,
                action_noise=OrnsteinUhlenbeckActionNoise(
                    mean=np.zeros(1),
                    sigma=0.15 * np.ones(1),
                    initial_noise=0.7 * np.ones(1)),
                tau=0.005,
                batch_size=128,
                tensorboard_log='tensorboard_ddpg',
                device='cuda',
            )
        else:
            train_model = DDPG(
                'MlpPolicy',
                env,
                verbose=1,
                n_episodes_rollout=rollout_size,
                learning_starts=1200,
                tensorboard_log='tensorboard_ddpg',
                learning_rate=0.0001,
                action_noise=OrnsteinUhlenbeckActionNoise(
                    mean=np.zeros(1),
                    sigma=0.15 * np.ones(1),
                    initial_noise=0.7 * np.ones(1)),
                tau=0.005,
                batch_size=512,
                device='cpu',
            )

        from tensorboard_baselines.callbacks_ddpg import TensorboardCallback
        train_model.learn(
            total_timesteps=num_steps,
            log_interval=2,
            eval_log_path='ddpg_log',
            eval_freq=2,
            #callback=[TensorboardCallback],
        )
        print("Learning Process is Done.")
        return train_model
Example no. 15
def run_model_stablebaseline(flow_params, args, model_params=None):
    """Run the model for num_steps if provided.

    Parameters
    ----------
    flow_params :
        Flow related parameters from config.
    args:
        Training arguments from parser.

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    constructor = env_constructor(params=flow_params, version=0)()
    # The algorithms require a vectorized environment to run
    env = DummyVecEnv([lambda: constructor])

    if model_params is None:
        if args.policy == 0:
            policy = MlpPolicy
        elif args.policy == 1:
            policy = LnMlpPolicy
        else:
            warnings.warn("Invalid policy type! Policy set to MlpPolicy.")
            policy = MlpPolicy
        dueling = None if args.dueling else dict(dueling=False)

        train_model = DQN(
            policy=policy,
            env=env,
            gamma=args.gamma,
            learning_rate=args.learning_rate,
            buffer_size=args.buffer_size,
            exploration_fraction=args.exploration_fraction,
            exploration_final_eps=args.exploration_final_eps,
            exploration_initial_eps=args.exploration_initial_eps,
            train_freq=args.train_freq,
            batch_size=args.batch_size,
            double_q=args.double_q,
            learning_starts=args.learning_starts,
            target_network_update_freq=args.target_network_update_freq,
            prioritized_replay=args.prioritized_replay,
            prioritized_replay_alpha=args.prioritized_replay_alpha,
            prioritized_replay_beta0=args.prioritized_replay_beta0,
            prioritized_replay_beta_iters=args.prioritized_replay_beta_iters,
            prioritized_replay_eps=args.prioritized_replay_eps,
            param_noise=args.param_noise,
            policy_kwargs=dueling,
            verbose=args.verbose,
            tensorboard_log=args.tensorboard_log,
            full_tensorboard_log=args.full_tensorboard_log)
    else:
        train_model = DQN(
            policy=model_params["policy"],
            env=env,
            gamma=model_params["gamma"],
            learning_rate=model_params["learning_rate"],
            buffer_size=model_params["buffer_size"],
            exploration_fraction=model_params["exploration_fraction"],
            exploration_final_eps=model_params["exploration_final_eps"],
            exploration_initial_eps=model_params["exploration_initial_eps"],
            train_freq=model_params["train_freq"],
            batch_size=model_params["batch_size"],
            double_q=model_params["double_q"],
            learning_starts=model_params["learning_starts"],
            target_network_update_freq=model_params[
                "target_network_update_freq"],
            prioritized_replay=model_params["prioritized_replay"],
            prioritized_replay_alpha=model_params["prioritized_replay_alpha"],
            prioritized_replay_beta0=model_params["prioritized_replay_beta0"],
            prioritized_replay_beta_iters=model_params[
                "prioritized_replay_beta_iters"],
            prioritized_replay_eps=model_params["prioritized_replay_eps"],
            param_noise=model_params["param_noise"],
            policy_kwargs=model_params["policy_kwargs"],
            verbose=model_params["verbose"],
            tensorboard_log=model_params["tensorboard_log"],
            full_tensorboard_log=model_params["full_tensorboard_log"])

    train_model.learn(total_timesteps=args.num_steps)

    return train_model
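For reference, a hypothetical model_params dictionary for the second branch above: the keys are exactly the ones looked up in run_model_stablebaseline, while the values are only illustrative placeholders, not settings from the original source.

# Hypothetical model_params example; keys mirror the lookups above,
# values are illustrative placeholders only.
example_model_params = {
    "policy": "MlpPolicy",
    "gamma": 0.99,
    "learning_rate": 5e-4,
    "buffer_size": 50000,
    "exploration_fraction": 0.1,
    "exploration_final_eps": 0.02,
    "exploration_initial_eps": 1.0,
    "train_freq": 1,
    "batch_size": 32,
    "double_q": True,
    "learning_starts": 1000,
    "target_network_update_freq": 500,
    "prioritized_replay": False,
    "prioritized_replay_alpha": 0.6,
    "prioritized_replay_beta0": 0.4,
    "prioritized_replay_beta_iters": None,
    "prioritized_replay_eps": 1e-6,
    "param_noise": False,
    "policy_kwargs": None,
    "verbose": 1,
    "tensorboard_log": None,
    "full_tensorboard_log": False,
}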