Example #1
def create_env(env, render=False, shared=False, maddpg=False, evaluate=False):
    """Return, and potentially create, the environment.

    Parameters
    ----------
    env : str or gym.Env
        the environment, or the name of a registered environment.
    render : bool
        whether to render the environment
    shared : bool
        specifies whether agents in an environment are meant to share policies.
        This is solely used by multi-agent Flow environments.
    maddpg : bool
        whether to use an environment variant that is compatible with the
        MADDPG algorithm
    evaluate : bool
        specifies whether this is a training or evaluation environment

    Returns
    -------
    gym.Env or list of gym.Env
        gym-compatible environment(s)
    """
    if env is None:
        # No environment (for evaluation environments).
        return None
    elif isinstance(env, str):
        if env in ENV_ATTRIBUTES:
            env = ENV_ATTRIBUTES[env]["env"](
                evaluate, render, False, shared, maddpg)
        elif env.startswith("multiagent"):
            # Multi-agent environments: strip the 11-character "multiagent-"
            # prefix to recover the base environment name.
            env_name = env[11:]
            env = ENV_ATTRIBUTES[env_name]["env"](
                evaluate, render, True, shared, maddpg)
        elif env in ["bottleneck0", "bottleneck1", "bottleneck2", "grid0",
                     "grid1"]:
            # Import the benchmark and fetch its flow_params
            benchmark = __import__("flow.benchmarks.{}".format(env),
                                   fromlist=["flow_params"])
            flow_params = benchmark.flow_params

            # Get the env name and a creator for the environment.
            creator, _ = make_create_env(flow_params, version=0, render=render)

            # Create the environment.
            env = creator()
        else:
            # This is assuming the environment is registered with OpenAI gym.
            env = gym.make(env)

    # Reset the environment.
    if env is not None:
        if isinstance(env, list):
            for next_env in env:
                next_env.reset()
        else:
            env.reset()

    return env
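A short usage sketch (not part of the original example; the key "ring" and the "multiagent-" prefix are assumptions inferred from the branches above):

# Hypothetical calls against create_env from Example #1.
env = create_env("ring", render=False, evaluate=True)  # assumes "ring" is a key in ENV_ATTRIBUTES
ma_env = create_env("multiagent-ring", shared=True)    # handled by the startswith("multiagent") branch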
Example #2
    def ray_runner(self, num_runs, flow_params, version):
        alg_run = 'PPO'
        HORIZON = 10

        agent_cls = get_agent_class(alg_run)
        config = agent_cls._default_config.copy()
        config['num_workers'] = 1
        config['sample_batch_size'] = 50  # arbitrary
        config['train_batch_size'] = 50  # arbitrary
        config['sgd_minibatch_size'] = 10
        config['num_sgd_iter'] = 1
        config['horizon'] = HORIZON

        # save the flow params for replay
        flow_json = json.dumps(flow_params,
                               cls=FlowParamsEncoder,
                               sort_keys=True,
                               indent=4)
        config['env_config']['flow_params'] = flow_json
        config['env_config']['run'] = alg_run

        create_env, env_name = make_create_env(params=flow_params,
                                               version=version)

        # Register as rllib env
        register_env(env_name, create_env)

        alg = ppo.PPOAgent(env=env_name, config=config)

        for i in range(num_runs):
            alg.train()
            checkpoint_path = alg.save('benchmark_tmp')
            self.assertTrue(os.path.exists('%s.index' % checkpoint_path))
Example #3
def setup_exps():

    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [16, 16]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02  # 0.01# 0.02
    config["num_sgd_iter"] = 10
    config["horizon"] = HORIZON
    config["clip_param"] = 0.15  # default is 0.3; 0.2 reportedly works well but may be too large
    # config["vf_loss_coeff"] = 0.2  # default is 1. When using multitask, vf_loss
    # dropped to ~10 while the policy loss was around 0.01, but a 0.01 coefficient
    # did not estimate the loss well.

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
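The (alg_run, gym_name, config) triple returned above is typically handed to Ray's trial runner. A hedged sketch follows (checkpoint frequency and stopping criterion are assumptions, mirroring the run_experiments call in Example #26):

import ray
from ray.tune import run_experiments

alg_run, gym_name, config = setup_exps()
ray.init(num_cpus=N_CPUS + 1)
run_experiments({
    flow_params["exp_tag"]: {
        "run": alg_run,
        "env": gym_name,
        "config": {**config},
        "checkpoint_freq": 20,                # assumed value
        "stop": {"training_iteration": 200},  # assumed value
    },
})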
Example #4
    def __init__(self, flow_params, custom_callables=None):
        """Instantiate the Experiment class.

        Parameters
        ----------
        flow_params : dict
            flow-specific parameters
        custom_callables : dict < str, lambda >
            strings and lambda functions corresponding to some information we
            want to extract from the environment. The lambda will be called at
            each step to extract information from the env and it will be stored
            in a dict keyed by the str.
        """
        self.custom_callables = custom_callables or {}

        # Get the env name and a creator for the environment.
        create_env, _ = make_create_env(flow_params)

        # Create the environment.
        self.env = create_env()

        logging.info(" Starting experiment {} at {}".format(
            self.env.network.name, str(datetime.datetime.utcnow())))

        logging.info("Initializing environment.")
Example #5
def setup_exps():

    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [16, 16]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config["horizon"] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
Example #6
def setup_DQN_exp():

    alg_run = 'DQN'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = n_cpus
    config['train_batch_size'] = horizon * rollouts
    config['gamma'] = discount_rate
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['horizon'] = horizon
    config["hiddens"] = [256]
    config['model'].update({'fcnet_hiddens': [32, 32]})

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)

    return alg_run, gym_name, config
Example #7
def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['lr'] = tune.grid_search([1e-5])
    config['horizon'] = HORIZON
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['observation_filter'] = 'NoFilter'

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(env_name, create_env)

    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        return PPOTFPolicy, obs_space, act_space, {}

    # Setup PG with an ensemble of `num_policies` different policy graphs
    policy_graphs = {'av': gen_policy()}

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policies': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config
Example #8
def setup_exps():

    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [16, 16]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.01  # 0.02
    config["num_sgd_iter"] = 10
    config["horizon"] = HORIZON
    config["clip_param"] = 0.15  # default is 0.3; 0.2 reportedly works well but may be too large
    config["vf_loss_coeff"] = 0  # default is 1; use the vf loss when fitting the
    # advantage estimator, not when improving the policy

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
Example #9
def setup_exps():

    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['use_gae'] = True
    config['lambda'] = 0.97
    config['kl_target'] = 0.02
    config['num_sgd_iter'] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['horizon'] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
Example #10
def setup_exps(use_inflows=False):
    """Return the relevant components of an RLlib experiment.

    Parameters
    ----------
    use_inflows : bool, optional
        set to True if you would like to run the experiment with inflows of
        vehicles from the edges, and False otherwise

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    # collect the initialization and network-specific parameters based on the
    # choice to use inflows or not
    if use_inflows:
        initial_config, net_params = get_flow_params(
            col_num=N_COLUMNS,
            row_num=N_ROWS,
            additional_net_params=additional_net_params)
    else:
        initial_config, net_params = get_non_flow_params(
            enter_speed=V_ENTER,
            add_net_params=additional_net_params)

    # add the new parameters to flow_params
    flow_params['initial'] = initial_config
    flow_params['net'] = net_params

    alg_run = 'PPO'

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['use_gae'] = True
    config['lambda'] = 0.97
    config['kl_target'] = 0.02
    config['num_sgd_iter'] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['horizon'] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
Example #11
def setup_exps(seeds_file=None):

    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [100, 50, 25]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON
    config["entropy_coeff"] = 0.001

    config["lr"] = 1e-5
    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params,
                                           version=0,
                                           seeds_file=seeds_file)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
Example #12
def setup_exps():

    initial_config, net_params = get_non_flow_params(
        add_net_params=additional_net_params)

    flow_params['initial'] = initial_config
    flow_params['net'] = net_params

    alg_run = 'PPO'

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['use_gae'] = True
    config['lambda'] = 0.97
    config['kl_target'] = 0.02
    config['num_sgd_iter'] = 10
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['horizon'] = HORIZON
    config['observation_filter'] = 'NoFilter'

    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    register_env(gym_name, create_env)

    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        return (PPOPolicyGraph, obs_space, act_space, {})

    policy_graphs = {'av': gen_policy()}

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policy_graphs': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, gym_name, config
Example #13
def setup_exps_rllib(flow_params, n_cpus, n_rollouts):
    """Return the relevant components of an RLlib experiment.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters (see flow/utils/registry.py)
    n_cpus : int
        number of CPUs to run the experiment over
    n_rollouts : int
        number of rollouts per training iteration

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    horizon = flow_params['env'].horizon

    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = deepcopy(agent_cls._default_config)

    config["num_workers"] = n_cpus
    config["num_cpus_per_worker"] = 1
    config["use_pytorch"] = False
    config["num_gpus"] = 0
    config["train_batch_size"] = horizon * n_rollouts
    config["gamma"] = 0.999  # discount rate
    # config["model"].update({"fcnet_hiddens": [32, 32, 32]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 10
    config['clip_actions'] = True  # FIXME(ev) temporary ray bug
    config["horizon"] = horizon
    config["callbacks"] = MyCallbacks
    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
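A hedged sketch of consuming the returned triple with ray.tune (the env assignment, checkpoint frequency, and stopping criterion are assumptions, not from this excerpt):

import ray
from ray import tune

alg_run, gym_name, config = setup_exps_rllib(flow_params, n_cpus=1, n_rollouts=1)
config["env"] = gym_name  # point the trainer at the registered env
ray.init(num_cpus=2)
tune.run(alg_run,
         config=config,
         checkpoint_freq=20,                # assumed value
         stop={"training_iteration": 200})  # assumed value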
Example #14
def setup_exps():

    # Import the benchmark and fetch its flow_params
    benchmark = __import__("flow.benchmarks.%s" % benchmark_name,
                           fromlist=["flow_params"])
    flow_params = benchmark.flow_params

    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['lr'] = tune.grid_search([1e-5])
    config['horizon'] = HORIZON
    config['clip_actions'] = False
    config['observation_filter'] = 'NoFilter'

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(env_name, create_env)

    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        return (PPOPolicyGraph, obs_space, act_space, {})

    # Setup PG with an ensemble of `num_policies` different policy graphs
    policy_graphs = {'av': gen_policy()}

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policy_graphs': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config
Example #15
    def __init__(self, flow_params=flow_params):
        """Instantiate Experiment."""
        # Get the env name and a creator for the environment.
        create_env, _ = make_create_env(flow_params)

        # Create the environment.
        self.env = create_env()

        logging.info(" Starting experiment {} at {}".format(
            self.env.network.name, str(datetime.datetime.utcnow())))

        logging.info("Initializing environment.")
Example #16
def flow_env(use_inflows=False, render=True, sim_step=1, horizon=2000):
    """Create and return a Flow environment.

    Parameters
    ----------
    use_inflows : bool, optional
        set to True if you would like to run the experiment with inflows of
        vehicles from the edges, and False otherwise
    render : bool, optional
        whether to render the environment
    sim_step : float, optional
        simulation step size, in seconds
    horizon : int, optional
        time horizon of a single rollout

    Returns
    -------
    gym.Env
        the initialized gym-compatible environment
    str
        name of the gym environment
    """
    global flow_params
    flow_params['sim'] = SumoParams(
        sim_step=sim_step,
        render=render,
        restart_instance=True,  # for long horizons and visualization
        print_warnings=False,
        seed=8021)
    flow_params['env'] = EnvParams(
        horizon=horizon,
        additional_params=additional_env_params)
    # collect the initialization and network-specific parameters based on the
    # choice to use inflows or not
    if use_inflows:
        initial_config, net_params = get_flow_params(
            col_num=N_COLUMNS,
            row_num=N_ROWS,
            additional_net_params=additional_net_params)
    else:
        initial_config, net_params = get_non_flow_params(
            enter_speed=V_ENTER,
            add_net_params=additional_net_params)

    # add the new parameters to flow_params
    flow_params['initial'] = initial_config
    flow_params['net'] = net_params
    
    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)

    create_env, gym_name = make_create_env(params=flow_params, version=0)
    env = create_env()
    return env, gym_name
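A minimal smoke test of the environment built by flow_env (not from the original source; terminate() assumes a Flow environment, where it shuts down the underlying SUMO instance):

env, gym_name = flow_env(use_inflows=True, render=False, horizon=100)
obs = env.reset()
for _ in range(10):
    obs, reward, done, _ = env.step(env.action_space.sample())
    if done:
        break
env.terminate()  # stop the SUMO process started by the environment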
Example #17
def setup_exps():

    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [100, 50, 25]})
    config['use_gae'] = True
    config['lambda'] = 0.97
    config['sgd_minibatch_size'] = 128
    config['kl_target'] = 0.02
    config['num_sgd_iter'] = 10
    config['horizon'] = HORIZON
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config['observation_filter'] = 'NoFilter'

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(env_name, create_env)

    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        return (PPOPolicyGraph, obs_space, act_space, {})

    # Setup PG with an ensemble of `num_policies` different policy graphs
    policy_graphs = {'av': gen_policy(), 'adversary': gen_policy()}

    def policy_mapping_fn(agent_id):
        return agent_id

    config.update({
        'multiagent': {
            'policy_graphs': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn)
        }
    })
    return alg_run, env_name, config
Example #18
    def __init__(self, flow_params, custom_callables=None):
        """Instantiate the Experiment class."""
        self.custom_callables = custom_callables or {}

        # Get the env name and a creator for the environment.
        create_env, _ = make_create_env(flow_params)

        # Create the environment.
        self.env = create_env()

        logging.info(" Starting experiment {} at {}".format(
            self.env.network.name, str(datetime.datetime.utcnow())))

        logging.info("Initializing environment.")
Example #19
    def __init__(self,
                 flow_params,
                 multiagent=False,
                 shared=False,
                 maddpg=False,
                 render=False,
                 version=0):
        """Create the environment.

        Parameters
        ----------
        flow_params : dict
            environment-specific parameters
        multiagent : bool
            whether the environment is a multi-agent environment
        shared : bool
            whether the policies in the environment are shared or independent.
            This is only relevant if `multiagent` is set to True.
        maddpg : bool
            whether to use an environment variant that is compatible with the
            MADDPG algorithm
        render : bool
            whether to render the environment
        version : int
            environment version number, needed for testing purposes
        """
        # Initialize some variables.
        self.multiagent = multiagent
        self.shared = shared
        self.maddpg = maddpg

        if "full_observation_fn" in flow_params["env"].additional_params:
            self.full_observation_fn = deepcopy(
                flow_params["env"].additional_params["full_observation_fn"])
            del flow_params["env"].additional_params["full_observation_fn"]
        else:
            self.full_observation_fn = None

        # Create the wrapped environment.
        create_env, _ = make_create_env(flow_params, version, render)
        self.wrapped_env = create_env()

        # Collect the IDs of individual vehicles if using a multi-agent env.
        if self.multiagent:
            self.agents = list(self.wrapped_env.reset().keys())

        # for tracking the time horizon
        self.step_number = 0
        self.horizon = self.wrapped_env.env_params.horizon
Example #20
    def __init__(self, env_name, env_params=None, render=False, version=0):
        """Create the environment.

        Parameters
        ----------
        env_name : str
            the name of the environment to create
        env_params : dict
            environment-specific parameters
        render : bool
            whether to render the environment
        version : int
            environment version number, needed for testing purposes


        Raises
        ------
        AssertionError
            if the `env_name` parameter is not valid
        """
        assert env_name in ["ring", "merge", "figure_eight"]

        # default to empty dictionary if not passed
        env_params = env_params or {}

        # get flow-specific parameters
        flow_params = dict()
        if env_name == "merge":
            flow_params = merge(**env_params)
        elif env_name == "ring":
            flow_params = ring(**env_params)
        elif env_name == "figure_eight":
            flow_params = figure_eight(**env_params)

        # create the wrapped environment
        create_env, _ = make_create_env(flow_params, version, render)
        self.wrapped_env = create_env()

        # for tracking the time horizon
        self.step_number = 0
        self.horizon = self.wrapped_env.env_params.horizon
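The wrapper's class name is not visible in this excerpt; a construction sketch assuming it is called FlowEnv:

# Hypothetical: wrap the "ring" benchmark with its default parameters.
env_wrapper = FlowEnv("ring", env_params=None, render=False)
print(env_wrapper.horizon)             # horizon copied from the wrapped env
obs = env_wrapper.wrapped_env.reset()  # interact through the wrapped env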
Example #21
def setup_exps(seeds_file=None):

    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["sgd_minibatch_size"] = 128
    config["num_gpus"] = args.num_gpus
    config["gamma"] = 0.998  # discount rate
    config["model"].update({"fcnet_hiddens": [100, 50, 25]})
    # config['lr_schedule'] = [
    #     [0, 1e-4],
    #     [2000000, 5e-5]
    # ]
    config["use_gae"] = True
    config["lambda"] = 0.97
    # config["kl_target"] = 0.02
    config["vf_clip_param"] = 1e6
    config["num_sgd_iter"] = 1
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON
    # config["grad_clip"] = 0.5
    # config["entropy_coeff"] = 0.0001
    config["lr"] = 0.0
    # config["vf_share_layers"] = True
    # config["vf_loss_coeff"] = 0.5
    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params,
                                           version=0,
                                           seeds_file=seeds_file)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
Example #22
    def __init__(self, flow_params=flow_params):
        """Instantiate Experiment."""
        # Get the env name and a creator for the environment.
        self.create_env, self.env_name = make_create_env(flow_params)

        # Create the environment.
        self.env = self.create_env()
        self.flow_params = flow_params

        # Register as rllib env
        register_env(self.env_name, self.create_env)

        self.obs_space = self.env.observation_space
        self.act_space = self.env.action_space

        logging.info(" Starting experiment {} at {}".format(
            self.env.network.name, str(datetime.datetime.utcnow())))

        logging.info("Initializing environment.")
Example #23
def get_compute_action_rllib(path_to_dir, checkpoint_num, alg):
    """Collect the compute_action method from RLlib's serialized files.

    Parameters
    ----------
    path_to_dir : str
        RLlib directory containing training results
    checkpoint_num : int
        checkpoint number / training iteration of the learned policy
    alg : str
        name of the RLlib algorithm that was used during the training
        procedure

    Returns
    -------
    method
        the compute_action method from the algorithm along with the trained
        parameters
    """
    # collect the configuration information from the RLlib checkpoint
    result_dir = path_to_dir if path_to_dir[-1] != '/' else path_to_dir[:-1]
    config = get_rllib_config(result_dir)

    # run on only one cpu for rendering purposes
    ray.init(num_cpus=1)
    config["num_workers"] = 1

    # create and register a gym+rllib env
    flow_params = get_flow_params(config)
    create_env, env_name = make_create_env(params=flow_params,
                                           version=9999,
                                           render=False)
    register_env(env_name, create_env)

    # recreate the agent
    agent_cls = get_agent_class(alg)
    agent = agent_cls(env=env_name, registry=get_registry(), config=config)

    # restore the trained parameters into the policy
    checkpoint = result_dir + '/checkpoint-{}'.format(checkpoint_num)
    agent._restore(checkpoint)

    return agent.compute_action
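A hedged replay sketch (the results directory, checkpoint number, and observation stand-in are assumptions):

import numpy as np

compute_action = get_compute_action_rllib(
    path_to_dir="/tmp/ray/result_dir",  # assumed results directory
    checkpoint_num=50,                  # assumed training iteration
    alg="PPO")

# A Flow env matching the stored flow_params would normally supply the
# observation; a zero vector stands in here purely for illustration.
obs = np.zeros(10)  # assumed observation dimension
action = compute_action(obs)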
Example #24
def setup_exps():
    """Return the relevant components of an RLlib experiment.

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config["num_workers"] = N_CPUS
    config["train_batch_size"] = HORIZON * N_ROLLOUTS
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [20, 15]})
    config["use_gae"] = True
    config["lambda"] = 0.97
    config["kl_target"] = 0.02
    config["num_sgd_iter"] = 20
    config['lr'] = 1e-4
    config['sgd_minibatch_size'] = 128
    config['clip_actions'] = False  # FIXME(ev) temporary ray bug
    config["horizon"] = HORIZON

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config
Example #25
def setup_exps(use_inflows=False):
    """Return the relevant components of an RLlib experiment.

    Parameters
    ----------
    use_inflows : bool, optional
        set to True if you would like to run the experiment with inflows of
        vehicles from the edges, and False otherwise

    Returns
    -------
    gym.Env
        the initialized gym-compatible environment
    """
    # collect the initialization and network-specific parameters based on the
    # choice to use inflows or not
    if use_inflows:
        initial_config, net_params = get_flow_params(
            col_num=N_COLUMNS,
            row_num=N_ROWS,
            additional_net_params=additional_net_params)
    else:
        initial_config, net_params = get_non_flow_params(
            enter_speed=V_ENTER,
            add_net_params=additional_net_params)

    # add the new parameters to flow_params
    flow_params['initial'] = initial_config
    flow_params['net'] = net_params
    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Create and return the environment.
    env = create_env()
    return env
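A quick smoke test of the environment returned above (not in the original source):

env = setup_exps(use_inflows=True)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())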
Example #26
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# save the flow params for replay
flow_json = json.dumps(flow_params,
                       cls=FlowParamsEncoder,
                       sort_keys=True,
                       indent=4)  # generating a string version of flow_params
config['env_config']['flow_params'] = flow_json  # add flow_params to the config dict
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym
register_env(gym_name, create_env)

trials = run_experiments({
    flow_params["exp_tag"]: {
        "run": alg_run,
        "env": gym_name,
        "config": {**config},
        "checkpoint_freq": 1,  # number of iterations between checkpoints
    },
})
Example #27
    def test_make_create_env(self):
        """Test that the make_create_env method generates an environment with
        the expected flow parameters."""
        # use a flow_params dict derived from flow/benchmarks/figureeight0.py
        vehicles = VehicleParams()
        vehicles.add(veh_id="human",
                     acceleration_controller=(IDMController, {
                         "noise": 0.2
                     }),
                     routing_controller=(ContinuousRouter, {}),
                     car_following_params=SumoCarFollowingParams(
                         speed_mode="obey_safe_speed", ),
                     num_vehicles=13)
        vehicles.add(veh_id="rl",
                     acceleration_controller=(RLController, {}),
                     routing_controller=(ContinuousRouter, {}),
                     car_following_params=SumoCarFollowingParams(
                         speed_mode="obey_safe_speed", ),
                     num_vehicles=1)

        flow_params = dict(
            exp_tag="figure_eight_0",
            env_name="AccelEnv",
            scenario="Figure8Scenario",
            simulator='traci',
            sim=SumoParams(
                sim_step=0.1,
                render=False,
            ),
            env=EnvParams(
                horizon=1500,
                additional_params={
                    "target_velocity": 20,
                    "max_accel": 3,
                    "max_decel": 3,
                    "sort_vehicles": False
                },
            ),
            net=NetParams(
                no_internal_links=False,
                additional_params={
                    "radius_ring": 30,
                    "lanes": 1,
                    "speed_limit": 30,
                    "resolution": 40,
                },
            ),
            veh=vehicles,
            initial=InitialConfig(),
            tls=TrafficLightParams(),
        )

        # some random version number for testing
        v = 23434

        # call make_create_env
        create_env, env_name = make_create_env(params=flow_params, version=v)

        # check that the name is correct
        self.assertEqual(env_name, '{}-v{}'.format(flow_params["env_name"], v))

        # create the gym environment
        env = create_env()

        # Note that we expect the port number in sim_params to change, and
        # that this feature is in fact needed to avoid race conditions
        flow_params["sim"].port = env.sim_params.port

        # check that each of the parameter match
        self.assertEqual(env.env_params.__dict__, flow_params["env"].__dict__)
        self.assertEqual(env.sim_params.__dict__, flow_params["sim"].__dict__)
        self.assertEqual(env.scenario.traffic_lights.__dict__,
                         flow_params["tls"].__dict__)
        self.assertEqual(env.net_params.__dict__, flow_params["net"].__dict__)
        self.assertEqual(env.initial_config.__dict__,
                         flow_params["initial"].__dict__)
        self.assertEqual(env.__class__.__name__, flow_params["env_name"])
        self.assertEqual(env.scenario.__class__.__name__,
                         flow_params["scenario"])
Example #28
def GetTrafficLightEnv(inflow_probability, render=False, evaluate=False):
    """Create and return a traffic light grid environment."""
    initial_config, net_params = get_inflow_params(
        col_num=N_COLUMNS,
        row_num=N_ROWS,
        additional_net_params=additional_net_params,
        inflow_probability=inflow_probability)

    flow_params = dict(
        # name of the experiment
        exp_tag='traffic_light_grid',

        # name of the flow environment the experiment is running on
        env_name=TrafficLightGridPOEnv,

        # name of the network class the experiment is running on
        network=DoubleLaneNetwork,

        # simulator that is used by the experiment
        simulator='traci',
        # sumo-related parameters (see flow.core.params.SumoParams)
        sim=SumoParams(sim_step=1,
                       render=render,
                       emission_path="Results",
                       restart_instance=True),

        # environment related parameters (see flow.core.params.EnvParams)
        env=EnvParams(horizon=HORIZON,
                      additional_params=additional_env_params,
                      evaluate=evaluate),

        # network-related parameters (see flow.core.params.NetParams and the
        # network's documentation or ADDITIONAL_NET_PARAMS component). This is
        # filled in by the setup_exps method below.
        net=net_params,

        # vehicles to be placed in the network at the start of a rollout (see
        # flow.core.params.VehicleParams)
        veh=vehicles,

        # parameters specifying the positioning of vehicles upon initialization/
        # reset (see flow.core.params.InitialConfig). This is filled in by the
        # setup_exps method below.
        initial=initial_config,
    )

    # Get the env name and a creator for the environment.
    create_env, _ = make_create_env(flow_params)
    # Create the environment.
    env = create_env()
    return env
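A hedged evaluation rollout against the environment above (the inflow probability is an assumption; HORIZON is the module-level constant used in flow_params):

env = GetTrafficLightEnv(inflow_probability=0.2, render=False, evaluate=True)
obs = env.reset()
for _ in range(HORIZON):
    obs, reward, done, _ = env.step(env.action_space.sample())
    if done:
        break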


# flow_params = dict(
#     # name of the experiment
#     exp_tag='green_wave',

#     # name of the flow environment the experiment is running on
#     env_name=TrafficLightGridPOEnv,

#     # name of the scenario class the experiment is running on
#     network=DoubleLaneNetwork,

#     # simulator that is used by the experiment
#     simulator='traci',

#     # sumo-related parameters (see flow.core.params.SumoParams)
#     sim=SumoParams(
#         sim_step=0.1,
#         #render=False,
#         render=True,
#         restart_instance=True
#     ),

#     # environment related parameters (see flow.core.params.EnvParams)
#     env=EnvParams(
#         horizon=HORIZON,
#         additional_params=additional_env_params,
#     ),

#     # network-related parameters (see flow.core.params.NetParams and the
#     # scenario's documentation or ADDITIONAL_NET_PARAMS component)
#     net=net_params,

#     # vehicles to be placed in the network at the start of a rollout (see
#     # flow.core.vehicles.Vehicles)
#     veh=vehicles,

#     # parameters specifying the positioning of vehicles upon initialization/
#     # reset (see flow.core.params.InitialConfig)
#     initial=initial_config,

#     #tls = traffic_lights
# )

# exp = Experiment(flow_params)
# exp.run(1, convert_to_csv=False)
Example #29
def setup_exps(flow_params):
    """Create the relevant components of a multiagent RLlib experiment.

    Parameters
    ----------
    flow_params : dict
        input flow-parameters

    Returns
    -------
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    #config['simple_optimizer'] = True
    config['gamma'] = 0.9995  # discount rate
    config['model'].update({'fcnet_hiddens': [100, 50, 25]})
    config['lr'] = tune.grid_search([1e-4, 5e-5])
    config['horizon'] = HORIZON
    config['clip_actions'] = False
    config['observation_filter'] = 'NoFilter'
    config["lambda"] = 0.97  # GAE lambda
    config["use_gae"] = True
    config["vf_clip_param"] = 1e10
    config["num_sgd_iter"] = 10

    # save the flow params for replay
    flow_json = json.dumps(flow_params,
                           cls=FlowParamsEncoder,
                           sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # register as rllib env
    register_env(env_name, create_env)

    # multiagent configuration
    temp_env = create_env()
    policy_graphs = {
        'av':
        (PPOTFPolicy, temp_env.observation_space, temp_env.action_space, {})
    }

    def policy_mapping_fn(_):
        return 'av'

    config.update({
        'multiagent': {
            'policies': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config
Example #30
def visualizer_rllib(args):
    """Visualizer for RLlib experiments.

    This function takes args (see function create_parser below for
    more detailed information on what information can be fed to this
    visualizer), and renders the experiment associated with it.
    """
    result_dir = args.result_dir if args.result_dir[-1] != '/' \
        else args.result_dir[:-1]

    config = get_rllib_config(result_dir)

    # check if we have a multiagent environment but in a
    # backwards compatible way
    if config.get('multiagent', {}).get('policies', None):
        multiagent = True
        pkl = get_rllib_pkl(result_dir)
        config['multiagent'] = pkl['multiagent']
    else:
        multiagent = False

    # Run on only one cpu for rendering purposes
    config['num_workers'] = 0

    flow_params = get_flow_params(config)

    # hack for old pkl files
    # TODO(ev) remove eventually
    sim_params = flow_params['sim']
    setattr(sim_params, 'num_clients', 1)

    # Determine agent and checkpoint
    config_run = config['env_config']['run'] if 'run' in config['env_config'] \
        else None
    if args.run and config_run:
        if args.run != config_run:
            print('visualizer_rllib.py: error: run argument '
                  + '\'{}\' passed in '.format(args.run)
                  + 'differs from the one stored in params.json '
                  + '\'{}\''.format(config_run))
            sys.exit(1)
    if args.run:
        agent_cls = get_agent_class(args.run)
    elif config_run:
        agent_cls = get_agent_class(config_run)
    else:
        print('visualizer_rllib.py: error: could not find flow parameter '
              '\'run\' in params.json, '
              'add argument --run to provide the algorithm or model used '
              'to train the results\n e.g. '
              'python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO')
        sys.exit(1)

    sim_params.restart_instance = True
    dir_path = os.path.dirname(os.path.realpath(__file__))
    emission_path = '{0}/test_time_rollout/'.format(dir_path)
    sim_params.emission_path = emission_path if args.gen_emission else None

    # pick your rendering mode
    if args.render_mode == 'sumo_web3d':
        sim_params.num_clients = 2
        sim_params.render = False
    elif args.render_mode == 'drgb':
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
    elif args.render_mode == 'sumo_gui':
        sim_params.render = True
        print('NOTE: With render mode {}, an extra instance of the SUMO GUI '
              'will display before the GUI for visualizing the result. Click '
              'the green Play arrow to continue.'.format(args.render_mode))
    elif args.render_mode == 'no_render':
        sim_params.render = False
    if args.save_render:
        sim_params.render = 'drgb'
        sim_params.pxpm = 4
        sim_params.save_render = True

    # Create and register a gym+rllib env
    create_env, env_name = make_create_env(params=flow_params, version=0)
    register_env(env_name, create_env)

    # check if the environment is a single or multiagent environment, and
    # get the right address accordingly
    # single_agent_envs = [env for env in dir(flow.envs)
    #                      if not env.startswith('__')]

    # if flow_params['env_name'] in single_agent_envs:
    #     env_loc = 'flow.envs'
    # else:
    #     env_loc = 'flow.envs.multiagent'

    # Start the environment with the gui turned on and a path for the
    # emission file
    env_params = flow_params['env']
    env_params.restart_instance = False
    if args.evaluate:
        env_params.evaluate = True

    # lower the horizon if testing
    if args.horizon:
        config['horizon'] = args.horizon
        env_params.horizon = args.horizon

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_' + args.checkpoint_num
    checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
    agent.restore(checkpoint)

    if hasattr(agent, "local_evaluator") and \
            os.environ.get("TEST_FLAG") != 'True':
        env = agent.local_evaluator.env
    else:
        env = gym.make(env_name)

    if multiagent:
        rets = {}
        # map the agent id to its policy
        policy_map_fn = config['multiagent']['policy_mapping_fn'].func
        for key in config['multiagent']['policies'].keys():
            rets[key] = []
    else:
        rets = []

    if config['model']['use_lstm']:
        use_lstm = True
        if multiagent:
            state_init = {}
            # map the agent id to its policy
            policy_map_fn = config['multiagent']['policy_mapping_fn'].func
            size = config['model']['lstm_cell_size']
            for key in config['multiagent']['policies'].keys():
                state_init[key] = [np.zeros(size, np.float32),
                                   np.zeros(size, np.float32)]
        else:
            state_init = [
                np.zeros(config['model']['lstm_cell_size'], np.float32),
                np.zeros(config['model']['lstm_cell_size'], np.float32)
            ]
    else:
        use_lstm = False

    env.restart_simulation(
        sim_params=sim_params, render=sim_params.render)

    # Simulate and collect metrics
    final_outflows = []
    final_inflows = []
    mean_speed = []
    std_speed = []
    for i in range(args.num_rollouts):
        vel = []
        state = env.reset()
        if multiagent:
            ret = {key: [0] for key in rets.keys()}
        else:
            ret = 0
        for _ in range(env_params.horizon):
            vehicles = env.unwrapped.k.vehicle
            vel.append(np.mean(vehicles.get_speed(vehicles.get_ids())))
            if multiagent:
                action = {}
                for agent_id in state.keys():
                    if use_lstm:
                        action[agent_id], state_init[agent_id], logits = \
                            agent.compute_action(
                            state[agent_id], state=state_init[agent_id],
                            policy_id=policy_map_fn(agent_id))
                    else:
                        action[agent_id] = agent.compute_action(
                            state[agent_id], policy_id=policy_map_fn(agent_id))
            else:
                action = agent.compute_action(state)
            state, reward, done, _ = env.step(action)
            if multiagent:
                for actor, rew in reward.items():
                    ret[policy_map_fn(actor)][0] += rew
            else:
                ret += reward
            if multiagent and done['__all__']:
                break
            if not multiagent and done:
                break

        if multiagent:
            for key in rets.keys():
                rets[key].append(ret[key])
        else:
            rets.append(ret)
        outflow = vehicles.get_outflow_rate(500)
        final_outflows.append(outflow)
        inflow = vehicles.get_inflow_rate(500)
        final_inflows.append(inflow)
        if np.all(np.array(final_inflows) > 1e-5):
            throughput_efficiency = [x / y for x, y in
                                     zip(final_outflows, final_inflows)]
        else:
            throughput_efficiency = [0] * len(final_inflows)
        mean_speed.append(np.mean(vel))
        std_speed.append(np.std(vel))
        if multiagent:
            for agent_id in rets.keys():
                print('Round {}, Return: {} for agent {}'.format(
                    i, ret[agent_id], agent_id))
        else:
            print('Round {}, Return: {}'.format(i, ret))

    print('==== Summary of results ====')
    print("Return:")
    if multiagent:
        for agent_id, rew in rets.items():
            print('For agent', agent_id)
            print(rew)
            print('Average, std return: {}, {} for agent {}'.format(
                np.mean(rew), np.std(rew), agent_id))
    else:
        print(rets)
        print('Average, std: {}, {}'.format(
            np.mean(rets), np.std(rets)))

    print("\nSpeed, mean (m/s):")
    print(mean_speed)
    print('Average, std: {}, {}'.format(np.mean(mean_speed), np.std(
        mean_speed)))
    print("\nSpeed, std (m/s):")
    print(std_speed)
    print('Average, std: {}, {}'.format(np.mean(std_speed), np.std(
        std_speed)))

    # Compute arrival rate of vehicles in the last 500 sec of the run
    print("\nOutflows (veh/hr):")
    print(final_outflows)
    print('Average, std: {}, {}'.format(np.mean(final_outflows),
                                        np.std(final_outflows)))
    # Compute departure rate of vehicles in the last 500 sec of the run
    print("Inflows (veh/hr):")
    print(final_inflows)
    print('Average, std: {}, {}'.format(np.mean(final_inflows),
                                        np.std(final_inflows)))
    # Compute throughput efficiency in the last 500 sec of the run
    print("Throughput efficiency (outflow/inflow):")
    print(throughput_efficiency)
    print('Average, std: {}, {}'.format(np.mean(throughput_efficiency),
                                        np.std(throughput_efficiency)))

    # terminate the environment
    env.unwrapped.terminate()

    # if prompted, convert the emission file into a csv file
    if args.gen_emission:
        time.sleep(0.1)

        dir_path = os.path.dirname(os.path.realpath(__file__))
        emission_filename = '{0}-emission.xml'.format(env.network.name)

        emission_path = \
            '{0}/test_time_rollout/{1}'.format(dir_path, emission_filename)

        # convert the emission file into a csv file
        emission_to_csv(emission_path)

        # print the location of the emission csv file
        emission_path_csv = emission_path[:-4] + ".csv"
        print("\nGenerated emission file at " + emission_path_csv)

        # delete the .xml version of the emission file
        os.remove(emission_path)

    # if we wanted to save the render, here we create the movie
    if args.save_render:
        dirs = os.listdir(os.path.expanduser('~')+'/flow_rendering')
        # Ignore hidden files
        dirs = [d for d in dirs if d[0] != '.']
        dirs.sort(key=lambda date: datetime.strptime(date, "%Y-%m-%d-%H%M%S"))
        recent_dir = dirs[-1]
        # create the movie
        movie_dir = os.path.expanduser('~') + '/flow_rendering/' + recent_dir
        save_dir = os.path.expanduser('~') + '/flow_movies'
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        os_cmd = "cd " + movie_dir + " && ffmpeg -i frame_%06d.png"
        os_cmd += " -pix_fmt yuv420p " + dirs[-1] + ".mp4"
        os_cmd += " && cp " + dirs[-1] + ".mp4 " + save_dir + "/"
        os.system(os_cmd)
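The docstring above refers to a create_parser helper defined elsewhere in the same script; a minimal entry point under that assumption would be:

if __name__ == '__main__':
    # e.g. python ./visualizer_rllib.py /tmp/ray/result_dir 1 --run PPO
    parser = create_parser()  # argparse parser, defined elsewhere in this script
    visualizer_rllib(parser.parse_args())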