Example #1
    def __init__(self, env_name='harvest', num_agents=1):
        self.env_name = env_name
        if env_name == 'harvest':
            print('Initializing Harvest environment')
            self.env = HarvestEnv(ascii_map=HARVEST_MAP_CPR,
                                  num_agents=num_agents,
                                  render=True)
        elif env_name == 'cleanup':
            print('Initializing Cleanup environment')
            self.env = CleanupEnv(num_agents=num_agents, render=True)
        else:
            print('Error! Not a valid environment type')
            return

        self.num_agents = num_agents

        self.agent_policies = []
        self.agents = list(self.env.agents.values())
        # print(agents[0].action_space)
        self.action_dim = self.agents[0].action_space.n
        for _ in range(num_agents):
            # TODO: currently uses only 1 frame; later, look back over x (e.g. 4) frames, and eventually use an RNN/LSTM.
            neural_net = ConvFC(
                conv_in_channels=3,  # Harvest-specific: the input is 15x15x3 (HARVEST_VIEW_SIZE = 7)
                conv_out_channels=3,
                input_size=15,
                hidden_size=64,
                output_size=self.action_dim)
            self.agent_policies.append(
                DQNAgent(0, self.action_dim - 1, neural_net))

        self.env.reset()
Example #2
def test_harvest_map():
    env = HarvestEnv(ascii_map=MINI_HARVEST_MAP, num_agents=1)
    env.reset()
    agents = list(env.agents.values())
    action_dim = agents[0].action_space.n
    for i in range(action_dim):
        env.step({'agent-0': i})
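For orientation, here is a minimal random-rollout sketch building on the test above. It assumes the per-agent dict API used throughout these examples (env.agents mapping agent ids to agent objects, step() taking and returning dicts keyed by agent id); the '__all__' key in the dones dict is also an assumption, and HarvestEnv / MINI_HARVEST_MAP are taken to be imported as in the example.

import numpy as np

env = HarvestEnv(ascii_map=MINI_HARVEST_MAP, num_agents=1)
obs = env.reset()
for _ in range(100):
    # Sample one random action per agent from its discrete action space.
    actions = {agent_id: np.random.randint(agent.action_space.n)
               for agent_id, agent in env.agents.items()}
    obs, rewards, dones, infos = env.step(actions)
    if dones.get("__all__", False):  # assumed convention for "episode over"
        obs = env.reset()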
Example #3
    def __init__(self, env_name='cleanup'):
        self.env_name = env_name
        if env_name == 'harvest':
            print('Initializing Harvest environment')
            self.env = HarvestEnv(num_agents=5, render=True)
        elif env_name == 'cleanup':
            print('Initializing Cleanup environment')
            self.env = CleanupEnv(num_agents=5, render=True)
        else:
            print('Error! Not a valid environment type')
            return

        self.env.reset()
Example #4
    def __init__(self, args):
        self.env_name = args.env
        if self.env_name == "harvest":
            print("Initializing Harvest environment")
            self.env = HarvestEnv(num_agents=5)
        elif self.env_name == "cleanup":
            print("Initializing Cleanup environment")
            self.env = CleanupEnv(num_agents=5)
        elif self.env_name == "switch":
            print("Initializing Switch environment")
            self.env = SwitchEnv(args, num_agents=1)
        else:
            print("Error! Not a valid environment type")
            return

        self.env.reset()
Example #5
def env_creator(_):
    # harvest_map, num_agents, ir_param_list, hit_penalty and fire_cost are
    # captured from the enclosing scope in which this creator is defined.
    ascii_map = HARVEST_MAP
    if harvest_map == 'tiny':
        ascii_map = HARVEST_MAP_TINY
    elif harvest_map == 'toy':
        ascii_map = HARVEST_MAP_TOY
    elif harvest_map == 'cpr':  # note: only single-agent at present
        ascii_map = HARVEST_MAP_CPR
    elif harvest_map == 'big':
        ascii_map = HARVEST_MAP_BIG
    created_env = HarvestEnv(ascii_map=ascii_map,
                             num_agents=num_agents,
                             ir_param_list=ir_param_list,
                             hit_penalty=hit_penalty,
                             fire_cost=fire_cost)
    return created_env
Example #6
def env_creator(env_config=None):
    num_agents = env_config["num_agents"]
    return HarvestEnv(num_agents=num_agents)
Example #7
def env_creator(env_config):
    return HarvestEnv(env_config)
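A creator with this signature is typically handed to RLlib's environment registry. The sketch below shows that wiring; register_env is the actual Ray Tune registry call, while the "harvest_env" name is an arbitrary choice and the contents of env_config are whatever this HarvestEnv variant expects (compare Examples #6 and #8).

from ray.tune.registry import register_env

# Register the creator so RLlib can construct the env by name; RLlib will
# call env_creator(config["env_config"]) when it builds each worker's env.
register_env("harvest_env", env_creator)

The registered name is then referenced from the trainer config, e.g. config["env"] = "harvest_env".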
Example #8
def setup(env,
          hparams,
          algorithm,
          train_batch_size,
          num_cpus,
          num_gpus,
          num_agents,
          num_symbols,
          grid_search,
          use_gpus_for_workers=False,
          use_gpu_for_driver=False,
          num_workers_per_device=1):

    obs_space = None
    act_space = None
    if env == 'harvest':
        obs_space = HarvestEnv.observation_space(num_agents, num_symbols)
        act_space = HarvestEnv.action_space(num_agents, num_symbols)

        def env_creator(env_config):
            return HarvestEnv(env_config)
    else:
        obs_space = CleanupEnv.observation_space(num_agents, num_symbols)
        act_space = CleanupEnv.action_space(num_agents, num_symbols)

        def env_creator(env_config):
            return CleanupEnv(env_config)

    env_name = env + "_env"
    register_env(env_name, env_creator)

    # register the custom model
    ModelCatalog.register_custom_model(MODEL_NAME, ObedienceLSTM)

    # Each policy can have a different configuration (including custom model)
    def gen_policy():
        return None, obs_space, act_space, {'custom_model': MODEL_NAME}

    # Setup with an ensemble of `num_policies` different policy graphs
    policy_graphs = {}
    for i in range(num_agents):
        policy_graphs['agent-' + str(i)] = gen_policy()

    def policy_mapping_fn(agent_id):
        return agent_id

    # gets the A3C trainer and its default config
    # source at https://github.com/ray-project/ray/blob/d537e9f0d8b84414a2aba7a7d0a68d59241f1490/rllib/agents/a3c/a3c.py
    agent_cls = get_agent_class(algorithm)
    config = agent_cls._default_config.copy()

    # information for replay
    config['env_config']['func_create'] = env_creator
    config['env_config']['env_name'] = env_name
    # config['env_config']['run'] = algorithm
    config['callbacks']['on_postprocess_traj'] = on_postprocess_traj

    # Calculate device configurations
    gpus_for_driver = int(use_gpu_for_driver)
    cpus_for_driver = 1 - gpus_for_driver
    if use_gpus_for_workers:
        spare_gpus = (num_gpus - gpus_for_driver)
        num_workers = int(spare_gpus * num_workers_per_device)
        num_gpus_per_worker = spare_gpus / num_workers
        num_cpus_per_worker = 0
    else:
        spare_cpus = (num_cpus - cpus_for_driver)
        num_workers = int(spare_cpus * num_workers_per_device)
        num_gpus_per_worker = 0
        num_cpus_per_worker = spare_cpus / num_workers

    # hyperparams
    config.update({
        "train_batch_size": train_batch_size,
        "sample_batch_size": 50,
        # "batch_mode": "complete_episodes",
        # "metrics_smoothing_episodes": 1,
        "vf_loss_coeff": 0.1,
        "horizon": 1000,
        "gamma": 0.99,
        "lr_schedule": [[0, hparams['lr_init']],
                        [20000000, hparams['lr_final']]],
        "num_workers": num_workers,
        "num_gpus": num_gpus,  # The number of GPUs for the driver
        "num_cpus_for_driver": cpus_for_driver,
        "num_gpus_per_worker": num_gpus_per_worker,  # Can be a fraction
        "num_cpus_per_worker": num_cpus_per_worker,  # Can be a fraction
        "entropy_coeff": hparams['entropy_coeff'],
        "multiagent": {
            "policies": policy_graphs,
            "policy_mapping_fn": policy_mapping_fn,
        },
        "model": {
            "custom_model": MODEL_NAME,
            # "custom_preprocessor": "nothing",
            "use_lstm": False,
            "custom_options": {
                "num_agents": num_agents,
                "num_symbols": num_symbols,
                "fcnet_hiddens": [32, 32],
                "cell_size": 128,
            },
            "conv_filters": [[6, [3, 3], 1]],
            # "lstm_cell_size": 128
        },
        "env_config": {
            "num_agents": num_agents,
            "num_symbols": num_symbols,
            "obedience_weight": .001,
            "leadership_weight": .001,
        },
    })

    if algorithm == "PPO":
        config.update({
            "num_sgd_iter": 10,
            "sgd_minibatch_size": 500,
            "vf_loss_coeff": 1e-4
        })

    if grid_search:
        pass

    return algorithm, env_name, config
Example #9
def env_creator(_):
    return HarvestEnv(num_agents=num_agents)
Example #10
def setup(env,
          hparams,
          algorithm,
          train_batch_size,
          num_cpus,
          num_gpus,
          num_agents,
          use_gpus_for_workers=False,
          use_gpu_for_driver=False,
          num_workers_per_device=1):

    if env == 'harvest':

        def env_creator(_):
            return HarvestEnv(num_agents=num_agents)

        single_env = HarvestEnv()
    elif env == "harvest_comm":

        def env_creator(_):
            return HarvestCommEnv(num_agents=num_agents)

        single_env = HarvestCommEnv()
    else:

        def env_creator(_):
            return CleanupEnv(num_agents=num_agents)

        single_env = CleanupEnv()

    env_name = env + "_env"
    register_env(env_name, env_creator)

    obs_space = single_env.observation_space
    act_space = single_env.action_space

    # Each policy can have a different configuration (including custom model)
    def gen_policy():
        return (PPOPolicyGraph, obs_space, act_space, {})

    # Setup PPO with an ensemble of `num_policies` different policy graphs
    policy_graphs = {}
    for i in range(num_agents):
        policy_graphs['agent-' + str(i)] = gen_policy()

    def policy_mapping_fn(agent_id):
        return agent_id

    # register the custom model
    model_name = "conv_to_fc_net"
    ModelCatalog.register_custom_model(model_name, ConvToFCNet)

    agent_cls = get_agent_class(algorithm)
    config = agent_cls._default_config.copy()

    # information for replay
    config['env_config']['func_create'] = tune.function(env_creator)
    config['env_config']['env_name'] = env_name
    config['env_config']['run'] = algorithm

    # Calculate device configurations
    gpus_for_driver = int(use_gpu_for_driver)
    cpus_for_driver = 1 - gpus_for_driver
    if use_gpus_for_workers:
        spare_gpus = (num_gpus - gpus_for_driver)
        num_workers = int(spare_gpus * num_workers_per_device)
        num_gpus_per_worker = spare_gpus / num_workers
        num_cpus_per_worker = 0
    else:
        spare_cpus = (num_cpus - cpus_for_driver)
        num_workers = int(spare_cpus * num_workers_per_device)
        num_gpus_per_worker = 0
        num_cpus_per_worker = spare_cpus / num_workers

    # hyperparams
    config.update({
        "train_batch_size": train_batch_size,
        "horizon": 1000,
        "lr_schedule": [[0, hparams['lr_init']],
                        [20000000, hparams['lr_final']]],
        "num_workers": num_workers,
        "num_gpus": gpus_for_driver,  # The number of GPUs for the driver
        "num_cpus_for_driver": cpus_for_driver,
        "num_gpus_per_worker": num_gpus_per_worker,  # Can be a fraction
        "num_cpus_per_worker": num_cpus_per_worker,  # Can be a fraction
        "entropy_coeff": hparams['entropy_coeff'],
        "multiagent": {
            "policy_graphs": policy_graphs,
            "policy_mapping_fn": tune.function(policy_mapping_fn),
        },
        "model": {
            "custom_model": "conv_to_fc_net",
            "use_lstm": True,
            "lstm_cell_size": 128
        }
    })
    return algorithm, env_name, config
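A hedged sketch of how the tuple returned by setup() might be consumed, using the older Ray/Tune API implied above (get_agent_class, PPOPolicyGraph, tune.function). The hparams values and the stopping criterion are placeholders, not values from the source.

import ray
from ray import tune

# Placeholder hyperparameters; real runs would supply tuned values.
hparams = {'lr_init': 5e-4, 'lr_final': 5e-5, 'entropy_coeff': 1e-3}

alg, env_name, config = setup('harvest', hparams, 'PPO',
                              train_batch_size=30000, num_cpus=4,
                              num_gpus=0, num_agents=5)

ray.init()
config['env'] = env_name  # point the trainer at the registered env
tune.run_experiments({
    'harvest_ppo': {
        'run': alg,
        'config': config,
        'stop': {'training_iteration': 200},
    }
})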
Example #11
def env_creator(_):
    return HarvestEnv(
        num_agents=num_agents,
        return_agent_actions=True,
        use_collective_reward=args.use_collective_reward,
    )
Example #12
    def __call__(self):
        return HarvestEnv(ascii_map=MINI_HARVEST_MAP,
                          num_agents=self._num_agents)
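The __call__ above belongs to a small factory object; only its body comes from the example, so the class name and constructor below are hypothetical, sketched for context.

class HarvestEnvFactory:
    """Hypothetical wrapper whose __call__ matches the example above."""

    def __init__(self, num_agents=1):
        self._num_agents = num_agents

    def __call__(self):
        return HarvestEnv(ascii_map=MINI_HARVEST_MAP,
                          num_agents=self._num_agents)

A zero-argument callable like this can stand in wherever an env constructor is expected, e.g. env = HarvestEnvFactory(num_agents=2)().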