Example #1
File: ssd.py Project: 011235813/lio
    def __init__(self, config_env):

        self.name = 'ssd'
        self.config = config_env
        self.dim_obs = [self.config.obs_height, self.config.obs_width, 3]
        self.max_steps = self.config.max_steps

        self.cleaning_penalty = self.config.cleaning_penalty
        # Original space (not necessarily in this order, see
        # the original ssd files):
        # no-op, up, down, left, right, turn-ccw, turn-cw, penalty, clean
        if (self.config.disable_left_right_action
                and self.config.disable_rotation_action):
            self.l_action = 4
            self.cleaning_action_idx = 3
            # up, down, no-op, clean
            self.map_to_orig = {0: 2, 1: 3, 2: 4, 3: 8}
        elif self.config.disable_left_right_action:
            self.l_action = 6
            self.cleaning_action_idx = 5
            # up, down, no-op, rotate cw, rotate ccw, clean
            self.map_to_orig = {0: 2, 1: 3, 2: 4, 3: 5, 4: 6, 5: 8}
        elif self.config.disable_rotation_action:
            self.l_action = 6
            self.cleaning_action_idx = 5
            # left, right, up, down, no-op, clean
            self.map_to_orig = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 8}
        else:  # full action space except penalty beam
            self.l_action = 8
            self.cleaning_action_idx = 7
            # Don't allow penalty beam
            self.map_to_orig = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 8}

        self.obs_cleaned_1hot = self.config.obs_cleaned_1hot

        self.n_agents = self.config.n_agents

        if self.config.map_name == 'cleanup_small_sym':
            ascii_map = maps.CLEANUP_SMALL_SYM
        elif self.config.map_name == 'cleanup_10x10_sym':
            ascii_map = maps.CLEANUP_10x10_SYM
        else:
            raise ValueError('Unsupported map_name: %s' % self.config.map_name)

        self.env = CleanupEnv(
            ascii_map=ascii_map,
            num_agents=self.n_agents,
            render=False,
            shuffle_spawn=self.config.shuffle_spawn,
            global_ref_point=self.config.global_ref_point,
            view_size=self.config.view_size,
            random_orientation=self.config.random_orientation,
            cleanup_params=self.config.cleanup_params,
            beam_width=self.config.beam_width)

        # length of action input to learned reward function
        if self.config.obs_cleaned_1hot:
            self.l_action_for_r = 2
        else:
            self.l_action_for_r = self.l_action

        self.steps = 0
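The map_to_orig dictionary converts the wrapper's reduced action indices back to the original SSD action space before calling the underlying environment. A minimal sketch of how a step method could apply it; the list-style actions argument and the return format are assumptions, not taken from the project file:

    def step(self, actions):
        """Hypothetical sketch: translate reduced per-agent actions to the
        original SSD indices, then step the wrapped CleanupEnv."""
        actions_orig = {'agent-%d' % idx: self.map_to_orig[a]
                        for idx, a in enumerate(actions)}
        obs, rewards, dones, info = self.env.step(actions_orig)
        self.steps += 1
        done = self.steps >= self.max_steps
        return obs, rewards, done, info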
Example #2
    def __init__(self, env_name='harvest', num_agents=1):
        self.env_name = env_name
        if env_name == 'harvest':
            print('Initializing Harvest environment')
            self.env = HarvestEnv(ascii_map=HARVEST_MAP_CPR,
                                  num_agents=num_agents,
                                  render=True)
        elif env_name == 'cleanup':
            print('Initializing Cleanup environment')
            self.env = CleanupEnv(num_agents=num_agents, render=True)
        else:
            print('Error! Not a valid environment type')
            return

        self.num_agents = num_agents

        self.agent_policies = []
        self.agents = list(self.env.agents.values())
        # print(agents[0].action_space)
        self.action_dim = self.agents[0].action_space.n
        for _ in range(num_agents):
            # TODO right now only using 1 frame, update later to look back x (e.g. 4) frames. Later RNN/LSTM
            # Harvest-specific input is 15x15x3 (HARVEST_VIEW_SIZE = 7)
            neural_net = ConvFC(conv_in_channels=3,
                                conv_out_channels=3,
                                input_size=15,
                                hidden_size=64,
                                output_size=self.action_dim)
            self.agent_policies.append(
                DQNAgent(0, self.action_dim - 1, neural_net))

        self.env.reset()
Example #3
 def env_creator(_):
     ascii_map = CLEANUP_MAP
     if cleanup_map == 'small':
         ascii_map = CLEANUP_MAP_SMALL
     created_env = CleanupEnv(ascii_map=ascii_map,
                              num_agents=num_agents,
                              ir_param_list=ir_param_list,
                              hit_penalty=hit_penalty,
                              fire_cost=fire_cost)
     return created_env
Example #4
    def __init__(self, env_name='cleanup'):
        self.env_name = env_name
        if env_name == 'harvest':
            print('Initializing Harvest environment')
            self.env = HarvestEnv(num_agents=5, render=True)
        elif env_name == 'cleanup':
            print('Initializing Cleanup environment')
            self.env = CleanupEnv(num_agents=5, render=True)
        else:
            print('Error! Not a valid environment type')
            return

        self.env.reset()
Example #5
    def __init__(self, args):
        self.env_name = args.env
        if self.env_name == "harvest":
            print("Initializing Harvest environment")
            self.env = HarvestEnv(num_agents=5)
        elif self.env_name == "cleanup":
            print("Initializing Cleanup environment")
            self.env = CleanupEnv(num_agents=5)
        elif self.env_name == "switch":
            print("Initializing Switch environment")
            self.env = SwitchEnv(args, num_agents=1)
        else:
            print("Error! Not a valid environment type")
            return

        self.env.reset()
Example #6
from social_dilemmas.envs.harvest import HarvestEnv
from social_dilemmas.envs.cleanup import CleanupEnv
import numpy as np
FIRING_CLEANUP_MAP = [
    '@@@@@@',
    '@    @',
    '@HHP @',
    '@RH  @',
    '@H P @',
    '@@@@@@',
]
CLEANUP_VIEW_SIZE = 1

n_agents = 2
n_states = (CLEANUP_VIEW_SIZE*2+1)*(CLEANUP_VIEW_SIZE*2+1)*3
world = CleanupEnv(ascii_map=FIRING_CLEANUP_MAP, num_agents=2)
world.reset()
rand_action = np.random.randint(9, size=2)
obs, rew, dones, info = world.step({'agent-0': rand_action[0],
                                    'agent-1': rand_action[1]})
for key, value in obs.items():
    obs[key] = value.flatten()

def contactSta(stadict, mode):
    """Collect the per-agent entries of a state/observation dict into a list,
    flattening each entry when mode == 's'."""
    sta = []
    for key, value in stadict.items():
        if mode == 's':
            value = value.flatten()
        sta.append(value)
    return sta
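For reference, a short usage sketch of contactSta on the observation dict built above; stacking the result with np.stack is an assumption about how the list is consumed downstream:

# Hypothetical usage: collect the per-agent observations into one batch array.
states = contactSta(obs, 's')      # list of flattened observations, one per agent
state_batch = np.stack(states)     # shape: (n_agents, obs_length)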
Example #7
 def env_creator(env_config):
     return CleanupEnv(env_config)
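This creator already matches the signature RLlib expects (a single env_config argument), so it can be registered directly. A minimal sketch, assuming the environment name string 'cleanup_env':

from ray.tune.registry import register_env

# Register the creator so RLlib can build the env by name.
register_env('cleanup_env', env_creator)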
Example #8
 def env_creator(_):
     return CleanupEnv(num_agents=num_agents)
Example #9
def setup(env,
          hparams,
          algorithm,
          train_batch_size,
          num_cpus,
          num_gpus,
          num_agents,
          use_gpus_for_workers=False,
          use_gpu_for_driver=False,
          num_workers_per_device=1):

    if env == 'harvest':

        def env_creator(_):
            return HarvestEnv(num_agents=num_agents)

        single_env = HarvestEnv()
    elif env == "harvest_comm":

        def env_creator(_):
            return HarvestCommEnv(num_agents=num_agents)

        single_env = HarvestCommEnv()
    else:

        def env_creator(_):
            return CleanupEnv(num_agents=num_agents)

        single_env = CleanupEnv()

    env_name = env + "_env"
    register_env(env_name, env_creator)

    obs_space = single_env.observation_space
    act_space = single_env.action_space

    # Each policy can have a different configuration (including custom model)
    def gen_policy():
        return (PPOPolicyGraph, obs_space, act_space, {})

    # Setup PPO with an ensemble of `num_policies` different policy graphs
    policy_graphs = {}
    for i in range(num_agents):
        policy_graphs['agent-' + str(i)] = gen_policy()

    def policy_mapping_fn(agent_id):
        return agent_id

    # register the custom model
    model_name = "conv_to_fc_net"
    ModelCatalog.register_custom_model(model_name, ConvToFCNet)

    agent_cls = get_agent_class(algorithm)
    config = agent_cls._default_config.copy()

    # information for replay
    config['env_config']['func_create'] = tune.function(env_creator)
    config['env_config']['env_name'] = env_name
    config['env_config']['run'] = algorithm

    # Calculate device configurations
    gpus_for_driver = int(use_gpu_for_driver)
    cpus_for_driver = 1 - gpus_for_driver
    if use_gpus_for_workers:
        spare_gpus = (num_gpus - gpus_for_driver)
        num_workers = int(spare_gpus * num_workers_per_device)
        num_gpus_per_worker = spare_gpus / num_workers
        num_cpus_per_worker = 0
    else:
        spare_cpus = (num_cpus - cpus_for_driver)
        num_workers = int(spare_cpus * num_workers_per_device)
        num_gpus_per_worker = 0
        num_cpus_per_worker = spare_cpus / num_workers

    # hyperparams
    config.update({
        "train_batch_size": train_batch_size,
        "horizon": 1000,
        "lr_schedule": [[0, hparams['lr_init']],
                        [20000000, hparams['lr_final']]],
        "num_workers": num_workers,
        "num_gpus": gpus_for_driver,  # The number of GPUs for the driver
        "num_cpus_for_driver": cpus_for_driver,
        "num_gpus_per_worker": num_gpus_per_worker,  # Can be a fraction
        "num_cpus_per_worker": num_cpus_per_worker,  # Can be a fraction
        "entropy_coeff": hparams['entropy_coeff'],
        "multiagent": {
            "policy_graphs": policy_graphs,
            "policy_mapping_fn": tune.function(policy_mapping_fn),
        },
        "model": {
            "custom_model": "conv_to_fc_net",
            "use_lstm": True,
            "lstm_cell_size": 128
        }
    })
    return algorithm, env_name, config
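A hedged sketch of how setup() might be driven; the hyperparameter values, experiment name, stopping criterion, and the tune.run_experiments call below are assumptions rather than part of the example:

# Hypothetical driver: build the config and hand it to Tune.
hparams = {'lr_init': 1e-3, 'lr_final': 1e-4, 'entropy_coeff': 1e-3}  # assumed values
alg_run, env_name, config = setup('cleanup', hparams, 'PPO',
                                  train_batch_size=30000, num_cpus=4,
                                  num_gpus=0, num_agents=5)
tune.run_experiments({
    'cleanup_ppo_baseline': {
        'run': alg_run,
        'env': env_name,
        'config': config,
        'stop': {'training_iteration': 500},
    }
})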
Example #10
 def env_creator(_):
     return CleanupEnv(
         num_agents=num_agents,
         return_agent_actions=True,
         use_collective_reward=args.use_collective_reward,
     )
Example #11
 def __call__(self):
     return CleanupEnv(ascii_map=FIRING_CLEANUP_MAP,
                       num_agents=self._num_agents)
Example #12
 def __call__(self):
     return CleanupEnv()