Example #1
def make_env(scenario_name, benchmark=False):
    '''
    Creates a PersonalAgentEnv object as env. This can be used like a gym
    environment by calling env.reset() and env.step().
    Use env.render() to view the environment on the screen.
    Input:
        scenario_name   :   name of the scenario from ./scenarios/ to be loaded
                            (without the .py extension)
        benchmark       :   whether you want to produce benchmarking data
                            (usually only done during evaluation)
    Some useful env properties (see environment.py):
        .observation_space  :   returns the observation space for each agent
        .action_space       :   returns the action space for each agent
        .n                  :   returns the number of agents
    '''
    from particles.environment import PersonalAgentEnv
    import particles.scenarios as scenarios

    # load scenario from script
    scenario = scenarios.load(scenario_name + ".py").Scenario()
    # create world
    world = scenario.make_world()
    # create multiagent environment
    if benchmark:
        env = PersonalAgentEnv(world, scenario.reset_world, scenario.reward,
                               scenario.observation, scenario.benchmark_data)
    else:
        env = PersonalAgentEnv(world, scenario.reset_world, scenario.reward,
                               scenario.observation)
    return env
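
A minimal usage sketch of make_env. The scenario name 'simple_spread' and the random action sampling are illustrative assumptions; env.step() is assumed to take and return per-agent lists, as is conventional for multi-agent particle environments:

env = make_env('simple_spread')   # hypothetical scenario name
obs_n = env.reset()
for _ in range(25):
    # sample one random action per agent from that agent's action space
    act_n = [env.action_space[i].sample() for i in range(env.n)]
    # step returns per-agent lists of observations, rewards, dones, and infos
    obs_n, reward_n, done_n, info_n = env.step(act_n)
    env.render()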
Example #2
import numpy as np
import torch
import particles.scenarios as scenarios
from particles.environment import PersonalAgentEnv

# `args`, `load_agents`, and the Reinforce policy class are assumed to be
# defined earlier in the surrounding script.
# parse the optional space-separated list of specific agents
if args.specific_agents != '':
    specific_agents = args.specific_agents.split(' ')
else:
    specific_agents = None

scenario = scenarios.load(args.scenario).Scenario(
    kind=args.personalization, num_agents=args.num_agents, seed=args.seed,
    load_agents=load_agents, save_agents=None,
    specific_agents=specific_agents)
# create world
world = scenario.make_world()
world.episode_len = args.episode_len

env = PersonalAgentEnv(world, scenario.reset_world, scenario.reward,
                       scenario.observation, info_callback=None,
                       done_callback=scenario.done, shared_viewer=True)
# actions are passed as integer indices rather than one-hot vectors
env.discrete_action_input = True

env.render()

# one policy per agent; using env.action_space[0].n assumes every agent
# shares the same discrete action space
policies = [Reinforce(i, env.observation_space[i].shape[0],
                      env.action_space[0].n) for i in range(env.n)]

# load pretrained weights into the first agent's policy
policies[0].load_state_dict(torch.load(
    './trained_models/' + args.trained_model))

# smallest representable float32 increment, used to avoid division by zero
# when normalizing returns
eps = np.finfo(np.float32).eps.item()

obs_n = env.reset()
running_reward = 10  # running estimate of episode reward; 10 is an arbitrary seed value
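
The snippet ends where the evaluation loop would begin. A minimal continuation sketch, assuming the Reinforce policy exposes a select_action(obs) method and that env.step() takes and returns per-agent lists (both are assumptions, not shown above):

for t in range(args.episode_len):
    # query an action from each agent's policy given its own observation
    act_n = [policies[i].select_action(obs_n[i]) for i in range(env.n)]
    obs_n, reward_n, done_n, _ = env.step(act_n)
    env.render()
    if all(done_n):
        break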
Example #3
import torch.optim as optim

# space-separated list of agent names parsed from the command line
support_agents = args.specific_agents.split(' ')


# for agent in support_agents:
#     scenario = scenarios.load(args.scenario).Scenario(
#         kind=args.personalization, num_agents=args.num_agents, seed=args.seed,
#         load_agents=load_agents)
scenario = scenarios.load(args.scenario).Scenario(
    kind=args.personalization, num_agents=args.num_agents, seed=args.seed,
    load_agents=load_agents, specific_agents=support_agents)

world = scenario.make_world()
world.episode_len = args.episode_len

env = PersonalAgentEnv(world, scenario.reset_world, scenario.reward,
                       scenario.observation, info_callback=None,
                       done_callback=scenario.done, shared_viewer=True)
env.discrete_action_input = True
env.seed(args.seed)

policies = [model(i, env.observation_space[i].shape[0],
                  env.action_space[0].n) for i in range(env.n)]

if args.optimizer == 'Adam':
    optimizer = optim.Adam(policies[0].parameters(), lr=args.lr)
elif args.optimizer == 'SGD':
    optimizer = optim.SGD(policies[0].parameters(), lr=args.lr)
else:
    raise NotImplementedError('unknown optimizer: ' + args.optimizer)

scenario.sample_task = True  # Start off true
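
A minimal sketch of the policy-gradient update this setup leads into, assuming the policy object accumulates per-step rewards and log-probabilities during an episode (the rewards/saved_log_probs attribute names and the finish_episode helper are hypothetical):

import torch

def finish_episode(policy, optimizer, gamma=0.99):
    # compute discounted returns, newest reward last
    returns, R = [], 0.0
    for r in reversed(policy.rewards):
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    # normalize returns for stability; the small constant avoids division by zero
    returns = (returns - returns.mean()) / (returns.std() + 1e-8)
    # REINFORCE loss: negative log-probability weighted by the return
    loss = torch.stack([-lp * R for lp, R in
                        zip(policy.saved_log_probs, returns)]).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    del policy.rewards[:], policy.saved_log_probs[:]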
Example #4
# load scenario from script
scenario = scenarios.load(args.scenario).Scenario(
    kind=args.personalization,
    num_agents=args.num_agents,
    seed=args.seed,
    load_agents=None,
    save_agents=None,
    specific_agents=None)
# create world
world = scenario.make_world()
world.episode_len = args.episode_len
# create multiagent environment
env = PersonalAgentEnv(world,
                       scenario.reset_world,
                       scenario.reward,
                       scenario.observation,
                       info_callback=None,
                       done_callback=scenario.done,
                       shared_viewer=True)
# render call to create viewer window (necessary only for interactive policies)
env.render()
# create interactive policies for each agent
policies = [InteractivePolicy(env, i) for i in range(env.n)]
# execution loop
obs_n = env.reset()
for n in range(100):
    t = 0
    ep_reward = 0  # accumulate the episode reward once per episode, not per step
    obs_n = env.reset()  # re-assign so each episode starts from fresh observations
    while t < args.episode_len:
        # query for action from each agent's policy
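        # Hypothetical completion of the loop: the policy.action(obs)
        # interface and the env.step() return signature are assumptions
        act_n = [policy.action(obs_n[i]) for i, policy in enumerate(policies)]
        # step the environment with one action per agent
        obs_n, reward_n, done_n, _ = env.step(act_n)
        ep_reward += sum(reward_n)
        env.render()
        t += 1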