from typing import List, Tuple

import magent
# NOTE(review): RandomActor lives in magent.builtin.rule_model in the MAgent
# distribution; the original collapsed source used it without any import.
from magent.builtin.rule_model import RandomActor
from magent.model import BaseModel
from numpy import ndarray

# Side length of the square grid world.
MAP_SIZE: int = 64

if __name__ == "__main__":
    # Build the "forest" scenario and direct render output to ./render.
    environment: magent.GridWorld = magent.GridWorld("forest", map_size=MAP_SIZE)
    environment.set_render_dir("render")

    # Two agent groups: prey (deer) and predators (tigers).
    deer_handle: int
    tiger_handle: int
    deer_handle, tiger_handle = environment.get_handles()

    # One random-policy actor per group.
    models: List[BaseModel] = [
        RandomActor(environment, deer_handle),
        RandomActor(environment, tiger_handle),
    ]

    environment.reset()
    # BUG FIX: `n` is a count and must be an int; 4% of the map cells
    # become walls (the original passed a float).
    environment.add_walls(method="random", n=int(MAP_SIZE * MAP_SIZE * 0.04))
    environment.add_agents(deer_handle, method="random", n=5)
    environment.add_agents(tiger_handle, method="random", n=2)

    tiger_view_space: Tuple = environment.get_view_space(tiger_handle)
    tiger_feature_space: Tuple = environment.get_feature_space(tiger_handle)
    print(
        f"Tiger view space: {tiger_view_space} features: {tiger_feature_space}"
    )
    deer_view_space: Tuple = environment.get_view_space(deer_handle)
def sample_observation(env, handles, n_obs=-1, step=-1):
    """Sample observations by random actors.

    These samples can be used for evaluation.

    Parameters
    ----------
    env : environment
    handles : list of handle
    n_obs : int
        number of observations to keep per group (-1 keeps all samples)
    step : int
        maximum number of environment steps (-1 means run until done)

    Returns
    -------
    ret : list of raw observation
        raw observation for every group; the format of a raw observation
        is tuple(view, feature)
    """
    models = [RandomActor(env, handle) for handle in handles]

    n = len(handles)
    views = [[] for _ in range(n)]
    features = [[] for _ in range(n)]

    done = False
    step_ct = 0
    while not done:
        obs = [env.get_observation(handle) for handle in handles]
        ids = [env.get_agent_id(handle) for handle in handles]

        # Let every random actor pick actions for its group.
        for i in range(n):
            act = models[i].infer_action(obs[i], ids[i])
            env.set_action(handles[i], act)

        done = env.step()
        env.clear_dead()

        # record steps
        for i in range(n):
            views[i].append(obs[i][0])
            # BUG FIX: record the feature half of the current observation.
            # The original appended features[i][1] — a previously recorded
            # sample — instead of obs[i][1], corrupting the feature data.
            features[i].append(obs[i][1])

        if step != -1 and step_ct > step:
            break

        if step_ct % 100 == 0:
            print("sample step %d" % step_ct)
        step_ct += 1

    # Flatten per-step batches into one array per group.
    for i in range(n):
        views[i] = np.array(views[i], dtype=np.float32).reshape(
            (-1,) + env.get_view_space(handles[i]))
        features[i] = np.array(features[i], dtype=np.float32).reshape(
            (-1,) + env.get_feature_space(handles[i]))

    # Optionally subsample n_obs observations per group (with replacement).
    if n_obs != -1:
        for i in range(n):
            views[i] = views[i][
                np.random.choice(np.arange(views[i].shape[0]), n_obs)]
            features[i] = features[i][
                np.random.choice(np.arange(features[i].shape[0]), n_obs)]

    ret = [(v, f) for v, f in zip(views, features)]
    return ret
parser.add_argument("--greedy", action="store_true") parser.add_argument("--map_size", type=int, default=500) parser.add_argument("--name", type=str, default="tiger") parser.add_argument('--alg', default='dqn', choices=['dqn', 'drqn', 'a2c']) args = parser.parse_args() # init the game env = magent.GridWorld("double_attack", map_size=args.map_size) env.set_render_dir("build/render") # two groups of animal deer_handle, tiger_handle = env.get_handles() # init two models models = [ RandomActor(env, deer_handle, tiger_handle), ] batch_size = 512 unroll = 8 if args.alg == 'dqn': from magent.builtin.tf_model import DeepQNetwork models.append( DeepQNetwork(env, tiger_handle, args.name, batch_size=batch_size, memory_size=2**20, learning_rate=4e-4)) step_batch_size = None
n_step += skip

# init the game "forest" (or "battle" here); map side scales with the
# square root of the requested population so density stays comparable
env = magent.GridWorld(load_forest(int(math.sqrt(agent_number * 20))))
env.reset()

# add two groups of animals
deer_handle, tiger_handle = env.get_handles()
# BUG FIX: wall/agent counts must be integers; `/` is true (float)
# division in Python 3, so use floor division instead.
env.add_walls(method="random", n=agent_number // 10)
env.add_agents(deer_handle, method="random", n=agent_number // 2)
env.add_agents(tiger_handle, method="random", n=agent_number // 2)

# init two models: random baselines on CPU, DQN inference when GPUs exist
if args.num_gpu == 0:
    model1 = RandomActor(env, deer_handle, "deer")
    model2 = RandomActor(env, tiger_handle, "tiger")
else:
    # pick the backend requested on the command line
    if args.frame == 'tf':
        from magent.builtin.tf_model import DeepQNetwork
    else:
        from magent.builtin.mx_model import DeepQNetwork
    model1 = DeepQNetwork(env, deer_handle, "deer",
                          num_gpu=args.num_gpu, infer_batch_size=100000)
    model2 = DeepQNetwork(env, tiger_handle, "tiger",
                          num_gpu=args.num_gpu, infer_batch_size=100000)

total_reward = 0
print(env.get_view_space(deer_handle))
print(env.get_view_space(tiger_handle))
total_time = 0