# Pursuit demo: pit two pre-trained DQN policies (predator vs. prey) against
# each other, stepping and rendering the environment until the episode ends.
# NOTE(review): relies on `env`, `predator`, `prey`, and `DeepQNetwork` being
# defined earlier in the file (only this chunk is visible here).

# init two models
model1 = DeepQNetwork(env, predator, "predator")
model2 = DeepQNetwork(env, prey, "prey")

# load trained models — both share the checkpoint directory; the model name
# ("predator" / "prey") selects the weights within it
model1.load("data/pursuit_model")
model2.load("data/pursuit_model")

done = False
step_ct = 0
print("nums: %d vs %d" % (env.get_num(predator), env.get_num(prey)))

while not done:
    # take actions for predators
    obs_1 = env.get_observation(predator)
    ids_1 = env.get_agent_id(predator)
    acts_1 = model1.infer_action(obs_1, ids_1)
    env.set_action(predator, acts_1)

    # take actions for prey
    obs_2 = env.get_observation(prey)
    ids_2 = env.get_agent_id(prey)
    # BUGFIX: original passed ids_1 here, pairing prey observations with
    # predator agent ids; use the prey ids gathered just above.
    acts_2 = model2.infer_action(obs_2, ids_2)
    env.set_action(prey, acts_2)

    # simulate one step
    done = env.step()

    # render
    env.render()

    # get reward
# Training-loop fragment (chunk begins mid-loop; the enclosing loop headers
# that define i, steps, i_episode, score, etc. are outside this view).
# Build the per-agent model input: for each agent append its flattened
# observation, its adjacency row, and the shared feature vector `vec`.
# NOTE(review): assumed all three appends sit inside the j-loop — the source
# was flattened onto one line; confirm against the original file.
for j in range(n_agent):
    ob.append(np.asarray([flat_ob[j]]))
    ob.append(np.asarray([adj[j]]))
    ob.append(np.asarray([vec]))
# One forward pass for the whole group; acts[j] holds agent j's action scores.
acts = model.predict(ob)
action[i] = np.zeros(n_agent, dtype=np.int32)
for j in range(n_agent):
    # epsilon-greedy: explore with probability alpha, otherwise take argmax
    if np.random.rand() < alpha:
        action[i][j] = random.randrange(n_actions)
    else:
        action[i][j] = np.argmax(acts[j])
env.set_action(handles[i], action[i])
# Opponent group (handles[1]) acts via the pre-trained tf_model policy.
obs[1] = env.get_observation(handles[1])
ids[1] = env.get_agent_id(handles[1])
acts = tf_model.infer_action(obs[1], ids[1], 'e_greedy')
env.set_action(handles[1], acts)
# advance the simulation one tick
done = env.step()
next_obs = env.get_observation(handles[0])
flat_next_obs = observation(next_obs[0], next_obs[1])
rewards = env.get_reward(handles[0])
score += sum(rewards)
# store a transition only every 3rd step, thinning the replay buffer
if steps % 3 == 0:
    buff.add(flat_ob, action[0], flat_next_obs, rewards, done, adj)
# render only every 10th episode
if (i_episode - 1) % 10 == 0:
    env.render()
if max_steps == steps:
    # episode hit the step limit: report per-group death counts
    # NOTE(review): assumed both prints are inside this `if` — confirm
    # against the original (unflattened) source.
    print(dead[0], end='\t')
    print(dead[1], end='\t')
# tf.reset_default_graph() model9 = DeepQNetwork(env, army9, "battle-l", memory_size=2**10) #tf.reset_default_graph() model10 = DeepQNetwork(env, army10, "battle-r", memory_size=2**10) model9.load("save_model", 14) model10.load("save_model", 14) done = False step_ct = 0 print("nums: %d vs %d" % (env.get_num(army1), env.get_num(army2))) while not done: # take actions for army1 obs_1 = env.get_observation(army1) ids_1 = env.get_agent_id(army1) acts_1 = model1.infer_action(obs_1, ids_1) env.set_action(army1, acts_1) # take actions for army2 obs_2 = env.get_observation(army2) ids_2 = env.get_agent_id(army2) acts_2 = model2.infer_action(obs_2, ids_2) env.set_action(army2, acts_2) # take actions for army3 obs_3 = env.get_observation(army3) ids_3 = env.get_agent_id(army3) acts_3 = model3.infer_action(obs_3, ids_3) env.set_action(army3, acts_3) # take actions for army4