예제 #1
0
파일: api_demo.py 프로젝트: DailinH/MAgent
    # Build one DeepQNetwork per group handle; the third argument is the name
    # each model is created with.
    model1 = DeepQNetwork(env, predator, "predator")
    model2 = DeepQNetwork(env, prey, "prey")

    # Load trained weights; both models are pointed at the same directory.
    model1.load("data/pursuit_model")
    model2.load("data/pursuit_model")

    done = False
    step_ct = 0
    print("nums: %d vs %d" % (env.get_num(predator), env.get_num(prey)))
    # Run until env.step() reports that the episode is finished.
    while not done:
        # take actions for the predator group
        obs_1 = env.get_observation(predator)
        ids_1 = env.get_agent_id(predator)
        acts_1 = model1.infer_action(obs_1, ids_1)
        env.set_action(predator, acts_1)

        # take actions for the prey group; the ids must be the prey's own
        # (ids_2), not the predator ids fetched above
        obs_2 = env.get_observation(prey)
        ids_2 = env.get_agent_id(prey)
        acts_2 = model2.infer_action(obs_2, ids_2)
        env.set_action(prey, acts_2)

        # simulate one step; the return value is the loop's stop flag
        done = env.step()

        # render
        env.render()

        # get reward
예제 #2
0
파일: api_demo.py 프로젝트: OwalnutO/MAgent
    # Build one DeepQNetwork per group handle; the third argument is the name
    # each model is created with.
    model1 = DeepQNetwork(env, predator, "predator")
    model2 = DeepQNetwork(env, prey, "prey")

    # Load trained weights; both models are pointed at the same directory.
    model1.load("data/pursuit_model")
    model2.load("data/pursuit_model")

    done = False
    step_ct = 0
    print("nums: %d vs %d" % (env.get_num(predator), env.get_num(prey)))
    # Run until env.step() reports that the episode is finished.
    while not done:
        # take actions for the predator group
        obs_1 = env.get_observation(predator)
        ids_1 = env.get_agent_id(predator)
        acts_1 = model1.infer_action(obs_1, ids_1)
        env.set_action(predator, acts_1)

        # take actions for the prey group; the ids must be the prey's own
        # (ids_2), not the predator ids fetched above
        obs_2 = env.get_observation(prey)
        ids_2 = env.get_agent_id(prey)
        acts_2 = model2.infer_action(obs_2, ids_2)
        env.set_action(prey, acts_2)

        # simulate one step; the return value is the loop's stop flag
        done = env.step()

        # render
        env.render()

        # get reward
예제 #3
0
파일: multi_battle.py 프로젝트: zzw-x/DGN
        # NOTE(review): `i`, `ob`, `flat_ob`, `adj`, `vec`, `steps` and `dead`
        # are bound outside this fragment; confirm their setup against the
        # enclosing loop.

        # Assemble the model input: for each agent j, its flat_ob[j] followed
        # by adj[j], each wrapped in a length-1 outer array, then vec last.
        # (presumably the length-1 axis is a batch dimension -- TODO confirm)
        for j in range(n_agent):
            ob.append(np.asarray([flat_ob[j]]))
            ob.append(np.asarray([adj[j]]))
        ob.append(np.asarray([vec]))
        acts = model.predict(ob)
        action[i] = np.zeros(n_agent, dtype=np.int32)
        # Per-agent epsilon-greedy choice: with probability alpha take a
        # uniformly random action index, otherwise the argmax of acts[j].
        for j in range(n_agent):
            if np.random.rand() < alpha:
                action[i][j] = random.randrange(n_actions)
            else:
                action[i][j] = np.argmax(acts[j])
        env.set_action(handles[i], action[i])

        # The group behind handles[1] is driven by tf_model, called with the
        # 'e_greedy' policy argument.
        obs[1] = env.get_observation(handles[1])
        ids[1] = env.get_agent_id(handles[1])
        acts = tf_model.infer_action(obs[1], ids[1], 'e_greedy')
        env.set_action(handles[1], acts)
        # Advance the environment; the return value is stored as `done`.
        done = env.step()

        # Post-step observation for handles[0]; its two components are passed
        # to observation(), a helper defined outside this fragment.
        next_obs = env.get_observation(handles[0])
        flat_next_obs = observation(next_obs[0], next_obs[1])
        rewards = env.get_reward(handles[0])
        score += sum(rewards)
        # Only steps where steps % 3 == 0 are added to the buffer.
        if steps % 3 == 0:
            buff.add(flat_ob, action[0], flat_next_obs, rewards, done, adj)

        # Render only when (i_episode - 1) is a multiple of 10.
        if (i_episode - 1) % 10 == 0:
            env.render()
        # When the step counter equals max_steps, print both dead[] entries,
        # each followed by a tab instead of a newline.
        if max_steps == steps:
            print(dead[0], end='\t')
            print(dead[1], end='\t')
예제 #4
0
    # NOTE(review): this fragment builds only model9/model10, yet the loop
    # below uses model1..model3 and army1..army3, which must be defined
    # outside the visible lines -- confirm against the full file.
    # tf.reset_default_graph()
    model9 = DeepQNetwork(env, army9, "battle-l", memory_size=2**10)
    #tf.reset_default_graph()
    model10 = DeepQNetwork(env, army10, "battle-r", memory_size=2**10)
    # Load from "save_model"; the second argument (14) is presumably a
    # checkpoint/epoch index -- TODO confirm against DeepQNetwork.load.
    model9.load("save_model", 14)
    model10.load("save_model", 14)

    done = False
    step_ct = 0
    # Only the sizes of army1 and army2 are printed here.
    print("nums: %d vs %d" % (env.get_num(army1), env.get_num(army2)))
    # Each iteration: for every army, fetch observations and agent ids, ask
    # that army's model for actions, and hand them to the environment.
    while not done:
        # take actions for army1
        obs_1 = env.get_observation(army1)
        ids_1 = env.get_agent_id(army1)
        acts_1 = model1.infer_action(obs_1, ids_1)
        env.set_action(army1, acts_1)

        # take actions for army2
        obs_2 = env.get_observation(army2)
        ids_2 = env.get_agent_id(army2)
        acts_2 = model2.infer_action(obs_2, ids_2)
        env.set_action(army2, acts_2)

        # take actions for army3
        obs_3 = env.get_observation(army3)
        ids_3 = env.get_agent_id(army3)
        acts_3 = model3.infer_action(obs_3, ids_3)
        env.set_action(army3, acts_3)

        # take actions for army4