Beispiel #1
0
def main():
    """Bootstrap one rendered Free-For-All game with an HTTP-backed player."""
    # Show every environment id known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # The environment expects exactly four players.
    players = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.HttpAgent(port=10080, host="localhost"),
    ]
    env = pommerman.make('PommeFFACompetition-v0', players)

    # Gym-style loop: render, act, step until the episode reports done.
    for episode_idx in range(1):
        obs = env.reset()
        while True:
            env.render()
            obs, reward, finished, info = env.step(env.act(obs))
            if finished:
                break
        print('Episode {} finished'.format(episode_idx))
    env.close()
Beispiel #2
0
    def __init__(self, name, trainer, model_path, global_episodes,
                 global_steps):
        """Set up one worker: bookkeeping, local network and environment.

        Args:
            name: Worker identifier; stored as ``self.number`` and used to
                build the ``"worker_<name>"`` name.
            trainer: Stored on the worker and passed to ``AC_Network``.
            model_path: Base directory; summaries are written to
                ``model_path + "/train_<name>"``.
            global_episodes: Shared counter exposing ``assign_add``
                (presumably a ``tf.Variable`` -- confirm at the call site).
            global_steps: Shared counter, same requirement as
                ``global_episodes``.
        """
        self.name = "worker_" + str(name)
        self.number = name
        self.model_path = model_path
        self.trainer = trainer
        self.global_episodes = global_episodes
        # Build the "+1" update once here so it can be reused later.
        self.increment_global_episodes = self.global_episodes.assign_add(1)
        self.global_steps = global_steps
        self.increment_global_steps = self.global_steps.assign_add(1)
        # Per-episode statistics, filled in elsewhere.
        self.episode_rewards = []
        self.episode_lengths = []
        self.episode_mean_values = []
        self.summary_writer = tf.summary.FileWriter(model_path + "/train_" +
                                                    str(self.number))

        # Create the local copy of the network and the TensorFlow op that
        # copies the global parameters to the local network.
        self.local_AC = AC_Network(self.name, trainer)
        self.update_local_ops = update_target_graph('global', self.name)

        print('Initializing environment #{}...'.format(self.number))
        # Slot 0 is the custom StaticAgent; the other three are SimpleAgents.
        agent_list = [
            custom_agents.StaticAgent(),
            agents.SimpleAgent(),
            agents.SimpleAgent(),
            agents.SimpleAgent(),
        ]
        self.env = pommerman.make('PommeFFACompetition-v0', agent_list)
        # Keep a handle on the slot-0 agent.
        self.agent = agent_list[0]
Beispiel #3
0
def main():
    """Roll out episodes in a wrapped environment and save them as arrays.

    Relies on module-level ``args``, ``ddpg_agent``, ``RANDOM_SEED`` and the
    three ``train_data_*`` output paths.
    """
    # Build and seed the four-player environment.
    players = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.RandomAgent(),
        ddpg_agent,
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    base_env = pommerman.make(args.env_name, players)
    base_env.seed(RANDOM_SEED)
    agent_num = 0
    env = EnvWrapper(base_env, num_agent=agent_num)

    # Generate the training data.
    stimulator = save_episodes(env)
    stimulator.stimulate()

    # Collect the per-episode pieces in a single pass over the episodes.
    obs_chunks, action_chunks, reward_chunks = [], [], []
    for ep in stimulator.episodes:
        obs_chunks.append(ep.observations)
        action_chunks.append(ep.actions)
        reward_chunks.append(ep.reward)

    # Merge everything first, then write the three files.
    all_obs = np.concatenate(obs_chunks)
    all_actions = np.concatenate(action_chunks)
    all_rewards = np.concatenate(reward_chunks)

    np.save(train_data_obs, all_obs)
    np.save(train_data_labels, all_actions)
    np.save(train_data_reward, all_rewards)
Beispiel #4
0
def main():
    """Play one unrendered FFA game with a TFPPOAgent in the last slot.

    The chosen joint action is printed after every step.
    """
    # Show every environment id known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four players; the PPO model path is machine-specific.
    players = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
        # agents.RandomAgent(),
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
        agents.TFPPOAgent(
            "/home/pangliang/nips/playground_pl/scripts/ppo_model/model")
    ]
    env = pommerman.make('PommeFFACompetition-v0', players)
    # The PPO agent is initialized against the environment it plays in.
    players[-1].initialize(env)

    for episode_idx in range(1):
        obs = env.reset()
        finished = False
        while not finished:
            # env.render()
            joint_action = env.act(obs)
            obs, reward, finished, info = env.step(joint_action)
            print(joint_action)
        print('Episode {} finished'.format(episode_idx))
    env.close()
Beispiel #5
0
    def run_game(self, env_name):
        """Play one unrendered episode of the environment ``env_name``."""
        players = [
            agents.SimpleAgent(),
            agents.RandomAgent(),
            agents.SimpleAgent(),
            agents.RandomAgent(),
            # agents.DockerAgent("pommerman/simple-agent", port=12345),
        ]

        # One-vs-one environments only take the first two players.
        if 'oneVsOne' in env_name:
            players = players[0:2]
        env = pommerman.make(env_name, players)

        # Gym-style loop: act and step until the episode reports done.
        for episode_idx in range(1):
            obs = env.reset()
            while True:
                # env.render()
                obs, reward, finished, info = env.step(env.act(obs))
                if finished:
                    break
            print('Episode {} finished'.format(episode_idx))
        env.close()
Beispiel #6
0
def main():
    """Run a short smoke-test team game (at most 30 steps).

    Two ``SimpleAgent`` players and two ``AgentTT`` players are placed in
    alternating slots of the team-competition environment.
    """
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        AgentTT(),
        agents.SimpleAgent(),
        AgentTT(),
    ]

    # Make the team-competition environment using the agent list.
    # Alternative environment id: 'PommeTeamCompetition-v1'
    env = pommerman.make('PommeTeamCompetition-v0', agent_list)

    # Run the episodes just like OpenAI Gym
    for i_episode in range(1):
        state = env.reset()
        for _frame in range(30):
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            # Do not keep stepping an episode that has already ended.
            if done:
                break
        print('Episode {} finished'.format(i_episode))
    env.close()
Beispiel #7
0
def main():
    """Play one rendered radio game and print the agents still alive."""
    # Show every environment id known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four players: SimpleAgent and MyAgentRadio alternate.
    players = [
        agents.SimpleAgent(),
        agents.MyAgentRadio(),
        agents.SimpleAgent(),
        # agents.RandomAgent(),
        agents.MyAgentRadio()
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    env = pommerman.make('PommeRadio-v2', players)

    for episode_idx in range(1):
        obs = env.reset()
        finished = False
        while not finished:
            env.render()
            joint_action = env.act(obs)
            obs, reward, finished, info = env.step(joint_action)
        print('Episode {} finished'.format(episode_idx))

        # Report which agents survived, read from the env's agent list.
        survivors = list(filter(lambda player: player.is_alive, env._agents))
        print(survivors)
    env.close()
def main():
    """Play three rendered FFA games with a keyboard player in slot 0."""
    print(pommerman.REGISTRY)

    # Exactly four players; the human uses the arrow keys.
    players = [
        agents.PlayerAgent(agent_control="arrows"),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent()
    ]

    # Alternative two-human line-up:
    # players = [
    # agents.SimpleAgent(),
    # agents.PlayerAgent(agent_control="arrows"), # Arrows = Move, Space = Bomb
    # agents.SimpleAgent(),
    # agents.PlayerAgent(agent_control="wasd"), # W,A,S,D = Move, E = Bomb
    # ]

    env = pommerman.make('PommeFFACompetition-v0', players)

    # Gym-style loop: render, act, step until the episode reports done.
    for episode_idx in range(3):
        obs = env.reset()
        while True:
            env.render()
            obs, reward, finished, info = env.step(env.act(obs))
            if finished:
                break
        print('Episode {} finished'.format(episode_idx))
    env.close()
def main():
    """Play one rendered FFA game, grabbing the viewer image data up front."""
    # Show every environment id known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four players.
    players = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.RandomAgent(),
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    env = pommerman.make('PommeFFACompetition-v0', players)

    for episode_idx in range(1):
        obs = env.reset()
        finished = False
        # Render once so a viewer exists before its buffer is read.
        env.render()
        viewer_buffer = env._viewer.get_buffer()
        # Only the commented-out conversion below would consume this.
        img = viewer_buffer.get_texture().get_image_data()
        while not finished:
            env.render()
            joint_action = env.act(obs)
            obs, reward, finished, info = env.step(joint_action)
            # data = img.get_data("RGB", img.width * 3)
            # arr = np.frombuffer(data, dtype=np.uint8)
            # reshaped_array = arr.reshape(img.width, img.height, 3)

        print('Episode {} finished'.format(episode_idx))
    env.close()
Beispiel #10
0
def main():
    """Play one rendered FFA game with a Docker-hosted agent in slot 3."""
    # NOTE(review): the other examples print ``pommerman.REGISTRY``; the
    # lowercase attribute is kept here -- confirm which the installed
    # package version exposes.
    print(pommerman.registry)

    # Exactly four players.
    players = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
        # agents.RandomAgent(),
        agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    env = pommerman.make('PommeFFACompetition-v0', players)

    # Gym-style loop: render, act, step until the episode reports done.
    for episode_idx in range(1):
        obs = env.reset()
        while True:
            env.render()
            obs, reward, finished, info = env.step(env.act(obs))
            if finished:
                break
        print('Episode {} finished'.format(episode_idx))
    env.close()
Beispiel #11
0
def main(config, render=False):
    """Evaluate a restored TensorForce agent over 100 episodes.

    Args:
        config: Environment id, passed to both ``gym.make`` and
            ``pommerman.make``.
        render: When true, render every step.
    """
    # A plain gym env is built first so the PPO agent can be initialized.
    gym_env = gym.make(config)
    ppo_agent = agents.TensorForceAgent(algorithm="ppo")
    ppo_agent = ppo_agent.initialize(gym_env)
    players = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        TrainedTensorForceAgent(restore_agent(ppo_agent), gym_env),
    ]

    # The environment the games are actually played in.
    env = pommerman.make(config, players)

    # Track the final reward of each episode and the running mean.
    rewards = []
    for episode in range(100):
        obs = env.reset()
        finished = False
        while not finished:
            if render:
                env.render()
            joint_action = env.act(obs)
            obs, reward, finished, info = env.step(joint_action)
        rewards.append(reward)
        running_mean = np.mean(rewards, axis=0)
        print('Episode {} finished'.format(episode), reward, running_mean)
    print(np.mean(rewards, axis=0))
    env.close()
Beispiel #12
0
def main():
    """Play one rendered FFA game with a keyboard player in the last slot."""
    # Show every environment id known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four players; the human uses the arrow keys.
    players = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.PlayerAgent(agent_control="arrows"),
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    env = pommerman.make('PommeFFACompetition-v0', players)

    for episode_idx in range(1):
        obs = env.reset()
        finished = False
        while not finished:
            env.render()
            joint_action = env.act(obs)
            obs, reward, finished, info = env.step(joint_action)
        print('Episode {} finished'.format(episode_idx))
    env.close()
Beispiel #13
0
def main():
    """Replay a saved TensorForce model in one rendered FFA game.

    Command-line options ``--modelname`` and ``--directory`` select the model
    to restore. The slot-0 reward is printed after every step.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--modelname",
                     default=None,
                     help="model name of model_name.json")
    cli.add_argument("--directory",
                     default='./saved_models/',
                     help="file path of models folder")
    # Show every environment id known to the Pommerman registry.
    print(pommerman.REGISTRY)
    args = cli.parse_args()

    test_agent = agents.TensorForceAgent()
    test_agent.set_agent_id(0)
    # Exactly four players; the agent under test sits in slot 0.
    players = [
        test_agent,
        agents.SimpleAgent(),
        # agents.RandomAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent()
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]

    env = pommerman.make('PommeFFACompetition-v0', players)

    # The model can only be restored after the agent is bound to the env.
    test_agent.initialize(env)
    test_agent.restore_model(directory=args.directory, filename=args.modelname)

    # observations = []
    # inputs = []

    for episode_idx in range(1):
        obs = env.reset()
        while True:
            env.render()
            joint_action = env.act(obs)

            obs, reward, finished, info = env.step(joint_action)
            # TODO Change indices of arrays to select player info.
            # observations.append({
            #     'state': env.get_json_info(), 'reward': reward, 'done': finished, 'actions': joint_action})
            print(reward[0])
            if finished:
                break

        print('Episode {} finished'.format(episode_idx))

        # save_game(episode_idx, observations, info, players)

    env.close()
Beispiel #14
0
def battle(process_number):
    """Play one team game and report it to the Java side via the gateway.

    The Java entry point is told when the game starts and, at the end,
    receives the four per-agent rewards of the final step.

    Args:
        process_number: Caller-supplied label, used only in log output.
    """
    # Remember the process id; it identifies this game to the Java side.
    pid = os.getpid()

    print("battle start, process_number={}, pid={}".format(
        process_number, pid))

    # Connect to Java.
    gateway = JavaGateway()
    addition_app = gateway.entry_point

    # Announce the start of the game.
    addition_app.start_game(pid)

    # List of four agents (MyAgentO and MyAgentT alternate).
    agent_list = [
        MyAgentO(),
        MyAgentT(),
        MyAgentO(),
        MyAgentT(),
    ]

    env = pommerman.make('PommeTeamCompetition-v0', agent_list)
    state = env.reset()
    step = 0
    done = False
    while not done:
        step += 1
        actions = env.act(state)
        state, reward, done, info = env.step(actions)
        print("pid={} step={} actions={}".format(pid, step, actions))

    # Print the result.
    print("battle finished, process_number={}, pid={}, reward={}".format(
        process_number, pid, reward))

    # Announce the end of the game together with the final rewards.
    addition_app.finish_game(pid, reward[0], reward[1], reward[2], reward[3])

    # Release handles that are no longer needed.
    env.close()
Beispiel #15
0
def main():
    """Run 400 rendered radio games with two slots driven by a loaded MAC.

    Slots 0 and 2 (``test_idx_list``) have their actions overridden by
    ``TestMAC``; the remaining slots keep the actions chosen by their
    ``SimpleAgent``. The model path is machine-specific.
    """
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        # agents.DockerAgent("d9fc50459a6d", port=33333),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    env = pommerman.make('PommeRadioCompetition-v2', agent_list)
    env_info = {"board_state_shape": get_board_state_size(),
                "flat_state_shape": get_flat_state_size(),
                "board_obs_shape": get_board_obs_size(),
                "flat_obs_shape": get_flat_obs_size(),
                "n_actions": 6,
                "n_agents": 2,
                "episode_limit": 800}
    scheme = {
        "board_state": {"vshape": env_info["board_state_shape"]},
        "flat_state": {"vshape": env_info["flat_state_shape"]},
        "board_obs": {"vshape": env_info["board_obs_shape"], "group": "agents"},
        "flat_obs": {"vshape": env_info["flat_obs_shape"], "group": "agents"},
        "actions": {"vshape": (1,), "group": "agents", "dtype": th.long},
        "avail_actions": {"vshape": (env_info["n_actions"],), "group": "agents", "dtype": th.int},
        "reward": {"vshape": (1,)},
        "terminated": {"vshape": (1,), "dtype": th.uint8},
    }
    rnn_hidden_dim = 256
    # TODO: revise
    # mac = TestSeeIdMAC(scheme=scheme, agent_output_type="pi_logits", rnn_hidden_dim=rnn_hidden_dim,
    #               model_load_path='/home/hiogdong/pymarl_pmm/results/models/coma_pmm__2019-11-01_11-39-31/5016/agent.th')
    mac = TestMAC(scheme=scheme, agent_output_type="pi_logits", rnn_hidden_dim=rnn_hidden_dim,
                  model_load_path='/home/hiogdong/pymarl_pmm/results/models/coma_pmm__2019-10-31_20-10-35/199/agent.th')
    # Environment slots whose actions come from the MAC.
    test_idx_list = [0, 2]

    n_episode = 400
    for i_episode in range(n_episode):
        obs = env.reset()
        # Reset the per-episode state of the controller.
        mac.last_action = [th.zeros(6), th.zeros(6)]
        mac.init_hidden(1, rnn_hidden_dim)
        done = False
        while not done:
            # Start from the built-in agents' choices, then override.
            actions = env.act(obs)
            for idx, agent_idx in enumerate(test_idx_list):
                action_agent = mac.select_actions(obs[agent_idx], idx).item()
                # Remember the chosen action as a one-hot vector.
                temp = th.zeros(6)
                temp[action_agent] = 1
                mac.last_action[idx] = temp
                actions[agent_idx] = action_agent
            obs, reward, done, info = env.step(actions)
            env.render()
    # Release the environment once all episodes are played, as the sibling
    # examples do.
    env.close()
Beispiel #16
0
 def _thunk():
     """Build the ``env_id`` environment with four SimpleAgent players."""
     players = [agents.SimpleAgent() for _ in range(4)]
     return pommerman.make(env_id, players)
Beispiel #17
0
def main():
    """Collect SimpleAgent play data and train six generative models on it.

    Plays 300 unrendered team games, records every (observation, one-hot
    action) pair, applies the labelling functions from ``get_lf`` to each
    observation, and trains and saves one ``GenerativeModel`` per action.
    """
    # Show every environment id known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four players.
    players = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    env = pommerman.make('PommeTeamCompetition-v0', players)
    samples = []

    for episode_idx in range(300):
        obs = env.reset()
        finished = False
        while not finished:
            #            env.render()
            current = env.get_observations()
            joint_action = env.act(obs)
            for agent_obs, chosen in zip(current, joint_action):
                one_hot = np.zeros(6)
                one_hot[chosen] = 1
                samples.append([agent_obs, one_hot])

            obs, reward, finished, info = env.step(joint_action)
        print('Episode {} finished'.format(episode_idx))
    env.close()

    labelers = get_lf()

    n_rows = len(samples)

    # One slab per action, one row per sample, one column per labeller.
    label_tensor = np.zeros([6, n_rows, len(labelers)])
    for row, sample in enumerate(samples):
        for col, labeler in enumerate(labelers):
            label_tensor[:, row, col] = labeler(sample[0])

    models = [GenerativeModel() for _ in range(6)]

    # TODO: add ground labels to training
    filename = 'snorkel_model'
    for action_idx, model in enumerate(models):
        slab = np.squeeze(label_tensor[action_idx, :, :]).astype(int)
        model.train(slab)
        model.save(filename + str(action_idx))
Beispiel #18
0
def _append_csv_row(path, row):
    """Append ``row`` as one CSV record (excel dialect) to ``path``."""
    # newline='' lets the csv module control line endings itself; without
    # it, Windows text mode turns "\r\n" into "\r\r\n" (blank rows).
    with open(path, 'a', newline='') as fp:
        csv.writer(fp, dialect='excel').writerow(row)


def main():
    """Log the first 101 steps of agent 0 over up to 5000 FFA episodes.

    For each episode the observations and actions of agent 0 are collected
    while it is alive. If the agent is still alive after step index 100, the
    collected data is appended to three CSV log files. Otherwise the episode
    is dropped as soon as it ends or the agent dies.
    """
    config = "PommeFFACompetition-v0"
    game_state_file = None

    myAgents = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent()
    ]

    env = make(config, myAgents, game_state_file)

    logFile_states_raw = 'simpleAgentStates_raw.txt'
    logFile_states_obs = 'simpleAgentStates_obs.txt'
    logFile_actions = 'simpleAgentActions_sequence_rawObs.txt'
    logged_agent = 0  # only one agent is logged
    for i_episode in range(5000):
        state = env.reset()
        # The first observation key is used for the alive check below;
        # presumably it is 'alive' -- TODO confirm against the env.
        k = list(state[0].keys())
        raw_states = []
        obs_states = []
        SA_actions = []
        action_history = np.zeros(6)
        for t in range(10000):  # Don't infinite loop while learning
            agent_actions = env.act(state)
            # Record position, board, bomb blast strength, bomb life, blast
            # strength, can kick and ammo -- only while the agent is alive
            # (id 10 present in the first observation field).
            if 10 in state[logged_agent][k[0]]:
                obs, raw = observe(state[logged_agent], action_history)
                raw_states.append(raw.tolist())
                obs_states.append(obs.tolist())
                SA_actions.append(agent_actions[logged_agent])

                # Shift the action history left and append the newest action.
                action_history[:-1] = action_history[1:]
                action_history[-1] = agent_actions[logged_agent]
            state, reward, done, _ = env.step(agent_actions)
            still_alive = 10 in state[0][k[0]]
            if t == 100 and still_alive:
                _append_csv_row(logFile_states_obs, obs_states)
                _append_csv_row(logFile_states_raw, raw_states)
                _append_csv_row(logFile_actions, SA_actions)
                print(i_episode)
                break
            if done or not still_alive:
                print(i_episode)
                break
    # Release the environment once all episodes are played.
    env.close()
def main():
    '''Play FFA games with an AlphaBetaAgent and optionally log the outcome.

    With ``with_write`` enabled, 100 rendered games are played and one line
    per game ("tie" or "agent N won") is written to a timestamped file.
    Only agents 0, 1 and 2 are inspected when deciding the outcome.
    '''
    with_write = True
    num_rounds = 1
    result_log = None

    if with_write:
        num_rounds = 100
        result_log = open(
            "test_vs_two_simple_depth_str=4_" + str(2) + "__100___" +
            str(time.time()), 'w')

    # The try/finally closes the log even when a game raises.
    try:
        # Print all possible environments in the Pommerman registry
        print(pommerman.REGISTRY)
        a = agents.AlphaBetaAgent(evaluation_function=agents.tomer_eval)
        # Create a set of agents (exactly four)
        agent_list = [
            a,
            # agents.SimpleAgent(),
            agents.SimpleAgent(),
            agents.SimpleAgent(),
            agents.SimpleAgent(),
            # agents.DockerAgent("pommerman/simple-agent", port=12345),
        ]
        # Make the "Free-For-All" environment using the agent list
        env = pommerman.make('PommeFFACompetition-v0', agent_list)
        agent_list[0].initialize(env)
        # Run the episodes just like OpenAI Gym
        for i_episode in range(num_rounds):
            state = env.reset()
            done = False
            while not done:
                env.render()
                actions = env.act(state)
                state, reward, done, info = env.step(actions)

            if with_write:
                players = env._agents
                # A tie means agents 0, 1 and 2 all survived. The original
                # tested agent 1 twice and never looked at agent 2.
                if (players[0].is_alive and players[1].is_alive
                        and players[2].is_alive):
                    result_log.write("tie\n\n")
                elif players[0].is_alive:
                    result_log.write("agent 0 won\n\n")
                elif players[1].is_alive:
                    result_log.write("agent 1 won\n\n")
                else:
                    # Fallback: assumed to mean agent 2 survived.
                    result_log.write("agent 2 won\n\n")
            print('Episode {} finished'.format(i_episode))
    finally:
        if result_log is not None:
            result_log.close()
    env.close()
Beispiel #20
0
 def _thunk():
     """Build the radio-competition environment with four SimpleAgents."""
     players = [agents.SimpleAgent() for _ in range(4)]
     return pommerman.make("PommeRadioCompetition-v2", players)
Beispiel #21
0
def test(gnet):
    """Evaluate an A3C agent over 100 FFA games and plot the win shares.

    The agent plays in slot 0 against three ``SimpleAgent`` opponents.
    Every 20th game is rendered. A bar chart shows the percentage of ties
    and of wins per slot, and the slot-0 win rate is printed.

    Args:
        gnet: Network handed to ``A3CAgent``.
    """
    John = A3CAgent(gnet)
    John.set_train(False)
    agentList = [
        John,
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent()
    ]
    env = pommerman.make('PommeFFACompetition-v0', agentList)
    # Winner slot per game, or -1 when the game was not a win.
    wins = []
    for ii in range(100):
        John.reset_lstm()
        state = env.reset()
        done = False
        while not done:
            if ii % 20 == 0:
                env.render()
            # time.sleep(1/24)
            with torch.no_grad():
                actions = env.act(state)
            # Feed the new observation back into the next act() call; the
            # original bound it to an unused name, so every agent kept
            # acting on the initial reset observation.
            state, reward, done, info = env.step(actions)

        print(
            ii, "DONE. Info:", info, "reward:", reward, "You win = ",
            info['winners'][0] == 0 if info['result'].name == 'Win' else False)
        wins.append(info['winners'][0] if info['result'].name == 'Win' else -1)
    env.close()

    winrate = wins.count(0) / len(wins)
    fig, ax = plt.subplots(num=1, clear=True)
    t, p0, p1, p2, p3 = plt.bar([-1, 0, 1, 2, 3], [
        wins.count(-1) / len(wins) * 100,
        wins.count(0) / len(wins) * 100,
        wins.count(1) / len(wins) * 100,
        wins.count(2) / len(wins) * 100,
        wins.count(3) / len(wins) * 100
    ])
    t.set_facecolor('b')

    p0.set_facecolor('r')
    p1.set_facecolor('g')
    p2.set_facecolor('b')
    p3.set_facecolor('c')
    ax.set_xticks([-1, 0, 1, 2, 3])
    ax.set_xticklabels([
        'Ties', 'Agent\n(A2C)', 'Agent 1\nSimpleAgent', 'Agent 2\nSimpleAgent',
        'Agent 3\nSimpleAgent'
    ])
    ax.set_ylim([0, 100])
    ax.set_ylabel('Percent')
    ax.set_title('Bomberman. FFA mode.')
    print("Winrate: ", winrate)
    plt.show()
Beispiel #22
0
def main():
    """Run ``EPISODE`` rendered FFA games with an external Q-table agent.

    Slot ``train_agent_number`` holds a placeholder ``BaseAgent`` registered
    as the training agent. Its action is supplied by ``MyAgent`` and spliced
    into the actions returned by ``env.act``. The loss rate is printed and
    the Q-table is written to ``QTable.csv``.
    """
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    train_agent_number = 0
    agent_list.insert(train_agent_number, agents.BaseAgent())

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    env.set_training_agent(train_agent_number)
    my_agent = MyAgent()

    # Run the episodes just like OpenAI Gym
    lose_cnt = 0
    for i_episode in range(EPISODE):
        state = env.reset()
        done = False
        while not done:
            # fresh env
            env.render()

            # for simple agents making decisions
            actions = env.act(state)

            # RL make decision based on present state
            agent_action = my_agent.act(state, ACTIONS, env)

            actions.insert(train_agent_number, agent_action)

            # Advance to the next state. The original bound the result to a
            # separate name and never copied it back, so every decision was
            # made on the initial reset observation.
            state, reward, done, info = env.step(actions)

            # Count a loss when the episode ends with reward -1 for the
            # training agent.
            agent_reward = reward[train_agent_number]
            if done and agent_reward == -1:
                lose_cnt += 1
    env.close()
    print("lose rate: ", lose_cnt / float(EPISODE))
    my_agent.q_table.to_csv('QTable.csv')
Beispiel #23
0
def main():
    '''Train a random forest on snorkel-labelled SimpleAgent observations.

    Plays 1500 unrendered FFA games. Every observation is turned into a
    feature vector and labelled with the per-action marginals of the loaded
    generative models. The labels are then reduced to a one-hot of the
    maximum, and the fitted classifier is pickled to ``rf.pickle``.
    '''

    filename = "rf.pickle"

    # Show every environment id known to the Pommerman registry.
    print(pommerman.REGISTRY)

    # Exactly four players.
    players = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        # agents.DockerAgent("pommerman/simple-agent", port=12345),
    ]
    env = pommerman.make('PommeFFACompetition-v0', players)
    models = load_snorkel()

    features = []
    soft_labels = []
    for episode_idx in range(1500):
        obs = env.reset()
        finished = False
        while not finished:
            #            env.render()
            current = env.get_observations()
            joint_action = env.act(obs)
            for agent_obs in current:
                features.append(merge(extract_state(agent_obs)))
                marginals = np.zeros(6)
                # Labelling-function outputs, one row per function.
                lf_out = np.array([labeler(agent_obs) for labeler in get_lf()])
                for action_idx, model in enumerate(models):
                    column = sparse.csr_matrix(lf_out[:, action_idx])
                    marginals[action_idx] = model.marginals(column)
                soft_labels.append(marginals)
            obs, reward, finished, info = env.step(joint_action)
        print('Episode {} finished'.format(episode_idx))
    env.close()

    # Harden each probability vector: 1 where it attains its maximum.
    hard_labels = []
    for prob in soft_labels:
        is_max = prob == prob.max()
        hard_labels.append(np.array(list(map(int, is_max))))
    hard_labels = np.array(hard_labels)

    forest = RandomForestClassifier(n_estimators=50)
    forest.fit(features, hard_labels)

    with open(filename, 'wb') as handle:
        pickle.dump(forest, handle, protocol=pickle.HIGHEST_PROTOCOL)
def run(match_num, iteration_limit, mcts_process_num, result_list=None, process_id=None, render=False):
    """
    Play matches between one MCTS agent and three simple agents.

    :param match_num: How many matches to play
    :param iteration_limit: Upper bound on MCTS iterations
    :param mcts_process_num: Process count for MCTS (1 is passed on as None)
    :param result_list: Optional list that receives one
        ``(dead_agents, survivors)`` tuple per match
    :param process_id: Process ID used in log lines when multiprocessing
    :param render: Whether to render every step
    :return: ``result_list`` (created on the first match when not supplied)
    """
    # A single process is expressed to MCTSAgent as "no process count".
    process_count = None if mcts_process_num == 1 else mcts_process_num
    players = [
        MCTSAgent([agents.SimpleAgent for _ in range(3)], iteration_limit, process_count=process_count),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]

    env = pommerman.make('PommeFFACompetition-v0', players)

    for episode_idx in range(match_num):
        obs = env.reset()
        starters = obs[0]['alive']
        survivors = starters
        death_order = []
        finished = False
        while not finished:
            if render:
                env.render()
            obs, reward, finished, info = env.step(env.act(obs))

            # Record newly dead agents in the order they disappear.
            survivors = obs[0]['alive']
            for agent_id in starters:
                if agent_id in survivors or agent_id in death_order:
                    continue
                death_order.append(agent_id)

        if process_id is None:
            print('[Episode %d] Dead order: ' % episode_idx, str(death_order), 'Survivors:', survivors)
        else:
            print('[Process %d, Episode %d] Dead order: ' % (process_id, episode_idx),
                  str(death_order), 'Survivors:', survivors)

        if result_list is None:
            result_list = []
        result_list.append((death_order, survivors))

    env.close()

    return result_list
def main():
    """Play EPISODE free-for-all games with MyAgent in the training seat.

    Seat ``train_agent_number`` holds a placeholder ``BaseAgent``; its action
    is supplied each step by ``MyAgent.act`` and spliced into the action list
    produced by the environment for the three ``SimpleAgent`` opponents.
    Prints "win"/"lose" per finished game and the overall win rate.

    NOTE(review): a game that hits the step cap ends without printing a
    result and without touching the loss counter, so it is counted as a win
    in the final rate - confirm this is intended.
    """
    # Show every environment registered with Pommerman
    print(pommerman.REGISTRY)

    # Three scripted opponents plus the placeholder for the trained agent
    train_agent_number = 0
    agent_list = [agents.SimpleAgent() for _ in range(3)]
    agent_list.insert(train_agent_number, agents.BaseAgent())

    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    env.set_training_agent(train_agent_number)
    my_agent = MyAgent()

    losses = 0
    for _episode in range(EPISODE):
        state = env.reset()
        # The step counter runs over 2..499, i.e. at most 498 env steps
        for _tick in range(2, 500):
            env.render()

            # Actions of the scripted opponents, then ours at the training seat
            actions = env.act(state)
            actions.insert(train_agent_number,
                           my_agent.act(state[train_agent_number], ACTIONS))

            state, reward, done, info = env.step(actions)

            agent_reward = reward[train_agent_number]
            if not done:
                continue
            if agent_reward == -1:
                losses += 1
                print("lose")
            else:
                print("win")
            break
    env.close()
    print("win rate: ", 1 - losses / float(EPISODE))
Beispiel #26
0
def main():
    """Train the DDPG agent's transformer, then play games and save rewards.

    Builds a four-agent environment named by ``args.env_name`` (two
    ``SimpleAgent``, one ``RandomAgent``, one ``DdpgAgent``), seeds it with
    ``RANDOM_SEED``, calls ``ddpg_agent.train_transformer`` and then plays
    ``args.num_steps`` games. The reward of agent 0 is summed per game and
    written to ``<args.outdir>/result_2simple_2random.csv``.

    NOTE(review): the accumulated reward is ``reward[0]`` (a SimpleAgent
    seat), not the DDPG agent at index 3 - confirm this is intended.
    """
    tf.reset_default_graph()
    sess = tf.Session()

    # Create a set of agents (exactly four)
    ddpg_agent = DdpgAgent(id=3, sess=sess)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.RandomAgent(),
        ddpg_agent,
    ]
    env = pommerman.make(args.env_name, agent_list)
    env.seed(RANDOM_SEED)

    print('HERE0', sess)
    ddpg_agent.train_transformer(sess, env)
    print('her2')

    # One accumulator slot per game; a single-element array would raise
    # IndexError as soon as a second game is played.
    r_sum = np.zeros(args.num_steps)

    for i in range(args.num_steps):
        env.reset()

        # Run the episodes just like OpenAI Gym
        for i_episode in range(args.max_episode_length):
            state = env.reset()

            done = False
            while not done:
                actions = env.act(state)
                state, reward, done, info = env.step(actions)
                r_sum[i] += reward[0]

            # Hard cap on episodes per game regardless of max_episode_length
            if i_episode > 300:
                break

        print('Game {} finished'.format(i))

    np.savetxt(args.outdir + '/result_2simple_2random.csv', r_sum, fmt='%1.4e')
    env.close()
Beispiel #27
0
def generate_data(EPISODES, save_file_nm, shuffle_agents=False):
    """Play games and record the RNN agent's observations and actions.

    For every episode the RNN agent's processed observation and chosen action
    are collected at each step, plus the processed observation after the
    final step, and handed to the dataset via ``add_episode``. An existing
    file at ``save_file_nm`` is loaded first, and the dataset is saved at
    the end.

    :param EPISODES: number of games to play
    :param save_file_nm: path passed to the dataset for loading and saving
    :param shuffle_agents: when true, shuffle the agent order before the
        environment is built
    """
    rnn_agent = RNN_Agent()

    # Init dataset
    dset = dataset(rnn_agent.RNN_SEQUENCE_LENGTH, save_file_nm,
                   rnn_agent.utils)
    if os.path.exists(save_file_nm):
        dset.load()

    agent_list = [
        rnn_agent,
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.SimpleAgent(),
    ]
    if shuffle_agents:
        shuffle(agent_list)
    # Looked up only after the optional shuffle, so the index is the seat the
    # RNN agent actually occupies in the list handed to the environment.
    rnn_agent_index = agent_list.index(rnn_agent)

    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    for _ in range(EPISODES):
        state = env.reset()

        done = False
        episode_obs = []
        episode_acts = []
        while not done:
            actions = env.act(state)
            episode_acts.append(actions[rnn_agent_index])
            episode_obs.append(rnn_agent.utils.input(state[rnn_agent_index]))
            state, reward, done, info = env.step(actions)

        # Final timestep observation
        episode_obs.append(rnn_agent.utils.input(state[rnn_agent_index]))
        dset.add_episode(episode_obs, episode_acts)

    env.close()
    dset.save()
    rnn_agent.sess.close()
    tf.reset_default_graph()
Beispiel #28
0
def main():
    '''Play NUM_GAMES free-for-all games and print a win/tie tally.

       An alpha-beta agent (balanced evaluation), a SmartRandomAgent and two
       SimpleAgents are seated, each agent is initialised with the
       environment, and every game is rendered. ``wins[0]`` counts ties and
       ``wins[k + 1]`` counts wins of the agent in seat ``k``.
    '''
    print(f"num games: {NUM_GAMES}, 1v1 free-for-all game, DEPTH={DEPTH}")

    alpha_balanced = agents.AlphaBetaAgent(
        evaluation_function=agents.balanced_eval, depth=DEPTH)
    # Alternative evaluation functions, built but not seated below
    alpha_attacker = agents.AlphaBetaAgent(
        evaluation_function=agents.attacker_eval, depth=DEPTH)
    alpha_coward = agents.AlphaBetaAgent(
        evaluation_function=agents.pacifist_eval, depth=DEPTH)
    agent_list = [
        alpha_balanced,
        SmartRandomAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    for agent in agent_list:
        agent.initialize(env)

    # Slot 0 is the tie counter, followed by one slot per seat
    wins = [0] * (len(agent_list) + 1)
    # Run the episodes just like OpenAI Gym
    for i_episode in range(NUM_GAMES):
        state = env.reset()
        done = False
        info = None
        turns = 0
        while not done:
            turns += 1
            env.render()
            actions = env.act(state)
            state, reward, done, info = env.step(actions)

        if info['result'] == Result.Tie:
            wins[0] += 1
        else:
            # Shift by one past the tie slot so the tally lines up with the
            # report below (assumes 'winners' holds zero-based agent ids, as
            # in upstream Pommerman - TODO confirm for this fork).
            wins[info["winners"][0] + 1] += 1
        print(
            f'Episode {i_episode} finished, info: {info}, took {turns} turns')

    print(
        f"ties : {wins[0]}\n player zero {wins[1]}\n player one : {wins[2]}\n player two : {wins[3]}\n "
        f"player three : {wins[4]}\n overall games: {NUM_GAMES}")

    env.close()
Beispiel #29
0
    def _thunk():
        """Build and return a new ``env_id`` environment with four SimpleAgents."""
        # Every seat is filled with the stock scripted agent
        simple_agents = [agents.SimpleAgent() for _ in range(4)]
        return pommerman.make(env_id, simple_agents)
def main():
    '''Play one rendered team-mode game against a Docker-hosted agent.

       Seats one SimpleAgent, two RandomAgents and the
       "multiagentlearning/skynet955" Docker agent (port 12345), then prints
       the final ``info`` returned by the environment.
    '''
    # List every environment registered with Pommerman
    print(pommerman.REGISTRY)

    # Exactly four seats; the last one is served from a Docker image
    agent_list = [
        agents.SimpleAgent(),
        agents.RandomAgent(),
        agents.RandomAgent(),
        agents.DockerAgent("multiagentlearning/skynet955", port=12345),
    ]
    # Team competition environment built from the agent list
    env = pommerman.make('PommeTeamCompetition-v1', agent_list)

    # Gym-style episode loop
    for i_episode in range(1):
        state = env.reset()
        while True:
            env.render()
            state, reward, done, info = env.step(env.act(state))
            if done:
                break
        print('Episode {} finished'.format(i_episode))
        print("Final Result: ", info)
    env.close()