Example #1
import pommerman
from pommerman import agents
# MyAgent, EPISODE and ACTIONS are defined elsewhere in the source project.


def main():
    """Bootstrap a simple Pommerman FFA game with one trainable agent."""
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    # Create the list of agents (four in total; the training agent is inserted below)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    train_agent_number = 0
    agent_list.insert(train_agent_number, agents.BaseAgent())

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    env.set_training_agent(train_agent_number)
    my_agent = MyAgent()

    # Run the episodes just like OpenAI Gym
    lose_cnt = 0
    for i_episode in range(EPISODE):
        state = env.reset()
        done = False
        step_count = 0
        while not done:
            step_count += 1
            # Render the environment
            env.render()

            # Actions for the other (simple) agents; the training agent is skipped
            # because env.set_training_agent() was called above
            actions = env.act(state)

            # The RL agent chooses its action based on the current state
            agent_action = my_agent.act(state, ACTIONS, env)

            actions.insert(train_agent_number, agent_action)

            # get next state
            state_, reward, done, info = env.step(actions)

            # Reward for the training agent (index 0, matching train_agent_number)
            agent_reward = reward[0]
            if done and agent_reward == -1:
                lose_cnt += 1
            # print("#####################")
            # print("coding:", encoded_state.coding, encoded_state_.coding)
            # print("actions:", actions)
            # print("rewards:", reward)
            # print("#####################")
        # print('Episode {} finished'.format(i_episode))
    env.close()
    print("lose rate: ", lose_cnt / float(EPISODE))
    my_agent.q_table.to_csv('QTable.csv')
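# A minimal sketch of the MyAgent interface the loop above relies on: act(state, ACTIONS, env)
# returns one discrete Pommerman action and q_table is a pandas DataFrame saved at the end.
# The real class lives in the source project, so every name below is an assumption.
import random
import pandas as pd

class MyAgent:
    def __init__(self):
        # Q-table indexed by an encoded state, one column per action (placeholder).
        self.q_table = pd.DataFrame()

    def act(self, state, actions, env):
        # Placeholder policy: pick uniformly at random from the allowed actions.
        return random.choice(list(actions))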
Example #2
from time import time
from typing import List, Optional, Tuple

# LunarLanderWrapper, MyAgent and the parsed `args` come from the source project.


def run(run_number: int) -> Tuple[List[float], Optional[int], float]:
    """ Train the agent for multiple episodes on a fresh environment and return the
        episode reward history, the solve episode (or None if not solved) and the time (in seconds) it took. """
    rewards = []  # total reward recorded at the end of each episode
    start_time = time()

    wrapper = LunarLanderWrapper()
    agent = MyAgent(wrapper=wrapper, seed=run_number)

    for episode in range(args.episodes):  # train for the given number of episodes
        rewards.append(agent.train())
        if wrapper.solved(rewards):  # exit early if environment is solved
            break
    else:  # the loop did not break, so the environment wasn't solved within the episode budget
        episode = None

    duration = time() - start_time
    return rewards, episode, duration
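# A minimal sketch of the wrapper/agent interface run() assumes: agent.train() runs one
# episode and returns its total reward, and wrapper.solved(rewards) checks the reward
# history. The real classes belong to the source project, so the details below (the gym
# environment id, the 200-points-over-100-episodes criterion, the random policy) are
# assumptions.
import gym
import numpy as np

class LunarLanderWrapper:
    def __init__(self):
        self.env = gym.make('LunarLander-v2')

    def solved(self, rewards):
        # LunarLander is commonly treated as solved at an average reward of 200
        # over the last 100 episodes.
        return len(rewards) >= 100 and np.mean(rewards[-100:]) >= 200

    def close(self):
        self.env.close()

class MyAgent:
    def __init__(self, wrapper, seed=0):
        self.wrapper = wrapper
        self.wrapper.env.seed(seed)

    def train(self):
        # Placeholder: run one episode with random actions and return its total reward.
        env = self.wrapper.env
        state, done, total = env.reset(), False, 0.0
        while not done:
            state, reward, done, info = env.step(env.action_space.sample())
            total += reward
        return total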
Example #3
import sys

import numpy as np

import pommerman
from pommerman import agents
# MyAgent (and the commented-out alternatives) come from the source project.


def main(render=False, interactive=False, verbose=False):
    # List of four agents
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        MyAgent(),
        #SimpleAgentDebugged(),
        #agents.DockerAgent("pommerman/ibm-agent", port=12345),
    ]

    # Environment of FFA competition
    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    # Run
    rewards = list()
    for episode in range(100):
        state = env.reset()
        done = False
        step = 0
        while not done:
            if verbose:
                print("Step: ", step)
            step += 1
            if render:
                env.render()
            actions = env.act(state)
            if verbose:
                print(actions[-1])
            state, reward, done, info = env.step(actions)
            if interactive:
                sys.stdin.readline()
        rewards.append(reward)
        print('Episode {} finished'.format(episode), reward)
    rewards = np.array(rewards)
    print(np.mean(rewards, axis=0))

    env.close()
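# Hypothetical entry point for the main() above; the flag values are only an example.
if __name__ == '__main__':
    main(render=True, interactive=False, verbose=False)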
Example #4
from lunarlander_wrapper import LunarLanderWrapper
from my_agent import MyAgent
wrapper = LunarLanderWrapper()
agent = MyAgent(wrapper=wrapper, seed=42)
rewards = []
for episode in range(10):
    rewards.append(agent.train())
    if wrapper.solved(rewards):  # stop early once the environment counts as solved
        break
wrapper.close()
Example #5
    # end of get_flask_app(); its body is omitted in this snippet (a hypothetical sketch follows the example)
    return app


if __name__ == "__main__":
    GAME_SIZE = 4
    SCORE_TO_WIN = 2048
    APP_PORT = 5005
    APP_HOST = "0.0.0.0"

    from game2048.game import Game
    game = Game(size=GAME_SIZE, score_to_win=SCORE_TO_WIN)

    try:
        import tensorflow as tf
        from my_agent import MyAgent
        sess = tf.Session()
        agent = MyAgent(game=game, sess=sess)
        agent.build()
    except Exception:
        from game2048.agents import RandomAgent
        print(
            "WARNING: Please compile the ExpectiMaxAgent first following the README."
        )
        print("WARNING: You are now using a RandomAgent.")
        agent = RandomAgent(game=game)

    print("Run the webapp at http://<any address for your local host>:%s/" %
          APP_PORT)

    app = get_flask_app(game, agent)
    # IMPORTANT: `threaded=False` to ensure correct behavior
    app.run(port=APP_PORT, threaded=False, host=APP_HOST)
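# A hypothetical sketch of the get_flask_app() factory whose tail appears at the top of
# this example. The route names and the Game/agent attributes used here are assumptions;
# only the Flask calls themselves are standard.
from flask import Flask, jsonify

def get_flask_app(game, agent):
    app = Flask(__name__)

    @app.route("/state")
    def state():
        # Report the current board and score (attribute names are assumptions).
        return jsonify({"board": game.board.tolist(), "score": game.score})

    @app.route("/step")
    def step():
        # Let the agent pick one move, apply it, and return the updated state.
        direction = agent.step()
        game.move(direction)
        return jsonify({"board": game.board.tolist(), "score": game.score})

    return app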
Example #6
runtime_per_run = []
rewards = []

# For each run, train agent until environment is solved, or episode budget
# runs out:
for run in range(num_runs):
    # Initialise result helpers
    end_episode = num_episodes  # indicates in which episode the environment was solved
    start = timer()
    rewards = [0.0] * num_episodes  # reward per episode

    # Initialise environment and agent
    wrapper = LunarLanderWrapper()  # TODO: you have to implement this environment
    #agent = QLearner(wrapper=wrapper, seed=run)  # TODO: you have to implement this agent
    agent = MyAgent(wrapper=wrapper, seed=run)
    # For each episode, train the agent on the environment and record the
    # reward of each episode

    #style.use('fivethirtyeight')

    #fig=plt.figure()
    #plt.axis([0,args.episodes,-300,300])
    #plt.xlabel('Episodes')
    #plt.ylabel('AVG Reward')

    for episode in range(num_episodes):
        rewards[episode] = agent.train()
        #if (episode % 100) == 0 and episode != 0:
        #avg_last = float(sum(rewards[episode-100:episode])) / 100
        print("Episode: ", episode)
# Initialise result data structures
rewards_per_run = dict()
runtime_per_run = []

# For each run, train agent until environment is solved, or episode budget
# runs out:
for run in range(num_runs):
    # Initialise result helpers
    end_episode = num_episodes  # indicates in which episode the environment was solved
    start = timer()
    rewards = [0.0] * num_episodes  # reward per episode

    # Initialise environment and agent
    wrapper = LunarLanderWrapper()  # TODO: you have to implement this environment
    agent = MyAgent(wrapper=wrapper, seed=run)  # TODO: you have to implement this agent

    # For each episode, train the agent on the environment and record the
    # reward of each episode
    for episode in range(num_episodes):
        rewards[episode] = agent.train()
        # Check if environment is solved
        if wrapper.solved(rewards[:episode]):
            end_episode = episode
            break

    # Record and print performance
    runtime_per_run.append(timer() - start)
    rewards_per_run['run' + str(run)] = rewards
    print('end episode # = ', end_episode)
    mp.set_start_method('spawn')

    # writer = SummaryWriter()

    gnet = Net(N_ACTIONS)

    global_ep = mp.Value('i', 0)
    wins = mp.Value('i', 0)
    tot_rewards = mp.Value('d', 0.)
    res_queue, queue, g_que = mp.Queue(), mp.Queue(), mp.Queue()

    learner = Learner(gnet, queue, g_que, N, global_ep, GAMMA, LR, UP_STEP,
                      1000000000, BS, ENTROPY_COST, BASELINE_COST)

    agents = [
        MyAgent(gnet, i, global_ep, wins, tot_rewards, res_queue, queue, g_que,
                GAMMA, UP_STEP, BS, N_ACTIONS) for i in range(N)
    ]

    learner.start()

    for agent in agents:
        agent.start()

    # Disabled logging loop (the SummaryWriter above is also commented out); as written,
    # the metrics pulled from res_queue below are never consumed.
    while 0:
        r = res_queue.get()
        if r is not None:
            writer.add_scalar('global_ep_r', r[0], r[1])
            writer.add_scalar('loss', r[2], r[1])
            writer.add_scalar('val_loss', r[3], r[1])
            writer.add_scalar('pol_loss', r[4], r[1])
            writer.add_scalar('H_loss', r[5], r[1])
            writer.add_scalar('depth_loss', r[6], r[1])
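    # A sketch of how the metrics could actually be consumed (assumptions: the
    # SummaryWriter from torch.utils.tensorboard, and workers that put None on
    # res_queue when they finish). The `while 0:` loop above is left disabled
    # as in the original.
    from torch.utils.tensorboard import SummaryWriter
    writer = SummaryWriter()
    finished = 0
    while finished < N:
        r = res_queue.get()
        if r is None:
            finished += 1
            continue
        writer.add_scalar('global_ep_r', r[0], r[1])
        writer.add_scalar('loss', r[2], r[1])
        writer.add_scalar('val_loss', r[3], r[1])

    learner.join()
    for agent in agents:
        agent.join()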