import pommerman
from pommerman import agents

from my_agent import MyAgent  # assumed local module providing the RL agent

# Module-level constants expected by main(): EPISODE (number of episodes to
# run) and ACTIONS (the list of discrete Pommerman actions).


def main():
    """Simple function to bootstrap a game."""
    # Print all possible environments in the Pommerman registry
    print(pommerman.REGISTRY)

    # Create a set of agents (exactly four)
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
    ]
    train_agent_number = 0
    agent_list.insert(train_agent_number, agents.BaseAgent())

    # Make the "Free-For-All" environment using the agent list
    env = pommerman.make('PommeFFACompetition-v0', agent_list)
    env.set_training_agent(train_agent_number)
    my_agent = MyAgent()

    # Run the episodes just like OpenAI Gym
    lose_cnt = 0
    for i_episode in range(EPISODE):
        state = env.reset()
        done = False
        step_count = 0
        while not done:
            step_count += 1
            env.render()  # refresh the display
            # Let the scripted agents choose their actions
            actions = env.act(state)
            # The RL agent decides based on the present state
            agent_action = my_agent.act(state, ACTIONS, env)
            actions.insert(train_agent_number, agent_action)
            # Advance the environment and carry the new state forward
            state, reward, done, info = env.step(actions)
            # Count losses of the training agent
            if done and reward[train_agent_number] == -1:
                lose_cnt += 1
        # print('Episode {} finished'.format(i_episode))
    env.close()
    print("lose rate: ", lose_cnt / float(EPISODE))
    my_agent.q_table.to_csv('QTable.csv')
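# The loop above assumes MyAgent exposes act(state, actions, env) and a
# pandas `q_table` that can be saved with to_csv. A minimal epsilon-greedy
# sketch under those assumptions (the state key and the learning update are
# simplified; the real featurisation would live in my_agent.py):
import numpy as np
import pandas as pd


class MyAgent:
    def __init__(self, actions=tuple(range(6)), epsilon=0.1):
        # Pommerman has 6 discrete actions: stop/up/down/left/right/bomb
        self.epsilon = epsilon
        self.q_table = pd.DataFrame(columns=list(actions), dtype=np.float64)

    def act(self, state, actions, env):
        # Crude state key from the training agent's board observation
        key = str(state[0]['board'].tobytes())
        if key not in self.q_table.index:
            self.q_table.loc[key] = 0.0  # lazily add unseen states
        if np.random.random() < self.epsilon:
            return int(np.random.choice(actions))  # explore
        return int(self.q_table.loc[key].idxmax())  # exploit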
from time import time
from typing import List, Optional, Tuple

from lunarlander_wrapper import LunarLanderWrapper
from my_agent import MyAgent


def run(run_number: int) -> Tuple[List[float], Optional[int], float]:
    """Train an agent for multiple episodes on a fresh environment.

    Returns the episode reward history, the episode in which the environment
    was solved (None if it never was), and the time (in seconds) it took.
    """
    rewards = []  # total reward recorded at the end of each episode
    start_time = time()
    wrapper = LunarLanderWrapper()
    agent = MyAgent(wrapper=wrapper, seed=run_number)

    # Train for the given number of episodes (`args` is parsed at module level)
    for episode in range(args.episodes):
        rewards.append(agent.train())
        if wrapper.solved(rewards):  # exit early if the environment is solved
            break
    else:
        # No break means the environment wasn't solved in the episode budget
        episode = None

    duration = time() - start_time
    return rewards, episode, duration
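# run() stops early via wrapper.solved(rewards), whose implementation is not
# shown here. A minimal sketch, assuming Gym's published solve criterion for
# LunarLander-v2 (a mean reward of 200 over the last 100 consecutive
# episodes); only the pieces used in these snippets are included:
import gym


class LunarLanderWrapper:
    SOLVE_THRESHOLD = 200.0  # LunarLander-v2 counts as solved at this mean...
    WINDOW = 100             # ...taken over 100 consecutive episodes

    def __init__(self):
        self.env = gym.make('LunarLander-v2')

    def solved(self, rewards):
        # Solved once the trailing 100-episode mean reaches the threshold
        if len(rewards) < self.WINDOW:
            return False
        return sum(rewards[-self.WINDOW:]) / self.WINDOW >= self.SOLVE_THRESHOLD

    def close(self):
        self.env.close()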
import sys

import numpy as np
import pommerman
from pommerman import agents

from my_agent import MyAgent  # assumed local module providing the RL agent


def main(render=False, interactive=False, verbose=False):
    # List of four agents
    agent_list = [
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        agents.SimpleAgent(),
        MyAgent(),
        # SimpleAgentDebugged(),
        # agents.DockerAgent("pommerman/ibm-agent", port=12345),
    ]

    # Environment of the FFA competition
    env = pommerman.make('PommeFFACompetition-v0', agent_list)

    # Run
    rewards = list()
    for episode in range(100):
        state = env.reset()
        done = False
        step = 0
        while not done:
            if verbose:
                print("Step: ", step)
            step += 1
            if render:
                env.render()
            actions = env.act(state)
            if verbose:
                print(actions[-1])  # our agent's action
            state, reward, done, info = env.step(actions)
            if interactive:
                sys.stdin.readline()  # wait for Enter before the next step
        rewards.append(reward)
        print('Episode {} finished'.format(episode), reward)
    rewards = np.array(rewards)
    print(np.mean(rewards, axis=0))  # mean reward per agent over all episodes
    env.close()
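# For reference, a direct invocation; the flags mirror main()'s signature.
if __name__ == '__main__':
    # Watch the agents play; add interactive=True to step through with Enter
    main(render=True)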
from lunarlander_wrapper import LunarLanderWrapper
from my_agent import MyAgent

wrapper = LunarLanderWrapper()
agent = MyAgent(wrapper=wrapper, seed=42)

rewards = []  # total reward recorded at the end of each episode
for episode in range(10):
    rewards.append(agent.train())
    if wrapper.solved(rewards):  # stop early once the environment is solved
        break

wrapper.close()
    return app


if __name__ == "__main__":
    GAME_SIZE = 4
    SCORE_TO_WIN = 2048
    APP_PORT = 5005
    APP_HOST = "0.0.0.0"

    from game2048.game import Game

    game = Game(size=GAME_SIZE, score_to_win=SCORE_TO_WIN)
    try:
        import tensorflow as tf

        from my_agent import MyAgent

        sess = tf.Session()
        agent = MyAgent(game=game, sess=sess)
        agent.build()
    except Exception:
        from game2048.agents import RandomAgent

        print("WARNING: Please compile the ExpectiMaxAgent first following the README.")
        print("WARNING: You are now using a RandomAgent.")
        agent = RandomAgent(game=game)

    print("Run the webapp at http://<any address for your local host>:%s/" % APP_PORT)
    app = get_flask_app(game, agent)
    # IMPORTANT: `threaded=False` to ensure correct behavior
    app.run(port=APP_PORT, threaded=False, host=APP_HOST)
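# `tf.Session()` is a TensorFlow 1.x API. If only TensorFlow 2.x is
# installed, the same graph-mode session is available through the
# compatibility layer (this assumes MyAgent was written against TF1):
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()  # restore TF1 graph-mode semantics
sess = tf.Session()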
# Initialise result data structures
runtime_per_run = []

# For each run, train the agent until the environment is solved, or the
# episode budget runs out:
for run in range(num_runs):
    # Initialise result helpers
    end_episode = num_episodes  # indicates in which episode the environment was solved
    start = timer()
    rewards = [0.0] * num_episodes  # reward per episode

    # Initialise environment and agent
    wrapper = LunarLanderWrapper()  # TODO: you have to implement this environment
    agent = MyAgent(wrapper=wrapper, seed=run)  # TODO: you have to implement this agent

    # For each episode, train the agent on the environment and record the
    # reward of each episode
    for episode in range(num_episodes):
        rewards[episode] = agent.train()
        print("Episode: ", episode)
# Initialise result data structures
rewards_per_run = dict()
runtime_per_run = []

# For each run, train the agent until the environment is solved, or the
# episode budget runs out:
for run in range(num_runs):
    # Initialise result helpers
    end_episode = num_episodes  # indicates in which episode the environment was solved
    start = timer()
    rewards = [0.0] * num_episodes  # reward per episode

    # Initialise environment and agent
    wrapper = LunarLanderWrapper()  # TODO: you have to implement this environment
    agent = MyAgent(wrapper=wrapper, seed=run)  # TODO: you have to implement this agent

    # For each episode, train the agent on the environment and record the
    # reward of each episode
    for episode in range(num_episodes):
        rewards[episode] = agent.train()

        # Check if the environment is solved, including the current episode
        if wrapper.solved(rewards[:episode + 1]):
            end_episode = episode
            break

    # Record and print performance
    runtime_per_run.append(timer() - start)
    rewards_per_run['run' + str(run)] = rewards
    print('end episode # = ', end_episode)
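# The run loops above rely on module-level num_runs, num_episodes and timer.
# One plausible way to provide them (the flag names are illustrative, not
# taken from the original):
from argparse import ArgumentParser
from timeit import default_timer as timer  # `timer` as used in the loops above

parser = ArgumentParser(description='Train MyAgent on LunarLander')
parser.add_argument('--runs', type=int, default=10,
                    help='number of independent training runs')
parser.add_argument('--episodes', type=int, default=1000,
                    help='episode budget per run')
args = parser.parse_args()
num_runs, num_episodes = args.runs, args.episodes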
import torch.multiprocessing as mp
from torch.utils.tensorboard import SummaryWriter

# Net, Learner, MyAgent and the hyperparameter constants (N, N_ACTIONS,
# GAMMA, LR, UP_STEP, BS, ENTROPY_COST, BASELINE_COST) are defined elsewhere
# in this module.

mp.set_start_method('spawn')
writer = SummaryWriter()

gnet = Net(N_ACTIONS)
global_ep, wins, tot_rewards = (
    mp.Value('i', 0), mp.Value('i', 0), mp.Value('d', 0.))
res_queue, queue, g_que = mp.Queue(), mp.Queue(), mp.Queue()

learner = Learner(gnet, queue, g_que, N, global_ep, GAMMA, LR, UP_STEP,
                  1000000000, BS, ENTROPY_COST, BASELINE_COST)
agents = [
    MyAgent(gnet, i, global_ep, wins, tot_rewards, res_queue, queue, g_que,
            GAMMA, UP_STEP, BS, N_ACTIONS)
    for i in range(N)
]
learner.start()
for agent in agents:
    agent.start()

# Log training metrics until a worker pushes the None sentinel
while True:
    r = res_queue.get()
    if r is None:
        break
    writer.add_scalar('global_ep_r', r[0], r[1])
    writer.add_scalar('loss', r[2], r[1])
    writer.add_scalar('val_loss', r[3], r[1])
    writer.add_scalar('pol_loss', r[4], r[1])
    writer.add_scalar('H_loss', r[5], r[1])
    writer.add_scalar('depth_loss', r[6], r[1])
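# The logging loop above assumes each worker eventually pushes a None
# sentinel onto res_queue. A minimal sketch of that producer-side convention
# (the Worker class and its placeholder metrics are illustrative, not
# MyAgent's real internals):
import torch.multiprocessing as mp

MAX_EP = 100000  # assumed global episode budget


class Worker(mp.Process):
    def __init__(self, res_queue, global_ep):
        super().__init__()
        self.res_queue = res_queue
        self.global_ep = global_ep

    def run(self):
        while self.global_ep.value < MAX_EP:
            with self.global_ep.get_lock():
                self.global_ep.value += 1
            # One metrics tuple per episode: (ep_reward, episode, loss,
            # val_loss, pol_loss, H_loss, depth_loss)
            self.res_queue.put((0.0, self.global_ep.value, 0., 0., 0., 0., 0.))
        self.res_queue.put(None)  # sentinel: no more metrics coming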