def train_agent(hype_space):
    print("----------------------")
    print("Evaluating model: ", hype_space)

    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TEST)
    game = brisc.BriscolaGame(2, logger)
    tf.reset_default_graph()

    # Initialize agents
    agents = []
    agent = QAgent(
        0,
        hype_space['epsilon_increment'],
        hype_space['epsilon_max'],
        hype_space['discount'],
        NETWORK,
        hype_space['layers'],
        hype_space['learning_rate'],
        hype_space['replace_target_iter'])
    agents.append(agent)
    agents.append(RandomAgent())

    best_total_wins = train(game, agents, NUM_EPOCHS, EVALUATE_EVERY,
                            EVALUATE_FOR, MODEL_DIR)
    print("Best total wins ----->", best_total_wins)

    best_total_loses = EVALUATE_FOR - best_total_wins
    return best_total_loses
def main(argv=None):
    '''Evaluate agent performances against RandomAgent and AIAgent'''
    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TEST)
    game = brisc.BriscolaGame(2, logger)

    # agent to be evaluated is RandomAgent or QAgent if a model is provided
    if FLAGS.model_dir:
        eval_agent = QAgent(network=FLAGS.network)
        eval_agent.load_model(FLAGS.model_dir)
        eval_agent.make_greedy()
    else:
        eval_agent = RandomAgent()

    # test agent against RandomAgent
    agents = [eval_agent, RandomAgent()]
    total_wins, points_history = evaluate(game, agents, FLAGS.num_evaluations)
    stats_plotter(agents, points_history, total_wins)

    # test agent against AIAgent
    agents = [eval_agent, AIAgent()]
    total_wins, points_history = evaluate(game, agents, FLAGS.num_evaluations)
    stats_plotter(agents, points_history, total_wins)
def main(argv=None):
    # Initializing the environment
    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TRAIN)
    game = brisc.BriscolaGame(2, logger)

    # Initialize agents
    agents = []
    agent = QAgent(FLAGS.epsilon,
                   FLAGS.epsilon_increment,
                   FLAGS.epsilon_max,
                   FLAGS.discount,
                   FLAGS.network,
                   FLAGS.layers,
                   FLAGS.learning_rate,
                   FLAGS.replace_target_iter,
                   FLAGS.batch_size)
    agents.append(agent)
    agent = RandomAgent()
    agents.append(agent)

    train(game, agents, FLAGS.num_epochs, FLAGS.evaluate_every,
          FLAGS.num_evaluations, FLAGS.model_dir)
def initGridWorld(width, height, obstacles, initLocs, numRobots):
    gridworld = Grid.Grid(width, height, obstacles)
    env = Environment(height, width, gridworld)

    agents = [QAgent(j, -1, -1, env.state_n, env.action_n) for j in range(numRobots)]

    i = 0
    for initLoc in initLocs:
        agents[i].setLocation(initLoc[0], initLoc[1])
        gridworld.cells[initLoc[0]][initLoc[1]].occupied = True
        gridworld.cells[initLoc[0]][initLoc[1]].visited = True
        i = i + 1

    env.agents = agents
    env.updateFrontiers()
    return env
def main(argv=None):
    # Initializing the environment
    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.PVP)
    game = brisc.BriscolaGame(2, logger)

    # Initialize agents
    agents = []
    agents.append(HumanAgent())

    if FLAGS.model_dir:
        agent = QAgent(network=FLAGS.network)
        agent.load_model(FLAGS.model_dir)
        agent.make_greedy()
        agents.append(agent)
    else:
        agent = AIAgent()
        agents.append(agent)

    brisc.play_episode(game, agents, train=False)
def main(argv):
    if len(argv) > 1:
        env_name = argv[1]
    else:
        env_name = "Taxi-v3"
    env = gym.make(env_name)

    start_alpha = 0.1
    start_gamma = 0.6
    start_epsilon = 0.1
    decay_rate = 0.0001

    if load_from_file:
        file = open('q_table', 'rb')
        loaded_q_table = pickle.load(file)
        q_agent = QAgent(env, alpha=start_alpha, gamma=start_gamma,
                         epsilon=start_epsilon, q_table=loaded_q_table)
    else:
        q_agent = QAgent(env, alpha=start_alpha, gamma=start_gamma,
                         epsilon=start_epsilon)

    if not load_from_file:
        # metrics, recorded every 100 epochs
        total_rewards = []
        total_steps = []

        epochs = 15000
        for i in range(0, epochs):
            steps, cum_reward = q_agent.q_learning()

            if i % 100 == 0 or i == 0:
                steps, total_reward = q_agent.validation()
                total_rewards.append(cum_reward)
                total_steps.append(steps)
                print(i, epochs, cum_reward)

                if decay:
                    # decay alpha and epsilon, increase gamma
                    # https://www.youtube.com/watch?v=QzulmoOg2JE
                    new_alpha = 1 / (1 + decay_rate * i) * start_alpha
                    # linear increase of gamma
                    new_gamma = (1 - start_gamma) / epochs * i + start_gamma
                    q_agent.gamma = new_gamma
                    new_epsilon = 1 / (1 + decay_rate * i) * start_epsilon
                    q_agent.alpha = new_alpha
                    q_agent.epsilon = new_epsilon
                    print(new_alpha, new_gamma, new_epsilon)

        # pickle the learned Q-table to a file
        file = open('q_table', 'wb')
        pickle.dump(q_agent.q_table, file)
        file.close()

        # show graphs
        x = np.arange(0, epochs, 100)
        utils.plot(x, y=total_rewards, x_label='epochs of training',
                   y_label='cumulative reward')
        utils.plot(x, y=total_steps, x_label='epochs of training',
                   y_label='steps needed')
    # end of not load_from_file

    # after training, show the agent 5 times
    input("Press Enter to continue...")
    os.system('clear')
    print(q_agent.q_table)
    for i in range(0, 5):
        steps, cum_reward = q_agent.validation(sleep_timer=0.25)
        print("needed steps", steps, "cum_reward", cum_reward)
        time.sleep(1)

    print('finished')
from collections import defaultdict

import game
from agents.one_look_ahead_agent import OneLookAheadAgent
import numpy as np
import keras as ks
from agents.q_agent import QAgent
from agents.till_next_turn_agent import TillNextTurnAgent

agents = [
    QAgent(0, 4, ks.models.load_model("win_loss_model.h5"), gamma=0.99, use_win_rewards=True),
    QAgent(1, 4, ks.models.load_model("big_net.h5"), gamma=0.99, use_win_rewards=False),
    # TillNextTurnAgent(2, 4, 15, deeper_top_n=2),
    # OneLookAheadAgent(3, 4)
]

game.play_game(agents, True, shuffle_agents=True)

scores = []
wins = defaultdict(lambda: 0)
# switched = False

for i in range(100):
        if self.board[0] != -1 and self.board[0] == self.board[4] and self.board[4] == self.board[8]:
            return self.board[0]
        if self.board[2] != -1 and self.board[2] == self.board[4] and self.board[4] == self.board[6]:
            return self.board[2]
        return -1


if __name__ == "__main__":
    from server import Server
    import threading
    import math

    server_name = ('localhost', 1337)
    s = Server(server_name, "ttt_rankings", TTTEnvironment())

    clients = [
        RandomAgent(server_name, "Random1", TTTEnvironment()),
        RandomAgent(server_name, "Random2", TTTEnvironment()),
        # QAgent(server_name, "QAgent2", TTTEnvironment(), 0.9, 0.1),
        QAgent(server_name, "ShortSightedQAgent", TTTEnvironment(), 0.1, 0.1),
        QAgent(server_name, "HighGammaQAgent", TTTEnvironment(), 0.999, 0.1),
        QAgent(server_name, "SlowQAgent", TTTEnvironment(), 0.9, 0.01),
        QAgent(server_name, "FastQAgent", TTTEnvironment(), 0.9, 0.3),
    ] + [QAgent(server_name, "QAgent%d" % i, TTTEnvironment(), 0.9, 0.1) for i in range(10)] \
      + [SMaxQAgent(server_name, "SMaxQAgent%d" % i, TTTEnvironment(), 0.9, 0.1, temp=math.exp(i - 5)) for i in range(10)]

    for c in clients:
        threading.Thread(target=c.run).start()
    s.run()
def main(argv=None):
    global victory_history_1v2
    victory_history_1v2 = []
    global victory_history_1vR
    victory_history_1vR = []
    global victory_history_2vR
    victory_history_2vR = []

    global points_history_1v2
    points_history_1v2 = []
    global points_history_1vR
    points_history_1vR = []
    global points_history_2vR
    points_history_2vR = []

    # Initializing the environment
    logger = BriscolaLogger(BriscolaLogger.LoggerLevels.TRAIN)
    game = brisc.BriscolaGame(2, logger)

    # Initialize agents
    global agent1
    agent1 = QAgent(FLAGS.epsilon,
                    FLAGS.epsilon_increment,
                    FLAGS.epsilon_max,
                    FLAGS.discount,
                    FLAGS.network,
                    FLAGS.layers,
                    FLAGS.learning_rate,
                    FLAGS.replace_target_iter,
                    FLAGS.batch_size)
    global agent2
    agent2 = QAgent(FLAGS.epsilon,
                    FLAGS.epsilon_increment,
                    FLAGS.epsilon_max,
                    FLAGS.discount,
                    FLAGS.network,
                    FLAGS.layers,
                    FLAGS.learning_rate,
                    FLAGS.replace_target_iter,
                    FLAGS.batch_size)

    # Training
    start_time = time.time()
    best_total_wins = self_train(game, agent1, agent2,
                                 FLAGS.num_epochs,
                                 FLAGS.evaluate_every,
                                 FLAGS.num_evaluations,
                                 FLAGS.copy_every,
                                 FLAGS.model_dir)
    print('Best winning ratio : {:.2%}'.format(best_total_wins / FLAGS.num_evaluations))
    print(time.time() - start_time)

    # Summary graphs
    x = [FLAGS.evaluate_every * i for i in range(1, 1 + len(victory_history_1v2))]

    # 1v2
    vict_hist = victory_history_1v2
    point_hist = points_history_1v2
    labels = [agent1.name + '1', agent2.name + '2']
    gv.training_summary(x, vict_hist, point_hist, labels, FLAGS, "evaluation_dir/1v2")

    # 1vRandom
    vict_hist = victory_history_1vR
    point_hist = points_history_1vR
    labels = [agent1.name + '1', RandomAgent().name]
    gv.training_summary(x, vict_hist, point_hist, labels, FLAGS, "evaluation_dir/1vR")

    # 2vRandom
    vict_hist = victory_history_2vR
    point_hist = points_history_2vR
    labels = [agent2.name + '2', RandomAgent().name]
    gv.training_summary(x, vict_hist, point_hist, labels, FLAGS, "evaluation_dir/2vR")

    # Evaluation against the AI agent
    agents = [agent1, AIAgent()]
    winners, points = evaluate(game, agents, FLAGS.num_evaluations)
    gv.evaluate_summary(winners, points, agents,
                        "evaluation_dir/" + agents[0].name + "1 vs " + agents[1].name)

    agents = [agent2, AIAgent()]
    winners, points = evaluate(game, agents, FLAGS.num_evaluations)
    gv.evaluate_summary(winners, points, agents,
                        "evaluation_dir/" + agents[0].name + "2 vs " + agents[1].name)