def team_test(load=False):
    """Train or replay a 2-vs-2 match: two plain Agents vs two Q-learners.

    With load=False the Q-learning team trains head-less for `iterations`
    steps before the match is rendered; with load=True previously saved
    weights are restored and learning is frozen for evaluation.
    """
    game = Game(100, 100)
    agent1 = Agent()
    agent1b = Agent()
    agent2 = QLearningAgent(epsilon=0.5, alpha=.2, alpha_decay=1.0)
    agent2b = QLearningAgent(epsilon=0.5, alpha=.2, alpha_decay=1.0)
    if load:
        agent2.load_weights("team_weights.txt")
        agent2.alpha = 0
        # Fix: previously only agent2 was loaded/frozen, so its teammate
        # evaluated with fresh weights and kept learning (alpha=.2).
        # Give the teammate the same trained weights and freeze it too.
        agent2b.load_weights("team_weights.txt")
        agent2b.alpha = 0
    # add agents
    game.add_agent(agent1, (47, 30), 0)
    game.add_agent(agent1b, (53, 30), 0)
    game.add_agent(agent2, (0, 0), 1)
    game.add_agent(agent2b, (0, 50), 1)
    # simulate game
    iterations = 10000
    # Persist learned weights even if the process is interrupted.
    atexit.register(lambda: agent2.save_weights("team_weights.txt"))
    game.start()
    # agent2.debug = True
    if not load:
        run_for_iterations(game, iterations)
    game.add_listener(GraphicsListener(game))
    # Mostly-greedy policy while we watch the rendered match.
    agent2.epsilon = 0.01
    agent2b.epsilon = 0.01
    for _ in xrange(iterations):
        game.loop()
        time.sleep(.05)
def main():
    """Build three environments and train a tabular Q-learner on FrozenLake."""
    # Deterministic FrozenLake (is_slippery=False) so transitions match actions.
    lake_env = gym.make('FrozenLake-v0', is_slippery=False)
    coffee_env = coffeegame.CoffeeEnv()
    # .env unwraps the TimeLimit wrapper so episodes are not truncated.
    taxi_env = gym.make('Taxi-v3').env
    lake_agent = QLearningAgent(lake_env, 100, 20000, decay_rate=0.001, alpha=0.1, gamma=0.6, epsilon=1, rendering_enabled=False)
    coffee_agent = QLearningAgent(coffee_env, 100, 20000, decay_rate=0.01, alpha=0.1, gamma=0.6, epsilon=1, rendering_enabled=False)
    taxi_agent = QLearningAgent(taxi_env, 150, 100000, decay_rate=0.01, alpha=0.1, gamma=0.6, epsilon=1, rendering_enabled=False)
    # NOTE(review): coffee_agent and taxi_agent are constructed but .learn()
    # is only invoked for the lake agent — confirm whether their runs were
    # removed intentionally or should be restored.
    lake_agent.learn("FrozenLake-v0 (non-slippery)")
def single_agent_test(load=False):
    """Run a single Q-learning agent alone on a 100x100 board.

    load=False: train head-less for 10000 iterations, then render.
    load=True: restore saved weights, freeze learning, render immediately.
    """
    game = Game(100, 100)
    learner = QLearningAgent(alpha=.2, alpha_decay=1.0, epsilon=.5)
    if load:
        # Evaluation mode: restore weights, stop learning, almost no exploration.
        learner.load_weights("single_agent_weights.txt")
        learner.alpha = 0
        learner.epsilon = .01
    # add agents
    game.add_agent(learner, (25, 15), 0)
    # simulate game
    iterations = 50000
    # Save whatever has been learned when the process exits.
    atexit.register(lambda: learner.save_weights("single_agent_weights.txt"))
    game.start()
    # learner.debug = True
    if not load:
        run_for_iterations(game, 10000)
    game.add_listener(GraphicsListener(game))
    for _ in xrange(iterations):
        game.loop()
        time.sleep(.05)
def learning_enemies_test(load=False):
    """Two mixed teams (Q-learner + heuristic helper) facing each other.

    load=False: train head-less for `iterations` steps and register a
    shared-QFunction save on exit, then render. load=True: restore the
    shared QFunction, attach a collaborative team agent to team 1, freeze
    team 1's learning, and render immediately.
    """
    game = Game(100, 100)
    # Team 0: learner plus heuristic helper.
    team0_learner = QLearningAgent(epsilon=0.2, alpha=.2, alpha_decay=1.0)
    team0_helper = HeuristicAgent()
    # Team 1: same composition.
    team1_learner = QLearningAgent(epsilon=0.2, alpha=.2, alpha_decay=1.0)
    team1_helper = HeuristicAgent()
    if load:
        QFunction.load("learning_weights.txt")
        game.set_team_agent(CollaborativeTeamAgent(), 1)
        # Freeze team 1 while evaluating the loaded weights.
        team1_learner.alpha = 0
        team1_helper.alpha = 0
    # add agents
    game.add_agent(team0_learner, (5, 10), 0)
    game.add_agent(team0_helper, (95, 10), 0)
    game.add_agent(team1_learner, (100, 50), 1)
    game.add_agent(team1_helper, (100, 50), 1)
    # simulate game
    iterations = 10000
    if not load:
        # Only persist the shared QFunction after a fresh training run.
        atexit.register(lambda: QFunction.save("learning_weights.txt"))
    game.start()
    # team1_learner.debug = True
    if not load:
        run_for_iterations(game, iterations)
    team1_learner.debug = False
    game.add_listener(GraphicsListener(game))
    for _ in xrange(iterations):
        game.loop()
        time.sleep(.05)
def visualize_test():
    """Replay trained weights with the state-value visualization overlay.

    Loads the shared QFunction, freezes team 1's learning, and renders the
    match with a StateVisualization drawn onto the same graphics canvas.
    """
    game = Game(100, 100)
    learner_a = QLearningAgent(epsilon=0.2, alpha=.2, alpha_decay=1.0)
    helper_a = HeuristicAgent()
    learner_b = QLearningAgent(epsilon=0.2, alpha=.2, alpha_decay=1.0)
    helper_b = HeuristicAgent()
    QFunction.load("learning_weights.txt")
    game.set_team_agent(CollaborativeTeamAgent(), 1)
    # Freeze team 1's learning while we watch.
    learner_b.alpha = 0
    helper_b.alpha = 0
    # add agents
    game.add_agent(learner_a, (5, 10), 0)
    game.add_agent(helper_a, (95, 10), 0)
    game.add_agent(learner_b, (100, 50), 1)
    game.add_agent(helper_b, (100, 50), 1)
    # simulate game
    iterations = 10000
    game.start()
    # clear_screen=False so the value overlay is not wiped each frame.
    graphics = GraphicsListener(game, clear_screen=False)
    game.add_listener(
        StateVisualization(game, 1, 0, root=graphics.master, canvas=graphics.w))
    game.add_listener(graphics)
    for _ in xrange(iterations):
        game.loop()
        time.sleep(.05)
def battle(): game = Game(100, 100) agent1 = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0) agent1b = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0) agent1c = HeuristicAgent() agent1d = HeuristicAgent() agent2 = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0) agent2b = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0) agent2c = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0) agent2d = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0) QFunction.load("learning_weights.txt") # game.set_team_agent(CollaborativeTeamAgent(), 1) agent2.alpha = 0 agent2b.alpha = 0 # add agents game.add_agent(agent1, (0, 0), 0) game.add_agent(agent1b, (100, 0), 0) # game.add_agent(agent1c, (95,10), 0) # game.add_agent(agent1d, (95,10), 0) game.add_agent(agent2, (100,50), 1) game.add_agent(agent2b, (0,50), 1) # game.add_agent(agent2c, (100,50), 1) # game.add_agent(agent2d, (100,50), 1) # simulate game iterations = 10000 game.start() # game.add_listener(GraphicsListener(game)) # agent2.debug = True for _ in xrange(iterations): game.loop() # time.sleep(.05) print game.game_state.scores
# break # averageOptima = sum(optima) / numGames # averageRounds = total / numGames # print "Average win rate: {0:.2f}%".format(100.0 * wins / total) # print "Average number rounds before bust: {0:.2f}".format(1.0 * averageRounds) # print "Average maximum money over games: {0:.2f}".format(1.0 * averageOptima) # print "S.D. in maximum money over games: {0:.2f}".format((1.0 * sum([(i - averageOptima)**2 for i in optima]) / numGames)**0.5) # print "S.D. in number of rounds over games: {0:.2f}".format((1.0 * sum([(i - averageRounds)**2 for i in numRounds]) / numGames)**0.5) ## Getting win rate for qlearner trainingRounds = 1000000 file = "qLearningMitBetting1000" args = {"flags": ["-np", "-cd"], "file": file} # args = {"flags": ["-np"]} player = QLearningAgent(0.8, 0.1, 1, **args) game = Blackjack(8, player, **args) rounds = 0 player.setTraining(True) wins = 0 total = 0 while rounds < trainingRounds: result = game.playRound() if rounds % 1000 == 0: print rounds rounds += 1 # # Writes best action to file # s = "" # for j in xrange(20, 3, -1): # for i in xrange(2, 12):