예제 #1
0
def team_test(load=False):
    """Run a 2-vs-2 match: two scripted Agents against two Q-learners.

    When ``load`` is True, previously saved weights are restored for the
    first learner (and its learning rate zeroed) and training is skipped;
    otherwise the learners train first.  Either way the match is then
    replayed with graphics at near-greedy exploration.
    """
    game = Game(100, 100)

    scripted_one = Agent()
    scripted_two = Agent()
    learner_one = QLearningAgent(epsilon=0.5, alpha=.2, alpha_decay=1.0)
    learner_two = QLearningAgent(epsilon=0.5, alpha=.2, alpha_decay=1.0)

    if load:
        # NOTE(review): only the first learner restores weights and stops
        # learning; the second keeps fresh state -- confirm this is intended.
        learner_one.load_weights("team_weights.txt")
        learner_one.alpha = 0

    # place both teams on the field (team 0 scripted, team 1 learning)
    game.add_agent(scripted_one, (47, 30), 0)
    game.add_agent(scripted_two, (53, 30), 0)
    game.add_agent(learner_one, (0, 0), 1)
    game.add_agent(learner_two, (0, 50), 1)

    iterations = 10000

    # persist learned weights however the process exits
    atexit.register(lambda: learner_one.save_weights("team_weights.txt"))

    game.start()
    if not load:
        run_for_iterations(game, iterations)

    # replay with graphics, almost fully greedy
    game.add_listener(GraphicsListener(game))
    learner_one.epsilon = 0.01
    learner_two.epsilon = 0.01

    step = 0
    while step < iterations:
        game.loop()
        time.sleep(.05)
        step += 1
예제 #2
0
def single_agent_test(load=False):
    """Train (or replay) a single Q-learning agent on its own.

    With ``load`` set, saved weights are restored and the agent plays
    near-greedily without learning; otherwise it trains for 10000
    iterations before the graphical replay starts.
    """
    game = Game(100, 100)
    learner = QLearningAgent(alpha=.2, alpha_decay=1.0, epsilon=.5)

    if load:
        learner.load_weights("single_agent_weights.txt")
        learner.alpha = 0
        learner.epsilon = .01

    # the lone agent on team 0
    game.add_agent(learner, (25, 15), 0)

    iterations = 50000

    # persist learned weights however the process exits
    atexit.register(lambda: learner.save_weights("single_agent_weights.txt"))

    game.start()

    if not load:
        run_for_iterations(game, 10000)

    game.add_listener(GraphicsListener(game))

    tick = 0
    while tick < iterations:
        game.loop()
        time.sleep(.05)
        tick += 1
예제 #3
0
def battle():
    game = Game(100, 100)

    agent1 = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0)
    agent1b = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0)
    agent1c = HeuristicAgent()
    agent1d = HeuristicAgent()
    agent2 = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0)
    agent2b = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0)
    agent2c = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0)
    agent2d = QLearningAgent(epsilon=0.01, alpha=0, alpha_decay=1.0)

    QFunction.load("learning_weights.txt")
    # game.set_team_agent(CollaborativeTeamAgent(), 1)
    agent2.alpha = 0
    agent2b.alpha = 0

    # add agents
    game.add_agent(agent1, (0, 0), 0)
    game.add_agent(agent1b, (100, 0), 0)
    # game.add_agent(agent1c, (95,10), 0)
	# game.add_agent(agent1d, (95,10), 0)
    game.add_agent(agent2, (100,50), 1)
    game.add_agent(agent2b, (0,50), 1)
    # game.add_agent(agent2c, (100,50), 1)
    # game.add_agent(agent2d, (100,50), 1)

	# simulate game
    iterations = 10000

    game.start()
    # game.add_listener(GraphicsListener(game))

    # agent2.debug = True

    for _ in xrange(iterations):
        game.loop()
        # time.sleep(.05)

    print game.game_state.scores
예제 #4
0
def learning_enemies_test(load=False):
    """Train a Q-learner/heuristic pair per team, then replay with graphics.

    With ``load`` set, shared QFunction weights are restored, team 1 gets
    a collaborative team agent, learning is frozen, and training is
    skipped; otherwise the agents train and the weights are saved at exit.
    """
    game = Game(100, 100)

    red_learner = QLearningAgent(epsilon=0.2, alpha=.2, alpha_decay=1.0)
    red_helper = HeuristicAgent()

    blue_learner = QLearningAgent(epsilon=0.2, alpha=.2, alpha_decay=1.0)
    blue_helper = HeuristicAgent()

    if load:
        QFunction.load("learning_weights.txt")
        game.set_team_agent(CollaborativeTeamAgent(), 1)
        blue_learner.alpha = 0
        # NOTE(review): setting alpha on a HeuristicAgent -- presumably a
        # harmless no-op attribute; confirm against HeuristicAgent.
        blue_helper.alpha = 0

    # team 0 on the left edge, team 1 stacked on the right
    game.add_agent(red_learner, (5, 10), 0)
    game.add_agent(red_helper, (95, 10), 0)

    game.add_agent(blue_learner, (100, 50), 1)
    game.add_agent(blue_helper, (100, 50), 1)

    iterations = 10000

    if not load:
        # save the shared weights however the process exits
        atexit.register(lambda: QFunction.save("learning_weights.txt"))

    game.start()
    if not load:
        run_for_iterations(game, iterations)

    blue_learner.debug = False
    game.add_listener(GraphicsListener(game))

    tick = 0
    while tick < iterations:
        game.loop()
        time.sleep(.05)
        tick += 1
예제 #5
0
def visualize_test():
    """Replay a trained match while overlaying a live state visualization.

    Loads the shared QFunction weights, freezes learning for team 1, and
    renders both the game graphics and a StateVisualization that draws
    into the same Tk window/canvas.
    """
    game = Game(100, 100)

    red_learner = QLearningAgent(epsilon=0.2, alpha=.2, alpha_decay=1.0)
    red_helper = HeuristicAgent()

    blue_learner = QLearningAgent(epsilon=0.2, alpha=.2, alpha_decay=1.0)
    blue_helper = HeuristicAgent()

    QFunction.load("learning_weights.txt")
    game.set_team_agent(CollaborativeTeamAgent(), 1)
    blue_learner.alpha = 0
    blue_helper.alpha = 0

    # team 0 on the left edge, team 1 stacked on the right
    game.add_agent(red_learner, (5, 10), 0)
    game.add_agent(red_helper, (95, 10), 0)

    game.add_agent(blue_learner, (100, 50), 1)
    game.add_agent(blue_helper, (100, 50), 1)

    iterations = 10000

    game.start()

    # share the window between the renderer and the overlay; the overlay
    # is registered first so the renderer draws on top of it
    graphics = GraphicsListener(game, clear_screen=False)
    overlay = StateVisualization(game, 1, 0, root=graphics.master,
                                 canvas=graphics.w)
    game.add_listener(overlay)
    game.add_listener(graphics)

    tick = 0
    while tick < iterations:
        game.loop()
        time.sleep(.05)
        tick += 1
예제 #6
0
def main():
    """Build three environments with Q-learning agents; train on FrozenLake.

    NOTE(review): the coffee and taxi agents are constructed but never
    trained here -- presumably leftover scaffolding; confirm before use.
    """
    lake_env = gym.make('FrozenLake-v0', is_slippery=False)
    coffee_env = coffeegame.CoffeeEnv()
    taxi_env = gym.make('Taxi-v3').env

    lake_agent = QLearningAgent(
        lake_env, 100, 20000, decay_rate=0.001, alpha=0.1, gamma=0.6,
        epsilon=1, rendering_enabled=False)
    coffee_agent = QLearningAgent(
        coffee_env, 100, 20000, decay_rate=0.01, alpha=0.1, gamma=0.6,
        epsilon=1, rendering_enabled=False)
    taxi_agent = QLearningAgent(
        taxi_env, 150, 100000, decay_rate=0.01, alpha=0.1, gamma=0.6,
        epsilon=1, rendering_enabled=False)

    lake_agent.learn("FrozenLake-v0 (non-slippery)")
예제 #7
0
# 			break

# averageOptima = sum(optima) / numGames
# averageRounds = total / numGames
# print "Average win rate: {0:.2f}%".format(100.0 * wins / total)
# print "Average number rounds before bust: {0:.2f}".format(1.0 * averageRounds)
# print "Average maximum money over games: {0:.2f}".format(1.0 * averageOptima)
# print "S.D. in maximum money over games: {0:.2f}".format((1.0 * sum([(i - averageOptima)**2 for i in optima]) / numGames)**0.5)
# print "S.D. in number of rounds over games: {0:.2f}".format((1.0 * sum([(i - averageRounds)**2 for i in numRounds]) / numGames)**0.5)

## Getting win rate for qlearner
# Train the Q-learning blackjack player for a fixed number of rounds,
# logging progress every 1000 rounds.
trainingRounds = 1000000
# NOTE(review): `file` shadows the Python 2 builtin of the same name; it
# feeds the "file" entry of `args` below.  Rename with care -- later
# (unseen) code may reference it.
file = "qLearningMitBetting1000"
# flag meanings not visible from here -- TODO confirm what -np / -cd do
args = {"flags": ["-np", "-cd"], "file": file}
# args = {"flags": ["-np"]}
player = QLearningAgent(0.8, 0.1, 1, **args)
game = Blackjack(8, player, **args)  # 8 presumably the deck count -- confirm
rounds = 0
player.setTraining(True)  # enable learning updates during training rounds
# NOTE(review): wins/total are initialized here but not updated in the
# visible loop -- presumably used by code beyond this chunk.
wins = 0
total = 0
while rounds < trainingRounds:
    result = game.playRound()
    if rounds % 1000 == 0:
        print rounds
    rounds += 1

# # Writes best action to file
# s = ""
# for j in xrange(20, 3, -1):
# 	for i in xrange(2, 12):
예제 #8
0
    # Q learner iterations: optional "-qiter N" CLI flag overrides the
    # number of training rounds.
    i = multIndex(sys.argv, ["-qiter"])
    if i != None and i != len(sys.argv) - 1:
        trainingRounds = int(sys.argv[i + 1])

    # Searching for which agent to use ("-a"/"-agent" CLI flag);
    # falls back to the plain Player when no recognized value is given.
    player = Player(**args)
    i = multIndex(sys.argv, ["-a", "-agent"])
    if i != None and i != len(sys.argv) - 1:
        if sys.argv[i + 1] == "random":
            player = RandomAgent(**args)
        elif sys.argv[i + 1] == "basic":
            player = BasicStrategyAgent(**args)
        elif sys.argv[i + 1] == "qlearning":
            player = QLearningAgent(0.8, 0.1, 1, **args)
            # NOTE(review): `qlearning` is only assigned in this branch;
            # presumably initialized to False above this chunk -- confirm.
            qlearning = True

    game = Blackjack(8, player, **args)

    rounds = 0
    if qlearning:
        # training cycle: learn for trainingRounds rounds (progress
        # printed every 1000), then freeze the policy and reset the
        # bankroll for evaluation.
        player.setTraining(True)
        while rounds < trainingRounds:
            result = game.playRound()
            if rounds % 1000 == 0:
                print rounds
            rounds += 1
        player.setTraining(False)
        player.setMoney(const.startingMoney)