Example 1
import time

# Gambler and MouseGame are assumed to be defined elsewhere in this project.
def main():
    learning_rate = 0.05
    discount = 0.9
    iterations = 10000

    agent = Gambler(learning_rate=learning_rate,
                    discount=discount,
                    iterations=iterations)

    # setup simulation
    mouseGame = MouseGame()
    mouseGame.reset()
    total_reward = 0  # Score keeping
    last_total = 0

    # main loop
    for step in range(iterations):
        old_state = list(mouseGame.mouse)  # Store current state
        action = agent.get_next_action(
            old_state)  # Query agent for the next action
        new_state, reward = mouseGame.take_action(
            action)  # Take action, get new state and reward
        agent.update(old_state, new_state, action.value,
                     reward)  # Let the agent update internals

        total_reward += reward  # Keep score
        if step % 250 == 0:  # Print out metadata every 250th iteration
            performance = (total_reward - last_total) / 250.0
            print({
                'step': step,
                'performance': performance,
                'total_reward': total_reward
            })
            last_total = total_reward

        time.sleep(0.00001)  # Avoid spamming stdout too fast!

    # print("Final Q-table", agent.q_table)
    for i in range(len(agent.q_table)):
        for j in range(len(agent.q_table[i])):
            #print("[" + str(i) + "][" + str(j) + "]: ", end="")
            #print(agent.q_table[i][j])
            print("[%d][%d]:" % (i, j), agent.q_table[i][j])
    input()  # so the console window doesn't close on Windows
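
The loop above touches the agent only through get_next_action and update, and the q_table printed at the end suggests a tabular method. Below is a minimal sketch of a tabular Q-learning agent with that interface; the integer state indices, epsilon-greedy policy, and table shape are assumptions for illustration, not the tutorial's actual Gambler class.

import random

class QLearningSketch:
    """Minimal tabular Q-learning agent (illustrative sketch only)."""

    def __init__(self, n_states, n_actions, learning_rate=0.05, discount=0.9):
        self.learning_rate = learning_rate
        self.discount = discount
        # q_table[state][action] -> estimated value, mirroring agent.q_table above
        self.q_table = [[0.0] * n_actions for _ in range(n_states)]

    def get_next_action(self, state, epsilon=0.1):
        # Epsilon-greedy: explore with probability epsilon, otherwise exploit
        if random.random() < epsilon:
            return random.randrange(len(self.q_table[state]))
        row = self.q_table[state]
        return row.index(max(row))

    def update(self, old_state, new_state, action, reward):
        # Standard Q-learning target: reward plus discounted best future value
        best_future = max(self.q_table[new_state])
        old_value = self.q_table[old_state][action]
        self.q_table[old_state][action] = old_value + self.learning_rate * (
            reward + self.discount * best_future - old_value)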
Example 2
import argparse
import json
import time

# Gambler, Accountant, DeepGambler, Drunkard and DungeonSimulator are
# assumed to be defined elsewhere in this project.
def main():
    # parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--agent', type=str, default='GAMBLER', help='Which agent to use')
    parser.add_argument('--learning_rate', type=float, default=0.1, help='How quickly the algorithm tries to learn')
    parser.add_argument('--discount', type=float, default=0.95, help='Discount for estimated future action')
    parser.add_argument('--iterations', type=int, default=2000, help='Iteration count')
    FLAGS, unparsed = parser.parse_known_args()

    # select agent
    if FLAGS.agent == 'GAMBLER':
        agent = Gambler(learning_rate=FLAGS.learning_rate, discount=FLAGS.discount, iterations=FLAGS.iterations)
    elif FLAGS.agent == 'ACCOUNTANT':
        agent = Accountant()
    elif FLAGS.agent == 'DEEPGAMBLER':
        agent = DeepGambler(learning_rate=FLAGS.learning_rate, discount=FLAGS.discount, iterations=FLAGS.iterations)
    else:
        agent = Drunkard()

    # setup simulation
    dungeon = DungeonSimulator()
    dungeon.reset()
    total_reward = 0 # Score keeping
    last_total = 0

    # main loop
    for step in range(FLAGS.iterations):
        old_state = dungeon.state # Store current state
        action = agent.get_next_action(old_state) # Query agent for the next action
        new_state, reward = dungeon.take_action(action) # Take action, get new state and reward
        agent.update(old_state, new_state, action, reward) # Let the agent update internals

        total_reward += reward # Keep score
        if step % 250 == 0: # Print out metadata every 250th iteration
            performance = (total_reward - last_total) / 250.0
            print(json.dumps({'step': step, 'performance': performance, 'total_reward': total_reward}))
            last_total = total_reward

        time.sleep(0.0001) # Avoid spamming stdout too fast!
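
Both examples drive the same contract: the environment exposes reset() and take_action(action) returning a (new_state, reward) pair, while the agent exposes get_next_action(state) and update(old_state, new_state, action, reward). A baseline satisfying that contract can be as simple as the sketch below, a plausible stand-in for the Drunkard fallback above; the action set here is an assumption, not the project's actual definition.

import random

class RandomAgentSketch:
    """Illustrative stand-in for the Drunkard fallback: acts at random."""

    def __init__(self, actions=(0, 1)):  # action set is an assumption
        self.actions = actions

    def get_next_action(self, state):
        # Ignore the state entirely and pick uniformly at random
        return random.choice(self.actions)

    def update(self, old_state, new_state, action, reward):
        # A random agent learns nothing, so update is a no-op
        pass

Assuming the script is saved as main.py (a hypothetical filename), it could then be run with, e.g., python main.py --agent DRUNKARD --iterations 2000; any unrecognized --agent value also falls through to the Drunkard branch.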