# Example no. 1
def main():
    """Train a Gambler agent on the MouseGame and report progress.

    Runs a fixed number of learning iterations, printing a small metrics
    dict every 250 steps, then dumps the learned Q-table entry by entry.
    """
    learning_rate = 0.05
    discount = 0.9
    iterations = 10000

    agent = Gambler(learning_rate=learning_rate,
                    discount=discount,
                    iterations=iterations)

    # Set up the simulation.
    mouse_game = MouseGame()
    mouse_game.reset()
    total_reward = 0  # Score keeping
    last_total = 0    # Total reward at the previous report

    # Main training loop.
    for step in range(iterations):
        old_state = list(mouse_game.mouse)  # Copy current state before acting
        action = agent.get_next_action(old_state)  # Query agent for the next action
        new_state, reward = mouse_game.take_action(action)  # Act, observe result
        agent.update(old_state, new_state, action.value, reward)  # Agent updates internals

        total_reward += reward  # Keep score
        if step % 250 == 0:  # Print out metadata every 250th iteration
            # Average reward per step since the previous report.
            performance = (total_reward - last_total) / 250.0
            print({
                'step': step,
                'performance': performance,
                'total_reward': total_reward,
            })
            last_total = total_reward

        time.sleep(0.00001)  # Avoid spamming stdout too fast!

    # Dump the learned Q-table, one entry per line.
    for i, row in enumerate(agent.q_table):
        for j, value in enumerate(row):
            print(f"[{i}][{j}]:", value)
    input()  # So the console window doesn't close on Windows.
# Example no. 2
def martingale(initialWealth, betAmount):
    """Play the martingale betting strategy at roulette.

    Starting from ``betAmount``, the stake doubles after every losing
    color bet and play stops after the first win, or as soon as the
    gambler can no longer cover the next stake.

    Returns the gambler's remaining wealth.
    """
    wheel = Roulette()
    player = Gambler(initialWealth)

    stake = betAmount
    while player.getWealth() >= stake:
        payout = wheel.betColor(stake)
        player.decrementWealth(stake)
        player.incrementWealth(payout)
        if payout > 0:
            # First win ends the run.
            break
        stake *= 2  # Double down after a loss.

    return player.getWealth()
# Example no. 3
def main():
    """Train a selectable agent in the dungeon simulator, logging JSON metrics."""
    # Command-line flags.
    parser = argparse.ArgumentParser()
    parser.add_argument('--agent', type=str, default='GAMBLER', help='Which agent to use')
    parser.add_argument('--learning_rate', type=float, default=0.1, help='How quickly the algorithm tries to learn')
    parser.add_argument('--discount', type=float, default=0.95, help='Discount for estimated future action')
    parser.add_argument('--iterations', type=int, default=2000, help='Iteration count')
    flags, _unparsed = parser.parse_known_args()

    # Pick the agent implementation; Drunkard is the fallback.
    if flags.agent == 'GAMBLER':
        agent = Gambler(learning_rate=flags.learning_rate, discount=flags.discount, iterations=flags.iterations)
    elif flags.agent == 'ACCOUNTANT':
        agent = Accountant()
    elif flags.agent == 'DEEPGAMBLER':
        agent = DeepGambler(learning_rate=flags.learning_rate, discount=flags.discount, iterations=flags.iterations)
    else:
        agent = Drunkard()

    # Simulation setup.
    sim = DungeonSimulator()
    sim.reset()
    score = 0       # running reward total
    checkpoint = 0  # score at the last report

    # Main training loop.
    for step in range(flags.iterations):
        state_before = sim.state                      # remember state before acting
        action = agent.get_next_action(state_before)  # ask the agent what to do
        state_after, reward = sim.take_action(action) # act and observe
        agent.update(state_before, state_after, action, reward)

        score += reward
        if step % 250 == 0:
            # Average reward per step since the previous report.
            performance = (score - checkpoint) / 250.0
            print(json.dumps({'step': step, 'performance': performance, 'total_reward': score}))
            checkpoint = score

        time.sleep(0.0001)  # throttle stdout
# Example no. 4
def main():
    """Train an agent in the dungeon simulator and dump the final Q-table.

    Command-line flags choose the agent and its hyper-parameters; progress
    is logged as JSON every 250 steps.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--agent", type=str, default='DEEPGAMBLER', help="Which agent to use")
    # NOTE(review): the flag is named --learning_agent but is used as the
    # learning rate; kept as-is for command-line compatibility.
    parser.add_argument("--learning_agent", type=float, default=0.1, help="Choose Learning Rate ")
    parser.add_argument("--discount", type=float, default=0.95, help="choose discount")
    parser.add_argument("--iterations", type=int, default=1000, help="No of iterations")
    FLAGS, _unparsed = parser.parse_known_args()

    # Select the agent implementation; DeepGambler is the default.
    if FLAGS.agent == "DRUNKARD":
        agent = Drunkard()
    elif FLAGS.agent == "ACCOUNTANT":
        agent = Accountant()
    elif FLAGS.agent == "GAMBLER":
        agent = Gambler(FLAGS.learning_agent, FLAGS.discount, 1.0, FLAGS.iterations)
    else:
        agent = DeepGambler(FLAGS.learning_agent, FLAGS.discount, 1.0, FLAGS.iterations)

    # Simulation setup.
    dungeon = DungeonSimulator()
    dungeon.reset()
    total_reward = 0  # Score keeping

    # Main training loop.
    for step in range(FLAGS.iterations):
        old_state = dungeon.state  # Store current state
        action = agent.get_next_action(old_state)  # Query agent for the next action
        new_state, reward = dungeon.take_action(action)  # Take action, observe
        agent.update(old_state, new_state, action, reward)  # Agent updates internals

        total_reward += reward

        if step % 250 == 0:  # Report every 250th iteration
            print(json.dumps({'step': step, 'total-reward': total_reward}))

        time.sleep(0.0001)  # Avoid spamming stdout too fast

    print("FINAL Q TABLE", agent.q_table)
# Example no. 5
def example_1():
    """Solve the base gambler's problem with both solution methods.

    Builds a Gambler named ``base_problem`` with a goal of 100, a win
    probability of 0.4 and a discount of 0.9, then obtains the optimum
    value estimate and policy via value iteration and, separately, via
    policy iteration.
    """
    gambler = Gambler('base_problem', 100, 0.4, 0.9)

    # Solve once with each method.
    gambler.obtain_optimum('value_iteration')
    gambler.obtain_optimum('policy_iteration')
# Example no. 6
from blackjack import Blackjack
from gambler import Gambler
from card import Suit, CardFace, Card
from random import shuffle

# spawns a blackjack game with 7 decks.

deck = Blackjack(7)  # shared shoe built from 7 decks
player = Gambler()   # presumably the human player's state — confirm against Gambler
dealer = Gambler()   # presumably the dealer's state — confirm against Gambler
playing = True       # main-loop flag; the loop below runs while this is True


def print_dealt_card(self, card_suit, card_value):
    # NOTE(review): placeholder — ignores every argument (including the stray
    # `self` on a module-level function) and only prints a fixed string.
    print("cards")


def deal_player_card(self):
    """Deal one card from the shoe into each of the player's hands.

    The `self` parameter is unused on this module-level function; it is
    kept so existing call sites remain compatible.
    """
    for hand in player.hand:
        # BUG FIX: the original appended the bound method object
        # `deck.deal_card` instead of calling it; call it so the dealt
        # card is what lands in the hand.
        # NOTE(review): assumes deal_card is a method, not a property —
        # confirm against the Blackjack class.
        hand.append(deck.deal_card())


while (playing):

    print("Enter S to start a new game, or Q to quit")
    main_selection = raw_input()
    main_selection = main_selection.lower()
    dealing = None

    if (main_selection == 's'):
        dealing = True