def main():
    """Train a Gambler agent on the MouseGame and dump the learned Q-table."""
    learning_rate = 0.05
    discount = 0.9
    iterations = 10000
    agent = Gambler(learning_rate=learning_rate,
                    discount=discount,
                    iterations=iterations)

    # Set up the simulation and the score keeping.
    game = MouseGame()
    game.reset()
    total_reward = 0
    last_total = 0

    for step in range(iterations):
        prev_state = list(game.mouse)               # snapshot the current state
        action = agent.get_next_action(prev_state)  # ask the agent for its move
        next_state, reward = game.take_action(action)
        agent.update(prev_state, next_state, action.value, reward)
        total_reward += reward

        # Report a rolling performance figure every 250th iteration.
        if step % 250 == 0:
            performance = (total_reward - last_total) / 250.0
            print({'step': step, 'performance': performance, 'total_reward': total_reward})
            last_total = total_reward
        time.sleep(0.00001)  # avoid spamming stdout too fast

    # Dump the final Q-table, one cell per line.
    for i, row in enumerate(agent.q_table):
        for j, value in enumerate(row):
            print("[%d][%d]:" % (i, j), value)
    input()  # keep the console window open on Windows
def martingale(initialWealth, betAmount):
    """Play the martingale strategy on roulette: bet on a color, double the
    stake after every loss, and stop at the first win or when the gambler
    can no longer cover the next stake.

    Returns the gambler's final wealth.
    """
    roulette = Roulette()
    gambler = Gambler(initialWealth)
    stake = betAmount

    # Keep betting while the bankroll covers the current stake.
    while gambler.getWealth() >= stake:
        payout = roulette.betColor(stake)
        gambler.decrementWealth(stake)
        gambler.incrementWealth(payout)
        if payout > 0:
            break        # first win ends the run
        stake *= 2       # martingale: double after a loss

    return gambler.getWealth()
def main():
    """Command-line entry point: train the selected agent in the dungeon."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--agent', type=str, default='GAMBLER',
                        help='Which agent to use')
    parser.add_argument('--learning_rate', type=float, default=0.1,
                        help='How quickly the algorithm tries to learn')
    parser.add_argument('--discount', type=float, default=0.95,
                        help='Discount for estimated future action')
    parser.add_argument('--iterations', type=int, default=2000,
                        help='Iteration count')
    FLAGS, unparsed = parser.parse_known_args()

    # Map agent names to builders; anything unrecognised falls back to the
    # random Drunkard baseline.
    builders = {
        'GAMBLER': lambda: Gambler(learning_rate=FLAGS.learning_rate,
                                   discount=FLAGS.discount,
                                   iterations=FLAGS.iterations),
        'ACCOUNTANT': Accountant,
        'DEEPGAMBLER': lambda: DeepGambler(learning_rate=FLAGS.learning_rate,
                                           discount=FLAGS.discount,
                                           iterations=FLAGS.iterations),
    }
    agent = builders.get(FLAGS.agent, Drunkard)()

    # Set up the simulation and the score keeping.
    dungeon = DungeonSimulator()
    dungeon.reset()
    total_reward = 0
    last_total = 0

    for step in range(FLAGS.iterations):
        old_state = dungeon.state                  # store the current state
        action = agent.get_next_action(old_state)  # ask the agent for its move
        new_state, reward = dungeon.take_action(action)
        agent.update(old_state, new_state, action, reward)
        total_reward += reward

        # Report a rolling performance figure every 250th iteration.
        if step % 250 == 0:
            performance = (total_reward - last_total) / 250.0
            print(json.dumps({'step': step, 'performance': performance, 'total_reward': total_reward}))
            last_total = total_reward
        time.sleep(0.0001)  # avoid spamming stdout too fast
def main():
    """Entry point: train an agent in the dungeon, printing raw progress."""
    print("xxx")
    parser = argparse.ArgumentParser()
    parser.add_argument("--agent", type=str, default='DEEPGAMBLER',
                        help="Which agent to use")
    parser.add_argument("--learning_agent", type=float, default=0.1,
                        help="Choose Learning Rate ")
    parser.add_argument("--discount", type=float, default=0.95,
                        help="choose discount")
    parser.add_argument("--iterations", type=int, default=1000,
                        help="No of iterations")
    FLAGS, unparsed = parser.parse_known_args()
    print("xxx")

    # The two learning agents take (learning_rate, discount, exploration, iterations);
    # the two baseline agents take no arguments.
    simple_agents = {"DRUNKARD": Drunkard, "ACCOUNTANT": Accountant}
    if FLAGS.agent in simple_agents:
        agent = simple_agents[FLAGS.agent]()
    elif FLAGS.agent == "GAMBLER":
        agent = Gambler(FLAGS.learning_agent, FLAGS.discount, 1.0, FLAGS.iterations)
    else:
        agent = DeepGambler(FLAGS.learning_agent, FLAGS.discount, 1.0, FLAGS.iterations)

    dungeon = DungeonSimulator()
    dungeon.reset()
    total_reward = 0
    print("agent created")

    for step in range(FLAGS.iterations):
        old_state = dungeon.state
        action = agent.get_next_action(old_state)
        new_state, reward = dungeon.take_action(action)
        agent.update(old_state, new_state, action, reward)
        total_reward += reward

        # Report the running score every 250th iteration.
        if step % 250 == 0:
            print(json.dumps({'step': step, 'total-reward': total_reward}))
        time.sleep(0.0001)

    print("FINAL Q TABLE", agent.q_table)
def example_1():
    """
    Example 1: Obtains the solution for a given gambler problem using
    value iteration and policy iteration.
    """
    # Problem definition for the base gambler's problem.
    problem_name = 'base_problem'
    target_wealth = 100
    head_probability = 0.4
    discount_factor = 0.9

    solver = Gambler(problem_name, target_wealth, head_probability, discount_factor)

    # Solve the same problem with each of the two dynamic-programming methods.
    for method in ('value_iteration', 'policy_iteration'):
        solver.obtain_optimum(method)
from blackjack import Blackjack
from gambler import Gambler
from card import Suit, CardFace, Card
from random import shuffle

# Read a line from stdin on both Python 2 and 3.  The original called
# raw_input(), which raises NameError on Python 3.
try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input      # Python 3

# spawns a blackjack game with 7 decks.
deck = Blackjack(7)
player = Gambler()
dealer = Gambler()
playing = True


def print_dealt_card(self, card_suit, card_value):
    # Placeholder output for a dealt card; suit/value are not yet shown.
    print("cards")


def deal_player_card(self):
    # Deal one card into each of the player's hands.
    # BUGFIX: the original appended the bound method object (deck.deal_card)
    # instead of calling it, so hands filled with methods rather than cards.
    # NOTE(review): assumes deal_card is a method, not a property — confirm
    # against the Blackjack class.
    for hand in player.hand:
        hand.append(deck.deal_card())


# Main menu loop: prompt until the player quits.
while (playing):
    print("Enter S to start a new game, or Q to quit")
    main_selection = read_line()
    main_selection = main_selection.lower()
    dealing = None
    if (main_selection == 's'):
        dealing = True