def __init__(self, explore_policy='constant epsilon', eps=0.05, natural=False,
             eps_decay=0.9999, show_every=1000000, evaluate_iter=1000,
             temp_decay=0.9999, init_temp=20):
    """Initialise the Q-table, bookkeeping, environment and exploration schedule.

    Args:
        explore_policy: Name of the exploration strategy. One of
            'constant_epsilon', 'decay_epsilon' or 'boltzmann_exploration'.
            Spaces are treated as underscores, so the default
            'constant epsilon' selects 'constant_epsilon'.
        eps: Epsilon stored for 'constant_epsilon'. The other strategies
            start with ``self.eps = 1``.
        natural: Forwarded to ``blackjack.BlackjackEnv(natural=...)``.
        eps_decay: Stored as ``self.eps_decay`` for 'decay_epsilon' only;
            every other strategy stores 1.
        show_every: Stored as ``self.show_every`` (presumably a reporting
            interval -- confirm at the use site).
        evaluate_iter: Stored as ``self.evaluate_iter``.
        temp_decay: Stored as ``self.temp_decay`` for
            'boltzmann_exploration' only; every other strategy stores 1.
        init_temp: Initial value of ``self.temp`` for every strategy.

    Raises:
        ValueError: If ``explore_policy`` does not name a known strategy.
    """
    self.Q = self.initializeQ()
    self.winrates = []
    self.natural = natural
    self.show_every = show_every
    self.evaluate_iter = evaluate_iter
    self.env = blackjack.BlackjackEnv(natural=self.natural)
    self.n_sub_optimals = []
    self.min_eps = 0.001
    self.min_temp = 0.1

    # The default argument is spelled with a space while the branches below
    # use underscores. Without this normalisation a default-constructed agent
    # matched no branch and was left without explore_policy/eps/temp
    # attributes.
    policy_name = str(explore_policy).replace(' ', '_')

    # Every strategy starts from the same temperature.
    self.temp = init_temp

    if policy_name == 'constant_epsilon':
        self.explore_policy = self.e_greedy
        self.eps = eps
        self.eps_decay = 1
        self.temp_decay = 1
    elif policy_name == 'decay_epsilon':
        self.explore_policy = self.e_greedy
        self.eps = 1
        self.eps_decay = eps_decay
        self.temp_decay = 1
    elif policy_name == 'boltzmann_exploration':
        self.explore_policy = self.boltzmann_exploration
        self.eps = 1
        self.eps_decay = 1
        self.temp_decay = temp_decay
    else:
        # Fail here rather than with an AttributeError on first use.
        raise ValueError(
            "Unknown explore_policy {!r}; expected 'constant_epsilon', "
            "'decay_epsilon' or 'boltzmann_exploration'".format(explore_policy)
        )
def __init__(self):
    """Create the online/target networks, replay buffer and environment.

    Both networks come from ``self.create_model()``; the target network is
    then given the online network's weights so the two start out identical.
    """
    # Main model: gets trained every step.
    self.model = self.create_model()

    # Target model: this is what we .predict against every step.
    self.target_model = self.create_model()
    online_weights = self.model.get_weights()
    self.target_model.set_weights(online_weights)

    # Bounded buffer: holds at most REPLAY_MEMORY_SIZE entries.
    memory_capacity = self.REPLAY_MEMORY_SIZE
    self.replay_memory = deque(maxlen=memory_capacity)

    # TensorBoard logging is currently disabled:
    # self.tensorboard = ModifiedTensorBoard(log_dir=f"logs/{self.MODEL_NAME}-{int(time.time())}")

    self.target_update_counter = 0

    natural_flag = self.NATURAL
    self.env = blackjack.BlackjackEnv(natural=natural_flag)

    self.ep_rewards = []
    self.eps = 1
def evaluate_model(self, n_episodes):
    """Play ``n_episodes`` games with the greedy policy and print the results.

    The policy comes from ``self.get_best_policy(self.get_q_table())``.
    While the first component of the state is below 12 the policy is not
    consulted and action 1 is played instead (presumably "hit" -- confirm
    against the environment). The method prints the win rate (rewards 1 and
    1.5 count as wins) and the number of sub-optimal actions reported by
    ``visualization.compare2Optimal``.

    Args:
        n_episodes: Number of evaluation games to play.
    """
    outcome_counts = {-1: 0, 0: 0, 1: 0, 1.5: 0}
    greedy_policy = self.get_best_policy(self.get_q_table())
    table = blackjack.BlackjackEnv(natural=self.NATURAL)

    for _episode in range(n_episodes):
        observation = table.reset()
        finished = False
        while not finished:
            # Below 12 the policy is bypassed and action 1 is forced.
            move = 1 if observation[0] < 12 else greedy_policy[observation]
            observation, payoff, finished, _info = table.step(move)
        # Only the reward of the final step decides the game's outcome.
        outcome_counts[payoff] += 1

    games_won = outcome_counts[1] + outcome_counts[1.5]
    winrate = games_won / n_episodes * 100
    print('\nWin Rate: {:.2f} % ({} games)'.format(winrate, n_episodes))

    n_sub_optimal = visualization.compare2Optimal(greedy_policy)
    print('Suboptimal Actions: {}/200\n'.format(n_sub_optimal))
def evaluate_policy(self):
    """Play ``self.evaluate_iter`` games with the greedy policy and record the win rate.

    The policy comes from ``self.get_best_policy()``. While the first
    component of the state is below 12 the policy is not consulted and
    action 1 is played instead (presumably "hit" -- confirm against the
    environment). The method prints the win rate (rewards 1 and 1.5 count as
    wins) and the number of sub-optimal actions reported by
    ``visualization.compare2Optimal``, then appends the win rate to
    ``self.winrates``.
    """
    outcome_counts = {-1: 0, 0: 0, 1: 0, 1.5: 0}
    greedy_policy = self.get_best_policy()
    table = blackjack.BlackjackEnv(natural=self.natural)

    for _episode in range(self.evaluate_iter):
        observation = table.reset()
        finished = False
        while not finished:
            # Below 12 the policy is bypassed and action 1 is forced.
            move = 1 if observation[0] < 12 else greedy_policy[observation]
            observation, payoff, finished, _info = table.step(move)
        # Only the reward of the final step decides the game's outcome.
        outcome_counts[payoff] += 1

    games_won = outcome_counts[1] + outcome_counts[1.5]
    winrate = games_won / self.evaluate_iter * 100
    print('\nWin Rate: {:.2f} % ({} games)'.format(winrate, self.evaluate_iter))

    n_sub_optimal = visualization.compare2Optimal(greedy_policy)
    print('Suboptimal Actions: {}/200\n'.format(n_sub_optimal))

    # Recorded last, so a failure above leaves the history untouched.
    self.winrates.append(winrate)
""" Gagan Heer A00933997 Decision Making: Rule Based Please look over the README.md file if there is any trouble using this file """ import time import gym import blackjack as bj import random env = bj.BlackjackEnv() STAND = 0 HIT = 1 numGames = 1001 # 1 = hit, 0 = stand def rule_based_action(playerTotal, dealerCard): nextAction = None if playerTotal <= 11: nextAction = HIT elif playerTotal >= 17: nextAction = STAND elif dealerCard >= 7 or dealerCard == 1: nextAction = HIT elif (dealerCard <= 6 and dealerCard != 1) and playerTotal >= 13: nextAction = STAND elif dealerCard == 2 or dealerCard == 3: nextAction = HIT else: nextAction = STAND
if done: if reward >= 1: win += 1 elif reward == 0: tie += 1 elif reward == -1: loss += 1 break # percentage of winning games return 100 * win / n_test, 100 * tie / n_test, 100 * loss/n_test # TEST # Results print("WITH REPLACEMENT") env = bj.BlackjackEnv(1000000) results = main(algo='random', nb_games = 50000, txt = 'replacement.csv', txt_to_read = 'replacement.csv', n_test = 10000) print("Wins:", results[0], "% || Ties:", results[1], "% || Losses:", results[2], "%") print("Espérance : " + str(results[0] - results[2])) print("WITHOUT REPLACEMENT") env = bj.BlackjackEnv(nb_deck) results = main(algo='random', nb_games = 50000, txt = 'no_replacement.csv', txt_to_read = 'no_replacement.csv', n_test = 10000) print("Wins:", results[0], "% || Ties:", results[1], "% || Losses:", results[2], "%") print("Espérance : " + str(results[0] - results[2]))