import numpy as np
from Players import Player


class QPlayer(Player):
    """Player that keeps a table ``self.Q`` and an exploration rate ``self.eps``.

    The class name and the links below indicate tabular Q-learning.  The
    action labels used in ``__init__`` (Stand / Hit) suggest a blackjack-style
    game -- NOTE(review): confirm against the ``Player`` base class and the
    game loop, neither of which is visible here.
    """
    # https://towardsdatascience.com/simple-reinforcement-learning-q-learning-fcddc4b6fe56
    # https://www.learndatasci.com/tutorials/reinforcement-q-learning-scratch-python-openai-gym/

    def __init__(self, stack=10_000, count=0, eps=0.2, alpha=0.1, gamma=0.9, load=False, update=True):
        """Set up the base player, the learning parameters and the Q array.

        Args:
            stack: Forwarded unchanged to ``Player.__init__`` as ``stack``.
            count: Forwarded unchanged to ``Player.__init__`` as ``count``.
            eps: Exploration rate; ``ExploreOrExploit`` compares a uniform
                random draw from [0, 1) against it.
            alpha: Learning rate.
            gamma: Discount factor.
            load: When truthy, the value is handed to ``np.load`` and the
                result becomes ``self.Q``; when falsy, ``self.Q`` starts as
                zeros.  It therefore doubles as flag and file argument.
            update: Stored on the instance as ``self.update``; its use is not
                visible in this part of the file.
        """
        # The base player is always registered under the fixed name 'Q'.
        Player.__init__(self, name='Q', stack=stack, count=count)
        self.eps = eps  # percentage of exploration
        self.alpha = alpha  # learning rate
        self.gamma = gamma  # discount factor, typically 0.8-0.99
        self.actions = [0, 1]  # 0: Stand, 1: Hit
        # NOTE(review): purpose of this offset is not visible here; presumably
        # it shifts raw state values onto Q-array indices -- confirm.
        self.offset = 3
        self.load = load
        self.update = update
        if self.load:
            # A truthy ``load`` is used directly as the argument to np.load.
            self.Q = np.load(self.load)
        else:
            # Fresh array of zeros with shape (10, 20, 2).  The last axis has
            # the same length as ``self.actions``; presumably the first two
            # axes index the (d, t) state returned by GetState() -- TODO confirm.
            self.Q = np.zeros((10, 20, 2))

    def ExploreOrExploit(self):
        """Read the current state and draw a random number against ``self.eps``.

        NOTE(review): only the beginning of this method is visible; what
        happens in each branch, and what is returned, cannot be told from here.
        """
        # GetState() is defined elsewhere; it yields two values, bound to d and t.
        d, t = self.GetState()
        if np.random.uniform(0., 1.) < self.eps:
            # NOTE(review): the source is cut off at this point -- the body of
            # this branch (and the rest of the method) is missing from view.
def __init__(self, symbol):
    """Initialise the instance by handing ``symbol`` to ``Player.__init__``.

    Args:
        symbol: Passed positionally, unchanged, to the base-class initialiser.
    """
    Player.__init__(self, symbol)