Ejemplo n.º 1
0
import numpy as np
from Players import Player


class QPlayer(Player):
    # https://towardsdatascience.com/simple-reinforcement-learning-q-learning-fcddc4b6fe56
    # https://www.learndatasci.com/tutorials/reinforcement-q-learning-scratch-python-openai-gym/
    def __init__(self, stack=10_000, count=0, eps=0.2, alpha=0.1, gamma=0.9,
                 load=False, update=True):
        """Create a Q-learning player registered under the name 'Q'.

        Args:
            stack: Forwarded to ``Player.__init__`` as ``stack``.
            count: Forwarded to ``Player.__init__`` as ``count``.
            eps: Exploration rate.
            alpha: Learning rate.
            gamma: Discount factor (typically 0.8-0.99).
            load: Falsy to start from an all-zero Q-table; any truthy value
                is handed to ``np.load`` to read a previously saved table.
            update: Stored as ``self.update``. Presumably toggles whether the
                Q-table is updated while playing -- confirm against the
                code that reads it.
        """
        Player.__init__(self, name='Q', stack=stack, count=count)

        # Persistence / training switches.
        self.load = load
        self.update = update

        # Learning hyper-parameters.
        self.eps = eps
        self.alpha = alpha
        self.gamma = gamma

        # Available moves: 0 = Stand, 1 = Hit.
        self.actions = [0, 1]
        # NOTE(review): the purpose of this offset is not visible in this
        # method; presumably it shifts a state value into a Q-table index.
        self.offset = 3

        # Q-table: either restored from `load` or a fresh zero array of shape
        # (10, 20, 2). The last axis presumably indexes `self.actions` --
        # TODO confirm against the lookup code.
        self.Q = np.load(self.load) if self.load else np.zeros((10, 20, 2))

    def ExploreOrExploit(self):
        d, t = self.GetState()
        if np.random.uniform(0., 1.) < self.eps:
Ejemplo n.º 2
0
 def __init__(self, symbol):
     """Initialize the instance by passing ``symbol`` to ``Player.__init__``."""
     Player.__init__(self, symbol)