def __init__(self, actions, two_sided=False):
    """Initialize discretization constants, Q/N tables, and the exploration schedule."""
    # Action set comes from the caller (e.g. [-1, 0, 1]); start in training mode.
    self._actions = actions
    self._train = True
    # State-space discretization constants pulled from utils.
    self._x_bins = utils.X_BINS
    self._y_bins = utils.Y_BINS
    self._v_x = utils.V_X
    self._v_y = utils.V_Y
    self._paddle_locations = utils.PADDLE_LOCATIONS
    self._num_actions = utils.NUM_ACTIONS
    # Q-value table and visit-count table, both freshly allocated.
    self.Q = utils.create_q_table()
    self.N = utils.create_q_table()
    self.two_sided = two_sided
    # Bookkeeping for the previous step and episode progress.
    self.prev_action = 0
    self.prev_state = [0, 0.5, 0, 0, 0.8]
    self.pbounce = 0
    self.first = True
    self.itrs = 0
    # Exploration schedule: epsilon decays by `decay` toward `min_epsilon`,
    # and the C value decays by `cdecay` toward `min_c`.
    self.explore = True
    self.explore_print = True
    self.epsilon = 1
    self.decay = 0.999
    self.min_epsilon = .001
    self.cdecay = .9
    self.c_val = 50
    self.min_c = 30
def __init__(self, actions, Ne, C, gamma):
    """Store the learning hyperparameters and allocate the Q and N tables."""
    self.actions = actions
    self.Ne = Ne        # visit threshold used by the exploration function
    self.C = C          # learning-rate constant
    self.gamma = gamma  # discount factor
    # Fresh Q-value and visit-count tables.
    self.Q = utils.create_q_table()
    self.N = utils.create_q_table()
def __init__(self, actions, Ne, C, gamma):
    """Set hyperparameters, build the Q/N tables, then reset episode state."""
    self.actions = actions
    self.Ne = Ne        # used in the exploration function
    self.C = C          # learning-rate constant
    self.gamma = gamma  # discount factor
    self.Q = utils.create_q_table()
    # N tracks how many times each (state, action) pair has been visited.
    self.N = utils.create_q_table()
    # Clear per-episode bookkeeping (defined elsewhere on this class).
    self.reset()
def __init__(self, actions, Ne, C, gamma):
    """Store hyperparameters, derive the action priority order, and allocate tables.

    Args:
        actions: available actions, in the caller's preferred order.
        Ne: visit threshold used in the exploration function.
        C: learning-rate constant.
        gamma: discount factor.
    """
    self.actions = actions
    # BUG FIX: the original called actions.reverse(), mutating the caller's
    # list in place and leaving self.actions aliased to the reversed list.
    # Build the priority order as an independent reversed copy instead, so
    # self.actions keeps the order the caller supplied.
    self.actions_priority = list(reversed(actions))
    self.Ne = Ne        # used in exploration function
    self.C = C          # learning-rate constant
    self.gamma = gamma  # discount factor
    # Clear per-episode bookkeeping (defined elsewhere on this class).
    self.reset()
    # Create the Q and N Table to work with
    self.Q = utils.create_q_table()
    self.N = utils.create_q_table()
def __init__(self, actions, Ne, C, gamma):
    """Record hyperparameters, clear episode state, and zero the tables."""
    self.actions = actions
    self.Ne = Ne        # exploration-function visit threshold
    self.C = C          # learning-rate constant
    self.gamma = gamma  # discount factor
    # Last state/action seen, plus the running score.
    self.s = None
    self.a = None
    self.points = 0
    # Q-value and visit-count tables.
    self.Q = utils.create_q_table()
    self.N = utils.create_q_table()
    # Finish by resetting any remaining per-episode state.
    self.reset()
def __init__(self, actions, Ne, C, gamma):
    """Store hyperparameters and allocate the Q and N tables."""
    self.actions = actions
    self.Ne = Ne        # threshold used by the exploration function
    self.C = C          # learning-rate constant
    self.gamma = gamma  # discount factor
    # First-step flag plus a running log of the actions taken.
    self.first = True
    self.track_actions = []
    # Previous state and action (none yet).
    self.s = None
    self.a = None
    # Q-value and visit-count tables.
    self.Q = utils.create_q_table()
    self.N = utils.create_q_table()
def __init__(self, actions, two_sided=False):
    """Initialize discretization constants, tables, and game bookkeeping.

    Args:
        actions: available paddle actions.
        two_sided: whether the agent plays on both sides of the board.
    """
    # BUG FIX: the original hard-coded `self.two_sided = False`, silently
    # discarding the caller's argument; honor the parameter instead.
    self.two_sided = two_sided
    self._actions = actions
    self._train = True
    # State-space discretization constants from utils.
    self._x_bins = utils.X_BINS
    self._y_bins = utils.Y_BINS
    self._v_x = utils.V_X
    self._v_y = utils.V_Y
    self._paddle_locations = utils.PADDLE_LOCATIONS
    self._num_actions = utils.NUM_ACTIONS
    # Create the Q Table to work with, plus a visit-count table.
    self.Q = utils.create_q_table()
    self.N = utils.create_q_table()
    # Previous step's state, games played, and bounce count.
    self.last_state = None
    self.games = 0
    self.prev_bounce = 0
def __init__(self, actions, Ne, C, gamma):
    """Store hyperparameters (echoed to stdout for debugging) and zero the tables."""
    self.actions = actions
    self.Ne = Ne  # used in exploration function
    print("Ne", Ne)
    print("gamma", gamma)
    self.C = C          # constant for the learning rate
    self.gamma = gamma  # discount factor
    # Q-table and N-table both start out as all zeros.
    self.Q = utils.create_q_table()
    self.N = utils.create_q_table()
    # Running score and the previous (state, action) pair.
    self.points = 0
    self.s = None
    self.a = None
def __init__(self, actions, Ne, C, gamma):
    """Initialize hyperparameters, the Q/N tables, and episode bookkeeping."""
    self.actions = actions
    self.Ne = Ne        # used in the exploration function
    self.C = C          # learning-rate constant
    self.gamma = gamma  # discount factor
    # Q-value and visit-count tables.
    self.Q = utils.create_q_table()
    self.N = utils.create_q_table()
    # Previous state and action, current score, and a game-started flag.
    self.s = None
    self.a = None
    self.points = 0
    self.begin = False
def __init__(self, actions, two_sided=False):
    """Set up discretization constants, tables, and learning parameters."""
    self.two_sided = two_sided
    self._actions = actions
    self._train = True
    # State-space discretization constants from utils.
    self._x_bins = utils.X_BINS
    self._y_bins = utils.Y_BINS
    self._v_x = utils.V_X
    self._v_y = utils.V_Y
    self._paddle_locations = utils.PADDLE_LOCATIONS
    self._num_actions = utils.NUM_ACTIONS
    # Q-value table plus a same-shaped table of (state, action) visit counts.
    self.Q = utils.create_q_table()
    self.SApair = utils.create_q_table()
    # Previous step's discretized state, action, and bounce count.
    self.prev_state = [6, 6, 1, 1, 6]
    self.prev_action = 0
    self.prev_bounces = 0
    # Learning hyperparameters.
    self.dis_fac = 0.8  # discount factor
    self.lrate = 6      # learning-rate constant
    self.epsilon = 0.6  # exploration probability
def __init__(self, actions, two_sided=False):
    """Initialize learning rates, Q/N tables, and discretization constants."""
    # Previous step's discretized state/action (sentinel values) and best score.
    self.old_state = [-1, -1, -1, -1, -1]
    self.old_action = 1
    self.max_bounces = 0
    # Each rate is squashed into (0, 1) via k / (k + 1), using the
    # module-level tuning constants.
    self.learning_rate = LEARNING_CONSTANT / (LEARNING_CONSTANT + 1)
    self.discount_rate = DISCOUNT_RATE / (DISCOUNT_RATE + 1)
    self.epsilon = EPSILON / (EPSILON + 1)
    self.two_sided = two_sided
    # Visit-count table; the Q table is created below.
    self.N = utils.create_q_table()
    self._actions = actions
    self._train = True
    # State-space discretization constants from utils.
    self._x_bins = utils.X_BINS
    self._y_bins = utils.Y_BINS
    self._v_x = utils.V_X
    self._v_y = utils.V_Y
    self._paddle_locations = utils.PADDLE_LOCATIONS
    self._num_actions = utils.NUM_ACTIONS
    # Create the Q Table to work with
    self.Q = utils.create_q_table()
def __init__(self, actions, two_sided=False):
    """Initialize discretization constants, the Q/N tables, and learning state.

    Args:
        actions: available paddle actions.
        two_sided: accepted for interface compatibility; not stored here —
            NOTE(review): confirm whether this variant should keep the flag.
    """
    self._actions = actions
    self._train = True
    # State-space discretization constants from utils.
    self._x_bins = utils.X_BINS
    self._y_bins = utils.Y_BINS
    self._v_x = utils.V_X
    self._v_y = utils.V_Y
    self._paddle_locations = utils.PADDLE_LOCATIONS
    self._num_actions = utils.NUM_ACTIONS
    # Create the Q Table to work with
    self.Q = utils.create_q_table()
    # Last observed x-direction of the ball and games played so far.
    self.last_x_dir = self._v_x
    self.games_played = 0
    # Learning hyperparameters.
    self.epsilon = .5  # exploration probability
    self.alpha = .2    # learning rate
    self.gamma = .9    # discount factor
    # Previous step's state, action, and reward (none yet).
    self.prev_state = None
    self.prev_action = None
    self.prev_reward = None
    # BUG FIX: the original used `self.Q.copy()`, a shallow copy — if the
    # table is a nested list, N would share its inner rows with Q and every
    # visit-count update would corrupt the Q-values. Allocate an independent
    # zero table instead (equivalent here, since Q is still all zeros).
    self.N = utils.create_q_table()
import utils