Example #1
0
    def __init__(self):
        """Set up the tile-coded Sarsa(lambda) agent with default hyperparameters.

        Attributes assigned ``None`` here are only declared; this method
        does not give them a value.
        """
        # Placeholders, declared up front and left unset.
        self._x = self._y = self._z = None

        # Tile coding: the index hash table is created with ``dimension``
        # as its size argument.
        self.num_of_tilings = 8
        self.dimension = 1024
        self.iht = tiles3.IHT(self.dimension)

        # Learning hyperparameters.
        self.gamma = 0.9  # discount rate
        self.epsilon = 0.05  # epsilon-greedy
        self.alpha = 0.5 / self.num_of_tilings  # 0.5 split across the tilings

        # Weight vector and eligibility trace, both unset for now.
        self.w = None
        self.z = None
        self.lmbda = 0.9  # lambda for the eligibility trace

        self.last_action = self.last_state = None
        self.Q = None
        self.count = 0

        print('Sarsa(lambda) Agent with tile coding')
    def __init__(self, st_low, st_high, nb_actions, learn_rate, num_tilings,
                 init_val):
        """Build the hash table, scaling factors and weights of the tile coder.

        Params:
            st_low       - lower boundary of the state space, one entry per
                           dimension, e.g. [-1.2, -0.07] for mountain car
            st_high      - upper boundary of the state space, same length as
                           st_low
            nb_actions   - number of actions; multiplies the size of the
                           weight table
            learn_rate   - step size; stored divided by num_tilings
            num_tilings  - number of tiling layers - should be a power of 2
                           and at least 4*len(st_low)
            init_val     - initial state-action value; every weight starts
                           at init_val / num_tilings
        """
        lower = np.array(st_low)
        upper = np.array(st_high)
        assert len(lower) == len(upper)

        self._n_dim = len(lower)
        self._nb_actions = nb_actions
        self._lr = learn_rate / num_tilings
        self._num_tilings = num_tilings
        # Per-dimension factor: num_tilings divided by the width of the range.
        self._scales = num_tilings / (upper - lower)

        # Size of the weight table.
        # e.g. 8 tilings, 2d space, 3 actions -> (8+1) * (8+1) * 8 * 3
        tiles_per_layer = (num_tilings + 1) ** self._n_dim
        nb_total_tiles = tiles_per_layer * num_tilings * nb_actions

        self._iht = tiles3.IHT(nb_total_tiles)
        self._weights = np.zeros(nb_total_tiles) + init_val / num_tilings
Example #3
0
 def __init__(self, num_dimensions, dimension_ranges, num_actions, num_tiles=2048, num_tilings=8, scale_inputs=False):
     """Store the configuration and create the weights and index hash table.

     theta holds num_tiles * num_actions weights, each drawn as
     0.001 * random.random(); the hash table is created with num_tiles as
     its size argument.
     """
     self.scale_inputs = scale_inputs
     self.num_tiles = num_tiles
     self.num_dimensions = num_dimensions
     self.dimension_ranges = dimension_ranges
     self.num_actions = num_actions

     # One small random weight per (tile, action) slot, kept as an ndarray.
     n_weights = num_tiles * num_actions
     self.theta = np.asarray([0.001 * random.random() for _ in range(n_weights)])

     self.iht = tiles3.IHT(num_tiles)
     self.num_tilings = num_tilings
Example #4
0
 def __init__(self, iht_size=4096, num_tilings=8, num_tiles=8):
     """
     Set up the MountainCar tile coder.

     Arguments:
     iht_size -- int, size of the index hash table, typically a power of 2
     num_tilings -- int, number of tilings
     num_tiles -- int, number of tiles; the same count is used for both the
                  width and the height of the tile coder

     Attributes set:
     self.iht -- tiles3.IHT created with iht_size
     self.num_tilings -- num_tilings, stored unchanged
     self.num_tiles -- num_tiles, stored unchanged
     """
     hash_table = tiles3.IHT(iht_size)
     self.iht = hash_table
     self.num_tilings = num_tilings
     self.num_tiles = num_tiles
 def __init__(self, var_ranges, num_tiles, num_tilings):
     """Validate the inputs, store them, and build the index hash table.

     var_ranges  -- one two-element range per input variable
     num_tiles   -- one positive int per input variable
     num_tilings -- int, number of tilings

     Each check below fails with an AssertionError carrying the message
     written next to it.
     """
     # Input checks, in the order they are reported.
     assert len(var_ranges) == len(num_tiles), (
         "Input variables length do not match")
     assert all(isinstance(count, int) and count > 0 for count in num_tiles), (
         "number of tiles should be an array of integers > 0")
     assert all(len(bounds) == 2 for bounds in var_ranges), (
         "variable range should be a finite numeric interval")
     assert isinstance(num_tilings, int), (
         "number of tilings should be an integer")

     self.var_ranges = var_ranges
     self.num_tiles = num_tiles
     self.num_var = len(var_ranges)
     self.num_tilings = num_tilings

     # One float32 slot per variable; get_coeffs() is called right after
     # the zeroed array is in place.
     self.var_coeff = np.zeros(self.num_var, dtype=np.float32)
     self.get_coeffs()

     # The hash table is created with whatever calc_iht_size() returns.
     self.iht_size = self.calc_iht_size()
     self.iht = tiles3.IHT(self.iht_size)
Example #6
0
    def __init__(self):
        """Create the agent: weights, trace, hash table and the 3D plot."""
        super().__init__(action_size=3, discount=1.0)

        self.step_size = 0.3 / 8

        # Weights as a (2048, 1) float column, plus a separate copy of them.
        weights = np.zeros((2048, 1), dtype=float)
        self.w = weights
        self.w_last = np.copy(weights)

        # difference
        self.R_mean = 0.0
        self.beta = 0.0

        # trace: zeros with the same shape and dtype as the weights
        self.z = np.zeros_like(weights)
        self.lambd = 0.92

        self.iht = tiles3.IHT(2048)

        # Figure with a 3D axes; interactive mode is switched on last.
        # NOTE(review): whether Axes3D(fig) attaches itself to the figure
        # depends on the Matplotlib version - confirm against the one in use.
        figure = plt.figure()
        self.fig = figure
        self.ax = Axes3D(figure)
        plt.ion()
Example #7
0
    def __init__(self,
                 step_size,
                 lam,
                 trace_update=accumulating_trace,
                 num_of_tilings=8,
                 max_size=4096):
        """Store the hyperparameters and allocate the weights, trace and hash table.

        step_size      -- stored divided by num_of_tilings
        lam            -- stored unchanged as ``self.lam``
        trace_update   -- stored unchanged as ``self.trace_update``
        num_of_tilings -- number of tilings; also the numerator of both scales
        max_size       -- size argument of the hash table and length of the
                          weight and trace vectors
        """
        self.max_size = max_size
        self.num_of_tilings = num_of_tilings
        self.trace_update = trace_update
        self.lam = lam

        self.step_size = step_size / num_of_tilings
        self.hash_table = t3.IHT(max_size)

        # Weights and trace both start as zero vectors of length max_size.
        self.weights = np.zeros(max_size)
        self.trace = np.zeros(max_size)

        # Scale for each state variable: num_of_tilings over the width of
        # that variable's [MIN, MAX] range.
        position_span = POSITION_MAX - POSITION_MIN
        velocity_span = VELOCITY_MAX - VELOCITY_MIN
        self.position_scale = num_of_tilings / position_span
        self.velocity_scale = num_of_tilings / velocity_span
Example #8
0
 def __init__(self, iht_size=4096, num_tilings=8, num_tiles=8):
     """Create the index hash table and store the tiling parameters.

     iht_size    -- size argument passed to tc.IHT
     num_tilings -- stored unchanged as ``self.num_tilings``
     num_tiles   -- stored unchanged as ``self.num_tiles``
     """
     hash_table = tc.IHT(iht_size)
     self.iht = hash_table
     self.num_tilings = num_tilings
     self.num_tiles = num_tiles
Example #9
0
 def setUp(self):
     """Seed ``random`` with 123 and create a hash table with size argument 1024."""
     # Seed first, then build the table.
     random.seed(123)
     hash_table = tc.IHT(1024)
     self.iht = hash_table