    def val_cal(self, state, action, gradient=False):
        """
        The feature is constructed from (tiles, action).
        This function has two steps:
        1. get the active-tile feature for the (state, action) pair
        2. compute the value of the (state, action) pair from the weights
        :param state: observation instance (position, velocity)
        :param action: int, discrete action index
        :param gradient: bool, if True return the feature (active tiles) instead of the value
        """
        feature = tc.tiles(
            self.iht, NUM_OF_TILINGS,
            # scale position and velocity so each tiling spans 8 tiles over its range
            [8 * state[0] / (0.6 + 1.2), 8 * state[1] / (0.07 + 0.07)],
            [action])
        if gradient: return feature
        val = sum(self.weights[feature])

        return val
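For reference, a standalone sketch of the same lookup outside the class. It assumes Sutton & Barto's tiles3 module (the likely source of tc.IHT / tc.tiles here) and the Mountain Car bounds of [-1.2, 0.6] for position and [-0.07, 0.07] for velocity; names such as q_hat and MAX_SIZE are illustrative.

import numpy as np
from tiles3 import IHT, tiles   # assumed: Sutton & Barto's tile-coding module

NUM_OF_TILINGS = 8
MAX_SIZE = 4096                 # illustrative hash-table size

iht = IHT(MAX_SIZE)
weights = np.zeros(MAX_SIZE)    # one weight per tile index

def q_hat(state, action):
    """Approximate Q(s, a): sum the weights of the active tiles."""
    position, velocity = state
    active_tiles = tiles(
        iht, NUM_OF_TILINGS,
        [8 * position / (0.6 + 1.2), 8 * velocity / (0.07 + 0.07)],
        [action])
    return weights[active_tiles].sum()

print(q_hat((-0.5, 0.0), 1))    # 0.0 while the weights are still all zero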
Example #2
    def estimate(self, state, action):
        pos, vel, action = self._test_input(state, action)

        active_tiles = tile_coding.tiles(
            self._hashtable, self._num_of_tillings,
            [self._pos_scale * pos, self._vel_scale * vel], [action])

        return np.sum(self._weights[active_tiles])
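The scale factors are presumably the number of tilings divided by each state variable's range, as Example #5 below computes explicitly; a sketch of that assumption with the usual Mountain Car bounds:

# Likely form of the scale factors (assumption, mirroring Example #5 below);
# Mountain Car bounds: position in [-1.2, 0.6], velocity in [-0.07, 0.07].
num_of_tillings = 8
pos_scale = num_of_tillings / (0.6 - (-1.2))
vel_scale = num_of_tillings / (0.07 - (-0.07))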
Example #3
    def __init__(self):
        self.actionSet = action.ActionSet

        self.dynaQUpdates = 50

        self.alpha = 1  # learning rate (previously 0.5)
        self.gamma = 0.9  # future discount

        # Set up the tiling; seed the RNG from system entropy
        random.seed()
        self.numTilings = 3  # previously 8
        self.numDimensions = 2  # previously 6
        maxVal = 1000
        vals = [0 for i in range(self.numDimensions)]
        self.iht = tc.IHT(maxVal)

        # Initialize with 0s to standardize randomness of hashing across experiments
        tc.tiles(self.iht, self.numTilings, vals)

        self.history = History()
        # We need to store the s-a value where our state is the 8d tiling
        #  and action is a change in acceleration

        # we WISH we could do this in an np array, but it would be a 1024^8 size array (where each entry was the action values)
        # self.Q = np.zeros([self.numTilings, len(self.actionSet)])
        # Maybe a dictionary will work?
        # self.Q = {}
        # self.StateReward = {}
        # self.TilingToState = {}

        # Currently a full Q array is ~2.4 GB
        self.Q = self.setupQ(maxVal)
        self.model = self.setupModel(maxVal)

        self.visitedStates = set()
        # self.visitedStates = dict()

        # self.visitedStatesCount = dict()
        # self.visitedStatesModelCount = dict()

        self.normFirst = 0
        self.dynaFirst = 0
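The warm-up call tc.tiles(self.iht, self.numTilings, vals) above assigns tile indices for the all-zero state before learning starts, so the hash table hands out indices in a fixed order. A small sketch of that behaviour, assuming Sutton & Barto's tiles3 implementation (where IHT assigns indices on first use and IHT.count() reports how many are taken):

from tiles3 import IHT, tiles        # assumed tile-coding module

iht = IHT(1000)                      # at most 1000 distinct tile indices
first = tiles(iht, 3, [0.0, 0.0])    # warm-up: indices assigned on first use
again = tiles(iht, 3, [0.0, 0.0])    # same input -> same indices
assert first == again
print(iht.count())                   # 3 indices handed out so far, one per tiling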
    def __call__(self, s):
        """
        Return the value of the given state: V_hat(s).

        input:
            s: the state
        output:
            estimated value of the given state
        """
        # Determine the active-tile indices for this state
        feature_vector = tiles(self.w.shape[0], self.num_tilings, s)
        # The estimate is the sum of the weights of the active tiles
        estimated_value = np.sum(self.w[feature_vector])
        return estimated_value
Example #5
def get_feature(state, action):
    hash_table = iht
    num_tilings = tile_n
    position_scale = num_tilings / (max_position - min_position)
    velocity_scale = num_tilings / (max_velocity - min_velocity)
    position, velocity = state

    # Active tile indices for this (state, action) pair
    indices = tiles(hash_table, num_tilings,
                    [position_scale * position, velocity_scale * velocity],
                    [action])
    # Build a one-hot feature vector over the whole tile space
    feature = [0] * feat_n
    for index in indices:
        feature[index] = 1
    return feature
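The one-hot vector built here is equivalent to indexing a weight vector directly with the active tiles, as the other examples do. A small self-contained check of that equivalence (feat_n is assumed to equal the weight-vector length; the indices are stand-ins for the output of tiles):

import numpy as np

feat_n = 4096
w = np.random.rand(feat_n)
active_indices = [3, 17, 512]        # stand-in for tiles(...) output

feature = np.zeros(feat_n)
feature[active_indices] = 1.0        # one-hot encoding of the active tiles

# dot(one-hot feature, w) equals the sum of w at the active indices
assert np.isclose(feature @ w, w[active_indices].sum())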
Example #6
    def update(self, state, action, target):
        pos, vel, action = self._test_input(state, action)
        assert pos < 0.5  # this should never be called on terminal state

        active_tiles = tile_coding.tiles(
            self._hashtable, self._num_of_tillings,
            [self._pos_scale * pos, self._vel_scale * vel], [action])

        est = np.sum(self._weights[active_tiles])

        # Step-size-scaled error between the target and the current estimate
        delta = self._step_size * (target - est)

        for tile in active_tiles:
            self._weights[tile] += delta
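For context, a hedged sketch of how estimate and update might be driven from an outer control loop. The one-step Sarsa target shown is the conventional choice; vf stands for an instance of the class these methods belong to, and the transition variables and GAMMA are illustrative rather than taken from the original code:

GAMMA = 1.0   # Mountain Car is conventionally undiscounted

def sarsa_update(vf, state, action, reward, next_state, next_action, done):
    """Form a one-step Sarsa target and push it into the tile-coded weights."""
    if done:
        target = reward                                   # no bootstrap at the terminal state
    else:
        target = reward + GAMMA * vf.estimate(next_state, next_action)
    vf.update(state, action, target)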
Example #7
    def getActiveTiles(self, d_state, s_action, agent):
        '''
        get indices of active tiles for the given state and action

        :param d_state: dictionary. the internal state representation of an agent
        :param s_action: string. a valid action
        :param agent: Agent object. the agent using the value function
        '''
        action = agent.d_translate_to_valuefun[s_action]
        l_features_values = []
        for s_key in agent.features_names:
            # Clip each feature into [MIN, MAX], shift it to zero, then rescale
            f_min = self.d_normalizers[s_key]['MIN']
            f_value = max(0., d_state[s_key] - f_min)
            if d_state[s_key] > self.d_normalizers[s_key]['MAX']:
                f_value = self.d_normalizers[s_key]['MAX']
                f_value -= self.d_normalizers[s_key]['MIN']
            f_value *= self.featuresScale[s_key]
            l_features_values.append(f_value)
        activeTiles = tile_coding.tiles(self.hashTable, self.numOfTilings,
                                        l_features_values, [action])
        return activeTiles
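The loop above clips each feature into its configured [MIN, MAX] range, shifts it to zero, and rescales it into tile-coder units. A standalone sketch of that normalisation step with made-up bounds and scale:

def normalise(raw_value, f_min, f_max, scale):
    """Clip into [f_min, f_max], shift to zero, then rescale (illustrative bounds)."""
    value = min(max(raw_value, f_min), f_max)
    return (value - f_min) * scale

num_tilings = 8
scale = num_tilings / (10.0 - (-10.0))      # e.g. a feature ranging over [-10, 10]
print(normalise(25.0, -10.0, 10.0, scale))  # 8.0: clipped to the top of the range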
Example #9
    def getTile(self, state):
        if len(state) != self.numDimensions:
            print("ERROR: unexpected state size for tiling")
            return None
        else:
            return tc.tiles(self.iht, self.numTilings, state)
def get_tile(x, y, action=[]):
    # Scale x (position) and y (velocity) into tile-coder units
    return tiles(iht, numTilings,
                 [numTilings * x / (position_max - position_min),
                  numTilings * y / (velocity_max - velocity_min)],
                 action)
Example #11
def tile_code(S, A, iht=IHT(4096), num_tilings=8):
    # Note: the default IHT is created once and shared across all calls
    position, velocity = S
    return tiles(iht, num_tilings, [num_tilings * position / (0.5 + 1.2),
                                    num_tilings * velocity / (0.07 + 0.07)], [A])
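Because the default argument iht=IHT(4096) is evaluated once at definition time, every call shares the same hash table, so identical (position, velocity, action) triples keep mapping to the same tiles. A quick check of that behaviour, assuming the imports used above (tiles and IHT from Sutton & Barto's tiles3) are in scope:

t1 = tile_code((-0.5, 0.0), 1)
t2 = tile_code((-0.5, 0.0), 1)
assert t1 == t2   # same state-action pair -> same tile indices via the shared IHT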
Example #12
    def s2f(self, s):
        # Determine the active-tile indices for this state
        feature_vector = tiles(self.w.shape[0], self.num_tilings, s)
        return feature_vector