def get_active_tiles(self, state, action):
    """Return the tile indices active for a (state, action) pair.

    The first two entries of ``state`` are each multiplied by 8 and divided
    by the width (high minus low) of the matching observation-space
    dimension.  The values are only rescaled, not shifted by the lower bound.

    Args:
        state: Indexable observation; only ``state[0]`` and ``state[1]``
            are read.
        action: Forwarded to ``tiles`` wrapped in a one-element list.

    Returns:
        Whatever ``tiles`` returns for ``self.iht`` and ``NUM_OF_TILINGS``
        given the two scaled floats and ``[action]``.
    """
    pos, vel = state[0], state[1]

    # Unpacking into exactly two names requires a 2-D observation space.
    pos_hi, vel_hi = tuple(self.env.observation_space.high)
    pos_lo, vel_lo = tuple(self.env.observation_space.low)

    pos_span = pos_hi - pos_lo
    vel_span = vel_hi - vel_lo

    scaled = [
        8 * pos / pos_span,
        8 * vel / vel_span,
    ]
    return tiles(self.iht, NUM_OF_TILINGS, scaled, [action])
def get_active_tiles(state, action, env):
    """Return the active tiles for a state and a continuous action.

    Three float features are handed to ``tiles``: ``state[0]``, ``state[1]``
    and ``action[0]``.  Each is multiplied by 8 and divided by the width
    (high minus low) of its own range, taken from ``env.observation_space``
    for the state entries and from the first component of
    ``env.action_space`` for the action.

    Args:
        state: Indexable observation; only the first two entries are read.
        action: Indexable action; only ``action[0]`` is read.
        env: Object exposing ``observation_space`` and ``action_space`` with
            ``high`` / ``low`` attributes.

    Returns:
        Whatever ``tiles`` returns for the module-level ``iht`` and
        ``NUM_OF_TILINGS`` given the three scaled floats.
    """
    pos, vel = state[0], state[1]

    # Unpacking into exactly two names requires a 2-D observation space.
    pos_hi, vel_hi = tuple(env.observation_space.high)
    pos_lo, vel_lo = tuple(env.observation_space.low)

    act_hi = env.action_space.high[0]
    act_lo = env.action_space.low[0]

    scaled = [
        8 * pos / (pos_hi - pos_lo),
        8 * vel / (vel_hi - vel_lo),
        8 * action[0] / (act_hi - act_lo),
    ]
    return tiles(iht, NUM_OF_TILINGS, scaled)
def get_tiles(self, state):
    """Encode a state as a dense indicator row over tile indices.

    ``state[0]`` and ``state[1]`` are each multiplied by 8 and divided by
    the width (high minus low) of the matching observation-space dimension,
    then passed to ``tiles``.  The result of that call is used as column
    indices into a zero array of shape ``(1, MAX_SIZE)``.

    Args:
        state: Indexable observation; only the first two entries are read.

    Returns:
        A ``(1, MAX_SIZE)`` NumPy array that is 1.0 at the columns returned
        by ``tiles`` and 0.0 elsewhere.
    """
    pos, vel = state[0], state[1]

    # Unpacking into exactly two names requires a 2-D observation space.
    pos_hi, vel_hi = tuple(self.env.observation_space.high)
    pos_lo, vel_lo = tuple(self.env.observation_space.low)

    scaled = [
        8 * pos / (pos_hi - pos_lo),
        8 * vel / (vel_hi - vel_lo),
    ]
    active = tiles(self.iht, NUM_OF_TILINGS, scaled)

    indicator = np.zeros((1, MAX_SIZE))
    indicator[:, active] = 1.0
    return indicator
def get_active_tiles(state, action, env):
    """Return the active tiles for a four-component state and an action.

    Each of ``state[0]`` .. ``state[3]`` is multiplied by 8 and divided by
    the width (high minus low) of the matching observation-space dimension.
    The values are only rescaled, not shifted by the lower bound.

    Args:
        state: Indexable observation; entries 0 through 3 are read.
        action: Forwarded to ``tiles`` wrapped in a one-element list.
        env: Object exposing ``observation_space.high`` and
            ``observation_space.low``.

    Returns:
        Whatever ``tiles`` returns for the module-level ``iht`` and
        ``NUM_OF_TILINGS`` given the four scaled floats and ``[action]``.
    """
    s0, s1, s2, s3 = state[0], state[1], state[2], state[3]

    # Unpacking into exactly four names requires a 4-D observation space.
    hi0, hi1, hi2, hi3 = tuple(env.observation_space.high)
    lo0, lo1, lo2, lo3 = tuple(env.observation_space.low)

    scaled = [
        8 * s0 / (hi0 - lo0),
        8 * s1 / (hi1 - lo1),
        8 * s2 / (hi2 - lo2),
        8 * s3 / (hi3 - lo3),
    ]
    return tiles(iht, NUM_OF_TILINGS, scaled, [action])
def get_active_tiles_tensor(self, state, action):
    """Encode a (state, action) pair as a dense indicator column.

    Three float features are tiled: ``state[0]``, ``state[1]`` and the
    difference ``state[1] - self.old_velocity``.  The first two are
    multiplied by 8 and divided by the width (high minus low) of their
    observation-space dimension; the difference is multiplied by 8 and
    divided by twice the velocity width.

    Args:
        state: Indexable observation; only the first two entries are read.
        action: Forwarded to ``tiles`` wrapped in a one-element list.

    Returns:
        A ``(MAX_SIZE, 1)`` NumPy array that is 1.0 at the rows returned by
        ``tiles`` and 0.0 elsewhere.
    """
    pos = state[0]
    vel = state[1]
    vel_delta = state[1] - self.old_velocity  # this method does not update old_velocity

    # Unpacking into exactly two names requires a 2-D observation space.
    pos_hi, vel_hi = tuple(self.env.observation_space.high)
    pos_lo, vel_lo = tuple(self.env.observation_space.low)
    delta_bound = vel_hi - vel_lo

    scaled = [
        8 * pos / (pos_hi - pos_lo),
        8 * vel / (vel_hi - vel_lo),
        8 * vel_delta / (2 * delta_bound),
    ]
    active = tiles(self.iht, NUM_OF_TILINGS, scaled, [action])

    indicator = np.zeros((MAX_SIZE, 1))
    indicator[active] = 1.0
    return indicator
def get_active_tiles_tensor(self, state, action):
    """Build a dense indicator column for the tiles of a (state, action).

    The tiled float features are ``state[0]``, ``state[1]`` and
    ``state[1] - self.old_velocity``.  Position and velocity are multiplied
    by 8 and divided by the width (high minus low) of their
    observation-space dimension; the velocity difference is multiplied by 8
    and divided by twice the velocity width.

    Args:
        state: Indexable observation; only the first two entries are read.
        action: Forwarded to ``tiles`` wrapped in a one-element list.

    Returns:
        A ``(MAX_SIZE, 1)`` NumPy array holding 1.0 at the rows returned by
        ``tiles`` and 0.0 everywhere else.
    """
    pos = state[0]
    vel = state[1]
    vel_delta = state[1] - self.old_velocity

    # Unpacking into exactly two names requires a 2-D observation space.
    pos_hi, vel_hi = tuple(self.env.observation_space.high)
    pos_lo, vel_lo = tuple(self.env.observation_space.low)
    delta_bound = vel_hi - vel_lo

    scaled = [
        8 * pos / (pos_hi - pos_lo),
        8 * vel / (vel_hi - vel_lo),
        8 * vel_delta / (2 * delta_bound),
    ]
    active = tiles(self.iht, NUM_OF_TILINGS, scaled, [action])

    # NOTE: a sparse alternative (tf.SparseTensor with indices [0, i],
    # values 1.0 and shape [1, MAX_SIZE]) was sketched here but is disabled;
    # only the dense array below is produced.
    indicator = np.zeros((MAX_SIZE, 1))
    indicator[active] = 1.0
    return indicator