コード例 #1
0
 def get_active_tiles(self, state, action):
     """Look up the tiles for a (state, action) pair via ``tiles``.

     ``state[0]`` (position) and ``state[1]`` (velocity) are each multiplied
     by 8 and divided by the width of the matching observation-space bound
     before being handed to ``tiles`` together with ``[action]``.

     Returns whatever ``tiles`` returns -- presumably the active tile
     indices; confirm against the tile-coding helper.
     """
     bounds = self.env.observation_space
     # Unpacking enforces that the observation space has exactly two entries.
     pos_hi, vel_hi = tuple(bounds.high)
     pos_lo, vel_lo = tuple(bounds.low)
     pos, vel = state[0], state[1]
     scaled = [
         8 * pos / (pos_hi - pos_lo),
         8 * vel / (vel_hi - vel_lo),
     ]
     return tiles(self.iht, NUM_OF_TILINGS, scaled, [action])
コード例 #2
0
def get_active_tiles(state, action, env):
    """Look up the tiles for a state and a continuous action via ``tiles``.

    Position (``state[0]``), velocity (``state[1]``) and the first action
    component (``action[0]``) are each multiplied by 8 and divided by the
    width of their bound taken from ``env``. The three scaled values are
    passed to ``tiles`` as a single feature list, using the module-level
    ``iht`` and ``NUM_OF_TILINGS``.

    Returns whatever ``tiles`` returns -- presumably the active tile
    indices; confirm against the tile-coding helper.
    """
    obs_space = env.observation_space
    act_space = env.action_space

    # Unpacking enforces that the observation space has exactly two entries.
    pos_hi, vel_hi = tuple(obs_space.high)
    pos_lo, vel_lo = tuple(obs_space.low)
    # Only the first action dimension is used.
    act_hi = act_space.high[0]
    act_lo = act_space.low[0]

    scaled_features = [
        8 * state[0] / (pos_hi - pos_lo),
        8 * state[1] / (vel_hi - vel_lo),
        8 * action[0] / (act_hi - act_lo),
    ]
    return tiles(iht, NUM_OF_TILINGS, scaled_features)
コード例 #3
0
 def get_tiles(self, state):
     """Encode ``state`` as a (1, MAX_SIZE) indicator row of its tiles.

     Position (``state[0]``) and velocity (``state[1]``) are each multiplied
     by 8 and divided by the width of the matching observation-space bound,
     then passed to ``tiles``. The result of ``tiles`` is used as column
     indices: those columns of a zero-filled NumPy array are set to 1.0.

     Returns the ``np.ndarray`` of shape ``(1, MAX_SIZE)``.
     """
     space = self.env.observation_space
     # Unpacking enforces that the observation space has exactly two entries.
     pos_hi, vel_hi = tuple(space.high)
     pos_lo, vel_lo = tuple(space.low)
     scaled = [
         8 * state[0] / (pos_hi - pos_lo),
         8 * state[1] / (vel_hi - vel_lo),
     ]
     active_columns = tiles(self.iht, NUM_OF_TILINGS, scaled)

     encoding = np.zeros((1, MAX_SIZE))
     encoding[:, active_columns] = 1.0
     return encoding
コード例 #4
0
ファイル: MountainCar.py プロジェクト: syllogismos/ai
def get_active_tiles(state, action, env):
    """Look up the tiles for a four-component state and an action.

    Each of ``state[0]`` .. ``state[3]`` is multiplied by 8 and divided by
    the width of the matching observation-space bound of ``env``. The four
    scaled values are passed to ``tiles`` together with ``[action]``, using
    the module-level ``iht`` and ``NUM_OF_TILINGS``.

    Returns whatever ``tiles`` returns -- presumably the active tile
    indices; confirm against the tile-coding helper.
    """
    space = env.observation_space
    # Explicit four-way unpacking keeps the arity check on the bounds.
    hi_0, hi_1, hi_2, hi_3 = tuple(space.high)
    lo_0, lo_1, lo_2, lo_3 = tuple(space.low)

    components = (state[0], state[1], state[2], state[3])
    widths = (hi_0 - lo_0, hi_1 - lo_1, hi_2 - lo_2, hi_3 - lo_3)

    scaled = [8 * value / width for value, width in zip(components, widths)]
    return tiles(iht, NUM_OF_TILINGS, scaled, [action])
コード例 #5
0
 def get_active_tiles_tensor(self, state, action):
     """Encode a (state, action) pair as a (MAX_SIZE, 1) indicator column.

     Three features are built and passed to ``tiles`` with ``[action]``:
     position (``state[0]``), velocity (``state[1]``) and the velocity
     change ``state[1] - self.old_velocity``. Position and velocity are
     multiplied by 8 and divided by the width of their observation-space
     bound; the velocity change is multiplied by 8 and divided by twice the
     velocity width.

     ``self.old_velocity`` is only read here, never updated.

     Returns an ``np.ndarray`` of shape ``(MAX_SIZE, 1)`` that is 1.0 in the
     rows indexed by the result of ``tiles`` and 0.0 elsewhere.
     """
     space = self.env.observation_space
     # Unpacking enforces that the observation space has exactly two entries.
     pos_hi, vel_hi = tuple(space.high)
     pos_lo, vel_lo = tuple(space.low)
     vel_width = vel_hi - vel_lo

     velocity_change = state[1] - self.old_velocity
     scaled = [
         8 * state[0] / (pos_hi - pos_lo),
         8 * state[1] / vel_width,
         8 * velocity_change / (2 * vel_width),
     ]

     active_rows = tiles(self.iht, NUM_OF_TILINGS, scaled, [action])
     column = np.zeros((MAX_SIZE, 1))
     column[active_rows] = 1.0
     return column
コード例 #6
0
 def get_active_tiles_tensor(self, state, action):
     """Build a dense (MAX_SIZE, 1) indicator column for (state, action).

     The features handed to ``tiles`` (along with ``[action]``) are:

     * ``state[0]`` (position), times 8, over the position bound width;
     * ``state[1]`` (velocity), times 8, over the velocity bound width;
     * ``state[1] - self.old_velocity``, times 8, over twice the velocity
       bound width.

     ``self.old_velocity`` is read but not modified by this method.

     Returns an ``np.ndarray`` of shape ``(MAX_SIZE, 1)`` with 1.0 in the
     rows indexed by the result of ``tiles``.
     """
     obs_bounds = self.env.observation_space
     # Unpacking enforces that the observation space has exactly two entries.
     hi_pos, hi_vel = tuple(obs_bounds.high)
     lo_pos, lo_vel = tuple(obs_bounds.low)
     pos_range = hi_pos - lo_pos
     vel_range = hi_vel - lo_vel

     pos, vel = state[0], state[1]
     accel = vel - self.old_velocity

     features = [
         8 * pos / pos_range,
         8 * vel / vel_range,
         8 * accel / (2 * vel_range),
     ]
     hit_rows = tiles(self.iht, NUM_OF_TILINGS, features, [action])

     # NOTE: a sparse alternative (tf.SparseTensor with indices [0, x] for
     # each tile, values of 1.0 and shape [1, MAX_SIZE]) was left disabled
     # in the original; the dense column below is what gets returned.
     dense = np.zeros((MAX_SIZE, 1))
     dense[hit_rows] = 1.0
     return dense