import logging

logger = logging.getLogger(__name__)


# An example using the MountainCar domain
if __name__ == "__main__" and True:
    import gym
    env = gym.make('MountainCar-v0')
    na = env.action_space.n

    # Tile coding for discretization to binary vectors
    tiling_1 = vcf.features.BinaryTiling(env.observation_space, 11)
    tiling_2 = vcf.features.BinaryTiling(env.observation_space, 19)
    tiling_3 = vcf.features.BinaryTiling(env.observation_space, 31)

    # Concatenate binary vectors
    phi = vcf.Union(tiling_1, tiling_2, tiling_3)

    # Define the control (discrete actions Q-learning)
    dq = vcf.DiscreteQ(len(phi), na, epsilon=0.002)
    dq_params = {
        'alpha': vcf.parameters.EpisodicPowerLaw(0.2, 0.25),
        'gm': 0.9999,
        'gm_p': vcf.Constant(0.9999, 0),
        'lm': vcf.Constant(0.5, 0),
    }
    control = vcf.Agent(dq, phi, dq_params)

    # List of agents to update
    learners = [control]

    # Set up the experiment

# Set up the experiment
env = gym.make('MountainCar-v0')
space = env.observation_space  # TODO: REMOVE WHEN DONE DEBUGGING

# Tile coding for discretization
tiling_1 = vcf.UniformTiling(env.observation_space, 2)
tiling_2 = vcf.UniformTiling(env.observation_space, 3)
tiling_3 = vcf.UniformTiling(env.observation_space, 7)
tiling_4 = vcf.UniformTiling(env.observation_space, 13)

# Convert tile indices to binary vector
bvec_1 = vcf.BinaryVector(tiling_1.high, tiling_1)
bvec_2 = vcf.BinaryVector(tiling_2.high, tiling_2)
bvec_3 = vcf.BinaryVector(tiling_3.high, tiling_3)
bvec_4 = vcf.BinaryVector(tiling_4.high, tiling_4)

# Concatenate binary vectors
phi = vcf.Union(bvec_1, bvec_2, bvec_3, bvec_4)

# Set up agent
nf = len(phi)
na = env.action_space.n
agent = vcf.DiscreteQ(nf, na, epsilon=0.05)
alpha_0 = 0.1
gamma = 0.999
lmbda = 0.8

# Optimistic Q-value initialization
agent.w += 1

# Initialize values
reward = 0
done = False
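
# Illustrative, self-contained sketch (an assumption, not part of the original
# script): the kind of interaction loop the setup above leads into. It avoids
# the vcf API entirely -- the uniform-grid one-hot features and the naive
# Q(lambda) update below merely stand in for vcf.UniformTiling / vcf.BinaryVector
# / vcf.DiscreteQ, whose exact interfaces are not shown in this file. Assumes
# the pre-0.26 Gym reset()/step() signatures, and is disabled with `and False`,
# following the convention used elsewhere in this file.
if __name__ == "__main__" and False:
    import gym
    import numpy as np

    env = gym.make('MountainCar-v0')
    na = env.action_space.n
    low, high = env.observation_space.low, env.observation_space.high

    bins = 19                      # bins per state dimension (illustrative)
    nf = bins ** low.shape[0]      # number of one-hot features

    def phi_onehot(obs):
        """Map an observation to a one-hot vector over a uniform grid."""
        ratios = (obs - low) / (high - low)
        idx = np.clip((ratios * bins).astype(int), 0, bins - 1)
        x = np.zeros(nf)
        x[np.ravel_multi_index(tuple(idx), (bins,) * idx.size)] = 1.0
        return x

    w = np.ones((na, nf))          # optimistic initialization (rewards are -1)
    z = np.zeros_like(w)           # eligibility traces
    alpha, gamma, lmbda, epsilon = 0.1, 0.999, 0.8, 0.05

    for episode in range(200):
        obs = env.reset()
        x = phi_onehot(obs)
        z[:] = 0
        done = False
        while not done:
            # Epsilon-greedy action selection
            if np.random.rand() < epsilon:
                a = env.action_space.sample()
            else:
                a = int(np.argmax(w @ x))
            obs_p, reward, done, _ = env.step(a)
            xp = phi_onehot(obs_p)
            # Naive Q(lambda) update (traces not cut on exploratory actions)
            q = w[a] @ x
            q_p = 0.0 if done else np.max(w @ xp)
            delta = reward + gamma * q_p - q
            z[a] += x
            w += alpha * delta * z
            z *= gamma * lmbda
            x = xp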

# An example using the MountainCar domain
# Currently, the history of the run is very large, and recording it requires
# a substantial amount of memory
if __name__ == "__main__" and False:
    import gym
    env = gym.make('MountainCar-v0')
    na = env.action_space.n

    # Define components of feature function
    tile_1 = vcf.UniformTiling(env.observation_space, 5)
    tile_2 = vcf.UniformTiling(env.observation_space, 19)
    bvec_1 = vcf.BinaryVector(tile_1.high, tile_1)
    bvec_2 = vcf.BinaryVector(tile_2.high, tile_2)

    # Define the feature function
    phi = vcf.Union(vcf.BiasUnit(), bvec_1, bvec_2)

    # Define the control (discrete actions Q-learning)
    dq = vcf.DiscreteQ(len(phi), na, epsilon=0.02)
    dq_params = {
        'alpha': vcf.Constant(0.01),
        'gm': vcf.Constant(0.9999, 0),
        'gm_p': vcf.Constant(0.9999, 0),
        'lm': vcf.Constant(0.1, 0),
    }
    control = vcf.Agent(dq, phi, dq_params)

    # List of agents to update
    agents = [control]

    # Set up the experiment

# An example using the MountainCar domain with a pre-trained control agent
if __name__ == "__main__":
    import gym

    # Create the environment
    env = gym.make('MountainCar-v0')

    # Load the control agent
    # (`jt` and CONTROL_PATH are assumed to be defined earlier in the module)
    control = jt.load(open(CONTROL_PATH, 'r'))

    # Define a learning agent
    # Tile coding for discretization to binary vectors
    tiling_1 = vcf.features.BinaryTiling(env.observation_space, 17)
    tiling_2 = vcf.features.BinaryTiling(env.observation_space, 19)
    bias = vcf.features.BiasUnit()

    # Concatenate binary vectors
    phi = vcf.Union(bias, tiling_1, tiling_2)

    # Parameters for the agent
    td_params = {
        'alpha': vcf.parameters.EpisodicPowerLaw(0.15, 0.5),
        'gm': vcf.Constant(0.999, 0),
        'gm_p': vcf.Constant(0.999, 0),
        'lm': vcf.Constant(0.1, 0),
    }

    # Specify the algorithm
    algo = vcf.algos.TD(len(phi))

    # Combine into agent
    agent = vcf.Agent(algo, phi, td_params)

    # List of agents to update
    learners = [agent]
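
    # Hedged sketch (hypothetical method names): the intent here is that the
    # pre-trained `control` agent selects actions while every agent in
    # `learners` is updated from the resulting transitions, i.e. on-policy
    # prediction of the control policy's value. The `choose`/`update` calls
    # below are placeholders; the real vcf.Agent interface is not shown in
    # this file.
    #
    #   obs = env.reset()
    #   done = False
    #   while not done:
    #       action = control.choose(obs)
    #       obs_p, reward, done, _ = env.step(action)
    #       for learner in learners:
    #           learner.update(obs, action, reward, obs_p, done)
    #       obs = obs_p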

env = gym.make('MountainCar-v0')

# Set up representation
bias_unit = vcf.BiasUnit()

# Tile coding for discretization
tiling_1 = vcf.UniformTiling(env.observation_space, 3)
tiling_2 = vcf.UniformTiling(env.observation_space, 5)
tiling_3 = vcf.UniformTiling(env.observation_space, 11)
tiling_4 = vcf.UniformTiling(env.observation_space, 19)

# Convert tile indices to binary vector
bvec_1 = vcf.BinaryVector(tiling_1.high, tiling_1)
bvec_2 = vcf.BinaryVector(tiling_2.high, tiling_2)
bvec_3 = vcf.BinaryVector(tiling_3.high, tiling_3)
bvec_4 = vcf.BinaryVector(tiling_4.high, tiling_4)

# Concatenate binary vectors
phi = vcf.Union(bias_unit, bvec_1, bvec_2, bvec_3, bvec_4)

# Set up agents
nf = len(phi)
na = env.action_space.n

# Control agent, value function learning, delta agent, delta-squared agent
control_agent = vcf.DiscreteQ(nf, na, epsilon=0.05)
value_agent = vcf.TD(nf)
delta_agent = vcf.TD(nf)
square_agent = vcf.TD(nf)

# Zero the weights, then perturb them with standard normal noise
control_agent.w *= 0
control_agent.w += np.random.normal(0, 1, control_agent.w.shape)

# Fixed parameters
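
# Hedged sketch (illustrative values, not the author's original settings):
# parameter dictionaries in the same style as the other examples in this file,
# with the key names and vcf.Constant usage copied from the blocks above.
control_params = {
    'alpha': vcf.Constant(0.1),
    'gm': vcf.Constant(0.999, 0),
    'gm_p': vcf.Constant(0.999, 0),
    'lm': vcf.Constant(0.8, 0),
}
prediction_params = {
    'alpha': vcf.Constant(0.05),
    'gm': vcf.Constant(0.999, 0),
    'gm_p': vcf.Constant(0.999, 0),
    'lm': vcf.Constant(0.8, 0),
}

# Wrapping follows the vcf.Agent(algo, phi, params) pattern used above; how the
# delta and delta-squared cumulants are supplied to their TD learners is not
# shown in this file.
learners = [
    vcf.Agent(control_agent, phi, control_params),
    vcf.Agent(value_agent, phi, prediction_params),
    vcf.Agent(delta_agent, phi, prediction_params),
    vcf.Agent(square_agent, phi, prediction_params),
]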