Example #1
import logging
logger = logging.getLogger(__name__)


# An example using the MountainCar domain
if __name__ == "__main__" and True:
    import gym
    env = gym.make('MountainCar-v0')
    na = env.action_space.n

    # Tile coding for discretization to binary vectors
    tiling_1    = vcf.features.BinaryTiling(env.observation_space, 11)
    tiling_2    = vcf.features.BinaryTiling(env.observation_space, 19)
    tiling_3    = vcf.features.BinaryTiling(env.observation_space, 31)
    # Concatenate binary vectors
    phi         = vcf.Union(tiling_1, tiling_2, tiling_3)

    # Define the control (discrete actions Q-learning)
    dq = vcf.DiscreteQ(len(phi), na, epsilon=0.002)
    dq_params = {
        'alpha' : vcf.parameters.EpisodicPowerLaw(0.2, 0.25),
        'gm'    : 0.9999,
        'gm_p'  : vcf.Constant(0.9999, 0),
        'lm'    : vcf.Constant(0.5, 0),
    }
    control = vcf.Agent(dq, phi, dq_params)

    # List of agents to update
    learners = [control]

    # Set up the experiment
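    # A minimal sketch of what the experiment loop could look like from here,
    # assuming the classic gym API (reset() returns an observation and
    # step() returns (obs, reward, done, info)). The action-selection call is a
    # random-policy placeholder and the commented-out update call is only a guess
    # at how the learners might be driven; neither is taken from the vcf API.
    for episode in range(500):
        obs = env.reset()
        done = False
        ret = 0.0
        while not done:
            action = env.action_space.sample()  # placeholder for the control policy
            obs_next, reward, done, _ = env.step(action)
            # for learner in learners:
            #     learner.update(obs, action, reward, obs_next, done)  # hypothetical interface
            ret += reward
            obs = obs_next
        logger.info('Episode %d return: %.1f', episode, ret)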
Example #2
    # Set up the experiment
    env = gym.make('MountainCar-v0')
    
    # Tile coding for discretization
    tiling_1    = vcf.UniformTiling(env.observation_space, 2)
    tiling_2    = vcf.UniformTiling(env.observation_space, 3)
    tiling_3    = vcf.UniformTiling(env.observation_space, 7)
    tiling_4    = vcf.UniformTiling(env.observation_space, 13)
    # Convert tile indices to binary vector
    bvec_1      = vcf.BinaryVector(tiling_1.high, tiling_1) 
    bvec_2      = vcf.BinaryVector(tiling_2.high, tiling_2)
    bvec_3      = vcf.BinaryVector(tiling_3.high, tiling_3)
    bvec_4      = vcf.BinaryVector(tiling_4.high, tiling_4)
    # Concatenate binary vectors
    phi         = vcf.Union(bvec_1, bvec_2, bvec_3, bvec_4)

    # Set up agent
    nf = len(phi)
    na = env.action_space.n
    agent = vcf.DiscreteQ(nf, na, epsilon=0.05)
    alpha_0 = 0.1
    gamma = 0.999
    lmbda = 0.8

    # Optimistic Q-value initialization
    agent.w += 1

    # Initialize values
    reward = 0
    done = False
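
    # An aside on the optimistic initialization above (`agent.w += 1`), assuming
    # DiscreteQ computes linear action values Q(s, a) = w[a] . phi(s) over the
    # binary tile-coded features: each state activates exactly one tile per
    # tiling, so every initial estimate is about the number of tilings (4 here),
    # while the true returns in MountainCar are negative (-1 per step). The
    # inflated estimates drive early exploration. Toy check with made-up sizes
    # and indices (not computed from an actual observation):
    import numpy as np
    _n_features = 231                  # illustrative size only, not len(phi)
    _w = np.ones(_n_features)          # a weight vector after the `+= 1` shift
    _x = np.zeros(_n_features)
    _x[[0, 15, 60, 120]] = 1.0         # one active tile per tiling (made up)
    assert _w @ _x == 4.0              # optimistic relative to the negative returns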
Example #3
# An example using the MountainCar domain
# Currently, the history of the run is very large, and recording it requires
# a substantial amount of memory; the `and False` guard below keeps this block
# from running.
if __name__ == "__main__" and False:
    import gym
    env = gym.make('MountainCar-v0')
    na = env.action_space.n

    # Define components of feature function
    tile_1 = vcf.UniformTiling(env.observation_space, 5)
    tile_2 = vcf.UniformTiling(env.observation_space, 19)
    bvec_1 = vcf.BinaryVector(tile_1.high, tile_1)
    bvec_2 = vcf.BinaryVector(tile_2.high, tile_2)
    # Define the feature function
    phi = vcf.Union(vcf.BiasUnit(), bvec_1, bvec_2)

    # Define the control (discrete actions Q-learning)
    dq = vcf.DiscreteQ(len(phi), na, epsilon=0.02)
    dq_params = {
        'alpha': vcf.Constant(0.01),
        'gm': vcf.Constant(0.9999, 0),
        'gm_p': vcf.Constant(0.9999, 0),
        'lm': vcf.Constant(0.1, 0),
    }
    control = vcf.Agent(dq, phi, dq_params)

    # List of agents to update
    agents = [control]

    # Set up the experiment
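    # The comment at the top of this example notes that recording the full run
    # history takes a lot of memory. One possible mitigation (a sketch, not a
    # feature of the library): thin the history, e.g. keep only every k-th
    # transition, or store per-episode totals instead of every step.
    RECORD_EVERY = 100        # hypothetical thinning factor
    history = []
    def maybe_record(step, transition):
        """Append only every RECORD_EVERY-th transition to the history."""
        if step % RECORD_EVERY == 0:
            history.append(transition)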
Example #4
if __name__ == "__main__":
    import gym
    # Create the environment
    env = gym.make('MountainCar-v0')

    # Load the previously saved control agent (`jt` and CONTROL_PATH are assumed
    # to be defined earlier in the full script)
    control = jt.load(open(CONTROL_PATH, 'r'))

    # Define a learning agent
    # Tile coding for discretization to binary vectors
    tiling_1 = vcf.features.BinaryTiling(env.observation_space, 17)
    tiling_2 = vcf.features.BinaryTiling(env.observation_space, 19)
    bias = vcf.features.BiasUnit()
    # Concatenate binary vectors
    phi = vcf.Union(bias, tiling_1, tiling_2)

    # Parameters for the agent
    td_params = {
        'alpha': vcf.parameters.EpisodicPowerLaw(0.15, 0.5),
        'gm': vcf.Constant(0.999, 0),
        'gm_p': vcf.Constant(0.999, 0),
        'lm': vcf.Constant(0.1, 0),
    }
    # Specify the algorithm
    algo = vcf.algos.TD(len(phi))
    # Combine into agent
    agent = vcf.Agent(algo, phi, td_params)

    # List of agents to update
    learners = [agent]
    env = gym.make('MountainCar-v0')

    # Set up representation
    bias_unit = vcf.BiasUnit()
    # Tile coding for discretization
    tiling_1 = vcf.UniformTiling(env.observation_space, 3)
    tiling_2 = vcf.UniformTiling(env.observation_space, 5)
    tiling_3 = vcf.UniformTiling(env.observation_space, 11)
    tiling_4 = vcf.UniformTiling(env.observation_space, 19)
    # Convert tile indices to binary vector
    bvec_1 = vcf.BinaryVector(tiling_1.high, tiling_1)
    bvec_2 = vcf.BinaryVector(tiling_2.high, tiling_2)
    bvec_3 = vcf.BinaryVector(tiling_3.high, tiling_3)
    bvec_4 = vcf.BinaryVector(tiling_4.high, tiling_4)
    # Concatenate binary vectors
    phi = vcf.Union(bias_unit, bvec_1, bvec_2, bvec_3, bvec_4)

    # Set up agents
    nf = len(phi)
    na = env.action_space.n
    # Control agent, value function learning, delta agent, delta-squared agent
    control_agent = vcf.DiscreteQ(nf, na, epsilon=0.05)
    value_agent = vcf.TD(nf)
    delta_agent = vcf.TD(nf)
    square_agent = vcf.TD(nf)

    # Initialize weights to zero, then perturb with Gaussian noise
    control_agent.w *= 0
    control_agent.w += np.random.normal(0, 1, control_agent.w.shape)

    # Fixed parameters
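    # A sketch of how the fixed parameters could be specified, reusing the
    # alpha/gm/gm_p/lm pattern from the earlier examples; the values here are
    # illustrative, not taken from the source.
    shared_params = {
        'alpha': vcf.Constant(0.05),
        'gm':    vcf.Constant(0.999, 0),
        'gm_p':  vcf.Constant(0.999, 0),
        'lm':    vcf.Constant(0.5, 0),
    }
    # Wrap each learner with the shared feature function, as in the examples above
    learners = [
        vcf.Agent(control_agent, phi, dict(shared_params)),
        vcf.Agent(value_agent, phi, dict(shared_params)),
        vcf.Agent(delta_agent, phi, dict(shared_params)),
        vcf.Agent(square_agent, phi, dict(shared_params)),
    ]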