# Example #1
# print("Cart velocity bins:", cartVelocityGroup)

# Discretize the continuous CartPole observation (cart position, cart velocity)
# into a single finite state index so a tabular learner can be used.
# NOTE(review): cartPositionGroup / cartVelocityGroup are bin boundaries defined
# earlier in the file — confirm they cover the env's actual observation range.
observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup])
transformation = EnvTransformation(observationDigitizer)

task = GymTask.createTask(gymRawEnv)
env = task.env
# Route every raw observation through the digitizer before the agent sees it.
env.setTransformation(transformation)
## env.setCumulativeRewardMode()

# Tabular action-value (Q) table: one entry per (discretized state, action)
# pair. Its parameters are filled in below (loaded from file or randomized).
table = ActionValueTable(observationDigitizer.states, env.numActions)

# Initialize the Q-table parameters: restore a previously saved binary dump if
# one was given, otherwise start from small random values.
if state_load_file is not None:
    # np.fromfile reads raw float64 values — the counterpart of
    # np.ndarray.tofile, which is presumably how the state was saved.
    loadedParams = np.fromfile(state_load_file)
    # Fail fast on a truncated or wrong-dtype dump instead of silently
    # mis-sizing the value table.
    if loadedParams.size != table.paramdim:
        raise ValueError(
            "state file %r holds %d parameters, but the table expects %d"
            % (state_load_file, loadedParams.size, table.paramdim)
        )
    table._setParameters(loadedParams)
    # print( "raw data:", loadedParams )
else:
    # table.initialize(0.0)
    # Random init (uniform [0, 1)) so greedy action selection is not biased
    # toward one fixed action at the start of training.
    rand_arr = np.random.rand(table.paramdim)
    table.initialize(rand_arr)

# create agent with controller and learner - use SARSA(), Q() or QLambda() here
## alpha -- learning rate (preference of new information)
## gamma -- discount factor (importance of future reward)

# learner = Q(0.5, 0.99)
# On-policy temporal-difference learner with alpha=0.5, gamma=0.99.
learner = SARSA(0.5, 0.99)
# learner = QLambda(0.5, 0.99, 0.9)
# Keep a direct reference to the learner's built-in explorer
# (presumably epsilon-greedy — confirm against the learner class).
explorer = learner.explorer