Beispiel #1
0
# "ENV_NAME": "CartPole-v0",
# Settings for one Q-learning trial on the continuous mountain-car
# environment, with both state and action discretization switched on.
params = dict(
    # Platform, environment and learning method.
    PLATFORM="openai",
    ENV_NAME="MountainCarContinuous-v0",
    METHOD="QLearning",
    # Reporting / logging and the episode, trial and reward limits.
    REPORTING_INTERVAL=100,
    LOG_LEVEL=2,
    NUMBER_EPISODES_MEAN=10,
    MEAN_REWARD_BOUND=90,
    NUM_TRIALS=1,
    MAX_EPISODES=10000,
    # Epsilon settings. Original note: with a last frame of 500000 the
    # problem was not solved.
    EPSILON_DECAY_LAST_FRAME=1000000,
    EPSILON_START=1.0,
    EPSILON_FINAL=0.02,  # 0.02
    # Learning rate and discount factor.
    LEARNING_RATE=0.05,
    GAMMA=0.99,
    # Discretization switches and the bin size configured for each.
    DISCRETIZE_STATE=True,
    DISCRETIZE_STATE_BIN_SIZE=10,
    DISCRETIZE_ACTION=True,
    DISCRETIZE_ACTION_BIN_SIZE=50,
)

# Build the experiment from the settings above and run it.
exp = UntilWinExperiment(params)
exp.run()

# Solved the problem using action discretization:
# Problem solved in 1839 episodes
# Trial took 62.19 seconds

Beispiel #2
0
# Each variant below starts from a copy of the sharing configuration, so the
# base `dqn_sharing` object itself is never modified here.

# Sharing + focus.
dqn_focus_sharing = dqn_sharing.copy()
dqn_focus_sharing.update(focus)

# Sharing + prio.
dqn_prio_sharing = dqn_sharing.copy()
dqn_prio_sharing.update(prio)

# Sharing + prio + focus; focus is applied last, so on overlapping keys its
# values win over those from prio.
dqn_prio_focus_sharing = dqn_sharing.copy()
dqn_prio_focus_sharing.update(prio)
dqn_prio_focus_sharing.update(focus)

## Prepare the experiments.
# NOTE(review): this self-assignment only re-binds the existing name; it is
# kept as in the original.
exp_group = exp_group
experiments = dict(
    dqn=dqn,
    dqn_prio=dqn_prio,
    dqn_sharing=dqn_sharing,
    dqn_prio_sharing=dqn_prio_sharing,
    dqn_focus_sharing=dqn_focus_sharing,
    dqn_prio_focus_sharing=dqn_prio_focus_sharing,
)

# Run the experiments in the reverse of the order they were registered in.
for exp_name, params in list(experiments.items())[::-1]:
    print(exp_name, params)
    # Any name containing 'sharing' uses the multi-agent experiment class;
    # every other name uses the until-win experiment class.
    exp = (
        MultiAgentExperiment if 'sharing' in exp_name else UntilWinExperiment
    )(params, exp_name, exp_group)
    exp.run()
Beispiel #3
0
    "EPSILON_START": 1.0,
    "EPSILON_FINAL": 0,
    "LEARNING_RATE": 0.3,
    "GAMMA": 0.99
}

# Every method below is run on the same environment.
params["ENV_NAME"] = "FrozenLakeNotSlippery-v0"

# Methods to compare, in the order they are run and reported.
methods = [
    "QLearning",
    "Sarsa",
    "FirstVisitMonteCarlo",
    "EveryVisitMonteCarlo",
    "NStepsQLearning",
    "NStepsSarsa",
]

# results[i] holds whatever exp.run() returned for methods[i].
results = []
for method in methods:
    # The shared params dict is updated in place, so only METHOD differs
    # between runs.
    params["METHOD"] = method
    exp = UntilWinExperiment(params)
    results.append(exp.run())

# Print the summary only after every run has finished.
for method, result in zip(methods, results):
    print(
        "Method {} took an average of {:.2f} episodes".format(method, result)
    )

# Results, using the same parameters for every method,
# on "FrozenLakeNotSlippery-v0":
#
# Method QLearning took an average of 321.20 episodes
# Method Sarsa took an average of 500.53 episodes
# Method FirstVisitMonteCarlo took an average of 341.27 episodes
# Method EveryVisitMonteCarlo took an average of 304.73 episodes
# Method NStepsQLearning took an average of 287.47 episodes