# Single experiment: QLearning on MountainCarContinuous-v0 with state and
# action discretization enabled (see the DISCRETIZE_* entries below).
#
# Alternative environment that can be swapped in for "ENV_NAME":
#     "ENV_NAME": "CartPole-v0",
params = {
    "PLATFORM": "openai",
    "ENV_NAME": "MountainCarContinuous-v0",
    "METHOD": "QLearning",
    "REPORTING_INTERVAL": 100,
    "LOG_LEVEL": 2,
    "NUMBER_EPISODES_MEAN": 10,
    "MEAN_REWARD_BOUND": 90,
    "NUM_TRIALS": 1,
    "MAX_EPISODES": 10000,
    "EPSILON_DECAY_LAST_FRAME": 1000000,  # 500000 does not solve it
    "EPSILON_START": 1.0,
    "EPSILON_FINAL": 0.02,  # 0.02
    "LEARNING_RATE": 0.05,
    "GAMMA": 0.99,
    "DISCRETIZE_STATE": True,
    "DISCRETIZE_STATE_BIN_SIZE": 10,
    "DISCRETIZE_ACTION": True,
    "DISCRETIZE_ACTION_BIN_SIZE": 50,
}

# Build the experiment from the parameter dict and run it.
exp = UntilWinExperiment(params)
exp.run()

# Recorded outcome of a previous run:
# solved the problem using action discretization
# Problem solved in 1839 episodes
# Trial took 62.19 seconds
# Build the remaining configurations by copying `dqn_sharing` and overlaying
# the `focus` / `prio` settings, so `dqn_sharing` itself is never mutated.
# NOTE(review): `dqn`, `dqn_prio`, `dqn_sharing`, `focus`, `prio` and
# `exp_group` are defined outside this excerpt; they are assumed to be
# dict-like (they must support `.copy()` / `.update()`).
dqn_focus_sharing = dqn_sharing.copy()
dqn_focus_sharing.update(focus)

# others
dqn_prio_sharing = dqn_sharing.copy()
dqn_prio_sharing.update(prio)

# `prio` is applied before `focus`, so `focus` wins on any overlapping key.
dqn_prio_focus_sharing = dqn_sharing.copy()
dqn_prio_focus_sharing.update(prio)
dqn_prio_focus_sharing.update(focus)

## prepare the experiment
experiments = {
    'dqn': dqn,
    'dqn_prio': dqn_prio,
    'dqn_sharing': dqn_sharing,
    'dqn_prio_sharing': dqn_prio_sharing,
    'dqn_focus_sharing': dqn_focus_sharing,
    'dqn_prio_focus_sharing': dqn_prio_focus_sharing,
}

# Run every configuration in reverse insertion order
# ('dqn_prio_focus_sharing' first, 'dqn' last).
for exp_name, params in reversed(list(experiments.items())):
    print(exp_name, params)
    # Configurations whose name contains 'sharing' use the multi-agent
    # experiment class; all others use the single-agent one.
    if 'sharing' in exp_name:
        exp = MultiAgentExperiment(params, exp_name, exp_group)
    else:
        exp = UntilWinExperiment(params, exp_name, exp_group)
    exp.run()
"EPSILON_START": 1.0, "EPSILON_FINAL": 0, "LEARNING_RATE": 0.3, "GAMMA": 0.99 } params["ENV_NAME"] = "FrozenLakeNotSlippery-v0" results = [] methods = [ "QLearning", "Sarsa", "FirstVisitMonteCarlo", "EveryVisitMonteCarlo", "NStepsQLearning", "NStepsSarsa" ] for method in methods: params["METHOD"] = method exp = UntilWinExperiment(params) result = exp.run() results.append(result) for method, result in zip(methods, results): print("Method {} took an average of {:.2f} episodes".format( method, result)) # Results, using all same parameters: # For "FrozenLakeNotSlippery-v0" # # Method QLearning took an average of 321.20 episodes # Method Sarsa took an average of 500.53 episodes # Method FirstVisitMonteCarlo took an average of 341.27 episodes # Method EveryVisitMonteCarlo took an average of 304.73 episodes # Method NStepsQLearning took an average of 287.47 episodes