def main():
    env = utils.setupEnv('MarLo-TrickyArena-v0')

    # Get the number of available actions, minus waiting action
    actionSize = env.action_space.n

    epsilonDecay = 0.98
    alphas = [0.8, 0.5, 0.1]
    gammas = [1, 0.5]

    # Sweep over every learning-rate / discount-factor combination
    for alpha in alphas:
        for gamma in gammas:
            # Encode the hyperparameters in the output file names,
            # replacing '.' with '_' so the values are filesystem-safe
            QTableName = ("QTable_Alpha_" + str(alpha).replace(".", "_") +
                          "_Gamma_" + str(gamma).replace(".", "_") +
                          "_Decay_" + str(epsilonDecay).replace(".", "_") +
                          ".json")
            CSVName = ("Results_Alpha_" + str(alpha).replace(".", "_") +
                       "_Gamma_" + str(gamma).replace(".", "_") +
                       "_Decay_" + str(epsilonDecay).replace(".", "_") +
                       ".csv")

            myAgent = QLearningAgent(actionSize, 200, QTableName, CSVName,
                                     False, epsilonDecay, alpha, gamma)

            # Start the running of the Agent
            myAgent.runAgent(env)
    return
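
# The file-name construction above repeats the same "replace '.' with '_'"
# pattern for each hyperparameter. Below is a minimal sketch of a helper that
# could factor that pattern out; the function name makeRunFileNames is
# hypothetical and not part of the original project.
def makeRunFileNames(alpha, gamma, epsilonDecay):
    """Build the Q-table (.json) and results (.csv) file names for one run."""
    def safe(value):
        # Replace '.' with '_' so the value is filesystem-friendly
        return str(value).replace(".", "_")

    suffix = "Alpha_%s_Gamma_%s_Decay_%s" % (safe(alpha), safe(gamma),
                                             safe(epsilonDecay))
    return "QTable_" + suffix + ".json", "Results_" + suffix + ".csv"


# Example: makeRunFileNames(0.8, 1, 0.98) returns
#   ('QTable_Alpha_0_8_Gamma_1_Decay_0_98.json',
#    'Results_Alpha_0_8_Gamma_1_Decay_0_98.csv')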
def main():
    env = utils.setupEnv('MarLo-CliffWalking-v0')

    # Get the number of available actions, minus waiting action
    # actionSize = env.action_space.n
    actionSize = 5

    epsilonDecay = 0.98
    # alphas = [0.8, 0.5, 0.1]
    # gammas = [1, 0.5]
    alphas = [0.8]
    gammas = [1]

    i = 1
    for alpha in alphas:
        for gamma in gammas:
            QTableName = ("QTable_Alpha_" + str(alpha).replace(".", "_") +
                          "_Gamma_" + str(gamma).replace(".", "_") +
                          "_Decay_" + str(epsilonDecay).replace(".", "_") +
                          ".json")
            CSVName = (str(i) + "_Test_Results_Alpha_" +
                       str(alpha).replace(".", "_") +
                       "_Gamma_" + str(gamma).replace(".", "_") +
                       "_Decay_" + str(epsilonDecay).replace(".", "_") +
                       ".csv")

            myAgent = QLearningAgent(actionSize, 25, QTableName, CSVName,
                                     True, epsilonDecay, alpha, gamma, 0.00,
                                     training=True)

            print("\n\n -------------- Starting test run of Decay %s, "
                  "Alpha %s and Gamma %s --------- \n \n"
                  % (epsilonDecay, alpha, gamma))

            # Start the running of the Agent
            myAgent.runAgent(env)

            # Advance the run index used to prefix the results CSV
            i += 1
    return
""" Initialize differents Agents SARSA Expected SARSA QLearning """ #print(f"EPSILON: {params.EPSILON}, LEARNING_RATE: {params.LEARNING_RATE}, DISCOUNT: {params.DISCOUNT}, DISCRETE_VALUE: {params.DISCRETE_VALUE}, env.observation_space.high: {params.env.observation_space.high},\ # env.action_space.n: {params.env.action_space.n}, env.action_space: {params.env.action_space}, \ # DISCRETE: {params.DISCRETE}") qLearningAgent = QLearningAgent(params.EPSILON, params.LEARNING_RATE, params.DISCOUNT, params.DISCRETE_VALUE, params.env.observation_space.high, params.env.action_space.n, params.env.action_space, params.DISCRETE) sarsaAgent = SarsaAgent(params.EPSILON, params.LEARNING_RATE, params.DISCOUNT, params.DISCRETE_VALUE, params.env.observation_space.high, params.env.action_space.n, params.env.action_space, params.DISCRETE) expectedSarsaAgent = ExpectedSarsaAgent(params.EPSILON, params.LEARNING_RATE, params.DISCOUNT, params.DISCRETE_VALUE, params.env.observation_space.high, params.env.action_space.n, params.env.action_space, params.DISCRETE)