def main():
    if len(sys.argv) > 1:
        env = utils.setupEnv(sys.argv[1])
    else:
        env = utils.setupEnv(port=10000)

    # Get the number of available states and actions - generates the output of CNN
    observation_shape = env.observation_space.shape
    action_size = env.action_space.n
    #pdb.set_trace()

    # Can start from a pre-built model
    #load = input("Load model? y/n or an epsilon value to continue: ")

    block_map_shape = (4, 4, 3)
    myagent = agent(observation_shape, action_size, block_map_shape, True, 0.0)
    #pdb.set_trace()
    scores = trainAgent(env, myagent)
    '''
    if load == 'y':
        myagent = agent(observation_shape, action_size, block_map_shape, True, 0.1)
        #pdb.set_trace()
        scores = testAgent(env, myagent)
    elif load == 'n':
        myagent = agent(observation_shape, action_size, block_map_shape)
        #pdb.set_trace()
        scores = trainAgent(env, myagent)
    else:
        #TODO - how come the 'epsilon value' runs still load a model??
        myagent = agent(observation_shape, action_size, block_map_shape, True, float(load))
        scores = trainAgent(env, myagent)
    '''
    np.savetxt('dqn_botscores', np.array(scores))
    #plt.plot(scores)
    #plt.show()
    return
def main():
    # Take in command line arguments, and use them for environment setup.
    # Check for the two-argument case first, otherwise it can never be reached.
    if len(sys.argv) > 2:
        env = utils.setupEnv(sys.argv[1], port=sys.argv[2])
    elif len(sys.argv) > 1:
        env = utils.setupEnv(sys.argv[1])
    else:
        env = utils.setupEnv()

    # Get the number of available actions
    actionSize = env.action_space.n

    # Let the user decide whether to load an existing Q-table
    load = input("Load Q Table? y/n - Default as y:________")

    # Set the agent to load the Q-table unless the user chooses not to
    if load.lower() == 'n':
        myAgent = QLearningAgent(actionSize, 200, 'QTable.json', 'qlearningResults.csv', epsilon=1.0)
    else:
        myAgent = QLearningAgent(actionSize, 200, 'QTable.json', 'qlearningResults.csv', True, epsilon=1.0)

    # Start the running of the agent
    myAgent.runAgent(env)
    return
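# An alternative way (hypothetical, not part of the original code) to handle the
# same three command-line cases with the standard-library argparse module:
def parseArgsAndSetupEnv():
    import argparse

    parser = argparse.ArgumentParser(description="Run the Q-learning agent")
    parser.add_argument("env_name", nargs="?", default=None,
                        help="MarLo environment name (optional)")
    parser.add_argument("port", nargs="?", default=None,
                        help="Minecraft client port (optional)")
    args = parser.parse_args()

    if args.env_name and args.port:
        return utils.setupEnv(args.env_name, port=args.port)
    if args.env_name:
        return utils.setupEnv(args.env_name)
    return utils.setupEnv()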
def main(): env = utils.setupEnv("MarLo-CliffWalking-v0") # Get the number of available actions, minus waiting action actionSize = env.action_space.n epsilonDecay = 0.97 #not alpha #have 6 values for gamma # which ones are best then why we used values for the q-learning #best two run for cliff and and treck a gammas = [1, 0.8, 0.6, 0.4, 0.2, 0] for gamma in gammas: mc_QTableName = "mc_QTable_Gamma_" + str(gamma).replace( ".", "_") + "_Decay_" + str(epsilonDecay).replace(".", "_") + ".json" mc_CSVName = "mc_Results_Gamma_" + str(gamma).replace( ".", "_") + "_Decay_" + str(epsilonDecay).replace(".", "_") + ".csv" myAgent = MC_agent(actionSize, mc_QTableName, mc_CSVName, False, epsilonDecay, gamma) # Start the running of the Agent myAgent.runAgent(env) return
def main():
    env = utils.setupEnv('MarLo-TrickyArena-v0')

    # Get the number of available actions, minus waiting action
    actionSize = env.action_space.n

    epsilonDecay = 0.98
    alphas = [0.8, 0.5, 0.1]
    gammas = [1, 0.5]

    for alpha in alphas:
        for gamma in gammas:
            QTableName = "QTable_Alpha_" + str(alpha).replace(
                ".", "_") + "_Gamma_" + str(gamma).replace(
                    ".", "_") + "_Decay_" + str(epsilonDecay).replace(
                        ".", "_") + ".json"
            CSVName = "Results_Alpha_" + str(alpha).replace(
                ".", "_") + "_Gamma_" + str(gamma).replace(
                    ".", "_") + "_Decay_" + str(epsilonDecay).replace(
                        ".", "_") + ".csv"

            myAgent = QLearningAgent(actionSize, 200, QTableName, CSVName, False,
                                     epsilonDecay, alpha, gamma)

            # Start the running of the agent
            myAgent.runAgent(env)
    return
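# The QTable/CSV filename construction above is repeated across the Q-learning and
# Monte Carlo scripts; one possible refactor (hypothetical, not part of the original
# code) is to build the shared hyperparameter suffix in a single helper:
def hyperparamSuffix(epsilonDecay, gamma, alpha=None):
    """Build the 'Alpha_..._Gamma_..._Decay_...' filename fragment."""
    def fmt(value):
        # Replace the decimal point so values are filesystem-friendly
        return str(value).replace(".", "_")

    parts = []
    if alpha is not None:
        parts.append("Alpha_" + fmt(alpha))
    parts.append("Gamma_" + fmt(gamma))
    parts.append("Decay_" + fmt(epsilonDecay))
    return "_".join(parts)

# Example usage, producing the same names as the inline construction above:
#   QTableName = "QTable_" + hyperparamSuffix(0.98, 1, 0.8) + ".json"
#   CSVName = "Results_" + hyperparamSuffix(0.98, 1, 0.8) + ".csv"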
def main():
    if len(sys.argv) > 1:
        env = utils.setupEnv(sys.argv[1])
    else:
        env = utils.setupEnv()

    # Get the number of available actions
    actionSize = env.action_space.n

    # Let the user decide whether to load an existing model
    load = input("Load Q Table? y/n - Default as y:________")

    # Set the agent to load the Q-table unless the user chooses not to
    if load.lower() == 'n':
        myAgent = MC_agent(actionSize)
    else:
        myAgent = MC_agent(actionSize, True)

    # Start the running of the agent
    myAgent.runAgent(env)
    return
def main():
    # If arguments are supplied when running the agent, pass them to the setup env
    # function, else use defaults. The two-argument case must be checked first,
    # otherwise it can never be reached.
    if len(sys.argv) > 2:
        env = utils.setupEnv(sys.argv[1], port=sys.argv[2])
    elif len(sys.argv) > 1:
        env = utils.setupEnv(sys.argv[1])
    else:
        env = utils.setupEnv()

    # Get the number of available states and actions - generates the output of CNN
    observation_shape = env.observation_space.shape
    action_size = env.action_space.n

    # Initialise agent and then run it.
    myagent = agent(observation_shape, action_size, False, 1.0)
    scores = trainAgent(env, myagent)
    '''
    #Can start from a pre-built model
    load = input("Load model? y/n or an epsilon value to continue: ")
    if load == 'y':
        myagent = agent(observation_shape, action_size, block_map_shape, True, 0.1)
        #pdb.set_trace()
        scores = testAgent(env, myagent)
    elif load == 'n':
        myagent = agent(observation_shape, action_size, block_map_shape)
        #pdb.set_trace()
        scores = trainAgent(env, myagent)
    else:
        #TODO - how come the 'epsilon value' runs still load a model??
        myagent = agent(observation_shape, action_size, block_map_shape, True, float(load))
        scores = trainAgent(env, myagent)
    '''
    np.savetxt('dqn_botscores', np.array(scores))
    #plt.plot(scores)
    #plt.show()
    return
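# Hypothetical helper (not in the original code): reload the scores written by
# np.savetxt above and plot them offline, mirroring the commented-out plt calls.
def plotScores(path='dqn_botscores'):
    import numpy as np
    import matplotlib.pyplot as plt

    scores = np.loadtxt(path)
    plt.plot(scores)
    plt.xlabel('Episode')
    plt.ylabel('Score')
    plt.show()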
def main():
    env = utils.setupEnv('MarLo-CliffWalking-v0')

    # Get the number of available actions, minus waiting action
    #actionSize = env.action_space.n
    actionSize = 5

    epsilonDecay = 0.98
    #alphas = [0.8, 0.5, 0.1]
    #gammas = [1, 0.5]
    alphas = [0.8]
    gammas = [1]
    i = 1

    for alpha in alphas:
        for gamma in gammas:
            QTableName = "QTable_Alpha_" + str(alpha).replace(
                ".", "_") + "_Gamma_" + str(gamma).replace(
                    ".", "_") + "_Decay_" + str(epsilonDecay).replace(
                        ".", "_") + ".json"
            CSVName = str(i) + "_Test_Results_Alpha_" + str(alpha).replace(
                ".", "_") + "_Gamma_" + str(gamma).replace(
                    ".", "_") + "_Decay_" + str(epsilonDecay).replace(
                        ".", "_") + ".csv"

            myAgent = QLearningAgent(actionSize, 25, QTableName, CSVName, True,
                                     epsilonDecay, alpha, gamma, 0.00, training=True)

            print(
                "\n\n -------------- Starting test run of Decay %s, Alpha %s and Gamma %s --------- \n \n"
                % (epsilonDecay, alpha, gamma))

            # Start the running of the agent
            myAgent.runAgent(env)
    return