def main():
    """Run the Q-Learning experiment several times and save the averaged result.

    Reads the run configuration through obtainParameters(), prepares the
    folders through prepareFolders(), builds one MDP and one Agent, and then
    runs QLearning ``numberOfExecutions`` times on them.  The second value
    returned by each ``execute()`` call is converted with ``array`` and summed
    across runs; the sum divided by the number of runs is passed to
    saveOutputFiles() together with the learner of the last run.

    Raises:
        ValueError: if ``numberOfExecutions`` is smaller than 1 (or is not an
            integer literal), since there would be nothing to average or save.
    """
    # resolve the parameters sent from the command line call
    params = obtainParameters()

    num_executions = int(params['numberOfExecutions'])
    if num_executions < 1:
        # Without this guard the averaging below divides by zero and the
        # learner passed to saveOutputFiles() is never bound.
        raise ValueError(
            'numberOfExecutions must be at least 1, got %r'
            % (params['numberOfExecutions'],))

    # resolve file issues regarding the execution of the algorithm
    prepareFolders(params['commandPath'], params['filePath'])

    myMDP = MDP(params['filePath'])
    myAgent = Agent(myMDP)

    # The hyper-parameters do not change between runs: convert them once.
    learner_kwargs = {
        'alpha': float(params['alpha']),
        'gamma': float(params['gamma']),
        'epsilon': float(params['epsilon']),
        'epsilonIncrement': float(params['epsilonIncrement']),
        'K': int(params['K']),
        'H': int(params['H']),
        'gammaPRQL': float(params['gammaPRQL']),
    }

    Wacumulado = 0
    myQLearning = None
    for i in range(num_executions):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Running experiment {0} of {1}'.format(
            i + 1, params['numberOfExecutions']))

        myQLearning = QLearning(myMDP, myAgent, **learner_kwargs)

        # execute() returns a pair; only the second element is accumulated.
        _unused_w, run_ws = myQLearning.execute()
        Wacumulado += array(run_ws)

    Ws = Wacumulado / float(num_executions)

    saveOutputFiles(myQLearning, params, Ws)
def main():
    """Run the PRQ-Learning experiment several times and save the results.

    Reads the run configuration through obtainParameters(), prepares the
    folders through prepareFolders(), builds one MDP and one Agent, and then
    runs PRQLearning ``numberOfExecutions`` times on them.  The dict returned
    by each ``execute()`` call is folded into ``outputAccum`` with
    accumulateOutput(); that accumulator is passed to saveOutputFiles()
    together with the ``myQLearning`` attribute of the last learner.

    Raises:
        ValueError: if ``numberOfExecutions`` is smaller than 1 (or is not an
            integer literal), since there would be no learner to save.
    """
    # resolve the parameters sent from the command line call
    params = obtainParameters()

    num_executions = int(params['numberOfExecutions'])
    if num_executions < 1:
        # Without this guard the learner passed to saveOutputFiles() is
        # never bound and the function fails with an unrelated error.
        raise ValueError(
            'numberOfExecutions must be at least 1, got %r'
            % (params['numberOfExecutions'],))

    # resolve file issues regarding the execution of the algorithm
    prepareFolders(params['commandPath'], params['filePath'])

    myMDP = MDP(params['filePath'])
    myAgent = Agent(myMDP)

    # The hyper-parameters do not change between runs: convert them once.
    learner_kwargs = {
        'alpha': float(params['alpha']),
        'gamma': float(params['gamma']),
        'epsilon': float(params['epsilon']),
        'epsilonIncrement': float(params['epsilonIncrement']),
        'K': int(params['K']),
        'H': int(params['H']),
        'gammaPRQL': float(params['gammaPRQL']),
        'tau': float(params['tau']),
        'deltaTau': float(params['deltaTau']),
        'psi': float(params['psi']),
        'v': float(params['v']),
        'filePath': params['filePath'],
    }

    outputAccum = {}
    myPRQLearning = None
    for i in range(num_executions):
        # Single-argument print(...) emits the same text on Python 2 and 3;
        # flush so progress is visible immediately when stdout is redirected.
        print('Running experiment {0} of {1}'.format(
            i + 1, params['numberOfExecutions']))
        sys.stdout.flush()

        myPRQLearning = PRQLearning(myMDP, myAgent, **learner_kwargs)

        output = myPRQLearning.execute()
        accumulateOutput(outputAccum, output)

    saveOutputFiles(myPRQLearning.myQLearning, params, outputAccum)