# NOTE(review): collapsed one-line fragment of the Q-learning episode loop
# (inner step body): tracks the best-seen episode reward and snapshots the
# Q-table into `qbest`, does one learn() update on the string-encoded next
# state, flushes the env, and on episode end appends the step count and saves
# `qbest` before breaking. References loop variables (`observation`, `state`,
# `action`, `reward`, `done`, `i`, `x`, `start_time`) defined outside this view.
# The trailing print is truncated mid-expression ("- epsilon: " +) — the
# continuation is not visible here, so the line is left byte-identical.
# This appears to be a modified duplicate of the fragment on the next line
# (same loop, without the qbest tracking) — presumably a paste/merge artifact;
# TODO confirm which version is live.
if highest_reward < cumulated_reward: highest_reward = cumulated_reward qbest = qlearn.get_Q() nextState = ''.join(map(str, observation)) qlearn.learn(state, action, reward, nextState) env._flush(force=True) if not (done): state = nextState else: last_time_steps = numpy.append(last_time_steps, [int(i + 1)]) qlearn.saveQ("QValues", qbest) break print("===== Completed episode {}".format(x)) # #Modify this to save the highest reward # if (x > 0) and (x % 5 == 0): # qlearn.saveQ("QValues") plotter.plot( env ) #currently this saves q values at every 5 episode intervals... m, s = divmod(int(time.time() - start_time), 60) h, m = divmod(m, 60) print("Starting EP: " + str(x + 1) + " - [alpha: " + str(round(qlearn.alpha, 2)) + " - gamma: " + str(round(qlearn.gamma, 2)) + " - epsilon: " +
# NOTE(review): collapsed one-line fragment of the Q-learning episode loop
# (inner step body, original variant without qbest tracking): one learn()
# update on the string-encoded next state, env flush, episode-end bookkeeping
# (append step count, break), periodic Q-table save every 5 episodes, a plot,
# and an elapsed-time / hyperparameter status print. References loop variables
# (`observation`, `state`, `action`, `reward`, `done`, `i`, `x`, `start_time`,
# `cumulated_reward`, `total_episodes`) defined outside this view. The final
# "Github table content" print is truncated mid-expression (trailing `+`) —
# its continuation is not visible here, so the line is left byte-identical.
nextState = ''.join(map(str, observation)) qlearn.learn(state, action, reward, nextState) env._flush(force=True) if not(done): state = nextState else: last_time_steps = numpy.append(last_time_steps, [int(i + 1)]) break print("===== Completed episode {}".format(x)) if (x > 0) and (x % 5 == 0): qlearn.saveQ("QValues") plotter.plot(env) m, s = divmod(int(time.time() - start_time), 60) h, m = divmod(m, 60) print ("Starting EP: " + str(x+1) + " - [alpha: " + str(round(qlearn.alpha, 2)) + " - gamma: " + str(round(qlearn.gamma, 2)) + " - epsilon: " + str(round(qlearn.epsilon, 2)) + "] - Reward: " + str(cumulated_reward) + " Time: %d:%02d:%02d" % (h, m, s)) #time.sleep(2) # Github table content print ("\n|"+str(total_episodes)+"|"+str(qlearn.alpha)+"|" +
def save(self):
    """Persist the agent's Q-table via the module-level ``qlearn`` object.

    Fix: the original used the Python 2 print statement
    (``print 'saving agent.'``), a syntax error under Python 3 and
    inconsistent with the ``print(...)`` calls used elsewhere in this file.
    """
    print('saving agent.')
    # NOTE(review): relies on a module-global `qlearn`; saveQ() is called with
    # no filename here, unlike the "QValues" saves elsewhere — presumably it
    # has a default path; verify against the QLearn implementation.
    qlearn.saveQ()