"""Train a tabular Q-learner on a 4x4 maze, show its tables, and plot a reward histogram."""
import pylab as plt

from qlearner import QLearner

# Load the reward matrix (.npy) and maze metadata for the 4x4 maze.
# NOTE(review): paths are hard-coded to a specific user's home tree — adjust as needed.
my_learner = QLearner()
my_learner.load_maze('/u/braun/tlab/QLearner/data/reward_4x4.npy',
                     '/u/braun/tlab/QLearner/data/meta_4x4.txt')

print("begin training...")
# 0.7 is the scalar passed to QLearner.train — presumably a learning-rate or
# discount parameter; TODO confirm against the QLearner implementation.
reward = my_learner.train(0.7)
my_learner.display_Q()
my_learner.display_R()

steps = my_learner.test(7)  # 7 foods in 4x4 maze
print("steps")
print(steps)
print("")

# `normed` was deprecated in matplotlib 2.1 and removed in 3.1;
# `density=True` is the supported equivalent (normalize so the area sums to 1).
plt.hist(reward, 50, density=True, facecolor='g', alpha=0.75)
plt.xlabel('Episodes required to reach 200')
plt.ylabel('Frequency')
plt.title('Histogram')
plt.show()
# Initialise environment and agent wrapper = CartPoleWrapperDiscrete() agent = QLearner(wrapper=wrapper, seed=run) style.use('fivethirtyeight') fig = plt.figure() plt.axis([0, args.episodes, 0, 300]) plt.xlabel('Episodes') plt.ylabel('AVG Reward last 50 episodes') # For each episode, train the agent on the environment and record the # reward of each episode for episode in range(num_episodes): rewards[episode] = agent.train() if (episode % 50) == 0 and episode != 0: avg_last = float(sum(rewards[episode - 50:episode])) / 50 plt.scatter(episode, avg_last) plt.pause(0.05) # Check if environment is solved if wrapper.solved(rewards[:episode]): end_episode = episode break # Record and print performance runtime_per_run.append(timer() - start) rewards_per_run['run' + str(run)] = rewards if end_episode >= 99: print('average reward of last 100 episodes of run', run, '=', float(sum(rewards[-100:])) / 100)