def _run_trials(ab, rl, numTrials, explore, exploit, stop_on_assert):
    """Run up to numTrials (explore episode, greedy-evaluation episode) pairs.

    explore/exploit are zero-argument callables that reconfigure ``rl`` before
    the corresponding episode.  When ``stop_on_assert`` is True an
    AssertionError from the learner ends training early (matching the original
    LSVI/RLSVI loops); otherwise it propagates (matching the original Q loop,
    which had no try/except).

    Returns (levelsPassed, totalRewards) accumulated from the evaluation
    episodes only.
    """
    levelsPassed = []
    totalRewards = []
    trial = 0
    while trial < numTrials:
        try:
            print(trial)
            explore()
            simulate(ab, rl, numTrials=1, maxIterations=1000, verbose=False, show=False)
            exploit()
            outcome = simulate(ab, rl, numTrials=1, maxIterations=1000, verbose=False, show=False)
            levelsPassed = levelsPassed + outcome['levelsPassed']
            totalRewards = totalRewards + outcome['totalRewards']
            print(totalRewards)
            trial += 1
        except AssertionError:
            if stop_on_assert:
                break
            raise
    return levelsPassed, totalRewards


def _dump_result(path, obj):
    """Pickle obj to path."""
    with open(path, 'wb') as fh:
        pickle.dump(obj, fh)


def _plot_learning_curve(cum, levelsPassed, color, ylabel, path):
    """Plot a cumulative-average learning curve and save it to ``path``.

    A dashed red vertical line marks every trial where a new record number of
    levels passed was reached (this was applied to BOTH plots in the original,
    including the rewards plot — preserved as-is).
    """
    trials = range(1, len(cum) + 1)
    plt.figure()
    plt.plot(trials, cum, lw=2, color=color)
    plt.scatter(trials, cum)
    plt.xlabel('Number of trials', fontsize='large')
    plt.ylabel(ylabel, fontsize='large')
    plt.title('Rewards per trials', fontsize=20)
    maxLevel = 0
    for i in range(len(cum)):
        if levelsPassed[i] > maxLevel:
            maxLevel = levelsPassed[i]
            plt.axvline(i + 1, color='r', linestyle='--')
    plt.savefig(path)


def evaluator(rlAlgorithm, featureExtractor, nameAlg, nameFeat, multiple=1.0,
              numTrials=50, epsilon=0.3, sigma=500):
    """Train an RL agent on the Angry Birds MDP and record its learning curve.

    Alternates one exploration episode with one greedy-evaluation episode per
    trial, then pickles the raw and cumulative-average results under
    ../results/ and saves learning-curve plots under ../plots/.

    Parameters
    ----------
    rlAlgorithm : learner class (QLearningAlgorithm, RLSVI, DRLSVI, ...);
        constructed with (actions, featureExtractor, epsilon).
    featureExtractor : feature extractor forwarded to the learner.
    nameAlg, nameFeat : labels; select the training schedule and build the
        output file name.
    multiple : action-grid multiplier forwarded to getAngryBirdsActions.
    numTrials : number of explore/evaluate trial pairs.
    epsilon : exploration rate for the exploration episode.
    sigma : RLSVI noise scale.  BUG FIX: the original hard-coded 500.0 here
        and ignored this parameter; behavior is unchanged for all existing
        call sites (they all pass sigma=500).
    """
    ab = AngryBirdsMDP()
    name = nameAlg + '_' + nameFeat + '_' + str(64 * multiple)
    agent = angryAgent()
    rl = rlAlgorithm(actions=lambda x: agent.getAngryBirdsActions(x, multiple),
                     featureExtractor=featureExtractor, epsilon=epsilon)

    if nameAlg == 'Q':
        levelsPassed, totalRewards = _run_trials(
            ab, rl, numTrials,
            explore=lambda: setattr(rl, 'explorationProb', epsilon),
            exploit=lambda: setattr(rl, 'explorationProb', 0.0),
            stop_on_assert=False)
    elif nameAlg == 'LSVI':
        levelsPassed, totalRewards = _run_trials(
            ab, rl, numTrials,
            explore=lambda: rl.makeLSVI(epsilon),
            exploit=lambda: rl.makeLSVI(0.0),
            stop_on_assert=True)
    elif nameAlg == 'RLSVI' or nameAlg == 'DRLSVI':
        # NOTE(review): the original source contained a second, unreachable
        # `elif nameAlg == 'DRLSVI'` branch (rl.makeRLSVI() / rl.makeLSVI(0.0))
        # that this branch shadowed; it has been removed as dead code.  If the
        # DRLSVI schedule was *meant* to differ, restore it ABOVE this branch.
        levelsPassed, totalRewards = _run_trials(
            ab, rl, numTrials,
            explore=lambda: setattr(rl.rlsvi, 'sigma', float(sigma)),
            exploit=lambda: setattr(rl.rlsvi, 'sigma', float(sigma)),
            stop_on_assert=True)
    else:
        # Unknown algorithm label: no training, empty results (matches the
        # original's fall-through behavior).
        levelsPassed, totalRewards = [], []

    # Cumulative averages: cum[i] = mean of the first i+1 entries.
    cumlevelsPassed = np.cumsum(levelsPassed) / np.arange(1, len(levelsPassed) + 1)
    cumtotalRewards = np.cumsum(totalRewards) / np.arange(1, len(totalRewards) + 1)

    _dump_result('../results/levelsPassed_' + name, levelsPassed)
    _dump_result('../results/cumlevelsPassed_' + name, cumlevelsPassed)
    _dump_result('../results/totalRewards_' + name, totalRewards)
    _dump_result('../results/cumtotalRewards_' + name, cumtotalRewards)

    # Guard added: the original called max() unconditionally and raised
    # ValueError when training ended with no completed trials (e.g. an
    # immediate AssertionError in the LSVI/RLSVI loop).
    if totalRewards:
        print('Max Total Rewards _' + name, max(totalRewards))

    # Double underscore in 'levelsPassed__' preserved from the original paths.
    _plot_learning_curve(cumlevelsPassed, levelsPassed, 'blue',
                         'Number of levels passed',
                         '../plots/levelsPassed__' + name + '.png')
    _plot_learning_curve(cumtotalRewards, levelsPassed, 'green',
                         'Number of total rewards',
                         '../plots/totalRewards_' + name + '.png')
def generalize_evaluator(rlAlgorithm, featureExtractor, nameAlg, nameFeat,
                         multiple=1.0, numTrials=50, epsilon=0.3, sigma=500):
    """Train on one MDP and evaluate generalization on a held-out MDP,
    pickling both outcome dicts under ../results/.

    NOTE(review): the original `def` header for this function was lost in the
    source (the body referenced free names ab_train/ab_test/name/agent).  The
    signature is reconstructed from the call sites in __main__ and the
    name/agent setup mirrors evaluator(); confirm against the original file.
    numTrials/sigma are accepted for call-site compatibility; the visible body
    used fixed episode counts (30 train / 10 test) and did not read sigma.
    """
    name = nameAlg + '_' + nameFeat + '_' + str(64 * multiple)
    agent = angryAgent()
    # TODO(review): the original presumably constructed these with distinct
    # train/test level sets — constructor arguments not visible here; confirm.
    ab_train = AngryBirdsMDP()
    ab_test = AngryBirdsMDP()
    rl = rlAlgorithm(actions=agent.getAngryBirdsActions,
                     featureExtractor=featureExtractor, epsilon=epsilon)
    trainingOutcomes = simulate(ab_train, rl, numTrials=30, maxIterations=1000,
                                verbose=False, show=False)
    testOutcomes = simulate(ab_test, rl, numTrials=10, maxIterations=1000,
                            verbose=False, show=False)
    with open('../results/train_' + name, 'wb') as fh:
        pickle.dump(trainingOutcomes, fh)
    with open('../results/test_' + name, 'wb') as fh:
        pickle.dump(testOutcomes, fh)


if __name__ == '__main__':
    agent = angryAgent()
    evaluator(QLearningAlgorithm, agent.PPFeatureExtractor, 'Q', 'PP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(QLearningAlgorithm, agent.NPPFeatureExtractor, 'Q', 'NPP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(QLearningAlgorithm, agent.NPPOFeatureExtractor, 'Q', 'NPPO', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(QLearningAlgorithm, agent.NPPSFeatureExtractor, 'Q', 'NPPS', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(DRLSVI, agent.NPPFeatureExtractor, 'DRLSVI', 'NPP', multiple=1.0, numTrials=50, epsilon=0.0, sigma=500)
    # NOTE(review): the next call passes the RLSVI class under the 'LSVI'
    # label (so evaluator runs the LSVI schedule on an RLSVI learner) —
    # looks intentional but worth confirming.
    evaluator(RLSVI, agent.NPPFeatureExtractor, 'LSVI', 'NPP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(QLearningAlgorithm, agent.NPPFeatureExtractor, 'Q', 'NPP', multiple=2.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(RLSVI, agent.NPPFeatureExtractor, 'RLSVI', 'NPP', multiple=2.0, numTrials=50, epsilon=0.0, sigma=500)
    evaluator(RLSVI, agent.NPPFeatureExtractor, 'LSVI', 'NPP', multiple=2.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(QLearningAlgorithm, agent.NPPFeatureExtractor, 'Q', 'NPP', multiple=2.0, numTrials=50, epsilon=0.3, sigma=500)
    generalize_evaluator(RLSVI, agent.NPPFeatureExtractor, 'RLSVI', 'NPP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    generalize_evaluator(QLearningAlgorithm, agent.NPPFeatureExtractor, 'Q', 'NPP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    # NOTE(review): unused leftover from the original — kept to preserve the
    # module verbatim, but it has no effect and can be deleted.
    trials = range(1, 42)