Example #1

# Assumed available from the project's own modules (import paths not shown in this excerpt):
# AngryBirdsMDP, angryAgent, simulate, QLearningAlgorithm, RLSVI, DRLSVI.
import pickle

import numpy as np
import matplotlib.pyplot as plt

def evaluator(rlAlgorithm,featureExtractor,nameAlg,nameFeat,multiple = 1.0,numTrials=50, epsilon = 0.3, sigma = 500):
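    """Train and evaluate an rlAlgorithm agent on the Angry Birds MDP.

    For numTrials trials, run one exploratory training episode followed by one
    evaluation episode, record levels passed and total reward, pickle the
    per-trial results and save cumulative-average plots.
    """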
    ab = AngryBirdsMDP()
    trial = 0
    levelsPassed=[]
    totalRewards =[]
    name = nameAlg+'_'+nameFeat+'_'+str(64*multiple)

    agent = angryAgent()


    rl = rlAlgorithm(actions=lambda x: agent.getAngryBirdsActions(x, multiple),
                     featureExtractor=featureExtractor, epsilon=epsilon)

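    # Per trial: one training episode with exploration enabled, then one
    # evaluation episode with exploration disabled whose outcome is recorded.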
    if nameAlg == 'Q':
        while trial<numTrials:
            print(trial)
            rl.explorationProb = epsilon
            simulate(ab,rl,numTrials=1, maxIterations=1000, verbose=False, show=False)
            rl.explorationProb = 0.0
            outcome = simulate(ab,rl,numTrials=1, maxIterations=1000, verbose=False, show=False)
            levelsPassed = levelsPassed + outcome['levelsPassed']
            totalRewards = totalRewards + outcome['totalRewards']
            print(totalRewards)
            trial +=1

    elif nameAlg == 'LSVI':
        while trial<numTrials:
            try:
                print(trial)
                rl.makeLSVI(epsilon)
                simulate(ab,rl,numTrials=1, maxIterations=1000, verbose=False, show=False)
                rl.makeLSVI(0.0)
                outcome = simulate(ab,rl,numTrials=1, maxIterations=1000, verbose=False, show=False)
                levelsPassed = levelsPassed + outcome['levelsPassed']
                totalRewards = totalRewards + outcome['totalRewards']
                print(totalRewards)

                trial +=1
            except AssertionError:
                break
    elif nameAlg == 'RLSVI':
        while trial<numTrials:
            try:
                print(trial)
                rl.rlsvi.sigma = sigma   # exploration noise for the training episode
                simulate(ab,rl,numTrials=1, maxIterations=1000, verbose=False, show=False)
                rl.rlsvi.sigma = 0.0     # noiseless (greedy) evaluation episode
                outcome = simulate(ab,rl,numTrials=1, maxIterations=1000, verbose=False, show=False)
                levelsPassed = levelsPassed + outcome['levelsPassed']
                totalRewards = totalRewards + outcome['totalRewards']
                print(totalRewards)

                trial +=1
            except AssertionError:
                break
    elif nameAlg == 'DRLSVI':
        while trial<numTrials:
            try:
                print(trial)
                rl.makeRLSVI()
                simulate(ab,rl,numTrials=1, maxIterations=1000, verbose=False, show=False)
                rl.makeLSVI(0.0)
                outcome = simulate(ab,rl,numTrials=1, maxIterations=1000, verbose=False, show=False)
                levelsPassed = levelsPassed + outcome['levelsPassed']
                totalRewards = totalRewards + outcome['totalRewards']
                print(totalRewards)

                trial +=1
            except AssertionError:
                break

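    # Cumulative averages of levels passed and total reward over the trials run so far.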
    cumlevelsPassed = np.cumsum(levelsPassed) / np.arange(1, len(levelsPassed) + 1)
    cumtotalRewards = np.cumsum(totalRewards) / np.arange(1, len(totalRewards) + 1)

    with open('../results/levelsPassed_'+name,'wb') as file:
        pickle.dump(levelsPassed,file)
    with open('../results/cumlevelsPassed_'+name,'wb') as file:
        pickle.dump(cumlevelsPassed,file)
    with open('../results/totalRewards_'+name,'wb') as file:
        pickle.dump(totalRewards,file)
    with open('../results/cumtotalRewards_'+name,'wb') as file:
        pickle.dump(cumtotalRewards,file)

    print('Max total reward ' + name + ':', max(totalRewards))

    trials = range(1,len(cumlevelsPassed)+1)

    plt.figure()

    plt.plot(trials,cumlevelsPassed,lw=2,color='blue')
    plt.scatter(trials,cumlevelsPassed)
    plt.xlabel('Number of trials',fontsize='large')
    plt.ylabel('Average number of levels passed',fontsize='large')
    plt.title('Levels passed per trial', fontsize=20)
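    # Draw a dashed red line at every trial on which a new highest level was passed.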
    maxLevel=0
    for i in range(len(cumlevelsPassed)):
        if levelsPassed[i]>maxLevel:
            maxLevel=levelsPassed[i]
            plt.axvline(i+1,color='r', linestyle='--')
    plt.savefig('../plots/levelsPassed_'+name+'.png')

    plt.figure()
    plt.plot(trials,cumtotalRewards,lw=2,color='green')
    plt.scatter(trials,cumtotalRewards)
    plt.xlabel('Number of trials',fontsize='large')
    plt.ylabel('Average total reward',fontsize='large')
    plt.title('Total reward per trial', fontsize=20)
    maxLevel=0
    for i in range(len(cumlevelsPassed)):
        if levelsPassed[i]>maxLevel:
            maxLevel=levelsPassed[i]
            plt.axvline(i+1,color='r', linestyle='--')
    plt.savefig('../plots/totalRewards_'+name+'.png')
Example #2

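    # Fragment: tail of generalize_evaluator() (invoked from __main__ below);
    # the construction of ab_train, ab_test, agent and name is omitted in this excerpt.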
    rl = rlAlgorithm(actions=agent.getAngryBirdsActions, featureExtractor=featureExtractor,
                     epsilon=epsilon)
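    # 30 episodes on the training MDP (ab_train), then 10 on the held-out test
    # MDP (ab_test); both outcome dictionaries are pickled below.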
    trainingOutcomes = simulate(ab_train,rl,numTrials=30, maxIterations=1000, verbose=False, show=False)
    testOutcomes = simulate(ab_test,rl,numTrials=10, maxIterations=1000, verbose=False, show=False)

    with open('../results/train_'+name,'wb') as file:
        pickle.dump(trainingOutcomes,file)
    with open('../results/test_'+name,'wb') as file:
        pickle.dump(testOutcomes,file)


if __name__ == '__main__':

    agent = angryAgent()
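    # Experiment sweep: Q-learning with four feature extractors (PP, NPP, NPPO,
    # NPPS), plus DRLSVI, RLSVI and LSVI runs with the NPP features at
    # multiple 1.0 and 2.0, followed by the generalization experiments.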
    evaluator(QLearningAlgorithm, agent.PPFeatureExtractor, 'Q', 'PP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(QLearningAlgorithm, agent.NPPFeatureExtractor, 'Q', 'NPP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(QLearningAlgorithm, agent.NPPOFeatureExtractor, 'Q', 'NPPO', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(QLearningAlgorithm, agent.NPPSFeatureExtractor, 'Q', 'NPPS', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(DRLSVI, agent.NPPFeatureExtractor, 'DRLSVI', 'NPP', multiple=1.0, numTrials=50, epsilon=0.0, sigma=500)
    evaluator(RLSVI, agent.NPPFeatureExtractor, 'LSVI', 'NPP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(QLearningAlgorithm, agent.NPPFeatureExtractor, 'Q', 'NPP', multiple=2.0, numTrials=50, epsilon=0.3, sigma=500)
    evaluator(RLSVI, agent.NPPFeatureExtractor, 'RLSVI', 'NPP', multiple=2.0, numTrials=50, epsilon=0.0, sigma=500)
    evaluator(RLSVI, agent.NPPFeatureExtractor, 'LSVI', 'NPP', multiple=2.0, numTrials=50, epsilon=0.3, sigma=500)

    generalize_evaluator(RLSVI, agent.NPPFeatureExtractor, 'RLSVI', 'NPP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)
    generalize_evaluator(QLearningAlgorithm, agent.NPPFeatureExtractor, 'Q', 'NPP', multiple=1.0, numTrials=50, epsilon=0.3, sigma=500)

    trials = range(1,42)