コード例 #1
0
    def onerun(r):
        """Sweep the predictor accuracy over a linspace and play one run.

        For each accuracy in ``np.linspace(linspace_from, linspace_to,
        linspace_steps)`` a fresh Newcomb problem and SARSA agent are
        created, played for ``interactions`` rounds, and the mean payout
        plus the agent's finally learned action are recorded.

        :param r: run index. Unused in the body; kept so pool.map-style
                  callers can pass one (TODO confirm against caller).
        :return: tuple ``(avg_payouts, learned_actions)`` of 1-D numpy
                 arrays, one entry per accuracy step.
        """
        avg_payouts_in_run = []
        learned_actions_in_run = []

        for prediction_accuracy in np.linspace(linspace_from, linspace_to,
                                               linspace_steps):
            problem = Newcomb(predictor_accuracy=prediction_accuracy,
                              payouts=np.array([[1000000, 0], [1001000,
                                                               1000]]))
            agent = SARSANewcombAgent(problem,
                                      alpha=0.1,
                                      gamma=0.9,
                                      epsilon=0.9)

            log.info('Playing ...')
            # Lazy %-style args: formatting is deferred until the record
            # is actually emitted.
            log.info('%s', agent)
            log.info('%s', problem)

            _, payouts = interact_multiple(agent, problem, interactions)
            avg_payout = payouts.mean(axis=0)
            avg_payouts_in_run.append(avg_payout)

            # Fixed typo in the log message ("predicion accuraccy").
            log.info('Average Payout for prediction accuracy %.3f: %.3f',
                     prediction_accuracy, avg_payout)

            learned_actions_in_run.append(agent.get_learned_action())
        return (np.array(avg_payouts_in_run), np.array(learned_actions_in_run))
コード例 #2
0
ファイル: rl_sarsa.py プロジェクト: pradeepprasad/morlbench
@author: Dominik Meyer <*****@*****.**>
"""

import sys
sys.path.append('..')
sys.path.append('.')
import logging as log
import numpy as np

log.basicConfig(level=log.DEBUG)

from problems import Newcomb
from agents import SARSANewcombAgent
from experiment_helpers import interact_multiple

if __name__ == '__main__':
    # Build a single Newcomb problem with a weak (10 %) predictor and a
    # SARSA learner, play a fixed number of rounds, then report the mean
    # payout and the action the agent converged on.
    newcomb_problem = Newcomb(
        predictor_accuracy=0.1,
        payouts=np.array([[1000000, 0], [1001000, 1000]]))
    sarsa_agent = SARSANewcombAgent(newcomb_problem,
                                    alpha=0.1, gamma=0.9, epsilon=0.9)

    n_rounds = 10000

    log.info('Playing ...')
    log.info('%s' % (str(sarsa_agent)))
    log.info('%s' % (str(newcomb_problem)))

    _, payouts = interact_multiple(sarsa_agent, newcomb_problem, n_rounds)

    mean_payout = payouts.mean(axis=0)
    final_action = sarsa_agent.get_learned_action()
    log.info('Average Payout: %f, Learned Action: %i' %
             (mean_payout, final_action))
コード例 #3
0
                                 payouts=np.array([[1000000, 0],
                                                   [1001000, 1000]]))
        problem2 = RandomNewcomb(predictor_accuracy=predictor_accuracy,
                                 payouts=np.array([[1000000, 0],
                                                   [1001000, 1000]]))
        agent1 = OneBoxNewcombAgent(problem1)
        agent2 = TwoBoxNewcombAgent(problem2)

        log.info('Playing ...')
        log.info('%s' % (str(agent1)))
        log.info('%s' % (str(problem1)))
        log.info(' VERSUS')
        log.info('%s' % (str(agent2)))
        log.info('%s' % (str(problem2)))

        _, payouts1 = interact_multiple(agent1, problem1, interactions)
        _, payouts2 = interact_multiple(agent2, problem2, interactions)
        avg_payout1 = payouts1.mean(axis=0)
        avg_payout2 = payouts2.mean(axis=0)

        avg_payouts1.append(avg_payout1)
        avg_payouts2.append(avg_payout2)

        log.info('Average Payout: %.3f vs. %.3f' % (avg_payout1, avg_payout2))

    avg_payouts1 = np.array(avg_payouts1)
    avg_payouts2 = np.array(avg_payouts2)

    plot_that_pretty_rldm15([
        np.linspace(linspace_from, linspace_to, linspace_steps),
        np.linspace(linspace_from, linspace_to, linspace_steps)