def onerun(r):
    """Sweep the predictor accuracy once, training a new SARSA agent per step.

    For every accuracy in ``np.linspace(linspace_from, linspace_to,
    linspace_steps)`` a new ``Newcomb`` problem and a new
    ``SARSANewcombAgent`` are created, the pair is passed to
    ``interact_multiple`` together with ``interactions``, and the mean payout
    plus the agent's learned action are recorded.

    ``linspace_from``, ``linspace_to``, ``linspace_steps`` and
    ``interactions`` are not parameters; they are looked up in the enclosing
    module scope.

    Args:
        r: Not used by the body. Presumably a run index handed in by the
            caller (e.g. when mapping over several runs) -- TODO confirm.

    Returns:
        A tuple ``(avg_payouts, learned_actions)`` of NumPy arrays holding
        one entry per accuracy step, in sweep order.
    """
    accuracies = np.linspace(linspace_from, linspace_to, linspace_steps)
    mean_payouts = []
    chosen_actions = []

    for accuracy in accuracies:
        # A new payout matrix, problem and agent are built for every step.
        payout_matrix = np.array([[1000000, 0], [1001000, 1000]])
        newcomb = Newcomb(predictor_accuracy=accuracy, payouts=payout_matrix)
        learner = SARSANewcombAgent(newcomb, alpha=0.1, gamma=0.9,
                                    epsilon=0.9)

        log.info('Playing ...')
        # Agent first, then problem -- same order as the log always had.
        for participant in (learner, newcomb):
            log.info('%s' % (str(participant)))

        _, run_payouts = interact_multiple(learner, newcomb, interactions)
        mean_payout = run_payouts.mean(axis=0)
        mean_payouts.append(mean_payout)

        # NOTE: the message text (spelling included) is emitted verbatim.
        log.info('Average Payout for predicion accuraccy %.3f: %.3f'
                 % (accuracy, mean_payout))

        chosen_actions.append(learner.get_learned_action())

    return np.array(mean_payouts), np.array(chosen_actions)
@author: Dominik Meyer <*****@*****.**> """ import sys sys.path.append('..') sys.path.append('.') import logging as log import numpy as np log.basicConfig(level=log.DEBUG) from problems import Newcomb from agents import SARSANewcombAgent from experiment_helpers import interact_multiple if __name__ == '__main__': problem = Newcomb(predictor_accuracy=0.1, payouts=np.array([[1000000, 0], [1001000, 1000]])) agent = SARSANewcombAgent(problem, alpha=0.1, gamma=0.9, epsilon=0.9) interactions = 10000 log.info('Playing ...') log.info('%s' % (str(agent))) log.info('%s' % (str(problem))) _, payouts = interact_multiple(agent, problem, interactions) log.info('Average Payout: %f, Learned Action: %i' % (payouts.mean(axis=0), agent.get_learned_action()))
payouts=np.array([[1000000, 0], [1001000, 1000]])) problem2 = RandomNewcomb(predictor_accuracy=predictor_accuracy, payouts=np.array([[1000000, 0], [1001000, 1000]])) agent1 = OneBoxNewcombAgent(problem1) agent2 = TwoBoxNewcombAgent(problem2) log.info('Playing ...') log.info('%s' % (str(agent1))) log.info('%s' % (str(problem1))) log.info(' VERSUS') log.info('%s' % (str(agent2))) log.info('%s' % (str(problem2))) _, payouts1 = interact_multiple(agent1, problem1, interactions) _, payouts2 = interact_multiple(agent2, problem2, interactions) avg_payout1 = payouts1.mean(axis=0) avg_payout2 = payouts2.mean(axis=0) avg_payouts1.append(avg_payout1) avg_payouts2.append(avg_payout2) log.info('Average Payout: %.3f vs. %.3f' % (avg_payout1, avg_payout2)) avg_payouts1 = np.array(avg_payouts1) avg_payouts2 = np.array(avg_payouts2) plot_that_pretty_rldm15([ np.linspace(linspace_from, linspace_to, linspace_steps), np.linspace(linspace_from, linspace_to, linspace_steps)