from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Define a enumeration to represent coin flip actions, which is a prediction of the coin landing on # heads or tails. coin_flip_action_enum = util.enum('aTails', 'aHeads') # Define a enumeration to represent coin flip observations e.g. the coin landed on heads or tails. coin_flip_observation_enum = util.enum('oTails', 'oHeads') # Define a enumeration to represent coin flip rewards e.g. win or lose, for correcting predicting # the coin flip. coin_flip_reward_enum = util.enum('rLose', 'rWin') # Define some shorthand notation for ease of reference. aHeads = coin_flip_action_enum.aHeads aTails = coin_flip_action_enum.aTails oHeads = coin_flip_observation_enum.oHeads oTails = coin_flip_observation_enum.oTails
# NOTE(review): this line is a whitespace-collapsed module header (imports, node-type enum, and the
# start of class MonteCarloSearchNode). The class docstring is cut off mid-sentence, so the
# definition is incomplete in this view — left byte-identical; reformat once the full file is visible.
# NOTE(review): `os` is used below but not imported here — presumably `import os` was lost in the
# same mangling; confirm against the original file.
import math import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import util # An enumeration type used to specify the type of Monte Carlo search node. # Chance nodes represent sets of possible observations (one child per observation). # Decision nodes represent sets of possible actions (one child per action). # Decision and chance nodes alternate. nodetype_enum = util.enum('chance', 'decision') # Defines some shortcuts for ease of reference. chance_node = nodetype_enum.chance decision_node = nodetype_enum.decision class MonteCarloSearchNode: """ A class to represent a node in the Monte Carlo search tree. The nodes in the search tree represent simulated actions and percepts between an agent following an upper confidence bounds (UCB) policy and a generative model of the environment represented by a context tree. The purpose of the tree is to determine the expected reward of the available actions through sampling. Sampling proceeds several time steps
# NOTE(review): this line is a whitespace-collapsed module header (encoding line, module docstring,
# imports, update-type enum, and the start of class Agent). The class docstring is cut off
# mid-list, so the definition is incomplete in this view — left byte-identical; reformat once the
# full file is visible.
# -*- coding: utf-8 -*- """ Defines a base class for AIXI-approximate agents. """ from __future__ import division from __future__ import print_function from __future__ import unicode_literals import copy import random from pyaixi import util # Define a enumeration to represent what type of environment update has been performed. update_enum = util.enum('action_update', 'percept_update') # Define some short cuts for ease of reference. action_update = update_enum.action_update percept_update = update_enum.percept_update class Agent: """ This base class represents the minimum class elements for a AIXI-style agent. The following attributes and methods must be available in all agents, in order for the main interaction loop to get responses, give environmental feedback, manage learning, and monitor progress: - `age` - `average_reward()`
from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Define a enumeration to represent agent interactions with the environment, # such as listening, or opening doors. tiger_action_enum = util.enum('aListen', 'aLeft', 'aRight') # Define a enumeration to represent environment observations: either not # hearing the tiger, or hearing it from behind either door. tiger_observation_enum = util.enum('oNull', 'oLeft', 'oRight') # Define a enumeration to represent rewards as a result of actions: being eaten by the tiger, # getting information from listening, or finding the gold # NOTE: since the enumeration values need to be positive, these values are defined relative to # 100. # So -100 points is 0, -1 points is 99, and 10 points is 110. tiger_reward_enum = util.enum(rEaten = 0, rListen = 99, rGold = 110) # Define some shorthand notation for ease of reference. aListen = tiger_action_enum.aListen aLeft = tiger_action_enum.aLeft
from __future__ import print_function from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # 4 actions, encoded in 2 bits. tiger_action_enum = util.enum('stand','listen','open_left_door','open_right_door') # 3 Oberservations, encoded in 2 bits. # 'left': the tiger is behind the left door. # 'right': the tiger is behind the right door. # 'void': the agent tried to listen while standing, and thus heard nothing. tiger_observation_enum = util.enum('left','right','void') # Reward ranges from 0 to 130, encoded in 8 bits. # 0(-100) reward is given by choosing the worst action: open the door with tiger hiding behind. # 90(-10) reward is given by choosing an 'invalid' action: e.g. stand while standing. # 99(-1) reward is given by choosing an 'valid' action: e.g. stand sitting. # 130(30) reward is given by choosing the best action: open the door with gold behind. tiger_reward_enum = util.enum(penalty = 90, eaten = 0, gold = 130, normal = 99) # 2 states, encoded in 1 bit.
# NOTE(review): this fragment used `os` without importing it — presumably `import os` preceded this
# chunk in the original file; re-importing is harmless (Python imports are idempotent).
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# Ensure xrange is defined on Python 3.
from six.moves import xrange

from pyaixi import environment, util

# Define an enumeration to represent environment observations: either a square
# is empty, filled with the agent's piece, or the environment's piece.
tictactoe_observation_enum = util.enum('oEmpty', 'oAgent', 'oEnv')

# Define an enumeration to represent rewards as a result of actions: invalid actions,
# losses, null, draws, and wins.
# NOTE: since the enumeration values need to be positive, these values are defined relative to 3.
# So -3 points is 0, -2 points is 1, and 2 points is 5.
tictactoe_reward_enum = util.enum(rInvalid = 0, rLoss = 1, rNull = 3, rDraw = 4, rWin = 5)

# Define some shorthand notation for ease of reference.
oEmpty = tictactoe_observation_enum.oEmpty
oAgent = tictactoe_observation_enum.oAgent
oEnv = tictactoe_observation_enum.oEnv
rInvalid = tictactoe_reward_enum.rInvalid
rLoss = tictactoe_reward_enum.rLoss
rNull = tictactoe_reward_enum.rNull
from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Define a enumeration to represent rock-paper-scissors actions, which is the # agent performing either a rock, paper, or a scissors move. rock_paper_scissors_action_enum = util.enum('aRock', 'aPaper', 'aScissors') # Define a enumeration to represent rock-paper-scissors observations, which is the # opponent performing either a rock, paper, or a scissors move. rock_paper_scissors_observation_enum = util.enum('oRock', 'oPaper', 'oScissors') # Define a enumeration to represent losing, drawing, or winning. rock_paper_scissors_reward_enum = util.enum('rLose', 'rDraw', 'rWin') # Define some shorthand notation for ease of reference. aRock = rock_paper_scissors_action_enum.aRock aPaper = rock_paper_scissors_action_enum.aPaper aScissors = rock_paper_scissors_action_enum.aScissors oRock = rock_paper_scissors_observation_enum.oRock oPaper = rock_paper_scissors_observation_enum.oPaper
from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Define a enumeration to represent agent interactions with the environment, # such as betting or passing. kuhn_poker_action_enum = util.enum("aBet", "aPass") # Define a enumeration to represent environment observations, such as card values, # and opponent bet status # The final observation is of the form `agent-card + opponent-bet-status`. kuhn_poker_observation_enum = util.enum(oJack=0, oQueen=1, oKing=2, oBet=0, oPass=4) # Define a enumeration to represent rewards as a result of actions: betting and losing, # betting and winning, passing and losing, passing and winning. kuhn_poker_reward_enum = util.enum(rBetLoss=0, rPassLoss=1, rPassWin=3, rBetWin=4) # Define some shorthand notation for ease of reference. aBet = kuhn_poker_action_enum.aBet aPass = kuhn_poker_action_enum.aPass oJack = kuhn_poker_observation_enum.oJack
from __future__ import print_function from __future__ import unicode_literals from pyaixi import environment, util import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) # define actions kuhn_action_enum = util.enum('aPass', 'aBet') # define observations kuhn_observation_enum = util.enum('oPJ', 'oPQ', 'oPK', 'oBJ', 'oBQ', 'oBK') # define cards kuhn_card_enum = util.enum('J', 'Q', 'K') # define rewards: there are fout rewards, and no draw kuhn_reward_enum = util.enum(rLose2=0, rLose1=1, rWin1=3, rWin2=4) aPass = kuhn_action_enum.aPass aBet = kuhn_action_enum.aBet oPJ = kuhn_observation_enum.oPJ oPQ = kuhn_observation_enum.oPQ
# NOTE(review): this line is a whitespace-collapsed module header whose final statement,
# `extended_tiger_reward_enum = util.enum(rInvalid=0, rTiger=0, rStand=99, rListen=100,`, is cut
# off mid-call (the trailing arguments are missing from this view) — left byte-identical; restore
# from the original file before reformatting.
from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Define a enumeration to represent agent interactions with the environment, # such as listening, just standing, or opening doors. extended_tiger_action_enum = util.enum('aListen', 'aLeft', 'aRight', 'aStand') # Define a enumeration to represent environment observations: either not # hearing the tiger, or hearing it from behind either door. extended_tiger_observation_enum = util.enum('oNull', 'oLeft', 'oRight') # Define a enumeration to represent rewards as a result of actions: being eaten by the tiger, # performing an invalid action (listening while sitting), getting information from listening, # just standing there, or finding the gold. # NOTE: since the enumeration values need to be positive, these values are defined relative to # 100. # So -100 points is 0, -1 points is 99, and 30 points is 130. extended_tiger_reward_enum = util.enum(rInvalid=0, rTiger=0, rStand=99, rListen=100,
from __future__ import print_function from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Define a enumeration to represent the agent's actions, which is a choice between left or right. oned_maze_action_enum = util.enum('aLeft', 'aRight') # As the agent's observations are uninformative, the observation is defined as goal or non-goal instead of location. oned_maze_observation_enum = util.enum('oGoal', 'oNonGoal') # Define a enumeration to represent the agent's rewards e.g. 1 or 0, for reaching the goal point. oned_maze_reward_enum = util.enum('rOne', 'rZero') # Define the locations of the cells in the 1D maze. locs = [0, 1, 2, 3] # Define some shorthand notation for ease of reference. aLeft = oned_maze_action_enum.aLeft aRight = oned_maze_action_enum.aRight oGoal = oned_maze_observation_enum.oGoal
# NOTE(review): this fragment used `os` without importing it — presumably `import os` preceded this
# chunk in the original file; re-importing is harmless (Python imports are idempotent).
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# Ensure xrange is defined on Python 3.
from six.moves import xrange

from pyaixi import environment, util

# Define an enumeration to represent agent interactions with the environment,
# such as going left, up, right, or down.
maze_action_enum = util.enum("aLeft", "aUp", "aRight", "aDown")

# Define an enumeration to represent environment observations: either a cell
# is empty, or has a wall in various (bit) positions.
maze_observation_enum = util.enum(oNull=0, oLeftWall=1, oUpWall=2, oRightWall=4, oDownWall=8)

# Define an enumeration to represent observation encoding constants.
maze_observation_encoding_enum = util.enum("cUninformative", "cWalls", "cCoordinates")

# Define some shorthand notation for ease of reference.
aLeft = maze_action_enum.aLeft
aUp = maze_action_enum.aUp
aRight = maze_action_enum.aRight
aDown = maze_action_enum.aDown
oNull = maze_observation_enum.oNull
# NOTE(review): this line is a whitespace-collapsed module header (encoding line, module docstring,
# imports, update-type enum, and the start of class Agent). The class docstring is cut off
# mid-list, so the definition is incomplete in this view — left byte-identical; reformat once the
# full file is visible.
# -*- coding: utf-8 -*- """ Defines a base class for AIXI-approximate agents. """ from __future__ import division from __future__ import print_function from __future__ import unicode_literals import copy import random from pyaixi import util # Define a enumeration to represent what type of environment update has been performed. update_enum = util.enum('action_update', 'percept_update') # Define some short cuts for ease of reference. action_update = update_enum.action_update percept_update = update_enum.percept_update class Agent: """ This base class represents the minimum class elements for a AIXI-style agent. The following attributes and methods must be available in all agents, in order for the main interaction loop to get responses, give environmental feedback, manage learning, and monitor progress: - `age` - `average_reward()` - `generate_random_action()`
from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Define a enumeration to represent rock-paper-scissors actions, which is the # agent performing either a rock, paper, or a scissors move. rock_paper_scissors_action_enum = util.enum('aRock', 'aPaper', 'aScissors') # Define a enumeration to represent rock-paper-scissors observations, which is the # opponent performing either a rock, paper, or a scissors move. rock_paper_scissors_observation_enum = util.enum('oRock', 'oPaper', 'oScissors') # Define a enumeration to represent losing, drawing, or winning. rock_paper_scissors_reward_enum = util.enum('rLose', 'rDraw', 'rWin') # Define some shorthand notation for ease of reference. aRock = rock_paper_scissors_action_enum.aRock aPaper = rock_paper_scissors_action_enum.aPaper aScissors = rock_paper_scissors_action_enum.aScissors oRock = rock_paper_scissors_observation_enum.oRock
# NOTE(review): this line is a whitespace-collapsed module header whose final statement,
# `kp_bet_reward_enum = util.enum(fourChips=4,`, is cut off mid-call (the trailing arguments are
# missing from this view) — left byte-identical; restore from the original file before
# reformatting. Also note the pyaixi import precedes the sys.path insertion it relies on.
from __future__ import division from __future__ import print_function from __future__ import unicode_literals from pyaixi import environment, util import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) # Defines an enumeration to represent agent action: bet on current observation or pass. kp_action_enum = util.enum('agent_pass', 'agent_bet') # Defines an enumeration to represent agent observation. # The agent observes both its own card and the opponent's action. kp_card_observation_enum = util.enum(j=0, q=1, k=2) kp_opponent_observation_enum = util.enum(op_bet=0, op_pass=3) # Observation codes: # 0: agent has J, Opponent bet, # 1: agent has Q, Opponent bet, # 2: agent has K, Opponent bet, # 3: agent has J, Opponent passed, # 4: agent has Q, Opponent passed, # 5: agent has K, Opponent passed. # Reward is (final chip count) - (initial chip count). kp_bet_reward_enum = util.enum(fourChips=4,
from __future__ import division from __future__ import print_function from __future__ import unicode_literals from pyaixi import environment, util import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) tiger_action_enum = util.enum('aListen', 'aLeft', 'aRight') tiger_observation_enum = util.enum('oLeft', 'oRight', 'oTiger', 'oGold', 'oNone') # tiger normalised from -100 to 0, gold from 10 to 110, listen from -1 to 99 tiger_reward_enum = util.enum(rTiger=0, rGold=110, rListen=99) aListen = tiger_action_enum.aListen aLeft = tiger_action_enum.aLeft aRight = tiger_action_enum.aRight oLeft = tiger_observation_enum.oLeft oRight = tiger_observation_enum.oRight oTiger = tiger_observation_enum.oTiger oGold = tiger_observation_enum.oGold oNone = tiger_observation_enum.oNone
from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Define a enumeration to represent agent interactions with the environment, # such as listening, just standing, or opening doors. extended_tiger_action_enum = util.enum('aListen', 'aLeft', 'aRight', 'aStand') # Define a enumeration to represent environment observations: either not # hearing the tiger, or hearing it from behind either door. extended_tiger_observation_enum = util.enum('oNull', 'oLeft', 'oRight') # Define a enumeration to represent rewards as a result of actions: being eaten by the tiger, # performing an invalid action (listening while sitting), getting information from listening, # just standing there, or finding the gold. # NOTE: since the enumeration values need to be positive, these values are defined relative to # 100. # So -100 points is 0, -1 points is 99, and 30 points is 130. extended_tiger_reward_enum = util.enum(rInvalid = 0, rTiger = 0, rStand = 99, rListen = 100, rGold = 130) # Define some shorthand notation for ease of reference. aListen = extended_tiger_action_enum.aListen
# NOTE(review): this fragment used `os` without importing it — presumably `import os` preceded this
# chunk in the original file; re-importing is harmless (Python imports are idempotent).
import os
import random
import sys

import numpy as np

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

from pyaixi import environment, util

# Specify the map of pacman, and the probability to generate the pellets.
default_probability = 0.5
layout_txt = "pacMan.txt"

# Define the enumerations to represent the observations and actions.
pacman_action_enum = util.enum('top', 'down', 'left', 'right')

# Wall observations are bit flags, one bit per direction.
pacman_wall_observations_enum = util.enum(wNull=0, wTopWall=1, wDownWall=2,
                                          wLeftWall=4, wRightWall=8)

# Ghost observations occupy the next four bits.
pacman_ghost_observation_enum = util.enum(gNull=0, gTopWall=16, gDownWall=32,
                                          gLeftWall=64, gRightWall=128)

# Smell (food distance) observations occupy the following bits.
pacman_smell_observation_enum = util.enum(mD_n=0, mD_2=256, mD_3=512, mD_4=1024)
# NOTE(review): this fragment used `os` without importing it — presumably `import os` preceded this
# chunk in the original file; re-importing is harmless (Python imports are idempotent).
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# Ensure xrange is defined on Python 3.
from six.moves import xrange

from pyaixi import environment, util

# Define an enumeration to represent environment observations: either a square
# is empty, filled with the agent's piece, or the environment's piece.
tictactoe_observation_enum = util.enum('oEmpty', 'oAgent', 'oEnv')

# Define an enumeration to represent rewards as a result of actions: invalid actions,
# losses, null, draws, and wins.
# NOTE: since the enumeration values need to be positive, these values are defined relative to 3.
# So -3 points is 0, -2 points is 1, and 2 points is 5.
tictactoe_reward_enum = util.enum(rInvalid=0, rLoss=1, rNull=3, rDraw=4, rWin=5)

# Define some shorthand notation for ease of reference.
oEmpty = tictactoe_observation_enum.oEmpty
oAgent = tictactoe_observation_enum.oAgent
oEnv = tictactoe_observation_enum.oEnv
# NOTE(review): this line is a whitespace-collapsed module header whose final statement,
# `kuhn_poker_reward_enum = util.enum(rBetLoss=0, rPassLoss=1, rPassWin=3,`, is cut off mid-call
# (the trailing arguments are missing from this view) — left byte-identical; restore from the
# original file before reformatting.
from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Define a enumeration to represent agent interactions with the environment, # such as betting or passing. kuhn_poker_action_enum = util.enum('aBet', 'aPass') # Define a enumeration to represent environment observations, such as card values, # and opponent bet status # The final observation is of the form `agent-card + opponent-bet-status`. kuhn_poker_observation_enum = util.enum(oJack=0, oQueen=1, oKing=2, oBet=0, oPass=4) # Define a enumeration to represent rewards as a result of actions: betting and losing, # betting and winning, passing and losing, passing and winning. kuhn_poker_reward_enum = util.enum(rBetLoss=0, rPassLoss=1, rPassWin=3,
# NOTE(review): this line is a whitespace-collapsed module header that ends inside an unterminated
# triple-quoted string (`''' RPS_reward_enum.rLose=-1 ...`), so the final statement is incomplete
# in this view — left byte-identical; restore from the original file before reformatting.
from __future__ import division from __future__ import print_function from __future__ import unicode_literals import os import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util RPS_action_enum = util.enum('aRock', 'aPaper', 'aScissor') RPS_observation_enum = util.enum('oRock', 'oPaper', 'oScissor') RPS_reward_enum = util.enum('rLose', 'rDraw', 'rWin') aRock = RPS_action_enum.aRock aPaper = RPS_action_enum.aPaper aScissor = RPS_action_enum.aScissor oRock = RPS_observation_enum.oRock oPaper = RPS_observation_enum.oPaper oScissor = RPS_observation_enum.oScissor #translate the rewards to the -1 0 1 ''' RPS_reward_enum.rLose=-1 RPS_reward_enum.rDraw=0
# NOTE(review): this fragment used `os` without importing it — presumably `import os` preceded this
# chunk in the original file; re-importing is harmless (Python imports are idempotent).
import os
import random
import sys

# Insert the package's parent directory into the system search path, so that this package can be
# imported when the aixi.py script is run directly from a release archive.
PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir))
sys.path.insert(0, PROJECT_ROOT)

# Ensure xrange is defined on Python 3.
from six.moves import xrange

from pyaixi import environment, util

# Define an enumeration to represent agent interactions with the environment,
# such as going left, up, right, or down.
maze_action_enum = util.enum('aLeft', 'aUp', 'aRight', 'aDown')

# Define an enumeration to represent environment observations: either a cell
# is empty, or has a wall in various (bit) positions.
maze_observation_enum = util.enum(oNull=0, oLeftWall=1, oUpWall=2, oRightWall=4, oDownWall=8)

# Define an enumeration to represent observation encoding constants.
maze_observation_encoding_enum = util.enum('cUninformative', 'cWalls', 'cCoordinates')

# Define some shorthand notation for ease of reference.
aLeft = maze_action_enum.aLeft
from __future__ import division from __future__ import print_function from __future__ import unicode_literals import os import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import environment, util # Defines an enumeration to represent agent action: move up, down, left, right in the maze cheese_maze_action_enum = util.enum('up', 'down', 'left', 'right') # Defines an enumeration to represent agent observation: which aliased location the mouse is at cheese_maze_observation_enum = util.enum(five=5, seven=7, eight=8, nine=9, ten=10, twelve=12) # Defines an enumeration to represent agent reward: the agent took an invalid step or valid or highest rewarded step cheese_maze_reward_enum = util.enum(wall=0, move=9, cheese=20) # Defines some shorthand notation for ease of reference. up = cheese_maze_action_enum.up down = cheese_maze_action_enum.down
# NOTE(review): this line is a whitespace-collapsed module header (imports, node-type enum, and the
# start of class MonteCarloSearchNode). The class docstring is cut off mid-passage, so the
# definition is incomplete in this view — left byte-identical; reformat once the full file is
# visible.
# NOTE(review): `os` is used below but not imported here — presumably `import os` was lost in the
# same mangling; confirm against the original file.
import random import sys # Insert the package's parent directory into the system search path, so that this package can be # imported when the aixi.py script is run directly from a release archive. PROJECT_ROOT = os.path.realpath(os.path.join(os.pardir, os.pardir)) sys.path.insert(0, PROJECT_ROOT) from pyaixi import util # An enumeration type used to specify the type of Monte Carlo search node. # Chance nodes represent a set of possible observation # (one child per observation) while decision nodes # represent sets of possible actions (one child per action). # Decision and chance nodes alternate. nodetype_enum = util.enum('chance', 'decision') # Define some short cuts for ease of reference. chance_node = nodetype_enum.chance decision_node = nodetype_enum.decision class MonteCarloSearchNode: """ A class to represent a node in the Monte Carlo search tree. The nodes in the search tree represent simulated actions and percepts between an agent following an upper confidence bounds (UCB) policy and a generative model of the environment represented by a context tree. The purpose of the tree is to determine the expected reward of the available actions through sampling. Sampling proceeds several time steps into the future according to the size of the agent's horizon. (`MC_AIXI_CTW_Agent.horizon`)