Ejemplo n.º 1
0
    def __init__(self,
                 actions,
                 name="qlearner",
                 alpha=0.1,
                 gamma=0.99,
                 epsilon=0.2,
                 explore="uniform",
                 anneal=False):
        '''
        Initialize the agent's learning parameters and an empty Q-table.

        Args:
            actions (list): Contains strings denoting the actions.
            name (str): Denotes the name of the agent.
            alpha (float): Learning rate.
            gamma (float): Discount factor.
            epsilon (float): Exploration term.
            explore (str): One of {softmax, uniform}. Denotes explore policy.
            anneal (bool): Stored as self.anneal; not otherwise used in
                this constructor.
        '''
        Agent.__init__(self, name=name, actions=actions, gamma=gamma)

        # Keep the starting values next to the live ones so they remain
        # available after the live values change.
        self.alpha = alpha
        self.alpha_init = alpha
        self.epsilon = epsilon
        self.epsilon_init = epsilon

        self.anneal = anneal
        self.step_number = 0

        # Explore policy; "uniform" corresponds to \epsilon-greedy.
        self.explore = explore

        # Q-table: keys that were never written read as self.default_q.
        # The lambda looks default_q up at access time, not at creation.
        self.default_q = 0.0
        self.q_func = defaultdict(lambda: self.default_q)
Ejemplo n.º 2
0
 def __init__(self, policy, name="fixed-policy"):
     '''
     Store a policy function on an agent that has an empty action list.

     Args:
         policy (func: S ---> A): Saved as self.policy.
         name (str): Name of the agent.
     '''
     Agent.__init__(self, actions=[], name=name)
     self.name = name
     self.policy = policy
Ejemplo n.º 3
0
 def __init__(self, actions, gamma=0.95, horizon=4, s_a_threshold=10):
     '''
     Initialize the agent, naming it after its horizon, then reset it.

     Args:
         actions (list): Forwarded to Agent.__init__.
         gamma (float): Forwarded to Agent.__init__.
         horizon (int): Stored as self.horizon and appended to the
             agent name ("rmax-h<horizon>").
         s_a_threshold (int): Stored as self.s_a_threshold.
     '''
     agent_name = "rmax-h" + str(horizon)
     Agent.__init__(self, name=agent_name, actions=actions, gamma=gamma)

     self.s_a_threshold = s_a_threshold
     self.horizon = horizon
     self.rmax = 1.0

     # reset() is defined outside this method; it runs after the
     # attributes above have been set.
     self.reset()
Ejemplo n.º 4
0
    def __init__(self,
                 actions,
                 env_model,
                 explore_param=m.sqrt(2),
                 rollout_depth=100,
                 num_rollouts_per_step=50,
                 name="mcts",
                 gamma=0.99):
        '''
        Store the rollout configuration and create empty statistics tables.

        Args:
            actions (list): Forwarded to Agent.__init__.
            env_model: Stored as self.env_model.
            explore_param (float): Stored as self.explore_param
                (defaults to sqrt(2)).
            rollout_depth (int): Stored as self.rollout_depth.
            num_rollouts_per_step (int): Stored as
                self.num_rollouts_per_step.
            name (str): Name of the agent.
            gamma (float): Forwarded to Agent.__init__.
        '''
        # Model and search configuration.
        self.env_model = env_model
        self.explore_param = explore_param
        self.rollout_depth = rollout_depth
        self.num_rollouts_per_step = num_rollouts_per_step

        # Statistics tables: unseen keys read as 0.0 for value totals and
        # as 1 for visitation counts.
        self.value_total = defaultdict(float)
        self.visitation_counts = defaultdict(lambda: 1)

        # The base-class initializer runs last, after the attributes above.
        Agent.__init__(self, name=name, actions=actions, gamma=gamma)
Ejemplo n.º 5
0
    def __init__(self,
                 actions,
                 name="Q-learning",
                 alpha=0.1,
                 gamma=0.9,
                 epsilon=0.05,
                 explore="uniform",
                 anneal=False,
                 custom_q_init=None,
                 default_q=0):
        '''
        Initialize the learning parameters and the Q-table.

        Args:
            actions (list): Contains strings denoting the actions.
            name (str): Denotes the name of the agent. When explore is not
                "uniform", "-<explore>" is appended to it.
            alpha (float): Learning rate.
            gamma (float): Discount factor.
            epsilon (float): Exploration term.
            explore (str): One of {softmax, uniform}. Denotes explore policy.
            anneal (bool): Stored as self.anneal; not otherwise used in
                this constructor.
            custom_q_init (defaultdict{state, defaultdict{action, float}}): a dictionary of dictionaries storing the initial q-values. Can be used for potential shaping (Wiewiora, 2003). Used as the Q-table whenever it is not None, even if it is currently empty.
            default_q (float): the default value to initialize every entry in the q-table with [by default, set to 0.0]
        '''
        name_ext = "-" + explore if explore != "uniform" else ""
        Agent.__init__(self,
                       name=name + name_ext,
                       actions=actions,
                       gamma=gamma)

        # Set/initialize parameters and other relevant classwide data
        self.alpha, self.alpha_init = alpha, alpha
        self.epsilon, self.epsilon_init = epsilon, epsilon
        self.step_number = 0
        self.anneal = anneal
        self.default_q = default_q
        self.explore = explore
        self.custom_q_init = custom_q_init
        self._action_history = []  # store actions taken

        # Q Function: test against None rather than truthiness. An empty
        # defaultdict is falsy, so a truthiness test would silently discard
        # a caller-supplied table that has no entries yet (for example one
        # whose default factory supplies the initial values).
        if self.custom_q_init is not None:
            self.q_func = self.custom_q_init
        else:
            # Unseen (state, action) pairs read as self.default_q, looked
            # up at access time.
            self.q_func = defaultdict(
                lambda: defaultdict(lambda: self.default_q))
Ejemplo n.º 6
0
    def __init__(self,
                 actions,
                 name="qlearner",
                 alpha=0.05,
                 gamma=0.95,
                 epsilon=0.01,
                 explore="softmax"):
        '''
        Initialize the agent and record its learning parameters.

        Args:
            actions (list): Contains strings denoting the actions.
            name (str): Denotes the name of the agent.
            alpha (float): Learning rate.
            gamma (float): Discount factor.
            epsilon (float): Exploration term.
            explore (str): One of {softmax, uniform}. Denotes explore policy.
        '''
        Agent.__init__(self, name=name, actions=actions, gamma=gamma)

        # Explore policy; "uniform" corresponds to \epsilon-greedy.
        self.explore = explore

        # Learning rate and exploration term.
        self.epsilon = epsilon
        self.alpha = alpha
Ejemplo n.º 7
0
 def __init__(self, actions, name=""):
     '''
     Initialize the agent, falling back to the name "random".

     Args:
         actions (list): Forwarded to Agent.__init__.
         name (str): Name of the agent. An empty string (the default)
             is replaced by "random".
     '''
     # Compare by value: `is` tests object identity, which is not
     # guaranteed to hold for two equal strings.
     name = "random" if name == "" else name
     Agent.__init__(self, name=name, actions=actions)
Ejemplo n.º 8
0
 def __init__(self, actions):
     '''
     Initialize the agent under the fixed name "random".

     Args:
         actions (list): Forwarded to Agent.__init__.
     '''
     Agent.__init__(self, actions=actions, name="random")