def __init__(self, actions, name="qlearner", alpha=0.1, gamma=0.99, epsilon=0.2, explore="uniform", anneal=False):
    '''
    Args:
        actions (list): Contains strings denoting the actions.
        name (str): Denotes the name of the agent.
        alpha (float): Learning rate.
        gamma (float): Discount factor.
        epsilon (float): Exploration term.
        explore (str): One of {softmax, uniform}. Denotes explore policy.
        anneal (bool): If True, rates are annealed over time.
    '''
    Agent.__init__(self, name=name, actions=actions, gamma=gamma)

    # Learning-rate and exploration settings; the *_init copies preserve
    # the starting values so they can be restored on a reset.
    self.alpha = alpha
    self.alpha_init = alpha
    self.epsilon = epsilon
    self.epsilon_init = epsilon
    self.anneal = anneal
    self.step_number = 0

    # Q-function: unseen entries default to self.default_q.
    self.default_q = 0.0
    self.q_func = defaultdict(lambda: self.default_q)

    # Choose explore type. Can also be "uniform" for \epsilon-greedy.
    self.explore = explore
def __init__(self, policy, name="fixed-policy"):
    '''
    Args:
        policy (func: S ---> A): Maps each state to the action to take.
        name (str): Denotes the name of the agent.
    '''
    # No action list is needed: the supplied policy fully determines behavior.
    Agent.__init__(self, name=name, actions=[])

    self.policy = policy
    self.name = name
def __init__(self, actions, gamma=0.95, horizon=4, s_a_threshold=10):
    '''
    Args:
        actions (list): Contains strings denoting the actions.
        gamma (float): Discount factor.
        horizon (int): Planning horizon; also baked into the agent's name.
        s_a_threshold (int): Visit count after which a (state, action)
            pair is treated as "known" — presumably R-Max's knownness
            threshold; confirm against the class's update logic.
    '''
    Agent.__init__(self, name="rmax-h" + str(horizon), actions=actions, gamma=gamma)
    # Optimistic reward bound assumed for unknown (state, action) pairs.
    self.rmax = 1.0
    self.horizon = horizon
    self.s_a_threshold = s_a_threshold
    # Initialize/clear all learned statistics (defined elsewhere in the class).
    self.reset()
def __init__(self, actions, env_model, explore_param=m.sqrt(2), rollout_depth=100, num_rollouts_per_step=50, name="mcts", gamma=0.99):
    '''
    Args:
        actions (list): Contains strings denoting the actions.
        env_model: Model of the environment used to simulate rollouts.
        explore_param (float): Exploration constant (sqrt(2) by default).
        rollout_depth (int): Maximum steps per simulated rollout.
        num_rollouts_per_step (int): Rollouts simulated per real step.
        name (str): Denotes the name of the agent.
        gamma (float): Discount factor.
    '''
    # Simulation settings.
    self.env_model = env_model
    self.rollout_depth = rollout_depth
    self.num_rollouts_per_step = num_rollouts_per_step
    self.explore_param = explore_param

    # Tree statistics: accumulated value per node, and visit counts that
    # start at 1 so exploration scores never divide by zero.
    self.value_total = defaultdict(float)
    self.visitation_counts = defaultdict(lambda: 1)

    Agent.__init__(self, name=name, actions=actions, gamma=gamma)
def __init__(self, actions, name="Q-learning", alpha=0.1, gamma=0.9, epsilon=0.05, explore="uniform", anneal=False, custom_q_init=None, default_q=0):
    '''
    Args:
        actions (list): Contains strings denoting the actions.
        name (str): Denotes the name of the agent.
        alpha (float): Learning rate.
        gamma (float): Discount factor.
        epsilon (float): Exploration term.
        explore (str): One of {softmax, uniform}. Denotes explore policy.
        anneal (bool): If True, rates are annealed over time.
        custom_q_init (defaultdict{state, defaultdict{action, float}}): a dictionary of dictionaries storing the initial q-values. Can be used for potential shaping (Wiewiora, 2003)
        default_q (float): the default value to initialize every entry in the q-table with [by default, set to 0.0]
    '''
    # Tag the name with the explore policy whenever it is not the default
    # uniform (epsilon-greedy) one.
    if explore != "uniform":
        full_name = name + "-" + explore
    else:
        full_name = name
    Agent.__init__(self, name=full_name, actions=actions, gamma=gamma)

    # Learning/exploration parameters; the *_init copies preserve the
    # starting values so they can be restored on a reset.
    self.alpha = alpha
    self.alpha_init = alpha
    self.epsilon = epsilon
    self.epsilon_init = epsilon
    self.anneal = anneal
    self.explore = explore
    self.step_number = 0

    # Q-function setup: use the caller-supplied table when given,
    # otherwise a nested defaultdict filled with default_q.
    self.default_q = default_q
    self.custom_q_init = custom_q_init
    self._action_history = []  # store actions taken
    if self.custom_q_init:
        self.q_func = self.custom_q_init
    else:
        self.q_func = defaultdict(lambda: defaultdict(lambda: self.default_q))
def __init__(self, actions, name="qlearner", alpha=0.05, gamma=0.95, epsilon=0.01, explore="softmax"):
    '''
    Args:
        actions (list): Contains strings denoting the actions.
        name (str): Denotes the name of the agent.
        alpha (float): Learning rate.
        gamma (float): Discount factor.
        epsilon (float): Exploration term.
        explore (str): One of {softmax, uniform}. Denotes explore policy.
    '''
    Agent.__init__(self, name=name, actions=actions, gamma=gamma)

    # Exploration term and learning rate.
    self.epsilon = epsilon
    self.alpha = alpha

    # Explore policy: "softmax", or "uniform" for \epsilon-greedy.
    self.explore = explore
def __init__(self, actions, name=""):
    '''
    Args:
        actions (list): Contains strings denoting the actions.
        name (str): Denotes the name of the agent; defaults to "random"
            when left empty.
    '''
    # Fix: the original used `name is ""` — an identity comparison against
    # a string literal, which is implementation-dependent and emits a
    # SyntaxWarning on CPython >= 3.8. Compare by value instead.
    name = "random" if name == "" else name
    Agent.__init__(self, name=name, actions=actions)
def __init__(self, actions):
    '''
    Args:
        actions (list): Contains strings denoting the actions.
    '''
    # Delegate straight to the base class under the fixed name "random".
    Agent.__init__(self, actions=actions, name="random")