def __init__(self, alpha, discount, env, epsilon=0.2):
    """Initialise the agent and pick its first exploratory action.

    Args:
        alpha: Forwarded unchanged to the parent initialiser.
        discount: Forwarded unchanged to the parent initialiser.
        env: Environment object; this method reads ``state_space`` and
            ``action_space`` from it and calls ``get_state()`` on it.
        epsilon: Last argument given to ``policy.EpsilonGreedyPolicy``
            (defaults to 0.2).
    """
    super().__init__(alpha, discount, env)

    # self.qvalues is not assigned in this method; presumably the parent
    # initialiser provides it -- confirm against the base class.
    states = env.state_space
    actions = env.action_space
    self.explore_policy = policy.EpsilonGreedyPolicy(
        states, actions, self.qvalues, epsilon
    )

    # Select the first action immediately, based on the environment's
    # current state.
    self.next_action = self.get_explore_action(env.get_state())
def __init__(self, alpha, epsilon, discount, environment):
    """Initialise the agent with one policy used for acting and exploring.

    Args:
        alpha: Forwarded unchanged to the parent initialiser.
        epsilon: Forwarded unchanged to the parent initialiser.
        discount: Forwarded unchanged to the parent initialiser.
        environment: Environment object; ``state_space`` and
            ``action_space`` are read from it.
    """
    super().__init__(alpha, epsilon, discount, environment)

    states = environment.state_space
    actions = environment.action_space
    # NOTE(review): epsilon is handed to the parent only; the policy is
    # constructed without it -- confirm that is intended.
    self.policy = policy.EpsilonGreedyPolicy(states, actions, self.qvalues)

    # explore_policy is assigned from self.policy rather than built separately.
    self.explore_policy = self.policy
def __init__(self, alpha, discount, environment, n, epsilon=0.2):
    """Initialise the agent, store ``n`` and ``epsilon``, and start an episode.

    Args:
        alpha: Forwarded unchanged to the parent initialiser.
        discount: Forwarded unchanged to the parent initialiser.
        environment: Environment object; ``state_space`` and
            ``action_space`` are read from it, and it is passed to
            ``init_episode``.
        n: Stored on the instance as ``self.n``.
        epsilon: Stored as ``self.epsilon`` and given to
            ``policy.EpsilonGreedyPolicy`` (defaults to 0.2).
    """
    super().__init__(alpha, discount, environment)

    self.n, self.epsilon = n, epsilon

    state_space = environment.state_space
    action_space = environment.action_space
    self.policy = policy.EpsilonGreedyPolicy(
        state_space, action_space, self.qvalues, self.epsilon
    )
    # explore_policy is assigned from self.policy rather than built separately.
    self.explore_policy = self.policy

    # Episode setup runs last, after the policies are in place.
    self.init_episode(environment)
def __init__(self, alpha, discount, env, episilon=0.2):
    """Initialise the agent with a greedy and an epsilon-greedy policy.

    Args:
        alpha: Forwarded unchanged to the parent initialiser.
        discount: Forwarded unchanged to the parent initialiser.
        env: Environment object; ``state_space`` and ``action_space`` are
            read from it.
        episilon: Last argument given to both policy constructors
            (defaults to 0.2). The spelling is kept as-is because callers
            may pass it by keyword.
    """
    super().__init__(alpha, discount, env)

    states, actions = env.state_space, env.action_space

    # Both policies are built over the same spaces and the same qvalues.
    self.optimal_policy = policy.GreedyPolicy(
        states, actions, self.qvalues, episilon
    )
    self.explore_policy = policy.EpsilonGreedyPolicy(
        states, actions, self.qvalues, episilon
    )
    # draw_policy is assigned from the greedy policy just created.
    self.draw_policy = self.optimal_policy

    # Fresh per-instance containers: a list-valued mapping and an empty list.
    self.returns = defaultdict(list)
    self.episode = []