def get_tabular_expected_sarsa(self) -> td0_tabular.TD0:
    """Create a `td0_tabular.TD0` object set up for Expected SARSA.

    The tabular MDP representation is obtained from
    `self.get_mdp_rep_for_rl_tabular()`; all remaining settings are read
    from this object's attributes and forwarded positionally, so their
    order must match the `TD0` constructor's parameter order.

    Returns:
        A new `td0_tabular.TD0` built with `TDAlgorithm.ExpectedSARSA`.
    """
    mdp_rep = self.get_mdp_rep_for_rl_tabular()
    return td0_tabular.TD0(
        mdp_rep,
        self.exploring_start,
        TDAlgorithm.ExpectedSARSA,
        self.softmax,
        self.epsilon,
        self.epsilon_half_life,
        self.learning_rate,
        self.learning_rate_decay,
        self.num_episodes,
        self.max_steps,
    )
def get_tabular_qlearning(self) -> td0_tabular.TD0:
    """Create a `td0_tabular.TD0` object set up for Q-Learning.

    The tabular MDP representation is obtained from
    `self.get_mdp_rep_for_rl_tabular()`; all remaining settings are read
    from this object's attributes and forwarded positionally, so their
    order must match the `TD0` constructor's parameter order.

    Returns:
        A new `td0_tabular.TD0` built with `TDAlgorithm.QLearning`.
    """
    return td0_tabular.TD0(
        self.get_mdp_rep_for_rl_tabular(),
        # Second positional slot is the exploring-start flag, as in
        # get_tabular_expected_sarsa. Omitting it shifts every later
        # argument one position to the left.
        self.exploring_start,
        TDAlgorithm.QLearning,
        self.softmax,
        self.epsilon,
        self.epsilon_half_life,
        self.learning_rate,
        self.learning_rate_decay,
        self.num_episodes,
        self.max_steps,
    )