def get_fa_expected_sarsa_lambda(self) -> tdl_fa.TDLambda:
    """Construct a ``tdl_fa.TDLambda`` set up for ``TDAlgorithm.ExpectedSARSA``.

    Every setting is read from this object's attributes and handed to the
    constructor positionally, so the order of the arguments matters.

    NOTE(review): the order used here assumes the constructor signature is
    (mdp_rep, exploring_start, algorithm, softmax, epsilon,
    epsilon_half_life, learning_rate, lambd, num_episodes, batch_size,
    max_steps, fa_spec, offline) -- confirm against
    ``tdl_fa.TDLambda.__init__``.

    Returns:
        The newly constructed ``tdl_fa.TDLambda`` instance.
    """
    return tdl_fa.TDLambda(
        self.get_mdp_rep_for_rl_fa(),
        # exploring_start comes before the algorithm, as in
        # get_fa_qlearning_lambda; leaving it out shifts every later
        # positional argument one slot to the left.
        self.exploring_start,
        TDAlgorithm.ExpectedSARSA,
        self.softmax,
        self.epsilon,
        self.epsilon_half_life,
        self.learning_rate,
        self.lambd,
        self.num_episodes,
        # batch_size sits between num_episodes and max_steps, as in
        # get_fa_qlearning_lambda.
        self.batch_size,
        self.max_steps,
        self.fa_spec,
        self.tdl_fa_offline,
    )
def get_fa_qlearning_lambda(self) -> tdl_fa.TDLambda:
    """Construct a ``tdl_fa.TDLambda`` set up for ``TDAlgorithm.QLearning``.

    Every setting is read from this object's attributes and handed to the
    constructor positionally, so the order of the arguments matters.

    NOTE(review): the order used here assumes the constructor signature is
    (mdp_rep, exploring_start, algorithm, softmax, epsilon,
    epsilon_half_life, learning_rate, lambd, num_episodes, batch_size,
    max_steps, fa_spec, offline) -- confirm against
    ``tdl_fa.TDLambda.__init__``.

    Returns:
        The newly constructed ``tdl_fa.TDLambda`` instance.
    """
    return tdl_fa.TDLambda(
        self.get_mdp_rep_for_rl_fa(),
        self.exploring_start,
        TDAlgorithm.QLearning,
        self.softmax,
        self.epsilon,
        self.epsilon_half_life,
        # learning_rate sits between epsilon_half_life and lambd, as in
        # get_fa_expected_sarsa_lambda; leaving it out shifts lambd and
        # every later positional argument one slot to the left.
        self.learning_rate,
        self.lambd,
        self.num_episodes,
        self.batch_size,
        self.max_steps,
        self.fa_spec,
        self.tdl_fa_offline,
    )