Exemple #1
0
 def get_mdp_refined(self) -> MDPRefined:
     """Wrap this object's refined-MDP dictionary in an ``MDPRefined``.

     The dictionary is obtained from ``self.get_mdp_refined_dict()`` and
     ``gamma`` is fixed at 1.0 (presumably the discount factor -- confirm
     against the ``MDPRefined`` constructor).

     Returns:
         A new ``MDPRefined`` built from that dictionary.
     """
     refined_dict = self.get_mdp_refined_dict()
     return MDPRefined(refined_dict, gamma=1.0)
                1: (0.2, 4.8),
                2: (0.4, 9.2),
                3: (0.4, -8.2)
            }
        },
        3: {
            'a': {
                3: (1.0, 0.0)
            },
            'b': {
                3: (1.0, 0.0)
            }
        }
    }
    gamma_val = 1.0
    mdp_ref_obj1 = MDPRefined(mdp_refined_data, gamma_val)
    mdp_rep_obj = mdp_ref_obj1.get_mdp_rep_for_rl_tabular()

    exploring_start_val = False
    softmax_flag = False
    episodes_limit = 10000
    epsilon_val = 0.1
    epsilon_half_life_val = 1000
    learning_rate_val = 0.1
    max_steps_val = 1000
    state_ff = [lambda s: float(s)]
    sa_ff = [
        lambda x: float(x[0]),
        lambda x: 1. if x[1] == 'a' else 0.,
        lambda x: 1. if x[1] == 'b' else 0.,
        lambda x: 1. if x[1] == 'c' else 0.,
 def get_mdp_refined(self, model) -> MDPRefined:
     """Wrap the refined-MDP dictionary derived from ``model`` in an ``MDPRefined``.

     Args:
         model: Forwarded unchanged to ``self.get_mdp_refined_dict``.

     Returns:
         A new ``MDPRefined`` built from that dictionary, with
         ``self.epoch_disc_factor`` passed as the second positional argument
         (presumably the discount factor -- confirm against the
         ``MDPRefined`` constructor).
     """
     refined_dict = self.get_mdp_refined_dict(model)
     disc_factor = self.epoch_disc_factor
     return MDPRefined(refined_dict, disc_factor)