def __init__(self, mdp, name="value_iter", delta=0.0001, max_iterations=500, sample_rate=3):
    '''
    Store the planner's hyperparameters and create its empty bookkeeping tables.

    Args:
        mdp (MDP)
        name (str): Planner name, forwarded to Planner.__init__.
        delta (float): After an iteration of VI, if no change more than
            @delta has occurred, terminates.
        max_iterations (int): Hard limit for number of iterations.
        sample_rate (int): Determines how many samples from @mdp to take
            to estimate T(s' | s, a).
    '''
    Planner.__init__(self, mdp, name=name)

    # Hyperparameters, stored verbatim from the arguments.
    self.delta = delta
    self.max_iterations = max_iterations
    self.sample_rate = sample_rate

    # Value table; entries that were never written read as 0.0.
    self.value_func = defaultdict(float)

    # Progress flags and counter; all start in the "nothing done yet" state.
    self.reachability_done = False
    self.has_computed_matrix = False
    self.bellman_backups = 0

    # Three-level nested table with float leaves defaulting to 0.0.
    # NOTE(review): presumably indexed [s][a][s'] for T(s' | s, a) -- confirm
    # against the code that fills it.
    self.trans_dict = defaultdict(
        lambda: defaultdict(lambda: defaultdict(float)))
def __init__(self, mdp, name="mcts", explore_param=math.sqrt(2), rollout_depth=20, num_rollouts_per_step=10):
    '''
    Store the search hyperparameters and create empty per-key statistics.

    Args:
        mdp (MDP)
        name (str): Planner name, forwarded to Planner.__init__.
        explore_param (float): Stored as self.explore_param.
        rollout_depth (int): Stored as self.rollout_depth.
        num_rollouts_per_step (int): Stored as self.num_rollouts_per_step.
    '''
    Planner.__init__(self, mdp, name=name)

    # Hyperparameters, stored verbatim from the arguments.
    self.explore_param = explore_param
    self.rollout_depth = rollout_depth
    self.num_rollouts_per_step = num_rollouts_per_step

    # Two-level tables: unseen entries read as 0.0 (totals) and 0 (counts).
    # NOTE(review): presumably keyed [state][action] -- confirm against usage.
    self.value_total = defaultdict(lambda: defaultdict(float))
    self.visitation_counts = defaultdict(lambda: defaultdict(int))
def __init__(self, mdp, name="dyna", max_iterations=500):
    '''
    Create the planner with empty model tables and a zero-initialised Q-function.

    Args:
        mdp (MDP)
        name (str): Planner name, forwarded to Planner.__init__.
        max_iterations (int): Stored as self.max_iterations.
    '''
    Planner.__init__(self, mdp, name=name)
    self.max_iterations = max_iterations

    # Value table (unseen entries read as 0.0) and a string-valued history
    # table (unseen entries read as "").
    self.value_func = defaultdict(float)
    self.max_q_act_histories = defaultdict(str)

    # Progress flags and counter.
    self.reachability_done = False
    self.has_computed_matrix = False
    self.bellman_backups = 0

    # Fixed hyperparameters.
    self.epsilon = 0.5  # TODO: epsilon-decay should be implemented
    self.alpha = 0.1  # step-size
    self.N = 10

    # Nested tables whose leaves default to the integer 0
    # (three levels for transitions, four for rewards).
    self.trans_dict = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))
    self.reward_dict = defaultdict(
        lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(int))))

    # Same nesting depths, but with float leaves defaulting to 0.0.
    self.trans_prob = defaultdict(
        lambda: defaultdict(lambda: defaultdict(float)))
    self.reward_prob = defaultdict(
        lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float))))

    # Initialize all Q(s,a) as zero. The inner lambda reads self.default_q
    # at lookup time, so later changes to default_q affect new entries.
    self.default_q = 0
    self.q_func = defaultdict(lambda: defaultdict(lambda: self.default_q))

    # Keep track of previously visited s-a pairs.
    self.previous_record = {}