def __init__(self, actions, gamma=.9, r_max=1., v_max=None, deduce_v_max=True, n_known=None,
             deduce_n_known=True, epsilon_q=0.1, epsilon_m=None, delta=None, n_states=None,
             max_memory_size=None, prior=None, estimate_distances_online=True,
             min_sampling_probability=.1, name="ExpLRMax", path='results/'):
    """
    Set up the agent: hand every learning parameter to LRMax, then reset the experiment bookkeeping.

    Every argument except path is forwarded unchanged, by keyword, to LRMax.__init__.

    :param path: (str) stored as self.path; presumably the location where experiment data is
        written when self.write_data is enabled (to be confirmed against the writing code)
    """
    lrmax_settings = dict(
        actions=actions,
        gamma=gamma,
        r_max=r_max,
        v_max=v_max,
        deduce_v_max=deduce_v_max,
        n_known=n_known,
        deduce_n_known=deduce_n_known,
        epsilon_q=epsilon_q,
        epsilon_m=epsilon_m,
        delta=delta,
        n_states=n_states,
        max_memory_size=max_memory_size,
        prior=prior,
        estimate_distances_online=estimate_distances_online,
        min_sampling_probability=min_sampling_probability,
        name=name,
    )
    LRMax.__init__(self, **lrmax_settings)

    # Experiment-only counters (the learning algorithm itself does not need them).
    # Accumulated over all the updates of one task:
    self.n_rmax = 0  # how often the rmax bound was used
    self.n_lip = 0  # how often the lipschitz bound was used

    # Prior-usage counters, kept for each update of one task:
    self.n_prior_use = 0  # how often the prior was used
    self.n_dista_use = 0  # how often the distance was used

    # Recorded quantities
    self.discounted_return = 0.
    self.total_return = 0.
    self.n_time_steps = 0  # count of elapsed time steps

    self.path = path
    self.write_data = False  # data writing is off until explicitly enabled
    self.instance_number = 0
    self.run_number = 0
def __init__(
        self,
        actions,
        gamma=.9,
        r_max=1.,
        v_max=None,
        deduce_v_max=True,
        n_known=None,
        deduce_n_known=True,
        epsilon_q=0.1,
        epsilon_m=None,
        delta=None,
        n_states=None,
        max_memory_size=None,
        prior=None,
        estimate_distances_online=True,
        min_sampling_probability=.1,
        name="LRMaxQInit"
):
    """
    Create the agent: prepare the max-Q memory, then run the LRMax initialisation.

    :param actions: the environment's action space
    :param gamma: (float) discount factor
    :param r_max: (float) known upper bound of the reward function
    :param v_max: (float) known upper bound of the value function
    :param deduce_v_max: (bool) if True, v_max is deduced from r_max
    :param n_known: (int) number of visits after which a state-action pair counts as known
        (set it only when delta and epsilon are not defined)
    :param deduce_n_known: (bool) if True, n_known is deduced from (delta, n_states, epsilon_m)
    :param epsilon_q: (float) precision of the value iteration used to compute Q-values
    :param epsilon_m: (float) L1-norm precision of the learned models
    :param delta: (float) the models are learned epsilon_m-closely with probability at least 1 - delta
    :param n_states: (int) number of states
    :param max_memory_size: (int) upper limit on the number of saved models (None means unlimited)
    :param prior: (float) prior knowledge of the maximum distance between models
    :param estimate_distances_online: (bool) if True, a tighter upper bound on the model
        pseudo-distances is estimated online; that estimate holds with high probability
    :param min_sampling_probability: (float) minimum probability with which an environment is sampled
    :param name: (str)
    """
    # These attributes are assigned before LRMax.__init__ runs; keep this order, since the
    # parent constructor is not visible here and may rely on them already being set.
    self.name = name
    self.n_required_tasks = mqi.number_of_tasks_for_high_confidence_upper_bound(
        delta,
        min_sampling_probability
    )
    self.maxQ_memory = []  # upper bounds on the Q-values of the MDPs seen so far

    parent_kwargs = dict(
        actions=actions,
        gamma=gamma,
        r_max=r_max,
        v_max=v_max,
        deduce_v_max=deduce_v_max,
        n_known=n_known,
        deduce_n_known=deduce_n_known,
        epsilon_q=epsilon_q,
        epsilon_m=epsilon_m,
        delta=delta,
        n_states=n_states,
        max_memory_size=max_memory_size,
        prior=prior,
        estimate_distances_online=estimate_distances_online,
        min_sampling_probability=min_sampling_probability,
        name=name,
    )
    LRMax.__init__(self, **parent_kwargs)
def __init__(self, actions, gamma=.9, r_max=1., v_max=None, deduce_v_max=True, n_known=None,
             deduce_n_known=True, epsilon_q=0.1, epsilon_m=None, delta=None, n_states=None,
             max_memory_size=None, prior=None, estimate_distances_online=True,
             min_sampling_probability=.1, name="ExpLRMax"):
    """
    Build the agent from the LRMax parameters and start its experiment records from scratch.

    The parameters are those of the LRMax class (refer to it for their description); each one
    is forwarded by keyword to LRMax.__init__.
    """
    forwarded = dict(
        actions=actions,
        gamma=gamma,
        r_max=r_max,
        v_max=v_max,
        deduce_v_max=deduce_v_max,
        n_known=n_known,
        deduce_n_known=deduce_n_known,
        epsilon_q=epsilon_q,
        epsilon_m=epsilon_m,
        delta=delta,
        n_states=n_states,
        max_memory_size=max_memory_size,
        prior=prior,
        estimate_distances_online=estimate_distances_online,
        min_sampling_probability=min_sampling_probability,
        name=name,
    )
    LRMax.__init__(self, **forwarded)

    # Experiment records, all starting empty / at zero.
    self.time_step = 0
    self.time_step_counter = []
    # Each series starts with a single 0 entry.
    self.data = {
        'n_computation': [0],
        'n_prior_use': [0],
    }