Example no. 1
0
    def __init__(self,
                 actions,
                 gamma=.9,
                 r_max=1.,
                 v_max=None,
                 deduce_v_max=True,
                 n_known=None,
                 deduce_n_known=True,
                 epsilon_q=0.1,
                 epsilon_m=None,
                 delta=None,
                 n_states=None,
                 max_memory_size=None,
                 prior=None,
                 estimate_distances_online=True,
                 min_sampling_probability=.1,
                 name="ExpLRMax",
                 path='results/'):
        """
        Initializer. See the LRMax class for the meaning of the shared
        parameters.

        :param path: (str) directory prefix used when experiment data is written
        """
        # Forward the shared configuration to the parent constructor.
        parent_kwargs = dict(
            actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
            deduce_v_max=deduce_v_max, n_known=n_known,
            deduce_n_known=deduce_n_known, epsilon_q=epsilon_q,
            epsilon_m=epsilon_m, delta=delta, n_states=n_states,
            max_memory_size=max_memory_size, prior=prior,
            estimate_distances_online=estimate_distances_online,
            min_sampling_probability=min_sampling_probability, name=name,
        )
        LRMax.__init__(self, **parent_kwargs)

        # Experiment-only counters (not needed by the algorithm itself)
        self.n_rmax = 0  # number of times the rmax bound is used for all the updates of 1 task
        self.n_lip = 0  # number of times the lipschitz bound is used for all the updates of 1 task

        # Counter for prior use
        self.n_prior_use = 0  # number of times the prior is used for each update of 1 task
        self.n_dista_use = 0  # number of times the distance is used for each update of 1 task

        # Quantities recorded during runs
        self.discounted_return = 0.
        self.total_return = 0.
        self.n_time_steps = 0  # number of elapsed time steps

        self.path = path
        self.write_data = False  # set to True to enable data writing
        self.instance_number = 0
        self.run_number = 0
Example no. 2
0
    def __init__(
            self,
            actions,
            gamma=.9,
            r_max=1.,
            v_max=None,
            deduce_v_max=True,
            n_known=None,
            deduce_n_known=True,
            epsilon_q=0.1,
            epsilon_m=None,
            delta=None,
            n_states=None,
            max_memory_size=None,
            prior=None,
            estimate_distances_online=True,
            min_sampling_probability=.1,
            name="LRMaxQInit"
    ):
        """
        :param actions: action space of the environment
        :param gamma: (float) discount factor
        :param r_max: (float) known upper-bound on the reward function
        :param v_max: (float) known upper-bound on the value function
        :param deduce_v_max: (bool) set to True to deduce v_max from r_max
        :param n_known: (int) count after which a state-action pair is considered known
        (only set n_known if delta and epsilon are not defined)
        :param deduce_n_known: (bool) set to True to deduce n_known from (delta, n_states, epsilon_m)
        :param epsilon_q: (float) precision of value iteration algorithm for Q-value computation
        :param epsilon_m: (float) precision of the learned models in L1 norm
        :param delta: (float) models are learned epsilon_m-closely with probability at least 1 - delta
        :param n_states: (int) number of states

        :param max_memory_size: (int) maximum number of saved models (infinity if None)
        :param prior: (float) prior knowledge of maximum model's distance
        :param estimate_distances_online: (bool) set to True for online estimation of a tighter upper-bound for the
        model pseudo-distances. The estimation is valid with high probability.
        :param min_sampling_probability: (float) minimum sampling probability of an environment
        :param name: (str)
        """
        self.name = name
        # Upper-bounds on the Q-values of previously encountered MDPs
        self.maxQ_memory = []
        # Number of tasks needed for a high-confidence upper bound
        self.n_required_tasks = mqi.number_of_tasks_for_high_confidence_upper_bound(
            delta, min_sampling_probability
        )

        LRMax.__init__(
            self,
            actions=actions,
            gamma=gamma,
            r_max=r_max,
            v_max=v_max,
            deduce_v_max=deduce_v_max,
            n_known=n_known,
            deduce_n_known=deduce_n_known,
            epsilon_q=epsilon_q,
            epsilon_m=epsilon_m,
            delta=delta,
            n_states=n_states,
            max_memory_size=max_memory_size,
            prior=prior,
            estimate_distances_online=estimate_distances_online,
            min_sampling_probability=min_sampling_probability,
            name=name
        )
Example no. 3
0
    def __init__(self,
                 actions,
                 gamma=.9,
                 r_max=1.,
                 v_max=None,
                 deduce_v_max=True,
                 n_known=None,
                 deduce_n_known=True,
                 epsilon_q=0.1,
                 epsilon_m=None,
                 delta=None,
                 n_states=None,
                 max_memory_size=None,
                 prior=None,
                 estimate_distances_online=True,
                 min_sampling_probability=.1,
                 name="ExpLRMax"):
        """
        Initializer. See the LRMax class for parameter documentation.
        """
        # Hand the full shared configuration to the parent constructor.
        parent_kwargs = dict(
            actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
            deduce_v_max=deduce_v_max, n_known=n_known,
            deduce_n_known=deduce_n_known, epsilon_q=epsilon_q,
            epsilon_m=epsilon_m, delta=delta, n_states=n_states,
            max_memory_size=max_memory_size, prior=prior,
            estimate_distances_online=estimate_distances_online,
            min_sampling_probability=min_sampling_probability, name=name,
        )
        LRMax.__init__(self, **parent_kwargs)

        # Per-run bookkeeping for experiments
        self.time_step = 0
        self.time_step_counter = []

        self.data = {'n_computation': [0], 'n_prior_use': [0]}