Example #1
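These snippets are extracted from experiment scripts of a lifelong reinforcement learning code base and omit their imports. A minimal sketch of the presumed preamble, assuming llrl-style module paths (the paths, and the value of GAMMA, are assumptions, not taken from the snippets):

# Presumed preamble for the snippets below; module paths are an assumption
# based on the llrl lifelong-RL code base and may differ in the original files.
from llrl.agents.rmax import RMax
from llrl.agents.lrmax import LRMax
from llrl.agents.maxqinit import MaxQInit
from llrl.agents.lrmaxqinit import LRMaxQInit
from llrl.agents.lrmax_ct import LRMaxCT  # variant used in Example #3
from llrl.experiments import run_agents_lifelong
from llrl.utils.env_handler import make_env_distribution

GAMMA = .9  # Module-level discount used by Examples #1, #3, and #6 (value assumed)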
def experiment():
    n_env = 5
    env_distribution = make_env_distribution(env_class='maze-mono-goal', env_name='maze-mono-goal', n_env=n_env, gamma=GAMMA)
    actions = env_distribution.get_actions()
    p_min = 1. / float(n_env)
    delta = .1

    m = 100  # Count threshold: visits needed before a state-action pair counts as known
    max_mem = 10  # Maximum number of previous tasks stored in memory
    rmax = RMax(actions=actions, gamma=GAMMA, count_threshold=m)
    rmax_q = MaxQInit(actions=actions, gamma=GAMMA, count_threshold=m, min_sampling_probability=p_min, delta=delta)
    lrmax1 = LRMax(actions=actions, gamma=GAMMA, count_threshold=m, max_memory_size=max_mem, prior=1.)
    lrmax05 = LRMax(actions=actions, gamma=GAMMA, count_threshold=m, max_memory_size=max_mem, prior=0.5)
    lrmax02 = LRMax(actions=actions, gamma=GAMMA, count_threshold=m, max_memory_size=max_mem, prior=0.2)
    lrmax_learn = LRMax(
        actions=actions, gamma=GAMMA, count_threshold=m, max_memory_size=max_mem, prior=None,
        min_sampling_probability=p_min, delta=delta
    )

    agents_pool = [rmax, lrmax1, lrmax05, lrmax02, lrmax_learn, rmax_q]

    run_agents_lifelong(
        agents_pool, env_distribution, samples=20, episodes=100, steps=1000, reset_at_terminal=False,
        open_plot=True, cumulative_plot=False, is_tracked_value_discounted=True, plot_only=False, plot_title=False
    )
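Each example defines a self-contained experiment function; the original scripts presumably end with a standard entry point, e.g.:

if __name__ == '__main__':
    experiment()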
Example #2
def experiment(p):
    # Parameters
    gamma = .9
    n_env = 5
    size = p['size']
    env_distribution = make_env_distribution(
        env_class='tight', n_env=n_env, gamma=gamma,
        env_name=p['name'],
        w=size,
        h=size,
        stochastic=p['stochastic']
    )
    actions = env_distribution.get_actions()
    n_known = p['n_known']
    p_min = 1. / float(n_env)
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    r_max = 1.
    v_max = p['v_max']
    n_states = 4
    max_mem = 1

    # Agents
    rmax = RMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, name='RMax')
    lrmax = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                  deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                  max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  min_sampling_probability=p_min, name='LRMax')
    lrmax_p01 = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                      deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                      max_memory_size=max_mem, prior=0.1, estimate_distances_online=True,
                      min_sampling_probability=p_min, name='LRMax(Dmax=0.1)')
    lrmax_p02 = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                      deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                      max_memory_size=max_mem, prior=0.2, estimate_distances_online=True,
                      min_sampling_probability=p_min, name='LRMax(Dmax=0.2)')
    maxqinit = MaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                        deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                        min_sampling_probability=p_min, name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                            deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                            n_states=n_states, max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                            min_sampling_probability=p_min, name='LRMaxQInit')
    lrmaxqinit_p01 = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                                deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                                n_states=n_states, max_memory_size=max_mem, prior=0.1, estimate_distances_online=True,
                                min_sampling_probability=p_min, name='LRMaxQInit(Dmax=0.1)')
    lrmaxqinit_p02 = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                                deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                                n_states=n_states, max_memory_size=max_mem, prior=0.2, estimate_distances_online=True,
                                min_sampling_probability=p_min, name='LRMaxQInit(Dmax=0.2)')
    agents_pool = [rmax, lrmax, lrmax_p01, lrmax_p02, maxqinit, lrmaxqinit, lrmaxqinit_p01, lrmaxqinit_p02]

    # Run
    run_agents_lifelong(agents_pool, env_distribution, n_instances=3, n_tasks=p['n_tasks'], n_episodes=p['n_episodes'],
                        n_steps=p['n_steps'],
                        reset_at_terminal=False, open_plot=False, plot_title=True, do_run=True, do_plot=True,
                        parallel_run=True, n_processes=None)
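Example #2 is driven by a parameter dictionary p; the keys it reads are visible in the body above. A hypothetical invocation, with illustrative values only:

# Hypothetical parameter dictionary: keys match those read by experiment(),
# values are illustrative and not taken from the original runs.
params = {
    'name': 'tight-example',  # env_name for make_env_distribution
    'size': 4,                # grid width and height
    'stochastic': False,      # whether transitions are stochastic
    'n_known': 10,            # count threshold before a pair is known
    'v_max': 1.,              # upper bound on the value function
    'n_tasks': 20,            # number of sampled tasks
    'n_episodes': 100,        # episodes per task
    'n_steps': 10,            # steps per episode
}
experiment(params)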
Example #3
def experiment():
    n_env = 5
    env_distribution = make_env_distribution(env_class='corridor',
                                             n_env=n_env,
                                             gamma=GAMMA,
                                             w=20,
                                             h=1)
    actions = env_distribution.get_actions()
    p_min = 1. / float(n_env)
    delta = .1

    m = 1  # Count threshold: visits needed before a state-action pair counts as known
    max_mem = 2  # Maximum number of previous tasks stored in memory
    rmax = RMax(actions=actions, gamma=GAMMA, count_threshold=m)
    rmax_q = MaxQInit(actions=actions,
                      gamma=GAMMA,
                      count_threshold=m,
                      min_sampling_probability=p_min,
                      delta=delta)
    lrmax0_2 = LRMaxCT(actions=actions,
                       gamma=GAMMA,
                       count_threshold=m,
                       max_memory_size=max_mem,
                       prior=0.2)
    lrmax0_6 = LRMaxCT(actions=actions,
                       gamma=GAMMA,
                       count_threshold=m,
                       max_memory_size=max_mem,
                       prior=0.6)
    lrmax1_0 = LRMaxCT(actions=actions,
                       gamma=GAMMA,
                       count_threshold=m,
                       max_memory_size=max_mem,
                       prior=1.0)
    lrmax_learn = LRMaxCT(actions=actions,
                          gamma=GAMMA,
                          count_threshold=m,
                          max_memory_size=max_mem,
                          prior=None,
                          min_sampling_probability=p_min,
                          delta=delta)

    agents_pool = [rmax, lrmax1_0, lrmax0_6, lrmax0_2, lrmax_learn, rmax_q]

    run_agents_lifelong(agents_pool,
                        env_distribution,
                        samples=20,
                        episodes=20,
                        steps=10,
                        reset_at_terminal=False,
                        open_plot=True,
                        cumulative_plot=False,
                        is_tracked_value_discounted=False,
                        plot_only=False,
                        plot_title=False)
Example #4
def experiment():
    # Parameters
    gamma = .9
    env_distribution = make_env_distribution(env_class='deterministic-super-tight',
                                             env_name='deterministic-super-tight-bignknown',
                                             gamma=gamma)
    actions = env_distribution.get_actions()
    n_known = 100
    p_min = 1. / 3.
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    r_max = 1.
    v_max = 1.
    n_states = 4
    max_mem = 9

    # Agents
    rmax = RMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, name='RMax')
    lrmax = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                  deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                  max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  min_sampling_probability=p_min, name='LRMax')
    lrmaxprior = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                       deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                       max_memory_size=max_mem, prior=0.1, estimate_distances_online=True,
                       min_sampling_probability=p_min, name='LRMax(Dmax=0.1)')
    maxqinit = MaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                        deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                        min_sampling_probability=p_min, name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                            deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                            n_states=n_states, max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                            min_sampling_probability=p_min, name='LRMaxQInit')
    lrmaxqinitprior = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                                 deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                                 n_states=n_states, max_memory_size=max_mem, prior=0.1, estimate_distances_online=True,
                                 min_sampling_probability=p_min, name='LRMaxQInit(Dmax=0.1)')
    agents_pool = [rmax, lrmax, lrmaxprior, maxqinit, lrmaxqinit, lrmaxqinitprior]

    # Run
    run_agents_lifelong(agents_pool, env_distribution, n_instances=1, n_tasks=100, n_episodes=200, n_steps=100,
                        reset_at_terminal=False, open_plot=False, plot_title=True, do_run=False, do_plot=True,
                        parallel_run=True, n_processes=None)
Example #5
def experiment():
    # Parameters
    gamma = .9
    env_distribution = make_env_distribution(env_class='stochastic-tight', env_name='stochastic-tight', gamma=gamma)
    actions = env_distribution.get_actions()
    n_known = 10
    p_min = 1. / 7.  # There are seven possible MDPs
    epsilon_q = .1
    epsilon_m = .01
    delta = .1
    r_max = 1.
    v_max = 1.
    n_states = 4
    max_mem = 10

    # Agents
    rmax = RMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, name='RMax')
    lrmax = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                  deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                  max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  min_sampling_probability=p_min, name='LRMax')
    lrmaxprior = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                       deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                       max_memory_size=max_mem, prior=0.2, estimate_distances_online=True,
                       min_sampling_probability=p_min, name='LRMax(Dmax=0.2)')
    maxqinit = MaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                        deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                        min_sampling_probability=p_min, name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False, n_known=n_known,
                            deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                            n_states=n_states, max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                            min_sampling_probability=p_min, name='LRMaxQInit')
    agents_pool = [rmax, lrmax, lrmaxprior, maxqinit]  # , lrmaxqinit]

    # Run
    run_agents_lifelong(agents_pool, env_distribution, n_instances=5, n_tasks=50, n_episodes=50, n_steps=100,
                        reset_at_terminal=False, plot_only=False, open_plot=True, plot_title=True)
Example #6
def example():
    n_env = 4
    env_distribution = make_env_distribution(env_class='test',
                                             n_env=n_env,
                                             gamma=GAMMA,
                                             w=60,
                                             h=20)
    actions = env_distribution.get_actions()

    m = 1  # Count threshold
    max_mem = None  # No cap on the number of stored tasks (None presumably means unlimited)
    p_min = 1. / float(n_env)
    delta = 0.99
    lrmax = LRMax(actions=actions,
                  gamma=GAMMA,
                  count_threshold=m,
                  max_memory_size=max_mem,
                  prior=None,
                  min_sampling_probability=p_min,
                  delta=delta)
    rmax_max_q_init = MaxQInit(actions=actions,
                               gamma=GAMMA,
                               count_threshold=m,
                               min_sampling_probability=p_min,
                               delta=delta)
    rmax = RMax(actions=actions, gamma=GAMMA, count_threshold=m)

    run_agents_lifelong([rmax_max_q_init, lrmax, rmax],
                        env_distribution,
                        samples=10,
                        episodes=10,
                        steps=100,
                        reset_at_terminal=False,
                        open_plot=True,
                        cumulative_plot=False,
                        is_tracked_value_discounted=True,
                        plot_only=False)
Example #7
def experiment():
    # Parameters
    gamma = .9
    n_env = 5
    n_states = 20
    env_distribution = make_env_distribution(env_class='corridor',
                                             n_env=n_env,
                                             gamma=gamma,
                                             w=n_states,
                                             h=1)
    actions = env_distribution.get_actions()
    n_known = 1
    p_min = 1. / float(n_env)
    r_max = 1.
    v_max = 10.
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    max_mem = 1

    # Agents
    rmax = RMax(actions=actions,
                gamma=gamma,
                r_max=r_max,
                v_max=v_max,
                deduce_v_max=False,
                n_known=n_known,
                deduce_n_known=False,
                epsilon_q=epsilon_q,
                epsilon_m=epsilon_m,
                name='RMax')
    lrmax = LRMax(actions=actions,
                  gamma=gamma,
                  r_max=r_max,
                  v_max=v_max,
                  deduce_v_max=False,
                  n_known=n_known,
                  deduce_n_known=False,
                  epsilon_q=epsilon_q,
                  epsilon_m=epsilon_m,
                  delta=delta,
                  n_states=n_states,
                  max_memory_size=max_mem,
                  prior=None,
                  estimate_distances_online=True,
                  min_sampling_probability=p_min,
                  name='LRMax')
    lrmaxprior02 = LRMax(actions=actions,
                         gamma=gamma,
                         r_max=r_max,
                         v_max=v_max,
                         deduce_v_max=False,
                         n_known=n_known,
                         deduce_n_known=False,
                         epsilon_q=epsilon_q,
                         epsilon_m=epsilon_m,
                         delta=delta,
                         n_states=n_states,
                         max_memory_size=max_mem,
                         prior=0.2,
                         estimate_distances_online=False,
                         min_sampling_probability=p_min,
                         name='LRMax(0.2)')
    maxqinit = MaxQInit(actions=actions,
                        gamma=gamma,
                        r_max=r_max,
                        v_max=v_max,
                        deduce_v_max=False,
                        n_known=n_known,
                        deduce_n_known=False,
                        epsilon_q=epsilon_q,
                        epsilon_m=epsilon_m,
                        delta=delta,
                        n_states=n_states,
                        min_sampling_probability=p_min,
                        name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions,
                            gamma=gamma,
                            r_max=r_max,
                            v_max=v_max,
                            deduce_v_max=False,
                            n_known=n_known,
                            deduce_n_known=False,
                            epsilon_q=epsilon_q,
                            epsilon_m=epsilon_m,
                            delta=delta,
                            n_states=n_states,
                            max_memory_size=max_mem,
                            prior=None,
                            estimate_distances_online=True,
                            min_sampling_probability=p_min,
                            name='LRMaxQInit')
    lrmaxqinitprior02 = LRMaxQInit(actions=actions,
                                   gamma=gamma,
                                   r_max=r_max,
                                   v_max=v_max,
                                   deduce_v_max=False,
                                   n_known=n_known,
                                   deduce_n_known=False,
                                   epsilon_q=epsilon_q,
                                   epsilon_m=epsilon_m,
                                   delta=delta,
                                   n_states=n_states,
                                   max_memory_size=max_mem,
                                   prior=0.2,
                                   estimate_distances_online=True,
                                   min_sampling_probability=p_min,
                                   name='LRMaxQInit(0.2)')
    agents_pool = [
        rmax, lrmax, lrmaxprior02, maxqinit, lrmaxqinit, lrmaxqinitprior02
    ]

    # Run
    run_agents_lifelong(agents_pool,
                        env_distribution,
                        name_identifier=None,
                        n_instances=1,
                        n_tasks=20,
                        n_episodes=20,
                        n_steps=11,
                        reset_at_terminal=False,
                        do_run=False,  # presumably re-plots previously saved results without re-running
                        do_plot=True,
                        open_plot=False,
                        episodes_moving_average=False,
                        episodes_ma_width=10,
                        tasks_moving_average=False,
                        tasks_ma_width=10,
                        latex_rendering=True,
                        plot_title=False)
Example #8
def experiment(p, name):
    # Parameters
    gamma = .9
    n_env = 5
    size = p['size']
    env_distribution = make_env_distribution(env_class='tight',
                                             n_env=n_env,
                                             gamma=gamma,
                                             env_name=name,
                                             version=p['version'],
                                             w=size,
                                             h=size,
                                             stochastic=p['stochastic'],
                                             verbose=False)
    actions = env_distribution.get_actions()
    n_known = p['n_known']
    p_min = 1. / n_env
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    r_max = 1.
    v_max = 10.
    n_states = 4
    max_mem = 1

    # Agents
    rmax = RMax(actions=actions,
                gamma=gamma,
                r_max=r_max,
                v_max=v_max,
                deduce_v_max=False,
                n_known=n_known,
                deduce_n_known=False,
                epsilon_q=epsilon_q,
                epsilon_m=epsilon_m,
                name='RMax')
    lrmax = LRMax(actions=actions,
                  gamma=gamma,
                  r_max=r_max,
                  v_max=v_max,
                  deduce_v_max=False,
                  n_known=n_known,
                  deduce_n_known=False,
                  epsilon_q=epsilon_q,
                  epsilon_m=epsilon_m,
                  delta=delta,
                  n_states=n_states,
                  max_memory_size=max_mem,
                  prior=None,
                  estimate_distances_online=True,
                  min_sampling_probability=p_min,
                  name='LRMax')
    lrmax_p01 = LRMax(actions=actions,
                      gamma=gamma,
                      r_max=r_max,
                      v_max=v_max,
                      deduce_v_max=False,
                      n_known=n_known,
                      deduce_n_known=False,
                      epsilon_q=epsilon_q,
                      epsilon_m=epsilon_m,
                      delta=delta,
                      n_states=n_states,
                      max_memory_size=max_mem,
                      prior=0.1,
                      estimate_distances_online=True,
                      min_sampling_probability=p_min,
                      name='LRMax(0.1)')
    lrmax_p015 = LRMax(actions=actions,
                       gamma=gamma,
                       r_max=r_max,
                       v_max=v_max,
                       deduce_v_max=False,
                       n_known=n_known,
                       deduce_n_known=False,
                       epsilon_q=epsilon_q,
                       epsilon_m=epsilon_m,
                       delta=delta,
                       n_states=n_states,
                       max_memory_size=max_mem,
                       prior=0.15,
                       estimate_distances_online=True,
                       min_sampling_probability=p_min,
                       name='LRMax(0.15)')
    lrmax_p02 = LRMax(actions=actions,
                      gamma=gamma,
                      r_max=r_max,
                      v_max=v_max,
                      deduce_v_max=False,
                      n_known=n_known,
                      deduce_n_known=False,
                      epsilon_q=epsilon_q,
                      epsilon_m=epsilon_m,
                      delta=delta,
                      n_states=n_states,
                      max_memory_size=max_mem,
                      prior=0.2,
                      estimate_distances_online=True,
                      min_sampling_probability=p_min,
                      name='LRMax(0.2)')
    maxqinit = MaxQInit(actions=actions,
                        gamma=gamma,
                        r_max=r_max,
                        v_max=v_max,
                        deduce_v_max=False,
                        n_known=n_known,
                        deduce_n_known=False,
                        epsilon_q=epsilon_q,
                        epsilon_m=epsilon_m,
                        delta=delta,
                        n_states=n_states,
                        min_sampling_probability=p_min,
                        name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions,
                            gamma=gamma,
                            r_max=r_max,
                            v_max=v_max,
                            deduce_v_max=False,
                            n_known=n_known,
                            deduce_n_known=False,
                            epsilon_q=epsilon_q,
                            epsilon_m=epsilon_m,
                            delta=delta,
                            n_states=n_states,
                            max_memory_size=max_mem,
                            prior=None,
                            estimate_distances_online=True,
                            min_sampling_probability=p_min,
                            name='LRMaxQInit')
    lrmaxqinit_p01 = LRMaxQInit(actions=actions,
                                gamma=gamma,
                                r_max=r_max,
                                v_max=v_max,
                                deduce_v_max=False,
                                n_known=n_known,
                                deduce_n_known=False,
                                epsilon_q=epsilon_q,
                                epsilon_m=epsilon_m,
                                delta=delta,
                                n_states=n_states,
                                max_memory_size=max_mem,
                                prior=0.1,
                                estimate_distances_online=True,
                                min_sampling_probability=p_min,
                                name='LRMaxQInit(0.1)')
    lrmaxqinit_p015 = LRMaxQInit(actions=actions,
                                 gamma=gamma,
                                 r_max=r_max,
                                 v_max=v_max,
                                 deduce_v_max=False,
                                 n_known=n_known,
                                 deduce_n_known=False,
                                 epsilon_q=epsilon_q,
                                 epsilon_m=epsilon_m,
                                 delta=delta,
                                 n_states=n_states,
                                 max_memory_size=max_mem,
                                 prior=0.15,
                                 estimate_distances_online=True,
                                 min_sampling_probability=p_min,
                                 name='LRMaxQInit(0.15)')
    lrmaxqinit_p02 = LRMaxQInit(actions=actions,
                                gamma=gamma,
                                r_max=r_max,
                                v_max=v_max,
                                deduce_v_max=False,
                                n_known=n_known,
                                deduce_n_known=False,
                                epsilon_q=epsilon_q,
                                epsilon_m=epsilon_m,
                                delta=delta,
                                n_states=n_states,
                                max_memory_size=max_mem,
                                prior=0.2,
                                estimate_distances_online=True,
                                min_sampling_probability=p_min,
                                name='LRMaxQInit(0.2)')
    # agents_pool = [rmax, lrmax, lrmax_p01, lrmax_p015, lrmax_p02, maxqinit, lrmaxqinit, lrmaxqinit_p01, lrmaxqinit_p015, lrmaxqinit_p02]
    agents_pool = [
        rmax, lrmax, lrmax_p02, lrmax_p01, maxqinit, lrmaxqinit, lrmaxqinit_p01
    ]

    # Run
    run_agents_lifelong(agents_pool,
                        env_distribution,
                        n_instances=2,
                        n_tasks=p['n_tasks'],
                        n_episodes=p['n_episodes'],
                        n_steps=p['n_steps'],
                        reset_at_terminal=False,
                        open_plot=False,
                        plot_title=False,
                        plot_legend=2,
                        do_run=True,
                        do_plot=True,
                        parallel_run=True,
                        n_processes=None,
                        episodes_moving_average=True,
                        episodes_ma_width=100,
                        tasks_moving_average=False,
                        latex_rendering=True)
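Like Example #2, this experiment takes a parameter dictionary, here alongside an environment name. A hypothetical call with illustrative values:

# Hypothetical arguments: keys match those read by experiment(),
# values are illustrative and not taken from the original runs.
params = {
    'version': 1,        # environment version for make_env_distribution
    'size': 4,           # grid width and height
    'stochastic': True,  # whether transitions are stochastic
    'n_known': 10,       # count threshold before a pair is known
    'n_tasks': 20,       # number of sampled tasks
    'n_episodes': 100,   # episodes per task
    'n_steps': 10,       # steps per episode
}
experiment(params, name='tight-example')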
Example #9
def experiment():
    # Parameters
    gamma = .9
    n_env = 5
    w, h = 20, 20
    n_states = w * h
    env_distribution = make_env_distribution(
        env_class='grid-world',
        env_name='grid-world-two-goals-large',
        n_env=n_env,
        gamma=gamma,
        w=w,
        h=h)
    actions = env_distribution.get_actions()
    n_known = 1
    p_min = 1. / float(n_env)
    r_max = 1.
    v_max = 10.
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    max_mem = 1

    # Agents
    rmax = RMax(actions=actions,
                gamma=gamma,
                r_max=r_max,
                v_max=v_max,
                deduce_v_max=False,
                n_known=n_known,
                deduce_n_known=False,
                epsilon_q=epsilon_q,
                epsilon_m=epsilon_m,
                name='RMax')
    lrmax = LRMax(actions=actions,
                  gamma=gamma,
                  r_max=r_max,
                  v_max=v_max,
                  deduce_v_max=False,
                  n_known=n_known,
                  deduce_n_known=False,
                  epsilon_q=epsilon_q,
                  epsilon_m=epsilon_m,
                  delta=delta,
                  n_states=n_states,
                  max_memory_size=max_mem,
                  prior=None,
                  estimate_distances_online=True,
                  min_sampling_probability=p_min,
                  name='LRMax')
    lrmaxprior02 = LRMax(actions=actions,
                         gamma=gamma,
                         r_max=r_max,
                         v_max=v_max,
                         deduce_v_max=False,
                         n_known=n_known,
                         deduce_n_known=False,
                         epsilon_q=epsilon_q,
                         epsilon_m=epsilon_m,
                         delta=delta,
                         n_states=n_states,
                         max_memory_size=max_mem,
                         prior=0.2,
                         estimate_distances_online=False,
                         min_sampling_probability=p_min,
                         name='LRMax(Dmax0.2)')
    maxqinit = MaxQInit(actions=actions,
                        gamma=gamma,
                        r_max=r_max,
                        v_max=v_max,
                        deduce_v_max=False,
                        n_known=n_known,
                        deduce_n_known=False,
                        epsilon_q=epsilon_q,
                        epsilon_m=epsilon_m,
                        delta=delta,
                        n_states=n_states,
                        min_sampling_probability=p_min,
                        name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions,
                            gamma=gamma,
                            r_max=r_max,
                            v_max=v_max,
                            deduce_v_max=False,
                            n_known=n_known,
                            deduce_n_known=False,
                            epsilon_q=epsilon_q,
                            epsilon_m=epsilon_m,
                            delta=delta,
                            n_states=n_states,
                            max_memory_size=max_mem,
                            prior=None,
                            estimate_distances_online=True,
                            min_sampling_probability=p_min,
                            name='LRMaxQInit')
    lrmaxqinitprior02 = LRMaxQInit(actions=actions,
                                   gamma=gamma,
                                   r_max=r_max,
                                   v_max=v_max,
                                   deduce_v_max=False,
                                   n_known=n_known,
                                   deduce_n_known=False,
                                   epsilon_q=epsilon_q,
                                   epsilon_m=epsilon_m,
                                   delta=delta,
                                   n_states=n_states,
                                   max_memory_size=max_mem,
                                   prior=0.2,
                                   estimate_distances_online=True,
                                   min_sampling_probability=p_min,
                                   name='LRMaxQInit(Dmax0.2)')
    agents_pool = [
        rmax, lrmax, lrmaxprior02, maxqinit, lrmaxqinit, lrmaxqinitprior02
    ]

    # Run
    run_agents_lifelong(agents_pool,
                        env_distribution,
                        name_identifier=None,
                        n_instances=1,
                        n_tasks=100,
                        n_episodes=100,
                        n_steps=13,
                        reset_at_terminal=False,
                        open_plot=False,
                        plot_title=True,
                        do_run=True,
                        do_plot=True,
                        parallel_run=True,
                        n_processes=None)