def experiment(p):
    # Parameters
    gamma = .9
    n_env = 5
    size = p['size']
    env_distribution = make_env_distribution(
        env_class='tight', n_env=n_env, gamma=gamma, env_name=p['name'],
        w=size, h=size, stochastic=p['stochastic']
    )
    actions = env_distribution.get_actions()
    n_known = p['n_known']
    p_min = 1. / float(n_env)
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    r_max = 1.
    v_max = p['v_max']
    n_states = 4
    max_mem = 1

    # Agents
    rmax = RMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                epsilon_m=epsilon_m, name='RMax')
    lrmax = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                  n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                  epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                  max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  min_sampling_probability=p_min, name='LRMax')
    lrmax_p01 = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                      deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                      epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                      n_states=n_states, max_memory_size=max_mem, prior=0.1,
                      estimate_distances_online=True, min_sampling_probability=p_min,
                      name='LRMax(Dmax=0.1)')
    lrmax_p02 = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                      deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                      epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                      n_states=n_states, max_memory_size=max_mem, prior=0.2,
                      estimate_distances_online=True, min_sampling_probability=p_min,
                      name='LRMax(Dmax=0.2)')
    maxqinit = MaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                        deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                        epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                        n_states=n_states, min_sampling_probability=p_min, name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                            deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                            epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                            n_states=n_states, max_memory_size=max_mem, prior=None,
                            estimate_distances_online=True, min_sampling_probability=p_min,
                            name='LRMaxQInit')
    lrmaxqinit_p01 = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                                deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                                epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                                n_states=n_states, max_memory_size=max_mem, prior=0.1,
                                estimate_distances_online=True,
                                min_sampling_probability=p_min, name='LRMaxQInit(Dmax=0.1)')
    lrmaxqinit_p02 = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                                deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                                epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                                n_states=n_states, max_memory_size=max_mem, prior=0.2,
                                estimate_distances_online=True,
                                min_sampling_probability=p_min, name='LRMaxQInit(Dmax=0.2)')
    agents_pool = [rmax, lrmax, lrmax_p01, lrmax_p02, maxqinit, lrmaxqinit,
                   lrmaxqinit_p01, lrmaxqinit_p02]

    # Run
    run_agents_lifelong(agents_pool, env_distribution, n_instances=3, n_tasks=p['n_tasks'],
                        n_episodes=p['n_episodes'], n_steps=p['n_steps'],
                        reset_at_terminal=False, open_plot=False, plot_title=True,
                        do_run=True, do_plot=True, parallel_run=True, n_processes=None)
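
# All of these experiment scripts assume the same names in scope: the agent classes
# (RMax, LRMax, MaxQInit, LRMaxQInit) and the helpers make_env_distribution and
# run_agents_lifelong. Below is a minimal sketch of the imports and of a driver for the
# parameterized experiment(p) above. The module paths follow the llrl package layout
# and are assumptions; the parameter values are placeholders, but the dict keys are
# exactly the ones read inside experiment(p).
from llrl.agents.rmax import RMax
from llrl.agents.lrmax import LRMax
from llrl.agents.maxqinit import MaxQInit
from llrl.agents.lrmaxqinit import LRMaxQInit
from llrl.utils.env_handler import make_env_distribution
from llrl.experiments import run_agents_lifelong

if __name__ == '__main__':
    params = {
        'size': 2,            # grid width and height of the 'tight' environments
        'name': 'tight',      # environment name passed to make_env_distribution
        'stochastic': False,  # deterministic vs. stochastic transitions
        'n_known': 10,        # visit threshold after which a state-action pair is known
        'v_max': 1.,          # upper bound on the value function
        'n_tasks': 20,        # number of tasks sampled from the distribution
        'n_episodes': 100,    # episodes per task
        'n_steps': 10,        # steps per episode
    }
    experiment(params)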
def experiment():
    # Parameters
    gamma = .9
    env_distribution = make_env_distribution(env_class='deterministic-super-tight',
                                             env_name='deterministic-super-tight-bignknown',
                                             gamma=gamma)
    actions = env_distribution.get_actions()
    n_known = 100
    p_min = 1. / 3.
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    r_max = 1.
    v_max = 1.
    n_states = 4
    max_mem = 9

    # Agents
    rmax = RMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                epsilon_m=epsilon_m, name='RMax')
    lrmax = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                  n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                  epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                  max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  min_sampling_probability=p_min, name='LRMax')
    lrmaxprior = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                       deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                       epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                       n_states=n_states, max_memory_size=max_mem, prior=0.1,
                       estimate_distances_online=True, min_sampling_probability=p_min,
                       name='LRMax(Dmax=0.1)')
    maxqinit = MaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                        deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                        epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                        n_states=n_states, min_sampling_probability=p_min, name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                            deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                            epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                            n_states=n_states, max_memory_size=max_mem, prior=None,
                            estimate_distances_online=True, min_sampling_probability=p_min,
                            name='LRMaxQInit')
    lrmaxqinitprior = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                                 deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                                 epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                                 n_states=n_states, max_memory_size=max_mem, prior=0.1,
                                 estimate_distances_online=True,
                                 min_sampling_probability=p_min, name='LRMaxQInit(Dmax=0.1)')
    agents_pool = [rmax, lrmax, lrmaxprior, maxqinit, lrmaxqinit, lrmaxqinitprior]

    # Run
    run_agents_lifelong(agents_pool, env_distribution, n_instances=1, n_tasks=100,
                        n_episodes=200, n_steps=100, reset_at_terminal=False,
                        open_plot=False, plot_title=True, do_run=False, do_plot=True,
                        parallel_run=True, n_processes=None)
def experiment():
    # Parameters
    gamma = .9
    env_distribution = make_env_distribution(env_class='stochastic-tight',
                                             env_name='stochastic-tight', gamma=gamma)
    actions = env_distribution.get_actions()
    n_known = 10
    p_min = 1. / 7.  # There are seven possible MDPs
    epsilon_q = .1
    epsilon_m = .01
    delta = .1
    r_max = 1.
    v_max = 1.
    n_states = 4
    max_mem = 10

    # Agents
    rmax = RMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                epsilon_m=epsilon_m, name='RMax')
    lrmax = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                  n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                  epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                  max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  min_sampling_probability=p_min, name='LRMax')
    lrmaxprior = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                       deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                       epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                       n_states=n_states, max_memory_size=max_mem, prior=0.2,
                       estimate_distances_online=True, min_sampling_probability=p_min,
                       name='LRMax(Dmax=0.2)')
    maxqinit = MaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                        deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                        epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                        n_states=n_states, min_sampling_probability=p_min, name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                            deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                            epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                            n_states=n_states, max_memory_size=max_mem, prior=None,
                            estimate_distances_online=True, min_sampling_probability=p_min,
                            name='LRMaxQInit')
    agents_pool = [rmax, lrmax, lrmaxprior, maxqinit]  # , lrmaxqinit]

    # Run
    run_agents_lifelong(agents_pool, env_distribution, n_instances=5, n_tasks=50,
                        n_episodes=50, n_steps=100, reset_at_terminal=False,
                        plot_only=False, open_plot=True, plot_title=True)
def experiment():
    # Parameters
    gamma = .9
    n_env = 5
    n_states = 20
    env_distribution = make_env_distribution(env_class='corridor', n_env=n_env,
                                             gamma=gamma, w=n_states, h=1)
    actions = env_distribution.get_actions()
    n_known = 1
    p_min = 1. / float(n_env)
    r_max = 1.
    v_max = 10.
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    max_mem = 1

    # Agents
    rmax = RMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                epsilon_m=epsilon_m, name='RMax')
    lrmax = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                  n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                  epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                  max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  min_sampling_probability=p_min, name='LRMax')
    lrmaxprior02 = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                         deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                         epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                         n_states=n_states, max_memory_size=max_mem, prior=0.2,
                         estimate_distances_online=False, min_sampling_probability=p_min,
                         name='LRMax(0.2)')
    maxqinit = MaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                        deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                        epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                        n_states=n_states, min_sampling_probability=p_min, name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                            deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                            epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                            n_states=n_states, max_memory_size=max_mem, prior=None,
                            estimate_distances_online=True, min_sampling_probability=p_min,
                            name='LRMaxQInit')
    lrmaxqinitprior02 = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                                   deduce_v_max=False, n_known=n_known,
                                   deduce_n_known=False, epsilon_q=epsilon_q,
                                   epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                                   max_memory_size=max_mem, prior=0.2,
                                   estimate_distances_online=True,
                                   min_sampling_probability=p_min, name='LRMaxQInit(0.2)')
    agents_pool = [rmax, lrmax, lrmaxprior02, maxqinit, lrmaxqinit, lrmaxqinitprior02]

    # Run
    run_agents_lifelong(agents_pool, env_distribution, name_identifier=None, n_instances=1,
                        n_tasks=20, n_episodes=20, n_steps=11, reset_at_terminal=False,
                        do_run=False, do_plot=True, open_plot=False,
                        episodes_moving_average=False, episodes_ma_width=10,
                        tasks_moving_average=False, tasks_ma_width=10,
                        latex_rendering=True, plot_title=False)
def experiment(p, name):
    # Parameters
    gamma = .9
    n_env = 5
    size = p['size']
    env_distribution = make_env_distribution(env_class='tight', n_env=n_env, gamma=gamma,
                                             env_name=name, version=p['version'], w=size,
                                             h=size, stochastic=p['stochastic'],
                                             verbose=False)
    actions = env_distribution.get_actions()
    n_known = p['n_known']
    p_min = 1. / n_env
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    r_max = 1.
    v_max = 10.
    n_states = 4
    max_mem = 1

    # Agents
    rmax = RMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                epsilon_m=epsilon_m, name='RMax')
    lrmax = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                  n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                  epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                  max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  min_sampling_probability=p_min, name='LRMax')
    lrmax_p01 = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                      deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                      epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                      n_states=n_states, max_memory_size=max_mem, prior=0.1,
                      estimate_distances_online=True, min_sampling_probability=p_min,
                      name='LRMax(0.1)')
    lrmax_p015 = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                       deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                       epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                       n_states=n_states, max_memory_size=max_mem, prior=0.15,
                       estimate_distances_online=True, min_sampling_probability=p_min,
                       name='LRMax(0.15)')
    lrmax_p02 = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                      deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                      epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                      n_states=n_states, max_memory_size=max_mem, prior=0.2,
                      estimate_distances_online=True, min_sampling_probability=p_min,
                      name='LRMax(0.2)')
    maxqinit = MaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                        deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                        epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                        n_states=n_states, min_sampling_probability=p_min, name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                            deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                            epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                            n_states=n_states, max_memory_size=max_mem, prior=None,
                            estimate_distances_online=True, min_sampling_probability=p_min,
                            name='LRMaxQInit')
    lrmaxqinit_p01 = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                                deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                                epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                                n_states=n_states, max_memory_size=max_mem, prior=0.1,
                                estimate_distances_online=True,
                                min_sampling_probability=p_min, name='LRMaxQInit(0.1)')
    lrmaxqinit_p015 = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                                 deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                                 epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                                 n_states=n_states, max_memory_size=max_mem, prior=0.15,
                                 estimate_distances_online=True,
                                 min_sampling_probability=p_min, name='LRMaxQInit(0.15)')
    lrmaxqinit_p02 = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                                deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                                epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                                n_states=n_states, max_memory_size=max_mem, prior=0.2,
                                estimate_distances_online=True,
                                min_sampling_probability=p_min, name='LRMaxQInit(0.2)')

    # agents_pool = [rmax, lrmax, lrmax_p01, lrmax_p015, lrmax_p02, maxqinit, lrmaxqinit,
    #                lrmaxqinit_p01, lrmaxqinit_p015, lrmaxqinit_p02]
    agents_pool = [rmax, lrmax, lrmax_p02, lrmax_p01, maxqinit, lrmaxqinit, lrmaxqinit_p01]

    # Run
    run_agents_lifelong(agents_pool, env_distribution, n_instances=2, n_tasks=p['n_tasks'],
                        n_episodes=p['n_episodes'], n_steps=p['n_steps'],
                        reset_at_terminal=False, open_plot=False, plot_title=False,
                        plot_legend=2, do_run=True, do_plot=True, parallel_run=True,
                        n_processes=None, episodes_moving_average=True,
                        episodes_ma_width=100, tasks_moving_average=False,
                        latex_rendering=True)
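
# A hedged usage sketch for the two-argument variant above: the dict keys are exactly
# the ones read inside experiment(p, name); the values and the environment name are
# placeholders, not the settings used to produce any reported results.
if __name__ == '__main__':
    params = {
        'size': 2,           # grid width and height
        'version': 1,        # environment version forwarded to make_env_distribution
        'stochastic': True,  # stochastic transitions
        'n_known': 10,       # known-ness threshold
        'n_tasks': 20,
        'n_episodes': 100,
        'n_steps': 10,
    }
    experiment(params, name='tight')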
def experiment():
    # Parameters
    gamma = .9
    n_env = 5
    w, h = 20, 20
    n_states = w * h
    env_distribution = make_env_distribution(
        env_class='grid-world', env_name='grid-world-two-goals-large', n_env=n_env,
        gamma=gamma, w=w, h=h)
    actions = env_distribution.get_actions()
    n_known = 1
    p_min = 1. / float(n_env)
    r_max = 1.
    v_max = 10.
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    max_mem = 1

    # Agents
    rmax = RMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                epsilon_m=epsilon_m, name='RMax')
    lrmax = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                  n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q,
                  epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                  max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  min_sampling_probability=p_min, name='LRMax')
    lrmaxprior02 = LRMax(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                         deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                         epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                         n_states=n_states, max_memory_size=max_mem, prior=0.2,
                         estimate_distances_online=False, min_sampling_probability=p_min,
                         name='LRMax(Dmax0.2)')
    maxqinit = MaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                        deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                        epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                        n_states=n_states, min_sampling_probability=p_min, name='MaxQInit')
    lrmaxqinit = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                            deduce_v_max=False, n_known=n_known, deduce_n_known=False,
                            epsilon_q=epsilon_q, epsilon_m=epsilon_m, delta=delta,
                            n_states=n_states, max_memory_size=max_mem, prior=None,
                            estimate_distances_online=True, min_sampling_probability=p_min,
                            name='LRMaxQInit')
    lrmaxqinitprior02 = LRMaxQInit(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max,
                                   deduce_v_max=False, n_known=n_known,
                                   deduce_n_known=False, epsilon_q=epsilon_q,
                                   epsilon_m=epsilon_m, delta=delta, n_states=n_states,
                                   max_memory_size=max_mem, prior=0.2,
                                   estimate_distances_online=True,
                                   min_sampling_probability=p_min,
                                   name='LRMaxQInit(Dmax0.2)')
    agents_pool = [rmax, lrmax, lrmaxprior02, maxqinit, lrmaxqinit, lrmaxqinitprior02]

    # Run
    run_agents_lifelong(agents_pool, env_distribution, name_identifier=None, n_instances=1,
                        n_tasks=100, n_episodes=100, n_steps=13, reset_at_terminal=False,
                        open_plot=False, plot_title=True, do_run=True, do_plot=True,
                        parallel_run=True, n_processes=None)
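
# The zero-argument experiments (including the one above) are self-contained; assuming
# each function lives in its own script, a one-line entry point suffices to launch it:
if __name__ == '__main__':
    experiment()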