def main():
    # Make MDP Distribution.
    mdp_class = "four_room"
    environment = make_mdp.make_mdp_distr(mdp_class=mdp_class, grid_dim=10)

    sa_stack = make_random_sa_stack(environment, max_num_levels=5)
    sa_stack.print_state_space_sizes()
Example 2
def main():

    # ========================
    # === Make Environment ===
    # ========================
    mdp_class = "four_room"
    environment = make_mdp.make_mdp_distr(mdp_class=mdp_class, grid_dim=10)
    actions = environment.get_actions()

    # ==========================
    # === Make SA, AA Stacks ===
    # ==========================
    # sa_stack, aa_stack = aa_stack_h.make_random_sa_diropt_aa_stack(environment, max_num_levels=3)
    sa_stack, aa_stack = hierarchy_helpers.make_hierarchy(environment,
                                                          num_levels=3)

    mdp = environment.sample()
    HVI = HierarchicalValueIteration(mdp, sa_stack, aa_stack)
    VI = ValueIteration(mdp)

    h_iters, h_val = HVI.run_vi()
    iters, val = VI.run_vi()

    print "H:", h_iters, h_val
    print "V:", iters, val
Example 3
def main(open_plot=True):
    # Make MDP distribution, agents.
    mdp_distr = make_mdp.make_mdp_distr(mdp_class="four_room")
    ql_agent = QLearnerAgent(actions=mdp_distr.get_actions())
    rand_agent = RandomAgent(actions=mdp_distr.get_actions())

    # Run experiment and make plot.
    run_agents_multi_task([ql_agent, rand_agent], mdp_distr, task_samples=50, episodes=1, steps=1500, reset_at_terminal=True, open_plot=open_plot)
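
A note on imports: none of these snippets show them. As a rough guide, the example above would need something like the following; the module paths are assumptions (they vary across simple_rl versions, and make_mdp is a local experiment helper rather than part of simple_rl itself).

# Assumed imports for the example above; module paths are a best guess.
from simple_rl.agents import QLearnerAgent, RandomAgent
from simple_rl.run_experiments import run_agents_multi_task
import make_mdp    # local helper that builds the MDP distribution
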
Example 4
def main():

    # ======================
    # == Make Environment ==
    # ======================
    mdp_class = "four_room"
    environment = make_mdp.make_mdp_distr(mdp_class=mdp_class, grid_dim=7)
    actions = environment.get_actions()

    # ====================
    # == Make Hierarchy ==
    # ====================
    sa_stack, aa_stack = make_hierarchy(environment, num_levels=3)
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp_distr = make_mdp.make_mdp_distr(mdp_class="four_room")
    ql_agent = QLearningAgent(actions=mdp_distr.get_actions())
    rand_agent = RandomAgent(actions=mdp_distr.get_actions())

    # Make goal-based option agent.
    goal_based_options = aa_helpers.make_goal_based_options(mdp_distr)
    goal_based_aa = ActionAbstraction(prim_actions=mdp_distr.get_actions(), options=goal_based_options)
    option_agent = AbstractionWrapper(QLearningAgent, agent_params={"actions":mdp_distr.get_actions()}, action_abstr=goal_based_aa)

    # Run experiment and make plot.
    run_agents_lifelong([ql_agent, rand_agent, option_agent], mdp_distr, samples=10, episodes=100, steps=150, open_plot=open_plot)
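
Conceptually, the goal-based options produced by aa_helpers.make_goal_based_options are temporally extended actions: each can be initiated anywhere, follows a fixed policy toward one goal of the MDP distribution, and terminates once that goal is reached. A minimal, self-contained sketch of that idea (this GoalOption class is hypothetical and is not simple_rl's Option API):

# Hypothetical sketch of a goal-based option: initiate anywhere, follow a
# fixed policy, terminate when the designated goal state is reached.
class GoalOption:
    def __init__(self, goal_state, policy):
        self.goal_state = goal_state
        self.policy = policy          # maps state -> primitive action

    def is_init_true(self, state):
        return True                   # option may start from any state

    def is_term_true(self, state):
        return state == self.goal_state

    def act(self, state):
        return self.policy(state)

# One option per goal in the distribution (sketch only).
def make_goal_based_options_sketch(goal_states, policy_for_goal):
    return [GoalOption(g, policy_for_goal(g)) for g in goal_states]
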
Example 6
def main():

    # ========================
    # === Make Environment ===
    # ========================
    mdp_class = "four_room"
    gamma = 1.0
    environment = make_mdp.make_mdp_distr(mdp_class=mdp_class, step_cost=0.01, grid_dim=15, gamma=gamma)
    actions = environment.get_actions()


    # ==========================
    # === Make SA, AA Stacks ===
    # ==========================
    sa_stack, aa_stack = hierarchy_helpers.make_hierarchy(environment, num_levels=2)

    # Debug.
    print "\n" + ("=" * 30) + "\n== Done making abstraction. ==\n" + ("=" * 30) + "\n"
    sa_stack.print_state_space_sizes()
    aa_stack.print_action_spaces_sizes()

    # ===================
    # === Make Agents ===
    # ===================
    # baseline_agent = QLearnerAgent(actions)
    agent_class = QLearnerAgent
    baseline_agent = agent_class(actions, gamma=gamma)
    rand_agent = RandomAgent(actions)
    # hierarch_r_max = HRMaxAgent(actions, sa_stack=sa_stack, aa_stack=aa_stack)
    l0_hierarch_agent = HierarchyAgent(agent_class, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=0, name_ext="-$l_0$")
    l1_hierarch_agent = HierarchyAgent(agent_class, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=1, name_ext="-$l_1$")
    # l2_hierarch_agent = HierarchyAgent(agent_class, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=2, name_ext="-$l_2$")
    dynamic_hierarch_agent = DynamicHierarchyAgent(agent_class, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=1, name_ext="-$d$")
    # dynamic_rmax_hierarch_agent = DynamicHierarchyAgent(RMaxAgent, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=1, name_ext="-$d$")

    print "\n" + ("=" * 26)
    print "== Running experiments. =="
    print "=" * 26 + "\n"

    # ======================
    # === Run Experiment ===
    # ======================
    agents = [dynamic_hierarch_agent, baseline_agent]

    run_agents_multi_task(agents, environment, task_samples=10, steps=20000, episodes=1, reset_at_terminal=True)
Example 7
def main():

    # ========================
    # === Make Environment ===
    # ========================
    mdp_class = "hrooms"
    environment = make_mdp.make_mdp_distr(mdp_class=mdp_class)
    actions = environment.get_actions()

    # ==========================
    # === Make SA, AA Stacks ===
    # ==========================
    # sa_stack, aa_stack = aa_stack_h.make_random_sa_diropt_aa_stack(environment, max_num_levels=3)
    sa_stack, aa_stack = hierarchy_helpers.make_hierarchy(environment, num_levels=3)

    # Debug.
    print("\n" + ("=" * 30))
    print("== Done making abstraction. ==")
    print("=" * 30 + "\n")
    sa_stack.print_state_space_sizes()
    print("Num Action Abstractions:", len(aa_stack.get_aa_list()))

    # ===================
    # === Make Agents ===
    # ===================
    baseline_agent = QLearningAgent(actions)
    rmax_agent = RMaxAgent(actions)
    rand_agent = RandomAgent(actions)
    l0_hierarch_agent = HierarchyAgent(QLearningAgent, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=0, name_ext="-$l_0$")
    l1_hierarch_agent = HierarchyAgent(QLearningAgent, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=1, name_ext="-$l_1$")
    # l2_hierarch_agent = HierarchyAgent(QLearningAgent, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=2, name_ext="-$l_2$")
    dynamic_hierarch_agent = DynamicHierarchyAgent(QLearningAgent, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=1, name_ext="-$d$")
    # dynamic_rmax_hierarch_agent = DynamicHierarchyAgent(RMaxAgent, sa_stack=sa_stack, aa_stack=aa_stack, cur_level=1, name_ext="-$d$")

    print("\n" + ("=" * 26))
    print("== Running experiments. ==")
    print("=" * 26 + "\n")

    # ======================
    # === Run Experiment ===
    # ======================
    agents = [l1_hierarch_agent, dynamic_hierarch_agent, baseline_agent]
    run_agents_multi_task(agents, environment, task_samples=10, steps=1500, episodes=1, reset_at_terminal=True)
Example 8
def main():

    # MDP Setting.
    lifelong = True
    mdp_class = "four_room"
    grid_dim = 11

    # Make MDP.
    mdp_distr = make_mdp.make_mdp_distr(mdp_class=mdp_class, grid_dim=grid_dim)
    actions = mdp_distr.get_actions()
    experiment_type = "aa"

    goal_based_options = aa_helpers.make_goal_based_options(mdp_distr)
    goal_based_aa = ActionAbstraction(prim_actions=actions, options=goal_based_options)

    # Visualize Action Abstractions.
    visualize_options_grid(mdp_distr, goal_based_aa)

    input("Press any key to quit ")
    quit()
def main(open_plot=True):
    # Setup MDP, Agents.
    mdp_distr = make_mdp.make_mdp_distr(mdp_class="four_room")
    ql_agent = QLearningAgent(actions=mdp_distr.get_actions())
    rand_agent = RandomAgent(actions=mdp_distr.get_actions())

    # Make goal-based option agent.
    goal_based_options = aa_helpers.make_goal_based_options(mdp_distr)
    goal_based_aa = ActionAbstraction(prim_actions=mdp_distr.get_actions(),
                                      options=goal_based_options)
    option_agent = AbstractionWrapper(QLearningAgent,
                                      actions=mdp_distr.get_actions(),
                                      action_abstr=goal_based_aa)

    # Run experiment and make plot.
    run_agents_lifelong([ql_agent, rand_agent, option_agent],
                        mdp_distr,
                        samples=10,
                        episodes=100,
                        steps=150,
                        open_plot=open_plot)
def main():

    # MDP Setting.
    lifelong = True
    mdp_class = "four_room"
    grid_dim = 11

    # Make MDP.
    mdp_distr = make_mdp.make_mdp_distr(mdp_class=mdp_class, grid_dim=grid_dim)
    actions = mdp_distr.get_actions()
    experiment_type = "aa"

    goal_based_options = aa_helpers.make_goal_based_options(mdp_distr)
    goal_based_aa = ActionAbstraction(prim_actions=actions,
                                      options=goal_based_options)

    # Visualize Action Abstractions.
    visualize_options_grid(mdp_distr, goal_based_aa)

    input("Press any key to quit ")
    quit()
def main(eps=0.1, open_plot=True):

    mdp_class, is_goal_terminal, samples, alg = parse_args()

    # Setup multitask setting.
    mdp_distr = make_mdp.make_mdp_distr(mdp_class=mdp_class)
    actions = mdp_distr.get_actions()

    # Compute average MDP.
    print "Making and solving avg MDP...",
    sys.stdout.flush()
    avg_mdp = compute_avg_mdp(mdp_distr)
    avg_mdp_vi = ValueIteration(avg_mdp,
                                delta=0.001,
                                max_iterations=1000,
                                sample_rate=5)
    iters, value = avg_mdp_vi.run_vi()

    ### Yuu

    transfer_fixed_agent = FixedPolicyAgent(avg_mdp_vi.policy,
                                            name="transferFixed")
    rand_agent = RandomAgent(actions, name=r"$\pi^u$")

    opt_q_func = compute_optimistic_q_function(mdp_distr)
    avg_q_func = avg_mdp_vi.get_q_function()

    if alg == "q":
        pure_ql_agent = QLearnerAgent(actions, epsilon=eps, name="Q-0")
        qmax = 1.0 * (1 - 0.99)
        # qmax = 1.0
        pure_ql_agent_opt = QLearnerAgent(actions,
                                          epsilon=eps,
                                          default_q=qmax,
                                          name="Q-vmax")
        transfer_ql_agent_optq = QLearnerAgent(actions,
                                               epsilon=eps,
                                               name="Q-trans-max")
        transfer_ql_agent_optq.set_init_q_function(opt_q_func)
        transfer_ql_agent_avgq = QLearnerAgent(actions,
                                               epsilon=eps,
                                               name="Q-trans-avg")
        transfer_ql_agent_avgq.set_init_q_function(avg_q_func)

        agents = [
            pure_ql_agent, pure_ql_agent_opt, transfer_ql_agent_optq,
            transfer_ql_agent_avgq
        ]
    elif alg == "rmax":
        pure_rmax_agent = RMaxAgent(actions, name="RMAX-vmax")
        updating_trans_rmax_agent = UpdatingRMaxAgent(actions,
                                                      name="RMAX-updating_max")
        trans_rmax_agent = RMaxAgent(actions, name="RMAX-trans_max")
        trans_rmax_agent.set_init_q_function(opt_q_func)
        agents = [pure_rmax_agent, updating_trans_rmax_agent, trans_rmax_agent]
    elif alg == "delayed-q":
        pure_delayed_ql_agent = DelayedQLearnerAgent(actions,
                                                     opt_q_func,
                                                     name="DelayedQ-vmax")
        pure_delayed_ql_agent.set_vmax()
        updating_delayed_ql_agent = UpdatingDelayedQLearnerAgent(
            actions, name="DelayedQ-updating_max")
        trans_delayed_ql_agent = DelayedQLearnerAgent(
            actions, opt_q_func, name="DelayedQ-trans-max")
        agents = [
            pure_delayed_ql_agent, updating_delayed_ql_agent,
            trans_delayed_ql_agent
        ]
    else:
        print "Unknown type of agents:", alg
        print "(q, rmax, delayed-q)"
        assert (False)

    # Run task.
    # TODO: Function for Learning on each MDP
    run_agents_multi_task(agents,
                          mdp_distr,
                          task_samples=samples,
                          episodes=1,
                          steps=100,
                          reset_at_terminal=is_goal_terminal,
                          is_rec_disc_reward=False,
                          cumulative_plot=True,
                          open_plot=open_plot)
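
parse_args is not shown in this snippet; it only needs to return the four values unpacked on the first line. A plausible stand-in using argparse (flag names and defaults are assumptions):

import argparse

def parse_args():
    # Hypothetical reconstruction of the missing parse_args. Flag names and
    # defaults are assumptions; only the four returned values are implied by
    # the example above.
    parser = argparse.ArgumentParser()
    parser.add_argument("--mdp_class", type=str, default="four_room")
    parser.add_argument("--goal_terminal", action="store_true")
    parser.add_argument("--samples", type=int, default=50)
    parser.add_argument("--alg", type=str, default="q", choices=["q", "rmax", "delayed-q"])
    args = parser.parse_args()
    return args.mdp_class, args.goal_terminal, args.samples, args.alg
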
def main():

    # Grab experiment params.
    mdp_class, task_samples, episodes, steps, grid_dim, x_axis_num_options, agent_class_str, max_options, exp_type = parse_args()

    gamma = 0.9

    # ========================
    # === Make Environment ===
    # ========================
    multi_task = True
    max_option_steps = 50 if x_axis_num_options else 0
    if multi_task:
        environment = make_mdp.make_mdp_distr(mdp_class=mdp_class, grid_dim=grid_dim)
    else:
        environment = make_mdp.make_mdp(mdp_class=mdp_class)
    actions = environment.get_actions()
    environment.set_gamma(gamma)

    # Indicator functions.
    v_indic = ind_funcs._v_approx_indicator
    q_indic = ind_funcs._q_eps_approx_indicator
    v_disc_indic = ind_funcs._v_disc_approx_indicator
    rand_indic = ind_funcs._random

    # =========================
    # === Make Abstractions ===
    # =========================

    # Directed Variants.
    v_directed_sa, v_directed_aa = get_abstractions(environment,
                                                    v_disc_indic,
                                                    directed=True,
                                                    max_options=max_options)
    # v_directed_sa, v_directed_aa = get_abstractions(environment, v_indic, directed=True, max_options=max_options)

    # Identity action abstraction.
    identity_sa = get_sa(environment, default=True)
    identity_aa = get_aa(environment, default=True)

    if exp_type == "core":
        # Core only abstraction types.
        q_directed_sa, q_directed_aa = get_abstractions(
            environment, q_indic, directed=True, max_options=max_options)
        rand_directed_sa, rand_directed_aa = get_abstractions(
            environment, rand_indic, directed=True, max_options=max_options)
        pblocks_sa = get_sa(environment, default=True)
        pblocks_aa = action_abs.aa_baselines.get_policy_blocks_aa(
            environment, incl_prim_actions=True, num_options=max_options)

    # ===================
    # === Make Agents ===
    # ===================

    # Base Agents.
    agent_class = QLearningAgent if agent_class_str == "ql" else RMaxAgent
    rand_agent = RandomAgent(actions)
    baseline_agent = agent_class(actions, gamma=gamma)

    if mdp_class == "pblocks":
        baseline_agent.epsilon = 0.01

    # Abstraction Extensions.
    agents = []
    vabs_agent_directed = AbstractionWrapper(agent_class,
                                             actions,
                                             str(environment),
                                             max_option_steps=max_option_steps,
                                             state_abstr=v_directed_sa,
                                             action_abstr=v_directed_aa,
                                             name_ext="v-sa+aa")

    if exp_type == "core":
        # Core only agents.
        qabs_agent_directed = AbstractionWrapper(
            agent_class,
            actions,
            str(environment),
            max_option_steps=max_option_steps,
            state_abstr=q_directed_sa,
            action_abstr=q_directed_aa,
            name_ext="q-sa+aa")
        rabs_agent_directed = AbstractionWrapper(
            agent_class,
            actions,
            str(environment),
            max_option_steps=max_option_steps,
            state_abstr=rand_directed_sa,
            action_abstr=rand_directed_aa,
            name_ext="rand-sa+aa")
        pblocks_agent = AbstractionWrapper(agent_class,
                                           actions,
                                           str(environment),
                                           max_option_steps=max_option_steps,
                                           state_abstr=pblocks_sa,
                                           action_abstr=pblocks_aa,
                                           name_ext="pblocks")
        agents = [
            vabs_agent_directed, qabs_agent_directed, rabs_agent_directed,
            pblocks_agent, baseline_agent
        ]
    elif exp_type == "combo":
        # Combo only agents.
        aa_agent = AbstractionWrapper(agent_class,
                                      actions,
                                      str(environment),
                                      max_option_steps=max_option_steps,
                                      state_abstr=identity_sa,
                                      action_abstr=v_directed_aa,
                                      name_ext="aa")
        sa_agent = AbstractionWrapper(agent_class,
                                      actions,
                                      str(environment),
                                      max_option_steps=max_option_steps,
                                      state_abstr=v_directed_sa,
                                      action_abstr=identity_aa,
                                      name_ext="sa")
        agents = [vabs_agent_directed, sa_agent, aa_agent, baseline_agent]

    # Run experiments.
    if multi_task:
        steps = 999999 if x_axis_num_options else steps
        run_agents_multi_task(agents,
                              environment,
                              task_samples=task_samples,
                              steps=steps,
                              episodes=episodes,
                              reset_at_terminal=True)
    else:
        run_agents_on_mdp(agents,
                          environment,
                          instances=20,
                          episodes=30,
                          reset_at_terminal=True)
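
The indicator functions used above (e.g. ind_funcs._v_disc_approx_indicator) are predicates that decide whether two ground states may be collapsed into the same abstract state, typically by checking that their values or Q-values agree up to some epsilon. A self-contained sketch of that predicate; the signatures are illustrative, not the real ind_funcs interface:

# Illustrative epsilon-value indicators (not the real ind_funcs API).
def v_approx_indicator(state_a, state_b, value_func, epsilon=0.05):
    # Cluster two states if their values are within epsilon of each other.
    return abs(value_func(state_a) - value_func(state_b)) <= epsilon

def q_eps_approx_indicator(state_a, state_b, q_func, actions, epsilon=0.05):
    # Cluster two states if every action's Q-value agrees within epsilon.
    return all(abs(q_func(state_a, a) - q_func(state_b, a)) <= epsilon for a in actions)
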
def main():
    # Make MDP Distribution.
    mdp_class = "four_room"
    environment = make_mdp.make_mdp_distr(mdp_class=mdp_class, grid_dim=10)

    make_random_sa_diropt_aa_stack(environment, max_num_levels=3)