Example #1
def get_combo_experiment_agents(environment):
    '''
    Args:
        environment (simple_rl.MDPDistribution)

    Returns:
        (list)
    '''
    actions = environment.get_actions()
    gamma = environment.get_gamma()

    sa, aa = get_directed_option_sa_pair(
        environment,
        indic_func=ind_funcs._q_disc_approx_indicator,
        max_options=100)
    sa_qds_test = get_sa(environment,
                         indic_func=ind_funcs._q_disc_approx_indicator,
                         epsilon=0.05)
    sa_qs_test = get_sa(environment,
                        indic_func=ind_funcs._q_eps_approx_indicator,
                        epsilon=0.1)

    # Baseline learners (rmax_agent is constructed but not included in the returned list).
    ql_agent = QLearningAgent(actions, gamma=gamma, epsilon=0.1, alpha=0.05)
    rmax_agent = RMaxAgent(actions, gamma=gamma, epsilon=0.1, alpha=0.05)

    # Combos.
    ql_sa_qds_agent = AbstractionWrapper(QLearningAgent,
                                         agent_params={"actions": actions},
                                         state_abstr=sa_qds_test,
                                         name_ext=r"$\phi_{Q_d^*}$")
    ql_sa_qs_agent = AbstractionWrapper(QLearningAgent,
                                        agent_params={"actions": actions},
                                        state_abstr=sa_qs_test,
                                        name_ext=r"$\phi_{Q_\epsilon^*}$")

    # sa_agent = AbstractionWrapper(QLearningAgent, actions, str(environment), state_abstr=sa, name_ext="sa")
    aa_agent = AbstractionWrapper(QLearningAgent,
                                  agent_params={"actions": actions},
                                  action_abstr=aa,
                                  name_ext="aa")
    sa_aa_agent = AbstractionWrapper(QLearningAgent,
                                     agent_params={"actions": actions},
                                     state_abstr=sa,
                                     action_abstr=aa,
                                     name_ext=r"$\phi_{Q_d^*}+aa$")

    agents = [ql_agent, ql_sa_qds_agent, ql_sa_qs_agent, aa_agent, sa_aa_agent]

    return agents
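To see how this helper might be wired into an experiment, here is a minimal driver sketch. The MDPDistribution over Four Rooms tasks, the goal locations, and the run_agents_lifelong settings are assumptions for illustration, not part of the original script.

# Hypothetical driver, not part of the original example.
from simple_rl.mdp import MDPDistribution
from simple_rl.tasks import FourRoomMDP
from simple_rl.run_experiments import run_agents_lifelong

# Small distribution over Four Rooms tasks that differ only in goal location (assumed setup).
mdps = [FourRoomMDP(width=9, height=9, goal_locs=[goal], gamma=0.95) for goal in [(9, 9), (1, 9)]]
environment = MDPDistribution({mdp: 1.0 / len(mdps) for mdp in mdps})

agents = get_combo_experiment_agents(environment)
run_agents_lifelong(agents, environment, samples=10, episodes=1, steps=250)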
Example #2
def get_optimal_policies(environment):
    '''
    Args:
        environment (simple_rl.MDPDistribution)

    Returns:
        (list)
    '''

    # Make State Abstraction
    approx_qds_test = get_sa(environment,
                             indic_func=ind_funcs._q_eps_approx_indicator,
                             epsilon=0.05)

    # True Optimal
    true_opt_vi = ValueIteration(environment)
    true_opt_vi.run_vi()
    opt_agent = FixedPolicyAgent(true_opt_vi.policy, r"$\pi^*$")

    # Optimal Abstraction
    opt_det_vi = AbstractValueIteration(environment,
                                        state_abstr=approx_qds_test,
                                        sample_rate=30)
    opt_det_vi.run_vi()
    opt_det_agent = FixedPolicyAgent(opt_det_vi.policy, name=r"$\pi_{\phi}^*$")

    stoch_policy_obj = StochasticSAPolicy(approx_qds_test, environment)
    stoch_agent = FixedPolicyAgent(stoch_policy_obj.policy,
                                   r"$\pi(a \mid s_\phi )$")

    ql_agents = [opt_agent, stoch_agent, opt_det_agent]

    return ql_agents
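A hedged evaluation sketch: the loop below rolls each returned fixed-policy agent out on one MDP sampled from the distribution and prints its return. The rollout itself (episode length, manual stepping with execute_agent_action) is an assumption for illustration, not part of the original experiment.

# Hypothetical evaluation loop; environment is assumed to be a simple_rl MDPDistribution built elsewhere.
for agent in get_optimal_policies(environment):
    mdp = environment.sample()
    state, total_reward = mdp.get_init_state(), 0.0
    for _ in range(100):  # assumed episode length
        action = agent.act(state, 0)
        reward, state = mdp.execute_agent_action(action)
        total_reward += reward
        if state.is_terminal():
            break
    print(agent.get_name(), total_reward)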
Example #3
def main():

    # Grab experiment params.
    mdp = BadChainMDP(gamma=0.95, kappa=0.001)
    actions = mdp.get_actions()

    # =======================
    # == Make Abstractions ==
    # =======================
    sa_q_eps = get_sa(mdp,
                      indic_func=indicator_funcs._q_eps_approx_indicator,
                      epsilon=0.1)

    # RMax Agents.
    rmax_agent = RMaxAgent(actions)
    abstr_rmax_agent = AbstractionWrapper(RMaxAgent,
                                          state_abstr=sa_q_eps,
                                          agent_params={"actions": actions},
                                          name_ext="-$\\phi_{Q_\\epsilon^*}$")

    # Delayed Q Agents.
    del_q_agent = DelayedQAgent(actions)
    abstr_del_q_agent = AbstractionWrapper(DelayedQAgent,
                                           state_abstr=sa_q_eps,
                                           agent_params={"actions": actions},
                                           name_ext="-$\\phi_{Q_\\epsilon^*}$")

    run_agents_on_mdp(
        [rmax_agent, abstr_rmax_agent, del_q_agent, abstr_del_q_agent],
        mdp,
        instances=50,
        steps=250,
        episodes=1)
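This example defines main() but no entry point; the standard guard below (an addition, not in the original listing) runs it when the file is executed as a script.

if __name__ == "__main__":
    main()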
Example #4
def main():
    # Grab experiment params.
    # Switch between Upworld and Trench
    mdp_class = "upworld"
    # mdp_class = "trench"
    grid_lim = 20 if mdp_class == 'upworld' else 7
    gamma = 0.95
    vanilla_file = "vi.csv"
    sa_file = r"vi-$\phi_{Q_d^*}.csv"
    file_prefix = "results/planning-" + mdp_class + "/"
    clear_files(dir_name=file_prefix)

    for grid_dim in range(3, grid_lim):
        # ======================
        # == Make Environment ==
        # ======================
        environment = make_mdp.make_mdp(mdp_class=mdp_class, grid_dim=grid_dim)
        environment.set_gamma(gamma)

        # =======================
        # == Make Abstractions ==
        # =======================
        sa_qds = get_sa(environment,
                        indic_func=ind_funcs._q_disc_approx_indicator,
                        epsilon=0.01)

        # ============
        # == Run VI ==
        # ============
        vanilla_vi = ValueIteration(environment, delta=0.0001, sample_rate=15)
        sa_vi = AbstractValueIteration(ground_mdp=environment,
                                       state_abstr=sa_qds)

        print "Running VIs."
        start_time = time.clock()
        vanilla_iters, vanilla_val = vanilla_vi.run_vi()
        vanilla_time = round(time.clock() - start_time, 2)

        start_time = time.clock()
        sa_iters, sa_val = sa_vi.run_vi()
        sa_time = round(time.clock() - start_time, 2)

        print "vanilla", vanilla_iters, vanilla_val, vanilla_time
        print "sa:", sa_iters, sa_val, sa_time

        write_datum(file_prefix + "iters/" + vanilla_file, vanilla_iters)
        write_datum(file_prefix + "iters/" + sa_file, sa_iters)

        write_datum(file_prefix + "times/" + vanilla_file, vanilla_time)
        write_datum(file_prefix + "times/" + sa_file, sa_time)
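clear_files and write_datum are defined outside this listing; the sketch below shows plausible minimal implementations, assuming clear_files just empties the iters/ and times/ result directories and write_datum appends one value to a CSV. The actual helpers may differ.

import os

# Assumed helper implementations -- the originals live elsewhere in the repository.
def clear_files(dir_name):
    '''Create the iters/ and times/ result subdirectories and empty any old files.'''
    for sub in ["iters", "times"]:
        full_dir = os.path.join(dir_name, sub)
        os.makedirs(full_dir, exist_ok=True)
        for f in os.listdir(full_dir):
            open(os.path.join(full_dir, f), "w").close()

def write_datum(file_name, datum):
    '''Append a single datum (e.g., an iteration count or a runtime) to the given CSV.'''
    with open(file_name, "a+") as out_f:
        out_f.write(str(datum) + ",")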
Example #5
def get_sa_experiment_agents(environment, AgentClass, pac=False):
    '''
    Args:
        environment (simple_rl.MDPDistribution)
        AgentClass (Class)

    Returns:
        (list)
    '''
    actions = environment.get_actions()
    gamma = environment.get_gamma()

    if pac:
        # PAC State Abstractions.
        sa_qds_test = compute_pac_sa(
            environment,
            indic_func=ind_funcs._q_disc_approx_indicator,
            epsilon=0.2)
        sa_qs_test = compute_pac_sa(
            environment,
            indic_func=ind_funcs._q_eps_approx_indicator,
            epsilon=0.2)
        sa_qs_exact_test = compute_pac_sa(
            environment,
            indic_func=ind_funcs._q_eps_approx_indicator,
            epsilon=0.0)

    else:
        # Compute state abstractions.
        sa_qds_test = get_sa(environment,
                             indic_func=ind_funcs._q_disc_approx_indicator,
                             epsilon=0.1)
        sa_qs_test = get_sa(environment,
                            indic_func=ind_funcs._q_eps_approx_indicator,
                            epsilon=0.1)
        sa_qs_exact_test = get_sa(environment,
                                  indic_func=ind_funcs._q_eps_approx_indicator,
                                  epsilon=0.0)

    # Make Agents.
    agent = AgentClass(actions, gamma=gamma)
    params = {
        "actions": actions
    } if AgentClass is not RMaxAgent else {
        "actions": actions,
        "s_a_threshold": 2,
        "horizon": 5
    }
    sa_qds_agent = AbstractionWrapper(AgentClass,
                                      agent_params=params,
                                      state_abstr=sa_qds_test,
                                      name_ext=r"$-\phi_{Q_d^*}$")
    sa_qs_agent = AbstractionWrapper(AgentClass,
                                     agent_params=params,
                                     state_abstr=sa_qs_test,
                                     name_ext=r"$-\phi_{Q_\epsilon^*}$")
    sa_qs_exact_agent = AbstractionWrapper(AgentClass,
                                           agent_params=params,
                                           state_abstr=sa_qs_exact_test,
                                           name_ext=r"-$\phi_{Q^*}$")

    agents = [agent, sa_qds_agent, sa_qs_agent, sa_qs_exact_agent]

    # if isinstance(environment.sample(), FourRoomMDP) or isinstance(environment.sample(), ColorMDP):
    #     # If it's a fourroom add the handcoded one.
    #     sa_hand_test = get_sa(environment, indic_func=ind_funcs._four_rooms)
    #     sa_hand_agent = AbstractionWrapper(AgentClass, agent_params=params, state_abstr=sa_hand_test, name_ext="$-\phi_h$")
    #     agents += [sa_hand_agent]

    return agents
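Because the helper is parametrized by AgentClass, the same distribution can be paired with different learners. A short usage sketch, assuming environment is a simple_rl MDPDistribution built as in the earlier examples:

# Hypothetical calls, not part of the original example.
from simple_rl.agents import QLearningAgent, RMaxAgent

ql_agents = get_sa_experiment_agents(environment, QLearningAgent)
rmax_agents = get_sa_experiment_agents(environment, RMaxAgent, pac=True)  # pac=True uses the PAC abstractions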
Example #6
def get_exact_vs_approx_agents(environment, incl_opt=True):
    '''
    Args:
        environment (simple_rl.MDPDistribution)
        incl_opt (bool)

    Returns:
        (list)
    '''

    actions = environment.get_actions()
    gamma = environment.get_gamma()

    exact_qds_test = get_sa(environment,
                            indic_func=ind_funcs._q_eps_approx_indicator,
                            epsilon=0.0)
    approx_qds_test = get_sa(environment,
                             indic_func=ind_funcs._q_eps_approx_indicator,
                             epsilon=0.05)

    ql_agent = QLearningAgent(actions, gamma=gamma, epsilon=0.1, alpha=0.05)
    ql_exact_agent = AbstractionWrapper(QLearningAgent,
                                        agent_params={"actions": actions},
                                        state_abstr=exact_qds_test,
                                        name_ext="-exact")
    ql_approx_agent = AbstractionWrapper(QLearningAgent,
                                         agent_params={"actions": actions},
                                         state_abstr=approx_qds_test,
                                         name_ext="-approx")
    ql_agents = [ql_agent, ql_exact_agent, ql_approx_agent]

    dql_agent = DoubleQAgent(actions, gamma=gamma, epsilon=0.1, alpha=0.05)
    dql_exact_agent = AbstractionWrapper(DoubleQAgent,
                                         agent_params={"actions": actions},
                                         state_abstr=exact_qds_test,
                                         name_ext="-exact")
    dql_approx_agent = AbstractionWrapper(DoubleQAgent,
                                          agent_params={"actions": actions},
                                          state_abstr=approx_qds_test,
                                          name_ext="-approx")
    dql_agents = [dql_agent, dql_exact_agent, dql_approx_agent]

    rm_agent = RMaxAgent(actions, gamma=gamma)
    rm_exact_agent = AbstractionWrapper(RMaxAgent,
                                        agent_params={"actions": actions},
                                        state_abstr=exact_qds_test,
                                        name_ext="-exact")
    rm_approx_agent = AbstractionWrapper(RMaxAgent,
                                         agent_params={"actions": actions},
                                         state_abstr=approx_qds_test,
                                         name_ext="-approx")
    rm_agents = [rm_agent, rm_exact_agent, rm_approx_agent]

    if incl_opt:
        vi = ValueIteration(environment)
        vi.run_vi()
        opt_agent = FixedPolicyAgent(vi.policy, name=r"$\pi^*$")

        sa_vi = AbstractValueIteration(
            environment,
            sample_rate=50,
            max_iterations=3000,
            delta=0.0001,
            state_abstr=approx_qds_test,
            action_abstr=ActionAbstraction(
                options=[], prim_actions=environment.get_actions()))
        sa_vi.run_vi()
        approx_opt_agent = FixedPolicyAgent(sa_vi.policy, name=r"$\pi_\phi^*$")

        # Include the planning baselines in the returned list.
        ql_agents += [opt_agent, approx_opt_agent]

    return ql_agents