Code Example #1
def main(open_plot=True):
    #     gym_mdp = GridWorldMDP(width=10, height=10, init_loc=(1,1), goal_locs=[(10,10)])
    #     num_feats = gym_mdp.get_num_state_feats()
    #     lin_agent = QLearnerAgent(gym_mdp.actions, alpha=0.4, epsilon=0.4)
    #     rand_agent = RandomAgent(gym_mdp.actions)
    #     run_agents_on_mdp([lin_agent, rand_agent], gym_mdp, instances=50, episodes=200, steps=100, open_plot=open_plot)

    #     gym_mdp = GridWorldMDP(width=10, height=10, init_loc=(1,1), goal_locs=[(10,10)])
    #     num_feats = gym_mdp.get_num_state_feats()
    #     lin_agent = LinearQLearnerAgent(gym_mdp.actions, num_features=num_feats, alpha=0.4, epsilon=0.4, anneal=False, rbf=True)
    #     rand_agent = RandomAgent(gym_mdp.actions)
    #     run_agents_on_mdp([lin_agent, rand_agent], gym_mdp, instances=50, episodes=200, steps=100, open_plot=open_plot, verbose=True)

    # Gym MDP
    gym_mdp = GymMDP(env_name='CartPole-v0', render=False)
    num_feats = gym_mdp.get_num_state_feats()

    # Setup agents and run.
    lin_agent = LinearQLearnerAgent(gym_mdp.actions,
                                    num_features=num_feats,
                                    alpha=0.4,
                                    epsilon=0.4,
                                    anneal=False,
                                    rbf=True)
    rand_agent = RandomAgent(gym_mdp.actions)
    run_agents_on_mdp([lin_agent, rand_agent],
                      gym_mdp,
                      instances=5,
                      episodes=1000,
                      steps=100,
                      open_plot=open_plot)
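
The example above only defines main(); the simple_rl example scripts typically invoke it from a small command-line guard. The snippet below is an assumed entry point, not taken from the original file, and the "no_plot" flag handling is illustrative only.

# Assumed entry point: run main(), and let a "no_plot" argument suppress the plot window.
if __name__ == "__main__":
    import sys
    main(open_plot="no_plot" not in sys.argv)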
Code Example #2
File: gym_example.py, Project: david-abel/simple_rl
def main(open_plot=True):
    # Gym MDP
    gym_mdp = GymMDP(env_name='Breakout-v0', render=False)
    num_feats = gym_mdp.get_num_state_feats()

    # Setup agents and run.
    rand_agent = RandomAgent(gym_mdp.get_actions())
    lin_q_agent = LinearQAgent(gym_mdp.get_actions(), num_feats)
    run_agents_on_mdp([lin_q_agent, rand_agent], gym_mdp, instances=5, episodes=50000, steps=200, open_plot=open_plot, verbose=False)
Code Example #3
File: gym_example.py, Project: seansegal/simple_rl
def main(open_plot=True):
    # Gym MDP
    gym_mdp = GymMDP(env_name='CartPole-v0', render=False)
    num_feats = gym_mdp.get_num_state_feats()

    # Setup agents and run.
    lin_agent = LinearQLearnerAgent(gym_mdp.actions, num_features=num_feats, alpha=0.4, epsilon=0.4, anneal=True)
    rand_agent = RandomAgent(gym_mdp.actions)
    run_agents_on_mdp([lin_agent, rand_agent], gym_mdp, instances=10, episodes=30, steps=10000, open_plot=open_plot)
Code Example #4
def main(open_plot=True):
    # Gym MDP
    gym_mdp = GymMDP(env_name='CartPole-v0', render=True)
    num_feats = gym_mdp.get_num_state_feats()

    # Setup agents and run.
    q_learning_agent = LinearQAgent(gym_mdp.get_actions(), num_feats)
    run_agents_on_mdp([q_learning_agent],
                      gym_mdp,
                      instances=1,
                      episodes=400,
                      steps=210,
                      open_plot=open_plot,
                      verbose=True)
Code Example #5
def main(open_plot=True):
    # Gym MDP
    gym_mdp = GymMDP(env_name='Breakout-v0', render=False)
    num_feats = gym_mdp.get_num_state_feats()

    # Setup agents and run.
    rand_agent = RandomAgent(gym_mdp.get_actions())
    lin_q_agent = LinearQAgent(gym_mdp.get_actions(), num_feats)
    run_agents_on_mdp([lin_q_agent, rand_agent],
                      gym_mdp,
                      instances=5,
                      episodes=50000,
                      steps=200,
                      open_plot=open_plot,
                      verbose=False)
Code Example #6
def main():

    # ======================
    # == Make Environment ==
    # ======================
    params = rlec.get_cartpole_params()
    num_test_mdps = 6  # 6 is max.
    mdp_demo_policy_dict = {}
    env = GymMDP(env_name='CartPole-v0')
    obs_size = env.get_num_state_feats()
    mdp_demo_policy_dict[env] = cpd.expert_cartpole_policy
    test_mdp = CartPoleMDP()

    # ============================
    # == Make State Abstraction ==
    # ============================
    sess = tf.Session()
    nn_sa_file_name = "cartpole_nn_sa"
    params['num_iterations_for_abstraction_learning'] = 500
    abstraction_net = make_nn_sa(mdp_demo_policy_dict, sess, params)
    nn_sa = NNStateAbstr(abstraction_net)

    # ====================================
    # == Visualize Abstract State Space ==
    # ====================================

    # Collect dataset based on learner.
    sa_agent = AbstractionWrapper(QLearningAgent,
                                  agent_params={
                                      "alpha": params['rl_learning_rate'],
                                      "epsilon": 0.2,
                                      "actions": test_mdp.get_actions()
                                  },
                                  state_abstr=nn_sa,
                                  name_ext="$-\\phi$")
    #visited_states = vu.collect_dataset(test_mdp, samples=2000) #, learning_agent=sa_agent)
    visited_states = collect_samples_from_demo_policy_random_s0_cartpole(
        mdp_demo_policy_dict, num_samples=2000)

    # Get feature indices.
    features = get_feature_dicts()

    # Visualize.
    vu.visualize_state_abstrs3D(visited_states, features, nn_sa)
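
This example and the two that follow drive TensorFlow through the 1.x graph API (tf.Session, tf.variable_scope). If only TensorFlow 2 is installed, one possible workaround, an assumption about the runtime rather than part of the original code, is the v1 compatibility shim:

# Assumed TF2 workaround: expose the TF1 graph-mode API (Session, variable_scope) used by make_nn_sa.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()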
Code Example #7
def diff_sampling_distr_experiment():
    '''
    Summary:
        Runs the demo-sampling experiment: learns a NN state abstraction from
        demonstration states gathered under several sampling-epsilon values, then
        compares the resulting Q-learning agents (plus a uniformly sampled baseline)
        on CartPole.
    '''
    # Make MDP and Demo Policy.
    params = get_params()
    mdp_demo_policy_dict = {}
    env = GymMDP(env_name='CartPole-v0')
    obs_size = env.get_num_state_feats()
    mdp_demo_policy_dict[env] = cpd.expert_cartpole_policy
    demo_agent = FixedPolicyAgent(cpd.expert_cartpole_policy)

    # Make a NN for each sampling param.
    sampling_params = [0.0, 0.5, 1.0]

    test_mdp = CartPoleMDP()
    agents = {"demo": demo_agent}
    sess = tf.Session()
    for epsilon in sampling_params:
        with tf.variable_scope('nn_sa' + str(epsilon), reuse=False) as scope:
            print "epsilon", epsilon
            # tf.reset_default_graph()
            params["epsilon"] = epsilon
            abstraction_net = make_nn_sa(mdp_demo_policy_dict,
                                         sess,
                                         params,
                                         verbose=False)
            nn_sa = NNStateAbstr(abstraction_net)
            sa_agent = AbstractionWrapper(QLearningAgent,
                                          agent_params={
                                              "actions":
                                              env.get_actions(),
                                              "name":
                                              "$QL_\\phi-\\epsilon=" +
                                              str(epsilon) + "$"
                                          },
                                          state_abstr=nn_sa)
            agents[epsilon] = sa_agent

    with tf.variable_scope('demo') as scope:
        abstraction_net_rand = make_nn_sa(mdp_demo_policy_dict,
                                          sess,
                                          params,
                                          verbose=False,
                                          sample_type="rand")
        nn_sa_rand = NNStateAbstr(abstraction_net_rand)
        sa_agent_rand = AbstractionWrapper(QLearningAgent,
                                           agent_params={
                                               "actions": env.get_actions(),
                                               "name": "$D \\sim U(S)$"
                                           },
                                           state_abstr=nn_sa_rand,
                                           name_ext="")
        agents["rand"] = sa_agent_rand

    run_agents_on_mdp(agents.values(),
                      test_mdp,
                      instances=params['num_instances'],
                      episodes=params['episodes'],
                      steps=params['steps'],
                      verbose=False)

    sess.close()
Code Example #8
def main():

    # ======================
    # == Make Environment ==
    # ======================
    params = get_params()
    num_test_mdps = 6  # 6 is max.
    mdp_demo_policy_dict = {}
    env = GymMDP(env_name='CartPole-v0')
    obs_size = env.get_num_state_feats()
    mdp_demo_policy_dict[env] = cpd.expert_cartpole_policy

    if params['multitask']:
        # Make distribution.
        # Weight each test MDP uniformly over the gravities actually used.
        gravity_settings = [5.0, 6.0, 8.0, 12.0][:num_test_mdps]
        mdp_dist_dict = {
            CartPoleMDP(gravity=gravity): 1.0 / len(gravity_settings)
            for gravity in gravity_settings
        }
        test_mdp = MDPDistribution(mdp_dist_dict)
    else:
        test_mdp = CartPoleMDP()

    # ============================
    # == Make State Abstraction ==
    # ============================
    sess = tf.Session()
    nn_sa_file_name = "cartpole_nn_sa"
    abstraction_net = make_nn_sa(mdp_demo_policy_dict, sess, params)
    nn_sa = NNStateAbstr(abstraction_net)

    # =================
    # == Make Agents ==
    # =================
    actions = test_mdp.get_actions()
    num_features = test_mdp.get_num_state_feats()
    linear_agent = LinearQAgent(actions=actions,
                                num_features=num_features,
                                alpha=params['rl_learning_rate'])
    sa_agent = AbstractionWrapper(QLearningAgent,
                                  agent_params={
                                      "alpha": params['rl_learning_rate'],
                                      "epsilon": 0.2,
                                      "actions": test_mdp.get_actions()
                                  },
                                  state_abstr=nn_sa,
                                  name_ext="$-\\phi$")

    # ====================
    # == Run Experiment ==
    # ====================

    if params['multitask']:
        run_agents_lifelong([sa_agent, linear_agent],
                            test_mdp,
                            samples=params['num_instances'],
                            episodes=params['episodes'],
                            steps=params['steps'],
                            verbose=False)
    else:
        # demo_agent = FixedPolicyAgent(cpd.expert_cartpole_policy)
        run_agents_on_mdp([sa_agent, linear_agent],
                          test_mdp,
                          instances=params['num_instances'],
                          episodes=params['episodes'],
                          steps=params['steps'],
                          verbose=False)
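
get_params() itself is not shown in these snippets. Judging from the keys referenced above, the dict it returns must contain at least the fields sketched below; the values here are placeholders, not the project's defaults.

# Hypothetical shape of the params dict used by the abstraction examples; values are illustrative only.
params = {
    "multitask": False,                              # single-task vs. lifelong (run_agents_lifelong) experiment
    "num_instances": 5,                              # repeated runs per agent
    "episodes": 100,
    "steps": 200,
    "rl_learning_rate": 0.005,                       # alpha passed to the Q-learning agents
    "num_iterations_for_abstraction_learning": 500,  # training iterations for make_nn_sa
    "epsilon": 0.0,                                  # demo-policy sampling epsilon (overwritten per run in Example #7)
}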
Code Example #9
File: gym_example.py, Project: YuhangSong/simple_rl
#!/usr/bin/env python

# Other imports.
import srl_example_setup
from simple_rl.agents import LinearQLearnerAgent, RandomAgent
from simple_rl.tasks import GymMDP
from simple_rl.run_experiments import run_agents_on_mdp

# Gym MDP
gym_mdp = GymMDP(env_name='CartPole-v0', render=False)

num_feats = gym_mdp.get_num_state_feats()

# Setup agents and run.
lin_agent = LinearQLearnerAgent(gym_mdp.actions,
                                num_features=num_feats,
                                alpha=0.4,
                                epsilon=0.4,
                                anneal=True)
rand_agent = RandomAgent(gym_mdp.actions)

run_agents_on_mdp([lin_agent, rand_agent],
                  gym_mdp,
                  instances=10,
                  episodes=30,
                  steps=10000)
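
Note: recent Gym releases deprecate CartPole-v0 in favor of CartPole-v1. If the v0 ID is not registered in the installed Gym, swapping the environment name is an assumed, untested adjustment:

gym_mdp = GymMDP(env_name='CartPole-v1', render=False)  # assumed substitute when CartPole-v0 is unavailable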