# Example #1
def test_MonteCarloFirstVisit():
    """Smoke-test the backend first-visit Monte Carlo agent on the Taxi env."""
    environment = rl.Taxi()
    config = {
        "states": environment.states,
        "actions": environment.actions,
        "epsilon": 0.03,
        "gamma": 0.99,
    }
    learner = rl.backend.MonteCarloFirstVisit(**config)
    rl.Session(environment, learner).run(100, 10, 10)
# Example #2
def test_Double_Expected_Sarsa():
    """Smoke-test the double expected-SARSA agent on the Taxi env."""
    environment = rl.Taxi()
    learner = rl.DoubleExpectedSarsa(
        states=environment.states,
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
    )
    rl.Session(environment, learner).run(100, 10, 10)
# Example #3
def test_Double_QLearning():
    """Smoke-test the double Q-learning agent on the Taxi env."""
    environment = rl.Taxi()
    hyperparams = dict(alpha=0.1, epsilon=0.03, gamma=0.99)
    learner = rl.DoubleQLearning(
        states=environment.states,
        actions=environment.actions,
        **hyperparams,
    )
    rl.Session(environment, learner).run(100, 10, 10)
# Example #4
def test_DoubleSarsa():
    """Smoke-test the double SARSA agent (C++ backend) on the Taxi env."""
    environment = rl.Taxi()
    learner = rl.DoubleSarsa(
        states=environment.states,
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        backend='cpp',
    )
    rl.Session(environment, learner).run(100, 10, 10)
def test_n_step_sarsa():
    """Smoke-test the n-step SARSA agent on the Taxi env."""
    environment = rl.Taxi()
    settings = {
        "states": environment.states,
        "actions": environment.actions,
        "alpha": 0.1,
        "epsilon": 0.03,
        "gamma": 0.99,
        "n_step": 5,
    }
    learner = rl.NStepSarsa(**settings)
    rl.Session(environment, learner).run(100, 10, 10)
# Example #6
def test_Sarsa():
    """Smoke-test the backend SARSA agent on the Taxi env."""
    environment = rl.Taxi()
    learner = rl.backend.Sarsa(states=environment.states,
                               actions=environment.actions,
                               alpha=0.1,
                               epsilon=0.03,
                               gamma=0.99)
    training = rl.Session(environment, learner)
    training.run(100, 10, 10)
# Example #7
def test_TabularDynaQPlus():
    """Smoke-test the tabular Dyna-Q+ agent (C++ backend) on the Taxi env."""
    environment = rl.Taxi()
    learner = rl.TabularDynaQPlus(
        states=environment.states,
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_plan=5,
        backend='cpp',
    )
    rl.Session(environment, learner).run(100, 10, 10)
# Example #8
def test_NStepTreeBackup():
    """Smoke-test the n-step tree-backup agent (C++ backend) on the Taxi env."""
    environment = rl.Taxi()
    options = dict(
        states=environment.states,
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_step=5,
        backend='cpp',
    )
    learner = rl.NStepTreeBackup(**options)
    rl.Session(environment, learner).run(100, 10, 10)
# Example #9
def test_PrioritizedSweeping():
    """Smoke-test the prioritized-sweeping agent (C++ backend) on the Taxi env."""
    environment = rl.Taxi()
    learner = rl.PrioritizedSweeping(
        states=environment.states,
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_plan=5,
        theta=0.5,
        backend='cpp',
    )
    training = rl.Session(environment, learner)
    training.run(100, 10, 10)
# Example #10
def test_sarsa_appr():
    """Smoke-test the semi-gradient SARSA agent (tile coding) on the Taxi env."""
    environment = rl.Taxi()
    learner = rl.SemiGradientSarsa(
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        features=1,
        tilings=2,
        tilings_offset=[1],
        tile_size=[1],
    )
    rl.Session(environment, learner).run(100, 10, 10)
# Example #11
def test_SemiGradientExpectedSarsa():
    """Smoke-test the semi-gradient expected-SARSA agent (C++ backend)."""
    environment = rl.Taxi()
    tiling_config = {
        "features": 1,
        "tilings": 2,
        "tilings_offset": [4, 4],
        "tile_size": [4, 4],
    }
    learner = rl.SemiGradientExpectedSarsa(
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.01,
        gamma=0.99,
        backend='cpp',
        **tiling_config,
    )
    rl.Session(environment, learner).run(100, 10, 10)
def test_semigradient_n_step_expected_sarsa():
    """Smoke-test the semi-gradient n-step expected-SARSA agent."""
    environment = rl.Taxi()
    learner = rl.SemiGradientNStepExpectedSarsa(
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_step=5,
        features=1,
        tilings=2,
        tilings_offset=[1],
        tile_size=[1],
    )
    training = rl.Session(environment, learner)
    training.run(100, 10, 10)
# Example #13
def test_SemiGradientMonteCarlo():
    """Smoke-test the backend semi-gradient Monte Carlo agent."""
    environment = rl.Taxi()
    params = dict(
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.01,
        gamma=0.99,
        features=1,
        tilings=2,
        tilings_offset=[4, 4],
        tile_size=[4, 4],
    )
    learner = rl.backend.SemiGradientMonteCarlo(**params)
    rl.Session(environment, learner).run(100, 10, 10)
def test_sarsa_lambda():
    """Smoke-test the SARSA(lambda) agent with replacing traces."""
    environment = rl.Taxi()
    learner = rl.SarsaLambda(actions=environment.actions,
                             alpha=0.1,
                             epsilon=0.03,
                             gamma=0.99,
                             lambd=0.98,
                             trace_type="replacing",
                             features=1,
                             tilings=4)
    training = rl.Session(environment, learner)
    training.run(100, 10, 10)
# Example #15
def test_MonteCarlo():
    env = rl.Taxi()

    agent = rl.MonteCarlo(states=env.states,
                          actions=env.actions,
                          epsilon=0.03,
                          gamma=0.99,
                          visit_update='first')
    session = rl.Session(env, agent)
    session.run(100, 10, 10)

    agent = rl.MonteCarlo(states=env.states,
                          actions=env.actions,
                          epsilon=0.03,
                          gamma=0.99,
                          visit_update='every')
    session = rl.Session(env, agent)
    session.run(100, 10, 10)
    test_sample = 30

    env_name = 'taxi_confounder'  # taxi, taxi_generic_model, taxi_confounder
    observe_confounders = True
    number_of_agents = 1

    alpha = 0.3
    epsilon = 1
    epsilon_decay = 0.995
    gamma = 0.98
    min_epsilon = 0.05
    causal_threshold = 0.6
    ####################################

    if env_name == 'taxi':
        env = rl.Taxi(build_causal_model=True, confounders=observe_confounders)
    elif env_name == 'taxi_generic_model':
        env = rl.TaxiGenericModel(build_causal_model=True,
                                  observability='partial')  # full, partial
    elif env_name == 'taxi_confounder':
        env = rl.TaxiConfounder(build_causal_model=True,
                                observe_confounder=observe_confounders)

    results = []

    ####################################
    # Random agent
    ####################################
    random_agents = [
        rl.Random(actions=env.actions) for _ in range(number_of_agents)
    ]