# Example 1
def test_MonteCarloFirstVisit():
    """Smoke-test the backend first-visit Monte Carlo agent on Taxi."""
    environment = rl.Taxi()
    agent = rl.backend.MonteCarloFirstVisit(
        states=environment.states,
        actions=environment.actions,
        epsilon=0.03,
        gamma=0.99,
    )
    # run(episodes, test_offset, test_samples) — 100 episodes, test every 10.
    rl.Session(environment, agent).run(100, 10, 10)
# Example 2
def test_Double_Expected_Sarsa():
    """Smoke-test the DoubleExpectedSarsa agent on the Taxi environment."""
    env = rl.Taxi()
    config = dict(
        states=env.states,
        actions=env.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
    )
    agent = rl.DoubleExpectedSarsa(**config)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)
# Example 3
def test_MonteCarlo():
    """Smoke-test rl.MonteCarlo with both visit-update strategies on Taxi.

    The original code duplicated the agent/session construction verbatim,
    differing only in ``visit_update``; iterating over the two strategies
    removes the duplication while exercising exactly the same runs.
    """
    env = rl.Taxi()
    for visit_update in ('first', 'every'):
        agent = rl.MonteCarlo(states=env.states,
                              actions=env.actions,
                              epsilon=0.03,
                              gamma=0.99,
                              visit_update=visit_update)
        session = rl.Session(env, agent)
        session.run(100, 10, 10)
# Example 4
def test_Double_QLearning():
    """Smoke-test DoubleQLearning on Taxi: build agent, wrap in session, run."""
    taxi = rl.Taxi()
    learner = rl.DoubleQLearning(
        states=taxi.states,
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
    )
    rl.Session(taxi, learner).run(100, 10, 10)
# Example 5
def test_DoubleSarsa():
    """Smoke-test the DoubleSarsa agent (backend='cpp') on Taxi."""
    env = rl.Taxi()
    agent_kwargs = {
        'states': env.states,
        'actions': env.actions,
        'alpha': 0.1,
        'epsilon': 0.03,
        'gamma': 0.99,
        'backend': 'cpp',
    }
    agent = rl.DoubleSarsa(**agent_kwargs)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)
def test_n_step_sarsa():
    """Smoke-test the 5-step NStepSarsa agent on Taxi."""
    environment = rl.Taxi()
    agent = rl.NStepSarsa(
        states=environment.states,
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_step=5,
    )
    rl.Session(environment, agent).run(100, 10, 10)
# Example 7
def test_Sarsa():
    """Smoke-test the backend Sarsa agent on Taxi."""
    env = rl.Taxi()
    sarsa_agent = rl.backend.Sarsa(states=env.states,
                                   actions=env.actions,
                                   alpha=0.1,
                                   epsilon=0.03,
                                   gamma=0.99)
    session = rl.Session(env, sarsa_agent)
    session.run(100, 10, 10)
# Example 8
def test_TabularDynaQPlus():
    """Smoke-test TabularDynaQPlus (backend='cpp', n_plan=5) on Taxi."""
    env = rl.Taxi()
    settings = dict(
        states=env.states,
        actions=env.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_plan=5,
        backend='cpp',
    )
    agent = rl.TabularDynaQPlus(**settings)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)
# Example 9
def test_NStepTreeBackup():
    """Smoke-test the 5-step NStepTreeBackup agent (backend='cpp') on Taxi."""
    taxi_env = rl.Taxi()
    tree_backup = rl.NStepTreeBackup(
        states=taxi_env.states,
        actions=taxi_env.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_step=5,
        backend='cpp',
    )
    rl.Session(taxi_env, tree_backup).run(100, 10, 10)
# Example 10
def test_sarsa_appr():
    """Smoke-test the function-approximation SemiGradientSarsa agent on Taxi."""
    env = rl.Taxi()
    # Tile-coding configuration: 1 feature, 2 tilings.
    approx_agent = rl.SemiGradientSarsa(
        actions=env.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        features=1,
        tilings=2,
        tilings_offset=[1],
        tile_size=[1],
    )
    session = rl.Session(env, approx_agent)
    session.run(100, 10, 10)
# Example 11
def test_PrioritizedSweeping():
    """Smoke-test PrioritizedSweeping (backend='cpp', n_plan=5, theta=0.5) on Taxi."""
    env = rl.Taxi()
    params = {
        'states': env.states,
        'actions': env.actions,
        'alpha': 0.1,
        'epsilon': 0.03,
        'gamma': 0.99,
        'n_plan': 5,
        'theta': 0.5,
        'backend': 'cpp',
    }
    agent = rl.PrioritizedSweeping(**params)
    rl.Session(env, agent).run(100, 10, 10)
# Example 12
def test_SemiGradientExpectedSarsa():
    """Smoke-test SemiGradientExpectedSarsa (backend='cpp') on Taxi."""
    env = rl.Taxi()
    agent = rl.SemiGradientExpectedSarsa(
        actions=env.actions,
        alpha=0.1,
        epsilon=0.01,
        gamma=0.99,
        features=1,
        tilings=2,
        tilings_offset=[4, 4],
        tile_size=[4, 4],
        backend='cpp',
    )
    session = rl.Session(env, agent)
    session.run(100, 10, 10)
# Example 13
def test_semigradient_n_step_expected_sarsa():
    """Smoke-test the 5-step SemiGradientNStepExpectedSarsa agent on Taxi."""
    environment = rl.Taxi()
    configuration = dict(
        actions=environment.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_step=5,
        features=1,
        tilings=2,
        tilings_offset=[1],
        tile_size=[1],
    )
    agent = rl.SemiGradientNStepExpectedSarsa(**configuration)
    rl.Session(environment, agent).run(100, 10, 10)
# Example 14
def test_SemiGradientMonteCarlo():
    """Smoke-test the backend SemiGradientMonteCarlo agent on Taxi."""
    env = rl.Taxi()
    mc_agent = rl.backend.SemiGradientMonteCarlo(actions=env.actions,
                                                 alpha=0.1,
                                                 epsilon=0.01,
                                                 gamma=0.99,
                                                 features=1,
                                                 tilings=2,
                                                 tilings_offset=[4, 4],
                                                 tile_size=[4, 4])
    session = rl.Session(env, mc_agent)
    session.run(100, 10, 10)
def test_sarsa_lambda():
    """Smoke-test SarsaLambda with replacing traces (lambda=0.98) on Taxi."""
    env = rl.Taxi()
    agent = rl.SarsaLambda(actions=env.actions,
                           alpha=0.1,
                           epsilon=0.03,
                           gamma=0.99,
                           lambd=0.98,
                           trace_type="replacing",
                           features=1,
                           tilings=4)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)
# Example 16
                observe_confounder=observe_confounder, build_causal_model=True)
        elif env_type == 'confounder_not_directly_influencing_outcome':
            env = rl.ConfounderNotDirectlyInfluencingOutcome(
                observe_confounder=observe_confounder, build_causal_model=True)

        results = []

        ####################################
        # Random Agent
        ####################################
        random_agents = [
            rl.Random(actions=env.actions) for _ in range(number_of_agents)
        ]

        for agent in random_agents:
            session = rl.Session(env=env, agent=agent, max_steps=max_steps)
            results.append(
                session.run(episodes=episodes,
                            test_offset=test_offset,
                            test_samples=test_sample,
                            render=False))

        ####################################
        # A lot of Causal Q-Learning
        ####################################
        causal_q_learning_agents = [
            rl.CausalQLearning(states=env.states,
                               actions=env.actions,
                               alpha=alpha,
                               epsilon=epsilon,
                               epsilon_decay=epsilon_decay,
        env = rl.TaxiGenericModel(build_causal_model=True,
                                  observability='partial')  # full, partial
    elif env_name == 'taxi_confounder':
        env = rl.TaxiConfounder(build_causal_model=True,
                                observe_confounder=observe_confounders)

    results = []

    ####################################
    # Random agent
    ####################################
    random_agents = [
        rl.Random(actions=env.actions) for _ in range(number_of_agents)
    ]
    for agent in random_agents:
        session = rl.Session(env=env, agent=agent)
        results.append(
            session.run(episodes=episodes,
                        test_offset=test_offset,
                        test_samples=test_sample,
                        render=False))

    ####################################
    # Causal Q-Learning
    ####################################
    causal_q_learning_agents = [
        rl.CausalQLearning(states=env.states,
                           actions=env.actions,
                           alpha=alpha,
                           epsilon=epsilon,
                           epsilon_decay=epsilon_decay,