def test_MonteCarloFirstVisit():
    """Smoke-test ``rl.backend.MonteCarloFirstVisit`` on the Taxi environment.

    No assertions are made: the test passes when building the agent and
    running the session raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.backend.MonteCarloFirstVisit(
        states=taxi.states,
        actions=taxi.actions,
        epsilon=0.03,
        gamma=0.99,
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_Double_Expected_Sarsa():
    """Smoke-test ``rl.DoubleExpectedSarsa`` on the Taxi environment.

    No assertions are made: the test passes when building the agent and
    running the session raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.DoubleExpectedSarsa(
        states=taxi.states,
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_Double_QLearning():
    """Smoke-test ``rl.DoubleQLearning`` on the Taxi environment.

    No assertions are made: the test passes when building the agent and
    running the session raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.DoubleQLearning(
        states=taxi.states,
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_DoubleSarsa():
    """Smoke-test ``rl.DoubleSarsa`` (constructed with ``backend='cpp'``).

    No assertions are made: the test passes when building the agent and
    running the session on the Taxi environment raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.DoubleSarsa(
        states=taxi.states,
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        backend='cpp',
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_n_step_sarsa():
    """Smoke-test ``rl.NStepSarsa`` with ``n_step=5`` on the Taxi environment.

    No assertions are made: the test passes when building the agent and
    running the session raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.NStepSarsa(
        states=taxi.states,
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_step=5,
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_Sarsa():
    """Smoke-test ``rl.backend.Sarsa`` on the Taxi environment.

    No assertions are made: the test passes when building the agent and
    running the session raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.backend.Sarsa(
        states=taxi.states,
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_TabularDynaQPlus():
    """Smoke-test ``rl.TabularDynaQPlus`` (``n_plan=5``, ``backend='cpp'``).

    No assertions are made: the test passes when building the agent and
    running the session on the Taxi environment raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.TabularDynaQPlus(
        states=taxi.states,
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_plan=5,
        backend='cpp',
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_NStepTreeBackup():
    """Smoke-test ``rl.NStepTreeBackup`` (``n_step=5``, ``backend='cpp'``).

    No assertions are made: the test passes when building the agent and
    running the session on the Taxi environment raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.NStepTreeBackup(
        states=taxi.states,
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_step=5,
        backend='cpp',
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_PrioritizedSweeping():
    """Smoke-test ``rl.PrioritizedSweeping`` (``n_plan=5``, ``theta=0.5``, ``backend='cpp'``).

    No assertions are made: the test passes when building the agent and
    running the session on the Taxi environment raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.PrioritizedSweeping(
        states=taxi.states,
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_plan=5,
        theta=0.5,
        backend='cpp',
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_sarsa_appr():
    """Smoke-test ``rl.SemiGradientSarsa`` on the Taxi environment.

    The agent is built from the environment's actions plus tiling
    arguments (no ``states`` argument is passed). No assertions are made:
    the test passes when building the agent and running the session raises
    nothing.
    """
    taxi = rl.Taxi()
    learner = rl.SemiGradientSarsa(
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        features=1,
        tilings=2,
        tilings_offset=[1],
        tile_size=[1],
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_SemiGradientExpectedSarsa():
    """Smoke-test ``rl.SemiGradientExpectedSarsa`` (constructed with ``backend='cpp'``).

    The agent is built from the environment's actions plus tiling
    arguments (no ``states`` argument is passed). No assertions are made:
    the test passes when building the agent and running the session on the
    Taxi environment raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.SemiGradientExpectedSarsa(
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.01,
        gamma=0.99,
        features=1,
        tilings=2,
        tilings_offset=[4, 4],
        tile_size=[4, 4],
        backend='cpp',
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_semigradient_n_step_expected_sarsa():
    """Smoke-test ``rl.SemiGradientNStepExpectedSarsa`` with ``n_step=5``.

    The agent is built from the environment's actions plus tiling
    arguments (no ``states`` argument is passed). No assertions are made:
    the test passes when building the agent and running the session on the
    Taxi environment raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.SemiGradientNStepExpectedSarsa(
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        n_step=5,
        features=1,
        tilings=2,
        tilings_offset=[1],
        tile_size=[1],
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_SemiGradientMonteCarlo():
    """Smoke-test ``rl.backend.SemiGradientMonteCarlo`` on the Taxi environment.

    The agent is built from the environment's actions plus tiling
    arguments (no ``states`` argument is passed). No assertions are made:
    the test passes when building the agent and running the session raises
    nothing.
    """
    taxi = rl.Taxi()
    learner = rl.backend.SemiGradientMonteCarlo(
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.01,
        gamma=0.99,
        features=1,
        tilings=2,
        tilings_offset=[4, 4],
        tile_size=[4, 4],
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_sarsa_lambda():
    """Smoke-test ``rl.SarsaLambda`` (``lambd=0.98``, ``trace_type="replacing"``).

    The agent is built from the environment's actions only (no ``states``
    argument is passed). No assertions are made: the test passes when
    building the agent and running the session on the Taxi environment
    raises nothing.
    """
    taxi = rl.Taxi()
    learner = rl.SarsaLambda(
        actions=taxi.actions,
        alpha=0.1,
        epsilon=0.03,
        gamma=0.99,
        lambd=0.98,
        trace_type="replacing",
        features=1,
        tilings=4,
    )
    rl.Session(taxi, learner).run(100, 10, 10)
def test_MonteCarlo():
    """Smoke-test ``rl.MonteCarlo`` with both ``visit_update`` settings.

    A single Taxi environment is shared; an agent is built and run first
    with ``visit_update='first'`` and then with ``visit_update='every'``.
    No assertions are made: the test passes when neither run raises.
    """
    taxi = rl.Taxi()
    # Same order as before: the 'first' run completes before the 'every'
    # agent is constructed.
    for visit_mode in ('first', 'every'):
        learner = rl.MonteCarlo(
            states=taxi.states,
            actions=taxi.actions,
            epsilon=0.03,
            gamma=0.99,
            visit_update=visit_mode,
        )
        rl.Session(taxi, learner).run(100, 10, 10)
####################################
# Experiment settings
####################################
# These values are only defined here; how they are consumed is not visible
# in this part of the file.
test_sample = 30
env_name = 'taxi_confounder'  # taxi, taxi_generic_model, taxi_confounder
observe_confounders = True
number_of_agents = 1
alpha = 0.3
epsilon = 1
epsilon_decay = 0.995
gamma = 0.98
min_epsilon = 0.05
causal_threshold = 0.6

####################################
# Environment selection
####################################
if env_name == 'taxi':
    env = rl.Taxi(build_causal_model=True, confounders=observe_confounders)
elif env_name == 'taxi_generic_model':
    env = rl.TaxiGenericModel(build_causal_model=True, observability='partial')  # full, partial
elif env_name == 'taxi_confounder':
    env = rl.TaxiConfounder(build_causal_model=True, observe_confounder=observe_confounders)
else:
    # Without this branch an unrecognised name would leave `env` unbound and
    # surface later as a NameError at `env.actions`; fail here instead.
    raise ValueError(
        f"Unknown env_name {env_name!r}; expected 'taxi', "
        "'taxi_generic_model' or 'taxi_confounder'"
    )

results = []

####################################
# Random agent
####################################
# One rl.Random agent per requested agent, all built from the selected
# environment's actions.
random_agents = [
    rl.Random(actions=env.actions)
    for _ in range(number_of_agents)
]