def test_MonteCarloFirstVisit():
    env = rl.Taxi()
    agent = rl.backend.MonteCarloFirstVisit(
        states=env.states, actions=env.actions, epsilon=0.03, gamma=0.99)
    session = rl.Session(env, agent)
    # Positional arguments presumably map to episodes, test_offset and
    # test_samples (cf. the keyword calls further down in this section).
    session.run(100, 10, 10)


def test_Double_Expected_Sarsa():
    env = rl.Taxi()
    agent = rl.DoubleExpectedSarsa(
        states=env.states, actions=env.actions,
        alpha=0.1, epsilon=0.03, gamma=0.99)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_MonteCarlo():
    env = rl.Taxi()
    agent = rl.MonteCarlo(
        states=env.states, actions=env.actions,
        epsilon=0.03, gamma=0.99, visit_update='first')
    session = rl.Session(env, agent)
    session.run(100, 10, 10)

    agent = rl.MonteCarlo(
        states=env.states, actions=env.actions,
        epsilon=0.03, gamma=0.99, visit_update='every')
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_Double_QLearning():
    env = rl.Taxi()
    agent = rl.DoubleQLearning(
        states=env.states, actions=env.actions,
        alpha=0.1, epsilon=0.03, gamma=0.99)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_DoubleSarsa():
    env = rl.Taxi()
    agent = rl.DoubleSarsa(
        states=env.states, actions=env.actions,
        alpha=0.1, epsilon=0.03, gamma=0.99, backend='cpp')
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_n_step_sarsa():
    env = rl.Taxi()
    agent = rl.NStepSarsa(
        states=env.states, actions=env.actions,
        alpha=0.1, epsilon=0.03, gamma=0.99, n_step=5)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_Sarsa():
    env = rl.Taxi()
    agent = rl.backend.Sarsa(
        states=env.states, actions=env.actions,
        alpha=0.1, epsilon=0.03, gamma=0.99)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_TabularDynaQPlus():
    env = rl.Taxi()
    agent = rl.TabularDynaQPlus(
        states=env.states, actions=env.actions,
        alpha=0.1, epsilon=0.03, gamma=0.99, n_plan=5, backend='cpp')
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_NStepTreeBackup():
    env = rl.Taxi()
    agent = rl.NStepTreeBackup(
        states=env.states, actions=env.actions,
        alpha=0.1, epsilon=0.03, gamma=0.99, n_step=5, backend='cpp')
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_sarsa_appr():
    env = rl.Taxi()
    agent = rl.SemiGradientSarsa(
        actions=env.actions, alpha=0.1, epsilon=0.03, gamma=0.99,
        features=1, tilings=2, tilings_offset=[1], tile_size=[1])
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_PrioritizedSweeping():
    env = rl.Taxi()
    agent = rl.PrioritizedSweeping(
        states=env.states, actions=env.actions,
        alpha=0.1, epsilon=0.03, gamma=0.99,
        n_plan=5, theta=0.5, backend='cpp')
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_SemiGradientExpectedSarsa():
    env = rl.Taxi()
    agent = rl.SemiGradientExpectedSarsa(
        actions=env.actions, alpha=0.1, epsilon=0.01, gamma=0.99,
        features=1, tilings=2, tilings_offset=[4, 4], tile_size=[4, 4],
        backend='cpp')
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_semigradient_n_step_expected_sarsa():
    env = rl.Taxi()
    agent = rl.SemiGradientNStepExpectedSarsa(
        actions=env.actions, alpha=0.1, epsilon=0.03, gamma=0.99, n_step=5,
        features=1, tilings=2, tilings_offset=[1], tile_size=[1])
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_SemiGradientMonteCarlo():
    env = rl.Taxi()
    agent = rl.backend.SemiGradientMonteCarlo(
        actions=env.actions, alpha=0.1, epsilon=0.01, gamma=0.99,
        features=1, tilings=2, tilings_offset=[4, 4], tile_size=[4, 4])
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


def test_sarsa_lambda():
    env = rl.Taxi()
    agent = rl.SarsaLambda(
        actions=env.actions, alpha=0.1, epsilon=0.03, gamma=0.99,
        lambd=0.98, trace_type="replacing", features=1, tilings=4)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)


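# The tests above all follow the same pattern: build the Taxi environment,
# construct an agent, wrap both in a Session and run it for 100 episodes.
# The sketch below is an illustrative consolidation, not part of the existing
# suite: it assumes pytest is available (the import would normally sit at the
# top of the file), introduces a new test name, and reuses only constructors
# and keyword arguments already exercised above.
import pytest


@pytest.mark.parametrize('agent_cls', [
    rl.DoubleExpectedSarsa,
    rl.DoubleQLearning,
])
def test_tabular_agent(agent_cls):
    # Same env/agent/session/run pattern as the individual tests above.
    env = rl.Taxi()
    agent = agent_cls(states=env.states, actions=env.actions,
                      alpha=0.1, epsilon=0.03, gamma=0.99)
    session = rl.Session(env, agent)
    session.run(100, 10, 10)

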
            observe_confounder=observe_confounder, build_causal_model=True)
    elif env_type == 'confounder_not_directly_influencing_outcome':
        env = rl.ConfounderNotDirectlyInfluencingOutcome(
            observe_confounder=observe_confounder, build_causal_model=True)

    results = []

    ####################################
    # Random Agent
    ####################################
    random_agents = [
        rl.Random(actions=env.actions) for _ in range(number_of_agents)
    ]
    for agent in random_agents:
        session = rl.Session(env=env, agent=agent, max_steps=max_steps)
        results.append(
            session.run(episodes=episodes, test_offset=test_offset,
                        test_samples=test_sample, render=False))

    ####################################
    # Causal Q-Learning agents
    ####################################
    causal_q_learning_agents = [
        rl.CausalQLearning(states=env.states, actions=env.actions,
                           alpha=alpha, epsilon=epsilon,
                           epsilon_decay=epsilon_decay,

        env = rl.TaxiGenericModel(build_causal_model=True,
                                  observability='partial')  # full, partial
    elif env_name == 'taxi_confounder':
        env = rl.TaxiConfounder(build_causal_model=True,
                                observe_confounder=observe_confounders)

    results = []

    ####################################
    # Random agent
    ####################################
    random_agents = [
        rl.Random(actions=env.actions) for _ in range(number_of_agents)
    ]
    for agent in random_agents:
        session = rl.Session(env=env, agent=agent)
        results.append(
            session.run(episodes=episodes, test_offset=test_offset,
                        test_samples=test_sample, render=False))

    ####################################
    # Causal Q-Learning
    ####################################
    causal_q_learning_agents = [
        rl.CausalQLearning(states=env.states, actions=env.actions,
                           alpha=alpha, epsilon=epsilon,
                           epsilon_decay=epsilon_decay,