def test_resume_gym_valid_environment():
    """
    Train on CartPole-v1, resume training from the written checkpoint, and check the resumed policy.

    The test first calls `run` with a train-function-args callback, then builds a new `Gym` environment for
    `CartPole-v1` and passes it to `resume_from_checkpoint` together with a resume-args mutator. The `pi` of the
    agent returned by the resume call is compared for equality with a pickled fixture.
    """

    start_virtual_display_if_headless()

    def resume_args_mutator(resume_args: Dict):
        """Print the number of resume arguments passed to the mutator."""
        print(f'Called mutator: {len(resume_args)} resume arguments.')

    def train_function_args_callback(args: Dict):
        """Print the number of arguments passed to the callback."""
        print(f'Called callback: {len(args)} resume arguments.')

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.StochasticMdpAgent '
        '--continuous-state-discretization-resolution 0.005 '
        '--gamma 0.95 '
        '--environment rlai.environments.openai_gym.Gym '
        '--gym-id CartPole-v1 '
        '--render-every-nth-episode 2 '
        '--train-function rlai.gpi.monte_carlo.iteration.iterate_value_q_pi '
        '--num-improvements 2 '
        '--num-episodes-per-improvement 2 '
        '--update-upon-every-visit True '
        '--epsilon 0.2 '
        '--q-S-A rlai.q_S_A.tabular.TabularStateActionValueEstimator '
        '--make-final-policy-greedy False '
        '--num-improvements-per-plot 2 '
        '--num-improvements-per-checkpoint 2 '
        f'--checkpoint-path {tempfile.NamedTemporaryFile(delete=False).name} '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name}'
    )

    # Only the checkpoint path is needed below; the saved-agent path returned by run is not used.
    checkpoint_path, _ = run(
        args=shlex.split(run_args),
        train_function_args_callback=train_function_args_callback
    )

    random_state = RandomState(12345)
    resume_environment = Gym(random_state, None, 'CartPole-v1', None)

    # Close the environment even if resuming raises, so a failure here does not leave it open.
    try:
        agent = resume_from_checkpoint(
            checkpoint_path,
            iterate_value_q_pi,
            environment=resume_environment,
            num_improvements=2,
            resume_args_mutator=resume_args_mutator
        )
    finally:
        resume_environment.close()

    fixture_path = f'{os.path.dirname(__file__)}/fixtures/test_resume_gym_valid_environment.pickle'

    # uncomment the following lines and run test to update fixture
    # with open(fixture_path, 'wb') as file:
    #     pickle.dump(agent.pi, file)

    with open(fixture_path, 'rb') as file:
        pi_fixture = pickle.load(file)

    assert agent.pi == pi_fixture
def test_q_learning_with_patsy_formula():
    """
    Run Q-learning on the Gridworld `example_4_1` environment with a `--formula` argument and check the agent.

    The formula crosses categorical state and action terms, `C(s, ...):C(a, ...)`, with 16 state levels and 4 action
    levels. The agent loaded from the saved paths is compared with a pickled fixture through `assert_run`.
    """

    start_virtual_display_if_headless()

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.ActionValueMdpAgent '
        '--gamma 1 '
        '--environment rlai.environments.gridworld.Gridworld '
        '--id example_4_1 '
        '--T 25 '
        '--train-function rlai.gpi.temporal_difference.iteration.iterate_value_q_pi '
        '--mode Q_LEARNING '
        '--num-improvements 5 '
        '--num-episodes-per-improvement 5 '
        '--epsilon 0.05 '
        '--q-S-A rlai.q_S_A.function_approximation.estimators.ApproximateStateActionValueEstimator '
        '--function-approximation-model rlai.q_S_A.function_approximation.models.sklearn.SKLearnSGD '
        '--verbose 1 '
        '--feature-extractor rlai.q_S_A.function_approximation.models.feature_extraction.StateActionIdentityFeatureExtractor '
        f'--formula "C(s, levels={list(range(16))}):C(a, levels={list(range(4))})" '
        '--make-final-policy-greedy True '
        '--num-improvements-per-checkpoint 5 '
        f'--checkpoint-path {tempfile.NamedTemporaryFile(delete=False).name} '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name}'
    )
    saved_checkpoint_path, saved_agent_path = run(shlex.split(run_args))

    # Only the agent is compared in this test; the loaded checkpoint is discarded.
    _, trained_agent = load_checkpoint_and_agent(saved_checkpoint_path, saved_agent_path)

    fixture_path = f'{os.path.dirname(__file__)}/fixtures/test_q_learning_with_patsy_formula.pickle'

    # To update the fixture, uncomment the following lines and run this test:
    # with open(fixture_path, 'wb') as fixture_file:
    #     pickle.dump(trained_agent, fixture_file)

    with open(fixture_path, 'rb') as fixture_file:
        expected_agent = pickle.load(fixture_file)

    # NOTE(review): other tests in this file call assert_run with four arguments
    # (checkpoint, agent, checkpoint_fixture, agent_fixture) -- confirm which signature is current.
    assert_run(trained_agent, expected_agent)
def test_prioritized_sweeping_planning_high_threshold():
    """
    Run Q-learning on Gridworld `example_4_1` with a prioritized-sweeping planning environment and check the agent.

    The run passes `--priority-theta -10`, `--T-planning 50`, and 10 planning improvements per direct improvement.
    The agent loaded from the saved paths is compared with a pickled fixture through `assert_run`.
    """

    start_virtual_display_if_headless()

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.ActionValueMdpAgent '
        '--gamma 1 '
        '--environment rlai.environments.gridworld.Gridworld '
        '--id example_4_1 '
        '--planning-environment rlai.environments.mdp.PrioritizedSweepingMdpPlanningEnvironment '
        '--num-planning-improvements-per-direct-improvement 10 '
        '--priority-theta -10 '
        '--T-planning 50 '
        '--train-function rlai.gpi.temporal_difference.iteration.iterate_value_q_pi '
        '--mode Q_LEARNING '
        '--num-improvements 10 '
        '--num-episodes-per-improvement 1 '
        '--epsilon 0.01 '
        '--q-S-A rlai.q_S_A.tabular.TabularStateActionValueEstimator '
        '--make-final-policy-greedy True '
        '--num-improvements-per-checkpoint 10 '
        f'--checkpoint-path {tempfile.NamedTemporaryFile(delete=False).name} '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name}'
    )
    saved_checkpoint_path, saved_agent_path = run(shlex.split(run_args))

    # Only the agent is compared in this test; the loaded checkpoint is discarded.
    _, trained_agent = load_checkpoint_and_agent(saved_checkpoint_path, saved_agent_path)

    fixture_path = f'{os.path.dirname(__file__)}/fixtures/test_prioritized_sweeping_planning_high_threshold.pickle'

    # To update the fixture, uncomment the following lines and run this test:
    # with open(fixture_path, 'wb') as fixture_file:
    #     pickle.dump(trained_agent, fixture_file)

    with open(fixture_path, 'rb') as fixture_file:
        expected_agent = pickle.load(fixture_file)

    # NOTE(review): other tests in this file call assert_run with four arguments
    # (checkpoint, agent, checkpoint_fixture, agent_fixture) -- confirm which signature is current.
    assert_run(trained_agent, expected_agent)
def test_policy_gradient_reinforce_softmax_action_preferences_with_baseline():
    """
    Run the REINFORCE train function on Gridworld `example_4_1` with a soft-max policy and check the agent.

    The run passes a `--v-S` state-value estimator alongside `SoftMaxInActionPreferencesPolicy`. No
    `--checkpoint-path` is passed, so whatever `run` returns as the checkpoint path is forwarded unchanged to
    `load_checkpoint_and_agent`. The loaded agent is compared with a pickled fixture through `assert_run`.
    """

    start_virtual_display_if_headless()

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.ParameterizedMdpAgent '
        '--gamma 1 '
        '--environment rlai.environments.gridworld.Gridworld '
        '--id example_4_1 '
        '--T 100 '
        '--train-function rlai.policy_gradient.monte_carlo.reinforce.improve '
        '--num-episodes 10 '
        '--v-S rlai.v_S.function_approximation.estimators.ApproximateStateValueEstimator '
        '--feature-extractor rlai.environments.gridworld.GridworldStateFeatureExtractor '
        '--function-approximation-model rlai.models.sklearn.SKLearnSGD '
        '--loss squared_error '
        '--sgd-alpha 0.0 '
        '--learning-rate constant '
        '--eta0 0.001 '
        '--policy rlai.policies.parameterized.discrete_action.SoftMaxInActionPreferencesPolicy '
        '--policy-feature-extractor rlai.environments.gridworld.GridworldFeatureExtractor '
        '--alpha 0.001 '
        '--update-upon-every-visit False '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name}'
    )
    saved_checkpoint_path, saved_agent_path = run(shlex.split(run_args))

    # Only the agent is compared in this test; the first element of the loaded pair is discarded.
    _, trained_agent = load_checkpoint_and_agent(saved_checkpoint_path, saved_agent_path)

    fixture_path = (
        f'{os.path.dirname(__file__)}/fixtures/test_policy_gradient_reinforce_softmax_action_preferences_with_baseline.pickle'
    )

    # To update the fixture, uncomment the following lines and run this test:
    # with open(fixture_path, 'wb') as fixture_file:
    #     pickle.dump(trained_agent, fixture_file)

    with open(fixture_path, 'rb') as fixture_file:
        expected_agent = pickle.load(fixture_file)

    # NOTE(review): other tests in this file call assert_run with four arguments
    # (checkpoint, agent, checkpoint_fixture, agent_fixture) -- confirm which signature is current.
    assert_run(trained_agent, expected_agent)
def test_policy_gradient_reinforce_normal_with_baseline():
    """
    Run the REINFORCE train function on Gym `LunarLanderContinuous-v2` with a normal-distribution policy.

    The run passes a `--v-S` state-value estimator alongside `ContinuousActionNormalDistributionPolicy`, with
    environment and policy plotting flags set. No `--checkpoint-path` is passed, so whatever `run` returns as the
    checkpoint path is forwarded unchanged to `load_checkpoint_and_agent`. The loaded agent is compared with a
    pickled fixture through `assert_run`.
    """

    start_virtual_display_if_headless()

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.ParameterizedMdpAgent '
        '--gamma 0.99 '
        '--environment rlai.environments.openai_gym.Gym '
        '--gym-id LunarLanderContinuous-v2 '
        '--render-every-nth-episode 2 '
        '--steps-per-second 1000 '
        '--plot-environment '
        '--T 2000 '
        '--train-function rlai.policy_gradient.monte_carlo.reinforce.improve '
        '--num-episodes 4 '
        '--v-S rlai.v_S.function_approximation.estimators.ApproximateStateValueEstimator '
        '--feature-extractor rlai.environments.openai_gym.ContinuousFeatureExtractor '
        '--function-approximation-model rlai.models.sklearn.SKLearnSGD '
        '--loss squared_error '
        '--sgd-alpha 0.0 '
        '--learning-rate constant '
        '--eta0 0.00001 '
        '--policy rlai.policies.parameterized.continuous_action.ContinuousActionNormalDistributionPolicy '
        '--policy-feature-extractor rlai.environments.openai_gym.ContinuousFeatureExtractor '
        '--plot-policy '
        '--alpha 0.00001 '
        '--update-upon-every-visit True '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name}'
    )
    saved_checkpoint_path, saved_agent_path = run(shlex.split(run_args))

    # Only the agent is compared in this test; the first element of the loaded pair is discarded.
    _, trained_agent = load_checkpoint_and_agent(saved_checkpoint_path, saved_agent_path)

    fixture_path = f'{os.path.dirname(__file__)}/fixtures/test_policy_gradient_reinforce_normal_with_baseline.pickle'

    # To update the fixture, uncomment the following lines and run this test:
    # with open(fixture_path, 'wb') as fixture_file:
    #     pickle.dump(trained_agent, fixture_file)

    with open(fixture_path, 'rb') as fixture_file:
        expected_agent = pickle.load(fixture_file)

    # NOTE(review): other tests in this file call assert_run with four arguments
    # (checkpoint, agent, checkpoint_fixture, agent_fixture) -- confirm which signature is current.
    assert_run(trained_agent, expected_agent)
def test_gym_cartpole_function_approximation_plot_model():
    """
    Run SARSA with an approximate action-value estimator on Gym `CartPole-v1` with model plotting enabled.

    The run passes `--plot-model` with `--plot-model-bins 10`. The agent loaded from the saved paths is compared
    with a pickled fixture through `assert_run`.
    """

    start_virtual_display_if_headless()

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.ActionValueMdpAgent '
        '--gamma 0.95 '
        '--environment rlai.environments.openai_gym.Gym '
        '--gym-id CartPole-v1 '
        '--render-every-nth-episode 2 '
        '--train-function rlai.gpi.temporal_difference.iteration.iterate_value_q_pi '
        '--mode SARSA '
        '--num-improvements 2 '
        '--num-episodes-per-improvement 2 '
        '--num-updates-per-improvement 1 '
        '--epsilon 0.2 '
        '--q-S-A rlai.q_S_A.function_approximation.estimators.ApproximateStateActionValueEstimator '
        '--plot-model '
        '--plot-model-bins 10 '
        '--function-approximation-model rlai.q_S_A.function_approximation.models.sklearn.SKLearnSGD '
        '--loss squared_error '
        '--sgd-alpha 0.0 '
        '--learning-rate constant '
        '--eta0 0.001 '
        '--feature-extractor rlai.environments.openai_gym.CartpoleFeatureExtractor '
        '--make-final-policy-greedy True '
        '--num-improvements-per-plot 2 '
        '--num-improvements-per-checkpoint 2 '
        f'--checkpoint-path {tempfile.NamedTemporaryFile(delete=False).name} '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name}'
    )
    saved_checkpoint_path, saved_agent_path = run(shlex.split(run_args))

    # Only the agent is compared in this test; the loaded checkpoint is discarded.
    _, trained_agent = load_checkpoint_and_agent(saved_checkpoint_path, saved_agent_path)

    fixture_path = f'{os.path.dirname(__file__)}/fixtures/test_gym_cartpole_function_approximation_plot_model.pickle'

    # To update the fixture, uncomment the following lines and run this test:
    # with open(fixture_path, 'wb') as fixture_file:
    #     pickle.dump(trained_agent, fixture_file)

    with open(fixture_path, 'rb') as fixture_file:
        expected_agent = pickle.load(fixture_file)

    # NOTE(review): other tests in this file call assert_run with four arguments
    # (checkpoint, agent, checkpoint_fixture, agent_fixture) -- confirm which signature is current.
    assert_run(trained_agent, expected_agent)
def test_gym_cartpole_tabular():
    """
    Run Monte Carlo iteration with a tabular action-value estimator on Gym `CartPole-v1` and check the agent.

    The run passes `--continuous-state-discretization-resolution 0.005`. The agent loaded from the saved paths is
    compared with a pickled fixture through `assert_run`.
    """

    start_virtual_display_if_headless()

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.ActionValueMdpAgent '
        '--continuous-state-discretization-resolution 0.005 '
        '--gamma 0.95 '
        '--environment rlai.environments.openai_gym.Gym '
        '--gym-id CartPole-v1 '
        '--render-every-nth-episode 2 '
        '--train-function rlai.gpi.monte_carlo.iteration.iterate_value_q_pi '
        '--num-improvements 2 '
        '--num-episodes-per-improvement 2 '
        '--update-upon-every-visit True '
        '--epsilon 0.2 '
        '--q-S-A rlai.q_S_A.tabular.TabularStateActionValueEstimator '
        '--make-final-policy-greedy True '
        '--num-improvements-per-plot 2 '
        '--num-improvements-per-checkpoint 2 '
        f'--checkpoint-path {tempfile.NamedTemporaryFile(delete=False).name} '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name}'
    )
    saved_checkpoint_path, saved_agent_path = run(shlex.split(run_args))

    # Only the agent is compared in this test; the loaded checkpoint is discarded.
    _, trained_agent = load_checkpoint_and_agent(saved_checkpoint_path, saved_agent_path)

    fixture_path = f'{os.path.dirname(__file__)}/fixtures/test_gym_cartpole_tabular.pickle'

    # To update the fixture, uncomment the following lines and run this test:
    # with open(fixture_path, 'wb') as fixture_file:
    #     pickle.dump(trained_agent, fixture_file)

    with open(fixture_path, 'rb') as fixture_file:
        expected_agent = pickle.load(fixture_file)

    # NOTE(review): other tests in this file call assert_run with four arguments
    # (checkpoint, agent, checkpoint_fixture, agent_fixture) -- confirm which signature is current.
    assert_run(trained_agent, expected_agent)
def test_scale_learning_rate_with_logging():
    """
    Run Q-learning on Gridworld `example_4_1` with `--scale-eta0-for-y` and `--log INFO`, then check the results.

    Both the loaded checkpoint and the loaded agent are compared with a pickled `(checkpoint, agent)` fixture
    through `assert_run`.
    """

    start_virtual_display_if_headless()

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.StochasticMdpAgent '
        '--gamma 1 '
        '--environment rlai.environments.gridworld.Gridworld '
        '--id example_4_1 '
        '--T 25 '
        '--train-function rlai.gpi.temporal_difference.iteration.iterate_value_q_pi '
        '--mode Q_LEARNING '
        '--num-improvements 5 '
        '--num-episodes-per-improvement 50 '
        '--epsilon 0.05 '
        '--q-S-A rlai.q_S_A.function_approximation.estimators.ApproximateStateActionValueEstimator '
        '--function-approximation-model rlai.q_S_A.function_approximation.models.sklearn.SKLearnSGD '
        '--scale-eta0-for-y '
        '--feature-extractor rlai.environments.gridworld.GridworldFeatureExtractor '
        '--make-final-policy-greedy True '
        '--num-improvements-per-checkpoint 5 '
        f'--checkpoint-path {tempfile.NamedTemporaryFile(delete=False).name} '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name} '
        '--log INFO'
    )
    saved_checkpoint_path, saved_agent_path = run(shlex.split(run_args))

    trained_checkpoint, trained_agent = load_checkpoint_and_agent(saved_checkpoint_path, saved_agent_path)

    fixture_path = f'{os.path.dirname(__file__)}/fixtures/test_scale_learning_rate_with_logging.pickle'

    # To update the fixture, uncomment the following lines and run this test:
    # with open(fixture_path, 'wb') as fixture_file:
    #     pickle.dump((trained_checkpoint, trained_agent), fixture_file)

    with open(fixture_path, 'rb') as fixture_file:
        expected_checkpoint, expected_agent = pickle.load(fixture_file)

    # NOTE(review): other tests in this file call assert_run with two arguments (agent, agent_fixture) --
    # confirm which signature is current.
    assert_run(trained_checkpoint, trained_agent, expected_checkpoint, expected_agent)
def test_gym_continuous_mountain_car():
    """
    Run the REINFORCE train function on Gym `MountainCarContinuous-v0` with a beta-distribution policy.

    The run checkpoints every episode (`--num-episodes-per-checkpoint 1`) and logs at DEBUG level. Both the loaded
    checkpoint and the loaded agent are compared with a pickled `(checkpoint, agent)` fixture through `assert_run`.
    """

    start_virtual_display_if_headless()

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.StochasticMdpAgent '
        '--gamma 0.99 '
        '--environment rlai.environments.openai_gym.Gym '
        '--gym-id MountainCarContinuous-v0 '
        '--plot-environment '
        '--T 1000 '
        '--train-function rlai.policy_gradient.monte_carlo.reinforce.improve '
        '--num-episodes 2 '
        '--plot-state-value True '
        '--v-S rlai.v_S.function_approximation.estimators.ApproximateStateValueEstimator '
        '--feature-extractor rlai.environments.openai_gym.ContinuousMountainCarFeatureExtractor '
        '--function-approximation-model rlai.models.sklearn.SKLearnSGD '
        # 'squared_error' matches the other SKLearnSGD tests in this file; scikit-learn renamed the loss from
        # 'squared_loss' in 1.0 and removed the old name in 1.2.
        '--loss squared_error '
        '--sgd-alpha 0.0 '
        '--learning-rate constant '
        '--eta0 0.01 '
        '--policy rlai.policies.parameterized.continuous_action.ContinuousActionBetaDistributionPolicy '
        '--policy-feature-extractor rlai.environments.openai_gym.ContinuousMountainCarFeatureExtractor '
        '--plot-policy '
        '--alpha 0.01 '
        '--update-upon-every-visit True '
        f'--checkpoint-path {tempfile.NamedTemporaryFile(delete=False).name} '
        '--num-episodes-per-checkpoint 1 '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name} '
        '--log DEBUG'
    )
    checkpoint_path, agent_path = run(shlex.split(run_args))

    checkpoint, agent = load_checkpoint_and_agent(checkpoint_path, agent_path)

    fixture_path = f'{os.path.dirname(__file__)}/fixtures/test_gym_continuous_mountain_car.pickle'

    # uncomment the following lines and run test to update fixture
    # with open(fixture_path, 'wb') as f:
    #     pickle.dump((checkpoint, agent), f)

    with open(fixture_path, 'rb') as f:
        checkpoint_fixture, agent_fixture = pickle.load(f)

    # NOTE(review): other tests in this file call assert_run with two arguments (agent, agent_fixture) --
    # confirm which signature is current.
    assert_run(checkpoint, agent, checkpoint_fixture, agent_fixture)
def test_continuous_action_discretization():
    """
    Run SARSA with a tabular estimator on Gym `MountainCarContinuous-v0` with discretized states and actions.

    The run passes `--continuous-action-discretization-resolution 0.1` and a `--video-directory` with `--force`.
    Both the loaded checkpoint and the loaded agent are compared with a pickled `(checkpoint, agent)` fixture
    through `assert_run`.
    """

    start_virtual_display_if_headless()

    # One option per line; adjacent literals concatenate into a single argument string for shlex.split.
    # NOTE(review): the TemporaryDirectory object below is not retained, so only its name is used; the directory
    # itself may already be removed once the object is finalized -- confirm this is intended.
    run_args = (
        '--random-seed 12345 '
        '--agent rlai.agents.mdp.StochasticMdpAgent '
        '--continuous-state-discretization-resolution 0.005 '
        '--gamma 0.95 '
        '--environment rlai.environments.openai_gym.Gym '
        '--gym-id MountainCarContinuous-v0 '
        '--T 20 '
        '--continuous-action-discretization-resolution 0.1 '
        '--render-every-nth-episode 2 '
        f'--video-directory {tempfile.TemporaryDirectory().name} '
        '--force '
        '--train-function rlai.gpi.temporal_difference.iteration.iterate_value_q_pi '
        '--mode SARSA '
        '--num-improvements 2 '
        '--num-episodes-per-improvement 1 '
        '--epsilon 0.01 '
        '--q-S-A rlai.q_S_A.tabular.TabularStateActionValueEstimator '
        '--make-final-policy-greedy True '
        '--num-improvements-per-plot 2 '
        '--num-improvements-per-checkpoint 2 '
        f'--checkpoint-path {tempfile.NamedTemporaryFile(delete=False).name} '
        f'--save-agent-path {tempfile.NamedTemporaryFile(delete=False).name}'
    )
    saved_checkpoint_path, saved_agent_path = run(shlex.split(run_args))

    trained_checkpoint, trained_agent = load_checkpoint_and_agent(saved_checkpoint_path, saved_agent_path)

    fixture_path = f'{os.path.dirname(__file__)}/fixtures/test_continuous_action_discretization.pickle'

    # To update the fixture, uncomment the following lines and run this test:
    # with open(fixture_path, 'wb') as fixture_file:
    #     pickle.dump((trained_checkpoint, trained_agent), fixture_file)

    with open(fixture_path, 'rb') as fixture_file:
        expected_checkpoint, expected_agent = pickle.load(fixture_file)

    # NOTE(review): other tests in this file call assert_run with two arguments (agent, agent_fixture) --
    # confirm which signature is current.
    assert_run(trained_checkpoint, trained_agent, expected_checkpoint, expected_agent)