def test_default_config(self, mock_critic_network, mock_actor_network, mock_agent):
    params = [
        "agent.actor_fc_layer_params=[100,10]",
        "agent.observation_fc_layer_params=[1,2,3]",
        "agent.action_fc_layer_params=[1,2,3,4]",
        "agent.joint_fc_layer_params=[5]",
    ]
    dataspec = agent_application.DataSpec(
        observation_spec=specs.ArraySpec([1, 2, 3], int),
        action_spec=specs.ArraySpec([1], float))
    conf = agent_application.make_config(DDPGConfig(), params)
    agent_trainer = DDPGAgent(dataspec, conf)
    agent = agent_trainer.init_agent()
    mock_critic_network.assert_called_once_with(
        (dataspec.observation_spec, dataspec.action_spec),
        observation_fc_layer_params=[1, 2, 3],
        action_fc_layer_params=[1, 2, 3, 4],
        joint_fc_layer_params=[5])
    mock_actor_network.assert_called_once_with(
        dataspec.observation_spec,
        dataspec.action_spec,
        fc_layer_params=[100, 10])
    mock_agent.assert_called_once_with(
        time_step_spec=mock.ANY,  # TODO
        action_spec=dataspec.action_spec,
        train_step_counter=mock.ANY,  # TODO
        actor_network=mock_actor_network.return_value,
        critic_network=mock_critic_network.return_value,
        actor_optimizer=mock.ANY,  # TODO
        critic_optimizer=mock.ANY,  # TODO
        td_errors_loss_fn=None,
        target_actor_network=None,
        target_critic_network=None,
    )
    self.assertEqual(agent, mock_agent.return_value)
def _make_app(self, rp_buffer):
    params = [
        "training.batch_size=32",
        "training.num_iterations=1",
        "project.tensorboard_path=/tmp/test_tb_path",
    ]
    conf = agent_application.make_config(QConfig(), params)
    dataspec = agent_application.DataSpec(observation_spec=None, action_spec=None)

    class Env:
        def __init__(self):
            self.env_id_cols = ["env_id_1"]
            self.ts_id_col = "ts_1"
            self.obs_cols = ["obs_1", "obs_2"]

    app = agent_application.AgentApplication(
        data_spec=dataspec,
        agent=DQNAgent,
        config=conf,
        env=Env(),
        first_timestep_dt=datetime.datetime.now(),
        training_interval=datetime.timedelta(days=1))
    # Stub out the replay buffer factory so tests can inject their own buffer.
    app.init_replay_buffer = lambda: rp_buffer
    return app
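# Hypothetical companion test for the helper above; the test name and the
# MagicMock buffer are illustrative assumptions, not from the source.
def test_make_app_uses_injected_buffer(self):
    rp_buffer = MagicMock()
    app = self._make_app(rp_buffer)
    # The stubbed factory should hand back exactly the injected buffer.
    self.assertIs(app.init_replay_buffer(), rp_buffer)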
def test_standard_config_ddpg(self, mock_critic_network, mock_actor_network, mock_agent):
    params = [
        "agent.actor_fc_layer_params=[100,10]",
        "agent.observation_fc_layer_params=[1,2,3]",
        "agent.action_fc_layer_params=[1,2,3,4]",
        "agent.joint_fc_layer_params=[5]",
        "agent.ou_stddev=0.1",
        "agent.ou_damping=0.2",
        "agent.target_update_tau=0.3",
        "agent.target_update_period=1",
        "agent.dqda_clipping=1.1",
        "agent.reward_scale_factor=1.2",
        "agent.gradient_clipping=1.3",
        "agent.debug_summaries=True",
        "agent.summarize_grads_and_vars=True",
        "agent.name=Patrick",
    ]
    dataspec = agent_application.DataSpec(
        observation_spec=specs.ArraySpec([1, 2, 3], int),
        action_spec=specs.ArraySpec([1], float))
    conf = agent_application.make_config(DDPGConfig(), params)
    agent_trainer = DDPGAgent(dataspec, conf)
    agent = agent_trainer.init_agent()
    mock_critic_network.assert_called_once_with(
        (dataspec.observation_spec, dataspec.action_spec),
        observation_fc_layer_params=[1, 2, 3],
        action_fc_layer_params=[1, 2, 3, 4],
        joint_fc_layer_params=[5])
    mock_actor_network.assert_called_once_with(
        dataspec.observation_spec,
        dataspec.action_spec,
        fc_layer_params=[100, 10])
    mock_agent.assert_called_once_with(
        time_step_spec=mock.ANY,  # TODO
        action_spec=dataspec.action_spec,
        train_step_counter=mock.ANY,  # TODO
        actor_network=mock_actor_network.return_value,
        critic_network=mock_critic_network.return_value,
        actor_optimizer=mock.ANY,  # TODO
        critic_optimizer=mock.ANY,  # TODO
        td_errors_loss_fn=None,
        target_actor_network=None,
        target_critic_network=None,
        ou_stddev=0.1,
        ou_damping=0.2,
        target_update_tau=0.3,
        target_update_period=1,
        dqda_clipping=1.1,
        reward_scale_factor=1.2,
        gradient_clipping=1.3,
        debug_summaries=True,
        summarize_grads_and_vars=True,
        name="Patrick",
    )
    self.assertEqual(agent, mock_agent.return_value)
def test_qconfig_values(self):
    params = [
        "agent.optimizer.learning_rate=0.01",
        "agent.fc_layer_params=[100, 150, 90]",
    ]
    conf = agent_application.make_config(QConfig(), params)
    self.assertEqual(conf.agent.optimizer.learning_rate, 0.01)
    # default is Adam
    self.assertEqual(conf.agent.optimizer.optimizer, Optimizer.Adam)
    self.assertEqual(conf.agent.fc_layer_params, [100, 150, 90])
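# For reference, a minimal sketch of the OmegaConf primitives a helper like
# make_config can be built on. This is an assumption about its shape, not the
# actual implementation of agent_application.make_config:
from omegaconf import OmegaConf

def make_config_sketch(schema, dotlist_params):
    base = OmegaConf.structured(schema)  # typed defaults from the config dataclass
    overrides = OmegaConf.from_dotlist(dotlist_params)  # parse "a.b=c" style params
    return OmegaConf.merge(base, overrides)  # overrides take precedence over defaults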
def make_runner(num_runs=4, num_eval_episodes=100, eval_interval=1):
    # get run config
    params = [
        "agent.optimizer.learning_rate=0.01",
        "training.num_iterations=10000",
        "policy.eps_start=1.0",
        "policy.eps_final=0.1",
        "agent.fc_layer_params=[100,]",
        "trajectory.trajectory_training_window=100",
        "project.application_name=cartpole_sim",
        "project.dm_storage_path=/tmp/rl_applications/cartpole_sim/%s/" % int(time.time()),
        "project.tensorboard_path=/tmp/tb_path/cartpole_sim/%s"
        % datetime.now().strftime("%Y-%m-%dT%H-%M-%S"),
    ]
    conf = agent_application.make_config(QConfig(), params)
    print(OmegaConf.to_yaml(conf))

    # create batch of cartpole environments
    first_timestep_dt = datetime(year=2019, month=8, day=7, hour=10)
    training_interval = timedelta(days=1)
    spark = get_spark_session()
    tfenv = make_batch_tfenv(make_env, conf, first_timestep_dt, training_interval, spark)

    # finalize RL application
    data_spec = agent_application.DataSpec(
        action_spec=tfenv.action_spec,
        observation_spec=tfenv.observation_spec)
    application = agent_application.AgentApplication(
        data_spec=data_spec,
        agent=DQNAgent(data_spec, conf),
        env=tfenv,
        config=conf,
        first_timestep_dt=first_timestep_dt,
        training_interval=training_interval)

    # create the data manager
    dm = build_inmemory_data_manager(application)
    tfenv.set_dm(dm)

    # create simulator runner
    sim_runner = SimulationRunner(
        application=application,
        make_eval_env=make_env,
        dm=dm,
        num_runs=num_runs,
        num_eval_episodes=num_eval_episodes,
        eval_interval=eval_interval)
    return sim_runner
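# Hypothetical driver for the runner above. SimulationRunner's entry point is
# not shown in this file; the run() call below is an assumption about its API.
if __name__ == "__main__":
    runner = make_runner(num_runs=1, num_eval_episodes=10)
    runner.run()  # assumed entry point; adjust to the real SimulationRunner API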
def test_standard_config_dqn(self, mock_qnetwork, mock_agent):
    params = [
        "agent.optimizer.learning_rate=0.01",
        "policy.epsilon_greedy=0.01",
        "trajectory.n_step=1",
        "agent.boltzmann_temperature=200",
        "agent.emit_log_probability=True",
        "agent.target_update_tau=1.0",
        "agent.target_update_period=2",
        "agent.gamma=1.1",
        "agent.reward_scale_factor=1.2",
        "agent.gradient_clipping=1.5",
        "agent.debug_summaries=True",
        "agent.summarize_grads_and_vars=False",
        "agent.name=Patrick",
        "agent.fc_layer_params=[100, 150, 90]",
    ]
    dataspec = agent_application.DataSpec(
        observation_spec=specs.ArraySpec([1, 2, 3], int),
        action_spec=specs.ArraySpec([1], float))
    conf = agent_application.make_config(QConfig(), params)
    agent_trainer = DQNAgent(dataspec, conf)
    agent = agent_trainer.init_agent()
    mock_qnetwork.assert_called_once_with(
        dataspec.observation_spec,
        dataspec.action_spec,
        fc_layer_params=[100, 150, 90])
    mock_agent.assert_called_once_with(
        time_step_spec=mock.ANY,  # TODO
        action_spec=dataspec.action_spec,
        q_network=mock_qnetwork.return_value,
        train_step_counter=mock.ANY,  # TODO
        optimizer=mock.ANY,  # TODO
        epsilon_greedy=0.01,
        n_step_update=1,
        boltzmann_temperature=200,
        emit_log_probability=True,
        target_update_tau=1.0,
        target_update_period=2,
        gamma=1.1,
        reward_scale_factor=1.2,
        gradient_clipping=1.5,
        debug_summaries=True,
        summarize_grads_and_vars=False,
        name="Patrick",
    )
    self.assertEqual(agent, mock_agent.return_value)
def make_runner(num_runs=4, num_eval_episodes=100, eval_interval=1):
    # get run config
    params = [
        "agent.actor_optimizer.learning_rate=1e-4",
        "agent.critic_optimizer.learning_rate=1e-3",
        "training.num_iterations=2000",
        "env.num_envs=60",
        "env.num_steps_per_run=50",
        "policy.eps_start=1.0",
        "policy.eps_final=0.1",
        "policy.eps_steps=1000",
        "agent.actor_fc_layer_params=[400, 300]",
        "agent.observation_fc_layer_params=[400,]",
        "agent.joint_fc_layer_params=[300,]",
        "trajectory.trajectory_training_window=100",
        "project.application_name=mountaincar_ddpg",
        "project.dm_storage_path=/tmp/rl_applications/mountaincar_sim/%s/" % int(time.time()),
        "project.tensorboard_path=/tmp/tb_path/mountaincar_sim/%s"
        % datetime.now().strftime("%Y-%m-%dT%H-%M-%S"),
    ]
    conf = agent_application.make_config(DDPGConfig(), params)
    print(OmegaConf.to_yaml(conf))

    # create batch of mountain-car environments
    first_timestep_dt = datetime(year=2019, month=8, day=7, hour=10)
    training_interval = timedelta(days=1)
    spark = get_spark_session()
    tfenv = make_batch_tfenv(make_env, conf, first_timestep_dt, training_interval, spark)

    # finalize RL application
    data_spec = agent_application.DataSpec(
        action_spec=tfenv.action_spec,
        observation_spec=tfenv.observation_spec)
    application = agent_application.AgentApplication(
        data_spec=data_spec,
        agent=DDPGAgent(data_spec, conf),
        env=tfenv,
        config=conf,
        first_timestep_dt=first_timestep_dt,
        training_interval=training_interval)

    # create the data manager and simulator runner
    dm = build_inmemory_data_manager(application)
    tfenv.set_dm(dm)
    return SimulationRunner(
        application=application,
        make_eval_env=make_env,
        dm=dm,
        num_runs=num_runs,
        num_eval_episodes=num_eval_episodes,
        eval_interval=eval_interval)
def setUp(self):
    mock_agent = MagicMock(Agent)
    dataspec = agent_application.DataSpec(
        observation_spec=specs.ArraySpec([1, 2, 3], int),
        action_spec=specs.ArraySpec([1], float))
    conf = agent_application.make_config(AgentConfig(), [])
    # Midnight today, used as the application's first timestep.
    today = datetime(date.today().year, date.today().month, date.today().day)
    env = MagicMock()
    self._mock_agent_init = "MOCKED AGENT"
    mock_agent.init_agent = MagicMock(return_value=self._mock_agent_init)
    self._application = agent_application.AgentApplication(
        data_spec=dataspec,
        agent=mock_agent,
        env=env,
        config=conf,
        first_timestep_dt=today,
        training_interval=timedelta(days=1))
def test_ignore_missing_config_dqn(self, mock_qnetwork, mock_agent):
    params = ["agent.fc_layer_params=[100, 150, 90]"]
    dataspec = agent_application.DataSpec(
        observation_spec=specs.ArraySpec([1, 2, 3], int),
        action_spec=specs.ArraySpec([1], float))
    conf = agent_application.make_config(QConfig(), params)
    agent_trainer = DQNAgent(dataspec, conf)
    agent = agent_trainer.init_agent()
    mock_qnetwork.assert_called_once_with(
        dataspec.observation_spec,
        dataspec.action_spec,
        fc_layer_params=[100, 150, 90])
    mock_agent.assert_called_once_with(
        time_step_spec=mock.ANY,  # TODO
        action_spec=dataspec.action_spec,
        q_network=mock_qnetwork.return_value,
        train_step_counter=mock.ANY,  # TODO
        optimizer=mock.ANY,  # TODO
        epsilon_greedy=conf.policy.epsilon_greedy,
        n_step_update=conf.trajectory.n_step)
    self.assertEqual(agent, mock_agent.return_value)