def _make_app(self, rp_buffer):
    params = [
        "training.batch_size=32",
        "training.num_iterations=1",
        "project.tensorboard_path=/tmp/test_tb_path",
    ]
    conf = agent_application.make_config(QConfig(), params)
    dataspec = agent_application.DataSpec(observation_spec=None, action_spec=None)

    # Minimal stub environment exposing only the column attributes the
    # application reads.
    class Env:
        def __init__(self):
            self.env_id_cols = ["env_id_1"]
            self.ts_id_col = "ts_1"
            self.obs_cols = ["obs_1", "obs_2"]

    app = agent_application.AgentApplication(
        data_spec=dataspec,
        agent=DQNAgent,
        config=conf,
        env=Env(),
        first_timestep_dt=datetime.datetime.now(),
        training_interval=datetime.timedelta(days=1))
    # Inject the given replay buffer instead of building a real one.
    app.init_replay_buffer = lambda: rp_buffer
    return app
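# A minimal sketch of how _make_app might be exercised in a test, assuming a
# mock.MagicMock stands in for the replay buffer; the test name and assertion
# below are illustrative, not part of the suite above.
def test_make_app_uses_injected_replay_buffer(self):
    rp_buffer = mock.MagicMock()
    app = self._make_app(rp_buffer)
    # _make_app patches init_replay_buffer, so the app should hand back
    # exactly the buffer we injected.
    self.assertIs(app.init_replay_buffer(), rp_buffer)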
def test_qconfig_values(self):
    params = [
        "agent.optimizer.learning_rate=0.01",
        "agent.fc_layer_params=[100, 150, 90]",
    ]
    conf = agent_application.make_config(QConfig(), params)
    self.assertEqual(conf.agent.optimizer.learning_rate, 0.01)
    # default is Adam
    self.assertEqual(conf.agent.optimizer.optimizer, Optimizer.Adam)
    self.assertEqual(conf.agent.fc_layer_params, [100, 150, 90])
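# For context: a plausible sketch of what make_config does under the hood,
# assuming it merges the structured QConfig schema with the dotlist overrides
# via OmegaConf (the real implementation lives in agent_application and may
# differ). Merging against a structured dataclass schema means a typo in a
# key path raises instead of silently adding a new key.
from omegaconf import OmegaConf

def make_config_sketch(schema, dotlist_params):
    base = OmegaConf.structured(schema)                  # typed schema from the dataclass
    overrides = OmegaConf.from_dotlist(dotlist_params)   # parse "key=value" strings
    return OmegaConf.merge(base, overrides)              # overrides win over defaults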
def make_runner(num_runs=4, num_eval_episodes=100, eval_interval=1):
    # get run config
    params = [
        "agent.optimizer.learning_rate=0.01",
        "training.num_iterations=10000",
        "policy.eps_start=1.0",
        "policy.eps_final=0.1",
        "agent.fc_layer_params=[100,]",
        "trajectory.trajectory_training_window=100",
        "project.application_name=cartpole_sim",
        "project.dm_storage_path=/tmp/rl_applications/cartpole_sim/%s/" % int(time.time()),
        "project.tensorboard_path=/tmp/tb_path/cartpole_sim/%s"
        % datetime.now().strftime("%Y-%m-%dT%H-%M-%S"),
    ]
    conf = agent_application.make_config(QConfig(), params)
    print(OmegaConf.to_yaml(conf))

    # create batch of cartpole environments
    first_timestep_dt = datetime(year=2019, month=8, day=7, hour=10)
    training_interval = timedelta(days=1)
    spark = get_spark_session()
    tfenv = make_batch_tfenv(make_env, conf, first_timestep_dt, training_interval, spark)

    # finalize RL application
    data_spec = agent_application.DataSpec(
        action_spec=tfenv.action_spec,
        observation_spec=tfenv.observation_spec)
    application = agent_application.AgentApplication(
        data_spec=data_spec,
        agent=DQNAgent(data_spec, conf),
        env=tfenv,
        config=conf,
        first_timestep_dt=first_timestep_dt,
        training_interval=training_interval)

    # create the data manager
    dm = build_inmemory_data_manager(application)
    tfenv.set_dm(dm)

    # create simulator runner
    sim_runner = SimulationRunner(
        application=application,
        make_eval_env=make_env,
        dm=dm,
        num_runs=num_runs,
        num_eval_episodes=num_eval_episodes,
        eval_interval=eval_interval)
    return sim_runner
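# The eps_start/eps_final pair above suggests an annealed epsilon-greedy
# exploration schedule. A minimal sketch of a linear anneal over
# training.num_iterations, assuming that is how the policy consumes these
# values (the actual schedule lives in the policy code and could differ,
# e.g. be exponential):
def linear_epsilon(step, eps_start=1.0, eps_final=0.1, num_iterations=10000):
    frac = min(step / num_iterations, 1.0)
    return eps_start + frac * (eps_final - eps_start)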
def test_standard_config_dqn(self, mock_qnetwork, mock_agent):
    params = [
        "agent.optimizer.learning_rate=0.01",
        "policy.epsilon_greedy=0.01",
        "trajectory.n_step=1",
        "agent.boltzmann_temperature=200",
        "agent.emit_log_probability=True",
        "agent.target_update_tau=1.0",
        "agent.target_update_period=2",
        "agent.gamma=1.1",
        "agent.reward_scale_factor=1.2",
        "agent.gradient_clipping=1.5",
        "agent.debug_summaries=True",
        "agent.summarize_grads_and_vars=False",
        "agent.name=Patrick",
        "agent.fc_layer_params=[100, 150, 90]",
    ]
    dataspec = agent_application.DataSpec(
        observation_spec=specs.ArraySpec([1, 2, 3], int),
        action_spec=specs.ArraySpec([1], float))
    conf = agent_application.make_config(QConfig(), params)
    agent_trainer = DQNAgent(dataspec, conf)
    agent = agent_trainer.init_agent()
    mock_qnetwork.assert_called_once_with(
        dataspec.observation_spec,
        dataspec.action_spec,
        fc_layer_params=[100, 150, 90])
    mock_agent.assert_called_once_with(
        time_step_spec=mock.ANY,  # TODO
        action_spec=dataspec.action_spec,
        q_network=mock_qnetwork.return_value,
        train_step_counter=mock.ANY,  # TODO
        optimizer=mock.ANY,  # TODO
        epsilon_greedy=0.01,
        n_step_update=1,
        boltzmann_temperature=200,
        emit_log_probability=True,
        target_update_tau=1.0,
        target_update_period=2,
        gamma=1.1,
        reward_scale_factor=1.2,
        gradient_clipping=1.5,
        debug_summaries=True,
        summarize_grads_and_vars=False,
        name="Patrick",
    )
    self.assertEqual(agent, mock_agent.return_value)
def test_ignore_missing_config_dqn(self, mock_qnetwork, mock_agent):
    params = ["agent.fc_layer_params=[100, 150, 90]"]
    dataspec = agent_application.DataSpec(
        observation_spec=specs.ArraySpec([1, 2, 3], int),
        action_spec=specs.ArraySpec([1], float))
    conf = agent_application.make_config(QConfig(), params)
    agent_trainer = DQNAgent(dataspec, conf)
    agent = agent_trainer.init_agent()
    mock_qnetwork.assert_called_once_with(
        dataspec.observation_spec,
        dataspec.action_spec,
        fc_layer_params=[100, 150, 90])
    mock_agent.assert_called_once_with(
        time_step_spec=mock.ANY,  # TODO
        action_spec=dataspec.action_spec,
        q_network=mock_qnetwork.return_value,
        train_step_counter=mock.ANY,  # TODO
        optimizer=mock.ANY,  # TODO
        epsilon_greedy=conf.policy.epsilon_greedy,
        n_step_update=conf.trajectory.n_step)
    self.assertEqual(agent, mock_agent.return_value)
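# Both tests above receive (mock_qnetwork, mock_agent), which matches a
# decorator stack like the sketch below: mock.patch decorators apply
# bottom-up, so the patch closest to the function supplies the first mock
# argument. The module paths here are assumptions; they must match wherever
# DQNAgent actually imports QNetwork and DqnAgent from.
@mock.patch("tf_agents.agents.dqn.dqn_agent.DqnAgent")
@mock.patch("tf_agents.networks.q_network.QNetwork")
def test_example(self, mock_qnetwork, mock_agent):
    ...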