    def _make_app(self, rp_buffer):
        params = [
            "training.batch_size=32", "training.num_iterations=1",
            "project.tensorboard_path=/tmp/test_tb_path"
        ]
        conf = agent_application.make_config(QConfig(), params)

        dataspec = agent_application.DataSpec(observation_spec=None,
                                              action_spec=None)

        class Env:
            def __init__(self):
                self.env_id_cols = ["env_id_1"]
                self.ts_id_col = "ts_1"
                self.obs_cols = ["obs_1", "obs_2"]

        app = agent_application.AgentApplication(
            data_spec=dataspec,
            agent=DQNAgent,
            config=conf,
            env=Env(),
            first_timestep_dt=datetime.datetime.now(),
            training_interval=datetime.timedelta(days=1))

        app.init_replay_buffer = lambda: rp_buffer
        return app
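
A possible usage sketch (not from the original suite): assuming _make_app lives on a unittest.TestCase, a test could inject a MagicMock as the replay buffer and check that the helper wires it through.

    # Illustrative only: assumes _make_app is defined on a unittest.TestCase
    # and that a MagicMock is an acceptable stand-in for the replay buffer.
    def test_make_app_wires_replay_buffer(self):
        fake_buffer = mock.MagicMock(name="replay_buffer")
        app = self._make_app(fake_buffer)
        # _make_app overrides init_replay_buffer, so the app should hand back
        # exactly the buffer that was injected.
        self.assertIs(app.init_replay_buffer(), fake_buffer)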
Example n. 2
    def test_default_config(self, mock_critic_network, mock_actor_network, mock_agent):
        params = ["agent.actor_fc_layer_params=[100,10]", "agent.observation_fc_layer_params=[1,2,3]",
                  "agent.action_fc_layer_params=[1,2,3,4]", "agent.joint_fc_layer_params=[5]"]

        obs_spec = "obs_spec"
        dataspec = agent_application.DataSpec(
            observation_spec=specs.ArraySpec([1,2,3], int),
            action_spec=specs.ArraySpec([1], float)
        )
        conf = agent_application.make_config(DDPGConfig(), params)

        agent_trainer = DDPGAgent(dataspec, conf)
        agent = agent_trainer.init_agent()

        mock_critic_network.assert_called_once_with((dataspec.observation_spec, dataspec.action_spec),
                                                    observation_fc_layer_params=[1,2,3],
                                                    action_fc_layer_params=[1,2,3,4],
                                                    joint_fc_layer_params=[5])

        mock_actor_network.assert_called_once_with(dataspec.observation_spec, dataspec.action_spec,
                                                   fc_layer_params=[100,10])
        mock_agent.assert_called_once_with(
            time_step_spec=mock.ANY, # TODO
            action_spec=dataspec.action_spec,
            train_step_counter=mock.ANY, # TODO
            actor_network=mock_actor_network.return_value,
            critic_network=mock_critic_network.return_value,
            actor_optimizer=mock.ANY, #TODO
            critic_optimizer=mock.ANY, #TODO
            td_errors_loss_fn=None,
            target_actor_network=None,
            target_critic_network=None,
        )
        self.assertEqual(agent, mock_agent.return_value)
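
For orientation, the argument order in the test signature above matches what stacked mock.patch decorators would inject (bottom-most decorator supplies the first mock argument); the patch targets below are illustrative paths, not taken from the project.

    # Hypothetical decorator stack producing this argument order; the dotted
    # patch targets are placeholders, not the project's real module paths.
    @mock.patch("ddpg_agent_module.DdpgAgent")      # -> mock_agent (last argument)
    @mock.patch("ddpg_agent_module.ActorNetwork")   # -> mock_actor_network
    @mock.patch("ddpg_agent_module.CriticNetwork")  # -> mock_critic_network (first after self)
    def test_default_config(self, mock_critic_network, mock_actor_network, mock_agent):
        ...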
Example n. 3
    def test_default_config(self, mock_critic_network, mock_actor_network, mock_agent):
        params = ["agent.actor_fc_layer_params=[100,10]", "agent.observation_fc_layer_params=[1,2,3]",
                  "agent.action_fc_layer_params=[1,2,3,4]", "agent.joint_fc_layer_params=[5]",
                  "agent.ou_stddev=0.1",
                  "agent.ou_damping=0.2",
                  "agent.target_update_tau=0.3",
                  "agent.target_update_period=1",
                  "agent.dqda_clipping=1.1",
                  "agent.reward_scale_factor=1.2",
                  "agent.gradient_clipping=1.3",
                  "agent.debug_summaries=True",
                  "agent.summarize_grads_and_vars=True",
                  "agent.name=Patrick"]

        obs_spec = "obs_spec"
        dataspec = agent_application.DataSpec(
            observation_spec=specs.ArraySpec([1,2,3], int),
            action_spec=specs.ArraySpec([1], float)
        )
        conf = agent_application.make_config(DDPGConfig(), params)

        agent_trainer = DDPGAgent(dataspec, conf)
        agent = agent_trainer.init_agent()

        mock_critic_network.assert_called_once_with((dataspec.observation_spec, dataspec.action_spec),
                                                    observation_fc_layer_params=[1,2,3],
                                                    action_fc_layer_params=[1,2,3,4],
                                                    joint_fc_layer_params=[5])

        mock_actor_network.assert_called_once_with(dataspec.observation_spec, dataspec.action_spec,
                                                   fc_layer_params=[100,10])
        mock_agent.assert_called_once_with(
            time_step_spec=mock.ANY, # TODO
            action_spec=dataspec.action_spec,
            train_step_counter=mock.ANY, # TODO
            actor_network=mock_actor_network.return_value,
            critic_network=mock_critic_network.return_value,
            actor_optimizer=mock.ANY, #TODO
            critic_optimizer=mock.ANY, #TODO
            td_errors_loss_fn=None,
            target_actor_network=None,
            target_critic_network=None,
            ou_stddev=0.1,
            ou_damping=0.2,
            target_update_tau=0.3,
            target_update_period=1,
            dqda_clipping=1.1,
            reward_scale_factor=1.2,
            gradient_clipping=1.3,
            debug_summaries=True,
            summarize_grads_and_vars=True,
            name="Patrick"
        )
        self.assertEqual(agent, mock_agent.return_value)
Example n. 4
def make_runner(num_runs=4, num_eval_episodes=100, eval_interval=1):

    # get run config
    params = [
        "agent.optimizer.learning_rate=0.01",
        "training.num_iterations=10000",
        "policy.eps_start=1.0",
        "policy.eps_final=0.1",
        "agent.fc_layer_params=[100,]",
        "trajectory.trajectory_training_window=100",
        "project.application_name=cartpole_sim",
        "project.dm_storage_path=/tmp/rl_applications/cartpole_sim/%s/" %
        int(time.time()),
        "project.tensorboard_path=/tmp/tb_path/cartpole_sim/%s" %
        datetime.now().strftime("%Y-%m-%dT%H-%M-%S"),
    ]
    conf = agent_application.make_config(QConfig(), params)
    print(OmegaConf.to_yaml(conf))

    # create batch of cartpole environments
    first_timestep_dt = datetime(year=2019, month=8, day=7, hour=10)
    training_interval = timedelta(days=1)
    spark = get_spark_session()
    tfenv = make_batch_tfenv(make_env, conf, first_timestep_dt,
                             training_interval, spark)

    # finalize RL application
    data_spec = agent_application.DataSpec(
        action_spec=tfenv.action_spec, observation_spec=tfenv.observation_spec)

    application = agent_application.AgentApplication(
        data_spec=data_spec,
        agent=DQNAgent(data_spec, conf),
        env=tfenv,
        config=conf,
        first_timestep_dt=first_timestep_dt,
        training_interval=training_interval)

    # create the data manager
    dm = build_inmemory_data_manager(application)
    tfenv.set_dm(dm)

    # create simulator runner
    sim_runner = SimulationRunner(application=application,
                                  make_eval_env=make_env,
                                  dm=dm,
                                  num_runs=num_runs,
                                  num_eval_episodes=num_eval_episodes,
                                  eval_interval=eval_interval)
    return sim_runner
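
make_config itself is not shown here; given the dotlist-style overrides and the OmegaConf.to_yaml call above, a plausible sketch of it (an assumption, not the project's actual code) is:

# Plausible sketch of make_config: merge a structured default config
# (e.g. QConfig()) with dotlist-style overrides via OmegaConf.
from omegaconf import OmegaConf

def make_config(default_config, dotlist_params):
    base = OmegaConf.structured(default_config)          # defaults from the config dataclass
    overrides = OmegaConf.from_dotlist(dotlist_params)   # e.g. "training.num_iterations=10000"
    return OmegaConf.merge(base, overrides)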
Example n. 5
    def test_standard_config_dqn(self, mock_qnetwork, mock_agent):
        params = [
            "agent.optimizer.learning_rate=0.01",
            "policy.epsilon_greedy=0.01",
            "trajectory.n_step=1",
            "agent.boltzmann_temperature=200",
            "agent.emit_log_probability=True",
            "agent.target_update_tau=1.0",
            "agent.target_update_period=2",
            "agent.gamma=1.1",
            "agent.reward_scale_factor=1.2",
            "agent.gradient_clipping=1.5",
            "agent.debug_summaries=True",
            "agent.summarize_grads_and_vars=False",
            "agent.name=Patrick",
            "agent.fc_layer_params=[100, 150, 90]",
        ]

        dataspec = agent_application.DataSpec(
            observation_spec=specs.ArraySpec([1, 2, 3], int),
            action_spec=specs.ArraySpec([1], float))
        conf = agent_application.make_config(QConfig(), params)

        agent_trainer = DQNAgent(dataspec, conf)
        agent = agent_trainer.init_agent()

        mock_qnetwork.assert_called_once_with(dataspec.observation_spec,
                                              dataspec.action_spec,
                                              fc_layer_params=[100, 150, 90])
        mock_agent.assert_called_once_with(
            time_step_spec=mock.ANY,  # TODO
            action_spec=dataspec.action_spec,
            q_network=mock_qnetwork.return_value,
            train_step_counter=mock.ANY,  # TODO
            optimizer=mock.ANY,  #TODO
            epsilon_greedy=0.01,
            n_step_update=1,
            boltzmann_temperature=200,
            emit_log_probability=True,
            target_update_tau=1.0,
            target_update_period=2,
            gamma=1.1,
            reward_scale_factor=1.2,
            gradient_clipping=1.5,
            debug_summaries=True,
            summarize_grads_and_vars=False,
            name="Patrick",
        )
        self.assertEqual(agent, mock_agent.return_value)
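
The mocked q_network and agent mirror tf_agents' QNetwork and DqnAgent; for reference, a minimal un-mocked construction with the real tf_agents API might look like the sketch below (the spec shapes and the optimizer choice are illustrative).

# Minimal un-mocked sketch with tf_agents; shapes and optimizer are illustrative.
import tensorflow as tf
from tf_agents.agents.dqn import dqn_agent
from tf_agents.networks import q_network
from tf_agents.specs import tensor_spec
from tf_agents.trajectories import time_step as ts

obs_spec = tensor_spec.TensorSpec(shape=(4,), dtype=tf.float32, name="observation")
act_spec = tensor_spec.BoundedTensorSpec(shape=(), dtype=tf.int64, minimum=0, maximum=1)

q_net = q_network.QNetwork(obs_spec, act_spec, fc_layer_params=(100, 150, 90))
agent = dqn_agent.DqnAgent(
    time_step_spec=ts.time_step_spec(obs_spec),
    action_spec=act_spec,
    q_network=q_net,
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    epsilon_greedy=0.01,
    n_step_update=1,
)
agent.initialize()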
Example n. 6
def make_runner(num_runs=4, num_eval_episodes=100, eval_interval=1):
    params = [
        "agent.actor_optimizer.learning_rate=1e-4",
        "agent.critic_optimizer.learning_rate=1e-3",
        "training.num_iterations=2000",
        "env.num_envs=60",
        "env.num_steps_per_run=50",
        "policy.eps_start=1.0",
        "policy.eps_final=0.1",
        "policy.eps_steps=1000",
        "agent.actor_fc_layer_params=[400, 300]",
        "agent.observation_fc_layer_params=[400,]",
        "agent.joint_fc_layer_params=[300,]",
        "trajectory.trajectory_training_window=100",
        "project.application_name=mountaincar_ddpg",
        "project.dm_storage_path=/tmp/rl_applications/mountaincar_sim/%s/" %
        int(time.time()),
        "project.tensorboard_path=/tmp/tb_path/mountaincar_sim/%s" %
        datetime.now().strftime("%Y-%m-%dT%H-%M-%S"),
    ]
    conf = agent_application.make_config(DDPGConfig(), params)
    print(OmegaConf.to_yaml(conf))

    first_timestep_dt = datetime(year=2019, month=8, day=7, hour=10)
    training_interval = timedelta(days=1)
    spark = get_spark_session()
    tfenv = make_batch_tfenv(make_env, conf, first_timestep_dt,
                             training_interval, spark)

    data_spec = agent_application.DataSpec(
        action_spec=tfenv.action_spec, observation_spec=tfenv.observation_spec)

    application = agent_application.AgentApplication(
        data_spec=data_spec,
        agent=DDPGAgent(data_spec, conf),
        env=tfenv,
        config=conf,
        first_timestep_dt=first_timestep_dt,
        training_interval=training_interval)

    dm = build_inmemory_data_manager(application)
    tfenv.set_dm(dm)
    return SimulationRunner(application=application,
                            make_eval_env=make_env,
                            dm=dm,
                            num_runs=num_runs,
                            num_eval_episodes=num_eval_episodes,
                            eval_interval=eval_interval)
Example n. 7
    def setUp(self):

        mock_agent = MagicMock(Agent)
        dataspec = agent_application.DataSpec(
            observation_spec=specs.ArraySpec([1, 2, 3], int),
            action_spec=specs.ArraySpec([1], float))
        conf = agent_application.make_config(AgentConfig(), [])

        today = datetime(date.today().year,
                         date.today().month,
                         date.today().day)
        env = MagicMock()
        self._mock_agent_init = "MOCKED AGENT"
        mock_agent.init_agent = MagicMock(return_value=self._mock_agent_init)
        self._application = agent_application.AgentApplication(
            data_spec=dataspec,
            agent=mock_agent,
            env=env,
            config=conf,
            first_timestep_dt=today,
            training_interval=timedelta(days=1))
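
With this setUp in place, a follow-up test could check that the application delegates to the mocked agent; note that the .agent attribute used below is an assumption about AgentApplication's interface, not confirmed by the snippet.

    # Hypothetical follow-up test; assumes AgentApplication keeps a reference
    # to the agent it was constructed with (attribute name is an assumption).
    def test_agent_initialization_is_delegated(self):
        self.assertEqual(self._application.agent.init_agent(), self._mock_agent_init)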
Example n. 8
    def test_ignore_missing_config_dqn(self, mock_qnetwork, mock_agent):
        params = ["agent.fc_layer_params=[100, 150, 90]"]

        dataspec = agent_application.DataSpec(
            observation_spec=specs.ArraySpec([1, 2, 3], int),
            action_spec=specs.ArraySpec([1], float))
        conf = agent_application.make_config(QConfig(), params)

        agent_trainer = DQNAgent(dataspec, conf)
        agent = agent_trainer.init_agent()

        mock_qnetwork.assert_called_once_with(dataspec.observation_spec,
                                              dataspec.action_spec,
                                              fc_layer_params=[100, 150, 90])
        mock_agent.assert_called_once_with(
            time_step_spec=mock.ANY,  # TODO
            action_spec=dataspec.action_spec,
            q_network=mock_qnetwork.return_value,
            train_step_counter=mock.ANY,  # TODO
            optimizer=mock.ANY,  #TODO
            epsilon_greedy=conf.policy.epsilon_greedy,
            n_step_update=conf.trajectory.n_step)
        self.assertEqual(agent, mock_agent.return_value)
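
The fallback values conf.policy.epsilon_greedy and conf.trajectory.n_step come from QConfig's defaults; a hedged sketch of how such a structured default could be laid out as dataclasses follows (the layout is inferred from the dotted keys used in these tests, and the concrete default values are assumptions).

# Hedged sketch of a QConfig-style structured default; field defaults are assumptions.
from dataclasses import dataclass, field

@dataclass
class PolicyConfig:
    epsilon_greedy: float = 0.1

@dataclass
class TrajectoryConfig:
    n_step: int = 1

@dataclass
class QConfig:
    policy: PolicyConfig = field(default_factory=PolicyConfig)
    trajectory: TrajectoryConfig = field(default_factory=TrajectoryConfig)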