    def _make_app(self, rp_buffer):
        params = [
            "training.batch_size=32", "training.num_iterations=1",
            "project.tensorboard_path=/tmp/test_tb_path"
        ]
        conf = agent_application.make_config(QConfig(), params)

        dataspec = agent_application.DataSpec(observation_spec=None,
                                              action_spec=None)

        class Env:
            def __init__(self):
                self.env_id_cols = ["env_id_1"]
                self.ts_id_col = "ts_1"
                self.obs_cols = ["obs_1", "obs_2"]

        app = agent_application.AgentApplication(
            data_spec=dataspec,
            agent=DQNAgent,
            config=conf,
            env=Env(),
            first_timestep_dt=datetime.datetime.now(),
            training_interval=datetime.timedelta(days=1))

        app.init_replay_buffer = lambda: rp_buffer
        return app
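
A possible way a test could call the helper above, sketched under the assumption that the enclosing TestCase has mock available (as the later examples do); the test method name and the MagicMock replay buffer are illustrative, not from the source.

    # Hedged usage sketch: MagicMock stands in for a real replay buffer and the
    # test method name is hypothetical.
    def test_app_uses_injected_replay_buffer(self):
        rp_buffer = mock.MagicMock()
        app = self._make_app(rp_buffer)
        self.assertIs(app.init_replay_buffer(), rp_buffer)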
Example #2
    def test_qconfig_values(self):
        params = [
            "agent.optimizer.learning_rate=0.01",
            "agent.fc_layer_params=[100, 150, 90]",
        ]

        conf = agent_application.make_config(QConfig(), params)
        self.assertEqual(conf.agent.optimizer.learning_rate, 0.01)
        # default is Adam
        self.assertEqual(conf.agent.optimizer.optimizer, Optimizer.Adam)
        self.assertEqual(conf.agent.fc_layer_params, [100, 150, 90])
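
The override strings use a dotted-key syntax; below is a minimal sketch of how such overrides merge into a structured config, assuming make_config builds on OmegaConf (which Example #3's OmegaConf.to_yaml call suggests). The dataclasses are illustrative stand-ins, not the real QConfig.

# Minimal sketch of dotted-key overrides merged into a structured config.
from dataclasses import dataclass, field
from typing import List

from omegaconf import OmegaConf


@dataclass
class OptimizerConf:
    learning_rate: float = 0.001


@dataclass
class AgentConf:
    optimizer: OptimizerConf = field(default_factory=OptimizerConf)
    fc_layer_params: List[int] = field(default_factory=lambda: [100])


base = OmegaConf.structured(AgentConf)
overrides = OmegaConf.from_dotlist([
    "optimizer.learning_rate=0.01",
    "fc_layer_params=[100, 150, 90]",
])
conf = OmegaConf.merge(base, overrides)
assert conf.optimizer.learning_rate == 0.01
assert list(conf.fc_layer_params) == [100, 150, 90]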
Example #3
def make_runner(num_runs=4, num_eval_episodes=100, eval_interval=1):

    # get run config
    params = [
        "agent.optimizer.learning_rate=0.01",
        "training.num_iterations=10000",
        "policy.eps_start=1.0",
        "policy.eps_final=0.1",
        "agent.fc_layer_params=[100,]",
        "trajectory.trajectory_training_window=100",
        "project.application_name=cartpole_sim",
        "project.dm_storage_path=/tmp/rl_applications/cartpole_sim/%s/" %
        int(time.time()),
        "project.tensorboard_path=/tmp/tb_path/cartpole_sim/%s" %
        datetime.now().strftime("%Y-%m-%dT%H-%M-%S"),
    ]
    conf = agent_application.make_config(QConfig(), params)
    print(OmegaConf.to_yaml(conf))

    # create batch of cartpole environments
    first_timestep_dt = datetime(year=2019, month=8, day=7, hour=10)
    training_interval = timedelta(days=1)
    spark = get_spark_session()
    tfenv = make_batch_tfenv(make_env, conf, first_timestep_dt,
                             training_interval, spark)

    # finalize RL application
    data_spec = agent_application.DataSpec(
        action_spec=tfenv.action_spec, observation_spec=tfenv.observation_spec)

    application = agent_application.AgentApplication(
        data_spec=data_spec,
        agent=DQNAgent(data_spec, conf),
        env=tfenv,
        config=conf,
        first_timestep_dt=first_timestep_dt,
        training_interval=training_interval)

    # create the data manager
    dm = build_inmemory_data_manager(application)
    tfenv.set_dm(dm)

    # create simulator runner
    sim_runner = SimulationRunner(application=application,
                                  make_eval_env=make_env,
                                  dm=dm,
                                  num_runs=num_runs,
                                  num_eval_episodes=num_eval_episodes,
                                  eval_interval=eval_interval)
    return sim_runner
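
How the factory might be invoked; SimulationRunner's entry point is not visible in this snippet, so the final call is left as a hypothetical placeholder rather than the real API.

# Hedged usage sketch for make_runner.
sim_runner = make_runner(num_runs=2, num_eval_episodes=10, eval_interval=5)
# sim_runner.run()  # hypothetical method name; check SimulationRunner's API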
Example #4
    def test_standard_config_dqn(self, mock_qnetwork, mock_agent):
        params = [
            "agent.optimizer.learning_rate=0.01",
            "policy.epsilon_greedy=0.01",
            "trajectory.n_step=1",
            "agent.boltzmann_temperature=200",
            "agent.emit_log_probability=True",
            "agent.target_update_tau=1.0",
            "agent.target_update_period=2",
            "agent.gamma=1.1",
            "agent.reward_scale_factor=1.2",
            "agent.gradient_clipping=1.5",
            "agent.debug_summaries=True",
            "agent.summarize_grads_and_vars=False",
            "agent.name=Patrick",
            "agent.fc_layer_params=[100, 150, 90]",
        ]

        dataspec = agent_application.DataSpec(
            observation_spec=specs.ArraySpec([1, 2, 3], int),
            action_spec=specs.ArraySpec([1], float))
        conf = agent_application.make_config(QConfig(), params)

        agent_trainer = DQNAgent(dataspec, conf)
        agent = agent_trainer.init_agent()

        mock_qnetwork.assert_called_once_with(dataspec.observation_spec,
                                              dataspec.action_spec,
                                              fc_layer_params=[100, 150, 90])
        mock_agent.assert_called_once_with(
            time_step_spec=mock.ANY,  # TODO
            action_spec=dataspec.action_spec,
            q_network=mock_qnetwork.return_value,
            train_step_counter=mock.ANY,  # TODO
            optimizer=mock.ANY,  # TODO
            epsilon_greedy=0.01,
            n_step_update=1,
            boltzmann_temperature=200,
            emit_log_probability=True,
            target_update_tau=1.0,
            target_update_period=2,
            gamma=1.1,
            reward_scale_factor=1.2,
            gradient_clipping=1.5,
            debug_summaries=True,
            summarize_grads_and_vars=False,
            name="Patrick",
        )
        self.assertEqual(agent, mock_agent.return_value)
Example #5
    def test_ignore_missing_config_dqn(self, mock_qnetwork, mock_agent):
        params = ["agent.fc_layer_params=[100, 150, 90]"]

        dataspec = agent_application.DataSpec(
            observation_spec=specs.ArraySpec([1, 2, 3], int),
            action_spec=specs.ArraySpec([1], float))
        conf = agent_application.make_config(QConfig(), params)

        agent_trainer = DQNAgent(dataspec, conf)
        agent = agent_trainer.init_agent()

        mock_qnetwork.assert_called_once_with(dataspec.observation_spec,
                                              dataspec.action_spec,
                                              fc_layer_params=[100, 150, 90])
        mock_agent.assert_called_once_with(
            time_step_spec=mock.ANY,  # TODO
            action_spec=dataspec.action_spec,
            q_network=mock_qnetwork.return_value,
            train_step_counter=mock.ANY,  # TODO
            optimizer=mock.ANY,  # TODO
            epsilon_greedy=conf.policy.epsilon_greedy,
            n_step_update=conf.trajectory.n_step)
        self.assertEqual(agent, mock_agent.return_value)