Code Example #1
def test_combined_monitors():
    with monitor(CombinedMonitor([MonitorSpy(), MonitorSpy()])) as m:
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
        broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
    # Broadcast after the monitor context exits must not be counted.
    broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])

    assert m.monitors[0].num_messages_received == 2
    assert m.monitors[1].num_messages_received == 2
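
The examples in this listing lean on a few test helpers that are not shown. A minimal sketch of `MonitorSpy`, assuming monitors expose a single receive-style callback (the hook name `receive` is an assumption, not taken from the source):

class MonitorSpy:
    """Test double that only counts how many messages it was delivered."""

    def __init__(self):
        self.num_messages_received = 0

    def receive(self, message, infos):  # hypothetical hook name
        self.num_messages_received += 1

Under that assumption, `CombinedMonitor` only needs to keep the given list as `monitors` and forward every broadcast to each of them, which is exactly what the two assertions check.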
Code Example #2
def test_training_monitor_receives_and_logs_training_message(caplog):
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=4)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 4)

    assert caplog.messages == [
        m.TRAINING_LINE_FORMAT.format(steps=4, avg_reward=2.0)
    ]
Code Example #3
def test_training_monitor_logger_is_optional(caplog):
    with caplog.at_level(logging.INFO):
        with monitor(
                make_training_monitor(progress_averaging=1,
                                      performance_sample_size=2)):
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
            broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])

    assert not caplog.messages
Code Example #4
def test_training_monitor_receives_and_aggregates_multiple_messages(caplog):
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=3)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 1)
            broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}] * 2)

    assert caplog.messages == [
        m.TRAINING_LINE_FORMAT.format(steps=3, avg_reward=1 / 3)
    ]
Code Example #5
def test_training_monitor_ignores_episodic_return_being_not_present(caplog):
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=2)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])
            broadcast(Message.TRAINING, infos=[{}])
            assert not caplog.messages
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
            assert caplog.messages == [
                m.TRAINING_LINE_FORMAT.format(steps=3, avg_reward=0.75)
            ]
Code Example #6
def test_training_monitor_can_print_multiple_monitor_messages(caplog):
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=1)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
            broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])

    assert caplog.messages == [
        m.TRAINING_LINE_FORMAT.format(steps=1, avg_reward=2.0),
        m.TRAINING_LINE_FORMAT.format(steps=2, avg_reward=-0.5)
    ]
Code Example #7
def test_training_monitor_prints_training_monitor_messages_at_specified_frequency_even_at_uneven_intervals(
        caplog):
    with caplog.at_level(logging.INFO):
        with monitor(make_training_monitor(logger=logger,
                                           progress_averaging=4)) as m:
            broadcast(Message.TRAINING, infos=[{'episodic_return': +2}] * 3)
            broadcast(Message.TRAINING, infos=[{'episodic_return': -1}] * 2)
            broadcast(Message.TRAINING, infos=[{'episodic_return': +1}] * 3)

    assert caplog.messages == [
        m.TRAINING_LINE_FORMAT.format(steps=4, avg_reward=5 / 4),
        m.TRAINING_LINE_FORMAT.format(steps=8, avg_reward=2 / 4)
    ]
Code Example #8
def test_training_monitor_prints_wall_clock_performance_in_specified_interval(
        caplog):
    with caplog.at_level(logging.INFO):
        with time_scope() as measure:
            with monitor(
                    make_training_monitor(logger=logger,
                                          performance_sample_size=2)) as m:
                time.sleep(0.2)
                broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
                time.sleep(0.2)
                broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])

    eps = 0.01
    expected = 2 / measure.elapsed
    # Formatted-string equality only holds for exact values, so compare the
    # single logged line against a few candidates within +/- eps of the rate.
    assert len(caplog.messages) == 1
    assert caplog.messages[0] in [
        m.PERFORMANCE_LINE_FORMAT.format(steps=2, performance=expected + delta)
        for delta in (-eps, 0.0, eps)
    ]
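
`time_scope` is used here as a context manager whose result exposes the wall-clock duration of the block via `.elapsed`. A minimal sketch consistent with that usage (the implementation details are assumptions, not from the source):

import time
from contextlib import contextmanager


@contextmanager
def time_scope():
    """Yield an object whose `elapsed` attribute holds the block's duration."""

    class Measure:
        elapsed = 0.0

    measure = Measure()
    start = time.perf_counter()
    try:
        yield measure
    finally:
        measure.elapsed = time.perf_counter() - start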
Code Example #9
def test_training_monitor_prints_wall_clock_performance_measure_at_specified_frequency_even_at_uneven_calls(
        caplog):
    with caplog.at_level(logging.INFO):
        with time_scope() as measure:
            with monitor(
                    make_training_monitor(logger=logger,
                                          performance_sample_size=4)) as m:
                time.sleep(0.5)
                broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 3)
                time.sleep(0.3)
                broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 2)

    eps = 0.01
    expected = 5 / measure.elapsed
    # Formatted-string equality only holds for exact values, so compare the
    # single logged line against a few candidates within +/- eps of the rate.
    assert len(caplog.messages) == 1
    assert caplog.messages[0] in [
        m.PERFORMANCE_LINE_FORMAT.format(steps=5, performance=expected + delta)
        for delta in (-eps, 0.0, eps)
    ]
Code Example #10
def test_tensorboard_monitor_writes_specified_infos():
    writer = SummaryWriterSpy()
    tb_monitor = TensorboardMonitor(
        writer,
        progress_averaging=2,
        scalars=dict(episodic_return='returns/episodic'))
    with monitor(tb_monitor):
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}] * 2)
        broadcast(Message.TRAINING, infos=[{'episodic_return': None}])
        broadcast(Message.TRAINING,
                  infos=[{
                      'episodic_return': -0.5,
                      'other_value': "its a string"
                  }])
    broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])

    assert writer.received_scalars == [('returns/episodic', 2, 0),
                                       ('returns/episodic', 2, 1),
                                       ('returns/episodic', 0.75, 3)]
    assert tb_monitor.step == 4
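
`SummaryWriterSpy` only needs the `add_scalar(tag, value, step)` part of the TensorBoard `SummaryWriter` interface, recording each call as a `(tag, value, step)` tuple. A minimal sketch under that assumption:

class SummaryWriterSpy:
    """Records add_scalar calls instead of writing TensorBoard event files."""

    def __init__(self):
        self.received_scalars = []

    def add_scalar(self, tag, scalar_value, global_step=None):
        self.received_scalars.append((tag, scalar_value, global_step))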
Code Example #11
writer = SummaryWriter()


def evaluate_agent(trainable):
    e = make_env()
    video, rewards = render_trajectory(e,
                                       trainable.policy,
                                       reward_infos=['episodic_return'])
    writer.add_video("trajectory", video, trainable.steps_trained, fps=40)
    writer.add_scalar("rewards/total", rewards['total_reward'],
                      trainable.steps_trained)
    writer.add_scalar("returns/total", rewards['total_episodic_return'],
                      trainable.steps_trained)


log_monitor = LogMonitor(logger,
                         progress_averaging=100,
                         performance_sample_size=10000)
tb_monitor = TensorboardMonitor(
    writer, progress_averaging=10,
    scalars=dict(episodic_return='returns/episodic'))
env = MultipleEnvs(make_env, num_envs=16)
with active_gym(env) as env, monitor(CombinedMonitor([log_monitor,
                                                      tb_monitor])):
    trainer = A2CTrainer(env, config={'rollout_horizon': 5, 'device': 'cpu'})
    try:
        amarl.run(trainer,
                  num_steps=int(1e5),
                  step_frequency_fns={int(2e4): evaluate_agent})
    finally:
        # Flush buffered TensorBoard events even if the run aborts.
        writer.close()
Code Example #12
def test_training_monitor_captures_rewards():
    with monitor(make_training_monitor()) as m:
        broadcast(Message.TRAINING, infos=[{'episodic_return': 2}])
        broadcast(Message.TRAINING, infos=[{'episodic_return': -0.5}])

    assert m.captured_returns == [2, -0.5]
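
Taken together, the log-monitor tests pin down a concrete aggregation rule: every info dict advances the step counter, only infos carrying a non-None `episodic_return` feed the average, and a progress line is emitted whenever `progress_averaging` returns have accumulated, even mid-broadcast. A rough reconstruction of that rule (the class and hook names are hypothetical, the line format is a placeholder, and the performance-line behaviour of Code Examples #8 and #9 is omitted):

class TrainingMonitor:
    # Placeholder; the real TRAINING_LINE_FORMAT is not shown in the source.
    TRAINING_LINE_FORMAT = 'steps: {steps} avg. episodic return: {avg_reward}'

    def __init__(self, logger=None, progress_averaging=100):
        self.logger = logger
        self.progress_averaging = progress_averaging
        self.captured_returns = []
        self.steps = 0
        self._window = []  # returns not yet folded into a progress line

    def receive(self, message, infos):  # hypothetical hook name
        for info in infos:
            self.steps += 1
            ret = info.get('episodic_return')
            if ret is None:
                continue  # missing/None returns advance the step count only
            self.captured_returns.append(ret)
            self._window.append(ret)
            if len(self._window) == self.progress_averaging:
                if self.logger is not None:  # logger is optional (Example #3)
                    self.logger.info(self.TRAINING_LINE_FORMAT.format(
                        steps=self.steps,
                        avg_reward=sum(self._window) / len(self._window)))
                self._window.clear()

Tracing Code Example #7 through this sketch: the window fills on the fourth return (steps=4, avg 5/4) and again on the eighth (steps=8, avg 2/4), matching the expected messages.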