Code example #1
    def test_add_custom_scalars(self):
        """Charts registered per (category, title) are flushed to the writer once."""
        with TemporaryDirectory() as tmp_dir:
            writer = SummaryWriter(tmp_dir)
            writer.add_custom_scalars = MagicMock()
            with summary_writer_context(writer):
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["a", "b"], category="cat", title="title")
                # Registering the same (category, title) pair again must fail.
                with self.assertRaisesRegex(
                        AssertionError,
                        "Title \\(title\\) is already in category \\(cat\\)"):
                    SummaryWriterContext.add_custom_scalars_multilinechart(
                        ["c", "d"], category="cat", title="title")
                # A fresh title or a fresh category is fine.
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["e", "f"], category="cat", title="title2")
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["g", "h"], category="cat2", title="title")

            SummaryWriterContext.add_custom_scalars(writer)
            expected_layout = {
                "cat": {
                    "title": ["Multiline", ["a", "b"]],
                    "title2": ["Multiline", ["e", "f"]],
                },
                "cat2": {"title": ["Multiline", ["g", "h"]]},
            }
            writer.add_custom_scalars.assert_called_once_with(expected_layout)
Code example #2
 def test_swallowing_exception(self):
     """Exception types listed in ``exceptions_to_ignore`` must not propagate."""
     with TemporaryDirectory() as tmp_dir:
         writer = SummaryWriter(tmp_dir)
         # Force add_scalar to blow up so the context has something to swallow.
         writer.add_scalar = MagicMock(side_effect=NotImplementedError("test"))
         writer.exceptions_to_ignore = (NotImplementedError, KeyError)
         with summary_writer_context(writer):
             # Should complete without raising despite the mocked failure.
             SummaryWriterContext.add_scalar("test", torch.ones(1))
Code example #3
 def test_writing_stack(self):
     """Nested contexts route each scalar to the innermost active writer."""
     with TemporaryDirectory() as tmp_dir1, TemporaryDirectory() as tmp_dir2:
         writer1 = SummaryWriter(tmp_dir1)
         writer2 = SummaryWriter(tmp_dir2)
         writer1.add_scalar = MagicMock()
         writer2.add_scalar = MagicMock()
         with summary_writer_context(writer1):
             with summary_writer_context(writer2):
                 SummaryWriterContext.add_scalar("test2", torch.ones(1))
             SummaryWriterContext.add_scalar("test1", torch.zeros(1))
         # Each writer saw exactly the scalar logged while it was on top
         # of the context stack.
         writer1.add_scalar.assert_called_once_with(
             "test1", torch.zeros(1), global_step=0)
         writer2.add_scalar.assert_called_once_with(
             "test2", torch.ones(1), global_step=0)
Code example #4
def run_test_offline(
    env_name: str,
    model: ModelManager__Union,
    replay_memory_size: int,
    num_batches_per_epoch: int,
    num_train_epochs: int,
    passing_score_bar: float,
    num_eval_episodes: int,
    minibatch_size: int,
    use_gpu: bool,
):
    """Train a model offline on random-policy transitions and check eval reward.

    Fills a replay buffer to capacity with a random policy, trains for
    ``num_train_epochs`` epochs of ``num_batches_per_epoch`` batches each,
    then asserts that the mean reward over ``num_eval_episodes`` evaluation
    episodes reaches ``passing_score_bar``.

    Fix: the per-epoch ``eval_rewards = evaluate_cem(...)`` binding was dead
    (never read, and shadowed by the final evaluation); the call is kept for
    its logging side effects but the unused assignment is dropped.
    """
    env = Gym(env_name=env_name)
    # Seed both the env and its action space for reproducible rollouts.
    env.seed(SEED)
    env.action_space.seed(SEED)
    normalization = build_normalizer(env)
    logger.info(f"Normalization is: \n{pprint.pformat(normalization)}")

    manager = model.value
    trainer = manager.initialize_trainer(
        use_gpu=use_gpu,
        reward_options=RewardOptions(),
        normalization_data_map=normalization,
    )

    # Always fill the replay buffer completely with random-policy transitions.
    replay_buffer = ReplayBuffer(replay_capacity=replay_memory_size,
                                 batch_size=minibatch_size)
    random_policy = make_random_policy_for_env(env)
    agent = Agent.create_for_env(env, policy=random_policy)
    fill_replay_buffer(
        env=env,
        replay_buffer=replay_buffer,
        desired_size=replay_memory_size,
        agent=agent,
    )

    device = torch.device("cuda") if use_gpu else None
    # pyre-fixme[6]: Expected `device` for 2nd param but got `Optional[torch.device]`.
    trainer_preprocessor = make_replay_buffer_trainer_preprocessor(
        trainer, device, env)

    writer = SummaryWriter()
    with summary_writer_context(writer):
        for epoch in range(num_train_epochs):
            logger.info(f"Evaluating before epoch {epoch}: ")
            # Result intentionally unused — presumably evaluate_cem logs the
            # rewards itself; confirm against its implementation.
            evaluate_cem(env, manager, 1)
            for _ in tqdm(range(num_batches_per_epoch)):
                train_batch = replay_buffer.sample_transition_batch()
                preprocessed_batch = trainer_preprocessor(train_batch)
                trainer.train(preprocessed_batch)

    logger.info(f"Evaluating after training for {num_train_epochs} epochs: ")
    eval_rewards = evaluate_cem(env, manager, num_eval_episodes)
    mean_rewards = np.mean(eval_rewards)
    assert (mean_rewards >= passing_score_bar
            ), f"{mean_rewards} doesn't pass the bar {passing_score_bar}."
Code example #5
 def test_not_swallowing_exception(self):
     """Without ``exceptions_to_ignore`` configured, writer errors propagate."""
     with TemporaryDirectory() as tmp_dir:
         writer = SummaryWriter(tmp_dir)
         # add_scalar raises; the context must let it escape to the caller.
         writer.add_scalar = MagicMock(side_effect=NotImplementedError("test"))
         with self.assertRaisesRegex(
                 NotImplementedError,
                 "test"), summary_writer_context(writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
Code example #6
 def test_writing(self):
     """A scalar logged inside the context reaches the active writer once."""
     with TemporaryDirectory() as tmp_dir:
         writer = SummaryWriter(tmp_dir)
         writer.add_scalar = MagicMock()
         with summary_writer_context(writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
         writer.add_scalar.assert_called_once_with(
             "test", torch.ones(1), global_step=0)
Code example #7
 def test_global_step(self):
     """``increase_global_step`` bumps the step attached to later scalars."""
     with TemporaryDirectory() as tmp_dir:
         writer = SummaryWriter(tmp_dir)
         writer.add_scalar = MagicMock()
         with summary_writer_context(writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
             SummaryWriterContext.increase_global_step()
             SummaryWriterContext.add_scalar("test", torch.zeros(1))
         expected = [
             call("test", torch.ones(1), global_step=0),
             call("test", torch.zeros(1), global_step=1),
         ]
         writer.add_scalar.assert_has_calls(expected)
         # Exactly two calls — nothing extra slipped through.
         self.assertEqual(2, len(writer.add_scalar.mock_calls))
Code example #8
def train_policy(
    env: EnvWrapper,
    training_policy: Policy,
    num_train_episodes: int,
    post_step: Optional[PostStep] = None,
    post_episode: Optional[PostEpisode] = None,
    use_gpu: bool = False,
) -> np.ndarray:
    """Run ``num_train_episodes`` episodes with the training policy.

    Returns the cumulative reward of each episode as a numpy array.
    """
    device = torch.device("cuda") if use_gpu else torch.device("cpu")
    agent = Agent.create_for_env(
        env,
        policy=training_policy,
        post_transition_callback=post_step,
        post_episode_callback=post_episode,
        device=device,
    )
    running_reward = 0
    writer = SummaryWriter()
    with summary_writer_context(writer):
        train_rewards = []
        with trange(num_train_episodes, unit=" epoch") as t:
            for i in t:
                # Note: run_episode also performs a training step for the agent, if specified in post_step
                trajectory = run_episode(
                    env=env, agent=agent, mdp_id=i, max_steps=200
                )
                ep_reward = trajectory.calculate_cumulative_reward()
                train_rewards.append(ep_reward)
                # Exponential moving average shown in the progress bar.
                running_reward *= REWARD_DECAY
                running_reward += (1 - REWARD_DECAY) * ep_reward
                t.set_postfix(reward=running_reward)

    logger.info("============Train rewards=============")
    logger.info(train_rewards)
    logger.info(
        f"average: {np.mean(train_rewards)};\tmax: {np.max(train_rewards)}")
    return np.array(train_rewards)
Code example #9
def train_workflow(
    model_manager: ModelManager,
    train_dataset: Optional[Dataset],
    eval_dataset: Optional[Dataset],
    *,
    num_epochs: int,
    use_gpu: bool,
    named_model_ids: ModuleNameToEntityId,
    child_workflow_id: int,
    setup_data: Optional[Dict[str, bytes]] = None,
    normalization_data_map: Optional[Dict[str, NormalizationData]] = None,
    reward_options: Optional[RewardOptions] = None,
    reader_options: Optional[ReaderOptions] = None,
    resource_options: Optional[ResourceOptions] = None,
    warmstart_path: Optional[str] = None,
) -> RLTrainingOutput:
    """Train a model end-to-end and export its serving modules to TorchScript.

    Builds a trainer from ``model_manager``, trains it under a TensorBoard
    summary-writer context, saves each serving module with ``torch.jit.save``,
    and returns the training output with the TorchScript file paths filled in.

    NOTE(review): ``named_model_ids`` and ``child_workflow_id`` are not read
    anywhere in this body — presumably part of a fixed workflow interface;
    confirm before removing.
    """
    writer = SummaryWriter()
    logger.info("TensorBoard logging location is: {}".format(writer.log_dir))

    # If setup_data is provided, build and initialize a data module from it;
    # otherwise training reads directly from the datasets passed in.
    if setup_data is not None:
        data_module = model_manager.get_data_module(
            setup_data=setup_data,
            reward_options=reward_options,
            reader_options=reader_options,
            resource_options=resource_options,
        )
        assert data_module is not None
        data_module.setup()
    else:
        data_module = None

    # Normalization must come from somewhere: either supplied explicitly
    # or derived from the data module (which must then exist).
    if normalization_data_map is None:
        assert data_module is not None
        normalization_data_map = data_module.get_normalization_data_map()

    # `or None` normalizes a falsy (e.g. empty-string) warmstart path to None.
    warmstart_input_path = warmstart_path or None
    trainer_module = model_manager.build_trainer(
        use_gpu=use_gpu,
        reward_options=reward_options,
        normalization_data_map=normalization_data_map,
    )

    # Fall back to default options when none were supplied.
    if not reader_options:
        reader_options = ReaderOptions()

    if not resource_options:
        resource_options = ResourceOptions()

    with summary_writer_context(writer):
        train_output, lightning_trainer = model_manager.train(
            trainer_module,
            train_dataset,
            eval_dataset,
            None,
            data_module,
            num_epochs,
            reader_options,
            resource_options,
            checkpoint_path=warmstart_input_path,
        )

    # Export every serving module to a timestamped TorchScript file and
    # record the path per module name.
    output_paths = {}
    for module_name, serving_module in model_manager.build_serving_modules(
            trainer_module, normalization_data_map).items():
        torchscript_output_path = f"{model_manager.__class__.__name__}_{module_name}_{round(time.time())}.torchscript"
        torch.jit.save(serving_module, torchscript_output_path)
        logger.info(f"Saved {module_name} to {torchscript_output_path}")
        output_paths[module_name] = torchscript_output_path
    return dataclasses.replace(train_output, output_paths=output_paths)
Code example #10
 def test_swallowing_histogram_value_error(self):
     """Histogram value errors inside the context are swallowed silently."""
     with TemporaryDirectory() as tmp_dir:
         writer = SummaryWriter(tmp_dir)
         with summary_writer_context(writer):
             # torch.ones(100, 1) presumably makes add_histogram raise a
             # ValueError internally; the context should ignore it.
             SummaryWriterContext.add_histogram(
                 "bad_histogram", torch.ones(100, 1))
Code example #11
 def test_with_none(self):
     """With no active writer, add_scalar is a no-op returning None."""
     with summary_writer_context(None):
         result = SummaryWriterContext.add_scalar("test", torch.ones(1))
         self.assertIsNone(result)