コード例 #1
0
ファイル: test_gym.py プロジェクト: sidowaeoa/ReAgent
def train_policy(
    env: EnvWrapper,
    training_policy: Policy,
    num_train_episodes: int,
    post_step: Optional[PostStep] = None,
    post_episode: Optional[PostEpisode] = None,
    use_gpu: bool = False,
) -> np.ndarray:
    """Run training episodes with ``training_policy`` and collect their rewards.

    An agent is created for ``env`` and run for ``num_train_episodes`` episodes
    (each capped at 200 steps) inside a summary-writer context. An exponentially
    decayed running reward (factor ``REWARD_DECAY``) is shown on the progress bar.

    Args:
        env: Environment the agent is created for and the episodes run on.
        training_policy: Policy handed to the agent.
        num_train_episodes: Number of episodes to run.
        post_step: Optional callback passed to the agent as
            ``post_transition_callback``.
        post_episode: Optional callback passed to the agent as
            ``post_episode_callback``.
        use_gpu: Use the ``"cuda"`` device when True, ``"cpu"`` otherwise.

    Returns:
        Array holding the cumulative reward of every episode in run order;
        empty when no episode was run.
    """
    device = torch.device("cuda" if use_gpu else "cpu")
    agent = Agent.create_for_env(
        env,
        policy=training_policy,
        post_transition_callback=post_step,
        post_episode_callback=post_episode,
        device=device,
    )
    running_reward = 0
    writer = SummaryWriter()
    train_rewards = []
    with summary_writer_context(writer):
        with trange(num_train_episodes, unit=" epoch") as t:
            for i in t:
                # Note: run_episode also performs a training step for the agent, if specified in post_step
                trajectory = run_episode(env=env, agent=agent, mdp_id=i, max_steps=200)
                ep_reward = trajectory.calculate_cumulative_reward()
                train_rewards.append(ep_reward)
                # Exponential moving average of the episode reward.
                running_reward *= REWARD_DECAY
                running_reward += (1 - REWARD_DECAY) * ep_reward
                t.set_postfix(reward=running_reward)

    logger.info("============Train rewards=============")
    logger.info(train_rewards)
    # np.max raises ValueError on an empty sequence, so only summarize when
    # at least one episode actually ran.
    if train_rewards:
        logger.info(f"average: {np.mean(train_rewards)};\tmax: {np.max(train_rewards)}")
    return np.array(train_rewards)
コード例 #2
0
    def test_add_custom_scalars(self):
        """Multiline charts registered in a writer context are flushed to the writer once.

        Registering the same title twice within one category must raise an
        AssertionError; the same title in a different category, or a different
        title in the same category, is accepted.
        """
        duplicate_title_regex = "Title \\(title\\) is already in category \\(cat\\)"
        expected_layout = {
            "cat": {
                "title": ["Multiline", ["a", "b"]],
                "title2": ["Multiline", ["e", "f"]],
            },
            "cat2": {
                "title": ["Multiline", ["g", "h"]]
            },
        }
        with TemporaryDirectory() as log_dir:
            tb_writer = SummaryWriter(log_dir)
            tb_writer.add_custom_scalars = MagicMock()
            with summary_writer_context(tb_writer):
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["a", "b"], category="cat", title="title"
                )
                # Same category + same title is rejected.
                with self.assertRaisesRegex(AssertionError, duplicate_title_regex):
                    SummaryWriterContext.add_custom_scalars_multilinechart(
                        ["c", "d"], category="cat", title="title"
                    )
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["e", "f"], category="cat", title="title2"
                )
                SummaryWriterContext.add_custom_scalars_multilinechart(
                    ["g", "h"], category="cat2", title="title"
                )

            SummaryWriterContext.add_custom_scalars(tb_writer)
            tb_writer.add_custom_scalars.assert_called_once_with(expected_layout)
コード例 #3
0
ファイル: model_manager.py プロジェクト: zhaonann/ReAgent
    def train_workflow(
        self,
        train_dataset: Dataset,
        eval_dataset: Optional[Dataset],
        normalization_data_map: Dict[str, NormalizationData],
        model,  # reagent.workflow.model_managers.ModelManager__Union
        num_epochs: int,
        use_gpu: bool,
        parent_workflow_id: int,
        child_workflow_id: int,
        reward_options: Optional[RewardOptions] = None,
        warmstart_path: Optional[str] = None,
    ) -> RLTrainingOutput:
        """Train ``model.value`` and export its serving module as TorchScript.

        The trainer is initialized on the manager taken from ``model.value``,
        training runs inside a summary-writer context, and the serving module
        is saved to a timestamped ``model_<ts>.torchscript`` file in the
        current working directory.

        ``parent_workflow_id`` and ``child_workflow_id`` are accepted but not
        used in this method body.

        Returns:
            The training output with ``output_path`` replaced by the path of
            the saved TorchScript file.
        """
        manager = model.value

        tb_writer = SummaryWriter()
        logger.info("TensorBoard logging location is: {}".format(tb_writer.log_dir))

        manager.initialize_trainer(
            use_gpu=use_gpu,
            reward_options=reward_options,
            normalization_data_map=normalization_data_map,
            # Falsy paths (e.g. an empty string) are passed on as None.
            warmstart_path=warmstart_path if warmstart_path else None,
        )

        with summary_writer_context(tb_writer):
            training_result = manager.train(train_dataset, eval_dataset, num_epochs)

        # TODO: make this a parameter
        torchscript_output_path = f"model_{round(time.time())}.torchscript"
        torch.jit.save(manager.build_serving_module(), torchscript_output_path)
        logger.info(f"Saved torchscript model to {torchscript_output_path}")
        return dataclasses.replace(
            training_result, output_path=torchscript_output_path
        )
コード例 #4
0
 def test_swallowing_exception(self):
     """An exception type listed in ``exceptions_to_ignore`` must not escape add_scalar.

     The writer's ``add_scalar`` is mocked to raise NotImplementedError; the
     test passes only if the call through SummaryWriterContext does not raise.
     """
     with TemporaryDirectory() as log_dir:
         tb_writer = SummaryWriter(log_dir)
         raising_add_scalar = MagicMock(side_effect=NotImplementedError("test"))
         tb_writer.add_scalar = raising_add_scalar
         tb_writer.exceptions_to_ignore = (NotImplementedError, KeyError)
         with summary_writer_context(tb_writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
コード例 #5
0
 def test_writing_stack(self):
     """Nested writer contexts send each add_scalar call to the innermost writer.

     "test2" is logged while the inner context is active and "test1" after it
     has exited; each mocked writer must see exactly its own call.
     """
     with TemporaryDirectory() as outer_dir, TemporaryDirectory() as inner_dir:
         outer_writer = SummaryWriter(outer_dir)
         outer_writer.add_scalar = MagicMock()
         inner_writer = SummaryWriter(inner_dir)
         inner_writer.add_scalar = MagicMock()

         with summary_writer_context(outer_writer):
             with summary_writer_context(inner_writer):
                 SummaryWriterContext.add_scalar("test2", torch.ones(1))
             SummaryWriterContext.add_scalar("test1", torch.zeros(1))

         expectations = (
             (outer_writer, "test1", torch.zeros(1)),
             (inner_writer, "test2", torch.ones(1)),
         )
         for mocked_writer, tag, value in expectations:
             mocked_writer.add_scalar.assert_called_once_with(
                 tag, value, global_step=0
             )
コード例 #6
0
 def test_not_swallowing_exception(self):
     """Without ``exceptions_to_ignore`` set here, the writer's error must propagate.

     The mocked ``add_scalar`` raises NotImplementedError("test") and the test
     expects that exception to surface from the writer context.
     """
     with TemporaryDirectory() as log_dir:
         tb_writer = SummaryWriter(log_dir)
         tb_writer.add_scalar = MagicMock(side_effect=NotImplementedError("test"))
         with self.assertRaisesRegex(NotImplementedError, "test"):
             with summary_writer_context(tb_writer):
                 SummaryWriterContext.add_scalar("test", torch.ones(1))
コード例 #7
0
 def test_writing(self):
     """A single add_scalar in a writer context reaches the writer with global_step=0."""
     with TemporaryDirectory() as log_dir:
         tb_writer = SummaryWriter(log_dir)
         recorded_add_scalar = MagicMock()
         tb_writer.add_scalar = recorded_add_scalar
         with summary_writer_context(tb_writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
         recorded_add_scalar.assert_called_once_with(
             "test", torch.ones(1), global_step=0
         )
コード例 #8
0
ファイル: test_gym_offline.py プロジェクト: lwzbuaa/ReAgent
def run_test_offline(
    env_name: str,
    max_steps: Optional[int],
    model: ModelManager__Union,
    replay_memory_size: int,
    num_batches_per_epoch: int,
    num_train_epochs: int,
    passing_score_bar: float,
    num_eval_episodes: int,
    use_gpu: bool,
):
    """Train a model from a pre-filled replay buffer and check its evaluation score.

    The environment is seeded with ``SEED``, the replay buffer is filled to
    ``replay_memory_size``, and the trainer is then fed
    ``num_batches_per_epoch`` sampled batches for each of ``num_train_epochs``
    epochs. ``evaluate_cem`` is called once before every epoch and once more
    with ``num_eval_episodes`` after training.

    Raises:
        AssertionError: if the mean reward of the final evaluation is below
            ``passing_score_bar``.
    """
    env = EnvFactory.make(env_name)
    env.seed(SEED)
    env.action_space.seed(SEED)
    normalization = build_normalizer(env)
    logger.info(f"Normalization is: \n{pprint.pformat(normalization)}")

    manager = model.value
    trainer = manager.initialize_trainer(
        use_gpu=use_gpu,
        reward_options=RewardOptions(),
        normalization_data_map=normalization,
    )

    # The buffer is always filled to its full capacity before training starts.
    buffer = ReplayBuffer.create_from_env(
        env=env,
        replay_memory_size=replay_memory_size,
        batch_size=trainer.minibatch_size,
    )
    fill_replay_buffer(env=env, replay_buffer=buffer, desired_size=replay_memory_size)

    if use_gpu:
        device = torch.device("cuda")
    else:
        device = None
    # pyre-fixme[6]: Expected `device` for 2nd param but got `Optional[torch.device]`.
    preprocess_batch = make_replay_buffer_trainer_preprocessor(trainer, device, env)

    tb_writer = SummaryWriter()
    with summary_writer_context(tb_writer):
        for epoch in range(num_train_epochs):
            logger.info(f"Evaluating before epoch {epoch}: ")
            # The per-epoch evaluation result is not used by this function.
            evaluate_cem(env, manager, max_steps, 1)
            for _ in tqdm(range(num_batches_per_epoch)):
                sampled = buffer.sample_transition_batch_tensor()
                trainer.train(preprocess_batch(sampled))

    logger.info(f"Evaluating after training for {num_train_epochs} epochs: ")
    final_rewards = evaluate_cem(env, manager, max_steps, num_eval_episodes)
    mean_rewards = np.mean(final_rewards)
    assert (
        mean_rewards >= passing_score_bar
    ), f"{mean_rewards} doesn't pass the bar {passing_score_bar}."
コード例 #9
0
 def test_global_step(self):
     """increase_global_step bumps the global_step passed to later add_scalar calls.

     The first scalar is expected at global_step=0 and the one logged after
     ``increase_global_step()`` at global_step=1, with no other calls recorded.
     """
     with TemporaryDirectory() as log_dir:
         tb_writer = SummaryWriter(log_dir)
         tb_writer.add_scalar = MagicMock()
         with summary_writer_context(tb_writer):
             SummaryWriterContext.add_scalar("test", torch.ones(1))
             SummaryWriterContext.increase_global_step()
             SummaryWriterContext.add_scalar("test", torch.zeros(1))

         expected_calls = [
             call("test", torch.ones(1), global_step=0),
             call("test", torch.zeros(1), global_step=1),
         ]
         tb_writer.add_scalar.assert_has_calls(expected_calls)
         # assert_has_calls tolerates extra calls, so pin the total count too.
         self.assertEqual(2, len(tb_writer.add_scalar.mock_calls))
コード例 #10
0
ファイル: model_manager.py プロジェクト: tobelegit/ReAgent
    def train_workflow(
        self,
        train_dataset: Dataset,
        eval_dataset: Optional[Dataset],
        normalization_data_map: Dict[str, NormalizationData],
        num_epochs: int,
        use_gpu: bool,
        named_model_ids: ModuleNameToEntityId,
        child_workflow_id: int,
        reward_options: Optional[RewardOptions] = None,
        reader_options: Optional[ReaderOptions] = None,
        resource_options: Optional[ResourceOptions] = None,
        warmstart_path: Optional[str] = None,
    ) -> RLTrainingOutput:
        """Train the model and export every serving module as TorchScript.

        The trainer is initialized, training runs inside a summary-writer
        context, and each module returned by ``build_serving_modules()`` is
        saved to its own ``model_<module_name>_<ts>.torchscript`` file in the
        current working directory.

        ``named_model_ids``, ``child_workflow_id`` and ``resource_options``
        are accepted but not used in this method body.

        Args:
            train_dataset: Dataset forwarded to ``self.train``.
            eval_dataset: Optional evaluation dataset forwarded to ``self.train``.
            normalization_data_map: Forwarded to ``self.initialize_trainer``.
            num_epochs: Number of epochs forwarded to ``self.train``.
            use_gpu: Forwarded to ``self.initialize_trainer``.
            reward_options: Forwarded to ``self.initialize_trainer``.
            reader_options: Reader options for ``self.train``; a default
                ``ReaderOptions()`` is used when falsy.
            warmstart_path: Optional warm-start path; falsy values become None.

        Returns:
            The training output with ``output_paths`` replaced by a mapping
            from module name to the path of its saved TorchScript file.
        """
        writer = SummaryWriter()
        logger.info("TensorBoard logging location is: {}".format(
            writer.log_dir))

        warmstart_input_path = warmstart_path or None
        self.initialize_trainer(
            use_gpu=use_gpu,
            # pyre-fixme[6]: Expected `RewardOptions` for 2nd param but got
            #  `Optional[RewardOptions]`.
            reward_options=reward_options,
            normalization_data_map=normalization_data_map,
            warmstart_path=warmstart_input_path,
        )

        if not reader_options:
            reader_options = ReaderOptions()

        with summary_writer_context(writer):
            train_output = self.train(train_dataset, eval_dataset, num_epochs,
                                      reader_options)

        output_paths = {}
        for module_name, serving_module in self.build_serving_modules().items():
            # TODO: make this a parameter
            # The module name is part of the file name: the timestamp alone has
            # one-second resolution, so modules exported in the same run would
            # otherwise overwrite each other's file.
            torchscript_output_path = (
                f"model_{module_name}_{round(time.time())}.torchscript"
            )
            # Save the module yielded by the loop; it must not be replaced by
            # the single-module builder.
            torch.jit.save(serving_module, torchscript_output_path)
            logger.info(f"Saved {module_name} to {torchscript_output_path}")
            output_paths[module_name] = torchscript_output_path
        return dataclasses.replace(train_output, output_paths=output_paths)
コード例 #11
0
ファイル: test_gym.py プロジェクト: h8f/ReAgent
def run_test(
    env_name: str,
    model: ModelManager__Union,
    replay_memory_size: int,
    train_every_ts: int,
    train_after_ts: int,
    num_train_episodes: int,
    max_steps: Optional[int],
    passing_score_bar: float,
    num_eval_episodes: int,
    use_gpu: bool,
):
    """Train a model online on an environment and check it against a score bar.

    The environment is seeded with ``SEED``; a trainer and a training policy
    are built from ``model.value``. The replay buffer is pre-filled with at
    least one minibatch worth of data (``train_after_ts`` is raised to the
    trainer's minibatch size if smaller), then the agent runs
    ``num_train_episodes`` episodes with a post-step callback built by
    ``train_with_replay_buffer_post_step``. Finally a serving policy is
    evaluated for ``num_eval_episodes`` episodes.

    Raises:
        AssertionError: if the best training reward or the mean evaluation
            reward is below ``passing_score_bar``.
    """
    env = EnvFactory.make(env_name)
    env.seed(SEED)
    env.action_space.seed(SEED)
    normalization = build_normalizer(env)
    logger.info(f"Normalization is: \n{pprint.pformat(normalization)}")

    manager = model.value
    # Adopt the environment's feature config provider when it has one.
    try:
        # pyre-fixme[16]: `Env` has no attribute `state_feature_config_provider`.
        manager.state_feature_config_provider = env.state_feature_config_provider
        logger.info(
            f"Using environment's state_feature_config_provider.\n"
            f"{manager.state_feature_config_provider}"
        )
    except AttributeError:
        logger.info("state_feature_config_provider override not applicable")

    trainer = manager.initialize_trainer(
        use_gpu=use_gpu,
        reward_options=RewardOptions(),
        normalization_data_map=normalization,
    )
    training_policy = manager.create_policy(serving=False)

    buffer = ReplayBuffer.create_from_env(
        env=env,
        replay_memory_size=replay_memory_size,
        batch_size=trainer.minibatch_size,
    )

    device = torch.device("cuda" if use_gpu else "cpu")

    # Burn-in: the buffer must hold at least one minibatch before training.
    train_after_ts = max(train_after_ts, trainer.minibatch_size)
    fill_replay_buffer(env=env, replay_buffer=buffer, desired_size=train_after_ts)

    train_on_step = train_with_replay_buffer_post_step(
        replay_buffer=buffer,
        env=env,
        trainer=trainer,
        training_freq=train_every_ts,
        batch_size=trainer.minibatch_size,
        device=device,
    )

    agent = Agent.create_for_env(
        env,
        policy=training_policy,
        post_transition_callback=train_on_step,
        device=device,
    )

    train_rewards = []
    tb_writer = SummaryWriter()
    with summary_writer_context(tb_writer):
        for i in range(num_train_episodes):
            trajectory = run_episode(
                env=env, agent=agent, mdp_id=i, max_steps=max_steps
            )
            ep_reward = trajectory.calculate_cumulative_reward()
            train_rewards.append(ep_reward)
            logger.info(
                f"Finished training episode {i} (len {len(trajectory)})"
                f" with reward {ep_reward}."
            )

    logger.info("============Train rewards=============")
    logger.info(train_rewards)
    logger.info(f"average: {np.mean(train_rewards)};\tmax: {np.max(train_rewards)}")

    # Check whether the max score passed the score bar; we explore during training
    # the return could be bad (leading to flakiness in C51 and QRDQN).
    assert np.max(train_rewards) >= passing_score_bar, (
        f"max reward ({np.max(train_rewards)})after training for "
        f"{len(train_rewards)} episodes is less than < {passing_score_bar}.\n"
    )

    # Evaluation uses a fresh agent wrapping the serving policy.
    serving_policy = manager.create_policy(serving=True)
    agent = Agent.create_for_env_with_serving_policy(env, serving_policy)

    raw_eval_rewards = evaluate_for_n_episodes(
        n=num_eval_episodes, env=env, agent=agent, max_steps=max_steps
    )
    eval_rewards = raw_eval_rewards.squeeze(1)

    logger.info("============Eval rewards==============")
    logger.info(eval_rewards)
    logger.info(f"average: {np.mean(eval_rewards)};\tmax: {np.max(eval_rewards)}")
    assert np.mean(eval_rewards) >= passing_score_bar, (
        f"Predictor reward is {np.mean(eval_rewards)},"
        f"less than < {passing_score_bar}.\n"
    )
コード例 #12
0
def run_test(
    env_name: str,
    model: ModelManager__Union,
    replay_memory_size: int,
    train_every_ts: int,
    train_after_ts: int,
    num_train_episodes: int,
    max_steps: Optional[int],
    passing_score_bar: float,
    num_eval_episodes: int,
    use_gpu: bool,
):
    """Train a model online on an environment and check it against a score bar.

    The environment is seeded with ``SEED``; a trainer and a training policy
    are built from ``model.value``. The replay buffer is pre-filled
    (``train_after_ts`` is raised to the trainer's minibatch size if smaller),
    the agent runs ``num_train_episodes`` training episodes, and a serving
    policy is then evaluated for ``num_eval_episodes`` episodes.

    Raises:
        AssertionError: if the reward of the last training episode or the mean
            evaluation reward is below ``passing_score_bar``.
    """
    env = EnvFactory.make(env_name)
    env.seed(SEED)
    env.action_space.seed(SEED)
    normalization = build_normalizer(env)
    logger.info(f"Normalization is: \n{pprint.pformat(normalization)}")

    manager = model.value
    trainer = manager.initialize_trainer(
        use_gpu=use_gpu,
        reward_options=RewardOptions(),
        normalization_data_map=normalization,
    )
    training_policy = manager.create_policy(serving=False)

    buffer = ReplayBuffer.create_from_env(
        env=env,
        replay_memory_size=replay_memory_size,
        batch_size=trainer.minibatch_size,
    )

    if use_gpu:
        device = torch.device("cuda")
    else:
        device = None

    # Burn-in: the buffer must hold at least one minibatch before training.
    train_after_ts = max(train_after_ts, trainer.minibatch_size)
    fill_replay_buffer(
        env=env, replay_buffer=buffer, desired_size=train_after_ts
    )

    train_on_step = train_with_replay_buffer_post_step(
        replay_buffer=buffer,
        env=env,
        trainer=trainer,
        training_freq=train_every_ts,
        batch_size=trainer.minibatch_size,
        device=device,
    )

    agent = Agent.create_for_env(
        env,
        policy=training_policy,
        post_transition_callback=train_on_step,
        # pyre-fixme[6]: Expected `Union[str, torch.device]` for 4th param but got
        #  `Optional[torch.device]`.
        device=device,
    )

    train_rewards = []
    tb_writer = SummaryWriter()
    with summary_writer_context(tb_writer):
        for i in range(num_train_episodes):
            trajectory = run_episode(
                env=env, agent=agent, mdp_id=i, max_steps=max_steps
            )
            ep_reward = trajectory.calculate_cumulative_reward()
            train_rewards.append(ep_reward)
            logger.info(
                f"Finished training episode {i} with reward {ep_reward}.")

    # Only the final training episode is held to the score bar.
    assert train_rewards[-1] >= passing_score_bar, (
        f"reward after {len(train_rewards)} episodes is {train_rewards[-1]},"
        f"less than < {passing_score_bar}...\n"
        f"Full reward history: {train_rewards}")

    logger.info("============Train rewards=============")
    logger.info(train_rewards)

    # Evaluation uses a fresh agent wrapping the serving policy.
    serving_policy = manager.create_policy(serving=True)
    agent = Agent.create_for_env_with_serving_policy(env, serving_policy)

    raw_eval_rewards = evaluate_for_n_episodes(
        n=num_eval_episodes, env=env, agent=agent, max_steps=max_steps
    )
    eval_rewards = raw_eval_rewards.squeeze(1)
    assert np.mean(eval_rewards) >= passing_score_bar, (
        f"Predictor reward is {np.mean(eval_rewards)},"
        f"less than < {passing_score_bar}...\n"
        f"Full eval rewards: {eval_rewards}.")

    logger.info("============Eval rewards==============")
    logger.info(eval_rewards)
コード例 #13
0
 def test_swallowing_histogram_value_error(self):
     """Logging a histogram of an all-ones (100, 1) tensor must not raise.

     NOTE(review): per the test name, the underlying writer presumably raises
     ValueError for this input and the context swallows it — confirm against
     the SummaryWriterContext implementation.
     """
     with TemporaryDirectory() as log_dir:
         tb_writer = SummaryWriter(log_dir)
         with summary_writer_context(tb_writer):
             SummaryWriterContext.add_histogram(
                 "bad_histogram", torch.ones(100, 1)
             )
コード例 #14
0
 def test_with_none(self):
     """With ``None`` as the writer, add_scalar is expected to return None."""
     with summary_writer_context(None):
         outcome = SummaryWriterContext.add_scalar("test", torch.ones(1))
         self.assertIsNone(outcome)
コード例 #15
0
def single_process_main(gpu_index, *args):
    """Train a parametric DQN workflow in this process and optionally save it.

    Args:
        gpu_index: Forwarded to ``BaseWorkflow.init_multiprocessing`` when all
            available GPUs are used; models are saved only when it is 0 and
            ``params["node_index"]`` is 0.
        *args: ``args[0]`` is the parameter dict read throughout this function
            (keys such as "training", "rl", "rainbow", "use_gpu",
            "use_all_avail_gpus", data/normalization paths, "epochs",
            "model_output_path"). Its ``["training"]["minibatch_size"]`` entry
            is scaled in place.
    """
    params = args[0]
    # Set minibatch size based on # of devices being used to train
    training_cfg = params["training"]
    training_cfg["minibatch_size"] *= minibatch_size_multiplier(
        params["use_gpu"], params["use_all_avail_gpus"]
    )
    use_all_gpus = params["use_all_avail_gpus"]

    rl_parameters = from_json(params["rl"], RLParameters)
    training_parameters = from_json(training_cfg, TrainingParameters)
    rainbow_parameters = from_json(params["rainbow"], RainbowDQNParameters)
    model_params = ContinuousActionModelParameters(
        rl=rl_parameters,
        training=training_parameters,
        rainbow=rainbow_parameters,
    )

    state_normalization = BaseWorkflow.read_norm_file(params["state_norm_data_path"])
    action_normalization = BaseWorkflow.read_norm_file(params["action_norm_data_path"])

    # TensorBoard events are written next to the model output.
    tb_writer = SummaryWriter(log_dir=params["model_output_path"])
    logger.info("TensorBoard logging location is: {}".format(tb_writer.log_dir))

    if use_all_gpus:
        BaseWorkflow.init_multiprocessing(
            int(params["num_processes_per_node"]),
            int(params["num_nodes"]),
            int(params["node_index"]),
            gpu_index,
            params["init_method"],
        )

    workflow = ParametricDqnWorkflow(
        model_params,
        state_normalization,
        action_normalization,
        params["use_gpu"],
        use_all_gpus,
    )

    state_features, _ = sort_features_by_normalization(state_normalization)
    action_features, _ = sort_features_by_normalization(action_normalization)
    preprocess_handler = ParametricDqnPreprocessHandler(
        StringKeySparseToDenseProcessor(state_features),
        StringKeySparseToDenseProcessor(action_features),
    )

    train_dataset = JSONDatasetReader(
        params["training_data_path"],
        batch_size=training_parameters.minibatch_size,
        preprocess_handler=preprocess_handler,
    )
    eval_dataset = JSONDatasetReader(
        params["eval_data_path"],
        batch_size=16,
        preprocess_handler=preprocess_handler,
    )

    with summary_writer_context(tb_writer):
        workflow.train_network(train_dataset, eval_dataset, int(params["epochs"]))

    # Only the first process on the first node writes the models.
    if int(params["node_index"]) == 0 and gpu_index == 0:
        workflow.save_models(params["model_output_path"])