Example #1
def test_get_info(rank):
    # Assumed imports for this snippet:
    # from machin.parallel.distributed import get_world, get_cur_rank, get_cur_name
    # Runs inside each of the three spawned processes: the world is
    # expected to contain ranks 0-2, whose members are named after them.
    world = get_world()
    assert world.get_ranks() == [0, 1, 2]
    assert world.get_members() == ["0", "1", "2"]
    # Each process also sees its own rank and name.
    assert get_cur_rank() == rank
    assert get_cur_name() == str(rank)
    return True
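For context, here is a minimal sketch of the same rank bookkeeping written against plain torch.distributed rather than machin's World; the gloo backend, the address/port choice, and the _worker name are illustrative assumptions, not part of the library under test:

import os

import torch.distributed as dist
import torch.multiprocessing as mp


def _worker(rank, world_size):
    # Hypothetical bootstrap: every spawned process joins one process group.
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("gloo", rank=rank, world_size=world_size)
    # Rough equivalents of the get_cur_rank() / get_ranks() checks above.
    assert dist.get_rank() == rank
    assert list(range(dist.get_world_size())) == [0, 1, 2]
    dist.destroy_process_group()


if __name__ == "__main__":
    mp.spawn(_worker, args=(3,), nprocs=3)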
Example #2
    def training_step(self, batch, _batch_idx):
        # Check, inside the training loop, that the distributed world and
        # the wrapped model were properly initialized.
        world_inited = get_world() is not None
        model_inited = isinstance(self.nn_model, NNModule)

        # Rank 0 persists the observation so the launching test process
        # can verify it after training finishes.
        if world_inited and get_cur_rank() == 0:
            with open(os.environ["TEST_SAVE_PATH"], "wb") as f:
                pickle.dump([model_inited], f)
        if not world_inited:
            raise RuntimeError("World not initialized.")
        return None
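On the test side, reading the flag back is symmetric; a minimal sketch, assuming TEST_SAVE_PATH is set by the harness as in Example #4 below:

import os
import pickle

with open(os.environ["TEST_SAVE_PATH"], "rb") as f:
    (model_inited,) = pickle.load(f)
assert model_inited, "nn_model was not an NNModule inside training_step"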
Example #3
    def on_train_batch_end(self, trainer, pl_module, outputs, batch,
                           _batch_idx, _dataloader_idx) -> None:
        # Track the best episodic reward seen by this process and let all
        # DDP processes vote on early stopping.
        for log in batch[0].logs:
            if "total_reward" in log:
                self.max_total_reward = max(log["total_reward"],
                                            self.max_total_reward)
                default_logger.info(
                    f"Process [{get_cur_rank()}] "
                    f"Current max total reward={self.max_total_reward:.2f}.")
                self.queue.put((get_cur_rank(), self.max_total_reward))
                t_plugin = trainer.training_type_plugin
                # All processes must agree on whether to stop early.
                trainer.should_stop = self.reduce_early_stopping_decision(
                    trainer, t_plugin)
                if trainer.should_stop:
                    default_logger.info(
                        f"Process [{get_cur_rank()}] decides to exit.")
                return
        default_logger.error("Missing total reward in logs.")
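The callback above delegates the cross-process vote to reduce_early_stopping_decision. A sketch of that pattern in plain torch.distributed (an illustration of the technique, not machin's or Lightning's implementation; reduce_should_stop is a hypothetical name):

import torch
import torch.distributed as dist


def reduce_should_stop(local_stop: bool) -> bool:
    # Each process contributes 0 or 1; if any process votes to stop,
    # the summed flag is non-zero and every process stops together.
    flag = torch.tensor(int(local_stop))
    dist.all_reduce(flag, op=dist.ReduceOp.SUM)
    return flag.item() > 0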
Example #4
def test_get_info(rank):
    # A trimmed variant of Example #1: only the current process's rank
    # and name are checked; the world handle itself is unused.
    _world = get_world()
    assert get_cur_rank() == rank
    assert get_cur_name() == str(rank)
    return True
        # (Truncated in the original listing: these two lines are the tail
        # of a helper that averages a per-process value over all processes
        # via a SUM reduce.)
        avg = t_plugin.reduce(avg, reduce_op=ReduceOp.SUM)
        return float(avg)
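Those two surviving lines follow the usual divide-locally, SUM-reduce-globally averaging pattern. A self-contained sketch of the same idea with plain torch.distributed (all_mean is a hypothetical name):

import torch
import torch.distributed as dist


def all_mean(value: float) -> float:
    # Each rank contributes value / world_size; summing the shares
    # yields the global mean on every rank.
    share = torch.tensor(value / dist.get_world_size())
    dist.all_reduce(share, op=dist.ReduceOp.SUM)
    return float(share)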


if __name__ == "__main__":
    os.environ["WORLD_SIZE"] = "3"
    print(os.environ["TEST_SAVE_PATH"])
    config = generate_env_config("CartPole-v0", {})
    config = generate_training_config(root_dir=os.environ["ROOT_DIR"], config=config)
    config = generate_algorithm_config("DQNApex", config)

    # Use DDP with GPUs: one entry per process, all mapped to GPU 0.
    config["gpus"] = [0, 0, 0]
    config["num_processes"] = 3
    # All processes run on this single testing node.
    config["num_nodes"] = 1
    config["early_stopping_patience"] = 100
    # Pass model classes instead of string names, since the algorithm is
    # distributed across processes.
    config["frame_config"]["models"] = [QNet, QNet]
    config["frame_config"]["model_kwargs"] = [
        {"state_dim": 4, "action_num": 2},
        {"state_dim": 4, "action_num": 2},
    ]

    # cb = [DDPInspectCallback(), LoggerDebugCallback()]
    cb = [DDPInspectCallback()]
    launch(config, pl_callbacks=cb)
    # After training, rank 0 persists the callback's result for the
    # launching test process to inspect.
    if is_world_initialized() and get_cur_rank() == 0:
        with open(os.environ["TEST_SAVE_PATH"], "wb") as f:
            pickle.dump(cb[0].avg_max_total_reward, f)