Example #1
    def test_run(self, config_file_path: str):
        """
        Instantiate and run all the test tasks

        Arguments:
            config_file_path {str} -- path to the config for the task to be run
        """
        logger.info(f"Loading {config_file_path}")
        cfg = SSLHydraConfig.from_configs([config_file_path])
        args, config = convert_to_attrdict(cfg.default_cfg)
        checkpoint_folder = get_checkpoint_folder(config)

        # Resolve the training data paths to the bundled test data at runtime
        config.DATA.TRAIN.DATA_PATHS = [
            pkg_resources.resource_filename(__name__, "test_data")
        ]

        # run training and make sure no exception is raised
        dist_run_id = get_dist_run_id(config, config.DISTRIBUTED.NUM_NODES)
        train_main(
            config,
            dist_run_id=dist_run_id,
            checkpoint_path=None,
            checkpoint_folder=checkpoint_folder,
            local_rank=0,
            node_id=0,
            hook_generator=default_hook_generator,
        )
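
Both `test_run` snippets are methods lifted out of a test class, so their module-level imports are missing. Below is a hedged reconstruction of the imports they appear to rely on; the paths follow VISSL's usual module layout, but the module for `get_dist_run_id` in particular is an assumption, and `torch` is only needed for the `torch.distributed` check in Example #2 below.

import logging

import pkg_resources
import torch  # used by the torch.distributed check in Example #2

from vissl.engines.train import train_main
from vissl.hooks import default_hook_generator
from vissl.utils.checkpoint import get_checkpoint_folder
from vissl.utils.hydra_config import SSLHydraConfig, convert_to_attrdict
from vissl.utils.misc import get_dist_run_id  # module path assumed

logger = logging.getLogger(__name__)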
Example #2
    def test_run(self, config_file_path: str):
        """
        Instantiate and run all the test tasks

        Arguments:
            config_file_path {str} -- path to the config for the task to be run
        """
        logger.info(f"Loading {config_file_path}")
        cfg = SSLHydraConfig.from_configs([config_file_path])
        args, config = convert_to_attrdict(cfg.default_cfg)
        checkpoint_folder = get_checkpoint_folder(config)

        # Resolve the training data paths to the bundled test data at runtime
        config.DATA.TRAIN.DATA_PATHS = [
            pkg_resources.resource_filename(__name__, "test_data")
        ]

        if torch.distributed.is_initialized():
            # Destroy process groups as torch may be initialized with NCCL, which
            # is incompatible with test_cpu_regnet_moco.yaml
            torch.distributed.destroy_process_group()

        # run training and make sure no exception is raised
        dist_run_id = get_dist_run_id(config, config.DISTRIBUTED.NUM_NODES)
        train_main(
            config,
            dist_run_id=dist_run_id,
            checkpoint_path=None,
            checkpoint_folder=checkpoint_folder,
            local_rank=0,
            node_id=0,
            hook_generator=default_hook_generator,
        )
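
In both examples, `test_run` receives `config_file_path` as an argument, which implies the method is invoked once per configuration by some parameterization layer. A minimal, hypothetical sketch of such a harness using `pytest.mark.parametrize` follows; the class name and config list are made up for illustration, and the real suite may use a different mechanism.

import pytest

# Hypothetical config list; test_cpu_regnet_moco is the config mentioned in Example #2's comment.
TEST_CONFIGS = [
    "test/cpu_test/test_cpu_regnet_moco",
]


class TestTrainMain:
    @pytest.mark.parametrize("config_file_path", TEST_CONFIGS)
    def test_run(self, config_file_path: str):
        # Body as in the examples above: load the config, fix up the data
        # paths, and call train_main, asserting that no exception is raised.
        ...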
Example #3
def process_main(cfg, dist_run_id, local_rank, node_id):
    # checkpoint_path, checkpoint_folder and hook_generator are not parameters
    # of process_main; in the code this snippet was extracted from they are
    # provided by the enclosing launcher's scope.
    train_main(
        cfg,
        dist_run_id,
        checkpoint_path,
        checkpoint_folder,
        local_rank=local_rank,
        node_id=node_id,
        hook_generator=hook_generator,
    )
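
Since `checkpoint_path`, `checkpoint_folder` and `hook_generator` come from an enclosing scope, this `process_main` is evidently a per-process worker created by a launcher. Below is a minimal sketch of such a launcher, assuming one worker per GPU spawned with `torch.multiprocessing.spawn`; the function names and the use of `cfg.DISTRIBUTED.NUM_PROC_PER_NODE` are assumptions for illustration, not VISSL's actual launcher code.

import torch.multiprocessing as mp

from vissl.engines.train import train_main


def _process_main(local_rank, cfg, dist_run_id, node_id,
                  checkpoint_path, checkpoint_folder, hook_generator):
    # mp.spawn passes the process index as the first positional argument;
    # it is used directly as the local rank on this node.
    train_main(
        cfg,
        dist_run_id,
        checkpoint_path,
        checkpoint_folder,
        local_rank=local_rank,
        node_id=node_id,
        hook_generator=hook_generator,
    )


def launch_on_node(cfg, dist_run_id, node_id,
                   checkpoint_path, checkpoint_folder, hook_generator):
    # Start one training worker per process on this node (assumed config field).
    mp.spawn(
        _process_main,
        args=(cfg, dist_run_id, node_id, checkpoint_path,
              checkpoint_folder, hook_generator),
        nprocs=cfg.DISTRIBUTED.NUM_PROC_PER_NODE,
    )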