Example 1
 def _create_10B_evaluation_config(num_gpus: int, num_steps: int,
                                   batch_size: int,
                                   path_to_sliced_checkpoint: str):
     data_limit = num_steps * batch_size * num_gpus
     cfg = compose_hydra_configuration([
         "config=benchmark/linear_image_classification/clevr_count/eval_resnet_8gpu_transfer_clevr_count_linear",
         "+config/benchmark/linear_image_classification/clevr_count/models=regnet10B",
         f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={path_to_sliced_checkpoint}",
         "config.MODEL.AMP_PARAMS.USE_AMP=True",
         "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
         "config.OPTIMIZER.num_epochs=1",
         "config.LOG_FREQUENCY=1",
         # Testing on fake images
         "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
         "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
         "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
         "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
         "config.DATA.TEST.DATA_SOURCES=[synthetic]",
         "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
         "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
         "config.DATA.TEST.USE_DEBUGGING_SAMPLER=True",
         # Disable overlap communication and computation for test
         "config.MODEL.FSDP_CONFIG.FORCE_SYNC_CUDA=True",
         # Testing on 8 V100 32GB GPU only
         f"config.DATA.TRAIN.BATCHSIZE_PER_REPLICA={batch_size}",
         f"config.DATA.TRAIN.DATA_LIMIT={data_limit}",
         "config.DISTRIBUTED.NUM_NODES=1",
         f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpus}",
         "config.DISTRIBUTED.RUN_ID=auto",
     ])
     args, config = convert_to_attrdict(cfg)
     return config
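For context, a minimal usage sketch (not part of the original snippet): it assumes the in_temporary_directory and run_integration_test helpers used in Example 22 are importable from vissl.utils.test_utils, and that a sliced 10B checkpoint path is already available.

# Hypothetical driver for the helper above (assumption: the test utilities
# shown in Example 22 live in vissl.utils.test_utils).
from vissl.utils.test_utils import in_temporary_directory, run_integration_test

def _run_10B_eval_on_synthetic_data(sliced_checkpoint_path: str):
    # 2 steps x batch 4 x 8 GPUs -> DATA_LIMIT of 64 synthetic images
    config = _create_10B_evaluation_config(
        num_gpus=8,
        num_steps=2,
        batch_size=4,
        path_to_sliced_checkpoint=sliced_checkpoint_path,
    )
    with in_temporary_directory():
        run_integration_test(config)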
Example 2
 def _create_extract_features_config(checkpoint_path: str,
                                     num_gpu: int = 2):
     with initialize_config_module(config_module="vissl.config"):
         cfg = compose(
             "defaults",
             overrides=[
                 "config=feature_extraction/extract_resnet_in1k_8gpu",
                 "+config/feature_extraction/with_head=rn50_swav",
                 f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
                 "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                 "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
                 "config.DATA.TEST.DATA_SOURCES=[synthetic]",
                 "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
                 "config.DATA.TRAIN.DATA_LIMIT=40",
                 "config.DATA.TEST.DATA_LIMIT=20",
                 "config.SEED_VALUE=0",
                 "config.MODEL.AMP_PARAMS.USE_AMP=False",
                 "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                 "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                 "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                 "config.LOSS.swav_loss.epsilon=0.03",
                 "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                 "config.MODEL.FSDP_CONFIG.mixed_precision=False",
                 "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
                 "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
                 f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
                 "config.LOG_FREQUENCY=1",
                 "config.OPTIMIZER.construct_single_param_group_only=True",
                 "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
                 "config.DATA.TEST.BATCHSIZE_PER_REPLICA=2",
                 "config.OPTIMIZER.use_larc=False",
             ],
         )
     args, config = convert_to_attrdict(cfg)
     return config
Example 3
    def _create_pretraining_config(num_gpu: int = 2):
        with initialize_config_module(config_module="vissl.config"):
            cfg = compose(
                "defaults",
                overrides=[
                    "config=test/integration_test/quick_swav",
                    "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                    "config.DATA.TRAIN.DATA_LIMIT=40",
                    "config.SEED_VALUE=0",
                    "config.MODEL.AMP_PARAMS.USE_AMP=False",
                    "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                    "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                    "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                    "config.LOSS.swav_loss.epsilon=0.03",
                    "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                    "config.MODEL.FSDP_CONFIG.mixed_precision=False",
                    "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
                    "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
                    f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
                    "config.LOG_FREQUENCY=1",
                    "config.OPTIMIZER.construct_single_param_group_only=True",
                    "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
                    "config.OPTIMIZER.use_larc=False",
                ],
            )

        args, config = convert_to_attrdict(cfg)
        return config
Example 4
    def _create_pretraining_config(with_fsdp: bool,
                                   with_activation_checkpointing: bool,
                                   with_larc: bool):
        cfg = compose_hydra_configuration([
            "config=pretrain/swav/swav_8node_resnet",
            "+config/pretrain/swav/models=regnet16Gf",
            "config.SEED_VALUE=2",
            "config.MODEL.AMP_PARAMS.USE_AMP=True",
            "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
            "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
            "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
            f"config.OPTIMIZER.use_larc={with_larc}",
            "config.LOSS.swav_loss.epsilon=0.03",
            "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
            "config.MODEL.FSDP_CONFIG.mixed_precision=False",
            "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
        ], )
        args, config = convert_to_attrdict(cfg)
        if with_fsdp:
            config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
            config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
        else:
            config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
            config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"

        if with_larc and with_fsdp:
            config.MODEL.FSDP_CONFIG.flatten_parameters = False
            config.OPTIMIZER.name = "sgd_fsdp"

        config["MODEL"]["ACTIVATION_CHECKPOINTING"][
            "USE_ACTIVATION_CHECKPOINTING"] = with_activation_checkpointing
        return config
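A hypothetical call illustrating the FSDP + LARC branch of the helper above; the assertions simply restate what those branches set and are not taken from the original test.

# Hypothetical usage (not from the original test): exercising the
# FSDP + LARC branch of _create_pretraining_config defined above.
config = _create_pretraining_config(
    with_fsdp=True,
    with_activation_checkpointing=True,
    with_larc=True,
)
assert config.MODEL.TRUNK.NAME == "regnet_fsdp"
assert config.MODEL.HEAD.PARAMS[0][0] == "swav_head_fsdp"
assert config.OPTIMIZER.name == "sgd_fsdp"
assert config.MODEL.FSDP_CONFIG.flatten_parameters is False
assert config.MODEL.ACTIVATION_CHECKPOINTING.USE_ACTIVATION_CHECKPOINTING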
Example 5
 def _create_pretraining_config(with_fsdp: bool):
     cfg = compose_hydra_configuration(
         [
             "config=test/integration_test/quick_swav_2crops",
             "+config/test/integration_test/models=swav_regnet_fsdp",
             "config.MODEL.FSDP_CONFIG.mixed_precision=False",
             "config.SEED_VALUE=0",
             "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
             "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
             "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
             "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
             "config.LOSS.swav_loss.epsilon=0.03",
             "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
             "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
             "config.DATA.TRAIN.DATA_LIMIT=32",
             "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
             "config.OPTIMIZER.use_larc=False",
             "config.OPTIMIZER.construct_single_param_group_only=True",
             "config.LOG_FREQUENCY=1",
             "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
             "config.DISTRIBUTED.NUM_PROC_PER_NODE=2",
         ]
     )
     args, config = convert_to_attrdict(cfg)
     if with_fsdp:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
         config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
     else:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
     config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
     return config
Example 6
 def _create_fsdp_model_config(with_fsdp: bool):
     with initialize_config_module(config_module="vissl.config"):
         cfg = compose(
             "defaults",
             overrides=[
                 "config=test/integration_test/quick_swav",
                 "+config/pretrain/swav/models=regnet16Gf",
                 "config.SEED_VALUE=0",
                 "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                 "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                 "config.LOSS.swav_loss.epsilon=0.03",
                 "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                 "config.MODEL.FSDP_CONFIG.mixed_precision=False",
                 "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
                 "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
                 "config.OPTIMIZER.construct_single_param_group_only=True",
             ],
         )
     args, config = convert_to_attrdict(cfg)
     if with_fsdp:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
         config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
     else:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
     return config
Example 7
    def _create_benchmark_config(
        checkpoint_path: str,
        with_fsdp: bool,
        with_eval_mlp: bool = True,
        num_gpu: int = 2,
    ):
        if with_eval_mlp:
            head_config = "+config/debugging/benchmark/linear_image_classification/models=regnet16Gf_eval_mlp"
        else:
            head_config = "+config/debugging/benchmark/linear_image_classification/models=regnet16Gf_mlp"

        with initialize_config_module(config_module="vissl.config"):
            cfg = compose(
                "defaults",
                overrides=[
                    "config=debugging/benchmark/linear_image_classification/eval_resnet_8gpu_transfer_imagenette_160",
                    head_config,
                    f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
                    "config.SEED_VALUE=2",
                    "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                    "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                    "config.OPTIMIZER.num_epochs=1",
                    "config.OPTIMIZER.param_schedulers.lr.lengths=[0.1, 0.9]",
                    "config.OPTIMIZER.param_schedulers.lr.name=cosine",
                    "config.LOSS.swav_loss.epsilon=0.03",
                    "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                    "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
                    "config.DATA.TEST.DATA_SOURCES=[synthetic]",
                    "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
                    "config.DATA.TRAIN.DATA_LIMIT=40",
                    "config.DATA.TEST.DATA_LIMIT=16",
                    "config.DISTRIBUTED.NCCL_DEBUG=False",
                    "config.MODEL.AMP_PARAMS.USE_AMP=false",
                    "config.MODEL.FSDP_CONFIG.mixed_precision=false",
                    "config.OPTIMIZER.use_larc=false",
                    "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",  # This is critical
                    "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
                    "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
                    "config.DATA.TEST.USE_DEBUGGING_SAMPLER=True",
                    "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
                    "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
                    "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                    "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=false",
                    "config.OPTIMIZER.construct_single_param_group_only=True",
                    "config.OPTIMIZER.num_epochs=2",
                    "config.DISTRIBUTED.NUM_NODES=1",
                    f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
                ],
            )
        args, config = convert_to_attrdict(cfg)
        if with_fsdp:
            config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
            head_type = "eval_mlp_fsdp" if with_eval_mlp else "mlp_fsdp"
            config["MODEL"]["HEAD"]["PARAMS"][0][0] = head_type
            config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
        else:
            config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
            head_type = "eval_mlp" if with_eval_mlp else "mlp"
            config["MODEL"]["HEAD"]["PARAMS"][0][0] = head_type
        return config
Example 8
def hydra_main(overrides: List[Any]):
    ######################################################################################
    # DO NOT MOVE THIS IMPORT TO TOP LEVEL: submitit processes will not be initialized
    # correctly (MKL_THREADING_LAYER will be set to INTEL instead of GNU)
    ######################################################################################
    from vissl.hooks import default_hook_generator

    ######################################################################################

    print(f"####### overrides: {overrides}")
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose("defaults", overrides=overrides)
    args, config = convert_to_attrdict(cfg)

    if config.SLURM.USE_SLURM:
        assert (
            is_submitit_available()
        ), "Please 'pip install submitit' to schedule jobs on SLURM"
        launch_distributed_on_slurm(engine_name=args.engine_name, cfg=config)
    else:
        launch_distributed(
            cfg=config,
            node_id=args.node_id,
            engine_name=args.engine_name,
            hook_generator=default_hook_generator,
        )
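For reference, a hedged sketch of how an entry point like this is typically driven; the original file's argument handling is not shown above, so the command-line wiring below is an assumption.

import sys

if __name__ == "__main__":
    # Hypothetical wiring (assumption): forward Hydra-style override strings
    # from the command line, in the same "config=..." / "config.KEY=value"
    # format used throughout these examples, e.g.
    #   python tool.py config=pretrain/swav/swav_8node_resnet \
    #       config.DISTRIBUTED.NUM_PROC_PER_NODE=2
    overrides = sys.argv[1:]
    hydra_main(overrides=overrides)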
Example 9
 def _create_pretraining_config(num_gpu: int = 2,
                                with_fsdp: bool = False,
                                fsdp_flatten_parameters: bool = False):
     cfg = compose_hydra_configuration([
         "config=test/integration_test/quick_swav",
         "+config/test/integration_test/models=swav_regnet_fsdp",
         "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
         "config.DATA.TRAIN.DATA_LIMIT=40",
         "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
         "config.SEED_VALUE=0",
         "config.LOSS.swav_loss.epsilon=0.03",
         f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
         "config.LOG_FREQUENCY=1",
         "config.OPTIMIZER.construct_single_param_group_only=True",
     ], )
     args, config = convert_to_attrdict(cfg)
     if with_fsdp:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
         config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
         config.MODEL.FSDP_CONFIG.flatten_parameters = fsdp_flatten_parameters
         config.MODEL.FSDP_CONFIG.mixed_precision = False
     else:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
     return config
Example 10
 def _create_pretraining_config(with_fsdp: bool, num_gpu: int = 2):
     cfg = compose_hydra_configuration([
         "config=test/integration_test/quick_swav",
         "+config/pretrain/swav/models=regnet16Gf",
         "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
         "config.DATA.TRAIN.DATA_LIMIT=40",
         "config.SEED_VALUE=0",
         "config.MODEL.AMP_PARAMS.USE_AMP=False",
         "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
         "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
         "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
         "config.LOSS.swav_loss.epsilon=0.03",
         "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
         "config.MODEL.FSDP_CONFIG.mixed_precision=False",
         "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
         "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
         f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
         "config.LOG_FREQUENCY=1",
         "config.OPTIMIZER.construct_single_param_group_only=True",
         "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
         "config.OPTIMIZER.use_larc=False",
         "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
         "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
     ], )
     args, config = convert_to_attrdict(cfg)
     if with_fsdp:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
         config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
     else:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
     return config
Example 11
 def _create_dino_linear_eval_config(checkpoint_path: str, gpu_count: int = 2):
     cfg = compose_hydra_configuration(
         [
             "config=test/integration_test/quick_eval_in1k_linear",
             "+config/benchmark/linear_image_classification/imagenet1k/models=dino_xcit_s16",
             f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
             f"config.DISTRIBUTED.NUM_PROC_PER_NODE={gpu_count}",
             # Datasets
             "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
             "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
             "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
             "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
             "config.DATA.TRAIN.DATA_LIMIT=32",
             "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
             "config.DATA.TEST.DATA_SOURCES=[synthetic]",
             "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
             "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
             "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
             "config.DATA.TEST.DATA_LIMIT=32",
             "config.DATA.TEST.USE_DEBUGGING_SAMPLER=True",
             # To get the logs reliably
             "config.LOG_FREQUENCY=1",
             "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
             "config.OPTIMIZER.num_epochs=2",
         ]
     )
     args, config = convert_to_attrdict(cfg)
     return config
Example 12
def hydra_main(overrides: List[str]):
    print(f"####### overrides: {overrides}")
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose("defaults", overrides=overrides)
    setup_logging(__name__)
    args, config = convert_to_attrdict(cfg)
    benchmark_data(config)
Example 13
 def test_benchmark_model(self, filepath: str):
     logger.info(f"Loading {filepath}")
     cfg = SSLHydraConfig.from_configs(
         [filepath, "config.DISTRIBUTED.NUM_PROC_PER_NODE=1"])
     _, config = convert_to_attrdict(cfg.default_cfg)
     if not is_fsdp_model_config(config):
         build_model(config.MODEL, config.OPTIMIZER)
Example 14
 def _create_dino_pretraining_config(
     with_mixed_precision: bool, gpu_count: int = 2, num_epochs: int = 4
 ):
     cfg = compose_hydra_configuration(
         [
             "config=test/integration_test/quick_dino_xcit",
             "config.SEED_VALUE=0",
             "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
             "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
             "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
             "config.DATA.TRAIN.DATA_LIMIT=32",
             "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
             "config.LOG_FREQUENCY=1",
             "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
             f"config.OPTIMIZER.num_epochs={num_epochs}",
             f"config.DISTRIBUTED.NUM_PROC_PER_NODE={gpu_count}",
             # Options to override to get FSDP
             "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
             f"config.MODEL.AMP_PARAMS.USE_AMP={with_mixed_precision}",
             "config.OPTIMIZER.construct_single_param_group_only=True",
             "config.MODEL.FSDP_CONFIG.AUTO_WRAP_THRESHOLD=0",
         ]
     )
     args, config = convert_to_attrdict(cfg)
     return config
Example 15
    def test_run(self, config_file_path: str):
        """
        Instantiate and run all the test tasks

        Arguments:
            config_file_path {str} -- path to the config for the task to be run
        """
        logger.info(f"Loading {config_file_path}")
        cfg = SSLHydraConfig.from_configs([config_file_path])
        args, config = convert_to_attrdict(cfg.default_cfg)
        checkpoint_folder = get_checkpoint_folder(config)

        # Complete the data localization at runtime
        config.DATA.TRAIN.DATA_PATHS = [
            pkg_resources.resource_filename(__name__, "test_data")
        ]

        if torch.distributed.is_initialized():
            # Destroy process groups as torch may be initialized with NCCL, which
            # is incompatible with test_cpu_regnet_moco.yaml
            torch.distributed.destroy_process_group()

        # run training and make sure no exception is raised
        dist_run_id = get_dist_run_id(config, config.DISTRIBUTED.NUM_NODES)
        train_main(
            config,
            dist_run_id=dist_run_id,
            checkpoint_path=None,
            checkpoint_folder=checkpoint_folder,
            local_rank=0,
            node_id=0,
            hook_generator=default_hook_generator,
        )
Example 16
 def _create_config(with_fsdp: bool):
     with initialize_config_module(config_module="vissl.config"):
         cfg = compose(
             "defaults",
             overrides=[
                 "config=pretrain/swav/swav_8node_resnet",
                 "+config/pretrain/swav/models=regnet16Gf",
                 "config.SEED_VALUE=2",
                 "config.MODEL.AMP_PARAMS.USE_AMP=True",
                 "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                 "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                 "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                 "config.OPTIMIZER.num_epochs=1",
                 "config.OPTIMIZER.use_larc=False",
                 "config.LOSS.swav_loss.epsilon=0.03",
                 "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                 "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=16",
                 "config.DISTRIBUTED.NCCL_DEBUG=False",
                 "config.DISTRIBUTED.NUM_NODES=1",
                 "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                 "config.MODEL.FSDP_CONFIG.mixed_precision=False",
                 "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
             ],
         )
     args, config = convert_to_attrdict(cfg)
     if with_fsdp:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
     else:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
     return config
Example 17
    def _create_finetuning_config(
        checkpoint_path: str,
        num_gpu: int = 2,
        regularize_bias: bool = False,
        construct_single_param_group_only: bool = False,
        with_fsdp: bool = False,
        fsdp_flatten_parameters: bool = False,
        with_partial_head: bool = False,
    ):
        architecture_config = (
            "+config/test/integration_test/models=finetune_regnet_fsdp")
        if with_partial_head:
            architecture_config = (
                "+config/test/integration_test/models=finetune_regnet_fsdp_head"
            )

        cfg = compose_hydra_configuration([
            "config=test/integration_test/quick_eval_finetune_in1k",
            architecture_config,
            f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
            "config.DATA.TEST.DATA_SOURCES=[synthetic]",
            "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
            "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TRAIN.DATA_LIMIT=40",
            "config.DATA.TEST.DATA_LIMIT=20",
            "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
            "config.DATA.TEST.BATCHSIZE_PER_REPLICA=2",
            "config.SEED_VALUE=0",
            f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
            "config.LOG_FREQUENCY=1",
            "config.OPTIMIZER.num_epochs=2",
            "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.base_value=0.01",
            "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.base_lr_batch_size=2",
            "config.OPTIMIZER.param_schedulers.lr_head.auto_lr_scaling.base_value=0.1",
            "config.OPTIMIZER.param_schedulers.lr_head.auto_lr_scaling.base_lr_batch_size=2",
            f"config.OPTIMIZER.regularize_bias={regularize_bias}",
            f"config.OPTIMIZER.construct_single_param_group_only={construct_single_param_group_only}",
        ])
        args, config = convert_to_attrdict(cfg)
        if with_fsdp:
            config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
            if with_partial_head:
                config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
                config["MODEL"]["HEAD"]["PARAMS"][1][0] = "mlp_fsdp"
            else:
                config["MODEL"]["HEAD"]["PARAMS"][0][0] = "mlp_fsdp"
            config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
            config.MODEL.FSDP_CONFIG.flatten_parameters = fsdp_flatten_parameters
            config.MODEL.FSDP_CONFIG.mixed_precision = False
        else:
            config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
            if with_partial_head:
                config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
                config["MODEL"]["HEAD"]["PARAMS"][1][0] = "mlp"
            else:
                config["MODEL"]["HEAD"]["PARAMS"][0][0] = "mlp"
        return config
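A hypothetical call showing the partial-head FSDP branch of the helper above; the placeholder checkpoint path is illustrative, and the assertions restate what those branches set rather than coming from the original test.

# Hypothetical usage (not from the original test): with_fsdp=True together
# with with_partial_head=True swaps both head modules to their FSDP variants.
config = _create_finetuning_config(
    checkpoint_path="/path/to/sharded_checkpoint.torch",  # placeholder path
    with_fsdp=True,
    with_partial_head=True,
)
assert config.MODEL.TRUNK.NAME == "regnet_fsdp"
assert config.MODEL.HEAD.PARAMS[0][0] == "swav_head_fsdp"
assert config.MODEL.HEAD.PARAMS[1][0] == "mlp_fsdp"
assert config.TRAINER.TASK_NAME == "self_supervision_fsdp_task"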
Example 18
    def test_run(self, config_file_path: str):
        """
        Instantiate and run all the test tasks

        Arguments:
            config_file_path {str} -- path to the config for the task to be run
        """
        logger.info(f"Loading {config_file_path}")
        cfg = SSLHydraConfig.from_configs([config_file_path])
        args, config = convert_to_attrdict(cfg.default_cfg)
        checkpoint_folder = get_checkpoint_folder(config)

        # Complete the data localization at runtime
        config.DATA.TRAIN.DATA_PATHS = [
            pkg_resources.resource_filename(__name__, "test_data")
        ]

        # run training and make sure no exception is raised
        dist_run_id = get_dist_run_id(config, config.DISTRIBUTED.NUM_NODES)
        train_main(
            config,
            dist_run_id=dist_run_id,
            checkpoint_path=None,
            checkpoint_folder=checkpoint_folder,
            local_rank=0,
            node_id=0,
            hook_generator=default_hook_generator,
        )
Example 19
    def _generate_config(self, config):
        """
        Generate AttrDict config from a config YAML file and overrides.
        """
        with initialize_config_module(config_module="vissl.config"):
            config = compose("defaults", overrides=config)

        return convert_to_attrdict(config)
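A hypothetical test method using the helper above; the method name and the particular overrides are illustrative only, reusing the override format seen in the other examples.

    def test_generate_config(self):
        # Hypothetical usage (name and overrides assumed): _generate_config
        # returns the (args, config) pair produced by convert_to_attrdict.
        args, config = self._generate_config([
            "config=test/integration_test/quick_simclr",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.DATA_LIMIT=40",
        ])
        self.assertEqual(config.DATA.TRAIN.DATA_LIMIT, 40)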
Example 20
 def test_cfg_composition(self):
     # compose the configs and check that the model is changed
     cfg = SSLHydraConfig.from_configs([
         "config=test/integration_test/quick_simclr",
         "+config/pretrain/simclr/models=resnext101",
     ])
     _, config = convert_to_attrdict(cfg.default_cfg)
     self.assertEqual(config.MODEL.TRUNK.RESNETS.DEPTH, 101,
                      "config composition failed")
Example 21
 def vissl_swin_transformer_config():
     cfg = compose_hydra_configuration(
         [
             "config=test/integration_test/quick_dino_swin_t",
             "config.MODEL.TRUNK.NAME=swin_transformer",
             "config.MODEL.TRUNK.SWIN_TRANSFORMER.DROP_PATH_RATE=0.0",
         ]
     )
     args, config = convert_to_attrdict(cfg)
     return config
Example 22
    def test_augly_transforms(self):
        cfg = compose_hydra_configuration([
            "config=test/cpu_test/test_cpu_resnet_simclr.yaml",
            "+config/test/transforms=augly_transforms_example",
        ], )
        _, config = convert_to_attrdict(cfg)

        with in_temporary_directory() as _:
            # Test that the training runs with an augly transformation.
            run_integration_test(config)
Example 23
 def test_loss_build(self, filepath):
     logger.info(f"Loading {filepath}")
     cfg = SSLHydraConfig.from_configs([
         filepath,
         "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
         "config.DATA.TEST.DATA_SOURCES=[synthetic]",
     ])
     _, config = convert_to_attrdict(cfg.default_cfg)
     task = SelfSupervisionTask.from_config(config)
     task.datasets, _ = task.build_datasets()
     self.assertTrue(task._build_loss(), "failed to build loss")
Example 24
 def test_cfg_key_addition(self):
     # compose the configs and check that the new key is inserted
     cfg = SSLHydraConfig.from_configs([
         "config=test/integration_test/quick_simclr",
         "+config.LOSS.simclr_info_nce_loss.buffer_params.MY_TEST_KEY=dummy",
     ])
     _, config = convert_to_attrdict(cfg.default_cfg)
     self.assertTrue(
         "MY_TEST_KEY" in config.LOSS.simclr_info_nce_loss.buffer_params,
         "something went wrong, new key not added. Fail.",
     )
Example 25
 def _create_config(force_legacy_profiler: bool):
     cfg = compose_hydra_configuration([
         "config=test/integration_test/quick_simclr",
         "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
         "config.OPTIMIZER.use_larc=False",
         "config.PROFILING.RUNTIME_PROFILING.USE_PROFILER=true",
         "config.PROFILING.PROFILED_RANKS=[0]",
         f"config.PROFILING.RUNTIME_PROFILING.LEGACY_PROFILER={force_legacy_profiler}",
     ])
     args, config = convert_to_attrdict(cfg)
     return config
Example 26
 def test_sqrt_lr_scaling(self):
     # compose the configs and check that the LR is changed
     cfg = SSLHydraConfig.from_configs([
         "config=test/integration_test/quick_simclr",
         "+config/pretrain/simclr/models=resnext101",
         "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.auto_scale=True",
         'config.OPTIMIZER.param_schedulers.lr.name="linear"',
         'config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.scaling_type="sqrt"',
     ])
     _, config = convert_to_attrdict(cfg.default_cfg)
     param_schedulers = config.OPTIMIZER.param_schedulers.lr
     self.assertEqual(0.3 * (0.125**0.5), param_schedulers.end_value)
Example 27
 def test_pytorch_loss(self):
     cfg = SSLHydraConfig.from_configs([
         "config=test/integration_test/quick_simclr",
         "config.LOSS.name=CosineEmbeddingLoss",
         "+config.LOSS.CosineEmbeddingLoss.margin=1.0",
         "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
         "config.DATA.TEST.DATA_SOURCES=[synthetic]",
     ])
     _, config = convert_to_attrdict(cfg.default_cfg)
     task = SelfSupervisionTask.from_config(config)
     task.datasets, _ = task.build_datasets()
     self.assertTrue(task._build_loss(), "failed to build loss")
Example 28
def hydra_main(overrides: List[Any]):
    print(f"####### overrides: {overrides}")
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose("defaults", overrides=overrides)
    setup_logging(__name__)
    args, config = convert_to_attrdict(cfg)
    launch_distributed(
        config,
        node_id=args.node_id,
        engine_name=args.engine_name,
        hook_generator=default_hook_generator,
    )
    # close the logging streams including the filehandlers
    shutdown_logging()
Example 29
 def _create_extract_label_prediction_config(self, with_fsdp: bool,
                                             with_mixed_precision: bool,
                                             auto_wrap_threshold: int):
     cfg = compose_hydra_configuration([
         "config=test/integration_test/quick_extract_label_predictions",
         "+config/test/integration_test/models=extract_label_pred_regnet_fsdp",
         "config.SEED_VALUE=0",
         "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
         "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
         "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
         "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
         "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
         "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
         "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
         "config.DATA.TRAIN.RANDOM_SYNTHETIC_LABELS=2",
         "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
         "config.DATA.TRAIN.DATA_LIMIT=32",
         "config.DATA.TEST.DATA_SOURCES=[synthetic]",
         "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
         "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
         "config.DATA.TEST.RANDOM_SYNTHETIC_LABELS=2",
         "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
         "config.DATA.TEST.DATA_LIMIT=32",
         "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
         "config.OPTIMIZER.num_epochs=1",
         "config.OPTIMIZER.use_larc=False",
         "config.OPTIMIZER.construct_single_param_group_only=True",
         "config.LOG_FREQUENCY=1",
         "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
         "config.DISTRIBUTED.NUM_PROC_PER_NODE=2",
     ])
     args, config = convert_to_attrdict(cfg)
     if with_fsdp:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "eval_mlp_fsdp"
         config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
         config.MODEL.FSDP_CONFIG.mixed_precision = with_mixed_precision
         config.MODEL.FSDP_CONFIG.fp32_reduce_scatter = with_mixed_precision
         config.MODEL.FSDP_CONFIG.compute_dtype = torch.float32
         config.MODEL.FSDP_CONFIG.AUTO_WRAP_THRESHOLD = auto_wrap_threshold
     else:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "eval_mlp"
     config.MODEL.AMP_PARAMS.USE_AMP = with_mixed_precision
     config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
     config.MODEL.ACTIVATION_CHECKPOINTING.USE_ACTIVATION_CHECKPOINTING = False
     return config
Example 30
 def _create_pretraining_config(
     with_fsdp: bool,
     with_activation_checkpointing: bool,
     with_mixed_precision: bool,
     auto_wrap_threshold: int,
 ):
     with initialize_config_module(config_module="vissl.config"):
         cfg = compose(
             "defaults",
             overrides=[
                 "config=test/integration_test/quick_swav_2crops",
                 "+config/test/integration_test/models=swav_regnet_fsdp",
                 "config.SEED_VALUE=0",
                 "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                 "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                 "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                 "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                 "config.LOSS.swav_loss.epsilon=0.03",
                 "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                 "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
                 "config.DATA.TRAIN.DATA_LIMIT=32",
                 "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
                 "config.OPTIMIZER.use_larc=False",
                 "config.OPTIMIZER.construct_single_param_group_only=True",
                 "config.LOG_FREQUENCY=1",
                 "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
                 "config.DISTRIBUTED.NUM_PROC_PER_NODE=2",
             ],
         )
     args, config = convert_to_attrdict(cfg)
     if with_fsdp:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
         config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
         config.MODEL.FSDP_CONFIG.mixed_precision = with_mixed_precision
         config.MODEL.FSDP_CONFIG.fp32_reduce_scatter = with_mixed_precision
         config.MODEL.FSDP_CONFIG.compute_dtype = torch.float32
         config.MODEL.FSDP_CONFIG.AUTO_WRAP_THRESHOLD = auto_wrap_threshold
     else:
         config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
         config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
     config.MODEL.AMP_PARAMS.USE_AMP = with_mixed_precision
     config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
     config.MODEL.ACTIVATION_CHECKPOINTING.USE_ACTIVATION_CHECKPOINTING = (
         with_activation_checkpointing)
     return config