# NOTE: the snippets below are collected from several VISSL test and tool
# files. The shared imports are reconstructed here as a best guess; exact
# module paths may differ across VISSL versions. Helpers such as
# `train_main`, `launch_distributed`, `launch_distributed_on_slurm`,
# `build_model`, `benchmark_data`, `run_integration_test`,
# `in_temporary_directory`, `get_checkpoint_folder`, `get_dist_run_id`,
# `is_fsdp_model_config`, `is_submitit_available`, `SelfSupervisionTask`,
# `setup_logging` and `shutdown_logging` come from various `vissl` modules
# in their original files and are assumed to be in scope.
import logging
import sys
from typing import Any, List

import pkg_resources
import torch

from hydra.experimental import compose, initialize_config_module
from vissl.hooks import default_hook_generator  # the launcher below re-imports this locally on purpose
from vissl.utils.hydra_config import (
    SSLHydraConfig,
    compose_hydra_configuration,
    convert_to_attrdict,
)

logger = logging.getLogger(__name__)


def _create_10B_evaluation_config(
    num_gpus: int, num_steps: int, batch_size: int, path_to_sliced_checkpoint: str
):
    data_limit = num_steps * batch_size * num_gpus
    cfg = compose_hydra_configuration(
        [
            "config=benchmark/linear_image_classification/clevr_count/eval_resnet_8gpu_transfer_clevr_count_linear",
            "+config/benchmark/linear_image_classification/clevr_count/models=regnet10B",
            f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={path_to_sliced_checkpoint}",
            "config.MODEL.AMP_PARAMS.USE_AMP=True",
            "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
            "config.OPTIMIZER.num_epochs=1",
            "config.LOG_FREQUENCY=1",
            # Testing on fake images
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
            "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
            "config.DATA.TEST.DATA_SOURCES=[synthetic]",
            "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
            "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TEST.USE_DEBUGGING_SAMPLER=True",
            # Disable overlapping communication and computation for the test
            "config.MODEL.FSDP_CONFIG.FORCE_SYNC_CUDA=True",
            # Testing on 8 V100 32GB GPUs only
            f"config.DATA.TRAIN.BATCHSIZE_PER_REPLICA={batch_size}",
            f"config.DATA.TRAIN.DATA_LIMIT={data_limit}",
            "config.DISTRIBUTED.NUM_NODES=1",
            f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpus}",
            "config.DISTRIBUTED.RUN_ID=auto",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    return config

def _create_extract_features_config(checkpoint_path: str, num_gpu: int = 2):
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose(
            "defaults",
            overrides=[
                "config=feature_extraction/extract_resnet_in1k_8gpu",
                "+config/feature_extraction/with_head=rn50_swav",
                f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
                "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
                "config.DATA.TEST.DATA_SOURCES=[synthetic]",
                "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
                "config.DATA.TRAIN.DATA_LIMIT=40",
                "config.DATA.TEST.DATA_LIMIT=20",
                "config.SEED_VALUE=0",
                "config.MODEL.AMP_PARAMS.USE_AMP=False",
                "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                "config.LOSS.swav_loss.epsilon=0.03",
                "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                "config.MODEL.FSDP_CONFIG.mixed_precision=False",
                "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
                "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
                f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
                "config.LOG_FREQUENCY=1",
                "config.OPTIMIZER.construct_single_param_group_only=True",
                "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
                "config.DATA.TEST.BATCHSIZE_PER_REPLICA=2",
                "config.OPTIMIZER.use_larc=False",
            ],
        )
    args, config = convert_to_attrdict(cfg)
    return config

def _create_pretraining_config(num_gpu: int = 2):
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose(
            "defaults",
            overrides=[
                "config=test/integration_test/quick_swav",
                "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                "config.DATA.TRAIN.DATA_LIMIT=40",
                "config.SEED_VALUE=0",
                "config.MODEL.AMP_PARAMS.USE_AMP=False",
                "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                "config.LOSS.swav_loss.epsilon=0.03",
                "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                "config.MODEL.FSDP_CONFIG.mixed_precision=False",
                "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
                "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
                f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
                "config.LOG_FREQUENCY=1",
                "config.OPTIMIZER.construct_single_param_group_only=True",
                "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
                "config.OPTIMIZER.use_larc=False",
            ],
        )
    args, config = convert_to_attrdict(cfg)
    return config

def _create_pretraining_config(
    with_fsdp: bool, with_activation_checkpointing: bool, with_larc: bool
):
    cfg = compose_hydra_configuration(
        [
            "config=pretrain/swav/swav_8node_resnet",
            "+config/pretrain/swav/models=regnet16Gf",
            "config.SEED_VALUE=2",
            "config.MODEL.AMP_PARAMS.USE_AMP=True",
            "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
            "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
            "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
            f"config.OPTIMIZER.use_larc={with_larc}",
            "config.LOSS.swav_loss.epsilon=0.03",
            "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
            "config.MODEL.FSDP_CONFIG.mixed_precision=False",
            "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
    if with_larc and with_fsdp:
        # LARC with FSDP needs unflattened parameters and the FSDP-aware SGD
        config.MODEL.FSDP_CONFIG.flatten_parameters = False
        config.OPTIMIZER.name = "sgd_fsdp"
    config["MODEL"]["ACTIVATION_CHECKPOINTING"][
        "USE_ACTIVATION_CHECKPOINTING"
    ] = with_activation_checkpointing
    return config

def _create_pretraining_config(with_fsdp: bool):
    cfg = compose_hydra_configuration(
        [
            "config=test/integration_test/quick_swav_2crops",
            "+config/test/integration_test/models=swav_regnet_fsdp",
            "config.MODEL.FSDP_CONFIG.mixed_precision=False",
            "config.SEED_VALUE=0",
            "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
            "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
            "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
            "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
            "config.LOSS.swav_loss.epsilon=0.03",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
            "config.DATA.TRAIN.DATA_LIMIT=32",
            "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
            "config.OPTIMIZER.use_larc=False",
            "config.OPTIMIZER.construct_single_param_group_only=True",
            "config.LOG_FREQUENCY=1",
            "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
            "config.DISTRIBUTED.NUM_PROC_PER_NODE=2",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
        config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
    return config

def _create_fsdp_model_config(with_fsdp: bool):
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose(
            "defaults",
            overrides=[
                "config=test/integration_test/quick_swav",
                "+config/pretrain/swav/models=regnet16Gf",
                "config.SEED_VALUE=0",
                "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                "config.LOSS.swav_loss.epsilon=0.03",
                "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                "config.MODEL.FSDP_CONFIG.mixed_precision=False",
                "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
                "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
                "config.OPTIMIZER.construct_single_param_group_only=True",
            ],
        )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
    return config

def _create_benchmark_config(
    checkpoint_path: str,
    with_fsdp: bool,
    with_eval_mlp: bool = True,
    num_gpu: int = 2,
):
    if with_eval_mlp:
        head_config = "+config/debugging/benchmark/linear_image_classification/models=regnet16Gf_eval_mlp"
    else:
        head_config = "+config/debugging/benchmark/linear_image_classification/models=regnet16Gf_mlp"
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose(
            "defaults",
            overrides=[
                "config=debugging/benchmark/linear_image_classification/eval_resnet_8gpu_transfer_imagenette_160",
                head_config,
                f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
                "config.SEED_VALUE=2",
                "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                "config.OPTIMIZER.param_schedulers.lr.lengths=[0.1, 0.9]",
                "config.OPTIMIZER.param_schedulers.lr.name=cosine",
                "config.LOSS.swav_loss.epsilon=0.03",
                "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
                "config.DATA.TEST.DATA_SOURCES=[synthetic]",
                "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
                "config.DATA.TRAIN.DATA_LIMIT=40",
                "config.DATA.TEST.DATA_LIMIT=16",
                "config.DISTRIBUTED.NCCL_DEBUG=False",
                "config.MODEL.AMP_PARAMS.USE_AMP=False",
                "config.MODEL.FSDP_CONFIG.mixed_precision=False",
                "config.OPTIMIZER.use_larc=False",
                "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                # This is critical
                "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
                "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
                "config.DATA.TEST.USE_DEBUGGING_SAMPLER=True",
                "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
                "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
                "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
                "config.OPTIMIZER.construct_single_param_group_only=True",
                "config.OPTIMIZER.num_epochs=2",
                "config.DISTRIBUTED.NUM_NODES=1",
                f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
            ],
        )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        head_type = "eval_mlp_fsdp" if with_eval_mlp else "mlp_fsdp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = head_type
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        head_type = "eval_mlp" if with_eval_mlp else "mlp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = head_type
    return config

def hydra_main(overrides: List[Any]):
    ######################################################################################
    # DO NOT MOVE THIS IMPORT TO TOP LEVEL: submitit processes will not be initialized
    # correctly (MKL_THREADING_LAYER will be set to INTEL instead of GNU)
    ######################################################################################
    from vissl.hooks import default_hook_generator

    ######################################################################################
    print(f"####### overrides: {overrides}")
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose("defaults", overrides=overrides)
    args, config = convert_to_attrdict(cfg)

    if config.SLURM.USE_SLURM:
        assert (
            is_submitit_available()
        ), "Please 'pip install submitit' to schedule jobs on SLURM"
        launch_distributed_on_slurm(engine_name=args.engine_name, cfg=config)
    else:
        launch_distributed(
            cfg=config,
            node_id=args.node_id,
            engine_name=args.engine_name,
            hook_generator=default_hook_generator,
        )

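# A minimal sketch of how `hydra_main` above is typically wired up as a CLI
# entry point (VISSL's tools scripts follow this pattern): every argument after
# the script name is treated as a Hydra override string. The exact argument
# handling in the real tool may differ; this guard is shown for illustration.
if __name__ == "__main__":
    overrides = sys.argv[1:]
    hydra_main(overrides=overrides)
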
def _create_pretraining_config(
    num_gpu: int = 2, with_fsdp: bool = False, fsdp_flatten_parameters: bool = False
):
    cfg = compose_hydra_configuration(
        [
            "config=test/integration_test/quick_swav",
            "+config/test/integration_test/models=swav_regnet_fsdp",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.DATA_LIMIT=40",
            "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
            "config.SEED_VALUE=0",
            "config.LOSS.swav_loss.epsilon=0.03",
            f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
            "config.LOG_FREQUENCY=1",
            "config.OPTIMIZER.construct_single_param_group_only=True",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
        config.MODEL.FSDP_CONFIG.flatten_parameters = fsdp_flatten_parameters
        config.MODEL.FSDP_CONFIG.mixed_precision = False
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
    return config

def _create_pretraining_config(with_fsdp: bool, num_gpu: int = 2):
    cfg = compose_hydra_configuration(
        [
            "config=test/integration_test/quick_swav",
            "+config/pretrain/swav/models=regnet16Gf",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.DATA_LIMIT=40",
            "config.SEED_VALUE=0",
            "config.MODEL.AMP_PARAMS.USE_AMP=False",
            "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
            "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
            "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
            "config.LOSS.swav_loss.epsilon=0.03",
            "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
            "config.MODEL.FSDP_CONFIG.mixed_precision=False",
            "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
            "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
            f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
            "config.LOG_FREQUENCY=1",
            "config.OPTIMIZER.construct_single_param_group_only=True",
            "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
            "config.OPTIMIZER.use_larc=False",
            "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
            "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
    return config

def _create_dino_linear_eval_config(checkpoint_path: str, gpu_count: int = 2):
    cfg = compose_hydra_configuration(
        [
            "config=test/integration_test/quick_eval_in1k_linear",
            "+config/benchmark/linear_image_classification/imagenet1k/models=dino_xcit_s16",
            f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
            f"config.DISTRIBUTED.NUM_PROC_PER_NODE={gpu_count}",
            # Datasets
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
            "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
            "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TRAIN.DATA_LIMIT=32",
            "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
            "config.DATA.TEST.DATA_SOURCES=[synthetic]",
            "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
            "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
            "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TEST.DATA_LIMIT=32",
            "config.DATA.TEST.USE_DEBUGGING_SAMPLER=True",
            # To get the logs reliably
            "config.LOG_FREQUENCY=1",
            "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
            "config.OPTIMIZER.num_epochs=2",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    return config

def hydra_main(overrides: List[str]):
    print(f"####### overrides: {overrides}")
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose("defaults", overrides=overrides)
    setup_logging(__name__)
    args, config = convert_to_attrdict(cfg)
    benchmark_data(config)

def test_benchmark_model(self, filepath: str):
    logger.info(f"Loading {filepath}")
    cfg = SSLHydraConfig.from_configs(
        [filepath, "config.DISTRIBUTED.NUM_PROC_PER_NODE=1"]
    )
    _, config = convert_to_attrdict(cfg.default_cfg)
    if not is_fsdp_model_config(config):
        build_model(config.MODEL, config.OPTIMIZER)

def _create_dino_pretraining_config(
    with_mixed_precision: bool, gpu_count: int = 2, num_epochs: int = 4
):
    cfg = compose_hydra_configuration(
        [
            "config=test/integration_test/quick_dino_xcit",
            "config.SEED_VALUE=0",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
            "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TRAIN.DATA_LIMIT=32",
            "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
            "config.LOG_FREQUENCY=1",
            "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
            f"config.OPTIMIZER.num_epochs={num_epochs}",
            f"config.DISTRIBUTED.NUM_PROC_PER_NODE={gpu_count}",
            # Options to override to get FSDP
            "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
            f"config.MODEL.AMP_PARAMS.USE_AMP={with_mixed_precision}",
            "config.OPTIMIZER.construct_single_param_group_only=True",
            "config.MODEL.FSDP_CONFIG.AUTO_WRAP_THRESHOLD=0",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    return config

def test_run(self, config_file_path: str):
    """
    Instantiate and run the test task described by the given config.

    Arguments:
        config_file_path {str} -- path to the config for the task to be run
    """
    logger.info(f"Loading {config_file_path}")
    cfg = SSLHydraConfig.from_configs([config_file_path])
    args, config = convert_to_attrdict(cfg.default_cfg)
    checkpoint_folder = get_checkpoint_folder(config)

    # Complete the data localization at runtime
    config.DATA.TRAIN.DATA_PATHS = [
        pkg_resources.resource_filename(__name__, "test_data")
    ]

    if torch.distributed.is_initialized():
        # Destroy process groups as torch may be initialized with NCCL, which
        # is incompatible with test_cpu_regnet_moco.yaml
        torch.distributed.destroy_process_group()

    # Run the training and make sure no exception is raised
    dist_run_id = get_dist_run_id(config, config.DISTRIBUTED.NUM_NODES)
    train_main(
        config,
        dist_run_id=dist_run_id,
        checkpoint_path=None,
        checkpoint_folder=checkpoint_folder,
        local_rank=0,
        node_id=0,
        hook_generator=default_hook_generator,
    )

def _create_config(with_fsdp: bool):
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose(
            "defaults",
            overrides=[
                "config=pretrain/swav/swav_8node_resnet",
                "+config/pretrain/swav/models=regnet16Gf",
                "config.SEED_VALUE=2",
                "config.MODEL.AMP_PARAMS.USE_AMP=True",
                "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                "config.OPTIMIZER.num_epochs=1",
                "config.OPTIMIZER.use_larc=False",
                "config.LOSS.swav_loss.epsilon=0.03",
                "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=16",
                "config.DISTRIBUTED.NCCL_DEBUG=False",
                "config.DISTRIBUTED.NUM_NODES=1",
                "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                "config.MODEL.FSDP_CONFIG.mixed_precision=False",
                "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
            ],
        )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
    return config

def _create_finetuning_config(
    checkpoint_path: str,
    num_gpu: int = 2,
    regularize_bias: bool = False,
    construct_single_param_group_only: bool = False,
    with_fsdp: bool = False,
    fsdp_flatten_parameters: bool = False,
    with_partial_head: bool = False,
):
    architecture_config = "+config/test/integration_test/models=finetune_regnet_fsdp"
    if with_partial_head:
        architecture_config = (
            "+config/test/integration_test/models=finetune_regnet_fsdp_head"
        )
    cfg = compose_hydra_configuration(
        [
            "config=test/integration_test/quick_eval_finetune_in1k",
            architecture_config,
            f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
            "config.DATA.TEST.DATA_SOURCES=[synthetic]",
            "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
            "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TRAIN.DATA_LIMIT=40",
            "config.DATA.TEST.DATA_LIMIT=20",
            "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
            "config.DATA.TEST.BATCHSIZE_PER_REPLICA=2",
            "config.SEED_VALUE=0",
            f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
            "config.LOG_FREQUENCY=1",
            "config.OPTIMIZER.num_epochs=2",
            "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.base_value=0.01",
            "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.base_lr_batch_size=2",
            "config.OPTIMIZER.param_schedulers.lr_head.auto_lr_scaling.base_value=0.1",
            "config.OPTIMIZER.param_schedulers.lr_head.auto_lr_scaling.base_lr_batch_size=2",
            f"config.OPTIMIZER.regularize_bias={regularize_bias}",
            f"config.OPTIMIZER.construct_single_param_group_only={construct_single_param_group_only}",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        if with_partial_head:
            config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
            config["MODEL"]["HEAD"]["PARAMS"][1][0] = "mlp_fsdp"
        else:
            config["MODEL"]["HEAD"]["PARAMS"][0][0] = "mlp_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
        config.MODEL.FSDP_CONFIG.flatten_parameters = fsdp_flatten_parameters
        config.MODEL.FSDP_CONFIG.mixed_precision = False
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        if with_partial_head:
            config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
            config["MODEL"]["HEAD"]["PARAMS"][1][0] = "mlp"
        else:
            config["MODEL"]["HEAD"]["PARAMS"][0][0] = "mlp"
    return config

def test_run(self, config_file_path: str):
    """
    Instantiate and run the test task described by the given config.

    Arguments:
        config_file_path {str} -- path to the config for the task to be run
    """
    logger.info(f"Loading {config_file_path}")
    cfg = SSLHydraConfig.from_configs([config_file_path])
    args, config = convert_to_attrdict(cfg.default_cfg)
    checkpoint_folder = get_checkpoint_folder(config)

    # Complete the data localization at runtime
    config.DATA.TRAIN.DATA_PATHS = [
        pkg_resources.resource_filename(__name__, "test_data")
    ]

    # Run the training and make sure no exception is raised
    dist_run_id = get_dist_run_id(config, config.DISTRIBUTED.NUM_NODES)
    train_main(
        config,
        dist_run_id=dist_run_id,
        checkpoint_path=None,
        checkpoint_folder=checkpoint_folder,
        local_rank=0,
        node_id=0,
        hook_generator=default_hook_generator,
    )

def _generate_config(self, config):
    """
    Generate an AttrDict config from a config YAML file and overrides.
    """
    with initialize_config_module(config_module="vissl.config"):
        config = compose("defaults", overrides=config)
    return convert_to_attrdict(config)

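# Hedged usage sketch for `_generate_config` above: compose a quick SimCLR
# test config with synthetic data and read a value back. The override strings
# mirror the ones used throughout these snippets; the method name
# `_example_generate_config_usage` is hypothetical.
def _example_generate_config_usage(self):
    args, config = self._generate_config(
        [
            "config=test/integration_test/quick_simclr",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        ]
    )
    assert config.DATA.TRAIN.DATA_SOURCES == ["synthetic"]
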
def test_cfg_composition(self):
    # Compose the configs and check that the model is changed
    cfg = SSLHydraConfig.from_configs(
        [
            "config=test/integration_test/quick_simclr",
            "+config/pretrain/simclr/models=resnext101",
        ]
    )
    _, config = convert_to_attrdict(cfg.default_cfg)
    self.assertEqual(
        config.MODEL.TRUNK.RESNETS.DEPTH, 101, "config composition failed"
    )

def vissl_swin_transformer_config():
    cfg = compose_hydra_configuration(
        [
            "config=test/integration_test/quick_dino_swin_t",
            "config.MODEL.TRUNK.NAME=swin_transformer",
            "config.MODEL.TRUNK.SWIN_TRANSFORMER.DROP_PATH_RATE=0.0",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    return config

def test_augly_transforms(self):
    cfg = compose_hydra_configuration(
        [
            "config=test/cpu_test/test_cpu_resnet_simclr.yaml",
            "+config/test/transforms=augly_transforms_example",
        ]
    )
    _, config = convert_to_attrdict(cfg)

    with in_temporary_directory() as _:
        # Test that the training runs with an augly transformation.
        run_integration_test(config)

def test_loss_build(self, filepath):
    logger.info(f"Loading {filepath}")
    cfg = SSLHydraConfig.from_configs(
        [
            filepath,
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        ]
    )
    _, config = convert_to_attrdict(cfg.default_cfg)
    task = SelfSupervisionTask.from_config(config)
    task.datasets, _ = task.build_datasets()
    self.assertTrue(task._build_loss(), "failed to build loss")

def test_cfg_key_addition(self):
    # Compose the configs and check that the new key is inserted
    cfg = SSLHydraConfig.from_configs(
        [
            "config=test/integration_test/quick_simclr",
            "+config.LOSS.simclr_info_nce_loss.buffer_params.MY_TEST_KEY=dummy",
        ]
    )
    _, config = convert_to_attrdict(cfg.default_cfg)
    self.assertTrue(
        "MY_TEST_KEY" in config.LOSS.simclr_info_nce_loss.buffer_params,
        "something went wrong, new key not added. Fail.",
    )

def _create_config(force_legacy_profiler: bool):
    cfg = compose_hydra_configuration(
        [
            "config=test/integration_test/quick_simclr",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.OPTIMIZER.use_larc=False",
            "config.PROFILING.RUNTIME_PROFILING.USE_PROFILER=True",
            "config.PROFILING.PROFILED_RANKS=[0]",
            f"config.PROFILING.RUNTIME_PROFILING.LEGACY_PROFILER={force_legacy_profiler}",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    return config

def test_sqrt_lr_scaling(self):
    # Compose the configs and check that the LR is changed
    cfg = SSLHydraConfig.from_configs(
        [
            "config=test/integration_test/quick_simclr",
            "+config/pretrain/simclr/models=resnext101",
            "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.auto_scale=True",
            'config.OPTIMIZER.param_schedulers.lr.name="linear"',
            'config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.scaling_type="sqrt"',
        ]
    )
    _, config = convert_to_attrdict(cfg.default_cfg)
    param_schedulers = config.OPTIMIZER.param_schedulers.lr
    # With sqrt scaling, the end value is the base LR (0.3) scaled by
    # sqrt(batch_size / base_lr_batch_size); 0.125 is presumably 32 / 256 here.
    self.assertEqual(0.3 * (0.125 ** 0.5), param_schedulers.end_value)

def test_pytorch_loss(self):
    cfg = SSLHydraConfig.from_configs(
        [
            "config=test/integration_test/quick_simclr",
            "config.LOSS.name=CosineEmbeddingLoss",
            "+config.LOSS.CosineEmbeddingLoss.margin=1.0",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        ]
    )
    _, config = convert_to_attrdict(cfg.default_cfg)
    task = SelfSupervisionTask.from_config(config)
    task.datasets, _ = task.build_datasets()
    self.assertTrue(task._build_loss(), "failed to build loss")

def hydra_main(overrides: List[Any]):
    print(f"####### overrides: {overrides}")
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose("defaults", overrides=overrides)
    setup_logging(__name__)
    args, config = convert_to_attrdict(cfg)
    launch_distributed(
        config,
        node_id=args.node_id,
        engine_name=args.engine_name,
        hook_generator=default_hook_generator,
    )
    # Close the logging streams, including the file handlers
    shutdown_logging()

def _create_extract_label_prediction_config(
    self, with_fsdp: bool, with_mixed_precision: bool, auto_wrap_threshold: int
):
    cfg = compose_hydra_configuration(
        [
            "config=test/integration_test/quick_extract_label_predictions",
            "+config/test/integration_test/models=extract_label_pred_regnet_fsdp",
            "config.SEED_VALUE=0",
            "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
            "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
            "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
            "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
            "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
            "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
            "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TRAIN.RANDOM_SYNTHETIC_LABELS=2",
            "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
            "config.DATA.TRAIN.DATA_LIMIT=32",
            "config.DATA.TEST.DATA_SOURCES=[synthetic]",
            "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
            "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
            "config.DATA.TEST.RANDOM_SYNTHETIC_LABELS=2",
            "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
            "config.DATA.TEST.DATA_LIMIT=32",
            "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
            "config.OPTIMIZER.num_epochs=1",
            "config.OPTIMIZER.use_larc=False",
            "config.OPTIMIZER.construct_single_param_group_only=True",
            "config.LOG_FREQUENCY=1",
            "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
            "config.DISTRIBUTED.NUM_PROC_PER_NODE=2",
        ]
    )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "eval_mlp_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
        config.MODEL.FSDP_CONFIG.mixed_precision = with_mixed_precision
        config.MODEL.FSDP_CONFIG.fp32_reduce_scatter = with_mixed_precision
        config.MODEL.FSDP_CONFIG.compute_dtype = torch.float32
        config.MODEL.FSDP_CONFIG.AUTO_WRAP_THRESHOLD = auto_wrap_threshold
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "eval_mlp"
        config.MODEL.AMP_PARAMS.USE_AMP = with_mixed_precision
        config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
    config.MODEL.ACTIVATION_CHECKPOINTING.USE_ACTIVATION_CHECKPOINTING = False
    return config

def _create_pretraining_config(
    with_fsdp: bool,
    with_activation_checkpointing: bool,
    with_mixed_precision: bool,
    auto_wrap_threshold: int,
):
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose(
            "defaults",
            overrides=[
                "config=test/integration_test/quick_swav_2crops",
                "+config/test/integration_test/models=swav_regnet_fsdp",
                "config.SEED_VALUE=0",
                "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
                "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
                "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
                "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
                "config.LOSS.swav_loss.epsilon=0.03",
                "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
                "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
                "config.DATA.TRAIN.DATA_LIMIT=32",
                "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
                "config.OPTIMIZER.use_larc=False",
                "config.OPTIMIZER.construct_single_param_group_only=True",
                "config.LOG_FREQUENCY=1",
                "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
                "config.DISTRIBUTED.NUM_PROC_PER_NODE=2",
            ],
        )
    args, config = convert_to_attrdict(cfg)
    if with_fsdp:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_fsdp"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
        config.MODEL.FSDP_CONFIG.mixed_precision = with_mixed_precision
        config.MODEL.FSDP_CONFIG.fp32_reduce_scatter = with_mixed_precision
        config.MODEL.FSDP_CONFIG.compute_dtype = torch.float32
        config.MODEL.FSDP_CONFIG.AUTO_WRAP_THRESHOLD = auto_wrap_threshold
    else:
        config["MODEL"]["TRUNK"]["NAME"] = "regnet_v2"
        config["MODEL"]["HEAD"]["PARAMS"][0][0] = "swav_head"
        config.MODEL.AMP_PARAMS.USE_AMP = with_mixed_precision
        config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
    config.MODEL.ACTIVATION_CHECKPOINTING.USE_ACTIVATION_CHECKPOINTING = (
        with_activation_checkpointing
    )
    return config
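
# Hedged sketch of the FSDP-vs-DDP parity pattern these helpers support: build
# the same pre-training config twice (FSDP on and off), run each in a scratch
# directory, and compare the recorded losses. `run_integration_test` and
# `in_temporary_directory` appear in the snippets above; the function name
# `_compare_fsdp_and_ddp_losses` and the `get_losses()` accessor on the
# returned test-log object are assumptions for illustration.
def _compare_fsdp_and_ddp_losses():
    losses = {}
    for with_fsdp in (True, False):
        config = _create_pretraining_config(
            with_fsdp=with_fsdp,
            with_activation_checkpointing=False,
            with_mixed_precision=False,
            auto_wrap_threshold=0,
        )
        with in_temporary_directory():
            results = run_integration_test(config)
            losses[with_fsdp] = results.get_losses()
    # Both modes should produce the same number of loss entries; with identical
    # seeds and deterministic settings, the values should closely match too.
    assert len(losses[True]) == len(losses[False])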