def _create_pretraining_config(with_fsdp: bool, with_activation_checkpointing: bool, with_larc: bool):
    """Build a SwAV pre-training config, optionally enabling FSDP, LARC and
    activation checkpointing."""
    overrides = [
        "config=pretrain/swav/swav_8node_resnet",
        "+config/pretrain/swav/models=regnet16Gf",
        "config.SEED_VALUE=2",
        "config.MODEL.AMP_PARAMS.USE_AMP=True",
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
        "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
        f"config.OPTIMIZER.use_larc={with_larc}",
        "config.LOSS.swav_loss.epsilon=0.03",
        "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
        "config.MODEL.FSDP_CONFIG.mixed_precision=False",
        "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))

    # Select the FSDP-aware (or plain DDP) trunk and head implementations.
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head_fsdp"
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head"

    # LARC combined with FSDP: unflatten parameters and use the FSDP-aware SGD.
    if with_fsdp and with_larc:
        config.MODEL.FSDP_CONFIG.flatten_parameters = False
        config.OPTIMIZER.name = "sgd_fsdp"

    config.MODEL.ACTIVATION_CHECKPOINTING.USE_ACTIVATION_CHECKPOINTING = (
        with_activation_checkpointing
    )
    return config
def _create_dino_linear_eval_config(checkpoint_path: str, gpu_count: int = 2):
    """Linear-evaluation config for a DINO XCiT checkpoint, run on synthetic data."""
    overrides = [
        "config=test/integration_test/quick_eval_in1k_linear",
        "+config/benchmark/linear_image_classification/imagenet1k/models=dino_xcit_s16",
        f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={gpu_count}",
        # Datasets
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TRAIN.DATA_LIMIT=32",
        "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TEST.DATA_LIMIT=32",
        "config.DATA.TEST.USE_DEBUGGING_SAMPLER=True",
        # To get the logs reliably
        "config.LOG_FREQUENCY=1",
        "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
        "config.OPTIMIZER.num_epochs=2",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    return config
def _create_fsdp_model_config(with_fsdp: bool):
    """Quick SwAV model config, switching trunk/head/task to FSDP variants on demand."""
    overrides = [
        "config=test/integration_test/quick_swav",
        "+config/pretrain/swav/models=regnet16Gf",
        "config.SEED_VALUE=0",
        "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
        "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
        "config.LOSS.swav_loss.epsilon=0.03",
        "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
        "config.MODEL.FSDP_CONFIG.mixed_precision=False",
        "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
        "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
        "config.OPTIMIZER.construct_single_param_group_only=True",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        # FSDP needs matching trunk/head implementations and a dedicated task.
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head"
    return config
def _create_finetuning_config(
    checkpoint_path: str,
    num_gpu: int = 2,
    regularize_bias: bool = False,
    construct_single_param_group_only: bool = False,
    with_fsdp: bool = False,
    fsdp_flatten_parameters: bool = False,
    with_partial_head: bool = False,
):
    """Fine-tuning config starting from `checkpoint_path`, on synthetic data,
    with optional FSDP and an optional partial (swav + mlp) head."""
    if with_partial_head:
        architecture_config = (
            "+config/test/integration_test/models=finetune_regnet_fsdp_head"
        )
    else:
        architecture_config = (
            "+config/test/integration_test/models=finetune_regnet_fsdp"
        )
    overrides = [
        "config=test/integration_test/quick_eval_finetune_in1k",
        architecture_config,
        f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TRAIN.DATA_LIMIT=40",
        "config.DATA.TEST.DATA_LIMIT=20",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TEST.BATCHSIZE_PER_REPLICA=2",
        "config.SEED_VALUE=0",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
        "config.LOG_FREQUENCY=1",
        "config.OPTIMIZER.num_epochs=2",
        "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.base_value=0.01",
        "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.base_lr_batch_size=2",
        "config.OPTIMIZER.param_schedulers.lr_head.auto_lr_scaling.base_value=0.1",
        "config.OPTIMIZER.param_schedulers.lr_head.auto_lr_scaling.base_lr_batch_size=2",
        f"config.OPTIMIZER.regularize_bias={regularize_bias}",
        f"config.OPTIMIZER.construct_single_param_group_only={construct_single_param_group_only}",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        if with_partial_head:
            config.MODEL.HEAD.PARAMS[0][0] = "swav_head_fsdp"
            config.MODEL.HEAD.PARAMS[1][0] = "mlp_fsdp"
        else:
            config.MODEL.HEAD.PARAMS[0][0] = "mlp_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
        config.MODEL.FSDP_CONFIG.flatten_parameters = fsdp_flatten_parameters
        config.MODEL.FSDP_CONFIG.mixed_precision = False
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        if with_partial_head:
            config.MODEL.HEAD.PARAMS[0][0] = "swav_head"
            config.MODEL.HEAD.PARAMS[1][0] = "mlp"
        else:
            config.MODEL.HEAD.PARAMS[0][0] = "mlp"
    return config
def _create_pretraining_config(with_fsdp: bool):
    """Two-crop SwAV pre-training config on synthetic data, with activation
    checkpointing stages set for the RegNet trunk."""
    overrides = [
        "config=test/integration_test/quick_swav_2crops",
        "+config/test/integration_test/models=swav_regnet_fsdp",
        "config.MODEL.FSDP_CONFIG.mixed_precision=False",
        "config.SEED_VALUE=0",
        "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
        "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
        "config.LOSS.swav_loss.epsilon=0.03",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TRAIN.DATA_LIMIT=32",
        "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
        "config.OPTIMIZER.use_larc=False",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.LOG_FREQUENCY=1",
        "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
        "config.DISTRIBUTED.NUM_PROC_PER_NODE=2",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head"
    # Checkpointed stages for the trunk (applies to both FSDP and DDP runs).
    config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
    return config
def _create_10B_evaluation_config(num_gpus: int, num_steps: int, batch_size: int,
                                  path_to_sliced_checkpoint: str):
    """Linear-evaluation config for the 10B RegNet from a sliced checkpoint,
    sized so the synthetic dataset yields exactly `num_steps` steps."""
    # One pass over the data == num_steps optimizer steps.
    data_limit = num_steps * batch_size * num_gpus
    overrides = [
        "config=benchmark/linear_image_classification/clevr_count/eval_resnet_8gpu_transfer_clevr_count_linear",
        "+config/benchmark/linear_image_classification/clevr_count/models=regnet10B",
        f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={path_to_sliced_checkpoint}",
        "config.MODEL.AMP_PARAMS.USE_AMP=True",
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        "config.OPTIMIZER.num_epochs=1",
        "config.LOG_FREQUENCY=1",
        # Testing on fake images
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TEST.USE_DEBUGGING_SAMPLER=True",
        # Disable overlap communication and computation for test
        "config.MODEL.FSDP_CONFIG.FORCE_SYNC_CUDA=True",
        # Testing on 8 V100 32GB GPU only
        f"config.DATA.TRAIN.BATCHSIZE_PER_REPLICA={batch_size}",
        f"config.DATA.TRAIN.DATA_LIMIT={data_limit}",
        "config.DISTRIBUTED.NUM_NODES=1",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpus}",
        "config.DISTRIBUTED.RUN_ID=auto",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    return config
def _create_pretraining_config(with_fsdp: bool, num_gpu: int = 2):
    """Quick SwAV pre-training config on synthetic data, DDP or FSDP flavored."""
    overrides = [
        "config=test/integration_test/quick_swav",
        "+config/pretrain/swav/models=regnet16Gf",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.DATA_LIMIT=40",
        "config.SEED_VALUE=0",
        "config.MODEL.AMP_PARAMS.USE_AMP=False",
        "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
        "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        "config.LOSS.swav_loss.epsilon=0.03",
        "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
        "config.MODEL.FSDP_CONFIG.mixed_precision=False",
        "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
        "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
        "config.LOG_FREQUENCY=1",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.OPTIMIZER.use_larc=False",
        "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
        "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head"
    return config
def _create_dino_pretraining_config(
    with_mixed_precision: bool, gpu_count: int = 2, num_epochs: int = 4
):
    """Quick DINO XCiT pre-training config on synthetic data, with optional AMP."""
    overrides = [
        "config=test/integration_test/quick_dino_xcit",
        "config.SEED_VALUE=0",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TRAIN.DATA_LIMIT=32",
        "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
        "config.LOG_FREQUENCY=1",
        "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
        f"config.OPTIMIZER.num_epochs={num_epochs}",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={gpu_count}",
        # Options to override to get FSDP
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        f"config.MODEL.AMP_PARAMS.USE_AMP={with_mixed_precision}",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.MODEL.FSDP_CONFIG.AUTO_WRAP_THRESHOLD=0",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    return config
def _create_pretraining_config(num_gpu: int = 2, with_fsdp: bool = False,
                               fsdp_flatten_parameters: bool = False):
    """Quick SwAV pre-training config; FSDP flavor optionally controls
    parameter flattening."""
    overrides = [
        "config=test/integration_test/quick_swav",
        "+config/test/integration_test/models=swav_regnet_fsdp",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.DATA_LIMIT=40",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.SEED_VALUE=0",
        "config.LOSS.swav_loss.epsilon=0.03",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
        "config.LOG_FREQUENCY=1",
        "config.OPTIMIZER.construct_single_param_group_only=True",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
        config.MODEL.FSDP_CONFIG.flatten_parameters = fsdp_flatten_parameters
        config.MODEL.FSDP_CONFIG.mixed_precision = False
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head"
    return config
def _create_benchmark_config(
    checkpoint_path: str,
    with_fsdp: bool,
    with_eval_mlp: bool = True,
    num_gpu: int = 2,
):
    """Linear-benchmark config for a RegNet-16Gf checkpoint on synthetic data,
    choosing between the `eval_mlp` and plain `mlp` heads."""
    if with_eval_mlp:
        head_config = "+config/debugging/benchmark/linear_image_classification/models=regnet16Gf_eval_mlp"
    else:
        head_config = "+config/debugging/benchmark/linear_image_classification/models=regnet16Gf_mlp"
    # NOTE(review): num_epochs appears twice below (1, then 2) — presumably the
    # later override wins; verify against hydra override semantics.
    overrides = [
        "config=debugging/benchmark/linear_image_classification/eval_resnet_8gpu_transfer_imagenette_160",
        head_config,
        f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
        "config.SEED_VALUE=2",
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
        "config.OPTIMIZER.num_epochs=1",
        "config.OPTIMIZER.param_schedulers.lr.lengths=[0.1, 0.9]",
        "config.OPTIMIZER.param_schedulers.lr.name=cosine",
        "config.LOSS.swav_loss.epsilon=0.03",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.DATA_LIMIT=40",
        "config.DATA.TEST.DATA_LIMIT=16",
        "config.DISTRIBUTED.NCCL_DEBUG=False",
        "config.MODEL.AMP_PARAMS.USE_AMP=false",
        "config.MODEL.FSDP_CONFIG.mixed_precision=false",
        "config.OPTIMIZER.use_larc=false",
        "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
        # This is critical
        "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
        "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
        "config.DATA.TEST.USE_DEBUGGING_SAMPLER=True",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
        "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
        "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=false",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.OPTIMIZER.num_epochs=2",
        "config.DISTRIBUTED.NUM_NODES=1",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        config.MODEL.HEAD.PARAMS[0][0] = (
            "eval_mlp_fsdp" if with_eval_mlp else "mlp_fsdp"
        )
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        config.MODEL.HEAD.PARAMS[0][0] = "eval_mlp" if with_eval_mlp else "mlp"
    return config
def test_augly_transforms(self):
    """Check that a training configured with augly transforms runs end to end."""
    overrides = [
        "config=test/cpu_test/test_cpu_resnet_simclr.yaml",
        "+config/test/transforms=augly_transforms_example",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    # Test that the training runs with an augly transformation.
    with in_temporary_directory():
        run_integration_test(config)
def vissl_swin_transformer_config():
    """Quick DINO config using the VISSL Swin-T trunk with drop-path disabled."""
    overrides = [
        "config=test/integration_test/quick_dino_swin_t",
        "config.MODEL.TRUNK.NAME=swin_transformer",
        "config.MODEL.TRUNK.SWIN_TRANSFORMER.DROP_PATH_RATE=0.0",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    return config
def _create_config(force_legacy_profiler: bool):
    """Quick SimCLR config with the runtime profiler enabled on rank 0,
    optionally forcing the legacy profiler implementation."""
    overrides = [
        "config=test/integration_test/quick_simclr",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.OPTIMIZER.use_larc=False",
        "config.PROFILING.RUNTIME_PROFILING.USE_PROFILER=true",
        "config.PROFILING.PROFILED_RANKS=[0]",
        f"config.PROFILING.RUNTIME_PROFILING.LEGACY_PROFILER={force_legacy_profiler}",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    return config
def _create_extract_label_prediction_config(self, with_fsdp: bool,
                                            with_mixed_precision: bool,
                                            auto_wrap_threshold: int):
    """Label-prediction extraction config on synthetic data, with optional
    FSDP (and FSDP mixed precision / auto-wrap threshold)."""
    overrides = [
        "config=test/integration_test/quick_extract_label_predictions",
        "+config/test/integration_test/models=extract_label_pred_regnet_fsdp",
        "config.SEED_VALUE=0",
        "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
        "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_LABELS=2",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TRAIN.DATA_LIMIT=32",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TEST.RANDOM_SYNTHETIC_LABELS=2",
        "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TEST.DATA_LIMIT=32",
        "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
        "config.OPTIMIZER.num_epochs=1",
        "config.OPTIMIZER.use_larc=False",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.LOG_FREQUENCY=1",
        "config.REPRODUCIBILITY.CUDDN_DETERMINISTIC=True",
        "config.DISTRIBUTED.NUM_PROC_PER_NODE=2",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        config.MODEL.HEAD.PARAMS[0][0] = "eval_mlp_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
        # Mixed precision in FSDP implies fp32 reduce-scatter here.
        config.MODEL.FSDP_CONFIG.mixed_precision = with_mixed_precision
        config.MODEL.FSDP_CONFIG.fp32_reduce_scatter = with_mixed_precision
        config.MODEL.FSDP_CONFIG.compute_dtype = torch.float32
        config.MODEL.FSDP_CONFIG.AUTO_WRAP_THRESHOLD = auto_wrap_threshold
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        config.MODEL.HEAD.PARAMS[0][0] = "eval_mlp"
        # Non-FSDP path uses plain AMP instead of FSDP mixed precision.
        config.MODEL.AMP_PARAMS.USE_AMP = with_mixed_precision
    config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
    config.MODEL.ACTIVATION_CHECKPOINTING.USE_ACTIVATION_CHECKPOINTING = False
    return config
def _create_extract_features_config_head(checkpoint_path: str, num_gpu: int = 2):
    """Feature-extraction config (trunk + SwAV head) on synthetic data."""
    overrides = [
        "config=feature_extraction/extract_resnet_in1k_8gpu",
        "+config/feature_extraction/with_head=rn50_swav",
        f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.DATA_LIMIT=40",
        "config.DATA.TEST.DATA_LIMIT=20",
        "config.SEED_VALUE=0",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TEST.BATCHSIZE_PER_REPLICA=2",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    return config
def _create_extract_cluster_config(
    with_fsdp: bool, checkpoint_path: str, num_gpu: int = 2
):
    """SwAV cluster-extraction config on synthetic data, DDP or FSDP flavored."""
    overrides = [
        "config=extract_cluster/swav/visualise_swav_resnet_in1k_8gpu",
        "+config/extract_cluster/swav/models=regnet16Gf",
        f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.DATA_LIMIT=40",
        "config.DATA.TEST.DATA_LIMIT=20",
        "config.SEED_VALUE=0",
        "config.MODEL.AMP_PARAMS.USE_AMP=False",
        "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
        "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        "config.LOSS.swav_loss.epsilon=0.03",
        "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
        "config.MODEL.FSDP_CONFIG.mixed_precision=False",
        "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
        "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
        "config.LOG_FREQUENCY=1",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TEST.BATCHSIZE_PER_REPLICA=4",
        "config.OPTIMIZER.use_larc=False",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        config.MODEL.HEAD.PARAMS[0][0] = "swav_head"
    return config
def test_ema_hook(self):
    """Run a short linear-eval training with the EMA hook enabled and check
    that the EMA model is checkpointed and its meters are logged."""
    overrides = [
        "config=test/integration_test/quick_eval_in1k_linear.yaml",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.DATA_LIMIT=40",
        "config.OPTIMIZER.num_epochs=2",
        "config.HOOKS.EMA_MODEL.SAVE_EMA_MODEL=True",
        "config.HOOKS.EMA_MODEL.ENABLE_EMA_METERS=True",
        "config.HOOKS.EMA_MODEL.EMA_DEVICE=gpu",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    with in_temporary_directory() as checkpoint_folder:
        # Run a quick_eval_in1k_linear.
        integration_logs = run_integration_test(config)
        checkpoint_path = os.path.join(checkpoint_folder, "checkpoint.torch")

        # Test that the ema model is saved in the checkpoint.
        checkpoint = load_checkpoint(checkpoint_path)
        self.assertTrue(
            "ema_model" in checkpoint["classy_state_dict"],
            msg="ema_model has not been saved to the checkpoint folder.",
        )

        # Test that train_accuracy_list_meter_ema have been logged to metrics.json.
        metrics = integration_logs.get_accuracies(from_metrics_file=True)
        self.assertTrue(
            "train_accuracy_list_meter_ema" in metrics[1],
            msg="train_accuracy_list_meter_ema is not logged to the metrics.json file.",
        )
        self.assertEqual(
            len(metrics),
            8,
            "the metrics.json output does not have the appropriate number of entries.",
        )
def _create_extract_features_config(
    checkpoint_path: str, model_name: str, with_fsdp: bool, num_gpu: int = 2
):
    """Feature-extraction config (with nearest-neighbor settings) on synthetic
    data, DDP or FSDP flavored, for the given integration-test model."""
    overrides = [
        "config=feature_extraction/extract_resnet_in1k_8gpu",
        "+config/test/integration_test/models=" + model_name,
        "config.MODEL.FSDP_CONFIG.mixed_precision=False",
        f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_LABELS=10",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TEST.RANDOM_SYNTHETIC_LABELS=10",
        "config.DATA.TRAIN.DATA_LIMIT=200",
        "config.DATA.TEST.DATA_LIMIT=200",
        "config.SEED_VALUE=0",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=20",
        "config.DATA.TEST.BATCHSIZE_PER_REPLICA=10",
        "config.MODEL.FEATURE_EVAL_SETTINGS.SHOULD_FLATTEN_FEATS=False",
        "config.EXTRACT_FEATURES.CHUNK_THRESHOLD=50",
        # Options used for the nearest neighbors config
        "config.NEAREST_NEIGHBOR.TOPK=20",
        "config.NEAREST_NEIGHBOR.SIGMA=0.1",
        "config.NEAREST_NEIGHBOR.L2_NORM_FEATS=True",
        "config.NEAREST_NEIGHBOR.USE_CUDA=False",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
    # Checkpointed stages for the trunk (applies to both FSDP and DDP runs).
    config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
    return config
def _create_extract_features_config_trunk(checkpoint_path: str, num_gpu: int = 2):
    """Trunk-only feature-extraction config on synthetic data."""
    overrides = [
        "config=feature_extraction/extract_resnet_in1k_8gpu",
        "+config/feature_extraction/trunk_only=rn50_layers",
        f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.DATA_LIMIT=40",
        "config.DATA.TEST.DATA_LIMIT=20",
        "config.SEED_VALUE=0",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TEST.BATCHSIZE_PER_REPLICA=2",
        "config.MODEL.FEATURE_EVAL_SETTINGS.SHOULD_FLATTEN_FEATS=False",
        "config.EXTRACT_FEATURES.CHUNK_THRESHOLD=0",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    return config
def _create_pretraining_config(num_gpu: int = 2):
    """Quick SwAV pre-training config on synthetic data (DDP, no AMP, no LARC)."""
    overrides = [
        "config=test/integration_test/quick_swav",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.DATA_LIMIT=40",
        "config.SEED_VALUE=0",
        "config.MODEL.AMP_PARAMS.USE_AMP=False",
        "config.MODEL.SYNC_BN_CONFIG.CONVERT_BN_TO_SYNC_BN=True",
        "config.MODEL.SYNC_BN_CONFIG.SYNC_BN_TYPE=pytorch",
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        "config.LOSS.swav_loss.epsilon=0.03",
        "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
        "config.MODEL.FSDP_CONFIG.mixed_precision=False",
        "config.MODEL.FSDP_CONFIG.fp32_reduce_scatter=False",
        "config.MODEL.FSDP_CONFIG.compute_dtype=float32",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpu}",
        "config.LOG_FREQUENCY=1",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.OPTIMIZER.use_larc=False",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    return config
def _create_10B_pretrain_config(num_gpus: int, num_steps: int, batch_size: int):
    """SwAV pre-training config for the 10B SEER RegNet, sized so the synthetic
    dataset yields exactly `num_steps` steps."""
    # One pass over the data == num_steps optimizer steps.
    data_limit = num_steps * batch_size * num_gpus
    overrides = [
        "config=pretrain/swav/swav_8node_resnet",
        "+config/pretrain/seer/models=regnet10B",
        "config.OPTIMIZER.num_epochs=1",
        "config.LOG_FREQUENCY=1",
        # Testing on fake images
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TRAIN.USE_DEBUGGING_SAMPLER=True",
        # Disable overlap communication and computation for test
        "config.MODEL.FSDP_CONFIG.FORCE_SYNC_CUDA=True",
        # Testing on 8 V100 32GB GPU only
        f"config.DATA.TRAIN.BATCHSIZE_PER_REPLICA={batch_size}",
        f"config.DATA.TRAIN.DATA_LIMIT={data_limit}",
        "config.DISTRIBUTED.NUM_NODES=1",
        f"config.DISTRIBUTED.NUM_PROC_PER_NODE={num_gpus}",
        "config.DISTRIBUTED.RUN_ID=auto",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    return config
def hydra_main(overrides: List[Any]):
    """Compose the config from hydra overrides and launch the job, either on
    SLURM (via submitit) or locally in a distributed fashion."""
    ######################################################################################
    # DO NOT MOVE THIS IMPORT TO TOP LEVEL: submitit processes will not be initialized
    # correctly (MKL_THREADING_LAYER will be set to INTEL instead of GNU)
    ######################################################################################
    from vissl.hooks import default_hook_generator

    ######################################################################################
    print(f"####### overrides: {overrides}")
    args, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if config.SLURM.USE_SLURM:
        assert (
            is_submitit_available()
        ), "Please 'pip install submitit' to schedule jobs on SLURM"
        launch_distributed_on_slurm(engine_name=args.engine_name, cfg=config)
    else:
        launch_distributed(
            cfg=config,
            node_id=args.node_id,
            engine_name=args.engine_name,
            hook_generator=default_hook_generator,
        )
def hydra_main(overrides: List[Any]):
    """Compose the config from hydra overrides and pretty-print it."""
    composed = compose_hydra_configuration(overrides)
    _, config = convert_to_attrdict(composed)
    print(pprint.pformat(config))
def _create_finetuning_config(
    checkpoint_path: str,
    auto_wrap_threshold: int,
    with_fsdp: bool = False,
    with_partial_head: bool = False,
    with_mixed_precision: bool = False,
    with_activation_checkpointing: bool = False,
):
    """Fine-tuning config from `checkpoint_path` on synthetic data, with optional
    FSDP (auto-wrap, mixed precision), partial head and activation checkpointing."""
    if with_partial_head:
        architecture_config = (
            "+config/test/integration_test/models=finetune_regnet_fsdp_head"
        )
    else:
        architecture_config = (
            "+config/test/integration_test/models=finetune_regnet_fsdp"
        )
    overrides = [
        "config=test/integration_test/quick_eval_finetune_in1k",
        architecture_config,
        f"config.MODEL.WEIGHTS_INIT.PARAMS_FILE={checkpoint_path}",
        "config.DATA.TRAIN.DATA_SOURCES=[synthetic]",
        "config.DATA.TRAIN.LABEL_SOURCES=[synthetic]",
        "config.DATA.TEST.DATA_SOURCES=[synthetic]",
        "config.DATA.TEST.LABEL_SOURCES=[synthetic]",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TRAIN.RANDOM_SYNTHETIC_LABELS=2",
        "config.DATA.TEST.RANDOM_SYNTHETIC_IMAGES=True",
        "config.DATA.TEST.RANDOM_SYNTHETIC_LABELS=2",
        "config.DATA.TRAIN.DATA_LIMIT=32",
        "config.DATA.TEST.DATA_LIMIT=32",
        "config.DATA.TRAIN.BATCHSIZE_PER_REPLICA=4",
        "config.DATA.TEST.BATCHSIZE_PER_REPLICA=2",
        "config.SEED_VALUE=0",
        "config.DISTRIBUTED.NUM_PROC_PER_NODE=2",
        "config.LOG_FREQUENCY=1",
        "config.MODEL.AMP_PARAMS.AMP_TYPE=pytorch",
        "config.OPTIMIZER.num_epochs=2",
        "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.base_value=0.01",
        "config.OPTIMIZER.param_schedulers.lr.auto_lr_scaling.base_lr_batch_size=2",
        "config.OPTIMIZER.param_schedulers.lr_head.auto_lr_scaling.base_value=0.1",
        "config.OPTIMIZER.param_schedulers.lr_head.auto_lr_scaling.base_lr_batch_size=2",
        "config.OPTIMIZER.construct_single_param_group_only=True",
        "config.MODEL.FSDP_CONFIG.flatten_parameters=True",
    ]
    _, config = convert_to_attrdict(compose_hydra_configuration(overrides))
    if with_fsdp:
        config.MODEL.TRUNK.NAME = "regnet_fsdp"
        if with_partial_head:
            config.MODEL.HEAD.PARAMS[0][0] = "swav_head_fsdp"
            config.MODEL.HEAD.PARAMS[1][0] = "mlp_fsdp"
        else:
            config.MODEL.HEAD.PARAMS[0][0] = "mlp_fsdp"
        config.TRAINER.TASK_NAME = "self_supervision_fsdp_task"
        # Mixed precision in FSDP implies fp32 reduce-scatter here.
        config.MODEL.FSDP_CONFIG.mixed_precision = with_mixed_precision
        config.MODEL.FSDP_CONFIG.fp32_reduce_scatter = with_mixed_precision
        config.MODEL.FSDP_CONFIG.compute_dtype = torch.float32
        config.MODEL.FSDP_CONFIG.AUTO_WRAP_THRESHOLD = auto_wrap_threshold
    else:
        config.MODEL.TRUNK.NAME = "regnet_v2"
        if with_partial_head:
            config.MODEL.HEAD.PARAMS[0][0] = "swav_head"
            config.MODEL.HEAD.PARAMS[1][0] = "mlp"
        else:
            config.MODEL.HEAD.PARAMS[0][0] = "mlp"
        # Non-FSDP path uses plain AMP instead of FSDP mixed precision.
        config.MODEL.AMP_PARAMS.USE_AMP = with_mixed_precision
    config.MODEL.TRUNK.REGNET.stage_checkpoints = [[2], [4], [6, 11], []]
    config.MODEL.ACTIVATION_CHECKPOINTING.USE_ACTIVATION_CHECKPOINTING = (
        with_activation_checkpointing
    )
    return config
def _create_config(overrides: List[str]):
    """Compose the given overrides into an AttrDict config without dumping it."""
    composed = compose_hydra_configuration(overrides)
    _, config = convert_to_attrdict(composed, dump_config=False)
    return config
def __init__(self, overrides: List[Any] = None):
    """Record the provided hydra overrides (if any) and compose the default config."""
    self.overrides = []
    if overrides:
        self.overrides.extend(overrides)
    self.default_cfg = compose_hydra_configuration(self.overrides)
def hydra_main(overrides: List[Any]):
    """Compose the configuration from hydra overrides and hand it to `main`."""
    composed = compose_hydra_configuration(overrides)
    args, config = convert_to_attrdict(composed)
    main(args, config)
def _generate_config(self, overrides: List[str]):
    """
    Generate AttrDict config from a config YAML file and overrides.
    """
    return convert_to_attrdict(compose_hydra_configuration(overrides))
def hydra_main(overrides: List[str]):
    """Compose the config from hydra overrides, set up logging, and run the
    data benchmark."""
    print(f"####### overrides: {overrides}")
    composed = compose_hydra_configuration(overrides)
    setup_logging(__name__)
    _, config = convert_to_attrdict(composed)
    benchmark_data(config)