def _configure_using_config_file(deepspeed_config, mpu=None):
    """Populate the module-level activation-checkpointing globals from a
    DeepSpeed config file.

    Args:
        deepspeed_config: path to (or dict of) the DeepSpeed configuration.
        mpu: optional model-parallel unit forwarded to DeepSpeedConfig.
    """
    global num_layers, PARTITION_ACTIVATIONS, CONTIGUOUS_CHECKPOINTING, \
        PA_TO_CPU, SYNCHRONIZE, PROFILE_TIME

    config = DeepSpeedConfig(deepspeed_config, mpu=mpu).activation_checkpointing_config
    logger.info(config.repr())

    # Fan the parsed settings out into the module globals in one unpacking step.
    (PARTITION_ACTIVATIONS,
     CONTIGUOUS_CHECKPOINTING,
     num_layers,
     PA_TO_CPU,
     SYNCHRONIZE,
     PROFILE_TIME) = (config.partition_activations,
                      config.contiguous_memory_optimization,
                      config.number_checkpoints,
                      config.cpu_checkpointing,
                      config.synchronize_checkpoint_boundary,
                      config.profile)
def test_empty_csv_monitor(self):
    """An empty "csv_monitor" section must fall back to every default value."""
    cfg = DeepSpeedConfig({"train_batch_size": 2, "csv_monitor": {}})
    monitor = csvMonitor(cfg.monitor_config)

    assert monitor.enabled == CSV_MONITOR_ENABLED_DEFAULT
    assert monitor.output_path == CSV_MONITOR_OUTPUT_PATH_DEFAULT
    assert monitor.job_name == CSV_MONITOR_JOB_NAME_DEFAULT
def test_empty_tensorboard(self):
    """An empty "tensorboard" section must fall back to every default value."""
    cfg = DeepSpeedConfig({"train_batch_size": 2, "tensorboard": {}})
    monitor = TensorBoardMonitor(cfg.monitor_config)

    assert monitor.enabled == TENSORBOARD_ENABLED_DEFAULT
    assert monitor.output_path == TENSORBOARD_OUTPUT_PATH_DEFAULT
    assert monitor.job_name == TENSORBOARD_JOB_NAME_DEFAULT
def test_empty_wandb(self):
    """An empty "wandb" section must fall back to every default value."""
    cfg = DeepSpeedConfig({"train_batch_size": 2, "wandb": {}})
    monitor = WandbMonitor(cfg.monitor_config)

    assert monitor.enabled == WANDB_ENABLED_DEFAULT
    assert monitor.group == WANDB_GROUP_NAME_DEFAULT
    assert monitor.team == WANDB_TEAM_NAME_DEFAULT
    assert monitor.project == WANDB_PROJECT_NAME_DEFAULT
def _test_batch_config(num_ranks, batch, micro_batch, gas, success):
    """Exercise DeepSpeed batch-size resolution across parameter combinations.

    Runs `_run_batch_config` with every meaningful subset of
    (train_batch, micro_batch, gas) and checks each outcome with
    `_batch_assert` against the expected `success` flag.

    Args:
        num_ranks: world size the test expects to be launched with.
        batch: train_batch_size to configure.
        micro_batch: train_micro_batch_size_per_gpu to configure.
        gas: gradient_accumulation_steps to configure.
        success: whether these values should form a consistent configuration.
    """
    # Bug fix: the message used to be the literal string
    # 'The test assumes a world size of f{num_ranks}' — the f-prefix was
    # inside the quotes, so the placeholder was never interpolated.
    assert dist.get_world_size() == num_ranks, \
        f'The test assumes a world size of {num_ranks}'

    ds_batch_config = 'tests/unit/ds_batch_config.json'
    ds_config = DeepSpeedConfig(ds_batch_config)

    # Case: all three parameters are provided.
    status = _run_batch_config(ds_config, train_batch=batch, micro_batch=micro_batch, gas=gas)
    _batch_assert(status, ds_config, batch, micro_batch, gas, success)

    # Case: two out of three parameters are provided.
    status = _run_batch_config(ds_config, train_batch=batch, micro_batch=micro_batch)
    _batch_assert(status, ds_config, batch, micro_batch, gas, success)

    if success:
        # gas together with exactly one other parameter.
        status = _run_batch_config(ds_config, train_batch=batch, gas=gas)
        _batch_assert(status, ds_config, batch, micro_batch, gas, success)

        status = _run_batch_config(ds_config, micro_batch=micro_batch, gas=gas)
        _batch_assert(status, ds_config, batch, micro_batch, gas, success)

        # Only micro_batch or only train_batch is provided; the remaining
        # values must be inferable, which requires gas == 1 here.
        if gas == 1:
            status = _run_batch_config(ds_config, micro_batch=micro_batch)
            _batch_assert(status, ds_config, batch, micro_batch, gas, success)

            status = _run_batch_config(ds_config, train_batch=batch)
            _batch_assert(status, ds_config, batch, micro_batch, gas, success)
    else:
        # Expected-failure paths: only gas is provided.
        status = _run_batch_config(ds_config, gas=gas)
        _batch_assert(status, ds_config, batch, micro_batch, gas, success)

        # gas provided with train_batch when gas does not divide batch.
        if gas != 1:
            status = _run_batch_config(ds_config, train_batch=batch, gas=gas)
            _batch_assert(status, ds_config, batch, micro_batch, gas, success)
def test_csv_monitor(self):
    """Explicit "csv_monitor" settings must be honored over the defaults."""
    config_dict = {
        "train_batch_size": 2,
        "csv_monitor": {
            "enabled": True,
            "output_path": "test_output/ds_logs/",
            "job_name": "test"
        }
    }
    ds_config = DeepSpeedConfig(config_dict)
    csv_monitor = csvMonitor(ds_config.monitor_config)
    # `is True` rather than `== True` (PEP 8 / flake8 E712): also checks the
    # config layer produced a real bool, not merely a truthy value.
    assert csv_monitor.enabled is True
    assert csv_monitor.output_path == "test_output/ds_logs/"
    assert csv_monitor.job_name == "test"
def test_tensorboard(self):
    """Explicit "tensorboard" settings must be honored over the defaults."""
    config_dict = {
        "train_batch_size": 2,
        "tensorboard": {
            "enabled": True,
            "output_path": "test_output/ds_logs/",
            "job_name": "test"
        }
    }
    ds_config = DeepSpeedConfig(config_dict)
    tb_monitor = TensorBoardMonitor(ds_config.monitor_config)
    # `is True` rather than `== True` (PEP 8 / flake8 E712): also checks the
    # config layer produced a real bool, not merely a truthy value.
    assert tb_monitor.enabled is True
    assert tb_monitor.output_path == "test_output/ds_logs/"
    assert tb_monitor.job_name == "test"
def test_wandb(self):
    """Explicit "wandb" settings must be honored over the defaults."""
    config_dict = {
        "train_batch_size": 2,
        "wandb": {
            "enabled": False,
            "group": "my_group",
            "team": "my_team",
            "project": "my_project"
        }
    }
    ds_config = DeepSpeedConfig(config_dict)
    wandb_monitor = WandbMonitor(ds_config.monitor_config)
    # `is False` rather than `== False` (PEP 8 / flake8 E712): also checks the
    # config layer produced a real bool, not merely a falsy value.
    assert wandb_monitor.enabled is False
    assert wandb_monitor.group == "my_group"
    assert wandb_monitor.team == "my_team"
    assert wandb_monitor.project == "my_project"