def distributed_zero_mixed_precision_lamb(world_rank, world_size, device, checkpoint_dir):
    """Build ZeRO stage-1, mixed-precision options and pass them to the checkpoint helper.

    The work itself is done by ``create_orttrainer_and_save_checkpoint``
    (defined elsewhere); this function only assembles its arguments.

    Args:
        world_rank: Rank of this process. Stored under
            ``distributed.world_rank`` and appended to the ``state_dict_``
            key name handed to the helper.
        world_size: Stored under ``distributed.world_size``.
        device: Stored under ``device.id`` and also passed as the helper's
            first positional argument.
        checkpoint_dir: Forwarded unchanged to the helper.
    """
    zero_settings = {'stage': 1}
    distributed_settings = {
        'world_rank': world_rank,
        'world_size': world_size,
        'allreduce_post_accumulation': True,
        'deepspeed_zero_optimization': zero_settings,
    }

    opts = {}
    opts['device'] = {'id': device}
    opts['mixed_precision'] = {'enabled': True}
    opts['distributed'] = distributed_settings
    opts['debug'] = {'deterministic_compute': True}

    # One key name per rank. The optimizer is left at the helper's default
    # (presumably LAMB, going by this function's name -- confirm in the helper).
    rank_key = f'state_dict_{world_rank!s}'
    create_orttrainer_and_save_checkpoint(
        device, opts, checkpoint_dir, state_dict_key_name=rank_key
    )
def distributed_zero_mixed_precision_lamb(world_rank, world_size, device, checkpoint_dir):
    """Assemble mixed-precision, ZeRO stage-1 trainer options and delegate.

    All real work happens in ``create_orttrainer_and_save_checkpoint``,
    which lives outside this function.

    Args:
        world_rank: This process's rank; recorded in the ``distributed``
            options and used as the suffix of the state-dict key name.
        world_size: Recorded in the ``distributed`` options.
        device: Recorded as ``device.id`` and passed as the helper's first
            argument.
        checkpoint_dir: Passed through to the helper as-is.
    """
    device_section = {"id": device}
    precision_section = {"enabled": True}
    distributed_section = dict(
        world_rank=world_rank,
        world_size=world_size,
        allreduce_post_accumulation=True,
        deepspeed_zero_optimization={"stage": 1},
    )
    debug_section = {"deterministic_compute": True}

    opts = {
        "device": device_section,
        "mixed_precision": precision_section,
        "distributed": distributed_section,
        "debug": debug_section,
    }

    # The key name is distinct per rank: "state_dict_<rank>".
    key_name = "".join(("state_dict_", str(world_rank)))
    create_orttrainer_and_save_checkpoint(device, opts, checkpoint_dir, state_dict_key_name=key_name)
def single_node_mixed_precision(device = 'cuda', checkpoint_dir = 'checkpoint_dir/single_node/mixed_precision/'):
    """Build mixed-precision options (no ``distributed`` section) and delegate.

    Hands the options to ``create_orttrainer_and_save_checkpoint``, which is
    defined elsewhere.

    Args:
        device: Stored under ``device.id`` and passed as the helper's first
            argument. Defaults to ``'cuda'``.
        checkpoint_dir: Forwarded unchanged to the helper. Defaults to
            ``'checkpoint_dir/single_node/mixed_precision/'``.
    """
    opts = dict(
        device={'id': device},
        mixed_precision={'enabled': True},
        debug={'deterministic_compute': True},
    )
    create_orttrainer_and_save_checkpoint(device, opts, checkpoint_dir)
def data_parallelism_full_precision(world_rank, world_size, device, checkpoint_dir = 'checkpoint_dir/data_parallelism/full_precision/'):
    """Build distributed options without mixed precision or ZeRO, and delegate.

    Calls ``create_orttrainer_and_save_checkpoint`` (defined elsewhere) on
    every rank, but only rank 0 is given a checkpoint directory.

    Args:
        world_rank: Stored under ``distributed.world_rank``; also decides
            whether ``checkpoint_dir`` or ``None`` is passed to the helper.
        world_size: Stored under ``distributed.world_size``.
        device: Stored under ``device.id`` and passed as the helper's first
            argument.
        checkpoint_dir: Passed to the helper only when ``world_rank == 0``.
            Defaults to ``'checkpoint_dir/data_parallelism/full_precision/'``.
    """
    distributed_settings = {
        'world_rank': world_rank,
        'world_size': world_size,
        'allreduce_post_accumulation': True,
    }

    opts = {}
    opts['device'] = {'id': device}
    opts['distributed'] = distributed_settings
    opts['debug'] = {'deterministic_compute': True}

    # Non-zero ranks pass None instead of a directory (presumably so only
    # rank 0 writes a checkpoint -- confirm against the helper).
    if world_rank == 0:
        target_dir = checkpoint_dir
    else:
        target_dir = None

    create_orttrainer_and_save_checkpoint(device, opts, target_dir)
def single_node_mixed_precision(checkpoint_dir, device="cuda"):
    """Assemble mixed-precision options with no ``distributed`` section and delegate.

    The options are handed to ``create_orttrainer_and_save_checkpoint``,
    which is defined outside this function.

    Args:
        checkpoint_dir: Forwarded unchanged to the helper.
        device: Stored under ``device.id`` and passed as the helper's first
            argument. Defaults to ``"cuda"``.
    """
    opts = {}
    opts["device"] = {"id": device}
    opts["mixed_precision"] = {"enabled": True}
    opts["debug"] = {"deterministic_compute": True}

    create_orttrainer_and_save_checkpoint(device, opts, checkpoint_dir)
def distributed_zero_full_precision_adam(world_rank, world_size, device, checkpoint_dir = 'checkpoint_dir/distributed_zero/full_precision/adam/'):
    """Build ZeRO stage-1 options without mixed precision and delegate with ``use_lamb=False``.

    Delegates to ``create_orttrainer_and_save_checkpoint`` (defined
    elsewhere).

    Args:
        world_rank: Stored under ``distributed.world_rank`` and appended to
            the ``state_dict_`` key name handed to the helper.
        world_size: Stored under ``distributed.world_size``.
        device: Stored under ``device.id`` and passed as the helper's first
            argument.
        checkpoint_dir: Forwarded unchanged to the helper. Defaults to
            ``'checkpoint_dir/distributed_zero/full_precision/adam/'``.
    """
    distributed_settings = dict(
        world_rank=world_rank,
        world_size=world_size,
        allreduce_post_accumulation=True,
        deepspeed_zero_optimization={'stage': 1},
    )
    opts = {
        'device': {'id': device},
        'distributed': distributed_settings,
        'debug': {'deterministic_compute': True},
    }

    # use_lamb=False turns off the helper's LAMB path (presumably selecting
    # Adam, per this function's name -- confirm in the helper).
    helper_kwargs = {
        'state_dict_key_name': f'state_dict_{world_rank!s}',
        'use_lamb': False,
    }
    create_orttrainer_and_save_checkpoint(device, opts, checkpoint_dir, **helper_kwargs)
def data_parallelism_full_precision(world_rank, world_size, device, checkpoint_dir):
    """Assemble distributed options without mixed precision or ZeRO, and delegate.

    ``create_orttrainer_and_save_checkpoint`` (defined elsewhere) is called
    on every rank; only rank 0 is given the checkpoint directory.

    Args:
        world_rank: Recorded in the ``distributed`` options; also selects
            whether ``checkpoint_dir`` or ``None`` reaches the helper.
        world_size: Recorded in the ``distributed`` options.
        device: Recorded as ``device.id`` and passed as the helper's first
            argument.
        checkpoint_dir: Passed to the helper only when ``world_rank == 0``.
    """
    opts = dict(
        device={"id": device},
        distributed={
            "world_rank": world_rank,
            "world_size": world_size,
            "allreduce_post_accumulation": True,
        },
        debug={"deterministic_compute": True},
    )

    # Ranks other than 0 hand the helper None in place of a directory
    # (presumably to skip writing a checkpoint there -- confirm in the helper).
    if world_rank == 0:
        create_orttrainer_and_save_checkpoint(device, opts, checkpoint_dir)
    else:
        create_orttrainer_and_save_checkpoint(device, opts, None)
def single_node_full_precision(checkpoint_dir, device='cuda'):
    """Build the minimal options (device and deterministic debug flag) and delegate.

    The options go to ``create_orttrainer_and_save_checkpoint``, which is
    defined elsewhere.

    Args:
        checkpoint_dir: Forwarded unchanged to the helper.
        device: Stored under ``device.id`` and passed as the helper's first
            argument. Defaults to ``'cuda'``.
    """
    device_section = {'id': device}
    debug_section = {'deterministic_compute': True}
    opts = dict(device=device_section, debug=debug_section)

    create_orttrainer_and_save_checkpoint(device, opts, checkpoint_dir)