Example #1
def initialize_megatron(extra_args_provider=None,
                        args_defaults={},
                        ignore_unknown_args=False,
                        allow_no_cuda=False):
    """Set global variables, initialize distributed, and
    set autoresume and random seeds.
    `allow_no_cuda` should not be set unless using megatron for cpu only 
    data processing. In general this arg should not be set unless you know 
    what you are doing.
    Returns a function to finalize distributed env initialization 
    (optionally, only when args.lazy_mpu_init == True)

"""
    if not allow_no_cuda:
        # Make sure cuda is available.
        assert torch.cuda.is_available(), 'Megatron requires CUDA.'

    # Parse args, build tokenizer, and set adlr-autoresume,
    # tensorboard-writer, and timers.
    set_global_variables(extra_args_provider=extra_args_provider,
                         args_defaults=args_defaults,
                         ignore_unknown_args=ignore_unknown_args)

    # torch.distributed initialization
    def finish_mpu_init():
        args = get_args()
        # Pytorch distributed.
        _initialize_distributed()

        # Random seeds for reproducibility.
        if args.rank == 0:
            print('> setting random seeds to {} ...'.format(args.seed))
        _set_random_seed(args.seed)

    args = get_args()
    if args.lazy_mpu_init:
        args.use_cpu_initialization = True
        # delayed initialization of DDP-related stuff
        # We only set basic DDP globals
        set_model_parallel_world_size(args.model_parallel_size)
        # and return function for external DDP manager to call when it has DDP initialized
        set_model_parallel_rank(args.rank)
        return finish_mpu_init
    else:
        # Megatron's MPU is the master. Complete initialization right away.
        finish_mpu_init()

        # Initialize memory buffers.
        _initialize_mem_buffs()

        # Autoresume.
        _init_autoresume()

        # Write arguments to tensorboard.
        _write_args_to_tensorboard()
        # No continuation function
        return None
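
A minimal usage sketch for this variant. The caller, the `_my_extra_args` provider, and the specific `args_defaults` values are hypothetical and shown only to illustrate the two initialization paths: eager (returns None) and lazy (returns a continuation for an external DDP manager to call).

def _my_extra_args(parser):
    # Hypothetical extra-args provider: adds script-specific flags to the
    # Megatron argument parser before it runs.
    group = parser.add_argument_group(title='my extras')
    group.add_argument('--my-flag', action='store_true')
    return parser

continuation = initialize_megatron(
    extra_args_provider=_my_extra_args,
    args_defaults={'tokenizer_type': 'GPT2BPETokenizer'})  # illustrative defaults

if continuation is not None:
    # args.lazy_mpu_init was set: an external DDP manager owns distributed
    # setup and invokes the continuation once torch.distributed is ready.
    continuation()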
Example #2
def initialize_megatron(neox_args, allow_no_cuda=False):
    """Set initialize distributed and set autoresume and random seeds.
    `allow_no_cuda` should not be set unless using megatron for cpu only
    data processing. In general this arg should not be set unless you know
    what you are doing.
    Returns a function to finalize distributed env initialization
    (optionally, only when args.lazy_mpu_init == True)
    """
    if not allow_no_cuda:
        # Make sure cuda is available.
        assert torch.cuda.is_available(), "Megatron requires CUDA."

    # torch.distributed initialization
    def finish_mpu_init():
        # Pytorch distributed.
        _initialize_distributed(neox_args=neox_args)

        # Random seeds for reproducibility.
        if neox_args.rank == 0:
            print("> setting random seeds to {} ...".format(neox_args.seed))
        _set_random_seed(neox_args.seed)

    # check fused kernels are installed:
    if (neox_args.scaled_upper_triang_masked_softmax_fusion
            or neox_args.scaled_masked_softmax_fusion):
        fused_kernels.load_fused_kernels()

    if neox_args.lazy_mpu_init:
        neox_args.use_cpu_initialization = True
        # delayed initialization of DDP-related stuff
        # We only set basic DDP globals
        set_model_parallel_world_size(neox_args.model_parallel_size)
        # and return function for external DDP manager to call when it has DDP initialized
        set_model_parallel_rank(neox_args.rank)
        return finish_mpu_init
    else:
        # Megatron's MPU is the master. Complete initialization right away.
        finish_mpu_init()

        # Compile dataset C++ code.
        if neox_args.local_rank == 0:
            from megatron.data.data_utils import compile_helper

            compile_helper()

        # Write arguments to tensorboard.
        _write_args_to_tensorboard(neox_args=neox_args)
        # No continuation function
        return None
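
A sketch of how this NeoX-style variant might be driven from a training script. It assumes a fully populated `neox_args` object built by the project's own config loader; the wrapper function name is hypothetical.

def setup_for_training(neox_args):
    # Hypothetical wrapper: run initialization and handle the optional
    # continuation returned when neox_args.lazy_mpu_init is set.
    continuation = initialize_megatron(neox_args=neox_args)
    if continuation is not None:
        # Lazy path: an external DDP manager initializes torch.distributed,
        # then calls the continuation to finish MPU setup and seeding.
        continuation()
    # At this point model-parallel globals and random seeds are in place,
    # either immediately (eager path) or after the continuation has run.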