def set_auto_parallel_context(**kwargs):
    """
    Configure the auto parallel context.

    All keyword arguments are forwarded unchanged to `_set_auto_parallel_context`.

    Note:
        Attribute name is required for setting attributes.
        If a program has tasks with different parallel modes, then before setting a new parallel mode
        for the next task, interface mindspore.context.reset_auto_parallel_context() needs to be called
        to reset the configuration.

    Args:
        device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
        global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
        mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror.
            "stand_alone" does not support mirror_mean. Default: False.
        cast_before_mirror (bool): Insert Mirror Op after the cast if this flag is True.
            "stand_alone", "data_parallel" and "hybrid_parallel" do not support
            cast_before_mirror. Default: True.
        parallel_mode (str): There are five kinds of parallel modes, "stand_alone", "data_parallel",
            "hybrid_parallel", "semi_auto_parallel" and "auto_parallel". Default: "stand_alone".

            - stand_alone: Only one processor working.

            - data_parallel: Distributing the data across different processors.

            - hybrid_parallel: Achieving data parallelism and model parallelism manually.

            - semi_auto_parallel: Achieving data parallelism and model parallelism by
              setting parallel strategies.

            - auto_parallel: Achieving parallelism automatically.
        auto_parallel_search_mode (str): There are two kinds of search modes, "recursive_programming"
            and "dynamic_programming". Default: "dynamic_programming".

            - recursive_programming: Recursive programming search mode.

            - dynamic_programming: Dynamic programming search mode.
        parameter_broadcast (bool): Indicating whether to broadcast parameters before training.
            "stand_alone", "semi_auto_parallel" and "auto_parallel" do not support parameter
            broadcast. Default: False.
        strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: ''
        strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: ''
        full_batch (bool): Whether to load the whole batch on each device. Default: False.

    Raises:
        ValueError: If input key is not attribute in auto parallel context.

    Examples:
        >>> context.set_auto_parallel_context(device_num=8)
        >>> context.set_auto_parallel_context(global_rank=0)
        >>> context.set_auto_parallel_context(mirror_mean=True)
        >>> context.set_auto_parallel_context(cast_before_mirror=False)
        >>> context.set_auto_parallel_context(parallel_mode="auto_parallel")
        >>> context.set_auto_parallel_context(parameter_broadcast=False)
        >>> context.set_auto_parallel_context(strategy_ckpt_load_file="./strategy_stage1.ckpt")
        >>> context.set_auto_parallel_context(strategy_ckpt_save_file="./strategy_stage1.ckpt")
    """
    # Pure pass-through: the private setter receives the keywords as given.
    _set_auto_parallel_context(**kwargs)
def _restore_auto_parallel_context():
    """Re-apply the module-level auto parallel settings to the auto parallel context.

    Reads the module globals ``_parallel_mode``, ``_device_num``, ``_global_rank``,
    ``_parameter_broadcast``, ``_mirror_mean``, ``_cast_before_mirror`` and
    ``_loss_repeated_mean`` and passes them to `_set_auto_parallel_context`, then sets
    the communication backend and the all-reduce fusion switch directly on the object
    returned by `auto_parallel_context()` from ``_communication_backend`` and
    ``_enable_all_reduce_fusion``.

    NOTE(review): the globals are presumably populated by a matching save routine
    elsewhere in this module -- confirm against the caller.
    """
    # The module-level names are only read here, never rebound, so no
    # ``global`` declarations are needed for the lookups below.
    saved_settings = {
        "parallel_mode": _parallel_mode,
        "device_num": _device_num,
        "global_rank": _global_rank,
        "parameter_broadcast": _parameter_broadcast,
        "mirror_mean": _mirror_mean,
        "cast_before_mirror": _cast_before_mirror,
        "loss_repeated_mean": _loss_repeated_mean,
    }
    _set_auto_parallel_context(**saved_settings)

    # These two settings are applied on the context object itself rather than
    # through the keyword-based setter above.
    auto_parallel_context().set_communication_backend(_communication_backend)
    auto_parallel_context().set_enable_all_reduce_fusion(_enable_all_reduce_fusion)
def set_auto_parallel_context(**kwargs):
    """
    Configure the auto parallel context.

    All keyword arguments are forwarded unchanged to `_set_auto_parallel_context`.

    Note:
        Attribute name is required for setting attributes.

    Args:
        device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
        global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
        mirror_mean (bool): Whether to perform mean operator after all-reduce of mirror.
            "stand_alone" does not support mirror_mean. Default: False.
        cast_before_mirror (bool): Insert Mirror Op after the cast if this flag is True.
            "stand_alone", "data_parallel" and "hybrid_parallel" do not support
            cast_before_mirror. Default: True.
        parallel_mode (str): There are five kinds of parallel modes, "stand_alone", "data_parallel",
            "hybrid_parallel", "semi_auto_parallel" and "auto_parallel". Default: "stand_alone".

            - stand_alone: Only one processor working.

            - data_parallel: Distributing the data across different processors.

            - hybrid_parallel: Achieving data parallelism and model parallelism manually.

            - semi_auto_parallel: Achieving data parallelism and model parallelism by
              setting parallel strategies.

            - auto_parallel: Achieving parallelism automatically.
        parameter_broadcast (bool): Indicating whether to broadcast parameters before training.
            "stand_alone", "semi_auto_parallel" and "auto_parallel" do not support parameter
            broadcast. Default: False.

    Raises:
        ValueError: If input key is not attribute in auto parallel context.

    Examples:
        >>> context.set_auto_parallel_context(device_num=8)
        >>> context.set_auto_parallel_context(global_rank=0)
        >>> context.set_auto_parallel_context(mirror_mean=True)
        >>> context.set_auto_parallel_context(cast_before_mirror=False)
        >>> context.set_auto_parallel_context(parallel_mode="auto_parallel")
        >>> context.set_auto_parallel_context(parameter_broadcast=False)
    """
    # Pure pass-through: the private setter receives the keywords as given.
    _set_auto_parallel_context(**kwargs)
def set_auto_parallel_context(**kwargs):
    r"""
    Configure the auto parallel context, which is valid only for Ascend and GPU target.

    Auto parallel context should be configured before the initialization of your network.
    All keyword arguments are forwarded unchanged to `_set_auto_parallel_context`.

    Note:
        Attribute name is required for setting attributes.
        If a program has tasks with different parallel modes, then before setting a new parallel mode
        for the next task, interface mindspore.context.reset_auto_parallel_context() needs to be called
        to reset the configuration.
        Setting or changing parallel modes must be called before creating any Initializer, otherwise,
        RuntimeError may be raised when compiling the network.

    Some configurations are parallel mode specific, see the below table for details:

    ===========================  ===========================
               Common                  AUTO_PARALLEL
    ===========================  ===========================
    device_num                   gradient_fp32_sync
    global_rank                  loss_repeated_mean
    gradients_mean               auto_parallel_search_mode
    parallel_mode                strategy_ckpt_load_file
    all_reduce_fusion_config     strategy_ckpt_save_file
    enable_parallel_optimizer    full_batch
               \                 pipeline_stages
    ===========================  ===========================

    Args:
        device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
        global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
        gradients_mean (bool): Whether to perform mean operator after allreduce of gradients.
            "stand_alone" does not support gradients_mean. Default: False.
        gradient_fp32_sync (bool): Run allreduce of gradients in fp32.
            "stand_alone", "data_parallel" and "hybrid_parallel" do not support
            gradient_fp32_sync. Default: True.
        parallel_mode (str): There are five kinds of parallel modes, "stand_alone", "data_parallel",
            "hybrid_parallel", "semi_auto_parallel" and "auto_parallel". Default: "stand_alone".

            - stand_alone: Only one processor is working.

            - data_parallel: Distributes the data across different processors.

            - hybrid_parallel: Achieves data parallelism and model parallelism manually.

            - semi_auto_parallel: Achieves data parallelism and model parallelism by
              setting parallel strategies.

            - auto_parallel: Achieves parallelism automatically.
        auto_parallel_search_mode (str): There are two kinds of shard strategy search modes,
            "recursive_programming" and "dynamic_programming". Default: "dynamic_programming".

            - recursive_programming: Recursive programming search mode.

            - dynamic_programming: Dynamic programming search mode.
        parameter_broadcast (bool): Whether to broadcast parameters before training. Before training,
            in order to have the same network initialization parameter values for all devices,
            broadcast the parameters on device 0 to other devices. Parameter broadcasting differs
            between parallel modes: in data_parallel mode, all parameters are broadcast except for
            the parameter whose attribute layerwise_parallel is True. In hybrid_parallel,
            semi_auto_parallel and auto_parallel mode, the segmented parameters do not participate
            in broadcasting. Default: False.
        strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: ''
        strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: ''
        full_batch (bool): If you load whole batch datasets in auto_parallel mode, this parameter
            should be set with True. Default: False.
        enable_parallel_optimizer (bool): This is a developing feature, which shards the weight
            update computation for data parallel training in the benefit of time and memory
            saving. Currently, auto and semi auto parallel mode support all optimizers in both
            Ascend and GPU. Data parallel mode only supports `Lamb` and `AdamWeightDecay` in
            Ascend. Default: False.
        all_reduce_fusion_config (list): Set allreduce fusion strategy by parameters indices. Only
            support ReduceOp.SUM and HCCL_WORLD_GROUP/NCCL_WORLD_GROUP. No Default, if it is not
            set, the fusion is closed.
        pipeline_stages (int): Set the stage information for pipeline parallel. This indicates how
            the devices are distributed along the pipeline. The total devices will be divided into
            'pipeline_stages' stages. This currently could only be used when parallel mode
            semi_auto_parallel is enabled. Default: 1.

    Raises:
        ValueError: If input key is not attribute in auto parallel context.

    Examples:
        >>> context.set_auto_parallel_context(device_num=8)
        >>> context.set_auto_parallel_context(global_rank=0)
        >>> context.set_auto_parallel_context(gradients_mean=True)
        >>> context.set_auto_parallel_context(gradient_fp32_sync=False)
        >>> context.set_auto_parallel_context(parallel_mode="auto_parallel")
        >>> context.set_auto_parallel_context(auto_parallel_search_mode="dynamic_programming")
        >>> context.set_auto_parallel_context(parameter_broadcast=False)
        >>> context.set_auto_parallel_context(strategy_ckpt_load_file="./strategy_stage1.ckpt")
        >>> context.set_auto_parallel_context(strategy_ckpt_save_file="./strategy_stage1.ckpt")
        >>> context.set_auto_parallel_context(full_batch=True)
        >>> context.set_auto_parallel_context(enable_parallel_optimizer=False)
        >>> context.set_auto_parallel_context(all_reduce_fusion_config=[8, 160])
        >>> context.set_auto_parallel_context(pipeline_stages=2)
    """
    # Pure pass-through: the private setter receives the keywords as given.
    _set_auto_parallel_context(**kwargs)
def set_auto_parallel_context(**kwargs):
    """
    Configure the auto parallel context.

    All keyword arguments are forwarded unchanged to `_set_auto_parallel_context`.

    Note:
        Attribute name is required for setting attributes.
        If a program has tasks with different parallel modes, then before setting a new parallel mode
        for the next task, interface mindspore.context.reset_auto_parallel_context() needs to be called
        to reset the configuration.
        Setting or changing parallel modes must be called before any Initializer is created, or
        RuntimeError may be raised when compiling the network.

    Args:
        device_num (int): Available device number, the value must be in [1, 4096]. Default: 1.
        global_rank (int): Global rank id, the value must be in [0, 4095]. Default: 0.
        gradients_mean (bool): Whether to perform mean operator after all-reduce of mirror.
            "stand_alone" does not support gradients_mean. Default: False.
        gradient_fp32_sync (bool): Gradients allreduce by fp32 even though gradients is fp16 if this
            flag is True. "stand_alone", "data_parallel" and "hybrid_parallel" do not support
            gradient_fp32_sync. Default: True.
        parallel_mode (str): There are five kinds of parallel modes, "stand_alone", "data_parallel",
            "hybrid_parallel", "semi_auto_parallel" and "auto_parallel". Default: "stand_alone".

            - stand_alone: Only one processor working.

            - data_parallel: Distributing the data across different processors.

            - hybrid_parallel: Achieving data parallelism and model parallelism manually.

            - semi_auto_parallel: Achieving data parallelism and model parallelism by
              setting parallel strategies.

            - auto_parallel: Achieving parallelism automatically.
        auto_parallel_search_mode (str): There are two kinds of search modes, "recursive_programming"
            and "dynamic_programming". Default: "dynamic_programming".

            - recursive_programming: Recursive programming search mode.

            - dynamic_programming: Dynamic programming search mode.
        parameter_broadcast (bool): Indicating whether to broadcast parameters before training.
            "stand_alone", "semi_auto_parallel" and "auto_parallel" do not support parameter
            broadcast. Default: False.
        strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: ''
        strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: ''
        full_batch (bool): Whether to load the whole batch on each device. Default: False.
        enable_parallel_optimizer (bool): This is a developing feature, which shards the weight
            update computation in data parallel training in the benefit of time and memory saving.
        all_reduce_fusion_config (list): Set allreduce fusion strategy by parameters indices.

    Raises:
        ValueError: If input key is not attribute in auto parallel context.

    Examples:
        >>> context.set_auto_parallel_context(device_num=8)
        >>> context.set_auto_parallel_context(global_rank=0)
        >>> context.set_auto_parallel_context(gradients_mean=True)
        >>> context.set_auto_parallel_context(gradient_fp32_sync=False)
        >>> context.set_auto_parallel_context(parallel_mode="auto_parallel")
        >>> context.set_auto_parallel_context(parameter_broadcast=False)
        >>> context.set_auto_parallel_context(strategy_ckpt_load_file="./strategy_stage1.ckpt")
        >>> context.set_auto_parallel_context(strategy_ckpt_save_file="./strategy_stage1.ckpt")
    """
    # Pure pass-through: the private setter receives the keywords as given.
    _set_auto_parallel_context(**kwargs)