def grouped_allreduce_(tensors, average=None, name=None, priority=0,
                       prescale_factor=1.0, postscale_factor=1.0,
                       process_set=global_process_set, op=None):
    """A function that performs in-place averaging or summation of the input
    tensors over all the Horovod processes.

    The reduction operations are keyed by the base name. If a base name is not
    provided, an incremented auto-generated base name is used. Reductions are
    performed across tensors in the same list position. The tensor type and
    shape must be the same on all Horovod processes for tensors sharing
    positions in the input tensor list. The reduction will not start until all
    processes are ready to send and receive the tensors.

    Arguments:
        tensors: A list of tensors to average or sum.
        average:
            .. warning:: .. deprecated:: 0.24.0
                Use `op` instead. Will be removed in v1.0.
        op: The reduction operation to combine tensors across different ranks.
            Can be Average (default) or Sum.
        name: A base name to use for the group reduction operation
        priority: The priority of this operation. Higher priority operations
                  are likely to be executed before other operations.
        prescale_factor: Multiplicative factor to scale tensor before allreduce
        postscale_factor: Multiplicative factor to scale tensor after allreduce
        process_set: Process set object to limit this operation to a subset of
                     Horovod processes. Default is the global process set.

    Returns:
        A list containing tensors of the same shape and type as in `tensors`,
        averaged or summed across all processes.
    """
    # Resolve the deprecated `average` flag against the newer `op` argument.
    op = handle_average_backwards_compatibility(op, average)
    assert op in [Average, Sum]

    # Nothing to reduce for an empty group.
    if not tensors:
        return tensors

    # In-place operation: input and output are the same tensors, so build the
    # ctypes handle array once and pass it for both (the original built two
    # identical arrays).
    c_in = c_out = c_handle_array(tensors)
    c_name = c_str(name) if isinstance(name, string_types) else ctypes.c_char_p(None)

    check_call(
        MPI_MXNET_LIB_CTYPES.horovod_mxnet_allreduce_async(
            c_in, c_out, c_name,
            ctypes.c_bool(op == Average),
            ctypes.c_int(priority),
            ctypes.c_double(prescale_factor),
            ctypes.c_double(postscale_factor),
            ctypes.c_int(len(tensors)),
            ctypes.c_int(process_set.process_set_id)))

    return tensors
def grouped_allreduce(tensors, average=True, name=None, priority=0,
                      prescale_factor=1.0, postscale_factor=1.0,
                      process_set=global_process_set):
    """A function that performs averaging or summation of the input tensors
    over all the Horovod processes. The input tensors are not modified.

    The reduction operations are keyed by the base name. If a base name is not
    provided, an incremented auto-generated base name is used. Reductions are
    performed across tensors in the same list position. The tensor type and
    shape must be the same on all Horovod processes for tensors sharing
    positions in the input tensor list. The reduction will not start until all
    processes are ready to send and receive the tensors.

    Arguments:
        tensors: A list of tensors to average or sum.
        average: A flag indicating whether to compute average or summation,
                 defaults to average.
        name: A base name to use for the group reduction operation
        priority: The priority of this operation. Higher priority operations
                  are likely to be executed before other operations.
        prescale_factor: Multiplicative factor to scale tensor before allreduce
        postscale_factor: Multiplicative factor to scale tensor after allreduce
        process_set: Process set object to limit this operation to a subset of
                     Horovod processes. Default is the global process set.

    Returns:
        A list containing tensors of the same shape and type as in `tensors`,
        averaged or summed across all processes.
    """
    # An empty group reduces to itself.
    if not tensors:
        return tensors

    # Allocate zero-filled output tensors mirroring each input's shape,
    # context, and dtype — the inputs themselves stay untouched.
    results = [
        mx.nd.zeros(shape=t.shape, ctx=t.context, dtype=t.dtype)
        for t in tensors
    ]

    # The native layer takes the name as a C string, or NULL when unnamed.
    if isinstance(name, string_types):
        c_name = c_str(name)
    else:
        c_name = ctypes.c_char_p(None)

    check_call(
        MPI_MXNET_LIB_CTYPES.horovod_mxnet_allreduce_async(
            c_handle_array(tensors),
            c_handle_array(results),
            c_name,
            ctypes.c_bool(average),
            ctypes.c_int(priority),
            ctypes.c_double(prescale_factor),
            ctypes.c_double(postscale_factor),
            ctypes.c_int(len(tensors)),
            ctypes.c_int(process_set.process_set_id)))

    return results