Example #1
def linear_with_grad_accumulation_and_async_allreduce_in16bit(
    input, weight, bias, gradient_accumulation_fusion, async_grad_allreduce,
):
    # Pre-cast arguments to the autocast dtype, then run the custom autograd
    # Function with autocast disabled so it sees consistently-typed inputs.
    args = _cast_if_autocast_enabled(
        input, weight, bias, gradient_accumulation_fusion, async_grad_allreduce
    )
    with torch.cuda.amp.autocast(enabled=False):
        return LinearWithGradAccumulationAndAsyncAllreduceIn16Bit.apply(*args)
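All of these examples lean on the same helper. As a point of reference, here is a minimal sketch of what _cast_if_autocast_enabled typically does, modeled on the Apex/Megatron-LM pattern; it relies on a private PyTorch utility, and the implementation in any given codebase may differ:

import torch

def _cast_if_autocast_enabled(*args):
    # Pass arguments through untouched when autocast is off; otherwise cast
    # tensor arguments to the current autocast GPU dtype (non-tensors pass through).
    if not torch.is_autocast_enabled():
        return args
    return torch.cuda.amp.autocast_mode._cast(args, torch.get_autocast_gpu_dtype())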
Example #2
def scaled_upper_triang_masked_softmax(inputs, _, scale):
    b, np, sq, sk = inputs.size()
    assert sq == sk, "causal mask is only for self attention"
    # Reshaping input to 3D tensor (attn_batches, sq, sk)
    inputs = inputs.view(-1, sq, sk)
    args = _cast_if_autocast_enabled(inputs, scale)
    with torch.cuda.amp.autocast(enabled=False):
        probs = ScaledUpperTriangMaskedSoftmax.apply(*args)
    return probs.view(b, np, sq, sk)
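A hedged usage sketch, assuming the fused kernel behind ScaledUpperTriangMaskedSoftmax is built and a CUDA device is available; the batch, head, and sequence sizes below are purely illustrative:

import torch

# Attention scores for b=2 batches, np=4 heads, sq=sk=128 (self-attention).
scores = torch.randn(2, 4, 128, 128, device="cuda", dtype=torch.float16)

with torch.cuda.amp.autocast():
    probs = scaled_upper_triang_masked_softmax(scores, None, 1.0)

print(probs.shape)  # torch.Size([2, 4, 128, 128])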
Example #3
def mixed_dtype_fused_layer_norm_affine(input,
                                        weight,
                                        bias,
                                        normalized_shape,
                                        eps=1e-6):
    args = _cast_if_autocast_enabled(input, weight, bias, normalized_shape,
                                     eps)
    with torch.cuda.amp.autocast(enabled=False):
        return FusedLayerNormAffineMixedDtypesFunction.apply(*args)
Example #4
def scaled_masked_softmax(inputs, mask, scale):
    # input is 4D tensor (b, np, sq, sk)
    args = _cast_if_autocast_enabled(inputs, mask, scale)
    with torch.cuda.amp.autocast(enabled=False):
        return ScaledMaskedSoftmax.apply(*args)
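A hedged usage sketch, assuming the fused kernel behind ScaledMaskedSoftmax is available and the usual Megatron-style convention that True in the mask marks positions to exclude; shapes are illustrative:

import torch

scores = torch.randn(2, 4, 128, 128, device="cuda", dtype=torch.float16)
# Mask is broadcast over the head dimension; True marks masked-out positions.
mask = torch.zeros(2, 1, 128, 128, device="cuda", dtype=torch.bool)

with torch.cuda.amp.autocast():
    probs = scaled_masked_softmax(scores, mask, 1.0)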
Example #5
def fused_bias_gelu(input, bias):
    args = _cast_if_autocast_enabled(input, bias)
    with torch.cuda.amp.autocast(enabled=False):
        return GeLUFunction.apply(*args)
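For comparison, a hedged sketch of the unfused computation that a fused bias-GeLU kernel typically replaces, using the standard tanh approximation of GeLU (0.7978845608 is sqrt(2/pi)); this is a reference formula, not necessarily this repository's kernel:

import torch

def bias_gelu_reference(input, bias):
    # Unfused reference: add the bias, then apply the tanh approximation of GeLU.
    x = input + bias
    return x * 0.5 * (1.0 + torch.tanh(0.7978845608 * x * (1.0 + 0.044715 * x * x)))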
Example #6
def _fast_layer_norm(x, weight, bias, epsilon):
    args = _cast_if_autocast_enabled(x, weight, bias, epsilon)
    with torch.cuda.amp.autocast(enabled=False):
        return FastLayerNormFN.apply(*args)
Example #7
def bias_dropout_add_fused_inference(x, bias, residual, prob):
    args = _cast_if_autocast_enabled(x, bias, residual, prob)
    with torch.cuda.amp.autocast(enabled=False):
        return bias_dropout_add_fused_inference_(*args)
Example #8
def bias_dropout_add_fused_train(x, bias, residual, prob):
    # re-enable torch grad to enable fused optimization.
    with torch.enable_grad():
        args = _cast_if_autocast_enabled(x, bias, residual, prob)
        with torch.cuda.amp.autocast(enabled=False):
            return bias_dropout_add_fused_train_(*args)
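As context for Examples #7 and #8, a hedged sketch of what the underlying fused train/inference helpers typically compute, shown here unfused and modeled on the Megatron-LM pattern; the train variant would pass training=True and the inference variant training=False:

import torch

def bias_dropout_add_reference(x, bias, residual, prob, training):
    # (x + bias) -> dropout -> residual add; fused into a single kernel in practice.
    out = torch.nn.functional.dropout(x + bias, p=prob, training=training)
    return residual + out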
Example #9
def column_parallel_linear(input, weight, bias):
    args = _cast_if_autocast_enabled(input, weight, bias)
    with torch.cuda.amp.autocast(enabled=False):
        return ColumnParallelLinearWithAsyncAllreduce.apply(*args)
Example #10
def fused_rms_norm(input, normalized_shape, eps=1e-6):
    args = _cast_if_autocast_enabled(input, normalized_shape, eps)
    with torch.cuda.amp.autocast(enabled=False):
        return FusedRMSNormFunction.apply(*args)
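A hedged usage sketch, assuming the fused extension behind FusedRMSNormFunction is built and a CUDA device is available; the hidden size below is illustrative:

import torch

hidden = torch.randn(32, 1024, device="cuda", dtype=torch.float16)

with torch.cuda.amp.autocast():
    normed = fused_rms_norm(hidden, normalized_shape=(1024,), eps=1e-6)

print(normed.shape)  # torch.Size([32, 1024])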