Ejemplo n.º 1
0
    def __init__(self, output_size, spatial_scale, sampling_ratio):
        """Store the pooling configuration and pick the forward callable.

        Args:
            output_size: kept verbatim as ``self.output_size``.
            spatial_scale: kept verbatim as ``self.spatial_scale``.
            sampling_ratio: kept verbatim as ``self.sampling_ratio``.
        """
        super(ROIAlign, self).__init__()
        self.output_size, self.spatial_scale, self.sampling_ratio = (
            output_size, spatial_scale, sampling_ratio)

        # When `use_apex_amp` is set, the forward implementation is routed
        # through apex's `amp.float_function`; otherwise it is used directly.
        self.fwd = (amp.float_function(self.fwd_impl)
                    if use_apex_amp else self.fwd_impl)
def causal_linear_attention(q, k, v, eps = 1e-6):
    """Causal linear attention built on fast_transformers' CausalDotProduct.

    The causal product of ``q``, ``k`` and ``v`` is scaled, per position, by
    the inverse of the dot product between ``q`` and the running sum of ``k``
    along dim -2 (offset by ``eps``).

    Args:
        q: query tensor.
        k: key tensor.
        v: value tensor.
        eps: constant added to the cumulative key sum before inversion.

    Returns:
        The normalised causal product.

    Note:
        The einsum subscripts imply a trailing (n, d) layout for the
        inputs - TODO confirm against callers.
    """
    from fast_transformers.causal_product import CausalDotProduct

    autocast_enabled = torch.is_autocast_enabled()
    is_half = isinstance(q, torch.cuda.HalfTensor)
    assert not is_half or APEX_AVAILABLE, 'half tensors can only be used if nvidia apex is available'

    # With native autocast active, the causal product runs inside a context
    # that switches autocast off; otherwise a no-op context is used.
    if autocast_enabled:
        cuda_context = partial(autocast, enabled = False)
    else:
        cuda_context = null_context

    # CUDA half inputs go through apex's float_function wrapper.
    if is_half:
        causal_dot_product_fn = amp.float_function(CausalDotProduct.apply)
    else:
        causal_dot_product_fn = CausalDotProduct.apply

    running_keys = k.cumsum(dim=-2) + eps
    normaliser = torch.einsum('...nd,...nd->...n', q, running_keys.type_as(q))
    inv_normaliser = 1. / normaliser

    with cuda_context():
        if autocast_enabled:
            q, k, v = (t.float() for t in (q, k, v))

        attended = causal_dot_product_fn(q, k, v)

    return torch.einsum('...nd,...n->...nd', attended, inv_normaliser)
Ejemplo n.º 3
0
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
from mydl import _C

from apex import amp

# Only valid with fp32 inputs - give AMP the hint.
# `nms` is the compiled `_C.nms` op wrapped with apex's `amp.float_function`.
nms = amp.float_function(_C.nms)

# nms.__doc__ = """
# This function performs Non-maximum suppression"""
Ejemplo n.º 4
0
 def float_wraps(*args, **kwargs):
     # While AMP training is active, `func` is wrapped with apex's
     # `amp.float_function` for this call; otherwise it is invoked as-is.
     # All arguments are forwarded unchanged.
     target = amp.float_function(func) if is_amp_training() else func
     return target(*args, **kwargs)
Ejemplo n.º 5
0
from models.ops import _C

from apex import amp

# Only valid with fp32 inputs - give AMP the hint.
# Both compiled ops are wrapped with apex's `amp.float_function`.
nms = amp.float_function(_C.nms)
ml_nms = amp.float_function(_C.ml_nms)

# nms.__doc__ = """
# This function performs Non-maximum suppression"""
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
import torch
from torch import nn
from torch.autograd import Function
# from torch.autograd.function import once_differentiable

from maskrcnn_benchmark import _Custom as _C

from apex import amp

# Rotated-box ops from the compiled `_Custom` extension, each wrapped with
# apex's `amp.float_function`.
# NOTE(review): presumably these kernels only accept fp32 inputs - confirm.
rotate_nms = amp.float_function(_C.rotate_nms)
rotate_soft_nms = amp.float_function(_C.rotate_soft_nms)
rotate_iou_matrix = amp.float_function(_C.rotate_iou_matrix)


class _RotateNMSFunction(Function):
    """Autograd ``Function`` that runs ``rotate_nms`` on rotated boxes."""

    @staticmethod
    def forward(ctx, r_boxes, scores, nms_threshold, post_nms_top_n=-1):
        """Return the indices kept by ``rotate_nms``.

        Args:
            ctx: autograd context (not used here).
            r_boxes: 2-D tensor with 5 columns, i.e. shape (N, 5).
            scores: forwarded to ``rotate_nms``.
            nms_threshold: forwarded to ``rotate_nms``.
            post_nms_top_n: when positive, only the first
                ``post_nms_top_n`` kept indices are returned.

        Returns:
            The (optionally truncated) result of ``rotate_nms``.
        """
        # r_boxes: (N,5)
        num_dims = len(r_boxes.shape)
        assert num_dims == 2 and r_boxes.size(1) == 5

        kept = rotate_nms(r_boxes, scores, nms_threshold)
        if post_nms_top_n > 0:
            return kept[:post_nms_top_n]
        return kept


def rotate_soft_nms_func(r_boxes,
                         scores,
Ejemplo n.º 7
0
                 embedding_dim,
                 device="cuda"):
        super(JointSparseEmbedding, self).__init__()
        self.embedding_dim = embedding_dim
        self.categorical_feature_sizes = copy.copy(categorical_feature_sizes)

        self.register_buffer(
            "offsets",
            torch.tensor([0] + categorical_feature_sizes).cumsum(0).to(device))
        self.weights = torch.nn.Parameter(
            torch.rand((self.offsets[-1].item(), embedding_dim),
                       device=device))

    def forward(self, categorical_inputs):
        """Gather embedding rows for a batch of categorical indices.

        ``self.offsets[:-1]`` is added to ``categorical_inputs`` before the
        gather on ``self.weights``.

        Args:
            categorical_inputs: indices whose second dimension must equal
                ``len(self.categorical_feature_sizes)``.

        Returns:
            The result of ``embedding_gather`` on the shifted indices.
        """
        # Check input has the right shape
        num_features = categorical_inputs.shape[1]
        assert num_features == len(self.categorical_feature_sizes)

        shifted_indices = categorical_inputs + self.offsets[:-1]
        return embedding_gather(self.weights, shifted_indices)

    def extra_repr(self):
        """Return a two-line summary: the feature sizes, then the offsets."""
        return (
            F"categorical_feature_sizes={self.categorical_feature_sizes}\n"
            F"offsets={self.offsets.cpu().numpy()}"
        )


# Gather entry point used by forward() above: EmbeddingGatherFunction.apply
# wrapped with apex's `amp.float_function`.
embedding_gather = amp.float_function(EmbeddingGatherFunction.apply)
                        ctx.padding[1], ctx.dilation[0], ctx.dilation[1],
                        weight.size(2), weight.size(3), ctx.rotation_groups)

        return (
            grad_input,
            grad_offset,
            grad_rotation,
            grad_weight,
        ) + (None, ) * 4

    @staticmethod
    def _output_size(input, kernel_size, stride, padding, dilation):
        output_size = (input.size(0), input.size(1))
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = padding[d]
            kernel = dilation[d] * (kernel_size[d] - 1) + 1
            stride_ = stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(map(lambda s: s > 0, output_size)):
            raise ValueError(
                "convolution input is too small (output would be {})".format(
                    'x'.join(map(str, output_size))))
        return output_size


# Register as fp32 functions: both `.apply` entry points are wrapped with
# apex's `amp.float_function`.
sample_depthwise = amp.float_function(SampleDepthwiseFunction.apply)
deform_sample_depthwise = amp.float_function(
    DeformableSampleDepthwiseFunction.apply)
Ejemplo n.º 9
0
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
from maskrcnn_benchmark import _C

from apex import amp

# Only valid with fp32 inputs - give AMP the hint.
# `soft_nms_f` is the wrapped raw op; the `soft_nms` function below is the
# Python-level entry point that calls it.
nms = amp.float_function(_C.nms)
soft_nms_f = amp.float_function(_C.soft_nms)

# nms.__doc__ = """
# This function performs Non-maximum suppression"""


def soft_nms(boxes,
             scores,
             nms_thresh=0.3,
             sigma=0.5,
             score_thresh=0.001,
             method=1):
    """Run ``soft_nms_f`` on copies of ``boxes`` and ``scores``.

    Args:
        boxes: box tensor; cloned before being handed to the op.
        scores: score tensor; cloned before being handed to the op.
        nms_thresh: forwarded to ``soft_nms_f``.
        sigma: forwarded to ``soft_nms_f``.
        score_thresh: forwarded to ``soft_nms_f``.
        method: 1) linear, 2) gaussian, else) original NMS.

    Returns:
        ``(indices, keep, scores_copy)`` where the first two come from
        ``soft_nms_f`` and the last is the cloned score tensor.
        NOTE(review): presumably the op rewrites the cloned scores in
        place, which is why the copy is returned - confirm.
    """
    boxes_copy, scores_copy = boxes.clone(), scores.clone()
    op_result = soft_nms_f(boxes_copy, scores_copy, nms_thresh, sigma,
                           score_thresh, method)
    indices, keep = op_result
    return indices, keep, scores_copy
from absl import logging
from apex import amp
from torch.autograd import Function

from dlrm.cuda_ext import fused_embedding


class BuckleEmbeddingFusedGatherFunction(Function):
    """Autograd wrapper around the fused embedding gather extension."""

    @staticmethod
    def forward(ctx, embedding, indices, offsets, amp_train):
        """Run the fused forward gather and stash the tensors for backward.

        Args:
            ctx: autograd context; receives ``embedding``, ``indices`` and
                ``offsets`` via ``save_for_backward``.
            embedding: forwarded to ``gather_gpu_fused_fwd``.
            indices: forwarded to ``gather_gpu_fused_fwd``.
            offsets: forwarded to ``gather_gpu_fused_fwd``.
            amp_train: forwarded to ``gather_gpu_fused_fwd``.

        Returns:
            The output of ``fused_embedding.gather_gpu_fused_fwd``.
        """
        gathered = fused_embedding.gather_gpu_fused_fwd(
            embedding, indices, offsets, amp_train)
        ctx.save_for_backward(embedding, indices, offsets)
        return gathered

    @staticmethod
    def backward(ctx, grad_output):
        """Compute the embedding gradient with the fused backward kernel.

        Returns:
            A 4-tuple matching forward's inputs: the gradient for
            ``embedding`` followed by ``None`` for the other three.
        """
        saved_embedding, saved_indices, saved_offsets = ctx.saved_tensors

        # Emitted through absl's log_first_n with n=1.
        logging.log_first_n(
            logging.WARNING,
            "Highly specialized embedding for embedding_dim 128", 1)
        embedding_grad = fused_embedding.gather_gpu_fused_bwd(
            saved_embedding, saved_indices, saved_offsets, grad_output)
        return (embedding_grad, None, None, None)


# Public entry point: the Function's `.apply` wrapped with apex's
# `amp.float_function`. Call it with the same arguments as forward()
# (embedding, indices, offsets, amp_train).
buckle_embedding_fused_gather = amp.float_function(
    BuckleEmbeddingFusedGatherFunction.apply)
Ejemplo n.º 11
0
from models.ops import _C
from apex import amp

# Only valid with fp32 inputs - give AMP the hint.
# Every compiled NMS variant is wrapped with apex's `amp.float_function`.
nms = amp.float_function(_C.nms)
ml_nms = amp.float_function(_C.ml_nms)
nms_rotated = amp.float_function(_C.nms_rotated)
nms_polygon = amp.float_function(_C.nms_polygon)
# nms.__doc__ = """
# This function performs Non-maximum suppression"""
Ejemplo n.º 12
0
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.

# from ._utils import _C
from maskrcnn_benchmark import _C

try:
    from apex import amp
    use_apex_amp = True
except ImportError:
    use_apex_amp = False


# Monkey patch in need for fp32
def nms_impl(dets, scores, threshold):
    """Forward ``dets``, ``scores`` and ``threshold`` to ``_C.nms``.

    This Python-level wrapper is the callable handed to
    ``amp.float_function`` when apex is importable.
    """
    return _C.nms(dets, scores, threshold)


# With apex importable, expose the AMP-wrapped Python shim; otherwise expose
# the compiled op directly.
nms = amp.float_function(nms_impl) if use_apex_amp else _C.nms

# nms.__doc__ = """
# This function performs Non-maximum suppression"""
Ejemplo n.º 13
0
import torch
from apex import amp

from ssd import extensions

# `extensions.nms` wrapped with apex's `amp.float_function`.
# NOTE(review): presumably the extension kernel expects fp32 inputs - confirm.
nms = amp.float_function(extensions.nms)
Ejemplo n.º 14
0
from apex import amp
from torchvision.ops import nms as thv_nms

from pet.lib.ops import _C

# Maps the user-facing soft-NMS method name to the integer code passed as
# the last argument of `_C.soft_nms`.
SOFT_NMS_METHODS = {'hard': 0, 'linear': 1, 'gaussian': 2}

# Only valid with fp32 inputs - give AMP the hint.
# `nms` wraps torchvision's implementation; the others wrap compiled `_C` ops.
nms = amp.float_function(thv_nms)
ml_nms = amp.float_function(_C.ml_nms)
nms_rotated = amp.float_function(_C.nms_rotated)
poly_nms = amp.float_function(_C.poly_nms)


def soft_nms(dets,
             scores,
             sigma=0.5,
             overlap_thresh=0.3,
             score_thresh=0.001,
             method='linear'):
    """Apply the soft NMS algorithm from https://arxiv.org/abs/1704.04503.

    Args:
        dets: forwarded to ``_C.soft_nms``.
        scores: forwarded to ``_C.soft_nms``.
        sigma: forwarded to ``_C.soft_nms``.
        overlap_thresh: forwarded to ``_C.soft_nms``.
        score_thresh: forwarded to ``_C.soft_nms``.
        method: a key of ``SOFT_NMS_METHODS``; translated to its integer
            code before the call.

    Returns:
        Whatever ``_C.soft_nms`` returns.
    """
    is_known_method = method in SOFT_NMS_METHODS
    assert is_known_method, 'Unknown soft_nms method: {}'.format(method)

    method_code = SOFT_NMS_METHODS[method]
    return _C.soft_nms(dets, scores, sigma, overlap_thresh, score_thresh,
                       method_code)


def ml_soft_nms(dets,
                scores,
Ejemplo n.º 15
0
 def float_function(fn):
     """Return ``fn`` wrapped by apex's ``amp.float_function``."""
     return amp.float_function(fn)