def __init__(self, output_size, spatial_scale, sampling_ratio):
    super(ROIAlign, self).__init__()
    self.output_size = output_size
    self.spatial_scale = spatial_scale
    self.sampling_ratio = sampling_ratio
    if use_apex_amp:
        self.fwd = amp.float_function(self.fwd_impl)
    else:
        self.fwd = self.fwd_impl
def causal_linear_attention(q, k, v, eps = 1e-6):
    from fast_transformers.causal_product import CausalDotProduct
    autocast_enabled = torch.is_autocast_enabled()
    is_half = isinstance(q, torch.cuda.HalfTensor)
    assert not is_half or APEX_AVAILABLE, 'half tensors can only be used if nvidia apex is available'
    cuda_context = null_context if not autocast_enabled else partial(autocast, enabled = False)

    causal_dot_product_fn = amp.float_function(CausalDotProduct.apply) if is_half else CausalDotProduct.apply

    k_cumsum = k.cumsum(dim=-2) + eps
    D_inv = 1. / torch.einsum('...nd,...nd->...n', q, k_cumsum.type_as(q))

    with cuda_context():
        if autocast_enabled:
            q, k, v = map(lambda t: t.float(), (q, k, v))
        out = causal_dot_product_fn(q, k, v)

    out = torch.einsum('...nd,...n->...nd', out, D_inv)
    return out
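# Hedged usage sketch (not part of the original source): calling causal_linear_attention
# with random (batch, heads, seq_len, head_dim) tensors. Assumes the surrounding module
# already defines APEX_AVAILABLE, null_context, partial, autocast and amp, and that a
# CUDA device plus the fast_transformers package are available; the shapes are illustrative.
import torch

q = torch.randn(2, 8, 1024, 64, device='cuda')
k = torch.randn(2, 8, 1024, 64, device='cuda')
v = torch.randn(2, 8, 1024, 64, device='cuda')

out = causal_linear_attention(q, k, v)  # -> (2, 8, 1024, 64)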
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
from mydl import _C

from apex import amp

# Only valid with fp32 inputs - give AMP the hint
nms = amp.float_function(_C.nms)

# nms.__doc__ = """
# This function performs Non-maximum suppression"""
def float_wraps(*args, **kwargs):
    # Inner wrapper of a decorator: `func` and `is_amp_training` come from the
    # enclosing scope. Calls go through apex's fp32 cast only during AMP training.
    if is_amp_training():
        return amp.float_function(func)(*args, **kwargs)
    else:
        return func(*args, **kwargs)
from models.ops import _C

from apex import amp

# Only valid with fp32 inputs - give AMP the hint
nms = amp.float_function(_C.nms)
ml_nms = amp.float_function(_C.ml_nms)

# nms.__doc__ = """
# This function performs Non-maximum suppression"""
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
import torch
from torch import nn
from torch.autograd import Function
# from torch.autograd.function import once_differentiable

from maskrcnn_benchmark import _Custom as _C

from apex import amp

rotate_nms = amp.float_function(_C.rotate_nms)
rotate_soft_nms = amp.float_function(_C.rotate_soft_nms)
rotate_iou_matrix = amp.float_function(_C.rotate_iou_matrix)


class _RotateNMSFunction(Function):
    @staticmethod
    def forward(ctx, r_boxes, scores, nms_threshold, post_nms_top_n=-1):
        # r_boxes: (N, 5)
        assert len(r_boxes.shape) == 2 and r_boxes.size(1) == 5

        keep_inds = rotate_nms(r_boxes, scores, nms_threshold)
        if post_nms_top_n > 0:
            keep_inds = keep_inds[:post_nms_top_n]

        return keep_inds


def rotate_soft_nms_func(r_boxes, scores,
                 embedding_dim, device="cuda"):
        super(JointSparseEmbedding, self).__init__()

        self.embedding_dim = embedding_dim
        self.categorical_feature_sizes = copy.copy(categorical_feature_sizes)
        self.register_buffer(
            "offsets",
            torch.tensor([0] + categorical_feature_sizes).cumsum(0).to(device))

        self.weights = torch.nn.Parameter(
            torch.rand((self.offsets[-1].item(), embedding_dim), device=device))

    def forward(self, categorical_inputs):
        # Check input has the right shape
        assert categorical_inputs.shape[1] == len(self.categorical_feature_sizes)

        embedding_out = embedding_gather(self.weights, categorical_inputs + self.offsets[:-1])

        return embedding_out

    def extra_repr(self):
        s = F"categorical_feature_sizes={self.categorical_feature_sizes}\n"
        s += F"offsets={self.offsets.cpu().numpy()}"
        return s


embedding_gather = amp.float_function(EmbeddingGatherFunction.apply)
            ctx.padding[1],
            ctx.dilation[0],
            ctx.dilation[1],
            weight.size(2),
            weight.size(3),
            ctx.rotation_groups)

        return (
            grad_input,
            grad_offset,
            grad_rotation,
            grad_weight,
        ) + (None, ) * 4

    @staticmethod
    def _output_size(input, kernel_size, stride, padding, dilation):
        output_size = (input.size(0), input.size(1))
        for d in range(input.dim() - 2):
            in_size = input.size(d + 2)
            pad = padding[d]
            kernel = dilation[d] * (kernel_size[d] - 1) + 1
            stride_ = stride[d]
            output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
        if not all(map(lambda s: s > 0, output_size)):
            raise ValueError(
                "convolution input is too small (output would be {})".format(
                    'x'.join(map(str, output_size))))
        return output_size


# register as fp32 functions.
sample_depthwise = amp.float_function(SampleDepthwiseFunction.apply)
deform_sample_depthwise = amp.float_function(
    DeformableSampleDepthwiseFunction.apply)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# from ._utils import _C
from maskrcnn_benchmark import _C

from apex import amp

# Only valid with fp32 inputs - give AMP the hint
nms = amp.float_function(_C.nms)
soft_nms_f = amp.float_function(_C.soft_nms)

# nms.__doc__ = """
# This function performs Non-maximum suppression"""


def soft_nms(boxes, scores, nms_thresh=0.3, sigma=0.5, score_thresh=0.001, method=1):
    # method: 1) linear, 2) gaussian, else) original NMS
    boxes2 = boxes.clone()
    scores2 = scores.clone()
    indices, keep = soft_nms_f(boxes2, scores2, nms_thresh, sigma, score_thresh, method)
    return indices, keep, scores2
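# Hedged usage sketch (not part of the original source): soft_nms as defined above is
# assumed to take fp32 CUDA tensors, with `boxes` shaped (N, 4) in xyxy format and
# `scores` shaped (N,). The boxes, scores and thresholds below are illustrative only.
import torch

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]], device='cuda')
scores = torch.tensor([0.9, 0.8, 0.7], device='cuda')

indices, keep, rescored = soft_nms(boxes, scores, nms_thresh=0.3, sigma=0.5,
                                   score_thresh=0.001, method=2)  # method 2 = gaussian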
from absl import logging
from apex import amp
from torch.autograd import Function

from dlrm.cuda_ext import fused_embedding


class BuckleEmbeddingFusedGatherFunction(Function):
    """Customized embedding gather"""

    @staticmethod
    def forward(ctx, embedding, indices, offsets, amp_train):
        output = fused_embedding.gather_gpu_fused_fwd(embedding, indices, offsets, amp_train)
        ctx.save_for_backward(embedding, indices, offsets)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        embedding, indices, offsets = ctx.saved_tensors
        logging.log_first_n(
            logging.WARNING,
            "Highly specialized embedding for embedding_dim 128",
            1)
        grad_weights = fused_embedding.gather_gpu_fused_bwd(
            embedding, indices, offsets, grad_output)
        return grad_weights, None, None, None


buckle_embedding_fused_gather = amp.float_function(
    BuckleEmbeddingFusedGatherFunction.apply)
from models.ops import _C

from apex import amp

# Only valid with fp32 inputs - give AMP the hint
nms = amp.float_function(_C.nms)
ml_nms = amp.float_function(_C.ml_nms)
nms_rotated = amp.float_function(_C.nms_rotated)
nms_polygon = amp.float_function(_C.nms_polygon)

# nms.__doc__ = """
# This function performs Non-maximum suppression"""
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
# from ._utils import _C
from maskrcnn_benchmark import _C

try:
    from apex import amp
    use_apex_amp = True
except ImportError:
    use_apex_amp = False


# Plain Python wrapper needed so AMP can be given the fp32 hint
def nms_impl(dets, scores, threshold):
    return _C.nms(dets, scores, threshold)


if use_apex_amp:
    nms = amp.float_function(nms_impl)
else:
    nms = _C.nms

# nms.__doc__ = """
# This function performs Non-maximum suppression"""
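# Hedged usage sketch (not part of the original source): with the guarded wrapping above,
# nms is called the same way whether or not apex is installed. The detections are assumed
# to be fp32 (N, 4) xyxy tensors on the GPU; the values and the 0.5 IoU threshold are
# illustrative only.
import torch

dets = torch.tensor([[0., 0., 10., 10.],
                     [1., 1., 11., 11.]], device='cuda')
scores = torch.tensor([0.95, 0.90], device='cuda')

keep = nms(dets, scores, 0.5)  # indices of the boxes kept at IoU threshold 0.5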
import torch

from apex import amp

from ssd import extensions

nms = amp.float_function(extensions.nms)
from apex import amp
from torchvision.ops import nms as thv_nms

from pet.lib.ops import _C

SOFT_NMS_METHODS = {'hard': 0, 'linear': 1, 'gaussian': 2}

# Only valid with fp32 inputs - give AMP the hint
nms = amp.float_function(thv_nms)
ml_nms = amp.float_function(_C.ml_nms)
nms_rotated = amp.float_function(_C.nms_rotated)
poly_nms = amp.float_function(_C.poly_nms)


def soft_nms(dets, scores, sigma=0.5, overlap_thresh=0.3, score_thresh=0.001, method='linear'):
    """
    Apply the soft NMS algorithm from https://arxiv.org/abs/1704.04503.
    """
    assert method in SOFT_NMS_METHODS, 'Unknown soft_nms method: {}'.format(method)
    return _C.soft_nms(dets, scores, sigma, overlap_thresh, score_thresh,
                       SOFT_NMS_METHODS[method])


def ml_soft_nms(dets, scores,
def float_function(fn):
    return amp.float_function(fn)
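# Hedged usage sketch (not part of the original source): the thin wrapper above can be
# used as a decorator so a custom fp32-only op is always given the fp32 hint under apex
# AMP. `my_fp32_only_op` is a hypothetical stand-in, not a function from the codebase.
import torch

@float_function
def my_fp32_only_op(x):
    # stand-in for an op whose kernel only accepts float32 inputs
    return torch.linalg.inv(x)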