def backward(ctx, grad_output):
    """Backward pass for ROIAlign (NHWC- or NCHW-laid-out input).

    Args:
        ctx: autograd context carrying ``saved_tensors`` (the rois),
            ``output_size``, ``spatial_scale``, ``sampling_ratio``,
            ``input_shape`` and the layout flag ``is_nhwc`` stashed by forward.
        grad_output: gradient w.r.t. the pooled output.

    Returns:
        Tuple of (grad_input, None * 5) matching forward's argument count.
    """
    rois, = ctx.saved_tensors
    output_size = ctx.output_size
    spatial_scale = ctx.spatial_scale
    sampling_ratio = ctx.sampling_ratio
    # input_shape was recorded in the layout the forward pass saw.
    if not ctx.is_nhwc:
        bs, ch, h, w = ctx.input_shape
    else:
        bs, h, w, ch = ctx.input_shape
    ## TODO: NHWC kernel + transposes is faster than NCHW backward kernel
    ## Might change to transposes + NHWC kernel if we want to speed up NCHW case
    ## Cast to fp32 for the kernel because FP16 atomics is slower than FP32 in Volta
    grad_input = _C.roi_align_backward(
        grad_output.float(),
        rois,
        spatial_scale,
        output_size[0],
        output_size[1],
        bs,
        ch,
        h,
        w,
        sampling_ratio,
        ctx.is_nhwc,
    ).to(dtype=grad_output.dtype)
    # Fix: the original hard-coded `.half()`, which silently downcast the
    # gradient when grad_output was fp32. Restoring the caller's dtype is
    # identical for fp16 inputs and correct for fp32 ones.
    return grad_input, None, None, None, None, None
def backward(ctx, grad_output):
    """Compute the gradient of ROIAlign w.r.t. the input feature map.

    Args:
        ctx: autograd context with the saved rois plus ``output_size``,
            ``spatial_scale``, ``sampling_ratio`` and ``input_shape``
            stashed by the forward pass.
        grad_output: gradient flowing in from the pooled output.

    Returns:
        Tuple of (grad_input, None, None, None, None) — one slot per
        forward argument.
    """
    (rois,) = ctx.saved_tensors
    batch, channels, height, width = ctx.input_shape
    pooled_h = ctx.output_size[0]
    pooled_w = ctx.output_size[1]
    grad_input = _C.roi_align_backward(
        grad_output,
        rois,
        ctx.spatial_scale,
        pooled_h,
        pooled_w,
        batch,
        channels,
        height,
        width,
        ctx.sampling_ratio,
    )
    return grad_input, None, None, None, None