def test_load_multiple_module(self):
    # Build/load the ``custom_conj_jit`` extension and check that the
    # resulting module exposes a non-None ``custom_conj`` attribute.
    build_options = dict(
        name='custom_conj_jit',
        sources=['custom_conj_op.cc'],
        extra_include_paths=paddle_includes,  # add for Coverage CI
        extra_cxx_cflags=extra_cc_args,  # test for cc flags
        extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
        verbose=True,
    )
    custom_module = load(**build_options)
    self.assertIsNotNone(custom_module.custom_conj)
# On Windows only: delete ``file`` when it exists.
if os.name == 'nt':
    if os.path.isfile(file):
        cmd = 'del {}'.format(file)
        run_cmd(cmd, True)

# Compile and load custom op Just-In-Time.
# custom_relu_op_dup.cc exists only for the multi-op test and does not
# define a new op; drop it from the list to test a single op.
sources = ['custom_relu_op.cc', 'custom_relu_op_dup.cc']
if not IS_MAC:
    # The CUDA source is added on every platform except macOS.
    sources += ['custom_relu_op.cu']

_relu_build_options = dict(
    name='custom_relu_module_jit',
    sources=sources,
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_cc_args,  # test for cc flags
    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
    verbose=True,
)
custom_module = load(**_relu_build_options)


class TestJITLoad(unittest.TestCase):
    """Test case built around the ops exposed by ``custom_module``."""

    def setUp(self):
        """Record the ops, dtypes and devices the tests iterate over."""
        module = custom_module
        self.custom_ops = [
            module.custom_relu,
            module.custom_relu_dup,
            module.custom_relu_no_x_in_backward,
        ]
        # float16 is only added when paddle reports a CUDA build.
        cuda_only_dtypes = (
            ['float16'] if paddle.is_compiled_with_cuda() else [])
        self.dtypes = ['float32', 'float64'] + cuda_only_dtypes
        self.devices = ['cpu']
import paddle
from paddle.utils.cpp_extension import load

# JIT-compile the custom op from its CUDA source.
custom_ops = load(
    name="custom_jit_ops",
    sources=["relu_cuda.cu"],
    verbose=True,
)

# Run the op once on a random 4x10 float32 tensor and show the result.
x = paddle.randn([4, 10], dtype='float32')
relu_out = custom_ops.custom_relu(x)
print(relu_out)
import numpy as np
import paddle
import paddle.nn as nn
from paddle.vision.transforms import ToTensor
from paddle.utils.cpp_extension import load

BATCH_SIZE = 32
EPOCH_NUM = 10

# JIT-compile the custom op from its C++ and CUDA sources.
custom_ops = load(
    name="custom_jit_ops",
    sources=["relu_cuda.cc", "relu_cuda.cu"],
)

# CIFAR-10 training split, with each sample passed through ``ToTensor``.
transform = ToTensor()
cifar10_train = paddle.vision.datasets.Cifar10(
    mode='train', transform=transform)


class MyNet(nn.Layer):
    """Layer holding two 3x3 convolutions, each paired with a 2x2 max-pool."""

    def __init__(self, num_classes=1):
        """Create the sub-layers.

        NOTE(review): ``num_classes`` is not used by the statements
        visible here -- confirm how the rest of the class consumes it.
        """
        super(MyNet, self).__init__()

        # Stage 1: 3 -> 32 channels, then stride-2 pooling.
        self.conv1 = nn.Conv2D(
            in_channels=3, out_channels=32, kernel_size=(3, 3))
        self.pool1 = nn.MaxPool2D(kernel_size=2, stride=2)

        # Stage 2: 32 -> 64 channels, then stride-2 pooling.
        self.conv2 = nn.Conv2D(
            in_channels=32, out_channels=64, kernel_size=(3, 3))
        self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2)
import numpy as np
import paddle.fluid as fluid
import os
import sys
import cv2
import time
import shapely
from shapely.geometry import Polygon
import paddle
from paddle.utils.cpp_extension import load

# JIT-compile the rbox IoU op from its C++ and CUDA sources.
custom_ops = load(
    name="custom_jit_ops",
    sources=["rbox_iou_op.cc", "rbox_iou_op.cu"],
)

# Device selection; the commented lines are the alternatives.
paddle.set_device('gpu:0')
#paddle.set_device('gpu')
#paddle.set_device('cpu')
paddle.disable_static()

# Two fixed single-row inputs that differ only in their last component.
rbox1 = np.array(
    [[772.0, 575.5, 90.75791931152344, 26.0, -0.3468348975810076]])
rbox2 = np.array(
    [[772.0, 575.5, 90.75791931152344, 26.0, 0.0]])

# Toggle: replace the fixed ``rbox1`` with uniformly random rows.
use_rand_data = True
# use_rand_data = False
if use_rand_data:
    rbox1 = np.random.rand(13000, 5)
# from paddle.utils.cpp_extension.extension_utils import run_cmd # from utils import paddle_includes, extra_cc_args, extra_nvcc_args # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. # file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format( # get_build_directory()) # if os.name == 'nt' and os.path.isfile(file): # cmd = 'del {}'.format(file) # run_cmd(cmd, True) custom_ops = load( name='custom_conj_jit', sources=['custom_conj_op.cc'], # extra_include_paths=paddle_includes, # add for Coverage CI # extra_cxx_cflags=extra_cc_args, # test for cc flags # extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags verbose=True) def is_complex(dtype): return dtype == paddle.fluid.core_avx.VarDesc.VarType.COMPLEX64 or \ dtype == paddle.fluid.core_avx.VarDesc.VarType.COMPLEX128 def to_complex(dtype): if dtype == "float32": return np.complex64 elif dtype == "float64": return np.complex128 else:
import os
__dirname__ = os.path.dirname(__file__)

from paddle.utils.cpp_extension import load

# Extension sources, listed relative to this file and joined onto its
# directory.
src_files = [
    os.path.join(__dirname__, relative_path)
    for relative_path in (
        'src/interpolate.cc',
        'src/three_interpolate_cuda.cu',
        'src/three_nn_cuda.cu',
    )
]

# Build/load the sources as the ``interpolate_ops`` module.
interpolate_ops = load(name="interpolate_ops", sources=src_files)

if __name__ == '__main__':
    # Smoke check: look up each expected attribute on the loaded module
    # and report it once the lookup has succeeded.
    three_nn_wrapper_ops = interpolate_ops.three_nn_wrapper
    print('three_nn_wrapper_ops is passed!')

    three_interpolate_wrapper_ops = interpolate_ops.three_interpolate_wrapper
    print('three_interpolate_wrapper_ops is passed!')

    three_interpolate_grad_wrapper_ops = (
        interpolate_ops.three_interpolate_grad_wrapper)
    print('three_interpolate_grad_wrapper_ops is passed!')
import os
__dirname__ = os.path.dirname(__file__)

from paddle.utils.cpp_extension import load

# Extension sources, listed relative to this file and joined onto its
# directory.
src_files = [
    'src/iou3d.cc',
    'src/iou3d_kernel.cu'
]
src_files = [os.path.join(__dirname__, filename) for filename in src_files]

# Build/load the sources as the ``iou3d_ops`` module.
iou3d_ops = load(
    name="iou3d_ops",
    sources=src_files)

if __name__ == '__main__':
    # Smoke check: look up each expected attribute on the loaded module
    # and report it, by variable name, once the lookup has succeeded.
    boxes_overlap_bev_gpu_ops = iou3d_ops.boxes_overlap_bev_gpu
    print('boxes_overlap_bev_gpu_ops is passed!')

    boxes_iou_bev_gpu_ops = iou3d_ops.boxes_iou_bev_gpu
    # The message previously read 'bev_gpu_ops is passed!', which did not
    # identify which op had been checked.
    print('boxes_iou_bev_gpu_ops is passed!')

    nms_gpu_ops = iou3d_ops.nms_gpu
    print('nms_gpu_ops is passed!')

    nms_normal_gpu_ops = iou3d_ops.nms_normal_gpu
    print('nms_normal_gpu_ops is passed!')
import os
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method

# switch to old custom op method
use_new_custom_op_load_method(False)

# Compile and load custom op Just-In-Time.
_relu2_build_options = dict(
    name='custom_relu2',
    sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_cc_args,  # test for cc flags
    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
    verbose=True,  # add for unittest
)
custom_module = load(**_relu2_build_options)


class TestJITLoad(unittest.TestCase):
    """Checks the ops exposed by the JIT-loaded ``custom_module``."""

    def test_api(self):
        # Input and the values expected after clamping negatives to zero.
        raw_data = np.array([[-1, 1, 0], [1, -1, -1]]).astype('float32')
        gt_data = np.array([[0, 1, 0], [1, 0, 0]]).astype('float32')
        x = paddle.to_tensor(raw_data, dtype='float32')

        # use custom api
        out = custom_module.relu2(x)
        out3 = custom_module.relu3(x)

        outputs_match = np.array_equal(out.numpy(), gt_data)
        self.assertTrue(outputs_match)
import os import numpy as np import paddle import paddle.static as static from paddle.utils.cpp_extension import load custom_relu = load(name='custom_relu_jit_lib', sources=['relu_op.cc', 'relu_op.cu']) def test_relu2_dynamic(device, dtype): paddle.set_device(device) x = np.random.uniform(-1, 1, [4, 8]).astype(dtype) t = paddle.to_tensor(x) t.stop_gradient = False out = custom_relu(t) out.stop_gradient = False print(out.numpy()) out.backward() def test_relu2_static(device, dtype): paddle.enable_static() paddle.set_device(device) with static.scope_guard(static.Scope()): with static.program_guard(static.Program()):
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import unittest
import numpy as np
import time

import paddle
import paddle.static as static
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd

# Build/load the custom concat op; headers are searched in the current
# working directory as well.
custom_ops = load(
    name='custom_concat_jit',
    sources=['custom_concat_op.cc'],
    extra_include_paths=["./"],
    verbose=True,
)

TEST_TIME = 1


def concat_dynamic(func, dtype, np_inputs, axis_v, with_attr=False):
    # Selects the CPU device, converts every entry of ``np_inputs`` to a
    # tensor of ``dtype`` that tracks gradients, and prepares ``axis``:
    # the plain value ``axis_v`` when ``with_attr`` is true, otherwise a
    # one-element int64 tensor filled with ``axis_v``.
    paddle.set_device("cpu")
    inputs = [
        paddle.to_tensor(x, dtype=dtype, stop_gradient=False)
        for x in np_inputs
    ]
    axis = (
        axis_v
        if with_attr
        else paddle.full(shape=[1], dtype='int64', fill_value=axis_v)
    )
import paddle
import numpy as np
from paddle.utils.cpp_extension import load, get_build_directory
from utils import paddle_includes, extra_cc_args
from paddle.utils.cpp_extension.extension_utils import run_cmd

# Windows runs do not use docker, so a previously built shared library may
# still sit in the cache directory; it is not compiled again unless that
# file is removed first.
file = '{}\\dispatch_op\\dispatch_op.pyd'.format(get_build_directory())
if os.name == 'nt':
    if os.path.isfile(file):
        cmd = 'del {}'.format(file)
        run_cmd(cmd, True)

_dispatch_build_options = dict(
    name='dispatch_op',
    sources=['dispatch_test_op.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_cc_args,
    verbose=True,
)
dispatch_op = load(**_dispatch_build_options)


class TestJitDispatch(unittest.TestCase):
    """Test case for the ops of the JIT-loaded ``dispatch_op`` module."""

    def setUp(self):
        """Run every test on the CPU device."""
        paddle.set_device('cpu')

    def run_dispatch_test(self, func, dtype):
        """Apply ``func`` to a 2x2 tensor of ones and check the output dtype.

        The assertion passes when the string ``dtype`` occurs in the name
        of the output array's numpy dtype.
        """
        np_x = np.ones([2, 2]).astype(dtype)
        x = paddle.to_tensor(np_x)
        out = func(x)
        np_x = x.numpy()
        np_out = out.numpy()
        out_dtype_name = str(np_out.dtype)
        self.assertTrue(dtype in out_dtype_name)
import numpy as np
import paddle
import paddle.nn as nn
from paddle.vision.transforms import ToTensor
from paddle.utils.cpp_extension import load

BATCH_SIZE = 32
EPOCH_NUM = 10

# JIT-compile the custom op from its single C++ source.
custom_ops = load(name="custom_jit_ops", sources=["relu_cpu_fp32.cc"])

# CIFAR-10 training split, with each sample passed through ``ToTensor``.
transform = ToTensor()
cifar10_train = paddle.vision.datasets.Cifar10(
    mode='train', transform=transform)


class MyNet(nn.Layer):
    """Layer holding three 3x3 convolutions; the first two have a 2x2 max-pool."""

    def __init__(self, num_classes=1):
        """Create the sub-layers.

        NOTE(review): ``num_classes`` is not used by the statements
        visible here -- confirm how the rest of the class consumes it.
        """
        super(MyNet, self).__init__()

        # Stage 1: 3 -> 32 channels, then stride-2 pooling.
        self.conv1 = nn.Conv2D(
            in_channels=3, out_channels=32, kernel_size=(3, 3))
        self.pool1 = nn.MaxPool2D(kernel_size=2, stride=2)

        # Stage 2: 32 -> 64 channels, then stride-2 pooling.
        self.conv2 = nn.Conv2D(
            in_channels=32, out_channels=64, kernel_size=(3, 3))
        self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2)

        # Stage 3: 64 -> 64 channels.
        self.conv3 = nn.Conv2D(
            in_channels=64, out_channels=64, kernel_size=(3, 3))
import os
__dirname__ = os.path.dirname(__file__)

from paddle.utils.cpp_extension import load

# Extension sources, listed relative to this file and joined onto its
# directory.
src_files = [
    os.path.join(__dirname__, relative_path)
    for relative_path in (
        'src/group_points.cc',
        'src/group_points_cuda.cu',
    )
]

# Build/load the sources as the ``group_points_ops`` module.
group_points_ops = load(name="group_points_ops", sources=src_files)

if __name__ == '__main__':
    # Smoke check: look up each expected attribute on the loaded module
    # and report it once the lookup has succeeded.
    group_points_grad_wrapper_ops = group_points_ops.group_points_grad_wrapper
    print('group_points_grad_wrapper_ops is passed!')

    group_points_wrapper_ops = group_points_ops.group_points_wrapper
    print('group_points_wrapper_ops is passed!')
import os
__dirname__ = os.path.dirname(__file__)

from paddle.utils.cpp_extension import load

# Extension sources, listed relative to this file and joined onto its
# directory.
src_files = [
    os.path.join(__dirname__, relative_path)
    for relative_path in (
        'src/furthest_point_sample.cc',
        'src/furthest_point_sample_cuda.cu',
    )
]

# Build/load the sources as the ``furthest_point_sample_ops`` module.
furthest_point_sample_ops = load(
    name="furthest_point_sample_ops", sources=src_files)

if __name__ == '__main__':
    # Smoke check: look up each expected attribute on the loaded module
    # and report it once the lookup has succeeded.
    furthest_point_sampling_wrapper_ops = (
        furthest_point_sample_ops.furthest_point_sampling_wrapper)
    print('furthest_point_sampling_wrapper_ops is passed!')

    furthest_point_sampling_with_dist_wrapper_ops = (
        furthest_point_sample_ops.furthest_point_sampling_with_dist_wrapper)
    print('furthest_point_sampling_with_dist_wrapper_ops is passed!')
import os
__dirname__ = os.path.dirname(__file__)

from paddle.utils.cpp_extension import load

# Extension sources, listed relative to this file and joined onto its
# directory.
src_files = [
    os.path.join(__dirname__, relative_path)
    for relative_path in (
        'src/gather_points.cc',
        'src/gather_points_cuda.cu',
    )
]

# Build/load the sources as the ``gather_points_ops`` module.
gather_points_ops = load(
    name="gather_points_ops",
    sources=src_files,
)

if __name__ == '__main__':
    # Smoke check: look up each expected attribute on the loaded module
    # and report it once the lookup has succeeded.
    gather_points_wrapper_ops = gather_points_ops.gather_points_wrapper
    print('gather_points_wrapper_ops is passed!')

    gather_points_grad_wrapper_ops = (
        gather_points_ops.gather_points_grad_wrapper)
    print('gather_points_grad_wrapper_ops is passed!')
import os
__dirname__ = os.path.dirname(__file__)

from paddle.utils.cpp_extension import load

# Extension sources, listed relative to this file and joined onto its
# directory.
src_files = [
    os.path.join(__dirname__, relative_path)
    for relative_path in ('src/knn.cc', 'src/knn_cuda.cu')
]

# Build/load the sources as the ``knn_ops`` module.
knn_ops = load(name="knn_ops", sources=src_files)

if __name__ == '__main__':
    # Smoke check: look up the expected attribute on the loaded module and
    # report it once the lookup has succeeded.
    knn_wrapper_ops = knn_ops.knn_wrapper
    print('knn_wrapper_ops is passed!')
from paddle.utils.cpp_extension import load from paddle.utils.cpp_extension import load, get_build_directory from paddle.utils.cpp_extension.extension_utils import run_cmd from utils import paddle_includes, extra_cc_args # Because Windows don't use docker, the shared lib already exists in the # cache dir, it will not be compiled again unless the shared lib is removed. file = '{}\\multi_out_jit\\multi_out_jit.pyd'.format(get_build_directory()) if os.name == 'nt' and os.path.isfile(file): cmd = 'del {}'.format(file) run_cmd(cmd, True) # Compile and load custom op Just-In-Time. multi_out_module = load( name='multi_out_jit', sources=['multi_out_test_op.cc'], extra_include_paths=paddle_includes, # add for Coverage CI extra_cxx_cflags=extra_cc_args, # test for cflags verbose=True) class TestMultiOutputDtypes(unittest.TestCase): def setUp(self): self.custom_op = multi_out_module.multi_out self.dtypes = ['float32', 'float64'] self.devices = ['cpu'] def run_static(self, device, dtype): paddle.set_device(device) x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype) with paddle.static.scope_guard(paddle.static.Scope()):
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.fluid.framework import _test_eager_guard

# Windows runs do not use docker, so a previously built shared library may
# still sit in the cache directory; it is not compiled again unless that
# file is removed first.
file = '{}\\custom_attrs_jit\\custom_attrs_jit.pyd'.format(
    get_build_directory())
if os.name == 'nt':
    if os.path.isfile(file):
        cmd = 'del {}'.format(file)
        run_cmd(cmd, True)

# Compile and load custom op Just-In-Time.
_attrs_build_options = dict(
    name='custom_attrs_jit',
    sources=['attr_test_op.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_cc_args,  # test for cflags
    extra_cuda_cflags=extra_nvcc_args,  # test for cflags
    verbose=True,
)
custom_attrs = load(**_attrs_build_options)


class TestJitCustomAttrs(unittest.TestCase):
    """Test case for the JIT-loaded ``custom_attrs`` module."""

    def setUp(self):
        """Select the CPU device and store one sample value per attribute."""
        paddle.set_device('cpu')

        # Scalar test values.
        self.bool_attr = True
        self.int_attr = 10
        self.float_attr = 3.14
        self.int64_attr = 10000000000  # larger than a 32-bit integer can hold
        self.str_attr = "StrAttr"

        # Vector test values.
        self.int_vec_attr = [10] * 3
import paddle
from paddle.utils.cpp_extension import load

# Just-in-time compilation of the custom op.
custom_ops = load(name="custom_jit_ops", sources=["relu_cpu.cc"], verbose=True)

# Use the API: select the CPU device, then apply the op to a random
# 4x10 float32 tensor.
paddle.set_device('cpu')
x = paddle.randn([4, 10], dtype='float32')
out = custom_ops.custom_relu(x)