Beispiel #1
0
def test_sum_use_float():
    """Build a float32 kernel from ``Sum(k, (k, 1, 100))`` and check its result.

    The kernel writes the sum into every cell of a 1-D float32 field; the
    output is compared with the value obtained from ``doit()``.
    ``k`` is not defined in this function and is taken from the enclosing
    module.
    """
    series = sympy.Sum(k, (k, 1, 100))
    closed_form = series.doit()

    print(series)
    print(closed_form)

    x = pystencils.fields('x: float32[1d]')
    assignments = pystencils.AssignmentCollection({x.center(): series})

    ast = pystencils.create_kernel(assignments,
                                   data_type=create_type('float32'))
    code = str(pystencils.show_code(ast))
    kernel = ast.compile()

    print(code)
    print(pystencils.show_code(ast))
    # The generated source must contain the text 'float sum'.
    assert 'float sum' in code

    array = np.zeros((10, ), np.float32)
    kernel(x=array)

    expected = int(closed_form) * np.ones_like(array)
    assert np.allclose(array, expected)
def test_dynamic_matrix_location_dependent():
    """Print kernels whose 3x1 typed matrix is produced by a dynamic function.

    The dynamic function is called with ``pystencils.x_vector(3)`` and is
    declared once as a ``std::function`` and once with a template type.
    The test is skipped when ``TypedMatrixSymbol`` cannot be imported.
    """
    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    x, y = pystencils.fields('x, y:  float32[3d]')

    A = TypedMatrixSymbol('A', 3, 1, create_type('double'),
                          CustomCppType('Vector3<double>'))

    def show_kernel_for(function_type):
        # Assign the call result to A, then sum its three entries into y.
        call = DynamicFunction(TypedSymbol('my_fun', function_type), A.dtype,
                               *pystencils.x_vector(3))
        collection = pystencils.AssignmentCollection({
            A: call,
            y.center: A[0] + A[1] + A[2],
        })
        kernel_ast = pystencils.create_kernel(collection)
        pystencils.show_code(kernel_ast,
                             custom_backend=FrameworkIntegrationPrinter())

    show_kernel_for('std::function<Vector3<double>(int, int, int)>')
    show_kernel_for(TemplateType('Functor_T'))
Beispiel #3
0
def generate_shared_object(output_folder=None,
                           source_files=None,
                           show_code=False,
                           framework_module_class=TorchModule,
                           generate_code_only=False,
                           update_repo_files=False):
    """Generate, and optionally compile, the ``pyronn_torch_cpp`` extension.

    Args:
        output_folder: Directory receiving the compiled ``.so`` (and, with
            ``update_repo_files``, copies of the sources). Defaults to
            ``../../generated_files`` relative to this file. It is created
            if it does not exist.
        source_files: ``*.cu.cc`` sources to build. Defaults to all such
            files in the ``PYRO-NN-Layers`` directory next to this file.
        show_code: If true, print the generated module code.
        framework_module_class: Class instantiated with the module name and
            ``FUNCTIONS.values()`` to produce the module.
        generate_code_only: If true, return the module without compiling.
        update_repo_files: If true, also copy helper headers and sources
            into ``output_folder`` and write ``pyronn_torch.cpp`` there.

    Returns:
        The uncompiled module when ``generate_code_only`` is true, otherwise
        the value returned by ``module.compile(...)``.
    """
    object_cache = get_cache_config()['object_cache']

    module_name = 'pyronn_torch_cpp'
    layers_dir = join(dirname(__file__), 'PYRO-NN-Layers')
    build_dir = join(object_cache, module_name)

    if not output_folder:
        output_folder = join(dirname(__file__), '..', '..', 'generated_files')

    if not source_files:
        source_files = glob(join(layers_dir, '*.cu.cc'))

    # Refresh the helper headers next to the sources that will be compiled.
    makedirs(build_dir, exist_ok=True)
    rmtree(join(build_dir, 'helper_headers'), ignore_errors=True)
    copytree(join(layers_dir, 'helper_headers'),
             join(build_dir, 'helper_headers'))
    if update_repo_files:
        makedirs(output_folder, exist_ok=True)
        rmtree(join(output_folder, 'helper_headers'), ignore_errors=True)
        copytree(join(layers_dir, 'helper_headers'),
                 join(output_folder, 'helper_headers'))

    cuda_sources = []
    for s in source_files:
        # Torch only accepts *.cu as CUDA
        cuda_name = basename(s).replace('.cu.cc', '.cu')
        dst = join(build_dir, cuda_name)
        copyfile(s, dst)
        cuda_sources.append(dst)
        if update_repo_files:
            copyfile(s, join(output_folder, cuda_name))

    module = framework_module_class(module_name, FUNCTIONS.values())

    if show_code:
        pystencils.show_code(module, custom_backend=FrameworkIntegrationPrinter())

    if generate_code_only:
        return module

    extension = module.compile(extra_source_files=cuda_sources,
                               extra_cuda_flags=['-arch=sm_35'],
                               with_cuda=True,
                               compile_module_name=module_name)

    # copyfile does not create missing parent directories, so make sure the
    # destination exists even when update_repo_files is false.
    makedirs(output_folder, exist_ok=True)
    copyfile(module.compiled_file, join(output_folder, module_name + '.so'))

    if update_repo_files:
        with open(join(output_folder, 'pyronn_torch.cpp'), 'w') as f:
            f.write(pystencils.get_code_str(module, custom_backend=FrameworkIntegrationPrinter()))

    return extension
Beispiel #4
0
def test_genereric_projection():
    """Compile a forward projection with a symbolic 3x4 matrix for 'gpu' and show its code."""
    volume = pystencils.fields('volume: float32[3d]')
    projections = pystencils.fields('projections: float32[2D]')

    float32 = pystencils.data_types.create_type('float32')
    projection_matrix = pystencils_reco.matrix_symbols('T', float32, 3, 4)

    kernel = forward_projection(volume, projections,
                                projection_matrix).compile('gpu')
    pystencils.show_code(kernel)
def test_cuda_unknown():
    """Create a GPU kernel that calls the unknown function ``wtf`` and show its code."""
    x, y = pystencils.fields('x,y: float32 [2d]')

    target_symbol = get_dummy_symbol()
    unknown_call = sympy.Function('wtf')(address_of(y.center()), 2)
    assignments = pystencils.AssignmentCollection({target_symbol: unknown_call})

    kernel_ast = pystencils.create_kernel(assignments, target=Target.GPU)
    pystencils.show_code(kernel_ast)
Beispiel #6
0
def test_custom_backends_cpu():
    """Show a CPU kernel through ``ScreamingBackend`` and expect compiling with it to fail.

    Compilation with the custom backend must raise ``CalledProcessError``.
    """
    # The unpacking order differs from the order of names in the field string.
    dst, lhs, rhs = pystencils.fields("z, y, x: [2d]")

    update = pystencils.Assignment(
        dst[0, 0], lhs[0, 0] * sympy.log(lhs[0, 0] * rhs[0, 0]))
    normal_assignments = pystencils.AssignmentCollection([update], [])

    kernel_ast = pystencils.create_kernel(normal_assignments,
                                          target=Target.CPU)
    pystencils.show_code(kernel_ast, ScreamingBackend())

    with pytest.raises(CalledProcessError):
        pystencils.cpu.cpujit.make_python_function(
            kernel_ast, custom_backend=ScreamingBackend())
def test_cuda_but_not_c():
    """Create a CPU kernel that uses ``atomicAdd`` and ``rsqrtf`` and show its code."""
    x, y = pystencils.fields('x,y: float32 [2d]')

    dummy = get_dummy_symbol()
    atomic_add = sympy.Function('atomicAdd')(address_of(y.center()), 2)
    reciprocal_sqrt = sympy.Function('rsqrtf')(x[0, 0])

    assignments = pystencils.AssignmentCollection({
        dummy: atomic_add,
        y.center(): reciprocal_sqrt,
    })

    kernel_ast = pystencils.create_kernel(assignments, target=Target.CPU)
    pystencils.show_code(kernel_ast)
def test_destructuring_field_class():
    """Wrap a GPU kernel body in ``DestructuringBindingsForFieldClass`` and compile it.

    The code is printed before and after the body is wrapped.
    """
    # The unpacking order differs from the order of names in the field string.
    dst, lhs, rhs = pystencils.fields("z, y, x: [2d]")

    update = pystencils.Assignment(
        dst[0, 0], lhs[0, 0] * sympy.log(lhs[0, 0] * rhs[0, 0]))
    normal_assignments = pystencils.AssignmentCollection([update], [])

    kernel_ast = pystencils.create_kernel(normal_assignments, target='gpu')
    print(pystencils.show_code(kernel_ast))

    kernel_ast.body = DestructuringBindingsForFieldClass(kernel_ast.body)
    print(pystencils.show_code(kernel_ast))
    kernel_ast.compile()
Beispiel #9
0
def test_opencl_jit_with_parameter():
    """Run one kernel with a scalar parameter via CUDA and OpenCL and compare results.

    The same AST is compiled once through ``ast.compile()`` (pycuda arrays)
    and once through ``make_python_function`` with an OpenCL queue and
    context; both outputs must agree within ``np.allclose``.
    """
    z_field, y_field, x_field = pystencils.fields("z, y, x: [2d]")

    a = sp.Symbol('a')
    rhs = x_field[0, 0] * sp.log(x_field[0, 0] * y_field[0, 0]) + a
    assignments = pystencils.AssignmentCollection({z_field[0, 0]: rhs})

    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')

    print(ast)

    print(pystencils.show_code(ast, custom_backend=CudaBackend()))
    print(pystencils.show_code(ast, custom_backend=OpenClBackend()))

    cuda_kernel = ast.compile()
    assert cuda_kernel is not None

    import pycuda.gpuarray as gpuarray

    # Host inputs, drawn in the order x, y, z.
    host_arrays = {name: np.random.rand(20, 30) for name in ('x', 'y', 'z')}

    cuda_arrays = {name: gpuarray.to_gpu(values)
                   for name, values in host_arrays.items()}
    cuda_kernel(a=5., **cuda_arrays)

    result_cuda = cuda_arrays['z'].get()

    import pyopencl.array as cl_array
    ctx = cl.create_some_context(0)
    queue = cl.CommandQueue(ctx)

    opencl_arrays = {name: cl_array.to_device(queue, values)
                     for name, values in host_arrays.items()}

    opencl_kernel = make_python_function(ast, queue, ctx)
    assert opencl_kernel is not None
    opencl_kernel(a=5., **opencl_arrays)

    result_opencl = opencl_arrays['z'].get(queue)

    assert np.allclose(result_cuda, result_opencl)
Beispiel #10
0
def test_address_of_with_cse():
    """Create kernels from ``address_of`` expressions before and after ``sympy_cse``."""
    x, y = pystencils.fields('x,y: int64[2d]')
    s = pystencils.TypedSymbol('s', PointerType(create_type('int64')))

    def address_as_int64():
        # Address of the centre cell of x, cast to int64.
        return cast_func(address_of(x[0, 0]), create_type('int64'))

    assignments = pystencils.AssignmentCollection({
        y[0, 0]: address_as_int64() + s,
        x[0, 0]: address_as_int64() + 1,
    }, {})

    pystencils.show_code(pystencils.create_kernel(assignments))

    assignments_cse = sympy_cse(assignments)
    pystencils.show_code(pystencils.create_kernel(assignments_cse))
Beispiel #11
0
def test_address_of_with_cse():
    """Create kernels from ``address_of`` casts before and after ``sympy_cse``.

    Only the code of the kernel built after ``sympy_cse`` is printed.
    """
    x, y = pystencils.fields('x,y: int64[2d]')

    main_assignments = {
        y[0, 0]: cast_func(address_of(x[0, 0]), 'int64'),
        x[0, 0]: cast_func(address_of(x[0, 0]), 'int64') + 1,
    }
    assignments = pystencils.AssignmentCollection(main_assignments, {})

    pystencils.show_code(pystencils.create_kernel(assignments))

    assignments_cse = sympy_cse(assignments)
    cse_code = pystencils.show_code(pystencils.create_kernel(assignments_cse))
    print(cse_code)
Beispiel #12
0
def test_custom_backends_gpu():
    """Show a GPU kernel through ``ScreamingGpuBackend`` and expect compiling with it to fail.

    Skipped when ``pycuda`` is unavailable; compilation with the custom
    backend must raise ``pycuda.driver.CompileError``.
    """
    pytest.importorskip('pycuda')
    import pycuda.driver
    import pystencils.gpucuda.cudajit

    # The unpacking order differs from the order of names in the field string.
    dst, lhs, rhs = pystencils.fields("z, y, x: [2d]")

    update = pystencils.Assignment(
        dst[0, 0], lhs[0, 0] * sympy.log(lhs[0, 0] * rhs[0, 0]))
    normal_assignments = pystencils.AssignmentCollection([update], [])

    kernel_ast = pystencils.create_kernel(normal_assignments,
                                          target=Target.GPU)
    pystencils.show_code(kernel_ast, ScreamingGpuBackend())

    with pytest.raises(pycuda.driver.CompileError):
        pystencils.gpucuda.cudajit.make_python_function(
            kernel_ast, custom_backend=ScreamingGpuBackend())
Beispiel #13
0
def test_prod_var_limit():
    """Compile ``Sum(k, (k, 1, limit))`` with a runtime upper limit and check the result.

    The kernel is called with ``limit=100``; the expected value comes from
    substituting 100 for ``limit`` and calling ``doit()``.
    """
    k = pystencils.TypedSymbol('k', create_type('int64'))
    limit = pystencils.TypedSymbol('limit', create_type('int64'))

    series = sympy.Sum(k, (k, 1, limit))
    closed_form = series.replace(limit, 100).doit()

    print(series)
    print(closed_form)

    x = pystencils.fields('x: int64[1d]')
    assignments = pystencils.AssignmentCollection({x.center(): series})

    ast = pystencils.create_kernel(assignments)
    code = str(pystencils.show_code(ast))
    kernel = ast.compile()

    print(code)

    array = np.zeros((10, ), np.int64)
    kernel(x=array, limit=100)

    expected = int(closed_form) * np.ones_like(array)
    assert np.allclose(array, expected)
def test_typed_matrix():
    """Show a kernel that sums the three entries of a typed 3x1 matrix symbol.

    Skipped when ``TypedMatrixSymbol`` cannot be imported.
    """
    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    x, y = pystencils.fields('x, y:  float32[3d]')

    A = TypedMatrixSymbol('A', 3, 1, create_type('double'),
                          CustomCppType('Vector3<real_t>'))

    entry_sum = A[0] + A[1] + A[2]
    assignments = pystencils.AssignmentCollection({y.center: entry_sum})

    kernel_ast = pystencils.create_kernel(assignments)
    pystencils.show_code(kernel_ast,
                         custom_backend=FrameworkIntegrationPrinter())
Beispiel #15
0
def test_source_code_comment():
    """Append comment and empty-line nodes to a CPU kernel body and compile it."""
    a, b = pystencils.fields('a,b: float[2D]')

    assignments = pystencils.AssignmentCollection(
        {a.center(): b[0, 2] + b[0, 0]}, {})

    config = pystencils.CreateKernelConfig(target=pystencils.Target.CPU)
    ast = pystencils.create_kernel(assignments, config=config)

    astnodes = pystencils.astnodes
    extra_nodes = (
        astnodes.SourceCodeComment("Hallo"),
        astnodes.EmptyLine(),
        astnodes.SourceCodeComment("World!"),
    )
    for extra_node in extra_nodes:
        ast.body.append(extra_node)
    print(ast)

    compiled = ast.compile()
    assert compiled is not None

    pystencils.show_code(ast)
def test_cuda_known_functions():
    """Compile a GPU kernel that uses ``atomicAdd`` and ``rsqrtf``.

    The kernel AST is created first; showing and compiling it is skipped
    when ``pycuda`` is unavailable.
    """
    print(CudaSympyPrinter().known_functions)

    x, y = pystencils.fields('x,y: float32 [2d]')

    dummy = get_dummy_symbol()
    atomic_add = sympy.Function('atomicAdd')(address_of(y.center()), 2)
    reciprocal_sqrt = sympy.Function('rsqrtf')(x[0, 0])

    assignments = pystencils.AssignmentCollection({
        dummy: atomic_add,
        y.center(): reciprocal_sqrt,
    })

    ast = pystencils.create_kernel(assignments, target=Target.GPU)
    pytest.importorskip('pycuda')
    pystencils.show_code(ast)

    kernel = ast.compile()
    assert kernel is not None
def test_custom_backends():
    """Print CPU and GPU kernels through the screaming backends and expect compile failures.

    CPU compilation must raise ``CalledProcessError`` and GPU compilation
    must raise ``pycuda.driver.CompileError``.
    """
    # The unpacking order differs from the order of names in the field string.
    dst, lhs, rhs = pystencils.fields("z, y, x: [2d]")

    update = pystencils.Assignment(
        dst[0, 0], lhs[0, 0] * sympy.log(lhs[0, 0] * rhs[0, 0]))
    normal_assignments = pystencils.AssignmentCollection([update], [])

    cpu_ast = pystencils.create_kernel(normal_assignments, target='cpu')
    print(pystencils.show_code(cpu_ast, ScreamingBackend()))
    with pytest.raises(CalledProcessError):
        pystencils.cpu.cpujit.make_python_function(
            cpu_ast, custom_backend=ScreamingBackend())

    gpu_ast = pystencils.create_kernel(normal_assignments, target='gpu')
    print(pystencils.show_code(gpu_ast, ScreamingGpuBackend()))
    with pytest.raises(pycuda.driver.CompileError):
        pystencils.gpucuda.cudajit.make_python_function(
            gpu_ast, custom_backend=ScreamingGpuBackend())
Beispiel #18
0
def test_boundary_check(with_cse):
    """Run a 4-point stencil without ghost layers after adding boundary handling.

    Args:
        with_cse: Forwarded to ``add_fixed_constant_boundary_handling``.
    """
    f, g = ps.fields("f, g : [2D]")
    neighbour_mean = (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4
    stencil = ps.Assignment(g[0, 0], neighbour_mean)

    f_arr = np.random.rand(1000, 1000)
    g_arr = np.zeros_like(f_arr)

    assignments = add_fixed_constant_boundary_handling(
        ps.AssignmentCollection([stencil]), with_cse)
    print(assignments)

    kernel_checked = ps.create_kernel(assignments, ghost_layers=0).compile()
    ps.show_code(kernel_checked)

    # The call itself is the check: it must complete without a segfault.
    kernel_checked(f=f_arr, g=g_arr)
Beispiel #19
0
def test_address_of():
    """Print kernels that store a field address via a pointer symbol and directly."""
    x, y = pystencils.fields('x,y: int64[2d]')
    s = pystencils.TypedSymbol('s', PointerType('int64'))

    # Variant 1: keep the address in the pointer symbol, then cast it.
    via_pointer = pystencils.AssignmentCollection(
        {
            s: address_of(x[0, 0]),
            y[0, 0]: cast_func(s, 'int64'),
        }, {})
    print(pystencils.show_code(pystencils.create_kernel(via_pointer)))

    # Variant 2: cast the address directly.
    direct = pystencils.AssignmentCollection(
        {y[0, 0]: cast_func(address_of(x[0, 0]), 'int64')}, {})
    print(pystencils.show_code(pystencils.create_kernel(direct)))
Beispiel #20
0
def test_print_opencl():
    """Print a GPU kernel with the CUDA and OpenCL backends and check the OpenCL text."""
    z_field, y_field, x_field = pystencils.fields("z, y, x: [2d]")

    rhs = x_field[0, 0] * sp.log(x_field[0, 0] * y_field[0, 0])
    assignments = pystencils.AssignmentCollection({z_field[0, 0]: rhs})

    print(assignments)

    ast = pystencils.create_kernel(assignments, target='gpu')

    print(ast)

    print(pystencils.show_code(ast, custom_backend=CudaBackend()))

    opencl_code = pystencils.show_code(ast, custom_backend=OpenClBackend())
    print(opencl_code)

    # Substrings that must appear in the OpenCL output.
    expected_fragments = (
        "__global double * RESTRICT const _data_x",
        "__global double * RESTRICT",
        "get_local_id(0)",
    )
    for fragment in expected_fragments:
        assert fragment in str(opencl_code)
Beispiel #21
0
def test_cuda_unknown():
    """Check that the unknown function ``wtf`` is reported in the generated GPU code."""
    x, y = pystencils.fields('x,y: float32 [2d]')

    target_symbol = get_dummy_symbol()
    unknown_call = sympy.Function('wtf')(address_of(y.center()), 2)
    assignments = pystencils.AssignmentCollection({target_symbol: unknown_call})

    kernel_ast = pystencils.create_kernel(assignments, 'gpu')
    code = str(pystencils.show_code(kernel_ast))
    print(code)

    assert "Not supported in CUDA" in code
Beispiel #22
0
def test_address_of():
    """Check ``address_of`` properties and create kernels that use it."""
    x, y = pystencils.fields('x,y: int64[2d]')
    s = pystencils.TypedSymbol('s', PointerType(create_type('int64')))

    centre = x[0, 0]
    assert address_of(centre).canonical() == centre
    assert address_of(centre).dtype == PointerType(centre.dtype, restrict=True)
    assert address_of(sp.Symbol("a")).dtype == PointerType('void', restrict=True)

    # Variant 1: keep the address in the pointer symbol, then cast it.
    via_pointer = pystencils.AssignmentCollection({
        s: address_of(x[0, 0]),
        y[0, 0]: cast_func(s, create_type('int64')),
    }, {})
    pystencils.show_code(pystencils.create_kernel(via_pointer))

    # Variant 2: cast the address directly.
    direct = pystencils.AssignmentCollection({
        y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')),
    }, {})
    pystencils.show_code(pystencils.create_kernel(direct))
Beispiel #23
0
def test_cuda_but_not_c():
    """Check that ``atomicAdd``/``rsqrtf`` are reported in the generated CPU code."""
    x, y = pystencils.fields('x,y: float32 [2d]')

    dummy = get_dummy_symbol()
    atomic_add = sympy.Function('atomicAdd')(address_of(y.center()), 2)
    reciprocal_sqrt = sympy.Function('rsqrtf')(x[0, 0])

    assignments = pystencils.AssignmentCollection({
        dummy: atomic_add,
        y.center(): reciprocal_sqrt,
    })

    kernel_ast = pystencils.create_kernel(assignments, 'cpu')
    generated = str(pystencils.show_code(kernel_ast))

    assert "Not supported" in generated
def test_dynamic_matrix():
    """Show a kernel whose 3x1 typed matrix is produced by a dynamic function of ``a``.

    Skipped when ``TypedMatrixSymbol`` cannot be imported.
    """
    x, y = pystencils.fields('x, y:  float32[3d]')
    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    a = sp.symbols('a')

    A = TypedMatrixSymbol('A', 3, 1, create_type('double'), 'Vector3<double>')

    function_symbol = TypedSymbol('my_fun',
                                  'std::function<Vector3<double>(double)>')
    my_fun_call = DynamicFunction(function_symbol, A.dtype, a)

    entry_sum = A[0] + A[1] + A[2]
    assignments = pystencils.AssignmentCollection({
        A: my_fun_call,
        y.center: entry_sum,
    })

    kernel_ast = pystencils.create_kernel(assignments)
    pystencils.show_code(kernel_ast,
                         custom_backend=FrameworkIntegrationPrinter())
def test_dynamic_function():
    """Show kernels that add a dynamic function call to a field value.

    The function is declared once as ``std::function`` taking ``a`` and once
    with a template type taking ``a`` and the centre of ``x``.
    """
    x, y = pystencils.fields('x, y:  float32[3d]')

    a = sp.symbols('a')

    def show_kernel_with(call):
        # Add the call to x's centre and store the result in y's centre.
        collection = pystencils.AssignmentCollection(
            {y.center: x.center + call})
        kernel_ast = pystencils.create_kernel(collection)
        pystencils.show_code(kernel_ast,
                             custom_backend=FrameworkIntegrationPrinter())

    my_fun_call = DynamicFunction(
        TypedSymbol('my_fun', 'std::function<double(double)>'),
        create_type('double'), a)
    show_kernel_with(my_fun_call)

    template_fun_call = DynamicFunction(
        TypedSymbol('my_fun', TemplateType('Functor_T')),
        create_type('double'), a, x.center)
    show_kernel_with(template_fun_call)
    def to_dot(self, graph_style=None, with_code=False):
        """Build a ``graphviz.Digraph`` for this computation graph.

        Every field in ``self.reads`` and ``self.writes`` becomes a node
        labelled ``'<name> #<counter>'``. Every entry of
        ``self.computation_nodes`` becomes either a sub-graph (nested
        ``ComputationGraph``) or a node connected to its input and output
        fields.

        Args:
            graph_style: Accepted for interface compatibility; it is not
                applied to the graph.
            with_code: If true, kernel nodes are labelled with their code
                from ``pystencils.show_code`` instead of the function name.

        Returns:
            The populated ``graphviz.Digraph``.

        Raises:
            TypeError: If a computation node's kernel is neither a ``Swap``
                nor a ``KernelFunction``.
        """
        import graphviz

        def field_label(field):
            return f'{field.name} #{field.counter}'

        dot = graphviz.Digraph(str(id(self)))

        # Only the keys (fields) of the merged read/write mappings are needed.
        for field in {**self.reads, **self.writes}:
            label = field_label(field)
            dot.node(label, style='filled', fillcolor='#a056db', label=label)

        for node in self.computation_nodes:
            if isinstance(node, ComputationGraph):
                dot.subgraph(node.to_dot(with_code=with_code))
                continue

            node_id = str(id(node))
            if isinstance(node.kernel, Swap):
                name = f'Swap {id(node)}'
                fillcolor = '#ff5600'
            elif isinstance(node.kernel, KernelFunction):
                if with_code:
                    name = str(pystencils.show_code(node.kernel))
                else:
                    name = node.kernel.function_name
                fillcolor = '#0056db'
            else:
                # The original `raise 'foo'` raised a str, which itself
                # fails with an uninformative TypeError; keep the exception
                # type but say what went wrong.
                raise TypeError(
                    f'Cannot render computation node with kernel of type '
                    f'{type(node.kernel).__name__}')

            dot.node(node_id, style='filled', fillcolor=fillcolor, label=name)

            for input_node in node.input_nodes:
                dot.edge(field_label(input_node.field), node_id)
            for output_node in node.output_nodes:
                dot.edge(node_id, field_label(output_node.field))

        return dot
def test_blocking_staggered():
    """Compare a CPU-blocked staggered kernel with the unblocked reference kernel."""
    f, stag = ps.fields("f, stag(3): double[3D]")

    # Backward differences along each of the three axes.
    centre = (0, 0, 0)
    backward_neighbours = [(-1, 0, 0), (0, -1, 0), (0, 0, -1)]
    terms = [f[centre] - f[offset] for offset in backward_neighbours]

    blocked = ps.create_staggered_kernel(stag, terms,
                                         cpu_blocking=(3, 16, 8)).compile()
    reference = ps.create_staggered_kernel(stag, terms).compile()
    print(ps.show_code(blocked.ast))

    shape = (80, 33, 19)
    f_arr = np.random.rand(*shape)
    stag_arr = np.zeros(shape + (3, ))
    stag_ref = np.zeros(shape + (3, ))

    blocked(f=f_arr, stag=stag_arr)
    reference(f=f_arr, stag=stag_ref)
    np.testing.assert_almost_equal(stag_arr, stag_ref)
Beispiel #28
0
def test_conv(input_channels, output_channels):
    """Compile and run a channel convolution with a 3x3 stencil.

    Args:
        input_channels: Number of channels of the source field.
        output_channels: Number of channels of the destination field.
    """
    src_arr = np.random.rand(21, 31, input_channels)
    dst_arr = np.zeros([21, 31, output_channels])
    stencil_arr = np.ones([3, 3, input_channels, output_channels]) / 9

    dst, src = ps.fields(
        f'dst({output_channels}), src({input_channels}): [2d]')

    stencil = ps.fields(f'stencil({input_channels}, {output_channels}): [3,3]')
    stencil.field_type = ps.field.FieldType.CUSTOM

    kernel_ast = ps.create_kernel(channel_convolution(src, stencil, dst))
    print(ps.show_code(kernel_ast))

    run = kernel_ast.compile()
    run(dst=dst_arr, src=src_arr, stencil=stencil_arr)
Beispiel #29
0
def test_conv_advanced(input_channels, output_channels):
    """Compile and run a channel convolution with a non-square 5x4 stencil.

    Args:
        input_channels: Number of channels of the source field.
        output_channels: Number of channels of the destination field.
    """
    filter_shape = (5, 4)
    rows, cols = filter_shape

    src_arr = np.random.rand(21, 31, input_channels)
    dst_arr = np.zeros([21, 31, output_channels])
    stencil_arr = np.ones([rows, cols, input_channels,
                           output_channels]) / (5 * 4)

    dst, src = ps.fields(
        f'dst({output_channels}), src({input_channels}): [2d]')

    stencil = ps.fields(
        f'stencil({input_channels}, {output_channels}): [{rows}, {cols}]')
    stencil.field_type = ps.field.FieldType.CUSTOM

    kernel_ast = ps.create_kernel(channel_convolution(src, stencil, dst))
    print(ps.show_code(kernel_ast))

    run = kernel_ast.compile()
    run(dst=dst_arr, src=src_arr, stencil=stencil_arr)
Beispiel #30
0
def test_jacobi_fixed_field_size():
    """Compare a hand-assembled Jacobi kernel with a NumPy reference.

    The kernel AST is built manually from fixed-size fields, compiled and
    run; its output must match a reference computed on the interior cells.
    The displayed code must mention ``for`` in text and HTML form.
    """
    size = (30, 20)

    src_field_c = np.random.rand(*size)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(size)
    dst_field_py = np.zeros(size)

    f = Field.create_from_numpy_array("f", src_field_c)
    d = Field.create_from_numpy_array("d", dst_field_c)

    neighbour_mean = (f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]) / 4
    body = Block([SympyAssignment(d[0, 0], neighbour_mean)])
    loop_node, gl_info = make_loop_over_domain(body)
    ast_node = KernelFunction(loop_node,
                              'cpu',
                              'c',
                              make_python_function,
                              ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    # Reference on interior cells; operands are added in the order
    # (x-1, y), (x+1, y), (x, y-1), (x, y+1).
    dst_field_py[1:-1, 1:-1] = 0.25 * (
        src_field_py[:-2, 1:-1] + src_field_py[2:, 1:-1] +
        src_field_py[1:-1, :-2] + src_field_py[1:-1, 2:])

    kernel = ast_node.compile()
    kernel(f=src_field_c, d=dst_field_c)

    error = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(error, 0.0, atol=1e-13)

    code_display = show_code(ast_node)
    assert 'for' in str(code_display)
    assert 'for' in code_display._repr_html_()