def test_latex_mapper():
    """Check that the output of ``LaTeXMapper`` compiles with LaTeX.

    A set of expressions is rendered with ``LaTeXMapper`` (each annotated with
    its ``StringifyMapper`` form in a trailing LaTeX comment), substituted
    into ``LATEX_TEMPLATE`` and written to a temporary directory, on which
    the ``latex`` command is then run.

    The test is skipped if launching ``latex`` raises :class:`OSError`, and
    fails with the captured output if ``latex`` exits with a nonzero status.
    The temporary directory is removed in all cases.
    """
    import os
    import shutil
    import subprocess
    import tempfile

    from pymbolic import parse
    from pymbolic.mapper.stringifier import LaTeXMapper, StringifyMapper

    tm = LaTeXMapper()
    sm = StringifyMapper()

    equations = []

    def add(expr):
        # Add an equation to the list of tests.
        equations.append(r"\[%s\] %% from: %s" % (tm(expr), sm(expr)))

    add(parse("a * b + c"))
    add(parse("f(a,b,c)"))
    add(parse("a ** b ** c"))
    add(parse("(a | b) ^ ~c"))
    add(parse("a << b"))
    add(parse("a >> b"))
    add(parse("a[i,j,k]"))
    add(parse("a[1:3]"))
    add(parse("a // b"))
    add(parse("not (a or b) and c"))
    add(parse("(a % b) % c"))
    add(parse("(a >= b) or (b <= c)"))
    add(prim.Min((1,)) + prim.Max((1, 2)))
    add(prim.Substitution(prim.Variable("x") ** 2, ("x",), (2,)))
    add(prim.Derivative(parse("x**2"), ("x",)))

    # Run LaTeX and ensure the file compiles.

    latex_dir = tempfile.mkdtemp("pymbolic")

    try:
        tex_file_path = os.path.join(latex_dir, "input.tex")

        with open(tex_file_path, "w") as tex_file:
            contents = LATEX_TEMPLATE % "\n".join(equations)
            tex_file.write(contents)

        try:
            subprocess.check_output(
                    ["latex",
                     "-interaction=nonstopmode",
                     "-output-directory=%s" % latex_dir,
                     tex_file_path],
                    universal_newlines=True)
        except OSError:  # FIXME: Should be FileNotFoundError on Py3
            pytest.skip("latex command not found")
        except subprocess.CalledProcessError as err:
            # Raise explicitly instead of 'assert False': assert statements
            # are removed under 'python -O', which would let a failed LaTeX
            # run pass silently.
            raise AssertionError(str(err.output))

    finally:
        shutil.rmtree(latex_dir)
def get_temporary_decls(self, codegen_state, schedule_index):
    """Build the declarations for the kernel's temporary variables.

    Temporaries are visited in order of their name.

    * A temporary without ``base_storage`` gets its own declaration (with an
      initializer if it has one), unless its scope is global.
    * A temporary with ``base_storage`` is declared as a
      ``_ConstRestrictPointer`` initialized to an offset into the base
      storage. Each base storage name is declared once as a ``char`` array
      whose length is the maximum of the sizes recorded for it, wrapped in an
      ``AlignedAttribute`` using the largest recorded alignment.

    :returns: a list holding the base storage declarations followed by the
        temporary declarations, terminated by a ``Line()`` if non-empty.
    """
    from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line
    from loopy.kernel.array import VectorArrayDimTag
    from loopy.kernel.data import temp_var_scope
    from pytools import product, single_valued

    kernel = codegen_state.kernel

    storage_decls = []
    var_decls = []

    # {{{ declare temporaries

    # All three map a base storage name to a list with one entry per
    # temporary that uses that base storage.
    sizes_by_storage = {}
    scopes_by_storage = {}
    alignments_by_storage = {}

    ordered_temporaries = sorted(
            six.itervalues(kernel.temporary_variables),
            key=lambda tv: tv.name)

    for tv in ordered_temporaries:
        decl_info = tv.decl_info(self.target, index_dtype=kernel.index_dtype)

        if tv.base_storage:
            assert tv.initializer is None

            offset = 0
            sizes_by_storage.setdefault(tv.base_storage, []).append(tv.nbytes)
            scopes_by_storage.setdefault(tv.base_storage, []).append(tv.scope)

            # Alignment is the item size scaled by the length of every
            # axis tagged as a vector axis.
            align_size = tv.dtype.itemsize
            for dim_tag, axis_len in zip(tv.dim_tags, tv.shape):
                if isinstance(dim_tag, VectorArrayDimTag):
                    align_size *= axis_len

            alignments_by_storage.setdefault(
                    tv.base_storage, []).append(align_size)

            for idi in decl_info:
                unnamed_pod = POD(self, idi.dtype, "")
                named_pod = POD(self, idi.dtype, idi.name)

                unnamed_pod = self.wrap_temporary_decl(unnamed_pod, tv.scope)
                named_pod = self.wrap_temporary_decl(named_pod, tv.scope)

                # The 'restrict' part of this is a complete lie--of course
                # all these temporaries are aliased. But we're promising to
                # not use them to shovel data from one representation to the
                # other. That counts, right?
                cast_ptr = _ConstRestrictPointer(unnamed_pod)
                var_ptr = _ConstRestrictPointer(named_pod)

                cast_tp, cast_d = cast_ptr.get_decl_pair()
                var_decls.append(
                        Initializer(
                            var_ptr,
                            "(%s %s) (%s + %s)" % (
                                " ".join(cast_tp), cast_d,
                                tv.base_storage, offset)))

                offset += idi.dtype.itemsize * product(idi.shape)

            continue

        for idi in decl_info:
            # global temp vars are mapped to arguments or global declarations
            if tv.scope == temp_var_scope.GLOBAL:
                continue

            decl = self.wrap_temporary_decl(
                    self.get_temporary_decl(
                        codegen_state, schedule_index, tv, idi),
                    tv.scope)

            if tv.initializer is not None:
                assert tv.read_only
                decl = Initializer(
                        decl,
                        generate_array_literal(
                            codegen_state, tv, tv.initializer))

            var_decls.append(decl)

    ecm = self.get_expression_to_code_mapper(codegen_state)

    for bs_name, bs_sizes in sorted(six.iteritems(sizes_by_storage)):
        storage_decl = self.wrap_temporary_decl(
                Value("char", bs_name),
                single_valued(scopes_by_storage[bs_name]))

        # FIXME: Could try to use isl knowledge to simplify max.
        all_sizes_are_ints = all(isinstance(bs, int) for bs in bs_sizes)
        bs_size_max = (
                max(bs_sizes) if all_sizes_are_ints
                else p.Max(tuple(bs_sizes)))

        storage_decl = ArrayOf(storage_decl, ecm(bs_size_max))

        alignment = max(alignments_by_storage[bs_name])
        storage_decls.append(AlignedAttribute(alignment, storage_decl))

    # }}}

    result = storage_decls + var_decls
    if result:
        result.append(Line())

    return result
def get_temporary_decls(self, codegen_state, schedule_index):
    """Generate the code lines that allocate the global temporaries.

    One ``allocator(...)`` assignment is emitted per base storage name (sized
    to the maximum of the sizes of the temporaries sharing it) and one per
    global temporary without base storage. Temporaries with base storage are
    assigned the name of their base storage. Finally, ``_global_temporaries``
    is assigned a list of every name that received an allocation.

    :arg codegen_state: passed to ``get_expression_to_code_mapper`` and
        ``_get_global_temporaries``.
    :arg schedule_index: not used by this implementation.
    :returns: a list of ``genpy`` nodes, or an empty list if there are no
        global temporaries.
    """
    from collections import defaultdict
    from numbers import Number

    from genpy import Assign, Comment, Line
    import pymbolic.primitives as prim
    from pymbolic.mapper.stringifier import PREC_NONE

    ecm = self.get_expression_to_code_mapper(codegen_state)

    global_temporaries = self._get_global_temporaries(codegen_state)
    if not global_temporaries:
        return []

    # {{{ allocate space for the base_storage

    # base storage name -> set of sizes of the temporaries using it
    base_storage_sizes = defaultdict(set)

    for tv in global_temporaries:
        if tv.base_storage:
            base_storage_sizes[tv.base_storage].add(tv.nbytes)

    # }}}

    allocated_var_names = []
    code_lines = [
        Line(),
        Comment("{{{ allocate global temporaries"),
        Line(),
    ]

    for name, sizes in base_storage_sizes.items():
        if all(isinstance(s, Number) for s in sizes):
            size = max(sizes)
        else:
            # NOTE(review): 'sizes' is a set, so the operand order of the Max
            # follows set iteration order -- confirm that is acceptable.
            size = prim.Max(tuple(sizes))

        allocated_var_names.append(name)
        code_lines.append(
                Assign(name, f"allocator({ecm(size, PREC_NONE, 'i')})"))

    for tv in global_temporaries:
        if tv.base_storage:
            # The shared allocation was emitted above; just alias it.
            assert tv.base_storage in base_storage_sizes
            code_lines.append(Assign(tv.name, tv.base_storage))
        else:
            nbytes_str = ecm(tv.nbytes, PREC_NONE, "i")
            allocated_var_names.append(tv.name)
            code_lines.append(Assign(tv.name, f"allocator({nbytes_str})"))

    code_lines.append(
            Assign(
                "_global_temporaries",
                "[{tvs}]".format(tvs=", ".join(allocated_var_names))))

    code_lines.append(Line())
    code_lines.append(Comment("}}}"))
    code_lines.append(Line())

    return code_lines
def get_temporary_decls(self, codegen_state, schedule_index):
    """Build the temporary variable declarations for one sub-kernel.

    Temporaries are visited in order of their name.

    * A temporary without ``base_storage`` gets its own declaration (with an
      initializer if it has one), provided its address space is not global
      and it is read or written in the sub-kernel found at *schedule_index*
      of the kernel's linearization.
    * A temporary with ``base_storage`` is declared as a pointer initialized
      to an offset into the base storage, except that global ones are skipped
      while generating device code. Each base storage name is declared once
      as a ``char`` array whose length is the maximum of the sizes recorded
      for it, wrapped in an ``AlignedAttribute`` using the largest recorded
      alignment.

    :returns: a list holding the base storage declarations followed by the
        temporary declarations, terminated by a ``Line()`` if non-empty.
    """
    from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line
    from loopy.kernel.array import VectorArrayDimTag
    from loopy.kernel.data import AddressSpace
    from loopy.schedule.tools import (temporaries_read_in_subkernel,
            temporaries_written_in_subkernel)
    from pytools import product, single_valued

    kernel = codegen_state.kernel

    storage_decls = []
    var_decls = []

    # {{{ declare temporaries

    # All three map a base storage name to a list with one entry per
    # temporary that uses that base storage.
    sizes_by_storage = {}
    spaces_by_storage = {}
    alignments_by_storage = {}

    # Getting the temporary variables that are needed for the current
    # sub-kernel.
    subkernel = kernel.linearization[schedule_index].kernel_name
    temps_read = temporaries_read_in_subkernel(kernel, subkernel)
    temps_written = temporaries_written_in_subkernel(kernel, subkernel)
    sub_knl_temps = temps_read | temps_written

    ordered_temporaries = sorted(
            kernel.temporary_variables.values(),
            key=lambda tv: tv.name)

    for tv in ordered_temporaries:
        decl_info = tv.decl_info(self.target, index_dtype=kernel.index_dtype)

        if tv.base_storage:
            assert tv.initializer is None

            if (codegen_state.is_generating_device_code
                    and tv.address_space == AddressSpace.GLOBAL):
                # global temps trigger no codegen in the device code
                continue

            offset = 0
            sizes_by_storage.setdefault(tv.base_storage, []).append(tv.nbytes)
            spaces_by_storage.setdefault(
                    tv.base_storage, []).append(tv.address_space)

            # Alignment is the item size scaled by the length of every
            # axis tagged as a vector axis.
            align_size = tv.dtype.itemsize
            for dim_tag, axis_len in zip(tv.dim_tags, tv.shape):
                if isinstance(dim_tag, VectorArrayDimTag):
                    align_size *= axis_len

            alignments_by_storage.setdefault(
                    tv.base_storage, []).append(align_size)

            for idi in decl_info:
                unnamed_pod = POD(self, idi.dtype, "")
                named_pod = POD(self, idi.dtype, idi.name)

                unnamed_pod = self.wrap_temporary_decl(
                        unnamed_pod, tv.address_space)
                named_pod = self.wrap_temporary_decl(
                        named_pod, tv.address_space)

                # The 'restrict' part of _ConstRestrictPointer is a complete
                # lie--of course all these temporaries are aliased. But we're
                # promising to not use them to shovel data from one
                # representation to the other. That counts, right?
                ptrtype = (
                        _ConstPointer
                        if tv._base_storage_access_may_be_aliasing
                        else _ConstRestrictPointer)

                cast_ptr = ptrtype(unnamed_pod)
                var_ptr = ptrtype(named_pod)

                cast_tp, cast_d = cast_ptr.get_decl_pair()
                var_decls.append(
                        Initializer(
                            var_ptr,
                            "({} {}) ({} + {})".format(
                                " ".join(cast_tp), cast_d,
                                tv.base_storage, offset)))

                offset += idi.dtype.itemsize * product(idi.shape)

            continue

        for idi in decl_info:
            # global temp vars are mapped to arguments or global declarations
            if (tv.address_space == AddressSpace.GLOBAL
                    or tv.name not in sub_knl_temps):
                continue

            decl = self.wrap_temporary_decl(
                    self.get_temporary_decl(
                        codegen_state, schedule_index, tv, idi),
                    tv.address_space)

            if tv.initializer is not None:
                assert tv.read_only
                decl = Initializer(
                        decl,
                        generate_array_literal(
                            codegen_state, tv, tv.initializer))

            var_decls.append(decl)

    ecm = self.get_expression_to_code_mapper(codegen_state)

    for bs_name, bs_sizes in sorted(sizes_by_storage.items()):
        storage_decl = self.wrap_temporary_decl(
                Value("char", bs_name),
                single_valued(spaces_by_storage[bs_name]))

        # FIXME: Could try to use isl knowledge to simplify max.
        all_sizes_are_ints = all(isinstance(bs, int) for bs in bs_sizes)
        bs_size_max = (
                max(bs_sizes) if all_sizes_are_ints
                else p.Max(tuple(bs_sizes)))

        storage_decl = ArrayOf(storage_decl, ecm(bs_size_max))

        alignment = max(alignments_by_storage[bs_name])
        storage_decls.append(AlignedAttribute(alignment, storage_decl))

    # }}}

    result = storage_decls + var_decls
    if result:
        result.append(Line())

    return result