예제 #1
0
    def _specialize_iet(self, iet, **kwargs):
        """
        Wrap the Iteration/Expression tree with the OPS setup/teardown calls.

        For each affine tree found in ``iet``:

        * the symbols reported by ``FindSymbols('symbolics')`` minus those
          reported by ``FindSymbols('defines')`` are collected; every
          non-Constant one is turned into an OPS dataset via
          ``create_ops_dat``;
        * an OPS kernel is generated via ``opsit`` and appended to
          ``self._ops_kernels``.

        ``self._headers`` and ``self._includes`` are extended as a side effect.

        Parameters
        ----------
        iet : the Iteration/Expression tree to be specialized.
        **kwargs : accepted for interface compatibility; unused here.

        Returns
        -------
        A ``List`` whose body is, in order: ``ops_init``, the OPS block
        declaration, the pre-time-loop statements, ``ops_partition``, the
        input ``iet`` and ``ops_exit``.

        Raises
        ------
        AssertionError
            If the affine trees do not all have the same number of dimensions.
        """
        warning("The OPS backend is still work-in-progress")

        ops_init = Call(namespace['ops_init'], [0, 0, 2])
        ops_partition = Call(namespace['ops_partition'], Literal('""'))
        ops_exit = Call(namespace['ops_exit'])

        ops_block = OpsBlock('block')

        # Computed once and reused by both loops below
        affine_trees = find_affine_trees(iet).items()

        # Extract all symbols that need to be converted to ops_dat
        dims = []
        to_dat = set()
        for _, trees in affine_trees:
            dims.append(len(trees[0].dimensions))
            symbols = set(FindSymbols('symbolics').visit(trees[0].root))
            symbols -= set(FindSymbols('defines').visit(trees[0].root))
            to_dat |= symbols

        # NOTE: `all(...)` is required here -- asserting on a bare generator
        # expression is always true, since a generator object is truthy.
        # The check is performed before any state of `self` is modified.
        assert all(d == dims[0] for d in dims), \
            "The OPS backend currently assumes that all kernels " \
            "have the same number of dimensions"

        # To ensure deterministic code generation we order the datasets to
        # be generated (since a set is an unordered collection)
        to_dat = filter_sorted(to_dat)

        name_to_ops_dat = {}
        pre_time_loop = []
        for f in to_dat:
            if f.is_Constant:
                continue

            pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))

        for n, (_, trees) in enumerate(affine_trees):
            pre_loop, ops_kernel = opsit(trees, n)

            pre_time_loop.extend(pre_loop)
            self._ops_kernels.append(ops_kernel)

        ops_block_init = Expression(
            ClusterizedEq(
                Eq(ops_block,
                   namespace['ops_decl_block'](dims[0], Literal('"block"')))))

        self._headers.append(namespace['ops_define_dimension'](dims[0]))
        self._includes.append('stdio.h')

        body = [
            ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            ops_exit
        ]

        return List(body=body)
예제 #2
0
    def _specialize_iet(self, iet, **kwargs):
        """
        Replace the affine trees of ``iet`` with OPS parallel-loop call blocks.

        Each group of affine trees is handed to ``opsit``, which returns a
        callable kernel, a list of declarations, the block of calls replacing
        the first tree of the group, and the number of dimensions. The
        remaining trees of the group are mapped to None, i.e. dropped.

        ``self._includes``, ``self._header_functions`` and ``self._headers``
        are extended as a side effect.

        Returns a ``List`` made of: the ``ops_init`` call, all collected
        declarations, the transformed ``iet``, the ``ops_timing_output`` call
        and the ``ops_exit`` call.
        """
        replacements = {}

        self._includes.append('ops_seq.h')

        init_call = Call("ops_init", [0, 0, 2])
        timing_call = Call("ops_timing_output", [FunctionPointer("stdout")])
        exit_call = Call("ops_exit")

        collected = []
        ndims = None
        for idx, (_, nest) in enumerate(find_affine_trees(iet).items()):
            kernel, decls, par_loop_block, ndims = opsit(nest, idx)
            collected.extend(decls)

            self._header_functions.append(kernel)
            replacements[nest[0].root] = par_loop_block
            # Drop trees: any root without a replacement is mapped to None
            for tree in nest:
                replacements.setdefault(tree.root, None)

        # The value produced by the last `opsit` call defines the macro
        self._headers.append('#define OPS_%sD' % ndims)
        warning("The OPS backend is still work-in-progress")

        transformed = Transformer(replacements).visit(iet)

        return List(body=[
            init_call, *collected, transformed, timing_call, exit_call
        ])
예제 #3
0
    def _specialize_iet(self, iet, **kwargs):
        """
        Offload one or more loop nests of the Iteration/Expression tree onto
        YASK.

        For each group of affine trees a YASK compiler solution is created and
        populated through ``yaskit``; the first tree of the group is replaced
        by a call running the solution, while the others are dropped. Groups
        for which a NotImplementedError is raised are logged and skipped. The
        resulting kernel solutions are stored in ``self.yk_solns``.

        The transformed tree is finally handed to the parent class'
        ``_specialize_iet``, whose result is returned.
        """
        self.yk_solns = OrderedDict()
        replacements = {}
        for idx, (_, nest) in enumerate(find_affine_trees(iet).items()):
            root_dims = (node.dim.root for node in flatten(nest))
            dimensions = tuple(filter_ordered(root_dims))
            context = contexts.fetch(dimensions, self._dtype)

            # A unique name for the 'real' compiler and kernel solutions
            digest = Signer._digest(configuration,
                                    *[tree.root for tree in nest])
            name = namespace['jit-soln'](digest)

            # The YASK compiler solution for this Operator
            yc_soln = context.make_yc_solution(name)

            try:
                # Create the YASK grids; `yc_soln` gets filled with equations
                local_grids = yaskit(nest, yc_soln)

                # The IET nodes replacing the offloaded trees
                run_key = namespace['code-soln-run']
                soln_obj = YaskSolnObject(namespace['code-soln-name'](idx))
                offloaded = Offloaded(
                    make_sharedptr_funcall(run_key, ['time'], soln_obj),
                    self._dtype)
                replacements[nest[0].root] = offloaded
                # Drop trees: any root without a replacement is mapped to None
                for tree in nest:
                    replacements.setdefault(tree.root, None)

                # The offloaded call is an external function call
                self._func_table[run_key] = MetaCall(None, False)

                # JIT-compile the newly-created YASK kernel
                self.yk_solns[(dimensions, soln_obj)] = \
                    context.make_yk_solution(name, yc_soln, local_grids)

                # Report some information about the new solution
                summary = (yc_soln.get_name(), yc_soln.get_num_grids(),
                           yc_soln.get_num_equations())
                log("Solution '%s' contains %d grid(s) and %d equation(s)." %
                    summary)
            except NotImplementedError as err:
                reason = str(err)
                log("Unable to offload a candidate tree. Reason: [%s]" %
                    reason)
        iet = Transformer(replacements).visit(iet)

        if not self.yk_solns:
            log("No offloadable trees found")

        # Trees that were not offloaded may still access YASK-managed data,
        # whose storage layout differs from Devito's, so their accesses are
        # rewritten
        grid_objs = {}
        for f in self._input:
            if f.from_YASK:
                grid_objs[f.name] = YaskGridObject(f.name)
        for grid_name in self._local_grids:
            grid_objs[grid_name] = YaskGridObject(grid_name)
        iet = make_grid_accesses(iet, grid_objs)

        # Finally optimize all non-yaskized loops
        return super(OperatorYASK, self)._specialize_iet(iet, **kwargs)
예제 #4
0
파일: operator.py 프로젝트: BrunoMot/devito
    def _specialize_iet(self, iet, **kwargs):
        """
        Offload the affine loop nests of the Iteration/Expression tree to OPS.

        For each affine tree found in ``iet``:

        * the symbols reported by ``FindSymbols('symbolics')`` minus those
          reported by ``FindSymbols('defines')`` are collected; for every
          non-Constant one an OPS dataset is created before the time loop
          (``create_ops_dat``) and fetched back after it
          (``create_ops_fetch``);
        * an OPS kernel is generated via ``opsit`` and registered in
          ``self._func_table``; the first tree is replaced by the OPS
          parallel-loop call, the others are dropped.

        ``self._headers`` and ``self._includes`` are extended as a side effect.

        Parameters
        ----------
        iet : the Iteration/Expression tree to be specialized.
        **kwargs : accepted for interface compatibility; unused here.

        Returns
        -------
        ``iet`` itself if it contains no affine trees; otherwise a ``List``
        whose body is, in order: ``ops_init``, the OPS block declaration, the
        pre-time-loop statements, ``ops_partition``, the transformed ``iet``,
        the after-time-loop statements and ``ops_exit``.

        Raises
        ------
        AssertionError
            If the affine trees do not all have the same number of dimensions.
        """
        warning("The OPS backend is still work-in-progress")

        affine_trees = find_affine_trees(iet).items()

        # If there are no affine trees, then there is no loop to be optimized
        # using OPS.
        if not affine_trees:
            return iet

        ops_init = Call(namespace['ops_init'], [0, 0, 2])
        ops_partition = Call(namespace['ops_partition'], Literal('""'))
        ops_exit = Call(namespace['ops_exit'])

        # Extract all symbols that need to be converted to ops_dat
        dims = []
        to_dat = set()
        for _, tree in affine_trees:
            dims.append(len(tree[0].dimensions))
            symbols = set(FindSymbols('symbolics').visit(tree[0].root))
            symbols -= set(FindSymbols('defines').visit(tree[0].root))
            to_dat |= symbols

        # NOTE: `all(...)` is required here -- asserting on a bare generator
        # expression is always true, since a generator object is truthy.
        # The check is performed before `dims[0]` is used for code generation.
        assert all(d == dims[0] for d in dims), \
            "The OPS backend currently assumes that all kernels " \
            "have the same number of dimensions"

        # Create the OPS block for this problem
        ops_block = OpsBlock('block')
        ops_block_init = Expression(
            ClusterizedEq(
                Eq(ops_block,
                   namespace['ops_decl_block'](dims[0], Literal('"block"')))))

        # To ensure deterministic code generation we order the datasets to
        # be generated (since a set is an unordered collection)
        to_dat = filter_sorted(to_dat)

        name_to_ops_dat = {}
        pre_time_loop = []
        after_time_loop = []
        for f in to_dat:
            if f.is_Constant:
                continue

            pre_time_loop.extend(
                list(create_ops_dat(f, name_to_ops_dat, ops_block)))
            # To return the result to Devito, it is necessary to copy the data
            # from the dat object back to the CPU memory.
            after_time_loop.extend(
                create_ops_fetch(f, name_to_ops_dat,
                                 self.time_dimension.extreme_max))

        # Generate ops kernels for each offloadable iteration tree
        mapper = {}
        for n, (_, tree) in enumerate(affine_trees):
            pre_loop, ops_kernel, ops_par_loop_call = opsit(
                tree, n, name_to_ops_dat, ops_block, dims[0])

            pre_time_loop.extend(pre_loop)
            self._func_table[namespace['ops_kernel_file'](ops_kernel.name)] = \
                MetaCall(ops_kernel, False)
            mapper[tree[0].root] = ops_par_loop_call
            mapper.update({i.root: mapper.get(i.root)
                           for i in tree})  # Drop trees

        iet = Transformer(mapper).visit(iet)

        self._headers.append(namespace['ops_define_dimension'](dims[0]))
        self._includes.extend(['stdio.h', 'ops_seq.h'])

        body = [
            ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            *after_time_loop, ops_exit
        ]

        return List(body=body)
예제 #5
0
def make_ops_kernels(iet):
    """
    Offload the affine loop nests of the Iteration/Expression tree to OPS.

    For each affine tree found in ``iet``:

    * the symbols reported by ``FindSymbols('symbolics')`` minus those
      reported by ``FindSymbols('defines')`` are collected; for every
      non-Constant one an OPS dataset is created before the time loop
      (``create_ops_dat``) and fetched back after it (``create_ops_fetch``);
    * an OPS kernel is generated via ``opsit``; the first tree is replaced by
      the OPS parallel-loop call, the others are dropped.

    Parameters
    ----------
    iet : the Iteration/Expression tree to be transformed.

    Returns
    -------
    A 2-tuple. If ``iet`` contains no affine trees, ``(iet, {})`` with ``iet``
    untouched. Otherwise the rebuilt ``iet`` -- whose body is wrapped between
    the OPS initialization and finalization statements -- and a dict with the
    keys ``'includes'``, ``'ffuncs'`` (the generated kernels) and
    ``'headers'``.

    Raises
    ------
    AssertionError
        If the affine trees do not all have the same number of dimensions.
    """
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there are no affine trees, then there is no loop to be optimized
    # using OPS.
    if not affine_trees:
        return iet, {}

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # NOTE: `all(...)` is required here -- asserting on a bare generator
    # expression is always true, since a generator object is truthy.
    # The check is performed before `dims[0]` is used for code generation.
    assert all(d == dims[0] for d in dims), \
        "The OPS backend currently assumes that all kernels " \
        "have the same number of dimensions"

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block, namespace['ops_decl_block'](dims[0],
                                                      Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue

        pre_time_loop.extend(
            list(create_ops_dat(f, name_to_ops_dat, ops_block)))
        # Copy data from device to host
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat, f.grid.time_dim.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    ffuncs = []
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])

        pre_time_loop.extend(pre_loop)
        ffuncs.append(ops_kernel)
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    iet = iet._rebuild(body=flatten([
        ops_init, ops_block_init, pre_time_loop, ops_partition, iet.body,
        after_time_loop, ops_exit
    ]))

    return iet, {
        'includes': ['stdio.h', 'ops_seq.h'],
        'ffuncs': ffuncs,
        'headers': [namespace['ops_define_dimension'](dims[0])]
    }
예제 #6
0
def make_yask_kernels(iet, **kwargs):
    """
    Offload one or more loop nests of the Iteration/Expression tree onto YASK.

    The mandatory keyword argument ``yk_solns`` is a mapping that gets filled
    with the JIT-compiled kernel solutions, keyed by
    ``(dimensions, solution object)``.

    A group of affine trees is skipped if its section's expressions do not
    share one single dtype, or if a NotImplementedError is raised while it is
    being processed; in both cases a message is logged.

    Returns the 2-tuple ``(iet, {})``, where ``iet`` is the transformed tree
    with its parameters re-derived.
    """
    yk_solns = kwargs.pop('yk_solns')

    replacements = {}
    for idx, (section, nest) in enumerate(find_affine_trees(iet).items()):
        root_dims = (node.dim.root for node in flatten(nest))
        dimensions = tuple(filter_ordered(root_dims))

        # All expressions in the section must agree on the dtype
        found_dtypes = set()
        for expr in FindNodes(Expression).visit(section):
            found_dtypes.add(expr.dtype)
        if len(found_dtypes) != 1:
            log("Unable to offload in presence of mixed-precision arithmetic")
            continue
        dtype = next(iter(found_dtypes))

        context = contexts.fetch(dimensions, dtype)

        # A unique name for the 'real' compiler and kernel solutions
        digest = Signer._digest(configuration, *[tree.root for tree in nest])
        name = namespace['jit-soln'](digest)

        # The YASK compiler solution for this Operator
        yc_soln = context.make_yc_solution(name)

        try:
            # Create the YASK vars; `yc_soln` gets filled with equations
            local_vars = yaskit(nest, yc_soln)

            # The IET nodes replacing the offloaded trees
            soln_obj = YASKSolnObject(namespace['code-soln-name'](idx))
            offloaded = Offloaded(
                make_sharedptr_funcall(namespace['code-soln-run'], ['time'],
                                       soln_obj),
                dtype)
            replacements[nest[0].root] = offloaded
            # Drop trees: any root without a replacement is mapped to None
            for tree in nest:
                replacements.setdefault(tree.root, None)

            # JIT-compile the newly-created YASK kernel
            yk_solns[(dimensions, soln_obj)] = \
                context.make_yk_solution(name, yc_soln, local_vars)

            # Report some information about the new solution
            summary = (yc_soln.get_name(), yc_soln.get_num_vars(),
                       yc_soln.get_num_equations())
            log("Solution '%s' contains %d var(s) and %d equation(s)." %
                summary)
        except NotImplementedError as err:
            log("Unable to offload a candidate tree. Reason: [%s]" % str(err))
    iet = Transformer(replacements).visit(iet)

    if not yk_solns:
        log("No offloadable trees found")

    # Trees that were not offloaded may still access YASK-managed data, whose
    # storage layout is different, so their accesses are rewritten
    var_objs = {}
    for symbol in FindSymbols().visit(iet):
        if symbol.from_YASK:
            var_objs[symbol.name] = YASKVarObject(symbol.name)
    for var_name in get_local_vars(yk_solns):
        var_objs[var_name] = YASKVarObject(var_name)
    iet = make_var_accesses(iet, var_objs)

    # The signature needs to be updated
    # TODO: this could be done automagically through the iet pass engine, but
    # currently it only supports *appending* to the parameters list. While here
    # we actually need to change it as some parameters may disappear (x_m, x_M, ...)
    iet = iet._rebuild(parameters=derive_parameters(iet, True))

    return iet, {}
예제 #7
0
파일: operator.py 프로젝트: opesci/devito
    def _specialize_iet(self, iet, **kwargs):
        """
        Offload the computation of one or more loop nests of the
        Iteration/Expression tree onto YASK.

        The YASK compiler is invoked (through ``yaskit`` and the context's
        ``make_yc_solution``/``make_yk_solution``) for each group of affine
        trees; in the transformed tree, the first tree of a group is replaced
        by a call to the generated YASK code and the remaining ones are
        dropped. A group raising NotImplementedError is logged and left
        untouched. The kernel solutions end up in ``self.yk_solns``.

        Returns whatever the parent class' ``_specialize_iet`` produces for
        the transformed tree.
        """
        offload_map = {}
        self.yk_solns = OrderedDict()
        for n, (_, group) in enumerate(find_affine_trees(iet).items()):
            dimensions = tuple(filter_ordered(
                iteration.dim.root for iteration in flatten(group)
            ))
            context = contexts.fetch(dimensions, self._dtype)

            # A unique name for the 'real' compiler and kernel solutions
            group_roots = [tree.root for tree in group]
            name = namespace['jit-soln'](
                Signer._digest(configuration, *group_roots))

            # The YASK compiler solution for this Operator
            yc_soln = context.make_yc_solution(name)

            try:
                # Create the YASK grids; `yc_soln` gets filled with equations
                local_grids = yaskit(group, yc_soln)

                # The IET nodes replacing the offloaded trees
                yk_soln_obj = YaskSolnObject(namespace['code-soln-name'](n))
                call = make_sharedptr_funcall(
                    namespace['code-soln-run'], ['time'], yk_soln_obj)
                offload_map[group[0].root] = Offloaded(call, self._dtype)
                # Drop trees: any root without a replacement is mapped to None
                for tree in group:
                    offload_map.setdefault(tree.root, None)

                # The offloaded call is an external function call
                external = MetaCall(None, False)
                self._func_table[namespace['code-soln-run']] = external

                # JIT-compile the newly-created YASK kernel
                key = (dimensions, yk_soln_obj)
                self.yk_solns[key] = context.make_yk_solution(
                    name, yc_soln, local_grids)

                # Report some information about the new solution
                log("Solution '%s' contains %d grid(s) and %d equation(s)." % (
                    yc_soln.get_name(),
                    yc_soln.get_num_grids(),
                    yc_soln.get_num_equations(),
                ))
            except NotImplementedError as exc:
                log("Unable to offload a candidate tree. Reason: [%s]" % str(exc))
        iet = Transformer(offload_map).visit(iet)

        if not self.yk_solns:
            log("No offloadable trees found")

        # Trees that were not offloaded may still access YASK-managed data,
        # whose storage layout differs from Devito's, so their accesses are
        # rewritten
        yk_grid_objs = {}
        for func in self._input:
            if func.from_YASK:
                yk_grid_objs[func.name] = YaskGridObject(func.name)
        for grid in self._local_grids:
            yk_grid_objs[grid] = YaskGridObject(grid)
        iet = make_grid_accesses(iet, yk_grid_objs)

        # Finally optimize all non-yaskized loops
        return super(OperatorYASK, self)._specialize_iet(iet, **kwargs)
예제 #8
0
    def _specialize_iet(cls, iet, **kwargs):
        """
        Offload one or more loop nests of the Iteration/Expression tree onto
        YASK.

        The mandatory keyword argument ``yk_solns`` is removed from ``kwargs``
        and filled with the JIT-compiled kernel solutions, keyed by
        ``(dimensions, solution object)``.

        A group of affine trees is skipped if its section's expressions do not
        share one single dtype, or if a NotImplementedError is raised while it
        is being processed; in both cases a message is logged.

        Returns whatever the parent class' ``_specialize_iet`` produces for
        the transformed tree, called with the remaining ``kwargs``.
        """
        replacements = {}
        yk_solns = kwargs.pop('yk_solns')
        for idx, (section, nest) in enumerate(find_affine_trees(iet).items()):
            root_dims = (node.dim.root for node in flatten(nest))
            dimensions = tuple(filter_ordered(root_dims))

            # All expressions in the section must agree on the dtype
            found_dtypes = set()
            for expr in FindNodes(Expression).visit(section):
                found_dtypes.add(expr.dtype)
            if len(found_dtypes) != 1:
                log("Unable to offload in presence of mixed-precision arithmetic")
                continue
            dtype = next(iter(found_dtypes))

            context = contexts.fetch(dimensions, dtype)

            # A unique name for the 'real' compiler and kernel solutions
            digest = Signer._digest(configuration,
                                    *[tree.root for tree in nest])
            name = namespace['jit-soln'](digest)

            # The YASK compiler solution for this Operator
            yc_soln = context.make_yc_solution(name)

            try:
                # Create the YASK vars; `yc_soln` gets filled with equations
                local_vars = yaskit(nest, yc_soln)

                # The IET nodes replacing the offloaded trees
                soln_obj = YaskSolnObject(namespace['code-soln-name'](idx))
                offloaded = Offloaded(
                    make_sharedptr_funcall(namespace['code-soln-run'],
                                           ['time'], soln_obj),
                    dtype)
                replacements[nest[0].root] = offloaded
                # Drop trees: any root without a replacement is mapped to None
                for tree in nest:
                    replacements.setdefault(tree.root, None)

                # JIT-compile the newly-created YASK kernel
                yk_solns[(dimensions, soln_obj)] = \
                    context.make_yk_solution(name, yc_soln, local_vars)

                # Report some information about the new solution
                summary = (yc_soln.get_name(), yc_soln.get_num_vars(),
                           yc_soln.get_num_equations())
                log("Solution '%s' contains %d var(s) and %d equation(s)." %
                    summary)
            except NotImplementedError as err:
                reason = str(err)
                log("Unable to offload a candidate tree. Reason: [%s]" %
                    reason)
        iet = Transformer(replacements).visit(iet)

        if not yk_solns:
            log("No offloadable trees found")

        # Trees that were not offloaded may still access YASK-managed data,
        # whose storage layout is different, so their accesses are rewritten
        var_objs = {}
        for symbol in FindSymbols().visit(iet):
            if symbol.from_YASK:
                var_objs[symbol.name] = YaskVarObject(symbol.name)
        for var_name in cls._get_local_vars(yk_solns):
            var_objs[var_name] = YaskVarObject(var_name)
        iet = make_var_accesses(iet, var_objs)

        # The signature needs to be updated
        iet = iet._rebuild(parameters=derive_parameters(iet, True))

        return super(OperatorYASK, cls)._specialize_iet(iet, **kwargs)