コード例 #1
0
ファイル: test_firedrake_972.py プロジェクト: ellipsis14/tsfc
def count_flops(n):
    """Return the estimated flop count of a spectral-mode kernel.

    A linear form is assembled on a mesh built from a degree-1 ``CG``
    vector element over ``interval``.  It combines two vector-valued
    coefficients with a tensor-valued test function through three
    explicitly index-summed terms, is compiled with ``mode='spectral'``,
    and the resulting kernel AST is passed to ``EstimateFlops``.

    :param n: size parameter; the tensor element has shape ``(n, n)`` and
        the vector element has dimension ``n``.
    :returns: whatever ``EstimateFlops().visit`` yields for the kernel AST.
    """
    mesh = Mesh(VectorElement('CG', interval, 1))
    tensor_space = FunctionSpace(
        mesh, TensorElement('DG', interval, 1, shape=(n, n)))
    vector_space = FunctionSpace(
        mesh, VectorElement('DG', interval, 1, dim=n))

    ensemble_f = Coefficient(vector_space)
    ensemble2_f = Coefficient(vector_space)
    phi = TestFunction(tensor_space)

    i, j = indices(2)
    nc = 42  # magic number

    def sum_over_ij(integrand):
        # Explicit contraction: inner sum over ``i``, outer sum over ``j``.
        return IndexSum(
            IndexSum(integrand, MultiIndex((i, ))), MultiIndex((j, )))

    # The three terms are built and combined strictly left to right.
    form = sum_over_ij(
        Product(nc * phi[i, j],
                Product(ensemble_f[i], ensemble_f[i]))) * dx
    form = form + sum_over_ij(
        Product(nc * phi[i, j],
                Product(ensemble2_f[j], ensemble2_f[j]))) * dx
    form = form - sum_over_ij(
        2 * nc *
        Product(phi[i, j],
                Product(ensemble_f[i], ensemble2_f[j]))) * dx

    # Exactly one kernel is expected from the compiler.
    (kernel,) = compile_form(form, parameters=dict(mode='spectral'))
    return EstimateFlops().visit(kernel.ast)
コード例 #2
0
ファイル: cse.py プロジェクト: wei-pan/COFFEE
    def __init__(self, node, main_loop, nest, linear_reads_costs=None):
        """Store the given node with its loop context and estimated flops.

        :param node: stored as ``self.node`` and handed to
            ``EstimateFlops().visit`` to compute ``self.flops``.
        :param main_loop: stored as ``self.main_loop``.
        :param nest: stored as ``self.nest``.
        :param linear_reads_costs: optional mapping; any falsy value
            (``None`` or an empty mapping) is replaced by a new
            ``OrderedDict``.
        """
        # Values supplied by the caller.
        self.node = node
        self.main_loop = main_loop
        self.nest = nest
        if linear_reads_costs:
            self.linear_reads_costs = linear_reads_costs
        else:
            self.linear_reads_costs = OrderedDict()

        # Initial bookkeeping state.
        self.level = -1
        self.pushed = False
        self.readby = []

        # Cost estimate of the node, computed once at construction.
        self.flops = EstimateFlops().visit(node)
コード例 #3
0
def count_flops(form):
    """Compile ``form`` in spectral mode and estimate its kernel's flops.

    Exactly one kernel is expected from ``compile_form``; its AST is
    passed to ``EstimateFlops().visit`` and that result is returned.
    """
    kernels = compile_form(form, parameters={'mode': 'spectral'})
    (kernel,) = kernels
    return EstimateFlops().visit(kernel.ast)
コード例 #4
0
ファイル: test_count_flops.py プロジェクト: wei-pan/COFFEE
def v():
    """Return a newly constructed ``EstimateFlops`` instance."""
    estimator = EstimateFlops()
    return estimator
コード例 #5
0
ファイル: plan.py プロジェクト: wei-pan/COFFEE
    def plan_cpu(self, opts):
        """Optimize this :class:`ASTKernel` for CPU execution.

        Every function declaration found in ``self.ast`` is transformed in
        place, and a summary (elapsed time, flop estimate before and after)
        is logged at the end.

        :param opts: a dictionary of optimizations to be applied; its
            ``'optlevel'`` entry (an empty dict if absent) selects the
            optimization level. For a description of the recognized
            optimizations, please refer to the ``coffee.set_opt_level``
            documentation. If ``None``, the defaults stored in
            ``coffee.options['optimizations']`` are used instead; these are
            either the optimizations set when COFFEE was initialized or
            those changed through a call to ``set_opt_level``. This gives
            all kernels a common default while still letting users pick
            specific transformations for individual kernels.

        If dead-ops elimination is requested together with splitting or
        vectorization, a warning is emitted and the method returns
        immediately, without the final log message.
        """
        t_begin = time.time()

        fundecls = Find(FunDecl, stop_when_found=True).visit(self.ast)[FunDecl]

        # Resolve the optimization level: global defaults when nothing was
        # passed in, otherwise whatever the caller selected.
        requested = (coffee.options['optimizations'] if opts is None
                     else opts.get('optlevel', {}))
        level = coffee.OptimizationLevel.retrieve(requested)

        flops_before = EstimateFlops().visit(self.ast)

        for kernel in fundecls:
            rewrite_mode = level.get('rewrite')
            vect_opts = level.get('vectorize', (None, None))
            pad_and_align = level.get('align_pad')
            split_opt = level.get('split')
            drop_dead_ops = level.get('dead_ops_elimination')

            info = visit(kernel, info_items=['decls', 'exprs'])

            # Group expressions (with their metadata) by the outermost entry
            # of the loop nest they live in; expressions without a nest are
            # skipped.
            nests = defaultdict(OrderedDict)
            for stmt, (parent, nest) in info['exprs'].items():
                if not nest:
                    continue
                typ = ("double" if kernel.template
                       else check_type(stmt, info['decls']))
                nests[nest[0]][stmt] = MetaExpr(typ, parent, nest)

            optimizers = []
            for (loop, header), loop_exprs in nests.items():
                optimizers.append(CPULoopOptimizer(loop, header, loop_exprs))

            # Some optimizations must not be combined.
            if drop_dead_ops:
                if split_opt:
                    warn("Split forbidden with dead-ops elimination")
                    return
                if vect_opts[0]:
                    warn("Vect forbidden with dead-ops elimination")
                    return
            if rewrite_mode == 'auto' and len(info['exprs']) > 1:
                warn("Rewrite auto forbidden with multiple exprs")
                rewrite_mode = 4

            # Main optimization pipeline
            for optimizer in optimizers:

                # 0) Expression rewriting
                if rewrite_mode:
                    optimizer.rewrite(rewrite_mode)

                # 1) Dead-operations elimination
                if drop_dead_ops:
                    optimizer.eliminate_zeros()

                # 2) Code specialization
                if split_opt:
                    optimizer.split(split_opt)
                if not (coffee.initialized
                        and flatten(optimizer.expr_linear_loops)):
                    continue
                vectorizer = LoopVectorizer(optimizer, kernel)
                if pad_and_align:
                    # Padding and data alignment
                    vectorizer.autovectorize()
                strategy = vect_opts[0]
                if strategy and strategy != VectStrategy.AUTO:
                    # Specialize vectorization for the memory access
                    # pattern of the expression
                    vectorizer.specialize(*vect_opts)

            # Always mark the kernel ``static inline``: strip any existing
            # occurrence of either qualifier, then put both back at the
            # front so that the order is right.
            kernel.pred = ['static', 'inline'] + [
                q for q in kernel.pred if q not in ('static', 'inline')
            ]

            # Post processing of the AST ensures higher-quality code
            postprocess(kernel)

        flops_after = EstimateFlops().visit(self.ast)

        elapsed = time.time() - t_begin

        message = "COFFEE finished in %g seconds (flops: %d -> %d)" % \
            (elapsed, flops_before, flops_after)
        # Warn when the transformation increased the estimated flop count.
        if flops_after <= flops_before:
            severity = PERF_OK
        else:
            severity = PERF_WARN
        log(message, severity)