Ejemplo n.º 1
0
def optimize(clusters, dse_mode):
    """
    Optimize a topologically-ordered sequence of Clusters by applying the
    following transformations:

        * [cross-cluster] Fusion
        * [intra-cluster] Several flop-reduction passes via the DSE
        * [cross-cluster] Lifting
        * [cross-cluster] Scalarization
        * [cross-cluster] Arrays Elimination
    """
    # To create temporaries
    counter = generator()
    template = lambda: "r%d" % counter()

    # Fusion
    clusters = fuse(clusters)

    from devito.dse import rewrite
    clusters = rewrite(clusters, template, mode=dse_mode)

    # Lifting
    clusters = Lift().process(clusters)

    # Lifting may create fusion opportunities
    clusters = fuse(clusters)

    # Fusion may create opportunities to eliminate Arrays (thus shrinking the
    # working set) if these store identical expressions
    clusters = eliminate_arrays(clusters, template)

    # Fusion may create scalarization opportunities
    clusters = scalarize(clusters, template)

    return ClusterGroup(clusters)
Ejemplo n.º 2
0
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, Eq) for i in expressions):
            raise InvalidOperator("Only `devito.Eq` expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self._func_table = OrderedDict()

        # Internal state. May be used to store information about previous runs,
        # autotuning reports, etc
        self._state = {}

        # Expression lowering: indexification, substitution rules, specialization
        expressions = [indexify(i) for i in expressions]
        expressions = self._apply_substitutions(expressions, subs)
        expressions = self._specialize_exprs(expressions)

        # Expression analysis
        self.input = filter_sorted(flatten(e.reads for e in expressions))
        self.output = filter_sorted(flatten(e.writes for e in expressions))
        self.dimensions = filter_sorted(
            flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))
        self._dtype, self._dspace = clusters.meta

        # Lower Clusters to a Schedule tree
        stree = st_build(clusters)

        # Lower Schedule tree to an Iteration/Expression tree (IET)
        iet = iet_build(stree)
        iet, self._profiler = self._profile_sections(iet)
        iet = self._specialize_iet(iet, **kwargs)
        iet = iet_insert_C_decls(iet)
        iet = self._build_casts(iet)

        # Derive parameters as symbols not defined in the kernel itself
        parameters = self._build_parameters(iet)

        # Finish instantiation
        super(Operator, self).__init__(self.name, iet, 'int', parameters, ())
Ejemplo n.º 3
0
def test_tti_clusters_to_graph():
    solver = tti_operator()

    expressions = solver.op_fwd('centered').args['expressions']
    subs = solver.op_fwd('centered').args['subs']
    expressions = [LoweredEq(e, subs=subs) for e in expressions]
    clusters = clusterize(expressions)
    assert len(clusters) == 3

    main_cluster = clusters[0]
    n_output_tensors = len(main_cluster.trace)

    clusters = rewrite([main_cluster], mode='basic')
    assert len(clusters) == 1
    main_cluster = clusters[0]

    graph = main_cluster.trace
    assert len([v for v in graph.values() if v.is_tensor]) == n_output_tensors  # u and v
    assert all(v.reads or v.readby for v in graph.values())
Ejemplo n.º 4
0
def test_tti_clusters_to_graph():
    solver = tti_operator()

    nodes = FindNodes(Expression).visit(solver.op_fwd.elemental_functions)
    expressions = [n.expr for n in nodes]
    stencils = solver.op_fwd._retrieve_stencils(expressions)
    clusters = clusterize(expressions, stencils)
    assert len(clusters) == 3

    main_cluster = clusters[0]
    n_output_tensors = len(main_cluster.trace)

    clusters = rewrite([main_cluster], mode='basic')
    assert len(clusters) == 1
    main_cluster = clusters[0]

    graph = main_cluster.trace
    assert len([v for v in graph.values() if v.is_tensor
                ]) == n_output_tensors  # u and v
    assert all(v.reads or v.readby for v in graph.values())
Ejemplo n.º 5
0
def optimize(clusters, dse_mode):
    """
    Optimize a topologically-ordered sequence of Clusters by applying the
    following transformations:

        * [cross-cluster] Fusion
        * [intra-cluster] Several flop-reduction passes via the DSE
        * [cross-cluster] Lifting
        * [cross-cluster] Scalarization
        * [cross-cluster] Arrays Elimination
    """
    # To create temporaries
    counter = generator()
    template = lambda: "r%d" % counter()

    # Toposort+Fusion (the former to expose more fusion opportunities)
    clusters = Toposort().process(clusters)
    clusters = fuse(clusters)

    # Flop reduction via the DSE
    from devito.dse import rewrite
    clusters = rewrite(clusters, template, mode=dse_mode)

    # Lifting
    clusters = Lift().process(clusters)

    # Lifting may create fusion opportunities
    clusters = fuse(clusters)

    # Fusion may create opportunities to eliminate Arrays (thus shrinking the
    # working set) if these store identical expressions
    clusters = eliminate_arrays(clusters, template)

    # Fusion may create scalarization opportunities
    clusters = scalarize(clusters, template)

    # Determine computational properties (e.g., parallelism) that will be
    # necessary for the later passes
    clusters = analyze(clusters)

    return ClusterGroup(clusters)
Ejemplo n.º 6
0
def test_tti_clusters_to_graph():
    solver = tti_operator()

    expressions = solver.op_fwd('centered').args['expressions']
    subs = solver.op_fwd('centered').args['subs']
    expressions = [indexify(s) for s in expressions]
    expressions = [s.xreplace(subs) for s in expressions]
    stencils = solver.op_fwd('centered')._retrieve_stencils(expressions)
    clusters = clusterize(expressions, stencils)
    assert len(clusters) == 3

    main_cluster = clusters[0]
    n_output_tensors = len(main_cluster.trace)

    clusters = rewrite([main_cluster], mode='basic')
    assert len(clusters) == 1
    main_cluster = clusters[0]

    graph = main_cluster.trace
    assert len([v for v in graph.values() if v.is_tensor
                ]) == n_output_tensors  # u and v
    assert all(v.reads or v.readby for v in graph.values())
Ejemplo n.º 7
0
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        time_axis = kwargs.get("time_axis", Forward)
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Default attributes required for compilation
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._lib = None
        self._cfunction = None

        # Set the direction of time acoording to the given TimeAxis
        time.reverse = time_axis == Backward

        # Expression lowering
        expressions = [indexify(s) for s in expressions]
        expressions = [s.xreplace(subs) for s in expressions]

        # Analysis 1 - required *also after* the Operator construction
        self.dtype = self._retrieve_dtype(expressions)
        self.output = self._retrieve_output_fields(expressions)

        # Analysis 2 - required *for* the Operator construction
        ordering = self._retrieve_loop_ordering(expressions)
        stencils = self._retrieve_stencils(expressions)

        # Group expressions based on their Stencil
        clusters = clusterize(expressions, stencils)

        # Apply the Devito Symbolic Engine for symbolic optimization
        clusters = rewrite(clusters, mode=dse)

        # Wrap expressions with Iterations according to dimensions
        nodes = self._schedule_expressions(clusters, ordering)

        # Introduce C-level profiling infrastructure
        self.sections = OrderedDict()
        nodes = self._profile_sections(nodes)

        # Parameters of the Operator (Dimensions necessary for data casts)
        parameters = FindSymbols('kernel-data').visit(nodes)
        dimensions = FindSymbols('dimensions').visit(nodes)
        dimensions += [d.parent for d in dimensions if d.is_Buffered]
        parameters += filter_ordered([d for d in dimensions if d.size is None],
                                     key=operator.attrgetter('name'))

        # Resolve and substitute dimensions for loop index variables
        subs = {}
        nodes = ResolveIterationVariable().visit(nodes, subs=subs)
        nodes = SubstituteExpression(subs=subs).visit(nodes)

        # Apply the Devito Loop Engine for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))
        parameters += [i.argument for i in dle_state.arguments]
        self._includes.extend(list(dle_state.includes))

        # Introduce all required C declarations
        nodes, elemental_functions = self._insert_declarations(
            dle_state, parameters)
        self.elemental_functions = elemental_functions

        # Track the DLE output, as it might be useful at execution time
        self._dle_state = dle_state

        # Finish instantiation
        super(OperatorBasic, self).__init__(self.name, nodes, 'int',
                                            parameters, ())
Ejemplo n.º 8
0
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        time_axis = kwargs.get("time_axis", Forward)
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self.func_table = OrderedDict()

        # Expression lowering and analysis
        expressions = [LoweredEq(e, subs=subs) for e in expressions]
        self.dtype = retrieve_dtype(expressions)
        self.input, self.output, self.dimensions = retrieve_symbols(
            expressions)

        # Set the direction of time acoording to the given TimeAxis
        for time in [d for d in self.dimensions if d.is_Time]:
            if not time.is_Stepping:
                time.reverse = time_axis == Backward

        # Parameters of the Operator (Dimensions necessary for data casts)
        parameters = self.input + self.dimensions

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))

        # Lower Clusters to an Iteration/Expression tree (IET)
        nodes = iet_build(clusters, self.dtype)

        # Introduce C-level profiling infrastructure
        nodes, self.profiler = self._profile_sections(nodes, parameters)

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize(nodes, parameters)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_arguments = dle_state.arguments
        self.dle_flags = dle_state.flags
        self.func_table.update(
            OrderedDict([(i.name, MetaCall(i, True))
                         for i in dle_state.elemental_functions]))
        parameters.extend([i.argument for i in self.dle_arguments])
        self.dimensions.extend([
            i.argument for i in self.dle_arguments
            if isinstance(i.argument, Dimension)
        ])
        self._includes.extend(list(dle_state.includes))

        # Introduce the required symbol declarations
        nodes = iet_insert_C_decls(dle_state.nodes, self.func_table)

        # Initialise ArgumentEngine
        self.argument_engine = ArgumentEngine(clusters.ispace, parameters,
                                              self.dle_arguments)

        parameters = self.argument_engine.arguments

        # Finish instantiation
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())
Ejemplo n.º 9
0
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        time_axis = kwargs.get("time_axis", Forward)
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # Set the direction of time acoording to the given TimeAxis
        time.reverse = time_axis == Backward

        # Expression lowering
        expressions = [indexify(s) for s in expressions]
        expressions = [s.xreplace(subs) for s in expressions]

        # Analysis
        self.dtype = self._retrieve_dtype(expressions)
        self.input, self.output, self.dimensions = self._retrieve_symbols(expressions)
        stencils = self._retrieve_stencils(expressions)

        # Parameters of the Operator (Dimensions necessary for data casts)
        parameters = self.input + [i for i in self.dimensions if i.size is None]

        # Group expressions based on their Stencil
        clusters = clusterize(expressions, stencils)

        # Apply the Devito Symbolic Engine (DSE) for symbolic optimization
        clusters = rewrite(clusters, mode=set_dse_mode(dse))

        # Wrap expressions with Iterations according to dimensions
        nodes = self._schedule_expressions(clusters)

        # Introduce C-level profiling infrastructure
        nodes, self.profiler = self._profile_sections(nodes, parameters)

        # Resolve and substitute dimensions for loop index variables
        subs = {}
        nodes = ResolveIterationVariable().visit(nodes, subs=subs)
        nodes = SubstituteExpression(subs=subs).visit(nodes)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_arguments = dle_state.arguments
        self.dle_flags = dle_state.flags
        self.func_table = OrderedDict([(i.name, FunMeta(i, True))
                                       for i in dle_state.elemental_functions])
        parameters.extend([i.argument for i in self.dle_arguments])
        self.dimensions.extend([i.argument for i in self.dle_arguments
                                if isinstance(i.argument, Dimension)])
        self._includes.extend(list(dle_state.includes))

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize(dle_state.nodes, parameters)

        # Introduce all required C declarations
        nodes = self._insert_declarations(nodes)

        # Finish instantiation
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())
Ejemplo n.º 10
0
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self.func_table = OrderedDict()

        # Expression lowering: indexification, substitution rules, specialization
        expressions = [indexify(i) for i in expressions]
        expressions = [i.xreplace(subs) for i in expressions]
        expressions = self._specialize_exprs(expressions)

        # Expression analysis
        self.input = filter_sorted(flatten(e.reads for e in expressions))
        self.output = filter_sorted(flatten(e.writes for e in expressions))
        self.dimensions = filter_sorted(flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))
        self._dtype, self._dspace = clusters.meta

        # Lower Clusters to a Schedule tree
        stree = schedule(clusters)
        stree = section(stree)

        # Lower Sections to an Iteration/Expression tree (IET)
        iet = iet_build(stree)

        # Insert code for C-level performance profiling
        iet, self.profiler = self._profile_sections(iet)

        # Translate into backend-specific representation
        iet = self._specialize_iet(iet, **kwargs)

        # Insert the required symbol declarations
        iet = iet_insert_C_decls(iet, self.func_table)

        # Insert data and pointer casts for array parameters and profiling structs
        iet = self._build_casts(iet)

        # Derive parameters as symbols not defined in the kernel itself
        parameters = self._build_parameters(iet)

        # Finish instantiation
        super(Operator, self).__init__(self.name, iet, 'int', parameters, ())
Ejemplo n.º 11
0
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self.func_table = OrderedDict()

        # Expression lowering: indexification, substitution rules, specialization
        expressions = [indexify(i) for i in expressions]
        expressions = [i.xreplace(subs) for i in expressions]
        expressions = self._specialize_exprs(expressions)

        # Expression analysis
        self.input = filter_sorted(flatten(e.reads for e in expressions))
        self.output = filter_sorted(flatten(e.writes for e in expressions))
        self.dimensions = filter_sorted(flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))
        self._dtype, self._dspace = clusters.meta

        # Lower Clusters to an Iteration/Expression tree (IET)
        nodes = iet_build(clusters)

        # Introduce C-level profiling infrastructure
        nodes, self.profiler = self._profile_sections(nodes)

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize_iet(nodes)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_args = dle_state.arguments
        self.dle_flags = dle_state.flags
        self.func_table.update(OrderedDict([(i.name, MetaCall(i, True))
                                            for i in dle_state.elemental_functions]))
        self.dimensions.extend([i.argument for i in self.dle_args
                                if isinstance(i.argument, Dimension)])
        self._includes.extend(list(dle_state.includes))

        # Introduce the required symbol declarations
        nodes = iet_insert_C_decls(dle_state.nodes, self.func_table)

        # Insert data and pointer casts for array parameters and profiling structs
        nodes = self._build_casts(nodes)

        # Derive parameters as symbols not defined in the kernel itself
        parameters = self._build_parameters(nodes)

        # Finish instantiation
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())
Ejemplo n.º 12
0
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, Eq) for i in expressions):
            raise InvalidOperator("Only `devito.Eq` expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self._func_table = OrderedDict()

        # Internal state. May be used to store information about previous runs,
        # autotuning reports, etc
        self._state = {}

        # Form and gather any required implicit expressions
        expressions = self._add_implicit(expressions)

        # Expression lowering: indexification, substitution rules, specialization
        expressions = [indexify(i) for i in expressions]
        expressions = self._apply_substitutions(expressions, subs)
        expressions = self._specialize_exprs(expressions)

        # Expression analysis
        self._input = filter_sorted(flatten(e.reads + e.writes for e in expressions))
        self._output = filter_sorted(flatten(e.writes for e in expressions))
        self._dimensions = filter_sorted(flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))
        self._dtype, self._dspace = clusters.meta

        # Lower Clusters to a Schedule tree
        stree = st_build(clusters)

        # Lower Schedule tree to an Iteration/Expression tree (IET)
        iet = iet_build(stree)
        iet, self._profiler = self._profile_sections(iet)
        iet = self._specialize_iet(iet, **kwargs)

        # Derive all Operator parameters based on the IET
        parameters = derive_parameters(iet, True)

        # Finalization: introduce declarations, type casts, etc
        iet = self._finalize(iet, parameters)

        super(Operator, self).__init__(self.name, iet, 'int', parameters, ())