Beispiel #1
0
def generate_tabulate_dmats(L, dofs_data):
    """Declare the (transposed) derivative tables of the polynomial base.

    For every entry of dofs_data the matrices stored under "dmats" are
    transposed and stacked into one table of shape
    (len(dmats), num_expansion_members, num_expansion_members). A table
    that is numerically equal (numpy.allclose) to one declared earlier
    reuses the earlier symbol instead of being declared again.

    Returns a tuple (names, code): names holds one table symbol per
    entry of dofs_data, code holds a leading comment followed by one
    array declaration per unique table.
    """
    table_alignment = 32

    # Code is only emitted for tables that are actually new
    declarations = [
        L.Comment("Tables of derivatives of the polynomial base (transpose).")
    ]
    names_per_dof = []

    # (symbol, table) pairs for every table declared so far
    declared_tables = []

    for dof_number, entry in enumerate(dofs_data):
        # Derivative matrices (coefficients) of basis functions,
        # computed by FIAT at compile time.
        dmats = entry["dmats"]
        size = entry["num_expansion_members"]
        shape = (len(dmats), size, size)

        # One transposed matrix per spatial direction
        # (take transpose, FIAT_NEW PolynomialSet.tabulate()).
        table = numpy.zeros(shape)
        for direction, dmat in enumerate(dmats):
            table[direction, ...] = numpy.transpose(dmat)

        # TODO: Use precision from parameters here
        # Local import: only resolved once there is a dof to process.
        from ffc.uflacs.elementtables import clamp_table_small_numbers
        table = clamp_table_small_numbers(table)

        # Linear search through the declared tables, O(n^2) overall
        symbol = next(
            (known_symbol for known_symbol, known_table in declared_tables
             if table.shape == known_table.shape
             and numpy.allclose(table, known_table)),
            None)

        if symbol is None:
            # First occurrence: name the table after this dof and declare it
            symbol = L.Symbol("dmats%d" % (dof_number, ))
            declared_tables.append((symbol, table))
            declarations.append(
                L.ArrayDecl("static const double",
                            symbol, shape,
                            values=table,
                            alignas=table_alignment))

        # Every dof gets a name, whether new or reused
        names_per_dof.append(symbol)

    return names_per_dof, declarations
Beispiel #2
0
    def tabulate_dof_coordinates(self, L, ir, parameters):
        """Generate code assigning the physical coordinates of each dof.

        The points stored in the intermediate representation are mapped
        with the basis returned by reference_to_physical_map(cell_shape),
        and one fully unrolled assignment is emitted per (point, component):

            dof_coordinates[ip][i] = sum_k phi[ip][k] * coordinate_dofs[gdim*k + i]

        Arguments:
            L: code-building namespace providing Symbol, FlattenedArray
                and Assign.
            ir: dict whose "tabulate_dof_coordinates" entry is either
                falsy (not defined for this element) or a dict with the
                keys "gdim", "points" and "cell_shape".
            parameters: dict; "convert_exceptions_to_warnings" is
                forwarded to generate_error.

        Returns:
            A list of L.Assign statements, or the result of
            generate_error when the function is not defined for this
            element.
        """
        ir = ir["tabulate_dof_coordinates"]

        # Raise error if tabulate_dof_coordinates is ill-defined
        if not ir:
            msg = "tabulate_dof_coordinates is not defined for this element"
            return generate_error(L, msg, parameters["convert_exceptions_to_warnings"])

        # Extract geometric dimension, reference points and cellshape
        gdim = ir["gdim"]
        points = ir["points"]
        cell_shape = ir["cell_shape"]
        num_points = len(points)

        # Output argument
        dof_coordinates = L.FlattenedArray(L.Symbol("dof_coordinates"),
                                           dims=(num_points, gdim))

        # Input argument
        coordinate_dofs = L.Symbol("coordinate_dofs")

        # TODO: Get rid of all places that use reference_to_physical_map, it is restricted to a basis of degree 1
        # Evaluate the coordinate basis in every point, flattened point-major
        num_scalar_xdofs = _num_vertices(cell_shape)
        cg1_basis = reference_to_physical_map(cell_shape)
        phi_values = numpy.asarray([phi_comp for X in points for phi_comp in cg1_basis(X)])
        assert len(phi_values) == num_points * num_scalar_xdofs

        # TODO: Use precision parameter here
        phi_values = clamp_table_small_numbers(phi_values)

        # The loops over points and components are unrolled here, so ip, i
        # and k are plain Python integers and no loop symbols are needed.
        code = [
            L.Assign(
                dof_coordinates[ip][i],
                sum(phi_values[ip*num_scalar_xdofs + k] * coordinate_dofs[gdim*k + i]
                    for k in range(num_scalar_xdofs))
            )
            for ip in range(num_points)
            for i in range(gdim)
        ]

        # FIXME: This code assumes an affine coordinate field.
        #        To get around that limitation, make this function take another argument
        #            const ufc::coordinate_mapping * cm
        #        and generate code like this:
        #
        #            index_type X[tdim*num_dofs];
        #            tabulate_dof_coordinates(X);
        #            cm->compute_physical_coordinates(x, X, coordinate_dofs);

        return code
Beispiel #3
0
    def tabulate_dof_coordinates(self, L, ir, parameters):
        """Generate code assigning the physical coordinates of each dof.

        The points stored in the intermediate representation are mapped
        with the basis returned by reference_to_physical_map(cell_shape),
        and one fully unrolled assignment is emitted per (point, component):

            dof_coordinates[ip][i] = sum_k phi[ip][k] * coordinate_dofs[gdim*k + i]

        Arguments:
            L: code-building namespace providing Symbol, FlattenedArray
                and Assign.
            ir: dict whose "tabulate_dof_coordinates" entry is either
                falsy (not defined for this element) or a dict with the
                keys "gdim", "points" and "cell_shape".
            parameters: dict; "convert_exceptions_to_warnings" is
                forwarded to generate_error.

        Returns:
            A list of L.Assign statements, or the result of
            generate_error when the function is not defined for this
            element.
        """
        ir = ir["tabulate_dof_coordinates"]

        # Raise error if tabulate_dof_coordinates is ill-defined
        if not ir:
            msg = "tabulate_dof_coordinates is not defined for this element"
            return generate_error(L, msg,
                                  parameters["convert_exceptions_to_warnings"])

        # Extract geometric dimension, reference points and cellshape
        gdim = ir["gdim"]
        points = ir["points"]
        cell_shape = ir["cell_shape"]
        num_points = len(points)

        # Output argument
        dof_coordinates = L.FlattenedArray(L.Symbol("dof_coordinates"),
                                           dims=(num_points, gdim))

        # Input argument
        coordinate_dofs = L.Symbol("coordinate_dofs")

        # TODO: Get rid of all places that use reference_to_physical_map, it is restricted to a basis of degree 1
        # Evaluate the coordinate basis in every point, flattened point-major
        num_scalar_xdofs = _num_vertices(cell_shape)
        cg1_basis = reference_to_physical_map(cell_shape)
        phi_values = numpy.asarray(
            [phi_comp for X in points for phi_comp in cg1_basis(X)])
        assert len(phi_values) == num_points * num_scalar_xdofs

        # TODO: Use precision parameter here
        phi_values = clamp_table_small_numbers(phi_values)

        # The loops over points and components are unrolled here, so ip, i
        # and k are plain Python integers and no loop symbols are needed.
        code = [
            L.Assign(
                dof_coordinates[ip][i],
                sum(phi_values[ip * num_scalar_xdofs + k] *
                    coordinate_dofs[gdim * k + i]
                    for k in range(num_scalar_xdofs)))
            for ip in range(num_points) for i in range(gdim)
        ]

        # FIXME: This code assumes an affine coordinate field.
        #        To get around that limitation, make this function take another argument
        #            const ufc::coordinate_mapping * cm
        #        and generate code like this:
        #
        #            index_type X[tdim*num_dofs];
        #            tabulate_dof_coordinates(X);
        #            cm->compute_physical_coordinates(x, X, coordinate_dofs);

        return code
Beispiel #4
0
    def interpolate_vertex_values(self, L, ir, parameters):
        """Generate code that interpolates dof values to the cell vertices.

        For every sub-element in irdata["element_data"], every value
        component k and every vertex j, one assignment is emitted:

            vertex_values[j*total_dim + k + value_offset] = sum_i dof_values[i + space_offset] * w[i]

        where w holds the tabulated basis values at the vertex after the
        sub-element's mapping ('affine', '(double) contravariant piola' or
        '(double) covariant piola') has been applied.

        Arguments:
            L: code-building namespace providing Symbol, FlattenedArray,
                Comment and Assign.
            ir: dict with the keys "interpolate_vertex_values" and
                "cell_shape". The former is falsy (not defined), a string
                (reason the element is unsupported) or a dict of
                interpolation data.
            parameters: dict; "convert_exceptions_to_warnings" is
                forwarded to generate_error.

        Returns:
            A list of code statements. In the two error cases the list
            holds only the result of generate_error.
        """
        irdata = ir["interpolate_vertex_values"]

        # Raise error if interpolate_vertex_values is ill-defined
        if not irdata:
            msg = "interpolate_vertex_values is not defined for this element"
            return [generate_error(L, msg, parameters["convert_exceptions_to_warnings"])]

        # Handle unsupported elements.
        if isinstance(irdata, str):
            msg = "interpolate_vertex_values: %s" % irdata
            return [generate_error(L, msg, parameters["convert_exceptions_to_warnings"])]

        # Add code for Jacobian if necessary
        code = []
        gdim = irdata["geometric_dimension"]
        tdim = irdata["topological_dimension"]
        cell_shape = ir["cell_shape"]
        if irdata["needs_jacobian"]:
            code += jacobian(L, gdim, tdim, cell_shape)
            code += inverse_jacobian(L, gdim, tdim, cell_shape)
            if irdata["needs_oriented"] and tdim != gdim:
                code += orientation(L)

        # Compute total value dimension for (mixed) element
        total_dim = irdata["physical_value_size"]

        # Symbols are the same for every sub-element, component and
        # vertex, so they are created once up front.
        J = L.FlattenedArray(L.Symbol("J"), dims=(gdim, tdim))
        detJ = L.Symbol("detJ")
        K = L.FlattenedArray(L.Symbol("K"), dims=(tdim, gdim))
        dof_values = L.Symbol("dof_values")
        v_values = L.Symbol("vertex_values")

        # Generate code for each element
        value_offset = 0
        space_offset = 0
        for data in irdata["element_data"]:
            # Add vertex interpolation for this element
            code += [L.Comment("Evaluate function and change variables")]

            # Extract vertex values for all basis functions
            vertex_values = data["basis_values"]
            value_size = data["physical_value_size"]
            space_dim = data["space_dim"]
            mapping = data["mapping"]

            # Clamping depends only on the vertex, not on the component k,
            # so do it once per vertex. Scalar-valued tables are wrapped
            # in a list so they can be indexed by component.
            clamped_vertex_values = [
                clamp_table_small_numbers(
                    [values_at_vertex] if value_size == 1 else values_at_vertex)
                for values_at_vertex in vertex_values
            ]

            # The coefficients of this sub-element are the same for all (k, j)
            dof_list = [dof_values[i + space_offset] for i in range(space_dim)]

            # Create code for each value dimension:
            for k in range(value_size):
                # Create code for each vertex x_j
                for (j, values) in enumerate(clamped_vertex_values):

                    # Map basis functions using appropriate mapping
                    # FIXME: sort out all non-affine mappings and make into a function
                    # components = change_of_variables(values_at_vertex, k)

                    w = []
                    if mapping == 'affine':
                        w = values[k]
                    elif mapping == 'contravariant piola':
                        for index in range(space_dim):
                            w += [sum(J[k, p]*values[p][index]
                                      for p in range(tdim))/detJ]
                    elif mapping == 'covariant piola':
                        for index in range(space_dim):
                            w += [sum(K[p, k]*values[p][index]
                                      for p in range(tdim))]
                    elif mapping == 'double covariant piola':
                        for index in range(space_dim):
                            w += [sum(K[p, k//tdim]*values[p][q][index]*K[q, k % tdim]
                                      for q in range(tdim) for p in range(tdim))]
                    elif mapping == 'double contravariant piola':
                        for index in range(space_dim):
                            w += [sum(J[k//tdim, p]*values[p][q][index]*J[k % tdim, q]
                                      for q in range(tdim) for p in range(tdim))/(detJ*detJ)]
                    else:
                        error("Unknown mapping: %s" % mapping)

                    # Contract coefficients and basis functions
                    value = sum(p*q for (p, q) in zip(dof_list, w))

                    # Assign value to correct vertex
                    index = j * total_dim + (k + value_offset)
                    code += [L.Assign(v_values[index], value)]

            # Update offsets for value- and space dimension
            value_offset += value_size
            space_offset += space_dim

        return code
Beispiel #5
0
    def interpolate_vertex_values(self, L, ir, parameters):
        """Generate code that interpolates dof values to the cell vertices.

        For every sub-element in irdata["element_data"], every value
        component k and every vertex j, one assignment is emitted:

            vertex_values[j*total_dim + k + value_offset] = sum_i dof_values[i + space_offset] * w[i]

        where w holds the tabulated basis values at the vertex after the
        sub-element's mapping ('affine', '(double) contravariant piola' or
        '(double) covariant piola') has been applied.

        Arguments:
            L: code-building namespace providing Symbol, FlattenedArray,
                Comment and Assign.
            ir: dict with the keys "interpolate_vertex_values" and
                "cell_shape". The former is falsy (not defined), a string
                (reason the element is unsupported) or a dict of
                interpolation data.
            parameters: dict; "convert_exceptions_to_warnings" is
                forwarded to generate_error.

        Returns:
            A list of code statements. In the two error cases the list
            holds only the result of generate_error.
        """
        irdata = ir["interpolate_vertex_values"]

        # Raise error if interpolate_vertex_values is ill-defined
        if not irdata:
            msg = "interpolate_vertex_values is not defined for this element"
            return [
                generate_error(L, msg,
                               parameters["convert_exceptions_to_warnings"])
            ]

        # Handle unsupported elements.
        if isinstance(irdata, str):
            msg = "interpolate_vertex_values: %s" % irdata
            return [
                generate_error(L, msg,
                               parameters["convert_exceptions_to_warnings"])
            ]

        # Add code for Jacobian if necessary
        code = []
        gdim = irdata["geometric_dimension"]
        tdim = irdata["topological_dimension"]
        cell_shape = ir["cell_shape"]
        if irdata["needs_jacobian"]:
            code += jacobian(L, gdim, tdim, cell_shape)
            code += inverse_jacobian(L, gdim, tdim, cell_shape)
            if irdata["needs_oriented"] and tdim != gdim:
                code += orientation(L)

        # Compute total value dimension for (mixed) element
        total_dim = irdata["physical_value_size"]

        # Symbols are the same for every sub-element, component and
        # vertex, so they are created once up front.
        J = L.FlattenedArray(L.Symbol("J"), dims=(gdim, tdim))
        detJ = L.Symbol("detJ")
        K = L.FlattenedArray(L.Symbol("K"), dims=(tdim, gdim))
        dof_values = L.Symbol("dof_values")
        v_values = L.Symbol("vertex_values")

        # Generate code for each element
        value_offset = 0
        space_offset = 0
        for data in irdata["element_data"]:
            # Add vertex interpolation for this element
            code += [L.Comment("Evaluate function and change variables")]

            # Extract vertex values for all basis functions
            vertex_values = data["basis_values"]
            value_size = data["physical_value_size"]
            space_dim = data["space_dim"]
            mapping = data["mapping"]

            # Clamping depends only on the vertex, not on the component k,
            # so do it once per vertex. Scalar-valued tables are wrapped
            # in a list so they can be indexed by component.
            clamped_vertex_values = [
                clamp_table_small_numbers(
                    [values_at_vertex] if value_size == 1 else values_at_vertex)
                for values_at_vertex in vertex_values
            ]

            # The coefficients of this sub-element are the same for all (k, j)
            dof_list = [
                dof_values[i + space_offset] for i in range(space_dim)
            ]

            # Create code for each value dimension:
            for k in range(value_size):
                # Create code for each vertex x_j
                for (j, values) in enumerate(clamped_vertex_values):

                    # Map basis functions using appropriate mapping
                    # FIXME: sort out all non-affine mappings and make into a function
                    # components = change_of_variables(values_at_vertex, k)

                    w = []
                    if mapping == 'affine':
                        w = values[k]
                    elif mapping == 'contravariant piola':
                        for index in range(space_dim):
                            w += [
                                sum(J[k, p] * values[p][index]
                                    for p in range(tdim)) / detJ
                            ]
                    elif mapping == 'covariant piola':
                        for index in range(space_dim):
                            w += [
                                sum(K[p, k] * values[p][index]
                                    for p in range(tdim))
                            ]
                    elif mapping == 'double covariant piola':
                        for index in range(space_dim):
                            w += [
                                sum(K[p, k // tdim] * values[p][q][index] *
                                    K[q, k % tdim] for q in range(tdim)
                                    for p in range(tdim))
                            ]
                    elif mapping == 'double contravariant piola':
                        for index in range(space_dim):
                            w += [
                                sum(J[k // tdim, p] * values[p][q][index] *
                                    J[k % tdim, q] for q in range(tdim)
                                    for p in range(tdim)) / (detJ * detJ)
                            ]
                    else:
                        error("Unknown mapping: %s" % mapping)

                    # Contract coefficients and basis functions
                    value = sum(p * q for (p, q) in zip(dof_list, w))

                    # Assign value to correct vertex
                    index = j * total_dim + (k + value_offset)
                    code += [L.Assign(v_values[index], value)]

            # Update offsets for value- and space dimension
            value_offset += value_size
            space_offset += space_dim

        return code
Beispiel #6
0
def build_uflacs_ir(cell, integral_type, entitytype,
                    integrands, tensor_shape,
                    coefficient_numbering,
                    quadrature_rules, parameters):
    # The intermediate representation dict we're building and returning here
    ir = {}

    # Extract uflacs specific optimization and code generation parameters
    p = parse_uflacs_optimization_parameters(parameters, integral_type)

    # Pass on parameters for consumption in code generation
    ir["params"] = p

    # { ufl coefficient: count }
    ir["coefficient_numbering"] = coefficient_numbering

    # Shared unique tables for all quadrature loops
    ir["unique_tables"] = {}
    ir["unique_table_types"] = {}

    # Shared piecewise expr_ir for all quadrature loops
    ir["piecewise_ir"] = empty_expr_ir()

    # { num_points: expr_ir for one integrand }
    ir["varying_irs"] = {}

    # Temporary data structures to build shared piecewise data
    pe2i = {}
    piecewise_modified_argument_indices = {}

    # Whether we expect the quadrature weight to be applied or not
    # (in some cases it's just set to 1 in ufl integral scaling)
    tdim = cell.topological_dimension()
    expect_weight = (
        integral_type not in ("expression",) + point_integral_types
        and (entitytype == "cell"
            or (entitytype == "facet" and tdim > 1)
            or (integral_type in custom_integral_types)
            )
        )

    if integral_type == "expression":
        # TODO: Figure out how to get non-integrand expressions in here, this is just a draft:
        # Analyse all expressions in one list
        assert isinstance(integrands, (tuple, list))
        all_num_points = [None]
        cases = [(None, integrands)]
    else:
        # Analyse each num_points/integrand separately
        assert isinstance(integrands, dict)
        all_num_points = sorted(integrands.keys())
        cases = [(num_points, [integrands[num_points]])
                 for num_points in all_num_points]
    ir["all_num_points"] = all_num_points

    for num_points, expressions in cases:
        # Rebalance order of nested terminal modifiers
        expressions = [balance_modifiers(expr) for expr in expressions]

        # Build initial scalar list-based graph representation
        V, V_deps, V_targets = build_scalar_graph(expressions)

        # Build terminal_data from V here before factorization.
        # Then we can use it to derive table properties for all modified terminals,
        # and then use that to rebuild the scalar graph more efficiently before
        # argument factorization. We can build terminal_data again after factorization
        # if that's necessary.
        initial_terminal_indices = [i for i, v in enumerate(V)
                                    if is_modified_terminal(v)]
        initial_terminal_data = [analyse_modified_terminal(V[i])
                                 for i in initial_terminal_indices]
        unique_tables, unique_table_types, unique_table_num_dofs, mt_unique_table_reference = \
            build_optimized_tables(num_points, quadrature_rules,
                cell, integral_type, entitytype, initial_terminal_data,
                ir["unique_tables"], p["enable_table_zero_compression"],
                rtol=p["table_rtol"], atol=p["table_atol"])

        # Replace some scalar modified terminals before reconstructing expressions
        # (could possibly use replace() on target expressions instead)
        z = as_ufl(0.0)
        one = as_ufl(1.0)
        for i, mt in zip(initial_terminal_indices, initial_terminal_data):
            if isinstance(mt.terminal, QuadratureWeight):
                # Replace quadrature weight with 1.0, will be added back later
                V[i] = one
            else:
                # Set modified terminals with zero tables to zero
                tr = mt_unique_table_reference.get(mt)
                if tr is not None and tr.ttype == "zeros":
                    V[i] = z

        # Propagate expression changes using dependency list
        for i in range(len(V)):
            deps = [V[j] for j in V_deps[i]]
            if deps:
                V[i] = V[i]._ufl_expr_reconstruct_(*deps)

        # Rebuild scalar target expressions and graph
        # (this may be overkill and possible to optimize
        # away if it turns out to be costly)
        expressions = [V[i] for i in V_targets]

        # Rebuild scalar list-based graph representation
        SV, SV_deps, SV_targets = build_scalar_graph(expressions)
        assert all(i < len(SV) for i in SV_targets)

        # Compute factorization of arguments
        (argument_factorizations, modified_arguments,
             FV, FV_deps, FV_targets) = \
            compute_argument_factorization(SV, SV_deps, SV_targets, len(tensor_shape))
        assert len(SV_targets) == len(argument_factorizations)       

        # TODO: Still expecting one target variable in code generation
        assert len(argument_factorizations) == 1
        argument_factorization, = argument_factorizations

        # Store modified arguments in analysed form
        for i in range(len(modified_arguments)):
            modified_arguments[i] = analyse_modified_terminal(modified_arguments[i])

        # Build set of modified_terminal indices into factorized_vertices
        modified_terminal_indices = [i for i, v in enumerate(FV)
                                     if is_modified_terminal(v)]

        # Build set of modified terminal ufl expressions
        modified_terminals = [analyse_modified_terminal(FV[i])
                              for i in modified_terminal_indices]

        # Make it easy to get mt object from FV index
        FV_mts = [None]*len(FV)
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            FV_mts[i] = mt

        # Mark active modified arguments
        #active_modified_arguments = numpy.zeros(len(modified_arguments), dtype=int)
        #for ma_indices in argument_factorization:
        #    for j in ma_indices:
        #        active_modified_arguments[j] = 1

        # Dependency analysis
        inv_FV_deps, FV_active, FV_piecewise, FV_varying = \
            analyse_dependencies(FV, FV_deps, FV_targets,
                                 modified_terminal_indices,
                                 modified_terminals,
                                 mt_unique_table_reference)

        # Extend piecewise V with unique new FV_piecewise vertices
        pir = ir["piecewise_ir"]
        for i, v in enumerate(FV):
            if FV_piecewise[i]:
                j = pe2i.get(v)
                if j is None:
                    j = len(pe2i)
                    pe2i[v] = j
                    pir["V"].append(v)
                    pir["V_active"].append(1)
                    mt = FV_mts[i]
                    if mt is not None:
                        pir["mt_tabledata"][mt] = mt_unique_table_reference.get(mt)
                    pir["V_mts"].append(mt)

        # Extend piecewise modified_arguments list with unique new items
        for mt in modified_arguments:
            ma = piecewise_modified_argument_indices.get(mt)
            if ma is None:
                ma = len(pir["modified_arguments"])
                pir["modified_arguments"].append(mt)
                piecewise_modified_argument_indices[mt] = ma

        # Loop over factorization terms
        block_contributions = defaultdict(list)
        for ma_indices, fi in sorted(argument_factorization.items()):
            # Get a bunch of information about this term
            rank = len(ma_indices)
            trs = tuple(mt_unique_table_reference[modified_arguments[ai]] for ai in ma_indices)

            unames = tuple(tr.name for tr in trs)
            ttypes = tuple(tr.ttype for tr in trs)
            assert not any(tt == "zeros" for tt in ttypes)

            blockmap = tuple(tr.dofmap for tr in trs)

            block_is_uniform = all(tr.is_uniform for tr in trs)

            # Collect relevant restrictions to identify blocks
            # correctly in interior facet integrals
            block_restrictions = []
            for i, ma in enumerate(ma_indices):
                if trs[i].is_uniform:
                    r = None
                else:
                    r = modified_arguments[ma].restriction
                block_restrictions.append(r)
            block_restrictions = tuple(block_restrictions)

            # Store piecewise status for fi and translate
            # index to piecewise scope if relevant
            factor_is_piecewise = FV_piecewise[fi]
            if factor_is_piecewise:
                factor_index = pe2i[FV[fi]]
            else:
                factor_index = fi

            # TODO: Add separate block modes for quadrature
            # Both arguments in quadrature elements
            """
            for iq
                fw = f*w
                #for i
                #    for j
                #        B[i,j] = fw*U[i]*V[j] = 0 if i != iq or j != iq
                BQ[iq] = B[iq,iq] = fw
            for (iq) 
                A[iq+offset0, iq+offset1] = BQ[iq]
            """
            # One argument in quadrature element
            """
            for iq
                fw[iq] = f*w
                #for i
                #    for j
                #        B[i,j] = fw*UQ[i]*V[j] = 0 if i != iq
                for j
                    BQ[iq,j] = fw[iq]*V[iq,j]
            for (iq) for (j)
                A[iq+offset, j+offset] = BQ[iq,j]
            """

            # Decide how to handle code generation for this block
            if p["enable_preintegration"] and (factor_is_piecewise
                    and rank > 0 and "quadrature" not in ttypes):
                # - Piecewise factor is an absolute prerequisite
                # - Could work for rank 0 as well but currently doesn't
                # - Haven't considered how quadrature elements work out
                block_mode = "preintegrated"
            elif p["enable_premultiplication"] and (rank > 0
                    and all(tt in piecewise_ttypes for tt in ttypes)):
                # Integrate functional in quadloop, scale block after quadloop
                block_mode = "premultiplied"
            elif p["enable_sum_factorization"]:
                if (rank == 2 and any(tt in piecewise_ttypes for tt in ttypes)):
                    # Partial computation in quadloop of f*u[i],
                    # compute (f*u[i])*v[i] outside quadloop,
                    # (or with u,v swapped)
                    block_mode = "partial"
                else:
                    # Full runtime integration of f*u[i]*v[j],
                    # can still do partial computation in quadloop of f*u[i]
                    # but must compute (f*u[i])*v[i] as well inside quadloop.
                    # (or with u,v swapped)
                    block_mode = "full"
            else:
                # Use full runtime integration with nothing fancy going on
                block_mode = "safe"

            # Carry out decision
            if block_mode == "preintegrated":
                # Add to contributions:
                # P = sum_q weight*u*v;      preintegrated here
                # B[...] = f * P[...];       generated after quadloop
                # A[blockmap] += B[...];     generated after quadloop

                cache = ir["piecewise_ir"]["preintegrated_blocks"]

                block_is_transposed = False
                pname = cache.get(unames)

                # Reuse transpose to save memory
                if p["enable_block_transpose_reuse"] and pname is None and len(unames) == 2:
                    pname = cache.get((unames[1], unames[0]))
                    if pname is not None:
                        # Cache hit on transpose
                        block_is_transposed = True

                if pname is None:
                    # Cache miss, precompute block
                    weights = quadrature_rules[num_points][1]
                    if integral_type == "interior_facet":
                        ptable = integrate_block_interior_facets(weights, unames, ttypes,
                            unique_tables, unique_table_num_dofs)
                    else:
                        ptable = integrate_block(weights, unames, ttypes,
                            unique_tables, unique_table_num_dofs)
                    ptable = clamp_table_small_numbers(ptable, rtol=p["table_rtol"], atol=p["table_atol"])

                    pname = "PI%d" % (len(cache,))
                    cache[unames] = pname
                    unique_tables[pname] = ptable
                    unique_table_types[pname] = "preintegrated"

                assert factor_is_piecewise
                block_unames = (pname,)
                blockdata = preintegrated_block_data_t(block_mode, ttypes,
                                                       factor_index, factor_is_piecewise,
                                                       block_unames, block_restrictions,
                                                       block_is_transposed, block_is_uniform,
                                                       pname)
                block_is_piecewise = True

            elif block_mode == "premultiplied":
                # Add to contributions:
                # P = u*v;                        computed here
                # FI = sum_q weight * f;          generated inside quadloop
                # B[...] = FI * P[...];           generated after quadloop
                # A[blockmap] += B[...];          generated after quadloop

                cache = ir["piecewise_ir"]["premultiplied_blocks"]

                block_is_transposed = False
                pname = cache.get(unames)

                # Reuse transpose to save memory
                if p["enable_block_transpose_reuse"] and pname is None and len(unames) == 2:
                    pname = cache.get((unames[1], unames[0]))
                    if pname is not None:
                        # Cache hit on transpose
                        block_is_transposed = True

                if pname is None:
                    # Cache miss, precompute block
                    if integral_type == "interior_facet":
                        ptable = multiply_block_interior_facets(0, unames, ttypes, unique_tables, unique_table_num_dofs)
                    else:
                        ptable = multiply_block(0, unames, ttypes, unique_tables, unique_table_num_dofs)
                    pname = "PM%d" % (len(cache,))
                    cache[unames] = pname
                    unique_tables[pname] = ptable
                    unique_table_types[pname] = "premultiplied"

                block_unames = (pname,)
                blockdata = premultiplied_block_data_t(block_mode, ttypes,
                                                       factor_index, factor_is_piecewise,
                                                       block_unames, block_restrictions,
                                                       block_is_transposed, block_is_uniform,
                                                       pname)
                block_is_piecewise = False

            elif block_mode == "scaled":  # TODO: Add mode, block is piecewise but choose not to be premultiplied
                # Add to contributions:
                # FI = sum_q weight * f;          generated inside quadloop
                # B[...] = FI * u * v;            generated after quadloop
                # A[blockmap] += B[...];          generated after quadloop
                raise NotImplementedError("scaled block mode not implemented.")
                # (probably need mostly the same data as premultiplied, except no P table name or values)
                block_is_piecewise = False

            elif block_mode in ("partial", "full", "safe"):
                # Translate indices to piecewise context if necessary
                block_is_piecewise = factor_is_piecewise and not expect_weight
                ma_data = []
                for i, ma in enumerate(ma_indices):
                    if trs[i].is_piecewise:
                        ma_index = piecewise_modified_argument_indices[modified_arguments[ma]]
                    else:
                        block_is_piecewise = False
                        ma_index = ma
                    ma_data.append(ma_data_t(ma_index, trs[i]))

                block_is_transposed = False  # FIXME: Handle transposes for these block types

                if block_mode == "partial":
                    # Add to contributions:
                    # P[i] = sum_q weight * f * u[i];  generated inside quadloop
                    # B[i,j] = P[i] * v[j];            generated after quadloop (where v is the piecewise ma)
                    # A[blockmap] += B[...];           generated after quadloop

                    # Find first piecewise index TODO: Is last better? just reverse range here
                    for i in range(rank):
                        if trs[i].is_piecewise:
                            piecewise_ma_index = i
                            break
                    assert rank == 2
                    not_piecewise_ma_index = 1 - piecewise_ma_index
                    block_unames = (unames[not_piecewise_ma_index],)
                    blockdata = partial_block_data_t(block_mode,  ttypes,
                                                     factor_index, factor_is_piecewise,
                                                     block_unames, block_restrictions,
                                                     block_is_transposed,
                                                     tuple(ma_data), piecewise_ma_index)
                elif block_mode in ("full", "safe"):
                    # Add to contributions:
                    # B[i] = sum_q weight * f * u[i] * v[j];  generated inside quadloop
                    # A[blockmap] += B[i];                    generated after quadloop

                    block_unames = unames
                    blockdata = full_block_data_t(block_mode, ttypes,
                                                  factor_index, factor_is_piecewise,
                                                  block_unames, block_restrictions,
                                                  block_is_transposed,
                                                  tuple(ma_data))
            else:
                error("Invalid block_mode %s" % (block_mode,))

            if block_is_piecewise:
                # Insert in piecewise expr_ir
                ir["piecewise_ir"]["block_contributions"][blockmap].append(blockdata)
            else:
                # Insert in varying expr_ir for this quadrature loop
                block_contributions[blockmap].append(blockdata)

        # Figure out which table names are referenced in unstructured partition
        active_table_names = set()
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            tr = mt_unique_table_reference.get(mt)
            if tr is not None and FV_active[i]:
                active_table_names.add(tr.name)

        # Figure out which table names are referenced in blocks
        for blockmap, contributions in chain(block_contributions.items(),
                                             ir["piecewise_ir"]["block_contributions"].items()):
            for blockdata in contributions:
                if blockdata.block_mode in ("preintegrated", "premultiplied"):
                    active_table_names.add(blockdata.name)
                elif blockdata.block_mode in ("partial", "full", "safe"):
                    for mad in blockdata.ma_data:
                        active_table_names.add(mad.tabledata.name)

        # Record all table types before dropping tables
        ir["unique_table_types"].update(unique_table_types)

        # Drop tables not referenced from modified terminals
        # and tables of zeros and ones
        unused_ttypes = ("zeros", "ones", "quadrature")
        keep_table_names = set()
        for name in active_table_names:
            ttype = ir["unique_table_types"][name]
            if ttype not in unused_ttypes:
                if name in unique_tables:
                    keep_table_names.add(name)
        unique_tables = { name: unique_tables[name]
                          for name in keep_table_names }

        # Add to global set of all tables
        for name, table in unique_tables.items():
            tbl = ir["unique_tables"].get(name)
            if tbl is not None and not numpy.allclose(tbl, table, rtol=p["table_rtol"], atol=p["table_atol"]):
                error("Table values mismatch with same name.")
        ir["unique_tables"].update(unique_tables)

        # Analyse active terminals to check what we'll need to generate code for
        active_mts = []
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            if FV_active[i]:
                active_mts.append(mt)

        # Figure out if we need to access CellCoordinate to
        # avoid generating quadrature point table otherwise
        if integral_type == "cell":
            need_points = any(isinstance(mt.terminal, CellCoordinate)
                              for mt in active_mts)
        elif integral_type in facet_integral_types:
            need_points = any(isinstance(mt.terminal, FacetCoordinate)
                              for mt in active_mts)
        elif integral_type in custom_integral_types:
            need_points = True  # TODO: Always?
        else:
            need_points = False

        # Figure out if we need to access QuadratureWeight to
        # avoid generating quadrature point table otherwise
        #need_weights = any(isinstance(mt.terminal, QuadratureWeight)
        #                   for mt in active_mts)

        # Count blocks of each mode
        block_modes = defaultdict(int)
        for blockmap, contributions in block_contributions.items():
            for blockdata in contributions:
                block_modes[blockdata.block_mode] += 1
        # Debug output
        summary = "\n".join("  %d\t%s" % (count, mode)
                            for mode, count in sorted(block_modes.items()))
        debug("Blocks of each mode: \n" + summary)

        # If there are any blocks other than preintegrated we need weights
        if expect_weight and any(mode != "preintegrated" for mode in block_modes):
            need_weights = True
        elif integral_type in custom_integral_types:
            need_weights = True  # TODO: Always?
        else:
            need_weights = False

        # Build IR dict for the given expressions
        expr_ir = {}

        # (array) FV-index -> UFL subexpression
        expr_ir["V"] = FV

        # (array) V indices for each input expression component in flattened order
        expr_ir["V_targets"] = FV_targets

        ### Result of factorization:
        # (array) MA-index -> UFL expression of modified arguments
        expr_ir["modified_arguments"] = modified_arguments

        # (dict) tuple(MA-indices) -> FV-index of monomial factor
        #expr_ir["argument_factorization"] = argument_factorization

        expr_ir["block_contributions"] = block_contributions

        ### Modified terminals
        # (array) list of FV-indices to modified terminals
        #expr_ir["modified_terminal_indices"] = modified_terminal_indices

        # Dependency structure of graph:
        # (CRSArray) FV-index -> direct dependency FV-index list
        #expr_ir["dependencies"] = FV_deps

        # (CRSArray) FV-index -> direct dependee FV-index list
        #expr_ir["inverse_dependencies"] = inv_FV_deps

        # Metadata about each vertex
        #expr_ir["active"] = FV_active        # (array) FV-index -> bool
        #expr_ir["V_piecewise"] = FV_piecewise  # (array) FV-index -> bool
        expr_ir["V_varying"] = FV_varying      # (array) FV-index -> bool
        expr_ir["V_mts"] = FV_mts

        # Store mapping from modified terminal object to
        # table data, this is used in integralgenerator
        expr_ir["mt_tabledata"] = mt_unique_table_reference

        # To emit quadrature rules only if needed
        expr_ir["need_points"] = need_points
        expr_ir["need_weights"] = need_weights

        # Store final ir for this num_points
        ir["varying_irs"][num_points] = expr_ir

    return ir
Beispiel #7
0
def build_uflacs_ir(cell, integral_type, entitytype, integrands, tensor_shape,
                    coefficient_numbering, quadrature_rules, parameters):
    """Build the uflacs intermediate representation (a dict) for one integral.

    For every ``num_points`` case the integrand is turned into a scalar
    graph, modified terminals with all-zero tables are replaced by zero,
    the graph is factorized with respect to its arguments, and every term
    of the factorization is assigned one block mode ("preintegrated",
    "premultiplied", "partial", "full" or "safe"). Data that is piecewise
    is collected in one shared ``ir["piecewise_ir"]``; everything else goes
    into a per-``num_points`` entry of ``ir["varying_irs"]``.

    Arguments:
        cell: object providing ``topological_dimension()``; also passed on
            to ``build_optimized_tables``.
        integral_type: string; compared against "expression",
            "interior_facet", "cell" and the module-level
            ``point_integral_types`` / ``facet_integral_types`` /
            ``custom_integral_types`` collections.
        entitytype: string; compared against "cell" and "facet" here.
        integrands: a tuple/list of expressions when ``integral_type`` is
            "expression", otherwise a dict mapping num_points to a single
            integrand (both shapes are asserted below).
        tensor_shape: only its length is used, as the last argument to
            ``compute_argument_factorization``.
        coefficient_numbering: stored unchanged in the result.
        quadrature_rules: indexed as ``quadrature_rules[num_points][1]`` to
            obtain the weights for preintegration; also passed on to
            ``build_optimized_tables``.
        parameters: passed to ``parse_uflacs_optimization_parameters``.

    Returns:
        dict with the keys "params", "coefficient_numbering",
        "unique_tables", "unique_table_types", "piecewise_ir",
        "varying_irs" and "all_num_points".
    """
    # The intermediate representation dict we're building and returning here
    ir = {}

    # Extract uflacs specific optimization and code generation parameters
    p = parse_uflacs_optimization_parameters(parameters, integral_type)

    # Pass on parameters for consumption in code generation
    ir["params"] = p

    # { ufl coefficient: count }
    ir["coefficient_numbering"] = coefficient_numbering

    # Shared unique tables for all quadrature loops
    ir["unique_tables"] = {}
    ir["unique_table_types"] = {}

    # Shared piecewise expr_ir for all quadrature loops
    ir["piecewise_ir"] = empty_expr_ir()

    # { num_points: expr_ir for one integrand }
    ir["varying_irs"] = {}

    # Temporary data structures to build shared piecewise data:
    # - pe2i: piecewise expression vertex -> index assigned when it was
    #   appended to the shared piecewise "V" list
    # - piecewise_modified_argument_indices: analysed modified argument ->
    #   index in the shared piecewise "modified_arguments" list
    pe2i = {}
    piecewise_modified_argument_indices = {}

    # Whether we expect the quadrature weight to be applied or not
    # (in some cases it's just set to 1 in ufl integral scaling)
    tdim = cell.topological_dimension()
    expect_weight = (
        integral_type not in ("expression", ) + point_integral_types
        and (entitytype == "cell" or (entitytype == "facet" and tdim > 1) or
             (integral_type in custom_integral_types)))

    if integral_type == "expression":
        # TODO: Figure out how to get non-integrand expressions in here, this is just a draft:
        # Analyse all expressions in one list
        assert isinstance(integrands, (tuple, list))
        # A single case keyed by None holds all expressions together
        all_num_points = [None]
        cases = [(None, integrands)]
    else:
        # Analyse each num_points/integrand separately
        assert isinstance(integrands, dict)
        all_num_points = sorted(integrands.keys())
        cases = [(num_points, [integrands[num_points]])
                 for num_points in all_num_points]
    ir["all_num_points"] = all_num_points

    for num_points, expressions in cases:
        # Rebalance order of nested terminal modifiers
        expressions = [balance_modifiers(expr) for expr in expressions]

        # Build initial scalar list-based graph representation
        V, V_deps, V_targets = build_scalar_graph(expressions)

        # Build terminal_data from V here before factorization.
        # Then we can use it to derive table properties for all modified terminals,
        # and then use that to rebuild the scalar graph more efficiently before
        # argument factorization. We can build terminal_data again after factorization
        # if that's necessary.
        initial_terminal_indices = [
            i for i, v in enumerate(V) if is_modified_terminal(v)
        ]
        initial_terminal_data = [
            analyse_modified_terminal(V[i]) for i in initial_terminal_indices
        ]
        # The shared ir["unique_tables"] collected from earlier cases is
        # handed to the table builder together with the tolerances.
        unique_tables, unique_table_types, unique_table_num_dofs, mt_unique_table_reference = \
            build_optimized_tables(num_points, quadrature_rules,
                cell, integral_type, entitytype, initial_terminal_data,
                ir["unique_tables"], p["enable_table_zero_compression"],
                rtol=p["table_rtol"], atol=p["table_atol"])

        # Replace some scalar modified terminals before reconstructing expressions
        # (could possibly use replace() on target expressions instead)
        z = as_ufl(0.0)
        one = as_ufl(1.0)
        for i, mt in zip(initial_terminal_indices, initial_terminal_data):
            if isinstance(mt.terminal, QuadratureWeight):
                # Replace quadrature weight with 1.0, will be added back later
                V[i] = one
            else:
                # Set modified terminals with zero tables to zero
                tr = mt_unique_table_reference.get(mt)
                if tr is not None and tr.ttype == "zeros":
                    V[i] = z

        # Propagate expression changes using dependency list
        # NOTE(review): a single forward pass is only sufficient if every
        # vertex appears after its dependencies in V; presumably guaranteed
        # by build_scalar_graph -- confirm.
        for i in range(len(V)):
            deps = [V[j] for j in V_deps[i]]
            if deps:
                V[i] = V[i]._ufl_expr_reconstruct_(*deps)

        # Rebuild scalar target expressions and graph
        # (this may be overkill and possible to optimize
        # away if it turns out to be costly)
        expressions = [V[i] for i in V_targets]

        # Rebuild scalar list-based graph representation
        SV, SV_deps, SV_targets = build_scalar_graph(expressions)
        assert all(i < len(SV) for i in SV_targets)

        # Compute factorization of arguments
        (argument_factorizations, modified_arguments,
             FV, FV_deps, FV_targets) = \
            compute_argument_factorization(SV, SV_deps, SV_targets, len(tensor_shape))
        assert len(SV_targets) == len(argument_factorizations)

        # TODO: Still expecting one target variable in code generation
        assert len(argument_factorizations) == 1
        argument_factorization, = argument_factorizations

        # Store modified arguments in analysed form
        # (the list is overwritten in place, entry by entry)
        for i in range(len(modified_arguments)):
            modified_arguments[i] = analyse_modified_terminal(
                modified_arguments[i])

        # Build set of modified_terminal indices into factorized_vertices
        modified_terminal_indices = [
            i for i, v in enumerate(FV) if is_modified_terminal(v)
        ]

        # Build set of modified terminal ufl expressions
        modified_terminals = [
            analyse_modified_terminal(FV[i]) for i in modified_terminal_indices
        ]

        # Make it easy to get mt object from FV index
        # (entries stay None for vertices that are not modified terminals)
        FV_mts = [None] * len(FV)
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            FV_mts[i] = mt

        # Mark active modified arguments
        #active_modified_arguments = numpy.zeros(len(modified_arguments), dtype=int)
        #for ma_indices in argument_factorization:
        #    for j in ma_indices:
        #        active_modified_arguments[j] = 1

        # Dependency analysis
        inv_FV_deps, FV_active, FV_piecewise, FV_varying = \
            analyse_dependencies(FV, FV_deps, FV_targets,
                                 modified_terminal_indices,
                                 modified_terminals,
                                 mt_unique_table_reference)

        # Extend piecewise V with unique new FV_piecewise vertices
        pir = ir["piecewise_ir"]
        for i, v in enumerate(FV):
            if FV_piecewise[i]:
                j = pe2i.get(v)
                if j is None:
                    # First time this piecewise vertex is seen in any case.
                    # NOTE(review): len(pe2i) matches the position in
                    # pir["V"] only if empty_expr_ir() starts with an empty
                    # "V" list -- confirm.
                    j = len(pe2i)
                    pe2i[v] = j
                    pir["V"].append(v)
                    pir["V_active"].append(1)
                    mt = FV_mts[i]
                    if mt is not None:
                        pir["mt_tabledata"][
                            mt] = mt_unique_table_reference.get(mt)
                    pir["V_mts"].append(mt)

        # Extend piecewise modified_arguments list with unique new items
        # (every modified argument is registered here, whether or not its
        # table turns out to be piecewise)
        for mt in modified_arguments:
            ma = piecewise_modified_argument_indices.get(mt)
            if ma is None:
                ma = len(pir["modified_arguments"])
                pir["modified_arguments"].append(mt)
                piecewise_modified_argument_indices[mt] = ma

        # Loop over factorization terms
        # block_contributions: blockmap (tuple of dofmaps) -> list of
        # blockdata for the varying (per quadrature loop) part
        block_contributions = defaultdict(list)
        for ma_indices, fi in sorted(argument_factorization.items()):
            # Get a bunch of information about this term
            rank = len(ma_indices)
            trs = tuple(mt_unique_table_reference[modified_arguments[ai]]
                        for ai in ma_indices)

            unames = tuple(tr.name for tr in trs)
            ttypes = tuple(tr.ttype for tr in trs)
            # Terms with zero tables are expected to be gone already, since
            # such terminals were replaced by zero before factorization
            assert not any(tt == "zeros" for tt in ttypes)

            blockmap = tuple(tr.dofmap for tr in trs)

            block_is_uniform = all(tr.is_uniform for tr in trs)

            # Collect relevant restrictions to identify blocks
            # correctly in interior facet integrals
            block_restrictions = []
            for i, ma in enumerate(ma_indices):
                if trs[i].is_uniform:
                    r = None
                else:
                    r = modified_arguments[ma].restriction
                block_restrictions.append(r)
            block_restrictions = tuple(block_restrictions)

            # Store piecewise status for fi and translate
            # index to piecewise scope if relevant
            factor_is_piecewise = FV_piecewise[fi]
            if factor_is_piecewise:
                factor_index = pe2i[FV[fi]]
            else:
                factor_index = fi

            # TODO: Add separate block modes for quadrature
            # Both arguments in quadrature elements
            # (the two bare string literals below are design notes written
            # as expression statements; they have no effect at runtime)
            """
            for iq
                fw = f*w
                #for i
                #    for j
                #        B[i,j] = fw*U[i]*V[j] = 0 if i != iq or j != iq
                BQ[iq] = B[iq,iq] = fw
            for (iq) 
                A[iq+offset0, iq+offset1] = BQ[iq]
            """
            # One argument in quadrature element
            """
            for iq
                fw[iq] = f*w
                #for i
                #    for j
                #        B[i,j] = fw*UQ[i]*V[j] = 0 if i != iq
                for j
                    BQ[iq,j] = fw[iq]*V[iq,j]
            for (iq) for (j)
                A[iq+offset, j+offset] = BQ[iq,j]
            """

            # Decide how to handle code generation for this block
            # (the first matching condition wins; "safe" is the fallback)
            if p["enable_preintegration"] and (factor_is_piecewise and rank > 0
                                               and "quadrature" not in ttypes):
                # - Piecewise factor is an absolute prerequisite
                # - Could work for rank 0 as well but currently doesn't
                # - Haven't considered how quadrature elements work out
                block_mode = "preintegrated"
            elif p["enable_premultiplication"] and (rank > 0 and all(
                    tt in piecewise_ttypes for tt in ttypes)):
                # Integrate functional in quadloop, scale block after quadloop
                block_mode = "premultiplied"
            elif p["enable_sum_factorization"]:
                if (rank == 2
                        and any(tt in piecewise_ttypes for tt in ttypes)):
                    # Partial computation in quadloop of f*u[i],
                    # compute (f*u[i])*v[i] outside quadloop,
                    # (or with u,v swapped)
                    block_mode = "partial"
                else:
                    # Full runtime integration of f*u[i]*v[j],
                    # can still do partial computation in quadloop of f*u[i]
                    # but must compute (f*u[i])*v[i] as well inside quadloop.
                    # (or with u,v swapped)
                    block_mode = "full"
            else:
                # Use full runtime integration with nothing fancy going on
                block_mode = "safe"

            # Carry out decision
            if block_mode == "preintegrated":
                # Add to contributions:
                # P = sum_q weight*u*v;      preintegrated here
                # B[...] = f * P[...];       generated after quadloop
                # A[blockmap] += B[...];     generated after quadloop

                # Cache shared across all num_points cases:
                # tuple of table names -> name of the preintegrated table
                cache = ir["piecewise_ir"]["preintegrated_blocks"]

                block_is_transposed = False
                pname = cache.get(unames)

                # Reuse transpose to save memory
                if p["enable_block_transpose_reuse"] and pname is None and len(
                        unames) == 2:
                    pname = cache.get((unames[1], unames[0]))
                    if pname is not None:
                        # Cache hit on transpose
                        block_is_transposed = True

                if pname is None:
                    # Cache miss, precompute block
                    weights = quadrature_rules[num_points][1]
                    if integral_type == "interior_facet":
                        ptable = integrate_block_interior_facets(
                            weights, unames, ttypes, unique_tables,
                            unique_table_num_dofs)
                    else:
                        ptable = integrate_block(weights, unames, ttypes,
                                                 unique_tables,
                                                 unique_table_num_dofs)
                    ptable = clamp_table_small_numbers(ptable,
                                                       rtol=p["table_rtol"],
                                                       atol=p["table_atol"])

                    # Name is numbered by the cache size before insertion
                    pname = "PI%d" % (len(cache, ))
                    cache[unames] = pname
                    unique_tables[pname] = ptable
                    unique_table_types[pname] = "preintegrated"

                assert factor_is_piecewise
                block_unames = (pname, )
                blockdata = preintegrated_block_data_t(
                    block_mode, ttypes, factor_index, factor_is_piecewise,
                    block_unames, block_restrictions, block_is_transposed,
                    block_is_uniform, pname)
                block_is_piecewise = True

            elif block_mode == "premultiplied":
                # Add to contributions:
                # P = u*v;                        computed here
                # FI = sum_q weight * f;          generated inside quadloop
                # B[...] = FI * P[...];           generated after quadloop
                # A[blockmap] += B[...];          generated after quadloop

                # Cache shared across all num_points cases:
                # tuple of table names -> name of the premultiplied table
                cache = ir["piecewise_ir"]["premultiplied_blocks"]

                block_is_transposed = False
                pname = cache.get(unames)

                # Reuse transpose to save memory
                if p["enable_block_transpose_reuse"] and pname is None and len(
                        unames) == 2:
                    pname = cache.get((unames[1], unames[0]))
                    if pname is not None:
                        # Cache hit on transpose
                        block_is_transposed = True

                if pname is None:
                    # Cache miss, precompute block
                    # NOTE(review): the meaning of the leading 0 argument to
                    # the multiply_block* helpers is not visible here.
                    if integral_type == "interior_facet":
                        ptable = multiply_block_interior_facets(
                            0, unames, ttypes, unique_tables,
                            unique_table_num_dofs)
                    else:
                        ptable = multiply_block(0, unames, ttypes,
                                                unique_tables,
                                                unique_table_num_dofs)
                    # Name is numbered by the cache size before insertion
                    pname = "PM%d" % (len(cache, ))
                    cache[unames] = pname
                    unique_tables[pname] = ptable
                    unique_table_types[pname] = "premultiplied"

                block_unames = (pname, )
                blockdata = premultiplied_block_data_t(
                    block_mode, ttypes, factor_index, factor_is_piecewise,
                    block_unames, block_restrictions, block_is_transposed,
                    block_is_uniform, pname)
                # The factor is integrated inside the quadrature loop, so the
                # block goes into the varying contributions
                block_is_piecewise = False

            elif block_mode == "scaled":  # TODO: Add mode, block is piecewise but choose not to be premultiplied
                # No branch of the decision above selects "scaled", so this
                # branch is currently never taken.
                # Add to contributions:
                # FI = sum_q weight * f;          generated inside quadloop
                # B[...] = FI * u * v;            generated after quadloop
                # A[blockmap] += B[...];          generated after quadloop
                raise NotImplementedError("scaled block mode not implemented.")
                # (probably need mostly the same data as premultiplied, except no P table name or values)
                # Unreachable after the raise above; placeholder only
                block_is_piecewise = False

            elif block_mode in ("partial", "full", "safe"):
                # Translate indices to piecewise context if necessary
                # The block is piecewise only if the factor is piecewise, no
                # weight is expected, and every argument table is piecewise
                block_is_piecewise = factor_is_piecewise and not expect_weight
                ma_data = []
                for i, ma in enumerate(ma_indices):
                    if trs[i].is_piecewise:
                        ma_index = piecewise_modified_argument_indices[
                            modified_arguments[ma]]
                    else:
                        block_is_piecewise = False
                        ma_index = ma
                    ma_data.append(ma_data_t(ma_index, trs[i]))

                block_is_transposed = False  # FIXME: Handle transposes for these block types

                if block_mode == "partial":
                    # Add to contributions:
                    # P[i] = sum_q weight * f * u[i];  generated inside quadloop
                    # B[i,j] = P[i] * v[j];            generated after quadloop (where v is the piecewise ma)
                    # A[blockmap] += B[...];           generated after quadloop

                    # Find first piecewise index TODO: Is last better? just reverse range here
                    # NOTE(review): "partial" was selected because some ttype
                    # is in piecewise_ttypes, while this loop tests
                    # trs[i].is_piecewise; if the two can disagree,
                    # piecewise_ma_index is left unbound -- confirm.
                    for i in range(rank):
                        if trs[i].is_piecewise:
                            piecewise_ma_index = i
                            break
                    assert rank == 2
                    # With rank 2 the other argument index is 1 - index
                    not_piecewise_ma_index = 1 - piecewise_ma_index
                    block_unames = (unames[not_piecewise_ma_index], )
                    blockdata = partial_block_data_t(
                        block_mode, ttypes, factor_index, factor_is_piecewise,
                        block_unames, block_restrictions, block_is_transposed,
                        tuple(ma_data), piecewise_ma_index)
                elif block_mode in ("full", "safe"):
                    # Add to contributions:
                    # B[i] = sum_q weight * f * u[i] * v[j];  generated inside quadloop
                    # A[blockmap] += B[i];                    generated after quadloop

                    block_unames = unames
                    blockdata = full_block_data_t(
                        block_mode, ttypes, factor_index, factor_is_piecewise,
                        block_unames, block_restrictions, block_is_transposed,
                        tuple(ma_data))
            else:
                # NOTE(review): the code below uses blockdata and
                # block_is_piecewise, so this relies on error() raising
                # -- confirm.
                error("Invalid block_mode %s" % (block_mode, ))

            if block_is_piecewise:
                # Insert in piecewise expr_ir
                ir["piecewise_ir"]["block_contributions"][blockmap].append(
                    blockdata)
            else:
                # Insert in varying expr_ir for this quadrature loop
                block_contributions[blockmap].append(blockdata)

        # Figure out which table names are referenced in unstructured partition
        active_table_names = set()
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            tr = mt_unique_table_reference.get(mt)
            if tr is not None and FV_active[i]:
                active_table_names.add(tr.name)

        # Figure out which table names are referenced in blocks
        # (both the varying blocks of this case and all piecewise blocks
        # accumulated so far are scanned)
        for blockmap, contributions in chain(
                block_contributions.items(),
                ir["piecewise_ir"]["block_contributions"].items()):
            for blockdata in contributions:
                if blockdata.block_mode in ("preintegrated", "premultiplied"):
                    active_table_names.add(blockdata.name)
                elif blockdata.block_mode in ("partial", "full", "safe"):
                    for mad in blockdata.ma_data:
                        active_table_names.add(mad.tabledata.name)

        # Record all table types before dropping tables
        ir["unique_table_types"].update(unique_table_types)

        # Drop tables not referenced from modified terminals
        # and tables of zeros and ones
        unused_ttypes = ("zeros", "ones", "quadrature")
        keep_table_names = set()
        for name in active_table_names:
            ttype = ir["unique_table_types"][name]
            if ttype not in unused_ttypes:
                # Active names coming from earlier cases may be absent from
                # this case's local unique_tables; those are skipped here
                if name in unique_tables:
                    keep_table_names.add(name)
        unique_tables = {
            name: unique_tables[name]
            for name in keep_table_names
        }

        # Add to global set of all tables
        # (a name already present must hold values equal within tolerance)
        for name, table in unique_tables.items():
            tbl = ir["unique_tables"].get(name)
            if tbl is not None and not numpy.allclose(
                    tbl, table, rtol=p["table_rtol"], atol=p["table_atol"]):
                error("Table values mismatch with same name.")
        ir["unique_tables"].update(unique_tables)

        # Analyse active terminals to check what we'll need to generate code for
        active_mts = []
        for i, mt in zip(modified_terminal_indices, modified_terminals):
            if FV_active[i]:
                active_mts.append(mt)

        # Figure out if we need to access CellCoordinate to
        # avoid generating quadrature point table otherwise
        if integral_type == "cell":
            need_points = any(
                isinstance(mt.terminal, CellCoordinate) for mt in active_mts)
        elif integral_type in facet_integral_types:
            need_points = any(
                isinstance(mt.terminal, FacetCoordinate) for mt in active_mts)
        elif integral_type in custom_integral_types:
            need_points = True  # TODO: Always?
        else:
            need_points = False

        # Figure out if we need to access QuadratureWeight to
        # avoid generating quadrature point table otherwise
        #need_weights = any(isinstance(mt.terminal, QuadratureWeight)
        #                   for mt in active_mts)

        # Count blocks of each mode
        # (only the varying blocks of this case are counted; blocks stored
        # in the shared piecewise ir are not included)
        block_modes = defaultdict(int)
        for blockmap, contributions in block_contributions.items():
            for blockdata in contributions:
                block_modes[blockdata.block_mode] += 1
        # Debug output
        summary = "\n".join("  %d\t%s" % (count, mode)
                            for mode, count in sorted(block_modes.items()))
        debug("Blocks of each mode: \n" + summary)

        # If there are any blocks other than preintegrated we need weights
        if expect_weight and any(mode != "preintegrated"
                                 for mode in block_modes):
            need_weights = True
        elif integral_type in custom_integral_types:
            need_weights = True  # TODO: Always?
        else:
            need_weights = False

        # Build IR dict for the given expressions
        expr_ir = {}

        # (array) FV-index -> UFL subexpression
        expr_ir["V"] = FV

        # (array) V indices for each input expression component in flattened order
        expr_ir["V_targets"] = FV_targets

        ### Result of factorization:
        # (array) MA-index -> UFL expression of modified arguments
        expr_ir["modified_arguments"] = modified_arguments

        # (dict) tuple(MA-indices) -> FV-index of monomial factor
        #expr_ir["argument_factorization"] = argument_factorization

        expr_ir["block_contributions"] = block_contributions

        ### Modified terminals
        # (array) list of FV-indices to modified terminals
        #expr_ir["modified_terminal_indices"] = modified_terminal_indices

        # Dependency structure of graph:
        # (CRSArray) FV-index -> direct dependency FV-index list
        #expr_ir["dependencies"] = FV_deps

        # (CRSArray) FV-index -> direct dependee FV-index list
        #expr_ir["inverse_dependencies"] = inv_FV_deps

        # Metadata about each vertex
        #expr_ir["active"] = FV_active        # (array) FV-index -> bool
        #expr_ir["V_piecewise"] = FV_piecewise  # (array) FV-index -> bool
        expr_ir["V_varying"] = FV_varying  # (array) FV-index -> bool
        expr_ir["V_mts"] = FV_mts

        # Store mapping from modified terminal object to
        # table data, this is used in integralgenerator
        expr_ir["mt_tabledata"] = mt_unique_table_reference

        # To emit quadrature rules only if needed
        expr_ir["need_points"] = need_points
        expr_ir["need_weights"] = need_weights

        # Store final ir for this num_points
        ir["varying_irs"][num_points] = expr_ir

    return ir
Beispiel #8
0
    def compute_midpoint_geometry(self, L, ir):
        """Generate code computing ``x`` and ``J`` from the midpoint tables.

        Emits declarations of the coordinate basis tables ``tables["xm"]``
        and ``tables["Jm"]`` (basis values and derivatives tabulated at
        X=midpoint), followed by loop nests that accumulate the physical
        coordinate ``x`` and the Jacobian ``J`` from ``coordinate_dofs``.

        Both loop nests use ``AssignAdd``; no zero-initialization of ``x``
        or ``J`` is emitted by this method.

        Args:
            L: Code generation language object providing ``Symbol``,
                ``ArrayDecl``, ``FlattenedArray``, ``ForRanges``,
                ``AssignAdd`` and ``Comment``.
            ir: Intermediate representation dict. The keys
                ``"geometric_dimension"``, ``"topological_dimension"``,
                ``"num_scalar_coordinate_element_dofs"`` and ``"tables"``
                are read.

        Returns:
            A list of ``L`` nodes: the two table declarations, then the
            "Compute x" and the "Compute J" loop nests, in that order.

        Raises:
            AssertionError: If a table shape does not match the dimensions
                given in ``ir``.
        """
        # Dimensions
        gdim = ir["geometric_dimension"]
        tdim = ir["topological_dimension"]
        num_dofs = ir["num_scalar_coordinate_element_dofs"]

        # Tables of coordinate basis function values and derivatives at
        # X=0 and X=midpoint are available through ir; only the midpoint
        # ones are needed here.
        tables = ir["tables"]
        xm_table = tables["xm"]
        Jm_table = tables["Jm"]

        # Check the table shapes against our expectations
        assert xm_table.shape == (num_dofs,)
        assert Jm_table.shape == (tdim, num_dofs)

        # TODO: Use epsilon parameter here?
        # TODO: Move to a more 'neutral' utility file
        from ffc.uflacs.elementtables import clamp_table_small_numbers
        xm_table = clamp_table_small_numbers(xm_table)
        Jm_table = clamp_table_small_numbers(Jm_table)

        # Table symbols
        phi_Xm = L.Symbol("phi_Xm")
        dphi_Xm = L.Symbol("dphi_Xm")

        # Table declarations
        table_decls = [
            L.ArrayDecl("const double", phi_Xm, sizes=xm_table.shape, values=xm_table),
            L.ArrayDecl("const double", dphi_Xm, sizes=Jm_table.shape, values=Jm_table),
        ]

        # Output geometry (detJ and K are not computed by this method)
        x = L.Symbol("x")
        J = L.Symbol("J")

        # Input cell data, viewed as (num_dofs, gdim); J viewed as (gdim, tdim)
        coordinate_dofs = L.FlattenedArray(L.Symbol("coordinate_dofs"),
                                           dims=(num_dofs, gdim))
        Jf = L.FlattenedArray(J, dims=(gdim, tdim))

        # Loop indices
        i = L.Symbol("i")
        j = L.Symbol("j")
        d = L.Symbol("d")

        xm_code = [
            L.Comment("Compute x"),
            L.ForRanges(
                (i, 0, gdim),
                (d, 0, num_dofs),
                index_type=index_type,
                body=L.AssignAdd(x[i], coordinate_dofs[d, i]*phi_Xm[d])
            ),
        ]

        Jm_code = [
            L.Comment("Compute J"),
            L.ForRanges(
                (i, 0, gdim),
                (j, 0, tdim),
                (d, 0, num_dofs),
                index_type=index_type,
                body=L.AssignAdd(Jf[i, j], coordinate_dofs[d, i]*dphi_Xm[j, d])
            ),
        ]

        return table_decls + xm_code + Jm_code
Beispiel #9
0
    def _compute_reference_coordinates_affine(self, L, ir, output_all=False):
        """Generate code computing reference coordinates via ``X = K0*(x - x0)``.

        The emitted code declares the coordinate basis tables tabulated at
        X=0 (``tables["x0"]`` and ``tables["J0"]``), computes ``x0`` and
        ``J`` from ``coordinate_dofs``, calls
        ``compute_jacobian_determinants`` and ``compute_jacobian_inverses``
        for a single point to obtain ``detJ`` and ``K``, and finally
        accumulates ``K[j, i]*(x[ip, i] - x0[i])`` into ``X[ip, j]`` for
        each of ``num_points`` points.

        The final loop uses ``AssignAdd``; no zero-initialization of ``X``
        is emitted by this method.

        Args:
            L: Code generation language object providing ``Symbol``,
                ``ArrayDecl``, ``FlattenedArray``, ``ForRanges``,
                ``ForRange``, ``Assign``, ``AssignAdd`` and ``Call``.
            ir: Intermediate representation dict. The keys
                ``"geometric_dimension"``, ``"topological_dimension"``,
                ``"num_scalar_coordinate_element_dofs"`` and ``"tables"``
                are read.
            output_all: If true, ``J``, ``detJ`` and ``K`` are treated as
                outputs and no local declarations are emitted for them;
                otherwise they are declared as local arrays.

        Returns:
            A list of ``L`` nodes: table declarations, optional local
            declarations, then the x0, J, detJ/K and X computations.

        Raises:
            AssertionError: If a table shape does not match the dimensions
                given in ``ir``.
        """
        # Dimensions
        gdim = ir["geometric_dimension"]
        tdim = ir["topological_dimension"]
        num_points = L.Symbol("num_points")

        # Number of dofs for a scalar component
        num_dofs = ir["num_scalar_coordinate_element_dofs"]

        # Loop indices
        ip = L.Symbol("ip")  # point
        i = L.Symbol("i")    # gdim
        j = L.Symbol("j")    # tdim
        k = L.Symbol("k")    # sum iteration

        # Output geometry
        X = L.FlattenedArray(L.Symbol("X"), dims=(num_points, tdim))
        # if output_all, this is also output:
        Jsym = L.Symbol("J")
        detJsym = L.Symbol("detJ")
        Ksym = L.Symbol("K")

        # Input geometry
        x = L.FlattenedArray(L.Symbol("x"), dims=(num_points, gdim))

        # Input cell data
        coordinate_dofs = L.FlattenedArray(L.Symbol("coordinate_dofs"),
                                           dims=(num_dofs, gdim))
        cell_orientation = L.Symbol("cell_orientation")

        # J, detJ and K only need local storage when they are not outputs
        if output_all:
            decls = []
        else:
            decls = [
                L.ArrayDecl("double", Jsym, sizes=(gdim*tdim,)),
                L.ArrayDecl("double", detJsym, sizes=(1,)),
                L.ArrayDecl("double", Ksym, sizes=(tdim*gdim,)),
            ]

        # Tables of coordinate basis function values and derivatives at
        # X=0 and X=midpoint are available through ir; only the X=0 ones
        # are needed here.
        tables = ir["tables"]
        x_table = tables["x0"]
        J_table = tables["J0"]

        # Check the table shapes against our expectations
        assert x_table.shape == (num_dofs,)
        assert J_table.shape == (tdim, num_dofs)

        # TODO: Use epsilon parameter here?
        # TODO: Move to a more 'neutral' utility file
        from ffc.uflacs.elementtables import clamp_table_small_numbers
        x_table = clamp_table_small_numbers(x_table)
        J_table = clamp_table_small_numbers(J_table)

        # Table symbols
        phi_X0 = L.Symbol("phi_X0")
        dphi_X0 = L.Symbol("dphi_X0")

        # Table declarations
        table_decls = [
            L.ArrayDecl("const double", phi_X0, sizes=x_table.shape, values=x_table),
            L.ArrayDecl("const double", dphi_X0, sizes=J_table.shape, values=J_table),
        ]

        # Compute x0 = x(X=0) (optimized by precomputing basis at X=0)
        x0 = L.Symbol("x0")
        compute_x0 = [
            L.ArrayDecl("double", x0, sizes=(gdim,), values=0),
            L.ForRanges(
                (i, 0, gdim),
                (k, 0, num_dofs),
                index_type=index_type,
                body=L.AssignAdd(x0[i], coordinate_dofs[k, i] * phi_X0[k])
            ),
        ]

        # For more convenient indexing
        J = L.FlattenedArray(Jsym, dims=(gdim, tdim))
        K = L.FlattenedArray(Ksym, dims=(tdim, gdim))

        # Compute J = J(X=0) (optimized by precomputing basis at X=0).
        # Each entry is explicitly zeroed before the sum over dofs.
        compute_J0 = [
            L.ForRanges(
                (i, 0, gdim),
                (j, 0, tdim),
                index_type=index_type,
                body=[
                    L.Assign(J[i, j], 0.0),
                    L.ForRange(
                        k, 0, num_dofs, index_type=index_type,
                        body=L.AssignAdd(J[i, j], coordinate_dofs[k, i] * dphi_X0[j, k])
                    ),
                ]
            ),
        ]

        # Compute K = inv(J) (and intermediate value det(J)); the literal 1
        # is the number of points passed to the geometry helpers.
        compute_K0 = [
            L.Call("compute_jacobian_determinants", (detJsym, 1, Jsym, cell_orientation)),
            L.Call("compute_jacobian_inverses", (Ksym, 1, Jsym, detJsym)),
        ]

        # Compute X = K0*(x-x0) for each physical point x
        compute_X = [
            L.ForRanges(
                (ip, 0, num_points),
                (j, 0, tdim),
                (i, 0, gdim),
                index_type=index_type,
                body=L.AssignAdd(X[ip, j], K[j, i]*(x[ip, i] - x0[i]))
            ),
        ]

        # Stitch it together
        return table_decls + decls + compute_x0 + compute_J0 + compute_K0 + compute_X