Ejemplo n.º 1
0
def test_boundaries():
    """
    Print a conflict report for every ordered pair of boundary stencils.

    One stencil is built for each vector yielded by ``Vector.moore_vectors``
    in ``dimensions`` dimensions.  A serial dependency graph and a parallel
    graph are built over the group of all those stencils, and for each
    ordered pair the expected conflict is printed alongside the values
    taken from both graphs and from ``stencil_conflict``.  A line ends in
    ten asterisks when the parallel graph disagrees with the expectation.

    Nothing is asserted here; the output is meant to be inspected by hand.
    """
    dimensions = 3
    mesh_shape = (32,) * dimensions

    def inward_offset(vec):
        # ``-vec`` multiplied by the unit vector of the first non-zero axis.
        axis = next(i for i in range(len(vec)) if vec[i])
        negated = -vec
        return negated * Vector.unit_vector(axis, len(vec))

    def axis_range(bound):
        # Iteration-space triple for a single component of a boundary vector.
        if bound == 1:
            return (-1, 0, 1)
        if bound == -1:
            return (0, 1, 1)
        return (1, -1, 1)

    def build_stencil(boundary):
        # Boundaries of 1-norm n depend on boundaries of 1-norm n-1 by
        # looking in on the first non-zero element.
        weights = SparseWeightArray({inward_offset(boundary): 1})
        source = StencilComponent('mesh', weights)
        return Stencil(source, 'mesh', [axis_range(bound) for bound in boundary])

    stencils = []
    for boundary in Vector.moore_vectors(dimensions):
        stencils.append((boundary, build_stencil(boundary)))

    group = StencilGroup([pair[1] for pair in stencils])

    serial = create_dependency_graph(group, {"mesh": mesh_shape})
    parallel = create_parallel_graph(group, {"mesh": mesh_shape})

    # Nested loops visit the pairs in the same order as
    # itertools.product(stencils, repeat=2): the second element varies fastest.
    for b1, s1 in stencils:
        for b2, s2 in stencils:
            # Expected: b2 reads from b1's region, or they are the same boundary.
            has_conflict = b2 + inward_offset(b2) == b1 or b1 == b2
            graph_conflict = serial[hash(s2)][hash(s1)]
            parallel_conflict = parallel[hash(s2)][hash(s1)]
            reported = stencil_conflict(s1, s2, shape_map={"mesh": mesh_shape})
            mismatch_flag = '*' * 10 * (parallel_conflict != has_conflict)
            print(b1, b2, reported, has_conflict, graph_conflict, parallel_conflict, mismatch_flag)
Ejemplo n.º 2
0
        def transform(self, tree, program_config):
            """
            Run the parent transform and rework its result for OpenMP.

            A dependency graph is computed over ``self.original`` before the
            parent class's ``transform`` is invoked.  The result is then
            retargeted to ``'omp'``, given an ``omp.h`` include, and passed
            through the block-combining, parallel-for, single-section and
            privatisation visitors in that order.

            :param tree: tree forwarded unchanged to the parent ``transform``.
            :param program_config: pair ``(subconfig, tuning_config)``;
                ``subconfig`` maps argument names to objects exposing
                ``.shape``.  The second element is not used here.
            :return: the transformed tree produced by the visitor pipeline.
            :raises IndexError: if ``subconfig`` is empty.
            """
            subconfig, _tuning_config = program_config
            name_shape_map = {name: arg.shape for name, arg in subconfig.items()}
            # dict.values() is a non-subscriptable view on Python 3, so it is
            # materialised before indexing.  The rank is read from one entry
            # only -- presumably every argument has the same rank; TODO confirm.
            shapes = list(name_shape_map.values())
            ndim = len(shapes[0])
            print("analyzing", len(self.original.body))
            dependency_graph = create_dependency_graph(self.original, name_shape_map)
            stencil_ids = [hash(s) for s in self.original.body]
            print("done analyzing")
            result = super(OpenMPCompiler.LazySpecializedKernel, self).transform(tree, program_config)
            result.config_target = 'omp'
            result.body.insert(0, CppInclude("omp.h"))
            result = BlockCombineTransformer().visit(result)
            result = self.parent_cls.ParallelForTasks().visit(result)

            # Wrap the body of the function found in the result in a single
            # braced ``omp parallel`` region; the later visitors run on that
            # wrapped form.
            node = result.find(FunctionDecl)
            node.defn = [Pragma("omp parallel", body=node.defn, braces=True)]
            result = self.parent_cls.MakeSingle(dependency_graph, stencil_ids, ndim).visit(result)
            result = self.parent_cls.Privatize().visit(result)

            return result