def test_boundaries():
    """
    Exercise dependency analysis on the boundary stencils of a 3-D mesh.

    One stencil is built for every vector returned by
    ``Vector.moore_vectors``; each stencil reads the mesh at the offset
    given by ``reference_vector``.  The serial and parallel graphs are built
    for the whole group and, for every ordered pair of stencils, the
    expected and computed conflict values are printed.  A run of asterisks
    flags pairs where the parallel graph disagrees with the expectation.
    Nothing is asserted; the output is meant to be inspected.
    """
    dimensions = 3
    mesh_shape = (32,) * dimensions

    def reference_vector(vec):
        # Position of the first truthy component (StopIteration if none).
        pivot = next(idx for idx in range(len(vec)) if vec[idx])
        negated = -vec
        return negated * Vector.unit_vector(pivot, len(vec))

    def component_range(bound):
        # Triple handed to Stencil for one component of a boundary vector.
        if bound == 1:
            return (-1, 0, 1)
        if bound == -1:
            return (0, 1, 1)
        return (1, -1, 1)

    def get_stencil(boundary):
        # A boundary of 1-norm n depends on a boundary of 1-norm n-1, found
        # by looking inward along its first non-zero element.
        read_vector = reference_vector(boundary)
        weights = SparseWeightArray({read_vector: 1})
        component = StencilComponent('mesh', weights)
        ranges = [component_range(bound) for bound in boundary]
        return Stencil(component, 'mesh', ranges)

    stencils = []
    for boundary in Vector.moore_vectors(dimensions):
        stencils.append((boundary, get_stencil(boundary)))

    group = StencilGroup([stencil for _, stencil in stencils])
    serial = create_dependency_graph(group, {"mesh": mesh_shape})
    parallel = create_parallel_graph(group, {"mesh": mesh_shape})

    # Every ordered pair, first element varying slowest.
    for b1, s1 in stencils:
        for b2, s2 in stencils:
            has_conflict = b2 + reference_vector(b2) == b1 or b1 == b2
            graph_conflict = serial[hash(s2)][hash(s1)]
            parallel_conflict = parallel[hash(s2)][hash(s1)]
            reported = stencil_conflict(
                s1, s2, shape_map={"mesh": mesh_shape}
            )
            mismatch_marker = '*' * 10 * (parallel_conflict != has_conflict)
            print(b1, b2, reported, has_conflict, graph_conflict,
                  parallel_conflict, mismatch_marker)
def transform(self, tree, program_config):
    """
    Run the parent transform and post-process the result for OpenMP.

    Steps, in order:
      1. Build a name -> shape map from the argument sub-configuration and
         compute the dependency graph of ``self.original`` against it.
      2. Delegate to the parent class's ``transform``.
      3. Mark the result as an ``omp`` target and prepend ``omp.h``.
      4. Apply ``BlockCombineTransformer`` and ``ParallelForTasks``.
      5. Wrap the body of the first ``FunctionDecl`` found in a braced
         ``omp parallel`` pragma.
      6. Apply ``MakeSingle`` (given the dependency graph, the stencil
         hashes and the dimensionality) and ``Privatize``.

    :param tree: tree forwarded unchanged to the parent ``transform``.
    :param program_config: two-element sequence ``(subconfig,
        tuning_config)``; ``subconfig`` maps argument names to objects
        exposing a ``shape`` attribute.  The tuning part is not used here.
    :return: the transformed result after all passes above.
    :raises IndexError: if ``subconfig`` is empty.
    """
    subconfig, _tuning_config = program_config
    name_shape_map = {name: arg.shape for name, arg in subconfig.items()}

    # dict.values() is a non-subscriptable view on Python 3, so materialize
    # it before indexing.  The first shape's length is used as the
    # dimensionality -- NOTE(review): assumes all arguments share one rank.
    ndim = len(list(name_shape_map.values())[0])

    print("analyzing", len(self.original.body))
    dependency_graph = create_dependency_graph(self.original, name_shape_map)
    stencil_ids = [hash(s) for s in self.original.body]
    print("done analyzing")

    result = super(OpenMPCompiler.LazySpecializedKernel, self).transform(
        tree, program_config
    )
    result.config_target = 'omp'
    result.body.insert(0, CppInclude("omp.h"))

    result = BlockCombineTransformer().visit(result)
    result = self.parent_cls.ParallelForTasks().visit(result)

    # Enclose the whole function body in a single parallel region.
    node = result.find(FunctionDecl)
    node.defn = [Pragma("omp parallel", body=node.defn, braces=True)]

    result = self.parent_cls.MakeSingle(
        dependency_graph, stencil_ids, ndim
    ).visit(result)
    result = self.parent_cls.Privatize().visit(result)
    return result