def _make_partree(self, candidates, omp_pragma):
    """
    Attach an OpenMP pragma to the outermost of the ``candidates``.

    Parameters
    ----------
    candidates : sequence of Iteration
        Non-empty, outermost first. The first entry is the one that gets
        the pragma; the following ones may be folded into a collapse clause.
    omp_pragma : callable
        Invoked with the collapse depth; whatever it returns is appended to
        the pragmas of the outermost Iteration.

    Returns
    -------
    tuple
        ``(root, partree, collapsed)``: the untouched outermost Iteration,
        its rebuilt copy carrying the new pragma and a ``COLLAPSED``
        property, and a list made of ``partree`` followed by the inner
        Iterations that were collapsed.
    """
    assert candidates
    root = candidates[0]

    # Walk inwards collecting the Iterations that can join the collapse
    # clause; the search only happens with enough cores and a perfect nest
    collapsable = []
    if ncores() >= Ompizer.COLLAPSE and IsPerfectIteration().visit(root):
        for depth, inner in enumerate(candidates[1:], 1):
            # OpenMP does not allow a collapsed loop to be initialized from
            # the iteration variable of an enclosing collapsed loop, e.g.
            #
            #   #pragma omp ... collapse(2)
            #   for (i = ... )
            #     for (j = i ...)
            #
            # so stop as soon as the lower bound uses an outer dimension
            start_symbols = inner.symbolic_min.free_symbols
            uses_outer_index = any(outer.dim in start_symbols
                                   for outer in candidates[:depth])

            # Vectorizable Iterations are deliberately left out as well
            if uses_outer_index or inner.is_Vectorizable:
                break

            collapsable.append(inner)

    # The outermost Iteration always counts towards the collapse depth
    ncollapse = len(collapsable) + 1
    pragmas = root.pragmas + (omp_pragma(ncollapse),)
    properties = root.properties + (COLLAPSED(ncollapse),)
    partree = root._rebuild(pragmas=pragmas, properties=properties)

    return root, partree, [partree, *collapsable]
def _make_partree(self, candidates, omp_pragma=None):
    """
    Attach an OpenMP pragma to the outermost of the ``candidates``.

    Parameters
    ----------
    candidates : sequence of Iteration
        Non-empty, outermost first. The first entry is the one that gets
        the pragma; the following ones may be folded into a collapse clause.
    omp_pragma : callable, optional
        Invoked with the collapse depth to produce the pragma. When omitted,
        the template is taken from ``self.lang``: ``'for-static-1'`` if all
        candidates are affine, ``'for-static'`` otherwise.

    Returns
    -------
    tuple
        ``(root, partree, collapsed)``: the untouched outermost Iteration,
        its rebuilt copy carrying the new pragma and a ``COLLAPSED``
        property, and a list made of ``partree`` followed by the inner
        Iterations that were collapsed.
    """
    assert candidates
    root = candidates[0]

    # A caller-provided template always wins; otherwise choose by affinity
    if omp_pragma is None:
        all_affine = all(it.is_Affine for it in candidates)
        template = 'for-static-1' if all_affine else 'for-static'
        omp_pragma = self.lang[template]

    # Walk inwards collecting the Iterations that can join the collapse
    # clause; the search only happens with enough cores and a perfect nest
    collapsable = []
    if (ncores() >= Ompizer.COLLAPSE_NCORES and
            IsPerfectIteration().visit(root)):
        for depth, inner in enumerate(candidates[1:], 1):
            # OpenMP does not allow a collapsed loop to be initialized from
            # the iteration variable of an enclosing collapsed loop, e.g.
            #
            #   #pragma omp ... collapse(2)
            #   for (i = ... )
            #     for (j = i ...)
            #
            # so stop as soon as the lower bound uses an outer dimension
            start_symbols = inner.symbolic_min.free_symbols
            uses_outer_index = any(outer.dim in start_symbols
                                   for outer in candidates[:depth])

            # Vectorizable Iterations are deliberately left out as well
            if uses_outer_index or inner.is_Vectorizable:
                break

            # Collapsing `inner` leaves only the deeper candidates as the
            # work of a single parallel iteration; give up if that is too
            # little
            try:
                sizes = [int(deeper.dim.symbolic_size)
                         for deeper in candidates[depth + 1:]]
                if prod(sizes) < Ompizer.COLLAPSE_WORK:
                    break
            except TypeError:
                # The amount of work could not be evaluated as an integer:
                # skip the check and collapse anyway
                pass

            collapsable.append(inner)

    # The outermost Iteration always counts towards the collapse depth
    ncollapse = len(collapsable) + 1
    pragmas = root.pragmas + (omp_pragma(ncollapse),)
    properties = root.properties + (COLLAPSED(ncollapse),)
    partree = root._rebuild(pragmas=pragmas, properties=properties)

    return root, partree, [partree, *collapsable]
def __init__(self, *args, **kwargs):
    """
    Initialize the Iteration with a pragma derived from the keyword
    arguments.

    Any ``pragmas`` keyword is dropped and replaced by the single header
    returned by ``self._make_header``. A ``COLLAPSED`` property, built from
    ``ncollapse`` (1 when absent), is appended to the given ``properties``.
    The ``parallel``, ``ncollapse``, ``chunk_size``, ``nthreads`` and
    ``reduction`` keywords are stored as attributes and are not forwarded
    to the parent constructor; all remaining arguments are.
    """
    # Caller-supplied pragmas are discarded
    kwargs.pop('pragmas', None)

    # The header must be built while `kwargs` still holds all other options
    header = self._make_header(**kwargs)

    properties = as_tuple(kwargs.pop('properties', None))
    # `ncollapse` is only peeked at here; it is consumed just below
    properties += (COLLAPSED(kwargs.get('ncollapse', 1)),)

    # Options owned by this class, with the value used when not supplied
    own_options = (
        ('parallel', False),
        ('ncollapse', None),
        ('chunk_size', None),
        ('nthreads', None),
        ('reduction', None),
    )
    for option, fallback in own_options:
        setattr(self, option, kwargs.pop(option, fallback))

    super(ParallelIteration, self).__init__(
        *args, pragmas=[header], properties=properties, **kwargs
    )
def _make_partree(self, candidates, nthreads=None):
    """
    Build a ParallelTree out of the ``candidates`` Iterations.

    Parameters
    ----------
    candidates : sequence of Iteration
        Non-empty, outermost first. The first entry receives the OpenMP
        pragma.
    nthreads : optional
        Number of threads. Only accepted when all candidates are affine, in
        which case a ``'par-for'`` pragma is emitted with it. When omitted,
        a ``'for'`` pragma is emitted and the thread count is taken from
        ``self.nthreads`` (affine) or ``self.nthreads_nonaffine``
        (non-affine).

    Returns
    -------
    tuple
        ``(root, partree, collapsed)``: the untouched outermost Iteration,
        the ParallelTree wrapping its rebuilt copy, and a list made of
        ``partree`` followed by the collapsed inner Iterations.
    """
    assert candidates
    root = candidates[0]

    # The outermost Iteration always counts towards the collapse depth
    collapsable = self._find_collapsable(root, candidates)
    ncollapse = len(collapsable) + 1

    # Statements to be placed ahead of the parallel loop, if any
    prefix = []

    affine = all(it.is_Affine for it in candidates)
    if not affine:
        # pragma omp for ... schedule(..., expr)
        assert nthreads is None
        nthreads = self.nthreads_nonaffine

        chunk_size = Symbol(name='chunk_size')
        omp_pragma = self.lang['for'](ncollapse, chunk_size)

        # The chunk size is computed from the total number of collapsed
        # iterations and the thread count, and is never below 1
        sizes = [root.symbolic_size]
        sizes.extend(it.symbolic_size for it in collapsable)
        niters = prod(sizes)
        per_chunk = niters / (nthreads * self.CHUNKSIZE_NONAFFINE)
        value = INT(Max(per_chunk, 1))

        prefix.append(Expression(DummyEq(chunk_size, value, dtype=np.int32)))
    elif nthreads is None:
        # pragma omp for ... schedule(..., 1)
        nthreads = self.nthreads
        omp_pragma = self.lang['for'](ncollapse, 1)
    else:
        # pragma omp parallel for ... schedule(..., 1)
        omp_pragma = self.lang['par-for'](ncollapse, 1, nthreads)

    # Assemble the ParallelTree around the decorated outermost Iteration
    pragmas = root.pragmas + (omp_pragma,)
    properties = root.properties + (COLLAPSED(ncollapse),)
    body = root._rebuild(pragmas=pragmas, properties=properties)
    partree = ParallelTree(prefix, body, nthreads=nthreads)

    return root, partree, [partree] + collapsable
def _make_partree(self, candidates, nthreads=None):
    """
    Build a ParallelTree out of the ``candidates`` Iterations.

    Parameters
    ----------
    candidates : sequence of Iteration
        Non-empty, outermost first. The first entry receives the OpenMP
        pragma; the following ones may be folded into a collapse clause.
    nthreads : optional
        Number of threads. Only accepted when all candidates are affine, in
        which case a ``'par-for'`` pragma is emitted with it. When omitted,
        a ``'for'`` pragma is emitted and the thread count is taken from
        ``self.nthreads`` (affine) or ``self.nthreads_nonaffine``
        (non-affine).

    Returns
    -------
    tuple
        ``(root, partree, collapsed)``: the untouched outermost Iteration,
        the ParallelTree wrapping its rebuilt copy, and a list made of
        ``partree`` followed by the collapsed inner Iterations.
    """
    assert candidates
    root = candidates[0]

    # Walk inwards collecting the Iterations that can join the collapse
    # clause; the search only happens with enough cores and a perfect nest
    collapsable = []
    if (ncores() >= Ompizer.COLLAPSE_NCORES and
            IsPerfectIteration().visit(root)):
        for depth, inner in enumerate(candidates[1:], 1):
            # OpenMP does not allow a collapsed loop to be initialized from
            # the iteration variable of an enclosing collapsed loop, e.g.
            #
            #   #pragma omp ... collapse(2)
            #   for (i = ... )
            #     for (j = i ...)
            #
            # so stop as soon as the lower bound uses an outer dimension
            start_symbols = inner.symbolic_min.free_symbols
            uses_outer_index = any(outer.dim in start_symbols
                                   for outer in candidates[:depth])

            # Vectorizable Iterations are deliberately left out as well
            if uses_outer_index or inner.is_Vectorizable:
                break

            # Collapsing `inner` leaves only the deeper candidates as the
            # work of a single parallel iteration; give up if that is too
            # little
            try:
                sizes = [int(deeper.dim.symbolic_size)
                         for deeper in candidates[depth + 1:]]
                if prod(sizes) < Ompizer.COLLAPSE_WORK:
                    break
            except TypeError:
                # The amount of work could not be evaluated as an integer:
                # skip the check and collapse anyway
                pass

            collapsable.append(inner)

    # The outermost Iteration always counts towards the collapse depth
    ncollapse = len(collapsable) + 1

    # Statements to be placed ahead of the parallel loop, if any
    prefix = []

    affine = all(it.is_Affine for it in candidates)
    if not affine:
        # pragma omp for ... schedule(..., expr)
        assert nthreads is None
        nthreads = self.nthreads_nonaffine

        chunk_size = Symbol(name='chunk_size')
        omp_pragma = self.lang['for'](ncollapse, chunk_size)

        # The chunk size is computed from the total number of collapsed
        # iterations and the thread count, and is never below 1
        extents = [root.symbolic_size]
        extents.extend(it.symbolic_size for it in collapsable)
        niters = prod(extents)
        per_chunk = niters / (nthreads * self.CHUNKSIZE_NONAFFINE)
        value = INT(Max(per_chunk, 1))

        prefix.append(Expression(DummyEq(chunk_size, value, dtype=np.int32)))
    elif nthreads is None:
        # pragma omp for ... schedule(..., 1)
        nthreads = self.nthreads
        omp_pragma = self.lang['for'](ncollapse, 1)
    else:
        # pragma omp parallel for ... schedule(..., 1)
        omp_pragma = self.lang['par-for'](ncollapse, 1, nthreads)

    # Assemble the ParallelTree around the decorated outermost Iteration
    pragmas = root.pragmas + (omp_pragma,)
    properties = root.properties + (COLLAPSED(ncollapse),)
    body = root._rebuild(pragmas=pragmas, properties=properties)
    partree = ParallelTree(prefix, body, nthreads=nthreads)

    return root, partree, [partree, *collapsable]