def _make_partree(self, candidates, nthreads=None):
    """
    Build the parallel tree for the `candidates` Iterations, attaching
    suitable OpenMP pragmas for parallelism.

    Two scenarios are distinguished:

        * Parallel Iterations that do not *write* to any host Function (a
          Function `f` for which ``is_on_device(f) == False``) are offloaded
          to the device.
        * Iterations that do write to a host Function are parallelized on
          the host instead, unless ``self.par_disabled`` is set.

    Returns a 3-tuple ``(root, partree, collapsed)``, where `root` is
    ``candidates[0]``. When neither device offloading nor host parallelism
    applies, `partree` and `collapsed` are both None.
    """
    assert candidates

    outermost = candidates[0]

    if not is_on_device(outermost, self.gpu_fit, only_writes=True):
        # At least one written Function lives on the host
        if self.par_disabled:
            # Nothing to parallelize
            return outermost, None, None

        # Fall back to host parallelism
        return super()._make_partree(candidates, nthreads)

    # The typical case: every written Function is a device Function, i.e. it is
    # mapped in the device memory, so `outermost` gets offloaded to the device

    # Iterations that can be collapsed together with `outermost`
    inner = self._find_collapsable(outermost, candidates)
    depth = len(inner) + 1

    offloaded = self._Iteration(gpu_fit=self.gpu_fit, ncollapse=depth,
                                **outermost.args)
    tree = ParallelTree([], offloaded, nthreads=nthreads)

    return outermost, tree, [tree] + inner
def _make_partree(self, candidates, nthreads=None):
    """
    Build the parallel tree for the `candidates` Iterations, attaching
    suitable OpenMP pragmas for GPU offloading.

    Returns a 3-tuple ``(root, partree, collapsed)``: `root` is
    ``candidates[0]``, `partree` is the ParallelTree wrapping the rebuilt
    root Iteration, and `collapsed` is the list made of `partree` followed
    by the collapsable Iterations.
    """
    assert candidates

    outermost = candidates[0]

    # Iterations that can be collapsed together with `outermost`
    inner = self._find_collapsable(outermost, candidates)
    depth = len(inner) + 1

    # Rebuild the outermost Iteration with the collapse depth, then wrap it
    # into a ParallelTree
    rebuilt = self._Iteration(ncollapse=depth, **outermost.args)
    tree = ParallelTree([], rebuilt, nthreads=nthreads)

    return outermost, tree, [tree] + inner
def _make_partree(self, candidates, nthreads=None):
    """
    Build the parallel tree for the `candidates` Iterations, attaching
    suitable OpenMP pragmas for GPU offloading.

    The root Iteration is rebuilt with one additional pragma, produced by
    ``self.lang['par-for-teams']``, and one additional ``COLLAPSED`` property,
    both parameterized by the number of collapsed Iterations.

    Returns a 3-tuple ``(root, partree, collapsed)``: `root` is
    ``candidates[0]``, `partree` is the ParallelTree wrapping the rebuilt
    root Iteration, and `collapsed` is the list made of `partree` followed
    by the collapsable Iterations.
    """
    assert candidates

    outermost = candidates[0]

    # Iterations that can be collapsed together with `outermost`
    inner = self._find_collapsable(outermost, candidates)
    depth = len(inner) + 1

    # The pragma and the property to be attached to the rebuilt Iteration
    teams_pragma = self.lang['par-for-teams'](depth)
    pragmas = outermost.pragmas + (teams_pragma,)
    properties = outermost.properties + (COLLAPSED(depth),)

    # Rebuild the outermost Iteration and wrap it into a ParallelTree
    rebuilt = outermost._rebuild(pragmas=pragmas, properties=properties)
    tree = ParallelTree([], rebuilt, nthreads=nthreads)

    return outermost, tree, [tree] + inner