import numpy as np
from mpi4py import MPI

# omp_num_threads() returns the number of OpenMP threads per process; it is
# assumed here to be the helper provided by pyoperators.utils.
from pyoperators.utils import omp_num_threads


def distribute_observation(detectors, observations, rank=None,
                           comm=MPI.COMM_WORLD):
    size = comm.size
    if size == 1:
        return detectors.copy(), list(observations)

    if rank is None:
        rank = comm.Get_rank()
    nthreads = omp_num_threads()
    ndetectors = np.sum(~detectors)
    nobservations = len(observations)

    # number of observations; they should be of approximately the same length
    nx = nobservations

    # number of detectors, grouped by the number of cpu cores
    ny = int(np.ceil(ndetectors / nthreads))

    # we start with the minimum blocksize and increase it until we find a
    # configuration that covers all the observations
    blocksize = int(np.ceil(nx * ny / size))
    while True:
        # by looping over x first, we favour larger numbers of detectors and
        # fewer observations per processor, to minimise inter-processor
        # communication in case of correlations between detectors
        for xblocksize in range(1, blocksize + 1):
            if blocksize / xblocksize != blocksize // xblocksize:
                continue
            yblocksize = blocksize // xblocksize
            nx_block = int(np.ceil(nx / xblocksize))
            ny_block = int(np.ceil(ny / yblocksize))
            if nx_block * ny_block <= size:
                break
        if nx_block * ny_block <= size:
            break
        blocksize += 1

    ix = rank // ny_block
    iy = rank % ny_block

    # check that the processor has something to do
    if ix >= nx_block:
        idetector = slice(0, 0)
        iobservation = slice(0, 0)
    else:
        idetector = slice(iy * yblocksize * nthreads,
                          (iy + 1) * yblocksize * nthreads)
        iobservation = slice(ix * xblocksize, (ix + 1) * xblocksize)

    # mask out (set to True) the detectors that are not handled by this rank
    detectors_ = detectors.copy()
    igood = np.where(~detectors_.ravel())[0]
    detectors_.ravel()[igood[0:idetector.start]] = True
    detectors_.ravel()[igood[idetector.stop:]] = True
    observations_ = observations[iobservation]
    return detectors_, observations_
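# Usage sketch (hypothetical setup, not from the original source): distribute a
# boolean detector mask (True = detector to be discarded, as implied by the
# `~detectors` counts above) and a sliceable list of observations across the
# ranks of an MPI communicator.
def _example_distribute_observation():
    comm = MPI.COMM_WORLD
    detectors = np.zeros((4, 8), dtype=bool)         # hypothetical 4x8 focal plane, all valid
    observations = ['obs1', 'obs2', 'obs3', 'obs4']  # placeholder observation identifiers
    local_detectors, local_observations = distribute_observation(
        detectors, observations, comm=comm)
    # each rank keeps the detectors left False in local_detectors and its own
    # subset of the observation list
    return np.sum(~local_detectors), len(local_observations)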
def func(env):
    # counter, func_thread, mkl, mkl_nthreads and the assert_* helpers are
    # assumed to be defined at the level of the enclosing test module.
    global counter
    with env:
        nthreads = os.getenv('OMP_NUM_THREADS')
        expected = omp_num_threads()
        with pool_threading() as pool:
            # inside the context, OpenMP (and MKL, if present) is forced to
            # run single-threaded
            assert_equal(int(os.environ['OMP_NUM_THREADS']), 1)
            if mkl is not None:
                assert_equal(mkl.get_max_threads(), 1)
            counter = 0
            pool.map(func_thread, range(pool._processes))
        # on exit, the previous threading configuration is restored
        assert_equal(os.getenv('OMP_NUM_THREADS'), nthreads)
        if mkl is not None:
            assert_equal(mkl.get_max_threads(), mkl_nthreads)
    assert_equal(counter, expected)
    assert_not_in('OMP_NUM_THREADS', os.environ)
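# Minimal usage sketch (assumptions: pool_threading is the pyoperators.utils
# context manager exercised by the test above, and it yields a thread pool with
# one worker per OpenMP thread, as the counter/expected assertion suggests).
# Inside the block OMP_NUM_THREADS is forced to 1; on exit it is restored.
def _example_pool_threading():
    with pool_threading() as pool:
        # the pool supports the standard map() interface used by the test;
        # workers are threads sharing memory, so a plain lambda is fine here
        squares = pool.map(lambda i: i * i, range(4))
    return squares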