def main(A):
    """Fit the tau prefactors per redshift bin and save them.

    match the mean fraction by fixing prefactor A(a) and B(a) on tau.
    Requires 'gaussian' to be finished.  Run before convolve, though it
    uses functions in convolve for evaluating the cost function.

    The range z = 2.0 .. 4.0 is split into 8 equal bins; for each bin
    ``fitRange`` is called on the matching log10(1216.0 * (1 + z)) interval.
    The results are written with ``numpy.savez`` to
    ``A.MatchMeanFractionOutput`` under the keys ``a``, ``Af``, ``Bf``,
    ``xmeanF`` and ``xvarF`` (the square of the fitted std).

    Side effects: rebinds the module globals ``meanfractionmodel`` and
    ``varfractionmodel``.
    """
    global meanfractionmodel
    global varfractionmodel
    varfractionmodel = VarFractionModel(A)
    meanfractionmodel = MeanFractionModel(A)

    Nbins = 8
    zBins = numpy.linspace(2.0, 4.0, Nbins + 1, endpoint=True)
    LogLamBins = numpy.log10(1216.0 * (1 + zBins))
    z = 0.5 * (zBins[1:] + zBins[:-1])
    Af = sharedmem.empty(z.shape)
    Bf = sharedmem.empty(z.shape)
    xmeanF = sharedmem.empty(z.shape)
    xstdF = sharedmem.empty(z.shape)

    def work(i):
        # Seed each fit with the previous bin's solution; the first bin
        # starts from a fixed guess.
        if i > 0:
            Afguess, Bfguess = Af[i - 1], Bf[i - 1]
        else:
            Afguess, Bfguess = (0.00015, 1.5)
        Af[i], Bf[i], xmeanF[i], xstdF[i] = fitRange(
            A, LogLamBins[i], LogLamBins[i + 1], Afguess, Bfguess)

    # An explicit loop instead of map(): the bins must run in order (each
    # one reads the previous result), and a bare map() call is lazy on
    # Python 3 and would silently do nothing.
    for i in range(Nbins):
        work(i)

    numpy.savez(A.MatchMeanFractionOutput, a=1 / (z + 1),
                Af=Af, Bf=Bf, xmeanF=xmeanF, xvarF=xstdF ** 2)
def __init__(self, face_width, res_x, res_y, f_x, f_y, fan_position,
             visualize=False):
    """Store the configuration and create the inter-process state.

    The constructor keeps every argument on a private attribute, creates
    two ``multiprocessing.Event`` objects and allocates two arrays in
    shared memory (face position and servo angles).
    """
    # Initialize multiprocessing.Process parent
    multiprocessing.Process.__init__(self)

    # Plain configuration values, stored as given.
    self._facewidth = face_width
    self._res_x, self._res_y = res_x, res_y
    self._f_x, self._f_y = f_x, f_y
    self._fan_position = fan_position
    # Whether the servo angle position should be visualized.
    self._visualize = visualize

    # _exit stops the process; newposition_event is set every time a new
    # servo angle position has been computed.
    self._exit = multiprocessing.Event()
    self.newposition_event = multiprocessing.Event()

    # Shared-memory arrays: current face position (4 x 1, int16) and
    # current position angles (2 x 1, float).
    self._currentface = sharedmem.empty((4, 1), dtype='int16')
    self._currentangles = sharedmem.empty((2, 1), dtype='float')
def test_memory_type():
    """Shared arrays share one type; arithmetic results do not keep it."""
    a = sharedmem.empty(100)
    b = sharedmem.empty(100)
    shared_type = type(a)

    assert isinstance(b, shared_type)

    # Anything computed from shared arrays must come back as a different
    # (non-shared) type.
    assert not isinstance(a + 10, shared_type)
    assert not isinstance(numpy.sum(a), shared_type)
    assert not isinstance(a + b, shared_type)
    assert not isinstance(a * b, shared_type)
def __init__(self, config): def getfilename(mock): dir = os.path.join(config.prefix, mock) paramfile = os.path.join(config.prefix, mock, 'paramfile') c = Config(paramfile, basedir=dir) return os.path.join(c.datadir, 'bootstrap.npz') if config.UseMocks is None: filenames = sorted(list(glob(os.path.join(config.prefix, '[0-9]*', '*', 'bootstrap.npz')))) else: filenames = [getfilename(mock) for mock in config.UseMocks] files = [ numpy.load(f) for f in filenames] print 'using', len(filenames), ' files', filenames self.r = files[0]['r'] self.mu = files[0]['mu'] # b/c they all have the same cosmology self.eigenmodes = EigenModes(numpy.load(config.EigenModesOutput)['eigenmodes']) self.DQDQ, self.RQDQ, self.RQRQ = sharedmem.empty( [3, len(files)] + list(files[0]['DQDQ'].shape)) self.DQDFsum1, self.RQDFsum1, self.DFDFsum1 = sharedmem.empty( [3, len(files)] + list(files[0]['DQDQ'].shape)) self.DQDFsum2, self.RQDFsum2, self.DFDFsum2 = sharedmem.empty( [3, len(files)] + list(files[0]['DQDQ'].shape)) self.ND, self.NR = sharedmem.empty([2, len(files)] + list(files[0]['Qchunksize'].shape)) def read(i): file = files[i] self.DQDQ[i] = file['DQDQ'] self.RQDQ[i] = file['RQDQ'] self.RQRQ[i] = file['RQRQ'] self.DQDFsum1[i] = file['DQDFsum1'][0] self.RQDFsum1[i] = file['RQDFsum1'][0] self.DFDFsum1[i] = file['DFDFsum1'][0] self.DQDFsum2[i] = file['DQDFsum2'] self.RQDFsum2[i] = file['RQDFsum2'] self.DFDFsum2[i] = file['DFDFsum2'] self.ND[i] = file['Qchunksize'] self.NR[i] = file['Rchunksize'] chunkmap(read, range(len(files)), 1) self.Nchunks = self.DQDQ[0].shape[0] # build the currelation function on the first sample # use it as a template self.dummy = self(0)
def getforest(A, Zmin, Zmax, RfLamMin, RfLamMax, combine=1):
    """Collect the (optionally rebinned) forest pixels that pass the cuts.

    Every sightline is cut into groups of ``combine`` consecutive pixels
    (left-over pixels at the end of a sightline are dropped) and each group
    is reduced to one value.  A combined pixel is kept only when all of its
    members satisfy Zmin < z < Zmax and RfLamMin < RfLam < RfLamMax.

    Returns a tuple ``(data, pos, id)`` restricted to the kept pixels:
    ``data`` has the three columns (DFred, DFreal, Delta), ``pos`` the three
    position columns and ``id`` the sightline index of every pixel.
    """
    spectra = SpectraOutput(A)
    meanFred = MeanFractionMeasured(A, kind='red')
    meanFreal = MeanFractionMeasured(A, kind='real')

    # never combine more pixels than the longest sightline has
    combine = numpy.minimum(spectra.sightlines.Npixels.max(), combine)

    # will combine every this many pixels
    Npixels1 = spectra.sightlines.Npixels // combine
    # start of each sightline in the flat, combined pixel arrays
    Offset1 = numpy.concatenate([[0], numpy.cumsum(Npixels1)])
    Npixels = Npixels1.sum()
    print Npixels1.min(), Npixels1.max()
    print spectra.sightlines.Npixels.min(), spectra.sightlines.Npixels.max()

    # The per-column names below are views into the shared blocks, so
    # writes made by the workers land in data / pos.
    data = sharedmem.empty(Npixels, ('f4', 3))
    DFred, DFreal, Delta = data.T
    pos = sharedmem.empty(Npixels, ('f4', 3))
    x, y, z = pos.T
    mask = sharedmem.empty(Npixels, '?')
    id = sharedmem.empty(Npixels, 'i4')

    # Bare attribute reads with no other effect here; presumably they force
    # lazy loading before the workers start -- TODO confirm.
    spectra.taured
    spectra.taureal

    def work(i):
        def combinepixels(value, method=numpy.mean):
            # reduce the number of pixels with 'method': drop the tail that
            # does not fill a whole group, then reduce over each group
            return \
                method(value[:Npixels1[i] * combine]\
                    .reshape([Npixels1[i]] + [combine]), axis=-1)
        # destination range of sightline i in the flat arrays
        sl = slice(Offset1[i], Npixels1[i] + Offset1[i])
        a = spectra.a[i]
        # exp(-tau) divided by the measured mean at a, minus one
        Fred = numpy.exp(-spectra.taured[i]) / meanFred(a) - 1
        Freal = numpy.exp(-spectra.taureal[i]) / meanFreal(a) - 1

        DFred[sl] = combinepixels(Fred)
        DFreal[sl] = combinepixels(Freal)
        Delta[sl] = combinepixels(spectra.delta[i])
        p = spectra.position(i)
        x[sl] = combinepixels(p[:, 0])
        y[sl] = combinepixels(p[:, 1])
        z[sl] = combinepixels(p[:, 2])

        m = spectra.z[i] > Zmin
        m &= spectra.z[i] < Zmax
        m &= spectra.RfLam(i) > RfLamMin
        m &= spectra.RfLam(i) < RfLamMax
        # a combined pixel is valid only if every member pixel is valid
        mask[sl] = combinepixels(m, method=numpy.all)
        id[sl] = i
    chunkmap(work, range(len(spectra)), 100)

    return data[mask], pos[mask], id[mask]
def main(config): global cov DB = BootstrapDB(config) MASK = DB.dummy.imesh >= 0 MASK &= DB.dummy.rmesh <= config.rmax MASK &= DB.dummy.rmesh >= config.rmin print "dof in fitting", MASK.sum() # create a dummy to test the fitting p0 = [-0.2, 3.5, 1.5, 1.5] eigenmodes = DB.eigenmodes dummy = eigenmodes(p0) covfull = numpy.load(config.CovarianceMatrixOutput)["cov"] cov = covfull[MASK][:, MASK] print "inverting" INV = linalg.inv(covfull[MASK][:, MASK]) print "inverted" x, chi = fit1(dummy, eigenmodes, INV, MASK) print "x =", x print "p0 = bF, bQ, BF, BQ", p0 error = poles_err(dummy, covfull) fitted = sharedmem.empty((len(DB), len(p0))) chi = sharedmem.empty((len(DB))) samples, models = [], [] sharedmem.set_debug(True) def work(i): sample = DB(i) print "fitting", i fitted[i], chi[i] = fit1(sample, eigenmodes, INV, MASK) model = eigenmodes(fitted[i]) print zip(sample[0].monopole, model[0].monopole) return i, sample, model def reduce(rt): i, s, m = rt samples.append((i, s)) models.append((i, m)) chunkmap(work, range(len(DB)), 100, reduce=reduce) samples = [s for i, s in sorted(samples)] models = [s for i, s in sorted(models)] numpy.savez("fit.npz", samples=samples, models=models, fittedparameters=fitted, chi=chi, error=error)
def create_video_pipe(video, name=None, read_ahead=False):
    """ build a connected (sender, receiver) pair for passing video frames.

    Both ends share a duplex pipe and one uint8 frame buffer in shared
    memory whose shape is `video.shape[1:]`.

    Typical use:

        def worker_process(video):
            ''' worker process processing a video '''
            expensive_function(video)

        if __name__ == '__main__':
            # load a video file
            video = VideoFile('test.mov')
            # create the video pipe
            sender, receiver = create_video_pipe(video)
            # create the worker process
            proc = multiprocessing.Process(target=worker_process,
                                           args=(receiver,))
            proc.start()
            sender.start()

    `name` is handed to both ends and `read_ahead` to the sender only.
    """
    # the two connection objects used for communication
    sender_conn, receiver_conn = mp.Pipe(duplex=True)

    # one frame worth of shared memory, used for passing frames
    frame_shape = video.shape[1:]
    shared_frame = sharedmem.empty(frame_shape, np.uint8)

    # the sender is constructed first, then the receiver
    sender = VideoPipeSender(video, sender_conn, shared_frame, name,
                             read_ahead)
    receiver = VideoPipeReceiver(receiver_conn, shared_frame,
                                 video.video_format, name)
    return sender, receiver
def generate_shared_array(unshared_arr, dtype):
    r"""Copy an array into a newly allocated shared-memory array.

    The result is allocated with ``sharedmem.empty`` using the shape of
    `unshared_arr` and the requested `dtype`, then filled with the values
    of `unshared_arr`.

    Parameters
    ----------
    unshared_arr : array_like
        The values to copy.  Anything ``numpy.asarray`` accepts (ndarray,
        list, nested lists, scalar) is allowed.
    dtype : data-type
        Passed unchanged as the ``dtype`` argument of ``sharedmem.empty``.

    Returns
    -------
    shared_arr : sharedmem array
        A new array with the shape of `unshared_arr` holding a copy of its
        values, converted to `dtype` on assignment.
    """
    # Local import: the module-level alias used for numpy is not assumed.
    import numpy

    source = numpy.asarray(unshared_arr)
    shared_arr = sharedmem.empty(source.shape, dtype=dtype)
    # Ellipsis indexing also covers 0-d inputs, where ``[:]`` raises
    # IndexError.
    shared_arr[...] = source
    return shared_arr
def argsort(ar):
    """Return the indices that sort `ar`, computed bucket by bucket.

    The value range [min, max] is cut into ``2 * cpu_count`` equal bins.
    Every element is assigned to a bin with ``digitize``; each worker then
    argsorts one bin and writes the resulting indices into its own slice of
    the shared output.

    Returns an ``intp`` shared array of length ``len(ar)``.
    """
    if len(ar) == 0:
        # reducing an empty array raises; there is nothing to sort
        return sharedmem.empty(0, dtype='intp')

    lo = minimum.reduce(ar)
    hi = maximum.reduce(ar)
    nchunk = sharedmem.cpu_count() * 2
    # nchunk + 1 edges from lo to hi, cast to the dtype of the data
    bins = numpy.array(
        1.0 * numpy.arange(nchunk + 1) * (hi - lo) / nchunk + lo,
        lo.dtype)

    dig = digitize(ar, bins)
    binlength = bincount(dig, minlength=len(bins) + 1)
    # binoffset[k] = number of elements whose bucket number is <= k
    binoffset = numpy.cumsum(binlength)
    out = sharedmem.empty(len(ar), dtype='intp')

    with sharedmem.MapReduce() as pool:
        def work(i):
            # we can do this a lot faster
            # but already having pretty good speed.
            ind = numpy.nonzero(dig == i + 1)[0]
            myar = ar[ind]
            out[binoffset[i]:binoffset[i + 1]] = ind[myar.argsort()]
        # Buckets 1 .. nchunk + 1: the extra, last bucket holds the
        # elements that are not below the last edge (at least the maximum
        # itself).  Skipping it would leave their slots in `out` unwritten.
        pool.map(work, range(nchunk + 1))
    return out
def call(self, args, axis=0, out=None, chunksize=1024 * 1024, **kwargs):
    """ Apply self.ufunc chunk by chunk with a sharedmem.MapReduce pool.

        args is the list of arguments for self.ufunc; the entries listed in
        self.ins are the ones that get sliced into chunks.
        axis is the axis to chop it off.
        chunksize is the length of each chunk along that axis.
        kwargs are forwarded to self.ufunc.

        if self.altreduce is set, the results will
        be reduced with altreduce and returned
        otherwise will be saved to out, then return out.
    """
    if self.altreduce is not None:
        # one-element list so the nested reduce() can rebind the running
        # value
        ret = [None]
    else:
        if out is None :
            if self.outdtype is not None:
                dtype = self.outdtype
            else:
                try:
                    dtype = numpy.result_type(*[args[i] for i in self.ins] * 2)
                except:
                    dtype = None
            # the input list is repeated (* 2), so a single input still
            # gives two arguments to result_type / broadcast
            out = sharedmem.empty(
                numpy.broadcast(*[args[i] for i in self.ins] * 2).shape,
                dtype=dtype)
    if axis != 0:
        # bring the chopping axis to the front
        # NOTE: this replaces entries of the caller's args list in place
        for i in self.ins:
            args[i] = numpy.rollaxis(args[i], axis)
        # NOTE(review): with altreduce set and no out passed, out is still
        # None at this point -- confirm axis != 0 is supported in that mode
        out = numpy.rollaxis(out, axis)
    size = numpy.max([len(args[i]) for i in self.ins])
    with sharedmem.MapReduce() as pool:
        def work(i):
            sl = slice(i, i+chunksize)
            myargs = args[:]
            for j in self.ins:
                try:
                    tmp = myargs[j][sl]
                    a, b, c = sl.indices(len(args[j]))
                    myargs[j] = tmp
                except Exception as e:
                    # NOTE(review): tmp is unbound here if the slicing
                    # itself was what failed
                    print tmp
                    print j, e
                    pass
            # a and b come from the last input handled above; an empty
            # range means there is nothing to compute for this chunk
            if b == a: return None
            rt = self.ufunc(*myargs, **kwargs)
            if self.altreduce is not None:
                return rt
            else:
                out[sl] = rt
        def reduce(rt):
            if self.altreduce is None:
                return
            # fold the chunk results together, skipping empty chunks
            if ret[0] is None:
                ret[0] = rt
            elif rt is not None:
                ret[0] = self.altreduce(ret[0], rt)
        pool.map(work, range(0, size, chunksize), reduce=reduce)

    if self.altreduce is None:
        if axis != 0:
            # move the chopping axis back to its original position
            out = numpy.rollaxis(out, 0, axis + 1)
        return out
    else:
        return ret[0]
def test_critical():
    """pool.critical must serialize workers; without it they must collide.

    First pass: every worker toggles the shared flag inside
    ``pool.critical`` and no assertion may fail.  Second pass: the same
    toggle without the lock is expected to fail in a worker, surfacing as a
    ``SlaveException`` whose reason is an ``AssertionError``.
    """
    flag = sharedmem.empty((), dtype='i8')
    flag[...] = 0

    def toggle(i):
        # raise the flag, linger, and check nobody lowered it meanwhile
        flag[...] = 1
        if i != 30:
            time.sleep(0.01)
        assert_equal(flag, 1)
        flag[...] = 0

    # FIXME: if the system has one core then this will never fail,
    # even if the critical section is not
    with sharedmem.MapReduce(np=8) as pool:
        def locked(i):
            with pool.critical:
                toggle(i)
        pool.map(locked, range(16))

        try:
            pool.map(toggle, range(16))
        except sharedmem.SlaveException as e:
            assert isinstance(e.reason, AssertionError)
            return
    raise AssertionError("Shall not reach here.")
def main(config): DB = BootstrapDB(config) Ndof = len(DB.dummy.compress()) if mpicov.world.rank == 0: numpy.random.seed(9999) seeds = numpy.random.randint(low=0, high=99999999999, size=config.BigN) else: seeds = [] myseeds = mpicov.world.scatter(numpy.array_split(seeds, mpicov.world.size)) print 'This Task = ', mpicov.world.rank, 'Number of samples = ', \ len(myseeds), 'seed0 =', myseeds[0] myxi = sharedmem.empty((len(myseeds), Ndof), dtype='f8') def work(i): rng = numpy.random.RandomState(myseeds[i]) choice = rng.choice(len(DB), size=DB.Nchunks) sample = DB(choice) myxi[i][...] = sample.compress() print 'build samples' chunkmap(work, range(len(myxi)), 100) print 'done samples' print 'covariance matrix' cov = mpicov.cov(myxi, rowvar=0, ddof=0) print 'done covariance matrix' print numpy.nanmin(numpy.diag(cov)) if mpicov.world.rank == 0: numpy.savez(config.CovarianceMatrixOutput, cov=cov, BigN=config.BigN, dummy=DB.dummy, xi_cov=DB.dummy.copy().uncompress(myxi[0]), r=DB.r, mu=DB.mu)
def test_local():
    """Every task records pool.local.rank; all four ranks must show up."""
    ntasks = 800
    nworkers = 4
    ranks = sharedmem.empty(ntasks)

    with sharedmem.MapReduce(np=nworkers) as pool:
        def record_rank(i):
            # random delay so the tasks spread over the workers
            time.sleep(0.1 * numpy.random.uniform())
            with pool.ordered:
                ranks[i] = pool.local.rank
        pool.map(record_rank, range(ntasks))

    assert_equal(numpy.unique(ranks), range(nworkers))
def test_sum():
    """ Integrate [0, ... 1.0) with rectangle rule.

        Three ways of getting the integral must agree:
          1. the direct sum of 'xdx' (filled by the subprocesses)
          2. 'shmsum', accumulated from the partial sums under
             pool.critical
          3. the sum of the partial sums returned by the workers
    """
    xdx = sharedmem.empty(1024 * 1024 * 128, dtype='f8')
    shmsum = sharedmem.empty((), dtype='f8')
    shmsum[...] = 0.0
    chunksize = 1024 * 1024

    with sharedmem.MapReduce() as pool:
        def work(i):
            window = slice(i, i + chunksize)
            start, end, step = window.indices(len(xdx))
            dx = 1.0 / len(xdx)

            # x * dx for every grid point of this chunk
            myxdx = numpy.arange(start, end, step) \
                    * 1.0 / len(xdx) * dx
            xdx[window] = myxdx

            partial = xdx[window].sum(dtype='f8')
            with pool.critical:
                shmsum[...] += partial
            return i, partial

        def reduce(i, a):
            # keep only the partial sum of each chunk
            return a

        r = pool.map(work, range(0, len(xdx), chunksize), reduce=reduce)

    assert_almost_equal(numpy.sum(r, dtype='f8'), shmsum)
    assert_almost_equal(numpy.sum(xdx, dtype='f8'), shmsum)
def __init__(self, x, y, scale_factor=1.1, minsize=(60, 60),
             classifier='haarcascade_frontalface_alt2.xml',
             use_lowpass=True, lowpass_rc=50, visualize=False):
    """Store the detection settings and allocate the shared state.

    Creates three ``multiprocessing.Event`` objects, a (y, x) uint8 frame
    buffer and two (4, 1) float face arrays in shared memory, and loads
    the cascade classifier from the file named by `classifier`.
    """
    # Initialize multiprocessing.Process parent
    multiprocessing.Process.__init__(self)

    # Camera parameters
    self._x, self._y = x, y

    # Parameters for the face detection algorithm
    self._scale_factor = scale_factor
    self._minsize = minsize
    self._classifier_file = classifier
    self._use_lowpass = use_lowpass
    self._lowpass_rc = lowpass_rc

    # Whether to visualize the camera output
    self._visualize = visualize

    # _exit stops the process, newface_event is set every time a face is
    # detected, _pause_event pauses the main loop while it is set.
    self._exit = multiprocessing.Event()
    self.newface_event = multiprocessing.Event()
    self._pause_event = multiprocessing.Event()

    # Shared memory: the current image frame ...
    self._currentframe = sharedmem.empty((y, x), dtype='uint8')
    # ... and the current / previous face, four float values each
    self._currentface = sharedmem.empty((4, 1), dtype='float')
    self._lastface = sharedmem.empty((4, 1), dtype='float')

    # Setup a multiscale classifier
    self._classifier = cv2.CascadeClassifier(self._classifier_file)
def test_create():
    """empty_like / full / full_like all reproduce the reference shape."""
    a = sharedmem.empty(100, dtype='f8')
    as_list = list(a)
    expected = a.shape

    # empty_like must accept both a shared array and a plain list
    for template in (a, as_list):
        assert sharedmem.empty_like(template).shape == expected

    assert sharedmem.full(100, 1.0, dtype='f8').shape == expected
    assert sharedmem.full_like(a, 1.0).shape == expected
def test_memory_pickle():
    """A pickled-and-restored shared array still aliases the original."""
    import pickle

    original = sharedmem.empty(100)
    original[:] = range(100)

    restored = pickle.loads(pickle.dumps(original))
    assert isinstance(restored, type(original))

    # A write through the restored array must be visible in the original.
    restored[:] += 10
    assert (original == restored).all()
def loadgrpsubgrp_data(subgrpbool, flagno, dirname, n2div, SMbool, preci):
    """Read one corrected group/subgroup array from a raw binary file.

    Parameters:
        subgrpbool: 0 reads the ``grpids_*`` files, 1 the ``subgrpids_*``
            files.
        flagno: selects the quantity (file suffix, dtype):
            0 idscorrect (uint64), 1 poscorrect (float, 3 columns),
            2 ptypecorrect (int32), 3 masscorrect (float),
            4 DMmask (int32), 5 starmask (int32).
        dirname: directory holding the files.
        n2div: number of equal chunks the file is read in; a final read
            picks up whatever does not fit into the equal chunks.
        SMbool: 0 returns an ordinary numpy array, 1 a sharedmem array.
        preci: float width selector; floats have ``(preci + 1) * 4`` bytes.

    Returns:
        The array read from ``dirname/<prefix><suffix>``.  Its length is
        the number of uint64 entries in ``dirname/<prefix>idscorrect``.

    Raises:
        ValueError: for an unknown subgrpbool, flagno or SMbool.
    """
    if subgrpbool == 0:
        gstr2 = 'grpids_'
    elif subgrpbool == 1:
        gstr2 = 'subgrpids_'
    else:
        raise ValueError('subgrpbool must be 0 or 1, got %r' % (subgrpbool,))

    floatcode = 'f' + str((preci + 1) * 4)
    # flagno -> (file suffix, element dtype, number of columns or None)
    layouts = {
        0: ('idscorrect', numpy.uint64, None),
        1: ('poscorrect', floatcode, 3),
        2: ('ptypecorrect', numpy.int32, None),
        3: ('masscorrect', floatcode, None),
        4: ('DMmask', numpy.int32, None),
        5: ('starmask', numpy.int32, None),
    }
    if flagno not in layouts:
        raise ValueError('flagno must be in 0..5, got %r' % (flagno,))
    gstr1, gdtp1, ncols = layouts[flagno]

    # The id file defines the number of entries; it is only measured here,
    # so a read-only mapping is enough.
    ids_correct = numpy.memmap(dirname + '/' + gstr2 + 'idscorrect',
                               dtype=numpy.uint64, mode='r')
    n2 = len(ids_correct)
    del ids_correct

    gshp1 = n2 if ncols is None else (n2, ncols)
    if SMbool == 0:
        gsnap_corrected = numpy.zeros(gshp1, dtype=gdtp1)
    elif SMbool == 1:
        gsnap_corrected = sharedmem.empty(gshp1, gdtp1)
    else:
        raise ValueError('SMbool must be 0 or 1, got %r' % (SMbool,))

    def read_rows(f, nrows):
        # Read nrows entries; multi-column data is stored row by row.
        if ncols is None:
            return numpy.fromfile(f, dtype=gdtp1, count=nrows)
        flat = numpy.fromfile(f, dtype=gdtp1, count=nrows * ncols)
        return flat.reshape(-1, ncols)

    chunksize = n2 // n2div
    # Binary mode, and the file is closed even if a read fails.
    with open(dirname + '/' + gstr2 + gstr1, 'rb') as f:
        for i in range(n2div):
            start = i * chunksize
            if chunksize:
                gsnap_corrected[start:start + chunksize] = \
                    read_rows(f, chunksize)
            print(i)
        # Whatever is left after the n2div equal chunks.
        done = n2div * chunksize
        if done < n2:
            gsnap_corrected[done:] = read_rows(f, n2 - done)
    return gsnap_corrected
def _make_shared(self, numpy_matrix):
    """ Return a shared-memory copy of `numpy_matrix` when it is needed.

    The input is handed back untouched when only one process is used
    (``self._sim_args.n_processing == 1``) or when it is None; otherwise
    its contents are copied into a new sharedmem array of the same shape
    and dtype.
    """
    single_process = self._sim_args.n_processing == 1
    if single_process or numpy_matrix is None:
        return numpy_matrix

    clone = sharedmem.empty(numpy_matrix.shape, dtype=numpy_matrix.dtype)
    clone[:] = numpy_matrix[:]
    return clone
def fstack_mp_new(img, fmap):
    """Build a 2-d image from the planes of `img`, selected by `fmap`.

    For every plane index x of ``img[:, :, x]``:
      * pixels with ``fmap == x`` are copied from plane x;
      * pixels with ``x < fmap < x + 1`` get the linear blend
        ``(fmap - x) * img[:, :, x + 1] + (x + 1 - fmap) * img[:, :, x]``.
    Pixels where fmap equals the last plane index are copied from the last
    plane in a second parallel pass.

    Returns the shared uint16 array of shape ``img.shape[0:2]``.
    Assumes ``get_edges(img, k)`` returns k + 1 increasing row boundaries
    that cover all rows -- TODO confirm.
    """
    img_stacked = shmem.empty(img.shape[0:2], dtype='uint16')
    indexl = shmem.empty(img.shape[0:2], dtype='bool')
    edges = get_edges(img, 16)

    # This implementation is faster than breaking each image plane up for
    # parallel processing
    def do_work(x):
        # NOTE: the strings given to ne.evaluate name the variables fmap,
        # x, A, B, C and last_ind; keep those names in this scope.
        if x != img.shape[2] - 1:
            def mt_assignment(plane, y):
                # Masked pixels of one band of rows.  The band is cut from
                # both the plane and the mask so their shapes agree.
                rows = slice(edges[y], edges[y + 1])
                return plane[rows, :][index[rows, :]]

            def gather(pool, plane):
                # Submit every band before collecting any result so the
                # bands really run concurrently; the results are joined in
                # band order, i.e. in the order a boolean mask selects.
                futures = [pool.submit(mt_assignment, plane, y)
                           for y in range(16)]
                return np.concatenate([f.result() for f in futures], axis=0)

            index = ne.evaluate("fmap==x")
            img_stacked[index] = img[:, :, x][index]
            index = ne.evaluate("(fmap > x) & (fmap < x+1)")
            with ThreadPoolExecutor(max_workers=16) as pool:
                A = gather(pool, fmap)
                B = gather(pool, img[:, :, x + 1])
                C = gather(pool, img[:, :, x])
            print('A Shape is : ', A.shape)
            print('A content is: ', A)
            img_stacked[index] = ne.evaluate("(A-x) * B + (x+1-A) * C")
        else:
            last_ind = img.shape[2] - 1
            # Write INTO the shared mask.  A plain assignment would only
            # bind a name local to this worker and leave the shared array
            # that the second pass reads uninitialised.
            indexl[...] = ne.evaluate("fmap == last_ind")

    with shmem.MapReduce(np=img.shape[2]) as pool:
        pool.map(do_work, range(img.shape[2]))

    num_proc = shmem.cpu_count()
    edges = get_edges(img, num_proc)

    def mp_assignment(x):
        rows = slice(edges[x], edges[x + 1])
        band_mask = indexl[rows, :]
        img_stacked[rows, :][band_mask] = img[rows, :, -1][band_mask]

    with shmem.MapReduce(np=num_proc) as pool:
        pool.map(mp_assignment, range(num_proc))

    return img_stacked
def compose_equatorial_faces(faces, overlap, num_cores=None):
    """Pad faces 4..7 with a border assembled from their neighbours.

    `faces` is indexed 0..11 and every face is a square (nside x nside)
    array.  For i in 0..3 a 3 x 3 block layout is built around face 4 + i:
    the side blocks are strips of width ``extra = nside // 4 + overlap``
    cut from other faces, and two corner blocks are mask-weighted averages
    of two rotated corners.

    Returns an ndarray of shape (4, nside + 2 * extra, nside + 2 * extra).
    """
    nside = faces[0].shape[0]
    assert faces[0].shape[1] == nside
    assert overlap <= 3 * nside // 4

    extra = nside // 4 + overlap
    nside_output = nside + 2 * extra
    masks = _face_masks(extra, num_cores)
    output = sm.empty((4, nside_output, nside_output))

    with sharedmem_pool(num_cores, numexpr=False) as pool:
        def work(i):
            nxt = (i + 1) % 4
            prev = (i + 3) % 4

            # weighted blend of two rotated corners for the top-right block
            top_corner = (
                (masks[0] * np.rot90(faces[i][-extra:, -extra:], -1) +
                 masks[1] * np.rot90(faces[prev][:extra, :extra], 1)) /
                (masks[0] + masks[1]))
            top = np.hstack([
                faces[4 + nxt][-extra:, -extra:],
                faces[i][-extra:, :],
                top_corner,
            ])

            middle = np.hstack([
                faces[8 + i][:, -extra:],
                faces[4 + i],
                faces[prev][:, :extra],
            ])

            # weighted blend of two rotated corners for the bottom-left
            # block
            bottom_corner = (
                (masks[2] * np.rot90(faces[8 + prev][:extra, :extra], -1) +
                 masks[3] * np.rot90(faces[8 + i][-extra:, -extra:], 1)) /
                (masks[2] + masks[3]))
            bottom = np.hstack([
                bottom_corner,
                faces[8 + prev][:extra, :],
                faces[4 + prev][:extra, :extra],
            ])

            output[i] = np.vstack([top, middle, bottom])
        pool.map(work, range(4))

    return np.array(output)
def __init__(self, tabfilename, format, count=10, **kwargs):
    """ Read the first `count` groups and the particle ids belonging to them.

        tabfilename is like groups_019/group_tab_019.%d.  The matching id
        files are found by replacing '_tab_' with '_ids_' in that name.
        format is the snapshot format name; '.GroupTab' is appended to it.
        count is the number of groups to read; a negative value or one
        above the total selects all groups.
        kwargs are forwarded to Snapshot.
    """
    g = Snapshot(tabfilename % 0, format + '.GroupTab', **kwargs)
    # clamp count to the total number of groups reported by file 0
    if count < 0 or count > g.C['Ntot'][0]:
        count = g.C['Ntot'][0]
    i = 0
    # decide number of files to open
    nread = 0
    tabs = []
    while nread < count:
        g = Snapshot(tabfilename % i, format + '.GroupTab', **kwargs)
        nread += g.C['N'][0]
        i = i + 1
        tabs.append(g)
    #print 'will read', len(tabs), 'files'
    Field.__init__(self, numpoints=count, components={'offset':'i8', 'length':'i8', 'massbytype':('f8', 6), 'mass':'f8', 'pos':('f8', 3), 'vel':('f8', 3)})
    if len(tabs) > 0:
        self.take_snapshots(tabs, ptype=0)
    del tabs

    # fix the offset which may overflow for large halos:
    # recompute it from the running sum of the lengths
    self['offset'][1:] = self['length'].cumsum()[:-1]

    nread = 0
    nshallread = self['length'].sum()
    i = 0
    idslen = numpy.zeros(g.C['Nfiles'], dtype='i8')
    # Open id files until they cover all ids of the selected groups.  The
    # third 'i4' value at the start of each id file is taken as the number
    # of ids stored in that file.
    while nread < nshallread:
        idslen[i] = numpy.fromfile(tabfilename.replace('_tab_', '_ids_') % i, dtype='i4', count=3)[2]
        nread += idslen[i]
        i = i + 1
    # start of every file's ids inside the concatenated id array
    idsoffset = numpy.concatenate(([0], idslen.cumsum()))

    ids = sharedmem.empty(idslen.sum(), dtype=g.C['idtype'])

    #print 'reading', i, 'id files'
    with sharedmem.Pool() as pool:
        def work(i):
            # the ids start after the first 28 bytes of the file
            more = numpy.memmap(tabfilename.replace('_tab_', '_ids_') % i, dtype=g.C['idtype'], mode='r', offset=28)
            ids[idsoffset[i]:idsoffset[i] + idslen[i]] = more
        # i is the number of id files counted by the loop above
        pool.map(work, range(i))
    # one entry per group, sized by the group lengths
    self.ids = packarray(ids, self['length'])
    # sort the ids of every group in place
    for i in range(self.numpoints):
        self.ids[i].sort()
def test_scalar():
    """A 0-d shared array is writable from workers; ordered writes win last."""
    cell = sharedmem.empty((), dtype='f8')
    cell[...] = 1.0
    assert_equal(cell, 1.0)

    with sharedmem.MapReduce() as pool:
        def store(i):
            # pool.ordered runs the writes in task order, so the value of
            # the last task is the one that survives
            with pool.ordered:
                cell[...] = i
        pool.map(store, range(10))

    assert_equal(cell, 9)
def create(self, sampleShape_local, labelShape_local, batch_size_local,
           batchPoolSize_local, batchBlockNum_local, latestStep):
    """Allocate the shared batch pool: data, label, step and state arrays.

    ``data`` and ``label`` have the shape ``[pool size, batch size] +
    sample/label shape``.  ``step`` has one int64 entry per pool slot,
    initialised to `latestStep`.  ``state`` is an int8 array of shape
    ``(batchBlockNum_local + 1, batchPoolSize_local)`` initialised to 1.
    """
    def as_array(shared_block, dtype):
        # View the shared block as a flat ndarray of the given dtype.
        return np.frombuffer(shared_block, dtype=dtype)

    leading = [batchPoolSize_local, batch_size_local]
    datashape = leading + sampleShape_local
    labelshape = leading + labelShape_local

    self.data = as_array(
        sharedmem.empty(datashape, dtype=np.float32),
        np.float32).reshape(datashape)
    self.label = as_array(
        sharedmem.empty(labelshape, dtype=np.int64),
        np.int64).reshape(labelshape)

    self.step = as_array(
        sharedmem.full(batchPoolSize_local,
                       [latestStep] * batchPoolSize_local,
                       dtype=np.int64),
        np.int64)

    # 0: ready to use, 1: used, 2:full of updating, 3 updating conti.
    state_rows = batchBlockNum_local + 1
    state_count = batchPoolSize_local * state_rows
    self.state = as_array(
        sharedmem.full(state_count, [1] * state_count, dtype=np.int8),
        np.int8).reshape(state_rows, batchPoolSize_local)
def __init__(self, size, title='', output_period=1, multiprocessing=True,
             position=None):
    """ initializes the video shower.
    size sets the width and the height of the image to be shown
    title sets the title of the window. This should be unique if
        multiple windows are used.
    output_period determines if frames are skipped during display.
        For instance, `output_period=10` only shows every tenth frame.
    multiprocessing indicates whether a separate process is used for
        displaying. If multiprocessing=None, multiprocessing is used
        for all platforms, except MacOX
    position determines the coordinates of the top left corner of the
        window that displays the image

    If the separate process cannot be used (sharedmem missing, or an
    AssertionError while starting it), the window is opened in the
    calling process instead.
    """
    self.title = title
    self.output_period = output_period
    self.this_frame = 0
    self._proc = None

    # multiprocessing does not work in current MacOS OpenCV
    if multiprocessing is None:
        multiprocessing = (sys.platform != "darwin")

    if multiprocessing:
        # `sharedmem` is falsy when the package could not be imported
        if sharedmem:
            try:
                # create the pipe to talk to the child
                self._pipe, pipe_child = mp.Pipe(duplex=True)
                # setup the shared memory area
                self._data = sharedmem.empty(size, np.uint8)
                # initialize the process that shows the image
                self._proc = mp.Process(target=_show_image_from_pipe,
                                        args=(pipe_child, self._data,
                                              title, position))
                self._proc.daemon = True
                self._proc.start()
                # arguments are passed to the logger so the message is
                # only formatted when debug output is enabled
                logger.debug('Started process %d for displaying images',
                             self._proc.pid)

            except AssertionError:
                # Logger.warn is a deprecated alias of Logger.warning
                logger.warning('Could not start a separate process to '
                               'display images. The main process will '
                               'thus be used.')

        else:
            logger.warning('Package sharedmem could not be imported and '
                           'images are thus shown using the main process.')

    if self._proc is None:
        # open window in this process
        cv2.namedWindow(title)
        if position is not None:
            cv2.moveWindow(title, position[0], position[1])
        cv2.waitKey(1)
def setup_para(self, nb_images, c, h, w, y, type=None, val_size=2000):
    """Allocate the shared training arrays and compile the functions.

    Parameters:
        nb_images: number of samples to allocate room for.
        c, h, w: per-sample dimensions of the input array.
        y: number of target values per sample (regressor only).
        type: 'classifier' (int32 targets, one per sample) or 'regressor'
            (float32 targets, `y` per sample).  Required.
        val_size: the first `val_size` samples are passed on as the
            validation part, the remaining ones as the training part.

    Raises:
        AssertionError: if `type` is None or not one of the two names.

    Side effect: installs a global "ignore" warnings filter.
    """
    # Explicit raises instead of ``assert`` keep the validation active
    # under ``python -O``; the exception type is the one callers already
    # see.  Validating first also means nothing is allocated for a bad
    # request.
    if type is None:
        raise AssertionError()
    if type == 'classifier':
        target_shape, target_dtype = (nb_images, ), 'int32'
    elif type == 'regressor':
        target_shape, target_dtype = (nb_images, y), 'float32'
    else:
        raise AssertionError('Type has to be classfier or regressor')

    print("setup_para")
    self.val_size = val_size
    self.training_set_x = sharedmem.empty((nb_images, c, h, w),
                                          dtype='float32')
    self.training_set_y = sharedmem.empty(target_shape, dtype=target_dtype)

    self.setDataAndCompileFunctions(self.training_set_x[val_size:],
                                    self.training_set_y[val_size:],
                                    self.training_set_x[:val_size],
                                    self.training_set_y[:val_size])
    self.para_load = True

    import warnings
    warnings.filterwarnings("ignore")
def main():
    """Run the worker benchmark and print the slowest worker's time.

    A random (n, m) input is copied into a ctypes RawArray, a shared
    (n, n) output array is allocated, and `concurrency` Worker processes
    are started.  Every worker gets one task ``(index, start, stop)``
    covering an equal share of the n rows; the value each worker puts on
    the result queue is collected and the maximum is printed.
    """
    workers = []
    task_queue = Queue()
    result_queue = Queue()

    # input data
    n = 10000
    m = 100
    X_in = np.random.rand(n, m)
    size_in = X_in.size
    shape_in = X_in.shape
    # flatten in place: RawArray is initialised from a 1-d sequence
    X_in.shape = size_in
    X_ctypes_in = sharedctypes.RawArray(ctypes.c_double, X_in)

    # output data
    X_out = sharedmem.empty((n, n))

    # create worker and start
    concurrency = 4
    # rows per worker: derived from the worker count and kept an integer
    # on both Python 2 and Python 3
    unit = n // concurrency
    for i in range(concurrency):
        worker = Worker(task_queue, result_queue, X_ctypes_in, shape_in,
                        X_out)
        worker.start()
        workers.append(worker)

    # put task
    for i in range(concurrency):
        task_queue.put((i, unit * i, unit * i + unit))

    # get result of task: exactly one value per worker
    elapsed_times = [result_queue.get() for _ in range(concurrency)]

    # stop worker, then reap the processes so none is left behind
    for worker in workers:
        worker.terminate()
    for worker in workers:
        worker.join()

    # do math for elapsed time
    print("Elapsed time {} [s]".format(np.max(elapsed_times)))
def __init__(self, stim_arr, NFFT, Fs, noverlap, tr_length, dtype):
    """Store the spectrogram settings and share the computed spectrogram.

    ``generate_spectrogram`` is run on ``self.stim_arr``; its three
    results are copied into float64 shared-memory arrays stored as
    ``self.spectrogram``, ``self.freqs`` and ``self.times``.
    """
    # this is a weird notation
    StimulusModel.__init__(self, stim_arr, dtype=dtype)

    # absorb the vars
    self.NFFT, self.Fs = NFFT, Fs
    self.noverlap, self.tr_length = noverlap, tr_length

    spectrogram, freqs, times = generate_spectrogram(
        self.stim_arr, self.NFFT, self.Fs, self.noverlap)

    computed = (('spectrogram', spectrogram),
                ('freqs', freqs),
                ('times', times))
    for attribute, values in computed:
        shared = sharedmem.empty(values.shape, dtype='float64')
        setattr(self, attribute, shared)
        shared[:] = values[:]
def parallelize_correlation():
    """Run ``correlation`` over the subjects in cf.NUM_PROCESSES processes.

    The index range of ``shared_subject1`` is split into contiguous
    chunks, one per process.  Every process receives both subjects, its
    ``(start, stop)`` range and the shared output array.

    Returns the shared output array, one entry per element of
    ``shared_subject1``.  Presumably ``correlation`` fills
    ``output[start:stop]`` -- confirm against its definition.
    """
    total = len(shared_subject1)
    nproc = cf.NUM_PROCESSES
    chunk_size = total // nproc
    output_correlations = sm.empty(total)

    processes = []
    for i in range(nproc):
        start = i * chunk_size
        # The last process also takes the remainder left by the integer
        # division; otherwise the tail would never be handed to anyone.
        stop = total if i == nproc - 1 else start + chunk_size
        processes.append(
            Process(target=correlation,
                    args=(shared_subject1, shared_subject2, start, stop,
                          output_correlations)))

    for p in processes:
        p.start()
    for p in processes:
        p.join()
    return output_correlations
def loadsnapshot(flagno, dirname, n2div, SMbool, preci):
    """Read one unsorted snapshot array from a raw binary file.

    Parameters:
        flagno: selects the quantity (file name, dtype):
            0 snappid_notsorted (uint64),
            1 snappos_notsorted (float, 3 columns),
            2 snapptype_notsorted (int32),
            3 snapmass_notsorted (float),
            4 snappid_argsortednew (Python int dtype).
        dirname: directory holding the files.
        n2div: number of equal chunks the file is read in; a final read
            picks up whatever does not fit into the equal chunks.
        SMbool: 0 returns an ordinary numpy array, 1 a sharedmem array.
        preci: float width selector; floats have ``(preci + 1) * 4`` bytes.

    Returns:
        The array read from ``dirname/<file name>``.  Its length is the
        number of uint64 entries in ``dirname/snappid_notsorted``.

    Raises:
        ValueError: for an unknown flagno or SMbool.
    """
    floatcode = 'f' + str((preci + 1) * 4)
    # flagno -> (file name, element dtype, number of columns or None)
    layouts = {
        0: ('snappid_notsorted', numpy.uint64, None),
        1: ('snappos_notsorted', floatcode, 3),
        2: ('snapptype_notsorted', numpy.int32, None),
        3: ('snapmass_notsorted', floatcode, None),
        # builtin int: numpy.int was only an alias of it and no longer
        # exists in current numpy
        4: ('snappid_argsortednew', int, None),
    }
    if flagno not in layouts:
        raise ValueError('flagno must be in 0..4, got %r' % (flagno,))
    str1, dtp1, ncols = layouts[flagno]

    # The particle-id file defines the number of entries.
    snappid_notsorted = numpy.memmap(dirname + '/snappid_notsorted',
                                     dtype=numpy.uint64, mode='r')
    n1 = len(snappid_notsorted)
    del snappid_notsorted

    shp1 = n1 if ncols is None else (n1, ncols)
    if SMbool == 0:
        snap_notsorted = numpy.zeros(shp1, dtype=dtp1)
    elif SMbool == 1:
        snap_notsorted = sharedmem.empty(shp1, dtp1)
    else:
        raise ValueError('SMbool must be 0 or 1, got %r' % (SMbool,))

    def read_rows(f, nrows):
        # Read nrows entries; multi-column data is stored row by row.
        if ncols is None:
            return numpy.fromfile(f, dtype=dtp1, count=nrows)
        flat = numpy.fromfile(f, dtype=dtp1, count=nrows * ncols)
        return flat.reshape(-1, ncols)

    chunksize = n1 // n2div
    # Binary mode, and the file is closed even if a read fails.
    with open(dirname + '/' + str1, 'rb') as f:
        for i in range(n2div):
            start = i * chunksize
            if chunksize:
                snap_notsorted[start:start + chunksize] = \
                    read_rows(f, chunksize)
            print(i)
        # Whatever is left after the n2div equal chunks.
        done = n2div * chunksize
        if done < n1:
            snap_notsorted[done:] = read_rows(f, n1 - done)
    return snap_notsorted
def compute_dual_weight_and_norms(width, height, spectrograms, total_cores):
    """Return the dual frame weight and two norm lists of the spectrograms.

    The dual frame weight is the element-wise sum of the squared
    spectrograms, accumulated in a (height, width) array of dtype
    ``DTYPES[0]``.  For every spectrogram its ``np.linalg.norm`` is stored
    once as is and once divided by the square root of its size.

    Returns ``(dual_frame_weight, fourier_norms, space_norms)``; the two
    norm collections are plain lists.
    """
    # currently, this is not parallelized!
    # FIXME: IT COULD BE A GOOD IDEA TO CHANGE THIS!
    dual_frame_weight = np.zeros((height, width), dtype=DTYPES[0])
    for spect in spectrograms:
        dual_frame_weight += np.square(spect)

    count = len(spectrograms)
    fourier_norms = sm.empty(count, dtype=DTYPES[0])
    space_norms = sm.empty(count, dtype=DTYPES[0])

    with sharedmem_pool(total_cores, numexpr=False) as pool:
        def work(i):
            current = spectrograms[i]
            norm = np.linalg.norm(current)
            fourier_norms[i] = norm
            space_norms[i] = norm / math.sqrt(current.size)
        pool.map(work, list(range(count)))

    return (dual_frame_weight, list(fourier_norms), list(space_norms))
def __init__(self, filename, video_class=VideoFile):
    """Prepare a daemon process that reads `filename` with `video_class`.

    The video is opened once here, only to learn its frame shape and
    format, and is closed again before the constructor returns.

    Attributes created: ``pipe_sender`` (this side of the duplex pipe),
    ``frame_buffer`` (uint8 shared memory of shape ``video.shape[1:]``)
    and ``receiver`` (the VideoPipeReceiver for the consuming side).
    """
    super(VideoReaderProcess, self).__init__()
    self.daemon = True
    self.running = False
    self.filename = filename
    self.video_class = video_class

    # create the pipe used for communication
    self.pipe_sender, pipe_receiver = mp.Pipe(duplex=True)

    video = self.video_class(self.filename)
    try:
        # create the buffer in memory that is used for passing frames
        self.frame_buffer = sharedmem.empty(video.shape[1:], np.uint8)
        self.receiver = VideoPipeReceiver(pipe_receiver, self.frame_buffer,
                                          video.video_format)
    finally:
        # close the video even when the allocation or the receiver fails
        video.close()
def importSlice(self, s0=0, s1=None, chans=np.arange(NCHAN)):
    """Load samples s0..s1 of the selected channels, in microvolts.

    Parameters:
        s0, s1: first and one-past-last sample index of the slice; s1
            defaults to ``self.nsamples``.
        chans: channel indices to load.  They are used in sorted order;
            the sequence passed in is left unmodified.

    Sets ``slice_uv`` (float32, channels x samples), ``chan2slice_idx``
    (channel -> row of the slice), ``slice_s0``/``slice_s1``,
    ``slice_min``/``slice_max`` and ``sliceImported``.  The shared buffers
    ``slice_filtered`` and ``slice_activity`` are reallocated whenever the
    shape they depend on changes.
    """
    # s0, s1 are the first and last sample indices of the slice
    if s1 is None:
        s1 = self.nsamples

    # numpy requires that indexing elements be sorted.  np.sort returns a
    # sorted copy, so neither the caller's array nor the shared default
    # argument is modified.
    chans = np.sort(chans)
    self.chan2slice_idx = {chan: i for i, chan in enumerate(chans)}

    # cast to float, center on zero (subtract 2**16/2 = 2**15), and
    # convert to microvolts
    self.slice_uv = np.asarray(
        ((self.dset[s0:s1, chans].astype(np.float32) - 2**15)
         * MICROVOLTS_PER_COUNT).transpose())
    self.slice_s0 = s0
    self.slice_s1 = s1

    nsamples = s1 - s0
    nchans = len(chans)
    samples_changed = nsamples != self.slice_nsamples
    chans_changed = nchans != self.slice_nchans

    # slice_filtered is (channels x samples): a change in either dimension
    # makes the old buffer the wrong shape.
    if samples_changed or chans_changed:
        self.slice_filtered = sharedmem.empty((nchans, nsamples),
                                              dtype=np.float32)
    if chans_changed:
        self.slice_activity = sharedmem.empty(nchans, dtype=np.float32)
    self.slice_nsamples = nsamples
    self.slice_nchans = nchans

    self.slice_min = np.min(self.slice_uv)
    self.slice_max = np.max(self.slice_uv)
    self.sliceImported = True
def main(share):
    """Feed ten worker processes with fresh random data, forever.

    Every five seconds a random-length block of random numbers is written
    to the start of a shared array.  Each worker queue then receives
    either just the length (when `share` is true, so workers read the
    shared array) or the data itself.  The loop waits until every queue
    has been joined before the next round.  This function never returns.
    """
    SIZE = 10000000
    shared = sharedmem.empty(SIZE)

    queues = [JoinableQueue(maxsize=1) for _ in range(10)]
    processes = []
    for index, queue in enumerate(queues):
        processes.append(
            Process(target=process, args=(index, queue, shared)))
    for proc in processes:
        proc.start()

    while True:
        time.sleep(5)
        size = random.randint(SIZE - 1000, SIZE)
        dat = np.random.random(size)
        shared[:size] = dat
        if share:
            put = size
        else:
            put = dat
        print("regenerated", size, "; sum: ", dat.sum())
        for queue in queues:
            queue.put(put)
        for queue in queues:
            queue.join()
def __init__(self, x, y, K, d, camid=0, visualize=False, debug=False):
    """Open the camera and precompute the undistortion lookup tables.

    x and y are the frame width and height, K the camera matrix, d the
    distortion coefficients and camid the OpenCV capture device id.  A
    (y, x) uint8 frame buffer is allocated in shared memory.
    """
    # Initialize multiprocessing.Process parent
    multiprocessing.Process.__init__(self)

    # _exit stops the process, newframe_event is set every time an image
    # has been unwarped, _pause_event pauses the main loop while set.
    self._exit = multiprocessing.Event()
    self.newframe_event = multiprocessing.Event()
    self._pause_event = multiprocessing.Event()

    # Display / debugging switches
    self._visualize = visualize
    self._debug = debug

    # Time of the last frame
    self._oldtime = time.time()

    # Camera parameters: device id, width, height
    self._cam_device_id = camid
    self._x, self._y = x, y

    # Shared memory for the current image frame
    self._currentframe = sharedmem.empty((y, x), dtype='uint8')

    # Camera matrix K and distortion coefficients d
    self._K, self._d = K, d

    # Setup camera object using OpenCV
    self._cam = cv2.VideoCapture(self._cam_device_id)
    self._cam.set(cv2.cv.CV_CAP_PROP_FRAME_WIDTH, self._x)
    self._cam.set(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT, self._y)

    frame_size = (self._x, self._y)
    # Generate optimal camera matrix
    self._newcameramatrix, self._roi = cv2.getOptimalNewCameraMatrix(
        self._K, self._d, frame_size, 0)
    # Generate LUTs for undistortion
    self._mapx, self._mapy = cv2.initUndistortRectifyMap(
        self._K, self._d, None, self._newcameramatrix, frame_size, 5)
def __make_np_arrays_sharable(self):
    """
    Replaces every plain numpy array attribute of this object with a
    sharedmem array holding the same data.

    Attributes whose names are listed in ``self.exclude`` are left alone.
    When ``sharedmem.copy`` raises ``AttributeError`` (presumably for
    zero-dimensional arrays -- confirm), the value is stored in a
    one-element shared array of the same dtype instead.
    """
    varDict = self.__dict__
    # Only values of existing keys are replaced, so iterating while assigning
    # does not change the dict's size.
    for key, var in varDict.items():
        # Exact type match (not isinstance) is kept from the original;
        # presumably so ndarray subclasses are not converted again -- confirm.
        if type(var) is not np.ndarray or key in self.exclude:
            continue
        try:
            varDict[key] = sharedmem.copy(var)
        except AttributeError:
            # Use the array's element type.  The class ``np.ndarray`` itself
            # is not a usable dtype for the shared buffer (numpy maps it to
            # the generic object dtype).
            share_var = sharedmem.empty(1, var.dtype)
            share_var[0] = var
            varDict[key] = share_var
def chop(Nside, pos):
    """Group positions by the nested HEALPix pixel they fall in.

    Each row of `pos` is converted to a pixel id in parallel; the returned
    packarray holds the argsort of those ids, split by per-pixel counts.
    (Original note: bootstraps the sky, returns about 100 chunks, only 50 of
    them are big.)
    """
    # we paint quasar uniformly as long as it is covered by sdss:
    Npix = chealpy.nside2npix(Nside)
    chunkid = sharedmem.empty(len(pos), dtype='intp')
    print(len(pos))

    step = 1024 * 1024
    with sharedmem.MapReduce() as pool:
        def assign_pixels(begin):
            window = slice(begin, begin + step)
            chunkid[window] = chealpy.vec2pix_nest(Nside, pos[window])
        pool.map(assign_pixels, range(0, len(pos), step))

    order = sharedmem.argsort(chunkid)
    counts = sharedmem.array.bincount(chunkid, minlength=Npix)
    # cross-check the parallel bincount against numpy's
    reference = numpy.bincount(chunkid, minlength=Npix)
    assert (counts == reference).all()
    return sharedmem.array.packarray(order, counts)
def ztree(self, zkey=None, scale=None, minthresh=10, maxthresh=20, np=None):
    """Build a zt.Tree over self['locations'].

    scale      -- encoding scale; derived from the min and peak-to-peak of
                  the locations when None
    minthresh, maxthresh -- forwarded to zt.Tree
    np         -- forwarded to sharedmem.MapReduce and sharedmem.argsort

    NOTE: the `zkey` argument is never read; keys are always recomputed.
    """
    if scale is None:
        locations = self['locations']
        scale = fc.scale(locations.min(axis=0), locations.ptp(axis=0))

    zkey = sharedmem.empty(self.numpoints, dtype=fc.fckeytype)
    step = 1024 * 1024
    with sharedmem.MapReduce(np=np) as pool:
        def encode_block(begin):
            end = begin + step
            X, Y, Z = self['locations'][begin:end].T
            # write the keys straight into the shared output buffer
            fc.encode(X, Y, Z, scale=scale, out=zkey[begin:end])
        pool.map(encode_block, range(0, len(zkey), step))

    # use sharemem.argsort, because it is faster
    order = sharedmem.argsort(zkey, np=np)
    return zt.Tree(zkey=zkey, scale=scale, arg=order,
                   minthresh=minthresh, maxthresh=maxthresh)
def main(A):
    """Measure binned mean and variance of exp(-tau) and save them.

    Pixels are binned by log-wavelength into 400 redshift bins between
    z=2 and z=4; the statistics are computed for both `taured` and
    `taureal` and written to A.MeasuredMeanFractionOutput.
    """
    sightlines = Sightlines(A)
    spectra = SpectraOutput(A)
    loglam = sharedmem.empty(sightlines.Npixels.sum(), 'f4')
    chunksize = 100

    # sharedmem.set_debug(True)
    def work(i):
        first = sightlines.PixelOffset[i]
        loglam[first:first + sightlines.Npixels[i]] = spectra.LogLam[i]
    chunkmap(work, range(len(sightlines)), chunksize=chunksize)

    Nbins = 400
    zBins = numpy.linspace(2.0, 4.0, Nbins + 1, endpoint=True)
    LogLamBins = numpy.log10(1216.0 * (1 + zBins))
    z = 0.5 * (zBins[1:] + zBins[:-1])

    # digitize yields indices 0 .. Nbins+1, hence Nbins+2 counters
    ind = numpy.digitize(loglam, LogLamBins)
    N = numpy.bincount(ind, minlength=Nbins + 2)
    N[N == 0] = 1.0  # avoid dividing by zero in empty bins

    def binned_moments(tau):
        """Per-bin mean and variance of exp(-tau), outer bins dropped."""
        F = numpy.exp(-tau)
        K1 = numpy.bincount(ind, F, minlength=Nbins + 2) / N
        K2 = numpy.bincount(ind, F ** 2, minlength=Nbins + 2) / N
        variance = K2 - K1 ** 2
        return K1[1:-1], variance[1:-1]

    meanF, varF = binned_moments(spectra.taured.data)
    meanFreal, varFreal = binned_moments(spectra.taureal.data)

    print(z)
    print(meanF)
    print(varF)
    numpy.savez(A.MeasuredMeanFractionOutput,
                a=1 / (1 + z),
                abins=1 / (1 + zBins),
                xmeanF=meanF,
                xvarF=varF,
                xmeanFreal=meanFreal,
                xvarFreal=varFreal,
                )
def smooth(self, ftype, ngb=32):
    """Fill gas['sml'] for field `ftype` from an `ngb`-neighbour query.

    For every position the query's 'weights' are reshaped to rows of `ngb`
    entries and the square root of the first entry of each row is stored.
    Work is done in chunks of 64k positions across a sharedmem pool.
    """
    gas = self.F[ftype]
    tree = self.T[ftype]
    from gaepsi.compiledbase.ngbquery import NGBQueryN
    query = NGBQueryN(tree, ngb)

    gas['sml'] = sharedmem.empty(len(gas), dtype='f8')
    block = 1024 * 64
    with sharedmem.MapReduce(np=self.np) as pool:
        def work(start):
            window = slice(start, start + block)
            x, y, z = gas['pos'][window].T
            weights = q_result_weights = query(x, y, z)[0]['weights']
            per_point = weights.reshape(-1, ngb)
            gas['sml'][window] = per_point[:, 0] ** 0.5
            # progress report: chunk start and total length
            print("%s %s" % (start, len(gas)))
        pool.map(work, range(0, len(gas), block))
def _blend_masks_par(nside, size, num_cores=None):
    """Build the eight (nside x nside) blend masks in parallel.

    Mask 0 is a cosine ramp along the first axis; masks 1-3 are mask 0
    rotated by -1, -2, -3 quarter turns; masks 4-5 are products of two of
    those and masks 6-7 products of their complements.  Returns a plain
    numpy copy of the shared array.
    """
    def _cos(x):
        # return -0.5 * np.cos(np.pi*x) + 0.5
        # `pi` and `x` must keep these names: the expression string refers
        # to them.
        pi = np.pi
        return ne.evaluate('-0.5 * cos(pi * x) + 0.5')

    ramp_profile = np.hstack([
        np.zeros(3 * nside // 4 - size),
        np.linspace(0, 1, size),
        np.ones(nside // 4),
    ])
    _, ramp = np.meshgrid(np.linspace(1, nside, nside), ramp_profile)

    blend_masks = sm.empty((8, nside, nside))
    blend_masks[0] = _cos(ramp)

    # masks 1..3: rotations of mask 0
    with sharedmem_pool(num_cores, numexpr=False) as pool:
        def rotate(k):
            blend_masks[k] = np.rot90(blend_masks[0], -k)
        pool.map(rotate, range(1, 4))

    # masks 4..7: pairwise combinations of masks 0..3
    #   4 = m0 * m3        5 = m1 * m2
    #   6 = (1-m0)*(1-m1)  7 = (1-m2)*(1-m3)
    operands = {4: (0, 3), 5: (1, 2), 6: (0, 1), 7: (2, 3)}
    with sharedmem_pool(num_cores) as pool:
        def combine(k):
            left, right = operands[k]
            if k < 6:
                blend_masks[k] = blend_masks[left] * blend_masks[right]
            else:
                blend_masks[k] = ((1 - blend_masks[left]) *
                                  (1 - blend_masks[right]))
        pool.map(combine, range(4, 8))

    return np.array(blend_masks)
def fitRange(A, LogLamMin, LogLamMax, Afguess, Bfguess):
    """Fit (Af, Bf) so simulated flux statistics match the model in one band.

    The target mean and standard deviation are band averages of the global
    `meanfractionmodel` / `varfractionmodel` over [LogLamMin, LogLamMax].
    A Levenberg-Marquardt root find adjusts Af and Bf until the relative
    deviations of the simulated mean and std of exp(-taured) vanish.

    Returns (Af, Bf, xmeanF, xstdF), the fitted parameters and the simulated
    mean and std at the solution.
    """
    sightlines = Sightlines(A, LogLamMin, LogLamMax)
    maker = SpectraMaker(A, sightlines)

    def band_average(model):
        # average of model(a) over the log-wavelength band, a = 1216 / lambda
        def integrand(loglam):
            a = 1216. / 10 ** loglam
            return model(a)
        return romberg(integrand, LogLamMin, LogLamMax) / (LogLamMax - LogLamMin)

    # What is the mean of the model?
    meanF = band_average(meanfractionmodel)
    varF = band_average(varfractionmodel)
    stdF = varF ** 0.5

    # Pixels that are never written stay NaN and are excluded from the stats.
    F = sharedmem.empty(sightlines.Npixels.sum(), 'f8')
    F[...] = numpy.nan

    def simulated_stats():
        filled = F[~numpy.isnan(F)]
        return filled.mean(), filled.std()

    def cost(Af, Bf):
        def work(i):
            first = sightlines.PixelOffset[i]
            npix = sightlines.Npixels[i]
            if npix == 0:
                return
            taured = maker.convolve(i, Afunc=lambda x: Af,
                                    Bfunc=lambda x: Bf,
                                    returns=['taured']).taured
            F[first:first + npix] = numpy.exp(-taured)
        # note the stride: only every 100th sightline index is handed to work
        chunkmap(work, range(0, len(sightlines), 100), 100)
        mean_sim, std_sim = simulated_stats()
        return (mean_sim / meanF - 1), (std_sim / stdF - 1)

    r = root(lambda x: cost(*x), (Afguess, Bfguess), method='lm')
    Af, Bf = r.x
    print("%s %s" % (r.x, r.fun))

    cost(Af, Bf)  # this will update F
    xmeanF, xstdF = simulated_stats()

    print("lam range %s %s" % (10**LogLamMin, 10**LogLamMax))
    print("Af, Bf %s %s" % (Af, Bf))
    print("check %s %s %s %s" % (xmeanF, meanF, xstdF, stdF))
    return Af, Bf, xmeanF, xstdF
def load_mat(self, path='W_mat.npy', X=None):
    """Load the weight matrix from `path` and mirror it into shared memory.

    On success sets `self.W` (float32) and `self.W_shared`.  If `X` is
    given, its CSR components are stored as `X_indptr`, `X_idx` (int32) and
    `X_data` (float32).  A missing file is reported with a message rather
    than raised.
    """
    # Try to load mat!
    try:
        weights = np.load(path).astype(np.float32)
        self.W = weights
        self.W_shared = sharedmem.empty(weights.shape, dtype=np.float32)
        self.W_shared[:, :] = weights[:, :]
        print("The weight matrix was loaded succesfully!")

        if X is None:
            return

        # CSR form is needed for prediction and advantageous when training;
        # keep its three component arrays for later use.
        csr = X.copy().tocsr()
        self.X_indptr = csr.indptr.astype(np.int32)
        self.X_idx = csr.indices.astype(np.int32)
        self.X_data = csr.data.astype(np.float32)
    except FileNotFoundError:
        print("The weight matrix could not be loaded!")
def test_ordered():
    """Timestamps taken inside `pool.ordered` increase with the task index;
    timestamps taken outside it do not."""
    count = 800
    t = sharedmem.empty(count)

    def strictly_increasing():
        return (t[1:] > t[:-1]).all()

    with sharedmem.MapReduce(np=32) as pool:
        def stamp_in_order(i):
            time.sleep(0.1 * numpy.random.uniform())
            with pool.ordered:
                t[i] = time.time()
        pool.map(stamp_in_order, range(count))

        # with ordered, the timestamps follow the task order
        assert strictly_increasing()

        def stamp_freely(i):
            time.sleep(0.1 * numpy.random.uniform())
            t[i] = time.time()
        pool.map(stamp_freely, range(count))

        # without ordered, the ordering is messy
        assert not strictly_increasing()
def split_work(num):
    """Run `do_work` in `num` processes over a shared 20-element vector.

    The vector is filled with random values and printed; process `i` is
    started with the shared array and the offset ``i * width`` where
    ``width = int(20 / num)``.  After all processes have joined, the
    vector and the type of its first element are printed.
    """
    n = 20
    width = int(n / num)

    shared = sharedmem.empty(n)
    initial = numpy.random.rand(1, n)
    shared[:] = initial[0]
    print("values are %s" % shared)

    processes = []
    for index in range(0, num):
        offset = index * width
        processes.append(
            Process(target=do_work, args=(shared, offset), daemon=False))

    for proc in processes:
        proc.start()
    for proc in processes:
        proc.join()

    print("values are %s" % shared)
    print("type is %s" % type(shared[0]))
def doTask(self, tstamp):
    """Compute difference between given image and accumulation, then
    accumulate and set result with the difference.

    The accumulator is created on first use.  It starts at zero rather than
    uninitialised memory, because it is read (for the difference and for the
    weighted update) before anything has been written to it.

    tstamp -- key into the process-shared table `common`; the entry's
              'image_in' is read and its 'image_diff' is written.
    """
    # Compute the alpha value.
    alpha, self.tstamp_prev = util.getAlpha(self.tstamp_prev)
    image = common[tstamp]['image_in']

    # Initialize accumulation if so indicated.
    if self.image_acc is None:
        # np.empty would leave arbitrary bit patterns here; the very first
        # absdiff would be garbage and any NaN/Inf among them would survive
        # accumulateWeighted (0 * NaN is NaN).
        self.image_acc = np.zeros(np.shape(image))

    # Allocate shared memory for the diff image.
    shape = np.shape(image)
    dtype = image.dtype
    image_diff = sharedmem.empty(shape, dtype)

    # Compute difference (written into image_diff).
    cv2.absdiff(
        self.image_acc.astype(image.dtype),
        image,
        image_diff,
    )

    # Write the framerate on top of the image.
    fps_text = '{:.2f}, {:.2f}, {:.2f} fps process'.format(*self.framerate.tick())
    util.writeOSD(image_diff, ('', fps_text,))  # First line is blank (written to later.)

    # Write diff image (actually, reference thereof) to process-shared table.
    # The entry is fetched, modified and stored back as a whole
    # (presumably because `common` is a proxy that does not see nested
    # mutations -- confirm).
    entry = common[tstamp]
    entry['image_diff'] = image_diff
    common[tstamp] = entry

    # Propagate result to the next stage.
    self.putResult(tstamp)

    # Accumulate (updates self.image_acc in place).
    cv2.accumulateWeighted(
        image,
        self.image_acc,
        alpha,
    )
def smooth(self, ftype, ngb=32):
    """Compute gas['sml'] for field `ftype` using an `ngb`-neighbour query.

    The 'weights' returned by the query are viewed as one row of `ngb`
    values per position; the stored value is the square root of each row's
    first entry.  Positions are processed 64k at a time on a sharedmem pool.
    """
    gas = self.F[ftype]
    tree = self.T[ftype]
    from gaepsi.compiledbase.ngbquery import NGBQueryN
    neighbours = NGBQueryN(tree, ngb)

    total = len(gas)
    gas['sml'] = sharedmem.empty(total, dtype='f8')

    with sharedmem.MapReduce(np=self.np) as pool:
        stride = 1024 * 64

        def work(offset):
            rows = slice(offset, offset + stride)
            x, y, z = gas['pos'][rows].T
            found = neighbours(x, y, z)[0]
            table = found['weights'].reshape(-1, ngb)
            gas['sml'][rows] = table[:, 0] ** 0.5
            # progress report: chunk start and total length
            print("%s %s" % (offset, len(gas)))

        pool.map(work, range(0, total, stride))
def loadgrpsubgrp_idspiv(subgrpbool, dirname, n2div):
    """Read a pivot-id file into a shared-memory integer array, chunk by chunk.

    Parameters
    ----------
    subgrpbool : int
        0 reads ``<dirname>/grpids_piv``, 1 reads ``<dirname>/subgrpids_piv``.
    dirname : str
        Directory containing the file.
    n2div : int
        Number of equal-sized chunks to read; any remainder is read last.

    Returns
    -------
    sharedmem array of platform integers with one entry per file record.

    Raises
    ------
    ValueError
        If `subgrpbool` is neither 0 nor 1.
    """
    if subgrpbool == 0:
        filename = 'grpids_piv'
    elif subgrpbool == 1:
        filename = 'subgrpids_piv'
    else:
        raise ValueError("subgrpbool must be 0 or 1, got %r" % (subgrpbool,))
    path = dirname + '/' + filename

    # Map the file only to learn how many integers it holds.
    probe = numpy.memmap(path, dtype=int, mode='r')
    n2 = len(probe)
    del probe

    ids_piv = sharedmem.empty(n2, int)
    chunksize = n2 // n2div

    # Offsets are computed directly, so the case where n2 is an exact
    # multiple of n2div (empty remainder) needs no special slice list.
    with open(path, 'rb') as f:
        for i in range(n2div):
            if chunksize:
                begin = i * chunksize
                ids_piv[begin:begin + chunksize] = numpy.fromfile(
                    f, dtype=int, count=chunksize)
            print(i)
        tail = n2div * chunksize
        if tail < n2:
            ids_piv[tail:] = numpy.fromfile(f, dtype=int, count=n2 - tail)
    return ids_piv
def shm_chunk_gaukernop_at(x, xp, y, data):
    """Apply the kernel operator to `y`, row-chunked over worker processes.

    For every row ``x[l]`` the weights
    ``data[0]**2 * exp(-(|x[l] - xp_j| / (data[1]*data[2]))**2)`` over the
    rows of `xp` are formed and multiplied with `y`.  The rows of `x` are
    split into ``TENSIGA_NUM_THREADS`` contiguous chunks (the last chunk
    takes the remainder), one per worker.

    Returns a shared array of shape ``(x.shape[0], 1)``, one output row per
    row of `x`.

    Raises KeyError if the TENSIGA_NUM_THREADS environment variable is unset.
    """
    nthread = int(os.environ["TENSIGA_NUM_THREADS"])
    n_rows = x.shape[0]
    chunk_size = n_rows // nthread

    # Chunk k covers rows [bounds[k], bounds[k + 1]); the final bound is
    # n_rows so the last chunk absorbs the remainder.
    bounds = [chunk_size * k for k in range(nthread)] + [n_rows]
    indices_start = shm.copy(np.array(bounds[:-1]))
    indices_stop = shm.copy(np.array(bounds[1:]))

    y = np.ascontiguousarray(y)
    x = shm.copy(x)
    xp = shm.copy(xp)
    y = shm.copy(y)
    data = shm.copy(data)

    # One output row per row of x.  Sizing this by y.shape[0] only works when
    # x and xp happen to have the same number of rows.
    result = shm.empty((n_rows, 1), np.float64)

    with shm.MapReduce(np=nthread) as pool:
        @jit(fastmath=True)
        def row(k):
            xslice = x[slice(indices_start[k], indices_stop[k]), :]
            res = np.empty((xslice.shape[0], 1))
            for l in range(xslice.shape[0]):
                d = xslice[l, :] - xp
                norm = np.sqrt(np.sum(d**2, axis=1))
                res[l] = ((data[0]**2) * np.exp(-(norm/(data[1]*data[2]))**2)) @ y
            return k, res

        def reduce(k, coeff):
            # store chunk k's rows at their position in the full result
            result[slice(indices_start[k], indices_stop[k])] = coeff

        pool.map(row, np.arange(nthread), reduce=reduce)
    return result
def main():
    """Time `concurrency` workers that each process a row band of a shared matrix.

    A 10000 x 100 shared array is created, one Worker per band is started,
    each receives a task ``(index, first_row, last_row)``, and the largest
    reported elapsed time is printed once every worker has answered.
    """
    n = 10000
    m = 100
    X = sharedmem.empty((n, m))
    concurrency = 4
    # Integer band height derived from the worker count (floor division keeps
    # the row bounds integral regardless of the division semantics in force).
    unit = n // concurrency

    task_queue = Queue()
    result_queue = Queue()

    # create worker and start
    workers = []
    for _ in range(concurrency):
        worker = Worker(task_queue, result_queue, X)
        worker.start()
        workers.append(worker)

    # put task
    for i in range(concurrency):
        task_queue.put((i, unit * i, unit * i + unit))

    # get result of task: exactly one elapsed time per worker
    elapsed_times = [result_queue.get() for _ in range(concurrency)]

    # stop worker
    for worker in workers:
        worker.terminate()

    # do math for elapsed time
    print("Elapsed time {} [s]".format(np.max(elapsed_times)))
def shm_gaukernop_at(x, xp, y, data):
    """Apply the kernel operator to `y`, one row of `x` per task.

    For row ``x[k]`` the weights
    ``data[0]**2 * exp(-(|x[k] - xp_j| / (data[1]*data[2]))**2)`` over the
    rows of `xp` are formed and multiplied with `y`; the product becomes
    entry ``k`` of the result.

    Returns a shared array with one leading entry per row of `x` and the
    trailing dimensions of `y`.

    Raises KeyError if the TENSIGA_NUM_THREADS environment variable is unset.
    """
    y = np.ascontiguousarray(y)
    x_shm = shm.copy(x)
    xp_shm = shm.copy(xp)
    y_shm = shm.copy(y)
    data_shm = shm.copy(data)
    nthread = int(os.environ["TENSIGA_NUM_THREADS"])

    # One output entry per row of x.  Using y.shape only works when x and xp
    # happen to have the same number of rows.
    result = shm.empty((x_shm.shape[0],) + y.shape[1:], np.float64)

    with shm.MapReduce(np=nthread) as pool:
        def row(k):
            d = x_shm[k, :] - xp_shm
            norm = np.sqrt(np.sum(d**2, axis=1))
            return k, ((data_shm[0]**2) * np.exp(-(norm/(data_shm[1]*data_shm[2]))**2)) @ y_shm

        def reduce(k, coeff):
            result[k] = coeff

        pool.map(row, np.arange(x_shm.shape[0]), reduce=reduce)
    return result
def doTask(self, tstamp):
    """Compute difference between given image and accumulation, then
    accumulate and publish the difference.

    The accumulator is created on first use.  It starts at zero rather than
    uninitialised memory, because it is read (for the difference and for the
    weighted update) before anything has been written to it.

    tstamp -- key into the process-shared table `common`; the entry's
              'image_in' is read and its 'image_diff' is written.
    """
    # Compute the alpha value.
    alpha, self.tstamp_prev = util.getAlpha(self.tstamp_prev)
    image = common[tstamp]['image_in']

    # Initialize accumulation if so indicated.
    if self.image_acc is None:
        # np.empty would leave arbitrary bit patterns here; the very first
        # absdiff would be garbage and any NaN/Inf among them would survive
        # accumulateWeighted (0 * NaN is NaN).
        self.image_acc = np.zeros(np.shape(image))

    # Allocate shared memory for the diff image.
    image_diff = sharedmem.empty(np.shape(image), image.dtype)

    # Compute difference (written into image_diff).
    cv2.absdiff(
        self.image_acc.astype(image.dtype),
        image,
        image_diff,
    )

    # Write diff image (actually, reference thereof) to process-shared table.
    # The entry is fetched, modified and stored back as a whole
    # (presumably because `common` is a proxy that does not see nested
    # mutations -- confirm).
    entry = common[tstamp]
    entry['image_diff'] = image_diff
    common[tstamp] = entry

    # Propagate result to the next stage.
    self.putResult(tstamp)

    # Accumulate (updates self.image_acc in place).
    cv2.accumulateWeighted(
        image,
        self.image_acc,
        alpha,
    )
def compute_strain_energy(self, deck, data_solver):
    """Compute the per-node strain energy with one process per thread slot.

    Allocates `self.strain_energy` in shared memory (one float64 per node),
    splits the node range into `deck.num_threads` contiguous slices (the
    last slice runs to `deck.num_nodes`) and runs
    `compute_strain_energy_slice` on each slice in its own process, waiting
    for all of them to finish.
    """
    ## Strain energy density at each node
    self.strain_energy = sharedmem.empty((deck.num_nodes), dtype=np.float64)

    threads = deck.num_threads
    part = int(deck.num_nodes / threads)
    last_rank = threads - 1

    processes = []
    for rank in range(0, threads):
        start = rank * part
        # the final slice also takes whatever the even split leaves over
        end = deck.num_nodes if rank >= last_rank else start + part
        worker = Process(target=self.compute_strain_energy_slice,
                         args=(deck, data_solver, start, end))
        processes.append(worker)
        worker.start()

    for worker in processes:
        worker.join()
def translate_distMat(combined_list, core_distMat, acc_distMat):
    """Flatten two square distance matrices into one long-form matrix.

    Rows follow the upper triangle in row-major order: (0, 1), (0, 2), ...,
    (0, N-1), (1, 2), ... where N is the length of `combined_list`.

    Args:
        combined_list
            Combined list of references followed by queries (list)
        core_distMat (numpy.array)
            NxN core distances
        acc_distMat (numpy.array)
            NxN accessory distances

    Returns:
        distMat (numpy.array)
            Distances in long form: one row per pair, column 0 core and
            column 1 accessory
    """
    n_samples = len(combined_list)

    def upper_pairs():
        # every (i, j) with i < j, in row-major order
        for first in range(n_samples - 1):
            for second in range(first + 1, n_samples):
                yield first, second

    # create distmat
    number_pairs = int(0.5 * n_samples * (n_samples - 1))
    distMat = sharedmem.empty((number_pairs, 2))

    # extract distances; each row is a view, so assignments land in distMat
    for row, (i, j) in zip(distMat, upper_pairs()):
        row[0] = core_distMat[i, j]
        row[1] = acc_distMat[i, j]

    return distMat
def transform(self, image, do_norm=True, total_cores=None):
    """Transform `image` with every spectrogram, in parallel.

    The image is checked, taken to Fourier space once, and
    `transform_fourier` is then evaluated for each spectrogram index on a
    worker pool, writing into a shared array.

    image       -- input image (validated by `_check_dimensions`)
    do_norm     -- forwarded to `transform_fourier`
    total_cores -- pool size; defaults to `self.total_cores`

    Returns a numpy copy of shape (number of spectrograms, height, width).
    """
    self._check_dimensions(image)
    if total_cores is None:
        total_cores = self.total_cores

    im_fourier = self.do_fourier(image)

    n_spectrograms = len(self._spectrograms)
    trafo = sm.empty((n_spectrograms, self.height, self.width),
                     dtype=DTYPES[1])

    # sm.set_debug(True)
    numexpr_flag = NUM_FFTW_THREADS != 1
    with sharedmem_pool(total_cores, numexpr=numexpr_flag) as pool:
        def fill(index):
            trafo[index] = self.transform_fourier(im_fourier, index, do_norm)
        pool.map(fill, list(range(n_spectrograms)))

    return np.array(trafo)