def __call__(self, P):
    # Owner side: threshold the state, announce the spike count, then
    # broadcast the spike indices themselves.
    spikes = array(self._threshold(P), dtype=int)
    nspikes = array([len(spikes)])
    pypar.broadcast(nspikes, self.owner)
    nspikes = nspikes[0]
    if nspikes > 0:
        pypar.broadcast(spikes, self.owner)
    return spikes
def __call__(self, P):
    # Non-owner side: receive the spike count, allocate a matching buffer,
    # then receive the spike indices.
    nspikes = array([0])
    pypar.broadcast(nspikes, self.owner)
    nspikes = nspikes[0]
    spikes = zeros(nspikes, dtype=int)
    if nspikes > 0:
        pypar.broadcast(spikes, self.owner)
    return spikes
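# The two __call__ methods above form a matched pair: the owner broadcasts a
# length-1 count array first, and every rank broadcasts the payload only when
# the count is nonzero, so buffer sizes agree everywhere. A minimal,
# self-contained sketch of the same two-phase pattern; exchange_spikes() is an
# illustrative helper, not part of the snippets above, and my_spikes is assumed
# to be a contiguous int numpy array on the owner:

import pypar
from numpy import array, zeros

def exchange_spikes(my_spikes, owner):
    """Broadcast a variable-length int array from `owner` to all ranks."""
    # Phase 1: agree on the payload size.
    nspikes = array([len(my_spikes) if pypar.rank() == owner else 0])
    pypar.broadcast(nspikes, owner)
    # Phase 2: non-owners allocate a matching buffer, then receive the payload.
    spikes = my_spikes if pypar.rank() == owner else zeros(nspikes[0], dtype=int)
    if nspikes[0] > 0:
        pypar.broadcast(spikes, owner)
    return spikes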
def update_timestep(self, yieldstep, finaltime):
    # LINDA: moved the calculation so that it is done after the timestep
    # has been broadcast.

    import time
    t0 = time.time()

    # pypar only reduces numeric arrays, so wrap the scalar timestep in
    # length-1 arrays for communication.
    ltimestep = num.ones(1, num.float)
    ltimestep[0] = self.flux_timestep
    gtimestep = num.zeros(1, num.float)  # Buffer for the result

    gtimestep = pypar.reduce(ltimestep, pypar.MIN, 0, buffer=gtimestep)
    pypar.broadcast(gtimestep, 0)
    self.flux_timestep = gtimestep[0]

    self.communication_reduce_time += time.time() - t0

    # LINDA: now update time stats; calculate local timestep.
    Domain.update_timestep(self, yieldstep, finaltime)
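# update_timestep() implements a poor man's allreduce: every rank wraps its
# scalar in a length-1 array, pypar.reduce() collects the MIN on rank 0, and
# pypar.broadcast() pushes the result back out. The same idiom in isolation;
# global_min() is a hypothetical helper, not part of the code above:

import pypar
import numpy as num

def global_min(local_value):
    """Return min(local_value) over all ranks, on every rank."""
    lbuf = num.array([local_value], dtype=num.float)
    gbuf = num.zeros(1, num.float)  # buffer for the reduced result
    gbuf = pypar.reduce(lbuf, pypar.MIN, 0, buffer=gbuf)  # contents valid on rank 0
    pypar.broadcast(gbuf, 0)                              # now valid everywhere
    return gbuf[0]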
def test():
    xmax = 2.
    N = 5
    bands = 3
    dx = 2 * xmax / N

    # Create original matrix
    A = zeros((N, N), dtype=complex)
    for i in range(N):
        x = -xmax + i * dx
        for j in range(-bands, bands + 1):
            if 0 <= j + i < N:
                A[i, j + i] = x**2 / (abs(j) + 1)

    # Create packed matrix
    PackedA = zeros((N, 2 * bands + 1), complex)
    for i in range(N):
        for j in range(-bands, bands + 1):
            if 0 <= j + i < N:
                row = i
                col = i + j
                packedRow, packedCol = MapRowColToPacked(row, col, N, bands)
                PackedA[packedRow, packedCol] = A[row, col]

    # Create in-vector and send psi from proc 0 to everyone
    psi = rand(N) + 0.0j
    pypar.broadcast(psi, 0)

    # Reference output
    refOutput = dot(A, psi)

    # Create local vectors and matrices
    localSize = GetDistributedShape(N, ProcCount, ProcId)
    globalStartIndex = GetGlobalStartIndex(N, ProcCount, ProcId)
    globalEndIndex = globalStartIndex + localSize

    localPackedA = PackedA[globalStartIndex:globalEndIndex, :]
    localPsi = psi[globalStartIndex:globalEndIndex]
    localRefOutput = refOutput[globalStartIndex:globalEndIndex]
    localTestOutput = zeros(localSize, dtype=complex)

    for i in range(ProcCount):
        if i == ProcId:
            print "ProcId == %i" % (i)
            print localSize
            print globalStartIndex, " -> ", globalEndIndex
            print ""
        pypar.barrier()

    # Alternatives:
    #   BandedMatrixVectorMultiply(localPackedA, N, bands, localPsi, localTestOutput, ProcCount, ProcId)
    #   BandedMatrixMultiply_Wrapper(localPackedA.reshape(localPackedA.size), 1.0, localPsi, localTestOutput, N, bands)
    TensorPotentialMultiply_BandedDistributed(localPackedA.reshape(localPackedA.size),
                                              1.0, localPsi, localTestOutput, N, bands)

    # The verdict: compare norms of the reference and test outputs.
    for i in range(ProcCount):
        if i == ProcId:
            if i == 0:
                print ""
                print refOutput
                print ""
            print "ProcId == %i" % (i)
            print sqrt(sum(abs(localRefOutput)**2))
            print sqrt(sum(abs(localTestOutput)**2))
            print sqrt(sum(abs(localRefOutput - localTestOutput)**2))
            print ""
        pypar.barrier()
def dpsi_dt(self, psi, t):
    """
    dp_dt = dpsi_dt(psi, t)

    Method that serves as input to the odeint() function.
    Calculates dpsi/dt = -i S**-1 H(t) psi.

    Parameters
    ----------
    psi : 1D complex array. Wavefunction.
    t : float. Time.

    Returns
    -------
    dp_dt : 1D complex array. Derivative of the wavefunction.
    """
    # Reassemble the complex wavefunction from the packed float array.
    psi_complex = psi[:len(psi) / 2] + 1j * psi[len(psi) / 2:]

    dp_dt_complex = zeros(psi_complex.shape, dtype=complex)
    dp_dt_buffer = zeros(psi_complex.shape, dtype=complex)

    # Do operations.
    mat_vec = self.mat_vec_product(psi_complex, t)
    dp_dt_complex[self.my_slice] = self.solve_overlap(-1j * mat_vec)

    # Add the partial results on rank 0, then redistribute.
    dp_dt_complex = pypar.reduce(dp_dt_complex, pypar.SUM, 0, buffer=dp_dt_buffer)
    dp_dt_buffer = dp_dt_complex.copy()
    pypar.broadcast(dp_dt_buffer, 0)  # fills dp_dt_buffer in place on every rank

    # Repack as a float array.
    dp_dt = r_[real(dp_dt_buffer), imag(dp_dt_buffer)]

    if self.my_id == 0:
        print "Time: %2.2f / %2.2f au. Runtime: %2.2f" % (
            t, self.total_duration, (time.time() - self.t_0) / 60.)
        self.debug_norm(t, psi, dp_dt)

    # Store latest result.
    self.prev_out = dp_dt

    return dp_dt
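# dpsi_dt() packs the complex wavefunction into a real array of twice the
# length because odeint() only integrates real systems: the first half holds
# the real part, the second half the imaginary part, and r_[real(y), imag(y)]
# reverses the split done at the top of the method. A minimal round-trip
# illustration of just the packing (the names here are illustrative):

from numpy import r_, real, imag, allclose
from numpy.random import rand

y = rand(8) + 1j * rand(8)
packed = r_[real(y), imag(y)]  # 1D float array, twice the length
unpacked = packed[:len(packed) / 2] + 1j * packed[len(packed) / 2:]
assert allclose(y, unpacked)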
# 5D double arrays
#
X = pypar.receive(myid - 1)
pypar.send(X, (myid + 1) % numproc)

# 5D complex arrays
#
X = pypar.receive(myid - 1)
pypar.send(X, (myid + 1) % numproc)

# Test broadcast - with buffers (arrays, strings and general)
#
testString = ('test' + str(myid)).ljust(10)  # Buffers must have the same length on all procs!
pypar.broadcast(testString, 0)
assert testString.strip() == 'test0'

testString = ('test' + str(myid)).ljust(10)  # Buffers must have the same length on all procs!
pypar.broadcast(testString, numproc - 1)
assert testString.strip() == 'test' + str(numproc - 1)

if myid == 0:
    print "Broadcast communication of strings OK"

####################################################
N = 17  # Number of elements
testArray = myid * numpy.array(range(N))
pypar.broadcast(testArray, 1)
# After the broadcast, every rank should hold rank 1's copy of the array.
assert numpy.allclose(testArray, 1 * numpy.array(range(N)))
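# The tests above rely on pypar.broadcast() updating the supplied buffer in
# place rather than on a return value, which is why testString/testArray are
# pre-built with the right size on every rank before the call. A sketch of
# that in-place behaviour, under the same assumption:

import numpy
import pypar

data = numpy.zeros(3, 'i')
if pypar.rank() == 0:
    data[:] = [1, 2, 3]   # only the root holds the payload beforehand
pypar.broadcast(data, 0)  # afterwards data == [1, 2, 3] on every rank
assert numpy.allclose(data, [1, 2, 3])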
    send_submesh(submesh, triangles_per_proc, p)

    # Build the local mesh for processor 0
    points, vertices, boundary, quantities, ghost_recv_dict, full_send_dict = \
        extract_submesh(submesh, triangles_per_proc)

# Read in the mesh partition that belongs to this processor (note that
# the information is in the correct form for the GA data structure).
else:
    points, vertices, boundary, quantities, ghost_recv_dict, full_send_dict = \
        rec_submesh(0)

pypar.broadcast(rect, 0)

domain = Parallel_Domain(points, vertices, boundary,
                         full_send_dict=full_send_dict,
                         ghost_recv_dict=ghost_recv_dict,
                         velocity=[0.1, 0.0])

# Make a note of which triangles are full and which are ghost
tri_full_flag = build_full_flag(domain, ghost_recv_dict)

#domain.initialise_visualiser(rect=rect)

# Boundaries
T = Transmissive_boundary(domain)
def Propagate(**args):
    numOutput = args.get("numOutput", 50)

    # Get propagation tasks
    propTasks = args.get("propagationTasks", [])

    # Setup problem
    prop = SetupProblem(**args)

    # Get initial state
    filename = GetGroundstateFilename(**args)
    if not os.path.exists(filename):
        raise Exception("Ground state file not found, please run FindGroundstate() (%s)" % filename)
    pyprop.serialization.LoadWavefunctionHDF(filename, "/wavefunction", prop.psi)
    prop.psi.Normalize()
    initPsi = prop.psi.Copy()

    xGrid = prop.psi.GetRepresentation().GetLocalGrid(0)

    normList = []
    corrList = []
    timeList = []

    # Handle restarting
    if args.get("restart", False):
        pyprop.PrintOut("Restarting propagation...")
        restartFile = args["restartFile"]

        # Get original propagation end time
        T = prop.Config.Propagation.duration

        # Load checkpoint wavefunction
        pyprop.serialization.LoadWavefunctionHDF(restartFile, "/wavefunction", prop.psi)

        # Get restart time: only rank 0 reads the HDF5 attribute.
        restartTime = numpy.zeros(1, dtype=numpy.double)
        if pyprop.ProcId == 0:
            h5file = tables.openFile(restartFile)
            try:
                restartTime[0] = h5file.root.wavefunction._v_attrs.Time
            finally:
                h5file.close()

        # Distribute restart time to all procs
        pypar.broadcast(restartTime, 0)

        # Restart all propagators with correct time
        prop.RestartPropagation(prop.TimeStep, restartTime[0], T)

    for t in prop.Advance(numOutput):
        N = prop.psi.GetNorm()
        C = abs(prop.psi.InnerProduct(initPsi))**2
        pyprop.PrintOut("t = %f, norm = %f, corr = %f" % (t, N, C))
        normList += [N]
        corrList += [C]
        timeList += [t]

        # Other propagation tasks
        for task in propTasks:
            task(t, prop)

    prop.NormList = normList
    prop.CorrList = corrList
    prop.TimeList = timeList

    return prop
print "I am processor %d of %d on node %s" % (myid, nproc, node) # Generation of spikes myspikes = randint(100, size=randint(10)) print "I am sending", myspikes # Broadcasting spikes = [[] for _ in range(nproc)] spikes[myid] = myspikes nspikes = array([0]) for i in range(nproc): if i == myid: # that's me! # Create spikes nspikes[0] = len(myspikes) pypar.broadcast(nspikes, i) if i != myid: # This would be the virtual group spikes[i] = zeros(nspikes, dtype=int) pypar.broadcast(spikes[i], i) print "I received", spikes pypar.finalize() #if myid == 0: # x=2*mV # pypar.send(x,1) # y=pypar.receive(1) # print "I got a big number:",y # v=zeros(10) # pypar.receive(1,buffer=v) # faster with arrays, does not copy the data # print "and now random numbers!"
def main():
    EMAN.appinit(sys.argv)
    if sys.argv[-1].startswith("usefs="):
        sys.argv = sys.argv[:-1]  # remove the runpar fileserver info

    (options, rawimage, refmap) = parse_command_line()

    sffile = options.sffile
    verbose = options.verbose
    shrink = options.shrink
    mask = options.mask
    first = options.first
    last = options.last
    scorefunc = options.scorefunc

    projfile = options.projection
    output_ptcls = options.update_rawimage
    cmplstfile = options.cmplstfile
    ortlstfile = options.ortlstfile
    startSym = options.startSym
    endSym = options.endSym

    if not options.nocmdlog:
        pid = EMAN.LOGbegin(sys.argv)
        EMAN.LOGInfile(pid, rawimage)
        EMAN.LOGInfile(pid, refmap)
        if projfile:
            EMAN.LOGOutfile(pid, projfile)
        if output_ptcls:
            EMAN.LOGOutfile(pid, output_ptcls)
        if cmplstfile:
            EMAN.LOGOutfile(pid, cmplstfile)
        if ortlstfile:
            EMAN.LOGOutfile(pid, ortlstfile)

    # Rank 0 reads the particle list, then broadcasts it to all ranks.
    ptcls = []
    if not (mpi or pypar) or ((mpi and mpi.rank == 0) or (pypar and pypar.rank() == 0)):
        ptcls = EMAN.image2list(rawimage)
        ptcls = ptcls[first:last]
        print "Read %d particle parameters" % (len(ptcls))

    if mpi and mpi.size > 1:
        ptcls = mpi.bcast(ptcls)
        print "rank=%d\t%d particles" % (mpi.rank, len(ptcls))
    elif pypar and pypar.size() > 1:
        ptcls = pypar.broadcast(ptcls)
        print "rank=%d\t%d particles" % (pypar.rank(), len(ptcls))

    if sffile:
        sf = EMAN.XYData()
        sf.readFile(sffile)
        sf.logy()

    # Master branch: write the comparison list file and collect results.
    if not mpi or ((mpi and mpi.rank == 0) or (pypar and pypar.rank() == 0)):
        if cmplstfile and projfile:
            if output_ptcls:
                raw_tmp = output_ptcls
            else:
                raw_tmp = rawimage
            fp = open("tmp-" + cmplstfile, "w")
            fp.write("#LST\n")
            for i in range(len(ptcls)):
                fp.write("%d\t%s\n" % (first + i, projfile))
                fp.write("%d\t%s\n" % (first + i, raw_tmp))
            fp.close()

        if (mpi and mpi.size > 1 and mpi.rank == 0) or (pypar and pypar.size() > 1 and pypar.rank() == 0):
            total_recv = 0
            if output_ptcls:
                total_recv += len(ptcls)
            if projfile:
                total_recv += len(ptcls)
            for r in range(total_recv):
                if mpi:
                    msg, status = mpi.recv()
                else:
                    msg = pypar.receive(r)
                d = emdata_load(msg[0])
                fname = msg[1]
                index = msg[2]
                d.writeImage(fname, index)
                print "write %s %d" % (fname, index)

            if options.ortlstfile:
                solutions = []
                for r in range(1, mpi.size):
                    msg, status = mpi.recv(source=r, tag=r)
                    solutions += msg

                def ptcl_cmp(x, y):
                    eq = cmp(x[0], y[0])
                    if not eq:
                        return cmp(x[1], y[1])
                    else:
                        return eq

                solutions.sort(ptcl_cmp)

    # Worker branch: align each assigned particle against the reference map.
    if (not mpi or (mpi and ((mpi.size > 1 and mpi.rank > 0) or mpi.size == 1))) or \
       (not pypar or (pypar and ((pypar.size() > 1 and pypar.rank() > 0) or pypar.size() == 1))):
        map3d = EMAN.EMData()
        map3d.readImage(refmap, -1)
        map3d.normalize()
        if shrink > 1:
            map3d.meanShrink(shrink)
        map3d.realFilter(0, 0)  # threshold, remove negative pixels

        imgsize = map3d.ySize()

        img = EMAN.EMData()

        ctffilter = EMAN.EMData()
        ctffilter.setSize(imgsize + 2, imgsize, 1)
        ctffilter.setComplex(1)
        ctffilter.setRI(1)

        if (mpi and mpi.size > 1) or (pypar and pypar.size() > 1):
            ptclset = range(mpi.rank - 1, len(ptcls), mpi.size - 1)
        else:
            ptclset = range(0, len(ptcls))

        if mpi:
            print "Process %d/%d: %d/%d particles" % (mpi.rank, mpi.size, len(ptclset), len(ptcls))

        solutions = []

        for i in ptclset:
            ptcl = ptcls[i]
            e = EMAN.Euler(ptcl[2], ptcl[3], ptcl[4])
            dx = ptcl[5] - imgsize / 2
            dy = ptcl[6] - imgsize / 2
            print "%d\talt,az,phi=%8g,%8g,%8g\tx,y=%8g,%8g" % (
                i + first, e.alt() * 180 / pi, e.az() * 180 / pi, e.phi() * 180 / pi, dx, dy),

            img.readImage(ptcl[0], ptcl[1])
            img.setTAlign(-dx, -dy, 0)
            img.setRAlign(0, 0, 0)
            img.rotateAndTranslate()  # now img is centered
            img.applyMask(int(mask - max(abs(dx), abs(dy))), 6, 0, 0, 0)

            if img.hasCTF():
                fft = img.doFFT()

                ctfparm = img.getCTF()
                ctffilter.setCTF(ctfparm)
                if options.phasecorrected:
                    if sffile:
                        ctffilter.ctfMap(64, sf)  # Wiener filter with 1/CTF (no sign) correction
                else:
                    if sffile:
                        ctffilter.ctfMap(32, sf)  # Wiener filter with 1/CTF (including sign) correction
                    else:
                        ctffilter.ctfMap(2, EMAN.XYData())  # flip phase

                fft.mult(ctffilter)
                img2 = fft.doIFT()  # now img2 is the CTF-corrected raw image

                img.gimmeFFT()
                del fft
            else:
                img2 = img

            img2.normalize()
            if shrink > 1:
                img2.meanShrink(shrink)

            # if sffile:
            #     snrcurve = img2.ctfCurve(9, sf)  # absolute SNR
            # else:
            #     snrcurve = img2.ctfCurve(3, EMAN.XYData())  # relative SNR

            e.setSym(startSym)
            maxscore = -1e30  # the larger the better
            scores = []
            for s in range(e.getMaxSymEl()):
                ef = e.SymN(s)
                # Pawel's fast projection, ~3 times faster than mode -6 with 216^3.
                # Don't use mode -4, it modifies its own data.
                proj = map3d.project3d(ef.alt(), ef.az(), ef.phi(), -1)
                proj2 = proj.matchFilter(img2)
                proj2.applyMask(int(mask - max(abs(dx), abs(dy))), 6, 0, 0, 0)
                if scorefunc == "ncccmp":
                    score = proj2.ncccmp(img2)
                elif scorefunc == "lcmp":
                    score = -proj2.lcmp(img2)[0]
                elif scorefunc == "pcmp":
                    score = -proj2.pcmp(img2)
                elif scorefunc == "fsccmp":
                    score = proj2.fscmp(img2, [])
                elif scorefunc == "wfsccmp":
                    score = proj2.fscmp(img2, snrcurve)
                if score > maxscore:
                    maxscore = score
                    best_proj = proj2
                    best_ef = ef
                    best_s = s
                scores.append(score)

            scores = Numeric.array(scores)
            print "\tbest=%2d euler=%8g,%8g,%8g max score=%12.7g\tmean=%12.7g\tmedian=%12.7g\tmin=%12.7g\n" % (
                best_s, best_ef.alt() * 180 / pi, best_ef.az() * 180 / pi, best_ef.phi() * 180 / pi,
                maxscore, MLab.mean(scores), MLab.median(scores), MLab.min(scores))

            if projfile:
                best_proj.setTAlign(dx, dy, 0)
                best_proj.setRAlign(0, 0, 0)
                best_proj.rotateAndTranslate()

                best_proj.set_center_x(ptcl[5])
                best_proj.set_center_y(ptcl[6])
                best_proj.setRAlign(best_ef)

                if mpi and mpi.size > 1:
                    mpi.send((emdata_dump(best_proj), projfile, i + first), 0)
                elif pypar and pypar.size() > 1:
                    pypar.send((emdata_dump(best_proj), projfile, i + first), 0)
                else:
                    best_proj.writeImage(projfile, i + first)

            img2.setTAlign(0, 0, 0)
            img2.setRAlign(best_ef)
            img2.setNImg(1)

            if output_ptcls:
                if mpi and mpi.size > 1:
                    mpi.send((emdata_dump(img2), output_ptcls, i + first), 0)
                elif pypar and pypar.size() > 1:
                    pypar.send((emdata_dump(img2), output_ptcls, i + first), 0)
                else:
                    img2.writeImage(output_ptcls, i + first)

            solutions.append((ptcl[0], ptcl[1], best_ef.alt(), best_ef.az(), best_ef.phi(), ptcl[5], ptcl[6]))

        if mpi and (mpi.size > 1 and mpi.rank > 0):
            mpi.send(solutions, 0, tag=mpi.rank)

    if mpi:
        mpi.barrier()
    elif pypar:
        pypar.barrier()
    if mpi:
        mpi.finalize()
    elif pypar:
        pypar.finalize()

    if options.cmplstfile:
        os.rename("tmp-" + cmplstfile, cmplstfile)
    if options.ortlstfile:
        lFile = open(options.ortlstfile, "w")
        lFile.write("#LST\n")
        for i in solutions:
            lFile.write("%d\t%s\t%g\t%g\t%g\t%g\t%g\n" % (
                i[1], i[0], i[2] * 180.0 / pi, i[3] * 180.0 / pi, i[4] * 180.0 / pi, i[5], i[6]))
        lFile.close()

    if not options.nocmdlog:
        EMAN.LOGend()
def dkmeans3(pnts_fn, nk, niters, clst_fn, nn_class=nn.nn, seed=42,
             pnts_step=50000, iters_to_output=[], root_rank=0,
             checkpoint=True, featureWrapper=featureNoWrapper):
    """
    Distributed k-means.
    """
    if featureWrapper == None:
        featureWrapper = featureNoWrapper
    elif featureWrapper == 'hell':
        featureWrapper = toHellinger

    npr.seed(seed)

    pnts = pointsObj(pnts_fn)
    npnts = pnts.shape[0]
    ndims = pnts.shape[1]

    if rank == root_rank:
        print 'Using a (%d x %d) %s array for the datapoints' % (npnts, ndims, pnts.dtype)

    if rank == root_rank and ndims > npnts:
        raise RuntimeError, 'dodgy matrix format -- number of dimensions is greater than the number of points!'

    # Find preferred dtype
    if pnts.dtype == 'float64':
        pref_dtype = 'float64'
    else:
        pref_dtype = 'float32'

    start_iter = np.zeros((1,), dtype='int')
    distortion = np.zeros((1,))
    clst_data = np.empty((nk, ndims), dtype=pref_dtype)

    if rank == root_rank:
        print 'Using a (%d x %d) %s array for the clusters' % (
            clst_data.shape[0], clst_data.shape[1], clst_data.dtype)

        checkpoint_fn = clst_fn + '.checkpoint'
        if os.path.exists(checkpoint_fn):
            start_iter[0], clst_data, distortion[0] = dkmeans3_read_clusters(checkpoint_fn)
            print 'Restarting from checkpoint. Start iteration = %d' % start_iter
        else:
            # Seed the clusters with nk distinct random points.
            clst_inds = np.arange(npnts)
            npr.shuffle(clst_inds)
            clst_inds = clst_inds[:nk]
            clst_inds.sort()
            for i, ind in enumerate(clst_inds):
                clst_data[i] = featureWrapper(pnts[ind])

            if 0 in iters_to_output:
                dkmeans3_save_clusters(clst_fn + '.000', clst_data, 0, niters,
                                       pnts.shape, seed, 0.0)

    mpi.broadcast(start_iter, root_rank)

    # Start iterations
    for iter_num in range(start_iter[0], niters):
        t1 = time.time()

        # Broadcast the cluster centers to all nodes.
        mpi.broadcast(clst_data, root_rank)

        # Build the NN functor
        nn_functor = nn_class(clst_data)

        # NOTE: The accumulators are floating point to avoid a cast when used
        # with numpy. Be careful -- float32 has 24 bits of integer precision.
        clst_sums = np.zeros((nk, ndims), dtype=pref_dtype)
        clst_sums_n = np.zeros(nk, dtype=pref_dtype)
        distortion = np.zeros((1,))

        # Let's do nearest neighbours
        stack = []
        if rank == root_rank:
            for l in range(0, npnts, pnts_step):
                r = min(l + pnts_step, npnts)
                stack.append((l, r))
            stack.reverse()

        mpi_queue.mpi_queue(stack,
                            dkmeans3_worker_func(pnts, nn_functor, clst_sums,
                                                 clst_sums_n, distortion,
                                                 pref_dtype,
                                                 featureWrapper=featureWrapper),
                            dkmeans3_result_func,
                            queue_rank=root_rank)

        mpi.inplace_reduce(clst_sums, mpi.SUM, root_rank)
        mpi.inplace_reduce(clst_sums_n, mpi.SUM, root_rank)
        mpi.inplace_reduce(distortion, mpi.SUM, root_rank)

        if rank == root_rank:
            # Check for clusters with no assignments.
            noassign_inds = np.where(clst_sums_n == 0)[0]
            if len(noassign_inds):
                warnings.warn('iter %d: %d clusters have zero points assigned to them - using random points'
                              % (iter_num, len(noassign_inds)))
                clst_sums_n[noassign_inds] = 1
                for ind in noassign_inds:
                    clst_sums[ind] = featureWrapper(pnts[npr.randint(0, pnts.shape[0])])

            clst_sums /= clst_sums_n.reshape(-1, 1)
            clst_data = clst_sums

            t2 = time.time()
            print 'relja_retrival,dkmeans::cluster,%s,%d,%d,%g' % (
                str(datetime.datetime.now()), iter_num + 1, niters, distortion[0])

            # Potentially save the clusters.
            if checkpoint:
                dkmeans3_save_clusters(checkpoint_fn, clst_data, iter_num + 1,
                                       niters, pnts.shape, seed, distortion[0])

            if (iter_num + 1) in iters_to_output:
                dkmeans3_save_clusters(clst_fn + '.%03d' % (iter_num + 1), clst_data,
                                       iter_num + 1, niters, pnts.shape, seed,
                                       distortion[0])

        del clst_sums
        del clst_sums_n

    if rank == root_rank:
        dkmeans3_save_clusters(clst_fn, clst_data, niters, niters, pnts.shape,
                               seed, distortion[0])
        if checkpoint:
            try:
                os.remove(checkpoint_fn)  # Remove the checkpoint file once we've got here.
            except OSError:
                pass

    del clst_data

    mpi.barrier()  # Is this needed?
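# The zero-assignment fix-up in dkmeans3() is worth noting: a cluster that
# attracts no points would divide by zero when the means are formed, so its
# count is forced to 1 and its sum replaced by a random datapoint. The same
# step in isolation, with toy numpy arrays standing in for the real
# accumulators:

import numpy as np

clst_sums = np.array([[4.0, 6.0], [0.0, 0.0]])  # per-cluster coordinate sums
clst_sums_n = np.array([2.0, 0.0])              # per-cluster point counts
pnts = np.array([[1.0, 1.0], [9.0, 9.0]])       # toy dataset

empty = np.where(clst_sums_n == 0)[0]
clst_sums_n[empty] = 1
for ind in empty:
    clst_sums[ind] = pnts[np.random.randint(0, pnts.shape[0])]
clst_sums /= clst_sums_n.reshape(-1, 1)         # safe: no zero counts remain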
def BO_dipole_couplings(self, m_list, q_list, E_lim):
    """
    BO_dipole_couplings(m_list, q_list, E_lim)

    Parallel program that calculates the dipole couplings for a z-polarized
    laser in length gauge. An eigenstate basis is used, of states whose
    quantum numbers are in <m_list> and <q_list>, and whose energies are
    below <E_lim>. The couplings are stored to an HDF5 file.

    Parameters
    ----------
    m_list : list of integers, containing the m values wanted in the basis.
    q_list : list of integers, containing the q values wanted in the basis.
    E_lim : float, the upper limit of the energies wanted in the basis,
        for R ~ 2.0.

    Notes
    -----
    I sometimes observe unnatural spikes in the couplings (as a function
    of R), which should be removed before the couplings are used. I don't
    know why they are there.

    Example
    -------
    >>> filename = "el_states_m_0_nu_70_mu_25_beta_1_00_theta_0_00.h5"
    >>> tdse = tdse_electron.TDSE_length_z(filename = filename)
    >>> m = [0]
    >>> q = [0,1,2,3]
    >>> E_lim = 5.0
    >>> tdse.BO_dipole_couplings(m, q, E_lim)
    """
    # Name of the HDF5 file where the couplings will be saved.
    self.coupling_file = name_gen.electronic_eig_couplings_R(self, m_list, q_list, E_lim)

    # Parallel stuff
    # --------------
    # Get processor 'name'.
    my_id = pypar.rank()

    # Get total number of processors.
    nr_procs = pypar.size()

    # Size of eigenstate basis. (Buffer for broadcast.)
    basis_size_buffer = r_[0]

    # Get number of tasks.
    f = tables.openFile(self.eigenstate_file)
    try:
        R_grid = f.root.R_grid[:]
    finally:
        f.close()

    nr_tasks = len(R_grid)

    # Get a list of the indices of this processor's share of R_grid.
    my_tasks = nice_stuff.distribute_work(nr_procs, nr_tasks, my_id)

    # The processors will be writing to the same file.
    # To avoid problems, the procs run a relay race of file writes,
    # handled by blocking send() and receive() calls.
    # Hopefully there will not be too much waiting.

    # ID of the processor that will start writing.
    starter = 0

    # ID of the processor that will be the last to write.
    ender = (nr_tasks - 1) % nr_procs

    # Buffer for the baton, i.e. the permission slip for file writing.
    baton = r_[0]

    # The processor to receive the baton from.
    receive_from = (my_id - 1) % nr_procs

    # The processor to send the baton to.
    send_to = (my_id + 1) % nr_procs
    # -------------------------------

    # Initializing the HDF5 file
    # --------------------------
    if my_id == 0:
        # Initialize index list.
        index_array = []

        # Find the index of the R closest to 2.0.
        R_index = argmin(abs(R_grid - 2.0))

        # Choose basis functions.
        f = tables.openFile(self.eigenstate_file)
        try:
            for m in m_list:
                m_group = name_gen.m_name(m)
                for q in q_list:
                    q_group = name_gen.q_name(q)
                    for i in range(self.config.nu_max + 1):
                        if eval("f.root.%s.%s.E[%i,%i]" % (m_group, q_group, i, R_index)) > E_lim:
                            break
                        else:
                            # Collect indices of the basis functions.
                            index_array.append(r_[m, q, i])
        finally:
            f.close()

        # Cast index list as an array.
        index_array = array(index_array)

        # Number of eigenstates in the basis.
        basis_size = len(index_array)
        print basis_size, "is the basis size"
        basis_size_buffer[0] = basis_size

        f = tables.openFile(self.coupling_file, 'w')
        try:
            f.createArray("/", "R_grid", R_grid)

            # Saving the index array.
            f.createArray("/", "index_array", index_array)

            # Initializing the arrays for the couplings and energies.
            f.createCArray('/', 'E',
                           tables.atom.FloatAtom(),
                           (basis_size, nr_tasks),
                           chunkshape=(basis_size, 1))

            f.createCArray('/', 'couplings',
                           tables.atom.ComplexAtom(16),
                           (basis_size, basis_size, nr_tasks),
                           chunkshape=(basis_size, basis_size, 1))
        finally:
            f.close()

        # Save config instance.
        self.config.save_config(self.coupling_file)
    # ----------------------------------

    # Calculating the dipole couplings
    # --------------------------------
    # Broadcasting the basis size from processor 0.
    pypar.broadcast(basis_size_buffer, 0)

    # Initializing the index array on the other processors.
    if my_id != 0:
        index_array = zeros([basis_size_buffer[0], 3], dtype=int)

    # Broadcasting the index array from proc 0.
    pypar.broadcast(index_array, 0)

    # Looping over the tasks of this processor.
    for i in my_tasks:
        # Calculate the dipole couplings for one value of R.
        couplings, E = self.calculate_dipole_eig_R(index_array, R_grid[i])

        # First file write. (Send, but do not receive, the baton.)
        if starter == my_id:
            # Write to file.
            self.save_dipole_eig_R(couplings, E, R_grid[i])

            # Avoiding this statement the second time around.
            starter = -1

            # Sending the baton to the next writer.
            pypar.send(baton, send_to, use_buffer=True)

        # Last file write. (Receive, but do not send, the baton.)
        elif i == my_tasks[-1] and ender == my_id:
            # Receiving the baton from the previous writer.
            pypar.receive(receive_from, buffer=baton)

            # Write to file.
            self.save_dipole_eig_R(couplings, E, R_grid[i])

        # The rest of the file writes.
        else:
            # Receiving the baton from the previous writer.
            pypar.receive(receive_from, buffer=baton)

            # Write to file.
            self.save_dipole_eig_R(couplings, E, R_grid[i])

            # Sending the baton to the next writer.
            pypar.send(baton, send_to, use_buffer=True)

        # Showing the progress of the work.
        if my_id == 0:
            nice_stuff.status_bar("Electronic dipole couplings:",
                                  i, len(my_tasks))
    # ----------------------------

    # Letting everyone catch up.
    pypar.barrier()
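# BO_dipole_couplings() serializes HDF5 writes with a "baton" relay: blocking
# point-to-point messages around a ring decide whose turn it is to touch the
# shared file. The core of that pattern, reduced to one write per rank;
# write_my_data() is a hypothetical stand-in for save_dipole_eig_R():

import pypar
from numpy import r_

my_id = pypar.rank()
nr_procs = pypar.size()
baton = r_[0]  # dummy payload; only its arrival matters

if my_id != 0:
    # Wait until the previous rank hands over the baton.
    pypar.receive((my_id - 1) % nr_procs, buffer=baton)
write_my_data(my_id)  # hypothetical helper: exclusive file access happens here
if my_id != nr_procs - 1:
    # Pass the baton on so the next rank may write.
    pypar.send(baton, (my_id + 1) % nr_procs, use_buffer=True)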