# Set the optimality tolerance
    analysis.RAMP_penalty = 0.0
    analysis.props.setPenaltyType(penalty='convex', ptype='ramp')
    analysis.setNewInitPointPenalty(analysis.xinit)
elif start_strategy == 'uniform':
    analysis.xinit[:] = 1.0 / analysis.num_materials
    analysis.xinit[::(analysis.num_materials + 1)] = 1.0
    analysis.props.setPenaltyType(penalty='convex', ptype=ptype)
    analysis.setNewInitPointPenalty(analysis.xinit)
else:
    # Set the initial starting point strategy
    analysis.props.setPenaltyType(penalty='convex', ptype=ptype)
    analysis.setNewInitPointPenalty(analysis.xinit)

# Keep track of the elapsed CPU time
init_time = MPI.Wtime()

# Keep track of the number of iterations
niters = 0

# Set the output file name
fname = os.path.join(prefix, 'opt_history.out')
opt.setOutputFile(fname)

for k in range(max_iters):
    # Optimize
    if k > 0 and optimizer == 'paropt':
        opt.resetDesignAndBounds()
        beta = max(10 * tol, (0.75**k) * 1e-2)
        opt.setStartAffineStepMultiplierMin(beta)
    elif k > 0 and optimizer != 'paropt':
def train(self, data):
    (X, Y) = data
    lossfunction = loss(self.lossfunction)
    regularization = regularizer(self.regularizer)
    prox_loss = proxoperator(self.lossfunction)
    prox_regularizer = proxoperator(self.regularizer)

    # dimensions of the problem
    d = X.Width
    n = X.Height

    if self.problem == "multiclass_classification":
        k = int(comm.allreduce(max(Y.Matrix), op=MPI.MAX))  # number of classes
        if self.zerobased:
            k = k + 1
    else:
        k = Y.Matrix.shape[1]

    N = int(self.numfeaturepartitions)  # number of column splits
    P = NumProcessors
    D = self.randomfeatures

    if rank == 0:
        print self.__dict__
        print """Dimensions: X is n=%d x d=%d, k=%d classes, D=%d random features, P=%d processors, N=%d feature partitions""" % (
            n, d, k, D, P, N)

    starttime = MPI.Wtime()

    # Prepare ADMM intermediate matrices
    # distributed intermediate matrices -> split over examples
    O = elem.DistMatrix_d_VC_STAR()
    elem.Zeros(O, n, k)
    Obar = elem.DistMatrix_d_VC_STAR()
    elem.Zeros(Obar, n, k)
    nu = elem.DistMatrix_d_VC_STAR()
    elem.Zeros(nu, n, k)

    # distributed intermediate matrices -> split over features
    #W = elem.DistMatrix_d_VC_STAR(); elem.Zeros(W, D, k)
    #Wbar = elem.DistMatrix_d_STAR_STAR();  # (*,*) distribution - replicated everywhere
    #elem.Zeros(Wbar, D, k)
    #mu = elem.DistMatrix_d_VC_STAR(); elem.Zeros(mu, D, k)
    #J = range(W.ColShift, D, W.ColStride)  # the rows of W, Wbar, mu owned locally

    # on root node
    if rank == 0:
        W = numpy.zeros((D, k))
        Wbar = numpy.zeros((D, k))
        mu = numpy.zeros((D, k))
    else:
        W = None
        Wbar = None
        mu = None

    # local intermediate matrices
    Wi = numpy.zeros((D, k))
    mu_ij = numpy.zeros((D, k))
    ZtObar_ij = numpy.zeros((D, k))

    iter = 0
    ni = O.LocalHeight

    # Create RFTs
    blksize = int(math.ceil(D / N))
    self.RFTs = [
        self.kernel.rft(blksize, self.subtype, forceppy=True)
        for i in range(N - 1)
    ]
    self.RFTs.append(
        self.kernel.rft(D - (N - 1) * blksize, self.subtype, forceppy=True))
    # FIXME for now we are forcing pure python implementation since C++ layer
    # transforms still do not have a good serialization solution

    Precomputed = []

    #y = preprocess_labels(Y.Matrix)
    if self.lossfunction == "crossentropy" or self.lossfunction == "hinge":
        if not self.zerobased:
            y = Y.Matrix - 1.0  # convert from 1-to-K to 0-to-(K-1) representation
    else:
        y = skylark.ml.utils.dummycoding(Y.Matrix, k, self.zerobased)
        y = 2 * y - 1

    localloss = lossfunction(O.Matrix, y)

    while (iter < self.MAXITER):
        iter = iter + 1

        totalloss = comm.reduce(localloss)
        if rank == 0:
            ElapsedTime = MPI.Wtime() - starttime
            print 'iter=%d, objective=%f, time=%f' % (
                iter, totalloss + self.regparam * regularization(W), ElapsedTime)
            #print '\t\titer=%d, objective=%f' % (iter, objective(Wbar));

        Wbar = comm.bcast(Wbar, root=0)
        mu_ij = mu_ij - Wbar
        #mu_ij = mu_ij - Wbar.Matrix;

        # O optimization
        O.Matrix[:] = prox_loss(Obar.Matrix - nu.Matrix, 1.0 / self.rho, y,
                                O.Matrix[:])
        # Compute value of Loss function

        # W optimization
        #W.Matrix[:] = prox_regularizer(Wbar.Matrix[J,:] - mu.Matrix, self.regparam/self.rho);
        if rank == 0:
            W = prox_regularizer(Wbar - mu, self.regparam / self.rho)

        # graph projection step
        sum_o = numpy.zeros((ni, k))

        for j in range(0, N):
            start = j * blksize
            finish = min((j + 1) * blksize, D)
            JJ = range(start, finish)
            Dj = len(JJ)

            Z = (self.RFTs[j] / X.Matrix) * math.sqrt(float(Dj) / D)

            if iter == 1:
                ZtZ = numpy.dot(Z.T, Z)
                A = linalg.inv(ZtZ + numpy.identity(Dj))
                Precomputed.append(A)

            ############## graph projection ##############
            #(Wi[JJ,:], o) = proj_graph(TransformOperator, X.Matrix, JJ, Wbar.Matrix[JJ, :] - mu_ij[JJ,:], ZtObar_ij[JJ,:] + Z(I,JJ)'*nu.Matrix, Precomputed[j]);
            C = Wbar[JJ, :] - mu_ij[JJ, :]
            ZtD = ZtObar_ij[JJ, :] + numpy.dot(Z.T, nu.Matrix)
            WW = numpy.dot(Precomputed[j], (C + ZtD))
            Wi[JJ, :] = WW
            o = numpy.dot(Z, WW)
            ###############################################

            mu_ij[JJ, :] = mu_ij[JJ, :] + Wi[JJ, :]
            ZtObar_ij[JJ, :] = numpy.dot(Z.T, o)
            sum_o = sum_o + o

        localloss = 0.0
        o = numpy.zeros((ni, k))
        for j in range(0, N):
            start = j * blksize
            finish = min((j + 1) * blksize, D)
            JJ = range(start, finish)
            Dj = len(JJ)

            Z = (self.RFTs[j] / X.Matrix) * math.sqrt(float(Dj) / D)
            ZtObar_ij[JJ, :] = ZtObar_ij[JJ, :] + numpy.dot(
                Z.T, (O.Matrix - sum_o)) / (N + 1)
            o = o + numpy.dot(Z, Wbar[JJ, :])
        localloss = localloss + lossfunction(o, y)

        Obar.Matrix[:] = (sum_o + N * O.Matrix) / (N + 1)
        nu.Matrix[:] = nu.Matrix + O.Matrix - Obar.Matrix

        Wisum = comm.reduce(Wi)
        if rank == 0:
            #Wisum = comm.allreduce(Wi)
            #Wbar.Matrix[J,:] = (Wisum[J,:] + W.Matrix)/(P+1)
            #Wbar.Matrix = (Wisum[J,:] + W.Matrix)/(P+1)
            Wbar = (Wisum + W) / (P + 1)
            mu = mu + W - Wbar
            # distributed sum below
            #mu.Matrix[:] = mu.Matrix + W.Matrix - Wbar.Matrix[J,:];

        comm.barrier()

    self.coefficients = Wbar
def main():
    random.seed(10000)

    # Parse user input
    params = parse_input_arguments(sys.argv)
    pdb = params['pdb']
    geom = params['geom']
    beam = params['beam']
    orient = int(params['UniformOrientation'])
    number = int(params['numSlices'])
    outDir = params['outDir']
    saveName = params['saveNameHDF5']
    savePhotons = params['savePhotons']

    # Initialize MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    sz = comm.size

    det = None
    data = None
    if rank == 0:
        print("====================================================================")
        print("Running %d parallel MPI processes" % (comm.size))
        t_start = MPI.Wtime()

    orientations = np.zeros((2 * number, 4))
    particle = ps.Particle()

    if rank == 0:
        if orient == 1:
            orientations = ps.geometry.get_uniform_quat(num_pts=number).astype(np.float64)
        elif orient == 0:
            orientations = ps.geometry.get_random_quat(num_pts=number).astype(np.float64)
        print "O=", orientations.shape
        print "ODtype=", orientations.dtype
        #sys.exit(0)
        print("Reading PDB file...")
        particle.read_pdb(pdb, ff='WK')

        # reading beam and detector files
        beam = ps.Beam(beam)
        #beam.set_wavelength(1.0e-10)
        print beam.get_wavelength()
        det = ps.PnccdDetector(geom=geom, beam=beam)
        print("Broadcasting input to processes...")
        data = {
            'particle': particle,
            'orientations': orientations,
            'detector': det
        }

    dct = comm.bcast(data, root=0)

    if rank == 0:
        pattern_shape = det.pedestal.shape
        fin = h5.File(
            os.path.join(outDir, 'test_saveHDF5_parallel_intens_combined.h5'), 'w')
        if savePhotons == 1:
            fph = h5.File(
                os.path.join(outDir, 'test_saveHDF5_parallel_photons_combined.h5'), 'w')
        if savePhotons == 1:
            dset_photons = fph.create_dataset('imgPhot',
                                              shape=(number, ) + pattern_shape,
                                              dtype=np.int32,
                                              chunks=(1, ) + pattern_shape,
                                              compression="gzip",
                                              compression_opts=4)
        dset_intens = fin.create_dataset('imgIntens',
                                         shape=(number, ) + pattern_shape,
                                         dtype=np.float32,
                                         chunks=(1, ) + pattern_shape,
                                         compression="gzip",
                                         compression_opts=4)
        if savePhotons == 1:
            fph.create_dataset('orientation',
                               data=orientations,
                               compression="gzip",
                               compression_opts=4)
        fin.create_dataset('orientation',
                           data=orientations,
                           compression="gzip",
                           compression_opts=4)
        print("Done creating HDF5 file and datasets...")

        c = 0
        while c < number:
            status1 = MPI.Status()
            result = comm.recv(source=MPI.ANY_SOURCE, status=status1)  # (index, photImg)
            i = status1.Get_source()
            dd = det.add_correction(result[1])
            print("Rank 0: Received image %d from rank %d" % (result[0], i))
            dset_intens[result[0], :, :, :] = dd  #result[1]
            #photoImg = det.add_correction_and_quantization(pattern=result[1])
            if savePhotons == 1:
                photoImg = det.add_quantization(pattern=dd)
                dset_photons[result[0], :, :, :] = photoImg
            c += 1

    else:  # slave
        # initialize intensity volume
        ori = dct['orientations']
        det = dct['detector']
        particle = dct['particle']
        slices_num = ori.shape[0]
        pattern_shape = det.pedestal.shape
        pixel_momentum = det.pixel_position_reciprocal
        sliceOne = np.zeros((pattern_shape))  # left out dtype=np.float32
        mesh_length = 128
        mesh, voxel_length = det.get_reciprocal_mesh(voxel_number_1d=mesh_length)
        print "MeshDtype=", mesh.dtype

        intensVol = pg.diffraction.calculate_diffraction_pattern_gpu(
            mesh, particle, return_type='intensity')  # left out mesh.astype(np.float32)

        for i in range((rank - 1), number, sz - 1):
            # transform quaternion (set of orientations) into 3D rotation
            rotmat = ps.geometry.quaternion2rot3d(ori[i, :])
            intensSlice = slave_calc_intensity(rot3d=rotmat,
                                               pixel_momentum=pixel_momentum,
                                               pattern_shape=pattern_shape,
                                               volume=intensVol,
                                               voxel_length=voxel_length)  # intensVol.astype(np.float32)
            # Convert the one image to photons
            #photImg = det.add_correction_and_quantization(pattern=intensSlice)  # astype(np.int32)
            print("Sending slice %d from rank %d" % (i, rank))
            comm.ssend((i, intensSlice), dest=0)

    if rank == 0:
        t_end = MPI.Wtime()
        print("Finishing constructing %d patterns in %f seconds" % (number, t_end - t_start))
        import matplotlib.pyplot as plt
        fin.flush()
        if savePhotons == 1:
            fph.flush()
            # Display first diffraction image
            photImgAssem = det.assemble_image_stack(
                image_stack=fph['imgPhot'][0, :, :, :])
        intensImgAssemb = det.assemble_image_stack(
            image_stack=fin['imgIntens'][0, :, :, :])
        #diff = photoImg2 - photoImg
        #print np.nonzero(diff)
        #print np.max
        #diffImgAssemb = det.assemble_image_stack(image_stack=diff)
        #fig = plt.figure()
        #ax1 = fig.add_subplot(2,1,1)
        #plt.imshow(diffImgAssemb)
        #plt.colorbar()
        #ax1.colorbar()
        #ax2 = fig.add_subplot(2,1,2)
        #ax2.imshow(np.log(photImgAssem+1), interpolation='none')
        #ax2.colorbar()
        plt.show()
        fin.close()
        if savePhotons == 1:
            fph.close()
        sys.exit()
rank = comm.Get_rank()
size = comm.Get_size()
myhost = MPI.Get_processor_name()


def filter_fn(evt):
    return True


xtc_dir = "/global/cscratch1/sd/monarin/testxtc2/hsd"
max_events = int(sys.argv[1])
ds = DataSource('exp=xpptut13:run=1:dir=%s' % (xtc_dir),
                filter=filter_fn,
                max_events=max_events,
                batch_size=1)

st = MPI.Wtime()
for run in ds.runs():
    #det = run.Detector('xppcspad')
    for evt in run.events():
        print("%s %d %f" % (myhost, rank, time.time()))
        #pass
en = MPI.Wtime()

if rank == 0:
    print("#Events %d #Files %d #smd0_threads %s Total Elapsed (s): %6.2f Rate (kHz): %6.2f" %
          (max_events, 16, os.environ.get("PS_SMD0_THREADS", 1), en - st,
           (max_events / ((en - st) * 1000))))
def start_timing(self, f):
    info = self.information(f)
    self.tree = self.tree.add_node(info)
    self.func[info].total_time.append(mpi.Wtime())
    self.func[info].self_time.append(0)
os.mkdir(prefix)

fname = os.path.join(prefix, 'performance_profile.dat')
fp = open(fname, 'w')

# Iterate over all the trusses
index = 0
for vals in trusses:
    # Set the values of N/M
    N = vals[0]
    M = vals[1]

    print 'Optimizing truss (%d x %d) ...' % (N, M)

    # Optimize each of the trusses
    truss = setup_ground_struct(N, M)
    t0 = MPI.Wtime()
    if optimizer == 'None':
        opt = paropt_truss(truss, prefix=prefix, use_hessian=use_hessian)

        # Get the optimized point
        x = opt.getOptimizedPoint()
    else:
        # Read out the options from the dictionary of options
        options = all_options[optimizer]

        # Set the output file
        filename = os.path.join(prefix, 'output_%dx%d.out' % (N, M))
        options[outfile_name] = filename

        # Optimize the truss with the specified optimizer
        opt, prob, sol = pyopt_truss(truss,
def main(argv):
    comm = MPI.COMM_WORLD  # Communicator
    pid = comm.rank        # Process id
    size = comm.size       # Number of processes

    # Constants supplied on the command line
    R = 0  # Constant luciferin decay rate
    G = 0  # Constant luciferin increment fraction
    S = 0  # Constant step distance the worms move
    I = 0  # Coverage range of a worm for including associated data
    L = 0  # Initial luciferin value of the worms
    K = 0  # Number of classes to find
    M = 0  # Worms-per-datum rate

    # Variables
    data = []                  # Data set
    cant_gusanos = 0           # Number of worms, 90% of the total number of data points
    gusanos = []               # Array of worms
    listaInv = []              # Inverted list with the indices of the data
    diccionarioC_r = {}        # Key = {number of covered elements} | Value = {elements with 'key' covered data}
    maxIntraD = 0.0            # Maximum intraD value within the set of worms
    centroidesCandidatos = []  # List of candidate centroids
    valor_SSE = 0.0            # Value of the SSE (sum of squared errors)
    interDist = 0              # Value of the inter-distance

    # Synchronize the processes
    comm.barrier()

    # <----- Start timing ----->
    t_start = MPI.Wtime()

    # Process 0 collects the command-line arguments and loads the data.
    # It also computes the number of worms, 0.9 of the total number of data points.
    if pid == 0:
        R, G, S, I, L, K, M = getValores(argv)  # Store the values given on the command line
        data, cant_datos = cargarDatos()

    # Bcast the list of data to all processes
    data, R, G, S, I, L, K, M = comm.bcast((data, R, G, S, I, L, K, M), root=0)

    # <------ Ranges for task parallelization ------>
    # Work ranges for creating the worms
    inicio = int(pid * (len(data) * M) / size)
    final = int((len(data) * M) / size + inicio)

    # Work ranges for creating the inverted list
    init_ListaInvertida = int(pid * len(data) / size)
    final_ListaInvertida = int((pid + 1) * len(data) / size)
    # <------ Ranges for task parallelization ------>

    # Special reduction operations for the different data structures
    diccionarioSUM = MPI.Op.Create(combinarDiccionarios, commute=True)
    listaInvertidaSUM = MPI.Op.Create(combinarListasInvertidas, commute=True)

    # Each process builds its part of the inverted list
    listaInv = generarListaInvertida(data, init_ListaInvertida, final_ListaInvertida)

    # Reduce the inverted list
    listaInv = comm.allreduce(listaInv, op=listaInvertidaSUM)

    # Bcast the inverted list to all processes
    listaInv = comm.bcast(listaInv, root=0)

    # Create the worms according to the division of work among processes
    for i in range(inicio, final):
        g = Gusano(L, randomPos(pid, size), I, S)
        g.sacarConjuntoCubierto(listaInv, data)
        g.setIntraD(data)
        if (g.getIntraD() > maxIntraD):
            maxIntraD = g.getIntraD()
        if (len(g.getCCubierto()) > 0):  # Discard worms that cover no data
            gusanos.append(g)
            if (len(g.getCCubierto()) in diccionarioC_r):
                # If the number of covered data points is already a key, append to its values
                diccionarioC_r[len(g.getCCubierto())].append(g)
            else:
                # Otherwise create a new key
                diccionarioC_r[len(g.getCCubierto())] = [g]

    # Reduce the dictionaries
    diccionarioFinalC_r = comm.allreduce(diccionarioC_r, op=diccionarioSUM)

    # Reduce the list of worms to process 0
    gusanos = comm.reduce(gusanos, op=MPI.SUM)

    # Process 0 generates the list of candidate centroids
    if pid == 0:
        gusanos.sort(key=lambda x: len(x.cCubierto), reverse=True)
        print(diccionarioFinalC_r.keys())
        centroidesCandidatos = gusanos[:int(len(gusanos) / 2)]
        valor_SSE = getSSE(centroidesCandidatos, gusanos)
        interDist = getInterDist(centroidesCandidatos)
        nom_arch_msjs = "GSOprogress.txt"
        str_toWrite = ""

    centroidesCandidatos, valor_SSE, interDist, maxIntraD, gusanos = comm.bcast(
        (centroidesCandidatos, valor_SSE, interDist, maxIntraD, gusanos), 0)

    #while(condiciones):
    # Parallelize this loop so that each process only handles a specific subset of worms
    for k in range(0, NUMERODEITERACIONES):
        t_inicio_iteracion = MPI.Wtime()
        newGusanos = []
        inicio = int(pid * (len(gusanos) / size))
        final = int(len(gusanos) / size + inicio)
        for i in range(inicio, final):
            gusanos[i].setFitness(len(data), valor_SSE, maxIntraD)
            gusanos[i].actualizarLuciferina(R, G)
            # EXTREMELY INEFFICIENT, ends up with O(n^2) time complexity (possible optimizations)
            gusanos[i].sacarVecindario(gusanos)
            gusanos[i].setMejorVecino()
            gusanos[i].moverGusano()
            gusanos[i].sacarConjuntoCubierto(listaInv, data)
            gusanos[i].setIntraD(data)
            if (len(gusanos[i].getCCubierto()) > 0):
                newGusanos.append(gusanos[i])
        gusanos = newGusanos
        gusanos = comm.reduce(gusanos, op=MPI.SUM)
        t_final_iteracion = MPI.Wtime()
        t_total_iteracion = comm.reduce(t_final_iteracion - t_inicio_iteracion, op=MPI.MAX)

        if (pid == 0):
            gusanos = revisarCentroides(gusanos, 2)
            centroidesCandidatos = sacarCcPorFitness(gusanos)
            valor_SSE = getSSE(centroidesCandidatos, gusanos)
            interDist = getInterDist(centroidesCandidatos)
            centroidesCandidatos = revisarCentroides(centroidesCandidatos, 2)
            print("Cant CC = ", len(centroidesCandidatos))
            print("Cant Gusanos = ", len(gusanos))
            print(t_total_iteracion)
            with open(nom_arch_msjs, 'w') as archivo:
                str_toWrite += "Iteracion " + str(k) + " tardó " + str(t_total_iteracion) + " segundos." + '\n' + "Cantidad de Centroides:" + str(len(centroidesCandidatos)) + '\n' + "Cantidad de gusanos: " + str(len(gusanos)) + '\n' + "\n --------------------------------------------------------------------------- \n"
                archivo.write(str_toWrite)
                archivo.close()
                if (len(centroidesCandidatos) <= 10):
                    archivo.write("Centroides Finales: \n")
                    for i in centroidesCandidatos:
                        str_toWrite += "Centroide " + str(i) + ": " + i.toString()
                    archivo.write(str_toWrite)
                    archivo.close()

        centroidesCandidatos, valor_SSE, interDist, gusanos = comm.bcast(
            (centroidesCandidatos, valor_SSE, interDist, gusanos), 0)

    # <----- Stop timing ----->
    t_final = MPI.Wtime()

    # Reduce the timing to process 0
    tw = comm.reduce(t_final - t_start, op=MPI.MAX)

    if pid == 0:
        print(tw)
def manager(taskCommandlineDictionary):
    still_todo = taskCommandlineDictionary
    num_tasks = len(still_todo)
    comm = MPI.COMM_WORLD
    jobid = os.getenv("SLURM_JOBID", default="nojobid")
    num_workers = comm.Get_size() - 1
    print "Job ID:", jobid
    print "Processes: 1 master and %d workers" % num_workers
    print "Tasks to do:", num_tasks

    active_workers = range(1, num_workers + 1)
    worker_to_subdir = {}
    granted_wtime = int(os.getenv("PBS_WALLTIME",
                                  default=4294967295))  # PBS_WALLTIME is to be set
    start_time = MPI.Wtime()  # current time in seconds

    def elapsed_time():
        return MPI.Wtime() - start_time

    def remaining_time():
        return int(granted_wtime - elapsed_time())

    print "Remaining wall time:", remaining_time()

    def send_task_from_still_todo(destination):
        comm.send(MSG_MANAGER_HAS_WORK, tag=TAG_STATUS_MSG, dest=destination)
        subdir, commandline = still_todo.popitem(last=False)
        comm.send(subdir, tag=TAG_SUBDIR, dest=destination)
        comm.send(commandline, tag=TAG_COMMANDLINE, dest=destination)
        comm.send(remaining_time(), tag=TAG_WALLTIME_SECONDS, dest=destination)
        worker_to_subdir[destination] = subdir
        print "Worker %d processes task %s" % (destination, subdir)

    # distribute initial work
    for i in range(1, min(num_tasks + 1, num_workers + 1)):
        send_task_from_still_todo(destination=i)

    # if necessary, tell some workers that there is no work for them
    # and dismiss them
    for i in range(num_tasks + 1, num_workers + 1):
        comm.send(MSG_MANAGER_HAS_NO_WORK, tag=TAG_STATUS_MSG, dest=i)
        active_workers.remove(i)

    abort_job = False
    abort_filename = "ABORT." + jobid
    failed_tasks = 0

    def give_new_task_if_we_have_any(destination):
        "if our list of tasks is not empty and the flag abort_job is not set, send out a new task"
        if len(still_todo) > 0 and not abort_job:
            # send new work
            send_task_from_still_todo(destination=destination)
        else:
            # no more work or time is up, dismiss the worker
            comm.send(MSG_MANAGER_HAS_NO_WORK, tag=TAG_STATUS_MSG, dest=destination)
            active_workers.remove(destination)

    while len(active_workers) > 0:
        if not abort_job and remaining_time() < safety_walltime:
            print "Remaining wall time is less than %d seconds, stopping distribution of tasks\n" % safety_walltime
            abort_job = True
        elif not abort_job and os.path.exists(abort_filename):
            print "Found file %s, stopping distribution of tasks\n" % abort_filename
            abort_job = True

        # Check for idle workers
        for i in active_workers:
            if comm.Iprobe(source=i, tag=TAG_STATUS_MSG):
                msg = comm.recv(source=i, tag=TAG_STATUS_MSG)
                if msg == MSG_WORKER_FINISHED:
                    print "SUCCESS: Worker %d, task %s" % (i, worker_to_subdir[i])
                    give_new_task_if_we_have_any(destination=i)
                elif msg == MSG_WORKER_ERROR:
                    print "FAILURE: Worker %d, task %s" % (i, worker_to_subdir[i])
                    failed_tasks += 1
                    give_new_task_if_we_have_any(destination=i)
                else:
                    raise Exception(
                        "Manager received an invalid status message from worker %d" % i)
                print "Remaining wall time:", remaining_time(), ", remaining tasks:", len(still_todo)
                print

        # save some cpu
        time.sleep(0.1)

    print "End of job.\nRemaining wall time: %d\nTasks not started: %d" % (
        remaining_time(), len(still_todo))
    print "Failed tasks: %d" % failed_tasks
    return
def elapsed_time(): return MPI.Wtime() - start_time
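

# A minimal usage sketch (an assumption, not part of the original snippet): the
# one-line helper above reads a module-level `start_time` captured with
# MPI.Wtime() before the timed region; the names below are illustrative only.
from mpi4py import MPI

start_time = MPI.Wtime()
# ... the work being timed would run here ...
print("elapsed: %.3f s" % elapsed_time())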
def update_nodes(self):
    """
    Update the u- and f-values at the collocation nodes -> corresponds to a single sweep over all nodes

    Returns:
        None
    """

    # get current level and problem description
    L = self.level
    P = L.prob

    # only if the level has been touched before
    assert L.status.unlocked

    # get number of collocation nodes for easier access
    M = self.coll.num_nodes

    # form Jacobian at fixed time
    jtime = self.params.fixed_time_in_jacobian
    dfdu = P.eval_jacobian(L.u[jtime])

    # form collocation problem
    Gu_ = self.integrate()

    i = 0
    for m in self.node_list:
        # Gu_[m] -= L.u[m + 1] - L.u[0]
        if L.tau[m] is not None:
            Gu_[m] += L.tau[m]

    Guv = []
    for m in range(M):
        if m in self.node_list:
            Guv.append(np.zeros(Gu_[m].values.size, dtype='d'))
        else:
            Guv.append(None)

    dnk = np.zeros(Gu_[m].values.size, dtype='d')

    for m in range(M):
        U = np.zeros(Gu_[m].values.size, dtype='d')  #complex) #P.dtype_u(P.init, val=0.0)
        if m in self.node_list:
            for j in self.node_list:
                U = U + (self.Vi[m, j] * Gu_[j].values).flatten()
            #print(self.rank, "rufe", m, self.rank)
            self.params.comm.Reduce(U, Guv[m], root=self.rank, op=MPI.SUM)
            Guv[m] = Guv[m].reshape(2, (np.sqrt(Guv[m].size / 2)).astype(int),
                                    (np.sqrt(Guv[m].size / 2)).astype(int))
        else:
            for j in self.node_list:
                U = U + (self.Vi[m, j] * Gu_[j].values).flatten()
            root = 0
            if self.rank == 0:
                root = 1
            #print(self.rank, "sende", m, root)
            self.params.comm.Reduce(U, dnk, root=root, op=MPI.SUM)

    uv_g = []
    for m in range(M):
        if m in self.node_list:
            uv_g.append(P.dtype_u(P.init, val=0))
        else:
            uv_g.append(P.dtype_u(P.init, val=0))

    for m in self.node_list:  # range(M): # hell yeah, this is parallel!!
        #if m in self.node_list:
        t1 = MPI.Wtime()
        uv_g[m].values = (P.solve_system_jacobian(
            dfdu, Guv[m], L.dt * self.D[m], L.u[0],
            L.time + L.dt * self.coll.nodes[m]).values)

    for m in range(M):
        U = np.zeros(Gu_[m].values.size, dtype='d')  #.flatten() #complex) #P.dtype_u(P.init, val=0.0)
        K = np.zeros(Gu_[m].values.size, dtype='d')  #.flatten() #complex) #P.dtype_u(P.init, val=0.0)
        if m in self.node_list:
            for j in self.node_list:
                U = U + ((self.V[m, j] * uv_g[j].values.flatten()).astype(float))  #.flatten()
            self.params.comm.Reduce(U, K, root=self.rank, op=MPI.SUM)
            L.u[m + 1].values += K.reshape(2, (np.sqrt(Guv[m].size / 2)).astype(int),
                                           (np.sqrt(Guv[m].size / 2)).astype(int))
        else:
            for j in self.node_list:
                U = U + ((self.V[m, j] * uv_g[j].values.flatten()).astype(float))  #.flatten()
            root = 0
            if self.rank == 0:
                root = 1
            self.params.comm.Reduce(U, dnk, root=root, op=MPI.SUM)

    for m in range(M):  #self.node_list: # hell yeah, this is parallel!!
        if m in self.node_list:
            L.f[m + 1] = P.eval_f(L.u[m + 1], L.time + L.dt * self.coll.nodes[m])

    L.status.updated = True

    return None
def run_vfi(comm):
    '''
    This function runs the main process.
    '''
    s0 = MPI.Wtime()
    f0_sum = 0

    #------------------------------------------#
    #   STEP 1: INITIALIZATION
    #------------------------------------------#
    sys.stdout.write("Running at %d of %d on %s.\n" %
                     (comm.rank, comm.size, MPI.Get_processor_name()))

    # INITIALIZE THE HOUSEHOLD CLASS
    hh = Household()

    #------------------------------------------#
    #   STEP 2: LIFECYCLE COMPUTATION
    #------------------------------------------#
    for age in reversed(range(hh.T)):
        s2 = MPI.Wtime()

        # EMPTY BIN FOR VALUE FUNCTION AND POLICY FUNCTIONS
        results = np.zeros((hh.na * hh.ne, 2))
        V_temp = np.zeros((hh.na, hh.ne))
        a1_temp = np.zeros((hh.na, hh.ne))

        # NO GRID SEARCH AT AGE T
        if (age == hh.T - 1):
            if comm.rank == 0:
                for ind in range(hh.na * hh.ne):
                    ia = ind // hh.ne
                    ie = ind % hh.ne
                    cc = (1 + hh.r) * hh.agrid[ia] + hh.w * hh.egrid[ie]
                    if cc <= 0:
                        cc = 1e-5
                    V_temp[ia, ie] = hh.util(cc)  # VALUE FUNCTION
                    a1_temp[ia, ie] = 0.0         # SAVING

        # GRID SEARCH AT AGE < T
        else:
            if comm.rank == 0:
                V1 = hh.V[age + 1, :, :]
            else:
                V1 = np.empty((hh.na, hh.ne), dtype=np.float64)
            comm.Bcast(V1, root=0)

            # Split the for loop by workers
            lb = int((comm.rank + 0) * np.ceil((hh.na * hh.ne) / comm.size))
            ub = int((comm.rank + 1) * np.ceil((hh.na * hh.ne) / comm.size))
            if hh.na * hh.ne < ub:
                ub = hh.na * hh.ne
            leng = ub - lb

            Vp = np.empty((int(leng), 2))
            it = 0
            for ind in range(lb, ub):
                Vp[it, :] = vfi_opt(hh, age, ind)
                it += 1

            # Gather the computed value function by each worker
            comm.Gather(Vp, results, root=0)

            for ind in range(hh.na * hh.ne):
                ia = ind // hh.ne
                ie = ind % hh.ne
                V_temp[ia, ie] = results[ind][0]   # VALUE FUNCTION
                a1_temp[ia, ie] = results[ind][1]  # SAVING

        hh.set_V(age, V_temp)
        hh.set_a1(age, a1_temp)

        f2 = MPI.Wtime() - s2
        f0_sum += f2
        if comm.rank == 0:
            sys.stdout.write("Age: %d. Time: %f seconds. \n" % (age + 1, round(f2, 4)))

    comm.Barrier()

    # TOTAL RUNTIME (s0 was taken with MPI.Wtime(), so use the same clock here)
    f0 = MPI.Wtime() - s0
    run_time = [f0_sum, f0]

    return run_time
        for i in range(len(recs[0].seq)):
            if recs[0].seq[i] != "-" and recs[1].seq[i] != "-":
                if recs[0].seq[i] != recs[1].seq[i]:
                    dist += 1
            else:
                gaps += 1
        similarity = 1 - (float(dist) / (len(recs[0].seq) - gaps))
        #print "Sim ", similarity, " gaps ", gaps, "dis ", dist
        return similarity


#************************************************************ main code *********************************************************************************

# timing
starting = MPI.Wtime()

# read the records
In_Handle = open(INPUT_PATH, "r")
List_Rec = []
NumSeqs = 0
for record in SeqIO.parse(In_Handle, "fasta"):
    List_Rec.append(record)
    NumSeqs += 1
In_Handle.close()

# Calculation of the number of pairs to be aligned
TotPairs = float(len(List_Rec) * len(List_Rec) - len(List_Rec)) / 2

# Distributing the pairs to all the processors
def cal_sn_dep_Cov_cij():
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    comm.Barrier()
    t_start = MPI.Wtime()

    #num_rbin = {'num_sbin': (5, 15, 30, 100, 150), 'num_pbin': (6, 19, 38, 127, 191)}  # s: spectroscopic; p: photometric -- This case is correct only for n(z) Stage-III.
    num_rbin = {
        'num_sbin': (5, 6, 7, 8, 9, 10, 15, 20, 22, 25, 27, 30, 32, 35, 37),
        'num_pbin': (6, 7, 8, 10, 11, 12, 18, 25, 27, 31, 34, 37, 40, 44, 46)
    }  # s: spectroscopic; p: photometric

    red_bin = num_rbin['num_sbin'][nbin_case]
    red_bin_ext = num_rbin['num_pbin'][nbin_case]
    N_dset = (red_bin + 1) * red_bin // 2
    N_dset_ext = (red_bin_ext + 1) * red_bin_ext // 2
    num_kin = 505
    l_min = 1
    l_max = 2002
    #delta_l = 1
    delta_l = 3
    num_l = (l_max - l_min) // delta_l + 1
    f_sky = 15000.0 / 41253.0  # Survey area 15000 deg^2 is from PW-Stage IV (LSST)
    #print("f_sky: ", f_sky)
    data_type_size = 8

    prefix = 'TW_zext_'
    idir0 = './BAO_alpha_{}/'.format(alpha)
    ##idir = './mpi_preliminary_data_{}/comm_size{}/'.format(Pk_type, comm_size)
    idir = idir0 + 'mpi_preliminary_data_{}/'.format(Pk_type)

    #------------- !! write output files, they are the basic files --------------#
    ofdir = idir0 + 'mpi_{}sn_exp_k_data_{}/comm_size{}/'.format(prefix, Pk_type, size)
    Gm_ifprefix = idir0 + 'mpi_preliminary_data_Pwig_nonlinear/' + prefix
    ofprefix = ofdir + prefix
    print('Output file prefix:', ofprefix)

    if rank == 0:
        if not os.path.exists(ofdir):
            os.makedirs(ofdir)

        # read shape noise term \sigma^2/n^i
        inputf = Gm_ifprefix + 'pseudo_shapenoise_{0}rbins_ext.out'.format(red_bin_ext)
        pseudo_sn_ext = np.loadtxt(inputf, dtype='f8', comments='#')
        pseudo_sn = np.array(pseudo_sn_ext[0:red_bin]) * snf
        print(pseudo_sn.shape)
    else:
        pseudo_sn = np.zeros(red_bin)
    comm.Bcast(pseudo_sn, root=0)

    default_num_l_in_rank = int(np.ceil(num_l / size))
    # Rounding errors here should not be a problem unless default size is very small
    end_num_l_in_rank = num_l - (default_num_l_in_rank * (size - 1))
    assert end_num_l_in_rank >= 1, "Assign fewer number of processes."

    if (rank == (size - 1)):
        num_l_in_rank = end_num_l_in_rank
    else:
        num_l_in_rank = default_num_l_in_rank

    # be careful here we have extended photometric redshift bins, which is different from TF case.
    Cijl_len = num_l_in_rank * N_dset_ext
    Cijl_sets = np.zeros(Cijl_len)
    Gm_len = num_l_in_rank * N_dset_ext * num_kout
    Gm_sets = np.zeros(Gm_len)

    # default case with delta_l = 3
    file_Cijl_cross = idir + prefix + 'Cij_l_{}rbins_ext_{}kbins_CAMB.bin'.format(
        red_bin_ext, num_kin)  # Cij_l stores Cij for each ell by row
    Cijl_freader = MPI.File.Open(comm, file_Cijl_cross)  # Open and read a binary file
    Cijl_fh_start = rank * Cijl_len * data_type_size  # need to calculate how many bytes shifted
    Cijl_freader.Seek(Cijl_fh_start)
    Cijl_freader.Read([Cijl_sets, MPI.DOUBLE])  # Read using individual file pointer
    #print('Cij(l) from rank', rank, 'is:', Cijl_sets, '\n')
    comm.Barrier()
    Cijl_freader.Close()

    # Since Cijl has been generated with equal number of ells from different 4 ranks, we could directly read data from sub-binary files.
    # file_Cijl_cross = idir + 'comm_size{}/'.format(comm.size) + prefix + 'Cij_l_{}rbins_ext_{}kbins_CAMB_rank{}.bin'.format(red_bin_ext, num_kin, rank)  # Cij_l stores Cij for each ell by row
    # Cijl_freader = open(file_Cijl_cross, 'rb')  # Open and read a binary file
    # Cijl_sets = np.fromfile(Cijl_freader, dtype='d', count=-1, sep='')
    # #print('Cij(l) from rank', rank, 'is:', Cijl_sets, '\n')
    # Cijl_freader.close()

    #--------------- !! read Gm_cross part by part for each ell -----------------#
    file_Gm_cross = Gm_ifprefix + 'Gm_cross_out_{}rbins_{}kbins_CAMB.bin'.format(
        red_bin_ext, num_kout)
    Gm_freader = MPI.File.Open(comm, file_Gm_cross)
    Gm_fh_start = rank * Gm_len * data_type_size
    Gm_freader.Seek(Gm_fh_start)
    Gm_freader.Read([Gm_sets, MPI.DOUBLE])
    #print('Gm from rank', rank, 'is:', Gm_sets.shape, '\n')
    comm.Barrier()
    Gm_freader.Close()

    def cal_C_G(l, rank):
        n_l = default_num_l_in_rank * rank + l
        ell = l_min + n_l * delta_l
        #offset_cijl = n_l * N_dset * data_type_size
        #offset_Gm = n_l * N_dset * num_kout * data_type_size

        # put the whole array cij at ell into the upper triangle part of the matrix
        cijl_array = Cijl_sets[l * N_dset_ext:(l + 1) * N_dset_ext]
        #print(cijl_array, cijl_array.shape)
        cijl_m[iu1] = np.array(cijl_array)
        #print(cijl_m, cijl_m.shape)
        cijl_m_select = np.array(cijl_m[0:red_bin, 0:red_bin])  # select the first red_bin bins of Cij, match the case with T-F
        cijl_true = np.array(cijl_m_select[iu2])  # convert upper triangle matrix to an array
        cijl_sn = np.array(cijl_true)
        cijl_sn[sn_id] = cijl_true[sn_id] + pseudo_sn  # add shape noise terms in Cii(l) terms

        Cov_cij_cpq = cal_cov_matrix(red_bin, iu2, cijl_sn)  # calculate the covariance matrix of Cij(l), Cpq(l')
        # if rank == 0:
        #     rank_matrix = np.linalg.matrix_rank(Cov_cij_cpq)
        #     print('ell, rank of Cov:', ell, rank_matrix)

        Cov_cij_cpq = Cov_cij_cpq / ((2.0 * ell + 1.0) * delta_l * f_sky)  # account the number of modes for each l with the interval delta_l

        w_ccij, v_ccij = linalg.eigh(Cov_cij_cpq, lower=False,
                                     overwrite_a=True)  # Get eigenvalue and eigenvectors from Scipy routine
        w_inv = 1.0 / w_ccij
        if not np.all(w_inv > 0.0):
            print('w_inv from ell ', ell, ' is negative.')  # show below which ell, the inverse of Cov_cij_cpq fails
        # If uncommenting the below, overwrite_a should be set False in linalg.eigh()
        # sqrt_w_inv = np.diag(w_inv**0.5)
        # v_inv = np.transpose(v_ccij)
        # Cov_cij_sym = np.triu(Cov_cij_cpq, k=1) + Cov_cij_cpq.T
        # print reduce(np.dot, [np.diag(sqrt_w_inv**2.0), v_inv, Cov_cij_sym, v_inv.T])
        Cov_inv_half = np.transpose(w_inv**0.5 * v_ccij)  # Simplify the expression of dot(sqrt_w_inv, v_inv), 05/09/2016

        G_l_array = Gm_sets[l * N_dset_ext * num_kout:(l + 1) * N_dset_ext * num_kout]
        Gm_l_ext = np.reshape(G_l_array, (N_dset_ext, num_kout), 'C')  # In Python, the default storage of a matrix follows C language format.
        #print(Gm_l_ext)
        Gm_l = np.array(Gm_l_ext[Gmrow_sel_ind, :])

        Gm_l = np.dot(Cov_inv_half, Gm_l)
        cijl_true = np.dot(Cov_inv_half, cijl_true)
        return cijl_true, Gm_l

    amode = MPI.MODE_WRONLY | MPI.MODE_CREATE

    #-------- generate C^ij(l) prime --------#
    Cijl_prime_file = ofprefix + 'Cijlprime_{}rbins_{}kbins_snf{}_rank{}.bin'.format(
        red_bin, num_kin, snf, rank)
    Cijl_prime_fwriter = open(Cijl_prime_file, 'wb')

    #------------- !! Gm_prime output -------------#
    Gm_prime_file = ofprefix + 'Gm_cross_prime_{}rbins_{}kbins_snf{}_rank{}.bin'.format(
        red_bin, num_kout, snf, rank)
    Gm_prime_fwriter = open(Gm_prime_file, 'wb')

    Gmrow_sel_ind = np.array([], dtype=int)
    ind_pre = 0
    for row in range(red_bin):
        count = red_bin - row
        for i in range(count):
            Gmrow_sel_ind = np.append(Gmrow_sel_ind, i + ind_pre)
        ind_pre = ind_pre + red_bin_ext - row
    print('Gmrow_sel_ind:', Gmrow_sel_ind)

    iu1 = np.triu_indices(red_bin_ext)
    iu2 = np.triu_indices(red_bin)
    sn_id = [(2 * red_bin + 1 - ii) * ii // 2 for ii in range(red_bin)]  # id of dset C^ij(l) which is added with shot noise
    cijl_m = np.zeros((red_bin_ext, red_bin_ext))  # one matrix to store C^ij at one ell

    for l in range(num_l_in_rank):
        cijl_true, Gm_l = cal_C_G(l, rank)
        cijl_true.tofile(Cijl_prime_fwriter, sep="")
        Gm_l.tofile(Gm_prime_fwriter, sep="")

    comm.Barrier()
    Cijl_prime_fwriter.close()
    Gm_prime_fwriter.close()

    t_end = MPI.Wtime()
    if rank == 0:
        print('With total processes', size, ', the running time:', t_end - t_start)
def MonteCarlo(latticeDim, cycles):
    # calculate the energy and magnetization for a given temp.
    # cycles: MonteCarlo cycles (how many times do we flip the matrix?)
    # latticeDim = dim of square matrix
    # EAverage = energy of matrix averaged over cycles, normalized to spins**2
    # MagAverage = magnetic field of matrix, averaged over cycles, normalized to spins**2
    # EVariance = variance of energy, normalized
    # MagAbsAverage = absolute value of magnetic field, averaged over cycles

    # Setup spin matrix, initialize to ground state
    # MPI Initializations
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()

    Tstart = 2.0
    Tend = 2.3
    dt = 0.05
    E_slutt = 0
    M_slutt = 0
    heatCapacity_slutt = 0
    Susceptibility_slutt = 0

    #spinMatrix = np.zeros((latticeDim, latticeDim), np.int8) + 1  # ALL SPIN UP
    spinMatrix = np.zeros((latticeDim, latticeDim), np.int8)  # RANDOM
    for i in xrange(latticeDim):
        for j in xrange(latticeDim):
            if np.random.random() < 0.5:
                spinMatrix[i, j] = 1
            else:
                spinMatrix[i, j] = -1

    Trange = linspace(Tstart, Tend, int((Tend - Tstart) / dt))
    for temperature in Trange:
        # create and initialize variables
        StartTime = MPI.Wtime()
        E = M = 0
        EAverage = E2Average = MagAverage = Mag2Average = MagAbsAverage = 0
        k = 1.0
        J = 1.0
        beta = 1 / float(k * temperature)
        NumberOfAcceptedStates = 0

        # Possible energy changes, -8J, -4J, 0J, 4J, 8J
        w = np.zeros(17, np.float64)  # 17 = 16 + 1, for degeneration
        for degeneration in xrange(-8, 9, 4):
            w[degeneration + 8] = math.exp(-degeneration * J * beta)  # shift by 8
        #print w

        # Calculate initial magnetization
        M = spinMatrix.sum()
        for j in xrange(latticeDim):
            for i in xrange(latticeDim):
                E -= spinMatrix.item(i, j) * (
                    spinMatrix.item(periodic(i, latticeDim, -1), j) +
                    spinMatrix.item(i, periodic(j, latticeDim, 1)))  # initial energy

        NumberOfAcceptedStates = 0

        # start metropolis MonteCarlo Computation
        for i in xrange(cycles):  # monte carlo cycle
            # loop over all spins, pick a random spin each time
            for s in xrange(latticeDim**(2)):
                x = int(np.random.random() * latticeDim)
                y = int(np.random.random() * latticeDim)
                spinUp = spinMatrix.item(x, periodic(y, latticeDim, 1))
                spinDown = spinMatrix.item(x, periodic(y, latticeDim, -1))
                spinRight = spinMatrix.item(periodic(x, latticeDim, 1), y)
                spinLeft = spinMatrix.item(periodic(x, latticeDim, -1), y)
                deltaE = 2 * spinMatrix.item(x, y) * (spinLeft + spinRight + spinUp + spinDown)
                if np.random.random() <= w[deltaE + 8]:
                    # accept
                    spinMatrix[x, y] = -spinMatrix[x, y]
                    M += 2 * spinMatrix[x, y]  # flipped spin at x, y
                    E += deltaE
                    NumberOfAcceptedStates += 1

            # updating expectation values
            EAverage += E
            E2Average += E**2
            MagAverage += M
            Mag2Average += M**2
            MagAbsAverage += math.fabs(M)

        # To get the average values
        EAverage /= float(cycles)
        E2Average /= float(cycles)
        MagAverage /= float(cycles)
        Mag2Average /= float(cycles)
        MagAbsAverage /= float(cycles)

        heatCapacity = (E2Average - EAverage**2) / float(latticeDim**(2) * temperature**(2))
        Susceptibility = (Mag2Average - MagAbsAverage**2) / float(latticeDim**(2) * temperature)

        EAverage /= float(latticeDim**2)
        MagAverage /= float(latticeDim**2)
        MagAbsAverage /= float(latticeDim**2)

        Elist = np.array(EAverage)
        Eslist = np.array(0.)
        Mlist = np.array(MagAbsAverage)
        Mslist = np.array(0.)
        CVlist = np.array(heatCapacity)
        CVslist = np.array(0.)
        Xilist = np.array(Susceptibility)
        Xislist = np.array(0.)

        comm.Reduce(Elist, Eslist, op=MPI.SUM)
        comm.Reduce(Mlist, Mslist, op=MPI.SUM)
        comm.Reduce(CVlist, CVslist, op=MPI.SUM)
        comm.Reduce(Xilist, Xislist, op=MPI.SUM)

        EAv.append(Eslist / size)
        temp.append(temperature)
        Mag.append(Mslist / size)
        CV.append(CVslist / size)
        Xi.append(Xislist / size)

        if rank == 0:
            print Eslist / size, Mslist / size, CVslist / size, Xislist / size, temperature

        EndTime = MPI.Wtime()
        Totaltime = EndTime - StartTime
        if (rank == 0):
            print  ####
            print Totaltime

    return Eslist / size, CVslist / size, Mslist / size, Xislist / size, temperature
comm = MPI.COMM_WORLD
worker = comm.Get_rank()
num_workers = comm.Get_size()


def createarray():
    vA = np.random.randint(10, size=N)
    return vA


#comm.barrier()


def NsendAll(vB):
    for i in range(1, num_workers):
        comm.send(vB, dest=i)


if worker == 0:
    vA = createarray()
    start = MPI.Wtime()
    NsendAll(vA)
    end = MPI.Wtime()
    print("Runtime", end - start)
else:
    data = comm.recv()

comm.barrier()
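

# Hedged aside (not from the original benchmark): NsendAll above pushes the same
# array to every worker with point-to-point sends; the collective broadcast below
# is the idiomatic equivalent and can be timed the same way. It assumes the same
# `comm`, `worker`, `num_workers`, `N`, and `createarray` defined above.
if worker == 0:
    vB = createarray()
else:
    vB = None
start = MPI.Wtime()
vB = comm.bcast(vB, root=0)
end = MPI.Wtime()
if worker == 0:
    print("Runtime (bcast)", end - start)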
    #plt.draw();
else:
    data = None

result = np.zeros((ImageSize, ImageSize), dtype=np.float64)

# Figure out the size of each chunk of data
n = n_phi / size
begin = n * rank
end = n * (rank + 1)

Transformer = data_transformer(sample_size, ImageSize)

# Communicate data and compute back-projection
comm.barrier()
p_start = MPI.Wtime()

if rank == 0:
    for k in xrange(1, size):  # to other processes
        # send every process its respective chunk of data
        comm.Send(data[n * k:n * (k + 1)], dest=k)
else:
    # allocate buffer
    data = np.zeros((n, sample_size), dtype=np.float64)
    # Receive data from root
    comm.Recv(data, source=0)

for k in xrange(0, n):
    phi = -(k + n * rank) * math.pi / n_phi
    result += Transformer.transform(data[k, :], phi)
    if k % 64 == 0:
Qrmat = pfs.CSR2Mat(Qr)
PrintGreen('done \n')

# Clear some space in memory
del ffdisc.L, ffdisc.B, ffdisc.B2, ffdisc.Q, Qr

# Compute optimal forcings
Print('Compute optimal forcings using SLEPC ... ')
omegas = linspace(0.05, 2, 10)
G = zeros(len(omegas))
idx = 0
for iomega in range(len(omegas)):
    omega = omegas[iomega]
    Print(' omega = %f' % omega)

    # Set up the shell matrix and compute the factorizations
    t1 = MPI.Wtime()
    shell = pfs.OptimalForcings(Lmat, Bmat, B2mat, Pumat, Qmat, Qrmat, omega)
    localsizes, globalsizes = Qrmat.getSizes()
    FR = PETSc.Mat().create(comm)
    FR.setSizes(globalsizes)
    FR.setType('python')
    FR.setPythonContext(shell)
    FR.setUp()
    t2 = MPI.Wtime()
    Print(' CPU time to build FR object : %10.4g ' % (t2 - t1))

    # Compute optimal perturbations
    gains, fs, qs = pfs.OptimalForcingsSLEPc(FR, shell, 1)
    if rank == 0:
        G[idx] = gains[0].real
    idx += 1
#---------------- broadcasting source data ---------------------------------------------------------
# broadcast source to all processors
all_source = comm.bcast(all_source if comm_rank == 0 else None, root=0)

# divide source to each processor
num_source = all_source.shape[1]
local_source_offset = np.linspace(0, num_source, comm_size + 1).astype('int')

# broadcast target to all processors
all_target = comm.bcast(all_target if comm_rank == 0 else None, root=0)
comm.Barrier()

# start timing
t_start = MPI.Wtime()

#---------------- local computation on comm_rank processor -----------------------------------------
# get the local data which will be processed in this processor
# this local source and target array lives on comm_rank processor
local_source = all_source[:, local_source_offset[comm_rank]:
                          local_source_offset[comm_rank + 1]]
local_target = all_target

print("------------- local target point --------------")
print(" %d/%d processor has local target with size %d" %
      (comm_rank, comm_size, local_target.size))

# get local source and target dim
N, local_source_num = local_source.shape
M = local_target.shape[0]
local_u = np.zeros(M) + 1j * np.zeros(M)
def bench_outputs_with_single_file_multiple_writers(self, container_name,
                                                    directory_name, file_name,
                                                    output_per_rank, data=None):
    '''
    Benchmarking outputs with pattern `Single File Multiple Writers`

    Each process will access a single shared file in different sections exclusively.
    Data from different ranks is stored in different blocks.
    Pattern of global block ids: 00002-00005, where the first section is the rank
    and the second section is the block id written by that rank.

    The process is:
        1. Each rank writes blocks to Azure
        2. MPI_Barrier() to wait for all ranks
        3. Get uncommitted block list, rearrange for the order of data
        4. Commit changes

    param:
        container_name: target container
        directory_name: target directory
        file_name: target file
        output_per_rank: size of outputs per rank in MiB
        data: optional cached data for outputs, in this case data for a full block (100 MiB)

    return:
        max_write_time: maximum writing time
        min_write_time: minimum writing time
        avg_write_time: average writing time
    '''
    # Data prepare
    if data == None:
        data = common.workload_generator(self.__mpi_rank, self.BLOCK_LIMIT_IN_BYTES)
    else:
        data = data[0:self.BLOCK_LIMIT_IN_BYTES - 1]
    last_block_data = data
    block_count = output_per_rank // self.BLOCK_LIMIT
    # Last block isn't full
    if output_per_rank % self.BLOCK_LIMIT:
        block_count = block_count + 1
        last_block_data = common.workload_generator(
            self.__mpi_rank, (output_per_rank % self.BLOCK_LIMIT) << 20)

    # Step.1 put blocks
    MPI.COMM_WORLD.Barrier()
    start = MPI.Wtime()
    for i in range(0, block_count):
        block_id = '{:0>5}-{:0>5}'.format(self.__mpi_rank, i)
        if i != (block_count - 1):
            self.__storage_service.put_block(container_name, file_name, data, block_id)
        elif i == (block_count - 1):
            self.__storage_service.put_block(container_name, file_name,
                                             last_block_data, block_id)
    end = MPI.Wtime()

    # Step.2 wait for all ranks
    MPI.COMM_WORLD.Barrier()
    max_write, min_write, avg_write = common.collect_bench_metrics(end - start)

    if 0 == self.__mpi_rank:
        start_postprocessing = MPI.Wtime()
        # Step.3 get block list and sort according to block id
        block_list = self.__storage_service.get_block_list(
            container_name, file_name,
            block_list_type=blob.BlockListType.All).uncommitted_blocks
        block_list.sort(key=lambda block: block.id)
        # Step.4 commit
        self.__storage_service.put_block_list(container_name, file_name, block_list)
        end_postprocessing = MPI.Wtime()
        postprocessing_time = end_postprocessing - start_postprocessing
        max_write = round(max_write + postprocessing_time, 3)
        min_write = round(min_write + postprocessing_time, 3)
        avg_write = round(avg_write + postprocessing_time, 3)

    return max_write, min_write, avg_write
def testWTime(self):
    time1 = MPI.Wtime()
    self.assertTrue(type(time1) is float)
    time2 = MPI.Wtime()
    self.assertTrue(type(time2) is float)
    self.assertTrue(time2 >= time1)
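

# A companion check written in the same style (an assumption, not from the
# original test suite): MPI.Wtick() reports the resolution of MPI.Wtime() and
# should be a positive float.
def testWTick(self):
    tick = MPI.Wtick()
    self.assertTrue(type(tick) is float)
    self.assertTrue(tick > 0.0)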
import AnalysisFunctions as af

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
status = MPI.Status()

#
# INITIALIZATION: DO WHATEVER
#
# END INITIALIZATION

# RANK 0 (MASTER CORE) SETS UP SOME STUFF FOR ITSELF
if rank == 0:
    print 'Setting my own stuff up'
    t_start = MPI.Wtime()
    # DO WHATEVER

comm.Barrier()

for it in range(rank, NUMBER_OF_TOTAL_TIMES, comm.size):
    # DO WHATEVER
    # COMMUNICATE IF YOU LIKE
    pass

comm.Barrier()

if rank == 0:
    # COMMUNICATE THE DATA IF YOU LIKE
    t_fin = MPI.Wtime() - t_start
    print 'Total time taken %0.3f' % t_fin
numprocs = comm.Get_size()
myrank = comm.Get_rank()

if myrank == 0:
    n = int(sys.argv[1])
else:
    n = None

# broadcast n (it is only defined on rank 0, so rank 0 must be the root)
n = comm.bcast(n, root=0)
if n <= 0:
    comm.Abort(-1)

# turn on the stop watch
starttime = MPI.Wtime()

# calculate the interval size, same for X and Y
h = math.pi / float(n)
mysum = 0.0

# distribute work in the X axis
for i in range(myrank, n, numprocs):
    x = h * (i + 0.5)
    # do regular integration in the Y axis
    for j in range(n):
        y = h * (j + 0.5)
        mysum += math.sin(x + y)

local_integral = h * mysum
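
# Sketch of the usual follow-up (not shown in the excerpt above): combine the
# per-rank partial integrals on rank 0 and report the elapsed time. Assumes the
# `comm`, `local_integral`, and `starttime` defined above; depending on the
# original's convention, a further factor of h may still be needed for the 2D
# midpoint rule.
integral = comm.reduce(local_integral, op=MPI.SUM, root=0)
elapsed = MPI.Wtime() - starttime
if myrank == 0:
    print("integral = %s, elapsed = %.4f s" % (integral, elapsed))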
# Define number of processes and rank
num_processes = comm.Get_size()
rank = comm.Get_rank()

if not num_processes in [2**i for i in range(M + 1)]:
    raise IOError("Number of cpus must be in ", [2**i for i in range(M + 1)])

# Each cpu gets ownership of Np slices
Np = N / num_processes

# 'global' matrices (exposed to all processes)
Ag = np.random.rand(N, N)
# resultant matrix (global)
Bg = np.empty((N, N), dtype='complex128')

# sub-matrix for this process (Np-by-N)
A = Ag[rank * Np:(rank + 1) * Np, :]

comm.Barrier()
# start MPI timer
t_start = MPI.Wtime()

B = np.fft.fft(A)
comm.Gather([B, MPI.DOUBLE], [Bg, MPI.DOUBLE])

comm.Barrier()
# stop MPI timer
t_final = (MPI.Wtime() - t_start)

if rank == 0:
    print t_final

sys.exit()
my_size = size // comm.size  # Every process computes a vector of length *my_size*
size = comm.size * my_size   # Make sure size is an integer multiple of comm.size
my_offset = comm.rank * my_size

# This is the complete vector
vec = np.zeros(size)  # Every element zero...
vec[0] = 1.0          # ... besides vec[0]

# Create my (local) slice of the matrix
my_M = np.zeros((my_size, size))
for i in xrange(my_size):
    j = (my_offset + i - 1) % size
    my_M[i, j] = 1.0

comm.Barrier()
### Start stopwatch ###
t_start = MPI.Wtime()

for t in xrange(iter):
    my_new_vec = np.inner(my_M, vec)
    comm.Allgather([my_new_vec, MPI.DOUBLE], [vec, MPI.DOUBLE])

comm.Barrier()
t_diff = MPI.Wtime() - t_start
### Stop stopwatch ###

if fabs(vec[iter] - 1.0) > 0.01:
    pprint("!! Error: Wrong result!")

pprint(" %d iterations of size %d in %5.2fs: %5.2f iterations per second" %
       (iter, size, t_diff, iter / t_diff))
pprint(
def bench_outputs_with_single_file_multiple_writers(self, container_name,
                                                    directory_name, file_name,
                                                    output_per_rank, data=None):
    '''
    Benchmarking outputs with pattern `Single File Multiple Writers`

    Each process will access a single shared file in different sections exclusively.
    Data from different ranks is stored in different ranges.

    The process is:
        1. Create the file with specified size
        2. Each process updates its own range of the file

    param:
        container_name: target container
        directory_name: target directory
        file_name: target file
        output_per_rank: size of outputs per rank in MiB
        data: optional cached data for outputs

    return:
        max_write_time: maximum writing time
        min_write_time: minimum writing time
        avg_write_time: average writing time
    '''
    # Data prepare
    output_per_rank_in_bytes = output_per_rank << 20  # in bytes
    if data == None:
        data = common.workload_generator(self.__mpi_rank, self.FILE_CHUNK_LIMIT_IN_BYTES)
    else:
        data = data[0:self.FILE_CHUNK_LIMIT_IN_BYTES - 1]
    data_last_chunk = data
    chunk_count = output_per_rank // self.FILE_CHUNK_LIMIT
    # Last chunk isn't full
    if output_per_rank % self.FILE_CHUNK_LIMIT:
        chunk_count = chunk_count + 1
        data_last_chunk = common.workload_generator(
            self.__mpi_rank, (output_per_rank % self.FILE_CHUNK_LIMIT) << 20)

    # Step.1 File create
    create_start = 0
    create_end = 0
    if 0 == self.__mpi_rank:
        create_start = MPI.Wtime()
        self.__storage_service.create_file(container_name, directory_name, file_name,
                                           output_per_rank_in_bytes * self.__mpi_size)
        create_end = MPI.Wtime()
    create_time = create_end - create_start

    # Step.2 each rank updates its own byte range
    MPI.COMM_WORLD.Barrier()
    start = MPI.Wtime()
    for i in range(0, chunk_count):
        if i != (chunk_count - 1):
            start_range = self.__mpi_rank * output_per_rank_in_bytes + i * self.FILE_CHUNK_LIMIT_IN_BYTES
            end_range = start_range + len(data) - 1
            self.__storage_service.update_range(container_name, directory_name,
                                                file_name, data, start_range, end_range)
        elif i == (chunk_count - 1):
            start_range = self.__mpi_rank * output_per_rank_in_bytes + i * self.FILE_CHUNK_LIMIT_IN_BYTES
            end_range = start_range + len(data_last_chunk) - 1
            self.__storage_service.update_range(container_name, directory_name,
                                                file_name, data_last_chunk,
                                                start_range, end_range)
    end = MPI.Wtime()
    MPI.COMM_WORLD.Barrier()

    max_write, min_write, avg_write = common.collect_bench_metrics(end - start)
    max_write = round(max_write + create_time, 3)
    min_write = round(min_write + create_time, 3)
    avg_write = round(avg_write + create_time, 3)
    return max_write, min_write, avg_write
from mpi4py import MPI
import numpy as np
import mpids.MPInumpy as mpi_np

measure_time = lambda: MPI.Wtime()


#Creation Routines
def array(size, iters=10000, comm=MPI.COMM_WORLD):
    data = np.arange(size, dtype=np.float64).tolist()
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64, comm=comm, dist='b')
    time = measure_time() - time
    comm.reduce(time, op=MPI.MAX, root=0)
    return time / iters


def empty(size, iters=10000, comm=MPI.COMM_WORLD):
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.empty(size, dtype=np.float64, comm=comm, dist='b')
    time = measure_time() - time
    comm.reduce(time, op=MPI.MAX, root=0)
    return time / iters


def arange(size, iters=10000, comm=MPI.COMM_WORLD):
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.arange(size, dtype=np.float64, comm=comm, dist='b')
                      1.0 / kout[j + 1]) * GF * Pnorm_out[j]
        return Gmatrix_l

    # Gm_cross_out uses selected new k bins
    Gm_cross_file = outf_prefix + 'Gm_cross_out_{}rbins_{}kbins_CAMB_rank{}.bin'.format(
        nbin_ext, num_kout, rank)  # basic variable
    Gm_cross_fwriter = open(Gm_cross_file, 'wb')
    for l in range(num_l_in_rank):
        Gm = cal_Gm(l, rank)
        Gm.tofile(Gm_cross_fwriter, sep="")
    Gm_cross_fwriter.close()


if cal_sn == "True" and rank == 0:
    get_shapenoise()

time0 = MPI.Wtime()
if cal_cijl == "True":
    get_Cijl(comm, rank)

time1 = MPI.Wtime()
if rank == 0:
    print('Running time for Cijl:', time1 - time0)

if Pk_type != 'Pnow' and cal_Gm == "True":
    get_Gm_out(comm, rank)

time2 = MPI.Wtime()
if rank == 0:
    print('Running time for Gm:', time2 - time1)

#######################################################
#
def __init__(self, circle, src, dest,
             treewalk=None, totalsize=0, hostcnt=0,
             prune=False, verify=False, resume=False, workq=None):
    BaseTask.__init__(self, circle)
    self.circle = circle
    self.treewalk = treewalk
    self.totalsize = totalsize
    self.prune = prune
    self.workq = workq
    self.resume = resume
    self.checkpoint_file = None
    self.checkpoint_db = None
    self.src = src
    self.dest = os.path.abspath(dest)

    # cache, keep the size conservative
    # TODO: we need a more portable LRU size
    if hostcnt != 0:
        max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
        procs_per_host = self.circle.size / hostcnt
        self._read_cache_limit = ((max_ofile - 64) / procs_per_host) / 3
        self._write_cache_limit = ((max_ofile - 64) / procs_per_host) * 2 / 3

    if self._read_cache_limit <= 0 or self._write_cache_limit <= 0:
        self._read_cache_limit = 1
        self._write_cache_limit = 8

    self.rfd_cache = LRU(self._read_cache_limit)
    self.wfd_cache = LRU(self._write_cache_limit)

    self.cnt_filesize_prior = 0
    self.cnt_filesize = 0

    self.blocksize = 1024 * 1024
    self.chunksize = 1024 * 1024

    # debug
    self.d = {"rank": "rank %s" % circle.rank}
    self.wtime_started = MPI.Wtime()
    self.wtime_ended = None
    self.workcnt = 0       # this is the cnt for the enqued items
    self.reduce_items = 0  # this is the cnt for processed items

    if self.treewalk:
        log.debug("treewalk files = %s" % treewalk.flist, extra=self.d)

    # fini_check
    self.fini_cnt = Counter()

    # verify
    self.verify = verify
    self.use_store = False
    if self.verify:
        self.chunksums_mem = []
        self.chunksums_buf = []

    # checkpointing
    self.checkpoint_interval = sys.maxsize
    self.checkpoint_last = MPI.Wtime()

    if self.circle.rank == 0:
        print("Start copying process ...")
def main():
    comm = MPI.COMM_WORLD
    id = comm.Get_rank()
    wsize = comm.Get_size()
    tstart = MPI.Wtime()

    fsky = open("skymap.png", "r")
    reader = Reader(fsky)
    skypixelwidth, skypixelheight, skypixels, metadata = reader.read_flat()
    pixelwidth = int(argv[1])
    pixelheight = int(argv[2])

    tskymapstart = MPI.Wtime()
    telepixels = np.zeros((pixelwidth * pixelheight * 3), dtype=np.uint8)
    colorpixels = np.zeros((pixelwidth * pixelheight), dtype=np.uint8)
    skystartall = np.zeros((pixelwidth * pixelheight), dtype=np.uint32)
    telestartall = np.zeros((pixelwidth * pixelheight), dtype=np.uint32)
    colorall = np.zeros((pixelwidth * pixelheight), dtype=np.uint8)
    totnstepsall = np.zeros((wsize), dtype=np.uint32)
    tskymapend = MPI.Wtime()
    tskymap = tskymapend - tskymapstart

    tmin = 1.e6
    tpercparmin = 1.e6
    hinit = 1.e-1
    #h = 1.e-4
    Router = 1000.
    Rplane = 700.
    Rs = 2.
    every = 1
    deltalamb = 1.e-1
    imagewidth = 50
    imageheight = 50
    tiny = 1.e-30
    epsilon = 1.e-8
    eccentricity = 0.2
    Rfac = 1. + 1.e-10
    heps = 1.e-14
    semilatusr = 10.0

    tstartpp = MPI.Wtime()  # percent parallelized
    numperprocess = pixelheight * pixelwidth / wsize
    skystart = np.zeros((numperprocess), dtype=np.int32)
    telestart = np.zeros((numperprocess), dtype=np.int32)
    color = np.zeros((numperprocess), dtype=np.int8)
    totnsteps = np.zeros((numperprocess), dtype=np.int32)
    trk4all = np.zeros((numperprocess), dtype=np.float)
    ttelestop = MPI.Wtime()
    ttele = ttelestop - tstartpp

    trk4 = float("inf")
    for index in range(numperprocess):
        ypix = int((id * numperprocess + index) / pixelwidth)
        xpix = (id * numperprocess + index) % pixelwidth
        tstartrk4 = MPI.Wtime()
        totnsteps[index], skystart[index], telestart[index], color[index] = integrateNullGeodesic(
            xpix, ypix, pixelheight, pixelwidth, skypixelheight, skypixelwidth,
            imagewidth, imageheight, Rs, Router, Rplane, eccentricity,
            semilatusr, epsilon, tiny, hinit, Rfac, heps)
        tendrk4 = MPI.Wtime()
        trk4 = min(trk4, (tendrk4 - tstartrk4) / float(totnsteps[index]))
    totnstepsmax = max(totnsteps)
    tstoppp = MPI.Wtime()
    tpercpar = tstoppp - tstartpp

    comm.Barrier()
    if id == 0:
        totnstepsmaxall = 0
    else:
        totnstepsmaxall = None
    comm.Barrier()
    totnstepsmaxall = comm.reduce(totnstepsmax, op=MPI.MAX, root=0)
    tskymapall = comm.reduce(tskymap, op=MPI.MAX, root=0)
    tteleall = comm.reduce(ttele, op=MPI.MAX, root=0)
    comm.Gatherv(skystart, skystartall, root=0)
    comm.Gatherv(telestart, telestartall, root=0)
    comm.Gatherv(color, colorall, root=0)
    trk4min = comm.reduce(trk4, op=MPI.MIN, root=0)
    comm.Barrier()
    tend = MPI.Wtime()
    tall = tend - tstart

    if id == 0:
        tindexstart = MPI.Wtime()
        for index in range(pixelheight * pixelwidth):
            if (colorall[index] == 1):
                telepixels[telestartall[index]:telestartall[index] + 3] = \
                    skypixels[skystartall[index]:skystartall[index] + 3]
            else:
                telepixels[telestartall[index]] = 255  # leave other two indices zero, red
        tindexend = MPI.Wtime()
        tindex = tindexend - tindexstart

    if id == 0:
        twritestart = MPI.Wtime()
        ftele = open('teleview_{pw}_{ph}_{ws}.png'.format(pw=pixelwidth,
                                                          ph=pixelheight,
                                                          ws=wsize), "w")
        telewrite = Writer(width=pixelwidth, height=pixelheight,
                           greyscale=False, alpha=False)
        telewrite.write_array(ftele, telepixels)
        ftele.close()
        twriteend = MPI.Wtime()
        twrite = twriteend - twritestart

    fsky.close()
    comm.Barrier()
    tmax = comm.reduce(tall, MPI.MAX, root=0)
    tpercparmin = comm.reduce(tpercpar / tall, op=MPI.MIN, root=0)
    comm.Barrier()

    if (id == 0):
        # print("Telescope dimensions in M", 2.*imagewidth, 2.*imageheight)
        # print("Telescope resolution", pixelwidth, pixelheight)
        # print("Skymap resolution", skypixelwidth, skypixelheight)
        # print("Schwarzschild radius in M", 2.*Rs)
        # print("Outer radius in M", 2.*Router)
        # print("Telescope radius in M", 2.*Rplane)
        # print("Number of processes = ", wsize)
        # print("Maximum number of integration steps taken is", totnstepsmaxall)
        # print("The time for a single step of the RK4 is", trk4min)
        # print("Total runtime = ", tmax)
        # print("Fraction parallel = ", tpercparmin)
        print pixelwidth, pixelheight, wsize, totnstepsmaxall, trk4min, tmax, tpercparmin, tindex, twrite, tskymapall, tteleall

    MPI.Finalize()
                  multialignment='center', fontsize=10)
    axes.set_title("rank {}".format(rank), fontsize=20)
    return axes


if __name__ == "__main__":
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()
    name = MPI.Get_processor_name()

    print('Size:', size)
    print('Rank:', rank)
    print('Name:', name)

    tmesh = par_regmesh(size)
    #tmesh = par_mesh(0.05, size)
    lmesh = local_mesh(tmesh, rank)

    t0 = MPI.Wtime()
    c = coloring(lmesh, comm)
    t1 = MPI.Wtime()
    print('color time', t1 - t0)

    flag = check_color(lmesh, c)
    print('Process ', rank, " with same coloring ", np.sum(flag))

    axes = show_mesh(lmesh, c)
    #lmesh.find_edge(axes, index=flag)
    ##show_mesh(lmesh, r1)
    plt.show()