Example #1
        # Reset the RAMP penalty parameter
        analysis.RAMP_penalty = 0.0
        analysis.props.setPenaltyType(penalty='convex', ptype='ramp')
        analysis.setNewInitPointPenalty(analysis.xinit)
    elif start_strategy == 'uniform':
        analysis.xinit[:] = 1.0 / analysis.num_materials
        analysis.xinit[::(analysis.num_materials + 1)] = 1.0
        analysis.props.setPenaltyType(penalty='convex', ptype=ptype)
        analysis.setNewInitPointPenalty(analysis.xinit)
    else:
        # Set the initial starting point strategy
        analysis.props.setPenaltyType(penalty='convex', ptype=ptype)
        analysis.setNewInitPointPenalty(analysis.xinit)

    # Keep track of the elapsed CPU time
    init_time = MPI.Wtime()

    # Keep track of the number of iterations
    niters = 0

    # Set the output file name
    fname = os.path.join(prefix, 'opt_history.out')
    opt.setOutputFile(fname)

    for k in range(max_iters):
        # Optimize
        if k > 0 and optimizer == 'paropt':
            opt.resetDesignAndBounds()
            beta = max(10 * tol, (0.75**k) * 1e-2)
            opt.setStartAffineStepMultiplierMin(beta)
        elif k > 0 and optimizer != 'paropt':
Example #2
    def train(self, data):
        (X, Y) = data
        lossfunction = loss(self.lossfunction)
        regularization = regularizer(self.regularizer)
        prox_loss = proxoperator(self.lossfunction)
        prox_regularizer = proxoperator(self.regularizer)

        # dimensions of the problem
        d = X.Width
        n = X.Height
        if self.problem == "multiclass_classification":
            k = int(comm.allreduce(max(Y.Matrix),
                                   op=MPI.MAX))  # number of classes
            if self.zerobased:
                k = k + 1
        else:
            k = Y.Matrix.shape[1]

        N = int(self.numfeaturepartitions)  # number of column splits
        P = NumProcessors
        D = self.randomfeatures

        if rank == 0:
            print self.__dict__
            print """Dimensions: X is n=%d x d=%d, k=%d classes, D=%d random features, P=%d processors, N=%d feature partitions""" % (
                n, d, k, D, P, N)
            starttime = MPI.Wtime()

        # Prepare ADMM intermediate matrices

        # distributed intermediate matrices -> split over examples
        O = elem.DistMatrix_d_VC_STAR()
        elem.Zeros(O, n, k)
        Obar = elem.DistMatrix_d_VC_STAR()
        elem.Zeros(Obar, n, k)
        nu = elem.DistMatrix_d_VC_STAR()
        elem.Zeros(nu, n, k)

        # distributed intermediate matrices -> split over features
        #W = elem.DistMatrix_d_VC_STAR();
        #elem.Zeros(W, D,k)
        #Wbar = elem.DistMatrix_d_STAR_STAR(); # (*,*) distribution - replicated everywhere
        #elem.Zeros(Wbar, D, k)
        #mu = elem.DistMatrix_d_VC_STAR();
        #elem.Zeros(mu, D, k)
        #J = range(W.ColShift, D, W.ColStride) # the rows of W,Wbar,mu owner local

        # on root node
        if rank == 0:
            W = numpy.zeros((D, k))
            Wbar = numpy.zeros((D, k))
            mu = numpy.zeros((D, k))
        else:
            W = None
            Wbar = None
            mu = None

        # local intermediate matrices
        Wi = numpy.zeros((D, k))
        mu_ij = numpy.zeros((D, k))
        ZtObar_ij = numpy.zeros((D, k))

        iter = 0
        ni = O.LocalHeight

        # Create RFTs
        blksize = int(math.ceil(float(D) / N))  # true division so the ceiling takes effect
        self.RFTs = [
            self.kernel.rft(blksize, self.subtype, forceppy=True)
            for i in range(N - 1)
        ]
        self.RFTs.append(
            self.kernel.rft(D - (N - 1) * blksize, self.subtype,
                            forceppy=True))
        # FIXME for now we are forcing pure python implementation since C++ layer
        #       transforms still do not have a good serialization solution

        Precomputed = []

        #y = preprocess_labels(Y.Matrix)
        if self.lossfunction == "crossentropy" or self.lossfunction == "hinge":
            if not self.zerobased:
                y = Y.Matrix - 1.0  # convert from 1-to-K to 0-to-(K-1) representation
            else:
                y = Y.Matrix  # labels are already zero-based
        else:
            y = skylark.ml.utils.dummycoding(Y.Matrix, k, self.zerobased)
            y = 2 * y - 1

        localloss = lossfunction(O.Matrix, y)

        while (iter < self.MAXITER):
            iter = iter + 1

            totalloss = comm.reduce(localloss)
            if rank == 0:
                ElapsedTime = MPI.Wtime() - starttime
                print 'iter=%d, objective=%f, time=%f' % (
                    iter, totalloss + self.regparam * regularization(W),
                    ElapsedTime)
                #print '\t\titer=%d, objective=%f' % (iter, objective(Wbar));

            Wbar = comm.bcast(Wbar, root=0)
            mu_ij = mu_ij - Wbar
            #mu_ij = mu_ij - Wbar.Matrix;

            # O optimization
            O.Matrix[:] = prox_loss(Obar.Matrix - nu.Matrix, 1.0 / self.rho, y,
                                    O.Matrix[:])

            # Compute value of Loss function

            # W optimization

            #W.Matrix[:] = prox_regularizer(Wbar.Matrix[J,:] - mu.Matrix, self.regparam/self.rho);
            if rank == 0:
                W = prox_regularizer(Wbar - mu, self.regparam / self.rho)

            # graph projection step
            sum_o = numpy.zeros((ni, k))

            for j in range(0, N):
                start = j * blksize
                finish = min((j + 1) * blksize, D)
                JJ = range(start, finish)
                Dj = len(JJ)

                Z = (self.RFTs[j] / X.Matrix) * math.sqrt(float(Dj) / D)
                if iter == 1:
                    ZtZ = numpy.dot(Z.T, Z)
                    A = linalg.inv(ZtZ + numpy.identity(Dj))
                    Precomputed.append(A)

                ############## graph projection ##############
                #(Wi[JJ,:], o) = proj_graph(TransformOperator, X.Matrix, JJ, Wbar.Matrix[JJ, :] -  mu_ij[JJ,:],  ZtObar_ij[JJ,:] + Z(I,JJ)'*nu.Matrix, Precomputed[j]);
                C = Wbar[JJ, :] - mu_ij[JJ, :]
                ZtD = ZtObar_ij[JJ, :] + numpy.dot(Z.T, nu.Matrix)

                WW = numpy.dot(Precomputed[j], (C + ZtD))
                Wi[JJ, :] = WW
                o = numpy.dot(Z, WW)

                ###############################################

                mu_ij[JJ, :] = mu_ij[JJ, :] + Wi[JJ, :]

                ZtObar_ij[JJ, :] = numpy.dot(Z.T, o)

                sum_o = sum_o + o

            localloss = 0.0
            o = numpy.zeros((ni, k))
            for j in range(0, N):
                start = j * blksize
                finish = min((j + 1) * blksize, D)
                JJ = range(start, finish)
                Dj = len(JJ)
                Z = (self.RFTs[j] / X.Matrix) * math.sqrt(float(Dj) / D)
                ZtObar_ij[JJ, :] = ZtObar_ij[JJ, :] + numpy.dot(
                    Z.T, (O.Matrix - sum_o)) / (N + 1)
                o = o + numpy.dot(Z, Wbar[JJ, :])
            localloss = localloss + lossfunction(o, y)

            Obar.Matrix[:] = (sum_o + N * O.Matrix) / (N + 1)
            nu.Matrix[:] = nu.Matrix + O.Matrix - Obar.Matrix

            Wisum = comm.reduce(Wi)
            if rank == 0:
                #Wisum = comm.allreduce(Wi)
                #Wbar.Matrix[J,:] = (Wisum[J,:] + W.Matrix)/(P+1)
                #Wbar.Matrix = (Wisum[J,:] + W.Matrix)/(P+1)
                Wbar = (Wisum + W) / (P + 1)
                mu = mu + W - Wbar
                # distributed sum below
                #mu.Matrix[:] = mu.Matrix + W.Matrix - Wbar.Matrix[J,:];

            comm.barrier()

        self.coefficients = Wbar
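
The reduce/average/bcast consensus step above is buried inside the full trainer. Below is a minimal, self-contained sketch of just that cycle (local solve, sum of local blocks on the root, consensus average with the root's regularized solve, dual update, broadcast); the shapes, the random "local solve" and the L1 prox are illustrative placeholders, not the trainer's actual operators.

# Hedged sketch of the ADMM consensus cycle used in train() above.
# Shapes, data and the local solve are illustrative placeholders.
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, P = comm.Get_rank(), comm.Get_size()

D, k, rho, lam = 8, 3, 1.0, 0.1
Wi = np.zeros((D, k))       # local primal block (one per rank)
mu = np.zeros((D, k))       # dual variable kept on the root, as above
W = np.zeros((D, k))
Wbar = np.zeros((D, k))

for it in range(5):
    Wi = Wbar + 0.01 * np.random.randn(D, k)        # placeholder local solve
    Wisum = comm.reduce(Wi, root=0)                 # sum of the local blocks
    if rank == 0:
        # L1 prox stands in for prox_regularizer(Wbar - mu, lam / rho)
        W = np.sign(Wbar - mu) * np.maximum(np.abs(Wbar - mu) - lam / rho, 0.0)
        Wbar = (Wisum + W) / (P + 1)                # consensus average, as in train()
        mu = mu + W - Wbar                          # dual update on the root
    Wbar = comm.bcast(Wbar, root=0)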
Example #3
def main():

    random.seed(10000)

    # Parse user input
    params = parse_input_arguments(sys.argv)
    pdb = params['pdb']
    geom = params['geom']
    beam = params['beam']
    orient = int(params['UniformOrientation'])
    number = int(params['numSlices'])
    outDir = params['outDir']
    saveName = params['saveNameHDF5']
    savePhotons = params['savePhotons']
    # Initialize MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    sz = comm.size

    det = None
    data = None
    if rank == 0:
        print(
            "===================================================================="
        )
        print("Running %d parallel MPI processes" % (comm.size))

        t_start = MPI.Wtime()

        orientations = np.zeros((2 * number, 4))
        particle = ps.Particle()

    if rank == 0:
        if orient == 1:
            orientations = ps.geometry.get_uniform_quat(num_pts=number).astype(
                np.float64)
        elif orient == 0:
            orientations = ps.geometry.get_random_quat(num_pts=number).astype(
                np.float64)
        print "O=", orientations.shape
        print "ODtype=", orientations.dtype
        #sys.exit(0)
        print("Reading PDB file...")
        particle.read_pdb(pdb, ff='WK')
        # reading beam and detector files
        beam = ps.Beam(beam)
        #beam.set_wavelength(1.0e-10)
        print beam.get_wavelength()
        det = ps.PnccdDetector(geom=geom, beam=beam)
        print("Broadcasting input to processes...")

        data = {
            'particle': particle,
            'orientations': orientations,
            'detector': det
        }

    dct = comm.bcast(data, root=0)

    if rank == 0:
        pattern_shape = det.pedestal.shape
        fin = h5.File(
            os.path.join(outDir, 'test_saveHDF5_parallel_intens_combined.h5'),
            'w')
        if savePhotons == 1:
            fph = h5.File(
                os.path.join(outDir,
                             'test_saveHDF5_parallel_photons_combined.h5'),
                'w')

        if savePhotons == 1:
            dset_photons = fph.create_dataset('imgPhot',
                                              shape=(number, ) + pattern_shape,
                                              dtype=np.int32,
                                              chunks=(1, ) + pattern_shape,
                                              compression="gzip",
                                              compression_opts=4)
        dset_intens = fin.create_dataset('imgIntens',
                                         shape=(number, ) + pattern_shape,
                                         dtype=np.float32,
                                         chunks=(1, ) + pattern_shape,
                                         compression="gzip",
                                         compression_opts=4)

        if savePhotons == 1:
            fph.create_dataset('orientation',
                               data=orientations,
                               compression="gzip",
                               compression_opts=4)
        fin.create_dataset('orientation',
                           data=orientations,
                           compression="gzip",
                           compression_opts=4)

        print("Done creating HDF5 file and datasets...")

        c = 0
        while c < number:
            status1 = MPI.Status()
            result = comm.recv(source=MPI.ANY_SOURCE,
                               status=status1)  # (index,photImg)
            i = status1.Get_source()

            dd = det.add_correction(result[1])
            print("Rank 0: Received image %d from rank %d" % (result[0], i))
            dset_intens[result[0], :, :, :] = dd  #result[1]
            #photoImg = det.add_correction_and_quantization(pattern=result[1])
            if savePhotons == 1:
                photoImg = det.add_quantization(pattern=dd)
                dset_photons[result[0], :, :, :] = photoImg
            c += 1

    else:  # slave
        # initialize intensity volume
        ori = dct['orientations']
        det = dct['detector']
        particle = dct['particle']
        slices_num = ori.shape[0]
        pattern_shape = det.pedestal.shape
        pixel_momentum = det.pixel_position_reciprocal
        sliceOne = np.zeros((pattern_shape))  #left out dtype=np.float32
        mesh_length = 128
        mesh, voxel_length = det.get_reciprocal_mesh(
            voxel_number_1d=mesh_length)
        print "MeshDtype=", mesh.dtype

        intensVol = pg.diffraction.calculate_diffraction_pattern_gpu(
            mesh, particle, return_type='intensity')
        # left out mesh.astype(np.float32)
        for i in range((rank - 1), number, sz - 1):
            # transform quaternion (set of orientations) into 3D rotation
            rotmat = ps.geometry.quaternion2rot3d(ori[i, :])

            intensSlice = slave_calc_intensity(rot3d=rotmat,
                                               pixel_momentum=pixel_momentum,
                                               pattern_shape=pattern_shape,
                                               volume=intensVol,
                                               voxel_length=voxel_length)
            # intensVol.astype(np.float32)
            # Convert the one image to photons
            #photImg = det.add_correction_and_quantization(pattern=intensSlice)
            # astype(np.int32)
            print("Sending slice %d from rank %d" % (i, rank))
            comm.ssend((i, intensSlice), dest=0)

    if rank == 0:
        t_end = MPI.Wtime()
        print("Finishing constructing %d patterns in %f seconds" %
              (number, t_end - t_start))
        import matplotlib.pyplot as plt
        fin.flush()
        if savePhotons == 1:
            fph.flush()
            # Display first diffraction image
            photImgAssem = det.assemble_image_stack(
                image_stack=fph['imgPhot'][0, :, :, :])
        intensImgAssemb = det.assemble_image_stack(
            image_stack=fin['imgIntens'][0, :, :, :])
        #diff = photoImg2 - photoImg
        #print np.nonzero(diff)
        #print np.max
        #diffImgAssemb = det.assemble_image_stack(image_stack=diff)
        #fig = plt.figure()
        #ax1 = fig.add_subplot(2,1,1)
        #plt.imshow(diffImgAssemb)
        #plt.colorbar()
        #ax1.colorbar()

        #ax2 = fig.add_subplot(2,1,2)
        #ax2.imshow(np.log(photImgAssem+1), interpolation='none')
        #ax2.colorbar()
        plt.show()
        fin.close()
        if savePhotons == 1:
            fph.close()
        sys.exit()
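
The rank-0 receive loop and the slave send loop above form a standard any-source collector. A stripped-down sketch of just that pattern, with the diffraction work replaced by a dummy payload (run with at least two ranks), might look like this:

# Minimal sketch of the MPI.ANY_SOURCE + MPI.Status collector pattern above;
# the dummy payload stands in for the computed diffraction slice.
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.size
number = 8  # total items, illustrative

if rank == 0:
    for _ in range(number):
        status = MPI.Status()
        index, payload = comm.recv(source=MPI.ANY_SOURCE, status=status)
        print("Rank 0 got item %d from rank %d" % (index, status.Get_source()))
else:
    # same round-robin assignment as the slave loop above
    for i in range(rank - 1, number, size - 1):
        comm.ssend((i, "result-%d" % i), dest=0)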
Example #4
rank = comm.Get_rank()
size = comm.Get_size()
myhost = MPI.Get_processor_name()


def filter_fn(evt):
    return True


xtc_dir = "/global/cscratch1/sd/monarin/testxtc2/hsd"
max_events = int(sys.argv[1])
ds = DataSource('exp=xpptut13:run=1:dir=%s' % (xtc_dir),
                filter=filter_fn,
                max_events=max_events,
                batch_size=1)

st = MPI.Wtime()
for run in ds.runs():
    #det = run.Detector('xppcspad')
    for evt in run.events():
        print("%s %d %f" % (myhost, rank, time.time()))
        #pass

en = MPI.Wtime()

if rank == 0:
    print(
        "#Events %d #Files %d #smd0_threads %s Total Elapsed (s): %6.2f Rate (kHz): %6.2f"
        % (max_events, 16, os.environ.get("PS_SMD0_THREADS", 1), en - st,
           (max_events / ((en - st) * 1000))))
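
The elapsed time printed above is rank 0's local en - st; if ranks finish at different times, the usual refinement is to reduce each rank's elapsed time to the maximum before reporting. A stand-alone sketch:

# Hedged sketch (not in the original script): report the slowest rank's time.
from mpi4py import MPI

comm = MPI.COMM_WORLD
st = MPI.Wtime()
# ... the event loop being timed would go here ...
en = MPI.Wtime()

max_elapsed = comm.reduce(en - st, op=MPI.MAX, root=0)
if comm.Get_rank() == 0:
    print("Max elapsed across ranks (s): %6.2f" % max_elapsed)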
Example #5
 def start_timing(self, f):
     info = self.information(f)
     self.tree = self.tree.add_node(info)
     self.func[info].total_time.append(mpi.Wtime())
     self.func[info].self_time.append(0)
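
Only the start of the timing scope is shown above. A hypothetical stop_timing counterpart, assuming the same self.func bookkeeping (the pop/accumulate details are guesses, not the profiler's real code), could look like:

 def stop_timing(self, f):
     # hypothetical mirror of start_timing above: pop the stored start stamp
     # and accumulate the elapsed wall time for this function
     info = self.information(f)
     start = self.func[info].total_time.pop()
     self.func[info].self_time[-1] += mpi.Wtime() - start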
Example #6
        os.mkdir(prefix)

    fname = os.path.join(prefix, 'performance_profile.dat')
    fp = open(fname, 'w')

    # Iterate over all the trusses
    index = 0
    for vals in trusses:
        # Set the values of N/M
        N = vals[0]
        M = vals[1]

        print 'Optimizing truss (%d x %d) ...' % (N, M)
        # Optimize each of the trusses
        truss = setup_ground_struct(N, M)
        t0 = MPI.Wtime()
        if optimizer == 'None':
            opt = paropt_truss(truss, prefix=prefix, use_hessian=use_hessian)

            # Get the optimized point
            x = opt.getOptimizedPoint()
        else:
            # Read out the options from the dictionary of options
            options = all_options[optimizer]

            # Set the output file
            filename = os.path.join(prefix, 'output_%dx%d.out' % (N, M))
            options[outfile_name] = filename

            # Optimize the truss with the specified optimizer
            opt, prob, sol = pyopt_truss(truss,
Example #7
def main(argv):
    comm = MPI.COMM_WORLD       #Communicator
    pid = comm.rank             #Process id
    size = comm.size            #Number of processes

    #Constants provided on the command line
    R = 0                       #Constant luciferin decay rate
    G = 0                       #Constant luciferin increment fraction
    S = 0                       #Constant distance the worms move per step
    I = 0                       #Coverage range of a worm for including associated data
    L = 0                       #Initial luciferin value of the worms
    K = 0                       #Number of classes to find
    M = 0                       #Rate of worms per data point

    #Variables

    data = []                   #Set of hands (the data set)
    cant_gusanos = 0            #Number of worms, 90% of the total number of data points
    gusanos = []                #Array of worms
    listaInv = []               #Inverted list with the indices of the data points
    diccionarioC_r = {}         #Key = {number of covered elements} | Value = {worms covering 'key' data points}
    maxIntraD = 0.0             #Maximum intraD value within the set of worms
    centroidesCandidatos = []   #List of candidate centroids
    valor_SSE = 0.0             #SSE value (sum of squared errors)
    interDist = 0               #Inter-distance value

    #Synchronize the processes
    comm.barrier()              #Synchronization point

    #<----- Start timing ----->
    t_start = MPI.Wtime()

    """
    Process 0 collects the values passed on the command line and loads the data.
    It also computes the number of worms as 0.9 of the total number of data points.
    """
    if pid == 0:
        R, G, S, I, L, K, M = getValores(argv) #store the values given on the command line
        data, cant_datos = cargarDatos()

    #Bcast the list of data (and the parameters) to all processes
    data, R, G, S, I, L, K, M = comm.bcast((data, R, G, S, I, L, K, M), root = 0)

    
    #<------ Ranges for the task parallelization ------>

    #Determine the work ranges for creating the worms
    inicio = int(pid * (len(data)*M) / size)
    final = int((len(data)*M) / size + inicio)

    #Determine the work ranges for building the inverted list
    init_ListaInvertida = int(pid * len(data) / size)
    final_ListaInvertida = int((pid + 1) * len(data) / size)

    #<------ Ranges for the task parallelization ------>

    #Custom reduction operations for the different data structures
    diccionarioSUM = MPI.Op.Create(combinarDiccionarios,commute = True)
    listaInvertidaSUM = MPI.Op.Create(combinarListasInvertidas, commute = True)

    #Each process builds its part of the inverted list
    listaInv = generarListaInvertida(data,init_ListaInvertida, final_ListaInvertida)

    #Reduce the inverted list across all processes
    listaInv = comm.allreduce(listaInv, op = listaInvertidaSUM)

    #Bcast the inverted list to all processes
    listaInv = comm.bcast(listaInv, root = 0)

    #Create the worms according to the division of work among processes
    for i in range(inicio, final): 
        g = Gusano(L,randomPos(pid,size),I,S)
        g.sacarConjuntoCubierto(listaInv,data)
        g.setIntraD(data)
        if(g.getIntraD() > maxIntraD):
            maxIntraD = g.getIntraD()

        if(len(g.getCCubierto()) > 0):                              #Discard worms that do not cover any data point
            gusanos.append(g)
            if(len(g.getCCubierto()) in diccionarioC_r):            #Check whether this coverage count is already a key in the dictionary
                diccionarioC_r[len(g.getCCubierto())].append(g)     #If the count exists, append the worm to that key's values
            else:
                diccionarioC_r[len(g.getCCubierto())] = [g]         #If not, create a new key
        
        
    #Reduce the dictionaries across all processes
    diccionarioFinalC_r = comm.allreduce(diccionarioC_r, op = diccionarioSUM)

    #Reduce the list of worms to process 0
    gusanos = comm.reduce(gusanos,op = MPI.SUM)

    #Process 0 generates the list of candidate centroids
    if pid == 0:
        gusanos.sort(key = lambda x: len(x.cCubierto), reverse = True)
        print(diccionarioFinalC_r.keys())
        centroidesCandidatos = gusanos[:int(len(gusanos)/2)]
        valor_SSE = getSSE(centroidesCandidatos,gusanos)
        interDist = getInterDist(centroidesCandidatos)
        nom_arch_msjs = "GSOprogress.txt"
        str_toWrite = ""
        
    centroidesCandidatos, valor_SSE, interDist, maxIntraD, gusanos = comm.bcast((centroidesCandidatos,valor_SSE,interDist,maxIntraD,gusanos),0)
    
    
    #while(condiciones): #PARALLELIZE THIS LOOP SO THAT EACH PROCESS HANDLES ONLY A SPECIFIC NUMBER OF WORMS
    for k in range(0,NUMERODEITERACIONES):
        t_inicio_iteracion = MPI.Wtime()
        newGusanos = []
        inicio = int(pid * (len(gusanos)/size))
        final = int(len(gusanos)/size + inicio)
        for i in range(inicio,final):
            gusanos[i].setFitness(len(data),valor_SSE,maxIntraD)
            gusanos[i].actualizarLuciferina(R,G)
            gusanos[i].sacarVecindario(gusanos) #EXTREMELY INEFFICIENT, ends up with O(n^2) time complexity (possible optimizations)
            gusanos[i].setMejorVecino()
            gusanos[i].moverGusano()
            gusanos[i].sacarConjuntoCubierto(listaInv,data)
            gusanos[i].setIntraD(data)
            if(len(gusanos[i].getCCubierto()) > 0):
                newGusanos.append(gusanos[i])
        
        gusanos = newGusanos
        gusanos = comm.reduce(gusanos,op = MPI.SUM)

        t_final_iteracion = MPI.Wtime()
        t_total_iteracion = comm.reduce(t_final_iteracion-t_inicio_iteracion, op = MPI.MAX)

        if(pid == 0):
            gusanos = revisarCentroides(gusanos, 2)
            centroidesCandidatos = sacarCcPorFitness(gusanos)
            valor_SSE = getSSE(centroidesCandidatos,gusanos)
            interDist = getInterDist(centroidesCandidatos)
            centroidesCandidatos = revisarCentroides(centroidesCandidatos, 2)
            print("Cant CC = ", len(centroidesCandidatos))
            print("Cant Gusanos = ", len(gusanos))
            print(t_total_iteracion)
            with open(nom_arch_msjs, 'w') as archivo:
                str_toWrite += "Iteration " + str(k) + " took " + str(t_total_iteracion) + " seconds." + '\n' + "Number of centroids: " + str(len(centroidesCandidatos)) + '\n' + "Number of worms: " + str(len(gusanos)) + '\n' + "\n --------------------------------------------------------------------------- \n"
                archivo.write(str_toWrite)
                if(len(centroidesCandidatos) <= 10):
                    archivo.write("Final centroids: \n")
                    for i in centroidesCandidatos:
                        str_toWrite += "Centroid " + str(i) + ": " + i.toString()
                        archivo.write(str_toWrite)
        centroidesCandidatos, valor_SSE, interDist, gusanos = comm.bcast((centroidesCandidatos,valor_SSE,interDist,gusanos),0)
        

    #<----- Stop timing ----->
    t_final = MPI.Wtime()

    #Reduce the elapsed time to process 0
    tw = comm.reduce(t_final-t_start, op = MPI.MAX)

    if pid == 0:
        print(tw)
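
combinarDiccionarios and combinarListasInvertidas are not shown. For object-mode reductions, mpi4py calls a user op created with MPI.Op.Create as f(a, b, datatype) and uses the returned object, so a merge function for the coverage dictionary could be sketched (hypothetically) as:

# Hypothetical sketch of the dictionary-merging reduction registered with
# MPI.Op.Create above; the real combinarDiccionarios is not shown.
def combinarDiccionarios(dic_a, dic_b, datatype):
    # merge dic_b into dic_a, concatenating the per-key worm lists
    for key, worms in dic_b.items():
        dic_a.setdefault(key, []).extend(worms)
    return dic_a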
Example #8
def manager(taskCommandlineDictionary):
    still_todo = taskCommandlineDictionary
    num_tasks = len(still_todo)

    comm = MPI.COMM_WORLD
    jobid = os.getenv("SLURM_JOBID", default="nojobid")
    num_workers = comm.Get_size() - 1
    print "Job ID:", jobid
    print "Processes: 1 master and %d workers" % num_workers
    print "Tasks to do:", num_tasks
    active_workers = range(1, num_workers + 1)
    worker_to_subdir = {}

    granted_wtime = int(os.getenv(
        "PBS_WALLTIME", default=4294967295))  # PBS_WALLTIME is to be set
    start_time = MPI.Wtime()  # current time in seconds

    def elapsed_time():
        return MPI.Wtime() - start_time

    def remaining_time():
        return int(granted_wtime - elapsed_time())

    print "Remaining wall time:", remaining_time()

    def send_task_from_still_todo(destination):
        comm.send(MSG_MANAGER_HAS_WORK, tag=TAG_STATUS_MSG, dest=destination)
        subdir, commandline = still_todo.popitem(last=False)
        comm.send(subdir, tag=TAG_SUBDIR, dest=destination)
        comm.send(commandline, tag=TAG_COMMANDLINE, dest=destination)
        comm.send(remaining_time(), tag=TAG_WALLTIME_SECONDS, dest=destination)
        worker_to_subdir[destination] = subdir
        print "Worker %d processes task %s" % (destination, subdir)

    # distribute initial work
    for i in range(1, min(num_tasks + 1, num_workers + 1)):
        send_task_from_still_todo(destination=i)

    # if necessary, tell some workers that there is no work for them
    # and dismiss them
    for i in range(num_tasks + 1, num_workers + 1):
        comm.send(MSG_MANAGER_HAS_NO_WORK, tag=TAG_STATUS_MSG, dest=i)
        active_workers.remove(i)

    abort_job = False
    abort_filename = "ABORT." + jobid

    failed_tasks = 0

    def give_new_task_if_we_have_any(destination):
        "if our list of tasks is not empty and the flag abort_job is not set, send out a new task"
        if len(still_todo) > 0 and not abort_job:
            # send new work
            send_task_from_still_todo(destination=destination)
        else:
            # no more work or time is up, dismiss the worker
            comm.send(MSG_MANAGER_HAS_NO_WORK, tag=TAG_STATUS_MSG, dest=destination)
            active_workers.remove(destination)

    while len(active_workers) > 0:
        if not abort_job and remaining_time() < safety_walltime:
            print "Remaining wall time is less than %d seconds, stopping distribution of tasks\n" % safety_walltime
            abort_job = True
        elif not abort_job and os.path.exists(abort_filename):
            print "Found file %s, stopping distribution of tasks\n" % abort_filename
            abort_job = True

        # Check for idle workers
        for i in active_workers:
            if comm.Iprobe(source=i, tag=TAG_STATUS_MSG):
                msg = comm.recv(source=i, tag=TAG_STATUS_MSG)
                if msg == MSG_WORKER_FINISHED:
                    print "SUCCESS: Worker %d, task %s" % (i,
                                                           worker_to_subdir[i])
                    give_new_task_if_we_have_any(destination=i)
                elif msg == MSG_WORKER_ERROR:
                    print "FAILURE: Worker %d, task %s" % (i,
                                                           worker_to_subdir[i])
                    failed_tasks += 1
                    give_new_task_if_we_have_any(destination=i)
                else:
                    raise Exception(
                        "Manager reveived an invalid status message from worker %d"
                        % i)
                print "Remaining wall time:", remaining_time(
                ), ", remaining tasks:", len(still_todo)
                print
        # save some cpu
        time.sleep(0.1)
    print "End of job.\nRemaining wall time: %d\nTasks not started: %d" % (
        remaining_time(), len(still_todo))
    print "Failed tasks: %d" % failed_tasks
    return
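
Only the manager side is shown. A hypothetical worker loop matching the tags and status messages used above (run_task is an assumed helper, and the MSG_*/TAG_* constants are the ones referenced by manager()) could look like:

# Hypothetical worker counterpart to manager(); run_task() is an assumed helper.
def worker():
    comm = MPI.COMM_WORLD
    while True:
        msg = comm.recv(source=0, tag=TAG_STATUS_MSG)
        if msg == MSG_MANAGER_HAS_NO_WORK:
            return
        subdir = comm.recv(source=0, tag=TAG_SUBDIR)
        commandline = comm.recv(source=0, tag=TAG_COMMANDLINE)
        walltime = comm.recv(source=0, tag=TAG_WALLTIME_SECONDS)
        ok = run_task(subdir, commandline, walltime)  # assumed helper
        comm.send(MSG_WORKER_FINISHED if ok else MSG_WORKER_ERROR,
                  tag=TAG_STATUS_MSG, dest=0)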
Example #9
 def elapsed_time():
     return MPI.Wtime() - start_time
Example #10
    def update_nodes(self):
        """
        Update the u- and f-values at the collocation nodes -> corresponds to a single sweep over all nodes

        Returns:
            None
        """

        # get current level and problem description
        L = self.level
        P = L.prob

        # only if the level has been touched before
        assert L.status.unlocked

        # get number of collocation nodes for easier access
        M = self.coll.num_nodes

        # form Jacobian at fixed time
        jtime = self.params.fixed_time_in_jacobian

        dfdu = P.eval_jacobian(L.u[jtime])

        # form collocation problem
        Gu_ = self.integrate()

        i = 0
        for m in self.node_list:  #
            Gu_[m] -= L.u[m + 1] - L.u[0]
            if L.tau[m] is not None:
                Gu_[m] += L.tau[m]

        Guv = []
        for m in range(M):
            if m in self.node_list:
                Guv.append(np.zeros(Gu_[m].values.size, dtype='d'))
            else:
                Guv.append(None)

        dnk = np.zeros(Gu_[m].values.size, dtype='d')
        for m in range(M):
            U = np.zeros(Gu_[m].values.size,
                         dtype='d')  #complex) #P.dtype_u(P.init, val=0.0)

            if m in self.node_list:
                for j in self.node_list:
                    U = U + (self.Vi[m, j] * Gu_[j].values).flatten()
                #print(self.rank, "rufe", m, self.rank)
                self.params.comm.Reduce(U, Guv[m], root=self.rank, op=MPI.SUM)
                Guv[m] = Guv[m].reshape(2,
                                        (np.sqrt(Guv[m].size / 2)).astype(int),
                                        (np.sqrt(Guv[m].size / 2)).astype(int))

            else:
                for j in self.node_list:
                    U = U + (self.Vi[m, j] * Gu_[j].values).flatten()
                root = 0
                if self.rank == 0:
                    root = 1
                #print(self.rank, "sende", m, root)
                self.params.comm.Reduce(U, dnk, root=root, op=MPI.SUM)

        uv_g = []
        for m in range(M):
            if m in self.node_list:
                uv_g.append(P.dtype_u(P.init, val=0))
            else:
                uv_g.append(P.dtype_u(P.init, val=0))

        for m in self.node_list:  # range(M):  # hell yeah, this is parallel!!

            #if m in self.node_list:
            t1 = MPI.Wtime()
            uv_g[m].values = (P.solve_system_jacobian(
                dfdu, Guv[m], L.dt * self.D[m], L.u[0],
                L.time + L.dt * self.coll.nodes[m]).values)

        for m in range(M):
            U = np.zeros(
                Gu_[m].values.size,
                dtype='d')  #.flatten() #complex) #P.dtype_u(P.init, val=0.0)
            K = np.zeros(
                Gu_[m].values.size,
                dtype='d')  #.flatten() #complex) #P.dtype_u(P.init, val=0.0)
            if m in self.node_list:
                for j in self.node_list:
                    U = U + (
                        (self.V[m, j] * uv_g[j].values.flatten()).astype(float)
                    )  #.flatten()
                self.params.comm.Reduce(U, K, root=self.rank, op=MPI.SUM)
                L.u[m + 1].values += K.reshape(
                    2, (np.sqrt(Guv[m].size / 2)).astype(int),
                    (np.sqrt(Guv[m].size / 2)).astype(int))
            else:
                for j in self.node_list:
                    U = U + (
                        (self.V[m, j] * uv_g[j].values.flatten()).astype(float)
                    )  #.flatten()
                root = 0
                if self.rank == 0:
                    root = 1
                self.params.comm.Reduce(U, dnk, root=root, op=MPI.SUM)

        for m in range(M):  #self.node_list: #  # hell yeah, this is parallel!!
            if m in self.node_list:
                L.f[m + 1] = P.eval_f(L.u[m + 1],
                                      L.time + L.dt * self.coll.nodes[m])

        L.status.updated = True

        return None
Example #11
def run_vfi(comm):
    '''
    This function runs the main process.
    '''

    s0 = MPI.Wtime()
    f0_sum = 0

    #------------------------------------------#
    #      STEP1: INITIALIZATION
    #------------------------------------------#
    sys.stdout.write("Running at %d of %d on %s.\n" %
                     (comm.rank, comm.size, MPI.Get_processor_name()))

    # INITIALIZE THE HOUSEHOLD CLASS
    hh = Household()

    #------------------------------------------#
    #     STEP2: LIFECYCLE COMPUTATION
    #------------------------------------------#
    for age in reversed(range(hh.T)):

        s2 = MPI.Wtime()

        # EMPTY BIN FOR VALUE FUNCTION AND POLICY FUNCTIONS
        results = np.zeros((hh.na * hh.ne, 2))
        V_temp = np.zeros((hh.na, hh.ne))
        a1_temp = np.zeros((hh.na, hh.ne))

        # NO GRID SEARCH AT AGE T
        if (age == hh.T - 1):
            if comm.rank == 0:
                for ind in range(hh.na * hh.ne):
                    ia = ind // hh.ne
                    ie = ind % hh.ne
                    cc = (1 + hh.r) * hh.agrid[ia] + hh.w * hh.egrid[ie]
                    if cc <= 0: cc = 1e-5
                    V_temp[ia, ie] = hh.util(cc)  # VALUE FUNCTION
                    a1_temp[ia, ie] = 0.0  # SAVING

        # GRID SEARCH AT AGE < T
        else:
            if comm.rank == 0:
                V1 = hh.V[age + 1, :, :]
            else:
                V1 = np.empty((hh.na, hh.ne), dtype=np.float64)
            comm.Bcast(V1, root=0)

            # Split the for loop by workers
            lb = int((comm.rank + 0) * np.ceil((hh.na * hh.ne) / comm.size))
            ub = int((comm.rank + 1) * np.ceil((hh.na * hh.ne) / comm.size))
            if hh.na * hh.ne < ub:
                ub = hh.na * hh.ne
            leng = ub - lb
            Vp = np.empty((int(leng), 2))
            it = 0

            for ind in range(lb, ub):
                Vp[it, :] = vfi_opt(hh, age, ind)
                it += 1

            # Gather the computed value function by each worker
            comm.Gather(Vp, results, root=0)

            for ind in range(hh.na * hh.ne):
                ia = ind // hh.ne
                ie = ind % hh.ne
                V_temp[ia, ie] = results[ind][0]  # VALUE FUNCTION
                a1_temp[ia, ie] = results[ind][1]  # SAVING

        hh.set_V(age, V_temp)
        hh.set_a1(age, a1_temp)

        f2 = MPI.Wtime() - s2
        f0_sum += f2
        if comm.rank == 0:
            sys.stdout.write("Age: %d. Time: %f seconds. \n" %
                             (age + 1, round(f2, 4)))

        comm.Barrier()

    # TOTAL RUNTIME
    f0 = MPI.Wtime() - s0  # use the same clock that produced s0
    run_time = [f0_sum, f0]

    return run_time
Example #12
	for i in range(len(recs[0].seq)):
		if recs[0].seq[i]!="-" and recs[1].seq[i]!="-":
			if recs[0].seq[i]!=recs[1].seq[i]:
				dist+=1
		else:
			gaps+=1
	similarity=1-(float(dist)/(len(recs[0].seq)-gaps))
	#print "Sim ",similarity, " gaps ", gaps,"dis ", dist 
	return similarity



#************************************************************ main code *********************************************************************************

#timing
starting=MPI.Wtime()

#read the records	       
In_Handle = open(INPUT_PATH, "r")
List_Rec=[]

NumSeqs=0
for record in SeqIO.parse(In_Handle, "fasta") :
	List_Rec.append(record)
	NumSeqs+=1
In_Handle.close()

#Calculation of the number of pairs to be aligned
TotPairs= float(len(List_Rec)*len(List_Rec)-len(List_Rec))/2

#Distributing the pairs to all the processors
Example #13
def cal_sn_dep_Cov_cij():
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    comm.Barrier()
    t_start = MPI.Wtime()

    #num_rbin = {'num_sbin': (5, 15, 30, 100, 150), 'num_pbin': (6, 19, 38, 127, 191)}    # s: spectroscopic; p: photometric -- This case is correct only for n(z) Stage-III.
    num_rbin = {
        'num_sbin': (5, 6, 7, 8, 9, 10, 15, 20, 22, 25, 27, 30, 32, 35, 37),
        'num_pbin': (6, 7, 8, 10, 11, 12, 18, 25, 27, 31, 34, 37, 40, 44, 46)
    }  # s: spectroscopic; p: photometric

    red_bin = num_rbin['num_sbin'][nbin_case]
    red_bin_ext = num_rbin['num_pbin'][nbin_case]
    N_dset = (red_bin + 1) * red_bin // 2
    N_dset_ext = (red_bin_ext + 1) * red_bin_ext // 2

    num_kin = 505
    l_min = 1
    l_max = 2002
    #delta_l = 1
    delta_l = 3
    num_l = (l_max - l_min) // delta_l + 1
    f_sky = 15000.0 / 41253.0  # Survey area 15000 deg^2 is from PW-Stage IV (LSST)
    #print("f_sky: ", f_sky)
    data_type_size = 8

    prefix = 'TW_zext_'
    idir0 = './BAO_alpha_{}/'.format(alpha)
    ##idir = './mpi_preliminary_data_{}/comm_size{}/'.format(Pk_type, comm_size)
    idir = idir0 + 'mpi_preliminary_data_{}/'.format(Pk_type)
    #------------- !! write output files, they are the basic files --------------#
    ofdir = idir0 + 'mpi_{}sn_exp_k_data_{}/comm_size{}/'.format(
        prefix, Pk_type, size)
    Gm_ifprefix = idir0 + 'mpi_preliminary_data_Pwig_nonlinear/' + prefix
    ofprefix = ofdir + prefix
    print('Output file prefix:', ofprefix)
    if rank == 0:
        if not os.path.exists(ofdir):
            os.makedirs(ofdir)
        # read shape noise term \sigma^2/n^i
        inputf = Gm_ifprefix + 'pseudo_shapenoise_{0}rbins_ext.out'.format(
            red_bin_ext)
        pseudo_sn_ext = np.loadtxt(inputf, dtype='f8', comments='#')
        pseudo_sn = np.array(pseudo_sn_ext[0:red_bin]) * snf
        print(pseudo_sn.shape)
    else:
        pseudo_sn = np.zeros(red_bin)
    comm.Bcast(pseudo_sn, root=0)

    default_num_l_in_rank = int(np.ceil(num_l / size))
    # Rounding errors here should not be a problem unless default size is very small
    end_num_l_in_rank = num_l - (default_num_l_in_rank * (size - 1))
    assert end_num_l_in_rank >= 1, "Assign fewer processes."

    if (rank == (size - 1)):
        num_l_in_rank = end_num_l_in_rank
    else:
        num_l_in_rank = default_num_l_in_rank

    # Be careful: here we have extended photometric redshift bins, which differs from the TF case.
    Cijl_len = num_l_in_rank * N_dset_ext
    Cijl_sets = np.zeros(Cijl_len)
    Gm_len = num_l_in_rank * N_dset_ext * num_kout
    Gm_sets = np.zeros(Gm_len)

    # default case with delta_l = 3
    file_Cijl_cross = idir + prefix + 'Cij_l_{}rbins_ext_{}kbins_CAMB.bin'.format(
        red_bin_ext, num_kin)  # Cij_l stores Cij for each ell by row
    Cijl_freader = MPI.File.Open(
        comm, file_Cijl_cross)  # Open and read a binary file
    Cijl_fh_start = rank * Cijl_len * data_type_size  # need to calculate how many bytes shifted
    Cijl_freader.Seek(Cijl_fh_start)
    Cijl_freader.Read([Cijl_sets,
                       MPI.DOUBLE])  # Read using individual file pointer
    #print('Cij(l) from rank', rank, 'is:', Cijl_sets, '\n')
    comm.Barrier()
    Cijl_freader.Close()
    # Since Cijl has been generated with equal number of ells from different 4 ranks, we could directly read data from sub-binary files.
    # file_Cijl_cross = idir + 'comm_size{}/'.format(comm.size) + prefix + 'Cij_l_{}rbins_ext_{}kbins_CAMB_rank{}.bin'.format(red_bin_ext, num_kin, rank) # Cij_l stores Cij for each ell by row
    # Cijl_freader = open(file_Cijl_cross, 'rb') # Open and read a binary file
    # Cijl_sets = np.fromfile(Cijl_freader, dtype='d', count=-1, sep='')
    # #print('Cij(l) from rank', rank, 'is:', Cijl_sets, '\n')
    # Cijl_freader.close()

    #--------------- !! read Gm_cross part by part for each ell -----------------#
    file_Gm_cross = Gm_ifprefix + 'Gm_cross_out_{}rbins_{}kbins_CAMB.bin'.format(
        red_bin_ext, num_kout)
    Gm_freader = MPI.File.Open(comm, file_Gm_cross)
    Gm_fh_start = rank * Gm_len * data_type_size
    Gm_freader.Seek(Gm_fh_start)
    Gm_freader.Read([Gm_sets, MPI.DOUBLE])
    #print('Gm from rank', rank, 'is:', Gm_sets.shape, '\n')
    comm.Barrier()
    Gm_freader.Close()

    def cal_C_G(l, rank):
        n_l = default_num_l_in_rank * rank + l
        ell = l_min + n_l * delta_l
        #offset_cijl = n_l * N_dset * data_type_size
        #offset_Gm = n_l * N_dset * num_kout * data_type_size

        # put the whole array cij at ell into the upper triangle part of the matrix
        cijl_array = Cijl_sets[l * N_dset_ext:(l + 1) * N_dset_ext]
        #print(cijl_array, cijl_array.shape)
        cijl_m[iu1] = np.array(cijl_array)
        #print(cijl_m, cijl_m.shape)
        cijl_m_select = np.array(
            cijl_m[0:red_bin, 0:red_bin]
        )  # select the first red_bin bins of Cij, match the case with T-F
        cijl_true = np.array(
            cijl_m_select[iu2])  # convert upper triangle matrix to an array
        cijl_sn = np.array(cijl_true)
        cijl_sn[sn_id] = cijl_true[
            sn_id] + pseudo_sn  # add shape noise terms in Cii(l) terms

        Cov_cij_cpq = cal_cov_matrix(
            red_bin, iu2,
            cijl_sn)  # calculate the covariance matrix of Cij(l), Cpq(l')
        # if rank == 0:
        #     rank_matrix = np.linalg.matrix_rank(Cov_cij_cpq)
        #     print('ell, rank of Cov:', ell, rank_matrix)

        Cov_cij_cpq = Cov_cij_cpq / (
            (2.0 * ell + 1.0) * delta_l * f_sky
        )  # account the number of modes for each l with the interval delta_l

        w_ccij, v_ccij = linalg.eigh(
            Cov_cij_cpq, lower=False, overwrite_a=True
        )  # Get eigenvalue and eigenvectors from Scipy routine
        w_inv = 1.0 / w_ccij
        if not np.all(w_inv > 0.0):
            print('w_inv from ell ', ell, ' is negative.'
                  )  # show below which ell, the inverse of Cov_cij_cpq fails
        # If uncomment the below, overwrite_a should be set False in the linalg.eigh()
        # sqrt_w_inv = np.diag(w_inv**0.5)
        # v_inv = np.transpose(v_ccij)
        # Cov_cij_sym = np.triu(Cov_cij_cpq, k=1) + Cov_cij_cpq.T
        # print reduce(np.dot, [np.diag(sqrt_w_inv**2.0), v_inv, Cov_cij_sym, v_inv.T])

        Cov_inv_half = np.transpose(
            w_inv**0.5 * v_ccij
        )  # Simplify the expression of dot(sqrt_w_inv, v_inv), 05/09/2016
        G_l_array = Gm_sets[l * N_dset_ext * num_kout:(l + 1) * N_dset_ext *
                            num_kout]
        Gm_l_ext = np.reshape(
            G_l_array, (N_dset_ext, num_kout), 'C'
        )  # In Python, the default storage of a matrix follows C language format.
        #print(Gm_l_ext)
        Gm_l = np.array(Gm_l_ext[Gmrow_sel_ind, :])
        Gm_l = np.dot(Cov_inv_half, Gm_l)

        cijl_true = np.dot(Cov_inv_half, cijl_true)

        return cijl_true, Gm_l

    amode = MPI.MODE_WRONLY | MPI.MODE_CREATE

    #-------- generate C^ij(l) prime -------##
    Cijl_prime_file = ofprefix + 'Cijlprime_{}rbins_{}kbins_snf{}_rank{}.bin'.format(
        red_bin, num_kin, snf, rank)
    Cijl_prime_fwriter = open(Cijl_prime_file, 'wb')

    #------------- !! Gm_prime output
    Gm_prime_file = ofprefix + 'Gm_cross_prime_{}rbins_{}kbins_snf{}_rank{}.bin'.format(
        red_bin, num_kout, snf, rank)
    Gm_prime_fwriter = open(Gm_prime_file, 'wb')

    Gmrow_sel_ind = np.array([], dtype=int)
    ind_pre = 0
    for row in range(red_bin):
        count = red_bin - row
        for i in range(count):
            Gmrow_sel_ind = np.append(Gmrow_sel_ind, i + ind_pre)
        ind_pre = ind_pre + red_bin_ext - row
    print('Gmrow_sel_ind:', Gmrow_sel_ind)

    iu1 = np.triu_indices(red_bin_ext)
    iu2 = np.triu_indices(red_bin)

    sn_id = [(2 * red_bin + 1 - ii) * ii // 2 for ii in range(red_bin)
             ]  # indices of the C^ii(l) entries to which the shape noise terms are added
    cijl_m = np.zeros(
        (red_bin_ext, red_bin_ext))  # one matrix to store C^ij at one ell
    for l in range(num_l_in_rank):
        cijl_true, Gm_l = cal_C_G(l, rank)
        cijl_true.tofile(Cijl_prime_fwriter, sep="")
        Gm_l.tofile(Gm_prime_fwriter, sep="")

    comm.Barrier()
    Cijl_prime_fwriter.close()
    Gm_prime_fwriter.close()
    t_end = MPI.Wtime()
    if rank == 0:
        print('With total processes', size, ', the running time:',
              t_end - t_start)
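
The shared-file reads above (MPI.File.Open, Seek to a rank-dependent byte offset, Read into a preallocated buffer) are easier to see in isolation. A minimal sketch of that access pattern, with made-up sizes and file name:

# Minimal sketch of the MPI-IO read-at-offset pattern used above; the file
# name, element count and dtype are illustrative only.
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

count_per_rank = 1000                       # doubles per rank, illustrative
buf = np.zeros(count_per_rank, dtype='d')

fh = MPI.File.Open(comm, 'some_binary_file.bin', MPI.MODE_RDONLY)
fh.Seek(rank * count_per_rank * 8)          # 8 bytes per double
fh.Read([buf, MPI.DOUBLE])                  # independent read per rank
fh.Close()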
Example #14
def MonteCarlo(
        latticeDim,
        cycles):  #calculate the energy and magnetization for a given temp.
    # cycles: MonteCarlo cycles (how many times do we flip the matrix?)
    #latticeDim = dim of square matrix
    # EAverage = energy of matrix averaged over cycles, normalized to spins**2
    #MagAverage= magnetic field of matrix, averaged over cycles, normalized to spins**2
    #EVariance = variance of energy, normalized
    #MagAbsAverage= absolute value of magnetic field, average over cycles
    #Setup spin matrix, initialize to ground state

    #MPI Initializations
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()

    Tstart = 2.0
    Tend = 2.3
    dt = 0.05

    E_slutt = 0
    M_slutt = 0
    heatCapacity_slutt = 0
    Susceptibility_slutt = 0

    #spinMatrix = np.zeros((latticeDim,latticeDim),np.int8) + 1 #ALL SPIN UP

    spinMatrix = np.zeros((latticeDim, latticeDim), np.int8)  #RANDOM

    for i in xrange(latticeDim):
        for j in xrange(latticeDim):
            if np.random.random() < 0.5:
                spinMatrix[i, j] = 1
            else:
                spinMatrix[i, j] = -1

    Trange = linspace(Tstart, Tend, int((Tend - Tstart) / dt))
    for temperature in Trange:
        #create and initialize variables
        StartTime = MPI.Wtime()
        E = M = 0
        EAverage = E2Average = MagAverage = Mag2Average = MagAbsAverage = 0
        k = 1.0
        J = 1.0
        beta = 1 / float(k * temperature)
        NumberOfAcceptedStates = 0
        #Possible energy changes, -8J, -4J, 0J, 4J, 8J
        w = np.zeros(17, np.float64)  #17=16 +1,
        for degeneration in xrange(-8, 9, 4):
            w[degeneration + 8] = math.exp(-degeneration * J *
                                           beta)  # shift index by 8

        #print w

        #Calculate initial magnetization
        M = spinMatrix.sum()
        for j in xrange(latticeDim):
            for i in xrange(latticeDim):
                E -= spinMatrix.item(
                    i, j) * (spinMatrix.item(periodic(i, latticeDim, -1), j) +
                             spinMatrix.item(i, periodic(j, latticeDim, 1))
                             )  #initial energy

        NumberOfAcceptedStates = 0
        #start metropolis MonteCarlo Computation
        for i in xrange(cycles):  #monte carlo cycle
            #loop over all spins, pick a random spin each time
            for s in xrange(latticeDim**(2)):
                x = int(np.random.random() * latticeDim)
                y = int(np.random.random() * latticeDim)

                spinUp = spinMatrix.item(x, periodic(y, latticeDim, 1))
                spinDown = spinMatrix.item(x, periodic(y, latticeDim, -1))
                spinRight = spinMatrix.item(periodic(x, latticeDim, 1), y)
                spinLeft = spinMatrix.item(periodic(x, latticeDim, -1), y)

                deltaE = 2 * spinMatrix.item(
                    x, y) * (spinLeft + spinRight + spinUp + spinDown)

                if np.random.random() <= w[deltaE + 8]:
                    #accept
                    spinMatrix[x, y] = -spinMatrix[x, y]
                    M += 2 * spinMatrix[x, y]  #flipped spin in x and y
                    E += deltaE
                    NumberOfAcceptedStates += 1

            #updating expectation values
            EAverage += E
            E2Average += E**2
            MagAverage += M
            Mag2Average += M**2
            MagAbsAverage += math.fabs(M)

        #To get the average values
        EAverage /= float(cycles)
        E2Average /= float(cycles)
        MagAverage /= float(cycles)
        Mag2Average /= float(cycles)
        MagAbsAverage /= float(cycles)

        heatCapacity = (E2Average - EAverage**2) / float(
            latticeDim**(2) * temperature**(2))

        Susceptibility = (Mag2Average - MagAbsAverage**2) / float(
            latticeDim**(2) * temperature)

        EAverage /= float(latticeDim**2)
        MagAverage /= float(latticeDim**2)
        MagAbsAverage /= float(latticeDim**2)

        Elist = np.array(EAverage)
        Eslist = np.array(0.)
        Mlist = np.array(MagAbsAverage)
        Mslist = np.array(0.)
        CVlist = np.array(heatCapacity)
        CVslist = np.array(0.)
        Xilist = np.array(Susceptibility)
        Xislist = np.array(0.)

        comm.Reduce(Elist, Eslist, op=MPI.SUM)
        comm.Reduce(Mlist, Mslist, op=MPI.SUM)
        comm.Reduce(CVlist, CVslist, op=MPI.SUM)
        comm.Reduce(Xilist, Xislist, op=MPI.SUM)

        EAv.append(Eslist / size)
        temp.append(temperature)
        Mag.append(Mslist / size)
        CV.append(CVslist / size)
        Xi.append(Xislist / size)

        if rank == 0:
            print Eslist / size, Mslist / size, CVslist / size, Xislist / size, temperature

        EndTime = MPI.Wtime()

        Totaltime = EndTime - StartTime

        if (rank == 0):
            print  ####
            print Totaltime

    return Eslist / size, CVslist / size, Mslist / size, Xislist / size, temperature
Example #15
comm = MPI.COMM_WORLD
worker = comm.Get_rank()
num_workers = comm.Get_size()


def createarray():
    vA = np.random.randint(10, size=N)
    return vA


#comm.barrier()


def NsendAll(vB):

    for i in range(1, num_workers):
        comm.send(vB, dest=i)


if worker == 0:
    vA = createarray()
    start = MPI.Wtime()
    NsendAll(vA)
    end = MPI.Wtime()
    print("Runtime", end - start)

else:
    data = comm.recv()

comm.barrier()
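
NsendAll above broadcasts by looping over point-to-point sends; the collective equivalent is a single comm.bcast, which can be timed the same way for comparison:

# Hedged comparison sketch: time the collective broadcast instead of the
# NsendAll loop above. N is illustrative.
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
worker = comm.Get_rank()
N = 1000

vA = np.random.randint(10, size=N) if worker == 0 else None
comm.barrier()
start = MPI.Wtime()
vA = comm.bcast(vA, root=0)
end = MPI.Wtime()
if worker == 0:
    print("bcast runtime", end - start)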
Example #16
        #plt.draw();
    else:
        data = None

    result = np.zeros((ImageSize, ImageSize), dtype=np.float64)

    # Figure out the size of each chunk of data
    n = n_phi / size
    begin = n * rank
    end = n * (rank + 1)

    Transformer = data_transformer(sample_size, ImageSize)

    # Communicate data and compute back-projection
    comm.barrier()
    p_start = MPI.Wtime()

    if rank == 0:
        for k in xrange(1, size):  # to other processes
            # send every process its respective chunk of data
            comm.Send(data[n * k:n * (k + 1)], dest=k)
    else:
        # allocate buffer
        data = np.zeros((n, sample_size), dtype=np.float64)
        # Receive data from root
        comm.Recv(data, source=0)

    for k in xrange(0, n):
        phi = -(k + n * rank) * math.pi / n_phi
        result += Transformer.transform(data[k, :], phi)
        if k % 64 == 0:
Example #17
Qrmat = pfs.CSR2Mat(Qr)
PrintGreen('done \n')

# Clear some space in memory
del ffdisc.L, ffdisc.B, ffdisc.B2, ffdisc.Q, Qr

# Compute optimal forcings
Print('Compute optimal forcings using SLEPC ... ')
omegas = linspace(0.05, 2, 10)
G = zeros(len(omegas))
idx = 0
for iomega in range(len(omegas)):
    omega = omegas[iomega]
    Print('  omega = %f' % omega)
    # Set up the shell matrix and compute the factorizations
    t1 = MPI.Wtime()
    shell = pfs.OptimalForcings(Lmat, Bmat, B2mat, Pumat, Qmat, Qrmat, omega)
    localsizes, globalsizes = Qrmat.getSizes()
    FR = PETSc.Mat().create(comm)
    FR.setSizes(globalsizes)
    FR.setType('python')
    FR.setPythonContext(shell)
    FR.setUp()
    t2 = MPI.Wtime()
    Print(' CPU time to build FR object : %10.4g ' % (t2 - t1))

    # Compute optimal perturbations
    gains, fs, qs = pfs.OptimalForcingsSLEPc(FR, shell, 1)
    if rank == 0:
        G[idx] = gains[0].real
        idx += 1
Example #18
#----------------broadcasting source data---------------------------------------------------------
#broadcast source to all processors
    all_source = comm.bcast(all_source if comm_rank == 0 else None, root=0)

    #divide source to each processor
    num_source = all_source.shape[1]
    local_source_offset = np.linspace(0, num_source,
                                      comm_size + 1).astype('int')

    #broadcast target to all processors
    all_target = comm.bcast(all_target if comm_rank == 0 else None, root=0)

    comm.Barrier()
    #start timing
    t_start = MPI.Wtime()

    #----------------local computation on comm_rank processor-----------------------------------------
    #get the local data which will be processed in this processor
    #this local source and target array lives on comm_rank processor
    local_source = all_source[:, local_source_offset[comm_rank]:
                              local_source_offset[comm_rank + 1]]
    local_target = all_target
    print("------------- local target point --------------")
    print(" %d/%d processor has local target with size %d" %
          (comm_rank, comm_size, local_target.size))

    #get local source and target dim
    N, local_source_num = local_source.shape
    M = local_target.shape[0]
    local_u = np.zeros(M) + 1j * np.zeros(M)
Example #19
    def bench_outputs_with_single_file_multiple_writers(
            self,
            container_name,
            directory_name,
            file_name,
            output_per_rank,
            data=None):
        '''
		Benchmarking outputs with the pattern `Single File Multiple Writers`

		Each process accesses a single shared file in different sections exclusively.

		Data from different ranks is stored in different blocks.

		Pattern of global block ids: 00002-00005, where the first section is the rank and the second section is the block id written by that rank.

		The process is:
		1. Each rank writes its blocks to Azure
		2. MPI_Barrier() to wait for all ranks
		3. Get the uncommitted block list and rearrange it into data order
		4. Commit the changes

		param:
		 container_name: target container
		 directory_name: target directory
		 file_name: target file
		 output_per_rank: size of outputs per rank in MiB
		 data: optional cached data for outputs, in this case stands for data of a full block(100 MiB data)
		
		return:
		 max_write_time: maximum writing time
		 min_write_time: minimum writing time
		 avg_write_time: average writing time
		'''
        # Data prepare
        if data == None:
            data = common.workload_generator(self.__mpi_rank,
                                             self.BLOCK_LIMIT_IN_BYTES)
        else:
            data = data[0:self.BLOCK_LIMIT_IN_BYTES - 1]
        last_block_data = data
        block_count = output_per_rank // self.BLOCK_LIMIT
        # The last block is not full
        if output_per_rank % self.BLOCK_LIMIT:
            block_count = block_count + 1
            last_block_data = common.workload_generator(
                self.__mpi_rank, (output_per_rank % self.BLOCK_LIMIT) << 20)

        # Step.1 put blocks
        MPI.COMM_WORLD.Barrier()
        start = MPI.Wtime()
        for i in range(0, block_count):
            block_id = '{:0>5}-{:0>5}'.format(self.__mpi_rank, i)
            if i != (block_count - 1):
                self.__storage_service.put_block(container_name, file_name,
                                                 data, block_id)
            elif i == (block_count - 1):
                self.__storage_service.put_block(container_name, file_name,
                                                 last_block_data, block_id)
        end = MPI.Wtime()
        MPI.COMM_WORLD.Barrier()
        max_write, min_write, avg_write = common.collect_bench_metrics(end -
                                                                       start)

        if 0 == self.__mpi_rank:
            start_postprocessing = MPI.Wtime()
            # Step.3 get block list and sort according to block id
            block_list = self.__storage_service.get_block_list(
                container_name,
                file_name,
                block_list_type=blob.BlockListType.All).uncommitted_blocks
            block_list.sort(key=lambda block: block.id)

            # Step.4 commit
            self.__storage_service.put_block_list(container_name, file_name,
                                                  block_list)
            end_postprocessing = MPI.Wtime()

            postprocessing_time = end_postprocessing - start_postprocessing
            max_write = round(max_write + postprocessing_time, 3)
            min_write = round(min_write + postprocessing_time, 3)
            avg_write = round(avg_write + postprocessing_time, 3)

        return max_write, min_write, avg_write
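
The commit step relies on the '{:0>5}-{:0>5}' block ids sorting into rank-major, block-major order; a quick stand-alone check of that assumption:

# Stand-alone check (not part of the benchmark) that the zero-padded block
# ids used above sort into rank-major, block-major order.
ids = ['{:0>5}-{:0>5}'.format(r, b) for r in (0, 2, 10) for b in (0, 1, 3)]
assert ids == sorted(ids)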
Example #20
 def testWTime(self):
     time1 = MPI.Wtime()
     self.assertTrue(type(time1) is float)
     time2 = MPI.Wtime()
     self.assertTrue(type(time2) is float)
     self.assertTrue(time2 >= time1)
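
A companion check for the timer resolution, MPI.Wtick(), could be written in the same style (a sketch, not part of the original test case):

 def testWTick(self):
     # sketch in the style of testWTime above; MPI.Wtick() returns the
     # resolution of MPI.Wtime() in seconds
     tick = MPI.Wtick()
     self.assertTrue(type(tick) is float)
     self.assertTrue(tick > 0.0)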
Example #21
from mpi4py import MPI

import AnalysisFunctions as af

comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
status = MPI.Status()
#
# INITIALIZATION
# ... do whatever setup every rank needs here ...
#
# END INITIALIZATION

# RANK 0 (MASTER CORE) SETS UP SOME STUFF FOR ITSELF
if rank == 0:
   print 'Setting my own stuff up'
   t_start = MPI.Wtime()
   # ... do whatever rank-0-only setup is needed ...

comm.Barrier()

# Cyclic distribution: rank r handles iterations r, r+size, r+2*size, ...
# NUMBER_OF_TOTAL_TIMES is a placeholder for the total iteration count.
for it in range(rank, NUMBER_OF_TOTAL_TIMES, comm.size):
   # ... do the per-iteration work, communicating if you like ...
   pass
comm.Barrier()

if rank == 0:
   # ... collect/communicate the data if you like ...
   t_fin = MPI.Wtime() - t_start
   print 'Total time taken %0.3f' % t_fin
Example #22
0
numprocs = comm.Get_size()
myrank = comm.Get_rank()

if myrank == 0:
    n = int(sys.argv[1])
else:
    n = None

# broadcast n from rank 0 to all other ranks
n = comm.bcast(n, root=0)

if n <= 0:
    comm.Abort(-1)

#turn on the stop watch
starttime = MPI.Wtime()

#calculate the interval size, same for X and Y
h = math.pi / float(n)

mysum = 0.0

#distribute work in the X axis
for i in range(myrank, n, numprocs):
    x = h * (i + 0.5)
    #do regular integration in the Y axis
    for j in range(n):
        y = h * (j + 0.5)
        mysum += math.sin(x + y)

local_integral = h * mysum
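The fragment stops at the per-rank partial integral. A plausible final step (a sketch, since the original is cut off here) would reduce the partial sums and report the elapsed time on rank 0:

# Sketch of a plausible final step: combine the per-rank partial integrals
# and report on rank 0.  The 2-D midpoint rule carries one factor of h per
# axis, so a second h is applied here.
integral = comm.reduce(h * local_integral, op=MPI.SUM, root=0)
endtime = MPI.Wtime()
if myrank == 0:
    print('integral = %.6f  elapsed = %.3f s' % (integral, endtime - starttime))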
Example #23
0
# Define number of processes and rank
num_processes = comm.Get_size()
rank = comm.Get_rank()
if num_processes not in [2**i for i in range(M + 1)]:
    raise IOError("Number of cpus must be in ", [2**i for i in range(M + 1)])

# Each cpu gets ownership of Np slices
Np = N // num_processes

# 'global' matrices (exposed to all processes)
Ag = np.random.rand(N, N)
# resultant matrix (global)
Bg = np.empty((N, N), dtype='complex128')

# sub-matrix for this process (Np-by-N)
A = Ag[rank * Np:(rank + 1) * Np, :]

comm.Barrier()  # start MPI timer
t_start = MPI.Wtime()

B = np.fft.fft(A)

comm.Gather([B, MPI.DOUBLE], [Bg, MPI.DOUBLE])

comm.Barrier()
t_final = (MPI.Wtime() - t_start)  # stop MPI timer

if rank == 0:
    print t_final

sys.exit()
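One detail worth flagging in the Gather call above: np.fft.fft returns complex128 values, while the buffers are declared as MPI.DOUBLE, which only works because the byte counts line up. A sketch of two more explicit alternatives (assuming the MPI library exposes the C complex datatype):

# Sketch: gather complex FFT rows without declaring them as MPI.DOUBLE.
# Option 1: let mpi4py infer the MPI datatype from the numpy dtype.
comm.Gather(B, Bg, root=0)
# Option 2: name the complex datatype explicitly (if the MPI library provides it).
comm.Gather([B, MPI.C_DOUBLE_COMPLEX], [Bg, MPI.C_DOUBLE_COMPLEX], root=0)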
Example #24
0
my_size = size // comm.size  # Every process computes a vector of length *my_size*
size = comm.size * my_size  # Make sure size is an integer multiple of comm.size
my_offset = comm.rank * my_size

# This is the complete vector
vec = np.zeros(size)  # Every element zero...
vec[0] = 1.0  #  ... besides vec[0]

# Create my (local) slice of the matrix
my_M = np.zeros((my_size, size))
for i in xrange(my_size):
    j = (my_offset + i - 1) % size
    my_M[i, j] = 1.0

comm.Barrier()  ### Start stopwatch ###
t_start = MPI.Wtime()

for t in xrange(iter):
    my_new_vec = np.inner(my_M, vec)

    comm.Allgather([my_new_vec, MPI.DOUBLE], [vec, MPI.DOUBLE])

comm.Barrier()
t_diff = MPI.Wtime() - t_start  ### Stop stopwatch ###

if fabs(vec[iter] - 1.0) > 0.01:
    pprint("!! Error: Wrong result!")

pprint(" %d iterations of size %d in %5.2fs: %5.2f iterations per second" %
       (iter, size, t_diff, iter / t_diff))
pprint(
Example #25
0
	def bench_outputs_with_single_file_multiple_writers(self, container_name, directory_name, file_name, output_per_rank, data = None):
		'''
		Benchmarking outputs with pattern `Single File Multiple Writers`
		
		Each process accesses its own section of a single shared file exclusively.

		Data from different ranks is stored in disjoint byte ranges

		The process is:
		 1. Create the file with the specified size
		 2. Each process updates its own range of the file

		param:
		 container_name: target container
		 directory_name: target directory
		 file_name: target file
		 output_per_rank: size of outputs per rank in MiB
		 data: optional cached data for outputs
		
		return:
		 max_write_time: maximum writing time
		 min_write_time: minimum writing time
		 avg_write_time: average writing time
		'''
		# Data prepare
		output_per_rank_in_bytes = output_per_rank << 20 # in bytes
		if data is None:
			data = common.workload_generator(self.__mpi_rank, self.FILE_CHUNK_LIMIT_IN_BYTES)
		else:
			# Trim the cached data to exactly one full chunk
			data = data[:self.FILE_CHUNK_LIMIT_IN_BYTES]
		data_last_chunk = data
		chunk_count = output_per_rank // self.FILE_CHUNK_LIMIT
		# Last chunk is not full
		if output_per_rank % self.FILE_CHUNK_LIMIT:
			chunk_count = chunk_count + 1
			data_last_chunk = common.workload_generator(self.__mpi_rank, (output_per_rank % self.FILE_CHUNK_LIMIT) << 20)

		# Step 1. Create the file
		create_start = 0
		create_end = 0
		if 0 == self.__mpi_rank:
			create_start = MPI.Wtime()
			self.__storage_service.create_file(container_name, directory_name, file_name, output_per_rank_in_bytes * self.__mpi_size)
			create_end = MPI.Wtime()
		create_time = create_end - create_start

		MPI.COMM_WORLD.Barrier()
		start = MPI.Wtime()
		for i in range(0, chunk_count):
			if i != (chunk_count - 1):
				start_range = self.__mpi_rank * output_per_rank_in_bytes + i * self.FILE_CHUNK_LIMIT_IN_BYTES
				end_range = start_range + len(data) - 1
				self.__storage_service.update_range(container_name, directory_name, file_name, data, start_range, end_range)
			elif i == (chunk_count - 1):
				start_range = self.__mpi_rank * output_per_rank_in_bytes + i * self.FILE_CHUNK_LIMIT_IN_BYTES
				end_range = start_range + len(data_last_chunk) - 1
				self.__storage_service.update_range(container_name, directory_name, file_name, data_last_chunk, start_range, end_range)
		end = MPI.Wtime()
		MPI.COMM_WORLD.Barrier()

		max_write, min_write, avg_write = common.collect_bench_metrics(end - start)
		max_write = round(max_write + create_time, 3)
		min_write = round(min_write + create_time, 3)
		avg_write = round(avg_write + create_time, 3)

		return max_write, min_write, avg_write
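To make the offset arithmetic in the update loop concrete, here is a small worked example with assumed sizes (64 MiB per rank and 4 MiB chunks are illustrative, not values from the benchmark):

# Worked example of the update_range offsets above (assumed sizes).
MiB = 1 << 20
output_per_rank_in_bytes = 64 * MiB   # output_per_rank = 64
chunk = 4 * MiB                       # stands in for FILE_CHUNK_LIMIT_IN_BYTES
rank = 2                              # stands in for self.__mpi_rank
for i in range(3):                    # first three chunks written by rank 2
    start_range = rank * output_per_rank_in_bytes + i * chunk
    end_range = start_range + chunk - 1
    print(start_range, end_range)
# prints 134217728 138412031, then 138412032 142606335, then 142606336 146800639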
Example #26
0
from mpi4py import MPI
import numpy as np
import mpids.MPInumpy as mpi_np

measure_time = lambda: MPI.Wtime()

#Creation Routines
def array(size, iters=10000, comm=MPI.COMM_WORLD):
    data = np.arange(size, dtype=np.float64).tolist()
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64, comm=comm, dist='b')
    time = measure_time() - time
    comm.reduce(time, op=MPI.MAX, root=0)
    return time/iters

def empty(size, iters=10000, comm=MPI.COMM_WORLD):
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.empty(size, dtype=np.float64, comm=comm, dist='b')
    time = measure_time() - time
    comm.reduce(time, op=MPI.MAX, root=0)
    return time/iters

def arange(size, iters=10000, comm=MPI.COMM_WORLD):
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.arange(size, dtype=np.float64, comm=comm, dist='b')
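The arange helper is cut off above, but a driver for these benchmarks might look like the sketch below (the sizes and iteration count are assumptions; note that each helper returns its local per-iteration time, while the reduced maximum is sent to rank 0 without being captured in the snippet):

# Hypothetical driver for the creation benchmarks above (a sketch).
# Run with e.g.: mpiexec -n 4 python bench_creation.py
if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    for size in (10**3, 10**4, 10**5):
        t_array = array(size, iters=100, comm=comm)
        t_empty = empty(size, iters=100, comm=comm)
        if comm.Get_rank() == 0:
            print('size=%d  array=%.3e s  empty=%.3e s' % (size, t_array, t_empty))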
Example #27
0
                                1.0 / kout[j + 1]) * GF * Pnorm_out[j]
            return Gmatrix_l

        # Gm_cross_out uses selected new k bins
        Gm_cross_file = outf_prefix + 'Gm_cross_out_{}rbins_{}kbins_CAMB_rank{}.bin'.format(
            nbin_ext, num_kout, rank)  # basic variable
        Gm_cross_fwriter = open(Gm_cross_file, 'wb')
        for l in range(num_l_in_rank):
            Gm = cal_Gm(l, rank)
            Gm.tofile(Gm_cross_fwriter, sep="")
        Gm_cross_fwriter.close()

    if cal_sn == "True" and rank == 0:
        get_shapenoise()

    time0 = MPI.Wtime()
    if cal_cijl == "True":
        get_Cijl(comm, rank)
    time1 = MPI.Wtime()
    if rank == 0:
        print('Running time for Cijl:', time1 - time0)
    if Pk_type != 'Pnow' and cal_Gm == "True":
        get_Gm_out(comm, rank)
        time2 = MPI.Wtime()
        if rank == 0:
            print('Running time for Gm:', time2 - time1)


#######################################################

#
Example #28
0
    def __init__(self, circle, src, dest,
                 treewalk=None,
                 totalsize=0,
                 hostcnt=0,
                 prune=False,
                 verify=False,
                 resume=False,
                 workq=None):
        BaseTask.__init__(self, circle)
        self.circle = circle
        self.treewalk = treewalk
        self.totalsize = totalsize
        self.prune = prune
        self.workq = workq
        self.resume = resume
        self.checkpoint_file = None
        self.checkpoint_db = None
        self.src = src
        self.dest = os.path.abspath(dest)

        # cache, keep the size conservative
        # TODO: we need a more portable LRU size

        # Default to "cannot compute"; the fallback below then applies
        self._read_cache_limit = 0
        self._write_cache_limit = 0

        if hostcnt != 0:
            max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
            procs_per_host = self.circle.size // hostcnt
            self._read_cache_limit = ((max_ofile - 64) // procs_per_host) // 3
            self._write_cache_limit = ((max_ofile - 64) // procs_per_host) * 2 // 3

        if self._read_cache_limit <= 0 or self._write_cache_limit <= 0:
            self._read_cache_limit = 1
            self._write_cache_limit = 8

        self.rfd_cache = LRU(self._read_cache_limit)
        self.wfd_cache = LRU(self._write_cache_limit)

        self.cnt_filesize_prior = 0
        self.cnt_filesize = 0

        self.blocksize = 1024 * 1024
        self.chunksize = 1024 * 1024

        # debug
        self.d = {"rank": "rank %s" % circle.rank}
        self.wtime_started = MPI.Wtime()
        self.wtime_ended = None
        self.workcnt = 0  # this is the cnt for the enqued items
        self.reduce_items = 0  # this is the cnt for processed items
        if self.treewalk:
            log.debug("treewalk files = %s" % treewalk.flist, extra=self.d)

        # fini_check
        self.fini_cnt = Counter()

        # verify
        self.verify = verify
        self.use_store = False
        if self.verify:
            self.chunksums_mem = []
            self.chunksums_buf = []

        # checkpointing
        self.checkpoint_interval = sys.maxsize
        self.checkpoint_last = MPI.Wtime()

        if self.circle.rank == 0:
            print("Start copying process ...")
Example #29
0
def main():

    comm = MPI.COMM_WORLD
    id = comm.Get_rank()
    wsize = comm.Get_size()
    tstart = MPI.Wtime()
    fsky = open("skymap.png","r")
    reader = Reader(fsky)
    skypixelwidth, skypixelheight, skypixels, metadata=reader.read_flat()
    pixelwidth = int(argv[1])
    pixelheight = int(argv[2])
    tskymapstart = MPI.Wtime()
    telepixels = np.zeros((pixelwidth*pixelheight*3),dtype=np.uint8)
    colorpixels = np.zeros((pixelwidth*pixelheight),dtype=np.uint8)
    skystartall = np.zeros((pixelwidth*pixelheight),dtype=np.uint32)
    telestartall = np.zeros((pixelwidth*pixelheight),dtype=np.uint32)
    colorall = np.zeros((pixelwidth*pixelheight),dtype=np.uint8)
    totnstepsall=np.zeros((wsize),dtype=np.uint32)
    tskymapend = MPI.Wtime()
    tskymap = tskymapend-tskymapstart

    tmin = 1.e6
    tpercparmin=1.e6
    hinit=1.e-1
    #h=1.e-4
    Router = 1000.
    Rplane = 700.
    Rs = 2.
    every = 1
    deltalamb = 1.e-1
    imagewidth = 50
    imageheight = 50
    tiny = 1.e-30
    epsilon=1.e-8
    eccentricity = 0.2
    Rfac = 1.+1.e-10
    heps = 1.e-14
    semilatusr = 10.0  

    
    tstartpp=MPI.Wtime() #percent parallelized
    numperprocess = pixelheight*pixelwidth/wsize
    skystart=np.zeros((numperprocess),dtype=np.int32)
    telestart=np.zeros((numperprocess),dtype=np.int32)
    color = np.zeros((numperprocess),dtype=np.int8)
    totnsteps=np.zeros((numperprocess),dtype=np.int32)
    trk4all=np.zeros((numperprocess),dtype=np.float)
    ttelestop = MPI.Wtime()
    ttele = ttelestop-tstartpp
    trk4=float("inf")
    for index in range(numperprocess):
        ypix = int((id*numperprocess+index)/pixelwidth)
        xpix = (id*numperprocess+index)%pixelwidth
        tstartrk4=MPI.Wtime()
        totnsteps[index],skystart[index],telestart[index],color[index]=integrateNullGeodesic(xpix, ypix, pixelheight,pixelwidth, skypixelheight,skypixelwidth,imagewidth,imageheight,Rs,Router,Rplane,eccentricity, semilatusr, epsilon, tiny, hinit,Rfac,heps)
        tendrk4=MPI.Wtime()
        trk4=min(trk4,(tendrk4-tstartrk4)/float(totnsteps[index]))
    totnstepsmax=max(totnsteps)
    tstoppp = MPI.Wtime()
    tpercpar=tstoppp-tstartpp

    comm.Barrier()
    if id==0:
        totnstepsmaxall=0
    else:
        totnstepsmaxall=None
    comm.Barrier()

    totnstepsmaxall=comm.reduce(totnstepsmax,op=MPI.MAX,root=0)
    tskymapall = comm.reduce(tskymap, op=MPI.MAX, root=0)
    tteleall = comm.reduce(ttele,op=MPI.MAX,root=0)
    comm.Gatherv(skystart,skystartall,root=0)
    comm.Gatherv(telestart, telestartall, root=0)
    comm.Gatherv(color,colorall, root=0)
    trk4min=comm.reduce(trk4,op=MPI.MIN,root=0)
    comm.Barrier()
    tend = MPI.Wtime()
    tall = tend-tstart
    if id==0:
        tindexstart = MPI.Wtime()
        for index in range(pixelheight*pixelwidth):

            if(colorall[index]==1):
                telepixels[telestartall[index]:telestartall[index]+3]=skypixels[skystartall[index]:skystartall[index]+3]
            else:
                telepixels[telestartall[index]]=255 #leave other two indices zero,red
        tindexend = MPI.Wtime()
        tindex = tindexend-tindexstart
    if id==0:
        twritestart = MPI.Wtime()
        ftele = open('teleview_{pw}_{ph}_{ws}.png'.format(pw=pixelwidth,ph=pixelheight,ws=wsize), "w")
        telewrite=Writer(width=pixelwidth,height=pixelheight,greyscale=False,alpha=False)
        telewrite.write_array(ftele,telepixels)
        ftele.close()
        twriteend=MPI.Wtime()
        twrite = twriteend-twritestart
    fsky.close()
    comm.Barrier()
    tmax = comm.reduce(tall,MPI.MAX,root=0)
    tpercparmin = comm.reduce(tpercpar/tall,op=MPI.MIN,root=0)
    comm.Barrier()
    if (id==0):
#        print("Telescope dimensions in M", 2.*imagewidth, 2.*imageheight)
#        print("Telescope resolution", pixelwidth, pixelheight)
#        print("Skymap resolution", skypixelwidth, skypixelheight)
#        print("Schwarzschild radius in M", 2.*Rs)
#        print("Outer radius in M", 2.*Router)
#        print("Telescope radius in M", 2.*Rplane)
#        print("Number of processes = ",wsize)
#        print("Maximum number of integration steps taken is",totnstepsmaxall)
#        print("The time for a single step of the RK4 is",trk4min)
#        print("Total runtime = ",tmax)
#        print("Fraction parallel = ", tpercparmin)
        print pixelwidth,pixelheight,wsize,totnstepsmaxall,trk4min,tmax,tpercparmin, tindex, twrite, tskymapall, tteleall

    MPI.Finalize()
Example #30
0
                multialignment='center', fontsize=10) 
    axes.set_title("rank {}".format(rank), fontsize=20)
    return axes

if __name__ == "__main__":
    comm = MPI.COMM_WORLD
    size = comm.Get_size()
    rank = comm.Get_rank()
    name = MPI.Get_processor_name()

    print('Size:', size)
    print('Rank:', rank)
    print('Name:', name)

    tmesh = par_regmesh(size)
    #tmesh = par_mesh(0.05, size) 
    lmesh = local_mesh(tmesh, rank)

    t0 = MPI.Wtime()
    c = coloring(lmesh, comm)
    t1 = MPI.Wtime()
    print('color time', t1-t0)

    flag = check_color(lmesh, c)
    print('Process ', rank, " with same coloring ",  np.sum(flag))

    axes = show_mesh(lmesh, c)
    #lmesh.find_edge(axes, index=flag) 
    ##show_mesh(lmesh, r1)
    plt.show()