def main():
    """Run a distributed friends-of-friends halo finder on a TPM snapshot.

    Every step is executed by all ranks of ``MPI.COMM_WORLD``:

    1. Build a 2-d domain grid on the unit box from ``split_size_2d(comm.size)``
       and exchange particles (``Position``, ``ID``) to the owning domains,
       using the linking length as the smoothing (ghost) width.
    2. Run ``cluster.fof`` locally and label every particle with the minimal
       particle ID of its local group.
    3. Repeatedly reconcile labels across ranks (``layout.gather`` /
       ``layout.exchange`` with ``numpy.fmin``) until no rank changes a label.
    4. Convert the minimal IDs to halo labels with ``assign_halo_label`` and
       compute per-halo counts, centre-of-mass positions and velocities.
    5. Rank 0 writes ``ns.output + '.halo'`` (int32 halo count, float32 linking
       length, int32 counts, float32 positions, float32 velocities).
    6. All ranks write their slice of the int32 per-particle labels into
       ``ns.output + '.grp.NN'``, one file per snapshot file, each starting
       with an 8-byte header (int32 particle count, float32 linking length).

    Reads its configuration from the module-level ``ns`` namespace
    (``filename``, ``LinkingLength``, ``nmin``, ``output``).  Returns ``None``.
    """
    comm = MPI.COMM_WORLD
    nsplit = split_size_2d(comm.size)

    grid = [
        numpy.linspace(0, 1.0, nsplit[0] + 1, endpoint=True),
        numpy.linspace(0, 1.0, nsplit[1] + 1, endpoint=True),
    ]
    domain = GridND(grid)
    if comm.rank == 0:
        logging.info('grid %s', str(grid))

    [P] = read(comm, ns.filename, TPMSnapshotFile, columns=['Position', 'ID'])

    tpos = P['Position']
    tid = P['ID']
    del P

    Ntot = sum(comm.allgather(len(tpos)))

    if comm.rank == 0:
        logging.info('Total number of particles %d, ll %g',
                     Ntot, ns.LinkingLength)
    # Linking length in box units: ns.LinkingLength scaled by Ntot ** (-1/3)
    # (exponent kept as the original truncated constant).
    ll = ns.LinkingLength * Ntot ** -0.3333333

    layout = domain.decompose(tpos, smoothing=ll * 1)

    tpos = layout.exchange(tpos)
    tid = layout.exchange(tid)

    logging.info('domain %d has %d particles', comm.rank, len(tid))

    data = cluster.dataset(tpos, boxsize=1.0)
    fof = cluster.fof(data, linking_length=ll, np=0, verbose=True)

    # initialize global labels
    minid = equiv_class(fof.labels, tid, op=numpy.fmin)[fof.labels]
    # Release the large per-domain arrays before the merge loop.
    del fof
    del data
    del tpos
    del tid

    while True:
        # merge, if a particle belongs to several ranks
        # use the global label of the minimal
        minid_new = layout.gather(minid, mode=numpy.fmin)
        minid_new = layout.exchange(minid_new)

        # on my rank, these particles have been merged
        merged = minid_new != minid
        # if no rank has merged any, we are done
        # gl is the global label (albeit with some holes)
        total = comm.allreduce(merged.sum())

        if comm.rank == 0:
            logging.info('merged %d halos', total)

        if total == 0:
            del minid_new
            break

        old = minid[merged]
        new = minid_new[merged]
        # replacesorted is given `old` in sorted order, with `new` permuted
        # to match.
        arg = old.argsort()
        new = new[arg]
        old = old[arg]
        replacesorted(minid, old, new, out=minid)

    minid = layout.gather(minid, mode=numpy.fmin)

    label = assign_halo_label(minid, comm, thresh=ns.nmin)

    N = halos.count(label, comm=comm)

    if comm.rank == 0:
        logging.info('total halos is %d', len(N))

    [P] = read(comm, ns.filename, TPMSnapshotFile, columns=['Position'])
    hpos = halos.centerofmass(label, P['Position'], boxsize=1.0, comm=comm)

    [P] = read(comm, ns.filename, TPMSnapshotFile, columns=['Velocity'])
    hvel = halos.centerofmass(label, P['Velocity'], boxsize=None, comm=comm)

    if comm.rank == 0:
        logging.info('halo sizes %s', N)
        logging.info('total groups %s', N.shape)
        logging.info('total particles %s', N.sum())
        logging.info('above %s: %s', ns.nmin, (N > ns.nmin).sum())
        # NOTE(review): entry 0 is overwritten with -1 before saving;
        # presumably label 0 means "not in any halo" -- confirm.
        N[0] = -1
        # Binary mode: tofile() emits raw bytes, which text mode may mangle.
        with open(ns.output + '.halo', 'wb') as ff:
            numpy.int32(len(N)).tofile(ff)
            numpy.float32(ns.LinkingLength).tofile(ff)
            numpy.int32(N).tofile(ff)
            numpy.float32(hpos).tofile(ff)
            numpy.float32(hvel).tofile(ff)
        logging.debug('halo positions %s', hpos)
    del N
    del hpos

    # Rank 0 creates one label file per snapshot file and writes its header;
    # the broadcast below hands the per-file particle counts to every rank.
    npart = None
    if comm.rank == 0:
        snapshot = Snapshot(ns.filename, TPMSnapshotFile)
        npart = snapshot.npart
        for i, count in enumerate(npart):
            with open(ns.output + '.grp.%02d' % i, 'wb') as ff:
                numpy.int32(count).tofile(ff)
                numpy.float32(ns.LinkingLength).tofile(ff)
    npart = comm.bcast(npart)

    # Global [start, end) range of the labels held by this rank; a single
    # allgather provides both bounds.
    counts = comm.allgather(len(label))
    start = sum(counts[:comm.rank])
    end = start + counts[comm.rank]
    label = numpy.int32(label)

    header_bytes = 8  # int32 particle count + float32 linking length
    written = 0
    filestart = 0  # global index of the first particle of file i
    for i, count in enumerate(npart):
        count = int(count)
        # Intersect this rank's range with file i, in file-local indices.
        mystart = max(start - filestart, 0)
        myend = min(end - filestart, count)
        filestart += count
        if myend <= mystart:
            continue
        with open(ns.output + '.grp.%02d' % i, 'r+b') as ff:
            ff.seek(header_bytes + mystart * 4, 0)
            label[written:written + myend - mystart].tofile(ff)
        written += myend - mystart
def main():
    """Run a distributed friends-of-friends halo finder on ``ns.datasource``.

    Every step is executed by all ranks of ``MPI.COMM_WORLD``:

    1. Build a 2-d domain grid on the unit box from ``split_size_2d(comm.size)``,
       read all positions, divide them by ``ns.datasource.BoxSize`` and exchange
       them to the owning domains, using the linking length as the smoothing
       (ghost) width.
    2. Run ``local_fof`` on each domain and label every particle with the
       minimal particle ID of its local group.
    3. Repeatedly reconcile labels across ranks (``layout.gather`` /
       ``layout.exchange`` with ``numpy.fmin``) until no rank changes a label.
    4. Convert the minimal IDs to halo labels with ``assign_halo_label`` and
       compute per-halo counts, centre-of-mass positions and velocities.
    5. Rank 0 writes ``ns.output + '.hdf5'`` with a ``FOFGroups`` dataset
       (fields ``Position``, ``Velocity``, ``Length``; attributes ``Ntot``,
       ``LinkLength``, ``BoxSize``).
    6. All ranks write their slice of the int32 per-particle labels into
       ``ns.output + '.grp.NN'`` files of at most ``512 ** 3`` labels each,
       every file starting with an 8-byte header (int32 label count, float32
       linking length).

    Reads its configuration from the module-level ``ns`` namespace
    (``datasource``, ``LinkingLength``, ``nmin``, ``output``).  Returns ``None``.
    """
    comm = MPI.COMM_WORLD
    nsplit = split_size_2d(comm.size)

    grid = [
        numpy.linspace(0, 1.0, nsplit[0] + 1, endpoint=True),
        numpy.linspace(0, 1.0, nsplit[1] + 1, endpoint=True),
    ]
    domain = GridND(grid)
    if comm.rank == 0:
        logging.info("grid %s", str(grid))

    # read in all !
    [[Position]] = ns.datasource.read(["Position"], comm, bunchsize=None)
    Position /= ns.datasource.BoxSize
    logging.info("rank %d Position shape %s", comm.rank, Position.shape)
    # max() over an empty array raises ValueError, so skip the diagnostic on
    # ranks that received no particles.
    if len(Position) > 0:
        logging.info("rank %d Position max %s",
                     comm.rank, Position.max(axis=0))

    Ntot = sum(comm.allgather(len(Position)))

    if comm.rank == 0:
        logging.info("Total number of particles %d, ll %g",
                     Ntot, ns.LinkingLength)
    # Linking length in box units: ns.LinkingLength scaled by Ntot ** (-1/3)
    # (exponent kept as the original truncated constant).
    ll = ns.LinkingLength * Ntot ** -0.3333333

    layout = domain.decompose(Position, smoothing=ll * 1)

    Position = layout.exchange(Position)

    logging.info("domain %d has %d particles", comm.rank, len(Position))

    labels = local_fof(Position, ll)
    del Position

    if comm.rank == 0:
        logging.info("local fof done")

    [[ID]] = ns.datasource.read(["ID"], comm, bunchsize=None)
    ID = layout.exchange(ID)
    # initialize global labels
    minid = equiv_class(labels, ID, op=numpy.fmin)[labels]
    del ID

    if comm.rank == 0:
        logging.info("equiv class, done")

    while True:
        # merge, if a particle belongs to several ranks
        # use the global label of the minimal
        minid_new = layout.gather(minid, mode=numpy.fmin)
        minid_new = layout.exchange(minid_new)

        # on my rank, these particles have been merged
        merged = minid_new != minid
        # if no rank has merged any, we are done
        # gl is the global label (albeit with some holes)
        total = comm.allreduce(merged.sum())

        if comm.rank == 0:
            logging.info("merged %d halos", total)

        if total == 0:
            del minid_new
            break

        old = minid[merged]
        new = minid_new[merged]
        # replacesorted is given `old` in sorted order, with `new` permuted
        # to match.
        arg = old.argsort()
        new = new[arg]
        old = old[arg]
        replacesorted(minid, old, new, out=minid)

    minid = layout.gather(minid, mode=numpy.fmin)
    del layout

    if comm.rank == 0:
        logging.info("merging, done")

    Nitem = len(minid)

    data = numpy.empty(Nitem, dtype=[("origind", "u8"), ("fofid", "u8")])
    # assign origind for recovery of ordering, since
    # we need to work in sorted fofid
    # NOTE(review): "origind" is left uninitialised here; presumably
    # assign_halo_label fills it in -- confirm.
    data["fofid"] = minid
    del minid

    label = assign_halo_label(data, comm, thresh=ns.nmin)
    label = label.copy()
    del data
    N = halos.count(label, comm=comm)

    if comm.rank == 0:
        logging.info("Length of entries %s ", str(N))
        logging.info("Length of entries %s ", N.shape[0])
        logging.info("Total particles %s ", N.sum())

    [[Position]] = ns.datasource.read(["Position"], comm, bunchsize=None)
    Position /= ns.datasource.BoxSize
    hpos = halos.centerofmass(label, Position, boxsize=1.0, comm=comm)
    del Position

    [[Velocity]] = ns.datasource.read(["Velocity"], comm, bunchsize=None)
    Velocity /= ns.datasource.BoxSize
    hvel = halos.centerofmass(label, Velocity, boxsize=None, comm=comm)
    del Velocity

    if comm.rank == 0:
        with h5py.File(ns.output + ".hdf5", "w") as ff:
            # NOTE(review): entry 0 is zeroed before saving; presumably
            # label 0 means "not in any halo" -- confirm.
            N[0] = 0
            data = numpy.empty(
                shape=(len(N),),
                dtype=[
                    ("Position", ("f4", 3)),
                    ("Velocity", ("f4", 3)),
                    ("Length", "i4"),
                ],
            )
            data["Position"] = hpos
            data["Velocity"] = hvel
            data["Length"] = N

            # do not create dataset then fill because of
            # https://github.com/h5py/h5py/pull/606
            dataset = ff.create_dataset(name="FOFGroups", data=data)
            dataset.attrs["Ntot"] = Ntot
            dataset.attrs["LinkLength"] = ns.LinkingLength
            dataset.attrs["BoxSize"] = ns.datasource.BoxSize

    del N
    del hpos

    # Split the global label array evenly over files of at most 512**3
    # entries each.
    labels_per_file = 512 ** 3
    Ntot = comm.allreduce(len(label))
    nfile = (Ntot + labels_per_file - 1) // labels_per_file
    npart = [
        (i + 1) * Ntot // nfile - i * Ntot // nfile
        for i in range(nfile)
    ]

    # Rank 0 creates every label file and writes its header.
    if comm.rank == 0:
        for i, count in enumerate(npart):
            # Binary mode: tofile() emits raw bytes, which text mode may
            # mangle.
            with open(ns.output + ".grp.%02d" % i, "wb") as ff:
                numpy.int32(count).tofile(ff)
                numpy.float32(ns.LinkingLength).tofile(ff)

    # Global [start, end) range of the labels held by this rank; a single
    # allgather provides both bounds (and orders the writes below after the
    # header creation on rank 0).
    counts = comm.allgather(len(label))
    start = sum(counts[:comm.rank])
    end = start + counts[comm.rank]
    label = numpy.int32(label)

    header_bytes = 8  # int32 label count + float32 linking length
    written = 0
    filestart = 0  # global index of the first label of file i
    for i, count in enumerate(npart):
        # Intersect this rank's range with file i, in file-local indices.
        mystart = max(start - filestart, 0)
        myend = min(end - filestart, count)
        filestart += count
        if myend <= mystart:
            continue
        with open(ns.output + ".grp.%02d" % i, "r+b") as ff:
            ff.seek(header_bytes + mystart * 4, 0)
            label[written:written + myend - mystart].tofile(ff)
        written += myend - mystart