def run(self):
    """
    Run the TraceHalo Algorithm

    The halo labels read from ``self.sourcelabel`` are sorted by the
    particle IDs read from ``self.source``; the particles read from
    ``self.dest`` are sorted by their own IDs.  The sorted labels are then
    used to reduce the sorted destination particles.

    NOTE(review): this pairing presumably relies on both data sources
    holding the same set of IDs with the same number of particles on each
    rank -- confirm against the callers.

    Returns
    -------
    hpos : array_like
        per-label center of mass of ``Position / self.dest.BoxSize``,
        computed with ``boxsize=1.0``
    hvel : array_like
        per-label mean of ``Velocity / self.dest.BoxSize``, computed with
        ``boxsize=None``
    N : array_like
        result of ``halos.count`` on the sorted labels
    Ntot : int
        sum over ``self.comm`` of the number of IDs read from ``self.source``
    """
    import mpsort
    from nbodykit import halos

    comm = self.comm

    with self.source.open() as source:
        [[ID]] = source.read(['ID'], full=True)

    Ntot = comm.allreduce(len(ID))

    with self.sourcelabel.open() as sourcelabel:
        [[label]] = sourcelabel.read(['Label'], full=True)

    # reorder the labels by the ID of the particle they belong to
    mpsort.sort(label, orderby=ID, comm=comm)
    del ID

    data = numpy.empty(len(label), dtype=[
        ('ID', ('i8')),
        ('Position', ('f4', 3)),
        ('Velocity', ('f4', 3)),
    ])
    with self.dest.open() as dest:
        [[data['Position'][...]]] = dest.read(['Position'], full=True)
        [[data['Velocity'][...]]] = dest.read(['Velocity'], full=True)
        [[data['ID'][...]]] = dest.read(['ID'], full=True)
    mpsort.sort(data, orderby='ID', comm=comm)

    data['Position'] /= self.dest.BoxSize
    data['Velocity'] /= self.dest.BoxSize

    # The reductions must run on the same communicator as the sorts above;
    # without an explicit comm they fall back to the library default, which
    # is only correct when self.comm happens to be that default.
    N = halos.count(label, comm=comm)
    hpos = halos.centerofmass(label, data['Position'], boxsize=1.0, comm=comm)
    hvel = halos.centerofmass(label, data['Velocity'], boxsize=None, comm=comm)
    return hpos, hvel, N, Ntot
def main():
    """
    Parallel friends-of-friends halo finder for a TPM snapshot.

    All options come from the module-level ``ns`` namespace (``filename``,
    ``LinkingLength``, ``nmin``, ``output``).

    Steps
    -----
    1. decompose the unit box on a 2-d process grid and exchange particles
       (with a ghost layer of one linking length);
    2. run the local FOF on every rank and label each group by the minimal
       particle ID it contains;
    3. iterate gather/exchange until no label changes on any rank, which
       merges groups that straddle domain boundaries;
    4. convert the labels to halo labels with ``assign_halo_label`` and
       compute per-halo counts, positions and velocities;
    5. rank 0 writes ``<output>.halo`` (int32 number of entries, float32
       linking length, int32 counts, float32 positions, float32 velocities);
    6. every rank writes its slice of the int32 labels into
       ``<output>.grp.NN``, each of which starts with an 8 byte header
       (int32 particle count, float32 linking length).
    """
    comm = MPI.COMM_WORLD
    procs = split_size_2d(comm.size)

    grid = [
        numpy.linspace(0, 1.0, procs[0] + 1, endpoint=True),
        numpy.linspace(0, 1.0, procs[1] + 1, endpoint=True),
    ]
    domain = GridND(grid)
    if comm.rank == 0:
        logging.info('grid %s' % str(grid))

    [P] = read(comm, ns.filename, TPMSnapshotFile, columns=['Position', 'ID'])

    tpos = P['Position']
    tid = P['ID']
    del P

    Ntot = sum(comm.allgather(len(tpos)))

    if comm.rank == 0:
        logging.info('Total number of particles %d, ll %g' % (Ntot, ns.LinkingLength))
    # linking length in box units: LinkingLength times Ntot ** (-1/3)
    ll = ns.LinkingLength * Ntot ** -0.3333333

    layout = domain.decompose(tpos, smoothing=ll * 1)

    tpos = layout.exchange(tpos)
    tid = layout.exchange(tid)

    logging.info('domain %d has %d particles' % (comm.rank, len(tid)))

    data = cluster.dataset(tpos, boxsize=1.0)
    fof = cluster.fof(data, linking_length=ll, np=0, verbose=True)

    # initialize global labels
    minid = equiv_class(fof.labels, tid, op=numpy.fmin)[fof.labels]
    del fof
    del data
    del tpos
    del tid

    while True:
        # merge, if a particle belongs to several ranks
        # use the global label of the minimal
        minid_new = layout.gather(minid, mode=numpy.fmin)
        minid_new = layout.exchange(minid_new)

        # on my rank, these particles have been merged
        merged = minid_new != minid
        # if no rank has merged any, we are done
        # gl is the global label (albeit with some holes)
        total = comm.allreduce(merged.sum())

        if comm.rank == 0:
            # single-argument print(...) emits the same text under
            # Python 2 and Python 3
            print('merged  %s halos' % (total,))

        if total == 0:
            del minid_new
            break
        old = minid[merged]
        new = minid_new[merged]
        arg = old.argsort()
        new = new[arg]
        old = old[arg]
        replacesorted(minid, old, new, out=minid)

    minid = layout.gather(minid, mode=numpy.fmin)

    label = assign_halo_label(minid, comm, thresh=ns.nmin)

    N = halos.count(label, comm=comm)

    if comm.rank == 0:
        print('total halos is %s' % (len(N),))

    [P] = read(comm, ns.filename, TPMSnapshotFile, columns=['Position'])
    hpos = halos.centerofmass(label, P['Position'], boxsize=1.0, comm=comm)

    [P] = read(comm, ns.filename, TPMSnapshotFile, columns=['Velocity'])
    hvel = halos.centerofmass(label, P['Velocity'], boxsize=None, comm=comm)

    if comm.rank == 0:
        print(N)
        print('total groups %s' % (N.shape,))
        print('total particles %s' % (N.sum(),))
        print('above  %s %s' % (ns.nmin, (N > ns.nmin).sum()))
        N[0] = -1
        # raw binary dump: the file must be opened in binary mode
        with open(ns.output + '.halo', 'wb') as ff:
            numpy.int32(len(N)).tofile(ff)
            numpy.float32(ns.LinkingLength).tofile(ff)
            numpy.int32(N).tofile(ff)
            numpy.float32(hpos).tofile(ff)
            numpy.float32(hvel).tofile(ff)
        print(hpos)
    del N
    del hpos

    npart = None
    if comm.rank == 0:
        snapshot = Snapshot(ns.filename, TPMSnapshotFile)
        npart = snapshot.npart
        # create one label file per snapshot file, holding only the header
        for i, n in enumerate(npart):
            with open(ns.output + '.grp.%02d' % i, 'wb') as ff:
                numpy.int32(n).tofile(ff)
                numpy.float32(ns.LinkingLength).tofile(ff)
    # the broadcast also guarantees the files exist before anyone writes
    npart = comm.bcast(npart)

    # global index range [start, end) of the labels held by this rank
    counts = comm.allgather(len(label))
    start = sum(counts[:comm.rank])
    end = start + counts[comm.rank]

    label = numpy.int32(label)
    written = 0
    filestart = 0
    for i, n in enumerate(npart):
        # my range relative to the first particle of file i
        mystart = start - filestart
        myend = end - filestart
        filestart += n
        if myend <= 0 or mystart >= n:
            # no overlap with this file
            continue
        myend = min(myend, n)
        mystart = max(mystart, 0)
        with open(ns.output + '.grp.%02d' % i, 'r+b') as ff:
            # skip the 8 byte header, then 4 bytes per preceding label
            ff.seek(8 + mystart * 4)
            label[written:written + myend - mystart].tofile(ff)
        written += myend - mystart

    return
def fof_catalogue(datasource, label, comm, calculate_initial=False):
    """
    Build a catalogue of halos from per-particle halo labels.

    For every label value the number of particles, the center of mass of
    the positions and the mean of the velocities are computed; both columns
    are divided by ``datasource.BoxSize`` before the reduction.

    Parameters
    ----------
    datasource : DataSource
        must provide ``Position`` and ``Velocity`` (and ``InitialPosition``
        when `calculate_initial` is set); ``datasource.BoxSize`` is used
        as the normalisation.
    label : array_like
        halo label of the particles of the data source.
    comm : MPI.Comm
        the MPI communicator; must agree with the data source.
    calculate_initial : bool, optional
        if True, the center of mass of ``InitialPosition`` is computed as
        well and stored in an extra ``InitialPosition`` column.

    Returns
    -------
    catalogue : array_like
        structured array with the fields ``Position``, ``Velocity`` and
        ``Length`` (plus ``InitialPosition`` on request).  Only rank 0
        receives the filled catalogue; every other rank receives an empty
        array of the same dtype.  Entry 0 collects the particles that are
        not in any halo and its ``Length`` is reset to 0.

        NOTE(review): earlier documentation states that the most massive
        halo comes first; the ordering is entirely determined by `label`
        and is not established in this function.
    """
    def _reduce_column(column, boxsize):
        # read one column, scale it to box units, reduce it per halo
        with datasource.open() as stream:
            [[values]] = stream.read([column], full=True)
        values /= datasource.BoxSize
        return halos.centerofmass(label, values, boxsize=boxsize, comm=comm)

    dtype = [
        ('Position', ('f4', 3)),
        ('Velocity', ('f4', 3)),
        ('Length', 'i4'),
    ]

    N = halos.count(label, comm=comm)

    # explicitly open the DataSource
    with datasource.keep_cache():
        hpos = _reduce_column('Position', 1.0)
        hvel = _reduce_column('Velocity', None)
        if calculate_initial:
            dtype.append(('InitialPosition', ('f4', 3)))
            hpos_init = _reduce_column('InitialPosition', 1.0)

    if comm.rank != 0:
        # only the root rank carries the catalogue
        return numpy.empty(shape=0, dtype=dtype)

    logger.info("Calculated catalogue %d halos found. " % (len(N) - 1))
    logger.info("Length = %s " % N[1:])
    logger.info("%d particles not in halo" % N[0])

    catalogue = numpy.empty(shape=len(N), dtype=dtype)
    catalogue['Position'] = hpos
    catalogue['Velocity'] = hvel
    catalogue['Length'] = N
    catalogue['Length'][0] = 0
    if calculate_initial:
        catalogue['InitialPosition'] = hpos_init
    return catalogue
def main():
    """
    Trace halo labels from one data source to another and write the
    resulting group positions.

    All options come from the module-level ``ns`` namespace
    (``halolabel``, ``datasource_tf``, ``datasource_ti``, ``output``).

    The labels from ``ns.halolabel`` are attached to the IDs of
    ``ns.datasource_tf`` and sorted by ID; the positions of
    ``ns.datasource_ti`` are sorted by their own IDs.  The sorted labels are
    then used to compute the per-label count and center of mass of the
    ``datasource_ti`` positions, which rank 0 writes to the dataset
    ``TracedFOFGroups`` of ``<output>.hdf5``.  The ``Velocity`` column of
    that dataset is filled with zeros.

    NOTE(review): the second buffer is sized with the particle count of
    ``datasource_tf``, so both data sources presumably must deliver the same
    number of particles on each rank -- confirm.
    """
    comm = MPI.COMM_WORLD

    LABEL = None
    if comm.rank == 0:
        LABEL = files.Snapshot(ns.halolabel, files.HaloLabelFile)
    LABEL = comm.bcast(LABEL)

    Ntot = sum(LABEL.npart)

    [[ID]] = ns.datasource_tf.read(['ID'], comm, bunchsize=None)

    # global index range [start, end) of the particles held by this rank
    counts = comm.allgather(len(ID))
    start = sum(counts[:comm.rank])
    end = start + counts[comm.rank]

    data = numpy.empty(end - start, dtype=[
        ('Label', ('i4')),
        ('ID', ('i8')),
    ])
    data['ID'] = ID
    del ID
    data['Label'] = LABEL.read("Label", start, end)

    mpsort.sort(data, orderby='ID')

    # keep only the labels; the copy lets the structured buffer be released
    label = data['Label'].copy()

    data = numpy.empty(end - start, dtype=[
        ('ID', ('i8')),
        ('Position', ('f4', 3)),
    ])
    [[data['Position'][...]]] = ns.datasource_ti.read(['Position'], comm, bunchsize=None)
    [[data['ID'][...]]] = ns.datasource_ti.read(['ID'], comm, bunchsize=None)
    mpsort.sort(data, orderby='ID')

    pos = data['Position'] / ns.datasource_ti.BoxSize
    del data

    N = halos.count(label)
    hpos = halos.centerofmass(label, pos, boxsize=1.0)

    if comm.rank == 0:
        logging.info("Total number of halos: %d" % len(N))
        logging.info("N %s" % str(N))
        # NOTE(review): this value is read but never used or stored; kept
        # so that the access to the first label file still happens.
        LinkingLength = LABEL.get_file(0).linking_length

        with h5py.File(ns.output + '.hdf5', 'w') as ff:
            N[0] = 0
            data = numpy.empty(shape=(len(N),), dtype=[
                ('Position', ('f4', 3)),
                ('Velocity', ('f4', 3)),
                ('Length', 'i4'),
            ])

            data['Position'] = hpos
            data['Velocity'] = 0
            data['Length'] = N

            # do not create dataset then fill because of
            # https://github.com/h5py/h5py/pull/606
            dataset = ff.create_dataset(name='TracedFOFGroups', data=data)
            dataset.attrs['Ntot'] = Ntot
            dataset.attrs['BoxSize'] = ns.datasource_ti.BoxSize
            dataset.attrs['ti'] = ns.datasource_ti.string
            dataset.attrs['tf'] = ns.datasource_tf.string

        # parenthesised so the full file name is what gets formatted
        logging.info("Written %s" % (ns.output + '.hdf5'))
def main():
    """
    Parallel friends-of-friends halo finder for a generic data source.

    All options come from the module-level ``ns`` namespace
    (``datasource``, ``LinkingLength``, ``nmin``, ``output``).

    Steps
    -----
    1. read the positions, scale them by ``ns.datasource.BoxSize`` and
       exchange them on a 2-d domain decomposition of the unit box (with a
       ghost layer of one linking length);
    2. run ``local_fof`` on every rank and label each group by the minimal
       particle ID it contains;
    3. iterate gather/exchange until no label changes on any rank, which
       merges groups that straddle domain boundaries;
    4. convert the labels to halo labels with ``assign_halo_label`` and
       compute per-halo counts, positions and velocities;
    5. rank 0 writes the dataset ``FOFGroups`` of ``<output>.hdf5``;
    6. every rank writes its slice of the int32 labels into
       ``<output>.grp.NN``; each file starts with an 8 byte header (int32
       particle count, float32 linking length) and the particles are split
       evenly into files of at most ``512 ** 3`` labels.
    """
    comm = MPI.COMM_WORLD
    procs = split_size_2d(comm.size)

    grid = [
        numpy.linspace(0, 1.0, procs[0] + 1, endpoint=True),
        numpy.linspace(0, 1.0, procs[1] + 1, endpoint=True),
    ]
    domain = GridND(grid)
    if comm.rank == 0:
        logging.info("grid %s" % str(grid))

    # read in all !
    [[Position]] = ns.datasource.read(["Position"], comm, bunchsize=None)
    Position /= ns.datasource.BoxSize
    # single-argument print(...) emits the same text under Python 2 and 3
    print(Position.shape)
    print(Position.max(axis=0))
    Ntot = sum(comm.allgather(len(Position)))

    if comm.rank == 0:
        logging.info("Total number of particles %d, ll %g" % (Ntot, ns.LinkingLength))
    # linking length in box units: LinkingLength times Ntot ** (-1/3)
    ll = ns.LinkingLength * Ntot ** -0.3333333

    layout = domain.decompose(Position, smoothing=ll * 1)

    Position = layout.exchange(Position)

    logging.info("domain %d has %d particles" % (comm.rank, len(Position)))

    labels = local_fof(Position, ll)
    del Position

    if comm.rank == 0:
        logging.info("local fof done")

    [[ID]] = ns.datasource.read(["ID"], comm, bunchsize=None)
    ID = layout.exchange(ID)
    # initialize global labels
    minid = equiv_class(labels, ID, op=numpy.fmin)[labels]
    del ID

    if comm.rank == 0:
        logging.info("equiv class, done")

    while True:
        # merge, if a particle belongs to several ranks
        # use the global label of the minimal
        minid_new = layout.gather(minid, mode=numpy.fmin)
        minid_new = layout.exchange(minid_new)

        # on my rank, these particles have been merged
        merged = minid_new != minid
        # if no rank has merged any, we are done
        # gl is the global label (albeit with some holes)
        total = comm.allreduce(merged.sum())

        if comm.rank == 0:
            logging.info("merged %d halos", total)

        if total == 0:
            del minid_new
            break
        old = minid[merged]
        new = minid_new[merged]
        arg = old.argsort()
        new = new[arg]
        old = old[arg]
        replacesorted(minid, old, new, out=minid)

    minid = layout.gather(minid, mode=numpy.fmin)
    del layout

    if comm.rank == 0:
        logging.info("merging, done")

    Nitem = len(minid)

    data = numpy.empty(Nitem, dtype=[("origind", "u8"), ("fofid", "u8")])
    # assign origind for recovery of ordering, since
    # we need to work in sorted fofid
    data["fofid"] = minid
    del minid

    label = assign_halo_label(data, comm, thresh=ns.nmin)
    label = label.copy()
    del data
    N = halos.count(label, comm=comm)

    if comm.rank == 0:
        logging.info("Length of entries %s ", str(N))
        logging.info("Length of entries %s ", N.shape[0])
        logging.info("Total particles %s ", N.sum())

    [[Position]] = ns.datasource.read(["Position"], comm, bunchsize=None)
    Position /= ns.datasource.BoxSize
    hpos = halos.centerofmass(label, Position, boxsize=1.0, comm=comm)
    del Position

    [[Velocity]] = ns.datasource.read(["Velocity"], comm, bunchsize=None)
    Velocity /= ns.datasource.BoxSize
    hvel = halos.centerofmass(label, Velocity, boxsize=None, comm=comm)
    del Velocity

    if comm.rank == 0:
        with h5py.File(ns.output + ".hdf5", "w") as ff:
            N[0] = 0
            data = numpy.empty(
                shape=(len(N),),
                dtype=[("Position", ("f4", 3)), ("Velocity", ("f4", 3)), ("Length", "i4")],
            )

            data["Position"] = hpos
            data["Velocity"] = hvel
            data["Length"] = N

            # do not create dataset then fill because of
            # https://github.com/h5py/h5py/pull/606
            dataset = ff.create_dataset(name="FOFGroups", data=data)
            dataset.attrs["Ntot"] = Ntot
            dataset.attrs["LinkLength"] = ns.LinkingLength
            dataset.attrs["BoxSize"] = ns.datasource.BoxSize

    del N
    del hpos

    Ntot = comm.allreduce(len(label))
    nfile = (Ntot + 512 ** 3 - 1) // (512 ** 3)

    # number of labels stored in each output file
    npart = [(i + 1) * Ntot // nfile - i * Ntot // nfile for i in range(nfile)]

    if comm.rank == 0:
        # create the label files, holding only the header; these are raw
        # binary dumps, so the files must be opened in binary mode
        for i, n in enumerate(npart):
            with open(ns.output + ".grp.%02d" % i, "wb") as ff:
                numpy.int32(n).tofile(ff)
                numpy.float32(ns.LinkingLength).tofile(ff)

    # Global index range [start, end) of the labels held by this rank.
    # This collective must stay AFTER the file creation above: no rank can
    # complete it before rank 0 has entered it, which is what guarantees
    # that the files exist before anyone opens them with 'r+b'.
    counts = comm.allgather(len(label))
    start = sum(counts[: comm.rank])
    end = start + counts[comm.rank]

    label = numpy.int32(label)
    written = 0
    filestart = 0
    for i, n in enumerate(npart):
        # my range relative to the first label of file i
        mystart = start - filestart
        myend = end - filestart
        filestart += n
        if myend <= 0 or mystart >= n:
            # no overlap with this file
            continue
        myend = min(myend, n)
        mystart = max(mystart, 0)
        with open(ns.output + ".grp.%02d" % i, "r+b") as ff:
            # skip the 8 byte header, then 4 bytes per preceding label
            ff.seek(8 + mystart * 4)
            label[written : written + myend - mystart].tofile(ff)
        written += myend - mystart

    return
def fof_catalogue(datasource, label, comm, calculate_initial=False):
    """
    Reduce per-particle halo labels to one catalogue row per label.

    Each requested column is read from the data source, divided by
    ``datasource.BoxSize`` and reduced per label with
    ``halos.centerofmass``: positions with ``boxsize=1.0``, velocities with
    ``boxsize=None``.  The number of particles per label comes from
    ``halos.count``.

    Parameters
    ----------
    datasource : DataSource
        must provide ``Position`` and ``Velocity`` (and ``InitialPosition``
        when `calculate_initial` is set); ``datasource.BoxSize`` is used
        as the normalisation.
    label : array_like
        halo label of the particles of the data source.
    comm : MPI.Comm
        the MPI communicator; must agree with the data source.
    calculate_initial : bool, optional
        also reduce ``InitialPosition`` and add it as an extra column.

    Returns
    -------
    catalogue : array_like
        structured array with the fields ``Position``, ``Velocity`` and
        ``Length`` (plus ``InitialPosition`` on request).  It is filled on
        rank 0 only; the other ranks get a zero-length array of the same
        dtype.  Row 0 stands for the particles outside any halo and its
        ``Length`` is forced to 0.

        NOTE(review): whether the rows are ordered by halo mass depends on
        how `label` was produced; nothing in this function sorts them.
    """
    # (column name, boxsize passed to the reduction), in processing order
    columns = [('Position', 1.0), ('Velocity', None)]
    if calculate_initial:
        columns.append(('InitialPosition', 1.0))

    dtype = [('Position', ('f4', 3)), ('Velocity', ('f4', 3)), ('Length', 'i4')]
    if calculate_initial:
        dtype.append(('InitialPosition', ('f4', 3)))

    N = halos.count(label, comm=comm)

    reduced = {}
    # explicitly open the DataSource
    with datasource.keep_cache():
        for column, boxsize in columns:
            with datasource.open() as stream:
                [[values]] = stream.read([column], full=True)
            values /= datasource.BoxSize
            reduced[column] = halos.centerofmass(label, values, boxsize=boxsize, comm=comm)
            # release the column before the next one is read
            del values

    on_root = comm.rank == 0
    if on_root:
        logger.info("Calculated catalogue %d halos found. " % (len(N) - 1))
        logger.info("Length = %s " % N[1:])
        logger.info("%d particles not in halo" % N[0])

    if on_root:
        catalogue = numpy.empty(shape=len(N), dtype=dtype)
        for column, _ in columns:
            catalogue[column] = reduced[column]
        catalogue['Length'] = N
        catalogue['Length'][0] = 0
    else:
        catalogue = numpy.empty(shape=0, dtype=dtype)

    return catalogue