def main(ns):
    # copy a column block in fixed-size chunks to keep memory bounded
    def copy(bbi, bbo, chunksize=1024*1024):
        for i in range(0, bbi.size, chunksize):
            bbo.write(i, bbi[i:i+chunksize])

    with bigfile.File(ns.source) as bfi, \
         bigfile.File(ns.dest, create=True) as bfo:
        header = bfi['Header']

        with bfi['1/Position'] as bbi:
            with bfo.create('1/Position', dtype=bbi.dtype, size=bbi.size, Nfile=bbi.Nfile) as bbo:
                copy(bbi, bbo)
            npart = bbi.size
            Nfile = bbi.Nfile

        with bfi['1/Velocity'] as bbi:
            with bfo.create('1/Velocity', dtype=bbi.dtype, size=bbi.size, Nfile=bbi.Nfile) as bbo:
                copy(bbi, bbo)

        with bfi['1/ID'] as bbi:
            with bfo.create('1/ID', dtype=bbi.dtype, size=bbi.size, Nfile=bbi.Nfile) as bbo:
                copy(bbi, bbo)

        # materialize a per-particle mass column from the header mass table
        with bfo.create('1/Mass', dtype='f4', size=npart, Nfile=Nfile) as bbo:
            mass = numpy.broadcast_to(header.attrs['MassTable'][1], npart)
            copy(mass, bbo)

        # mirror the header attributes into the new file
        with bfo.create('Header') as bbo:
            for key in header.attrs:
                bbo.attrs[key] = header.attrs[key]
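# Hedged usage sketch for the copy script above: `ns` is assumed to be an
# argparse namespace carrying the `source` and `dest` attributes the function
# reads; the parser set-up below is illustrative, not taken from the original.
if __name__ == '__main__':
    import argparse

    ap = argparse.ArgumentParser(description="Copy a bigfile snapshot, adding a 1/Mass column.")
    ap.add_argument('source', help="path of the input bigfile")
    ap.add_argument('dest', help="path of the output bigfile")
    main(ap.parse_args())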
def load_field_chunk_bigfile(field_name, dens_dir, R_smooth, n_gr, i_chunk,
                             n_chunks, Lbox, padding=40.):
    # if not smoothing
    if field_name == 'delta' and int(R_smooth) == 0:
        data = bigfile.File(dens_dir + "density_%d.bigfile" % n_gr)['Field']
    else:
        data = bigfile.File(dens_dir + field_name + "_%d.bigfile" % R_smooth)['Field']

    assert n_gr == int(np.round(data.size**(1. / 3))), "Data is incompatible with the input"

    grid_size = Lbox / n_gr
    chunk_size = Lbox / n_chunks
    assert grid_size < chunk_size, "The chunk size must be larger than the cell size"

    # starting and finishing index in the grid
    i1, i2 = (np.array([i_chunk * chunk_size - padding,
                        (i_chunk + 1) * chunk_size + padding]) // grid_size).astype(int)
    gr_depth = i2 - i1

    # make sure within box
    i1 %= n_gr
    i2 %= n_gr

    # get coordinates in the box of loaded field
    start = (i1 * grid_size) % Lbox
    end = ((i2 + 1) * grid_size) % Lbox

    # convert to indices in bigfile
    i1 *= n_gr**2
    i2 *= n_gr**2

    if i1 > i2:
        # the padded chunk wraps around the periodic box; stitch the two pieces
        data1 = data[i1:]
        data2 = data[:i2]
        n = len(data1) + len(data2)
        field_chunk = np.zeros(n, dtype=np.float32)
        field_chunk[:len(data1)] = data1
        field_chunk[len(data1):] = data2
        del data1, data2
        field_chunk = field_chunk.reshape((gr_depth, n_gr, n_gr))
    else:
        field_chunk = data[i1:i2].reshape((gr_depth, n_gr, n_gr))

    return field_chunk, start, end
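# Hedged example call for load_field_chunk_bigfile: the directory, smoothing
# scale, grid size and chunk counts below are made-up values for illustration
# only. The returned field_chunk has shape (gr_depth, n_gr, n_gr); start/end
# bound the loaded slab along the first axis in box units, including padding.
field_chunk, start, end = load_field_chunk_bigfile(
    field_name='delta', dens_dir='/path/to/density/', R_smooth=2,
    n_gr=256, i_chunk=0, n_chunks=8, Lbox=2000., padding=40.)
print(field_chunk.shape, start, end)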
def read(self, columns, start, stop, step=1):
    """
    Read the specified column(s) over the given range,
    as a dictionary.

    'start' and 'stop' should be between 0 and :attr:`size`,
    which is the total size of the binary file (in particles).
    """
    import bigfile
    with bigfile.File(filename=self.path)[self.dataset] as f:
        ds = bigfile.Dataset(f, columns)
        return ds[start:stop][::step]
def __init__(self, path, exclude=None, header=Automatic, dataset='./'):
    if not dataset.endswith('/'):
        dataset = dataset + '/'

    import bigfile
    self.dataset = dataset
    self.path = path

    # store the attributes
    self.attrs = {}

    # the file path
    with bigfile.File(filename=path) as ff:
        columns = [block for block in ff[self.dataset].blocks]
        headers = self._find_headers(header, dataset, ff)

        if exclude is None:
            # by default exclude header only.
            exclude = headers

        if not isinstance(exclude, (list, tuple)):
            exclude = [exclude]

        columns = [column for column in set(columns)
                   if not any(fnmatch(column, e) for e in exclude)]

        ds = bigfile.Dataset(ff[self.dataset], columns)

        # set the data type and size
        self.dtype = ds.dtype
        self.size = ds.size

        headers = [ff[header] for header in headers]
        all_attrs = [header.attrs for header in headers]
        for attrs in all_attrs:
            # copy over the attrs
            for k in attrs.keys():
                # load a JSON representation if str starts with json://
                if isinstance(attrs[k], string_types) and attrs[k].startswith('json://'):
                    self.attrs[k] = json.loads(attrs[k][7:], cls=JSONDecoder)
                # copy over an array
                else:
                    self.attrs[k] = numpy.array(attrs[k], copy=True)
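# Hedged usage sketch for the reader class that owns the __init__ and read
# methods above. The class name `BigFileReader`, the path, and the column
# names are placeholders (the original class name is not shown here); the
# sketch only illustrates the dtype/size/attrs inspection and a sliced read.
f = BigFileReader('/path/to/snapshot', dataset='1/')   # hypothetical path
print(f.dtype, f.size, sorted(f.attrs))
data = f.read(['Position', 'Velocity'], 0, 1024)
print(data['Position'].shape)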
def main(ns):
    f = bigfile.File(ns.source)
    ds = bigfile.Dataset(f['1/'], ['Position', 'Velocity', 'ID'])

    header = f['Header']
    print("---input file : %s -----" % ns.source)
    for key in header.attrs:
        print(key, header.attrs[key])

    gadget_header = make_gadget_header(header)

    dirname = os.path.dirname(os.path.abspath(ns.dest))
    if not os.path.exists(dirname):
        print("making dir")
        os.makedirs(dirname)

    convert(ns.dest, gadget_header, ds, ns.precision)
def gen_comp(zs):
    f = bigfile.File(folder + 'wlen_jliu/WL-%.2f-N4096' % (zs))
    nside = f['kappa'].attrs['nside'][0]
    zmin = f['kappa'].attrs['zlmin'][0]
    zmax = f['kappa'].attrs['zlmax'][0]
    #zstep = f['kappa'].attrs['zstep'][0]
    zs = f['kappa'].attrs['zs'][0]

    print('nside = ', nside)
    print('redshifts = ', zs)

    lmax = min([5000, nside])  # 5000
    ell_sim = np.arange(lmax + 1)

    print(f['kappa'][:].shape)

    fn_cl = folder + '/clkk/kappa_cl_z%.2f.npz' % (zs)
    if not os.path.isfile(fn_cl):
        cl = hp.anafast(f['kappa'][:], lmax=lmax)
        np.savez(fn_cl, ell=ell_sim, cl=cl)
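# Hedged driver sketch for gen_comp: `folder` is assumed to be a module-level
# path defined elsewhere in the original script, and the source redshifts
# below are illustrative values, not the ones used by the authors.
for zs in [0.5, 1.0, 1.5, 2.0]:
    gen_comp(zs)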
def main(ns):
    cat = Gadget1Catalog(ns.source, ptype=1)

    attrs = cat.attrs.copy()
    cat.attrs.clear()

    cat.attrs['MassTable'] = attrs['Massarr']
    cat.attrs['TotNumPart'] = numpy.int64(attrs['Nall']) + (numpy.int64(attrs['NallHW']) << 32)
    cat.attrs['TotNumPartInit'] = numpy.int64(attrs['Nall']) + (numpy.int64(attrs['NallHW']) << 32)
    cat.attrs['BoxSize'] = attrs['BoxSize']
    cat.attrs['Time'] = attrs['Time']
    cat.attrs['ScalingFactor'] = attrs['Time']

    if ns.time_ic is None:
        ns.time_ic = attrs['Time']

    cat.attrs['TimeIC'] = ns.time_ic

    cat.attrs['UnitVelocity_in_cm_per_s'] = 1e5
    if ns.unit_system == 'Mpc':
        cat.attrs['UnitLength_in_cm'] = 3.085678e24
    if ns.unit_system == 'Kpc':
        cat.attrs['UnitLength_in_cm'] = 3.085678e21

    cat.attrs['UnitMass_in_g'] = 1.989e43

    # The velocity convention is weird without this
    cat.attrs['UsePeculiarVelocity'] = True

    a = attrs['Time']
    cat['Velocity'] = cat['GadgetVelocity'] * a ** 0.5

    if ns.subsample is not None:
        cat = cat[::ns.subsample]

    cat.save(ns.dest, columns=['Position', 'Velocity', 'ID'], dataset='1', header='Header')

    with bigfile.File(ns.dest) as bf:
        with bf.create("1") as bb:
            bb.attrs['a.x'] = attrs['Time']
            bb.attrs['a.v'] = attrs['Time']
            bb.attrs['M0'] = attrs['Massarr'][1]
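# Hedged command-line sketch for the Gadget-1 -> bigfile converter above: the
# flag names mirror the namespace attributes the function reads (source, dest,
# time_ic, unit_system, subsample) but are otherwise assumptions, not the
# original script's interface.
if __name__ == '__main__':
    import argparse

    ap = argparse.ArgumentParser()
    ap.add_argument('source')
    ap.add_argument('dest')
    ap.add_argument('--time-ic', dest='time_ic', type=float, default=None)
    ap.add_argument('--unit-system', dest='unit_system', choices=['Mpc', 'Kpc'], default='Mpc')
    ap.add_argument('--subsample', type=int, default=None)
    main(ap.parse_args())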
def main(ns):
    if ns.zlmax is None:
        ns.zlmax = max(ns.zs)

    zs_list = ns.zs
    ###### JL hardcode zs_list
    #zs_list = numpy.arange(ns.zs, 2.21, 0.1)
    zs_list = ns.zs

    zlmin = ns.zlmin
    zlmax = zs_list[-1]  # ns.zlmax

    # no need to be accurate here
    ds_list = Planck15.comoving_distance(zs_list)

    path = ns.source

    cat = BigFileCatalog(path, dataset=ns.dataset)

    kappa = 0
    Nm = 0
    kappabar = 0

    npix = healpix.nside2npix(ns.nside)
    localsize = npix * (cat.comm.rank + 1) // cat.comm.size - npix * (cat.comm.rank) // cat.comm.size
    nbar = (cat.attrs['NC'] ** 3 / cat.attrs['BoxSize'] ** 3 * cat.attrs['ParticleFraction'])[0]
    # print('DEBUG BoxSize', cat.attrs['BoxSize'])

    Nsteps = int(numpy.round((zlmax - zlmin) / ns.zstep))
    if Nsteps < 2:
        Nsteps = 2

    z = numpy.linspace(zlmax, zlmin, Nsteps + 1, endpoint=True)
    if cat.comm.rank == 0:
        cat.logger.info("Splitting data redshift bins %s" % str(z))

    kappa_all = numpy.zeros((Nsteps, len(zs_list), localsize))
    for i, (z1, z2) in enumerate(zip(z[:-1], z[1:])):
        import gc
        gc.collect()
        if cat.comm.rank == 0:
            cat.logger.info("nbar = %g, zlmin = %g, zlmax = %g zs = %s" % (nbar, z2, z1, zs_list))

        slice = read_range(cat, 1 / (1 + z1), 1 / (1 + z2))

        if slice.csize == 0:
            continue
        if cat.comm.rank == 0:
            cat.logger.info("read %d particles" % slice.csize)

        kappa1, kappa1bar, Nm1 = make_kappa_maps(slice, ns.nside, zs_list, ds_list, localsize, nbar)

        kappa = kappa + kappa1
        kappa_all[i] = kappa1
        Nm = Nm + Nm1
        kappabar = kappabar + kappa1bar

    cat.comm.barrier()

    if cat.comm.rank == 0:
        # use bigfile because it allows concurrent write to different datasets.
        cat.logger.info("writing to %s", ns.output)

    # array to get all map slices
    if cat.comm.rank == 0:
        kappa1_all = numpy.zeros((Nsteps, int(12 * ns.nside ** 2)))

    for i, (zs, ds) in enumerate(zip(zs_list, ds_list)):
        std = numpy.std(cat.comm.allgather(len(kappa[i])))
        mean = numpy.mean(cat.comm.allgather(len(kappa[i])))
        if cat.comm.rank == 0:
            cat.logger.info("started gathering source plane %s, size-var = %g, size-bar = %g" % (zs, std, mean))

        kappa1 = GatherArray(kappa[i], cat.comm)
        Nm1 = GatherArray(Nm[i], cat.comm)

        # get slices of kappa map
        for j in range(Nsteps):
            kappa1_allj = GatherArray(kappa_all[j, i], cat.comm)
            if cat.comm.rank == 0:
                kappa1_all[j] = kappa1_allj

        if cat.comm.rank == 0:
            cat.logger.info("done gathering source plane %s" % zs)

        if cat.comm.rank == 0:
            fname = ns.output + "/WL-%02.2f-N%04d" % (zs, ns.nside)
            cat.logger.info("started writing source plane %s" % zs)

            with bigfile.File(fname, create=True) as ff:
                print('DEBUG', kappa1_all.shape, len(kappa1_all),
                      numpy.dtype((kappa1_all.dtype, kappa1_all.shape[1:])))
                ds1 = ff.create_from_array("kappa", kappa1, Nfile=1)
                ds2 = ff.create_from_array("Nm", Nm1, Nfile=1)
                #ds3 = ff.create_from_array("kappa_all", kappa1_all.T, Nfile=1)  #, memorylimit=1024*1024*1024)

                for d in ds1, ds2:  #, ds3:
                    d.attrs['kappabar'] = kappabar[i]
                    d.attrs['nside'] = ns.nside
                    d.attrs['zlmin'] = zlmin
                    d.attrs['zlmax'] = zlmax
                    d.attrs['zstep'] = ns.zstep
                    d.attrs['zs'] = zs
                    d.attrs['ds'] = ds
                    d.attrs['nbar'] = nbar

        cat.comm.barrier()

        if cat.comm.rank == 0:
            # use bigfile because it allows concurrent write to different datasets.
            cat.logger.info("source plane at %g written. " % zs)
def main(ns):
    if ns.zlmax is None:
        ns.zlmax = max(ns.zs)

    zs_list = ns.zs
    zlmin = ns.zlmin
    zlmax = ns.zlmax

    # no need to be accurate here
    ds_list = Planck15.comoving_distance(zs_list)

    path = ns.source
    #'/global/cscratch1/sd/yfeng1/m3127/desi/1536-9201-40eae2464/lightcone/usmesh/'

    cat = BigFileCatalog(path, dataset=ns.dataset)

    kappa = 0
    Nm = 0
    kappabar = 0

    npix = healpix.nside2npix(ns.nside)
    localsize = npix * (cat.comm.rank + 1) // cat.comm.size - npix * (cat.comm.rank) // cat.comm.size
    nbar = (cat.attrs['NC']**3 / cat.attrs['BoxSize']**3 * cat.attrs['ParticleFraction'])[0]

    Nsteps = int(numpy.round((zlmax - zlmin) / ns.zstep))
    if Nsteps < 2:
        Nsteps = 2
    z = numpy.linspace(zlmax, zlmin, Nsteps, endpoint=True)

    if cat.comm.rank == 0:
        cat.logger.info("Splitting data redshift bins %s" % str(z))

    for z1, z2 in zip(z[:-1], z[1:]):
        import gc
        gc.collect()
        if cat.comm.rank == 0:
            cat.logger.info("nbar = %g, zlmin = %g, zlmax = %g zs = %s" % (nbar, z2, z1, zs_list))

        slice = read_range(cat, 1 / (1 + z1), 1 / (1 + z2))
        if slice.csize == 0:
            continue
        if cat.comm.rank == 0:
            cat.logger.info("read %d particles" % slice.csize)

        kappa1, kappa1bar, Nm1 = make_kappa_maps(slice, ns.nside, zs_list, ds_list, localsize, nbar)

        kappa = kappa + kappa1
        Nm = Nm + Nm1
        kappabar = kappabar + kappa1bar

    cat.comm.barrier()

    if cat.comm.rank == 0:
        # use bigfile because it allows concurrent write to different datasets.
        cat.logger.info("writing to %s", ns.output)

    for i, (zs, ds) in enumerate(zip(zs_list, ds_list)):
        std = numpy.std(cat.comm.allgather(len(kappa[i])))
        mean = numpy.mean(cat.comm.allgather(len(kappa[i])))
        if cat.comm.rank == 0:
            cat.logger.info("started gathering source plane %s, size-var = %g, size-bar = %g" % (zs, std, mean))

        kappa1 = GatherArray(kappa[i], cat.comm)
        Nm1 = GatherArray(Nm[i], cat.comm)

        if cat.comm.rank == 0:
            cat.logger.info("done gathering source plane %s" % zs)

        if cat.comm.rank == 0:
            fname = ns.output + "/WL-%02.2f-N%04d" % (zs, ns.nside)
            cat.logger.info("started writing source plane %s" % zs)

            with bigfile.File(fname, create=True) as ff:
                ds1 = ff.create_from_array("kappa", kappa1, Nfile=1)
                ds2 = ff.create_from_array("Nm", Nm1, Nfile=1)
                for d in ds1, ds2:
                    d.attrs['kappabar'] = kappabar[i]
                    d.attrs['nside'] = ns.nside
                    d.attrs['zlmin'] = zlmin
                    d.attrs['zlmax'] = zlmax
                    d.attrs['zs'] = zs
                    d.attrs['ds'] = ds
                    d.attrs['nbar'] = nbar

        cat.comm.barrier()

        if cat.comm.rank == 0:
            # use bigfile because it allows concurrent write to different datasets.
            cat.logger.info("source plane at %g written. " % zs)