def load_particle_data(self):
    """
    Loads the particle data from a snapshot using ``h5py``.
    """

    for particle_type, particle_name in zip(
        [1, 0, 4], ["dark_matter", "gas", "stars"]
    ):
        try:
            setattr(
                self,
                particle_name,
                EAGLEParticleData(
                    filename=self.filename,
                    particle_type=particle_type,
                    num_files=self.num_files_particles,
                ),
            )
        except KeyError:
            # No particles of this type (e.g. stars in ICs)
            LOGGER.info(
                (
                    f"No particles of type {particle_type} ({particle_name}) "
                    "in this file. Skipping."
                )
            )
            setattr(self, particle_name, None)

    return
def load_data(self, array_name: str):
    """
    Loads an array and returns it.

    Parameters
    ----------

    array_name: str
        Name of the array (without particle type) to read, e.g. Coordinates

    Returns
    -------

    output: np.array
        Output read from the HDF5 file

    h: float
        Hubble parameter.
    """

    full_path = f"/PartType{self.particle_type}/{array_name}"

    LOGGER.info(f"Loading data from {full_path}.")

    with h5py.File(f"{self.filename}", "r") as handle:
        h = handle["Header"].attrs["HubbleParam"]
        output = handle[full_path][:]

    return output, h
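# Usage sketch (hypothetical, not part of the original module): the caller is
# responsible for removing the h-factor from the raw values. Assuming the
# snapshot stores comoving lengths in h^-1 units, a conversion might look like:
#
#     coordinates, h = particle_data.load_data("Coordinates")
#     coordinates = coordinates / h   # remove the little-h factor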
def load_particle_data(self):
    """
    Loads the particle data from a snapshot using ``h5py``.
    """

    for particle_type, particle_name in zip(
        [1, 0, 4], ["dark_matter", "gas", "stars"]
    ):
        truncate_ids = (
            self.truncate_ids[particle_type]
            if self.truncate_ids is not None
            else None
        )

        try:
            setattr(
                self,
                particle_name,
                SIMBAParticleData(
                    filename=self.filename,
                    particle_type=particle_type,
                    truncate_ids=truncate_ids,
                ),
            )
        except KeyError:
            # No particles of this type (e.g. stars in ICs)
            LOGGER.info(
                (
                    f"No particles of type {particle_type} ({particle_name}) "
                    "in this file. Skipping."
                )
            )
            setattr(self, particle_name, None)

    return
def perform_particle_id_postprocessing(self):
    """
    Performs postprocessing on ParticleIDs to ensure that they link correctly
    (required in cases where the particle IDs are offset when new generations
    of particles are spawned).
    """

    LOGGER.info("Beginning particle ID postprocessing (empty).")

    return
def calculate_gas_lagrangian_regions(
    dark_matter_coordinates: unyt_array,
    gas_coordinates: unyt_array,
    dark_matter_lagrangian_regions: int32,
    boxsize: unyt_array,
) -> int32:
    """
    Computes the Lagrangian Regions for gas particles by using a tree of their
    co-ordinates and the (defined) Lagrangian Regions for dark matter.

    Parameters
    ----------

    dark_matter_coordinates: unyt_array[float64]
        The co-ordinates of dark matter particles

    gas_coordinates: unyt_array[float64]
        The co-ordinates of gas particles

    dark_matter_lagrangian_regions: np.array[int32]
        Lagrangian Regions of the dark matter particles.

    boxsize: unyt_array
        The box-size of the simulation so that periodic overlaps can be
        considered in the nearest neighbour calculation.

    Returns
    -------

    gas_lagrangian_regions: np.array[int32]
        Lagrangian Regions of the gas particles, based on the tree search of
        the dark matter particles.

    Notes
    -----

    The Lagrangian Region of a gas particle is defined as being the same as
    the Lagrangian Region of the closest dark matter particle.
    """

    # We should just crash here if this is not the case.
    assert gas_coordinates.units == dark_matter_coordinates.units
    boxsize = boxsize.to(gas_coordinates.units)

    LOGGER.info("Beginning treebuild")
    dark_matter_tree = cKDTree(dark_matter_coordinates.value, boxsize=boxsize.value)
    LOGGER.info("Finished treebuild")

    LOGGER.info("Beginning tree search")
    _, indices = dark_matter_tree.query(x=gas_coordinates.value, k=1, n_jobs=-1)
    LOGGER.info("Finished tree walk")

    gas_lagrangian_regions = dark_matter_lagrangian_regions[indices]

    return gas_lagrangian_regions
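# Usage sketch (hypothetical arrays; not part of the original module):
#
#     import numpy as np
#     from unyt import unyt_array
#
#     dm_coords = unyt_array(np.random.rand(1024, 3), "Mpc")
#     gas_coords = unyt_array(np.random.rand(512, 3), "Mpc")
#     dm_regions = np.arange(1024, dtype=np.int32)   # one region label per DM particle
#     boxsize = unyt_array([1.0, 1.0, 1.0], "Mpc")
#
#     gas_regions = calculate_gas_lagrangian_regions(
#         dark_matter_coordinates=dm_coords,
#         gas_coordinates=gas_coords,
#         dark_matter_lagrangian_regions=dm_regions,
#         boxsize=boxsize,
#     )
#
# Each entry of ``gas_regions`` is the region label of the dark matter
# particle closest to that gas particle.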
def find_neighbours(self):
    """
    Finds the initial state neighbours. Sets self.dark_matter_neighbours and
    self.gas_neighbours.
    """

    LOGGER.info("Beginning search for initial neighbours")
    self.dark_matter_neighbours, self.gas_neighbours = find_closest_neighbours(
        dark_matter_coordinates=self.dark_matter_initial_coordinates,
        dark_matter_ids=self.dark_matter_initial_ids,
        boxsize=self.boxsize,
        gas_coordinates=self.gas_initial_coordinates,
        gas_ids=self.gas_initial_ids,
    )
    LOGGER.info("Finished search for initial neighbours")

    return
def load_data(self, array_name: str):
    """
    Loads an array and returns it.

    Parameters
    ----------

    array_name: str
        Name of the array (without particle type) to read, e.g. Coordinates

    Returns
    -------

    output: np.array
        Output read from the HDF5 file

    units: float
        Conversion to CGS units for this type, read from CGSConversionFactor.
        Includes conversion to remove h-factor.
    """

    full_path = f"/PartType{self.particle_type}/{array_name}"

    LOGGER.info(f"Loading data from {full_path}.")

    output = []

    # Read the unit metadata from the first file in the set.
    with h5py.File(f"{self.filename}.0.hdf5", "r") as handle:
        units = handle[full_path].attrs["CGSConversionFactor"]
        h_exponent = handle[full_path].attrs["h-scale-exponent"]
        h_factor = pow(handle["Header"].attrs["HubbleParam"], h_exponent)

        units *= h_factor

    for file in range(self.num_files):
        current_filename = f"{self.filename}.{file}.hdf5"

        with h5py.File(current_filename, "r") as handle:
            output.append(handle[full_path][...])

    output = concatenate(output)

    return output, units
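# Usage sketch (hypothetical; not in the original module): the returned
# ``units`` factor converts the raw snapshot values to CGS with the h-factor
# already removed, e.g.
#
#     coordinates, to_cgs = eagle_particles.load_data("Coordinates")
#     coordinates_cgs = coordinates * to_cgs   # h-free CGS lengths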
def load_particle_data(self):
    """
    Loads the particle data from a snapshot using ``swiftsimio``.
    """

    data = load_snapshot(self.filename)

    # Let's just assume it's a box, eh?
    self.boxsize = data.metadata.boxsize[0]

    for particle_type in ["dark_matter", "gas", "stars"]:
        swift_dataset = getattr(data, particle_type, None)

        if swift_dataset is not None:
            setattr(self, particle_type, SWIFTParticleData(swift_dataset))
        else:
            LOGGER.info(f"No particles of type {particle_type} in {self.filename}")

    return
def __init__(
    self, filename: str, particle_type: int, truncate_ids: Optional[int] = None
):
    """
    Parameters
    ----------

    filename: str
        The SIMBA snapshot filename to extract the particle data from.

    particle_type: int
        The particle type to load (0, 1, 4, etc.)

    truncate_ids: int, optional
        Truncate IDs above this by using the % operator; i.e. discard higher
        bits.
    """

    super().__init__()

    self.filename = filename
    self.particle_type = particle_type
    self.truncate_ids = truncate_ids

    LOGGER.info(f"Loading particle data from particle type {particle_type}")
    self.coordinates = self.load_coordinates()
    self.masses = self.load_masses()
    self.particle_ids = self.load_particle_ids()
    LOGGER.info(f"Finished loading data from particle type {particle_type}")
    LOGGER.info(f"Loaded {self.particle_ids.size} particles")

    self.perform_particle_id_postprocessing()

    return
def perform_particle_id_postprocessing(self):
    """
    Performs postprocessing on ParticleIDs to ensure that they link correctly
    (required in cases where the particle IDs are offset when new generations
    of particles are spawned).
    """

    if self.truncate_ids is None:
        LOGGER.info("Beginning particle ID postprocessing (empty).")
    else:
        LOGGER.info("Beginning particle ID postprocessing.")
        LOGGER.info(f"Truncating particle IDs above {self.truncate_ids}")

        self.particle_ids %= int64(self.truncate_ids)

        # TODO: Remove this requirement. At the moment, isin() breaks when
        # you have repeated values.
        self.particle_ids, indices = unique(self.particle_ids, return_index=True)
        self.particle_ids = self.particle_ids.astype(int64)
        self.coordinates = self.coordinates[indices]
        self.masses = self.masses[indices]

    return
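# Worked sketch of the truncation above (hypothetical values; the spawning
# convention is an assumption, not taken from the original source): if a
# simulation marks a second-generation particle by adding a large offset to
# the parent ID, the % operator recovers the parent ID, e.g.
#
#     truncate_ids = 2 ** 31
#     spawned_id = parent_id + 2 ** 31          # assumed spawning convention
#     spawned_id % truncate_ids == parent_id    # True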
def __init__(self, filename: str, particle_type: int, num_files: int):
    """
    Parameters
    ----------

    filename: str
        The EAGLE snapshot filename to extract the particle data from,
        without the ``.x.hdf5``.

    particle_type: int
        The particle type to load (0, 1, 4, etc.)

    num_files: int
        The number of files to read from (as EAGLE snapshots are often split
        into multiple files).
    """

    super().__init__()

    self.filename = filename
    self.particle_type = particle_type
    self.num_files = num_files

    LOGGER.info(f"Loading particle data from particle type {particle_type}")
    self.coordinates = self.load_coordinates()
    self.masses = self.load_masses()
    self.particle_ids = self.load_particle_ids()
    LOGGER.info(f"Finished loading data from particle type {particle_type}")
    LOGGER.info(f"Loaded {self.particle_ids.size} particles")

    self.perform_particle_id_postprocessing()

    return
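# Usage sketch (hypothetical paths): the filename is given without the
# ``.x.hdf5`` suffix, which the reader appends per file chunk, e.g.
#
#     dark_matter = EAGLEParticleData(
#         filename="snapshots/snap_028_z000p000",   # reads snap_028_z000p000.0.hdf5, ...
#         particle_type=1,
#         num_files=16,
#     )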
def load_halo_data(self):
    """
    Loads haloes from AHF and, using the center of mass of the halo and
    R_vir, uses trees through :meth:`ParticleDataset.associate_haloes` to set
    the halo values. Loads only central haloes (with hostHalo = -1)
    """

    if self.halo_filename is None:
        return

    LOGGER.info(f"Loading halo catalogue data from {self.halo_filename}")

    # Zero-indexed columns read from the AHF halo catalogue:
    # 1 = hostHalo, 5..7 = halo centre (x, y, z), 11 = virial radius.
    raw_data = genfromtxt(self.halo_filename, usecols=[1, 5, 6, 7, 11]).T

    hostHalo = raw_data[0].astype(int)
    mask = hostHalo == -1

    xmbp = raw_data[1][mask]
    ymbp = raw_data[2][mask]
    zmbp = raw_data[3][mask]

    center_of_potential = array([xmbp, ymbp, zmbp]).T
    r_vir = raw_data[4][mask]

    units = unyt_quantity(1.0 / self.hubble_param, units=unit_length).to("Mpc")

    halo_coordinates = unyt_array(center_of_potential, units=units)
    halo_radii = unyt_array(r_vir, units=units)

    self.number_of_groups = halo_radii.size

    LOGGER.info("Finished loading halo catalogue data")

    for particle_type in ["dark_matter", "gas", "stars"]:
        particle_data = getattr(self, particle_type, None)

        if particle_data is not None:
            LOGGER.info(f"Associating haloes for {particle_type}")
            particle_data.associate_haloes(
                halo_coordinates=halo_coordinates,
                halo_radii=halo_radii,
                boxsize=self.boxsize,
            )

    self.halo_coordinates = halo_coordinates
    self.halo_radii = halo_radii

    return
def load_halo_data(self):
    """
    Loads haloes from VELOCIraptor and, using the most bound particle center
    and R_200, uses trees through
    :meth:`SWIFTParticleDataset.associate_haloes` to set the halo values.
    """

    if self.halo_filename is None:
        return

    LOGGER.info(f"Loading halo catalogue data from {self.halo_filename}")

    catalogue = load_catalogue(self.halo_filename)

    # Select only centrals (structuretype == 10 are field haloes)
    centrals = catalogue.structure_type.structuretype == 10
    self.number_of_groups = centrals.sum()

    halo_coordinates = (
        unyt_array(
            [
                getattr(catalogue.positions, f"{x}cmbp")[centrals]
                for x in ["x", "y", "z"]
            ]
        ).T
        / catalogue.units.a
    )

    halo_radii = catalogue.radii.r_200mean[centrals] / catalogue.units.a

    LOGGER.info("Finished loading halo catalogue data")

    for particle_type in ["dark_matter", "gas", "stars"]:
        particle_data = getattr(self, particle_type, None)

        if particle_data is not None:
            LOGGER.info(f"Associating haloes for {particle_type}")
            particle_data.associate_haloes(
                halo_coordinates=halo_coordinates,
                halo_radii=halo_radii,
                boxsize=self.boxsize,
            )

    self.halo_coordinates = halo_coordinates
    self.halo_radii = halo_radii

    return
def __init__(self, SWIFTDataset):
    """
    Parameters
    ----------

    SWIFTDataset
        The SWIFT dataset (e.g. ``x.dark_matter``) to extract the particle
        data from.
    """

    super().__init__()

    LOGGER.info(f"Loading particle data from {SWIFTDataset}")
    self.coordinates = SWIFTDataset.coordinates
    self.masses = SWIFTDataset.masses

    # Prefer progenitor particle IDs where available; fall back to the
    # regular particle IDs otherwise.
    try:
        self.particle_ids = SWIFTDataset.progenitor_particle_ids
    except AttributeError:
        self.particle_ids = SWIFTDataset.particle_ids

    LOGGER.info(f"Finished loading data from {SWIFTDataset}")
    LOGGER.info(f"Loaded {self.particle_ids.size} particles")

    self.perform_particle_id_postprocessing()

    return
def find_closest_neighbours(
    dark_matter_coordinates: unyt_array,
    dark_matter_ids: unyt_array,
    boxsize: unyt_array,
    gas_coordinates: Optional[unyt_array] = None,
    gas_ids: Optional[unyt_array] = None,
) -> Tuple[Dict[int, int]]:
    """
    Finds the closest neighbours in the initial conditions, and returns a
    hashtable between their IDs.

    Parameters
    ----------

    dark_matter_coordinates: unyt_array
        Dark matter co-ordinates. These will have a tree built from them and
        must be ordered in the same way as ``dark_matter_ids``.

    dark_matter_ids: unyt_array
        Unique IDs for dark matter particles.

    boxsize: unyt_array
        Box-size for the simulation volume so that periodic tree searches can
        be used to find neighbours over boundaries.

    gas_coordinates: unyt_array, optional
        Gas co-ordinates, must be ordered in the same way as ``gas_ids``.

    gas_ids: unyt_array, optional
        Unique IDs for gas particles.

    Returns
    -------

    dark_matter_neighbours: numba.typed.Dict
        Dictionary of dark matter neighbours. Takes the particle and links to
        its neighbour, so dark_matter_neighbours[id] gives the ID of the
        particle that was its nearest neighbour.

    gas_neighbours: numba.typed.Dict
        Dictionary of gas neighbours. Takes the particle and links it to its
        nearest dark matter neighbour, so gas_neighbours[id] gives the ID of
        the particle that was its nearest neighbour.

    Notes
    -----

    The returned hashtables are slower than their pythonic cousins in regular
    use. However, in ``@jit``-ified functions, they are significantly faster.
    """

    boxsize = boxsize.to(dark_matter_coordinates.units)

    if gas_coordinates is not None:
        assert dark_matter_coordinates.units == gas_coordinates.units

    LOGGER.info("Building dark matter tree for spread metric")
    tree = cKDTree(dark_matter_coordinates.value, boxsize=boxsize.value)
    LOGGER.info("Finished tree build")

    # For dark matter, the closest particle will be ourself, so take the
    # second-nearest neighbour.
    _, closest_indices = tree.query(x=dark_matter_coordinates.value, k=2, n_jobs=-1)

    dark_matter_neighbours = create_numba_hashtable(
        dark_matter_ids.value, dark_matter_ids.value[closest_indices[:, 1]]
    )

    # For gas, we can just use the closest neighbour
    if gas_coordinates is not None:
        _, closest_indices = tree.query(x=gas_coordinates.value, k=1, n_jobs=-1)

        gas_neighbours = create_numba_hashtable(
            gas_ids.value, dark_matter_ids.value[closest_indices]
        )
    else:
        gas_neighbours = None

    return dark_matter_neighbours, gas_neighbours
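# Usage sketch (hypothetical arrays; not part of the original module):
#
#     dm_neighbours, gas_neighbours = find_closest_neighbours(
#         dark_matter_coordinates=dm_coords,
#         dark_matter_ids=dm_ids,
#         boxsize=boxsize,
#         gas_coordinates=gas_coords,
#         gas_ids=gas_ids,
#     )
#
#     partner_id = dm_neighbours[dm_ids.value[0]]   # nearest DM neighbour of the
#                                                   # first dark matter particle
#
# Inside an ``@njit``-compiled function the same lookups on the typed dicts
# are much faster than a regular Python dict, which is why they are used here.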
def associate_haloes(
    self, halo_coordinates: unyt_array, halo_radii: unyt_array, boxsize: unyt_array
):
    """
    Associates the haloes with this dataset. Performs this task by building a
    tree of the particle co-ordinates, and then searching it.

    Parameters
    ----------

    halo_coordinates: unyt.unyt_array[float64]
        Co-ordinates of the haloes. Should be NX3.

    halo_radii: unyt.unyt_array[float64]
        Halo radii. Should be NX1.

    boxsize: unyt.unyt_array[float64]
        Boxsize of the simulation to enable periodic overlaps.

    Notes
    -----

    This function sets the self.haloes property.
    """

    boxsize = boxsize.to(self.coordinates.units)

    LOGGER.info("Beginning treebuild")
    tree = cKDTree(self.coordinates.value, boxsize=boxsize.value)
    LOGGER.info("Finished treebuild")

    haloes = full(self.masses.size, -1, dtype=int32)

    halo_coordinates.convert_to_units(self.coordinates.units)
    halo_radii.convert_to_units(self.coordinates.units)

    # Search the tree in blocks of haloes as this improves load balancing
    # by allowing the tree to parallelise.
    block_size = 1024
    number_of_haloes = halo_radii.size
    number_of_blocks = 1 + number_of_haloes // block_size

    LOGGER.info("Beginning tree search")

    for block in range(number_of_blocks):
        LOGGER.debug(f"Running tree search on block {block}/{number_of_blocks}")

        starting_index = block * block_size
        ending_index = min((block + 1) * block_size, number_of_haloes)

        if starting_index >= ending_index:
            break

        particle_indices = tree.query_ball_point(
            x=halo_coordinates[starting_index:ending_index].value,
            r=halo_radii[starting_index:ending_index].value,
            n_jobs=-1,
        )

        for halo, indices in enumerate(particle_indices):
            haloes[indices] = int32(halo + starting_index)

    self.haloes = haloes

    LOGGER.debug(
        f"Maximal halo ID = {haloes.max()}. Number of haloes: {number_of_haloes}"
    )

    LOGGER.info("Finished tree search")

    return
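# Usage sketch (hypothetical driver code, not part of the original class):
# after calling associate_haloes, every particle carries the index of the halo
# whose radius contains it, or -1 if it lies in no halo, so selecting the
# particles of a given halo is a simple mask:
#
#     particle_data.associate_haloes(
#         halo_coordinates=halo_coordinates,
#         halo_radii=halo_radii,
#         boxsize=boxsize,
#     )
#     in_first_halo = particle_data.haloes == 0
#
# Note that a particle sitting inside several overlapping halo radii ends up
# assigned to the last (highest-index) halo visited by the loop above.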
def sort_by_particle_id(self):
    """
    Sorts the internal data by the particle IDs.
    """

    LOGGER.info("Beginning sort for particle IDs")
    mask = self.particle_ids.argsort()
    LOGGER.info("Finished sort on particle IDs")

    LOGGER.info("Beginning masking of sorted arrays")

    self.coordinates = self.coordinates[mask]
    self.masses = self.masses[mask]
    self.particle_ids = self.particle_ids[mask]

    try:
        self.haloes = self.haloes[mask]
    except (NameError, TypeError, AttributeError):
        # Haloes do not exist.
        LOGGER.info("No haloes property found on this instance")

    try:
        self.lagrangian_regions = self.lagrangian_regions[mask]
    except (NameError, TypeError, AttributeError):
        # Lagrangian regions do not exist.
        LOGGER.info("No lagrangian_regions property found on this instance")

    LOGGER.info("Finished masking of sorted arrays")

    return
def __init__(self, filename: str, halo_filename: Optional[str]):
    self.filename = filename
    self.halo_filename = halo_filename

    LOGGER.info(f"Beginning particle load operation for {self.filename}")
    self.load_particle_data()
    LOGGER.info("Finished particle loading")

    LOGGER.info(f"Beginning halo loading for {self.halo_filename}")
    self.load_halo_data()
    LOGGER.info("Finished halo loading")

    LOGGER.info(f"Beginning master sort for {self.filename}")
    self.sort_all_data()
    LOGGER.info("Finished master sort")

    return
def find_neighbour_distances(self):
    """
    Finds the distances to the initial nearest neighbours (i.e. this function
    computes the spread metric for all particles associated with this
    object).

    If relevant, sets: ``self.dark_matter_spread``, ``self.gas_spread``,
    ``self.star_spread``.
    """

    if self.dark_matter_neighbours is None:
        self.find_neighbours()

    if self.dark_matter_final_coordinates is not None:
        boxsize = self.boxsize.to(self.dark_matter_final_coordinates.units)

        LOGGER.info("Computing dark matter spread metric")
        self.dark_matter_spread = find_neighbour_distances(
            neighbours=self.dark_matter_neighbours,
            particle_coordinates=self.dark_matter_final_coordinates.value,
            dark_matter_coordinates=self.dark_matter_final_coordinates.value,
            particle_ids=self.dark_matter_final_ids.value,
            dark_matter_ids=self.dark_matter_final_ids.value,
            boxsize=boxsize.value,
        )
        self.dark_matter_spread = unyt_array(
            self.dark_matter_spread,
            units=self.dark_matter_final_coordinates.units,
        )
        LOGGER.info("Finished computing dark matter spread metric")
    else:
        self.dark_matter_spread = None

    if self.gas_final_coordinates is not None:
        boxsize = self.boxsize.to(self.gas_final_coordinates.units)

        LOGGER.info("Computing gas spread metric")
        self.gas_spread = find_neighbour_distances(
            neighbours=self.gas_neighbours,
            particle_coordinates=self.gas_final_coordinates.value,
            dark_matter_coordinates=self.dark_matter_final_coordinates.value,
            particle_ids=self.gas_final_ids.value,
            dark_matter_ids=self.dark_matter_final_ids.value,
            boxsize=boxsize.value,
        )
        self.gas_spread = unyt_array(
            self.gas_spread, units=self.gas_final_coordinates.units
        )
        LOGGER.info("Finished computing gas spread metric")
    else:
        self.gas_spread = None

    if self.star_final_coordinates is not None:
        boxsize = self.boxsize.to(self.star_final_coordinates.units)

        LOGGER.info("Computing star spread metric")
        # Stars did not exist in the initial conditions, so they are matched
        # through the gas neighbour table.
        self.star_spread = find_neighbour_distances(
            neighbours=self.gas_neighbours,
            particle_coordinates=self.star_final_coordinates.value,
            dark_matter_coordinates=self.dark_matter_final_coordinates.value,
            particle_ids=self.star_final_ids.value,
            dark_matter_ids=self.dark_matter_final_ids.value,
            boxsize=boxsize.value,
        )
        self.star_spread = unyt_array(
            self.star_spread, units=self.star_final_coordinates.units
        )
        LOGGER.info("Finished computing star spread metric")
    else:
        self.star_spread = None

    return
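# Usage sketch (hypothetical driver code, not part of the original class):
#
#     metric.find_neighbours()            # build the initial neighbour tables
#     metric.find_neighbour_distances()   # compute the spread metric
#
#     if metric.gas_spread is not None:
#         LOGGER.info(f"Median gas spread: {np.median(metric.gas_spread)}")
#
# ``metric`` stands for whatever instance of this class holds the initial and
# final particle data; ``np`` is assumed to be numpy imported elsewhere.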
def load_halo_data(self):
    """
    Loads haloes from SubFind and, using the center of potential and R_200,
    uses trees through :meth:`ParticleDataset.associate_haloes` to set the
    halo values.
    """

    if self.halo_filename is None:
        return

    LOGGER.info(f"Loading halo catalogue data from {self.halo_filename}")

    # We want to extract the center of potential and r_200mean for central
    # galaxies only, based on their FoF group. However, based on this FOF
    # group we do not know which haloes are centrals. For this we need to
    # use the following:
    #
    # Subhalo/SubGroupNumber == 0 gives centrals
    # Subhalo/GroupNumber indexes the FOF catalogue
    # FOF/Group_R_Mean200 gives the R_200mean
    # FOF/GroupCentreOfPotential gives the center of potential

    # First load the length units out of the catalogue, and h-correct
    with h5py.File(f"{self.halo_filename}.0.hdf5", "r") as handle:
        hubble_param = handle["Header"].attrs["HubbleParam"]
        cgs = handle["Units"].attrs["UnitLength_in_cm"] / hubble_param
        units = unyt_quantity(cgs, units="cm").to("Mpc")

    # FoF group info; this will be sliced
    center_of_potential = []
    r_200mean = []

    central_group_numbers = []

    for file in range(self.num_files_halo):
        current_filename = f"{self.halo_filename}.{file}.hdf5"

        with h5py.File(current_filename, "r") as handle:
            center_of_potential.append(handle["/FOF/GroupCentreOfPotential"][...])
            r_200mean.append(handle["/FOF/Group_R_Mean200"][...])

            sub_group_number = handle["/Subhalo/SubGroupNumber"][...]
            group_number = handle["/Subhalo/GroupNumber"][...]

            central_group_numbers.append(group_number[sub_group_number == 0])

    # We currently have a list of arrays, need to stick together.
    # Numbering starts at 1 for some reason...
    central_group_numbers = concatenate(central_group_numbers) - 1

    # This slicing removes all non-central FoF groups.
    center_of_potential = concatenate(center_of_potential)[central_group_numbers]
    r_200mean = concatenate(r_200mean)[central_group_numbers]

    halo_coordinates = unyt_array(center_of_potential, units=units)
    halo_radii = unyt_array(r_200mean, units=units)

    self.number_of_groups = halo_radii.size

    LOGGER.info("Finished loading halo catalogue data")

    for particle_type in ["dark_matter", "gas", "stars"]:
        particle_data = getattr(self, particle_type, None)

        if particle_data is not None:
            LOGGER.info(f"Associating haloes for {particle_type}")
            particle_data.associate_haloes(
                halo_coordinates=halo_coordinates,
                halo_radii=halo_radii,
                boxsize=self.boxsize,
            )

    self.halo_coordinates = halo_coordinates
    self.halo_radii = halo_radii

    return
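# Worked sketch of the central selection above (hypothetical toy values):
#
#     sub_group_number = np.array([0, 1, 2, 0, 0])   # 0 marks a central subhalo
#     group_number     = np.array([1, 1, 1, 2, 3])   # 1-based FoF group index
#
#     centrals = group_number[sub_group_number == 0] - 1   # -> [0, 1, 2]
#
# The FoF arrays (centre of potential, R_200mean) are then sliced with
# [0, 1, 2], keeping one entry per FoF group that hosts a central subhalo.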