Example #1
    def load_particle_data(self):
        """
        Loads the particle data from a snapshot using ``h5py``.
        """

        for particle_type, particle_name in zip(
            [1, 0, 4], ["dark_matter", "gas", "stars"]
        ):
            try:
                setattr(
                    self,
                    particle_name,
                    EAGLEParticleData(
                        filename=self.filename,
                        particle_type=particle_type,
                        num_files=self.num_files_particles,
                    ),
                )
            except KeyError:
                # No particles of this type (e.g. stars in ICs)
                LOGGER.info(
                    (f"No particles of type {particle_type} ({particle_name}) "
                     "in this file. Skipping."))

                setattr(self, particle_name, None)

        return
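
The ``except KeyError`` branch relies on ``h5py`` raising ``KeyError`` when a particle group is missing from the snapshot. A minimal, self-contained sketch of that behaviour (the file name is illustrative):

import h5py
import numpy as np

# Create a tiny snapshot-like file that only contains PartType1.
with h5py.File("tiny_snapshot.hdf5", "w") as handle:
    handle.create_dataset("/PartType1/Coordinates", data=np.zeros((8, 3)))

with h5py.File("tiny_snapshot.hdf5", "r") as handle:
    try:
        stars = handle["/PartType4/Coordinates"][...]
    except KeyError:
        # Exactly the case load_particle_data() guards against:
        # no star particles (e.g. in initial conditions).
        stars = None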
Example #2
    def load_data(self, array_name: str):
        """
        Loads an array and returns it.

        Parameters
        ----------

        array_name: str
            Name of the array (without particle type) to read, e.g. Coordinates


        Returns
        -------

        output: np.array
            Output read from the HDF5 file

        h: float
            Hubble parameter.

        """

        full_path = f"/PartType{self.particle_type}/{array_name}"

        LOGGER.info(f"Loading data from {full_path}.")

        with h5py.File(f"{self.filename}", "r") as handle:
            h = handle["Header"].attrs["HubbleParam"]
            output = handle[full_path][:]

        return output, h
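
A minimal sketch of the read pattern this method wraps, run against a synthetic single-file snapshot (file name and values are illustrative):

import h5py
import numpy as np

with h5py.File("single_file_snapshot.hdf5", "w") as handle:
    handle.create_group("Header").attrs["HubbleParam"] = 0.68
    handle.create_dataset("/PartType0/Coordinates", data=np.random.rand(16, 3))

with h5py.File("single_file_snapshot.hdf5", "r") as handle:
    h = handle["Header"].attrs["HubbleParam"]
    coordinates = handle["/PartType0/Coordinates"][:]

# With the Hubble parameter in hand, h-full lengths (Mpc/h) become
# h-free lengths (Mpc) by dividing through by h.
coordinates_h_free = coordinates / h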
Example #3
    def load_particle_data(self):
        """
        Loads the particle data from a snapshot using ``h5py``.
        """

        for particle_type, particle_name in zip(
            [1, 0, 4], ["dark_matter", "gas", "stars"]
        ):
            truncate_ids = (
                self.truncate_ids[particle_type]
                if self.truncate_ids is not None
                else None
            )

            try:
                setattr(
                    self,
                    particle_name,
                    SIMBAParticleData(
                        filename=self.filename,
                        particle_type=particle_type,
                        truncate_ids=truncate_ids,
                    ),
                )
            except KeyError:
                # No particles of this type (e.g. stars in ICs)
                LOGGER.info(
                    (
                        f"No particles of type {particle_type} ({particle_name}) "
                        "in this file. Skipping."
                    )
                )

                setattr(self, particle_name, None)

        return
Example #4
    def perform_particle_id_postprocessing(self):
        """
        Performs postprocessing on ParticleIDs to ensure that they link
        correctly (required in cases where the particle IDs are offset when
        new generations of particles are spawned).
        """

        LOGGER.info("Beginning particle ID postprocessing (empty).")

        return
Example #5
def calculate_gas_lagrangian_regions(
    dark_matter_coordinates: unyt_array,
    gas_coordinates: unyt_array,
    dark_matter_lagrangian_regions: int32,
    boxsize: unyt_array,
) -> int32:
    """
    Computes the Lagrangian Regions for gas particles by querying a tree built
    from the dark matter co-ordinates, given the (defined) Lagrangian Regions
    of the dark matter.

    Parameters
    ----------

    dark_matter_coordinates: unyt_array[float64]
        The co-ordinates of dark matter particles

    gas_coordinates: unyt_array[float64]
        The co-ordinates of gas particles

    dark_matter_lagrangian_regions: np.array[int32]
        Lagrangian Regions of the dark matter particles.

    boxsize: unyt_array
        The box-size of the simulation so that periodic overlaps
        can be considered in the nearest neighbour calculation.


    Returns
    -------

    gas_lagrangian_regions: np.array[int32]
        Lagrangian Regions of the gas particles, based on the tree search
        of the dark matter particles.


    Notes
    -----

    The Lagrangian Region of a gas particle is defined as being the same
    as the Lagrangian Region of the closest dark matter particle.
    """

    # We should just crash here if this is not the case.
    assert gas_coordinates.units == dark_matter_coordinates.units

    boxsize = boxsize.to(gas_coordinates.units)

    LOGGER.info("Beginning treebuild")
    dark_matter_tree = cKDTree(dark_matter_coordinates.value, boxsize=boxsize.value)
    LOGGER.info("Finished treebuild")

    LOGGER.info("Beginning tree search")
    _, indicies = dark_matter_tree.query(x=gas_coordinates, k=1, n_jobs=-1)
    LOGGER.info("Finished tree walk")

    gas_lagrangian_regions = dark_matter_lagrangian_regions[indicies]

    return gas_lagrangian_regions
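
The core of the function is a plain nearest-neighbour label transfer. A minimal, unit-free sketch with synthetic data (all names and values are illustrative):

import numpy as np
from scipy.spatial import cKDTree

boxsize = 1.0
dark_matter = np.random.rand(1000, 3)
gas = np.random.rand(500, 3)
dm_regions = np.arange(1000, dtype=np.int32)  # one region label per DM particle

tree = cKDTree(dark_matter, boxsize=boxsize)  # periodic tree
_, indices = tree.query(gas, k=1)             # closest DM particle per gas particle

gas_regions = dm_regions[indices]             # inherit the DM region label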
Example #6
    def find_neighbours(self):
        """
        Finds the initial state neighbours. Sets self.dark_matter_neighbours
        and self.gas_neighbours.
        """

        LOGGER.info("Beginning search for initial neighbours")
        self.dark_matter_neighbours, self.gas_neighbours = find_closest_neighbours(
            dark_matter_coordinates=self.dark_matter_initial_coordinates,
            dark_matter_ids=self.dark_matter_initial_ids,
            boxsize=self.boxsize,
            gas_coordinates=self.gas_initial_coordinates,
            gas_ids=self.gas_initial_ids,
        )
        LOGGER.info("Finished search for initial neighbours")

        return
Example #7
    def load_data(self, array_name: str):
        """
        Loads an array and returns it.

        Parameters
        ----------

        array_name: str
            Name of the array (without particle type) to read, e.g. Coordinates


        Returns
        -------

        output: np.array
            Output read from the HDF5 file

        units: float
            Conversion to CGS units for this type, read from CGSConversionFactor.
            Includes conversion to remove h-factor.

        """

        full_path = f"/PartType{self.particle_type}/{array_name}"

        LOGGER.info(f"Loading data from {full_path}.")

        output = []

        with h5py.File(f"{self.filename}.0.hdf5", "r") as handle:
            units = handle[full_path].attrs["CGSConversionFactor"]
            h_exponent = handle[full_path].attrs["h-scale-exponent"]
            h_factor = pow(handle["Header"].attrs["HubbleParam"], h_exponent)
            units *= h_factor

        for file in range(self.num_files):
            current_filename = f"{self.filename}.{file}.hdf5"

            with h5py.File(current_filename, "r") as handle:
                output.append(handle[full_path][...])

        output = concatenate(output)

        return output, units
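
The unit handling above composes the ``CGSConversionFactor`` with an h-factor built from the ``h-scale-exponent`` attribute. A worked example of that arithmetic (values are illustrative, not from a real snapshot):

# Coordinates in EAGLE-style snapshots carry h-scale-exponent = -1,
# so the h-free CGS conversion is CGSConversionFactor * h**(-1).
cgs_conversion_factor = 3.085678e24  # cm per (comoving) Mpc, illustrative
h_scale_exponent = -1.0
hubble_param = 0.6777

units = cgs_conversion_factor * pow(hubble_param, h_scale_exponent)

# A raw coordinate of 10 code units (Mpc/h) is then:
length_in_cm = 10.0 * units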
Example #8
    def load_particle_data(self):
        """
        Loads the particle data from a snapshot using ``swiftsimio``.
        """

        data = load_snapshot(self.filename)

        # Let's just assume it's a box, eh?
        self.boxsize = data.metadata.boxsize[0]

        for particle_type in ["dark_matter", "gas", "stars"]:
            swift_dataset = getattr(data, particle_type, None)

            if swift_dataset is not None:
                setattr(self, particle_type, SWIFTParticleData(swift_dataset))
            else:
                LOGGER.info(f"No particles of type {particle_type} in {self.filename}")

        return
Example #9
    def __init__(self, filename: str, particle_type: int, truncate_ids: Optional[int] = None):
        """
        Parameters
        ----------

        filename: str
            The SIMBA snapshot filename to extract the particle data from.

        particle_type: int
            The particle type to load (0, 1, 4, etc.)

        truncate_ids: int, optional
            Truncate IDs above this by using the % operator; i.e. discard
            higher bits.
        """
        super().__init__()

        self.filename = filename
        self.particle_type = particle_type
        self.truncate_ids = truncate_ids

        LOGGER.info(f"Loading particle data from particle type {particle_type}")
        self.coordinates = self.load_coordinates()
        self.masses = self.load_masses()
        self.particle_ids = self.load_particle_ids()
        LOGGER.info(f"Finished loading data from particle type {particle_type}")
        LOGGER.info(f"Loaded {self.particle_ids.size} particles")

        self.perform_particle_id_postprocessing()

        return
Example #10
    def perform_particle_id_postprocessing(self):
        """
        Performs postprocessing on ParticleIDs to ensure that they link
        correctly (required in cases where the particle IDs are offset when
        new generations of particles are spawned).
        """

        if self.truncate_ids is None:
            LOGGER.info("Beginning particle ID postprocessing (empty).")
        else:
            LOGGER.info("Beginning particle ID postprocessing.")
            LOGGER.info(f"Truncating particle IDs above {self.truncate_ids}")

            self.particle_ids %= int64(self.truncate_ids)

            # TODO: Remove this requirement. At the moment, isin() breaks when
            #       you have repeated values.

            self.particle_ids, indices = unique(self.particle_ids, return_index=True)

            self.particle_ids = self.particle_ids.astype(int64)
            self.coordinates = self.coordinates[indices]
            self.masses = self.masses[indices]

        return
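
A small worked example of the truncation step, assuming the ID offset lives in the high bits (so that ``truncate_ids`` is a power of two):

import numpy as np

truncate_ids = 2 ** 32
ids = np.array([7, 42, 42 + 2 ** 32, 7 + 2 ** 33], dtype=np.int64)

truncated = ids % np.int64(truncate_ids)
# -> [ 7, 42, 42,  7]: spawned generations collapse onto their progenitors

# unique() with return_index keeps one copy of each ID, as in the code above.
unique_ids, indices = np.unique(truncated, return_index=True)
# -> unique_ids = [ 7, 42]; indices pick the first occurrence of each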
Example #11
    def __init__(self, filename: str, particle_type: int, num_files: int):
        """
        Parameters
        ----------

        filename: str
            The EAGLE snapshot filename to extract the particle data from,
            without the ``.x.hdf5``.

        particle_type: int
            The particle type to load (0, 1, 4, etc.)

        num_files: int
            The number of files to read from (as EAGLE snapshots are often
            split into multiple files).
        """
        super().__init__()

        self.filename = filename
        self.particle_type = particle_type
        self.num_files = num_files

        LOGGER.info(
            f"Loading particle data from particle type {particle_type}")
        self.coordinates = self.load_coordinates()
        self.masses = self.load_masses()
        self.particle_ids = self.load_particle_ids()
        LOGGER.info(
            f"Finished loading data from particle type {particle_type}")
        LOGGER.info(f"Loaded {self.particle_ids.size} particles")

        self.perform_particle_id_postprocessing()

        return
Example #12
    def load_halo_data(self):
        """
        Loads haloes from AHF and, using the most bound particle centre of the
        halo and R_vir, uses trees through :meth:`ParticleDataset.associate_haloes`
        to set the halo values.

        Loads only central haloes (those with hostHalo = -1).
        """

        if self.halo_filename is None:
            return

        LOGGER.info(f"Loading halo catalogue data from {self.halo_filename}")

        raw_data = genfromtxt(self.halo_filename, usecols=[1, 5, 6, 7, 11]).T

        hostHalo = raw_data[0].astype(int)
        mask = hostHalo == -1

        xmbp = raw_data[1][mask]
        ymbp = raw_data[2][mask]
        zmbp = raw_data[3][mask]

        center_of_potential = array([xmbp, ymbp, zmbp]).T
        r_vir = raw_data[4][mask]

        units = unyt_quantity(1.0 / self.hubble_param, units=unit_length).to("Mpc")

        halo_coordinates = unyt_array(center_of_potential, units=units)
        halo_radii = unyt_array(r_vir, units=units)

        self.number_of_groups = halo_radii.size

        LOGGER.info("Finished loading halo catalogue data")

        for particle_type in ["dark_matter", "gas", "stars"]:
            particle_data = getattr(self, particle_type, None)

            if particle_data is not None:
                LOGGER.info(f"Associating haloes for {particle_type}")
                particle_data.associate_haloes(
                    halo_coordinates=halo_coordinates,
                    halo_radii=halo_radii,
                    boxsize=self.boxsize,
                )

        self.halo_coordinates = halo_coordinates
        self.halo_radii = halo_radii

        return
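
The column extraction above is easiest to see on a synthetic AHF-like table (the layout is compressed to 12 columns for the sketch; only the positions picked out by ``usecols=[1, 5, 6, 7, 11]`` matter):

from io import StringIO

from numpy import genfromtxt

# Columns: ID hostHalo ... Xmbp Ymbp Zmbp ... Rvir (real AHF catalogues
# carry many more columns).
table = StringIO(
    "1 -1 0 0 0 10.0 20.0 30.0 0 0 0 200.0\n"
    "2  1 0 0 0 11.0 21.0 31.0 0 0 0  50.0\n"
)

raw_data = genfromtxt(table, usecols=[1, 5, 6, 7, 11]).T

host_halo = raw_data[0].astype(int)
mask = host_halo == -1                 # keep centrals only

center_of_potential = raw_data[1:4].T[mask]
r_vir = raw_data[4][mask]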
Example #13
    def load_halo_data(self):
        """
        Loads haloes from VELOCIraptor and, using the most bound particle center
        and R_200, uses trees through :meth:`SWIFTParticleDataset.associate_haloes`
        to set the halo values.
        """

        if self.halo_filename is None:
            return

        LOGGER.info(f"Loading halo catalogue data from {self.halo_filename}")

        catalogue = load_catalogue(self.halo_filename)

        # Select only centrals
        centrals = catalogue.structure_type.structuretype == 10
        self.number_of_groups = centrals.sum()

        halo_coordinates = (
            unyt_array(
                [
                    getattr(catalogue.positions, f"{x}cmbp")[centrals]
                    for x in ["x", "y", "z"]
                ]
            ).T
            / catalogue.units.a
        )

        halo_radii = catalogue.radii.r_200mean[centrals] / catalogue.units.a

        LOGGER.info("Finished loading halo catalogue data")

        for particle_type in ["dark_matter", "gas", "stars"]:
            particle_data = getattr(self, particle_type, None)

            if particle_data is not None:
                LOGGER.info(f"Associating haloes for {particle_type}")
                particle_data.associate_haloes(
                    halo_coordinates=halo_coordinates,
                    halo_radii=halo_radii,
                    boxsize=self.boxsize,
                )

        self.halo_coordinates = halo_coordinates
        self.halo_radii = halo_radii

        return
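
The division by ``catalogue.units.a`` above suggests the catalogue positions and radii are physical while the snapshot is comoving. The arithmetic, as a unit-free sketch:

# physical -> comoving: x_comoving = x_physical / a
scale_factor = 0.5                        # illustrative, corresponds to z = 1
x_physical = 25.0                         # Mpc (physical)
x_comoving = x_physical / scale_factor    # 50.0 Mpc (comoving)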
Example #14
    def __init__(self, SWIFTDataset):
        """
        Parameters
        ----------

        SWIFTDataset
            The SWIFT dataset (e.g. ``x.dark_matter``) to extract
            the particle data from.
        """
        super().__init__()

        LOGGER.info(f"Loading particle data from {SWIFTDataset}")
        self.coordinates = SWIFTDataset.coordinates
        self.masses = SWIFTDataset.masses
        try:
            self.particle_ids = SWIFTDataset.progenitor_particle_ids
        except AttributeError:
            self.particle_ids = SWIFTDataset.particle_ids
        LOGGER.info(f"Finished loading data from {SWIFTDataset}")
        LOGGER.info(f"Loaded {self.particle_ids.size} particles")

        self.perform_particle_id_postprocessing()

        return
Example #15
def find_closest_neighbours(
    dark_matter_coordinates: unyt_array,
    dark_matter_ids: unyt_array,
    boxsize: unyt_array,
    gas_coordinates: Optional[unyt_array] = None,
    gas_ids: Optional[unyt_array] = None,
) -> Tuple[Dict[int, int], Dict[int, int]]:
    """
    Finds the closest neighbours in the initial conditions, and returns a
    hashtable between their IDs.

    Parameters
    ----------

    dark_matter_coordinates: unyt_array
        Dark matter co-ordinates. These will have a tree built from them and
        must be ordered in the same way as ``dark_matter_ids``.

    dark_matter_ids: unyt_array
        Unique IDs for dark matter particles.

    boxsize: unyt_array
        Box-size for the simulation volume so that periodic tree searches can
        be used to find neighbours over boundaries.

    gas_coordinates: unyt_array, optional
        Gas co-ordinates, must be ordered in the same way as ``gas_ids``.

    gas_ids: unyt_array, optional
        Unique IDs for gas particles.


    Returns
    -------

    dark_matter_neighbours: numba.typed.Dict
        Dictionary of dark matter neighbours. Takes the particle and links
        to its neighbour so dark_matter_neighbours[id] gives the ID of the
        particle that was its nearest neighbour.

    gas_neighbours: numba.typed.Dict
        Dictionary of gas neighbours. Takes the particle and links it to its
        nearest dark matter neighbour, so gas_neighbours[id] gives the ID of
        the particle that was its nearest neighbour.


    Notes
    -----

    The returned hashtables are slower than their pythonic cousins
    in regular use. However, in ``@jit``ified functions, they are
    significantly faster.
    """

    boxsize = boxsize.to(dark_matter_coordinates.units)

    if gas_coordinates is not None:
        assert dark_matter_coordinates.units == gas_coordinates.units

    LOGGER.info("Building dark matter tree for spread metric")
    tree = cKDTree(dark_matter_coordinates.value, boxsize=boxsize.value)
    LOGGER.info("Finished tree build")

    # For dark matter, the closest particle will be the particle itself,
    # so we ask for two neighbours and take the second.
    _, closest_indices = tree.query(x=dark_matter_coordinates.value, k=2, n_jobs=-1)

    dark_matter_neighbours = create_numba_hashtable(
        dark_matter_ids.value, dark_matter_ids.value[closest_indices[:, 1]]
    )

    # For gas, we can just use closest neighbour
    if gas_coordinates is not None:
        _, closest_indices = tree.query(x=gas_coordinates.value, k=1, n_jobs=-1)

        gas_neighbours = create_numba_hashtable(
            gas_ids.value, dark_matter_ids.value[closest_indices]
        )
    else:
        gas_neighbours = None

    return dark_matter_neighbours, gas_neighbours
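
``create_numba_hashtable`` is not reproduced in these examples. A plausible sketch, assuming it builds an int64 -> int64 ``numba.typed.Dict`` (the name and call signature come from the call sites above; the body is an assumption):

import numpy as np
from numba import njit, int64
from numba.typed import Dict

@njit
def create_numba_hashtable(keys, values):
    # Build an int64 -> int64 typed dict; slow to use from plain python,
    # but fast to read back inside other @njit-compiled functions.
    hashtable = Dict.empty(key_type=int64, value_type=int64)

    for key, value in zip(keys, values):
        hashtable[key] = value

    return hashtable

ids = np.array([10, 20, 30], dtype=np.int64)
neighbours = np.array([20, 10, 10], dtype=np.int64)
table = create_numba_hashtable(ids, neighbours)  # table[10] -> 20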
Example #16
    def associate_haloes(self, halo_coordinates: unyt_array,
                         halo_radii: unyt_array, boxsize: unyt_array):
        """
        Associates the haloes with this dataset. Performs this task by building
        a tree of the particle co-ordinates, and then searching it.

        Parameters
        ----------

        halo_coordinates: unyt.unyt_array[float64]
            Co-ordinates of the haloes. Should have shape (N, 3).

        halo_radii: unyt.unyt_array[float64]
            Halo radii. Should have shape (N,).

        boxsize: unyt.unyt_array[float64]
            Boxsize of the simulation to enable periodic overlaps.


        Notes
        -----

        This function sets the self.haloes property.
        """

        boxsize = boxsize.to(self.coordinates.units)

        LOGGER.info("Beginning treebuild")
        tree = cKDTree(self.coordinates.value, boxsize=boxsize.value)
        LOGGER.info("Finished treebuild")

        haloes = full(self.masses.size, -1, dtype=int32)

        halo_coordinates.convert_to_units(self.coordinates.units)
        halo_radii.convert_to_units(self.coordinates.units)

        # Search the tree in blocks of haloes as this improves load balancing
        # by allowing the tree to parallelise.
        block_size = 1024
        number_of_haloes = halo_radii.size
        number_of_blocks = 1 + number_of_haloes // block_size

        LOGGER.info("Beginning tree search")

        for block in range(number_of_blocks):
            LOGGER.debug(
                f"Running tree search on block {block}/{number_of_blocks}")

            starting_index = block * block_size
            ending_index = (block + 1) * block_size

            if ending_index > number_of_haloes:
                ending_index = number_of_haloes

            if starting_index >= ending_index:
                break

            particle_indices = tree.query_ball_point(
                x=halo_coordinates[starting_index:ending_index].value,
                r=halo_radii[starting_index:ending_index].value,
                n_jobs=-1,
            )

            for halo, indices in enumerate(particle_indices):
                haloes[indices] = int32(halo + starting_index)

        self.haloes = haloes

        LOGGER.debug(
            f"Maximal halo ID = {haloes.max()}. Number of haloes: {number_of_haloes}"
        )
        LOGGER.info("Finished tree search")

        return
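
The blocked loop exists because ``query_ball_point`` accepts an array of centres and radii and parallelises over them; chunking the haloes keeps those parallel batches balanced. A minimal, unit-free sketch of one such vectorised call (``n_jobs`` is omitted for portability across SciPy versions):

import numpy as np
from scipy.spatial import cKDTree

boxsize = 1.0
particles = np.random.rand(10000, 3)
tree = cKDTree(particles, boxsize=boxsize)

halo_centres = np.random.rand(4, 3)
halo_radii = np.full(4, 0.05)

# One list of particle indices per halo, found in a single call.
particle_indices = tree.query_ball_point(x=halo_centres, r=halo_radii)

haloes = np.full(len(particles), -1, dtype=np.int32)
for halo, indices in enumerate(particle_indices):
    haloes[indices] = np.int32(halo)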
Example #17
    def sort_by_particle_id(self):
        """
        Sorts the internal data by the particle IDs.
        """

        LOGGER.info("Beginning sort for particle IDs")
        mask = self.particle_ids.argsort()
        LOGGER.info("Finished sort on particle IDs")

        LOGGER.info("Beginning masking of sorted arrays")
        self.coordinates = self.coordinates[mask]
        self.masses = self.masses[mask]
        self.particle_ids = self.particle_ids[mask]

        try:
            self.haloes = self.haloes[mask]
        except (NameError, TypeError, AttributeError):
            # haloes does not exist on this instance.
            LOGGER.info("No haloes property found on this instance")

        try:
            self.lagrangian_regions = self.lagrangian_regions[mask]
        except (NameError, TypeError, AttributeError):
            # lagrangian_regions does not exist on this instance.
            LOGGER.info(
                "No lagrangian_regions property found on this instance")

        LOGGER.info("Finished masking of sorted arrays")

        return
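
The ``argsort`` result acts as a shared mask that keeps every per-particle array row-aligned while reordering:

import numpy as np

particle_ids = np.array([30, 10, 20])
masses = np.array([3.0, 1.0, 2.0])

mask = particle_ids.argsort()        # -> [1, 2, 0]
particle_ids = particle_ids[mask]    # -> [10, 20, 30]
masses = masses[mask]                # -> [1.0, 2.0, 3.0], still row-aligned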
Example #18
    def __init__(self, filename: str, halo_filename: Optional[str]):
        self.filename = filename
        self.halo_filename = halo_filename

        LOGGER.info(f"Beginning particle load operation for {self.filename}")
        self.load_particle_data()
        LOGGER.info("Finished particle loading")
        LOGGER.info(f"Beginning halo loading for {self.halo_filename}")
        self.load_halo_data()
        LOGGER.info("Finished halo loading")
        LOGGER.info(f"Beginning master sort for {self.filename}")
        self.sort_all_data()
        LOGGER.info(f"Finished master sort")

        return
Example #19
    def find_neighbour_distances(self):
        """
        Finds the distances to the initial nearest neighbours (i.e.
        this function computes the spread metric for all particles associated
        with this object). If relevant, sets: ``self.dark_matter_spread``,
        ``self.gas_spread``, ``self.star_spread``.
        """

        if self.dark_matter_neighbours is None:
            self.find_neighbours()

        if self.dark_matter_final_coordinates is not None:
            boxsize = self.boxsize.to(self.dark_matter_final_coordinates.units)

            LOGGER.info("Computing dark matter spread metric")
            self.dark_matter_spread = find_neighbour_distances(
                neighbours=self.dark_matter_neighbours,
                particle_coordinates=self.dark_matter_final_coordinates.value,
                dark_matter_coordinates=self.dark_matter_final_coordinates.value,
                particle_ids=self.dark_matter_final_ids.value,
                dark_matter_ids=self.dark_matter_final_ids.value,
                boxsize=boxsize.value,
            )
            self.dark_matter_spread = unyt_array(
                self.dark_matter_spread, units=self.dark_matter_final_coordinates.units
            )
            LOGGER.info("Finished computing dark matter spread metric")
        else:
            self.dark_matter_spread = None

        if self.gas_final_coordinates is not None:
            boxsize = self.boxsize.to(self.gas_final_coordinates.units)

            LOGGER.info("Computing gas spread metric")
            self.gas_spread = find_neighbour_distances(
                neighbours=self.gas_neighbours,
                particle_coordinates=self.gas_final_coordinates.value,
                dark_matter_coordinates=self.dark_matter_final_coordinates.value,
                particle_ids=self.gas_final_ids.value,
                dark_matter_ids=self.dark_matter_final_ids.value,
                boxsize=boxsize.value,
            )
            self.gas_spread = unyt_array(
                self.gas_spread, units=self.gas_final_coordinates.units
            )
            LOGGER.info("Finished computing gas spread metric")
        else:
            self.gas_spread = None

        if self.star_final_coordinates is not None:
            boxsize = self.boxsize.to(self.star_final_coordinates.units)

            LOGGER.info("Computing star spread metric")
            self.star_spread = find_neighbour_distances(
                neighbours=self.gas_neighbours,
                particle_coordinates=self.star_final_coordinates.value,
                dark_matter_coordinates=self.dark_matter_final_coordinates.value,
                particle_ids=self.star_final_ids.value,
                dark_matter_ids=self.dark_matter_final_ids.value,
                boxsize=boxsize.value,
            )
            self.star_spread = unyt_array(
                self.star_spread, units=self.star_final_coordinates.units
            )
            LOGGER.info("Finished computing star spread metric")
        else:
            self.star_spread = None

        return
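
``find_neighbour_distances`` (the free function called above) is not reproduced in these examples; its key ingredient is a minimum-image distance, so that spreads are not inflated across the periodic boundary. A sketch of that distance for a cubic box (the helper name is illustrative):

import numpy as np

def periodic_distance(a, b, boxsize):
    # Minimum-image separation in a periodic cubic box.
    dx = np.abs(a - b)
    dx = np.where(dx > 0.5 * boxsize, boxsize - dx, dx)
    return np.sqrt(np.sum(dx * dx, axis=-1))

boxsize = 100.0
print(periodic_distance(np.array([1.0, 0, 0]), np.array([99.0, 0, 0]), boxsize))
# -> 2.0, not 98.0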
Example #20
    def load_halo_data(self):
        """
        Loads haloes from SubFind and, using the center of potential
        and R_200, uses trees through :meth:`ParticleDataset.associate_haloes`
        to set the halo values.
        """

        if self.halo_filename is None:
            return

        LOGGER.info(f"Loading halo catalogue data from {self.halo_filename}")

        # We want to extract the center of potential and r_200mean for central
        # galaxies only, based on their FoF group. However, based on this FOF
        # group we do not know which haloes are centrals. For this we need to
        # use the following:
        #
        #   Subhalo/Subgroupnumber == 0 gives centrals
        #   Subhalo/Groupnumber indexes the FOF catalogue
        #   FOF/Group_R_Mean200 gives the R_200mean
        #   FOF/GroupCentreOfPotential gives the center of potential

        # First load the length units out of the catalogue, and h-correct
        with h5py.File(f"{self.halo_filename}.0.hdf5", "r") as handle:
            hubble_param = handle["Header"].attrs["HubbleParam"]
            cgs = handle["Units"].attrs["UnitLength_in_cm"] / hubble_param

        units = unyt_quantity(cgs, units="cm").to("Mpc")

        # FoF group info; this will be sliced
        center_of_potential = []
        r_200mean = []
        central_group_numbers = []

        for file in range(self.num_files_halo):
            current_filename = f"{self.halo_filename}.{file}.hdf5"

            with h5py.File(current_filename, "r") as handle:
                center_of_potential.append(
                    handle["/FOF/GroupCentreOfPotential"][...])
                r_200mean.append(handle["/FOF/Group_R_Mean200"][...])

                sub_group_number = handle["/Subhalo/SubGroupNumber"][...]
                group_number = handle["/Subhalo/GroupNumber"][...]

                central_group_numbers.append(
                    group_number[sub_group_number == 0])

        # We currently have a list of arrays that need to be stuck together.
        # Numbering starts at 1 for some reason...
        central_group_numbers = concatenate(central_group_numbers) - 1

        # This slicing removes all non-central FoF groups.
        center_of_potential = concatenate(
            center_of_potential)[central_group_numbers]
        r_200mean = concatenate(r_200mean)[central_group_numbers]

        halo_coordinates = unyt_array(center_of_potential, units=units)
        halo_radii = unyt_array(r_200mean, units=units)

        self.number_of_groups = halo_radii.size

        LOGGER.info("Finished loading halo catalogue data")

        for particle_type in ["dark_matter", "gas", "stars"]:
            particle_data = getattr(self, particle_type, None)

            if particle_data is not None:
                LOGGER.info(f"Associating haloes for {particle_type}")
                particle_data.associate_haloes(
                    halo_coordinates=halo_coordinates,
                    halo_radii=halo_radii,
                    boxsize=self.boxsize,
                )

        self.halo_coordinates = halo_coordinates
        self.halo_radii = halo_radii

        return
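
The central-selection logic turns the 1-indexed ``Subhalo/GroupNumber`` into an index over the concatenated FoF arrays. A synthetic illustration:

import numpy as np

# FoF arrays, one entry per group.
r_200mean = np.array([1.5, 0.8, 2.1])

# Subhalo arrays: group membership and sub-group rank.
group_number = np.array([1, 1, 2, 3, 3])      # 1-indexed FoF group
sub_group_number = np.array([0, 1, 0, 0, 1])  # 0 marks the central

central_group_numbers = group_number[sub_group_number == 0] - 1
# -> [0, 1, 2]: array indices of the FoF groups that host a central

central_r_200mean = r_200mean[central_group_numbers]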