Example #1
    def _set_units(self):
        """
        Set "cm" units for explicitly comoving.
        Note, we are using comoving units all the time since
        we are dealing with data at multiple redshifts.
        """
        for my_unit in ["m", "pc", "AU"]:
            new_unit = f"{my_unit}cm"
            # length here is unyt.dimensions.length.
            self.unit_registry.add(
                new_unit, self.unit_registry.lut[my_unit][0],
                length, self.unit_registry.lut[my_unit][3])

        setup = True
        for attr in ["hubble_constant",
                     "omega_matter",
                     "omega_lambda"]:
            if getattr(self, attr) is None:
                setup = False
                ytreeLogger.warning(
                    f"{attr} missing from data. "
                    "Arbor will have no cosmology calculator.")

        if setup:
            self.cosmology = Cosmology(
                hubble_constant=self.hubble_constant,
                omega_matter=self.omega_matter,
                omega_lambda=self.omega_lambda,
                omega_radiation=self.omega_radiation,
                unit_registry=self.unit_registry)
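
A minimal standalone sketch of the same registration pattern with a fresh unyt registry (a sketch only, not ytree API):

import types
from unyt.unit_registry import UnitRegistry
from unyt.dimensions import length

reg = UnitRegistry()
for base in ["m", "pc", "AU"]:
    # lut entries are (base_value, dimensions, offset, tex_repr, ...);
    # reuse the base unit's conversion factor and LaTeX symbol.
    reg.add(f"{base}cm", reg.lut[base][0], length, reg.lut[base][3])
# Unit("pccm", registry=reg) now resolves to comoving parsecs.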
Example #2
import types

def save_arbor(arbor, filename=None, fields=None, trees=None,
               max_file_size=524288):
    """
    Save the arbor to a file.

    This is the internal function called by Arbor.save_arbor.
    """

    if isinstance(trees, types.GeneratorType):
        trees = list(trees)

    arbor._plant_trees()
    update, filename = determine_save_state(
        arbor, filename, fields, trees)
    filename = determine_output_filename(filename, ".h5")
    fields = determine_field_list(arbor, fields, update)

    if not fields:
        mylog.warning(
            "No action will be taken for the following reasons:\n"
            " - This dataset is already a YTreeArbor.\n"
            " - No filename has been given.\n"
            " - No new analysis fields have been created.\n"
            " - No custom list of trees has been provided.")
        return None

    group_nnodes, group_ntrees, root_field_data = \
      save_data_files(arbor, filename, fields, trees,
                      max_file_size, update)

    header_filename = save_header_file(
        arbor, filename, fields, root_field_data,
        group_nnodes, group_ntrees)

    return header_filename
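
A hypothetical usage sketch through the public API that ultimately calls save_arbor above (the dataset path is illustrative):

import ytree

a = ytree.load("arbor/arbor.h5")  # any loadable dataset
# Save the first eight trees; the returned header filename
# can be reloaded directly.
fn = a.save_arbor(filename="my_arbor", trees=[a[i] for i in range(8)])
a2 = ytree.load(fn)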
Example #3
    def add_alias_field(self, alias, field, units=None, force_add=True):
        """
        Add an alias field.
        """

        if alias in self:
            if force_add:
                ftype = self[alias].get("type", "on-disk")
                if ftype in ["alias", "derived"]:
                    fl = self.arbor.derived_field_list
                else:
                    fl = self.arbor.field_list
                mylog.warning(f"Overriding field \"{alias}\" that already "
                              f"exists as {ftype} field.")
                fl.remove(alias)
            else:
                return

        if field not in self:
            if force_add:
                raise ArborFieldDependencyNotFound(field, alias, arbor=self)
            else:
                return

        if units is None:
            units = self[field].get("units")
        self.arbor.derived_field_list.append(alias)
        self[alias] = \
          {"type": "alias", "units": units,
           "dependencies": [field]}
        if "aliases" not in self[field]:
            self[field]["aliases"] = []
            self[field]["aliases"].append(alias)
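
A hypothetical usage sketch, assuming the dataset provides an on-disk "Mvir" field (the path is illustrative):

import ytree

a = ytree.load("tree_0_0_0.dat")
# "virial_mass" becomes an alias for the on-disk "Mvir" field,
# returned in solar masses.
a.add_alias_field("virial_mass", "Mvir", units="Msun")
print(a["virial_mass"])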
Example #4
    def __init__(self, filename):
        if not os.path.exists(filename):
            mylog.warning(
                f"Cannot find data file: {filename}. "
                 "Will not be able to load field data.")

        self.filename = filename
        self.fh = None
Example #5
    def _parse_parameter_file(self):
        """
        Parse the file header, get things like:
        - cosmological parameters
        - box size
        - list of fields
        """

        for u in ['mass', 'vel', 'len']:
            setattr(self, '_lht_units_' + u,
                    getattr(self._lht0, 'units_' + u))
            # v, s = getattr(self._lht0, 'units_' + u).split()
            # setattr(self, '_lht_units_' + u, self.quan(float(v), s))

        self.hubble_constant = self._lht0.hubble_constant
        self.omega_matter = self._lht0.omega_matter
        self.omega_lambda = self._lht0.omega_lambda
        self.box_size = self.quan(self._lht0.box_size, self._lht_units_len)
        # self.box_size = self._lht0.box_size * self._lht_units_len

        # a list of all fields on disk
        fields = self._lht0.fields
        # a dictionary of information for each field
        # this can have specialized information for reading the field
        fi = {}
        # for example:
        # fi["mass"] = {"column": 4, "units": "Msun/h", ...}
        none_keys = ['Descendant', 'FirstProgenitor', 'NextProgenitor',
                     'FirstHaloInFOFgroup', 'NextHaloInFOFgroup',
                     'Len', 'MostBoundID',
                     'SnapNum', 'FileNr', 'SubhaloIndex',
                     'uid', 'desc_uid', 'scale_factor',
                     'Jx', 'Jy', 'Jz']
        mass_keys = ['M_Mean200', 'Mvir', 'M_TopHat', 'SubHalfMass']
        dist_keys = ['x', 'y', 'z']
        velo_keys = ['VelDisp', 'Vmax', 'vx', 'vy', 'vz']
        all_keys = [none_keys, mass_keys, dist_keys, velo_keys]
        all_units = ['', self._lht_units_mass, self._lht_units_len,
                     self._lht_units_vel]
        for keylist, unit in zip(all_keys, all_units):
            try:
                # UnitParseError comes from unyt.exceptions.
                self.quan(1, unit)
                punit = unit
            except UnitParseError:  # pragma: no cover
                ytreeLogger.warning(f"Could not parse unit: {unit}")
                punit = ''
            for k in keylist:
                fi[k] = {'units': punit}

        self.field_list = fields
        self.field_info.update(fi)
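
A standalone sketch of the keylist-to-units mapping pattern above; the unit strings are illustrative LHaloTree conventions, not values read from a real file:

fi = {}
all_keys = [['uid'], ['Mvir'], ['x'], ['vx']]
all_units = ['', '1e10*Msun/h', 'Mpc/h', 'km/s']
for keylist, unit in zip(all_keys, all_units):
    for k in keylist:
        fi[k] = {'units': unit}
assert fi['Mvir'] == {'units': '1e10*Msun/h'}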
Example #6
    def set_global_properties(self, validate=False):
        r"""Set attributes for all trees by loading all of the halos.

        Args:
            validate (bool, optional): If True, the data loaded from the
                file will be validated. Defaults to False.

        .. todo:: For small files, the data could be cached which would
                  greatly speed up loading.

        """
        # Tree num array
        self.treenum_arr = np.zeros(self.totnhalos, dtype='int64')
        start = self.nhalos_before_tree
        stop = start + self.nhalos_per_tree
        for t in range(self.ntrees):
            self.treenum_arr[start[t]:stop[t]] = t
        # Memmap/file object
        self.fobj = np.memmap(self.filename,
                              dtype=self.item_dtype,
                              mode='c',
                              offset=self.header_size)
        # Read all data
        data = self.read_all_trees(skip_add_fields=True, validate=validate)
        # File number
        self.filenum = data['FileNr'][0]
        # The first root halo is expected to be in the final snapshot.
        if (data['SnapNum'][0] + 1) != len(
                self.scale_factors):  # pragma: no cover
            ytreeLogger.warning(
                "First FoF central is in snapshot "
                f"{data['SnapNum'][0] + 1}/{len(self.scale_factors)}.")
        # Halo unique IDs: file number in the high 32 bits,
        # within-file halo index in the low 32 bits.
        self.all_uids = np.bitwise_or(
            np.int64(self.filenum) << 32,
            np.arange(self.totnhalos, dtype='int64'))
        # Get descendant unique IDs
        desc = data['Descendant']
        pos_flag = (desc >= 0)
        desc_uid = np.zeros(self.totnhalos, dtype='int64') - 1
        desc_abs = self.get_total_index(self.treenum_arr, desc)
        desc_uid[pos_flag] = self.all_uids[desc_abs[pos_flag]]
        self.all_desc_uids = desc_uid
        # Add fields and cache root fields
        data = self.add_computed_fields(-1, data, validate=validate)
        root_idx = self.nhalos_before_tree
        self._root_data = dict()
        for k in self.fields:
            self._root_data[k] = data[k][root_idx]
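
A standalone sketch of the uid packing used above: the file number occupies the high 32 bits and the within-file halo index the low 32 bits, so both can be recovered by shifting and masking.

import numpy as np

filenum = 3
idx = np.arange(5, dtype='int64')
uids = np.bitwise_or(np.int64(filenum) << 32, idx)
assert np.all((uids >> 32) == filenum)
assert np.all((uids & 0xFFFFFFFF) == idx)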
Example #7
    def _parse_parameter_file(self):
        with h5py.File(self.parameter_filename, mode="r") as f:
            self._nfiles = f["Header"].attrs["NFiles"]
            self._nsnaps = f["Header"].attrs["NSnaps"]
            self._size = f["ForestInfo"].attrs["NForests"]
            self._file_count = f["ForestInfo"]["NForestsPerFile"][()]

        if self._nfiles < 1:
            mylog.warning(f"Dataset {self.parameter_filename} has no data files.")
            return

        fn = f"{self._prefix}{self._suffix}.0"
        if not os.path.exists(fn):
            raise RuntimeError(f"Data file not found: {fn}.")

        with h5py.File(fn, mode="r") as f:
            self.hubble_constant = f["Header/Simulation"].attrs["h_val"]
            self.omega_matter = f["Header/Simulation"].attrs["Omega_m"]
            self.omega_lambda = f["Header/Simulation"].attrs["Omega_Lambda"]
            self.box_size = self.quan(f["Header/Simulation"].attrs["Period"], "Mpc/h")
            if self._nsnaps < 1:
                mylog.warning(f"Dataset {self.parameter_filename} has no snapshots.")
                return

            self.units = {}
            for attr in ["Length_unit_to_kpc",
                         "Mass_unit_to_solarmass",
                         "Velocity_unit_to_kms"]:
                self.units[attr] = f["Header/Units"].attrs[attr]

            field_list = list(f["Snap_000"].keys())
            self._scale_factors = \
              np.array([f[f"Snap_{i:03d}"].attrs["scalefactor"]
                        for i in range(self._nsnaps)])

        self.field_list = field_list
        self.field_info.update({field: {} for field in field_list})

        self.field_list.append("scale_factor")
        self.field_info["scale_factor"] = {"source": "arbor"}
Example #8
    def _parse_parameter_file(self):
        f = h5py.File(self.parameter_filename, mode='r')

        # Is the file a collection of virtual data sets
        # pointing to multiple data files?
        virtual = self._virtual_dataset
        if virtual:
            fgroup = f.get('File0')
            if fgroup is None:
                f.close()
                raise ArborDataFileEmpty(self.filename)
        else:
            fgroup = f

        if 'halos' in fgroup['Forests']:
            # array of structs layout
            mylog.warning(
                "This dataset was written in array of structs format. "
                "Field access will be significantly slower than struct "
                "of arrays format.")
            self._aos = True
            ftypes = fgroup['Forests/halos'].dtype
            my_fi = dict((ftypes.names[i], {
                'dtype': ftypes[i]
            }) for i in range(len(ftypes)))
        else:
            # struct of arrays layout
            self._aos = False
            my_fi = dict((field, {
                'dtype': data.dtype
            }) for field, data in fgroup['Forests'].items())

        if virtual:
            aname = _access_names[self.access]['total']
            self._size = f.attrs[aname]
        header = fgroup.attrs['Consistent Trees_metadata'].astype(str)
        header = header.tolist()
        f.close()

        header_fi = parse_ctrees_header(self, header, ntrees_in_file=False)
        # Do some string manipulation to match the header with
        # altered names in the hdf5 file.
        new_fi = {}
        for field in header_fi:
            new_field = field
            # remove ?| characters
            new_field = re.sub(r'[?|]', '', new_field)
            # replace []/() characters with _
            new_field = re.sub(r'[\[\]\/\(\)]', '_', new_field)
            # remove leading/trailing underscores
            new_field = new_field.strip('_')
            # replace double underscore with single underscore
            new_field = new_field.replace('__', '_')

            new_fi[new_field] = header_fi[field].copy()
            if 'column' in new_fi[new_field]:
                del new_fi[new_field]['column']

        for field in my_fi:
            my_fi[field].update(new_fi.get(field, {}))

        self.field_list = list(my_fi.keys())
        self.field_info.update(my_fi)
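
A worked example of the field-name cleanup above; the inputs are typical Consistent Trees column labels and the outputs follow directly from the regexes:

import re

def sanitize(field):
    new_field = re.sub(r'[?|]', '', field)               # remove ?|
    new_field = re.sub(r'[\[\]\/\(\)]', '_', new_field)  # []/() -> _
    new_field = new_field.strip('_')
    return new_field.replace('__', '_')

assert sanitize('Mvir(10)') == 'Mvir_10'
assert sanitize('T/|U|(54)') == 'T_U_54'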
Example #9
    def add_derived_field(self,
                          name,
                          function,
                          units=None,
                          dtype=None,
                          description=None,
                          vector_field=False,
                          force_add=True):
        """
        Add a derived field.
        """

        if name in self:
            if force_add:
                ftype = self[name].get("type", "on-disk")
                if ftype in ["alias", "derived"]:
                    fl = self.arbor.derived_field_list
                else:
                    fl = self.arbor.field_list
                mylog.warning(f"Overriding field \"{name}\" that already "
                              f"exists as {ftype} field.")
                fl.remove(name)
            else:
                return

        if units is None:
            units = ""
        if dtype is None:
            dtype = self.arbor._default_dtype
        info = {
            "name": name,
            "type": "derived",
            "function": function,
            "units": units,
            "dtype": dtype,
            "vector_field": vector_field,
            "description": description
        }

        fc = FieldDetector(self.arbor, name=name)
        try:
            rv = function(info, fc)
        except TypeError as e:
            raise RuntimeError("""

Field function syntax in ytree has changed. Field functions must
now take two arguments, as in the following:
def my_field(field, data):
    return data['mass']

Check the TypeError exception above for more details.
""") from e

        except ArborFieldDependencyNotFound as e:
            if force_add:
                raise e
            else:
                return

        rv.convert_to_units(units)
        info["dependencies"] = list(fc.keys())

        self.arbor.derived_field_list.append(name)
        self[name] = info
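
A hypothetical usage sketch, assuming the arbor has a "mass" field; the two-argument function signature is the one validated above (the dataset path is illustrative):

import ytree

a = ytree.load("arbor/arbor.h5")

def _mass_squared(field, data):
    return data["mass"] ** 2

a.add_derived_field("mass_squared", _mass_squared, units="Msun**2")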