Code example #1
 def _initialize_particle_handler(self):
     self._setup_data_io()
     template = self.dataset.filename_template
     ndoms = self.dataset.file_count
     cls = self.dataset._file_class
     self.data_files = [cls(self.dataset, self.io, template % {'num':i}, i)
                        for i in range(ndoms)]
     self.total_particles = sum(
             sum(d.total_particles.values()) for d in self.data_files)
     ds = self.dataset
     self.oct_handler = ParticleOctreeContainer(
         [1, 1, 1], ds.domain_left_edge, ds.domain_right_edge,
         over_refine = ds.over_refine_factor)
     self.oct_handler.n_ref = ds.n_ref
     mylog.info("Allocating for %0.3e particles", self.total_particles)
     # No more than 256^3 in the region finder.
     N = min(len(self.data_files), 256) 
     self.regions = ParticleRegions(
             ds.domain_left_edge, ds.domain_right_edge,
             [N, N, N], len(self.data_files))
     self._initialize_indices()
     self.oct_handler.finalize()
     self.max_level = self.oct_handler.max_level
     tot = sum(self.oct_handler.recursively_count().values())
     mylog.info("Identified %0.3e octs", tot)
Code example #2
 def _detect_output_fields(self):
     self.field_list = []
     # Do this only on the root processor to save disk work.
     if self.comm.rank in (0, None):
         mylog.info("Gathering a field list (this may take a moment.)")
         field_list = set()
         random_sample = self._generate_random_grids()
         for grid in random_sample:
             if not hasattr(grid, 'filename'): continue
             try:
                 gf = self.io._read_field_names(grid)
             except self.io._read_exception:
                 raise IOError("Grid %s is a bit funky?" % grid.id)
             mylog.debug("Grid %s has: %s", grid.id, gf)
             field_list = field_list.union(gf)
         if "AppendActiveParticleType" in self.dataset.parameters:
             ap_fields = self._detect_active_particle_fields()
             field_list = list(set(field_list).union(ap_fields))
         ptypes = self.dataset.particle_types
         ptypes_raw = self.dataset.particle_types_raw
     else:
         field_list = None
         ptypes = None
         ptypes_raw = None
     self.field_list = list(self.comm.mpi_bcast(field_list))
     self.dataset.particle_types = list(self.comm.mpi_bcast(ptypes))
     self.dataset.particle_types_raw = list(self.comm.mpi_bcast(ptypes_raw))
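Example #2 gathers field names on rank 0 only and then broadcasts the result to every rank. The same pattern can be sketched with plain mpi4py, independent of yt's communicator wrapper (the field names below are stand-ins):

    # Minimal sketch: compute on rank 0, broadcast to every rank.
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    if comm.rank == 0:
        field_list = sorted({"density", "temperature"})  # stand-in for the disk scan
    else:
        field_list = None
    field_list = comm.bcast(field_list, root=0)  # all ranks now hold the same list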
Code example #3
    def _get_all_outputs(self, find_outputs=False):
        """
        Get all potential datasets and combine into a time-sorted list.
        """

        # Create the set of outputs from which further selection will be done.
        if find_outputs:
            self._find_outputs()

        elif self.parameters['dtDataDump'] > 0 and \
          self.parameters['CycleSkipDataDump'] > 0:
            mylog.info(
                "Simulation %s has both dtDataDump and CycleSkipDataDump set.",
                self.parameter_filename)
            mylog.info(
                "    Unable to calculate datasets.  " +
                "Attempting to search in the current directory")
            self._find_outputs()

        else:
            # Get all time or cycle outputs.
            if self.parameters['CycleSkipDataDump'] > 0:
                self._calculate_cycle_outputs()
            else:
                self._calculate_time_outputs()

            # Calculate times for redshift outputs.
            if self.cosmological_simulation:
                for output in self.all_redshift_outputs:
                    output["time"] = self.cosmology.t_from_z(output["redshift"])
                self.all_redshift_outputs.sort(key=lambda obj:obj["time"])

            self.all_outputs = self.all_time_outputs + self.all_redshift_outputs
            if self.parameters['CycleSkipDataDump'] <= 0:
                self.all_outputs.sort(key=lambda obj:obj['time'].to_ndarray())
Code example #4
    def _parse_parameter_file(self):

        hvals = self._get_hvals()

        self.dimensionality = 3
        self.refine_by = 2
        self.parameters["HydroMethod"] = "sph"
        self.unique_identifier = \
            int(os.stat(self.parameter_filename)[stat.ST_CTIME])
        # Set standard values

        # We may have an overridden bounding box.
        if self.domain_left_edge is None:
            self.domain_left_edge = np.zeros(3, "float64")
            self.domain_right_edge = np.ones(3, "float64") * hvals["BoxSize"]
        nz = 1 << self.over_refine_factor
        self.domain_dimensions = np.ones(3, "int32") * nz
        self.periodicity = (True, True, True)

        self.cosmological_simulation = 1

        self.current_redshift = hvals["Redshift"]
        self.omega_lambda = hvals["OmegaLambda"]
        self.omega_matter = hvals["Omega0"]
        self.hubble_constant = hvals["HubbleParam"]
        # According to the Gadget manual, OmegaLambda will be zero for
        # non-cosmological datasets.  However, it may be the case that
        # individuals are running cosmological simulations *without* Lambda, in
        # which case we may be doing something incorrect here.
        # It may be possible to deduce whether ComovingIntegration is on
        # somehow, but opinions on this vary.
        if self.omega_lambda == 0.0:
            mylog.info("Omega Lambda is 0.0, so we are turning off Cosmology.")
            self.hubble_constant = 1.0  # So that scaling comes out correct
            self.cosmological_simulation = 0
            self.current_redshift = 0.0
            # This may not be correct.
            self.current_time = hvals["Time"]
        else:
            # Now we calculate our time based on the cosmology, because in
            # ComovingIntegration hvals["Time"] will in fact be the expansion
            # factor, not the actual integration time, so we re-calculate
            # global time from our Cosmology.
            cosmo = Cosmology(self.hubble_constant,
                              self.omega_matter, self.omega_lambda)
            self.current_time = cosmo.hubble_time(self.current_redshift)
            mylog.info("Calculating time from %0.3e to be %0.3e seconds",
                       hvals["Time"], self.current_time)
        self.parameters = hvals

        prefix = os.path.abspath(
            os.path.join(os.path.dirname(self.parameter_filename),
                         os.path.basename(self.parameter_filename).split(".", 1)[0]))

        if hvals["NumFiles"] > 1:
            self.filename_template = "%s.%%(num)s%s" % (prefix, self._suffix)
        else:
            self.filename_template = self.parameter_filename

        self.file_count = hvals["NumFiles"]
Code example #5
    def _parse_parameter_file(self):
        if self.parameter_filename.startswith("http"):
            sdf_class = HTTPSDFRead
        else:
            sdf_class = SDFRead
        self.sdf_container = sdf_class(self.parameter_filename,
                                 header=self.sdf_header)

        # Reference
        self.parameters = self.sdf_container.parameters
        self.dimensionality = 3
        self.refine_by = 2
        try:
            self.unique_identifier = \
                int(os.stat(self.parameter_filename)[stat.ST_CTIME])
        except OSError:
            self.unique_identifier = time.time()


        if None in (self.domain_left_edge, self.domain_right_edge):
            R0 = self.parameters['R0']
            if 'offset_center' in self.parameters and self.parameters['offset_center']:
                self.domain_left_edge = np.array([0, 0, 0])
                self.domain_right_edge = np.array([
                 2.0 * self.parameters.get("R%s" % ax, R0) for ax in 'xyz'])
            else:
                self.domain_left_edge = np.array([
                    -self.parameters.get("R%s" % ax, R0) for ax in 'xyz'])
                self.domain_right_edge = np.array([
                    +self.parameters.get("R%s" % ax, R0) for ax in 'xyz'])
            self.domain_left_edge *= self.parameters.get("a", 1.0)
            self.domain_right_edge *= self.parameters.get("a", 1.0)

        nz = 1 << self.over_refine_factor
        self.domain_dimensions = np.ones(3, "int32") * nz
        if "do_periodic" in self.parameters and self.parameters["do_periodic"]:
            self.periodicity = (True, True, True)
        else:
            self.periodicity = (False, False, False)

        self.cosmological_simulation = 1

        self.current_redshift = self.parameters.get("redshift", 0.0)
        self.omega_lambda = self.parameters["Omega0_lambda"]
        self.omega_matter = self.parameters["Omega0_m"]
        if "Omega0_fld" in self.parameters:
            self.omega_lambda += self.parameters["Omega0_fld"]
        if "Omega0_r" in self.parameters:
            # not correct, but most codes can't handle Omega0_r
            self.omega_matter += self.parameters["Omega0_r"]
        self.hubble_constant = self.parameters["h_100"]
        self.current_time = units_2HOT_v2_time * self.parameters.get("tpos", 0.0)
        mylog.info("Calculating time to be %0.3e seconds", self.current_time)
        self.filename_template = self.parameter_filename
        self.file_count = 1
Code example #6
    def _set_code_unit_attributes(self):
        # Set a sane default for cosmological simulations.
        if self._unit_base is None and self.cosmological_simulation == 1:
            mylog.info("Assuming length units are in Mpc/h (comoving)")
            self._unit_base = dict(length = (1.0, "Mpccm/h"))
        # The remaining defaults come from the standard Gadget defaults.
        unit_base = self._unit_base or {}
        
        if "length" in unit_base:
            length_unit = unit_base["length"]
        elif "UnitLength_in_cm" in unit_base:
            if self.cosmological_simulation == 0:
                length_unit = (unit_base["UnitLength_in_cm"], "cm")
            else:
                length_unit = (unit_base["UnitLength_in_cm"], "cmcm/h")
        else:
            raise RuntimeError
        length_unit = _fix_unit_ordering(length_unit)
        self.length_unit = self.quan(length_unit[0], length_unit[1])
        
        if "velocity" in unit_base:
            velocity_unit = unit_base["velocity"]
        elif "UnitVelocity_in_cm_per_s" in unit_base:
            velocity_unit = (unit_base["UnitVelocity_in_cm_per_s"], "cm/s")
        else:
            if self.cosmological_simulation == 0:
                velocity_unit = (1e5, "cm/s")
            else:
                velocity_unit = (1e5, "cmcm/s")
        velocity_unit = _fix_unit_ordering(velocity_unit)
        self.velocity_unit = self.quan(velocity_unit[0], velocity_unit[1])

        # We set hubble_constant = 1.0 for non-cosmology, so this is safe.
        # Default to 1e10 Msun/h if mass is not specified.
        if "mass" in unit_base:
            mass_unit = unit_base["mass"]
        elif "UnitMass_in_g" in unit_base:
            if self.cosmological_simulation == 0:
                mass_unit = (unit_base["UnitMass_in_g"], "g")
            else:
                mass_unit = (unit_base["UnitMass_in_g"], "g/h")
        else:
            # Sane default
            mass_unit = (1.0, "1e10*Msun/h")
        mass_unit = _fix_unit_ordering(mass_unit)
        self.mass_unit = self.quan(mass_unit[0], mass_unit[1])

        if "time" in unit_base:
            time_unit = unit_base["time"]
        elif "UnitTime_in_s" in unit_base:
            time_unit = (unit_base["UnitTime_in_s"], "s")
        else:
            time_unit = (1., "s")        
        self.time_unit = self.quan(time_unit[0], time_unit[1])
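Examples #6 and #11 both pass (value, unit) pairs through `_fix_unit_ordering` before building quantities. Below is a sketch of what that helper is presumably doing, written as a hypothetical `fix_unit_ordering_sketch` rather than yt's actual implementation: accept either ordering and always return (value, unit-string).

    def fix_unit_ordering_sketch(unit):
        # If the caller passed ("Mpccm/h", 1.0), swap it to (1.0, "Mpccm/h").
        if isinstance(unit[0], str):
            unit = unit[1], unit[0]
        return unit

    assert fix_unit_ordering_sketch(("Mpccm/h", 1.0)) == (1.0, "Mpccm/h")
    assert fix_unit_ordering_sketch((1.0, "Mpccm/h")) == (1.0, "Mpccm/h")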
Code example #7
    def upload(self):
        api_key = ytcfg.get("yt", "hub_api_key")
        url = ytcfg.get("yt", "hub_url")
        if api_key == '':
            raise YTHubRegisterError
        metadata, (final_name, chunks) = self._generate_post()
        if hasattr(self, "_ds_mrep"):
            self._ds_mrep.upload()
        for i in metadata:
            if isinstance(metadata[i], np.ndarray):
                metadata[i] = metadata[i].tolist()
            elif hasattr(metadata[i], 'dtype'):
                metadata[i] = np.asscalar(metadata[i])
        metadata['obj_type'] = self.type
        if len(chunks) == 0:
            chunk_info = {'chunks': []}
        else:
            chunk_info = {'final_name': final_name, 'chunks': []}
            for cn, cv in chunks:
                chunk_info['chunks'].append((cn, cv.size * cv.itemsize))
        metadata = json.dumps(metadata)
        chunk_info = json.dumps(chunk_info)
        datagen, headers = multipart_encode({'metadata': metadata,
                                             'chunk_info': chunk_info,
                                             'api_key': api_key})
        request = urllib.request.Request(url, datagen, headers)
        # Actually do the request, and get the response
        try:
            rv = urllib.request.urlopen(request).read()
        except urllib.error.HTTPError as ex:
            if ex.code == 401:
                mylog.error("You must create an API key before uploading.")
                mylog.error("https://data.yt-project.org/getting_started.html")
                return
            else:
                raise ex
        uploader_info = json.loads(rv)
        new_url = url + "/handler/%s" % uploader_info['handler_uuid']
        for i, (cn, cv) in enumerate(chunks):
            remaining = cv.size * cv.itemsize
            f = TemporaryFile()
            np.save(f, cv)
            f.seek(0)
            pbar = UploaderBar("%s, % 2i/% 2i" %
                               (self.type, i + 1, len(chunks)))
            datagen, headers = multipart_encode({'chunk_data': f}, cb=pbar)
            request = urllib.request.Request(new_url, datagen, headers)
            rv = urllib.request.urlopen(request).read()

        datagen, headers = multipart_encode({'status': 'FINAL'})
        request = urllib.request.Request(new_url, datagen, headers)
        rv = json.loads(urllib.request.urlopen(request).read())
        mylog.info("Upload succeeded!  View here: %s", rv['url'])
        return rv
Code example #8
    def _write_cosmology_outputs(self, filename, outputs, start_index,
                                 decimals=3):
        r"""
        Write cosmology output parameters for a cosmology splice.
        """

        mylog.info("Writing redshift output list to %s.", filename)
        f = open(filename, "w")
        for output in outputs:
            f.write("%f\n" % (1. / (1. + output["redshift"])))
        f.close()
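Each line written by example #8 is the scale factor a = 1/(1 + z) of one requested output, one value per line. A quick illustration of the file contents for a few redshifts:

    # Scale factors written by the loop above, one per output redshift.
    redshifts = [10.0, 3.0, 1.0, 0.0]
    lines = ["%f\n" % (1.0 / (1.0 + z)) for z in redshifts]
    # -> ['0.090909\n', '0.250000\n', '0.500000\n', '1.000000\n']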
Code example #9
 def find_particles_by_type(self, ptype, max_num=None, additional_fields=None):
     """
     Returns a structure of arrays with all of the particles'
     positions, velocities, masses, types, IDs, and attributes for
     a particle type **ptype** for a maximum of **max_num**
     particles.  If non-default particle fields are used, provide
     them in **additional_fields**.
     """
     # Not sure whether this routine should be in the general HierarchyType.
     if self.grid_particle_count.sum() == 0:
         mylog.info("Data contains no particles.");
         return None
     if additional_fields is None:
         additional_fields = ['metallicity_fraction', 'creation_time',
                              'dynamical_time']
     pfields = [f for f in self.field_list if f.startswith('particle_')]
     nattr = self.dataset['NumberOfParticleAttributes']
     if nattr > 0:
         pfields += additional_fields[:nattr]
     # Find where the particles reside and count them
     if max_num is None: max_num = 1e100
     total = 0
     pstore = []
     for level in range(self.max_level, -1, -1):
         for grid in self.select_grids(level):
             index = np.where(grid['particle_type'] == ptype)[0]
             total += len(index)
             pstore.append(index)
             if total >= max_num: break
         if total >= max_num: break
     result = None
     if total > 0:
         result = {}
         for p in pfields:
             result[p] = np.zeros(total, 'float64')
         # Now we retrieve data for each field
         ig = count = 0
         for level in range(self.max_level, -1, -1):
             for grid in self.select_grids(level):
                 nidx = len(pstore[ig])
                 if nidx > 0:
                     for p in pfields:
                         result[p][count:count+nidx] = grid[p][pstore[ig]]
                     count += nidx
                 ig += 1
                 if count >= total: break
             if count >= total: break
         # Crop data if retrieved more than max_num
         if count > max_num:
             for p in pfields:
                 result[p] = result[p][0:max_num]
     return result
Code example #10
    def _write_cosmology_outputs(self, filename, outputs, start_index,
                                 decimals=3):
        """
        Write cosmology output parameters for a cosmology splice.
        """

        mylog.info("Writing redshift output list to %s.", filename)
        f = open(filename, 'w')
        for q, output in enumerate(outputs):
            z_string = "%%s[%%d] = %%.%df" % decimals
            f.write(("CosmologyOutputRedshift[%d] = %."
                     + str(decimals) + "f\n") %
                    ((q + start_index), output['redshift']))
        f.close()
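Example #10 writes indexed CosmologyOutputRedshift[...] parameter lines (Enzo's convention) instead of raw scale factors. The format it produces can be previewed in isolation (the redshift values here are made up):

    # Preview of the lines emitted above, with decimals=3 and start_index=0.
    start_index, decimals = 0, 3
    outputs = [{"redshift": 5.0}, {"redshift": 2.5}]
    fmt = "CosmologyOutputRedshift[%d] = %." + str(decimals) + "f\n"
    lines = [fmt % (q + start_index, o["redshift"]) for q, o in enumerate(outputs)]
    # -> ['CosmologyOutputRedshift[0] = 5.000\n', 'CosmologyOutputRedshift[1] = 2.500\n']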
Code example #11
    def _set_code_unit_attributes(self):
        # If no units passed in by user, set a sane default (Gadget-2 users guide).
        if self._unit_base is None:
            if self.cosmological_simulation == 1:
                mylog.info("Assuming length units are in kpc/h (comoving)")
                self._unit_base = dict(length = (1.0, "kpccm/h"))
            else:
                mylog.info("Assuming length units are in kpc (physical)")
                self._unit_base = dict(length = (1.0, "kpc"))

        # If units passed in by user, decide what to do about
        # co-moving and factors of h
        unit_base = self._unit_base or {}
        if "length" in unit_base:
            length_unit = unit_base["length"]
        elif "UnitLength_in_cm" in unit_base:
            if self.cosmological_simulation == 0:
                length_unit = (unit_base["UnitLength_in_cm"], "cm")
            else:
                length_unit = (unit_base["UnitLength_in_cm"], "cmcm/h")
        else:
            raise RuntimeError
        length_unit = _fix_unit_ordering(length_unit)
        self.length_unit = self.quan(length_unit[0], length_unit[1])

        unit_base = self._unit_base or {}
        if "velocity" in unit_base:
            velocity_unit = unit_base["velocity"]
        elif "UnitVelocity_in_cm_per_s" in unit_base:
            velocity_unit = (unit_base["UnitVelocity_in_cm_per_s"], "cm/s")
        else:
            velocity_unit = (1e5, "cm/s")
        velocity_unit = _fix_unit_ordering(velocity_unit)
        self.velocity_unit = self.quan(velocity_unit[0], velocity_unit[1])

        # We set hubble_constant = 1.0 for non-cosmology, so this is safe.
        # Default to 1e10 Msun/h if mass is not specified.
        if "mass" in unit_base:
            mass_unit = unit_base["mass"]
        elif "UnitMass_in_g" in unit_base:
            if self.cosmological_simulation == 0:
                mass_unit = (unit_base["UnitMass_in_g"], "g")
            else:
                mass_unit = (unit_base["UnitMass_in_g"], "g/h")
        else:
            # Sane default
            mass_unit = (1.0, "1e10*Msun/h")
        mass_unit = _fix_unit_ordering(mass_unit)
        self.mass_unit = self.quan(mass_unit[0], mass_unit[1])
        self.time_unit = self.length_unit / self.velocity_unit
Code example #12
    def save(self, name=None, suffix=None):
        r"""
        Saves a 1d profile plot.

        Parameters
        ----------
        name : str
            The output file keyword.
        suffix : string
            Specify the image type by its suffix. If not specified, the output
            type will be inferred from the filename. Defaults to PNG.
        """
        if not self._plot_valid:
            self._setup_plots()
        unique = set(self.figures.values())
        if len(unique) < len(self.figures):
            iters = izip(range(len(unique)), sorted(unique))
        else:
            iters = iteritems(self.figures)
        if not suffix:
            suffix = "png"
        suffix = ".%s" % suffix
        if name is None:
            if len(self.profiles) == 1:
                prefix = self.profiles[0].ds
            else:
                prefix = "Multi-data"
            name = "%s%s" % (prefix, suffix)
        else:
            sfx = get_image_suffix(name)
            if sfx != '':
                suffix = sfx
                prefix = name[:name.rfind(suffix)]
            else:
                prefix = name
        xfn = self.profiles[0].x_field
        if isinstance(xfn, tuple):
            xfn = xfn[1]
        canvas_cls = get_canvas(name)
        fns = []
        for uid, fig in iters:
            if isinstance(uid, tuple):
                uid = uid[1]
            canvas = canvas_cls(fig)
            fns.append("%s_1d-Profile_%s_%s%s" % (prefix, xfn, uid, suffix))
            mylog.info("Saving %s", fns[-1])
            canvas.print_figure(fns[-1])
        return fns
Code example #13
    def lock_grids_to_parents(self):
        r"""This function locks grid edges to their parents.

        This is useful in cases where the grid structure may be somewhat
        irregular, or where setting the left and right edges is a lossy
        process.  It is designed to correct situations where left/right edges
        may be set slightly incorrectly, resulting in discontinuities in images
        and the like.
        """
        mylog.info("Locking grids to parents.")
        for i, g in enumerate(self.grids):
            si = g.get_global_startindex()
            g.LeftEdge = self.ds.domain_left_edge + g.dds * si
            g.RightEdge = g.LeftEdge + g.ActiveDimensions * g.dds
            self.grid_left_edge[i,:] = g.LeftEdge
            self.grid_right_edge[i,:] = g.RightEdge
Code example #14
def interpolate_ages(data, file_stars, interp_tb=None, interp_ages=None,
                     current_time=None):
    if interp_tb is None:
        t_stars, a_stars = read_star_field(file_stars,
                                     field="t_stars")
        # timestamp of file should match amr timestamp
        if current_time:
            tdiff = YTQuantity(b2t(t_stars), 'Gyr') - current_time.in_units('Gyr')
            if np.abs(tdiff) > 1e-4:
                mylog.info("Timestamp mismatch in star " +
                           "particle header: %s", tdiff)
        mylog.info("Interpolating ages")
        interp_tb, interp_ages = b2t(data)
        interp_tb = YTArray(interp_tb, 'Gyr')
        interp_ages = YTArray(interp_ages, 'Gyr')
    temp = np.interp(data, interp_tb, interp_ages)
    return interp_tb, interp_ages, temp
Code example #15
def enable_plugins():
    import yt
    from yt.fields.my_plugin_fields import my_plugins_fields
    from yt.config import ytcfg
    my_plugin_name = ytcfg.get("yt","pluginfilename")
    # We assume that it is with respect to the $HOME/.yt directory
    if os.path.isfile(my_plugin_name):
        _fn = my_plugin_name
    else:
        _fn = os.path.expanduser("~/.yt/%s" % my_plugin_name)
    if os.path.isfile(_fn):
        mylog.info("Loading plugins from %s", _fn)
        execdict = yt.__dict__.copy()
        execdict['add_field'] = my_plugins_fields.add_field
        with open(_fn) as f:
            code = compile(f.read(), _fn, 'exec')
            exec(code, execdict)
Code example #16
 def _initialize_index(self, data_file, regions):
     ds = data_file.ds
     morton = np.empty(sum(data_file.total_particles.values()),
                       dtype="uint64")
     ind = 0
     DLE, DRE = ds.domain_left_edge, ds.domain_right_edge
     dx = (DRE - DLE) / (2**_ORDER_MAX)
     self.domain_left_edge = DLE.in_units("code_length").ndarray_view()
     self.domain_right_edge = DRE.in_units("code_length").ndarray_view()
     with open(data_file.filename, "rb") as f:
         f.seek(ds._header_offset)
         for iptype, ptype in enumerate(self._ptypes):
             # We'll just add the individual types separately
             count = data_file.total_particles[ptype]
             if count == 0: continue
             start, stop = ind, ind + count
             while ind < stop:
                 c = min(CHUNKSIZE, stop - ind)
                 pp = np.fromfile(f, dtype = self._pdtypes[ptype],
                                  count = c)
                 mis = np.empty(3, dtype="float64")
                 mas = np.empty(3, dtype="float64")
                 for axi, ax in enumerate('xyz'):
                     mi = pp["Coordinates"][ax].min()
                     ma = pp["Coordinates"][ax].max()
                     mylog.debug("Spanning: %0.3e .. %0.3e in %s", mi, ma, ax)
                     mis[axi] = mi
                     mas[axi] = ma
                 pos = np.empty((pp.size, 3), dtype="float64")
                 for i, ax in enumerate("xyz"):
                     eps = np.finfo(pp["Coordinates"][ax].dtype).eps
                     pos[:,i] = pp["Coordinates"][ax]
                 regions.add_data_file(pos, data_file.file_id,
                                       data_file.ds.filter_bbox)
                 morton[ind:ind+c] = compute_morton(
                     pos[:,0], pos[:,1], pos[:,2],
                     DLE, DRE, data_file.ds.filter_bbox)
                 ind += c
     mylog.info("Adding %0.3e particles", morton.size)
     return morton
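The `morton` array filled in example #16 holds Z-order (Morton) keys for the particle positions. yt's `compute_morton` is a compiled routine; the toy version below (a hypothetical `morton_key`) only illustrates the bit-interleaving idea at a small bit depth:

    def morton_key(pos, left_edge, right_edge, order=10):
        # Map each coordinate onto an integer grid of 2**order cells, then
        # interleave the bits of the three cell indices into one key.
        key = 0
        for axis in range(3):
            span = right_edge[axis] - left_edge[axis]
            frac = (pos[axis] - left_edge[axis]) / span
            cell = min(int(frac * (1 << order)), (1 << order) - 1)
            for bit in range(order):
                key |= ((cell >> bit) & 1) << (3 * bit + axis)
        return key

    print(morton_key((0.5, 0.25, 0.75), (0.0, 0.0, 0.0), (1.0, 1.0, 1.0)))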
Code example #17
    def save_data(self, array, node, name, set_attr=None, force=False, passthrough = False):
        """
        Arbitrary numpy data will be saved to the region in the datafile
        described by *node* and *name*.  If the data file does not exist, no
        error is raised and the data is simply not saved.
        """

        if self._data_mode != 'a': return
        try:
            node_loc = self._data_file[node]
            if name in node_loc and force:
                mylog.info("Overwriting node %s/%s", node, name)
                del self._data_file[node][name]
            elif name in node_loc and passthrough:
                return
        except KeyError:
            pass
        myGroup = self._data_file['/']
        for q in node.split('/'):
            if q: myGroup = myGroup.require_group(q)
        arr = myGroup.create_dataset(name,data=array)
        if set_attr is not None:
            for i, j in set_attr.items(): arr.attrs[i] = j
        self._data_file.flush()
Code example #18
 def __init__(self, ds, wg, pool):
     mylog.info("Initializing IOCommunicator")
     self.ds = ds
     self.wg = wg # We don't need to use this!
     self.pool = pool
     self.comm = pool.comm
     # We read our grids here
     self.grids = []
     storage = {}
     grids = ds.index.grids.tolist()
     grids.sort(key=lambda a:a.filename)
     for sto, g in parallel_objects(grids, storage = storage):
         sto.result = self.comm.rank
         sto.result_id = g.id
         self.grids.append(g)
     self._id_offset = ds.index.grids[0]._id_offset
     mylog.info("Reading from disk ...")
     self.initialize_data()
     mylog.info("Broadcasting ...")
     self.comm.comm.bcast(storage, root = wg.ranks[0])
     mylog.info("Done.")
     self.hooks = []
Code example #19
 def _rebuild_top_grids(self, level = 0):
     mylog.info("Rebuilding grids on level %s", level)
     cmask = (self.grid_levels.flat == (level + 1))
     cmsum = cmask.sum()
     mask = np.zeros(self.num_grids, dtype='bool')
     for grid in self.select_grids(level):
         mask[:] = 0
         LE = self.grid_left_edge[grid.id - grid._id_offset]
         RE = self.grid_right_edge[grid.id - grid._id_offset]
         grids, grid_i = self.get_box_grids(LE, RE)
         mask[grid_i] = 1
         grid._children_ids = []
         cgrids = self.grids[ ( mask * cmask).astype('bool') ]
         mylog.info("%s: %s / %s", grid, len(cgrids), cmsum)
         for cgrid in cgrids:
             grid._children_ids.append(cgrid.id)
             cgrid._parent_id = grid.id
     mylog.info("Finished rebuilding")
Code example #20
File: data_structures.py  Project: seanlabean/yt
    def add_sph_fields(self, n_neighbors=32, kernel="cubic", sph_ptype="io"):
        """Add SPH fields for the specified particle type.

        For a particle type with "particle_position" and "particle_mass" already
        defined, this method adds the "smoothing_length" and "density" fields.
        "smoothing_length" is computed as the distance to the nth nearest
        neighbor. "density" is computed as the SPH (gather) smoothed mass. The
        SPH fields are added only if they don't already exist.

        Parameters
        ----------
        n_neighbors : int
            The number of neighbors to use in smoothing length computation.
        kernel : str
            The kernel function to use in density estimation.
        sph_ptype : str
            The SPH particle type. Each dataset has only one sph_ptype; this
            method will overwrite the dataset's existing sph_ptype.

        """
        mylog.info("Generating SPH fields")

        # Unify units
        l_unit = "code_length"
        m_unit = "code_mass"
        d_unit = "code_mass / code_length**3"

        # Read basic fields
        ad = self.all_data()
        pos = ad[sph_ptype, "particle_position"].to(l_unit).d
        mass = ad[sph_ptype, "particle_mass"].to(m_unit).d

        # Construct k-d tree
        kdtree = PyKDTree(
            pos.astype("float64"),
            left_edge=self.domain_left_edge.to_value(l_unit),
            right_edge=self.domain_right_edge.to_value(l_unit),
            periodic=self.periodicity,
            leafsize=2 * int(n_neighbors),
        )
        order = np.argsort(kdtree.idx)

        def exists(fname):
            if (sph_ptype, fname) in self.derived_field_list:
                mylog.info("Field ('%s','%s') already exists. Skipping",
                           sph_ptype, fname)
                return True
            else:
                mylog.info("Generating field ('%s','%s')", sph_ptype, fname)
                return False

        data = {}

        # Add smoothing length field
        fname = "smoothing_length"
        if not exists(fname):
            hsml = generate_smoothing_length(pos[kdtree.idx], kdtree,
                                             n_neighbors)
            hsml = hsml[order]
            data[(sph_ptype, "smoothing_length")] = (hsml, l_unit)
        else:
            hsml = ad[sph_ptype, fname].to(l_unit).d

        # Add density field
        fname = "density"
        if not exists(fname):
            dens = estimate_density(
                pos[kdtree.idx],
                mass[kdtree.idx],
                hsml[kdtree.idx],
                kdtree,
                kernel_name=kernel,
            )
            dens = dens[order]
            data[(sph_ptype, "density")] = (dens, d_unit)

        # Add fields
        self._sph_ptypes = (sph_ptype, )
        self.index.update_data(data)
        self.num_neighbors = n_neighbors
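The "density" field added in example #20 is described as the SPH (gather) smoothed mass, i.e. rho_i = sum_j m_j W(|r_i - r_j|, h_i). The brute-force sketch below illustrates that sum with one common cubic-spline kernel normalization (an assumption made here for illustration); it is an O(N^2) stand-in, not yt's tree-based `estimate_density`:

    import numpy as np

    def cubic_spline_kernel(r, h):
        # Cubic spline with compact support at r = h (Gadget-style
        # normalization, assumed here for illustration).
        q = r / h
        w = np.zeros_like(q)
        inner = q <= 0.5
        outer = (q > 0.5) & (q <= 1.0)
        w[inner] = 1 - 6 * q[inner] ** 2 + 6 * q[inner] ** 3
        w[outer] = 2 * (1 - q[outer]) ** 3
        return 8.0 / (np.pi * h ** 3) * w

    def gather_density(pos, mass, hsml):
        # Brute-force gather estimate; fine for a handful of particles.
        dens = np.zeros(len(pos))
        for i in range(len(pos)):
            r = np.linalg.norm(pos - pos[i], axis=1)
            dens[i] = np.sum(mass * cubic_spline_kernel(r, hsml[i]))
        return dens

    rng = np.random.default_rng(0)
    pos = rng.random((100, 3))
    print(gather_density(pos, np.full(100, 1e-2), np.full(100, 0.2))[:3])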
Code example #21
 def finish(self):
     mylog.info("Finishing '%s'", self.title)
Code example #22
def parallel_objects(objects, njobs = 0, storage = None, barrier = True,
                     dynamic = False):
    r"""This function dispatches components of an iterable to different
    processors.

    The parallel_objects function accepts an iterable, *objects*, and based on
    the number of jobs requested and number of available processors, decides
    how to dispatch individual objects to processors or sets of processors.
    This can implicitly include multi-level parallelism, such that the
    processor groups assigned each object can be composed of several or even
    hundreds of processors.  *storage* is also available, for collation of
    results at the end of the iteration loop.

    Calls to this function can be nested.

    This should not be used to iterate over datasets --
    :class:`~yt.data_objects.time_series.DatasetSeries` provides a much nicer
    interface for that.

    Parameters
    ----------
    objects : iterable
        The list of objects to dispatch to different processors.
    njobs : int
        How many jobs to spawn.  By default, one job will be dispatched for
        each available processor.
    storage : dict
        This is a dictionary, which will be filled with results during the
        course of the iteration.  The keys will be the dataset
        indices and the values will be whatever is assigned to the *result*
        attribute on the storage during iteration.
    barrier : bool
        Should a barrier be placed at the end of iteration?
    dynamic : bool
        This governs whether or not dynamic load balancing will be enabled.
        This requires one dedicated processor; if this is enabled with a set of
        128 processors available, only 127 will be available to iterate over
        objects as one will be load balancing the rest.


    Examples
    --------
    Here is a simple example of iterating over a set of centers and making
    slice plots centered at each.

    >>> for c in parallel_objects(centers):
    ...     SlicePlot(ds, "x", "Density", center = c).save()
    ...

    Here's an example of calculating the angular momentum vector of a set of
    spheres, but with a set of four jobs of multiple processors each.  Note
    that we also store the results.

    >>> storage = {}
    >>> for sto, c in parallel_objects(centers, njobs=4, storage=storage):
    ...     sp = ds.sphere(c, (100, "kpc"))
    ...     sto.result = sp.quantities["AngularMomentumVector"]()
    ...
    >>> for sphere_id, L in sorted(storage.items()):
    ...     print(centers[sphere_id], L)
    ...

    """
    if dynamic:
        from .task_queue import dynamic_parallel_objects
        for my_obj in dynamic_parallel_objects(objects, njobs=njobs,
                                               storage=storage):
            yield my_obj
        return

    if not parallel_capable:
        njobs = 1
    my_communicator = communication_system.communicators[-1]
    my_size = my_communicator.size
    mylog.info("you have %s processors",my_size)
    if njobs <= 0:
        njobs = my_size
    if njobs > my_size:
        mylog.error("You have asked for %s jobs, but you only have %s processors.",
            njobs, my_size)
        raise RuntimeError
    my_rank = my_communicator.rank
    mylog.info("I am %s processor",my_rank)
    all_new_comms = np.array_split(np.arange(my_size), njobs)
    for i,comm_set in enumerate(all_new_comms):
        if my_rank in comm_set:
            my_new_id = i
            break
    if parallel_capable:
        communication_system.push_with_ids(all_new_comms[my_new_id].tolist())
    to_share = {}
    # If our objects object is slice-aware, like time series data objects are,
    # this will prevent intermediate objects from being created.
    oiter = itertools.islice(enumerate(objects), my_new_id, None, njobs)
    for result_id, obj in oiter:
        if storage is not None:
            rstore = ResultsStorage()
            rstore.result_id = result_id
            yield rstore, obj
            to_share[rstore.result_id] = rstore.result
        else:
            yield obj
    if parallel_capable:
        communication_system.pop()
    if storage is not None:
        # Now we have to broadcast it
        new_storage = my_communicator.par_combine_object(
                to_share, datatype = 'dict', op = 'join')
        mylog.info("my storage: %s", type(new_storage))
        storage.update(new_storage)
    if barrier:
        my_communicator.barrier()
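The `itertools.islice(enumerate(objects), my_new_id, None, njobs)` call above is what hands each job every njobs-th object, starting at its own job id. The distribution is easy to see serially (the object names here are placeholders):

    import itertools

    objects = ["center_%d" % i for i in range(10)]
    njobs = 4
    for my_new_id in range(njobs):
        mine = [obj for _, obj in
                itertools.islice(enumerate(objects), my_new_id, None, njobs)]
        print(my_new_id, mine)
    # job 0 gets center_0, center_4, center_8; job 1 gets center_1, center_5,
    # center_9; and so on.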
Code example #23
def enable_parallelism(suppress_logging=False, communicator=None):
    """
    This method is used inside a script to turn on MPI parallelism, via
    mpi4py.  More information about running yt in parallel can be found
    here: http://yt-project.org/docs/3.0/analyzing/parallel_computation.html

    Parameters
    ----------
    suppress_logging : bool
       If set to True, only rank 0 will log information after the initial
       setup of MPI.

    communicator : mpi4py.MPI.Comm
        The MPI communicator to use. This controls which processes yt can see.
        If not specified, will be set to COMM_WORLD.
    """
    global parallel_capable, MPI
    try:
        from mpi4py import MPI as _MPI
    except ImportError:
        mylog.info("mpi4py was not found. Disabling parallel computation")
        parallel_capable = False
        return
    MPI = _MPI
    exe_name = os.path.basename(sys.executable)

    # if no communicator specified, set to COMM_WORLD
    if communicator is None:
        communicator = MPI.COMM_WORLD

    parallel_capable = (communicator.size > 1)
    if not parallel_capable: return False
    mylog.info("Global parallel computation enabled: %s / %s",
               communicator.rank, communicator.size)
    communication_system.push(communicator)
    ytcfg["yt","__global_parallel_rank"] = str(communicator.rank)
    ytcfg["yt","__global_parallel_size"] = str(communicator.size)
    ytcfg["yt","__parallel"] = "True"
    if exe_name == "embed_enzo" or \
        ("_parallel" in dir(sys) and sys._parallel is True):
        ytcfg["yt","inline"] = "True"
    if communicator.rank > 0:
        if ytcfg.getboolean("yt","LogFile"):
            ytcfg["yt","LogFile"] = "False"
            yt.utilities.logger.disable_file_logging()
    yt.utilities.logger.uncolorize_logging()
    # Even though the uncolorize function already resets the format string,
    # we reset it again so that it includes the processor.
    f = logging.Formatter("P%03i %s" % (communicator.rank,
                                        yt.utilities.logger.ufstring))
    if len(yt.utilities.logger.ytLogger.handlers) > 0:
        yt.utilities.logger.ytLogger.handlers[0].setFormatter(f)

    if ytcfg.getboolean("yt", "parallel_traceback"):
        sys.excepthook = traceback_writer_hook("_%03i" % communicator.rank)
    else:
        sys.excepthook = default_mpi_excepthook

    if ytcfg.getint("yt","LogLevel") < 20:
        yt.utilities.logger.ytLogger.warning(
          "Log Level is set low -- this could affect parallel performance!")
    dtype_names.update(dict(
            float32 = MPI.FLOAT,
            float64 = MPI.DOUBLE,
            int32   = MPI.INT,
            int64   = MPI.LONG,
            c       = MPI.CHAR,
    ))
    op_names.update(dict(
        sum = MPI.SUM,
        min = MPI.MIN,
        max = MPI.MAX
    ))
    # Turn off logging on all but the root rank, if specified.
    if suppress_logging:
        if communicator.rank > 0:
            mylog.addFilter(FilterAllMessages())
    return True
Code example #24
def virial_quantities(halo,
                      fields,
                      overdensity_field=("gas", "overdensity"),
                      critical_overdensity=200,
                      profile_storage="profiles"):
    r"""
    Calculate the value of the given fields at the virial radius defined at 
    the given critical density by interpolating from radial profiles.

    Parameters
    ----------    
    halo : Halo object
        The Halo object to be provided by the HaloCatalog.
    fields : string or list of strings
        The fields whose virial values are to be calculated.
    overdensity_field : string or tuple of strings
        The field used as the overdensity from which interpolation is done to 
        calculate virial quantities.
        Default: ("gas", "overdensity")
    critical_overdensity : float
        The value of the overdensity at which to evaluate the virial quantities.
        Overdensity is with respect to the critical density.
        Default: 200
    profile_storage : string
        Name of the halo attribute that holds the profiles to be used.
        Default: "profiles"
    
    """

    mylog.info("Calculating virial quantities for halo %d." %
               halo.quantities["particle_identifier"])

    fields = ensure_list(fields)
    fields = [halo.data_object._determine_fields(field)[0] for field in fields]

    dds = halo.halo_catalog.data_ds
    profile_data = getattr(halo, profile_storage)

    if overdensity_field not in profile_data:
        raise RuntimeError(
            "virial_quantities callback requires profile of %s." %
            str(overdensity_field))

    overdensity = profile_data[overdensity_field]
    dfilter = np.isfinite(overdensity) & profile_data["used"] & (overdensity >
                                                                 0)

    v_fields = {}
    for field in fields:
        if isinstance(field, tuple):
            my_field = field[-1]
        else:
            my_field = field
        v_fields[field] = my_field
        v_field = "%s_%d" % (my_field, critical_overdensity)
        if v_field not in halo.halo_catalog.quantities:
            halo.halo_catalog.quantities.append(v_field)
    vquantities = dict([("%s_%d" % (v_fields[field], critical_overdensity),
                         dds.quan(0, profile_data[field].units)) \
                        for field in fields])

    if dfilter.sum() < 2:
        halo.quantities.update(vquantities)
        return

    # find interpolation index
    # require a negative slope, but not monotonicity
    vod = overdensity[dfilter].to_ndarray()
    if (vod > critical_overdensity).all():
        if vod[-1] < vod[-2]:
            index = -2
        else:
            halo.quantities.update(vquantities)
            return
    elif (vod < critical_overdensity).all():
        if vod[0] > vod[1]:
            index = 0
        else:
            halo.quantities.update(vquantities)
            return
    else:
        # take first instance of downward intersection with critical value
        intersections = (vod[:-1] >= critical_overdensity) & \
            (vod[1:] < critical_overdensity)
        if not intersections.any():
            halo.quantities.update(vquantities)
            return
        index = np.where(intersections)[0][0]

    for field in fields:
        v_prof = profile_data[field][dfilter].to_ndarray()
        slope = np.log(v_prof[index + 1] / v_prof[index]) / \
          np.log(vod[index + 1] / vod[index])
        value = dds.quan(
            np.exp(slope * np.log(critical_overdensity / vod[index])) *
            v_prof[index], profile_data[field].units).in_cgs()
        vquantities["%s_%d" % (v_fields[field], critical_overdensity)] = value

    halo.quantities.update(vquantities)
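The interpolation step in example #24 is a power law in log-log space between the two profile bins that bracket the critical overdensity. That step, lifted out on its own with made-up profile numbers (radius standing in for an arbitrary profiled field):

    import numpy as np

    critical_overdensity = 200.0
    od = np.array([800.0, 350.0, 150.0, 60.0])    # decreasing overdensity profile
    radius = np.array([20.0, 40.0, 80.0, 160.0])  # stand-in field (e.g. kpc)

    # First bin where the profile drops through the critical value.
    index = np.where((od[:-1] >= critical_overdensity) &
                     (od[1:] < critical_overdensity))[0][0]
    # Power-law slope between the bracketing bins, then evaluate at the
    # critical overdensity, exactly as the loop over fields does above.
    slope = np.log(radius[index + 1] / radius[index]) / np.log(od[index + 1] / od[index])
    r_200 = radius[index] * np.exp(slope * np.log(critical_overdensity / od[index]))
    print(r_200)  # ~63, i.e. between the 40 and 80 bins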
Code example #25
def load_profiles(halo,
                  storage="profiles",
                  fields=None,
                  filename=None,
                  output_dir="."):
    r"""
    Load profile data from disk.

    Parameters
    ----------
    halo : Halo object
        The Halo object to be provided by the HaloCatalog.
    storage : string
        Name of the dictionary attribute to store profile data.
        Default: "profiles"
    fields : string or list of strings
        The fields to be loaded.  If None, all fields present will be loaded.
        Default : None
    filename : string
        The name of the file to be loaded.  The final filename will be 
        "<filename>_<id>.h5".  If None, filename is set to the value given 
        by the storage keyword.
        Default: None
    output_dir : string
        Name of directory where profile data will be read.  The full path will be
        the output_dir of the halo catalog concatenated with this directory.
        Default : "."
    
    """

    if filename is None:
        filename = storage
    output_file = os.path.join(
        halo.halo_catalog.output_dir, output_dir,
        "%s_%06d.h5" % (filename, halo.quantities["particle_identifier"]))
    if not os.path.exists(output_file):
        raise RuntimeError("Profile file not found: %s." % output_file)
    mylog.info("Loading halo %d profile data from %s." %
               (halo.quantities["particle_identifier"], output_file))

    fh = h5py.File(output_file, "r")
    if fields is None:
        profile_fields = fh["profiles"].keys()
    else:
        profile_fields = fields
    my_profile = {}
    my_group = fh["profiles"]
    for field in profile_fields:
        if field not in my_group:
            raise RuntimeError("%s field not present in %s." %
                               (field, output_file))
        my_profile[field] = _hdf5_yt_array(my_group,
                                           field,
                                           ds=halo.halo_catalog.halos_ds)
    setattr(halo, storage, my_profile)

    if "variance" in fh:
        my_variance = {}
        my_group = fh["variance"]
        if fields is None:
            profile_fields = my_group.keys()
        for field in profile_fields:
            if field not in my_group:
                raise RuntimeError("%s field not present in %s." %
                                   (field, output_file))
            my_variance[field] = _hdf5_yt_array(my_group,
                                                field,
                                                ds=halo.halo_catalog.halos_ds)
        setattr(halo, "%s_variance" % storage, my_variance)

    fh.close()
Code example #26
def enable_parallelism(suppress_logging=False, communicator=None):
    """
    This method is used inside a script to turn on MPI parallelism, via
    mpi4py.  More information about running yt in parallel can be found
    here: https://yt-project.org/docs/3.0/analyzing/parallel_computation.html

    Parameters
    ----------
    suppress_logging : bool
       If set to True, only rank 0 will log information after the initial
       setup of MPI.

    communicator : mpi4py.MPI.Comm
        The MPI communicator to use. This controls which processes yt can see.
        If not specified, will be set to COMM_WORLD.
    """
    global parallel_capable, MPI
    try:
        from mpi4py import MPI as _MPI
    except ImportError:
        mylog.info("mpi4py was not found. Disabling parallel computation")
        parallel_capable = False
        return
    MPI = _MPI
    exe_name = os.path.basename(sys.executable)

    # if no communicator specified, set to COMM_WORLD
    if communicator is None:
        communicator = MPI.COMM_WORLD

    parallel_capable = (communicator.size > 1)
    if not parallel_capable: return False
    mylog.info("Global parallel computation enabled: %s / %s",
               communicator.rank, communicator.size)
    communication_system.push(communicator)
    ytcfg["yt", "__global_parallel_rank"] = str(communicator.rank)
    ytcfg["yt", "__global_parallel_size"] = str(communicator.size)
    ytcfg["yt", "__parallel"] = "True"
    if exe_name == "embed_enzo" or \
        ("_parallel" in dir(sys) and sys._parallel):
        ytcfg["yt", "inline"] = "True"
    if communicator.rank > 0:
        if ytcfg.getboolean("yt", "LogFile"):
            ytcfg["yt", "LogFile"] = "False"
            yt.utilities.logger.disable_file_logging()
    yt.utilities.logger.uncolorize_logging()
    # Even though the uncolorize function already resets the format string,
    # we reset it again so that it includes the processor.
    f = logging.Formatter("P%03i %s" %
                          (communicator.rank, yt.utilities.logger.ufstring))
    if len(yt.utilities.logger.ytLogger.handlers) > 0:
        yt.utilities.logger.ytLogger.handlers[0].setFormatter(f)

    if ytcfg.getboolean("yt", "parallel_traceback"):
        sys.excepthook = traceback_writer_hook("_%03i" % communicator.rank)
    else:
        sys.excepthook = default_mpi_excepthook

    if ytcfg.getint("yt", "LogLevel") < 20:
        yt.utilities.logger.ytLogger.warning(
            "Log Level is set low -- this could affect parallel performance!")
    dtype_names.update(
        dict(
            float32=MPI.FLOAT,
            float64=MPI.DOUBLE,
            int32=MPI.INT,
            int64=MPI.LONG,
            c=MPI.CHAR,
        ))
    op_names.update(dict(sum=MPI.SUM, min=MPI.MIN, max=MPI.MAX))
    # Turn off logging on all but the root rank, if specified.
    if suppress_logging:
        if communicator.rank > 0:
            mylog.addFilter(FilterAllMessages())
    return True
Code example #27

def io_nodes(fn, n_io, n_work, func, *args, **kwargs):
    from yt.mods import load
    pool, wg = ProcessorPool.from_sizes([(n_io, "io"), (n_work, "work")])
    rv = None
    if wg.name == "work":
        ds = load(fn)
        with remote_io(ds, wg, pool):
            rv = func(ds, *args, **kwargs)
    elif wg.name == "io":
        ds = load(fn)
        io = IOCommunicator(ds, wg, pool)
        io.wait()
    # We should broadcast the result
    rv = pool.comm.mpi_bcast(rv, root=pool['work'].ranks[0])
    pool.free_all()
    mylog.debug("Return value: %s", rv)
    return rv


# Here is an example of how to use this functionality.
if __name__ == "__main__":

    def gq(ds):
        dd = ds.all_data()
        return dd.quantities["TotalQuantity"]("CellMassMsun")

    q = io_nodes("DD0087/DD0087", 8, 24, gq)
    mylog.info(q)
Code example #28
 def par_combine_object(self, data, op, datatype=None):
     # op can be chosen from:
     #   cat
     #   join
     # data is selected to be of types:
     #   np.ndarray
     #   dict
     #   data field dict
     if datatype is not None:
         pass
      elif isinstance(data, dict):
          datatype = "dict"
      elif isinstance(data, np.ndarray):
          datatype = "array"
      elif isinstance(data, list):
          datatype = "list"
     # Now we have our datatype, and we conduct our operation
     if datatype == "dict" and op == "join":
         if self.comm.rank == 0:
             for i in range(1, self.comm.size):
                 data.update(self.comm.recv(source=i, tag=0))
         else:
             self.comm.send(data, dest=0, tag=0)
         data = self.comm.bcast(data, root=0)
         return data
     elif datatype == "dict" and op == "cat":
          field_keys = sorted(data.keys())
         size = data[field_keys[0]].shape[-1]
         sizes = np.zeros(self.comm.size, dtype='int64')
         outsize = np.array(size, dtype='int64')
         self.comm.Allgather([outsize, 1, MPI.LONG], [sizes, 1, MPI.LONG])
         # This nested concatenate is to get the shapes to work out correctly;
         # if we just add [0] to sizes, it will broadcast a summation, not a
         # concatenation.
         offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
         arr_size = self.comm.allreduce(size, op=MPI.SUM)
         for key in field_keys:
             dd = data[key]
             rv = self.alltoallv_array(dd, arr_size, offsets, sizes)
             data[key] = rv
         return data
     elif datatype == "array" and op == "cat":
         if data is None:
             ncols = -1
             size = 0
             dtype = 'float64'
             mylog.info(
                 'Warning: Array passed to par_combine_object was None. Setting dtype to float64. This may break things!'
             )
         else:
             dtype = data.dtype
             if len(data) == 0:
                 ncols = -1
                 size = 0
             elif len(data.shape) == 1:
                 ncols = 1
                 size = data.shape[0]
             else:
                 ncols, size = data.shape
         ncols = self.comm.allreduce(ncols, op=MPI.MAX)
         if ncols == 0:
             data = np.zeros(0, dtype=dtype)  # This only works for
         elif data is None:
             data = np.zeros((ncols, 0), dtype=dtype)
         size = data.shape[-1]
         sizes = np.zeros(self.comm.size, dtype='int64')
         outsize = np.array(size, dtype='int64')
         self.comm.Allgather([outsize, 1, MPI.LONG], [sizes, 1, MPI.LONG])
         # This nested concatenate is to get the shapes to work out correctly;
         # if we just add [0] to sizes, it will broadcast a summation, not a
         # concatenation.
         offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
         arr_size = self.comm.allreduce(size, op=MPI.SUM)
         data = self.alltoallv_array(data, arr_size, offsets, sizes)
         return data
     elif datatype == "list" and op == "cat":
         recv_data = self.comm.allgather(data)
         # Now flatten into a single list, since this
         # returns us a list of lists.
         data = []
         while recv_data:
             data.extend(recv_data.pop(0))
         return data
     raise NotImplementedError
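In the two "cat" branches above, per-rank chunk sizes gathered with Allgather are turned into starting offsets for each rank's slice of the concatenated array. That one numpy idiom in isolation:

    import numpy as np

    sizes = np.array([4, 0, 7, 2], dtype="int64")   # chunk length on each rank
    offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
    print(offsets)      # [ 0  4  4 11] -- where each rank's data starts
    print(sizes.sum())  # 13 -- total length of the concatenated array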
Code example #29

def io_nodes(fn, n_io, n_work, func, *args, **kwargs):
    from yt.mods import load
    pool, wg = ProcessorPool.from_sizes([(n_io, "io"), (n_work, "work")])
    rv = None
    if wg.name == "work":
        ds = load(fn)
        with remote_io(ds, wg, pool):
            rv = func(ds, *args, **kwargs)
    elif wg.name == "io":
        ds = load(fn)
        io = IOCommunicator(ds, wg, pool)
        io.wait()
    # We should broadcast the result
    rv = pool.comm.mpi_bcast(rv, root=pool['work'].ranks[0])
    pool.free_all()
    mylog.debug("Return value: %s", rv)
    return rv

# Here is an example of how to use this functionality.
if __name__ == "__main__":
    def gq(ds):
        dd = ds.all_data()
        return dd.quantities["TotalQuantity"]("CellMassMsun")
    q = io_nodes("DD0087/DD0087", 8, 24, gq)
    mylog.info(q)


Code example #30
def rootloginfo(*args):
    from yt.config import ytcfg
    if ytcfg.getint("yt", "__topcomm_parallel_rank") > 0: return
    mylog.info(*args)
Code example #31
 def finish(self):
     mylog.info("Finishing '%s'", self.title)
Code example #32
 def __init__(self, title, maxval):
     self.title = title
     mylog.info("Starting '%s'", title)
Code example #33
File: misc.py  Project: ruithnadsteud/yt
def create_spectral_slabs(filename, slab_centers, slab_width, **kwargs):
    r"""
    Given a dictionary of spectral slab centers and a width in
    spectral units, extract data from a spectral cube at these slab
    centers and return a `FITSDataset` instance containing the different
    slabs as separate yt fields. Useful for extracting individual
    lines from a spectral cube and separating them out as different fields.

    Requires the SpectralCube (https://spectral-cube.readthedocs.io/en/latest/)
    library.

    All keyword arguments will be passed on to the `FITSDataset` constructor.

    Parameters
    ----------
    filename : string
        The spectral cube FITS file to extract the data from.
    slab_centers : dict of (float, string) tuples or YTQuantities
        The centers of the slabs, where the keys are the names
        of the new fields and the values are (float, string) tuples or
        YTQuantities, specifying a value for each center and its unit.
    slab_width : YTQuantity or (float, string) tuple
        The width of the slab along the spectral axis.

    Examples
    --------
    >>> slab_centers = {'13CN': (218.03117, 'GHz'),
    ...                 'CH3CH2CHO': (218.284256, 'GHz'),
    ...                 'CH3NH2': (218.40956, 'GHz')}
    >>> slab_width = (0.05, "GHz")
    >>> ds = create_spectral_slabs("intensity_cube.fits",
    ...                            slab_centers, slab_width,
    ...                            nan_mask=0.0)
    """
    from spectral_cube import SpectralCube

    from yt.frontends.fits.api import FITSDataset
    from yt.visualization.fits_image import FITSImageData

    cube = SpectralCube.read(filename)
    if not isinstance(slab_width, YTQuantity):
        slab_width = YTQuantity(slab_width[0], slab_width[1])
    slab_data = {}
    field_units = cube.header.get("bunit", "dimensionless")
    for k, v in slab_centers.items():
        if not isinstance(v, YTQuantity):
            slab_center = YTQuantity(v[0], v[1])
        else:
            slab_center = v
        mylog.info("Adding slab field %s at %g %s", k, slab_center.v,
                   slab_center.units)
        slab_lo = (slab_center - 0.5 * slab_width).to_astropy()
        slab_hi = (slab_center + 0.5 * slab_width).to_astropy()
        subcube = cube.spectral_slab(slab_lo, slab_hi)
        slab_data[k] = YTArray(subcube.filled_data[:, :, :], field_units)
    width = subcube.header["naxis3"] * cube.header["cdelt3"]
    w = subcube.wcs.copy()
    w.wcs.crpix[-1] = 0.5
    w.wcs.crval[-1] = -0.5 * width
    fid = FITSImageData(slab_data, wcs=w)
    for hdu in fid:
        hdu.header.pop("RESTFREQ", None)
        hdu.header.pop("RESTFRQ", None)
    ds = FITSDataset(fid, **kwargs)
    return ds
Code example #34
def load_sample(fn=None, specific_file=None, pbar=True):
    """
    Load sample data with yt. Simple wrapper around yt.load to include fetching
    data with pooch.

    Parameters
    ----------
    fn : str or None
        The name of the sample data to load. This is generally the name of the
        folder of the dataset. For IsolatedGalaxy, the name would be
        `IsolatedGalaxy`.  If `None` is supplied, the return value
        will be a list of all known datasets (by name).

    specific_file : str, optional
        The name of the file to load, located within the sample dataset
        `name`. For the dataset `enzo_cosmology_plus`, which has a number
        of timesteps available, one may wish to choose DD0003. The file
        itself would be `enzo_cosmology_plus/DD0003/DD0003`, and the
        argument passed here would be `DD0003/DD0003`.

    pbar: bool
        display a progress bar

    """

    fido = PoochHandle()

    if fn is None:
        keys = []
        for key in fido._registry:
            for ext in _extensions_to_strip:
                if key.endswith(ext):
                    key = key[: -len(ext)]
            keys.append(key)
        return keys

    base_path = fido.pooch_obj.path

    registered_fname, name, extension = fido._validate_sample_fname(
        fn
    )  # todo: make this part of the class

    downloader = None
    if pbar:
        downloader = pooch.pooch.HTTPDownloader(progressbar=True)

    if extension != "h5":
        # we assume that most files hosted on the hub are compressed as
        # .tar archives; some may not be.
        processor = pooch.pooch.Untar()
    else:
        processor = None

    storage_fname = fido.pooch_obj.fetch(
        registered_fname, processor=processor, downloader=downloader
    )

    # The `folder_path` variable is used here to notify the user where the
    # files have been unpacked to. However, we can't assume this is reliable
    # because in some cases the common path will overlap with the `load_name`
    # variable of the file.
    folder_path = os.path.commonprefix(storage_fname)
    mylog.info("Files located at %s", folder_path)

    # Location of the file to load automatically, registered in the Fido class
    info = fido[registered_fname]
    file_lookup = info["load_name"]
    optional_args = info["load_kwargs"]

    if specific_file is None:
        # right now work on loading only untarred files. build out h5 later
        mylog.info("Default to loading %s for %s dataset", file_lookup, name)
        loaded_file = os.path.join(
            base_path, f"{registered_fname}.untar", name, file_lookup
        )
    else:
        mylog.info("Loading %s for %s dataset", specific_file, name)
        loaded_file = os.path.join(
            base_path, f"{registered_fname}.untar", name, specific_file
        )

    return load(loaded_file, **optional_args)
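
A hedged usage sketch for the wrapper above; the dataset names follow the docstring and assume the pooch registry knows them and that network access is available.

# Hedged usage sketch: list the registry, then fetch and load sample data.
available = load_sample()                  # all known dataset names
ds = load_sample("IsolatedGalaxy")         # download (if needed) and load the default file
ds_dd3 = load_sample("enzo_cosmology_plus", specific_file="DD0003/DD0003")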
コード例 #35
0
File: data_structures.py Project: matthewturk/yt
 def find_particles_by_type(self,
                            ptype,
                            max_num=None,
                            additional_fields=None):
     """
     Returns a structure of arrays with all of the particles'
     positions, velocities, masses, types, IDs, and attributes for
     a particle type **ptype** for a maximum of **max_num**
     particles.  If non-default particle fields are used, provide
     them in **additional_fields**.
     """
     # Not sure whether this routine should be in the general HierarchyType.
     if self.grid_particle_count.sum() == 0:
         mylog.info("Data contains no particles.")
         return None
     if additional_fields is None:
         additional_fields = [
             "metallicity_fraction",
             "creation_time",
             "dynamical_time",
         ]
     pfields = [f for f in self.field_list if f.startswith("particle_")]
     nattr = self.dataset["NumberOfParticleAttributes"]
     if nattr > 0:
         pfields += additional_fields[:nattr]
     # Find where the particles reside and count them
     if max_num is None:
         max_num = 1e100
     total = 0
     pstore = []
     for level in range(self.max_level, -1, -1):
         for grid in self.select_grids(level):
             index = np.where(grid["particle_type"] == ptype)[0]
             total += len(index)
             pstore.append(index)
             if total >= max_num:
                 break
         if total >= max_num:
             break
     result = None
     if total > 0:
         result = {}
         for p in pfields:
             result[p] = np.zeros(total, "float64")
         # Now we retrieve data for each field
         ig = count = 0
         for level in range(self.max_level, -1, -1):
             for grid in self.select_grids(level):
                 nidx = len(pstore[ig])
                 if nidx > 0:
                     for p in pfields:
                         result[p][count:count + nidx] = grid[p][pstore[ig]]
                     count += nidx
                 ig += 1
                 if count >= total:
                     break
             if count >= total:
                 break
         # Crop data if retrieved more than max_num
         if count > max_num:
             for p in pfields:
                 result[p] = result[p][0:max_num]
     return result
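
A hedged sketch of calling this routine through an Enzo dataset's index; the dataset path, the particle-type code, and the assumption that the method is exposed on ds.index are all illustrative.

# Hedged usage sketch: gather up to 10,000 particles of one type.
# The dataset path and the ptype value (2) are placeholders.
import yt

ds = yt.load("DD0010/DD0010")
parts = ds.index.find_particles_by_type(2, max_num=10000)
if parts is not None:
    print(len(parts["particle_index"]), "particles found")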
コード例 #36
0
    def get_time_series(self,
                        initial_time=None,
                        final_time=None,
                        initial_redshift=None,
                        final_redshift=None,
                        times=None,
                        redshifts=None,
                        tolerance=None,
                        parallel=True,
                        setup_function=None):
        """
        Instantiate a DatasetSeries object for a set of outputs.

        If no additional keywords given, a DatasetSeries object will be
        created with all potential datasets created by the simulation.

        Outputs can be gathered by specifying a time or redshift range
        (or a combination of time and redshift), with a specific list of
        times or redshifts, or by simply searching all subdirectories
        within the simulation directory.

        initial_time : tuple of type (float, str)
            The earliest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (5.0, "Gyr").  If None, the initial time of the 
            simulation is used.  This can be used in combination with 
            either final_time or final_redshift.
            Default: None.
        final_time : tuple of type (float, str)
            The latest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (13.7, "Gyr"). If None, the final time of the 
            simulation is used.  This can be used in combination with either 
            initial_time or initial_redshift.
            Default: None.
        times : tuple of type (float array, str)
            A list of times for which outputs will be found and the units 
            of those values.  For example, ([0, 1, 2, 3], "s").
            Default: None.
        initial_redshift : float
            The earliest redshift for outputs to be included.  If None,
            the initial redshift of the simulation is used.  This can be
            used in combination with either final_time or
            final_redshift.
            Default: None.
        final_redshift : float
            The latest redshift for outputs to be included.  If None,
            the final redshift of the simulation is used.  This can be
            used in combination with either initial_time or
            initial_redshift.
            Default: None.
        redshifts : array_like
            A list of redshifts for which outputs will be found.
            Default: None.
        tolerance : float
            Used in combination with "times" or "redshifts" keywords,
            this is the tolerance within which outputs are accepted
            given the requested times or redshifts.  If None, the
            nearest output is always taken.
            Default: None.
        parallel : bool/int
            If True, the generated DatasetSeries will divide the work
            such that a single processor works on each dataset.  If an
            integer is supplied, the work will be divided into that
            number of jobs.
            Default: True.
        setup_function : callable, accepts a ds
            This function will be called whenever a dataset is loaded.

        Examples
        --------

        >>> import yt
        >>> gs = yt.simulation("my_simulation.par", "Gadget")
        
        >>> gs.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"))

        >>> gs.get_time_series(redshifts=[3, 2, 1, 0])

        >>> # after calling get_time_series
        >>> for ds in gs.piter():
        ...     p = ProjectionPlot(ds, "x", "density")
        ...     p.save()

        >>> # An example using the setup_function keyword
        >>> def print_time(ds):
        ...     print(ds.current_time)
        >>> gs.get_time_series(setup_function=print_time)
        >>> for ds in gs:
        ...     SlicePlot(ds, "x", "Density").save()

        """

        if (initial_redshift is not None or \
            final_redshift is not None) and \
            not self.cosmological_simulation:
            raise InvalidSimulationTimeSeries(
                "An initial or final redshift has been given for a " +
                "noncosmological simulation.")

        my_all_outputs = self.all_outputs
        if not my_all_outputs:
            DatasetSeries.__init__(self,
                                   outputs=[],
                                   parallel=parallel,
                                   unit_base=self.unit_base)
            mylog.info("0 outputs loaded into time series.")
            return

        # Apply selection criteria to the set.
        if times is not None:
            my_outputs = self._get_outputs_by_key("time",
                                                  times,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif redshifts is not None:
            my_outputs = self._get_outputs_by_key("redshift",
                                                  redshifts,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        else:
            if initial_time is not None:
                if isinstance(initial_time, float):
                    initial_time = self.quan(initial_time, "code_time")
                elif isinstance(initial_time,
                                tuple) and len(initial_time) == 2:
                    initial_time = self.quan(*initial_time)
                elif not isinstance(initial_time, YTArray):
                    raise RuntimeError(
                        "Error: initial_time must be given as a float or " +
                        "tuple of (value, units).")
                my_initial_time = initial_time
            elif initial_redshift is not None:
                my_initial_time = self.cosmology.t_from_z(initial_redshift)
            else:
                my_initial_time = self.initial_time

            if final_time is not None:
                if isinstance(final_time, float):
                    final_time = self.quan(final_time, "code_time")
                elif isinstance(final_time, tuple) and len(final_time) == 2:
                    final_time = self.quan(*final_time)
                elif not isinstance(final_time, YTArray):
                    raise RuntimeError(
                        "Error: final_time must be given as a float or " +
                        "tuple of (value, units).")
                my_final_time = final_time.in_units("s")
            elif final_redshift is not None:
                my_final_time = self.cosmology.t_from_z(final_redshift)
            else:
                my_final_time = self.final_time

            my_initial_time.convert_to_units("s")
            my_final_time.convert_to_units("s")
            my_times = np.array([a["time"] for a in my_all_outputs])
            my_indices = np.digitize([my_initial_time, my_final_time],
                                     my_times)
            if my_initial_time == my_times[my_indices[0] - 1]:
                my_indices[0] -= 1
            my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

        init_outputs = []
        for output in my_outputs:
            if os.path.exists(output["filename"]):
                init_outputs.append(output["filename"])
        if len(init_outputs) == 0 and len(my_outputs) > 0:
            mylog.warn("Could not find any datasets.  " +
                       "Check the value of OutputDir in your parameter file.")

        DatasetSeries.__init__(self,
                               outputs=init_outputs,
                               parallel=parallel,
                               setup_function=setup_function,
                               unit_base=self.unit_base)
        mylog.info("%d outputs loaded into time series.", len(init_outputs))
コード例 #37
0
    def _parse_parameter_file(self):
        """
        Parse the SWIFT "parameter file" -- really this actually reads info
        from the main HDF5 file as everything is replicated there and usually
        parameterfiles are not transported.

        The header information from the HDF5 file is stored in an un-parsed
        format in self.parameters should users wish to use it.
        """

        self.unique_identifier = uuid4()

        # Read from the HDF5 file, this gives us all the info we need. The rest
        # of this function is just parsing.
        header = self._get_info_attributes("Header")
        runtime_parameters = self._get_info_attributes("RuntimePars")

        policy = self._get_info_attributes("Policy")
        # These are the parameterfile parameters from *.yml at runtime
        parameters = self._get_info_attributes("Parameters")

        # Not used in this function, but passed to parameters
        hydro = self._get_info_attributes("HydroScheme")
        subgrid = self._get_info_attributes("SubgridScheme")

        self.domain_right_edge = header["BoxSize"]
        self.domain_left_edge = np.zeros_like(self.domain_right_edge)

        self.dimensionality = int(header["Dimension"])

        # SWIFT is either all periodic, or not periodic at all
        periodic = int(runtime_parameters["PeriodicBoundariesOn"])

        if periodic:
            self._periodicity = [True] * self.dimensionality
        else:
            self._periodicity = [False] * self.dimensionality

        # Units get attached to this
        self.current_time = float(header["Time"])

        # Now cosmology enters the fray, as a runtime parameter.
        self.cosmological_simulation = int(policy["cosmological integration"])

        if self.cosmological_simulation:
            try:
                self.current_redshift = float(header["Redshift"])
                # These won't be present if self.cosmological_simulation is false
                self.omega_lambda = float(parameters["Cosmology:Omega_lambda"])
                self.omega_matter = float(parameters["Cosmology:Omega_m"])
                # This is "little h"
                self.hubble_constant = float(parameters["Cosmology:h"])
            except KeyError:
                mylog.warning(
                    "Could not find cosmology information in Parameters, "
                    "despite the run having been started with -c, "
                    "signifying a cosmological run.")
                mylog.info("Setting up as a non-cosmological run. Check this!")
                self.cosmological_simulation = 0
                self.current_redshift = 0.0
                self.omega_lambda = 0.0
                self.omega_matter = 0.0
                self.hubble_constant = 0.0
        else:
            self.current_redshift = 0.0
            self.omega_lambda = 0.0
            self.omega_matter = 0.0
            self.hubble_constant = 0.0

        # Store the un-parsed information should people want it.
        self.parameters = dict(
            header=header,
            runtime_parameters=runtime_parameters,
            policy=policy,
            parameters=parameters,
            hydro=hydro,
            subgrid=subgrid,
        )

        # SWIFT never has multi file snapshots
        self.file_count = 1
        self.filename_template = self.parameter_filename

        return
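
The parser above relies on a `_get_info_attributes` helper that is not shown in this snippet. A plausible sketch of such a helper, assuming it simply reads every attribute of one HDF5 group into a dictionary with h5py, might look like the following (the name and behaviour are assumptions, not the actual yt implementation):

import h5py

def _get_info_attributes_sketch(filename, group_name):
    # Hypothetical stand-in for the helper used by _parse_parameter_file:
    # return all attributes of one HDF5 group (e.g. "Header", "Policy",
    # "Parameters") as a plain dict keyed by attribute name.
    with h5py.File(filename, mode="r") as handle:
        return dict(handle[group_name].attrs)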
コード例 #38
0
def load_sample(fn=None, progressbar: bool = True, timeout=None, **kwargs):
    """
    Load sample data with yt.

    This is a simple wrapper around `yt.load` to include fetching
    data with pooch from remote source.

    yt sample data can be found at:
    https://yt-project.org/data.

    The data registry table can be retrieved and visualized using
    `yt.sample_data.api.get_data_registry_table()`.

    This function requires pandas and pooch to be installed.

    Parameters
    ----------
    fn : str
        The `filename` of the dataset to load, as defined in the data registry
        table.

    progressbar: bool
        display a progress bar (tqdm).

    timeout: float or int (optional)
        Maximal waiting time, in seconds, after which the download is aborted.
        `None` means "no limit". This parameter is passed down to
        requests.get via pooch.HTTPDownloader.

    Any additional keyword argument is passed down to `yt.load`.
    Note that in case of collision with predefined keyword arguments as set in
    the data registry, the ones passed to this function take priority.
    """

    if fn is None:
        print(
            "One can see which sample datasets are available at: https://yt-project.org/data\n"
            "or alternatively by running: yt.sample_data.api.get_data_registry_table()",
            file=sys.stderr,
        )
        return None

    from yt.sample_data.api import (
        _download_sample_data_file,
        _get_test_data_dir_path,
        get_data_registry_table,
    )

    pooch_logger = pooch.utils.get_logger()

    topdir, _, specific_file = str(fn).partition(os.path.sep)

    registry_table = get_data_registry_table()
    # PR 3089
    # note: in the future the registry table should be reindexed
    # so that the following line can be replaced with
    #
    # specs = registry_table.loc[fn]
    #
    # however we don't want to do it right now because the "filename" column is
    # currently incomplete

    try:
        specs = registry_table.query(f"`filename` == '{topdir}'").iloc[0]
    except IndexError as err:
        raise KeyError(f"Could not find '{fn}' in the registry.") from err

    if not specs["load_name"]:
        raise ValueError(
            "Registry appears to be corrupted: could not find a 'load_name' entry for this dataset."
        )

    kwargs = {**specs["load_kwargs"], **kwargs}

    try:
        data_dir = lookup_on_disk_data(fn)
    except FileNotFoundError:
        mylog.info("'%s' is not available locally. Looking up online.", fn)
    else:
        # if the data is already available locally, `load_sample`
        # only acts as a thin wrapper around `load`
        loadable_path = data_dir.joinpath(specs["load_name"], specific_file)
        mylog.info("Sample dataset found in '%s'", data_dir)
        if timeout is not None:
            mylog.info("Ignoring the `timeout` keyword argument received.")
        return load(loadable_path, **kwargs)

    try:
        save_dir = _get_test_data_dir_path()
        assert save_dir.is_dir()
    except (OSError, AssertionError):
        mylog.warning(
            "yt test data directory is not properly set up. "
            "Data will be saved to the current work directory instead.")
        save_dir = Path.cwd()

    # effectively silence pooch's logger and emit our own log messages instead
    pooch_logger.setLevel(100)
    mylog.info("Downloading from %s", specs["url"])

    # downloading via a pooch.Pooch instance behind the scenes
    filename = urlsplit(specs["url"]).path.split("/")[-1]

    tmp_file = _download_sample_data_file(filename,
                                          progressbar=progressbar,
                                          timeout=timeout)

    # pooch has functionality to unpack downloaded archive files,
    # but it needs to be told in advance that we are downloading a tarball.
    # Since that information is not necessarily trivial to guess from the
    # filename, we rely on the standard library to perform a conditional
    # unpacking instead.
    if tarfile.is_tarfile(tmp_file):
        mylog.info("Untaring downloaded file to '%s'", save_dir)
        with tarfile.open(tmp_file) as fh:
            fh.extractall(save_dir)
        os.remove(tmp_file)
    else:
        # move the single downloaded file into the data directory
        os.replace(tmp_file, os.path.join(save_dir, filename))

    loadable_path = Path.joinpath(save_dir, fn, specs["load_name"],
                                  specific_file)

    if specific_file and not loadable_path.exists():
        raise ValueError(f"Could not find file '{specific_file}'.")

    return load(loadable_path, **kwargs)
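
A hedged usage sketch, assuming pandas, pooch, and network access are available; the dataset name and timeout value are illustrative.

# Hedged usage sketch: inspect the registry, then fetch and load a dataset.
from yt.sample_data.api import get_data_registry_table

registry = get_data_registry_table()       # pandas DataFrame of known datasets
print(registry.head())
ds = load_sample("IsolatedGalaxy", progressbar=True, timeout=120)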
コード例 #39
0
    def get_time_series(self, time_data=True, redshift_data=True,
                        initial_time=None, final_time=None,
                        initial_redshift=None, final_redshift=None,
                        initial_cycle=None, final_cycle=None,
                        times=None, redshifts=None, tolerance=None,
                        parallel=True, setup_function=None):

        """
        Instantiate a DatasetSeries object for a set of outputs.

        If no additional keywords given, a DatasetSeries object will be
        created with all potential datasets created by the simulation.

        Outputs can be gathered by specifying a time or redshift range
        (or combination of time and redshift), with a specific list of
        times or redshifts, a range of cycle numbers (for cycle based
        output), or by simply searching all subdirectories within the
        simulation directory.

        time_data : bool
            Whether or not to include time outputs when gathering
            datasets for time series.
            Default: True.
        redshift_data : bool
            Whether or not to include redshift outputs when gathering
            datasets for time series.
            Default: True.
        initial_time : tuple of type (float, str)
            The earliest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (5.0, "Gyr").  If None, the initial time of the 
            simulation is used.  This can be used in combination with 
            either final_time or final_redshift.
            Default: None.
        final_time : tuple of type (float, str)
            The latest time for outputs to be included.  This should be 
            given as the value and the string representation of the units.
            For example, (13.7, "Gyr"). If None, the final time of the 
            simulation is used.  This can be used in combination with either 
            initial_time or initial_redshift.
            Default: None.
        times : tuple of type (float array, str)
            A list of times for which outputs will be found and the units 
            of those values.  For example, ([0, 1, 2, 3], "s").
            Default: None.
        initial_redshift : float
            The earliest redshift for outputs to be included.  If None,
            the initial redshift of the simulation is used.  This can be
            used in combination with either final_time or
            final_redshift.
            Default: None.
        final_redshift : float
            The latest redshift for outputs to be included.  If None,
            the final redshift of the simulation is used.  This can be
            used in combination with either initial_time or
            initial_redshift.
            Default: None.
        redshifts : array_like
            A list of redshifts for which outputs will be found.
            Default: None.
        initial_cycle : float
            The earliest cycle for outputs to be included.  If None,
            the initial cycle of the simulation is used.  This can
            only be used with final_cycle.
            Default: None.
        final_cycle : float
            The latest cycle for outputs to be included.  If None,
            the final cycle of the simulation is used.  This can
            only be used in combination with initial_cycle.
            Default: None.
        tolerance : float
            Used in combination with "times" or "redshifts" keywords,
            this is the tolerance within which outputs are accepted
            given the requested times or redshifts.  If None, the
            nearest output is always taken.
            Default: None.
        parallel : bool/int
            If True, the generated DatasetSeries will divide the work
            such that a single processor works on each dataset.  If an
            integer is supplied, the work will be divided into that
            number of jobs.
            Default: True.
        setup_function : callable, accepts a ds
            This function will be called whenever a dataset is loaded.

        Examples
        --------

        >>> import yt
        >>> es = yt.simulation("my_simulation.par", "Enzo")
        
        >>> es.get_time_series(initial_redshift=10, final_time=(13.7, "Gyr"),
        ...                    redshift_data=False)

        >>> es.get_time_series(redshifts=[3, 2, 1, 0])

        >>> es.get_time_series(final_cycle=100000)

        >>> # after calling get_time_series
        >>> for ds in es.piter():
        ...     p = ProjectionPlot(ds, 'x', "density")
        ...     p.save()

        >>> # An example using the setup_function keyword
        >>> def print_time(ds):
        ...     print(ds.current_time)
        >>> es.get_time_series(setup_function=print_time)
        >>> for ds in es:
        ...     SlicePlot(ds, "x", "Density").save()

        """

        if (initial_redshift is not None or \
            final_redshift is not None) and \
            not self.cosmological_simulation:
            raise InvalidSimulationTimeSeries(
                "An initial or final redshift has been given for a " +
                "noncosmological simulation.")

        if time_data and redshift_data:
            my_all_outputs = self.all_outputs
        elif time_data:
            my_all_outputs = self.all_time_outputs
        elif redshift_data:
            my_all_outputs = self.all_redshift_outputs
        else:
            raise InvalidSimulationTimeSeries('Both time_data and redshift_data are False.')

        if not my_all_outputs:
            DatasetSeries.__init__(self, outputs=[], parallel=parallel)
            mylog.info("0 outputs loaded into time series.")
            return

        # Apply selection criteria to the set.
        if times is not None:
            my_outputs = self._get_outputs_by_key("time", times,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif redshifts is not None:
            my_outputs = self._get_outputs_by_key("redshift", redshifts,
                                                  tolerance=tolerance,
                                                  outputs=my_all_outputs)

        elif initial_cycle is not None or final_cycle is not None:
            if initial_cycle is None:
                initial_cycle = 0
            else:
                initial_cycle = max(initial_cycle, 0)
            if final_cycle is None:
                final_cycle = self.parameters['StopCycle']
            else:
                final_cycle = min(final_cycle, self.parameters['StopCycle'])

            my_outputs = my_all_outputs[int(ceil(float(initial_cycle) /
                                                 self.parameters['CycleSkipDataDump'])):
                                        (final_cycle // self.parameters['CycleSkipDataDump']) + 1]

        else:
            if initial_time is not None:
                if isinstance(initial_time, float):
                    initial_time = self.quan(initial_time, "code_time")
                elif isinstance(initial_time, tuple) and len(initial_time) == 2:
                    initial_time = self.quan(*initial_time)
                elif not isinstance(initial_time, YTArray):
                    raise RuntimeError(
                        "Error: initial_time must be given as a float or " +
                        "tuple of (value, units).")
                my_initial_time = initial_time
            elif initial_redshift is not None:
                my_initial_time = self.cosmology.t_from_z(initial_redshift)
            else:
                my_initial_time = self.initial_time

            if final_time is not None:
                if isinstance(final_time, float):
                    final_time = self.quan(final_time, "code_time")
                elif isinstance(final_time, tuple) and len(final_time) == 2:
                    final_time = self.quan(*final_time)
                elif not isinstance(final_time, YTArray):
                    raise RuntimeError(
                        "Error: final_time must be given as a float or " +
                        "tuple of (value, units).")
                my_final_time = final_time.in_units("s")
            elif final_redshift is not None:
                my_final_time = self.cosmology.t_from_z(final_redshift)
            else:
                my_final_time = self.final_time

            my_initial_time.convert_to_units("s")
            my_final_time.convert_to_units("s")
            my_times = np.array([a['time'] for a in my_all_outputs])
            my_indices = np.digitize([my_initial_time, my_final_time], my_times)
            if my_initial_time == my_times[my_indices[0] - 1]: my_indices[0] -= 1
            my_outputs = my_all_outputs[my_indices[0]:my_indices[1]]

        init_outputs = []
        for output in my_outputs:
            if os.path.exists(output['filename']):
                init_outputs.append(output['filename'])
            
        DatasetSeries.__init__(self, outputs=init_outputs, parallel=parallel,
                                setup_function=setup_function)
        mylog.info("%d outputs loaded into time series.", len(init_outputs))
コード例 #40
0
def uuid_serve_functions(pre_routed = None, open_browser=False, port=9099,
                         repl = None, token = None):
    if pre_routed is None: pre_routed = route_functions
    debug(mode=True)
    if token is None: token = uuid.uuid1()
    for r in pre_routed:
        args, kwargs, f = pre_routed[r]
        if r[0] == "/": r = r[1:]
        rp = "/%s/%s" % (token, r)
        func_name = getattr(f, 'func_name', str(f))
        print("Routing from %s => %s" % (rp, func_name))
        route(rp, *args, **kwargs)(f)
    for w in route_watchers:
        if not hasattr(w, "_route_prefix"):
            print("WARNING: %s has no _route_prefix attribute.  Not notifying." % w)
            continue
        w._route_prefix = token
    repl._global_token = token
    repl.activate()
    repl.execution_thread.wait()
    print()
    print()
    print("=============================================================================")
    print("=============================================================================")
    print("Greetings, and welcome to Reason!")
    print("Your private token is %s ." % token)
    print("DO NOT SHARE THIS TOKEN.")
    print()
    print("Please direct your browser to:")
    print()
    print("     http://localhost:%s/%s/" % (port, token))
    print()
    print("=============================================================================")
    print()
    print("If you are currently ssh'd into a remote machine, you should be able")
    print("to create a new SSH tunnel by typing or copy/pasting this text")
    print("verbatim, while waiting to see the 'ssh>' prompt after the first line.")
    print()
    print("~C")
    print("-L%s:localhost:%s" % (port, port))
    print()
    print("and then pointing a web browser on your local machine to the above URL.")
    print()
    print("=============================================================================")
    print("=============================================================================")
    print()
    print()
    if open_browser:
        # We do some fancy footwork so that we can open the browser while the
        # server starts up.  I got this from some recipe whose URL escapes me.
        # Thank you, to whoever wrote it!
        def local_browse():
            """Start a browser after waiting for half a second."""
            import webbrowser, threading
            def _local_browse():
                webbrowser.open('http://localhost:%s/%s/' % (port, token))
            thread = threading.Timer(0.5, _local_browse)
            thread.start()
        local_browse()
    try:
        import yt.extern.rocket as rocket
        server_type = YTRocketServer
        log = logging.getLogger('Rocket')
        log.setLevel(logging.WARNING)
        kwargs = {'timeout': 600, 'max_threads': 2}
        if repl is not None:
            repl.server = YTRocketServer.server_info
    except ImportError:
        server_type = server_names.get("wsgiref")
        kwargs = {}
    server = server_type(host='localhost', port=port, **kwargs)
    mylog.info("Starting up the server.")
    run(server=server)
コード例 #41
0
def profile(halo,
            bin_fields,
            profile_fields,
            n_bins=32,
            extrema=None,
            logs=None,
            units=None,
            weight_field="cell_mass",
            accumulation=False,
            fractional=False,
            storage="profiles",
            output_dir="."):
    r"""
    Create 1, 2, or 3D profiles of a halo.

    Store profile data in a dictionary associated with the halo object.

    Parameters
    ----------
    halo : Halo object
        The Halo object to be provided by the HaloCatalog.
    bin_fields : list of strings
        The binning fields for the profile.
    profile_fields : string or list of strings
        The fields to be profiled.
    n_bins : int or list of ints
        The number of bins in each dimension.  If None, 32 bins are
        used for each bin field.
        Default: 32.
    extrema : dict of min, max tuples
        Minimum and maximum values of the bin_fields for the profiles.
        The keys correspond to the field names. Defaults to the extrema
        of the bin_fields of the dataset. If a units dict is provided, extrema
        are understood to be in the units specified in the dictionary.
    logs : dict of boolean values
        Whether or not to log the bin_fields for the profiles.
        The keys correspond to the field names. Defaults to the take_log
        attribute of the field.
    units : dict of strings
        The units of the fields in the profiles, including the bin_fields.
    weight_field : string
        Weight field for profiling.
        Default : "cell_mass"
    accumulation : bool or list of bools
        If True, the profile values for a bin n are the cumulative sum of
        all the values from bin 0 to n.  If -True, the sum is reversed so
        that the value for bin n is the cumulative sum from bin N (total bins)
        to n.  If the profile is 2D or 3D, a list of values can be given to
        control the summation in each dimension independently.
        Default: False.
    fractional : bool
        If True, the profile values are divided by the sum of all the
        profile data such that the profile represents a probability
        distribution function.
        Default: False.
    storage : string
        Name of the dictionary to store profiles.
        Default: "profiles"
    output_dir : string
        Name of directory where profile data will be written.  The full path will be
        the output_dir of the halo catalog concatenated with this directory.
        Default : "."

    """

    mylog.info("Calculating 1D profile for halo %d." %
               halo.quantities["particle_identifier"])

    dds = halo.halo_catalog.data_ds

    if dds is None:
        raise RuntimeError("Profile callback requires a data ds.")

    if not hasattr(halo, "data_object"):
        raise RuntimeError("Profile callback requires a data container.")

    if halo.data_object is None:
        mylog.info("Skipping halo %d since data_object is None." %
                   halo.quantities["particle_identifier"])
        return

    if output_dir is None:
        output_dir = storage
    output_dir = os.path.join(halo.halo_catalog.output_dir, output_dir)

    bin_fields = ensure_list(bin_fields)
    my_profile = create_profile(halo.data_object,
                                bin_fields,
                                profile_fields,
                                n_bins=n_bins,
                                extrema=extrema,
                                logs=logs,
                                units=units,
                                weight_field=weight_field,
                                accumulation=accumulation,
                                fractional=fractional)

    prof_store = dict([(field, my_profile[field]) \
                       for field in my_profile.field_data])
    prof_store[my_profile.x_field] = my_profile.x
    if len(bin_fields) > 1:
        prof_store[my_profile.y_field] = my_profile.y
    if len(bin_fields) > 2:
        prof_store[my_profile.z_field] = my_profile.z
    if hasattr(halo, storage):
        halo_store = getattr(halo, storage)
        if "used" in halo_store:
            halo_store["used"] &= my_profile.used
    else:
        halo_store = {"used": my_profile.used}
        setattr(halo, storage, halo_store)
    halo_store.update(prof_store)

    if my_profile.standard_deviation is not None:
        variance_store = dict([(field, my_profile.standard_deviation[field]) \
                               for field in my_profile.standard_deviation])
        variance_storage = "%s_variance" % storage
        if hasattr(halo, variance_storage):
            halo_variance_store = getattr(halo, variance_storage)
        else:
            halo_variance_store = {}
            setattr(halo, variance_storage, halo_variance_store)
        halo_variance_store.update(variance_store)
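
A hedged sketch of how a callback like this is typically registered with a halo catalog; the import path, the "sphere" callback, the dataset paths, and the field names are assumptions that may differ between yt versions.

# Hedged usage sketch: give each halo a sphere container, then attach the
# profile callback so a radial temperature profile is stored per halo.
# Import path and dataset paths are placeholders and version-dependent.
import yt
from yt.analysis_modules.halo_analysis.api import HaloCatalog

data_ds = yt.load("Enzo_64/RD0006/RedshiftOutput0006")
halos_ds = yt.load("rockstar_halos/halos_0.0.bin")
hc = HaloCatalog(data_ds=data_ds, halos_ds=halos_ds)
hc.add_callback("sphere", factor=2.0)
hc.add_callback("profile", ["radius"], [("gas", "temperature")],
                weight_field="cell_mass")
hc.create()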
コード例 #43
0
    def par_combine_object(self, data, op, datatype = None):
        # op can be chosen from:
        #   cat
        #   join
        # data is selected to be of types:
        #   np.ndarray
        #   dict
        #   data field dict

        if not isinstance(data, dict):
            print('DATA IS NOT A DICTIONARY')
        if datatype is not None:
            pass
        elif isinstance(data, dict):
            datatype = "dict"
        elif isinstance(data, np.ndarray):
            datatype = "array"
        elif isinstance(data, list):
            datatype = "list"
        # Now we have our datatype, and we conduct our operation
        
        if datatype == "dict" and op == "join":
            if self.comm.rank == 0:
                for i in range(1,self.comm.size):
                    data.update(self.comm.recv(source=i, tag=0))
            else:
                self.comm.send(data, dest=0, tag=0)
            if self.comm.rank == 0:
                mylog.info("datatype %s",type(data))
                for i in range(1,self.comm.size):
                    self.comm.send(data, dest=i, tag=i)
            else:
                data = self.comm.recv(source=0,tag=self.comm.rank)


            return data
        elif datatype == "dict" and op == "cat":
            field_keys = sorted(data.keys())
            size = data[field_keys[0]].shape[-1]
            sizes = np.zeros(self.comm.size, dtype='int64')
            outsize = np.array(size, dtype='int64')
            self.comm.Allgather([outsize, 1, MPI.LONG],
                                     [sizes, 1, MPI.LONG] )
            # This nested concatenate is to get the shapes to work out correctly;
            # if we just add [0] to sizes, it will broadcast a summation, not a
            # concatenation.
            offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
            arr_size = self.comm.allreduce(size, op=MPI.SUM)
            for key in field_keys:
                dd = data[key]
                rv = self.alltoallv_array(dd, arr_size, offsets, sizes)
                data[key] = rv
            return data
        elif datatype == "array" and op == "cat":
            if data is None:
                ncols = -1
                size = 0
                dtype = 'float64'
                mylog.info('Warning: Array passed to par_combine_object was None. Setting dtype to float64. This may break things!')
            else:
                dtype = data.dtype
                if len(data) == 0:
                    ncols = -1
                    size = 0
                elif len(data.shape) == 1:
                    ncols = 1
                    size = data.shape[0]
                else:
                    ncols, size = data.shape
            ncols = self.comm.allreduce(ncols, op=MPI.MAX)
            if ncols == 0:
                data = np.zeros(0, dtype=dtype) # This only works for
            elif data is None:
                data = np.zeros((ncols, 0), dtype=dtype)
            size = data.shape[-1]
            sizes = np.zeros(self.comm.size, dtype='int64')
            outsize = np.array(size, dtype='int64')
            self.comm.Allgather([outsize, 1, MPI.LONG],
                                     [sizes, 1, MPI.LONG] )
            # This nested concatenate is to get the shapes to work out correctly;
            # if we just add [0] to sizes, it will broadcast a summation, not a
            # concatenation.
            offsets = np.add.accumulate(np.concatenate([[0], sizes]))[:-1]
            arr_size = self.comm.allreduce(size, op=MPI.SUM)
            data = self.alltoallv_array(data, arr_size, offsets, sizes)
            return data
        elif datatype == "list" and op == "cat":
            recv_data = self.comm.allgather(data)
            # Now flatten into a single list, since this
            # returns us a list of lists.
            data = []
            while recv_data:
                data.extend(recv_data.pop(0))
            return data
        raise NotImplementedError
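
A hedged sketch of exercising the "join" branch above from a parallel script; how the ParallelAnalysisInterface instance and its communicator are obtained here is an assumption.

# Hedged usage sketch: every rank contributes one dict entry and each rank
# receives the joined dictionary back. Run under mpirun with mpi4py installed.
import yt
yt.enable_parallelism()

from yt.utilities.parallel_tools.parallel_analysis_interface import \
    ParallelAnalysisInterface

pai = ParallelAnalysisInterface()          # assumed to pick up the global communicator
piece = {pai.comm.rank: "done"}            # one entry per MPI rank
combined = pai.par_combine_object(piece, op="join", datatype="dict")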
コード例 #45
0
    def setup_fluid_fields(self):

        setup_magnetic_field_aliases(self, "amrvac",
                                     [f"mag{ax}" for ax in "xyz"])
        self._setup_velocity_fields()  # gas velocities
        self._setup_dust_fields()  # dust derived fields (including velocities)

        # fields with nested dependencies are defined thereafter
        # by increasing level of complexity
        us = self.ds.unit_system

        def _kinetic_energy_density(field, data):
            # devnote : have a look at issue 1301
            return 0.5 * data["gas", "density"] * data["gas",
                                                       "velocity_magnitude"]**2

        self.add_field(
            ("gas", "kinetic_energy_density"),
            function=_kinetic_energy_density,
            units=us["density"] * us["velocity"]**2,
            dimensions=dimensions.density * dimensions.velocity**2,
            sampling_type="cell",
        )

        # magnetic energy density
        if ("amrvac", "b1") in self.field_list:

            def _magnetic_energy_density(field, data):
                emag = 0.5 * data["gas", "magnetic_1"]**2
                for idim in "23":
                    if not ("amrvac", f"b{idim}") in self.field_list:
                        break
                    emag += 0.5 * data["gas", f"magnetic_{idim}"]**2
                # in AMRVAC the magnetic field is defined in units where mu0 = 1,
                # such that
                # Emag = 0.5*B**2 instead of Emag = 0.5*B**2 / mu0
                # To correctly transform the dimensionality from gauss**2 -> rho*v**2,
                # we have to take mu0 into account. If we divide here, units when adding
                # the field should be us["density"]*us["velocity"]**2.
                # If not, they should be us["magnetic_field"]**2 and division should
                # happen elsewhere.
                emag /= 4 * np.pi
                # divided by mu0 = 4pi in cgs,
                # yt handles 'mks' and 'code' unit systems internally.
                return emag

            self.add_field(
                ("gas", "magnetic_energy_density"),
                function=_magnetic_energy_density,
                units=us["density"] * us["velocity"]**2,
                dimensions=dimensions.density * dimensions.velocity**2,
                sampling_type="cell",
            )

        # Adding the thermal pressure field.
        # In AMRVAC we have multiple physics possibilities:
        # - if HD/MHD + energy equation P = (gamma-1)*(e - ekin (- emag)) for (M)HD
        # - if HD/MHD but solve_internal_e is true in parfile, P = (gamma-1)*e for both
        # - if (m)hd_energy is false in parfile (isothermal), P = c_adiab * rho**gamma

        def _full_thermal_pressure_HD(field, data):
            # energy density and pressure are actually expressed in the same unit
            pthermal = (data.ds.gamma -
                        1) * (data["gas", "energy_density"] -
                              data["gas", "kinetic_energy_density"])
            return pthermal

        def _full_thermal_pressure_MHD(field, data):
            pthermal = (
                _full_thermal_pressure_HD(field, data) -
                (data.ds.gamma - 1) * data["gas", "magnetic_energy_density"])
            return pthermal

        def _polytropic_thermal_pressure(field, data):
            return (data.ds.gamma - 1) * data["gas", "energy_density"]

        def _adiabatic_thermal_pressure(field, data):
            return data.ds._c_adiab * data["gas", "density"]**data.ds.gamma

        pressure_recipe = None
        if ("amrvac", "e") in self.field_list:
            if self.ds._e_is_internal:
                pressure_recipe = _polytropic_thermal_pressure
                mylog.info("Using polytropic EoS for thermal pressure.")
            elif ("amrvac", "b1") in self.field_list:
                pressure_recipe = _full_thermal_pressure_MHD
                mylog.info("Using full MHD energy for thermal pressure.")
            else:
                pressure_recipe = _full_thermal_pressure_HD
                mylog.info("Using full HD energy for thermal pressure.")
        elif self.ds._c_adiab is not None:
            pressure_recipe = _adiabatic_thermal_pressure
            mylog.info(
                "Using adiabatic EoS for thermal pressure (isothermal).")
            mylog.warning("If you used usr_set_pthermal you should "
                          "redefine the thermal_pressure field.")

        if pressure_recipe is not None:
            self.add_field(
                ("gas", "thermal_pressure"),
                function=pressure_recipe,
                units=us["density"] * us["velocity"]**2,
                dimensions=dimensions.density * dimensions.velocity**2,
                sampling_type="cell",
            )

            # sound speed and temperature depend on thermal pressure
            def _sound_speed(field, data):
                return np.sqrt(data.ds.gamma *
                               data["gas", "thermal_pressure"] /
                               data["gas", "density"])

            self.add_field(
                ("gas", "sound_speed"),
                function=_sound_speed,
                units=us["velocity"],
                dimensions=dimensions.velocity,
                sampling_type="cell",
            )
        else:
            mylog.warning(
                "e not found and no parfile passed, can not set thermal_pressure."
            )
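
A hedged sketch of accessing the derived fields defined above after loading an AMRVAC snapshot; the file name is a placeholder, and some setups may additionally need units_override or geometry information.

# Hedged usage sketch: once the dataset is loaded, the derived fields set up
# above behave like any other yt field. The snapshot name is a placeholder.
import yt

ds = yt.load("amrvac_run0010.dat")
ad = ds.all_data()
print(ad["gas", "kinetic_energy_density"].units)
if ("gas", "thermal_pressure") in ds.derived_field_list:
    print(ad["gas", "thermal_pressure"].max())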
コード例 #46
0
    def make_light_ray(self, seed=None, periodic=True,
                       left_edge=None, right_edge=None, min_level=None,
                       start_position=None, end_position=None,
                       trajectory=None,
                       fields=None, setup_function=None,
                       solution_filename=None, data_filename=None,
                       get_los_velocity=None, use_peculiar_velocity=True,
                       redshift=None, field_parameters=None, njobs=-1):
        """
        make_light_ray(seed=None, periodic=True,
                       left_edge=None, right_edge=None, min_level=None,
                       start_position=None, end_position=None,
                       trajectory=None, fields=None, setup_function=None,
                       solution_filename=None, data_filename=None,
                       use_peculiar_velocity=True, redshift=None,
                       njobs=-1)

        Create a light ray and get field values for each lixel.  A light
        ray consists of a list of field values for cells intersected by
        the ray and the path length of the ray through those cells.
        Light ray data must be written out to an hdf5 file.

        Parameters
        ----------
        seed : optional, int
            Seed for the random number generator.
            Default: None.
        periodic : optional, bool
            If True, ray trajectories will make use of periodic
            boundaries.  If False, ray trajectories will not be
            periodic.
            Default : True.
        left_edge : optional, iterable of floats or YTArray
            The left corner of the region in which rays are to be
            generated.  If None, the left edge will be that of the
            domain.  If specified without units, it is assumed to
            be in code units.
            Default: None.
        right_edge : optional, iterable of floats or YTArray
            The right corner of the region in which rays are to be
            generated.  If None, the right edge will be that of the
            domain.  If specified without units, it is assumed to
            be in code units.
            Default: None.
        min_level : optional, int
            The minimum refinement level of the spatial region in which
            the ray passes.  This can be used with zoom-in simulations
            where the high resolution region does not keep a constant
            geometry.
            Default: None.
        start_position : optional, iterable of floats or YTArray.
            Used only if creating a light ray from a single dataset.
            The coordinates of the starting position of the ray.
            If specified without units, it is assumed to be in code units.
            Default: None.
        end_position : optional, iterable of floats or YTArray.
            Used only if creating a light ray from a single dataset.
            The coordinates of the ending position of the ray.
            If specified without units, it is assumed to be in code units.
            Default: None.
        trajectory : optional, list of floats
            Used only if creating a light ray from a single dataset.
            The (r, theta, phi) direction of the light ray.  Use either
            end_position or trajectory, not both.
            Default: None.
        fields : optional, list
            A list of fields for which to get data.
            Default: None.
        setup_function : optional, callable, accepts a ds
            This function will be called on each dataset that is loaded
            to create the light ray.  For example, this can be used to
            add new derived fields.
            Default: None.
        solution_filename : optional, string
            Path to a text file where the trajectory of each
            subray is written out.
            Default: None.
        data_filename : optional, string
            Path to output file for ray data.
            Default: None.
        use_peculiar_velocity : optional, bool
            If True, the peculiar velocity along the ray will be sampled for
            calculating the effective redshift combining the cosmological
            redshift and the doppler redshift.
            Default: True.
        redshift : optional, float
            Used with light rays made from single datasets to specify a
            starting redshift for the ray.  If not used, the starting
            redshift will be 0 for a non-cosmological dataset and
            the dataset redshift for a cosmological dataset.
            Default: None.
        njobs : optional, int
            The number of parallel jobs over which the segments will
            be split.  Choose -1 for one processor per segment.
            Default: -1.

        Examples
        --------

        Make a light ray from multiple datasets:

        >>> import yt
        >>> from yt.analysis_modules.cosmological_observation.light_ray.api import \
        ...     LightRay
        >>> my_ray = LightRay("enzo_tiny_cosmology/32Mpc_32.enzo", "Enzo",
        ...                   0., 0.1, time_data=False)
        ...
        >>> my_ray.make_light_ray(seed=12345,
        ...                       solution_filename="solution.txt",
        ...                       data_filename="my_ray.h5",
        ...                       fields=["temperature", "density"],
        ...                       use_peculiar_velocity=True)

        Make a light ray from a single dataset:

        >>> import yt
        >>> from yt.analysis_modules.cosmological_observation.light_ray.api import \
        ...     LightRay
        >>> my_ray = LightRay("IsolatedGalaxy/galaxy0030/galaxy0030")
        ...
        >>> my_ray.make_light_ray(start_position=[0., 0., 0.],
        ...                       end_position=[1., 1., 1.],
        ...                       solution_filename="solution.txt",
        ...                       data_filename="my_ray.h5",
        ...                       fields=["temperature", "density"],
        ...                       use_peculiar_velocity=True)

        """
        if self.simulation_type is None:
            domain = self.ds
        else:
            domain = self.simulation

        assumed_units = "code_length"
        if left_edge is None:
            left_edge = domain.domain_left_edge
        elif not hasattr(left_edge, 'units'):
            left_edge = domain.arr(left_edge, assumed_units)
        left_edge.convert_to_units('unitary')

        if right_edge is None:
            right_edge = domain.domain_right_edge
        elif not hasattr(right_edge, 'units'):
            right_edge = domain.arr(right_edge, assumed_units)
        right_edge.convert_to_units('unitary')

        if start_position is not None:
            if hasattr(start_position, 'units'):
                start_position = start_position
            else:
                start_position = self.ds.arr(start_position, assumed_units)
            start_position.convert_to_units('unitary')

        if end_position is not None:
            if hasattr(end_position, 'units'):
                end_position = end_position
            else:
                end_position = self.ds.arr(end_position, assumed_units)
            end_position.convert_to_units('unitary')

        if get_los_velocity is not None:
            use_peculiar_velocity = get_los_velocity
            mylog.warn("'get_los_velocity' kwarg is deprecated. " + \
                       "Use 'use_peculiar_velocity' instead.")

        # Calculate solution.
        self._calculate_light_ray_solution(seed=seed,
                                           left_edge=left_edge,
                                           right_edge=right_edge,
                                           min_level=min_level, periodic=periodic,
                                           start_position=start_position,
                                           end_position=end_position,
                                           trajectory=trajectory,
                                           filename=solution_filename)

        if field_parameters is None:
            field_parameters = {}

        # Initialize data structures.
        self._data = {}
        # temperature field is automatically added to fields
        if fields is None: fields = []
        if (('gas', 'temperature') not in fields) and \
           ('temperature' not in fields):
            fields.append(('gas', 'temperature'))
        data_fields = fields[:]
        all_fields = fields[:]
        all_fields.extend(['dl', 'dredshift', 'redshift'])
        all_fields.extend(['x', 'y', 'z', 'dx', 'dy', 'dz'])
        data_fields.extend(['x', 'y', 'z', 'dx', 'dy', 'dz'])
        if use_peculiar_velocity:
            all_fields.extend(['velocity_x', 'velocity_y', 'velocity_z', 
                               'velocity_los', 'redshift_eff', 
                               'redshift_dopp'])
            data_fields.extend(['velocity_x', 'velocity_y', 'velocity_z'])
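        # Note: 'all_fields' is everything stored in the output, while
        # 'data_fields' is only what gets read from each ray segment;
        # 'dl', 'dredshift', 'redshift' (and, with peculiar velocities,
        # 'velocity_los', 'redshift_eff', 'redshift_dopp') are computed below.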

        all_ray_storage = {}
        for my_storage, my_segment in parallel_objects(self.light_ray_solution,
                                                       storage=all_ray_storage,
                                                       njobs=njobs):

            # For simple rays, use the already loaded dataset (self.ds);
            # otherwise, load the dataset for this segment.
            if self.ds is None:
                ds = load(my_segment['filename'], **self.load_kwargs)
            else:
                ds = self.ds

            my_segment['unique_identifier'] = ds.unique_identifier
            if redshift is not None:
                if ds.cosmological_simulation and redshift != ds.current_redshift:
                    mylog.warn("Generating light ray with different redshift than " +
                               "the dataset itself.")
                my_segment["redshift"] = redshift

            if setup_function is not None:
                setup_function(ds)

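            # Determine the redshift at the far end of this segment:
            # non-cosmological data keep the segment redshift; a single
            # cosmological dataset (near_redshift == far_redshift) integrates
            # dz over the segment length; otherwise use the next segment's
            # redshift, or near_redshift for the final segment.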
            if not ds.cosmological_simulation:
                next_redshift = my_segment["redshift"]
            elif self.near_redshift == self.far_redshift:
                if isinstance(my_segment["traversal_box_fraction"], YTArray) and \
                  not my_segment["traversal_box_fraction"].units.is_dimensionless:
                    segment_length = \
                      my_segment["traversal_box_fraction"].in_units("Mpccm / h")
                else:
                    segment_length = my_segment["traversal_box_fraction"] * \
                      ds.domain_width[0].in_units("Mpccm / h")
                next_redshift = my_segment["redshift"] - \
                  self._deltaz_forward(my_segment["redshift"],
                                       segment_length)
            elif my_segment.get("next", None) is None:
                next_redshift = self.near_redshift
            else:
                next_redshift = my_segment['next']['redshift']

            # Make sure start, end, left, right
            # are using the dataset's unit system.
            my_start = ds.arr(my_segment['start'])
            my_end   = ds.arr(my_segment['end'])
            my_left  = ds.arr(left_edge)
            my_right = ds.arr(right_edge)
            mylog.info("Getting segment at z = %s: %s to %s." %
                       (my_segment['redshift'], my_start, my_end))

            # Break periodic ray into non-periodic segments.
            sub_segments = periodic_ray(my_start, my_end,
                                        left=my_left, right=my_right)

            # Prepare data structure for subsegment.
            sub_data = {}
            sub_data['segment_redshift'] = my_segment['redshift']
            for field in all_fields:
                sub_data[field] = []

            # Get data for all subsegments in segment.
            for sub_segment in sub_segments:
                mylog.info("Getting subsegment: %s to %s." %
                           (list(sub_segment[0]), list(sub_segment[1])))
                sub_ray = ds.ray(sub_segment[0], sub_segment[1])
                for key, val in field_parameters.items():
                    sub_ray.set_field_parameter(key, val)
                asort = np.argsort(sub_ray["t"])
                sub_data['dl'].extend(sub_ray['dts'][asort] *
                                      vector_length(sub_ray.start_point,
                                                    sub_ray.end_point))

                for field in data_fields:
                    sub_data[field].extend(sub_ray[field][asort])

                if use_peculiar_velocity:
                    line_of_sight = sub_segment[0] - sub_segment[1]
                    line_of_sight /= ((line_of_sight**2).sum())**0.5
                    sub_vel = ds.arr([sub_ray['velocity_x'],
                                      sub_ray['velocity_y'],
                                      sub_ray['velocity_z']])
                    # Line of sight velocity = vel_los
                    sub_vel_los = (np.rollaxis(sub_vel, 1) * \
                                   line_of_sight).sum(axis=1)
                    sub_data['velocity_los'].extend(sub_vel_los[asort])

                    # Doppler redshift:
                    # See https://en.wikipedia.org/wiki/Redshift and
                    # Peebles eqns: 5.48, 5.49

                    # 1 + redshift_dopp = (1 + v*cos(theta)/c) /
                    #                     sqrt(1 - v**2/c**2)

                    # where v is the peculiar velocity (i.e. the physical
                    # velocity without the Hubble flow; since there is no
                    # Hubble flow in the simulation, this is just the
                    # physical velocity).

                    # The bulk of the Doppler redshift comes from line-of-sight
                    # motion, but there is a small contribution from time
                    # dilation of transverse motion, hence the inclusion of
                    # theta, the angle between the ray vector (the line of
                    # sight) and the velocity vector: a dot b = |a||b| cos(theta).
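                    # For example, a purely line-of-sight peculiar velocity of
                    # 0.01c (cos(theta) = 1) gives
                    # 1 + z_dopp = 1.01 / sqrt(1 - 1e-4) ~= 1.01005,
                    # i.e. z_dopp ~= 0.01005, slightly above the naive v/c.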

                    sub_vel_mag = sub_ray['velocity_magnitude']
                    cos_theta = line_of_sight.dot(sub_vel) / sub_vel_mag
                    # Protect against situations where the velocity magnitude
                    # is exactly zero, in which case zero / zero = NaN.
                    cos_theta = np.nan_to_num(cos_theta)
                    redshift_dopp = \
                        (1 + sub_vel_mag * cos_theta / speed_of_light_cgs) / \
                         np.sqrt(1 - sub_vel_mag**2 / speed_of_light_cgs**2) - 1
                    sub_data['redshift_dopp'].extend(redshift_dopp[asort])
                    del sub_vel, sub_vel_los, sub_vel_mag, cos_theta, \
                        redshift_dopp

                sub_ray.clear_data()
                del sub_ray, asort

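            # Combine each accumulated field list into a single YTArray in
            # cgs units before computing the derived redshift fields below.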
            for key in sub_data:
                sub_data[key] = ds.arr(sub_data[key]).in_cgs()

            # Get the redshift for each lixel. Assume a linear relation
            # between l and z.
            sub_data['dredshift'] = (my_segment['redshift'] - next_redshift) * \
                (sub_data['dl'] / vector_length(my_start, my_end).in_cgs())
            sub_data['redshift'] = my_segment['redshift'] - \
              sub_data['dredshift'].cumsum() + sub_data['dredshift']
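            # i.e. redshift[i] = segment_redshift - sum(dredshift[:i]), the
            # redshift at the point where the ray enters lixel i.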

            # When using the peculiar velocity, create an effective redshift
            # (redshift_eff) field combining the cosmological redshift and
            # the Doppler redshift.

            # To combine the cosmological and Doppler redshifts, follow
            # eqn 3.75 in Peacock's Cosmological Physics:
            # 1 + z_eff = (1 + z_cosmo) * (1 + z_doppler)
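            # For example, z_cosmo = 0.1 and z_doppler = 0.01 give
            # 1 + z_eff = 1.1 * 1.01 = 1.111, i.e. z_eff = 0.111 rather than
            # the naive sum of 0.11.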

            if use_peculiar_velocity:
                sub_data['redshift_eff'] = ((1 + sub_data['redshift_dopp']) *
                                            (1 + sub_data['redshift'])) - 1

            # Remove empty lixels.
            sub_dl_nonzero = sub_data['dl'].nonzero()
            for field in all_fields:
                sub_data[field] = sub_data[field][sub_dl_nonzero]
            del sub_dl_nonzero

            # Add to storage.
            my_storage.result = sub_data

            del ds

        # Reconstruct ray data from parallel_objects storage.
        all_data = list(all_ray_storage.values())
        # This is now a list of segments where each one is a dictionary
        # with all the fields.
        all_data.sort(key=lambda a: a['segment_redshift'], reverse=True)
        # Flatten the list into a single dictionary containing fields
        # for the whole ray.
        all_data = _flatten_dict_list(all_data, exceptions=['segment_redshift'])
        self._data = all_data

        if data_filename is not None:
            self._write_light_ray(data_filename, all_data)
            ray_ds = load(data_filename)
            return ray_ds
        else:
            return None