def simulation(parameter_filename, simulation_type, find_outputs=False):
    """
    Loads a simulation time series object of the specified simulation type.

    Parameters
    ----------
    parameter_filename : str
        Path to the simulation parameter file. If it does not exist as
        given, it is also looked up relative to the configured
        ``test_data_dir``.
    simulation_type : str
        Key into ``simulation_time_series_registry`` selecting the frontend.
    find_outputs : bool, optional
        Passed through to the simulation time series constructor.

    Raises
    ------
    YTSimulationNotIdentified
        If ``simulation_type`` is not a registered simulation type.
    YTOutputNotIdentified
        If ``parameter_filename`` cannot be located in either location.
    """
    if simulation_type not in simulation_time_series_registry:
        raise YTSimulationNotIdentified(simulation_type)

    valid_file = os.path.exists(parameter_filename)
    if not valid_file:
        # Fall back to looking up the file relative to the configured
        # test data directory (computed once, reused for the rename).
        alt_filename = os.path.join(
            ytcfg.get("yt", "test_data_dir"), parameter_filename)
        if os.path.exists(alt_filename):
            parameter_filename = alt_filename
            valid_file = True

    if not valid_file:
        raise YTOutputNotIdentified((parameter_filename, simulation_type),
                                    dict(find_outputs=find_outputs))

    return simulation_time_series_registry[simulation_type](
        parameter_filename, find_outputs=find_outputs)
def __new__(cls, outputs, *args, **kwargs):
    """
    Construct the series, expanding ``outputs`` from a glob pattern into a
    list of filenames when it is given as a string.

    Raises
    ------
    YTOutputNotIdentified
        If ``outputs`` is neither a string pattern nor a sliceable
        collection of outputs.
    """
    # NOTE: use plain ``str`` here for consistency with from_filenames();
    # the py2-era ``string_types`` alias is no longer needed.
    if isinstance(outputs, str):
        outputs = get_filenames_from_glob_pattern(outputs)
    ret = super(DatasetSeries, cls).__new__(cls)
    try:
        # Keep a shallow copy so later mutation of the caller's list does
        # not affect this series.
        ret._pre_outputs = outputs[:]
    except TypeError as e:
        # Not sliceable -> not a usable collection of outputs; chain the
        # original error for easier debugging.
        raise YTOutputNotIdentified(outputs, {}) from e
    return ret
def from_filenames(cls, filenames, parallel=True, setup_function=None, **kwargs):
    r"""Create a time series from either a filename pattern or a list of
    filenames.

    This method provides an easy way to create a
    :class:`~yt.data_objects.time_series.DatasetSeries`, given a set of
    filenames or a pattern that matches them.  Additionally, it can set the
    parallelism strategy.

    Parameters
    ----------
    filenames : list or pattern
        This can either be a list of filenames (such as
        ["DD0001/DD0001", "DD0002/DD0002"]) or a pattern to match, such
        as "DD*/DD*.index").  If it's the former, they will be loaded in
        order.  The latter will be identified with the glob module and then
        sorted.
    parallel : True, False or int
        This parameter governs the behavior when .piter() is called on the
        resultant DatasetSeries object.  If this is set to False, the time
        series will not iterate in parallel when .piter() is called.  If
        this is set to either True or an integer, it will be iterated with
        1 or that integer number of processors assigned to each parameter
        file provided to the loop.
    setup_function : callable, accepts a ds
        This function will be called whenever a dataset is loaded.

    Examples
    --------

    >>> def print_time(ds):
    ...     print(ds.current_time)
    ...
    >>> ts = DatasetSeries.from_filenames(
    ...     "GasSloshingLowRes/sloshing_low_res_hdf5_plt_cnt_0[0-6][0-9]0",
    ...      setup_function = print_time)
    ...
    >>> for ds in ts:
    ...     SlicePlot(ds, "x", "Density").save()

    """
    if isinstance(filenames, str):
        filenames = get_filenames_from_glob_pattern(filenames)

    # This will crash with a less informative error if filenames is not
    # iterable, but the plural keyword should give users a clue...
    for fn in filenames:
        if not isinstance(fn, str):
            # Fixed: the message previously said "DataSeries"; the actual
            # class is DatasetSeries.
            raise YTOutputNotIdentified("DatasetSeries accepts a list of "
                                        "strings, but "
                                        "received {0}".format(fn))
    obj = cls(filenames[:], parallel=parallel,
              setup_function=setup_function, **kwargs)
    return obj
def get_filenames_from_glob_pattern(filenames):
    """
    Expand a glob pattern into a sorted list of matching filenames.

    The pattern is tried as given first; when nothing matches, it is
    retried relative to the configured yt ``test_data_dir``.

    Raises
    ------
    YTOutputNotIdentified
        If neither location yields any match.
    """
    matches = glob.glob(filenames)
    if not matches:
        # Second chance: resolve the pattern under the test data directory
        # taken from the yt configuration.
        fallback = os.path.join(ytcfg.get("yt", "test_data_dir"), filenames)
        matches = glob.glob(fallback)
        if not matches:
            raise YTOutputNotIdentified(filenames, {})
    return sorted(matches)
def load(*args ,**kwargs):
    """
    This function attempts to determine the base data type of a filename or
    other set of arguments by calling
    :meth:`yt.data_objects.static_output.Dataset._is_valid` until it finds a
    match, at which point it returns an instance of the appropriate
    :class:`yt.data_objects.static_output.Dataset` subclass.
    """
    args = _sanitize_load_args(*args)
    candidates = []
    # valid_file[i] records whether args[i] points at an existing path, a
    # URL, or a file found under the configured test data directory.
    valid_file = []
    for argno, arg in enumerate(args):
        if isinstance(arg, str):
            if os.path.exists(arg):
                valid_file.append(True)
            elif arg.startswith("http"):
                # Remote datasets are accepted as-is; validity is checked
                # later by the frontend's _is_valid.
                valid_file.append(True)
            else:
                if os.path.exists(os.path.join(ytcfg.get("yt",
                        "test_data_dir"), arg)):
                    valid_file.append(True)
                    # Rewrite the argument in place to the resolved path.
                    args[argno] = os.path.join(ytcfg.get("yt",
                        "test_data_dir"), arg)
                else:
                    valid_file.append(False)
        else:
            # Non-string arguments (dicts, lists, arrays) can never be
            # existing files.
            valid_file.append(False)
    types_to_check = output_type_registry
    if not any(valid_file):
        # No argument resolved to a file: first try interpreting the
        # arguments as a glob pattern / filename list for a time series.
        try:
            from yt.data_objects.time_series import DatasetSeries
            ts = DatasetSeries.from_filenames(*args, **kwargs)
            return ts
        except (TypeError, YTOutputNotIdentified):
            pass
        # We check if either the first argument is a dict or list, in which
        # case we try identifying candidates.
        if len(args) > 0 and isinstance(args[0], (list, dict)):
            # This fixes issues where it is assumed the first argument is a
            # file
            types_to_check = dict((n, v) for n, v in
                    output_type_registry.items() if n.startswith("stream_"))
            # Better way to do this is to override the output_type_registry
        else:
            mylog.error("None of the arguments provided to load() is a valid file")
            mylog.error("Please check that you have used a correct path")
            raise YTOutputNotIdentified(args, kwargs)
    # Collect the names of every registered frontend whose _is_valid
    # accepts these arguments.
    for n, c in types_to_check.items():
        if n is None:
            continue
        if c._is_valid(*args, **kwargs):
            candidates.append(n)
    # convert to classes
    candidates = [output_type_registry[c] for c in candidates]
    # Find only the lowest subclasses, i.e. most specialised front ends
    candidates = find_lowest_subclasses(candidates)
    if len(candidates) == 1:
        return candidates[0](*args, **kwargs)
    if len(candidates) == 0:
        # Last resort: look the argument up as a UUID in the Enzo run
        # database, if one is configured.
        if ytcfg.get("yt", "enzo_db") != '' \
                and len(args) == 1 \
                and isinstance(args[0], str):
            erdb = EnzoRunDatabase()
            fn = erdb.find_uuid(args[0])
            n = "EnzoDataset"
            if n in output_type_registry \
                    and output_type_registry[n]._is_valid(fn):
                return output_type_registry[n](fn)
        mylog.error("Couldn't figure out output type for %s", args[0])
        raise YTOutputNotIdentified(args, kwargs)
    # More than one surviving candidate: ambiguous input.
    mylog.error("Multiple output type candidates for %s:", args[0])
    for c in candidates:
        mylog.error("    Possible: %s", c)
    raise YTOutputNotIdentified(args, kwargs)
def load(fn, *args, **kwargs):
    """
    Load a Dataset or DatasetSeries object.
    The data format is automatically discovered, and the exact return type is
    the corresponding subclass of
    :class:`yt.data_objects.static_output.Dataset`. A
    :class:`yt.data_objects.time_series.DatasetSeries` is created if the first
    argument is a pattern.

    Parameters
    ----------
    fn : str, os.Pathlike, or byte (types supported by os.path.expanduser)
        A path to the data location. This can be a file name, directory name,
        a glob pattern, or a url (for data types that support it).

    Additional arguments, if any, are passed down to the return class.

    Returns
    -------
    :class:`yt.data_objects.static_output.Dataset` object
        If fn is a single path, create a Dataset from the appropriate
        subclass.

    :class:`yt.data_objects.time_series.DatasetSeries`
        If fn is a glob pattern (i.e. containing wildcards '[]?!*'), create a
        series.

    Raises
    ------
    FileNotFoundError
        If fn does not match any existing file or directory.

    yt.utilities.exceptions.YTOutputNotIdentified
        If fn matches existing files or directories with undetermined format.

    yt.utilities.exceptions.YTAmbiguousDataType
        If the data format matches more than one class of similar
        specialization levels.
    """
    fn = os.path.expanduser(fn)

    # A wildcard in the path means the user wants a time series.
    if any(wildcard in fn for wildcard in "[]?!*"):
        from yt.data_objects.time_series import DatasetSeries
        return DatasetSeries(fn, *args, **kwargs)

    # Unless the dataset starts with http,
    # look for it using the path or relative to the data dir (in this order).
    if not (os.path.exists(fn) or fn.startswith("http")):
        data_dir = ytcfg.get("yt", "test_data_dir")
        alt_fn = os.path.join(data_dir, fn)
        if not os.path.exists(alt_fn):
            msg = f"No such file or directory: '{fn}'."
            if os.path.exists(data_dir):
                msg += f"\n(Also tried '{alt_fn}')."
            raise FileNotFoundError(msg)
        fn = alt_fn

    # Every frontend whose _is_valid accepts this path is a candidate;
    # keep only the most specialized ones.
    candidates = [
        cls for cls in output_type_registry.values()
        if cls._is_valid(fn, *args, **kwargs)
    ]
    candidates = find_lowest_subclasses(candidates)

    if len(candidates) == 1:
        return candidates[0](fn, *args, **kwargs)
    if len(candidates) > 1:
        raise YTAmbiguousDataType(fn, candidates)
    raise YTOutputNotIdentified(fn, args, kwargs)