class Dataset(tree.Node):
    """Datasets contain grids or other datasets

    Note:
        Datasets should probably be created using a vfile's
        `_make_dataset` to make sure the info dict is propagated
        appropriately

    It is the programmer's responsibility to ensure objects added to a
    dataset have __getitem__ and get_fields methods, this is not
    enforced
    """
    children = None  # Bucket or (time, grid)
    active_child = None

    topology_info = None
    geometry_info = None
    crds = None

    def __init__(self, *args, **kwargs):
        """Create a dataset and add every positional argument as a child

        info is for information that is shared for a whole tree, from
        vfile all the way down to fields

        Args:
            *args: children, each one is handed to :py:meth:`add` in
                order, so the last one ends up being the active child
            **kwargs: forwarded to the parent class constructor
        """
        super(Dataset, self).__init__(**kwargs)
        # only make a fresh container if nothing provided one already
        if self.children is None:
            self.children = Bucket(ordered=True)
        self.active_child = None
        for arg in args:
            self.add(arg)

    def add(self, child, set_active=True):
        """Store child under `child.name`, optionally making it active

        Args:
            child: object to add; it is first passed to
                `self.prepare_child`
            set_active (bool): if True, child becomes `active_child`
        """
        self.prepare_child(child)
        self.children[child.name] = child
        if set_active:
            self.active_child = child

    def add_deferred(self, key, callback, callback_args=None,
                     callback_kwargs=None, set_active=True):
        """Add a :py:class:`DeferredChild` placeholder named `key`

        Args:
            key: name given to the deferred child
            callback: forwarded to DeferredChild
            callback_args: forwarded to DeferredChild
            callback_kwargs: forwarded to DeferredChild
            set_active (bool): forwarded to :py:meth:`add`
        """
        child = DeferredChild(callback, callback_args=callback_args,
                              callback_kwargs=callback_kwargs,
                              parent=self, name=key)
        self.add(child, set_active=set_active)

    def _clear_cache(self):
        for child in self.children:
            child.clear_cache()

    def clear_cache(self):
        """Clear all childrens' caches"""
        self._clear_cache()

    def remove_all_items(self):
        """Tear down every child recursively, then start with an empty
        container of children
        """
        for child in self.children:
            self.tear_down_child(child)
            child.remove_all_items()
        self.children = Bucket(ordered=True)

    def activate(self, child_handle):
        """Make the child found under `child_handle` the active child

        NOTE(review): an older version of this docstring claimed the
        lookup recurses into the active child "because it uses
        getitem", but the lookup here is `self.children[...]`, not
        `self[...]` -- confirm which one is intended.
        """
        self.active_child = self.children[child_handle]

    def activate_time(self, time):
        """ this is basically 'activate' except it specifically picks out
        temporal datasets, and does all children, not just the active child
        """
        for child in self.children:
            try:
                child.activate_time(time)
            except AttributeError:
                # child has no notion of time; note that this also hides
                # AttributeErrors raised inside child.activate_time
                pass

    def nr_times(self, sel=slice(None), val_endpoint=True, interior=False,
                 tdunit='s', tol=100):
        """Return `nr_times(...)` of the first child that supports it

        All arguments are forwarded unchanged to the child.

        Raises:
            RuntimeError: no child provided an answer
        """
        for child in self.children:
            try:
                return child.nr_times(sel=sel, val_endpoint=val_endpoint,
                                      interior=interior, tdunit=tdunit,
                                      tol=tol)
            except AttributeError:
                pass
        raise RuntimeError("I find no temporal datasets")

    def iter_times(self, sel=slice(None), val_endpoint=True, interior=False,
                   tdunit='s', tol=100, resolved=True):
        """Return `iter_times(...)` of the first child that supports it

        Children are taken from `self.iter_resolved_children()`
        (presumably provided by the parent class -- it is not defined
        in this class). All arguments are forwarded unchanged.

        Raises:
            RuntimeError: no child provided an answer
        """
        for child in self.iter_resolved_children():
            try:
                return child.iter_times(sel=sel, val_endpoint=val_endpoint,
                                        interior=interior, tdunit=tdunit,
                                        tol=tol, resolved=resolved)
            except AttributeError:
                pass
        raise RuntimeError("I find no temporal datasets")

    def tslc_range(self, sel=slice(None), tdunit='s'):
        """Find endpoints for a time slice selection

        Note:
            If the selection is slice-by-location, the values are not
            adjusted to the nearest frame. For this functionality,
            you will want to use :py:func:`get_times` and pull out the
            first and last values.

        Raises:
            RuntimeError: no child provided an answer
        """
        for child in self.children:
            try:
                return child.tslc_range(sel=sel, tdunit=tdunit)
            except AttributeError:
                pass
        raise RuntimeError("I find no temporal datasets")

    def get_times(self, sel=slice(None), val_endpoint=True, interior=False,
                  tdunit='s', tol=100):
        """Return :py:meth:`iter_times` as a list

        Unlike :py:meth:`get_time`, this passes `resolved=False`.
        """
        return list(self.iter_times(sel=sel, val_endpoint=val_endpoint,
                                    interior=interior, tdunit=tdunit,
                                    tol=tol, resolved=False))

    def get_time(self, sel=slice(None), val_endpoint=True, interior=False,
                 tdunit='s', tol=100):
        """Return the first item produced by :py:meth:`iter_times`

        Raises:
            RuntimeError: the selection produced no time slices, or no
                child is a temporal dataset
        """
        try:
            return next(self.iter_times(sel=sel, val_endpoint=val_endpoint,
                                        interior=interior, tdunit=tdunit,
                                        tol=tol))
        except StopIteration:
            raise RuntimeError("Dataset has no time slices")

    def to_dataframe(self, fld_names=None, selection=Ellipsis,
                     time_sel=slice(None), time_col='time',
                     datetime_col='datetime'):
        """Consolidate grid's field data into pandas dataframe

        Every argument is forwarded to each child's `to_dataframe`, and
        the resulting frames are concatenated.

        Args:
            fld_names (sequence, None): grab specific fields by name,
                or None to grab all fields
            selection (selection): optional spatial selection
            time_sel (selection): optional time selection
            time_col (str): forwarded to the children
            datetime_col (str): forwarded to the children

        Returns:
            pandas.DataFrame
        """
        # deferred import so that viscid does not depend on pandas
        import pandas

        frames = [child.to_dataframe(fld_names=fld_names,
                                     selection=selection,
                                     time_sel=time_sel,
                                     time_col=time_col,
                                     datetime_col=datetime_col)
                  for child in self.children]
        frame = pandas.concat(frames, ignore_index=True, sort=False)

        # make sure crds are all at the beginning, since concat can reorder
        # them; the remaining columns keep the order concat gave them so
        # the layout is reproducible (set arithmetic would scramble it)
        col0 = list(frames[0].columns)
        seen = set(col0)
        trailing = []
        for col in frame.columns:
            if col not in seen:
                seen.add(col)
                trailing.append(col)
        frame = frame[col0 + trailing]
        return frame

    def iter_fields(self, time=None, fld_names=None):
        """ generator for fields in the active dataset,
        this will recurse down to a grid

        Returns None (after logging an error) if there is no active
        child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.iter_fields(time=time, fld_names=fld_names)

    def iter_field_items(self, time=None, fld_names=None):
        """ generator for (name, field) in the active dataset,
        this will recurse down to a grid

        Returns None (after logging an error) if there is no active
        child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.iter_field_items(time=time, fld_names=fld_names)

    def field_dict(self, time=None, fld_names=None, **kwargs):
        """ fields as dict of {name: field}

        Extra keyword arguments are accepted but not forwarded to the
        active child. Returns None (after logging an error) if there
        is no active child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.field_dict(time=time, fld_names=fld_names)

    def print_tree(self, depth=-1, prefix=""):
        """Print this dataset and its children, marking the active one

        Args:
            depth (int): children are recursed into while this is not
                0; it is decremented at each level, so a negative
                value never stops the recursion
            prefix (str): indentation printed before each child; an
                empty prefix marks the root call, which also prints
                self
        """
        if prefix == "":
            print(self)
            prefix += tree_prefix

        for child in self.children:
            suffix = ""
            if child is self.active_child:
                suffix = " <-- active"
            print("{0}{1}{2}".format(prefix, child, suffix))
            if depth != 0:
                child.print_tree(depth=depth - 1, prefix=prefix + tree_prefix)

    # def get_non_dataset(self):
    #     """ recurse down datasets until active_grid is not a subclass
    #         of Dataset """
    #     if isinstance(self.activate_grid, Dataset):
    #         return self.active_grid.get_non_dataset()
    #     else:
    #         return self.active_grid

    def get_field(self, fldname, time=None, slc=Ellipsis):
        """ recurse down active children to get a field

        Returns None (after logging an error) if there is no active
        child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.get_field(fldname, time=time, slc=slc)

    def get_grid(self, time=None):
        """ recurse down active children to get a grid

        Returns None (after logging an error) if there is no active
        child, or if resolving the active child gives None.
        """
        active = self.active_child
        # resolve only when there is something to resolve; calling
        # .resolve() on a missing active child would raise AttributeError
        # before the error could be reported
        child = None if active is None else active.resolve()

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.get_grid(time=time)

    def get_child(self, item):
        """ get a resolved child from this Dataset """
        return self.children[item].resolve()

    def __getitem__(self, item):
        """ if a child exists with handle, return it, else ask
        the active child if it knows what you want

        Raises:
            KeyError: item is not a child and there is no active child
        """
        if item in self.children:
            return self.get_child(item)
        elif self.active_child is not None:
            return self.active_child[item]
        else:
            raise KeyError(item)

    def __delitem__(self, item):
        # FIXME, is it possible to de-resolve item to a DeferredChild?
        child = self.get_child(item)
        child.clear_cache()
        self.children.remove_item(child)

    def __len__(self):
        return len(self.children)

    def __setitem__(self, name, child):
        # um... is this kosher??
        # note that this renames child before it is added
        child.name = name
        self.add(child)

    def __contains__(self, item):
        # FIXME, is it possible to de-resolve item to a DeferredChild?
        if item in self.children:
            return True
        # FIXME: this might cause a bug somewhere someday
        # without an active child there is nothing else to ask; `in None`
        # would raise TypeError
        if self.active_child is not None and item in self.active_child:
            return True
        return False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, value, traceback):
        # caches are cleared on exit; returning None does not suppress
        # an exception raised inside the with block
        self.clear_cache()
        return None

    def __iter__(self):
        return self.iter_resolved_children()
class Dataset(tree.Node):
    """Datasets contain grids or other datasets

    Note:
        Datasets should probably be created using a vfile's
        `_make_dataset` to make sure the info dict is propagated
        appropriately

    It is the programmer's responsibility to ensure objects added to a
    dataset have __getitem__ and get_fields methods, this is not
    enforced
    """
    children = None  # Bucket or (time, grid)
    active_child = None

    topology_info = None
    geometry_info = None
    crds = None

    def __init__(self, *args, **kwargs):
        """Create a dataset and add every positional argument as a child

        info is for information that is shared for a whole tree, from
        vfile all the way down to fields

        Args:
            *args: children, each one is handed to :py:meth:`add` in
                order, so the last one ends up being the active child
            **kwargs: forwarded to the parent class constructor
        """
        super(Dataset, self).__init__(**kwargs)
        # only make a fresh container if nothing provided one already
        if self.children is None:
            self.children = Bucket(ordered=True)
        self.active_child = None
        for arg in args:
            self.add(arg)

    def add(self, child, set_active=True):
        """Store child under `child.name`, optionally making it active

        Args:
            child: object to add; it is first passed to
                `self.prepare_child`
            set_active (bool): if True, child becomes `active_child`
        """
        self.prepare_child(child)
        self.children[child.name] = child
        if set_active:
            self.active_child = child

    def add_deferred(self, key, callback, callback_args=None,
                     callback_kwargs=None, set_active=True):
        """Add a :py:class:`DeferredChild` placeholder named `key`

        Args:
            key: name given to the deferred child
            callback: forwarded to DeferredChild
            callback_args: forwarded to DeferredChild
            callback_kwargs: forwarded to DeferredChild
            set_active (bool): forwarded to :py:meth:`add`
        """
        child = DeferredChild(callback, callback_args=callback_args,
                              callback_kwargs=callback_kwargs,
                              parent=self, name=key)
        self.add(child, set_active=set_active)

    def _clear_cache(self):
        for child in self.children:
            child.clear_cache()

    def clear_cache(self):
        """Clear all childrens' caches"""
        self._clear_cache()

    def remove_all_items(self):
        """Tear down every child recursively, then start with an empty
        container of children
        """
        for child in self.children:
            self.tear_down_child(child)
            child.remove_all_items()
        self.children = Bucket(ordered=True)

    def activate(self, child_handle):
        """Make the child found under `child_handle` the active child

        NOTE(review): an older version of this docstring claimed the
        lookup recurses into the active child "because it uses
        getitem", but the lookup here is `self.children[...]`, not
        `self[...]` -- confirm which one is intended.
        """
        self.active_child = self.children[child_handle]

    def activate_time(self, time):
        """ this is basically 'activate' except it specifically picks out
        temporal datasets, and does all children, not just the active child
        """
        for child in self.children:
            try:
                child.activate_time(time)
            except AttributeError:
                # child has no notion of time; note that this also hides
                # AttributeErrors raised inside child.activate_time
                pass

    def nr_times(self, sel=slice(None), val_endpoint=True, interior=False,
                 tdunit='s', tol=100):
        """Return `nr_times(...)` of the first child that supports it

        All arguments are forwarded unchanged to the child.

        Raises:
            RuntimeError: no child provided an answer
        """
        for child in self.children:
            try:
                return child.nr_times(sel=sel, val_endpoint=val_endpoint,
                                      interior=interior, tdunit=tdunit,
                                      tol=tol)
            except AttributeError:
                pass
        raise RuntimeError("I find no temporal datasets")

    def iter_times(self, sel=slice(None), val_endpoint=True, interior=False,
                   tdunit='s', tol=100, resolved=True):
        """Return `iter_times(...)` of the first child that supports it

        Children are taken from `self.iter_resolved_children()`
        (presumably provided by the parent class -- it is not defined
        in this class). All arguments are forwarded unchanged.

        Raises:
            RuntimeError: no child provided an answer
        """
        for child in self.iter_resolved_children():
            try:
                return child.iter_times(sel=sel, val_endpoint=val_endpoint,
                                        interior=interior, tdunit=tdunit,
                                        tol=tol, resolved=resolved)
            except AttributeError:
                pass
        raise RuntimeError("I find no temporal datasets")

    def tslc_range(self, sel=slice(None), tdunit='s'):
        """Find endpoints for a time slice selection

        Note:
            If the selection is slice-by-location, the values are not
            adjusted to the nearest frame. For this functionality,
            you will want to use :py:func:`get_times` and pull out the
            first and last values.

        Raises:
            RuntimeError: no child provided an answer
        """
        for child in self.children:
            try:
                return child.tslc_range(sel=sel, tdunit=tdunit)
            except AttributeError:
                pass
        raise RuntimeError("I find no temporal datasets")

    def get_times(self, sel=slice(None), val_endpoint=True, interior=False,
                  tdunit='s', tol=100):
        """Return :py:meth:`iter_times` as a list

        Unlike :py:meth:`get_time`, this passes `resolved=False`.
        """
        return list(self.iter_times(sel=sel, val_endpoint=val_endpoint,
                                    interior=interior, tdunit=tdunit,
                                    tol=tol, resolved=False))

    def get_time(self, sel=slice(None), val_endpoint=True, interior=False,
                 tdunit='s', tol=100):
        """Return the first item produced by :py:meth:`iter_times`

        Raises:
            RuntimeError: the selection produced no time slices, or no
                child is a temporal dataset
        """
        try:
            return next(self.iter_times(sel=sel, val_endpoint=val_endpoint,
                                        interior=interior, tdunit=tdunit,
                                        tol=tol))
        except StopIteration:
            raise RuntimeError("Dataset has no time slices")

    def to_dataframe(self, fld_names=None, selection=Ellipsis,
                     time_sel=slice(None), time_col='time',
                     datetime_col='datetime'):
        """Consolidate grid's field data into pandas dataframe

        Every argument is forwarded to each child's `to_dataframe`, and
        the resulting frames are concatenated.

        Args:
            fld_names (sequence, None): grab specific fields by name,
                or None to grab all fields
            selection (selection): optional spatial selection
            time_sel (selection): optional time selection
            time_col (str): forwarded to the children
            datetime_col (str): forwarded to the children

        Returns:
            pandas.DataFrame
        """
        # deferred import so that viscid does not depend on pandas
        import pandas

        frames = [child.to_dataframe(fld_names=fld_names,
                                     selection=selection,
                                     time_sel=time_sel,
                                     time_col=time_col,
                                     datetime_col=datetime_col)
                  for child in self.children]
        frame = pandas.concat(frames, ignore_index=True, sort=False)

        # make sure crds are all at the beginning, since concat can reorder
        # them; the remaining columns keep the order concat gave them so
        # the layout is reproducible (set arithmetic would scramble it)
        col0 = list(frames[0].columns)
        seen = set(col0)
        trailing = []
        for col in frame.columns:
            if col not in seen:
                seen.add(col)
                trailing.append(col)
        frame = frame[col0 + trailing]
        return frame

    def iter_fields(self, time=None, fld_names=None):
        """ generator for fields in the active dataset,
        this will recurse down to a grid

        Returns None (after logging an error) if there is no active
        child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.iter_fields(time=time, fld_names=fld_names)

    def iter_field_items(self, time=None, fld_names=None):
        """ generator for (name, field) in the active dataset,
        this will recurse down to a grid

        Returns None (after logging an error) if there is no active
        child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.iter_field_items(time=time, fld_names=fld_names)

    def field_dict(self, time=None, fld_names=None, **kwargs):
        """ fields as dict of {name: field}

        Extra keyword arguments are accepted but not forwarded to the
        active child. Returns None (after logging an error) if there
        is no active child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.field_dict(time=time, fld_names=fld_names)

    def print_tree(self, depth=-1, prefix=""):
        """Print this dataset and its children, marking the active one

        Args:
            depth (int): children are recursed into while this is not
                0; it is decremented at each level, so a negative
                value never stops the recursion
            prefix (str): indentation printed before each child; an
                empty prefix marks the root call, which also prints
                self
        """
        if prefix == "":
            print(self)
            prefix += tree_prefix

        for child in self.children:
            suffix = ""
            if child is self.active_child:
                suffix = " <-- active"
            print("{0}{1}{2}".format(prefix, child, suffix))
            if depth != 0:
                child.print_tree(depth=depth - 1, prefix=prefix + tree_prefix)

    # def get_non_dataset(self):
    #     """ recurse down datasets until active_grid is not a subclass
    #         of Dataset """
    #     if isinstance(self.activate_grid, Dataset):
    #         return self.active_grid.get_non_dataset()
    #     else:
    #         return self.active_grid

    def get_field(self, fldname, time=None, slc=Ellipsis):
        """ recurse down active children to get a field

        Returns None (after logging an error) if there is no active
        child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.get_field(fldname, time=time, slc=slc)

    def get_grid(self, time=None):
        """ recurse down active children to get a grid

        Returns None (after logging an error) if there is no active
        child, or if resolving the active child gives None.
        """
        active = self.active_child
        # resolve only when there is something to resolve; calling
        # .resolve() on a missing active child would raise AttributeError
        # before the error could be reported
        child = None if active is None else active.resolve()

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.get_grid(time=time)

    def get_child(self, item):
        """ get a resolved child from this Dataset """
        return self.children[item].resolve()

    def __getitem__(self, item):
        """ if a child exists with handle, return it, else ask
        the active child if it knows what you want

        Raises:
            KeyError: item is not a child and there is no active child
        """
        if item in self.children:
            return self.get_child(item)
        elif self.active_child is not None:
            return self.active_child[item]
        else:
            raise KeyError(item)

    def __delitem__(self, item):
        # FIXME, is it possible to de-resolve item to a DeferredChild?
        child = self.get_child(item)
        child.clear_cache()
        self.children.remove_item(child)

    def __len__(self):
        return len(self.children)

    def __setitem__(self, name, child):
        # um... is this kosher??
        # note that this renames child before it is added
        child.name = name
        self.add(child)

    def __contains__(self, item):
        # FIXME, is it possible to de-resolve item to a DeferredChild?
        if item in self.children:
            return True
        # FIXME: this might cause a bug somewhere someday
        # without an active child there is nothing else to ask; `in None`
        # would raise TypeError
        if self.active_child is not None and item in self.active_child:
            return True
        return False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, value, traceback):
        # caches are cleared on exit; returning None does not suppress
        # an exception raised inside the with block
        self.clear_cache()
        return None

    def __iter__(self):
        return self.iter_resolved_children()
class Dataset(tree.Node):
    """Datasets contain grids or other datasets

    Note:
        Datasets should probably be created using a vfile's
        `_make_dataset` to make sure the info dict is propagated
        appropriately

    It is the programmer's responsibility to ensure objects added to a
    dataset have __getitem__ and get_fields methods, this is not
    enforced
    """
    children = None  # Bucket or (time, grid)
    active_child = None

    topology_info = None
    geometry_info = None
    crds = None

    def __init__(self, *args, **kwargs):
        """Create an empty dataset

        info is for information that is shared for a whole tree, from
        vfile all the way down to fields

        All arguments are forwarded to the parent class constructor;
        children have to be added afterwards with :py:meth:`add`.
        """
        super(Dataset, self).__init__(*args, **kwargs)
        self.children = Bucket(ordered=True)
        self.active_child = None

    def add(self, child, set_active=True):
        """Store child under `child.name`, optionally making it active

        Args:
            child: object to add; it is first passed to
                `self.prepare_child`
            set_active (bool): if True, child becomes `active_child`
        """
        self.prepare_child(child)
        self.children[child.name] = child
        if set_active:
            self.active_child = child

    def _clear_cache(self):
        for child in self.children:
            child.clear_cache()

    def clear_cache(self):
        """Clear all childrens' caches"""
        self._clear_cache()

    def remove_all_items(self):
        """Tear down every child recursively, then start with an empty
        container of children
        """
        for child in self.children:
            self.tear_down_child(child)
            child.remove_all_items()
        self.children = Bucket(ordered=True)

    def activate(self, child_handle):
        """Make the child found under `child_handle` the active child

        NOTE(review): an older version of this docstring claimed the
        lookup recurses into the active child "because it uses
        getitem", but the lookup here is `self.children[...]`, not
        `self[...]` -- confirm which one is intended.
        """
        self.active_child = self.children[child_handle]

    def activate_time(self, time):
        """ this is basically 'activate' except it specifically picks out
        temporal datasets, and does all children, not just the active child
        """
        for child in self.children:
            try:
                child.activate_time(time)
            except AttributeError:
                # child has no notion of time; note that this also hides
                # AttributeErrors raised inside child.activate_time
                pass

    def nr_times(self, slice_str=":"):
        """Return `nr_times(slice_str)` of the first child that
        supports it

        Raises:
            RuntimeError: no child provided an answer
        """
        for child in self.children:
            try:
                return child.nr_times(slice_str)
            except AttributeError:
                pass
        raise RuntimeError("I find no temporal datasets")

    def iter_times(self, slice_str=":"):
        """Return `iter_times(slice_str)` of the first child that
        supports it

        Raises:
            RuntimeError: no child provided an answer
        """
        for child in self.children:
            try:
                return child.iter_times(slice_str)
            except AttributeError:
                pass
        raise RuntimeError("I find no temporal datasets")

    def get_times(self, slice_str=":"):
        """Return :py:meth:`iter_times` as a list"""
        return list(self.iter_times(slice_str=slice_str))

    def get_time(self, slice_str=":"):
        """Return the first item produced by :py:meth:`iter_times`

        Raises:
            RuntimeError: the selection produced no time slices, or no
                child is a temporal dataset
        """
        try:
            return next(self.iter_times(slice_str))
        except StopIteration:
            raise RuntimeError("Dataset has no time slices")

    def iter_fields(self, time=None, named=None):
        """ generator for fields in the active dataset,
        this will recurse down to a grid

        Returns None (after logging an error) if there is no active
        child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.iter_fields(time=time, named=named)

    def print_tree(self, depth=-1, prefix=""):
        """Print this dataset and its children, marking the active one

        Args:
            depth (int): children are recursed into while this is not
                0; it is decremented at each level, so a negative
                value never stops the recursion
            prefix (str): indentation printed before each child; an
                empty prefix marks the root call, which also prints
                self
        """
        if prefix == "":
            print(self)
            prefix += tree_prefix

        for child in self.children:
            suffix = ""
            if child is self.active_child:
                suffix = " <-- active"
            print("{0}{1}{2}".format(prefix, child, suffix))
            if depth != 0:
                child.print_tree(depth=depth - 1, prefix=prefix + tree_prefix)

    # def get_non_dataset(self):
    #     """ recurse down datasets until active_grid is not a subclass
    #         of Dataset """
    #     if isinstance(self.activate_grid, Dataset):
    #         return self.active_grid.get_non_dataset()
    #     else:
    #         return self.active_grid

    def get_field(self, fldname, time=None, slc=None):
        """ recurse down active children to get a field

        Returns None (after logging an error) if there is no active
        child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.get_field(fldname, time=time, slc=slc)

    def get_grid(self, time=None):
        """ recurse down active children to get a grid

        Returns None (after logging an error) if there is no active
        child.
        """
        child = self.active_child

        if child is None:
            logger.error("Could not get appropriate child...")
            return None
        else:
            return child.get_grid(time=time)

    def get_child(self, item):
        """ get a child from this Dataset """
        return self.children[item]

    def __getitem__(self, item):
        """ if a child exists with handle, return it, else ask
        the active child if it knows what you want

        Raises:
            KeyError: item is not a child and there is no active child
        """
        if item in self.children:
            return self.get_child(item)
        elif self.active_child is not None:
            return self.active_child[item]
        else:
            raise KeyError(item)

    def __delitem__(self, item):
        child = self.get_child(item)
        child.clear_cache()
        self.children.remove_item(child)

    def __len__(self):
        return len(self.children)

    def __setitem__(self, name, child):
        # um... is this kosher??
        # note that this renames child before it is added
        child.name = name
        self.add(child)

    def __contains__(self, item):
        if item in self.children:
            return True
        # FIXME: this might cause a bug somewhere someday
        # without an active child there is nothing else to ask; `in None`
        # would raise TypeError
        if self.active_child is not None and item in self.active_child:
            return True
        return False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, value, traceback):
        # caches are cleared on exit; returning None does not suppress
        # an exception raised inside the with block
        self.clear_cache()
        return None

    def __iter__(self):
        return iter(self.children)