def process_index(
        self,
        index: pd.Index,
        *,
        root: str = None,
) -> pd.Index:
    r"""Segment files or segments referenced by an index.

    Args:
        index: index conform to audformat_
        root: root folder to expand relative file paths

    Returns:
        Segmented index conform to audformat_

    Raises:
        RuntimeError: if sampling rates do not match
        RuntimeError: if channel selection is invalid

    .. _audformat: https://audeering.github.io/audformat/data-format.html

    """
    # Normalize to a segmented (file, start, end) index first.
    segmented = audformat.utils.to_segmented_index(index)
    utils.assert_index(segmented)

    # Nothing to process for an empty index.
    if segmented.empty:
        return segmented

    files = segmented.get_level_values('file')
    starts = segmented.get_level_values('start')
    ends = segmented.get_level_values('end')
    return self.process_files(files, starts=starts, ends=ends, root=root)
def get_index(
        index_set: pd.Index,
        **levels_values,
):
    """Obtain the integer index array for a given index set / level / value
    list combination.

    Arguments:
        index_set (pd.Index): Index set whose entries are filtered.

    Keyword Arguments:
        level (value): Each keyword argument is interpreted as a
            level / value(s) combination, where the keyword must correspond
            to a level name of ``index_set``. Values may be a single value,
            list, tuple, numpy array or pandas index.

    Returns:
        np.ndarray: Integer positions of entries matching all combinations.

    Raises:
        AssertionError: If the resulting index array is empty, which is
            usually caused by an invalid level / value combination.
    """
    # Obtain mask for each level / values combination keyword arguments.
    # NOTE: ``np.bool`` was removed in NumPy 1.24; the builtin ``bool``
    # is the supported spelling and behaves identically here.
    mask = np.ones(len(index_set), dtype=bool)
    for level, values in levels_values.items():
        # Ensure that values are passed as list.
        if isinstance(values, (list, tuple)):
            pass
        elif isinstance(values, np.ndarray):
            # Convert numpy arrays to list. A 0-d array's ``tolist`` returns
            # a scalar, hence the extra wrap below.
            values = values.tolist()
            values = [values] if not isinstance(values, list) else values
        elif isinstance(values, pd.Index):
            # Convert pandas index to list.
            values = values.to_list()
        else:
            # Convert single values into list with one item.
            values = [values]
        # Obtain mask.
        mask &= index_set.get_level_values(level).isin(values)
    # Obtain integer index array.
    index = np.flatnonzero(mask)
    # Assert that index is not empty (keeps AssertionError contract).
    try:
        assert len(index) > 0
    except AssertionError:
        logger.error(f"Empty index returned for: {levels_values}")
        raise
    return index
def get_index(index_set: pd.Index, raise_empty_index_error: bool = True, **levels_values):
    """Obtain the integer index array for a given index set / level / value
    list combination.

    :syntax:
        - ``get_index(electric_grid_model.nodes, node_type='source', phase=1)``:
          Get index array for entries in index set `electric_grid_model.nodes`
          with given `node_type` and `phase`.

    Arguments:
        index_set (pd.Index): Index set, e.g., `electric_grid_model.nodes`.

    Keyword Arguments:
        raise_empty_index_error (bool): If true, raise an exception if obtained index array is empty. This is
            the default behavior, because it is usually caused by an invalid level / value combination.
        level (value): All other keyword arguments are interpreted as level / value combinations, where `level` must
            correspond to a level name of the index set.
    """
    # Start from an all-true mask and narrow it with each level condition.
    mask = np.ones(len(index_set), dtype=bool)
    for level, values in levels_values.items():
        # Normalize ``values`` into a list (lists / tuples pass through,
        # since ``isin`` accepts both directly).
        if isinstance(values, np.ndarray):
            # ``tolist`` on a 0-d array yields a scalar; re-wrap in that case.
            values = values.tolist()
            if not isinstance(values, list):
                values = [values]
        elif isinstance(values, pd.Index):
            values = values.to_list()
        elif not isinstance(values, (list, tuple)):
            values = [values]
        mask &= index_set.get_level_values(level).isin(values)
    # Translate the boolean mask into integer positions.
    index = np.flatnonzero(mask)
    # An empty result usually signals an invalid level / value combination.
    if raise_empty_index_error and len(index) == 0:
        raise ValueError(f"Empty index returned for: {levels_values}")
    return index
def __init__(
        self,
        plotting_func: Callable,
        columns: pd.Index,
        iterations: Union[List[int], None],
        experiments: Union[List[int], None],
        learner_names: Union[List[str], None],
):
    """Store plotting configuration and derive column filters.

    Args:
        plotting_func: Callable used to render the plots.
        columns: MultiIndex of result columns; assumed to contain the
            levels 'Iteration', 'Experiment' and 'Learner' — TODO confirm
            against callers.
        iterations: Iterations to plot, or None for all found in `columns`.
        experiments: Experiments to plot, or None for all found in `columns`.
        learner_names: Learners to plot, or None for all found in `columns`.
    """
    self.plotting_func = plotting_func
    self.columns = columns

    def _get_default_if_none(_list, level):
        # Fall back to every value present in the given column index level.
        if _list is None:
            return self._get_default_col_vals(columns=columns, level=level)
        return _list

    self.iterations = _get_default_if_none(iterations, 'Iteration')
    self.experiments = _get_default_if_none(experiments, 'Experiment')
    self.learner_names = _get_default_if_none(learner_names, 'Learner')
    # np.in1d was deprecated in NumPy 1.25 and removed in NumPy 2.0;
    # np.isin is the drop-in supported equivalent.
    self.learner_filter = np.isin(
        columns.get_level_values('Learner'), self.learner_names)
class FoamFrame(DataFrame):
    r"""Data representation of OpenFOAM field (eulerian and lagrangian)
    and set files.

    Instantiated through read methods, e.g:
    read_sets, read_lag, read_eul, read_exp

    Examples
    ----------
    >>> case = read_sets(folder="home/user/case", plot_properties={})
    >>> case.data  # access data frame

    Parameters
    ----------
    folder: data location containing a time or sets folder
    files: search only for files with given name, None for all files
    plot_properties: dictionary for consistent plotting of ranges and ax labels
    skiplines: read only every n-th entry
    cloud: name of lagrangian cloud
    name: case name for plot legends

    Notes
    ----------
    If data is accessed through [] only the latest item is returned.
    For full times access iteratetimes() can be used.

    Categories:

        { "rad_pos": lambda field -> position
          "centreLine": [] lambda field -> i of []
        }

        example: ``lambda field: re.search(r'[0-9]*\.[0-9]*').group()[0]``

    TODO:
        use case as cases objects with a 3-level index
            case['u'] access time of all cases
            -> df.iloc[df.index.isin([1], level=1)]
        refactor plot into case objects itself,
            ?case.show('t', 'u', time_series=False)
        refactor origins
        make iteratetimes() access a delta
    """

    def __init__(self, *args, **kwargs):
        # Pull FoamFrame-specific options out of kwargs before the
        # remainder is forwarded to the DataFrame constructor.
        skip = kwargs.get('skiplines', 1)
        times = kwargs.get('readtime', slice(0, None))
        name = kwargs.get('name', 'None')
        symb = kwargs.get('symb', 'o')
        files = kwargs.get('search_files', None)
        properties = kwargs.get('properties', None)
        lines = kwargs.get('maxlines', 0)
        search = kwargs.get('search_pattern', FPNUMBER)
        folder = kwargs.get('folder', None)
        plot_properties = kwargs.get('plot_properties', PlotProperties())
        show_func = kwargs.get('show_func', None)
        validate = kwargs.get('validate', True)
        preHooks = kwargs.get('preHooks', None)
        exclude = kwargs.get('exclude', [" "])
        # FIXME
        times_stride = kwargs.get('times_stride', 1)
        times_range = kwargs.get('times_range', "all")
        # FIXME implement strides
        times_slice = times_range
        # Keys consumed above; remove them so pandas does not see them.
        keys = ['skiplines', 'readtime', 'preHooks', 'name', 'symb',
                'search_files', 'properties', 'maxlines', 'search_pattern',
                'folder', 'plot_properties', 'show_func', 'exclude',
                'times_stride', 'times_range', ]
        for k in keys:
            if k in kwargs:
                kwargs.pop(k)
        # TODO explain what happens here
        if folder is None:
            # No folder given: behave like a plain DataFrame
            # (used e.g. by from_dict and by pandas-internal construction).
            # super(FoamFrame, self).__init__(*args, **kwargs)
            DataFrame.__init__(self, *args, **kwargs)
        else:
            if preHooks:
                for hook in preHooks:
                    hook.execute()
            # NOTE(review): Database and case_data_base are module-level
            # globals (a persistent shelve-like cache) — TODO confirm.
            if (folder in case_data_base) and Database:
                print("re-importing", end=" ")
            else:
                print("importing", end=" ")
            print(name + ": ", end="")
            origins, data = import_foam_folder(
                path=folder,
                search=search,
                files=files,
                skiplines=skip,
                maxlines=lines,
                skiptimes=times,
                exclude=exclude,
                times_slice=times_slice
            )
            try:
                DataFrame.__init__(self, data)
            except Exception as e:
                # Best-effort: report the construction failure but continue,
                # leaving an empty frame.
                print(e)
            self.properties = Props(
                origins, name, plot_properties, folder, symb, show_func)
            if validate and Database:
                self.validate_origins(folder, origins)
            # register to database
            if Database:
                case_data_base.sync()

    def validate_origins(self, folder, origins):
        """Compare file hashes against the cached database entry and
        report consistent / new / missing / corrupted timesteps."""
        origins.update_hashes()
        # NOTE(review): has_key is a shelve/dict-like API of the cache
        # object — TODO confirm, dict.has_key was removed in Python 3.
        if case_data_base.has_key(folder):
            if (case_data_base[folder]["hash"] == origins.dct["hash"]):
                print(" [consistent]")
            else:
                entries_new = len(origins.dct.keys())
                entries_old = len(case_data_base[folder].keys())
                if entries_new > entries_old:
                    print("[new timestep] ")
                    # print origins.dct.keys()
                    case_data_base[folder] = origins.dct
                elif entries_new < entries_old:
                    # print folder
                    # print origins.dct.keys()
                    # print case_data_base[folder].keys()
                    print("[missing timestep]")
                    case_data_base[folder] = origins.dct
                elif entries_new == entries_old:
                    # Same number of entries but differing top-level hash:
                    # walk individual field hashes to locate corruption.
                    print("[corrupted]", end="")
                    for time, loc, field, item in origins.hashes():
                        time_name, time_hash = time
                        loc_name, loc_hash = loc
                        field_name, field_hash = field
                        filename, item_hash = item
                        try:
                            orig_hash = case_data_base[folder][time_name][loc_name][field_name][1]
                        except:
                            # Entry missing in cache: treat as unchanged.
                            orig_hash = item_hash
                        if (item_hash != orig_hash):
                            print("")
                            print("corrupted fields:")
                            print("\t" + field_name + " in " + filename)
                    case_data_base[folder] = origins.dct
        else:
            case_data_base[folder] = origins.dct

    def source(self, col):
        """ find corresponding file for column """
        # return get time loc and return dict for every column
        # latest.source['u']
        return

    # ----------------------------------------------------------------------
    # Internal helper methods

    @property
    def _constructor(self):
        # override DataFrames constructor
        # to enable method chaining
        return FoamFrame

    def _is_idx(self, item):
        """ test if item is column or idx """
        itemt = type(item)
        # if item is Series of booleans
        # it cant be an index
        from past.builtins import unicode
        from past.builtins import str as text
        if itemt not in [int, str, float, unicode, text]:
            return False
        else:
            return item in self.index.names

    @property
    def grouped(self):
        # True when the frame carries an extra "Group" index level.
        return self._is_idx("Group")

    @staticmethod
    def from_dict(input_dict, name="None", plot_properties=None,
                  symb=".", show_func="scatter"):
        """ import raw data from python dictionary
            format {(timestep, pos, ): [fields]}
            usage: {(0): [1, 2, 3]}
        """
        pP = (PlotProperties() if not plot_properties else plot_properties)
        elems = len(input_dict[list(input_dict.keys())[0]])
        zeros = [0 for _ in range(elems)]
        # Optional "Pos" entry supplies the position level values.
        pos = (input_dict[("Pos")] if input_dict.get(("Pos"), False) else zeros)
        nums = list(range(elems))
        if input_dict.get("Pos"):
            input_dict.pop("Pos")
        # NOTE(review): MultiIndex(labels=...) is legacy pandas API
        # (renamed to codes= in pandas 0.24) — TODO confirm pinned version.
        mi = MultiIndex(
            levels=[zeros, zeros, pos],
            labels=[nums, nums, nums],
            names=['Time', 'Loc', 'Pos'])
        ff = FoamFrame(DataFrame(input_dict, index=mi), folder=None)
        ff.properties = Props("raw", name, pP, "", symb, show_func)
        ff.index = mi
        return ff

    # ----------------------------------------------------------------------
    # Info methods

    def __str__(self):
        return "FoamFrame: \n" + super(FoamFrame, self).__str__()

    @property
    def times(self):
        """ return times for case """
        return set([_[0] for _ in self.index.values])

    @property
    def locations(self):
        """ return locations for case """
        return set([_[1] for _ in self.index.values])

    # ----------------------------------------------------------------------
    # Selection methods

    def __getitem__(self, item):
        """ call pandas DataFrame __getitem__ if item is not an index """
        if self._is_idx(item):
            try:
                level = self.index.names.index(item)
                return list(zip(*self.index.values))[level]
            except:
                return
                # print("failed ", item) NOTE for debugging
        else:
            # Missing columns yield an empty Series instead of raising.
            if (type(item) is str) and item not in self.columns:
                return Series()
            else:
                return super(FoamFrame, self).__getitem__(item)

    @property
    def latest(self):
        """ return latest time for case """
        ret = self.query('Time == {}'.format(self.latest_time))
        ret.properties = self.properties
        return ret

    @property
    def latest_time(self):
        """ return value of latest time step """
        return max(self.index.levels[0])

    @property
    def earliest_time(self):
        """ return value of earliest time step """
        return min(self.index.levels[0])

    def after(self, time):
        # All rows with Time strictly greater than the given time.
        return self.filter("Time", index=lambda x: x > time)

    def at_time(self, time):
        """ return given time for case """
        ret = self.query('Time == {}'.format(time))
        ret.properties = self.properties
        return ret

    def at(self, idx_name, idx_val):
        """ select from foamframe based on index name and value"""
        # TODO FIX This
        ret = self[self.index.get_level_values(idx_name) == idx_val]
        # match = [(val in idx_val)
        #          for val in self.index.get_level_values(idx_name)]
        # ret = self[match]
        if idx_name == "Group":
            ret.index = ret.index.droplevel("Group")
        ret.properties = self.properties
        return ret

    def id(self, loc):
        """ Return FoamFrame based on position """
        return self.at(idx_name='Pos', idx_val=loc)

    def location(self, loc):
        """ Return FoamFrame based on location """
        return self.at(idx_name='Loc', idx_val=loc)

    def loc_names(self, key):
        """ search for all index names matching keyword"""
        return [_ for _ in self.index.get_level_values("Loc") if key in _]

    def field_names(self, key):
        """ search for all field names matching keyword"""
        return [_ for _ in self.columns if key in _]

    # ----------------------------------------------------------------------
    # Manipulation methods

    def add(self, data, label):
        """ Add a given Series

            Usage: case.add(sqrt(uu), 'u_rms')
        """
        self.latest[label] = data
        return self

    def rename(self, search, replace):
        """ rename field names based on regex """
        import re
        self.columns = [re.sub(search, replace, name) for name in self.columns]

    def rename_idx(self, search, replace):
        """ rename index entries (Loc level) based on exact match """
        self.index = Index(
            [(t, replace if x == search else x, i)
             for t, x, i in list(self.index)],
            names=self.index.names)

    def rename_idxs(self, rename_map):
        """ rename multiple field names based dictionary of {search: replace} """
        for s, r in rename_map.items():
            self.rename_idx(s, r)

    # ----------------------------------------------------------------------
    # Plotting methods

    def draw(self, x, y, z, title, func, figure, data=None,
             legend_prefix="", titles=None, **kwargs):
        # Dispatch to the configured plot wrapper; defaults to self as data.
        data = (data if isinstance(data, DataFrame) else self)
        return rcParams["plotWrapper"].draw(
            x=x, y=y, z=z, data=data, title=title, func=func,
            figure=figure, legend_prefix="", titles=None,
            properties=self.properties, **kwargs)

    def histo_data(self, y, weights, bins):
        # Thin wrapper over numpy's (density-normalised) histogram.
        return np.histogram(y, density=True, weights=weights, bins=bins)

    def histogram(self, y, x=None, title="", figure=False, weights=False, **kwargs):
        """ draw a histogram of field y, optionally weighted by a field """
        figure = (figure if figure else rcParams["plotWrapper"].GnuplotFigure())
        if weights:
            weights = self[weights]
        hist, edges = self.histo_data(self[y], weights, kwargs.get("bins", 50))
        # Bin centres from consecutive edges.
        centres = [(edges[i] + edges[i+1])*0.5 for i in range(len(edges)-1)]
        df = DataFrame({'centres': centres, 'hist': hist})
        return self.draw(x='centres', y='hist', z=None, data=df, title=title,
                         func="quad", figure=figure, **kwargs)

    def cdf(self, y, x=None, title="", figure=False, weights=False, **kwargs):
        """ draw the cumulative distribution of field y """
        # NOTE(review): normed= was removed from np.histogram (NumPy 1.24);
        # density= is the replacement — TODO confirm pinned NumPy version.
        a, b = np.histogram(self[y], weights=self[weights], bins=20, normed=True)
        dx = b[1]-b[0]
        cdf = np.cumsum(a)*dx
        df = DataFrame({'centres': b[1:], 'hist': cdf})
        return self.draw(x='centres', y='hist', z=None, data=df, title=title,
                         func="line", figure=figure, **kwargs)

    def scatter(self, y, x='Pos', z=None, title="", figure=False, **kwargs):
        figure = (figure if figure else rcParams["plotWrapper"].GnuplotFigure())
        return self.draw(x, y, z, title, func="scatter", figure=figure, **kwargs)

    def plot(self, y, x='Pos', z=None, title="", figure=False, **kwargs):
        figure = (figure if figure else rcParams["plotWrapper"].GnuplotFigure())
        # Line plots ignore the symbol option.
        if kwargs.get('symbol', None):
            kwargs.pop('symbol')
        return self.draw(x, y, z, title, func="line", figure=figure, **kwargs)

    def show(self, y, x="Pos", figure=False, overlay="Field", style=None,
             legend_prefix="", post_pone_style=False, row=None, titles=None,
             **kwargs):
        """ high-level plot entry: builds one or several figures for the
        given field(s), optionally overlaying fields or groups """
        if kwargs.get("props", False):
            props = kwargs.pop("props")
            self.properties.plot_properties.set(props)

        def create_figure(y_, f, title="", legend=""):
            # TODO use plot wrapper class here
            if kwargs.get("title"):
                title = kwargs.get("title")
                kwargs.pop("title")
            return getattr(self, self.properties.show_func)(
                y=y_, x=x, figure=f,
                legend_prefix=legend_prefix+legend,
                title=title, **kwargs)

        def create_figure_row(y, arow=None):
            # TODO let arow be an empty mutliplot
            if not arow:
                fn = kwargs.get("filename")
                arow = rcParams["plotWrapper"].GnuplotMultiplot([], filename=fn)
            if not self.grouped:
                y = (y if isinstance(y, list) else [y])
                if overlay == "Field":
                    # SINGLE FIGURE MUTLIPLE FIELDS
                    ids = "".join(y)
                    fig_id, f = (figure if figure else (ids, arow.get(ids)))
                    for yi in y:
                        create_figure(yi, f)
                    arow[fig_id] = f
                if not overlay:
                    # MULTIPLE FIGURES
                    # test if figure with same id already exists
                    # so that we can plot into it
                    # otherwise create a new figure
                    for i, yi in enumerate(y):
                        title = ("" if not titles else titles[i])
                        f = arow.get(yi)
                        arow[yi] = create_figure(yi, f, title=title)
            if self.grouped:
                groups = list(set(self["Group"]) if self["Group"] else set())
                groups.sort()
                if overlay == "Group":
                    # ALIGN ALONG GROUPS
                    # for every yi a new figure is needed
                    for yi in y:
                        f = arow.get(yi)
                        for group in groups:
                            arow[yi] = self.at("Group", group).show(
                                x=x, y=yi, title=yi, figure=(yi, f),
                                overlay="Field", legend_prefix=legend_prefix,
                                legend=str(group), **kwargs)[yi]
                if overlay == "Field":
                    for group in groups:
                        f = arow.get(group)
                        field = self.at("Group", group)
                        arow[group] = field.show(
                            x=x, y=y, title=str(group), figure=(group, f),
                            overlay="Field", post_pone_style=True,
                            legend_prefix=legend_prefix, **kwargs)[group]
            return arow

        fig_row = create_figure_row(y, row)
        return fig_row

    def show_func(self, value):
        """ set the default plot style valid arguments: scatter, plot """
        self.properties.show_func = value

    def set_plot_properties(self, **values):
        """ set plot properties """
        self.properties.plot_properties.set(values)

    # ----------------------------------------------------------------------
    # Filter methods

    def filter_fields(self, name, lower, upper):
        """ filter based on field values

            Examples: .filter_fields('T', 1000, 2000)
        """
        return self.filter(name, field=lambda x: lower < x < upper)

    def filter_locations(self, index):
        """ filter based on locations

            Examples:
                .filter_location(Owls.isIn('radial'))
                .filter_location(Owls.isNotIn('radial'))
        """
        return self.filter(name='Loc', index=index)

    def filter(self, name, index=None, field=None):
        """ filter on index or field values by given function

            Examples:
                .filter(name='T', field=lambda x: 1000<x<2000)
                .filter(name='Loc', index=lambda x: 0.2<field_to_float(x)<0.8)
        """
        if index:
            # Predicate applied to an index level.
            ret = self[list(map(index, self.index.get_level_values(name)))]
            ret.properties = self.properties
            return ret
        elif field:
            # Predicate applied to a data column.
            ret = self[list(map(field, self[name]))]
            ret.properties = self.properties
            return ret
        else:
            return self

    # ----------------------------------------------------------------------
    # Grouping methods

    def by_index(self, field, func=None):
        func = (func if func else lambda x: x)
        return self.by(field, func)

    def by_field(self, field, func=None):
        func = (func if func else lambda x: x)
        return self.by(field, func)

    def by_location(self, func=None):
        func = (func if func else lambda x: x)
        return self.by("Loc", func)

    def by_time(self, func=None):
        func = (func if func else lambda x: x)
        return self.by("Time", func)

    def by(self, name, func):
        """ add a "Group" index level computed by mapping func over the
        given index level or data column """
        ret = self.copy()
        # Too expensive ? pd.concat( [A, pd.DataFrame(s)], axis=1 )
        ret.properties = self.properties
        if self._is_idx(name):
            index_values = ret.index.get_level_values(name)
            ret["Group"] = index_values.map(func)
        else:
            ret["Group"] = ret[name].map(func)
        ret.set_index("Group", append=True, inplace=True)
        # NOTE(review): reorder_levels returns a new object; this call's
        # result is discarded, so the level order is unchanged — TODO confirm
        # whether this is intentional.
        ret.reorder_levels(['Time', 'Loc', 'Pos', 'Group'])
        return ret

    # ----------------------------------------------------------------------
    # Compute methods

    def rolling_mean(self, y, x="Pos", n=10, weight=False):
        """ compute a rolling mean, returns a Series """
        lower = min(self[x])
        upper = max(self[x])
        delta = (upper-lower)/n
        # n equal-width bins over the x range.
        bds = [(lower + i*delta, lower + (i+1)*delta) for i in range(n)]
        bins = {y: [
            self.filter(name=x, field=lambda x: (l < x < u))[y].mean()
            for (l, u) in bds]}
        bins.update({x: [(l+u)/2.0 for (l, u) in bds]})
        return self.from_dict(bins, name="rl" + self.properties.name,
                              plot_properties=self.properties.plot_properties,
                              show_func="plot")

    def weighted_rolling_mean(self, y, x="Pos", n=10, weight=False):
        """ compute a weighted rolling mean, returns a Series """
        lower = min(self[x])
        upper = max(self[x])
        delta = (upper-lower)/n
        bds = [(lower + i*delta, lower + (i+1)*delta) for i in range(n)]
        bins = {y: [
            np.average(
                a=self.filter(name=x, field=lambda x: (l < x < u))[y],
                weights=self.filter(name=x, field=lambda x: (l < x < u))[weight])
            for (l, u) in bds]}
        bins.update({x: [(l+u)/2.0 for (l, u) in bds]})
        return self.from_dict(bins, name="rl" + self.properties.name,
                              plot_properties=self.properties.plot_properties,
                              show_func="plot")

    def time_average(self, suffix="Avg", time_start=0.0):
        """ compute time average of fields """
        fs = self.after(time_start)
        # NOTE(review): DataFrame.mean(level=...) is legacy pandas API
        # (removed in pandas 2.0 in favour of groupby) — TODO confirm
        # pinned pandas version.
        ret = fs.mean(level=["Loc", "Pos"])
        latest = fs.latest
        ret.index = latest.index
        for c in self.columns:
            latest[c+suffix] = ret[c]
        return latest
class Grouping(object):
    def __init__(self, index, names=None):
        """
        index : index-like
            Can be pandas MultiIndex or Index or array-like. If array-like
            and is a MultipleIndex (more than one grouping variable),
            groups are expected to be in each row. E.g., [('red', 1),
            ('red', 2), ('green', 1), ('green', 2)]
        names : list or str, optional
            The names to use for the groups. Should be a str if only
            one grouping variable is used.

        Notes
        -----
        If index is already a pandas Index then there is no copy.
        """
        if isinstance(index, (Index, MultiIndex)):
            if names is not None:
                if hasattr(index, 'set_names'):  # newer pandas
                    # NOTE(review): mutates the caller's index in place.
                    index.set_names(names, inplace=True)
                else:
                    index.names = names
            self.index = index
        else:  # array_like
            if _is_hierarchical(index):
                self.index = _make_hierarchical_index(index, names)
            else:
                self.index = Index(index, name=names)
            if names is None:
                names = _make_generic_names(self.index)
            if hasattr(self.index, 'set_names'):
                self.index.set_names(names, inplace=True)
            else:
                self.index.names = names

        # nobs: number of observations; nlevels: number of grouping levels.
        self.nobs = len(self.index)
        self.nlevels = len(self.index.names)
        # slices: populated lazily by get_slices().
        self.slices = None

    @property
    def index_shape(self):
        # MultiIndex exposes levshape; flat Index falls back to shape.
        if hasattr(self.index, 'levshape'):
            return self.index.levshape
        else:
            return self.index.shape

    @property
    def levels(self):
        if hasattr(self.index, 'levels'):
            return self.index.levels
        else:
            return pd.Categorical(self.index).levels

    @property
    def labels(self):
        # this was index_int, but that's not a very good name...
        # Prefer the modern 'codes' attribute; fall back to the legacy
        # 'labels' attribute of older pandas MultiIndex.
        codes = getattr(self.index, 'codes', None)
        if codes is None:
            if hasattr(self.index, 'labels'):
                codes = self.index.labels
            else:
                # Flat index: add a leading axis so labels[level] works.
                codes = pd.Categorical(self.index).codes[None]
        return codes

    @property
    def group_names(self):
        return self.index.names

    def reindex(self, index=None, names=None):
        """
        Resets the index in-place.

        NOTE(review): rebinding ``self`` inside a method has no effect on
        the caller's object, so this method is effectively a no-op —
        confirm intended behavior before relying on it.
        """
        # NOTE: this is not of much use if the rest of the data does not change
        # This needs to reset cache
        if names is None:
            names = self.group_names
        self = Grouping(index, names)

    def get_slices(self, level=0):
        """
        Sets the slices attribute to be a list of indices of the sorted
        groups for the first index level. I.e., self.slices[0] is the
        index where each observation is in the first (sorted) group.
        """
        # TODO: refactor this
        groups = self.index.get_level_values(level).unique()
        groups = np.array(groups)
        groups.sort()
        if isinstance(self.index, MultiIndex):
            self.slices = [
                self.index.get_loc_level(x, level=level)[0]
                for x in groups
            ]
        else:
            self.slices = [self.index.get_loc(x) for x in groups]

    def count_categories(self, level=0):
        """
        Sets the attribute counts to equal the bincount of the
        (integer-valued) labels.
        """
        # TODO: refactor this not to set an attribute. Why would we do this?
        self.counts = np.bincount(self.labels[level])

    def check_index(self, is_sorted=True, unique=True, index=None):
        """Sanity checks"""
        # NOTE(review): ``if not index`` is ambiguous for pandas Index
        # objects (raises for non-empty); ``index is None`` is likely
        # intended — confirm against callers.
        if not index:
            index = self.index
        if is_sorted:
            # NOTE(review): lrange is a py2-compat helper and
            # DataFrame.sort() was removed in modern pandas
            # (use sort_index) — TODO confirm pinned versions.
            test = pd.DataFrame(lrange(len(index)), index=index)
            test_sorted = test.sort()
            if not test.index.equals(test_sorted.index):
                raise Exception('Data is not be sorted')
        if unique:
            if len(index) != len(index.unique()):
                raise Exception('Duplicate index entries')

    def sort(self, data, index=None):
        """Applies a (potentially hierarchical) sort operation on a numpy array
        or pandas series/dataframe based on the grouping index or a
        user-supplied index.  Returns an object of the same type as the
        original data as well as the matching (sorted) Pandas index.
        """
        if index is None:
            index = self.index
        if data_util._is_using_ndarray_type(data, None):
            if data.ndim == 1:
                out = pd.Series(data, index=index, copy=True)
                out = out.sort_index()
            else:
                out = pd.DataFrame(data, index=index)
                out = out.sort_index(inplace=False)  # copies
            return np.array(out), out.index
        elif data_util._is_using_pandas(data, None):
            out = data
            out = out.reindex(index)  # copies?
            out = out.sort_index()
            return out, out.index
        else:
            msg = 'data must be a Numpy array or a Pandas Series/DataFrame'
            raise ValueError(msg)

    def transform_dataframe(self, dataframe, function, level=0, **kwargs):
        """Apply function to each column, by group
        Assumes that the dataframe already has a proper index"""
        if dataframe.shape[0] != self.nobs:
            raise Exception('dataframe does not have the same shape as index')
        out = dataframe.groupby(level=level).apply(function, **kwargs)
        if 1 in out.shape:
            return np.ravel(out)
        else:
            return np.array(out)

    def transform_array(self, array, function, level=0, **kwargs):
        """Apply function to each column, by group
        """
        if array.shape[0] != self.nobs:
            raise Exception('array does not have the same shape as index')
        # Delegate to the DataFrame-based implementation.
        dataframe = pd.DataFrame(array, index=self.index)
        return self.transform_dataframe(dataframe, function,
                                        level=level, **kwargs)

    def transform_slices(self, array, function, level=0, **kwargs):
        """Apply function to each group. Similar to transform_array but does
        not coerce array to a DataFrame and back and only works on a 1D or 2D
        numpy array. function is called function(group, group_idx, **kwargs).
        """
        array = np.asarray(array)
        if array.shape[0] != self.nobs:
            raise Exception('array does not have the same shape as index')
        # always reset because level is given. need to refactor this.
        self.get_slices(level=level)
        processed = []
        for s in self.slices:
            if array.ndim == 2:
                subset = array[s, :]
            elif array.ndim == 1:
                subset = array[s]
            processed.append(function(subset, s, **kwargs))
        processed = np.array(processed)
        return processed.reshape(-1, processed.shape[-1])

    # TODO: this is not general needs to be a PanelGrouping object
    def dummies_time(self):
        # Sparse indicator matrix for the second (time) index level.
        self.dummy_sparse(level=1)
        return self._dummies

    def dummies_groups(self, level=0):
        # Sparse indicator matrix for the given group level.
        self.dummy_sparse(level=level)
        return self._dummies

    def dummy_sparse(self, level=0):
        """create a sparse indicator from a group array with integer labels

        Parameters
        ----------
        groups : ndarray, int, 1d (nobs,)
            An array of group indicators for each observation. Group levels
            are assumed to be defined as consecutive integers, i.e.
            range(n_groups) where n_groups is the number of group levels.
            A group level with no observations for it will still produce a
            column of zeros.

        Returns
        -------
        indi : ndarray, int8, 2d (nobs, n_groups)
            an indicator array with one row per observation, that has 1 in
            the column of the group level for that observation

        Examples
        --------
        >>> g = np.array([0, 0, 2, 1, 1, 2, 0])
        >>> indi = dummy_sparse(g)
        >>> indi
        <7x3 sparse matrix of type '<type 'numpy.int8'>'
            with 7 stored elements in Compressed Sparse Row format>
        >>> indi.todense()
        matrix([[1, 0, 0],
                [1, 0, 0],
                [0, 0, 1],
                [0, 1, 0],
                [0, 1, 0],
                [0, 0, 1],
                [1, 0, 0]], dtype=int8)

        current behavior with missing groups

        >>> g = np.array([0, 0, 2, 0, 2, 0])
        >>> indi = dummy_sparse(g)
        >>> indi.todense()
        matrix([[1, 0, 0],
                [1, 0, 0],
                [0, 0, 1],
                [1, 0, 0],
                [0, 0, 1],
                [1, 0, 0]], dtype=int8)
        """
        # Delegates to the module-level dummy_sparse helper; caches result.
        indi = dummy_sparse(self.labels[level])
        self._dummies = indi
class FoamFrame(DataFrame):
    """ Data reprensentation of OpenFOAM field (eulerian and lagrangian)
    and set files. Instantiated through read methods, e.g:
    read_sets, read_lag, read_eul, read_exp

    Examples:
    ----------

    case = read_sets(folder="home/user/case",plot_properties={})
    case.data # access data frame

    Parameters:
    ----------
    folder: data location containing a time or sets folder
    files: search only for files with given name, None for all files
    plot_properties: dictionary for consistent plotting of ranges and ax labels
    skiplines: read only every n-th entry
    cloud: name of lagrangian cloud
    name: case name for plot legends

    Note:
    ----------

    If data is accessed through [] only latest item is returned. For full
    times access iteratetimes() can be used.

    Categories:

        { "rad_pos": lambda field -> position
            "centreLine": [] lambda field -> i of []
        }

        example:
            lambda field: re.search('[0-9]*\.[0-9]*').group()[0]

    TODO:
        use case as cases ojects with a 3-level index
             case['u'] acces time of all cases -> df.iloc[df.index.isin([1],level=1)]
        refactor plot into case objects itself,
            ?case.show('t','u', time_series = False)
        refactor origins
        make iteratetimes() access a delta
    """
    # NOTE(review): this class relies on module-level names defined elsewhere
    # in the file (case_data_base, Database, rcParams, Props, PlotProperties,
    # FPNUMBER, import_foam_folder) -- their exact semantics are not visible
    # here; verify against the rest of the module.

    def __init__(self, *args, **kwargs):
        # Pull all FoamFrame-specific options out of kwargs; everything that
        # remains is forwarded to the pandas DataFrame constructor below.
        skip = kwargs.get('skiplines', 1)
        times = kwargs.get('readtime', slice(0, None))
        name = kwargs.get('name', 'None')
        symb = kwargs.get('symb', 'o')
        files = kwargs.get('search_files', None)
        properties = kwargs.get('properties', None)
        lines = kwargs.get('maxlines', 0)
        search = kwargs.get('search_pattern', FPNUMBER)
        folder = kwargs.get('folder', None)
        plot_properties = kwargs.get('plot_properties', PlotProperties())
        show_func = kwargs.get('show_func', None)
        validate = kwargs.get('validate', True)
        preHooks = kwargs.get('preHooks', None)
        exclude = kwargs.get('exclude', [" "])
        # FIXME
        times_stride = kwargs.get('times_stride', 1)
        times_range = kwargs.get('times_range', "all")
        # FIXME implement strides
        times_slice = times_range
        keys = [
            'skiplines', 'readtime', 'preHooks', 'name', 'symb',
            'search_files', 'properties', 'maxlines', 'search_pattern',
            'folder', 'plot_properties', 'show_func', 'exclude',
            'times_stride', 'times_range',
        ]
        # Remove consumed keywords so they do not leak into DataFrame.__init__.
        for k in keys:
            if k in kwargs:
                kwargs.pop(k)
        # TODO explain what happens here
        if folder is None:
            # No folder given: behave like a plain DataFrame constructor.
            # super(FoamFrame, self).__init__(*args, **kwargs)
            DataFrame.__init__(self, *args, **kwargs)
        else:
            if preHooks:
                for hook in preHooks:
                    hook.execute()
            if (folder in case_data_base) and Database:
                print("re-importing", end=" ")
            else:
                print("importing", end=" ")
            print(name + ": ", end="")
            origins, data = import_foam_folder(path=folder,
                                               search=search,
                                               files=files,
                                               skiplines=skip,
                                               maxlines=lines,
                                               skiptimes=times,
                                               exclude=exclude,
                                               times_slice=times_slice)
            try:
                DataFrame.__init__(self, data)
            except Exception as e:
                # NOTE(review): construction errors are printed, not raised --
                # the instance may be left partially initialised.
                print(e)
            self.properties = Props(origins, name, plot_properties, folder,
                                    symb, show_func)
            if validate and Database:
                self.validate_origins(folder, origins)
            # register to database
            if Database:
                case_data_base.sync()

    def validate_origins(self, folder, origins):
        """ compare hashes of the imported data against the cached database
            entry for this folder and update the cache accordingly """
        origins.update_hashes()
        if case_data_base.has_key(folder):
            if (case_data_base[folder]["hash"] == origins.dct["hash"]):
                print(" [consistent]")
            else:
                entries_new = len(origins.dct.keys())
                entries_old = len(case_data_base[folder].keys())
                if entries_new > entries_old:
                    print("[new timestep] ")
                    # print origins.dct.keys()
                    case_data_base[folder] = origins.dct
                elif entries_new < entries_old:
                    # print folder
                    # print origins.dct.keys()
                    # print case_data_base[folder].keys()
                    print("[missing timestep]")
                    case_data_base[folder] = origins.dct
                elif entries_new == entries_old:
                    # Same number of entries but different hash: report every
                    # field whose per-item hash changed.
                    print("[corrupted]", end="")
                    for time, loc, field, item in origins.hashes():
                        time_name, time_hash = time
                        loc_name, loc_hash = loc
                        field_name, field_hash = field
                        filename, item_hash = item
                        try:
                            orig_hash = case_data_base[folder][time_name][
                                loc_name][field_name][1]
                        except:
                            orig_hash = item_hash
                        if (item_hash != orig_hash):
                            print("")
                            print("corrupted fields:")
                            print("\t" + field_name + " in " + filename)
                    case_data_base[folder] = origins.dct
        else:
            case_data_base[folder] = origins.dct

    def source(self, col):
        """ find corresponding file for column """
        # return get time loc and return dict for every column
        # latest.source['u']
        # NOTE(review): not implemented -- always returns None.
        return

    # ----------------------------------------------------------------------
    # Internal helper methods

    @property
    def _constructor(self):
        # override DataFrames constructor
        # to enable method chaining
        return FoamFrame

    def _is_idx(self, item):
        """ test if item is column or idx """
        itemt = type(item)
        # if item is Series of booleans
        # it cant be an index
        from past.builtins import unicode
        from past.builtins import str as text
        if itemt not in [int, str, float, unicode, text]:
            return False
        else:
            return item in self.index.names

    @property
    def grouped(self):
        """ True if a "Group" level is present in the index """
        return self._is_idx("Group")

    @staticmethod
    def from_dict(input_dict,
                  name="None",
                  plot_properties=None,
                  symb=".",
                  show_func="scatter"):
        """ import raw data from python dictionary
            format {(timestep, pos, ): [fields]}
            usage: {(0):[1,2,3]} """
        pP = (PlotProperties() if not plot_properties else plot_properties)
        elems = len(input_dict[list(input_dict.keys())[0]])
        zeros = [0 for _ in range(elems)]
        # An optional "Pos" entry supplies the position level; otherwise zeros.
        pos = (input_dict[("Pos")] if input_dict.get(
            ("Pos"), False) else zeros)
        nums = list(range(elems))
        if input_dict.get("Pos"):
            input_dict.pop("Pos")
        mi = MultiIndex(levels=[zeros, zeros, pos],
                        labels=[nums, nums, nums],
                        names=['Time', 'Loc', 'Pos'])
        ff = FoamFrame(DataFrame(input_dict, index=mi), folder=None)
        ff.properties = Props("raw", name, pP, "", symb, show_func)
        ff.index = mi
        return ff

    # ----------------------------------------------------------------------
    # Info methods

    def __str__(self):
        return "FoamFrame: \n" + super(FoamFrame, self).__str__()

    @property
    def times(self):
        """ return times for case """
        return set([_[0] for _ in self.index.values])

    @property
    def locations(self):
        """ return locations for case """
        return set([_[1] for _ in self.index.values])

    # ----------------------------------------------------------------------
    # Selection methods

    def __getitem__(self, item):
        """ call pandas DataFrame __getitem__ if item is not an index """
        if self._is_idx(item):
            try:
                level = self.index.names.index(item)
                return list(zip(*self.index.values))[level]
            except:
                return
                # print("failed ", item) NOTE for debugging
        else:
            # Unknown column names yield an empty Series instead of raising.
            if (type(item) is str) and item not in self.columns:
                return Series()
            else:
                return super(FoamFrame, self).__getitem__(item)

    @property
    def latest(self):
        """ return latest time for case """
        ret = self.query('Time == {}'.format(self.latest_time))
        ret.properties = self.properties
        return ret

    @property
    def latest_time(self):
        """ return value of latest time step """
        return max(self.index.levels[0])

    @property
    def earliest_time(self):
        """ return value of earliest time step """
        return min(self.index.levels[0])

    def after(self, time):
        """ return all rows whose 'Time' index value is greater than time """
        return self.filter("Time", index=lambda x: x > time)

    def at_time(self, time):
        """ return rows at the given time step """
        ret = self.query('Time == {}'.format(time))
        ret.properties = self.properties
        return ret

    def at(self, idx_name, idx_val):
        """ select from foamframe based on index name and value"""
        # TODO FIX This
        ret = self[self.index.get_level_values(idx_name) == idx_val]
        # match = [(val in idx_val)
        #          for val in self.index.get_level_values(idx_name)]
        # ret = self[match]
        if idx_name == "Group":
            ret.index = ret.index.droplevel("Group")
        ret.properties = self.properties
        return ret

    def id(self, loc):
        """ Return FoamFrame based on location """
        return self.at(idx_name='Pos', idx_val=loc)

    def location(self, loc):
        """ Return FoamFrame based on location """
        return self.at(idx_name='Loc', idx_val=loc)

    def loc_names(self, key):
        """ search for all index names matching keyword"""
        return [_ for _ in self.index.get_level_values("Loc") if key in _]

    def field_names(self, key):
        """ search for all field names matching keyword"""
        return [_ for _ in self.columns if key in _]

    # ----------------------------------------------------------------------
    # Manipulation methods

    def add(self, data, label):
        """ Add a given Series

        Usage:
            case.add(sqrt(uu),'u_rms')
        """
        self.latest[label] = data
        return self

    def rename(self, search, replace):
        """ rename field names based on regex """
        import re
        self.columns = [re.sub(search, replace, name)
                        for name in self.columns]

    def rename_idx(self, search, replace):
        """ rename 'Loc' index entries equal to search with replace """
        self.index = Index([(t, replace if x == search else x, i)
                            for t, x, i in list(self.index)],
                           names=self.index.names)

    def rename_idxs(self, rename_map):
        """ rename multiple field names based dictionary of
            {search: replace} """
        for s, r in rename_map.items():
            self.rename_idx(s, r)

    # ----------------------------------------------------------------------
    # Plotting methods

    def draw(self, x, y, z, title, func, figure,
             data=None, legend_prefix="", titles=None, **kwargs):
        """ delegate the actual drawing to the configured plotWrapper """
        data = (data if isinstance(data, DataFrame) else self)
        # NOTE(review): legend_prefix and titles arguments are not forwarded
        # here -- the wrapper always receives legend_prefix="" and
        # titles=None; confirm whether this is intentional.
        return rcParams["plotWrapper"].draw(x=x,
                                            y=y,
                                            z=z,
                                            data=data,
                                            title=title,
                                            func=func,
                                            figure=figure,
                                            legend_prefix="",
                                            titles=None,
                                            properties=self.properties,
                                            **kwargs)

    def histo_data(self, y, weights, bins):
        """ normalised histogram of y, optionally weighted """
        return np.histogram(y, density=True, weights=weights, bins=bins)

    def histogram(self, y, x=None, title="", figure=False, weights=False,
                  **kwargs):
        figure = (figure
                  if figure else rcParams["plotWrapper"].GnuplotFigure())
        if weights:
            weights = self[weights]
        hist, edges = self.histo_data(self[y], weights,
                                      kwargs.get("bins", 50))
        # Convert bin edges into bin centres for plotting.
        centres = [(edges[i] + edges[i + 1]) * 0.5
                   for i in range(len(edges) - 1)]
        df = DataFrame({'centres': centres, 'hist': hist})
        return self.draw(x='centres',
                         y='hist',
                         z=None,
                         data=df,
                         title=title,
                         func="quad",
                         figure=figure,
                         **kwargs)

    def cdf(self, y, x=None, title="", figure=False, weights=False,
            **kwargs):
        """ plot the empirical cumulative distribution of field y """
        # NOTE(review): np.histogram's `normed` keyword was removed in newer
        # numpy releases -- this call may need `density=True` instead.
        a, b = np.histogram(self[y], weights=self[weights],
                            bins=20, normed=True)
        dx = b[1] - b[0]
        cdf = np.cumsum(a) * dx
        df = DataFrame({'centres': b[1:], 'hist': cdf})
        return self.draw(x='centres',
                         y='hist',
                         z=None,
                         data=df,
                         title=title,
                         func="line",
                         figure=figure,
                         **kwargs)

    def scatter(self, y, x='Pos', z=None, title="", figure=False, **kwargs):
        figure = (figure
                  if figure else rcParams["plotWrapper"].GnuplotFigure())
        return self.draw(x, y, z, title, func="scatter", figure=figure,
                         **kwargs)

    def plot(self, y, x='Pos', z=None, title="", figure=False, **kwargs):
        figure = (figure
                  if figure else rcParams["plotWrapper"].GnuplotFigure())
        if kwargs.get('symbol', None):
            kwargs.pop('symbol')
        return self.draw(x, y, z, title, func="line", figure=figure,
                         **kwargs)

    def show(self, y, x="Pos", figure=False, overlay="Field",
             style=None, legend_prefix="", post_pone_style=False,
             row=None, titles=None, **kwargs):
        """ high-level plotting entry point; builds one or more figures
            depending on the overlay mode and whether data is grouped """
        if kwargs.get("props", False):
            props = kwargs.pop("props")
            self.properties.plot_properties.set(props)

        def create_figure(y_, f, title="", legend=""):
            # TODO use plot wrapper class here
            if kwargs.get("title"):
                title = kwargs.get("title")
                kwargs.pop("title")
            return getattr(self, self.properties.show_func)(
                y=y_, x=x, figure=f,
                legend_prefix=legend_prefix + legend,
                title=title, **kwargs)

        def create_figure_row(y, arow=None):
            # TODO let arow be an empty mutliplot
            if not arow:
                fn = kwargs.get("filename")
                arow = rcParams["plotWrapper"].GnuplotMultiplot([],
                                                                filename=fn)
            if not self.grouped:
                y = (y if isinstance(y, list) else [y])
                if overlay == "Field":
                    # SINGLE FIGURE MUTLIPLE FIELDS
                    ids = "".join(y)
                    fig_id, f = (figure if figure else (ids, arow.get(ids)))
                    for yi in y:
                        create_figure(yi, f)
                    arow[fig_id] = f
                if not overlay:
                    # MULTIPLE FIGURES
                    # test if figure with same id already exists
                    # so that we can plot into it
                    # otherwise create a new figure
                    for i, yi in enumerate(y):
                        title = ("" if not titles else titles[i])
                        f = arow.get(yi)
                        arow[yi] = create_figure(yi, f, title=title)
            if self.grouped:
                groups = list(set(self["Group"]) if self["Group"] else set())
                groups.sort()
                if overlay == "Group":
                    # ALIGN ALONG GROUPS
                    # for every yi a new figure is needed
                    for yi in y:
                        f = arow.get(yi)
                        for group in groups:
                            arow[yi] = self.at("Group", group).show(
                                x=x, y=yi, title=yi,
                                figure=(yi, f),
                                overlay="Field",
                                legend_prefix=legend_prefix,
                                legend=str(group),
                                **kwargs)[yi]
                if overlay == "Field":
                    for group in groups:
                        f = arow.get(group)
                        field = self.at("Group", group)
                        arow[group] = field.show(x=x, y=y,
                                                 title=str(group),
                                                 figure=(group, f),
                                                 overlay="Field",
                                                 post_pone_style=True,
                                                 legend_prefix=legend_prefix,
                                                 **kwargs)[group]
            return arow

        fig_row = create_figure_row(y, row)
        return fig_row

    def show_func(self, value):
        """ set the default plot style valid arguments: scatter, plot """
        self.properties.show_func = value

    def set_plot_properties(self, **values):
        """ set plot properties """
        self.properties.plot_properties.set(values)

    # ----------------------------------------------------------------------
    # Filter methods

    def filter_fields(self, name, lower, upper):
        """ filter based on field values

        Examples:

            .filter_fields('T', 1000, 2000)
        """
        return self.filter(name, field=lambda x: lower < x < upper)

    def filter_locations(self, index):
        """ filter based on locations

        Examples:

            .filter_location(Owls.isIn('radial'))
            .filter_location(Owls.isNotIn('radial'))
        """
        return self.filter(name='Loc', index=index)

    def filter(self, name, index=None, field=None):
        """ filter on index or field values by given functioni

        Examples:

            .filter(name='T', field=lambda x: 1000<x<2000)
            .filter(name='Loc', index=lambda x: 0.2<field_to_float(x)<0.8)
        """
        if index:
            ret = self[list(map(index, self.index.get_level_values(name)))]
            ret.properties = self.properties
            return ret
        elif field:
            ret = self[list(map(field, self[name]))]
            ret.properties = self.properties
            return ret
        else:
            # Neither predicate given: return self unchanged.
            return self

    # ----------------------------------------------------------------------
    # Grouping methods

    def by_index(self, field, func=None):
        """ group by values of an index level mapped through func """
        func = (func if func else lambda x: x)
        return self.by(field, func)

    def by_field(self, field, func=None):
        """ group by values of a data column mapped through func """
        func = (func if func else lambda x: x)
        return self.by(field, func)

    def by_location(self, func=None):
        func = (func if func else lambda x: x)
        return self.by("Loc", func)

    def by_time(self, func=None):
        func = (func if func else lambda x: x)
        return self.by("Time", func)

    def by(self, name, func):
        """ append a "Group" index level computed by mapping func over the
            given index level or column """
        ret = self.copy(
        )  # Too expensive ? pd.concat( [A, pd.DataFrame(s)], axis=1 )
        ret.properties = self.properties
        if self._is_idx(name):
            index_values = ret.index.get_level_values(name)
            ret["Group"] = index_values.map(func)
        else:
            ret["Group"] = ret[name].map(func)
        ret.set_index("Group", append=True, inplace=True)
        # NOTE(review): reorder_levels returns a new object; this call's
        # result is discarded -- confirm whether the reorder is needed.
        ret.reorder_levels(['Time', 'Loc', 'Pos', 'Group'])
        return ret

    # ----------------------------------------------------------------------
    # Compute methods

    def rolling_mean(self, y, x="Pos", n=10, weight=False):
        """ compute a rolling mean, returns a Series """
        lower = min(self[x])
        upper = max(self[x])
        delta = (upper - lower) / n
        # Partition [lower, upper] into n equal-width bins.
        bds = [(lower + i * delta, lower + (i + 1) * delta)
               for i in range(n)]
        bins = {
            y: [
                self.filter(name=x, field=lambda x: (l < x < u))[y].mean()
                for (l, u) in bds
            ]
        }
        bins.update({x: [(l + u) / 2.0 for (l, u) in bds]})
        return self.from_dict(bins,
                              name="rl" + self.properties.name,
                              plot_properties=self.properties.plot_properties,
                              show_func="plot")

    def weighted_rolling_mean(self, y, x="Pos", n=10, weight=False):
        """ compute a rolling mean, returns a Series """
        lower = min(self[x])
        upper = max(self[x])
        delta = (upper - lower) / n
        bds = [(lower + i * delta, lower + (i + 1) * delta)
               for i in range(n)]
        bins = {
            y: [
                np.average(a=self.filter(name=x,
                                         field=lambda x: (l < x < u))[y],
                           weights=self.filter(
                               name=x,
                               field=lambda x: (l < x < u))[weight])
                for (l, u) in bds
            ]
        }
        bins.update({x: [(l + u) / 2.0 for (l, u) in bds]})
        return self.from_dict(bins,
                              name="rl" + self.properties.name,
                              plot_properties=self.properties.plot_properties,
                              show_func="plot")

    def time_average(self, suffix="Avg", time_start=0.0):
        """ compute time average of fields """
        fs = self.after(time_start)
        # NOTE(review): DataFrame.mean(level=...) is deprecated/removed in
        # newer pandas -- may need groupby(level=...).mean() there.
        ret = fs.mean(level=["Loc", "Pos"])
        latest = fs.latest
        ret.index = latest.index
        for c in self.columns:
            latest[c + suffix] = ret[c]
        return latest
class Grouping(object):
    """Holds a (possibly hierarchical) grouping index and provides
    sort/transform/indicator helpers keyed on its levels.

    NOTE(review): depends on module-level helpers not visible here
    (_is_hierarchical, _make_hierarchical_index, _make_generic_names,
    data_util, lrange) -- verify against the rest of the module.
    """

    def __init__(self, index, names=None):
        """
        index : index-like
            Can be pandas MultiIndex or Index or array-like. If array-like
            and is a MultipleIndex (more than one grouping variable),
            groups are expected to be in each row. E.g., [('red', 1),
            ('red', 2), ('green', 1), ('green', 2)]
        names : list or str, optional
            The names to use for the groups. Should be a str if only
            one grouping variable is used.

        Notes
        -----
        If index is already a pandas Index then there is no copy.
        """
        if isinstance(index, (Index, MultiIndex)):
            if names is not None:
                if hasattr(index, 'set_names'):  # newer pandas
                    index.set_names(names, inplace=True)
                else:
                    index.names = names
            self.index = index
        else:  # array-like
            if _is_hierarchical(index):
                self.index = _make_hierarchical_index(index, names)
            else:
                self.index = Index(index, name=names)
            if names is None:
                # No names supplied: synthesize generic level names.
                names = _make_generic_names(self.index)
                if hasattr(self.index, 'set_names'):
                    self.index.set_names(names, inplace=True)
                else:
                    self.index.names = names

        # number of observations
        self.nobs = len(self.index)
        # number of grouping levels
        self.nlevels = len(self.index.names)
        # per-group index slices; populated lazily by get_slices()
        self.slices = None

    @property
    def index_shape(self):
        if hasattr(self.index, 'levshape'):
            return self.index.levshape
        else:
            return self.index.shape

    @property
    def levels(self):
        if hasattr(self.index, 'levels'):
            return self.index.levels
        else:
            return pd.Categorical(self.index).levels

    @property
    def labels(self):
        # this was index_int, but that's not a very good name...
        if hasattr(self.index, 'labels'):
            return self.index.labels
        else:  # pandas version issue here
            # Compat code for the labels -> codes change in pandas 0.15
            # FIXME: use .codes directly when we don't want to support
            # pandas < 0.15
            tmp = pd.Categorical(self.index)
            try:
                labl = tmp.codes
            except AttributeError:
                labl = tmp.labels  # Old pandsd
            return labl[None]

    @property
    def group_names(self):
        return self.index.names

    def reindex(self, index=None, names=None):
        """
        Resets the index in-place.
        """
        # NOTE: this isn't of much use if the rest of the data doesn't change
        # This needs to reset cache
        # NOTE(review): `self = ...` only rebinds the local name -- the
        # instance the caller holds is NOT modified by this method.
        if names is None:
            names = self.group_names
        self = Grouping(index, names)

    def get_slices(self, level=0):
        """
        Sets the slices attribute to be a list of indices of the sorted
        groups for the first index level. I.e., self.slices[0] is the
        index where each observation is in the first (sorted) group.
        """
        # TODO: refactor this
        groups = self.index.get_level_values(level).unique()
        groups = np.array(groups)
        groups.sort()
        if isinstance(self.index, MultiIndex):
            self.slices = [self.index.get_loc_level(x, level=level)[0]
                           for x in groups]
        else:
            self.slices = [self.index.get_loc(x) for x in groups]

    def count_categories(self, level=0):
        """
        Sets the attribute counts to equal the bincount of the (integer-valued)
        labels.
        """
        # TODO: refactor this not to set an attribute. Why would we do this?
        self.counts = np.bincount(self.labels[level])

    def check_index(self, is_sorted=True, unique=True, index=None):
        """Sanity checks"""
        if not index:
            index = self.index
        if is_sorted:
            test = pd.DataFrame(lrange(len(index)), index=index)
            test_sorted = test.sort()
            if not test.index.equals(test_sorted.index):
                # NOTE(review): message has a typo ("is not be sorted") --
                # left unchanged here since it is runtime output.
                raise Exception('Data is not be sorted')
        if unique:
            if len(index) != len(index.unique()):
                raise Exception('Duplicate index entries')

    def sort(self, data, index=None):
        """Applies a (potentially hierarchical) sort operation on a numpy array
        or pandas series/dataframe based on the grouping index or a
        user-supplied index.  Returns an object of the same type as the
        original data as well as the matching (sorted) Pandas index.
        """
        if index is None:
            index = self.index
        if data_util._is_using_ndarray_type(data, None):
            if data.ndim == 1:
                out = pd.Series(data, index=index, copy=True)
                out = out.sort_index()
            else:
                out = pd.DataFrame(data, index=index)
                out = out.sort_index(inplace=False)  # copies
            return np.array(out), out.index
        elif data_util._is_using_pandas(data, None):
            out = data
            out = out.reindex(index)  # copies?
            out = out.sort_index()
            return out, out.index
        else:
            msg = 'data must be a Numpy array or a Pandas Series/DataFrame'
            raise ValueError(msg)

    def transform_dataframe(self, dataframe, function, level=0, **kwargs):
        """Apply function to each column, by group
        Assumes that the dataframe already has a proper index"""
        if dataframe.shape[0] != self.nobs:
            raise Exception('dataframe does not have the same shape as index')
        out = dataframe.groupby(level=level).apply(function, **kwargs)
        if 1 in out.shape:
            return np.ravel(out)
        else:
            return np.array(out)

    def transform_array(self, array, function, level=0, **kwargs):
        """Apply function to each column, by group
        """
        if array.shape[0] != self.nobs:
            raise Exception('array does not have the same shape as index')
        dataframe = pd.DataFrame(array, index=self.index)
        return self.transform_dataframe(dataframe, function, level=level,
                                        **kwargs)

    def transform_slices(self, array, function, level=0, **kwargs):
        """Apply function to each group. Similar to transform_array but does
        not coerce array to a DataFrame and back and only works on a 1D or 2D
        numpy array. function is called function(group, group_idx, **kwargs).
        """
        array = np.asarray(array)
        if array.shape[0] != self.nobs:
            raise Exception('array does not have the same shape as index')
        # always reset because level is given. need to refactor this.
        self.get_slices(level=level)
        processed = []
        for s in self.slices:
            if array.ndim == 2:
                subset = array[s, :]
            elif array.ndim == 1:
                subset = array[s]
            processed.append(function(subset, s, **kwargs))
        processed = np.array(processed)
        return processed.reshape(-1, processed.shape[-1])

    # TODO: this isn't general needs to be a PanelGrouping object
    def dummies_time(self):
        self.dummy_sparse(level=1)
        return self._dummies

    def dummies_groups(self, level=0):
        self.dummy_sparse(level=level)
        return self._dummies

    def dummy_sparse(self, level=0):
        """create a sparse indicator from a group array with integer labels

        Parameters
        ----------
        groups: ndarray, int, 1d (nobs,)
            an array of group indicators for each observation. Group levels
            are assumed to be defined as consecutive integers, i.e.
            range(n_groups) where n_groups is the number of group levels.
            A group level with no observations for it will still produce a
            column of zeros.

        Returns
        -------
        indi : ndarray, int8, 2d (nobs, n_groups)
            an indicator array with one row per observation, that has 1 in the
            column of the group level for that observation

        Examples
        --------

        >>> g = np.array([0, 0, 2, 1, 1, 2, 0])
        >>> indi = dummy_sparse(g)
        >>> indi
        <7x3 sparse matrix of type '<type 'numpy.int8'>'
            with 7 stored elements in Compressed Sparse Row format>
        >>> indi.todense()
        matrix([[1, 0, 0],
                [1, 0, 0],
                [0, 0, 1],
                [0, 1, 0],
                [0, 1, 0],
                [0, 0, 1],
                [1, 0, 0]], dtype=int8)

        current behavior with missing groups
        >>> g = np.array([0, 0, 2, 0, 2, 0])
        >>> indi = dummy_sparse(g)
        >>> indi.todense()
        matrix([[1, 0, 0],
                [1, 0, 0],
                [0, 0, 1],
                [1, 0, 0],
                [0, 0, 1],
                [1, 0, 0]], dtype=int8)
        """
        from scipy import sparse
        groups = self.labels[level]
        # One stored entry per row: column = group code, value = 1.
        indptr = np.arange(len(groups)+1)
        data = np.ones(len(groups), dtype=np.int8)
        self._dummies = sparse.csr_matrix((data, groups, indptr))
def _filter_on_value(columns: pd.Index, level: str, value: Any): return columns.get_level_values(level=level) == value
def _get_default_col_vals(columns: pd.Index, level: str): return columns.get_level_values(level=level).unique()
def unique_level(idx: pd.Index, level=0):
    """Unique values at ``level`` for a MultiIndex; any other index is returned as-is."""
    if not isinstance(idx, pd.MultiIndex):
        return idx
    return unique(idx.get_level_values(level))
class FoamFrame(DataFrame): """ Data reprensentation of OpenFOAM field (eulerian and lagrangian) and set files. Instantiated through read methods, e.g: read_sets, read_lag, read_eul, read_exp Examples: ---------- case = read_sets(folder="home/user/case",plot_properties={}) case.data # access data frame Parameters: ---------- folder: data location containing a time or sets folder files: search only for files with given name, None for all files plot_properties: dictionary for consistent plotting of ranges and ax labels skiplines: read only every n-th entry cloud: name of lagrangian cloud name: case name for plot legends Note: ---------- If data is accessed through [] only latest item is returned. For full times access iteratetimes() can be used. Categories: { "rad_pos": lambda field -> position "centreLine": [] lambda field -> i of [] } example: lambda field: re.search('[0-9]*\.[0-9]*').group()[0] TODO: use case as cases ojects with a 3-level index case['u'] acces time of all cases -> df.iloc[df.index.isin([1],level=1)] refactor plot into case objects itself, ?case.show('t','u', time_series = False) refactor origins make iteratetimes() access a delta """ def __init__(self, *args, **kwargs): skip = kwargs.get('skiplines', 1) times = kwargs.get('skiptimes', 1) name = kwargs.get('name', 'None') symb = kwargs.get('symb', 'o') files = kwargs.get('search_files', None) properties = kwargs.get('properties', None) lines = kwargs.get('maxlines', 0) search = kwargs.get('search_pattern', io.FPNUMBER) folder = kwargs.get('folder', None) plot_properties = kwargs.get('plot_properties', PlotProperties()) show_func = kwargs.get('show_func', None) validate = kwargs.get('validate', True) preHooks = kwargs.get('preHooks', None) exclude = kwargs.get('exclude', [" "]) # FIXME times_stride = kwargs.get('times_stride', 1) times_range = kwargs.get('times_range', "all") # FIXME implement strides times_slice = times_range keys = [ 'skiplines', 'skiptimes', 'preHooks', 'name', 'symb', 
'search_files', 'properties', 'maxlines', 'search_pattern', 'folder', 'plot_properties', 'show_func', 'exclude', 'times_stride', 'times_range', ] for k in keys: if k in kwargs: kwargs.pop(k) # TODO explain what happens here if folder is None: # super(FoamFrame, self).__init__(*args, **kwargs) DataFrame.__init__(self, *args, **kwargs) else: if preHooks: for hook in preHooks: hook.execute() if (folder in case_data_base) and Database: print("re-importing", end=" ") else: print("importing", end=" ") print(name + ": ", end="") origins, data = io.import_foam_folder(path=folder, search=search, files=files, skiplines=skip, maxlines=lines, skiptimes=times, exclude=exclude, times_slice=times_slice) try: DataFrame.__init__(self, data) except: pass self.properties = Props(origins, name, plot_properties, folder, symb, show_func) if validate and Database: self.validate_origins(folder, origins) # register to database if Database: case_data_base.sync() def validate_origins(self, folder, origins): origins.update_hashes() if case_data_base.has_key(folder): if (case_data_base[folder]["hash"] == origins.dct["hash"]): print(" [consistent]") else: entries_new = len(origins.dct.keys()) entries_old = len(case_data_base[folder].keys()) if entries_new > entries_old: print("[new timestep] ") # print origins.dct.keys() case_data_base[folder] = origins.dct elif entries_new < entries_old: # print folder # print origins.dct.keys() # print case_data_base[folder].keys() print("[missing timestep]") case_data_base[folder] = origins.dct elif entries_new == entries_old: print("[corrupted]", end="") for time, loc, field, item in origins.hashes(): time_name, time_hash = time loc_name, loc_hash = loc field_name, field_hash = field filename, item_hash = item try: orig_hash = case_data_base[folder][time_name][ loc_name][field_name][1] except: orig_hash = item_hash if (item_hash != orig_hash): print("") print("corrupted fields:") print("\t" + field_name + " in " + filename) case_data_base[folder] = 
origins.dct else: case_data_base[folder] = origins.dct def source(self, col): """ find corresponding file for column """ # return get time loc and return dict for every column # latest.source['u'] return # ---------------------------------------------------------------------- # Internal helper methods @property def _constructor(self): # override DataFrames constructor # to enable method chaining return FoamFrame def _is_idx(self, item): """ test if item is column or idx """ itemt = type(item) # if item is Series of booleans # it cant be an index from past.builtins import unicode from past.builtins import str as text if itemt not in [int, str, float, unicode, text]: return False else: return item in self.index.names @property def grouped(self): return self._is_idx("Group") @staticmethod def from_dict(input_dict, name="None", plot_properties=None, symb=".", show_func="scatter"): """ import raw data from python dictionary format {(timestep, pos, ): [fields]} usage: {(0):[1,2,3]} """ pP = (PlotProperties() if not plot_properties else plot_properties) ff = FoamFrame(input_dict, folder=None) ff.properties = Props("raw", name, pP, "", symb, show_func) return ff # ---------------------------------------------------------------------- # Info methods def __str__(self): return "FoamFrame: \n" + super(FoamFrame, self).__str__() @property def times(self): """ return times for case """ return set([_[0] for _ in self.index.values]) @property def locations(self): """ return times for case """ return set([_[1] for _ in self.index.values]) # ---------------------------------------------------------------------- # Selection methods def __getitem__(self, item): """ call pandas DataFrame __getitem__ if item is not an index """ if self._is_idx(item): try: level = self.index.names.index(item) return list(zip(*self.index.values))[level] except: return # print("failed ", item) NOTE for debugging else: if (type(item) is str) and item not in self.columns: return Series() else: return 
super(FoamFrame, self).__getitem__(item) @property def latest(self): """ return latest time for case """ ret = self.query('Time == {}'.format(self.latest_time)) ret.properties = self.properties return ret @property def latest_time(self): """ return value of latest time step """ return max(self.index.levels[0]) def at(self, idx_name, idx_val): """ select from foamframe based on index name and value""" # TODO FIX This ret = self[self.index.get_level_values(idx_name) == idx_val] # match = [(val in idx_val) # for val in self.index.get_level_values(idx_name)] # ret = self[match] if idx_name == "Group": ret.index = ret.index.droplevel("Group") ret.properties = self.properties return ret def id(self, loc): """ Return FoamFrame based on location """ return self.at(idx_name='Pos', idx_val=loc) def location(self, loc): """ Return FoamFrame based on location """ return self.at(idx_name='Loc', idx_val=loc) def loc_names(self, key): """ search for all index names matching keyword""" return [_ for _ in self.index.get_level_values("Loc") if key in _] def field_names(self, key): """ search for all field names matching keyword""" return [_ for _ in self.columns if key in _] # ---------------------------------------------------------------------- # Manipulation methods def add(self, data, label): """ Add a given Series Usage: case.add(sqrt(uu),'u_rms') """ self.latest[label] = data return self def rename(self, search, replace): """ rename field names based on regex """ import re self.columns = [re.sub(search, replace, name) for name in self.columns] def rename_idx(self, search, replace): """ rename field names based on regex """ self.index = Index([(t, replace if x == search else x, i) for t, x, i in list(self.index)], names=self.index.names) def rename_idxs(self, rename_map): """ rename multiple field names based dictionary of {search: replace} """ for s, r in rename_map.items(): self.rename_idx(s, r) # ---------------------------------------------------------------------- # 
Plotting methods def draw(self, x, y, z, title, func, figure, legend_prefix="", titles=None, **kwargs): # TODO Rename to _draw def _label(axis, field): label = kwargs.get(axis + '_label', False) if label: self.properties.plot_properties.insert( field, {axis + '_label': label}) else: label = self.properties.plot_properties.select( field, axis + '_label', "None") return label def _range(axis, field): from bokeh.models import Range1d p_range_args = kwargs.get(axis + '_range', False) if p_range_args: self.properties.plot_properties.insert( field, {axis + '_range': p_range}) else: p_range = self.properties.plot_properties.select( field, axis + '_range') if not p_range: return False else: return Range1d(start=p_range[0], end=p_range[1]) figure_properties = {"title": title} if kwargs.get('x_range', False): figure_properties.update({"x_range": kwargs.get('x_range')}) figure.set(**figure_properties) if func == "quad": getattr(figure, func)(top=y, bottom=0, left=x[:-1], right=x[1:], **kwargs) return figure colors = plt.next_color() spec_color = kwargs.get("color", False) spec_legend = kwargs.get("legend", False) y = (y if isinstance(y, list) else [y]) for yi in y: x_data, y_data = self[x], self[yi] # TODO FIXME for k in ['symbols', 'order', 'colors', 'symbol']: if k in kwargs.keys(): kwargs.pop(k) if not spec_color: kwargs.update({"color": next(colors)}) if not spec_legend: # NOTE title overrides legend, does that make sense always? 
yi = (yi if not title else "") if yi and legend_prefix: legend = legend_prefix + "-" + yi if not yi and legend_prefix: legend = legend_prefix if not legend_prefix: legend = yi kwargs.update({"legend": legend}) getattr(figure, func)(x=x_data, y=y_data, **kwargs) for ax, data in {'x': x, 'y': y[0]}.items(): if _label(ax, data): getattr(figure, ax + 'axis')[0].axis_label = _label(ax, data) # setattr(getattr(figure, ax + 'axis'), # 'axis_label', _label(ax, data)) if _range(ax, data): r = setattr(figure, ax + '_range', _range(ax, data)) return figure def histogram(self, y, x=None, title="", figure=False, **kwargs): figure = (figure if figure else plt.figure()) import numpy as np hist, edges = np.histogram(self[y], density=True, bins=50) return self.draw(x=edges, y=hist, z=None, title=title, func="quad", figure=figure, **kwargs) def scatter(self, y, x='Pos', z=False, title="", figure=False, **kwargs): figure = (figure if figure else plt.figure()) return self.draw(x, y, z, title, func="scatter", figure=figure, **kwargs) def plot(self, y, x='Pos', z=False, title="", figure=False, **kwargs): figure = (figure if figure else plt.figure()) if kwargs.get('symbol', None): kwargs.pop('symbol') return self.draw(x, y, z, title, func="line", figure=figure, **kwargs) def show(self, y, x="Pos", figure=False, overlay="Field", style=defstyle, legend_prefix="", post_pone_style=False, row=None, titles=None, **kwargs): style = (compose_styles(style, []) if isinstance(style, list) else style) if kwargs.get("props", False): props = kwargs.pop("props") self.properties.plot_properties.set(props) def create_figure(y_, f, title=""): if kwargs.get("title"): title = kwargs.get("title") kwargs.pop("title") return getattr(self, self.properties.show_func)( y=y_, x=x, figure=f, legend_prefix=legend_prefix, title=title, **kwargs) def create_figure_row(y, arow=None): arow = (arow if arow else OrderedDict()) if not self.grouped: y = (y if isinstance(y, list) else [y]) if overlay == "Field": # SINGLE 
FIGURE MUTLIPLE FIELDS fig_id, f = (figure if figure else ("".join(y), plt.figure())) for yi in y: create_figure(y, f) arow[fig_id] = f if not overlay: # MULTIPLE FIGURES # test if figure with same id already exists # so that we can plot into it # otherwise create a new figure for i, yi in enumerate(y): title = ("" if not titles else titles[i]) fig_id, f = ((yi, arow[yi]) if arow.get(yi, False) else (yi, plt.figure(title=title))) arow[fig_id] = create_figure(yi, f, title=title) if self.grouped: groups = list(set(self["Group"]) if self["Group"] else set()) groups.sort() if overlay == "Group": # ALIGN ALONG GROUPS # for every yi a new figure is needed #arow = (arow if arow else OrderedDict()) for yi in y: fig_id, f = ((yi, arow[yi]) if arow.get(yi, False) else (yi, plt.figure())) colors = plt.next_color() for name in groups: color = next(colors) arow[yi] = self.at("Group", name).show( x=x, y=yi, title=yi, figure=(fig_id, f), post_pone_style=True, overlay="Field", color=color, legend_prefix=legend_prefix, legend=str(name), **kwargs)[yi] if overlay == "Field": # row = (arow if arow else OrderedDict()) for group in groups: fig_id, f = ((group, arow[group]) if arow.get( group, False) else (group, plt.figure())) field = self.at("Group", group) arow[group] = field.show(x=x, y=y, title=str(group), figure=(group, f), overlay="Field", post_pone_style=True, legend_prefix=legend_prefix, **kwargs)[group] return arow fig_row = create_figure_row(y, row) return (fig_row if post_pone_style else bk.GridPlot(children=style( rows=[list(fig_row.values())]))) def show_func(self, value): """ set the default plot style valid arguments: scatter, plot """ self.properties.show_func = value def set_plot_properties(self, **values): """ set plot properties """ self.properties.plot_properties.set(values) # ---------------------------------------------------------------------- # Filter methods def filter_fields(self, name, lower, upper): """ filter based on field values Examples: 
.filter_fields('T', 1000, 2000) """ return self.filter(name, field=lambda x: lower < x < upper) def filter_locations(self, index): """ filter based on locations Examples: .filter_location(Owls.isIn('radial')) .filter_location(Owls.isNotIn('radial')) """ return self.filter(name='Loc', index=index) def filter(self, name, index=None, field=None): """ filter on index or field values by given functioni Examples: .filter(name='T', field=lambda x: 1000<x<2000) .filter(name='Loc', index=lambda x: 0.2<field_to_float(x)<0.8) """ if index: ret = self[list(map(index, self.index.get_level_values(name)))] ret.properties = self.properties return ret elif field: ret = self[list(map(field, self[name]))] ret.properties = self.properties return ret else: return self # ---------------------------------------------------------------------- # Grouping methods def by_index(self, field, func=None): func = (func if func else lambda x: x) return self.by(field, func) def by_field(self, field, func=None): func = (func if func else lambda x: x) return self.by(field, func) def by_location(self, func=None): func = (func if func else lambda x: x) return self.by("Loc", func) def by_time(self, func=None): func = (func if func else lambda x: x) return self.by("Time", func) def by(self, name, func): ret = self.copy( ) # Too expensive ? pd.concat( [A, pd.DataFrame(s)], axis=1 ) ret.properties = self.properties if self._is_idx(name): index_values = ret.index.get_level_values(name) ret["Group"] = index_values.map(func) else: ret["Group"] = ret[name].map(func) ret.set_index("Group", append=True, inplace=True) ret.reorder_levels(['Time', 'Loc', 'Pos', 'Group']) return ret
class FoamFrame(DataFrame):
    r""" Data representation of OpenFOAM field (eulerian and lagrangian)
    and set files. Instantiated through read methods, e.g:
    read_sets, read_lag, read_eul, read_exp

    Examples:
    ----------

    case = read_sets(folder="home/user/case", plot_properties={})
    case.data # access data frame

    Parameters:
    ----------
    folder: data location containing a time or sets folder
    files: search only for files with given name, None for all files
    plot_properties: dictionary for consistent plotting of ranges and ax labels
    skiplines: read only every n-th entry
    cloud: name of lagrangian cloud
    name: case name for plot legends

    Note:
    ----------

    If data is accessed through [] only latest item is returned. For full
    times access iteratetimes() can be used.

    Categories:

        { "rad_pos": lambda field -> position
          "centreLine": [] lambda field -> i of []

        }

        example:
            lambda field: re.search('[0-9]*\.[0-9]*').group()[0]

    TODO:

        use case as cases objects with a 3-level index
             case['u'] acces time of all cases -> df.iloc[df.index.isin([1],level=1)]
        refactor plot into case objects itself,
            ?case.show('t','u', time_series = False)
        refactor origins
        make iteratetimes() access a delta
    """

    def __init__(self, *args, **kwargs):
        skip = kwargs.get('skiplines', 1)
        times = kwargs.get('skiptimes', 1)
        name = kwargs.get('name', 'None')
        symb = kwargs.get('symb', 'o')
        files = kwargs.get('search_files', None)
        properties = kwargs.get('properties', None)
        lines = kwargs.get('maxlines', 0)
        search = kwargs.get('search_pattern', io.FPNUMBER)
        folder = kwargs.get('folder', None)
        plot_properties = kwargs.get('plot_properties', PlotProperties())
        show_func = kwargs.get('show_func', None)
        validate = kwargs.get('validate', True)
        preHooks = kwargs.get('preHooks', None)
        exclude = kwargs.get('exclude', [" "])  # FIXME
        # strip all owls-specific keywords before the remaining kwargs
        # are handed to the plain DataFrame constructor below.
        # BUGFIX: 'validate' was missing from this list and leaked into
        # DataFrame.__init__ when folder is None
        keys = ['skiplines', 'skiptimes', 'preHooks', 'name',
                'symb', 'search_files', 'properties', 'maxlines',
                'search_pattern', 'folder', 'plot_properties',
                'show_func', 'exclude', 'validate']
        for k in keys:
            if k in kwargs:
                kwargs.pop(k)

        if folder is None:
            # plain DataFrame-style construction
            # super(FoamFrame, self).__init__(*args, **kwargs)
            DataFrame.__init__(self, *args, **kwargs)
        else:
            if preHooks:
                for hook in preHooks:
                    hook.execute()
            if (folder in case_data_base) and Database:
                print("re-importing", end=" ")
            else:
                print("importing", end=" ")
            print(name + ": ", end="")
            origins, data = io.import_foam_folder(
                path=folder,
                search=search,
                files=files,
                skiplines=skip,
                maxlines=lines,
                skiptimes=times,
                exclude=exclude,
            )
            try:
                DataFrame.__init__(self, data)
            except Exception:
                # best effort: continue with an uninitialised frame if
                # the imported data cannot be converted
                pass
            self.properties = Props(
                origins,
                name,
                plot_properties,
                folder,
                # FIXME fix it for read logs
                data.index.levels[0],
                symb,
                show_func)
            if validate and Database:
                self.validate_origins(folder, origins)
            # register to database
            if Database:
                case_data_base.sync()

    def validate_origins(self, folder, origins):
        """ Compare stored hashes against freshly computed ones and
            report new/missing time steps or corrupted fields. """
        origins.update_hashes()
        # BUGFIX: dict.has_key() does not exist in Python 3; use the
        # membership operator (as __init__ already does)
        if folder in case_data_base:
            if case_data_base[folder]["hash"] == origins.dct["hash"]:
                print(" [consistent]")
            else:
                entries_new = len(origins.dct.keys())
                entries_old = len(case_data_base[folder].keys())
                if entries_new > entries_old:
                    print("[new timestep] ")
                    case_data_base[folder] = origins.dct
                elif entries_new < entries_old:
                    print("[missing timestep]")
                    case_data_base[folder] = origins.dct
                elif entries_new == entries_old:
                    print("[corrupted]", end="")
                    for time, loc, field, item in origins.hashes():
                        time_name, time_hash = time
                        loc_name, loc_hash = loc
                        field_name, field_hash = field
                        filename, item_hash = item
                        try:
                            orig_hash = case_data_base[
                                folder][time_name][loc_name][field_name][1]
                        except Exception:
                            # entry not present in the stored record;
                            # treat it as unchanged
                            orig_hash = item_hash
                        if item_hash != orig_hash:
                            print("")
                            print("corrupted fields:")
                            print("\t" + field_name + " in " + filename)
                    case_data_base[folder] = origins.dct
        else:
            case_data_base[folder] = origins.dct

    def add(self, data, label):
        """ Add a given Series.

            Usage:
                case.add(sqrt(uu), 'u_rms')
        """
        self.latest[label] = data
        return self

    def source(self, col):
        """ Find the corresponding file for a column. """
        # TODO: return time/loc and a dict for every column,
        # e.g. latest.source['u']
        return

    def __str__(self):
        return "FoamFrame: \n" + super(FoamFrame, self).__str__()

    @property
    def _constructor(self):
        # override DataFrames constructor
        # to enable method chaining
        return FoamFrame

    @property
    def times(self):
        """ Return the set of times for the case. """
        return set([_[0] for _ in self.index.values])

    @property
    def locations(self):
        """ Return the set of locations for the case. """
        return set([_[1] for _ in self.index.values])

    @property
    def latest(self):
        """ Return a FoamFrame restricted to the latest time step. """
        ret = self.loc[[self.properties.latest_time]]
        ret.properties = self.properties
        return ret

    def at(self, idx_name, idx_val):
        """ Select from FoamFrame based on index name and value. """
        # TODO FIX This
        ret = self[self.index.get_level_values(idx_name) == idx_val]
        ret.properties = self.properties
        return ret

    def id(self, loc):
        """ Return FoamFrame based on id. """
        return self.at(idx_name='Id', idx_val=loc)

    def location(self, loc):
        """ Return FoamFrame based on location. """
        return self.at(idx_name='Loc', idx_val=loc)

    def loc_names(self, key):
        """ Search for all location names matching keyword. """
        return [_ for _ in self.index.get_level_values("Loc") if key in _]

    def field_names(self, key):
        """ Search for all field names matching keyword. """
        return [_ for _ in self.columns if key in _]

    def rename(self, search, replace):
        """ Rename field (column) names based on regex. """
        import re
        self.columns = [re.sub(search, replace, name)
                        for name in self.columns]

    def rename_idx(self, search, replace):
        """ Rename entries of the middle index level on exact match.

            NOTE(review): assumes a three-level (Time, Loc, Pos)
            index -- confirm against callers.
        """
        self.index = Index([(t, replace if x == search else x, i)
                            for t, x, i in list(self.index)],
                           names=self.index.names)

    def rename_idxs(self, rename_map):
        """ Rename multiple index entries from a {search: replace} dict. """
        for s, r in rename_map.items():
            self.rename_idx(s, r)

    def _is_idx(self, item):
        """ Test if item is a column or an index name. """
        itemt = type(item)
        # if item is a Series of booleans it can't be an index
        from past.builtins import unicode
        from past.builtins import str as text
        if itemt not in [int, str, float, unicode, text]:
            return False
        else:
            return item in self.index.names

    def __getitem__(self, item):
        """ Call pandas DataFrame __getitem__ if item is not an index. """
        if self._is_idx(item):
            level = self.index.names.index(item)
            return list(zip(*self.index.values))[level]
        else:
            # unknown column names yield an empty Series instead of
            # raising, to keep plotting of missing fields tolerant
            if (type(item) is str) and item not in self.columns:
                return Series()
            else:
                return super(FoamFrame, self).__getitem__(item)

    def draw(self, x, y, z, title, func, figure, **kwargs):
        """ Low level plotting routine dispatching to a bokeh glyph method.

            Arguments:
                x, y:   column (or index) names; y may be a list of names
                z:      unused placeholder
                title:  figure title
                func:   name of the glyph method ('line', 'scatter', 'quad')
                figure: bokeh figure to draw into
        """
        def _label(axis, field):
            # an explicit kwarg label wins and is persisted, otherwise
            # fall back to the stored plot properties
            label = kwargs.get(axis + '_label', False)
            if label:
                self.properties.plot_properties.insert(
                    field, {axis + '_label': label})
            else:
                label = self.properties.plot_properties.select(
                    field, axis + '_label', "None")
            return label

        def _range(axis, field):
            from bokeh.models import Range1d
            p_range = kwargs.get(axis + '_range', False)
            if p_range:
                # BUGFIX: this branch referenced an undefined name before,
                # raising NameError whenever an explicit range was passed
                self.properties.plot_properties.insert(
                    field, {axis + '_range': p_range})
            else:
                p_range = self.properties.plot_properties.select(
                    field, axis + '_range')
            if not p_range:
                return False
            return Range1d(start=p_range[0], end=p_range[1])

        figure_properties = {"title": title}
        if kwargs.get('x_range', False):
            figure_properties.update({"x_range": kwargs.get('x_range')})
        figure.set(**figure_properties)

        if func == "quad":
            # histogram mode: x holds the bin edges, y the bin values
            getattr(figure, func)(top=y, bottom=0,
                                  left=x[:-1], right=x[1:], **kwargs)
            return figure

        # (a duplicated colors/spec_color/spec_legend block that was
        # overwritten here has been removed)
        colors = plt.next_color()
        spec_color = kwargs.get("color", False)
        spec_legend = kwargs.get("legend", False)
        y = (y if isinstance(y, list) else [y])
        for yi in y:
            x_data, y_data = self[x], self[yi]
            # TODO FIXME
            for k in ['symbols', 'order', 'colors', 'symbol']:
                if k in kwargs.keys():
                    kwargs.pop(k)
            if not spec_color:
                kwargs.update({"color": next(colors)})
            if not spec_legend:
                kwargs.update({"legend": yi})
            getattr(figure, func)(x=x_data, y=y_data, **kwargs)
        for ax, data in {'x': x, 'y': y[0]}.items():
            if _label(ax, data):
                getattr(figure, ax + 'axis')[0].axis_label = _label(ax, data)
            # setattr(getattr(figure, ax + 'axis'),
            #         'axis_label', _label(ax, data))
            if _range(ax, data):
                # setattr returns None; the previous "r =" binding was dead
                setattr(figure, ax + '_range', _range(ax, data))
        return figure

    def histogram(self, y, x=None, title="", figure=False, **kwargs):
        """ Draw a 50-bin density histogram of column y. """
        figure = (figure if figure else plt.figure())
        import numpy as np
        hist, edges = np.histogram(self[y], density=True, bins=50)
        return self.draw(x=edges, y=hist, z=None, title=title,
                         func="quad", figure=figure, **kwargs)

    def scatter(self, y, x='Pos', z=False, title="", figure=False, **kwargs):
        """ Scatter plot of y over x. """
        figure = (figure if figure else plt.figure())
        return self.draw(x, y, z, title, func="scatter",
                         figure=figure, **kwargs)

    def plot(self, y, x='Pos', z=False, title="", figure=False, **kwargs):
        """ Line plot of y over x. """
        figure = (figure if figure else plt.figure())
        if kwargs.get('symbol', None):
            # symbols only apply to scatter plots
            kwargs.pop('symbol')
        return self.draw(x, y, z, title, func="line",
                         figure=figure, **kwargs)

    def show(self, y, x=None, figure=False, **kwargs):
        """ Dispatch to the configured default show function. """
        figure = (figure if figure else plt.figure())
        if x:
            return getattr(self, self.properties.show_func)(
                y=y, x=x, figure=figure, **kwargs)
        else:
            return getattr(self, self.properties.show_func)(
                y=y, figure=figure, **kwargs)

    def show_func(self, value):
        """ Set the default plot style.

            Valid arguments: scatter, plot
        """
        self.properties.show_func = value

    def set_plot_properties(self, **values):
        """ Set plot properties. """
        self.properties.plot_properties.set(values)

    def filter_locations(self, index):
        """ Filter based on locations. """
        return self.filter(name='Loc', index=index)

    def filter(self, name, index=None, field=None):
        """ Filter on index or field values by a given function.

            Examples:
                .filter(name='T', field=lambda x: 1000 < x < 2000)
                .filter(name='Loc',
                        index=lambda x: 0.2 < field_to_float(x) < 0.8)
        """
        if index:
            ret = self[list(map(index, self.index.get_level_values(name)))]
            ret.properties = self.properties
            return ret
        elif field:
            ret = self[list(map(field, self[name]))]
            ret.properties = self.properties
            return ret
        else:
            return self

    def by_index(self, field, func=None):
        """ Facet by an index level. """
        func = (func if func else lambda x: x)
        return self.by(field, index=func)

    def by_field(self, field, func=None):
        """ Facet by a field column. """
        func = (func if func else lambda x: x)
        return self.by(field, field=func)

    def by_location(self, func=None):
        """ Facet by the Loc index level. """
        func = (func if func else lambda x: x)
        return self.by("Loc", index=func)

    def by(self, name, index=None, field=None):
        """ Facet by a given function.

            Examples:
                .by(index=lambda x: x)
                .by(field=lambda x: ('T_high' if x['T'] > 1000 else 'T_low'))
        """
        ret = OrderedDict()
        if index:
            index_values = self.index.get_level_values(name)
            idx_values = sorted(set(index_values))
            for val in idx_values:
                ret.update([(index(val), self[index_values == val])])
        else:
            selection = self[name].apply(field)
            for cat in set(selection):
                ret.update([(cat, self[selection == cat])])
        for _ in ret.values():
            _.properties = self.properties
        return mf.MultiFrame(ret)