def __call__(self, data):
    '''apply the transformation to *data*, optionally group by group.

    If ``self.nlevels`` is None no grouping is performed and the
    result of :meth:`transform` on the complete dataframe is
    returned directly.

    Otherwise the dataframe is grouped by the index levels reported
    by ``Utils.getGroupLevels`` and :meth:`transform` is called on
    every group. Groups for which :meth:`transform` returns None are
    dropped; the remaining results are concatenated with the group
    keys as the outer index.

    returns the transformed dataframe.
    '''
    if self.nlevels is None:
        # grouping disabled - transform everything in one go
        return self.transform(data)

    levels = Utils.getGroupLevels(data, modify_levels=self.nlevels)

    # (group key, transformed dataframe) pairs in iteration order
    collected = []
    for label, chunk in data.groupby(level=levels):
        self.debug('applying transformation on group %s' % str(label))
        outcome = self.transform(chunk)
        if outcome is None:
            continue
        collected.append((label, outcome))

    combined = pandas.concat(
        [frame for _, frame in collected],
        keys=[label for label, _ in collected])

    if self.prune_dataframe:
        # reset dataframe index - keep the same levels
        Utils.pruneDataFrameIndex(combined, original=data)

    self.debug("transform: finished")
    return combined
def render(self):
    '''supply the :class:`Renderer.Renderer` with the data to render.

    The data supplied will depend on the ``groupby`` option: if
    ``self.group_level`` is negative the complete dataframe is handed
    to ``self.renderer`` in a single call, otherwise the dataframe is
    grouped by index levels and the renderer is called once per group.

    An exception raised while rendering a single group is logged and
    replaced by an exception block so that the remaining groups are
    still rendered. Exceptions in the ungrouped case propagate.

    returns a ResultBlocks data structure.

    raises ValueError if there is no data to render or if the
    renderer produced no blocks.
    '''
    dataframe = self.data

    # Check for missing data *before* anything calls len() on it,
    # otherwise a TypeError would mask the intended error below.
    if dataframe is None:
        self.warn("%s: no data after conversion" % self)
        raise ValueError("no data for renderer")

    self.debug("%s: rendering data started for %i items" %
               (self, len(dataframe)))

    # initiate output structure
    results = ResultBlocks(title="")

    # special patch: set column names to pruned levels
    # if there are no column names
    if len(dataframe.columns) == len(self.pruned):
        if list(dataframe.columns) == list(range(len(dataframe.columns))):
            dataframe.columns = [x[1] for x in self.pruned]

    nlevels = Utils.getDataFrameLevels(dataframe)

    self.debug("%s: rendering data started. "
               "levels=%i, group_level=%s" %
               (self, nlevels, str(self.group_level)))

    if self.group_level < 0:
        # no grouping for renderers that will accept
        # a dataframe with any level of indices and no explicit
        # grouping has been asked for.
        results.append(self.renderer(dataframe, path=()))
    else:
        level = Utils.getGroupLevels(
            dataframe,
            max_level=self.group_level + 1)

        self.debug("%s: grouping by levels: %s" % (self, str(level)))

        for key, work in dataframe.groupby(level=level):
            try:
                results.append(self.renderer(work, path=key))
            except Exception:
                # Only trap ordinary errors; KeyboardInterrupt and
                # SystemExit must not be converted into result blocks.
                self.error("%s: exception in rendering" % self)
                results.append(
                    ResultBlocks(Utils.buildException("rendering")))

    if len(results) == 0:
        self.warn("renderer returned no data.")
        raise ValueError("renderer returned no data.")

    self.debug("%s: rendering data finished with %i blocks" %
               (self.tracker, len(results)))

    return results
def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the :meth:`render` method.

    If ``self.split_at`` is zero or unset, or the dataframe has fewer
    groups than ``self.split_at``, the dataframe is rendered in one
    piece. Otherwise the groups are rendered in chunks of at most
    ``self.split_at`` groups each.

    Groups whose key (converted with ``path2str``) matches any of the
    regular expressions in ``self.split_always`` are put at the start
    of every chunk. Each such group is included once per chunk, even
    if its key matches several patterns.

    returns a ResultBlocks data structure with the rendered output.
    '''
    result = ResultBlocks()

    if not self.split_at:
        result.extend(self.render(dataframe, path))
        return result

    # split dataframe at first index
    level = Utils.getGroupLevels(dataframe)
    grouper = dataframe.groupby(level=level)

    if len(grouper) < self.split_at:
        result.extend(self.render(dataframe, path))
        return result

    # collect the groups that are part of every chunk
    always, remove_always = [], set()
    if self.split_always:
        # compile once instead of once per (group, pattern) pair
        patterns = [re.compile(pat) for pat in self.split_always]
        for key, work in grouper:
            label = path2str(key)
            # any() guarantees a key matching several patterns is
            # added only once and not duplicated in the output.
            if any(rx.search(label) for rx in patterns):
                always.append((key, work))
                remove_always.add(key)

    def _batches():
        # every batch starts with the shared groups
        batch = always[:]
        for key, work in grouper:
            if key in remove_always:
                continue
            batch.append((key, work))
            if len(batch) >= self.split_at:
                yield batch
                batch = always[:]
        # trailing, possibly partial batch
        yield batch

    first = True
    for batch in _batches():
        # do not plot last dataframe that contains
        # only the common tracks to plot
        if not first and len(batch) == len(always):
            continue
        first = False
        df = pandas.concat([work for _, work in batch])
        # reconcile index names
        df.index.names = dataframe.index.names
        result.extend(self.render(df, path))

    return result
def __call__(self, dataframe, path):
    '''iterate over leaves/branches in data structure.

    This method will call the :meth:`render` method.

    Large dataframes are split into multiple, smaller rendered
    objects if ``self.split_at`` is not zero.

    By default, dataframes are split along the hierarchical index.
    However, if there is only a single index group, but more columns
    than ``self.split_at``, the split is performed on the columns
    instead. This is used when splitting coordinate data as a result
    of the histogram transformation.

    When splitting on columns, ``self.split_always`` is treated as a
    collection of column labels that are part of every chunk; the
    first column is added as well if ``self.split_keep_first_column``
    is set. When splitting on the index, ``self.split_always`` is
    treated as a list of regular expressions that are matched against
    ``path2str(key)`` of each group key. In both modes a column or
    group is included only once per chunk.

    returns a ResultBlocks data structure with the rendered output.
    '''
    result = ResultBlocks()

    if not self.split_at:
        result.extend(self.render(dataframe, path))
        return result

    # split dataframe at first index
    level = Utils.getGroupLevels(dataframe)
    grouper = dataframe.groupby(level=level)

    if len(grouper) == 1 and len(dataframe.columns) > self.split_at:
        # split dataframe column wise if only one index
        # and multiple columns
        columns = list(dataframe.columns)
        always = []
        if self.split_keep_first_column:
            always.append(columns[0])
        # columns to always keep; tolerate an unset split_always and
        # do not select the first column a second time.
        keep = self.split_always or ()
        always.extend(
            c for c in columns if c in keep and c not in always)
        columns = [c for c in columns if c not in always]
        for start in range(0, len(columns), self.split_at):
            # extract a set of columns
            subset = always + columns[start:start + self.split_at]
            result.extend(self.render(dataframe.loc[:, subset], path))

    elif len(grouper) >= self.split_at:
        # split dataframe along index
        always, remove_always = [], set()
        if self.split_always:
            # compile once instead of once per (group, pattern) pair
            patterns = [re.compile(pat) for pat in self.split_always]
            for key, work in grouper:
                label = path2str(key)
                # any() guarantees a key matching several patterns
                # is added only once and not duplicated in the output.
                if any(rx.search(label) for rx in patterns):
                    always.append((key, work))
                    remove_always.add(key)

        def _batches():
            # every batch starts with the shared groups
            batch = always[:]
            for key, work in grouper:
                if key in remove_always:
                    continue
                batch.append((key, work))
                if len(batch) >= self.split_at:
                    yield batch
                    batch = always[:]
            # trailing, possibly partial batch
            yield batch

        first = True
        for batch in _batches():
            # do not plot last dataframe that contains
            # only the common tracks to plot
            if not first and len(batch) == len(always):
                continue
            first = False
            df = pandas.concat([work for _, work in batch])
            # reconcile index names
            df.index.names = dataframe.index.names
            result.extend(self.render(df, path))

    else:
        # do not split dataframe
        result.extend(self.render(dataframe, path))

    return result