def generate_dataframes(h5file, time="/time", scenarios="/scenarios"):
    """Yield one pandas dataframe per CArray found in a `TablesRecorder` file.

    Parameters
    ----------
    h5file : str
        A path to a H5 file created by `TablesRecorder`.
    time : str
        The internal table that contains the time information
        (default "/time"). When falsy, the dataframes get no explicit index.
    scenarios : str
        The internal table that contains the scenario information
        (default "/scenarios"). When falsy, the dataframes get no explicit
        columns.

    Yields
    ------
    tuple
        ``(name, dataframe)`` where ``name`` is the node's ``_v_name`` and
        ``dataframe`` holds the node data flattened to two dimensions.
    """
    store = H5Store(h5file, mode="r")

    # Row index: a datetime built from the year/month/day columns.
    index = None
    if time:
        time_node = store.file.get_node(time)
        date_parts = {}
        for key in ("year", "month", "day"):
            date_parts[key] = time_node.col(key)
        index = pandas.to_datetime(date_parts)

    # Column index: the product of every scenario's member range.
    columns = None
    if scenarios:
        scenario_node = store.file.get_node(scenarios)
        scenario_frame = pandas.DataFrame(
            {key: scenario_node.col(key) for key in ("name", "size")}
        )
        level_values = []
        level_names = []
        for _, record in scenario_frame.iterrows():
            level_values.append(range(record["size"]))
            # Names are stored as bytes in the table.
            level_names.append(record["name"].decode())
        columns = pandas.MultiIndex.from_product(level_values, names=level_names)

    for array_node in store.file.walk_nodes("/", "CArray"):
        values = array_node.read()
        # Keep the first axis and collapse all remaining axes into one.
        flattened = values.reshape((values.shape[0], -1))
        yield array_node._v_name, pandas.DataFrame(
            flattened, index=index, columns=columns
        )
def reset(self):
    """Reopen the H5 store and recreate the per-run tables and arrays.

    The store is opened in ``"r+"`` mode and the arrays listed in
    ``self._nodes`` are looked up again. The time table, the routes flow
    array and the routes table are each removed (if present) and created
    afresh, depending on whether ``self.time``, ``self.routes_flows`` and
    ``self.routes`` are set.

    An unexpected failure while removing an old node now propagates as-is;
    previously the replacement was still attempted from a ``finally`` clause,
    which could mask the original error with a secondary one.
    """
    import tables

    def split_path(path):
        # Split "/group/leaf" into (group, leaf); a top-level leaf lives in "/".
        group_name, node_name = path.rsplit('/', 1)
        return group_name or "/", node_name

    def discard_node(group_name, node_name):
        # Remove a node left over from a previous run; a missing node is fine.
        try:
            self.h5store.file.remove_node(group_name, node_name)
        except tables.NoSuchNodeError:
            pass

    mode = "r+"  # always need to append, as file already created in setup
    self.h5store = H5Store(self.h5file, self.filter_kwds, mode)

    self._arrays = {}
    for where, node in self._nodes:
        self._arrays[node] = self.h5store.file.get_node(where)

    self._time_table = None
    # Create time table
    # This is created in reset so that the table is always recreated
    if self.time is not None:
        group_name, node_name = split_path(self.time)
        description = {
            c: tables.Int64Col() for c in ('year', 'month', 'day', 'index')
        }
        discard_node(group_name, node_name)
        self._time_table = self.h5store.file.create_table(
            group_name, node_name, description=description,
            createparents=True)

    self._routes_flow_array = None
    if self.routes_flows is not None:
        # Create a CArray for the flows
        # The first dimension is the number of timesteps.
        # The second dimension is the number of routes
        # The following dimensions are sized per scenario
        scenario_shape = list(self.model.scenarios.shape)
        shape = [
            len(self.model.timestepper),
            len(self.model.solver.routes),
        ] + scenario_shape
        atom = tables.Float64Atom()
        discard_node(self.where, self.routes_flows)
        self._routes_flow_array = self.h5store.file.create_carray(
            self.where, self.routes_flows, atom, shape, createparents=True)

    # Create routes table. This must be done in reset
    if self.routes is not None:
        group_name, node_name = split_path(self.routes)
        description = {
            # TODO make string length configurable
            'start': tables.StringCol(1024),
            'end': tables.StringCol(1024),
        }
        discard_node(group_name, node_name)
        tbl = self.h5store.file.create_table(
            group_name, node_name, description=description,
            createparents=True)

        entry = tbl.row
        for route in self.model.solver.routes:
            node_first = route[0]
            node_last = route[-1]

            # Record the parent's name when a route end has a parent.
            if node_first.parent is not None:
                node_first = node_first.parent
            if node_last.parent is not None:
                node_last = node_last.parent

            entry['start'] = node_first.name.encode('utf-8')
            entry['end'] = node_last.name.encode('utf-8')
            entry.append()

        tbl.flush()
def setup(self):
    """Create the H5 file with the arrays and scenario tables to record into.

    One CArray is created for every recorded node, shaped
    ``[number of timesteps] + scenario shape``. When ``self.nodes`` is None
    every model node is recorded under ``self.where``; otherwise each entry
    of ``self.nodes`` may be:

    - a ``(where, node)`` pair, where ``node`` is a node name or object;
    - a node name (str), stored at ``self.where + "/" + name``;
    - a node object, stored at ``self.where + "/" + node.name``.

    Entries of ``self.parameters`` (if any) are appended as given and are
    expected to already be ``(where, object)`` pairs.

    If ``self.scenarios`` is set, a table of scenario names and sizes is
    written, plus a ``scenario_combinations`` table when the model defines
    user combinations. The store reference is dropped when setup finishes,
    whether it succeeded or failed.
    """
    from pywr.parameters import IndexParameter
    import tables

    # The first dimension is the number of timesteps.
    # The following dimensions are sized per scenario
    scenario_shape = list(self.model.scenarios.shape)
    shape = [len(self.model.timestepper)] + scenario_shape

    self.h5store = H5Store(self.h5file, self.filter_kwds, self.mode,
                           title=self.title, metadata=self.metadata,
                           create_directories=self.create_directories)
    try:
        # Create a CArray for each node
        self._arrays = {}

        # Default to all nodes if None given.
        if self.nodes is None:
            nodes = [((self.where + "/" + n.name).replace("//", "/"), n)
                     for n in self.model.nodes.values()]
        else:
            nodes = []
            for spec in self.nodes:
                if isinstance(spec, basestring):
                    # A bare name. Checked before unpacking because a
                    # two-character name would otherwise be split into a
                    # (where, node) pair.
                    where, node = None, spec
                else:
                    try:
                        where, node = spec
                    except (TypeError, ValueError):
                        # Not a pair: treat it as a node object.
                        where, node = None, spec

                if where is None:
                    # Default location is under self.where, keyed by name.
                    if isinstance(node, basestring):
                        label = node
                    else:
                        label = node.name
                    where = self.where + "/" + label

                # Accept a str, and lookup node by name instead.
                if isinstance(node, basestring):
                    node = self.model.nodes[node]
                # Otherwise assume it is a node object anyway

                where = where.replace("//", "/")
                nodes.append((where, node))

        if self.parameters is not None:
            nodes.extend(self.parameters)

        self._nodes = nodes

        for where, node in self._nodes:
            # Index parameters are stored as integers, everything else
            # as floats.
            if isinstance(node, IndexParameter):
                atom = tables.Int32Atom()
            else:
                atom = tables.Float64Atom()
            group_name, node_name = where.rsplit("/", 1)
            if group_name == "":
                group_name = "/"
            self.h5store.file.create_carray(group_name, node_name, atom,
                                            shape, createparents=True)

        # Create scenario tables
        if self.scenarios is not None:
            group_name, node_name = self.scenarios.rsplit('/', 1)
            if group_name == "":
                group_name = "/"
            description = {
                # TODO make string length configurable
                'name': tables.StringCol(1024),
                'size': tables.Int64Col(),
            }
            tbl = self.h5store.file.create_table(
                group_name, node_name, description=description,
                createparents=True)
            # Now add the scenarios
            entry = tbl.row
            for scenario in self.model.scenarios.scenarios:
                entry['name'] = scenario.name.encode('utf-8')
                entry['size'] = scenario.size
                entry.append()
            tbl.flush()

            if self.model.scenarios.user_combinations is not None:
                # One integer column per scenario, one row per combination.
                description = {
                    s.name: tables.Int64Col()
                    for s in self.model.scenarios.scenarios
                }
                tbl = self.h5store.file.create_table(
                    group_name, 'scenario_combinations',
                    description=description)
                entry = tbl.row
                for comb in self.model.scenarios.user_combinations:
                    for s, i in zip(self.model.scenarios.scenarios, comb):
                        entry[s.name] = i
                    entry.append()
                tbl.flush()
    finally:
        # Drop the reference even if creation failed part-way, so this
        # recorder does not keep the store alive; reset() opens its own.
        # NOTE(review): presumably H5Store closes the file once released -
        # confirm against its implementation.
        self.h5store = None