def run(self) -> None:
    """
    Execute a scenario run.

    The steps, in order, are:

    1. Build a CERO from every ``input_conf`` object (an empty CERO is used if there are none) and store it on \
    ``self.cero``.
    2. Export that initial CERO to an xlsx file (step ``00``).
    3. Run each model in ``self["models"]`` against the current CERO. A model returning ``None`` contributes \
    nothing; any other return value must be a CERO and is merged into ``self.cero``.
    4. Pass the final CERO to every ``output_conf`` object.

    Per-model exports are controlled by ``export_mod_xlsx`` (the model's setting takes precedence over the \
    scenario's) and intermediate-step exports by ``export_int_xlsx``. Both default to ``True``.

    :raises TypeError: If a model returns an object that is neither ``None`` nor a CERO.
    """

    def _step_filename(step_no):
        # Name of the xlsx file that holds the CERO after ``step_no`` models have contributed.
        return self.get_name() + "_%03d_step_%02d.xlsx" % (self["run_no"], step_no)

    self.cero = CERO.create_empty()

    input_ceros = [conf.create_cero() for conf in self["input_conf"]]
    if input_ceros:
        self.cero = CERO.combine_ceros(input_ceros)
        print("Successfully loaded scenario inputs as CERO.")

    # The starting point is always written out (this export is not governed by 'export_int_xlsx').
    FromCERO.dataframe_out(self.cero, _step_filename(0), "xlsx")

    for position, model in enumerate(self["models"]):
        model_cero = model.run(self.cero)
        print("Completed run of model (%s) at %s." % (model["name"],
                                                      dt.datetime.now().strftime('%Y-%m-%d %H:%M')))

        # A model may hand back None, in which case there is nothing to merge or export.
        if model_cero is None:
            continue

        if not CERO.is_cero(model_cero):
            raise TypeError("Object returned from model run is *not* of CERO format.")

        # Model-level setting wins; otherwise fall back to the scenario-level setting (default: export).
        export_model_output = model.get("export_mod_xlsx", self.get("export_mod_xlsx", True))
        if export_model_output:
            workbook = self.get_name() + "_%03d_%s.xlsx" % (self["run_no"], model["name"])
            print("Exporting output of %s to %s." % (model["name"], workbook))
            model_cero.to_excel(workbook)

        self.cero = CERO.combine_ceros([self.cero, model_cero])

        if self.get("export_int_xlsx", True):
            # Snapshot of the accumulated CERO after this model (steps are numbered from 1).
            step_file = _step_filename(position + 1)
            print("Exporting updated CERO to %s." % (step_file))
            self.cero.to_excel(step_file)

    for out_conf in self["output_conf"]:
        out_conf.exec_procedures(self.cero)

    print("Completed generation of scenario outputs.")
def exec_ops(self, cero):
    """
    Run this procedure's operations and either write the selected rows to file or return them.

    :param cero: The cero (``pandas.DataFrame``) object upon which to execute the operations. The operations \
    act on ``self.inputs``, which is prepared from ``cero`` by ``_set_inputs`` (presumably a copy, so that \
    ``cero`` itself is left untouched - confirm against ``_set_inputs``).
    :return: ``None`` if ``"outputs"`` is explicitly ``None`` or if the procedure has its own ``"file"`` \
    (the data is written there instead); otherwise a single-item ``dict`` mapping the procedure name to the \
    output ``pandas.DataFrame``.
    """

    self._set_inputs(cero)

    for operation in self["operations"]:
        result = self._exec_op(operation)
        if result is None:
            continue
        # Anything an operation hands back replaces/extends the working data.
        self.inputs = CERO.combine_ceros([self.inputs, result], overwrite=True)

    if "outputs" in self and self["outputs"] is None:
        # Outputs are explicitly discarded - useful when the operations exist only for
        # their side effects (e.g. plotting).
        return None

    if (self.get("outputs", []) == []) or (self.get("outputs", True) == True):
        # Nothing specific requested: every row is an output.
        self["outputs"] = self.inputs.index.tolist()

    row_positions = [self.inputs.index.get_loc(label) for label in self["outputs"]]
    out_df = self.inputs.iloc[row_positions]
    assert issubclass(type(out_df), pd.DataFrame)

    if "file" not in self:
        # No dedicated file: hand the result back, keyed by the procedure name.
        return {self["name"]: out_df}

    # A dedicated file was given, so everything selected goes there; the extension picks the format.
    output_type = os.path.splitext(self["file"])[1][1:]
    FromCERO.dataframe_out(out_df, self["file"], output_type, self.get("output_kwargs"))
    return None
def exec_procedures(self, cero):
    """ Execute all the procedures of the FromCERO object.

    Each procedure either writes its own file (and returns ``None``) or returns a ``dict`` of \
    ``{procedure name: CERO}``. The returned CEROs are collected in ``self.output_procedures`` and, if there \
    are any, combined and exported to ``self["file"]``. If no procedures are defined, ``cero`` itself is \
    exported under the key ``"default_output"``.

    :param pandas.DataFrame cero: A CERO to serve as input for the procedures. NOTE(review): the index is \
    passed to ``CERO.rename_index_values`` with the ``"map"`` option and the return value is discarded, so \
    whether ``cero`` is mutated depends on that helper - confirm before relying on it being unmodified.
    :raises Exception: Any exception raised by a procedure is re-raised as the same type with the procedure \
    name appended to the message (or unchanged, if the type can not be rebuilt from a single message).
    :raises ValueError: If the extension of ``self["file"]`` is not a supported general export type.
    """

    CERO.is_cero(cero, raise_exception=True, empty_ok=False)

    CERO.rename_index_values(cero, self.get("map", {}))

    self.output_procedures = OrderedDict()

    for procedure in self["procedures"]:
        try:
            # If ret is not None, it should be a dict with key: procedure["name"], value: resultant CERO
            ret = procedure.exec_ops(cero)
        except Exception as exc:
            context = " Error in procedure '%s'." % (procedure["name"])
            try:
                annotated = exc.__class__(exc.__str__() + context)
            except Exception:
                # Some exception types can not be constructed from a single message string.
                annotated = None
            if annotated is None:
                # Better to surface the real error unannotated than to mask it with a constructor failure.
                raise
            raise annotated from exc

        if ret is None:
            ret = {}

        self.output_procedures.update(ret)

    if not self["procedures"]:
        # No procedures at all: the input CERO is exported as-is.
        self.output_procedures["default_output"] = cero

    if any(not procedure.get("file") for procedure in self["procedures"]):
        msg = "It has been detected that not all procedures direct output to file. Therefore some output will go to '%s'." % self["file"]
        print(msg)
        FromCERO._logger.info(msg)

    if self.output_procedures != {}:
        # The file extension (without the leading dot) selects the export format.
        file_ext = os.path.splitext(self["file"])[1][1:]
        if file_ext in FromCERO.sup_output_types:
            out_df = CERO.combine_ceros(list(self.output_procedures.values()))
            FromCERO.dataframe_out(out_df, self["file"], output_type=file_ext)
        elif file_ext in FromCERO._Procedure.sup_output_types:
            raise ValueError("This data type is not supported for general export, because it probably has a more than 2 dimensions - export using 'procedures' instead.")
        else:
            raise ValueError("Unsupported data type detected for general export.")
def test_stitch_time(self):
    """
    Check that a ``noop`` rename followed by ``pc_change`` produces the expected CERO, for every supported \
    way of naming the initial column: ``init_cols`` as a list or scalar year, and ``init_icols`` as a scalar, \
    a list, or a negative position.

    Each variant writes to its own csv file, which is read back with ``ToCERO`` and compared to the expected \
    dataframe. The file is removed even if the comparison fails.
    """
    import numpy as np  # ``pd.np`` no longer exists in pandas >= 2.0

    init = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3],
                                   }, orient='index',
                                  dtype=np.float32)
    init.sort_index(inplace=True)
    init.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

    cero = pd.DataFrame.from_dict({"D": [100, 200], "E": [50, 0], "F": [-50, 200]},
                                  orient='index',
                                  dtype=np.float32)
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2019, 2020], format="%Y"))

    cero = CERO.combine_ceros([init, cero])

    test_df = pd.DataFrame.from_dict({"A": [1, 2, 6], "B": [2, 3, 3], "C": [3, 1.5, 4.5],
                                      "D": [np.nan, 100, 200], "E": [np.nan, 50, 0],
                                      "F": [np.nan, -50, 200]
                                      },
                                     orient='index',
                                     dtype=np.float32)
    test_df.sort_index(inplace=True)
    test_df.columns = pd.DatetimeIndex(data=pd.to_datetime([2018, 2019, 2020], format="%Y"))

    # (output file, keyword that identifies the initial column for 'pc_change')
    cases = [
        ("test_stitch_time.csv", {"init_cols": [2018]}),
        ("test_stitch_time2.csv", {"init_cols": 2018}),
        ("test_stitch_time3.csv", {"init_icols": 0}),
        ("test_stitch_time4.csv", {"init_icols": [0]}),
        ("test_stitch_time5.csv", {"init_icols": [-3]}),
    ]

    for out_file, init_kwargs in cases:
        with self.subTest(out_file=out_file, **init_kwargs):
            pc_change_op = {"func": "pc_change", "arrays": ["a_set"]}
            pc_change_op.update(init_kwargs)

            # A fresh configuration is built for every case so that no state is shared between procedures.
            proc = FromCERO._Procedure({"name": "test_stitch_time",
                                        "file": out_file,
                                        "sets": {"a_set": ["A", "B", "C"],
                                                 "b_set": ["D", "E", "F"]},
                                        "inputs": ["a_set", "b_set"],
                                        "operations": [{"func": "noop",
                                                        "rename": {"b_set": "a_set"}},
                                                       pc_change_op],
                                        "ref_dir": "."})
            try:
                proc.exec_ops(cero)

                tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
                df = tc.create_cero()

                self.assertTrue(df.equals(test_df))
            finally:
                # Do not leave generated files behind when a case fails.
                if os.path.exists(out_file):
                    os.remove(out_file)
def run(self, cero) -> 'CERO':
    """
    Executes all data import/export operations (defined by ``input_conf`` and ``output_conf`` respectively) and
    the execution of any commands.

    Commands in ``self["cmds"]`` are either a ``str`` (run as a shell command) or a ``dict``. A ``dict`` must \
    provide ``"args"`` and may provide ``"type"`` (``"shell"``, the default, or ``"python_method"``), ``"wd"`` \
    (directory to run in, defaults to the model's ``"wd"``) and ``"env_vars"``. For ``"shell"`` commands any \
    remaining items are passed as keyword arguments to ``subprocess.check_output``. For ``"python_method"`` \
    commands, ``"func"`` names a function in ``modfuncs`` that is called with ``*args`` and ``**kwargs`` \
    (``"kwargs"`` is optional).

    :param pandas.DataFrame cero: A CERO that contains all necessary data for conversion to input files (for \
    model execution).
    :return pandas.DataFrame: A CERO of relevant output data ('relevant' is defined by ``output_conf``). An \
    empty CERO is returned if no ``output_conf`` is defined.
    :raises ValueError: If a ``dict`` command lacks ``"args"``, a ``"python_method"`` command lacks ``"func"``, \
    or the command type is unsupported.
    :raises TypeError: If a command is neither a ``str`` nor a ``dict``.
    :raises subprocess.CalledProcessError: If a shell command exits with a non-zero return code.
    """
    import warnings

    for input_conf in self["input_conf"]:
        input_conf.exec_procedures(cero)

    print("Completed converting CERO to model input files (%s). Now processing commands..." % self["name"])

    with _modified_environ(wd=self["wd"], **self.get("env_vars", {})):

        for cmdobj in self["cmds"]:

            # Default command. NOTE(review): commands run with shell=True, so the configuration
            # file must be trusted.
            cmd = {"type": "shell", "shell": True}

            if isinstance(cmdobj, str):
                # A bare string is interpreted as a shell command.
                cmd.update({"args": cmdobj})
            elif isinstance(cmdobj, dict):
                cmd.update(cmdobj)  # Add user updates
                if "args" not in cmd:
                    raise ValueError("'args' must be provided for command of type 'dict'.")
            else:
                raise TypeError("Invalid command object in configuration file.")

            # Change to command-specific directory
            cmd_run_dir = cmd.pop("wd", self["wd"])
            if cmd_run_dir:
                cmd_run_dir = os.path.abspath(cmd_run_dir)

            cmd_type = cmd.pop("type")

            # Execute commands
            msg = "In directory '%s', executing command '%s'." % (cmd_run_dir, cmd)
            Model._logger.info(msg)

            with _modified_environ(wd=cmd_run_dir, **cmd.pop("env_vars", {})):

                # Depending on cmd_type, execute command in different ways...
                if cmd_type in ["shell"]:
                    args = cmd.pop("args")
                    Model._logger.info("Executing shell command: %s, with keyword args: %s." % (args, cmd))
                    try:
                        # Whatever is left in cmd is forwarded to subprocess as keyword arguments.
                        cmd["output"] = subprocess.check_output(args=args,
                                                                stderr=subprocess.STDOUT,
                                                                universal_newlines=True,
                                                                **cmd)
                    except subprocess.CalledProcessError as e:
                        msg = ("Command '%s' failed with returncode: %s, and message:\n" +
                               "%s\n" +
                               "Program logs may have more information.") % (args, e.returncode, e.output)
                        Model._logger.error(msg)
                        print(msg)
                        raise
                    Model._logger.info(cmd["output"])
                    print("Command returned: \n%s" % cmd["output"], end="")

                elif cmd_type in ["python_method"]:
                    # Explicit check rather than ``assert``, which is skipped when running with -O.
                    if "func" not in cmd:
                        raise ValueError("'func' must be defined for commands of type 'python_method'.")
                    func = getattr(modfuncs, cmd.pop("func"))
                    # 'kwargs' is optional for python methods.
                    cmd["output"] = func(*cmd["args"], **(cmd.get("kwargs") or {}))

                else:
                    raise ValueError("Unsupported command type specified.")

    if not self["output_conf"]:
        return CERO.create_empty()

    ceros = [oc.create_cero() for oc in self["output_conf"]]

    try:
        cero = CERO.combine_ceros(ceros, overwrite=False)
    except CERO.CEROIndexConflict:
        # Duplicate exports are reported as a warning; as the message states, later
        # procedures then take precedence over earlier ones.
        warnings.warn("Attempts to duplicate the export of data - i.e. one or more data series are being " +
                      "exported more than once (which should be avoided). The last procedure will define " +
                      "the intended data.", RuntimeWarning, stacklevel=2)
        cero = CERO.combine_ceros(ceros)

    return cero
def test_idxconflict(self):
    """
    Combining a CERO with itself while ``overwrite=False`` must raise ``CERO.CEROIndexConflict``.
    """
    pickle_path = TestCERO._dd + "test_cero.pickle"
    sample = DataTools.get_test_data(pickle_path)

    # The very same object is supplied twice, so every index value collides.
    duplicated = [sample, sample]

    with self.assertRaises(CERO.CEROIndexConflict):
        CERO.combine_ceros(duplicated, overwrite=False, verify_cero=True)