Example #1
0
    def run(self) -> None:
        """
        Execute a scenario run.

        The scenario inputs are loaded and merged into ``self.cero``, which is
        exported as step 0. Each model is then run against ``self.cero`` and any
        CERO it returns is merged back in. Finally, the procedures of every
        output configuration are executed against the resulting CERO.

        :raises TypeError: If a model returns an object that is neither ``None`` nor a CERO.
        """

        step_suffix = "_%03d_step_%02d.xlsx"

        # Start from an empty CERO, so a scenario without inputs still has one.
        self.cero = CERO.create_empty()

        input_ceros = [conf.create_cero() for conf in self["input_conf"]]
        if input_ceros:
            self.cero = CERO.combine_ceros(input_ceros)
            print("Successfully loaded scenario inputs as CERO.")

        # Step 0 is the state of the CERO before any model has been run.
        FromCERO.dataframe_out(
            self.cero,
            self.get_name() + step_suffix % (self["run_no"], 0),
            "xlsx")

        for step, model in enumerate(self["models"], start=1):
            m_cero = model.run(self.cero)
            print("Completed run of model (%s) at %s." % (
                model["name"],
                dt.datetime.now().strftime('%Y-%m-%d %H:%M')))

            # If output_conf is not defined for a model, then None is returned...
            if m_cero is None:
                continue

            if not CERO.is_cero(m_cero):
                raise TypeError(
                    "Object returned from model run is *not* of CERO format.")

            # The model-level setting takes precedence over the scenario-level
            # setting; model outputs are exported to xlsx by default.
            export_model = model.get("export_mod_xlsx",
                                     self.get("export_mod_xlsx", True))
            if export_model:
                model_out_file = (self.get_name() + "_%03d_%s.xlsx" %
                                  (self["run_no"], model["name"]))
                print("Exporting output of %s to %s." %
                      (model["name"], model_out_file))
                m_cero.to_excel(model_out_file)

            self.cero = CERO.combine_ceros([self.cero, m_cero])

            # Intermediate steps are exported to xlsx by default.
            if self.get("export_int_xlsx", True):
                isfn = self.get_name() + step_suffix % (self["run_no"], step)
                print("Exporting updated CERO to %s." % (isfn))
                self.cero.to_excel(isfn)

        for out_conf in self["output_conf"]:
            out_conf.exec_procedures(self.cero)

        # The loop above never breaks, so this always runs once it finishes.
        print("Completed generation of scenario outputs.")
Example #2
0
        def exec_ops(self, cero):
            """
            Run this procedure's operations and emit the selected output rows.

            :param cero: The cero (``pandas.DataFrame``) object upon which to execute the operations. It is \
            handed to ``_set_inputs``; the operations then act on ``self.inputs`` (intended to be a copy, so \
            that ``cero`` itself is left unmodified).
            :return: ``None`` if ``outputs`` is explicitly ``None`` or if the result is written to \
            ``self["file"]``; otherwise a ``dict`` mapping ``self["name"]`` to the output ``pandas.DataFrame``.
            """

            self._set_inputs(cero)

            for operation in self["operations"]:
                result = self._exec_op(operation)
                if result is None:
                    continue
                # Rows produced by an operation replace any existing rows with the same index
                self.inputs = CERO.combine_ceros([self.inputs, result], overwrite=True)

            if "outputs" in self and self["outputs"] is None:
                # The result of this procedure's operations is to be explicitly ignored - may be useful
                # when the objective is simply to plot data
                return

            select_all = (self.get("outputs", []) == []) or (self.get("outputs", True) == True)
            if select_all:
                # Get all rows if none specified (note this is stored on the procedure itself)
                self["outputs"] = self.inputs.index.tolist()

            row_index = self.inputs.index
            positions = [row_index.get_loc(label) for label in self["outputs"]]
            out_df = self.inputs.iloc[positions]

            assert issubclass(type(out_df), pd.DataFrame)

            if "file" not in self:
                # No dedicated file: hand the result back under the procedure's name
                return {self["name"]: out_df}

            # If file is specified, all 'outputs' from this procedure go to its own file,
            # with the output type taken from the file extension (without the leading dot)
            output_type = os.path.splitext(self["file"])[1][1:]
            FromCERO.dataframe_out(out_df, self["file"], output_type, self.get("output_kwargs"))
Example #3
0
    def exec_procedures(self, cero):
        """Execute all the procedures of the FromCERO object.

        Each procedure's ``exec_ops`` is run against ``cero``. Results that a procedure returns (instead of
        writing to its own file) are collected in ``self.output_procedures`` and exported together to
        ``self["file"]``. If there are no procedures at all, ``cero`` itself is exported.

        :param pandas.DataFrame cero: A CERO to serve as input for the procedures. The argument is intended \
        not to be mutated/modified.
        :raises ValueError: If the extension of ``self["file"]`` is not supported for general export.
        :raises Exception: An exception raised by a procedure is re-raised as the same exception type with \
        the procedure name appended to the message (or unchanged, if that type cannot be rebuilt from a message).
        """

        CERO.is_cero(cero, raise_exception=True, empty_ok=False)

        # NOTE(review): the return value is discarded, so the map only takes effect if
        # ``rename_index_values`` renames in place - which would contradict "not mutated" above. Confirm.
        CERO.rename_index_values(cero, self.get("map", {}))

        self.output_procedures = OrderedDict()

        for procedure in self["procedures"]:

            try:
                ret = procedure.exec_ops(cero)
                # if ret is not None, should be dict with key: procedure["name"], value: resultant CERO
            except Exception as e:
                msg = e.__str__() + " Error in procedure '%s'." % (procedure["name"])
                try:
                    wrapped = e.__class__(msg)
                except Exception:
                    # Some exception types cannot be constructed from a single message (e.g.
                    # subprocess.CalledProcessError) - do not let that mask the real error.
                    wrapped = None
                if wrapped is None:
                    raise
                raise wrapped from e

            if ret is not None:
                self.output_procedures.update(ret)

        if not self["procedures"]:  # If empty list
            self.output_procedures["default_output"] = cero

        if any(not procedure.get("file") for procedure in self["procedures"]):
            msg = "It has been detected that not all procedures direct output to file. Therefore some output will go to \'%s\'." % self["file"]
            print(msg)
            FromCERO._logger.info(msg)

        if self.output_procedures:
            # Output type is the file extension without the leading dot
            file_ext = os.path.splitext(self["file"])[1][1:]
            if file_ext in FromCERO.sup_output_types:
                out_df = CERO.combine_ceros(list(self.output_procedures.values()))
                FromCERO.dataframe_out(out_df, self["file"], output_type=file_ext)
            elif file_ext in FromCERO._Procedure.sup_output_types:
                raise ValueError("This data type is not supported for general export, because it probably has a more than 2 dimensions - export using 'procedures' instead.")
            else:
                raise ValueError("Unsupported data type detected for general export.")
Example #4
0
    def test_stitch_time(self):
        """Check that a renaming ``noop`` followed by ``pc_change`` stitches two time ranges together.

        ``init`` holds 2018 values for A, B and C; the second frame holds 2019-2020 values for D, E and F.
        After renaming ``b_set`` onto ``a_set`` and running ``pc_change``, the expected A/B/C rows are the
        2018 values with the D/E/F values applied as successive percentage changes (e.g. A: 1 -> 2 -> 6).
        The same result is expected however the initial column is identified: by label or by position,
        given as a scalar or a list, including a negative position.
        """
        # ``pd.np`` was deprecated in pandas 1.0 and removed in pandas 2.0, so use numpy directly.
        import numpy as np

        init = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3]},
                                      orient='index',
                                      dtype=np.float32)
        init.sort_index(inplace=True)
        init.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

        cero = pd.DataFrame.from_dict({"D": [100, 200], "E": [50, 0], "F": [-50, 200]},
                                      orient='index',
                                      dtype=np.float32)
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2019, 2020], format="%Y"))

        cero = CERO.combine_ceros([init, cero])

        test_df = pd.DataFrame.from_dict({"A": [1, 2, 6], "B": [2, 3, 3], "C": [3, 1.5, 4.5],
                                          "D": [np.nan, 100, 200], "E": [np.nan, 50, 0], "F": [np.nan, -50, 200]
                                          },
                                         orient='index',
                                         dtype=np.float32)
        test_df.sort_index(inplace=True)
        test_df.columns = pd.DatetimeIndex(data=pd.to_datetime([2018, 2019, 2020], format="%Y"))

        # (output file, extra ``pc_change`` options identifying the initial column)
        cases = [
            ("test_stitch_time.csv", {"init_cols": [2018]}),
            ("test_stitch_time2.csv", {"init_cols": 2018}),
            ("test_stitch_time3.csv", {"init_icols": 0}),
            ("test_stitch_time4.csv", {"init_icols": [0]}),
            ("test_stitch_time5.csv", {"init_icols": [-3]}),
        ]

        for out_file, init_opts in cases:
            # Build a fresh configuration for every case, in case the procedure mutates it
            pc_change_op = {"func": "pc_change", "arrays": ["a_set"]}
            pc_change_op.update(init_opts)

            proc = FromCERO._Procedure({"name": "test_stitch_time",
                                        "file": out_file,
                                        "sets": {"a_set": ["A", "B", "C"],
                                                 "b_set": ["D", "E", "F"]},
                                        "inputs": ["a_set", "b_set"],
                                        "operations": [{"func": "noop",
                                                        "rename": {"b_set": "a_set"}},
                                                       pc_change_op],
                                        "ref_dir": "."})
            try:
                proc.exec_ops(cero)

                tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
                df = tc.create_cero()

                self.assertTrue(df.equals(test_df),
                                msg="Unexpected result for pc_change options %r." % (init_opts,))
            finally:
                # Always clean up, so a failing case does not leave its output file behind
                if os.path.exists(out_file):
                    os.remove(out_file)
Example #5
0
    def run(self, cero) -> 'CERO':
        """
        Executes all data import/export operations (defined by ``input_conf`` and ``output_conf`` respectively) and the execution of any commands.

        Commands in ``self["cmds"]`` are either strings (run as shell commands) or dicts. A dict must provide
        ``args`` and may provide ``type`` (``"shell"``, the default, or ``"python_method"``), ``wd``,
        ``env_vars`` and, for ``python_method`` commands, ``func`` and ``kwargs``. Any remaining keys of a
        shell command are passed on to ``subprocess.check_output``.

        :param pandas.DataFrame cero: A CERO that contains all necessary data for conversion to input files (for
            model execution).
        :return pandas.DataFrame: A CERO of relevant output data ('relevant' is defined by ``output_conf``).
        :raises ValueError: If a dict command lacks ``args``, a ``python_method`` command lacks ``func``, or the
            command type is unsupported.
        :raises TypeError: If a command is neither a ``str`` nor a ``dict``.
        :raises subprocess.CalledProcessError: If a shell command exits with a non-zero return code.
        """
        import warnings  # Local import: only needed to report duplicated exports below

        for input_conf in self["input_conf"]:
            input_conf.exec_procedures(cero)

        print(
            "Completed converting CERO to model input files (%s). Now processing commands..."
            % self["name"])

        with _modified_environ(wd=self["wd"], **self.get("env_vars", {})):

            for cmdobj in self["cmds"]:

                # NOTE(review): shell=True is the default, so string commands from the configuration
                # file are interpreted by the shell - only run trusted configuration files.
                cmd = {"type": "shell", "shell": True}  # Default command

                if isinstance(cmdobj, str):
                    # cmd is interpreted as shell command by default
                    cmd["args"] = cmdobj
                elif isinstance(cmdobj, dict):
                    cmd.update(cmdobj)  # Add user updates
                    if "args" not in cmd:
                        raise ValueError(
                            "'args' must be provided for command of type 'dict'."
                        )
                else:
                    raise TypeError(
                        "Invalid command object in configuration file.")

                # Change to command-specific directory
                cmd_run_dir = cmd.pop("wd", self["wd"])
                if cmd_run_dir:
                    cmd_run_dir = os.path.abspath(cmd_run_dir)

                cmd_type = cmd.pop("type")

                # Execute commands
                msg = "In directory '%s', executing command '%s'." % (
                    cmd_run_dir, cmd)
                Model._logger.info(msg)
                with _modified_environ(wd=cmd_run_dir,
                                       **cmd.pop("env_vars", {})):

                    # Depending on cmd_type, execute command in different ways...
                    if cmd_type in ["shell"]:
                        args = cmd.pop("args")
                        Model._logger.info(
                            "Executing shell command: %s, with keyword args: %s."
                            % (args, cmd))
                        try:
                            # Whatever is left in cmd is forwarded as keyword arguments
                            cmd["output"] = subprocess.check_output(
                                args=args,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True,
                                **cmd)
                        except subprocess.CalledProcessError as e:
                            msg = (
                                "Command '%s' failed with returncode: %s, and message:\n"
                                + "%s\n" +
                                "Program logs may have more information.") % (
                                    args, e.returncode, e.output)
                            Model._logger.error(msg)
                            print(msg)
                            raise
                        Model._logger.info(cmd["output"])
                        print("Command returned: \n%s" % cmd["output"], end="")
                    elif cmd_type in ["python_method"]:
                        # Explicit check rather than an assert, which is stripped under ``python -O``
                        if "func" not in cmd:
                            raise ValueError(
                                "'func' must be defined for commands of type 'python_method'."
                            )
                        func = getattr(modfuncs, cmd.pop("func"))
                        # 'kwargs' is optional - default to calling with positional arguments only
                        cmd["output"] = func(*cmd["args"], **(cmd.get("kwargs") or {}))
                    else:
                        raise ValueError("Unsupported command type specified.")

        if not self["output_conf"]:
            return CERO.create_empty()

        ceros = [oc.create_cero() for oc in self["output_conf"]]

        try:
            cero = CERO.combine_ceros(ceros, overwrite=False)
        except CERO.CEROIndexConflict:
            # Warn (rather than raise) so that, as the message states, the last procedure wins
            warnings.warn(
                "Attempts to duplicate the export of data - i.e. one or more data series are being "
                +
                "exported more than once (which should be avoided). The last procedure will define "
                + "the intended data.", RuntimeWarning)
            cero = CERO.combine_ceros(ceros)

        return cero
Example #6
0
    def test_idxconflict(self):
        """Combining a CERO with itself, without overwriting, must raise ``CERO.CEROIndexConflict``."""
        test_cero = DataTools.get_test_data(TestCERO._dd + "test_cero.pickle")

        # Every index value of the second CERO duplicates one in the first
        duplicated = [test_cero, test_cero]

        self.assertRaises(CERO.CEROIndexConflict,
                          CERO.combine_ceros,
                          duplicated,
                          overwrite=False,
                          verify_cero=True)