コード例 #1
0
ファイル: test_tocero.py プロジェクト: charlie0389/ConCERO
    def test_empty_xlsx(self):
        """An empty spreadsheet must import as an *empty* CERO.

        ``CERO.is_cero`` is expected to raise ``CERO.EmptyCERO`` when called
        with ``empty_ok=False`` and to accept the same object when called with
        its default arguments.
        """
        conf_path = TestToCERO._dd + r"test_empty_xlsx.yaml"
        cero = ToCERO(conf=conf_path).create_cero()

        # Strict validation: emptiness is an error.
        with self.assertRaises(CERO.EmptyCERO):
            strict_result = CERO.is_cero(cero, empty_ok=False)
            self.assertTrue(strict_result)

        # Default validation: the empty CERO is acceptable.
        default_result = CERO.is_cero(cero)
        self.assertTrue(default_result)
コード例 #2
0
ファイル: test_libfuncs.py プロジェクト: charlie0389/ConCERO
    def test_fillna(self):
        """Exercise ``libfuncs.fillna`` with a fill value, with ``bfill`` and with defaults.

        A 3x3 float32 CERO is built with a single NaN at position ``[1, 1]``
        (row "B", second column). After each call the hole is expected to
        hold ``0.0`` (explicit value), ``5.0`` (``method="bfill"``) and
        ``3.0`` (default arguments) respectively.
        """
        nan = float("nan")  # avoids the ``pd.np`` alias, which pandas 2.0 removed

        df = pd.DataFrame.from_dict(
            {
                "A": [1, 2, 3],
                "B": [3, 4, 5],
                "C": [6, 7, 8]
            }, orient="index")
        df.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2018, 2019], format="%Y"))
        df.sort_index(inplace=True)
        # Cast to float *before* punching the hole, so NaN is never written
        # into an integer-typed frame.
        df = df.astype("float32")
        df.iloc[1, 1] = nan
        self.assertTrue(CERO.is_cero(df))

        libfuncs.fillna(df, value=0.0)
        self.assertEqual(df.iloc[1, 1], 0.0)

        df.iloc[1, 1] = nan
        libfuncs.fillna(df, method="bfill")
        self.assertEqual(df.iloc[1, 1], 5.0)

        df.iloc[1, 1] = nan
        libfuncs.fillna(df)
        self.assertEqual(df.iloc[1, 1], 3.0)

        # The frame must still be a valid CERO after the in-place operations.
        self.assertTrue(CERO.is_cero(df))
コード例 #3
0
ファイル: test_tocero.py プロジェクト: charlie0389/ConCERO
    def test_rename_2(self):
        """Check the index order produced by the ``test_rename_2.yaml`` import.

        The leading index entries of the imported CERO are compared pairwise
        against ``test_idx``.
        """
        to_cero = ToCERO(conf=(TestToCERO._dd + r"test_rename_2.yaml"))
        cero = to_cero.create_cero()

        # Assert the validation result rather than discarding it, in line with
        # the other tests of this suite.
        self.assertTrue(CERO.is_cero(cero))

        # Rename operation always moves series to the end
        test_idx = ["PROFESSIONALS", ("1", "MANAGERS")]

        self.assertTrue(
            all([x == y for (x, y) in zip(test_idx, cero.index.tolist())]))
コード例 #4
0
ファイル: scenario.py プロジェクト: charlie0389/ConCERO
    def run(self) -> None:
        """
        Execute a scenario run.

        Steps, in order:

        1. Build a CERO from every item of ``self["input_conf"]`` and combine them
           (an empty CERO is used when there are no inputs).
        2. Export that CERO as the "step 00" xlsx file.
        3. Run each model of ``self["models"]`` against the current CERO. A model
           that returns ``None`` contributes nothing; any other return value must
           be a CERO, is optionally exported, and is merged into the scenario CERO.
        4. Execute every output configuration of ``self["output_conf"]``.

        :raises TypeError: if a model returns an object that is not a CERO.
        """

        def step_filename(step):
            # Evaluated lazily on every call, so name/run number are read at export time.
            return (self.get_name() + "_%03d_step_%02d.xlsx" %
                    (self["run_no"], step))

        self.cero = CERO.create_empty()

        input_ceros = [in_conf.create_cero() for in_conf in self["input_conf"]]
        if input_ceros:
            self.cero = CERO.combine_ceros(input_ceros)
            print("Successfully loaded scenario inputs as CERO.")

        FromCERO.dataframe_out(self.cero, step_filename(0), "xlsx")

        for step, model in enumerate(self["models"], start=1):
            m_cero = model.run(self.cero)
            finished_at = dt.datetime.now().strftime('%Y-%m-%d %H:%M')
            print("Completed run of model (%s) at %s." %
                  (model["name"], finished_at))

            # If output_conf is not defined for a model, then None is returned...
            if m_cero is None:
                continue

            if not CERO.is_cero(m_cero):
                raise TypeError(
                    "Object returned from model run is *not* of CERO format.")

            # By default, export model outputs automatically to xlsx files.
            # A model-level setting takes precedence over the scenario-level one.
            scenario_default = self.get("export_mod_xlsx", True)
            if model.get("export_mod_xlsx", scenario_default):
                model_out_file = (self.get_name() + "_%03d_%s.xlsx" %
                                  (self["run_no"], model["name"]))
                print("Exporting output of %s to %s." %
                      (model["name"], model_out_file))
                m_cero.to_excel(model_out_file)

            self.cero = CERO.combine_ceros([self.cero, m_cero])

            # If true (default), export the intermediate steps to xlsx files
            if self.get("export_int_xlsx", True):
                isfn = step_filename(step)
                print("Exporting updated CERO to %s." % (isfn))
                self.cero.to_excel(isfn)

        for out_conf in self["output_conf"]:
            out_conf.exec_procedures(self.cero)

        # Printed unconditionally: the loop above never breaks.
        print("Completed generation of scenario outputs.")
コード例 #5
0
ファイル: libfuncs.py プロジェクト: charlie0389/ConCERO
def groupby(df: "CERO",
            *args,
            key: "Union[int, list[int]]" = None,
            match: str = None,
            agg: str = None,
            **kwargs):
    """Aggregate rows of ``df`` that share identifier fields, in place.

    For every identifier ``m`` to match, the rows whose index value ``x``
    satisfies ``x[k] == m[i]`` for each ``(i, k)`` in ``enumerate(key)`` are
    aggregated with ``agg``. The result is written into the first matching
    row, and that row is renamed to the value returned by
    ``_Identifier.keep_only_fields(key, row)[0]``.

    :param df: The CERO to operate on (modified in place and returned).
    :param key: Position, or list of positions, of the identifier fields to group on. Required.
    :param match: Identifier to match. If ``None``, the identifiers come from \
        ``_Identifier.unique_id_fields(df.index.values, key=key)``.
    :param agg: Aggregation passed to ``DataFrameGroupBy.agg``.
    :param kwargs: Overrides for the ``DataFrame.groupby`` defaults \
        (``axis=0``, ``sort=False``, ``group_keys=False``).
    :return: ``df``.
    :raises TypeError: if ``key`` is ``None`` or contains a non-``int``.
    """
    key_error_msg = "'key' must be provided to 'groupby' function as either an int or list of ints."

    if key is None:
        raise TypeError(key_error_msg)
    if not isinstance(key, list):
        key = [key]

    if any(not isinstance(k, int) for k in key):
        raise TypeError(key_error_msg)

    groupby_kwargs = {"axis": 0, "sort": False, "group_keys": False}
    groupby_kwargs.update(kwargs)

    match = _Identifier.tupleize_name(match)
    if match is None:
        m_ids = _Identifier.unique_id_fields(df.index.values, key=key)
    else:
        m_ids = [match]

    # NOTE(review): a str identifier is split into a tuple of its characters
    # here - presumably only single-character identifiers reach this point; confirm.
    m_ids = [tuple(m) if isinstance(m, str) else m for m in m_ids]

    rename_dict = {}
    for m in m_ids:

        def belongs_to_group(x):
            # True when every keyed field of index value ``x`` equals the matching field of ``m``.
            return all([x[k] == m[idx] for idx, k in enumerate(key)])

        # Rows are split into a True group and a False group, then aggregated.
        aggregated = df.groupby(by=belongs_to_group, **groupby_kwargs).agg(agg)

        # The aggregate replaces the first row that meets the condition.
        row_loc = next(x for x in df.index.values if belongs_to_group(x))
        row_pos = df.index.get_loc(row_loc)
        df.iloc[row_pos] = aggregated.loc[True]

        # Schedule the rename of that row.
        rename_dict[row_loc] = _Identifier.keep_only_fields(key, row_loc)[0]

    CERO.rename_index_values(df, rename_dict, inplace=True)
    return df
コード例 #6
0
ファイル: from_cero.py プロジェクト: charlie0389/ConCERO
        def exec_ops(self, cero):
            """
            Execute this procedure's operations and deliver the selected rows.

            :param cero: The cero (``pandas.DataFrame``) object upon which to execute the operations. The operations \
                act on ``self.inputs``, which ``_set_inputs`` derives from ``cero``.
            :return: ``None`` if ``outputs`` is explicitly ``None`` or if the result was written to ``self["file"]``; \
                otherwise a ``dict`` mapping ``self["name"]`` to the output ``pandas.DataFrame``.
            """

            self._set_inputs(cero)

            for operation in self["operations"]:
                result = self._exec_op(operation)
                if result is None:
                    continue
                # Results are merged back so later operations can use them.
                self.inputs = CERO.combine_ceros([self.inputs, result], overwrite=True)

            if "outputs" in self and self["outputs"] is None:
                # The result of this procedures operations is to be explicitly ignored, may be useful when objective is simply to plot data
                return

            requested = self.get("outputs", [])
            if requested == [] or requested == True:
                # Get all rows if none specified
                self["outputs"] = self.inputs.index.tolist()

            row_positions = [self.inputs.index.get_loc(o) for o in self["outputs"]]
            out_df = self.inputs.iloc[row_positions]

            assert isinstance(out_df, pd.DataFrame)

            if "file" not in self:
                # procedure output name is that provided
                return {self["name"]: out_df}

            # If file is specified, all 'outputs' from this procedure go to its own file
            extension = os.path.splitext(self["file"])[1]
            output_type = extension[1:]  # drop the leading "."
            FromCERO.dataframe_out(out_df, self["file"], output_type, self.get("output_kwargs"))
コード例 #7
0
    def test_plotoutput(self):
        """Run the plotting procedure of ``test_plotoutput.yaml`` and check that the figure file can be loaded.

        The test is skipped when ``seaborn`` cannot be imported.
        """

        try:
            import seaborn
        except ImportError:
            # NOTE(review): the message names PyQt4 although the import attempted is seaborn - confirm wording.
            self.skipTest("PyQt4 not installed, and therefore ConCERO's plotting capabilities cannot be used.")

        data_dir = TestPlotOutput._dd
        nf = "AssociateProfessionals.png"

        # Reference figure shipped with the test data.
        DataTools.get_test_data(data_dir + "test_plotoutput.png")

        cero = CERO.read_xlsx(data_dir + "test_plotoutput.xlsx")
        FromCERO(data_dir + "test_plotoutput.yaml").exec_procedures(cero)

        # Newly generated figure. It is loaded but deliberately not compared with the
        # reference: figures are very hard to compare accurately because defaults seem
        # to differ depending on operating system.
        DataTools.get_test_data(nf)

        # Tidy up
        generated = os.path.relpath(nf)
        os.remove(generated)
コード例 #8
0
ファイル: test_libfuncs.py プロジェクト: charlie0389/ConCERO
    def test_apply_func(self):
        """``libfuncs.apply_func`` with ``numpy_func="square"`` must square every value in place.

        The dtype is given as the string ``"float32"`` rather than through the
        ``pd.np`` alias, which was removed in pandas 2.0.
        """
        years = pd.DatetimeIndex(
            pd.to_datetime([2017, 2018, 2019], format="%Y"))

        df = pd.DataFrame.from_dict(
            {
                "A": [1, 2, 3],
                "B": [3, 4, 5],
                "C": [6, 7, 8]
            },
            orient="index",
            dtype="float32")
        df.columns = years
        df.sort_index(inplace=True)
        self.assertTrue(CERO.is_cero(df))

        libfuncs.apply_func(df, numpy_func="square")

        # Element-wise squares of the input above.
        test_df = pd.DataFrame.from_dict(
            {
                "A": [1, 4, 9],
                "B": [9, 16, 25],
                "C": [36, 49, 64]
            },
            orient="index",
            dtype="float32")
        test_df.columns = years
        test_df.sort_index(inplace=True)

        self.assertTrue(df.equals(test_df))
コード例 #9
0
    def test__import_vd(self):
        """Import ``test__import_vd.VD`` with and without a ``default_year``.

        Without ``default_year`` a single series is expected. With
        ``default_year: 2018`` three further series are expected, each with a
        value only in the 2018 column.

        NaN and the float32 dtype are spelled without the ``pd.np`` alias,
        which was removed in pandas 2.0.
        """
        nan = float("nan")
        vd_file = TestToCERO_FileObj._dd + "test__import_vd.VD"

        # --- Case 1: no default year ---
        fo = ToCERO._FileObj({
            "file": vd_file,
            "date_col": 3,
            "val_col": 8
        })
        df = fo._import_vd()
        df.columns.set_names([None], inplace=True)
        df = df.astype("float32")

        test_df = pd.DataFrame.from_dict(
            {
                ("VAR_Act", "-", "FT_COMELC", "ACT", "2015", "PD", "-"): [
                    0.740833333333336, 0.740833333333336, 0.8005115537522,
                    0.829127920241238
                ]
            },
            orient="index",
            dtype="float32")
        test_df.columns = pd.Index([2015, 2016, 2020, 2025])
        test_df.sort_index(inplace=True)

        self.assertTrue(test_df.equals(df))

        # --- Case 2: default year supplied ---
        fo = ToCERO._FileObj({
            "file": vd_file,
            "date_col": 3,
            "val_col": 8,
            "default_year": 2018
        })
        df = fo._import_vd()
        df.columns.set_names([None], inplace=True)
        df = df.astype("float32")
        df.sort_index(inplace=True)

        test_df = pd.DataFrame(data=[
            [
                0.740833333333336, 0.740833333333336, nan,
                0.8005115537522, 0.829127920241238
            ],
            [nan, nan, 1.39891653080538, nan, nan],
            [nan, nan, 19.6047685777802, nan, nan],
            [nan, nan, 31516.8951973493, nan, nan],
        ],
                               columns=[2015, 2016, 2018, 2020, 2025],
                               dtype="float32")
        test_df.index = CERO.create_cero_index([
            ("VAR_Act", "-", "FT_COMELC", "ACT", "2015", "PD", "-"),
            ("Cost_Salv", "-", "EN_WinONS-26", "ADE", "2040", "-", "-"),
            ("Cost_NPV", "-", "EE_StmTurb009", "CQ", "-", "-", "ACT"),
            ("Reg_irec", "-", "-", "WA", "-", "-", "-"),
        ])
        test_df.sort_index(inplace=True)
        self.assertTrue(test_df.equals(df))
コード例 #10
0
ファイル: from_cero.py プロジェクト: charlie0389/ConCERO
        def _set_inputs(self, cero: pd.DataFrame):
            """Store in ``self.inputs`` a deep copy of the rows of ``cero`` listed in ``self["inputs"]``.

            Working on a copy ensures that ``operations`` do not alter ``cero``. If ``self["inputs"]`` is an empty \
            list, it is replaced with every index value of ``cero``. Afterwards each entry of the optional \
            ``self["map"]`` list is used to rename index values of the copy.

            :param cero: The CERO from which the inputs are taken.
            :raises KeyError: if any item of ``self["inputs"]`` is not in the index of ``cero``.
            """
            if self["inputs"] == []:
                # Input is entire CERO unless otherwise specified
                self["inputs"] = cero.index.tolist()

            try:
                # Reduce data frame to necessary data and copy
                row_positions = [cero.index.get_loc(loc) for loc in self["inputs"]]
                self.inputs = copy.deepcopy(cero.iloc[row_positions])
            except KeyError:
                # The offending identifiers are only collected on failure.
                invalid_inputs = [i for i in self["inputs"] if i not in cero.index]
                msg = ("Inputs %s do not exist. The most likely reason is that the configuration file is " +
                       "incorrectly specified, or lacks specification. If debugging level has been set to " +
                       "'DEBUG', then the input list is in the log file - note that this list may be " +
                       "extraordinarily long. Common causes of this problem include: \n" +
                       " 1. Set definition in configuration file includes elements that do not exist in the CERO.\n" +
                       " 2. Mis-spellings of identifiers in the configuration file (which includes names of sets for 'inputs' or 'arrays').\n" +
                       " 3. Incorrect ordering of sets in the identifier."
                       ) % (invalid_inputs,)
                FromCERO._logger.error(msg)
                raise KeyError(msg)

            assert (isinstance(self.inputs, pd.DataFrame))

            map_dict = {}
            for map_op in self.get("map", []):

                idx = map_op.get("idx")
                orig_s = self["sets"][map_op["orig"]]
                ns = self["sets"][map_op["new"]]

                for val in self.inputs.index.values:

                    new_val = val

                    # The type check comes first so that a plain-string identifier is never
                    # indexed by ``idx`` (which could raise IndexError on a short string).
                    if idx is not None and (not isinstance(val, str)) and (val[idx] in orig_s):
                        # Replace only field ``idx`` of the tuple identifier.
                        new_val = val[:idx] + (ns[orig_s.index(val[idx])],) + val[idx+1:]
                    elif val in orig_s:
                        # Replace the whole identifier.
                        new_val = ns[orig_s.index(val)]

                    map_dict[val] = new_val
                CERO.rename_index_values(self.inputs, map_dict, inplace=True)
コード例 #11
0
ファイル: from_cero.py プロジェクト: charlie0389/ConCERO
    def exec_procedures(self, cero):
        """ Execute all the procedures of the FromCERO object.

        Each procedure's returned mapping (if any) is collected in ``self.output_procedures``. When there are no \
        procedures at all, ``cero`` itself is collected under ``"default_output"``. Everything collected is then \
        combined and exported to ``self["file"]``.

        :param pandas.DataFrame cero: A CERO to serve as input for the procedures.
        :raises ValueError: if the extension of ``self["file"]`` is not a supported general export type.
        """

        CERO.is_cero(cero, raise_exception=True, empty_ok=False)

        # NOTE(review): the return value is discarded and ``inplace`` is not given - whether
        # this renames ``cero`` depends on the default of ``rename_index_values``; confirm.
        CERO.rename_index_values(cero, self.get("map", {}))

        self.output_procedures = OrderedDict()

        for procedure in self["procedures"]:

            try:
                # if ret is not None, should be dict with key: procedure["name"], value: resultant CERO
                ret = procedure.exec_ops(cero)
            except Exception as e:
                # Re-raise the same exception type with the procedure name appended.
                raise type(e)(str(e) + " Error in procedure '%s'." % (procedure["name"]))

            self.output_procedures.update({} if ret is None else ret)

        if not self["procedures"]: # If empty list
            self.output_procedures["default_output"] = cero

        lacks_file = [not procedure.get("file") for procedure in self["procedures"]]
        if any(lacks_file):
            msg = "It has been detected that not all procedures direct output to file. Therefore some output will go to \'%s\'." % self["file"]
            print(msg)
            FromCERO._logger.info(msg)

        if not self.output_procedures:
            # Nothing was collected, so there is nothing to export.
            return

        file_ext = os.path.splitext(self["file"])[1][1:]
        if file_ext in FromCERO.sup_output_types:
            out_df = CERO.combine_ceros(list(self.output_procedures.values()))
            FromCERO.dataframe_out(out_df, self["file"], output_type=file_ext)
            return

        if file_ext in FromCERO._Procedure.sup_output_types:
            raise ValueError("This data type is not supported for general export, because it probably has a more than 2 dimensions - export using 'procedures' instead.")

        raise ValueError("Unsupported data type detected for general export.")
コード例 #12
0
ファイル: test_tocero.py プロジェクト: charlie0389/ConCERO
    def test_complex_xlsx(self):
        """The import described by ``test_complex_xlsx_import.yaml`` must equal the pickled reference frame."""

        conf_file = TestToCERO._dd + r'test_complex_xlsx_import.yaml'
        cero = ToCERO(conf=conf_file).create_cero()

        # Reference result stored alongside the test data.
        reference_file = TestToCERO._dd + "test_complex_xlsx_result.pickle"
        expected = DataTools.get_test_data(reference_file)

        self.assertTrue(CERO.is_cero(cero))
        self.assertTrue(cero.equals(expected))
コード例 #13
0
    def test_csv_complex(self):
        """``CERO.read_csv`` must reproduce a frame whose index mixes a tuple and a plain string.

        ``np.float32`` is used directly; the ``pd.np`` alias was removed in
        pandas 2.0.
        """

        test_df = pd.DataFrame(data=np.array([[3.78981, 2.73377],
                                              [2.22027, 3.99257]]),
                               index=[("a", "b"), "c"],
                               dtype=np.float32)
        test_df.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2018], format="%Y"))

        cero = CERO.read_csv(TestCERO._dd + "test_csv_complex.csv")

        self.assertTrue(test_df.equals(cero))
コード例 #14
0
ファイル: from_cero.py プロジェクト: charlie0389/ConCERO
        def _exec_op(self, op: dict):
            """Execute a single operation against ``self.inputs``.

            The function named by ``op["func"]`` (``"noop"`` if absent) is looked up in the modules of \
            ``self["libfuncs"]`` and called as ``func(self.inputs, *args, locs=arrays, **remaining_options)``.

            ``op`` itself is left untouched: options are extracted from a shallow copy, so the same operation \
            definition can be executed again with identical results.

            :param op: The operation definition. Recognised keys are ``func``, ``args``, ``rename`` and ``arrays``; \
                every other key is forwarded to the function as a keyword argument.
            :return: Whatever the function returns. If ``rename`` is given, the result is restricted to the renamed rows.
            :raises AttributeError: if no module of ``self["libfuncs"]`` provides the named function.
            """

            # Copy first - popping from ``op`` directly would strip the caller's configuration.
            options = dict(op)

            # Apply operation to procedure
            func_name = options.pop('func', "noop") # Perform noop (no-operation) if no func provided.
            op_args = options.pop('args', [])
            rename = options.pop("rename", None)

            arrays = None
            if "arrays" in options:
                arrays = options.pop("arrays")
                if isinstance(arrays, str):
                    arrays = [arrays]
                arrays = _Identifier.get_all_idents(arrays, sets=self["sets"])

            for mod in self["libfuncs"]:
                if hasattr(mod, func_name):
                    func = getattr(mod, func_name)
                    break
            else:
                # Reached only when no module provided the function.
                msg = ('Invalid function name provided - \'%s\'. Function does not exist in any of the modules %s. It may be necessary to create a python module with the necessary functions and provide this file with the \'libfuncs\' option.' %
                            (func_name, self["libfuncs"]))
                FromCERO._logger.error(msg)
                raise AttributeError(msg)

            FromCERO._logger.debug("Function call: %s(*arrays, **op)" % func.__name__)

            ret = func(self.inputs, *op_args, locs=arrays, **options)

            if rename is not None:

                if ret is None:
                    # The function returned nothing to rename, so fall back on noop. The resolved
                    # function name is passed under 'func' as well.
                    # NOTE(review): presumably noop ignores 'func' - confirm before dropping it.
                    noop_options = dict(options, func=func.__name__)
                    ret = getattr(libfuncs, "noop")(self.inputs, *op_args, locs=arrays, **noop_options)

                if isinstance(rename, str):
                    rename = {ret.index.tolist()[0]: rename} # Rename the first index by default

                if isinstance(rename, list):
                    # Build mapping dictionary
                    rename = _Identifier.get_mapping_dict(ret.index.tolist(), rename, sets=self.get("sets"))
                elif isinstance(rename, dict):
                    rename = _Identifier.get_one_to_one_mapping(rename, sets=self.get("sets"))

                # At this point, rename should be one-to-one mapping dict

                renamed = CERO.rename_index_values(ret.loc[list(rename.keys())], rename, inplace=False)
                ret = renamed.loc[list(rename.values())]  # Restrict renamed to only the rows that have been specified

                # Note that ret will be restricted to only those values that have been renamed.

            return ret
コード例 #15
0
 def init_df():
     """Build and validate the 3x3 float32 CERO used as a fixture.

     ``self`` is taken from the enclosing scope. The dtype is given as the
     string ``"float32"`` rather than through the ``pd.np`` alias, which was
     removed in pandas 2.0.
     """
     df = pd.DataFrame.from_dict(
         {
             "A": [1, 2, 3],
             "B": [3, 4, 5],
             "C": [6, 7, 8]
         },
         orient="index")
     df.columns = pd.DatetimeIndex(
         pd.to_datetime([2017, 2018, 2019], format="%Y"))
     df.sort_index(inplace=True)
     df = df.astype("float32")
     self.assertTrue(CERO.is_cero(df))
     return df
コード例 #16
0
    def test_export_to_csv(self):
        """Run ``test_procedure_export_csv.yaml`` and check the leading rows of ``csv_export.csv``.

        ``np.float32`` is used directly; the ``pd.np`` alias was removed in
        pandas 2.0.
        """

        cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3], "D": [4], "E": [5], "F": [6], }, orient='index',
                                      dtype=np.float32)
        cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))
        cero.sort_index(inplace=True)
        self.assertTrue(CERO.is_cero(cero))

        fc = FromCERO(cfg.d_td + "test_procedure_export_csv.yaml")
        fc.exec_procedures(cero)

        df1 = pd.read_csv("csv_export.csv", index_col=0)

        # Values of the first column, compared pairwise with the expected ones.
        test_list = [1, 2, 3]
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))

        # Row labels, compared pairwise with the expected ones.
        test_list = ["A", "B", "C"]
        self.assertTrue(all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

        os.remove("csv_export.csv")
コード例 #17
0
ファイル: test_tocero.py プロジェクト: charlie0389/ConCERO
    def test_regex_format(self):
        """Import a csv whose time labels need both ``time_regex`` and ``time_fmt`` to be parsed.

        The dtype is given as the string ``"float32"`` rather than through the
        ``pd.np`` alias, which was removed in pandas 2.0.
        """

        tc = ToCERO({
            "files": [{
                "file": TestToCERO._dd + "test_csv_regex.csv",
                "time_regex":
                r"(Y\d{4}).*",  # Regex could pick out just the year, but want to test 'time_fmt' as well...
                "time_fmt": r"Y%Y"
            }]
        })
        cero = tc.create_cero()

        test_df = pd.DataFrame(data=[[1, 2], [3, 4]],
                               columns=[2016, 2017],
                               dtype="float32")
        test_df.index = CERO.create_cero_index(["A", "B"])
        test_df.columns = pd.DatetimeIndex(
            pd.to_datetime([2016, 2017], format="%Y"))
        test_df.sort_index(inplace=True)

        self.assertTrue(cero.equals(test_df))
コード例 #18
0
ファイル: test_fromcero.py プロジェクト: charlie0389/ConCERO
    def test_sets_and_mapping(self):
        """Run ``test_fromcero_mapping.yaml`` and check the two csv files it writes.

        The leading rows of the first file are expected to be A, B, C with
        values 1, 2, 3. Those of the second file are expected to be G, H, I
        with values 4, 5, 6.

        ``np.float32`` is used directly; the ``pd.np`` alias was removed in
        pandas 2.0.
        """

        cero = pd.DataFrame.from_dict(
            {
                "A": [1],
                "B": [2],
                "C": [3],
                "D": [4],
                "E": [5],
                "F": [6],
            },
            orient='index',
            dtype=np.float32)
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(
            data=pd.to_datetime([2018], format="%Y"))
        self.assertTrue(CERO.is_cero(cero))

        fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping.yaml")
        fc.exec_procedures(cero)

        # First output file.
        df1 = pd.read_csv("test_fromcero_mapping1.csv", index_col=0)
        test_list = [1, 2, 3]
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))
        test_list = ["A", "B", "C"]
        self.assertTrue(
            all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

        # Second output file.
        df2 = pd.read_csv("test_fromcero_mapping2.csv", index_col=0)
        test_list = [4, 5, 6]
        df2_vals = [x[0] for x in df2.values.tolist()]
        self.assertTrue(all([x == y for (x, y) in zip(test_list, df2_vals)]))
        test_list = ["G", "H", "I"]
        self.assertTrue(
            all([x == y for (x, y) in zip(test_list, df2.index.tolist())]))

        os.remove("test_fromcero_mapping1.csv")
        os.remove("test_fromcero_mapping2.csv")
コード例 #19
0
    def test_csv_orientation(self):
        """``_import_file`` must raise ``TypeError`` without ``orientation`` and succeed with ``"cols"``.

        The dtype is given as the string ``"float32"`` rather than through the
        ``pd.np`` alias, which was removed in pandas 2.0.
        """
        csv_file = TestToCERO_FileObj._dd + "test_csv_orientation.csv"

        # Without an orientation the import is expected to fail.
        tc = ToCERO._FileObj({"file": csv_file})

        with self.assertRaises(TypeError):
            tc._import_file()

        # With the orientation stated the import is expected to succeed.
        tc = ToCERO._FileObj({
            "file": csv_file,
            "orientation": "cols"
        })

        df = tc._import_file()

        test_df = pd.DataFrame(data=[[1, 2], [3, 4]],
                               columns=[2016, 2017],
                               dtype="float32")
        test_df.index = CERO.create_cero_index(["A", "B"])
        test_df.sort_index(inplace=True)

        self.assertTrue(df.equals(test_df))
コード例 #20
0
    def test_stitch_time(self):
        """Run a ``noop`` rename followed by ``pc_change`` for five spellings of the initial-column option.

        Every variant writes its own csv file, which is read back through
        ``ToCERO`` and compared with the same expected frame. The variants
        differ only in the output file name and in how the initial column is
        given (``init_cols`` as list or scalar, ``init_icols`` as scalar, list
        or negative position).

        NaN and the float32 dtype are spelled without the ``pd.np`` alias,
        which was removed in pandas 2.0.
        """
        nan = float("nan")

        init = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3],
                                       }, orient='index',
                                      dtype="float32")
        init.sort_index(inplace=True)
        init.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

        cero = pd.DataFrame.from_dict({"D": [100, 200], "E": [50, 0], "F": [-50, 200]},
                                      orient='index',
                                      dtype="float32")
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2019, 2020], format="%Y"))

        cero = CERO.combine_ceros([init, cero])

        test_df = pd.DataFrame.from_dict({"A": [1, 2, 6], "B": [2, 3, 3], "C": [3, 1.5, 4.5],
                                          "D": [nan, 100, 200], "E": [nan, 50, 0], "F": [nan, -50, 200]
                                          },
                                      orient='index',
                                      dtype="float32")
        test_df.sort_index(inplace=True)
        test_df.columns = pd.DatetimeIndex(data=pd.to_datetime([2018, 2019, 2020], format="%Y"))

        # (output file, extra keyword(s) for the pc_change operation)
        variants = [
            ("test_stitch_time.csv", {"init_cols": [2018]}),
            ("test_stitch_time2.csv", {"init_cols": 2018}),
            ("test_stitch_time3.csv", {"init_icols": 0}),
            ("test_stitch_time4.csv", {"init_icols": [0]}),
            ("test_stitch_time5.csv", {"init_icols": [-3]}),
        ]

        for out_file, init_option in variants:
            # The configuration is rebuilt for every variant so that no state is
            # shared between procedure objects.
            pc_change_op = {"func": "pc_change", "arrays": ["a_set"]}
            pc_change_op.update(init_option)

            proc = FromCERO._Procedure({"name": "test_stitch_time",
                                        "file": out_file,
                                        "sets": {"a_set": ["A", "B", "C"],
                                                 "b_set": ["D", "E", "F"]},
                                        "inputs": ["a_set", "b_set"],
                                        "operations": [{"func": "noop",
                                                        "rename": {"b_set": "a_set"}},
                                                       pc_change_op],
                                        "ref_dir": "."})
            proc.exec_ops(cero)

            tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
            df = tc.create_cero()

            self.assertTrue(df.equals(test_df),
                            msg="Unexpected result for %s with %s." % (out_file, init_option))

            os.remove(out_file)
コード例 #21
0
    def test_is_cero(self):
        """Tests the validation method by feeding it deliberately False data.

        One defect at a time is repaired, and ``CERO.is_cero`` is expected to
        raise ``CERO.InvalidCERO`` until the frame is finally valid.

        The final cast uses the string ``"float32"`` rather than the ``pd.np``
        alias, which was removed in pandas 2.0.
        """

        df = None

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_inv_type):
            CERO.is_cero(df)

        df = pd.DataFrame.from_dict({
            "A": [1, 2, 3],
            "B": [4, 5, 6]
        },
                                    orient="index",
                                    dtype=int)
        df.index = pd.MultiIndex.from_tuples([("A", 1), ("A", 2)])
        # ^^ ``df`` is not even close to being a CERO...

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_bad_idx):
            CERO.is_cero(df)

        # Plain index, but with a duplicated label.
        df.index = pd.Index(["A", "A"])

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_bad_col):
            CERO.is_cero(df)

        # Datetime columns, but with a duplicated year.
        df.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2017, 2019], format="%Y"))

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_idx_nunique):
            CERO.is_cero(df)

        # Index labels now unique.
        df.index = pd.Index(["A", "B"])

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_col_nunique):
            CERO.is_cero(df)

        # Column labels now unique.
        df.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2018, 2019], format="%Y"))

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_val_type):
            CERO.is_cero(df)

        # Values cast from int to float32.
        df = df.astype("float32")

        self.assertTrue(CERO.is_cero(df))
コード例 #22
0
ファイル: test_fromcero.py プロジェクト: charlie0389/ConCERO
    def test_sets_and_mapping2(self):
        """Run ``test_fromcero_mapping2.yaml`` on a CERO with two-field identifiers and check both xlsx outputs.

        In the first file the first identifier field is expected to be mapped
        (A, B, C -> G, H, I). In the second file the second field is expected
        to be mapped (1, 2, 3 -> G, H, I).

        ``np.float32`` is used directly; the ``pd.np`` alias was removed in
        pandas 2.0.
        """

        cero = pd.DataFrame.from_dict(
            {
                ("A", "1"): [1],
                ("A", "2"): [2],
                ("A", "3"): [3],
                ("B", "1"): [4],
                ("B", "2"): [5],
                ("B", "3"): [6],
                ("C", "1"): [7],
                ("C", "2"): [8],
                ("C", "3"): [9],
            },
            orient='index',
            dtype=np.float32)
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(
            data=pd.to_datetime([2018], format="%Y"))
        self.assertTrue(CERO.is_cero(cero))

        fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping2.yaml")
        fc.exec_procedures(cero)

        # First output file.
        tc = ToCERO({
            "files": [{
                "file": "test_fromcero_complexmapping1.xlsx",
                "sheet": "CERO",
                "index_col": [0, 1]
            }]
        })
        df1 = tc.create_cero()
        test_list = list(range(1, 10))
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))
        test_list = [("G", "1"), ("G", "2"), ("G", "3"), ("H", "1"),
                     ("H", "2"), ("H", "3"), ("I", "1"), ("I", "2"),
                     ("I", "3")]
        self.assertTrue(
            all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

        # Second output file.
        tc = ToCERO({
            "files": [{
                "file": "test_fromcero_complexmapping2.xlsx",
                "sheet": "CERO",
                "index_col": [0, 1]
            }]
        })
        df1 = tc.create_cero()
        test_list = list(range(1, 10))
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))
        test_list = [("A", "G"), ("A", "H"), ("A", "I"), ("B", "G"),
                     ("B", "H"), ("B", "I"), ("C", "G"), ("C", "H"),
                     ("C", "I")]
        self.assertTrue(
            all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

        os.remove("test_fromcero_complexmapping1.xlsx")
        os.remove("test_fromcero_complexmapping2.xlsx")
コード例 #23
0
    def test_create_empty(self):
        """The object returned by ``CERO.create_empty`` must pass ``CERO.is_cero``."""
        self.assertTrue(CERO.is_cero(CERO.create_empty()))
コード例 #24
0
    def test_name_map(self):
        """Exercise ``CERO.rename_index_values`` with full and partial mappings.

        The default call is expected to return ``None`` and rename the index of
        the frame it was given; with ``inplace=False`` the renamed frame is
        returned and the original index is left untouched.
        """

        def fresh_cero():
            # Three rows "A".."C" over the years 2017-2019, float32 values.
            frame = pd.DataFrame.from_dict(
                {
                    "A": [1, 2, 3],
                    "B": [3, 4, 5],
                    "C": [6, 7, 8]
                },
                orient="index")
            frame.columns = pd.DatetimeIndex(
                pd.to_datetime([2017, 2018, 2019], format="%Y"))
            frame.sort_index(inplace=True)
            frame = frame.astype(pd.np.float32)
            self.assertTrue(CERO.is_cero(frame))
            return frame

        def index_matches(frame, expected):
            # Pair-wise comparison of the frame's index against ``expected``.
            return all(
                [x == y for (x, y) in zip(frame.index.tolist(), expected)])

        original_names = ["A", "B", "C"]
        # (mapping, index expected after renaming) - full, then partial.
        cases = (
            ({"A": "D", "B": "E", "C": "F"}, ["D", "E", "F"]),
            ({"B": "E", "C": "F"}, ["A", "E", "F"]),
        )

        # Tests 1 & 2: default call renames the given frame and returns None.
        for mapping, renamed in cases:
            target = fresh_cero()
            outcome = CERO.rename_index_values(target, dict(mapping))
            self.assertIsNone(outcome)
            self.assertTrue(index_matches(target, renamed))

        # Tests 3 & 4: inplace=False returns the renamed frame instead.
        for mapping, renamed in cases:
            target = fresh_cero()
            outcome = CERO.rename_index_values(target,
                                               dict(mapping),
                                               inplace=False)
            self.assertTrue(index_matches(outcome, renamed))
            self.assertTrue(index_matches(target, original_names))
コード例 #25
0
    def run(self, cero) -> 'CERO':
        """
        Executes all data import/export operations (defined by ``input_conf`` and ``output_conf`` respectively) and the execution of any commands.

        Each entry of ``self["cmds"]`` is either a string (run as a shell command) or a dict. A dict must \
        provide ``args`` and may provide ``type`` (``"shell"``, the default, or ``"python_method"``), ``wd``, \
        ``env_vars`` and - for ``python_method`` - ``func`` and ``kwargs``. Any remaining keys of a shell \
        command are forwarded to ``subprocess.check_output``.

        :param pandas.DataFrame cero: A CERO that contains all necessary data for conversion to input files (for \
        model execution).
        :return pandas.DataFrame: A CERO of relevant output data ('relevant' is defined by ``output_conf``).
        :raises ValueError: If a dict command lacks ``args``, a ``python_method`` command lacks ``func``, or the \
        command type is not supported.
        :raises TypeError: If a command is neither a ``str`` nor a ``dict``.
        :raises subprocess.CalledProcessError: If a shell command exits with a non-zero return code.
        """
        # Function-scope import: only the conflict-handling path below needs it.
        import warnings

        for input_conf in self["input_conf"]:
            input_conf.exec_procedures(cero)

        print(
            "Completed converting CERO to model input files (%s). Now processing commands..."
            % self["name"])

        with _modified_environ(wd=self["wd"], **self.get("env_vars", {})):

            for cmdobj in self["cmds"]:

                cmd = {"type": "shell", "shell": True}  # Default command

                if isinstance(cmdobj, str):
                    # A bare string is interpreted as a shell command.
                    cmd.update({"args": cmdobj})
                elif isinstance(cmdobj, dict):
                    cmd.update(cmdobj)  # Add user updates
                    if "args" not in cmd:
                        raise ValueError(
                            "'args' must be provided for command of type 'dict'."
                        )
                else:
                    raise TypeError(
                        "Invalid command object in configuration file.")

                # Change to command-specific directory
                cmd_run_dir = cmd.pop("wd", self["wd"])
                if cmd_run_dir:
                    cmd_run_dir = os.path.abspath(cmd_run_dir)

                cmd_type = cmd.pop("type")

                # Execute commands
                msg = "In directory '%s', executing command '%s'." % (
                    cmd_run_dir, cmd)
                Model._logger.info(msg)
                with _modified_environ(wd=cmd_run_dir,
                                       **cmd.pop("env_vars", {})):

                    # Depending on cmd_type, execute command in different ways...
                    if cmd_type in ["shell"]:
                        args = cmd.pop("args")
                        Model._logger.info(
                            "Executing shell command: %s, with keyword args: %s."
                            % (args, cmd))
                        # NOTE(review): with the default ``shell=True`` the
                        # string is handed to the shell verbatim, so the
                        # configuration file must come from a trusted source.
                        try:
                            cmd["output"] = subprocess.check_output(
                                args=args,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True,
                                **cmd)
                        except subprocess.CalledProcessError as e:
                            msg = (
                                "Command '%s' failed with returncode: %s, and message:\n"
                                + "%s\n" +
                                "Program logs may have more information.") % (
                                    args, e.returncode, e.output)
                            Model._logger.error(msg)
                            print(msg)
                            raise  # re-raise with the original traceback
                        Model._logger.info(cmd["output"])
                        print("Command returned: \n%s" % cmd["output"], end="")
                    elif cmd_type in ["python_method"]:
                        # Explicit check rather than ``assert`` so that the
                        # validation survives ``python -O``.
                        if "func" not in cmd:
                            raise ValueError(
                                "'func' must be defined for commands of type 'python_method'."
                            )
                        func = getattr(modfuncs, cmd.pop("func"))
                        # ``kwargs`` is optional; default to no keyword arguments.
                        cmd["output"] = func(*cmd["args"],
                                             **cmd.get("kwargs", {}))
                    else:
                        raise ValueError("Unsupported command type specified.")

        if not self["output_conf"]:
            return CERO.create_empty()

        ceros = [oc.create_cero() for oc in self["output_conf"]]

        try:
            cero = CERO.combine_ceros(ceros, overwrite=False)
        except CERO.CEROIndexConflict:
            # Warn (instead of raising) so that the fallback combine below is
            # actually reached, as the message itself promises.
            warnings.warn(
                "Attempts to duplicate the export of data - i.e. one or more data series are being "
                +
                "exported more than once (which should be avoided). The last procedure will define "
                + "the intended data.",
                RuntimeWarning,
                stacklevel=2)
            cero = CERO.combine_ceros(ceros)

        return cero
コード例 #26
0
    def wrapper(df: pd.DataFrame,
                *args,
                locs: "List[Union[tuple, str]]" = None,
                ilocs: "List[int]" = None,
                start_year: "Union[pd.datetime, int]" = None,
                end_year: "Union[pd.datetime, int]" = None,
                **kwargs):

        """
        Restrict ``df`` by rows and/or years, then call the encapsulated function on the restricted copy.

        :param df: A CERO, which may or may not be a strict superset of data to perform the operation on.
        :param args: Passed to the encapsulated function as positional arguments, immediately after the restricted \
        ``df``.
        :param locs: ``locs``, if provided, must be a list of identifiers that correspond to values of ``df.index``. \
        It is ``df``, reduced to these specific indices, that a wrapped function will receive as an argument. An \
        error is raised if both ``locs`` and ``ilocs`` are specified.
        :param ilocs: Identical in nature to ``locs``, though instead a list of integers (zero-indexed) is \
        provided (corresponding to the row number of ``df``). An \
        error is raised if both ``locs`` and ``ilocs`` are specified.
        :param start_year: Note that ``df`` is a CERO, and CEROs have a ``pandas.DatetimeIndex`` on columns. \
        ``start_year`` restricts the CERO to years after and including ``start_year``.
        :param end_year: Note that ``df`` is a CERO, and CEROs have a ``pandas.DatetimeIndex`` on columns. \
        ``end_year`` restricts the CERO to years up to and including ``end_year``.
        :param kwargs: Keyword arguments to be passed to the encapsulated function.
        :return: The return value of the encapsulated function (a returned ``pandas.Series`` is converted to a \
        single-row ``pandas.DataFrame``).
        :raises TypeError: If ``df`` is not a ``pandas.DataFrame``, or if both ``locs`` and ``ilocs`` are given.
        :raises KeyError: If ``start_year``, ``end_year`` or a member of ``locs`` is not a label of ``df``.
        """

        # Explicit check rather than ``assert`` so it is not stripped by ``python -O``.
        if not isinstance(df, pd.DataFrame):
            raise TypeError("First function argument must be of pandas.DataFrame type.")

        # Convert integer years to timestamps (1 January of that year).
        if isinstance(start_year, int):
            start_year = pd.Timestamp(year=start_year, month=1, day=1)
        if isinstance(end_year, int):
            end_year = pd.Timestamp(year=end_year, month=1, day=1)

        # Translate the year labels into positional column bounds. Positional
        # slices exclude their stop, so the stop is one past ``end_year`` to
        # keep that year's column, as documented.
        col_start = None
        col_stop = None
        if start_year is not None:
            col_start = df.columns.get_loc(start_year)
        if end_year is not None:
            col_stop = df.columns.get_loc(end_year) + 1

        if (locs is not None) and (ilocs is not None):
            raise TypeError("Only one of 'locs' or 'ilocs' can be provided (not both).")

        if locs is not None:
            ilocs = [df.index.get_loc(loc) for loc in locs]
        if ilocs is None:
            ilocs = pd.IndexSlice[0:]  # no row restriction

        df_cp = df.iloc[ilocs, col_start:col_stop].copy(deep=False)  # df_cp is always different object to df

        ret = func(df_cp, *args, **kwargs)
        if ret is None:
            return ret
        if isinstance(ret, pd.Series):
            # If series, convert to dataframe
            ret = pd.DataFrame(data=[ret])

        CERO.is_cero(ret)  # Performs checks to ensure ret is a valid CERO
        return ret
コード例 #27
0
    def test_idxconflict(self):
        """Combining a CERO with itself and ``overwrite=False`` must raise ``CERO.CEROIndexConflict``."""
        pickled_cero = DataTools.get_test_data(TestCERO._dd + "test_cero.pickle")
        duplicated = [pickled_cero, pickled_cero]

        with self.assertRaises(CERO.CEROIndexConflict):
            CERO.combine_ceros(duplicated, overwrite=False, verify_cero=True)
コード例 #28
0
ファイル: test_tocero.py プロジェクト: charlie0389/ConCERO
    def test_multiindex_xlsx(self):
        """The CERO built from ``test_multiindex_xlsx.yaml`` must pass ``CERO.is_cero``."""
        conf_path = TestToCERO._dd + r'test_multiindex_xlsx.yaml'
        imported = ToCERO(conf=conf_path).create_cero()
        self.assertTrue(CERO.is_cero(imported))