Example #1
0
    def test_nrows_skiprows(self):
        """Check the first row of the CERO built from ``test_nrows_2.yaml``.

        The row must be numerically close to the two reference values below.
        """
        conf_file = TestToCERO._dd + r"test_nrows_2.yaml"
        cero = ToCERO(conf=conf_file).create_cero()

        expected_row = [0.00551917898595782, 0.00551917898595782]
        first_row = cero.values[0]

        self.assertTrue(np.allclose(first_row, expected_row))
Example #2
0
    def test_load_set_inputs(self):
        """Export a CERO whose inputs are given as a set, then re-import it.

        A ``noop`` operation is applied to the set ``a_set``; the exported CSV
        is read back with ``ToCERO`` and must equal the original frame. The
        CSV is removed in a ``finally`` clause so that a failing assertion
        does not leave it behind in the working directory.
        """
        out_file = "test_load_set_inputs.csv"

        cero = pd.DataFrame.from_dict({"A": [1, 2, 3, 4, 5],
                                       "B": [6, 4, 5, 6, 7],
                                       "C": [4, 5, 8, 7, 8],
                                       "D": [9, 10, 12, 11, 2]},
                                      orient="index",
                                      dtype=pd.np.float32)

        cero.columns = pd.DatetimeIndex(pd.to_datetime([2017, 2018, 2019, 2020, 2021], format="%Y"))
        cero.sort_index(inplace=True)

        proc = FromCERO._Procedure({"name": "test_proc",
                                    "sets": {"a_set": ["A", "B", "C", "D"]},
                                    "inputs": ["a_set"],
                                    "operations": [{"func": "noop",
                                                    "arrays": ["a_set"]}],
                                    "file": out_file,
                                    })

        try:
            proc.exec_ops(cero)

            tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
            df = tc.create_cero()

            self.assertTrue(df.equals(cero))
        finally:
            # Guarded: if the export itself failed there is no file, and an
            # unconditional remove would mask the real error.
            if os.path.isfile(out_file):
                os.remove(out_file)
Example #3
0
    def test_run_checks(self):
        """Exercise ``ToCERO.run_checks`` with a missing and an existing file.

        A missing file must raise ``FileNotFoundError`` by default and give a
        falsy result with ``raise_exception=False``; an existing file must
        give a truthy result.
        """

        def conf_for(file_name):
            # A fresh configuration dict is built for every call.
            return {
                "files": [{
                    "file": file_name,
                    "search_paths": [TestToCERO._dd]
                }]
            }

        missing = "not_a__FileObj object."

        with self.assertRaises(FileNotFoundError):
            ToCERO.run_checks(conf_for(missing))

        quiet_result = ToCERO.run_checks(conf_for(missing),
                                         raise_exception=False)
        self.assertFalse(quiet_result)

        found_result = ToCERO.run_checks(conf_for("test_csv.csv"))
        self.assertTrue(found_result)
Example #4
0
    def test_empty_xlsx(self):
        """Check ``CERO.is_cero`` on the CERO built from ``test_empty_xlsx.yaml``.

        With ``empty_ok=False`` a ``CERO.EmptyCERO`` exception is expected;
        with default arguments the result must be truthy.
        """
        conf_file = TestToCERO._dd + r"test_empty_xlsx.yaml"
        cero = ToCERO(conf=conf_file).create_cero()

        with self.assertRaises(CERO.EmptyCERO):
            self.assertTrue(CERO.is_cero(cero, empty_ok=False))

        default_check = CERO.is_cero(cero)
        self.assertTrue(default_check)
Example #5
0
    def test_local_libfuncs(self):
        """Run a procedure whose operation comes from a local ``libfuncs`` file.

        ``test_local_libfuncs.py`` is copied into the working directory, the
        procedure applies ``test_local_recursive_op`` and exports a CSV, and
        the re-imported CSV is compared with ``test_df``. Both the copied
        module and the exported CSV are removed in a ``finally`` clause so
        that a failure does not leave stray files in the working directory.
        """
        libfuncs_file = "test_local_libfuncs.py"
        out_file = "test_local_libfuncs.csv"

        shutil.copy2(TestFromCERO_Procedure._dd + libfuncs_file, os.getcwd())

        try:
            cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3]},
                                          orient='index',
                                          dtype=pd.np.float32)
            cero.sort_index(inplace=True)
            cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

            test_df = pd.DataFrame.from_dict({"A": [2], "B": [4], "C": [6]},
                                             orient='index',
                                             dtype=pd.np.float32)
            test_df.sort_index(inplace=True)
            test_df.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

            proc = FromCERO._Procedure({"libfuncs": libfuncs_file,
                                        "ref_dir": ".",
                                        "name": "test_set",
                                        "inputs": ["A", "B", "C"],
                                        "operations": [{"func": "test_local_recursive_op"}],
                                        "file": out_file})

            proc.exec_ops(cero)

            tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
            df = tc.create_cero()

            self.assertTrue(df.equals(test_df))
        finally:
            # Guarded removal: the CSV does not exist if the export failed,
            # and an unconditional remove would hide the original error.
            for leftover in (libfuncs_file, out_file):
                if os.path.isfile(leftover):
                    os.remove(leftover)
Example #6
0
    def test__import_vd(self):
        """Import ``test__import_vd.VD`` and compare with hand-built frames.

        ``ToCERO._FileObj._import_vd`` is run twice on the same file: first
        without, then with, a ``default_year`` option. Each result is cast to
        ``float32`` and compared with the expected ``DataFrame``.
        """
        nan = pd.np.nan
        vd_file = TestToCERO_FileObj._dd + "test__import_vd.VD"

        # --- First pass: no "default_year" ---
        file_obj = ToCERO._FileObj({
            "file": vd_file,
            "date_col": 3,
            "val_col": 8
        })
        imported = file_obj._import_vd()
        imported.columns.set_names([None], inplace=True)
        imported = imported.astype(pd.np.float32)

        series_id = ("VAR_Act", "-", "FT_COMELC", "ACT", "2015", "PD", "-")
        series_vals = [
            0.740833333333336, 0.740833333333336, 0.8005115537522,
            0.829127920241238
        ]
        expected = pd.DataFrame.from_dict({series_id: series_vals},
                                          orient="index",
                                          dtype=pd.np.float32)
        expected.columns = pd.Index([2015, 2016, 2020, 2025])
        expected.sort_index(inplace=True)

        self.assertTrue(expected.equals(imported))

        # --- Second pass: "default_year" set to 2018 ---
        file_obj = ToCERO._FileObj({
            "file": vd_file,
            "date_col": 3,
            "val_col": 8,
            "default_year": 2018
        })
        imported = file_obj._import_vd()
        imported.columns.set_names([None], inplace=True)
        imported = imported.astype(pd.np.float32)
        imported.sort_index(inplace=True)

        expected_rows = [
            [0.740833333333336, 0.740833333333336, nan,
             0.8005115537522, 0.829127920241238],
            [nan, nan, 1.39891653080538, nan, nan],
            [nan, nan, 19.6047685777802, nan, nan],
            [nan, nan, 31516.8951973493, nan, nan],
        ]
        expected = pd.DataFrame(data=expected_rows,
                                columns=[2015, 2016, 2018, 2020, 2025],
                                dtype=pd.np.float32)
        expected.index = CERO.create_cero_index([
            ("VAR_Act", "-", "FT_COMELC", "ACT", "2015", "PD", "-"),
            ("Cost_Salv", "-", "EN_WinONS-26", "ADE", "2040", "-", "-"),
            ("Cost_NPV", "-", "EE_StmTurb009", "CQ", "-", "-", "ACT"),
            ("Reg_irec", "-", "-", "WA", "-", "-", "-"),
        ])
        expected.sort_index(inplace=True)

        self.assertTrue(expected.equals(imported))
    def test_sceninputs_to_cero2(self):
        '''Build a CERO from ``test_xlsx_to_cero.yaml`` and compare it with pickled reference data.'''
        conf_file = TestScenIn2CERO._dd + 'test_xlsx_to_cero.yaml'
        cero = ToCERO(conf=conf_file).create_cero()

        pickle_path = os.path.normpath('data/test_xlsx_to_cero.pickle')
        expected = DataTools.get_test_data(pickle_path)

        # Rows are selected by the reference index, so row order is irrelevant.
        selected = cero.loc[expected.index]
        self.assertTrue(selected.equals(expected))
Example #8
0
    def test_vurm2cero(self):
        '''Build a CERO from ``test_vurm_to_cero.yaml`` and compare it with pickled reference data.'''
        data_dir = TestVURM2CERO._dd

        converter = ToCERO(conf=(data_dir + r'test_vurm_to_cero.yaml'))
        cero = converter.create_cero()

        reference_file = data_dir + r'test_vurm_to_cero_finaldata.pickle'
        expected = DataTools.get_test_data(reference_file)

        self.assertTrue(cero.equals(expected))
Example #9
0
    def test_gtape2cero(self):
        """Build a CERO from ``test_gtape_to_cero.yaml`` and compare it with pickled reference data.

        Both files are looked up in the ``data`` directory next to this module.
        """
        # The trailing "" makes os.path.join end the path with a separator.
        data_dir = os.path.join(os.path.dirname(__file__), "data", "")

        converter = ToCERO(data_dir + r'test_gtape_to_cero.yaml')
        cero = converter.create_cero()

        reference_file = data_dir + r'test_gtape_to_cero_finaldata.pickle'
        expected = DataTools.get_test_data(reference_file)

        self.assertTrue(cero.equals(expected))
Example #10
0
    def test_complex_xlsx(self):
        """Build a CERO from ``test_complex_xlsx_import.yaml``.

        The result must pass ``CERO.is_cero`` and equal the pickled reference.
        """
        conf_file = TestToCERO._dd + r'test_complex_xlsx_import.yaml'
        cero = ToCERO(conf=conf_file).create_cero()

        reference_file = TestToCERO._dd + "test_complex_xlsx_result.pickle"
        expected = DataTools.get_test_data(reference_file)

        is_valid_cero = CERO.is_cero(cero)
        self.assertTrue(is_valid_cero)
        self.assertTrue(cero.equals(expected))
Example #11
0
    def test_rename_2(self):
        """Check the leading index entries of the CERO built from ``test_rename_2.yaml``.

        The CERO must pass ``CERO.is_cero`` and its index must begin with the
        identifiers in ``test_idx``.
        """
        to_cero = ToCERO(conf=(TestToCERO._dd + r"test_rename_2.yaml"))
        cero = to_cero.create_cero()

        # The result was previously discarded; assert it, as the other tests
        # in this suite do.
        self.assertTrue(CERO.is_cero(cero))

        test_idx = ["PROFESSIONALS", ("1", "MANAGERS")
                    ]  # Rename operation always moves series to the end

        # Compare a slice instead of zipping: zip() stops at the shorter
        # sequence, so a too-short (even empty) index used to pass vacuously.
        actual_idx = cero.index.tolist()
        self.assertEqual(actual_idx[:len(test_idx)], test_idx)
Example #12
0
    def test_time_dim(self):
        """Convert ``test_timedim.har`` to a CERO and compare with pickled reference data.

        The HAR file is copied into the working directory for the duration of
        the test. It is removed in a ``finally`` clause so a failing
        conversion or assertion does not leave the copy behind.
        """
        har_copy = "test_timedim.har"
        shutil.copy2(TestHAR2CERO._dd + "test_timedim.har", har_copy)

        try:
            h2c = ToCERO(conf=(TestHAR2CERO._dd +
                               r'test_har_to_cero_timedim.yaml'))
            cero = h2c.create_cero()
            df = DataTools.get_test_data(TestHAR2CERO._dd +
                                         r'test_har_to_cero_timedim.pickle')

            self.assertTrue(
                cero.loc[df.index].equals(df))  # Order-independent test
        finally:
            os.remove(har_copy)
Example #13
0
    def test_sceninputs_to_cero(self):
        '''Tests the conversion of hars to ceros.

        ``Mdatnew7.har`` is copied into the working directory for the duration
        of the test. It is removed in a ``finally`` clause so a failing
        conversion or assertion does not leave the copy behind.
        '''
        har_copy = "Mdatnew7.har"
        shutil.copy2(TestHAR2CERO._dd + "Mdatnew7.har", har_copy)

        try:
            s2c = ToCERO(conf=(TestHAR2CERO._dd + r'test_har_to_cero.yaml'))
            cero = s2c.create_cero()
            df = DataTools.get_test_data(TestHAR2CERO._dd +
                                         r'test_har_to_cero.pickle')

            self.assertTrue(
                cero.loc[df.index].equals(df))  # Order-independent test
        finally:
            os.remove(har_copy)
Example #14
0
    def is_valid(self, raise_exception=True) -> bool:
        """ Performs static checks on ``self`` to ensure it is a valid Scenario object.

        :param bool raise_exception: If `True` (default), a failed structural check raises ``TypeError``. \
        Otherwise `False` is returned. The flag is also forwarded to the ``check_config`` calls.
        :return bool: `True` if every check passes, `False` otherwise.
        :raises TypeError: If ``raise_exception`` is `True` and a required key is missing, \
        ``"models"`` is not a list, or an item of ``"models"`` is not a ``Model``.
        """

        def _fail(msg):
            # Single place that honours ``raise_exception``; these checks used
            # to raise unconditionally, even when the caller asked for a bool.
            if raise_exception:
                raise TypeError(msg)
            return False

        # Kept as a list: it is %-formatted as a single value below.
        req_keys = ["name", "models", "input_conf", "output_conf"]

        if not all(k in self for k in req_keys):
            return _fail(
                ("Not all required key-value pairs have been defined. " +
                 "It is necessary to define all of %s.") % req_keys)

        if not isinstance(self["models"], list):
            return _fail(
                "Scenario property \'models\' must be defined as a list.")

        for model in self["models"]:
            if not isinstance(model, Model):
                return _fail("Object '%s' is of type '%s', not 'Model'." %
                             (model, type(model)))

            if not model.check_config(raise_exception=raise_exception,
                                      runtime=False):
                return False

        for ic in self["input_conf"]:
            if not ToCERO.check_config(
                    ic, raise_exception=raise_exception, runtime=False):
                return False

        for oc in self["output_conf"]:
            if not FromCERO.check_config(
                    oc, raise_exception=raise_exception, runtime=False):
                return False

        return True
Example #15
0
    def is_valid(self, raise_exception=True):
        """
        Statically validate ``self`` as a ``Model`` object. Passing this check does not rule out runtime issues.
        :param bool raise_exception: If `True` (default), a missing required key raises ``TypeError``. \
        The flag is also forwarded to the ``check_config`` calls.
        :return bool: `True` if ``self`` is a valid ``Model``, otherwise `False`.
        """
        # Kept as a list: it is %-formatted as a single value below.
        req_keys = ["name", "cmds", "input_conf", "output_conf"]

        if any(key not in self for key in req_keys):
            template = ("All models must have all of the keys: %s. Attempted to create model"
                        " with at least one of these keys missing.")
            msg = template % req_keys

            Model._logger.error(msg)
            if raise_exception:
                raise TypeError(msg)
            print(msg)
            return False

        # all() stops at the first failing configuration, like an early return.
        inputs_ok = all(
            FromCERO.check_config(
                ic, raise_exception=raise_exception, runtime=False)
            for ic in self["input_conf"])
        if not inputs_ok:
            return False

        return all(
            ToCERO.check_config(
                oc, raise_exception=raise_exception, runtime=False)
            for oc in self["output_conf"])
Example #16
0
    def test_regex_format(self):
        """Import ``test_csv_regex.csv`` using both ``time_regex`` and ``time_fmt``.

        The resulting CERO must equal a 2x2 ``float32`` frame with yearly
        datetime columns for 2016 and 2017.
        """
        file_conf = {
            "file": TestToCERO._dd + "test_csv_regex.csv",
            # Regex could pick out just the year, but want to test 'time_fmt' as well...
            "time_regex": r"(Y\d{4}).*",
            "time_fmt": r"Y%Y"
        }
        cero = ToCERO({"files": [file_conf]}).create_cero()

        expected = pd.DataFrame(data=[[1, 2], [3, 4]],
                                columns=[2016, 2017],
                                dtype=pd.np.float32)
        expected.index = CERO.create_cero_index(["A", "B"])
        year_stamps = pd.to_datetime([2016, 2017], format="%Y")
        expected.columns = pd.DatetimeIndex(year_stamps)
        expected.sort_index(inplace=True)

        self.assertTrue(cero.equals(expected))
Example #17
0
    def test_csv_orientation(self):
        """Import ``test_csv_orientation.csv`` with and without ``"orientation": "cols"``.

        Without the option ``_import_file`` is expected to raise ``TypeError``;
        with it, the imported frame must equal the 2x2 reference frame.
        """
        csv_file = TestToCERO_FileObj._dd + "test_csv_orientation.csv"

        default_obj = ToCERO._FileObj({"file": csv_file})
        with self.assertRaises(TypeError):
            default_obj._import_file()

        cols_obj = ToCERO._FileObj({
            "file": csv_file,
            "orientation": "cols"
        })
        imported = cols_obj._import_file()

        expected = pd.DataFrame(data=[[1, 2], [3, 4]],
                                columns=[2016, 2017],
                                dtype=pd.np.float32)
        expected.index = CERO.create_cero_index(["A", "B"])
        expected.sort_index(inplace=True)

        self.assertTrue(imported.equals(expected))
Example #18
0
    def run_checks(self, raise_exception=True):
        """
        Run the runtime checks of every item in ``self["input_conf"]`` via ``ToCERO.check_config``. A failed runtime check indicates that the scenario is not ready to run.

        :param bool raise_exception: Forwarded to ``ToCERO.check_config``. If `True` (default) then an exception is raised on check failure. Otherwise (on check failure) `False` is returned.
        :return bool: `True` if every input configuration passes, `False` as soon as one fails.
        """
        # all() stops at the first failing configuration, like an early return.
        return all(
            ToCERO.check_config(
                ic, raise_exception=raise_exception, runtime=True)
            for ic in self["input_conf"])
Example #19
0
    def test__import_gdx(self):
        """Import symbol ``L_EXPORT`` from ``test__import_gdx.gdx`` and compare with the pickled reference frame."""
        data_dir = TestToCERO_FileObj._dd

        file_obj = ToCERO._FileObj({
            "file": data_dir + "test__import_gdx.gdx",
            "symbols": {
                "name": "L_EXPORT",
                "date_col": 2
            }
        })
        imported = file_obj._import_gdx()

        # The pickle is a fixture from the test data directory.
        reference_file = TestToCERO_FileObj._dd + "test__import_gdx.pickle"
        with open(reference_file, "rb") as handle:
            expected = pickle.load(handle)

        self.assertTrue(imported.equals(expected))
Example #20
0
    def test_is_valid(self):
        """Check ``ToCERO.is_valid`` rejects configurations with a malformed ``"files"`` value.

        Each malformed value must raise ``TypeError`` by default and give a
        falsy result with ``raise_exception=False``.
        """
        # Factories, so that every call receives a freshly built value.
        bad_files_factories = (
            lambda: 1,
            lambda: "not a list",
            lambda: {"file": "not_a__FileObj object."},
        )

        for make_files in bad_files_factories:
            with self.assertRaises(TypeError):
                ToCERO.is_valid({"files": make_files()})

            quiet_result = ToCERO.is_valid({"files": make_files()},
                                           raise_exception=False)
            self.assertFalse(quiet_result)

        # NOTE(review): this asserts the truthiness of a dict literal and never
        # calls ToCERO.is_valid, so it cannot fail - confirm the intended check.
        self.assertTrue({"files": {"file": "Mdatnew7.har"}})
Example #21
0
    def test_har_repetitive_sets(self):
        """Import header array ``ARR7`` from ``test.har`` and compare with the pickled reference data."""
        file_conf = {
            "file": "test.har",
            "search_paths": TestToCERO_FileObj._dd,
            "head_arrs": [{
                "name": "ARR7",
                "default_year": 2018
            }]
        }
        cero = ToCERO._FileObj(file_conf).import_file_as_cero()

        reference_file = TestToCERO_FileObj._dd + "test_har_repetitive_sets.pickle"
        expected = DataTools.get_test_data(reference_file)

        self.assertTrue(cero.equals(expected))
Example #22
0
    def test_stitch_time(self):
        """Compare the CERO from ``test_time_stitch.yaml`` with sheet ``data_final`` of ``test_time_stitch.xlsx``."""
        stitched = ToCERO(TestToCERO._dd + "test_time_stitch.yaml").create_cero()

        reference_conf = {
            "files": [{
                "file": TestToCERO._dd + "test_time_stitch.xlsx",
                "sheet": "data_final"
            }]
        }
        reference = ToCERO(reference_conf).create_cero()

        self.assertTrue(stitched.equals(reference))
Example #23
0
    def test_multiindex_xlsx(self):
        """The CERO built from ``test_multiindex_xlsx.yaml`` must pass ``CERO.is_cero``."""
        conf_file = TestToCERO._dd + r'test_multiindex_xlsx.yaml'
        cero = ToCERO(conf=conf_file).create_cero()

        check_result = CERO.is_cero(cero)
        self.assertTrue(check_result)
Example #24
0
    def test_nrows_empty(self):
        """Every value in the first row of the CERO built from ``test_nrows_3.yaml`` must be NaN."""
        conf_file = TestToCERO._dd + r"test_nrows_3.yaml"
        cero = ToCERO(conf=conf_file).create_cero()

        first_row_is_nan = np.isnan(cero.values[0])
        self.assertTrue(np.all(first_row_is_nan))
Example #25
0
    def test_stitch_time(self):
        """Run a rename followed by ``pc_change`` for several spellings of the initial column.

        Two frames are combined into one CERO. Each procedure renames
        ``b_set`` to ``a_set`` with a ``noop`` operation and then applies
        ``pc_change`` to ``a_set``. The initial column is given by label
        (``init_cols``) or by position (``init_icols``), as a scalar or as a
        list. Every variant must export the same result, ``test_df``.

        The five variants are driven from one table instead of five copied
        sections, and each output file is removed in a ``finally`` clause so
        that a failing variant does not leave it in the working directory.
        """
        init = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3],
                                       }, orient='index',
                                      dtype=pd.np.float32)
        init.sort_index(inplace=True)
        init.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

        cero = pd.DataFrame.from_dict({"D": [100, 200], "E": [50, 0], "F": [-50, 200]},
                                      orient='index',
                                      dtype=pd.np.float32)
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2019, 2020], format="%Y"))

        cero = CERO.combine_ceros([init, cero])

        test_df = pd.DataFrame.from_dict({"A": [1, 2, 6], "B": [2, 3, 3], "C": [3, 1.5, 4.5],
                                          "D": [pd.np.nan, 100, 200], "E": [pd.np.nan, 50, 0], "F": [pd.np.nan, -50, 200]
                                          },
                                         orient='index',
                                         dtype=pd.np.float32)
        test_df.sort_index(inplace=True)
        test_df.columns = pd.DatetimeIndex(data=pd.to_datetime([2018, 2019, 2020], format="%Y"))

        # (output file, option name, option value) - same order as the
        # original copy-pasted sections.
        variants = [
            ("test_stitch_time.csv", "init_cols", [2018]),
            ("test_stitch_time2.csv", "init_cols", 2018),
            ("test_stitch_time3.csv", "init_icols", 0),
            ("test_stitch_time4.csv", "init_icols", [0]),
            ("test_stitch_time5.csv", "init_icols", [-3]),
        ]

        for out_file, init_key, init_val in variants:
            # Every configuration object is rebuilt per variant, so nothing is
            # shared between procedures.
            proc = FromCERO._Procedure({"name": "test_stitch_time",
                                        "file": out_file,
                                        "sets": {"a_set": ["A", "B", "C"],
                                                 "b_set": ["D", "E", "F"]},
                                        "inputs": ["a_set", "b_set"],
                                        "operations": [{"func": "noop",
                                                        "rename": {"b_set": "a_set"}},
                                                       {"func": "pc_change",
                                                        "arrays": ["a_set"],
                                                        init_key: init_val}],
                                        "ref_dir": "."})
            try:
                proc.exec_ops(cero)

                tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
                df = tc.create_cero()

                self.assertTrue(df.equals(test_df),
                                msg="Unexpected result for %s=%r." % (init_key, init_val))
            finally:
                # Guarded: the file does not exist if the export itself failed.
                if os.path.isfile(out_file):
                    os.remove(out_file)
Example #26
0
    def test_output_cero(self):
        """
        Tests the behaviour of the "outputs" argument is correct.

        Cases covered: a single-item list, ``True``, ``None``, ``False`` and
        the option left out. The last case now re-imports the file it actually
        wrote (``test_output_cero5.csv``); it previously re-read
        ``test_output_cero2.csv``, so the default was never checked. All
        output files are removed in a ``finally`` clause.
        """

        cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3]}, orient='index',
                                      dtype=pd.np.float32)
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

        def run_proc(out_file, **extra):
            # Build and execute a procedure exporting to ``out_file``; ``extra``
            # carries the "outputs" option when the case specifies one.
            conf = {"name": "test_output_cero",
                    "file": out_file,
                    "inputs": ["A", "B", "C"],
                    "ref_dir": "."}
            conf.update(extra)
            FromCERO._Procedure(conf).exec_ops(cero)

        def read_back(out_file):
            # Re-import an exported file from the working directory.
            tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
            return tc.create_cero()

        all_files = ["test_output_cero.csv",
                     "test_output_cero2.csv",
                     "test_output_cero3.csv",
                     "test_output_cero4.csv",
                     "test_output_cero5.csv"]

        try:
            # Because single item in outputs, error may be raised (but
            # shouldn't) on attempting to export a Pandas.Series object
            # instead of a Pandas.DataFrame object.
            run_proc("test_output_cero.csv", outputs=["A"])
            self.assertTrue(cero.loc[["A"]].equals(read_back("test_output_cero.csv")))

            # "outputs": True - the whole CERO is expected in the file.
            run_proc("test_output_cero2.csv", outputs=True)
            self.assertTrue(cero.equals(read_back("test_output_cero2.csv")))

            # "outputs": None - no file is expected.
            run_proc("test_output_cero3.csv", outputs=None)
            self.assertFalse(os.path.isfile("test_output_cero3.csv"))

            # "outputs": False - no file is expected.
            run_proc("test_output_cero4.csv", outputs=False)
            self.assertFalse(os.path.isfile("test_output_cero4.csv"))

            # "outputs" not given - the whole CERO is expected in the file.
            run_proc("test_output_cero5.csv")
            self.assertTrue(cero.equals(read_back("test_output_cero5.csv")))
        finally:
            # Files 3 and 4 are included in case they were written
            # unexpectedly; the guard skips files that were never created.
            for leftover in all_files:
                if os.path.isfile(leftover):
                    os.remove(leftover)
Example #27
0
    def __init__(self, sc_def: dict, *args, parent: dict = None, **kwargs):
        """
        Build a scenario from built-in defaults, then ``parent``, then ``sc_def`` (later sources override earlier ones).

        :param sc_def: A scenario definition object.
        :param args: Passed to the superclass (dict) as positional arguments at initialisation.
        :param dict parent: Optional key-value pairs that override the built-in defaults and are overridden by ``sc_def``.
        :param kwargs: Passed to the superclass (dict) as keyword arguments at initialisation.
        :raises TypeError: If ``sc_def`` is not a ``dict``. ``self.is_valid()`` is also called at the end and may raise.
        """

        defaults = {
            "name": None,
            "run_no": None,
            "search_paths": [],
            "ref_dir": None,
            "models": [],
            "input_conf": [],
            "output_conf": []
        }

        if parent is None:
            parent = {}

        defaults.update(parent)

        # Explicit check rather than ``assert``: assertions are stripped when
        # Python runs with -O, which would silently skip this validation.
        if not isinstance(sc_def, dict):
            raise TypeError(
                "Scenario definition provided in incorrect format - type %s instead of dict."
                % type(sc_def))

        defaults.update(sc_def)

        sc_def = defaults
        super().__init__(sc_def, *args, **kwargs)

        if not self.get("name"):
            self["name"] = "scenario_unnamed"
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            self._logger.warning(
                "Scenario name has not been specified - scenario named '%s'." %
                self["name"])

        if not isinstance(self.get("run_no"), int):
            self["run_no"] = 1
            self._logger.info(
                "Scenario run_no (run number) has not been specified (or is not of integer type) - defaults to %s."
                % self["run_no"])

        if isinstance(self["search_paths"], str):
            self["search_paths"] = [os.path.abspath(self["search_paths"])]
        elif not self["search_paths"]:
            # Rebind instead of appending: an empty list supplied by the
            # caller is not mutated, and ``None`` is tolerated.
            self["search_paths"] = [os.path.abspath(".")]

        if self["ref_dir"] is None:
            self["ref_dir"] = os.path.abspath(".")

        model_parent = {
            "search_paths": self["search_paths"],
            "ref_dir": self["ref_dir"]
        }
        self["models"] = [
            Model(m, parent=model_parent) for m in self.get("models")
        ]

        if isinstance(self["input_conf"], str):
            self["input_conf"] = [self["input_conf"]]
        if isinstance(self["output_conf"], str):
            self["output_conf"] = [self["output_conf"]]

        # Load ToCERO conf. New lists are built rather than assigning into the
        # existing ones, which may be the very list objects held by the
        # caller's ``sc_def``/``parent``; replacing their items in place would
        # leave loaded objects in a definition the caller may reuse.
        par_dict = {"search_paths": self["search_paths"]}
        self["input_conf"] = [
            ToCERO(self.find_file(ic), parent=par_dict)
            for ic in self["input_conf"]
        ]

        # Load FromCERO conf
        par_dict = {"ref_dir": self["ref_dir"]}
        self["output_conf"] = [
            FromCERO(self.find_file(oc), parent=par_dict)
            for oc in self["output_conf"]
        ]

        self.is_valid()  # Check Scenario is valid
Example #28
0
    def test_groupby_and_aggregate(self):
        """ Dependent on ToCERO being functional.

        Applies ``libfuncs.groupby`` with several ``key``/``match``/``agg``
        combinations to freshly created CEROs and compares the 2018 column
        and the index with the expected values.

        :return:
        """

        def load(sheet, index_col):
            # ToCERO object for one sheet of the shared workbook.
            return ToCERO({
                "files": [{
                    "file": TestLibfuncs._dd + "test_groupby_and_aggregate.xlsx",
                    "sheet": sheet,
                    "index_col": index_col
                }]
            })

        def values_close(frame, expected_vals):
            # Pairwise closeness of the 2018 column against the expectation.
            col_2018 = frame[pd.datetime.strptime("2018", "%Y")].tolist()
            return all([
                np.isclose(x, y) for (x, y) in zip(expected_vals, col_2018)
            ])

        def index_equal(frame, expected_idx):
            # Pairwise equality of the index against the expectation.
            return all(
                [x == y for (x, y) in zip(expected_idx, frame.index.tolist())])

        tc = load("groupby", [0, 1])

        # Sum over series whose first identifier field matches "a".
        grouped = libfuncs.groupby(tc.create_cero(), key=0, match="a", agg="sum")
        idx_expected = ["a", ("a", "c"), ("a", "d"), ("b", "b"), ("c", "b")]
        vals_expected = [6, 2, 3, 4, 5]
        self.assertTrue(values_close(grouped, vals_expected))
        self.assertTrue(index_equal(grouped, idx_expected))

        # Mean over series whose second identifier field matches "b".
        grouped = libfuncs.groupby(tc.create_cero(), key=1, match="b", agg="mean")
        idx_expected = ["b", ("a", "c"), ("a", "d"), ("b", "b"), ("c", "b")]
        vals_expected = [3.3333333333, 2, 3, 4, 5]
        self.assertTrue(index_equal(grouped, idx_expected))
        self.assertTrue(values_close(grouped, vals_expected))

        # Count by first identifier field; the original runs this section
        # twice, each time on a newly created CERO.
        for _ in range(2):
            grouped = libfuncs.groupby(tc.create_cero(), key=0, agg="count")
            idx_expected = ["a", ("a", "c"), ("a", "d"), "b", "c"]
            vals_expected = [3, 2, 3, 1, 1]
            self.assertTrue(values_close(grouped, vals_expected))
            self.assertTrue(index_equal(grouped, idx_expected))

        tc = load("groupby_2", [0, 1, 2])

        # Count by the first two identifier fields.
        grouped = libfuncs.groupby(tc.create_cero(), key=[0, 1], agg="count")
        idx_expected = [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'b'),
                        ('c', 'b'), ('a', 'b', '2'), ('a', 'c', '2'),
                        ('a', 'd', '3'), ('a', 'c', '3')]
        vals_expected = [2, 3, 2, 1, 1, 6, 7, 8, 9]
        self.assertTrue(values_close(grouped, vals_expected))
        self.assertTrue(index_equal(grouped, idx_expected))
Example #29
0
    def __init__(self, model: dict, *args, parent: "Model" = None, **kwargs):
        """
        Build a ``Model`` from an options `dict` and load its configuration files.

        Options are layered in this order, each layer overriding the previous
        one: built-in defaults, then ``model``, then ``parent``.

        :param model: A `dict` containing ``Model`` options.
        :param args: Passed to superclass (`dict`) at initialisation.
        :param "Model" parent: If provided, inherits all key-value pairs from ``parent``.
        :param kwargs: Passed to superclass (`dict`) at initialisation.
        """

        defaults = {
            "name": "default_model_name",
            "cmds": [],
            "input_conf": [],
            "output_conf": [],
            "search_paths": [],
            "wd": None
        }
        defaults.update(model)

        if parent is not None:
            defaults.update(parent)

        super().__init__(defaults, *args, **kwargs)

        # ``dict.update`` copies references only, so any list supplied through
        # ``model`` or ``parent`` is still the caller's object. Take shallow
        # copies before the in-place edits below so the caller's data is not
        # silently rewritten.
        for key in ("cmds", "input_conf", "output_conf", "search_paths"):
            if isinstance(self[key], list):
                self[key] = list(self[key])

        if self["name"] == "default_model_name":
            Model._logger.warning(
                "Model not named - default name '%s' assigned.", self["name"])

        # Command string processing - a lone command string becomes a
        # one-element list.
        if isinstance(self["cmds"], str):
            self["cmds"] = [self["cmds"]]

        if not self["cmds"]:
            Model._logger.info("No commands specified for model '%s'.",
                               self["name"])

        # Fall back to the current working directory when no search path
        # was given.
        if not self["search_paths"]:
            self["search_paths"].append(os.path.abspath("."))

        # A single configuration file may be given as a bare string.
        if isinstance(self["input_conf"], str):
            self["input_conf"] = [self["input_conf"]]
        if isinstance(self["output_conf"], str):
            self["output_conf"] = [self["output_conf"]]

        # Locate each input configuration file and replace its entry with a
        # ``FromCERO`` object. The file's own directory is handed over as
        # "ref_dir".
        for idx, input_conf in enumerate(self["input_conf"]):
            conf_path = self.find_file(input_conf)
            par_dict = {
                "ref_dir": os.path.abspath(os.path.dirname(conf_path))
            }
            self["input_conf"][idx] = FromCERO(conf_path, parent=par_dict)

        # Same for the output configuration files, which become ``ToCERO``
        # objects.
        # NOTE(review): "search_paths" is passed here as a single path string,
        # not a list - confirm that ToCERO accepts a str for this option.
        for idx, output_conf in enumerate(self["output_conf"]):
            conf_path = self.find_file(output_conf)
            par_dict = {
                "search_paths": os.path.abspath(os.path.dirname(conf_path))
            }
            self["output_conf"][idx] = ToCERO(conf_path, parent=par_dict)
Example #30
0
    def test_sets_and_mapping2(self):
        """
        Run the procedures in ``test_fromcero_mapping2.yaml`` on a 9-row,
        two-level-index CERO and check the two xlsx files read back afterwards.

        Both files are expected to hold the values 1..9. In the first the
        outer index level reads G/H/I; in the second the inner level reads
        G/H/I. The files are presumably written by the yaml procedures; they
        are removed afterwards even when an assertion fails.
        """

        out_file_1 = "test_fromcero_complexmapping1.xlsx"
        out_file_2 = "test_fromcero_complexmapping2.xlsx"

        # ``pd.np`` was deprecated in pandas 1.0 and removed in 2.0; use numpy
        # directly.
        cero = pd.DataFrame.from_dict(
            {
                ("A", "1"): [1],
                ("A", "2"): [2],
                ("A", "3"): [3],
                ("B", "1"): [4],
                ("B", "2"): [5],
                ("B", "3"): [6],
                ("C", "1"): [7],
                ("C", "2"): [8],
                ("C", "3"): [9],
            },
            orient='index',
            dtype=np.float32)
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(
            data=pd.to_datetime([2018], format="%Y"))
        self.assertTrue(CERO.is_cero(cero))

        fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping2.yaml")

        expected_vals = list(range(1, 10))
        expected_outputs = (
            (out_file_1,
             [("G", "1"), ("G", "2"), ("G", "3"), ("H", "1"), ("H", "2"),
              ("H", "3"), ("I", "1"), ("I", "2"), ("I", "3")]),
            (out_file_2,
             [("A", "G"), ("A", "H"), ("A", "I"), ("B", "G"), ("B", "H"),
              ("B", "I"), ("C", "G"), ("C", "H"), ("C", "I")]),
        )

        try:
            fc.exec_procedures(cero)

            for out_file, expected_index in expected_outputs:
                tc = ToCERO({
                    "files": [{
                        "file": out_file,
                        "sheet": "CERO",
                        "index_col": [0, 1]
                    }]
                })
                df1 = tc.create_cero()
                df1_vals = [row[0] for row in df1.values.tolist()]
                # NOTE: zip() stops at the shorter sequence, so these checks
                # compare leading rows only and do not detect missing rows.
                self.assertTrue(
                    all(
                        np.isclose(x, y)
                        for (x, y) in zip(expected_vals, df1_vals)))
                self.assertTrue(
                    all(x == y
                        for (x, y) in zip(expected_index,
                                          df1.index.tolist())))
        finally:
            # Always clean up generated files so a failed run does not leave
            # artefacts in the working directory.
            for out_file in (out_file_1, out_file_2):
                if os.path.exists(out_file):
                    os.remove(out_file)