Beispiel #1
0
    def test_stitch_time(self):
        """Compare the CERO built from ``test_time_stitch.yaml`` with reference data.

        The reference CERO is read from the ``data_final`` sheet of
        ``test_time_stitch.xlsx``; both objects must be equal.
        """
        stitched = ToCERO(TestToCERO._dd + "test_time_stitch.yaml").create_cero()

        reference_conf = {
            "files": [{
                "file": TestToCERO._dd + "test_time_stitch.xlsx",
                "sheet": "data_final"
            }]
        }
        reference = ToCERO(reference_conf).create_cero()

        is_same = stitched.equals(reference)
        self.assertTrue(is_same)
Beispiel #2
0
    def test_nrows_skiprows(self):
        """The first row of the CERO from ``test_nrows_2.yaml`` must match the expected pair."""
        conf_path = TestToCERO._dd + r"test_nrows_2.yaml"
        cero = ToCERO(conf=conf_path).create_cero()

        expected_row = [0.00551917898595782, 0.00551917898595782]
        first_row = cero.values[0]

        # Floating point comparison, hence allclose rather than equality.
        rows_match = np.allclose(first_row, expected_row)
        self.assertTrue(rows_match)
Beispiel #3
0
    def test_load_set_inputs(self):
        """Round-trip a CERO through a ``noop`` procedure whose input is a set.

        The procedure defines the set ``a_set`` and lists it under ``inputs``.
        The CSV it exports is read back with ``ToCERO`` and must equal the
        CERO that was passed in.
        """
        out_file = "test_load_set_inputs.csv"

        # A dtype string is used because ``pd.np`` no longer exists in
        # pandas >= 2.0.
        cero = pd.DataFrame.from_dict({"A": [1, 2, 3, 4, 5],
                                       "B": [6, 4, 5, 6, 7],
                                       "C": [4, 5, 8, 7, 8],
                                       "D": [9, 10, 12, 11, 2]},
                                      orient="index",
                                      dtype="float32")

        cero.columns = pd.DatetimeIndex(pd.to_datetime([2017, 2018, 2019, 2020, 2021], format="%Y"))
        cero.sort_index(inplace=True)

        proc = FromCERO._Procedure({"name": "test_proc",
                                    "sets": {"a_set": ["A", "B", "C", "D"]},
                                    "inputs": ["a_set"],
                                    "operations": [{"func": "noop",
                                                    "arrays": ["a_set"]}],
                                    "file": out_file,
                                    })
        try:
            proc.exec_ops(cero)

            tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
            df = tc.create_cero()

            self.assertTrue(df.equals(cero))
        finally:
            # Remove the exported file even when an assertion above fails, so
            # a failed run does not leave artefacts in the working directory.
            if os.path.isfile(out_file):
                os.remove(out_file)
Beispiel #4
0
    def test_empty_xlsx(self):
        """Validate the CERO built from ``test_empty_xlsx.yaml`` in both modes.

        With ``empty_ok=False`` the check must raise ``CERO.EmptyCERO``; with
        default arguments it must succeed.
        """
        conf_path = TestToCERO._dd + r"test_empty_xlsx.yaml"
        cero = ToCERO(conf=conf_path).create_cero()

        # Strict validation: expected to raise.
        with self.assertRaises(CERO.EmptyCERO):
            strict_result = CERO.is_cero(cero, empty_ok=False)
            self.assertTrue(strict_result)

        # Default validation: expected to pass.
        default_result = CERO.is_cero(cero)
        self.assertTrue(default_result)
Beispiel #5
0
    def test_local_libfuncs(self):
        """Run an operation taken from a local ``libfuncs`` file and check its output.

        ``test_local_libfuncs.py`` is copied into the working directory, the
        procedure applies ``test_local_recursive_op`` to inputs A, B and C, and
        the exported CSV is read back and compared with the expected frame.
        """
        libfuncs_file = "test_local_libfuncs.py"
        out_file = "test_local_libfuncs.csv"

        shutil.copy2(TestFromCERO_Procedure._dd + libfuncs_file, os.getcwd())

        try:
            # A dtype string is used because ``pd.np`` no longer exists in
            # pandas >= 2.0.
            cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3]},
                                          orient='index',
                                          dtype="float32")
            cero.sort_index(inplace=True)
            cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

            test_df = pd.DataFrame.from_dict({"A": [2], "B": [4], "C": [6]},
                                             orient='index',
                                             dtype="float32")
            test_df.sort_index(inplace=True)
            test_df.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

            proc = FromCERO._Procedure({"libfuncs": libfuncs_file,
                                        "ref_dir": ".",
                                        "name": "test_set",
                                        "inputs": ["A", "B", "C"],
                                        "operations": [{"func": "test_local_recursive_op"}],
                                        "file": out_file})

            proc.exec_ops(cero)

            tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
            df = tc.create_cero()

            self.assertTrue(df.equals(test_df))
        finally:
            # Clean up the copied module and the exported file even when the
            # test fails part-way through.
            for leftover in (libfuncs_file, out_file):
                if os.path.isfile(leftover):
                    os.remove(leftover)
    def test_sceninputs_to_cero2(self):
        '''Convert xlsx input to a CERO and compare it with pickled test data.'''
        conf_path = TestScenIn2CERO._dd + 'test_xlsx_to_cero.yaml'
        converted = ToCERO(conf=conf_path).create_cero()

        pickle_path = os.path.normpath('data/test_xlsx_to_cero.pickle')
        expected = DataTools.get_test_data(pickle_path)

        # Select rows by the expected index so that row order is irrelevant.
        reordered = converted.loc[expected.index]
        self.assertTrue(reordered.equals(expected))
    def test_vurm2cero(self):
        '''Build a CERO from ``test_vurm_to_cero.yaml`` and compare with pickled data.'''
        data_dir = TestVURM2CERO._dd

        converted = ToCERO(conf=(data_dir + r'test_vurm_to_cero.yaml')).create_cero()
        expected = DataTools.get_test_data(
            data_dir + r'test_vurm_to_cero_finaldata.pickle')

        is_same = converted.equals(expected)
        self.assertTrue(is_same)
Beispiel #8
0
    def test_gtape2cero(self):
        """Build a CERO from ``test_gtape_to_cero.yaml`` and compare with pickled data.

        Both files are located in the ``data`` directory next to this module.
        """
        # The trailing empty component makes the path end with a separator, so
        # file names can be appended by plain concatenation.
        here = os.path.dirname(__file__)
        data_dir = os.path.join(here, "data", "")

        converted = ToCERO(data_dir + r'test_gtape_to_cero.yaml').create_cero()
        expected = DataTools.get_test_data(
            data_dir + r'test_gtape_to_cero_finaldata.pickle')

        is_same = converted.equals(expected)
        self.assertTrue(is_same)
Beispiel #9
0
    def test_complex_xlsx(self):
        """Import ``test_complex_xlsx_import.yaml`` and compare with the pickled result.

        The created object must validate as a CERO and equal the data stored
        in ``test_complex_xlsx_result.pickle``.
        """
        data_dir = TestToCERO._dd

        conf_path = data_dir + r'test_complex_xlsx_import.yaml'
        imported = ToCERO(conf=conf_path).create_cero()

        expected = DataTools.get_test_data(
            data_dir + "test_complex_xlsx_result.pickle")

        is_valid = CERO.is_cero(imported)
        self.assertTrue(is_valid)
        is_same = imported.equals(expected)
        self.assertTrue(is_same)
Beispiel #10
0
    def test_rename_2(self):
        """Check the index order produced by the configuration ``test_rename_2.yaml``.

        The created object must validate as a CERO, and its index must start
        with the expected identifiers in the expected order.
        """
        to_cero = ToCERO(conf=(TestToCERO._dd + r"test_rename_2.yaml"))
        cero = to_cero.create_cero()

        # Assert the validation result instead of discarding it, so a falsy
        # return value fails the test.
        self.assertTrue(CERO.is_cero(cero))

        test_idx = ["PROFESSIONALS", ("1", "MANAGERS")
                    ]  # Rename operation always moves series to the end

        actual_idx = cero.index.tolist()

        # zip() stops at the shorter sequence, so an empty or truncated index
        # would otherwise pass vacuously.
        self.assertGreaterEqual(len(actual_idx), len(test_idx))
        self.assertTrue(
            all(x == y for (x, y) in zip(test_idx, actual_idx)))
Beispiel #11
0
    def test_time_dim(self):
        """Convert ``test_timedim.har`` to a CERO and compare with pickled data.

        The har file is copied into the working directory for the duration of
        the test (presumably because the YAML configuration refers to it by a
        relative path - confirm against ``test_har_to_cero_timedim.yaml``).
        """
        har_file = "test_timedim.har"
        shutil.copy2(TestHAR2CERO._dd + har_file, har_file)

        try:
            h2c = ToCERO(conf=(TestHAR2CERO._dd +
                               r'test_har_to_cero_timedim.yaml'))
            cero = h2c.create_cero()
            df = DataTools.get_test_data(TestHAR2CERO._dd +
                                         r'test_har_to_cero_timedim.pickle')

            self.assertTrue(
                cero.loc[df.index].equals(df))  # Order-independent test
        finally:
            # Remove the copied file even if the conversion or the assertion
            # fails, so no artefact is left in the working directory.
            os.remove(har_file)
Beispiel #12
0
    def test_sceninputs_to_cero(self):
        '''Tests the conversion of hars to ceros.

        ``Mdatnew7.har`` is copied into the working directory for the duration
        of the test, converted using ``test_har_to_cero.yaml`` and compared
        with the data stored in ``test_har_to_cero.pickle``.
        '''
        har_file = "Mdatnew7.har"
        shutil.copy2(TestHAR2CERO._dd + har_file, har_file)

        try:
            s2c = ToCERO(conf=(TestHAR2CERO._dd + r'test_har_to_cero.yaml'))
            cero = s2c.create_cero()
            df = DataTools.get_test_data(TestHAR2CERO._dd +
                                         r'test_har_to_cero.pickle')

            self.assertTrue(
                cero.loc[df.index].equals(df))  # Order-independent test
        finally:
            # Remove the copied file even if the conversion or the assertion
            # fails, so no artefact is left in the working directory.
            os.remove(har_file)
Beispiel #13
0
    def test_regex_format(self):
        """Read a CSV using ``time_regex`` together with ``time_fmt``.

        The CERO created from ``test_csv_regex.csv`` must equal a hand-built
        frame with rows A and B and columns for the years 2016 and 2017.
        """
        tc = ToCERO({
            "files": [{
                "file": TestToCERO._dd + "test_csv_regex.csv",
                "time_regex":
                r"(Y\d{4}).*",  # Regex could pick out just the year, but want to test 'time_fmt' as well...
                "time_fmt": r"Y%Y"
            }]
        })
        cero = tc.create_cero()

        # A dtype string is used because ``pd.np`` no longer exists in
        # pandas >= 2.0.
        test_df = pd.DataFrame(data=[[1, 2], [3, 4]],
                               columns=[2016, 2017],
                               dtype="float32")
        test_df.index = CERO.create_cero_index(["A", "B"])
        test_df.columns = pd.DatetimeIndex(
            pd.to_datetime([2016, 2017], format="%Y"))
        test_df.sort_index(inplace=True)

        self.assertTrue(cero.equals(test_df))
Beispiel #14
0
    def test_groupby_and_aggregate(self):
        """Exercise ``libfuncs.groupby`` with different keys and aggregations.

        Dependent on ToCERO being functional.

        Each scenario loads a fresh CERO from ``test_groupby_and_aggregate.xlsx``,
        applies ``libfuncs.groupby`` and compares the resulting index and the
        values of the 2018 column with the expected ones.

        :return:
        """
        # Column label for 2018. ``pd.datetime`` no longer exists in
        # pandas >= 2.0, so the label is built with ``pd.to_datetime``.
        year = pd.to_datetime("2018", format="%Y")

        def check(result, exp_index, exp_vals):
            """Compare the leading rows of ``result`` with the expected index and values."""
            act_index = result.index.tolist()
            act_vals = result[year].tolist()
            # zip() stops at the shorter sequence, so a result with too few
            # rows would otherwise pass vacuously.
            self.assertGreaterEqual(len(act_index), len(exp_index))
            self.assertTrue(
                all(x == y for (x, y) in zip(exp_index, act_index)))
            self.assertTrue(
                all(np.isclose(x, y) for (x, y) in zip(exp_vals, act_vals)))

        tc = ToCERO({
            "files": [{
                "file": TestLibfuncs._dd + "test_groupby_and_aggregate.xlsx",
                "sheet": "groupby",
                "index_col": [0, 1]
            }]
        })

        check(libfuncs.groupby(tc.create_cero(), key=0, match="a", agg="sum"),
              ["a", ("a", "c"), ("a", "d"), ("b", "b"), ("c", "b")],
              [6, 2, 3, 4, 5])

        check(libfuncs.groupby(tc.create_cero(), key=1, match="b", agg="mean"),
              ["b", ("a", "c"), ("a", "d"), ("b", "b"), ("c", "b")],
              [3.3333333333, 2, 3, 4, 5])

        check(libfuncs.groupby(tc.create_cero(), key=0, agg="count"),
              ["a", ("a", "c"), ("a", "d"), "b", "c"],
              [3, 2, 3, 1, 1])

        tc = ToCERO({
            "files": [{
                "file": TestLibfuncs._dd + "test_groupby_and_aggregate.xlsx",
                "sheet": "groupby_2",
                "index_col": [0, 1, 2]
            }]
        })

        check(libfuncs.groupby(tc.create_cero(), key=[0, 1], agg="count"),
              [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'b'),
               ('c', 'b'), ('a', 'b', '2'), ('a', 'c', '2'),
               ('a', 'd', '3'), ('a', 'c', '3')],
              [2, 3, 2, 1, 1, 6, 7, 8, 9])
Beispiel #15
0
    def test_stitch_time(self):
        """Run a rename followed by ``pc_change`` for several initial-column specs.

        Two CEROs (rows A-C for 2018, rows D-F for 2019-2020) are combined.
        The procedure renames ``b_set`` to ``a_set`` via a ``noop`` operation
        and then applies ``pc_change`` to ``a_set``. The initial column is
        given in five different ways (``init_cols`` as list or scalar,
        ``init_icols`` as scalar, list, or negative index); every variant must
        export the same expected frame.
        """
        # Dtype strings and ``float("nan")`` are used because ``pd.np`` no
        # longer exists in pandas >= 2.0.
        nan = float("nan")

        init = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3],
                                       }, orient='index',
                                      dtype="float32")
        init.sort_index(inplace=True)
        init.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

        cero = pd.DataFrame.from_dict({"D": [100, 200], "E": [50, 0], "F": [-50, 200]},
                                      orient='index',
                                      dtype="float32")
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2019, 2020], format="%Y"))

        cero = CERO.combine_ceros([init, cero])

        test_df = pd.DataFrame.from_dict({"A": [1, 2, 6], "B": [2, 3, 3], "C": [3, 1.5, 4.5],
                                          "D": [nan, 100, 200], "E": [nan, 50, 0], "F": [nan, -50, 200]
                                          },
                                         orient='index',
                                         dtype="float32")
        test_df.sort_index(inplace=True)
        test_df.columns = pd.DatetimeIndex(data=pd.to_datetime([2018, 2019, 2020], format="%Y"))

        # (output file, extra pc_change arguments) for each variant.
        variants = [
            ("test_stitch_time.csv", {"init_cols": [2018]}),
            ("test_stitch_time2.csv", {"init_cols": 2018}),
            ("test_stitch_time3.csv", {"init_icols": 0}),
            ("test_stitch_time4.csv", {"init_icols": [0]}),
            ("test_stitch_time5.csv", {"init_icols": [-3]}),
        ]

        for out_file, init_spec in variants:
            # Build the configuration afresh for each variant so that no
            # state is shared between procedures.
            pc_change_op = {"func": "pc_change", "arrays": ["a_set"]}
            pc_change_op.update(init_spec)

            proc = FromCERO._Procedure({"name": "test_stitch_time",
                                        "file": out_file,
                                        "sets": {"a_set": ["A", "B", "C"],
                                                 "b_set": ["D", "E", "F"]},
                                        "inputs": ["a_set", "b_set"],
                                        "operations": [{"func": "noop",
                                                        "rename": {"b_set": "a_set"}},
                                                       pc_change_op],
                                        "ref_dir": "."})
            try:
                proc.exec_ops(cero)

                tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
                df = tc.create_cero()

                self.assertTrue(df.equals(test_df),
                                msg="unexpected result for %r" % (init_spec,))
            finally:
                # Remove the exported file even when this variant fails.
                if os.path.isfile(out_file):
                    os.remove(out_file)
Beispiel #16
0
    def test_output_cero(self):
        """
        Tests the behaviour of the "outputs" argument is correct.

        Scenarios covered:

        1. ``outputs: ["A"]`` - only row A is exported.
        2. ``outputs: True`` - the whole CERO is exported.
        3. ``outputs: None`` - no file is written.
        4. ``outputs: False`` - no file is written.
        5. ``outputs`` omitted - the whole CERO is exported.
        """
        # A dtype string is used because ``pd.np`` no longer exists in
        # pandas >= 2.0.
        cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3]}, orient='index',
                                      dtype="float32")
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

        def run_procedure(file_name, **extra):
            """Execute a procedure exporting to ``file_name`` with optional extra settings."""
            conf = {"name": "test_output_cero",
                    "file": file_name,
                    "inputs": ["A", "B", "C"],
                    "ref_dir": "."}
            conf.update(extra)
            FromCERO._Procedure(conf).exec_ops(cero)

        def read_back(file_name):
            """Load the exported file from the working directory as a CERO."""
            tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), file_name)}]})
            return tc.create_cero()

        all_files = ["test_output_cero.csv",
                     "test_output_cero2.csv",
                     "test_output_cero3.csv",
                     "test_output_cero4.csv",
                     "test_output_cero5.csv"]

        try:
            # Because single item in outputs, error may be raised (but
            # shouldn't) on attempting to export a Pandas.Series object
            # instead of a Pandas.DataFrame object.
            run_procedure("test_output_cero.csv", outputs=["A"])
            self.assertTrue(cero.loc[["A"]].equals(read_back("test_output_cero.csv")))

            # outputs=True exports everything.
            run_procedure("test_output_cero2.csv", outputs=True)
            self.assertTrue(cero.equals(read_back("test_output_cero2.csv")))

            # outputs=None must not write a file.
            run_procedure("test_output_cero3.csv", outputs=None)
            self.assertFalse(os.path.isfile("test_output_cero3.csv"))

            # outputs=False must not write a file.
            run_procedure("test_output_cero4.csv", outputs=False)
            self.assertFalse(os.path.isfile("test_output_cero4.csv"))

            # No "outputs" key: verify the file this procedure wrote
            # (test_output_cero5.csv), not the one from the second scenario.
            run_procedure("test_output_cero5.csv")
            self.assertTrue(cero.equals(read_back("test_output_cero5.csv")))
        finally:
            # Remove whatever was written, even when an assertion fails, so
            # stale files cannot affect the "no file written" checks of a
            # later run.
            for file_name in all_files:
                if os.path.isfile(file_name):
                    os.remove(file_name)
Beispiel #17
0
    def test_sets_and_mapping2(self):
        """Run the procedures of ``test_fromcero_mapping2.yaml`` and check both exports.

        A nine-row CERO with two-level identifiers is passed to ``FromCERO``.
        The two exported workbooks are read back; their values must be 1..9
        and their indices must carry the expected identifiers.
        """
        out_files = ("test_fromcero_complexmapping1.xlsx",
                     "test_fromcero_complexmapping2.xlsx")

        # A dtype string is used because ``pd.np`` no longer exists in
        # pandas >= 2.0.
        cero = pd.DataFrame.from_dict(
            {
                ("A", "1"): [1],
                ("A", "2"): [2],
                ("A", "3"): [3],
                ("B", "1"): [4],
                ("B", "2"): [5],
                ("B", "3"): [6],
                ("C", "1"): [7],
                ("C", "2"): [8],
                ("C", "3"): [9],
            },
            orient='index',
            dtype="float32")
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(
            data=pd.to_datetime([2018], format="%Y"))
        self.assertTrue(CERO.is_cero(cero))

        def check_export(file_name, exp_index):
            """Read ``file_name`` back and compare its values and index."""
            tc = ToCERO({
                "files": [{
                    "file": file_name,
                    "sheet": "CERO",
                    "index_col": [0, 1]
                }]
            })
            df1 = tc.create_cero()
            exp_vals = list(range(1, 10))
            df1_vals = [x[0] for x in df1.values.tolist()]
            # zip() stops at the shorter sequence, so an export with too few
            # rows would otherwise pass vacuously.
            self.assertGreaterEqual(len(df1_vals), len(exp_vals))
            self.assertTrue(
                all(np.isclose(x, y) for (x, y) in zip(exp_vals, df1_vals)))
            self.assertTrue(
                all(x == y for (x, y) in zip(exp_index, df1.index.tolist())))

        try:
            fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping2.yaml")
            fc.exec_procedures(cero)

            check_export("test_fromcero_complexmapping1.xlsx",
                         [("G", "1"), ("G", "2"), ("G", "3"), ("H", "1"),
                          ("H", "2"), ("H", "3"), ("I", "1"), ("I", "2"),
                          ("I", "3")])

            check_export("test_fromcero_complexmapping2.xlsx",
                         [("A", "G"), ("A", "H"), ("A", "I"), ("B", "G"),
                          ("B", "H"), ("B", "I"), ("C", "G"), ("C", "H"),
                          ("C", "I")])
        finally:
            # Remove the exported workbooks even when an assertion fails.
            for file_name in out_files:
                if os.path.isfile(file_name):
                    os.remove(file_name)
Beispiel #18
0
    def test_nrows_empty(self):
        """Every value in the first row of the CERO from ``test_nrows_3.yaml`` must be NaN."""
        conf_path = TestToCERO._dd + r"test_nrows_3.yaml"
        cero = ToCERO(conf=conf_path).create_cero()

        first_row = cero.values[0]
        nan_mask = np.isnan(first_row)
        self.assertTrue(np.all(nan_mask))
Beispiel #19
0
    def test_multiindex_xlsx(self):
        """The object created from ``test_multiindex_xlsx.yaml`` must validate as a CERO."""
        conf_path = TestToCERO._dd + r'test_multiindex_xlsx.yaml'
        created = ToCERO(conf=conf_path).create_cero()

        is_valid = CERO.is_cero(created)
        self.assertTrue(is_valid)