Example #1
0
    def test_empty_xlsx(self):
        """Import via ``test_empty_xlsx.yaml`` and check how validation treats the result.

        Strict validation (``empty_ok=False``) is expected to raise
        ``CERO.EmptyCERO``, while default validation is expected to pass.
        """
        conf_path = TestToCERO._dd + r"test_empty_xlsx.yaml"
        cero = ToCERO(conf=conf_path).create_cero()

        # Strict validation must reject the imported object...
        with self.assertRaises(CERO.EmptyCERO):
            strict_result = CERO.is_cero(cero, empty_ok=False)
            self.assertTrue(strict_result)

        # ...whereas default validation accepts it.
        self.assertTrue(CERO.is_cero(cero))
Example #2
0
    def test_fillna(self):
        """Check ``libfuncs.fillna`` with a constant, with ``method="bfill"`` and with defaults.

        A single missing value is placed at row 1, column 1 of a 3x3 frame
        (row ``B`` = ``[3, NaN, 5]``) before each call. The expected
        replacements are ``0.0`` (explicit value), ``5.0`` (``bfill``) and
        ``3.0`` (default arguments).
        """
        df = pd.DataFrame.from_dict(
            {
                "A": [1, 2, 3],
                "B": [3, 4, 5],
                "C": [6, 7, 8]
            }, orient="index")
        df.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2018, 2019], format="%Y"))
        df.sort_index(inplace=True)

        # ``pd.np`` no longer exists in pandas >= 2.0, so the dtype is given by
        # name and NaN comes from the builtin float. The cast happens *before*
        # the NaN is written so that no integer column has to be upcast on
        # assignment; the resulting frame is the same.
        nan = float("nan")
        df = df.astype("float32")
        df.iloc[1, 1] = nan
        self.assertTrue(CERO.is_cero(df))

        libfuncs.fillna(df, value=0.0)
        self.assertTrue(df.iloc[1, 1] == 0.0)

        df.iloc[1, 1] = nan
        libfuncs.fillna(df, method="bfill")
        self.assertTrue(df.iloc[1, 1] == 5.0)

        df.iloc[1, 1] = nan
        libfuncs.fillna(df)
        self.assertTrue(df.iloc[1, 1] == 3.0)

        # The frame must still validate after the in-place fills.
        self.assertTrue(CERO.is_cero(df))
Example #3
0
    def test_rename_2(self):
        """Import via ``test_rename_2.yaml`` and compare the leading index values of the result."""
        conf_path = TestToCERO._dd + r"test_rename_2.yaml"
        cero = ToCERO(conf=conf_path).create_cero()

        CERO.is_cero(cero)

        # Rename operation always moves series to the end
        expected_idx = ["PROFESSIONALS", ("1", "MANAGERS")]
        actual_idx = cero.index.tolist()

        # zip() stops at the shorter sequence, so only paired-up entries are
        # compared here.
        pairs_match = [
            expected == actual
            for (expected, actual) in zip(expected_idx, actual_idx)
        ]
        self.assertTrue(all(pairs_match))
Example #4
0
    def test_apply_func(self):
        """Check that ``libfuncs.apply_func`` with ``numpy_func="square"`` squares every value.

        The input frame is modified by the call (its result is compared
        directly against a hand-built frame of squared values).
        """
        # ``pd.np`` no longer exists in pandas >= 2.0; the dtype is named
        # by string instead, which selects the same 32-bit float type.
        df = pd.DataFrame.from_dict(
            {
                "A": [1, 2, 3],
                "B": [3, 4, 5],
                "C": [6, 7, 8]
            },
            orient="index",
            dtype="float32")
        df.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2018, 2019], format="%Y"))
        df.sort_index(inplace=True)
        self.assertTrue(CERO.is_cero(df))

        libfuncs.apply_func(df, numpy_func="square")

        # Expected result: element-wise squares of the input, same labels.
        test_df = pd.DataFrame.from_dict(
            {
                "A": [1, 4, 9],
                "B": [9, 16, 25],
                "C": [36, 49, 64]
            },
            orient="index",
            dtype="float32")
        test_df.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2018, 2019], format="%Y"))
        test_df.sort_index(inplace=True)

        self.assertTrue(df.equals(test_df))
Example #5
0
    def exec_procedures(self, cero):
        """Execute all the procedures of the FromCERO object.

        Each procedure's ``exec_ops`` is called with ``cero``; any mapping it
        returns is merged into ``self.output_procedures``. If that mapping is
        non-empty afterwards, its values are combined and written to
        ``self["file"]``.

        :param pandas.DataFrame cero: A CERO to serve as input for the procedures.
            NOTE(review): ``CERO.rename_index_values`` is called on ``cero`` and its
            return value is discarded, so if that helper works in place the input
            *is* modified - confirm against its implementation.
        :raises ValueError: If the extension of ``self["file"]`` is not one of
            ``FromCERO.sup_output_types``.
        :raises Exception: Any exception raised by a procedure is re-raised as the
            same exception type, with the procedure name appended to its message.
        """

        CERO.is_cero(cero, raise_exception=True, empty_ok=False)

        CERO.rename_index_values(cero, self.get("map", {}))

        self.output_procedures = OrderedDict()

        procedures = self["procedures"]

        for procedure in procedures:

            try:
                ret = procedure.exec_ops(cero)
                # if ret is not None, should be dict with key: procedure["name"], value: resultant CERO
            except Exception as e:
                msg = e.__str__() + " Error in procedure '%s'." % (procedure["name"])
                try:
                    wrapped = e.__class__(msg)
                except Exception:
                    # Some exception types cannot be built from a single
                    # message argument; attempting it would mask the real error.
                    wrapped = None
                if wrapped is None:
                    raise  # Re-raise the original exception untouched
                raise wrapped from e

            if ret is None:
                ret = {}

            self.output_procedures.update(ret)

        if not procedures:  # If empty list
            self.output_procedures["default_output"] = cero

        if any(not procedure.get("file") for procedure in procedures):
            msg = "It has been detected that not all procedures direct output to file. Therefore some output will go to \'%s\'." % self["file"]
            print(msg)
            FromCERO._logger.info(msg)

        if self.output_procedures != {}:
            # Extension without the leading dot selects the export format.
            file_ext = os.path.splitext(self["file"])[1][1:]
            if file_ext in FromCERO.sup_output_types:
                out_df = CERO.combine_ceros(list(self.output_procedures.values()))
                FromCERO.dataframe_out(out_df, self["file"], output_type=file_ext)
            elif file_ext in FromCERO._Procedure.sup_output_types:
                raise ValueError("This data type is not supported for general export, because it probably has a more than 2 dimensions - export using 'procedures' instead.")
            else:
                raise ValueError("Unsupported data type detected for general export.")
Example #6
0
    def test_complex_xlsx(self):
        """Import via ``test_complex_xlsx_import.yaml`` and compare with the stored reference data."""
        conf_path = TestToCERO._dd + r'test_complex_xlsx_import.yaml'
        cero = ToCERO(conf=conf_path).create_cero()

        reference_path = TestToCERO._dd + "test_complex_xlsx_result.pickle"
        expected = DataTools.get_test_data(reference_path)

        # The import must be a valid CERO and match the reference exactly.
        self.assertTrue(CERO.is_cero(cero))
        self.assertTrue(cero.equals(expected))
Example #7
0
    def run(self) -> None:
        """
        Run the scenario: load the inputs, run every model in order, then execute the output configurations.

        The combined inputs are written to a ``..._step_00.xlsx`` file. After
        each model that returns a CERO, the result is merged into ``self.cero``
        and, depending on the ``export_mod_xlsx`` / ``export_int_xlsx`` options
        (both default to ``True``), written to xlsx files.

        :raises TypeError: If a model returns an object for which ``CERO.is_cero`` is falsy.
        """

        self.cero = CERO.create_empty()

        input_ceros = []
        for in_conf in self["input_conf"]:
            input_ceros.append(in_conf.create_cero())

        if input_ceros:
            self.cero = CERO.combine_ceros(input_ceros)
            print("Successfully loaded scenario inputs as CERO.")

        # Step 0 is the state of the CERO before any model has been run.
        initial_file = self.get_name() + "_%03d_step_%02d.xlsx" % (self["run_no"], 0)
        FromCERO.dataframe_out(self.cero, initial_file, "xlsx")

        for step, model in enumerate(self["models"], start=1):
            model_cero = model.run(self.cero)
            model_name = model["name"]
            finished_at = dt.datetime.now().strftime('%Y-%m-%d %H:%M')
            print("Completed run of model (%s) at %s." % (model_name, finished_at))

            # A model run that returns None contributes nothing to the CERO.
            if model_cero is None:
                continue

            if not CERO.is_cero(model_cero):
                raise TypeError(
                    "Object returned from model run is *not* of CERO format.")

            # The model-level option takes precedence over the scenario-level
            # one; model outputs are exported unless switched off.
            export_model_output = model.get("export_mod_xlsx",
                                            self.get("export_mod_xlsx", True))
            if export_model_output:
                model_out_file = self.get_name() + "_%03d_%s.xlsx" % (self["run_no"], model_name)
                print("Exporting output of %s to %s." % (model_name, model_out_file))
                model_cero.to_excel(model_out_file)

            self.cero = CERO.combine_ceros([self.cero, model_cero])

            # Intermediate snapshots are exported unless switched off.
            if self.get("export_int_xlsx", True):
                step_file = self.get_name() + "_%03d_step_%02d.xlsx" % (self["run_no"], step)
                print("Exporting updated CERO to %s." % step_file)
                self.cero.to_excel(step_file)

        for out_conf in self["output_conf"]:
            out_conf.exec_procedures(self.cero)

        # The loop above never breaks, so this message is always printed.
        print("Completed generation of scenario outputs.")
Example #8
0
 def init_df():
     """Build a 3x3 float32 frame with yearly datetime columns (2017-2019) and assert it is a CERO.

     Uses ``self`` from the enclosing scope for the assertion.

     :return: The freshly built ``pandas.DataFrame``.
     """
     df = pd.DataFrame.from_dict(
         {
             "A": [1, 2, 3],
             "B": [3, 4, 5],
             "C": [6, 7, 8]
         },
         orient="index")
     df.columns = pd.DatetimeIndex(
         pd.to_datetime([2017, 2018, 2019], format="%Y"))
     df.sort_index(inplace=True)
     # ``pd.np`` no longer exists in pandas >= 2.0; name the dtype instead.
     df = df.astype("float32")
     self.assertTrue(CERO.is_cero(df))
     return df
Example #9
0
    def test_export_to_csv(self):
        """Run the procedures of ``test_procedure_export_csv.yaml`` and check the CSV they write.

        The file ``csv_export.csv`` is read back, its first rows are compared
        with values ``1, 2, 3`` and labels ``A, B, C``, and it is then deleted.
        """

        # ``pd.np`` no longer exists in pandas >= 2.0; name the dtype instead.
        cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3], "D": [4], "E": [5], "F": [6], }, orient='index',
                                      dtype="float32")
        cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))
        cero.sort_index(inplace=True)
        self.assertTrue(CERO.is_cero(cero))

        fc = FromCERO(cfg.d_td + "test_procedure_export_csv.yaml")
        fc.exec_procedures(cero)

        df1 = pd.read_csv("csv_export.csv", index_col=0)
        test_list = [1, 2, 3]
        # Each row holds a single value (one year column).
        df1_vals = [x[0] for x in df1.values.tolist()]
        # zip() pairs entries only up to the shorter sequence.
        self.assertTrue(all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))
        test_list = ["A", "B", "C"]
        self.assertTrue(all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

        os.remove("csv_export.csv")
Example #10
0
    def test_sets_and_mapping(self):
        """Run the procedures of ``test_fromcero_mapping.yaml`` and check both CSV files they write.

        ``test_fromcero_mapping1.csv`` is compared with values ``1, 2, 3`` under
        labels ``A, B, C``; ``test_fromcero_mapping2.csv`` with values
        ``4, 5, 6`` under labels ``G, H, I``. Both files are deleted afterwards.
        """

        # ``pd.np`` no longer exists in pandas >= 2.0; name the dtype instead.
        cero = pd.DataFrame.from_dict(
            {
                "A": [1],
                "B": [2],
                "C": [3],
                "D": [4],
                "E": [5],
                "F": [6],
            },
            orient='index',
            dtype="float32")
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(
            data=pd.to_datetime([2018], format="%Y"))
        self.assertTrue(CERO.is_cero(cero))

        fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping.yaml")
        fc.exec_procedures(cero)

        # First output file.
        df1 = pd.read_csv("test_fromcero_mapping1.csv", index_col=0)
        test_list = [1, 2, 3]
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))
        test_list = ["A", "B", "C"]
        self.assertTrue(
            all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

        # Second output file.
        df2 = pd.read_csv("test_fromcero_mapping2.csv", index_col=0)
        test_list = [4, 5, 6]
        df2_vals = [x[0] for x in df2.values.tolist()]
        self.assertTrue(all([x == y for (x, y) in zip(test_list, df2_vals)]))
        test_list = ["G", "H", "I"]
        self.assertTrue(
            all([x == y for (x, y) in zip(test_list, df2.index.tolist())]))

        os.remove("test_fromcero_mapping1.csv")
        os.remove("test_fromcero_mapping2.csv")
Example #11
0
    def test_sets_and_mapping2(self):
        """Run the procedures of ``test_fromcero_mapping2.yaml`` on a two-level index and check both xlsx outputs.

        Each output file is re-imported with ``ToCERO`` (sheet ``CERO``, first
        two columns as index). Values are compared with ``1..9``; the index is
        compared with the expected relabelled tuples. Both files are deleted
        afterwards.
        """

        # ``pd.np`` no longer exists in pandas >= 2.0; name the dtype instead.
        cero = pd.DataFrame.from_dict(
            {
                ("A", "1"): [1],
                ("A", "2"): [2],
                ("A", "3"): [3],
                ("B", "1"): [4],
                ("B", "2"): [5],
                ("B", "3"): [6],
                ("C", "1"): [7],
                ("C", "2"): [8],
                ("C", "3"): [9],
            },
            orient='index',
            dtype="float32")
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(
            data=pd.to_datetime([2018], format="%Y"))
        self.assertTrue(CERO.is_cero(cero))

        fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping2.yaml")
        fc.exec_procedures(cero)

        # First output: expected index has the first level relabelled.
        tc = ToCERO({
            "files": [{
                "file": "test_fromcero_complexmapping1.xlsx",
                "sheet": "CERO",
                "index_col": [0, 1]
            }]
        })
        df1 = tc.create_cero()
        test_list = list(range(1, 10))
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))
        test_list = [("G", "1"), ("G", "2"), ("G", "3"), ("H", "1"),
                     ("H", "2"), ("H", "3"), ("I", "1"), ("I", "2"),
                     ("I", "3")]
        self.assertTrue(
            all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

        # Second output: expected index has the second level relabelled.
        tc = ToCERO({
            "files": [{
                "file": "test_fromcero_complexmapping2.xlsx",
                "sheet": "CERO",
                "index_col": [0, 1]
            }]
        })
        df1 = tc.create_cero()
        test_list = list(range(1, 10))
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all([np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)]))
        test_list = [("A", "G"), ("A", "H"), ("A", "I"), ("B", "G"),
                     ("B", "H"), ("B", "I"), ("C", "G"), ("C", "H"),
                     ("C", "I")]
        self.assertTrue(
            all([x == y for (x, y) in zip(test_list, df1.index.tolist())]))

        os.remove("test_fromcero_complexmapping1.xlsx")
        os.remove("test_fromcero_complexmapping2.xlsx")
Example #12
0
    def test_multiindex_xlsx(self):
        """Import via ``test_multiindex_xlsx.yaml`` and assert that the result validates as a CERO."""
        conf_path = TestToCERO._dd + r'test_multiindex_xlsx.yaml'
        imported = ToCERO(conf=conf_path).create_cero()

        is_valid = CERO.is_cero(imported)
        self.assertTrue(is_valid)
Example #13
0
    def wrapper(df: pd.DataFrame,
                *args,
                locs: "List[Union[tuple, str]]" = None,
                ilocs: "List[int]" = None,
                start_year: "Union[pd.datetime, int]" = None,
                end_year: "Union[pd.datetime, int]" = None,
                **kwargs):

        """
        Restrict ``df`` by rows and years, then call the encapsulated function on the restricted frame.

        :param df: An CERO, which may or may not be a strict superset of data to perform the operation on.
        :param args: Passed to the encapsulated function as positional arguments, immediately after the
            restricted ``df``.
        :param locs: ``locs``, if provided, must be a list of identifiers that correspond to values of
            ``df.index``. It is ``df``, reduced to these specific indices, that a wrapped function will
            receive as an argument. An error is raised if both ``locs`` and ``ilocs`` is specified.
        :param ilocs: Identical in nature to ``locs``, though instead a list of integers (zero-indexed) is
            provided (corresponding to the row number of ``df``). An error is raised if both ``locs`` and
            ``ilocs`` is specified.
        :param start_year: Note that ``df`` is a CERO, and CEROs have a ``pandas.DatetimeIndex`` on columns.
            ``start_year`` restricts the CERO to years after and including ``start_year``. An integer is
            interpreted as 1 January of that year.
        :param end_year: Upper bound of the year restriction. An integer is interpreted as 1 January of
            that year. NOTE(review): the restriction is a positional slice ``start:end``, so as written the
            ``end_year`` column itself is *excluded*; earlier documentation described it as inclusive -
            confirm which behaviour is intended before relying on it.
        :param kwargs: Keyword arguments to be passed to the encapsulated function.
        :return: The return value of the encapsulated function (a ``pandas.Series`` is converted to a
            one-row ``pandas.DataFrame`` first).
        :raises TypeError: If ``df`` is not a ``pandas.DataFrame``, or if both ``locs`` and ``ilocs`` are given.
        """

        # Explicit check rather than ``assert`` so it still runs under ``python -O``.
        if not isinstance(df, pd.DataFrame):
            raise TypeError("First function argument must be of pandas.DataFrame type.")

        # Convert integer to datetime type. ``pd.datetime`` no longer exists in
        # pandas >= 2.0; ``pd.Timestamp`` is a ``datetime`` subclass.
        if isinstance(start_year, int):
            start_year = pd.Timestamp(year=start_year, month=1, day=1)
        if isinstance(end_year, int):
            end_year = pd.Timestamp(year=end_year, month=1, day=1)

        # Get index locations (column positions) of the year bounds
        if start_year is not None:
            start_year = df.columns.get_loc(start_year)
        if end_year is not None:
            end_year = df.columns.get_loc(end_year)

        if (locs is not None) and (ilocs is not None):
            raise TypeError("Only one of 'locs' or 'ilocs' can be provided (not both).")

        # Labels are translated to row positions; no restriction means all rows.
        if locs is not None:
            ilocs = [df.index.get_loc(loc) for loc in locs]
        if ilocs is None:
            ilocs = slice(0, None)

        df_cp = df.iloc[ilocs, start_year:end_year].copy(deep=False) # df_cp is always different object to df

        ret = func(df_cp, *args, **kwargs)
        if ret is None:
            return ret
        elif isinstance(ret, pd.Series):
            # If series, convert to dataframe
            ret = pd.DataFrame(data=[ret])

        CERO.is_cero(ret) # Performs checks to ensure ret is a valid CERO
        return ret
Example #14
0
    def test_create_empty(self):
        """The object returned by ``CERO.create_empty`` must pass default CERO validation."""
        created = CERO.create_empty()
        is_valid = CERO.is_cero(created)
        self.assertTrue(is_valid)
Example #15
0
    def test_is_cero(self):
        """Tests the validation method by feeding it deliberately invalid data.

        The frame is repaired one defect at a time; every intermediate state
        must still raise ``CERO.InvalidCERO`` and the final state must validate.
        Note that ``msg`` passed to ``assertRaises`` is only the message shown
        if the assertion fails - it is not matched against the exception text.
        """

        df = None

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_inv_type):
            CERO.is_cero(df)

        df = pd.DataFrame.from_dict({
            "A": [1, 2, 3],
            "B": [4, 5, 6]
        },
                                    orient="index",
                                    dtype=int)
        df.index = pd.MultiIndex.from_tuples([("A", 1), ("A", 2)])
        # ^^ ``df`` is not even close to being a CERO...

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_bad_idx):
            CERO.is_cero(df)

        # Flat index, but with duplicated labels and still no datetime columns.
        df.index = pd.Index(["A", "A"])

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_bad_col):
            CERO.is_cero(df)

        # Datetime columns, but with a duplicated year.
        df.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2017, 2019], format="%Y"))

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_idx_nunique):
            CERO.is_cero(df)

        # Unique index labels; columns still contain the duplicated year.
        df.index = pd.Index(["A", "B"])

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_col_nunique):
            CERO.is_cero(df)

        # Unique columns; values are still integers.
        df.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2018, 2019], format="%Y"))

        with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_val_type):
            CERO.is_cero(df)

        # ``pd.np`` no longer exists in pandas >= 2.0; name the dtype instead.
        df = df.astype("float32", copy=False)

        self.assertTrue(CERO.is_cero(df))