def test_empty_xlsx(self):
    """Build a CERO from ``test_empty_xlsx.yaml`` and validate it twice.

    Validation with ``empty_ok=False`` is expected to raise
    ``CERO.EmptyCERO``; validation with default arguments is expected to
    succeed.
    """
    config_path = TestToCERO._dd + r"test_empty_xlsx.yaml"
    imported = ToCERO(conf=config_path).create_cero()

    # Strict validation must reject the imported object.
    with self.assertRaises(CERO.EmptyCERO):
        self.assertTrue(CERO.is_cero(imported, empty_ok=False))

    # Default validation must accept it.
    self.assertTrue(CERO.is_cero(imported))
def test_fillna(self):
    """Check ``libfuncs.fillna`` with ``value``, ``method="bfill"`` and defaults.

    A 3x3 float32 frame (rows A-C, columns 2017-2019) has the cell at
    position ``[1, 1]`` blanked before each call. The expected fill values
    are ``0.0`` (explicit value), ``5.0`` (``bfill``) and ``3.0`` (default
    arguments).
    """
    nan = float("nan")  # avoids ``pd.np``, which was removed in pandas 2.0

    df = pd.DataFrame.from_dict(
        {"A": [1, 2, 3], "B": [3, 4, 5], "C": [6, 7, 8]},
        orient="index")
    df.columns = pd.DatetimeIndex(
        pd.to_datetime([2017, 2018, 2019], format="%Y"))
    df.sort_index(inplace=True)

    # Cast to float *before* inserting the NaN so that no integer column has
    # to be silently upcast; the resulting frame is the same.
    df = df.astype("float32")
    df.iloc[1, 1] = nan
    self.assertTrue(CERO.is_cero(df))

    # Explicit fill value.
    libfuncs.fillna(df, value=0.0)
    self.assertEqual(df.iloc[1, 1], 0.0)

    # Back-fill.
    df.iloc[1, 1] = nan
    libfuncs.fillna(df, method="bfill")
    self.assertEqual(df.iloc[1, 1], 5.0)

    # Default arguments.
    df.iloc[1, 1] = nan
    libfuncs.fillna(df)
    self.assertEqual(df.iloc[1, 1], 3.0)

    self.assertTrue(CERO.is_cero(df))
def test_rename_2(self):
    """Build a CERO from ``test_rename_2.yaml`` and check its leading index values."""
    to_cero = ToCERO(conf=(TestToCERO._dd + r"test_rename_2.yaml"))
    cero = to_cero.create_cero()

    # Assert the validation result (as the sibling tests do) instead of
    # discarding it.
    self.assertTrue(CERO.is_cero(cero))

    test_idx = ["PROFESSIONALS", ("1", "MANAGERS")]
    # Rename operation always moves series to the end
    # NOTE: ``zip`` stops at the shorter sequence, so only the leading
    # ``len(test_idx)`` index values are compared.
    self.assertTrue(
        all(x == y for (x, y) in zip(test_idx, cero.index.tolist())))
def test_apply_func(self):
    """Check that ``libfuncs.apply_func(df, numpy_func="square")`` squares every value."""

    def build_frame(rows):
        # float32 frame with yearly (2017-2019) datetime columns and a sorted
        # index. The dtype is given by name because ``pd.np`` was removed in
        # pandas 2.0.
        frame = pd.DataFrame.from_dict(rows, orient="index", dtype="float32")
        frame.columns = pd.DatetimeIndex(
            pd.to_datetime([2017, 2018, 2019], format="%Y"))
        frame.sort_index(inplace=True)
        return frame

    df = build_frame({"A": [1, 2, 3], "B": [3, 4, 5], "C": [6, 7, 8]})
    self.assertTrue(CERO.is_cero(df))

    libfuncs.apply_func(df, numpy_func="square")

    test_df = build_frame(
        {"A": [1, 4, 9], "B": [9, 16, 25], "C": [36, 49, 64]})
    self.assertTrue(df.equals(test_df))
def exec_procedures(self, cero):
    """
    Execute all the procedures of the FromCERO object and export any collected output.

    Each procedure's ``exec_ops`` is called with ``cero``. A non-``None``
    return value is merged into ``self.output_procedures``. If there are no
    procedures at all, ``cero`` itself is stored under ``"default_output"``.
    Collected outputs are then combined and written to ``self["file"]``.

    :param pandas.DataFrame cero: A CERO to serve as input for the procedures. \
    NOTE(review): ``CERO.rename_index_values`` is applied to this object directly; \
    whether that modifies the caller's object cannot be determined from here.
    :raises ValueError: If collected output exists and the extension of \
    ``self["file"]`` is not a supported general-export type.
    :raises Exception: Any exception raised by a procedure is re-raised as the \
    same class, with the procedure name appended to the message.
    """
    CERO.is_cero(cero, raise_exception=True, empty_ok=False)

    CERO.rename_index_values(cero, self.get("map", {}))

    self.output_procedures = OrderedDict()

    for procedure in self["procedures"]:
        try:
            # if ret is not None, should be dict with key: procedure["name"], value: resultant CERO
            ret = procedure.exec_ops(cero)
        except Exception as e:
            # Same exception class as before, now explicitly chained so the
            # original traceback is reported as the direct cause.
            raise e.__class__(str(e) + " Error in procedure '%s'." % (procedure["name"])) from e

        if ret is not None:
            self.output_procedures.update(ret)

    if not self["procedures"]:  # If empty list
        self.output_procedures["default_output"] = cero

    if any(not procedure.get("file") for procedure in self["procedures"]):
        msg = "It has been detected that not all procedures direct output to file. Therefore some output will go to \'%s\'." % self["file"]
        print(msg)
        FromCERO._logger.info(msg)

    if not self.output_procedures:
        # Nothing was collected for general export.
        return

    file_ext = os.path.splitext(self["file"])[1][1:]
    if file_ext in FromCERO.sup_output_types:
        out_df = CERO.combine_ceros(list(self.output_procedures.values()))
        FromCERO.dataframe_out(out_df, self["file"], output_type=file_ext)
    elif file_ext in FromCERO._Procedure.sup_output_types:
        raise ValueError("This data type is not supported for general export, because it probably has a more than 2 dimensions - export using 'procedures' instead.")
    else:
        raise ValueError("Unsupported data type detected for general export.")
def test_complex_xlsx(self):
    """Compare the CERO built from ``test_complex_xlsx_import.yaml`` with the pickled reference."""
    data_dir = TestToCERO._dd

    importer = ToCERO(conf=(data_dir + r'test_complex_xlsx_import.yaml'))
    produced = importer.create_cero()
    expected = DataTools.get_test_data(data_dir + "test_complex_xlsx_result.pickle")

    self.assertTrue(CERO.is_cero(produced))
    self.assertTrue(produced.equals(expected))
def run(self) -> None:
    """
    Execute a scenario run.

    Input configurations are turned into CEROs and combined, each model is
    run in turn against the accumulated CERO (model output is merged back in
    when a model returns one), and finally every output configuration is
    executed against the result. Progress is reported with ``print`` and
    intermediate results are written to xlsx files.

    :raises TypeError: If a model returns an object that is not a CERO.
    """

    def step_file(step_no):
        # File name used for the CERO snapshot taken after step ``step_no``.
        return self.get_name() + "_%03d_step_%02d.xlsx" % (self["run_no"], step_no)

    self.cero = CERO.create_empty()

    input_ceros = [in_conf.create_cero() for in_conf in self["input_conf"]]
    if input_ceros:
        self.cero = CERO.combine_ceros(input_ceros)
        print("Successfully loaded scenario inputs as CERO.")
        FromCERO.dataframe_out(self.cero, step_file(0), "xlsx")

    for step_no, model in enumerate(self["models"], start=1):
        m_cero = model.run(self.cero)
        finished_at = dt.datetime.now().strftime('%Y-%m-%d %H:%M')
        print("Completed run of model (%s) at %s." % (model["name"], finished_at))

        # If ouput_conf is not defined for a model, then None is returned...
        if m_cero is None:
            continue

        if not CERO.is_cero(m_cero):
            raise TypeError(
                "Object returned from model run is *not* of CERO format.")

        # By default, export model outputs automatically to xlsx files
        export_model_output = model.get(
            "export_mod_xlsx", self.get("export_mod_xlsx", True))
        if export_model_output:
            model_out_file = (self.get_name() + "_%03d_%s.xlsx"
                              % (self["run_no"], model["name"]))
            print("Exporting output of %s to %s." % (model["name"], model_out_file))
            m_cero.to_excel(model_out_file)

        self.cero = CERO.combine_ceros([self.cero, m_cero])

        # If true (default), export the intermediate steps to xlsx files
        if self.get("export_int_xlsx", True):
            isfn = step_file(step_no)
            print("Exporting updated CERO to %s." % isfn)
            self.cero.to_excel(isfn)

    for out_conf in self["output_conf"]:
        out_conf.exec_procedures(self.cero)

    # The loop above contains no ``break``, so this message is always printed.
    print("Completed generation of scenario outputs.")
def init_df():
    """Return a new 3x3 float32 frame (rows A-C, columns 2017-2019) that passes ``CERO.is_cero``.

    ``self`` is not a parameter of this function; it is taken from the
    enclosing scope.
    """
    df = pd.DataFrame.from_dict(
        {"A": [1, 2, 3], "B": [3, 4, 5], "C": [6, 7, 8]},
        orient="index")
    df.columns = pd.DatetimeIndex(
        pd.to_datetime([2017, 2018, 2019], format="%Y"))
    df.sort_index(inplace=True)
    # dtype given by name: ``pd.np`` was removed in pandas 2.0.
    df = df.astype("float32")
    self.assertTrue(CERO.is_cero(df))
    return df
def test_export_to_csv(self):
    """Run ``test_procedure_export_csv.yaml`` on a six-row CERO and check ``csv_export.csv``.

    The leading values of the exported file are expected to be close to
    1, 2, 3 and its leading index labels to be "A", "B", "C". The output
    file is removed afterwards, even when an assertion fails.
    """
    out_file = "csv_export.csv"

    # dtype given by name: ``pd.np`` was removed in pandas 2.0.
    cero = pd.DataFrame.from_dict(
        {"A": [1], "B": [2], "C": [3], "D": [4], "E": [5], "F": [6]},
        orient='index', dtype="float32")
    cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))
    cero.sort_index(inplace=True)
    self.assertTrue(CERO.is_cero(cero))

    fc = FromCERO(cfg.d_td + "test_procedure_export_csv.yaml")
    try:
        fc.exec_procedures(cero)

        df1 = pd.read_csv(out_file, index_col=0)

        test_list = [1, 2, 3]
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(all(np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)))

        test_list = ["A", "B", "C"]
        self.assertTrue(all(x == y for (x, y) in zip(test_list, df1.index.tolist())))
    finally:
        # Do not leave the artefact behind when an assertion fails.
        if os.path.exists(out_file):
            os.remove(out_file)
def test_sets_and_mapping(self):
    """Run ``test_fromcero_mapping.yaml`` on a six-row CERO and check both csv outputs.

    ``test_fromcero_mapping1.csv`` is expected to start with values close to
    1, 2, 3 under labels "A", "B", "C"; ``test_fromcero_mapping2.csv`` with
    values 4, 5, 6 under labels "G", "H", "I". Both files are removed
    afterwards, even when an assertion fails.
    """
    out_files = ("test_fromcero_mapping1.csv", "test_fromcero_mapping2.csv")

    # dtype given by name: ``pd.np`` was removed in pandas 2.0.
    cero = pd.DataFrame.from_dict(
        {"A": [1], "B": [2], "C": [3], "D": [4], "E": [5], "F": [6]},
        orient='index', dtype="float32")
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(
        data=pd.to_datetime([2018], format="%Y"))
    self.assertTrue(CERO.is_cero(cero))

    fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping.yaml")
    try:
        fc.exec_procedures(cero)

        df1 = pd.read_csv(out_files[0], index_col=0)
        test_list = [1, 2, 3]
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all(np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)))
        test_list = ["A", "B", "C"]
        self.assertTrue(
            all(x == y for (x, y) in zip(test_list, df1.index.tolist())))

        df2 = pd.read_csv(out_files[1], index_col=0)
        test_list = [4, 5, 6]
        df2_vals = [x[0] for x in df2.values.tolist()]
        self.assertTrue(all(x == y for (x, y) in zip(test_list, df2_vals)))
        test_list = ["G", "H", "I"]
        self.assertTrue(
            all(x == y for (x, y) in zip(test_list, df2.index.tolist())))
    finally:
        # Do not leave artefacts behind when an assertion fails.
        for path in out_files:
            if os.path.exists(path):
                os.remove(path)
def test_sets_and_mapping2(self):
    """Run ``test_fromcero_mapping2.yaml`` on a two-level CERO and check both xlsx outputs.

    Each output file is read back through ``ToCERO`` and is expected to
    start with values close to 1..9. The first file's leading index is
    expected to be ("G".."I") x ("1".."3"); the second file's to be
    ("A".."C") x ("G".."I"). Both files are removed afterwards, even when
    an assertion fails.
    """
    out_files = ("test_fromcero_complexmapping1.xlsx",
                 "test_fromcero_complexmapping2.xlsx")

    def reload(path):
        # Read an exported workbook back in as a CERO.
        tc = ToCERO({"files": [{"file": path,
                                "sheet": "CERO",
                                "index_col": [0, 1]}]})
        return tc.create_cero()

    # dtype given by name: ``pd.np`` was removed in pandas 2.0.
    cero = pd.DataFrame.from_dict(
        {("A", "1"): [1], ("A", "2"): [2], ("A", "3"): [3],
         ("B", "1"): [4], ("B", "2"): [5], ("B", "3"): [6],
         ("C", "1"): [7], ("C", "2"): [8], ("C", "3"): [9]},
        orient='index', dtype="float32")
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(
        data=pd.to_datetime([2018], format="%Y"))
    self.assertTrue(CERO.is_cero(cero))

    fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping2.yaml")
    try:
        fc.exec_procedures(cero)

        df1 = reload(out_files[0])
        test_list = list(range(1, 10))
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all(np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)))
        test_list = [("G", "1"), ("G", "2"), ("G", "3"),
                     ("H", "1"), ("H", "2"), ("H", "3"),
                     ("I", "1"), ("I", "2"), ("I", "3")]
        self.assertTrue(
            all(x == y for (x, y) in zip(test_list, df1.index.tolist())))

        df1 = reload(out_files[1])
        test_list = list(range(1, 10))
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all(np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)))
        test_list = [("A", "G"), ("A", "H"), ("A", "I"),
                     ("B", "G"), ("B", "H"), ("B", "I"),
                     ("C", "G"), ("C", "H"), ("C", "I")]
        self.assertTrue(
            all(x == y for (x, y) in zip(test_list, df1.index.tolist())))
    finally:
        # Do not leave artefacts behind when an assertion fails.
        for path in out_files:
            if os.path.exists(path):
                os.remove(path)
def test_multiindex_xlsx(self):
    """Build a CERO from ``test_multiindex_xlsx.yaml`` and check that it validates."""
    config_path = TestToCERO._dd + r'test_multiindex_xlsx.yaml'
    imported = ToCERO(conf=config_path).create_cero()
    self.assertTrue(CERO.is_cero(imported))
def wrapper(df: pd.DataFrame,
            *args,
            locs: "List[Union[tuple, str]]" = None,
            ilocs: "List[int]" = None,
            start_year: "Union[pd.datetime, int]" = None,
            end_year: "Union[pd.datetime, int]" = None,
            **kwargs):
    """
    Restrict ``df`` by rows and years, call the encapsulated function on the restricted copy, and return its result.

    :param df: An CERO, which may or may not be a strict superset of data to perform the operation on.
    :param args: Passed to the encapsulated function as positional arguments, immediately after the restricted \
    ``df``.
    :param locs: ``locs``, if provided, must be a list of identifiers that correspond to values of ``df.index``. \
    It is ``df``, reduced to these specific indices, that a wrapped function will receive as an argument. An \
    error is raised if both ``locs`` and ``ilocs`` is specified.
    :param ilocs: Identical in nature to ``locs``, though instead a list of integers (zero-indexed) is \
    provided (corresponding to the row number of ``df``). An \
    error is raised if both ``locs`` and ``ilocs`` is specified.
    :param start_year: Note that ``df`` is a CERO, and CEROs have a ``pandas.DatetimeIndex`` on columns. \
    ``start_year`` restricts the CERO to years after and including ``start_year``.
    :param end_year: Note that ``df`` is a CERO, and CEROs have a ``pandas.DatetimeIndex`` on columns. \
    ``end_year`` restricts the CERO to years up to and including ``end_year``.
    :param kwargs: Keyword arguments to be passed to the encapsulated function.
    :return: The return value of the encapsulated function (a returned ``pandas.Series`` is converted to a \
    single-row ``pandas.DataFrame``).
    :raises TypeError: If ``df`` is not a ``pandas.DataFrame``, or if both ``locs`` and ``ilocs`` are given.
    """
    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with ``-O``, which would silently disable the validation.
    if not isinstance(df, pd.DataFrame):
        raise TypeError("First function argument must be of pandas.DataFrame type.")

    # Convert integer years to timestamps (``pd.datetime`` was removed in pandas 2.0)
    if isinstance(start_year, int):
        start_year = pd.Timestamp(year=start_year, month=1, day=1)
    if isinstance(end_year, int):
        end_year = pd.Timestamp(year=end_year, month=1, day=1)

    # Get column positions
    if start_year is not None:
        start_year = df.columns.get_loc(start_year)
    if end_year is not None:
        # Positional slices exclude their end point, so step one past the
        # column of ``end_year`` to make the restriction inclusive, as documented.
        end_year = df.columns.get_loc(end_year) + 1

    if (locs is not None) and (ilocs is not None):
        raise TypeError("Only one of 'locs' or 'ilocs' can be provided (not both).")

    if locs is not None:
        ilocs = [df.index.get_loc(loc) for loc in locs]
    if ilocs is None:
        ilocs = pd.IndexSlice[0:]  # all rows

    # df_cp is always different object to df
    df_cp = df.iloc[ilocs, start_year:end_year].copy(deep=False)

    ret = func(df_cp, *args, **kwargs)
    if ret is None:
        return None

    if isinstance(ret, pd.Series):
        # If series, convert to dataframe
        ret = pd.DataFrame(data=[ret])

    CERO.is_cero(ret)  # Performs checks to ensure ret is a valid CERO
    return ret
def test_create_empty(self):
    """The object returned by ``CERO.create_empty`` must pass ``CERO.is_cero``."""
    self.assertTrue(CERO.is_cero(CERO.create_empty()))
def test_is_cero(self):
    """Tests the validation method by feeding it deliberately False data.

    Starting from an object that violates several requirements, one
    property is repaired at a time. ``CERO.InvalidCERO`` is expected after
    each step until the frame finally validates.
    """
    # NOTE(review): the ``msg`` argument of ``assertRaises`` is only the
    # message shown when the expected exception is *not* raised; it does not
    # check the text of the raised exception.

    df = None
    with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_inv_type):
        CERO.is_cero(df)

    df = pd.DataFrame.from_dict({"A": [1, 2, 3],
                                 "B": [4, 5, 6]},
                                orient="index",
                                dtype=int)
    df.index = pd.MultiIndex.from_tuples([("A", 1), ("A", 2)])
    # ^^ ``df`` is not even close to being a CERO...
    with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_bad_idx):
        CERO.is_cero(df)

    df.index = pd.Index(["A", "A"])
    with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_bad_col):
        CERO.is_cero(df)

    df.columns = pd.DatetimeIndex(
        pd.to_datetime([2017, 2017, 2019], format="%Y"))
    with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_idx_nunique):
        CERO.is_cero(df)

    df.index = pd.Index(["A", "B"])
    with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_col_nunique):
        CERO.is_cero(df)

    df.columns = pd.DatetimeIndex(
        pd.to_datetime([2017, 2018, 2019], format="%Y"))
    with self.assertRaises(CERO.InvalidCERO, msg=CERO._msg_val_type):
        CERO.is_cero(df)

    # dtype given by name: ``pd.np`` was removed in pandas 2.0.
    df = df.astype("float32", copy=False)
    self.assertTrue(CERO.is_cero(df))