def test_nrows_skiprows(self):
    """Check the first row of the CERO built from ``test_nrows_2.yaml``."""
    conf_path = TestToCERO._dd + r"test_nrows_2.yaml"
    cero = ToCERO(conf=conf_path).create_cero()

    expected_row = [0.00551917898595782, 0.00551917898595782]
    first_row = cero.values[0]
    self.assertTrue(np.allclose(first_row, expected_row))
def test_load_set_inputs(self):
    """Round-trip a CERO through a ``noop`` procedure whose input is a set name.

    The procedure is configured with the output file
    ``test_load_set_inputs.csv``; that file is re-imported from the working
    directory with ``ToCERO`` and must equal the original CERO. The file is
    removed afterwards.
    """
    out_file = "test_load_set_inputs.csv"

    # The "float32" dtype string replaces ``pd.np.float32``: the ``pd.np``
    # alias was deprecated in pandas 1.0 and removed in pandas 2.0.
    cero = pd.DataFrame.from_dict(
        {
            "A": [1, 2, 3, 4, 5],
            "B": [6, 4, 5, 6, 7],
            "C": [4, 5, 8, 7, 8],
            "D": [9, 10, 12, 11, 2],
        },
        orient="index",
        dtype="float32")
    cero.columns = pd.DatetimeIndex(
        pd.to_datetime([2017, 2018, 2019, 2020, 2021], format="%Y"))
    cero.sort_index(inplace=True)

    proc = FromCERO._Procedure({
        "name": "test_proc",
        "sets": {"a_set": ["A", "B", "C", "D"]},
        "inputs": ["a_set"],
        "operations": [{"func": "noop", "arrays": ["a_set"]}],
        "file": out_file,
    })
    proc.exec_ops(cero)

    tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
    df = tc.create_cero()
    self.assertTrue(df.equals(cero))

    os.remove(out_file)
def test_run_checks(self):
    """Exercise ``ToCERO.run_checks`` with a missing and an existing file."""

    def conf_for(file_name):
        # A fresh configuration dict is built for every call.
        return {
            "files": [{
                "file": file_name,
                "search_paths": [TestToCERO._dd]
            }]
        }

    missing = "not_a__FileObj object."

    # A missing file raises by default...
    with self.assertRaises(FileNotFoundError):
        ToCERO.run_checks(conf_for(missing))

    # ...and yields a falsy result when exceptions are suppressed.
    self.assertFalse(
        ToCERO.run_checks(conf_for(missing), raise_exception=False))

    # A file that exists in the search path passes.
    self.assertTrue(ToCERO.run_checks(conf_for("test_csv.csv")))
def test_empty_xlsx(self):
    """Check ``CERO.is_cero`` on the CERO built from ``test_empty_xlsx.yaml``.

    With ``empty_ok=False`` the check must raise ``CERO.EmptyCERO``; with the
    default arguments it must return a truthy value.
    """
    conf_path = TestToCERO._dd + r"test_empty_xlsx.yaml"
    cero = ToCERO(conf=conf_path).create_cero()

    with self.assertRaises(CERO.EmptyCERO):
        strict_result = CERO.is_cero(cero, empty_ok=False)
        self.assertTrue(strict_result)

    lenient_result = CERO.is_cero(cero)
    self.assertTrue(lenient_result)
def test_local_libfuncs(self):
    """Run a procedure that uses an operation from a local ``libfuncs`` file.

    ``test_local_libfuncs.py`` is copied into the working directory, the
    procedure applies ``test_local_recursive_op`` to a one-column CERO, and
    the exported csv is re-imported and compared with the expected frame.
    Both the copied module and the csv are removed afterwards.
    """
    shutil.copy2(TestFromCERO_Procedure._dd + "test_local_libfuncs.py", os.getcwd())

    def single_year_frame(data):
        # The "float32" string replaces ``pd.np.float32``: the ``pd.np``
        # alias was deprecated in pandas 1.0 and removed in pandas 2.0.
        frame = pd.DataFrame.from_dict(data, orient='index', dtype="float32")
        frame.sort_index(inplace=True)
        frame.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))
        return frame

    cero = single_year_frame({"A": [1], "B": [2], "C": [3]})
    test_df = single_year_frame({"A": [2], "B": [4], "C": [6]})

    proc = FromCERO._Procedure({
        "libfuncs": "test_local_libfuncs.py",
        "ref_dir": ".",
        "name": "test_set",
        "inputs": ["A", "B", "C"],
        "operations": [{"func": "test_local_recursive_op"}],
        "file": "test_local_libfuncs.csv",
    })
    proc.exec_ops(cero)

    tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), "test_local_libfuncs.csv")}]})
    df = tc.create_cero()
    self.assertTrue(df.equals(test_df))

    os.remove("test_local_libfuncs.py")
    os.remove("test_local_libfuncs.csv")
def test__import_vd(self):
    """Check ``ToCERO._FileObj._import_vd`` against hand-built expected frames.

    The same ``.VD`` file is imported twice: first with only ``date_col`` and
    ``val_col`` set, then with ``default_year`` added. Each result is cast to
    float32 and compared with an explicitly constructed frame.
    """
    # ``pd.np`` was deprecated in pandas 1.0 and removed in pandas 2.0, so the
    # dtype is given as a string and NaN comes from the builtin float.
    nan = float("nan")
    vd_file = TestToCERO_FileObj._dd + "test__import_vd.VD"
    var_act_key = ("VAR_Act", "-", "FT_COMELC", "ACT", "2015", "PD", "-")

    # --- Import without "default_year" ---
    fo = ToCERO._FileObj({"file": vd_file, "date_col": 3, "val_col": 8})
    df = fo._import_vd()
    df.columns.set_names([None], inplace=True)
    df = df.astype("float32")

    test_df = pd.DataFrame.from_dict(
        {
            var_act_key: [
                0.740833333333336, 0.740833333333336, 0.8005115537522,
                0.829127920241238
            ]
        },
        orient="index",
        dtype="float32")
    test_df.columns = pd.Index([2015, 2016, 2020, 2025])
    test_df.sort_index(inplace=True)
    self.assertTrue(test_df.equals(df))

    # --- Import with "default_year" ---
    fo = ToCERO._FileObj({
        "file": vd_file,
        "date_col": 3,
        "val_col": 8,
        "default_year": 2018
    })
    df = fo._import_vd()
    df.columns.set_names([None], inplace=True)
    df = df.astype("float32")
    df.sort_index(inplace=True)

    test_df = pd.DataFrame(
        data=[
            [
                0.740833333333336, 0.740833333333336, nan, 0.8005115537522,
                0.829127920241238
            ],
            [nan, nan, 1.39891653080538, nan, nan],
            [nan, nan, 19.6047685777802, nan, nan],
            [nan, nan, 31516.8951973493, nan, nan],
        ],
        columns=[2015, 2016, 2018, 2020, 2025],
        dtype="float32")
    test_df.index = CERO.create_cero_index([
        var_act_key,
        ("Cost_Salv", "-", "EN_WinONS-26", "ADE", "2040", "-", "-"),
        ("Cost_NPV", "-", "EE_StmTurb009", "CQ", "-", "-", "ACT"),
        ("Reg_irec", "-", "-", "WA", "-", "-", "-"),
    ])
    test_df.sort_index(inplace=True)
    self.assertTrue(test_df.equals(df))
def test_sceninputs_to_cero2(self):
    """Test xlsx to CERO conversion against pickled reference data."""
    conf_path = TestScenIn2CERO._dd + 'test_xlsx_to_cero.yaml'
    cero = ToCERO(conf=conf_path).create_cero()

    pickle_path = os.path.normpath('data/test_xlsx_to_cero.pickle')
    expected = DataTools.get_test_data(pickle_path)

    # Order-independent test: rows are selected in the reference's order.
    selected = cero.loc[expected.index]
    self.assertTrue(selected.equals(expected))
def test_vurm2cero(self):
    """Test the VURM to CERO conversion against pickled reference data."""
    conf_path = TestVURM2CERO._dd + r'test_vurm_to_cero.yaml'
    converter = ToCERO(conf=conf_path)
    cero = converter.create_cero()

    reference_path = TestVURM2CERO._dd + r'test_vurm_to_cero_finaldata.pickle'
    expected = DataTools.get_test_data(reference_path)
    self.assertTrue(cero.equals(expected))
def test_gtape2cero(self):
    """Test the conversion configured by ``test_gtape_to_cero.yaml``.

    Paths are built from the ``data`` directory beside this file.
    """
    # The trailing "" makes the joined path end with a separator.
    data_dir = os.path.join(os.path.dirname(__file__), "data", "")

    converter = ToCERO(data_dir + r'test_gtape_to_cero.yaml')
    cero = converter.create_cero()

    expected = DataTools.get_test_data(
        data_dir + r'test_gtape_to_cero_finaldata.pickle')
    self.assertTrue(cero.equals(expected))
def test_complex_xlsx(self):
    """Test the import configured by ``test_complex_xlsx_import.yaml``.

    The result must pass ``CERO.is_cero`` and equal the pickled reference.
    """
    conf_path = TestToCERO._dd + r'test_complex_xlsx_import.yaml'
    cero = ToCERO(conf=conf_path).create_cero()

    expected = DataTools.get_test_data(
        TestToCERO._dd + "test_complex_xlsx_result.pickle")

    self.assertTrue(CERO.is_cero(cero))
    self.assertTrue(cero.equals(expected))
def test_rename_2(self):
    """Check the index of the CERO built from ``test_rename_2.yaml``.

    The CERO must pass ``CERO.is_cero`` and its leading index entries must
    match the expected identifiers.
    """
    to_cero = ToCERO(conf=(TestToCERO._dd + r"test_rename_2.yaml"))
    cero = to_cero.create_cero()

    # The result of the validity check used to be discarded; assert it, as
    # the other tests in this suite do.
    self.assertTrue(CERO.is_cero(cero))

    # Rename operation always moves series to the end
    test_idx = ["PROFESSIONALS", ("1", "MANAGERS")]
    self.assertTrue(
        all([x == y for (x, y) in zip(test_idx, cero.index.tolist())]))
def test_time_dim(self):
    """Test the conversion configured by ``test_har_to_cero_timedim.yaml``.

    The har file is copied into the working directory before the conversion
    and removed again afterwards.
    """
    har_name = "test_timedim.har"
    shutil.copy2(TestHAR2CERO._dd + har_name, har_name)

    converter = ToCERO(conf=(TestHAR2CERO._dd + r'test_har_to_cero_timedim.yaml'))
    cero = converter.create_cero()

    expected = DataTools.get_test_data(
        TestHAR2CERO._dd + r'test_har_to_cero_timedim.pickle')

    # Order-independent test: rows are selected in the reference's order.
    selected = cero.loc[expected.index]
    self.assertTrue(selected.equals(expected))

    os.remove(har_name)
def test_sceninputs_to_cero(self):
    """Tests the conversion of hars to ceros.

    ``Mdatnew7.har`` is copied into the working directory before the
    conversion and removed again afterwards.
    """
    har_name = "Mdatnew7.har"
    shutil.copy2(TestHAR2CERO._dd + har_name, har_name)

    converter = ToCERO(conf=(TestHAR2CERO._dd + r'test_har_to_cero.yaml'))
    cero = converter.create_cero()

    expected = DataTools.get_test_data(
        TestHAR2CERO._dd + r'test_har_to_cero.pickle')

    # Order-independent test: rows are selected in the reference's order.
    selected = cero.loc[expected.index]
    self.assertTrue(selected.equals(expected))

    os.remove(har_name)
def is_valid(self, raise_exception=True) -> bool:
    """Perform static checks on ``self`` to ensure it is a valid Scenario object.

    The checks are, in order: all required keys are present, ``"models"`` is a
    list of ``Model`` objects whose ``check_config`` passes, every
    ``"input_conf"`` item passes ``ToCERO.check_config`` and every
    ``"output_conf"`` item passes ``FromCERO.check_config`` (all with
    ``runtime=False``).

    :param bool raise_exception: If `True` (default) a failed key or type
        check raises ``TypeError``; otherwise `False` is returned. The flag
        is also forwarded to the ``check_config`` calls.
    :return bool: `True` if every check passes, `False` otherwise.
    :raises TypeError: On a failed key or type check when ``raise_exception``
        is `True`.
    """

    def fail(msg):
        # Honour ``raise_exception`` for the structural checks as well;
        # previously these raised even when the caller asked for a bool.
        if raise_exception:
            raise TypeError(msg)
        return False

    req_keys = ["name", "models", "input_conf", "output_conf"]
    if not all(k in self.keys() for k in req_keys):
        return fail(
            ("Not all required key-value pairs have been defined. " +
             "It is necessary to define all of %s.") % req_keys)

    if not isinstance(self["models"], list):
        return fail("Scenario property 'models' must be defined as a list.")

    for model in self["models"]:
        if not isinstance(model, Model):
            return fail("Object '%s' is of type '%s', not 'Model'." %
                        (model, type(model)))
        if not model.check_config(raise_exception=raise_exception,
                                  runtime=False):
            return False

    for ic in self["input_conf"]:
        if not ToCERO.check_config(
                ic, raise_exception=raise_exception, runtime=False):
            return False

    for oc in self["output_conf"]:
        if not FromCERO.check_config(
                oc, raise_exception=raise_exception, runtime=False):
            return False

    return True
def is_valid(self, raise_exception=True):
    """Check the validity of ``self`` as a ``Model`` object.

    Only static checks are made; runtime issues may still occur.

    :param bool raise_exception: If `True` (default) a missing required key
        raises ``TypeError``; otherwise the message is printed and `False`
        is returned. The flag is also forwarded to the ``check_config`` calls.
    :return bool: Returns `True` if ``self`` is a valid ``Model``.
    """
    req_keys = ["name", "cmds", "input_conf", "output_conf"]

    key_missing = any(key not in self for key in req_keys)
    if key_missing:
        template = (
            "All models must have all of the keys: %s. Attempted to create model"
            + " with at least one of these keys missing.")
        msg = template % req_keys
        Model._logger.error(msg)
        if raise_exception:
            raise TypeError(msg)
        print(msg)
        return False

    # A model's inputs are exported from a CERO...
    for in_conf in self["input_conf"]:
        input_ok = FromCERO.check_config(
            in_conf, raise_exception=raise_exception, runtime=False)
        if not input_ok:
            return False

    # ...and its outputs are converted back into one.
    for out_conf in self["output_conf"]:
        output_ok = ToCERO.check_config(
            out_conf, raise_exception=raise_exception, runtime=False)
        if not output_ok:
            return False

    return True
def test_regex_format(self):
    """Import a csv using the ``time_regex`` and ``time_fmt`` options together.

    The result is compared with a hand-built frame whose columns are the
    years 2016 and 2017 as a ``DatetimeIndex``.
    """
    tc = ToCERO({
        "files": [{
            "file": TestToCERO._dd + "test_csv_regex.csv",
            # Regex could pick out just the year, but want to test 'time_fmt' as well...
            "time_regex": r"(Y\d{4}).*",
            "time_fmt": r"Y%Y"
        }]
    })
    cero = tc.create_cero()

    # The "float32" string replaces ``pd.np.float32``: the ``pd.np`` alias
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    test_df = pd.DataFrame(data=[[1, 2], [3, 4]],
                           columns=[2016, 2017],
                           dtype="float32")
    test_df.index = CERO.create_cero_index(["A", "B"])
    test_df.columns = pd.DatetimeIndex(
        pd.to_datetime([2016, 2017], format="%Y"))
    test_df.sort_index(inplace=True)

    self.assertTrue(cero.equals(test_df))
def test_csv_orientation(self):
    """Import ``test_csv_orientation.csv`` with and without ``"orientation"``.

    Without the option ``_import_file`` must raise ``TypeError``; with
    ``"orientation": "cols"`` the result must equal the hand-built frame.
    """
    csv_file = TestToCERO_FileObj._dd + "test_csv_orientation.csv"

    tc = ToCERO._FileObj({"file": csv_file})
    with self.assertRaises(TypeError):
        df = tc._import_file()

    tc = ToCERO._FileObj({"file": csv_file, "orientation": "cols"})
    df = tc._import_file()

    # The "float32" string replaces ``pd.np.float32``: the ``pd.np`` alias
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    test_df = pd.DataFrame(data=[[1, 2], [3, 4]],
                           columns=[2016, 2017],
                           dtype="float32")
    test_df.index = CERO.create_cero_index(["A", "B"])
    test_df.sort_index(inplace=True)

    self.assertTrue(df.equals(test_df))
def run_checks(self, raise_exception=True):
    """Perform runtime checks on ``self`` to ensure it is a valid Scenario object.

    Failure of runtime checks indicates that the scenario is not ready to run.
    Each item of ``self["input_conf"]`` is passed to ``ToCERO.check_config``
    with ``runtime=True``; checking stops at the first failure.

    :param bool raise_exception: Forwarded to ``ToCERO.check_config``. If
        `True` (default) then an exception is raised on check failure.
        Otherwise (on check failure) `False` is returned.
    :return bool: `True` if every input configuration passes, else `False`.
    """
    # ``all`` short-circuits on the first failing configuration.
    return all(
        ToCERO.check_config(
            input_conf, raise_exception=raise_exception, runtime=True)
        for input_conf in self["input_conf"])
def test__import_gdx(self):
    """Compare ``ToCERO._FileObj._import_gdx`` with a pickled reference frame."""
    file_obj = ToCERO._FileObj({
        "file": TestToCERO_FileObj._dd + "test__import_gdx.gdx",
        "symbols": {
            "name": "L_EXPORT",
            "date_col": 2
        }
    })
    imported = file_obj._import_gdx()

    # The reference pickle is a fixture from the test data directory.
    pickle_path = TestToCERO_FileObj._dd + "test__import_gdx.pickle"
    with open(pickle_path, "rb") as handle:
        expected = pickle.load(handle)

    self.assertTrue(imported.equals(expected))
def test_is_valid(self):
    """Check that ``ToCERO.is_valid`` rejects malformed ``"files"`` values.

    Each malformed configuration must raise ``TypeError`` by default and give
    a falsy result with ``raise_exception=False``.
    """
    # Factories rather than shared dicts, so every call gets a fresh object.
    bad_conf_factories = [
        lambda: {"files": 1},
        lambda: {"files": "not a list"},
        lambda: {"files": {"file": "not_a__FileObj object."}},
    ]

    for make_conf in bad_conf_factories:
        with self.assertRaises(TypeError):
            ToCERO.is_valid(make_conf())
        self.assertFalse(
            ToCERO.is_valid(make_conf(), raise_exception=False))

    # NOTE(review): this asserts the truthiness of a non-empty dict literal,
    # so it always passes and never calls ``ToCERO.is_valid``. The intended
    # positive case should be confirmed and wired up.
    self.assertTrue({"files": {"file": "Mdatnew7.har"}})
def test_har_repetitive_sets(self):
    """Import header array ``ARR7`` of ``test.har`` and compare with a pickle."""
    head_arr = {"name": "ARR7", "default_year": 2018}
    file_obj = ToCERO._FileObj({
        "file": "test.har",
        "search_paths": TestToCERO_FileObj._dd,
        "head_arrs": [head_arr]
    })
    cero = file_obj.import_file_as_cero()

    expected = DataTools.get_test_data(
        TestToCERO_FileObj._dd + "test_har_repetitive_sets.pickle")
    self.assertTrue(cero.equals(expected))
def test_stitch_time(self):
    """Compare the ``test_time_stitch.yaml`` result with the ``data_final`` sheet.

    The CERO built from the yaml configuration must equal the CERO imported
    directly from sheet ``data_final`` of ``test_time_stitch.xlsx``.
    """
    stitched = ToCERO(TestToCERO._dd + "test_time_stitch.yaml").create_cero()

    reference_conf = {
        "files": [{
            "file": TestToCERO._dd + "test_time_stitch.xlsx",
            "sheet": "data_final"
        }]
    }
    reference = ToCERO(reference_conf).create_cero()

    self.assertTrue(stitched.equals(reference))
def test_multiindex_xlsx(self):
    """Check that the ``test_multiindex_xlsx.yaml`` import passes ``CERO.is_cero``."""
    conf_path = TestToCERO._dd + r'test_multiindex_xlsx.yaml'
    cero = ToCERO(conf=conf_path).create_cero()

    is_valid_cero = CERO.is_cero(cero)
    self.assertTrue(is_valid_cero)
def test_nrows_empty(self):
    """Check that the first row of the ``test_nrows_3.yaml`` CERO is all NaN."""
    conf_path = TestToCERO._dd + r"test_nrows_3.yaml"
    cero = ToCERO(conf=conf_path).create_cero()

    first_row = cero.values[0]
    nan_mask = np.isnan(first_row)
    self.assertTrue(np.all(nan_mask))
def test_stitch_time(self):
    """Stitch two sets together in time, then apply ``pc_change``.

    A single-year CERO (A, B, C for 2018) is combined with a two-year CERO
    (D, E, F for 2019-2020). The procedure renames ``b_set`` to ``a_set`` with
    a ``noop`` and then runs ``pc_change`` on ``a_set``. The same expected
    result must be produced for each way of naming the initial column:
    ``init_cols`` as a list or scalar, and ``init_icols`` as a scalar, a list
    or a negative index. Every case exports to its own csv, which is
    re-imported, compared and removed.
    """
    # ``pd.np`` was deprecated in pandas 1.0 and removed in pandas 2.0, so the
    # dtype is given as a string and NaN comes from the builtin float.
    nan = float("nan")

    def year_frame(data, years):
        frame = pd.DataFrame.from_dict(data, orient='index', dtype="float32")
        frame.sort_index(inplace=True)
        frame.columns = pd.DatetimeIndex(data=pd.to_datetime(years, format="%Y"))
        return frame

    init = year_frame({"A": [1], "B": [2], "C": [3]}, [2018])
    cero = year_frame({"D": [100, 200], "E": [50, 0], "F": [-50, 200]},
                      [2019, 2020])
    cero = CERO.combine_ceros([init, cero])

    test_df = year_frame(
        {
            "A": [1, 2, 6],
            "B": [2, 3, 3],
            "C": [3, 1.5, 4.5],
            "D": [nan, 100, 200],
            "E": [nan, 50, 0],
            "F": [nan, -50, 200],
        },
        [2018, 2019, 2020])

    # (output file, option naming the initial column for ``pc_change``)
    cases = [
        ("test_stitch_time.csv", {"init_cols": [2018]}),
        ("test_stitch_time2.csv", {"init_cols": 2018}),
        ("test_stitch_time3.csv", {"init_icols": 0}),
        ("test_stitch_time4.csv", {"init_icols": [0]}),
        ("test_stitch_time5.csv", {"init_icols": [-3]}),
    ]

    for out_file, init_option in cases:
        pc_change_op = {"func": "pc_change", "arrays": ["a_set"]}
        pc_change_op.update(init_option)

        proc = FromCERO._Procedure({
            "name": "test_stitch_time",
            "file": out_file,
            "sets": {"a_set": ["A", "B", "C"],
                     "b_set": ["D", "E", "F"]},
            "inputs": ["a_set", "b_set"],
            "operations": [{"func": "noop", "rename": {"b_set": "a_set"}},
                           pc_change_op],
            "ref_dir": ".",
        })
        proc.exec_ops(cero)

        tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
        df = tc.create_cero()
        self.assertTrue(df.equals(test_df))

        os.remove(out_file)
def test_output_cero(self):
    """Tests the behaviour of the "outputs" argument is correct.

    Five procedures are run on the same one-column CERO, differing only in
    their ``"outputs"`` setting and output file:

    * ``["A"]`` - the exported file must hold only series ``A``;
    * ``True`` - the exported file must equal the whole CERO;
    * ``None`` and ``False`` - no file may be written;
    * key absent - the exported file must equal the whole CERO.
    """
    # The "float32" string replaces ``pd.np.float32``: the ``pd.np`` alias
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3]},
                                  orient='index',
                                  dtype="float32")
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

    def run_procedure(file_name, extra):
        # Build and execute a procedure exporting to ``file_name``; ``extra``
        # carries the "outputs" entry (or nothing, to test the default).
        proc_def = {"name": "test_output_cero",
                    "file": file_name,
                    "inputs": ["A", "B", "C"],
                    "ref_dir": "."}
        proc_def.update(extra)
        FromCERO._Procedure(proc_def).exec_ops(cero)

    def reimport(file_name):
        tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), file_name)}]})
        return tc.create_cero()

    # Because single item in outputs, error may be raised (but shouldn't) on
    # attempting to export a Pandas.Series object instead of a
    # Pandas.DataFrame object.
    run_procedure("test_output_cero.csv", {"outputs": ["A"]})
    self.assertTrue(cero.loc[["A"]].equals(reimport("test_output_cero.csv")))

    # "outputs": True exports everything.
    run_procedure("test_output_cero2.csv", {"outputs": True})
    self.assertTrue(cero.equals(reimport("test_output_cero2.csv")))

    # "outputs": None writes no file.
    run_procedure("test_output_cero3.csv", {"outputs": None})
    self.assertFalse(os.path.isfile("test_output_cero3.csv"))

    # "outputs": False writes no file.
    run_procedure("test_output_cero4.csv", {"outputs": False})
    self.assertFalse(os.path.isfile("test_output_cero4.csv"))

    # No "outputs" key. This case used to re-import test_output_cero2.csv,
    # so the file it had just written was never checked.
    run_procedure("test_output_cero5.csv", {})
    self.assertTrue(cero.equals(reimport("test_output_cero5.csv")))

    os.remove("test_output_cero.csv")
    os.remove("test_output_cero2.csv")
    os.remove("test_output_cero5.csv")
def __init__(self, sc_def: dict, *args, parent: dict = None, **kwargs):
    """
    Build a scenario from built-in defaults, then ``parent``, then ``sc_def``
    (later sources override earlier ones), normalise the resulting options,
    load the referenced models and configuration files, and validate.

    :param sc_def: A scenario definition object.
    :param args: Passed to the superclass (dict) as positional arguments at initialisation.
    :param parent: Optional mapping of inherited key-value pairs; overridden by ``sc_def``.
    :param kwargs: Passed to the superclass (dict) as keyword arguments at initialisation.
    :raises TypeError: If ``sc_def`` is not a ``dict``.
    """
    defaults = {
        "name": None,
        "run_no": None,
        "search_paths": [],
        "ref_dir": None,
        "models": [],
        "input_conf": [],
        "output_conf": []
    }

    if parent is None:
        parent = {}
    defaults.update(parent)

    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would silently skip this validation.
    if not isinstance(sc_def, dict):
        raise TypeError(
            "Scenario definition provided in incorrect format - type %s instead of dict."
            % type(sc_def))

    defaults.update(sc_def)
    sc_def = defaults

    super().__init__(sc_def, *args, **kwargs)

    if not self.get("name"):
        self["name"] = "scenario_unnamed"
        # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
        self._logger.warning(
            "Scenario name has not been specified - scenario named '%s'." %
            self["name"])

    if not isinstance(self.get("run_no"), int):
        self["run_no"] = 1
        self._logger.info(
            "Scenario run_no (run number) has not been specified (or is not of integer type) - defaults to %s."
            % self["run_no"])

    # A single search path given as a string becomes a one-item list; an
    # empty value falls back to the current working directory.
    if isinstance(self["search_paths"], str):
        self["search_paths"] = [os.path.abspath(self["search_paths"])]
    elif not self["search_paths"]:
        self["search_paths"].append(os.path.abspath("."))

    if self["ref_dir"] is None:
        self["ref_dir"] = os.path.abspath(".")

    # Models inherit the scenario's search paths and reference directory.
    model_parent = {
        "search_paths": self["search_paths"],
        "ref_dir": self["ref_dir"]
    }
    self["models"] = [
        Model(m, parent=model_parent) for m in self.get("models")
    ]

    if isinstance(self["input_conf"], str):
        self["input_conf"] = [self["input_conf"]]
    if isinstance(self["output_conf"], str):
        self["output_conf"] = [self["output_conf"]]

    # Load ToCERO conf
    par_dict = {"search_paths": self["search_paths"]}
    for idx, ic in enumerate(self["input_conf"]):
        self["input_conf"][idx] = self.find_file(ic)
        self["input_conf"][idx] = ToCERO(self["input_conf"][idx],
                                         parent=par_dict)

    # Load FromCERO conf
    par_dict = {"ref_dir": self["ref_dir"]}
    for idx, oc in enumerate(self["output_conf"]):
        self["output_conf"][idx] = self.find_file(oc)
        self["output_conf"][idx] = FromCERO(self["output_conf"][idx],
                                            parent=par_dict)

    self.is_valid()  # Check Scenario is valid
def test_groupby_and_aggregate(self):
    """Test ``libfuncs.groupby`` with several keys and aggregations.

    Dependent on ToCERO being functional. Each case re-imports the sheet,
    applies ``groupby`` and compares the 2018 column and the index with the
    expected values.

    :return:
    """
    # ``pd.datetime`` was deprecated in pandas 1.0 and removed in pandas 2.0;
    # ``pd.to_datetime`` gives an equivalent column label.
    year_2018 = pd.to_datetime("2018", format="%Y")

    def check(cero, expected_index, expected_vals):
        # Compare the 2018 column and the index against the expectations.
        self.assertTrue(
            all([
                np.isclose(x, y)
                for (x, y) in zip(expected_vals, cero[year_2018].tolist())
            ]))
        self.assertTrue(
            all([x == y
                 for (x, y) in zip(expected_index, cero.index.tolist())]))

    tc = ToCERO({
        "files": [{
            "file": TestLibfuncs._dd + "test_groupby_and_aggregate.xlsx",
            "sheet": "groupby",
            "index_col": [0, 1]
        }]
    })

    cero = libfuncs.groupby(tc.create_cero(), key=0, match="a", agg="sum")
    check(cero,
          ["a", ("a", "c"), ("a", "d"), ("b", "b"), ("c", "b")],
          [6, 2, 3, 4, 5])

    cero = libfuncs.groupby(tc.create_cero(), key=1, match="b", agg="mean")
    check(cero,
          ["b", ("a", "c"), ("a", "d"), ("b", "b"), ("c", "b")],
          [3.3333333333, 2, 3, 4, 5])

    # This case used to be run twice from a verbatim copy; once is enough.
    cero = libfuncs.groupby(tc.create_cero(), key=0, agg="count")
    check(cero,
          ["a", ("a", "c"), ("a", "d"), "b", "c"],
          [3, 2, 3, 1, 1])

    tc = ToCERO({
        "files": [{
            "file": TestLibfuncs._dd + "test_groupby_and_aggregate.xlsx",
            "sheet": "groupby_2",
            "index_col": [0, 1, 2]
        }]
    })

    cero = libfuncs.groupby(tc.create_cero(), key=[0, 1], agg="count")
    check(cero,
          [('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'b'), ('c', 'b'),
           ('a', 'b', '2'), ('a', 'c', '2'), ('a', 'd', '3'),
           ('a', 'c', '3')],
          [2, 3, 2, 1, 1, 6, 7, 8, 9])
def __init__(self, model: dict, *args, parent: "Model" = None, **kwargs):
    """
    Build a model from built-in defaults, then ``model``, then ``parent``.
    Later sources override earlier ones, so a key present in ``parent``
    takes precedence over the same key in ``model``.

    :param model: A `dict` containing ``Model`` options.
    :param args: Passed to superclass (`dict`) at initialisation.
    :param "Model" parent: If provided, inherits all key-value pairs from ``parent``.
    :param kwargs: Passed to superclass (`dict`) at initialisation.
    """
    settings = {
        "name": "default_model_name",
        "cmds": [],
        "input_conf": [],
        "output_conf": [],
        "search_paths": [],
        "wd": None
    }
    settings.update(model)
    settings.update({} if parent is None else parent)

    super().__init__(settings, *args, **kwargs)

    if self["name"] == "default_model_name":
        Model._logger.warning(
            "Model not named - default name '%s' assigned." % self["name"])

    # Command string processing
    if isinstance(self["cmds"], str):
        self["cmds"] = [self["cmds"]]
    if not self["cmds"]:
        Model._logger.info("No commands specified for model '%s'." %
                           settings["name"])

    if not self["search_paths"]:
        self["search_paths"].append(os.path.abspath("."))

    # A single configuration file given as a string becomes a one-item list.
    for conf_key in ("input_conf", "output_conf"):
        if isinstance(self[conf_key], str):
            self[conf_key] = [self[conf_key]]

    # Locate and load input configuration files; each is loaded as a
    # FromCERO whose "ref_dir" is the directory holding the file.
    for idx, conf_file in enumerate(self["input_conf"]):
        located = self.find_file(conf_file)
        self["input_conf"][idx] = located
        conf_dir = os.path.abspath(os.path.dirname(located))
        self["input_conf"][idx] = FromCERO(located,
                                           parent={"ref_dir": conf_dir})

    # Locate and load output configuration files; each is loaded as a
    # ToCERO whose "search_paths" is the directory holding the file.
    for idx, conf_file in enumerate(self["output_conf"]):
        located = self.find_file(conf_file)
        self["output_conf"][idx] = located
        conf_dir = os.path.abspath(os.path.dirname(located))
        self["output_conf"][idx] = ToCERO(located,
                                          parent={"search_paths": conf_dir})
def test_sets_and_mapping2(self):
    """Run the ``test_fromcero_mapping2.yaml`` procedures and check both exports.

    A nine-row, two-level CERO holding the values 1-9 is passed through
    ``FromCERO``. Each of the two exported xlsx files is re-imported; its
    values must still be 1-9 and its index must match the expected
    identifiers. Both files are removed at the end.
    """
    # ``np`` (already used below) replaces the ``pd.np`` alias, which was
    # deprecated in pandas 1.0 and removed in pandas 2.0.
    cero = pd.DataFrame.from_dict(
        {
            ("A", "1"): [1],
            ("A", "2"): [2],
            ("A", "3"): [3],
            ("B", "1"): [4],
            ("B", "2"): [5],
            ("B", "3"): [6],
            ("C", "1"): [7],
            ("C", "2"): [8],
            ("C", "3"): [9],
        },
        orient='index',
        dtype=np.float32)
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(
        data=pd.to_datetime([2018], format="%Y"))
    self.assertTrue(CERO.is_cero(cero))

    fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping2.yaml")
    fc.exec_procedures(cero)

    # (exported file, expected index after the mapping)
    expectations = [
        ("test_fromcero_complexmapping1.xlsx",
         [("G", "1"), ("G", "2"), ("G", "3"), ("H", "1"), ("H", "2"),
          ("H", "3"), ("I", "1"), ("I", "2"), ("I", "3")]),
        ("test_fromcero_complexmapping2.xlsx",
         [("A", "G"), ("A", "H"), ("A", "I"), ("B", "G"), ("B", "H"),
          ("B", "I"), ("C", "G"), ("C", "H"), ("C", "I")]),
    ]

    for file_name, expected_index in expectations:
        tc = ToCERO({
            "files": [{
                "file": file_name,
                "sheet": "CERO",
                "index_col": [0, 1]
            }]
        })
        df1 = tc.create_cero()

        expected_vals = list(range(1, 10))
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all([np.isclose(x, y)
                 for (x, y) in zip(expected_vals, df1_vals)]))
        self.assertTrue(
            all([x == y
                 for (x, y) in zip(expected_index, df1.index.tolist())]))

    os.remove("test_fromcero_complexmapping1.xlsx")
    os.remove("test_fromcero_complexmapping2.xlsx")