def test_stitch_time(self):
    """Compare the CERO built from ``test_time_stitch.yaml`` with a reference sheet.

    The reference is the ``data_final`` sheet of ``test_time_stitch.xlsx``,
    imported directly; the two CEROs must be equal.
    """
    data_dir = TestToCERO._dd

    stitched = ToCERO(data_dir + "test_time_stitch.yaml").create_cero()

    reference_conf = {
        "files": [{
            "file": data_dir + "test_time_stitch.xlsx",
            "sheet": "data_final"
        }]
    }
    expected = ToCERO(reference_conf).create_cero()

    self.assertTrue(stitched.equals(expected))
def test_nrows_skiprows(self):
    """Check the first row of the CERO configured by ``test_nrows_2.yaml``."""
    conf_path = TestToCERO._dd + r"test_nrows_2.yaml"
    cero = ToCERO(conf=conf_path).create_cero()

    expected_row = [0.00551917898595782, 0.00551917898595782]
    first_row = cero.values[0]
    self.assertTrue(np.allclose(first_row, expected_row))
def test_load_set_inputs(self):
    """Export a CERO through a procedure whose inputs are named by a set.

    A ``noop`` procedure with ``inputs: ["a_set"]`` writes
    ``test_load_set_inputs.csv``; re-importing that file must give back the
    original CERO. The output file is removed even when the test fails.
    """
    out_file = "test_load_set_inputs.csv"

    # ``np.float32`` instead of ``pd.np.float32``: the ``pandas.np`` alias
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    cero = pd.DataFrame.from_dict({"A": [1, 2, 3, 4, 5],
                                   "B": [6, 4, 5, 6, 7],
                                   "C": [4, 5, 8, 7, 8],
                                   "D": [9, 10, 12, 11, 2]},
                                  orient="index",
                                  dtype=np.float32)
    cero.columns = pd.DatetimeIndex(
        pd.to_datetime([2017, 2018, 2019, 2020, 2021], format="%Y"))
    cero.sort_index(inplace=True)

    proc = FromCERO._Procedure({"name": "test_proc",
                                "sets": {"a_set": ["A", "B", "C", "D"]},
                                "inputs": ["a_set"],
                                "operations": [{"func": "noop",
                                                "arrays": ["a_set"]}],
                                "file": out_file,
                                })
    try:
        proc.exec_ops(cero)

        tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
        df = tc.create_cero()

        self.assertTrue(df.equals(cero))
    finally:
        # Do not leave the generated file behind when an assertion fails.
        if os.path.isfile(out_file):
            os.remove(out_file)
def test_empty_xlsx(self):
    """Check how ``CERO.is_cero`` treats the CERO from ``test_empty_xlsx.yaml``.

    With ``empty_ok=False`` the check must raise ``CERO.EmptyCERO``; with
    default arguments it must return a truthy value.
    """
    conf_path = TestToCERO._dd + r"test_empty_xlsx.yaml"
    cero = ToCERO(conf=conf_path).create_cero()

    with self.assertRaises(CERO.EmptyCERO):
        self.assertTrue(CERO.is_cero(cero, empty_ok=False))

    is_valid = CERO.is_cero(cero)
    self.assertTrue(is_valid)
def test_local_libfuncs(self):
    """Run a procedure whose operation comes from a local ``libfuncs`` file.

    ``test_local_libfuncs.py`` is copied into the working directory, the
    procedure applies ``test_local_recursive_op`` to inputs A, B and C, and
    the exported csv is re-imported and compared with the expected CERO
    (every value doubled). Both the copied module and the csv are removed
    even when the test fails.
    """
    libfuncs_file = "test_local_libfuncs.py"
    out_file = "test_local_libfuncs.csv"

    shutil.copy2(TestFromCERO_Procedure._dd + libfuncs_file, os.getcwd())
    try:
        # ``np.float32`` instead of ``pd.np.float32``: the ``pandas.np``
        # alias was deprecated in pandas 1.0 and removed in pandas 2.0.
        cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3]},
                                      orient='index',
                                      dtype=np.float32)
        cero.sort_index(inplace=True)
        cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

        test_df = pd.DataFrame.from_dict({"A": [2], "B": [4], "C": [6]},
                                         orient='index',
                                         dtype=np.float32)
        test_df.sort_index(inplace=True)
        test_df.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

        proc = FromCERO._Procedure({"libfuncs": "test_local_libfuncs.py",
                                    "ref_dir": ".",
                                    "name": "test_set",
                                    "inputs": ["A", "B", "C"],
                                    "operations": [{"func": "test_local_recursive_op"}],
                                    "file": "test_local_libfuncs.csv"})
        proc.exec_ops(cero)

        tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
        df = tc.create_cero()

        self.assertTrue(df.equals(test_df))
    finally:
        # Do not leave test artefacts behind when a step or assertion fails.
        for leftover in (libfuncs_file, out_file):
            if os.path.isfile(leftover):
                os.remove(leftover)
def test_sceninputs_to_cero2(self):
    '''Test xlsx to CERO conversion.'''
    conf_path = TestScenIn2CERO._dd + 'test_xlsx_to_cero.yaml'
    cero = ToCERO(conf=conf_path).create_cero()

    pickle_path = os.path.normpath('data/test_xlsx_to_cero.pickle')
    expected = DataTools.get_test_data(pickle_path)

    # Order-independent test: reorder the result by the reference index first.
    reordered = cero.loc[expected.index]
    self.assertTrue(reordered.equals(expected))
def test_vurm2cero(self):
    '''Tests VURM2CERO conversion process.'''
    data_dir = TestVURM2CERO._dd

    converter = ToCERO(conf=(data_dir + r'test_vurm_to_cero.yaml'))
    cero = converter.create_cero()

    expected = DataTools.get_test_data(data_dir + r'test_vurm_to_cero_finaldata.pickle')
    self.assertTrue(cero.equals(expected))
def test_gtape2cero(self):
    """Compare the CERO built from ``test_gtape_to_cero.yaml`` with its pickled reference."""
    # The trailing "" makes the joined path end with a separator.
    data_dir = os.path.join(os.path.dirname(__file__), "data", "")

    converter = ToCERO(data_dir + r'test_gtape_to_cero.yaml')
    cero = converter.create_cero()

    expected = DataTools.get_test_data(data_dir + r'test_gtape_to_cero_finaldata.pickle')
    self.assertTrue(cero.equals(expected))
def test_complex_xlsx(self):
    """The CERO from ``test_complex_xlsx_import.yaml`` is valid and matches its reference."""
    data_dir = TestToCERO._dd

    cero = ToCERO(conf=(data_dir + r'test_complex_xlsx_import.yaml')).create_cero()
    expected = DataTools.get_test_data(data_dir + "test_complex_xlsx_result.pickle")

    is_valid = CERO.is_cero(cero)
    self.assertTrue(is_valid)

    matches_reference = cero.equals(expected)
    self.assertTrue(matches_reference)
def test_rename_2(self):
    """Check the leading index entries of the CERO from ``test_rename_2.yaml``.

    The CERO must pass ``CERO.is_cero`` and its index must start with the
    identifiers listed in ``test_idx``, in that order.
    """
    to_cero = ToCERO(conf=(TestToCERO._dd + r"test_rename_2.yaml"))
    cero = to_cero.create_cero()

    # Assert the validity check rather than discarding its return value, as
    # the sibling tests do.
    self.assertTrue(CERO.is_cero(cero))

    # Rename operation always moves series to the end
    test_idx = ["PROFESSIONALS", ("1", "MANAGERS")]

    self.assertTrue(
        all(x == y for (x, y) in zip(test_idx, cero.index.tolist())))
def test_time_dim(self):
    """Compare the CERO built from ``test_har_to_cero_timedim.yaml`` with its reference.

    ``test_timedim.har`` is copied into the working directory for the
    duration of the test and removed afterwards, even when the test fails.
    """
    har_file = "test_timedim.har"

    shutil.copy2(TestHAR2CERO._dd + har_file, har_file)
    try:
        h2c = ToCERO(conf=(TestHAR2CERO._dd + r'test_har_to_cero_timedim.yaml'))
        cero = h2c.create_cero()
        df = DataTools.get_test_data(TestHAR2CERO._dd + r'test_har_to_cero_timedim.pickle')

        # Order-independent test
        self.assertTrue(cero.loc[df.index].equals(df))
    finally:
        # Do not leave the copied file behind when a step or assertion fails.
        if os.path.isfile(har_file):
            os.remove(har_file)
def test_sceninputs_to_cero(self):
    '''Tests the conversion of hars to ceros.

    ``Mdatnew7.har`` is copied into the working directory for the duration
    of the test and removed afterwards, even when the test fails.
    '''
    har_file = "Mdatnew7.har"

    shutil.copy2(TestHAR2CERO._dd + har_file, har_file)
    try:
        s2c = ToCERO(conf=(TestHAR2CERO._dd + r'test_har_to_cero.yaml'))
        cero = s2c.create_cero()
        df = DataTools.get_test_data(TestHAR2CERO._dd + r'test_har_to_cero.pickle')

        # Order-independent test
        self.assertTrue(cero.loc[df.index].equals(df))
    finally:
        # Do not leave the copied file behind when a step or assertion fails.
        if os.path.isfile(har_file):
            os.remove(har_file)
def test_regex_format(self):
    """Import a csv whose time labels need both ``time_regex`` and ``time_fmt``.

    The result must equal a two-row CERO (A, B) with columns for 2016 and
    2017.
    """
    tc = ToCERO({
        "files": [{
            "file": TestToCERO._dd + "test_csv_regex.csv",
            # Regex could pick out just the year, but want to test
            # 'time_fmt' as well...
            "time_regex": r"(Y\d{4}).*",
            "time_fmt": r"Y%Y"
        }]
    })
    cero = tc.create_cero()

    # ``np.float32`` instead of ``pd.np.float32``: the ``pandas.np`` alias
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    test_df = pd.DataFrame(data=[[1, 2], [3, 4]],
                           columns=[2016, 2017],
                           dtype=np.float32)
    test_df.index = CERO.create_cero_index(["A", "B"])
    test_df.columns = pd.DatetimeIndex(
        pd.to_datetime([2016, 2017], format="%Y"))
    test_df.sort_index(inplace=True)

    self.assertTrue(cero.equals(test_df))
def test_groupby_and_aggregate(self):
    """Check ``libfuncs.groupby`` for several key/match/aggregation combinations.

    Dependent on ToCERO being functional.

    Each case re-imports a sheet of ``test_groupby_and_aggregate.xlsx``,
    applies ``libfuncs.groupby`` and compares the 2018 column and the index
    of the result, pairwise, against expected lists.

    :return:
    """
    # Stdlib ``datetime`` replaces the ``pd.datetime`` alias, which was
    # deprecated in pandas 1.0 and removed in pandas 2.0. The alias pointed
    # at this same class, so the column key is unchanged.
    from datetime import datetime

    year_2018 = datetime.strptime("2018", "%Y")
    xlsx_file = TestLibfuncs._dd + "test_groupby_and_aggregate.xlsx"

    def check(result, test_vals, test_list):
        """Compare the 2018 values and the index of ``result`` pairwise."""
        self.assertTrue(
            all(np.isclose(x, y)
                for (x, y) in zip(test_vals, result[year_2018].tolist())))
        self.assertTrue(
            all(x == y for (x, y) in zip(test_list, result.index.tolist())))

    tc = ToCERO({
        "files": [{
            "file": xlsx_file,
            "sheet": "groupby",
            "index_col": [0, 1]
        }]
    })

    # Sum over identifiers whose first field matches "a".
    cero = tc.create_cero()
    cero = libfuncs.groupby(cero, key=0, match="a", agg="sum")
    check(cero,
          test_vals=[6, 2, 3, 4, 5],
          test_list=["a", ("a", "c"), ("a", "d"), ("b", "b"), ("c", "b")])

    # Mean over identifiers whose second field matches "b".
    cero = tc.create_cero()
    cero = libfuncs.groupby(cero, key=1, match="b", agg="mean")
    check(cero,
          test_vals=[3.3333333333, 2, 3, 4, 5],
          test_list=["b", ("a", "c"), ("a", "d"), ("b", "b"), ("c", "b")])

    # Count grouped by the first field, with no match filter. The original
    # repeated this case verbatim; it is exercised once here.
    cero = tc.create_cero()
    cero = libfuncs.groupby(cero, key=0, agg="count")
    check(cero,
          test_vals=[3, 2, 3, 1, 1],
          test_list=["a", ("a", "c"), ("a", "d"), "b", "c"])

    # Count grouped by the first two fields of a three-field index.
    tc = ToCERO({
        "files": [{
            "file": xlsx_file,
            "sheet": "groupby_2",
            "index_col": [0, 1, 2]
        }]
    })
    cero = tc.create_cero()
    cero = libfuncs.groupby(cero, key=[0, 1], agg="count")
    check(cero,
          test_vals=[2, 3, 2, 1, 1, 6, 7, 8, 9],
          test_list=[('a', 'b'), ('a', 'c'), ('a', 'd'),
                     ('b', 'b'), ('c', 'b'),
                     ('a', 'b', '2'), ('a', 'c', '2'),
                     ('a', 'd', '3'), ('a', 'c', '3')])
def test_stitch_time(self):
    """Check ``pc_change`` stitching for each way of naming the initial column.

    The input CERO combines 2018 values for A, B, C with 2019-2020 values
    for D, E, F. Each procedure renames ``b_set`` onto ``a_set`` with a
    ``noop`` and then runs ``pc_change`` on ``a_set``. The initial column is
    given in turn as ``init_cols`` (list and scalar) and ``init_icols``
    (scalar, list and negative index). Every variant must export the same
    expected CERO. Output files are removed even when a variant fails.
    """
    # ``np`` instead of ``pd.np`` throughout: the ``pandas.np`` alias was
    # deprecated in pandas 1.0 and removed in pandas 2.0.
    init = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3],
                                   }, orient='index',
                                  dtype=np.float32)
    init.sort_index(inplace=True)
    init.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

    cero = pd.DataFrame.from_dict({"D": [100, 200], "E": [50, 0], "F": [-50, 200]},
                                  orient='index',
                                  dtype=np.float32)
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2019, 2020], format="%Y"))

    cero = CERO.combine_ceros([init, cero])

    test_df = pd.DataFrame.from_dict({"A": [1, 2, 6], "B": [2, 3, 3], "C": [3, 1.5, 4.5],
                                      "D": [np.nan, 100, 200], "E": [np.nan, 50, 0],
                                      "F": [np.nan, -50, 200]
                                      },
                                     orient='index',
                                     dtype=np.float32)
    test_df.sort_index(inplace=True)
    test_df.columns = pd.DatetimeIndex(data=pd.to_datetime([2018, 2019, 2020], format="%Y"))

    # (output file, extra ``pc_change`` argument naming the initial column)
    variants = [
        ("test_stitch_time.csv", {"init_cols": [2018]}),
        ("test_stitch_time2.csv", {"init_cols": 2018}),
        ("test_stitch_time3.csv", {"init_icols": 0}),
        ("test_stitch_time4.csv", {"init_icols": [0]}),
        ("test_stitch_time5.csv", {"init_icols": [-3]}),
    ]

    for out_file, init_spec in variants:
        pc_change_op = {"func": "pc_change", "arrays": ["a_set"]}
        pc_change_op.update(init_spec)

        # The configuration is rebuilt on every iteration so that no
        # variant shares mutable state with another.
        proc = FromCERO._Procedure({"name": "test_stitch_time",
                                    "file": out_file,
                                    "sets": {"a_set": ["A", "B", "C"],
                                             "b_set": ["D", "E", "F"]},
                                    "inputs": ["a_set", "b_set"],
                                    "operations": [{"func": "noop",
                                                    "rename": {"b_set": "a_set"}},
                                                   pc_change_op],
                                    "ref_dir": "."})
        try:
            proc.exec_ops(cero)

            tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), out_file)}]})
            df = tc.create_cero()

            self.assertTrue(df.equals(test_df))
        finally:
            # Do not leave the generated file behind when a variant fails.
            if os.path.isfile(out_file):
                os.remove(out_file)
def test_output_cero(self):
    """
    Tests the behaviour of the "outputs" argument is correct.

    Cases covered, each with inputs A, B and C and no operations:

    * ``outputs: ["A"]`` - only series A is exported.
    * ``outputs: True`` - the whole CERO is exported.
    * ``outputs: None`` and ``outputs: False`` - no file is written.
    * ``outputs`` omitted - the whole CERO is exported.

    Generated files are removed even when the test fails.
    """
    # ``np.float32`` instead of ``pd.np.float32``: the ``pandas.np`` alias
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    cero = pd.DataFrame.from_dict({"A": [1], "B": [2], "C": [3]}, orient='index',
                                  dtype=np.float32)
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(data=pd.to_datetime([2018], format="%Y"))

    candidate_files = ["test_output_cero.csv",
                       "test_output_cero2.csv",
                       "test_output_cero3.csv",
                       "test_output_cero4.csv",
                       "test_output_cero5.csv"]

    def run_procedure(file_name, **extra_conf):
        """Execute a procedure exporting to ``file_name`` with extra config keys."""
        conf = {"name": "test_output_cero",
                "file": file_name,
                "inputs": ["A", "B", "C"],
                "ref_dir": "."}
        conf.update(extra_conf)
        FromCERO._Procedure(conf).exec_ops(cero)

    def reimport(file_name):
        """Read ``file_name`` from the working directory back into a CERO."""
        tc = ToCERO({"files": [{"file": os.path.join(os.path.abspath("."), file_name)}]})
        return tc.create_cero()

    try:
        # Because single item in outputs, error may be raised (but
        # shouldn't) on attempting to export a Pandas.Series object instead
        # of a Pandas.DataFrame object.
        run_procedure("test_output_cero.csv", outputs=["A"])
        self.assertTrue(cero.loc[["A"]].equals(reimport("test_output_cero.csv")))

        # outputs=True exports everything.
        run_procedure("test_output_cero2.csv", outputs=True)
        self.assertTrue(cero.equals(reimport("test_output_cero2.csv")))

        # outputs=None writes no file.
        run_procedure("test_output_cero3.csv", outputs=None)
        self.assertFalse(os.path.isfile("test_output_cero3.csv"))

        # outputs=False writes no file.
        run_procedure("test_output_cero4.csv", outputs=False)
        self.assertFalse(os.path.isfile("test_output_cero4.csv"))

        # "outputs" omitted. Re-import the file this procedure wrote: the
        # original read test_output_cero2.csv here, so this case only
        # re-checked the outputs=True export.
        run_procedure("test_output_cero5.csv")
        self.assertTrue(cero.equals(reimport("test_output_cero5.csv")))
    finally:
        # Remove whatever was written, including files that should not
        # have been created at all.
        for leftover in candidate_files:
            if os.path.isfile(leftover):
                os.remove(leftover)
def test_sets_and_mapping2(self):
    """Check the two xlsx exports produced by ``test_fromcero_mapping2.yaml``.

    A nine-row CERO indexed by (A|B|C, 1|2|3) with values 1..9 is passed
    through the configured procedures. The first export must carry the
    index (G|H|I, 1|2|3), the second (A|B|C, G|H|I), and both must keep the
    values 1..9 in order. Comparisons are pairwise over the leading
    entries. Generated files are removed even when the test fails.
    """
    out_files = ("test_fromcero_complexmapping1.xlsx",
                 "test_fromcero_complexmapping2.xlsx")

    # ``np.float32`` instead of ``pd.np.float32``: the ``pandas.np`` alias
    # was deprecated in pandas 1.0 and removed in pandas 2.0.
    cero = pd.DataFrame.from_dict(
        {
            ("A", "1"): [1],
            ("A", "2"): [2],
            ("A", "3"): [3],
            ("B", "1"): [4],
            ("B", "2"): [5],
            ("B", "3"): [6],
            ("C", "1"): [7],
            ("C", "2"): [8],
            ("C", "3"): [9],
        },
        orient='index',
        dtype=np.float32)
    cero.sort_index(inplace=True)
    cero.columns = pd.DatetimeIndex(
        data=pd.to_datetime([2018], format="%Y"))
    self.assertTrue(CERO.is_cero(cero))

    fc = FromCERO(TestFromCERO._dd + "test_fromcero_mapping2.yaml")
    try:
        fc.exec_procedures(cero)

        tc = ToCERO({
            "files": [{
                "file": "test_fromcero_complexmapping1.xlsx",
                "sheet": "CERO",
                "index_col": [0, 1]
            }]
        })
        df1 = tc.create_cero()
        test_list = list(range(1, 10))
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all(np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)))
        test_list = [("G", "1"), ("G", "2"), ("G", "3"),
                     ("H", "1"), ("H", "2"), ("H", "3"),
                     ("I", "1"), ("I", "2"), ("I", "3")]
        self.assertTrue(
            all(x == y for (x, y) in zip(test_list, df1.index.tolist())))

        tc = ToCERO({
            "files": [{
                "file": "test_fromcero_complexmapping2.xlsx",
                "sheet": "CERO",
                "index_col": [0, 1]
            }]
        })
        df1 = tc.create_cero()
        test_list = list(range(1, 10))
        df1_vals = [x[0] for x in df1.values.tolist()]
        self.assertTrue(
            all(np.isclose(x, y) for (x, y) in zip(test_list, df1_vals)))
        test_list = [("A", "G"), ("A", "H"), ("A", "I"),
                     ("B", "G"), ("B", "H"), ("B", "I"),
                     ("C", "G"), ("C", "H"), ("C", "I")]
        self.assertTrue(
            all(x == y for (x, y) in zip(test_list, df1.index.tolist())))
    finally:
        # Do not leave generated workbooks behind when a step or assertion
        # fails.
        for leftover in out_files:
            if os.path.isfile(leftover):
                os.remove(leftover)
def test_nrows_empty(self):
    """Every value in the first row of the CERO from ``test_nrows_3.yaml`` is NaN."""
    conf_path = TestToCERO._dd + r"test_nrows_3.yaml"
    cero = ToCERO(conf=conf_path).create_cero()

    first_row = cero.values[0]
    nan_mask = np.isnan(first_row)
    self.assertTrue(np.all(nan_mask))
def test_multiindex_xlsx(self):
    """The CERO built from ``test_multiindex_xlsx.yaml`` passes ``CERO.is_cero``."""
    conf_path = TestToCERO._dd + r'test_multiindex_xlsx.yaml'
    cero = ToCERO(conf=conf_path).create_cero()

    is_valid = CERO.is_cero(cero)
    self.assertTrue(is_valid)