def test_1_1_permutations(self, tmp_path, engine):
    """Round-trip a one-level-header, one-level-index Dataset through Excel.

    The same Dataset is written four times, once for each combination of the
    ``header`` and ``index`` flags, and every file is read back with
    ``mp.read_excel``. Labels that were not written to a file are put back on
    the parsed result before it is compared with the original.
    """
    original = mp.Dataset(pd.DataFrame(data, columns=reg_columns, index=reg_index))

    # File names follow "dset_<header>_<index>.xlsx": 1 = written, 0 = omitted.
    written_variants = (
        ("dset_1_0.xlsx", True, False),
        ("dset_0_1.xlsx", False, True),
        ("dset_1_1.xlsx", True, True),
        ("dset_0_0.xlsx", False, False),
    )
    for file_name, with_header, with_index in written_variants:
        original.to_excel(
            tmp_path / file_name, engine=engine, header=with_header, index=with_index
        )

    # 1_1: header and index were both written, so nothing needs restoring
    parsed = mp.read_excel(tmp_path / "dset_1_1.xlsx")
    assert_frame_equal(original, parsed)

    # 1_0: index was not written, so restore it before comparing
    parsed = mp.read_excel(tmp_path / "dset_1_0.xlsx")
    parsed.index = reg_index
    assert_frame_equal(original, parsed)

    # 0_1: header was not written, so restore the column labels
    parsed = mp.read_excel(tmp_path / "dset_0_1.xlsx")
    parsed.index.name = None  # if no header, pandas inserts a default index name of 0
    parsed.columns = reg_columns
    assert_frame_equal(original, parsed)

    # 0_0: neither was written, so restore both
    parsed = mp.read_excel(tmp_path / "dset_0_0.xlsx")
    parsed.index = reg_index
    parsed.columns = reg_columns
    assert_frame_equal(original, parsed)
def test_dups(cli_link_small_with_dups):
    """Check the number of duplicates reported per key for the parsed collection.

    ``cli_link_small_with_dups`` is whatever ``mp.read_excel`` accepts as its
    first argument (presumably a pytest fixture yielding a workbook path).
    """
    collection = mp.read_excel(cli_link_small_with_dups, as_collection=True)
    duplicates = collection.get_duplicates()

    # Checked in this order: "instr2_all" first, then "instr3_all".
    expected_counts = {"instr2_all": 12, "instr3_all": 17}
    for key, expected in expected_counts.items():
        assert len(duplicates[key]) == expected
def test_2_1(self, tmp_path, engine):
    """Round-trip a Dataset built with ``mi_columns`` and ``reg_index``.

    Header and index are both written, and the parsed result is compared with
    the original without any adjustment.
    """
    # 2_1: test 2 header 1 index
    target = tmp_path / "dset_2_1.xlsx"
    original = mp.Dataset(pd.DataFrame(data, columns=mi_columns, index=reg_index))
    original.to_excel(target, engine=engine, header=True, index=True)

    parsed = mp.read_excel(target)
    assert_frame_equal(original, parsed)
def test_1_2(self, tmp_path, engine):
    """Round-trip a Dataset built with ``reg_columns`` and ``mi_index``.

    Header and index are both written, and the parsed result is compared with
    the original without any adjustment.
    """
    # 1_2: test 1 header 2 index
    target = tmp_path / "dset_1_2.xlsx"
    original = mp.Dataset(pd.DataFrame(data, columns=reg_columns, index=mi_index))
    original.to_excel(target, engine=engine, header=True, index=True)

    parsed = mp.read_excel(target)
    assert_frame_equal(original, parsed)
def test_read_file(cli_link_small_with_merge):
    """Read the given workbook as a collection and check its primary metadata.

    The result must be exactly a ``MergeableAnchoredList`` (not a subclass),
    and the "primary" entry of its Excel dict must carry the expected name and
    id column name.
    """
    collection = mp.read_excel(cli_link_small_with_merge, as_collection=True)
    assert type(collection) is MergeableAnchoredList

    primary_info = collection.to_excel_dict()["primary"]
    assert primary_info["name"] == "small"
    assert primary_info["id_col_name"] == "InstrID"
def test_basiclist(tmp_path):
    """Round-trip a single Dataset and then a two-element BasicList via Excel.

    Both reads go through an already-open ``mp.MACPieExcelFile`` rather than
    a plain path.
    """
    single_path = tmp_path / "dset1.xlsx"
    list_path = tmp_path / "basic_list.xlsx"

    # A Dataset created without a name is read back from the "NO_NAME" sheet.
    plain = mp.Dataset({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
    plain.to_excel(single_path)
    with mp.MACPieExcelFile(single_path) as reader:
        plain_parsed = mp.read_excel(reader, sheet_name="NO_NAME")
    assert plain.equals(plain_parsed)

    # Second Dataset carries explicit metadata: id column, name and tags.
    columns = {"A": [1, 2, 3], "albert": [4, 5, 6], "C": [7, 8, 9]}
    annotated = mp.Dataset(columns, id_col_name="albert", name="renee", tags=["a", "b"])

    collection = mp.BasicList([plain, annotated])
    with mp.MACPieExcelWriter(list_path) as writer:
        collection.to_excel(writer)

    # Read the whole workbook back as a collection through an open reader.
    with mp.MACPieExcelFile(list_path) as reader:
        collection_parsed = mp.read_excel(reader, as_collection=True)

    assert len(collection_parsed) == 2
    assert collection_parsed[0].equals(plain)
    assert collection_parsed[1].equals(annotated)
def test_basic_collection(self, tmp_path, engine):
    """Write a two-dataset BasicList and read it back with each ``sheet_name`` form.

    The workbook holds ``reg_dset`` and ``mi_dset``. It is read with the
    default ``sheet_name``, a position, a name, a list of positions, a mixed
    list and ``None``. Each parsed dataset gets its expected index assigned
    before being compared with the dataset it came from.
    """
    workbook = tmp_path / "basic_list.xlsx"
    collection = mp.BasicList([reg_dset, mi_dset])
    with mp.MACPieExcelWriter(workbook, engine=engine) as writer:
        collection.to_excel(writer)

    # Default sheet_name: the result is compared against reg_dset.
    parsed = mp.read_excel(workbook)
    parsed.index = reg_index
    assert_frame_equal(parsed, reg_dset)

    # Sheet selected by position.
    parsed = mp.read_excel(workbook, sheet_name=0)
    parsed.index = reg_index
    assert_frame_equal(parsed, reg_dset)

    # Sheet selected by name.
    parsed = mp.read_excel(workbook, sheet_name="mi_test_name")
    parsed.index = mi_index
    assert_frame_equal(parsed, mi_dset)

    # List of positions: the result is looked up by those same positions.
    sheets = mp.read_excel(workbook, sheet_name=[0, 1])
    reg_parsed = sheets[0]
    reg_parsed.index = reg_index
    assert_frame_equal(reg_parsed, reg_dset)
    mi_parsed = sheets[1]
    mi_parsed.index = mi_index
    assert_frame_equal(mi_parsed, mi_dset)

    # Mixed list of a position and a name: looked up by the selectors given.
    sheets = mp.read_excel(workbook, sheet_name=[0, "mi_test_name"])
    reg_parsed = sheets[0]
    reg_parsed.index = reg_index
    assert_frame_equal(reg_parsed, reg_dset)
    mi_parsed = sheets["mi_test_name"]
    mi_parsed.index = mi_index
    assert_frame_equal(mi_parsed, mi_dset)

    # sheet_name=None: the result is looked up by sheet name.
    sheets = mp.read_excel(workbook, sheet_name=None)
    reg_parsed = sheets["NO_NAME"]
    reg_parsed.index = reg_index
    assert_frame_equal(reg_parsed, reg_dset)
    mi_parsed = sheets["mi_test_name"]
    mi_parsed.index = mi_index
    assert_frame_equal(mi_parsed, mi_dset)
def test_2_2_permutations(self, tmp_path, engine):
    """Round-trip a Dataset built with ``mi_columns`` and ``mi_index`` via Excel.

    The Dataset is written once for each combination of the ``header`` and
    ``index`` flags, first with the default cell merging and then again with
    ``merge_cells=False``. Every file is read back with ``mp.read_excel``;
    labels are reassigned on the parsed result where the test requires it
    before comparing with the original.
    """
    original = mp.Dataset(pd.DataFrame(data, columns=mi_columns, index=mi_index))

    # File names follow "dset_<header>_<index>": 2 = written, 0 = omitted.
    merged_variants = (
        ("dset_2_2.xlsx", True, True),
        ("dset_2_0.xlsx", True, False),
        ("dset_0_2.xlsx", False, True),
        ("dset_0_0.xlsx", False, False),
    )
    for file_name, with_header, with_index in merged_variants:
        original.to_excel(
            tmp_path / file_name, engine=engine, header=with_header, index=with_index
        )

    # 2_2: header and index were both written, so nothing needs restoring
    parsed = mp.read_excel(tmp_path / "dset_2_2.xlsx")
    assert_frame_equal(original, parsed)

    # 2_0: index was not written, so restore it
    parsed = mp.read_excel(tmp_path / "dset_2_0.xlsx")
    parsed.index = mi_index
    assert_frame_equal(original, parsed)

    # 0_2: header was not written, so restore index names and column labels
    parsed = mp.read_excel(tmp_path / "dset_0_2.xlsx")
    parsed.index.names = mi_index.names
    parsed.columns = mi_columns
    assert_frame_equal(original, parsed)

    # 0_0: neither was written, so restore both
    parsed = mp.read_excel(tmp_path / "dset_0_0.xlsx")
    parsed.index = mi_index
    parsed.columns = mi_columns
    assert_frame_equal(original, parsed)

    # test legacy format of merge_cells=False
    unmerged_variants = (
        ("dset_2_2_no_merge.xlsx", True, True),
        ("dset_2_0_no_merge.xlsx", True, False),
        ("dset_0_2_no_merge.xlsx", False, True),
        ("dset_0_0_no_merge.xlsx", False, False),
    )
    for file_name, with_header, with_index in unmerged_variants:
        original.to_excel(
            tmp_path / file_name,
            engine=engine,
            merge_cells=False,
            header=with_header,
            index=with_index,
        )

    # 2_2 (no merge): column labels are reassigned before comparing
    parsed = mp.read_excel(tmp_path / "dset_2_2_no_merge.xlsx")
    parsed.columns = mi_columns
    assert_frame_equal(original, parsed)

    # 2_0 (no merge): restore index and reassign column labels
    parsed = mp.read_excel(tmp_path / "dset_2_0_no_merge.xlsx")
    parsed.index = mi_index
    parsed.columns = mi_columns
    assert_frame_equal(original, parsed)

    # 0_2 (no merge): restore index names and column labels
    parsed = mp.read_excel(tmp_path / "dset_0_2_no_merge.xlsx")
    parsed.index.names = mi_index.names
    parsed.columns = mi_columns
    assert_frame_equal(original, parsed)

    # 0_0 (no merge): restore both
    parsed = mp.read_excel(tmp_path / "dset_0_0_no_merge.xlsx")
    parsed.index = mi_index
    parsed.columns = mi_columns
    assert_frame_equal(original, parsed)
def execute(self):
    """Read ``self.primary`` as a collection and store it on ``self.results``."""
    collection = read_excel(self.primary, as_collection=True)
    self.results = collection