예제 #1
0
    def test_1_1_permutations(self, tmp_path, engine):
        """Round-trip a one-header, one-index Dataset through Excel.

        The same Dataset is written once for every combination of the
        ``header`` and ``index`` flags and read back with ``mp.read_excel``.
        Labels that were not written to the file are put back on the parsed
        result before it is compared with the original.
        """
        expected = mp.Dataset(pd.DataFrame(data, columns=reg_columns, index=reg_index))

        # (file name, header flag, index flag) for each workbook to produce.
        write_plan = (
            ("dset_1_0.xlsx", True, False),
            ("dset_0_1.xlsx", False, True),
            ("dset_1_1.xlsx", True, True),
            ("dset_0_0.xlsx", False, False),
        )
        for file_name, with_header, with_index in write_plan:
            expected.to_excel(
                tmp_path / file_name,
                engine=engine,
                header=with_header,
                index=with_index,
            )

        # Header and index both written: the file must parse back unchanged.
        parsed_full = mp.read_excel(tmp_path / "dset_1_1.xlsx")
        assert_frame_equal(expected, parsed_full)

        # Header only: the index was not written, so restore it.
        parsed_header_only = mp.read_excel(tmp_path / "dset_1_0.xlsx")
        parsed_header_only.index = reg_index
        assert_frame_equal(expected, parsed_header_only)

        # Index only: the column labels were not written, so restore them.
        parsed_index_only = mp.read_excel(tmp_path / "dset_0_1.xlsx")
        # if no header, pandas inserts a default index name of 0
        parsed_index_only.index.name = None
        parsed_index_only.columns = reg_columns
        assert_frame_equal(expected, parsed_index_only)

        # Neither header nor index: restore both sets of labels.
        parsed_bare = mp.read_excel(tmp_path / "dset_0_0.xlsx")
        parsed_bare.index = reg_index
        parsed_bare.columns = reg_columns
        assert_frame_equal(expected, parsed_bare)
예제 #2
0
def test_dups(cli_link_small_with_dups):
    """Check the number of duplicates reported for each linked instrument."""
    collection = mp.read_excel(cli_link_small_with_dups, as_collection=True)
    duplicates = collection.get_duplicates()

    # Key in the duplicates mapping -> expected number of entries.
    expected_counts = {"instr2_all": 12, "instr3_all": 17}
    for key, expected in expected_counts.items():
        assert len(duplicates[key]) == expected
예제 #3
0
    def test_2_1(self, tmp_path, engine):
        """Round-trip a Dataset built with ``mi_columns`` and ``reg_index``."""
        workbook = tmp_path / "dset_2_1.xlsx"
        expected = mp.Dataset(pd.DataFrame(data, columns=mi_columns, index=reg_index))
        expected.to_excel(workbook, engine=engine, header=True, index=True)

        # Header and index were both written, so no labels need restoring.
        parsed = mp.read_excel(workbook)
        assert_frame_equal(expected, parsed)
예제 #4
0
    def test_1_2(self, tmp_path, engine):
        """Round-trip a Dataset built with ``reg_columns`` and ``mi_index``."""
        workbook = tmp_path / "dset_1_2.xlsx"
        expected = mp.Dataset(pd.DataFrame(data, columns=reg_columns, index=mi_index))
        expected.to_excel(workbook, engine=engine, header=True, index=True)

        # Header and index were both written, so no labels need restoring.
        parsed = mp.read_excel(workbook)
        assert_frame_equal(expected, parsed)
예제 #5
0
def test_read_file(cli_link_small_with_merge):
    """Read the fixture as a collection and inspect its primary entry."""
    loaded = mp.read_excel(cli_link_small_with_merge, as_collection=True)

    # Exact type check: the result must be a MergeableAnchoredList itself.
    assert type(loaded) is MergeableAnchoredList

    primary_info = loaded.to_excel_dict()["primary"]

    assert primary_info["name"] == "small"
    assert primary_info["id_col_name"] == "InstrID"
예제 #6
0
def test_basiclist(tmp_path):
    """Write a Dataset and a BasicList to Excel and read both back.

    Reading goes through ``mp.MACPieExcelFile`` and writing the list goes
    through ``mp.MACPieExcelWriter``, both used as context managers.
    """
    single_path = tmp_path / "dset1.xlsx"
    list_path = tmp_path / "basic_list.xlsx"

    plain = mp.Dataset({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
    plain.to_excel(single_path)

    # The Dataset was created without a name; it is read from sheet "NO_NAME".
    # NOTE(review): the original author left a commented-out variant that
    # passes the path straight to mp.read_excel instead of a reader object.
    with mp.MACPieExcelFile(single_path) as reader:
        plain_from_file = mp.read_excel(reader, sheet_name="NO_NAME")

    assert plain.equals(plain_from_file)

    annotated = mp.Dataset(
        {"A": [1, 2, 3], "albert": [4, 5, 6], "C": [7, 8, 9]},
        id_col_name="albert",
        name="renee",
        tags=["a", "b"],
    )

    collection = mp.BasicList([plain, annotated])

    with mp.MACPieExcelWriter(list_path) as writer:
        collection.to_excel(writer)

    # NOTE(review): a commented-out path-based variant existed here as well.
    with mp.MACPieExcelFile(list_path) as reader:
        collection_from_file = mp.read_excel(reader, as_collection=True)

    assert len(collection_from_file) == 2

    # Members come back in the order they were given to the BasicList.
    assert collection_from_file[0].equals(plain)

    assert collection_from_file[1].equals(annotated)
예제 #7
0
    def test_basic_collection(self, tmp_path, engine):
        """Read single and multiple sheets of a workbook written from a BasicList.

        Exercises ``mp.read_excel`` with ``sheet_name`` omitted, given as a
        position, as a name, as a list of positions and/or names, and as
        ``None``. Every parsed sheet has its index restored before it is
        compared with the Dataset it came from.
        """
        workbook = tmp_path / "basic_list.xlsx"

        def _assert_matches(parsed, index, expected):
            # Restore the index on the parsed sheet, then compare.
            parsed.index = index
            assert_frame_equal(parsed, expected)

        collection = mp.BasicList([reg_dset, mi_dset])
        with mp.MACPieExcelWriter(workbook, engine=engine) as writer:
            collection.to_excel(writer)

        # No sheet_name given: the result is compared with reg_dset.
        _assert_matches(mp.read_excel(workbook), reg_index, reg_dset)

        # Sheet selected by position.
        _assert_matches(mp.read_excel(workbook, sheet_name=0), reg_index, reg_dset)

        # Sheet selected by name.
        _assert_matches(
            mp.read_excel(workbook, sheet_name="mi_test_name"), mi_index, mi_dset
        )

        # List of positions: the result is indexed by those positions.
        by_position = mp.read_excel(workbook, sheet_name=[0, 1])
        _assert_matches(by_position[0], reg_index, reg_dset)
        _assert_matches(by_position[1], mi_index, mi_dset)

        # Mixed position/name list: the result is indexed by each selector.
        by_mixed = mp.read_excel(workbook, sheet_name=[0, "mi_test_name"])
        _assert_matches(by_mixed[0], reg_index, reg_dset)
        _assert_matches(by_mixed["mi_test_name"], mi_index, mi_dset)

        # sheet_name=None: the result is indexed by sheet name.
        by_name = mp.read_excel(workbook, sheet_name=None)
        _assert_matches(by_name["NO_NAME"], reg_index, reg_dset)
        _assert_matches(by_name["mi_test_name"], mi_index, mi_dset)
예제 #8
0
    def test_2_2_permutations(self, tmp_path, engine):
        """Round-trip a Dataset built with ``mi_columns`` and ``mi_index``.

        The Dataset is written once per combination of the ``header`` and
        ``index`` flags, first with the default cell merging and then again
        with ``merge_cells=False``. Each file is read back with
        ``mp.read_excel``; labels are restored on the parsed result as each
        case requires before it is compared with the original.
        """
        expected = mp.Dataset(pd.DataFrame(data, columns=mi_columns, index=mi_index))

        # (file name, header flag, index flag) with default cell merging.
        merged_plan = (
            ("dset_2_2.xlsx", True, True),
            ("dset_2_0.xlsx", True, False),
            ("dset_0_2.xlsx", False, True),
            ("dset_0_0.xlsx", False, False),
        )
        for file_name, with_header, with_index in merged_plan:
            expected.to_excel(
                tmp_path / file_name,
                engine=engine,
                header=with_header,
                index=with_index,
            )

        # Header and index both written: parses back unchanged.
        parsed_full = mp.read_excel(tmp_path / "dset_2_2.xlsx")
        assert_frame_equal(expected, parsed_full)

        # Header only: restore the index.
        parsed_header_only = mp.read_excel(tmp_path / "dset_2_0.xlsx")
        parsed_header_only.index = mi_index
        assert_frame_equal(expected, parsed_header_only)

        # Index only: restore the index level names and the columns.
        parsed_index_only = mp.read_excel(tmp_path / "dset_0_2.xlsx")
        parsed_index_only.index.names = mi_index.names
        parsed_index_only.columns = mi_columns
        assert_frame_equal(expected, parsed_index_only)

        # Neither header nor index: restore both.
        parsed_bare = mp.read_excel(tmp_path / "dset_0_0.xlsx")
        parsed_bare.index = mi_index
        parsed_bare.columns = mi_columns
        assert_frame_equal(expected, parsed_bare)

        # test legacy format of merge_cells=False
        unmerged_plan = (
            ("dset_2_2_no_merge.xlsx", True, True),
            ("dset_2_0_no_merge.xlsx", True, False),
            ("dset_0_2_no_merge.xlsx", False, True),
            ("dset_0_0_no_merge.xlsx", False, False),
        )
        for file_name, with_header, with_index in unmerged_plan:
            expected.to_excel(
                tmp_path / file_name,
                engine=engine,
                merge_cells=False,
                header=with_header,
                index=with_index,
            )

        # Header and index both written: only the columns are restored here.
        unmerged_full = mp.read_excel(tmp_path / "dset_2_2_no_merge.xlsx")
        unmerged_full.columns = mi_columns
        assert_frame_equal(expected, unmerged_full)

        # Header only: restore the index and the columns.
        unmerged_header_only = mp.read_excel(tmp_path / "dset_2_0_no_merge.xlsx")
        unmerged_header_only.index = mi_index
        unmerged_header_only.columns = mi_columns
        assert_frame_equal(expected, unmerged_header_only)

        # Index only: restore the index level names and the columns.
        unmerged_index_only = mp.read_excel(tmp_path / "dset_0_2_no_merge.xlsx")
        unmerged_index_only.index.names = mi_index.names
        unmerged_index_only.columns = mi_columns
        assert_frame_equal(expected, unmerged_index_only)

        # Neither header nor index: restore both.
        unmerged_bare = mp.read_excel(tmp_path / "dset_0_0_no_merge.xlsx")
        unmerged_bare.index = mi_index
        unmerged_bare.columns = mi_columns
        assert_frame_equal(expected, unmerged_bare)
예제 #9
0
 def execute(self):
     """Read ``self.primary`` as a collection and keep it in ``self.results``."""
     loaded = read_excel(self.primary, as_collection=True)
     self.results = loaded