def test_excel_table_lineage():
    """Parse a one-row TablesLineage sheet and check the resulting entities.

    A temporary workbook is written with a single lineage row
    (table0 -> proc01 -> table1). The test asserts that
    ``parse_table_lineage`` returns, in order, the target table, the source
    table and the process, each with the expected minimal JSON
    (type name, negative placeholder guid and qualified name).

    The temporary workbook is always removed, whether the test passes or not.
    """
    temp_filepath = "./temp_test_excel_table_lineage.xlsx"
    reader = ExcelReader(ExcelConfiguration())
    max_cols = len(ExcelReader.TEMPLATE_HEADERS["TablesLineage"])

    # Column order of each row:
    # "Target Table", "Target Type", "Target Classifications",
    # "Source Table", "Source Type", "Source Classifications",
    # "Process Name", "Process Type"
    json_rows = [
        [
            "table1", "demo_type", None,
            "table0", "demo_type2", None,
            "proc01", "proc_type",
        ]
    ]

    setup_workbook(temp_filepath, "TablesLineage", max_cols, json_rows)

    try:
        # Parsing happens inside the try block so the temporary workbook is
        # cleaned up even when the reader itself raises.
        results = reader.parse_table_lineage(temp_filepath)

        assert results[0].to_json(minimum=True) == {
            "typeName": "demo_type", "guid": -1001, "qualifiedName": "table1"}
        assert results[1].to_json(minimum=True) == {
            "typeName": "demo_type2", "guid": -1002, "qualifiedName": "table0"}
        assert results[2].to_json(minimum=True) == {
            "typeName": "proc_type", "guid": -1003, "qualifiedName": "proc01"}
    finally:
        remove_workbook(temp_filepath)


# Example #2
def test_excel_column_lineage():
    """Parse table and column lineage sheets with column mapping enabled.

    A temporary workbook is written with one TablesLineage row
    (table0 -> proc01 -> table1) and three ColumnsLineage rows
    (t00 -> t00, tA -> tcombo, tB -> tcombo). The test checks that:

    * the table entities carry no ``columnMapping`` attribute right after
      ``parse_table_lineage``;
    * ``parse_column_lineage(..., use_column_mapping=True)`` returns seven
      entities and adds a ``columnMapping`` attribute to the ``proc01``
      table-level process;
    * the first entry of that JSON-encoded mapping has the expected dataset
      mapping and exactly the three expected column pairs (in any order).

    The temporary workbook is always removed, whether the test passes or not.
    """
    temp_filepath = "./temp_test_excel_column_lineage.xlsx"
    reader = ExcelReader(ExcelConfiguration())
    max_cols_tl = len(ExcelReader.TEMPLATE_HEADERS["TablesLineage"])
    max_cols_cl = len(ExcelReader.TEMPLATE_HEADERS["ColumnsLineage"])

    # Column order of each table lineage row:
    # "Target Table", "Target Type", "Target Classifications",
    # "Source Table", "Source Type", "Source Classifications",
    # "Process Name", "Process Type"
    json_rows = [[
        "table1", "demo_table", None, "table0", "demo_table", None, "proc01",
        "demo_process"
    ]]

    # Column order of each column lineage row:
    # "Target Table", "Target Column", "Target Classifications",
    # "Source Table", "Source Column", "Source Classifications",
    # "Transformation"
    json_rows_col = [
        ["table1", "t00", None, "table0", "t00", None, None],
        ["table1", "tcombo", None, "table0", "tA", None, None],
        ["table1", "tcombo", None, "table0", "tB", None, None],
    ]

    setup_workbook(temp_filepath, "TablesLineage", max_cols_tl, json_rows)

    try:
        # Everything after the first sheet is written runs inside the try
        # block so the temporary workbook is removed even when a later setup
        # step, a parse call or an early assertion fails.
        setup_workbook(temp_filepath, "ColumnsLineage", max_cols_cl,
                       json_rows_col)

        atlas_types = column_lineage_scaffold("demo")

        table_entities = reader.parse_table_lineage(temp_filepath)

        # Before column lineage is parsed, no table entity has columnMapping.
        assert all(
            "columnMapping" not in e.attributes for e in table_entities)

        column_entities = reader.parse_column_lineage(temp_filepath,
                                                      table_entities,
                                                      atlas_types,
                                                      use_column_mapping=True)

        table_lookup = {e.get_name(): e for e in table_entities}

        # We have five columns (t00 > t00) + ((tA + tB) > tcombo)
        # and two processes
        assert len(column_entities) == 7

        # Because use_column_mapping is True, the table-level process entity
        # is modified to carry the column mapping.
        assert "columnMapping" in table_lookup["proc01"].attributes
        resulting_col_map = json.loads(
            table_lookup["proc01"].attributes["columnMapping"])[0]
        expected_col_map = {
            "DatasetMapping": {
                "Source": "table0",
                "Sink": "table1"
            },
            "ColumnMapping": [{
                "Source": "t00",
                "Sink": "t00"
            }, {
                "Source": "tA",
                "Sink": "tcombo"
            }, {
                "Source": "tB",
                "Sink": "tcombo"
            }]
        }
        assert (resulting_col_map["DatasetMapping"] ==
                expected_col_map["DatasetMapping"])

        # Order of the column pairs is not fixed, so check the count and
        # then membership of each resulting pair in the expected list.
        assert len(resulting_col_map["ColumnMapping"]) == 3
        for mapping in resulting_col_map["ColumnMapping"]:
            assert mapping in expected_col_map["ColumnMapping"]
    finally:
        remove_workbook(temp_filepath)