def test_excel_table_lineage():
    """Parse a single TablesLineage row and verify that the reader emits
    three entities (target table, source table, process) whose minimal
    JSON serialization matches the expected type/guid/qualifiedName.
    """
    tmp_path = "./temp_test_excel_table_lineage.xlsx"
    config = ExcelConfiguration()
    excel_reader = ExcelReader(config)
    header_count = len(ExcelReader.TEMPLATE_HEADERS["TablesLineage"])

    # Sheet columns, in order:
    # "Target Table", "Target Type", "Target Classifications",
    # "Source Table", "Source Type", "Source Classifications",
    # "Process Name", "Process Type"
    lineage_rows = [
        ["table1", "demo_type", None,
         "table0", "demo_type2", None,
         "proc01", "proc_type"]
    ]

    setup_workbook(tmp_path, "TablesLineage", header_count, lineage_rows)

    entities = excel_reader.parse_table_lineage(tmp_path)

    # Expected minimal serializations, in parse order: target, source, process.
    expected = [
        {"typeName": "demo_type", "guid": -1001, "qualifiedName": "table1"},
        {"typeName": "demo_type2", "guid": -1002, "qualifiedName": "table0"},
        {"typeName": "proc_type", "guid": -1003, "qualifiedName": "proc01"},
    ]
    try:
        assert entities[0].to_json(minimum=True) == expected[0]
        assert entities[1].to_json(minimum=True) == expected[1]
        assert entities[2].to_json(minimum=True) == expected[2]
    finally:
        # Always clean up the temp workbook, even on assertion failure.
        remove_workbook(tmp_path)
def test_excel_column_lineage():
    """Parse TablesLineage + ColumnsLineage sheets and verify the resulting
    column entities, and that with ``use_column_mapping=True`` the process
    table entity is stamped with a ``columnMapping`` attribute describing
    the source/sink column pairs.

    Improvements over the original: removed eight dead ``None`` locals
    (``table1``, ``table0``, ``proc01``, ``t00``, ``table1_t00``,
    ``table0_t00``, ``col_lineage_process``) and an unused
    ``column_lookup`` dict comprehension that were never read.
    """
    tmp_path = "./temp_test_excel_column_lineage.xlsx"
    config = ExcelConfiguration()
    excel_reader = ExcelReader(config)
    table_header_count = len(ExcelReader.TEMPLATE_HEADERS["TablesLineage"])
    column_header_count = len(ExcelReader.TEMPLATE_HEADERS["ColumnsLineage"])

    # TablesLineage columns, in order:
    # "Target Table", "Target Type", "Target Classifications",
    # "Source Table", "Source Type", "Source Classifications",
    # "Process Name", "Process Type"
    table_rows = [[
        "table1", "demo_table", None,
        "table0", "demo_table", None,
        "proc01", "demo_process"
    ]]
    # ColumnsLineage columns, in order:
    # "Target Table", "Target Column", "Target Classifications",
    # "Source Table", "Source Column", "Source Classifications",
    # "Transformation"
    column_rows = [
        ["table1", "t00", None, "table0", "t00", None, None],
        ["table1", "tcombo", None, "table0", "tA", None, None],
        ["table1", "tcombo", None, "table0", "tB", None, None],
    ]

    setup_workbook(tmp_path, "TablesLineage", table_header_count, table_rows)
    setup_workbook(tmp_path, "ColumnsLineage", column_header_count, column_rows)

    atlas_types = column_lineage_scaffold("demo")
    table_entities = excel_reader.parse_table_lineage(tmp_path)

    # Before column parsing, no table entity carries a columnMapping attribute.
    assert all("columnMapping" not in e.attributes for e in table_entities)

    column_entities = excel_reader.parse_column_lineage(
        tmp_path, table_entities, atlas_types, use_column_mapping=True)

    try:
        table_lookup = {e.get_name(): e for e in table_entities}

        # Five columns (t00 > t00) + ((tA + tB) > tcombo) and two processes.
        assert len(column_entities) == 7

        # Because use_column_mapping is True, the process table entity is
        # modified in place to carry the column mapping JSON.
        assert "columnMapping" in table_lookup["proc01"].attributes
        actual_mapping = json.loads(
            table_lookup["proc01"].attributes["columnMapping"])[0]
        expected_mapping = {
            "DatasetMapping": {
                "Source": "table0", "Sink": "table1"
            },
            "ColumnMapping": [
                {"Source": "t00", "Sink": "t00"},
                {"Source": "tA", "Sink": "tcombo"},
                {"Source": "tB", "Sink": "tcombo"}
            ]
        }
        assert actual_mapping["DatasetMapping"] == expected_mapping["DatasetMapping"]
        # Mapping order is not guaranteed, so check membership, not position.
        assert len(actual_mapping["ColumnMapping"]) == 3
        assert actual_mapping["ColumnMapping"][0] in expected_mapping["ColumnMapping"]
        assert actual_mapping["ColumnMapping"][1] in expected_mapping["ColumnMapping"]
        assert actual_mapping["ColumnMapping"][2] in expected_mapping["ColumnMapping"]
    finally:
        # Always clean up the temp workbook, even on assertion failure.
        remove_workbook(tmp_path)