def test_insert(save_catalog, query):
    """Check the lineage extracted from an insert into ``page_lookup``.

    ``query`` is supplied from outside this function (presumably a pytest
    parametrization or fixture -- not visible here).
    """
    test_source = save_catalog.get_source("test")
    analysis = analyze_dml_query(save_catalog, parse(query), test_source)

    assert analysis is not None
    assert len(analysis.target_columns) == 5
    assert analysis.target_table.fqdn == ("test", "default", "page_lookup")
    assert len(analysis.source_columns) == 5

    source_fqdns = [tbl.fqdn for tbl in analysis.source_tables]
    assert source_fqdns == [("test", "default", "page_lookup_redirect")]
def test_visitor(save_catalog, parse_queries_fixture):
    """Analyze every fixture query and check each one resolves its tables.

    All queries are analyzed first; only then are the count and the
    per-query source/target tables asserted.
    """
    test_source = save_catalog.get_source("test")

    analyzed = []
    for parsed_query in parse_queries_fixture:
        analyzed.append(
            analyze_dml_query(save_catalog, parsed_query, test_source)
        )

    assert len(analyzed) == 5
    for result in analyzed:
        assert len(result.source_tables) > 0 and result.target_table is not None
def test_insert_cols(save_catalog):
    """Check lineage for an INSERT that names an explicit target column list."""
    test_source = save_catalog.get_source("test")
    # Adjacent literals concatenate into the single-line statement.
    sql = (
        "INSERT INTO page_lookup_nonredirect(page_id, page_version) "
        "SELECT page.page_id, page.page_latest FROM page"
    )
    analysis = analyze_dml_query(save_catalog, parse(sql), test_source)

    assert analysis is not None
    assert len(analysis.target_columns) == 2
    assert analysis.target_table.fqdn == (
        "test",
        "default",
        "page_lookup_nonredirect",
    )
    assert len(analysis.source_columns) == 2

    source_fqdns = [tbl.fqdn for tbl in analysis.source_tables]
    assert source_fqdns == [("test", "default", "page")]
def test_insert_with_join(save_catalog):
    """Check lineage for an INSERT whose SELECT joins ``page`` and ``redirect``.

    ``page`` is joined twice under different aliases; the expected source
    table list contains it once.
    """
    test_source = save_catalog.get_source("test")
    # Adjacent literals concatenate into the single-line statement.
    sql = (
        "insert into page_lookup_redirect "
        "select original_page.page_id redirect_id, "
        "original_page.page_title redirect_title, "
        "final_page.page_title as true_title, "
        "final_page.page_id, final_page.page_latest "
        "from page final_page "
        "join redirect on (redirect.page_title = final_page.page_title) "
        "join page original_page on (redirect.rd_from = original_page.page_id)"
    )
    analysis = analyze_dml_query(save_catalog, parse(sql), test_source)

    assert analysis is not None
    assert len(analysis.target_columns) == 5
    assert analysis.target_table.fqdn == (
        "test",
        "default",
        "page_lookup_redirect",
    )
    assert len(analysis.source_columns) == 5

    # Sorted so the comparison does not depend on discovery order.
    source_fqdns = sorted(tbl.fqdn for tbl in analysis.source_tables)
    assert source_fqdns == [
        ("test", "default", "page"),
        ("test", "default", "redirect"),
    ]
def post(self):
    """Parse a query, analyze it and record its lineage as a job execution.

    Reads ``query``, ``name``, ``source_id``, ``start_time`` and ``end_time``
    from the request parser. ``start_time`` and ``end_time`` are converted
    with ``datetime.datetime.fromisoformat``.

    Returns:
        A ``(body, 200)`` tuple where ``body["data"]`` describes the created
        job execution (id, job id, start/end timestamps, status name).

    Raises:
        ParseErrorHTTP: ``parse`` raised ``ParseError``.
        TableNotFoundHTTP: analysis or lineage extraction raised
            ``TableNotFound``.
        ColumnNotFoundHTTP: analysis or lineage extraction raised
            ``ColumnNotFound``.
        SemanticErrorHTTP: analysis or lineage extraction raised
            ``SemanticError``.
    """
    args = self._parser.parse_args()
    # Lazy %-style arguments: the message is only built if DEBUG is enabled.
    logging.debug("Parse query: %s", args["query"])
    try:
        parsed = parse(args["query"], args["name"])
    except ParseError as error:
        # Chain explicitly so the original error is kept as __cause__.
        raise ParseErrorHTTP(description=str(error)) from error

    timestamp_format = "%Y-%m-%d %H:%M:%S"
    try:
        source = self._catalog.get_source_by_id(args["source_id"])
        logging.debug("Parsing query for source %s", source)
        chosen_visitor = analyze_dml_query(self._catalog, parsed, source)
        job_execution = extract_lineage(
            catalog=self._catalog,
            visited_query=chosen_visitor,
            source=source,
            parsed=parsed,
            start_time=datetime.datetime.fromisoformat(args["start_time"]),
            end_time=datetime.datetime.fromisoformat(args["end_time"]),
        )
        # Built inside the try so job_execution is read before the session
        # is removed in ``finally`` (presumably it is session-bound -- TODO
        # confirm).
        attributes = {
            "job_id": job_execution.job_id,
            "started_at": job_execution.started_at.strftime(timestamp_format),
            "ended_at": job_execution.ended_at.strftime(timestamp_format),
            "status": job_execution.status.name,
        }
        return (
            {
                "data": {
                    "id": job_execution.id,
                    "type": "job_executions",
                    "attributes": attributes,
                }
            },
            200,
        )
    except TableNotFound as table_error:
        raise TableNotFoundHTTP(description=str(table_error)) from table_error
    except ColumnNotFound as column_error:
        raise ColumnNotFoundHTTP(description=str(column_error)) from column_error
    except SemanticError as semantic_error:
        raise SemanticErrorHTTP(
            description=str(semantic_error)
        ) from semantic_error
    finally:
        # Always release the scoped session, on success or failure.
        self._catalog.scoped_session.remove()
def get_graph(save_catalog, parse_queries_fixture, graph_sdk):
    """Extract lineage for every fixture query and yield the loaded graph.

    Generator (presumably registered as a pytest fixture; any decorator is
    not visible here). Each parsed query is analyzed against the "test"
    source and its lineage extracted; the resulting job ids are passed to
    ``load_graph``.

    Yields:
        A ``(graph, catalog)`` tuple.
    """
    test_source = save_catalog.get_source("test")

    collected_job_ids = []
    for parsed_query in parse_queries_fixture:
        analysis = analyze_dml_query(save_catalog, parsed_query, test_source)
        # Start and end times are two separate "now" readings.
        execution = extract_lineage(
            save_catalog,
            analysis,
            test_source,
            parsed_query,
            datetime.datetime.now(),
            datetime.datetime.now(),
        )
        collected_job_ids.append(execution.job_id)

    yield load_graph(graph_sdk, collected_job_ids), save_catalog
def test_ctas(save_catalog):
    """Check lineage for a CREATE TEMP TABLE ... AS SELECT statement.

    Before analysis the "test" source is updated with the ("test", "default")
    schema via ``save_catalog.update_source``.
    """
    sql = """ CREATE TEMP TABLE temp_table_x(page_title) AS select redirect_title from page_lookup_nonredirect where redirect_title is not null """

    test_source = save_catalog.get_source("test")
    default_schema = save_catalog.get_schema("test", "default")
    save_catalog.update_source(test_source, default_schema)

    analysis = analyze_dml_query(save_catalog, parse(sql), test_source)

    assert analysis is not None
    assert len(analysis.target_columns) == 1
    assert analysis.target_table.fqdn == ("test", "default", "temp_table_x")
    assert len(analysis.source_columns) == 1

    source_fqdns = [tbl.fqdn for tbl in analysis.source_tables]
    assert source_fqdns == [("test", "default", "page_lookup_nonredirect")]
def test_col_exprs(save_catalog):
    """Check lineage when the selected column is a nested function expression.

    The single projected expression wraps ``redirect_id`` in several function
    calls and casts; one source column and one target column are expected.
    """
    sql = """ INSERT INTO page_lookup_redirect(true_title) SELECT BTRIM(TO_CHAR(DATEADD (MONTH,-1,('20' ||MAX ("redirect_id") || '-01')::DATE)::DATE,'YY-MM')) AS "max_month" FROM page_lookup_nonredirect; """

    test_source = save_catalog.get_source("test")
    parsed_query = parse(sql)
    analysis = analyze_dml_query(
        catalog=save_catalog,
        parsed=parsed_query,
        source=test_source,
    )

    assert analysis is not None
    assert len(analysis.target_columns) == 1
    assert analysis.target_table.fqdn == (
        "test",
        "default",
        "page_lookup_redirect",
    )
    assert len(analysis.source_columns) == 1

    source_fqdns = [tbl.fqdn for tbl in analysis.source_tables]
    assert source_fqdns == [("test", "default", "page_lookup_nonredirect")]