Example #1
0
def test_insert(save_catalog, query):
    """Analyze the supplied query against the "test" source.

    The analysis is expected to report five target columns on
    ``page_lookup`` and five source columns, all read from
    ``page_lookup_redirect``.
    """
    test_source = save_catalog.get_source("test")
    statement = parse(query)
    dml = analyze_dml_query(save_catalog, statement, test_source)
    assert dml is not None

    expected_target = ("test", "default", "page_lookup")
    expected_sources = [("test", "default", "page_lookup_redirect")]

    assert len(dml.target_columns) == 5
    assert dml.target_table.fqdn == expected_target
    assert len(dml.source_columns) == 5
    source_fqdns = [tbl.fqdn for tbl in dml.source_tables]
    assert source_fqdns == expected_sources
Example #2
0
def test_visitor(save_catalog, parse_queries_fixture):
    """Analyze every parsed query from the fixture against the "test" source.

    Expects exactly five analysis results, each with at least one source
    table and a resolved target table.
    """
    test_source = save_catalog.get_source("test")
    analyzed = [
        analyze_dml_query(save_catalog, statement, test_source)
        for statement in parse_queries_fixture
    ]
    assert len(analyzed) == 5

    for visited in analyzed:
        # Two separate asserts so a failure pinpoints which condition broke.
        assert len(visited.source_tables) > 0
        assert visited.target_table is not None
Example #3
0
def test_insert_cols(save_catalog):
    """Analyze an INSERT with an explicit two-column target list.

    Expects two target columns on ``page_lookup_nonredirect`` and two source
    columns read from ``page`` only.
    """
    test_source = save_catalog.get_source("test")
    insert_sql = "INSERT INTO page_lookup_nonredirect(page_id, page_version) SELECT page.page_id, page.page_latest FROM page"
    statement = parse(insert_sql)
    dml = analyze_dml_query(save_catalog, statement, test_source)
    assert dml is not None

    expected_target = ("test", "default", "page_lookup_nonredirect")
    expected_sources = [("test", "default", "page")]

    assert len(dml.target_columns) == 2
    assert dml.target_table.fqdn == expected_target
    assert len(dml.source_columns) == 2
    source_fqdns = [tbl.fqdn for tbl in dml.source_tables]
    assert source_fqdns == expected_sources
Example #4
0
def test_insert_with_join(save_catalog):
    """Analyze an INSERT whose SELECT joins ``page`` (twice) with ``redirect``.

    Expects five target columns on ``page_lookup_redirect``, five source
    columns, and the sorted source tables to be ``page`` and ``redirect``.
    """
    test_source = save_catalog.get_source("test")
    join_sql = "insert into page_lookup_redirect select original_page.page_id redirect_id, original_page.page_title redirect_title, final_page.page_title as true_title, final_page.page_id, final_page.page_latest from page final_page join redirect on (redirect.page_title = final_page.page_title) join page original_page on (redirect.rd_from = original_page.page_id)"
    statement = parse(join_sql)
    dml = analyze_dml_query(save_catalog, statement, test_source)
    assert dml is not None

    expected_target = ("test", "default", "page_lookup_redirect")
    expected_sources = [
        ("test", "default", "page"),
        ("test", "default", "redirect"),
    ]

    assert len(dml.target_columns) == 5
    assert dml.target_table.fqdn == expected_target
    assert len(dml.source_columns) == 5
    # Sort before comparing: the assertion is about membership, not order.
    source_fqdns = sorted(tbl.fqdn for tbl in dml.source_tables)
    assert source_fqdns == expected_sources
Example #5
0
    def post(self):
        """Parse and analyze a submitted query, then extract its lineage.

        Reads ``query``, ``name``, ``source_id``, ``start_time`` and
        ``end_time`` from the request parser. ``start_time`` and ``end_time``
        are converted with ``datetime.datetime.fromisoformat``.

        Returns:
            A ``(body, 200)`` tuple. The body describes the job execution
            returned by ``extract_lineage``: its id, job id, start/end
            timestamps formatted as ``%Y-%m-%d %H:%M:%S`` and status name.

        Raises:
            ParseErrorHTTP: if ``parse`` raises ``ParseError``.
            TableNotFoundHTTP: if analysis or extraction raises
                ``TableNotFound``.
            ColumnNotFoundHTTP: if analysis or extraction raises
                ``ColumnNotFound``.
            SemanticErrorHTTP: if analysis or extraction raises
                ``SemanticError``.
        """
        args = self._parser.parse_args()
        logging.debug("Parse query: {}".format(args["query"]))
        try:
            parsed = parse(args["query"], args["name"])
        except ParseError as error:
            # Chain explicitly so the original parser error stays attached
            # as the cause of the HTTP error.
            raise ParseErrorHTTP(description=str(error)) from error

        timestamp_format = "%Y-%m-%d %H:%M:%S"
        try:
            source = self._catalog.get_source_by_id(args["source_id"])
            logging.debug("Parsing query for source {}".format(source))
            chosen_visitor = analyze_dml_query(self._catalog, parsed, source)
            job_execution = extract_lineage(
                catalog=self._catalog,
                visited_query=chosen_visitor,
                source=source,
                parsed=parsed,
                start_time=datetime.datetime.fromisoformat(args["start_time"]),
                end_time=datetime.datetime.fromisoformat(args["end_time"]),
            )

            # Build the response inside the try block so every attribute of
            # job_execution is read before the session is removed below.
            return (
                {
                    "data": {
                        "id": job_execution.id,
                        "type": "job_executions",
                        "attributes": {
                            "job_id": job_execution.job_id,
                            "started_at": job_execution.started_at.strftime(
                                timestamp_format
                            ),
                            "ended_at": job_execution.ended_at.strftime(
                                timestamp_format
                            ),
                            "status": job_execution.status.name,
                        },
                    }
                },
                200,
            )
        except TableNotFound as table_error:
            raise TableNotFoundHTTP(description=str(table_error)) from table_error
        except ColumnNotFound as column_error:
            raise ColumnNotFoundHTTP(description=str(column_error)) from column_error
        except SemanticError as semantic_error:
            raise SemanticErrorHTTP(
                description=str(semantic_error)
            ) from semantic_error
        finally:
            # Runs on success and on every error raised in the block above.
            self._catalog.scoped_session.remove()
Example #6
0
def get_graph(save_catalog, parse_queries_fixture, graph_sdk):
    """Yield ``(graph, catalog)`` built from the fixture's parsed queries.

    Each parsed query is analyzed against the "test" source and passed to
    ``extract_lineage``; the resulting job ids are handed to ``load_graph``.
    """
    test_source = save_catalog.get_source("test")

    def _job_id_for(statement):
        # Analyze one parsed query and return the job id of its execution.
        visited = analyze_dml_query(save_catalog, statement, test_source)
        execution = extract_lineage(
            save_catalog,
            visited,
            test_source,
            statement,
            datetime.datetime.now(),
            datetime.datetime.now(),
        )
        return execution.job_id

    collected_ids = [
        _job_id_for(statement) for statement in parse_queries_fixture
    ]
    yield load_graph(graph_sdk, collected_ids), save_catalog
Example #7
0
def test_ctas(save_catalog):
    """Analyze a CREATE TEMP TABLE ... AS SELECT statement.

    Before analysis the "test" source is updated with the ("test",
    "default") schema. Expects one target column on ``temp_table_x`` and one
    source column read from ``page_lookup_nonredirect``.
    """
    ctas_sql = """
        CREATE TEMP TABLE temp_table_x(page_title) AS select redirect_title from page_lookup_nonredirect
        where redirect_title is not null
    """
    test_source = save_catalog.get_source("test")
    default_schema = save_catalog.get_schema("test", "default")
    save_catalog.update_source(test_source, default_schema)
    statement = parse(ctas_sql)
    dml = analyze_dml_query(save_catalog, statement, test_source)
    assert dml is not None

    expected_target = ("test", "default", "temp_table_x")
    expected_sources = [("test", "default", "page_lookup_nonredirect")]

    assert len(dml.target_columns) == 1
    assert dml.target_table.fqdn == expected_target
    assert len(dml.source_columns) == 1
    source_fqdns = [tbl.fqdn for tbl in dml.source_tables]
    assert source_fqdns == expected_sources
Example #8
0
def test_col_exprs(save_catalog):
    """Analyze an INSERT whose single selected column is a nested expression.

    Expects one target column on ``page_lookup_redirect`` and one source
    column read from ``page_lookup_nonredirect``.
    """
    expr_sql = """
        INSERT INTO page_lookup_redirect(true_title)
        SELECT
            BTRIM(TO_CHAR(DATEADD (MONTH,-1,('20' ||MAX ("redirect_id") || '-01')::DATE)::DATE,'YY-MM')) AS "max_month"
        FROM page_lookup_nonredirect;
    """
    test_source = save_catalog.get_source("test")
    statement = parse(expr_sql)
    dml = analyze_dml_query(
        catalog=save_catalog,
        parsed=statement,
        source=test_source,
    )
    assert dml is not None

    expected_target = ("test", "default", "page_lookup_redirect")
    expected_sources = [("test", "default", "page_lookup_nonredirect")]

    assert len(dml.target_columns) == 1
    assert dml.target_table.fqdn == expected_target
    assert len(dml.source_columns) == 1
    source_fqdns = [tbl.fqdn for tbl in dml.source_tables]
    assert source_fqdns == expected_sources