Exemple #1
0
def extract_lineage(
    catalog: Catalog,
    visited_query: DmlVisitor,
    source: CatSource,
    parsed: Parsed,
    start_time,
    end_time,
) -> JobExecution:
    """Record one successful job execution and its column-level lineage.

    A job named ``parsed.name`` is registered in ``catalog`` for ``source``
    with the query text stored under the ``"query"`` key of its context. A
    job execution with status ``SUCCESS`` is then added for that job, and a
    lineage edge is added from every column of each source entry to the
    target paired with that entry.

    Args:
        catalog: Catalog that receives the job, the job execution and the
            lineage edges.
        visited_query: Visitor exposing ``source_columns`` and
            ``target_columns``; the two sequences are paired positionally.
        source: Catalog source the job is registered against.
        parsed: Parsed statement providing ``name`` and ``query``.
        start_time: Value stored as the execution's ``started_at``
            (presumably a datetime -- confirm against the caller).
        end_time: Value stored as the execution's ``ended_at``
            (presumably a datetime -- confirm against the caller).

    Returns:
        The job execution returned by ``catalog.add_job_execution``.
    """
    job = catalog.add_job(
        name=parsed.name,
        source=source,
        context={"query": parsed.query},
    )
    job_execution = catalog.add_job_execution(
        job=job,
        started_at=start_time,
        ended_at=end_time,
        status=JobExecutionStatus.SUCCESS,
    )

    # The loop variable is intentionally not named ``source`` so that it does
    # not rebind the ``source`` parameter.
    # zip() stops at the shorter sequence, so unpaired trailing entries
    # produce no edges.
    for column_source, target in zip(
        visited_query.source_columns, visited_query.target_columns
    ):
        for column in column_source.columns:
            edge = catalog.add_column_lineage(
                column, target, job_execution.id, {}
            )
            # Logger arguments defer string formatting until a handler
            # actually emits the record.
            logging.debug("Added %s", edge)

    return job_execution
def create_graph(catalog: Catalog,
                 visited_queries: List[DmlVisitor]) -> DbGraph:
    """Store lineage for every visited query and return the loaded graph.

    For each query a job (named after the query, with an empty context) and
    a ``SUCCESS`` job execution stamped with the current time are added to
    ``catalog``. Source and target columns are paired positionally and one
    lineage edge is added per pair.

    Args:
        catalog: Catalog that receives the jobs, executions and edges.
        visited_queries: Visitors exposing ``name``, ``source_columns`` and
            ``target_columns``.

    Returns:
        A ``DbGraph`` built from ``catalog`` and the collected job ids, after
        its ``load()`` method has been called.
    """
    log_source = LogMixin()
    recorded_job_ids = set()

    for visited in visited_queries:
        job = catalog.add_job(visited.name, {})
        execution = catalog.add_job_execution(
            job,
            datetime.now(),
            datetime.now(),
            JobExecutionStatus.SUCCESS,
        )

        # zip() stops at the shorter of the two column sequences.
        column_pairs = zip(visited.source_columns, visited.target_columns)
        for src_column, dst_column in column_pairs:
            edge = catalog.add_column_lineage(
                src_column, dst_column, execution.id, {}
            )
            # The id is recorded per edge, so a query that yields no column
            # pair contributes no job id to the graph.
            recorded_job_ids.add(job.id)
            log_source.logger.debug("Added {}".format(edge))

    lineage_graph = DbGraph(catalog, recorded_job_ids)
    lineage_graph.load()
    return lineage_graph