예제 #1
0
def test_client_sends_proper_json_with_minimal_event():
    """Emit a minimal START event and check the exact POST issued on the session.

    The event carries only the required fields; the expected request body
    shows empty ``inputs``/``outputs`` lists and empty ``facets`` objects in
    the serialized JSON.
    """
    http_session = MagicMock()
    lineage_client = OpenLineageClient(url="http://example.com", session=http_session)

    minimal_event = RunEvent(
        RunState.START,
        "2020-01-01",
        Run("1"),
        Job("openlineage", "job"),
        "producer",
    )
    # The comparison is on the serialized string, so the key order shown
    # here is part of the expectation.
    expected_body = (
        '{"eventTime": "2020-01-01", "eventType": "START", "inputs": [], '
        '"job": {"facets": {}, "name": "job", "namespace": "openlineage"}, '
        '"outputs": [], "producer": "producer", '
        '"run": {"facets": {}, "runId": "1"}}'
    )

    lineage_client.emit(minimal_event)

    http_session.post.assert_called_with(
        "http://example.com/api/v1/lineage",
        expected_body,
        timeout=5.0,
        verify=True,
    )
예제 #2
0
    def _build_run(run_id: str,
                   parent_run_id: Optional[str] = None,
                   job_name: Optional[str] = None,
                   nominal_start_time: Optional[str] = None,
                   nominal_end_time: Optional[str] = None) -> Run:
        """Build a ``Run`` for ``run_id`` carrying whichever optional facets apply.

        Args:
            run_id: Identifier passed straight through to ``Run``.
            parent_run_id: When truthy, a ``"parentRun"`` facet is attached,
                created from this id, ``_DAG_NAMESPACE`` and ``job_name``.
            job_name: Only used when building the ``"parentRun"`` facet.
            nominal_start_time: When truthy, a ``"nominalTime"`` facet is
                attached.
            nominal_end_time: Forwarded to the ``"nominalTime"`` facet; it is
                ignored when ``nominal_start_time`` is falsy.

        Returns:
            A ``Run`` whose facets dict holds zero, one or both facets.
        """
        run_facets = {}

        if nominal_start_time:
            run_facets["nominalTime"] = NominalTimeRunFacet(
                nominal_start_time, nominal_end_time)

        if parent_run_id:
            run_facets["parentRun"] = ParentRunFacet.create(
                parent_run_id, _DAG_NAMESPACE, job_name)

        return Run(run_id, run_facets)
예제 #3
0
def test_marquez_dag(job_id_mapping, mock_get_or_create_openlineage_client,
                     clear_db_airflow_dags, session=None):
    """Check the events emitted for a DAG with one completed and one failed task.

    Creating the DAG run is expected to emit one START event per task. After
    the first task has run, the second has been stored as FAILED and
    ``dag.handle_callback`` has been invoked, a COMPLETE event for the first
    task and a FAIL event for the second are expected in addition.

    NOTE(review): ``session`` defaults to ``None`` but ``session.add`` is
    called below; presumably a live session is injected by a decorator or the
    caller, neither of which is visible here -- confirm.
    """
    dag = DAG(
        DAG_ID,
        schedule_interval='@daily',
        default_args=DAG_DEFAULT_ARGS,
        description=DAG_DESCRIPTION
    )
    # (1) Mock the marquez client method calls
    mock_marquez_client = mock.Mock()
    mock_get_or_create_openlineage_client.return_value = mock_marquez_client
    run_id_completed = f"{DAG_RUN_ID}.{TASK_ID_COMPLETED}"
    run_id_failed = f"{DAG_RUN_ID}.{TASK_ID_FAILED}"

    # (2) Add task that will be marked as completed
    task_will_complete = DummyOperator(
        task_id=TASK_ID_COMPLETED,
        dag=dag
    )
    completed_task_location = get_location(task_will_complete.dag.fileloc)

    # (3) Add task that will be marked as failed
    task_will_fail = DummyOperator(
        task_id=TASK_ID_FAILED,
        dag=dag
    )
    # Derived from the failing task itself; both tasks were created with the
    # same ``dag``, so the resulting location matches the completed task's.
    failed_task_location = get_location(task_will_fail.dag.fileloc)

    # (4) Create DAG run and mark as running
    dagrun = dag.create_dagrun(
        run_id=DAG_RUN_ID,
        execution_date=DEFAULT_DATE,
        state=State.RUNNING)

    # Assert emit calls
    start_time = '2016-01-01T00:00:00.000000Z'
    end_time = '2016-01-02T00:00:00.000000Z'

    def expected_start(run_id, task_id, location):
        # Expected START call: nominal-time run facet plus job facets.
        return mock.call(RunEvent(
            eventType=RunState.START,
            eventTime=mock.ANY,
            run=Run(run_id, {"nominalTime": NominalTimeRunFacet(start_time, end_time)}),
            job=Job("default", f"{DAG_ID}.{task_id}", {
                "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
                "sourceCodeLocation": SourceCodeLocationJobFacet("", location)
            }),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        ))

    def expected_end(event_type, run_id, task_id):
        # Expected terminal call (COMPLETE or FAIL): no facets at all.
        return mock.call(RunEvent(
            eventType=event_type,
            eventTime=mock.ANY,
            run=Run(run_id),
            job=Job("default", f"{DAG_ID}.{task_id}"),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        ))

    emit_calls = [
        expected_start(run_id_completed, TASK_ID_COMPLETED, completed_task_location),
        expected_start(run_id_failed, TASK_ID_FAILED, failed_task_location),
    ]
    log.info(
        f"{ [name for name, args, kwargs in mock_marquez_client.mock_calls]}")
    mock_marquez_client.emit.assert_has_calls(emit_calls)

    # (5) Start task that will be marked as completed
    task_will_complete.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    # (6) Start task that will be marked as failed
    ti1 = TaskInstance(task=task_will_fail, execution_date=DEFAULT_DATE)
    ti1.state = State.FAILED
    session.add(ti1)
    session.commit()

    # Successive ``pop`` calls on the mapping yield the two run ids in order.
    job_id_mapping.pop.side_effect = [run_id_completed, run_id_failed]

    dag.handle_callback(dagrun, success=False, session=session)

    emit_calls += [
        expected_end(RunState.COMPLETE, run_id_completed, TASK_ID_COMPLETED),
        expected_end(RunState.FAIL, run_id_failed, TASK_ID_FAILED),
    ]
    mock_marquez_client.emit.assert_has_calls(emit_calls)
예제 #4
0
def test_marquez_dag_with_extractor_returning_two_steps(
        job_id_mapping,
        mock_get_or_create_openlineage_client,
        clear_db_airflow_dags,
        session=None):
    """Check START/COMPLETE events when the multi-step extractor is registered.

    ``TestFixtureDummyExtractorWithMultipleSteps`` is registered for the
    task's operator class; both the START event (emitted on DAG-run creation)
    and the COMPLETE event (emitted by ``dag.handle_callback``) are expected
    to carry the single ``extract_input1`` input dataset and no outputs.

    The registration in ``_DAG_EXTRACTORS`` is scoped with ``mock.patch.dict``
    so the shared registry is restored when the test finishes, pass or fail.
    """
    # --- test setup
    dag_id = 'test_marquez_dag_with_extractor_returning_two_steps'
    dag = DAG(
        dag_id,
        schedule_interval='@daily',
        default_args=DAG_DEFAULT_ARGS,
        description=DAG_DESCRIPTION
    )

    dag_run_id = 'test_marquez_dag_with_extractor_returning_two_steps_run_id'
    run_id = f"{dag_run_id}.{TASK_ID_COMPLETED}"

    # Mock the marquez client method calls
    mock_marquez_client = mock.Mock()
    mock_get_or_create_openlineage_client.return_value = mock_marquez_client

    # Add task that will be marked as completed
    task_will_complete = TestFixtureDummyOperator(
        task_id=TASK_ID_COMPLETED,
        dag=dag
    )
    completed_task_location = get_location(task_will_complete.dag.fileloc)

    start_time = '2016-01-01T00:00:00.000000Z'
    end_time = '2016-01-02T00:00:00.000000Z'

    def expected_inputs():
        # The one input dataset expected on both events.
        return [OpenLineageDataset(DAG_NAMESPACE, 'extract_input1', {
            "dataSource": DataSourceDatasetFacet(
                name='dummy_source_name',
                uri='http://dummy/source/url'
            )
        })]

    # Register the dummy extractor for the task above, only for the duration
    # of the code that exercises the DAG.
    extractor_override = {
        task_will_complete.__class__: TestFixtureDummyExtractorWithMultipleSteps
    }
    with mock.patch.dict(_DAG_EXTRACTORS, extractor_override):
        # --- pretend run the DAG

        # Create DAG run and mark as running
        dagrun = dag.create_dagrun(
            run_id=dag_run_id,
            execution_date=DEFAULT_DATE,
            state=State.RUNNING)

        # --- Asserts that the job starting triggers openlineage event
        mock_marquez_client.emit.assert_called_once_with(
            RunEvent(
                eventType=RunState.START,
                eventTime=mock.ANY,
                run=Run(run_id, {"nominalTime": NominalTimeRunFacet(start_time, end_time)}),
                job=Job("default", f"{dag_id}.{TASK_ID_COMPLETED}", {
                    "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
                    "sourceCodeLocation": SourceCodeLocationJobFacet(
                        "", completed_task_location)
                }),
                producer=PRODUCER,
                inputs=expected_inputs(),
                outputs=[]
            )
        )

        # Forget the START call so the next assertion sees only later calls.
        mock_marquez_client.reset_mock()

        # --- Pretend complete the task
        job_id_mapping.pop.return_value = run_id

        task_will_complete.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        dag.handle_callback(dagrun, success=True, session=session)

    # --- Assert that the openlineage call is done
    mock_marquez_client.emit.assert_called_once_with(
        RunEvent(
            eventType=RunState.COMPLETE,
            eventTime=mock.ANY,
            run=Run(run_id),
            job=Job("default", f"{dag_id}.{TASK_ID_COMPLETED}"),
            producer=PRODUCER,
            inputs=expected_inputs(),
            outputs=[]
        )
    )
예제 #5
0
def test_marquez_dag_with_extract_on_complete(
        job_id_mapping,
        mock_get_or_create_openlineage_client,
        clear_db_airflow_dags,
        session=None):
    """Check START/COMPLETE events when the on-complete extractor is registered.

    With ``TestFixtureDummyExtractorOnComplete`` registered for the task's
    operator class, the START event is expected to have no inputs or outputs,
    while the COMPLETE event emitted after ``dag.handle_callback`` is
    expected to carry one input dataset (with schema) and one output dataset.

    The registration in ``_DAG_EXTRACTORS`` is scoped with ``mock.patch.dict``
    so the shared registry is restored when the test finishes, pass or fail.
    """
    # --- test setup
    dag_id = 'test_marquez_dag_with_extractor_on_complete'
    dag = DAG(
        dag_id,
        schedule_interval='@daily',
        default_args=DAG_DEFAULT_ARGS,
        description=DAG_DESCRIPTION
    )

    dag_run_id = 'test_marquez_dag_with_extractor_run_id'
    run_id = f"{dag_run_id}.{TASK_ID_COMPLETED}"
    # Mock the marquez client method calls
    mock_marquez_client = mock.Mock()
    mock_get_or_create_openlineage_client.return_value = mock_marquez_client

    # Add task that will be marked as completed
    task_will_complete = TestFixtureDummyOperator(
        task_id=TASK_ID_COMPLETED,
        dag=dag
    )
    completed_task_location = get_location(task_will_complete.dag.fileloc)

    start_time = '2016-01-01T00:00:00.000000Z'
    end_time = '2016-01-02T00:00:00.000000Z'

    # Register the dummy extractor for the task above, only for the duration
    # of the code that exercises the DAG.
    extractor_override = {
        task_will_complete.__class__: TestFixtureDummyExtractorOnComplete
    }
    with mock.patch.dict(_DAG_EXTRACTORS, extractor_override):
        # Create DAG run and mark as running
        dagrun = dag.create_dagrun(
            run_id=dag_run_id,
            execution_date=DEFAULT_DATE,
            state=State.RUNNING)

        mock_marquez_client.emit.assert_has_calls([
            mock.call(RunEvent(
                eventType=RunState.START,
                eventTime=mock.ANY,
                run=Run(run_id, {
                    "nominalTime": NominalTimeRunFacet(start_time, end_time)
                }),
                job=Job("default", f"{dag_id}.{TASK_ID_COMPLETED}", {
                    "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
                    "sourceCodeLocation": SourceCodeLocationJobFacet(
                        "", completed_task_location)
                }),
                producer=PRODUCER,
                inputs=[],
                outputs=[]
            ))
        ])

        # Forget the START call so the next assertion sees only later calls.
        mock_marquez_client.reset_mock()

        # --- Pretend complete the task
        job_id_mapping.pop.return_value = run_id

        task_will_complete.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        dag.handle_callback(dagrun, success=True, session=session)

    expected_input = OpenLineageDataset(
        namespace='default',
        name='schema.extract_on_complete_input1',
        facets={
            'dataSource': DataSourceDatasetFacet(
                name='dummy_source_name',
                uri='http://dummy/source/url'
            ),
            'schema': SchemaDatasetFacet(
                fields=[
                    SchemaField(name='field1', type='text', description=''),
                    SchemaField(name='field2', type='text', description='')
                ]
            )
        })
    expected_output = OpenLineageDataset(
        namespace='default',
        name='extract_on_complete_output1',
        facets={
            'dataSource': DataSourceDatasetFacet(
                name='dummy_source_name',
                uri='http://dummy/source/url'
            )
        })

    mock_marquez_client.emit.assert_has_calls([
        mock.call(RunEvent(
            eventType=RunState.COMPLETE,
            eventTime=mock.ANY,
            run=Run(run_id),
            job=Job("default", f"{dag_id}.{TASK_ID_COMPLETED}"),
            producer=PRODUCER,
            inputs=[expected_input],
            outputs=[expected_output]
        ))
    ])