def test_client_sends_proper_json_with_minimal_event():
    """Check the request the client issues for a minimal START event.

    A mocked HTTP session is handed to the client so that the ``post`` call
    can be inspected: the target URL, the exact serialized JSON body, and
    the ``timeout``/``verify`` keyword arguments are all asserted.
    """
    http_session = MagicMock()
    ol_client = OpenLineageClient(url="http://example.com", session=http_session)

    minimal_event = RunEvent(
        RunState.START,
        "2020-01-01",
        Run("1"),
        Job("openlineage", "job"),
        "producer",
    )
    ol_client.emit(minimal_event)

    # The body is compared as an exact string, so key order and spacing matter.
    expected_body = (
        '{"eventTime": "2020-01-01", "eventType": "START", "inputs": [], "job": '
        '{"facets": {}, "name": "job", "namespace": "openlineage"}, "outputs": [], '
        '"producer": "producer", "run": {"facets": {}, "runId": "1"}}'
    )
    http_session.post.assert_called_with(
        "http://example.com/api/v1/lineage",
        expected_body,
        timeout=5.0,
        verify=True,
    )
def _build_run(
    run_id: str,
    parent_run_id: Optional[str] = None,
    job_name: Optional[str] = None,
    nominal_start_time: Optional[str] = None,
    nominal_end_time: Optional[str] = None,
) -> Run:
    """Assemble a ``Run`` for ``run_id`` with its optional run facets.

    A ``nominalTime`` facet is attached only when ``nominal_start_time`` is
    truthy (``nominal_end_time`` is passed along as-is), and a ``parentRun``
    facet only when ``parent_run_id`` is truthy.

    :param run_id: identifier given to the resulting ``Run``.
    :param parent_run_id: run id of the parent; enables the ``parentRun`` facet.
    :param job_name: job name passed to ``ParentRunFacet.create``.
    :param nominal_start_time: nominal start; enables the ``nominalTime`` facet.
    :param nominal_end_time: nominal end, used only with a start time.
    :return: a ``Run`` carrying whichever facets were applicable.
    """
    run_facets = {}

    if nominal_start_time:
        run_facets["nominalTime"] = NominalTimeRunFacet(
            nominal_start_time, nominal_end_time
        )

    if parent_run_id:
        run_facets["parentRun"] = ParentRunFacet.create(
            parent_run_id, _DAG_NAMESPACE, job_name
        )

    return Run(run_id, run_facets)
def test_marquez_dag(job_id_mapping, mock_get_or_create_openlineage_client,
                     clear_db_airflow_dags, session=None):
    """Check the lineage events emitted over a two-task DAG run.

    Creating the DAG run is expected to emit a START event per task; after
    one task is run and the other is stored as FAILED, handling the DAG
    callback is expected to emit a COMPLETE and a FAIL event respectively.

    NOTE(review): ``session`` defaults to ``None`` but is used directly
    below; presumably a decorator or fixture outside this view injects it
    -- confirm.
    """
    dag = DAG(
        DAG_ID,
        schedule_interval='@daily',
        default_args=DAG_DEFAULT_ARGS,
        description=DAG_DESCRIPTION
    )

    # (1) Mock the marquez client method calls
    mock_marquez_client = mock.Mock()
    mock_get_or_create_openlineage_client.return_value = mock_marquez_client
    run_id_completed = f"{DAG_RUN_ID}.{TASK_ID_COMPLETED}"
    run_id_failed = f"{DAG_RUN_ID}.{TASK_ID_FAILED}"

    # (2) Add task that will be marked as completed
    task_will_complete = DummyOperator(
        task_id=TASK_ID_COMPLETED,
        dag=dag
    )
    completed_task_location = get_location(task_will_complete.dag.fileloc)

    # (3) Add task that will be marked as failed
    task_will_fail = DummyOperator(
        task_id=TASK_ID_FAILED,
        dag=dag
    )
    # Derive the location from the failing task itself rather than from the
    # completed one, so the expectation stays tied to the right task.
    failed_task_location = get_location(task_will_fail.dag.fileloc)

    # (4) Create DAG run and mark as running
    dagrun = dag.create_dagrun(
        run_id=DAG_RUN_ID,
        execution_date=DEFAULT_DATE,
        state=State.RUNNING)

    # Assert emit calls
    start_time = '2016-01-01T00:00:00.000000Z'
    end_time = '2016-01-02T00:00:00.000000Z'

    emit_calls = [
        mock.call(RunEvent(
            eventType=RunState.START,
            eventTime=mock.ANY,
            run=Run(run_id_completed, {
                "nominalTime": NominalTimeRunFacet(start_time, end_time)
            }),
            job=Job("default", f"{DAG_ID}.{TASK_ID_COMPLETED}", {
                "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
                "sourceCodeLocation":
                    SourceCodeLocationJobFacet("", completed_task_location)
            }),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        )),
        mock.call(RunEvent(
            eventType=RunState.START,
            eventTime=mock.ANY,
            run=Run(run_id_failed, {
                "nominalTime": NominalTimeRunFacet(start_time, end_time)
            }),
            job=Job("default", f"{DAG_ID}.{TASK_ID_FAILED}", {
                "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
                "sourceCodeLocation":
                    SourceCodeLocationJobFacet("", failed_task_location)
            }),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        ))
    ]
    # Log the names of every call recorded on the mock to ease debugging
    # when the assertion below fails.
    log.info(
        f"{ [name for name, args, kwargs in mock_marquez_client.mock_calls]}")
    mock_marquez_client.emit.assert_has_calls(emit_calls)

    # (5) Start task that will be marked as completed
    task_will_complete.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    # (6) Start task that will be marked as failed
    ti1 = TaskInstance(task=task_will_fail, execution_date=DEFAULT_DATE)
    ti1.state = State.FAILED
    session.add(ti1)
    session.commit()

    # Successive ``pop`` calls on the mapping yield the completed run id
    # first and the failed run id second.
    job_id_mapping.pop.side_effect = [run_id_completed, run_id_failed]

    dag.handle_callback(dagrun, success=False, session=session)

    # The terminal events are appended so the full START..FAIL sequence is
    # verified in order.
    emit_calls += [
        mock.call(RunEvent(
            eventType=RunState.COMPLETE,
            eventTime=mock.ANY,
            run=Run(run_id_completed),
            job=Job("default", f"{DAG_ID}.{TASK_ID_COMPLETED}"),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        )),
        mock.call(RunEvent(
            eventType=RunState.FAIL,
            eventTime=mock.ANY,
            run=Run(run_id_failed),
            job=Job("default", f"{DAG_ID}.{TASK_ID_FAILED}"),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        ))
    ]
    mock_marquez_client.emit.assert_has_calls(emit_calls)
def test_marquez_dag_with_extractor_returning_two_steps(
        job_id_mapping,
        mock_get_or_create_openlineage_client,
        clear_db_airflow_dags,
        session=None):
    """Check START and COMPLETE events when a multi-step extractor is registered.

    ``TestFixtureDummyExtractorWithMultipleSteps`` is registered for the
    task's operator class; both the START event (after the DAG run is
    created) and the COMPLETE event (after the callback is handled) are
    expected to carry the single ``extract_input1`` input dataset and no
    outputs.
    """
    # --- test setup
    dag_id = 'test_marquez_dag_with_extractor_returning_two_steps'
    dag_run_id = 'test_marquez_dag_with_extractor_returning_two_steps_run_id'
    run_id = f"{dag_run_id}.{TASK_ID_COMPLETED}"
    job_name = f"{dag_id}.{TASK_ID_COMPLETED}"

    dag = DAG(
        dag_id,
        schedule_interval='@daily',
        default_args=DAG_DEFAULT_ARGS,
        description=DAG_DESCRIPTION
    )

    # Route the client factory to a mock so emitted events can be inspected
    client_mock = mock.Mock()
    mock_get_or_create_openlineage_client.return_value = client_mock

    # The one task in this DAG; it is the task that will complete
    task_will_complete = TestFixtureDummyOperator(
        task_id=TASK_ID_COMPLETED,
        dag=dag
    )
    completed_task_location = get_location(task_will_complete.dag.fileloc)

    # Register the dummy extractor for the operator class used above
    _DAG_EXTRACTORS[task_will_complete.__class__] = \
        TestFixtureDummyExtractorWithMultipleSteps

    # --- pretend run the DAG: create the DAG run in RUNNING state
    dagrun = dag.create_dagrun(
        run_id=dag_run_id,
        execution_date=DEFAULT_DATE,
        state=State.RUNNING)

    # Input dataset expected on both the START and the COMPLETE event
    expected_inputs = [
        OpenLineageDataset(DAG_NAMESPACE, 'extract_input1', {
            "dataSource": DataSourceDatasetFacet(
                name='dummy_source_name',
                uri='http://dummy/source/url'
            )
        })
    ]

    # --- the job starting must have triggered exactly one START event
    nominal_time = NominalTimeRunFacet(
        '2016-01-01T00:00:00.000000Z',
        '2016-01-02T00:00:00.000000Z'
    )
    expected_start = RunEvent(
        eventType=RunState.START,
        eventTime=mock.ANY,
        run=Run(run_id, {"nominalTime": nominal_time}),
        job=Job("default", job_name, {
            "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
            "sourceCodeLocation":
                SourceCodeLocationJobFacet("", completed_task_location)
        }),
        producer=PRODUCER,
        inputs=expected_inputs,
        outputs=[]
    )
    client_mock.emit.assert_called_once_with(expected_start)

    # Forget the START call so the next assertion only sees later calls
    client_mock.reset_mock()

    # --- pretend complete the task
    job_id_mapping.pop.return_value = run_id
    task_will_complete.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    dag.handle_callback(dagrun, success=True, session=session)

    # --- exactly one COMPLETE event must follow
    expected_complete = RunEvent(
        eventType=RunState.COMPLETE,
        eventTime=mock.ANY,
        run=Run(run_id),
        job=Job("default", job_name),
        producer=PRODUCER,
        inputs=expected_inputs,
        outputs=[]
    )
    client_mock.emit.assert_called_once_with(expected_complete)
def test_marquez_dag_with_extract_on_complete(
        job_id_mapping,
        mock_get_or_create_openlineage_client,
        clear_db_airflow_dags,
        session=None):
    """Check events when the registered extractor is the on-complete fixture.

    With ``TestFixtureDummyExtractorOnComplete`` registered, the START event
    is expected with empty inputs/outputs, while the COMPLETE event is
    expected to carry one input dataset (with schema) and one output dataset.
    """
    # --- test setup
    dag_id = 'test_marquez_dag_with_extractor_on_complete'
    dag_run_id = 'test_marquez_dag_with_extractor_run_id'
    run_id = f"{dag_run_id}.{TASK_ID_COMPLETED}"
    job_name = f"{dag_id}.{TASK_ID_COMPLETED}"

    dag = DAG(
        dag_id,
        schedule_interval='@daily',
        default_args=DAG_DEFAULT_ARGS,
        description=DAG_DESCRIPTION
    )

    # Route the client factory to a mock so emitted events can be inspected
    client_mock = mock.Mock()
    mock_get_or_create_openlineage_client.return_value = client_mock

    # The one task in this DAG; it is the task that will complete
    task_will_complete = TestFixtureDummyOperator(
        task_id=TASK_ID_COMPLETED,
        dag=dag
    )
    completed_task_location = get_location(task_will_complete.dag.fileloc)

    # Register the dummy extractor for the operator class used above
    _DAG_EXTRACTORS[task_will_complete.__class__] = \
        TestFixtureDummyExtractorOnComplete

    # Create DAG run and mark as running
    dagrun = dag.create_dagrun(
        run_id=dag_run_id,
        execution_date=DEFAULT_DATE,
        state=State.RUNNING)

    nominal_time = NominalTimeRunFacet(
        '2016-01-01T00:00:00.000000Z',
        '2016-01-02T00:00:00.000000Z'
    )
    expected_start = RunEvent(
        eventType=RunState.START,
        eventTime=mock.ANY,
        run=Run(run_id, {"nominalTime": nominal_time}),
        job=Job("default", job_name, {
            "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
            "sourceCodeLocation":
                SourceCodeLocationJobFacet("", completed_task_location)
        }),
        producer=PRODUCER,
        inputs=[],
        outputs=[]
    )
    client_mock.emit.assert_has_calls([mock.call(expected_start)])

    # Forget the START call so the next assertion only sees later calls
    client_mock.reset_mock()

    # --- Pretend complete the task
    job_id_mapping.pop.return_value = run_id
    task_will_complete.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    dag.handle_callback(dagrun, success=True, session=session)

    # Both expected datasets reference an equal data-source facet
    dummy_source = DataSourceDatasetFacet(
        name='dummy_source_name',
        uri='http://dummy/source/url'
    )
    expected_input = OpenLineageDataset(
        namespace='default',
        name='schema.extract_on_complete_input1',
        facets={
            'dataSource': dummy_source,
            'schema': SchemaDatasetFacet(
                fields=[
                    SchemaField(name='field1', type='text', description=''),
                    SchemaField(name='field2', type='text', description='')
                ]
            )
        }
    )
    expected_output = OpenLineageDataset(
        namespace='default',
        name='extract_on_complete_output1',
        facets={'dataSource': dummy_source}
    )
    expected_complete = RunEvent(
        eventType=RunState.COMPLETE,
        eventTime=mock.ANY,
        run=Run(run_id),
        job=Job("default", job_name),
        producer=PRODUCER,
        inputs=[expected_input],
        outputs=[expected_output]
    )
    client_mock.emit.assert_has_calls([mock.call(expected_complete)])