def _get_location(task):
    from openlineage.airflow.utils import get_location
    try:
        if hasattr(task, 'file_path') and task.file_path:
            return get_location(task.file_path)
        else:
            return get_location(task.dag.fileloc)
    except Exception:
        return None
def test_bad_file_path(git_mock):
    log.debug("test_bad_file_path()")
    with pytest.raises(FileNotFoundError):
        # invalid file
        get_location("dags/missing-dag.py")
def test_dag_location(git_mock):
    assert ('https://github.com/OpenLineage/OpenLineage/blob/'
            'abcd1234/integration/airflow/tests/test_dags/'
            'test_dag.py' == get_location("tests/test_dags/test_dag.py"))
def test_openlineage_dag_adds_custom_facets(
        mock_get_or_create_openlineage_client,
        new_lineage_run_id,
        clear_db_airflow_dags,
):
    openlineage.airflow.dag.extractors.clear()
    openlineage.airflow.dag.extractor_mapper.extractors.pop('TestFixtureDummyOperator', None)

    dag = DAG(
        DAG_ID,
        schedule_interval='@daily',
        default_args=DAG_DEFAULT_ARGS,
        description=DAG_DESCRIPTION
    )

    # Mock the openlineage client method calls
    mock_openlineage_client = mock.Mock()
    mock_get_or_create_openlineage_client.return_value = mock_openlineage_client

    run_id = str(uuid.uuid4())
    job_id = f"{DAG_ID}.{TASK_ID_COMPLETED}"
    new_lineage_run_id.return_value = run_id

    # Add task that will be marked as completed
    task_will_complete = DummyOperator(
        task_id=TASK_ID_COMPLETED,
        dag=dag
    )
    completed_task_location = get_location(task_will_complete.dag.fileloc)

    # Start run
    dag.create_dagrun(
        run_id=DAG_RUN_ID,
        execution_date=DEFAULT_DATE,
        state=State.RUNNING)

    # Assert emit calls
    start_time = '2016-01-01T00:00:00.000000Z'
    end_time = '2016-01-02T00:00:00.000000Z'

    mock_openlineage_client.emit.assert_called_once_with(RunEvent(
        eventType=RunState.START,
        eventTime=mock.ANY,
        run=Run(run_id, {
            "nominalTime": NominalTimeRunFacet(start_time, end_time),
            "parentRun": ParentRunFacet.create(
                runId=DAG_RUN_ID,
                namespace=DAG_NAMESPACE,
                name=job_id
            ),
            "airflow_runArgs": AirflowRunArgsRunFacet(False),
            "airflow_version": AirflowVersionRunFacet(
                operator="airflow.operators.dummy_operator.DummyOperator",
                taskInfo=mock.ANY,
                airflowVersion=AIRFLOW_VERSION,
                openlineageAirflowVersion=OPENLINEAGE_AIRFLOW_VERSION
            )
        }),
        job=Job("default", job_id, {
            "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
            "sourceCodeLocation": SourceCodeLocationJobFacet("", completed_task_location)
        }),
        producer=PRODUCER,
        inputs=[],
        outputs=[]
    ))
def test_openlineage_dag_with_extract_on_complete(
        job_id_mapping,
        mock_get_or_create_openlineage_client,
        get_custom_facets,
        new_lineage_run_id,
        clear_db_airflow_dags,
        session=None):
    # --- test setup

    # Add the dummy extractor to the list for the task above
    openlineage.airflow.dag.extractors.clear()
    openlineage.airflow.dag.extractor_mapper.extractors[TestFixtureDummyOperator.__name__] = \
        TestFixtureDummyExtractorOnComplete

    dag_id = 'test_openlineage_dag_with_extractor_on_complete'
    dag = DAG(
        dag_id,
        schedule_interval='@daily',
        default_args=DAG_DEFAULT_ARGS,
        description=DAG_DESCRIPTION
    )

    dag_run_id = 'test_openlineage_dag_with_extractor_run_id'
    run_id = str(uuid.uuid4())
    job_id = f"{dag_id}.{TASK_ID_COMPLETED}"

    # Mock the openlineage client method calls
    mock_openlineage_client = mock.Mock()
    mock_get_or_create_openlineage_client.return_value = mock_openlineage_client
    get_custom_facets.return_value = {}
    new_lineage_run_id.return_value = run_id

    # Add task that will be marked as completed
    task_will_complete = TestFixtureDummyOperator(
        task_id=TASK_ID_COMPLETED,
        dag=dag
    )
    completed_task_location = get_location(task_will_complete.dag.fileloc)

    # Create DAG run and mark as running
    dagrun = dag.create_dagrun(
        run_id=dag_run_id,
        execution_date=DEFAULT_DATE,
        state=State.RUNNING)

    start_time = '2016-01-01T00:00:00.000000Z'
    end_time = '2016-01-02T00:00:00.000000Z'

    mock_openlineage_client.emit.assert_has_calls([
        mock.call(RunEvent(
            eventType=RunState.START,
            eventTime=mock.ANY,
            run=Run(run_id, {
                "nominalTime": NominalTimeRunFacet(start_time, end_time),
                "parentRun": ParentRunFacet.create(
                    runId=dag_run_id,
                    namespace=DAG_NAMESPACE,
                    name=job_id
                )
            }),
            job=Job("default", job_id, {
                "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
                "sourceCodeLocation": SourceCodeLocationJobFacet("", completed_task_location)
            }),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        ))
    ])

    mock_openlineage_client.reset_mock()

    # --- Pretend to complete the task
    job_id_mapping.pop.return_value = run_id

    task_will_complete.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    dag.handle_callback(dagrun, success=True, session=session)

    mock_openlineage_client.emit.assert_has_calls([
        mock.call(RunEvent(
            eventType=RunState.COMPLETE,
            eventTime=mock.ANY,
            run=Run(run_id),
            job=Job("default", job_id),
            producer=PRODUCER,
            inputs=[OpenLineageDataset(
                namespace='dummy://localhost:1234',
                name='schema.extract_on_complete_input1',
                facets={
                    'dataSource': DataSourceDatasetFacet(
                        name='dummy://localhost:1234',
                        uri='dummy://localhost:1234?query_tag=asdf'
                    ),
                    'schema': SchemaDatasetFacet(
                        fields=[
                            SchemaField(name='field1', type='text', description=''),
                            SchemaField(name='field2', type='text', description='')
                        ]
                    )
                })
            ],
            outputs=[OpenLineageDataset(
                namespace='dummy://localhost:1234',
                name='extract_on_complete_output1',
                facets={
                    'dataSource': DataSourceDatasetFacet(
                        name='dummy://localhost:1234',
                        uri='dummy://localhost:1234?query_tag=asdf'
                    )
                })
            ]
        ))
    ])
def test_openlineage_dag(
        job_id_mapping,
        mock_get_or_create_openlineage_client,
        get_custom_facets,
        new_lineage_run_id,
        clear_db_airflow_dags,
        session=None
):
    dag = DAG(
        DAG_ID,
        schedule_interval='@daily',
        default_args=DAG_DEFAULT_ARGS,
        description=DAG_DESCRIPTION
    )

    # (1) Mock the openlineage client method calls
    mock_ol_client = mock.Mock()
    mock_get_or_create_openlineage_client.return_value = mock_ol_client

    run_id_completed = str(uuid.uuid4())
    run_id_failed = str(uuid.uuid4())

    job_id_completed = f"{DAG_ID}.{TASK_ID_COMPLETED}"
    job_id_failed = f"{DAG_ID}.{TASK_ID_FAILED}"

    get_custom_facets.return_value = {}
    new_lineage_run_id.side_effect = [
        run_id_completed, run_id_failed, run_id_completed, run_id_failed
    ]

    # (2) Add task that will be marked as completed
    task_will_complete = DummyOperator(
        task_id=TASK_ID_COMPLETED,
        dag=dag
    )
    completed_task_location = get_location(task_will_complete.dag.fileloc)

    # (3) Add task that will be marked as failed
    task_will_fail = DummyOperator(
        task_id=TASK_ID_FAILED,
        dag=dag
    )
    failed_task_location = get_location(task_will_fail.dag.fileloc)

    # (4) Create DAG run and mark as running
    dagrun = dag.create_dagrun(
        run_id=DAG_RUN_ID,
        execution_date=DEFAULT_DATE,
        state=State.RUNNING)

    # Assert emit calls
    start_time = '2016-01-01T00:00:00.000000Z'
    end_time = '2016-01-02T00:00:00.000000Z'

    emit_calls = [
        mock.call(RunEvent(
            eventType=RunState.START,
            eventTime=mock.ANY,
            run=Run(run_id_completed, {
                "nominalTime": NominalTimeRunFacet(start_time, end_time),
                "parentRun": ParentRunFacet.create(
                    runId=DAG_RUN_ID,
                    namespace=DAG_NAMESPACE,
                    name=job_id_completed
                )
            }),
            job=Job("default", job_id_completed, {
                "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
                "sourceCodeLocation": SourceCodeLocationJobFacet("", completed_task_location)
            }),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        )),
        mock.call(RunEvent(
            eventType=RunState.START,
            eventTime=mock.ANY,
            run=Run(run_id_failed, {
                "nominalTime": NominalTimeRunFacet(start_time, end_time),
                "parentRun": ParentRunFacet.create(
                    runId=DAG_RUN_ID,
                    namespace=DAG_NAMESPACE,
                    name=job_id_failed
                )
            }),
            job=Job("default", job_id_failed, {
                "documentation": DocumentationJobFacet(DAG_DESCRIPTION),
                "sourceCodeLocation": SourceCodeLocationJobFacet("", failed_task_location)
            }),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        ))
    ]
    log.info(
        f"{[name for name, args, kwargs in mock_ol_client.mock_calls]}")
    mock_ol_client.emit.assert_has_calls(emit_calls)

    # (5) Start task that will be marked as completed
    task_will_complete.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    # (6) Start task that will be marked as failed
    ti1 = TaskInstance(task=task_will_fail, execution_date=DEFAULT_DATE)
    ti1.state = State.FAILED
    session.add(ti1)
    session.commit()

    job_id_mapping.pop.side_effect = [run_id_completed, run_id_failed]

    dag.handle_callback(dagrun, success=False, session=session)

    emit_calls += [
        mock.call(RunEvent(
            eventType=RunState.COMPLETE,
            eventTime=mock.ANY,
            run=Run(run_id_completed),
            job=Job("default", job_id_completed),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        )),
        mock.call(RunEvent(
            eventType=RunState.FAIL,
            eventTime=mock.ANY,
            run=Run(run_id_failed),
            job=Job("default", job_id_failed),
            producer=PRODUCER,
            inputs=[],
            outputs=[]
        ))
    ]
    mock_ol_client.emit.assert_has_calls(emit_calls)
def test_get_location_no_file_path():
    assert get_location(None) is None
    assert get_location("") is None