def test_execute_with_docker_conn_id_use_hook(self, operator_client_mock, operator_docker_hook):
    """With ``docker_conn_id`` set, the Docker client must be obtained via the hook.

    ``operator_client_mock`` and ``operator_docker_hook`` are injected mocks
    (presumably from ``mock.patch`` decorators outside this view - confirm).
    """
    # Docker client stub: every call used by execute() succeeds.
    docker_client = mock.Mock(name='DockerOperator.APIClient mock', spec=APIClient)
    docker_client.configure_mock(**{
        'images.return_value': [],
        'create_container.return_value': {'Id': 'some_id'},
        'attach.return_value': [],
        'pull.return_value': [],
        'wait.return_value': {"StatusCode": 0},
    })
    operator_client_mock.return_value = docker_client

    # Operator configured with a connection id, which should route through the hook.
    operator = DockerOperator(
        image='publicregistry/someimage',
        owner='unittest',
        task_id='unittest',
        docker_conn_id='some_conn_id',
    )

    # Hook stub that hands back the client stub above.
    docker_hook = mock.Mock(name='DockerHook mock', spec=DockerHook)
    docker_hook.get_conn.return_value = docker_client
    operator_docker_hook.return_value = docker_hook

    operator.execute(None)

    self.assertEqual(
        operator_client_mock.call_count, 0,
        'Client was called on the operator instead of the hook',
    )
    self.assertEqual(
        operator_docker_hook.call_count, 1,
        'Hook was not called although docker_conn_id configured',
    )
    self.assertEqual(
        docker_client.pull.call_count, 1,
        'Image was not pulled using operator client',
    )
def test_execute_no_docker_conn_id_no_hook(self, operator_client_mock):
    """Without ``docker_conn_id`` the operator must never ask for a hook.

    ``operator_client_mock`` is an injected mock (presumably the patched
    client class - confirm against the decorator outside this view).
    """
    # Docker client stub: every call used by execute() succeeds.
    docker_client = mock.Mock(name='DockerOperator.APIClient mock', spec=APIClient)
    docker_client.configure_mock(**{
        'images.return_value': [],
        'create_container.return_value': {'Id': 'some_id'},
        'attach.return_value': [],
        'pull.return_value': [],
        'wait.return_value': {"StatusCode": 0},
    })
    operator_client_mock.return_value = docker_client

    # No docker_conn_id here on purpose.
    operator = DockerOperator(image='publicregistry/someimage', owner='unittest', task_id='unittest')

    # Replace get_hook with a spy so any call to it can be counted.
    docker_hook = mock.Mock(name='DockerHook mock', spec=DockerHook)
    docker_hook.get_conn.return_value = docker_client
    get_hook_spy = mock.Mock(
        name='DockerOperator.get_hook mock',
        spec=DockerOperator.get_hook,
        return_value=docker_hook,
    )
    operator.get_hook = get_hook_spy

    operator.execute(None)

    self.assertEqual(
        operator.get_hook.call_count, 0,
        'Hook called though no docker_conn_id configured',
    )
def test_execute_container_fails(self):
    """``execute`` must raise ``AirflowException`` when ``wait`` reports status 1."""
    # Simulate a container that exits with a failure status.
    self.client_mock.wait.return_value = {"StatusCode": 1}
    failing_operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')
    self.assertRaises(AirflowException, failing_operator.execute, None)
def test_execute_tls(self, tls_class_mock):
    """TLS cert arguments must produce a TLS config and an ``https`` base URL.

    ``tls_class_mock`` is an injected mock standing in for the TLS config
    class (presumably patched by a decorator outside this view - confirm).
    """
    tls_config = mock.Mock()
    tls_class_mock.return_value = tls_config

    operator_kwargs = {
        'docker_url': 'tcp://127.0.0.1:2376',
        'image': 'ubuntu',
        'owner': 'unittest',
        'task_id': 'unittest',
        'tls_client_cert': 'cert.pem',
        'tls_ca_cert': 'ca.pem',
        'tls_client_key': 'key.pem',
    }
    DockerOperator(**operator_kwargs).execute(None)

    # The TLS config is built from the three cert paths given to the operator.
    expected_tls_kwargs = {
        'assert_hostname': None,
        'ca_cert': 'ca.pem',
        'client_cert': ('cert.pem', 'key.pem'),
        'ssl_version': None,
        'verify': True,
    }
    tls_class_mock.assert_called_once_with(**expected_tls_kwargs)

    # The tcp:// URL is expected to reach the client as https://.
    self.client_class_mock.assert_called_once_with(
        base_url='https://127.0.0.1:2376',
        tls=tls_config,
        version=None,
    )
def test_execute_tls(self, client_class_mock, tls_class_mock):
    """TLS cert arguments must produce a TLS config and an ``https`` base URL.

    Both parameters are injected mocks (presumably from ``mock.patch``
    decorators outside this view - confirm).
    """
    # Docker client stub: every call used by execute() succeeds.
    docker_client = mock.Mock(spec=APIClient)
    docker_client.configure_mock(**{
        'create_container.return_value': {'Id': 'some_id'},
        'create_host_config.return_value': mock.Mock(),
        'images.return_value': [],
        'attach.return_value': [],
        'pull.return_value': [],
        'wait.return_value': {"StatusCode": 0},
    })
    client_class_mock.return_value = docker_client

    tls_config = mock.Mock()
    tls_class_mock.return_value = tls_config

    operator = DockerOperator(
        docker_url='tcp://127.0.0.1:2376',
        image='ubuntu',
        owner='unittest',
        task_id='unittest',
        tls_client_cert='cert.pem',
        tls_ca_cert='ca.pem',
        tls_client_key='key.pem',
    )
    operator.execute(None)

    tls_class_mock.assert_called_once_with(
        assert_hostname=None,
        ca_cert='ca.pem',
        client_cert=('cert.pem', 'key.pem'),
        ssl_version=None,
        verify=True,
    )
    # The tcp:// URL is expected to reach the client as https://.
    client_class_mock.assert_called_once_with(
        base_url='https://127.0.0.1:2376',
        tls=tls_config,
        version=None,
    )
def test_auto_remove_container_fails(self):
    """With ``auto_remove=True`` a failed run must still remove the container."""
    # Simulate a container that exits with a failure status.
    self.client_mock.wait.return_value = {"StatusCode": 1}

    failing_operator = DockerOperator(
        image='ubuntu',
        owner='unittest',
        task_id='unittest',
        auto_remove=True,
    )
    failing_operator.container = {'Id': 'some_id'}

    with pytest.raises(AirflowException):
        failing_operator.execute(None)

    self.client_mock.remove_container.assert_called_once_with('some_id')
def test_privileged(self):
    """The ``privileged`` argument must be forwarded untouched to ``create_host_config``."""
    # A unique sentinel lets the identity check prove the very same object was passed on.
    privileged_sentinel = mock.Mock()
    DockerOperator(task_id='test', image='test', privileged=privileged_sentinel).execute(None)

    self.client_mock.create_container.assert_called_once()

    # call_args unpacks into (positional args, keyword args).
    _, container_kwargs = self.client_mock.create_container.call_args
    assert 'host_config' in container_kwargs

    _, host_config_kwargs = self.client_mock.create_host_config.call_args
    assert 'privileged' in host_config_kwargs
    assert privileged_sentinel is host_config_kwargs['privileged']
def test_extra_hosts(self):
    """The ``extra_hosts`` argument must be forwarded untouched to ``create_host_config``."""
    # A unique sentinel lets the identity check prove the very same object was passed on.
    hosts_sentinel = mock.Mock()
    DockerOperator(task_id='test', image='test', extra_hosts=hosts_sentinel).execute(None)

    self.client_mock.create_container.assert_called_once()

    # call_args unpacks into (positional args, keyword args).
    _, container_kwargs = self.client_mock.create_container.call_args
    assert 'host_config' in container_kwargs

    _, host_config_kwargs = self.client_mock.create_host_config.call_args
    assert 'extra_hosts' in host_config_kwargs
    assert hosts_sentinel is host_config_kwargs['extra_hosts']
def test_execute(self):
    """Run a fully configured operator and verify every Docker client call it makes."""
    operator_kwargs = {
        'api_version': '1.19',
        'command': 'env',
        'environment': {'UNIT': 'TEST'},
        'private_environment': {'PRIVATE': 'MESSAGE'},
        'image': 'ubuntu:latest',
        'network_mode': 'bridge',
        'owner': 'unittest',
        'task_id': 'unittest',
        'volumes': ['/host/path:/container/path'],
        'entrypoint': '["sh", "-c"]',
        'working_dir': '/container/path',
        'shm_size': 1000,
        'host_tmp_dir': '/host/airflow',
        'container_name': 'test_container',
        'tty': True,
    }
    operator = DockerOperator(**operator_kwargs)
    operator.execute(None)

    # Client construction.
    self.client_class_mock.assert_called_once_with(
        base_url='unix://var/run/docker.sock', tls=None, version='1.19'
    )

    # Container creation: public and private environment are expected merged,
    # and the string entrypoint is expected as a list.
    expected_container_kwargs = {
        'command': 'env',
        'name': 'test_container',
        'environment': {'AIRFLOW_TMP_DIR': '/tmp/airflow', 'UNIT': 'TEST', 'PRIVATE': 'MESSAGE'},
        'host_config': self.client_mock.create_host_config.return_value,
        'image': 'ubuntu:latest',
        'user': None,
        'entrypoint': ['sh', '-c'],
        'working_dir': '/container/path',
        'tty': True,
    }
    self.client_mock.create_container.assert_called_once_with(**expected_container_kwargs)

    # Host configuration: the temp dir is expected as an extra bind mount.
    expected_host_config_kwargs = {
        'binds': ['/host/path:/container/path', '/mkdtemp:/tmp/airflow'],
        'network_mode': 'bridge',
        'shm_size': 1000,
        'cpu_shares': 1024,
        'mem_limit': None,
        'auto_remove': False,
        'dns': None,
        'dns_search': None,
        'cap_add': None,
        'extra_hosts': None,
        'privileged': False,
    }
    self.client_mock.create_host_config.assert_called_once_with(**expected_host_config_kwargs)

    # Remaining calls: temp dir, image lookup, log attach, pull, wait.
    self.tempdir_mock.assert_called_once_with(dir='/host/airflow', prefix='airflowtmp')
    self.client_mock.images.assert_called_once_with(name='ubuntu:latest')
    self.client_mock.attach.assert_called_once_with(
        container='some_id', stdout=True, stderr=True, stream=True
    )
    self.client_mock.pull.assert_called_once_with('ubuntu:latest', stream=True, decode=True)
    self.client_mock.wait.assert_called_once_with('some_id')

    # Must stay last: it calls pull a second time through operator.cli,
    # which would break the "called once" assertion above.
    pulled = operator.cli.pull('ubuntu:latest', stream=True, decode=True)
    assert pulled == self.client_mock.pull.return_value
def test_execute(self, client_class_mock, tempdir_mock):
    """Run a fully configured operator and verify every Docker client call it makes.

    Both parameters are injected mocks (presumably from ``mock.patch``
    decorators outside this view - confirm).
    """
    host_config_stub = mock.Mock()
    # The temp-dir mock is used as a context manager that yields '/mkdtemp'.
    tempdir_mock.return_value.__enter__.return_value = '/mkdtemp'

    # Docker client stub: every call used by execute() succeeds.
    docker_client = mock.Mock(spec=APIClient)
    docker_client.configure_mock(**{
        'create_container.return_value': {'Id': 'some_id'},
        'create_host_config.return_value': host_config_stub,
        'images.return_value': [],
        'attach.return_value': ['container log'],
        'logs.return_value': ['container log'],
        'pull.return_value': [b'{"status":"pull log"}'],
        'wait.return_value': {"StatusCode": 0},
    })
    client_class_mock.return_value = docker_client

    operator_kwargs = {
        'api_version': '1.19',
        'command': 'env',
        'environment': {'UNIT': 'TEST'},
        'image': 'ubuntu:latest',
        'network_mode': 'bridge',
        'owner': 'unittest',
        'task_id': 'unittest',
        'volumes': ['/host/path:/container/path'],
        'working_dir': '/container/path',
        'shm_size': 1000,
        'host_tmp_dir': '/host/airflow',
        'container_name': 'test_container',
        'tty': True,
    }
    DockerOperator(**operator_kwargs).execute(None)

    client_class_mock.assert_called_once_with(
        base_url='unix://var/run/docker.sock', tls=None, version='1.19'
    )

    expected_container_kwargs = {
        'command': 'env',
        'name': 'test_container',
        'environment': {'AIRFLOW_TMP_DIR': '/tmp/airflow', 'UNIT': 'TEST'},
        'host_config': host_config_stub,
        'image': 'ubuntu:latest',
        'user': None,
        'working_dir': '/container/path',
        'tty': True,
    }
    docker_client.create_container.assert_called_once_with(**expected_container_kwargs)

    # Host configuration: the temp dir is expected as an extra bind mount.
    expected_host_config_kwargs = {
        'binds': ['/host/path:/container/path', '/mkdtemp:/tmp/airflow'],
        'network_mode': 'bridge',
        'shm_size': 1000,
        'cpu_shares': 1024,
        'mem_limit': None,
        'auto_remove': False,
        'dns': None,
        'dns_search': None,
    }
    docker_client.create_host_config.assert_called_once_with(**expected_host_config_kwargs)

    tempdir_mock.assert_called_once_with(dir='/host/airflow', prefix='airflowtmp')
    docker_client.images.assert_called_once_with(name='ubuntu:latest')
    docker_client.attach.assert_called_once_with(
        container='some_id', stdout=True, stderr=True, stream=True
    )
    docker_client.pull.assert_called_once_with('ubuntu:latest', stream=True, decode=True)
    docker_client.wait.assert_called_once_with('some_id')
def test_execute_unicode_logs(self):
    """Logging a non-ASCII container log line must not trigger a logging error.

    With ``logging.raiseExceptions`` enabled, the logging module reports
    handler errors through ``traceback.print_exception``; the test asserts
    that this never happens while the operator streams a unicode log line.
    """
    self.client_mock.attach.return_value = ['unicode container log 😁']

    operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')

    # patch.object restores the global flag even if execute() raises, so a
    # failure here cannot leak the modified setting into other tests.
    with mock.patch.object(logging, 'raiseExceptions', True), mock.patch(
        'traceback.print_exception'
    ) as print_exception_mock:
        operator.execute(None)

    print_exception_mock.assert_not_called()
def test_execute_no_docker_conn_id_no_hook(self):
    """Without ``docker_conn_id`` the operator must never ask for a hook."""
    # No docker_conn_id here on purpose.
    operator = DockerOperator(image='publicregistry/someimage', owner='unittest', task_id='unittest')

    # Replace get_hook with a spy so any call to it can be counted.
    docker_hook = mock.Mock(name='DockerHook mock', spec=DockerHook)
    docker_hook.get_conn.return_value = self.client_mock
    get_hook_spy = mock.Mock(
        name='DockerOperator.get_hook mock',
        spec=DockerOperator.get_hook,
        return_value=docker_hook,
    )
    operator.get_hook = get_hook_spy

    operator.execute(None)

    assert operator.get_hook.call_count == 0, 'Hook called though no docker_conn_id configured'
def test_execute_container_fails(self, client_class_mock):
    """``execute`` must raise ``AirflowException`` when ``wait`` reports status 1.

    ``client_class_mock`` is an injected mock (presumably the patched client
    class - confirm against the decorator outside this view).
    """
    # Docker client stub whose container exits with a failure status.
    docker_client = mock.Mock(spec=APIClient)
    docker_client.configure_mock(**{
        'create_container.return_value': {'Id': 'some_id'},
        'create_host_config.return_value': mock.Mock(),
        'images.return_value': [],
        'attach.return_value': [],
        'pull.return_value': [],
        'wait.return_value': {"StatusCode": 1},
    })
    client_class_mock.return_value = docker_client

    failing_operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')
    self.assertRaises(AirflowException, failing_operator.execute, None)
def test_private_environment_is_private(self):
    """``private_environment`` must be stored on the underscore-prefixed attribute."""
    operator = DockerOperator(
        task_id='unittest',
        image='ubuntu:latest',
        private_environment={'PRIVATE': 'MESSAGE'},
    )
    stored_environment = operator._private_environment
    assert stored_environment == {
        'PRIVATE': 'MESSAGE'
    }, "To keep this private, it must be an underscored attribute."
def get_docker_operator(label: str, command: str) -> DockerOperator:
    """Build a ``DockerOperator`` for the ``airflow-<label>`` image.

    Args:
        label: Suffix used for both the image name (``airflow-<label>``) and
            the task id (``docker-<label>``).
        command: Command passed to the container.

    Returns:
        The configured operator, with XCom push disabled, bridge networking,
        and the module-level ``VOLUMES`` mounted.
    """
    return DockerOperator(
        task_id=f"docker-{label}",
        image=f"airflow-{label}",
        command=command,
        network_mode="bridge",
        volumes=VOLUMES,
        do_xcom_push=False,
    )
def test_execute_with_docker_conn_id_use_hook(self, hook_class_mock):
    """With ``docker_conn_id`` set, the Docker client must be obtained via the hook.

    ``hook_class_mock`` is an injected mock (presumably the patched hook
    class - confirm against the decorator outside this view).
    """
    # Operator configured with a connection id, which should route through the hook.
    operator_kwargs = {
        'image': 'publicregistry/someimage',
        'owner': 'unittest',
        'task_id': 'unittest',
        'docker_conn_id': 'some_conn_id',
    }
    operator = DockerOperator(**operator_kwargs)

    # Hook stub that hands back the shared client mock.
    docker_hook = mock.Mock(name='DockerHook mock', spec=DockerHook)
    docker_hook.get_conn.return_value = self.client_mock
    hook_class_mock.return_value = docker_hook

    operator.execute(None)

    client_constructions = self.client_class_mock.call_count
    assert client_constructions == 0, 'Client was called on the operator instead of the hook'
    hook_constructions = hook_class_mock.call_count
    assert hook_constructions == 1, 'Hook was not called although docker_conn_id configured'
    pull_calls = self.client_mock.pull.call_count
    assert pull_calls == 1, 'Image was not pulled using operator client'
def test_extra_hosts(self):
    """The ``extra_hosts`` argument must be forwarded untouched to ``create_host_config``."""
    # A unique sentinel lets assertIs prove the very same object was passed on.
    hosts_sentinel = mock.Mock()
    DockerOperator(task_id='test', image='test', extra_hosts=hosts_sentinel).execute(None)

    self.client_mock.create_container.assert_called_once()

    container_kwargs = self.client_mock.create_container.call_args.kwargs
    self.assertIn('host_config', container_kwargs)

    host_config_kwargs = self.client_mock.create_host_config.call_args.kwargs
    self.assertIn('extra_hosts', host_config_kwargs)
    self.assertIs(hosts_sentinel, host_config_kwargs['extra_hosts'])
def test_execute_unicode_logs(self, client_class_mock):
    """Logging a non-ASCII container log line must not trigger a logging error.

    With ``logging.raiseExceptions`` enabled, the logging module reports
    handler errors through ``traceback.print_exception``; the test asserts
    that this never happens while the operator streams a unicode log line.

    ``client_class_mock`` is an injected mock (presumably the patched client
    class - confirm against the decorator outside this view).
    """
    # Docker client stub whose attached log stream contains an emoji.
    client_mock = mock.Mock(spec=APIClient)
    client_mock.create_container.return_value = {'Id': 'some_id'}
    client_mock.create_host_config.return_value = mock.Mock()
    client_mock.images.return_value = []
    client_mock.attach.return_value = ['unicode container log 😁']
    client_mock.pull.return_value = []
    client_mock.wait.return_value = {"StatusCode": 0}
    client_class_mock.return_value = client_mock

    operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')

    # patch.object restores the global flag even if execute() raises, so a
    # failure here cannot leak the modified setting into other tests.
    with mock.patch.object(logging, 'raiseExceptions', True), mock.patch(
        'traceback.print_exception'
    ) as print_exception_mock:
        operator.execute(None)

    print_exception_mock.assert_not_called()
def test_execute_xcom_behavior(self):
    """``execute`` returns the log line only when ``do_xcom_push`` is enabled."""
    self.client_mock.pull.return_value = [b'{"status":"pull log"}']

    # Settings shared by both operators; only do_xcom_push differs.
    shared_kwargs = dict(
        api_version='1.19',
        command='env',
        environment={'UNIT': 'TEST'},
        private_environment={'PRIVATE': 'MESSAGE'},
        image='ubuntu:latest',
        network_mode='bridge',
        owner='unittest',
        task_id='unittest',
        volumes=['/host/path:/container/path'],
        working_dir='/container/path',
        shm_size=1000,
        host_tmp_dir='/host/airflow',
        container_name='test_container',
        tty=True,
    )

    pushing_operator = DockerOperator(do_xcom_push=True, **shared_kwargs)
    silent_operator = DockerOperator(do_xcom_push=False, **shared_kwargs)

    pushed_value = pushing_operator.execute(None)
    silent_value = silent_operator.execute(None)

    self.assertEqual(pushed_value, b'container log')
    self.assertIs(silent_value, None)
def test_on_kill():
    """``on_kill`` must stop the operator's current container by id."""
    docker_client = mock.Mock(spec=APIClient)

    operator = DockerOperator(image='ubuntu', owner='unittest', task_id='unittest')
    # Inject the client and a running container directly; execute() is not called.
    operator.cli = docker_client
    operator.container = {'Id': 'some_id'}

    operator.on_kill()

    docker_client.stop.assert_called_once_with('some_id')
def test_execute_xcom_behavior(self, client_class_mock, tempdir_mock):
    """``execute`` returns the log line only when ``do_xcom_push`` is enabled.

    Both parameters are injected mocks (presumably from ``mock.patch``
    decorators outside this view - confirm).
    """
    # The temp-dir mock is used as a context manager that yields '/mkdtemp'.
    tempdir_mock.return_value.__enter__.return_value = '/mkdtemp'

    # Docker client stub: every call used by execute() succeeds.
    docker_client = mock.Mock(spec=APIClient)
    docker_client.configure_mock(**{
        'images.return_value': [],
        'create_container.return_value': {'Id': 'some_id'},
        'attach.return_value': ['container log'],
        'pull.return_value': [b'{"status":"pull log"}'],
        'wait.return_value': {"StatusCode": 0},
    })
    client_class_mock.return_value = docker_client

    # Settings shared by both operators; only do_xcom_push differs.
    shared_kwargs = dict(
        api_version='1.19',
        command='env',
        environment={'UNIT': 'TEST'},
        private_environment={'PRIVATE': 'MESSAGE'},
        image='ubuntu:latest',
        network_mode='bridge',
        owner='unittest',
        task_id='unittest',
        volumes=['/host/path:/container/path'],
        working_dir='/container/path',
        shm_size=1000,
        host_tmp_dir='/host/airflow',
        container_name='test_container',
        tty=True,
    )

    pushing_operator = DockerOperator(do_xcom_push=True, **shared_kwargs)
    silent_operator = DockerOperator(do_xcom_push=False, **shared_kwargs)

    pushed_value = pushing_operator.execute(None)
    silent_value = silent_operator.execute(None)

    self.assertEqual(pushed_value, b'container log')
    self.assertIs(silent_value, None)
# Task "move_data": after a 30 second sleep, move the file named by t_view's
# output from the input mount to the output mount, then echo its new path.
# do_xcom_push=True is set so the task's output is pushed to XCom.
t_move = DockerOperator(
    task_id="move_data",
    dag=dag,
    api_version="1.19",
    docker_url="tcp://localhost:2375",  # replace it with swarm/docker endpoint
    image="centos:latest",
    network_mode="bridge",
    # Bind-mount the host input and output directories into the container.
    mounts=[
        Mount(type="bind", source="/your/host/input_dir/path", target="/your/input_dir/path"),
        Mount(type="bind", source="/your/host/output_dir/path", target="/your/output_dir/path"),
    ],
    params={
        "source_location": "/your/input_dir/path",
        "target_location": "/your/output_dir/path",
    },
    # The third element is one shell script; the {{ params.* }} placeholders
    # are left for Airflow to render.
    command=[
        "/bin/bash",
        "-c",
        "/bin/sleep 30; /bin/mv {{ params.source_location }}/"
        + str(t_view.output)
        + " {{ params.target_location }};/bin/echo '{{ params.target_location }}/"
        + f"{t_view.output}';",
    ],
    do_xcom_push=True,
)
schedule_interval="@weekly", start_date=days_ago(30), ) as dag: data_sensor = ExternalTaskSensor( task_id="data-sensor", external_dag_id="download", external_task_id="download", check_existence=True, timeout=30, ) split = DockerOperator( image="airflow-split", command=f"-l {DATA_RAW_PATH} -s {DATA_SPLIT_PATH}", network_mode="bridge", task_id="split", do_xcom_push=False, auto_remove=True, volumes=[f"{HOST_DATA_DIR}:/data"], ) fit_transformer = DockerOperator( image="airflow-fit-transformer", command=f"-l {DATA_SPLIT_PATH} -s {DATA_TRANSFORMED_PATH} -m {MODEL_PATH}", network_mode="bridge", task_id="fit_transformer", do_xcom_push=False, auto_remove=True, volumes=[f"{HOST_DATA_DIR}:/data"], )
'email_on_failure': False, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=5) } dag = DAG('docker_sample', default_args=default_args, schedule_interval=timedelta(minutes=10)) t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag) t2 = BashOperator(task_id='sleep', bash_command='sleep 5', retries=3, dag=dag) t3 = DockerOperator( api_version='1.19', docker_url='tcp://localhost:2375', # Set your docker URL command='/bin/sleep 30', image='centos:latest', network_mode='bridge', task_id='docker_op_tester', dag=dag) t4 = BashOperator(task_id='print_hello', bash_command='echo "hello world!!!"', dag=dag) t1 >> t2 t1 >> t3 t3 >> t4
'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=5), } dag = DAG( 'ml-example-docker', default_args=default_args, description='A simple ml example', schedule_interval=timedelta(days=1), start_date=days_ago(2), tags=['example'], ) t1 = DockerOperator( task_id='preprocessing-step', image='warvito/preprocessing-airflow:v1', command="python run.py", dag=dag, ) t2 = DockerOperator( task_id='classification-step', image='warvito/toy-example-classifier-airflow:v1', command="python run.py", dag=dag, ) t1 >> t2
filepath='/opt/airflow/data/raw/{{ ds }}/data.csv', task_id="await-data", poke_interval=10, retries=100, ) model_await = FileSensor( filepath='/opt/airflow/{{ var.value.model_dir }}/model.pkl', task_id="await-model", poke_interval=10, retries=100, ) preprocessing = DockerOperator( task_id="preprocessing", image="airflow-preprocess", command="--input-dir data/raw/{{ ds }} " "--output-dir data/processed/for_preds/{{ ds }} " "--prediction", network_mode="bridge", do_xcom_push=False, volumes=[DEFAULT_VOLUME]) prediction = DockerOperator( task_id="prediction", image="airflow-predict", command="--data-dir data/processed/for_preds/{{ ds }} " "--output-dir data/predictions/{{ ds }} " "--model-dir {{ var.value.model_dir }}", network_mode="bridge", do_xcom_push=False, volumes=[DEFAULT_VOLUME]) end_task = DummyOperator(task_id='end-prediction')
"retry_delay": timedelta(minutes=5), } # !!! HOST folder(NOT IN CONTAINER) replace with yours !!! HOST_DATA_DIR = '/home/stacy/Work/made/prod2/creative-crisis/airflow_ml_dags/' with DAG( "predict_target", default_args=default_args, schedule_interval="@daily", start_date=days_ago(5), ) as dag: preprocess = DockerOperator( image="airflow-preprocess", command= "--input-dir /data/raw/{{ ds }} --output-dir /data/processed/{{ ds }}", task_id="docker-airflow-preprocess", do_xcom_push=False, volumes=[f"{HOST_DATA_DIR}/data:/data"]) predict = DockerOperator(image="airflow-predict", command="--input-dir /data/processed/{{ ds }} " "--model-dir /data/model/{{ var.value.model }} " "--output-dir /data/predictions/{{ ds }}", task_id="docker-airflow-predict", do_xcom_push=False, volumes=[f"{HOST_DATA_DIR}/data:/data"]) preprocess >> predict
data_sensor = FileSensor(task_id="Wait_for_data", poke_interval=10, retries=100, filepath="data/raw/{{ ds }}/data.csv") target_sensor = FileSensor(task_id="Wait_for_target", poke_interval=10, retries=100, filepath="data/raw/{{ ds }}/target.csv") preprocess = DockerOperator( task_id="Data_preprocess", image="airflow-preprocess", command= "/data/raw/{{ ds }} /data/processed/{{ ds }} /data/model/{{ ds }}", network_mode="bridge", do_xcom_push=False, volumes=[VOLUME], ) split = DockerOperator( task_id="Split_data", image="airflow-split", command="/data/processed/{{ ds }} /data/splitted/{{ ds }}", network_mode="bridge", do_xcom_push=False, volumes=[VOLUME]) train = DockerOperator( task_id="Train_model",
# DAG "pred-data": runs the ``airflow-predict`` image once a day to produce
# predictions for the run's logical date.
from datetime import timedelta

from airflow import DAG
from airflow.models import Variable  # noqa: F401 - no longer used by this DAG; import kept so the module's import surface is unchanged
from airflow.providers.docker.operators.docker import DockerOperator
from airflow.utils.dates import days_ago

# Host path mapped to /data inside the container ("host:container").
DATA_PATH = "/Users/mariapopova/Documents/GitHub/chydlife/airflow_ml_dags/data:/data"

default_args = {
    "owner": "airflow",
    "email": ["*****@*****.**"],
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

with DAG(
    "pred-data",
    default_args=default_args,
    schedule_interval="@daily",
    start_date=days_ago(5),
) as dag:
    predict = DockerOperator(
        image="airflow-predict",
        # Plain (non-f) string on purpose: inside an f-string "{{ ds }}" collapses
        # to the literal "{ ds }", which Jinja never renders. The model path is
        # read through the template too, so the Variable is resolved when the
        # task runs rather than every time the scheduler parses this file.
        command=(
            "--input-dir /data/processed/{{ ds }} "
            "--output-dir /data/predictions/{{ ds }} "
            "--model_path {{ var.value.MODEL_PATH }}"
        ),
        task_id="docker-airflow-predict",
        do_xcom_push=False,
        volumes=[DATA_PATH],
    )
# 'execution_timeout': timedelta(seconds=300), # 'on_failure_callback': some_function, # 'on_success_callback': some_other_function, # 'on_retry_callback': another_function, # 'sla_miss_callback': yet_another_function, # 'trigger_rule': 'all_success' } dag = DAG( 'stock_update_with_docker_operator', default_args=default_args, description='update latest stock price daily.', schedule_interval='0 0 * * *', start_date=days_ago(2), tags=['stock'], ) task1 = DockerOperator(command='price', task_id='update_stock_price_by_crawler', image="stock_update", dag=dag, do_xcom_push=False) task2 = DockerOperator(command='stats', task_id='update_revenue_stats', image="stock_update", dag=dag, do_xcom_push=False) task1 >> task2