Code Example #1
File: test_solid_execution.py  Project: sd2k/dagster
def test_template_task_dag():
    dag = DAG(
        dag_id="dag",
        default_args=default_args,
        schedule_interval=None,
    )

    t1 = BashOperator(
        task_id="print_hello",
        bash_command="echo hello dagsir",
        dag=dag,
    )

    t2 = BashOperator(
        task_id="sleep",
        bash_command="sleep 2",
        dag=dag,
    )

    templated_command = """
    {% for i in range(5) %}
        echo '{{ ds }}'
        echo '{{ macros.ds_add(ds, 7)}}'
        echo '{{ params.my_param }}'
    {% endfor %}
    """

    t3 = BashOperator(
        task_id="templated",
        depends_on_past=False,
        bash_command=templated_command,
        params={"my_param": "Parameter I passed in"},
        dag=dag,
    )

    # pylint: disable=pointless-statement
    t1 >> [t2, t3]

    instance = DagsterInstance.local_temp()
    manager = instance.compute_log_manager

    execution_date = get_current_datetime_in_utc()
    execution_date_add_one_week = execution_date + datetime.timedelta(days=7)
    execution_date_iso = execution_date.strftime("%Y-%m-%d")
    execution_date_add_one_week_iso = execution_date_add_one_week.strftime(
        "%Y-%m-%d")

    result = execute_pipeline(
        make_dagster_pipeline_from_airflow_dag(
            dag=dag, tags={AIRFLOW_EXECUTION_DATE_STR: execution_date_iso}),
        instance=instance,
    )

    compute_steps = [
        event.step_key for event in result.step_event_list
        if event.event_type == DagsterEventType.STEP_START
    ]

    assert compute_steps == [
        "airflow_print_hello.compute",
        "airflow_sleep.compute",
        "airflow_templated.compute",
    ]

    for step_key in compute_steps:
        compute_io_path = manager.get_local_path(result.run_id, step_key,
                                                 ComputeIOType.STDOUT)
        assert os.path.exists(compute_io_path)
        with open(compute_io_path, "r") as stdout_file:
            file_contents = normalize_file_content(stdout_file.read())

        if step_key == "airflow_print_hello.compute":
            assert file_contents.count(
                "INFO - Running command: echo hello dagsir\n") == 1
            assert file_contents.count(
                "INFO - Command exited with return code 0") == 1

        elif step_key == "airflow_sleep.compute":
            assert file_contents.count(
                "INFO - Running command: sleep 2\n") == 1
            assert file_contents.count("INFO - Output:\n") == 1
            assert file_contents.count(
                "INFO - Command exited with return code 0") == 1

        elif step_key == "airflow_templated.compute":
            assert (file_contents.count(
                "INFO - Running command: \n    \n        "
                "echo '{execution_date_iso}'\n        "
                "echo '{execution_date_add_one_week_iso}'\n        "
                "echo 'Parameter I passed in'\n    \n        "
                "echo '{execution_date_iso}'\n        "
                "echo '{execution_date_add_one_week_iso}'\n        "
                "echo 'Parameter I passed in'\n    \n        "
                "echo '{execution_date_iso}'\n        "
                "echo '{execution_date_add_one_week_iso}'\n        "
                "echo 'Parameter I passed in'\n    \n        "
                "echo '{execution_date_iso}'\n        "
                "echo '{execution_date_add_one_week_iso}'\n        "
                "echo 'Parameter I passed in'\n    \n        "
                "echo '{execution_date_iso}'\n        "
                "echo '{execution_date_add_one_week_iso}'\n        "
                "echo 'Parameter I passed in'\n    \n    \n".format(
                    execution_date_iso=execution_date_iso,
                    execution_date_add_one_week_iso=
                    execution_date_add_one_week_iso,
                )) == 1)
            assert (file_contents.count("INFO - {execution_date_iso}\n".format(
                execution_date_iso=execution_date_iso)) == 5)
            assert (file_contents.count(
                "INFO - {execution_date_add_one_week_iso}\n".format(
                    execution_date_add_one_week_iso=
                    execution_date_add_one_week_iso)) == 5)
            assert file_contents.count("INFO - Parameter I passed in\n") == 5
            assert file_contents.count(
                "INFO - Command exited with return code 0") == 1
Code Example #2
    def test_failure_callback_only_called_once(self, mock_return_code,
                                               _check_call):
        """
        Test that ensures that when a task exits with failure by itself,
        failure callback is only called once
        """
        # use shared memory value so we can properly track value change even if
        # it's been updated across processes.
        failure_callback_called = Value('i', 0)
        callback_count_lock = Lock()

        def failure_callback(context):
            with callback_count_lock:
                failure_callback_called.value += 1
            assert context['dag_run'].dag_id == 'test_failure_callback_race'
            assert isinstance(context['exception'], AirflowFailException)

        def task_function(ti):
            raise AirflowFailException()

        dag = DAG(dag_id='test_failure_callback_race', start_date=DEFAULT_DATE)
        task = PythonOperator(
            task_id='test_exit_on_failure',
            python_callable=task_function,
            on_failure_callback=failure_callback,
            dag=dag,
        )

        dag.clear()
        with create_session() as session:
            dag.create_dagrun(
                run_id="test",
                state=State.RUNNING,
                execution_date=DEFAULT_DATE,
                start_date=DEFAULT_DATE,
                session=session,
            )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.refresh_from_db()

        job1 = LocalTaskJob(task_instance=ti,
                            ignore_ti_state=True,
                            executor=SequentialExecutor())

        # Simulate race condition where job1 heartbeat ran right after task
        # state got set to failed by ti.handle_failure but before task process
        # fully exits. See _execute loop in airflow/jobs/local_task_job.py.
        # In this case, we have:
        #  * task_runner.return_code() is None
        #  * ti.state == State.Failed
        #
        # We also need to set return_code to a valid int after job1.terminating
        # is set to True so _execute loop won't loop forever.
        def dummy_return_code(*args, **kwargs):
            return None if not job1.terminating else -9

        mock_return_code.side_effect = dummy_return_code

        with timeout(10):
            # job1.run() should finish well within this limit; the timeout only
            # guards against the _execute loop never terminating.
            job1.run()

        ti.refresh_from_db()
        assert ti.state == State.FAILED  # task exits with failure state
        assert failure_callback_called.value == 1
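
    # The test signature receives mock_return_code and _check_call, implying
    # patch decorators roughly like the following (the exact patch targets are
    # assumptions; mock arguments are injected bottom-up):
    #
    #     @mock.patch('subprocess.check_call')
    #     @mock.patch.object(StandardTaskRunner, 'return_code')
    #     def test_failure_callback_only_called_once(self, mock_return_code,
    #                                                _check_call):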
Code Example #3
"""
This dag only runs some simple tasks to test Airflow's task execution.
"""
from datetime import datetime, timedelta

from airflow.models.dag import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.dates import days_ago

now = datetime.now()
# timedelta's positional arguments are (days, seconds, microseconds,
# milliseconds, minutes, hours), so this subtracts three hours.
now_to_the_hour = (now - timedelta(0, 0, 0, 0, 0, 3)).replace(minute=0,
                                                              second=0,
                                                              microsecond=0)
START_DATE = now_to_the_hour
DAG_NAME = 'test_dag_v1'

default_args = {
    'owner': 'airflow',
    'depends_on_past': True,
    'start_date': days_ago(2)
}
dag = DAG(DAG_NAME,
          schedule_interval='*/10 * * * *',
          default_args=default_args)

run_this_1 = DummyOperator(task_id='run_this_1', dag=dag)
run_this_2 = DummyOperator(task_id='run_this_2', dag=dag)
run_this_2.set_upstream(run_this_1)
run_this_3 = DummyOperator(task_id='run_this_3', dag=dag)
run_this_3.set_upstream(run_this_2)
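
# Note: a.set_upstream(b) is equivalent to b >> a, so the two set_upstream
# calls above collapse to: run_this_1 >> run_this_2 >> run_this_3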
Code Example #4
    def test_external_task_sensor_fn_multiple_execution_dates(self):
        bash_command_code = """
{% set s=execution_date.time().second %}
echo "second is {{ s }}"
if [[ $(( {{ s }} % 60 )) == 1 ]]
    then
        exit 1
fi
exit 0
"""
        dag_external_id = TEST_DAG_ID + '_external'
        dag_external = DAG(dag_external_id,
                           default_args=self.args,
                           schedule_interval=timedelta(seconds=1))
        task_external_with_failure = BashOperator(
            task_id="task_external_with_failure",
            bash_command=bash_command_code,
            retries=0,
            dag=dag_external)
        task_external_without_failure = DummyOperator(
            task_id="task_external_without_failure",
            retries=0,
            dag=dag_external)

        task_external_without_failure.run(start_date=DEFAULT_DATE,
                                          end_date=DEFAULT_DATE +
                                          timedelta(seconds=1),
                                          ignore_ti_state=True)

        session = settings.Session()
        TI = TaskInstance
        try:
            task_external_with_failure.run(start_date=DEFAULT_DATE,
                                           end_date=DEFAULT_DATE +
                                           timedelta(seconds=1),
                                           ignore_ti_state=True)
            # The task_external_with_failure task is expected to fail
            # once per minute (on the run that lands on the first
            # second of each minute).
        except Exception as e:  # pylint: disable=broad-except
            failed_tis = session.query(TI).filter(
                TI.dag_id == dag_external_id, TI.state == State.FAILED,
                TI.execution_date == DEFAULT_DATE +
                timedelta(seconds=1)).all()
            if len(failed_tis) == 1 and \
               failed_tis[0].task_id == 'task_external_with_failure':
                pass
            else:
                raise e

        dag_id = TEST_DAG_ID
        dag = DAG(dag_id,
                  default_args=self.args,
                  schedule_interval=timedelta(minutes=1))
        task_without_failure = ExternalTaskSensor(
            task_id='task_without_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_without_failure',
            execution_date_fn=lambda dt:
            [dt + timedelta(seconds=i) for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag)
        task_with_failure = ExternalTaskSensor(
            task_id='task_with_failure',
            external_dag_id=dag_external_id,
            external_task_id='task_external_with_failure',
            execution_date_fn=lambda dt:
            [dt + timedelta(seconds=i) for i in range(2)],
            allowed_states=['success'],
            retries=0,
            timeout=1,
            poke_interval=1,
            dag=dag)

        task_without_failure.run(start_date=DEFAULT_DATE,
                                 end_date=DEFAULT_DATE,
                                 ignore_ti_state=True)

        with self.assertRaises(AirflowSensorTimeout):
            task_with_failure.run(start_date=DEFAULT_DATE,
                                  end_date=DEFAULT_DATE,
                                  ignore_ti_state=True)
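
        # Because execution_date_fn returns a list, each poke checks the
        # external task at every returned execution date and succeeds only
        # when all of them reach an allowed state. The lambdas above are
        # equivalent to:
        #     def execution_dates(dt):
        #         return [dt + timedelta(seconds=i) for i in range(2)]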
Code Example #5
    def setUp(self):
        self.dagbag = DagBag(dag_folder=DEV_NULL, include_examples=True)
        self.args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=self.args)
Code Example #6
from parameterized import parameterized

from airflow import settings
from airflow.models import Variable
from airflow.models.dag import DAG
from airflow.models.renderedtifields import RenderedTaskInstanceFields as RTIF
from airflow.models.taskinstance import TaskInstance as TI
from airflow.operators.bash import BashOperator
from airflow.utils.session import create_session
from airflow.utils.timezone import datetime
from airflow.version import version
from tests.test_utils.asserts import assert_queries_count
from tests.test_utils.db import clear_rendered_ti_fields

TEST_DAG = DAG("example_rendered_ti_field", schedule_interval=None)
START_DATE = datetime(2018, 1, 1)
EXECUTION_DATE = datetime(2019, 1, 1)


class ClassWithCustomAttributes:
    """Class for testing purpose: allows to create objects with custom attributes in one single statement."""

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __str__(self):
        return "{}({})".format(ClassWithCustomAttributes.__name__, str(self.__dict__))

    def __repr__(self):
        return self.__str__()  # assumed completion of the truncated method
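
# Example usage, consistent with the docstring above:
obj = ClassWithCustomAttributes(attr1="a", attr2=2)
print(obj)  # ClassWithCustomAttributes({'attr1': 'a', 'attr2': 2})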
Code Example #7
    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        dag = DAG(TEST_DAG_ID, default_args=args)
        self.dag = dag

        rows = [
            (1880, "John", 0.081541, "boy"),
            (1880, "William", 0.080511, "boy"),
            (1880, "James", 0.050057, "boy"),
            (1880, "Charles", 0.045167, "boy"),
            (1880, "George", 0.043292, "boy"),
            (1880, "Frank", 0.02738, "boy"),
            (1880, "Joseph", 0.022229, "boy"),
            (1880, "Thomas", 0.021401, "boy"),
            (1880, "Henry", 0.020641, "boy"),
            (1880, "Robert", 0.020404, "boy"),
            (1880, "Edward", 0.019965, "boy"),
            (1880, "Harry", 0.018175, "boy"),
            (1880, "Walter", 0.014822, "boy"),
            (1880, "Arthur", 0.013504, "boy"),
            (1880, "Fred", 0.013251, "boy"),
            (1880, "Albert", 0.012609, "boy"),
            (1880, "Samuel", 0.008648, "boy"),
            (1880, "David", 0.007339, "boy"),
            (1880, "Louis", 0.006993, "boy"),
            (1880, "Joe", 0.006174, "boy"),
            (1880, "Charlie", 0.006165, "boy"),
            (1880, "Clarence", 0.006165, "boy"),
            (1880, "Richard", 0.006148, "boy"),
            (1880, "Andrew", 0.005439, "boy"),
            (1880, "Daniel", 0.00543, "boy"),
            (1880, "Ernest", 0.005194, "boy"),
            (1880, "Will", 0.004966, "boy"),
            (1880, "Jesse", 0.004805, "boy"),
            (1880, "Oscar", 0.004594, "boy"),
            (1880, "Lewis", 0.004366, "boy"),
            (1880, "Peter", 0.004189, "boy"),
            (1880, "Benjamin", 0.004138, "boy"),
            (1880, "Frederick", 0.004079, "boy"),
            (1880, "Willie", 0.00402, "boy"),
            (1880, "Alfred", 0.003961, "boy"),
            (1880, "Sam", 0.00386, "boy"),
            (1880, "Roy", 0.003716, "boy"),
            (1880, "Herbert", 0.003581, "boy"),
            (1880, "Jacob", 0.003412, "boy"),
            (1880, "Tom", 0.00337, "boy"),
            (1880, "Elmer", 0.00315, "boy"),
            (1880, "Carl", 0.003142, "boy"),
            (1880, "Lee", 0.003049, "boy"),
            (1880, "Howard", 0.003015, "boy"),
            (1880, "Martin", 0.003015, "boy"),
            (1880, "Michael", 0.00299, "boy"),
            (1880, "Bert", 0.002939, "boy"),
            (1880, "Herman", 0.002931, "boy"),
            (1880, "Jim", 0.002914, "boy"),
            (1880, "Francis", 0.002905, "boy"),
            (1880, "Harvey", 0.002905, "boy"),
            (1880, "Earl", 0.002829, "boy"),
            (1880, "Eugene", 0.00277, "boy"),
        ]

        self.env_vars = {
            'AIRFLOW_CTX_DAG_ID': 'test_dag_id',
            'AIRFLOW_CTX_TASK_ID': 'test_task_id',
            'AIRFLOW_CTX_EXECUTION_DATE': '2015-01-01T00:00:00+00:00',
            'AIRFLOW_CTX_DAG_RUN_ID': '55',
            'AIRFLOW_CTX_DAG_OWNER': 'airflow',
            'AIRFLOW_CTX_DAG_EMAIL': '*****@*****.**',
        }

        with MySqlHook().get_conn() as cur:
            cur.execute('''
            CREATE TABLE IF NOT EXISTS baby_names (
              org_year integer(4),
              baby_name VARCHAR(25),
              rate FLOAT(7,6),
              sex VARCHAR(4)
            )
            ''')

            # Load the sample rows while the cursor is still open.
            for row in rows:
                cur.execute("INSERT INTO baby_names VALUES(%s, %s, %s, %s);", row)
Code Example #8
    def test_retry_on_error_sending_task(self):
        """Test that Airflow retries publishing tasks to Celery Broker atleast 3 times"""

        def fake_execute_command(command):
            print(command)

        with _prepare_app(execute=fake_execute_command), self.assertLogs(
            celery_executor.log
        ) as cm, mock.patch.object(celery_executor, "OPERATION_TIMEOUT", 0.001):
            # fake_execute_command takes no arguments while execute_command takes 1,
            # which will cause TypeError when calling task.apply_async()
            executor = celery_executor.CeleryExecutor()
            self.assertEqual(executor.task_publish_retries, {})
            self.assertEqual(executor.task_publish_max_retries, 3, msg="Assert Default Max Retries is 3")

            task = BashOperator(
                task_id="test", bash_command="true", dag=DAG(dag_id='id'), start_date=datetime.now()
            )
            when = datetime.now()
            value_tuple = (
                'command',
                1,
                None,
                SimpleTaskInstance(ti=TaskInstance(task=task, execution_date=datetime.now())),
            )
            key = ('fail', 'fake_simple_ti', when, 0)
            executor.queued_tasks[key] = value_tuple

            # Test that when heartbeat is called again, task is published again to Celery Queue
            executor.heartbeat()
            self.assertEqual(dict(executor.task_publish_retries), {key: 2})
            self.assertEqual(1, len(executor.queued_tasks), "Task should remain in queue")
            self.assertEqual(executor.event_buffer, {})
            self.assertIn(
                "INFO:airflow.executors.celery_executor.CeleryExecutor:"
                f"[Try 1 of 3] Task Timeout Error for Task: ({key}).",
                cm.output,
            )

            executor.heartbeat()
            self.assertEqual(dict(executor.task_publish_retries), {key: 3})
            self.assertEqual(1, len(executor.queued_tasks), "Task should remain in queue")
            self.assertEqual(executor.event_buffer, {})
            self.assertIn(
                "INFO:airflow.executors.celery_executor.CeleryExecutor:"
                f"[Try 2 of 3] Task Timeout Error for Task: ({key}).",
                cm.output,
            )

            executor.heartbeat()
            self.assertEqual(dict(executor.task_publish_retries), {key: 4})
            self.assertEqual(1, len(executor.queued_tasks), "Task should remain in queue")
            self.assertEqual(executor.event_buffer, {})
            self.assertIn(
                "INFO:airflow.executors.celery_executor.CeleryExecutor:"
                f"[Try 3 of 3] Task Timeout Error for Task: ({key}).",
                cm.output,
            )

            executor.heartbeat()
            self.assertEqual(dict(executor.task_publish_retries), {})
            self.assertEqual(0, len(executor.queued_tasks), "Task should no longer be in queue")
            self.assertEqual(executor.event_buffer[('fail', 'fake_simple_ti', when, 0)][0], State.FAILED)
Code Example #9
    "resources_vpc_config": {
        "subnetIds": ["subnet-12345ab", "subnet-67890cd"],
        "endpointPublicAccess": true,
        "endpointPrivateAccess": false
    },
    "nodegroup_name": "templated-nodegroup",
    "nodegroup_subnets": "['subnet-12345ab', 'subnet-67890cd']",
    "nodegroup_role_arn": "arn:aws:iam::123456789012:role/role_name"
}
"""

with DAG(
        dag_id='example_eks_templated',
        schedule_interval=None,
        start_date=datetime(2021, 1, 1),
        tags=['example', 'templated'],
        catchup=False,
        # render_template_as_native_obj=True renders the Jinja templates into native Python objects instead of strings.
        render_template_as_native_obj=True,
) as dag:

    CLUSTER_NAME = "{{ dag_run.conf['cluster_name'] }}"
    NODEGROUP_NAME = "{{ dag_run.conf['nodegroup_name'] }}"

    # Create an Amazon EKS Cluster control plane without attaching a compute service.
    create_cluster = EksCreateClusterOperator(
        task_id='create_eks_cluster',
        cluster_name=CLUSTER_NAME,
        compute=None,
        cluster_role_arn="{{ dag_run.conf['cluster_role_arn'] }}",
        resources_vpc_config="{{ dag_run.conf['resources_vpc_config'] }}",
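
    # Values for the templated fields arrive at trigger time through
    # dag_run.conf; with render_template_as_native_obj=True they are rendered
    # into native Python objects rather than strings. A hypothetical trigger:
    #
    #   airflow dags trigger example_eks_templated \
    #       --conf '{"cluster_name": "templated-cluster", "cluster_role_arn": "arn:aws:iam::123456789012:role/role_name"}'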
Code Example #10
    "nodegroup_subnets": ["subnet-12345ab", "subnet-67890cd"],
    "resources_vpc_config": {
        "subnetIds": ["subnet-12345ab", "subnet-67890cd"],
        "endpointPublicAccess": true,
        "endpointPrivateAccess": false
    },
    "nodegroup_name": "templated-nodegroup",
    "nodegroup_role_arn": "arn:aws:iam::123456789012:role/role_name"
}
"""

with DAG(
        dag_id='to-publish-manuals-templated',
        schedule_interval=None,
        start_date=days_ago(2),
        max_active_runs=1,
        tags=['example', 'templated'],
        # render_template_as_native_obj=True renders the Jinja templates into native Python objects instead of strings.
        render_template_as_native_obj=True,
) as dag:

    # Create an Amazon EKS Cluster control plane without attaching a compute service.
    create_cluster = EKSCreateClusterOperator(
        task_id='create_eks_cluster',
        compute=None,
        cluster_name="{{ dag_run.conf['cluster_name'] }}",
        cluster_role_arn="{{ dag_run.conf['cluster_role_arn'] }}",
        resources_vpc_config="{{ dag_run.conf['resources_vpc_config'] }}",
    )

    await_create_cluster = EKSClusterStateSensor(
Code Example #11

from airflow.models.dag import DAG
from airflow.providers.alibaba.cloud.operators.oss import (
    OSSDeleteBatchObjectOperator,
    OSSDeleteObjectOperator,
    OSSDownloadObjectOperator,
    OSSUploadObjectOperator,
)
from airflow.utils.dates import days_ago

with DAG(
    dag_id='oss_object_dag',
    start_date=days_ago(2),
    max_active_runs=1,
    tags=['example'],
) as dag:

    create_object = OSSUploadObjectOperator(
        file='your local file',
        key='your oss key',
        oss_conn_id='oss_default',
        region='your region',
        task_id='task1',
        bucket_name='your bucket',
    )

    download_object = OSSDownloadObjectOperator(
        file='your local file',
        key='your oss key',
        # assumed completion of the truncated excerpt, mirroring the upload task above:
        oss_conn_id='oss_default',
        region='your region',
        task_id='task2',
        bucket_name='your bucket',
    )
Code Example #12
def test_complex_dag(snapshot):
    dag = DAG(dag_id="complex_dag",
              default_args=default_args,
              schedule_interval=None)

    # Create
    create_entry_group = DummyOperator(
        task_id="create_entry_group",
        dag=dag,
    )
    create_entry_group_result = DummyOperator(
        task_id="create_entry_group_result",
        dag=dag,
    )
    create_entry_group_result2 = DummyOperator(
        task_id="create_entry_group_result2",
        dag=dag,
    )
    create_entry_gcs = DummyOperator(
        task_id="create_entry_gcs",
        dag=dag,
    )
    create_entry_gcs_result = DummyOperator(
        task_id="create_entry_gcs_result",
        dag=dag,
    )
    create_entry_gcs_result2 = DummyOperator(
        task_id="create_entry_gcs_result2",
        dag=dag,
    )
    create_tag = DummyOperator(
        task_id="create_tag",
        dag=dag,
    )
    create_tag_result = DummyOperator(
        task_id="create_tag_result",
        dag=dag,
    )
    create_tag_result2 = DummyOperator(
        task_id="create_tag_result2",
        dag=dag,
    )
    create_tag_template = DummyOperator(
        task_id="create_tag_template",
        dag=dag,
    )
    create_tag_template_result = DummyOperator(
        task_id="create_tag_template_result",
        dag=dag,
    )
    create_tag_template_result2 = DummyOperator(
        task_id="create_tag_template_result2",
        dag=dag,
    )
    create_tag_template_field = DummyOperator(
        task_id="create_tag_template_field",
        dag=dag,
    )
    create_tag_template_field_result = DummyOperator(
        task_id="create_tag_template_field_result",
        dag=dag,
    )
    create_tag_template_field_result2 = DummyOperator(
        task_id="create_tag_template_field_result2",
        dag=dag,
    )

    # Delete
    delete_entry = DummyOperator(
        task_id="delete_entry",
        dag=dag,
    )
    create_entry_gcs >> delete_entry
    delete_entry_group = DummyOperator(
        task_id="delete_entry_group",
        dag=dag,
    )
    create_entry_group >> delete_entry_group
    delete_tag = DummyOperator(
        task_id="delete_tag",
        dag=dag,
    )
    create_tag >> delete_tag
    delete_tag_template_field = DummyOperator(
        task_id="delete_tag_template_field",
        dag=dag,
    )
    delete_tag_template = DummyOperator(
        task_id="delete_tag_template",
        dag=dag,
    )

    # Get
    get_entry_group = DummyOperator(
        task_id="get_entry_group",
        dag=dag,
    )
    get_entry_group_result = DummyOperator(
        task_id="get_entry_group_result",
        dag=dag,
    )
    get_entry = DummyOperator(
        task_id="get_entry",
        dag=dag,
    )
    get_entry_result = DummyOperator(
        task_id="get_entry_result",
        dag=dag,
    )
    get_tag_template = DummyOperator(
        task_id="get_tag_template",
        dag=dag,
    )
    get_tag_template_result = DummyOperator(
        task_id="get_tag_template_result",
        dag=dag,
    )

    # List
    list_tags = DummyOperator(
        task_id="list_tags",
        dag=dag,
    )
    list_tags_result = DummyOperator(
        task_id="list_tags_result",
        dag=dag,
    )

    # Lookup
    lookup_entry = DummyOperator(
        task_id="lookup_entry",
        dag=dag,
    )
    lookup_entry_result = DummyOperator(
        task_id="lookup_entry_result",
        dag=dag,
    )

    # Rename
    rename_tag_template_field = DummyOperator(
        task_id="rename_tag_template_field",
        dag=dag,
    )

    # Search
    search_catalog = DummyOperator(
        task_id="search_catalog",
        dag=dag,
    )
    search_catalog_result = DummyOperator(
        task_id="search_catalog_result",
        dag=dag,
    )

    # Update
    update_entry = DummyOperator(
        task_id="update_entry",
        dag=dag,
    )
    update_tag = DummyOperator(
        task_id="update_tag",
        dag=dag,
    )
    update_tag_template = DummyOperator(
        task_id="update_tag_template",
        dag=dag,
    )
    update_tag_template_field = DummyOperator(
        task_id="update_tag_template_field",
        dag=dag,
    )

    # Create
    create_tasks = [
        create_entry_group,
        create_entry_gcs,
        create_tag_template,
        create_tag_template_field,
        create_tag,
    ]
    chain(*create_tasks)

    create_entry_group >> delete_entry_group
    create_entry_group >> create_entry_group_result
    create_entry_group >> create_entry_group_result2

    create_entry_gcs >> delete_entry
    create_entry_gcs >> create_entry_gcs_result
    create_entry_gcs >> create_entry_gcs_result2

    create_tag_template >> delete_tag_template_field
    create_tag_template >> create_tag_template_result
    create_tag_template >> create_tag_template_result2

    create_tag_template_field >> delete_tag_template_field
    create_tag_template_field >> create_tag_template_field_result
    create_tag_template_field >> create_tag_template_field_result2

    create_tag >> delete_tag
    create_tag >> create_tag_result
    create_tag >> create_tag_result2

    # Delete
    delete_tasks = [
        delete_tag,
        delete_tag_template_field,
        delete_tag_template,
        delete_entry_group,
        delete_entry,
    ]
    chain(*delete_tasks)

    # Get
    create_tag_template >> get_tag_template >> delete_tag_template
    get_tag_template >> get_tag_template_result

    create_entry_gcs >> get_entry >> delete_entry
    get_entry >> get_entry_result

    create_entry_group >> get_entry_group >> delete_entry_group
    get_entry_group >> get_entry_group_result

    # List
    create_tag >> list_tags >> delete_tag
    list_tags >> list_tags_result

    # Lookup
    create_entry_gcs >> lookup_entry >> delete_entry
    lookup_entry >> lookup_entry_result

    # Rename
    create_tag_template_field >> rename_tag_template_field >> delete_tag_template_field

    # Search
    chain(create_tasks, search_catalog, delete_tasks)
    search_catalog >> search_catalog_result

    # Update
    create_entry_gcs >> update_entry >> delete_entry
    create_tag >> update_tag >> delete_tag
    create_tag_template >> update_tag_template >> delete_tag_template
    create_tag_template_field >> update_tag_template_field >> rename_tag_template_field

    snapshot.assert_match(
        serialize_pp(
            PipelineSnapshot.from_pipeline_def(
                make_dagster_pipeline_from_airflow_dag(
                    dag=dag)).dep_structure_snapshot))
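
    # chain(*create_tasks) above is shorthand for pairwise composition:
    #     create_entry_group >> create_entry_gcs >> create_tag_template \
    #         >> create_tag_template_field >> create_tag
    # and chain(create_tasks, search_catalog, delete_tasks) points every
    # create task at search_catalog, then search_catalog at every delete task.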
Code Example #13
NODEGROUP_SUFFIX = '-nodegroup'
NODEGROUP_NAME = CLUSTER_NAME + NODEGROUP_SUFFIX
ROLE_ARN = environ.get('EKS_DEMO_ROLE_ARN',
                       'arn:aws:iam::123456789012:role/role_name')
SUBNETS = environ.get('EKS_DEMO_SUBNETS',
                      'subnet-12345ab subnet-67890cd').split(' ')
VPC_CONFIG = {
    'subnetIds': SUBNETS,
    'endpointPublicAccess': True,
    'endpointPrivateAccess': False,
}

with DAG(
        dag_id='example_eks_with_nodegroups_dag',
        schedule_interval=None,
        start_date=days_ago(2),
        max_active_runs=1,
        tags=['example'],
) as dag:

    # [START howto_operator_eks_create_cluster]
    # Create an Amazon EKS Cluster control plane without attaching a compute service.
    create_cluster = EKSCreateClusterOperator(
        task_id='create_eks_cluster',
        cluster_name=CLUSTER_NAME,
        cluster_role_arn=ROLE_ARN,
        resources_vpc_config=VPC_CONFIG,
        compute=None,
    )
    # [END howto_operator_eks_create_cluster]
Code Example #14
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.slack.operators.slack import SlackAPIFileOperator

with DAG(
        dag_id='slack_example_dag',
        schedule_interval=None,
        start_date=datetime(2021, 1, 1),
        default_args={
            'slack_conn_id': 'slack',
            'channel': '#general',
            'initial_comment': 'Hello World!'
        },
        max_active_runs=1,
        tags=['example'],
) as dag:
    # [START slack_operator_howto_guide_send_file]
    # Send file with filename and filetype
    slack_operator_file = SlackAPIFileOperator(
        task_id="slack_file_upload_1",
        filename="/files/dags/test.txt",
        filetype="txt",
    )
    # [END slack_operator_howto_guide_send_file]

    # [START slack_operator_howto_guide_send_file_content]
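
    # A plausible completion of the section opened above: SlackAPIFileOperator
    # can send inline `content` instead of a file path (these values are
    # assumptions, not the original example's):
    slack_operator_file_content = SlackAPIFileOperator(
        task_id="slack_file_upload_2",
        content="file content in txt",
    )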
Code Example #15
    def setUp(self):
        self.dag = DAG(TEST_DAG_ID, default_args=self.TRAINING_DEFAULT_ARGS)
Code Example #16
from airflow.models.dag import DAG
from airflow.utils import timezone
from airflow.contrib.jobs.event_handlers import AIFlowHandler
from airflow.operators.bash import BashOperator

dag = DAG(dag_id='workflow_1',
          start_date=timezone.utcnow(),
          schedule_interval='@once')
op_0 = BashOperator(task_id='0_job',
                    dag=dag,
                    bash_command='echo "0 hello world!"')
op_1 = BashOperator(task_id='1_job',
                    dag=dag,
                    bash_command='echo "1 hello world!"')
op_2 = BashOperator(task_id='2_job',
                    dag=dag,
                    bash_command='echo "2 hello world!"')
op_1.subscribe_event('key_1', 'UNDEFINED', 'default')
configs_op_1 = '[{"__af_object_type__": "jsonable", "__class__": "MetConfig", "__module__": "ai_flow.graph.edge", "action": "START", "condition": "NECESSARY", "event_key": "key_1", "event_type": "UNDEFINED", "event_value": "value_1", "life": "ONCE", "namespace": "default", "value_condition": "EQUAL"}]'

op_1.set_events_handler(AIFlowHandler(configs_op_1))
op_2.subscribe_event('key_2', 'UNDEFINED', 'default')
Code Example #17
    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=args)
Code Example #18
NODEGROUP_SUFFIX = '-nodegroup'
NODEGROUP_NAME = CLUSTER_NAME + NODEGROUP_SUFFIX
ROLE_ARN = environ.get('EKS_DEMO_ROLE_ARN',
                       'arn:aws:iam::123456789012:role/role_name')
SUBNETS = environ.get('EKS_DEMO_SUBNETS',
                      'subnet-12345ab subnet-67890cd').split(' ')
VPC_CONFIG = {
    'subnetIds': SUBNETS,
    'endpointPublicAccess': True,
    'endpointPrivateAccess': False,
}

with DAG(
        dag_id='example_eks_using_defaults_dag',
        schedule_interval=None,
        start_date=days_ago(2),
        max_active_runs=1,
        tags=['example'],
) as dag:

    # [START howto_operator_eks_create_cluster_with_nodegroup]
    # Create an Amazon EKS cluster control plane and an EKS nodegroup compute platform in one step.
    create_cluster_and_nodegroup = EKSCreateClusterOperator(
        task_id='create_eks_cluster_and_nodegroup',
        cluster_name=CLUSTER_NAME,
        nodegroup_name=NODEGROUP_NAME,
        cluster_role_arn=ROLE_ARN,
        nodegroup_role_arn=ROLE_ARN,
        # Opting to use the same ARN for the cluster and the nodegroup here,
        # but a different ARN could be configured and passed if desired.
        resources_vpc_config=VPC_CONFIG,
Code Example #19
    def test_get_k8s_pod_yaml(self, mock_pod_mutation_hook):
        """
        Test that k8s_pod_yaml is rendered correctly, stored in the Database,
        and are correctly fetched using RTIF.get_k8s_pod_yaml
        """
        dag = DAG("test_get_k8s_pod_yaml", start_date=START_DATE)
        with dag:
            task = BashOperator(task_id="test", bash_command="echo hi")

        ti = TI(task=task, execution_date=EXECUTION_DATE)
        rtif = RTIF(ti=ti)

        # Test that pod_mutation_hook is called
        mock_pod_mutation_hook.assert_called_once_with(mock.ANY)

        self.assertEqual(ti.dag_id, rtif.dag_id)
        self.assertEqual(ti.task_id, rtif.task_id)
        self.assertEqual(ti.execution_date, rtif.execution_date)

        expected_pod_yaml = {
            'metadata': {
                'annotations': {
                    'dag_id': 'test_get_k8s_pod_yaml',
                    'execution_date': '2019-01-01T00:00:00+00:00',
                    'task_id': 'test',
                    'try_number': '1',
                },
                'labels': {
                    'airflow-worker': 'worker-config',
                    'airflow_version': version,
                    'dag_id': 'test_get_k8s_pod_yaml',
                    'execution_date': '2019-01-01T00_00_00_plus_00_00',
                    'kubernetes_executor': 'True',
                    'task_id': 'test',
                    'try_number': '1',
                },
                'name': mock.ANY,
                'namespace': 'default',
            },
            'spec': {
                'containers': [
                    {
                        'command': [
                            'airflow',
                            'tasks',
                            'run',
                            'test_get_k8s_pod_yaml',
                            'test',
                            '2019-01-01T00:00:00+00:00',
                        ],
                        'image': ':',
                        'name': 'base',
                        'env': [{'name': 'AIRFLOW_IS_K8S_EXECUTOR_POD', 'value': 'True'}],
                    }
                ]
            },
        }

        self.assertEqual(expected_pod_yaml, rtif.k8s_pod_yaml)

        with create_session() as session:
            session.add(rtif)

        self.assertEqual(expected_pod_yaml, RTIF.get_k8s_pod_yaml(ti=ti))

        # Test the else part of get_k8s_pod_yaml
        # i.e. for the TIs that are not stored in RTIF table
        # Fetching them will return None
        with dag:
            task_2 = BashOperator(task_id="test2", bash_command="echo hello")

        ti2 = TI(task_2, EXECUTION_DATE)
        self.assertIsNone(RTIF.get_k8s_pod_yaml(ti=ti2))
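
    # The mock_pod_mutation_hook parameter implies a patch decorator along
    # these lines (the exact target path is an assumption):
    #
    #     @mock.patch('airflow.settings.pod_mutation_hook')
    #     def test_get_k8s_pod_yaml(self, mock_pod_mutation_hook):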
Code Example #20
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.alibaba.cloud.operators.oss import (
    OSSDeleteBatchObjectOperator,
    OSSDeleteObjectOperator,
    OSSDownloadObjectOperator,
    OSSUploadObjectOperator,
)

with DAG(
        dag_id='oss_object_dag',
        start_date=datetime(2021, 1, 1),
        default_args={
            'bucket_name': 'your bucket',
            'region': 'your region'
        },
        max_active_runs=1,
        tags=['example'],
        catchup=False,
) as dag:

    create_object = OSSUploadObjectOperator(
        file='your local file',
        key='your oss key',
        task_id='task1',
    )

    download_object = OSSDownloadObjectOperator(
        file='your local file',
        key='your oss key',
        # assumed completion of the truncated excerpt; bucket_name and region
        # come from default_args above:
        task_id='task2',
    )
Code Example #21
    def setUp(self):
        args = {
            'owner': 'airflow',
            'start_date': datetime.datetime(2017, 1, 1)
        }
        self.dag = DAG('test_dag_id', default_args=args)
Code Example #22
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.arangodb.operators.arangodb import AQLOperator
from airflow.providers.arangodb.sensors.arangodb import AQLSensor

dag = DAG(
    'example_arangodb_operator',
    start_date=datetime(2021, 1, 1),
    tags=['example'],
    catchup=False,
)

# [START howto_aql_sensor_arangodb]

sensor = AQLSensor(
    task_id="aql_sensor",
    query="FOR doc IN students FILTER doc.name == 'judy' RETURN doc",
    timeout=60,
    poke_interval=10,
    dag=dag,
)

# [END howto_aql_sensor_arangodb]
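
# The import block above also brings in AQLOperator; a minimal example of it
# (the query text and task_id are assumptions, not the original how-to's):
operator = AQLOperator(
    task_id='aql_operator',
    query="FOR doc IN students RETURN doc",
    dag=dag,
)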
Code Example #23
def dag_bag_ext():
    """
    Create a DagBag with DAGs looking like this. The dotted lines represent external dependencies
    set up using ExternalTaskMarker and ExternalTaskSensor.

    dag_0:   task_a_0 >> task_b_0
                             |
                             |
    dag_1:                   ---> task_a_1 >> task_b_1
                                                  |
                                                  |
    dag_2:                                        ---> task_a_2 >> task_b_2
                                                                       |
                                                                       |
    dag_3:                                                             ---> task_a_3 >> task_b_3
    """
    dag_bag = DagBag(dag_folder=DEV_NULL, include_examples=False)

    dag_0 = DAG("dag_0", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_0 = DummyOperator(task_id="task_a_0", dag=dag_0)
    task_b_0 = ExternalTaskMarker(task_id="task_b_0",
                                  external_dag_id="dag_1",
                                  external_task_id="task_a_1",
                                  recursion_depth=3,
                                  dag=dag_0)
    task_a_0 >> task_b_0

    dag_1 = DAG("dag_1", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_1 = ExternalTaskSensor(task_id="task_a_1",
                                  external_dag_id=dag_0.dag_id,
                                  external_task_id=task_b_0.task_id,
                                  dag=dag_1)
    task_b_1 = ExternalTaskMarker(task_id="task_b_1",
                                  external_dag_id="dag_2",
                                  external_task_id="task_a_2",
                                  recursion_depth=2,
                                  dag=dag_1)
    task_a_1 >> task_b_1

    dag_2 = DAG("dag_2", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_2 = ExternalTaskSensor(task_id="task_a_2",
                                  external_dag_id=dag_1.dag_id,
                                  external_task_id=task_b_1.task_id,
                                  dag=dag_2)
    task_b_2 = ExternalTaskMarker(task_id="task_b_2",
                                  external_dag_id="dag_3",
                                  external_task_id="task_a_3",
                                  recursion_depth=1,
                                  dag=dag_2)
    task_a_2 >> task_b_2

    dag_3 = DAG("dag_3", start_date=DEFAULT_DATE, schedule_interval=None)
    task_a_3 = ExternalTaskSensor(task_id="task_a_3",
                                  external_dag_id=dag_2.dag_id,
                                  external_task_id=task_b_2.task_id,
                                  dag=dag_3)
    task_b_3 = DummyOperator(task_id="task_b_3", dag=dag_3)
    task_a_3 >> task_b_3

    for dag in [dag_0, dag_1, dag_2, dag_3]:
        dag_bag.bag_dag(dag=dag, root_dag=dag)

    return dag_bag
Code Example #24
    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG('test_dag_id', default_args=args)
Code Example #25
import datetime
from dateutil.tz import *

from airflow.models.dag import DAG
from airflow.operators.bash_operator import BashOperator


valid_dag = DAG(
    dag_id="ValidDag",
    description="This is a valid test dag",
    start_date=datetime.datetime(2020, 5, 20, 0, 0),
)


task1 = BashOperator(bash_command="echo 1", task_id="Task1", dag=valid_dag)
task2 = BashOperator(bash_command='echo "2"', task_id="Task2", dag=valid_dag)


task1 >> task2
Code Example #26
"""
Example Airflow DAG that shows the complex DAG structure.
"""

from airflow.models.dag import DAG
# from airflow.models.baseoperator import chain
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
from airflow.utils.dates import days_ago

default_args = {"start_date": days_ago(1)}

with DAG(
        dag_id="example_complex",
        default_args=default_args,
        schedule_interval=None,
        tags=['example'],
) as dag:

    # Create
    create_entry_group = BashOperator(task_id="create_entry_group",
                                      bash_command="echo create_entry_group")

    create_entry_group_result = BashOperator(
        task_id="create_entry_group_result",
        bash_command="echo create_entry_group_result")

    create_entry_group_result2 = BashOperator(
        task_id="create_entry_group_result2",
        bash_command="echo create_entry_group_result2")
Code Example #27
    def test_mark_success_on_success_callback(self):
        """
        Test that ensures that where a task is marked success in the UI
        on_success_callback gets executed
        """
        # use shared memory value so we can properly track value change even if
        # it's been updated across processes.
        success_callback_called = Value('i', 0)
        task_terminated_externally = Value('i', 1)
        shared_mem_lock = Lock()

        def success_callback(context):
            with shared_mem_lock:
                success_callback_called.value += 1
            assert context['dag_run'].dag_id == 'test_mark_success'

        dag = DAG(dag_id='test_mark_success',
                  start_date=DEFAULT_DATE,
                  default_args={'owner': 'owner1'})

        def task_function(ti):
            # pylint: disable=unused-argument
            time.sleep(60)
            # This should not happen -- the state change should be noticed and the task should get killed
            with shared_mem_lock:
                task_terminated_externally.value = 0

        task = PythonOperator(
            task_id='test_state_succeeded1',
            python_callable=task_function,
            on_success_callback=success_callback,
            dag=dag,
        )

        session = settings.Session()

        dag.clear()
        dag.create_dagrun(
            run_id="test",
            state=State.RUNNING,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.refresh_from_db()
        job1 = LocalTaskJob(task_instance=ti,
                            ignore_ti_state=True,
                            executor=SequentialExecutor())
        job1.task_runner = StandardTaskRunner(job1)

        settings.engine.dispose()
        process = multiprocessing.Process(target=job1.run)
        process.start()

        for _ in range(0, 25):
            ti.refresh_from_db()
            if ti.state == State.RUNNING:
                break
            time.sleep(0.2)
        assert ti.state == State.RUNNING
        ti.state = State.SUCCESS
        session.merge(ti)
        session.commit()

        process.join(timeout=10)
        assert success_callback_called.value == 1
        assert task_terminated_externally.value == 1
        assert not process.is_alive()
Code Example #28
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# [START dag]
"""This dag only runs some simple tasks to test Airflow's task execution."""
from datetime import datetime, timedelta

from airflow.models.dag import DAG
from airflow.operators.dummy import DummyOperator

now = datetime.now()
# timedelta's positional arguments are (days, seconds, microseconds,
# milliseconds, minutes, hours), so this subtracts three hours.
now_to_the_hour = (now - timedelta(0, 0, 0, 0, 0, 3)).replace(minute=0, second=0, microsecond=0)
START_DATE = now_to_the_hour
DAG_NAME = 'test_dag_v1'

dag = DAG(
    DAG_NAME,
    schedule_interval='*/10 * * * *',
    default_args={'depends_on_past': True},
    start_date=datetime(2021, 1, 1),
    catchup=False,
)

run_this_1 = DummyOperator(task_id='run_this_1', dag=dag)
run_this_2 = DummyOperator(task_id='run_this_2', dag=dag)
run_this_2.set_upstream(run_this_1)
run_this_3 = DummyOperator(task_id='run_this_3', dag=dag)
run_this_3.set_upstream(run_this_2)
# [END dag]