def createShortCircuitOprTask(shrtCirtaskId, dagObj, queryTaskId):
    """Build a ShortCircuitOperator that runs ``checkAvailability``.

    Args:
        shrtCirtaskId: ``task_id`` given to the new operator.
        dagObj: DAG the operator is attached to.
        queryTaskId: value forwarded to ``checkAvailability`` under the
            keyword ``quertTaskId``.

    Returns:
        The configured ShortCircuitOperator.
    """
    return ShortCircuitOperator(
        task_id=shrtCirtaskId,
        python_callable=checkAvailability,
        provide_context=True,
        depends_on_past=True,
        dag=dagObj,
        # op_kwargs is documented as a dict of keyword arguments. The
        # original list of (key, value) pairs is not a mapping and cannot be
        # unpacked with ``**``.
        # NOTE(review): the key "quertTaskId" looks like a typo for
        # "queryTaskId"; it is kept because checkAvailability's signature is
        # not visible here -- confirm before renaming.
        op_kwargs={"quertTaskId": queryTaskId},
    )
コード例 #2
0
    def test_get_task_instance_on_empty_dagrun(self):
        """
        A DagRun stored without any task instances must hand back ``None``
        when asked for one of the DAG's tasks.
        """
        empty_dag = DAG(
            dag_id='test_get_task_instance_on_empty_dagrun',
            start_date=timezone.datetime(2017, 1, 1),
        )
        ShortCircuitOperator(
            task_id='test_short_circuit_false',
            python_callable=lambda: False,
            dag=empty_dag,
        )

        current_time = timezone.utcnow()

        # The DagRun row is built by hand on purpose: create_dagrun would
        # also create the task instances, which this test must not have.
        run = models.DagRun(
            dag_id=empty_dag.dag_id,
            run_id='manual__' + current_time.isoformat(),
            execution_date=current_time,
            start_date=current_time,
            state=State.RUNNING,
            external_trigger=False,
        )

        db_session = settings.Session()
        db_session.add(run)
        db_session.commit()

        found = run.get_task_instance('test_short_circuit_false')
        self.assertEqual(None, found)
コード例 #3
0
    def test_dagrun_success_when_all_skipped(self):
        """
        A run whose short-circuit task succeeded and whose other tasks are
        all skipped must be updated to SUCCESS.
        """
        dag = DAG(
            dag_id='test_dagrun_success_when_all_skipped',
            start_date=timezone.datetime(2017, 1, 1),
        )
        short_circuit = ShortCircuitOperator(
            task_id='test_short_circuit_false',
            python_callable=lambda: False,
            dag=dag,
        )
        first_skipped = DummyOperator(task_id='test_state_skipped1', dag=dag)
        second_skipped = DummyOperator(task_id='test_state_skipped2', dag=dag)

        # Linear chain: short circuit -> skipped1 -> skipped2.
        short_circuit >> first_skipped >> second_skipped

        dag_run = self.create_dag_run(
            dag=dag,
            state=State.RUNNING,
            task_states={
                'test_short_circuit_false': State.SUCCESS,
                'test_state_skipped1': State.SKIPPED,
                'test_state_skipped2': State.SKIPPED,
            },
        )

        self.assertEqual(State.SUCCESS, dag_run.update_state())
コード例 #4
0
    def test_with_dag_run(self):
        """
        Run ``upstream`` and ``make_choice`` inside an explicit DagRun and
        check the state of all four task instances, first with the callable
        returning False and then with it returning True.
        """
        value = False
        dag = DAG(
            'shortcircuit_operator_test_with_dag_run',
            default_args={'owner': 'airflow', 'start_date': DEFAULT_DATE},
            schedule_interval=INTERVAL,
        )
        # The lambda reads ``value`` lazily, so flipping it later changes
        # what the operator sees on its second run.
        short_op = ShortCircuitOperator(task_id='make_choice',
                                        python_callable=lambda: value,
                                        dag=dag)
        branch_1 = DummyOperator(task_id='branch_1', dag=dag)
        short_op >> branch_1
        branch_2 = DummyOperator(task_id='branch_2', dag=dag)
        branch_1 >> branch_2
        upstream = DummyOperator(task_id='upstream', dag=dag)
        upstream >> short_op
        dag.clear()

        logging.error("Tasks {}".format(dag.tasks))
        dr = dag.create_dagrun(
            run_id="manual__",
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

        def run_and_check(downstream_state):
            # Execute the two upstream-most tasks, then compare every task
            # instance of the run against the expected state table.
            upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
            short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

            expected = {
                'make_choice': State.SUCCESS,
                'upstream': State.SUCCESS,
                'branch_1': downstream_state,
                'branch_2': downstream_state,
            }
            task_instances = dr.get_task_instances()
            self.assertEqual(len(task_instances), 4)
            for instance in task_instances:
                if instance.task_id not in expected:
                    raise Exception
                self.assertEqual(instance.state, expected[instance.task_id])

        run_and_check(State.SKIPPED)

        value = True
        dag.clear()
        dr.verify_integrity()
        run_and_check(State.NONE)
コード例 #5
0
def create_gcs_short_circuit_operator(task_id: str, gcs_download_task_id: str, dag: DAG, provide_context: bool = True):
    """Create a ShortCircuitOperator that calls ``did_gcs_file_download``.

    Args:
        task_id: ``task_id`` of the new operator.
        gcs_download_task_id: passed to the callable as the keyword
            ``gcs_download_task_id``.
        dag: DAG the operator belongs to.
        provide_context: forwarded unchanged to the operator.

    Returns:
        The configured ShortCircuitOperator.
    """
    callable_kwargs = {'gcs_download_task_id': gcs_download_task_id}
    return ShortCircuitOperator(
        dag=dag,
        task_id=task_id,
        python_callable=did_gcs_file_download,
        op_kwargs=callable_kwargs,
        provide_context=provide_context,
    )
コード例 #6
0
ファイル: operator_factories.py プロジェクト: siklosid/dagger
def make_control_flow(is_dummy_operator_short_circuit, dag):
    """Return the ``dummy-control-flow`` ShortCircuitOperator for ``dag``.

    ``eval_control_flow`` is pre-bound with
    ``is_dummy_operator_short_circuit`` as its first positional argument;
    the operator is created with ``provide_context=True``.
    """
    decide = partial(eval_control_flow, is_dummy_operator_short_circuit)
    return ShortCircuitOperator(
        task_id="dummy-control-flow",
        python_callable=decide,
        provide_context=True,
        dag=dag,
    )
コード例 #7
0
ファイル: operators.py プロジェクト: tushar912/cccatalog
def get_file_staging_operator(dag,
                              output_dir,
                              minimum_file_age_minutes,
                              identifier=TIMESTAMP_TEMPLATE):
    """Create the ``stage_oldest_tsv_file`` ShortCircuitOperator.

    The operator calls ``paths.stage_oldest_tsv_file`` with the positional
    arguments ``(output_dir, identifier, minimum_file_age_minutes)``.
    """
    positional_args = [output_dir, identifier, minimum_file_age_minutes]
    return ShortCircuitOperator(
        dag=dag,
        task_id='stage_oldest_tsv_file',
        python_callable=paths.stage_oldest_tsv_file,
        op_args=positional_args,
    )
コード例 #8
0
 def _create_infinite_retry_short_circuit_operator(self, task_id, dag,
                                                   python_callable):
     """Create a ShortCircuitOperator with a very large retry budget.

     Retries are set to 99999 with exponential backoff, starting at a
     600-second delay and capped at 3600 seconds. The callable is invoked
     with ``provide_context=True``.
     """
     retry_policy = dict(
         retries=99999,
         retry_delay=timedelta(seconds=600),
         max_retry_delay=timedelta(seconds=3600),
         retry_exponential_backoff=True,
     )
     return ShortCircuitOperator(
         task_id=task_id,
         dag=dag,
         python_callable=python_callable,
         provide_context=True,
         **retry_policy
     )
コード例 #9
0
    def test_without_dag_run(self):
        """Check the operator's handling of tasks that have no instance in a run.

        Only ``make_choice`` is executed and no DagRun is created, so no
        task instance for ``upstream`` is expected. The downstream branches
        must be SKIPPED when the callable returns False and left in state
        NONE when it returns True.
        """
        value = False
        dag = DAG('shortcircuit_operator_test_without_dag_run',
                  default_args={
                      'owner': 'airflow',
                      'start_date': DEFAULT_DATE
                  },
                  schedule_interval=INTERVAL)
        # The lambda reads ``value`` lazily, so flipping it below changes
        # the outcome of the second run.
        short_op = ShortCircuitOperator(task_id='make_choice',
                                        dag=dag,
                                        python_callable=lambda: value)
        branch_1 = DummyOperator(task_id='branch_1', dag=dag)
        branch_1.set_upstream(short_op)
        branch_2 = DummyOperator(task_id='branch_2', dag=dag)
        branch_2.set_upstream(branch_1)
        upstream = DummyOperator(task_id='upstream', dag=dag)
        upstream.set_downstream(short_op)
        dag.clear()

        def assert_states(task_instances, downstream_state):
            # Explicit failures replace the original bare ``raise``, which
            # outside an ``except`` block only produced an unrelated
            # RuntimeError.
            for ti in task_instances:
                if ti.task_id == 'make_choice':
                    self.assertEqual(ti.state, State.SUCCESS)
                elif ti.task_id == 'upstream':
                    self.fail("task instance for 'upstream' should not exist")
                elif ti.task_id in ('branch_1', 'branch_2'):
                    self.assertEqual(ti.state, downstream_state)
                else:
                    self.fail("unexpected task instance: {}".format(ti.task_id))

        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        session = Session()
        try:
            tis = session.query(TI).filter(
                TI.dag_id == dag.dag_id,
                TI.execution_date == DEFAULT_DATE
            )
            assert_states(tis, State.SKIPPED)

            value = True
            dag.clear()

            short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
            assert_states(tis, State.NONE)
        finally:
            # Close the session even when an assertion above fails.
            session.close()
コード例 #10
0
    def test_clear_skipped_downstream_task(self):
        """
        After a downstream task is skipped by ShortCircuitOperator, clearing the skipped task
        should not cause it to be executed.
        """
        dag = DAG('shortcircuit_clear_skipped_downstream_task',
                  default_args={
                      'owner': 'airflow',
                      'start_date': DEFAULT_DATE
                  },
                  schedule_interval=INTERVAL)
        short_op = ShortCircuitOperator(task_id='make_choice',
                                        dag=dag,
                                        python_callable=lambda: False)
        downstream = DummyOperator(task_id='downstream', dag=dag)

        short_op >> downstream

        dag.clear()

        dr = dag.create_dagrun(run_id="manual__",
                               start_date=timezone.utcnow(),
                               execution_date=DEFAULT_DATE,
                               state=State.RUNNING)

        def assert_expected_states(task_instances):
            # Explicit failure replaces the original bare ``raise``, which
            # outside an ``except`` block only produced an unrelated
            # RuntimeError.
            for ti in task_instances:
                if ti.task_id == 'make_choice':
                    self.assertEqual(ti.state, State.SUCCESS)
                elif ti.task_id == 'downstream':
                    self.assertEqual(ti.state, State.SKIPPED)
                else:
                    self.fail("unexpected task instance: {}".format(ti.task_id))

        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        downstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        assert_expected_states(tis)

        # Clear downstream
        with create_session() as session:
            clear_task_instances([t for t in tis if t.task_id == "downstream"],
                                 session=session,
                                 dag=dag)

        # Run downstream again
        downstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        # The cleared task must still end up skipped.
        assert_expected_states(dr.get_task_instances())
コード例 #11
0
def test_dag_after_add_sequential_sensor_and_then_short_circuit(
        test_task_sensor_service, test_dummy_task):
    """Attach a short circuit, then a sequential sensor, and check the DAG.

    The resulting structure is verified by
    ``assert_dag_after_add_short_circuit_and_sequential_sensor``.
    """
    short_circuit_operator_id = 'test_short_circuit'
    gate = ShortCircuitOperator(
        task_id=short_circuit_operator_id,
        python_callable=lambda x: x,
        provide_context=True,
        dag=test_dummy_task.dag,
    )

    test_task_sensor_service.add_task_short_circuit(test_dummy_task, gate)
    test_task_sensor_service.add_task_sequential_sensor(test_dummy_task)

    assert_dag_after_add_short_circuit_and_sequential_sensor(
        test_dummy_task, short_circuit_operator_id)
コード例 #12
0
    def setUp(self):
        """Build a fresh ``upstream -> make_choice -> branch_1`` DAG.

        ``make_choice`` returns ``self.value``, read lazily at execution
        time; it starts out as True.
        """
        self.value = True

        dag_defaults = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG('shortcircuit_operator_test',
                       default_args=dag_defaults,
                       schedule_interval=INTERVAL)

        self.short_op = ShortCircuitOperator(
            task_id='make_choice',
            python_callable=lambda: self.value,
            dag=self.dag)

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.short_op)

        self.upstream = DummyOperator(task_id='upstream', dag=self.dag)
        self.upstream.set_downstream(self.short_op)

        self.dag.clear()
コード例 #13
0
def test_dag_after_add_task_short_circuit(test_task_sensor_service,
                                          test_dummy_task):
    """Adding a short circuit must place it directly upstream of the task.

    Afterwards the dummy task has no downstream and exactly one upstream
    (the short circuit). The short circuit has exactly one downstream and
    no upstream.
    """
    short_circuit_operator_id = 'test_short_circuit'
    gate = ShortCircuitOperator(
        task_id=short_circuit_operator_id,
        python_callable=lambda x: x,
        provide_context=True,
        dag=test_dummy_task.dag,
    )
    test_task_sensor_service.add_task_short_circuit(test_dummy_task, gate)

    # Relatives of the dummy task.
    assert len(test_dummy_task.get_direct_relatives(upstream=False)) == 0
    dummy_parents = test_dummy_task.get_direct_relatives(upstream=True)
    assert len(dummy_parents) == 1

    operator = dummy_parents[0]
    assert isinstance(operator, ShortCircuitOperator)
    assert operator.task_id == short_circuit_operator_id

    # Relatives of the short circuit itself.
    assert len(operator.get_direct_relatives(upstream=False)) == 1
    assert len(operator.get_direct_relatives(upstream=True)) == 0
コード例 #14
0
def test_dag_after_add_sequential_sensor_and_two_short_circuit_and_gapped_sensor(
        default_args, test_task_sensor_service, test_dummy_task):
    """Combine a sequential sensor, a short circuit and a gap sensor.

    The three are added to ``test_dummy_task`` in that order; the resulting
    structure is verified by the matching ``assert_dag_after_...`` helper.
    """
    test_task_sensor_service.add_task_sequential_sensor(test_dummy_task)

    short_circuit_operator_id = 'test_short_circuit'
    gate = ShortCircuitOperator(
        task_id=short_circuit_operator_id,
        python_callable=lambda x: x,
        provide_context=True,
        dag=test_dummy_task.dag,
    )
    test_task_sensor_service.add_task_short_circuit(test_dummy_task, gate)

    # The gapped task lives in a separate DAG and is observed with a
    # 60-second execution delta.
    gapped_task_dag = DAG("test_gapped_dag", default_args=default_args)
    gapped_task = DummyOperator(task_id="gapped_task", dag=gapped_task_dag)
    execution_delta = timedelta(seconds=60)
    test_task_sensor_service.add_task_gap_sensor(
        test_dummy_task, gapped_task, execution_delta)

    assert_dag_after_add_sequential_sensor_and_two_short_circuit_and_gapped_sensor(
        test_dummy_task, short_circuit_operator_id, gapped_task_dag,
        gapped_task, execution_delta)
コード例 #15
0

# Defaults applied to every task of the DAG below.
default_args = dict(
    owner=dag_owner,
    start_date=datetime(2020, 10, 14),
)

# Single-shot DAG: scheduled "@once" with catchup disabled.
dag = DAG(
    dag_name,
    default_args=default_args,
    catchup=False,
    schedule_interval="@once",
)

dummy = DummyOperator(task_id="dummy", dag=dag)

# Gate: downstream tasks only run when ``look_for_new_feeds`` returns a
# truthy value.
task1a = ShortCircuitOperator(
    dag=dag,
    task_id='look_for_new_feeds',
    python_callable=look_for_new_feeds,
    provide_context=True,
)

task1b = PythonOperator(
    dag=dag,
    task_id="download_new_feed",
    python_callable=download_new_feed,
    provide_context=True,
)

# The original chain ended with ``>> dummy``, which made ``dummy`` both the
# first and the last task and therefore created a dependency cycle. A DAG
# must be acyclic, so the trailing edge is dropped.
# NOTE(review): if a separate end marker was intended, add a second
# DummyOperator with its own task_id.
dummy >> task1a >> task1b


files_xlsx = os.listdir(input_files_path)
print(files_xlsx)
コード例 #16
0
                on_failure_callback=notify_email) as dag:

    # Runs ``gmail_to_csv`` with mailbox credentials and the target CSV path
    # taken from ``var_config``; ``notify_email`` is called on failure.
    email_to_csv = PythonOperator(task_id='email_to_csv',
                                  on_failure_callback=notify_email,
                                  python_callable=gmail_to_csv,
                                  op_kwargs={
                                      'username': var_config['gmail_username'],
                                      'password': var_config['gmail_password'],
                                      'imap_server': var_config['imap_server'],
                                      'inbox_label': var_config['inbox_label'],
                                      'csv_file_path':
                                      var_config['csv_file_path']
                                  })

    # NOTE(review): this assignment rebinds the name ``checkforfile`` from
    # the callable to the operator. The right-hand side is evaluated first,
    # so the operator still receives the original callable, but the function
    # is no longer reachable under this name afterwards.
    checkforfile = ShortCircuitOperator(task_id='checkforfile',
                                        provide_context=False,
                                        python_callable=checkforfile)

    # Runs ``pg_load_table`` with file/table names and connection settings
    # taken from ``var_config``; ``notify_email`` is called on failure.
    csv_to_psql = PythonOperator(task_id='csv_to_psql',
                                 on_failure_callback=notify_email,
                                 python_callable=pg_load_table,
                                 op_kwargs={
                                     'file_path': var_config['file_path'],
                                     'table_name': var_config['table_name'],
                                     'dbname': var_config['pg_dbname'],
                                     'host': var_config['pg_host'],
                                     'port': var_config['pg_port'],
                                     'user': var_config['pg_user'],
                                     'pwd': var_config['pg_password']
                                 })
コード例 #17
0
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datetime import datetime

from airflow.models import DAG
from airflow.operators.python_operator import ShortCircuitOperator
from airflow.operators.dummy_operator import DummyOperator

# Test DAG: the short circuit callable returns False, so the two dummy tasks
# chained behind it are expected to be skipped.
dag = DAG(
    dag_id='test_dagrun_short_circuit_false',
    start_date=datetime(2017, 1, 1),
)
dag_task1 = ShortCircuitOperator(
    task_id='test_short_circuit_false',
    python_callable=lambda: False,
    dag=dag,
)
dag_task2 = DummyOperator(task_id='test_state_skipped1', dag=dag)
dag_task3 = DummyOperator(task_id='test_state_skipped2', dag=dag)

# Linear chain: short circuit -> skipped1 -> skipped2.
dag_task1 >> dag_task2 >> dag_task3
コード例 #18
0
# Caveat carried over from the original note: an excluded hour at the 24h
# boundary will break this (the failure mode is not visible in this file).
# Each entry is a 3-tuple, presumably (hour, minute, second) -- TODO confirm
# against check_hour.
l_exclude_hours = [
    (2, 0, 0),
    (6, 30, 0),
    (18, 30, 0),
]
seconds_boundary = 20 * 60  # 20 minutes from side to side

# NOTE(review): start_date is re-evaluated every time this file is parsed;
# Airflow's documentation recommends a static start_date -- confirm this is
# intended.
default_args = dict(
    owner="airflow",
    start_date=datetime.utcnow(),
)

dag = DAG(
    "dag_shortcut_operator",
    default_args=default_args,
    schedule_interval="@once",
)

# Gate: ``check_hour`` receives the exclusion list as a keyword argument.
op_shortcut = ShortCircuitOperator(
    dag=dag,
    task_id="ShortCut_operation",
    python_callable=check_hour,
    provide_context=True,
    op_kwargs={'l_exclude_hours': l_exclude_hours},
)

op1 = DummyOperator(task_id="Op1", dag=dag)

op2 = DummyOperator(task_id="Op2", dag=dag)
コード例 #19
0
ファイル: example_1.py プロジェクト: fossabot/docker-airflow
    )
    a.doc_md = task_sample.__doc__

    # Branch point: ``task_branch`` is the callable used to choose the path
    # taken among the tasks wired downstream of ``b`` (c and d).
    b = BranchPythonOperator(
        task_id="b",
        params={},
        python_callable=task_branch,
    )

    c = DummyOperator(task_id="c")

    d = DummyOperator(task_id="d")

    # Gate placed after both branches. The "none_failed" trigger rule is set
    # explicitly, presumably so that ``e`` still runs when one of c/d was
    # skipped by the branch -- TODO confirm.
    e = ShortCircuitOperator(
        task_id="e",
        params={},
        trigger_rule="none_failed",
        python_callable=task_stop,
    )

    f = DummyOperator(task_id="f")

    # Independent task hanging directly off ``start``; runs ``task_fail``.
    g = PythonOperator(
        task_id="g",
        params={},
        python_callable=task_fail,
    )

    # Main path: start -> a -> b -> (c | d) -> e -> f; side path: start -> g.
    start >> a >> b >> [c, d] >> e >> f
    start >> g
コード例 #20
0
# Daily DAG shared by the tasks defined below.
dag = DAG(
    dag_id="real_estate",
    description="all learned during training",
    schedule_interval="@daily",
    default_args=args,
)


def check_date(execution_date, **context):
    """Return True while ``execution_date`` is on or before 2019-11-28 00:00.

    Args:
        execution_date: the datetime to test; may be naive or
            timezone-aware.
        **context: remaining context keyword arguments, unused.

    Returns:
        bool: True when ``execution_date`` is not later than the cutoff.
    """
    # Give the cutoff the tzinfo of the incoming value: comparing a naive
    # datetime with an aware one raises TypeError. For a naive input the
    # tzinfo is None, so the comparison is unchanged.
    cutoff = datetime.datetime(2019, 11, 28, tzinfo=execution_date.tzinfo)
    return execution_date <= cutoff


# NOTE(review): this rebinds the module-level name ``check_date`` from the
# function to the operator. The right-hand side is evaluated first, so the
# operator still receives the function as its callable.
check_date = ShortCircuitOperator(
        task_id="check_if_before_end_of_last_year",
        python_callable=check_date,
        provide_context=True,
        dag=dag
    )
# The bare string below is a no-op expression used as a note (Dutch). In
# English: "when using an f-string, write {{{{ to end up with {{".
'''
use of f voor format dan {{{{ gebruiken om {{ 2 over te houden
'''

# ``endpoint`` and ``gcs_path`` are f-strings: the quadruple braces collapse
# to the literal ``{{ ds }}`` / ``{{ tomorrow_ds }}`` placeholders, while
# ``{currency}`` and ``{bucket_name}`` are interpolated when this file is
# parsed.
get_from_api_to_gcs = HttpToGcsOperator(
    task_id="get_from_api_to_gcs",
    endpoint=f"/history?start_at={{{{ ds }}}}&end_at={{{{ tomorrow_ds }}}}&base=GBP&symbols={currency}",
    http_conn_id="currency-http",
    gcs_conn_id="google_cloud_storage_default",
    gcs_path=f"usecase/currency/{{{{ ds }}}}-{currency}.json",
    gcs_bucket=f"{bucket_name}",
    dag=dag
)
コード例 #21
0
    # pull xcom from a subdag to see if data was written
    def iswritten(value, **context):
        """Return ``value`` when data was written, otherwise False.

        Args:
            value: the pulled XCom value. When it is supplied through a
                rendered template it arrives as text, so a missing XCom
                shows up as the string ``"None"`` rather than ``None``.
            **context: remaining context keyword arguments, unused.

        Returns:
            ``value`` unchanged when it is present, else ``False``.
        """
        # Treat both a real None and its rendered text form as "nothing
        # written"; the original only checked ``is not None``, so the text
        # "None" was returned as a truthy result.
        if value is None or value == "None":
            return False
        return value

    # Conditional gate for a successful alignment: downstream tasks run only
    # when ``iswritten`` returns a truthy value. ALL_DONE is set explicitly
    # as the trigger rule.
    # NOTE(review): ``value`` is an f-string that produces a Jinja template
    # pulling the XCom of ``align_end_t``; assuming op_kwargs is rendered by
    # the operator, the callable receives text, so a missing XCom would
    # arrive as the string "None" -- TODO confirm.
    isaligned_t = ShortCircuitOperator(
        task_id='iswritten',
        python_callable=iswritten,
        trigger_rule=TriggerRule.ALL_DONE,
        op_kwargs={
            "value":
            f"{{{{ task_instance.xcom_pull(task_ids='{align_end_t.task_id}') }}}}"
        },
        provide_context=True,
        dag=dag)

    # delete source_{ds_nodash}/(*.png) (run if align_t succeeds and ngingest finishes) -- let it survive for 1 day in case there are re-runs and the same policy is still in effect
    lifecycle_config = {
        "lifecycle": {
            "rule": [{
                "action": {
                    "type": "Delete"
                },
                "condition": {
                    "age": 5
                }
コード例 #22
0
        if _entity == entity and task.get("task") == action:
            yield task


# Sensor on the "/changes" endpoint of the ``kernel_conn`` connection, with
# a poke interval of 5.
http_kernel_check = HttpSensor(
    dag=dag,
    task_id="http_kernel_check",
    http_conn_id="kernel_conn",
    endpoint="/changes",
    request_params={},
    poke_interval=5,
)

# Gate: downstream tasks run only when ``read_changes`` returns a truthy
# value.
read_changes_task = ShortCircuitOperator(
    dag=dag,
    task_id="read_changes_task",
    python_callable=read_changes,
    provide_context=True,
)


def JournalFactory(data):
    """Produz instância de `models.Journal` a partir dos dados retornados do
    endpoint `/journals/:journal_id` do Kernel.
    """
    metadata = data["metadata"]

    journal = models.Journal()
    journal._id = journal.jid = data.get("id")
    journal.title = metadata.get("title", "")
    journal.title_iso = metadata.get("title_iso", "")
    journal.short_title = metadata.get("short_title", "")
コード例 #23
0
def judge_if_1st_day_of_month(**kwargs):
    """Return True when the ``ds`` context value ends in ``01``.

    Args:
        **kwargs: context keyword arguments; only ``ds`` is read.

    Returns:
        bool: True when ``ds`` ends with ``"01"``, False otherwise.
    """
    ds = kwargs.get('ds')
    print(ds)
    # ``logging.warn`` is a deprecated alias of ``logging.warning``.
    if ds.endswith('01'):
        logging.warning('judge_if_1st_day_of_month: 01' + ds)
        return True
    # The original message named judge_if_1st_day_of_week (copy-paste slip).
    logging.warning('judge_if_1st_day_of_month: Not 01' + ds)
    return False


# Branch on ``judge_if_1st_day_of_week``; triggers once all upstream tasks
# are done.
if_1st_day_of_week = BranchPythonOperator(
    dag=dag,
    task_id='if_1st_day_of_week',
    python_callable=judge_if_1st_day_of_week,
    trigger_rule="all_done",
    provide_context=True,
)

# Gate on ``judge_if_1st_day_of_month``; triggers once all upstream tasks
# are done.
if_1st_day_of_month = ShortCircuitOperator(
    dag=dag,
    task_id='if_1st_day_of_month',
    python_callable=judge_if_1st_day_of_month,
    trigger_rule="all_done",
    provide_context=True,
)

# Wiring: daily -> week check -> (weekly | month check), weekly -> month
# check, month check -> monthly.
daily >> if_1st_day_of_week
if_1st_day_of_week >> if_1st_day_of_month
if_1st_day_of_week >> weekly
weekly >> if_1st_day_of_month
if_1st_day_of_month >> monthly

if __name__ == "__main__":
    dag.cli()
コード例 #24
0
    if now_epoch - expected_run_epoch > schedule_interval + 7:
        return False
    else:
        return True


# Skip unnecessary executions
# ``doc`` is attached to the gate task below as its documentation text.
doc = """
Skip the subsequent tasks if
    a) the execution_date is in past
    b) there multiple dag runs are currently active
"""
# Gate: downstream tasks run only when ``to_run_next`` returns a truthy
# value. depends_on_past=True is set on the task.
start_task = ShortCircuitOperator(
    task_id='skip_check',
    #python_callable=is_latest_active_dagrun,
    python_callable=to_run_next,
    provide_context=True,
    depends_on_past=True,
    dag=dag)
start_task.doc = doc

# NOTE(review): ``str(datetime.now())`` is evaluated when this statement
# runs (i.e. when the file is parsed), not when the task executes, so the
# echoed timestamp is the parse time -- confirm this is intended.
t11 = BashOperator(task_id='catchup_control',
                   bash_command="echo AAAAAAAAAAAAA BBBBBBBBBB CCCCCCCCCC " +
                   str(datetime.now()),
                   dag=dag)

start_task >> t11

# Extract
doc = """Extract from source database"""
extract_task = PythonOperator(task_id='extract_from_db',
コード例 #25
0
        logging.info("There were processes to kill")
        if ENABLE_KILL:
            logging.info("enable_kill is set to true")
            logging.info("Opting to send an email to alert the users that processes were killed")
            return True  # True = don't short circuit the dag and execute downstream tasks
        else:
            logging.info("enable_kill is set to False")
    else:
        logging.info("Processes to kill list was either None or Empty")

    logging.info("Opting to skip sending an email since no processes were killed")
    return False  # False = short circuit the dag and don't execute downstream tasks

# Gate: the e-mail tasks downstream only run when ``branch_function``
# returns True.
email_or_not_branch = ShortCircuitOperator(
    dag=dag,
    task_id="email_or_not_branch",
    python_callable=branch_function,
    provide_context=True,
)



send_processes_killed_email = EmailOperator(
    task_id="send_processes_killed_email",
    to=PROCESS_KILLED_EMAIL_ADDRESSES,
    subject=PROCESS_KILLED_EMAIL_SUBJECT,
    html_content="""
    <html>
        <body>

            <h6>This is not a failure alert!</h6>
コード例 #26
0
         max_active_runs=1,
         schedule_interval=None) as dag:

    # Fetch from the 'API_Evictions' HTTP connection and write to the
    # 'sf-evictionmeter' bucket under 'soda_jsons'. ``days_ago``,
    # ``size_check`` and ``max_bytes`` are options of the project-specific
    # operator; their exact semantics are not visible here.
    op1 = SodaToS3Operator(task_id='get_evictions_data',
                           http_conn_id='API_Evictions',
                           headers=soda_headers,
                           days_ago=31,
                           s3_conn_id='S3_Evictions',
                           s3_bucket='sf-evictionmeter',
                           s3_directory='soda_jsons',
                           size_check=True,
                           max_bytes=500000000,
                           dag=dag)

    # Gate: downstream tasks run only when ``get_size`` returns a truthy
    # value.
    op2 = ShortCircuitOperator(task_id='check_get_results',
                               python_callable=get_size,
                               provide_context=True,
                               dag=dag)

    # Runs the SQL file 'sql/trunc_target_tables.sql' on the 'RDS_Evictions'
    # Postgres connection.
    op3 = PostgresOperator(task_id='truncate_target_tables',
                           postgres_conn_id='RDS_Evictions',
                           sql='sql/trunc_target_tables.sql',
                           dag=dag)

    op4 = S3ToPostgresOperator(task_id='load_evictions_data',
                               s3_conn_id='S3_Evictions',
                               s3_bucket='sf-evictionmeter',
                               s3_prefix='soda_jsons/soda_evictions_import',
                               source_data_type='json',
                               postgres_conn_id='RDS_Evictions',
                               schema='raw',
                               table='soda_evictions',
コード例 #27
0
                   bash_command=ss + " " + p1pkg + " " + py_file_loc +
                   "part1.py I",
                   dag=dag)


def new_rows():
    """Return False when the bucket holds a ``config_files/skip`` object.

    Every object of the ``rcs-training-12-18`` bucket is inspected; True is
    returned when none of them has that exact key.
    """
    bucket = boto3.resource('s3').Bucket('rcs-training-12-18')
    skip_marker_present = any(
        obj.key == 'config_files/skip' for obj in bucket.objects.all()
    )
    return not skip_marker_present


# Gate: downstream tasks run only when ``new_rows`` returns True; triggers
# as soon as one upstream task has succeeded.
no_new = ShortCircuitOperator(
    dag=dag,
    task_id="new_rows",
    python_callable=new_rows,
    trigger_rule=TriggerRule.ONE_SUCCESS,
)

# Part 2: "<ss> <p2pkg> <py_file_loc>part2.py"; triggers as soon as one
# upstream task has succeeded.
p2 = BashOperator(
    dag=dag,
    task_id='data_curation',
    bash_command=" ".join([ss, p2pkg, py_file_loc + "part2.py"]),
    trigger_rule=TriggerRule.ONE_SUCCESS,
)

# Part 3: "<ss> <py_file_loc>part3.py".
p3 = BashOperator(
    dag=dag,
    task_id='aggregation_and_move_to_staging',
    bash_command=" ".join([ss, py_file_loc + "part3.py"]),
)

# Part 4: run with the plain "python" interpreter.
p4 = BashOperator(
    dag=dag,
    task_id='move_data_from_s3_to_snowflake',
    bash_command="python " + py_file_loc + "part4.py",
)
コード例 #28
0
def docker_move_subdag(host_top_dir, input_path, output_path):
    """Build the ``docker_backup_db`` DAG: find a file, move it, print it.

    Task chain: ``view_file`` -> ``check_if_data_available`` ->
    ``move_data`` -> ``print``.

    Args:
        host_top_dir: directory on the host that contains ``input_path``
            and ``output_path``.
        input_path: path joined to ``host_top_dir`` to form the scanned
            host directory; also used as the source location inside the
            move container.
        output_path: target location inside the containers.

    Returns:
        The constructed DAG. The original returned nothing, which left the
        DAG unreachable for callers; callers that ignore the result are
        unaffected.
    """
    host_path = f"{host_top_dir}/{input_path}"

    with DAG("docker_backup_db",
             default_args=default_args,
             schedule_interval=timedelta(minutes=10)) as dag:

        # Prints the name of the first regular file found; the output is
        # pushed to XCom by the BashOperator.
        locate_file_cmd = """
            sleep 10
            find {{params.source_location}} -type f  -printf "%f\n" | head -1
        """

        t_view = BashOperator(task_id="view_file",
                              bash_command=locate_file_cmd,
                              xcom_push=True,
                              params={"source_location": host_path})

        def is_data_available(*args, **kwargs):
            """Return True when ``view_file`` pushed a value to XCom."""
            ti = kwargs["ti"]
            data = ti.xcom_pull(key=None, task_ids="view_file")
            return data is not None

        # The callable reads ``kwargs["ti"]``, so the task context must be
        # passed in. The original omitted provide_context; this file uses
        # the ``xcom_push=`` operator argument of the Airflow 1.10-era API,
        # where the context is only supplied when provide_context=True.
        t_is_data_available = ShortCircuitOperator(
            task_id="check_if_data_available",
            python_callable=is_data_available,
            provide_context=True)

        # NOTE(review): both volume entries mount the same host directory
        # ({host_top_dir}/{input_path}); the second one may have been meant
        # to use output_path on the host side -- confirm.
        t_move = DockerOperator(
            api_version="auto",
            docker_url=
            "tcp://socat:2375",  # replace it with swarm/docker endpoint
            image="centos:latest",
            network_mode="bridge",
            volumes=[
                f"{host_path}:{input_path}",
                f"{host_top_dir}/{input_path}:{output_path}",
            ],
            command=[
                "/bin/bash",
                "-c",
                "/bin/sleep 30; "
                "/bin/mv {{params.source_location}}/{{ ti.xcom_pull('view_file') }} {{params.target_location}};"
                "/bin/echo '{{params.target_location}}/{{ ti.xcom_pull('view_file') }}';",
            ],
            task_id="move_data",
            xcom_push=True,
            params={
                "source_location": f"{input_path}",
                "target_location": f"{output_path}"
            },
        )

        # Prints the file whose path was echoed (and pushed) by move_data.
        print_templated_cmd = """
            cat {{ ti.xcom_pull('move_data') }}
        """

        t_print = DockerOperator(
            api_version="auto",
            docker_url="tcp://socat:2375",
            image="centos:latest",
            volumes=[f"{host_top_dir}/{output_path}:{output_path}"],
            command=print_templated_cmd,
            task_id="print",
        )

        t_view.set_downstream(t_is_data_available)
        t_is_data_available.set_downstream(t_move)
        t_move.set_downstream(t_print)

    return dag
コード例 #29
0
import airflow.utils.helpers
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import ShortCircuitOperator

# Default arguments: start two days ago so the example has runs to schedule.
args = dict(
    owner='airflow',
    start_date=airflow.utils.dates.days_ago(2),
)

dag = DAG(dag_id='example_short_circuit_operator', default_args=args)

# One gate that always lets its downstream tasks run ...
cond_true = ShortCircuitOperator(
    dag=dag,
    task_id='condition_is_True',
    python_callable=lambda: True,
)

# ... and one that always short-circuits them.
cond_false = ShortCircuitOperator(
    dag=dag,
    task_id='condition_is_False',
    python_callable=lambda: False,
)

ds_true = [DummyOperator(task_id='true_' + str(n), dag=dag) for n in [1, 2]]
ds_false = [DummyOperator(task_id='false_' + str(n), dag=dag) for n in [1, 2]]

# Chain each gate with its two dummy tasks in sequence.
airflow.utils.helpers.chain(cond_true, *ds_true)
airflow.utils.helpers.chain(cond_false, *ds_false)
コード例 #30
0
    error_message = kwargs['ti'].xcom_pull(key='error_message')
    publish_tg_message(TG_TOKEN, TG_CHAT_ID, error_message)


with DAG(
        'DAG_ORDERS_DATASET_WITH_BELLS_N_WHISTLES_HW4',
        default_args=default_args,
        description=(
            'Collects and dumps orders data from different sources.'
            ' Sends msg to telegram if something goes wrong.'
        ),
        schedule_interval=datetime.timedelta(hours=4),
        ) as dag:

    def check_db():
        """Return a truthy value only when both the read and write engines are available."""
        return is_engine_available(
            read_pg_engine) and is_engine_available(write_pg_engine)

    # Gate: the steps wired downstream run only when both engines respond.
    check_db_step = ShortCircuitOperator(
        dag=dag,
        task_id='check_db_step',
        python_callable=check_db,
    )

    order_step = PythonOperator(
        dag=dag,
        task_id='order_step',
        python_callable=read_orders,
    )

    trans_step = PythonOperator(
        dag=dag,
        task_id='trans_step',
        python_callable=read_transactions,
    )

    # The lambda binds the read engine at call time.
    customers_n_goods_step = PythonOperator(
        dag=dag,
        task_id='customers_n_goods_step',
        python_callable=lambda: read_customers_n_goods(engine=read_pg_engine),
    )