Example #1
            ],
            'name':
            f"airflow-{air_env}-oaiharvester",
        }]
    },
    network_configuration=network_config)

# Monitor oaiharvester fargate task
monitor_harvest = ECSTaskSensor(task_id='harvest_step_2',
                                dag=dag,
                                cluster=cluster,
                                ecs_task_id='harvest_step_1')

check_records = BranchPythonOperator(
    task_id='records_check',
    dag=dag,
    provide_context=True,
    python_callable=check_if_records,
)
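
# check_if_records is defined elsewhere in this project and is not part of the
# excerpt. A minimal, hypothetical sketch of such a branch callable (how the record
# count is obtained is an assumption, not from the source); it chooses between the
# ingest task and the no-op defined below:
def check_if_records(**context):
    count = context['ti'].xcom_pull(key='harvest_record_count')  # assumed XCom key
    return 'harvest_step_3' if count else 'no_records_to_harvest'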

no_records_to_harvest = DummyOperator(task_id='no_records_to_harvest')

# ingest with mario fargate task
ingest = ECSOperator(
    task_id='harvest_step_3',
    dag=dag,
    cluster=cluster,
    task_definition=f"airflow-{air_env}-mario",
    overrides={
        'containerOverrides': [{
            'command': [
                f"--url={es_url}",
Example #2
          orientation="TB",
          tags=['DECRYPT'],
          dagrun_timeout=timedelta(hours=1)
    ) as dag:
     

    t_pipeline_begin = PythonOperator(
        task_id="begin_pipeline",
        python_callable=begin_pipeline,
        provide_context=True,
        dag=dag,
    )
    
    t_check_pipeline = BranchPythonOperator(
        task_id="check_pipeline",
        python_callable=pipeline_enable_check,
        provide_context=True,
        dag=dag,
    )
    
    t_pipeline_check_passed = PythonOperator(
        task_id="pipeline_check_passed",
        python_callable=pipeline_check_passed,
        provide_context=True,
        dag=dag,
    )
    
    
    t_pipeline_check_skipped = PythonOperator(
        task_id="pipeline_check_skipped",
        python_callable=pipeline_check_skipped,
        provide_context=True,
Example #3
def extract_2g_externals(parent_dag_name, child_dag_name, start_date,
                         schedule_interval):
    """
    Extract Huawei 2G, 3G, and 4G external relations defined on the 2G network.

    :param parent_dag_name:
    :param child_dag_name:
    :param start_date:
    :param schedule_interval:
    :return:
    """
    dag = DAG(
        '%s.%s' % (parent_dag_name, child_dag_name),
        schedule_interval=schedule_interval,
        start_date=start_date,
    )

    branch_start = BranchPythonOperator(task_id='branch_huawei_2g_externals',
                                        python_callable=pre_clean_up,
                                        dag=dag)

    def extract_huawei_2g_externals():
        huawei_cm.extract_live_network_2g_externals_on_2g()
        huawei_cm.extract_live_network_3g_externals_on_2g()
        huawei_cm.extract_live_network_4g_externals_on_2g()

    t66 = PythonOperator(task_id='extract_huawei_2g_externals',
                         python_callable=extract_huawei_2g_externals,
                         dag=dag)

    t29 = BashOperator(
        task_id='run_huawei_4g_xml_nbi_parser',
        bash_command=
        'java -jar /mediation/bin/boda-huaweinbixmlparser.jar /mediation/data/cm/huawei/raw/nbi_lte /mediation/data/cm/huawei/parsed/nbi_lte /mediation/conf/cm/huawei_nbi_lte_parser.cfg',
        dag=dag)

    t29_2 = BashOperator(
        task_id='run_huawei_4g_mml_parser',
        bash_command=
        'java -jar /mediation/bin/boda-huaweimmlparser.jar /mediation/data/cm/huawei/raw/mml_lte /mediation/data/cm/huawei/parsed/mml_lte /mediation/conf/cm/huawei_mml_lte_parser.cfg',
        dag=dag)

    run_huawei_2g_xml_gexport_parser = BashOperator(
        task_id='run_huawei_4g_xml_gexport_parser',
        bash_command=
        'java -jar /mediation/bin/boda-huaweicmobjectparser.jar /mediation/data/cm/huawei/raw/gexport_lte /mediation/data/cm/huawei/parsed/gexport_lte /mediation/conf/cm/huawei_gexport_lte_parser.cfg',
        dag=dag)

    t_join = DummyOperator(
        task_id='join_huawei_2g_externals',
        dag=dag,
    )

    # Branch and join task_ids must match the tasks defined in this DAG
    # (the original snippet referenced a non-existent 'branch_huawei_4g_parser').
    dag.set_dependency('branch_huawei_2g_externals', 'run_huawei_4g_mml_parser')
    dag.set_dependency('branch_huawei_2g_externals',
                       'run_huawei_4g_xml_nbi_parser')
    dag.set_dependency('branch_huawei_2g_externals',
                       'run_huawei_4g_xml_gexport_parser')

    dag.set_dependency('run_huawei_4g_mml_parser', 'join_huawei_2g_externals')
    dag.set_dependency('run_huawei_4g_xml_nbi_parser', 'join_huawei_2g_externals')
    dag.set_dependency('run_huawei_4g_xml_gexport_parser',
                       'join_huawei_2g_externals')

    return dag
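
# A factory like this is normally attached to a parent DAG via a SubDagOperator.
# Hedged usage sketch: the parent DAG, its start date and schedule below are
# placeholders, not taken from the source. Note the subdag's child name must
# equal the SubDagOperator task_id.
from datetime import datetime as dt
from airflow.operators.subdag_operator import SubDagOperator

parent_dag = DAG('cm_parent_dag', start_date=dt(2020, 1, 1), schedule_interval='@daily')

extract_externals = SubDagOperator(
    task_id='extract_huawei_2g_externals_subdag',
    subdag=extract_2g_externals('cm_parent_dag', 'extract_huawei_2g_externals_subdag',
                                dt(2020, 1, 1), '@daily'),
    dag=parent_dag,
)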
Example #4
def book_hotel_do(**kwargs):
    # get hotel connection string
    conn = os.environ["AIRFLOW_CONN_HOTEL_SERVICE"]
    # send request to service
    r = requests.get(conn + "?tid=" +
                     kwargs['ti'].xcom_pull(task_ids='init_transaction'))
    if r.status_code == 200:
        return 'Book_Flight'
    else:
        return 'hotel_booking_failed'


book_hotel = BranchPythonOperator(task_id='Book_Hotel',
                                  retries=0,
                                  python_callable=book_hotel_do,
                                  provide_context=True,
                                  dag=dag)

compensate_book_hotel = SimpleHttpOperator(
    task_id='compensate_book_hotel',
    method='GET',
    http_conn_id='HOTEL_SERVICE',
    endpoint='/compensate',
    trigger_rule='none_skipped',
    data={"tid": "{{ti.xcom_pull(task_ids='init_transaction')}}"},
    headers={},
    dag=dag,
)

compensate_book_flight_and_hotel1 = SimpleHttpOperator(
Example #5
    }})

model_name = 'MnistModel'

serve = 'tensorflow_model_server --port=8500 --rest_api_port=8501 --model_name={} \
 --model_base_path=/root/airflow/runtime/models/{}'.format(
    model_name, model_name)


def model_exist():
    if os.path.isdir('/root/airflow/runtime/{}'.format(model_name)):
        return 'update_version_or_not_serve'
    else:
        return 'serve_model'


branch = BranchPythonOperator(task_id="serve_or_not",
                              python_callable=model_exist,
                              dag=dag)

t2 = BashOperator(
    task_id="serve_model",
    bash_command=serve,
    dag=dag,
    executor_config={"KubernetesExecutor": {
        "image": "tfserving:airflow"
    }})

t3 = DummyOperator(task_id="update_version_or_not_serve", dag=dag)

t1.set_downstream(branch)
Example #6
escolhe_h_m = PythonOperator(
    task_id='escolhe-h-m',
    python_callable=sorteia_h_m,
    dag=dag
)

def MouF(**context):
    value = context['task_instance'].xcom_pull(task_ids='escolhe-h-m')
    if value == 'male':
        return 'branch_homem'
    else:
        return 'branch_mulher'

male_female = BranchPythonOperator(
    task_id='condicional',
    python_callable=MouF,
    provide_context=True,
    dag=dag
)

def mean_homem():
    df = pd.read_csv('~/train.csv')
    df = df.loc[df.Sex == 'male']
    print(f"Average age of men on the Titanic: {df.Age.mean()}")

branch_homem = PythonOperator(
    task_id='branch_homem',
    python_callable=mean_homem,
    dag=dag
)

def mean_mulher():
Example #7
    description=
    "This DAG shows branching: if there is no data for a particular partition it skips it, else it does a word count",
    start_date=datetime(2018, 2, 1),
    catchup=False,
    schedule_interval=timedelta(days=1))


def check_data_exists():
    import requests
    r = requests.get(Variable.get('data_base_url'))
    if r.status_code == 200:
        return 'process_data'
    else:
        return 'log_no_data'


does_data_exist = BranchPythonOperator(task_id='does_data_exist',
                                       python_callable=check_data_exists,
                                       dag=dag)

process_data = BashOperator(task_id='process_data',
                            dag=dag,
                            bash_command='echo Processing data')

log_no_data = BashOperator(task_id='log_no_data',
                           dag=dag,
                           bash_command='echo No data found!')

process_data.set_upstream(does_data_exist)
log_no_data.set_upstream(does_data_exist)
Example #8
for (dirpath, dirnames, filenames) in os.walk(input_path):
	for ifile in filenames:
		target="%s/%s" % (dirpath, ifile)
		output="%s/%s" % (output_path, ifile)

		### File SUBDAG
		subdag_name="process_%s" % ifile
		subdag = DAG("%s.%s" % (DAG_NAME, subdag_name), default_args=args)
		subdagop = SubDagOperator(task_id=subdag_name, subdag=subdag, dag=dag)
		init >> subdagop >> end

		## File SUBDAG TASKS
		init_process = DummyOperator(task_id='init_processing', dag=subdag)
		# Check file exists
		input_files_check = BranchPythonOperator(task_id='input_files_check', python_callable=check_if_file_exists, op_kwargs={'target': target}, dag=subdag)
		# File not found dummy task for branching
		file_not_found = DummyOperator(task_id='file_not_found', dag=subdag)
		# Method selection
		ingest_method_selector = BranchPythonOperator(task_id='ingest_method_selector', python_callable=branch_ingest_method, op_kwargs={'target': target}, dag=subdag)
		# Call Admin to inform about an error
		call_admin = BashOperator(task_id='call_admin', bash_command="echo \"INGESTION COULD NOT BE PERFORMED\"", dag=subdag)
		# Ending SUBDAG
		end_process = DummyOperator(task_id='end_processing', trigger_rule='one_success', dag=subdag)
		



		### SUBSUBDAG CSV
		csv_subdag_name="csv_processor"
		csv_subdag = DAG("%s.%s.%s" % (DAG_NAME, subdag_name, csv_subdag_name), default_args=args)
Example #9
        return '{}'.format(cont_task)
    else:
        return '{}'.format(stop_task)


start_op = BashOperator(task_id='start_task',
                        bash_command="echo False",
                        xcom_push=True,
                        dag=dag)

start_py = PythonOperator(
    task_id='start_py',
    python_callable=compare,
    #op_kwargs={'connection_name': 'redshift', 'schema':'aoi', 'table_name':'order_details', 'column':'order_date'},
    dag=dag,
)

branch_op = BranchPythonOperator(task_id='branch_task',
                                 provide_context=True,
                                 python_callable=branch_func,
                                 op_kwargs={
                                     'input_task': 'start_py',
                                     'cont_task': 'continue_task',
                                     'stop_task': 'stop_task'
                                 },
                                 dag=dag)

continue_op = DummyOperator(task_id='continue_task', dag=dag)
stop_op = DummyOperator(task_id='stop_task', dag=dag)

start_py >> branch_op >> [continue_op, stop_op]
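
# branch_func is only partially visible above; a minimal sketch consistent with the
# op_kwargs and the visible return statements (the 'True' comparison against the
# value pushed to XCom by input_task is an assumption) might look like:
def branch_func(input_task, cont_task, stop_task, **kwargs):
    value = kwargs['ti'].xcom_pull(task_ids=input_task)
    if value == 'True':
        return '{}'.format(cont_task)
    else:
        return '{}'.format(stop_task)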
Example #10
        task_ids='check_comic_info')
    print("export the message to a file")


with DAG('comic_pusher', default_args=default_args) as dag:

    get_read_history = PythonOperator(task_id='get_read_history',
                                      python_callable=process_metadata,
                                      op_args=['read'])

    check_comic_info = PythonOperator(task_id='check_comic_info',
                                      python_callable=check_comic_info,
                                      provide_context=True)

    decide_what_to_do = BranchPythonOperator(task_id='new_comic_available',
                                             python_callable=decide_what_to_do,
                                             provide_context=True)

    update_read_history = PythonOperator(task_id='update_read_history',
                                         python_callable=process_metadata,
                                         op_args=['write'],
                                         provide_context=True)

    generate_notification = PythonOperator(task_id='yes_generate_notification',
                                           python_callable=generate_message,
                                           provide_context=True)

    send_notification = SlackAPIPostOperator(
        task_id='send_notification',
        token="YOUR_SLACK_TOKEN",
        channel='#comic-notification',
Example #11
                                   dag=dag,
                                   retries=1)

check_s3_for_key = S3KeySensor(task_id='check_s3_for_key',
                               bucket_key=OUTPUT_FILE_KEY,
                               wildcard_match=True,
                               bucket_name=BUCKET_NAME,
                               s3_conn_id='aws_default',
                               timeout=20,
                               poke_interval=5,
                               dag=dag)

t_check_dataset_group = BranchPythonOperator(
    task_id='check_dataset_group',
    provide_context=True,
    python_callable=check_dataset_group,
    retries=1,
    dag=dag,
)

t_init_personalize = DummyOperator(
    task_id="init_personalize",
    trigger_rule=TriggerRule.ALL_SUCCESS,
    dag=dag,
)

t_create_dataset_group = PythonOperator(
    task_id='create_dataset_group',
    provide_context=True,
    python_callable=create_dataset_group,
    retries=1,
Example #12
finish = DummyOperator(task_id='finish', dag=dag)


def decide_which_path():
    now = datetime.now(timezone('Africa/Nairobi'))
    print('Current Hour in Africa/Nairobi')
    print(now.hour)
    if now.hour >= 5 and now.hour <= 21:
        return "rerun_trigger"
    else:
        return "sleep_trigger"


branch = BranchPythonOperator(task_id='branch',
                              python_callable=decide_which_path,
                              trigger_rule="all_done",
                              dag=dag)

rerun_trigger = TriggerDagRunOperator(task_id='rerun_trigger',
                                      trigger_dag_id=DAG_ID,
                                      dag=dag)

sleep_trigger = TriggerDagRunOperator(task_id='sleep_trigger',
                                      trigger_dag_id=SLEEP_DAG_ID,
                                      dag=dag)
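
# The excerpt does not show how the branch fans out to the two triggers; presumably
# the original DAG wires them roughly like this (an assumption, not shown above):
branch >> [rerun_trigger, sleep_trigger]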

pause_replication >> wait_for_replication_pause

wait_for_replication_pause >> update_flat_obs
wait_for_replication_pause >> update_flat_orders
wait_for_replication_pause >> update_flat_lab_obs
Example #13
"""
Example DAG demonstrating a workflow with nested branching. The join tasks are created with
``none_failed_or_skipped`` trigger rule such that they are skipped whenever their corresponding
``BranchPythonOperator`` are skipped.
"""

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import BranchPythonOperator
from airflow.utils.dates import days_ago

with DAG(dag_id="join_dag", start_date=days_ago(2), schedule_interval="@daily") as dag:
    branch_1 = BranchPythonOperator(task_id="branch_1", python_callable=lambda: "true_1")
    join_1 = DummyOperator(task_id="join_1", trigger_rule="none_failed_or_skipped")
    true_1 = DummyOperator(task_id="true_1")
    false_1 = DummyOperator(task_id="false_1")
    branch_2 = BranchPythonOperator(task_id="branch_2", python_callable=lambda: "true_2")
    join_2 = DummyOperator(task_id="join_2", trigger_rule="none_failed_or_skipped")
    true_2 = DummyOperator(task_id="true_2")
    false_2 = DummyOperator(task_id="false_2")
    false_3 = DummyOperator(task_id="false_3")

    branch_1 >> true_1 >> join_1
    branch_1 >> false_1 >> branch_2 >> [true_2, false_2] >> join_2 >> false_3 >> join_1
Example #14
    'owner': 'air',
    'provide_context': True,
    'start_date': days_ago(2),
}

dag = DAG(
    dag_id='copyBatteryLogs',
    default_args=args,
    schedule_interval='@daily',
    tags=['gemeni'],
    catchup=False
)

connect = BranchPythonOperator(task_id='connect',
                               python_callable=connect_to_server,
                               dag=dag)

mount = BashOperator(task_id='mount',
                    bash_command='./gemeni/mountDC.sh',
                    # xcom_push=True,
                    dag=dag)

list_source_folder = PythonOperator(task_id='read_source',
                             python_callable=read_source_folder,
                             provide_context=True,
                             dag=dag)

list_dist_folder = PythonOperator(task_id='read_dist',
                             python_callable=read_dist_folder,
                             provide_context=True,
Example #15
        cluster_name='spark-cluster-{{ ds_nodash }}',
        num_workers=2,
        master_machine_type='n1-standard-1',
        worker_machine_type='n1-standard-1',
        image_version='1.3.89-debian10',
        storage_bucket='fsp-logistics-spark-bucket',
        region='europe-central2'
    )

    create_cluster.doc_md = """## Create Dataproc cluster
    This task creates a Dataproc cluster in your project.
    """

    weekday_or_weekend = BranchPythonOperator(
        task_id='weekday_or_weekend',
        python_callable=assess_day,
        op_kwargs={'execution_date': '{{ ds }}'}
    )

    weekend_analytics = DataProcPySparkOperator(
        task_id='weekend_analytics',
        main='gs://fsp-logistics-spark-bucket/pyspark/weekend/gas_composition_count.py',
        cluster_name='spark-cluster-{{ ds_nodash }}',
        region='europe-central2',
        dataproc_pyspark_jars='gs://spark-lib/bigquery/spark-bigquery-latest.jar',
    )

    weekday_analytics = SubDagOperator(
        task_id='weekday_analytics',
        subdag=weekday_subdag(
            parent_dag='bigquery_data_analytics',
Example #16
    5: "Saturday",
    6: "Sunday",
}


def get_weekday(execution_date, **kwargs):
    print("Today it is: {}".format(weekdays[execution_date.weekday()]))


print_execution_time = PythonOperator(task_id="print_weekday",
                                      dag=dag,
                                      python_callable=get_weekday,
                                      provide_context=True)


def get_on_call(execution_date, **kwargs):
    return weekday_person_to_email[execution_date.weekday()]


branching = BranchPythonOperator(task_id="branching",
                                 dag=dag,
                                 python_callable=get_on_call,
                                 provide_context=True)

print_execution_time >> branching

final_task = DummyOperator(task_id="final_task", dag=dag)

for person in set(weekday_person_to_email.values()):
    branching >> DummyOperator(task_id=person, dag=dag) >> final_task
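
# weekday_person_to_email is defined earlier in the original file and is not part of
# this excerpt; a hypothetical mapping consistent with how it is used (weekday index
# to a person name, which doubles as a downstream task_id) could look like:
weekday_person_to_email = {
    0: "bob", 1: "alice", 2: "joe", 3: "bob", 4: "alice", 5: "joe", 6: "bob",
}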
Example #17
model23 = PythonOperator(
    task_id='model23',
    provide_context=True,
    python_callable=model23.run_model23,
    params={'file_path': '/home/hasitha/airflow/dags/files/model1.txt'},
    dag=dag)

model3 = PythonOperator(
    task_id='model3',
    provide_context=True,
    python_callable=model_final.run_model,
    params={'file_path': '/home/hasitha/airflow/dags/files/output.txt'},
    dag=dag)

branch1 = BranchPythonOperator(task_id='branch1',
                               python_callable=branch1.check_branch1_condition,
                               dag=dag)

branch2 = BranchPythonOperator(task_id='branch2',
                               python_callable=branch2.check_branch2_condition,
                               dag=dag)

model3.set_upstream(model21)
model3.set_upstream(model22)
model3.set_upstream(model23)
model21.set_upstream(branch2)
model22.set_upstream(branch2)
model23.set_upstream(branch2)
branch2.set_upstream(model11)
branch2.set_upstream(model12)
model11.set_upstream(branch1)
Example #18
email_subject = """
  Email report for {{ params.department }} on {{ ds_nodash }}
"""

email_report_task = EmailOperator(
    task_id='email_report_task',
    to='*****@*****.**',
    subject=email_subject,
    html_content='',
    params={'department': 'Data subscription services'},
    dag=dag)

no_email_task = DummyOperator(task_id='no_email_task', dag=dag)


def check_weekend(**kwargs):
    # kwargs['execution_date'] is a datetime object, so use the 'ds' string
    # ('YYYY-MM-DD') for strptime instead.
    dt = datetime.strptime(kwargs['ds'], '%Y-%m-%d')
    # If dt.weekday() is 0-4, it's Mon-Fri. If 5-6, it's Sat/Sun
    if dt.weekday() < 5:
        return 'email_report_task'
    else:
        return 'no_email_task'


branch_task = BranchPythonOperator(task_id='check_if_weekend',
                                   python_callable=check_weekend,
                                   provide_context=True,
                                   dag=dag)

sensor >> bash_task >> python_task
python_task >> branch_task >> [email_report_task, no_email_task]
Example #19
        notify=True,
        tags=['tag1', 'tag2'],
        # If the script at s3 location has any qubole specific macros to be replaced
        # macros='[{"date": "{{ ds }}"}, {"name" : "abc"}]',
        trigger_rule="all_done")

    t3 = PythonOperator(task_id='compare_result',
                        provide_context=True,
                        python_callable=compare_result,
                        trigger_rule="all_done")

    t3 << [t1, t2]

    options = ['hadoop_jar_cmd', 'presto_cmd', 'db_query', 'spark_cmd']

    branching = BranchPythonOperator(
        task_id='branching', python_callable=lambda: random.choice(options))

    branching << t3

    join = DummyOperator(task_id='join', trigger_rule='one_success')

    t4 = QuboleOperator(
        task_id='hadoop_jar_cmd',
        command_type='hadoopcmd',
        sub_command='jar s3://paid-qubole/HadoopAPIExamples/'
        'jars/hadoop-0.20.1-dev-streaming.jar '
        '-mapper wc '
        '-numReduceTasks 0 -input s3://paid-qubole/HadoopAPITests/'
        'data/3.tsv -output '
        's3://paid-qubole/HadoopAPITests/data/3_wc',
        cluster_label='{{ params.cluster_label }}',
Example #20
def source_to_use(**kwargs):
	ti = kwargs['ti']
	source = ti.xcom_pull(task_ids='hook_task')
	print("source fetch from XCOM: {}".format(source))
	return source

def check_for_activated_source(**kwargs):
	ti = kwargs['ti']
	return ti.xcom_pull(task_ids='xcom_task').lower()

with DAG('branch_dag',
	default_args=default_args,
	schedule_interval='@once') as dag:

	start_task 	= DummyOperator(task_id='start_task')
	hook_task 	= PythonOperator(task_id='hook_task', python_callable=get_activated_sources)
	xcom_task 	= PythonOperator(task_id='xcom_task', python_callable=source_to_use, provide_context=True)
	branch_task 	= BranchPythonOperator(task_id='branch_task', python_callable=check_for_activated_source, provide_context=True)
	mysql_task 	= BashOperator(task_id='mysql', bash_command='echo "MYSQL is activated"')
	postgresql_task = BashOperator(task_id='postgresql', bash_command='echo "PostgreSQL is activated"')
	s3_task 	= BashOperator(task_id='s3', bash_command='echo "S3 is activated"')
	mongo_task 	= BashOperator(task_id='mongo', bash_command='echo "Mongo is activated"')
	
	start_task >> hook_task >> xcom_task >> branch_task
	branch_task >> mysql_task
	branch_task >> postgresql_task
	branch_task >> s3_task
	branch_task >> mongo_task
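
# get_activated_sources (used by hook_task) is defined above this excerpt in the
# original file; for the branch to work it only needs to return one of the downstream
# task_ids. A purely illustrative stand-in:
def get_activated_sources():
    # Must match 'mysql', 'postgresql', 's3' or 'mongo' once lower-cased.
    return 'MYSQL'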
	
Example #21
}

# BranchPython operator that depends on past
# and where tasks may run or be skipped on
# alternating runs
dag = DAG(dag_id='example_branch_dop_operator_v3',
          schedule_interval='*/1 * * * *',
          default_args=args,
          tags=['example'])


def should_run(**kwargs):
    print('------------- exec dttm = {} and minute = {}'.format(
        kwargs['execution_date'], kwargs['execution_date'].minute))
    if kwargs['execution_date'].minute % 2 == 0:
        return "dummy_task_1"
    else:
        return "dummy_task_2"


cond = BranchPythonOperator(
    task_id='condition',
    provide_context=True,
    python_callable=should_run,
    dag=dag,
)

dummy_task_1 = DummyOperator(task_id='dummy_task_1', dag=dag)
dummy_task_2 = DummyOperator(task_id='dummy_task_2', dag=dag)
cond >> [dummy_task_1, dummy_task_2]
Example #22
start = DummyOperator(task_id='start', dag=dag)


def print_all(*args, **kwargs):
    logger.info('print all')
    logger.info('args: %s', args)
    logger.info('kwargs: %s', kwargs)


def check_callable(*args, **kwargs):
    return 'foo'


check = BranchPythonOperator(task_id='check',
                             python_callable=check_callable,
                             dag=dag)

foo = PythonOperator(task_id='foo',
                     python_callable=print_all,
                     provide_context=True,
                     dag=dag)

bar = PythonOperator(task_id='bar',
                     python_callable=print_all,
                     provide_context=True,
                     dag=dag)

start >> check
check >> foo
check >> bar
Example #23

def _pick_a_branch(execution_date, **context):
    weekday = execution_date.weekday()
    return job_map[weekday_person_to_email[weekday]]


def _print_exec_date(execution_date, **context):
    print(execution_date)


print_date = PythonOperator(
    task_id="print_branching_date",
    python_callable=_print_exec_date,
    provide_context=True,
    dag=dag)

branching = BranchPythonOperator(task_id='branching_operator',
                                 python_callable=_pick_a_branch,
                                 provide_context=True,
                                 dag=dag)

final_task = DummyOperator(task_id='final_task',
                           dag=dag,
                           trigger_rule=TriggerRule.ONE_SUCCESS)

for value in job_map.values():
    branching >> DummyOperator(task_id=value, dag=dag) >> final_task

print_date >> branching
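
# job_map and weekday_person_to_email come from outside this excerpt; hypothetical
# definitions consistent with how they are used (weekday -> person -> task_id):
weekday_person_to_email = {0: "bob", 1: "alice", 2: "bob", 3: "alice",
                           4: "joe", 5: "joe", 6: "bob"}
job_map = {"bob": "email_bob", "alice": "email_alice", "joe": "email_joe"}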
Example #24
    endpoint_name = 'class_attendance'
    get_enpdpoints_task_id = "get_{0}_dl_endpoint".format(endpoint_name)
    branch_task_id = "branch_row_count_{0}_dl".format(endpoint_name)
    file_to_gcs_task_id = "{0}_to_gcs".format(endpoint_name)
    zero_branch_task_id = "{0}_zero_row".format(endpoint_name)

    t2 = PythonOperator(task_id=get_enpdpoints_task_id,
                        python_callable=get_class_attendance,
                        op_args=[SAVE_PATH, BASE_URL, API_KEYS],
                        templates_dict=ep_template)

    t_branch = BranchPythonOperator(task_id=branch_task_id,
                                    python_callable=row_count_branch,
                                    op_args=[
                                        get_enpdpoints_task_id,
                                        file_to_gcs_task_id,
                                        zero_branch_task_id
                                    ],
                                    trigger_rule="all_done")

    t_gcs = FileToGoogleCloudStorageOperator(
        task_id=file_to_gcs_task_id,
        google_cloud_storage_conn_id='gcs_silo',
        bucket="deanslist",
        src="{{ task_instance.xcom_pull(task_ids='" + get_enpdpoints_task_id +
        "', key='dl_file_path' )}}",
        dst=endpoint_name + "/{{ task_instance.xcom_pull(task_ids='" +
        get_enpdpoints_task_id + "', key='dl_file_name') }}",
        dag=dag)

    t_zero_row = DummyOperator(task_id=zero_branch_task_id)
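
    # row_count_branch is defined outside this excerpt. A hedged sketch of what it
    # might do, assuming the download task pushes a row count to XCom under a
    # 'row_count' key (and, on Airflow 1.x, that provide_context=True would also be
    # needed on the branch operator for the context kwargs to arrive):
    def row_count_branch(count_task_id, gcs_task_id, zero_task_id, **context):
        rows = context['ti'].xcom_pull(task_ids=count_task_id, key='row_count')
        return gcs_task_id if rows else zero_task_id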
Example #25
    Print the payload "message" passed to the DagRun conf attribute.
    :param context: The execution context
    :type context: dict
    """
    print("Remotely received value of {}".format(str(context["dag_run"].conf)))

########################################################################################################################
###############################################                     ####################################################
###############################################  TASKS DEFINITIONS  ####################################################
###############################################                     ####################################################
########################################################################################################################

network_init = BashOperator(task_id='network_init',bash_command='ansible --version -vvv', dag=dag,)
teams_message = PythonOperator(task_id='send_teams_message', python_callable=send_teams_message, trigger_rule='one_success',dag=dag,)
deploy_playbook_sbc = BashOperator(task_id='deploy_sbc',bash_command='cd ' + PLAYBOOKS + ' && ansible-playbook -i ' + PLAYBOOKS + '/inventory ' + PLAYBOOKS + '/sbc_audiocodes_telnet.yaml', dag=dag,)
deploy_validation = BranchPythonOperator(task_id='deploy_validation', python_callable=deploy_validation, dag=dag,)
query_formio = PythonOperator(task_id='query_formio', python_callable=query_formio,dag=dag,)
rollback = PythonOperator(task_id='rollback', python_callable=rollback,dag=dag,)
write_influx_result = PythonOperator(task_id='write_influx_result', python_callable=write_influx_result, trigger_rule='one_success', dag=dag,)
network_end = BashOperator(task_id='network_end', bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"', trigger_rule='one_success', dag=dag,)
backup_sbc = PythonOperator(task_id='backup_sbc',python_callable=backup_sbc, dag=dag,)
context_data = PythonOperator(task_id='dag_context', python_callable=context_data, dag=dag)
init_teams_message = PythonOperator(task_id='init_teams_message', python_callable=send_teams_init, trigger_rule='one_success',dag=dag,)

########################################################################################################################
################################################                  ######################################################
################################################  TASKS WORKFLOW  ######################################################
################################################                  ######################################################
########################################################################################################################

network_init >> query_formio
Example #26
        'email': [os.environ['email']],
        'email_on_failure': True,
        'email_on_retry': False,
        'retries': 4,
        'retry_delay': timedelta(minutes=15),
      }

with DAG('update_data_if_not_already_up_to_date',
         catchup=False,
         default_args=default_args,
         schedule_interval="@daily",
         ) as dag:

    cond = BranchPythonOperator(
        task_id='check_if_data_is_up_to_date',
        python_callable=should_run,
        dag=dag,
    )

    data_is_already_up_to_date = DummyOperator(task_id='data_is_already_up_to_date')

    start_spark_cluster = BashOperator(task_id='start_spark_cluster',
                                       bash_command='/usr/local/spark/sbin/start-all.sh ')

    process_reddit_data = BashOperator(task_id='process_data',
                             bash_command='/usr/local/spark/sbin/spark-submit --master spark://10.0.0.16:7077 --packages org.apache.hadoop:hadoop-aws:2.7.3, --packages org.postgresql:postgresql:42.2.5 /identifying_trending_topics_on_social_media/process_data/process_reddit_comments.py ')

    stop_spark_cluster = BashOperator(task_id='stop_spark_cluster',bash_command='/usr/local/spark/sbin/stop-all.sh ')


cond >> start_spark_cluster >> process_reddit_data >> stop_spark_cluster
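
# should_run is defined above this excerpt; whatever check it performs, it must
# return one of the two downstream task_ids. A minimal illustrative version (the
# freshness check itself is an assumption), plus the wiring for the skip branch:
def should_run(**kwargs):
    data_is_fresh = False  # placeholder for a real up-to-date check
    return 'data_is_already_up_to_date' if data_is_fresh else 'start_spark_cluster'

cond >> data_is_already_up_to_date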
Example #27
from airflow.models import Variable

def demo_check():

    demo = Variable.get("Demo_Flag")

    if demo == '1':
        return "send_sns"

    return "end_task"

skip_operator = DummyOperator(task_id='skip_message', trigger_rule='one_success', dag=dag)
end_operator = DummyOperator(task_id='end_task', trigger_rule='one_success', dag=dag)

demo_check_operator = BranchPythonOperator(
    task_id='demo_check',
    python_callable=demo_check,
    trigger_rule='one_success',
    dag=dag
)

sns_operator = SnsPublishOperator(
    task_id='send_sns',
    target_arn='arn:aws:sns:us-west-2:356032320829:airflow',
    message='Airflow fingerprinting done',
    aws_conn_id='S3_conn',
    trigger_rule='one_success',
    dag=dag
)
Example #28
            task_id=f"download_file_if_changed_{identifier}",
            python_callable=download_file_if_changed,
            op_kwargs={
                "url": original_url_exp,
                "target": zip_path_exp
            })

        calc_hash = CalculateHash(task_id=f'calc_hash_{identifier}',
                                  path=zip_path_exp)

        check_if_is_already_up = BranchPythonOperator(
            task_id=f"branching_{identifier}",
            provide_context=True,
            python_callable=check_if_is_already_processed,
            op_kwargs={
                'pull_hash_from': f'calc_hash_{identifier}',
                'data_set': '{{ params.data_set }}',
                'proceed_path': f"proceed_to_insert_{identifier}",
                'db_name': 'db',
                'already_processed_path': "success",
            })

        upload_to_ftp_step = PythonOperator(
            task_id=f"upload_to_ftp_{identifier}",
            python_callable=upload_to_ftp,
            op_kwargs={
                'con_id': 'ftp_data.controlciudadano.org.py',
                'remote_path': ftp_target_path,
                'local_path': zip_path_exp
            })
Example #29
    )

    t_pipeline_exec_cwl2 = BashOperator(task_id='pipeline_exec_cwl2',
                                        bash_command=""" \
        tmp_dir={{tmp_dir_path(run_id)}} ; \
        cd ${tmp_dir}/cwl_out ; \
        {{ti.xcom_pull(task_ids='build_cmd2')}} >> $tmp_dir/session.log 2>&1 ; \
        echo $?
        """)

    #next_op if true, bail_op if false. test_op returns value for testing.
    t_maybe_keep_cwl2 = BranchPythonOperator(
        task_id='maybe_keep_cwl2',
        python_callable=utils.pythonop_maybe_keep,
        provide_context=True,
        op_kwargs={
            'next_op': 'move_data',
            'bail_op': 'set_dataset_error',
            'test_op': 'pipeline_exec_cwl2'
        })
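
    # utils.pythonop_maybe_keep is a project helper that is not shown in this excerpt;
    # a sketch consistent with the comment above, assuming the test_op task pushed its
    # shell exit status to XCom and that '0' means success:
    def pythonop_maybe_keep(**kwargs):
        status = kwargs['ti'].xcom_pull(task_ids=kwargs['test_op'])
        return kwargs['next_op'] if str(status).strip() == '0' else kwargs['bail_op']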

    #Others
    t_send_create_dataset = PythonOperator(
        task_id='send_create_dataset',
        python_callable=utils.pythonop_send_create_dataset,
        provide_context=True,
        op_kwargs={
            'parent_dataset_uuid_callable': get_parent_dataset_uuid,
            'http_conn_id': 'ingest_api_connection',
            'endpoint': '/datasets/derived',
            'dataset_name_callable': build_dataset_name,
Example #30
    else:
        return "email_joe"


args = {
    "owner": "airflow",
    "start_date": datetime(2019, 12, 1),
}

with DAG(dag_id='sixth_dag', default_args=args,
         schedule_interval='@daily') as dag:

    print_execution_date = PythonOperator(task_id='print_execution_date',
                                          python_callable=_get_execution_date,
                                          provide_context=True)

    branching = BranchPythonOperator(task_id='branching',
                                     python_callable=_get_branch,
                                     provide_context=True)

    email_bob = DummyOperator(task_id='email_bob')
    email_alice = DummyOperator(task_id='email_alice')
    email_joe = DummyOperator(task_id='email_joe')

    final_task = BashOperator(task_id='final_task',
                              bash_command="echo final task",
                              trigger_rule="none_failed")

print_execution_date >> branching >> [email_bob, email_alice, email_joe] >> final_task