Example 1
import airflow
from datetime import datetime, timedelta
from airflow.operators.hive_operator import HiveOperator
from airflow.operators.hive_to_mysql import HiveToMySqlTransfer
from airflow.sensors import OssSensor

args = {
    'owner': 'zhenqian.zhang',
    'start_date': datetime(2019, 8, 8),
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('okash_hourly',
                  schedule_interval="10 * * * *",
                  default_args=args)

check_client_file = OssSensor(
    task_id='check_client_file',
    bucket_key=
    'okash/okash/{table}/dt={{{{ ds }}}}/hour={{{{ execution_date.strftime("%H") }}}}/'
    .format(table='client'),
    bucket_name='okash',
    timeout=3600,
    dag=dag)

add_client_partitions = HiveOperator(task_id='add_client_partitions',
                                     hql="""
            ALTER TABLE ods_log_client_hi ADD IF NOT EXISTS PARTITION (dt = '{{ ds }}', hour = '{{ execution_date.strftime("%H") }}');
    """,
# Convert dates into YYYY-MM-DD format, e.g. d.strftime('%Y-%m-%d')

# We need both yesterday's and today's dates; the prefix and suffix are the same in this example.
file_prefix = "myPrefiex/"
file_suffix = "_file.csv"

file_date = today.strftime('%Y-%m-%d')
full_path_today = file_prefix + file_date + file_suffix

file_date_yesterday = yesterday.strftime('%Y-%m-%d')
full_path_yesterday = file_prefix + file_date_yesterday + file_suffix

with airflow.DAG("file_sensor_example",
                 default_args=default_args,
                 schedule_interval="@once") as dag:

    start_task = DummyOperator(task_id="start")
    stop_task = DummyOperator(task_id="stop")
    sensor_task = FileSensor(task_id="file_sensor_task",
                             poke_interval=30,
                             filepath="/tmp/")
    # we expect yesterday's file to exist
    gcs_file_sensor_yesterday = GoogleCloudStorageObjectSensor(
        task_id='gcs_file_sensor_yesterday_task',
        bucket='myBucketName',
        object=full_path_yesterday)
    # for this example we expect today's file not to exist, so the sensor keeps poking until the 120-second timeout; check the docs for more options such as mode and soft_fail (a sketch of those options follows below)
    gcs_file_sensor_today = GoogleCloudStorageObjectSensor(
        task_id='gcs_file_sensor_today_task',
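The gcs_file_sensor_today call is cut off above. The following is only a sketch of how the options mentioned in the comment could look; apart from the 120-second timeout, the parameter values are assumptions rather than the original ones:

    # Sketch only: values other than timeout=120 are illustrative assumptions.
    gcs_file_sensor_today = GoogleCloudStorageObjectSensor(
        task_id='gcs_file_sensor_today_task',
        bucket='myBucketName',
        object=full_path_today,
        timeout=120,           # stop poking after 120 seconds
        poke_interval=30,      # re-check every 30 seconds
        mode='poke',           # 'reschedule' would free the worker slot between pokes
        soft_fail=True)        # mark the task skipped instead of failed on timeout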
# Central bank monthly report metrics
#
args = {
    'owner': 'liushuzhen',
    'start_date': datetime(2020, 3, 24),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'app_opay_user_agent_trans_sum_d',
    schedule_interval="30 02 * * *",
    default_args=args,
)

##----------------------------------------- Dependencies ---------------------------------------##

dwd_opay_user_transaction_record_df_prev_day_task = OssSensor(
    task_id='dwd_opay_user_transaction_record_df_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str=
        "opay/opay_dw/dwd_opay_user_transaction_record_df/country_code=NG",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # check the dependency status once a minute until it is satisfied
    dag=dag)
import os
from airflow.sensors import OssSensor

args = {
        'owner': 'yangmingze',
        'start_date': datetime(2019, 12, 6),
        'depends_on_past': False,
        'retries': 3,
        'retry_delay': timedelta(minutes=2),
        'email': ['*****@*****.**'],
        'email_on_failure': True,
        'email_on_retry': False,
} 

dag = airflow.DAG( 'dwd_oride_promoter_users_device_df', 
    schedule_interval="00 02 * * *",
    default_args=args,
    )

##----------------------------------------- Variables ---------------------------------------##

db_name="oride_dw"
table_name="dwd_oride_promoter_users_device_df"

##----------------------------------------- Dependencies ---------------------------------------##
# Get variables
code_map=eval(Variable.get("sys_flag"))

# Check for ufile (CDH environment)
if code_map["id"].lower()=="ufile":

    ods_sqoop_promoter_promoter_users_device_df_tesk = UFileSensor(
import airflow
import os
from airflow.contrib.operators.ssh_operator import SSHOperator
from datetime import datetime

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2020, 1, 1, 0, 0),
}

dag = airflow.DAG(
    'comet_docker_import',
    default_args=default_args,
    schedule_interval=None
)

HOST_IP = os.getenv('HOST_IP')
HOST_DIR = os.getenv('HOST_DIR')

cmd_ssh = f'cd {HOST_DIR} && ./1.import.sh '

task_1 = SSHOperator(
    ssh_conn_id='comet_host',
    task_id='import',
    command=cmd_ssh,
    do_xcom_push=True,
    dag=dag
)
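Because do_xcom_push=True, the command's stdout is pushed to XCom (depending on the Airflow version it may arrive base64-encoded). A hypothetical downstream consumer, not part of the original DAG, could pull it like this:

# Hypothetical follow-up task (not in the original DAG): pull the stdout that
# the SSHOperator pushed to XCom.
from airflow.operators.python_operator import PythonOperator

def print_import_output(**context):
    output = context['ti'].xcom_pull(task_ids='import')
    print(output)  # may be base64-encoded depending on the Airflow version

task_2 = PythonOperator(
    task_id='print_import_output',
    python_callable=print_import_output,
    provide_context=True,
    dag=dag)

task_1 >> task_2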

Example 6
import requests
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2020, 3, 29),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('ods_sqoop_base_receive_money_request_record_di',
                  schedule_interval="30 00 * * *",
                  default_args=args,
                  )

##----------------------------------------- Dependencies ---------------------------------------##

ods_binlog_base_receive_money_request_record_hi_task = OssSensor(
    task_id='ods_binlog_base_receive_money_request_record_hi_task',
    bucket_key='{hdfs_path_str}/dt={pt}/hour=22/_SUCCESS'.format(
        hdfs_path_str="opay_binlog/opay_transaction_db.opay_transaction.receive_money_request_record",
        pt='{{ds}}'
    ),
    bucket_name='opay-datalake',
    poke_interval=60,  # check the dependency status once a minute until it is satisfied
    dag=dag
)
Example 7
import requests
import os

args = {
    'owner': 'lili.chen',
    'start_date': datetime(2019, 11, 15),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('dm_oride_driver_order_base_cube',
                  schedule_interval="50 00 * * *",
                  default_args=args)
##----------------------------------------- Variables ---------------------------------------##

db_name = "oride_dw"
table_name = "dm_oride_driver_order_base_cube"

##----------------------------------------- Dependencies ---------------------------------------##
# Get variables
code_map = eval(Variable.get("sys_flag"))

# Check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
    # depends on the previous day's partition
    dependence_dwm_oride_driver_order_base_di_prev_day_task = UFileSensor(
        task_id='dwm_oride_driver_order_base_di_prev_day_task',
import os

args = {
    'owner': 'liushuzhen',
    'start_date': datetime(2019, 12, 20),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dim_opay_user_base_di',
    schedule_interval="00 01 * * *",
    default_args=args,
)

##----------------------------------------- Dependencies ---------------------------------------##

ods_sqoop_base_user_di_prev_day_task = OssSensor(
    task_id='ods_sqoop_base_user_di_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="opay_dw_sqoop_di/opay_user/user", pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # check the dependency status once a minute until it is satisfied
    dag=dag)


##----------------------------------------- Task timeout monitoring ---------------------------------------##
import os

args = {
    'owner': 'yuanfeng',
    'start_date': datetime(2019, 11, 25),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_pre_opos_payment_order_di',
    schedule_interval="30 01 * * *",
    default_args=args,
)

##----------------------------------------- Dependencies ---------------------------------------##

dim_opos_bd_relation_df_task = OssSensor(
    task_id='dim_opos_bd_relation_df_task',
    bucket_key='{hdfs_path_str}/country_code=nal/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="opos/opos_dw/dim_opos_bd_relation_df", pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # check the dependency status once a minute until it is satisfied
    dag=dag)

ods_sqoop_base_pre_opos_payment_order_di_task = OssSensor(
    task_id='ods_sqoop_base_pre_opos_payment_order_di_task',
from airflow.sensors import OssSensor

args = {
    'owner': 'lijialong',
    'start_date': datetime(2019, 11, 4),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'app_oride_order_global_operate_overview_d',
    schedule_interval="30 2 * * *",
    default_args=args,
)
##----------------------------------------- Variables ---------------------------------------##

db_name = "oride_dw"
table_name = "app_oride_order_global_operate_overview_d"

##----------------------------------------- Dependencies ---------------------------------------##
# Get variables
code_map = eval(Variable.get("sys_flag"))

# Check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
    dependence_dwm_oride_order_base_di_task = UFileSensor(
        task_id='dwm_oride_order_base_di_task',
Example 11
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2020, 3, 31),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'ods_sqoop_base_transfer_not_register_record_di',
    schedule_interval="30 00 * * *",
    default_args=args,
)

##----------------------------------------- Dependencies ---------------------------------------##

ods_binlog_base_transfer_not_register_record_hi_task = OssSensor(
    task_id='ods_binlog_base_transfer_not_register_record_hi_task',
    bucket_key='{hdfs_path_str}/dt={pt}/hour=22/_SUCCESS'.format(
        hdfs_path_str=
        "opay_binlog/opay_transaction_db.opay_transaction.transfer_not_register_record",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # check the dependency status once a minute until it is satisfied
    dag=dag)
Example 12
from datetime import datetime, timedelta
import airflow
from airflow.hooks.S3_hook import S3Hook
from airflow.operators.python_operator import PythonOperator
from airflow import models
import logging

args = {
    'owner': 'airflow',
    'start_date': airflow.utils.dates.days_ago(7),
    'provide_context': True
}


def initialize_etl_process():
    logging.info('Creating connections, pool and sql path')
    source_s3 = S3Hook(aws_conn_id='aws_default')
    source_s3.list_keys('deutsche-boerse-eurex-pds',
                        prefix='2018-11-18',
                        delimiter='/')


dag = airflow.DAG('deutsche_boerse',
                  schedule_interval="@once",
                  default_args=args,
                  max_active_runs=1)

t1 = PythonOperator(task_id='analyze_trades',
                    python_callable=initialize_etl_process,
                    provide_context=False,
                    dag=dag)
Example 13
from utils.get_local_time import GetLocalTime

args = {
    'owner': 'lili.chen',
    'start_date': datetime(2020, 4, 1),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_ocredit_phones_repayment_detail_di',
    schedule_interval="30 00 * * *",
    default_args=args,
)

##----------------------------------------- Variables ---------------------------------------##
db_name = "ocredit_phones_dw"
table_name = "dwd_ocredit_phones_repayment_detail_di"
hdfs_path = "oss://opay-datalake/opay/ocredit_phones_dw/" + table_name
config = eval(Variable.get("ocredit_time_zone_config"))
time_zone = config['NG']['time_zone']
##----------------------------------------- Dependencies ---------------------------------------##

### check the partition dependency for the current hour
ods_binlog_base_t_repayment_detail_all_hi_check_task = OssSensor(
    task_id='ods_binlog_base_t_repayment_detail_all_hi_check_task',
    bucket_key='{hdfs_path_str}/dt={pt}/hour=23/_SUCCESS'.format(
Example 14
import requests
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2019, 9, 21),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('dim_opay_region_state_mapping_df',
                  schedule_interval="00 00 * * *",
                  default_args=args)


##----------------------------------------- Task timeout monitoring ---------------------------------------##
def fun_task_timeout_monitor(ds, dag, **op_kwargs):

    dag_ids = dag.dag_id

    msg = [{
        "dag": dag,
        "db": "opay_dw",
        "table": "{dag_name}".format(dag_name=dag_ids),
        "partition": "country_code=nal/dt={pt}".format(pt=ds),
        "timeout": "3000"
    }]
from utils.get_local_time import GetLocalTime

args = {
    'owner': 'yuanfeng',
    'start_date': datetime(2020, 3, 30),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_otrade_b2c_mall_merchant_hf',
    schedule_interval="25 * * * *",
    default_args=args,
)

##----------------------------------------- Variables ---------------------------------------##
db_name = "otrade_dw"
table_name = "dwd_otrade_b2c_mall_merchant_hf"
hdfs_path = "oss://opay-datalake/otrade/otrade_dw/" + table_name
config = eval(Variable.get("otrade_time_zone_config"))
time_zone = config['NG']['time_zone']

##----------------------------------------- Dependencies ---------------------------------------##
### check the dependency on the latest merchant table
dwd_otrade_b2c_mall_merchant_hf_check_pre_locale_task = OssSensor(
    task_id='dwd_otrade_b2c_mall_merchant_hf_check_pre_locale_task',
    bucket_key='{hdfs_path_str}/country_code=NG/dt={pt}/hour={hour}/_SUCCESS'.
Example 16
        self.data = dict(query=rendered_query)
        return super().execute(context)


def save_file(task_instance, params, **kwargs):
    previous_task_id = 'raw_{}'.format(params['id'])
    data = task_instance.xcom_pull(task_ids=previous_task_id)

    filename = '{}.json'.format(params['id'])
    with open(filename, 'w') as file:
        file.write(data)
        return file.name


with airflow.DAG(dag_id='airbods',
                 start_date=datetime.datetime(
                     2021, 5, 17, tzinfo=datetime.timezone.utc)) as dag:
    # Datacake HTTP
    hook = HttpHook(http_conn_id='datacake_airbods')

    # List devices
    response = hook.run(endpoint=None,
                        json=dict(query=textwrap.dedent("""
query {
  allDevices(inWorkspace:"0bdfb2eb-6531-4afb-a842-ce6b51d3c980") {
    id
    serialNumber
    verboseName
  }
}
""")))
Example 17
import os

args = {
    'owner': 'yangmingze',
    'start_date': datetime(2020, 1, 13),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_oride_driver_pay_records_df',
    schedule_interval="30 02 * * *",
    default_args=args,
)

##----------------------------------------- Dependencies ---------------------------------------##

ods_sqoop_base_data_driver_pay_records_df_tesk = OssSensor(
    task_id='ods_sqoop_base_data_driver_pay_records_df_tesk',
    bucket_key="{hdfs_path_str}/dt={pt}/_SUCCESS".format(
        hdfs_path_str="oride_dw_sqoop/oride_data/data_driver_pay_records",
        pt="{{ds}}"),
    bucket_name='opay-datalake',
    poke_interval=60,  # check the dependency status once a minute until it is satisfied
    dag=dag)

##----------------------------------------- Variables ---------------------------------------##
Example 18
from plugins.TaskTouchzSuccess import TaskTouchzSuccess
from airflow.sensors import OssSensor

args = {
    'owner': 'linan',
    'start_date': datetime(2019, 5, 20),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('dwd_oride_driver_accept_order_click_detail_di',
                  schedule_interval="10 00 * * *",
                  default_args=args,
                  )


##----------------------------------------- Variables ---------------------------------------##

db_name="oride_dw"
table_name="dwd_oride_driver_accept_order_click_detail_di"

##----------------------------------------- Dependencies ---------------------------------------##

# Get variables
code_map=eval(Variable.get("sys_flag"))

# Check for ufile (CDH environment)
if code_map["id"].lower()=="ufile":
import requests
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2019, 9, 22),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('app_opay_transaction_consume_scenario_sum_d',
                  schedule_interval="00 03 * * *",
                  default_args=args)

##----------------------------------------- Dependencies ---------------------------------------##

dwd_opay_transaction_record_di_prev_day_task = OssSensor(
    task_id='dwd_opay_transaction_record_di_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str=
        "opay/opay_dw/dwd_opay_transaction_record_di/country_code=NG",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # check the dependency status once a minute until it is satisfied
    dag=dag)

# Central bank monthly report metrics
#
args = {
    'owner': 'lishuai',
    'start_date': datetime(2019, 12, 1),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_opay_client_event_base_di',
    schedule_interval="00 02 * * *",
    default_args=args,
)

##----------------------------------------- Dependencies ---------------------------------------##

opay_ep_logv0_prev_hour_task = HivePartitionSensor(
    task_id="opay_ep_logv0_prev_hour_task",
    table="opay_ep_logv0",
    partition="dt='{{ ds }}' and hour='22'",
    schema="oride_source",
    poke_interval=60,  # check the dependency status once a minute until it is satisfied
    dag=dag)

opay_ep_logv1_prev_hour_task = HivePartitionSensor(
    task_id="opay_ep_logv1_prev_hour_task",
Example 21
import airflow
from datetime import datetime, timedelta
from acme.operators.dwh_operators import PostgresToPostgresOperator
from acme.operators.dwh_operators import AuditOperator
from airflow.models import Variable

args = {
    'owner': 'airflow',
    'start_date': datetime(2019, 5, 8, 17, 52),
    'provide_context': True
}

tmpl_search_path = Variable.get("sql_path")

dag = airflow.DAG('customer_staging',
                  schedule_interval="*/2 * * * *",
                  dagrun_timeout=timedelta(minutes=60),
                  template_searchpath=tmpl_search_path,
                  default_args=args,
                  max_active_runs=1)

get_auditid = AuditOperator(task_id='get_audit_id',
                            postgres_conn_id='postgres_dwh',
                            audit_key="customer",
                            cycle_dtm="{{ ts }}",
                            dag=dag,
                            pool='postgres_dwh')

extract_customer = PostgresToPostgresOperator(
    sql='select_customer.sql',
    pg_table='staging.customer',
    src_postgres_conn_id='postgres_oltp',
    dest_postgress_conn_id='postgres_dwh',
Example 22
# airflow DAG arguments
args = {
    "owner": "hunterowens",
    "start_date": airflow.utils.dates.days_ago(7),
    "provide_context": True,
    "email": ["*****@*****.**"],
    "email_on_failure": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

# initiating the DAG
dag = airflow.DAG(
    "merge_homeless_datasets_into_one",
    schedule_interval="@monthly",
    default_args=args,
    max_active_runs=1,
)

# When a future annual dataset becomes available, just define a new url and add it to the
# urls list (see the sketch after the url definitions below). The path & filename after '-O'
# are where the downloaded file will be placed and what it will be renamed to.
url2018 = ("wget -O /tmp/homeless2018.xlsx "
           "https://github.com/CityOfLosAngeles/aqueduct"
           "/raw/master/dags/homelessness/static_datasets/homeless2018.xlsx")
url2017 = ("wget -O /tmp/homeless2017.xlsx "
           "https://github.com/CityOfLosAngeles/aqueduct/"
           "raw/master/dags/homelessness/static_datasets/homeless2017.xlsx")
url2016 = ("wget -O /tmp/homeless2016.xlsx "
           "https://github.com/CityOfLosAngeles/aqueduct/"
           "raw/master/dags/homelessness/static_datasets/homeless2016.xlsx")
Example 23
from airflow.sensors import OssSensor

args = {
    'owner': 'yangmingze',
    'start_date': datetime(2019, 11, 28),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_oride_abnormal_order_df',
    schedule_interval="20 00 * * *",
    default_args=args,
)

##----------------------------------------- Variables ---------------------------------------##

db_name = "oride_dw"
table_name = "dwd_oride_abnormal_order_df"
##----------------------------------------- Dependencies ---------------------------------------##

# Get variables
code_map = eval(Variable.get("sys_flag"))

# Check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
# Set the expected runtime in seconds; 0 falls back to the default of 7200 seconds
expected_runtime = 0

# The group that owns this DAG
owner = "Analytic Services"

default_args = {
    'owner': owner,
    'depends_on_past': False,
    'start_date': datetime(2017, 12, 22),
    'schedule_interval': '@daily'
}

dag = airflow.DAG(dag_id='s2_collectible_completion_item_level',
                  default_args=default_args
                  )

# Start running at this time
start_time_task = TimeSensor(target_time=time(7, 00),
                             task_id='start_time_task',
                             dag=dag
                             )

##current_date = (datetime.now()).date()
##stats_date = current_date - timedelta(days=1)

def qubole_operator(task_id, sql, retries, retry_delay, dag):
    return PythonOperator(
        task_id=task_id,
        python_callable=qubole_wrapper,
Example 25
    6: "Alice",  # Sunday
}


def branch_fnc(**kwargs):
    return "task_for_" + weekday_person_to_email[pendulum.today().weekday()]


def pweedd():
    print(pendulum.today().weekday())


with airflow.DAG(
    dag_id="test_dag_4",
    start_date=datetime(2019, 11, 17),
    schedule_interval=None,
    catchup=False,
    default_args=default_args,
) as dag:

    final_task = DummyOperator(task_id="final_task", dag=dag)

    for day in range(7):  # one branch per weekday (0-6); task_ids must be unique per iteration
        printd = PythonOperator(
            dag=dag, task_id="print_week_day_{}".format(day), python_callable=pweedd
        )

        bop = BranchPythonOperator(
            task_id="branching_{}".format(day),
            python_callable=branch_fnc,
            provide_context=True,
Example 26
dingding_alert = DingdingAlert(dingding_address)

args = {
    'owner': 'linan',
    'start_date': datetime(2020, 3, 19),
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'email': ['*****@*****.**'],
    # 'email_on_failure': True,
    # 'email_on_retry': False,
}

dag = airflow.DAG('bussiness_monitor',
                  schedule_interval="*/5 * * * *",
                  default_args=args)

mysql_hook = MySqlHook("bussiness_mysql")
mysql_conn = mysql_hook.get_conn()
mysql_cursor = mysql_conn.cursor()

exec_command = """
    influx -database 'serverDB' -execute '{sql}'  -format='csv' > {metrics_name}.txt && echo 1 || echo 0
"""

ssh = paramiko.SSHClient()
key = paramiko.AutoAddPolicy()
ssh.set_missing_host_key_policy(key)
ssh.connect('10.52.5.233', 22, 'airflow', '', timeout=5)
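The snippet stops before the template is used. A sketch of how exec_command might be rendered and run over the paramiko session; the InfluxQL statement and metrics name below are placeholders, not values from the original DAG:

# Sketch only: render the influx command template and run it over SSH.
# The SQL and metrics_name values are placeholders.
cmd = exec_command.format(
    sql='SELECT COUNT(value) FROM orders WHERE time > now() - 5m',
    metrics_name='orders_5m')
stdin, stdout, stderr = ssh.exec_command(cmd)
flag = stdout.read().decode('utf-8').strip()  # '1' on success, '0' on failure (per the echo above)
print(flag)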
                         "use_beeline": "true"})})

    create_new_conn(session,
                    {"conn_id": "hiveserver2-dvstar",
                     "conn_type": "hiveserver2",
                     "host": "hive",
                     "schema": 'dv_star',
                     "login": "******",
                     "port": 10000,
                     "extra": json.dumps({"authMechanism": "NOSASL"})})

    session.close()

dag = airflow.DAG(
    'init_datavault3_example',
    schedule_interval="@once",
    default_args=args,
    template_searchpath='/usr/local/airflow/sql',
    max_active_runs=1)

t1 = PythonOperator(task_id='init_datavault3_example',
                    python_callable=init_datavault3_example,
                    provide_context=False,
                    dag=dag)

t2 = HiveOperator(task_id='create_stg_database',
                  hive_cli_conn_id='hive_default',
                  schema='default',
                  hql='CREATE DATABASE IF NOT EXISTS {0}'.format(ADVWORKS_STAGING),
                  dag=dag)

t3 = HiveOperator(task_id='create_dv_database',
Example 28
from datetime import date, timedelta
import airflow
from google.cloud import storage
import os
import json
from airflow.contrib.operators.bigquery_operator import BigQueryOperator
from airflow.operators.dummy_operator import DummyOperator

default_args = {
    "depends_on_past": False,
    "start_date": airflow.utils.dates.days_ago(1),
    "retries": 1,
    "retry_delay": datetime.timedelta(minutes=2),
}
# Run SQL command example DAG
with airflow.DAG("Copy_Table_BigQuery",
                 default_args=default_args,
                 template_searchpath=['/home/airflow/gcs/data/pipeline4/'],
                 schedule_interval="@once") as dag:
    start_task = DummyOperator(task_id="start")
    stop_task = DummyOperator(task_id="stop")
    copy_records = BigQueryOperator(
        task_id='copy_records',
        sql="myquery.sql",
        use_legacy_sql=False,
        create_disposition='CREATE_IF_NEEDED',
        write_disposition='WRITE_TRUNCATE',
        params=dict(
            destination_project=os.environ['AIRFLOW_VAR_ENTERPRISE_PROJECT'],
            source_project=os.environ['AIRFLOW_VAR_ENTERPRISE_PROJECT']),
        bigquery_conn_id='etl_sa')

    start_task >> copy_records >> stop_task
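myquery.sql itself is not shown; the params dict passed to BigQueryOperator is exposed to the Jinja template as params.<name>. A standalone sketch of that rendering, using placeholder project, dataset, and table names rather than anything from the original file:

# Sketch: how the params dict is substituted into a templated SQL file.
# The SQL below is a placeholder, not the contents of myquery.sql.
from jinja2 import Template

sql = Template(
    "CREATE OR REPLACE TABLE `{{ params.destination_project }}.reporting.records` AS\n"
    "SELECT * FROM `{{ params.source_project }}.raw.records`")
print(sql.render(params={'destination_project': 'proj-a',
                         'source_project': 'proj-b'}))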
Example 29
import datetime

import airflow
from airflow.operators import bash_operator

YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)

default_args = {
    'owner': 'Composer Example',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': YESTERDAY,
}

with airflow.DAG(
    'composer_sample_dag',
    catchup=False,
    default_args=default_args,
    schedule_interval=datetime.timedelta(days=1)) as dag:

    # Print the dag_run id from the Airflow logs
    print_dag_run_conf = bash_operator.BashOperator(
        task_id='print_dag_run_conf', bash_command='echo {{ dag_run.id }}')
Example 30
import requests
import os

args = {
    'owner': 'xiedong',
    'start_date': datetime(2020, 1, 13),
    'depends_on_past': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=2),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('dim_opay_bd_relation_df',
                  schedule_interval="10 01 * * *",
                  default_args=args)

##----------------------------------------- Dependencies ---------------------------------------##

dim_opay_bd_admin_user_df_prev_day_task = OssSensor(
    task_id='dim_opay_bd_admin_user_df_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="opay/opay_dw/dim_opay_bd_admin_user_df/country_code=NG",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # check the dependency status once a minute until it is satisfied
    dag=dag)


##----------------------------------------- Task timeout monitoring ---------------------------------------##