Example n. 1
def test_hive_partition_sensor(self):
    op = HivePartitionSensor(
        task_id='hive_partition_check',
        table='airflow.static_babynames_partitioned',
        dag=self.dag)
    op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
           ignore_ti_state=True)

# Fragment of a custom poke callable (apparently the tail of the get_check_date
# function that is assigned to table_validate_task.poke below):
        partition = "dt='{}'".format(
            chk_d if int(chk_d2) > int(start) else datetime.strftime(
                datetime.strptime(start, '%Y%m%d'), '%Y-%m-%d'))
        logging.info('Poking for table %s.%s, partition %s',
                     params.get('hive_db'), params.get('hive_table'),
                     partition)
        from airflow.hooks.hive_hooks import HiveMetastoreHook
        hook = HiveMetastoreHook(metastore_conn_id="metastore_default")
        return hook.check_for_partition(params.get('hive_db'),
                                        params.get('hive_table'), partition)

    table_validate_task = HivePartitionSensor(
        task_id="table_validate_task_{}_{}".format(db, table),
        table=table,
        partition="dt='{{ ds }}'",
        schema=db,
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag)
    # table_validate_task.pre_execute = get_check_date
    table_validate_task.params = table_info
    table_validate_task.poke = get_check_date
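# A minimal, hypothetical reconstruction of the get_check_date poke callable whose tail
# appears in the fragment at the top of this example and which is monkey-patched onto
# table_validate_task.poke above. The context keys used below (params, ds, ds_nodash) are
# standard Airflow template variables, but the 'start_date' key and the exact date
# arithmetic are assumptions; only the metastore lookup is taken verbatim from the fragment.
def get_check_date(context):
    from datetime import datetime
    import logging
    from airflow.hooks.hive_hooks import HiveMetastoreHook

    params = context['params']        # the table_info dict assigned to the sensor above
    start = params.get('start_date')  # e.g. '20190801' (assumed key and format)
    chk_d = context['ds']             # execution date, e.g. '2019-08-02'
    chk_d2 = context['ds_nodash']     # same date without dashes, e.g. '20190802'

    # Poke the execution-date partition if it is later than the configured start date,
    # otherwise fall back to the reformatted start date itself.
    partition = "dt='{}'".format(
        chk_d if int(chk_d2) > int(start) else datetime.strftime(
            datetime.strptime(start, '%Y%m%d'), '%Y-%m-%d'))
    logging.info('Poking for table %s.%s, partition %s',
                 params.get('hive_db'), params.get('hive_table'), partition)
    hook = HiveMetastoreHook(metastore_conn_id="metastore_default")
    return hook.check_for_partition(params.get('hive_db'),
                                    params.get('hive_table'), partition)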

    # Sync the Hive and MySQL table schemas
    sync_table_schema = PythonOperator(
        task_id='sync_table_schema_{}_{}'.format(db, table),
        python_callable=init_mysql_table,
        provide_context=True,
        op_kwargs={
            # "conn": hive_cursor,
            "db": db,
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG(
    'dwd_opay_client_event_base_di',
    schedule_interval="00 02 * * *",
    default_args=args,
)
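# Note: schedule_interval="00 02 * * *" is a standard cron expression, i.e. this DAG is
# triggered once a day at 02:00 (in the scheduler's timezone, typically UTC), and 'args'
# is the default_args dict whose tail ('email_on_failure' / 'email_on_retry') is shown above.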

##----------------------------------------- Dependencies ---------------------------------------##

opay_ep_logv0_prev_hour_task = HivePartitionSensor(
    task_id="opay_ep_logv0_prev_hour_task",
    table="opay_ep_logv0",
    partition="dt='{{ ds }}' and hour='22'",
    schema="oride_source",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)

opay_ep_logv1_prev_hour_task = HivePartitionSensor(
    task_id="opay_ep_logv1_prev_hour_task",
    table="opay_ep_logv1",
    partition="dt='{{ ds }}' and hour='22'",
    schema="oride_source",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)


##----------------------------------------- Task timeout monitoring ---------------------------------------##
def fun_task_timeout_monitor(ds, dag, **op_kwargs):
                'ofood_dw_ods.ods_sqoop_base_jh_shop_df',
                'ofood_dw_ods.ods_sqoop_base_jh_waimai_df',
                'ofood_dw_ods.ods_log_client_event_hi'
            ],
        # Task name
        "task_name": "ofood全局运营指标"
    },
    dag=dag
)


# Circuit breaker: block the pipeline until upstream data is ready
jh_order_validate_task = HivePartitionSensor(
    task_id="jh_order_validate_task",
    table="ods_sqoop_base_jh_order_df",
    partition="dt='{{ds}}'",
    schema="ofood_dw_ods",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag
)


jh_order_log_validate_task = HivePartitionSensor(
    task_id="jh_order_log_validate_task",
    table="ods_sqoop_base_jh_order_log_df",
    partition="dt='{{ds}}'",
    schema="ofood_dw_ods",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag
)

    # Depends on the previous day's partition
    dependence_dm_oride_driver_base_prev_day_task = UFileSensor(
        task_id='dm_oride_driver_base_prev_day_task',
        filepath='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
            hdfs_path_str="oride/oride_dw/dm_oride_driver_base/country_code=NG",
            pt='{{ds}}'),
        bucket_name='opay-datalake',
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag)

    # Depends on hour 00 of the current day's partition
    dependence_server_magic_now_day_task = HivePartitionSensor(
        task_id="server_magic_now_day_task",
        table="server_magic",
        partition="dt='{{macros.ds_add(ds, +1)}}' and hour='00'",
        schema="oride_source",
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag)

    # Depends on the previous day's partition
    dependence_dwm_oride_driver_finance_di_prev_day_task = UFileSensor(
        task_id='dwm_oride_driver_finance_di_prev_day_task',
        filepath='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
            hdfs_path_str=
            "oride/oride_dw/dwm_oride_driver_finance_di/country_code=NG",
            pt='{{ds}}'),
        bucket_name='opay-datalake',
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag)
db_name = "oride_dw"
table_name = "dwd_oride_order_dispatch_chose_detail_di"

##----------------------------------------- Dependencies ---------------------------------------##
# Fetch the variable
code_map = eval(Variable.get("sys_flag"))

# Check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":

    # Depends on the previous day's partition
    dependence_dispatch_tracker_server_magic_prev_day_task = HivePartitionSensor(
        task_id="dependence_dispatch_tracker_server_magic_prev_day_task",
        table="dispatch_tracker_server_magic",
        partition="dt='{{ ds }}' and hour='23'",
        schema="oride_source",
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag)
    hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name

else:
    print("成功")
    dependence_dispatch_tracker_server_magic_prev_day_task = HivePartitionSensor(
        task_id="dependence_dispatch_tracker_server_magic_prev_day_task",
        table="dispatch_tracker_server_magic",
        partition="dt='{{ ds }}' and hour='23'",
        schema="oride_source",
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag)
    hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name
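# The sys_flag Variable read above is expected to hold a dict literal with an "id" key
# (e.g. '{"id": "ufile"}' or '{"id": "oss"}'); eval() turns it into code_map and the
# branch picks the matching storage prefix. A small sketch of the same switch using
# ast.literal_eval, which parses the literal without executing arbitrary code
# (the example Variable value is an assumption):
import ast
from airflow.models import Variable

code_map = ast.literal_eval(Variable.get("sys_flag"))   # e.g. {"id": "oss"}
if code_map["id"].lower() == "ufile":
    hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name
else:
    hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name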
        task_id='dependence_dwd_oride_client_event_detail_hi_prev_day_task',
        filepath='{hdfs_path_str}/country_code=nal/dt={pt}/hour={hour}/_SUCCESS'.format(
            hdfs_path_str="oride/oride_dw/dwd_oride_client_event_detail_hi",
            pt='{{ds}}',
            hour='23'
        ),
        bucket_name='opay-datalake',
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag
    )

    # Depends on the previous day's partition
    dependence_dwd_oride_driver_location_event_hi_prev_day_task = HivePartitionSensor(
        task_id="dwd_oride_driver_location_event_hi_prev_day_task",
        table="ods_log_driver_track_data_hi",
        partition="""dt='{{ ds }}' and hour='23'""",
        schema="oride_dw_ods",
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag
    )

    dwd_oride_order_base_include_test_di_prev_day_task = S3KeySensor(
        task_id='dwd_oride_order_base_include_test_di_prev_day_task',
        bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
            hdfs_path_str="oride/oride_dw/dwd_oride_order_base_include_test_di/country_code=NG",
            pt='{{ds}}'
        ),
        bucket_name='opay-bi',
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag
    )
    hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name
#     task_id='sleep_id',
#     depends_on_past=False,
#     bash_command='sleep 10',
#     dag=dag)
##----------------------------------------- Variables ---------------------------------------##

db_name = "oride_dw"
table_name = "dwd_oride_order_trip_raw_feature_hi"
hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name

##----------------------------------------- Dependencies ---------------------------------------##

oride_trip_raw_feature_prev_hour_task = HivePartitionSensor(
    task_id="oride_trip_raw_feature_prev_hour_task",
    table="ods_log_oride_trip_raw_feature_hi",
    partition=
    """dt='{{ ds }}' and hour='{{ execution_date.strftime("%H") }}'""",
    schema="oride_dw_ods",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)
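# For an execution_date of, say, 2019-12-01 05:00 the templated partition above renders to
# "dt='2019-12-01' and hour='05'", so the sensor waits for that hourly partition of
# ods_log_oride_trip_raw_feature_hi (the example date is purely illustrative).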

##----------------------------------------- Task timeout monitoring ---------------------------------------##


def fun_task_timeout_monitor(ds, execution_date, dag, **op_kwargs):
    dag_ids = dag.dag_id

    tb = [{
        "dag":
        dag,
        "db":
        "oride_dw",
Example n. 9
dag = airflow.DAG('dwd_oride_order_skyeye_di',
                  schedule_interval="30 07 * * *",
                  default_args=args,
                  )

##----------------------------------------- Variables ---------------------------------------##

db_name = "oride_dw"
table_name = "dwd_oride_order_skyeye_di"
hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name

##----------------------------------------- Dependencies ---------------------------------------##
ods_log_oride_order_skyeye_di_task = HivePartitionSensor(
        task_id="dwd_oride_order_skyeye_di_prev_day_task",
        table="ods_log_oride_order_skyeye_di",
        partition="dt='{{ds}}'",
        schema="oride_dw_ods",
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag
    )

##----------------------------------------- Task timeout monitoring ---------------------------------------##

def fun_task_timeout_monitor(ds, dag, **op_kwargs):

    dag_ids = dag.dag_id

    msg = [{
        "dag": dag,
        "db": "oride_dw",
        "table": "{dag_name}".format(dag_name=dag_ids),
        "partition": "country_code=nal/dt={pt}".format(pt=ds),
        "timeout": "2400",
    }]

    TaskTimeoutMonitor().set_task_monitor(msg)
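# A sketch of how fun_task_timeout_monitor is typically wired into the DAG; the task_id is
# an assumption, and provide_context=True passes ds/dag into the callable (Airflow 1.x style).
task_timeout_monitor = PythonOperator(
    task_id='task_timeout_monitor',
    python_callable=fun_task_timeout_monitor,
    provide_context=True,
    dag=dag
)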
validate_partition_data = PythonOperator(
    task_id='validate_partition_data',
    python_callable=validate_partition,
    provide_context=True,
    op_kwargs={
        # Tables to validate
        "table_names": table_names,
        # Task name
        "task_name": "快车司机档案数据"
    },
    dag=dag)

data_order_validate_task = HivePartitionSensor(
    task_id="data_order_validate_task",
    table="ods_sqoop_base_data_order_df",
    partition="dt='{{ds}}'",
    schema="oride_dw_ods",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)

data_driver_comment_validate_task = HivePartitionSensor(
    task_id="data_driver_comment_validate_task",
    table="ods_sqoop_base_data_driver_comment_df",
    partition="dt='{{ds}}'",
    schema="oride_dw_ods",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)

data_driver_balance_extend_validate_task = HivePartitionSensor(
    task_id="data_driver_balance_extend_validate_task",
    table="ods_sqoop_base_data_driver_balance_extend_df",
Example n. 11
db_name = "oride_dw"
table_name = "dwd_driver_track_data_di"

##----------------------------------------- Dependencies ---------------------------------------##

# Fetch the variable
code_map = eval(Variable.get("sys_flag"))

# Check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
    # Depends on the previous day's partition
    dependence_ods_log_driver_track_data_hi_task = HivePartitionSensor(
        task_id="dependence_ods_log_driver_track_data_hi_task",
        table="ods_log_driver_track_data_hi",
        partition="dt='{{ ds }}'",
        schema="oride_dw_ods",
        poke_interval=60,
        dag=dag)

    # Path
    hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name
else:
    print("成功")
    # Depends on the previous day's partition
    dependence_ods_log_driver_track_data_hi_task = HivePartitionSensor(
        task_id="dependence_ods_log_driver_track_data_hi_task",
        table="ods_log_driver_track_data_hi",
        partition="dt='{{ ds }}'",
        schema="oride_dw_ods",
        poke_interval=60,
Example n. 12
sleep_time = BashOperator(task_id='sleep_id',
                          depends_on_past=False,
                          bash_command='sleep 120',
                          dag=dag)

sleep_time2 = BashOperator(task_id='sleep_i2',
                           depends_on_past=False,
                           bash_command='sleep 20',
                           dag=dag)
"""
##---- Upstream data sources ----##
"""
dependence_ods_log_oride_order_skyeye_di = HivePartitionSensor(
    task_id="dependence_ods_log_oride_order_skyeye_di",
    table="ods_log_oride_order_skyeye_di",
    partition="dt='{{ ds }}'",
    schema="oride_dw_ods",
    poke_interval=60,
    dag=dag)

dwd_oride_order_base_include_test_di_task = HivePartitionSensor(
    task_id='dwd_oride_order_base_include_test_di_task',
    table="dwd_oride_order_base_include_test_di",
    partition="dt='{{ds}}'",
    schema="oride_dw",
    poke_interval=60,
    dag=dag)

dependence_data_city_conf = HivePartitionSensor(
    task_id="dependence_data_city_conf",
    table="ods_sqoop_base_data_city_conf_df",
dag = airflow.DAG('app_opos_bd_admin_users_d',
                  schedule_interval="30 04 * * *",
                  max_active_runs=1,
                  default_args=args)

sleep_time = BashOperator(task_id='sleep_id',
                          depends_on_past=False,
                          bash_command='sleep 300',
                          dag=dag)

# Dependencies
dependence_table_opos_metrcis_report_task = HivePartitionSensor(
    task_id="dependence_table_opos_metrcis_report_task",
    table="opos_metrcis_report",
    partition="dt='{{ ds }}'",
    schema="opos_temp",
    poke_interval=120,
    dag=dag)

dependence_table_opos_active_user_daily_task = HivePartitionSensor(
    task_id="dependence_table_opos_active_user_daily_task",
    table="opos_active_user_daily",
    partition="dt='{{ ds }}'",
    schema="opos_temp",
    poke_interval=120,
    dag=dag)

# sync_db = Variable.get("app_opos_bd_admin_users").split("\n")
# {"odb":"opay_crm", "otable":"bd_admin_users", "oconn":"opos_opay_crm", "ddb":""}
Example n. 14
            [
                'ofood_dw_ods.ods_sqoop_base_jh_order_df',
                'ofood_dw_ods.ods_sqoop_bd_bd_admin_users_df',
                'ofood_dw_ods.ods_sqoop_bd_invitation_info_df',
            ],
        # Task name
        "task_name": "ofood mkt指标"
    },
    dag=dag
)

# Circuit breaker: block the pipeline until upstream data is ready
jh_order_validate_task = HivePartitionSensor(
    task_id="jh_order_validate_task",
    table="ods_sqoop_base_jh_order_df",
    partition="dt='{{ds}}'",
    schema="ofood_dw_ods",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag
)

bd_admin_users_validate_task = HivePartitionSensor(
    task_id="bd_admin_users_validate_task",
    table="ods_sqoop_bd_bd_admin_users_df",
    partition="dt='{{ds}}'",
    schema="ofood_dw_ods",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag
)

bd_invitation_info_validate_task = HivePartitionSensor(
    task_id="bd_invitation_info_validate_task",
Example n. 15
    dependence_dwd_oride_order_push_driver_detail_di_prev_day_task = UFileSensor(
        task_id='dwd_oride_order_push_driver_detail_di_prev_day_task',
        filepath='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
            hdfs_path_str=
            "oride/oride_dw/dwd_oride_order_push_driver_detail_di/country_code=nal",
            pt='{{ds}}'),
        bucket_name='opay-datalake',
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag)

    # Depends on the previous day's partition
    dependence_oride_driver_timerange_prev_day_task = HivePartitionSensor(
        task_id="oride_driver_timerange_prev_day_task",
        table="ods_log_oride_driver_timerange",
        partition="dt='{{ds}}'",
        schema="oride_dw_ods",
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag)

    dependence_dwd_oride_driver_accept_order_click_detail_di_prev_day_task = UFileSensor(
        task_id='dwd_oride_driver_accept_order_click_detail_di_prev_day_task',
        filepath='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
            hdfs_path_str=
            "oride/oride_dw/dwd_oride_driver_accept_order_click_detail_di/country_code=nal",
            pt='{{ds}}'),
        bucket_name='opay-datalake',
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag)

    dependence_dwd_oride_driver_accept_order_show_detail_di_prev_day_task = UFileSensor(
Example n. 16
    def tesk_dependence(self, tables, dag):

        dependence = []

        try:

            for item in tables:

                # Read db, table, partition
                table = item.get('table', None)
                db = item.get('db', None)
                partition = item.get('partitions', None)

                if table is None or db is None or partition is None:
                    return None

                # Read the Hive table's location (path)
                location = self.get_hive_location(db, table)

                # Strip the original bucket prefix
                location = location.replace('ufile://opay-datalake/', '')

                # task_id name
                task_id_flag = table + "_task"

                # Use a different dependency path for ODS/BI databases
                if db[-3:].lower() == 'ods' or db[-2:].lower() == 'bi':

                    # Configure the dependency (previous day's partition)
                    dependence_task_flag = HivePartitionSensor(
                        task_id='dependence_{task_id_name}'.format(
                            task_id_name=task_id_flag),
                        table=table,
                        partition="dt='{{ds}}'",
                        schema=db,
                        poke_interval=60,  # re-check the dependency once a minute until it is met
                        dag=dag)

                else:

                    # Configure the dependency (previous day's partition)
                    dependence_task_flag = UFileSensor(
                        task_id='dependence_{task_id_name}'.format(
                            task_id_name=task_id_flag),
                        filepath=
                        '{hdfs_path_name}/{partition_name}/dt={{{{ds}}}}/_SUCCESS'
                        .format(hdfs_path_name=location,
                                partition_name=partition),
                        bucket_name='opay-datalake',
                        poke_interval=60,  # re-check the dependency once a minute until it is met
                        dag=dag)

                dependence.append(dependence_task_flag)

            return dependence

        except Exception as e:

            #self.comwx.postAppMessage('DW调度系统任务 {jobname} 任务依赖列表产出异常'.format(jobname=dag.dag_id),'271')

            logging.info(e)

            sys.exit(1)
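# A hedged usage sketch for the tesk_dependence helper above: 'tables' is a list of dicts
# with db / table / partitions keys, and the returned sensors are set upstream of the main
# ETL task. The concrete entries, the 'helper' instance and 'etl_task' are assumptions.
tables = [
    {"db": "oride_dw_ods", "table": "ods_log_driver_track_data_hi",
     "partitions": "country_code=nal"},
]
for sensor in helper.tesk_dependence(tables, dag):
    sensor >> etl_task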
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('app_opay_user_device_d',
                  schedule_interval="00 03 * * *",
                  default_args=args,
                  )

##----------------------------------------- Dependencies ---------------------------------------##

# Depends on the previous day's partition
dwd_opay_client_event_base_di_prev_day_task = HivePartitionSensor(
    task_id="dwd_opay_client_event_base_di_prev_day_task",
    table="dwd_opay_client_event_base_di",
    partition="dt='{{ ds }}'",
    schema="opay_dw",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag
)

app_opay_user_device_d_prev_day_task = OssSensor(
    task_id='app_opay_user_device_d_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="opay/opay_dw/app_opay_user_device_d/country_code=nal",
        pt='{{macros.ds_add(ds, -1)}}'
    ),
    bucket_name='opay-datalake',
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag
)
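# Neither sensor does anything on its own; in these DAGs they are placed upstream of the
# main ETL task so it only starts once both partitions exist. 'app_opay_user_device_d_task'
# is an assumed name for that downstream task.
dwd_opay_client_event_base_di_prev_day_task >> app_opay_user_device_d_task
app_opay_user_device_d_prev_day_task >> app_opay_user_device_d_task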
Example n. 18
# Check for ufile (CDH environment)
if code_map["id"].lower()=="ufile":
    dependence_dwm_oride_order_base_di_task = UFileSensor(
        task_id='dwm_oride_order_base_di_task',
        filepath='{hdfs_path_str}/country_code=nal/dt={pt}/_SUCCESS'.format(
            hdfs_path_str="oride/oride_dw/dwm_oride_order_base_di",
            pt='{{ds}}'
        ),
        bucket_name='opay-datalake',
        poke_interval=60,
        dag=dag
    )
    dependence_dim_oride_city_task = HivePartitionSensor(
        task_id="dim_oride_city_task",
        table="dim_oride_city",
        partition="dt='{{ds}}'",
        schema="oride_dw",
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag
    )
    dependence_dim_oride_passenger_base_task = HivePartitionSensor(
        task_id="dim_oride_passenger_base_task",
        table="dim_oride_passenger_base",
        partition="dt='{{ds}}'",
        schema="oride_dw",
        poke_interval=60,  # re-check the dependency once a minute until it is met
        dag=dag
    )
    dependence_dim_oride_driver_base_task = UFileSensor(
        task_id='dim_oride_driver_base_task',
        filepath='{hdfs_path_str}/country_code=NG/dt={pt}/_SUCCESS'.format(
            hdfs_path_str="oride/oride_dw/dim_oride_driver_base",
db_name="oride_dw"
table_name="dwd_oride_anti_fraud_log_di"

##----------------------------------------- Dependencies ---------------------------------------##

# Fetch the variable
code_map=eval(Variable.get("sys_flag"))

# Check for ufile (CDH environment)
if code_map["id"].lower()=="ufile":

    # Depends on the previous day's partition
    log_anti_oride_fraud_task=HivePartitionSensor(
          task_id="log_anti_oride_fraud_task",
          table="log_anti_oride_fraud",
          partition="dt='{{macros.ds_add(ds, +1)}}' and hour='00'",
          schema="oride_source",
          poke_interval=60,  # re-check the dependency once a minute until it is met
          dag=dag
        )
    # Path
    hdfs_path="ufile://opay-datalake/oride/oride_dw/dwd_oride_anti_fraud_log_new_di"
else:
    print("成功")

    # Depends on the previous day's partition
    log_anti_oride_fraud_task = HivePartitionSensor(
        task_id="log_anti_oride_fraud_task",
        table="log_anti_oride_fraud",
        partition="dt='{{macros.ds_add(ds, +1)}}' and hour='00'",
        schema="oride_source",
        poke_interval=60,  # re-check the dependency once a minute until it is met
Example n. 20
dag = airflow.DAG('dwd_oride_driver_cheating_detection_hi',
                  schedule_interval="30 * * * *",
                  default_args=args)
##----------------------------------------- Variables ---------------------------------------##

db_name = "oride_dw"
table_name = "dwd_oride_driver_cheating_detection_hi"
hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name

##----------------------------------------- Dependencies ---------------------------------------##

server_event_task = HivePartitionSensor(
    task_id="server_event_task",
    table="server_event",
    partition=
    """dt='{{ ds }}' and hour='{{ execution_date.strftime("%H") }}'""",
    schema="oride_source",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)

##----------------------------------------- Task timeout monitoring ---------------------------------------##


def fun_task_timeout_monitor(ds, dag, execution_date, **op_kwargs):
    dag_ids = dag.dag_id

    tb = [{
        "dag":
        dag,
        "db":
        "oride_dw",
db_name="oride_dw"
table_name="dwd_oride_client_event_detail_hi"

##----------------------------------------- Dependencies ---------------------------------------##

# Fetch the variable
code_map=eval(Variable.get("sys_flag"))

# Depends on the previous hour's partition
client_event_prev_hour_task = HivePartitionSensor(
    task_id="client_event_prev_hour_task",
    table="client_event",
    partition="""dt='{{ ds }}' and hour='{{ execution_date.strftime("%H") }}'""",
    schema="oride_source",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag
)


opay_ep_logv0_prev_hour_task = HivePartitionSensor(
    task_id="opay_ep_logv0_prev_hour_task",
    table="opay_ep_logv0",
    partition="""dt='{{ ds }}' and hour='{{ execution_date.strftime("%H") }}'""",
    schema="oride_source",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag
)

opay_ep_logv1_prev_hour_task = HivePartitionSensor(
Example n. 22
            'ofood_dw_ods.ods_sqoop_bd_invitation_info_df',
            'ofood_dw_ods.ods_sqoop_bd_jh_member_df',
            'ofood_dw_ods.ods_sqoop_base_jh_shop_account_df',
            'ofood_dw_ods.ods_sqoop_base_jh_waimai_product_df',
        ],
        # Task name
        "task_name":
        "ofood BD指标"
    },
    dag=dag)

# Circuit breaker: block the pipeline until upstream data is ready
jh_order_validate_task = HivePartitionSensor(
    task_id="jh_order_validate_task",
    table="ods_sqoop_base_jh_order_df",
    partition="dt='{{ds}}'",
    schema="ofood_dw_ods",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)

jh_order_time_validate_task = HivePartitionSensor(
    task_id="jh_order_time_validate_task",
    table="ods_sqoop_base_jh_order_time_df",
    partition="dt='{{ds}}'",
    schema="ofood_dw_ods",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)

jh_waimai_comment_validate_task = HivePartitionSensor(
    task_id="jh_waimai_comment_validate_task",
    table="ods_sqoop_base_jh_waimai_comment_df",
Example n. 23
            count(distinct user_id) as users,
            sum(price) as price_total,
            sum(reward) as reward_total
        FROM
            oride_source.user_order
        WHERE
          dt='{{ ds }}'
        GROUP BY status
    """,
                                          schema='dashboard',
                                          dag=dag)

dependent_dwd_oride_driver_timerange_di = HivePartitionSensor(
    task_id="dependent_dwd_oride_driver_timerange_di",
    table="dwd_oride_driver_timerange_di",
    schema="oride_dw",
    partition="dt='{{ds}}'",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)

create_oride_driver_daily_summary = HiveOperator(
    task_id='create_oride_driver_daily_summary',
    hql="""
        CREATE TABLE IF NOT EXISTS oride_driver_daily_summary (
            driver_id bigint,
            phone_number string,
            real_name string,
            group_id int,
            group_name string,
            group_leader string,
            online_time bigint,
Example n. 24
                  schedule_interval="30 01 * * *",
                  default_args=args)

##----------------------------------------- Variables ---------------------------------------##

db_name = "oride_dw"
table_name = "app_oride_driver_gps_info"
hdfs_path = "oss://opay-datalake/oride/oride_dw/" + table_name

##----------------------------------------- Dependencies ---------------------------------------##

# Depends on the previous day's partition
moto_locations_prev_day_task = HivePartitionSensor(
    task_id="moto_locations_prev_day_task",
    table="moto_locations",
    partition="dt='{{ds}}'",
    schema="oride_source",
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)

ods_sqoop_base_oride_assets_sku_df_prev_day_task = OssSensor(
    task_id='ods_sqoop_base_oride_assets_sku_df_prev_day_task',
    bucket_key='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
        hdfs_path_str="oride_dw_sqoop/opay_assets/oride_assets_sku",
        pt='{{ds}}'),
    bucket_name='opay-datalake',
    poke_interval=60,  # re-check the dependency once a minute until it is met
    dag=dag)

##----------------------------------------- Task timeout monitoring ---------------------------------------##
Example n. 25
db_name = "oride_dw"
table_name = "dwd_driver_track_data_hi"

##----------------------------------------- Dependencies ---------------------------------------##

# Fetch the variable
code_map = eval(Variable.get("sys_flag"))

# Check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
    # Depends on the previous day's partition
    dependence_ods_log_driver_track_data_hi_task = HivePartitionSensor(
        task_id="dependence_ods_log_driver_track_data_hi_task",
        table="ods_log_driver_track_data_hi",
        partition=
        """ dt='{{ ds }}' and hour='{{ execution_date.strftime("%H") }}' """,
        schema="oride_dw_ods",
        poke_interval=60,
        dag=dag)

    # Path
    hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name
else:
    print("成功")
    # Depends on the previous day's partition
    dependence_ods_log_driver_track_data_hi_task = HivePartitionSensor(
        task_id="dependence_ods_log_driver_track_data_hi_task",
        table="ods_log_driver_track_data_hi",
        partition=
        """ dt='{{ ds }}' and hour='{{ execution_date.strftime("%H") }}' """,
        schema="oride_dw_ods",