Example #1
def _track_task(task):
    if should_not_track(task):
        return

    if is_instance_by_class_name(task, "SubDagOperator"):
        # we do not track the SubDagOperator's own execute(), only its inner tasks
        track_dag(task.subdag)
    else:
        track_operator(task)
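The is_instance_by_class_name helper is not shown on this page. A minimal sketch of one way such a name-based check could work, assuming it only needs to match a class name anywhere in the object's MRO (so the Airflow class itself never has to be imported here):

def is_instance_by_class_name(obj, class_name):
    # Compare by class name across the MRO instead of using isinstance(),
    # so the check works even when the target class cannot be imported.
    return any(cls.__name__ == class_name for cls in type(obj).__mro__)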
Example #2
def _track_task(task):
    if should_not_track(task):
        return

    try:
        tracking_wrapper = get_tracking_wrapper(task)
    except KeyError:
        return
    else:
        tracking_wrapper(task)
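The try/except KeyError pattern suggests that get_tracking_wrapper looks the operator up in a dictionary-backed registry and lets the KeyError for unsupported operators propagate. A minimal sketch under that assumption (the wrapper function named here is hypothetical):

def track_python_operator(task):  # hypothetical wrapper, body elided
    ...

_TRACKING_WRAPPERS = {
    "PythonOperator": track_python_operator,
}

def get_tracking_wrapper(task):
    # Raises KeyError for operators we do not know how to track;
    # the caller above treats that as "skip this task".
    return _TRACKING_WRAPPERS[task.__class__.__name__]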
Example #3
def _track_task(task):
    from airflow.operators.subdag_operator import SubDagOperator

    if should_not_track(task):
        return

    track_operator(task)
    if is_instance_by_class_name(task, SubDagOperator.__name__):
        # we also track the subdag's inner tasks
        track_dag(task.subdag)
Example #4
def _track_task(task):
    from airflow.operators.subdag_operator import SubDagOperator

    if should_not_track(task):
        return

    if is_instance_by_class_name(task, SubDagOperator.__name__):
        # we do not track the SubDagOperator's own execute(), only its inner tasks
        track_dag(task.subdag)
    else:
        track_operator(task)
Example #5
import sys

def _track_function(function):
    if not _is_function(function) or should_not_track(function):
        return

    # wrap the function with dbnd's @task decorator so its runs are tracked
    decorated_function = task(function)

    # We modify all modules since each module has its own pointers to local and imported functions.
    # If a module has already imported the function we need to change the pointer in that module.
    for module in sys.modules.copy().values():
        if not _is_module(module):
            continue

        for k, v in module.__dict__.items():
            if v is function:
                module.__dict__[k] = decorated_function
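The pointer-swapping loop above is plain monkey-patching of module globals. A self-contained sketch of the same technique on a toy function (all names here are illustrative):

import sys
import types

def greet():
    return "hello"

def tracked_greet():
    return "hello (tracked)"

# Rebind every module-level reference to `greet`, mirroring the loop above.
for module in sys.modules.copy().values():
    if not isinstance(module, types.ModuleType):
        continue
    for k, v in list(module.__dict__.items()):
        if v is greet:
            module.__dict__[k] = tracked_greet

print(greet())  # prints "hello (tracked)": the global name now points at the wrapper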
Example #6
import logging

logger = logging.getLogger(__name__)

def track_dag(dag):
    """
    Modify operators in the dag, where necessary, so that we can track them.
    Supported operators: EmrAddStepsOperator, DataProcPySparkOperator, SparkSubmitOperator, PythonOperator.
    Other operators are not modified.

    More details on each operator:
    - EmrAddStepsOperator: modify args of spark-submit steps by adding dbnd variables using --conf
    - DataProcPySparkOperator: add dbnd variables to dataproc properties
    - SparkSubmitOperator: add dbnd variables to spark-submit command using --conf
    - PythonOperator: wrap python function with @task
    """
    try:
        if should_not_track(dag):
            return

        for task in dag.tasks:
            track_task(task)
    except Exception:
        logger.exception("Failed to modify %s for tracking", dag.dag_id)
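A minimal usage sketch, assuming an Airflow 1.x environment (matching the airflow.operators.subdag_operator import path used in the examples above); the dag and callable names are illustrative:

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator

def compute():
    pass

with DAG("my_dag", start_date=datetime(2021, 1, 1), schedule_interval=None) as dag:
    PythonOperator(task_id="compute", python_callable=compute)

track_dag(dag)  # wraps the PythonOperator's python_callable so dbnd can track it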