Example #1
def dag_show(args):
    """Displays DAG or saves it's graphic representation to the file"""
    dag = get_dag(args.subdir, args.dag_id)
    dot = render_dag(dag)
    filename = args.save
    imgcat = args.imgcat

    if filename and imgcat:
        raise SystemExit(
            "Option --save and --imgcat are mutually exclusive. "
            "Please remove one option to execute the command.",
        )
    elif filename:
        _save_dot_to_file(dot, filename)
    elif imgcat:
        _display_dot_via_imgcat(dot)
    else:
        print(dot.source)
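A minimal usage sketch, assuming an Airflow 2.x environment where the bundled example DAGs are loaded; it builds the same argparse Namespace the CLI would and calls the function above.

from airflow.cli.cli_parser import get_parser

parser = get_parser()
# 'example_bash_operator' ships with Airflow's example DAGs; substitute any dag_id.
args = parser.parse_args(['dags', 'show', 'example_bash_operator'])
dag_show(args)  # neither --save nor --imgcat is set, so the graphviz DOT source is printed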
Example #2
def dag_state(args):
    """
    Returns the state (and conf, if it exists) of a DagRun at the command line.
    >>> airflow dags state tutorial 2015-01-01T00:00:00.000000
    running
    >>> airflow dags state a_dag_with_conf_passed 2015-01-01T00:00:00.000000
    failed, {"name": "bob", "age": "42"}
    """
    if args.subdir:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        dag = get_dag_by_file_location(args.dag_id)
    dr = DagRun.find(dag.dag_id, execution_date=args.execution_date)
    out = dr[0].state if dr else None
    conf_out = ''
    if out and dr[0].conf:
        conf_out = ', ' + json.dumps(dr[0].conf)
    print(str(out) + conf_out)
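The doctest above shows the shell form; a programmatic sketch, assuming the 'tutorial' DAG exists and has a DagRun for the given date (otherwise None is printed).

from airflow.cli.cli_parser import get_parser

parser = get_parser()
args = parser.parse_args(['dags', 'state', 'tutorial', '2015-01-01'])
dag_state(args)  # prints the DagRun state, plus the run conf as JSON if one was passed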
Example #3
def task_test(args, dag=None):
    """Tests task for a given dag_id"""
    # We want to log output from operators etc to show up here. Normally
    # airflow.task would redirect to a file, but here we want it to propagate
    # up to the normal airflow handler.
    handlers = logging.getLogger('airflow.task').handlers
    already_has_stream_handler = False
    for handler in handlers:
        already_has_stream_handler = isinstance(handler, logging.StreamHandler)
        if already_has_stream_handler:
            break
    if not already_has_stream_handler:
        logging.getLogger('airflow.task').propagate = True

    env_vars = {'AIRFLOW_TEST_MODE': 'True'}
    if args.env_vars:
        env_vars.update(args.env_vars)
        os.environ.update(env_vars)

    dag = dag or get_dag(args.subdir, args.dag_id)

    task = dag.get_task(task_id=args.task_id)
    # Add CLI provided task_params to task.params
    if args.task_params:
        passed_in_params = json.loads(args.task_params)
        task.params.update(passed_in_params)
    ti = TaskInstance(task, args.execution_date)

    try:
        if args.dry_run:
            ti.dry_run()
        else:
            ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
    except Exception:  # pylint: disable=broad-except
        if args.post_mortem:
            debugger = _guess_debugger()
            debugger.post_mortem()
        else:
            raise
    finally:
        if not already_has_stream_handler:
            # Make sure to reset back to normal. When run from the CLI this
            # doesn't matter, but it does for the test suite.
            logging.getLogger('airflow.task').propagate = False
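A usage sketch, assuming the bundled example DAGs are loaded; the positional arguments mirror `airflow tasks test DAG_ID TASK_ID EXECUTION_DATE`.

from airflow.cli.cli_parser import get_parser

parser = get_parser()
args = parser.parse_args([
    'tasks', 'test',
    'example_python_operator', 'print_the_context',
    '2021-01-01',
])
task_test(args)  # runs the task in test mode, ignoring deps and any recorded TI state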
Example #4
def task_run(args, dag=None):
    """Runs a single task instance"""
    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path) as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    settings.MASK_SECRETS_IN_LOGS = True

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if dag and args.pickle:
        raise AirflowException("You cannot use the --pickle option when using DAG.cli() method.")
    elif args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass
    task = dag.get_task(task_id=args.task_id)
    ti = _get_ti(task, args.execution_date_or_run_id)
    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()

    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        with _capture_task_logs(ti):
            _run_task_by_selected_method(args, dag, ti)
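A hedged sketch of calling this variant directly; it assumes the example DAGs are loaded and that a DagRun already exists for the logical date passed as execution_date_or_run_id (see the note in the docstring of the later task_run variant below).

from airflow.cli.cli_parser import get_parser

parser = get_parser()
args = parser.parse_args([
    'tasks', 'run',
    'example_python_operator', 'print_the_context',
    '2021-01-01',      # resolved against an existing DagRun's logical date (or a run_id)
    '--interactive',   # run in-process instead of capturing task logs
])
task_run(args)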
Example #5
def generate_pod_yaml(args):
    """Generates yaml files for each task in the DAG. Used for testing output of KubernetesExecutor"""

    from kubernetes.client.api_client import ApiClient

    from airflow.executors.kubernetes_executor import AirflowKubernetesScheduler, KubeConfig
    from airflow.kubernetes import pod_generator
    from airflow.kubernetes.pod_generator import PodGenerator
    from airflow.settings import pod_mutation_hook

    execution_date = args.execution_date
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    yaml_output_path = args.output_path
    kube_config = KubeConfig()
    for task in dag.tasks:
        ti = TaskInstance(task, execution_date)
        pod = PodGenerator.construct_pod(
            dag_id=args.dag_id,
            task_id=ti.task_id,
            pod_id=AirflowKubernetesScheduler._create_pod_id(  # pylint: disable=W0212
                args.dag_id, ti.task_id),
            try_number=ti.try_number,
            kube_image=kube_config.kube_image,
            date=ti.execution_date,
            command=ti.command_as_list(),
            pod_override_object=PodGenerator.from_obj(ti.executor_config),
            worker_uuid="worker-config",
            namespace=kube_config.executor_namespace,
            base_worker_pod=PodGenerator.deserialize_model_file(
                kube_config.pod_template_file))
        pod_mutation_hook(pod)
        api_client = ApiClient()
        date_string = pod_generator.datetime_to_label_safe_datestring(
            execution_date)
        yaml_file_name = f"{args.dag_id}_{ti.task_id}_{date_string}.yml"
        os.makedirs(os.path.dirname(yaml_output_path +
                                    "/airflow_yaml_output/"),
                    exist_ok=True)
        with open(yaml_output_path + "/airflow_yaml_output/" + yaml_file_name,
                  "w") as output:
            sanitized_pod = api_client.sanitize_for_serialization(pod)
            output.write(yaml.dump(sanitized_pod))
    print(
        f"YAML output can be found at {yaml_output_path}/airflow_yaml_output/")
Example #6
def task_render(args):
    """Renders and displays templated fields for a given task"""
    dag = get_dag(args.subdir, args.dag_id)
    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.render_templates()
    for attr in task.__class__.template_fields:
        print(
            textwrap.dedent(
                """\
        # ----------------------------------------------------------
        # property: {}
        # ----------------------------------------------------------
        {}
        """.format(
                    attr, getattr(task, attr)
                )
            )
        )
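A usage sketch, assuming the 'tutorial' DAG from the Airflow documentation (which has a templated BashOperator task named 'templated') is present in the DAGs folder.

from airflow.cli.cli_parser import get_parser

parser = get_parser()
args = parser.parse_args(['tasks', 'render', 'tutorial', 'templated', '2021-01-01'])
task_render(args)  # prints each template field (e.g. bash_command) after rendering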
Example #7
def dag_next_execution(args):
    """
    Returns the next execution datetime of a DAG at the command line.
    >>> airflow dags next-execution tutorial
    2018-08-31 10:38:00
    """
    dag = get_dag(args.subdir, args.dag_id)

    if dag.get_is_paused():
        print("[INFO] Please be reminded this DAG is PAUSED now.",
              file=sys.stderr)

    with create_session() as session:
        max_date_subq = (session.query(
            func.max(DagRun.execution_date).label("max_date")).filter(
                DagRun.dag_id == dag.dag_id).subquery())
        max_date_run: Optional[DagRun] = (session.query(DagRun).filter(
            DagRun.dag_id == dag.dag_id,
            DagRun.execution_date == max_date_subq.c.max_date).one_or_none())

        if max_date_run is None:
            print(
                "[WARN] Only applicable when there is execution record found for the DAG.",
                file=sys.stderr)
            print(None)
            return

    next_info = dag.next_dagrun_info(dag.get_run_data_interval(max_date_run),
                                     restricted=False)
    if next_info is None:
        print(
            "[WARN] No following schedule can be found. "
            "This DAG may have schedule interval '@once' or `None`.",
            file=sys.stderr,
        )
        print(None)
        return

    print(next_info.logical_date.isoformat())
    for _ in range(1, args.num_executions):
        next_info = dag.next_dagrun_info(next_info.data_interval,
                                         restricted=False)
        print(next_info.logical_date.isoformat())
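The doctest above shows the shell form; the equivalent programmatic call, assuming the 'tutorial' DAG is loaded and has at least one prior DagRun.

from airflow.cli.cli_parser import get_parser

parser = get_parser()
args = parser.parse_args(['dags', 'next-execution', 'tutorial'])
dag_next_execution(args)  # prints the next logical date, or None plus a warning on stderr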
Example #8
    def test_local_run(self):
        args = self.parser.parse_args([
            'tasks',
            'run',
            'example_python_operator',
            'print_the_context',
            '2018-04-27T08:39:51.298439+00:00',
            '--interactive',
            '--subdir',
            '/root/dags/example_python_operator.py',
        ])

        dag = get_dag(args.subdir, args.dag_id)
        reset(dag.dag_id)

        task_command.task_run(args)
        task = dag.get_task(task_id=args.task_id)
        ti = TaskInstance(task, args.execution_date)
        ti.refresh_from_db()
        state = ti.current_state()
        assert state == State.SUCCESS
Example #9
def dag_next_execution(args):
    """
    Returns the next execution datetime of a DAG at the command line.
    >>> airflow dags next_execution tutorial
    2018-08-31 10:38:00
    """
    dag = get_dag(args.subdir, args.dag_id)

    if dag.is_paused:
        print("[INFO] Please be reminded this DAG is PAUSED now.")

    if dag.latest_execution_date:
        next_execution_dttm = dag.following_schedule(dag.latest_execution_date)

        if next_execution_dttm is None:
            print("[WARN] No following schedule can be found. " +
                  "This DAG may have schedule interval '@once' or `None`.")

        print(next_execution_dttm)
    else:
        print("[WARN] Only applicable when there is execution record found for the DAG.")
        print(None)
Example #10
def dag_test(args, session=None):
    """Execute one single DagRun for a given DAG and execution date, using the DebugExecutor."""
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    dag.clear(start_date=args.execution_date, end_date=args.execution_date, dag_run_state=False)
    try:
        dag.run(
            executor=DebugExecutor(),
            start_date=args.execution_date,
            end_date=args.execution_date,
            # Always run the DAG at least once even if no logical runs are
            # available. This does not make a lot of sense, but Airflow has
            # been doing this prior to 2.2 so we keep compatibility.
            run_at_least_once=True,
        )
    except BackfillUnfinished as e:
        print(str(e))

    show_dagrun = args.show_dagrun
    imgcat = args.imgcat_dagrun
    filename = args.save_dagrun
    if show_dagrun or imgcat or filename:
        tis = (
            session.query(TaskInstance)
            .filter(
                TaskInstance.dag_id == args.dag_id,
                TaskInstance.execution_date == args.execution_date,
            )
            .all()
        )

        dot_graph = render_dag(dag, tis=tis)
        print()
        if filename:
            _save_dot_to_file(dot_graph, filename)
        if imgcat:
            _display_dot_via_imgcat(dot_graph)
        if show_dagrun:
            print(dot_graph.source)
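A usage sketch, assuming the example DAGs are loaded. As long as none of the --show-dagrun, --imgcat-dagrun or --save-dagrun options are set, the session argument (presumably injected by a provide_session-style decorator omitted from this excerpt) is never used.

from airflow.cli.cli_parser import get_parser

parser = get_parser()
args = parser.parse_args(['dags', 'test', 'example_bash_operator', '2021-01-01'])
dag_test(args)  # runs the whole DAG once under the DebugExecutor, no scheduler required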
Example #11
def task_test(args, dag=None):
    """Tests task for a given dag_id"""
    # We want log outout from operators etc to show up here. Normally
    # airflow.task would redirect to a file, but here we want it to propagate
    # up to the normal airflow handler.
    handlers = logging.getLogger('airflow.task').handlers
    already_has_stream_handler = False
    for handler in handlers:
        already_has_stream_handler = isinstance(handler, logging.StreamHandler)
        if already_has_stream_handler:
            break
    if not already_has_stream_handler:
        logging.getLogger('airflow.task').propagate = True

    dag = dag or get_dag(args)

    task = dag.get_task(task_id=args.task_id)
    # Add CLI provided task_params to task.params
    if args.task_params:
        passed_in_params = json.loads(args.task_params)
        task.params.update(passed_in_params)
    ti = TaskInstance(task, args.execution_date)

    try:
        if args.dry_run:
            ti.dry_run()
        else:
            ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
    except Exception:  # pylint: disable=broad-except
        if args.post_mortem:
            try:
                debugger = importlib.import_module("ipdb")
            except ImportError:
                debugger = importlib.import_module("pdb")
            debugger.post_mortem()
        else:
            raise
Example #12
def task_failed_deps(args):
    """
    Returns the unmet dependencies for a task instance from the perspective of the
    scheduler (i.e. why a task instance doesn't get scheduled and then queued by the
    scheduler, and then run by an executor).
    >>> airflow tasks failed-deps tutorial sleep 2015-01-01
    Task instance dependencies not met:
    Dagrun Running: Task instance's dagrun did not exist: Unknown reason
    Trigger Rule: Task's trigger rule 'all_success' requires all upstream tasks
    to have succeeded, but found 1 non-success(es).
    """
    dag = get_dag(args.subdir, args.dag_id)
    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)

    dep_context = DepContext(deps=SCHEDULER_QUEUED_DEPS)
    failed_deps = list(ti.get_failed_dep_statuses(dep_context=dep_context))
    # TODO: Do we want to print or log this?
    if failed_deps:
        print("Task instance dependencies not met:")
        for dep in failed_deps:
            print("{}: {}".format(dep.dep_name, dep.reason))
    else:
        print("Task instance dependencies are all met.")
Example #13
def dag_backfill(args, dag=None):
    """Creates backfill job or dry run for a DAG"""
    logging.basicConfig(level=settings.LOGGING_LEVEL,
                        format=settings.SIMPLE_LOG_FORMAT)

    signal.signal(signal.SIGTERM, sigint_handler)

    import warnings
    warnings.warn(
        '--ignore-first-depends-on-past is deprecated as the value is always set to True',
        category=PendingDeprecationWarning)

    if args.ignore_first_depends_on_past is False:
        args.ignore_first_depends_on_past = True

    dag = dag or get_dag(args.subdir, args.dag_id)

    if not args.start_date and not args.end_date:
        raise AirflowException("Provide a start_date and/or end_date")

    # If only one date is passed, use it as both start and end
    args.end_date = args.end_date or args.start_date
    args.start_date = args.start_date or args.end_date

    if args.task_regex:
        dag = dag.partial_subset(task_ids_or_regex=args.task_regex,
                                 include_upstream=not args.ignore_dependencies)

    run_conf = None
    if args.conf:
        run_conf = json.loads(args.conf)

    if args.dry_run:
        print("Dry run of DAG {0} on {1}".format(args.dag_id, args.start_date))
        for task in dag.tasks:
            print("Task {0}".format(task.task_id))
            ti = TaskInstance(task, args.start_date)
            ti.dry_run()
    else:
        if args.reset_dagruns:
            DAG.clear_dags(
                [dag],
                start_date=args.start_date,
                end_date=args.end_date,
                confirm_prompt=not args.yes,
                include_subdags=True,
                dag_run_state=State.NONE,
            )

        dag.run(start_date=args.start_date,
                end_date=args.end_date,
                mark_success=args.mark_success,
                local=args.local,
                donot_pickle=(args.donot_pickle
                              or conf.getboolean('core', 'donot_pickle')),
                ignore_first_depends_on_past=args.ignore_first_depends_on_past,
                ignore_task_deps=args.ignore_dependencies,
                pool=args.pool,
                delay_on_limit_secs=args.delay_on_limit,
                verbose=args.verbose,
                conf=run_conf,
                rerun_failed_tasks=args.rerun_failed_tasks,
                run_backwards=args.run_backwards)
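A hedged sketch of a dry-run backfill over a two-day window, assuming the example DAGs are loaded; --dry-run keeps execution to the first branch above, so tasks are only rendered, never run.

from airflow.cli.cli_parser import get_parser

parser = get_parser()
args = parser.parse_args([
    'dags', 'backfill', 'example_bash_operator',
    '--start-date', '2021-01-01',
    '--end-date', '2021-01-02',
    '--dry-run',
])
dag_backfill(args)  # prints each task that would run; drop --dry-run to launch the backfill job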
Example #14
def task_run(args, dag=None):
    """Runs a single task instance"""

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if dag and args.pickle:
        raise AirflowException("You cannot use the --pickle option when using DAG.cli() method.")
    elif args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()

    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        if settings.DONOT_MODIFY_HANDLERS:
            with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                    redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
                _run_task_by_selected_method(args, dag, ti)
        else:
            # Get all the Handlers from 'airflow.task' logger
            # Add these handlers to the root logger so that we can get logs from
            # any custom loggers defined in the DAG
            airflow_logger_handlers = logging.getLogger('airflow.task').handlers
            root_logger = logging.getLogger()
            root_logger_handlers = root_logger.handlers

            # Remove all handlers from Root Logger to avoid duplicate logs
            for handler in root_logger_handlers:
                root_logger.removeHandler(handler)

            for handler in airflow_logger_handlers:
                root_logger.addHandler(handler)
            root_logger.setLevel(logging.getLogger('airflow.task').level)

            with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                    redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
                _run_task_by_selected_method(args, dag, ti)

            # We need to restore the handlers to the loggers as celery worker process
            # can call this command multiple times,
            # so if we don't reset this then logs from next task would go to the wrong place
            for handler in airflow_logger_handlers:
                root_logger.removeHandler(handler)
            for handler in root_logger_handlers:
                root_logger.addHandler(handler)

    logging.shutdown()
Example #15
    def _start_by_fork(self):
        pid = os.fork()
        if pid:
            self.log.info("Started process %d to run task", pid)
            return psutil.Process(pid)
        else:
            # Start a new process group
            os.setpgid(0, 0)
            import signal

            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)

            from airflow import settings
            from airflow.cli.cli_parser import get_parser
            from airflow.sentry import Sentry
            from airflow.utils.cli import get_dag

            # Force a new SQLAlchemy session. We can't share open DB handles
            # between processes. The CLI code will re-create this as part of its
            # normal startup.
            settings.engine.pool.dispose()
            settings.engine.dispose()

            parser = get_parser()
            # [1:] - remove "airflow" from the start of the command
            args = parser.parse_args(self._command[1:])

            # We prefer the job_id passed on the command-line because at this time, the
            # task instance may not have been updated.
            job_id = getattr(args, "job_id", self._task_instance.job_id)
            self.log.info('Running: %s', self._command)
            self.log.info('Job %s: Subtask %s', job_id, self._task_instance.task_id)

            proc_title = "airflow task runner: {0.dag_id} {0.task_id} {0.execution_date_or_run_id}"
            if job_id is not None:
                proc_title += " {0.job_id}"
            setproctitle(proc_title.format(args))

            return_code = 0
            try:
                # Parse the DAG file, since `airflow tasks run --local` does not parse it
                dag = get_dag(args.subdir, args.dag_id)
                args.func(args, dag=dag)
                return_code = 0
            except Exception as exc:
                return_code = 1

                self.log.error(
                    "Failed to execute job %s for task %s (%s; %r)",
                    job_id,
                    self._task_instance.task_id,
                    exc,
                    os.getpid(),
                )
            except SystemExit as sys_ex:
                # Someone mistakenly called sys.exit() in the fork. You should not call
                # sys.exit() in the forked child, because it can run atexit handlers that
                # were registered by the parent process before the fork happened.
                return_code = sys_ex.code
            except BaseException:
                # While we also want to handle BaseException here, we do not want to log it
                # (that is the default behaviour anyway). Set the return code to 2 to
                # indicate that this happened.
                return_code = 2
            finally:
                try:
                    # Explicitly flush any pending exception to Sentry and logging if enabled
                    Sentry.flush()
                    logging.shutdown()
                except BaseException:
                    # Also make sure to silently ignore ALL POSSIBLE exceptions thrown in the
                    # flush/shutdown, otherwise os._exit() might never be called. We could have
                    # used `except:` but `except BaseException` is more explicit (and linters
                    # do not complain).
                    pass
            # We run os._exit() making sure it is not run within the `finally` clause.
            # We cannot run os._exit() in the finally clause, because during finally-clause
            # processing the handled exception is held in memory, together with the stack
            # trace and possibly some objects that still need to be finalized. Running
            # os._exit() inside the `finally` clause might cause effects similar to
            # https://github.com/apache/airflow/issues/22404, where a temporary file had
            # not been deleted at os._exit().
            os._exit(return_code)
Example #16
def task_run(args, dag=None):
    """Run a single task instance.

    Note that there must be at least one DagRun for this to start,
    i.e. it must have been scheduled and/or triggered previously.
    Alternatively, if you just need to run it for testing then use
    "airflow tasks test ..." command instead.
    """

    if args.local and args.raw:
        raise AirflowException(
            "Option --raw and --local are mutually exclusive. "
            "Please remove one option to execute the command.")

    if args.raw:
        unsupported_options = [
            o for o in RAW_TASK_UNSUPPORTED_OPTION if getattr(args, o)
        ]

        if unsupported_options:
            unsupported_raw_task_flags = ', '.join(
                f'--{o}' for o in RAW_TASK_UNSUPPORTED_OPTION)
            unsupported_flags = ', '.join(f'--{o}'
                                          for o in unsupported_options)
            raise AirflowException(
                "Option --raw does not work with some of the other options on this command. "
                "You can't use --raw option and the following options: "
                f"{unsupported_raw_task_flags}. "
                f"You provided the option {unsupported_flags}. "
                "Delete it to execute the command.")
    if dag and args.pickle:
        raise AirflowException(
            "You cannot use the --pickle option when using DAG.cli() method.")
    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path) as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    settings.MASK_SECRETS_IN_LOGS = True

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass
    task = dag.get_task(task_id=args.task_id)
    ti = _get_ti(task, args.execution_date_or_run_id)
    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()

    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        with _capture_task_logs(ti):
            _run_task_by_selected_method(args, dag, ti)
Example #17
def dag_backfill(args, dag=None):
    """Creates backfill job or dry run for a DAG"""
    logging.basicConfig(level=settings.LOGGING_LEVEL,
                        format=settings.SIMPLE_LOG_FORMAT)

    signal.signal(signal.SIGTERM, sigint_handler)

    import warnings

    warnings.warn(
        '--ignore-first-depends-on-past is deprecated as the value is always set to True',
        category=PendingDeprecationWarning,
    )

    if args.ignore_first_depends_on_past is False:
        args.ignore_first_depends_on_past = True

    if not args.start_date and not args.end_date:
        raise AirflowException("Provide a start_date and/or end_date")

    dag = dag or get_dag(args.subdir, args.dag_id)

    # If only one date is passed, use it as both start and end
    args.end_date = args.end_date or args.start_date
    args.start_date = args.start_date or args.end_date

    if args.task_regex:
        dag = dag.partial_subset(task_ids_or_regex=args.task_regex,
                                 include_upstream=not args.ignore_dependencies)
        if not dag.task_dict:
            raise AirflowException(
                f"There are no tasks that match '{args.task_regex}' regex. Nothing to run, exiting..."
            )

    run_conf = None
    if args.conf:
        run_conf = json.loads(args.conf)

    if args.dry_run:
        print(f"Dry run of DAG {args.dag_id} on {args.start_date}")
        dr = DagRun(dag.dag_id, execution_date=args.start_date)
        for task in dag.tasks:
            print(f"Task {task.task_id}")
            ti = TaskInstance(task, run_id=None)
            ti.dag_run = dr
            ti.dry_run()
    else:
        if args.reset_dagruns:
            DAG.clear_dags(
                [dag],
                start_date=args.start_date,
                end_date=args.end_date,
                confirm_prompt=not args.yes,
                include_subdags=True,
                dag_run_state=DagRunState.QUEUED,
            )

        try:
            dag.run(
                start_date=args.start_date,
                end_date=args.end_date,
                mark_success=args.mark_success,
                local=args.local,
                donot_pickle=(args.donot_pickle
                              or conf.getboolean('core', 'donot_pickle')),
                ignore_first_depends_on_past=args.ignore_first_depends_on_past,
                ignore_task_deps=args.ignore_dependencies,
                pool=args.pool,
                delay_on_limit_secs=args.delay_on_limit,
                verbose=args.verbose,
                conf=run_conf,
                rerun_failed_tasks=args.rerun_failed_tasks,
                run_backwards=args.run_backwards,
                continue_on_failures=args.continue_on_failures,
            )
        except ValueError as vr:
            print(str(vr))
            sys.exit(1)
Example #18
def dag_test(args):
    """Execute one single DagRun for a given DAG and execution date, using the DebugExecutor."""
    dag = get_dag(subdir=args.subdir, dag_id=args.dag_id)
    dag.clear(start_date=args.execution_date, end_date=args.execution_date, reset_dag_runs=True)
    dag.run(executor=DebugExecutor(), start_date=args.execution_date, end_date=args.execution_date)