def backfill(args):
    """Run a backfill, or a dry run, of one DAG over a date range.

    The DAG named by ``args.dag_id`` is looked up in a ``DagBag`` built from
    ``args.subdir``. ``args.start_date`` and ``args.end_date`` are parsed with
    ``dateutil`` when given and written back onto ``args``; when only one of
    them is supplied it is used for both ends of the range.

    Raises:
        AirflowException: if ``args.dag_id`` is not present in the DagBag.
    """
    logging.basicConfig(
        level=settings.LOGGING_LEVEL,
        format=settings.SIMPLE_LOG_FORMAT,
    )

    dagbag = DagBag(args.subdir)
    if args.dag_id not in dagbag.dags:
        raise AirflowException('dag_id could not be found')
    dag = dagbag.dags[args.dag_id]

    # Parse whichever bounds were supplied (start first, then end).
    for bound in ('start_date', 'end_date'):
        raw_value = getattr(args, bound)
        if raw_value:
            setattr(args, bound, dateutil.parser.parse(raw_value))

    # A single supplied date serves as both start and end.
    if not args.end_date:
        args.end_date = args.start_date
    if not args.start_date:
        args.start_date = args.end_date

    if args.task_regex:
        dag = dag.sub_dag(
            task_regex=args.task_regex,
            include_upstream=not args.ignore_dependencies,
        )

    if args.dry_run:
        print("Dry run of DAG {0} on {1}".format(args.dag_id, args.start_date))
        for task in dag.tasks:
            print("Task {0}".format(task.task_id))
            TaskInstance(task, args.start_date).dry_run()
        return

    run_options = dict(
        start_date=args.start_date,
        end_date=args.end_date,
        mark_success=args.mark_success,
        include_adhoc=args.include_adhoc,
        local=args.local,
        donot_pickle=args.donot_pickle,
        ignore_dependencies=args.ignore_dependencies,
    )
    dag.run(**run_options)
def backfill(args, dag=None):
    """Run a backfill, or a dry run, of a DAG over a date range.

    Args:
        args: parsed CLI namespace; ``start_date``/``end_date`` are rewritten
            in place so that a single supplied date covers both ends.
        dag: DAG to operate on; when falsy it is obtained via ``get_dag(args)``.

    Raises:
        AirflowException: if neither ``start_date`` nor ``end_date`` is set.
    """
    logging.basicConfig(
        level=settings.LOGGING_LEVEL,
        format=settings.SIMPLE_LOG_FORMAT,
    )

    if not dag:
        dag = get_dag(args)

    if not (args.start_date or args.end_date):
        raise AirflowException("Provide a start_date and/or end_date")

    # A single supplied date serves as both start and end.
    if not args.end_date:
        args.end_date = args.start_date
    if not args.start_date:
        args.start_date = args.end_date

    if args.task_regex:
        dag = dag.sub_dag(
            task_regex=args.task_regex,
            include_upstream=not args.ignore_dependencies,
        )

    if args.dry_run:
        print("Dry run of DAG {0} on {1}".format(args.dag_id, args.start_date))
        for task in dag.tasks:
            print("Task {0}".format(task.task_id))
            TaskInstance(task, args.start_date).dry_run()
        return

    run_options = dict(
        start_date=args.start_date,
        end_date=args.end_date,
        mark_success=args.mark_success,
        include_adhoc=args.include_adhoc,
        local=args.local,
        # The CLI flag wins; otherwise fall back to the [core] config value.
        donot_pickle=(args.donot_pickle or
                      conf.getboolean('core', 'donot_pickle')),
        ignore_first_depends_on_past=args.ignore_first_depends_on_past,
        ignore_task_deps=args.ignore_dependencies,
        pool=args.pool,
    )
    dag.run(**run_options)
def backfill(args, dag=None):
    """Backfill a DAG between two dates, or print a dry run of its tasks.

    Args:
        args: parsed CLI namespace. ``start_date`` and ``end_date`` are
            updated in place: if only one is given, the other takes its value.
        dag: optional DAG; ``get_dag(args)`` supplies one when this is falsy.

    Raises:
        AirflowException: if both ``start_date`` and ``end_date`` are missing.
    """
    logging.basicConfig(
        level=settings.LOGGING_LEVEL,
        format=settings.SIMPLE_LOG_FORMAT,
    )

    target_dag = dag or get_dag(args)

    no_dates_given = not args.start_date and not args.end_date
    if no_dates_given:
        raise AirflowException("Provide a start_date and/or end_date")

    # If only one date was passed, use it as both start and end.
    args.end_date = args.end_date if args.end_date else args.start_date
    args.start_date = args.start_date if args.start_date else args.end_date

    if args.task_regex:
        target_dag = target_dag.sub_dag(
            task_regex=args.task_regex,
            include_upstream=not args.ignore_dependencies,
        )

    if not args.dry_run:
        target_dag.run(
            start_date=args.start_date,
            end_date=args.end_date,
            mark_success=args.mark_success,
            include_adhoc=args.include_adhoc,
            local=args.local,
            # The CLI flag wins; otherwise fall back to the [core] config value.
            donot_pickle=(
                args.donot_pickle or conf.getboolean("core", "donot_pickle")
            ),
            ignore_dependencies=args.ignore_dependencies,
            ignore_first_depends_on_past=args.ignore_first_depends_on_past,
            pool=args.pool,
        )
        return

    print("Dry run of DAG {0} on {1}".format(args.dag_id, args.start_date))
    for task in target_dag.tasks:
        print("Task {0}".format(task.task_id))
        instance = TaskInstance(task, args.start_date)
        instance.dry_run()
def task_test(args, dag=None):
    """Tests task for a given dag_id.

    Runs (or dry-runs) a single task instance with task dependencies and
    task-instance state ignored and ``test_mode=True``.

    Args:
        args: parsed CLI namespace; reads ``task_id``, ``task_params``
            (a JSON string merged into ``task.params``), ``execution_date``,
            ``dry_run`` and ``post_mortem``.
        dag: optional DAG; ``get_dag(args)`` supplies one when this is falsy.

    Raises:
        Exception: whatever the task raises, unless ``args.post_mortem`` is
            set, in which case a debugger post-mortem session is started
            instead of re-raising.
    """
    # We want log output from operators etc to show up here. Normally
    # airflow.task would redirect to a file, but here we want it to propagate
    # up to the normal airflow handler.
    task_logger = logging.getLogger('airflow.task')
    previous_propagate = task_logger.propagate
    task_logger.propagate = True

    try:
        dag = dag or get_dag(args)
        task = dag.get_task(task_id=args.task_id)

        # Add CLI provided task_params to task.params
        if args.task_params:
            passed_in_params = json.loads(args.task_params)
            task.params.update(passed_in_params)

        ti = TaskInstance(task, args.execution_date)

        try:
            if args.dry_run:
                ti.dry_run()
            else:
                ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
        except Exception:  # pylint: disable=broad-except
            if not args.post_mortem:
                raise
            # Prefer ipdb when it is installed, otherwise use the stdlib pdb.
            try:
                debugger = importlib.import_module("ipdb")
            except ImportError:
                debugger = importlib.import_module("pdb")
            debugger.post_mortem()
    finally:
        # Put the logger back the way we found it. When run from the CLI this
        # doesn't matter, but a long-lived process (e.g. a test suite) would
        # otherwise keep propagating airflow.task records after this call.
        task_logger.propagate = previous_propagate
def backfill(args):
    """Run a backfill, or a dry run, of one DAG over a date range.

    The DAG is taken from a ``DagBag`` built on ``args.subdir``. Date
    arguments are parsed with ``dateutil`` when present and stored back on
    ``args``; a single supplied date is used for both ends of the range.

    Raises:
        AirflowException: if ``args.dag_id`` is not present in the DagBag.
    """
    logging.basicConfig(
        level=settings.LOGGING_LEVEL,
        format=settings.SIMPLE_LOG_FORMAT,
    )

    dagbag = DagBag(args.subdir)
    if args.dag_id not in dagbag.dags:
        raise AirflowException("dag_id could not be found")
    selected_dag = dagbag.dags[args.dag_id]

    parse_date = dateutil.parser.parse
    if args.start_date:
        args.start_date = parse_date(args.start_date)
    if args.end_date:
        args.end_date = parse_date(args.end_date)

    # If only one date was passed, use it as both start and end.
    if not args.end_date:
        args.end_date = args.start_date
    if not args.start_date:
        args.start_date = args.end_date

    if args.task_regex:
        selected_dag = selected_dag.sub_dag(
            task_regex=args.task_regex,
            include_upstream=not args.ignore_dependencies,
        )

    if args.dry_run:
        print("Dry run of DAG {0} on {1}".format(args.dag_id, args.start_date))
        for task in selected_dag.tasks:
            print("Task {0}".format(task.task_id))
            TaskInstance(task, args.start_date).dry_run()
        return

    selected_dag.run(
        start_date=args.start_date,
        end_date=args.end_date,
        mark_success=args.mark_success,
        include_adhoc=args.include_adhoc,
        local=args.local,
        # The CLI flag wins; otherwise fall back to the [core] config value.
        donot_pickle=(
            args.donot_pickle or conf.getboolean("core", "donot_pickle")
        ),
        ignore_dependencies=args.ignore_dependencies,
    )
def dag_backfill(args, dag=None):
    """Create a backfill job, or perform a dry run, for a DAG.

    Args:
        args: parsed CLI namespace. ``start_date``/``end_date`` are updated in
            place so that a single supplied date covers both ends;
            ``args.conf`` is decoded from JSON when present.
        dag: optional DAG; ``get_dag(args.subdir, args.dag_id)`` supplies one
            when this is falsy.

    Raises:
        AirflowException: if neither ``start_date`` nor ``end_date`` is set.
    """
    logging.basicConfig(
        level=settings.LOGGING_LEVEL,
        format=settings.SIMPLE_LOG_FORMAT,
    )
    # Route SIGTERM through the same handler used for interrupts.
    signal.signal(signal.SIGTERM, sigint_handler)

    if not dag:
        dag = get_dag(args.subdir, args.dag_id)

    if not (args.start_date or args.end_date):
        raise AirflowException("Provide a start_date and/or end_date")

    # A single supplied date serves as both start and end.
    if not args.end_date:
        args.end_date = args.start_date
    if not args.start_date:
        args.start_date = args.end_date

    if args.task_regex:
        dag = dag.sub_dag(
            task_regex=args.task_regex,
            include_upstream=not args.ignore_dependencies,
        )

    run_conf = json.loads(args.conf) if args.conf else None

    if args.dry_run:
        print("Dry run of DAG {0} on {1}".format(args.dag_id, args.start_date))
        for task in dag.tasks:
            print("Task {0}".format(task.task_id))
            TaskInstance(task, args.start_date).dry_run()
        return

    if args.reset_dagruns:
        # Clear existing runs in the range first; prompts unless --yes given.
        DAG.clear_dags(
            [dag],
            start_date=args.start_date,
            end_date=args.end_date,
            confirm_prompt=not args.yes,
            include_subdags=True,
        )

    run_options = dict(
        start_date=args.start_date,
        end_date=args.end_date,
        mark_success=args.mark_success,
        local=args.local,
        # The CLI flag wins; otherwise fall back to the [core] config value.
        donot_pickle=(args.donot_pickle or
                      conf.getboolean('core', 'donot_pickle')),
        ignore_first_depends_on_past=args.ignore_first_depends_on_past,
        ignore_task_deps=args.ignore_dependencies,
        pool=args.pool,
        delay_on_limit_secs=args.delay_on_limit,
        verbose=args.verbose,
        conf=run_conf,
        rerun_failed_tasks=args.rerun_failed_tasks,
        run_backwards=args.run_backwards,
    )
    dag.run(**run_options)
def test(args):
    """Run, or dry-run, a single task instance in test mode.

    ``args.execution_date`` is parsed with ``dateutil`` and stored back on
    ``args``. The DAG is looked up in a ``DagBag`` built from
    ``process_subdir(args.subdir)``.

    Raises:
        AirflowException: if ``args.dag_id`` is not present in the DagBag.
    """
    args.execution_date = dateutil.parser.parse(args.execution_date)

    dagbag = DagBag(process_subdir(args.subdir))
    if args.dag_id not in dagbag.dags:
        raise AirflowException('dag_id could not be found')

    selected_dag = dagbag.dags[args.dag_id]
    selected_task = selected_dag.get_task(task_id=args.task_id)
    instance = TaskInstance(selected_task, args.execution_date)

    if args.dry_run:
        instance.dry_run()
        return

    instance.run(force=True, ignore_dependencies=True, test_mode=True)
def test(args, dag=None):
    """Run, or dry-run, a single task instance in test mode.

    Args:
        args: parsed CLI namespace; reads ``task_id``, ``task_params`` (a JSON
            string merged into ``task.params``), ``execution_date`` and
            ``dry_run``.
        dag: optional DAG; ``get_dag(args)`` supplies one when this is falsy.
    """
    if not dag:
        dag = get_dag(args)
    selected_task = dag.get_task(task_id=args.task_id)

    # Merge any task params given on the command line into task.params.
    if args.task_params:
        selected_task.params.update(json.loads(args.task_params))

    instance = TaskInstance(selected_task, args.execution_date)

    if args.dry_run:
        instance.dry_run()
        return

    instance.run(force=True, ignore_dependencies=True, test_mode=True)
def task_test(args, dag=None):
    """Test a single task of the given DAG.

    Runs (or dry-runs) one task instance with task dependencies and
    task-instance state ignored and ``test_mode=True``.

    Args:
        args: parsed CLI namespace; reads ``env_vars``, ``subdir``, ``dag_id``,
            ``task_id``, ``task_params`` (a JSON string merged into
            ``task.params``), ``execution_date``, ``dry_run`` and
            ``post_mortem``.
        dag: optional DAG; ``get_dag(args.subdir, args.dag_id)`` supplies one
            when this is falsy.
    """
    # Operator log output should be visible here. Normally airflow.task would
    # redirect to a file, so unless it already has a stream handler we let it
    # propagate up to the normal airflow handler.
    settings.MASK_SECRETS_IN_LOGS = True

    task_logger = logging.getLogger('airflow.task')
    already_has_stream_handler = any(
        isinstance(handler, logging.StreamHandler)
        for handler in task_logger.handlers
    )
    if not already_has_stream_handler:
        task_logger.propagate = True

    # NOTE(review): the environment is only updated when --env-vars is given,
    # so AIRFLOW_TEST_MODE is not exported otherwise - confirm this is intended.
    if args.env_vars:
        env_vars = {'AIRFLOW_TEST_MODE': 'True'}
        env_vars.update(args.env_vars)
        os.environ.update(env_vars)

    if not dag:
        dag = get_dag(args.subdir, args.dag_id)
    selected_task = dag.get_task(task_id=args.task_id)

    # Merge any task params given on the command line into task.params.
    if args.task_params:
        selected_task.params.update(json.loads(args.task_params))

    instance = TaskInstance(selected_task, args.execution_date)

    try:
        if args.dry_run:
            instance.dry_run()
        else:
            instance.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
    except Exception:  # pylint: disable=broad-except
        if not args.post_mortem:
            raise
        debugger = _guess_debugger()
        debugger.post_mortem()
    finally:
        if not already_has_stream_handler:
            # Switch propagation back off. From the CLI this makes no
            # difference, but it matters when called from the test suite.
            task_logger.propagate = False
def test(args):
    """Run, or dry-run, a single task instance in test mode.

    ``args.execution_date`` is parsed with ``dateutil`` and stored back on
    ``args``. The DAG is looked up in a ``DagBag`` built from
    ``process_subdir(args.subdir)``, and ``args.task_params`` (a JSON string)
    is merged into ``task.params`` when given.

    Raises:
        AirflowException: if ``args.dag_id`` is not present in the DagBag.
    """
    args.execution_date = dateutil.parser.parse(args.execution_date)

    dagbag = DagBag(process_subdir(args.subdir))
    if args.dag_id not in dagbag.dags:
        raise AirflowException('dag_id could not be found')

    selected_dag = dagbag.dags[args.dag_id]
    selected_task = selected_dag.get_task(task_id=args.task_id)

    # Merge any task params given on the command line into task.params.
    if args.task_params:
        selected_task.params.update(json.loads(args.task_params))

    instance = TaskInstance(selected_task, args.execution_date)

    if args.dry_run:
        instance.dry_run()
        return

    instance.run(force=True, ignore_dependencies=True, test_mode=True)
def task_test(args, dag=None):
    """Test a single task of the given DAG.

    Runs (or dry-runs) one task instance with task dependencies and
    task-instance state ignored and ``test_mode=True``.

    Args:
        args: parsed CLI namespace; reads ``task_id``, ``task_params`` (a JSON
            string merged into ``task.params``), ``execution_date``,
            ``dry_run`` and ``post_mortem``.
        dag: optional DAG; ``get_dag(args)`` supplies one when this is falsy.
    """
    # Operator log output should be visible here. Normally airflow.task would
    # redirect to a file, so unless it already has a stream handler we let it
    # propagate up to the normal airflow handler.
    task_logger = logging.getLogger('airflow.task')
    already_has_stream_handler = any(
        isinstance(handler, logging.StreamHandler)
        for handler in task_logger.handlers
    )
    if not already_has_stream_handler:
        task_logger.propagate = True

    if not dag:
        dag = get_dag(args)
    selected_task = dag.get_task(task_id=args.task_id)

    # Merge any task params given on the command line into task.params.
    if args.task_params:
        selected_task.params.update(json.loads(args.task_params))

    instance = TaskInstance(selected_task, args.execution_date)

    try:
        if args.dry_run:
            instance.dry_run()
        else:
            instance.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
    except Exception:  # pylint: disable=broad-except
        if not args.post_mortem:
            raise
        # Prefer ipdb when it is installed, otherwise use the stdlib pdb.
        try:
            debugger = importlib.import_module("ipdb")
        except ImportError:
            debugger = importlib.import_module("pdb")
        debugger.post_mortem()
    finally:
        if not already_has_stream_handler:
            # Switch propagation back off. From the CLI this makes no
            # difference, but it matters when called from the test suite.
            task_logger.propagate = False
def dag_backfill(args, dag=None):
    """Create a backfill job, or perform a dry run, for a DAG.

    Args:
        args: parsed CLI namespace. ``ignore_first_depends_on_past`` is forced
            to ``True`` when it is ``False``; ``start_date``/``end_date`` are
            updated in place so that a single supplied date covers both ends;
            ``args.conf`` is decoded from JSON when present.
        dag: optional DAG; ``get_dag(args.subdir, args.dag_id)`` supplies one
            when this is falsy.

    Raises:
        AirflowException: if neither date is given, or if ``args.task_regex``
            leaves the DAG with no tasks.

    A ``ValueError`` raised by ``dag.run`` is printed and the process exits
    with status 1.
    """
    logging.basicConfig(
        level=settings.LOGGING_LEVEL,
        format=settings.SIMPLE_LOG_FORMAT,
    )
    # Route SIGTERM through the same handler used for interrupts.
    signal.signal(signal.SIGTERM, sigint_handler)

    import warnings

    warnings.warn(
        '--ignore-first-depends-on-past is deprecated as the value is always set to True',
        category=PendingDeprecationWarning,
    )
    if args.ignore_first_depends_on_past is False:
        args.ignore_first_depends_on_past = True

    if not (args.start_date or args.end_date):
        raise AirflowException("Provide a start_date and/or end_date")

    if not dag:
        dag = get_dag(args.subdir, args.dag_id)

    # A single supplied date serves as both start and end.
    if not args.end_date:
        args.end_date = args.start_date
    if not args.start_date:
        args.start_date = args.end_date

    if args.task_regex:
        dag = dag.partial_subset(
            task_ids_or_regex=args.task_regex,
            include_upstream=not args.ignore_dependencies,
        )
        if not dag.task_dict:
            raise AirflowException(
                f"There are no tasks that match '{args.task_regex}' regex. Nothing to run, exiting..."
            )

    run_conf = json.loads(args.conf) if args.conf else None

    if args.dry_run:
        print(f"Dry run of DAG {args.dag_id} on {args.start_date}")
        # One DagRun object is shared by every task instance of the dry run.
        dry_dag_run = DagRun(dag.dag_id, execution_date=args.start_date)
        for task in dag.tasks:
            print(f"Task {task.task_id}")
            instance = TaskInstance(task, run_id=None)
            instance.dag_run = dry_dag_run
            instance.dry_run()
        return

    if args.reset_dagruns:
        # Clear existing runs in the range first; prompts unless --yes given.
        DAG.clear_dags(
            [dag],
            start_date=args.start_date,
            end_date=args.end_date,
            confirm_prompt=not args.yes,
            include_subdags=True,
            dag_run_state=DagRunState.QUEUED,
        )

    try:
        dag.run(
            start_date=args.start_date,
            end_date=args.end_date,
            mark_success=args.mark_success,
            local=args.local,
            # The CLI flag wins; otherwise fall back to the [core] config value.
            donot_pickle=(args.donot_pickle or
                          conf.getboolean('core', 'donot_pickle')),
            ignore_first_depends_on_past=args.ignore_first_depends_on_past,
            ignore_task_deps=args.ignore_dependencies,
            pool=args.pool,
            delay_on_limit_secs=args.delay_on_limit,
            verbose=args.verbose,
            conf=run_conf,
            rerun_failed_tasks=args.rerun_failed_tasks,
            run_backwards=args.run_backwards,
            continue_on_failures=args.continue_on_failures,
        )
    except ValueError as err:
        print(str(err))
        sys.exit(1)