def _create_scheduler_job(args):
    job = SchedulerJob(
        subdir=process_subdir(args.subdir),
        num_runs=args.num_runs,
        do_pickle=args.do_pickle,
    )
    return job

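The helper above only builds the job. A minimal sketch of how the scheduler command could use it; the _run_scheduler_job name and the foreground-only flow are assumptions (the full daemon-aware variants appear below):

def _run_scheduler_job(job):
    # Hypothetical helper: install the usual signal handlers and run the job
    # in the foreground; daemon mode would additionally wrap this in a DaemonContext.
    signal.signal(signal.SIGINT, sigint_handler)
    signal.signal(signal.SIGTERM, sigint_handler)
    signal.signal(signal.SIGQUIT, sigquit_handler)
    job.run()


def scheduler(args):
    """Starts Airflow Scheduler"""
    print(settings.HEADER)
    job = _create_scheduler_job(args)
    _run_scheduler_job(job)
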
def scheduler(args):
    """Starts Airflow Scheduler"""
    print(settings.HEADER)
    job = SchedulerJob(subdir=process_subdir(args.subdir),
                       num_runs=args.num_runs,
                       do_pickle=args.do_pickle)

    if args.daemon:
        pid, stdout, stderr, log_file = setup_locations(
            "scheduler", args.pid, args.stdout, args.stderr, args.log_file)
        handle = setup_logging(log_file)
        stdout = open(stdout, 'w+')
        stderr = open(stderr, 'w+')

        ctx = daemon.DaemonContext(
            pidfile=TimeoutPIDLockFile(pid, -1),
            files_preserve=[handle],
            stdout=stdout,
            stderr=stderr,
        )
        with ctx:
            job.run()

        stdout.close()
        stderr.close()
    else:
        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)
        signal.signal(signal.SIGQUIT, sigquit_handler)
        job.run()

def scheduler(args):
    """Starts Airflow Scheduler"""
    skip_serve_logs = args.skip_serve_logs
    print(settings.HEADER)
    job = SchedulerJob(
        subdir=process_subdir(args.subdir),
        num_runs=args.num_runs,
        do_pickle=args.do_pickle,
    )

    if args.daemon:
        pid, stdout, stderr, log_file = setup_locations(
            "scheduler", args.pid, args.stdout, args.stderr, args.log_file)
        handle = setup_logging(log_file)
        with open(stdout, 'w+') as stdout_handle, open(stderr, 'w+') as stderr_handle:
            ctx = daemon.DaemonContext(
                pidfile=TimeoutPIDLockFile(pid, -1),
                files_preserve=[handle],
                stdout=stdout_handle,
                stderr=stderr_handle,
            )
            with ctx:
                sub_proc = _serve_logs(skip_serve_logs)
                job.run()
    else:
        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)
        signal.signal(signal.SIGQUIT, sigquit_handler)

        sub_proc = _serve_logs(skip_serve_logs)
        job.run()

    if sub_proc:
        sub_proc.terminate()

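The skip_serve_logs variant above calls a _serve_logs helper that is not part of this excerpt. A minimal sketch of such a helper, assuming it spawns Airflow's serve_logs entrypoint in a child process and returns the handle (None when skipped); the import path is an assumption:

from multiprocessing import Process
from typing import Optional

from airflow.utils.serve_logs import serve_logs  # assumed location of the log-server entrypoint


def _serve_logs(skip_serve_logs: bool = False) -> Optional[Process]:
    """Starts the serve_logs sub-process unless explicitly skipped."""
    if not skip_serve_logs:
        sub_proc = Process(target=serve_logs)
        sub_proc.start()
        return sub_proc
    return None
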
def event_based_scheduler(args):
    """Starts Airflow Event-based Scheduler"""
    print(settings.HEADER)
    job = EventBasedSchedulerJob(
        dag_directory=process_subdir(args.subdir),
        server_uri=args.server_uri,
    )

    if args.daemon:
        pid, stdout, stderr, log_file = setup_locations(
            "event_based_scheduler", args.pid, args.stdout, args.stderr, args.log_file)
        handle = setup_logging(log_file)
        stdout = open(stdout, 'w+')
        stderr = open(stderr, 'w+')

        ctx = daemon.DaemonContext(
            pidfile=TimeoutPIDLockFile(pid, -1),
            files_preserve=[handle],
            stdout=stdout,
            stderr=stderr,
        )
        with ctx:
            job.run()

        stdout.close()
        stderr.close()
    else:
        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)
        signal.signal(signal.SIGQUIT, sigquit_handler)
        job.run()

def dag_list_import_errors(args):
    """Displays dags with import errors on the command line"""
    dagbag = DagBag(process_subdir(args.subdir))
    data = []
    for filename, errors in dagbag.import_errors.items():
        data.append({"filepath": filename, "error": errors})
    AirflowConsole().print_as(
        data=data,
        output=args.output,
    )

def scheduler(args):
    """Starts Airflow Scheduler"""
    print(settings.HEADER)
    job = SchedulerJob(
        subdir=process_subdir(args.subdir),
        num_runs=args.num_runs,
        do_pickle=args.do_pickle,
    )
    scheduler_name = SchedulerFactory.get_scheduler_name()
    if scheduler_name == SchedulerFactory.DEFAULT_SCHEDULER:
        pass
    elif scheduler_name == SchedulerFactory.EVENT_BASED_SCHEDULER:
        job = EventBasedSchedulerJob(dag_directory=process_subdir(args.subdir),
                                     server_uri=args.server_uri)
    else:
        scheduler_class = SchedulerFactory.get_default_scheduler()
        job = scheduler_class()

    if args.daemon:
        pid, stdout, stderr, log_file = setup_locations(
            "scheduler", args.pid, args.stdout, args.stderr, args.log_file)
        handle = setup_logging(log_file)
        stdout = open(stdout, 'w+')
        stderr = open(stderr, 'w+')

        ctx = daemon.DaemonContext(
            pidfile=TimeoutPIDLockFile(pid, -1),
            files_preserve=[handle],
            stdout=stdout,
            stderr=stderr,
        )
        with ctx:
            job.run()

        stdout.close()
        stderr.close()
    else:
        signal.signal(signal.SIGINT, sigint_handler)
        signal.signal(signal.SIGTERM, sigint_handler)
        signal.signal(signal.SIGQUIT, sigquit_handler)
        job.run()

def dag_list_dags(args):
    """Displays dags with or without stats at the command line"""
    dagbag = DagBag(process_subdir(args.subdir))
    AirflowConsole().print_as(
        data=sorted(dagbag.dags.values(), key=lambda d: d.dag_id),
        output=args.output,
        mapper=lambda x: {
            "dag_id": x.dag_id,
            "filepath": x.filepath,
            "owner": x.owner,
        },
    )

def dag_list_dags(args):
    """Displays dags with or without stats at the command line"""
    dagbag = DagBag(process_subdir(args.subdir))
    list_template = textwrap.dedent("""\n
    -------------------------------------------------------------------
    DAGS
    -------------------------------------------------------------------
    {dag_list}
    """)
    dag_list = "\n".join(sorted(dagbag.dags))
    print(list_template.format(dag_list=dag_list))
    if args.report:
        print(dagbag.dagbag_report())

def get_etl_dags():
    from airflow.models import DagBag
    from airflow.utils.cli import process_subdir

    _dir = get_dag_folder()
    dagbag = DagBag(process_subdir(_dir))
    tags = ['etl', 'scanreport']
    return [
        dag.dag_id
        for dag in dagbag.dags.values()
        if all(tag in dag.tags for tag in tags)
    ]

def dag_report(args):
    """Displays dagbag stats at the command line"""
    dagbag = DagBag(process_subdir(args.subdir))
    AirflowConsole().print_as(
        data=dagbag.dagbag_stats,
        output=args.output,
        mapper=lambda x: {
            "file": x.file,
            "duration": x.duration,
            "dag_num": x.dag_num,
            "task_num": x.task_num,
            "dags": sorted(ast.literal_eval(x.dags)),
        },
    )

def dag_list_dags(args):
    """Displays dags with or without stats at the command line"""
    dagbag = DagBag(process_subdir(args.subdir))
    if dagbag.import_errors:
        from rich import print as rich_print

        rich_print(
            "[red][bold]Error:[/bold] Failed to load all files. "
            "For details, run `airflow dags list-import-errors`",
            file=sys.stderr,
        )
    AirflowConsole().print_as(
        data=sorted(dagbag.dags.values(), key=lambda d: d.dag_id),
        output=args.output,
        mapper=lambda x: {
            "dag_id": x.dag_id,
            "filepath": x.filepath,
            "owner": x.owner,
            "paused": x.get_is_paused(),
        },
    )

def dag_report(args):
    """Displays dagbag stats at the command line"""
    dagbag = DagBag(process_subdir(args.subdir))
    print(tabulate(dagbag.dagbag_stats, headers="keys", tablefmt=args.output))

def dag_list_dags(args):
    """Displays dags with or without stats at the command line"""
    dagbag = DagBag(process_subdir(args.subdir))
    dags = dagbag.dags.values()
    print(_tabulate_dags(dags, tablefmt=args.output))

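The variant above delegates rendering to a _tabulate_dags helper that is not included in this excerpt. A possible sketch, assuming it simply feeds one row per DAG to tabulate; the exact column set is an assumption:

from tabulate import tabulate


def _tabulate_dags(dags, tablefmt="fancy_grid"):
    # Assumed columns: dag_id, filepath, owner; adjust to whatever the surrounding module expects.
    rows = [(dag.dag_id, dag.filepath, dag.owner) for dag in dags]
    return tabulate(rows, headers=["dag_id", "filepath", "owner"], tablefmt=tablefmt)
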
def test_process_subdir_path_with_placeholder(self):
    self.assertEqual(
        os.path.join(settings.DAGS_FOLDER, 'abc'),
        cli.process_subdir('DAGS_FOLDER/abc'),
    )

def test_process_subdir_path_with_placeholder(self):
    assert os.path.join(settings.DAGS_FOLDER, 'abc') == cli.process_subdir('DAGS_FOLDER/abc')
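
Both tests assert the same behaviour: a literal DAGS_FOLDER prefix is expanded to the configured DAGs directory. A minimal sketch of a process_subdir that would satisfy these assertions (not necessarily the exact upstream implementation):

import os

from airflow import settings


def process_subdir(subdir):
    """Expands the DAGS_FOLDER placeholder and normalises the result to an absolute path."""
    if not subdir:
        return None
    subdir = subdir.replace('DAGS_FOLDER', settings.DAGS_FOLDER)
    return os.path.abspath(os.path.expanduser(subdir))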