def info(self, console: AirflowConsole):
    """Render a two-column table of installed providers (package name, latest version)."""
    providers_table = SimpleTable(title="Providers info")
    providers_table.add_column()
    providers_table.add_column(width=150)
    # Each value unpacks as a 2-tuple; the second element is the provider metadata dict.
    for _, provider_dict in ProvidersManager().providers.values():
        providers_table.add_row(provider_dict['package-name'], provider_dict['versions'][0])
    console.print(providers_table)
def display_recursive(
    prefix: List[str],
    commands: Iterable[Union[GroupCommand, ActionCommand]],
    help_msg: Optional[str] = None,
):
    """Print a table of action commands under *prefix*, then recurse into group commands.

    :param prefix: Command-path components accumulated so far.
    :param commands: Mixed iterable of GroupCommand and ActionCommand entries.
    :param help_msg: Title for the actions table; defaults to "Miscellaneous commands".
    """
    # Split commands into leaf actions and nested groups.
    # (The original declared these annotations twice; the redundant bare
    # declarations have been removed.)
    actions: List[ActionCommand] = []
    groups: List[GroupCommand] = []
    for command in commands:
        if isinstance(command, GroupCommand):
            groups.append(command)
        else:
            actions.append(command)

    console = AirflowConsole()
    if actions:
        table = SimpleTable(title=help_msg or "Miscellaneous commands")
        table.add_column(width=40)
        table.add_column()
        for action_command in sorted(actions, key=lambda d: d.name):
            table.add_row(" ".join([*prefix, action_command.name]), action_command.help)
        console.print(table)

    if groups:
        for group_command in sorted(groups, key=lambda d: d.name):
            group_prefix = [*prefix, group_command.name]
            display_recursive(group_prefix, group_command.subcommands, group_command.help)
def info(self, console: AirflowConsole):
    """Print the Airflow version banner, then each info sub-section in order."""
    console.print(
        f"[bold][green]Apache Airflow[/bold][/green]: {self.airflow_version}\n", highlight=False
    )
    # Keep the original section order: system, tools, paths, config, provider.
    for section in (self.system, self.tools, self.paths, self.config, self.provider):
        section.info(console)
def info(self, console: AirflowConsole):
    """Render a two-column table of filesystem and search-path configuration."""
    rows = (
        ("airflow_home", self.airflow_home),
        ("system_path", os.pathsep.join(self.system_path)),
        ("python_path", os.pathsep.join(self.python_path)),
        ("airflow_on_path", str(self.airflow_on_path)),
    )
    paths_table = SimpleTable(title="Paths info")
    paths_table.add_column()
    paths_table.add_column(width=150)
    for label, value in rows:
        paths_table.add_row(label, value)
    console.print(paths_table)
def info(self, console: AirflowConsole):
    """Render a two-column table of key Airflow configuration values."""
    config_table = SimpleTable(title="Config info")
    config_table.add_column()
    config_table.add_column(width=150)
    for label, value in (
        ("executor", self.executor),
        ("task_logging_handler", self.task_logging_handler),
        ("sql_alchemy_conn", self.sql_alchemy_conn),
        ("dags_folder", self.dags_folder),
        ("plugins_folder", self.plugins_folder),
        ("base_log_folder", self.base_log_folder),
    ):
        config_table.add_row(label, value)
    console.print(config_table)
def info(self, console: AirflowConsole):
    """Render a two-column table of host/system details."""
    system_table = SimpleTable(title="System info")
    system_table.add_column()
    system_table.add_column(width=100)
    for label, value in (
        # OS and architecture may be undetected; fall back to a placeholder.
        ("OS", self.operating_system or "NOT AVAILABLE"),
        ("architecture", self.arch or "NOT AVAILABLE"),
        ("uname", str(self.uname)),
        ("locale", str(self.locale)),
        ("python_version", self.python_version),
        ("python_location", self.python_location),
    ):
        system_table.add_row(label, value)
    console.print(system_table)
def info(self, console: AirflowConsole):
    """Render a two-column table of external tool versions."""
    tools_table = SimpleTable(title="Tools info")
    tools_table.add_column()
    tools_table.add_column(width=150)
    for tool, version in (
        ("git", self.git_version),
        ("ssh", self.ssh_version),
        ("kubectl", self.kubectl_version),
        ("gcloud", self.gcloud_version),
        ("cloud_sql_proxy", self.cloud_sql_proxy_version),
        ("mysql", self.mysql_version),
        ("sqlite3", self.sqlite3_version),
        ("psql", self.psql_version),
    ):
        tools_table.add_row(tool, version)
    console.print(tools_table)
def task_states_for_dag_run(args, session=None):
    """Get the status of all task instances in a DagRun"""
    # First interpret the positional argument as a run_id.
    dag_run = (
        session.query(DagRun)
        .filter(
            DagRun.run_id == args.execution_date_or_run_id,
            DagRun.dag_id == args.dag_id,
        )
        .one_or_none()
    )
    if not dag_run:
        # No matching run_id: fall back to treating the value as an execution date.
        try:
            execution_date = timezone.parse(args.execution_date_or_run_id)
            dag_run = (
                session.query(DagRun)
                .filter(
                    DagRun.execution_date == execution_date,
                    DagRun.dag_id == args.dag_id,
                )
                .one_or_none()
            )
        except (ParserError, TypeError) as err:
            raise AirflowException(f"Error parsing the supplied execution_date. Error: {str(err)}")

    if dag_run is None:
        raise DagRunNotFound(
            f"DagRun for {args.dag_id} with run_id or execution_date of {args.execution_date_or_run_id!r} "
            "not found"
        )

    def _to_row(ti):
        # execution_date comes from the DagRun; start/end may be unset for unstarted TIs.
        return {
            "dag_id": ti.dag_id,
            "execution_date": dag_run.execution_date.isoformat(),
            "task_id": ti.task_id,
            "state": ti.state,
            "start_date": ti.start_date.isoformat() if ti.start_date else "",
            "end_date": ti.end_date.isoformat() if ti.end_date else "",
        }

    AirflowConsole().print_as(data=dag_run.task_instances, output=args.output, mapper=_to_row)
def dump_plugins(args):
    """Dump plugins information"""
    plugins_manager.ensure_plugins_loaded()
    plugins_manager.integrate_macros_plugins()
    plugins_manager.integrate_executor_plugins()
    plugins_manager.initialize_extra_operators_links_plugins()
    plugins_manager.initialize_web_ui_plugins()
    if not plugins_manager.plugins:
        print("No plugins loaded")
        return

    # One row per plugin: its name plus every dump attribute.
    plugins_info: List[Dict[str, str]] = [
        {"name": plugin.name, **{attr: getattr(plugin, attr) for attr in PLUGINS_ATTRIBUTES_TO_DUMP}}
        for plugin in plugins_manager.plugins
    ]

    # Remove empty info
    if args.output == "table":  # pylint: disable=too-many-nested-blocks
        # plugins_info is non-empty here, so reading plugins_info[0] is safe.
        empty_columns = [
            col for col in list(plugins_info[0]) if all(not bool(row[col]) for row in plugins_info)
        ]
        for row in plugins_info:
            for col in empty_columns:
                del row[col]

    AirflowConsole().print_as(plugins_info, output=args.output)
def roles_list(args):
    """Lists all existing roles"""
    appbuilder = cached_app().appbuilder  # pylint: disable=no-member
    role_names = sorted(role.name for role in appbuilder.sm.get_all_roles())
    AirflowConsole().print_as(
        data=role_names,
        output=args.output,
        mapper=lambda name: {"name": name},
    )
def dag_list_dag_runs(args, dag=None, session=NEW_SESSION):
    """Lists dag runs for a given DAG"""
    if dag:
        args.dag_id = dag.dag_id
    else:
        dag = DagModel.get_dagmodel(args.dag_id, session=session)
        if not dag:
            raise SystemExit(f"DAG: {args.dag_id} does not exist in 'dag' table")

    dag_runs = DagRun.find(
        dag_id=args.dag_id,
        state=args.state.lower() if args.state else None,
        no_backfills=args.no_backfill,
        execution_start_date=args.start_date,
        execution_end_date=args.end_date,
        session=session,
    )
    # Newest runs first.
    dag_runs.sort(key=lambda run: run.execution_date, reverse=True)

    def _to_row(dr):
        return {
            "dag_id": dr.dag_id,
            "run_id": dr.run_id,
            "state": dr.state,
            "execution_date": dr.execution_date.isoformat(),
            "start_date": dr.start_date.isoformat() if dr.start_date else '',
            "end_date": dr.end_date.isoformat() if dr.end_date else '',
        }

    AirflowConsole().print_as(data=dag_runs, output=args.output, mapper=_to_row)
def task_states_for_dag_run(args):
    """Get the status of all task instances in a DagRun"""
    columns = (
        TaskInstance.dag_id,
        TaskInstance.execution_date,
        TaskInstance.task_id,
        TaskInstance.state,
        TaskInstance.start_date,
        TaskInstance.end_date,
    )
    with create_session() as session:
        tis = (
            session.query(*columns)
            .filter(
                TaskInstance.dag_id == args.dag_id,
                TaskInstance.execution_date == args.execution_date,
            )
            .all()
        )

    if not tis:
        raise AirflowException("DagRun does not exist.")

    def _to_row(ti):
        # start/end may be unset for task instances that have not run yet.
        return {
            "dag_id": ti.dag_id,
            "execution_date": ti.execution_date.isoformat(),
            "task_id": ti.task_id,
            "state": ti.state,
            "start_date": ti.start_date.isoformat() if ti.start_date else "",
            "end_date": ti.end_date.isoformat() if ti.end_date else "",
        }

    AirflowConsole().print_as(data=tis, output=args.output, mapper=_to_row)
def dag_list_jobs(args, dag=None):
    """Lists latest n jobs

    :param args: CLI namespace (dag_id, state, limit, output).
    :param dag: Optional DAG object; when given its id overrides args.dag_id.
    :raises AirflowException: if the requested dag_id is not in the DagBag.
    """
    queries = []
    if dag:
        args.dag_id = dag.dag_id
    if args.dag_id:
        dagbag = DagBag()
        if args.dag_id not in dagbag.dags:
            error_message = f"Dag id {args.dag_id} not found"
            raise AirflowException(error_message)
        queries.append(BaseJob.dag_id == args.dag_id)

    if args.state:
        queries.append(BaseJob.state == args.state)

    fields = ['dag_id', 'state', 'job_type', 'start_date', 'end_date']
    with create_session() as session:
        all_jobs = (
            session.query(BaseJob)
            .filter(*queries)
            .order_by(BaseJob.start_date.desc())
            .limit(args.limit)
            .all()
        )
        # Stringify while the session is open; use idiomatic getattr() rather
        # than calling __getattribute__ directly.
        all_jobs = [{f: str(getattr(job, f)) for f in fields} for job in all_jobs]

    AirflowConsole().print_as(
        data=all_jobs,
        output=args.output,
    )
def variables_list(args):
    """Displays all of the variables

    :param args: CLI namespace providing the ``output`` format.
    """
    with create_session() as session:
        # Materialize results inside the session: the bare Query object would
        # otherwise be iterated by print_as() after the session context closed.
        variables = session.query(Variable).all()
    AirflowConsole().print_as(
        data=variables,
        output=args.output,
        mapper=lambda x: {"key": x.key},
    )
def show(self, output: str, console: Optional[AirflowConsole] = None) -> None:
    """Shows information about Airflow instance"""
    sections = {
        "Apache Airflow": self._airflow_info,
        "System info": self._system_info,
        "Tools info": self._tools_info,
        "Paths info": self._paths_info,
        "Providers info": self._providers_info,
    }
    if console is None:
        console = AirflowConsole(show_header=False)

    if output in ("table", "plain"):
        # One key/value table per section.
        for title, section in sections.items():
            console.print(f"\n[bold][green]{title}[/bold][/green]", highlight=False)
            rows = [{"key": key, "value": value} for key, value in section]
            console.print_as(data=rows, output=output)
    else:
        # Structured output: snake_case section names mapping to dicts.
        data = [
            {title.lower().replace(" ", "_"): dict(section)}
            for title, section in sections.items()
        ]
        console.print_as(data=data, output=output)
def dag_list_jobs(args, dag=None, session=NEW_SESSION):
    """Lists latest n jobs

    :param args: CLI namespace (dag_id, state, limit, output).
    :param dag: Optional DAG object; when given its id overrides args.dag_id.
    :param session: SQLAlchemy session (injected default).
    :raises SystemExit: if the requested dag_id is not in the 'dag' table.
    """
    queries = []
    if dag:
        args.dag_id = dag.dag_id
    if args.dag_id:
        dag = DagModel.get_dagmodel(args.dag_id, session=session)
        if not dag:
            raise SystemExit(f"DAG: {args.dag_id} does not exist in 'dag' table")
        queries.append(BaseJob.dag_id == args.dag_id)

    if args.state:
        queries.append(BaseJob.state == args.state)

    fields = ['dag_id', 'state', 'job_type', 'start_date', 'end_date']
    all_jobs = (
        session.query(BaseJob)
        .filter(*queries)
        .order_by(BaseJob.start_date.desc())
        .limit(args.limit)
        .all()
    )
    # Use idiomatic getattr() rather than calling __getattribute__ directly.
    all_jobs = [{f: str(getattr(job, f)) for f in fields} for job in all_jobs]

    AirflowConsole().print_as(
        data=all_jobs,
        output=args.output,
    )
def dag_list_dag_runs(args, dag=None):
    """Lists dag runs for a given DAG"""
    if dag:
        args.dag_id = dag.dag_id

    dagbag = DagBag()
    if args.dag_id is not None and args.dag_id not in dagbag.dags:
        error_message = f"Dag id {args.dag_id} not found"
        raise AirflowException(error_message)

    dag_runs = DagRun.find(
        dag_id=args.dag_id,
        state=args.state.lower() if args.state else None,
        no_backfills=args.no_backfill,
        execution_start_date=args.start_date,
        execution_end_date=args.end_date,
    )
    # Newest runs first.
    dag_runs.sort(key=lambda run: run.execution_date, reverse=True)

    def _to_row(dr):
        return {
            "dag_id": dr.dag_id,
            "run_id": dr.run_id,
            "state": dr.state,
            "execution_date": dr.execution_date.isoformat(),
            "start_date": dr.start_date.isoformat() if dr.start_date else '',
            "end_date": dr.end_date.isoformat() if dr.end_date else '',
        }

    AirflowConsole().print_as(data=dag_runs, output=args.output, mapper=_to_row)
def secrets_backends_list(args):
    """Lists all secrets backends at the command line"""
    backend_class_names = list(ProvidersManager().secrets_backend_class_names)
    AirflowConsole().print_as(
        data=backend_class_names,
        output=args.output,
        mapper=lambda class_name: {"secrets_backend_class_name": class_name},
    )
def logging_list(args):
    """Lists all log task handlers at the command line"""
    handler_class_names = list(ProvidersManager().logging_class_names)
    AirflowConsole().print_as(
        data=handler_class_names,
        output=args.output,
        mapper=lambda class_name: {"logging_class_name": class_name},
    )
def extra_links_list(args):
    """Lists all extra links at the command line"""
    AirflowConsole().print_as(
        data=ProvidersManager().extra_links_class_names,
        output=args.output,
        mapper=lambda class_name: {"extra_link_class_name": class_name},
    )
def connection_field_behaviours(args):
    """Lists field behaviours"""
    field_behaviour_names = list(ProvidersManager().field_behaviours.keys())
    AirflowConsole().print_as(
        data=field_behaviour_names,
        output=args.output,
        mapper=lambda name: {"field_behaviours": name},
    )
def auth_backend_list(args):
    """Lists all API auth backend modules at the command line

    :param args: CLI namespace providing the ``output`` format.
    """
    AirflowConsole().print_as(
        data=list(ProvidersManager().auth_backend_module_names),
        output=args.output,
        # Output key typo fixed: "api_auth_backand_module" -> "api_auth_backend_module".
        mapper=lambda module_name: {
            "api_auth_backend_module": module_name,
        },
    )
def dag_list_import_errors(args):
    """Displays dags with import errors on the command line"""
    dagbag = DagBag(process_subdir(args.subdir))
    data = [
        {"filepath": filepath, "error": error}
        for filepath, error in dagbag.import_errors.items()
    ]
    AirflowConsole().print_as(data=data, output=args.output)
def _show_pools(pools, output):
    """Print pool triples (name, slots, description) in the requested output format."""
    def _to_row(pool_tuple):
        name, slots, description = pool_tuple
        return {"pool": name, "slots": slots, "description": description}

    AirflowConsole().print_as(data=pools, output=output, mapper=_to_row)
def providers_list(args):
    """Lists all providers at the command line"""
    def _to_row(provider):
        return {
            "package_name": provider.data["package-name"],
            "description": _remove_rst_syntax(provider.data["description"]),
            "version": provider.version,
        }

    AirflowConsole().print_as(
        data=list(ProvidersManager().providers.values()),
        output=args.output,
        mapper=_to_row,
    )
def users_list(args):
    """Lists users at the command line

    :param args: CLI namespace providing the ``output`` format.
    """
    appbuilder = cached_app().appbuilder
    users = appbuilder.sm.get_all_users()
    fields = ['id', 'username', 'email', 'first_name', 'last_name', 'roles']
    AirflowConsole().print_as(
        data=users,
        output=args.output,
        # Idiomatic getattr() rather than calling __getattribute__ directly.
        mapper=lambda user: {field: getattr(user, field) for field in fields},
    )
def hooks_list(args):
    """Lists all hooks at the command line"""
    def _to_row(item):
        # item is a (connection_type, hook_info) pair; hook_info[0] is the class,
        # hook_info[1] the connection attribute name.
        conn_type, hook_info = item
        return {
            "connection_type": conn_type,
            "class": hook_info[0],
            "conn_attribute_name": hook_info[1],
        }

    AirflowConsole().print_as(
        data=ProvidersManager().hooks.items(),
        output=args.output,
        mapper=_to_row,
    )
def connections_get(args):
    """Get a connection."""
    # EAFP: ask the hook and translate "not found" into a CLI exit.
    try:
        connection = BaseHook.get_connection(args.conn_id)
    except AirflowNotFoundException:
        raise SystemExit("Connection not found.")
    AirflowConsole().print_as(data=[connection], output=args.output, mapper=_connection_mapper)
def dag_list_dags(args):
    """Displays dags with or without stats at the command line"""
    dagbag = DagBag(process_subdir(args.subdir))
    dags_by_id = sorted(dagbag.dags.values(), key=lambda dag: dag.dag_id)
    AirflowConsole().print_as(
        data=dags_by_id,
        output=args.output,
        mapper=lambda dag: {
            "dag_id": dag.dag_id,
            "filepath": dag.filepath,
            "owner": dag.owner,
        },
    )
def connection_form_widget_list(args):
    """Lists all custom connection form fields at the command line"""
    def _to_row(item):
        # item is a (parameter_name, widget_info) pair.
        param_name, widget_info = item
        return {
            "connection_parameter_name": param_name,
            "class": widget_info.hook_class_name,
            "package_name": widget_info.package_name,
            "field_type": widget_info.field.field_class.__name__,
        }

    AirflowConsole().print_as(
        data=list(ProvidersManager().connection_form_widgets.items()),
        output=args.output,
        mapper=_to_row,
    )