def get_plugin_config():
    """Return the plugin configuration."""
    config = []
    i = 0
    # Iterate over the configurations
    while True:
        suffix = str(i) if i != 0 else ''  # the first configuration doesn't have a suffix
        try:
            if not configuration.has_option(CONFIG_SECTION, 'REPOSITORY_URL' + suffix):
                break
        except:  # backports.configparser.NoSectionError and friends
            break
        repository_url = configuration.get(CONFIG_SECTION, 'REPOSITORY_URL' + suffix)
        dag = configuration.get(CONFIG_SECTION, 'DAG' + suffix)
        token = configuration.get(CONFIG_SECTION, 'TOKEN' + suffix) \
            if configuration.has_option(CONFIG_SECTION, 'TOKEN' + suffix) else None
        config.append({
            'i': i,
            'repository_url': repository_url,
            'dag_id': dag,
            'token': token
        })
        i += 1
    return config
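# Assumed airflow.cfg layout for get_plugin_config() above. The section and
# option names are inferred from the suffix loop (CONFIG_SECTION is whatever
# section the plugin actually reads); this is a sketch, not the plugin's
# documented format:
#
#   [<CONFIG_SECTION>]
#   repository_url  = https://example.com/repo-a.git
#   dag             = dag_a
#   token           = secret-a
#   repository_url1 = https://example.com/repo-b.git
#   dag1            = dag_b        ; TOKEN1 omitted -> 'token' is None for entry 1
#
# The loop stops at the first index i for which REPOSITORY_URL<i> is missing,
# so the two entries above would yield a list of two config dicts.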
def login(self, request):
    """Log in a user.

    Executed when a new user session starts: analyzes the request, parses the
    JWT header, and creates a new user object.

    :param self: required parameter for flask_login
    :param request: a request object
    :type request: Request
    """
    if current_user.is_authenticated():
        flash("You are already logged in")
        return redirect(url_for('admin.index'))

    auth_header = request.headers.get('Authorization')
    jwt_obj = parse_jwt_header(auth_header)
    if not jwt_obj:
        return Response('JWT header is invalid or absent.', mimetype='text/plain')

    name = jwt_obj.get('name', '')
    email = jwt_obj.get('email', '')
    groups = jwt_obj.get('groups', [])

    admin_group = None
    if conf.has_option('webserver', 'dex_group_admin'):
        admin_group = conf.get('webserver', 'dex_group_admin')

    profiler_group = None
    if conf.has_option('webserver', 'dex_group_profiler'):
        profiler_group = conf.get('webserver', 'dex_group_profiler')

    admin_email = None
    if conf.has_option('webserver', 'dex_admin_email'):
        admin_email = conf.get('webserver', 'dex_admin_email')

    is_superuser = False
    is_data_profiler = False
    if admin_group or profiler_group:
        if admin_email and admin_email == email:
            # the user is an admin
            is_superuser = True
            is_data_profiler = True
        elif admin_group and admin_group in groups:
            is_superuser = True
        elif profiler_group and profiler_group in groups:
            is_data_profiler = True
        else:
            return Response("Access denied for user '{}'".format(email),
                            mimetype='text/plain')
    else:
        is_superuser = True
        is_data_profiler = True

    flask_login.login_user(DexUser(name, email, is_superuser, is_data_profiler))
    return redirect(request.args.get("next") or url_for("admin.index"))
def load_user_from_header(auth_header):
    """Reload user data from the Authorization header.

    :param auth_header: Authorization header
    :return: user object, or ``None`` if the header is invalid
    :rtype: DexUser
    """
    jwt_obj = parse_jwt_header(auth_header)
    if not jwt_obj:
        return None

    name = jwt_obj.get('name', '')
    email = jwt_obj.get('email', '')
    groups = jwt_obj.get('groups', [])

    admin_groups = []
    if conf.has_option('webserver', 'dex_group_admin') and \
            conf.get('webserver', 'dex_group_admin'):
        admin_groups = conf.get('webserver', 'dex_group_admin').split(' ')

    profiler_groups = []
    if conf.has_option('webserver', 'dex_group_profiler') and \
            conf.get('webserver', 'dex_group_profiler'):
        profiler_groups = conf.get('webserver', 'dex_group_profiler').split(' ')

    admin_email = None
    if conf.has_option('webserver', 'dex_admin_email'):
        admin_email = conf.get('webserver', 'dex_admin_email')

    is_superuser = False
    is_data_profiler = False
    if admin_groups or profiler_groups:
        if admin_email and admin_email == email:
            # the user is an admin
            is_superuser = True
            is_data_profiler = True
        else:
            for group in groups:
                if group in admin_groups:
                    is_superuser = True
                if group in profiler_groups:
                    is_data_profiler = True
    else:
        is_superuser = True
        is_data_profiler = True

    return DexUser(name, email, is_superuser, is_data_profiler)
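# parse_jwt_header() is referenced above but not shown. Below is a minimal
# sketch of one plausible shape, assuming a "Bearer <jwt>" Authorization
# header; it is a hypothetical stand-in, not the project's actual helper, and
# unlike a real implementation it does NOT verify the token signature.
import base64
import json


def parse_jwt_header_sketch(auth_header):
    if not auth_header or not auth_header.startswith("Bearer "):
        return None
    token = auth_header[len("Bearer "):]
    parts = token.split(".")
    if len(parts) != 3:
        return None
    # Restore base64 padding on the payload (claims) segment.
    payload = parts[1] + "=" * (-len(parts[1]) % 4)
    try:
        # Decode the claims into a dict with keys such as 'name', 'email', 'groups'.
        return json.loads(base64.urlsafe_b64decode(payload))
    except (ValueError, TypeError):
        return None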
def get_default(key, default=None):
    import airflow.configuration as conf

    if conf.has_option("airflowdocker", key):
        return conf.get("airflowdocker", key)
    else:
        return default
def get_boolean_default(key, default):
    import airflow.configuration as conf

    if conf.has_option("airflowdocker", key):
        return conf.getboolean("airflowdocker", key)
    else:
        return default
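# Usage sketch for the two helpers above. The option names here are
# illustrative only; the real keys depend on what the airflowdocker plugin
# actually defines in its [airflowdocker] section:
#
#     force_pull = get_boolean_default("force_pull", default=False)
#     network_mode = get_default("network_mode", default=None)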
def try_login(username, password):
    conn = get_ldap_connection(configuration.get("ldap", "bind_user"),
                               configuration.get("ldap", "bind_password"))

    search_filter = "(&({0})({1}={2}))".format(
        configuration.get("ldap", "user_filter"),
        configuration.get("ldap", "user_name_attr"),
        username
    )

    search_scopes = {
        "LEVEL": LEVEL,
        "SUBTREE": SUBTREE,
        "BASE": BASE
    }
    search_scope = LEVEL
    if configuration.has_option("ldap", "search_scope"):
        search_scope = SUBTREE if configuration.get("ldap", "search_scope") == "SUBTREE" else LEVEL

    # todo: BASE or ONELEVEL?
    res = conn.search(native(configuration.get("ldap", "basedn")),
                      native(search_filter),
                      search_scope=native(search_scope))

    # todo: use list or result?
    if not res:
        log.info("Cannot find user %s", username)
        raise AuthenticationError("Invalid username or password")

    entry = conn.response[0]
    conn.unbind()

    if 'dn' not in entry:
        # The search filter for the user did not return any values, so an
        # invalid user was used for credentials.
        raise AuthenticationError("Invalid username or password")

    try:
        conn = get_ldap_connection(entry['dn'], password)
    except KeyError as e:
        log.error("""
        Unable to parse LDAP structure. If you're using Active Directory
        and not specifying an OU, you must set search_scope=SUBTREE in airflow.cfg.
        %s
        """ % traceback.format_exc())
        raise LdapException("Could not parse LDAP structure. "
                            "Try setting search_scope in airflow.cfg, or check logs")

    if not conn:
        log.info("Password incorrect for user %s", username)
        raise AuthenticationError("Invalid username or password")
def try_login(username, password):
    conn = get_ldap_connection(configuration.get("ldap", "bind_user"),
                               configuration.get("ldap", "bind_password"))

    search_filter = "(&({0})({1}={2}))".format(
        configuration.get("ldap", "user_filter"),
        configuration.get("ldap", "user_name_attr"),
        username
    )

    search_scopes = {
        "LEVEL": LEVEL,
        "SUBTREE": SUBTREE,
        "BASE": BASE
    }
    search_scope = LEVEL
    if configuration.has_option("ldap", "search_scope"):
        search_scope = SUBTREE if configuration.get("ldap", "search_scope") == "SUBTREE" else LEVEL

    # todo: BASE or ONELEVEL?
    res = conn.search(native(configuration.get("ldap", "basedn")),
                      native(search_filter),
                      search_scope=native(search_scope))

    # todo: use list or result?
    if not res:
        _log.info("Cannot find user %s", username)
        raise AuthenticationError("Invalid username or password")

    entry = conn.response[0]
    conn.unbind()

    if 'dn' not in entry:
        # The search filter for the user did not return any values, so an
        # invalid user was used for credentials.
        raise AuthenticationError("Invalid username or password")

    try:
        conn = get_ldap_connection(entry['dn'], password)
    except KeyError as e:
        _log.error("""
        Unable to parse LDAP structure. If you're using Active Directory
        and not specifying an OU, you must set search_scope=SUBTREE in airflow.cfg.
        %s
        """ % traceback.format_exc())
        raise LdapException("Could not parse LDAP structure. "
                            "Try setting search_scope in airflow.cfg, or check logs")

    if not conn:
        _log.info("Password incorrect for user %s", username)
        raise AuthenticationError("Invalid username or password")
def is_super_user(self):
    airflow_version = airflow.__version__
    if airflow_version.startswith("1."):
        # For versions < 2.0 the following configuration must be added to airflow.cfg:
        #
        #   [user_management_plugin]
        #   # provide comma separated list of admin users
        #   # Ex: admin_users=admin,airflow
        #   admin_users=airflow
        users = configuration.get("user_management_plugin", "admin_users") \
            if configuration.has_option("user_management_plugin", "admin_users") else ""
        if users != "":
            admin_users = users.split(",")
            if current_user.user.username in admin_users:
                return True
        else:
            logging.error("No Admin users were provided")
        return False
    else:
        # For versions >= 2.0 we check for the superuser flag
        return current_user.user.superuser
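# A minimal usage sketch, assuming the method above lives on a Flask-Admin view
# class; the view name and wiring are hypothetical, not the plugin's actual code:
#
#     class UserManagementView(BaseView):
#         def is_accessible(self):
#             # Only superusers may open this plugin view.
#             return self.is_super_user()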
from celery import Celery

from airflow.config_templates.default_celery import DEFAULT_CELERY_CONFIG
from airflow.exceptions import AirflowException
from airflow.executors.base_executor import BaseExecutor
from airflow import configuration
from airflow.utils.log.logging_mixin import LoggingMixin
from airflow.utils.module_loading import import_string

PARALLELISM = configuration.get('core', 'PARALLELISM')

'''
To start the celery worker, run the command:
airflow worker
'''

if configuration.has_option('celery', 'celery_config_options'):
    celery_configuration = import_string(
        configuration.get('celery', 'celery_config_options')
    )
else:
    celery_configuration = DEFAULT_CELERY_CONFIG

app = Celery(
    configuration.get('celery', 'CELERY_APP_NAME'),
    config_source=celery_configuration)


@app.task
def execute_command(command):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
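# A sketch of how an executor built on this Celery app might enqueue work via
# the execute_command task above; the command string and queue name are
# illustrative, not the executor code from this snippet:
#
#     async_result = execute_command.apply_async(args=[airflow_command], queue="default")
#     # async_result.state can later be polled to track SUCCESS / FAILURE.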
def _ensure_section_exists(section_name):
    """
    Ensure that a configuration section exists.

    Most functions on airflow.configuration simply pass the call to the
    singleton (Ex: airflow.configuration.get calls conf.get). However, not all
    methods on conf are abstracted out, including the all-important
    "has_section" & "add_section" (if we try to set an option on a section
    that doesn't exist, NoSectionError is raised). Therefore, we break the
    abstraction here once to access the singleton directly.

    :param str section_name: the section
    """
    # This uses the singleton described above to make sure the section exists
    if not airflow_configuration.conf.has_section(section_name):
        airflow_configuration.conf.add_section(section_name)


_ensure_section_exists("fileflow")

# Set some fileflow settings to a default if they do not already exist.
if not airflow_configuration.has_option("fileflow", "environment"):
    airflow_configuration.set("fileflow", "environment", "production")

if not airflow_configuration.has_option("fileflow", "storage_prefix"):
    airflow_configuration.set("fileflow", "storage_prefix", "storage")

if not airflow_configuration.has_option("fileflow", "storage_type"):
    airflow_configuration.set("fileflow", "storage_type", "file")

if airflow_configuration.get("fileflow", "storage_type") == "s3":
    if not airflow_configuration.has_option("fileflow", "aws_bucket_name"):
        airflow_configuration.set("fileflow", "aws_bucket_name", "mybeautifulbucket")

# For AWS keys, check the AIRFLOW__ style environment variables first
# Otherwise, fallback to the boto configuration
aws_access_key_id_env_var = os.environ.get(
    'AIRFLOW__FILEFLOW__AWS_ACCESS_KEY_ID', False)
import subprocess

from celery import Celery

from airflow.config_templates.default_celery import DEFAULT_CELERY_CONFIG
from airflow.exceptions import AirflowException
from airflow.executors.base_executor import BaseExecutor
from airflow import configuration
from airflow.utils.log.logging_mixin import LoggingMixin

PARALLELISM = configuration.get('core', 'PARALLELISM')

'''
To start the celery worker, run the command:
airflow worker
'''

if configuration.has_option('celery', 'celery_config_options'):
    celery_configuration = configuration.get('celery', 'celery_config_options')
else:
    celery_configuration = DEFAULT_CELERY_CONFIG

app = Celery(
    configuration.get('celery', 'CELERY_APP_NAME'),
    config_source=celery_configuration)


@app.task
def execute_command(command):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    try:
        subprocess.check_call(command, shell=True)
    BACKFILL_URL="/api/v1.0/backfill",
    LIST_DAGS_URL="/api/v1.0/list_dags",
    KERBEROS_URL="/api/v1.0/kerberos",
    WORKER_URL="/api/v1.0/worker",
    SCHEDULER_URL="/api/v1.0/scheduler",
    TASK_STATE_URL="/api/v1.0/task_state",
    TRIGGER_DAG_URL="/api/v1.0/trigger_dag",
    REFRESH_DAG_URL="/api/v1.0/refresh_dag",
    DEPLOY_DAG_URL="/api/v1.0/deploy_dag"
)

airflow_webserver_base_url = configuration.get('webserver', 'BASE_URL')
dags_folder = configuration.get('core', 'DAGS_FOLDER')

rest_api_plugin_http_token_header_name = "rest_api_plugin_http_token"
expected_http_token = None
if configuration.has_option("webserver", "REST_API_PLUGIN_EXPECTED_HTTP_TOKEN"):
    expected_http_token = configuration.get("webserver", "REST_API_PLUGIN_EXPECTED_HTTP_TOKEN")


def get_base_response(status="OK", call_time=None, include_arguments=True):
    # Use None as the default so the timestamp is taken per call,
    # not once at import time.
    base_response = {"status": status, "call_time": call_time or datetime.now()}
    if include_arguments:
        base_response["arguments"] = request.args
    return base_response


def get_final_response(base_response, output=None, airflow_cmd=None):
    final_response = base_response
    final_response["response_time"] = datetime.now()
    if output:
        final_response["output"] = output
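# A hedged sketch of how the expected_http_token read above might be enforced
# per request. The helper name is made up; it only reuses the header-name and
# token variables defined in this module:
def http_token_is_valid(req):
    # With no token configured, the check is effectively disabled.
    if not expected_http_token:
        return True
    # Compare the configured token against the one the caller supplied.
    return req.headers.get(rest_api_plugin_http_token_header_name) == expected_http_token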
def generate_biowardrobe_workflow():
    _settings.cursor.execute("select * from experimenttype limit 1")
    field_names = [i[0] for i in _settings.cursor.description]
    if 'workflow' not in field_names:
        apply_sql_patch("labdata_alter.sql")
        apply_sql_patch("experimenttype_alter.sql")
        apply_sql_patch("experimenttype_patch.sql")

    _template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_airflow_analysis import biowardrobe_workflow
dag = biowardrobe_workflow("{}")
"""
    _settings.cursor.execute("select workflow from experimenttype")
    for (workflow, ) in _settings.cursor.fetchall():
        if not workflow:
            continue
        _filename = os.path.abspath(
            os.path.join(
                DAGS_FOLDER,
                os.path.basename(os.path.splitext(workflow)[0]) + '.py'))
        with open(_filename, 'w') as generated_workflow_stream:
            generated_workflow_stream.write(_template.format(workflow))

    _template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_airflow_analysis import BioWardrobeDownloadDAG, BioWardrobeDownloadTriggerDAG
d = BioWardrobeDownloadDAG
dt = BioWardrobeDownloadTriggerDAG
"""
    with open(os.path.join(DAGS_FOLDER, 'biowardrobe_download.py'), 'w') as generated_workflow_stream:
        generated_workflow_stream.write(_template)

    _template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_airflow_analysis import BioWardrobeForceRunDAG
d = BioWardrobeForceRunDAG
"""
    with open(os.path.join(DAGS_FOLDER, 'biowardrobe_force_run.py'), 'w') as generated_workflow_stream:
        generated_workflow_stream.write(_template)

    merge_conn(
        models.Connection(conn_id='biowardrobe',
                          conn_type='mysql',
                          host=_settings.config[0],
                          login=_settings.config[1],
                          password=_settings.config[2],
                          schema=_settings.config[3],
                          extra="{\"cursor\":\"dictcursor\"}"))

    try:
        api_client.get_pool(name='biowardrobe_download')
    except Exception as e:
        api_client.create_pool(name='biowardrobe_download',
                               slots=5,
                               description="pool to download files")

    try:
        api_client.get_pool(name='biowardrobe_basic_analysis')
    except Exception as e:
        api_client.create_pool(name='biowardrobe_basic_analysis',
                               slots=1,
                               description="pool to run basic analysis")

    if not conf.has_option('cwl', 'tmp_folder'):
        if not os.path.exists(conf.AIRFLOW_CONFIG + '.orig'):
            copyfile(conf.AIRFLOW_CONFIG, conf.AIRFLOW_CONFIG + '.orig')
        with open(conf.AIRFLOW_CONFIG, 'w') as fp:
            # for s in ['mesos', 'kerberos', 'celery', 'smtp', 'email', 'dask', 'ldap']:
            #     conf.conf.remove_section(s)
            conf.conf.add_section('cwl')
            conf.set('cwl', 'tmp_folder', os.path.join(AIRFLOW_HOME, 'tmp'))
            conf.set('core', 'logging_level', 'WARNING')
            conf.set('core', 'load_examples', 'False')
            conf.set('webserver', 'dag_default_view', 'graph')
            conf.set('webserver', 'dag_orientation', 'TB')
            conf.set('webserver', 'web_server_worker_timeout', '120')
            conf.set('scheduler', 'job_heartbeat_sec', '20')
            conf.set('scheduler', 'scheduler_heartbeat_sec', '20')
            conf.set('scheduler', 'min_file_process_interval', '30')
            conf.conf.write(fp)

    startup_scripts = [
        'com.datirium.airflow-scheduler.plist',
        'com.datirium.airflow-webserver.plist'
    ]

    if platform == "darwin":
        _sys_dir = os.path.expanduser('~/Library/LaunchAgents')
        for script in startup_scripts:
            with open(os.path.join(system_folder, 'macosx', script), 'r') as s:
                data = s.read()

            # OS X
            dst = os.path.join(_sys_dir, script)
            if os.path.exists(dst):
                with open(dst + '.new', 'w') as w:
                    w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))
            else:
                with open(dst, 'w') as w:
                    w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))
""" Extend from the airflow configuration and address any missing fileflow related configuration values. """ from airflow import configuration as airflow_configuration import os import boto # Set some fileflow settings to a default if they do not already exist. if not airflow_configuration.has_option('fileflow', 'environment'): airflow_configuration.set('fileflow', 'environment', 'production') if not airflow_configuration.has_option('fileflow', 'storage_prefix'): airflow_configuration.set('fileflow', 'storage_prefix', 'storage') if not airflow_configuration.has_option('fileflow', 'storage_type'): airflow_configuration.set('fileflow', 'storage_type', 'file') if not airflow_configuration.has_option('fileflow', 'aws_bucket_name'): airflow_configuration.set('fileflow', 'aws_bucket_name', 'mybeautifulbucket') # For AWS keys, check the AIRFLOW__ style environment variables first # Otherwise, fallback to the boto configuration aws_access_key_id_env_var = os.environ.get( 'AIRFLOW__FILEFLOW__AWS_ACCESS_KEY_ID', False) aws_secret_access_key_env_var = os.environ.get( 'AIRFLOW__FILEFLOW__AWS_SECRET_ACCESS_KEY', False) boto_config = boto.pyami.config.Config() if not airflow_configuration.has_option('fileflow', 'aws_access_key_id'):
"""
CLIs this REST API exposes are defined here:
http://airflow.incubator.apache.org/cli.html
"""

# Location of the REST Endpoint
# Note: Changing this will only affect where the messages are posted to on the web
# interface and will not change where the endpoint actually resides.
rest_api_endpoint = "/admin/rest_api/api"

# Getting versions and global variables
hostname = socket.gethostname()
airflow_version = airflow.__version__

# Getting configurations from the airflow.cfg file
airflow_webserver_base_url = configuration.get('webserver', 'BASE_URL')
airflow_base_log_folder = configuration.get('core', 'BASE_LOG_FOLDER')
airflow_dags_folder = configuration.get('core', 'DAGS_FOLDER')
log_loading = configuration.getboolean("rest_api_plugin", "LOG_LOADING") \
    if configuration.has_option("rest_api_plugin", "LOG_LOADING") else False
filter_loading_messages_in_cli_response = \
    configuration.getboolean("rest_api_plugin", "FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE") \
    if configuration.has_option("rest_api_plugin", "FILTER_LOADING_MESSAGES_IN_CLI_RESPONSE") else True

# Use UTF-8 encoding so that response messages don't contain characters that can't be handled
os.environ['PYTHONIOENCODING'] = 'utf-8'

"""
Metadata that defines a single API:
{
    "name": "{string}",                 # Name of the API (CLI command to be executed)
    "description": "{string}",          # Description of the API
    "airflow_version": "{string}",      # Version the API became available in, so people can better
                                        # determine if the API is available (displayed on the Admin page)
    "http_method": "{string}",          # HTTP method to use when calling the function (Default: GET) (Optional)
    "background_mode": {boolean},       # Whether to run the process in the background if it's a CLI API (Optional)
    "arguments": [                      # List of arguments that can be provided to the API
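# An illustrative instance of the metadata structure described above; the values
# are made up to show the expected shape, not taken from the plugin's API list:
#
#     {
#         "name": "version",
#         "description": "Displays the version of Airflow",
#         "airflow_version": "1.0.0 or greater",
#         "http_method": "GET",
#         "background_mode": False,
#         "arguments": []
#     }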
def generate_biowardrobe_workflow():
    _template = u"""#!/usr/bin/env python3
from airflow import DAG
from biowardrobe_cwl_workflows import workflow
dag = workflow("{}")
"""
    all_workflows = available()
    for workflow in all_workflows:
        if not workflow:
            continue
        _filename = os.path.abspath(
            os.path.join(
                DAGS_FOLDER,
                os.path.basename(os.path.splitext(workflow)[0]) + '.py'))
        print(_filename)
        with open(_filename, 'w') as generated_workflow_stream:
            generated_workflow_stream.write(_template.format(workflow))

    try:
        api_client.get_pool(name='basic_analysis')
    except Exception as e:
        api_client.create_pool(name='basic_analysis',
                               slots=1,
                               description="pool to run basic analysis")

    if not conf.has_option('cwl', 'tmp_folder'):
        if not os.path.exists(conf.AIRFLOW_CONFIG + '.orig'):
            copyfile(conf.AIRFLOW_CONFIG, conf.AIRFLOW_CONFIG + '.orig')
        with open(conf.AIRFLOW_CONFIG, 'w') as fp:
            # for s in ['mesos', 'kerberos', 'celery', 'smtp', 'email', 'dask', 'ldap']:
            #     conf.conf.remove_section(s)
            conf.conf.add_section('cwl')
            conf.set('cwl', 'tmp_folder', os.path.join(AIRFLOW_HOME, 'tmp'))
            conf.set('core', 'logging_level', 'WARNING')
            conf.set('core', 'load_examples', 'False')
            conf.set('webserver', 'dag_default_view', 'graph')
            conf.set('webserver', 'dag_orientation', 'TB')
            conf.set('webserver', 'web_server_worker_timeout', '120')
            conf.set('scheduler', 'job_heartbeat_sec', '20')
            conf.set('scheduler', 'scheduler_heartbeat_sec', '20')
            conf.set('scheduler', 'min_file_process_interval', '30')
            conf.conf.write(fp)

    # startup_scripts = ['com.datirium.airflow-scheduler.plist', 'com.datirium.airflow-webserver.plist']
    # if platform == "darwin":
    #     _sys_dir = os.path.expanduser('~/Library/LaunchAgents')
    #     for scripts in startup_scripts:
    #         with open(os.path.join(system_folder, 'macosx', scripts), 'r') as s:
    #             data = s.read()
    #         # OS X
    #         dst = os.path.join(_sys_dir, scripts)
    #
    #         if os.path.exists(dst):
    #             with open(dst + '.new', 'w') as w:
    #                 w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))
    #         else:
    #             with open(dst, 'w') as w:
    #                 w.write(data.format(AIRFLOW_HOME=AIRFLOW_HOME))

    # if platform == "linux" or platform == "linux2":
    #     # linux
    # elif platform == "win32":
    #     # Windows...

    # TODO: tmp, dags do not exist ???


# generate_biowardrobe_workflow()