def load_login():
    """Import the configured authentication backend into the module-level
    ``login`` global.

    When authentication is enabled but no ``auth_backend`` option exists,
    reverts to the *deprecated* behavior of importing ``airflow_login``.

    :raises AirflowException: if authentication is enabled and the backend
        module cannot be imported.
    """
    log = LoggingMixin().log

    backend_name = 'airflow.default_login'
    try:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            backend_name = conf.get('webserver', 'auth_backend')
    except conf.AirflowConfigException:
        # No auth_backend option configured; only matters when auth is on.
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            log.warning(
                "auth_backend not found in webserver config reverting to "
                "*deprecated* behavior of importing airflow_login")
            backend_name = "airflow_login"

    try:
        global login
        login = import_module(backend_name)
    except ImportError as err:
        log.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            backend_name, err
        )
        # A broken backend is only fatal when authentication is required.
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            raise AirflowException("Failed to import authentication backend")
def load_login():
    """Load the configured auth backend into the module-level ``login`` global.

    Falls back to the *deprecated* ``airflow_login`` module when
    authentication is enabled but ``auth_backend`` is not set. After a
    successful import, mirrors ``login.login_manager`` under
    ``login.LOGIN_MANAGER`` when only the lower-case attribute exists.

    :raises AirflowException: if authentication is enabled and the backend
        module cannot be imported.
    """
    log = LoggingMixin().log

    module_name = 'airflow.default_login'
    try:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            module_name = conf.get('webserver', 'auth_backend')
    except conf.AirflowConfigException:
        # Option missing from the webserver section; only relevant with auth on.
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            log.warning(
                "auth_backend not found in webserver config reverting to "
                "*deprecated* behavior of importing airflow_login")
            module_name = "airflow_login"

    try:
        global login
        login = import_module(module_name)
        # Mirror login_manager under LOGIN_MANAGER when only the former exists.
        if hasattr(login, 'login_manager') and not hasattr(login, 'LOGIN_MANAGER'):
            login.LOGIN_MANAGER = login.login_manager
    except ImportError as err:
        log.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            module_name, err
        )
        # Only fatal when authentication is actually required.
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            raise AirflowException("Failed to import authentication backend")
def get_fernet():
    """
    Deferred load of Fernet key.

    This function could fail either because Cryptography is not installed
    or because the Fernet key is invalid.

    :return: Fernet object
    :raises: airflow.exceptions.AirflowException if there's a problem trying to load Fernet
    """
    global _fernet
    log = LoggingMixin().log

    # Already initialized on a previous call — reuse the cached object.
    if _fernet:
        return _fernet

    try:
        fernet_key = conf.get('core', 'FERNET_KEY')
        if fernet_key:
            # The key may be a comma-separated list to support key rotation.
            keys = [
                Fernet(part.encode('utf-8'))
                for part in fernet_key.split(',')
            ]
            _fernet = MultiFernet(keys)
            _fernet.is_encrypted = True
        else:
            log.warning(
                "empty cryptography key - values will not be stored encrypted."
            )
            _fernet = NullFernet()
    except (ValueError, TypeError) as ve:
        raise AirflowException("Could not create Fernet object: {}".format(ve))

    return _fernet
def _post_sendgrid_mail(mail_data):
    """Submit *mail_data* to the Sendgrid mail-send API and log the outcome.

    :param mail_data: request body for the Sendgrid mail endpoint; read here
        for its 'subject' and 'personalizations' keys when logging.
    """
    log = LoggingMixin().log
    api_client = sendgrid.SendGridAPIClient(apikey=os.environ.get('SENDGRID_API_KEY'))
    resp = api_client.client.mail.send.post(request_body=mail_data)
    status = resp.status_code
    # 2xx status code.
    if status // 100 == 2:
        log.info('Email with subject %s is successfully sent to recipients: %s',
                 mail_data['subject'], mail_data['personalizations'])
    else:
        log.warning('Failed to send out email with subject %s, status code: %s',
                    mail_data['subject'], status)
def _parse_s3_config(config_file_name, config_format='boto', profile=None):
    """
    Parses a config file for s3 credentials. Can currently parse boto,
    s3cmd.conf and AWS SDK config formats.

    :param config_file_name: path to the config file
    :type config_file_name: str
    :param config_format: config type. One of "boto", "s3cmd" or "aws".
        Defaults to "boto"
    :type config_format: str
    :param profile: profile name in AWS type config file
    :type profile: str
    :return: tuple of (access_key, secret_key, calling_format)
    :raises AirflowException: if the file cannot be read or the expected
        credential section is missing
    """
    config = configparser.ConfigParser()
    if config.read(config_file_name):  # pragma: no cover
        sections = config.sections()
    else:
        raise AirflowException("Couldn't read {0}".format(config_file_name))
    # Setting option names depending on file format
    if config_format is None:
        config_format = 'boto'
    conf_format = config_format.lower()
    if conf_format == 'boto':  # pragma: no cover
        if profile is not None and 'profile ' + profile in sections:
            cred_section = 'profile ' + profile
        else:
            cred_section = 'Credentials'
    elif conf_format == 'aws' and profile is not None:
        cred_section = profile
    else:
        cred_section = 'default'
    # Option names
    if conf_format in ('boto', 'aws'):  # pragma: no cover
        key_id_option = 'aws_access_key_id'
        secret_key_option = 'aws_secret_access_key'
        # security_token_option = 'aws_security_token'
    else:
        key_id_option = 'access_key'
        secret_key_option = 'secret_key'
    # Actual Parsing
    if cred_section not in sections:
        raise AirflowException("This config file format is not recognized")
    try:
        access_key = config.get(cred_section, key_id_option)
        secret_key = config.get(cred_section, secret_key_option)
        calling_format = None
        if config.has_option(cred_section, 'calling_format'):
            calling_format = config.get(cred_section, 'calling_format')
    except (configparser.NoSectionError, configparser.NoOptionError):
        # Was a bare ``except:``; only option/section lookups can fail here,
        # so anything else (e.g. KeyboardInterrupt) now propagates untouched.
        log = LoggingMixin().log
        log.warning("Option Error in parsing s3 config file")
        raise
    return (access_key, secret_key, calling_format)
def _post_sendgrid_mail(mail_data):
    """Submit *mail_data* to the Sendgrid mail-send API and log the outcome.

    :param mail_data: request body for the Sendgrid mail endpoint; must
        contain 'subject' and 'personalizations' keys (used in log messages).
    """
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(apikey=os.environ.get('SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.  Chained comparison replaces the ``>= and <`` pair.
    if 200 <= response.status_code < 300:
        # Pass arguments lazily to the logger instead of %-formatting eagerly,
        # so formatting work is skipped when the level is disabled.
        log.info('Email with subject %s is successfully sent to recipients: %s',
                 mail_data['subject'], mail_data['personalizations'])
    else:
        log.warning('Failed to send out email with subject %s, status code: %s',
                    mail_data['subject'], response.status_code)
def _parse_s3_config(config_file_name, config_format='boto', profile=None):
    """
    Parses a config file for s3 credentials. Can currently parse boto,
    s3cmd.conf and AWS SDK config formats.

    :param config_file_name: path to the config file
    :type config_file_name: str
    :param config_format: config type. One of "boto", "s3cmd" or "aws".
        Defaults to "boto"
    :type config_format: str
    :param profile: profile name in AWS type config file
    :type profile: str
    :return: tuple of (access_key, secret_key, calling_format)
    :raises AirflowException: if the file cannot be read or the expected
        credential section is missing
    """
    config = configparser.ConfigParser()
    if config.read(config_file_name):  # pragma: no cover
        sections = config.sections()
    else:
        raise AirflowException("Couldn't read {0}".format(config_file_name))
    # Setting option names depending on file format
    if config_format is None:
        config_format = 'boto'
    conf_format = config_format.lower()
    if conf_format == 'boto':  # pragma: no cover
        if profile is not None and 'profile ' + profile in sections:
            cred_section = 'profile ' + profile
        else:
            cred_section = 'Credentials'
    elif conf_format == 'aws' and profile is not None:
        cred_section = profile
    else:
        cred_section = 'default'
    # Option names
    if conf_format in ('boto', 'aws'):  # pragma: no cover
        key_id_option = 'aws_access_key_id'
        secret_key_option = 'aws_secret_access_key'
        # security_token_option = 'aws_security_token'
    else:
        key_id_option = 'access_key'
        secret_key_option = 'secret_key'
    # Actual Parsing
    if cred_section not in sections:
        raise AirflowException("This config file format is not recognized")
    try:
        access_key = config.get(cred_section, key_id_option)
        secret_key = config.get(cred_section, secret_key_option)
        calling_format = None
        if config.has_option(cred_section, 'calling_format'):
            calling_format = config.get(cred_section, 'calling_format')
    except (configparser.NoSectionError, configparser.NoOptionError):
        # Narrowed from a bare ``except:`` — only these lookups can fail here;
        # unrelated exceptions (e.g. KeyboardInterrupt) now propagate cleanly.
        log = LoggingMixin().log
        log.warning("Option Error in parsing s3 config file")
        raise
    return (access_key, secret_key, calling_format)
def _post_sendgrid_mail(mail_data):
    """Submit *mail_data* to the Sendgrid mail-send API and log the outcome.

    :param mail_data: request body for the Sendgrid mail endpoint; must
        contain a 'subject' key (used in log messages).
    """
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(
        apikey=configuration.get('sendgrid', 'SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.
    if 200 <= response.status_code < 300:
        # BUG FIX: the original referenced an undefined name ``subject``
        # (NameError on every call); the subject lives in the request payload.
        # Also pass log args lazily instead of eager %-formatting.
        log.info(
            'The following email with subject %s is successfully sent to sendgrid.',
            mail_data['subject'])
    else:
        log.warning(
            'Failed to send out email with subject %s, status code: %s',
            mail_data['subject'], response.status_code)
def _to_timestamp(cls, col):
    """
    Convert a column of a dataframe to UNIX timestamps if applicable.

    :param col: A Series object representing a column of a dataframe.
    :return: a new Series of float timestamps (NaN where conversion failed),
        or the original ``col`` untouched if it cannot be parsed as datetimes.
    """
    # try and convert the column to datetimes
    # the column MUST have a four digit year somewhere in the string
    # there should be a better way to do this,
    # but just letting pandas try and convert every column without a format
    # caused it to convert floats as well
    # For example, a column of integers
    # between 0 and 10 are turned into timestamps
    # if the column cannot be converted,
    # just return the original column untouched
    try:
        col = pd.to_datetime(col)
    except ValueError:
        log = LoggingMixin().log
        log.warning(
            "Could not convert field to timestamps: %s",
            col.name
        )
        return col

    # now convert the newly created datetimes into timestamps
    # we have to be careful here
    # because NaT cannot be converted to a timestamp
    # so we have to return NaN
    converted = []
    for i in col:
        try:
            converted.append(i.timestamp())
        except (ValueError, AttributeError):
            # ``pd.np.NaN`` was deprecated in pandas 0.25 and removed in 1.0;
            # a plain float NaN is the same value the Series will hold.
            converted.append(float('nan'))
    # return a new series that maintains the same index as the original
    return pd.Series(converted, index=col.index)
def _to_timestamp(cls, col):
    """
    Convert a column of a dataframe to UNIX timestamps if applicable.

    :param col: A Series object representing a column of a dataframe.
    :return: a new Series of float timestamps (NaN where conversion failed),
        or the original ``col`` untouched if it cannot be parsed as datetimes.
    """
    # try and convert the column to datetimes
    # the column MUST have a four digit year somewhere in the string
    # there should be a better way to do this,
    # but just letting pandas try and convert every column without a format
    # caused it to convert floats as well
    # For example, a column of integers
    # between 0 and 10 are turned into timestamps
    # if the column cannot be converted,
    # just return the original column untouched
    try:
        col = pd.to_datetime(col)
    except ValueError:
        log = LoggingMixin().log
        log.warning(
            "Could not convert field to timestamps: %s",
            col.name
        )
        return col

    # now convert the newly created datetimes into timestamps
    # we have to be careful here
    # because NaT cannot be converted to a timestamp
    # so we have to return NaN
    converted = []
    for i in col:
        try:
            converted.append(i.timestamp())
        except (ValueError, AttributeError):
            # ``pd.np.NaN`` was deprecated in pandas 0.25 and removed in 1.0;
            # a plain float NaN is the same value the Series will hold.
            converted.append(float('nan'))
    # return a new series that maintains the same index as the original
    return pd.Series(converted, index=col.index)
configuration.conf.get('celery', 'DEFAULT_QUEUE'), 'broker_url': broker_url, 'broker_transport_options': broker_transport_options, 'result_backend': configuration.conf.get('celery', 'RESULT_BACKEND'), 'worker_concurrency': configuration.conf.getint('celery', 'WORKER_CONCURRENCY'), } celery_ssl_active = False try: celery_ssl_active = configuration.conf.getboolean('celery', 'SSL_ACTIVE') except AirflowConfigException as e: log.warning("Celery Executor will run without SSL") try: if celery_ssl_active: if 'amqp://' in broker_url: broker_use_ssl = { 'keyfile': configuration.conf.get('celery', 'SSL_KEY'), 'certfile': configuration.conf.get('celery', 'SSL_CERT'), 'ca_certs': configuration.conf.get('celery', 'SSL_CACERT'), 'cert_reqs': ssl.CERT_REQUIRED } elif 'redis://' in broker_url: broker_use_ssl = { 'ssl_keyfile': configuration.conf.get('celery', 'SSL_KEY'), 'ssl_certfile': configuration.conf.get('celery', 'SSL_CERT'), 'ssl_ca_certs': configuration.conf.get('celery', 'SSL_CACERT'),
# Example DAG demonstrating the KubernetesPodOperator: a single task that
# launches an ubuntu:16.04 pod and runs ``echo 10`` in it.
try:
    # Kubernetes is optional, so not available in vanilla Airflow
    # pip install 'apache-airflow[kubernetes]'
    from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

    # Default args applied to every task; start_date two days back so the
    # DAG is immediately runnable.
    args = {'owner': 'Airflow', 'start_date': days_ago(2)}

    # schedule_interval=None: trigger manually only.
    dag = DAG(dag_id='example_kubernetes_operator', default_args=args, schedule_interval=None)

    # Pod tolerations passed through to the Kubernetes pod spec
    # (placeholder key/value values for demonstration).
    tolerations = [{'key': "key", 'operator': 'Equal', 'value': 'value'}]

    k = KubernetesPodOperator(namespace='default',
                              image="ubuntu:16.04",
                              cmds=["bash", "-cx"],
                              arguments=["echo", "10"],
                              labels={"foo": "bar"},
                              name="airflow-test-pod",
                              in_cluster=False,        # run against an external cluster config
                              task_id="task",
                              get_logs=True,           # stream pod logs back into the task log
                              dag=dag,
                              is_delete_operator_pod=False,  # keep the pod around after completion
                              tolerations=tolerations)
except ImportError as e:
    # The kubernetes extra is not installed — log how to get it instead of
    # failing DAG-bag parsing.
    log.warning("Could not import KubernetesPodOperator: " + str(e))
    log.warning("Install kubernetes dependencies with: "
                " pip install 'apache-airflow[kubernetes]'")
'event_serializer': 'json', 'worker_prefetch_multiplier': 1, 'task_acks_late': True, 'task_default_queue': configuration.get('celery', 'DEFAULT_QUEUE'), 'task_default_exchange': configuration.get('celery', 'DEFAULT_QUEUE'), 'broker_url': configuration.get('celery', 'BROKER_URL'), 'broker_transport_options': broker_transport_options, 'result_backend': configuration.get('celery', 'RESULT_BACKEND'), 'worker_concurrency': configuration.getint('celery', 'WORKER_CONCURRENCY'), } celery_ssl_active = False try: celery_ssl_active = configuration.getboolean('celery', 'SSL_ACTIVE') except AirflowConfigException as e: log.warning("Celery Executor will run without SSL") try: if celery_ssl_active: broker_use_ssl = {'keyfile': configuration.get('celery', 'SSL_KEY'), 'certfile': configuration.get('celery', 'SSL_CERT'), 'ca_certs': configuration.get('celery', 'SSL_CACERT'), 'cert_reqs': ssl.CERT_REQUIRED} DEFAULT_CELERY_CONFIG['broker_use_ssl'] = broker_use_ssl except AirflowConfigException as e: raise AirflowException('AirflowConfigException: SSL_ACTIVE is True, ' 'please ensure SSL_KEY, ' 'SSL_CERT and SSL_CACERT are set') except Exception as e: raise AirflowException('Exception: There was an unknown Celery SSL Error. ' 'Please ensure you want to use '
'worker_prefetch_multiplier': 1, 'task_acks_late': True, 'task_default_queue': configuration.get('celery', 'DEFAULT_QUEUE'), 'task_default_exchange': configuration.get('celery', 'DEFAULT_QUEUE'), 'broker_url': configuration.get('celery', 'BROKER_URL'), 'broker_transport_options': {'visibility_timeout': 21600}, 'result_backend': configuration.get('celery', 'CELERY_RESULT_BACKEND'), 'worker_concurrency': configuration.getint('celery', 'CELERYD_CONCURRENCY'), } celery_ssl_active = False try: celery_ssl_active = configuration.getboolean('celery', 'CELERY_SSL_ACTIVE') except AirflowConfigException as e: log = LoggingMixin().log log.warning("Celery Executor will run without SSL") try: if celery_ssl_active: broker_use_ssl = {'keyfile': configuration.get('celery', 'CELERY_SSL_KEY'), 'certfile': configuration.get('celery', 'CELERY_SSL_CERT'), 'ca_certs': configuration.get('celery', 'CELERY_SSL_CACERT'), 'cert_reqs': ssl.CERT_REQUIRED} DEFAULT_CELERY_CONFIG['broker_use_ssl'] = broker_use_ssl except AirflowConfigException as e: raise AirflowException('AirflowConfigException: CELERY_SSL_ACTIVE is True, ' 'please ensure CELERY_SSL_KEY, ' 'CELERY_SSL_CERT and CELERY_SSL_CACERT are set') except Exception as e: raise AirflowException('Exception: There was an unknown Celery SSL Error. ' 'Please ensure you want to use '