Example #1
def load_login():
    log = LoggingMixin().log

    auth_backend = 'airflow.default_login'
    try:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            auth_backend = conf.get('webserver', 'auth_backend')
    except conf.AirflowConfigException:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            log.warning(
                "auth_backend not found in webserver config reverting to "
                "*deprecated*  behavior of importing airflow_login")
            auth_backend = "airflow_login"

    try:
        global login
        login = import_module(auth_backend)
    except ImportError as err:
        log.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            auth_backend, err
        )
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            raise AirflowException("Failed to import authentication backend")
Example #2
def load_login():
    log = LoggingMixin().log

    auth_backend = 'airflow.default_login'
    try:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            auth_backend = conf.get('webserver', 'auth_backend')
    except conf.AirflowConfigException:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            log.warning(
                "auth_backend not found in webserver config reverting to "
                "*deprecated*  behavior of importing airflow_login")
            auth_backend = "airflow_login"

    try:
        global login
        login = import_module(auth_backend)

        if hasattr(login, 'login_manager') and not hasattr(login, 'LOGIN_MANAGER'):
            login.LOGIN_MANAGER = login.login_manager
    except ImportError as err:
        log.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            auth_backend, err
        )
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            raise AirflowException("Failed to import authentication backend")
Example #3
def get_fernet():
    """
    Deferred load of Fernet key.

    This function could fail either because Cryptography is not installed
    or because the Fernet key is invalid.

    :return: Fernet object
    :raises: airflow.exceptions.AirflowException if there's a problem trying to load Fernet
    """
    global _fernet
    log = LoggingMixin().log

    if _fernet:
        return _fernet

    try:
        fernet_key = conf.get('core', 'FERNET_KEY')
        if not fernet_key:
            log.warning(
                "empty cryptography key - values will not be stored encrypted."
            )
            _fernet = NullFernet()
        else:
            _fernet = MultiFernet([
                Fernet(fernet_part.encode('utf-8'))
                for fernet_part in fernet_key.split(',')
            ])
            _fernet.is_encrypted = True
    except (ValueError, TypeError) as ve:
        raise AirflowException("Could not create Fernet object: {}".format(ve))

    return _fernet
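
For context, these are the cryptography primitives get_fernet() wraps: FERNET_KEY holds one or more urlsafe-base64 keys, comma-separated, and MultiFernet decrypts with any of them while encrypting with the first. A standalone sketch of generating and rotating keys (not Airflow code):

from cryptography.fernet import Fernet, MultiFernet

old_key = Fernet.generate_key()              # what a FERNET_KEY entry looks like
token = Fernet(old_key).encrypt(b"secret connection password")

# Rotation: put the new key first and keep the old one so existing
# ciphertexts still decrypt -- hence the comma-separated FERNET_KEY.
new_key = Fernet.generate_key()
fernet = MultiFernet([Fernet(new_key), Fernet(old_key)])
assert fernet.decrypt(token) == b"secret connection password"
token = fernet.rotate(token)                 # re-encrypted under new_key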
Example #4
def _post_sendgrid_mail(mail_data):
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(apikey=os.environ.get('SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.
    if 200 <= response.status_code < 300:
        log.info('Email with subject %s is successfully sent to recipients: %s',
                 mail_data['subject'], mail_data['personalizations'])
    else:
        log.warning('Failed to send out email with subject %s, status code: %s',
                    mail_data['subject'], response.status_code)
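
The mail_data dictionary is the request body of SendGrid's v3 Mail Send API, which is where the subject and personalizations keys read by the logger come from. A minimal sketch of a valid body; all addresses and text are illustrative:

mail_data = {
    'personalizations': [{'to': [{'email': 'alerts@example.com'}]}],
    'from': {'email': 'airflow@example.com'},
    'subject': 'Airflow task failure',
    'content': [{'type': 'text/plain', 'value': 'Task xyz failed.'}],
}
_post_sendgrid_mail(mail_data)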
Example #5
def _parse_s3_config(config_file_name, config_format='boto', profile=None):
    """
    Parses a config file for s3 credentials. Can currently
    parse boto, s3cmd.conf and AWS SDK config formats

    :param config_file_name: path to the config file
    :type config_file_name: str
    :param config_format: config type. One of "boto", "s3cmd" or "aws".
        Defaults to "boto"
    :type config_format: str
    :param profile: profile name in AWS type config file
    :type profile: str
    """
    Config = configparser.ConfigParser()
    if Config.read(config_file_name):  # pragma: no cover
        sections = Config.sections()
    else:
        raise AirflowException("Couldn't read {0}".format(config_file_name))
    # Setting option names depending on file format
    if config_format is None:
        config_format = 'boto'
    conf_format = config_format.lower()
    if conf_format == 'boto':  # pragma: no cover
        if profile is not None and 'profile ' + profile in sections:
            cred_section = 'profile ' + profile
        else:
            cred_section = 'Credentials'
    elif conf_format == 'aws' and profile is not None:
        cred_section = profile
    else:
        cred_section = 'default'
    # Option names
    if conf_format in ('boto', 'aws'):  # pragma: no cover
        key_id_option = 'aws_access_key_id'
        secret_key_option = 'aws_secret_access_key'
        # security_token_option = 'aws_security_token'
    else:
        key_id_option = 'access_key'
        secret_key_option = 'secret_key'
    # Actual Parsing
    if cred_section not in sections:
        raise AirflowException("This config file format is not recognized")
    else:
        try:
            access_key = Config.get(cred_section, key_id_option)
            secret_key = Config.get(cred_section, secret_key_option)
            calling_format = None
            if Config.has_option(cred_section, 'calling_format'):
                calling_format = Config.get(cred_section, 'calling_format')
        except Exception:
            log = LoggingMixin().log
            log.warning("Option error while parsing s3 config file")
            raise
        return (access_key, secret_key, calling_format)
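
As a usage sketch, here is a boto-format file paired with the default call; the path and key values are illustrative:

# /home/airflow/.boto (illustrative) -- the boto format keeps credentials
# in a [Credentials] section unless a [profile <name>] section matches:
#
#   [Credentials]
#   aws_access_key_id = AKIAEXAMPLEKEY
#   aws_secret_access_key = examplesecret
#
access_key, secret_key, calling_format = _parse_s3_config('/home/airflow/.boto')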
Example #6
def _post_sendgrid_mail(mail_data):
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(apikey=os.environ.get('SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.
    if response.status_code >= 200 and response.status_code < 300:
        log.info('Email with subject %s is successfully sent to recipients: %s' %
                 (mail_data['subject'], mail_data['personalizations']))
    else:
        log.warning('Failed to send out email with subject %s, status code: %s' %
                    (mail_data['subject'], response.status_code))
Example #8
def _post_sendgrid_mail(mail_data):
    log = LoggingMixin().log
    sg = sendgrid.SendGridAPIClient(
        apikey=configuration.get('sendgrid', 'SENDGRID_API_KEY'))
    response = sg.client.mail.send.post(request_body=mail_data)
    # 2xx status code.
    if 200 <= response.status_code < 300:
        log.info(
            'The following email with subject %s is successfully sent to sendgrid.',
            mail_data['subject'])
    else:
        log.warning(
            'Failed to send out email with subject %s, status code: %s',
            mail_data['subject'], response.status_code)
Example #9
    def _to_timestamp(cls, col):
        """
        Convert a column of a dataframe to UNIX timestamps if applicable

        :param col:     A Series object representing a column of a dataframe.
        """
        # try and convert the column to datetimes
        # the column MUST have a four digit year somewhere in the string
        # there should be a better way to do this,
        # but just letting pandas try and convert every column without a format
        # caused it to convert floats as well
        # For example, a column of integers
        # between 0 and 10 are turned into timestamps
        # if the column cannot be converted,
        # just return the original column untouched
        try:
            col = pd.to_datetime(col)
        except ValueError:
            log = LoggingMixin().log
            log.warning(
                "Could not convert field to timestamps: %s", col.name
            )
            return col

        # now convert the newly created datetimes into timestamps
        # we have to be careful here
        # because NaT cannot be converted to a timestamp
        # so we have to return NaN
        converted = []
        for i in col:
            try:
                converted.append(i.timestamp())
            except (ValueError, AttributeError):
                converted.append(float('nan'))

        # return a new series that maintains the same index as the original
        return pd.Series(converted, index=col.index)
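
A quick sketch of the transformation outside the class, showing the datetime-to-timestamp step and the NaT fallback; the sample values are illustrative:

import pandas as pd

col = pd.Series(['2019-01-01 00:00:00', None])
dt = pd.to_datetime(col)      # the None entry becomes NaT
for i in dt:
    # mirrors the loop above: NaT cannot yield a timestamp, so NaN
    print(i.timestamp() if i is not pd.NaT else float('nan'))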
Example #11
    configuration.conf.get('celery', 'DEFAULT_QUEUE'),
    'broker_url': broker_url,
    'broker_transport_options': broker_transport_options,
    'result_backend': configuration.conf.get('celery', 'RESULT_BACKEND'),
    'worker_concurrency': configuration.conf.getint('celery', 'WORKER_CONCURRENCY'),
}

celery_ssl_active = False
try:
    celery_ssl_active = configuration.conf.getboolean('celery', 'SSL_ACTIVE')
except AirflowConfigException as e:
    log.warning("Celery Executor will run without SSL")

try:
    if celery_ssl_active:
        if 'amqp://' in broker_url:
            broker_use_ssl = {
                'keyfile': configuration.conf.get('celery', 'SSL_KEY'),
                'certfile': configuration.conf.get('celery', 'SSL_CERT'),
                'ca_certs': configuration.conf.get('celery', 'SSL_CACERT'),
                'cert_reqs': ssl.CERT_REQUIRED
            }
        elif 'redis://' in broker_url:
            broker_use_ssl = {
                'ssl_keyfile': configuration.conf.get('celery', 'SSL_KEY'),
                'ssl_certfile': configuration.conf.get('celery', 'SSL_CERT'),
                'ssl_ca_certs': configuration.conf.get('celery', 'SSL_CACERT'),
Example #12
try:
    # Kubernetes is optional, so not available in vanilla Airflow
    # pip install 'apache-airflow[kubernetes]'
    from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

    args = {'owner': 'Airflow', 'start_date': days_ago(2)}

    dag = DAG(dag_id='example_kubernetes_operator',
              default_args=args,
              schedule_interval=None)

    tolerations = [{'key': "key", 'operator': 'Equal', 'value': 'value'}]

    k = KubernetesPodOperator(namespace='default',
                              image="ubuntu:16.04",
                              cmds=["bash", "-cx"],
                              arguments=["echo", "10"],
                              labels={"foo": "bar"},
                              name="airflow-test-pod",
                              in_cluster=False,
                              task_id="task",
                              get_logs=True,
                              dag=dag,
                              is_delete_operator_pod=False,
                              tolerations=tolerations)

except ImportError as e:
    log.warning("Could not import KubernetesPodOperator: " + str(e))
    log.warning("Install kubernetes dependencies with: "
                "    pip install 'apache-airflow[kubernetes]'")
Example #13
    'event_serializer': 'json',
    'worker_prefetch_multiplier': 1,
    'task_acks_late': True,
    'task_default_queue': configuration.get('celery', 'DEFAULT_QUEUE'),
    'task_default_exchange': configuration.get('celery', 'DEFAULT_QUEUE'),
    'broker_url': configuration.get('celery', 'BROKER_URL'),
    'broker_transport_options': broker_transport_options,
    'result_backend': configuration.get('celery', 'RESULT_BACKEND'),
    'worker_concurrency': configuration.getint('celery', 'WORKER_CONCURRENCY'),
}

celery_ssl_active = False
try:
    celery_ssl_active = configuration.getboolean('celery', 'SSL_ACTIVE')
except AirflowConfigException as e:
    log.warning("Celery Executor will run without SSL")

try:
    if celery_ssl_active:
        broker_use_ssl = {'keyfile': configuration.get('celery', 'SSL_KEY'),
                          'certfile': configuration.get('celery', 'SSL_CERT'),
                          'ca_certs': configuration.get('celery', 'SSL_CACERT'),
                          'cert_reqs': ssl.CERT_REQUIRED}
        DEFAULT_CELERY_CONFIG['broker_use_ssl'] = broker_use_ssl
except AirflowConfigException as e:
    raise AirflowException('AirflowConfigException: SSL_ACTIVE is True, '
                           'please ensure SSL_KEY, '
                           'SSL_CERT and SSL_CACERT are set')
except Exception as e:
    raise AirflowException('Exception: There was an unknown Celery SSL Error. '
                           'Please ensure you want to use '
Example #14
    'worker_prefetch_multiplier': 1,
    'task_acks_late': True,
    'task_default_queue': configuration.get('celery', 'DEFAULT_QUEUE'),
    'task_default_exchange': configuration.get('celery', 'DEFAULT_QUEUE'),
    'broker_url': configuration.get('celery', 'BROKER_URL'),
    'broker_transport_options': {'visibility_timeout': 21600},
    'result_backend': configuration.get('celery', 'CELERY_RESULT_BACKEND'),
    'worker_concurrency': configuration.getint('celery', 'CELERYD_CONCURRENCY'),
}

celery_ssl_active = False
try:
    celery_ssl_active = configuration.getboolean('celery', 'CELERY_SSL_ACTIVE')
except AirflowConfigException as e:
    log = LoggingMixin().log
    log.warning("Celery Executor will run without SSL")

try:
    if celery_ssl_active:
        broker_use_ssl = {'keyfile': configuration.get('celery', 'CELERY_SSL_KEY'),
                          'certfile': configuration.get('celery', 'CELERY_SSL_CERT'),
                          'ca_certs': configuration.get('celery', 'CELERY_SSL_CACERT'),
                          'cert_reqs': ssl.CERT_REQUIRED}
        DEFAULT_CELERY_CONFIG['broker_use_ssl'] = broker_use_ssl
except AirflowConfigException as e:
    raise AirflowException('AirflowConfigException: CELERY_SSL_ACTIVE is True, '
                           'please ensure CELERY_SSL_KEY, '
                           'CELERY_SSL_CERT and CELERY_SSL_CACERT are set')
except Exception as e:
    raise AirflowException('Exception: There was an unknown Celery SSL Error. '
                           'Please ensure you want to use '