Code Example #1
File: __init__.py Project: jthomas123/airflow
def load_login():
    auth_backend = "airflow.default_login"
    try:
        if configuration.getboolean("webserver", "AUTHENTICATE"):
            auth_backend = configuration.get("webserver", "auth_backend")
    except configuration.AirflowConfigException:
        if configuration.getboolean("webserver", "AUTHENTICATE"):
            logging.warning(
                "auth_backend not found in webserver config reverting to *deprecated*"
                " behavior of importing airflow_login"
            )
            auth_backend = "airflow_login"

    try:
        global login
        login = import_module(auth_backend)
    except ImportError as err:
        logging.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            auth_backend,
            err,
        )
        if configuration.getboolean("webserver", "AUTHENTICATE"):
            raise AirflowException("Failed to import authentication backend")
Code Example #2
def load_login():
    log = LoggingMixin().log

    auth_backend = 'airflow.default_login'
    try:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            auth_backend = conf.get('webserver', 'auth_backend')
    except conf.AirflowConfigException:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            log.warning(
                "auth_backend not found in webserver config reverting to "
                "*deprecated*  behavior of importing airflow_login")
            auth_backend = "airflow_login"

    try:
        global login
        login = import_module(auth_backend)

        if hasattr(login, 'login_manager') and not hasattr(login, 'LOGIN_MANAGER'):
            login.LOGIN_MANAGER = login.login_manager
    except ImportError as err:
        log.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            auth_backend, err
        )
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            raise AirflowException("Failed to import authentication backend")
Code Example #3
def send_MIME_email(e_from, e_to, mime_msg, dryrun=False):
    log = LoggingMixin().log

    SMTP_HOST = configuration.get('smtp', 'SMTP_HOST')
    SMTP_PORT = configuration.getint('smtp', 'SMTP_PORT')
    SMTP_STARTTLS = configuration.getboolean('smtp', 'SMTP_STARTTLS')
    SMTP_SSL = configuration.getboolean('smtp', 'SMTP_SSL')
    SMTP_USER = None
    SMTP_PASSWORD = None

    try:
        SMTP_USER = configuration.get('smtp', 'SMTP_USER')
        SMTP_PASSWORD = configuration.get('smtp', 'SMTP_PASSWORD')
    except AirflowConfigException:
        log.debug(
            "No user/password found for SMTP, so logging in with no authentication."
        )

    if not dryrun:
        s = smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) if SMTP_SSL \
            else smtplib.SMTP(SMTP_HOST, SMTP_PORT)
        if SMTP_STARTTLS:
            s.starttls()
        if SMTP_USER and SMTP_PASSWORD:
            s.login(SMTP_USER, SMTP_PASSWORD)
        log.info("Sent an alert email to %s", e_to)
        s.sendmail(e_from, e_to, mime_msg.as_string())
        s.quit()
Code Example #4
File: settings.py Project: fengzhongzhu1621/xAirflow
def configure_orm(disable_connection_pool=False):
    pool_enabled = conf.getboolean('core', 'SQL_ALCHEMY_POOL_ENABLED')
    try:
        pool_size = conf.getint('core', 'SQL_ALCHEMY_POOL_SIZE')
    except AirflowConfigException:
        pool_size = 5
    try:
        pool_recycle = conf.getint('core', 'SQL_ALCHEMY_POOL_RECYCLE')
    except AirflowConfigException:
        pool_recycle = 1800
    try:
        encoding = conf.get('core', 'SQL_ENGINE_ENCODING')
    except AirflowConfigException:
        encoding = 'utf-8'
    echo = conf.getboolean('core', 'SQL_ALCHEMY_ECHO')
    reconnect_timeout = conf.getint('core', 'SQL_ALCHEMY_RECONNECT_TIMEOUT')
    autocommit = False
    sql_alchemy_conn = conf.get('core', 'SQL_ALCHEMY_CONN')

    global engine
    global Session
    (engine, Session) = alchemy_orm.configure_orm(
        sql_alchemy_conn,
        pool_enabled=pool_enabled,
        pool_size=pool_size,
        pool_recycle=pool_recycle,
        reconnect_timeout=reconnect_timeout,
        autocommit=autocommit,
        disable_connection_pool=disable_connection_pool,
        encoding=encoding,
        echo=echo)
    alchemy_orm.Session = Session
Code Example #5
File: email.py Project: 7digital/incubator-airflow
def send_MIME_email(e_from, e_to, mime_msg, dryrun=False):
    log = LoggingMixin().log

    SMTP_HOST = configuration.get('smtp', 'SMTP_HOST')
    SMTP_PORT = configuration.getint('smtp', 'SMTP_PORT')
    SMTP_STARTTLS = configuration.getboolean('smtp', 'SMTP_STARTTLS')
    SMTP_SSL = configuration.getboolean('smtp', 'SMTP_SSL')
    SMTP_USER = None
    SMTP_PASSWORD = None

    try:
        SMTP_USER = configuration.get('smtp', 'SMTP_USER')
        SMTP_PASSWORD = configuration.get('smtp', 'SMTP_PASSWORD')
    except AirflowConfigException:
        log.debug("No user/password found for SMTP, so logging in with no authentication.")

    if not dryrun:
        s = smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) if SMTP_SSL else smtplib.SMTP(SMTP_HOST, SMTP_PORT)
        if SMTP_STARTTLS:
            s.starttls()
        if SMTP_USER and SMTP_PASSWORD:
            s.login(SMTP_USER, SMTP_PASSWORD)
        log.info("Sent an alert email to %s", e_to)
        s.sendmail(e_from, e_to, mime_msg.as_string())
        s.quit()
Code Example #6
def load_login():
    log = LoggingMixin().log

    auth_backend = 'airflow.default_login'
    try:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            auth_backend = conf.get('webserver', 'auth_backend')
    except conf.AirflowConfigException:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            log.warning(
                "auth_backend not found in webserver config reverting to "
                "*deprecated*  behavior of importing airflow_login")
            auth_backend = "airflow_login"

    try:
        global login
        login = import_module(auth_backend)
    except ImportError as err:
        log.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            auth_backend, err
        )
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            raise AirflowException("Failed to import authentication backend")
Code Example #7
File: __init__.py Project: fengzhongzhu1621/xAirflow
def load_login():
    log = LoggingMixin().log

    auth_backend = 'airflow.default_login'
    try:
        # Get the web authentication backend if authentication is enabled
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            auth_backend = conf.get('webserver', 'auth_backend')
    except (AirflowConfigException, XToolConfigException):
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            log.warning(
                "auth_backend not found in webserver config reverting to "
                "*deprecated*  behavior of importing airflow_login")
            auth_backend = "airflow_login"

    # Import the authentication module
    try:
        global login
        login = import_module(auth_backend)
    except ImportError as err:
        log.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            auth_backend, err
        )
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            raise AirflowException("Failed to import authentication backend")
Code Example #8
    def write(self, log, remote_log_location, append=False):
        """
        Writes the log to the remote_log_location. Fails silently if no hook
        was created.

        :param log: the log to write to the remote_log_location
        :type log: string
        :param remote_log_location: the log's location in remote storage
        :type remote_log_location: string (path)
        :param append: if False, any existing log file is overwritten. If True,
            the new log is appended to any existing logs.
        :type append: bool

        """
        if self.hook:

            if append:
                old_log = self.read(remote_log_location)
                log = old_log + '\n' + log
            try:
                self.hook.load_string(
                    log,
                    key=remote_log_location,
                    replace=True,
                    encrypt=configuration.getboolean('core', 'ENCRYPT_S3_LOGS'))
                return
            except:
                pass

        # raise/return error if we get here
        logging.error('Could not write logs to {}'.format(remote_log_location))
Code Example #9
    def test_kill_zombies_when_job_state_is_not_running(self, mock_ti_handle_failure):
        """
        Test that kill zombies calls TI's failure handler with proper context
        """
        dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=True)
        with create_session() as session:
            session.query(TI).delete()
            session.query(LJ).delete()
            dag = dagbag.get_dag('example_branch_operator')
            task = dag.get_task(task_id='run_this_first')

            ti = TI(task, DEFAULT_DATE, State.RUNNING)
            lj = LJ(ti)
            lj.state = State.SHUTDOWN
            lj.id = 1
            ti.job_id = lj.id

            session.add(lj)
            session.add(ti)
            session.commit()

            dagbag.kill_zombies()
            mock_ti_handle_failure \
                .assert_called_with(ANY,
                                    configuration.getboolean('core',
                                                             'unit_test_mode'),
                                    ANY)
Code Example #10
File: dagbag.py Project: shubhparekh/airflow
    def kill_zombies(self, zombies, session=None):
        """
        Fail given zombie tasks, which are tasks that haven't
        had a heartbeat for too long, in the current DagBag.

        :param zombies: zombie task instances to kill.
        :type zombies: airflow.utils.dag_processing.SimpleTaskInstance
        :param session: DB session.
        :type session: sqlalchemy.orm.session.Session
        """
        from airflow.models.taskinstance import TaskInstance  # Avoid circular import

        for zombie in zombies:
            if zombie.dag_id in self.dags:
                dag = self.dags[zombie.dag_id]
                if zombie.task_id in dag.task_ids:
                    task = dag.get_task(zombie.task_id)
                    ti = TaskInstance(task, zombie.execution_date)
                    # Get properties needed for failure handling from SimpleTaskInstance.
                    ti.start_date = zombie.start_date
                    ti.end_date = zombie.end_date
                    ti.try_number = zombie.try_number
                    ti.state = zombie.state
                    ti.test_mode = configuration.getboolean(
                        'core', 'unit_test_mode')
                    ti.handle_failure("{} detected as zombie".format(ti),
                                      ti.test_mode, ti.get_template_context())
                    self.log.info('Marked zombie job %s as %s', ti, ti.state)
                    Stats.incr('zombies_killed')
        session.commit()
Code Example #11
class CeleryConfig(object):
    CELERY_ACCEPT_CONTENT = ['json', 'pickle']
    CELERY_EVENT_SERIALIZER = 'json'
    CELERY_RESULT_SERIALIZER = 'pickle'
    CELERY_TASK_SERIALIZER = 'pickle'
    CELERYD_PREFETCH_MULTIPLIER = 1
    CELERY_ACKS_LATE = True
    BROKER_URL = configuration.get('celery', 'BROKER_URL')
    CELERY_RESULT_BACKEND = configuration.get('celery', 'CELERY_RESULT_BACKEND')
    CELERYD_CONCURRENCY = configuration.getint('celery', 'CELERYD_CONCURRENCY')
    CELERY_DEFAULT_QUEUE = DEFAULT_QUEUE
    CELERY_DEFAULT_EXCHANGE = DEFAULT_QUEUE

    celery_ssl_active = False
    try:
        celery_ssl_active = configuration.getboolean('celery', 'CELERY_SSL_ACTIVE')
    except AirflowConfigException as e:
        log = LoggingMixin().log
        log.warning("Celery Executor will run without SSL")

    try:
        if celery_ssl_active:
            BROKER_USE_SSL = {'keyfile': configuration.get('celery', 'CELERY_SSL_KEY'),
                              'certfile': configuration.get('celery', 'CELERY_SSL_CERT'),
                              'ca_certs': configuration.get('celery', 'CELERY_SSL_CACERT'),
                              'cert_reqs': ssl.CERT_REQUIRED}
    except AirflowConfigException as e:
        raise AirflowException('AirflowConfigException: CELERY_SSL_ACTIVE is True, please ensure CELERY_SSL_KEY, '
                               'CELERY_SSL_CERT and CELERY_SSL_CACERT are set')
    except Exception as e:
        raise AirflowException('Exception: There was an unknown Celery SSL Error.  Please ensure you want to use '
                               'SSL and/or have all necessary certs and key.')
Code Example #12
File: logging.py Project: pieces201020/airflow
    def write(self, log, remote_log_location, append=False):
        """
        Writes the log to the remote_log_location. Fails silently if no hook
        was created.

        :param log: the log to write to the remote_log_location
        :type log: string
        :param remote_log_location: the log's location in remote storage
        :type remote_log_location: string (path)
        :param append: if False, any existing log file is overwritten. If True,
            the new log is appended to any existing logs.
        :type append: bool

        """
        if self.hook:

            if append:
                old_log = self.read(remote_log_location)
                log = old_log + '\n' + log
            try:
                self.hook.load_string(log,
                                      key=remote_log_location,
                                      replace=True,
                                      encrypt=configuration.getboolean(
                                          'core', 'ENCRYPT_S3_LOGS'))
                return
            except:
                pass

        # raise/return error if we get here
        logging.error('Could not write logs to {}'.format(remote_log_location))
Code Example #13
class CeleryConfig(object):
    CELERY_ACCEPT_CONTENT = ['json', 'pickle']
    CELERY_EVENT_SERIALIZER = 'json'
    CELERY_RESULT_SERIALIZER = 'pickle'
    CELERY_TASK_SERIALIZER = 'pickle'
    CELERYD_PREFETCH_MULTIPLIER = 1
    CELERY_ACKS_LATE = True
    BROKER_URL = configuration.get('celery', 'BROKER_URL')
    CELERY_RESULT_BACKEND = configuration.get('celery',
                                              'CELERY_RESULT_BACKEND')
    CELERYD_CONCURRENCY = configuration.getint('celery', 'CELERYD_CONCURRENCY')
    CELERY_DEFAULT_QUEUE = DEFAULT_QUEUE
    CELERY_DEFAULT_EXCHANGE = DEFAULT_QUEUE
    if configuration.getboolean('celery', 'CELERY_SSL_ACTIVE'):
        try:
            BROKER_USE_SSL = {
                'keyfile': configuration.get('celery', 'CELERY_SSL_KEY'),
                'certfile': configuration.get('celery', 'CELERY_SSL_CERT'),
                'ca_certs': configuration.get('celery', 'CELERY_SSL_CACERT'),
                'cert_reqs': ssl.CERT_REQUIRED
            }
        except ValueError:
            raise AirflowException(
                'ValueError: CELERY_SSL_ACTIVE is True, please ensure CELERY_SSL_KEY, '
                'CELERY_SSL_CERT and CELERY_SSL_CACERT are set')
        except Exception as e:
            raise AirflowException(
                'Exception: There was an unknown Celery SSL Error.  Please ensure you want to use '
                'SSL and/or have all necessary certs and key.')
Code Example #14
    def s3_write(self, log, remote_log_location, append=True):
        """
        Writes the log to the remote_log_location. Fails silently if no hook
        was created.
        :param log: the log to write to the remote_log_location
        :type log: string
        :param remote_log_location: the log's location in remote storage
        :type remote_log_location: string (path)
        :param append: if False, any existing log file is overwritten. If True,
            the new log is appended to any existing logs.
        :type append: bool
        """
        if append:
            old_log = self.read(remote_log_location)
            log = '\n'.join([old_log, log])

        try:
            self.hook.load_string(
                log,
                key=remote_log_location,
                replace=True,
                encrypt=configuration.getboolean('core', 'ENCRYPT_S3_LOGS'),
            )
        except Exception:
            self.logger.error('Could not write logs to %s',
                              remote_log_location)
Code Example #15
File: settings.py Project: fengzhongzhu1621/xAirflow
def validate_session():
    """验证数据库是否可用 ."""
    try:
        worker_precheck = conf.getboolean('core', 'worker_precheck')
    except AirflowConfigException:
        worker_precheck = False
    return alchemy_orm.validate_session(engine, worker_precheck)
Code Example #16
File: conf.py Project: EugenePY/airflow-docker
def get_boolean_default(key, default):
    import airflow.configuration as conf

    if conf.has_option("airflowdocker", key):
        return conf.getboolean("airflowdocker", key)
    else:
        return default
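A hypothetical call site for get_boolean_default, shown only to illustrate the fall-through behaviour; the force_pull option name is invented for this sketch and is not an option defined by Airflow or airflowdocker itself.

# Hypothetical usage of get_boolean_default() from the snippet above;
# "force_pull" is an illustrative option name only.
force_pull = get_boolean_default("force_pull", default=False)
if force_pull:
    print("airflowdocker.force_pull is enabled")
else:
    print("airflowdocker.force_pull is absent or disabled; using the default")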
Code Example #17
def backfill(args, dag=None):
    logging.basicConfig(level=settings.LOGGING_LEVEL,
                        format=settings.SIMPLE_LOG_FORMAT)

    dag = dag or get_dag(args)

    if not args.start_date and not args.end_date:
        raise AirflowException("Provide a start_date and/or end_date")

    # If only one date is passed, using same as start and end
    args.end_date = args.end_date or args.start_date
    args.start_date = args.start_date or args.end_date

    if args.task_regex:
        dag = dag.sub_dag(task_regex=args.task_regex,
                          include_upstream=not args.ignore_dependencies)

    if args.dry_run:
        print("Dry run of DAG {0} on {1}".format(args.dag_id, args.start_date))
        for task in dag.tasks:
            print("Task {0}".format(task.task_id))
            ti = TaskInstance(task, args.start_date)
            ti.dry_run()
    else:
        dag.run(start_date=args.start_date,
                end_date=args.end_date,
                mark_success=args.mark_success,
                include_adhoc=args.include_adhoc,
                local=args.local,
                donot_pickle=(args.donot_pickle
                              or conf.getboolean('core', 'donot_pickle')),
                ignore_first_depends_on_past=args.ignore_first_depends_on_past,
                ignore_task_deps=args.ignore_dependencies,
                pool=args.pool)
Code Example #18
    def s3_write(self, log, remote_log_location, append=True):
        """
        Writes the log to the remote_log_location. Fails silently if no hook
        was created.
        :param log: the log to write to the remote_log_location
        :type log: string
        :param remote_log_location: the log's location in remote storage
        :type remote_log_location: string (path)
        :param append: if False, any existing log file is overwritten. If True,
            the new log is appended to any existing logs.
        :type append: bool
        """
        if append and self.s3_log_exists(remote_log_location):
            old_log = self.s3_read(remote_log_location)
            log = '\n'.join([old_log, log]) if old_log else log

        try:
            self.hook.load_string(
                log,
                key=remote_log_location,
                replace=True,
                encrypt=configuration.getboolean('core', 'ENCRYPT_S3_LOGS'),
            )
        except Exception:
            self.log.exception('Could not write logs to %s', remote_log_location)
Code Example #19
    def registered(self, driver, frameworkId, masterInfo):
        self.log.info(
            "AirflowScheduler registered to Mesos with framework ID %s",
            frameworkId.value)

        if configuration.getboolean('mesos',
                                    'CHECKPOINT') and configuration.get(
                                        'mesos', 'FAILOVER_TIMEOUT'):
            # Import here to work around a circular import error
            from airflow.models import Connection

            # Update the Framework ID in the database.
            session = Session()
            conn_id = FRAMEWORK_CONNID_PREFIX + get_framework_name()
            connection = Session.query(Connection).filter_by(
                conn_id=conn_id).first()
            if connection is None:
                connection = Connection(conn_id=conn_id,
                                        conn_type='mesos_framework-id',
                                        extra=frameworkId.value)
            else:
                connection.extra = frameworkId.value

            session.add(connection)
            session.commit()
            Session.remove()
Code Example #20
File: cli.py Project: seancron/airflow
def backfill(args, dag=None):
    logging.basicConfig(level=settings.LOGGING_LEVEL, format=settings.SIMPLE_LOG_FORMAT)

    dag = dag or get_dag(args)

    if not args.start_date and not args.end_date:
        raise AirflowException("Provide a start_date and/or end_date")

    # If only one date is passed, using same as start and end
    args.end_date = args.end_date or args.start_date
    args.start_date = args.start_date or args.end_date

    if args.task_regex:
        dag = dag.sub_dag(task_regex=args.task_regex, include_upstream=not args.ignore_dependencies)

    if args.dry_run:
        print("Dry run of DAG {0} on {1}".format(args.dag_id, args.start_date))
        for task in dag.tasks:
            print("Task {0}".format(task.task_id))
            ti = TaskInstance(task, args.start_date)
            ti.dry_run()
    else:
        dag.run(
            start_date=args.start_date,
            end_date=args.end_date,
            mark_success=args.mark_success,
            include_adhoc=args.include_adhoc,
            local=args.local,
            donot_pickle=(args.donot_pickle or conf.getboolean("core", "donot_pickle")),
            ignore_dependencies=args.ignore_dependencies,
            ignore_first_depends_on_past=args.ignore_first_depends_on_past,
            pool=args.pool,
        )
Code Example #21
    def test_kill_zombie_when_job_received_no_heartbeat(self, mock_ti_handle_failure):
        """
        Test that kill zombies calls TI's failure handler with proper context
        """
        zombie_threshold_secs = (
            configuration.getint('scheduler', 'scheduler_zombie_task_threshold'))
        dagbag = models.DagBag(dag_folder=self.empty_dir, include_examples=True)
        with create_session() as session:
            session.query(TI).delete()
            session.query(LJ).delete()
            dag = dagbag.get_dag('example_branch_operator')
            task = dag.get_task(task_id='run_this_first')

            ti = TI(task, DEFAULT_DATE, State.RUNNING)
            lj = LJ(ti)
            lj.latest_heartbeat = utcnow() - timedelta(seconds=zombie_threshold_secs)
            lj.state = State.RUNNING
            lj.id = 1
            ti.job_id = lj.id

            session.add(lj)
            session.add(ti)
            session.commit()

            dagbag.kill_zombies()
            mock_ti_handle_failure \
                .assert_called_with(ANY,
                                    configuration.getboolean('core',
                                                             'unit_test_mode'),
                                    ANY)
Code Example #22
def enabled(metric='', default=True):
    if metric:
        metric = '{}_'.format(metric)
    metric = 'airflow_metrics_{}enabled'.format(metric)
    try:
        return conf.getboolean('airflow_metrics', metric)
    except AirflowConfigException:
        return default
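A hypothetical guard built on the enabled() helper above; the dag_duration metric name is invented for this sketch, and the airflow_metrics section is assumed to come from whichever plugin defines enabled().

# Hypothetical usage; "dag_duration" is an illustrative metric name only.
# This reads airflow_metrics_dag_duration_enabled from the [airflow_metrics]
# section and falls back to True when the option is missing.
if enabled('dag_duration'):
    print("dag_duration metrics are enabled")
else:
    print("dag_duration metrics are disabled")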
Code Example #23
File: utils.py Project: xianhengma/airflow
def send_MIME_email(e_from, e_to, mime_msg, dryrun=False):
    SMTP_HOST = configuration.get('smtp', 'SMTP_HOST')
    SMTP_PORT = configuration.getint('smtp', 'SMTP_PORT')
    SMTP_USER = configuration.get('smtp', 'SMTP_USER')
    SMTP_PASSWORD = configuration.get('smtp', 'SMTP_PASSWORD')
    SMTP_STARTTLS = configuration.getboolean('smtp', 'SMTP_STARTTLS')
    SMTP_SSL = configuration.getboolean('smtp', 'SMTP_SSL')

    if not dryrun:
        s = smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) if SMTP_SSL else smtplib.SMTP(SMTP_HOST, SMTP_PORT)
        if SMTP_STARTTLS:
            s.starttls()
        if SMTP_USER and SMTP_PASSWORD:
            s.login(SMTP_USER, SMTP_PASSWORD)
        logging.info("Sent an alert email to " + str(e_to))
        s.sendmail(e_from, e_to, mime_msg.as_string())
        s.quit()
Code Example #24
File: utils.py Project: MSurendra/airflow
def send_MIME_email(e_from, e_to, mime_msg, dryrun=False):
    SMTP_HOST = configuration.get('smtp', 'SMTP_HOST')
    SMTP_PORT = configuration.getint('smtp', 'SMTP_PORT')
    SMTP_USER = configuration.get('smtp', 'SMTP_USER')
    SMTP_PASSWORD = configuration.get('smtp', 'SMTP_PASSWORD')
    SMTP_STARTTLS = configuration.getboolean('smtp', 'SMTP_STARTTLS')
    SMTP_SSL = configuration.getboolean('smtp', 'SMTP_SSL')

    if not dryrun:
        s = smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) if SMTP_SSL else smtplib.SMTP(SMTP_HOST, SMTP_PORT)
        if SMTP_STARTTLS:
            s.starttls()
        if SMTP_USER and SMTP_PASSWORD:
            s.login(SMTP_USER, SMTP_PASSWORD)
        logging.info("Sent an alert email to " + str(e_to))
        s.sendmail(e_from, e_to, mime_msg.as_string())
        s.quit()
Code Example #25
    def heartbeat(self):
        """
        Heartbeats update the job's entry in the database with a timestamp
        for the latest_heartbeat and allow the job to be killed
        externally. This makes it possible to monitor, at the system level,
        what is actually active.

        For instance, an old heartbeat for SchedulerJob would mean something
        is wrong.

        This also allows for any job to be killed externally, regardless
        of who is running it or on which machine it is running.

        Note that if your heartbeat is set to 60 seconds and you call this
        method after 10 seconds of processing since the last heartbeat, it
        will sleep 50 seconds to complete the 60 seconds and keep a steady
        heart rate. If you go over 60 seconds before calling it, it won't
        sleep at all.
        """
        try:
            with create_session() as session:
                job = session.query(BaseJob).filter_by(id=self.id).one()
                make_transient(job)
                session.commit()

            if job.state == State.SHUTDOWN:
                self.kill()

            is_unit_test = conf.getboolean('core', 'unit_test_mode')
            if not is_unit_test:
                # Figure out how long to sleep for
                sleep_for = 0
                if job.latest_heartbeat:
                    seconds_remaining = self.heartrate - \
                        (timezone.utcnow() - job.latest_heartbeat)\
                        .total_seconds()
                    sleep_for = max(0, seconds_remaining)

                sleep(sleep_for)

            # Update last heartbeat time
            with create_session() as session:
                job = session.query(BaseJob).filter(
                    BaseJob.id == self.id).first()
                job.latest_heartbeat = timezone.utcnow()
                session.merge(job)
                session.commit()

                self.heartbeat_callback(session=session)
                self.log.debug('[heartbeat]')
        except OperationalError:
            Stats.incr(
                convert_camel_to_snake(self.__class__.__name__) +
                '_heartbeat_failure', 1, 1)
            self.log.exception("%s heartbeat got an exception",
                               self.__class__.__name__)
Code Example #26
def configure_orm(disable_connection_pool=False):
    log.debug("Setting up DB connection pool (PID %s)" % os.getpid())
    global engine
    global Session
    engine_args = {}

    pool_connections = conf.getboolean('core', 'SQL_ALCHEMY_POOL_ENABLED')
    if disable_connection_pool or not pool_connections:
        engine_args['poolclass'] = NullPool
        log.debug("settings.configure_orm(): Using NullPool")
    elif 'sqlite' not in SQL_ALCHEMY_CONN:
        # Pool size engine args not supported by sqlite.
        # If no config value is defined for the pool size, select a reasonable value.
        # 0 means no limit, which could lead to exceeding the Database connection limit.
        try:
            pool_size = conf.getint('core', 'SQL_ALCHEMY_POOL_SIZE')
        except conf.AirflowConfigException:
            pool_size = 5

        # The DB server already has a value for wait_timeout (number of seconds after
        # which an idle sleeping connection should be killed). Since other DBs may
        # co-exist on the same server, SQLAlchemy should set its
        # pool_recycle to an equal or smaller value.
        try:
            pool_recycle = conf.getint('core', 'SQL_ALCHEMY_POOL_RECYCLE')
        except conf.AirflowConfigException:
            pool_recycle = 1800

        log.info(
            "settings.configure_orm(): Using pool settings. pool_size={}, "
            "pool_recycle={}, pid={}".format(pool_size, pool_recycle,
                                             os.getpid()))
        engine_args['pool_size'] = pool_size
        engine_args['pool_recycle'] = pool_recycle

    try:
        # Allow the user to specify an encoding for their DB otherwise default
        # to utf-8 so jobs & users with non-latin1 characters can still use
        # us.
        engine_args['encoding'] = conf.get('core', 'SQL_ENGINE_ENCODING')
    except conf.AirflowConfigException:
        engine_args['encoding'] = 'utf-8'
    # For Python2 we get back a newstr and need a str
    engine_args['encoding'] = engine_args['encoding'].__str__()

    engine = create_engine(SQL_ALCHEMY_CONN, **engine_args)
    reconnect_timeout = conf.getint('core', 'SQL_ALCHEMY_RECONNECT_TIMEOUT')
    setup_event_handlers(engine, reconnect_timeout)

    Session = scoped_session(
        sessionmaker(autocommit=False,
                     autoflush=False,
                     bind=engine,
                     expire_on_commit=False))
Code Example #27
    def execute(self, context):
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException(
                    "can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()

            if not self.command:
                raise AirflowException(
                    "no command specified so nothing to execute here.")

            # Automatically request a tty when it is required (e.g. for sudo)
            get_pty = False
            if self.command.startswith('sudo'):
                get_pty = True

            # use the timeout passed in as a parameter
            stdin, stdout, stderr = ssh_client.exec_command(
                command=self.command, get_pty=get_pty, timeout=self.timeout)
            stdin.close()
            output = b''
            for line in stdout:
                output += line.encode('utf-8')
                self.log.info(line.strip('\n'))

            exit_status = stdout.channel.recv_exit_status()
            if exit_status == 0:
                # only return the output if do_xcom_push is set;
                # otherwise it is not supposed to be disclosed
                if self.do_xcom_push:
                    enable_pickling = configuration.getboolean(
                        'core', 'enable_xcom_pickling')
                    if enable_pickling:
                        return output
                    else:
                        return b64encode(output).decode('utf-8')

            else:
                error_msg = stderr.read()
                raise AirflowException(
                    "error running cmd: {0}, error: {1}".format(
                        self.command, error_msg))

        except Exception as e:
            raise AirflowException("SSH operator error: {0}".format(str(e)))

        return True
Code Example #28
    def schedule_job(self, context):
        hook = HttpHook(method='POST', http_conn_id=self.http_conn_id)

        headers = {
            'content-type': 'application/json',
            'Accept': 'text/plain',
            'X-Bluecore-Token': configuration.get('appengine', 'token').strip(),
            'X-Airflow-Dag-Id': self.dag_id,
            'X-Airflow-Task-Id': self.task_id,
            'X-Airflow-Execution-Date': context['execution_date'].isoformat(),
            'X-Airflow-Enable-Xcom-Pickling':
                str(configuration.getboolean('core', 'enable_xcom_pickling')),
            'X-Airflow-Mysql-Db': configuration.get('mysql', 'db').strip(),
            'X-Airflow-Mysql-User': configuration.get('mysql', 'username').strip(),
            'X-Airflow-Mysql-Password': configuration.get('mysql', 'password').strip(),
            'X-Airflow-Fernet-Key': configuration.get('core', 'fernet_key').strip(),
        }

        mysql_host = safe_config_get('mysql', 'host')
        if mysql_host is not None:
            headers['X-Airflow-Mysql-Host'] = mysql_host

        mysql_cloudsql_instance = safe_config_get('mysql', 'cloudsql_instance')
        if mysql_cloudsql_instance is not None:
            headers['X-Airflow-Mysql-Cloudsql-Instance'] = mysql_cloudsql_instance

        # generate a unique job name for the command to be added to the App Engine task queue
        job_id = uniquify_job_name(self, context)
        logging.info("Job ID: %s", job_id)

        instance_params = evaluate_xcoms(self.command_params, self, context)

        post_data = {
            'params_dict': instance_params,
            'appengine_queue': self.appengine_queue,
            'job_id': job_id
        }

        hook.run(endpoint='/api/airflow_v2/async/%s' % self.command_name,
                 headers=headers,
                 data=json.dumps(post_data),
                 extra_options=None)
Code Example #29
File: xcom.py Project: Orpheus11/airflow-forked
    def init_on_load(self):
        enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
        if enable_pickling:
            self.value = pickle.loads(self.value)
        else:
            try:
                self.value = json.loads(self.value.decode('UTF-8'))
            except (UnicodeEncodeError, ValueError):
                # For backward-compatibility.
                # Preventing errors in webserver
                # due to XComs mixed with pickled and unpickled.
                self.value = pickle.loads(self.value)
Code Example #30
File: settings.py Project: sguarrinieye/airflow
def configure_orm(disable_connection_pool=False):
    log.debug("Setting up DB connection pool (PID %s)" % os.getpid())
    global engine
    global Session
    engine_args = {}

    pool_connections = conf.getboolean('core', 'SQL_ALCHEMY_POOL_ENABLED')
    if disable_connection_pool or not pool_connections:
        engine_args['poolclass'] = NullPool
        log.debug("settings.configure_orm(): Using NullPool")
    elif 'sqlite' not in SQL_ALCHEMY_CONN:
        # Pool size engine args not supported by sqlite.
        # If no config value is defined for the pool size, select a reasonable value.
        # 0 means no limit, which could lead to exceeding the Database connection limit.
        try:
            pool_size = conf.getint('core', 'SQL_ALCHEMY_POOL_SIZE')
        except conf.AirflowConfigException:
            pool_size = 5

        # The DB server already has a value for wait_timeout (number of seconds after
        # which an idle sleeping connection should be killed). Since other DBs may
        # co-exist on the same server, SQLAlchemy should set its
        # pool_recycle to an equal or smaller value.
        try:
            pool_recycle = conf.getint('core', 'SQL_ALCHEMY_POOL_RECYCLE')
        except conf.AirflowConfigException:
            pool_recycle = 1800

        log.info("settings.configure_orm(): Using pool settings. pool_size={}, "
                 "pool_recycle={}, pid={}".format(pool_size, pool_recycle, os.getpid()))
        engine_args['pool_size'] = pool_size
        engine_args['pool_recycle'] = pool_recycle

    try:
        # Allow the user to specify an encoding for their DB otherwise default
        # to utf-8 so jobs & users with non-latin1 characters can still use
        # us.
        engine_args['encoding'] = conf.get('core', 'SQL_ENGINE_ENCODING')
    except conf.AirflowConfigException:
        engine_args['encoding'] = 'utf-8'
    # For Python2 we get back a newstr and need a str
    engine_args['encoding'] = engine_args['encoding'].__str__()

    engine = create_engine(SQL_ALCHEMY_CONN, **engine_args)
    reconnect_timeout = conf.getint('core', 'SQL_ALCHEMY_RECONNECT_TIMEOUT')
    setup_event_handlers(engine, reconnect_timeout)

    Session = scoped_session(
        sessionmaker(autocommit=False,
                     autoflush=False,
                     bind=engine,
                     expire_on_commit=False))
Code Example #31
File: dagbag.py Project: fengzhongzhu1621/xAirflow
    def kill_zombies(self, session=None):
        """
        Fails tasks that haven't had a heartbeat in too long
        """
        secs = configuration.conf.getint('scheduler',
                                         'scheduler_zombie_task_threshold')
        now = datetime.now()
        limit_dttm = now - timedelta(seconds=secs)
        self.log.info(
            "Finding 'running' jobs without a recent heartbeat after %s",
            limit_dttm)

        # The task instance is still running, but its job has stopped or the job heartbeat has not been updated for a long time
        begin_time = now - timedelta(
            days=configuration.conf.getint('core', 'sql_query_history_days'))
        TI = TaskInstance
        from airflow.jobs import LocalTaskJob as LJ
        # SELECT task_instance.try_number AS task_instance_try_number, task_instance.task_id AS task_instance_task_id, task_instance.dag_id AS task_instance_dag_id, task_instance.execution_date AS task_instance_execution_date, task_instance.start_date AS task_instance_start_date, task_instance.end_date AS task_instance_end_date, task_instance.duration AS task_instance_duration, task_instance.state AS task_instance_state, task_instance.max_tries AS task_instance_max_tries, task_instance.hostname AS task_instance_hostname, task_instance.unixname AS task_instance_unixname, task_instance.job_id AS task_instance_job_id, task_instance.pool AS task_instance_pool, task_instance.queue AS task_instance_queue, task_instance.priority_weight AS task_instance_priority_weight, task_instance.operator AS task_instance_operator, task_instance.queued_dttm AS task_instance_queued_dttm, task_instance.pid AS task_instance_pid, task_instance.executor_config AS task_instance_executor_config
        # FROM task_instance INNER JOIN job ON task_instance.job_id = job.id AND job.job_type IN (LocalTaskJob)
        # WHERE task_instance.execution_date > %s AND task_instance.state = 'running' AND (job.latest_heartbeat < %s OR job.state != 'running')
        # 1. Get task instances that are currently running
        # 2. or where now > LJ.latest_heartbeat + scheduler_zombie_task_threshold
        tis = (
            session.query(TI).join(LJ, TI.job_id == LJ.id).filter(
                TI.execution_date > begin_time).filter(
                    TI.state == State.RUNNING)  # the task instance is running
            .filter(
                or_(
                    LJ.latest_heartbeat < limit_dttm,  # no recent heartbeat
                    LJ.state != State.RUNNING,  # or the job is not in the running state
                )).all())

        # Run the failure handler for each zombie task instance
        for ti in tis:
            if ti.dag_id in self.dags:
                # Get the DAG for this task instance
                dag = self.dags[ti.dag_id]
                # Check whether the task is among the DAG's tasks
                if ti.task_id in dag.task_ids:
                    # Get the task
                    task = dag.get_task(ti.task_id)
                    # now set non db backed vars on ti
                    ti.task = task
                    ti.test_mode = configuration.getboolean(
                        'core', 'unit_test_mode')
                    # Mark the task as failed, send notifications, and run the failure callback
                    ti.handle_failure("{} detected as zombie".format(ti),
                                      ti.test_mode, ti.get_template_context())
                    self.log.info('Marked zombie job %s as %s', ti, ti.state)
                    Stats.incr('zombies_killed')
        session.commit()
Code Example #32
    def test_login_logout_ldap(self):
        assert configuration.getboolean('webserver', 'authenticate') is True

        response = self.login('user1', 'userx')
        assert 'Incorrect login details' in response.data.decode('utf-8')

        response = self.login('userz', 'user1')
        assert 'Incorrect login details' in response.data.decode('utf-8')

        response = self.login('user1', 'user1')
        assert 'Data Profiling' in response.data.decode('utf-8')

        response = self.logout()
        assert 'form-signin' in response.data.decode('utf-8')
Code Example #33
    def test_login_logout_password_auth(self):
        assert configuration.getboolean('webserver', 'authenticate') is True

        response = self.login('user1', 'whatever')
        assert 'Incorrect login details' in response.data.decode('utf-8')

        response = self.login('airflow_passwordauth', 'wrongpassword')
        assert 'Incorrect login details' in response.data.decode('utf-8')

        response = self.login('airflow_passwordauth', 'password')
        assert 'Data Profiling' in response.data.decode('utf-8')

        response = self.logout()
        assert 'form-signin' in response.data.decode('utf-8')
Code Example #34
    def set(
            cls,
            key,
            value,
            execution_date,
            task_id,
            dag_id,
            session=None):
        """保存中间结果
        Store an XCom value.
        TODO: "pickling" has been deprecated and JSON is preferred.
              "pickling" will be removed in Airflow 2.0.
        :return: None
        """
        # Expunge all existing instances from the underlying session
        session.expunge_all()

        enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
        # Serialize the intermediate result
        if enable_pickling:
            value = pickle.dumps(value)
        else:
            try:
                # Note the encoding conversion
                value = json.dumps(value).encode('UTF-8')
            except ValueError:
                log = LoggingMixin().log
                log.error("Could not serialize the XCOM value into JSON. "
                          "If you are using pickles instead of JSON "
                          "for XCOM, then you need to enable pickle "
                          "support for XCOM in your airflow config.")
                raise

        # remove any duplicate XComs with the same key
        session.query(cls).filter(
            cls.key == key,
            cls.execution_date == execution_date,
            cls.task_id == task_id,
            cls.dag_id == dag_id).delete()
        session.commit()

        # insert new XCom
        session.add(XCom(
            key=key,
            value=value,
            execution_date=execution_date,
            task_id=task_id,
            dag_id=dag_id))
        session.commit()
Code Example #35
File: core.py Project: praveev/airflow
    def test_login_logout_ldap(self):
        assert configuration.getboolean("webserver", "authenticate") is True

        response = self.login("user1", "userx")
        assert "Incorrect login details" in response.data.decode("utf-8")

        response = self.login("userz", "user1")
        assert "Incorrect login details" in response.data.decode("utf-8")

        response = self.login("user1", "user1")
        assert "Data Profiling" in response.data.decode("utf-8")

        response = self.logout()
        assert "form-signin" in response.data.decode("utf-8")
Code Example #36
File: core.py Project: praveev/airflow
    def test_login_logout_password_auth(self):
        assert configuration.getboolean("webserver", "authenticate") is True

        response = self.login("user1", "whatever")
        assert "Incorrect login details" in response.data.decode("utf-8")

        response = self.login("airflow_passwordauth", "wrongpassword")
        assert "Incorrect login details" in response.data.decode("utf-8")

        response = self.login("airflow_passwordauth", "password")
        assert "Data Profiling" in response.data.decode("utf-8")

        response = self.logout()
        assert "form-signin" in response.data.decode("utf-8")
Code Example #37
File: core.py Project: moritzpein/airflow
    def test_login_logout_ldap(self):
        assert configuration.getboolean('webserver', 'authenticate') is True

        response = self.login('user1', 'userx')
        assert 'Incorrect login details' in response.data.decode('utf-8')

        response = self.login('userz', 'user1')
        assert 'Incorrect login details' in response.data.decode('utf-8')

        response = self.login('user1', 'user1')
        assert 'Data Profiling' in response.data.decode('utf-8')

        response = self.logout()
        assert 'form-signin' in response.data.decode('utf-8')
Code Example #38
File: core.py Project: moritzpein/airflow
    def test_login_logout_password_auth(self):
        assert configuration.getboolean('webserver', 'authenticate') is True

        response = self.login('user1', 'whatever')
        assert 'Incorrect login details' in response.data.decode('utf-8')

        response = self.login('airflow_passwordauth', 'wrongpassword')
        assert 'Incorrect login details' in response.data.decode('utf-8')

        response = self.login('airflow_passwordauth', 'password')
        assert 'Data Profiling' in response.data.decode('utf-8')

        response = self.logout()
        assert 'form-signin' in response.data.decode('utf-8')
Code Example #39
File: xcom.py Project: alrolorojas/airflow
    def set(
            cls,
            key,
            value,
            execution_date,
            task_id,
            dag_id,
            session=None):
        """
        Store an XCom value.
        TODO: "pickling" has been deprecated and JSON is preferred.
        "pickling" will be removed in Airflow 2.0.

        :return: None
        """
        session.expunge_all()

        enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
        if enable_pickling:
            value = pickle.dumps(value)
        else:
            try:
                value = json.dumps(value).encode('UTF-8')
            except ValueError:
                log = LoggingMixin().log
                log.error("Could not serialize the XCOM value into JSON. "
                          "If you are using pickles instead of JSON "
                          "for XCOM, then you need to enable pickle "
                          "support for XCOM in your airflow config.")
                raise

        # remove any duplicate XComs
        session.query(cls).filter(
            cls.key == key,
            cls.execution_date == execution_date,
            cls.task_id == task_id,
            cls.dag_id == dag_id).delete()

        session.commit()

        # insert new XCom
        session.add(XCom(
            key=key,
            value=value,
            execution_date=execution_date,
            task_id=task_id,
            dag_id=dag_id))

        session.commit()
Code Example #40
File: settings.py Project: wooga/airflow
def validate_session():
    worker_precheck = conf.getboolean('core', 'worker_precheck', fallback=False)
    if not worker_precheck:
        return True
    else:
        check_session = sessionmaker(bind=engine)
        session = check_session()
        try:
            session.execute("select 1")
            conn_status = True
        except exc.DBAPIError as err:
            log.error(err)
            conn_status = False
        session.close()
        return conn_status
Code Example #41
    def serialize_value(value):
        # TODO: "pickling" has been deprecated and JSON is preferred.
        # "pickling" will be removed in Airflow 2.0.
        if configuration.getboolean('core', 'enable_xcom_pickling'):
            return pickle.dumps(value)

        try:
            return json.dumps(value).encode('UTF-8')
        except ValueError:
            log = LoggingMixin().log
            log.error("Could not serialize the XCOM value into JSON. "
                      "If you are using pickles instead of JSON "
                      "for XCOM, then you need to enable pickle "
                      "support for XCOM in your airflow config.")
            raise
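For symmetry, a minimal deserialization sketch that mirrors the branch above and the get_one()/init_on_load() examples elsewhere in this collection; this is an illustration under the same enable_xcom_pickling assumption, not the exact method shipped by any Airflow release.

import json
import pickle

from airflow import configuration

def deserialize_value(blob):
    # Mirror image of serialize_value(): honour the same
    # core.enable_xcom_pickling switch when reading a stored value back.
    if configuration.getboolean('core', 'enable_xcom_pickling'):
        return pickle.loads(blob)
    return json.loads(blob.decode('UTF-8'))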
Code Example #42
def validate_session():
    worker_precheck = conf.getboolean('core', 'worker_precheck', fallback=False)
    if not worker_precheck:
        return True
    else:
        check_session = sessionmaker(bind=engine)
        session = check_session()
        try:
            session.execute("select 1")
            conn_status = True
        except exc.DBAPIError as err:
            log.error(err)
            conn_status = False
        session.close()
        return conn_status
Code Example #43
def try_get_one(execution_date,
                key=None,
                task_id=None,
                dag_id=None,
                include_prior_dates=False,
                enable_pickling=None,
                session=None):
    """
    Retrieve an XCom value, optionally meeting certain criteria.
    TODO: "pickling" has been deprecated and JSON is preferred. "pickling" will be removed in Airflow 2.0.

    :param enable_pickling: If pickling is not enabled, the XCOM value will be parsed to JSON instead.
    :return: XCom value
    """
    filters = []
    if key:
        filters.append(XCom.key == key)
    if task_id:
        filters.append(XCom.task_id == task_id)
    if dag_id:
        filters.append(XCom.dag_id == dag_id)
    if include_prior_dates:
        filters.append(XCom.execution_date <= execution_date)
    else:
        filters.append(XCom.execution_date == execution_date)

    query = (session.query(XCom.value).filter(and_(*filters)).order_by(
        XCom.execution_date.desc(), XCom.timestamp.desc()))

    result = query.first()
    if result:
        if enable_pickling is None:
            enable_pickling = configuration.getboolean('core',
                                                       'enable_xcom_pickling')

        if enable_pickling:
            return (True, pickle.loads(result.value))
        else:
            try:
                return (True, json.loads(result.value.decode('UTF-8')))
            except ValueError:
                log = LoggingMixin().log
                log.error("Could not serialize the XCOM value into JSON. "
                          "If you are using pickles instead of JSON "
                          "for XCOM, then you need to enable pickle "
                          "support for XCOM in your airflow config.")
                raise
    return (False, None)
Code Example #44
    def execute(self, context):
        try:
            if self.ssh_conn_id and not self.ssh_hook:
                self.ssh_hook = SSHHook(ssh_conn_id=self.ssh_conn_id)

            if not self.ssh_hook:
                raise AirflowException("can not operate without ssh_hook or ssh_conn_id")

            if self.remote_host is not None:
                self.ssh_hook.remote_host = self.remote_host

            ssh_client = self.ssh_hook.get_conn()

            if not self.command:
                raise AirflowException("no command specified so nothing to execute here.")

            # Automatically request a tty when it is required (e.g. for sudo)
            get_pty = False
            if self.command.startswith('sudo'):
                get_pty = True

            # use the timeout passed in as a parameter
            stdin, stdout, stderr = ssh_client.exec_command(command=self.command,
                                                            get_pty=get_pty,
                                                            timeout=self.timeout
                                                            )
            exit_status = stdout.channel.recv_exit_status()
            if exit_status == 0:
                # only return the output if do_xcom_push is set;
                # otherwise it is not supposed to be disclosed
                if self.do_xcom_push:
                    enable_pickling = configuration.getboolean('core',
                                                               'enable_xcom_pickling')
                    if enable_pickling:
                        return stdout.read()
                    else:
                        return b64encode(stdout.read()).decode('utf-8')

            else:
                error_msg = stderr.read()
                raise AirflowException("error running cmd: {0}, error: {1}"
                                        .format(self.command, error_msg))

        except Exception as e:
            raise AirflowException("SSH operator error: {0}".format(str(e)))

        return True
Code Example #45
    def get_one(cls,
                execution_date,
                key=None,
                task_id=None,
                dag_id=None,
                include_prior_dates=False,
                session=None):
        """获取一条中间结果
        Retrieve an XCom value, optionally meeting certain criteria.
        TODO: "pickling" has been deprecated and JSON is preferred.
              "pickling" will be removed in Airflow 2.0.
        :return: XCom value
        """
        # Build the query filters
        filters = []
        if key:
            filters.append(cls.key == key)
        if task_id:
            filters.append(cls.task_id == task_id)
        if dag_id:
            filters.append(cls.dag_id == dag_id)
        if include_prior_dates:
            filters.append(cls.execution_date <= execution_date)
        else:
            filters.append(cls.execution_date == execution_date)

        query = (
            session.query(cls.value).filter(and_(*filters))
                   .order_by(cls.execution_date.desc(), cls.timestamp.desc()))

        # Get the most recent record
        result = query.first()
        if result:
            enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
            if enable_pickling:
                return pickle.loads(result.value)
            else:
                try:
                    # Note the encoding conversion
                    return json.loads(result.value.decode('UTF-8'))
                except ValueError:
                    log = LoggingMixin().log
                    log.error("Could not deserialize the XCOM value from JSON. "
                              "If you are using pickles instead of JSON "
                              "for XCOM, then you need to enable pickle "
                              "support for XCOM in your airflow config.")
                    raise
Code Example #46
File: xcom.py Project: alrolorojas/airflow
    def get_one(cls,
                execution_date,
                key=None,
                task_id=None,
                dag_id=None,
                include_prior_dates=False,
                session=None):
        """
        Retrieve an XCom value, optionally meeting certain criteria.
        TODO: "pickling" has been deprecated and JSON is preferred.
        "pickling" will be removed in Airflow 2.0.

        :return: XCom value
        """
        filters = []
        if key:
            filters.append(cls.key == key)
        if task_id:
            filters.append(cls.task_id == task_id)
        if dag_id:
            filters.append(cls.dag_id == dag_id)
        if include_prior_dates:
            filters.append(cls.execution_date <= execution_date)
        else:
            filters.append(cls.execution_date == execution_date)

        query = (
            session.query(cls.value).filter(and_(*filters))
                   .order_by(cls.execution_date.desc(), cls.timestamp.desc()))

        result = query.first()
        if result:
            enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
            if enable_pickling:
                return pickle.loads(result.value)
            else:
                try:
                    return json.loads(result.value.decode('UTF-8'))
                except ValueError:
                    log = LoggingMixin().log
                    log.error("Could not deserialize the XCOM value from JSON. "
                              "If you are using pickles instead of JSON "
                              "for XCOM, then you need to enable pickle "
                              "support for XCOM in your airflow config.")
                    raise
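In DAG code this lookup is normally reached through TaskInstance.xcom_pull() rather than by calling XCom.get_one() directly. A minimal usage sketch follows; the DAG id, task ids and key are illustrative, and the import paths assume the Airflow 1.x layout used throughout these examples:

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


def producer(**context):
    # Stored through XCom.set(); retrievable later via XCom.get_one().
    context['ti'].xcom_push(key='row_count', value=42)


def consumer(**context):
    # For a single task id, xcom_pull() ends up in XCom.get_one() with the
    # current execution_date.
    print(context['ti'].xcom_pull(task_ids='producer', key='row_count'))


dag = DAG('xcom_get_one_demo', start_date=datetime(2019, 1, 1), schedule_interval=None)
t1 = PythonOperator(task_id='producer', python_callable=producer,
                    provide_context=True, dag=dag)
t2 = PythonOperator(task_id='consumer', python_callable=consumer,
                    provide_context=True, dag=dag)
t1 >> t2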
コード例 #47
0
    def registered(self, driver, frameworkId, masterInfo):
        logging.info("AirflowScheduler registered to mesos with framework ID %s", frameworkId.value)

        if configuration.getboolean('mesos', 'CHECKPOINT') and configuration.get('mesos', 'FAILOVER_TIMEOUT'):
            # Import here to work around a circular import error
            from airflow.models import Connection

            # Update the Framework ID in the database.
            session = Session()
            conn_id = FRAMEWORK_CONNID_PREFIX + get_framework_name()
            connection = session.query(Connection).filter_by(conn_id=conn_id).first()
            if connection is None:
                connection = Connection(conn_id=conn_id, conn_type='mesos_framework-id',
                                        extra=frameworkId.value)
            else:
                connection.extra = frameworkId.value

            session.add(connection)
            session.commit()
            Session.remove()
コード例 #48
0
ファイル: settings.py プロジェクト: ataki/incubator-airflow
def configure_orm(disable_connection_pool=False):
    log.debug("Setting up DB connection pool (PID %s)" % os.getpid())
    global engine
    global Session
    engine_args = {}

    pool_connections = conf.getboolean('core', 'SQL_ALCHEMY_POOL_ENABLED')
    if disable_connection_pool or not pool_connections:
        engine_args['poolclass'] = NullPool
    elif 'sqlite' not in SQL_ALCHEMY_CONN:
        # Engine args not supported by sqlite
        engine_args['pool_size'] = conf.getint('core', 'SQL_ALCHEMY_POOL_SIZE')
        engine_args['pool_recycle'] = conf.getint('core',
                                                  'SQL_ALCHEMY_POOL_RECYCLE')

    engine = create_engine(SQL_ALCHEMY_CONN, **engine_args)
    reconnect_timeout = conf.getint('core', 'SQL_ALCHEMY_RECONNECT_TIMEOUT')
    setup_event_handlers(engine, reconnect_timeout)

    Session = scoped_session(
        sessionmaker(autocommit=False, autoflush=False, bind=engine))
コード例 #49
0
ファイル: cli.py プロジェクト: imgaara/airflow
def backfill(args):
    logging.basicConfig(
        level=settings.LOGGING_LEVEL,
        format=settings.SIMPLE_LOG_FORMAT)
    dagbag = DagBag(process_subdir(args.subdir))
    if args.dag_id not in dagbag.dags:
        raise AirflowException('dag_id could not be found')
    dag = dagbag.dags[args.dag_id]

    if args.start_date:
        args.start_date = dateutil.parser.parse(args.start_date)
    if args.end_date:
        args.end_date = dateutil.parser.parse(args.end_date)

    # If only one date is passed, use it as both start and end
    args.end_date = args.end_date or args.start_date
    args.start_date = args.start_date or args.end_date

    if args.task_regex:
        dag = dag.sub_dag(
            task_regex=args.task_regex,
            include_upstream=not args.ignore_dependencies)

    if args.dry_run:
        print("Dry run of DAG {0} on {1}".format(args.dag_id,
                                                 args.start_date))
        for task in dag.tasks:
            print("Task {0}".format(task.task_id))
            ti = TaskInstance(task, args.start_date)
            ti.dry_run()
    else:
        dag.run(
            start_date=args.start_date,
            end_date=args.end_date,
            mark_success=args.mark_success,
            include_adhoc=args.include_adhoc,
            local=args.local,
            donot_pickle=(args.donot_pickle or configuration.getboolean('core', 'donot_pickle')),
            ignore_dependencies=args.ignore_dependencies,
            pool=args.pool)
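The same backfill can be triggered programmatically on an already-parsed DAG; the sketch below (dag id and date range are illustrative) mirrors what the CLI handler above does after argument parsing:

from datetime import datetime

from airflow.models import DagBag

dag = DagBag().get_dag('example_bash_operator')
dag.run(start_date=datetime(2016, 1, 1),
        end_date=datetime(2016, 1, 7),
        local=True,           # run with the LocalExecutor
        donot_pickle=True,    # do not pickle the DAG into the database
        mark_success=False)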
コード例 #50
0
    def test_kill_zombies(self, mock_ti_handle_failure):
        """
        Test that kill zombies call TIs failure handler with proper context
        """
        dagbag = models.DagBag()
        with create_session() as session:
            session.query(TI).delete()
            dag = dagbag.get_dag('example_branch_operator')
            task = dag.get_task(task_id='run_this_first')

            ti = TI(task, DEFAULT_DATE, State.RUNNING)

            session.add(ti)
            session.commit()

            zombies = [SimpleTaskInstance(ti)]
            dagbag.kill_zombies(zombies)
            mock_ti_handle_failure \
                .assert_called_with(ANY,
                                    configuration.getboolean('core',
                                                             'unit_test_mode'),
                                    ANY)
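The mock_ti_handle_failure argument implies the test method sits inside a test class and is wrapped in a patch decorator; a hypothetical framing is sketched below (the class name and patch target style are assumptions, and under Python 2 the standalone mock package would be used instead of unittest.mock):

import unittest
from unittest import mock

from airflow.models import TaskInstance as TI


class DagBagKillZombiesTest(unittest.TestCase):

    @mock.patch.object(TI, 'handle_failure')
    def test_kill_zombies(self, mock_ti_handle_failure):
        ...  # body as in the excerpt above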
コード例 #51
0
def configure_orm(disable_connection_pool=False):
    log.debug("Setting up DB connection pool (PID %s)" % os.getpid())
    global engine
    global Session
    engine_args = {}

    pool_connections = conf.getboolean('core', 'SQL_ALCHEMY_POOL_ENABLED')
    if disable_connection_pool or not pool_connections:
        engine_args['poolclass'] = NullPool
        log.debug("settings.configure_orm(): Using NullPool")
    elif 'sqlite' not in SQL_ALCHEMY_CONN:
        # Engine args not supported by sqlite.
        # If no config value is defined for the pool size, select a reasonable value.
        # 0 means no limit, which could lead to exceeding the Database connection limit.
        try:
            pool_size = conf.getint('core', 'SQL_ALCHEMY_POOL_SIZE')
        except conf.AirflowConfigException:
            pool_size = 5

        # The DB server already has a value for wait_timeout (number of seconds after
        # which an idle sleeping connection should be killed). Since other DBs may
        # co-exist on the same server, SQLAlchemy should set its
        # pool_recycle to an equal or smaller value.
        try:
            pool_recycle = conf.getint('core', 'SQL_ALCHEMY_POOL_RECYCLE')
        except conf.AirflowConfigException:
            pool_recycle = 1800

        log.info("setting.configure_orm(): Using pool settings. pool_size={}, "
                 "pool_recycle={}".format(pool_size, pool_recycle))
        engine_args['pool_size'] = pool_size
        engine_args['pool_recycle'] = pool_recycle

    engine = create_engine(SQL_ALCHEMY_CONN, **engine_args)
    reconnect_timeout = conf.getint('core', 'SQL_ALCHEMY_RECONNECT_TIMEOUT')
    setup_event_handlers(engine, reconnect_timeout)

    Session = scoped_session(
        sessionmaker(autocommit=False, autoflush=False, bind=engine))
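The pooling decision itself can be exercised with plain SQLAlchemy outside Airflow; a minimal sketch, with the connection URI and fallback values chosen to mirror the defaults above:

from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool


def make_engine(conn_uri, disable_connection_pool=False,
                pool_size=5, pool_recycle=1800):
    engine_args = {}
    if disable_connection_pool or 'sqlite' in conn_uri:
        # No pooling: every checkout opens a fresh connection and closes it on return.
        engine_args['poolclass'] = NullPool
    else:
        engine_args['pool_size'] = pool_size        # connections kept open concurrently
        engine_args['pool_recycle'] = pool_recycle  # seconds before an idle connection is replaced
    return create_engine(conn_uri, **engine_args)


engine = make_engine('sqlite:///pooling_demo.db')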
コード例 #52
0
def list_py_file_paths(directory, safe_mode=True,
                       include_examples=None):
    """
    Traverse a directory and look for Python files.

    :param directory: the directory to traverse
    :type directory: unicode
    :param safe_mode: whether to use a heuristic to determine whether a file
        contains Airflow DAG definitions
    :return: a list of paths to Python files in the specified directory
    :rtype: list[unicode]
    """
    if include_examples is None:
        include_examples = conf.getboolean('core', 'LOAD_EXAMPLES')
    file_paths = []
    if directory is None:
        return []
    elif os.path.isfile(directory):
        return [directory]
    elif os.path.isdir(directory):
        patterns_by_dir = {}
        for root, dirs, files in os.walk(directory, followlinks=True):
            patterns = patterns_by_dir.get(root, [])
            ignore_file = os.path.join(root, '.airflowignore')
            if os.path.isfile(ignore_file):
                with open(ignore_file, 'r') as f:
                    # If we have new patterns create a copy so we don't change
                    # the previous list (which would affect other subdirs)
                    patterns += [re.compile(p) for p in f.read().split('\n') if p]

            # If we can ignore any subdirs entirely we should - fewer paths
            # to walk is better. We have to modify the ``dirs`` array in
            # place for this to affect os.walk
            dirs[:] = [
                d
                for d in dirs
                if not any(p.search(os.path.join(root, d)) for p in patterns)
            ]

            # We want patterns defined in a parent folder's .airflowignore to
            # apply to subdirs too
            for d in dirs:
                patterns_by_dir[os.path.join(root, d)] = patterns

            for f in files:
                try:
                    file_path = os.path.join(root, f)
                    if not os.path.isfile(file_path):
                        continue
                    mod_name, file_ext = os.path.splitext(
                        os.path.split(file_path)[-1])
                    if file_ext != '.py' and not zipfile.is_zipfile(file_path):
                        continue
                    if any([re.findall(p, file_path) for p in patterns]):
                        continue

                    # Heuristic that guesses whether a Python file contains an
                    # Airflow DAG definition.
                    might_contain_dag = True
                    if safe_mode and not zipfile.is_zipfile(file_path):
                        with open(file_path, 'rb') as fp:
                            content = fp.read()
                            might_contain_dag = all(
                                [s in content for s in (b'DAG', b'airflow')])

                    if not might_contain_dag:
                        continue

                    file_paths.append(file_path)
                except Exception:
                    log = LoggingMixin().log
                    log.exception("Error while examining %s", f)
    if include_examples:
        import airflow.example_dags
        example_dag_folder = airflow.example_dags.__path__[0]
        file_paths.extend(list_py_file_paths(example_dag_folder, safe_mode, False))
    return file_paths
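A short usage sketch of the function above (the directory path is hypothetical): collect candidate DAG files while honouring .airflowignore patterns and the b'DAG'/b'airflow' content heuristic:

dag_files = list_py_file_paths('/opt/airflow/dags', safe_mode=True,
                               include_examples=False)
for path in dag_files:
    print(path)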
コード例 #53
0
ファイル: utils.py プロジェクト: AndreiDev/incubator-airflow
from io import BytesIO as IO
import functools
import gzip
import dateutil.parser as dateparser
import json
import time

from flask import after_this_request, request, Response
from flask_login import current_user
import wtforms
from wtforms.compat import text_type

from airflow import configuration, models, settings
from airflow.utils.json import AirflowJsonEncoder

AUTHENTICATE = configuration.getboolean('webserver', 'AUTHENTICATE')


class LoginMixin(object):
    def is_accessible(self):
        return (
            not AUTHENTICATE or (
                not current_user.is_anonymous() and
                current_user.is_authenticated()
            )
        )


class SuperUserMixin(object):
    def is_accessible(self):
        return (
            not AUTHENTICATE or (
                not current_user.is_anonymous() and
                current_user.is_superuser()
            )
        )
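These mixins are intended to be combined with flask-admin views; a hypothetical sketch (the view class and template name are illustrative):

from flask_admin import BaseView, expose


class ReportsView(LoginMixin, BaseView):
    @expose('/')
    def index(self):
        # Reachable only when AUTHENTICATE is off or the user is logged in.
        return self.render('airflow/report.html')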
コード例 #54
0
ファイル: jobs.py プロジェクト: KamilMroczek/airflow
from time import sleep

from sqlalchemy import Column, Integer, String, DateTime, func, Index, and_
from sqlalchemy.orm.session import make_transient

from airflow import executors, models, settings, utils
from airflow import configuration
from airflow.utils import AirflowException, State


Base = models.Base
ID_LEN = models.ID_LEN

# Setting up a statsd client if needed
statsd = None
if configuration.getboolean('scheduler', 'statsd_on'):
    from statsd import StatsClient
    statsd = StatsClient(
        host=configuration.get('scheduler', 'statsd_host'),
        port=configuration.getint('scheduler', 'statsd_port'),
        prefix=configuration.get('scheduler', 'statsd_prefix'))


class BaseJob(Base):
    """
    Abstract class to be derived for jobs. Jobs are processing items with state
    and duration that aren't task instances. For instance, a BackfillJob is
    a collection of task instance runs, but should have its own state, start
    and end time.
    """
コード例 #55
0
def create_app(config=None, session=None, testing=False, app_name="Airflow"):
    global app, appbuilder
    app = Flask(__name__)
    if conf.getboolean('webserver', 'ENABLE_PROXY_FIX'):
        app.wsgi_app = ProxyFix(app.wsgi_app)
    app.secret_key = conf.get('webserver', 'SECRET_KEY')

    airflow_home_path = conf.get('core', 'AIRFLOW_HOME')
    webserver_config_path = airflow_home_path + '/webserver_config.py'
    app.config.from_pyfile(webserver_config_path, silent=True)
    app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
    app.config['APP_NAME'] = app_name
    app.config['TESTING'] = testing

    csrf.init_app(app)

    db = SQLA(app)

    from airflow import api
    api.load_auth()
    api.api_auth.init_app(app)

    # flake8: noqa: F841
    cache = Cache(app=app, config={'CACHE_TYPE': 'filesystem', 'CACHE_DIR': '/tmp'})

    from airflow.www_rbac.blueprints import routes
    app.register_blueprint(routes)

    configure_logging()
    configure_manifest_files(app)

    with app.app_context():

        from airflow.www_rbac.security import AirflowSecurityManager
        security_manager_class = app.config.get('SECURITY_MANAGER_CLASS') or \
            AirflowSecurityManager

        if not issubclass(security_manager_class, AirflowSecurityManager):
            raise Exception(
                """Your CUSTOM_SECURITY_MANAGER must now extend AirflowSecurityManager,
                 not FAB's security manager.""")

        appbuilder = AppBuilder(
            app,
            db.session if not session else session,
            security_manager_class=security_manager_class,
            base_template='appbuilder/baselayout.html')

        def init_views(appbuilder):
            from airflow.www_rbac import views
            appbuilder.add_view_no_menu(views.Airflow())
            appbuilder.add_view_no_menu(views.DagModelView())
            appbuilder.add_view_no_menu(views.ConfigurationView())
            appbuilder.add_view_no_menu(views.VersionView())
            appbuilder.add_view(views.DagRunModelView,
                                "DAG Runs",
                                category="Browse",
                                category_icon="fa-globe")
            appbuilder.add_view(views.JobModelView,
                                "Jobs",
                                category="Browse")
            appbuilder.add_view(views.LogModelView,
                                "Logs",
                                category="Browse")
            appbuilder.add_view(views.SlaMissModelView,
                                "SLA Misses",
                                category="Browse")
            appbuilder.add_view(views.TaskInstanceModelView,
                                "Task Instances",
                                category="Browse")
            appbuilder.add_link("Configurations",
                                href='/configuration',
                                category="Admin",
                                category_icon="fa-user")
            appbuilder.add_view(views.ConnectionModelView,
                                "Connections",
                                category="Admin")
            appbuilder.add_view(views.PoolModelView,
                                "Pools",
                                category="Admin")
            appbuilder.add_view(views.VariableModelView,
                                "Variables",
                                category="Admin")
            appbuilder.add_view(views.XComModelView,
                                "XComs",
                                category="Admin")
            appbuilder.add_link("Documentation",
                                href='https://airflow.apache.org/',
                                category="Docs",
                                category_icon="fa-cube")
            appbuilder.add_link("Github",
                                href='https://github.com/apache/incubator-airflow',
                                category="Docs")
            appbuilder.add_link('Version',
                                href='/version',
                                category='About',
                                category_icon='fa-th')

            def integrate_plugins():
                """Integrate plugins to the context"""
                from airflow.plugins_manager import (
                    flask_appbuilder_views, flask_appbuilder_menu_links)

                for v in flask_appbuilder_views:
                    log.debug("Adding view %s", v["name"])
                    appbuilder.add_view(v["view"],
                                        v["name"],
                                        category=v["category"])
                for ml in sorted(flask_appbuilder_menu_links, key=lambda x: x["name"]):
                    log.debug("Adding menu link %s", ml["name"])
                    appbuilder.add_link(ml["name"],
                                        href=ml["href"],
                                        category=ml["category"],
                                        category_icon=ml["category_icon"])

            integrate_plugins()
            # Garbage collect old permissions/views after they have been modified.
            # Otherwise, when the name of a view or menu is changed, the framework
            # will add the new Views and Menus names to the backend, but will not
            # delete the old ones.

        init_views(appbuilder)

        security_manager = appbuilder.sm
        security_manager.sync_roles()

        from airflow.www_rbac.api.experimental import endpoints as e
        # required for testing purposes otherwise the module retains
        # a link to the default_auth
        if app.config['TESTING']:
            if six.PY2:
                reload(e) # noqa
            else:
                import importlib
                importlib.reload(e)

        app.register_blueprint(e.api_experimental, url_prefix='/api/experimental')

        @app.context_processor
        def jinja_globals():
            return {
                'hostname': socket.getfqdn(),
                'navbar_color': conf.get('webserver', 'NAVBAR_COLOR'),
            }

        @app.teardown_appcontext
        def shutdown_session(exception=None):
            settings.Session.remove()

    return app, appbuilder
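A minimal way to exercise this factory is through Flask's test client; the sketch below assumes an initialised Airflow environment (AIRFLOW_HOME, a generated webserver_config.py, and a metadata DB) and uses the /api/experimental/test endpoint exposed by the blueprint registered above:

app, appbuilder = create_app(testing=True)
with app.test_client() as client:
    resp = client.get('/api/experimental/test')
    print(resp.status_code, resp.data)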
コード例 #56
0
ファイル: settings.py プロジェクト: ludovicc/airflow
    @classmethod
    def decr(cls, stat, count=1, rate=1):
        pass

    @classmethod
    def gauge(cls, stat, value, rate=1, delta=False):
        pass

    @classmethod
    def timing(cls, stat, dt):
        pass

Stats = DummyStatsLogger

if conf.getboolean('scheduler', 'statsd_on'):
    from statsd import StatsClient
    statsd = StatsClient(
        host=conf.get('scheduler', 'statsd_host'),
        port=conf.getint('scheduler', 'statsd_port'),
        prefix=conf.get('scheduler', 'statsd_prefix'))
    Stats = statsd
else:
    Stats = DummyStatsLogger
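# Usage sketch (metric names are illustrative): callers emit metrics through the
# module-level Stats object, which is either a real StatsClient or the no-op
# DummyStatsLogger, so instrumentation works whether or not statsd is enabled:
#     Stats.incr('scheduler_heartbeat')
#     Stats.gauge('dagbag_size', 42)
#     Stats.timing('dag_processing.total_parse_time', 1.23)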


HEADER = """\
  ____________       _____________
 ____    |__( )_________  __/__  /________      __
____  /| |_  /__  ___/_  /_ __  /_  __ \_ | /| / /
___  ___ |  / _  /   _  __/ _  / / /_/ /_ |/ |/ /
 _/_/  |_/_/  /_/    /_/    /_/  \____/____/|__/
"""
コード例 #57
0
def create_app(config=None):
    app = Flask(__name__)
    app.secret_key = configuration.get('webserver', 'SECRET_KEY')
    app.config['LOGIN_DISABLED'] = not configuration.getboolean('webserver', 'AUTHENTICATE')

    csrf.init_app(app)

    #app.config = config
    airflow.load_login()
    airflow.login.login_manager.init_app(app)

    cache = Cache(
        app=app, config={'CACHE_TYPE': 'filesystem', 'CACHE_DIR': '/tmp'})

    app.register_blueprint(ck, url_prefix='/ck')
    app.register_blueprint(routes)
    app.jinja_env.add_extension("chartkick.ext.charts")

    with app.app_context():
        from airflow.www import views

        admin = Admin(
            app, name='Airflow',
            static_url_path='/admin',
            index_view=views.HomeView(endpoint='', url='/admin', name="DAGs"),
            template_mode='bootstrap3',
        )
        av = admin.add_view
        vs = views
        av(vs.Airflow(name='DAGs', category='DAGs'))

        av(vs.QueryView(name='Ad Hoc Query', category="Data Profiling"))
        av(vs.ChartModelView(
            models.Chart, Session, name="Charts", category="Data Profiling"))
        av(vs.KnowEventView(
            models.KnownEvent,
            Session, name="Known Events", category="Data Profiling"))
        av(vs.SlaMissModelView(
            models.SlaMiss,
            Session, name="SLA Misses", category="Browse"))
        av(vs.TaskInstanceModelView(models.TaskInstance,
            Session, name="Task Instances", category="Browse"))
        av(vs.LogModelView(
            models.Log, Session, name="Logs", category="Browse"))
        av(vs.JobModelView(
            jobs.BaseJob, Session, name="Jobs", category="Browse"))
        av(vs.PoolModelView(
            models.Pool, Session, name="Pools", category="Admin"))
        av(vs.ConfigurationView(
            name='Configuration', category="Admin"))
        av(vs.UserModelView(
            models.User, Session, name="Users", category="Admin"))
        av(vs.ConnectionModelView(
            models.Connection, Session, name="Connections", category="Admin"))
        av(vs.VariableView(
            models.Variable, Session, name="Variables", category="Admin"))

        admin.add_link(base.MenuLink(
            category='Docs', name='Documentation',
            url='http://pythonhosted.org/airflow/'))
        admin.add_link(
            base.MenuLink(category='Docs',
                name='Github', url='https://github.com/airbnb/airflow'))

        av(vs.DagRunModelView(
            models.DagRun, Session, name="DAG Runs", category="Browse"))
        av(vs.DagModelView(models.DagModel, Session, name=None))
        # Hack to not add this view to the menu
        admin._menu = admin._menu[:-1]

        def integrate_plugins():
            """Integrate plugins to the context"""
            from airflow.plugins_manager import (
                admin_views, flask_blueprints, menu_links)
            for v in admin_views:
                admin.add_view(v)
            for bp in flask_blueprints:
                app.register_blueprint(bp)
            for ml in menu_links:
                admin.add_link(ml)

        integrate_plugins()

        @app.context_processor
        def jinja_globals():
            return {
                'hostname': socket.gethostname(),
            }

        @app.teardown_appcontext
        def shutdown_session(exception=None):
            settings.Session.remove()

        return app
コード例 #58
0
ファイル: app.py プロジェクト: danielvdende/incubator-airflow
def create_app(config=None, testing=False):

    log = LoggingMixin().log

    app = Flask(__name__)
    app.wsgi_app = ProxyFix(app.wsgi_app)

    if configuration.conf.get('webserver', 'SECRET_KEY') == "temporary_key":
        log.info("SECRET_KEY for Flask App is not specified. Using a random one.")
        app.secret_key = os.urandom(16)
    else:
        app.secret_key = configuration.conf.get('webserver', 'SECRET_KEY')

    app.config['LOGIN_DISABLED'] = not configuration.conf.getboolean(
        'webserver', 'AUTHENTICATE')

    csrf.init_app(app)

    app.config['TESTING'] = testing

    airflow.load_login()
    airflow.login.login_manager.init_app(app)

    from airflow import api
    api.load_auth()
    api.api_auth.init_app(app)

    cache = Cache(
        app=app, config={'CACHE_TYPE': 'filesystem', 'CACHE_DIR': '/tmp'})

    app.register_blueprint(routes)

    configure_logging()

    with app.app_context():
        from airflow.www import views

        admin = Admin(
            app, name='Airflow',
            static_url_path='/admin',
            index_view=views.HomeView(endpoint='', url='/admin', name="DAGs"),
            template_mode='bootstrap3',
        )
        av = admin.add_view
        vs = views
        av(vs.Airflow(name='DAGs', category='DAGs'))

        if not conf.getboolean('core', 'secure_mode'):
            av(vs.QueryView(name='Ad Hoc Query', category="Data Profiling"))
            av(vs.ChartModelView(
                models.Chart, Session, name="Charts", category="Data Profiling"))
        av(vs.KnownEventView(
            models.KnownEvent,
            Session, name="Known Events", category="Data Profiling"))
        av(vs.SlaMissModelView(
            models.SlaMiss,
            Session, name="SLA Misses", category="Browse"))
        av(vs.TaskInstanceModelView(models.TaskInstance,
            Session, name="Task Instances", category="Browse"))
        av(vs.LogModelView(
            models.Log, Session, name="Logs", category="Browse"))
        av(vs.JobModelView(
            jobs.BaseJob, Session, name="Jobs", category="Browse"))
        av(vs.PoolModelView(
            models.Pool, Session, name="Pools", category="Admin"))
        av(vs.ConfigurationView(
            name='Configuration', category="Admin"))
        av(vs.UserModelView(
            models.User, Session, name="Users", category="Admin"))
        av(vs.ConnectionModelView(
            models.Connection, Session, name="Connections", category="Admin"))
        av(vs.VariableView(
            models.Variable, Session, name="Variables", category="Admin"))
        av(vs.XComView(
            models.XCom, Session, name="XComs", category="Admin"))

        admin.add_link(base.MenuLink(
            category='Docs', name='Documentation',
            url='https://airflow.incubator.apache.org/'))
        admin.add_link(
            base.MenuLink(category='Docs',
                          name='Github',
                          url='https://github.com/apache/incubator-airflow'))

        av(vs.VersionView(name='Version', category="About"))

        av(vs.DagRunModelView(
            models.DagRun, Session, name="DAG Runs", category="Browse"))
        av(vs.DagModelView(models.DagModel, Session, name=None))
        # Hack to not add this view to the menu
        admin._menu = admin._menu[:-1]

        def integrate_plugins():
            """Integrate plugins to the context"""
            from airflow.plugins_manager import (
                admin_views, flask_blueprints, menu_links)
            for v in admin_views:
                log.debug('Adding view %s', v.name)
                admin.add_view(v)
            for bp in flask_blueprints:
                log.debug('Adding blueprint %s', bp.name)
                app.register_blueprint(bp)
            for ml in sorted(menu_links, key=lambda x: x.name):
                log.debug('Adding menu link %s', ml.name)
                admin.add_link(ml)

        integrate_plugins()

        import airflow.www.api.experimental.endpoints as e
        # required for testing purposes otherwise the module retains
        # a link to the default_auth
        if app.config['TESTING']:
            if six.PY2:
                reload(e)
            else:
                import importlib
                importlib.reload(e)

        app.register_blueprint(e.api_experimental, url_prefix='/api/experimental')

        @app.context_processor
        def jinja_globals():
            return {
                'hostname': get_hostname(),
                'navbar_color': configuration.get('webserver', 'NAVBAR_COLOR'),
            }

        @app.teardown_appcontext
        def shutdown_session(exception=None):
            settings.Session.remove()

        return app
コード例 #59
0
    def start(self):
        self.task_queue = Queue()
        self.result_queue = Queue()
        framework = mesos_pb2.FrameworkInfo()
        framework.user = ''

        if not configuration.get('mesos', 'MASTER'):
            logging.error("Expecting mesos master URL for mesos executor")
            raise AirflowException("mesos.master not provided for mesos executor")

        master = configuration.get('mesos', 'MASTER')

        framework.name = get_framework_name()

        if not configuration.get('mesos', 'TASK_CPU'):
            task_cpu = 1
        else:
            task_cpu = configuration.getint('mesos', 'TASK_CPU')

        if not configuration.get('mesos', 'TASK_MEMORY'):
            task_memory = 256
        else:
            task_memory = configuration.getint('mesos', 'TASK_MEMORY')

        if configuration.getboolean('mesos', 'CHECKPOINT'):
            framework.checkpoint = True

            if configuration.get('mesos', 'FAILOVER_TIMEOUT'):
                # Import here to work around a circular import error
                from airflow.models import Connection

                # Query the database to get the ID of the Mesos Framework, if available.
                conn_id = FRAMEWORK_CONNID_PREFIX + framework.name
                session = Session()
                connection = session.query(Connection).filter_by(conn_id=conn_id).first()
                if connection is not None:
                    # Set the Framework ID to let the scheduler reconnect with running tasks.
                    framework.id.value = connection.extra

                framework.failover_timeout = configuration.getint('mesos', 'FAILOVER_TIMEOUT')
        else:
            framework.checkpoint = False

        logging.info('MesosFramework master : %s, name : %s, cpu : %s, mem : %s, checkpoint : %s',
            master, framework.name, str(task_cpu), str(task_memory), str(framework.checkpoint))

        implicit_acknowledgements = 1

        if configuration.getboolean('mesos', 'AUTHENTICATE'):
            if not configuration.get('mesos', 'DEFAULT_PRINCIPAL'):
                logging.error("Expecting authentication principal in the environment")
                raise AirflowException("mesos.default_principal not provided in authenticated mode")
            if not configuration.get('mesos', 'DEFAULT_SECRET'):
                logging.error("Expecting authentication secret in the environment")
                raise AirflowException("mesos.default_secret not provided in authenticated mode")

            credential = mesos_pb2.Credential()
            credential.principal = configuration.get('mesos', 'DEFAULT_PRINCIPAL')
            credential.secret = configuration.get('mesos', 'DEFAULT_SECRET')

            framework.principal = credential.principal

            driver = mesos.native.MesosSchedulerDriver(
                AirflowMesosScheduler(self.task_queue, self.result_queue, task_cpu, task_memory),
                framework,
                master,
                implicit_acknowledgements,
                credential)
        else:
            framework.principal = 'Airflow'
            driver = mesos.native.MesosSchedulerDriver(
                AirflowMesosScheduler(self.task_queue, self.result_queue, task_cpu, task_memory),
                framework,
                master,
                implicit_acknowledgements)

        self.mesos_driver = driver
        self.mesos_driver.start()
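The executor above is driven entirely by the [mesos] section of airflow.cfg; a hypothetical configuration covering the options read in start() (all values are illustrative) could look like:

[mesos]
master = mesos-master.example.com:5050
task_cpu = 1
task_memory = 256
checkpoint = True
failover_timeout = 604800
authenticate = False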