Example #1
def get_fqdn(hostname_or_ip=None):
    # Get hostname
    try:
        if hostname_or_ip:
            fqdn = socket.gethostbyaddr(hostname_or_ip)[0]
            if fqdn == 'localhost':
                fqdn = get_hostname()
        else:
            fqdn = get_hostname()
    except IOError:
        fqdn = hostname_or_ip

    return fqdn
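
For reference, the reverse lookup that get_fqdn() builds on can be exercised directly. A minimal, self-contained sketch (the loopback address is just an illustration):

import socket

# socket.gethostbyaddr() returns (hostname, aliaslist, ipaddrlist); element [0]
# is what get_fqdn() uses above. Loopback addresses usually resolve back to
# 'localhost', which is why the example falls back to get_hostname() in that
# case, and a failed lookup raises OSError (IOError is an alias of OSError
# on Python 3).
try:
    print(socket.gethostbyaddr('127.0.0.1')[0])
except OSError as err:
    print(f"reverse lookup failed: {err}")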
Example #2
    def test_trigger_dag(self):
        with self.app.test_client() as c:
            url_template = '/api/experimental/dags/{}/dag_runs'
            response = c.post(
                url_template.format('example_bash_operator'),
                data=json.dumps(dict(run_id='my_run' + datetime.now().isoformat())),
                content_type="application/json"
            )
            self.assertEqual(401, response.status_code)

            response.url = 'http://{}'.format(get_hostname())

            class Request():
                headers = {}

            response.request = Request()
            response.content = ''
            response.raw = mock.MagicMock()
            response.connection = mock.MagicMock()
            response.connection.send = mock.MagicMock()

            # disable mutual authentication for testing
            client_auth.mutual_authentication = 3

            # case can influence the results
            client_auth.hostname_override = get_hostname()

            client_auth.handle_response(response)
            self.assertIn('Authorization', response.request.headers)

            response2 = c.post(
                url_template.format('example_bash_operator'),
                data=json.dumps(dict(run_id='my_run' + datetime.now().isoformat())),
                content_type="application/json",
                headers=response.request.headers
            )
            self.assertEqual(200, response2.status_code)
Example #3
    def heartbeat_callback(self, session=None):
        """Self destruct task if state has been moved away from running externally"""
        if self.terminating:
            # ensure termination if processes are created later
            self.task_runner.terminate()
            return

        self.task_instance.refresh_from_db()
        ti = self.task_instance

        if ti.state == State.RUNNING:
            fqdn = get_hostname()
            same_hostname = fqdn == ti.hostname
            if not same_hostname:
                self.log.warning(
                    "The recorded hostname %s does not match this instance's hostname %s",
                    ti.hostname,
                    fqdn,
                )
                raise AirflowException("Hostname of job runner does not match")
            current_pid = self.task_runner.process.pid
            recorded_pid = ti.pid
            same_process = recorded_pid == current_pid

            if ti.run_as_user or self.task_runner.run_as_user:
                recorded_pid = psutil.Process(ti.pid).ppid()
                same_process = recorded_pid == current_pid

            if recorded_pid is not None and not same_process:
                self.log.warning(
                    "Recorded pid %s does not match the current pid %s",
                    recorded_pid, current_pid)
                raise AirflowException("PID of job runner does not match")
        elif self.task_runner.return_code() is None and hasattr(
                self.task_runner, 'process'):
            self.log.warning(
                "State of this instance has been externally set to %s. Terminating instance.",
                ti.state)
            self.task_runner.terminate()
            if ti.state == State.SUCCESS:
                error = None
            else:
                # if ti.state is not set by taskinstance.handle_failure, then
                # error file will not be populated and it must be updated by
                # external source such as web UI
                error = self.task_runner.deserialize_run_error(
                ) or "task marked as failed externally"
            ti._run_finished_callback(error=error)
            self.terminating = True
Example #4
def task_run(args, dag=None):
    """Runs a single task instance"""
    if dag:
        args.dag_id = dag.dag_id

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if not args.pickle and not dag:
        dag = get_dag(args)
    elif not dag:
        with db.create_session() as session:
            print(f'Loading pickle id {args.pickle}')
            dag_pickle = session.query(DagPickle).filter(
                DagPickle.id == args.pickle).first()
            if not dag_pickle:
                raise AirflowException("Who hid the pickle!? [missing pickle]")
            dag = dag_pickle.pickle

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.refresh_from_db()

    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()
    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run(args, dag, ti)
    else:
        with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
            _run(args, dag, ti)
    logging.shutdown()
Example #5
def execute_command(command_to_exec: CommandType) -> None:
    """Executes command."""
    BaseExecutor.validate_command(command_to_exec)
    log.info("Executing command in Celery: %s", command_to_exec)
    env = os.environ.copy()
    try:
        # pylint: disable=unexpected-keyword-arg
        subprocess.check_output(command_to_exec, stderr=subprocess.STDOUT,
                                close_fds=True, env=env)
        # pylint: disable=unexpected-keyword-arg
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(e.output)
        msg = 'Celery command failed on host: ' + get_hostname()
        raise AirflowException(msg)
Example #6
 def __init__(self,
              executor=executors.get_default_executor(),
              heartrate=None,
              *args,
              **kwargs):
     self.hostname = get_hostname()
     self.executor = executor
     self.executor_class = executor.__class__.__name__
     self.start_date = timezone.utcnow()
     self.latest_heartbeat = timezone.utcnow()
     if heartrate is not None:
         self.heartrate = heartrate
     self.unixname = getpass.getuser()
     self.max_tis_per_query = conf.getint('scheduler', 'max_tis_per_query')
     super().__init__(*args, **kwargs)
Example #7
 def __init__(self, executor=None, heartrate=None, *args, **kwargs):
     self.hostname = get_hostname()
     if executor:
         self.executor = executor
         self.executor_class = executor.__class__.__name__
     else:
         self.executor_class = conf.get('core', 'EXECUTOR')
     self.start_date = timezone.utcnow()
     self.latest_heartbeat = timezone.utcnow()
     if heartrate is not None:
         self.heartrate = heartrate
     self.unixname = getuser()
     self.max_tis_per_query: int = conf.getint('scheduler',
                                               'max_tis_per_query')
     super().__init__(*args, **kwargs)
Example #8
def execute_command(command_to_exec: CommandType) -> None:
    """Executes command."""
    if command_to_exec[0:3] != ["airflow", "tasks", "run"]:
        raise ValueError('The command must start with ["airflow", "tasks", "run"].')

    log.info("Executing command in Celery: %s", command_to_exec)
    env = os.environ.copy()
    try:
        subprocess.check_call(command_to_exec, stderr=subprocess.STDOUT,
                              close_fds=True, env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(e.output)
        msg = 'Celery command failed on host: ' + get_hostname()
        raise AirflowException(msg)
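
Given the validation at the top, the executor only ever passes this function an "airflow tasks run" command line. A hypothetical call, purely to show the expected shape of the argument (the DAG id, task id and date are made up):

# Hypothetical arguments; any complete "airflow tasks run ..." command line works.
execute_command(["airflow", "tasks", "run", "example_dag", "example_task", "2021-01-01"])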
Example #9
def _execute_in_subprocess(command_to_exec: CommandType,
                           celery_task_id: Optional[str] = None) -> None:
    env = os.environ.copy()
    if celery_task_id:
        env["external_executor_id"] = celery_task_id
    try:
        subprocess.check_output(command_to_exec,
                                stderr=subprocess.STDOUT,
                                close_fds=True,
                                env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(e.output)
        msg = 'Celery command failed on host: ' + get_hostname()
        raise AirflowException(msg)
Example #10
def task_run(args, dag=None):
    """Runs a single task instance"""

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if dag and args.pickle:
        raise AirflowException(
            "You cannot use the --pickle option when using DAG.cli() method.")
    elif args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.refresh_from_db()

    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()
    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
            _run_task_by_selected_method(args, dag, ti)
    logging.shutdown()
Example #11
 def do_GET(self):
     if self.path == '/health':
         try:
             with create_session() as session:
                 scheduler_job = (session.query(SchedulerJob).filter_by(
                     hostname=get_hostname()).order_by(
                         SchedulerJob.latest_heartbeat.desc()).limit(
                             1).first())
             if scheduler_job and scheduler_job.is_alive():
                 self.send_response(200)
                 self.end_headers()
             else:
                 self.send_error(503)
         except Exception:
             self.send_error(503)
     else:
         self.send_error(404)
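
The handler above only answers /health, returning 200 when a recent SchedulerJob heartbeat exists and 503 otherwise. A minimal probe against such a health endpoint could look like this (host and port are assumptions for the sketch):

import urllib.error
import urllib.request

# Illustrative only: point this at wherever the health-check server is listening.
try:
    with urllib.request.urlopen("http://localhost:8974/health", timeout=5) as resp:
        print("scheduler healthy:", resp.status == 200)
except urllib.error.HTTPError as err:
    # 503 means no live scheduler heartbeat was found; 404 is any other path.
    print("scheduler unhealthy:", err.code)
except urllib.error.URLError as err:
    print("health server unreachable:", err.reason)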
Example #12
 def jinja_globals():
     return {
         'hostname':
         get_hostname() if conf.getboolean('webserver',
                                           'EXPOSE_HOSTNAME',
                                           fallback=True) else 'redact',
         'navbar_color':
         conf.get('webserver', 'NAVBAR_COLOR'),
         'log_fetch_delay_sec':
         conf.getint('webserver', 'log_fetch_delay_sec', fallback=2),
         'log_auto_tailing_offset':
         conf.getint('webserver',
                     'log_auto_tailing_offset',
                     fallback=30),
         'log_animation_speed':
         conf.getint('webserver', 'log_animation_speed', fallback=1000)
     }
Example #13
    def test_heartbeat_failed_fast(self, mock_getpid):
        """
        Test that task heartbeat will sleep when it fails fast
        """
        mock_getpid.return_value = 1

        heartbeat_records = []

        def heartbeat_recorder(**kwargs):
            heartbeat_records.append(timezone.utcnow())

        with create_session() as session:
            dagbag = models.DagBag(
                dag_folder=TEST_DAG_FOLDER,
                include_examples=False,
            )
            dag_id = 'test_heartbeat_failed_fast'
            task_id = 'test_heartbeat_failed_fast_op'
            dag = dagbag.get_dag(dag_id)
            task = dag.get_task(task_id)

            dag.create_dagrun(run_id="test_heartbeat_failed_fast_run",
                              state=State.RUNNING,
                              execution_date=DEFAULT_DATE,
                              start_date=DEFAULT_DATE,
                              session=session)
            ti = TI(task=task, execution_date=DEFAULT_DATE)
            ti.refresh_from_db()
            ti.state = State.RUNNING
            ti.hostname = get_hostname()
            ti.pid = 1
            session.commit()

            job = LocalTaskJob(task_instance=ti,
                               executor=MockExecutor(do_update=False))
            job.heartrate = 2
            job.heartbeat_callback = heartbeat_recorder
            job._execute()
            self.assertGreater(len(heartbeat_records), 1)
            for i in range(1, len(heartbeat_records)):
                time1 = heartbeat_records[i - 1]
                time2 = heartbeat_records[i]
                # Assert that the difference is small enough
                delta = (time2 - time1).total_seconds()
                self.assertAlmostEqual(delta, job.heartrate, delta=0.05)
Example #14
    def run_command(self, run_with=None):
        """
        Run the task command.

        :param run_with: list of tokens to run the task command with e.g. ``['bash', '-c']``
        :return: the process that was run
        :rtype: subprocess.Popen
        """
        run_with = run_with or []
        full_cmd = run_with + self._command

        self.log.info("Running on host: %s", get_hostname())
        self.log.info('Running: %s', full_cmd)
        with _airflow_parsing_context_manager(
                dag_id=self._task_instance.dag_id,
                task_id=self._task_instance.task_id,
        ):
            if IS_WINDOWS:
                proc = subprocess.Popen(
                    full_cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    universal_newlines=True,
                    close_fds=True,
                    env=os.environ.copy(),
                )
            else:
                proc = subprocess.Popen(
                    full_cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    universal_newlines=True,
                    close_fds=True,
                    env=os.environ.copy(),
                    preexec_fn=os.setsid,
                )

        # Start daemon thread to read subprocess logging output
        log_reader = threading.Thread(
            target=self._read_task_logs,
            args=(proc.stdout, ),
        )
        log_reader.daemon = True
        log_reader.start()
        return proc
Example #15
    def heartbeat_callback(self, session=None):
        """Self destruct task if state has been moved away from running externally"""
        if self.terminating:
            # ensure termination if processes are created later
            self.task_runner.terminate()
            return

        self.task_instance.refresh_from_db()
        ti = self.task_instance

        if ti.state == State.RUNNING:
            fqdn = get_hostname()
            same_hostname = fqdn == ti.hostname
            if not same_hostname:
                self.log.warning(
                    "The recorded hostname %s "
                    "does not match this instance's hostname "
                    "%s",
                    ti.hostname,
                    fqdn,
                )
                raise AirflowException("Hostname of job runner does not match")

            current_pid = os.getpid()
            same_process = ti.pid == current_pid
            if not same_process:
                self.log.warning(
                    "Recorded pid %s does not match "
                    "the current pid %s", ti.pid, current_pid)
                raise AirflowException("PID of job runner does not match")
        elif ti.state == State.KILLING:
            self.log.warning("This instance is being killed %s", ti)
        elif self.task_runner.return_code() is None and hasattr(
                self.task_runner, 'process'):
            self.log.warning(
                "State of this instance has been externally set to %s. "
                "Terminating instance.", ti.state)
            if ti.state == State.FAILED and ti.task.on_failure_callback:
                context = ti.get_template_context()
                ti.task.on_failure_callback(context)
            if ti.state == State.SUCCESS and ti.task.on_success_callback:
                context = ti.get_template_context()
                ti.task.on_success_callback(context)
            self.task_runner.terminate()
            self.terminating = True
Example #16
    def test_heartbeat_failed_fast(self):
        """
        Test that task heartbeat will sleep when it fails fast
        """
        self.mock_base_job_sleep.side_effect = time.sleep

        with create_session() as session:
            dagbag = DagBag(
                dag_folder=TEST_DAG_FOLDER,
                include_examples=False,
            )
            dag_id = 'test_heartbeat_failed_fast'
            task_id = 'test_heartbeat_failed_fast_op'
            dag = dagbag.get_dag(dag_id)
            task = dag.get_task(task_id)

            dag.create_dagrun(
                run_id="test_heartbeat_failed_fast_run",
                state=State.RUNNING,
                execution_date=DEFAULT_DATE,
                start_date=DEFAULT_DATE,
                session=session,
            )
            ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
            ti.refresh_from_db()
            ti.state = State.RUNNING
            ti.hostname = get_hostname()
            ti.pid = 1
            session.commit()

            job = LocalTaskJob(task_instance=ti,
                               executor=MockExecutor(do_update=False))
            job.heartrate = 2
            heartbeat_records = []
            job.heartbeat_callback = lambda session: heartbeat_records.append(
                job.latest_heartbeat)
            job._execute()
            self.assertGreater(len(heartbeat_records), 2)
            for i in range(1, len(heartbeat_records)):
                time1 = heartbeat_records[i - 1]
                time2 = heartbeat_records[i]
                # Assert that the difference is small enough
                delta = (time2 - time1).total_seconds()
                self.assertAlmostEqual(delta, job.heartrate, delta=0.05)
Example #17
    def test_localtaskjob_heartbeat(self):
        session = settings.Session()
        dag = DAG('test_localtaskjob_heartbeat',
                  start_date=DEFAULT_DATE,
                  default_args={'owner': 'owner1'})

        with dag:
            op1 = DummyOperator(task_id='op1')

        dag.clear()
        dr = dag.create_dagrun(
            run_id="test",
            state=State.SUCCESS,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )
        ti = dr.get_task_instance(task_id=op1.task_id, session=session)
        ti.state = State.RUNNING
        ti.hostname = "blablabla"
        session.commit()

        job1 = LocalTaskJob(task_instance=ti,
                            ignore_ti_state=True,
                            executor=SequentialExecutor())
        ti.task = op1
        ti.refresh_from_task(op1)
        job1.task_runner = StandardTaskRunner(job1)
        job1.task_runner.process = mock.Mock()
        with pytest.raises(AirflowException):
            job1.heartbeat_callback()  # pylint: disable=no-value-for-parameter

        job1.task_runner.process.pid = 1
        ti.state = State.RUNNING
        ti.hostname = get_hostname()
        ti.pid = 1
        session.merge(ti)
        session.commit()
        assert ti.pid != os.getpid()
        job1.heartbeat_callback(session=None)

        job1.task_runner.process.pid = 2
        with pytest.raises(AirflowException):
            job1.heartbeat_callback()  # pylint: disable=no-value-for-parameter
Example #18
 def from_dag(
     dag,
     dm,
     dag_folder,
     include_task_args,
     git_commit,
     is_committed,
     raw_data_only=False,
     include_source=True,
 ):
     # type: (DAG, DagModel, str, bool, str, bool, bool, bool) -> EDag
     # Can be Dag from DagBag or from DB, therefore not all attributes may exist
     source_code = _read_dag_file(dag.fileloc)
     return EDag(
         description=dag.description or "",
         root_task_ids=[t.task_id for t in getattr(dag, "roots", [])],
         tasks=[
             ETask.from_task(t, include_task_args, dag, include_source)
             for t in getattr(dag, "tasks", [])
         ] if not raw_data_only else [],
         owner=resolve_attribute_or_default_attribute(
             dag, ["owner", "owners"]),
         dag_id=dag.dag_id,
         schedule_interval=interval_to_str(dag.schedule_interval),
         catchup=resolve_attribute_or_default_value(dag, "catchup", False),
         start_date=resolve_attribute_or_default_value(
             dag, "start_date", None),
         end_date=resolve_attribute_or_default_value(dag, "end_date", None),
         dag_folder=dag_folder,
         hostname=get_hostname(),
         source_code=source_code
         if not raw_data_only and include_source else "",
         module_source_hash=source_md5(source_code),
         is_subdag=dag.is_subdag,
         tags=getattr(dm, "tags", []),
         task_type="DAG",
         task_args=_extract_args_from_dict(vars(dag))
         if include_task_args else {},
         is_active=dm.is_active,
         is_paused=dm.is_paused,
         git_commit=git_commit,
         is_committed=is_committed,
     )
Example #19
def renew_from_kt():
    # The config is specified in seconds. But we ask for that same amount in
    # minutes to give ourselves a large renewal buffer.
    renewal_lifetime = "%sm" % configuration.getint('kerberos',
                                                    'reinit_frequency')
    principal = configuration.get('kerberos',
                                  'principal').replace("_HOST", get_hostname())

    cmdv = [
        configuration.get('kerberos', 'kinit_path'),
        "-r",
        renewal_lifetime,
        "-k",  # host ticket
        "-t",
        configuration.get('kerberos', 'keytab'),  # specify keytab
        "-c",
        configuration.get('kerberos', 'ccache'),  # specify credentials cache
        principal
    ]
    log.info("Reinitting kerberos from keytab: " + " ".join(cmdv))

    subp = subprocess.Popen(cmdv,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            close_fds=True,
                            bufsize=-1,
                            universal_newlines=True)
    subp.wait()
    if subp.returncode != 0:
        log.error(
            "Couldn't reinit from keytab! `kinit' exited with %s.\n%s\n%s" %
            (subp.returncode, "\n".join(subp.stdout.readlines()), "\n".join(
                subp.stderr.readlines())))
        sys.exit(subp.returncode)

    global NEED_KRB181_WORKAROUND
    if NEED_KRB181_WORKAROUND is None:
        NEED_KRB181_WORKAROUND = detect_conf_var()
    if NEED_KRB181_WORKAROUND:
        # (From: HUE-640). Kerberos clocks have second-level granularity. Make sure we
        # renew the ticket after the initial valid time.
        time.sleep(1.5)
        perform_krb181_workaround()
Example #20
 def from_dag(dag, dag_folder, include_task_args):
     # type: (DAG, str, bool) -> EDag
     git_commit, git_committed = _get_git_status(dag_folder)
     return EDag(
         description=dag.description,
         root_task_ids=[t.task_id for t in dag.roots],
         tasks=[ETask.from_task(t, include_task_args) for t in dag.tasks],
         owner=dag.owner,
         dag_id=dag.dag_id,
         schedule_interval=interval_to_str(dag.schedule_interval),
         catchup=dag.catchup,
         start_date=dag.start_date or utcnow(),
         end_date=dag.end_date,
         is_committed=git_committed,
         git_commit=git_commit or "",
         dag_folder=dag_folder,
         hostname=get_hostname(),
         source_code=_read_dag_file(dag.fileloc),
         is_subdag=dag.is_subdag,
     )
Example #21
    def test_localtaskjob_double_trigger(self):
        dagbag = DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        dag = dagbag.dags.get('test_localtaskjob_double_trigger')
        task = dag.get_task('test_localtaskjob_double_trigger_task')

        session = settings.Session()

        dag.clear()
        dr = dag.create_dagrun(
            run_id="test",
            state=State.SUCCESS,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )
        ti = dr.get_task_instance(task_id=task.task_id, session=session)
        ti.state = State.RUNNING
        ti.hostname = get_hostname()
        ti.pid = 1
        session.merge(ti)
        session.commit()

        ti_run = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti_run.refresh_from_db()
        job1 = LocalTaskJob(task_instance=ti_run,
                            executor=SequentialExecutor())
        from airflow.task.task_runner.standard_task_runner import StandardTaskRunner

        with patch.object(StandardTaskRunner, 'start',
                          return_value=None) as mock_method:
            job1.run()
            mock_method.assert_not_called()

        ti = dr.get_task_instance(task_id=task.task_id, session=session)
        self.assertEqual(ti.pid, 1)
        self.assertEqual(ti.state, State.RUNNING)

        session.close()
Example #22
def _execute_in_fork(command_to_exec: CommandType,
                     celery_task_id: Optional[str] = None) -> None:
    pid = os.fork()
    if pid:
        # In parent, wait for the child
        pid, ret = os.waitpid(pid, 0)
        if ret == 0:
            return

        raise AirflowException('Celery command failed on host: ' +
                               get_hostname())

    from airflow.sentry import Sentry

    ret = 1
    try:
        from airflow.cli.cli_parser import get_parser

        settings.engine.pool.dispose()
        settings.engine.dispose()

        parser = get_parser()
        # [1:] - remove "airflow" from the start of the command
        args = parser.parse_args(command_to_exec[1:])
        args.shut_down_logging = False
        if celery_task_id:
            args.external_executor_id = celery_task_id

        setproctitle(f"airflow task supervisor: {command_to_exec}")

        args.func(args)
        ret = 0
    except Exception as e:
        log.exception("Failed to execute task %s.", str(e))
        ret = 1
    finally:
        Sentry.flush()
        logging.shutdown()
        os._exit(ret)
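
The parent/child handshake above (fork, waitpid, os._exit) is easy to get wrong, so here is a stripped-down, self-contained sketch of the same pattern on POSIX; the function and names are illustrative, not Airflow code:

import os
import sys


def run_in_fork(fn):
    # Parent branch: os.fork() returns the child's pid here.
    pid = os.fork()
    if pid:
        # os.waitpid() returns (pid, status); status is 0 only if the child
        # exited with code 0 and was not killed by a signal.
        _, status = os.waitpid(pid, 0)
        if status != 0:
            raise RuntimeError(f"child failed with wait status {status}")
        return

    # Child branch: always leave via os._exit() so the parent's stack is never
    # unwound a second time inside the child process.
    code = 1
    try:
        fn()
        code = 0
    finally:
        os._exit(code)


run_in_fork(lambda: print("hello from the child", file=sys.stderr))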
Example #23
    def _get_sensor_logger(self, si):
        """Return logger for a sensor instance object."""
        # The created log_id is used inside of smart sensor as the key to fetch
        # the corresponding in memory log handler.
        si.raw = False  # Otherwise set_context will fail
        log_id = "-".join([si.dag_id,
                           si.task_id,
                           si.execution_date.strftime("%Y_%m_%dT%H_%M_%S_%f"),
                           str(si.try_number)])
        logger = logging.getLogger('airflow.task' + '.' + log_id)

        if len(logger.handlers) == 0:
            handler = self.create_new_task_handler()
            logger.addHandler(handler)
            set_context(logger, si)

            line_break = ("-" * 120)
            logger.info(line_break)
            logger.info("Processing sensor task %s in smart sensor service on host: %s",
                        self.ti_key, get_hostname())
            logger.info(line_break)
        return logger
Example #24
def init_app(app):
    global _SERVICE_NAME

    hostname = app.config.get('SERVER_NAME')
    if not hostname:
        hostname = get_hostname()
    log.info("Kerberos: hostname %s", hostname)

    service = 'airflow'

    _SERVICE_NAME = "{}@{}".format(service, hostname)

    if 'KRB5_KTNAME' not in os.environ:
        os.environ['KRB5_KTNAME'] = conf.get('kerberos', 'keytab')

    try:
        log.info("Kerberos init: %s %s", service, hostname)
        principal = kerberos.getServerPrincipalDetails(service, hostname)
    except kerberos.KrbError as err:
        log.warning("Kerberos: %s", err)
    else:
        log.info("Kerberos API: server is %s", principal)
Example #25
def init_app(app):
    global _SERVICE_NAME

    hostname = app.config.get('SERVER_NAME')
    if not hostname:
        hostname = get_hostname()
    log.info("Kerberos: hostname %s", hostname)

    service = 'airflow'

    _SERVICE_NAME = "{}@{}".format(service, hostname)

    if 'KRB5_KTNAME' not in os.environ:
        os.environ['KRB5_KTNAME'] = conf.get('kerberos', 'keytab')

    try:
        log.info("Kerberos init: %s %s", service, hostname)
        principal = kerberos.getServerPrincipalDetails(service, hostname)
    except kerberos.KrbError as err:
        log.warning("Kerberos: %s", err)
    else:
        log.info("Kerberos API: server is %s", principal)
Example #26
    def test_localtaskjob_heartbeat(self, mock_pid):
        session = settings.Session()
        dag = DAG('test_localtaskjob_heartbeat',
                  start_date=DEFAULT_DATE,
                  default_args={'owner': 'owner1'})

        with dag:
            op1 = DummyOperator(task_id='op1')

        dag.clear()
        dr = dag.create_dagrun(
            run_id="test",
            state=State.SUCCESS,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )
        ti = dr.get_task_instance(task_id=op1.task_id, session=session)
        ti.state = State.RUNNING
        ti.hostname = "blablabla"
        session.commit()

        job1 = LocalTaskJob(task_instance=ti,
                            ignore_ti_state=True,
                            executor=SequentialExecutor())
        self.assertRaises(AirflowException, job1.heartbeat_callback)

        mock_pid.return_value = 1
        ti.state = State.RUNNING
        ti.hostname = get_hostname()
        ti.pid = 1
        session.merge(ti)
        session.commit()

        job1.heartbeat_callback(session=None)

        mock_pid.return_value = 2
        self.assertRaises(AirflowException, job1.heartbeat_callback)
Example #27
def perform_krb181_workaround():
    cmdv = [configuration.get('kerberos', 'kinit_path'),
            "-c", configuration.get('kerberos', 'ccache'),
            "-R"]  # Renew ticket_cache

    log.info("Renewing kerberos ticket to work around kerberos 1.8.1: " +
             " ".join(cmdv))

    ret = subprocess.call(cmdv)

    if ret != 0:
        principal = "%s/%s" % (configuration.get('kerberos', 'principal'), get_hostname())
        fmt_dict = dict(princ=principal,
                        ccache=configuration.get('kerberos', 'ccache'))
        log.error("Couldn't renew kerberos ticket in order to work around "
                  "Kerberos 1.8.1 issue. Please check that the ticket for "
                  "'%(princ)s' is still renewable:\n"
                  "  $ kinit -f -c %(ccache)s\n"
                  "If the 'renew until' date is the same as the 'valid starting' "
                  "date, the ticket cannot be renewed. Please check your KDC "
                  "configuration, and the ticket renewal policy (maxrenewlife) "
                  "for the '%(princ)s' and `krbtgt' principals." % fmt_dict)
        sys.exit(ret)
Example #28
    def heartbeat_callback(self, session=None):
        """Self destruct task if state has been moved away from running externally"""

        if self.terminating:
            # ensure termination if processes are created later
            self.task_runner.terminate()
            return

        self.task_instance.refresh_from_db()
        ti = self.task_instance

        fqdn = get_hostname()
        same_hostname = fqdn == ti.hostname
        same_process = ti.pid == os.getpid()

        if ti.state == State.RUNNING:
            if not same_hostname:
                self.log.warning("The recorded hostname %s "
                                 "does not match this instance's hostname "
                                 "%s", ti.hostname, fqdn)
                raise AirflowException("Hostname of job runner does not match")
            elif not same_process:
                current_pid = os.getpid()
                self.log.warning("Recorded pid %s does not match "
                                 "the current pid %s", ti.pid, current_pid)
                raise AirflowException("PID of job runner does not match")
        elif (
                self.task_runner.return_code() is None and
                hasattr(self.task_runner, 'process')
        ):
            self.log.warning(
                "State of this instance has been externally set to %s. "
                "Taking the poison pill.",
                ti.state
            )
            self.task_runner.terminate()
            self.terminating = True
Example #29
def renew_from_kt():
    # The config is specified in seconds. But we ask for that same amount in
    # minutes to give ourselves a large renewal buffer.
    renewal_lifetime = "%sm" % configuration.getint('kerberos', 'reinit_frequency')
    principal = configuration.get('kerberos', 'principal').replace("_HOST", get_hostname())

    cmdv = [configuration.get('kerberos', 'kinit_path'),
            "-r", renewal_lifetime,
            "-k",  # host ticket
            "-t", configuration.get('kerberos', 'keytab'),  # specify keytab
            "-c", configuration.get('kerberos', 'ccache'),  # specify credentials cache
            principal]
    log.info("Reinitting kerberos from keytab: " + " ".join(cmdv))

    subp = subprocess.Popen(cmdv,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            close_fds=True,
                            bufsize=-1,
                            universal_newlines=True)
    subp.wait()
    if subp.returncode != 0:
        log.error("Couldn't reinit from keytab! `kinit' exited with %s.\n%s\n%s" % (
            subp.returncode,
            "\n".join(subp.stdout.readlines()),
            "\n".join(subp.stderr.readlines())))
        sys.exit(subp.returncode)

    global NEED_KRB181_WORKAROUND
    if NEED_KRB181_WORKAROUND is None:
        NEED_KRB181_WORKAROUND = detect_conf_var()
    if NEED_KRB181_WORKAROUND:
        # (From: HUE-640). Kerberos clocks have second-level granularity. Make sure we
        # renew the ticket after the initial valid time.
        time.sleep(1.5)
        perform_krb181_workaround()
Example #30
def task_run(args, dag=None):
    """Runs a single task instance"""

    # Load custom airflow config
    if args.cfg_path:
        with open(args.cfg_path, 'r') as conf_file:
            conf_dict = json.load(conf_file)

        if os.path.exists(args.cfg_path):
            os.remove(args.cfg_path)

        conf.read_dict(conf_dict, source=args.cfg_path)
        settings.configure_vars()

    # IMPORTANT, have to use the NullPool, otherwise, each "run" command may leave
    # behind multiple open sleeping connections while heartbeating, which could
    # easily exceed the database connection limit when
    # processing hundreds of simultaneous tasks.
    settings.configure_orm(disable_connection_pool=True)

    if dag and args.pickle:
        raise AirflowException("You cannot use the --pickle option when using DAG.cli() method.")
    elif args.pickle:
        print(f'Loading pickle id: {args.pickle}')
        dag = get_dag_by_pickle(args.pickle)
    elif not dag:
        dag = get_dag(args.subdir, args.dag_id)
    else:
        # Use DAG from parameter
        pass

    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.init_run_context(raw=args.raw)

    hostname = get_hostname()

    print(f"Running {ti} on host {hostname}")

    if args.interactive:
        _run_task_by_selected_method(args, dag, ti)
    else:
        if settings.DONOT_MODIFY_HANDLERS:
            with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                    redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
                _run_task_by_selected_method(args, dag, ti)
        else:
            # Get all the Handlers from 'airflow.task' logger
            # Add these handlers to the root logger so that we can get logs from
            # any custom loggers defined in the DAG
            airflow_logger_handlers = logging.getLogger('airflow.task').handlers
            root_logger = logging.getLogger()
            root_logger_handlers = root_logger.handlers

            # Remove all handlers from Root Logger to avoid duplicate logs
            for handler in root_logger_handlers:
                root_logger.removeHandler(handler)

            for handler in airflow_logger_handlers:
                root_logger.addHandler(handler)
            root_logger.setLevel(logging.getLogger('airflow.task').level)

            with redirect_stdout(StreamLogWriter(ti.log, logging.INFO)), \
                    redirect_stderr(StreamLogWriter(ti.log, logging.WARN)):
                _run_task_by_selected_method(args, dag, ti)

            # We need to restore the handlers to the loggers as celery worker process
            # can call this command multiple times,
            # so if we don't reset this then logs from next task would go to the wrong place
            for handler in airflow_logger_handlers:
                root_logger.removeHandler(handler)
            for handler in root_logger_handlers:
                root_logger.addHandler(handler)

    logging.shutdown()
Example #31
 def jinja_globals():
     return {
         'hostname': get_hostname(),
     }
Example #32
def replace_hostname_pattern(components, host=None):
    fqdn = host
    if not fqdn or fqdn == '0.0.0.0':
        fqdn = get_hostname()
    return '%s/%s@%s' % (components[0], fqdn.lower(), components[2])
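
A hypothetical call, assuming the components list came from splitting a principal such as airflow/_HOST@EXAMPLE.COM on '/' and '@' (the realm here is made up):

# ['airflow', '_HOST', 'EXAMPLE.COM'] -> 'airflow/<local fqdn>@EXAMPLE.COM'
print(replace_hostname_pattern(['airflow', '_HOST', 'EXAMPLE.COM']))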
Example #33
 def test_get_hostname_set_missing(self, patched_conf):
     patched_conf.get = mock.Mock(
         return_value='tests.utils.test_net:missing_func'
     )
     with self.assertRaises(AttributeError):
         net.get_hostname()
Example #34
 def test_get_hostname_set(self, patched_conf):
     patched_conf.get = mock.Mock(
         return_value='tests.utils.test_net:get_hostname'
     )
     self.assertTrue(net.get_hostname() == 'awesomehostname')
Example #35
    def prepare_file_path_queue(self):
        """Generate more file paths to process. Result are saved in _file_path_queue."""
        self._parsing_start_time = time.perf_counter()
        # If the file path is already being processed, or if a file was
        # processed recently, wait until the next batch
        file_paths_in_progress = self._processors.keys()
        now = timezone.utcnow()

        # Sort the file paths by the parsing order mode
        list_mode = conf.get("scheduler", "file_parsing_sort_mode")

        files_with_mtime = {}
        file_paths = []
        is_mtime_mode = list_mode == "modified_time"

        file_paths_recently_processed = []
        for file_path in self._file_paths:

            if is_mtime_mode:
                try:
                    files_with_mtime[file_path] = os.path.getmtime(file_path)
                except FileNotFoundError:
                    self.log.warning("Skipping processing of missing file: %s",
                                     file_path)
                    continue
                file_modified_time = timezone.make_aware(
                    datetime.fromtimestamp(files_with_mtime[file_path]))
            else:
                file_paths.append(file_path)
                file_modified_time = None

            # Find file paths that were recently processed to exclude them
            # from being added to file_path_queue
            # unless they were modified recently and parsing mode is "modified_time"
            # in which case we don't honor "self._file_process_interval" (min_file_process_interval)
            last_finish_time = self.get_last_finish_time(file_path)
            if (last_finish_time is not None
                    and (now - last_finish_time).total_seconds() <
                    self._file_process_interval
                    and not (is_mtime_mode and file_modified_time and
                             (file_modified_time > last_finish_time))):
                file_paths_recently_processed.append(file_path)

        # Sort file paths via last modified time
        if is_mtime_mode:
            file_paths = sorted(files_with_mtime,
                                key=files_with_mtime.get,
                                reverse=True)
        elif list_mode == "alphabetical":
            file_paths = sorted(file_paths)
        elif list_mode == "random_seeded_by_host":
            # Shuffle the list seeded by hostname so multiple schedulers can work on different
            # set of files. Since we set the seed, the sort order will remain same per host
            random.Random(get_hostname()).shuffle(file_paths)

        files_paths_at_run_limit = [
            file_path for file_path, stat in self._file_stats.items()
            if stat.run_count == self._max_runs
        ]

        file_paths_to_exclude = set(file_paths_in_progress).union(
            file_paths_recently_processed, files_paths_at_run_limit)

        # Do not convert the following list to set as set does not preserve the order
        # and we need to maintain the order of file_paths for `[scheduler] file_parsing_sort_mode`
        files_paths_to_queue = [
            file_path for file_path in file_paths
            if file_path not in file_paths_to_exclude
        ]

        for file_path, processor in self._processors.items():
            self.log.debug(
                "File path %s is still being processed (started: %s)",
                processor.file_path,
                processor.start_time.isoformat(),
            )

        self.log.debug("Queuing the following files for processing:\n\t%s",
                       "\n\t".join(files_paths_to_queue))

        for file_path in files_paths_to_queue:
            if file_path not in self._file_stats:
                self._file_stats[file_path] = DagFileStat(
                    num_dags=0,
                    import_errors=0,
                    last_finish_time=None,
                    last_duration=None,
                    run_count=0)

        self._file_path_queue.extend(files_paths_to_queue)
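
The random_seeded_by_host branch above seeds a private random.Random with the hostname, so every scheduler host gets its own stable file ordering. A self-contained sketch of that idea, using socket.getfqdn() as a stand-in for get_hostname() and made-up paths:

import random
import socket

file_paths = [f"/dags/dag_{i}.py" for i in range(5)]  # illustrative paths

# Seeding with the hostname makes the shuffle deterministic per machine, so two
# schedulers tend to start from different files while each host keeps the same
# order from one parsing loop to the next.
shuffled = list(file_paths)
random.Random(socket.getfqdn()).shuffle(shuffled)
print(shuffled)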
Example #36
 def jinja_globals():
     return {
         'hostname': get_hostname(),
     }
Example #37
 def jinja_globals():
     return {
         'hostname': get_hostname(),
         'navbar_color': configuration.get('webserver', 'NAVBAR_COLOR'),
     }
Example #38
 def test_get_hostname_unset(self, patched_conf, patched_socket):
     patched_conf.get = mock.Mock(return_value=None)
     patched_socket.getfqdn = mock.Mock(return_value='first')
     self.assertTrue(net.get_hostname() == 'first')
Example #39
def replace_hostname_pattern(components, host=None):
    """Replaces hostname with the right pattern including lowercase of the name."""
    fqdn = host
    if not fqdn or fqdn == '0.0.0.0':
        fqdn = get_hostname()
    return '%s/%s@%s' % (components[0], fqdn.lower(), components[2])
Example #40
 def test_get_hostname_set_incorrect(self, patched_conf):
     patched_conf.get = mock.Mock(
         return_value='tests.utils.test_net'
     )
     with self.assertRaises(ValueError):
         net.get_hostname()
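
Taken together, the tests above (Examples #33, #34, #38 and #40) pin down the contract of get_hostname() itself: use socket.getfqdn() when no hostname callable is configured, import the callable from a 'module:attribute' string otherwise, and surface AttributeError for a missing attribute or ValueError for a malformed value. A minimal sketch consistent with that behaviour (editorial, not Airflow's actual implementation; the configuration lookup is replaced by a plain parameter):

import importlib
import socket


def get_hostname(hostname_callable=None):
    # `hostname_callable` stands in for the configured 'module:attribute' value.
    if not hostname_callable:
        return socket.getfqdn()
    if ':' not in hostname_callable:
        raise ValueError(
            f"Invalid format: {hostname_callable!r}, expected 'module:attribute'")
    module_path, attr_name = hostname_callable.split(':', 1)
    module = importlib.import_module(module_path)
    return getattr(module, attr_name)()  # AttributeError if the attribute is missing


print(get_hostname())                      # falls back to socket.getfqdn()
print(get_hostname('socket:gethostname'))  # resolves and calls socket.gethostname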
Example #41
def _run_raw_task(
    self,
    mark_success: bool = False,
    test_mode: bool = False,
    job_id: Optional[str] = None,
    pool: Optional[str] = None,
    error_file: Optional[str] = None,
    session=None,
) -> None:
    """
    Immediately runs the task (without checking or changing db state
    before execution) and then sets the appropriate final state after
    completion and runs any post-execute callbacks. Meant to be called
    only after another function changes the state to running.
    :param mark_success: Don't run the task, mark its state as success
    :type mark_success: bool
    :param test_mode: Doesn't record success or failure in the DB
    :type test_mode: bool
    :param pool: specifies the pool to use to run the task instance
    :type pool: str
    :param session: SQLAlchemy ORM Session
    :type session: Session
    """
    task = self.task
    self.test_mode = test_mode
    refresh_from_task(self, task, pool_override=pool)
    # self.refresh_from_db(session=session)
    self.job_id = job_id
    self.hostname = get_hostname()

    context = {}  # type: Dict
    actual_start_date = timezone.utcnow()
    try:
        if not mark_success:
            context = self.get_template_context()
            _prepare_and_execute_task_with_callbacks(self, context, task)
        self.refresh_from_db(lock_for_update=True)
        self.state = State.SUCCESS
    except AirflowSkipException as e:
        # Recording SKIP
        # log only if exception has any arguments to prevent log flooding
        if e.args:
            self.log.info(e)
        self.refresh_from_db(lock_for_update=True)
        self.state = State.SKIPPED
        self.log.info(
            'Marking task as SKIPPED. '
            'dag_id=%s, task_id=%s, execution_date=%s, start_date=%s, end_date=%s',
            self.dag_id,
            self.task_id,
            _date_or_empty(self, 'execution_date'),
            _date_or_empty(self, 'start_date'),
            _date_or_empty(self, 'end_date'),
        )
    # except AirflowRescheduleException as reschedule_exception:
    #     self.refresh_from_db()
    #     self._handle_reschedule(actual_start_date, reschedule_exception, test_mode)
    #     return
    # except AirflowFailException as e:
    #     self.refresh_from_db()
    #     self.handle_failure(e, test_mode, force_fail=True, error_file=error_file)
    #     raise
    except AirflowException as e:
        self.refresh_from_db()
        # for case when task is marked as success/failed externally
        # current behavior doesn't hit the success callback
        if self.state in {State.SUCCESS, State.FAILED}:
            return
        else:
            self.handle_failure(e, test_mode, error_file=error_file)
            raise
    except (Exception, KeyboardInterrupt) as e:
        self.handle_failure(e, test_mode, error_file=error_file)
        raise

    # Recording SUCCESS
    self.end_date = timezone.utcnow()
    self.log.info(
        'Marking task as SUCCESS. '
        'dag_id=%s, task_id=%s, execution_date=%s, start_date=%s, end_date=%s',
        self.dag_id,
        self.task_id,
        _date_or_empty(self, 'execution_date'),
        _date_or_empty(self, 'start_date'),
        _date_or_empty(self, 'end_date'),
    )
    self.set_duration()
    if not test_mode:
        session.add(Log(self.state, self))
        session.merge(self)

    session.commit()

    if not test_mode:
        _run_mini_scheduler_on_child_tasks(self, session)
Example #42
 def jinja_globals():
     return {
         'hostname': get_hostname(),
         'navbar_color': configuration.get('webserver', 'NAVBAR_COLOR'),
     }
Example #43
def get_localhost_name():
    return get_hostname()