Example #1
class FileTaskHandler(logging.Handler):
    """
    FileTaskHandler is a Python log handler that handles and reads
    task instance logs. It creates and delegates log handling
    to `logging.FileHandler` after receiving task instance context.
    It reads logs from the task instance's host machine.

    :param base_log_folder: Base log folder to place logs.
    :param filename_template: template filename string (deprecated; has no effect)
    """

    def __init__(self, base_log_folder: str, filename_template: Optional[str] = None):
        super().__init__()
        self.handler: Optional[logging.FileHandler] = None
        self.local_base = base_log_folder
        if filename_template is not None:
            warnings.warn(
                "Passing filename_template to a log handler is deprecated and has no effect",
                DeprecationWarning,
                # We want to reference the stack that actually instantiates the
                # handler, not the one that calls super().__init__().
                stacklevel=(2 if type(self) == FileTaskHandler else 3),
            )

    def set_context(self, ti: "TaskInstance"):
        """
        Provide task_instance context to airflow task handler.

        :param ti: task instance object
        """
        local_loc = self._init_file(ti)
        self.handler = NonCachingFileHandler(local_loc, encoding='utf-8')
        if self.formatter:
            self.handler.setFormatter(self.formatter)
        self.handler.setLevel(self.level)

    def emit(self, record):
        if self.handler:
            self.handler.emit(record)

    def flush(self):
        if self.handler:
            self.handler.flush()

    def close(self):
        if self.handler:
            self.handler.close()

    def _render_filename(self, ti: "TaskInstance", try_number: int) -> str:
        with create_session() as session:
            dag_run = ti.get_dagrun(session=session)
            template = dag_run.get_log_template(session=session).filename
        str_tpl, jinja_tpl = parse_template_string(template)

        if jinja_tpl:
            if hasattr(ti, "task"):
                context = ti.get_template_context()
            else:
                context = Context(ti=ti, ts=dag_run.logical_date.isoformat())
            context["try_number"] = try_number
            return render_template_to_string(jinja_tpl, context)
        elif str_tpl:
            try:
                dag = ti.task.dag
            except AttributeError:  # ti.task is not always set.
                data_interval = (dag_run.data_interval_start, dag_run.data_interval_end)
            else:
                if TYPE_CHECKING:
                    assert dag is not None
                data_interval = dag.get_run_data_interval(dag_run)
            if data_interval[0]:
                data_interval_start = data_interval[0].isoformat()
            else:
                data_interval_start = ""
            if data_interval[1]:
                data_interval_end = data_interval[1].isoformat()
            else:
                data_interval_end = ""
            return str_tpl.format(
                dag_id=ti.dag_id,
                task_id=ti.task_id,
                run_id=ti.run_id,
                data_interval_start=data_interval_start,
                data_interval_end=data_interval_end,
                execution_date=ti.get_dagrun().logical_date.isoformat(),
                try_number=try_number,
            )
        else:
            raise RuntimeError(f"Unable to render log filename for {ti}. This should never happen")

    def _read_grouped_logs(self):
        return False

    def _read(self, ti, try_number, metadata=None):
        """
        Template method that contains custom logic of reading
        logs given the try_number.

        :param ti: task instance record
        :param try_number: current try_number to read log from
        :param metadata: log metadata,
                         can be used for streaming log reading and auto-tailing.
        :return: log message as a string and metadata.
        """
        # Task instance here might be different from task instance when
        # initializing the handler. Thus explicitly getting log location
        # is needed to get correct log path.
        log_relative_path = self._render_filename(ti, try_number)
        location = os.path.join(self.local_base, log_relative_path)

        log = ""

        if os.path.exists(location):
            try:
                with open(location, encoding="utf-8", errors="surrogateescape") as file:
                    log += f"*** Reading local file: {location}\n"
                    log += "".join(file.readlines())
            except Exception as e:
                log = f"*** Failed to load local log file: {location}\n"
                log += f"*** {str(e)}\n"
        elif conf.get('core', 'executor') == 'KubernetesExecutor':
            try:
                from airflow.kubernetes.kube_client import get_kube_client

                kube_client = get_kube_client()

                if len(ti.hostname) >= 63:
                    # Kubernetes takes the pod name and truncates it for the hostname. This truncated hostname
                    # is returned for the fqdn to comply with the 63 character limit imposed by DNS standards
                    # on any label of a FQDN.
                    pod_list = kube_client.list_namespaced_pod(conf.get('kubernetes', 'namespace'))
                    matches = [
                        pod.metadata.name
                        for pod in pod_list.items
                        if pod.metadata.name.startswith(ti.hostname)
                    ]
                    if len(matches) == 1:
                        if len(matches[0]) > len(ti.hostname):
                            ti.hostname = matches[0]

                log += f'*** Trying to get logs (last 100 lines) from worker pod {ti.hostname} ***\n\n'

                res = kube_client.read_namespaced_pod_log(
                    name=ti.hostname,
                    namespace=conf.get('kubernetes', 'namespace'),
                    container='base',
                    follow=False,
                    tail_lines=100,
                    _preload_content=False,
                )

                for line in res:
                    log += line.decode()

            except Exception as f:
                log += f'*** Unable to fetch logs from worker pod {ti.hostname} ***\n{str(f)}\n\n'
        else:
            import httpx

            url = os.path.join("http://{ti.hostname}:{worker_log_server_port}/log", log_relative_path).format(
                ti=ti, worker_log_server_port=conf.get('logging', 'WORKER_LOG_SERVER_PORT')
            )
            log += f"*** Log file does not exist: {location}\n"
            log += f"*** Fetching from: {url}\n"
            try:
                timeout = None  # No timeout
                try:
                    timeout = conf.getint('webserver', 'log_fetch_timeout_sec')
                except (AirflowConfigException, ValueError):
                    pass

                signer = JWTSigner(
                    secret_key=conf.get('webserver', 'secret_key'),
                    expiration_time_in_seconds=conf.getint(
                        'webserver', 'log_request_clock_grace', fallback=30
                    ),
                    audience="task-instance-logs",
                )
                response = httpx.get(
                    url,
                    timeout=timeout,
                    headers={b'Authorization': signer.generate_signed_token({"filename": log_relative_path})},
                )
                response.encoding = "utf-8"

                if response.status_code == 403:
                    log += (
                        "*** !!!! Please make sure that all your Airflow components (e.g. "
                        "schedulers, webservers and workers) have "
                        "the same 'secret_key' configured in 'webserver' section and "
                        "time is synchronized on all your machines (for example with ntpd) !!!!!\n***"
                    )
                    log += (
                        "*** See more at https://airflow.apache.org/docs/apache-airflow/"
                        "stable/configurations-ref.html#secret-key\n***"
                    )
                # Check if the resource was properly fetched
                response.raise_for_status()

                log += '\n' + response.text
            except Exception as e:
                log += f"*** Failed to fetch log file from worker. {str(e)}\n"

        return log, {'end_of_log': True}

    def read(self, task_instance, try_number=None, metadata=None):
        """
        Read logs of given task instance from local machine.

        :param task_instance: task instance object
        :param try_number: task instance try_number to read logs from. If None
                           it returns all logs separated by try_number
        :param metadata: log metadata,
                         can be used for streaming log reading and auto-tailing.
        :return: a list of log lists (one per try_number), each holding (hostname, log string) tuples, and a list of metadata dicts
        """
        # A task instance increments its try number when it starts to run.
        # So the log for a particular task try will only show up once the
        # try number has been incremented in the DB, i.e. logs produced after
        # the CLI run starts but before try_number + 1 reaches the DB will not be displayed.

        if try_number is None:
            next_try = task_instance.next_try_number
            try_numbers = list(range(1, next_try))
        elif try_number < 1:
            logs = [
                [('default_host', f'Error fetching the logs. Try number {try_number} is invalid.')],
            ]
            return logs, [{'end_of_log': True}]
        else:
            try_numbers = [try_number]

        logs = [''] * len(try_numbers)
        metadata_array = [{}] * len(try_numbers)
        for i, try_number_element in enumerate(try_numbers):
            log, metadata = self._read(task_instance, try_number_element, metadata)
            # es_task_handler returns logs grouped by host; wrap handlers that return a plain log
            # string with a default/empty host so that the UI can render the response in the same way
            logs[i] = log if self._read_grouped_logs() else [(task_instance.hostname, log)]
            metadata_array[i] = metadata

        return logs, metadata_array

    def _init_file(self, ti):
        """
        Create log directory and give it correct permissions.

        :param ti: task instance object
        :return: full path of the log file for the given task instance
        """
        # To handle log writing when tasks are impersonated, the log files need to
        # be writable by the user that runs the Airflow command and the user
        # that is impersonated. This is mainly to handle corner cases with the
        # SubDagOperator. When the SubDagOperator is run, all of the operators
        # run under the impersonated user and create appropriate log files
        # as the impersonated user. However, if the user manually runs tasks
        # of the SubDagOperator through the UI, then the log files are created
        # by the user that runs the Airflow command. For example, the Airflow
        # run command may be run by the `airflow_sudoable` user, but the Airflow
        # tasks may be run by the `airflow` user. If the log files are not
        # writable by both users, then it's possible that re-running a task
        # via the UI (or vice versa) results in a permission error as the task
        # tries to write to a log file created by the other user.
        relative_path = self._render_filename(ti, ti.try_number)
        full_path = os.path.join(self.local_base, relative_path)
        directory = os.path.dirname(full_path)
        # Create the log file and give it group writable permissions
        # TODO(aoen): Make log dirs and logs globally readable for now since the SubDag
        # operator is not compatible with impersonation (e.g. if a Celery executor is used
        # for a SubDag operator and the SubDag operator has a different owner than the
        # parent DAG)
        Path(directory).mkdir(mode=0o777, parents=True, exist_ok=True)

        if not os.path.exists(full_path):
            open(full_path, "a").close()
            # TODO: Investigate using 444 instead of 666.
            try:
                os.chmod(full_path, 0o666)
            except OSError:
                logging.warning("OSError while change ownership of the log file")

        return full_path
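
A minimal usage sketch, not part of the original example: it shows the lifecycle this handler follows, where set_context() opens the per-task-instance file handler, emit()/flush() delegate to it, and read() fetches the logs back. The base log folder is hypothetical, and `ti` stands for a live TaskInstance obtained from a configured Airflow environment (with database access); it is not constructed here.

import logging

handler = FileTaskHandler(base_log_folder="/tmp/airflow/logs")  # hypothetical path

def write_task_log(ti, message: str) -> None:
    # Open the per-task-instance log file and send one record through it.
    handler.set_context(ti)
    record = logging.LogRecord(
        name="airflow.task",
        level=logging.INFO,
        pathname=__file__,
        lineno=0,
        msg=message,
        args=(),
        exc_info=None,
    )
    handler.emit(record)
    handler.flush()

def fetch_task_log(ti, try_number: int):
    # read() returns (logs, metadata): logs is a one-element list of
    # [(hostname, log_text)] when a specific try_number is requested.
    return handler.read(ti, try_number=try_number)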
Example #2
class FileProcessorHandler(logging.Handler):
    """
    FileProcessorHandler is a Python log handler that handles
    dag processor logs. It creates and delegates log handling
    to `logging.FileHandler` after receiving dag processor context.

    :param base_log_folder: Base log folder to place logs.
    :param filename_template: template filename string
    """

    def __init__(self, base_log_folder, filename_template):
        super().__init__()
        self.handler = None
        self.base_log_folder = base_log_folder
        self.dag_dir = os.path.expanduser(settings.DAGS_FOLDER)
        self.filename_template, self.filename_jinja_template = parse_template_string(filename_template)

        self._cur_date = datetime.today()
        Path(self._get_log_directory()).mkdir(parents=True, exist_ok=True)

        self._symlink_latest_log_directory()

    def set_context(self, filename):
        """
        Provide filename context to airflow task handler.

        :param filename: path of the file in which the DAG is located
        """
        local_loc = self._init_file(filename)
        self.handler = NonCachingFileHandler(local_loc)
        self.handler.setFormatter(self.formatter)
        self.handler.setLevel(self.level)

        if self._cur_date < datetime.today():
            self._symlink_latest_log_directory()
            self._cur_date = datetime.today()

    def emit(self, record):
        if self.handler is not None:
            self.handler.emit(record)

    def flush(self):
        if self.handler is not None:
            self.handler.flush()

    def close(self):
        if self.handler is not None:
            self.handler.close()

    def _render_filename(self, filename):

        # The Airflow log path used to be generated by `os.path.relpath(filename, self.dag_dir)`; however,
        # DAGs shipped inside the Airflow source code are not located in the DAG dir like other DAGs.
        # That would produce a log filepath outside our control, since it could end up outside the
        # log dir. The change here ensures that the log path for DAGs in the Airflow code is always
        # inside the log dir, like other DAGs. To differentiate them from regular DAGs, their logs
        # are placed under `log_dir/native_dags`.
        import airflow

        airflow_directory = airflow.__path__[0]
        if filename.startswith(airflow_directory):
            filename = os.path.join("native_dags", os.path.relpath(filename, airflow_directory))
        else:
            filename = os.path.relpath(filename, self.dag_dir)
        ctx = {'filename': filename}

        if self.filename_jinja_template:
            return self.filename_jinja_template.render(**ctx)

        return self.filename_template.format(filename=ctx['filename'])

    def _get_log_directory(self):
        now = datetime.utcnow()

        return os.path.join(self.base_log_folder, now.strftime("%Y-%m-%d"))

    def _symlink_latest_log_directory(self):
        """
        Create symbolic link to the current day's log directory to
        allow easy access to the latest scheduler log files.

        :return: None
        """
        log_directory = self._get_log_directory()
        latest_log_directory_path = os.path.join(self.base_log_folder, "latest")
        if os.path.isdir(log_directory):
            try:
                # if symlink exists but is stale, update it
                if os.path.islink(latest_log_directory_path):
                    if os.readlink(latest_log_directory_path) != log_directory:
                        os.unlink(latest_log_directory_path)
                        os.symlink(log_directory, latest_log_directory_path)
                elif os.path.isdir(latest_log_directory_path) or os.path.isfile(latest_log_directory_path):
                    logging.warning(
                        "%s already exists as a dir/file. Skip creating symlink.", latest_log_directory_path
                    )
                else:
                    os.symlink(log_directory, latest_log_directory_path)
            except OSError:
                logging.warning("OSError while attempting to symlink the latest log directory")

    def _init_file(self, filename):
        """
        Create log file and directory if required.

        :param filename: path of the DAG file being processed
        :return: absolute path of the log file for the given file
        """
        relative_log_file_path = os.path.join(self._get_log_directory(), self._render_filename(filename))
        log_file_path = os.path.abspath(relative_log_file_path)
        directory = os.path.dirname(log_file_path)

        Path(directory).mkdir(parents=True, exist_ok=True)

        if not os.path.exists(log_file_path):
            open(log_file_path, "a").close()

        return log_file_path
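
A minimal sketch, not part of the original example, wiring FileProcessorHandler into a standard logger. The base folder, filename template, and DAG file path are hypothetical, and `settings.DAGS_FOLDER` is assumed to point at a configured Airflow installation.

import logging

handler = FileProcessorHandler(
    base_log_folder="/tmp/airflow/logs/dag_processor",  # hypothetical folder
    filename_template="{filename}.log",                 # hypothetical string template
)
handler.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s - %(message)s"))
handler.setLevel(logging.INFO)

logger = logging.getLogger("airflow.processor")
logger.setLevel(logging.INFO)
logger.addHandler(handler)

# Point the handler at the DAG file currently being processed, then log through it.
handler.set_context("/opt/airflow/dags/example_dag.py")  # hypothetical DAG file path
logger.info("Started processing example_dag.py")
handler.flush()
handler.close()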