Example 1
    @classmethod
    @provide_session
    def set(
            cls,
            key,
            value,
            execution_date,
            task_id,
            dag_id,
            session=None):
        """保存中间结果
        Store an XCom value.
        TODO: "pickling" has been deprecated and JSON is preferred.
              "pickling" will be removed in Airflow 2.0.
        :return: None
        """
        # Expunge all existing instances from the session
        session.expunge_all()

        enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
        # Serialize the value, with pickle or JSON depending on config
        if enable_pickling:
            value = pickle.dumps(value)
        else:
            try:
            # Encode the JSON string as UTF-8 bytes
                value = json.dumps(value).encode('UTF-8')
            except ValueError:
                log = LoggingMixin().log
                log.error("Could not serialize the XCOM value into JSON. "
                          "If you are using pickles instead of JSON "
                          "for XCOM, then you need to enable pickle "
                          "support for XCOM in your airflow config.")
                raise

        # Remove any duplicate XComs with the same key, date, task and DAG
        session.query(cls).filter(
            cls.key == key,
            cls.execution_date == execution_date,
            cls.task_id == task_id,
            cls.dag_id == dag_id).delete()
        session.commit()

        # insert new XCom
        session.add(XCom(
            key=key,
            value=value,
            execution_date=execution_date,
            task_id=task_id,
            dag_id=dag_id))
        session.commit()
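The serialization branch above is easy to check in isolation. A minimal standalone sketch (hypothetical payload, no Airflow required) of the two round-trips XCom.set performs:

import json
import pickle

value = {"rows_processed": 42, "status": "ok"}  # hypothetical payload

# JSON path (enable_xcom_pickling = False): store UTF-8 encoded bytes
stored = json.dumps(value).encode('UTF-8')
assert json.loads(stored.decode('UTF-8')) == value

# Pickle path (enable_xcom_pickling = True): store pickled bytes
stored = pickle.dumps(value)
assert pickle.loads(stored) == value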
Example 2
def execute_command(command):
    """airflow worker 执行shell命令 ."""
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    env = os.environ.copy()
    try:
        # The Celery worker runs the shell command it received in the message
        subprocess.check_call(command,
                              shell=True,
                              stderr=subprocess.STDOUT,
                              close_fds=True,
                              env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(e.output)
        raise AirflowException('Celery command failed')
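The same check_call error-handling pattern can be exercised without Celery or Airflow; the RuntimeError at the end is a stand-in for AirflowException:

import subprocess

def run(command):
    try:
        subprocess.check_call(command,
                              shell=True,
                              stderr=subprocess.STDOUT,
                              close_fds=True)
    except subprocess.CalledProcessError:
        raise RuntimeError('Command failed: %s' % command)

run('echo hello')   # succeeds
# run('exit 1')     # would raise RuntimeError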
Example 3
    @classmethod
    @provide_session
    def get_one(cls,
                execution_date,
                key=None,
                task_id=None,
                dag_id=None,
                include_prior_dates=False,
                session=None):
        """获取一条中间结果
        Retrieve an XCom value, optionally meeting certain criteria.
        TODO: "pickling" has been deprecated and JSON is preferred.
              "pickling" will be removed in Airflow 2.0.
        :return: XCom value
        """
        # Build the search filters
        filters = []
        if key:
            filters.append(cls.key == key)
        if task_id:
            filters.append(cls.task_id == task_id)
        if dag_id:
            filters.append(cls.dag_id == dag_id)
        if include_prior_dates:
            filters.append(cls.execution_date <= execution_date)
        else:
            filters.append(cls.execution_date == execution_date)

        query = (
            session.query(cls.value).filter(and_(*filters))
                   .order_by(cls.execution_date.desc(), cls.timestamp.desc()))

        # Fetch the most recent matching record
        result = query.first()
        if result:
            enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
            if enable_pickling:
                return pickle.loads(result.value)
            else:
                try:
                    # Decode the UTF-8 bytes before parsing the JSON
                    return json.loads(result.value.decode('UTF-8'))
                except ValueError:
                    log = LoggingMixin().log
                    log.error("Could not deserialize the XCOM value from JSON. "
                              "If you are using pickles instead of JSON "
                              "for XCOM, then you need to enable pickle "
                              "support for XCOM in your airflow config.")
                    raise
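Building a filter list conditionally and combining it with and_(*filters) is a general SQLAlchemy idiom. A self-contained sketch against an in-memory SQLite database, assuming SQLAlchemy 1.4+ (the table and columns are illustrative, not Airflow's schema):

from sqlalchemy import create_engine, Column, Integer, String, and_
from sqlalchemy.orm import declarative_base, Session

Base = declarative_base()

class Item(Base):
    __tablename__ = 'item'
    id = Column(Integer, primary_key=True)
    key = Column(String)
    dag_id = Column(String)

engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([Item(key='a', dag_id='d1'), Item(key='a', dag_id='d2')])
    # Only add a clause for each criterion that was actually supplied
    filters = []
    key, dag_id = 'a', 'd2'
    if key:
        filters.append(Item.key == key)
    if dag_id:
        filters.append(Item.dag_id == dag_id)
    result = session.query(Item).filter(and_(*filters)).first()
    print(result.dag_id)  # d2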
Example 4
    def get_val(self):
        """Get the decrypted value of the variable."""
        log = LoggingMixin().log
        if self._val and self.is_encrypted:
            try:
                # Decrypt the value with the configured Fernet key
                fernet_key = configuration.conf.get('core', 'FERNET_KEY')
                fernet = get_fernet(fernet_key)
                return fernet.decrypt(bytes(self._val, 'utf-8')).decode()
            except InvalidFernetToken:
                # Return None if decryption fails
                log.error("Can't decrypt _val for key={}, invalid token "
                          "or value".format(self.key))
                return None
            except Exception:
                log.error("Can't decrypt _val for key={}, FERNET_KEY "
                          "configuration missing".format(self.key))
                return None
        else:
            return self._val
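The try/except structure maps directly onto the Fernet API from the cryptography package (its InvalidToken corresponds to the InvalidFernetToken caught above). A standalone round-trip sketch, with a generated key standing in for Airflow's core.FERNET_KEY setting:

from cryptography.fernet import Fernet, InvalidToken

fernet_key = Fernet.generate_key()  # stand-in for core.FERNET_KEY
fernet = Fernet(fernet_key)

token = fernet.encrypt(b'my secret value')
assert fernet.decrypt(token).decode() == 'my secret value'

try:
    fernet.decrypt(b'not a valid token')
except InvalidToken:
    print('invalid token or wrong key')  # the case get_val maps to None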
Example 5
            # Iterate over the module's attribute values
            for obj in list(m.__dict__.values()):
                # Check whether the class is a subclass of XToolPlugin
                if (
                        inspect.isclass(obj) and
                        issubclass(obj, XToolPlugin) and
                        obj is not XToolPlugin):
                    # Validate that the subclass defines the `name` class attribute
                    obj.validate()
                    # Add the class to the plugin list
                    if obj not in plugins:
                        plugins.append(obj)

        except Exception as e:
            log.exception(e)
            log.error('Failed to import plugin %s', filepath)


def make_module(name, objects):
    """Dynamically create a module.

    :param name: the module name
    :param objects: the objects to expose inside the module
    """
    log.debug('Creating module %s', name)
    name = name.lower()
    # Create the module
    module = imp.new_module(name)
    # Set the module's _name attribute (the plugin name)
    module._name = name.split('.')[-1]
    # Set the module's _objects attribute (all of the plugin's classes)
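The snippet is truncated above, and imp.new_module is deprecated in Python 3. A sketch of the same technique using types.ModuleType, where the completion of the body is an assumption based on the comments:

import types

def make_module(name, objects):
    """Dynamically create a module exposing the given objects."""
    name = name.lower()
    module = types.ModuleType(name)
    module._name = name.split('.')[-1]
    module._objects = objects  # assumed: mirrors the _objects comment above
    # Expose each object as a module attribute
    for obj in objects:
        setattr(module, obj.__name__, obj)
    return module

mod = make_module('plugins.demo', [dict, list])
print(mod._name)  # demo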
Example 6
from hdfs import InsecureClient, HdfsError

from airflow import configuration
from airflow.exceptions import AirflowException
from airflow.hooks.base_hook import BaseHook
from xTool.utils.log.logging_mixin import LoggingMixin

_kerberos_security_mode = configuration.conf.get("core",
                                                 "security") == "kerberos"
if _kerberos_security_mode:
    try:
        from hdfs.ext.kerberos import KerberosClient
    except ImportError:
        log = LoggingMixin().log
        log.error("Could not load the Kerberos extension for the WebHDFSHook.")
        raise


class AirflowWebHDFSHookException(AirflowException):
    pass


class WebHDFSHook(BaseHook):
    """
    Interact with HDFS. This class is a wrapper around the hdfscli library.
    """
    def __init__(self, webhdfs_conn_id='webhdfs_default', proxy_user=None):
        self.webhdfs_conn_id = webhdfs_conn_id
        self.proxy_user = proxy_user
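For context, the hdfscli client that the hook wraps can be constructed directly; the NameNode URL and user below are placeholders, and no request is sent to the cluster until a method such as list() or status() is called:

from hdfs import InsecureClient

# Placeholder address and user -- adjust for a real cluster
client = InsecureClient('http://namenode.example.com:50070', user='airflow')

# client.list('/') or client.status('/path') would issue WebHDFS requests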