Example #1
    def getint(self, section, key, **kwargs):
        val = self.get(section, key, **kwargs)

        try:
            return int(val)
        except ValueError:
            raise AirflowConfigException(
                f'Failed to convert value to int. Please check "{key}" key in "{section}" section. '
                f'Current value: "{val}".'
            )
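A minimal usage sketch for the accessor above, assuming the module-level `conf` object exported by `airflow.configuration` and the standard `parallelism` option in `[core]`; a value that cannot be parsed as an integer surfaces as `AirflowConfigException`:

from airflow.configuration import conf
from airflow.exceptions import AirflowConfigException

try:
    parallelism = conf.getint("core", "parallelism")
except AirflowConfigException:
    # the configured value could not be converted to int
    parallelism = 32  # illustrative fallback only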
Example #2
 def _validate_enums(self):
     """Validate that enum type config has an accepted value"""
     for (section_key, option_key), enum_options in self.enums_options.items():
         if self.has_option(section_key, option_key):
             value = self.get(section_key, option_key)
             if value not in enum_options:
                 raise AirflowConfigException(
                     f"`[{section_key}] {option_key}` should not be "
                     + f"{value!r}. Possible values: {', '.join(enum_options)}."
                 )
Example #3
    def get(self, section, key, **kwargs):
        section = str(section).lower()
        key = str(key).lower()

        deprecated_name = self.deprecated_options.get(section,
                                                      {}).get(key, None)

        # first check environment variables
        option = self._get_env_var_option(section, key)
        if option is not None:
            return option
        if deprecated_name:
            option = self._get_env_var_option(section, deprecated_name)
            if option is not None:
                self._warn_deprecate(section, key, deprecated_name)
                return option

        # ...then the config file
        if super(AirflowConfigParser, self).has_option(section, key):
            # Use the parent's methods to get the actual config here to be able to
            # separate the config from default config.
            return expand_env_var(
                super(AirflowConfigParser, self).get(section, key, **kwargs))
        if deprecated_name:
            if super(AirflowConfigParser,
                     self).has_option(section, deprecated_name):
                self._warn_deprecate(section, key, deprecated_name)
                return expand_env_var(
                    super(AirflowConfigParser,
                          self).get(section, deprecated_name, **kwargs))

        # ...then commands
        option = self._get_cmd_option(section, key)
        if option:
            return option
        if deprecated_name:
            option = self._get_cmd_option(section, deprecated_name)
            if option:
                self._warn_deprecate(section, key, deprecated_name)
                return option

        # ...then the default config
        if self.airflow_defaults.has_option(section,
                                            key) or 'fallback' in kwargs:
            return expand_env_var(
                self.airflow_defaults.get(section, key, **kwargs))

        else:
            log.warning(
                "section/key [{section}/{key}] not found in config".format(
                    **locals()))

            raise AirflowConfigException(
                "section/key [{section}/{key}] not found "
                "in config".format(**locals()))
Example #4
    def get_conn(self) -> grpc.Channel:
        base_url = self.conn.host

        if self.conn.port:
            base_url = base_url + ":" + str(self.conn.port)

        auth_type = self._get_field("auth_type")

        if auth_type == "NO_AUTH":
            channel = grpc.insecure_channel(base_url)
        elif auth_type in {"SSL", "TLS"}:
            credential_file_name = self._get_field("credential_pem_file")
            with open(credential_file_name, "rb") as credential_file:
                creds = grpc.ssl_channel_credentials(credential_file.read())
            channel = grpc.secure_channel(base_url, creds)
        elif auth_type == "JWT_GOOGLE":
            credentials, _ = google_auth.default()
            jwt_creds = google_auth_jwt.OnDemandCredentials.from_signing_credentials(credentials)
            channel = google_auth_transport_grpc.secure_authorized_channel(jwt_creds, None, base_url)
        elif auth_type == "OATH_GOOGLE":
            scopes = self._get_field("scopes").split(",")
            credentials, _ = google_auth.default(scopes=scopes)
            request = google_auth_transport_requests.Request()
            channel = google_auth_transport_grpc.secure_authorized_channel(credentials, request, base_url)
        elif auth_type == "CUSTOM":
            if not self.custom_connection_func:
                raise AirflowConfigException(
                    "Customized connection function not set, not able to establish a channel"
                )
            channel = self.custom_connection_func(self.conn)
        else:
            raise AirflowConfigException(
                "auth_type not supported or not provided, channel cannot be established,\
                given value: %s"
                % str(auth_type)
            )

        if self.interceptors:
            for interceptor in self.interceptors:
                channel = grpc.intercept_channel(channel, interceptor)

        return channel
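A hedged usage sketch for the hook method above, assuming `GrpcHook` from the `apache-airflow-providers-grpc` package and an Airflow connection `grpc_default` whose extras set the `auth_type` field (the exact extras key names vary across provider versions):

from airflow.providers.grpc.hooks.grpc import GrpcHook

hook = GrpcHook(grpc_conn_id="grpc_default")
channel = hook.get_conn()  # grpc.Channel built according to the connection's auth_type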
Example #5
    def __init__(  # pylint: disable=too-many-arguments,too-many-locals
        self,
        pod: Optional[k8s.V1Pod] = None,
        pod_template_file: Optional[str] = None,
        extract_xcom: bool = True
    ):
        if not pod_template_file and not pod:
            raise AirflowConfigException("Podgenerator requires either a "
                                         "`pod` or a `pod_template_file` argument")
        if pod_template_file and pod:
            raise AirflowConfigException("Cannot pass both `pod` "
                                         "and `pod_template_file` arguments")

        if pod_template_file:
            self.ud_pod = self.deserialize_model_file(pod_template_file)
        else:
            self.ud_pod = pod

        # Attach sidecar
        self.extract_xcom = extract_xcom
    def _validate_config_dependencies(self):
        """
        Validate that config values aren't invalid given other config values
        or system-level limitations and requirements.
        """
        if (
                self.get("core", "executor") not in ('DebugExecutor', 'SequentialExecutor') and
                "sqlite" in self.get('core', 'sql_alchemy_conn')):
            raise AirflowConfigException(
                "error: cannot use sqlite with the {}".format(
                    self.get('core', 'executor')))

        if self.has_option('core', 'mp_start_method'):
            mp_start_method = self.get('core', 'mp_start_method')
            start_method_options = multiprocessing.get_all_start_methods()

            if mp_start_method not in start_method_options:
                raise AirflowConfigException(
                    "mp_start_method should not be " + mp_start_method +
                    ". Possible values are " + ", ".join(start_method_options))
Example #7
 def _validate(self):
     # TODO: use XOR for dags_volume_claim and git_dags_folder_mount_point
     if not self.dags_volume_claim \
        and not self.dags_volume_host \
        and not self.dags_in_image \
        and (not self.git_repo or not self.git_branch or not self.git_dags_folder_mount_point):
         raise AirflowConfigException(
             'In kubernetes mode the following must be set in the `kubernetes` '
             'config section: `dags_volume_claim` '
             'or `dags_volume_host` '
             'or `dags_in_image` '
             'or `git_repo and git_branch and git_dags_folder_mount_point`')
     if self.git_repo \
        and (self.git_user or self.git_password) \
        and self.git_ssh_key_secret_name:
         raise AirflowConfigException(
             'In kubernetes mode, using `git_repo` to pull the DAGs: '
             'for private repositories, either `git_user` and `git_password` '
             'must be set for authentication through user credentials; '
             'or `git_ssh_key_secret_name` must be set for authentication '
             'through ssh key, but not both')
Example #8
 def getboolean(self, section, key, **kwargs):
     val = str(self.get(section, key, **kwargs)).lower().strip()
     if '#' in val:
         val = val.split('#')[0].strip()
     if val in ('t', 'true', '1'):
         return True
     elif val in ('f', 'false', '0'):
         return False
     else:
         raise AirflowConfigException(
             'The value for configuration option "{}:{}" is not a '
             'boolean (received "{}").'.format(section, key, val))
 def _validate(self):
     # TODO: use XOR for dags_volume_claim and git_dags_folder_mount_point
     if not self.dags_volume_claim \
        and not self.dags_volume_host \
        and not self.dags_in_image \
        and (not self.git_repo or not self.git_branch or not self.git_dags_folder_mount_point):
         raise AirflowConfigException(
             'In kubernetes mode the following must be set in the `kubernetes` '
             'config section: `dags_volume_claim` '
             'or `dags_volume_host` '
             'or `dags_in_image` '
             'or `git_repo and git_branch and git_dags_folder_mount_point`')
Example #10
 def getboolean(self, section, key, **kwargs):
     val = str(self.get(section, key, **kwargs)).lower().strip()
     if '#' in val:
         val = val.split('#')[0].strip()
     if val in ('t', 'true', '1'):
         return True
     elif val in ('f', 'false', '0'):
         return False
     else:
         raise AirflowConfigException(
             f'Failed to convert value to bool. Please check "{key}" key in "{section}" section. '
             f'Current value: "{val}".')
Example #11
    def validate_pod_generator_args(given_args):
        """
        :param given_args: The arguments passed to the PodGenerator constructor.
        :type given_args: dict
        :return: None

        Validate that if `pod` or `pod_template_file` are set that the user is not attempting
        to configure the pod with the other arguments.
        """
        pod_args = list(inspect.signature(PodGenerator).parameters.items())

        def predicate(k, v):
            """
            :param k: an arg to PodGenerator
            :type k: string
            :param v: the parameter of the given arg
            :type v: inspect.Parameter
            :return: bool

            returns True if the PodGenerator argument has no default value
            or its default is None, and it is not one of the fields listed
            in `non_empty_fields`.
            """
            non_empty_fields = {
                'pod', 'pod_template_file', 'extract_xcom', 'service_account_name', 'image_pull_policy',
                'restart_policy'
            }

            return (v.default is None or v.default is v.empty) and k not in non_empty_fields

        args_without_defaults = {k: given_args[k] for k, v in pod_args if predicate(k, v) and given_args[k]}

        if given_args['pod'] and given_args['pod_template_file']:
            raise AirflowConfigException("Cannot pass both `pod` and `pod_template_file` arguments")
        if args_without_defaults and (given_args['pod'] or given_args['pod_template_file']):
            raise AirflowConfigException(
                "Cannot configure pod and pass either `pod` or `pod_template_file`. Fields {} passed.".format(
                    list(args_without_defaults.keys())
                )
            )
Example #12
    def __init__(self, deploy_type, deploy_target, secret, key=None, items=None):
        """
        Initialize a Kubernetes Secret Object. Used to track requested secrets from
        the user.

        :param deploy_type: The type of secret deploy in Kubernetes, either `env` or
            `volume`
        :type deploy_type: str
        :param deploy_target: (Optional) The environment variable to expose the
            secret as when `deploy_type` is `env`, or the file path to mount it
            at when `deploy_type` is `volume`. If `key` is not provided,
            `deploy_target` should be None.
        :type deploy_target: str or None
        :param secret: Name of the secrets object in Kubernetes
        :type secret: str
        :param key: (Optional) Key of the secret within the Kubernetes Secret;
            if not provided with `deploy_type` `env`, all keys of the secret
            object are mounted
        :type key: str or None
        :param items: (Optional) items that can be added to a volume secret to
            project secret keys to specific paths, see
            https://kubernetes.io/docs/concepts/configuration/secret/#projection-of-secret-keys-to-specific-paths
        :type items: List[k8s.V1KeyToPath]
        """
        if deploy_type not in ('env', 'volume'):
            raise AirflowConfigException("deploy_type must be env or volume")

        self.deploy_type = deploy_type
        self.deploy_target = deploy_target
        self.items = items or []

        if deploy_target is not None and deploy_type == 'env':
            # if deploying to env, capitalize the deploy target
            self.deploy_target = deploy_target.upper()

        if key is not None and deploy_target is None:
            raise AirflowConfigException(
                'If `key` is set, `deploy_target` should not be None'
            )

        self.secret = secret
        self.key = key
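Two hedged construction sketches for the `Secret` class above, assuming Airflow 2.x's `airflow.kubernetes.secret` module and an existing Kubernetes Secret named `airflow-secrets`:

from airflow.kubernetes.secret import Secret

# expose one key of the secret as the environment variable SQL_CONN
env_secret = Secret("env", "SQL_CONN", "airflow-secrets", "sql_alchemy_conn")

# mount every key of the secret as files under /etc/sql_conn
volume_secret = Secret("volume", "/etc/sql_conn", "airflow-secrets")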
Example #13
def _get_config_value_from_secret_backend(config_key):
    """Get Config option values from Secret Backend"""
    try:
        secrets_client = get_custom_secret_backend()
        if not secrets_client:
            return None
        return secrets_client.get_config(config_key)
    except Exception as e:  # pylint: disable=broad-except
        raise AirflowConfigException(
            'Cannot retrieve config from alternative secrets backend. '
            'Make sure it is configured properly and that the Backend '
            'is accessible.\n'
            f'{e}')
    def _validate_config_dependencies(self):
        """
        Validate that config values aren't invalid given other config values
        or system-level limitations and requirements.
        """

        if (
                self.get("core", "executor") not in ('DebugExecutor', 'SequentialExecutor') and
                "sqlite" in self.get('core', 'sql_alchemy_conn')):
            raise AirflowConfigException(
                "error: cannot use sqlite with the {}".format(
                    self.get('core', 'executor')))

        elif (
            self.getboolean("webserver", "authenticate") and
            self.get("webserver", "owner_mode") not in ['user', 'ldapgroup']
        ):
            raise AirflowConfigException(
                "error: owner_mode option should be either "
                "'user' or 'ldapgroup' when filtering by owner is set")

        elif (
            self.getboolean("webserver", "authenticate") and
            self.get("webserver", "owner_mode").lower() == 'ldapgroup' and
            self.get("webserver", "auth_backend") != (
                'airflow.contrib.auth.backends.ldap_auth')
        ):
            raise AirflowConfigException(
                "error: attempt at using ldapgroup "
                "filtering without using the Ldap backend")

        if self.has_option('core', 'mp_start_method'):
            mp_start_method = self.get('core', 'mp_start_method')
            start_method_options = multiprocessing.get_all_start_methods()

            if mp_start_method not in start_method_options:
                raise AirflowConfigException(
                    "mp_start_method should not be " + mp_start_method +
                    ". Possible values are " + ", ".join(start_method_options))
Example #15
    def __init__(self,
                 conn_id,
                 cluster_creation_task,
                 cluster_type=None,
                 *args,
                 **kwargs):
        super(AWSTerminateClusterOperator, self).__init__(*args, **kwargs)
        self.conn_id = conn_id
        self.cluster_creation_task = cluster_creation_task
        self.cluster_type = cluster_type

        if self.cluster_type not in ALLOWED_CLUSTER_TYPES:
            raise AirflowConfigException(f"Param `cluster_type` must be one of {ALLOWED_CLUSTER_TYPES}")
Example #16
    def _get_option_from_default_config(self, section, key, **kwargs):
        # ...then the default config
        if self.airflow_defaults.has_option(section,
                                            key) or 'fallback' in kwargs:
            return expand_env_var(
                self.airflow_defaults.get(section, key, **kwargs))

        else:
            log.warning("section/key [%s/%s] not found in config", section,
                        key)

            raise AirflowConfigException(
                f"section/key [{section}/{key}] not found in config")
Example #17
    def _validate(self):
        if (self.get("core", "executor") != 'SequentialExecutor'
                and "sqlite" in self.get('core', 'sql_alchemy_conn')):
            raise AirflowConfigException(
                "error: cannot use sqlite with the {}".format(
                    self.get('core', 'executor')))

        elif (self.getboolean("webserver", "authenticate") and self.get(
                "webserver", "owner_mode") not in ['user', 'ldapgroup']):
            raise AirflowConfigException(
                "error: owner_mode option should be either "
                "'user' or 'ldapgroup' when filtering by owner is set")

        elif (self.getboolean("webserver", "authenticate")
              and self.get("webserver", "owner_mode").lower() == 'ldapgroup'
              and self.get("webserver", "auth_backend") !=
              ('airflow.contrib.auth.backends.ldap_auth')):
            raise AirflowConfigException(
                "error: attempt at using ldapgroup "
                "filtering without using the Ldap backend")

        self.is_validated = True
Example #18
    def _validate(self):
        if (self.get("core", "executor") != 'SequentialExecutor'
                and "sqlite" in self.get('core', 'sql_alchemy_conn')):
            raise AirflowConfigException(
                "error: cannot use sqlite with the {}".format(
                    self.get('core', 'executor')))

        elif (self.getboolean("webserver", "authenticate") and self.get(
                "webserver", "owner_mode") not in ['user', 'ldapgroup']):
            raise AirflowConfigException(
                "error: owner_mode option should be either "
                "'user' or 'ldapgroup' when filtering by owner is set")

        elif (self.getboolean("webserver", "authenticate")
              and self.get("webserver", "owner_mode").lower() == 'ldapgroup'
              and self.get("webserver", "auth_backend") !=
              ('airflow.contrib.auth.backends.ldap_auth')):
            raise AirflowConfigException(
                "error: attempt at using ldapgroup "
                "filtering without using the Ldap backend")

        for section, replacement in self.deprecated_values.items():
            for name, info in replacement.items():
                old, new, version = info
                if self.get(section, name, fallback=None) == old:
                    # Make sure the env var option is removed, otherwise it
                    # would be read and used instead of the value we set
                    env_var = self._env_var_name(section, name)
                    os.environ.pop(env_var, None)

                    self.set(section, name, new)
                    warnings.warn(
                        self.deprecation_value_format_string.format(
                            **locals()),
                        FutureWarning,
                    )

        self.is_validated = True
Example #19
def run_command(command):
    """Runs command and returns stdout"""
    process = subprocess.Popen(
        shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True
    )
    output, stderr = (stream.decode(sys.getdefaultencoding(), 'ignore') for stream in process.communicate())

    if process.returncode != 0:
        raise AirflowConfigException(
            f"Cannot execute {command}. Error code is: {process.returncode}. "
            f"Output: {output}, Stderr: {stderr}"
        )

    return output
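A minimal usage sketch; `run_command` is the helper behind the `_cmd` config options (such as `sql_alchemy_conn_cmd`), where a shell command produces the option value, and any non-zero exit code raises `AirflowConfigException`:

from airflow.configuration import run_command

secret_uri = run_command("echo postgresql://user:pass@localhost/airflow").strip()  # illustrative command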
Example #20
def _get_dict_from_list(base_list, field_to_merge="name"):
    """
    :type base_list: list(Optional[dict, *to_dict])
    """
    result = {}
    for obj in base_list:
        if isinstance(obj, dict):
            result[obj[field_to_merge]] = obj
        elif hasattr(obj, "to_dict"):
            result[getattr(obj, field_to_merge)] = obj
        else:
            raise AirflowConfigException(
                "Trying to merge invalid object {}".format(obj))
    return result
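An illustrative call to the helper above: it keys each object by its `name` field (dicts directly, other objects via the attribute), so two lists of Kubernetes objects can later be merged by name:

containers = [
    {"name": "base", "image": "apache/airflow"},
    {"name": "sidecar", "image": "busybox"},
]
by_name = _get_dict_from_list(containers)
assert by_name["sidecar"]["image"] == "busybox"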
Example #21
    def _validate_config_dependencies(self):
        """
        Validate that config values aren't invalid given other config values
        or system-level limitations and requirements.
        """
        is_executor_without_sqlite_support = self.get("core", "executor") not in (
            'DebugExecutor',
            'SequentialExecutor',
        )
        is_sqlite = "sqlite" in self.get('core', 'sql_alchemy_conn')
        if is_sqlite and is_executor_without_sqlite_support:
            raise AirflowConfigException(f"error: cannot use sqlite with the {self.get('core', 'executor')}")
        if is_sqlite:
            import sqlite3
            from distutils.version import StrictVersion

            from airflow.utils.docs import get_docs_url

            # Some of the features in storing rendered fields require sqlite version >= 3.15.0
            min_sqlite_version = '3.15.0'
            if StrictVersion(sqlite3.sqlite_version) < StrictVersion(min_sqlite_version):
                raise AirflowConfigException(
                    f"error: sqlite C library version too old (< {min_sqlite_version}). "
                    f"See {get_docs_url('howto/set-up-database.rst#setting-up-a-sqlite-database')}"
                )

        if self.has_option('core', 'mp_start_method'):
            mp_start_method = self.get('core', 'mp_start_method')
            start_method_options = multiprocessing.get_all_start_methods()

            if mp_start_method not in start_method_options:
                raise AirflowConfigException(
                    "mp_start_method should not be "
                    + mp_start_method
                    + ". Possible values are "
                    + ", ".join(start_method_options)
                )
def run_command(command):
    """
    Runs command and returns stdout
    """
    process = subprocess.Popen(
        shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, stderr = [stream.decode(sys.getdefaultencoding(), 'ignore')
                      for stream in process.communicate()]

    if process.returncode != 0:
        raise AirflowConfigException(
            "Cannot execute {}. Error code is: {}. Output: {}, Stderr: {}"
            .format(command, process.returncode, output, stderr)
        )

    return output
Example #23
    def start_go_pipeline(
        self,
        variables: dict,
        go_file: str,
        process_line_callback: Optional[Callable[[str], None]] = None,
        should_init_module: bool = False,
    ) -> None:
        """
        Starts Apache Beam Go pipeline.

        :param variables: Variables passed to the job.
        :param go_file: Path to the Go file with your beam pipeline.
        :param process_line_callback: (optional) Callback that can be used to process each line of
            the stdout and stderr file descriptors.
        :param should_init_module: If False (default), will just execute a `go run` command. If True, will
            init a module and dependencies with a ``go mod init`` and ``go mod tidy``, useful when pulling
            source with GCSHook.
        :return:
        """
        if shutil.which("go") is None:
            raise AirflowConfigException(
                "You need to have Go installed to run beam go pipeline. See https://go.dev/doc/install "
                "installation guide. If you are running airflow in Docker see more info at "
                "'https://airflow.apache.org/docs/docker-stack/recipes.html'.")

        if "labels" in variables:
            variables["labels"] = json.dumps(variables["labels"],
                                             separators=(",", ":"))

        working_directory = os.path.dirname(go_file)
        basename = os.path.basename(go_file)

        if should_init_module:
            init_module("main", working_directory)
            install_dependencies(working_directory)

        command_prefix = ["go", "run", basename]
        self._start_pipeline(
            variables=variables,
            command_prefix=command_prefix,
            process_line_callback=process_line_callback,
            working_directory=working_directory,
        )
Example #24
    def getimport(self, section, key, **kwargs):  # noqa
        """
        Reads the option, imports the fully qualified name, and returns the object.

        In case of failure, it throws an exception with a clear message giving the key and the section names.

        :return: The object or None, if the option is empty
        """
        full_qualified_path = conf.get(section=section, key=key, **kwargs)
        if not full_qualified_path:
            return None

        try:
            return import_string(full_qualified_path)
        except ImportError as e:
            log.error(e)
            raise AirflowConfigException(
                f'The object could not be loaded. Please check "{key}" key in "{section}" section. '
                f'Current value: "{full_qualified_path}".')
Example #25
def getuser() -> str:
    """
    Gets the username associated with the current user, or raises an error with
    a helpful message if there's no current user.

    We don't want to fall back to os.getuid() because not having a username
    probably means the rest of the user environment is wrong (e.g. no $HOME).
    Explicit failure is better than silently trying to work badly.
    """
    try:
        return getpass.getuser()
    except KeyError:
        # Inner import to avoid circular import
        from airflow.exceptions import AirflowConfigException

        raise AirflowConfigException(
            "The user that Airflow is running as has no username; you must run"
            "Airflow as a full user, with a username and home directory, "
            "in order for it to function properly.")
    def load_executor(cls, executor_name: str) -> BaseExecutor:
        """
        Loads the executor.

        This supports the following formats:
        * by executor name for core executor
        * by ``{plugin_name}.{class_name}`` for executor from plugins
        * by import path.

        :return: an instance of executor class via executor_name
        """
        if executor_name == CELERY_KUBERNETES_EXECUTOR:
            return cls.__load_celery_kubernetes_executor()

        if executor_name in cls.executors:
            log.debug("Loading core executor: %s", executor_name)
            return import_string(cls.executors[executor_name])()
        # If the executor name looks like "plugin executor path" then try to load plugins.
        if executor_name.count(".") == 1:
            log.debug(
                "The executor name looks like the plugin path (executor_name=%s). Trying to load a "
                "executor from a plugin",
                executor_name,
            )
            with suppress(ImportError), suppress(AttributeError):
                # Load plugins here for executors as at that time the plugins might not have been
                # initialized yet
                from airflow import plugins_manager

                plugins_manager.integrate_executor_plugins()
                return import_string(f"airflow.executors.{executor_name}")()

        log.debug("Loading executor from custom path: %s", executor_name)
        try:
            executor = import_string(executor_name)()
        except ImportError as e:
            log.error(e)
            raise AirflowConfigException(
                f'The module/attribute could not be loaded. Please check "executor" key in "core" section. '
                f'Current value: "{executor_name}".')
        log.info("Loaded executor: %s", executor_name)

        return executor
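A short sketch for `load_executor`, assuming `ExecutorLoader` from `airflow.executors.executor_loader`; a name that is neither a core executor, a plugin path, nor an importable dotted path ends in `AirflowConfigException`:

from airflow.executors.executor_loader import ExecutorLoader

executor = ExecutorLoader.load_executor("LocalExecutor")  # core executor resolved by name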
def fetch_config(ds, **kwargs):
    CONFIG_PARAM, config = 'file', None
    conf_file = kwargs['dag_run'].conf[CONFIG_PARAM]
    if conf_file is None or conf_file.strip() == '':
        raise AirflowException(
            'Config parameter {} is not specified.'.format(CONFIG_PARAM))
    print('Config file for the job: {}'.format(conf_file))
    print('Reading configuration from {}'.format(conf_file))
    try:
        with open(conf_file, "r") as f:
            config = yaml.safe_load(f)
    except Exception as error:
        raise AirflowException(
            'Error while reading the config file: {}'.format(error))
    try:
        validate(instance=config, schema=CONFIG_SCHEMA)
    except ValidationError as error:
        raise AirflowConfigException(
            'Invalid configuration specified: {}'.format(error))
    if 'staging_table_id' not in config['bigquery']:
        config['bigquery']['staging_table_id'] = 'staging_{}'.format(
            str(uuid4())[:8])
    config['bigquery']['merge_table'] = '{}:{}.{}'.format(
        config['bigquery']['project_id'], config['bigquery']['dataset_id'],
        config['bigquery']['table_id'])
    config['bigquery']['staging_table'] = '{}:{}.{}'.format(
        config['bigquery']['project_id'], config['bigquery']['dataset_id'],
        config['bigquery']['staging_table_id'])
    if 'jobname' not in config['dataflow']:
        config['dataflow']['jobname'] = '{}-{}'.format(
            config['dataflow']['job_name_prefix'],
            str(uuid4())[:8])
    config['bigquery'][
        'merge_query'] = 'MERGE `{}` t USING `{}` s ON {} WHEN MATCHED THEN {} WHEN NOT MATCHED THEN {}'.format(
            config['bigquery']['merge_table'].replace(':', '.'),
            config['bigquery']['staging_table'].replace(':', '.'),
            config['bigquery']['merge']['condition'],
            config['bigquery']['merge']['matched'],
            config['bigquery']['merge']['notmatched'])
    print('Airflow config: {}'.format(config))
    config_var = 'config-{}'.format(kwargs['dag_run'].run_id)
    print('Writing config to variable: {}'.format(config_var))
    Variable.set(config_var, config, serialize_json=True)
Example #28
    def execute(self, context):
        ambari_hook = HdpAmbariHook(ambari_conn_id=self.ambari_conn_id)
        datas = {"user.name": ambari_hook.cluster_name}

        for attr_name in ["statusdir", "files", "callback"]:
            attr_value = getattr(self, attr_name)
            if attr_value is not None and attr_value != "":
                datas[attr_name] = attr_value

        if is_not_null_and_is_not_empty_str(self.file):
            datas["file"] = self.file
        elif not is_not_null_and_is_not_empty_str(self.file) and is_not_null_and_is_not_empty_str(self.execute_query):
            datas["execute"] = self.execute_query
        else:
            raise AirflowConfigException("Request body must include file or execute params")

        if self.enablelog:
            datas["enablelog"] = self.enablelog

        job_id = ambari_hook.submit_hive_job(datas, self.arg)
        if self.do_xcom_push:
            context['ti'].xcom_push(key='hive_job_id', value=job_id)
Example #29
    def submit_hive_job(self, body_params: dict, arg: str = None) -> str:
        """
        Executes hql code or hive script in Azure HDInsight Cluster

        See https://cwiki.apache.org/confluence/display/Hive/WebHCat+Reference+Job

        :param arg: arg params for Hive, e.g. key1=value1;key2=value2
        :param execution_timeout: connection timeout when talking to the Hortonworks cluster
        :type execution_timeout: int
        :param body_params: parameters of the Hive script
        :type body_params: dict

        """

        if not ("file" in body_params or "execute" in body_params):
            raise AirflowConfigException(
                "Request body must include file or execute params")

        body_params.update(self.default_params)

        if arg is not None:
            hive_defines = urlencode([("define", x)
                                      for x in str(arg).split(";")])
            self.query = urlencode(body_params) + "&" + hive_defines
        else:
            self.query = urlencode(body_params)

        self.method = "POST"
        submit_endpoint = self.hive_endpoint + "hive"

        self.log.debug("Submiting hive  Script: %s", str(self.query))
        response = self.run(endpoint=submit_endpoint,
                            data=self.query,
                            headers=self.headers)

        job_id = response["id"]
        self.log.info("Finished submitting hive script job_id: %s", job_id)
        return job_id
Example #30
def validate_logging_config(logging_config):
    """Validate the provided Logging Config"""
    # Now let's validate the other logging-related settings
    task_log_reader = conf.get('logging', 'task_log_reader')

    logger = logging.getLogger('airflow.task')

    def _get_handler(name):
        return next((h for h in logger.handlers if h.name == name), None)

    if _get_handler(task_log_reader) is None:
        # Check for pre 1.10 setting that might be in deployed airflow.cfg files
        if task_log_reader == "file.task" and _get_handler("task"):
            warnings.warn(
                f"task_log_reader setting in [logging] has a deprecated value of {task_log_reader!r}, "
                "but no handler with this name was found. Please update your config to use task. "
                "Running config has been adjusted to match",
                DeprecationWarning,
            )
            conf.set('logging', 'task_log_reader', 'task')
        else:
            raise AirflowConfigException(
                f"Configured task_log_reader {task_log_reader!r} was not a handler of "
                f"the 'airflow.task' logger.")