コード例 #1
0
def _split_tablename(table_input, default_project_id, var_name=None):
    assert default_project_id is not None, "INTERNAL: No default project is specified"

    def var_print(var_name):
        if var_name is None:
            return ""
        else:
            return "Format exception for {var}: ".format(var=var_name)

    if table_input.count('.') + table_input.count(':') > 3:
        raise Exception(('{var}Use either : or . to specify project '
                         'got {input}').format(var=var_print(var_name),
                                               input=table_input))

    cmpt = table_input.rsplit(':', 1)
    project_id = None
    rest = table_input
    if len(cmpt) == 1:
        project_id = None
        rest = cmpt[0]
    elif len(cmpt) == 2 and cmpt[0].count(':') <= 1:
        if cmpt[-1].count('.') != 2:
            project_id = cmpt[0]
            rest = cmpt[1]
    else:
        raise Exception(('{var}Expect format of (<project:)<dataset>.<table>, '
                         'got {input}').format(var=var_print(var_name),
                                               input=table_input))

    cmpt = rest.split('.')
    if len(cmpt) == 3:
        assert project_id is None, (
            "{var}Use either : or . to specify project").format(
                var=var_print(var_name))
        project_id = cmpt[0]
        dataset_id = cmpt[1]
        table_id = cmpt[2]

    elif len(cmpt) == 2:
        dataset_id = cmpt[0]
        table_id = cmpt[1]
    else:
        raise Exception(
            ('{var}Expect format of (<project.|<project:)<dataset>.<table>, '
             'got {input}').format(var=var_print(var_name), input=table_input))

    if project_id is None:
        if var_name is not None:
            log = LoggingMixin().logger
            log.info(
                'Project not included in {var}: {input}; using project "{project}"'
                .format(var=var_name,
                        input=table_input,
                        project=default_project_id))
        project_id = default_project_id

    return project_id, dataset_id, table_id
コード例 #2
0
ファイル: S3_hook.py プロジェクト: wing1124/incubator-airflow
def _parse_s3_config(config_file_name, config_format='boto', profile=None):
    """
    Parses a config file for s3 credentials. Can currently
    parse boto, s3cmd.conf and AWS SDK config formats

    :param config_file_name: path to the config file
    :type config_file_name: str
    :param config_format: config type. One of "boto", "s3cmd" or "aws".
        Defaults to "boto"
    :type config_format: str
    :param profile: profile name in AWS type config file
    :type profile: str
    """
    Config = configparser.ConfigParser()
    if Config.read(config_file_name):  # pragma: no cover
        sections = Config.sections()
    else:
        raise AirflowException("Couldn't read {0}".format(config_file_name))
    # Setting option names depending on file format
    if config_format is None:
        config_format = 'boto'
    conf_format = config_format.lower()
    if conf_format == 'boto':  # pragma: no cover
        if profile is not None and 'profile ' + profile in sections:
            cred_section = 'profile ' + profile
        else:
            cred_section = 'Credentials'
    elif conf_format == 'aws' and profile is not None:
        cred_section = profile
    else:
        cred_section = 'default'
    # Option names
    if conf_format in ('boto', 'aws'):  # pragma: no cover
        key_id_option = 'aws_access_key_id'
        secret_key_option = 'aws_secret_access_key'
        # security_token_option = 'aws_security_token'
    else:
        key_id_option = 'access_key'
        secret_key_option = 'secret_key'
    # Actual Parsing
    if cred_section not in sections:
        raise AirflowException("This config file format is not recognized")
    else:
        try:
            access_key = Config.get(cred_section, key_id_option)
            secret_key = Config.get(cred_section, secret_key_option)
            calling_format = None
            if Config.has_option(cred_section, 'calling_format'):
                calling_format = Config.get(cred_section, 'calling_format')
        except:
            log = LoggingMixin().logger
            log.warning("Option Error in parsing s3 config file")
            raise
        return (access_key, secret_key, calling_format)
コード例 #3
0
        def _str(s):
            """
            Return ``s`` in a form cloudant-python accepts.

            ``unicode`` values are encoded to ASCII with the ``ignore`` error
            handler (characters that cannot be encoded are dropped); any
            other value is returned untouched.
            """
            if not isinstance(s, unicode):
                return s

            # cloudant-python doesn't support unicode.
            LoggingMixin().logger.debug(
                'cloudant-python does not support unicode. Encoding %s as ascii using "ignore".',
                s
            )
            return s.encode('ascii', 'ignore')
コード例 #4
0
def list_py_file_paths(directory, safe_mode=True):
    """
    Traverse a directory and look for Python files.

    A file is kept when it ends in ``.py`` or is a zip archive, does not
    match any collected ``.airflowignore`` pattern and, in safe mode, looks
    like it might define a DAG. Files that raise while being examined are
    logged and skipped.

    :param directory: the directory to traverse
    :type directory: unicode
    :param safe_mode: whether to use a heuristic to determine whether a file
        contains Airflow DAG definitions
    :return: a list of paths to Python files in the specified directory;
        ``[]`` when ``directory`` is None or does not exist, and
        ``[directory]`` when it is a regular file
    :rtype: list[unicode]
    """
    file_paths = []
    if directory is None:
        return []
    elif os.path.isfile(directory):
        return [directory]
    elif os.path.isdir(directory):
        # Patterns accumulate over the walk: once an .airflowignore has been
        # read, its patterns apply to every directory visited afterwards.
        patterns = []
        for root, _dirs, files in os.walk(directory, followlinks=True):
            if '.airflowignore' in files:
                with open(os.path.join(root, '.airflowignore'), 'r') as ignore_file:
                    patterns += [p for p in ignore_file.read().split('\n') if p]
            for file_name in files:
                try:
                    file_path = os.path.join(root, file_name)
                    if not os.path.isfile(file_path):
                        continue
                    _, file_ext = os.path.splitext(file_name)
                    is_zip = zipfile.is_zipfile(file_path)
                    if file_ext != '.py' and not is_zip:
                        continue
                    # Each ignore pattern is a regex searched anywhere in
                    # the full path.
                    if any(re.search(p, file_path) for p in patterns):
                        continue

                    # Heuristic that guesses whether a Python file contains an
                    # Airflow DAG definition. Zip archives are never inspected.
                    if safe_mode and not is_zip:
                        with open(file_path, 'rb') as candidate:
                            content = candidate.read()
                        if not all(s in content for s in (b'DAG', b'airflow')):
                            continue

                    file_paths.append(file_path)
                except Exception:
                    log = LoggingMixin().logger
                    log.exception("Error while examining %s", file_name)
    return file_paths
コード例 #5
0
def GetDefaultExecutor():
    """
    Return the module-wide default executor, building it on first use.

    The executor name is read from the ``core``/``EXECUTOR`` configuration
    option, the instance is stored in ``DEFAULT_EXECUTOR`` and later calls
    return that same stored instance.
    """
    global DEFAULT_EXECUTOR

    if DEFAULT_EXECUTOR is None:
        name = configuration.get('core', 'EXECUTOR')
        DEFAULT_EXECUTOR = _get_executor(name)
        LoggingMixin().logger.info("Using executor %s", name)

    return DEFAULT_EXECUTOR
コード例 #6
0
ファイル: email.py プロジェクト: wing1124/incubator-airflow
def send_MIME_email(e_from, e_to, mime_msg, dryrun=False):
    """
    Send an already-built MIME message through the configured SMTP server.

    Host, port, STARTTLS and SSL flags are read from the ``smtp`` section of
    the configuration. User and password are optional: when reading them
    raises ``AirflowConfigException`` no login is attempted.

    :param e_from: sender address handed to ``sendmail``
    :param e_to: recipient address(es) handed to ``sendmail``
    :param mime_msg: message object; its ``as_string()`` output is sent
    :param dryrun: when true, read the configuration but do not open a
        connection or send anything
    """
    log = LoggingMixin().logger

    smtp_host = configuration.get('smtp', 'SMTP_HOST')
    smtp_port = configuration.getint('smtp', 'SMTP_PORT')
    smtp_starttls = configuration.getboolean('smtp', 'SMTP_STARTTLS')
    smtp_ssl = configuration.getboolean('smtp', 'SMTP_SSL')
    smtp_user = None
    smtp_password = None

    try:
        smtp_user = configuration.get('smtp', 'SMTP_USER')
        smtp_password = configuration.get('smtp', 'SMTP_PASSWORD')
    except AirflowConfigException:
        log.debug(
            "No user/password found for SMTP, so logging in with no authentication."
        )

    if dryrun:
        return

    if smtp_ssl:
        s = smtplib.SMTP_SSL(smtp_host, smtp_port)
    else:
        s = smtplib.SMTP(smtp_host, smtp_port)
    try:
        if smtp_starttls:
            s.starttls()
        if smtp_user and smtp_password:
            s.login(smtp_user, smtp_password)
        s.sendmail(e_from, e_to, mime_msg.as_string())
        s.quit()
    finally:
        # Release the socket even when STARTTLS, login or sending failed;
        # close() is a no-op after a successful quit().
        s.close()
    # Only report success once the server has accepted the message.
    log.info("Sent an alert email to %s", e_to)
コード例 #7
0
    def _to_timestamp(cls, col):
        """
        Convert a column of a dataframe to UNIX timestamps if applicable

        :param col:     A Series object representing a column of a dataframe.
        """
        # try and convert the column to datetimes
        # the column MUST have a four digit year somewhere in the string
        # there should be a better way to do this,
        # but just letting pandas try and convert every column without a format
        # caused it to convert floats as well
        # For example, a column of integers
        # between 0 and 10 are turned into timestamps
        # if the column cannot be converted,
        # just return the original column untouched
        try:
            col = pd.to_datetime(col)
        except ValueError:
            log = LoggingMixin().logger
            log.warning(
                "Could not convert field to timestamps: %s", col.name
            )
            return col

        # now convert the newly created datetimes into timestamps
        # we have to be careful here
        # because NaT cannot be converted to a timestamp
        # so we have to return NaN
        converted = []
        for i in col:
            try:
                converted.append(i.timestamp())
            except ValueError:
                converted.append(pd.np.NaN)
            except AttributeError:
                converted.append(pd.np.NaN)

        # return a new series that maintains the same index as the original
        return pd.Series(converted, index=col.index)
コード例 #8
0
def execute_command(command):
    """
    Run ``command`` through the shell and fail loudly on a non-zero exit.

    :param command: the command line, passed to ``subprocess.check_call``
        with ``shell=True``
    :raises AirflowException: if the command exits with a non-zero status;
        the underlying ``CalledProcessError`` is logged first
    """
    logger = LoggingMixin().logger
    logger.info("Executing command in Celery: %s", command)
    try:
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError as failure:
        logger.error(failure)
        raise AirflowException('Celery command failed')
コード例 #9
0
ファイル: sensors.py プロジェクト: wing1124/incubator-airflow
    def filter_for_filesize(result, size=None):
        """
        Keep only the entries of ``result`` that are at least ``size``
        megabytes long.

        :param result: a list of dicts returned by Snakebite ls; each entry
            is read through its ``'path'`` and ``'length'`` keys
        :param size: the minimum file size in MB; a falsy value (``None`` or
            ``0``) disables the filter
        :return: (list) the entries that passed the filter, or ``result``
            itself when no filtering was requested
        """
        if size:
            log = LoggingMixin().logger
            # Build a real list so the paths show up in the log message
            # instead of a lazy map object on Python 3.
            log.debug('Filtering for file size >= %s in files: %s', size, [x['path'] for x in result])
            size *= settings.MEGABYTE
            result = [x for x in result if x['length'] >= size]
            log.debug('HdfsSensor.poke: after size filter result is %s', result)
        return result
コード例 #10
0
    def handle_failure_retry(context):
        """
        Reconcile the task instance with the command it started earlier.

        The command id is pulled from XCom (key ``qbol_cmd_id``). If that
        command is ``done`` the task instance state is set to success; if it
        is still ``running`` the command is cancelled. Nothing happens when
        no id was stored or the command cannot be found.

        :param context: task context; only ``context['ti']`` is used
        """
        task_instance = context['ti']
        command_id = task_instance.xcom_pull(
            key='qbol_cmd_id', task_ids=task_instance.task_id)
        if command_id is None:
            return

        command = Command.find(command_id)
        if command is None:
            return

        logger = LoggingMixin().logger
        if command.status == 'done':
            logger.info(
                'Command ID: %s has been succeeded, hence marking this '
                'TI as Success.', command_id)
            task_instance.state = State.SUCCESS
        elif command.status == 'running':
            logger.info('Cancelling the Qubole Command Id: %s', command_id)
            command.cancel()
コード例 #11
0
class CeleryConfig(object):
    """
    Celery settings assembled from the Airflow configuration.

    Every statement in this body runs once, when the class is created, so
    the configuration lookups below happen at that moment.

    :raises AirflowException: while the class body executes, if
        ``CELERY_SSL_ACTIVE`` is true and building ``BROKER_USE_SSL`` fails
    """
    CELERY_ACCEPT_CONTENT = ['json', 'pickle']
    CELERY_EVENT_SERIALIZER = 'json'
    CELERY_RESULT_SERIALIZER = 'pickle'
    CELERY_TASK_SERIALIZER = 'pickle'
    CELERYD_PREFETCH_MULTIPLIER = 1
    CELERY_ACKS_LATE = True
    # Broker, result backend and concurrency come from the "celery" section.
    BROKER_URL = configuration.get('celery', 'BROKER_URL')
    CELERY_RESULT_BACKEND = configuration.get('celery',
                                              'CELERY_RESULT_BACKEND')
    CELERYD_CONCURRENCY = configuration.getint('celery', 'CELERYD_CONCURRENCY')
    # The default queue name doubles as the default exchange name.
    CELERY_DEFAULT_QUEUE = DEFAULT_QUEUE
    CELERY_DEFAULT_EXCHANGE = DEFAULT_QUEUE

    # SSL stays off unless the option can be read and is true; a config
    # error while reading it only produces a warning.
    celery_ssl_active = False
    try:
        celery_ssl_active = configuration.getboolean('celery',
                                                     'CELERY_SSL_ACTIVE')
    except AirflowConfigException as e:
        log = LoggingMixin().logger
        log.warning("Celery Executor will run without SSL")

    # BROKER_USE_SSL is only defined when SSL is active. Any failure while
    # building it is re-raised as AirflowException with a fixed message.
    try:
        if celery_ssl_active:
            BROKER_USE_SSL = {
                'keyfile': configuration.get('celery', 'CELERY_SSL_KEY'),
                'certfile': configuration.get('celery', 'CELERY_SSL_CERT'),
                'ca_certs': configuration.get('celery', 'CELERY_SSL_CACERT'),
                'cert_reqs': ssl.CERT_REQUIRED
            }
    except AirflowConfigException as e:
        raise AirflowException(
            'AirflowConfigException: CELERY_SSL_ACTIVE is True, please ensure CELERY_SSL_KEY, '
            'CELERY_SSL_CERT and CELERY_SSL_CACERT are set')
    except Exception as e:
        raise AirflowException(
            'Exception: There was an unknown Celery SSL Error.  Please ensure you want to use '
            'SSL and/or have all necessary certs and key.')
コード例 #12
0
def _poll_with_exponential_delay(request, max_n, is_done_func, is_error_func):
    """
    Execute ``request`` repeatedly, backing off exponentially between tries.

    After attempt ``i`` (starting at 0) that neither finished nor failed,
    the function sleeps ``2**i`` seconds plus up to one second of random
    jitter. HTTP errors with status 429 are treated the same way; any other
    HTTP error is logged and re-raised.

    :param request: object whose ``execute()`` performs one poll
    :param max_n: maximum number of attempts
    :param is_done_func: callable taking the response; true when finished
    :param is_error_func: callable taking the response; true on failure
    :return: the response for which ``is_done_func`` was true, or ``None``
        when all ``max_n`` attempts were used up without completion
    :raises ValueError: if ``is_error_func`` flags a response
    """
    log = LoggingMixin().logger

    for i in range(0, max_n):
        try:
            response = request.execute()
            if is_error_func(response):
                raise ValueError(
                    'The response contained an error: {}'.format(response))
            if is_done_func(response):
                log.info('Operation is done: %s', response)
                return response
        except errors.HttpError as e:
            if e.resp.status != 429:
                log.info('Something went wrong. Not retrying: %s', format(e))
                raise
        # Divide by a float so the jitter stays fractional even where "/"
        # is integer division (Python 2 without the division import).
        time.sleep((2**i) + (random.randint(0, 1000) / 1000.0))

    # Attempts exhausted without a finished response.
    return None
コード例 #13
0
ファイル: sensors.py プロジェクト: wing1124/incubator-airflow
    def filter_for_ignored_ext(result, ignored_ext, ignore_copying):
        """
        Remove entries whose path ends in one of the ignored extensions.

        :param result: (list) of dicts returned by Snakebite ls; each entry
            is read through its ``'path'`` key
        :param ignored_ext: (list) of ignored extensions; they are inserted
            into a regular expression as-is, without escaping
        :param ignore_copying: (bool) whether to apply the filter at all
        :return: (list) of dicts which were not removed; ``result`` itself
            when ``ignore_copying`` is false
        """
        if ignore_copying:
            log = LoggingMixin().logger
            # Raw string keeps "\." a regex escape instead of an invalid
            # string escape; the resulting pattern text is unchanged.
            regex_builder = r"^.*\.(%s$)$" % '$|'.join(ignored_ext)
            ignored_extentions_regex = re.compile(regex_builder)
            # Build a real list so the paths show up in the log message
            # instead of a lazy map object on Python 3.
            log.debug(
                'Filtering result for ignored extensions: %s in files %s',
                ignored_extentions_regex.pattern, [x['path'] for x in result]
            )
            result = [x for x in result if not ignored_extentions_regex.match(x['path'])]
            log.debug('HdfsSensor.poke: after ext filter result is %s', result)
        return result
コード例 #14
0
def load_login():
    """
    Import the authentication backend and bind it to the global ``login``.

    The backend defaults to ``airflow.default_login``. When the
    ``webserver``/``AUTHENTICATE`` option is true, the module named by
    ``webserver``/``auth_backend`` is used instead; if looking that up raises
    a config exception, the deprecated ``airflow_login`` module is tried.

    :raises AirflowException: if the backend cannot be imported while
        authentication is enabled
    """
    global login
    logger = LoggingMixin().logger

    backend_name = 'airflow.default_login'
    try:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            backend_name = conf.get('webserver', 'auth_backend')
    except conf.AirflowConfigException:
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            logger.warning(
                "auth_backend not found in webserver config reverting to "
                "*deprecated*  behavior of importing airflow_login")
            backend_name = "airflow_login"

    try:
        login = import_module(backend_name)
    except ImportError as import_error:
        logger.critical(
            "Cannot import authentication module %s. "
            "Please correct your authentication backend or disable authentication: %s",
            backend_name, import_error)
        # With authentication disabled the failure is only logged.
        if conf.getboolean('webserver', 'AUTHENTICATE'):
            raise AirflowException("Failed to import authentication backend")
コード例 #15
0
ファイル: db.py プロジェクト: wing1124/incubator-airflow
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from functools import wraps

import os

from sqlalchemy import event, exc
from sqlalchemy.pool import Pool

from airflow import settings
from airflow.utils.log.LoggingMixin import LoggingMixin

# Module-level logger, taken from a throwaway LoggingMixin instance.
log = LoggingMixin().logger

def provide_session(func):
    """
    Function decorator that provides a session if it isn't provided.
    If you want to reuse a session or run the function as part of a
    database transaction, you pass it to the function, if not this wrapper
    will create one and close it for you.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        needs_session = False
        arg_session = 'session'
        func_params = func.__code__.co_varnames
        session_in_args = arg_session in func_params and \
            func_params.index(arg_session) < len(args)
コード例 #16
0
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from builtins import object
import imp
import inspect
import os
import re
import sys

from airflow import configuration
from airflow.utils.log.LoggingMixin import LoggingMixin

# Module-level logger, taken from a throwaway LoggingMixin instance.
log = LoggingMixin().logger


class AirflowPluginException(Exception):
    """Plugin-specific exception type; adds nothing to ``Exception``."""


class AirflowPlugin(object):
    # Declarative description of a plugin: a name plus lists of the
    # components it contributes. All defaults are empty.
    # NOTE: the lists are class attributes, so a subclass that mutates one
    # in place (instead of assigning its own list) changes it for every
    # class that still shares the default.
    name = None
    operators = []
    hooks = []
    executors = []
    macros = []
    admin_views = []
    flask_blueprints = []
    menu_links = []
コード例 #17
0
from __future__ import print_function
from __future__ import unicode_literals

import logging
import logging.config
import os
import sys

from sqlalchemy import create_engine
from sqlalchemy.orm import scoped_session, sessionmaker
from sqlalchemy.pool import NullPool

from airflow import configuration as conf
from airflow.utils.log.LoggingMixin import LoggingMixin

# Module-level logger, taken from a throwaway LoggingMixin instance.
log = LoggingMixin().logger

class DummyStatsLogger(object):
    """Stats logger whose methods accept their arguments and do nothing."""

    @classmethod
    def incr(cls, stat, count=1, rate=1):
        """Accept an increment request and discard it."""

    @classmethod
    def decr(cls, stat, count=1, rate=1):
        """Accept a decrement request and discard it."""

    @classmethod
    def gauge(cls, stat, value, rate=1, delta=False):
        """Accept a gauge update and discard it."""
コード例 #18
0
 def get_connection(cls, conn_id):
     """
     Pick one connection at random from those registered for ``conn_id``.

     The host is logged when the chosen connection has one.

     :param conn_id: identifier passed to ``cls.get_connections``
     :return: the chosen connection object
     """
     chosen = random.choice(cls.get_connections(conn_id))
     if not chosen.host:
         return chosen
     LoggingMixin().logger.info("Using connection to: %s", chosen.host)
     return chosen
コード例 #19
0
# See the License for the specific language governing permissions and
# limitations under the License.

from airflow.hooks.base_hook import BaseHook
from airflow import configuration

from hdfs import InsecureClient, HdfsError

from airflow.utils.log.LoggingMixin import LoggingMixin

# True when the "security" option of the "core" section is "kerberos".
_kerberos_security_mode = configuration.get("core", "security") == "kerberos"
if _kerberos_security_mode:
    # The Kerberos client is only imported in kerberos mode; a failed import
    # is logged and then re-raised, so importing this module fails too.
    try:
        from hdfs.ext.kerberos import KerberosClient
    except ImportError:
        log = LoggingMixin().logger
        log.error("Could not load the Kerberos extension for the WebHDFSHook.")
        raise
from airflow.exceptions import AirflowException


class AirflowWebHDFSHookException(AirflowException):
    """WebHDFS-hook-specific exception; adds nothing to ``AirflowException``."""


class WebHDFSHook(BaseHook):
    """
    Interact with HDFS. This class is a wrapper around the hdfscli library.
    """
    def __init__(self, webhdfs_conn_id='webhdfs_default', proxy_user=None):
        self.webhdfs_conn_id = webhdfs_conn_id
コード例 #20
0
import shlex
import sys

from future import standard_library

from airflow.utils.log.LoggingMixin import LoggingMixin

standard_library.install_aliases()

from builtins import str
from collections import OrderedDict
from six.moves import configparser

from airflow.exceptions import AirflowConfigException

# Module-level logger, taken from a throwaway LoggingMixin instance.
log = LoggingMixin().logger

# show Airflow's deprecation warnings
# (both filters are restricted to modules matching 'airflow')
warnings.filterwarnings(action='default',
                        category=DeprecationWarning,
                        module='airflow')
warnings.filterwarnings(action='default',
                        category=PendingDeprecationWarning,
                        module='airflow')

# Pick the parser base class by interpreter: ConfigParser on Python 3,
# SafeConfigParser otherwise.
if six.PY3:
    ConfigParser = configparser.ConfigParser
else:
    ConfigParser = configparser.SafeConfigParser