def execute_command(command):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    try:
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError as e:
        log.error(e)
        raise AirflowException('Celery command failed')
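In Airflow's Celery executor this function is registered as a Celery task so that workers can pick commands up from the broker. A minimal sketch of that wiring, assuming a Celery app object (broker settings omitted; the wiring shown is illustrative, not Airflow's exact module layout):

from celery import Celery

app = Celery('celery_executor')  # broker/backend configuration omitted

@app.task
def execute_command(command):
    # body as in the example above
    ...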
Example #3
def execute_command(command):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    env = os.environ.copy()
    try:
        subprocess.check_call(command, stderr=subprocess.STDOUT,
                              close_fds=True, env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(e.output)

        raise AirflowException('Celery command failed')
Example #4
def execute_command(command_to_exec):
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command_to_exec)
    env = os.environ.copy()
    try:
        subprocess.check_call(command_to_exec, stderr=subprocess.STDOUT,
                              close_fds=True, env=env)
    except subprocess.CalledProcessError as e:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(e.output)

        raise AirflowException('Celery command failed')
Example #5
def extract_xcom_parameter(value):
    enable_pickling = conf.getboolean('core', 'enable_xcom_pickling')
    if enable_pickling:
        return pickle.loads(value)
    else:
        try:
            return json.loads(value.decode('UTF-8'))
        except ValueError:
            log = LoggingMixin().log
            log.error("Could not deserialize the XCOM value from JSON. "
                      "If you are using pickles instead of JSON "
                      "for XCOM, then you need to enable pickle "
                      "support for XCOM in your airflow config.")
            raise
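The bytes passed to extract_xcom_parameter are produced by the matching serializer (compare serialize_value in Example #8). A minimal round-trip sketch of the JSON path, independent of Airflow:

import json

stored = json.dumps({"rows": 42}).encode('UTF-8')        # what the serializer persists
assert json.loads(stored.decode('UTF-8')) == {"rows": 42}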
Example #6
    def get_val(self):
        log = LoggingMixin().log
        if self._val and self.is_encrypted:
            try:
                fernet = get_fernet()
                return fernet.decrypt(bytes(self._val, 'utf-8')).decode()
            except InvalidFernetToken:
                log.error("Can't decrypt _val for key=%s, invalid token or value", self.key)
                return None
            except Exception:
                log.error("Can't decrypt _val for key=%s, FERNET_KEY configuration missing", self.key)
                return None
        else:
            return self._val
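get_fernet() wraps the cryptography package's Fernet recipe around Airflow's fernet_key setting. A standalone sketch of the underlying round trip (key handling here is illustrative):

from cryptography.fernet import Fernet

key = Fernet.generate_key()          # Airflow reads this from the fernet_key config
fernet = Fernet(key)
token = fernet.encrypt(b"my secret value")
assert fernet.decrypt(token).decode() == "my secret value"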
Example #7
    def set(
            cls,
            key,
            value,
            execution_date,
            task_id,
            dag_id,
            session=None):
        """
        Store an XCom value.
        TODO: "pickling" has been deprecated and JSON is preferred.
        "pickling" will be removed in Airflow 2.0.

        :return: None
        """
        session.expunge_all()

        enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
        if enable_pickling:
            value = pickle.dumps(value)
        else:
            try:
                value = json.dumps(value).encode('UTF-8')
            except ValueError:
                log = LoggingMixin().log
                log.error("Could not serialize the XCOM value into JSON. "
                          "If you are using pickles instead of JSON "
                          "for XCOM, then you need to enable pickle "
                          "support for XCOM in your airflow config.")
                raise

        # remove any duplicate XComs
        session.query(cls).filter(
            cls.key == key,
            cls.execution_date == execution_date,
            cls.task_id == task_id,
            cls.dag_id == dag_id).delete()

        session.commit()

        # insert new XCom
        session.add(XCom(
            key=key,
            value=value,
            execution_date=execution_date,
            task_id=task_id,
            dag_id=dag_id))

        session.commit()
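In a running DAG this classmethod is usually reached through TaskInstance.xcom_push rather than called directly; when called directly, the session argument is injected by Airflow's @provide_session decorator. A hedged sketch of a direct call, with illustrative values:

from datetime import datetime

XCom.set(
    key='record_count',
    value={'rows': 42},
    execution_date=datetime(2020, 1, 1),   # the DagRun's execution date
    task_id='my_task',
    dag_id='my_dag',
)  # session omitted: filled in by @provide_session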
Example #8
    def serialize_value(value):
        # TODO: "pickling" has been deprecated and JSON is preferred.
        # "pickling" will be removed in Airflow 2.0.
        if configuration.getboolean('core', 'enable_xcom_pickling'):
            return pickle.dumps(value)

        try:
            return json.dumps(value).encode('UTF-8')
        except ValueError:
            log = LoggingMixin().log
            log.error("Could not serialize the XCOM value into JSON. "
                      "If you are using pickles instead of JSON "
                      "for XCOM, then you need to enable pickle "
                      "support for XCOM in your airflow config.")
            raise
Example #9
def try_get_one(execution_date,
                key=None,
                task_id=None,
                dag_id=None,
                include_prior_dates=False,
                enable_pickling=None,
                session=None):
    """
    Retrieve an XCom value, optionally meeting certain criteria.
    TODO: "pickling" has been deprecated and JSON is preferred. "pickling" will be removed in Airflow 2.0.

    :param enable_pickling: If pickling is not enabled, the XCOM value will be parsed as JSON instead.
    :return: XCom value
    """
    filters = []
    if key:
        filters.append(XCom.key == key)
    if task_id:
        filters.append(XCom.task_id == task_id)
    if dag_id:
        filters.append(XCom.dag_id == dag_id)
    if include_prior_dates:
        filters.append(XCom.execution_date <= execution_date)
    else:
        filters.append(XCom.execution_date == execution_date)

    query = (session.query(XCom.value).filter(and_(*filters)).order_by(
        XCom.execution_date.desc(), XCom.timestamp.desc()))

    result = query.first()
    if result:
        if enable_pickling is None:
            enable_pickling = configuration.getboolean('core',
                                                       'enable_xcom_pickling')

        if enable_pickling:
            return (True, pickle.loads(result.value))
        else:
            try:
                return (True, json.loads(result.value.decode('UTF-8')))
            except ValueError:
                log = LoggingMixin().log
                log.error("Could not serialize the XCOM value into JSON. "
                          "If you are using pickles instead of JSON "
                          "for XCOM, then you need to enable pickle "
                          "support for XCOM in your airflow config.")
                raise
    return (False, None)
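Unlike get_one (Example #12), this variant reports a miss explicitly instead of returning None, so callers can tell "no XCom row" apart from a stored None. A minimal sketch of the calling convention, with illustrative values:

from datetime import datetime

found, value = try_get_one(datetime(2020, 1, 1),
                           key='record_count',
                           task_id='my_task',
                           dag_id='my_dag')
if found:
    handle(value)   # hypothetical downstream consumer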
Example #10
    def deserialize_value(result) -> Any:
        # TODO: "pickling" has been deprecated and JSON is preferred.
        # "pickling" will be removed in Airflow 2.0.
        enable_pickling = conf.getboolean('core', 'enable_xcom_pickling')
        if enable_pickling:
            return pickle.loads(result.value)

        try:
            return json.loads(result.value.decode('UTF-8'))
        except ValueError:
            log = LoggingMixin().log
            log.error("Could not deserialize the XCOM value from JSON. "
                      "If you are using pickles instead of JSON "
                      "for XCOM, then you need to enable pickle "
                      "support for XCOM in your airflow config.")
            raise
Example #11
    def get_val(self):
        log = LoggingMixin().log
        if self._val and self.is_encrypted:
            try:
                fernet = get_fernet()
                return fernet.decrypt(bytes(self._val, 'utf-8')).decode()
            except InvalidFernetToken:
                log.error("Can't decrypt _val for key={}, invalid token "
                          "or value".format(self.key))
                return None
            except Exception:
                log.error("Can't decrypt _val for key={}, FERNET_KEY "
                          "configuration missing".format(self.key))
                return None
        else:
            return self._val
Example #12
File: xcom.py  Project: alrolorojas/airflow
    def get_one(cls,
                execution_date,
                key=None,
                task_id=None,
                dag_id=None,
                include_prior_dates=False,
                session=None):
        """
        Retrieve an XCom value, optionally meeting certain criteria.
        TODO: "pickling" has been deprecated and JSON is preferred.
        "pickling" will be removed in Airflow 2.0.

        :return: XCom value
        """
        filters = []
        if key:
            filters.append(cls.key == key)
        if task_id:
            filters.append(cls.task_id == task_id)
        if dag_id:
            filters.append(cls.dag_id == dag_id)
        if include_prior_dates:
            filters.append(cls.execution_date <= execution_date)
        else:
            filters.append(cls.execution_date == execution_date)

        query = (
            session.query(cls.value).filter(and_(*filters))
                   .order_by(cls.execution_date.desc(), cls.timestamp.desc()))

        result = query.first()
        if result:
            enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
            if enable_pickling:
                return pickle.loads(result.value)
            else:
                try:
                    return json.loads(result.value.decode('UTF-8'))
                except ValueError:
                    log = LoggingMixin().log
                    log.error("Could not deserialize the XCOM value from JSON. "
                              "If you are using pickles instead of JSON "
                              "for XCOM, then you need to enable pickle "
                              "support for XCOM in your airflow config.")
                    raise
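Task code normally reaches get_one indirectly through TaskInstance.xcom_pull. A hedged sketch inside a PythonOperator callable, using Airflow's standard context variables:

def my_callable(**context):
    # 'ti' is the TaskInstance provided in the task context
    return context['ti'].xcom_pull(task_ids='upstream_task', key='record_count')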
Example #13
def extract_xcom_parameter(value):
    """Deserializes value stored in xcom table."""
    enable_pickling = conf.getboolean("core", "enable_xcom_pickling")
    if enable_pickling:
        value = pickle.loads(value)
        try:
            value = json.loads(value)
            return value
        except Exception:
            return {}
    else:
        try:
            return json.loads(value.decode("UTF-8"))
        except ValueError:
            log = LoggingMixin().log
            log.error("Could not deserialize the XCOM value from JSON. "
                      "If you are using pickles instead of JSON "
                      "for XCOM, then you need to enable pickle "
                      "support for XCOM in your airflow config.")
            return {}
Example #14
def check_if_tweet_is_avalaible(twitter_account_id=None,
                                since_id=None,
                                find_param=None,
                                **kwargs):
    """
    This method tweepy api via TwitterHook to check if a tweet from a specific twitter_account
    containing a specific search_string or not
    :param: twitter_account_id : for which tweets are to be fetched
    :param: since_id : airflow execution date of the dag
    :return: tweet_id
    """
    log = LoggingMixin().log
    try:
        # Load Configuration Data
        config = json.loads(Variable.get("config"))
        log.info("Config found")

    except AirflowException as e:
        log.error("Config missing")
        raise ConfigVariableNotFoundException()

    try:
        twitter_account_id = config['twitter_account_id']
    except KeyError as e:
        raise AirflowException('Missing Twitter Account Id in config variable')

    try:
        since_id = config['since_id']
    except KeyError as e:
        log.warn("Since id missing")

    try:
        find_param = config['find_param'].lower()
    except KeyError as e:
        raise AirflowException('Missing Find Param in config variable')

    try:
        twitter_credentials = BaseHook.get_connection("twitter_default")
        twitter_credentials = json.loads(twitter_credentials.extra)
        consumer_key = twitter_credentials['consumer_key']
        consumer_secret = twitter_credentials['consumer_secret']
        access_token = twitter_credentials['access_token']
        access_token_secret = twitter_credentials['access_token_secret']

    except AirflowException as e:
        raise TwitterConnectionNotFoundException()

    twitter_hook = TwitterHook(consumer_key=consumer_key,
                               consumer_secret=consumer_secret,
                               access_token=access_token,
                               access_token_secret=access_token_secret)

    tweepy_api = twitter_hook.get_tweepy_api()
    today = date.today()
    curr_date = today.strftime("%d-%m-%Y")
    # try to get the tweet about the covid media bulletin from the @diprjk handle

    tweets = tweepy_api.user_timeline(id=twitter_account_id,
                                      since_id=since_id,
                                      count=1000,
                                      exclude_replies=True,
                                      include_rts=False,
                                      tweet_mode="extended")
    if len(tweets) > 0:
        # find_param = "Media Bulletin on Novel".lower()
        log.info("Found : {}  tweets".format(len(tweets) + 1))
        # loop over all extracted tweets and
        # if tweet.full_text contains string "Media Bulletin On Novel"
        # then we got our concerned tweet and save its tweet_id
        image_urls = []
        for tweet in tweets:
            tweet_date = tweet.created_at
            tweet_date = tweet_date.strftime("%d-%m-%Y")
            text = tweet.full_text.lower()
            if find_param in text and tweet_date == curr_date:
                bulletin_tweet_id = tweet.id
                print('Tweet found')
                # save the bulletin tweet id (as an environment variable or in a file) and use it in the next run
                log.info("Tweet ID: {}  TEXT : {} ".format(
                    bulletin_tweet_id, tweet.full_text))
                if 'media' in tweet.entities:
                    for media in tweet.extended_entities['media']:
                        image_urls.append(media['media_url'])
                    detail_image_url = image_urls[2]
                    log.info("Tweet Image Url: {} ".format(detail_image_url))
                else:
                    log.info("No media found")
                    # skip the processing and end the DAG
                    return False
                data = {
                    "tweet_id": bulletin_tweet_id,
                    "tweet_date": tweet_date,
                    "media_url": detail_image_url
                }
                Variable.set("bulliten_tweet", json.dumps(data))
                return True
            else:
                pass
        else:
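            # for-else: reached only when the loop finishes without returning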
            log.info("No tweets related to {} found".format(find_param))
            return False

    else:
        log.info("No tweets found!")
        return False
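The callable above expects a JSON blob stored in the config Airflow Variable. A hedged sketch of seeding it, with field names taken from the lookups above and purely illustrative values:

import json
from airflow.models import Variable

Variable.set("config", json.dumps({
    "twitter_account_id": "diprjk",              # handle to poll
    "since_id": 1234567890,                      # optional; only warned about when absent
    "find_param": "Media Bulletin on Novel",     # search string; lowercased by the task
}))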
Example #15
"""Hook for Web HDFS"""
from hdfs import HdfsError, InsecureClient

from airflow.configuration import conf
from airflow.exceptions import AirflowException
from airflow.hooks.base_hook import BaseHook
from airflow.utils.log.logging_mixin import LoggingMixin

_kerberos_security_mode = conf.get("core", "security") == "kerberos"
if _kerberos_security_mode:
    try:
        from hdfs.ext.kerberos import KerberosClient  # pylint: disable=ungrouped-imports
    except ImportError:
        log = LoggingMixin().log
        log.error("Could not load the Kerberos extension for the WebHDFSHook.")
        raise


class AirflowWebHDFSHookException(AirflowException):
    """Exception specific for WebHDFS hook"""


class WebHDFSHook(BaseHook):
    """
    Interact with HDFS. This class is a wrapper around the hdfscli library.

    :param webhdfs_conn_id: The connection id for the webhdfs client to connect to.
    :type webhdfs_conn_id: str
    :param proxy_user: The user used to authenticate.
    :type proxy_user: str
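A minimal usage sketch for this hook, assuming the get_conn, check_for_path, and load_file helpers that the rest of this module defines (they are not shown in the excerpt above):

hook = WebHDFSHook(webhdfs_conn_id='webhdfs_default')
client = hook.get_conn()                      # hdfscli InsecureClient or KerberosClient
if hook.check_for_path('/data/landing'):
    hook.load_file('/tmp/report.csv', '/data/landing/report.csv')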
Example #16
            log.debug('Importing plugin module %s', filepath)
            # normalize root path as namespace
            namespace = '_'.join([re.sub(norm_pattern, '__', root), mod_name])

            m = imp.load_source(namespace, filepath)
            for obj in list(m.__dict__.values()):
                if (inspect.isclass(obj) and issubclass(obj, AirflowPlugin)
                        and obj is not AirflowPlugin):
                    obj.validate()
                    if obj not in plugins:
                        plugins.append(obj)

        except Exception as e:
            log.exception(e)
            log.error('Failed to import plugin %s', filepath)
            import_errors[filepath] = str(e)


def make_module(name, objects):
    log.debug('Creating module %s', name)
    name = name.lower()
    module = imp.new_module(name)
    module._name = name.split('.')[-1]
    module._objects = objects
    module.__dict__.update((o.__name__, o) for o in objects)
    return module
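A short sketch of what this factory yields, with a hypothetical plugin class standing in for a real operator:

class MyOperator:          # hypothetical plugin class
    pass

mod = make_module('airflow.operators.my_plugin', [MyOperator])
assert mod._name == 'my_plugin' and mod.MyOperator is MyOperator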


# Plugin components to integrate as modules
operators_modules = []
Example #17
                os.path.split(filepath)[-1])
            if file_ext != '.py':
                continue

            log.debug('Importing plugin module %s', filepath)
            # normalize root path as namespace
            namespace = '_'.join([re.sub(norm_pattern, '__', root), mod_name])

            m = imp.load_source(namespace, filepath)
            for obj in list(m.__dict__.values()):
                if is_valid_plugin(obj, plugins):
                    plugins.append(obj)

        except Exception as e:
            log.exception(e)
            log.error('Failed to import plugin %s', filepath)
            import_errors[filepath] = str(e)

plugins = load_entrypoint_plugins(
    pkg_resources.iter_entry_points('airflow.plugins'),
    plugins
)


def make_module(name, objects):
    log.debug('Creating module %s', name)
    name = name.lower()
    module = imp.new_module(name)
    module._name = name.split('.')[-1]
    module._objects = objects
    module.__dict__.update((o.__name__, o) for o in objects)
    return module
Example #18
          catchup=True,
          max_active_runs=1)

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

ddl_sql_file_name = '../create_tables.sql'
sql_path = path.join(path.dirname(path.abspath(__file__)), ddl_sql_file_name)

sql_content = None

try:
    with open(sql_path) as reader:
        sql_content = reader.read()

except Exception as err:
    log.error(f"Failure when reading file {sql_path}")

# Tables
staging_events_table = "staging_events"
staging_songs_table = "staging_songs"
target_events_table = "staging_events"
target_songs_table = "staging_songs"
facts_songplays_table_name = "songplays"
dim_users_table_name = "users"
dim_songs_table_name = "songs"
dim_artists_table_name = "artists"
dim_time_table_name = "time"

# Task ids
stage_events_task_id = "Stage_Events_to_Redshift_And_Validate"
stage_songs_task_id = "Stage_Songs_to_Redshift_And_Validate"
Example #19
from airflow.hooks.base_hook import BaseHook
from airflow import configuration

from hdfs import InsecureClient, HdfsError

from airflow.utils.log.logging_mixin import LoggingMixin

_kerberos_security_mode = configuration.get("core", "security") == "kerberos"
if _kerberos_security_mode:
    try:
        from hdfs.ext.kerberos import KerberosClient
    except ImportError:
        log = LoggingMixin().log
        log.error("Could not load the Kerberos extension for the WebHDFSHook.")
        raise
from airflow.exceptions import AirflowException


class AirflowWebHDFSHookException(AirflowException):
    pass


class WebHDFSHook(BaseHook):
    """
    Interact with HDFS. This class is a wrapper around the hdfscli library.
    """
    def __init__(self, webhdfs_conn_id='webhdfs_default', proxy_user=None):
        self.webhdfs_conn_id = webhdfs_conn_id
        self.proxy_user = proxy_user