def execute_command(command):
    """Execute a shell command handed to this Celery worker.

    :param command: full command line to run, as a single string
    :raises AirflowException: if the command exits with a non-zero status
    """
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    try:
        # SECURITY NOTE(review): shell=True runs the string through the shell;
        # `command` must originate from the trusted scheduler, never from
        # user-controlled input.
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError as e:
        # FIX: log.exception records the traceback as well as the error text
        # (plain log.error(e) discarded the stack), matching the other
        # execute_command variants in this file.
        log.exception("Celery command failed: %s", e)
        raise AirflowException('Celery command failed')
def execute_command(command):
    """Run a Celery-issued command line, raising on any non-zero exit."""
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    try:
        subprocess.check_call(command, shell=True)
    except subprocess.CalledProcessError as err:
        log.error(err)
        raise AirflowException('Celery command failed')
def execute_command(command):
    """Run a worker command using a copy of the current process environment.

    :param command: the command to hand to subprocess.check_call
    :raises AirflowException: when the subprocess exits non-zero
    """
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command)
    child_env = os.environ.copy()
    try:
        subprocess.check_call(command, stderr=subprocess.STDOUT,
                              close_fds=True, env=child_env)
    except subprocess.CalledProcessError as err:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(err.output)
        raise AirflowException('Celery command failed')
def execute_command(command_to_exec):
    """Execute the given command for the Celery worker.

    A copy of the current environment is passed to the child so the worker's
    settings propagate.

    :raises AirflowException: if the child process exits with a failure code
    """
    log = LoggingMixin().log
    log.info("Executing command in Celery: %s", command_to_exec)
    child_env = os.environ.copy()
    try:
        subprocess.check_call(command_to_exec,
                              stderr=subprocess.STDOUT,
                              close_fds=True,
                              env=child_env)
    except subprocess.CalledProcessError as err:
        log.exception('execute_command encountered a CalledProcessError')
        log.error(err.output)
        raise AirflowException('Celery command failed')
def extract_xcom_parameter(value):
    """Deserialize a raw XCom payload into a Python object.

    Uses pickle when ``core.enable_xcom_pickling`` is set, JSON otherwise.
    """
    if conf.getboolean('core', 'enable_xcom_pickling'):
        # NOTE: pickle is unsafe on untrusted data; assumes the XCom table
        # contents are trusted.
        return pickle.loads(value)
    try:
        return json.loads(value.decode('UTF-8'))
    except ValueError:
        LoggingMixin().log.error(
            "Could not deserialize the XCOM value from JSON. "
            "If you are using pickles instead of JSON "
            "for XCOM, then you need to enable pickle "
            "support for XCOM in your airflow config.")
        raise
def get_val(self):
    """Return the stored value, decrypting it when flagged as encrypted.

    Decryption failures are logged and reported as None rather than raised.
    """
    log = LoggingMixin().log
    # Plain (or empty) values are returned untouched.
    if not (self._val and self.is_encrypted):
        return self._val
    try:
        fernet = get_fernet()
        return fernet.decrypt(bytes(self._val, 'utf-8')).decode()
    except InvalidFernetToken:
        log.error("Can't decrypt _val for key=%s, invalid token or value", self.key)
        return None
    except Exception:
        log.error("Can't decrypt _val for key=%s, FERNET_KEY configuration missing", self.key)
        return None
def set(
        cls,
        key,
        value,
        execution_date,
        task_id,
        dag_id,
        session=None):
    """
    Store an XCom value.

    TODO: "pickling" has been deprecated and JSON is preferred.
    "pickling" will be removed in Airflow 2.0.

    :return: None
    """
    session.expunge_all()
    # Serialize with pickle when the (deprecated) option is enabled,
    # otherwise with UTF-8 encoded JSON.
    if configuration.getboolean('core', 'enable_xcom_pickling'):
        value = pickle.dumps(value)
    else:
        try:
            value = json.dumps(value).encode('UTF-8')
        except ValueError:
            LoggingMixin().log.error(
                "Could not serialize the XCOM value into JSON. "
                "If you are using pickles instead of JSON "
                "for XCOM, then you need to enable pickle "
                "support for XCOM in your airflow config.")
            raise
    # Remove any duplicate XComs for the same coordinates before inserting.
    session.query(cls).filter(
        cls.key == key,
        cls.execution_date == execution_date,
        cls.task_id == task_id,
        cls.dag_id == dag_id).delete()
    session.commit()
    # Insert the fresh XCom row.
    session.add(XCom(
        key=key,
        value=value,
        execution_date=execution_date,
        task_id=task_id,
        dag_id=dag_id))
    session.commit()
def serialize_value(value):
    """Serialize a Python object for storage in the XCom table.

    :param value: object to store; must be JSON-serializable unless pickling
        is enabled via ``core.enable_xcom_pickling``
    :return: bytes suitable for the XCom value column
    :raises ValueError, TypeError: re-raised after logging when JSON
        serialization fails
    """
    # TODO: "pickling" has been deprecated and JSON is preferred.
    # "pickling" will be removed in Airflow 2.0.
    if configuration.getboolean('core', 'enable_xcom_pickling'):
        return pickle.dumps(value)
    try:
        return json.dumps(value).encode('UTF-8')
    except (ValueError, TypeError):
        # FIX: json.dumps raises TypeError for unsupported types (the common
        # failure mode) and ValueError for e.g. circular references; the old
        # code only caught ValueError, so the hint below was never logged for
        # TypeError. The original exception is still re-raised either way.
        log = LoggingMixin().log
        log.error("Could not serialize the XCOM value into JSON. "
                  "If you are using pickles instead of JSON "
                  "for XCOM, then you need to enable pickle "
                  "support for XCOM in your airflow config.")
        raise
def try_get_one(execution_date, key=None, task_id=None, dag_id=None,
                include_prior_dates=False, enable_pickling=None, session=None):
    """
    Retrieve an XCom value, optionally meeting certain criteria.

    TODO: "pickling" has been deprecated and JSON is preferred.
    "pickling" will be removed in Airflow 2.0.

    :param enable_pickling: If pickling is not enabled,
        the XCOM value will be parsed to JSON instead.
    :return: ``(True, value)`` when a matching row exists,
        ``(False, None)`` otherwise
    """
    # Build the filter list from whichever criteria were supplied.
    filters = []
    if key:
        filters.append(XCom.key == key)
    if task_id:
        filters.append(XCom.task_id == task_id)
    if dag_id:
        filters.append(XCom.dag_id == dag_id)
    if include_prior_dates:
        filters.append(XCom.execution_date <= execution_date)
    else:
        filters.append(XCom.execution_date == execution_date)
    # Most recent matching XCom wins.
    query = (session.query(XCom.value).filter(and_(*filters)).order_by(
        XCom.execution_date.desc(), XCom.timestamp.desc()))
    result = query.first()
    if result:
        if enable_pickling is None:
            enable_pickling = configuration.getboolean('core', 'enable_xcom_pickling')
        if enable_pickling:
            return (True, pickle.loads(result.value))
        try:
            return (True, json.loads(result.value.decode('UTF-8')))
        except ValueError:
            # FIX: this path *deserializes*; the previous message incorrectly
            # said "Could not serialize the XCOM value into JSON" (compare the
            # other deserialization variants in this file).
            log = LoggingMixin().log
            log.error("Could not deserialize the XCOM value from JSON. "
                      "If you are using pickles instead of JSON "
                      "for XCOM, then you need to enable pickle "
                      "support for XCOM in your airflow config.")
            raise
    return (False, None)
def deserialize_value(result) -> Any:
    """Decode a raw XCom row into a Python object.

    TODO: "pickling" has been deprecated and JSON is preferred.
    "pickling" will be removed in Airflow 2.0.
    """
    if conf.getboolean('core', 'enable_xcom_pickling'):
        return pickle.loads(result.value)
    try:
        return json.loads(result.value.decode('UTF-8'))
    except ValueError:
        LoggingMixin().log.error(
            "Could not deserialize the XCOM value from JSON. "
            "If you are using pickles instead of JSON "
            "for XCOM, then you need to enable pickle "
            "support for XCOM in your airflow config.")
        raise
def get_val(self):
    """Return the stored value, decrypting it when ``is_encrypted`` is set.

    Decryption failures are logged and reported as None so a missing or
    invalid Fernet key never raises to the caller.
    """
    log = LoggingMixin().log
    if self._val and self.is_encrypted:
        try:
            fernet = get_fernet()
            return fernet.decrypt(bytes(self._val, 'utf-8')).decode()
        except InvalidFernetToken:
            # FIX: pass lazy %-style args to the logger instead of eager
            # str.format, so the message is only rendered if the record is
            # emitted; the rendered text is unchanged.
            log.error("Can't decrypt _val for key=%s, invalid token "
                      "or value", self.key)
            return None
        except Exception:
            log.error("Can't decrypt _val for key=%s, FERNET_KEY "
                      "configuration missing", self.key)
            return None
    else:
        return self._val
def get_one(cls, execution_date, key=None, task_id=None, dag_id=None,
            include_prior_dates=False, session=None):
    """
    Retrieve an XCom value, optionally meeting certain criteria.

    TODO: "pickling" has been deprecated and JSON is preferred.
    "pickling" will be removed in Airflow 2.0.

    :return: XCom value
    """
    criteria = []
    if key:
        criteria.append(cls.key == key)
    if task_id:
        criteria.append(cls.task_id == task_id)
    if dag_id:
        criteria.append(cls.dag_id == dag_id)
    # Either an exact execution-date match or everything up to it.
    date_clause = (cls.execution_date <= execution_date
                   if include_prior_dates
                   else cls.execution_date == execution_date)
    criteria.append(date_clause)
    # Most recent matching row wins.
    row = (session.query(cls.value)
           .filter(and_(*criteria))
           .order_by(cls.execution_date.desc(), cls.timestamp.desc())
           .first())
    if not row:
        return None
    if configuration.getboolean('core', 'enable_xcom_pickling'):
        return pickle.loads(row.value)
    try:
        return json.loads(row.value.decode('UTF-8'))
    except ValueError:
        LoggingMixin().log.error(
            "Could not deserialize the XCOM value from JSON. "
            "If you are using pickles instead of JSON "
            "for XCOM, then you need to enable pickle "
            "support for XCOM in your airflow config.")
        raise
def extract_xcom_parameter(value):
    """Deserializes value stored in xcom table."""
    enable_pickling = conf.getboolean("core", "enable_xcom_pickling")
    if enable_pickling:
        # NOTE(review): pickle on data read back from the XCom table —
        # assumes the stored payload is trusted; pickle is unsafe on
        # untrusted input.
        value = pickle.loads(value)
        try:
            # NOTE(review): the unpickled object is immediately passed to
            # json.loads, which implies the pickled payload is expected to be
            # a JSON *string*; if it is already a dict/list this raises and an
            # empty dict is returned instead — confirm this is intentional.
            value = json.loads(value)
            return value
        except Exception:
            # Best-effort: any failure here is swallowed and {} returned.
            return {}
    else:
        try:
            return json.loads(value.decode("UTF-8"))
        except ValueError:
            log = LoggingMixin().log
            log.error("Could not deserialize the XCOM value from JSON. "
                      "If you are using pickles instead of JSON "
                      "for XCOM, then you need to enable pickle "
                      "support for XCOM in your airflow config.")
            # Unlike the raising variants of this helper, deserialization
            # failure here yields an empty dict rather than propagating.
            return {}
def check_if_tweet_is_avalaible(twitter_account_id=None, since_id=None, find_param=None, **kwargs):
    """
    Check, via the tweepy API (through TwitterHook), whether the given
    twitter account posted a tweet today containing ``find_param``.

    All three parameters are overwritten from the "config" Airflow Variable
    when present; ``**kwargs`` is accepted for operator context and unused.

    :param twitter_account_id: account whose timeline is searched
    :param since_id: only tweets newer than this id are fetched
    :param find_param: lower-cased substring to look for in tweet text
    :return: True when a matching tweet (with media) was found and saved to
        the "bulliten_tweet" Variable, False otherwise
    """
    log = LoggingMixin().log
    try:
        # Load Configuration Data
        config = json.loads(Variable.get("config"))
        log.info("Config found")
    except AirflowException as e:
        log.error("Config missing")
        raise ConfigVariableNotFoundException()
    try:
        twitter_account_id = config['twitter_account_id']
    except KeyError as e:
        raise AirflowException('Missing Twitter Account Id in config variable')
    try:
        since_id = config['since_id']
    except KeyError as e:
        # since_id is optional: fall back to the argument / None.
        log.warn("Since id missing")
    try:
        find_param = config['find_param'].lower()
    except KeyError as e:
        raise AirflowException('Missing Find Param in config variable')
    try:
        # Credentials are stored as JSON in the connection's "extra" field.
        twitter_credentials = BaseHook.get_connection("twitter_default")
        twitter_credentials = json.loads(twitter_credentials.extra)
        consumer_key = twitter_credentials['consumer_key']
        consumer_secret = twitter_credentials['consumer_secret']
        access_token = twitter_credentials['access_token']
        access_token_secret = twitter_credentials['access_token_secret']
    except AirflowException as e:
        raise TwitterConnectionNotFoundException()
    twitter_hook = TwitterHook(consumer_key=consumer_key,
                               consumer_secret=consumer_secret,
                               access_token=access_token,
                               access_token_secret=access_token_secret)
    tweepy_api = twitter_hook.get_tweepy_api()
    today = date.today()
    curr_date = today.strftime("%d-%m-%Y")
    # try to get tweet related to covid media bulliten from @diprjk handle
    tweets = tweepy_api.user_timeline(id=twitter_account_id,
                                      since_id=since_id,
                                      count=1000,
                                      exclude_replies=True,
                                      include_rts=False,
                                      tweet_mode="extended")
    if len(tweets) > 0:
        # find_param = "Media Bulletin on Novel".lower()
        # NOTE(review): logs len(tweets) + 1, which over-counts by one —
        # confirm whether the "+ 1" is intentional.
        log.info("Found : {} tweets".format(len(tweets) + 1))
        # loop over all extracted tweets and
        # if tweet.full_text contains string "Media Bulletin On Novel"
        # then we got our concerned tweet and save its tweet_id
        image_urls = []
        for tweet in tweets:
            tweet_date = tweet.created_at
            tweet_date = tweet_date.strftime("%d-%m-%Y")
            text = tweet.full_text.lower()
            # Only a tweet from *today* containing the search string matches.
            if find_param in text and tweet_date == curr_date:
                bulletin_tweet_id = tweet.id
                print('Tweet found')
                # save bulliten tweet id as environ variable or on file and
                # then use in next run
                log.info("Tweet ID: {} TEXT : {} ".format(
                    bulletin_tweet_id, tweet.full_text))
                if 'media' in tweet.entities:
                    for media in tweet.extended_entities['media']:
                        image_urls.append(media['media_url'])
                    # NOTE(review): hard-coded index assumes at least three
                    # media images are attached; IndexError otherwise —
                    # confirm against the expected bulletin format.
                    detail_image_url = image_urls[2]
                    log.info("Tweet Image Url: {} ".format(detail_image_url))
                else:
                    log.info("No media found")
                    # skip the processing and end dag
                    return False
                data = {
                    "tweet_id": bulletin_tweet_id,
                    "tweet_date": tweet_date,
                    "media_url": detail_image_url
                }
                Variable.set("bulliten_tweet", json.dumps(data))
                return True
            else:
                pass
        else:
            # for-else: no tweet in the batch matched the criteria.
            log.info("No tweets related to {} found".format(find_param))
            return False
    else:
        log.info("No tweets found!")
        return False
# under the License. """Hook for Web HDFS""" from hdfs import HdfsError, InsecureClient from airflow.configuration import conf from airflow.exceptions import AirflowException from airflow.hooks.base_hook import BaseHook from airflow.utils.log.logging_mixin import LoggingMixin _kerberos_security_mode = conf.get("core", "security") == "kerberos" if _kerberos_security_mode: try: from hdfs.ext.kerberos import KerberosClient # pylint: disable=ungrouped-imports except ImportError: log = LoggingMixin().log log.error("Could not load the Kerberos extension for the WebHDFSHook.") raise class AirflowWebHDFSHookException(AirflowException): """Exception specific for WebHDFS hook""" class WebHDFSHook(BaseHook): """ Interact with HDFS. This class is a wrapper around the hdfscli library. :param webhdfs_conn_id: The connection id for the webhdfs client to connect to. :type webhdfs_conn_id: str :param proxy_user: The user used to authenticate. :type proxy_user: str
log.debug('Importing plugin module %s', filepath) # normalize root path as namespace namespace = '_'.join([re.sub(norm_pattern, '__', root), mod_name]) m = imp.load_source(namespace, filepath) for obj in list(m.__dict__.values()): if (inspect.isclass(obj) and issubclass(obj, AirflowPlugin) and obj is not AirflowPlugin): obj.validate() if obj not in plugins: plugins.append(obj) except Exception as e: log.exception(e) log.error('Failed to import plugin %s', filepath) import_errors[filepath] = str(e) def make_module(name, objects): log.debug('Creating module %s', name) name = name.lower() module = imp.new_module(name) module._name = name.split('.')[-1] module._objects = objects module.__dict__.update((o.__name__, o) for o in objects) return module # Plugin components to integrate as modules operators_modules = []
os.path.split(filepath)[-1]) if file_ext != '.py': continue log.debug('Importing plugin module %s', filepath) # normalize root path as namespace namespace = '_'.join([re.sub(norm_pattern, '__', root), mod_name]) m = imp.load_source(namespace, filepath) for obj in list(m.__dict__.values()): if is_valid_plugin(obj, plugins): plugins.append(obj) except Exception as e: log.exception(e) log.error('Failed to import plugin %s', filepath) import_errors[filepath] = str(e) plugins = load_entrypoint_plugins( pkg_resources.iter_entry_points('airflow.plugins'), plugins ) def make_module(name, objects): log.debug('Creating module %s', name) name = name.lower() module = imp.new_module(name) module._name = name.split('.')[-1] module._objects = objects module.__dict__.update((o.__name__, o) for o in objects)
catchup=True, max_active_runs=1) start_operator = DummyOperator(task_id='Begin_execution', dag=dag) ddl_sql_file_name = '../create_tables.sql' sql_path = path.join(path.dirname(path.abspath(__file__)), ddl_sql_file_name) sql_content = None try: with open(sql_path) as reader: sql_content = reader.read() except Exception as err: log.error(f"Failure when reading file {sql_path}") # Tables staging_events_table = "staging_events" staging_songs_table = "staging_songs" target_events_table = "staging_events" target_songs_table = "staging_songs" facts_songplays_table_name = "songplays" dim_users_table_name = "users" dim_songs_table_name = "songs" dim_artists_table_name = "artists" dim_time_table_name = "time" # Task ids stage_events_task_id = "Stage_Events_to_Redshift_And_Validate" stage_songs_task_id = "Stage_Songs_to_Redshift_And_Validate"
# limitations under the License. from airflow.hooks.base_hook import BaseHook from airflow import configuration from hdfs import InsecureClient, HdfsError from airflow.utils.log.logging_mixin import LoggingMixin _kerberos_security_mode = configuration.get("core", "security") == "kerberos" if _kerberos_security_mode: try: from hdfs.ext.kerberos import KerberosClient except ImportError: log = LoggingMixin().log log.error("Could not load the Kerberos extension for the WebHDFSHook.") raise from airflow.exceptions import AirflowException class AirflowWebHDFSHookException(AirflowException): pass class WebHDFSHook(BaseHook): """ Interact with HDFS. This class is a wrapper around the hdfscli library. """ def __init__(self, webhdfs_conn_id='webhdfs_default', proxy_user=None): self.webhdfs_conn_id = webhdfs_conn_id self.proxy_user = proxy_user