"owner": "Robin", "start_date": days_ago(0), "retries": 3, "retry_delay": timedelta(minutes=30), "email": ["*****@*****.**"] } dag = DAG( dag_id="platform_kpi_scraper", default_args=args, schedule_interval='0 * * * *', ) # https://github.com/apache/incubator-airflow/blob/5a3f39913739998ca2e9a17d0f1d10fccb840d36/airflow/contrib/operators/kubernetes_pod_operator.py#L129 surveys_to_s3 = KubernetesPodOperator( namespace="airflow", image=SCRAPER_IMAGE, image_pull_policy='Always', cmds=["bash", "-c"], arguments=["python main.py"], labels={"foo": "bar"}, name="airflow-test-pod", in_cluster=True, task_id="get_kpis", get_logs=True, dag=dag, annotations={"iam.amazonaws.com/role": SCRAPER_IAM_ROLE}, ) except ImportError as e: log.warn("Could not import KubernetesPodOperator: " + str(e))
# pip install apache-airflow[kubernetes] from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator args = { 'owner': 'airflow', 'start_date': days_ago(2) } dag = DAG( dag_id='example_kubernetes_operator', default_args=args, schedule_interval=None) k = KubernetesPodOperator( namespace='default', image="ubuntu:16.04", cmds=["bash", "-cx"], arguments=["echo", "10"], labels={"foo": "bar"}, name="airflow-test-pod", in_cluster=False, task_id="task", get_logs=True, dag=dag, is_delete_operator_pod=False) except ImportError as e: log.warn("Could not import KubernetesPodOperator: " + str(e)) log.warn("Install kubernetes dependencies with: " " pip install apache-airflow[kubernetes]")
from airflow.models import DAG

log = LoggingMixin().log

try:
    # Kubernetes is optional, so not available in vanilla Airflow
    # pip install airflow[kubernetes]
    from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

    args = {'owner': 'airflow', 'start_date': days_ago(2)}

    # Example DAG with no schedule: it only runs when triggered manually.
    dag = DAG(
        dag_id='example_kubernetes_operator',
        default_args=args,
        schedule_interval=None)

    # Launch a throwaway Ubuntu pod that just echoes a value.
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo", "10"],
        labels={"foo": "bar"},
        name="airflow-test-pod",
        in_cluster=False,  # talk to the cluster via the local kubeconfig
        task_id="task",
        get_logs=True,
        dag=dag)
except ImportError as e:
    # BUGFIX: `Logger.warn` is a deprecated alias of `Logger.warning`; use the
    # canonical method so the example does not emit a DeprecationWarning.
    log.warning("Could not import KubernetesPodOperator: " + str(e))
    log.warning("Install kubernetes dependencies with: "
                " pip install airflow['kubernetes']")
def check_if_tweet_is_avalaible(twitter_account_id=None, since_id=None,
                                find_param=None, **kwargs):
    """
    Use the tweepy api via TwitterHook to check whether the given twitter
    account tweeted today a tweet whose text contains ``find_param``.

    Configuration comes from the Airflow Variable ``config``; credentials
    come from the extras of the ``twitter_default`` connection.

    :param twitter_account_id: account whose timeline is scanned
        (overridden by ``config['twitter_account_id']``)
    :param since_id: only tweets newer than this id are fetched
        (overridden by ``config['since_id']`` when present)
    :param find_param: lower-cased substring searched for in the tweet text
        (overridden by ``config['find_param']``)
    :return: True when a matching tweet with media was found and its details
        stored in the ``bulliten_tweet`` Variable; False otherwise
    :raises ConfigVariableNotFoundException: the ``config`` Variable is missing
    :raises AirflowException: a required config key is missing
    :raises TwitterConnectionNotFoundException: the twitter connection is absent
    """
    log = LoggingMixin().log

    # Load Configuration Data from the "config" Airflow Variable.
    try:
        config = json.loads(Variable.get("config"))
        log.info("Config found")
    except AirflowException:
        log.error("Config missing")
        raise ConfigVariableNotFoundException()

    try:
        twitter_account_id = config['twitter_account_id']
    except KeyError:
        raise AirflowException('Missing Twitter Account Id in config variable')

    try:
        since_id = config['since_id']
    except KeyError:
        # since_id is optional; without it the timeline is fetched from the start.
        log.warning("Since id missing")

    try:
        find_param = config['find_param'].lower()
    except KeyError:
        raise AirflowException('Missing Find Param in config variable')

    # Twitter API credentials live in the extras of the "twitter_default" connection.
    try:
        twitter_credentials = BaseHook.get_connection("twitter_default")
        twitter_credentials = json.loads(twitter_credentials.extra)
        consumer_key = twitter_credentials['consumer_key']
        consumer_secret = twitter_credentials['consumer_secret']
        access_token = twitter_credentials['access_token']
        access_token_secret = twitter_credentials['access_token_secret']
    except AirflowException:
        raise TwitterConnectionNotFoundException()

    twitter_hook = TwitterHook(consumer_key=consumer_key,
                               consumer_secret=consumer_secret,
                               access_token=access_token,
                               access_token_secret=access_token_secret)
    tweepy_api = twitter_hook.get_tweepy_api()

    curr_date = date.today().strftime("%d-%m-%Y")

    # try to get tweet related to covid media bulliten from @diprjk handle
    tweets = tweepy_api.user_timeline(id=twitter_account_id,
                                      since_id=since_id,
                                      count=1000,
                                      exclude_replies=True,
                                      include_rts=False,
                                      tweet_mode="extended")
    if not tweets:
        log.info("No tweets found!")
        return False

    # BUGFIX: was len(tweets) + 1, which over-reported the count by one.
    log.info("Found : {} tweets".format(len(tweets)))

    # Loop over all extracted tweets; the first one from today whose full_text
    # contains find_param is our bulletin tweet — save its id and media url.
    for tweet in tweets:
        tweet_date = tweet.created_at.strftime("%d-%m-%Y")
        text = tweet.full_text.lower()
        if find_param not in text or tweet_date != curr_date:
            continue

        bulletin_tweet_id = tweet.id
        print('Tweet found')
        # save bulliten tweet id for the next run / downstream tasks
        log.info("Tweet ID: {} TEXT : {} ".format(
            bulletin_tweet_id, tweet.full_text))

        if 'media' not in tweet.entities:
            log.info("No media found")
            # skip the processing and end dag
            return False

        image_urls = [media['media_url']
                      for media in tweet.extended_entities['media']]
        if not image_urls:
            log.info("No media found")
            return False
        # BUGFIX: unconditionally indexing image_urls[2] raised IndexError for
        # tweets with fewer than three images; fall back to the last image.
        detail_image_url = (image_urls[2] if len(image_urls) > 2
                            else image_urls[-1])
        log.info("Tweet Image Url: {} ".format(detail_image_url))

        data = {
            "tweet_id": bulletin_tweet_id,
            "tweet_date": tweet_date,
            "media_url": detail_image_url
        }
        Variable.set("bulliten_tweet", json.dumps(data))
        return True

    log.info("No tweets related to {} found".format(find_param))
    return False
try:
    # Kubernetes is optional, so not available in vanilla Airflow
    # pip install 'apache-airflow[kubernetes]'
    from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator

    args = {'owner': 'airflow', 'start_date': days_ago(2)}

    # Example DAG with no schedule: it only runs when triggered manually.
    dag = DAG(
        dag_id='example_kubernetes_operator',
        default_args=args,
        schedule_interval=None)

    # Allow the pod to be scheduled onto nodes carrying a matching taint.
    tolerations = [{
        'key': "key",
        'operator': 'Equal',
        'value': 'value'
    }]

    # Launch a throwaway Ubuntu pod that just echoes a value.
    k = KubernetesPodOperator(
        namespace='default',
        image="ubuntu:16.04",
        cmds=["bash", "-cx"],
        arguments=["echo", "10"],
        labels={"foo": "bar"},
        name="airflow-test-pod",
        in_cluster=False,  # talk to the cluster via the local kubeconfig
        task_id="task",
        get_logs=True,
        dag=dag,
        is_delete_operator_pod=False,  # keep the finished pod (not deleted)
        tolerations=tolerations)
except ImportError as e:
    # BUGFIX: `Logger.warn` is a deprecated alias of `Logger.warning`; use the
    # canonical method so the example does not emit a DeprecationWarning.
    log.warning("Could not import KubernetesPodOperator: " + str(e))
    log.warning("Install kubernetes dependencies with: "
                " pip install 'apache-airflow[kubernetes]'")
from flask_login import current_user, logout_user, login_required, login_user
from flask_oauthlib.client import OAuth
from airflow import models, configuration
from airflow.utils.db import provide_session
from airflow.utils.log.logging_mixin import LoggingMixin
import ssl

log = LoggingMixin().log

# When [http_client] insecure = True is set in the Airflow config (and this
# Python build exposes ssl._create_unverified_context), disable certificate
# verification for every HTTPS client created from this point on.
if configuration.conf.has_section("http_client") \
        and configuration.conf.has_option("http_client", "insecure") \
        and configuration.conf.getboolean("http_client", "insecure") \
        and getattr(ssl, '_create_unverified_context', None):
    ssl._create_default_https_context = ssl._create_unverified_context
    log.warn(
        "Airflow is using an insecure HTTP client. ROOT CA check is disabled.")


def get_config_param(param):
    # Read a value from the [oauth] section of the Airflow config, coerced to str.
    return str(configuration.conf.get('oauth', param))


def has_config_param(param):
    # True when the [oauth] section defines the given option.
    return configuration.conf.has_option('oauth', param)


class OAuthUser(models.User):
    # Thin wrapper around an Airflow user record for the OAuth login flow.

    def __init__(self, user):
        # Store the wrapped airflow.models.User record.
        self.user = user

    @property
    # NOTE(review): chunk ends here — the decorated property definition
    # continues beyond this view.